diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index f4d17b3596..bb4e2d24c9 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,4 +1,4 @@ -blank_issues_enabled: false +blank_issues_enabled: true contact_links: - name: Prometheus Community Support url: https://prometheus.io/community/ diff --git a/.github/workflows/container_description.yml b/.github/workflows/container_description.yml index 8ddbc34aeb..144859486d 100644 --- a/.github/workflows/container_description.yml +++ b/.github/workflows/container_description.yml @@ -18,7 +18,7 @@ jobs: if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. steps: - name: git checkout - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Set docker hub repo name run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV - name: Push README to Dockerhub @@ -40,7 +40,7 @@ jobs: if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. steps: - name: git checkout - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Set quay.io org name run: echo "DOCKER_REPO=$(echo quay.io/${GITHUB_REPOSITORY_OWNER} | tr -d '-')" >> $GITHUB_ENV - name: Set quay.io repo name diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b99fd9c175..ea05e84b02 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,11 +21,6 @@ jobs: - name: Checkout Repo uses: actions/checkout@v4 - # This file would normally be created by `make assets`, here we just - # mock it because the file is required for the tests to pass. - - name: Mock building of necessary react file - run: mkdir web/ui/static/react && touch web/ui/static/react/index.html - - name: Run Tests run: GOOPTS=-tags=${{ matrix.buildtags }} make common-test diff --git a/.gitignore b/.gitignore index e85d766b09..0d99305f69 100644 --- a/.gitignore +++ b/.gitignore @@ -22,7 +22,7 @@ benchmark.txt /documentation/examples/remote_storage/example_write_adapter/example_write_adapter npm_licenses.tar.bz2 -/web/ui/static/react +/web/ui/static /vendor /.build diff --git a/.golangci.yml b/.golangci.yml index 303cd33d8b..66b8800333 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -23,6 +23,7 @@ linters: - usestdlibvars - whitespace - loggercheck + - sloglint issues: max-issues-per-linter: 0 @@ -78,7 +79,9 @@ linters-settings: - pkg: "github.com/stretchr/testify/assert" desc: "Use github.com/stretchr/testify/require instead of github.com/stretchr/testify/assert" - pkg: "github.com/go-kit/kit/log" - desc: "Use github.com/go-kit/log instead of github.com/go-kit/kit/log" + desc: "Use log/slog instead of github.com/go-kit/kit/log" + - pkg: "github.com/go-kit/log" + desc: "Use log/slog instead of github.com/go-kit/log" - pkg: "io/ioutil" desc: "Use corresponding 'os' or 'io' functions instead." - pkg: "regexp" @@ -100,8 +103,6 @@ linters-settings: - (net/http.ResponseWriter).Write # No need to check for errors on server's shutdown. - (*net/http.Server).Shutdown - # Never check for logger errors. - - (github.com/go-kit/log.Logger).Log # Never check for rollback errors as Rollback() is called when a previous error was detected. 
- (github.com/prometheus/prometheus/storage.Appender).Rollback goimports: @@ -153,14 +154,4 @@ linters-settings: disable: - float-compare - go-require - enable: - - bool-compare - - compares - - empty - - error-is-as - - error-nil - - expected-actual - - len - - require-error - - suite-dont-use-pkg - - suite-extra-assert-call + enable-all: true diff --git a/.promu.yml b/.promu.yml index 699a7fa585..6feaa6ef64 100644 --- a/.promu.yml +++ b/.promu.yml @@ -28,8 +28,6 @@ tarball: # Whenever there are new files to include in the tarball, # remember to make sure the new files will be generated after `make build`. files: - - consoles - - console_libraries - documentation/examples/prometheus.yml - LICENSE - NOTICE diff --git a/CHANGELOG.md b/CHANGELOG.md index ffa5eaf008..211f488954 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,12 +2,100 @@ ## unreleased -* [BUGFIX] PromQL: Only return "possible non-counter" annotation when `rate` returns points. #14910 -* [FEATURE] OTLP receiver: Add new option `otlp.promote_resource_attributes`, for any OTel resource attributes that should be promoted to metric labels. #14200 -* [ENHANCEMENT] OTLP receiver: Warn when encountering exponential histograms with zero count and non-zero sum. #14706 -* [BUGFIX] tsdb/wlog.Watcher.readSegmentForGC: Only count unknown record types against record_decode_failures_total metric. #14042 -* [ENHANCEMENT] OTLP: During translation, check for context cancellation/timeout. #654 +* [CHANGE] Scraping: Remove implicit fallback to the Prometheus text format in case of invalid/missing Content-Type and fail the scrape instead. Add ability to specify a `fallback_scrape_protocol` in the scrape config. #15136 +* [CHANGE] Remote-write: default enable_http2 to false. #15219 +* [CHANGE] Scraping: normalize "le" and "quantile" label values upon ingestion. #15164 +* [CHANGE] Scraping: config `scrape_classic_histograms` was renamed to `always_scrape_classic_histograms`. #15178 +* [CHANGE] Config: remove expand-external-labels flag, expand external labels env vars by default. #14657 +* [CHANGE] Disallow configuring AM with the v1 api. #13883 +* [ENHANCEMENT] Scraping, rules: handle targets reappearing, or rules moving group, when out-of-order is enabled. #14710 +* [ENHANCEMENT] Tools: add debug printouts to promtool rules unit testing #15196 +* [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694 * [ENHANCEMENT] OTLP receiver: If the feature flag `--created-timestamp-zero-ingestion` is true, convert OTel start timestamps to Prometheus zero samples. #14759 +* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941 +* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941 + +## 3.0.0-beta.1 / 2024-10-09 + +* [CHANGE] regexp `.` now matches all characters (performance improvement). #14505 +* [CHANGE] `holt_winters` is now called `double_exponential_smoothing` and moves behind the [experimental-promql-functions feature flag](https://prometheus.io/docs/prometheus/latest/feature_flags/#experimental-promql-functions). #14930 +* [CHANGE] API: The OTLP receiver endpoint can now be enabled using `--web.enable-otlp-receiver` instead of `--enable-feature=otlp-write-receiver`. #14894 +* [CHANGE] Prometheus will not add or remove port numbers from the target address. `no-default-scrape-port` feature flag removed. #14160 +* [CHANGE] Logging: the format of log lines has changed a little, along with the adoption of Go's Structured Logging package. 
#14906
+* [CHANGE] Don't create extra `_created` timeseries if feature-flag `created-timestamp-zero-ingestion` is enabled. #14738
+* [CHANGE] Float literals and time durations being the same is now a stable feature. #15111
+* [ENHANCEMENT] UI: Many fixes and improvements. #14898, #14899, #14907, #14908, #14912, #14913, #14914, #14931, #14940, #14945, #14946, #14972, #14981, #14982, #14994, #15096
+* [ENHANCEMENT] UI: Web UI now displays notifications, e.g. when starting up and shutting down. #15082
+* [ENHANCEMENT] PromQL: Introduce exponential interpolation for native histograms. #14677
+* [ENHANCEMENT] TSDB: Add support for ingestion of out-of-order native histogram samples. #14850, #14546
+* [ENHANCEMENT] Alerts: remove metrics for removed Alertmanagers. #13909
+* [ENHANCEMENT] Kubernetes SD: Support sidecar containers in endpoint discovery. #14929
+* [ENHANCEMENT] Consul SD: Support catalog filters. #11224
+* [PERF] TSDB: Parallelize deletion of postings after head compaction. #14975
+* [PERF] TSDB: Chunk encoding: shorten some write sequences. #14932
+* [PERF] TSDB: Grow postings by doubling. #14721
+* [PERF] Relabeling: Optimize adding a constant label pair. #12180
+* [BUGFIX] Scraping: Unit was missing when using protobuf format. #15095
+* [BUGFIX] PromQL: Only return "possible non-counter" annotation when `rate` returns points. #14910
+* [BUGFIX] TSDB: Chunks could have one unnecessary zero byte at the end. #14854
+* [BUGFIX] "superfluous response.WriteHeader call" messages in log. #14884
+* [BUGFIX] PromQL: Unary negation of native histograms. #14821
+* [BUGFIX] PromQL: Handle stale marker in native histogram series (e.g. if series goes away and comes back). #15025
+* [BUGFIX] Autoreload: Reload invalid yaml files. #14947
+
+## 3.0.0-beta.0 / 2024-09-05
+
+Release 3.0.0-beta.0 includes new features such as a brand new UI and UTF-8 support enabled by default. As a new major version, several breaking changes are introduced. The breaking changes are mainly around the removal of deprecated feature flags and CLI arguments, and the full list can be found below. Most users should be able to try this release out of the box without any configuration changes.
+
+As is traditional with a beta release, we do **not** recommend users install 3.0.0-beta on critical production systems, but we do want everyone to test it out and find bugs.
+
+* [CHANGE] UI: The old web UI has been replaced by a completely new one that is less cluttered and adds a few new features (PromLens-style tree view, better metrics explorer, "Explain" tab). However, it is still missing some features of the old UI (notably, exemplar display and heatmaps). To switch back to the old UI, you can use the feature flag `--enable-feature=old-ui` for the time being. #14872
+* [CHANGE] PromQL: Range selectors and the lookback delta are now left-open, i.e. a sample coinciding with the lower time limit is excluded rather than included. #13904
+* [CHANGE] Kubernetes SD: Remove support for `discovery.k8s.io/v1beta1` API version of EndpointSlice. This version is no longer served as of Kubernetes v1.25. #14365
+* [CHANGE] Kubernetes SD: Remove support for `networking.k8s.io/v1beta1` API version of Ingress. This version is no longer served as of Kubernetes v1.22. #14365
+* [CHANGE] UTF-8: Enable UTF-8 support by default. Prometheus now allows all UTF-8 characters in metric and label names. The corresponding `utf8-names` feature flag has been removed. #14705
+* [CHANGE] Console: Remove example files for the console feature.
Users can continue using the console feature by supplying their own JavaScript and templates. #14807
+* [CHANGE] SD: Enable the new service discovery manager by default. This SD manager does not restart unchanged discoveries upon reloading. This makes reloads faster and reduces pressure on service discoveries' sources. The corresponding `new-service-discovery-manager` feature flag has been removed. #14770
+* [CHANGE] Agent mode has been promoted to stable. The feature flag `agent` has been removed. To run Prometheus in Agent mode, use the new `--agent` command-line flag instead. #14747
+* [CHANGE] Remove deprecated `remote-write-receiver`, `promql-at-modifier`, and `promql-negative-offset` feature flags. #13456, #14526
+* [CHANGE] Remove deprecated `storage.tsdb.allow-overlapping-blocks`, `alertmanager.timeout`, and `storage.tsdb.retention` flags. #14640, #14643
+* [ENHANCEMENT] Move Alertmanager discovery page from "Monitoring status" to "Server status". #14875
+* [FEATURE] Support automatic config reload - feature flag `auto-reload-config`. #14769
+* [BUGFIX] Scrape: Do not override target parameter labels with config params. #11029
+
+## 2.55.0 / 2024-10-22
+
+* [FEATURE] PromQL: Add experimental `info` function. #14495
+* [FEATURE] Support UTF-8 characters in label names - feature flag `utf8-names`. #14482, #14880, #14736, #14727
+* [FEATURE] Scraping: Add the ability to set custom `http_headers` in config. #14817
+* [FEATURE] Scraping: Support feature flag `created-timestamp-zero-ingestion` in OpenMetrics. #14356, #14815
+* [FEATURE] Scraping: `scrape_failure_log_file` option to log failures to a file. #14734
+* [FEATURE] OTLP receiver: Optional promotion of resource attributes to series labels. #14200
+* [FEATURE] Remote-Write: Support Google Cloud Monitoring authorization. #14346
+* [FEATURE] Promtool: `tsdb create-blocks` new option to add labels. #14403
+* [FEATURE] Promtool: `promtool test` adds `--junit` flag to format results. #14506
+* [FEATURE] TSDB: Add `delayed-compaction` feature flag, for people running many Prometheus instances, to randomize compaction timing. #12532
+* [ENHANCEMENT] OTLP receiver: Warn on exponential histograms with zero count and non-zero sum. #14706
+* [ENHANCEMENT] OTLP receiver: Interrupt translation on context cancellation/timeout. #14612
+* [ENHANCEMENT] Remote Read client: Enable streaming remote read if the server supports it. #11379
+* [ENHANCEMENT] Remote-Write: Don't reshard if we haven't successfully sent a sample since last update. #14450
+* [ENHANCEMENT] PromQL: Delay deletion of `__name__` label to the end of the query evaluation. This is **experimental** and enabled under the feature-flag `promql-delayed-name-removal`. #14477
+* [ENHANCEMENT] PromQL: Experimental `sort_by_label` and `sort_by_label_desc` sort by all labels when label is equal. #14655, #14985
+* [ENHANCEMENT] PromQL: Clarify error message logged when Go runtime panic occurs during query evaluation. #14621
+* [ENHANCEMENT] PromQL: Use Kahan summation for better accuracy in `avg` and `avg_over_time`. #14413
+* [ENHANCEMENT] Tracing: Improve PromQL tracing, including showing the operation performed for aggregates, operators, and calls. #14816
+* [ENHANCEMENT] API: Support multiple listening addresses. #14665
+* [ENHANCEMENT] TSDB: Backward compatibility with upcoming index v3. #14934
+* [PERF] TSDB: Query in-order and out-of-order series together. #14354, #14693, #14714, #14831, #14874, #14948, #15120
+* [PERF] TSDB: Streamline reading of overlapping out-of-order head chunks.
#14729 +* [BUGFIX] PromQL: make sort_by_label stable. #14985 +* [BUGFIX] SD: Fix dropping targets (with feature flag `new-service-discovery-manager`). #13147 +* [BUGFIX] SD: Stop storing stale targets (with feature flag `new-service-discovery-manager`). #13622 +* [BUGFIX] Scraping: exemplars could be dropped in protobuf scraping. #14810 +* [BUGFIX] Remote-Write: fix metadata sending for experimental Remote-Write V2. #14766 +* [BUGFIX] Remote-Write: Return 4xx not 5xx when timeseries has duplicate label. #14716 +* [BUGFIX] Experimental Native Histograms: many fixes for incorrect results, panics, warnings. #14513, #14575, #14598, #14609, #14611, #14771, #14821 +* [BUGFIX] TSDB: Only count unknown record types in `record_decode_failures_total` metric. #14042 ## 2.54.1 / 2024-08-27 @@ -106,7 +194,7 @@ This release changes the default for GOGC, the Go runtime control for the trade- * [ENHANCEMENT] TSDB: Pause regular block compactions if the head needs to be compacted (prioritize head as it increases memory consumption). #13754 * [ENHANCEMENT] Observability: Improved logging during signal handling termination. #13772 * [ENHANCEMENT] Observability: All log lines for drop series use "num_dropped" key consistently. #13823 -* [ENHANCEMENT] Observability: Log chunk snapshot and mmaped chunk replay duration during WAL replay. #13838 +* [ENHANCEMENT] Observability: Log chunk snapshot and mmapped chunk replay duration during WAL replay. #13838 * [ENHANCEMENT] Observability: Log if the block is being created from WBL during compaction. #13846 * [BUGFIX] PromQL: Fix inaccurate sample number statistic when querying histograms. #13667 * [BUGFIX] PromQL: Fix `histogram_stddev` and `histogram_stdvar` for cases where the histogram has negative buckets. #13852 @@ -643,7 +731,7 @@ The binaries published with this release are built with Go1.17.8 to avoid [CVE-2 ## 2.33.0 / 2022-01-29 -* [CHANGE] PromQL: Promote negative offset and `@` modifer to stable features. #10121 +* [CHANGE] PromQL: Promote negative offset and `@` modifier to stable features. #10121 * [CHANGE] Web: Promote remote-write-receiver to stable. #10119 * [FEATURE] Config: Add `stripPort` template function. #10002 * [FEATURE] Promtool: Add cardinality analysis to `check metrics`, enabled by flag `--extended`. #10045 @@ -880,7 +968,7 @@ This vulnerability has been reported by Aaron Devaney from MDSec. * [ENHANCEMENT] Templating: Enable parsing strings in `humanize` functions. #8682 * [BUGFIX] UI: Provide errors instead of blank page on TSDB Status Page. #8654 #8659 * [BUGFIX] TSDB: Do not panic when writing very large records to the WAL. #8790 -* [BUGFIX] TSDB: Avoid panic when mmaped memory is referenced after the file is closed. #8723 +* [BUGFIX] TSDB: Avoid panic when mmapped memory is referenced after the file is closed. #8723 * [BUGFIX] Scaleway Discovery: Fix nil pointer dereference. #8737 * [BUGFIX] Consul Discovery: Restart no longer required after config update with no targets. #8766 @@ -1806,7 +1894,7 @@ information, read the announcement blog post and migration guide. ## 1.7.0 / 2017-06-06 * [CHANGE] Compress remote storage requests and responses with unframed/raw snappy. -* [CHANGE] Properly ellide secrets in config. +* [CHANGE] Properly elide secrets in config. * [FEATURE] Add OpenStack service discovery. * [FEATURE] Add ability to limit Kubernetes service discovery to certain namespaces. * [FEATURE] Add metric for discovered number of Alertmanagers. 
diff --git a/Dockerfile b/Dockerfile index b47f77dcd6..b96b3b765d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,21 +8,16 @@ ARG OS="linux" COPY .build/${OS}-${ARCH}/prometheus /bin/prometheus COPY .build/${OS}-${ARCH}/promtool /bin/promtool COPY documentation/examples/prometheus.yml /etc/prometheus/prometheus.yml -COPY console_libraries/ /usr/share/prometheus/console_libraries/ -COPY consoles/ /usr/share/prometheus/consoles/ COPY LICENSE /LICENSE COPY NOTICE /NOTICE COPY npm_licenses.tar.bz2 /npm_licenses.tar.bz2 WORKDIR /prometheus -RUN ln -s /usr/share/prometheus/console_libraries /usr/share/prometheus/consoles/ /etc/prometheus/ && \ - chown -R nobody:nobody /etc/prometheus /prometheus +RUN chown -R nobody:nobody /etc/prometheus /prometheus USER nobody EXPOSE 9090 VOLUME [ "/prometheus" ] ENTRYPOINT [ "/bin/prometheus" ] CMD [ "--config.file=/etc/prometheus/prometheus.yml", \ - "--storage.tsdb.path=/prometheus", \ - "--web.console.libraries=/usr/share/prometheus/console_libraries", \ - "--web.console.templates=/usr/share/prometheus/consoles" ] + "--storage.tsdb.path=/prometheus" ] diff --git a/MAINTAINERS.md b/MAINTAINERS.md index 3661ddaa0a..de3f3c73b7 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -2,7 +2,6 @@ General maintainers: * Bryan Boreham (bjboreham@gmail.com / @bboreham) -* Levi Harrison (levi@leviharrison.dev / @LeviHarrison) * Ayoub Mrini (ayoubmrini424@gmail.com / @machine424) * Julien Pivotto (roidelapluie@prometheus.io / @roidelapluie) @@ -13,13 +12,12 @@ Maintainers for specific parts of the codebase: * `k8s`: Frederic Branczyk ( / @brancz) * `documentation` * `prometheus-mixin`: Matthias Loibl ( / @metalmatze) -* `model/histogram` and other code related to native histograms: Björn Rabenstein ( / @beorn7), +* `model/histogram` and other code related to native histograms: Björn Rabenstein ( / @beorn7), George Krajcsovits ( / @krajorama) * `storage` * `remote`: Callum Styan ( / @cstyan), Bartłomiej Płotka ( / @bwplotka), Tom Wilkie (tom.wilkie@gmail.com / @tomwilkie), Nicolás Pazos ( / @npazosmendez), Alex Greenbank ( / @alexgreenbank) - * `otlptranslator`: Arve Knudsen ( / @aknuds1), Jesús Vázquez ( / @jesusvazquez) + * `otlptranslator`: Arthur Silva Sens ( / @ArthurSens), Arve Knudsen ( / @aknuds1), Jesús Vázquez ( / @jesusvazquez) * `tsdb`: Ganesh Vernekar ( / @codesome), Bartłomiej Płotka ( / @bwplotka), Jesús Vázquez ( / @jesusvazquez) - * `agent`: Robert Fratto ( / @rfratto) * `web` * `ui`: Julius Volz ( / @juliusv) * `module`: Augustin Husson ( @nexucis) diff --git a/Makefile b/Makefile index f2bb3fcb7a..0b5935de00 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,11 @@ include Makefile.common DOCKER_IMAGE_NAME ?= prometheus +# Only build UI if PREBUILT_ASSETS_STATIC_DIR is not set +ifdef PREBUILT_ASSETS_STATIC_DIR + SKIP_UI_BUILD = true +endif + .PHONY: update-npm-deps update-npm-deps: @echo ">> updating npm dependencies" @@ -42,13 +47,17 @@ upgrade-npm-deps: .PHONY: ui-bump-version ui-bump-version: - version=$$(sed s/2/0/ < VERSION) && ./scripts/ui_release.sh --bump-version "$${version}" + version=$$(./scripts/get_module_version.sh) && ./scripts/ui_release.sh --bump-version "$${version}" cd web/ui && npm install git add "./web/ui/package-lock.json" "./**/package.json" .PHONY: ui-install ui-install: cd $(UI_PATH) && npm install + # The old React app has been separated from the npm workspaces setup to avoid + # issues with conflicting dependencies. 
This is a temporary solution until the + # new Mantine-based UI is fully integrated and the old app can be removed. + cd $(UI_PATH)/react-app && npm install .PHONY: ui-build ui-build: @@ -65,10 +74,30 @@ ui-test: .PHONY: ui-lint ui-lint: cd $(UI_PATH) && npm run lint + # The old React app has been separated from the npm workspaces setup to avoid + # issues with conflicting dependencies. This is a temporary solution until the + # new Mantine-based UI is fully integrated and the old app can be removed. + cd $(UI_PATH)/react-app && npm run lint .PHONY: assets +ifndef SKIP_UI_BUILD assets: ui-install ui-build +.PHONY: npm_licenses +npm_licenses: ui-install + @echo ">> bundling npm licenses" + rm -f $(REACT_APP_NPM_LICENSES_TARBALL) npm_licenses + ln -s . npm_licenses + find npm_licenses/$(UI_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --files-from=- + rm -f npm_licenses +else +assets: + @echo '>> skipping assets build, pre-built assets provided' + +npm_licenses: + @echo '>> skipping assets npm licenses, pre-built assets provided' +endif + .PHONY: assets-compress assets-compress: assets @echo '>> compressing assets' @@ -117,14 +146,6 @@ else test: check-generated-parser common-test ui-build-module ui-test ui-lint check-go-mod-version endif -.PHONY: npm_licenses -npm_licenses: ui-install - @echo ">> bundling npm licenses" - rm -f $(REACT_APP_NPM_LICENSES_TARBALL) npm_licenses - ln -s . npm_licenses - find npm_licenses/$(UI_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --files-from=- - rm -f npm_licenses - .PHONY: tarball tarball: npm_licenses common-tarball diff --git a/Makefile.common b/Makefile.common index db87051bef..2224df9ff5 100644 --- a/Makefile.common +++ b/Makefile.common @@ -277,3 +277,9 @@ $(1)_precheck: exit 1; \ fi endef + +govulncheck: install-govulncheck + govulncheck ./... + +install-govulncheck: + command -v govulncheck > /dev/null || go install golang.org/x/vuln/cmd/govulncheck@latest diff --git a/README.md b/README.md index df974e1097..7528147b0e 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ The Makefile provides several targets: Prometheus is bundled with many service discovery plugins. When building Prometheus from source, you can edit the [plugins.yml](./plugins.yml) -file to disable some service discoveries. The file is a yaml-formated list of go +file to disable some service discoveries. The file is a yaml-formatted list of go import path that will be built into the Prometheus binary. After you have changed the file, you diff --git a/RELEASE.md b/RELEASE.md index 0d3f7456cd..8e78a6a3ec 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -59,6 +59,7 @@ Release cadence of first pre-releases being cut is 6 weeks. | v2.52 | 2024-04-22 | Arthur Silva Sens (GitHub: @ArthurSens) | | v2.53 LTS | 2024-06-03 | George Krajcsovits (GitHub: @krajorama) | | v2.54 | 2024-07-17 | Bryan Boreham (GitHub: @bboreham) | +| v2.55 | 2024-09-17 | Bryan Boreham (GitHub: @bboreham) | If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice. @@ -187,7 +188,7 @@ the Prometheus server, we use major version zero releases for the libraries. 
Tag the new library release via the following commands: ```bash -tag="v$(sed s/2/0/ < VERSION)" +tag="v$(./scripts/get_module_version.sh)" git tag -s "${tag}" -m "${tag}" git push origin "${tag}" ``` diff --git a/VERSION b/VERSION index 3a40665f50..1941d52827 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.54.1 +3.0.0-beta.1 diff --git a/cmd/compact/main.go b/cmd/compact/main.go index 7bac352f6b..b5b4117897 100644 --- a/cmd/compact/main.go +++ b/cmd/compact/main.go @@ -9,7 +9,7 @@ import ( "runtime/pprof" "syscall" - golog "github.com/go-kit/log" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/tsdb" ) @@ -35,7 +35,7 @@ func main() { flag.Parse() - logger := golog.NewLogfmtLogger(os.Stderr) + logger := promslog.New(&promslog.Config{}) var blockDirs []string for _, d := range flag.Args() { diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 1f37d1cdcd..39fef52bd3 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -18,11 +18,11 @@ import ( "context" "errors" "fmt" + "log/slog" "math" "math/bits" "net" "net/http" - _ "net/http/pprof" // Comment this line to disable pprof endpoint. "net/url" "os" "os/signal" @@ -38,8 +38,6 @@ import ( "github.com/KimMachineGun/automemlimit/memlimit" "github.com/alecthomas/kingpin/v2" "github.com/alecthomas/units" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/grafana/regexp" "github.com/mwitkow/go-conntrack" "github.com/oklog/run" @@ -47,8 +45,8 @@ import ( "github.com/prometheus/client_golang/prometheus/collectors" versioncollector "github.com/prometheus/client_golang/prometheus/collectors/version" "github.com/prometheus/common/model" - "github.com/prometheus/common/promlog" - promlogflag "github.com/prometheus/common/promlog/flag" + "github.com/prometheus/common/promslog" + promslogflag "github.com/prometheus/common/promslog/flag" "github.com/prometheus/common/version" toolkit_web "github.com/prometheus/exporter-toolkit/web" "go.uber.org/atomic" @@ -58,8 +56,6 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/discovery/legacymanager" - "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -79,10 +75,50 @@ import ( "github.com/prometheus/prometheus/tsdb/wlog" "github.com/prometheus/prometheus/util/documentcli" "github.com/prometheus/prometheus/util/logging" + "github.com/prometheus/prometheus/util/notifications" prom_runtime "github.com/prometheus/prometheus/util/runtime" "github.com/prometheus/prometheus/web" ) +// klogv1OutputCallDepth is the stack depth where we can find the origin of this call. +const klogv1OutputCallDepth = 6 + +// klogv1DefaultPrefixLength is the length of the log prefix that we have to strip out. +const klogv1DefaultPrefixLength = 53 + +// klogv1Writer is used in SetOutputBySeverity call below to redirect any calls +// to klogv1 to end up in klogv2. +// This is a hack to support klogv1 without use of go-kit/log. It is inspired +// by klog's upstream klogv1/v2 coexistence example: +// https://github.com/kubernetes/klog/blob/main/examples/coexist_klog_v1_and_v2/coexist_klog_v1_and_v2.go +type klogv1Writer struct{} + +// Write redirects klogv1 calls to klogv2. +// This is a hack to support klogv1 without use of go-kit/log. 
It is inspired +// by klog's upstream klogv1/v2 coexistence example: +// https://github.com/kubernetes/klog/blob/main/examples/coexist_klog_v1_and_v2/coexist_klog_v1_and_v2.go +func (kw klogv1Writer) Write(p []byte) (n int, err error) { + if len(p) < klogv1DefaultPrefixLength { + klogv2.InfoDepth(klogv1OutputCallDepth, string(p)) + return len(p), nil + } + + switch p[0] { + case 'I': + klogv2.InfoDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:])) + case 'W': + klogv2.WarningDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:])) + case 'E': + klogv2.ErrorDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:])) + case 'F': + klogv2.FatalDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:])) + default: + klogv2.InfoDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:])) + } + + return len(p), nil +} + var ( appName = "prometheus" @@ -103,6 +139,8 @@ var ( ) func init() { + // This can be removed when the default validation scheme in common is updated. + model.NameValidationScheme = model.UTF8Validation prometheus.MustRegister(versioncollector.NewCollector(strings.ReplaceAll(appName, "-", "_"))) var err error @@ -135,24 +173,25 @@ func agentOnlyFlag(app *kingpin.Application, name, help string) *kingpin.FlagCla type flagConfig struct { configFile string - agentStoragePath string - serverStoragePath string - notifier notifier.Options - forGracePeriod model.Duration - outageTolerance model.Duration - resendDelay model.Duration - maxConcurrentEvals int64 - web web.Options - scrape scrape.Options - tsdb tsdbOptions - agent agentOptions - lookbackDelta model.Duration - webTimeout model.Duration - queryTimeout model.Duration - queryConcurrency int - queryMaxSamples int - RemoteFlushDeadline model.Duration - nameEscapingScheme string + agentStoragePath string + serverStoragePath string + notifier notifier.Options + forGracePeriod model.Duration + outageTolerance model.Duration + resendDelay model.Duration + maxConcurrentEvals int64 + web web.Options + scrape scrape.Options + tsdb tsdbOptions + agent agentOptions + lookbackDelta model.Duration + webTimeout model.Duration + queryTimeout model.Duration + queryConcurrency int + queryMaxSamples int + RemoteFlushDeadline model.Duration + nameEscapingScheme string + maxNotificationsSubscribers int enableAutoReload bool autoReloadInterval model.Duration @@ -161,110 +200,88 @@ type flagConfig struct { memlimitRatio float64 // These options are extracted from featureList // for ease of use. - enableExpandExternalLabels bool - enableNewSDManager bool - enablePerStepStats bool - enableAutoGOMAXPROCS bool - enableAutoGOMEMLIMIT bool - enableConcurrentRuleEval bool + enablePerStepStats bool + enableAutoGOMAXPROCS bool + enableAutoGOMEMLIMIT bool + enableConcurrentRuleEval bool prometheusURL string corsRegexString string - promlogConfig promlog.Config - promqlEnableDelayedNameRemoval bool + + promslogConfig promslog.Config } // setFeatureListOptions sets the corresponding options from the featureList. -func (c *flagConfig) setFeatureListOptions(logger log.Logger) error { +func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error { for _, f := range c.featureList { opts := strings.Split(f, ",") for _, o := range opts { switch o { - case "remote-write-receiver": - c.web.EnableRemoteWriteReceiver = true - level.Warn(logger).Log("msg", "Remote write receiver enabled via feature flag remote-write-receiver. This is DEPRECATED. 
Use --web.enable-remote-write-receiver.") - case "otlp-write-receiver": - c.web.EnableOTLPWriteReceiver = true - level.Info(logger).Log("msg", "Experimental OTLP write receiver enabled") - case "expand-external-labels": - c.enableExpandExternalLabels = true - level.Info(logger).Log("msg", "Experimental expand-external-labels enabled") case "exemplar-storage": c.tsdb.EnableExemplarStorage = true - level.Info(logger).Log("msg", "Experimental in-memory exemplar storage enabled") + logger.Info("Experimental in-memory exemplar storage enabled") case "memory-snapshot-on-shutdown": c.tsdb.EnableMemorySnapshotOnShutdown = true - level.Info(logger).Log("msg", "Experimental memory snapshot on shutdown enabled") + logger.Info("Experimental memory snapshot on shutdown enabled") case "extra-scrape-metrics": c.scrape.ExtraMetrics = true - level.Info(logger).Log("msg", "Experimental additional scrape metrics enabled") + logger.Info("Experimental additional scrape metrics enabled") case "metadata-wal-records": c.scrape.AppendMetadata = true - level.Info(logger).Log("msg", "Experimental metadata records in WAL enabled, required for remote write 2.0") - case "new-service-discovery-manager": - c.enableNewSDManager = true - level.Info(logger).Log("msg", "Experimental service discovery manager") - case "agent": - agentMode = true - level.Info(logger).Log("msg", "Experimental agent mode enabled.") + logger.Info("Experimental metadata records in WAL enabled, required for remote write 2.0") case "promql-per-step-stats": c.enablePerStepStats = true - level.Info(logger).Log("msg", "Experimental per-step statistics reporting") + logger.Info("Experimental per-step statistics reporting") case "auto-gomaxprocs": c.enableAutoGOMAXPROCS = true - level.Info(logger).Log("msg", "Automatically set GOMAXPROCS to match Linux container CPU quota") + logger.Info("Automatically set GOMAXPROCS to match Linux container CPU quota") case "auto-reload-config": c.enableAutoReload = true if s := time.Duration(c.autoReloadInterval).Seconds(); s > 0 && s < 1 { c.autoReloadInterval, _ = model.ParseDuration("1s") } - level.Info(logger).Log("msg", fmt.Sprintf("Enabled automatic configuration file reloading. Checking for configuration changes every %s.", c.autoReloadInterval)) + logger.Info("Enabled automatic configuration file reloading. Checking for configuration changes every", "interval", c.autoReloadInterval) case "auto-gomemlimit": c.enableAutoGOMEMLIMIT = true - level.Info(logger).Log("msg", "Automatically set GOMEMLIMIT to match Linux container or system memory limit") + logger.Info("Automatically set GOMEMLIMIT to match Linux container or system memory limit") case "concurrent-rule-eval": c.enableConcurrentRuleEval = true - level.Info(logger).Log("msg", "Experimental concurrent rule evaluation enabled.") - case "no-default-scrape-port": - c.scrape.NoDefaultPort = true - level.Info(logger).Log("msg", "No default port will be appended to scrape targets' addresses.") + logger.Info("Experimental concurrent rule evaluation enabled.") case "promql-experimental-functions": parser.EnableExperimentalFunctions = true - level.Info(logger).Log("msg", "Experimental PromQL functions enabled.") + logger.Info("Experimental PromQL functions enabled.") case "native-histograms": c.tsdb.EnableNativeHistograms = true c.scrape.EnableNativeHistogramsIngestion = true // Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers. 
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols - level.Info(logger).Log("msg", "Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols)) + logger.Info("Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols)) case "ooo-native-histograms": c.tsdb.EnableOOONativeHistograms = true - level.Info(logger).Log("msg", "Experimental out-of-order native histogram ingestion enabled. This will only take effect if OutOfOrderTimeWindow is > 0 and if EnableNativeHistograms = true") + logger.Info("Experimental out-of-order native histogram ingestion enabled. This will only take effect if OutOfOrderTimeWindow is > 0 and if EnableNativeHistograms = true") case "created-timestamp-zero-ingestion": c.scrape.EnableCreatedTimestampZeroIngestion = true c.web.EnableCreatedTimestampZeroIngestion = true // Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers. config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols - level.Info(logger).Log("msg", "Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols)) + logger.Info("Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols)) case "delayed-compaction": c.tsdb.EnableDelayedCompaction = true - level.Info(logger).Log("msg", "Experimental delayed compaction is enabled.") + logger.Info("Experimental delayed compaction is enabled.") case "promql-delayed-name-removal": c.promqlEnableDelayedNameRemoval = true - level.Info(logger).Log("msg", "Experimental PromQL delayed name removal enabled.") - case "utf8-names": - model.NameValidationScheme = model.UTF8Validation - level.Info(logger).Log("msg", "Experimental UTF-8 support enabled") + logger.Info("Experimental PromQL delayed name removal enabled.") case "": continue - case "promql-at-modifier", "promql-negative-offset": - level.Warn(logger).Log("msg", "This option for --enable-feature is now permanently enabled and therefore a no-op.", "option", o) + case "old-ui": + c.web.UseOldUI = true + logger.Info("Serving previous version of the Prometheus web UI.") default: - level.Warn(logger).Log("msg", "Unknown option for --enable-feature", "option", o) + logger.Warn("Unknown option for --enable-feature", "option", o) } } } @@ -278,11 +295,6 @@ func main() { runtime.SetMutexProfileFraction(20) } - var ( - oldFlagRetentionDuration model.Duration - newFlagRetentionDuration model.Duration - ) - // Unregister the default GoCollector, and reregister with our defaults. 
if prometheus.Unregister(collectors.NewGoCollector()) { prometheus.MustRegister( @@ -303,7 +315,7 @@ func main() { Registerer: prometheus.DefaultRegisterer, Gatherer: prometheus.DefaultGatherer, }, - promlogConfig: promlog.Config{}, + promslogConfig: promslog.Config{}, } a := kingpin.New(filepath.Base(os.Args[0]), "The Prometheus monitoring server").UsageWriter(os.Stdout) @@ -336,6 +348,9 @@ func main() { a.Flag("web.max-connections", "Maximum number of simultaneous connections across all listeners."). Default("512").IntVar(&cfg.web.MaxConnections) + a.Flag("web.max-notifications-subscribers", "Limits the maximum number of subscribers that can concurrently receive live notifications. If the limit is reached, new subscription requests will be denied until existing connections close."). + Default("16").IntVar(&cfg.maxNotificationsSubscribers) + a.Flag("web.external-url", "The URL under which Prometheus is externally reachable (for example, if Prometheus is served via a reverse proxy). Used for generating relative and absolute links back to Prometheus itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Prometheus. If omitted, relevant URL components will be derived automatically."). PlaceHolder("").StringVar(&cfg.prometheusURL) @@ -362,6 +377,9 @@ func main() { a.Flag("web.remote-write-receiver.accepted-protobuf-messages", fmt.Sprintf("List of the remote write protobuf messages to accept when receiving the remote writes. Supported values: %v", supportedRemoteWriteProtoMsgs.String())). Default(supportedRemoteWriteProtoMsgs.Strings()...).SetValue(rwProtoMsgFlagValue(&cfg.web.AcceptRemoteWriteProtoMsgs)) + a.Flag("web.enable-otlp-receiver", "Enable API endpoint accepting OTLP write requests."). + Default("false").BoolVar(&cfg.web.EnableOTLPWriteReceiver) + a.Flag("web.console.templates", "Path to the console template directory, available at /consoles."). Default("consoles").StringVar(&cfg.web.ConsoleTemplatesPath) @@ -392,11 +410,8 @@ func main() { "Size at which to split the tsdb WAL segment files. Example: 100MB"). Hidden().PlaceHolder("").BytesVar(&cfg.tsdb.WALSegmentSize) - serverOnlyFlag(a, "storage.tsdb.retention", "[DEPRECATED] How long to retain samples in storage. This flag has been deprecated, use \"storage.tsdb.retention.time\" instead."). - SetValue(&oldFlagRetentionDuration) - - serverOnlyFlag(a, "storage.tsdb.retention.time", "How long to retain samples in storage. When this flag is set it overrides \"storage.tsdb.retention\". If neither this flag nor \"storage.tsdb.retention\" nor \"storage.tsdb.retention.size\" is set, the retention time defaults to "+defaultRetentionString+". Units Supported: y, w, d, h, m, s, ms."). - SetValue(&newFlagRetentionDuration) + serverOnlyFlag(a, "storage.tsdb.retention.time", "How long to retain samples in storage. If neither this flag nor \"storage.tsdb.retention.size\" is set, the retention time defaults to "+defaultRetentionString+". Units Supported: y, w, d, h, m, s, ms."). + SetValue(&cfg.tsdb.RetentionDuration) serverOnlyFlag(a, "storage.tsdb.retention.size", "Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: \"512MB\". Based on powers-of-2, so 1KB is 1024B."). BytesVar(&cfg.tsdb.MaxBytes) @@ -404,11 +419,6 @@ func main() { serverOnlyFlag(a, "storage.tsdb.no-lockfile", "Do not create lockfile in data directory."). Default("false").BoolVar(&cfg.tsdb.NoLockfile) - // TODO: Remove in Prometheus 3.0. 
- var b bool - serverOnlyFlag(a, "storage.tsdb.allow-overlapping-blocks", "[DEPRECATED] This flag has no effect. Overlapping blocks are enabled by default now."). - Default("true").Hidden().BoolVar(&b) - serverOnlyFlag(a, "storage.tsdb.allow-overlapping-compaction", "Allow compaction of overlapping blocks. If set to false, TSDB stops vertical compaction and leaves overlapping blocks there. The use case is to let another component handle the compaction of overlapping blocks."). Default("true").Hidden().BoolVar(&cfg.tsdb.EnableOverlappingCompaction) @@ -488,9 +498,6 @@ func main() { serverOnlyFlag(a, "alertmanager.drain-notification-queue-on-shutdown", "Send any outstanding Alertmanager notifications when shutting down. If false, any outstanding Alertmanager notifications will be dropped when shutting down."). Default("true").BoolVar(&cfg.notifier.DrainOnShutdown) - // TODO: Remove in Prometheus 3.0. - alertmanagerTimeout := a.Flag("alertmanager.timeout", "[DEPRECATED] This flag has no effect.").Hidden().String() - serverOnlyFlag(a, "query.lookback-delta", "The maximum lookback duration for retrieving metrics during expression evaluations and federation."). Default("5m").SetValue(&cfg.lookbackDelta) @@ -506,12 +513,12 @@ func main() { a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates."). Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval) - a.Flag("scrape.name-escaping-scheme", `Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots".`).Default(scrape.DefaultNameEscapingScheme.String()).StringVar(&cfg.nameEscapingScheme) - - a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomaxprocs, auto-gomemlimit, auto-reload-config, concurrent-rule-eval, created-timestamp-zero-ingestion, delayed-compaction, exemplar-storage, expand-external-labels, extra-scrape-metrics, memory-snapshot-on-shutdown, native-histograms, new-service-discovery-manager, no-default-scrape-port, otlp-write-receiver, promql-experimental-functions, promql-delayed-name-removal, promql-per-step-stats, remote-write-receiver (DEPRECATED), utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). + a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, native-histograms, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). Default("").StringsVar(&cfg.featureList) - promlogflag.AddFlags(a, &cfg.promlogConfig) + a.Flag("agent", "Run Prometheus in 'Agent mode'.").BoolVar(&agentMode) + + promslogflag.AddFlags(a, &cfg.promslogConfig) a.Flag("write-documentation", "Generate command line documentation. 
Internal use.").Hidden().Action(func(ctx *kingpin.ParseContext) error { if err := documentcli.GenerateMarkdown(a.Model(), os.Stdout); err != nil { @@ -529,7 +536,13 @@ func main() { os.Exit(2) } - logger := promlog.New(&cfg.promlogConfig) + logger := promslog.New(&cfg.promslogConfig) + slog.SetDefault(logger) + + notifs := notifications.NewNotifications(cfg.maxNotificationsSubscribers, prometheus.DefaultRegisterer) + cfg.web.NotificationsSub = notifs.Sub + cfg.web.NotificationsGetter = notifs.Get + notifs.AddNotification(notifications.StartingUp) if err := cfg.setFeatureListOptions(logger); err != nil { fmt.Fprintln(os.Stderr, fmt.Errorf("Error parsing feature list: %w", err)) @@ -577,18 +590,14 @@ func main() { os.Exit(2) } - if *alertmanagerTimeout != "" { - level.Warn(logger).Log("msg", "The flag --alertmanager.timeout has no effect and will be removed in the future.") - } - // Throw error for invalid config before starting other components. var cfgFile *config.Config - if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, false, log.NewNopLogger()); err != nil { + if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, promslog.NewNopLogger()); err != nil { absPath, pathErr := filepath.Abs(cfg.configFile) if pathErr != nil { absPath = cfg.configFile } - level.Error(logger).Log("msg", fmt.Sprintf("Error loading config (--config.file=%s)", cfg.configFile), "file", absPath, "err", err) + logger.Error(fmt.Sprintf("Error loading config (--config.file=%s)", cfg.configFile), "file", absPath, "err", err) os.Exit(2) } if _, err := cfgFile.GetScrapeConfigs(); err != nil { @@ -596,7 +605,7 @@ func main() { if pathErr != nil { absPath = cfg.configFile } - level.Error(logger).Log("msg", fmt.Sprintf("Error loading scrape config files from config (--config.file=%q)", cfg.configFile), "file", absPath, "err", err) + logger.Error(fmt.Sprintf("Error loading scrape config files from config (--config.file=%q)", cfg.configFile), "file", absPath, "err", err) os.Exit(2) } if cfg.tsdb.EnableExemplarStorage { @@ -627,20 +636,9 @@ func main() { cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/") if !agentMode { - // Time retention settings. - if oldFlagRetentionDuration != 0 { - level.Warn(logger).Log("deprecation_notice", "'storage.tsdb.retention' flag is deprecated use 'storage.tsdb.retention.time' instead.") - cfg.tsdb.RetentionDuration = oldFlagRetentionDuration - } - - // When the new flag is set it takes precedence. - if newFlagRetentionDuration != 0 { - cfg.tsdb.RetentionDuration = newFlagRetentionDuration - } - if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 { cfg.tsdb.RetentionDuration = defaultRetentionDuration - level.Info(logger).Log("msg", "No time or size retention was set so using the default time retention", "duration", defaultRetentionDuration) + logger.Info("No time or size retention was set so using the default time retention", "duration", defaultRetentionDuration) } // Check for overflows. This limits our max retention to 100y. @@ -650,7 +648,7 @@ func main() { panic(err) } cfg.tsdb.RetentionDuration = y - level.Warn(logger).Log("msg", "Time retention value is too high. Limiting to: "+y.String()) + logger.Warn("Time retention value is too high. Limiting to: " + y.String()) } // Max block size settings. @@ -671,11 +669,8 @@ func main() { noStepSubqueryInterval := &safePromQLNoStepSubqueryInterval{} noStepSubqueryInterval.Set(config.DefaultGlobalConfig.EvaluationInterval) - // Above level 6, the k8s client would log bearer tokens in clear-text. 
- klog.ClampLevel(6) - klog.SetLogger(log.With(logger, "component", "k8s_client_runtime")) - klogv2.ClampLevel(6) - klogv2.SetLogger(log.With(logger, "component", "k8s_client_runtime")) + klogv2.SetSlogLogger(logger.With("component", "k8s_client_runtime")) + klog.SetOutputBySeverity("INFO", klogv1Writer{}) modeAppName := "Prometheus Server" mode := "server" @@ -684,20 +679,22 @@ func main() { mode = "agent" } - level.Info(logger).Log("msg", "Starting "+modeAppName, "mode", mode, "version", version.Info()) + logger.Info("Starting "+modeAppName, "mode", mode, "version", version.Info()) if bits.UintSize < 64 { - level.Warn(logger).Log("msg", "This Prometheus binary has not been compiled for a 64-bit architecture. Due to virtual memory constraints of 32-bit systems, it is highly recommended to switch to a 64-bit binary of Prometheus.", "GOARCH", runtime.GOARCH) + logger.Warn("This Prometheus binary has not been compiled for a 64-bit architecture. Due to virtual memory constraints of 32-bit systems, it is highly recommended to switch to a 64-bit binary of Prometheus.", "GOARCH", runtime.GOARCH) } - level.Info(logger).Log("build_context", version.BuildContext()) - level.Info(logger).Log("host_details", prom_runtime.Uname()) - level.Info(logger).Log("fd_limits", prom_runtime.FdLimits()) - level.Info(logger).Log("vm_limits", prom_runtime.VMLimits()) + logger.Info("operational information", + "build_context", version.BuildContext(), + "host_details", prom_runtime.Uname(), + "fd_limits", prom_runtime.FdLimits(), + "vm_limits", prom_runtime.VMLimits(), + ) var ( localStorage = &readyStorage{stats: tsdb.NewDBStats()} scraper = &readyScrapeManager{} - remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, localStoragePath, time.Duration(cfg.RemoteFlushDeadline), scraper, cfg.scrape.AppendMetadata) + remoteStorage = remote.NewStorage(logger.With("component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, localStoragePath, time.Duration(cfg.RemoteFlushDeadline), scraper, cfg.scrape.AppendMetadata) fanoutStorage = storage.NewFanout(logger, localStorage, remoteStorage) ) @@ -705,12 +702,12 @@ func main() { ctxWeb, cancelWeb = context.WithCancel(context.Background()) ctxRule = context.Background() - notifierManager = notifier.NewManager(&cfg.notifier, log.With(logger, "component", "notifier")) + notifierManager = notifier.NewManager(&cfg.notifier, logger.With("component", "notifier")) ctxScrape, cancelScrape = context.WithCancel(context.Background()) ctxNotify, cancelNotify = context.WithCancel(context.Background()) - discoveryManagerScrape discoveryManager - discoveryManagerNotify discoveryManager + discoveryManagerScrape *discovery.Manager + discoveryManagerNotify *discovery.Manager ) // Kubernetes client metrics are used by Kubernetes SD. @@ -720,62 +717,37 @@ func main() { // they are not specific to an SD instance. 
err = discovery.RegisterK8sClientMetricsWithPrometheus(prometheus.DefaultRegisterer) if err != nil { - level.Error(logger).Log("msg", "failed to register Kubernetes client metrics", "err", err) + logger.Error("failed to register Kubernetes client metrics", "err", err) os.Exit(1) } sdMetrics, err := discovery.CreateAndRegisterSDMetrics(prometheus.DefaultRegisterer) if err != nil { - level.Error(logger).Log("msg", "failed to register service discovery metrics", "err", err) + logger.Error("failed to register service discovery metrics", "err", err) os.Exit(1) } - if cfg.enableNewSDManager { - { - discMgr := discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("scrape")) - if discMgr == nil { - level.Error(logger).Log("msg", "failed to create a discovery manager scrape") - os.Exit(1) - } - discoveryManagerScrape = discMgr - } - - { - discMgr := discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("notify")) - if discMgr == nil { - level.Error(logger).Log("msg", "failed to create a discovery manager notify") - os.Exit(1) - } - discoveryManagerNotify = discMgr - } - } else { - { - discMgr := legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, legacymanager.Name("scrape")) - if discMgr == nil { - level.Error(logger).Log("msg", "failed to create a discovery manager scrape") - os.Exit(1) - } - discoveryManagerScrape = discMgr - } + discoveryManagerScrape = discovery.NewManager(ctxScrape, logger.With("component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("scrape")) + if discoveryManagerScrape == nil { + logger.Error("failed to create a discovery manager scrape") + os.Exit(1) + } - { - discMgr := legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, legacymanager.Name("notify")) - if discMgr == nil { - level.Error(logger).Log("msg", "failed to create a discovery manager notify") - os.Exit(1) - } - discoveryManagerNotify = discMgr - } + discoveryManagerNotify = discovery.NewManager(ctxNotify, logger.With("component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("notify")) + if discoveryManagerNotify == nil { + logger.Error("failed to create a discovery manager notify") + os.Exit(1) } scrapeManager, err := scrape.NewManager( &cfg.scrape, - log.With(logger, "component", "scrape manager"), + logger.With("component", "scrape manager"), + logging.NewJSONFileLogger, fanoutStorage, prometheus.DefaultRegisterer, ) if err != nil { - level.Error(logger).Log("msg", "failed to create a scrape manager", "err", err) + logger.Error("failed to create a scrape manager", "err", err) os.Exit(1) } @@ -788,10 +760,10 @@ func main() { if cfg.enableAutoGOMAXPROCS { l := func(format string, a ...interface{}) { - level.Info(logger).Log("component", "automaxprocs", "msg", fmt.Sprintf(strings.TrimPrefix(format, "maxprocs: "), a...)) + logger.Info(fmt.Sprintf(strings.TrimPrefix(format, "maxprocs: "), a...), "component", "automaxprocs") } if _, err := maxprocs.Set(maxprocs.Logger(l)); err != nil { - level.Warn(logger).Log("component", "automaxprocs", "msg", "Failed to set GOMAXPROCS automatically", "err", err) + logger.Warn("Failed to set GOMAXPROCS automatically", "component", "automaxprocs", "err", 
err) } } @@ -805,17 +777,17 @@ func main() { ), ), ); err != nil { - level.Warn(logger).Log("component", "automemlimit", "msg", "Failed to set GOMEMLIMIT automatically", "err", err) + logger.Warn("automemlimit", "msg", "Failed to set GOMEMLIMIT automatically", "err", err) } } if !agentMode { opts := promql.EngineOpts{ - Logger: log.With(logger, "component", "query engine"), + Logger: logger.With("component", "query engine"), Reg: prometheus.DefaultRegisterer, MaxSamples: cfg.queryMaxSamples, Timeout: time.Duration(cfg.queryTimeout), - ActiveQueryTracker: promql.NewActiveQueryTracker(localStoragePath, cfg.queryConcurrency, log.With(logger, "component", "activeQueryTracker")), + ActiveQueryTracker: promql.NewActiveQueryTracker(localStoragePath, cfg.queryConcurrency, logger.With("component", "activeQueryTracker")), LookbackDelta: time.Duration(cfg.lookbackDelta), NoStepSubqueryIntervalFn: noStepSubqueryInterval.Get, // EnableAtModifier and EnableNegativeOffset have to be @@ -836,7 +808,7 @@ func main() { Context: ctxRule, ExternalURL: cfg.web.ExternalURL, Registerer: prometheus.DefaultRegisterer, - Logger: log.With(logger, "component", "rule manager"), + Logger: logger.With("component", "rule manager"), OutageTolerance: time.Duration(cfg.outageTolerance), ForGracePeriod: time.Duration(cfg.forGracePeriod), ResendDelay: time.Duration(cfg.resendDelay), @@ -887,7 +859,7 @@ func main() { } // Depends on cfg.web.ScrapeManager so needs to be after cfg.web.ScrapeManager = scrapeManager. - webHandler := web.New(log.With(logger, "component", "web"), &cfg.web) + webHandler := web.New(logger.With("component", "web"), &cfg.web) // Monitor outgoing connections on default transport with conntrack. http.DefaultTransport.(*http.Transport).DialContext = conntrack.NewDialContextFunc( @@ -1014,18 +986,18 @@ func main() { listeners, err := webHandler.Listeners() if err != nil { - level.Error(logger).Log("msg", "Unable to start web listeners", "err", err) + logger.Error("Unable to start web listener", "err", err) if err := queryEngine.Close(); err != nil { - level.Warn(logger).Log("msg", "Closing query engine failed", "err", err) + logger.Warn("Closing query engine failed", "err", err) } os.Exit(1) } err = toolkit_web.Validate(*webConfig) if err != nil { - level.Error(logger).Log("msg", "Unable to validate web configuration file", "err", err) + logger.Error("Unable to validate web configuration file", "err", err) if err := queryEngine.Close(); err != nil { - level.Warn(logger).Log("msg", "Closing query engine failed", "err", err) + logger.Warn("Closing query engine failed", "err", err) } os.Exit(1) } @@ -1041,21 +1013,22 @@ func main() { // Don't forget to release the reloadReady channel so that waiting blocks can exit normally. 
select { case sig := <-term: - level.Warn(logger).Log("msg", "Received an OS signal, exiting gracefully...", "signal", sig.String()) + logger.Warn("Received an OS signal, exiting gracefully...", "signal", sig.String()) reloadReady.Close() case <-webHandler.Quit(): - level.Warn(logger).Log("msg", "Received termination request via web service, exiting gracefully...") + logger.Warn("Received termination request via web service, exiting gracefully...") case <-cancel: reloadReady.Close() } if err := queryEngine.Close(); err != nil { - level.Warn(logger).Log("msg", "Closing query engine failed", "err", err) + logger.Warn("Closing query engine failed", "err", err) } return nil }, func(err error) { close(cancel) - webHandler.SetReady(false) + webHandler.SetReady(web.Stopping) + notifs.AddNotification(notifications.ShuttingDown) }, ) } @@ -1064,11 +1037,11 @@ func main() { g.Add( func() error { err := discoveryManagerScrape.Run() - level.Info(logger).Log("msg", "Scrape discovery manager stopped") + logger.Info("Scrape discovery manager stopped") return err }, func(err error) { - level.Info(logger).Log("msg", "Stopping scrape discovery manager...") + logger.Info("Stopping scrape discovery manager...") cancelScrape() }, ) @@ -1078,11 +1051,11 @@ func main() { g.Add( func() error { err := discoveryManagerNotify.Run() - level.Info(logger).Log("msg", "Notify discovery manager stopped") + logger.Info("Notify discovery manager stopped") return err }, func(err error) { - level.Info(logger).Log("msg", "Stopping notify discovery manager...") + logger.Info("Stopping notify discovery manager...") cancelNotify() }, ) @@ -1111,7 +1084,7 @@ func main() { <-reloadReady.C err := scrapeManager.Run(discoveryManagerScrape.SyncCh()) - level.Info(logger).Log("msg", "Scrape manager stopped") + logger.Info("Scrape manager stopped") return err }, func(err error) { @@ -1119,7 +1092,7 @@ func main() { // so that it doesn't try to write samples to a closed storage. // We should also wait for rule manager to be fully stopped to ensure // we don't trigger any false positive alerts for rules using absent(). 
- level.Info(logger).Log("msg", "Stopping scrape manager...") + logger.Info("Stopping scrape manager...") scrapeManager.Stop() }, ) @@ -1150,8 +1123,16 @@ func main() { if cfg.enableAutoReload { checksum, err = config.GenerateChecksum(cfg.configFile) if err != nil { - level.Error(logger).Log("msg", "Failed to generate initial checksum for configuration file", "err", err) + logger.Error("Failed to generate initial checksum for configuration file", "err", err) + } + } + + callback := func(success bool) { + if success { + notifs.DeleteNotification(notifications.ConfigurationUnsuccessful) + return } + notifs.AddNotification(notifications.ConfigurationUnsuccessful) } g.Add( @@ -1161,18 +1142,18 @@ func main() { for { select { case <-hup: - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil { - level.Error(logger).Log("msg", "Error reloading config", "err", err) + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { + logger.Error("Error reloading config", "err", err) } else if cfg.enableAutoReload { if currentChecksum, err := config.GenerateChecksum(cfg.configFile); err == nil { checksum = currentChecksum } else { - level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err) + logger.Error("Failed to generate checksum during configuration reload", "err", err) } } case rc := <-webHandler.Reload(): - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil { - level.Error(logger).Log("msg", "Error reloading config", "err", err) + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { + logger.Error("Error reloading config", "err", err) rc <- err } else { rc <- nil @@ -1180,7 +1161,7 @@ func main() { if currentChecksum, err := config.GenerateChecksum(cfg.configFile); err == nil { checksum = currentChecksum } else { - level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err) + logger.Error("Failed to generate checksum during configuration reload", "err", err) } } } @@ -1190,16 +1171,14 @@ func main() { } currentChecksum, err := config.GenerateChecksum(cfg.configFile) if err != nil { - level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err) - continue - } - if currentChecksum == checksum { + logger.Error("Failed to generate checksum during configuration reload", "err", err) + } else if currentChecksum == checksum { continue } - level.Info(logger).Log("msg", "Configuration file change detected, reloading the configuration.") + logger.Info("Configuration file change detected, reloading the configuration.") - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil { - level.Error(logger).Log("msg", "Error reloading config", "err", err) + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { + logger.Error("Error reloading config", "err", err) } else { checksum = currentChecksum } @@ -1228,14 +1207,15 @@ func main() { return nil } - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, 
cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil { + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, func(bool) {}, reloaders...); err != nil { return fmt.Errorf("error loading config from %q: %w", cfg.configFile, err) } reloadReady.Close() - webHandler.SetReady(true) - level.Info(logger).Log("msg", "Server is ready to receive web requests.") + webHandler.SetReady(web.Ready) + notifs.DeleteNotification(notifications.StartingUp) + logger.Info("Server is ready to receive web requests.") <-cancel return nil }, @@ -1250,7 +1230,7 @@ func main() { cancel := make(chan struct{}) g.Add( func() error { - level.Info(logger).Log("msg", "Starting TSDB ...") + logger.Info("Starting TSDB ...") if cfg.tsdb.WALSegmentSize != 0 { if cfg.tsdb.WALSegmentSize < 10*1024*1024 || cfg.tsdb.WALSegmentSize > 256*1024*1024 { return errors.New("flag 'storage.tsdb.wal-segment-size' must be set between 10MB and 256MB") @@ -1269,13 +1249,13 @@ func main() { switch fsType := prom_runtime.Statfs(localStoragePath); fsType { case "NFS_SUPER_MAGIC": - level.Warn(logger).Log("fs_type", fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.") + logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType) default: - level.Info(logger).Log("fs_type", fsType) + logger.Info("filesystem information", "fs_type", fsType) } - level.Info(logger).Log("msg", "TSDB started") - level.Debug(logger).Log("msg", "TSDB options", + logger.Info("TSDB started") + logger.Debug("TSDB options", "MinBlockDuration", cfg.tsdb.MinBlockDuration, "MaxBlockDuration", cfg.tsdb.MaxBlockDuration, "MaxBytes", cfg.tsdb.MaxBytes, @@ -1294,7 +1274,7 @@ func main() { }, func(err error) { if err := fanoutStorage.Close(); err != nil { - level.Error(logger).Log("msg", "Error stopping storage", "err", err) + logger.Error("Error stopping storage", "err", err) } close(cancel) }, @@ -1306,7 +1286,7 @@ func main() { cancel := make(chan struct{}) g.Add( func() error { - level.Info(logger).Log("msg", "Starting WAL storage ...") + logger.Info("Starting WAL storage ...") if cfg.agent.WALSegmentSize != 0 { if cfg.agent.WALSegmentSize < 10*1024*1024 || cfg.agent.WALSegmentSize > 256*1024*1024 { return errors.New("flag 'storage.agent.wal-segment-size' must be set between 10MB and 256MB") @@ -1325,13 +1305,13 @@ func main() { switch fsType := prom_runtime.Statfs(localStoragePath); fsType { case "NFS_SUPER_MAGIC": - level.Warn(logger).Log("fs_type", fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.") + logger.Warn(fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. 
Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.") default: - level.Info(logger).Log("fs_type", fsType) + logger.Info(fsType) } - level.Info(logger).Log("msg", "Agent WAL storage started") - level.Debug(logger).Log("msg", "Agent WAL storage options", + logger.Info("Agent WAL storage started") + logger.Debug("Agent WAL storage options", "WALSegmentSize", cfg.agent.WALSegmentSize, "WALCompression", cfg.agent.WALCompression, "StripeSize", cfg.agent.StripeSize, @@ -1349,7 +1329,7 @@ func main() { }, func(e error) { if err := fanoutStorage.Close(); err != nil { - level.Error(logger).Log("msg", "Error stopping storage", "err", err) + logger.Error("Error stopping storage", "err", err) } close(cancel) }, @@ -1383,7 +1363,7 @@ func main() { <-reloadReady.C notifierManager.Run(discoveryManagerNotify.SyncCh()) - level.Info(logger).Log("msg", "Notifier manager stopped") + logger.Info("Notifier manager stopped") return nil }, func(err error) { @@ -1392,16 +1372,16 @@ func main() { ) } if err := g.Run(); err != nil { - level.Error(logger).Log("err", err) + logger.Error("Error running goroutines from run.Group", "err", err) os.Exit(1) } - level.Info(logger).Log("msg", "See you next time!") + logger.Info("See you next time!") } -func openDBWithMetrics(dir string, logger log.Logger, reg prometheus.Registerer, opts *tsdb.Options, stats *tsdb.DBStats) (*tsdb.DB, error) { +func openDBWithMetrics(dir string, logger *slog.Logger, reg prometheus.Registerer, opts *tsdb.Options, stats *tsdb.DBStats) (*tsdb.DB, error) { db, err := tsdb.Open( dir, - log.With(logger, "component", "tsdb"), + logger.With("component", "tsdb"), reg, opts, stats, @@ -1454,21 +1434,23 @@ type reloader struct { reloader func(*config.Config) error } -func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage bool, logger log.Logger, noStepSuqueryInterval *safePromQLNoStepSubqueryInterval, rls ...reloader) (err error) { +func reloadConfig(filename string, enableExemplarStorage bool, logger *slog.Logger, noStepSuqueryInterval *safePromQLNoStepSubqueryInterval, callback func(bool), rls ...reloader) (err error) { start := time.Now() - timings := []interface{}{} - level.Info(logger).Log("msg", "Loading configuration file", "filename", filename) + timingsLogger := logger + logger.Info("Loading configuration file", "filename", filename) defer func() { if err == nil { configSuccess.Set(1) configSuccessTime.SetToCurrentTime() + callback(true) } else { configSuccess.Set(0) + callback(false) } }() - conf, err := config.LoadFile(filename, agentMode, expandExternalLabels, logger) + conf, err := config.LoadFile(filename, agentMode, logger) if err != nil { return fmt.Errorf("couldn't load configuration (--config.file=%q): %w", filename, err) } @@ -1483,10 +1465,10 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b for _, rl := range rls { rstart := time.Now() if err := rl.reloader(conf); err != nil { - level.Error(logger).Log("msg", "Failed to apply configuration", "err", err) + logger.Error("Failed to apply configuration", "err", err) failed = true } - timings = append(timings, rl.name, time.Since(rstart)) + timingsLogger = timingsLogger.With((rl.name), time.Since(rstart)) } if failed { return fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename) @@ -1494,7 +1476,7 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b oldGoGC := 
debug.SetGCPercent(conf.Runtime.GoGC) if oldGoGC != conf.Runtime.GoGC { - level.Info(logger).Log("msg", "updated GOGC", "old", oldGoGC, "new", conf.Runtime.GoGC) + logger.Info("updated GOGC", "old", oldGoGC, "new", conf.Runtime.GoGC) } // Write the new setting out to the ENV var for runtime API output. if conf.Runtime.GoGC >= 0 { @@ -1504,8 +1486,7 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b } noStepSuqueryInterval.Set(conf.GlobalConfig.EvaluationInterval) - l := []interface{}{"msg", "Completed loading of configuration file", "filename", filename, "totalDuration", time.Since(start)} - level.Info(logger).Log(append(l, timings...)...) + timingsLogger.Info("Completed loading of configuration file", "filename", filename, "totalDuration", time.Since(start)) return nil } @@ -1659,6 +1640,9 @@ func (s *readyStorage) Appender(ctx context.Context) storage.Appender { type notReadyAppender struct{} +// SetOptions does nothing in this appender implementation. +func (n notReadyAppender) SetOptions(opts *storage.AppendOptions) {} + func (n notReadyAppender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { return 0, tsdb.ErrNotReady } @@ -1671,6 +1655,10 @@ func (n notReadyAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels return 0, tsdb.ErrNotReady } +func (n notReadyAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + return 0, tsdb.ErrNotReady +} + func (n notReadyAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { return 0, tsdb.ErrNotReady } @@ -1866,15 +1854,6 @@ func (opts agentOptions) ToAgentOptions(outOfOrderTimeWindow int64) agent.Option } } -// discoveryManager interfaces the discovery manager. This is used to keep using -// the manager that restarts SD's on reload for a few releases until we feel -// the new manager can be enabled for all users. -type discoveryManager interface { - ApplyConfig(cfg map[string]discovery.Configs) error - Run() error - SyncCh() <-chan map[string][]*targetgroup.Group -} - // rwProtoMsgFlagParser is a custom parser for config.RemoteWriteProtoMsg enum. type rwProtoMsgFlagParser struct { msgs *[]config.RemoteWriteProtoMsg diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go index c827812e60..4bd1c71b2d 100644 --- a/cmd/prometheus/main_test.go +++ b/cmd/prometheus/main_test.go @@ -31,9 +31,9 @@ import ( "time" "github.com/alecthomas/kingpin/v2" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/config" @@ -42,6 +42,11 @@ import ( "github.com/prometheus/prometheus/rules" ) +func init() { + // This can be removed when the default validation scheme in common is updated. 
+ model.NameValidationScheme = model.UTF8Validation +} + const startupTime = 10 * time.Second var ( @@ -120,6 +125,7 @@ func TestFailedStartupExitCode(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() fakeInputFile := "fake-input-file" expectedExitStatus := 2 @@ -206,83 +212,125 @@ func TestWALSegmentSizeBounds(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() - for size, expectedExitStatus := range map[string]int{"9MB": 1, "257MB": 1, "10": 2, "1GB": 1, "12MB": 0} { - prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) - - // Log stderr in case of failure. - stderr, err := prom.StderrPipe() - require.NoError(t, err) - go func() { - slurp, _ := io.ReadAll(stderr) - t.Log(string(slurp)) - }() - - err = prom.Start() - require.NoError(t, err) - - if expectedExitStatus == 0 { - done := make(chan error, 1) - go func() { done <- prom.Wait() }() - select { - case err := <-done: - require.Fail(t, "prometheus should be still running: %v", err) - case <-time.After(startupTime): - prom.Process.Kill() - <-done + for _, tc := range []struct { + size string + exitCode int + }{ + { + size: "9MB", + exitCode: 1, + }, + { + size: "257MB", + exitCode: 1, + }, + { + size: "10", + exitCode: 2, + }, + { + size: "1GB", + exitCode: 1, + }, + { + size: "12MB", + exitCode: 0, + }, + } { + t.Run(tc.size, func(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) + + // Log stderr in case of failure. 
+ stderr, err := prom.StderrPipe() + require.NoError(t, err) + go func() { + slurp, _ := io.ReadAll(stderr) + t.Log(string(slurp)) + }() + + err = prom.Start() + require.NoError(t, err) + + if tc.exitCode == 0 { + done := make(chan error, 1) + go func() { done <- prom.Wait() }() + select { + case err := <-done: + require.Fail(t, "prometheus should be still running: %v", err) + case <-time.After(startupTime): + prom.Process.Kill() + <-done + } + return } - continue - } - err = prom.Wait() - require.Error(t, err) - var exitError *exec.ExitError - require.ErrorAs(t, err, &exitError) - status := exitError.Sys().(syscall.WaitStatus) - require.Equal(t, expectedExitStatus, status.ExitStatus()) + err = prom.Wait() + require.Error(t, err) + var exitError *exec.ExitError + require.ErrorAs(t, err, &exitError) + status := exitError.Sys().(syscall.WaitStatus) + require.Equal(t, tc.exitCode, status.ExitStatus()) + }) } } func TestMaxBlockChunkSegmentSizeBounds(t *testing.T) { - t.Parallel() - if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() + + for _, tc := range []struct { + size string + exitCode int + }{ + { + size: "512KB", + exitCode: 1, + }, + { + size: "1MB", + exitCode: 0, + }, + } { + t.Run(tc.size, func(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) - for size, expectedExitStatus := range map[string]int{"512KB": 1, "1MB": 0} { - prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) - - // Log stderr in case of failure. - stderr, err := prom.StderrPipe() - require.NoError(t, err) - go func() { - slurp, _ := io.ReadAll(stderr) - t.Log(string(slurp)) - }() - - err = prom.Start() - require.NoError(t, err) - - if expectedExitStatus == 0 { - done := make(chan error, 1) - go func() { done <- prom.Wait() }() - select { - case err := <-done: - require.Fail(t, "prometheus should be still running: %v", err) - case <-time.After(startupTime): - prom.Process.Kill() - <-done + // Log stderr in case of failure. 
+ stderr, err := prom.StderrPipe() + require.NoError(t, err) + go func() { + slurp, _ := io.ReadAll(stderr) + t.Log(string(slurp)) + }() + + err = prom.Start() + require.NoError(t, err) + + if tc.exitCode == 0 { + done := make(chan error, 1) + go func() { done <- prom.Wait() }() + select { + case err := <-done: + require.Fail(t, "prometheus should be still running: %v", err) + case <-time.After(startupTime): + prom.Process.Kill() + <-done + } + return } - continue - } - err = prom.Wait() - require.Error(t, err) - var exitError *exec.ExitError - require.ErrorAs(t, err, &exitError) - status := exitError.Sys().(syscall.WaitStatus) - require.Equal(t, expectedExitStatus, status.ExitStatus()) + err = prom.Wait() + require.Error(t, err) + var exitError *exec.ExitError + require.ErrorAs(t, err, &exitError) + status := exitError.Sys().(syscall.WaitStatus) + require.Equal(t, tc.exitCode, status.ExitStatus()) + }) } } @@ -290,7 +338,7 @@ func TestTimeMetrics(t *testing.T) { tmpDir := t.TempDir() reg := prometheus.NewRegistry() - db, err := openDBWithMetrics(tmpDir, log.NewNopLogger(), reg, nil, nil) + db, err := openDBWithMetrics(tmpDir, promslog.NewNopLogger(), reg, nil, nil) require.NoError(t, err) defer func() { require.NoError(t, db.Close()) @@ -348,7 +396,9 @@ func getCurrentGaugeValuesFor(t *testing.T, reg prometheus.Gatherer, metricNames } func TestAgentSuccessfulStartup(t *testing.T) { - prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig) + t.Parallel() + + prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig) require.NoError(t, prom.Start()) actualExitStatus := 0 @@ -366,7 +416,9 @@ func TestAgentSuccessfulStartup(t *testing.T) { } func TestAgentFailedStartupWithServerFlag(t *testing.T) { - prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) + t.Parallel() + + prom := exec.Command(promPath, "-test.main", "--agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) output := bytes.Buffer{} prom.Stderr = &output @@ -393,7 +445,9 @@ func TestAgentFailedStartupWithServerFlag(t *testing.T) { } func TestAgentFailedStartupWithInvalidConfig(t *testing.T) { - prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) + t.Parallel() + + prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) require.NoError(t, prom.Start()) actualExitStatus := 0 @@ -414,6 +468,7 @@ func TestModeSpecificFlags(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() testcases := []struct { mode string @@ -428,10 +483,11 @@ func TestModeSpecificFlags(t *testing.T) { for _, tc := range testcases { t.Run(fmt.Sprintf("%s mode with option %s", tc.mode, tc.arg), func(t *testing.T) { + t.Parallel() args := []string{"-test.main", tc.arg, t.TempDir(), "--web.listen-address=0.0.0.0:0"} if tc.mode == "agent" { - args = append(args, "--enable-feature=agent", "--config.file="+agentConfig) + args = append(args, "--agent", "--config.file="+agentConfig) } else { args = append(args, "--config.file="+promConfig) } @@ -479,6 +535,8 @@ func TestDocumentation(t *testing.T) { if runtime.GOOS == "windows" { t.SkipNow() } + t.Parallel() + ctx, cancel := 
context.WithTimeout(context.Background(), 5*time.Second) defer cancel() @@ -503,6 +561,8 @@ func TestDocumentation(t *testing.T) { } func TestRwProtoMsgFlagParser(t *testing.T) { + t.Parallel() + defaultOpts := config.RemoteWriteProtoMsgs{ config.RemoteWriteProtoMsgV1, config.RemoteWriteProtoMsgV2, } diff --git a/cmd/prometheus/main_unix_test.go b/cmd/prometheus/main_unix_test.go index 2011fb123f..94eec27e79 100644 --- a/cmd/prometheus/main_unix_test.go +++ b/cmd/prometheus/main_unix_test.go @@ -34,6 +34,7 @@ func TestStartupInterrupt(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() port := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t)) diff --git a/cmd/prometheus/query_log_test.go b/cmd/prometheus/query_log_test.go index 62e317bf8b..25abf5e965 100644 --- a/cmd/prometheus/query_log_test.go +++ b/cmd/prometheus/query_log_test.go @@ -125,12 +125,61 @@ func (p *queryLogTest) query(t *testing.T) { require.NoError(t, err) require.Equal(t, 200, r.StatusCode) case ruleOrigin: - time.Sleep(2 * time.Second) + // Poll the /api/v1/rules endpoint until a new rule evaluation is detected. + var lastEvalTime time.Time + for { + r, err := http.Get(fmt.Sprintf("http://%s:%d/api/v1/rules", p.host, p.port)) + require.NoError(t, err) + + rulesBody, err := io.ReadAll(r.Body) + require.NoError(t, err) + defer r.Body.Close() + + // Parse the rules response to find the last evaluation time. + newEvalTime := parseLastEvaluation(rulesBody) + if newEvalTime.After(lastEvalTime) { + if !lastEvalTime.IsZero() { + break + } + lastEvalTime = newEvalTime + } + + time.Sleep(100 * time.Millisecond) + } default: panic("can't query this origin") } } +// parseLastEvaluation extracts the last evaluation timestamp from the /api/v1/rules response. +func parseLastEvaluation(rulesBody []byte) time.Time { + var ruleResponse struct { + Status string `json:"status"` + Data struct { + Groups []struct { + Rules []struct { + LastEvaluation string `json:"lastEvaluation"` + } `json:"rules"` + } `json:"groups"` + } `json:"data"` + } + + err := json.Unmarshal(rulesBody, &ruleResponse) + if err != nil { + return time.Time{} + } + + for _, group := range ruleResponse.Data.Groups { + for _, rule := range group.Rules { + if evalTime, err := time.Parse(time.RFC3339Nano, rule.LastEvaluation); err == nil { + return evalTime + } + } + } + + return time.Time{} +} + // queryString returns the expected queryString of a this test. 
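parseLastEvaluation above returns the first lastEvaluation timestamp it can parse, and the polling loop in query() only stops once it has seen that timestamp advance, i.e. after a fresh rule evaluation. A rough usage sketch against a hand-written /api/v1/rules payload; the payload is made up, the field names follow the struct tags above, and fmt/time are assumed to be imported:

// Hypothetical check of parseLastEvaluation; assumes the function above is in scope.
func Example_parseLastEvaluation() {
	body := []byte(`{"status":"success","data":{"groups":[{"rules":[{"lastEvaluation":"2024-10-09T12:00:00.123Z"}]}]}}`)
	ts := parseLastEvaluation(body)
	fmt.Println(ts.IsZero())                   // false: parsed with time.RFC3339Nano
	fmt.Println(ts.UTC().Format(time.RFC3339)) // 2024-10-09T12:00:00Z
}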
func (p *queryLogTest) queryString() string { switch p.origin { @@ -322,7 +371,7 @@ func (p *queryLogTest) run(t *testing.T) { if p.exactQueryCount() { require.Len(t, ql, qc) } else { - require.Greater(t, len(ql), qc, "no queries logged") + require.GreaterOrEqual(t, len(ql), qc, "no queries logged") } p.validateLastQuery(t, ql) qc = len(ql) @@ -353,7 +402,7 @@ func (p *queryLogTest) run(t *testing.T) { if p.exactQueryCount() { require.Len(t, ql, qc) } else { - require.Greater(t, len(ql), qc, "no queries logged") + require.GreaterOrEqual(t, len(ql), qc, "no queries logged") } p.validateLastQuery(t, ql) @@ -393,6 +442,7 @@ func readQueryLog(t *testing.T, path string) []queryLogLine { file, err := os.Open(path) require.NoError(t, err) defer file.Close() + scanner := bufio.NewScanner(file) for scanner.Scan() { var q queryLogLine @@ -406,6 +456,7 @@ func TestQueryLog(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() cwd, err := os.Getwd() require.NoError(t, err) @@ -424,6 +475,7 @@ func TestQueryLog(t *testing.T) { } t.Run(p.String(), func(t *testing.T) { + t.Parallel() p.run(t) }) } diff --git a/cmd/prometheus/scrape_failure_log_test.go b/cmd/prometheus/scrape_failure_log_test.go new file mode 100644 index 0000000000..8d86d719f9 --- /dev/null +++ b/cmd/prometheus/scrape_failure_log_test.go @@ -0,0 +1,193 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bytes" + "fmt" + "net/http" + "net/http/httptest" + "net/url" + "os" + "os/exec" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.uber.org/atomic" + + "github.com/prometheus/prometheus/util/testutil" +) + +func TestScrapeFailureLogFile(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + // Tracks the number of requests made to the mock server. + var requestCount atomic.Int32 + + // Starts a server that always returns HTTP 500 errors. + mockServerAddress := startGarbageServer(t, &requestCount) + + // Create a temporary directory for Prometheus configuration and logs. + tempDir := t.TempDir() + + // Define file paths for the scrape failure log and Prometheus configuration. + // Like other files, the scrape failure log file should be relative to the + // config file. Therefore, we split the name we put in the file and the full + // path used to check the content of the file. + scrapeFailureLogFileName := "scrape_failure.log" + scrapeFailureLogFile := filepath.Join(tempDir, scrapeFailureLogFileName) + promConfigFile := filepath.Join(tempDir, "prometheus.yml") + + // Step 1: Set up an initial Prometheus configuration that globally + // specifies a scrape failure log file. 
+ promConfig := fmt.Sprintf(` +global: + scrape_interval: 500ms + scrape_failure_log_file: %s + +scrape_configs: + - job_name: 'test_job' + static_configs: + - targets: ['%s'] +`, scrapeFailureLogFileName, mockServerAddress) + + err := os.WriteFile(promConfigFile, []byte(promConfig), 0o644) + require.NoError(t, err, "Failed to write Prometheus configuration file") + + // Start Prometheus with the generated configuration and a random port, enabling the lifecycle API. + port := testutil.RandomUnprivilegedPort(t) + params := []string{ + "-test.main", + "--config.file=" + promConfigFile, + "--storage.tsdb.path=" + filepath.Join(tempDir, "data"), + fmt.Sprintf("--web.listen-address=127.0.0.1:%d", port), + "--web.enable-lifecycle", + } + prometheusProcess := exec.Command(promPath, params...) + prometheusProcess.Stdout = os.Stdout + prometheusProcess.Stderr = os.Stderr + + err = prometheusProcess.Start() + require.NoError(t, err, "Failed to start Prometheus") + defer prometheusProcess.Process.Kill() + + // Wait until the mock server receives at least two requests from Prometheus. + require.Eventually(t, func() bool { + return requestCount.Load() >= 2 + }, 30*time.Second, 500*time.Millisecond, "Expected at least two requests to the mock server") + + // Verify that the scrape failures have been logged to the specified file. + content, err := os.ReadFile(scrapeFailureLogFile) + require.NoError(t, err, "Failed to read scrape failure log") + require.Contains(t, string(content), "server returned HTTP status 500 Internal Server Error", "Expected scrape failure log entry not found") + + // Step 2: Update the Prometheus configuration to remove the scrape failure + // log file setting. + promConfig = fmt.Sprintf(` +global: + scrape_interval: 1s + +scrape_configs: + - job_name: 'test_job' + static_configs: + - targets: ['%s'] +`, mockServerAddress) + + err = os.WriteFile(promConfigFile, []byte(promConfig), 0o644) + require.NoError(t, err, "Failed to update Prometheus configuration file") + + // Reload Prometheus with the updated configuration. + reloadPrometheus(t, port) + + // Count the number of lines in the scrape failure log file before any + // further requests. + preReloadLogLineCount := countLinesInFile(scrapeFailureLogFile) + + // Wait for at least two more requests to the mock server to ensure + // Prometheus continues scraping. + requestsBeforeReload := requestCount.Load() + require.Eventually(t, func() bool { + return requestCount.Load() >= requestsBeforeReload+2 + }, 30*time.Second, 500*time.Millisecond, "Expected two more requests to the mock server after configuration reload") + + // Ensure that no new lines were added to the scrape failure log file after + // the configuration change. + require.Equal(t, preReloadLogLineCount, countLinesInFile(scrapeFailureLogFile), "No new lines should be added to the scrape failure log file after removing the log setting") + + // Step 3: Re-add the scrape failure log file setting, but this time under + // scrape_configs, and reload Prometheus. + promConfig = fmt.Sprintf(` +global: + scrape_interval: 1s + +scrape_configs: + - job_name: 'test_job' + scrape_failure_log_file: %s + static_configs: + - targets: ['%s'] +`, scrapeFailureLogFileName, mockServerAddress) + + err = os.WriteFile(promConfigFile, []byte(promConfig), 0o644) + require.NoError(t, err, "Failed to update Prometheus configuration file") + + // Reload Prometheus with the updated configuration. 
+ reloadPrometheus(t, port) + + // Wait for at least two more requests to the mock server and verify that + // new log entries are created. + postReloadLogLineCount := countLinesInFile(scrapeFailureLogFile) + requestsBeforeReAddingLog := requestCount.Load() + require.Eventually(t, func() bool { + return requestCount.Load() >= requestsBeforeReAddingLog+2 + }, 30*time.Second, 500*time.Millisecond, "Expected two additional requests after re-adding the log setting") + + // Confirm that new lines were added to the scrape failure log file. + require.Greater(t, countLinesInFile(scrapeFailureLogFile), postReloadLogLineCount, "New lines should be added to the scrape failure log file after re-adding the log setting") +} + +// reloadPrometheus sends a reload request to the Prometheus server to apply +// updated configurations. +func reloadPrometheus(t *testing.T, port int) { + resp, err := http.Post(fmt.Sprintf("http://127.0.0.1:%d/-/reload", port), "", nil) + require.NoError(t, err, "Failed to reload Prometheus") + require.Equal(t, http.StatusOK, resp.StatusCode, "Unexpected status code when reloading Prometheus") +} + +// startGarbageServer sets up a mock server that returns a 500 Internal Server Error +// for all requests. It also increments the request count each time it's hit. +func startGarbageServer(t *testing.T, requestCount *atomic.Int32) string { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + requestCount.Inc() + w.WriteHeader(http.StatusInternalServerError) + })) + t.Cleanup(server.Close) + + parsedURL, err := url.Parse(server.URL) + require.NoError(t, err, "Failed to parse mock server URL") + + return parsedURL.Host +} + +// countLinesInFile counts and returns the number of lines in the specified file. +func countLinesInFile(filePath string) int { + data, err := os.ReadFile(filePath) + if err != nil { + return 0 // Return 0 if the file doesn't exist or can't be read. + } + return bytes.Count(data, []byte{'\n'}) +} diff --git a/cmd/promtool/backfill.go b/cmd/promtool/backfill.go index 16491f0416..1408975df9 100644 --- a/cmd/promtool/backfill.go +++ b/cmd/promtool/backfill.go @@ -21,9 +21,10 @@ import ( "math" "time" - "github.com/go-kit/log" "github.com/oklog/ulid" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/textparse" "github.com/prometheus/prometheus/tsdb" @@ -120,7 +121,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn // also need to append samples throughout the whole block range. To allow that, we // pretend that the block is twice as large here, but only really add sample in the // original interval later. 
- w, err := tsdb.NewBlockWriter(log.NewNopLogger(), outputDir, 2*blockDuration) + w, err := tsdb.NewBlockWriter(promslog.NewNopLogger(), outputDir, 2*blockDuration) if err != nil { return fmt.Errorf("block writer: %w", err) } diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index d1b6c0fcd9..49676ee5c4 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -32,13 +32,13 @@ import ( "time" "github.com/alecthomas/kingpin/v2" - "github.com/go-kit/log" "github.com/google/pprof/profile" "github.com/prometheus/client_golang/api" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil/promlint" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/version" "github.com/prometheus/exporter-toolkit/web" "gopkg.in/yaml.v2" @@ -58,10 +58,16 @@ import ( _ "github.com/prometheus/prometheus/plugins" // Register plugins. "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/promqltest" + "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/util/documentcli" ) +func init() { + // This can be removed when the default validation scheme in common is updated. + model.NameValidationScheme = model.UTF8Validation +} + const ( successExitCode = 0 failureExitCode = 1 @@ -211,6 +217,7 @@ func main() { "test-rule-file", "The unit test file.", ).Required().ExistingFiles() + testRulesDebug := testRulesCmd.Flag("debug", "Enable unit test debugging.").Default("false").Bool() testRulesDiff := testRulesCmd.Flag("diff", "[Experimental] Print colored differential output between expected & received output.").Default("false").Bool() defaultDBPath := "data/" @@ -286,7 +293,7 @@ func main() { promQLLabelsDeleteQuery := promQLLabelsDeleteCmd.Arg("query", "PromQL query.").Required().String() promQLLabelsDeleteName := promQLLabelsDeleteCmd.Arg("name", "Name of the label to delete.").Required().String() - featureList := app.Flag("enable-feature", "Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details.").Default("").Strings() + featureList := app.Flag("enable-feature", "Comma separated feature names to enable. Currently unused.").Default("").Strings() documentationCmd := app.Command("write-documentation", "Generate command line documentation. 
Internal use.").Hidden() @@ -316,26 +323,21 @@ func main() { } } - var noDefaultScrapePort bool for _, f := range *featureList { opts := strings.Split(f, ",") for _, o := range opts { switch o { - case "no-default-scrape-port": - noDefaultScrapePort = true case "": continue - case "promql-at-modifier", "promql-negative-offset": - fmt.Printf(" WARNING: Option for --enable-feature is a no-op after promotion to a stable feature: %q\n", o) default: - fmt.Printf(" WARNING: Unknown option for --enable-feature: %q\n", o) + fmt.Printf(" WARNING: --enable-feature is currently a no-op") } } } switch parsedCmd { case sdCheckCmd.FullCommand(): - os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort, prometheus.DefaultRegisterer)) + os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, prometheus.DefaultRegisterer)) case checkConfigCmd.FullCommand(): os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...)) @@ -391,6 +393,7 @@ func main() { }, *testRulesRun, *testRulesDiff, + *testRulesDebug, *testRulesFiles...), ) @@ -575,7 +578,7 @@ func checkFileExists(fn string) error { func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]string, error) { fmt.Println("Checking", filename) - cfg, err := config.LoadFile(filename, agentMode, false, log.NewNopLogger()) + cfg, err := config.LoadFile(filename, agentMode, promslog.NewNopLogger()) if err != nil { return nil, err } @@ -895,30 +898,30 @@ func compare(a, b compareRuleType) int { func checkDuplicates(groups []rulefmt.RuleGroup) []compareRuleType { var duplicates []compareRuleType - var rules compareRuleTypes + var cRules compareRuleTypes for _, group := range groups { for _, rule := range group.Rules { - rules = append(rules, compareRuleType{ + cRules = append(cRules, compareRuleType{ metric: ruleMetric(rule), - label: labels.FromMap(rule.Labels), + label: rules.FromMaps(group.Labels, rule.Labels), }) } } - if len(rules) < 2 { + if len(cRules) < 2 { return duplicates } - sort.Sort(rules) + sort.Sort(cRules) - last := rules[0] - for i := 1; i < len(rules); i++ { - if compare(last, rules[i]) == 0 { + last := cRules[0] + for i := 1; i < len(cRules); i++ { + if compare(last, cRules[i]) == 0 { // Don't add a duplicated rule multiple times. 
if len(duplicates) == 0 || compare(last, duplicates[len(duplicates)-1]) != 0 { - duplicates = append(duplicates, rules[i]) + duplicates = append(duplicates, cRules[i]) } } - last = rules[i] + last = cRules[i] } return duplicates @@ -1182,7 +1185,7 @@ func importRules(url *url.URL, roundTripper http.RoundTripper, start, end, outpu return fmt.Errorf("new api client error: %w", err) } - ruleImporter := newRuleImporter(log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)), cfg, api) + ruleImporter := newRuleImporter(promslog.New(&promslog.Config{}), cfg, api) errs := ruleImporter.loadGroups(ctx, files) for _, err := range errs { if err != nil { @@ -1216,7 +1219,7 @@ func checkTargetGroupsForScrapeConfig(targetGroups []*targetgroup.Group, scfg *c lb := labels.NewBuilder(labels.EmptyLabels()) for _, tg := range targetGroups { var failures []error - targets, failures = scrape.TargetsFromGroup(tg, scfg, false, targets, lb) + targets, failures = scrape.TargetsFromGroup(tg, scfg, targets, lb) if len(failures) > 0 { first := failures[0] return first diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go index 2b086851f3..698e6641d1 100644 --- a/cmd/promtool/main_test.go +++ b/cmd/promtool/main_test.go @@ -31,6 +31,7 @@ import ( "testing" "time" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/labels" @@ -38,6 +39,11 @@ import ( "github.com/prometheus/prometheus/promql/promqltest" ) +func init() { + // This can be removed when the default validation scheme in common is updated. + model.NameValidationScheme = model.UTF8Validation +} + var promtoolPath = os.Args[0] func TestMain(m *testing.M) { @@ -140,7 +146,7 @@ func TestCheckSDFile(t *testing.T) { t.Run(test.name, func(t *testing.T) { _, err := checkSDFile(test.file) if test.err != "" { - require.Equalf(t, test.err, err.Error(), "Expected error %q, got %q", test.err, err.Error()) + require.EqualErrorf(t, err, test.err, "Expected error %q, got %q", test.err, err.Error()) return } require.NoError(t, err) @@ -222,7 +228,7 @@ func TestCheckTargetConfig(t *testing.T) { t.Run(test.name, func(t *testing.T) { _, err := checkConfig(false, "testdata/"+test.file, false) if test.err != "" { - require.Equalf(t, test.err, err.Error(), "Expected error %q, got %q", test.err, err.Error()) + require.EqualErrorf(t, err, test.err, "Expected error %q, got %q", test.err, err.Error()) return } require.NoError(t, err) @@ -309,7 +315,7 @@ func TestCheckConfigSyntax(t *testing.T) { expectedErrMsg = test.errWindows } if expectedErrMsg != "" { - require.Equalf(t, expectedErrMsg, err.Error(), "Expected error %q, got %q", test.err, err.Error()) + require.EqualErrorf(t, err, expectedErrMsg, "Expected error %q, got %q", test.err, err.Error()) return } require.NoError(t, err) @@ -339,7 +345,7 @@ func TestAuthorizationConfig(t *testing.T) { t.Run(test.name, func(t *testing.T) { _, err := checkConfig(false, "testdata/"+test.file, false) if test.err != "" { - require.Contains(t, err.Error(), test.err, "Expected error to contain %q, got %q", test.err, err.Error()) + require.ErrorContains(t, err, test.err, "Expected error to contain %q, got %q", test.err, err.Error()) return } require.NoError(t, err) diff --git a/cmd/promtool/rules.go b/cmd/promtool/rules.go index 5a18644842..adb214b812 100644 --- a/cmd/promtool/rules.go +++ b/cmd/promtool/rules.go @@ -16,12 +16,12 @@ package main import ( "context" "fmt" + "log/slog" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" v1 
"github.com/prometheus/client_golang/api/prometheus/v1" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" @@ -38,7 +38,7 @@ type queryRangeAPI interface { } type ruleImporter struct { - logger log.Logger + logger *slog.Logger config ruleImporterConfig apiClient queryRangeAPI @@ -57,8 +57,8 @@ type ruleImporterConfig struct { // newRuleImporter creates a new rule importer that can be used to parse and evaluate recording rule files and create new series // written to disk in blocks. -func newRuleImporter(logger log.Logger, config ruleImporterConfig, apiClient queryRangeAPI) *ruleImporter { - level.Info(logger).Log("backfiller", "new rule importer", "start", config.start.Format(time.RFC822), "end", config.end.Format(time.RFC822)) +func newRuleImporter(logger *slog.Logger, config ruleImporterConfig, apiClient queryRangeAPI) *ruleImporter { + logger.Info("new rule importer", "component", "backfiller", "start", config.start.Format(time.RFC822), "end", config.end.Format(time.RFC822)) return &ruleImporter{ logger: logger, config: config, @@ -80,10 +80,10 @@ func (importer *ruleImporter) loadGroups(_ context.Context, filenames []string) // importAll evaluates all the recording rules and creates new time series and writes them to disk in blocks. func (importer *ruleImporter) importAll(ctx context.Context) (errs []error) { for name, group := range importer.groups { - level.Info(importer.logger).Log("backfiller", "processing group", "name", name) + importer.logger.Info("processing group", "component", "backfiller", "name", name) for i, r := range group.Rules() { - level.Info(importer.logger).Log("backfiller", "processing rule", "id", i, "name", r.Name()) + importer.logger.Info("processing rule", "component", "backfiller", "id", i, "name", r.Name()) if err := importer.importRule(ctx, r.Query().String(), r.Name(), r.Labels(), importer.config.start, importer.config.end, int64(importer.config.maxBlockDuration/time.Millisecond), group); err != nil { errs = append(errs, err) } @@ -124,7 +124,7 @@ func (importer *ruleImporter) importRule(ctx context.Context, ruleExpr, ruleName return fmt.Errorf("query range: %w", err) } if warnings != nil { - level.Warn(importer.logger).Log("msg", "Range query returned warnings.", "warnings", warnings) + importer.logger.Warn("Range query returned warnings.", "warnings", warnings) } // To prevent races with compaction, a block writer only allows appending samples @@ -133,7 +133,7 @@ func (importer *ruleImporter) importRule(ctx context.Context, ruleExpr, ruleName // also need to append samples throughout the whole block range. To allow that, we // pretend that the block is twice as large here, but only really add sample in the // original interval later. 
- w, err := tsdb.NewBlockWriter(log.NewNopLogger(), importer.config.outputDir, 2*blockDuration) + w, err := tsdb.NewBlockWriter(promslog.NewNopLogger(), importer.config.outputDir, 2*blockDuration) if err != nil { return fmt.Errorf("new block writer: %w", err) } diff --git a/cmd/promtool/rules_test.go b/cmd/promtool/rules_test.go index d55fb0c896..94e28e570d 100644 --- a/cmd/promtool/rules_test.go +++ b/cmd/promtool/rules_test.go @@ -21,9 +21,9 @@ import ( "testing" "time" - "github.com/go-kit/log" v1 "github.com/prometheus/client_golang/api/prometheus/v1" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/labels" @@ -161,7 +161,7 @@ func TestBackfillRuleIntegration(t *testing.T) { } func newTestRuleImporter(_ context.Context, start time.Time, tmpDir string, testSamples model.Matrix, maxBlockDuration time.Duration) (*ruleImporter, error) { - logger := log.NewNopLogger() + logger := promslog.NewNopLogger() cfg := ruleImporterConfig{ outputDir: tmpDir, start: start.Add(-10 * time.Hour), diff --git a/cmd/promtool/sd.go b/cmd/promtool/sd.go index e65262d439..5e005bca8b 100644 --- a/cmd/promtool/sd.go +++ b/cmd/promtool/sd.go @@ -20,9 +20,9 @@ import ( "os" "time" - "github.com/go-kit/log" "github.com/google/go-cmp/cmp" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" @@ -38,10 +38,10 @@ type sdCheckResult struct { } // CheckSD performs service discovery for the given job name and reports the results. -func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool, registerer prometheus.Registerer) int { - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) +func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, registerer prometheus.Registerer) int { + logger := promslog.New(&promslog.Config{}) - cfg, err := config.LoadFile(sdConfigFiles, false, false, logger) + cfg, err := config.LoadFile(sdConfigFiles, false, logger) if err != nil { fmt.Fprintln(os.Stderr, "Cannot load config", err) return failureExitCode @@ -114,7 +114,7 @@ outerLoop: } results := []sdCheckResult{} for _, tgs := range sdCheckResults { - results = append(results, getSDCheckResult(tgs, scrapeConfig, noDefaultScrapePort)...) + results = append(results, getSDCheckResult(tgs, scrapeConfig)...) 
} res, err := json.MarshalIndent(results, "", " ") @@ -127,7 +127,7 @@ outerLoop: return successExitCode } -func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.ScrapeConfig, noDefaultScrapePort bool) []sdCheckResult { +func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.ScrapeConfig) []sdCheckResult { sdCheckResults := []sdCheckResult{} lb := labels.NewBuilder(labels.EmptyLabels()) for _, targetGroup := range targetGroups { @@ -144,7 +144,7 @@ func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.Sc } } - res, orig, err := scrape.PopulateLabels(lb, scrapeConfig, noDefaultScrapePort) + res, orig, err := scrape.PopulateLabels(lb, scrapeConfig) result := sdCheckResult{ DiscoveredLabels: orig, Labels: res, diff --git a/cmd/promtool/sd_test.go b/cmd/promtool/sd_test.go index cb65ee72aa..44d8084651 100644 --- a/cmd/promtool/sd_test.go +++ b/cmd/promtool/sd_test.go @@ -70,5 +70,5 @@ func TestSDCheckResult(t *testing.T) { }, } - testutil.RequireEqual(t, expectedSDCheckResult, getSDCheckResult(targetGroups, scrapeConfig, true)) + testutil.RequireEqual(t, expectedSDCheckResult, getSDCheckResult(targetGroups, scrapeConfig)) } diff --git a/cmd/promtool/testdata/config_with_service_discovery_files.yml b/cmd/promtool/testdata/config_with_service_discovery_files.yml index 13b6d7faff..6a550a8403 100644 --- a/cmd/promtool/testdata/config_with_service_discovery_files.yml +++ b/cmd/promtool/testdata/config_with_service_discovery_files.yml @@ -6,7 +6,7 @@ scrape_configs: alerting: alertmanagers: - scheme: http - api_version: v1 + api_version: v2 file_sd_configs: - files: - nonexistent_file.yml diff --git a/cmd/promtool/testdata/unittest.yml b/cmd/promtool/testdata/unittest.yml index d6224d785f..e2a8230902 100644 --- a/cmd/promtool/testdata/unittest.yml +++ b/cmd/promtool/testdata/unittest.yml @@ -89,11 +89,11 @@ tests: # Ensure lookback delta is respected, when a value is missing. - expr: timestamp(test_missing) - eval_time: 5m + eval_time: 4m59s exp_samples: - value: 0 - expr: timestamp(test_missing) - eval_time: 5m1s + eval_time: 5m exp_samples: [] # Minimal test case to check edge case of a single sample. 
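The expected-value changes in unittest.yml (the lookback edge cases above shifting from 5m/5m1s to 4m59s/5m, and 61 dropping to 60 in the hunks below) are consistent with range selection and lookback treating the left boundary as excluded, so a sample sitting exactly on the edge of the window no longer counts. A small arithmetic sketch of the count_over_time case, assuming the test data has one sample per minute starting at t=0:

package main

import "fmt"

func main() {
	const (
		stepSeconds = 60   // one sample per minute (assumed test-data resolution)
		windowSecs  = 3600 // [1h] range selector
		evalSecs    = 3600 // eval_time: 1h
	)
	closed, leftOpen := 0, 0
	for ts := 0; ts <= evalSecs; ts += stepSeconds {
		if ts >= evalSecs-windowSecs { // closed interval [t-1h, t]
			closed++
		}
		if ts > evalSecs-windowSecs { // left-open interval (t-1h, t]
			leftOpen++
		}
	}
	fmt.Println(closed, leftOpen) // 61 60: the boundary sample is dropped
}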
@@ -113,7 +113,7 @@ tests: - expr: count_over_time(fixed_data[1h]) eval_time: 1h exp_samples: - - value: 61 + - value: 60 - expr: timestamp(fixed_data) eval_time: 1h exp_samples: @@ -183,7 +183,7 @@ tests: - expr: job:test:count_over_time1m eval_time: 1m exp_samples: - - value: 61 + - value: 60 labels: 'job:test:count_over_time1m{job="test"}' - expr: timestamp(job:test:count_over_time1m) eval_time: 1m10s @@ -194,7 +194,7 @@ tests: - expr: job:test:count_over_time1m eval_time: 2m exp_samples: - - value: 61 + - value: 60 labels: 'job:test:count_over_time1m{job="test"}' - expr: timestamp(job:test:count_over_time1m) eval_time: 2m59s999ms diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index 971ea8ab00..727275aa6b 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -20,6 +20,7 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "runtime" @@ -32,9 +33,10 @@ import ( "time" "github.com/alecthomas/units" - "github.com/go-kit/log" "go.uber.org/atomic" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/storage" @@ -60,7 +62,7 @@ type writeBenchmark struct { memprof *os.File blockprof *os.File mtxprof *os.File - logger log.Logger + logger *slog.Logger } func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) error { @@ -68,7 +70,7 @@ func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) err outPath: outPath, samplesFile: samplesFile, numMetrics: numMetrics, - logger: log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)), + logger: promslog.New(&promslog.Config{}), } if b.outPath == "" { dir, err := os.MkdirTemp("", "tsdb_bench") @@ -87,9 +89,7 @@ func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) err dir := filepath.Join(b.outPath, "storage") - l := log.With(b.logger, "ts", log.DefaultTimestampUTC, "caller", log.DefaultCaller) - - st, err := tsdb.Open(dir, l, nil, &tsdb.Options{ + st, err := tsdb.Open(dir, b.logger, nil, &tsdb.Options{ RetentionDuration: int64(15 * 24 * time.Hour / time.Millisecond), MinBlockDuration: int64(2 * time.Hour / time.Millisecond), }, tsdb.NewDBStats()) @@ -367,25 +367,25 @@ func printBlocks(blocks []tsdb.BlockReader, writeHeader, humanReadable bool) { fmt.Fprintf(tw, "%v\t%v\t%v\t%v\t%v\t%v\t%v\t%v\n", meta.ULID, - getFormatedTime(meta.MinTime, humanReadable), - getFormatedTime(meta.MaxTime, humanReadable), + getFormattedTime(meta.MinTime, humanReadable), + getFormattedTime(meta.MaxTime, humanReadable), time.Duration(meta.MaxTime-meta.MinTime)*time.Millisecond, meta.Stats.NumSamples, meta.Stats.NumChunks, meta.Stats.NumSeries, - getFormatedBytes(b.Size(), humanReadable), + getFormattedBytes(b.Size(), humanReadable), ) } } -func getFormatedTime(timestamp int64, humanReadable bool) string { +func getFormattedTime(timestamp int64, humanReadable bool) string { if humanReadable { return time.Unix(timestamp/1000, 0).UTC().String() } return strconv.FormatInt(timestamp, 10) } -func getFormatedBytes(bytes int64, humanReadable bool) string { +func getFormattedBytes(bytes int64, humanReadable bool) string { if humanReadable { return units.Base2Bytes(bytes).String() } diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go index 7030635d1c..78dacdc569 100644 --- a/cmd/promtool/unittest.go +++ b/cmd/promtool/unittest.go @@ -26,13 +26,13 @@ import ( "strings" "time" - "github.com/go-kit/log" "github.com/google/go-cmp/cmp" "github.com/grafana/regexp" 
"github.com/nsf/jsondiff" "gopkg.in/yaml.v2" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -46,11 +46,11 @@ import ( // RulesUnitTest does unit testing of rules based on the unit testing files provided. // More info about the file format can be found in the docs. -func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int { - return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, files...) +func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug bool, files ...string) int { + return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, debug, files...) } -func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int { +func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug bool, files ...string) int { failed := false junit := &junitxml.JUnitXML{} @@ -60,7 +60,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, } for _, f := range files { - if errs := ruleUnitTest(f, queryOpts, run, diffFlag, junit.Suite(f)); errs != nil { + if errs := ruleUnitTest(f, queryOpts, run, diffFlag, debug, junit.Suite(f)); errs != nil { fmt.Fprintln(os.Stderr, " FAILED:") for _, e := range errs { fmt.Fprintln(os.Stderr, e.Error()) @@ -82,7 +82,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, return successExitCode } -func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool, ts *junitxml.TestSuite) []error { +func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag, debug bool, ts *junitxml.TestSuite) []error { b, err := os.ReadFile(filename) if err != nil { ts.Abort(err) @@ -131,7 +131,7 @@ func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *reg if t.Interval == 0 { t.Interval = unitTestInp.EvaluationInterval } - ers := t.test(evalInterval, groupOrderMap, queryOpts, diffFlag, unitTestInp.RuleFiles...) + ers := t.test(testname, evalInterval, groupOrderMap, queryOpts, diffFlag, debug, unitTestInp.RuleFiles...) if ers != nil { for _, e := range ers { tc.Fail(e.Error()) @@ -198,7 +198,14 @@ type testGroup struct { } // test performs the unit tests. -func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag bool, ruleFiles ...string) (outErr []error) { +func (tg *testGroup) test(testname string, evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag, debug bool, ruleFiles ...string) (outErr []error) { + if debug { + testStart := time.Now() + fmt.Printf("DEBUG: Starting test %s\n", testname) + defer func() { + fmt.Printf("DEBUG: Test %s finished, took %v\n", testname, time.Since(testStart)) + }() + } // Setup testing suite. 
suite, err := promqltest.NewLazyLoader(tg.seriesLoadingString(), queryOpts) if err != nil { @@ -218,7 +225,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i Appendable: suite.Storage(), Context: context.Background(), NotifyFunc: func(ctx context.Context, expr string, alerts ...*rules.Alert) {}, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } m := rules.NewManager(opts) groupsMap, ers := m.LoadGroups(time.Duration(tg.Interval), tg.ExternalLabels, tg.ExternalURL, nil, ruleFiles...) @@ -482,6 +489,32 @@ Outer: } } + if debug { + ts := tg.maxEvalTime() + // Potentially a test can be specified at a time with fractional seconds, + // which PromQL cannot represent, so round up to the next whole second. + ts = (ts + time.Second).Truncate(time.Second) + expr := fmt.Sprintf(`{__name__=~".+"}[%v]`, ts) + q, err := suite.QueryEngine().NewInstantQuery(context.Background(), suite.Queryable(), nil, expr, mint.Add(ts)) + if err != nil { + fmt.Printf("DEBUG: Failed querying, expr: %q, err: %v\n", expr, err) + return errs + } + res := q.Exec(suite.Context()) + if res.Err != nil { + fmt.Printf("DEBUG: Failed query exec, expr: %q, err: %v\n", expr, res.Err) + return errs + } + switch v := res.Value.(type) { + case promql.Matrix: + fmt.Printf("DEBUG: Dump of all data (input_series and rules) at %v:\n", ts) + fmt.Println(v.String()) + default: + fmt.Printf("DEBUG: Got unexpected type %T\n", v) + return errs + } + } + if len(errs) > 0 { return errs } diff --git a/cmd/promtool/unittest_test.go b/cmd/promtool/unittest_test.go index 9bbac28e9f..9b73dcdc1c 100644 --- a/cmd/promtool/unittest_test.go +++ b/cmd/promtool/unittest_test.go @@ -141,14 +141,14 @@ func TestRulesUnitTest(t *testing.T) { reuseCount[tt.want] += len(tt.args.files) } t.Run(tt.name, func(t *testing.T) { - if got := RulesUnitTest(tt.queryOpts, nil, false, tt.args.files...); got != tt.want { + if got := RulesUnitTest(tt.queryOpts, nil, false, false, tt.args.files...); got != tt.want { t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want) } }) } t.Run("Junit xml output ", func(t *testing.T) { var buf bytes.Buffer - if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, reuseFiles...); got != 1 { + if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, false, reuseFiles...); got != 1 { t.Errorf("RulesUnitTestResults() = %v, want 1", got) } var test junitxml.JUnitXML @@ -230,7 +230,7 @@ func TestRulesUnitTestRun(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := RulesUnitTest(tt.queryOpts, tt.args.run, false, tt.args.files...) + got := RulesUnitTest(tt.queryOpts, tt.args.run, false, false, tt.args.files...) require.Equal(t, tt.want, got) }) } diff --git a/config/config.go b/config/config.go index c9e8efbf3e..30a74e0402 100644 --- a/config/config.go +++ b/config/config.go @@ -16,6 +16,8 @@ package config import ( "errors" "fmt" + "log/slog" + "mime" "net/url" "os" "path/filepath" @@ -25,8 +27,6 @@ import ( "time" "github.com/alecthomas/units" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/grafana/regexp" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -73,7 +73,7 @@ const ( ) // Load parses the YAML input s into a Config. -func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, error) { +func Load(s string, logger *slog.Logger) (*Config, error) { cfg := &Config{} // If the entire config body is empty the UnmarshalYAML method is // never called. 
We thus have to set the DefaultConfig at the entry @@ -85,10 +85,6 @@ func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, erro return nil, err } - if !expandExternalLabels { - return cfg, nil - } - b := labels.NewScratchBuilder(0) cfg.GlobalConfig.ExternalLabels.Range(func(v labels.Label) { newV := os.Expand(v.Value, func(s string) string { @@ -98,26 +94,28 @@ func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, erro if v := os.Getenv(s); v != "" { return v } - level.Warn(logger).Log("msg", "Empty environment variable", "name", s) + logger.Warn("Empty environment variable", "name", s) return "" }) if newV != v.Value { - level.Debug(logger).Log("msg", "External label replaced", "label", v.Name, "input", v.Value, "output", newV) + logger.Debug("External label replaced", "label", v.Name, "input", v.Value, "output", newV) } // Note newV can be blank. https://github.com/prometheus/prometheus/issues/11024 b.Add(v.Name, newV) }) - cfg.GlobalConfig.ExternalLabels = b.Labels() + if !b.Labels().IsEmpty() { + cfg.GlobalConfig.ExternalLabels = b.Labels() + } return cfg, nil } // LoadFile parses the given YAML file into a Config. -func LoadFile(filename string, agentMode, expandExternalLabels bool, logger log.Logger) (*Config, error) { +func LoadFile(filename string, agentMode bool, logger *slog.Logger) (*Config, error) { content, err := os.ReadFile(filename) if err != nil { return nil, err } - cfg, err := Load(string(content), expandExternalLabels, logger) + cfg, err := Load(string(content), logger) if err != nil { return nil, fmt.Errorf("parsing YAML file %s: %w", filename, err) } @@ -166,13 +164,13 @@ var ( // DefaultScrapeConfig is the default scrape configuration. DefaultScrapeConfig = ScrapeConfig{ // ScrapeTimeout, ScrapeInterval and ScrapeProtocols default to the configured globals. - ScrapeClassicHistograms: false, - MetricsPath: "/metrics", - Scheme: "http", - HonorLabels: false, - HonorTimestamps: true, - HTTPClientConfig: config.DefaultHTTPClientConfig, - EnableCompression: true, + AlwaysScrapeClassicHistograms: false, + MetricsPath: "/metrics", + Scheme: "http", + HonorLabels: false, + HonorTimestamps: true, + HTTPClientConfig: config.DefaultHTTPClientConfig, + EnableCompression: true, } // DefaultAlertmanagerConfig is the default alertmanager configuration. @@ -183,13 +181,18 @@ var ( HTTPClientConfig: config.DefaultHTTPClientConfig, } + DefaultRemoteWriteHTTPClientConfig = config.HTTPClientConfig{ + FollowRedirects: true, + EnableHTTP2: false, + } + // DefaultRemoteWriteConfig is the default remote write configuration. DefaultRemoteWriteConfig = RemoteWriteConfig{ RemoteTimeout: model.Duration(30 * time.Second), ProtobufMessage: RemoteWriteProtoMsgV1, QueueConfig: DefaultQueueConfig, MetadataConfig: DefaultMetadataConfig, - HTTPClientConfig: config.DefaultHTTPClientConfig, + HTTPClientConfig: DefaultRemoteWriteHTTPClientConfig, } // DefaultQueueConfig is the default remote queue configuration. @@ -429,6 +432,8 @@ type GlobalConfig struct { RuleQueryOffset model.Duration `yaml:"rule_query_offset,omitempty"` // File to which PromQL queries are logged. QueryLogFile string `yaml:"query_log_file,omitempty"` + // File to which scrape failures are logged. + ScrapeFailureLogFile string `yaml:"scrape_failure_log_file,omitempty"` // The labels to add to any timeseries that this Prometheus instance scrapes. 
ExternalLabels labels.Labels `yaml:"external_labels,omitempty"` // An uncompressed response body larger than this many bytes will cause the @@ -474,9 +479,22 @@ func (s ScrapeProtocol) Validate() error { return nil } +// HeaderMediaType returns the MIME mediaType for a particular ScrapeProtocol. +func (s ScrapeProtocol) HeaderMediaType() string { + if _, ok := ScrapeProtocolsHeaders[s]; !ok { + return "" + } + mediaType, _, err := mime.ParseMediaType(ScrapeProtocolsHeaders[s]) + if err != nil { + return "" + } + return mediaType +} + var ( PrometheusProto ScrapeProtocol = "PrometheusProto" PrometheusText0_0_4 ScrapeProtocol = "PrometheusText0.0.4" + PrometheusText1_0_0 ScrapeProtocol = "PrometheusText1.0.0" OpenMetricsText0_0_1 ScrapeProtocol = "OpenMetricsText0.0.1" OpenMetricsText1_0_0 ScrapeProtocol = "OpenMetricsText1.0.0" UTF8NamesHeader string = model.EscapingKey + "=" + model.AllowUTF8 @@ -484,6 +502,7 @@ var ( ScrapeProtocolsHeaders = map[ScrapeProtocol]string{ PrometheusProto: "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited", PrometheusText0_0_4: "text/plain;version=0.0.4", + PrometheusText1_0_0: "text/plain;version=1.0.0;escaping=allow-utf-8", OpenMetricsText0_0_1: "application/openmetrics-text;version=0.0.1", OpenMetricsText1_0_0: "application/openmetrics-text;version=1.0.0", } @@ -493,6 +512,7 @@ var ( DefaultScrapeProtocols = []ScrapeProtocol{ OpenMetricsText1_0_0, OpenMetricsText0_0_1, + PrometheusText1_0_0, PrometheusText0_0_4, } @@ -504,6 +524,7 @@ var ( PrometheusProto, OpenMetricsText1_0_0, OpenMetricsText0_0_1, + PrometheusText1_0_0, PrometheusText0_0_4, } ) @@ -529,6 +550,7 @@ func validateAcceptScrapeProtocols(sps []ScrapeProtocol) error { // SetDirectory joins any relative file paths with dir. func (c *GlobalConfig) SetDirectory(dir string) { c.QueryLogFile = config.JoinDir(dir, c.QueryLogFile) + c.ScrapeFailureLogFile = config.JoinDir(dir, c.ScrapeFailureLogFile) } // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -591,6 +613,7 @@ func (c *GlobalConfig) isZero() bool { c.EvaluationInterval == 0 && c.RuleQueryOffset == 0 && c.QueryLogFile == "" && + c.ScrapeFailureLogFile == "" && c.ScrapeProtocols == nil } @@ -628,10 +651,19 @@ type ScrapeConfig struct { // The protocols to negotiate during a scrape. It tells clients what // protocol are accepted by Prometheus and with what preference (most wanted is first). // Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, - // OpenMetricsText1.0.0, PrometheusText0.0.4. + // OpenMetricsText1.0.0, PrometheusText1.0.0, PrometheusText0.0.4. ScrapeProtocols []ScrapeProtocol `yaml:"scrape_protocols,omitempty"` - // Whether to scrape a classic histogram that is also exposed as a native histogram. - ScrapeClassicHistograms bool `yaml:"scrape_classic_histograms,omitempty"` + // The fallback protocol to use if the Content-Type provided by the target + // is not provided, blank, or not one of the expected values. + // Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, + // OpenMetricsText1.0.0, PrometheusText1.0.0, PrometheusText0.0.4. + ScrapeFallbackProtocol ScrapeProtocol `yaml:"fallback_scrape_protocol,omitempty"` + // Whether to scrape a classic histogram, even if it is also exposed as a native histogram. + AlwaysScrapeClassicHistograms bool `yaml:"always_scrape_classic_histograms,omitempty"` + // Whether to convert all scraped classic histograms into a native histogram with custom buckets. 
+ ConvertClassicHistogramsToNHCB bool `yaml:"convert_classic_histograms_to_nhcb,omitempty"` + // File to which scrape failures are logged. + ScrapeFailureLogFile string `yaml:"scrape_failure_log_file,omitempty"` // The HTTP resource path on which to fetch metrics from targets. MetricsPath string `yaml:"metrics_path,omitempty"` // The URL scheme with which to fetch metrics from targets. @@ -684,6 +716,7 @@ type ScrapeConfig struct { func (c *ScrapeConfig) SetDirectory(dir string) { c.ServiceDiscoveryConfigs.SetDirectory(dir) c.HTTPClientConfig.SetDirectory(dir) + c.ScrapeFailureLogFile = config.JoinDir(dir, c.ScrapeFailureLogFile) } // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -765,6 +798,9 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error { if c.KeepDroppedTargets == 0 { c.KeepDroppedTargets = globalConfig.KeepDroppedTargets } + if c.ScrapeFailureLogFile == "" { + c.ScrapeFailureLogFile = globalConfig.ScrapeFailureLogFile + } if c.ScrapeProtocols == nil { c.ScrapeProtocols = globalConfig.ScrapeProtocols @@ -773,11 +809,17 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error { return fmt.Errorf("%w for scrape config with job name %q", err, c.JobName) } + if c.ScrapeFallbackProtocol != "" { + if err := c.ScrapeFallbackProtocol.Validate(); err != nil { + return fmt.Errorf("invalid fallback_scrape_protocol for scrape config with job name %q: %w", c.JobName, err) + } + } + switch globalConfig.MetricNameValidationScheme { - case "", LegacyValidationConfig: - case UTF8ValidationConfig: + case LegacyValidationConfig: + case "", UTF8ValidationConfig: if model.NameValidationScheme != model.UTF8Validation { - return fmt.Errorf("utf8 name validation requested but feature not enabled via --enable-feature=utf8-names") + panic("utf8 name validation requested but model.NameValidationScheme is not set to UTF8") } default: return fmt.Errorf("unknown name validation method specified, must be either 'legacy' or 'utf8', got %s", globalConfig.MetricNameValidationScheme) @@ -948,6 +990,7 @@ func (a AlertmanagerConfigs) ToMap() map[string]*AlertmanagerConfig { // AlertmanagerAPIVersion represents a version of the // github.com/prometheus/alertmanager/api, e.g. 'v1' or 'v2'. +// 'v1' is no longer supported. type AlertmanagerAPIVersion string // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -977,7 +1020,7 @@ const ( ) var SupportedAlertmanagerAPIVersions = []AlertmanagerAPIVersion{ - AlertmanagerAPIVersionV1, AlertmanagerAPIVersionV2, + AlertmanagerAPIVersionV2, } // AlertmanagerConfig configures how Alertmanagers can be discovered and communicated with. diff --git a/config/config_test.go b/config/config_test.go index 7b910b5d18..c3148f93a7 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -24,10 +24,10 @@ import ( "time" "github.com/alecthomas/units" - "github.com/go-kit/log" "github.com/grafana/regexp" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -62,6 +62,11 @@ import ( "github.com/prometheus/prometheus/util/testutil" ) +func init() { + // This can be removed when the default validation scheme in common is updated. 
+ model.NameValidationScheme = model.UTF8Validation +} + func mustParseURL(u string) *config.URL { parsed, err := url.Parse(u) if err != nil { @@ -78,14 +83,16 @@ const ( globLabelNameLengthLimit = 200 globLabelValueLengthLimit = 200 globalGoGC = 42 + globScrapeFailureLogFile = "testdata/fail.log" ) var expectedConf = &Config{ GlobalConfig: GlobalConfig{ - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, - EvaluationInterval: model.Duration(30 * time.Second), - QueryLogFile: "", + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EvaluationInterval: model.Duration(30 * time.Second), + QueryLogFile: "testdata/query.log", + ScrapeFailureLogFile: globScrapeFailureLogFile, ExternalLabels: labels.FromStrings("foo", "bar", "monitor", "codelab"), @@ -135,7 +142,7 @@ var expectedConf = &Config{ }, }, FollowRedirects: true, - EnableHTTP2: true, + EnableHTTP2: false, }, }, { @@ -151,7 +158,7 @@ var expectedConf = &Config{ KeyFile: filepath.FromSlash("testdata/valid_key_file"), }, FollowRedirects: true, - EnableHTTP2: true, + EnableHTTP2: false, }, Headers: map[string]string{"name": "value"}, }, @@ -199,18 +206,20 @@ var expectedConf = &Config{ { JobName: "prometheus", - HonorLabels: true, - HonorTimestamps: true, - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, - EnableCompression: true, - BodySizeLimit: globBodySizeLimit, - SampleLimit: globSampleLimit, - TargetLimit: globTargetLimit, - LabelLimit: globLabelLimit, - LabelNameLengthLimit: globLabelNameLengthLimit, - LabelValueLengthLimit: globLabelValueLengthLimit, - ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + HonorLabels: true, + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, + BodySizeLimit: globBodySizeLimit, + SampleLimit: globSampleLimit, + TargetLimit: globTargetLimit, + LabelLimit: globLabelLimit, + LabelNameLengthLimit: globLabelNameLengthLimit, + LabelValueLengthLimit: globLabelValueLengthLimit, + ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFallbackProtocol: PrometheusText0_0_4, + ScrapeFailureLogFile: "testdata/fail_prom.log", MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -323,6 +332,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: 210, LabelValueLengthLimit: 210, ScrapeProtocols: []ScrapeProtocol{PrometheusText0_0_4}, + ScrapeFailureLogFile: globScrapeFailureLogFile, HTTPClientConfig: config.HTTPClientConfig{ BasicAuth: &config.BasicAuth{ @@ -420,6 +430,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -475,6 +486,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: "/metrics", Scheme: "http", @@ -508,6 +520,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, 
MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -547,6 +560,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -586,6 +600,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -615,6 +630,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -652,6 +668,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -686,6 +703,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -727,6 +745,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -758,6 +777,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -792,6 +812,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -819,6 +840,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -849,6 +871,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: "/federate", Scheme: DefaultScrapeConfig.Scheme, @@ -879,6 +902,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: 
DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -909,6 +933,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -936,6 +961,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -971,6 +997,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1005,6 +1032,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1036,6 +1064,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1066,6 +1095,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1100,6 +1130,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1137,6 +1168,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1193,6 +1225,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1220,6 +1253,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, HTTPClientConfig: config.DefaultHTTPClientConfig, MetricsPath: DefaultScrapeConfig.MetricsPath, @@ -1258,6 +1292,7 @@ var 
expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, HTTPClientConfig: config.DefaultHTTPClientConfig, MetricsPath: DefaultScrapeConfig.MetricsPath, @@ -1302,6 +1337,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1337,6 +1373,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, HTTPClientConfig: config.DefaultHTTPClientConfig, MetricsPath: DefaultScrapeConfig.MetricsPath, @@ -1366,6 +1403,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1398,6 +1436,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1462,8 +1501,13 @@ var expectedConf = &Config{ }, } +func TestYAMLNotLongerSupportedAMApi(t *testing.T) { + _, err := LoadFile("testdata/config_with_no_longer_supported_am_api_config.yml", false, promslog.NewNopLogger()) + require.Error(t, err) +} + func TestYAMLRoundtrip(t *testing.T) { - want, err := LoadFile("testdata/roundtrip.good.yml", false, false, log.NewNopLogger()) + want, err := LoadFile("testdata/roundtrip.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -1476,7 +1520,7 @@ func TestYAMLRoundtrip(t *testing.T) { } func TestRemoteWriteRetryOnRateLimit(t *testing.T) { - want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, false, log.NewNopLogger()) + want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -1491,7 +1535,7 @@ func TestRemoteWriteRetryOnRateLimit(t *testing.T) { func TestOTLPSanitizeResourceAttributes(t *testing.T) { t.Run("good config", func(t *testing.T) { - want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.good.yml"), false, false, log.NewNopLogger()) + want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.good.yml"), false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -1503,7 +1547,7 @@ func TestOTLPSanitizeResourceAttributes(t *testing.T) { }) t.Run("bad config", func(t *testing.T) { - _, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.bad.yml"), false, false, log.NewNopLogger()) + _, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.bad.yml"), false, promslog.NewNopLogger()) require.ErrorContains(t, err, `duplicated promoted OTel resource attribute "k8s.job.name"`) 
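The scrape-config fields introduced earlier in this diff (fallback_scrape_protocol, always_scrape_classic_histograms, convert_classic_histograms_to_nhcb, scrape_failure_log_file) are plain YAML-tagged struct members, so they can be exercised directly through the updated Load signature shown above. A minimal illustrative sketch, not part of the change itself, assuming only the names and signatures visible in this diff (config.Load taking a *slog.Logger, promslog.NewNopLogger):

package main

import (
	"fmt"

	"github.com/prometheus/common/promslog"

	"github.com/prometheus/prometheus/config"
)

func main() {
	// Sketch only: parse a config that uses the new per-job keys via the
	// new Load(content string, logger *slog.Logger) signature.
	cfg, err := config.Load(`
global:
  scrape_failure_log_file: fail.log
scrape_configs:
  - job_name: node
    fallback_scrape_protocol: PrometheusText0.0.4
    always_scrape_classic_histograms: true
    convert_classic_histograms_to_nhcb: true
`, promslog.NewNopLogger())
	if err != nil {
		panic(err)
	}
	fmt.Println(cfg.ScrapeConfigs[0].ScrapeFallbackProtocol) // PrometheusText0.0.4
}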
require.ErrorContains(t, err, `empty promoted OTel resource attribute`) }) @@ -1512,16 +1556,17 @@ func TestOTLPSanitizeResourceAttributes(t *testing.T) { func TestLoadConfig(t *testing.T) { // Parse a valid file that sets a global scrape timeout. This tests whether parsing // an overwritten default field in the global config permanently changes the default. - _, err := LoadFile("testdata/global_timeout.good.yml", false, false, log.NewNopLogger()) + _, err := LoadFile("testdata/global_timeout.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - c, err := LoadFile("testdata/conf.good.yml", false, false, log.NewNopLogger()) + c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger()) + require.NoError(t, err) require.Equal(t, expectedConf, c) } func TestScrapeIntervalLarger(t *testing.T) { - c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, false, log.NewNopLogger()) + c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) require.Len(t, c.ScrapeConfigs, 1) for _, sc := range c.ScrapeConfigs { @@ -1531,7 +1576,7 @@ func TestScrapeIntervalLarger(t *testing.T) { // YAML marshaling must not reveal authentication credentials. func TestElideSecrets(t *testing.T) { - c, err := LoadFile("testdata/conf.good.yml", false, false, log.NewNopLogger()) + c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) secretRe := regexp.MustCompile(`\\u003csecret\\u003e|`) @@ -1548,31 +1593,31 @@ func TestElideSecrets(t *testing.T) { func TestLoadConfigRuleFilesAbsolutePath(t *testing.T) { // Parse a valid file that sets a rule files with an absolute path - c, err := LoadFile(ruleFilesConfigFile, false, false, log.NewNopLogger()) + c, err := LoadFile(ruleFilesConfigFile, false, promslog.NewNopLogger()) require.NoError(t, err) require.Equal(t, ruleFilesExpectedConf, c) } func TestKubernetesEmptyAPIServer(t *testing.T) { - _, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, false, log.NewNopLogger()) + _, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) } func TestKubernetesWithKubeConfig(t *testing.T) { - _, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, false, log.NewNopLogger()) + _, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) } func TestKubernetesSelectors(t *testing.T) { - _, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, false, log.NewNopLogger()) + _, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, false, log.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, false, log.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, false, log.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_service.good.yml", 
false, false, log.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_service.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) } @@ -2042,24 +2087,39 @@ var expectedErrors = []struct { }, { filename: "scrape_config_files_scrape_protocols.bad.yml", - errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols.bad.yml: scrape_protocols: unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4] for scrape config with job name "node"`, + errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols.bad.yml: scrape_protocols: unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4 PrometheusText1.0.0] for scrape config with job name "node"`, }, { filename: "scrape_config_files_scrape_protocols2.bad.yml", errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols2.bad.yml: duplicated protocol in scrape_protocols, got [OpenMetricsText1.0.0 PrometheusProto OpenMetricsText1.0.0] for scrape config with job name "node"`, }, + { + filename: "scrape_config_files_fallback_scrape_protocol1.bad.yml", + errMsg: `parsing YAML file testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml: invalid fallback_scrape_protocol for scrape config with job name "node": unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4 PrometheusText1.0.0]`, + }, + { + filename: "scrape_config_files_fallback_scrape_protocol2.bad.yml", + errMsg: `unmarshal errors`, + }, } func TestBadConfigs(t *testing.T) { + model.NameValidationScheme = model.LegacyValidation + defer func() { + model.NameValidationScheme = model.UTF8Validation + }() for _, ee := range expectedErrors { - _, err := LoadFile("testdata/"+ee.filename, false, false, log.NewNopLogger()) - require.Error(t, err, "%s", ee.filename) - require.Contains(t, err.Error(), ee.errMsg, + _, err := LoadFile("testdata/"+ee.filename, false, promslog.NewNopLogger()) + require.ErrorContains(t, err, ee.errMsg, "Expected error for %s to contain %q but got: %s", ee.filename, ee.errMsg, err) } } func TestBadStaticConfigsJSON(t *testing.T) { + model.NameValidationScheme = model.LegacyValidation + defer func() { + model.NameValidationScheme = model.UTF8Validation + }() content, err := os.ReadFile("testdata/static_config.bad.json") require.NoError(t, err) var tg targetgroup.Group @@ -2068,6 +2128,10 @@ func TestBadStaticConfigsJSON(t *testing.T) { } func TestBadStaticConfigsYML(t *testing.T) { + model.NameValidationScheme = model.LegacyValidation + defer func() { + model.NameValidationScheme = model.UTF8Validation + }() content, err := os.ReadFile("testdata/static_config.bad.yml") require.NoError(t, err) var tg targetgroup.Group @@ -2076,7 +2140,7 @@ func TestBadStaticConfigsYML(t *testing.T) { } func TestEmptyConfig(t *testing.T) { - c, err := Load("", false, log.NewNopLogger()) + c, err := Load("", promslog.NewNopLogger()) require.NoError(t, err) exp := DefaultConfig require.Equal(t, exp, *c) @@ -2086,38 +2150,34 @@ func TestExpandExternalLabels(t *testing.T) { // Cleanup ant TEST env variable that could exist on the system. 
os.Setenv("TEST", "") - c, err := LoadFile("testdata/external_labels.good.yml", false, false, log.NewNopLogger()) - require.NoError(t, err) - testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foo${TEST}bar", "foo", "${TEST}", "qux", "foo$${TEST}", "xyz", "foo$$bar"), c.GlobalConfig.ExternalLabels) - - c, err = LoadFile("testdata/external_labels.good.yml", false, true, log.NewNopLogger()) + c, err := LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foobar", "foo", "", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels) os.Setenv("TEST", "TestValue") - c, err = LoadFile("testdata/external_labels.good.yml", false, true, log.NewNopLogger()) + c, err = LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "fooTestValuebar", "foo", "TestValue", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels) } func TestAgentMode(t *testing.T) { - _, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, false, log.NewNopLogger()) + _, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field alerting is not allowed in agent mode") - _, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, false, log.NewNopLogger()) + _, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field alerting is not allowed in agent mode") - _, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, false, log.NewNopLogger()) + _, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field rule_files is not allowed in agent mode") - _, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, false, log.NewNopLogger()) + _, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field remote_read is not allowed in agent mode") - c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, false, log.NewNopLogger()) + c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, promslog.NewNopLogger()) require.NoError(t, err) require.Empty(t, c.RemoteWriteConfigs) - c, err = LoadFile("testdata/agent_mode.good.yml", true, false, log.NewNopLogger()) + c, err = LoadFile("testdata/agent_mode.good.yml", true, promslog.NewNopLogger()) require.NoError(t, err) require.Len(t, c.RemoteWriteConfigs, 1) require.Equal( @@ -2128,7 +2188,7 @@ func TestAgentMode(t *testing.T) { } func TestEmptyGlobalBlock(t *testing.T) { - c, err := Load("global:\n", false, log.NewNopLogger()) + c, err := Load("global:\n", promslog.NewNopLogger()) require.NoError(t, err) exp := DefaultConfig exp.Runtime = DefaultRuntimeConfig @@ -2283,7 +2343,7 @@ func TestGetScrapeConfigs(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - c, err := LoadFile(tc.configFile, false, false, log.NewNopLogger()) + c, err := LoadFile(tc.configFile, false, promslog.NewNopLogger()) require.NoError(t, err) scfgs, err := c.GetScrapeConfigs() @@ -2301,7 +2361,7 @@ func kubernetesSDHostURL() config.URL { } func TestScrapeConfigDisableCompression(t *testing.T) { - want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", 
false, false, log.NewNopLogger()) + want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -2332,23 +2392,23 @@ func TestScrapeConfigNameValidationSettings(t *testing.T) { { name: "global setting implies local settings", inputFile: "scrape_config_global_validation_mode", - expectScheme: "utf8", + expectScheme: "legacy", }, { name: "local setting", inputFile: "scrape_config_local_validation_mode", - expectScheme: "utf8", + expectScheme: "legacy", }, { name: "local setting overrides global setting", inputFile: "scrape_config_local_global_validation_mode", - expectScheme: "legacy", + expectScheme: "utf8", }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, false, log.NewNopLogger()) + want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -2361,3 +2421,54 @@ func TestScrapeConfigNameValidationSettings(t *testing.T) { }) } } + +func TestScrapeProtocolHeader(t *testing.T) { + tests := []struct { + name string + proto ScrapeProtocol + expectedValue string + }{ + { + name: "blank", + proto: ScrapeProtocol(""), + expectedValue: "", + }, + { + name: "invalid", + proto: ScrapeProtocol("invalid"), + expectedValue: "", + }, + { + name: "prometheus protobuf", + proto: PrometheusProto, + expectedValue: "application/vnd.google.protobuf", + }, + { + name: "prometheus text 0.0.4", + proto: PrometheusText0_0_4, + expectedValue: "text/plain", + }, + { + name: "prometheus text 1.0.0", + proto: PrometheusText1_0_0, + expectedValue: "text/plain", + }, + { + name: "openmetrics 0.0.1", + proto: OpenMetricsText0_0_1, + expectedValue: "application/openmetrics-text", + }, + { + name: "openmetrics 1.0.0", + proto: OpenMetricsText1_0_0, + expectedValue: "application/openmetrics-text", + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + mediaType := tc.proto.HeaderMediaType() + + require.Equal(t, tc.expectedValue, mediaType) + }) + } +} diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index a826282d2c..2501652d5b 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -8,6 +8,8 @@ global: label_limit: 30 label_name_length_limit: 200 label_value_length_limit: 200 + query_log_file: query.log + scrape_failure_log_file: fail.log # scrape_timeout is set to the global default (10s). external_labels: @@ -72,6 +74,9 @@ scrape_configs: # metrics_path defaults to '/metrics' # scheme defaults to 'http'. + fallback_scrape_protocol: PrometheusText0.0.4 + + scrape_failure_log_file: fail_prom.log file_sd_configs: - files: - foo/*.slow.json diff --git a/config/testdata/config_with_deprecated_am_api_config.yml b/config/testdata/config_with_deprecated_am_api_config.yml new file mode 100644 index 0000000000..ac89537ff1 --- /dev/null +++ b/config/testdata/config_with_deprecated_am_api_config.yml @@ -0,0 +1,7 @@ +alerting: + alertmanagers: + - scheme: http + api_version: v1 + file_sd_configs: + - files: + - nonexistent_file.yml diff --git a/config/testdata/jobname_dup.bad.yml b/config/testdata/jobname_dup.bad.yml index 0265493c30..d03cb0cf97 100644 --- a/config/testdata/jobname_dup.bad.yml +++ b/config/testdata/jobname_dup.bad.yml @@ -1,4 +1,6 @@ # Two scrape configs with the same job names are not allowed. 
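The TestScrapeProtocolHeader cases above pair each ScrapeProtocol with the media-type portion of its header value; the new PrometheusText1.0.0 entry maps to plain text with UTF-8 escaping allowed. A small sketch of the same lookup, illustrative only and based on the HeaderMediaType method and constants added in this diff:

package main

import (
	"fmt"

	"github.com/prometheus/prometheus/config"
)

func main() {
	// HeaderMediaType drops the parameters from the protocol's header value,
	// e.g. "text/plain;version=1.0.0;escaping=allow-utf-8" becomes "text/plain";
	// unknown protocols yield "".
	fmt.Println(config.PrometheusText1_0_0.HeaderMediaType())           // text/plain
	fmt.Println(config.PrometheusProto.HeaderMediaType())               // application/vnd.google.protobuf
	fmt.Println(config.ScrapeProtocol("invalid").HeaderMediaType())     // ""
}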
+global: + metric_name_validation_scheme: legacy scrape_configs: - job_name: prometheus - job_name: service-x diff --git a/config/testdata/lowercase.bad.yml b/config/testdata/lowercase.bad.yml index 9bc9583341..6dd72e6476 100644 --- a/config/testdata/lowercase.bad.yml +++ b/config/testdata/lowercase.bad.yml @@ -1,3 +1,5 @@ +global: + metric_name_validation_scheme: legacy scrape_configs: - job_name: prometheus relabel_configs: diff --git a/config/testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml b/config/testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml new file mode 100644 index 0000000000..07cfe47594 --- /dev/null +++ b/config/testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml @@ -0,0 +1,5 @@ +scrape_configs: + - job_name: node + fallback_scrape_protocol: "prometheusproto" + static_configs: + - targets: ['localhost:8080'] diff --git a/config/testdata/scrape_config_files_fallback_scrape_protocol2.bad.yml b/config/testdata/scrape_config_files_fallback_scrape_protocol2.bad.yml new file mode 100644 index 0000000000..c5d133f9c4 --- /dev/null +++ b/config/testdata/scrape_config_files_fallback_scrape_protocol2.bad.yml @@ -0,0 +1,5 @@ +scrape_configs: + - job_name: node + fallback_scrape_protocol: ["OpenMetricsText1.0.0", "PrometheusText0.0.4"] + static_configs: + - targets: ['localhost:8080'] diff --git a/config/testdata/scrape_config_global_validation_mode.yml b/config/testdata/scrape_config_global_validation_mode.yml index 1548554397..e9b0618c70 100644 --- a/config/testdata/scrape_config_global_validation_mode.yml +++ b/config/testdata/scrape_config_global_validation_mode.yml @@ -1,4 +1,4 @@ global: - metric_name_validation_scheme: utf8 + metric_name_validation_scheme: legacy scrape_configs: - job_name: prometheus diff --git a/config/testdata/scrape_config_local_global_validation_mode.yml b/config/testdata/scrape_config_local_global_validation_mode.yml index d13605e21d..30b54834a5 100644 --- a/config/testdata/scrape_config_local_global_validation_mode.yml +++ b/config/testdata/scrape_config_local_global_validation_mode.yml @@ -1,5 +1,5 @@ global: - metric_name_validation_scheme: utf8 + metric_name_validation_scheme: legacy scrape_configs: - job_name: prometheus - metric_name_validation_scheme: legacy + metric_name_validation_scheme: utf8 diff --git a/config/testdata/scrape_config_local_validation_mode.yml b/config/testdata/scrape_config_local_validation_mode.yml index fad4235806..90279ff081 100644 --- a/config/testdata/scrape_config_local_validation_mode.yml +++ b/config/testdata/scrape_config_local_validation_mode.yml @@ -1,3 +1,3 @@ scrape_configs: - job_name: prometheus - metric_name_validation_scheme: utf8 + metric_name_validation_scheme: legacy diff --git a/console_libraries/menu.lib b/console_libraries/menu.lib deleted file mode 100644 index 199ebf9f48..0000000000 --- a/console_libraries/menu.lib +++ /dev/null @@ -1,82 +0,0 @@ -{{/* vim: set ft=html: */}} - -{{/* Navbar, should be passed . */}} -{{ define "navbar" }} - -{{ end }} - -{{/* LHS menu, should be passed . */}} -{{ define "menu" }} -
-{{ end }}
-
-{{/* Helper, pass (args . path name) */}}
-{{ define "_menuItem" }}
-{{ end }}
diff --git a/console_libraries/prom.lib b/console_libraries/prom.lib
deleted file mode 100644
index d7d436f947..0000000000
--- a/console_libraries/prom.lib
+++ /dev/null
diff --git a/consoles/index.html.example b/consoles/index.html.example
deleted file mode 100644
index c725d30dea..0000000000
--- a/consoles/index.html.example
+++ /dev/null
diff --git a/consoles/node-cpu.html b/consoles/node-cpu.html
deleted file mode 100644
index 284ad738f2..0000000000
--- a/consoles/node-cpu.html
+++ /dev/null
diff --git a/consoles/node-disk.html b/consoles/node-disk.html
deleted file mode 100644
index ffff41b797..0000000000
--- a/consoles/node-disk.html
+++ /dev/null
diff --git a/consoles/node-overview.html b/consoles/node-overview.html
deleted file mode 100644
index 4ae8984b99..0000000000
--- a/consoles/node-overview.html
+++ /dev/null
diff --git a/consoles/node.html b/consoles/node.html
deleted file mode 100644
index c1dfc1a891..0000000000
--- a/consoles/node.html
+++ /dev/null
diff --git a/consoles/prometheus-overview.html b/consoles/prometheus-overview.html
deleted file mode 100644
index 08e027de06..0000000000
--- a/consoles/prometheus-overview.html
+++ /dev/null
diff --git a/consoles/prometheus.html b/consoles/prometheus.html
deleted file mode 100644
index e0d026376d..0000000000
--- a/consoles/prometheus.html
+++ /dev/null

Prometheus

- - - - - - - - -{{ range query "up{job='prometheus'}" | sortByLabel "instance" }} - - - - - - -{{ else }} - -{{ end }} -
PrometheusUpIngested SamplesMemory
{{ .Labels.instance }}Yes{{ else }} class="alert-danger">No{{ end }}{{ template "prom_query_drilldown" (args (printf "irate(prometheus_tsdb_head_samples_appended_total{job='prometheus',instance='%s'}[5m])" .Labels.instance) "/s" "humanizeNoSmallPrefix") }}{{ template "prom_query_drilldown" (args (printf "process_resident_memory_bytes{job='prometheus',instance='%s'}" .Labels.instance) "B" "humanize1024")}}
No devices found.
- -{{ template "prom_content_tail" . }} - -{{ template "tail" }} diff --git a/discovery/README.md b/discovery/README.md index 4c06608625..d5418e7fb1 100644 --- a/discovery/README.md +++ b/discovery/README.md @@ -233,7 +233,7 @@ type Config interface { } type DiscovererOptions struct { - Logger log.Logger + Logger *slog.Logger // A registerer for the Discoverer's metrics. Registerer prometheus.Registerer diff --git a/discovery/aws/ec2.go b/discovery/aws/ec2.go index a44912481a..5a725cb48f 100644 --- a/discovery/aws/ec2.go +++ b/discovery/aws/ec2.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" @@ -29,11 +30,11 @@ import ( "github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/ec2" - "github.com/go-kit/log" - "github.com/go-kit/log/level" + "github.com/aws/aws-sdk-go/service/ec2/ec2iface" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/refresh" @@ -146,9 +147,9 @@ func (c *EC2SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // the Discoverer interface. type EC2Discovery struct { *refresh.Discovery - logger log.Logger + logger *slog.Logger cfg *EC2SDConfig - ec2 *ec2.EC2 + ec2 ec2iface.EC2API // azToAZID maps this account's availability zones to their underlying AZ // ID, e.g. eu-west-2a -> euw2-az2. Refreshes are performed sequentially, so @@ -157,14 +158,14 @@ type EC2Discovery struct { } // NewEC2Discovery returns a new EC2Discovery which periodically refreshes its targets. -func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*EC2Discovery, error) { +func NewEC2Discovery(conf *EC2SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*EC2Discovery, error) { m, ok := metrics.(*ec2Metrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } d := &EC2Discovery{ logger: logger, @@ -182,7 +183,7 @@ func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, metrics discovery.Dis return d, nil } -func (d *EC2Discovery) ec2Client(context.Context) (*ec2.EC2, error) { +func (d *EC2Discovery) ec2Client(context.Context) (ec2iface.EC2API, error) { if d.ec2 != nil { return d.ec2, nil } @@ -254,8 +255,8 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error // Prometheus requires a reload if AWS adds a new AZ to the region. 
if d.azToAZID == nil { if err := d.refreshAZIDs(ctx); err != nil { - level.Debug(d.logger).Log( - "msg", "Unable to describe availability zones", + d.logger.Debug( + "Unable to describe availability zones", "err", err) } } @@ -296,8 +297,8 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error labels[ec2LabelAZ] = model.LabelValue(*inst.Placement.AvailabilityZone) azID, ok := d.azToAZID[*inst.Placement.AvailabilityZone] if !ok && d.azToAZID != nil { - level.Debug(d.logger).Log( - "msg", "Availability zone ID not found", + d.logger.Debug( + "Availability zone ID not found", "az", *inst.Placement.AvailabilityZone) } labels[ec2LabelAZID] = model.LabelValue(azID) diff --git a/discovery/aws/ec2_test.go b/discovery/aws/ec2_test.go new file mode 100644 index 0000000000..f34065c23e --- /dev/null +++ b/discovery/aws/ec2_test.go @@ -0,0 +1,434 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package aws + +import ( + "context" + "errors" + "testing" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/request" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/ec2/ec2iface" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" + "go.uber.org/goleak" + + "github.com/prometheus/prometheus/discovery/targetgroup" +) + +// Helper function to get pointers on literals. +// NOTE: this is common between a few tests. In the future it might worth to move this out into a separate package. +func strptr(str string) *string { + return &str +} + +func boolptr(b bool) *bool { + return &b +} + +func int64ptr(i int64) *int64 { + return &i +} + +// Struct for test data. +type ec2DataStore struct { + region string + + azToAZID map[string]string + + ownerID string + + instances []*ec2.Instance +} + +// The tests itself. 
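// Because EC2Discovery.ec2 is now the ec2iface.EC2API interface (see ec2.go
// above), these tests can inject a mock client directly. A minimal sketch of
// the pattern used throughout this file (field values are illustrative only):
//
//	d := &EC2Discovery{
//		ec2: newMockEC2Client(&ec2DataStore{
//			region:   "region-test",
//			azToAZID: map[string]string{"azname-a": "azid-1"},
//		}),
//		cfg: &EC2SDConfig{Port: 4242, Region: "region-test"},
//	}
//	groups, err := d.refresh(context.Background())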
+func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} + +func TestEC2DiscoveryRefreshAZIDs(t *testing.T) { + ctx := context.Background() + + // iterate through the test cases + for _, tt := range []struct { + name string + shouldFail bool + ec2Data *ec2DataStore + }{ + { + name: "Normal", + shouldFail: false, + ec2Data: &ec2DataStore{ + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + }, + }, + { + name: "HandleError", + shouldFail: true, + ec2Data: &ec2DataStore{}, + }, + } { + t.Run(tt.name, func(t *testing.T) { + client := newMockEC2Client(tt.ec2Data) + + d := &EC2Discovery{ + ec2: client, + } + + err := d.refreshAZIDs(ctx) + if tt.shouldFail { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Equal(t, client.ec2Data.azToAZID, d.azToAZID) + } + }) + } +} + +func TestEC2DiscoveryRefresh(t *testing.T) { + ctx := context.Background() + + // iterate through the test cases + for _, tt := range []struct { + name string + ec2Data *ec2DataStore + expected []*targetgroup.Group + }{ + { + name: "NoPrivateIp", + ec2Data: &ec2DataStore{ + region: "region-noprivateip", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + instances: []*ec2.Instance{ + { + InstanceId: strptr("instance-id-noprivateip"), + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-noprivateip", + }, + }, + }, + { + name: "NoVpc", + ec2Data: &ec2DataStore{ + region: "region-novpc", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + ownerID: "owner-id-novpc", + instances: []*ec2.Instance{ + { + // set every possible options and test them here + Architecture: strptr("architecture-novpc"), + ImageId: strptr("ami-novpc"), + InstanceId: strptr("instance-id-novpc"), + InstanceLifecycle: strptr("instance-lifecycle-novpc"), + InstanceType: strptr("instance-type-novpc"), + Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")}, + Platform: strptr("platform-novpc"), + PrivateDnsName: strptr("private-dns-novpc"), + PrivateIpAddress: strptr("1.2.3.4"), + PublicDnsName: strptr("public-dns-novpc"), + PublicIpAddress: strptr("42.42.42.2"), + State: &ec2.InstanceState{Name: strptr("running")}, + // test tags once and for all + Tags: []*ec2.Tag{ + {Key: strptr("tag-1-key"), Value: strptr("tag-1-value")}, + {Key: strptr("tag-2-key"), Value: strptr("tag-2-value")}, + nil, + {Value: strptr("tag-4-value")}, + {Key: strptr("tag-5-key")}, + }, + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-novpc", + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue("1.2.3.4:4242"), + "__meta_ec2_ami": model.LabelValue("ami-novpc"), + "__meta_ec2_architecture": model.LabelValue("architecture-novpc"), + "__meta_ec2_availability_zone": model.LabelValue("azname-b"), + "__meta_ec2_availability_zone_id": model.LabelValue("azid-2"), + "__meta_ec2_instance_id": model.LabelValue("instance-id-novpc"), + "__meta_ec2_instance_lifecycle": model.LabelValue("instance-lifecycle-novpc"), + "__meta_ec2_instance_type": model.LabelValue("instance-type-novpc"), + "__meta_ec2_instance_state": model.LabelValue("running"), + "__meta_ec2_owner_id": model.LabelValue("owner-id-novpc"), + "__meta_ec2_platform": model.LabelValue("platform-novpc"), + "__meta_ec2_private_dns_name": model.LabelValue("private-dns-novpc"), + "__meta_ec2_private_ip": model.LabelValue("1.2.3.4"), + "__meta_ec2_public_dns_name": 
model.LabelValue("public-dns-novpc"), + "__meta_ec2_public_ip": model.LabelValue("42.42.42.2"), + "__meta_ec2_region": model.LabelValue("region-novpc"), + "__meta_ec2_tag_tag_1_key": model.LabelValue("tag-1-value"), + "__meta_ec2_tag_tag_2_key": model.LabelValue("tag-2-value"), + }, + }, + }, + }, + }, + { + name: "Ipv4", + ec2Data: &ec2DataStore{ + region: "region-ipv4", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + instances: []*ec2.Instance{ + { + // just the minimum needed for the refresh work + ImageId: strptr("ami-ipv4"), + InstanceId: strptr("instance-id-ipv4"), + InstanceType: strptr("instance-type-ipv4"), + Placement: &ec2.Placement{AvailabilityZone: strptr("azname-c")}, + PrivateIpAddress: strptr("5.6.7.8"), + State: &ec2.InstanceState{Name: strptr("running")}, + SubnetId: strptr("azid-3"), + VpcId: strptr("vpc-ipv4"), + // network intefaces + NetworkInterfaces: []*ec2.InstanceNetworkInterface{ + // interface without subnet -> should be ignored + { + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:1::1"), + IsPrimaryIpv6: boolptr(true), + }, + }, + }, + // interface with subnet, no IPv6 + { + Ipv6Addresses: []*ec2.InstanceIpv6Address{}, + SubnetId: strptr("azid-3"), + }, + // interface with another subnet, no IPv6 + { + Ipv6Addresses: []*ec2.InstanceIpv6Address{}, + SubnetId: strptr("azid-1"), + }, + }, + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-ipv4", + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue("5.6.7.8:4242"), + "__meta_ec2_ami": model.LabelValue("ami-ipv4"), + "__meta_ec2_availability_zone": model.LabelValue("azname-c"), + "__meta_ec2_availability_zone_id": model.LabelValue("azid-3"), + "__meta_ec2_instance_id": model.LabelValue("instance-id-ipv4"), + "__meta_ec2_instance_state": model.LabelValue("running"), + "__meta_ec2_instance_type": model.LabelValue("instance-type-ipv4"), + "__meta_ec2_owner_id": model.LabelValue(""), + "__meta_ec2_primary_subnet_id": model.LabelValue("azid-3"), + "__meta_ec2_private_ip": model.LabelValue("5.6.7.8"), + "__meta_ec2_region": model.LabelValue("region-ipv4"), + "__meta_ec2_subnet_id": model.LabelValue(",azid-3,azid-1,"), + "__meta_ec2_vpc_id": model.LabelValue("vpc-ipv4"), + }, + }, + }, + }, + }, + { + name: "Ipv6", + ec2Data: &ec2DataStore{ + region: "region-ipv6", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + instances: []*ec2.Instance{ + { + // just the minimum needed for the refresh work + ImageId: strptr("ami-ipv6"), + InstanceId: strptr("instance-id-ipv6"), + InstanceType: strptr("instance-type-ipv6"), + Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")}, + PrivateIpAddress: strptr("9.10.11.12"), + State: &ec2.InstanceState{Name: strptr("running")}, + SubnetId: strptr("azid-2"), + VpcId: strptr("vpc-ipv6"), + // network intefaces + NetworkInterfaces: []*ec2.InstanceNetworkInterface{ + // interface without primary IPv6, index 2 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(3), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:2::1:1"), + IsPrimaryIpv6: boolptr(false), + }, + }, + SubnetId: strptr("azid-2"), + }, + // interface with primary IPv6, index 1 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(1), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:2::2:1"), + 
IsPrimaryIpv6: boolptr(false), + }, + { + Ipv6Address: strptr("2001:db8:2::2:2"), + IsPrimaryIpv6: boolptr(true), + }, + }, + SubnetId: strptr("azid-2"), + }, + // interface with primary IPv6, index 3 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(3), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:2::3:1"), + IsPrimaryIpv6: boolptr(true), + }, + }, + SubnetId: strptr("azid-1"), + }, + // interface without primary IPv6, index 0 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(0), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{}, + SubnetId: strptr("azid-3"), + }, + }, + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-ipv6", + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue("9.10.11.12:4242"), + "__meta_ec2_ami": model.LabelValue("ami-ipv6"), + "__meta_ec2_availability_zone": model.LabelValue("azname-b"), + "__meta_ec2_availability_zone_id": model.LabelValue("azid-2"), + "__meta_ec2_instance_id": model.LabelValue("instance-id-ipv6"), + "__meta_ec2_instance_state": model.LabelValue("running"), + "__meta_ec2_instance_type": model.LabelValue("instance-type-ipv6"), + "__meta_ec2_ipv6_addresses": model.LabelValue(",2001:db8:2::1:1,2001:db8:2::2:1,2001:db8:2::2:2,2001:db8:2::3:1,"), + "__meta_ec2_owner_id": model.LabelValue(""), + "__meta_ec2_primary_ipv6_addresses": model.LabelValue(",,2001:db8:2::2:2,,2001:db8:2::3:1,"), + "__meta_ec2_primary_subnet_id": model.LabelValue("azid-2"), + "__meta_ec2_private_ip": model.LabelValue("9.10.11.12"), + "__meta_ec2_region": model.LabelValue("region-ipv6"), + "__meta_ec2_subnet_id": model.LabelValue(",azid-2,azid-1,azid-3,"), + "__meta_ec2_vpc_id": model.LabelValue("vpc-ipv6"), + }, + }, + }, + }, + }, + } { + t.Run(tt.name, func(t *testing.T) { + client := newMockEC2Client(tt.ec2Data) + + d := &EC2Discovery{ + ec2: client, + cfg: &EC2SDConfig{ + Port: 4242, + Region: client.ec2Data.region, + }, + } + + g, err := d.refresh(ctx) + require.NoError(t, err) + require.Equal(t, tt.expected, g) + }) + } +} + +// EC2 client mock. 
+type mockEC2Client struct { + ec2iface.EC2API + ec2Data ec2DataStore +} + +func newMockEC2Client(ec2Data *ec2DataStore) *mockEC2Client { + client := mockEC2Client{ + ec2Data: *ec2Data, + } + return &client +} + +func (m *mockEC2Client) DescribeAvailabilityZonesWithContext(ctx aws.Context, input *ec2.DescribeAvailabilityZonesInput, opts ...request.Option) (*ec2.DescribeAvailabilityZonesOutput, error) { + if len(m.ec2Data.azToAZID) == 0 { + return nil, errors.New("No AZs found") + } + + azs := make([]*ec2.AvailabilityZone, len(m.ec2Data.azToAZID)) + + i := 0 + for k, v := range m.ec2Data.azToAZID { + azs[i] = &ec2.AvailabilityZone{ + ZoneName: strptr(k), + ZoneId: strptr(v), + } + i++ + } + + return &ec2.DescribeAvailabilityZonesOutput{ + AvailabilityZones: azs, + }, nil +} + +func (m *mockEC2Client) DescribeInstancesPagesWithContext(ctx aws.Context, input *ec2.DescribeInstancesInput, fn func(*ec2.DescribeInstancesOutput, bool) bool, opts ...request.Option) error { + r := ec2.Reservation{} + r.SetInstances(m.ec2Data.instances) + r.SetOwnerId(m.ec2Data.ownerID) + + o := ec2.DescribeInstancesOutput{} + o.SetReservations([]*ec2.Reservation{&r}) + + _ = fn(&o, true) + + return nil +} diff --git a/discovery/aws/lightsail.go b/discovery/aws/lightsail.go index 0ad7f2d541..0b046be6d9 100644 --- a/discovery/aws/lightsail.go +++ b/discovery/aws/lightsail.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" @@ -29,10 +30,10 @@ import ( "github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/lightsail" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/refresh" @@ -130,14 +131,14 @@ type LightsailDiscovery struct { } // NewLightsailDiscovery returns a new LightsailDiscovery which periodically refreshes its targets. 
-func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*LightsailDiscovery, error) { +func NewLightsailDiscovery(conf *LightsailSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*LightsailDiscovery, error) { m, ok := metrics.(*lightsailMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } d := &LightsailDiscovery{ diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index 70d95b9f3a..35bbc3847c 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "log/slog" "math/rand" "net" "net/http" @@ -35,10 +36,9 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4" cache "github.com/Code-Hex/go-generics-cache" "github.com/Code-Hex/go-generics-cache/policy/lru" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/model" "github.com/prometheus/common/version" @@ -175,7 +175,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { type Discovery struct { *refresh.Discovery - logger log.Logger + logger *slog.Logger cfg *SDConfig port int cache *cache.Cache[string, *armnetwork.Interface] @@ -183,14 +183,14 @@ type Discovery struct { } // NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets. -func NewDiscovery(cfg *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(cfg *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*azureMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } l := cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5000))) d := &Discovery{ @@ -228,26 +228,26 @@ type azureClient struct { vm *armcompute.VirtualMachinesClient vmss *armcompute.VirtualMachineScaleSetsClient vmssvm *armcompute.VirtualMachineScaleSetVMsClient - logger log.Logger + logger *slog.Logger } var _ client = &azureClient{} -// createAzureClient is a helper function for creating an Azure compute client to ARM. -func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) { - cloudConfiguration, err := CloudConfigurationFromName(cfg.Environment) +// createAzureClient is a helper method for creating an Azure compute client to ARM. 
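// Splitting client construction out of refresh (see refreshAzureClient and the
// new refresh further down) also lets tests drive discovery against fake ARM
// servers. A sketch mirroring the usage in azure_test.go, with the helper and
// variable names taken from that test file:
//
//	client := createMockAzureClient(t, vmResp, vmssResp, vmssvmResp, interfaceResp, nil)
//	tg, err := sd.refreshAzureClient(context.Background(), client)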
+func (d *Discovery) createAzureClient() (client, error) { + cloudConfiguration, err := CloudConfigurationFromName(d.cfg.Environment) if err != nil { return &azureClient{}, err } var c azureClient - c.logger = logger + c.logger = d.logger telemetry := policy.TelemetryOptions{ ApplicationID: userAgent, } - credential, err := newCredential(cfg, policy.ClientOptions{ + credential, err := newCredential(*d.cfg, policy.ClientOptions{ Cloud: cloudConfiguration, Telemetry: telemetry, }) @@ -255,7 +255,7 @@ func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) { return &azureClient{}, err } - client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, "azure_sd") + client, err := config_util.NewClientFromConfig(d.cfg.HTTPClientConfig, "azure_sd") if err != nil { return &azureClient{}, err } @@ -267,22 +267,22 @@ func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) { }, } - c.vm, err = armcompute.NewVirtualMachinesClient(cfg.SubscriptionID, credential, options) + c.vm, err = armcompute.NewVirtualMachinesClient(d.cfg.SubscriptionID, credential, options) if err != nil { return &azureClient{}, err } - c.nic, err = armnetwork.NewInterfacesClient(cfg.SubscriptionID, credential, options) + c.nic, err = armnetwork.NewInterfacesClient(d.cfg.SubscriptionID, credential, options) if err != nil { return &azureClient{}, err } - c.vmss, err = armcompute.NewVirtualMachineScaleSetsClient(cfg.SubscriptionID, credential, options) + c.vmss, err = armcompute.NewVirtualMachineScaleSetsClient(d.cfg.SubscriptionID, credential, options) if err != nil { return &azureClient{}, err } - c.vmssvm, err = armcompute.NewVirtualMachineScaleSetVMsClient(cfg.SubscriptionID, credential, options) + c.vmssvm, err = armcompute.NewVirtualMachineScaleSetVMsClient(d.cfg.SubscriptionID, credential, options) if err != nil { return &azureClient{}, err } @@ -337,35 +337,27 @@ type virtualMachine struct { } // Create a new azureResource object from an ID string. -func newAzureResourceFromID(id string, logger log.Logger) (*arm.ResourceID, error) { +func newAzureResourceFromID(id string, logger *slog.Logger) (*arm.ResourceID, error) { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } resourceID, err := arm.ParseResourceID(id) if err != nil { err := fmt.Errorf("invalid ID '%s': %w", id, err) - level.Error(logger).Log("err", err) + logger.Error("Failed to parse resource ID", "err", err) return &arm.ResourceID{}, err } return resourceID, nil } -func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { - defer level.Debug(d.logger).Log("msg", "Azure discovery completed") - - client, err := createAzureClient(*d.cfg, d.logger) - if err != nil { - d.metrics.failuresCount.Inc() - return nil, fmt.Errorf("could not create Azure client: %w", err) - } - +func (d *Discovery) refreshAzureClient(ctx context.Context, client client) ([]*targetgroup.Group, error) { machines, err := client.getVMs(ctx, d.cfg.ResourceGroup) if err != nil { d.metrics.failuresCount.Inc() return nil, fmt.Errorf("could not get virtual machines: %w", err) } - level.Debug(d.logger).Log("msg", "Found virtual machines during Azure discovery.", "count", len(machines)) + d.logger.Debug("Found virtual machines during Azure discovery.", "count", len(machines)) // Load the vms managed by scale sets. 
scaleSets, err := client.getScaleSets(ctx, d.cfg.ResourceGroup) @@ -418,6 +410,18 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { return []*targetgroup.Group{&tg}, nil } +func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { + defer d.logger.Debug("Azure discovery completed") + + client, err := d.createAzureClient() + if err != nil { + d.metrics.failuresCount.Inc() + return nil, fmt.Errorf("could not create Azure client: %w", err) + } + + return d.refreshAzureClient(ctx, client) +} + func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualMachine) (model.LabelSet, error) { r, err := newAzureResourceFromID(vm.ID, d.logger) if err != nil { @@ -459,7 +463,7 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM } if err != nil { if errors.Is(err, errorNotFound) { - level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err) + d.logger.Warn("Network interface does not exist", "name", nicID, "err", err) } else { return nil, err } @@ -480,7 +484,7 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM // yet support this. On deallocated machines, this value happens to be nil so it // is a cheap and easy way to determine if a machine is allocated or not. if networkInterface.Properties.Primary == nil { - level.Debug(d.logger).Log("msg", "Skipping deallocated virtual machine", "machine", vm.Name) + d.logger.Debug("Skipping deallocated virtual machine", "machine", vm.Name) return nil, nil } @@ -724,7 +728,7 @@ func (d *Discovery) addToCache(nicID string, netInt *armnetwork.Interface) { rs := time.Duration(random) * time.Second exptime := time.Duration(d.cfg.RefreshInterval*10) + rs d.cache.Set(nicID, netInt, cache.WithExpiration(exptime)) - level.Debug(d.logger).Log("msg", "Adding nic", "nic", nicID, "time", exptime.Seconds()) + d.logger.Debug("Adding nic", "nic", nicID, "time", exptime.Seconds()) } // getFromCache will get the network Interface for the specified nicID diff --git a/discovery/azure/azure_test.go b/discovery/azure/azure_test.go index 32dab66c8c..b905e9fcef 100644 --- a/discovery/azure/azure_test.go +++ b/discovery/azure/azure_test.go @@ -15,19 +15,34 @@ package azure import ( "context" - "fmt" + "log/slog" + "net/http" + "slices" + "strings" "testing" + "github.com/Azure/azure-sdk-for-go/sdk/azcore" "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" + azfake "github.com/Azure/azure-sdk-for-go/sdk/azcore/fake" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5" + fake "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5/fake" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4" + fakenetwork "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4/fake" cache "github.com/Code-Hex/go-generics-cache" "github.com/Code-Hex/go-generics-cache/policy/lru" - "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/goleak" + + "github.com/prometheus/prometheus/discovery" + "github.com/prometheus/prometheus/discovery/targetgroup" ) +const defaultMockNetworkID string = "/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Network/networkInterfaces/{networkInterfaceName}" + func TestMain(m 
*testing.M) { goleak.VerifyTestMain(m, goleak.IgnoreTopFunction("github.com/Code-Hex/go-generics-cache.(*janitor).run.func1"), @@ -96,13 +111,12 @@ func TestVMToLabelSet(t *testing.T) { vmType := "type" location := "westeurope" computerName := "computer_name" - networkID := "/subscriptions/00000000-0000-0000-0000-000000000000/network1" ipAddress := "10.20.30.40" primary := true networkProfile := armcompute.NetworkProfile{ NetworkInterfaces: []*armcompute.NetworkInterfaceReference{ { - ID: &networkID, + ID: to.Ptr(defaultMockNetworkID), Properties: &armcompute.NetworkInterfaceReferenceProperties{Primary: &primary}, }, }, @@ -139,7 +153,7 @@ func TestVMToLabelSet(t *testing.T) { Location: location, OsType: "Linux", Tags: map[string]*string{}, - NetworkInterfaces: []string{networkID}, + NetworkInterfaces: []string{defaultMockNetworkID}, Size: size, } @@ -150,11 +164,12 @@ func TestVMToLabelSet(t *testing.T) { cfg := DefaultSDConfig d := &Discovery{ cfg: &cfg, - logger: log.NewNopLogger(), + logger: promslog.NewNopLogger(), cache: cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5))), } network := armnetwork.Interface{ - Name: &networkID, + Name: to.Ptr(defaultMockNetworkID), + ID: to.Ptr(defaultMockNetworkID), Properties: &armnetwork.InterfacePropertiesFormat{ Primary: &primary, IPConfigurations: []*armnetwork.InterfaceIPConfiguration{ @@ -164,9 +179,9 @@ func TestVMToLabelSet(t *testing.T) { }, }, } - client := &mockAzureClient{ - networkInterface: &network, - } + + client := createMockAzureClient(t, nil, nil, nil, network, nil) + labelSet, err := d.vmToLabelSet(context.Background(), client, actualVM) require.NoError(t, err) require.Len(t, labelSet, 11) @@ -475,34 +490,372 @@ func TestNewAzureResourceFromID(t *testing.T) { } } +func TestAzureRefresh(t *testing.T) { + tests := []struct { + scenario string + vmResp []armcompute.VirtualMachinesClientListAllResponse + vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse + vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse + interfacesResp armnetwork.Interface + expectedTG []*targetgroup.Group + }{ + { + scenario: "VMs, VMSS and VMSSVMs in Multiple Responses", + vmResp: []armcompute.VirtualMachinesClientListAllResponse{ + { + VirtualMachineListResult: armcompute.VirtualMachineListResult{ + Value: []*armcompute.VirtualMachine{ + defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm1"), to.Ptr("vm1")), + defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm2"), to.Ptr("vm2")), + }, + }, + }, + { + VirtualMachineListResult: armcompute.VirtualMachineListResult{ + Value: []*armcompute.VirtualMachine{ + defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm3"), to.Ptr("vm3")), + defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm4"), to.Ptr("vm4")), + }, + }, + }, + }, + vmssResp: []armcompute.VirtualMachineScaleSetsClientListAllResponse{ + { + VirtualMachineScaleSetListWithLinkResult: armcompute.VirtualMachineScaleSetListWithLinkResult{ + Value: []*armcompute.VirtualMachineScaleSet{ + { + ID: 
to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1"), + Name: to.Ptr("vmScaleSet1"), + Location: to.Ptr("australiaeast"), + Type: to.Ptr("Microsoft.Compute/virtualMachineScaleSets"), + }, + }, + }, + }, + }, + vmssvmResp: []armcompute.VirtualMachineScaleSetVMsClientListResponse{ + { + VirtualMachineScaleSetVMListResult: armcompute.VirtualMachineScaleSetVMListResult{ + Value: []*armcompute.VirtualMachineScaleSetVM{ + defaultVMSSVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm1"), to.Ptr("vmScaleSet1_vm1")), + defaultVMSSVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm2"), to.Ptr("vmScaleSet1_vm2")), + }, + }, + }, + }, + interfacesResp: armnetwork.Interface{ + ID: to.Ptr(defaultMockNetworkID), + Properties: &armnetwork.InterfacePropertiesFormat{ + Primary: to.Ptr(true), + IPConfigurations: []*armnetwork.InterfaceIPConfiguration{ + {Properties: &armnetwork.InterfaceIPConfigurationPropertiesFormat{ + PrivateIPAddress: to.Ptr("10.0.0.1"), + }}, + }, + }, + }, + expectedTG: []*targetgroup.Group{ + { + Targets: []model.LabelSet{ + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm1", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vm1", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm2", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vm2", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm3", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vm3", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": 
"/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm4", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vm4", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm1", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vmScaleSet1_vm1", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_scale_set": "vmScaleSet1", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm2", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vmScaleSet1_vm2", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_scale_set": "vmScaleSet1", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + }, + }, + }, + }, + } + for _, tc := range tests { + t.Run(tc.scenario, func(t *testing.T) { + t.Parallel() + azureSDConfig := &DefaultSDConfig + + azureClient := createMockAzureClient(t, tc.vmResp, tc.vmssResp, tc.vmssvmResp, tc.interfacesResp, nil) + + reg := prometheus.NewRegistry() + refreshMetrics := discovery.NewRefreshMetrics(reg) + metrics := azureSDConfig.NewDiscovererMetrics(reg, refreshMetrics) + + sd, err := NewDiscovery(azureSDConfig, nil, metrics) + require.NoError(t, err) + + tg, err := sd.refreshAzureClient(context.Background(), azureClient) + require.NoError(t, err) + + sortTargetsByID(tg[0].Targets) + require.Equal(t, tc.expectedTG, tg) + }) + } +} + type mockAzureClient struct { - networkInterface *armnetwork.Interface + azureClient } -var _ client = &mockAzureClient{} +func createMockAzureClient(t *testing.T, vmResp []armcompute.VirtualMachinesClientListAllResponse, vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse, vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse, interfaceResp armnetwork.Interface, logger *slog.Logger) client { + t.Helper() + mockVMServer := defaultMockVMServer(vmResp) + mockVMSSServer := defaultMockVMSSServer(vmssResp) + mockVMScaleSetVMServer := defaultMockVMSSVMServer(vmssvmResp) + mockInterfaceServer := defaultMockInterfaceServer(interfaceResp) + + vmClient, err := armcompute.NewVirtualMachinesClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{ + 
ClientOptions: azcore.ClientOptions{ + Transport: fake.NewVirtualMachinesServerTransport(&mockVMServer), + }, + }) + require.NoError(t, err) + + vmssClient, err := armcompute.NewVirtualMachineScaleSetsClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{ + ClientOptions: azcore.ClientOptions{ + Transport: fake.NewVirtualMachineScaleSetsServerTransport(&mockVMSSServer), + }, + }) + require.NoError(t, err) + + vmssvmClient, err := armcompute.NewVirtualMachineScaleSetVMsClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{ + ClientOptions: azcore.ClientOptions{ + Transport: fake.NewVirtualMachineScaleSetVMsServerTransport(&mockVMScaleSetVMServer), + }, + }) + require.NoError(t, err) -func (*mockAzureClient) getVMs(ctx context.Context, resourceGroup string) ([]virtualMachine, error) { - return nil, nil + interfacesClient, err := armnetwork.NewInterfacesClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{ + ClientOptions: azcore.ClientOptions{ + Transport: fakenetwork.NewInterfacesServerTransport(&mockInterfaceServer), + }, + }) + require.NoError(t, err) + + return &mockAzureClient{ + azureClient: azureClient{ + vm: vmClient, + vmss: vmssClient, + vmssvm: vmssvmClient, + nic: interfacesClient, + logger: logger, + }, + } } -func (*mockAzureClient) getScaleSets(ctx context.Context, resourceGroup string) ([]armcompute.VirtualMachineScaleSet, error) { - return nil, nil +func defaultMockInterfaceServer(interfaceResp armnetwork.Interface) fakenetwork.InterfacesServer { + return fakenetwork.InterfacesServer{ + Get: func(ctx context.Context, resourceGroupName, networkInterfaceName string, options *armnetwork.InterfacesClientGetOptions) (resp azfake.Responder[armnetwork.InterfacesClientGetResponse], errResp azfake.ErrorResponder) { + resp.SetResponse(http.StatusOK, armnetwork.InterfacesClientGetResponse{Interface: interfaceResp}, nil) + return + }, + GetVirtualMachineScaleSetNetworkInterface: func(ctx context.Context, resourceGroupName, virtualMachineScaleSetName, virtualmachineIndex, networkInterfaceName string, options *armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceOptions) (resp azfake.Responder[armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceResponse], errResp azfake.ErrorResponder) { + resp.SetResponse(http.StatusOK, armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceResponse{Interface: interfaceResp}, nil) + return + }, + } +} + +func defaultMockVMServer(vmResp []armcompute.VirtualMachinesClientListAllResponse) fake.VirtualMachinesServer { + return fake.VirtualMachinesServer{ + NewListAllPager: func(options *armcompute.VirtualMachinesClientListAllOptions) (resp azfake.PagerResponder[armcompute.VirtualMachinesClientListAllResponse]) { + for _, page := range vmResp { + resp.AddPage(http.StatusOK, page, nil) + } + return + }, + } +} + +func defaultMockVMSSServer(vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse) fake.VirtualMachineScaleSetsServer { + return fake.VirtualMachineScaleSetsServer{ + NewListAllPager: func(options *armcompute.VirtualMachineScaleSetsClientListAllOptions) (resp azfake.PagerResponder[armcompute.VirtualMachineScaleSetsClientListAllResponse]) { + for _, page := range vmssResp { + resp.AddPage(http.StatusOK, page, nil) + } + return + }, + } } -func (*mockAzureClient) getScaleSetVMs(ctx context.Context, scaleSet armcompute.VirtualMachineScaleSet) ([]virtualMachine, error) { - return nil, nil +func defaultMockVMSSVMServer(vmssvmResp 
[]armcompute.VirtualMachineScaleSetVMsClientListResponse) fake.VirtualMachineScaleSetVMsServer { + return fake.VirtualMachineScaleSetVMsServer{ + NewListPager: func(resourceGroupName, virtualMachineScaleSetName string, options *armcompute.VirtualMachineScaleSetVMsClientListOptions) (resp azfake.PagerResponder[armcompute.VirtualMachineScaleSetVMsClientListResponse]) { + for _, page := range vmssvmResp { + resp.AddPage(http.StatusOK, page, nil) + } + return + }, + } } -func (m *mockAzureClient) getVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*armnetwork.Interface, error) { - if networkInterfaceID == "" { - return nil, fmt.Errorf("parameter networkInterfaceID cannot be empty") +func defaultVMWithIDAndName(id, name *string) *armcompute.VirtualMachine { + vmSize := armcompute.VirtualMachineSizeTypes("size") + osType := armcompute.OperatingSystemTypesLinux + defaultID := "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/testVM" + defaultName := "testVM" + + if id == nil { + id = &defaultID + } + if name == nil { + name = &defaultName + } + + return &armcompute.VirtualMachine{ + ID: id, + Name: name, + Type: to.Ptr("Microsoft.Compute/virtualMachines"), + Location: to.Ptr("australiaeast"), + Properties: &armcompute.VirtualMachineProperties{ + OSProfile: &armcompute.OSProfile{ + ComputerName: to.Ptr("computer_name"), + }, + StorageProfile: &armcompute.StorageProfile{ + OSDisk: &armcompute.OSDisk{ + OSType: &osType, + }, + }, + NetworkProfile: &armcompute.NetworkProfile{ + NetworkInterfaces: []*armcompute.NetworkInterfaceReference{ + { + ID: to.Ptr(defaultMockNetworkID), + }, + }, + }, + HardwareProfile: &armcompute.HardwareProfile{ + VMSize: &vmSize, + }, + }, + Tags: map[string]*string{ + "prometheus": new(string), + }, } - return m.networkInterface, nil } -func (m *mockAzureClient) getVMScaleSetVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID, scaleSetName, instanceID string) (*armnetwork.Interface, error) { - if scaleSetName == "" { - return nil, fmt.Errorf("parameter virtualMachineScaleSetName cannot be empty") +func defaultVMSSVMWithIDAndName(id, name *string) *armcompute.VirtualMachineScaleSetVM { + vmSize := armcompute.VirtualMachineSizeTypes("size") + osType := armcompute.OperatingSystemTypesLinux + defaultID := "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/testVMScaleSet/virtualMachines/testVM" + defaultName := "testVM" + + if id == nil { + id = &defaultID + } + if name == nil { + name = &defaultName + } + + return &armcompute.VirtualMachineScaleSetVM{ + ID: id, + Name: name, + Type: to.Ptr("Microsoft.Compute/virtualMachines"), + InstanceID: to.Ptr("123"), + Location: to.Ptr("australiaeast"), + Properties: &armcompute.VirtualMachineScaleSetVMProperties{ + OSProfile: &armcompute.OSProfile{ + ComputerName: to.Ptr("computer_name"), + }, + StorageProfile: &armcompute.StorageProfile{ + OSDisk: &armcompute.OSDisk{ + OSType: &osType, + }, + }, + NetworkProfile: &armcompute.NetworkProfile{ + NetworkInterfaces: []*armcompute.NetworkInterfaceReference{ + {ID: to.Ptr(defaultMockNetworkID)}, + }, + }, + HardwareProfile: &armcompute.HardwareProfile{ + VMSize: &vmSize, + }, + }, + Tags: map[string]*string{ + "prometheus": new(string), + }, } - return m.networkInterface, nil +} + +func sortTargetsByID(targets []model.LabelSet) { + slices.SortFunc(targets, func(a, b model.LabelSet) int { + return 
strings.Compare(string(a["__meta_azure_machine_id"]), string(b["__meta_azure_machine_id"])) + }) } diff --git a/discovery/consul/consul.go b/discovery/consul/consul.go index bdc1fc8dce..fcae7b186f 100644 --- a/discovery/consul/consul.go +++ b/discovery/consul/consul.go @@ -17,17 +17,17 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" consul "github.com/hashicorp/consul/api" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -113,8 +113,11 @@ type SDConfig struct { Services []string `yaml:"services,omitempty"` // A list of tags used to filter instances inside a service. Services must contain all tags in the list. ServiceTags []string `yaml:"tags,omitempty"` - // Desired node metadata. + // Desired node metadata. As of Consul 1.14, consider `filter` instead. NodeMeta map[string]string `yaml:"node_meta,omitempty"` + // Consul filter string + // See https://www.consul.io/api-docs/catalog#filtering-1, for syntax + Filter string `yaml:"filter,omitempty"` HTTPClientConfig config.HTTPClientConfig `yaml:",inline"` } @@ -174,22 +177,23 @@ type Discovery struct { watchedServices []string // Set of services which will be discovered. watchedTags []string // Tags used to filter instances of a service. watchedNodeMeta map[string]string + watchedFilter string allowStale bool refreshInterval time.Duration finalizer func() - logger log.Logger + logger *slog.Logger metrics *consulMetrics } // NewDiscovery returns a new Discovery for the given config. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*consulMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } wrapper, err := config.NewClientFromConfig(conf.HTTPClientConfig, "consul_sd", config.WithIdleConnTimeout(2*watchTimeout)) @@ -218,6 +222,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere watchedServices: conf.Services, watchedTags: conf.ServiceTags, watchedNodeMeta: conf.NodeMeta, + watchedFilter: conf.Filter, allowStale: conf.AllowStale, refreshInterval: time.Duration(conf.RefreshInterval), clientDatacenter: conf.Datacenter, @@ -282,7 +287,7 @@ func (d *Discovery) getDatacenter() error { info, err := d.client.Agent().Self() if err != nil { - level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err) + d.logger.Error("Error retrieving datacenter name", "err", err) d.metrics.rpcFailuresCount.Inc() return err } @@ -290,12 +295,12 @@ func (d *Discovery) getDatacenter() error { dc, ok := info["Config"]["Datacenter"].(string) if !ok { err := fmt.Errorf("invalid value '%v' for Config.Datacenter", info["Config"]["Datacenter"]) - level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err) + d.logger.Error("Error retrieving datacenter name", "err", err) return err } d.clientDatacenter = dc - d.logger = log.With(d.logger, "datacenter", dc) + d.logger = d.logger.With("datacenter", dc) return nil } @@ -361,13 +366,14 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- 
[]*targetgroup.Group) { // entire list of services. func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.Group, lastIndex *uint64, services map[string]func()) { catalog := d.client.Catalog() - level.Debug(d.logger).Log("msg", "Watching services", "tags", strings.Join(d.watchedTags, ",")) + d.logger.Debug("Watching services", "tags", strings.Join(d.watchedTags, ","), "filter", d.watchedFilter) opts := &consul.QueryOptions{ WaitIndex: *lastIndex, WaitTime: watchTimeout, AllowStale: d.allowStale, NodeMeta: d.watchedNodeMeta, + Filter: d.watchedFilter, } t0 := time.Now() srvs, meta, err := catalog.Services(opts.WithContext(ctx)) @@ -382,7 +388,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup. } if err != nil { - level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err) + d.logger.Error("Error refreshing service list", "err", err) d.metrics.rpcFailuresCount.Inc() time.Sleep(retryInterval) return @@ -445,7 +451,7 @@ type consulService struct { discovery *Discovery client *consul.Client tagSeparator string - logger log.Logger + logger *slog.Logger rpcFailuresCount prometheus.Counter serviceRPCDuration prometheus.Observer } @@ -490,7 +496,7 @@ func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.G // Get updates for a service. func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Group, health *consul.Health, lastIndex *uint64) { - level.Debug(srv.logger).Log("msg", "Watching service", "service", srv.name, "tags", strings.Join(srv.tags, ",")) + srv.logger.Debug("Watching service", "service", srv.name, "tags", strings.Join(srv.tags, ",")) opts := &consul.QueryOptions{ WaitIndex: *lastIndex, @@ -513,7 +519,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr } if err != nil { - level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err) + srv.logger.Error("Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err) srv.rpcFailuresCount.Inc() time.Sleep(retryInterval) return diff --git a/discovery/consul/consul_test.go b/discovery/consul/consul_test.go index e3bc7938f5..cdbb80baba 100644 --- a/discovery/consul/consul_test.go +++ b/discovery/consul/consul_test.go @@ -21,10 +21,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/goleak" "gopkg.in/yaml.v2" @@ -252,6 +252,8 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) { case "/v1/catalog/services?index=1&wait=120000ms": time.Sleep(5 * time.Second) response = ServicesTestAnswer + case "/v1/catalog/services?filter=NodeMeta.rack_name+%3D%3D+%222304%22&index=1&wait=120000ms": + response = ServicesTestAnswer default: t.Errorf("Unhandled consul call: %s", r.URL) } @@ -270,7 +272,7 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) { } func newDiscovery(t *testing.T, config *SDConfig) *Discovery { - logger := log.NewNopLogger() + logger := promslog.NewNopLogger() metrics := NewTestMetrics(t, config, prometheus.NewRegistry()) @@ -369,6 +371,27 @@ func TestAllOptions(t *testing.T) { <-ch } +// Watch the test service with a specific tag and node-meta via Filter parameter. 
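// For reference, the scrape configuration this test corresponds to would look
// roughly like the sketch below; `services` and `filter` follow the yaml tags
// on SDConfig above, while the `server` value is illustrative:
//
//	consul_sd_configs:
//	  - server: "localhost:8500"
//	    services: ["test"]
//	    filter: 'NodeMeta.rack_name == "2304"'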
+func TestFilterOption(t *testing.T) { + stub, config := newServer(t) + defer stub.Close() + + config.Services = []string{"test"} + config.Filter = `NodeMeta.rack_name == "2304"` + config.Token = "fake-token" + + d := newDiscovery(t, config) + + ctx, cancel := context.WithCancel(context.Background()) + ch := make(chan []*targetgroup.Group) + go func() { + d.Run(ctx, ch) + close(ch) + }() + checkOneTarget(t, <-ch) + cancel() +} + func TestGetDatacenterShouldReturnError(t *testing.T) { for _, tc := range []struct { handler func(http.ResponseWriter, *http.Request) @@ -407,7 +430,7 @@ func TestGetDatacenterShouldReturnError(t *testing.T) { err = d.getDatacenter() // An error should be returned. - require.Equal(t, tc.errMessage, err.Error()) + require.EqualError(t, err, tc.errMessage) // Should still be empty. require.Equal(t, "", d.clientDatacenter) } diff --git a/discovery/digitalocean/digitalocean.go b/discovery/digitalocean/digitalocean.go index ecee60cb1f..52f3a9c57a 100644 --- a/discovery/digitalocean/digitalocean.go +++ b/discovery/digitalocean/digitalocean.go @@ -16,6 +16,7 @@ package digitalocean import ( "context" "fmt" + "log/slog" "net" "net/http" "strconv" @@ -23,7 +24,6 @@ import ( "time" "github.com/digitalocean/godo" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -111,7 +111,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*digitaloceanMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/digitalocean/digitalocean_test.go b/discovery/digitalocean/digitalocean_test.go index 841b5ef977..a282225ac2 100644 --- a/discovery/digitalocean/digitalocean_test.go +++ b/discovery/digitalocean/digitalocean_test.go @@ -19,9 +19,9 @@ import ( "net/url" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -57,7 +57,7 @@ func TestDigitalOceanSDRefresh(t *testing.T) { defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) endpoint, err := url.Parse(sdmock.Mock.Endpoint()) require.NoError(t, err) diff --git a/discovery/discovery.go b/discovery/discovery.go index a91faf6c86..c400de3632 100644 --- a/discovery/discovery.go +++ b/discovery/discovery.go @@ -15,9 +15,9 @@ package discovery import ( "context" + "log/slog" "reflect" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -47,7 +47,7 @@ type DiscovererMetrics interface { // DiscovererOptions provides options for a Discoverer. type DiscovererOptions struct { - Logger log.Logger + Logger *slog.Logger Metrics DiscovererMetrics @@ -109,7 +109,7 @@ func (c *Configs) SetDirectory(dir string) { // UnmarshalYAML implements yaml.Unmarshaler. 
func (c *Configs) UnmarshalYAML(unmarshal func(interface{}) error) error { - cfgTyp := getConfigType(configsType) + cfgTyp := reflect.StructOf(configFields) cfgPtr := reflect.New(cfgTyp) cfgVal := cfgPtr.Elem() @@ -124,7 +124,7 @@ func (c *Configs) UnmarshalYAML(unmarshal func(interface{}) error) error { // MarshalYAML implements yaml.Marshaler. func (c Configs) MarshalYAML() (interface{}, error) { - cfgTyp := getConfigType(configsType) + cfgTyp := reflect.StructOf(configFields) cfgPtr := reflect.New(cfgTyp) cfgVal := cfgPtr.Elem() diff --git a/util/testutil/logging.go b/discovery/discovery_test.go similarity index 57% rename from util/testutil/logging.go rename to discovery/discovery_test.go index db096ea234..af327195f2 100644 --- a/util/testutil/logging.go +++ b/discovery/discovery_test.go @@ -1,4 +1,4 @@ -// Copyright 2019 The Prometheus Authors +// Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -11,25 +11,26 @@ // See the License for the specific language governing permissions and // limitations under the License. -package testutil +package discovery import ( "testing" - "github.com/go-kit/log" + "github.com/stretchr/testify/require" + "gopkg.in/yaml.v2" ) -type logger struct { - t *testing.T -} - -// NewLogger returns a gokit compatible Logger which calls t.Log. -func NewLogger(t *testing.T) log.Logger { - return logger{t: t} -} +func TestConfigsCustomUnMarshalMarshal(t *testing.T) { + input := `static_configs: +- targets: + - foo:1234 + - bar:4321 +` + cfg := &Configs{} + err := yaml.UnmarshalStrict([]byte(input), cfg) + require.NoError(t, err) -// Log implements log.Logger. -func (t logger) Log(keyvals ...interface{}) error { - t.t.Log(keyvals...) - return nil + output, err := yaml.Marshal(cfg) + require.NoError(t, err) + require.Equal(t, input, string(output)) } diff --git a/discovery/dns/dns.go b/discovery/dns/dns.go index 314c3d38cd..5de7f64886 100644 --- a/discovery/dns/dns.go +++ b/discovery/dns/dns.go @@ -17,17 +17,17 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/miekg/dns" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/refresh" @@ -111,21 +111,21 @@ type Discovery struct { names []string port int qtype uint16 - logger log.Logger + logger *slog.Logger metrics *dnsMetrics - lookupFn func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) + lookupFn func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*dnsMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } qtype := dns.TypeSRV @@ -174,7 +174,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { for _, name := range d.names { go func(n string) { if err := d.refreshOne(ctx, n, ch); err != nil && !errors.Is(err, context.Canceled) { - level.Error(d.logger).Log("msg", "Error refreshing DNS targets", "err", err) + d.logger.Error("Error refreshing DNS targets", "err", err) } wg.Done() }(name) @@ -238,7 +238,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ // CNAME responses can occur with "Type: A" dns_sd_config requests. continue default: - level.Warn(d.logger).Log("msg", "Invalid record", "record", record) + d.logger.Warn("Invalid record", "record", record) continue } tg.Targets = append(tg.Targets, model.LabelSet{ @@ -288,7 +288,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ // error will be generic-looking, because trying to return all the errors // returned by the combination of all name permutations and servers is a // nightmare. -func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { +func lookupWithSearchPath(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { conf, err := dns.ClientConfigFromFile(resolvConf) if err != nil { return nil, fmt.Errorf("could not load resolv.conf: %w", err) @@ -337,14 +337,14 @@ func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Ms // A non-viable answer is "anything else", which encompasses both various // system-level problems (like network timeouts) and also // valid-but-unexpected DNS responses (SERVFAIL, REFUSED, etc). 
-func lookupFromAnyServer(name string, qtype uint16, conf *dns.ClientConfig, logger log.Logger) (*dns.Msg, error) { +func lookupFromAnyServer(name string, qtype uint16, conf *dns.ClientConfig, logger *slog.Logger) (*dns.Msg, error) { client := &dns.Client{} for _, server := range conf.Servers { servAddr := net.JoinHostPort(server, conf.Port) msg, err := askServerForName(name, qtype, client, servAddr, true) if err != nil { - level.Warn(logger).Log("msg", "DNS resolution failed", "server", server, "name", name, "err", err) + logger.Warn("DNS resolution failed", "server", server, "name", name, "err", err) continue } diff --git a/discovery/dns/dns_test.go b/discovery/dns/dns_test.go index 33a976827d..96bb32491f 100644 --- a/discovery/dns/dns_test.go +++ b/discovery/dns/dns_test.go @@ -16,11 +16,11 @@ package dns import ( "context" "fmt" + "log/slog" "net" "testing" "time" - "github.com/go-kit/log" "github.com/miekg/dns" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" @@ -40,7 +40,7 @@ func TestDNS(t *testing.T) { testCases := []struct { name string config SDConfig - lookup func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) + lookup func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) expected []*targetgroup.Group }{ @@ -52,7 +52,7 @@ func TestDNS(t *testing.T) { Port: 80, Type: "A", }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return nil, fmt.Errorf("some error") }, expected: []*targetgroup.Group{}, @@ -65,7 +65,7 @@ func TestDNS(t *testing.T) { Port: 80, Type: "A", }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.A{A: net.IPv4(192, 0, 2, 2)}, @@ -97,7 +97,7 @@ func TestDNS(t *testing.T) { Port: 80, Type: "AAAA", }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.AAAA{AAAA: net.IPv6loopback}, @@ -128,7 +128,7 @@ func TestDNS(t *testing.T) { Type: "SRV", RefreshInterval: model.Duration(time.Minute), }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.SRV{Port: 3306, Target: "db1.example.com."}, @@ -167,7 +167,7 @@ func TestDNS(t *testing.T) { Names: []string{"_mysql._tcp.db.example.com."}, RefreshInterval: model.Duration(time.Minute), }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.SRV{Port: 3306, Target: "db1.example.com."}, @@ -198,7 +198,7 @@ func TestDNS(t *testing.T) { Names: []string{"_mysql._tcp.db.example.com."}, RefreshInterval: model.Duration(time.Minute), }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{}, nil }, expected: []*targetgroup.Group{ @@ -215,7 +215,7 @@ func TestDNS(t *testing.T) { Port: 25, RefreshInterval: model.Duration(time.Minute), }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: 
func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.MX{Preference: 0, Mx: "smtp1.example.com."}, diff --git a/discovery/eureka/eureka.go b/discovery/eureka/eureka.go index 779c081aee..5087346486 100644 --- a/discovery/eureka/eureka.go +++ b/discovery/eureka/eureka.go @@ -17,13 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "net/http" "net/url" "strconv" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -126,7 +126,7 @@ type Discovery struct { } // NewDiscovery creates a new Eureka discovery for the given role. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*eurekaMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/file/file.go b/discovery/file/file.go index e7e9d0870f..1c36b254cc 100644 --- a/discovery/file/file.go +++ b/discovery/file/file.go @@ -19,6 +19,7 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "strings" @@ -26,12 +27,11 @@ import ( "time" "github.com/fsnotify/fsnotify" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/discovery" @@ -175,20 +175,20 @@ type Discovery struct { // and how many target groups they contained. // This is used to detect deleted target groups. lastRefresh map[string]int - logger log.Logger + logger *slog.Logger metrics *fileMetrics } // NewDiscovery returns a new file discovery for the given paths. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { fm, ok := metrics.(*fileMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } disc := &Discovery{ @@ -210,7 +210,7 @@ func (d *Discovery) listFiles() []string { for _, p := range d.paths { files, err := filepath.Glob(p) if err != nil { - level.Error(d.logger).Log("msg", "Error expanding glob", "glob", p, "err", err) + d.logger.Error("Error expanding glob", "glob", p, "err", err) continue } paths = append(paths, files...) 
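
A minimal sketch of the go-kit/log to log/slog migration pattern that these discovery hunks apply, assuming the promslog helper from github.com/prometheus/common/promslog that the updated code imports; the glob path and error value below are illustrative only, not taken from the patch:

package main

import (
	"errors"

	"github.com/prometheus/common/promslog"
)

func main() {
	// NewNopLogger returns a *slog.Logger that discards output; the updated
	// tests use it in place of go-kit's log.NewNopLogger().
	logger := promslog.NewNopLogger()

	// go-kit style:  level.Error(logger).Log("msg", "Error expanding glob", "glob", p, "err", err)
	// slog style: the message is the first argument, followed by key/value pairs.
	logger.Error("Error expanding glob", "glob", "/etc/sd/*.json", "err", errors.New("permission denied"))
	logger.Debug("Stopping file discovery...", "paths", "[/etc/sd/*.json]")
}
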
@@ -231,7 +231,7 @@ func (d *Discovery) watchFiles() { p = "./" } if err := d.watcher.Add(p); err != nil { - level.Error(d.logger).Log("msg", "Error adding file watch", "path", p, "err", err) + d.logger.Error("Error adding file watch", "path", p, "err", err) } } } @@ -240,7 +240,7 @@ func (d *Discovery) watchFiles() { func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { watcher, err := fsnotify.NewWatcher() if err != nil { - level.Error(d.logger).Log("msg", "Error adding file watcher", "err", err) + d.logger.Error("Error adding file watcher", "err", err) d.metrics.fileWatcherErrorsCount.Inc() return } @@ -280,7 +280,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { case err := <-d.watcher.Errors: if err != nil { - level.Error(d.logger).Log("msg", "Error watching file", "err", err) + d.logger.Error("Error watching file", "err", err) } } } @@ -300,7 +300,7 @@ func (d *Discovery) deleteTimestamp(filename string) { // stop shuts down the file watcher. func (d *Discovery) stop() { - level.Debug(d.logger).Log("msg", "Stopping file discovery...", "paths", fmt.Sprintf("%v", d.paths)) + d.logger.Debug("Stopping file discovery...", "paths", fmt.Sprintf("%v", d.paths)) done := make(chan struct{}) defer close(done) @@ -320,10 +320,10 @@ func (d *Discovery) stop() { } }() if err := d.watcher.Close(); err != nil { - level.Error(d.logger).Log("msg", "Error closing file watcher", "paths", fmt.Sprintf("%v", d.paths), "err", err) + d.logger.Error("Error closing file watcher", "paths", fmt.Sprintf("%v", d.paths), "err", err) } - level.Debug(d.logger).Log("msg", "File discovery stopped") + d.logger.Debug("File discovery stopped") } // refresh reads all files matching the discovery's patterns and sends the respective @@ -339,7 +339,7 @@ func (d *Discovery) refresh(ctx context.Context, ch chan<- []*targetgroup.Group) if err != nil { d.metrics.fileSDReadErrorsCount.Inc() - level.Error(d.logger).Log("msg", "Error reading file", "path", p, "err", err) + d.logger.Error("Error reading file", "path", p, "err", err) // Prevent deletion down below. ref[p] = d.lastRefresh[p] continue @@ -356,7 +356,7 @@ func (d *Discovery) refresh(ctx context.Context, ch chan<- []*targetgroup.Group) for f, n := range d.lastRefresh { m, ok := ref[f] if !ok || n > m { - level.Debug(d.logger).Log("msg", "file_sd refresh found file that should be removed", "file", f) + d.logger.Debug("file_sd refresh found file that should be removed", "file", f) d.deleteTimestamp(f) for i := m; i < n; i++ { select { diff --git a/discovery/gce/gce.go b/discovery/gce/gce.go index 15f32dd247..a509a144e1 100644 --- a/discovery/gce/gce.go +++ b/discovery/gce/gce.go @@ -17,12 +17,12 @@ import ( "context" "errors" "fmt" + "log/slog" "net/http" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "golang.org/x/oauth2/google" @@ -129,7 +129,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*gceMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/hetzner/hcloud.go b/discovery/hetzner/hcloud.go index df56f94c5f..ba64250c0f 100644 --- a/discovery/hetzner/hcloud.go +++ b/discovery/hetzner/hcloud.go @@ -15,12 +15,12 @@ package hetzner import ( "context" + "log/slog" "net" "net/http" "strconv" "time" - "github.com/go-kit/log" "github.com/hetznercloud/hcloud-go/v2/hcloud" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -58,7 +58,7 @@ type hcloudDiscovery struct { } // newHcloudDiscovery returns a new hcloudDiscovery which periodically refreshes its targets. -func newHcloudDiscovery(conf *SDConfig, _ log.Logger) (*hcloudDiscovery, error) { +func newHcloudDiscovery(conf *SDConfig, _ *slog.Logger) (*hcloudDiscovery, error) { d := &hcloudDiscovery{ port: conf.Port, } diff --git a/discovery/hetzner/hcloud_test.go b/discovery/hetzner/hcloud_test.go index 10b799037a..fa8291625a 100644 --- a/discovery/hetzner/hcloud_test.go +++ b/discovery/hetzner/hcloud_test.go @@ -18,8 +18,8 @@ import ( "fmt" "testing" - "github.com/go-kit/log" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" ) @@ -43,7 +43,7 @@ func TestHCloudSDRefresh(t *testing.T) { cfg.HTTPClientConfig.BearerToken = hcloudTestToken cfg.hcloudEndpoint = suite.Mock.Endpoint() - d, err := newHcloudDiscovery(&cfg, log.NewNopLogger()) + d, err := newHcloudDiscovery(&cfg, promslog.NewNopLogger()) require.NoError(t, err) targetGroups, err := d.refresh(context.Background()) diff --git a/discovery/hetzner/hetzner.go b/discovery/hetzner/hetzner.go index 69c823d382..980c197d77 100644 --- a/discovery/hetzner/hetzner.go +++ b/discovery/hetzner/hetzner.go @@ -17,9 +17,9 @@ import ( "context" "errors" "fmt" + "log/slog" "time" - "github.com/go-kit/log" "github.com/hetznercloud/hcloud-go/v2/hcloud" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -135,7 +135,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*hetznerMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") @@ -157,7 +157,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere ), nil } -func newRefresher(conf *SDConfig, l log.Logger) (refresher, error) { +func newRefresher(conf *SDConfig, l *slog.Logger) (refresher, error) { switch conf.Role { case HetznerRoleHcloud: if conf.hcloudEndpoint == "" { diff --git a/discovery/hetzner/robot.go b/discovery/hetzner/robot.go index 516470b05a..958f8f710f 100644 --- a/discovery/hetzner/robot.go +++ b/discovery/hetzner/robot.go @@ -18,13 +18,13 @@ import ( "encoding/json" "fmt" "io" + "log/slog" "net" "net/http" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/common/version" @@ -51,7 +51,7 @@ type robotDiscovery struct { } // newRobotDiscovery returns a new robotDiscovery which periodically refreshes its targets. -func newRobotDiscovery(conf *SDConfig, _ log.Logger) (*robotDiscovery, error) { +func newRobotDiscovery(conf *SDConfig, _ *slog.Logger) (*robotDiscovery, error) { d := &robotDiscovery{ port: conf.Port, endpoint: conf.robotEndpoint, diff --git a/discovery/hetzner/robot_test.go b/discovery/hetzner/robot_test.go index abee5fea90..2618bd097c 100644 --- a/discovery/hetzner/robot_test.go +++ b/discovery/hetzner/robot_test.go @@ -18,9 +18,9 @@ import ( "fmt" "testing" - "github.com/go-kit/log" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" ) @@ -42,7 +42,7 @@ func TestRobotSDRefresh(t *testing.T) { cfg.HTTPClientConfig.BasicAuth = &config.BasicAuth{Username: robotTestUsername, Password: robotTestPassword} cfg.robotEndpoint = suite.Mock.Endpoint() - d, err := newRobotDiscovery(&cfg, log.NewNopLogger()) + d, err := newRobotDiscovery(&cfg, promslog.NewNopLogger()) require.NoError(t, err) targetGroups, err := d.refresh(context.Background()) @@ -91,12 +91,11 @@ func TestRobotSDRefreshHandleError(t *testing.T) { cfg := DefaultSDConfig cfg.robotEndpoint = suite.Mock.Endpoint() - d, err := newRobotDiscovery(&cfg, log.NewNopLogger()) + d, err := newRobotDiscovery(&cfg, promslog.NewNopLogger()) require.NoError(t, err) targetGroups, err := d.refresh(context.Background()) - require.Error(t, err) - require.Equal(t, "non 2xx status '401' response during hetzner service discovery with role robot", err.Error()) + require.EqualError(t, err, "non 2xx status '401' response during hetzner service discovery with role robot") require.Empty(t, targetGroups) } diff --git a/discovery/http/http.go b/discovery/http/http.go index ff76fd7627..65404694c4 100644 --- a/discovery/http/http.go +++ b/discovery/http/http.go @@ -19,17 +19,18 @@ import ( "errors" "fmt" "io" + "log/slog" "net/http" "net/url" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/version" "github.com/prometheus/prometheus/discovery" @@ -40,8 +41,8 @@ import ( var ( // DefaultSDConfig is the default HTTP SD 
configuration. DefaultSDConfig = SDConfig{ - RefreshInterval: model.Duration(60 * time.Second), HTTPClientConfig: config.DefaultHTTPClientConfig, + RefreshInterval: model.Duration(60 * time.Second), } userAgent = fmt.Sprintf("Prometheus/%s", version.Version) matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`) @@ -114,14 +115,14 @@ type Discovery struct { } // NewDiscovery returns a new HTTP discovery for the given config. -func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPClientOption, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, clientOpts []config.HTTPClientOption, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*httpMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } client, err := config.NewClientFromConfig(conf.HTTPClientConfig, "http", clientOpts...) diff --git a/discovery/http/http_test.go b/discovery/http/http_test.go index 0cafe035dc..9d3a3fb5e7 100644 --- a/discovery/http/http_test.go +++ b/discovery/http/http_test.go @@ -21,11 +21,11 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -49,7 +49,7 @@ func TestHTTPValidRefresh(t *testing.T) { require.NoError(t, metrics.Register()) defer metrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics) require.NoError(t, err) ctx := context.Background() @@ -94,7 +94,7 @@ func TestHTTPInvalidCode(t *testing.T) { require.NoError(t, metrics.Register()) defer metrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics) require.NoError(t, err) ctx := context.Background() @@ -123,7 +123,7 @@ func TestHTTPInvalidFormat(t *testing.T) { require.NoError(t, metrics.Register()) defer metrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics) require.NoError(t, err) ctx := context.Background() @@ -442,7 +442,7 @@ func TestSourceDisappeared(t *testing.T) { require.NoError(t, metrics.Register()) defer metrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics) require.NoError(t, err) for _, test := range cases { ctx := context.Background() diff --git a/discovery/ionos/ionos.go b/discovery/ionos/ionos.go index c8b4f7f8e5..1aa21667e3 100644 --- a/discovery/ionos/ionos.go +++ b/discovery/ionos/ionos.go @@ -16,9 +16,9 @@ package ionos import ( "errors" "fmt" + "log/slog" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -43,7 +43,7 @@ func init() { type Discovery struct{} // NewDiscovery returns a new refresh.Discovery for IONOS Cloud. 
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*ionosMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/ionos/server.go b/discovery/ionos/server.go index a850fbbfb4..18e89b1d43 100644 --- a/discovery/ionos/server.go +++ b/discovery/ionos/server.go @@ -16,13 +16,13 @@ package ionos import ( "context" "fmt" + "log/slog" "net" "net/http" "strconv" "strings" "time" - "github.com/go-kit/log" ionoscloud "github.com/ionos-cloud/sdk-go/v6" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -60,7 +60,7 @@ type serverDiscovery struct { datacenterID string } -func newServerDiscovery(conf *SDConfig, _ log.Logger) (*serverDiscovery, error) { +func newServerDiscovery(conf *SDConfig, _ *slog.Logger) (*serverDiscovery, error) { d := &serverDiscovery{ port: conf.Port, datacenterID: conf.DatacenterID, diff --git a/discovery/kubernetes/endpoints.go b/discovery/kubernetes/endpoints.go index c7a60ae6d3..14d3bc7a99 100644 --- a/discovery/kubernetes/endpoints.go +++ b/discovery/kubernetes/endpoints.go @@ -17,13 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" @@ -33,7 +33,7 @@ import ( // Endpoints discovers new endpoint targets. type Endpoints struct { - logger log.Logger + logger *slog.Logger endpointsInf cache.SharedIndexInformer serviceInf cache.SharedInformer @@ -49,9 +49,9 @@ type Endpoints struct { } // NewEndpoints returns a new endpoints discovery. 
-func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *Endpoints { +func NewEndpoints(l *slog.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *Endpoints { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } epAddCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleAdd) @@ -92,26 +92,23 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca }, }) if err != nil { - level.Error(l).Log("msg", "Error adding endpoints event handler.", "err", err) + l.Error("Error adding endpoints event handler.", "err", err) } serviceUpdate := func(o interface{}) { svc, err := convertToService(o) if err != nil { - level.Error(e.logger).Log("msg", "converting to Service object failed", "err", err) + e.logger.Error("converting to Service object failed", "err", err) return } - ep := &apiv1.Endpoints{} - ep.Namespace = svc.Namespace - ep.Name = svc.Name - obj, exists, err := e.endpointsStore.Get(ep) + obj, exists, err := e.endpointsStore.GetByKey(namespacedName(svc.Namespace, svc.Name)) if exists && err == nil { e.enqueue(obj.(*apiv1.Endpoints)) } if err != nil { - level.Error(e.logger).Log("msg", "retrieving endpoints failed", "err", err) + e.logger.Error("retrieving endpoints failed", "err", err) } } _, err = e.serviceInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -131,7 +128,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca }, }) if err != nil { - level.Error(l).Log("msg", "Error adding services event handler.", "err", err) + l.Error("Error adding services event handler.", "err", err) } _, err = e.podInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ UpdateFunc: func(old, cur interface{}) { @@ -154,7 +151,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca }, }) if err != nil { - level.Error(l).Log("msg", "Error adding pods event handler.", "err", err) + l.Error("Error adding pods event handler.", "err", err) } if e.withNodeMetadata { _, err = e.nodeInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -167,12 +164,15 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca e.enqueueNode(node.Name) }, DeleteFunc: func(o interface{}) { - node := o.(*apiv1.Node) - e.enqueueNode(node.Name) + nodeName, err := nodeName(o) + if err != nil { + l.Error("Error getting Node name", "err", err) + } + e.enqueueNode(nodeName) }, }) if err != nil { - level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err) + l.Error("Error adding nodes event handler.", "err", err) } } @@ -182,7 +182,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca func (e *Endpoints) enqueueNode(nodeName string) { endpoints, err := e.endpointsInf.GetIndexer().ByIndex(nodeIndex, nodeName) if err != nil { - level.Error(e.logger).Log("msg", "Error getting endpoints for node", "node", nodeName, "err", err) + e.logger.Error("Error getting endpoints for node", "node", nodeName, "err", err) return } @@ -194,7 +194,7 @@ func (e *Endpoints) enqueueNode(nodeName string) { func (e *Endpoints) enqueuePod(podNamespacedName string) { endpoints, err := e.endpointsInf.GetIndexer().ByIndex(podIndex, podNamespacedName) if err != nil { - level.Error(e.logger).Log("msg", "Error getting endpoints for pod", "pod", podNamespacedName, "err", err) + e.logger.Error("Error getting endpoints for pod", "pod", 
podNamespacedName, "err", err) return } @@ -223,7 +223,7 @@ func (e *Endpoints) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(e.logger).Log("msg", "endpoints informer unable to sync cache") + e.logger.Error("endpoints informer unable to sync cache") } return } @@ -247,13 +247,13 @@ func (e *Endpoints) process(ctx context.Context, ch chan<- []*targetgroup.Group) namespace, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { - level.Error(e.logger).Log("msg", "splitting key failed", "key", key) + e.logger.Error("splitting key failed", "key", key) return true } o, exists, err := e.endpointsStore.GetByKey(key) if err != nil { - level.Error(e.logger).Log("msg", "getting object from store failed", "key", key) + e.logger.Error("getting object from store failed", "key", key) return true } if !exists { @@ -262,7 +262,7 @@ func (e *Endpoints) process(ctx context.Context, ch chan<- []*targetgroup.Group) } eps, err := convertToEndpoints(o) if err != nil { - level.Error(e.logger).Log("msg", "converting to Endpoints object failed", "err", err) + e.logger.Error("converting to Endpoints object failed", "err", err) return true } send(ctx, ch, e.buildEndpoints(eps)) @@ -361,16 +361,19 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { target = target.Merge(podLabels(pod)) // Attach potential container port labels matching the endpoint port. - for _, c := range pod.Spec.Containers { + containers := append(pod.Spec.Containers, pod.Spec.InitContainers...) + for i, c := range containers { for _, cport := range c.Ports { if port.Port == cport.ContainerPort { ports := strconv.FormatUint(uint64(port.Port), 10) + isInit := i >= len(pod.Spec.Containers) target[podContainerNameLabel] = lv(c.Name) target[podContainerImageLabel] = lv(c.Image) target[podContainerPortNameLabel] = lv(cport.Name) target[podContainerPortNumberLabel] = lv(ports) target[podContainerPortProtocolLabel] = lv(string(port.Protocol)) + target[podContainerIsInit] = lv(strconv.FormatBool(isInit)) break } } @@ -397,10 +400,10 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { v := eps.Labels[apiv1.EndpointsOverCapacity] if v == "truncated" { - level.Warn(e.logger).Log("msg", "Number of endpoints in one Endpoints object exceeds 1000 and has been truncated, please use \"role: endpointslice\" instead", "endpoint", eps.Name) + e.logger.Warn("Number of endpoints in one Endpoints object exceeds 1000 and has been truncated, please use \"role: endpointslice\" instead", "endpoint", eps.Name) } if v == "warning" { - level.Warn(e.logger).Log("msg", "Number of endpoints in one Endpoints object exceeds 1000, please use \"role: endpointslice\" instead", "endpoint", eps.Name) + e.logger.Warn("Number of endpoints in one Endpoints object exceeds 1000, please use \"role: endpointslice\" instead", "endpoint", eps.Name) } // For all seen pods, check all container ports. If they were not covered @@ -411,7 +414,8 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { continue } - for _, c := range pe.pod.Spec.Containers { + containers := append(pe.pod.Spec.Containers, pe.pod.Spec.InitContainers...) 
+ for i, c := range containers { for _, cport := range c.Ports { hasSeenPort := func() bool { for _, eport := range pe.servicePorts { @@ -428,6 +432,7 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10)) ports := strconv.FormatUint(uint64(cport.ContainerPort), 10) + isInit := i >= len(pe.pod.Spec.Containers) target := model.LabelSet{ model.AddressLabel: lv(a), podContainerNameLabel: lv(c.Name), @@ -435,6 +440,7 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { podContainerPortNameLabel: lv(cport.Name), podContainerPortNumberLabel: lv(ports), podContainerPortProtocolLabel: lv(string(cport.Protocol)), + podContainerIsInit: lv(strconv.FormatBool(isInit)), } tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod))) } @@ -448,13 +454,10 @@ func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { if ref == nil || ref.Kind != "Pod" { return nil } - p := &apiv1.Pod{} - p.Namespace = ref.Namespace - p.Name = ref.Name - obj, exists, err := e.podStore.Get(p) + obj, exists, err := e.podStore.GetByKey(namespacedName(ref.Namespace, ref.Name)) if err != nil { - level.Error(e.logger).Log("msg", "resolving pod ref failed", "err", err) + e.logger.Error("resolving pod ref failed", "err", err) return nil } if !exists { @@ -464,31 +467,27 @@ func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { } func (e *Endpoints) addServiceLabels(ns, name string, tg *targetgroup.Group) { - svc := &apiv1.Service{} - svc.Namespace = ns - svc.Name = name - - obj, exists, err := e.serviceStore.Get(svc) + obj, exists, err := e.serviceStore.GetByKey(namespacedName(ns, name)) if err != nil { - level.Error(e.logger).Log("msg", "retrieving service failed", "err", err) + e.logger.Error("retrieving service failed", "err", err) return } if !exists { return } - svc = obj.(*apiv1.Service) + svc := obj.(*apiv1.Service) tg.Labels = tg.Labels.Merge(serviceLabels(svc)) } -func addNodeLabels(tg model.LabelSet, nodeInf cache.SharedInformer, logger log.Logger, nodeName *string) model.LabelSet { +func addNodeLabels(tg model.LabelSet, nodeInf cache.SharedInformer, logger *slog.Logger, nodeName *string) model.LabelSet { if nodeName == nil { return tg } obj, exists, err := nodeInf.GetStore().GetByKey(*nodeName) if err != nil { - level.Error(logger).Log("msg", "Error getting node", "node", *nodeName, "err", err) + logger.Error("Error getting node", "node", *nodeName, "err", err) return tg } diff --git a/discovery/kubernetes/endpoints_test.go b/discovery/kubernetes/endpoints_test.go index 3ea98c5db9..a1ac6e5d48 100644 --- a/discovery/kubernetes/endpoints_test.go +++ b/discovery/kubernetes/endpoints_test.go @@ -18,10 +18,12 @@ import ( "testing" "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/cache" "github.com/prometheus/prometheus/discovery/targetgroup" ) @@ -244,6 +246,7 @@ func TestEndpointsDiscoveryAdd(t *testing.T) { "__meta_kubernetes_pod_container_port_number": "9000", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, { "__address__": "1.2.3.4:9001", @@ -259,6 +262,7 @@ func TestEndpointsDiscoveryAdd(t *testing.T) { 
"__meta_kubernetes_pod_container_port_number": "9001", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ @@ -821,6 +825,7 @@ func TestEndpointsDiscoveryNamespaces(t *testing.T) { "__meta_kubernetes_pod_container_port_number": "9000", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ @@ -1078,6 +1083,7 @@ func TestEndpointsDiscoveryUpdatePod(t *testing.T) { "__meta_kubernetes_pod_container_port_number": "9000", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ @@ -1089,3 +1095,186 @@ func TestEndpointsDiscoveryUpdatePod(t *testing.T) { }, }.Run(t) } + +func TestEndpointsDiscoverySidecarContainer(t *testing.T) { + objs := []runtime.Object{ + &v1.Endpoints{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testsidecar", + Namespace: "default", + }, + Subsets: []v1.EndpointSubset{ + { + Addresses: []v1.EndpointAddress{ + { + IP: "4.3.2.1", + TargetRef: &v1.ObjectReference{ + Kind: "Pod", + Name: "testpod", + Namespace: "default", + }, + }, + }, + Ports: []v1.EndpointPort{ + { + Name: "testport", + Port: 9000, + Protocol: v1.ProtocolTCP, + }, + { + Name: "initport", + Port: 9111, + Protocol: v1.ProtocolTCP, + }, + }, + }, + }, + }, + &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testpod", + Namespace: "default", + UID: types.UID("deadbeef"), + }, + Spec: v1.PodSpec{ + NodeName: "testnode", + InitContainers: []v1.Container{ + { + Name: "ic1", + Image: "ic1:latest", + Ports: []v1.ContainerPort{ + { + Name: "initport", + ContainerPort: 1111, + Protocol: v1.ProtocolTCP, + }, + }, + }, + { + Name: "ic2", + Image: "ic2:latest", + Ports: []v1.ContainerPort{ + { + Name: "initport", + ContainerPort: 9111, + Protocol: v1.ProtocolTCP, + }, + }, + }, + }, + Containers: []v1.Container{ + { + Name: "c1", + Image: "c1:latest", + Ports: []v1.ContainerPort{ + { + Name: "mainport", + ContainerPort: 9000, + Protocol: v1.ProtocolTCP, + }, + }, + }, + }, + }, + Status: v1.PodStatus{ + HostIP: "2.3.4.5", + PodIP: "4.3.2.1", + }, + }, + } + + n, _ := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, objs...) 
+ + k8sDiscoveryTest{ + discovery: n, + expectedMaxItems: 1, + expectedRes: map[string]*targetgroup.Group{ + "endpoints/default/testsidecar": { + Targets: []model.LabelSet{ + { + "__address__": "4.3.2.1:9000", + "__meta_kubernetes_endpoint_address_target_kind": "Pod", + "__meta_kubernetes_endpoint_address_target_name": "testpod", + "__meta_kubernetes_endpoint_port_name": "testport", + "__meta_kubernetes_endpoint_port_protocol": "TCP", + "__meta_kubernetes_endpoint_ready": "true", + "__meta_kubernetes_pod_container_image": "c1:latest", + "__meta_kubernetes_pod_container_name": "c1", + "__meta_kubernetes_pod_container_port_name": "mainport", + "__meta_kubernetes_pod_container_port_number": "9000", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", + }, + { + "__address__": "4.3.2.1:9111", + "__meta_kubernetes_endpoint_address_target_kind": "Pod", + "__meta_kubernetes_endpoint_address_target_name": "testpod", + "__meta_kubernetes_endpoint_port_name": "initport", + "__meta_kubernetes_endpoint_port_protocol": "TCP", + "__meta_kubernetes_endpoint_ready": "true", + "__meta_kubernetes_pod_container_image": "ic2:latest", + "__meta_kubernetes_pod_container_name": "ic2", + "__meta_kubernetes_pod_container_port_name": "initport", + "__meta_kubernetes_pod_container_port_number": "9111", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "true", + }, + { + "__address__": "4.3.2.1:1111", + "__meta_kubernetes_pod_container_image": "ic1:latest", + "__meta_kubernetes_pod_container_name": "ic1", + "__meta_kubernetes_pod_container_port_name": "initport", + "__meta_kubernetes_pod_container_port_number": "1111", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "true", + }, + }, + Labels: model.LabelSet{ + "__meta_kubernetes_endpoints_name": "testsidecar", + "__meta_kubernetes_namespace": "default", + }, + Source: "endpoints/default/testsidecar", + }, + }, + }.Run(t) +} + +func BenchmarkResolvePodRef(b *testing.B) { + indexer := cache.NewIndexer(cache.DeletionHandlingMetaNamespaceKeyFunc, nil) + e := &Endpoints{ + podStore: indexer, + } + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + p := e.resolvePodRef(&v1.ObjectReference{ + Kind: "Pod", + Name: "testpod", + Namespace: "foo", + }) + require.Nil(b, p) + } +} diff --git a/discovery/kubernetes/endpointslice.go b/discovery/kubernetes/endpointslice.go index 7a70255c12..45bc43eff9 100644 --- a/discovery/kubernetes/endpointslice.go +++ b/discovery/kubernetes/endpointslice.go @@ -17,16 +17,15 @@ 
import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" apiv1 "k8s.io/api/core/v1" v1 "k8s.io/api/discovery/v1" - "k8s.io/api/discovery/v1beta1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" @@ -36,7 +35,7 @@ import ( // EndpointSlice discovers new endpoint targets. type EndpointSlice struct { - logger log.Logger + logger *slog.Logger endpointSliceInf cache.SharedIndexInformer serviceInf cache.SharedInformer @@ -52,9 +51,9 @@ type EndpointSlice struct { } // NewEndpointSlice returns a new endpointslice discovery. -func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *EndpointSlice { +func NewEndpointSlice(l *slog.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *EndpointSlice { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } epslAddCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleAdd) @@ -93,23 +92,23 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod }, }) if err != nil { - level.Error(l).Log("msg", "Error adding endpoint slices event handler.", "err", err) + l.Error("Error adding endpoint slices event handler.", "err", err) } serviceUpdate := func(o interface{}) { svc, err := convertToService(o) if err != nil { - level.Error(e.logger).Log("msg", "converting to Service object failed", "err", err) + e.logger.Error("converting to Service object failed", "err", err) return } - // TODO(brancz): use cache.Indexer to index endpoints by - // disv1beta1.LabelServiceName so this operation doesn't have to - // iterate over all endpoint objects. + // TODO(brancz): use cache.Indexer to index endpointslices by + // LabelServiceName so this operation doesn't have to iterate over all + // endpoint objects. 
for _, obj := range e.endpointSliceStore.List() { esa, err := e.getEndpointSliceAdaptor(obj) if err != nil { - level.Error(e.logger).Log("msg", "converting to EndpointSlice object failed", "err", err) + e.logger.Error("converting to EndpointSlice object failed", "err", err) continue } if lv, exists := esa.labels()[esa.labelServiceName()]; exists && lv == svc.Name { @@ -132,7 +131,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod }, }) if err != nil { - level.Error(l).Log("msg", "Error adding services event handler.", "err", err) + l.Error("Error adding services event handler.", "err", err) } if e.withNodeMetadata { @@ -146,12 +145,15 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod e.enqueueNode(node.Name) }, DeleteFunc: func(o interface{}) { - node := o.(*apiv1.Node) - e.enqueueNode(node.Name) + nodeName, err := nodeName(o) + if err != nil { + l.Error("Error getting Node name", "err", err) + } + e.enqueueNode(nodeName) }, }) if err != nil { - level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err) + l.Error("Error adding nodes event handler.", "err", err) } } @@ -161,7 +163,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod func (e *EndpointSlice) enqueueNode(nodeName string) { endpoints, err := e.endpointSliceInf.GetIndexer().ByIndex(nodeIndex, nodeName) if err != nil { - level.Error(e.logger).Log("msg", "Error getting endpoints for node", "node", nodeName, "err", err) + e.logger.Error("Error getting endpoints for node", "node", nodeName, "err", err) return } @@ -189,7 +191,7 @@ func (e *EndpointSlice) Run(ctx context.Context, ch chan<- []*targetgroup.Group) } if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(e.logger).Log("msg", "endpointslice informer unable to sync cache") + e.logger.Error("endpointslice informer unable to sync cache") } return } @@ -213,13 +215,13 @@ func (e *EndpointSlice) process(ctx context.Context, ch chan<- []*targetgroup.Gr namespace, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { - level.Error(e.logger).Log("msg", "splitting key failed", "key", key) + e.logger.Error("splitting key failed", "key", key) return true } o, exists, err := e.endpointSliceStore.GetByKey(key) if err != nil { - level.Error(e.logger).Log("msg", "getting object from store failed", "key", key) + e.logger.Error("getting object from store failed", "key", key) return true } if !exists { @@ -229,7 +231,7 @@ func (e *EndpointSlice) process(ctx context.Context, ch chan<- []*targetgroup.Gr esa, err := e.getEndpointSliceAdaptor(o) if err != nil { - level.Error(e.logger).Log("msg", "converting to EndpointSlice object failed", "err", err) + e.logger.Error("converting to EndpointSlice object failed", "err", err) return true } @@ -241,8 +243,6 @@ func (e *EndpointSlice) getEndpointSliceAdaptor(o interface{}) (endpointSliceAda switch endpointSlice := o.(type) { case *v1.EndpointSlice: return newEndpointSliceAdaptorFromV1(endpointSlice), nil - case *v1beta1.EndpointSlice: - return newEndpointSliceAdaptorFromV1beta1(endpointSlice), nil default: return nil, fmt.Errorf("received unexpected object: %v", o) } @@ -380,19 +380,23 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou target = target.Merge(podLabels(pod)) // Attach potential container port labels matching the endpoint port. 
- for _, c := range pod.Spec.Containers { + containers := append(pod.Spec.Containers, pod.Spec.InitContainers...) + for i, c := range containers { for _, cport := range c.Ports { if port.port() == nil { continue } + if *port.port() == cport.ContainerPort { ports := strconv.FormatUint(uint64(*port.port()), 10) + isInit := i >= len(pod.Spec.Containers) target[podContainerNameLabel] = lv(c.Name) target[podContainerImageLabel] = lv(c.Image) target[podContainerPortNameLabel] = lv(cport.Name) target[podContainerPortNumberLabel] = lv(ports) target[podContainerPortProtocolLabel] = lv(string(cport.Protocol)) + target[podContainerIsInit] = lv(strconv.FormatBool(isInit)) break } } @@ -420,7 +424,8 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou continue } - for _, c := range pe.pod.Spec.Containers { + containers := append(pe.pod.Spec.Containers, pe.pod.Spec.InitContainers...) + for i, c := range containers { for _, cport := range c.Ports { hasSeenPort := func() bool { for _, eport := range pe.servicePorts { @@ -440,6 +445,7 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10)) ports := strconv.FormatUint(uint64(cport.ContainerPort), 10) + isInit := i >= len(pe.pod.Spec.Containers) target := model.LabelSet{ model.AddressLabel: lv(a), podContainerNameLabel: lv(c.Name), @@ -447,6 +453,7 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou podContainerPortNameLabel: lv(cport.Name), podContainerPortNumberLabel: lv(ports), podContainerPortProtocolLabel: lv(string(cport.Protocol)), + podContainerIsInit: lv(strconv.FormatBool(isInit)), } tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod))) } @@ -460,13 +467,10 @@ func (e *EndpointSlice) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { if ref == nil || ref.Kind != "Pod" { return nil } - p := &apiv1.Pod{} - p.Namespace = ref.Namespace - p.Name = ref.Name - obj, exists, err := e.podStore.Get(p) + obj, exists, err := e.podStore.GetByKey(namespacedName(ref.Namespace, ref.Name)) if err != nil { - level.Error(e.logger).Log("msg", "resolving pod ref failed", "err", err) + e.logger.Error("resolving pod ref failed", "err", err) return nil } if !exists { @@ -477,27 +481,27 @@ func (e *EndpointSlice) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { func (e *EndpointSlice) addServiceLabels(esa endpointSliceAdaptor, tg *targetgroup.Group) { var ( - svc = &apiv1.Service{} found bool + name string ) - svc.Namespace = esa.namespace() + ns := esa.namespace() // Every EndpointSlice object has the Service they belong to in the // kubernetes.io/service-name label. 
- svc.Name, found = esa.labels()[esa.labelServiceName()] + name, found = esa.labels()[esa.labelServiceName()] if !found { return } - obj, exists, err := e.serviceStore.Get(svc) + obj, exists, err := e.serviceStore.GetByKey(namespacedName(ns, name)) if err != nil { - level.Error(e.logger).Log("msg", "retrieving service failed", "err", err) + e.logger.Error("retrieving service failed", "err", err) return } if !exists { return } - svc = obj.(*apiv1.Service) + svc := obj.(*apiv1.Service) tg.Labels = tg.Labels.Merge(serviceLabels(svc)) } diff --git a/discovery/kubernetes/endpointslice_adaptor.go b/discovery/kubernetes/endpointslice_adaptor.go index edd64fcb32..81243e2ce0 100644 --- a/discovery/kubernetes/endpointslice_adaptor.go +++ b/discovery/kubernetes/endpointslice_adaptor.go @@ -16,7 +16,6 @@ package kubernetes import ( corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/discovery/v1" - "k8s.io/api/discovery/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -109,59 +108,6 @@ func (e *endpointSliceAdaptorV1) labelServiceName() string { return v1.LabelServiceName } -// Adaptor for k8s.io/api/discovery/v1beta1. -type endpointSliceAdaptorV1Beta1 struct { - endpointSlice *v1beta1.EndpointSlice -} - -func newEndpointSliceAdaptorFromV1beta1(endpointSlice *v1beta1.EndpointSlice) endpointSliceAdaptor { - return &endpointSliceAdaptorV1Beta1{endpointSlice: endpointSlice} -} - -func (e *endpointSliceAdaptorV1Beta1) get() interface{} { - return e.endpointSlice -} - -func (e *endpointSliceAdaptorV1Beta1) getObjectMeta() metav1.ObjectMeta { - return e.endpointSlice.ObjectMeta -} - -func (e *endpointSliceAdaptorV1Beta1) name() string { - return e.endpointSlice.Name -} - -func (e *endpointSliceAdaptorV1Beta1) namespace() string { - return e.endpointSlice.Namespace -} - -func (e *endpointSliceAdaptorV1Beta1) addressType() string { - return string(e.endpointSlice.AddressType) -} - -func (e *endpointSliceAdaptorV1Beta1) endpoints() []endpointSliceEndpointAdaptor { - eps := make([]endpointSliceEndpointAdaptor, 0, len(e.endpointSlice.Endpoints)) - for i := 0; i < len(e.endpointSlice.Endpoints); i++ { - eps = append(eps, newEndpointSliceEndpointAdaptorFromV1beta1(e.endpointSlice.Endpoints[i])) - } - return eps -} - -func (e *endpointSliceAdaptorV1Beta1) ports() []endpointSlicePortAdaptor { - ports := make([]endpointSlicePortAdaptor, 0, len(e.endpointSlice.Ports)) - for i := 0; i < len(e.endpointSlice.Ports); i++ { - ports = append(ports, newEndpointSlicePortAdaptorFromV1beta1(e.endpointSlice.Ports[i])) - } - return ports -} - -func (e *endpointSliceAdaptorV1Beta1) labels() map[string]string { - return e.endpointSlice.Labels -} - -func (e *endpointSliceAdaptorV1Beta1) labelServiceName() string { - return v1beta1.LabelServiceName -} - type endpointSliceEndpointAdaptorV1 struct { endpoint v1.Endpoint } @@ -218,62 +164,6 @@ func (e *endpointSliceEndpointConditionsAdaptorV1) terminating() *bool { return e.endpointConditions.Terminating } -type endpointSliceEndpointAdaptorV1beta1 struct { - endpoint v1beta1.Endpoint -} - -func newEndpointSliceEndpointAdaptorFromV1beta1(endpoint v1beta1.Endpoint) endpointSliceEndpointAdaptor { - return &endpointSliceEndpointAdaptorV1beta1{endpoint: endpoint} -} - -func (e *endpointSliceEndpointAdaptorV1beta1) addresses() []string { - return e.endpoint.Addresses -} - -func (e *endpointSliceEndpointAdaptorV1beta1) hostname() *string { - return e.endpoint.Hostname -} - -func (e *endpointSliceEndpointAdaptorV1beta1) nodename() *string { - return e.endpoint.NodeName -} - -func (e 
*endpointSliceEndpointAdaptorV1beta1) zone() *string { - return nil -} - -func (e *endpointSliceEndpointAdaptorV1beta1) conditions() endpointSliceEndpointConditionsAdaptor { - return newEndpointSliceEndpointConditionsAdaptorFromV1beta1(e.endpoint.Conditions) -} - -func (e *endpointSliceEndpointAdaptorV1beta1) targetRef() *corev1.ObjectReference { - return e.endpoint.TargetRef -} - -func (e *endpointSliceEndpointAdaptorV1beta1) topology() map[string]string { - return e.endpoint.Topology -} - -type endpointSliceEndpointConditionsAdaptorV1beta1 struct { - endpointConditions v1beta1.EndpointConditions -} - -func newEndpointSliceEndpointConditionsAdaptorFromV1beta1(endpointConditions v1beta1.EndpointConditions) endpointSliceEndpointConditionsAdaptor { - return &endpointSliceEndpointConditionsAdaptorV1beta1{endpointConditions: endpointConditions} -} - -func (e *endpointSliceEndpointConditionsAdaptorV1beta1) ready() *bool { - return e.endpointConditions.Ready -} - -func (e *endpointSliceEndpointConditionsAdaptorV1beta1) serving() *bool { - return e.endpointConditions.Serving -} - -func (e *endpointSliceEndpointConditionsAdaptorV1beta1) terminating() *bool { - return e.endpointConditions.Terminating -} - type endpointSlicePortAdaptorV1 struct { endpointPort v1.EndpointPort } @@ -298,28 +188,3 @@ func (e *endpointSlicePortAdaptorV1) protocol() *string { func (e *endpointSlicePortAdaptorV1) appProtocol() *string { return e.endpointPort.AppProtocol } - -type endpointSlicePortAdaptorV1beta1 struct { - endpointPort v1beta1.EndpointPort -} - -func newEndpointSlicePortAdaptorFromV1beta1(port v1beta1.EndpointPort) endpointSlicePortAdaptor { - return &endpointSlicePortAdaptorV1beta1{endpointPort: port} -} - -func (e *endpointSlicePortAdaptorV1beta1) name() *string { - return e.endpointPort.Name -} - -func (e *endpointSlicePortAdaptorV1beta1) port() *int32 { - return e.endpointPort.Port -} - -func (e *endpointSlicePortAdaptorV1beta1) protocol() *string { - val := string(*e.endpointPort.Protocol) - return &val -} - -func (e *endpointSlicePortAdaptorV1beta1) appProtocol() *string { - return e.endpointPort.AppProtocol -} diff --git a/discovery/kubernetes/endpointslice_adaptor_test.go b/discovery/kubernetes/endpointslice_adaptor_test.go index 1ee3337193..de33c64b66 100644 --- a/discovery/kubernetes/endpointslice_adaptor_test.go +++ b/discovery/kubernetes/endpointslice_adaptor_test.go @@ -18,7 +18,6 @@ import ( "github.com/stretchr/testify/require" v1 "k8s.io/api/discovery/v1" - "k8s.io/api/discovery/v1beta1" ) func Test_EndpointSliceAdaptor_v1(t *testing.T) { @@ -48,31 +47,3 @@ func Test_EndpointSliceAdaptor_v1(t *testing.T) { require.Equal(t, endpointSlice.Ports[i].AppProtocol, portAdaptor.appProtocol()) } } - -func Test_EndpointSliceAdaptor_v1beta1(t *testing.T) { - endpointSlice := makeEndpointSliceV1beta1() - adaptor := newEndpointSliceAdaptorFromV1beta1(endpointSlice) - - require.Equal(t, endpointSlice.ObjectMeta.Name, adaptor.name()) - require.Equal(t, endpointSlice.ObjectMeta.Namespace, adaptor.namespace()) - require.Equal(t, endpointSlice.AddressType, v1beta1.AddressType(adaptor.addressType())) - require.Equal(t, endpointSlice.Labels, adaptor.labels()) - require.Equal(t, "testendpoints", endpointSlice.Labels[v1beta1.LabelServiceName]) - - for i, endpointAdaptor := range adaptor.endpoints() { - require.Equal(t, endpointSlice.Endpoints[i].Addresses, endpointAdaptor.addresses()) - require.Equal(t, endpointSlice.Endpoints[i].Hostname, endpointAdaptor.hostname()) - require.Equal(t, 
endpointSlice.Endpoints[i].Conditions.Ready, endpointAdaptor.conditions().ready()) - require.Equal(t, endpointSlice.Endpoints[i].Conditions.Serving, endpointAdaptor.conditions().serving()) - require.Equal(t, endpointSlice.Endpoints[i].Conditions.Terminating, endpointAdaptor.conditions().terminating()) - require.Equal(t, endpointSlice.Endpoints[i].TargetRef, endpointAdaptor.targetRef()) - require.Equal(t, endpointSlice.Endpoints[i].Topology, endpointAdaptor.topology()) - } - - for i, portAdaptor := range adaptor.ports() { - require.Equal(t, endpointSlice.Ports[i].Name, portAdaptor.name()) - require.Equal(t, endpointSlice.Ports[i].Port, portAdaptor.port()) - require.EqualValues(t, endpointSlice.Ports[i].Protocol, portAdaptor.protocol()) - require.Equal(t, endpointSlice.Ports[i].AppProtocol, portAdaptor.appProtocol()) - } -} diff --git a/discovery/kubernetes/endpointslice_test.go b/discovery/kubernetes/endpointslice_test.go index 6ef83081be..cc92c7ddaa 100644 --- a/discovery/kubernetes/endpointslice_test.go +++ b/discovery/kubernetes/endpointslice_test.go @@ -21,7 +21,6 @@ import ( "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/discovery/v1" - "k8s.io/api/discovery/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -114,62 +113,8 @@ func makeEndpointSliceV1() *v1.EndpointSlice { } } -func makeEndpointSliceV1beta1() *v1beta1.EndpointSlice { - return &v1beta1.EndpointSlice{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testendpoints", - Namespace: "default", - Labels: map[string]string{ - v1beta1.LabelServiceName: "testendpoints", - }, - Annotations: map[string]string{ - "test.annotation": "test", - }, - }, - AddressType: v1beta1.AddressTypeIPv4, - Ports: []v1beta1.EndpointPort{ - { - Name: strptr("testport"), - Port: int32ptr(9000), - Protocol: protocolptr(corev1.ProtocolTCP), - }, - }, - Endpoints: []v1beta1.Endpoint{ - { - Addresses: []string{"1.2.3.4"}, - Hostname: strptr("testendpoint1"), - }, { - Addresses: []string{"2.3.4.5"}, - Conditions: v1beta1.EndpointConditions{ - Ready: boolptr(true), - Serving: boolptr(true), - Terminating: boolptr(false), - }, - }, { - Addresses: []string{"3.4.5.6"}, - Conditions: v1beta1.EndpointConditions{ - Ready: boolptr(false), - Serving: boolptr(true), - Terminating: boolptr(true), - }, - }, { - Addresses: []string{"4.5.6.7"}, - Conditions: v1beta1.EndpointConditions{ - Ready: boolptr(true), - Serving: boolptr(true), - Terminating: boolptr(false), - }, - TargetRef: &corev1.ObjectReference{ - Kind: "Node", - Name: "barbaz", - }, - }, - }, - } -} - func TestEndpointSliceDiscoveryBeforeRun(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.25.0") + n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}) k8sDiscoveryTest{ discovery: n, @@ -249,71 +194,6 @@ func TestEndpointSliceDiscoveryBeforeRun(t *testing.T) { }.Run(t) } -func TestEndpointSliceDiscoveryBeforeRunV1beta1(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "1.20.0") - - k8sDiscoveryTest{ - discovery: n, - beforeRun: func() { - obj := makeEndpointSliceV1beta1() - c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{}) - }, - expectedMaxItems: 1, - expectedRes: map[string]*targetgroup.Group{ - "endpointslice/default/testendpoints": { - Targets: []model.LabelSet{ - { - 
"__address__": "1.2.3.4:9000", - "__meta_kubernetes_endpointslice_endpoint_hostname": "testendpoint1", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "2.3.4.5:9000", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "3.4.5.6:9000", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "4.5.6.7:9000", - "__meta_kubernetes_endpointslice_address_target_kind": "Node", - "__meta_kubernetes_endpointslice_address_target_name": "barbaz", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - }, - Labels: model.LabelSet{ - "__meta_kubernetes_endpointslice_address_type": "IPv4", - "__meta_kubernetes_namespace": "default", - "__meta_kubernetes_endpointslice_name": "testendpoints", - "__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints", - "__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true", - "__meta_kubernetes_endpointslice_annotation_test_annotation": "test", - "__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true", - }, - Source: "endpointslice/default/testendpoints", - }, - }, - }.Run(t) -} - func TestEndpointSliceDiscoveryAdd(t *testing.T) { obj := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -353,25 +233,25 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) { PodIP: "1.2.3.4", }, } - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.20.0", obj) + n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, obj) k8sDiscoveryTest{ discovery: n, afterStart: func() { - obj := &v1beta1.EndpointSlice{ + obj := &v1.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ Name: "testendpoints", Namespace: "default", }, - AddressType: v1beta1.AddressTypeIPv4, - Ports: []v1beta1.EndpointPort{ + AddressType: v1.AddressTypeIPv4, + Ports: []v1.EndpointPort{ { Name: strptr("testport"), Port: int32ptr(9000), Protocol: protocolptr(corev1.ProtocolTCP), }, }, - Endpoints: []v1beta1.Endpoint{ + Endpoints: []v1.Endpoint{ { Addresses: []string{"4.3.2.1"}, TargetRef: &corev1.ObjectReference{ @@ -379,13 +259,13 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) { Name: "testpod", Namespace: "default", }, - Conditions: v1beta1.EndpointConditions{ + Conditions: v1.EndpointConditions{ Ready: boolptr(false), }, }, }, } - 
c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{}) + c.DiscoveryV1().EndpointSlices(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{}) }, expectedMaxItems: 1, expectedRes: map[string]*targetgroup.Group{ @@ -411,6 +291,7 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) { "__meta_kubernetes_pod_phase": "", "__meta_kubernetes_pod_ready": "unknown", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, { "__address__": "1.2.3.4:9001", @@ -426,6 +307,7 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) { "__meta_kubernetes_pod_phase": "", "__meta_kubernetes_pod_ready": "unknown", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ @@ -440,118 +322,34 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) { } func TestEndpointSliceDiscoveryDelete(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1()) + n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1()) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := makeEndpointSliceV1() - c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Delete(context.Background(), obj.Name, metav1.DeleteOptions{}) + c.DiscoveryV1().EndpointSlices(obj.Namespace).Delete(context.Background(), obj.Name, metav1.DeleteOptions{}) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "endpointslice/default/testendpoints": { Source: "endpointslice/default/testendpoints", - Targets: []model.LabelSet{ - { - "__address__": "1.2.3.4:9000", - "__meta_kubernetes_endpointslice_address_target_kind": "", - "__meta_kubernetes_endpointslice_address_target_name": "", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_endpoint_hostname": "testendpoint1", - "__meta_kubernetes_endpointslice_endpoint_node_name": "foobar", - "__meta_kubernetes_endpointslice_endpoint_topology_present_topology": "true", - "__meta_kubernetes_endpointslice_endpoint_topology_topology": "value", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "2.3.4.5:9000", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1b", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "3.4.5.6:9000", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true", - "__meta_kubernetes_endpointslice_endpoint_zone": 
"us-east-1c", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "4.5.6.7:9000", - "__meta_kubernetes_endpointslice_address_target_kind": "Node", - "__meta_kubernetes_endpointslice_address_target_name": "barbaz", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - }, - Labels: map[model.LabelName]model.LabelValue{ - "__meta_kubernetes_endpointslice_address_type": "IPv4", - "__meta_kubernetes_endpointslice_name": "testendpoints", - "__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints", - "__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true", - "__meta_kubernetes_endpointslice_annotation_test_annotation": "test", - "__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true", - "__meta_kubernetes_namespace": "default", - }, }, }, }.Run(t) } func TestEndpointSliceDiscoveryUpdate(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1()) + n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1()) k8sDiscoveryTest{ discovery: n, afterStart: func() { - obj := &v1beta1.EndpointSlice{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testendpoints", - Namespace: "default", - }, - AddressType: v1beta1.AddressTypeIPv4, - Ports: []v1beta1.EndpointPort{ - { - Name: strptr("testport"), - Port: int32ptr(9000), - Protocol: protocolptr(corev1.ProtocolTCP), - }, - }, - Endpoints: []v1beta1.Endpoint{ - { - Addresses: []string{"1.2.3.4"}, - Hostname: strptr("testendpoint1"), - }, { - Addresses: []string{"2.3.4.5"}, - Conditions: v1beta1.EndpointConditions{ - Ready: boolptr(true), - }, - }, - }, - } - c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{}) + obj := makeEndpointSliceV1() + obj.ObjectMeta.Labels = nil + obj.ObjectMeta.Annotations = nil + obj.Endpoints = obj.Endpoints[0:2] + c.DiscoveryV1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{}) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ @@ -586,39 +384,11 @@ func TestEndpointSliceDiscoveryUpdate(t *testing.T) { "__meta_kubernetes_endpointslice_port_name": "testport", "__meta_kubernetes_endpointslice_port_protocol": "TCP", }, - { - "__address__": "3.4.5.6:9000", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1c", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": 
"TCP", - }, - { - "__address__": "4.5.6.7:9000", - "__meta_kubernetes_endpointslice_address_target_kind": "Node", - "__meta_kubernetes_endpointslice_address_target_name": "barbaz", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, }, Labels: model.LabelSet{ - "__meta_kubernetes_endpointslice_address_type": "IPv4", - "__meta_kubernetes_endpointslice_name": "testendpoints", - "__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints", - "__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true", - "__meta_kubernetes_endpointslice_annotation_test_annotation": "test", - "__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true", - "__meta_kubernetes_namespace": "default", + "__meta_kubernetes_endpointslice_address_type": "IPv4", + "__meta_kubernetes_endpointslice_name": "testendpoints", + "__meta_kubernetes_namespace": "default", }, }, }, @@ -626,85 +396,18 @@ func TestEndpointSliceDiscoveryUpdate(t *testing.T) { } func TestEndpointSliceDiscoveryEmptyEndpoints(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1()) + n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1()) k8sDiscoveryTest{ discovery: n, afterStart: func() { - obj := &v1beta1.EndpointSlice{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testendpoints", - Namespace: "default", - }, - AddressType: v1beta1.AddressTypeIPv4, - Ports: []v1beta1.EndpointPort{ - { - Name: strptr("testport"), - Port: int32ptr(9000), - Protocol: protocolptr(corev1.ProtocolTCP), - }, - }, - Endpoints: []v1beta1.Endpoint{}, - } - c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{}) + obj := makeEndpointSliceV1() + obj.Endpoints = []v1.Endpoint{} + c.DiscoveryV1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{}) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "endpointslice/default/testendpoints": { - Targets: []model.LabelSet{ - { - "__address__": "1.2.3.4:9000", - "__meta_kubernetes_endpointslice_address_target_kind": "", - "__meta_kubernetes_endpointslice_address_target_name": "", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_endpoint_hostname": "testendpoint1", - "__meta_kubernetes_endpointslice_endpoint_node_name": "foobar", - "__meta_kubernetes_endpointslice_endpoint_topology_present_topology": "true", - "__meta_kubernetes_endpointslice_endpoint_topology_topology": "value", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": 
"TCP", - }, - { - "__address__": "2.3.4.5:9000", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1b", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "3.4.5.6:9000", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1c", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "4.5.6.7:9000", - "__meta_kubernetes_endpointslice_address_target_kind": "Node", - "__meta_kubernetes_endpointslice_address_target_name": "barbaz", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - }, Labels: model.LabelSet{ "__meta_kubernetes_endpointslice_address_type": "IPv4", "__meta_kubernetes_endpointslice_name": "testendpoints", @@ -721,7 +424,7 @@ func TestEndpointSliceDiscoveryEmptyEndpoints(t *testing.T) { } func TestEndpointSliceDiscoveryWithService(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1()) + n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1()) k8sDiscoveryTest{ discovery: n, @@ -813,7 +516,7 @@ func TestEndpointSliceDiscoveryWithService(t *testing.T) { } func TestEndpointSliceDiscoveryWithServiceUpdate(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1()) + n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1()) k8sDiscoveryTest{ discovery: n, @@ -1285,6 +988,7 @@ func TestEndpointSliceDiscoveryNamespaces(t *testing.T) { "__meta_kubernetes_pod_phase": "", "__meta_kubernetes_pod_ready": "unknown", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ @@ -1498,3 +1202,165 @@ func TestEndpointSliceInfIndexersCount(t *testing.T) { }) } } + +func TestEndpointSliceDiscoverySidecarContainer(t *testing.T) { + objs := []runtime.Object{ + &v1.EndpointSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testsidecar", + Namespace: "default", + }, + AddressType: v1.AddressTypeIPv4, + Ports: []v1.EndpointPort{ + { + Name: strptr("testport"), + Port: int32ptr(9000), + Protocol: protocolptr(corev1.ProtocolTCP), + }, + { 
+ Name: strptr("initport"), + Port: int32ptr(9111), + Protocol: protocolptr(corev1.ProtocolTCP), + }, + }, + Endpoints: []v1.Endpoint{ + { + Addresses: []string{"4.3.2.1"}, + TargetRef: &corev1.ObjectReference{ + Kind: "Pod", + Name: "testpod", + Namespace: "default", + }, + }, + }, + }, + &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testpod", + Namespace: "default", + UID: types.UID("deadbeef"), + }, + Spec: corev1.PodSpec{ + NodeName: "testnode", + InitContainers: []corev1.Container{ + { + Name: "ic1", + Image: "ic1:latest", + Ports: []corev1.ContainerPort{ + { + Name: "initport", + ContainerPort: 1111, + Protocol: corev1.ProtocolTCP, + }, + }, + }, + { + Name: "ic2", + Image: "ic2:latest", + Ports: []corev1.ContainerPort{ + { + Name: "initport", + ContainerPort: 9111, + Protocol: corev1.ProtocolTCP, + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "c1", + Image: "c1:latest", + Ports: []corev1.ContainerPort{ + { + Name: "mainport", + ContainerPort: 9000, + Protocol: corev1.ProtocolTCP, + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + HostIP: "2.3.4.5", + PodIP: "4.3.2.1", + }, + }, + } + + n, _ := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{}, objs...) + + k8sDiscoveryTest{ + discovery: n, + expectedMaxItems: 1, + expectedRes: map[string]*targetgroup.Group{ + "endpointslice/default/testsidecar": { + Targets: []model.LabelSet{ + { + "__address__": "4.3.2.1:9000", + "__meta_kubernetes_endpointslice_address_target_kind": "Pod", + "__meta_kubernetes_endpointslice_address_target_name": "testpod", + "__meta_kubernetes_endpointslice_port": "9000", + "__meta_kubernetes_endpointslice_port_name": "testport", + "__meta_kubernetes_endpointslice_port_protocol": "TCP", + "__meta_kubernetes_pod_container_image": "c1:latest", + "__meta_kubernetes_pod_container_name": "c1", + "__meta_kubernetes_pod_container_port_name": "mainport", + "__meta_kubernetes_pod_container_port_number": "9000", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", + }, + { + "__address__": "4.3.2.1:9111", + "__meta_kubernetes_endpointslice_address_target_kind": "Pod", + "__meta_kubernetes_endpointslice_address_target_name": "testpod", + "__meta_kubernetes_endpointslice_port": "9111", + "__meta_kubernetes_endpointslice_port_name": "initport", + "__meta_kubernetes_endpointslice_port_protocol": "TCP", + "__meta_kubernetes_pod_container_image": "ic2:latest", + "__meta_kubernetes_pod_container_name": "ic2", + "__meta_kubernetes_pod_container_port_name": "initport", + "__meta_kubernetes_pod_container_port_number": "9111", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "true", + }, + { + "__address__": "4.3.2.1:1111", + "__meta_kubernetes_pod_container_image": "ic1:latest", + "__meta_kubernetes_pod_container_name": "ic1", + "__meta_kubernetes_pod_container_port_name": "initport", + 
"__meta_kubernetes_pod_container_port_number": "1111", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "true", + }, + }, + Labels: model.LabelSet{ + "__meta_kubernetes_endpointslice_address_type": "IPv4", + "__meta_kubernetes_endpointslice_name": "testsidecar", + "__meta_kubernetes_namespace": "default", + }, + Source: "endpointslice/default/testsidecar", + }, + }, + }.Run(t) +} diff --git a/discovery/kubernetes/ingress.go b/discovery/kubernetes/ingress.go index 7b6366b257..1b7847c5c4 100644 --- a/discovery/kubernetes/ingress.go +++ b/discovery/kubernetes/ingress.go @@ -17,14 +17,12 @@ import ( "context" "errors" "fmt" + "log/slog" "strings" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" v1 "k8s.io/api/networking/v1" - "k8s.io/api/networking/v1beta1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" @@ -33,14 +31,14 @@ import ( // Ingress implements discovery of Kubernetes ingress. type Ingress struct { - logger log.Logger + logger *slog.Logger informer cache.SharedInformer store cache.Store queue *workqueue.Type } // NewIngress returns a new ingress discovery. -func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress { +func NewIngress(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress { ingressAddCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleAdd) ingressUpdateCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleUpdate) ingressDeleteCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleDelete) @@ -67,7 +65,7 @@ func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.C }, }) if err != nil { - level.Error(l).Log("msg", "Error adding ingresses event handler.", "err", err) + l.Error("Error adding ingresses event handler.", "err", err) } return s } @@ -87,7 +85,7 @@ func (i *Ingress) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if !cache.WaitForCacheSync(ctx.Done(), i.informer.HasSynced) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(i.logger).Log("msg", "ingress informer unable to sync cache") + i.logger.Error("ingress informer unable to sync cache") } return } @@ -127,10 +125,8 @@ func (i *Ingress) process(ctx context.Context, ch chan<- []*targetgroup.Group) b switch ingress := o.(type) { case *v1.Ingress: ia = newIngressAdaptorFromV1(ingress) - case *v1beta1.Ingress: - ia = newIngressAdaptorFromV1beta1(ingress) default: - level.Error(i.logger).Log("msg", "converting to Ingress object failed", "err", + i.logger.Error("converting to Ingress object failed", "err", fmt.Errorf("received unexpected object: %v", o)) return true } diff --git a/discovery/kubernetes/ingress_adaptor.go b/discovery/kubernetes/ingress_adaptor.go index d1a7b7f2a2..84281196b4 100644 --- a/discovery/kubernetes/ingress_adaptor.go +++ b/discovery/kubernetes/ingress_adaptor.go @@ -15,7 +15,6 @@ package kubernetes import ( v1 "k8s.io/api/networking/v1" - "k8s.io/api/networking/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -89,56 +88,3 @@ func (i 
*ingressRuleAdaptorV1) paths() []string { } func (i *ingressRuleAdaptorV1) host() string { return i.rule.Host } - -// Adaptor for networking.k8s.io/v1beta1. -type ingressAdaptorV1Beta1 struct { - ingress *v1beta1.Ingress -} - -func newIngressAdaptorFromV1beta1(ingress *v1beta1.Ingress) ingressAdaptor { - return &ingressAdaptorV1Beta1{ingress: ingress} -} -func (i *ingressAdaptorV1Beta1) getObjectMeta() metav1.ObjectMeta { return i.ingress.ObjectMeta } -func (i *ingressAdaptorV1Beta1) name() string { return i.ingress.Name } -func (i *ingressAdaptorV1Beta1) namespace() string { return i.ingress.Namespace } -func (i *ingressAdaptorV1Beta1) labels() map[string]string { return i.ingress.Labels } -func (i *ingressAdaptorV1Beta1) annotations() map[string]string { return i.ingress.Annotations } -func (i *ingressAdaptorV1Beta1) ingressClassName() *string { return i.ingress.Spec.IngressClassName } - -func (i *ingressAdaptorV1Beta1) tlsHosts() []string { - var hosts []string - for _, tls := range i.ingress.Spec.TLS { - hosts = append(hosts, tls.Hosts...) - } - return hosts -} - -func (i *ingressAdaptorV1Beta1) rules() []ingressRuleAdaptor { - var rules []ingressRuleAdaptor - for _, rule := range i.ingress.Spec.Rules { - rules = append(rules, newIngressRuleAdaptorFromV1Beta1(rule)) - } - return rules -} - -type ingressRuleAdaptorV1Beta1 struct { - rule v1beta1.IngressRule -} - -func newIngressRuleAdaptorFromV1Beta1(rule v1beta1.IngressRule) ingressRuleAdaptor { - return &ingressRuleAdaptorV1Beta1{rule: rule} -} - -func (i *ingressRuleAdaptorV1Beta1) paths() []string { - rv := i.rule.IngressRuleValue - if rv.HTTP == nil { - return nil - } - paths := make([]string, len(rv.HTTP.Paths)) - for n, p := range rv.HTTP.Paths { - paths[n] = p.Path - } - return paths -} - -func (i *ingressRuleAdaptorV1Beta1) host() string { return i.rule.Host } diff --git a/discovery/kubernetes/ingress_test.go b/discovery/kubernetes/ingress_test.go index 8e6654c2cc..9bddfb1e14 100644 --- a/discovery/kubernetes/ingress_test.go +++ b/discovery/kubernetes/ingress_test.go @@ -20,7 +20,6 @@ import ( "github.com/prometheus/common/model" v1 "k8s.io/api/networking/v1" - "k8s.io/api/networking/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -89,60 +88,6 @@ func makeIngress(tls TLSMode) *v1.Ingress { return ret } -func makeIngressV1beta1(tls TLSMode) *v1beta1.Ingress { - ret := &v1beta1.Ingress{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testingress", - Namespace: "default", - Labels: map[string]string{"test/label": "testvalue"}, - Annotations: map[string]string{"test/annotation": "testannotationvalue"}, - }, - Spec: v1beta1.IngressSpec{ - IngressClassName: classString("testclass"), - TLS: nil, - Rules: []v1beta1.IngressRule{ - { - Host: "example.com", - IngressRuleValue: v1beta1.IngressRuleValue{ - HTTP: &v1beta1.HTTPIngressRuleValue{ - Paths: []v1beta1.HTTPIngressPath{ - {Path: "/"}, - {Path: "/foo"}, - }, - }, - }, - }, - { - // No backend config, ignored - Host: "nobackend.example.com", - IngressRuleValue: v1beta1.IngressRuleValue{ - HTTP: &v1beta1.HTTPIngressRuleValue{}, - }, - }, - { - Host: "test.example.com", - IngressRuleValue: v1beta1.IngressRuleValue{ - HTTP: &v1beta1.HTTPIngressRuleValue{ - Paths: []v1beta1.HTTPIngressPath{{}}, - }, - }, - }, - }, - }, - } - - switch tls { - case TLSYes: - ret.Spec.TLS = []v1beta1.IngressTLS{{Hosts: []string{"example.com", "test.example.com"}}} - case TLSMixed: - ret.Spec.TLS = []v1beta1.IngressTLS{{Hosts: 
[]string{"example.com"}}} - case TLSWildcard: - ret.Spec.TLS = []v1beta1.IngressTLS{{Hosts: []string{"*.example.com"}}} - } - - return ret -} - func classString(v string) *string { return &v } @@ -212,20 +157,6 @@ func TestIngressDiscoveryAdd(t *testing.T) { }.Run(t) } -func TestIngressDiscoveryAddV1beta1(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}, "v1.18.0") - - k8sDiscoveryTest{ - discovery: n, - afterStart: func() { - obj := makeIngressV1beta1(TLSNo) - c.NetworkingV1beta1().Ingresses("default").Create(context.Background(), obj, metav1.CreateOptions{}) - }, - expectedMaxItems: 1, - expectedRes: expectedTargetGroups("default", TLSNo), - }.Run(t) -} - func TestIngressDiscoveryAddTLS(t *testing.T) { n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}) @@ -240,20 +171,6 @@ func TestIngressDiscoveryAddTLS(t *testing.T) { }.Run(t) } -func TestIngressDiscoveryAddTLSV1beta1(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}, "v1.18.0") - - k8sDiscoveryTest{ - discovery: n, - afterStart: func() { - obj := makeIngressV1beta1(TLSYes) - c.NetworkingV1beta1().Ingresses("default").Create(context.Background(), obj, metav1.CreateOptions{}) - }, - expectedMaxItems: 1, - expectedRes: expectedTargetGroups("default", TLSYes), - }.Run(t) -} - func TestIngressDiscoveryAddMixed(t *testing.T) { n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}) @@ -268,20 +185,6 @@ func TestIngressDiscoveryAddMixed(t *testing.T) { }.Run(t) } -func TestIngressDiscoveryAddMixedV1beta1(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}, "v1.18.0") - - k8sDiscoveryTest{ - discovery: n, - afterStart: func() { - obj := makeIngressV1beta1(TLSMixed) - c.NetworkingV1beta1().Ingresses("default").Create(context.Background(), obj, metav1.CreateOptions{}) - }, - expectedMaxItems: 1, - expectedRes: expectedTargetGroups("default", TLSMixed), - }.Run(t) -} - func TestIngressDiscoveryNamespaces(t *testing.T) { n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}) @@ -303,27 +206,6 @@ func TestIngressDiscoveryNamespaces(t *testing.T) { }.Run(t) } -func TestIngressDiscoveryNamespacesV1beta1(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}, "v1.18.0") - - expected := expectedTargetGroups("ns1", TLSNo) - for k, v := range expectedTargetGroups("ns2", TLSNo) { - expected[k] = v - } - k8sDiscoveryTest{ - discovery: n, - afterStart: func() { - for _, ns := range []string{"ns1", "ns2"} { - obj := makeIngressV1beta1(TLSNo) - obj.Namespace = ns - c.NetworkingV1beta1().Ingresses(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{}) - } - }, - expectedMaxItems: 2, - expectedRes: expected, - }.Run(t) -} - func TestIngressDiscoveryOwnNamespace(t *testing.T) { n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{IncludeOwnNamespace: true}) diff --git a/discovery/kubernetes/kubernetes.go b/discovery/kubernetes/kubernetes.go index a8b6f85899..64e8886cfd 100644 --- a/discovery/kubernetes/kubernetes.go +++ b/discovery/kubernetes/kubernetes.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "log/slog" "os" "reflect" "strings" @@ -25,23 +26,18 @@ import ( "github.com/prometheus/prometheus/util/strutil" - disv1beta1 "k8s.io/api/discovery/v1beta1" - - "github.com/go-kit/log" - 
"github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/version" apiv1 "k8s.io/api/core/v1" disv1 "k8s.io/api/discovery/v1" networkv1 "k8s.io/api/networking/v1" - "k8s.io/api/networking/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" - utilversion "k8s.io/apimachinery/pkg/util/version" "k8s.io/apimachinery/pkg/watch" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" @@ -264,7 +260,7 @@ type Discovery struct { sync.RWMutex client kubernetes.Interface role Role - logger log.Logger + logger *slog.Logger namespaceDiscovery *NamespaceDiscovery discoverers []discovery.Discoverer selectors roleSelector @@ -289,14 +285,14 @@ func (d *Discovery) getNamespaces() []string { } // New creates a new Kubernetes discovery for the given role. -func New(l log.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Discovery, error) { +func New(l *slog.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Discovery, error) { m, ok := metrics.(*kubernetesMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } var ( kcfg *rest.Config @@ -328,7 +324,7 @@ func New(l log.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Di ownNamespace = string(ownNamespaceContents) } - level.Info(l).Log("msg", "Using pod service account via in-cluster config") + l.Info("Using pod service account via in-cluster config") default: rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "kubernetes_sd") if err != nil { @@ -401,55 +397,22 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { switch d.role { case RoleEndpointSlice: - // Check "networking.k8s.io/v1" availability with retries. 
- // If "v1" is not available, use "networking.k8s.io/v1beta1" for backward compatibility - var v1Supported bool - if retryOnError(ctx, 10*time.Second, - func() (err error) { - v1Supported, err = checkDiscoveryV1Supported(d.client) - if err != nil { - level.Error(d.logger).Log("msg", "Failed to check networking.k8s.io/v1 availability", "err", err) - } - return err - }, - ) { - d.Unlock() - return - } - for _, namespace := range namespaces { var informer cache.SharedIndexInformer - if v1Supported { - e := d.client.DiscoveryV1().EndpointSlices(namespace) - elw := &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - options.FieldSelector = d.selectors.endpointslice.field - options.LabelSelector = d.selectors.endpointslice.label - return e.List(ctx, options) - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - options.FieldSelector = d.selectors.endpointslice.field - options.LabelSelector = d.selectors.endpointslice.label - return e.Watch(ctx, options) - }, - } - informer = d.newEndpointSlicesByNodeInformer(elw, &disv1.EndpointSlice{}) - } else { - e := d.client.DiscoveryV1beta1().EndpointSlices(namespace) - elw := &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - options.FieldSelector = d.selectors.endpointslice.field - options.LabelSelector = d.selectors.endpointslice.label - return e.List(ctx, options) - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - options.FieldSelector = d.selectors.endpointslice.field - options.LabelSelector = d.selectors.endpointslice.label - return e.Watch(ctx, options) - }, - } - informer = d.newEndpointSlicesByNodeInformer(elw, &disv1beta1.EndpointSlice{}) + e := d.client.DiscoveryV1().EndpointSlices(namespace) + elw := &cache.ListWatch{ + ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { + options.FieldSelector = d.selectors.endpointslice.field + options.LabelSelector = d.selectors.endpointslice.label + return e.List(ctx, options) + }, + WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { + options.FieldSelector = d.selectors.endpointslice.field + options.LabelSelector = d.selectors.endpointslice.label + return e.Watch(ctx, options) + }, } + informer = d.newEndpointSlicesByNodeInformer(elw, &disv1.EndpointSlice{}) s := d.client.CoreV1().Services(namespace) slw := &cache.ListWatch{ @@ -483,7 +446,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { go nodeInf.Run(ctx.Done()) } eps := NewEndpointSlice( - log.With(d.logger, "role", "endpointslice"), + d.logger.With("role", "endpointslice"), informer, d.mustNewSharedInformer(slw, &apiv1.Service{}, resyncDisabled), d.mustNewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled), @@ -543,7 +506,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { } eps := NewEndpoints( - log.With(d.logger, "role", "endpoint"), + d.logger.With("role", "endpoint"), d.newEndpointsByNodeInformer(elw), d.mustNewSharedInformer(slw, &apiv1.Service{}, resyncDisabled), d.mustNewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled), @@ -577,7 +540,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { }, } pod := NewPod( - log.With(d.logger, "role", "pod"), + d.logger.With("role", "pod"), d.newPodsByNodeInformer(plw), nodeInformer, d.metrics.eventCount, @@ -601,7 +564,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { }, } svc := NewService( - log.With(d.logger, 
"role", "service"), + d.logger.With("role", "service"), d.mustNewSharedInformer(slw, &apiv1.Service{}, resyncDisabled), d.metrics.eventCount, ) @@ -609,57 +572,24 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { go svc.informer.Run(ctx.Done()) } case RoleIngress: - // Check "networking.k8s.io/v1" availability with retries. - // If "v1" is not available, use "networking.k8s.io/v1beta1" for backward compatibility - var v1Supported bool - if retryOnError(ctx, 10*time.Second, - func() (err error) { - v1Supported, err = checkNetworkingV1Supported(d.client) - if err != nil { - level.Error(d.logger).Log("msg", "Failed to check networking.k8s.io/v1 availability", "err", err) - } - return err - }, - ) { - d.Unlock() - return - } - for _, namespace := range namespaces { var informer cache.SharedInformer - if v1Supported { - i := d.client.NetworkingV1().Ingresses(namespace) - ilw := &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - options.FieldSelector = d.selectors.ingress.field - options.LabelSelector = d.selectors.ingress.label - return i.List(ctx, options) - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - options.FieldSelector = d.selectors.ingress.field - options.LabelSelector = d.selectors.ingress.label - return i.Watch(ctx, options) - }, - } - informer = d.mustNewSharedInformer(ilw, &networkv1.Ingress{}, resyncDisabled) - } else { - i := d.client.NetworkingV1beta1().Ingresses(namespace) - ilw := &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - options.FieldSelector = d.selectors.ingress.field - options.LabelSelector = d.selectors.ingress.label - return i.List(ctx, options) - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - options.FieldSelector = d.selectors.ingress.field - options.LabelSelector = d.selectors.ingress.label - return i.Watch(ctx, options) - }, - } - informer = d.mustNewSharedInformer(ilw, &v1beta1.Ingress{}, resyncDisabled) + i := d.client.NetworkingV1().Ingresses(namespace) + ilw := &cache.ListWatch{ + ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { + options.FieldSelector = d.selectors.ingress.field + options.LabelSelector = d.selectors.ingress.label + return i.List(ctx, options) + }, + WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { + options.FieldSelector = d.selectors.ingress.field + options.LabelSelector = d.selectors.ingress.label + return i.Watch(ctx, options) + }, } + informer = d.mustNewSharedInformer(ilw, &networkv1.Ingress{}, resyncDisabled) ingress := NewIngress( - log.With(d.logger, "role", "ingress"), + d.logger.With("role", "ingress"), informer, d.metrics.eventCount, ) @@ -668,11 +598,11 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { } case RoleNode: nodeInformer := d.newNodeInformer(ctx) - node := NewNode(log.With(d.logger, "role", "node"), nodeInformer, d.metrics.eventCount) + node := NewNode(d.logger.With("role", "node"), nodeInformer, d.metrics.eventCount) d.discoverers = append(d.discoverers, node) go node.informer.Run(ctx.Done()) default: - level.Error(d.logger).Log("msg", "unknown Kubernetes discovery kind", "role", d.role) + d.logger.Error("unknown Kubernetes discovery kind", "role", d.role) } var wg sync.WaitGroup @@ -720,20 +650,6 @@ func retryOnError(ctx context.Context, interval time.Duration, f func() error) ( } } -func checkNetworkingV1Supported(client kubernetes.Interface) (bool, error) { - k8sVer, 
err := client.Discovery().ServerVersion() - if err != nil { - return false, err - } - semVer, err := utilversion.ParseSemantic(k8sVer.String()) - if err != nil { - return false, err - } - // networking.k8s.io/v1 is available since Kubernetes v1.19 - // https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.19.md - return semVer.Major() >= 1 && semVer.Minor() >= 19, nil -} - func (d *Discovery) newNodeInformer(ctx context.Context) cache.SharedInformer { nlw := &cache.ListWatch{ ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { @@ -834,19 +750,6 @@ func (d *Discovery) newEndpointSlicesByNodeInformer(plw *cache.ListWatch, object } } } - case *disv1beta1.EndpointSlice: - for _, target := range e.Endpoints { - if target.TargetRef != nil { - switch target.TargetRef.Kind { - case "Pod": - if target.NodeName != nil { - nodes = append(nodes, *target.NodeName) - } - case "Node": - nodes = append(nodes, target.TargetRef.Name) - } - } - } default: return nil, fmt.Errorf("object is not an endpointslice") } @@ -882,21 +785,6 @@ func (d *Discovery) mustNewSharedIndexInformer(lw cache.ListerWatcher, exampleOb return informer } -func checkDiscoveryV1Supported(client kubernetes.Interface) (bool, error) { - k8sVer, err := client.Discovery().ServerVersion() - if err != nil { - return false, err - } - semVer, err := utilversion.ParseSemantic(k8sVer.String()) - if err != nil { - return false, err - } - // The discovery.k8s.io/v1beta1 API version of EndpointSlice will no longer be served in v1.25. - // discovery.k8s.io/v1 is available since Kubernetes v1.21 - // https://kubernetes.io/docs/reference/using-api/deprecation-guide/#v1-25 - return semVer.Major() >= 1 && semVer.Minor() >= 21, nil -} - func addObjectMetaLabels(labelSet model.LabelSet, objectMeta metav1.ObjectMeta, role Role) { labelSet[model.LabelName(metaLabelPrefix+string(role)+"_name")] = lv(objectMeta.Name) @@ -916,3 +804,13 @@ func addObjectMetaLabels(labelSet model.LabelSet, objectMeta metav1.ObjectMeta, func namespacedName(namespace, name string) string { return namespace + "/" + name } + +// nodeName knows how to handle the cache.DeletedFinalStateUnknown tombstone. +// It assumes the MetaNamespaceKeyFunc keyFunc is used, which uses the node name as the tombstone key. +func nodeName(o interface{}) (string, error) { + key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(o) + if err != nil { + return "", err + } + return key, nil +} diff --git a/discovery/kubernetes/kubernetes_test.go b/discovery/kubernetes/kubernetes_test.go index 50f25a20ab..a14f2b3d1b 100644 --- a/discovery/kubernetes/kubernetes_test.go +++ b/discovery/kubernetes/kubernetes_test.go @@ -20,10 +20,12 @@ import ( "testing" "time" - "github.com/go-kit/log" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" + apiv1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/version" "k8s.io/apimachinery/pkg/watch" @@ -46,7 +48,7 @@ func TestMain(m *testing.M) { // makeDiscovery creates a kubernetes.Discovery instance for testing. func makeDiscovery(role Role, nsDiscovery NamespaceDiscovery, objects ...runtime.Object) (*Discovery, kubernetes.Interface) { - return makeDiscoveryWithVersion(role, nsDiscovery, "v1.22.0", objects...) + return makeDiscoveryWithVersion(role, nsDiscovery, "v1.25.0", objects...) 
} // makeDiscoveryWithVersion creates a kubernetes.Discovery instance with the specified kubernetes version for testing. @@ -71,7 +73,7 @@ func makeDiscoveryWithVersion(role Role, nsDiscovery NamespaceDiscovery, k8sVer d := &Discovery{ client: clientset, - logger: log.NewNopLogger(), + logger: promslog.NewNopLogger(), role: role, namespaceDiscovery: &nsDiscovery, ownNamespace: "own-ns", @@ -285,40 +287,6 @@ func TestRetryOnError(t *testing.T) { } } -func TestCheckNetworkingV1Supported(t *testing.T) { - tests := []struct { - version string - wantSupported bool - wantErr bool - }{ - {version: "v1.18.0", wantSupported: false, wantErr: false}, - {version: "v1.18.1", wantSupported: false, wantErr: false}, - // networking v1 is supported since Kubernetes v1.19 - {version: "v1.19.0", wantSupported: true, wantErr: false}, - {version: "v1.20.0-beta.2", wantSupported: true, wantErr: false}, - // error patterns - {version: "", wantSupported: false, wantErr: true}, - {version: "<>", wantSupported: false, wantErr: true}, - } - - for _, tc := range tests { - tc := tc - t.Run(tc.version, func(t *testing.T) { - clientset := fake.NewSimpleClientset() - fakeDiscovery, _ := clientset.Discovery().(*fakediscovery.FakeDiscovery) - fakeDiscovery.FakedServerVersion = &version.Info{GitVersion: tc.version} - supported, err := checkNetworkingV1Supported(clientset) - - if tc.wantErr { - require.Error(t, err) - } else { - require.NoError(t, err) - } - require.Equal(t, tc.wantSupported, supported) - }) - } -} - func TestFailuresCountMetric(t *testing.T) { tests := []struct { role Role @@ -354,3 +322,18 @@ func TestFailuresCountMetric(t *testing.T) { }) } } + +func TestNodeName(t *testing.T) { + node := &apiv1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foo", + }, + } + name, err := nodeName(node) + require.NoError(t, err) + require.Equal(t, "foo", name) + + name, err = nodeName(cache.DeletedFinalStateUnknown{Key: "bar"}) + require.NoError(t, err) + require.Equal(t, "bar", name) +} diff --git a/discovery/kubernetes/node.go b/discovery/kubernetes/node.go index 74d87e22c4..0e0c5745f2 100644 --- a/discovery/kubernetes/node.go +++ b/discovery/kubernetes/node.go @@ -17,13 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" @@ -38,16 +38,16 @@ const ( // Node discovers Kubernetes nodes. type Node struct { - logger log.Logger + logger *slog.Logger informer cache.SharedInformer store cache.Store queue *workqueue.Type } // NewNode returns a new node discovery. 
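The logging change that runs through node.go here (and pod.go, service.go, ingress.go and kubernetes.go above) follows one mechanical pattern: the go-kit logger and its level helpers are replaced by Go's standard log/slog, with promslog.NewNopLogger() as the nil fallback. A minimal sketch of the before/after call shape, reusing messages that appear in these hunks; the standalone main scaffolding is purely illustrative and not part of the patch:

```go
package main

import (
	"errors"
	"log/slog"

	"github.com/prometheus/common/promslog"
)

func main() {
	var l *slog.Logger
	if l == nil {
		l = promslog.NewNopLogger() // replaces go-kit's log.NewNopLogger()
	}

	err := errors.New("example failure")

	// Old go-kit style, as removed above:
	//   level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err)
	// New slog style:
	l.Error("Error adding nodes event handler.", "err", err)

	// Scoped loggers move from log.With(l, "role", "node") to:
	node := l.With("role", "node")
	node.Info("Using pod service account via in-cluster config")
}
```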
-func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Node { +func NewNode(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Node { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } nodeAddCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleAdd) @@ -76,13 +76,13 @@ func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.Coun }, }) if err != nil { - level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err) + l.Error("Error adding nodes event handler.", "err", err) } return n } func (n *Node) enqueue(obj interface{}) { - key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) + key, err := nodeName(obj) if err != nil { return } @@ -96,7 +96,7 @@ func (n *Node) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if !cache.WaitForCacheSync(ctx.Done(), n.informer.HasSynced) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(n.logger).Log("msg", "node informer unable to sync cache") + n.logger.Error("node informer unable to sync cache") } return } @@ -133,7 +133,7 @@ func (n *Node) process(ctx context.Context, ch chan<- []*targetgroup.Group) bool } node, err := convertToNode(o) if err != nil { - level.Error(n.logger).Log("msg", "converting to Node object failed", "err", err) + n.logger.Error("converting to Node object failed", "err", err) return true } send(ctx, ch, n.buildNode(node)) @@ -181,7 +181,7 @@ func (n *Node) buildNode(node *apiv1.Node) *targetgroup.Group { addr, addrMap, err := nodeAddress(node) if err != nil { - level.Warn(n.logger).Log("msg", "No node address found", "err", err) + n.logger.Warn("No node address found", "err", err) return nil } addr = net.JoinHostPort(addr, strconv.FormatInt(int64(node.Status.DaemonEndpoints.KubeletEndpoint.Port), 10)) diff --git a/discovery/kubernetes/pod.go b/discovery/kubernetes/pod.go index 02990e415f..8704a66239 100644 --- a/discovery/kubernetes/pod.go +++ b/discovery/kubernetes/pod.go @@ -17,14 +17,14 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/cache" @@ -44,14 +44,14 @@ type Pod struct { nodeInf cache.SharedInformer withNodeMetadata bool store cache.Store - logger log.Logger + logger *slog.Logger queue *workqueue.Type } // NewPod creates a new pod discovery. 
-func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer, eventCount *prometheus.CounterVec) *Pod { +func NewPod(l *slog.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer, eventCount *prometheus.CounterVec) *Pod { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } podAddCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleAdd) @@ -81,7 +81,7 @@ func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInfo }, }) if err != nil { - level.Error(l).Log("msg", "Error adding pods event handler.", "err", err) + l.Error("Error adding pods event handler.", "err", err) } if p.withNodeMetadata { @@ -95,12 +95,15 @@ func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInfo p.enqueuePodsForNode(node.Name) }, DeleteFunc: func(o interface{}) { - node := o.(*apiv1.Node) - p.enqueuePodsForNode(node.Name) + nodeName, err := nodeName(o) + if err != nil { + l.Error("Error getting Node name", "err", err) + } + p.enqueuePodsForNode(nodeName) }, }) if err != nil { - level.Error(l).Log("msg", "Error adding pods event handler.", "err", err) + l.Error("Error adding pods event handler.", "err", err) } } @@ -127,7 +130,7 @@ func (p *Pod) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(p.logger).Log("msg", "pod informer unable to sync cache") + p.logger.Error("pod informer unable to sync cache") } return } @@ -164,7 +167,7 @@ func (p *Pod) process(ctx context.Context, ch chan<- []*targetgroup.Group) bool } pod, err := convertToPod(o) if err != nil { - level.Error(p.logger).Log("msg", "converting to Pod object failed", "err", err) + p.logger.Error("converting to Pod object failed", "err", err) return true } send(ctx, ch, p.buildPod(pod)) @@ -246,7 +249,7 @@ func (p *Pod) findPodContainerStatus(statuses *[]apiv1.ContainerStatus, containe func (p *Pod) findPodContainerID(statuses *[]apiv1.ContainerStatus, containerName string) string { cStatus, err := p.findPodContainerStatus(statuses, containerName) if err != nil { - level.Debug(p.logger).Log("msg", "cannot find container ID", "err", err) + p.logger.Debug("cannot find container ID", "err", err) return "" } return cStatus.ContainerID @@ -315,7 +318,7 @@ func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group { func (p *Pod) enqueuePodsForNode(nodeName string) { pods, err := p.podInf.GetIndexer().ByIndex(nodeIndex, nodeName) if err != nil { - level.Error(p.logger).Log("msg", "Error getting pods for node", "node", nodeName, "err", err) + p.logger.Error("Error getting pods for node", "node", nodeName, "err", err) return } diff --git a/discovery/kubernetes/service.go b/discovery/kubernetes/service.go index 51204a5a1a..e666497c86 100644 --- a/discovery/kubernetes/service.go +++ b/discovery/kubernetes/service.go @@ -17,13 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" @@ -33,16 +33,16 @@ import ( // Service implements discovery of Kubernetes services. type Service struct { - logger log.Logger + logger *slog.Logger informer cache.SharedInformer store cache.Store queue *workqueue.Type } // NewService returns a new service discovery. 
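The node DeleteFunc in pod.go above no longer type-asserts the deleted object to *apiv1.Node; it goes through the nodeName helper added to kubernetes.go earlier in this change, which wraps cache.DeletionHandlingMetaNamespaceKeyFunc so that a cache.DeletedFinalStateUnknown tombstone still resolves to the node's name (nodes are cluster-scoped, so the cache key is just the name). A self-contained sketch of the behaviour the new TestNodeName exercises; the helper is re-declared here only so the snippet stands alone:

```go
package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/cache"
)

// nodeName mirrors the helper added in kubernetes.go: it resolves either a
// live object or an informer tombstone to its cache key.
func nodeName(o interface{}) (string, error) {
	return cache.DeletionHandlingMetaNamespaceKeyFunc(o)
}

func main() {
	node := &apiv1.Node{ObjectMeta: metav1.ObjectMeta{Name: "foo"}}
	name, _ := nodeName(node)
	fmt.Println(name) // "foo"

	// A deletion observed only as a tombstone still yields the node name.
	name, _ = nodeName(cache.DeletedFinalStateUnknown{Key: "bar"})
	fmt.Println(name) // "bar"
}
```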
-func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Service { +func NewService(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Service { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd) @@ -71,7 +71,7 @@ func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.C }, }) if err != nil { - level.Error(l).Log("msg", "Error adding services event handler.", "err", err) + l.Error("Error adding services event handler.", "err", err) } return s } @@ -91,7 +91,7 @@ func (s *Service) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if !cache.WaitForCacheSync(ctx.Done(), s.informer.HasSynced) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(s.logger).Log("msg", "service informer unable to sync cache") + s.logger.Error("service informer unable to sync cache") } return } @@ -128,7 +128,7 @@ func (s *Service) process(ctx context.Context, ch chan<- []*targetgroup.Group) b } eps, err := convertToService(o) if err != nil { - level.Error(s.logger).Log("msg", "converting to Service object failed", "err", err) + s.logger.Error("converting to Service object failed", "err", err) return true } send(ctx, ch, s.buildService(eps)) diff --git a/discovery/legacymanager/manager.go b/discovery/legacymanager/manager.go deleted file mode 100644 index 6fc61485d1..0000000000 --- a/discovery/legacymanager/manager.go +++ /dev/null @@ -1,332 +0,0 @@ -// Copyright 2016 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package legacymanager - -import ( - "context" - "fmt" - "reflect" - "sync" - "time" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/client_golang/prometheus" - - "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/discovery/targetgroup" -) - -type poolKey struct { - setName string - provider string -} - -// provider holds a Discoverer instance, its configuration and its subscribers. -type provider struct { - name string - d discovery.Discoverer - subs []string - config interface{} -} - -// NewManager is the Discovery Manager constructor. -func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, sdMetrics map[string]discovery.DiscovererMetrics, options ...func(*Manager)) *Manager { - if logger == nil { - logger = log.NewNopLogger() - } - mgr := &Manager{ - logger: logger, - syncCh: make(chan map[string][]*targetgroup.Group), - targets: make(map[poolKey]map[string]*targetgroup.Group), - discoverCancel: []context.CancelFunc{}, - ctx: ctx, - updatert: 5 * time.Second, - triggerSend: make(chan struct{}, 1), - registerer: registerer, - sdMetrics: sdMetrics, - } - for _, option := range options { - option(mgr) - } - - // Register the metrics. - // We have to do this after setting all options, so that the name of the Manager is set. 
- if metrics, err := discovery.NewManagerMetrics(registerer, mgr.name); err == nil { - mgr.metrics = metrics - } else { - level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err) - return nil - } - - return mgr -} - -// Name sets the name of the manager. -func Name(n string) func(*Manager) { - return func(m *Manager) { - m.mtx.Lock() - defer m.mtx.Unlock() - m.name = n - } -} - -// Manager maintains a set of discovery providers and sends each update to a map channel. -// Targets are grouped by the target set name. -type Manager struct { - logger log.Logger - name string - mtx sync.RWMutex - ctx context.Context - discoverCancel []context.CancelFunc - - // Some Discoverers(eg. k8s) send only the updates for a given target group - // so we use map[tg.Source]*targetgroup.Group to know which group to update. - targets map[poolKey]map[string]*targetgroup.Group - // providers keeps track of SD providers. - providers []*provider - // The sync channel sends the updates as a map where the key is the job value from the scrape config. - syncCh chan map[string][]*targetgroup.Group - - // How long to wait before sending updates to the channel. The variable - // should only be modified in unit tests. - updatert time.Duration - - // The triggerSend channel signals to the manager that new updates have been received from providers. - triggerSend chan struct{} - - // A registerer for all service discovery metrics. - registerer prometheus.Registerer - - metrics *discovery.Metrics - sdMetrics map[string]discovery.DiscovererMetrics -} - -// Run starts the background processing. -func (m *Manager) Run() error { - go m.sender() - <-m.ctx.Done() - m.cancelDiscoverers() - return m.ctx.Err() -} - -// SyncCh returns a read only channel used by all the clients to receive target updates. -func (m *Manager) SyncCh() <-chan map[string][]*targetgroup.Group { - return m.syncCh -} - -// ApplyConfig removes all running discovery providers and starts new ones using the provided config. -func (m *Manager) ApplyConfig(cfg map[string]discovery.Configs) error { - m.mtx.Lock() - defer m.mtx.Unlock() - - for pk := range m.targets { - if _, ok := cfg[pk.setName]; !ok { - m.metrics.DiscoveredTargets.DeleteLabelValues(m.name, pk.setName) - } - } - m.cancelDiscoverers() - m.targets = make(map[poolKey]map[string]*targetgroup.Group) - m.providers = nil - m.discoverCancel = nil - - failedCount := 0 - for name, scfg := range cfg { - failedCount += m.registerProviders(scfg, name) - m.metrics.DiscoveredTargets.WithLabelValues(name).Set(0) - } - m.metrics.FailedConfigs.Set(float64(failedCount)) - - for _, prov := range m.providers { - m.startProvider(m.ctx, prov) - } - - return nil -} - -// StartCustomProvider is used for sdtool. Only use this if you know what you're doing. 
-func (m *Manager) StartCustomProvider(ctx context.Context, name string, worker discovery.Discoverer) { - p := &provider{ - name: name, - d: worker, - subs: []string{name}, - } - m.providers = append(m.providers, p) - m.startProvider(ctx, p) -} - -func (m *Manager) startProvider(ctx context.Context, p *provider) { - level.Debug(m.logger).Log("msg", "Starting provider", "provider", p.name, "subs", fmt.Sprintf("%v", p.subs)) - ctx, cancel := context.WithCancel(ctx) - updates := make(chan []*targetgroup.Group) - - m.discoverCancel = append(m.discoverCancel, cancel) - - go p.d.Run(ctx, updates) - go m.updater(ctx, p, updates) -} - -func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targetgroup.Group) { - for { - select { - case <-ctx.Done(): - return - case tgs, ok := <-updates: - m.metrics.ReceivedUpdates.Inc() - if !ok { - level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name) - return - } - - for _, s := range p.subs { - m.updateGroup(poolKey{setName: s, provider: p.name}, tgs) - } - - select { - case m.triggerSend <- struct{}{}: - default: - } - } - } -} - -func (m *Manager) sender() { - ticker := time.NewTicker(m.updatert) - defer ticker.Stop() - - for { - select { - case <-m.ctx.Done(): - return - case <-ticker.C: // Some discoverers send updates too often so we throttle these with the ticker. - select { - case <-m.triggerSend: - m.metrics.SentUpdates.Inc() - select { - case m.syncCh <- m.allGroups(): - default: - m.metrics.DelayedUpdates.Inc() - level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle") - select { - case m.triggerSend <- struct{}{}: - default: - } - } - default: - } - } - } -} - -func (m *Manager) cancelDiscoverers() { - for _, c := range m.discoverCancel { - c() - } -} - -func (m *Manager) updateGroup(poolKey poolKey, tgs []*targetgroup.Group) { - m.mtx.Lock() - defer m.mtx.Unlock() - - if _, ok := m.targets[poolKey]; !ok { - m.targets[poolKey] = make(map[string]*targetgroup.Group) - } - for _, tg := range tgs { - if tg != nil { // Some Discoverers send nil target group so need to check for it to avoid panics. - m.targets[poolKey][tg.Source] = tg - } - } -} - -func (m *Manager) allGroups() map[string][]*targetgroup.Group { - m.mtx.RLock() - defer m.mtx.RUnlock() - - tSets := map[string][]*targetgroup.Group{} - n := map[string]int{} - for pkey, tsets := range m.targets { - for _, tg := range tsets { - // Even if the target group 'tg' is empty we still need to send it to the 'Scrape manager' - // to signal that it needs to stop all scrape loops for this target set. - tSets[pkey.setName] = append(tSets[pkey.setName], tg) - n[pkey.setName] += len(tg.Targets) - } - } - for setName, v := range n { - m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v)) - } - return tSets -} - -// registerProviders returns a number of failed SD config. 
-func (m *Manager) registerProviders(cfgs discovery.Configs, setName string) int { - var ( - failed int - added bool - ) - add := func(cfg discovery.Config) { - for _, p := range m.providers { - if reflect.DeepEqual(cfg, p.config) { - p.subs = append(p.subs, setName) - added = true - return - } - } - typ := cfg.Name() - d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{ - Logger: log.With(m.logger, "discovery", typ, "config", setName), - Metrics: m.sdMetrics[typ], - }) - if err != nil { - level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName) - failed++ - return - } - m.providers = append(m.providers, &provider{ - name: fmt.Sprintf("%s/%d", typ, len(m.providers)), - d: d, - config: cfg, - subs: []string{setName}, - }) - added = true - } - for _, cfg := range cfgs { - add(cfg) - } - if !added { - // Add an empty target group to force the refresh of the corresponding - // scrape pool and to notify the receiver that this target set has no - // current targets. - // It can happen because the combined set of SD configurations is empty - // or because we fail to instantiate all the SD configurations. - add(discovery.StaticConfig{{}}) - } - return failed -} - -// StaticProvider holds a list of target groups that never change. -type StaticProvider struct { - TargetGroups []*targetgroup.Group -} - -// Run implements the Worker interface. -func (sd *StaticProvider) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { - // We still have to consider that the consumer exits right away in which case - // the context will be canceled. - select { - case ch <- sd.TargetGroups: - case <-ctx.Done(): - } - close(ch) -} diff --git a/discovery/legacymanager/manager_test.go b/discovery/legacymanager/manager_test.go deleted file mode 100644 index f1be963113..0000000000 --- a/discovery/legacymanager/manager_test.go +++ /dev/null @@ -1,1185 +0,0 @@ -// Copyright 2016 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package legacymanager - -import ( - "context" - "fmt" - "sort" - "strconv" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/prometheus/client_golang/prometheus" - client_testutil "github.com/prometheus/client_golang/prometheus/testutil" - "github.com/prometheus/common/model" - "github.com/stretchr/testify/require" - - "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/discovery/targetgroup" - "github.com/prometheus/prometheus/util/testutil" -) - -func TestMain(m *testing.M) { - testutil.TolerantVerifyLeak(m) -} - -func newTestMetrics(t *testing.T, reg prometheus.Registerer) (*discovery.RefreshMetricsManager, map[string]discovery.DiscovererMetrics) { - refreshMetrics := discovery.NewRefreshMetrics(reg) - sdMetrics, err := discovery.RegisterSDMetrics(reg, refreshMetrics) - require.NoError(t, err) - return &refreshMetrics, sdMetrics -} - -// TestTargetUpdatesOrder checks that the target updates are received in the expected order. 
-func TestTargetUpdatesOrder(t *testing.T) { - // The order by which the updates are send is determined by the interval passed to the mock discovery adapter - // Final targets array is ordered alphabetically by the name of the discoverer. - // For example discoverer "A" with targets "t2,t3" and discoverer "B" with targets "t1,t2" will result in "t2,t3,t1,t2" after the merge. - testCases := []struct { - title string - updates map[string][]update - expectedTargets [][]*targetgroup.Group - }{ - { - title: "Single TP no updates", - updates: map[string][]update{ - "tp1": {}, - }, - expectedTargets: nil, - }, - { - title: "Multiple TPs no updates", - updates: map[string][]update{ - "tp1": {}, - "tp2": {}, - "tp3": {}, - }, - expectedTargets: nil, - }, - { - title: "Single TP empty initials", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{}, - interval: 5 * time.Millisecond, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - {}, - }, - }, - { - title: "Multiple TPs empty initials", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{}, - interval: 5 * time.Millisecond, - }, - }, - "tp2": { - { - targetGroups: []targetgroup.Group{}, - interval: 200 * time.Millisecond, - }, - }, - "tp3": { - { - targetGroups: []targetgroup.Group{}, - interval: 100 * time.Millisecond, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - {}, - {}, - {}, - }, - }, - { - title: "Single TP initials only", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - }, - }, - { - title: "Multiple TPs initials only", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - }, - }, - "tp2": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - }, - interval: 10 * time.Millisecond, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - }, - }, - }, - { - title: "Single TP initials followed by empty updates", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - interval: 0, - }, - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{}, - }, - }, - interval: 10 * time.Millisecond, - }, - }, - }, - 
expectedTargets: [][]*targetgroup.Group{ - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{}, - }, - }, - }, - }, - { - title: "Single TP initials and new groups", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - interval: 0, - }, - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - { - Source: "tp1_group3", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - }, - interval: 10 * time.Millisecond, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - { - Source: "tp1_group3", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - }, - }, - }, - { - title: "Multiple TPs initials and new groups", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - interval: 10 * time.Millisecond, - }, - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group3", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group4", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - }, - interval: 500 * time.Millisecond, - }, - }, - "tp2": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "5"}}, - }, - { - Source: "tp2_group2", - Targets: []model.LabelSet{{"__instance__": "6"}}, - }, - }, - interval: 100 * time.Millisecond, - }, - { - targetGroups: []targetgroup.Group{ - { - Source: "tp2_group3", - Targets: []model.LabelSet{{"__instance__": "7"}}, - }, - { - Source: "tp2_group4", - Targets: []model.LabelSet{{"__instance__": "8"}}, - }, - }, - interval: 10 * time.Millisecond, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "5"}}, - }, - { - Source: "tp2_group2", - Targets: []model.LabelSet{{"__instance__": "6"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "5"}}, - }, - { - Source: "tp2_group2", - 
Targets: []model.LabelSet{{"__instance__": "6"}}, - }, - { - Source: "tp2_group3", - Targets: []model.LabelSet{{"__instance__": "7"}}, - }, - { - Source: "tp2_group4", - Targets: []model.LabelSet{{"__instance__": "8"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - { - Source: "tp1_group3", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group4", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "5"}}, - }, - { - Source: "tp2_group2", - Targets: []model.LabelSet{{"__instance__": "6"}}, - }, - { - Source: "tp2_group3", - Targets: []model.LabelSet{{"__instance__": "7"}}, - }, - { - Source: "tp2_group4", - Targets: []model.LabelSet{{"__instance__": "8"}}, - }, - }, - }, - }, - { - title: "One TP initials arrive after other TP updates.", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - interval: 10 * time.Millisecond, - }, - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - }, - interval: 150 * time.Millisecond, - }, - }, - "tp2": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "5"}}, - }, - { - Source: "tp2_group2", - Targets: []model.LabelSet{{"__instance__": "6"}}, - }, - }, - interval: 200 * time.Millisecond, - }, - { - targetGroups: []targetgroup.Group{ - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "7"}}, - }, - { - Source: "tp2_group2", - Targets: []model.LabelSet{{"__instance__": "8"}}, - }, - }, - interval: 100 * time.Millisecond, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "5"}}, - }, - { - Source: "tp2_group2", - Targets: []model.LabelSet{{"__instance__": "6"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "7"}}, - }, - { - Source: "tp2_group2", - Targets: []model.LabelSet{{"__instance__": "8"}}, - }, - }, - }, - }, - - { - title: "Single TP empty update in between", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - interval: 30 * time.Millisecond, - }, - { - 
targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{}, - }, - }, - interval: 10 * time.Millisecond, - }, - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - }, - interval: 300 * time.Millisecond, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - }, - }, - }, - } - - for i, tc := range testCases { - tc := tc - t.Run(tc.title, func(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) - require.NotNil(t, discoveryManager) - discoveryManager.updatert = 100 * time.Millisecond - - var totalUpdatesCount int - for _, up := range tc.updates { - if len(up) > 0 { - totalUpdatesCount += len(up) - } - } - provUpdates := make(chan []*targetgroup.Group, totalUpdatesCount) - - for _, up := range tc.updates { - go newMockDiscoveryProvider(up...).Run(ctx, provUpdates) - } - - for x := 0; x < totalUpdatesCount; x++ { - select { - case <-ctx.Done(): - t.Fatalf("%d: no update arrived within the timeout limit", x) - case tgs := <-provUpdates: - discoveryManager.updateGroup(poolKey{setName: strconv.Itoa(i), provider: tc.title}, tgs) - for _, got := range discoveryManager.allGroups() { - assertEqualGroups(t, got, tc.expectedTargets[x]) - } - } - } - }) - } -} - -func assertEqualGroups(t *testing.T, got, expected []*targetgroup.Group) { - t.Helper() - - // Need to sort by the groups's source as the received order is not guaranteed. 
- sort.Sort(byGroupSource(got)) - sort.Sort(byGroupSource(expected)) - - require.Equal(t, expected, got) -} - -func staticConfig(addrs ...string) discovery.StaticConfig { - var cfg discovery.StaticConfig - for i, addr := range addrs { - cfg = append(cfg, &targetgroup.Group{ - Source: strconv.Itoa(i), - Targets: []model.LabelSet{ - {model.AddressLabel: model.LabelValue(addr)}, - }, - }) - } - return cfg -} - -func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Group, poolKey poolKey, label string, present bool) { - t.Helper() - if _, ok := tSets[poolKey]; !ok { - t.Fatalf("'%s' should be present in Pool keys: %v", poolKey, tSets) - } - - match := false - var mergedTargets string - for _, targetGroup := range tSets[poolKey] { - for _, l := range targetGroup.Targets { - mergedTargets = mergedTargets + " " + l.String() - if l.String() == label { - match = true - } - } - } - if match != present { - msg := "" - if !present { - msg = "not" - } - t.Fatalf("%q should %s be present in Targets labels: %q", label, msg, mergedTargets) - } -} - -func TestTargetSetRecreatesTargetGroupsEveryRun(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) - require.NotNil(t, discoveryManager) - discoveryManager.updatert = 100 * time.Millisecond - go discoveryManager.Run() - - c := map[string]discovery.Configs{ - "prometheus": { - staticConfig("foo:9090", "bar:9090"), - }, - } - discoveryManager.ApplyConfig(c) - - <-discoveryManager.SyncCh() - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"foo:9090\"}", true) - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"bar:9090\"}", true) - - c["prometheus"] = discovery.Configs{ - staticConfig("foo:9090"), - } - discoveryManager.ApplyConfig(c) - - <-discoveryManager.SyncCh() - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"foo:9090\"}", true) - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"bar:9090\"}", false) -} - -func TestDiscovererConfigs(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) - require.NotNil(t, discoveryManager) - discoveryManager.updatert = 100 * time.Millisecond - go discoveryManager.Run() - - c := map[string]discovery.Configs{ - "prometheus": { - staticConfig("foo:9090", "bar:9090"), - staticConfig("baz:9090"), - }, - } - discoveryManager.ApplyConfig(c) - - <-discoveryManager.SyncCh() - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"foo:9090\"}", true) - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"bar:9090\"}", true) - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/1"}, "{__address__=\"baz:9090\"}", true) -} - -// TestTargetSetRecreatesEmptyStaticConfigs ensures that reloading a config file after -// removing all targets from the static_configs sends an update with empty targetGroups. 
-// This is required to signal the receiver that this target set has no current targets. -func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) - require.NotNil(t, discoveryManager) - discoveryManager.updatert = 100 * time.Millisecond - go discoveryManager.Run() - - c := map[string]discovery.Configs{ - "prometheus": { - staticConfig("foo:9090"), - }, - } - discoveryManager.ApplyConfig(c) - - <-discoveryManager.SyncCh() - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"foo:9090\"}", true) - - c["prometheus"] = discovery.Configs{ - discovery.StaticConfig{{}}, - } - discoveryManager.ApplyConfig(c) - - <-discoveryManager.SyncCh() - - pkey := poolKey{setName: "prometheus", provider: "static/0"} - targetGroups, ok := discoveryManager.targets[pkey] - if !ok { - t.Fatalf("'%v' should be present in target groups", pkey) - } - group, ok := targetGroups[""] - if !ok { - t.Fatalf("missing '' key in target groups %v", targetGroups) - } - - if len(group.Targets) != 0 { - t.Fatalf("Invalid number of targets: expected 0, got %d", len(group.Targets)) - } -} - -func TestIdenticalConfigurationsAreCoalesced(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - discoveryManager := NewManager(ctx, nil, reg, sdMetrics) - require.NotNil(t, discoveryManager) - discoveryManager.updatert = 100 * time.Millisecond - go discoveryManager.Run() - - c := map[string]discovery.Configs{ - "prometheus": { - staticConfig("foo:9090"), - }, - "prometheus2": { - staticConfig("foo:9090"), - }, - } - discoveryManager.ApplyConfig(c) - - <-discoveryManager.SyncCh() - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"foo:9090\"}", true) - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus2", provider: "static/0"}, "{__address__=\"foo:9090\"}", true) - if len(discoveryManager.providers) != 1 { - t.Fatalf("Invalid number of providers: expected 1, got %d", len(discoveryManager.providers)) - } -} - -func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) { - originalConfig := discovery.Configs{ - staticConfig("foo:9090", "bar:9090", "baz:9090"), - } - processedConfig := discovery.Configs{ - staticConfig("foo:9090", "bar:9090", "baz:9090"), - } - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) - require.NotNil(t, discoveryManager) - discoveryManager.updatert = 100 * time.Millisecond - go discoveryManager.Run() - - cfgs := map[string]discovery.Configs{ - "prometheus": processedConfig, - } - discoveryManager.ApplyConfig(cfgs) - <-discoveryManager.SyncCh() - - for _, cfg := range cfgs { - require.Equal(t, originalConfig, cfg) - } -} - -type errorConfig struct{ err error } - -func (e errorConfig) Name() string { return "error" } -func (e errorConfig) NewDiscoverer(discovery.DiscovererOptions) (discovery.Discoverer, error) { - return nil, e.err -} - -// NewDiscovererMetrics implements discovery.Config. 
-func (errorConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics { - return &discovery.NoopDiscovererMetrics{} -} - -func TestGaugeFailedConfigs(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) - require.NotNil(t, discoveryManager) - discoveryManager.updatert = 100 * time.Millisecond - go discoveryManager.Run() - - c := map[string]discovery.Configs{ - "prometheus": { - errorConfig{fmt.Errorf("tests error 0")}, - errorConfig{fmt.Errorf("tests error 1")}, - errorConfig{fmt.Errorf("tests error 2")}, - }, - } - discoveryManager.ApplyConfig(c) - <-discoveryManager.SyncCh() - - failedCount := client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs) - if failedCount != 3 { - t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount) - } - - c["prometheus"] = discovery.Configs{ - staticConfig("foo:9090"), - } - discoveryManager.ApplyConfig(c) - <-discoveryManager.SyncCh() - - failedCount = client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs) - if failedCount != 0 { - t.Fatalf("Expected to get no failed config, got: %v", failedCount) - } -} - -func TestCoordinationWithReceiver(t *testing.T) { - updateDelay := 100 * time.Millisecond - - type expect struct { - delay time.Duration - tgs map[string][]*targetgroup.Group - } - - testCases := []struct { - title string - providers map[string]discovery.Discoverer - expected []expect - }{ - { - title: "Receiver should get all updates even when one provider closes its channel", - providers: map[string]discovery.Discoverer{ - "once1": &onceProvider{ - tgs: []*targetgroup.Group{ - { - Source: "tg1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - }, - }, - "mock1": newMockDiscoveryProvider( - update{ - interval: 2 * updateDelay, - targetGroups: []targetgroup.Group{ - { - Source: "tg2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - }, - ), - }, - expected: []expect{ - { - tgs: map[string][]*targetgroup.Group{ - "once1": { - { - Source: "tg1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - }, - }, - }, - { - tgs: map[string][]*targetgroup.Group{ - "once1": { - { - Source: "tg1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - }, - "mock1": { - { - Source: "tg2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - }, - }, - }, - }, - { - title: "Receiver should get all updates even when the channel is blocked", - providers: map[string]discovery.Discoverer{ - "mock1": newMockDiscoveryProvider( - update{ - targetGroups: []targetgroup.Group{ - { - Source: "tg1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - }, - }, - update{ - interval: 4 * updateDelay, - targetGroups: []targetgroup.Group{ - { - Source: "tg2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - }, - ), - }, - expected: []expect{ - { - delay: 2 * updateDelay, - tgs: map[string][]*targetgroup.Group{ - "mock1": { - { - Source: "tg1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - }, - }, - }, - { - delay: 4 * updateDelay, - tgs: map[string][]*targetgroup.Group{ - "mock1": { - { - Source: "tg1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tg2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - }, - }, - }, - }, - } - - for _, tc := range testCases { - t.Run(tc.title, 
func(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - mgr := NewManager(ctx, nil, reg, sdMetrics) - require.NotNil(t, mgr) - mgr.updatert = updateDelay - go mgr.Run() - - for name, p := range tc.providers { - mgr.StartCustomProvider(ctx, name, p) - } - - for i, expected := range tc.expected { - time.Sleep(expected.delay) - select { - case <-ctx.Done(): - t.Fatalf("step %d: no update received in the expected timeframe", i) - case tgs, ok := <-mgr.SyncCh(): - if !ok { - t.Fatalf("step %d: discovery manager channel is closed", i) - } - if len(tgs) != len(expected.tgs) { - t.Fatalf("step %d: target groups mismatch, got: %d, expected: %d\ngot: %#v\nexpected: %#v", - i, len(tgs), len(expected.tgs), tgs, expected.tgs) - } - for k := range expected.tgs { - if _, ok := tgs[k]; !ok { - t.Fatalf("step %d: target group not found: %s\ngot: %#v", i, k, tgs) - } - assertEqualGroups(t, tgs[k], expected.tgs[k]) - } - } - } - }) - } -} - -type update struct { - targetGroups []targetgroup.Group - interval time.Duration -} - -type mockdiscoveryProvider struct { - updates []update -} - -func newMockDiscoveryProvider(updates ...update) mockdiscoveryProvider { - tp := mockdiscoveryProvider{ - updates: updates, - } - return tp -} - -func (tp mockdiscoveryProvider) Run(ctx context.Context, upCh chan<- []*targetgroup.Group) { - for _, u := range tp.updates { - if u.interval > 0 { - select { - case <-ctx.Done(): - return - case <-time.After(u.interval): - } - } - tgs := make([]*targetgroup.Group, len(u.targetGroups)) - for i := range u.targetGroups { - tgs[i] = &u.targetGroups[i] - } - upCh <- tgs - } - <-ctx.Done() -} - -// byGroupSource implements sort.Interface so we can sort by the Source field. -type byGroupSource []*targetgroup.Group - -func (a byGroupSource) Len() int { return len(a) } -func (a byGroupSource) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a byGroupSource) Less(i, j int) bool { return a[i].Source < a[j].Source } - -// onceProvider sends updates once (if any) and closes the update channel. -type onceProvider struct { - tgs []*targetgroup.Group -} - -func (o onceProvider) Run(_ context.Context, ch chan<- []*targetgroup.Group) { - if len(o.tgs) > 0 { - ch <- o.tgs - } - close(ch) -} diff --git a/discovery/legacymanager/registry.go b/discovery/legacymanager/registry.go deleted file mode 100644 index 955705394d..0000000000 --- a/discovery/legacymanager/registry.go +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright 2020 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package legacymanager - -import ( - "errors" - "fmt" - "reflect" - "sort" - "strconv" - "strings" - "sync" - - "gopkg.in/yaml.v2" - - "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/discovery/targetgroup" -) - -const ( - configFieldPrefix = "AUTO_DISCOVERY_" - staticConfigsKey = "static_configs" - staticConfigsFieldName = configFieldPrefix + staticConfigsKey -) - -var ( - configNames = make(map[string]discovery.Config) - configFieldNames = make(map[reflect.Type]string) - configFields []reflect.StructField - - configTypesMu sync.Mutex - configTypes = make(map[reflect.Type]reflect.Type) - - emptyStructType = reflect.TypeOf(struct{}{}) - configsType = reflect.TypeOf(discovery.Configs{}) -) - -// RegisterConfig registers the given Config type for YAML marshaling and unmarshaling. -func RegisterConfig(config discovery.Config) { - registerConfig(config.Name()+"_sd_configs", reflect.TypeOf(config), config) -} - -func init() { - // N.B.: static_configs is the only Config type implemented by default. - // All other types are registered at init by their implementing packages. - elemTyp := reflect.TypeOf(&targetgroup.Group{}) - registerConfig(staticConfigsKey, elemTyp, discovery.StaticConfig{}) -} - -func registerConfig(yamlKey string, elemType reflect.Type, config discovery.Config) { - name := config.Name() - if _, ok := configNames[name]; ok { - panic(fmt.Sprintf("discovery: Config named %q is already registered", name)) - } - configNames[name] = config - - fieldName := configFieldPrefix + yamlKey // Field must be exported. - configFieldNames[elemType] = fieldName - - // Insert fields in sorted order. - i := sort.Search(len(configFields), func(k int) bool { - return fieldName < configFields[k].Name - }) - configFields = append(configFields, reflect.StructField{}) // Add empty field at end. - copy(configFields[i+1:], configFields[i:]) // Shift fields to the right. - configFields[i] = reflect.StructField{ // Write new field in place. - Name: fieldName, - Type: reflect.SliceOf(elemType), - Tag: reflect.StructTag(`yaml:"` + yamlKey + `,omitempty"`), - } -} - -func getConfigType(out reflect.Type) reflect.Type { - configTypesMu.Lock() - defer configTypesMu.Unlock() - if typ, ok := configTypes[out]; ok { - return typ - } - // Initial exported fields map one-to-one. - var fields []reflect.StructField - for i, n := 0, out.NumField(); i < n; i++ { - switch field := out.Field(i); { - case field.PkgPath == "" && field.Type != configsType: - fields = append(fields, field) - default: - fields = append(fields, reflect.StructField{ - Name: "_" + field.Name, // Field must be unexported. - PkgPath: out.PkgPath(), - Type: emptyStructType, - }) - } - } - // Append extra config fields on the end. - fields = append(fields, configFields...) - typ := reflect.StructOf(fields) - configTypes[out] = typ - return typ -} - -// UnmarshalYAMLWithInlineConfigs helps implement yaml.Unmarshal for structs -// that have a Configs field that should be inlined. 
-func UnmarshalYAMLWithInlineConfigs(out interface{}, unmarshal func(interface{}) error) error { - outVal := reflect.ValueOf(out) - if outVal.Kind() != reflect.Ptr { - return fmt.Errorf("discovery: can only unmarshal into a struct pointer: %T", out) - } - outVal = outVal.Elem() - if outVal.Kind() != reflect.Struct { - return fmt.Errorf("discovery: can only unmarshal into a struct pointer: %T", out) - } - outTyp := outVal.Type() - - cfgTyp := getConfigType(outTyp) - cfgPtr := reflect.New(cfgTyp) - cfgVal := cfgPtr.Elem() - - // Copy shared fields (defaults) to dynamic value. - var configs *discovery.Configs - for i, n := 0, outVal.NumField(); i < n; i++ { - if outTyp.Field(i).Type == configsType { - configs = outVal.Field(i).Addr().Interface().(*discovery.Configs) - continue - } - if cfgTyp.Field(i).PkgPath != "" { - continue // Field is unexported: ignore. - } - cfgVal.Field(i).Set(outVal.Field(i)) - } - if configs == nil { - return fmt.Errorf("discovery: Configs field not found in type: %T", out) - } - - // Unmarshal into dynamic value. - if err := unmarshal(cfgPtr.Interface()); err != nil { - return replaceYAMLTypeError(err, cfgTyp, outTyp) - } - - // Copy shared fields from dynamic value. - for i, n := 0, outVal.NumField(); i < n; i++ { - if cfgTyp.Field(i).PkgPath != "" { - continue // Field is unexported: ignore. - } - outVal.Field(i).Set(cfgVal.Field(i)) - } - - var err error - *configs, err = readConfigs(cfgVal, outVal.NumField()) - return err -} - -func readConfigs(structVal reflect.Value, startField int) (discovery.Configs, error) { - var ( - configs discovery.Configs - targets []*targetgroup.Group - ) - for i, n := startField, structVal.NumField(); i < n; i++ { - field := structVal.Field(i) - if field.Kind() != reflect.Slice { - panic("discovery: internal error: field is not a slice") - } - for k := 0; k < field.Len(); k++ { - val := field.Index(k) - if val.IsZero() || (val.Kind() == reflect.Ptr && val.Elem().IsZero()) { - key := configFieldNames[field.Type().Elem()] - key = strings.TrimPrefix(key, configFieldPrefix) - return nil, fmt.Errorf("empty or null section in %s", key) - } - switch c := val.Interface().(type) { - case *targetgroup.Group: - // Add index to the static config target groups for unique identification - // within scrape pool. - c.Source = strconv.Itoa(len(targets)) - // Coalesce multiple static configs into a single static config. - targets = append(targets, c) - case discovery.Config: - configs = append(configs, c) - default: - panic("discovery: internal error: slice element is not a Config") - } - } - } - if len(targets) > 0 { - configs = append(configs, discovery.StaticConfig(targets)) - } - return configs, nil -} - -// MarshalYAMLWithInlineConfigs helps implement yaml.Marshal for structs -// that have a Configs field that should be inlined. -func MarshalYAMLWithInlineConfigs(in interface{}) (interface{}, error) { - inVal := reflect.ValueOf(in) - for inVal.Kind() == reflect.Ptr { - inVal = inVal.Elem() - } - inTyp := inVal.Type() - - cfgTyp := getConfigType(inTyp) - cfgPtr := reflect.New(cfgTyp) - cfgVal := cfgPtr.Elem() - - // Copy shared fields to dynamic value. - var configs *discovery.Configs - for i, n := 0, inTyp.NumField(); i < n; i++ { - if inTyp.Field(i).Type == configsType { - configs = inVal.Field(i).Addr().Interface().(*discovery.Configs) - } - if cfgTyp.Field(i).PkgPath != "" { - continue // Field is unexported: ignore. 
- } - cfgVal.Field(i).Set(inVal.Field(i)) - } - if configs == nil { - return nil, fmt.Errorf("discovery: Configs field not found in type: %T", in) - } - - if err := writeConfigs(cfgVal, *configs); err != nil { - return nil, err - } - - return cfgPtr.Interface(), nil -} - -func writeConfigs(structVal reflect.Value, configs discovery.Configs) error { - targets := structVal.FieldByName(staticConfigsFieldName).Addr().Interface().(*[]*targetgroup.Group) - for _, c := range configs { - if sc, ok := c.(discovery.StaticConfig); ok { - *targets = append(*targets, sc...) - continue - } - fieldName, ok := configFieldNames[reflect.TypeOf(c)] - if !ok { - return fmt.Errorf("discovery: cannot marshal unregistered Config type: %T", c) - } - field := structVal.FieldByName(fieldName) - field.Set(reflect.Append(field, reflect.ValueOf(c))) - } - return nil -} - -func replaceYAMLTypeError(err error, oldTyp, newTyp reflect.Type) error { - var e *yaml.TypeError - if errors.As(err, &e) { - oldStr := oldTyp.String() - newStr := newTyp.String() - for i, s := range e.Errors { - e.Errors[i] = strings.ReplaceAll(s, oldStr, newStr) - } - } - return err -} diff --git a/discovery/linode/linode.go b/discovery/linode/linode.go index 634a6b1d4b..dfc12417c0 100644 --- a/discovery/linode/linode.go +++ b/discovery/linode/linode.go @@ -17,13 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "net/http" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/linode/linodego" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -138,7 +138,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*linodeMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/linode/linode_test.go b/discovery/linode/linode_test.go index 3c10650653..7bcaa05ba4 100644 --- a/discovery/linode/linode_test.go +++ b/discovery/linode/linode_test.go @@ -19,10 +19,10 @@ import ( "net/url" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -238,7 +238,7 @@ func TestLinodeSDRefresh(t *testing.T) { defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) endpoint, err := url.Parse(sdmock.Endpoint()) require.NoError(t, err) diff --git a/discovery/manager.go b/discovery/manager.go index cefa90a866..87e0ecc44b 100644 --- a/discovery/manager.go +++ b/discovery/manager.go @@ -16,14 +16,14 @@ package discovery import ( "context" "fmt" + "log/slog" "reflect" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery/targetgroup" ) @@ -81,9 +81,9 @@ func CreateAndRegisterSDMetrics(reg prometheus.Registerer) (map[string]Discovere } // NewManager is the Discovery Manager constructor. 
-func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, sdMetrics map[string]DiscovererMetrics, options ...func(*Manager)) *Manager { +func NewManager(ctx context.Context, logger *slog.Logger, registerer prometheus.Registerer, sdMetrics map[string]DiscovererMetrics, options ...func(*Manager)) *Manager { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } mgr := &Manager{ logger: logger, @@ -104,7 +104,7 @@ func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Re if metrics, err := NewManagerMetrics(registerer, mgr.name); err == nil { mgr.metrics = metrics } else { - level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err) + logger.Error("Failed to create discovery manager metrics", "manager", mgr.name, "err", err) return nil } @@ -141,7 +141,7 @@ func HTTPClientOptions(opts ...config.HTTPClientOption) func(*Manager) { // Manager maintains a set of discovery providers and sends each update to a map channel. // Targets are grouped by the target set name. type Manager struct { - logger log.Logger + logger *slog.Logger name string httpOpts []config.HTTPClientOption mtx sync.RWMutex @@ -294,7 +294,7 @@ func (m *Manager) StartCustomProvider(ctx context.Context, name string, worker D } func (m *Manager) startProvider(ctx context.Context, p *Provider) { - level.Debug(m.logger).Log("msg", "Starting provider", "provider", p.name, "subs", fmt.Sprintf("%v", p.subs)) + m.logger.Debug("Starting provider", "provider", p.name, "subs", fmt.Sprintf("%v", p.subs)) ctx, cancel := context.WithCancel(ctx) updates := make(chan []*targetgroup.Group) @@ -328,7 +328,7 @@ func (m *Manager) updater(ctx context.Context, p *Provider, updates chan []*targ case tgs, ok := <-updates: m.metrics.ReceivedUpdates.Inc() if !ok { - level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name) + m.logger.Debug("Discoverer channel closed", "provider", p.name) // Wait for provider cancellation to ensure targets are cleaned up when expected. 
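With NewManager now taking a *slog.Logger (nil selects the promslog no-op logger), callers construct loggers with the standard library, as the updated tests further down do with promslog.NewNopLogger(). A minimal usage sketch, assuming the SD metrics are prepared via the CreateAndRegisterSDMetrics helper whose signature appears in the hunk context above:

package main

import (
	"context"
	"log/slog"
	"os"

	"github.com/prometheus/client_golang/prometheus"

	"github.com/prometheus/prometheus/discovery"
)

func main() {
	ctx := context.Background()
	reg := prometheus.NewRegistry()

	// Register all service discovery metrics before creating the manager.
	sdMetrics, err := discovery.CreateAndRegisterSDMetrics(reg)
	if err != nil {
		panic(err)
	}

	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
	mgr := discovery.NewManager(ctx, logger, reg, sdMetrics)
	if mgr == nil {
		// NewManager returns nil when its metrics cannot be registered.
		panic("failed to create discovery manager")
	}
	go func() { _ = mgr.Run() }()
}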
<-ctx.Done() return @@ -364,7 +364,7 @@ func (m *Manager) sender() { case m.syncCh <- m.allGroups(): default: m.metrics.DelayedUpdates.Inc() - level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle") + m.logger.Debug("Discovery receiver's channel was full so will retry the next cycle") select { case m.triggerSend <- struct{}{}: default: @@ -458,12 +458,12 @@ func (m *Manager) registerProviders(cfgs Configs, setName string) int { } typ := cfg.Name() d, err := cfg.NewDiscoverer(DiscovererOptions{ - Logger: log.With(m.logger, "discovery", typ, "config", setName), + Logger: m.logger.With("discovery", typ, "config", setName), HTTPClientOptions: m.httpOpts, Metrics: m.sdMetrics[typ], }) if err != nil { - level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName) + m.logger.Error("Cannot create service discovery", "err", err, "type", typ, "config", setName) failed++ return } diff --git a/discovery/manager_test.go b/discovery/manager_test.go index 831cefe514..b882c0b02e 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -22,10 +22,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" client_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -675,7 +675,7 @@ func TestTargetUpdatesOrder(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond @@ -791,7 +791,7 @@ func TestTargetSetTargetGroupsPresentOnConfigReload(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -828,7 +828,7 @@ func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -868,7 +868,7 @@ func TestTargetSetTargetGroupsPresentOnConfigDuplicateAndDeleteOriginal(t *testi reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -911,7 +911,7 @@ func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 
100 * time.Millisecond go discoveryManager.Run() @@ -979,7 +979,7 @@ func TestTargetSetRecreatesTargetGroupsOnConfigChange(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1023,7 +1023,7 @@ func TestDiscovererConfigs(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1060,7 +1060,7 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1141,7 +1141,7 @@ func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1202,7 +1202,7 @@ func TestGaugeFailedConfigs(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1454,7 +1454,7 @@ func TestTargetSetTargetGroupsUpdateDuringApplyConfig(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1551,7 +1551,7 @@ func TestUnregisterMetrics(t *testing.T) { refreshMetrics, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) // discoveryManager will be nil if there was an error configuring metrics. require.NotNil(t, discoveryManager) // Unregister all metrics. diff --git a/discovery/marathon/marathon.go b/discovery/marathon/marathon.go index 38b47accff..f81a4410eb 100644 --- a/discovery/marathon/marathon.go +++ b/discovery/marathon/marathon.go @@ -19,6 +19,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math/rand" "net" "net/http" @@ -27,7 +28,6 @@ import ( "strings" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -140,7 +140,7 @@ type Discovery struct { } // NewDiscovery returns a new Marathon Discovery. 
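In registerProviders above, the per-discoverer child logger is now derived with slog's With rather than go-kit's log.With, so every record emitted by a discoverer carries its type and config name. A small stand-alone sketch of that pattern (the attribute values are examples only):

package main

import (
	"errors"
	"log/slog"
	"os"
)

func main() {
	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))

	// Equivalent of the old log.With(m.logger, "discovery", typ, "config", setName):
	// the child logger attaches both attributes to every record it emits.
	discovererLogger := logger.With("discovery", "marathon", "config", "example-job")

	discovererLogger.Error("Cannot create service discovery", "err", errors.New("example"))
}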
-func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*marathonMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/moby/docker.go b/discovery/moby/docker.go index 68f6fe3ccc..1a732c0502 100644 --- a/discovery/moby/docker.go +++ b/discovery/moby/docker.go @@ -16,6 +16,7 @@ package moby import ( "context" "fmt" + "log/slog" "net" "net/http" "net/url" @@ -28,7 +29,6 @@ import ( "github.com/docker/docker/api/types/filters" "github.com/docker/docker/api/types/network" "github.com/docker/docker/client" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -128,7 +128,7 @@ type DockerDiscovery struct { } // NewDockerDiscovery returns a new DockerDiscovery which periodically refreshes its targets. -func NewDockerDiscovery(conf *DockerSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*DockerDiscovery, error) { +func NewDockerDiscovery(conf *DockerSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*DockerDiscovery, error) { m, ok := metrics.(*dockerMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/moby/docker_test.go b/discovery/moby/docker_test.go index 398393a15a..00e6a3e4f3 100644 --- a/discovery/moby/docker_test.go +++ b/discovery/moby/docker_test.go @@ -19,9 +19,9 @@ import ( "sort" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -48,7 +48,7 @@ host: %s defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDockerDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDockerDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() @@ -226,7 +226,7 @@ host: %s require.NoError(t, metrics.Register()) defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDockerDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDockerDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/moby/dockerswarm.go b/discovery/moby/dockerswarm.go index b0147467d2..9e93e581f3 100644 --- a/discovery/moby/dockerswarm.go +++ b/discovery/moby/dockerswarm.go @@ -16,13 +16,13 @@ package moby import ( "context" "fmt" + "log/slog" "net/http" "net/url" "time" "github.com/docker/docker/api/types/filters" "github.com/docker/docker/client" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -125,7 +125,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf *DockerSwarmSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *DockerSwarmSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*dockerswarmMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/moby/mock_test.go b/discovery/moby/mock_test.go index 3f35258c8f..7ef5cb07c3 100644 --- a/discovery/moby/mock_test.go +++ b/discovery/moby/mock_test.go @@ -98,7 +98,7 @@ func (m *SDMock) SetupHandlers() { if len(query) == 2 { h := sha1.New() h.Write([]byte(query[1])) - // Avoing long filenames for Windows. + // Avoiding long filenames for Windows. f += "__" + base64.URLEncoding.EncodeToString(h.Sum(nil))[:10] } } diff --git a/discovery/moby/nodes_test.go b/discovery/moby/nodes_test.go index 4ad1088d1a..973b83c4b6 100644 --- a/discovery/moby/nodes_test.go +++ b/discovery/moby/nodes_test.go @@ -18,9 +18,9 @@ import ( "fmt" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -48,7 +48,7 @@ host: %s defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/moby/services_test.go b/discovery/moby/services_test.go index 47ca69e33a..7a966cfeee 100644 --- a/discovery/moby/services_test.go +++ b/discovery/moby/services_test.go @@ -18,9 +18,9 @@ import ( "fmt" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -48,7 +48,7 @@ host: %s defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() @@ -349,7 +349,7 @@ filters: defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/moby/tasks_test.go b/discovery/moby/tasks_test.go index ef71bc02f5..59d8831c3b 100644 --- a/discovery/moby/tasks_test.go +++ b/discovery/moby/tasks_test.go @@ -18,9 +18,9 @@ import ( "fmt" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -48,7 +48,7 @@ host: %s defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/moby/testdata/swarmprom/services.json b/discovery/moby/testdata/swarmprom/services.json index 72caa7a7f8..8f6c0793dd 100644 --- a/discovery/moby/testdata/swarmprom/services.json +++ b/discovery/moby/testdata/swarmprom/services.json @@ -224,7 +224,7 @@ "Args": [ "--config.file=/etc/prometheus/prometheus.yml", "--storage.tsdb.path=/prometheus", - 
"--storage.tsdb.retention=24h" + "--storage.tsdb.retention.time=24h" ], "Privileges": { "CredentialSpec": null, diff --git a/discovery/moby/testdata/swarmprom/tasks.json b/discovery/moby/testdata/swarmprom/tasks.json index 33d81f25ce..af5ff9fe28 100644 --- a/discovery/moby/testdata/swarmprom/tasks.json +++ b/discovery/moby/testdata/swarmprom/tasks.json @@ -973,7 +973,7 @@ "Args": [ "--config.file=/etc/prometheus/prometheus.yml", "--storage.tsdb.path=/prometheus", - "--storage.tsdb.retention=24h" + "--storage.tsdb.retention.time=24h" ], "Privileges": { "CredentialSpec": null, diff --git a/discovery/nomad/nomad.go b/discovery/nomad/nomad.go index d9c48120ae..1dbd8f1608 100644 --- a/discovery/nomad/nomad.go +++ b/discovery/nomad/nomad.go @@ -17,12 +17,12 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" "time" - "github.com/go-kit/log" nomad "github.com/hashicorp/nomad/api" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -121,7 +121,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*nomadMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/nomad/nomad_test.go b/discovery/nomad/nomad_test.go index 357d4a8e9b..32b087524c 100644 --- a/discovery/nomad/nomad_test.go +++ b/discovery/nomad/nomad_test.go @@ -21,9 +21,9 @@ import ( "net/url" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -160,7 +160,7 @@ func TestNomadSDRefresh(t *testing.T) { defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) tgs, err := d.refresh(context.Background()) diff --git a/discovery/openstack/hypervisor.go b/discovery/openstack/hypervisor.go index 8964da9294..ec127b1861 100644 --- a/discovery/openstack/hypervisor.go +++ b/discovery/openstack/hypervisor.go @@ -16,10 +16,10 @@ package openstack import ( "context" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" "github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud/openstack" "github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/hypervisors" @@ -43,14 +43,14 @@ type HypervisorDiscovery struct { provider *gophercloud.ProviderClient authOpts *gophercloud.AuthOptions region string - logger log.Logger + logger *slog.Logger port int availability gophercloud.Availability } // newHypervisorDiscovery returns a new hypervisor discovery. 
func newHypervisorDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions, - port int, region string, availability gophercloud.Availability, l log.Logger, + port int, region string, availability gophercloud.Availability, l *slog.Logger, ) *HypervisorDiscovery { return &HypervisorDiscovery{ provider: provider, authOpts: opts, diff --git a/discovery/openstack/hypervisor_test.go b/discovery/openstack/hypervisor_test.go index 45684b4a2e..e4a97f32cf 100644 --- a/discovery/openstack/hypervisor_test.go +++ b/discovery/openstack/hypervisor_test.go @@ -93,6 +93,5 @@ func TestOpenstackSDHypervisorRefreshWithDoneContext(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cancel() _, err := hypervisor.refresh(ctx) - require.Error(t, err) - require.Contains(t, err.Error(), context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled) + require.ErrorContains(t, err, context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled) } diff --git a/discovery/openstack/instance.go b/discovery/openstack/instance.go index 78c669e6f7..2a9e29f2ef 100644 --- a/discovery/openstack/instance.go +++ b/discovery/openstack/instance.go @@ -16,17 +16,17 @@ package openstack import ( "context" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud/openstack" "github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/floatingips" "github.com/gophercloud/gophercloud/openstack/compute/v2/servers" "github.com/gophercloud/gophercloud/pagination" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/strutil" @@ -52,7 +52,7 @@ type InstanceDiscovery struct { provider *gophercloud.ProviderClient authOpts *gophercloud.AuthOptions region string - logger log.Logger + logger *slog.Logger port int allTenants bool availability gophercloud.Availability @@ -60,10 +60,10 @@ type InstanceDiscovery struct { // NewInstanceDiscovery returns a new instance discovery. 
func newInstanceDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions, - port int, region string, allTenants bool, availability gophercloud.Availability, l log.Logger, + port int, region string, allTenants bool, availability gophercloud.Availability, l *slog.Logger, ) *InstanceDiscovery { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } return &InstanceDiscovery{ provider: provider, authOpts: opts, @@ -134,7 +134,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, for _, s := range instanceList { if len(s.Addresses) == 0 { - level.Info(i.logger).Log("msg", "Got no IP address", "instance", s.ID) + i.logger.Info("Got no IP address", "instance", s.ID) continue } @@ -151,7 +151,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, if !nameOk { flavorID, idOk := s.Flavor["id"].(string) if !idOk { - level.Warn(i.logger).Log("msg", "Invalid type for both flavor original_name and flavor id, expected string") + i.logger.Warn("Invalid type for both flavor original_name and flavor id, expected string") continue } labels[openstackLabelInstanceFlavor] = model.LabelValue(flavorID) @@ -171,22 +171,22 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, for pool, address := range s.Addresses { md, ok := address.([]interface{}) if !ok { - level.Warn(i.logger).Log("msg", "Invalid type for address, expected array") + i.logger.Warn("Invalid type for address, expected array") continue } if len(md) == 0 { - level.Debug(i.logger).Log("msg", "Got no IP address", "instance", s.ID) + i.logger.Debug("Got no IP address", "instance", s.ID) continue } for _, address := range md { md1, ok := address.(map[string]interface{}) if !ok { - level.Warn(i.logger).Log("msg", "Invalid type for address, expected dict") + i.logger.Warn("Invalid type for address, expected dict") continue } addr, ok := md1["addr"].(string) if !ok { - level.Warn(i.logger).Log("msg", "Invalid type for address, expected string") + i.logger.Warn("Invalid type for address, expected string") continue } if _, ok := floatingIPPresent[addr]; ok { diff --git a/discovery/openstack/instance_test.go b/discovery/openstack/instance_test.go index 2b5ac1b89e..2617baa4e3 100644 --- a/discovery/openstack/instance_test.go +++ b/discovery/openstack/instance_test.go @@ -134,6 +134,5 @@ func TestOpenstackSDInstanceRefreshWithDoneContext(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cancel() _, err := hypervisor.refresh(ctx) - require.Error(t, err) - require.Contains(t, err.Error(), context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled) + require.ErrorContains(t, err, context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled) } diff --git a/discovery/openstack/openstack.go b/discovery/openstack/openstack.go index c98f78788d..fa7e0cce90 100644 --- a/discovery/openstack/openstack.go +++ b/discovery/openstack/openstack.go @@ -17,10 +17,10 @@ import ( "context" "errors" "fmt" + "log/slog" "net/http" "time" - "github.com/go-kit/log" "github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud/openstack" "github.com/mwitkow/go-conntrack" @@ -142,7 +142,7 @@ type refresher interface { } // NewDiscovery returns a new OpenStack Discoverer which periodically refreshes its targets. 
-func NewDiscovery(conf *SDConfig, l log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, l *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*openstackMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") @@ -163,7 +163,7 @@ func NewDiscovery(conf *SDConfig, l log.Logger, metrics discovery.DiscovererMetr ), nil } -func newRefresher(conf *SDConfig, l log.Logger) (refresher, error) { +func newRefresher(conf *SDConfig, l *slog.Logger) (refresher, error) { var opts gophercloud.AuthOptions if conf.IdentityEndpoint == "" { var err error diff --git a/discovery/ovhcloud/dedicated_server.go b/discovery/ovhcloud/dedicated_server.go index a70857a08b..15bb9809c9 100644 --- a/discovery/ovhcloud/dedicated_server.go +++ b/discovery/ovhcloud/dedicated_server.go @@ -16,13 +16,12 @@ package ovhcloud import ( "context" "fmt" + "log/slog" "net/netip" "net/url" "path" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/ovh/go-ovh/ovh" "github.com/prometheus/common/model" @@ -55,10 +54,10 @@ type dedicatedServer struct { type dedicatedServerDiscovery struct { *refresh.Discovery config *SDConfig - logger log.Logger + logger *slog.Logger } -func newDedicatedServerDiscovery(conf *SDConfig, logger log.Logger) *dedicatedServerDiscovery { +func newDedicatedServerDiscovery(conf *SDConfig, logger *slog.Logger) *dedicatedServerDiscovery { return &dedicatedServerDiscovery{config: conf, logger: logger} } @@ -115,10 +114,7 @@ func (d *dedicatedServerDiscovery) refresh(context.Context) ([]*targetgroup.Grou for _, dedicatedServerName := range dedicatedServerList { dedicatedServer, err := getDedicatedServerDetails(client, dedicatedServerName) if err != nil { - err := level.Warn(d.logger).Log("msg", fmt.Sprintf("%s: Could not get details of %s", d.getSource(), dedicatedServerName), "err", err.Error()) - if err != nil { - return nil, err - } + d.logger.Warn(fmt.Sprintf("%s: Could not get details of %s", d.getSource(), dedicatedServerName), "err", err.Error()) continue } dedicatedServerDetailedList = append(dedicatedServerDetailedList, *dedicatedServer) diff --git a/discovery/ovhcloud/dedicated_server_test.go b/discovery/ovhcloud/dedicated_server_test.go index 52311bcc87..f9dbd6af9c 100644 --- a/discovery/ovhcloud/dedicated_server_test.go +++ b/discovery/ovhcloud/dedicated_server_test.go @@ -21,8 +21,8 @@ import ( "os" "testing" - "github.com/go-kit/log" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" ) @@ -41,7 +41,7 @@ application_secret: %s consumer_key: %s`, mock.URL, ovhcloudApplicationKeyTest, ovhcloudApplicationSecretTest, ovhcloudConsumerKeyTest) require.NoError(t, yaml.UnmarshalStrict([]byte(cfgString), &cfg)) - d, err := newRefresher(&cfg, log.NewNopLogger()) + d, err := newRefresher(&cfg, promslog.NewNopLogger()) require.NoError(t, err) ctx := context.Background() targetGroups, err := d.refresh(ctx) diff --git a/discovery/ovhcloud/ovhcloud.go b/discovery/ovhcloud/ovhcloud.go index 988b4482f2..08ed70296b 100644 --- a/discovery/ovhcloud/ovhcloud.go +++ b/discovery/ovhcloud/ovhcloud.go @@ -17,10 +17,10 @@ import ( "context" "errors" "fmt" + "log/slog" "net/netip" "time" - "github.com/go-kit/log" "github.com/ovh/go-ovh/ovh" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -137,7 +137,7 @@ func parseIPList(ipList []string) ([]netip.Addr, 
error) { return ipAddresses, nil } -func newRefresher(conf *SDConfig, logger log.Logger) (refresher, error) { +func newRefresher(conf *SDConfig, logger *slog.Logger) (refresher, error) { switch conf.Service { case "vps": return newVpsDiscovery(conf, logger), nil @@ -148,7 +148,7 @@ func newRefresher(conf *SDConfig, logger log.Logger) (refresher, error) { } // NewDiscovery returns a new OVHcloud Discoverer which periodically refreshes its targets. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*ovhcloudMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/ovhcloud/ovhcloud_test.go b/discovery/ovhcloud/ovhcloud_test.go index 9c95bf90e6..84a35af3ad 100644 --- a/discovery/ovhcloud/ovhcloud_test.go +++ b/discovery/ovhcloud/ovhcloud_test.go @@ -20,11 +20,11 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/util/testutil" ) var ( @@ -121,7 +121,7 @@ func TestParseIPs(t *testing.T) { func TestDiscoverer(t *testing.T) { conf, _ := getMockConf("vps") - logger := testutil.NewLogger(t) + logger := promslog.NewNopLogger() reg := prometheus.NewRegistry() refreshMetrics := discovery.NewRefreshMetrics(reg) diff --git a/discovery/ovhcloud/vps.go b/discovery/ovhcloud/vps.go index 58ceeabd87..7050f826a5 100644 --- a/discovery/ovhcloud/vps.go +++ b/discovery/ovhcloud/vps.go @@ -16,13 +16,12 @@ package ovhcloud import ( "context" "fmt" + "log/slog" "net/netip" "net/url" "path" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/ovh/go-ovh/ovh" "github.com/prometheus/common/model" @@ -68,10 +67,10 @@ type virtualPrivateServer struct { type vpsDiscovery struct { *refresh.Discovery config *SDConfig - logger log.Logger + logger *slog.Logger } -func newVpsDiscovery(conf *SDConfig, logger log.Logger) *vpsDiscovery { +func newVpsDiscovery(conf *SDConfig, logger *slog.Logger) *vpsDiscovery { return &vpsDiscovery{config: conf, logger: logger} } @@ -133,10 +132,7 @@ func (d *vpsDiscovery) refresh(context.Context) ([]*targetgroup.Group, error) { for _, vpsName := range vpsList { vpsDetailed, err := getVpsDetails(client, vpsName) if err != nil { - err := level.Warn(d.logger).Log("msg", fmt.Sprintf("%s: Could not get details of %s", d.getSource(), vpsName), "err", err.Error()) - if err != nil { - return nil, err - } + d.logger.Warn(fmt.Sprintf("%s: Could not get details of %s", d.getSource(), vpsName), "err", err.Error()) continue } vpsDetailedList = append(vpsDetailedList, *vpsDetailed) diff --git a/discovery/ovhcloud/vps_test.go b/discovery/ovhcloud/vps_test.go index 2d2d6dcd21..00d59da7f0 100644 --- a/discovery/ovhcloud/vps_test.go +++ b/discovery/ovhcloud/vps_test.go @@ -23,8 +23,8 @@ import ( yaml "gopkg.in/yaml.v2" - "github.com/go-kit/log" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" ) @@ -43,7 +43,7 @@ consumer_key: %s`, mock.URL, ovhcloudApplicationKeyTest, ovhcloudApplicationSecr require.NoError(t, yaml.UnmarshalStrict([]byte(cfgString), &cfg)) - d, err := newRefresher(&cfg, log.NewNopLogger()) + d, err := newRefresher(&cfg, 
promslog.NewNopLogger()) require.NoError(t, err) ctx := context.Background() targetGroups, err := d.refresh(ctx) diff --git a/discovery/puppetdb/puppetdb.go b/discovery/puppetdb/puppetdb.go index 8f89acbf93..6122a76da7 100644 --- a/discovery/puppetdb/puppetdb.go +++ b/discovery/puppetdb/puppetdb.go @@ -19,6 +19,7 @@ import ( "encoding/json" "fmt" "io" + "log/slog" "net" "net/http" "net/url" @@ -27,11 +28,11 @@ import ( "strings" "time" - "github.com/go-kit/log" "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/version" "github.com/prometheus/prometheus/discovery" @@ -138,14 +139,14 @@ type Discovery struct { } // NewDiscovery returns a new PuppetDB discovery for the given config. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*puppetdbMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } client, err := config.NewClientFromConfig(conf.HTTPClientConfig, "http") diff --git a/discovery/puppetdb/puppetdb_test.go b/discovery/puppetdb/puppetdb_test.go index bf9c7b215e..4585b78223 100644 --- a/discovery/puppetdb/puppetdb_test.go +++ b/discovery/puppetdb/puppetdb_test.go @@ -22,10 +22,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -70,7 +70,7 @@ func TestPuppetSlashInURL(t *testing.T) { metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) require.NoError(t, metrics.Register()) - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) require.Equal(t, apiURL, d.url) @@ -94,7 +94,7 @@ func TestPuppetDBRefresh(t *testing.T) { metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) require.NoError(t, metrics.Register()) - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() @@ -142,7 +142,7 @@ func TestPuppetDBRefreshWithParameters(t *testing.T) { metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) require.NoError(t, metrics.Register()) - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() @@ -201,7 +201,7 @@ func TestPuppetDBInvalidCode(t *testing.T) { metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) require.NoError(t, metrics.Register()) - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() @@ -229,7 +229,7 @@ func TestPuppetDBInvalidFormat(t *testing.T) { metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) require.NoError(t, metrics.Register()) - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx 
:= context.Background() diff --git a/discovery/refresh/refresh.go b/discovery/refresh/refresh.go index f037a90cff..31646c0e4c 100644 --- a/discovery/refresh/refresh.go +++ b/discovery/refresh/refresh.go @@ -16,17 +16,17 @@ package refresh import ( "context" "errors" + "log/slog" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" ) type Options struct { - Logger log.Logger + Logger *slog.Logger Mech string Interval time.Duration RefreshF func(ctx context.Context) ([]*targetgroup.Group, error) @@ -35,7 +35,7 @@ type Options struct { // Discovery implements the Discoverer interface. type Discovery struct { - logger log.Logger + logger *slog.Logger interval time.Duration refreshf func(ctx context.Context) ([]*targetgroup.Group, error) metrics *discovery.RefreshMetrics @@ -45,9 +45,9 @@ type Discovery struct { func NewDiscovery(opts Options) *Discovery { m := opts.MetricsInstantiator.Instantiate(opts.Mech) - var logger log.Logger + var logger *slog.Logger if opts.Logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } else { logger = opts.Logger } @@ -68,7 +68,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { tgs, err := d.refresh(ctx) if err != nil { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error()) + d.logger.Error("Unable to refresh target groups", "err", err.Error()) } } else { select { @@ -87,7 +87,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { tgs, err := d.refresh(ctx) if err != nil { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error()) + d.logger.Error("Unable to refresh target groups", "err", err.Error()) } continue } diff --git a/discovery/scaleway/scaleway.go b/discovery/scaleway/scaleway.go index f8e1a83f5e..670e439c4f 100644 --- a/discovery/scaleway/scaleway.go +++ b/discovery/scaleway/scaleway.go @@ -17,12 +17,12 @@ import ( "context" "errors" "fmt" + "log/slog" "net/http" "os" "strings" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -185,7 +185,7 @@ func init() { // the Discoverer interface. type Discovery struct{} -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*scalewayMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/triton/triton.go b/discovery/triton/triton.go index 675149f2a3..7b3b18f471 100644 --- a/discovery/triton/triton.go +++ b/discovery/triton/triton.go @@ -19,12 +19,12 @@ import ( "errors" "fmt" "io" + "log/slog" "net/http" "net/url" "strings" "time" - "github.com/go-kit/log" "github.com/mwitkow/go-conntrack" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -146,7 +146,7 @@ type Discovery struct { } // New returns a new Discovery which periodically refreshes its targets. 
-func New(logger log.Logger, conf *SDConfig, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func New(logger *slog.Logger, conf *SDConfig, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*tritonMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/triton/triton_test.go b/discovery/triton/triton_test.go index e37693e6bf..b2d06afaf6 100644 --- a/discovery/triton/triton_test.go +++ b/discovery/triton/triton_test.go @@ -21,7 +21,6 @@ import ( "net/http/httptest" "net/url" "strconv" - "strings" "testing" "github.com/prometheus/client_golang/prometheus" @@ -182,8 +181,7 @@ func TestTritonSDRefreshNoServer(t *testing.T) { td, m, _ := newTritonDiscovery(conf) _, err := td.refresh(context.Background()) - require.Error(t, err) - require.True(t, strings.Contains(err.Error(), "an error occurred when requesting targets from the discovery endpoint")) + require.ErrorContains(t, err, "an error occurred when requesting targets from the discovery endpoint") m.Unregister() } @@ -193,8 +191,7 @@ func TestTritonSDRefreshCancelled(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cancel() _, err := td.refresh(ctx) - require.Error(t, err) - require.True(t, strings.Contains(err.Error(), context.Canceled.Error())) + require.ErrorContains(t, err, context.Canceled.Error()) m.Unregister() } diff --git a/discovery/uyuni/uyuni.go b/discovery/uyuni/uyuni.go index c8af2f1587..de806895d7 100644 --- a/discovery/uyuni/uyuni.go +++ b/discovery/uyuni/uyuni.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "log/slog" "net/http" "net/url" "path" @@ -24,7 +25,6 @@ import ( "strings" "time" - "github.com/go-kit/log" "github.com/kolo/xmlrpc" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -41,10 +41,10 @@ const ( uyuniMetaLabelPrefix = model.MetaLabelPrefix + "uyuni_" uyuniLabelMinionHostname = uyuniMetaLabelPrefix + "minion_hostname" uyuniLabelPrimaryFQDN = uyuniMetaLabelPrefix + "primary_fqdn" - uyuniLablelSystemID = uyuniMetaLabelPrefix + "system_id" - uyuniLablelGroups = uyuniMetaLabelPrefix + "groups" - uyuniLablelEndpointName = uyuniMetaLabelPrefix + "endpoint_name" - uyuniLablelExporter = uyuniMetaLabelPrefix + "exporter" + uyuniLabelSystemID = uyuniMetaLabelPrefix + "system_id" + uyuniLabelGroups = uyuniMetaLabelPrefix + "groups" + uyuniLabelEndpointName = uyuniMetaLabelPrefix + "endpoint_name" + uyuniLabelExporter = uyuniMetaLabelPrefix + "exporter" uyuniLabelProxyModule = uyuniMetaLabelPrefix + "proxy_module" uyuniLabelMetricsPath = uyuniMetaLabelPrefix + "metrics_path" uyuniLabelScheme = uyuniMetaLabelPrefix + "scheme" @@ -109,7 +109,7 @@ type Discovery struct { entitlement string separator string interval time.Duration - logger log.Logger + logger *slog.Logger } // NewDiscovererMetrics implements discovery.Config. @@ -212,7 +212,7 @@ func getEndpointInfoForSystems( } // NewDiscovery returns a uyuni discovery for the given configuration. 
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*uyuniMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") @@ -270,10 +270,10 @@ func (d *Discovery) getEndpointLabels( model.AddressLabel: model.LabelValue(addr), uyuniLabelMinionHostname: model.LabelValue(networkInfo.Hostname), uyuniLabelPrimaryFQDN: model.LabelValue(networkInfo.PrimaryFQDN), - uyuniLablelSystemID: model.LabelValue(strconv.Itoa(endpoint.SystemID)), - uyuniLablelGroups: model.LabelValue(strings.Join(managedGroupNames, d.separator)), - uyuniLablelEndpointName: model.LabelValue(endpoint.EndpointName), - uyuniLablelExporter: model.LabelValue(endpoint.ExporterName), + uyuniLabelSystemID: model.LabelValue(strconv.Itoa(endpoint.SystemID)), + uyuniLabelGroups: model.LabelValue(strings.Join(managedGroupNames, d.separator)), + uyuniLabelEndpointName: model.LabelValue(endpoint.EndpointName), + uyuniLabelExporter: model.LabelValue(endpoint.ExporterName), uyuniLabelProxyModule: model.LabelValue(endpoint.Module), uyuniLabelMetricsPath: model.LabelValue(endpoint.Path), uyuniLabelScheme: model.LabelValue(scheme), diff --git a/discovery/vultr/vultr.go b/discovery/vultr/vultr.go index aaa9c64e47..f82b22168a 100644 --- a/discovery/vultr/vultr.go +++ b/discovery/vultr/vultr.go @@ -16,13 +16,13 @@ package vultr import ( "context" "fmt" + "log/slog" "net" "net/http" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -114,7 +114,7 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*vultrMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") diff --git a/discovery/vultr/vultr_test.go b/discovery/vultr/vultr_test.go index 2f12a35529..00ef21e38c 100644 --- a/discovery/vultr/vultr_test.go +++ b/discovery/vultr/vultr_test.go @@ -19,9 +19,9 @@ import ( "net/url" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -57,7 +57,7 @@ func TestVultrSDRefresh(t *testing.T) { defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) endpoint, err := url.Parse(sdMock.Mock.Endpoint()) require.NoError(t, err) diff --git a/discovery/xds/client_test.go b/discovery/xds/client_test.go index b699995fb7..2cf5b2f9cb 100644 --- a/discovery/xds/client_test.go +++ b/discovery/xds/client_test.go @@ -52,16 +52,14 @@ func TestMakeXDSResourceHttpEndpointEmptyServerURLScheme(t *testing.T) { endpointURL, err := makeXDSResourceHTTPEndpointURL(ProtocolV3, urlMustParse("127.0.0.1"), "monitoring") require.Empty(t, endpointURL) - require.Error(t, err) - require.Equal(t, "invalid xDS server URL", err.Error()) + require.EqualError(t, err, "invalid xDS server URL") } func TestMakeXDSResourceHttpEndpointEmptyServerURLHost(t *testing.T) { endpointURL, err := makeXDSResourceHTTPEndpointURL(ProtocolV3, urlMustParse("grpc://127.0.0.1"), "monitoring") require.Empty(t, endpointURL) - require.Error(t, err) - require.Contains(t, err.Error(), "must be either 'http' or 'https'") + require.ErrorContains(t, err, "must be either 'http' or 'https'") } func TestMakeXDSResourceHttpEndpoint(t *testing.T) { diff --git a/discovery/xds/kuma.go b/discovery/xds/kuma.go index d1d540aaf4..55b3d628e5 100644 --- a/discovery/xds/kuma.go +++ b/discovery/xds/kuma.go @@ -15,14 +15,14 @@ package xds import ( "fmt" + "log/slog" "net/url" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "google.golang.org/protobuf/types/known/anypb" "github.com/prometheus/prometheus/discovery" @@ -99,7 +99,7 @@ func (c *KumaSDConfig) SetDirectory(dir string) { func (c *KumaSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { logger := opts.Logger if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } return NewKumaHTTPDiscovery(c, logger, opts.Metrics) @@ -158,7 +158,7 @@ func kumaMadsV1ResourceParser(resources []*anypb.Any, typeURL string) ([]model.L return targets, nil } -func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (discovery.Discoverer, error) { +func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (discovery.Discoverer, error) { m, ok := metrics.(*xdsMetrics) if !ok { return nil, fmt.Errorf("invalid discovery metrics type") @@ -170,7 +170,7 @@ func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger 
log.Logger, metrics discove var err error clientID, err = osutil.GetFQDN() if err != nil { - level.Debug(logger).Log("msg", "error getting FQDN", "err", err) + logger.Debug("error getting FQDN", "err", err) clientID = "prometheus" } } diff --git a/discovery/xds/kuma_mads.pb.go b/discovery/xds/kuma_mads.pb.go index e1160d7756..5d1d4ef713 100644 --- a/discovery/xds/kuma_mads.pb.go +++ b/discovery/xds/kuma_mads.pb.go @@ -23,13 +23,14 @@ package xds import ( context "context" + reflect "reflect" + sync "sync" + v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" _ "github.com/envoyproxy/protoc-gen-validate/validate" _ "google.golang.org/genproto/googleapis/api/annotations" protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" - reflect "reflect" - sync "sync" ) const ( diff --git a/discovery/xds/kuma_test.go b/discovery/xds/kuma_test.go index cfb9cbac50..23d754c4b7 100644 --- a/discovery/xds/kuma_test.go +++ b/discovery/xds/kuma_test.go @@ -201,9 +201,8 @@ func TestKumaMadsV1ResourceParserInvalidResources(t *testing.T) { }} groups, err := kumaMadsV1ResourceParser(resources, KumaMadsV1ResourceTypeURL) require.Nil(t, groups) - require.Error(t, err) - require.Contains(t, err.Error(), "cannot parse") + require.ErrorContains(t, err, "cannot parse") } func TestNewKumaHTTPDiscovery(t *testing.T) { diff --git a/discovery/xds/xds.go b/discovery/xds/xds.go index 8191d6be1a..db55a2b6f7 100644 --- a/discovery/xds/xds.go +++ b/discovery/xds/xds.go @@ -15,11 +15,10 @@ package xds import ( "context" + "log/slog" "time" v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "google.golang.org/protobuf/encoding/protojson" @@ -104,7 +103,7 @@ type fetchDiscovery struct { refreshInterval time.Duration parseResources resourceParser - logger log.Logger + logger *slog.Logger metrics *xdsMetrics } @@ -140,7 +139,7 @@ func (d *fetchDiscovery) poll(ctx context.Context, ch chan<- []*targetgroup.Grou } if err != nil { - level.Error(d.logger).Log("msg", "error parsing resources", "err", err) + d.logger.Error("error parsing resources", "err", err) d.metrics.fetchFailuresCount.Inc() return } @@ -153,12 +152,12 @@ func (d *fetchDiscovery) poll(ctx context.Context, ch chan<- []*targetgroup.Grou parsedTargets, err := d.parseResources(response.Resources, response.TypeUrl) if err != nil { - level.Error(d.logger).Log("msg", "error parsing resources", "err", err) + d.logger.Error("error parsing resources", "err", err) d.metrics.fetchFailuresCount.Inc() return } - level.Debug(d.logger).Log("msg", "Updated to version", "version", response.VersionInfo, "targets", len(parsedTargets)) + d.logger.Debug("Updated to version", "version", response.VersionInfo, "targets", len(parsedTargets)) select { case <-ctx.Done(): diff --git a/discovery/xds/xds_test.go b/discovery/xds/xds_test.go index 7cce021c5f..db10adc1a2 100644 --- a/discovery/xds/xds_test.go +++ b/discovery/xds/xds_test.go @@ -22,9 +22,9 @@ import ( "time" v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/goleak" "google.golang.org/protobuf/types/known/anypb" @@ -90,7 +90,7 @@ func constantResourceParser(targets []model.LabelSet, 
err error) resourceParser } } -var nopLogger = log.NewNopLogger() +var nopLogger = promslog.NewNopLogger() type testResourceClient struct { resourceTypeURL string diff --git a/discovery/zookeeper/zookeeper.go b/discovery/zookeeper/zookeeper.go index 92904dd71c..a1cfe3d055 100644 --- a/discovery/zookeeper/zookeeper.go +++ b/discovery/zookeeper/zookeeper.go @@ -18,15 +18,16 @@ import ( "encoding/json" "errors" "fmt" + "log/slog" "net" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/go-zookeeper/zk" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -146,16 +147,16 @@ type Discovery struct { treeCaches []*treecache.ZookeeperTreeCache parse func(data []byte, path string) (model.LabelSet, error) - logger log.Logger + logger *slog.Logger } // NewNerveDiscovery returns a new Discovery for the given Nerve config. -func NewNerveDiscovery(conf *NerveSDConfig, logger log.Logger) (*Discovery, error) { +func NewNerveDiscovery(conf *NerveSDConfig, logger *slog.Logger) (*Discovery, error) { return NewDiscovery(conf.Servers, time.Duration(conf.Timeout), conf.Paths, logger, parseNerveMember) } // NewServersetDiscovery returns a new Discovery for the given serverset config. -func NewServersetDiscovery(conf *ServersetSDConfig, logger log.Logger) (*Discovery, error) { +func NewServersetDiscovery(conf *ServersetSDConfig, logger *slog.Logger) (*Discovery, error) { return NewDiscovery(conf.Servers, time.Duration(conf.Timeout), conf.Paths, logger, parseServersetMember) } @@ -165,11 +166,11 @@ func NewDiscovery( srvs []string, timeout time.Duration, paths []string, - logger log.Logger, + logger *slog.Logger, pf func(data []byte, path string) (model.LabelSet, error), ) (*Discovery, error) { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } conn, _, err := zk.Connect( diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md index 8fefa8ecc9..a179a2f9f1 100644 --- a/docs/command-line/prometheus.md +++ b/docs/command-line/prometheus.md @@ -21,6 +21,7 @@ The Prometheus monitoring server | --web.config.file | [EXPERIMENTAL] Path to configuration file that can enable TLS or authentication. | | | --web.read-timeout | Maximum duration before timing out read of the request, and closing idle connections. | `5m` | | --web.max-connections | Maximum number of simultaneous connections across all listeners. | `512` | +| --web.max-notifications-subscribers | Limits the maximum number of subscribers that can concurrently receive live notifications. If the limit is reached, new subscription requests will be denied until existing connections close. | `16` | | --web.external-url | The URL under which Prometheus is externally reachable (for example, if Prometheus is served via a reverse proxy). Used for generating relative and absolute links back to Prometheus itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Prometheus. If omitted, relevant URL components will be derived automatically. | | | --web.route-prefix | Prefix for the internal routes of web endpoints. Defaults to path of --web.external-url. | | | --web.user-assets | Path to static asset directory, available at /user. | | @@ -28,13 +29,13 @@ The Prometheus monitoring server | --web.enable-admin-api | Enable API endpoints for admin control actions. 
| `false` | | --web.enable-remote-write-receiver | Enable API endpoint accepting remote write requests. | `false` | | --web.remote-write-receiver.accepted-protobuf-messages | List of the remote write protobuf messages to accept when receiving the remote writes. Supported values: prometheus.WriteRequest, io.prometheus.write.v2.Request | `prometheus.WriteRequest` | +| --web.enable-otlp-receiver | Enable API endpoint accepting OTLP write requests. | `false` | | --web.console.templates | Path to the console template directory, available at /consoles. | `consoles` | | --web.console.libraries | Path to the console library directory. | `console_libraries` | | --web.page-title | Document title of Prometheus instance. | `Prometheus Time Series Collection and Processing Server` | | --web.cors.origin | Regex for CORS origin. It is fully anchored. Example: 'https?://(domain1\|domain2)\.com' | `.*` | | --storage.tsdb.path | Base path for metrics storage. Use with server mode only. | `data/` | -| --storage.tsdb.retention | [DEPRECATED] How long to retain samples in storage. This flag has been deprecated, use "storage.tsdb.retention.time" instead. Use with server mode only. | | -| --storage.tsdb.retention.time | How long to retain samples in storage. When this flag is set it overrides "storage.tsdb.retention". If neither this flag nor "storage.tsdb.retention" nor "storage.tsdb.retention.size" is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms. Use with server mode only. | | +| --storage.tsdb.retention.time | How long to retain samples in storage. If neither this flag nor "storage.tsdb.retention.size" is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms. Use with server mode only. | | | --storage.tsdb.retention.size | Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Use with server mode only. | | | --storage.tsdb.no-lockfile | Do not create lockfile in data directory. Use with server mode only. | `false` | | --storage.tsdb.head-chunks-write-queue-size | Size of the queue through which head chunks are written to the disk to be m-mapped, 0 disables the queue completely. Experimental. Use with server mode only. | `0` | @@ -57,8 +58,8 @@ The Prometheus monitoring server | --query.timeout | Maximum time a query may take before being aborted. Use with server mode only. | `2m` | | --query.max-concurrency | Maximum number of queries executed concurrently. Use with server mode only. | `20` | | --query.max-samples | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` | -| --scrape.name-escaping-scheme | Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots". | `values` | -| --enable-feature ... | Comma separated feature names to enable. 
Valid options: agent, auto-gomaxprocs, auto-gomemlimit, auto-reload-config, concurrent-rule-eval, created-timestamp-zero-ingestion, delayed-compaction, exemplar-storage, expand-external-labels, extra-scrape-metrics, memory-snapshot-on-shutdown, native-histograms, new-service-discovery-manager, no-default-scrape-port, otlp-write-receiver, promql-experimental-functions, promql-delayed-name-removal, promql-per-step-stats, remote-write-receiver (DEPRECATED), utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | +| --enable-feature ... | Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, native-histograms, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | +| --agent | Run Prometheus in 'Agent mode'. | | | --log.level | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` | | --log.format | Output format of log messages. One of: [logfmt, json] | `logfmt` | diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md index e48cede79c..5e2a8f6bb1 100644 --- a/docs/command-line/promtool.md +++ b/docs/command-line/promtool.md @@ -15,7 +15,7 @@ Tooling for the Prometheus monitoring system. | -h, --help | Show context-sensitive help (also try --help-long and --help-man). | | --version | Show application version. | | --experimental | Enable experimental commands. | -| --enable-feature ... | Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details. | +| --enable-feature ... | Comma separated feature names to enable. Currently unused. | @@ -462,6 +462,7 @@ Unit tests for rules. | Flag | Description | Default | | --- | --- | --- | | --run ... | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. | | +| --debug | Enable unit test debugging. | `false` | | --diff | [Experimental] Print colored differential output between expected & received output. | `false` | diff --git a/docs/configuration/alerting_rules.md b/docs/configuration/alerting_rules.md index 3c1ec84f0f..cd33dba8e3 100644 --- a/docs/configuration/alerting_rules.md +++ b/docs/configuration/alerting_rules.md @@ -21,10 +21,13 @@ An example rules file with an alert would be: ```yaml groups: - name: example + labels: + team: myteam rules: - alert: HighRequestLatency expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5 for: 10m + keep_firing_for: 5m labels: severity: page annotations: @@ -38,6 +41,13 @@ the alert continues to be active during each evaluation for 10 minutes before firing the alert. Elements that are active, but not firing yet, are in the pending state. Alerting rules without the `for` clause will become active on the first evaluation. +There is also an optional `keep_firing_for` clause that tells Prometheus to keep +this alert firing for the specified duration after the firing condition was last met. +This can be used to prevent situations such as flapping alerts, false resolutions +due to lack of data loss, etc. 
Alerting rules without the `keep_firing_for` clause +will deactivate on the first evaluation where the condition is not met (assuming +any optional `for` duration described above has been satisfied). + The `labels` clause allows specifying a set of additional labels to be attached to the alert. Any existing conflicting labels will be overwritten. The label values can be templated. diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 1b205d1263..2093ed8836 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -71,12 +71,19 @@ global: # How frequently to evaluate rules. [ evaluation_interval: | default = 1m ] - # Offset the rule evaluation timestamp of this particular group by the specified duration into the past to ensure the underlying metrics have been received. - # Metric availability delays are more likely to occur when Prometheus is running as a remote write target, but can also occur when there's anomalies with scraping. + # Offset the rule evaluation timestamp of this particular group by the + # specified duration into the past to ensure the underlying metrics have + # been received. Metric availability delays are more likely to occur when + # Prometheus is running as a remote write target, but can also occur when + # there's anomalies with scraping. [ rule_query_offset: | default = 0s ] # The labels to add to any time series or alerts when communicating with - # external systems (federation, remote storage, Alertmanager). + # external systems (federation, remote storage, Alertmanager). + # Environment variable references `${var}` or `$var` are replaced according + # to the values of the current environment variables. + # References to undefined variables are replaced by the empty string. + # The `$` character can be escaped by using `$$`. external_labels: [ : ... ] @@ -84,33 +91,39 @@ global: # Reloading the configuration will reopen the file. [ query_log_file: ] + # File to which scrape failures are logged. + # Reloading the configuration will reopen the file. + [ scrape_failure_log_file: ] + # An uncompressed response body larger than this many bytes will cause the # scrape to fail. 0 means no limit. Example: 100MB. # This is an experimental feature, this behaviour could # change or be removed in the future. [ body_size_limit: | default = 0 ] - # Per-scrape limit on number of scraped samples that will be accepted. + # Per-scrape limit on the number of scraped samples that will be accepted. # If more than this number of samples are present after metric relabeling # the entire scrape will be treated as failed. 0 means no limit. [ sample_limit: | default = 0 ] - # Per-scrape limit on number of labels that will be accepted for a sample. If - # more than this number of labels are present post metric-relabeling, the - # entire scrape will be treated as failed. 0 means no limit. + # Limit on the number of labels that will be accepted per sample. If more + # than this number of labels are present on any sample post metric-relabeling, + # the entire scrape will be treated as failed. 0 means no limit. [ label_limit: | default = 0 ] - # Per-scrape limit on length of labels name that will be accepted for a sample. - # If a label name is longer than this number post metric-relabeling, the entire - # scrape will be treated as failed. 0 means no limit. + # Limit on the length (in bytes) of each individual label name. 
If any label + name in a scrape is longer than this number post metric-relabeling, the + entire scrape will be treated as failed. Note that label names are UTF-8 + encoded, and characters can take up to 4 bytes. 0 means no limit. [ label_name_length_limit: | default = 0 ] - # Per-scrape limit on length of labels value that will be accepted for a sample. - # If a label value is longer than this number post metric-relabeling, the - # entire scrape will be treated as failed. 0 means no limit. + # Limit on the length (in bytes) of each individual label value. If any label + # value in a scrape is longer than this number post metric-relabeling, the + # entire scrape will be treated as failed. Note that label values are UTF-8 + # encoded, and characters can take up to 4 bytes. 0 means no limit. [ label_value_length_limit: | default = 0 ] - # Per-scrape config limit on number of unique targets that will be + # Limit per scrape config on number of unique targets that will be # accepted. If more than this number of targets are present after target # relabeling, Prometheus will mark the targets as failed without scraping them. # 0 means no limit. This is an experimental feature, this behaviour could @@ -122,9 +135,9 @@ global: [ keep_dropped_targets: | default = 0 ] # Specifies the validation scheme for metric and label names. Either blank or - # "legacy" for letters, numbers, colons, and underscores; or "utf8" for full - # UTF-8 support. - [ metric_name_validation_scheme | default "legacy" ] + # "utf8" for full UTF-8 support, or "legacy" for letters, numbers, colons, + # and underscores. + [ metric_name_validation_scheme | default "utf8" ] runtime: # Configure the Go garbage collector GOGC parameter @@ -199,12 +212,18 @@ job_name: # The protocols to negotiate during a scrape with the client. # Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, -# OpenMetricsText1.0.0, PrometheusText0.0.4. +# OpenMetricsText1.0.0, PrometheusText0.0.4, PrometheusText1.0.0. [ scrape_protocols: [, ...] | default = ] -# Whether to scrape a classic histogram that is also exposed as a native +# Fallback protocol to use if a scrape returns blank, unparseable, or otherwise +# invalid Content-Type. +# Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, +# OpenMetricsText1.0.0, PrometheusText0.0.4, PrometheusText1.0.0. +[ fallback_scrape_protocol: ] + +# Whether to scrape a classic histogram, even if it is also exposed as a native # histogram (has no effect without --enable-feature=native-histograms). -[ scrape_classic_histograms: | default = false ] +[ always_scrape_classic_histograms: | default = false ] # The HTTP resource path on which to fetch metrics from targets. [ metrics_path: | default = /metrics ] @@ -260,64 +279,13 @@ params: # response from the scraped target. [ enable_compression: | default = true ] -# Sets the `Authorization` header on every scrape request with the -# configured username and password. -# password and password_file are mutually exclusive. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Sets the `Authorization` header on every scrape request with -# the configured credentials. -authorization: - # Sets the authentication type of the request. - [ type: | default: Bearer ] - # Sets the credentials of the request. It is mutually exclusive with - # `credentials_file`. 
- [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configure whether scrape requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# Configures the scrape request's TLS settings. -tls_config: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] +# File to which scrape failures are logged. +# Reloading the configuration will reopen the file. +[ scrape_failure_log_file: ] -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] # List of Azure service discovery configurations. azure_sd_configs: @@ -446,41 +414,43 @@ metric_relabel_configs: # change or be removed in the future. [ body_size_limit: | default = 0 ] -# Per-scrape limit on number of scraped samples that will be accepted. +# Per-scrape limit on the number of scraped samples that will be accepted. # If more than this number of samples are present after metric relabeling # the entire scrape will be treated as failed. 0 means no limit. [ sample_limit: | default = 0 ] -# Per-scrape limit on number of labels that will be accepted for a sample. If -# more than this number of labels are present post metric-relabeling, the -# entire scrape will be treated as failed. 0 means no limit. +# Limit on the number of labels that will be accepted per sample. If more +# than this number of labels are present on any sample post metric-relabeling, +# the entire scrape will be treated as failed. 0 means no limit. [ label_limit: | default = 0 ] -# Per-scrape limit on length of labels name that will be accepted for a sample. -# If a label name is longer than this number post metric-relabeling, the entire -# scrape will be treated as failed. 0 means no limit. +# Limit on the length (in bytes) of each individual label name. If any label +# name in a scrape is longer than this number post metric-relabeling, the +# entire scrape will be treated as failed. Note that label names are UTF-8 +# encoded, and characters can take up to 4 bytes. 0 means no limit. [ label_name_length_limit: | default = 0 ] -# Per-scrape limit on length of labels value that will be accepted for a sample. -# If a label value is longer than this number post metric-relabeling, the -# entire scrape will be treated as failed. 0 means no limit. +# Limit on the length (in bytes) of each individual label value. If any label +# value in a scrape is longer than this number post metric-relabeling, the +# entire scrape will be treated as failed. Note that label values are UTF-8 +# encoded, and characters can take up to 4 bytes. 
0 means no limit. [ label_value_length_limit: | default = 0 ] -# Per-scrape config limit on number of unique targets that will be +# Limit per scrape config on number of unique targets that will be # accepted. If more than this number of targets are present after target # relabeling, Prometheus will mark the targets as failed without scraping them. # 0 means no limit. This is an experimental feature, this behaviour could # change in the future. [ target_limit: | default = 0 ] -# Per-job limit on the number of targets dropped by relabeling +# Limit per scrape config on the number of targets dropped by relabeling # that will be kept in memory. 0 means no limit. [ keep_dropped_targets: | default = 0 ] -# Specifies the validation scheme for metric and label names. Either blank or -# "legacy" for letters, numbers, colons, and underscores; or "utf8" for full -# UTF-8 support. -[ metric_name_validation_scheme | default "legacy" ] +# Specifies the validation scheme for metric and label names. Either blank or +# "utf8" for full UTF-8 support, or "legacy" for letters, numbers, colons, and +# underscores. +[ metric_name_validation_scheme | default "utf8" ] # Limit on total number of positive and negative buckets allowed in a single # native histogram. The resolution of a histogram with more buckets will be @@ -532,6 +502,73 @@ metric_relabel_configs: Where `` must be unique across all scrape configurations. +### `` + +A `http_config` allows configuring HTTP requests. + +``` +# Sets the `Authorization` header on every request with the +# configured username and password. +# username and username_file are mutually exclusive. +# password and password_file are mutually exclusive. +basic_auth: + [ username: ] + [ username_file: ] + [ password: ] + [ password_file: ] + +# Sets the `Authorization` header on every request with +# the configured credentials. +authorization: + # Sets the authentication type of the request. + [ type: | default: Bearer ] + # Sets the credentials of the request. It is mutually exclusive with + # `credentials_file`. + [ credentials: ] + # Sets the credentials of the request with the credentials read from the + # configured file. It is mutually exclusive with `credentials`. + [ credentials_file: ] + +# Optional OAuth 2.0 configuration. +# Cannot be used at the same time as basic_auth or authorization. +oauth2: + [ ] + +# Configure whether requests follow HTTP 3xx redirects. +[ follow_redirects: | default = true ] + +# Whether to enable HTTP2. +[ enable_http2: | default: true ] + +# Configures the request's TLS settings. +tls_config: + [ ] + +# Optional proxy URL. +[ proxy_url: ] +# Comma-separated string that can contain IPs, CIDR notation, domain names +# that should be excluded from proxying. IP and domain names can +# contain port numbers. +[ no_proxy: ] +# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) +[ proxy_from_environment: | default: false ] +# Specifies headers to send to proxies during CONNECT requests. +[ proxy_connect_header: + [ : [, ...] ] ] + +# Custom HTTP headers to be sent along with each request. +# Headers that are set by Prometheus itself can't be overwritten. +http_headers: + # Header name. + [ : + # Header values. + [ values: [, ...] ] + # Headers values. Hidden in configuration page. + [ secrets: [, ...] ] + # Files to read header values from. + [ files: [, ...] ] ] +``` + ### `` A `tls_config` allows configuring TLS connections. 
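As a rough sketch of how the consolidated HTTP client settings (including `tls_config`) and the new scrape options introduced above fit together, a scrape job might now look like the following. The job name, host name, file paths and credentials are placeholders for illustration only, not recommendations:

```yaml
scrape_configs:
  - job_name: "secured-app"                      # hypothetical job
    scheme: https
    metrics_path: /metrics
    # Used only when the target returns a blank or invalid Content-Type.
    fallback_scrape_protocol: PrometheusText0.0.4
    # Scrape failures are logged here; the file is reopened on config reload.
    scrape_failure_log_file: /var/log/prometheus/secured-app-failures.log
    # <http_config> options are set inline on the scrape config:
    basic_auth:
      username: "prometheus"
      password_file: /etc/prometheus/secrets/app-password
    tls_config:
      ca_file: /etc/prometheus/tls/ca.crt
    follow_redirects: true
    static_configs:
      - targets: ["app.example.com:443"]
```

The same inline `<http_config>` fields stand in for the per-SD authentication, proxy, TLS and header blocks that are removed throughout the rest of this change.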
@@ -673,65 +710,9 @@ subscription_id: # instead be specified in the relabeling rule. [ port: | default = 80 ] -# Authentication information used to authenticate to the Azure API. -# Note that `basic_auth`, `authorization` and `oauth2` options are -# mutually exclusive. -# `password` and `password_file` are mutually exclusive. - -# Optional HTTP basic authentication information, currently not support by Azure. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration, currently not supported by Azure. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration, currently not supported by Azure. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -777,14 +758,17 @@ The following meta labels are available on targets during [relabeling](#relabel_ services: [ - ] -# See https://www.consul.io/api/catalog.html#list-nodes-for-service to know more -# about the possible filters that can be used. +# A Consul Filter expression used to filter the catalog results +# See https://www.consul.io/api-docs/catalog#list-services to know more +# about the filter expressions that can be used. +[ filter: ] +# The `tags` and `node_meta` fields are deprecated in Consul in favor of `filter`. # An optional list of tags used to filter nodes for a given service. Services must contain all tags in the list. tags: [ - ] -# Node metadata key/value pairs to filter nodes for a given service. +# Node metadata key/value pairs to filter nodes for a given service. As of Consul 1.14, consider `filter` instead. [ node_meta: [ : ... ] ] @@ -798,65 +782,9 @@ tags: # On large setup it might be a good idea to increase this value because the catalog will change all the time. [ refresh_interval: | default = 30s ] -# Authentication information used to authenticate to the consul server. -# Note that `basic_auth`, `authorization` and `oauth2` options are -# mutually exclusive. -# `password` and `password_file` are mutually exclusive. - -# Optional HTTP basic authentication information. 
-basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` Note that the IP number and port used to scrape the targets is assembled as @@ -896,77 +824,20 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_digitalocean_vpc`: the id of the droplet's VPC ```yaml -# Authentication information used to authenticate to the API server. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. +# The port to scrape metrics from. +[ port: | default = 80 ] -# Optional HTTP basic authentication information, not currently supported by DigitalOcean. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] +# The time after which the droplets are refreshed. +[ refresh_interval: | default = 60s ] -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] +``` -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] +### `` -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. 
-[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] - -# The port to scrape metrics from. -[ port: | default = 80 ] - -# The time after which the droplets are refreshed. -[ refresh_interval: | default = 60s ] -``` - -### `` - -Docker SD configurations allow retrieving scrape targets from [Docker Engine](https://docs.docker.com/engine/) hosts. +Docker SD configurations allow retrieving scrape targets from [Docker Engine](https://docs.docker.com/engine/) hosts. This SD discovers "containers" and will create a target for each network IP and port the container is configured to expose. @@ -993,34 +864,6 @@ See below for the configuration options for Docker discovery: # Address of the Docker daemon. host: -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# TLS configuration. -tls_config: - [ ] - # The port to scrape metrics from, when `role` is nodes, and for discovered # tasks and services that don't have published ports. [ port: | default = 80 ] @@ -1044,39 +887,9 @@ tls_config: # The time after which the containers are refreshed. [ refresh_interval: | default = 60s ] -# Authentication information used to authenticate to the Docker daemon. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. - -# Optional HTTP basic authentication information. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. 
-[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` The [relabeling phase](#relabel_config) is the preferred and more powerful @@ -1185,34 +998,6 @@ See below for the configuration options for Docker Swarm discovery: # Address of the Docker daemon. host: -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# TLS configuration. -tls_config: - [ ] - # Role of the targets to retrieve. Must be `services`, `tasks`, or `nodes`. role: @@ -1233,39 +1018,9 @@ role: # The time after which the service discovery data is refreshed. [ refresh_interval: | default = 60s ] -# Authentication information used to authenticate to the Docker daemon. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. - -# Optional HTTP basic authentication information. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` The [relabeling phase](#relabel_config) is the preferred and more powerful @@ -1380,65 +1135,9 @@ filters: [ - name: values: , [...] ] -# Authentication information used to authenticate to the EC2 API. -# Note that `basic_auth`, `authorization` and `oauth2` options are -# mutually exclusive. -# `password` and `password_file` are mutually exclusive. - -# Optional HTTP basic authentication information, currently not supported by AWS. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration, currently not supported by AWS. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. 
- [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutuall exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration, currently not supported by AWS. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` The [relabeling phase](#relabel_config) is the preferred and more powerful @@ -1667,63 +1366,9 @@ query: # The port to scrape metrics from. [ port: | default = 80 ] -# TLS configuration to connect to the PuppetDB. -tls_config: - [ ] - -# basic_auth, authorization, and oauth2, are mutually exclusive. - -# Optional HTTP basic authentication information. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# `Authorization` HTTP header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials with the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. 
-[ enable_http2: | default: true ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` See [this example Prometheus configuration file](/documentation/examples/prometheus-puppetdb.yml) @@ -1904,80 +1549,21 @@ The labels below are only available for targets with `role` set to `robot`: # One of robot or hcloud. role: -# Authentication information used to authenticate to the API server. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. +# The port to scrape metrics from. +[ port: | default = 80 ] -# Optional HTTP basic authentication information, required when role is robot -# Role hcloud does not support basic auth. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] +# The time after which the servers are refreshed. +[ refresh_interval: | default = 60s ] -# Optional `Authorization` header configuration, required when role is -# hcloud. Role robot does not support bearer token authentication. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] +``` -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] +### `` -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] - -# The port to scrape metrics from. -[ port: | default = 80 ] - -# The time after which the servers are refreshed. -[ refresh_interval: | default = 60s ] -``` - -### `` - -HTTP-based service discovery provides a more generic way to configure static targets -and serves as an interface to plug in custom service discovery mechanisms. +HTTP-based service discovery provides a more generic way to configure static targets +and serves as an interface to plug in custom service discovery mechanisms. It fetches targets from an HTTP endpoint containing a list of zero or more ``s. The target must reply with an HTTP 200 response. @@ -2013,65 +1599,9 @@ url: # Refresh interval to re-query the endpoint. 
[ refresh_interval: | default = 60s ] -# Authentication information used to authenticate to the API server. -# Note that `basic_auth`, `authorization` and `oauth2` options are -# mutually exclusive. -# `password` and `password_file` are mutually exclusive. - -# Optional HTTP basic authentication information. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -2105,74 +1635,15 @@ following meta labels are available on all targets during # The unique ID of the data center. datacenter_id: -# Authentication information used to authenticate to the API server. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. - -# Optional HTTP basic authentication information, required when using IONOS -# Cloud username and password as authentication method. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration, required when using IONOS -# Cloud token as authentication method. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. 
-[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] - # The port to scrape metrics from. [ port: | default = 80 ] # The time after which the servers are refreshed. [ refresh_interval: | default = 60s ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -2290,6 +1761,8 @@ The `endpointslice` role discovers targets from existing endpointslices. For eac address referenced in the endpointslice object one target is discovered. If the endpoint is backed by a pod, all additional container ports of the pod, not bound to an endpoint port, are discovered as targets as well. +The role requires the `discovery.k8s.io/v1` API version (available since Kubernetes v1.21). + Available meta labels: * `__meta_kubernetes_namespace`: The namespace of the endpoints object. @@ -2310,7 +1783,7 @@ Available meta labels: * `__meta_kubernetes_endpointslice_endpoint_topology_present_kubernetes_io_hostname`: Flag that shows if the referenced object has a kubernetes.io/hostname annotation. * `__meta_kubernetes_endpointslice_endpoint_hostname`: Hostname of the referenced endpoint. * `__meta_kubernetes_endpointslice_endpoint_node_name`: Name of the Node hosting the referenced endpoint. - * `__meta_kubernetes_endpointslice_endpoint_zone`: Zone the referenced endpoint exists in (only available when using the `discovery.k8s.io/v1` API group). + * `__meta_kubernetes_endpointslice_endpoint_zone`: Zone the referenced endpoint exists in. * `__meta_kubernetes_endpointslice_port`: Port of the referenced endpoint. * `__meta_kubernetes_endpointslice_port_name`: Named port of the referenced endpoint. * `__meta_kubernetes_endpointslice_port_protocol`: Protocol of the referenced endpoint. @@ -2323,6 +1796,8 @@ The `ingress` role discovers a target for each path of each ingress. This is generally useful for blackbox monitoring of an ingress. The address will be set to the host specified in the ingress spec. +The role requires the `networking.k8s.io/v1` API version (available since Kubernetes v1.19). + Available meta labels: * `__meta_kubernetes_namespace`: The namespace of the ingress object. @@ -2354,66 +1829,6 @@ role: # Note that api_server and kube_config are mutually exclusive. [ kubeconfig_file: ] -# Optional authentication information used to authenticate to the API server. -# Note that `basic_auth` and `authorization` options are mutually exclusive. -# password and password_file are mutually exclusive. - -# Optional HTTP basic authentication information. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. 
- [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] - # Optional namespace discovery. If omitted, all namespaces are used. namespaces: own_namespace: @@ -2443,6 +1858,10 @@ attach_metadata: # Attaches node metadata to discovered targets. Valid for roles: pod, endpoints, endpointslice. # When set to true, Prometheus must have permissions to get Nodes. [ node: | default = false ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` See [this example Prometheus configuration file](/documentation/examples/prometheus-kubernetes.yml) @@ -2483,66 +1902,9 @@ server: # The time after which the monitoring assignments are refreshed. [ fetch_timeout: | default = 2m ] -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# TLS configuration. -tls_config: - [ ] - -# Authentication information used to authenticate to the Docker daemon. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. - -# Optional HTTP basic authentication information. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional the `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. 
It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials with the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` The [relabeling phase](#relabel_config) is the preferred and more powerful way @@ -2584,77 +1946,21 @@ See below for the configuration options for Lightsail discovery: [ access_key: ] [ secret_key: ] # Named AWS profile used to connect to the API. -[ profile: ] - -# AWS Role ARN, an alternative to using AWS API keys. -[ role_arn: ] - -# Refresh interval to re-read the instance list. -[ refresh_interval: | default = 60s ] - -# The port to scrape metrics from. If using the public IP address, this must -# instead be specified in the relabeling rule. -[ port: | default = 80 ] - -# Authentication information used to authenticate to the Lightsail API. -# Note that `basic_auth`, `authorization` and `oauth2` options are -# mutually exclusive. -# `password` and `password_file` are mutually exclusive. - -# Optional HTTP basic authentication information, currently not supported by AWS. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration, currently not supported by AWS. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutuall exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration, currently not supported by AWS. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] +[ profile: ] -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] +# AWS Role ARN, an alternative to using AWS API keys. +[ role_arn: ] -# Whether to enable HTTP2. -[ enable_http2: | default: true ] +# Refresh interval to re-read the instance list. +[ refresh_interval: | default = 60s ] -# TLS configuration. -tls_config: - [ ] +# The port to scrape metrics from. If using the public IP address, this must +# instead be specified in the relabeling rule. 
+[ port: | default = 80 ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -2665,6 +1971,8 @@ This service discovery uses the public IPv4 address by default, by that can be changed with relabeling, as demonstrated in [the Prometheus linode-sd configuration file](/documentation/examples/prometheus-linode.yml). +Linode APIv4 Token must be created with scopes: `linodes:read_only`, `ips:read_only`, and `events:read_only`. + The following meta labels are available on targets during [relabeling](#relabel_config): * `__meta_linode_instance_id`: the id of the linode instance @@ -2692,71 +2000,10 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_linode_ipv6_ranges`: a list of IPv6 ranges with mask assigned to the linode instance joined by the tag separator ```yaml -# Authentication information used to authenticate to the API server. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. -# Note: Linode APIv4 Token must be created with scopes: 'linodes:read_only', 'ips:read_only', and 'events:read_only' - -# Optional HTTP basic authentication information, not currently supported by Linode APIv4. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional the `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials with the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] # Optional region to filter on. [ region: ] -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] - # The port to scrape metrics from. [ port: | default = 80 ] @@ -2765,6 +2012,10 @@ tls_config: # The time after which the linode instances are refreshed. [ refresh_interval: | default = 60s ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -2805,67 +2056,9 @@ servers: # It is mutually exclusive with `auth_token` and other authentication mechanisms. 
[ auth_token_file: ] -# Sets the `Authorization` header on every request with the -# configured username and password. -# This is mutually exclusive with other authentication mechanisms. -# password and password_file are mutually exclusive. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -# NOTE: The current version of DC/OS marathon (v1.11.0) does not support -# standard `Authentication` header, use `auth_token` or `auth_token_file` -# instead. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration for connecting to marathon servers -tls_config: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` By default every app listed in Marathon will be scraped by Prometheus. If not all @@ -2927,65 +2120,9 @@ The following meta labels are available on targets during [relabeling](#relabel_ [ server: ] [ tag_separator: | default = ,] -# Authentication information used to authenticate to the nomad server. -# Note that `basic_auth`, `authorization` and `oauth2` options are -# mutually exclusive. -# `password` and `password_file` are mutually exclusive. - -# Optional HTTP basic authentication information. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. 
-[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -3123,66 +2260,12 @@ See below for the configuration options for Eureka discovery: # The URL to connect to the Eureka server. server: -# Sets the `Authorization` header on every request with the -# configured username and password. -# password and password_file are mutually exclusive. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configures the scrape request's TLS settings. -tls_config: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - # Refresh interval to re-read the app instance list. [ refresh_interval: | default = 30s ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` See [the Prometheus eureka-sd configuration file](/documentation/examples/prometheus-eureka.yml) @@ -3273,49 +2356,19 @@ role: # Zone is the availability zone of your targets (e.g. fr-par-1). [ zone: | default = fr-par-1 ] -# NameFilter specify a name filter (works as a LIKE) to apply on the server listing request. 
-[ name_filter: ] - -# TagsFilter specify a tag filter (a server needs to have all defined tags to be listed) to apply on the server listing request. -tags_filter: -[ - ] - -# Refresh interval to re-read the targets list. -[ refresh_interval: | default = 60s ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# TLS configuration. -tls_config: - [ ] +# NameFilter specify a name filter (works as a LIKE) to apply on the server listing request. +[ name_filter: ] + +# TagsFilter specify a tag filter (a server needs to have all defined tags to be listed) to apply on the server listing request. +tags_filter: +[ - ] + +# Refresh interval to re-read the targets list. +[ refresh_interval: | default = 60s ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` ### `` @@ -3355,61 +2408,9 @@ password: # Refresh interval to re-read the managed targets list. [ refresh_interval: | default = 60s ] -# Optional HTTP basic authentication information, currently not supported by Uyuni. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration, currently not supported by Uyuni. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration, currently not supported by Uyuni. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] 
] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` See [the Prometheus uyuni-sd configuration file](/documentation/examples/prometheus-uyuni.yml) @@ -3444,72 +2445,15 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_vultr_instance_allowed_bandwidth_gb` : Monthly bandwidth quota in GB. ```yaml -# Authentication information used to authenticate to the API server. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. - -# Optional HTTP basic authentication information, not currently supported by Vultr. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] - # The port to scrape metrics from. [ port: | default = 80 ] # The time after which the instances are refreshed. [ refresh_interval: | default = 60s ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` @@ -3661,25 +2605,6 @@ through the `__alerts_path__` label. # Configures the protocol scheme used for requests. [ scheme: | default = http ] -# Sets the `Authorization` header on every request with the -# configured username and password. -# password and password_file are mutually exclusive. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. 
- [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - # Optionally configures AWS's Signature Verification 4 signing process to sign requests. # Cannot be set at the same time as basic_auth, authorization, oauth2, azuread or google_iam. # To use the default credentials from the AWS SDK, use `sigv4: {}`. @@ -3699,44 +2624,9 @@ sigv4: # AWS Role ARN, an alternative to using AWS API keys. [ role_arn: ] -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configures the scrape request's TLS settings. -tls_config: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] # List of Azure service discovery configurations. azure_sd_configs: @@ -3907,25 +2797,6 @@ write_relabel_configs: # For the `io.prometheus.write.v2.Request` message, this option is noop (always true). [ send_native_histograms: | default = false ] -# Sets the `Authorization` header on every remote write request with the -# configured username and password. -# password and password_file are mutually exclusive. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default = Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - # Optionally configures AWS's Signature Verification 4 signing process to # sign requests. Cannot be set at the same time as basic_auth, authorization, oauth2, or azuread. # To use the default credentials from the AWS SDK, use `sigv4: {}`. @@ -3945,11 +2816,6 @@ sigv4: # AWS Role ARN, an alternative to using AWS API keys. [ role_arn: ] -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth, authorization, sigv4, azuread or google_iam. -oauth2: - [ ] - # Optional AzureAD configuration. # Cannot be used at the same time as basic_auth, authorization, oauth2, sigv4 or google_iam. azuread: @@ -3976,43 +2842,9 @@ azuread: # Cannot be used at the same time as basic_auth, authorization, oauth2, sigv4 or azuread. 
# To use the default credentials from the Google Cloud SDK, use `google_iam: {}`. google_iam: - # Service account key with monitoring write permessions. + # Service account key with monitoring write permissions. credentials_file: -# Configures the remote write request's TLS settings. -tls_config: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default = false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] ] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default = true ] - # Configures the queue used to write to remote storage. queue_config: # Number of samples to buffer per shard before we block reading of more @@ -4054,6 +2886,11 @@ metadata_config: [ send_interval: | default = 1m ] # Maximum number of samples per send. [ max_samples_per_send: | default = 500] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +# enable_http2 defaults to false for remote-write. +[ ] ``` There is a list of @@ -4088,66 +2925,12 @@ headers: # the local storage should have complete data for. [ read_recent: | default = false ] -# Sets the `Authorization` header on every remote read request with the -# configured username and password. -# password and password_file are mutually exclusive. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configures the remote read request's TLS settings. -tls_config: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Custom HTTP headers to be sent along with each request. -# Headers that are set by Prometheus itself can't be overwritten. -http_headers: - # Header name. - [ : - # Header values. - [ values: [, ...] ] - # Headers values. Hidden in configuration page. - [ secrets: [, ...] 
] - # Files to read header values from. - [ files: [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - # Whether to use the external labels as selectors for the remote read endpoint. [ filter_external_labels: | default = true ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` There is a list of @@ -4158,8 +2941,6 @@ with this feature. `tsdb` lets you configure the runtime-reloadable configuration settings of the TSDB. -NOTE: Out-of-order ingestion is an experimental feature, but you do not need any additional flag to enable it. Setting `out_of_order_time_window` to a positive duration enables it. - ```yaml # Configures how old an out-of-order/out-of-bounds sample can be w.r.t. the TSDB max time. # An out-of-order/out-of-bounds sample is ingested into the TSDB as long as the timestamp diff --git a/docs/configuration/recording_rules.md b/docs/configuration/recording_rules.md index 9aa226bbc0..9a8e7a70c9 100644 --- a/docs/configuration/recording_rules.md +++ b/docs/configuration/recording_rules.md @@ -89,6 +89,11 @@ name: # Offset the rule evaluation timestamp of this particular group by the specified duration into the past. [ query_offset: | default = global.rule_query_offset ] +# Labels to add or overwrite before storing the result for its rules. +# Labels defined in will override the key if it has a collision. +labels: + [ : ] + rules: [ - ... ] ``` diff --git a/docs/feature_flags.md b/docs/feature_flags.md index 51c2a9b314..4be11ed472 100644 --- a/docs/feature_flags.md +++ b/docs/feature_flags.md @@ -11,23 +11,6 @@ Their behaviour can change in future releases which will be communicated via the You can enable them using the `--enable-feature` flag with a comma separated list of features. They may be enabled by default in future versions. -## Expand environment variables in external labels - -`--enable-feature=expand-external-labels` - -Replace `${var}` or `$var` in the [`external_labels`](configuration/configuration.md#configuration-file) -values according to the values of the current environment variables. References -to undefined variables are replaced by the empty string. -The `$` character can be escaped by using `$$`. - -## Remote Write Receiver - -`--enable-feature=remote-write-receiver` - -The remote write receiver allows Prometheus to accept remote write requests from other Prometheus servers. More details can be found [here](storage.md#overview). - -Activating the remote write receiver via a feature flag is deprecated. Use `--web.enable-remote-write-receiver` instead. This feature flag will be ignored in future versions of Prometheus. - ## Exemplars storage `--enable-feature=exemplar-storage` @@ -40,9 +23,8 @@ Exemplar storage is implemented as a fixed size circular buffer that stores exem `--enable-feature=memory-snapshot-on-shutdown` -This takes the snapshot of the chunks that are in memory along with the series information when shutting down and stores -it on disk. This will reduce the startup time since the memory state can be restored with this snapshot and m-mapped -chunks without the need of WAL replay. +This takes a snapshot of the chunks that are in memory along with the series information when shutting down and stores it on disk. 
This will reduce the startup time since the memory state can now be restored with this snapshot +and m-mapped chunks, while a WAL replay from disk is only needed for the parts of the WAL that are not part of the snapshot. ## Extra scrape metrics @@ -55,30 +37,6 @@ When enabled, for each instance scrape, Prometheus stores a sample in the follow to find out how close they are to reaching the limit with `scrape_samples_post_metric_relabeling / scrape_sample_limit`. Note that `scrape_sample_limit` can be zero if there is no limit configured, which means that the query above can return `+Inf` for targets with no limit (as we divide by zero). If you want to query only for targets that do have a sample limit use this query: `scrape_samples_post_metric_relabeling / (scrape_sample_limit > 0)`. - `scrape_body_size_bytes`. The uncompressed size of the most recent scrape response, if successful. Scrapes failing because `body_size_limit` is exceeded report `-1`, other scrape failures report `0`. -## New service discovery manager - -`--enable-feature=new-service-discovery-manager` - -When enabled, Prometheus uses a new service discovery manager that does not -restart unchanged discoveries upon reloading. This makes reloads faster and reduces -pressure on service discoveries' sources. - -Users are encouraged to test the new service discovery manager and report any -issues upstream. - -In future releases, this new service discovery manager will become the default and -this feature flag will be ignored. - -## Prometheus agent - -`--enable-feature=agent` - -When enabled, Prometheus runs in agent mode. The agent mode is limited to -discovery, scrape and remote write. - -This is useful when you do not need to query the Prometheus data locally, but -only from a central [remote endpoint](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage). - ## Per-step stats `--enable-feature=promql-per-step-stats` @@ -103,15 +61,6 @@ When enabled, the GOMEMLIMIT variable is automatically set to match the Linux co There is also an additional tuning flag, `--auto-gomemlimit.ratio`, which allows controlling how much of the memory is used for Prometheus. The remainder is reserved for memory outside the process. For example, kernel page cache. Page cache is important for Prometheus TSDB query performance. The default is `0.9`, which means 90% of the memory limit will be used for Prometheus. -## No default scrape port - -`--enable-feature=no-default-scrape-port` - -When enabled, the default ports for HTTP (`:80`) or HTTPS (`:443`) will _not_ be added to -the address used to scrape a target (the value of the `__address_` label), contrary to the default behavior. -In addition, if a default HTTP or HTTPS port has already been added either in a static configuration or -by a service discovery mechanism and the respective scheme is specified (`http` or `https`), that port will be removed. - ## Native Histograms `--enable-feature=native-histograms` @@ -134,67 +83,7 @@ those classic histograms that do not come with a corresponding native histogram. However, if a native histogram is present, Prometheus will ignore the corresponding classic histogram, with the notable exception of exemplars, which are always ingested. To keep the classic histograms as well, enable -`scrape_classic_histograms` in the scrape job. 
- -_Note about the format of `le` and `quantile` label values:_ - -In certain situations, the protobuf parsing changes the number formatting of -the `le` labels of classic histograms and the `quantile` labels of -summaries. Typically, this happens if the scraped target is instrumented with -[client_golang](https://github.com/prometheus/client_golang) provided that -[promhttp.HandlerOpts.EnableOpenMetrics](https://pkg.go.dev/github.com/prometheus/client_golang/prometheus/promhttp#HandlerOpts) -is set to `false`. In such a case, integer label values are represented in the -text format as such, e.g. `quantile="1"` or `le="2"`. However, the protobuf parsing -changes the representation to float-like (following the OpenMetrics -specification), so the examples above become `quantile="1.0"` and `le="2.0"` after -ingestion into Prometheus, which changes the identity of the metric compared to -what was ingested before via the text format. - -The effect of this change is that alerts, recording rules and dashboards that -directly reference label values as whole numbers such as `le="1"` will stop -working. - -Aggregation by the `le` and `quantile` labels for vectors that contain the old and -new formatting will lead to unexpected results, and range vectors that span the -transition between the different formatting will contain additional series. -The most common use case for both is the quantile calculation via -`histogram_quantile`, e.g. -`histogram_quantile(0.95, sum by (le) (rate(histogram_bucket[10m])))`. -The `histogram_quantile` function already tries to mitigate the effects to some -extent, but there will be inaccuracies, in particular for shorter ranges that -cover only a few samples. - -Ways to deal with this change either globally or on a per metric basis: - -- Fix references to integer `le`, `quantile` label values, but otherwise do -nothing and accept that some queries that span the transition time will produce -inaccurate or unexpected results. -_This is the recommended solution, to get consistently normalized label values._ -Also Prometheus 3.0 is expected to enforce normalization of these label values. -- Use `metric_relabel_config` to retain the old labels when scraping targets. -This should **only** be applied to metrics that currently produce such labels. - - -```yaml - metric_relabel_configs: - - source_labels: - - quantile - target_label: quantile - regex: (\d+)\.0+ - - source_labels: - - le - - __name__ - target_label: le - regex: (\d+)\.0+;.*_bucket -``` - -## OTLP Receiver - -`--enable-feature=otlp-write-receiver` - -The OTLP receiver allows Prometheus to accept [OpenTelemetry](https://opentelemetry.io/) metrics writes. -Prometheus is best used as a Pull based system, and staleness, `up` metric, and other Pull enabled features -won't work when you push OTLP metrics. +`always_scrape_classic_histograms` in the scrape job. ## Experimental PromQL functions @@ -226,6 +115,12 @@ This has the potential to improve rule group evaluation latency and resource uti The number of concurrent rule evaluations can be configured with `--rules.max-concurrent-rule-evals`, which is set to `4` by default. +## Serve old Prometheus UI + +Fall back to serving the old (Prometheus 2.x) web UI instead of the new UI. The new UI that was released as part of Prometheus 3.0 is a complete rewrite and aims to be cleaner, less cluttered, and more modern under the hood. However, it is not fully feature complete and battle-tested yet, so some users may still prefer using the old UI. 
+ +`--enable-feature=old-ui` + ## Metadata WAL Records `--enable-feature=metadata-wal-records` @@ -258,14 +153,6 @@ When enabled, Prometheus will change the way in which the `__name__` label is re This allows optionally preserving the `__name__` label via the `label_replace` and `label_join` functions, and helps prevent the "vector cannot contain metrics with the same labelset" error, which can happen when applying a regex-matcher to the `__name__` label. -## UTF-8 Name Support - -`--enable-feature=utf8-names` - -When enabled, changes the metric and label name validation scheme inside Prometheus to allow the full UTF-8 character set. -By itself, this flag does not enable the request of UTF-8 names via content negotiation. -Users will also have to set `metric_name_validation_scheme` in scrape configs to enable the feature either on the global config or on a per-scrape config basis. - ## Auto Reload Config `--enable-feature=auto-reload-config` diff --git a/docs/migration.md b/docs/migration.md index cb88bbfd6f..43fc43df2a 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -3,198 +3,198 @@ title: Migration sort_rank: 10 --- -# Prometheus 2.0 migration guide +# Prometheus 3.0 migration guide -In line with our [stability promise](https://prometheus.io/blog/2016/07/18/prometheus-1-0-released/#fine-print), -the Prometheus 2.0 release contains a number of backwards incompatible changes. -This document offers guidance on migrating from Prometheus 1.8 to Prometheus 2.0 and newer versions. +In line with our [stability promise](https://prometheus.io/docs/prometheus/latest/stability/), +the Prometheus 3.0 release contains a number of backwards incompatible changes. +This document offers guidance on migrating from Prometheus 2.x to Prometheus 3.0 and newer versions. ## Flags -The format of Prometheus command line flags has changed. Instead of a -single dash, all flags now use a double dash. Common flags (`--config.file`, -`--web.listen-address` and `--web.external-url`) remain but -almost all storage-related flags have been removed. - -Some notable flags which have been removed: - -- `-alertmanager.url` In Prometheus 2.0, the command line flags for configuring - a static Alertmanager URL have been removed. Alertmanager must now be - discovered via service discovery, see [Alertmanager service discovery](#alertmanager-service-discovery). - -- `-log.format` In Prometheus 2.0 logs can only be streamed to standard error. - -- `-query.staleness-delta` has been renamed to `--query.lookback-delta`; Prometheus - 2.0 introduces a new mechanism for handling staleness, see [staleness](querying/basics.md#staleness). - -- `-storage.local.*` Prometheus 2.0 introduces a new storage engine; as such all - flags relating to the old engine have been removed. For information on the - new engine, see [Storage](#storage). +- The following feature flags have been removed and they have been added to the + default behavior of Prometheus v3: + - `promql-at-modifier` + - `promql-negative-offset` + - `remote-write-receiver` + - `new-service-discovery-manager` + - `expand-external-labels` + Environment variable references `${var}` or `$var` in external label values + are replaced according to the values of the current environment variables. + References to undefined variables are replaced by the empty string. + The `$` character can be escaped by using `$$`. + - `no-default-scrape-port` + Prometheus v3 will no longer add ports to scrape targets according to the + specified scheme. 
Targets will now appear in labels as configured.
+    If you rely on scrape targets like
+    `https://example.com/metrics` or `http://example.com/metrics` to be
+    represented as `https://example.com/metrics:443` and
+    `http://example.com/metrics:80` respectively, add the ports to your
+    target URLs explicitly.
+  - `agent`
+    Instead use the dedicated `--agent` CLI flag.
+
+  Prometheus v3 will log a warning if you continue to pass these to
+  `--enable-feature`.
+
+## Configuration
+
+- The scrape job level configuration option `scrape_classic_histograms` has been
+  renamed to `always_scrape_classic_histograms`. If you use the
+  `--enable-feature=native-histograms` feature flag to ingest native histograms
+  and you also want to ingest classic histograms that an endpoint might expose
+  along with native histograms, be sure to add this configuration or change your
+  configuration from the old name.
+- The default for `http_config.enable_http2` in `remote_write` items has been
+  changed to `false`. In Prometheus v2 the remote-write HTTP client would
+  default to using HTTP/2. In order to parallelize multiple remote-write queues
+  across multiple sockets, it is preferable not to default to HTTP/2.
+  If you prefer to use HTTP/2 for remote write, you must now set
+  `http_config.enable_http2: true` in your `remote_write` configuration section
+  (see the sketch below).
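+
+A minimal sketch of how both of these options could look in a configuration
+file (the job name, target, and remote-write URL below are illustrative
+placeholders, not values from this guide):
+
+```yaml
+scrape_configs:
+  - job_name: example-job                          # hypothetical job name
+    # Keep ingesting classic histograms alongside native histograms
+    # (renamed from scrape_classic_histograms).
+    always_scrape_classic_histograms: true
+    static_configs:
+      - targets: ["localhost:9100"]                # hypothetical target
+
+remote_write:
+  - url: https://remote-storage.example.com/api/v1/write   # hypothetical endpoint
+    # Opt back in to the Prometheus v2 behaviour of using HTTP/2.
+    enable_http2: true
+```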
-- `-storage.remote.*` Prometheus 2.0 has removed the deprecated remote
-  storage flags, and will fail to start if they are supplied. To write to
-  InfluxDB, Graphite, or OpenTSDB use the relevant storage adapter.
-
-## Alertmanager service discovery
-
-Alertmanager service discovery was introduced in Prometheus 1.4, allowing Prometheus
-to dynamically discover Alertmanager replicas using the same mechanism as scrape
-targets. In Prometheus 2.0, the command line flags for static Alertmanager config
-have been removed, so the following command line flag:
-
-```
-./prometheus -alertmanager.url=http://alertmanager:9093/
-```
+## PromQL

-Would be replaced with the following in the `prometheus.yml` config file:
+- The `.` pattern in regular expressions in PromQL matches newline characters.
+  With this change, a regular expression like `.*` matches strings that include
+  `\n`. This applies to matchers in queries and relabel configs. For example, the
+  following regular expressions now match the accompanying strings, whereas in
+  Prometheus v2 these combinations didn't match.
+
+| Regex | Additional matches |
+| ----- | ------ |
+| ".*" | "foo\n", "Foo\nBar" |
+| "foo.?bar" | "foo\nbar" |
+| "foo.+bar" | "foo\nbar" |
+
+  If you want Prometheus v3 to behave like v2 did, you will have to change your
+  regular expressions by replacing all `.` patterns with `[^\n]`, e.g.
+  `foo[^\n]*`.
+- Lookback and range selectors are left-open and right-closed (previously
+  left-closed and right-closed). This change affects queries when the evaluation
+  time perfectly aligns with the sample timestamps. For example, assume querying
+  a time series with evenly spaced samples exactly 1 minute apart. Before
+  Prometheus 3.x, a range query with `5m` would mostly return 5 samples, but if
+  the query evaluation aligned perfectly with a scrape, it would return 6
+  samples. In Prometheus 3.x, queries like this will always return 5 samples.
+  This change likely has few effects on everyday use, except for some subquery
+  use cases.
+  Query front-ends that align queries usually align subqueries to multiples of
+  the step size. These subqueries will likely be affected. Tests are more likely
+  to be affected. To fix those, either adjust the expected number of samples or
+  extend the range by less than one sample interval.
+- The `holt_winters` function has been renamed to `double_exponential_smoothing`
+  and is now guarded by the `promql-experimental-functions` feature flag.
+  If you want to keep using `holt_winters`, you have to do both of these things:
+  - Rename `holt_winters` to `double_exponential_smoothing` in your queries.
+  - Pass `--enable-feature=promql-experimental-functions` in your Prometheus
+    CLI invocation.
+
+## Scrape protocols
+Prometheus v3 is stricter about the Content-Type header received when scraping.
+Prometheus v2 would default to the standard Prometheus text protocol if the
+target being scraped did not specify a Content-Type header or if the header was
+unparsable or unrecognised. This could lead to incorrect data being parsed in
+the scrape. Prometheus v3 will now fail the scrape in such cases.
+
+If a scrape target does not provide the correct Content-Type header, the
+fallback protocol can be specified using the `fallback_scrape_protocol`
+parameter. See the [Prometheus scrape_config documentation](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config).
+
+This is a breaking change, as scrapes that may have succeeded with Prometheus v2
+may now fail if this fallback protocol is not specified.

-```yaml
-alerting:
-  alertmanagers:
-  - static_configs:
-    - targets:
-      - alertmanager:9093
-```
+## Miscellaneous

-You can also use all the usual Prometheus service discovery integrations and
-relabeling in your Alertmanager configuration. This snippet instructs
-Prometheus to search for Kubernetes pods, in the `default` namespace, with the
-label `name: alertmanager` and with a non-empty port.
+### TSDB format and downgrade
+The TSDB format has been changed in Prometheus v2.55 in preparation for changes
+to the index format. Consequently, a Prometheus v3 TSDB can only be read by
+Prometheus v2.55 or newer.
+Before upgrading to Prometheus v3, please upgrade to v2.55 first and confirm
+Prometheus works as expected. Only then continue with the upgrade to v3.

-```yaml
-alerting:
-  alertmanagers:
-  - kubernetes_sd_configs:
-    - role: pod
-      tls_config:
-        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-      relabel_configs:
-      - source_labels: [__meta_kubernetes_pod_label_name]
-        regex: alertmanager
-        action: keep
-      - source_labels: [__meta_kubernetes_namespace]
-        regex: default
-        action: keep
-      - source_labels: [__meta_kubernetes_pod_container_port_number]
-        regex:
-        action: drop
-```
+### TSDB Storage contract
+TSDB-compatible storage is now expected to return results matching the specified
+selectors. This might impact some third-party implementations, most likely those
+implementing `remote_read`.
+This contract is not explicitly enforced, but can cause undefined behavior.

-## Recording rules and alerts
+### UTF-8 names
+Prometheus v3 supports UTF-8 in metric and label names. This means metric and
+label names can change after upgrading according to what is exposed by
+endpoints. Furthermore, metric and label names that would have previously been
+flagged as invalid no longer will be.

-The format for configuring alerting and recording rules has been changed to YAML.
-An example of a recording rule and alert in the old format: +Users wishing to preserve the original validation behavior can update their +prometheus yaml configuration to specify the legacy validation scheme: ``` -job:request_duration_seconds:histogram_quantile99 = - histogram_quantile(0.99, sum by (le, job) (rate(request_duration_seconds_bucket[1m]))) - -ALERT FrontendRequestLatency - IF job:request_duration_seconds:histogram_quantile99{job="frontend"} > 0.1 - FOR 5m - ANNOTATIONS { - summary = "High frontend request latency", - } -``` - -Would look like this: - -```yaml -groups: -- name: example.rules - rules: - - record: job:request_duration_seconds:histogram_quantile99 - expr: histogram_quantile(0.99, sum by (le, job) (rate(request_duration_seconds_bucket[1m]))) - - alert: FrontendRequestLatency - expr: job:request_duration_seconds:histogram_quantile99{job="frontend"} > 0.1 - for: 5m - annotations: - summary: High frontend request latency +global: + metric_name_validation_scheme: legacy ``` -To help with the change, the `promtool` tool has a mode to automate the rules conversion. Given a `.rules` file, it will output a `.rules.yml` file in the -new format. For example: +Or on a per-scrape basis: ``` -$ promtool update rules example.rules +scrape_configs: + - job_name: job1 + metric_name_validation_scheme: utf8 + - job_name: job2 + metric_name_validation_scheme: legacy ``` -You will need to use `promtool` from [Prometheus 2.5](https://github.com/prometheus/prometheus/releases/tag/v2.5.0) as later versions no longer contain the above subcommand. - -## Storage - -The data format in Prometheus 2.0 has completely changed and is not backwards -compatible with 1.8 and older versions. To retain access to your historic monitoring data we -recommend you run a non-scraping Prometheus instance running at least version -1.8.1 in parallel with your Prometheus 2.0 instance, and have the new server -read existing data from the old one via the remote read protocol. - -Your Prometheus 1.8 instance should be started with the following flags and an -config file containing only the `external_labels` setting (if any): - +### Log message format +Prometheus v3 has adopted `log/slog` over the previous `go-kit/log`. This +results in a change of log message format. 
An example of the old log format is:
```
-$ ./prometheus-1.8.1.linux-amd64/prometheus -web.listen-address ":9094" -config.file old.yml
+ts=2024-10-23T22:01:06.074Z caller=main.go:627 level=info msg="No time or size retention was set so using the default time retention" duration=15d
+ts=2024-10-23T22:01:06.074Z caller=main.go:671 level=info msg="Starting Prometheus Server" mode=server version="(version=, branch=, revision=91d80252c3e528728b0f88d254dd720f6be07cb8-modified)"
+ts=2024-10-23T22:01:06.074Z caller=main.go:676 level=info build_context="(go=go1.23.0, platform=linux/amd64, user=, date=, tags=unknown)"
+ts=2024-10-23T22:01:06.074Z caller=main.go:677 level=info host_details="(Linux 5.15.0-124-generic #134-Ubuntu SMP Fri Sep 27 20:20:17 UTC 2024 x86_64 gigafips (none))"
```
-Prometheus 2.0 can then be started (on the same machine) with the following flags:
-
+A similar sequence in the new log format looks like this:
```
-$ ./prometheus-2.0.0.linux-amd64/prometheus --config.file prometheus.yml
+time=2024-10-24T00:03:07.542+02:00 level=INFO source=/home/user/go/src/github.com/prometheus/prometheus/cmd/prometheus/main.go:640 msg="No time or size retention was set so using the default time retention" duration=15d
+time=2024-10-24T00:03:07.542+02:00 level=INFO source=/home/user/go/src/github.com/prometheus/prometheus/cmd/prometheus/main.go:681 msg="Starting Prometheus Server" mode=server version="(version=, branch=, revision=7c7116fea8343795cae6da42960cacd0207a2af8)"
+time=2024-10-24T00:03:07.542+02:00 level=INFO source=/home/user/go/src/github.com/prometheus/prometheus/cmd/prometheus/main.go:686 msg="operational information" build_context="(go=go1.23.0, platform=linux/amd64, user=, date=, tags=unknown)" host_details="(Linux 5.15.0-124-generic #134-Ubuntu SMP Fri Sep 27 20:20:17 UTC 2024 x86_64 gigafips (none))" fd_limits="(soft=1048576, hard=1048576)" vm_limits="(soft=unlimited, hard=unlimited)"
```
-Where `prometheus.yml` contains in addition to your full existing configuration, the stanza:
-
-```yaml
-remote_read:
-  - url: "http://localhost:9094/api/v1/read"
-```
-
-## PromQL
-
-The following features have been removed from PromQL:
+### `le` and `quantile` label values
+In Prometheus v3, the values of the `le` label of classic histograms and the
+`quantile` label of summaries are normalized upon ingestion. In Prometheus v2
+the value of these labels depended on the scrape protocol (protobuf vs text
+format) in some situations. This led to label values changing based on the
+scrape protocol. E.g. a metric exposed as `my_classic_hist{le="1"}` would be
+ingested as `my_classic_hist{le="1"}` via the text format, but as
+`my_classic_hist{le="1.0"}` via protobuf. This changed the identity of the
+metric and caused problems when querying the metric.
+In Prometheus v3 these label values will always be normalized to a float-like
+representation. I.e. the above example will always result in
+`my_classic_hist{le="1.0"}` being ingested into Prometheus, regardless of which
+protocol is used. The effect of this change is that alerts, recording rules and
+dashboards that directly reference label values as whole numbers such as
+`le="1"` will stop working.
-- `drop_common_labels` function - the `without` aggregation modifier should be used
-  instead.
-- `keep_common` aggregation modifier - the `by` modifier should be used instead.
-- `count_scalar` function - use cases are better handled by `absent()` or correct
-  propagation of labels in operations.
+Ways to deal with this change either globally or on a per-metric basis:

-See [issue #3060](https://github.com/prometheus/prometheus/issues/3060) for more
-details.
-
-## Miscellaneous
-
-### Prometheus non-root user
-
-The Prometheus Docker image is now built to [run Prometheus
-as a non-root user](https://github.com/prometheus/prometheus/pull/2859). If you
-want the Prometheus UI/API to listen on a low port number (say, port 80), you'll
-need to override it. For Kubernetes, you would use the following YAML:
+- Fix references to integer `le`, `quantile` label values, but otherwise do
+nothing and accept that some queries that span the transition time will produce
+inaccurate or unexpected results.
+_This is the recommended solution._
+- Use `metric_relabel_config` to retain the old labels when scraping targets.
+This should **only** be applied to metrics that currently produce such labels.

```yaml
-apiVersion: v1
-kind: Pod
-metadata:
-  name: security-context-demo-2
-spec:
-  securityContext:
-    runAsUser: 0
-...
-```
-
-See [Configure a Security Context for a Pod or Container](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/)
-for more details.
-
-If you're using Docker, then the following snippet would be used:
-
-```
-docker run -p 9090:9090 prom/prometheus:latest
+  metric_relabel_configs:
+  - source_labels:
+    - quantile
+    target_label: quantile
+    regex: (\d+)\.0+
+  - source_labels:
+    - le
+    - __name__
+    target_label: le
+    regex: (\d+)\.0+;.*_bucket
```

-### Prometheus lifecycle
+# Prometheus 2.0 migration guide

-If you use the Prometheus `/-/reload` HTTP endpoint to [automatically reload your
-Prometheus config when it changes](configuration/configuration.md),
-these endpoints are disabled by default for security reasons in Prometheus 2.0.
-To enable them, set the `--web.enable-lifecycle` flag.
+For the Prometheus 1.8 to 2.0 migration, please refer to the [Prometheus v2.55 documentation](https://prometheus.io/docs/prometheus/2.55/migration/).
diff --git a/docs/querying/api.md b/docs/querying/api.md
index efa244fbc8..0352496f18 100644
--- a/docs/querying/api.md
+++ b/docs/querying/api.md
@@ -59,7 +59,7 @@ timestamps are always represented as Unix timestamps in seconds.
* ``: Prometheus [time series selectors](basics.md#time-series-selectors) like `http_requests_total` or `http_requests_total{method=~"(GET|POST)"}` and need to be URL-encoded.
-* ``: [Prometheus duration strings](basics.md#time-durations).
+* ``: [the subset of Prometheus float literals using time units](basics.md#float-literals-and-time-durations).
For example, `5m` refers to a duration of 5 minutes.
* ``: boolean values (strings `true` and `false`).
@@ -239,6 +239,75 @@ $ curl 'http://localhost:9090/api/v1/format_query?query=foo/bar'
}
```
+## Parsing a PromQL expression into an abstract syntax tree (AST)
+
+This endpoint is **experimental** and might change in the future. It is currently only meant to be used by Prometheus' own web UI, and the endpoint name and exact format returned may change from one Prometheus version to another. It may also be removed again if it is no longer needed by the UI.
+
+The following endpoint parses a PromQL expression and returns it as a JSON-formatted AST (abstract syntax tree) representation:
+
+```
+GET /api/v1/parse_query
+POST /api/v1/parse_query
+```
+
+URL query parameters:
+
+- `query=`: Prometheus expression query string.
+ +You can URL-encode these parameters directly in the request body by using the `POST` method and +`Content-Type: application/x-www-form-urlencoded` header. This is useful when specifying a large +query that may breach server-side URL character limits. + +The `data` section of the query result is a string containing the AST of the parsed query expression. + +The following example parses the expression `foo/bar`: + +```json +$ curl 'http://localhost:9090/api/v1/parse_query?query=foo/bar' +{ + "data" : { + "bool" : false, + "lhs" : { + "matchers" : [ + { + "name" : "__name__", + "type" : "=", + "value" : "foo" + } + ], + "name" : "foo", + "offset" : 0, + "startOrEnd" : null, + "timestamp" : null, + "type" : "vectorSelector" + }, + "matching" : { + "card" : "one-to-one", + "include" : [], + "labels" : [], + "on" : false + }, + "op" : "/", + "rhs" : { + "matchers" : [ + { + "name" : "__name__", + "type" : "=", + "value" : "bar" + } + ], + "name" : "bar", + "offset" : 0, + "startOrEnd" : null, + "timestamp" : null, + "type" : "vectorSelector" + }, + "type" : "binaryExpr" + }, + "status" : "success" +} +``` + ## Querying metadata Prometheus offers a set of API endpoints to query metadata about series and their labels. @@ -499,7 +568,7 @@ Instant vectors are returned as result type `vector`. The corresponding Each series could have the `"value"` key, or the `"histogram"` key, but not both. Series are not guaranteed to be returned in any particular order unless a function -such as [`sort`](functions.md#sort) or [`sort_by_label`](functions.md#sort_by_label)` +such as [`sort`](functions.md#sort) or [`sort_by_label`](functions.md#sort_by_label) is used. ### Scalars @@ -693,8 +762,10 @@ URL query parameters: - `rule_name[]=`: only return rules with the given rule name. If the parameter is repeated, rules with any of the provided names are returned. If we've filtered out all the rules of a group, the group is not returned. When the parameter is absent or empty, no filtering is done. - `rule_group[]=`: only return rules with the given rule group name. If the parameter is repeated, rules with any of the provided rule group names are returned. When the parameter is absent or empty, no filtering is done. - `file[]=`: only return rules with the given filepath. If the parameter is repeated, rules with any of the provided filepaths are returned. When the parameter is absent or empty, no filtering is done. -- `exclude_alerts=`: only return rules, do not return active alerts. +- `exclude_alerts=`: only return rules, do not return active alerts. - `match[]=`: only return rules that have configured labels that satisfy the label selectors. If the parameter is repeated, rules that match any of the sets of label selectors are returned. Note that matching is on the labels in the definition of each rule, not on the values after template expansion (for alerting rules). Optional. +- `group_limit=`: The `group_limit` parameter allows you to specify a limit for the number of rule groups that is returned in a single response. If the total number of rule groups exceeds the specified `group_limit` value, the response will include a `groupNextToken` property. You can use the value of this `groupNextToken` property in subsequent requests in the `group_next_token` parameter to paginate over the remaining rule groups. The `groupNextToken` property will not be present in the final response, indicating that you have retrieved all the available rule groups. 
Please note that there are no guarantees regarding the consistency of the response if the rule groups are being modified during the pagination process.
+- `group_next_token`: the pagination token that was returned in a previous request when the `group_limit` parameter is set. The pagination token is used to iteratively paginate over a large number of rule groups. To use the `group_next_token` parameter, the `group_limit` parameter also needs to be present. If a rule group that coincides with the next token is removed while you are paginating over the rule groups, a response with status code 400 will be returned.

```json
$ curl http://localhost:9090/api/v1/rules
@@ -834,7 +905,7 @@ curl -G http://localhost:9091/api/v1/targets/metadata \
```

The following example returns metadata for all metrics for all targets with
-label `instance="127.0.0.1:9090`.
+label `instance="127.0.0.1:9090"`.

```json
curl -G http://localhost:9091/api/v1/targets/metadata \
@@ -1119,9 +1190,11 @@ The following endpoint returns various cardinality statistics about the Promethe
GET /api/v1/status/tsdb
```
URL query parameters:
+
- `limit=`: Limit the number of returned items to a given number for each set of statistics. By default, 10 items are returned.

-The `data` section of the query result consists of
+The `data` section of the query result consists of:
+
- **headStats**: This provides the following data about the head block of the TSDB:
  - **numSeries**: The number of series.
  - **chunkCount**: The number of chunks.
@@ -1197,13 +1270,13 @@ The following endpoint returns information about the WAL replay:
GET /api/v1/status/walreplay
```

-**read**: The number of segments replayed so far.
-**total**: The total number segments needed to be replayed.
-**progress**: The progress of the replay (0 - 100%).
-**state**: The state of the replay. Possible states:
-- **waiting**: Waiting for the replay to start.
-- **in progress**: The replay is in progress.
-- **done**: The replay has finished.
+- **read**: The number of segments replayed so far.
+- **total**: The total number of segments needed to be replayed.
+- **progress**: The progress of the replay (0 - 100%).
+- **state**: The state of the replay. Possible states:
+  - **waiting**: Waiting for the replay to start.
+  - **in progress**: The replay is in progress.
+  - **done**: The replay has finished.

```json
$ curl http://localhost:9090/api/v1/status/walreplay
@@ -1319,8 +1392,74 @@ is not considered an efficient way of ingesting samples. Use it with caution
for specific low-volume use cases. It is not suitable for replacing the
ingestion via scraping.

-Enable the OTLP receiver by the feature flag
-`--enable-feature=otlp-write-receiver`. When enabled, the OTLP receiver
+Enable the OTLP receiver by setting
+`--web.enable-otlp-receiver`. When enabled, the OTLP receiver
endpoint is `/api/v1/otlp/v1/metrics`.

*New in v2.47*
+
+## Notifications
+
+The following endpoints provide information about active status notifications concerning the Prometheus server itself.
+Notifications are used in the web UI.
+
+These endpoints are **experimental**. They may change in the future.
+
+### Active Notifications
+
+The `/api/v1/notifications` endpoint returns a list of all currently active notifications.
+ +``` +GET /api/v1/notifications +``` + +Example: + +``` +$ curl http://localhost:9090/api/v1/notifications +{ + "status": "success", + "data": [ + { + "text": "Prometheus is shutting down and gracefully stopping all operations.", + "date": "2024-10-07T12:33:08.551376578+02:00", + "active": true + } + ] +} +``` + +*New in v3.0* + +### Live Notifications + +The `/api/v1/notifications/live` endpoint streams live notifications as they occur, using [Server-Sent Events](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events). Deleted notifications are sent with `active: false`. Active notifications will be sent when connecting to the endpoint. + +``` +GET /api/v1/notifications/live +``` + +Example: + +``` +$ curl http://localhost:9090/api/v1/notifications/live +data: { + "status": "success", + "data": [ + { + "text": "Prometheus is shutting down and gracefully stopping all operations.", + "date": "2024-10-07T12:33:08.551376578+02:00", + "active": true + } + ] +} +``` + +**Note:** The `/notifications/live` endpoint will return a `204 No Content` response if the maximum number of subscribers has been reached. You can set the maximum number of listeners with the flag `--web.max-notifications-subscribers`, which defaults to 16. + +``` +GET /api/v1/notifications/live +204 No Content +``` + +*New in v3.0* diff --git a/docs/querying/basics.md b/docs/querying/basics.md index 81ffb4e0f3..1c06afb85d 100644 --- a/docs/querying/basics.md +++ b/docs/querying/basics.md @@ -35,8 +35,9 @@ evaluate to one of four types: Depending on the use-case (e.g. when graphing vs. displaying the output of an expression), only some of these types are legal as the result of a -user-specified expression. For example, an expression that returns an instant -vector is the only type which can be graphed. +user-specified expression. +For [instant queries](api.md#instant-queries), any of the above data types are allowed as the root of the expression. +[Range queries](api.md/#range-queries) only support scalar-typed and instant-vector-typed expressions. _Notes about the experimental native histograms:_ @@ -68,9 +69,10 @@ Example: 'these are unescaped: \n \\ \t' `these are not unescaped: \n ' " \t` -### Float literals +### Float literals and time durations -Scalar float values can be written as literal integer or floating-point numbers in the format (whitespace only included for better readability): +Scalar float values can be written as literal integer or floating-point numbers +in the format (whitespace only included for better readability): [-+]?( [0-9]*\.?[0-9]+([eE][-+]?[0-9]+)? @@ -87,16 +89,53 @@ Examples: 0x8f -Inf NaN - -As of version 2.54, float literals can also be represented using the syntax of time durations, where the time duration is converted into a float value corresponding to the number of seconds the time duration represents. This is an experimental feature and might still change. +Additionally, underscores (`_`) can be used in between decimal or hexadecimal +digits to improve readability. Examples: - 1s # Equivalent to 1.0 - 2m # Equivalent to 120.0 - 1ms # Equivalent to 0.001 - + 1_000_000 + .123_456_789 + 0x_53_AB_F3_82 + +Float literals are also used to specify durations in seconds. 
For convenience, +decimal integer numbers may be combined with the following +time units: + +* `ms` – milliseconds +* `s` – seconds – 1s equals 1000ms +* `m` – minutes – 1m equals 60s (ignoring leap seconds) +* `h` – hours – 1h equals 60m +* `d` – days – 1d equals 24h (ignoring so-called daylight saving time) +* `w` – weeks – 1w equals 7d +* `y` – years – 1y equals 365d (ignoring leap days) + +Suffixing a decimal integer number with one of the units above is a different +representation of the equivalent number of seconds as a bare float literal. + +Examples: + + 1s # Equivalent to 1. + 2m # Equivalent to 120. + 1ms # Equivalent to 0.001. + -2h # Equivalent to -7200. + +The following examples do _not_ work: + + 0xABm # No suffixing of hexadecimal numbers. + 1.5h # Time units cannot be combined with a floating point. + +Infd # No suffixing of ±Inf or NaN. + +Multiple units can be combined by concatenation of suffixed integers. Units +must be ordered from the longest to the shortest. A given unit must only appear +once per float literal. + +Examples: + + 1h30m # Equivalent to 5400s and thus 5400. + 12h34m56s # Equivalent to 45296s and thus 45296. + 54s321ms # Equivalent to 54.321. ## Time series selectors @@ -109,8 +148,16 @@ single sample value for each at a given timestamp (point in time). In the simpl form, only a metric name is specified, which results in an instant vector containing elements for all time series that have this metric name. +The value returned will be that of the most recent sample at or before the +query's evaluation timestamp (in the case of an +[instant query](api.md#instant-queries)) +or the current step within the query (in the case of a +[range query](api.md/#range-queries)). +The [`@` modifier](#modifier) allows overriding the timestamp relative to which +the selection takes place. Time series are only returned if their most recent sample is less than the [lookback period](#staleness) ago. + This example selects all time series that have the `http_requests_total` metric -name: +name, returning the most recent sample for each: http_requests_total @@ -200,53 +247,22 @@ syntax](https://github.com/google/re2/wiki/Syntax). ### Range Vector Selectors Range vector literals work like instant vector literals, except that they -select a range of samples back from the current instant. Syntactically, a [time -duration](#time-durations) is appended in square brackets (`[]`) at the end of -a vector selector to specify how far back in time values should be fetched for -each resulting range vector element. The range is a closed interval, -i.e. samples with timestamps coinciding with either boundary of the range are -still included in the selection. - -In this example, we select all the values we have recorded within the last 5 -minutes for all time series that have the metric name `http_requests_total` and -a `job` label set to `prometheus`: +select a range of samples back from the current instant. Syntactically, a +[float literal](#float-literals-and-time-durations) is appended in square +brackets (`[]`) at the end of a vector selector to specify for how many seconds +back in time values should be fetched for each resulting range vector element. +Commonly, the float literal uses the syntax with one or more time units, e.g. +`[5m]`. The range is a left-open and right-closed interval, i.e. 
samples with +timestamps coinciding with the left boundary of the range are excluded from the +selection, while samples coinciding with the right boundary of the range are +included in the selection. + +In this example, we select all the values recorded less than 5m ago for all +time series that have the metric name `http_requests_total` and a `job` label +set to `prometheus`: http_requests_total{job="prometheus"}[5m] -### Time Durations - -Time durations are specified as a number, followed immediately by one of the -following units: - -* `ms` - milliseconds -* `s` - seconds -* `m` - minutes -* `h` - hours -* `d` - days - assuming a day always has 24h -* `w` - weeks - assuming a week always has 7d -* `y` - years - assuming a year always has 365d1 - -1 For days in a year, the leap day is ignored, and conversely, for a minute, a leap second is ignored. - -Time durations can be combined by concatenation. Units must be ordered from the -longest to the shortest. A given unit must only appear once in a time duration. - -Here are some examples of valid time durations: - - 5h - 1h30m - 5m - 10s - - -As of version 2.54, time durations can also be represented using the syntax of float literals, implying the number of seconds of the time duration. This is an experimental feature and might still change. - -Examples: - - 1.0 # Equivalent to 1s - 0.001 # Equivalent to 1ms - 120 # Equivalent to 2m - ### Offset modifier The `offset` modifier allows changing the time offset for individual @@ -329,7 +345,7 @@ Note that the `@` modifier allows a query to look ahead of its evaluation time. Subquery allows you to run an instant query for a given range and resolution. The result of a subquery is a range vector. -Syntax: ` '[' ':' [] ']' [ @ ] [ offset ]` +Syntax: ` '[' ':' [] ']' [ @ ] [ offset ]` * `` is optional. Default is the global evaluation interval. @@ -358,8 +374,9 @@ independently of the actual present time series data. This is mainly to support cases like aggregation (`sum`, `avg`, and so on), where multiple aggregated time series do not precisely align in time. Because of their independence, Prometheus needs to assign a value at those timestamps for each relevant time -series. It does so by taking the newest sample before this timestamp within the lookback period. -The lookback period is 5 minutes by default. +series. It does so by taking the newest sample that is less than the lookback period ago. +The lookback period is 5 minutes by default, but can be +[set with the `--query.lookback-delta` flag](../command-line/prometheus.md) If a target scrape or rule evaluation no longer returns a sample for a time series that was previously present, this time series will be marked as stale. diff --git a/docs/querying/functions.md b/docs/querying/functions.md index e13628c5c5..310b7b9337 100644 --- a/docs/querying/functions.md +++ b/docs/querying/functions.md @@ -326,45 +326,70 @@ With native histograms, aggregating everything works as usual without any `by` c histogram_quantile(0.9, sum(rate(http_request_duration_seconds[10m]))) -The `histogram_quantile()` function interpolates quantile values by -assuming a linear distribution within a bucket. +In the (common) case that a quantile value does not coincide with a bucket +boundary, the `histogram_quantile()` function interpolates the quantile value +within the bucket the quantile value falls into. 
For classic histograms, for +native histograms with custom bucket boundaries, and for the zero bucket of +other native histograms, it assumes a uniform distribution of observations +within the bucket (also called _linear interpolation_). For the +non-zero-buckets of native histograms with a standard exponential bucketing +schema, the interpolation is done under the assumption that the samples within +the bucket are distributed in a way that they would uniformly populate the +buckets in a hypothetical histogram with higher resolution. (This is also +called _exponential interpolation_.) If `b` has 0 observations, `NaN` is returned. For φ < 0, `-Inf` is returned. For φ > 1, `+Inf` is returned. For φ = `NaN`, `NaN` is returned. -The following is only relevant for classic histograms: If `b` contains -fewer than two buckets, `NaN` is returned. The highest bucket must have an -upper bound of `+Inf`. (Otherwise, `NaN` is returned.) If a quantile is located -in the highest bucket, the upper bound of the second highest bucket is -returned. A lower limit of the lowest bucket is assumed to be 0 if the upper -bound of that bucket is greater than -0. In that case, the usual linear interpolation is applied within that -bucket. Otherwise, the upper bound of the lowest bucket is returned for -quantiles located in the lowest bucket. - -You can use `histogram_quantile(0, v instant-vector)` to get the estimated minimum value stored in -a histogram. - -You can use `histogram_quantile(1, v instant-vector)` to get the estimated maximum value stored in -a histogram. - -Buckets of classic histograms are cumulative. Therefore, the following should always be the case: - -* The counts in the buckets are monotonically increasing (strictly non-decreasing). -* A lack of observations between the upper limits of two consecutive buckets results in equal counts -in those two buckets. - -However, floating point precision issues (e.g. small discrepancies introduced by computing of buckets -with `sum(rate(...))`) or invalid data might violate these assumptions. In that case, -`histogram_quantile` would be unable to return meaningful results. To mitigate the issue, -`histogram_quantile` assumes that tiny relative differences between consecutive buckets are happening -because of floating point precision errors and ignores them. (The threshold to ignore a difference -between two buckets is a trillionth (1e-12) of the sum of both buckets.) Furthermore, if there are -non-monotonic bucket counts even after this adjustment, they are increased to the value of the -previous buckets to enforce monotonicity. The latter is evidence for an actual issue with the input -data and is therefore flagged with an informational annotation reading `input to histogram_quantile -needed to be fixed for monotonicity`. If you encounter this annotation, you should find and remove -the source of the invalid data. +Special cases for classic histograms: + +* If `b` contains fewer than two buckets, `NaN` is returned. +* The highest bucket must have an upper bound of `+Inf`. (Otherwise, `NaN` is + returned.) +* If a quantile is located in the highest bucket, the upper bound of the second + highest bucket is returned. +* The lower limit of the lowest bucket is assumed to be 0 if the upper bound of + that bucket is greater than 0. In that case, the usual linear interpolation + is applied within that bucket. Otherwise, the upper bound of the lowest + bucket is returned for quantiles located in the lowest bucket. 
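+
+As an illustration of the linear interpolation described above (the bucket
+boundaries and counts here are invented for this example): assume a classic
+histogram with 100 total observations and cumulative buckets `le="1"` = 50,
+`le="2"` = 90, `le="+Inf"` = 100. For φ = 0.8 the target rank is
+0.8 × 100 = 80 observations, which falls into the bucket with bounds (1, 2],
+so the estimated quantile is
+
+    1 + (2 - 1) * (80 - 50) / (90 - 50) = 1.75
+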
+ +Special cases for native histograms (relevant for the exact interpolation +happening within the zero bucket): + +* A zero bucket with finite width is assumed to contain no negative + observations if the histogram has observations in positive buckets, but none + in negative buckets. +* A zero bucket with finite width is assumed to contain no positive + observations if the histogram has observations in negative buckets, but none + in positive buckets. + +You can use `histogram_quantile(0, v instant-vector)` to get the estimated +minimum value stored in a histogram. + +You can use `histogram_quantile(1, v instant-vector)` to get the estimated +maximum value stored in a histogram. + +Buckets of classic histograms are cumulative. Therefore, the following should +always be the case: + +* The counts in the buckets are monotonically increasing (strictly + non-decreasing). +* A lack of observations between the upper limits of two consecutive buckets + results in equal counts in those two buckets. + +However, floating point precision issues (e.g. small discrepancies introduced +by computing of buckets with `sum(rate(...))`) or invalid data might violate +these assumptions. In that case, `histogram_quantile` would be unable to return +meaningful results. To mitigate the issue, `histogram_quantile` assumes that +tiny relative differences between consecutive buckets are happening because of +floating point precision errors and ignores them. (The threshold to ignore a +difference between two buckets is a trillionth (1e-12) of the sum of both +buckets.) Furthermore, if there are non-monotonic bucket counts even after this +adjustment, they are increased to the value of the previous buckets to enforce +monotonicity. The latter is evidence for an actual issue with the input data +and is therefore flagged with an informational annotation reading `input to +histogram_quantile needed to be fixed for monotonicity`. If you encounter this +annotation, you should find and remove the source of the invalid data. ## `histogram_stddev()` and `histogram_stdvar()` @@ -380,15 +405,22 @@ do not show up in the returned vector. Similarly, `histogram_stdvar(v instant-vector)` returns the estimated standard variance of observations in a native histogram. -## `holt_winters()` +## `double_exponential_smoothing()` -`holt_winters(v range-vector, sf scalar, tf scalar)` produces a smoothed value +**This function has to be enabled via the [feature flag](../feature_flags.md#experimental-promql-functions) `--enable-feature=promql-experimental-functions`.** + +`double_exponential_smoothing(v range-vector, sf scalar, tf scalar)` produces a smoothed value for time series based on the range in `v`. The lower the smoothing factor `sf`, the more importance is given to old data. The higher the trend factor `tf`, the more trends in the data is considered. Both `sf` and `tf` must be between 0 and 1. +For additional details, refer to [NIST Engineering Statistics Handbook](https://www.itl.nist.gov/div898/handbook/pmc/section4/pmc433.htm). +In Prometheus V2 this function was called `holt_winters`. This caused confusion +since the Holt-Winters method usually refers to triple exponential smoothing. +Double exponential smoothing as implemented here is also referred to as "Holt +Linear". -`holt_winters` should only be used with gauges. +`double_exponential_smoothing` should only be used with gauges. ## `hour()` @@ -432,6 +464,97 @@ by the number of seconds under the specified time range window, and should be used primarily for human readability. 
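[Editor's note — illustrative example, not part of the patch.] A quick sketch of the "syntactic sugar" relationship described above; the metric and label values are hypothetical:

```
# Over a 5-minute (300-second) window, these two expressions return the same
# value, because increase() is rate() scaled by the window length in seconds.
increase(http_requests_total{job="api-server"}[5m])
rate(http_requests_total{job="api-server"}[5m]) * 300
```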
Use `rate` in recording rules so that increases are tracked consistently on a per-second basis. +## `info()` (experimental) + +_The `info` function is an experiment to improve UX +around including labels from [info metrics](https://grafana.com/blog/2021/08/04/how-to-use-promql-joins-for-more-effective-queries-of-prometheus-metrics-at-scale/#info-metrics). +The behavior of this function may change in future versions of Prometheus, +including its removal from PromQL. `info` has to be enabled via the +[feature flag](../feature_flags.md#experimental-promql-functions) `--enable-feature=promql-experimental-functions`._ + +`info(v instant-vector, [data-label-selector instant-vector])` finds, for each time +series in `v`, all info series with matching _identifying_ labels (more on +this later), and adds the union of their _data_ (i.e., non-identifying) labels +to the time series. The second argument `data-label-selector` is optional. +It is not a real instant vector, but uses a subset of its syntax. +It must start and end with curly braces (`{ ... }`) and may only contain label matchers. +The label matchers are used to constrain which info series to consider +and which data labels to add to `v`. + +Identifying labels of an info series are the subset of labels that uniquely +identify the info series. The remaining labels are considered +_data labels_ (also called non-identifying). (Note that Prometheus's concept +of time series identity always includes _all_ the labels. For the sake of the `info` +function, we “logically” define info series identity in a different way than +in the conventional Prometheus view.) The identifying labels of an info series +are used to join it to regular (non-info) series, i.e. those series that have +the same labels as the identifying labels of the info series. The data labels, which are +the ones added to the regular series by the `info` function, effectively encode +metadata key value pairs. (This implies that a change in the data labels +in the conventional Prometheus view constitutes the end of one info series and +the beginning of a new info series, while the “logical” view of the `info` function is +that the same info series continues to exist, just with different “data”.) + +The conventional approach of adding data labels is sometimes called a “join query”, +as illustrated by the following example: + +``` + rate(http_server_request_duration_seconds_count[2m]) +* on (job, instance) group_left (k8s_cluster_name) + target_info +``` + +The core of the query is the expression `rate(http_server_request_duration_seconds_count[2m])`. +But to add data labels from an info metric, the user has to use elaborate +(and not very obvious) syntax to specify which info metric to use (`target_info`), what the +identifying labels are (`on (job, instance)`), and which data labels to add +(`group_left (k8s_cluster_name)`). + +This query is not only verbose and hard to write, it might also run into an “identity crisis”: +If any of the data labels of `target_info` changes, Prometheus sees that as a change of series +(as alluded to above, Prometheus just has no native concept of non-identifying labels). +If the old `target_info` series is not properly marked as stale (which can happen with certain ingestion paths), +the query above will fail for up to 5m (the lookback delta) because it will find a conflicting +match with both the old and the new version of `target_info`. 
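[Editor's note — illustrative example, not part of the patch.] To make the "identity crisis" concrete, here is a hypothetical pair of `target_info` series in which only a data label differs; both match the join on `(job, instance)` within the lookback window:

```
# Old series (should be marked stale, but may not be on some ingestion paths):
target_info{job="api", instance="10.0.0.1:8080", k8s_cluster_name="staging"} 1
# New series:
target_info{job="api", instance="10.0.0.1:8080", k8s_cluster_name="prod"} 1
```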
+ +The `info` function not only resolves this conflict in favor of the newer series, it also simplifies the syntax +because it knows about the available info series and what their identifying labels are. The example query +looks like this with the `info` function: + +``` +info( + rate(http_server_request_duration_seconds_count[2m]), + {k8s_cluster_name=~".+"} +) +``` + +The common case of adding _all_ data labels can be achieved by +omitting the 2nd argument of the `info` function entirely, simplifying +the example even more: + +``` +info(rate(http_server_request_duration_seconds_count[2m])) +``` + +While `info` normally automatically finds all matching info series, it's possible to +restrict them by providing a `__name__` label matcher, e.g. +`{__name__="target_info"}`. + +### Limitations + +In its current iteration, `info` defaults to considering only info series with +the name `target_info`. It also assumes that the identifying info series labels are +`instance` and `job`. `info` does support other info series names however, through +`__name__` label matchers. E.g., one can explicitly say to consider both +`target_info` and `build_info` as follows: +`{__name__=~"(target|build)_info"}`. However, the identifying labels always +have to be `instance` and `job`. + +These limitations are partially defeating the purpose of the `info` function. +At the current stage, this is an experiment to find out how useful the approach +turns out to be in practice. A final version of the `info` function will indeed +consider all matching info series and with their appropriate identifying labels. + ## `irate()` `irate(v range-vector)` calculates the per-second instant rate of increase of diff --git a/docs/querying/remote_read_api.md b/docs/querying/remote_read_api.md index efbd08e984..76de112342 100644 --- a/docs/querying/remote_read_api.md +++ b/docs/querying/remote_read_api.md @@ -17,7 +17,8 @@ Request are made to the following endpoint. ### Samples -This returns a message that includes a list of raw samples. +This returns a message that includes a list of raw samples matching the +requested query. ### Streamed Chunks diff --git a/docs/stability.md b/docs/stability.md index 1fd2e51e0c..cb30b8ad99 100644 --- a/docs/stability.md +++ b/docs/stability.md @@ -9,7 +9,7 @@ Prometheus promises API stability within a major version, and strives to avoid breaking changes for key features. Some features, which are cosmetic, still under development, or depend on 3rd party services, are not covered by this. -Things considered stable for 2.x: +Things considered stable for 3.x: * The query language and data model * Alerting and recording rules @@ -18,21 +18,25 @@ Things considered stable for 2.x: * Configuration file format (minus the service discovery remote read/write, see below) * Rule/alert file format * Console template syntax and semantics -* Remote write sending, per the [1.0 specification](https://prometheus.io/docs/concepts/remote_write_spec/). 
+* Remote write sending, per the [1.0 specification](https://prometheus.io/docs/concepts/remote_write_spec/) and receiving +* Agent mode +* OTLP receiver endpoint -Things considered unstable for 2.x: +Things considered unstable for 3.x: * Any feature listed as experimental or subject to change, including: - * The [`holt_winters` PromQL function](https://github.com/prometheus/prometheus/issues/2458) - * Remote write receiving, remote read and the remote read endpoint + * The [`double_exponential_smoothing` PromQL function](https://github.com/prometheus/prometheus/issues/2458) + * Remote read and the remote read endpoint * Server-side HTTPS and basic authentication -* Service discovery integrations, with the exception of `static_configs` and `file_sd_configs` +* Service discovery integrations, with the exception of `static_configs`, `file_sd_configs` and `http_sd_config` * Go APIs of packages that are part of the server * HTML generated by the web UI * The metrics in the /metrics endpoint of Prometheus itself * Exact on-disk format. Potential changes however, will be forward compatible and transparently handled by Prometheus * The format of the logs +Prometheus 2.x stability guarantees can be found [in the 2.x documentation](https://prometheus.io/docs/prometheus/2.55/stability/). + As long as you are not using any features marked as experimental/unstable, an upgrade within a major version can usually be performed without any operational adjustments and very little risk that anything will break. Any breaking changes diff --git a/docs/storage.md b/docs/storage.md index 55d4309d37..2142c970ff 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -87,10 +87,9 @@ or 31 days, whichever is smaller. Prometheus has several flags that configure local storage. The most important are: - `--storage.tsdb.path`: Where Prometheus writes its database. Defaults to `data/`. -- `--storage.tsdb.retention.time`: How long to retain samples in storage. When this flag is - set, it overrides `storage.tsdb.retention`. If neither this flag nor `storage.tsdb.retention` - nor `storage.tsdb.retention.size` is set, the retention time defaults to `15d`. - Supported units: y, w, d, h, m, s, ms. +- `--storage.tsdb.retention.time`: How long to retain samples in storage. If neither + this flag nor `storage.tsdb.retention.size` is set, the retention time defaults to + `15d`. Supported units: y, w, d, h, m, s, ms. - `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain. The oldest data will be removed first. Defaults to `0` or disabled. Units supported: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Only @@ -98,7 +97,6 @@ Prometheus has several flags that configure local storage. The most important ar chunks are counted in the total size. So the minimum requirement for the disk is the peak space taken by the `wal` (the WAL and Checkpoint) and `chunks_head` (m-mapped Head chunks) directory combined (peaks every 2 hours). -- `--storage.tsdb.retention`: Deprecated in favor of `storage.tsdb.retention.time`. - `--storage.tsdb.wal-compression`: Enables compression of the write-ahead log (WAL). Depending on your data, you can expect the WAL size to be halved with little extra cpu load. This flag was introduced in 2.11.0 and enabled by default in 2.20.0. @@ -146,7 +144,7 @@ a buffer, ensuring that older entries will be removed before the allocated stora for Prometheus becomes full. 
At present, we recommend setting the retention size to, at most, 80-85% of your -allocated Prometheus disk space. This increases the likelihood that older entires +allocated Prometheus disk space. This increases the likelihood that older entries will be removed prior to hitting any disk limitations. ## Remote storage integrations @@ -157,31 +155,27 @@ a set of interfaces that allow integrating with remote storage systems. ### Overview -Prometheus integrates with remote storage systems in three ways: +Prometheus integrates with remote storage systems in four ways: -- Prometheus can write samples that it ingests to a remote URL in a standardized format. -- Prometheus can receive samples from other Prometheus servers in a standardized format. -- Prometheus can read (back) sample data from a remote URL in a standardized format. +- Prometheus can write samples that it ingests to a remote URL in a [Remote Write format](https://prometheus.io/docs/specs/remote_write_spec_2_0/). +- Prometheus can receive samples from other clients in a [Remote Write format](https://prometheus.io/docs/specs/remote_write_spec_2_0/). +- Prometheus can read (back) sample data from a remote URL in a [Remote Read format](https://github.com/prometheus/prometheus/blob/main/prompb/remote.proto#L31). +- Prometheus can return sample data requested by clients in a [Remote Read format](https://github.com/prometheus/prometheus/blob/main/prompb/remote.proto#L31). ![Remote read and write architecture](images/remote_integrations.png) -The read and write protocols both use a snappy-compressed protocol buffer encoding over -HTTP. The protocols are not considered as stable APIs yet and may change to use gRPC -over HTTP/2 in the future, when all hops between Prometheus and the remote storage can -safely be assumed to support HTTP/2. +The remote read and write protocols both use a snappy-compressed protocol buffer encoding over +HTTP. The read protocol is not yet considered as stable API. -For details on configuring remote storage integrations in Prometheus, see the +The write protocol has a [stable specification for 1.0 version](https://prometheus.io/docs/specs/remote_write_spec/) +and [experimental specification for 2.0 version](https://prometheus.io/docs/specs/remote_write_spec_2_0/), +both supported by Prometheus server. + +For details on configuring remote storage integrations in Prometheus as a client, see the [remote write](configuration/configuration.md#remote_write) and [remote read](configuration/configuration.md#remote_read) sections of the Prometheus configuration documentation. -The built-in remote write receiver can be enabled by setting the -`--web.enable-remote-write-receiver` command line flag. When enabled, -the remote write receiver endpoint is `/api/v1/write`. - -For details on the request and response messages, see the -[remote storage protocol buffer definitions](https://github.com/prometheus/prometheus/blob/main/prompb/remote.proto). - Note that on the read path, Prometheus only fetches raw series data for a set of label selectors and time ranges from the remote end. All PromQL evaluation on the raw data still happens in Prometheus itself. This means that remote read queries @@ -189,6 +183,11 @@ have some scalability limit, since all necessary data needs to be loaded into th querying Prometheus server first and then processed there. However, supporting fully distributed evaluation of PromQL was deemed infeasible for the time being. +Prometheus also serves both protocols. 
The built-in remote write receiver can be enabled +by setting the `--web.enable-remote-write-receiver` command line flag. When enabled, +the remote write receiver endpoint is `/api/v1/write`. The remote read endpoint is +available on [`/api/v1/read`](https://prometheus.io/docs/prometheus/latest/querying/remote_read_api/). + ### Existing integrations To learn more about existing integrations with remote storage systems, see the diff --git a/documentation/examples/custom-sd/adapter-usage/main.go b/documentation/examples/custom-sd/adapter-usage/main.go index 8ccbafe6f1..128132a8d2 100644 --- a/documentation/examples/custom-sd/adapter-usage/main.go +++ b/documentation/examples/custom-sd/adapter-usage/main.go @@ -18,6 +18,7 @@ import ( "encoding/json" "fmt" "io" + "log/slog" "net" "net/http" "os" @@ -26,10 +27,9 @@ import ( "time" "github.com/alecthomas/kingpin/v2" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" prom_discovery "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -41,7 +41,7 @@ var ( a = kingpin.New("sd adapter usage", "Tool to generate file_sd target files for unimplemented SD mechanisms.") outputFile = a.Flag("output.file", "Output file for file_sd compatible file.").Default("custom_sd.json").String() listenAddress = a.Flag("listen.address", "The address the Consul HTTP API is listening on for requests.").Default("localhost:8500").String() - logger log.Logger + logger *slog.Logger // addressLabel is the name for the label containing a target's address. addressLabel = model.MetaLabelPrefix + "consul_address" @@ -90,7 +90,7 @@ type discovery struct { address string refreshInterval int tagSeparator string - logger log.Logger + logger *slog.Logger oldSourceList map[string]bool } @@ -164,7 +164,7 @@ func (d *discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { var srvs map[string][]string resp, err := http.Get(fmt.Sprintf("http://%s/v1/catalog/services", d.address)) if err != nil { - level.Error(d.logger).Log("msg", "Error getting services list", "err", err) + d.logger.Error("Error getting services list", "err", err) time.Sleep(time.Duration(d.refreshInterval) * time.Second) continue } @@ -173,7 +173,7 @@ func (d *discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { io.Copy(io.Discard, resp.Body) resp.Body.Close() if err != nil { - level.Error(d.logger).Log("msg", "Error reading services list", "err", err) + d.logger.Error("Error reading services list", "err", err) time.Sleep(time.Duration(d.refreshInterval) * time.Second) continue } @@ -181,7 +181,7 @@ func (d *discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { err = json.Unmarshal(b, &srvs) resp.Body.Close() if err != nil { - level.Error(d.logger).Log("msg", "Error parsing services list", "err", err) + d.logger.Error("Error parsing services list", "err", err) time.Sleep(time.Duration(d.refreshInterval) * time.Second) continue } @@ -200,13 +200,13 @@ func (d *discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { } resp, err := http.Get(fmt.Sprintf("http://%s/v1/catalog/service/%s", d.address, name)) if err != nil { - level.Error(d.logger).Log("msg", "Error getting services nodes", "service", name, "err", err) + d.logger.Error("Error getting services nodes", "service", name, "err", err) break } tg, err := d.parseServiceNodes(resp, name) if err != nil { - 
level.Error(d.logger).Log("msg", "Error parsing services nodes", "service", name, "err", err) + d.logger.Error("Error parsing services nodes", "service", name, "err", err) break } tgs = append(tgs, tg) @@ -254,8 +254,7 @@ func main() { fmt.Println("err: ", err) return } - logger = log.NewSyncLogger(log.NewLogfmtLogger(os.Stdout)) - logger = log.With(logger, "ts", log.DefaultTimestampUTC, "caller", log.DefaultCaller) + logger = promslog.New(&promslog.Config{}) ctx := context.Background() @@ -272,7 +271,7 @@ func main() { } if err != nil { - level.Error(logger).Log("msg", "failed to create discovery metrics", "err", err) + logger.Error("failed to create discovery metrics", "err", err) os.Exit(1) } @@ -280,7 +279,7 @@ func main() { refreshMetrics := prom_discovery.NewRefreshMetrics(reg) metrics, err := prom_discovery.RegisterSDMetrics(reg, refreshMetrics) if err != nil { - level.Error(logger).Log("msg", "failed to register service discovery metrics", "err", err) + logger.Error("failed to register service discovery metrics", "err", err) os.Exit(1) } diff --git a/documentation/examples/custom-sd/adapter/adapter.go b/documentation/examples/custom-sd/adapter/adapter.go index dcf5a2b78c..b242c4eaa0 100644 --- a/documentation/examples/custom-sd/adapter/adapter.go +++ b/documentation/examples/custom-sd/adapter/adapter.go @@ -18,13 +18,12 @@ import ( "context" "encoding/json" "fmt" + "log/slog" "os" "path/filepath" "reflect" "sort" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" @@ -55,7 +54,7 @@ type Adapter struct { manager *discovery.Manager output string name string - logger log.Logger + logger *slog.Logger } func mapToArray(m map[string]*customSD) []customSD { @@ -106,7 +105,7 @@ func (a *Adapter) refreshTargetGroups(allTargetGroups map[string][]*targetgroup. a.groups = tempGroups err := a.writeOutput() if err != nil { - level.Error(log.With(a.logger, "component", "sd-adapter")).Log("err", err) + a.logger.With("component", "sd-adapter").Error("failed to write output", "err", err) } } } @@ -163,7 +162,7 @@ func (a *Adapter) Run() { } // NewAdapter creates a new instance of Adapter. -func NewAdapter(ctx context.Context, file, name string, d discovery.Discoverer, logger log.Logger, sdMetrics map[string]discovery.DiscovererMetrics, registerer prometheus.Registerer) *Adapter { +func NewAdapter(ctx context.Context, file, name string, d discovery.Discoverer, logger *slog.Logger, sdMetrics map[string]discovery.DiscovererMetrics, registerer prometheus.Registerer) *Adapter { return &Adapter{ ctx: ctx, disc: d, diff --git a/documentation/examples/prometheus-ovhcloud.yml b/documentation/examples/prometheus-ovhcloud.yml index 21facad1ca..b2cc60af25 100644 --- a/documentation/examples/prometheus-ovhcloud.yml +++ b/documentation/examples/prometheus-ovhcloud.yml @@ -1,4 +1,4 @@ -# An example scrape configuration for running Prometheus with Ovhcloud. +# An example scrape configuration for running Prometheus with OVHcloud. 
scrape_configs: - job_name: 'ovhcloud' ovhcloud_sd_configs: diff --git a/documentation/examples/remote_storage/go.mod b/documentation/examples/remote_storage/go.mod index 7fa830c7ca..0aad437588 100644 --- a/documentation/examples/remote_storage/go.mod +++ b/documentation/examples/remote_storage/go.mod @@ -4,12 +4,11 @@ go 1.22.0 require ( github.com/alecthomas/kingpin/v2 v2.4.0 - github.com/go-kit/log v0.2.1 github.com/gogo/protobuf v1.3.2 github.com/golang/snappy v0.0.4 github.com/influxdata/influxdb v1.11.6 - github.com/prometheus/client_golang v1.20.0 - github.com/prometheus/common v0.57.0 + github.com/prometheus/client_golang v1.20.4 + github.com/prometheus/common v0.60.0 github.com/prometheus/prometheus v0.53.1 github.com/stretchr/testify v1.9.0 ) @@ -26,6 +25,7 @@ require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dennwc/varint v1.0.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-kit/log v0.2.1 // indirect github.com/go-logfmt/logfmt v0.6.0 // indirect github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect @@ -55,11 +55,11 @@ require ( go.opentelemetry.io/otel/trace v1.27.0 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.25.0 // indirect - golang.org/x/net v0.27.0 // indirect - golang.org/x/oauth2 v0.21.0 // indirect - golang.org/x/sys v0.22.0 // indirect - golang.org/x/text v0.16.0 // indirect + golang.org/x/crypto v0.27.0 // indirect + golang.org/x/net v0.29.0 // indirect + golang.org/x/oauth2 v0.23.0 // indirect + golang.org/x/sys v0.25.0 // indirect + golang.org/x/text v0.18.0 // indirect golang.org/x/time v0.5.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect google.golang.org/grpc v1.65.0 // indirect diff --git a/documentation/examples/remote_storage/go.sum b/documentation/examples/remote_storage/go.sum index 8f71c3c4a4..936b448d84 100644 --- a/documentation/examples/remote_storage/go.sum +++ b/documentation/examples/remote_storage/go.sum @@ -253,8 +253,8 @@ github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXP github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.20.0 h1:jBzTZ7B099Rg24tny+qngoynol8LtVYlA2bqx3vEloI= -github.com/prometheus/client_golang v1.20.0/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= +github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -264,8 +264,8 @@ github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y8 github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common 
v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.57.0 h1:Ro/rKjwdq9mZn1K5QPctzh+MA4Lp0BuYk5ZZEVhoNcY= -github.com/prometheus/common v0.57.0/go.mod h1:7uRPFSUTbfZWsJ7MHY56sqt7hLQu3bxXHDnNhl8E9qI= +github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJNllA= +github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= github.com/prometheus/common/sigv4 v0.1.0/go.mod h1:2Jkxxk9yYvCkE5G1sQT7GuEXm57JrvHu9k5YwTjsNtI= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= @@ -323,8 +323,8 @@ golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnf golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.25.0 h1:ypSNr+bnYL2YhwoMt2zPxHFmbAN1KZs/njMG3hxUp30= -golang.org/x/crypto v0.25.0/go.mod h1:T+wALwcMOSE0kXgUAnPAHqTLW+XHgcELELW8VaDgm/M= +golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= +golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= golang.org/x/exp v0.0.0-20240119083558-1b970713d09a h1:Q8/wZp0KX97QFTc2ywcOE0YRjZPVIx+MXInMzdvQqcA= golang.org/x/exp v0.0.0-20240119083558-1b970713d09a/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -344,20 +344,20 @@ golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81R golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= -golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= +golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= +golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= -golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= +golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -373,17 +373,17 @@ golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= +golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= -golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= +golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= +golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= +golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go b/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go index 36242a8f4d..b02560dbab 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go @@ -16,19 +16,19 @@ package graphite import ( "bytes" "fmt" + "log/slog" "math" "net" "sort" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" ) // Client allows sending batches of Prometheus samples to Graphite. 
type Client struct { - logger log.Logger + logger *slog.Logger address string transport string @@ -37,9 +37,9 @@ type Client struct { } // NewClient creates a new Client. -func NewClient(logger log.Logger, address, transport string, timeout time.Duration, prefix string) *Client { +func NewClient(logger *slog.Logger, address, transport string, timeout time.Duration, prefix string) *Client { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } return &Client{ logger: logger, @@ -93,7 +93,7 @@ func (c *Client) Write(samples model.Samples) error { t := float64(s.Timestamp.UnixNano()) / 1e9 v := float64(s.Value) if math.IsNaN(v) || math.IsInf(v, 0) { - level.Debug(c.logger).Log("msg", "Cannot send value to Graphite, skipping sample", "value", v, "sample", s) + c.logger.Debug("Cannot send value to Graphite, skipping sample", "value", v, "sample", s) continue } fmt.Fprintf(&buf, "%s %f %f\n", k, v, t) diff --git a/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go b/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go index e84ed9e129..6ae40f8173 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go @@ -17,22 +17,22 @@ import ( "encoding/json" "errors" "fmt" + "log/slog" "math" "os" "strings" - "github.com/go-kit/log" - "github.com/go-kit/log/level" influx "github.com/influxdata/influxdb/client/v2" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/prompb" ) // Client allows sending batches of Prometheus samples to InfluxDB. type Client struct { - logger log.Logger + logger *slog.Logger client influx.Client database string @@ -41,16 +41,16 @@ type Client struct { } // NewClient creates a new Client. -func NewClient(logger log.Logger, conf influx.HTTPConfig, db, rp string) *Client { +func NewClient(logger *slog.Logger, conf influx.HTTPConfig, db, rp string) *Client { c, err := influx.NewHTTPClient(conf) // Currently influx.NewClient() *should* never return an error. 
if err != nil { - level.Error(logger).Log("err", err) + logger.Error("Error creating influx HTTP client", "err", err) os.Exit(1) } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } return &Client{ @@ -84,7 +84,7 @@ func (c *Client) Write(samples model.Samples) error { for _, s := range samples { v := float64(s.Value) if math.IsNaN(v) || math.IsInf(v, 0) { - level.Debug(c.logger).Log("msg", "Cannot send to InfluxDB, skipping sample", "value", v, "sample", s) + c.logger.Debug("Cannot send to InfluxDB, skipping sample", "value", v, "sample", s) c.ignoredSamples.Inc() continue } diff --git a/documentation/examples/remote_storage/remote_storage_adapter/main.go b/documentation/examples/remote_storage/remote_storage_adapter/main.go index bb348aba7f..7f62990d2e 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/main.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/main.go @@ -17,6 +17,7 @@ package main import ( "fmt" "io" + "log/slog" "net/http" _ "net/http/pprof" "net/url" @@ -26,16 +27,14 @@ import ( "time" "github.com/alecthomas/kingpin/v2" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/gogo/protobuf/proto" "github.com/golang/snappy" influx "github.com/influxdata/influxdb/client/v2" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/common/model" - "github.com/prometheus/common/promlog" - "github.com/prometheus/common/promlog/flag" + "github.com/prometheus/common/promslog" + "github.com/prometheus/common/promslog/flag" "github.com/prometheus/prometheus/documentation/examples/remote_storage/remote_storage_adapter/graphite" "github.com/prometheus/prometheus/documentation/examples/remote_storage/remote_storage_adapter/influxdb" @@ -57,7 +56,7 @@ type config struct { remoteTimeout time.Duration listenAddr string telemetryPath string - promlogConfig promlog.Config + promslogConfig promslog.Config } var ( @@ -105,11 +104,11 @@ func main() { cfg := parseFlags() http.Handle(cfg.telemetryPath, promhttp.Handler()) - logger := promlog.New(&cfg.promlogConfig) + logger := promslog.New(&cfg.promslogConfig) writers, readers := buildClients(logger, cfg) if err := serve(logger, cfg.listenAddr, writers, readers); err != nil { - level.Error(logger).Log("msg", "Failed to listen", "addr", cfg.listenAddr, "err", err) + logger.Error("Failed to listen", "addr", cfg.listenAddr, "err", err) os.Exit(1) } } @@ -120,7 +119,7 @@ func parseFlags() *config { cfg := &config{ influxdbPassword: os.Getenv("INFLUXDB_PW"), - promlogConfig: promlog.Config{}, + promslogConfig: promslog.Config{}, } a.Flag("graphite-address", "The host:port of the Graphite server to send samples to. None, if empty."). @@ -146,7 +145,7 @@ func parseFlags() *config { a.Flag("web.telemetry-path", "Address to listen on for web endpoints."). 
Default("/metrics").StringVar(&cfg.telemetryPath) - flag.AddFlags(a, &cfg.promlogConfig) + flag.AddFlags(a, &cfg.promslogConfig) _, err := a.Parse(os.Args[1:]) if err != nil { @@ -168,19 +167,19 @@ type reader interface { Name() string } -func buildClients(logger log.Logger, cfg *config) ([]writer, []reader) { +func buildClients(logger *slog.Logger, cfg *config) ([]writer, []reader) { var writers []writer var readers []reader if cfg.graphiteAddress != "" { c := graphite.NewClient( - log.With(logger, "storage", "Graphite"), + logger.With("storage", "Graphite"), cfg.graphiteAddress, cfg.graphiteTransport, cfg.remoteTimeout, cfg.graphitePrefix) writers = append(writers, c) } if cfg.opentsdbURL != "" { c := opentsdb.NewClient( - log.With(logger, "storage", "OpenTSDB"), + logger.With("storage", "OpenTSDB"), cfg.opentsdbURL, cfg.remoteTimeout, ) @@ -189,7 +188,7 @@ func buildClients(logger log.Logger, cfg *config) ([]writer, []reader) { if cfg.influxdbURL != "" { url, err := url.Parse(cfg.influxdbURL) if err != nil { - level.Error(logger).Log("msg", "Failed to parse InfluxDB URL", "url", cfg.influxdbURL, "err", err) + logger.Error("Failed to parse InfluxDB URL", "url", cfg.influxdbURL, "err", err) os.Exit(1) } conf := influx.HTTPConfig{ @@ -199,7 +198,7 @@ func buildClients(logger log.Logger, cfg *config) ([]writer, []reader) { Timeout: cfg.remoteTimeout, } c := influxdb.NewClient( - log.With(logger, "storage", "InfluxDB"), + logger.With("storage", "InfluxDB"), conf, cfg.influxdbDatabase, cfg.influxdbRetentionPolicy, @@ -208,15 +207,15 @@ func buildClients(logger log.Logger, cfg *config) ([]writer, []reader) { writers = append(writers, c) readers = append(readers, c) } - level.Info(logger).Log("msg", "Starting up...") + logger.Info("Starting up...") return writers, readers } -func serve(logger log.Logger, addr string, writers []writer, readers []reader) error { +func serve(logger *slog.Logger, addr string, writers []writer, readers []reader) error { http.HandleFunc("/write", func(w http.ResponseWriter, r *http.Request) { req, err := remote.DecodeWriteRequest(r.Body) if err != nil { - level.Error(logger).Log("msg", "Read error", "err", err.Error()) + logger.Error("Read error", "err", err.Error()) http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -238,21 +237,21 @@ func serve(logger log.Logger, addr string, writers []writer, readers []reader) e http.HandleFunc("/read", func(w http.ResponseWriter, r *http.Request) { compressed, err := io.ReadAll(r.Body) if err != nil { - level.Error(logger).Log("msg", "Read error", "err", err.Error()) + logger.Error("Read error", "err", err.Error()) http.Error(w, err.Error(), http.StatusInternalServerError) return } reqBuf, err := snappy.Decode(nil, compressed) if err != nil { - level.Error(logger).Log("msg", "Decode error", "err", err.Error()) + logger.Error("Decode error", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } var req prompb.ReadRequest if err := proto.Unmarshal(reqBuf, &req); err != nil { - level.Error(logger).Log("msg", "Unmarshal error", "err", err.Error()) + logger.Error("Unmarshal error", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -267,7 +266,7 @@ func serve(logger log.Logger, addr string, writers []writer, readers []reader) e var resp *prompb.ReadResponse resp, err = reader.Read(&req) if err != nil { - level.Warn(logger).Log("msg", "Error executing query", "query", req, "storage", reader.Name(), "err", err) + logger.Warn("Error executing query", "query", 
req, "storage", reader.Name(), "err", err) http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -283,7 +282,7 @@ func serve(logger log.Logger, addr string, writers []writer, readers []reader) e compressed = snappy.Encode(nil, data) if _, err := w.Write(compressed); err != nil { - level.Warn(logger).Log("msg", "Error writing response", "storage", reader.Name(), "err", err) + logger.Warn("Error writing response", "storage", reader.Name(), "err", err) } }) @@ -309,12 +308,12 @@ func protoToSamples(req *prompb.WriteRequest) model.Samples { return samples } -func sendSamples(logger log.Logger, w writer, samples model.Samples) { +func sendSamples(logger *slog.Logger, w writer, samples model.Samples) { begin := time.Now() err := w.Write(samples) duration := time.Since(begin).Seconds() if err != nil { - level.Warn(logger).Log("msg", "Error sending samples to remote storage", "err", err, "storage", w.Name(), "num_samples", len(samples)) + logger.Warn("Error sending samples to remote storage", "err", err, "storage", w.Name(), "num_samples", len(samples)) failedSamples.WithLabelValues(w.Name()).Add(float64(len(samples))) } sentSamples.WithLabelValues(w.Name()).Add(float64(len(samples))) diff --git a/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go b/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go index abb1d0b7d3..433c70527a 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go @@ -19,13 +19,12 @@ import ( "encoding/json" "fmt" "io" + "log/slog" "math" "net/http" "net/url" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" ) @@ -36,14 +35,14 @@ const ( // Client allows sending batches of Prometheus samples to OpenTSDB. type Client struct { - logger log.Logger + logger *slog.Logger url string timeout time.Duration } // NewClient creates a new Client. 
-func NewClient(logger log.Logger, url string, timeout time.Duration) *Client { +func NewClient(logger *slog.Logger, url string, timeout time.Duration) *Client { return &Client{ logger: logger, url: url, @@ -78,7 +77,7 @@ func (c *Client) Write(samples model.Samples) error { for _, s := range samples { v := float64(s.Value) if math.IsNaN(v) || math.IsInf(v, 0) { - level.Debug(c.logger).Log("msg", "Cannot send value to OpenTSDB, skipping sample", "value", v, "sample", s) + c.logger.Debug("Cannot send value to OpenTSDB, skipping sample", "value", v, "sample", s) continue } metric := TagValue(s.Metric[model.MetricNameLabel]) diff --git a/go.mod b/go.mod index 1c167886d5..a00e5bd539 100644 --- a/go.mod +++ b/go.mod @@ -17,23 +17,21 @@ require ( github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 github.com/cespare/xxhash/v2 v2.3.0 github.com/dennwc/varint v1.0.0 - github.com/digitalocean/godo v1.121.0 - github.com/docker/docker v27.1.1+incompatible + github.com/digitalocean/godo v1.126.0 + github.com/docker/docker v27.3.1+incompatible github.com/edsrzf/mmap-go v1.1.0 - github.com/envoyproxy/go-control-plane v0.12.1-0.20240621013728-1eb8caab5155 + github.com/envoyproxy/go-control-plane v0.13.0 github.com/envoyproxy/protoc-gen-validate v1.1.0 github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/fsnotify/fsnotify v1.7.0 - github.com/go-kit/log v0.2.1 - github.com/go-logfmt/logfmt v0.6.0 github.com/go-openapi/strfmt v0.23.0 - github.com/go-zookeeper/zk v1.0.3 + github.com/go-zookeeper/zk v1.0.4 github.com/gogo/protobuf v1.3.2 github.com/golang/snappy v0.0.4 github.com/google/go-cmp v0.6.0 github.com/google/pprof v0.0.0-20240711041743-f6c9dda6c6da github.com/google/uuid v1.6.0 - github.com/gophercloud/gophercloud v1.14.0 + github.com/gophercloud/gophercloud v1.14.1 github.com/grafana/regexp v0.0.0-20240607082908-2cb410fa05da github.com/grpc-ecosystem/grpc-gateway v1.16.0 github.com/hashicorp/consul/api v1.29.4 @@ -41,10 +39,10 @@ require ( github.com/hetznercloud/hcloud-go/v2 v2.13.1 github.com/ionos-cloud/sdk-go/v6 v6.2.1 github.com/json-iterator/go v1.1.12 - github.com/klauspost/compress v1.17.9 + github.com/klauspost/compress v1.17.10 github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b - github.com/linode/linodego v1.38.0 - github.com/miekg/dns v1.1.61 + github.com/linode/linodego v1.41.0 + github.com/miekg/dns v1.1.62 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f github.com/nsf/jsondiff v0.0.0-20230430225905-43f6cf3098c1 @@ -54,42 +52,42 @@ require ( github.com/prometheus/alertmanager v0.27.0 github.com/prometheus/client_golang v1.20.5 github.com/prometheus/client_model v0.6.1 - github.com/prometheus/common v0.58.0 + github.com/prometheus/common v0.60.0 github.com/prometheus/common/assets v0.2.0 github.com/prometheus/common/sigv4 v0.1.0 - github.com/prometheus/exporter-toolkit v0.11.0 - github.com/scaleway/scaleway-sdk-go v1.0.0-beta.29 + github.com/prometheus/exporter-toolkit v0.13.0 + github.com/scaleway/scaleway-sdk-go v1.0.0-beta.30 github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c github.com/stretchr/testify v1.9.0 github.com/vultr/govultr/v2 v2.17.2 - go.opentelemetry.io/collector/pdata v1.12.0 - go.opentelemetry.io/collector/semconv v0.105.0 - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 - go.opentelemetry.io/otel v1.29.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 - 
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.28.0 - go.opentelemetry.io/otel/sdk v1.28.0 - go.opentelemetry.io/otel/trace v1.29.0 + go.opentelemetry.io/collector/pdata v1.16.0 + go.opentelemetry.io/collector/semconv v0.110.0 + go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 + go.opentelemetry.io/otel v1.31.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.30.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0 + go.opentelemetry.io/otel/sdk v1.30.0 + go.opentelemetry.io/otel/trace v1.31.0 go.uber.org/atomic v1.11.0 - go.uber.org/automaxprocs v1.5.3 + go.uber.org/automaxprocs v1.6.0 go.uber.org/goleak v1.3.0 go.uber.org/multierr v1.11.0 - golang.org/x/oauth2 v0.22.0 + golang.org/x/oauth2 v0.23.0 golang.org/x/sync v0.8.0 - golang.org/x/sys v0.25.0 - golang.org/x/text v0.17.0 - golang.org/x/time v0.6.0 - golang.org/x/tools v0.23.0 - google.golang.org/api v0.196.0 - google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed - google.golang.org/grpc v1.66.0 + golang.org/x/sys v0.26.0 + golang.org/x/text v0.19.0 + golang.org/x/tools v0.26.0 + google.golang.org/api v0.199.0 + google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1 + google.golang.org/grpc v1.67.1 google.golang.org/protobuf v1.34.2 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 - k8s.io/api v0.29.3 - k8s.io/apimachinery v0.29.3 - k8s.io/client-go v0.29.3 + k8s.io/api v0.31.1 + k8s.io/apimachinery v0.31.1 + k8s.io/client-go v0.31.1 k8s.io/klog v1.0.0 k8s.io/klog/v2 v2.130.1 ) @@ -100,9 +98,9 @@ require ( ) require ( - cloud.google.com/go/auth v0.9.3 // indirect + cloud.google.com/go/auth v0.9.5 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.4 // indirect - cloud.google.com/go/compute/metadata v0.5.0 // indirect + cloud.google.com/go/compute/metadata v0.5.2 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect github.com/Microsoft/go-winio v0.6.1 // indirect @@ -111,7 +109,7 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cilium/ebpf v0.11.0 // indirect - github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b // indirect + github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 // indirect github.com/containerd/cgroups/v3 v3.0.3 // indirect github.com/containerd/log v0.1.0 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect @@ -121,11 +119,10 @@ require ( github.com/docker/go-units v0.5.0 // indirect github.com/dustin/go-humanize v1.0.0 // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect - github.com/evanphx/json-patch v5.6.0+incompatible // indirect github.com/fatih/color v1.16.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/fxamacker/cbor/v2 v2.7.0 // indirect github.com/ghodss/yaml v1.0.0 // indirect - github.com/go-kit/kit v0.12.0 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-openapi/analysis v0.22.2 // indirect @@ -139,7 +136,7 @@ require ( github.com/go-resty/resty/v2 v2.13.1 // indirect github.com/godbus/dbus/v5 v5.0.4 // indirect github.com/golang-jwt/jwt/v5 v5.2.1 // indirect - 
github.com/golang/glog v1.2.1 // indirect + github.com/golang/glog v1.2.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/gnostic-models v0.6.8 // indirect @@ -149,7 +146,7 @@ require ( github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect github.com/googleapis/gax-go/v2 v2.13.0 // indirect github.com/gorilla/websocket v1.5.0 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect github.com/hashicorp/cronexpr v1.1.2 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect @@ -169,6 +166,8 @@ require ( github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mdlayher/socket v0.4.1 // indirect + github.com/mdlayher/vsock v1.2.1 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect @@ -188,30 +187,28 @@ require ( github.com/sirupsen/logrus v1.9.3 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/objx v0.5.2 // indirect + github.com/x448/float16 v0.8.4 // indirect github.com/xhit/go-str2duration/v2 v2.1.0 // indirect go.mongodb.org/mongo-driver v1.14.0 // indirect go.opencensus.io v0.24.0 // indirect - go.opentelemetry.io/otel/metric v1.29.0 // indirect + go.opentelemetry.io/otel/metric v1.31.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect - golang.org/x/crypto v0.26.0 // indirect + golang.org/x/crypto v0.28.0 // indirect golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect - golang.org/x/mod v0.19.0 // indirect - golang.org/x/net v0.28.0 // indirect - golang.org/x/term v0.23.0 // indirect + golang.org/x/mod v0.21.0 // indirect + golang.org/x/net v0.30.0 // indirect + golang.org/x/term v0.25.0 // indirect + golang.org/x/time v0.6.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect + gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gotest.tools/v3 v3.0.3 // indirect k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect - k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect + k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect - sigs.k8s.io/yaml v1.3.0 // indirect -) - -replace ( - k8s.io/klog => github.com/simonpasquier/klog-gokit v0.3.0 - k8s.io/klog/v2 => github.com/simonpasquier/klog-gokit/v3 v3.3.0 + sigs.k8s.io/yaml v1.4.0 // indirect ) // Exclude linodego v1.0.0 as it is no longer published on github. 
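[Editor's note — illustrative sketch, not part of the patch.] The dependency changes above accompany the go-kit/log to log/slog migration visible in the example adapters earlier in this diff. A minimal sketch of the new logging pattern, assuming the `promslog` package from prometheus/common as used in those adapters:

```go
package main

import (
	"log/slog"

	"github.com/prometheus/common/promslog"
)

// work demonstrates the slog call patterns that replace go-kit/log's
// level.Error(logger).Log("msg", ..., "err", ...) and log.With(...) above.
func work(logger *slog.Logger) {
	logger.Error("Error getting services list", "err", "connection refused")
	logger.With("component", "sd-adapter").Info("Starting up...")
}

func main() {
	// promslog.New replaces promlog.New and returns a *slog.Logger.
	logger := promslog.New(&promslog.Config{})
	work(logger)
}
```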
diff --git a/go.sum b/go.sum index 35b4e21e34..91af29198d 100644 --- a/go.sum +++ b/go.sum @@ -12,8 +12,8 @@ cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bP cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= -cloud.google.com/go/auth v0.9.3 h1:VOEUIAADkkLtyfr3BLa3R8Ed/j6w1jTBmARx+wb5w5U= -cloud.google.com/go/auth v0.9.3/go.mod h1:7z6VY+7h3KUdRov5F1i8NDP5ZzWKYmEPO842BgCsmTk= +cloud.google.com/go/auth v0.9.5 h1:4CTn43Eynw40aFVr3GpPqsQponx2jv0BQpjvajsbbzw= +cloud.google.com/go/auth v0.9.5/go.mod h1:Xo0n7n66eHyOWWCnitop6870Ilwo3PiZyodVkkH1xWM= cloud.google.com/go/auth/oauth2adapt v0.2.4 h1:0GWE/FUsXhf6C+jAkWgYm7X9tK8cuEIfy19DBn6B6bY= cloud.google.com/go/auth/oauth2adapt v0.2.4/go.mod h1:jC/jOpwFP6JBxhB3P5Rr0a9HLMC/Pe3eaL4NmdvqPtc= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= @@ -22,8 +22,8 @@ cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvf cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY= -cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY= +cloud.google.com/go/compute/metadata v0.5.2 h1:UxK4uu/Tn+I3p2dYWTfiX4wva7aYlKixAHn3fyqngqo= +cloud.google.com/go/compute/metadata v0.5.2/go.mod h1:C66sj2AluDcIqakBq/M8lw8/ybHgOZqin2obFxa/E5k= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= @@ -63,13 +63,8 @@ github.com/DmitriyVTitov/size v1.5.0 h1:/PzqxYrOyOUX1BXj6J9OuVRVGe+66VL4D9FlUaW5 github.com/DmitriyVTitov/size v1.5.0/go.mod h1:le6rNI4CoLQV1b9gzp1+3d7hMAD/uu2QcJ+aYbNgiU0= github.com/KimMachineGun/automemlimit v0.6.1 h1:ILa9j1onAAMadBsyyUJv5cack8Y1WT26yLj/V+ulKp8= github.com/KimMachineGun/automemlimit v0.6.1/go.mod h1:T7xYht7B8r6AG/AqFcUdc7fzd2bIdBKmepfP2S1svPY= -github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= -github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= -github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= -github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= -github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= github.com/alecthomas/kingpin/v2 v2.4.0 h1:f48lwail6p8zpO1bC4TxtqACaGqHYA22qkHjHpqDjYY= github.com/alecthomas/kingpin/v2 v2.4.0/go.mod h1:0gyi0zQnjuFk8xrkNKamJoyUo382HRL7ATRpFZCw6tE= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= @@ -80,23 +75,17 @@ github.com/alecthomas/units 
v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk5 github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30 h1:t3eaIm0rUkzbrIewtiFmMK5RXHej2XnoXNhxVsAYUfg= github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30/go.mod h1:fvzegU4vN3H1qMT+8wDmzjAcDONcgo2/SZ/TyfdUOFs= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= -github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA= github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= -github.com/aryann/difflib v0.0.0-20170710044230-e206f873d14a/go.mod h1:DAHtR1m6lCRdSC2Tm3DSWRPvIPr6xNKyeHdqDQSQT+A= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= -github.com/aws/aws-lambda-go v1.13.3/go.mod h1:4UKl9IzQMoD+QF79YdCuzCwp8VbmG4VAQwij/eHl5CU= -github.com/aws/aws-sdk-go v1.27.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.38.35/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU= github.com/aws/aws-sdk-go v1.55.5/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU= -github.com/aws/aws-sdk-go-v2 v0.18.0/go.mod h1:JWVYvqSMppoMJC0x5wdwiImzgXTI9FuZwxzkQq9wy+g= github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 h1:6df1vn4bBlDDo4tARvBm7l6KA9iVMnE3NWizDeWSrps= github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3/go.mod h1:CIWtjkly68+yqLPbvwwR/fjNJA/idrtULjZWh2v1ys0= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= @@ -104,8 +93,6 @@ github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+Ce github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= -github.com/casbin/casbin/v2 v2.1.2/go.mod h1:YcPU1XXisHhLzuxH9coDNf2FbKpjGlbCg3n9yuLkIJQ= -github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= @@ -119,26 +106,18 @@ github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y= github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs= github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= 
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= -github.com/clbanning/x2j v0.0.0-20191024224557-825249438eec/go.mod h1:jMjuTZXRI4dUb/I5gc9Hdhagfvm9+RyrPryS/auMzxE= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b h1:ga8SEFjZ60pxLcmhnThWgvH2wg8376yUJmPhEH4H3kw= -github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= -github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= -github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= +github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 h1:N+3sFI5GUjRKBi+i0TxYVST9h4Ie192jJWpHvthBBgg= +github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= github.com/colega/go-yaml-yaml v0.0.0-20220720105220-255a8d16d094 h1:FpZSn61BWXbtyH68+uSv416veEswX1M2HRyQfdHnOyQ= github.com/colega/go-yaml-yaml v0.0.0-20220720105220-255a8d16d094/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= github.com/containerd/cgroups/v3 v3.0.3 h1:S5ByHZ/h9PMe5IOQoN7E+nMc2UcLEM/V48DGDJ9kip0= github.com/containerd/cgroups/v3 v3.0.3/go.mod h1:8HBe7V3aWGLFPd/k03swSIsGjZhHI2WzJmticMgVuz0= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= -github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= -github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -148,43 +127,34 @@ github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE= github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA= github.com/dgraph-io/ristretto v0.1.1 h1:6CWw5tJNgpegArSHpNHJKldNeq03FQCwYvfMVWajOK8= github.com/dgraph-io/ristretto v0.1.1/go.mod h1:S1GPSBCYCIhmVNfcth17y2zZtQT6wzkzgwUve0VDWWA= -github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA= github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= -github.com/digitalocean/godo v1.121.0 h1:ilXiHuEnhbJs2fmFEPX0r/QQ6KfiOIMAhJN3f8NiCfI= -github.com/digitalocean/godo v1.121.0/go.mod 
h1:WQVH83OHUy6gC4gXpEVQKtxTd4L5oCp+5OialidkPLY= +github.com/digitalocean/godo v1.126.0 h1:+Znh7VMQj/E8ArbjWnc7OKGjWfzC+I8OCSRp7r1MdD8= +github.com/digitalocean/godo v1.126.0/go.mod h1:PU8JB6I1XYkQIdHFop8lLAY9ojp6M0XcU0TWaQSxbrc= github.com/distribution/reference v0.5.0 h1:/FUIFXtfc/x2gpa5/VGfiGLuOIdYa1t65IKK2OFGvA0= github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= -github.com/docker/docker v27.1.1+incompatible h1:hO/M4MtV36kzKldqnA37IWhebRA+LnqqcqDja6kVaKY= -github.com/docker/docker v27.1.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker v27.3.1+incompatible h1:KttF0XoteNTicmUtBO0L2tP+J7FGRFTjaEF4k6WdhfI= +github.com/docker/docker v27.3.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= -github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= -github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= -github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= github.com/edsrzf/mmap-go v1.1.0 h1:6EUwBLQ/Mcr1EYLE4Tn1VdW1A4ckqCQWZBw8Hr0kjpQ= github.com/edsrzf/mmap-go v1.1.0/go.mod h1:19H/e8pUPLicwkyNgOykDXkJ9F0MHE+Z52B8EIth78Q= github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= -github.com/envoyproxy/go-control-plane v0.6.9/go.mod h1:SBwIajubJHhxtWwsL9s8ss4safvEdbitLhGGK48rN6g= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/go-control-plane v0.12.1-0.20240621013728-1eb8caab5155 h1:IgJPqnrlY2Mr4pYB6oaMKvFvwJ9H+X6CCY5x1vCTcpc= -github.com/envoyproxy/go-control-plane v0.12.1-0.20240621013728-1eb8caab5155/go.mod h1:5Wkq+JduFtdAXihLmeTJf+tRYIT4KBc2vPXDhwVo1pA= +github.com/envoyproxy/go-control-plane v0.13.0 h1:HzkeUz1Knt+3bK+8LG1bxOO/jzWZmdxpwC51i202les= +github.com/envoyproxy/go-control-plane v0.13.0/go.mod h1:GRaKG3dwvFoTg4nj7aXdZnvMg4d7nvT/wl9WgVXn3Q8= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6Uu2PdjCQwWCJ3bM= github.com/envoyproxy/protoc-gen-validate v1.1.0/go.mod 
h1:sXRDRVmzEbkM7CVcM06s9shE/m23dg3wzjl0UWqJ2q4= -github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= -github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb h1:IT4JYU7k4ikYg1SCxNI1/Tieq/NFvh6dzLdgi7eu0tM= github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb/go.mod h1:bH6Xx7IW64qjjJq8M2u4dxNaBiDfKK+z/3eGDpXEQhc= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= @@ -194,13 +164,12 @@ github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4= -github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20= github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA= github.com/frankban/quicktest v1.14.5/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= -github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= @@ -208,17 +177,11 @@ github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-kit/kit v0.10.0/go.mod h1:xUsJbQ/Fp4kEt7AFgCuvyX4a71u8h9jB8tj/ORgOZ7o= -github.com/go-kit/kit v0.12.0 h1:e4o3o3IsBfAKQh5Qbbiqyfu97Ku7jrO/JbohvztANh4= -github.com/go-kit/kit v0.12.0/go.mod h1:lHd+EkCZPIwYItmGDDRdhinkzX2A1sj+M9biaEaizzs= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= -github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU= -github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= -github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4= -github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= +github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v1.2.2/go.mod 
h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -244,26 +207,21 @@ github.com/go-openapi/validate v0.23.0 h1:2l7PJLzCis4YUGEoW6eoQw3WhyM65WSIcjX6SQ github.com/go-openapi/validate v0.23.0/go.mod h1:EeiAZ5bmpSIOJV1WLfyYF9qp/B1ZgSaEpHTJHtN5cbE= github.com/go-resty/resty/v2 v2.13.1 h1:x+LHXBI2nMB1vqndymf26quycC4aggYJ7DECYbiz03g= github.com/go-resty/resty/v2 v2.13.1/go.mod h1:GznXlLxkq6Nh4sU59rPmUw3VtgpO3aS96ORAI6Q7d+0= -github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= -github.com/go-zookeeper/zk v1.0.3 h1:7M2kwOsc//9VeeFiPtf+uSJlVpU66x9Ba5+8XK7/TDg= -github.com/go-zookeeper/zk v1.0.3/go.mod h1:nOB03cncLtlp4t+UAkGSV+9beXP/akpekBwL+UX1Qcw= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/go-zookeeper/zk v1.0.4 h1:DPzxraQx7OrPyXq2phlGlNSIyWEsAox0RJmjTseMV6I= +github.com/go-zookeeper/zk v1.0.4/go.mod h1:nOB03cncLtlp4t+UAkGSV+9beXP/akpekBwL+UX1Qcw= github.com/godbus/dbus/v5 v5.0.4 h1:9349emZab16e7zQvpmsbtjc18ykshndd8y2PG3sgJbA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/glog v1.2.1 h1:OptwRhECazUx5ix5TTWC3EZhsZEHWcYWY4FQHTIubm4= -github.com/golang/glog v1.2.1/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= -github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/glog v1.2.2 h1:1+mZ9upx1Dh6FmUTFR1naJ77miKiXgALjWOZ3NVFPmY= +github.com/golang/glog v1.2.2/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -292,7 +250,6 @@ github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.4 
h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= @@ -334,7 +291,6 @@ github.com/google/pprof v0.0.0-20240711041743-f6c9dda6c6da/go.mod h1:K1liHPHnj73 github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= -github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -344,30 +300,20 @@ github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+ github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDPT0hH1s= github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A= -github.com/gophercloud/gophercloud v1.14.0 h1:Bt9zQDhPrbd4qX7EILGmy+i7GP35cc+AAL2+wIJpUE8= -github.com/gophercloud/gophercloud v1.14.0/go.mod h1:aAVqcocTSXh2vYFZ1JTvx4EQmfgzxRcNupUfxZbBNDM= -github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= -github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= -github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= +github.com/gophercloud/gophercloud v1.14.1 h1:DTCNaTVGl8/cFu58O1JwWgis9gtISAFONqpMKNg/Vpw= +github.com/gophercloud/gophercloud v1.14.1/go.mod h1:aAVqcocTSXh2vYFZ1JTvx4EQmfgzxRcNupUfxZbBNDM= github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grafana/regexp v0.0.0-20240607082908-2cb410fa05da h1:BML5sNe+bw2uO8t8cQSwe5QhvoP04eHPF7bnaQma0Kw= github.com/grafana/regexp v0.0.0-20240607082908-2cb410fa05da/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= 
-github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= -github.com/hashicorp/consul/api v1.3.0/go.mod h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoPndWW5VkKPlCE= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 h1:asbCHRVmodnJTuQ3qamDwqVOIjwqUPTYmYuemVOx+Ys= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0/go.mod h1:ggCgvZ2r7uOoQjOyu2Y1NhHmEPPzzuhWgcza5M1Ji1I= github.com/hashicorp/consul/api v1.29.4 h1:P6slzxDLBOxUSj3fWo2o65VuKtbtOXFi7TSSgtXutuE= github.com/hashicorp/consul/api v1.29.4/go.mod h1:HUlfw+l2Zy68ceJavv2zAyArl2fqhGWnMycyt56sBgg= github.com/hashicorp/consul/proto-public v0.6.2 h1:+DA/3g/IiKlJZb88NBn0ZgXrxJp2NlvCZdEyl+qxvL0= github.com/hashicorp/consul/proto-public v0.6.2/go.mod h1:cXXbOg74KBNGajC+o8RlA502Esf0R9prcoJgiOX/2Tg= -github.com/hashicorp/consul/sdk v0.3.0/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= github.com/hashicorp/consul/sdk v0.16.1 h1:V8TxTnImoPD5cj0U9Spl0TUxcytjcbbJeADFF07KdHg= github.com/hashicorp/consul/sdk v0.16.1/go.mod h1:fSXvwxB2hmh1FMZCNl6PwX0Q/1wdWtHJcZ7Ea5tns0s= github.com/hashicorp/cronexpr v1.1.2 h1:wG/ZYIKT+RT3QkOdgYc+xsKWVRgnxJ1OJtjjy84fJ9A= @@ -376,7 +322,6 @@ github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brv github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= @@ -394,7 +339,6 @@ github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9 github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= -github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc= github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= @@ -405,51 +349,38 @@ github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/b github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8= github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= -github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod 
h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.6.0 h1:uL2shRDx7RTrOrTCUZEGP/wJUFiUI8QT6E7z5o8jga4= github.com/hashicorp/golang-lru v0.6.0/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= -github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc= -github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hashicorp/memberlist v0.5.0 h1:EtYPN8DpAURiapus508I4n9CzHs2W+8NZGbmmR/prTM= github.com/hashicorp/memberlist v0.5.0/go.mod h1:yvyXLpo0QaGE59Y7hDTsTzDD25JYBZ4mHgHUZ8lrOI0= github.com/hashicorp/nomad/api v0.0.0-20240717122358-3d93bd3778f3 h1:fgVfQ4AC1avVOnu2cfms8VAiD8lUq3vWI8mTocOXN/w= github.com/hashicorp/nomad/api v0.0.0-20240717122358-3d93bd3778f3/go.mod h1:svtxn6QnrQ69P23VvIWMR34tg3vmwLz4UdUzm1dSCgE= -github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= github.com/hashicorp/serf v0.10.1 h1:Z1H2J60yRKvfDYAOZLd2MU0ND4AH/WDz7xYHDWQsIPY= github.com/hashicorp/serf v0.10.1/go.mod h1:yL2t6BqATOLGc5HF7qbFkTfXoPIY0WZdWHfEvMqbG+4= github.com/hetznercloud/hcloud-go/v2 v2.13.1 h1:jq0GP4QaYE5d8xR/Zw17s9qoaESRJMXfGmtD1a/qckQ= github.com/hetznercloud/hcloud-go/v2 v2.13.1/go.mod h1:dhix40Br3fDiBhwaSG/zgaYOFFddpfBm/6R1Zz0IiF0= -github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= -github.com/hudl/fargo v1.3.0/go.mod h1:y3CKSmjA+wD2gak7sUSXTAoopbhU08POFhmITJgmKTg= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= -github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= github.com/ionos-cloud/sdk-go/v6 v6.2.1 h1:mxxN+frNVmbFrmmFfXnBC3g2USYJrl6mc1LW2iNYbFY= github.com/ionos-cloud/sdk-go/v6 v6.2.1/go.mod h1:SXrO9OGyWjd2rZhAhEpdYN6VUAODzzqRdqA9BCviQtI= github.com/jarcoal/httpmock v1.3.1 h1:iUx3whfZWVf3jT01hQTO/Eo5sAYtB2/rqaUuOtpInww= github.com/jarcoal/httpmock v1.3.1/go.mod h1:3yb8rc4BI7TCBhFY8ng0gjuLKJNquuDNiPaZjnENuYg= -github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.6/go.mod 
h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= @@ -457,15 +388,13 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= -github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4dN7jwJOQ1U= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= -github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.10 h1:oXAz+Vh0PMUvJczoi+flxpnBEPxoER1IaAnU/NMPtT0= +github.com/klauspost/compress v1.17.10/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b h1:udzkj9S/zlT5X367kqJis0QP7YMxobob6zhzq6Yre00= github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b/go.mod h1:pcaDhQK0/NJZEvtCO0qQPPropqV0sJOJ6YW7X+9kRwM= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -480,11 +409,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM= -github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4= -github.com/linode/linodego v1.38.0 h1:wP3oW9OhGc6vhze8NPf2knbwH4TzSbrjzuCd9okjbTY= -github.com/linode/linodego v1.38.0/go.mod h1:L7GXKFD3PoN2xSEtFc04wIXP5WK65O10jYQx0PQISWQ= -github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ= +github.com/linode/linodego v1.41.0 h1:GcP7JIBr9iLRJ9FwAtb9/WCT1DuPJS/xUApapfdjtiY= +github.com/linode/linodego v1.41.0/go.mod h1:Ow4/XZ0yvWBzt3iAHwchvhSx30AyLintsSMvvQ2/SJY= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-colorable v0.0.9/go.mod 
h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= @@ -495,7 +421,6 @@ github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= @@ -503,27 +428,23 @@ github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27k github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/maxatome/go-testdeep v1.12.0 h1:Ql7Go8Tg0C1D/uMMX59LAoYK7LffeJQ6X2T04nTH68g= github.com/maxatome/go-testdeep v1.12.0/go.mod h1:lPZc/HAcJMP92l7yI6TRz1aZN5URwUBUAfUNvrclaNM= -github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U= +github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA= +github.com/mdlayher/vsock v1.2.1 h1:pC1mTJTvjo1r9n9fbm7S1j04rCgCzhCOS5DY0zqHlnQ= +github.com/mdlayher/vsock v1.2.1/go.mod h1:NRfCibel++DgeMD8z/hP+PPTjlNJsdPOmxcnENvE+SE= github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= -github.com/miekg/dns v1.1.61 h1:nLxbwF3XxhwVSm8g9Dghm9MHPaUZuqhPiGL+675ZmEs= -github.com/miekg/dns v1.1.61/go.mod h1:mnAarhS3nWaW+NVP2wTkYVIZyHNJ098SJZUki3eykwQ= -github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= +github.com/miekg/dns v1.1.62 h1:cN8OuEF1/x5Rq6Np+h1epln8OiyPWV+lROx9LxcGgIQ= +github.com/miekg/dns v1.1.62/go.mod h1:mvDlcItzm+br7MToIKqkglaGhlFMHJ9DTNNWONWXbNQ= github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI= -github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/go-testing-interface v1.14.1 h1:jrgshOhYAUVNMAJiKbEu7EqAwgJJ2JqpQmpLJOu07cU= github.com/mitchellh/go-testing-interface v1.14.1/go.mod h1:gfgS7OtZj6MA4U1UrDRp04twqAjfvlZyCfX3sDjEym8= -github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= -github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/mapstructure 
v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= @@ -544,64 +465,35 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8m github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= -github.com/nats-io/jwt v0.3.2/go.mod h1:/euKqTS1ZD+zzjYrY7pseZrTtWQSjujC7xjPc8wL6eU= -github.com/nats-io/nats-server/v2 v2.1.2/go.mod h1:Afk+wRZqkMQs/p45uXdrVLuab3gwv3Z8C4HTBu8GD/k= -github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w= -github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= -github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= -github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/nsf/jsondiff v0.0.0-20230430225905-43f6cf3098c1 h1:dOYG7LS/WK00RWZc8XGgcUTlTxpp3mKhdR2Q9z9HbXM= github.com/nsf/jsondiff v0.0.0-20230430225905-43f6cf3098c1/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8= -github.com/oklog/oklog v0.3.2/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs= -github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= github.com/oklog/run v1.1.0 h1:GEenZ1cK0+q0+wsJew9qUg/DyD8k3JzYsZAi5gYi2mA= github.com/oklog/run v1.1.0/go.mod h1:sVPdnTZT1zYwAJeCMu2Th4T21pA3FPOQRfWjQlk7DVU= github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= -github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= -github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= -github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo/v2 v2.13.0 h1:0jY9lJquiL8fcf3M4LAXN5aMlS/b2BV86HFFPCPMgE4= -github.com/onsi/ginkgo/v2 v2.13.0/go.mod h1:TE309ZR8s5FsKKpuB1YAQYBzCaAfUgatB/xlT/ETL/o= -github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= -github.com/onsi/gomega v1.29.0 h1:KIA/t2t5UBzoirT4H9tsML45GEbo3ouUnBHsCfD2tVg= -github.com/onsi/gomega v1.29.0/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ= -github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= +github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= +github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= +github.com/onsi/gomega v1.19.0 h1:4ieX6qQjPP/BfC3mpsAtIGGlxTWPeA3Inl/7DtXw1tw= +github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod 
h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM= github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= github.com/opencontainers/runtime-spec v1.0.2 h1:UfAcuLBJB9Coz72x1hgl8O5RVzTdNiaglX6v2DM6FI0= github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opentracing-contrib/go-observer v0.0.0-20170622124052-a52f23424492/go.mod h1:Ngi6UdF0k5OKD5t5wlmGhe/EDKPoUM3BXZSSfIuJbis= -github.com/opentracing/basictracer-go v1.0.0/go.mod h1:QfBfYuafItcjQuMwinw9GhYKwFXS9KnPs5lxoYwgW74= -github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/openzipkin-contrib/zipkin-go-opentracing v0.4.5/go.mod h1:/wsWhb9smxSfWAKL3wpBW7V8scJMt8N8gnaMCS9E/cA= -github.com/openzipkin/zipkin-go v0.1.6/go.mod h1:QgAqvLzwWbR/WpD4A3cGpPtJrZXNIiJc5AZX7/PBEpw= -github.com/openzipkin/zipkin-go v0.2.1/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4= -github.com/openzipkin/zipkin-go v0.2.2/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4= github.com/ovh/go-ovh v1.6.0 h1:ixLOwxQdzYDx296sXcgS35TOPEahJkpjMGtzPadCjQI= github.com/ovh/go-ovh v1.6.0/go.mod h1:cTVDnl94z4tl8pP1uZ/8jlVxntjSIf09bNcQ5TJSC7c= -github.com/pact-foundation/pact-go v1.0.4/go.mod h1:uExwJY4kCzNPcHRj+hCR/HBbOOIwwtUjcrb0b5/5kLM= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= -github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= -github.com/performancecopilot/speed v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9iaPbIdPPGyKcA8hKdoy6hAWba7Yac= -github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= -github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -614,84 +506,59 @@ github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P 
github.com/prometheus/alertmanager v0.27.0 h1:V6nTa2J5V4s8TG4C4HtrBP/WNSebCCTYGGv4qecA/+I= github.com/prometheus/alertmanager v0.27.0/go.mod h1:8Ia/R3urPmbzJ8OsdvmZvIprDwvwmYCmUbwBL+jlPOE= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= -github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeDPbaTKGT+JTgUa3og= github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.1.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.2.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA= github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.58.0 h1:N+N8vY4/23r6iYfD3UQZUoJPnUYAo7v6LG5XZxjZTXo= -github.com/prometheus/common v0.58.0/go.mod h1:GpWM7dewqmVYcd7SmRaiWVe9SSqjf0UrwnYnpEZNuT0= +github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJNllA= +github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/common/assets v0.2.0 h1:0P5OrzoHrYBOSM1OigWL3mY8ZvV2N4zIE/5AahrSrfM= github.com/prometheus/common/assets v0.2.0/go.mod h1:D17UVUE12bHbim7HzwUvtqm6gwBEaDQ0F+hIGbFbccI= github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= github.com/prometheus/common/sigv4 v0.1.0/go.mod h1:2Jkxxk9yYvCkE5G1sQT7GuEXm57JrvHu9k5YwTjsNtI= -github.com/prometheus/exporter-toolkit v0.11.0 h1:yNTsuZ0aNCNFQ3aFTD2uhPOvr4iD7fdBvKPAEGkNf+g= -github.com/prometheus/exporter-toolkit v0.11.0/go.mod 
h1:BVnENhnNecpwoTLiABx7mrPB/OLRIgN74qlQbV+FK1Q= +github.com/prometheus/exporter-toolkit v0.13.0 h1:lmA0Q+8IaXgmFRKw09RldZmZdnvu9wwcDLIXGmTPw1c= +github.com/prometheus/exporter-toolkit v0.13.0/go.mod h1:2uop99EZl80KdXhv/MxVI2181fMcwlsumFOqBecGkG0= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= -github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= -github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= -github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= -github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= -github.com/scaleway/scaleway-sdk-go v1.0.0-beta.29 h1:BkTk4gynLjguayxrYxZoMZjBnAOh7ntQvUkOFmkMqPU= -github.com/scaleway/scaleway-sdk-go v1.0.0-beta.29/go.mod h1:fCa7OJZ/9DRTnOKmxvT6pn+LPWUptQAmHF/SBJUGEcg= +github.com/scaleway/scaleway-sdk-go v1.0.0-beta.30 h1:yoKAVkEVwAqbGbR8n87rHQ1dulL25rKloGadb3vm770= +github.com/scaleway/scaleway-sdk-go v1.0.0-beta.30/go.mod h1:sH0u6fq6x4R5M7WxkoQFY/o7UaiItec0o1LinLCJNq8= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/shoenig/test v1.7.1 h1:UJcjSAI3aUKx52kfcfhblgyhZceouhvvs3OYdWgn+PY= github.com/shoenig/test v1.7.1/go.mod h1:UxJ6u/x2v/TNs/LoLxBNJRV9DiwBBKYxXSyczsBHFoI= github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c h1:aqg5Vm5dwtvL+YgDpBcK1ITf3o96N/K7/wsRXQnUTEs= github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c/go.mod h1:owqhoLW1qZoYLZzLnBw+QkPP9WZnjlSWihhxAJC1+/M= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= -github.com/simonpasquier/klog-gokit v0.3.0 h1:TkFK21cbwDRS+CiystjqbAiq5ubJcVTk9hLUck5Ntcs= -github.com/simonpasquier/klog-gokit v0.3.0/go.mod h1:+SUlDQNrhVtGt2FieaqNftzzk8P72zpWlACateWxA9k= -github.com/simonpasquier/klog-gokit/v3 v3.3.0 h1:HMzH999kO5gEgJTaWWO+xjncW5oycspcsBnjn9b853Q= -github.com/simonpasquier/klog-gokit/v3 v3.3.0/go.mod h1:uSbnWC3T7kt1dQyY9sjv0Ao1SehMAJdVnUNSKhjaDsg= github.com/sirupsen/logrus v1.2.0/go.mod 
h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= -github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= -github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= -github.com/sony/gobreaker v0.4.1/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY= -github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= -github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= -github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= -github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= @@ -709,26 +576,20 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= -github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= -github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/vultr/govultr/v2 v2.17.2 h1:gej/rwr91Puc/tgh+j33p/BLR16UrIPnSr+AIwYWZQs= github.com/vultr/govultr/v2 v2.17.2/go.mod h1:ZFOKGWmgjytfyjeyAdhQlSWwTjh2ig+X49cAp50dzXI= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc= github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= -github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark 
v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= -go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= go.mongodb.org/mongo-driver v1.14.0 h1:P98w8egYRjYe3XDjxhYJagTokP/H6HzlsnojRgZRd80= go.mongodb.org/mongo-driver v1.14.0/go.mod h1:Vzb0Mk/pa7e6cWw85R4F/endUC3u0U9jGcNU603k65c= -go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= -go.opencensus.io v0.20.2/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= @@ -736,49 +597,42 @@ go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -go.opentelemetry.io/collector/pdata v1.12.0 h1:Xx5VK1p4VO0md8MWm2icwC1MnJ7f8EimKItMWw46BmA= -go.opentelemetry.io/collector/pdata v1.12.0/go.mod h1:MYeB0MmMAxeM0hstCFrCqWLzdyeYySim2dG6pDT6nYI= -go.opentelemetry.io/collector/semconv v0.105.0 h1:8p6dZ3JfxFTjbY38d8xlQGB1TQ3nPUvs+D0RERniZ1g= -go.opentelemetry.io/collector/semconv v0.105.0/go.mod h1:yMVUCNoQPZVq/IPfrHrnntZTWsLf5YGZ7qwKulIl5hw= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8= -go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw= -go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 h1:3Q/xZUyC1BBkualc9ROb4G8qkH90LXEIICcs5zv1OYY= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0/go.mod h1:s75jGIWA9OfCMzF0xr+ZgfrB5FEbbV7UuYo32ahUiFI= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0 h1:R3X6ZXmNPRR8ul6i3WgFURCHzaXjHdm0karRG/+dj3s= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0/go.mod h1:QWFXnDavXWwMx2EEcZsf3yxgEKAqsxQ+Syjp+seyInw= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.28.0 h1:j9+03ymgYhPKmeXGk5Zu+cIZOlVzd9Zv7QIiyItjFBU= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.28.0/go.mod h1:Y5+XiUG4Emn1hTfciPzGPJaSI+RpDts6BnCIir0SLqk= -go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc= -go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8= -go.opentelemetry.io/otel/sdk v1.28.0 h1:b9d7hIry8yZsgtbmM0DKyPWMMUMlK9NEKuIG4aBqWyE= -go.opentelemetry.io/otel/sdk v1.28.0/go.mod h1:oYj7ClPUA7Iw3m+r7GeEjz0qckQRJK2B8zjcZEfu7Pg= -go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4= -go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ= +go.opentelemetry.io/collector/pdata v1.16.0 h1:g02K8jlRnmQ7TQDuXpdgVL6vIxIVqr5Gbb1qIR27rto= +go.opentelemetry.io/collector/pdata v1.16.0/go.mod h1:YZZJIt2ehxosYf/Y1pbvexjNWsIGNNrzzlCTO9jC1F4= +go.opentelemetry.io/collector/semconv 
v0.110.0 h1:KHQnOHe3gUz0zsxe8ph9kN5OTypCFD4V+06AiBTfeNk= +go.opentelemetry.io/collector/semconv v0.110.0/go.mod h1:zCJ5njhWpejR+A40kiEoeFm1xq1uzyZwMnRNX6/D82A= +go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0 h1:4BZHA+B1wXEQoGNHxW8mURaLhcdGwvRnmhGbm+odRbc= +go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0/go.mod h1:3qi2EEwMgB4xnKgPLqsDP3j9qxnHDZeHsnAxfjQqTko= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 h1:UP6IpuHFkUgOQL9FFQFrZ+5LiwhhYRbi7VZSIx6Nj5s= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0/go.mod h1:qxuZLtbq5QDtdeSHsS7bcf6EH6uO6jUAgk764zd3rhM= +go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY= +go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0 h1:lsInsfvhVIfOI6qHVyysXMNDnjO9Npvl7tlDPJFBVd4= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0/go.mod h1:KQsVNh4OjgjTG0G6EiNi1jVpnaeeKsKMRwbLN+f1+8M= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.30.0 h1:m0yTiGDLUvVYaTFbAvCkVYIYcvwKt3G7OLoN77NUs/8= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.30.0/go.mod h1:wBQbT4UekBfegL2nx0Xk1vBcnzyBPsIVm9hRG4fYcr4= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0 h1:umZgi92IyxfXd/l4kaDhnKgY8rnN/cZcF1LKc6I8OQ8= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0/go.mod h1:4lVs6obhSVRb1EW5FhOuBTyiQhtRtAnnva9vD3yRfq8= +go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE= +go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY= +go.opentelemetry.io/otel/sdk v1.30.0 h1:cHdik6irO49R5IysVhdn8oaiR9m8XluDaJAs4DfOrYE= +go.opentelemetry.io/otel/sdk v1.30.0/go.mod h1:p14X4Ok8S+sygzblytT1nqG98QG2KYKv++HE0LY/mhg= +go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys= +go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A= go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= -go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= -go.uber.org/automaxprocs v1.5.3 h1:kWazyxZUrS3Gs4qUpbwo5kEIMGe/DAvi5Z4tl2NW4j8= -go.uber.org/automaxprocs v1.5.3/go.mod h1:eRbA25aqJrxAbsLO0xy5jVwPt7FQnRgjW+efnwa1WM0= +go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= +go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= -go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= -go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= -go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= 
-go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= -go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -786,8 +640,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= -golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw= -golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54= +golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= +golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -822,17 +676,12 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.19.0 h1:fEdghXQSo20giMthA7cd28ZC+jts4amQ3YMXiP5oMQ8= -golang.org/x/mod v0.19.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= +golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net 
v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190125091013-d26f9f9a57f3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -843,7 +692,6 @@ golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -869,16 +717,16 @@ golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= -golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= -golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= -golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= +golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -897,11 +745,7 @@ golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys 
v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -912,13 +756,11 @@ golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191220142924-d4481acd189f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -960,16 +802,16 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= -golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term 
v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= -golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= -golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= +golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= +golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -981,24 +823,20 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= -golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= -golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= -golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= @@ 
-1010,8 +848,6 @@ golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -1019,7 +855,6 @@ golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= @@ -1040,13 +875,12 @@ golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.23.0 h1:SGsXPZ+2l4JsgaCKkx+FQ9YZ5XEtA1GZYuoDjenLjvg= -golang.org/x/tools v0.23.0/go.mod h1:pnu6ufv6vQkll6szChhK3C3L/ruaIv5eBeztNG8wtsI= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= @@ -1061,10 +895,9 @@ google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/ google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.24.0/go.mod 
h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.196.0 h1:k/RafYqebaIJBO3+SMnfEGtFVlvp5vSgqTUF54UN/zg= -google.golang.org/api v0.196.0/go.mod h1:g9IL21uGkYgvQ5BZg6BAtoGJQIm8r6EgaAbpNey5wBE= +google.golang.org/api v0.199.0 h1:aWUXClp+VFJmqE0JPvpZOK3LDQMyFKYIow4etYd9qxs= +google.golang.org/api v0.199.0/go.mod h1:ohG4qSztDJmZdjK/Ar6MhbAmb/Rpi4JHOqagsh90K28= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= @@ -1075,7 +908,6 @@ google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRn google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190530194941-fb225487d101/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= @@ -1099,19 +931,14 @@ google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1m google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed h1:3RgNmBoI9MZhsj3QxC+AP/qQhNwpCLOvYDYYsFrhFt0= -google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed/go.mod h1:OCdP9MfskevB/rbYvHTsXTtKC+3bHWajPdoKgjcYkfo= +google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1 h1:hjSy6tcFQZ171igDaN5QHOw2n6vx40juYbC/x67CEhc= +google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:qpvKtACPCQhAdu3PyQgV4l3LMXZEtft7y8QcarRsp9I= google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ= google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= -google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= -google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.21.1/go.mod 
h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= -google.golang.org/grpc v1.22.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.23.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= @@ -1121,8 +948,8 @@ google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3Iji google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.66.0 h1:DibZuoBznOxbDQxRINckZcUvnCEvrW9pcWIE2yF9r1c= -google.golang.org/grpc v1.66.0/go.mod h1:s3/l6xSSCURdVfAnL+TqCNMyTDAGN6+lZeVxnZR128Y= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -1142,18 +969,13 @@ gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= -gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= -gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o= +gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= +gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= -gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= -gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= @@ -1166,7 +988,6 @@ gopkg.in/yaml.v2 v2.4.0/go.mod 
h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8= -honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= @@ -1174,16 +995,20 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -k8s.io/api v0.29.3 h1:2ORfZ7+bGC3YJqGpV0KSDDEVf8hdGQ6A03/50vj8pmw= -k8s.io/api v0.29.3/go.mod h1:y2yg2NTyHUUkIoTC+phinTnEa3KFM6RZ3szxt014a80= -k8s.io/apimachinery v0.29.3 h1:2tbx+5L7RNvqJjn7RIuIKu9XTsIZ9Z5wX2G22XAa5EU= -k8s.io/apimachinery v0.29.3/go.mod h1:hx/S4V2PNW4OMg3WizRrHutyB5la0iCUbZym+W0EQIU= -k8s.io/client-go v0.29.3 h1:R/zaZbEAxqComZ9FHeQwOh3Y1ZUs7FaHKZdQtIc2WZg= -k8s.io/client-go v0.29.3/go.mod h1:tkDisCvgPfiRpxGnOORfkljmS+UrW+WtXAy2fTvXJB0= +k8s.io/api v0.31.1 h1:Xe1hX/fPW3PXYYv8BlozYqw63ytA92snr96zMW9gWTU= +k8s.io/api v0.31.1/go.mod h1:sbN1g6eY6XVLeqNsZGLnI5FwVseTrZX7Fv3O26rhAaI= +k8s.io/apimachinery v0.31.1 h1:mhcUBbj7KUjaVhyXILglcVjuS4nYXiwC+KKFBgIVy7U= +k8s.io/apimachinery v0.31.1/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= +k8s.io/client-go v0.31.1 h1:f0ugtWSbWpxHR7sjVpQwuvw9a3ZKLXX0u0itkFXufb0= +k8s.io/client-go v0.31.1/go.mod h1:sKI8871MJN2OyeqRlmA4W4KM9KBdBUpDLu/43eGemCg= +k8s.io/klog v1.0.0 h1:Pt+yjF5aB1xDSVbau4VsWe+dQNzA0qv1LlXdC2dF6Q8= +k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= -k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= -k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= @@ -1191,7 +1016,5 @@ sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMm sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= 
-sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= -sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= -sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= -sourcegraph.com/sourcegraph/appdash v0.0.0-20190731080439-ebfcffb1b5c0/go.mod h1:hI742Nqp5OhwiqlzhgfbWU4mW4yO10fP+LoT9WOswdU= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index 300f3176e4..a6ad47acd3 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -304,6 +304,14 @@ func (h *FloatHistogram) Div(scalar float64) *FloatHistogram { h.ZeroCount /= scalar h.Count /= scalar h.Sum /= scalar + // Division by zero removes all buckets. + if scalar == 0 { + h.PositiveBuckets = nil + h.NegativeBuckets = nil + h.PositiveSpans = nil + h.NegativeSpans = nil + return h + } for i := range h.PositiveBuckets { h.PositiveBuckets[i] /= scalar } @@ -353,7 +361,7 @@ func (h *FloatHistogram) Add(other *FloatHistogram) (*FloatHistogram, error) { default: // All other cases shouldn't actually happen. // They are a direct collision of CounterReset and NotCounterReset. - // Conservatively set the CounterResetHint to "unknown" and isse a warning. + // Conservatively set the CounterResetHint to "unknown" and issue a warning. h.CounterResetHint = UnknownCounterReset // TODO(trevorwhitney): Actually issue the warning as soon as the plumbing for it is in place } @@ -669,7 +677,7 @@ func detectReset(currIt, prevIt *floatBucketIterator) bool { if !currIt.Next() { // Reached end of currIt early, therefore // previous histogram has a bucket that the - // current one does not have. Unlass all + // current one does not have. Unless all // remaining buckets in the previous histogram // are unpopulated, this is a reset. for { @@ -902,7 +910,7 @@ func (h *FloatHistogram) trimBucketsInZeroBucket() { // reconcileZeroBuckets finds a zero bucket large enough to include the zero // buckets of both histograms (the receiving histogram and the other histogram) // with a zero threshold that is not within a populated bucket in either -// histogram. This method modifies the receiving histogram accourdingly, but +// histogram. This method modifies the receiving histogram accordingly, but // leaves the other histogram as is. Instead, it returns the zero count the // other histogram would have if it were modified. 
func (h *FloatHistogram) reconcileZeroBuckets(other *FloatHistogram) float64 { diff --git a/model/histogram/float_histogram_test.go b/model/histogram/float_histogram_test.go index cf370a313e..34988e9d39 100644 --- a/model/histogram/float_histogram_test.go +++ b/model/histogram/float_histogram_test.go @@ -399,14 +399,10 @@ func TestFloatHistogramDiv(t *testing.T) { }, 0, &FloatHistogram{ - ZeroThreshold: 0.01, - ZeroCount: math.Inf(1), - Count: math.Inf(1), - Sum: math.Inf(1), - PositiveSpans: []Span{{-2, 1}, {2, 3}}, - PositiveBuckets: []float64{math.Inf(1), math.Inf(1), math.Inf(1), math.Inf(1)}, - NegativeSpans: []Span{{3, 2}, {3, 2}}, - NegativeBuckets: []float64{math.Inf(1), math.Inf(1), math.Inf(1), math.Inf(1)}, + ZeroThreshold: 0.01, + Count: math.Inf(1), + Sum: math.Inf(1), + ZeroCount: math.Inf(1), }, }, { diff --git a/model/labels/labels_common.go b/model/labels/labels_common.go index d7bdc1e076..99529a3836 100644 --- a/model/labels/labels_common.go +++ b/model/labels/labels_common.go @@ -230,5 +230,5 @@ func contains(s []Label, n string) bool { } func yoloString(b []byte) string { - return *((*string)(unsafe.Pointer(&b))) + return unsafe.String(unsafe.SliceData(b), len(b)) } diff --git a/model/labels/labels_stringlabels.go b/model/labels/labels_stringlabels.go index c8bce51234..c64bb990e0 100644 --- a/model/labels/labels_stringlabels.go +++ b/model/labels/labels_stringlabels.go @@ -16,7 +16,6 @@ package labels import ( - "reflect" "slices" "strings" "unsafe" @@ -299,10 +298,8 @@ func Equal(ls, o Labels) bool { func EmptyLabels() Labels { return Labels{} } -func yoloBytes(s string) (b []byte) { - *(*string)(unsafe.Pointer(&b)) = s - (*reflect.SliceHeader)(unsafe.Pointer(&b)).Cap = len(s) - return +func yoloBytes(s string) []byte { + return unsafe.Slice(unsafe.StringData(s), len(s)) } // New returns a sorted Labels from the given labels. @@ -338,8 +335,8 @@ func Compare(a, b Labels) int { } i := 0 // First, go 8 bytes at a time. Data strings are expected to be 8-byte aligned. - sp := unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&shorter)).Data) - lp := unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&longer)).Data) + sp := unsafe.Pointer(unsafe.StringData(shorter)) + lp := unsafe.Pointer(unsafe.StringData(longer)) for ; i < len(shorter)-8; i += 8 { if *(*uint64)(unsafe.Add(sp, i)) != *(*uint64)(unsafe.Add(lp, i)) { break diff --git a/model/labels/regexp.go b/model/labels/regexp.go index 3238190e98..0847c819c5 100644 --- a/model/labels/regexp.go +++ b/model/labels/regexp.go @@ -100,13 +100,13 @@ func newFastRegexMatcherWithoutCache(v string) (*FastRegexMatcher, error) { // available, even if the string matcher is faster. m.matchString = m.stringMatcher.Matches } else { - parsed, err := syntax.Parse(v, syntax.Perl) + parsed, err := syntax.Parse(v, syntax.Perl|syntax.DotNL) if err != nil { return nil, err } // Simplify the syntax tree to run faster. 
parsed = parsed.Simplify() - m.re, err = regexp.Compile("^(?:" + parsed.String() + ")$") + m.re, err = regexp.Compile("^(?s:" + parsed.String() + ")$") if err != nil { return nil, err } diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go index ce30e240f9..66722d832e 100644 --- a/model/labels/regexp_test.go +++ b/model/labels/regexp_test.go @@ -132,7 +132,7 @@ func TestFastRegexMatcher_MatchString(t *testing.T) { t.Parallel() m, err := NewFastRegexMatcher(r) require.NoError(t, err) - re := regexp.MustCompile("^(?:" + r + ")$") + re := regexp.MustCompile("^(?s:" + r + ")$") require.Equal(t, re.MatchString(v), m.MatchString(v)) }) } @@ -287,7 +287,7 @@ func TestOptimizeConcatRegex(t *testing.T) { } for _, c := range cases { - parsed, err := syntax.Parse(c.regex, syntax.Perl) + parsed, err := syntax.Parse(c.regex, syntax.Perl|syntax.DotNL) require.NoError(t, err) prefix, suffix, contains := optimizeConcatRegex(parsed) @@ -368,7 +368,7 @@ func TestFindSetMatches(t *testing.T) { c := c t.Run(c.pattern, func(t *testing.T) { t.Parallel() - parsed, err := syntax.Parse(c.pattern, syntax.Perl) + parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) require.NoError(t, err) matches, actualCaseSensitive := findSetMatches(parsed) require.ElementsMatch(t, c.expMatches, matches) @@ -468,18 +468,18 @@ func TestStringMatcherFromRegexp(t *testing.T) { pattern string exp StringMatcher }{ - {".*", anyStringWithoutNewlineMatcher{}}, - {"().*", anyStringWithoutNewlineMatcher{}}, - {".*()", anyStringWithoutNewlineMatcher{}}, - {"().*()", anyStringWithoutNewlineMatcher{}}, - {".*?", anyStringWithoutNewlineMatcher{}}, + {".*", trueMatcher{}}, + {"().*", trueMatcher{}}, + {".*()", trueMatcher{}}, + {"().*()", trueMatcher{}}, + {".*?", trueMatcher{}}, {"(?s:.*)", trueMatcher{}}, - {"(.*)", anyStringWithoutNewlineMatcher{}}, - {"^.*$", anyStringWithoutNewlineMatcher{}}, - {".+", &anyNonEmptyStringMatcher{matchNL: false}}, + {"(.*)", trueMatcher{}}, + {"^.*$", trueMatcher{}}, + {".+", &anyNonEmptyStringMatcher{matchNL: true}}, {"(?s:.+)", &anyNonEmptyStringMatcher{matchNL: true}}, - {"^.+$", &anyNonEmptyStringMatcher{matchNL: false}}, - {"(.+)", &anyNonEmptyStringMatcher{matchNL: false}}, + {"^.+$", &anyNonEmptyStringMatcher{matchNL: true}}, + {"(.+)", &anyNonEmptyStringMatcher{matchNL: true}}, {"", emptyStringMatcher{}}, {"()", emptyStringMatcher{}}, {"^$", emptyStringMatcher{}}, @@ -492,23 +492,23 @@ func TestStringMatcherFromRegexp(t *testing.T) { {`(?i:((foo1|foo2|bar)))`, orStringMatcher([]StringMatcher{&equalStringMatcher{s: "BAR", caseSensitive: false}, orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}})})}, {"^((?i:foo|oo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "OO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})}, {"(?i:(foo1|foo2|bar))", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "BAR", caseSensitive: false}, orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}})})}, - {".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, - {"(.*)foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, - {"(.*)foo(.*)", 
&containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, - {"(.+)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: false}, right: anyStringWithoutNewlineMatcher{}}}, - {"^.+foo.+", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: false}, right: &anyNonEmptyStringMatcher{matchNL: false}}}, - {"^(.*)(foo)(.*)$", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, - {"^(.*)(foo|foobar)(.*)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, - {"^(.*)(foo|foobar)(.+)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}}, - {"^(.*)(bar|b|buzz)(.+)$", &containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}}, + {".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"(.*)foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"(.*)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"(.+)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: trueMatcher{}}}, + {"^.+foo.+", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: &anyNonEmptyStringMatcher{matchNL: true}}}, + {"^(.*)(foo)(.*)$", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"^(.*)(foo|foobar)(.*)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"^(.*)(foo|foobar)(.+)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}}, + {"^(.*)(bar|b|buzz)(.+)$", &containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}}, {"10\\.0\\.(1|2)\\.+", nil}, - {"10\\.0\\.(1|2).+", &containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}}, - {"^.+foo", &literalSuffixStringMatcher{left: &anyNonEmptyStringMatcher{}, suffix: "foo", suffixCaseSensitive: true}}, - {"foo-.*$", &literalPrefixSensitiveStringMatcher{prefix: "foo-", right: anyStringWithoutNewlineMatcher{}}}, - {"(prometheus|api_prom)_api_v1_.+", &containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}}, - {"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{&containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}, &equalStringMatcher{s: "foo", caseSensitive: true}})}, - {"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "fobar", caseSensitive: true}}), &literalSuffixStringMatcher{suffix: "foo", suffixCaseSensitive: true, left: &anyNonEmptyStringMatcher{matchNL: false}}})}, - 
{"(.+)/(gateway|cortex-gw|cortex-gw-internal)", &containsStringMatcher{substrings: []string{"/cortex-gw", "/cortex-gw-internal", "/gateway"}, left: &anyNonEmptyStringMatcher{matchNL: false}, right: nil}}, + {"10\\.0\\.(1|2).+", &containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: true}}}, + {"^.+foo", &literalSuffixStringMatcher{left: &anyNonEmptyStringMatcher{matchNL: true}, suffix: "foo", suffixCaseSensitive: true}}, + {"foo-.*$", &literalPrefixSensitiveStringMatcher{prefix: "foo-", right: trueMatcher{}}}, + {"(prometheus|api_prom)_api_v1_.+", &containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: true}}}, + {"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{&containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}, &equalStringMatcher{s: "foo", caseSensitive: true}})}, + {"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "fobar", caseSensitive: true}}), &literalSuffixStringMatcher{suffix: "foo", suffixCaseSensitive: true, left: &anyNonEmptyStringMatcher{matchNL: true}}})}, + {"(.+)/(gateway|cortex-gw|cortex-gw-internal)", &containsStringMatcher{substrings: []string{"/cortex-gw", "/cortex-gw-internal", "/gateway"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: nil}}, // we don't support case insensitive matching for contains. // This is because there's no strings.IndexOfFold function. // We can revisit later if this is really popular by using strings.ToUpper. @@ -519,15 +519,15 @@ func TestStringMatcherFromRegexp(t *testing.T) { {".*foo.*bar.*", nil}, {`\d*`, nil}, {".", nil}, - {"/|/bar.*", &literalPrefixSensitiveStringMatcher{prefix: "/", right: orStringMatcher{emptyStringMatcher{}, &literalPrefixSensitiveStringMatcher{prefix: "bar", right: anyStringWithoutNewlineMatcher{}}}}}, + {"/|/bar.*", &literalPrefixSensitiveStringMatcher{prefix: "/", right: orStringMatcher{emptyStringMatcher{}, &literalPrefixSensitiveStringMatcher{prefix: "bar", right: trueMatcher{}}}}}, // This one is not supported because `stringMatcherFromRegexp` is not reentrant for syntax.OpConcat. // It would make the code too complex to handle it. {"(.+)/(foo.*|bar$)", nil}, // Case sensitive alternate with same literal prefix and .* suffix. - {"(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixSensitiveStringMatcher{prefix: "xyz-016a-ixb-", right: orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "dp", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixSensitiveStringMatcher{prefix: "op", right: anyStringWithoutNewlineMatcher{}}}}}, + {"(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixSensitiveStringMatcher{prefix: "xyz-016a-ixb-", right: orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "dp", right: trueMatcher{}}, &literalPrefixSensitiveStringMatcher{prefix: "op", right: trueMatcher{}}}}}, // Case insensitive alternate with same literal prefix and .* suffix. 
- {"(?i:(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*))", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: anyStringWithoutNewlineMatcher{}}}}}, - {"(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: anyStringWithoutNewlineMatcher{}}}}}, + {"(?i:(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*))", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: trueMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: trueMatcher{}}}}}, + {"(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: trueMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: trueMatcher{}}}}}, // Concatenated variable length selectors are not supported. {"foo.*.*", nil}, {"foo.+.+", nil}, @@ -536,15 +536,15 @@ func TestStringMatcherFromRegexp(t *testing.T) { {"aaa.?.?", nil}, {"aaa.?.*", nil}, // Regexps with ".?". - {"ext.?|xfs", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: false}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, + {"ext.?|xfs", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, {"(?s)(ext.?|xfs)", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, - {"foo.?", &literalPrefixSensitiveStringMatcher{prefix: "foo", right: &zeroOrOneCharacterStringMatcher{matchNL: false}}}, + {"foo.?", &literalPrefixSensitiveStringMatcher{prefix: "foo", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}}, {"f.?o", nil}, } { c := c t.Run(c.pattern, func(t *testing.T) { t.Parallel() - parsed, err := syntax.Parse(c.pattern, syntax.Perl) + parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) require.NoError(t, err) matches := stringMatcherFromRegexp(parsed) require.Equal(t, c.exp, matches) @@ -563,16 +563,16 @@ func TestStringMatcherFromRegexp_LiteralPrefix(t *testing.T) { { pattern: "(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", expectedLiteralPrefixMatchers: 3, - expectedMatches: []string{"xyz-016a-ixb-dp", "xyz-016a-ixb-dpXXX", "xyz-016a-ixb-op", "xyz-016a-ixb-opXXX"}, - expectedNotMatches: []string{"XYZ-016a-ixb-dp", "xyz-016a-ixb-d", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp", "xyz-016a-ixb-dp\n"}, + expectedMatches: []string{"xyz-016a-ixb-dp", "xyz-016a-ixb-dpXXX", "xyz-016a-ixb-op", "xyz-016a-ixb-opXXX", "xyz-016a-ixb-dp\n"}, + expectedNotMatches: []string{"XYZ-016a-ixb-dp", "xyz-016a-ixb-d", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp"}, }, // Case insensitive. 
{ pattern: "(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", expectedLiteralPrefixMatchers: 3, - expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dpXXX", "xyz-016a-ixb-op", "XYZ-016a-ixb-opXXX"}, - expectedNotMatches: []string{"xyz-016a-ixb-d", "xyz", "dp", "xyz-016a-ixb-dp\n"}, + expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dpXXX", "xyz-016a-ixb-op", "XYZ-016a-ixb-opXXX", "xyz-016a-ixb-dp\n"}, + expectedNotMatches: []string{"xyz-016a-ixb-d", "xyz", "dp"}, }, // Nested literal prefixes, case sensitive. @@ -600,13 +600,13 @@ func TestStringMatcherFromRegexp_LiteralPrefix(t *testing.T) { }, } { t.Run(c.pattern, func(t *testing.T) { - parsed, err := syntax.Parse(c.pattern, syntax.Perl) + parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) require.NoError(t, err) matcher := stringMatcherFromRegexp(parsed) require.NotNil(t, matcher) - re := regexp.MustCompile("^" + c.pattern + "$") + re := regexp.MustCompile("^(?s:" + c.pattern + ")$") // Pre-condition check: ensure it contains literalPrefixSensitiveStringMatcher or literalPrefixInsensitiveStringMatcher. numPrefixMatchers := 0 @@ -649,16 +649,16 @@ func TestStringMatcherFromRegexp_LiteralSuffix(t *testing.T) { { pattern: "(.*xyz-016a-ixb-dp|.*xyz-016a-ixb-op)", expectedLiteralSuffixMatchers: 2, - expectedMatches: []string{"xyz-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "xyz-016a-ixb-op", "XXXxyz-016a-ixb-op"}, - expectedNotMatches: []string{"XYZ-016a-ixb-dp", "yz-016a-ixb-dp", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp", "\nxyz-016a-ixb-dp"}, + expectedMatches: []string{"xyz-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "xyz-016a-ixb-op", "XXXxyz-016a-ixb-op", "\nxyz-016a-ixb-dp"}, + expectedNotMatches: []string{"XYZ-016a-ixb-dp", "yz-016a-ixb-dp", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp"}, }, // Case insensitive. { pattern: "(?i)(.*xyz-016a-ixb-dp|.*xyz-016a-ixb-op)", expectedLiteralSuffixMatchers: 2, - expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "XyZ-016a-ixb-op", "XXXxyz-016a-ixb-op"}, - expectedNotMatches: []string{"yz-016a-ixb-dp", "xyz-016a-ixb-o", "xyz", "dp", "\nxyz-016a-ixb-dp"}, + expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "XyZ-016a-ixb-op", "XXXxyz-016a-ixb-op", "\nxyz-016a-ixb-dp"}, + expectedNotMatches: []string{"yz-016a-ixb-dp", "xyz-016a-ixb-o", "xyz", "dp"}, }, // Nested literal suffixes, case sensitive. @@ -678,13 +678,13 @@ func TestStringMatcherFromRegexp_LiteralSuffix(t *testing.T) { }, } { t.Run(c.pattern, func(t *testing.T) { - parsed, err := syntax.Parse(c.pattern, syntax.Perl) + parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) require.NoError(t, err) matcher := stringMatcherFromRegexp(parsed) require.NotNil(t, matcher) - re := regexp.MustCompile("^" + c.pattern + "$") + re := regexp.MustCompile("^(?s:" + c.pattern + ")$") // Pre-condition check: ensure it contains literalSuffixStringMatcher. 
numSuffixMatchers := 0 @@ -724,26 +724,26 @@ func TestStringMatcherFromRegexp_Quest(t *testing.T) { { pattern: "test.?", expectedZeroOrOneMatchers: 1, - expectedMatches: []string{"test", "test!"}, - expectedNotMatches: []string{"test\n", "tes", "test!!"}, + expectedMatches: []string{"test\n", "test", "test!"}, + expectedNotMatches: []string{"tes", "test!!"}, }, { pattern: ".?test", expectedZeroOrOneMatchers: 1, - expectedMatches: []string{"test", "!test"}, - expectedNotMatches: []string{"\ntest", "tes", "test!"}, + expectedMatches: []string{"\ntest", "test", "!test"}, + expectedNotMatches: []string{"tes", "test!"}, }, { pattern: "(aaa.?|bbb.?)", expectedZeroOrOneMatchers: 2, - expectedMatches: []string{"aaa", "aaaX", "bbb", "bbbX"}, - expectedNotMatches: []string{"aa", "aaaXX", "aaa\n", "bb", "bbbXX", "bbb\n"}, + expectedMatches: []string{"aaa", "aaaX", "bbb", "bbbX", "aaa\n", "bbb\n"}, + expectedNotMatches: []string{"aa", "aaaXX", "bb", "bbbXX"}, }, { pattern: ".*aaa.?", expectedZeroOrOneMatchers: 1, - expectedMatches: []string{"aaa", "Xaaa", "aaaX", "XXXaaa", "XXXaaaX"}, - expectedNotMatches: []string{"aa", "aaaXX", "XXXaaaXXX", "XXXaaa\n"}, + expectedMatches: []string{"aaa", "Xaaa", "aaaX", "XXXaaa", "XXXaaaX", "XXXaaa\n"}, + expectedNotMatches: []string{"aa", "aaaXX", "XXXaaaXXX"}, }, // Match newline. @@ -758,18 +758,18 @@ func TestStringMatcherFromRegexp_Quest(t *testing.T) { { pattern: "(aaa.?|((?s).?bbb.+))", expectedZeroOrOneMatchers: 2, - expectedMatches: []string{"aaa", "aaaX", "bbbX", "XbbbX", "bbbXXX", "\nbbbX"}, - expectedNotMatches: []string{"aa", "aaa\n", "Xbbb", "\nbbb"}, + expectedMatches: []string{"aaa", "aaaX", "bbbX", "XbbbX", "bbbXXX", "\nbbbX", "aaa\n"}, + expectedNotMatches: []string{"aa", "Xbbb", "\nbbb"}, }, } { t.Run(c.pattern, func(t *testing.T) { - parsed, err := syntax.Parse(c.pattern, syntax.Perl) + parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) require.NoError(t, err) matcher := stringMatcherFromRegexp(parsed) require.NotNil(t, matcher) - re := regexp.MustCompile("^" + c.pattern + "$") + re := regexp.MustCompile("^(?s:" + c.pattern + ")$") // Pre-condition check: ensure it contains zeroOrOneCharacterStringMatcher. numZeroOrOneMatchers := 0 @@ -821,7 +821,7 @@ func FuzzFastRegexMatcher_WithStaticallyDefinedRegularExpressions(f *testing.F) for _, re := range regexes { m, err := NewFastRegexMatcher(re) require.NoError(f, err) - r := regexp.MustCompile("^(?:" + re + ")$") + r := regexp.MustCompile("^(?s:" + re + ")$") matchers = append(matchers, m) res = append(res, r) } @@ -852,7 +852,7 @@ func FuzzFastRegexMatcher_WithFuzzyRegularExpressions(f *testing.F) { return } - reg, err := regexp.Compile("^(?:" + re + ")$") + reg, err := regexp.Compile("^(?s:" + re + ")$") if err != nil { // Ignore invalid regexes. return @@ -1480,7 +1480,7 @@ func BenchmarkOptimizeEqualOrPrefixStringMatchers(b *testing.B) { } b.Logf("regexp: %s", re) - parsed, err := syntax.Parse(re, syntax.Perl) + parsed, err := syntax.Parse(re, syntax.Perl|syntax.DotNL) require.NoError(b, err) unoptimized := stringMatcherFromRegexpInternal(parsed) diff --git a/model/relabel/relabel.go b/model/relabel/relabel.go index a880465969..eb79f7be21 100644 --- a/model/relabel/relabel.go +++ b/model/relabel/relabel.go @@ -171,7 +171,7 @@ type Regexp struct { // NewRegexp creates a new anchored Regexp and returns an error if the // passed-in regular expression does not compile. 
func NewRegexp(s string) (Regexp, error) { - regex, err := regexp.Compile("^(?:" + s + ")$") + regex, err := regexp.Compile("^(?s:" + s + ")$") return Regexp{Regexp: regex}, err } @@ -218,8 +218,8 @@ func (re Regexp) String() string { } str := re.Regexp.String() - // Trim the anchor `^(?:` prefix and `)$` suffix. - return str[4 : len(str)-2] + // Trim the anchor `^(?s:` prefix and `)$` suffix. + return str[5 : len(str)-2] } // Process returns a relabeled version of the given label set. The relabel configurations @@ -277,6 +277,13 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) { return false } case Replace: + // Fast path to add or delete label pair. + if val == "" && cfg.Regex == DefaultRelabelConfig.Regex && + !varInRegexTemplate(cfg.TargetLabel) && !varInRegexTemplate(cfg.Replacement) { + lb.Set(cfg.TargetLabel, cfg.Replacement) + break + } + indexes := cfg.Regex.FindStringSubmatchIndex(val) // If there is no match no replacement must take place. if indexes == nil { @@ -326,3 +333,7 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) { return true } + +func varInRegexTemplate(template string) bool { + return strings.Contains(template, "$") +} diff --git a/model/relabel/relabel_test.go b/model/relabel/relabel_test.go index fc9952134d..0c6d41f5e3 100644 --- a/model/relabel/relabel_test.go +++ b/model/relabel/relabel_test.go @@ -569,6 +569,29 @@ func TestRelabel(t *testing.T) { }, drop: true, }, + { + input: labels.FromMap(map[string]string{ + "a": "line1\nline2", + "b": "bar", + "c": "baz", + }), + relabel: []*Config{ + { + SourceLabels: model.LabelNames{"a"}, + Regex: MustNewRegexp("line1.*line2"), + TargetLabel: "d", + Separator: ";", + Replacement: "match${1}", + Action: Replace, + }, + }, + output: labels.FromMap(map[string]string{ + "a": "line1\nline2", + "b": "bar", + "c": "baz", + "d": "match", + }), + }, } for _, test := range tests { @@ -838,6 +861,34 @@ func BenchmarkRelabel(b *testing.B) { "__scrape_timeout__", "10s", "job", "kubernetes-pods"), }, + { + name: "static label pair", + config: ` + - replacement: wwwwww + target_label: wwwwww + - replacement: yyyyyyyyyyyy + target_label: xxxxxxxxx + - replacement: xxxxxxxxx + target_label: yyyyyyyyyyyy + - source_labels: ["something"] + target_label: with_source_labels + replacement: value + - replacement: dropped + target_label: ${0} + - replacement: ${0} + target_label: dropped`, + lbls: labels.FromStrings( + "abcdefg01", "hijklmn1", + "abcdefg02", "hijklmn2", + "abcdefg03", "hijklmn3", + "abcdefg04", "hijklmn4", + "abcdefg05", "hijklmn5", + "abcdefg06", "hijklmn6", + "abcdefg07", "hijklmn7", + "abcdefg08", "hijklmn8", + "job", "foo", + ), + }, } for i := range tests { err := yaml.UnmarshalStrict([]byte(tests[i].config), &tests[i].cfgs) diff --git a/model/rulefmt/rulefmt.go b/model/rulefmt/rulefmt.go index ec00b93540..4f6ae43f73 100644 --- a/model/rulefmt/rulefmt.go +++ b/model/rulefmt/rulefmt.go @@ -111,6 +111,20 @@ func (g *RuleGroups) Validate(node ruleGroups) (errs []error) { ) } + for k, v := range g.Labels { + if !model.LabelName(k).IsValid() || k == model.MetricNameLabel { + errs = append( + errs, fmt.Errorf("invalid label name: %s", k), + ) + } + + if !model.LabelValue(v).IsValid() { + errs = append( + errs, fmt.Errorf("invalid label value: %s", v), + ) + } + } + set[g.Name] = struct{}{} for i, r := range g.Rules { @@ -143,10 +157,11 @@ type RuleGroup struct { EvaluationDelay *model.Duration `yaml:"evaluation_delay,omitempty"` QueryOffset *model.Duration `yaml:"query_offset,omitempty"` - Limit int 
`yaml:"limit,omitempty"` - Rules []RuleNode `yaml:"rules"` - SourceTenants []string `yaml:"source_tenants,omitempty"` - AlignEvaluationTimeOnInterval bool `yaml:"align_evaluation_time_on_interval,omitempty"` + Limit int `yaml:"limit,omitempty"` + Rules []RuleNode `yaml:"rules"` + Labels map[string]string `yaml:"labels,omitempty"` + SourceTenants []string `yaml:"source_tenants,omitempty"` + AlignEvaluationTimeOnInterval bool `yaml:"align_evaluation_time_on_interval,omitempty"` } // Rule describes an alerting or recording rule. diff --git a/model/rulefmt/rulefmt_test.go b/model/rulefmt/rulefmt_test.go index 789234e78a..a12a86e73e 100644 --- a/model/rulefmt/rulefmt_test.go +++ b/model/rulefmt/rulefmt_test.go @@ -87,9 +87,8 @@ func TestParseFileFailure(t *testing.T) { for _, c := range table { _, errs := ParseFile(filepath.Join("testdata", c.filename)) - require.NotNil(t, errs, "Expected error parsing %s but got none", c.filename) - require.Error(t, errs[0]) - require.Containsf(t, errs[0].Error(), c.errMsg, "Expected error for %s.", c.filename) + require.NotEmpty(t, errs, "Expected error parsing %s but got none", c.filename) + require.ErrorContainsf(t, errs[0], c.errMsg, "Expected error for %s.", c.filename) } } @@ -110,6 +109,23 @@ groups: severity: "page" annotations: summary: "Instance {{ $labels.instance }} down" +`, + shouldPass: true, + }, + { + ruleString: ` +groups: +- name: example + labels: + team: myteam + rules: + - alert: InstanceDown + expr: up == 0 + for: 5m + labels: + severity: "page" + annotations: + summary: "Instance {{ $labels.instance }} down" `, shouldPass: true, }, @@ -261,8 +277,7 @@ func TestError(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := tt.error.Error() - require.Equal(t, tt.want, got) + require.EqualError(t, tt.error, tt.want) }) } } @@ -310,8 +325,7 @@ func TestWrappedError(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := tt.wrappedError.Error() - require.Equal(t, tt.want, got) + require.EqualError(t, tt.wrappedError, tt.want) }) } } diff --git a/model/textparse/benchmark_test.go b/model/textparse/benchmark_test.go new file mode 100644 index 0000000000..bd0d5089ac --- /dev/null +++ b/model/textparse/benchmark_test.go @@ -0,0 +1,185 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
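The new group-level `labels` field added to RuleGroup above is validated with the same label-name/value rules used elsewhere. A standalone sketch (not part of this patch, using a hypothetical `groupLabels` map) of that check with the prometheus/common model helpers:

package main

import (
	"fmt"

	"github.com/prometheus/common/model"
)

func main() {
	// "__name__" is rejected as a group label, mirroring the Validate() logic above.
	groupLabels := map[string]string{"team": "myteam", "__name__": "not-allowed"}

	for k, v := range groupLabels {
		if !model.LabelName(k).IsValid() || k == model.MetricNameLabel {
			fmt.Printf("invalid label name: %s\n", k)
		}
		if !model.LabelValue(v).IsValid() {
			fmt.Printf("invalid label value: %s\n", v)
		}
	}
}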
+ +package textparse + +import ( + "bytes" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "testing" + + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/labels" + + "github.com/prometheus/common/expfmt" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" +) + +type newParser func([]byte, *labels.SymbolTable) Parser + +var newTestParserFns = map[string]newParser{ + "promtext": NewPromParser, + "promproto": func(b []byte, st *labels.SymbolTable) Parser { + return NewProtobufParser(b, true, st) + }, + "omtext": func(b []byte, st *labels.SymbolTable) Parser { + return NewOpenMetricsParser(b, st, WithOMParserCTSeriesSkipped()) + }, + "omtext_with_nhcb": func(b []byte, st *labels.SymbolTable) Parser { + p := NewOpenMetricsParser(b, st, WithOMParserCTSeriesSkipped()) + return NewNHCBParser(p, st, false) + }, +} + +// BenchmarkParse benchmarks parsing, mimicking how scrape/scrape.go#append use it. +// Typically used as follows: +/* + export bench=v1 && go test ./model/textparse/... \ + -run '^$' -bench '^BenchmarkParse' \ + -benchtime 2s -count 6 -cpu 2 -benchmem -timeout 999m \ + | tee ${bench}.txt +*/ +// For profiles, add -memprofile=${bench}.mem.pprof -cpuprofile=${bench}.cpu.pprof +// options. +// +// NOTE(bwplotka): Previous iterations of this benchmark had different cases for isolated +// Series, Series+Metrics with and without reuse, Series+CT. Those cases are sometimes +// good to know if you are working on a certain optimization, but it does not +// make sense to persist such cases for everybody (e.g. for CI one day). +// For local iteration, feel free to adjust cases/comment out code etc. +// +// NOTE(bwplotka): Do not try to conclude "what parser (OM, proto, prom) is the fastest" +// as the testdata has different amount and type of metrics and features (e.g. exemplars). +func BenchmarkParse(b *testing.B) { + for _, bcase := range []struct { + dataFile string // Localized to "./testdata". + dataProto []byte + parser string + + compareToExpfmtFormat expfmt.FormatType + }{ + {dataFile: "promtestdata.txt", parser: "promtext", compareToExpfmtFormat: expfmt.TypeTextPlain}, + {dataFile: "promtestdata.nometa.txt", parser: "promtext", compareToExpfmtFormat: expfmt.TypeTextPlain}, + + // We don't pass compareToExpfmtFormat: expfmt.TypeProtoDelim as expfmt does not support GAUGE_HISTOGRAM, see https://github.com/prometheus/common/issues/430. + {dataProto: createTestProtoBuf(b).Bytes(), parser: "promproto"}, + + // We don't pass compareToExpfmtFormat: expfmt.TypeOpenMetrics as expfmt does not support OM exemplars, see https://github.com/prometheus/common/issues/703. + {dataFile: "omtestdata.txt", parser: "omtext"}, + {dataFile: "promtestdata.txt", parser: "omtext"}, // Compare how omtext parser deals with Prometheus text format vs promtext. + + // NHCB. + {dataFile: "omhistogramdata.txt", parser: "omtext"}, // Measure OM parser baseline for histograms. + {dataFile: "omhistogramdata.txt", parser: "omtext_with_nhcb"}, // Measure NHCB over OM parser. 
+ } { + var buf []byte + dataCase := bcase.dataFile + if len(bcase.dataProto) > 0 { + dataCase = "createTestProtoBuf()" + buf = bcase.dataProto + } else { + f, err := os.Open(filepath.Join("testdata", bcase.dataFile)) + require.NoError(b, err) + b.Cleanup(func() { + _ = f.Close() + }) + buf, err = io.ReadAll(f) + require.NoError(b, err) + } + b.Run(fmt.Sprintf("data=%v/parser=%v", dataCase, bcase.parser), func(b *testing.B) { + newParserFn := newTestParserFns[bcase.parser] + var ( + res labels.Labels + e exemplar.Exemplar + ) + + b.SetBytes(int64(len(buf))) + b.ReportAllocs() + b.ResetTimer() + + st := labels.NewSymbolTable() + for i := 0; i < b.N; i++ { + p := newParserFn(buf, st) + + Inner: + for { + t, err := p.Next() + switch t { + case EntryInvalid: + if errors.Is(err, io.EOF) { + break Inner + } + b.Fatal(err) + case EntryType: + _, _ = p.Type() + continue + case EntryHelp: + _, _ = p.Help() + continue + case EntryUnit: + _, _ = p.Unit() + continue + case EntryComment: + continue + case EntryHistogram: + _, _, _, _ = p.Histogram() + case EntrySeries: + _, _, _ = p.Series() + default: + b.Fatal("not implemented entry", t) + } + + _ = p.Metric(&res) + _ = p.CreatedTimestamp() + for hasExemplar := p.Exemplar(&e); hasExemplar; hasExemplar = p.Exemplar(&e) { + } + } + } + }) + + b.Run(fmt.Sprintf("data=%v/parser=xpfmt", dataCase), func(b *testing.B) { + if bcase.compareToExpfmtFormat == expfmt.TypeUnknown { + b.Skip("compareToExpfmtFormat not set") + } + + b.SetBytes(int64(len(buf))) + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + decSamples := make(model.Vector, 0, 50) + sdec := expfmt.SampleDecoder{ + Dec: expfmt.NewDecoder(bytes.NewReader(buf), expfmt.NewFormat(bcase.compareToExpfmtFormat)), + Opts: &expfmt.DecodeOptions{ + Timestamp: model.TimeFromUnixNano(0), + }, + } + + for { + if err := sdec.Decode(&decSamples); err != nil { + if errors.Is(err, io.EOF) { + break + } + b.Fatal(err) + } + decSamples = decSamples[:0] + } + } + }) + } +} diff --git a/model/textparse/interface.go b/model/textparse/interface.go index 0b5d9281e4..2682855281 100644 --- a/model/textparse/interface.go +++ b/model/textparse/interface.go @@ -14,6 +14,8 @@ package textparse import ( + "errors" + "fmt" "mime" "github.com/prometheus/common/model" @@ -23,8 +25,7 @@ import ( "github.com/prometheus/prometheus/model/labels" ) -// Parser parses samples from a byte slice of samples in the official -// Prometheus and OpenMetrics text exposition formats. +// Parser parses samples from a byte slice of samples in different exposition formats. type Parser interface { // Series returns the bytes of a series with a simple float64 as a // value, the timestamp if set, and the value of the current sample. @@ -58,6 +59,8 @@ type Parser interface { // Metric writes the labels of the current sample into the passed labels. // It returns the string from which the metric was parsed. + // The values of the "le" labels of classic histograms and "quantile" labels + // of summaries should follow the OpenMetrics formatting rules. Metric(l *labels.Labels) string // Exemplar writes the exemplar of the current sample into the passed @@ -69,6 +72,8 @@ type Parser interface { // CreatedTimestamp returns the created timestamp (in milliseconds) for the // current sample. It returns nil if it is unknown e.g. if it wasn't set, // if the scrape protocol or metric type does not support created timestamps. + // Assume the CreatedTimestamp returned pointer is only valid until + // the Next iteration. 
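Since the pointer returned by CreatedTimestamp may be reused by the parser, callers that need the value past the next Next() call should copy it, as the tests in this change do. A minimal sketch of a hypothetical helper, assuming the Parser interface defined here:

// createdTimestampCopy copies the created timestamp before the parser advances.
func createdTimestampCopy(p Parser) *int64 {
	ct := p.CreatedTimestamp()
	if ct == nil {
		return nil
	}
	v := *ct // copy: the parser may reuse the pointed-to value on the next p.Next()
	return &v
}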
CreatedTimestamp() *int64 // Next advances the parser to the next sample. @@ -76,26 +81,65 @@ type Parser interface { Next() (Entry, error) } -// New returns a new parser of the byte slice. -// -// This function always returns a valid parser, but might additionally -// return an error if the content type cannot be parsed. -func New(b []byte, contentType string, parseClassicHistograms bool, st *labels.SymbolTable) (Parser, error) { +// extractMediaType returns the mediaType of a required parser. It tries first to +// extract a valid and supported mediaType from contentType. If that fails, +// the provided fallbackType (possibly an empty string) is returned, together with +// an error. fallbackType is used as-is without further validation. +func extractMediaType(contentType, fallbackType string) (string, error) { if contentType == "" { - return NewPromParser(b, st), nil + if fallbackType == "" { + return "", errors.New("non-compliant scrape target sending blank Content-Type and no fallback_scrape_protocol specified for target") + } + return fallbackType, fmt.Errorf("non-compliant scrape target sending blank Content-Type, using fallback_scrape_protocol %q", fallbackType) } + // We have a contentType, parse it. mediaType, _, err := mime.ParseMediaType(contentType) if err != nil { - return NewPromParser(b, st), err + if fallbackType == "" { + retErr := fmt.Errorf("cannot parse Content-Type %q and no fallback_scrape_protocol for target", contentType) + return "", errors.Join(retErr, err) + } + retErr := fmt.Errorf("could not parse received Content-Type %q, using fallback_scrape_protocol %q", contentType, fallbackType) + return fallbackType, errors.Join(retErr, err) + } + + // We have a valid media type, either we recognise it and can use it + // or we have to error. + switch mediaType { + case "application/openmetrics-text", "application/vnd.google.protobuf", "text/plain": + return mediaType, nil + } + // We're here because we have no recognised mediaType. + if fallbackType == "" { + return "", fmt.Errorf("received unsupported Content-Type %q and no fallback_scrape_protocol specified for target", contentType) } + return fallbackType, fmt.Errorf("received unsupported Content-Type %q, using fallback_scrape_protocol %q", contentType, fallbackType) +} + +// New returns a new parser of the byte slice. +// +// This function no longer guarantees to return a valid parser. +// +// It only returns a valid parser if the supplied contentType and fallbackType allow. +// An error may also be returned if fallbackType had to be used or there was some +// other error parsing the supplied Content-Type. +// If the returned parser is nil then the scrape must fail. +func New(b []byte, contentType, fallbackType string, parseClassicHistograms, skipOMCTSeries bool, st *labels.SymbolTable) (Parser, error) { + mediaType, err := extractMediaType(contentType, fallbackType) + // err may be nil or something we want to warn about. 
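A hedged usage sketch of the new New signature (the function name newScrapeParser is hypothetical): the fallback media type typically comes from the scrape config's `fallback_scrape_protocol`, obtained via `config.ScrapeProtocol.HeaderMediaType()` as the tests below do.

// Assumes imports of "github.com/prometheus/prometheus/config" and
// "github.com/prometheus/prometheus/model/labels" alongside this package.
func newScrapeParser(body []byte, contentType string, st *labels.SymbolTable) (Parser, error) {
	fallback := config.PrometheusText0_0_4.HeaderMediaType() // "text/plain"
	// parseClassicHistograms=false, skipOMCTSeries=false for this sketch.
	p, err := New(body, contentType, fallback, false, false, st)
	if p == nil {
		// No usable Content-Type and no applicable fallback: the scrape must fail.
		return nil, err
	}
	// err may still be non-nil here, meaning the fallback (or a warning case) was used;
	// callers may log it and continue with the returned parser.
	return p, err
}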
+ switch mediaType { case "application/openmetrics-text": - return NewOpenMetricsParser(b, st), nil + return NewOpenMetricsParser(b, st, func(o *openMetricsParserOptions) { + o.SkipCTSeries = skipOMCTSeries + }), err case "application/vnd.google.protobuf": - return NewProtobufParser(b, parseClassicHistograms, st), nil + return NewProtobufParser(b, parseClassicHistograms, st), err + case "text/plain": + return NewPromParser(b, st), err default: - return NewPromParser(b, st), nil + return nil, err } } diff --git a/model/textparse/interface_test.go b/model/textparse/interface_test.go index c644565628..72c8284f2d 100644 --- a/model/textparse/interface_test.go +++ b/model/textparse/interface_test.go @@ -14,16 +14,28 @@ package textparse import ( + "errors" + "io" "testing" + "github.com/google/go-cmp/cmp" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" + "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/util/testutil" ) func TestNewParser(t *testing.T) { t.Parallel() + requireNilParser := func(t *testing.T, p Parser) { + require.Nil(t, p) + } + requirePromParser := func(t *testing.T, p Parser) { require.NotNil(t, p) _, ok := p.(*PromParser) @@ -36,34 +48,83 @@ func TestNewParser(t *testing.T) { require.True(t, ok) } + requireProtobufParser := func(t *testing.T, p Parser) { + require.NotNil(t, p) + _, ok := p.(*ProtobufParser) + require.True(t, ok) + } + for name, tt := range map[string]*struct { - contentType string - validateParser func(*testing.T, Parser) - err string + contentType string + fallbackScrapeProtocol config.ScrapeProtocol + validateParser func(*testing.T, Parser) + err string }{ "empty-string": { - validateParser: requirePromParser, + validateParser: requireNilParser, + err: "non-compliant scrape target sending blank Content-Type and no fallback_scrape_protocol specified for target", + }, + "empty-string-fallback-text-plain": { + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText0_0_4, + err: "non-compliant scrape target sending blank Content-Type, using fallback_scrape_protocol \"text/plain\"", }, "invalid-content-type-1": { contentType: "invalid/", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "expected token after slash", }, + "invalid-content-type-1-fallback-text-plain": { + contentType: "invalid/", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText0_0_4, + err: "expected token after slash", + }, + "invalid-content-type-1-fallback-openmetrics": { + contentType: "invalid/", + validateParser: requireOpenMetricsParser, + fallbackScrapeProtocol: config.OpenMetricsText0_0_1, + err: "expected token after slash", + }, + "invalid-content-type-1-fallback-protobuf": { + contentType: "invalid/", + validateParser: requireProtobufParser, + fallbackScrapeProtocol: config.PrometheusProto, + err: "expected token after slash", + }, "invalid-content-type-2": { contentType: "invalid/invalid/invalid", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "unexpected content after media subtype", }, + "invalid-content-type-2-fallback-text-plain": { + contentType: "invalid/invalid/invalid", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText1_0_0, + err: "unexpected content after media subtype", + }, "invalid-content-type-3": { 
contentType: "/", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "no media type", }, + "invalid-content-type-3-fallback-text-plain": { + contentType: "/", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText1_0_0, + err: "no media type", + }, "invalid-content-type-4": { contentType: "application/openmetrics-text; charset=UTF-8; charset=utf-8", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "duplicate parameter name", }, + "invalid-content-type-4-fallback-open-metrics": { + contentType: "application/openmetrics-text; charset=UTF-8; charset=utf-8", + validateParser: requireOpenMetricsParser, + fallbackScrapeProtocol: config.OpenMetricsText1_0_0, + err: "duplicate parameter name", + }, "openmetrics": { contentType: "application/openmetrics-text", validateParser: requireOpenMetricsParser, @@ -80,27 +141,129 @@ func TestNewParser(t *testing.T) { contentType: "text/plain", validateParser: requirePromParser, }, + "protobuf": { + contentType: "application/vnd.google.protobuf", + validateParser: requireProtobufParser, + }, "plain-text-with-version": { contentType: "text/plain; version=0.0.4", validateParser: requirePromParser, }, "some-other-valid-content-type": { contentType: "text/html", - validateParser: requirePromParser, + validateParser: requireNilParser, + err: "received unsupported Content-Type \"text/html\" and no fallback_scrape_protocol specified for target", + }, + "some-other-valid-content-type-fallback-text-plain": { + contentType: "text/html", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText0_0_4, + err: "received unsupported Content-Type \"text/html\", using fallback_scrape_protocol \"text/plain\"", }, } { t.Run(name, func(t *testing.T) { tt := tt // Copy to local variable before going parallel. t.Parallel() - p, err := New([]byte{}, tt.contentType, false, labels.NewSymbolTable()) + fallbackProtoMediaType := tt.fallbackScrapeProtocol.HeaderMediaType() + + p, err := New([]byte{}, tt.contentType, fallbackProtoMediaType, false, false, labels.NewSymbolTable()) tt.validateParser(t, p) if tt.err == "" { require.NoError(t, err) } else { - require.Error(t, err) - require.Contains(t, err.Error(), tt.err) + require.ErrorContains(t, err, tt.err) } }) } } + +// parsedEntry represents data that is parsed for each entry. +type parsedEntry struct { + // In all but EntryComment, EntryInvalid. + m string + + // In EntryHistogram. + shs *histogram.Histogram + fhs *histogram.FloatHistogram + + // In EntrySeries. + v float64 + + // In EntrySeries and EntryHistogram. + lset labels.Labels + t *int64 + es []exemplar.Exemplar + ct *int64 + + // In EntryType. + typ model.MetricType + // In EntryHelp. + help string + // In EntryUnit. + unit string + // In EntryComment. 
+ comment string +} + +func requireEntries(t *testing.T, exp, got []parsedEntry) { + t.Helper() + + testutil.RequireEqualWithOptions(t, exp, got, []cmp.Option{ + cmp.AllowUnexported(parsedEntry{}), + }) +} + +func testParse(t *testing.T, p Parser) (ret []parsedEntry) { + t.Helper() + + for { + et, err := p.Next() + if errors.Is(err, io.EOF) { + break + } + require.NoError(t, err) + + var got parsedEntry + var m []byte + switch et { + case EntryInvalid: + t.Fatal("entry invalid not expected") + case EntrySeries, EntryHistogram: + if et == EntrySeries { + m, got.t, got.v = p.Series() + got.m = string(m) + } else { + m, got.t, got.shs, got.fhs = p.Histogram() + got.m = string(m) + } + + p.Metric(&got.lset) + // Parser reuses int pointer. + if ct := p.CreatedTimestamp(); ct != nil { + got.ct = int64p(*ct) + } + for e := (exemplar.Exemplar{}); p.Exemplar(&e); { + got.es = append(got.es, e) + } + case EntryType: + m, got.typ = p.Type() + got.m = string(m) + + case EntryHelp: + m, h := p.Help() + got.m = string(m) + got.help = string(h) + + case EntryUnit: + m, u := p.Unit() + got.m = string(m) + got.unit = string(u) + + case EntryComment: + got.comment = string(p.Comment()) + } + ret = append(ret, got) + } + return ret +} diff --git a/model/textparse/nhcbparse.go b/model/textparse/nhcbparse.go new file mode 100644 index 0000000000..d019c327c3 --- /dev/null +++ b/model/textparse/nhcbparse.go @@ -0,0 +1,376 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "errors" + "io" + "math" + "strconv" + "strings" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/util/convertnhcb" +) + +type collectionState int + +const ( + stateStart collectionState = iota + stateCollecting + stateEmitting +) + +// The NHCBParser wraps a Parser and converts classic histograms to native +// histograms with custom buckets. +// +// Since Parser interface is line based, this parser needs to keep track +// of the last classic histogram series it saw to collate them into a +// single native histogram. +// +// Note: +// - Only series that have the histogram metadata type are considered for +// conversion. +// - The classic series are also returned if keepClassicHistograms is true. +type NHCBParser struct { + // The parser we're wrapping. + parser Parser + // Option to keep classic histograms along with converted histograms. + keepClassicHistograms bool + + // Labels builder. + builder labels.ScratchBuilder + + // State of the parser. + state collectionState + + // Caches the values from the underlying parser. + // For Series and Histogram. + bytes []byte + ts *int64 + value float64 + h *histogram.Histogram + fh *histogram.FloatHistogram + // For Metric. + lset labels.Labels + metricString string + // For Type. 
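A usage sketch of the wrapper, mirroring how the benchmark and tests in this change construct it (buf here stands for an OpenMetrics-encoded scrape body):

st := labels.NewSymbolTable()
p := NewOpenMetricsParser(buf, st, WithOMParserCTSeriesSkipped())
p = NewNHCBParser(p, st, false) // keepClassicHistograms=false: emit only the converted NHCB series

Iterating the wrapped parser then yields EntryHistogram entries with Schema == histogram.CustomBucketsSchema in place of the classic _bucket/_count/_sum series. The expected values in the tests below follow from the usual bucket arithmetic: cumulative classic buckets 1/16/18 for le 0.0/1.0/+Inf become absolute counts 1/15/2, stored delta-encoded as 1, 14, -13, with CustomValues keeping only the finite bounds (the +Inf boundary is implied).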
+ bName []byte + typ model.MetricType + + // Caches the entry itself if we are inserting a converted NHCB + // halfway through. + entry Entry + err error + + // Caches the values and metric for the inserted converted NHCB. + bytesNHCB []byte + hNHCB *histogram.Histogram + fhNHCB *histogram.FloatHistogram + lsetNHCB labels.Labels + exemplars []exemplar.Exemplar + ctNHCB *int64 + metricStringNHCB string + + // Collates values from the classic histogram series to build + // the converted histogram later. + tempLsetNHCB labels.Labels + tempNHCB convertnhcb.TempHistogram + tempExemplars []exemplar.Exemplar + tempExemplarCount int + tempCT *int64 + + // Remembers the last base histogram metric name (assuming it's + // a classic histogram) so we can tell if the next float series + // is part of the same classic histogram. + lastHistogramName string + lastHistogramLabelsHash uint64 + lastHistogramExponential bool + // Reused buffer for hashing labels. + hBuffer []byte +} + +func NewNHCBParser(p Parser, st *labels.SymbolTable, keepClassicHistograms bool) Parser { + return &NHCBParser{ + parser: p, + keepClassicHistograms: keepClassicHistograms, + builder: labels.NewScratchBuilderWithSymbolTable(st, 16), + tempNHCB: convertnhcb.NewTempHistogram(), + } +} + +func (p *NHCBParser) Series() ([]byte, *int64, float64) { + return p.bytes, p.ts, p.value +} + +func (p *NHCBParser) Histogram() ([]byte, *int64, *histogram.Histogram, *histogram.FloatHistogram) { + if p.state == stateEmitting { + return p.bytesNHCB, p.ts, p.hNHCB, p.fhNHCB + } + return p.bytes, p.ts, p.h, p.fh +} + +func (p *NHCBParser) Help() ([]byte, []byte) { + return p.parser.Help() +} + +func (p *NHCBParser) Type() ([]byte, model.MetricType) { + return p.bName, p.typ +} + +func (p *NHCBParser) Unit() ([]byte, []byte) { + return p.parser.Unit() +} + +func (p *NHCBParser) Comment() []byte { + return p.parser.Comment() +} + +func (p *NHCBParser) Metric(l *labels.Labels) string { + if p.state == stateEmitting { + *l = p.lsetNHCB + return p.metricStringNHCB + } + *l = p.lset + return p.metricString +} + +func (p *NHCBParser) Exemplar(ex *exemplar.Exemplar) bool { + if p.state == stateEmitting { + if len(p.exemplars) == 0 { + return false + } + *ex = p.exemplars[0] + p.exemplars = p.exemplars[1:] + return true + } + return p.parser.Exemplar(ex) +} + +func (p *NHCBParser) CreatedTimestamp() *int64 { + switch p.state { + case stateStart: + if p.entry == EntrySeries || p.entry == EntryHistogram { + return p.parser.CreatedTimestamp() + } + case stateCollecting: + return p.tempCT + case stateEmitting: + return p.ctNHCB + } + return nil +} + +func (p *NHCBParser) Next() (Entry, error) { + if p.state == stateEmitting { + p.state = stateStart + if p.entry == EntrySeries { + isNHCB := p.handleClassicHistogramSeries(p.lset) + if isNHCB && !p.keepClassicHistograms { + // Do not return the classic histogram series if it was converted to NHCB and we are not keeping classic histograms. + return p.Next() + } + } + return p.entry, p.err + } + + p.entry, p.err = p.parser.Next() + if p.err != nil { + if errors.Is(p.err, io.EOF) && p.processNHCB() { + return EntryHistogram, nil + } + return EntryInvalid, p.err + } + switch p.entry { + case EntrySeries: + p.bytes, p.ts, p.value = p.parser.Series() + p.metricString = p.parser.Metric(&p.lset) + // Check the label set to see if we can continue or need to emit the NHCB. + var isNHCB bool + if p.compareLabels() { + // Labels differ. Check if we can emit the NHCB. 
+ if p.processNHCB() { + return EntryHistogram, nil + } + isNHCB = p.handleClassicHistogramSeries(p.lset) + } else { + // Labels are the same. Check if after an exponential histogram. + if p.lastHistogramExponential { + isNHCB = false + } else { + isNHCB = p.handleClassicHistogramSeries(p.lset) + } + } + if isNHCB && !p.keepClassicHistograms { + // Do not return the classic histogram series if it was converted to NHCB and we are not keeping classic histograms. + return p.Next() + } + return p.entry, p.err + case EntryHistogram: + p.bytes, p.ts, p.h, p.fh = p.parser.Histogram() + p.metricString = p.parser.Metric(&p.lset) + p.storeExponentialLabels() + case EntryType: + p.bName, p.typ = p.parser.Type() + } + if p.processNHCB() { + return EntryHistogram, nil + } + return p.entry, p.err +} + +// Return true if labels have changed and we should emit the NHCB. +func (p *NHCBParser) compareLabels() bool { + if p.state != stateCollecting { + return false + } + if p.typ != model.MetricTypeHistogram { + // Different metric type. + return true + } + if p.lastHistogramName != convertnhcb.GetHistogramMetricBaseName(p.lset.Get(labels.MetricName)) { + // Different metric name. + return true + } + nextHash, _ := p.lset.HashWithoutLabels(p.hBuffer, labels.BucketLabel) + // Different label values. + return p.lastHistogramLabelsHash != nextHash +} + +// Save the label set of the classic histogram without suffix and bucket `le` label. +func (p *NHCBParser) storeClassicLabels() { + p.lastHistogramName = convertnhcb.GetHistogramMetricBaseName(p.lset.Get(labels.MetricName)) + p.lastHistogramLabelsHash, _ = p.lset.HashWithoutLabels(p.hBuffer, labels.BucketLabel) + p.lastHistogramExponential = false +} + +func (p *NHCBParser) storeExponentialLabels() { + p.lastHistogramName = p.lset.Get(labels.MetricName) + p.lastHistogramLabelsHash, _ = p.lset.HashWithoutLabels(p.hBuffer) + p.lastHistogramExponential = true +} + +// handleClassicHistogramSeries collates the classic histogram series to be converted to NHCB +// if it is actually a classic histogram series (and not a normal float series) and if there +// isn't already a native histogram with the same name (assuming it is always processed +// right before the classic histograms) and returns true if the collation was done. +func (p *NHCBParser) handleClassicHistogramSeries(lset labels.Labels) bool { + if p.typ != model.MetricTypeHistogram { + return false + } + mName := lset.Get(labels.MetricName) + // Sanity check to ensure that the TYPE metadata entry name is the same as the base name. 
+ if convertnhcb.GetHistogramMetricBaseName(mName) != string(p.bName) { + return false + } + switch { + case strings.HasSuffix(mName, "_bucket") && lset.Has(labels.BucketLabel): + le, err := strconv.ParseFloat(lset.Get(labels.BucketLabel), 64) + if err == nil && !math.IsNaN(le) { + p.processClassicHistogramSeries(lset, "_bucket", func(hist *convertnhcb.TempHistogram) { + hist.BucketCounts[le] = p.value + }) + return true + } + case strings.HasSuffix(mName, "_count"): + p.processClassicHistogramSeries(lset, "_count", func(hist *convertnhcb.TempHistogram) { + hist.Count = p.value + }) + return true + case strings.HasSuffix(mName, "_sum"): + p.processClassicHistogramSeries(lset, "_sum", func(hist *convertnhcb.TempHistogram) { + hist.Sum = p.value + }) + return true + } + return false +} + +func (p *NHCBParser) processClassicHistogramSeries(lset labels.Labels, suffix string, updateHist func(*convertnhcb.TempHistogram)) { + if p.state != stateCollecting { + p.storeClassicLabels() + p.tempCT = p.parser.CreatedTimestamp() + p.state = stateCollecting + } + p.tempLsetNHCB = convertnhcb.GetHistogramMetricBase(lset, suffix) + p.storeExemplars() + updateHist(&p.tempNHCB) +} + +func (p *NHCBParser) storeExemplars() { + for ex := p.nextExemplarPtr(); p.parser.Exemplar(ex); ex = p.nextExemplarPtr() { + p.tempExemplarCount++ + } +} + +func (p *NHCBParser) nextExemplarPtr() *exemplar.Exemplar { + switch { + case p.tempExemplarCount == len(p.tempExemplars)-1: + // Reuse the previously allocated exemplar, it was not filled up. + case len(p.tempExemplars) == cap(p.tempExemplars): + // Let the runtime grow the slice. + p.tempExemplars = append(p.tempExemplars, exemplar.Exemplar{}) + default: + // Take the next element into use. + p.tempExemplars = p.tempExemplars[:len(p.tempExemplars)+1] + } + return &p.tempExemplars[len(p.tempExemplars)-1] +} + +func (p *NHCBParser) swapExemplars() { + p.exemplars = p.tempExemplars[:p.tempExemplarCount] + p.tempExemplars = p.tempExemplars[:0] + p.tempExemplarCount = 0 +} + +// processNHCB converts the collated classic histogram series to NHCB and caches the info +// to be returned to callers. Retruns true if the conversion was successful. 
+func (p *NHCBParser) processNHCB() bool { + if p.state != stateCollecting { + return false + } + ub := make([]float64, 0, len(p.tempNHCB.BucketCounts)) + for b := range p.tempNHCB.BucketCounts { + ub = append(ub, b) + } + upperBounds, hBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(ub, false) + fhBase := hBase.ToFloat(nil) + h, fh := convertnhcb.NewHistogram(p.tempNHCB, upperBounds, hBase, fhBase) + if h != nil { + if err := h.Validate(); err != nil { + return false + } + p.hNHCB = h + p.fhNHCB = nil + } else if fh != nil { + if err := fh.Validate(); err != nil { + return false + } + p.hNHCB = nil + p.fhNHCB = fh + } + p.metricStringNHCB = p.tempLsetNHCB.Get(labels.MetricName) + strings.ReplaceAll(p.tempLsetNHCB.DropMetricName().String(), ", ", ",") + p.bytesNHCB = []byte(p.metricStringNHCB) + p.lsetNHCB = p.tempLsetNHCB + p.swapExemplars() + p.ctNHCB = p.tempCT + p.tempNHCB = convertnhcb.NewTempHistogram() + p.state = stateEmitting + p.tempCT = nil + return true +} diff --git a/model/textparse/nhcbparse_test.go b/model/textparse/nhcbparse_test.go new file mode 100644 index 0000000000..6152a85038 --- /dev/null +++ b/model/textparse/nhcbparse_test.go @@ -0,0 +1,939 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "bytes" + "encoding/binary" + "strconv" + "testing" + + "github.com/gogo/protobuf/proto" + "github.com/stretchr/testify/require" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + dto "github.com/prometheus/prometheus/prompb/io/prometheus/client" +) + +func TestNHCBParserOnOMParser(t *testing.T) { + // The input is taken originally from TestOpenMetricsParse, with additional tests for the NHCBParser. + + input := `# HELP go_gc_duration_seconds A summary of the GC invocation durations. +# TYPE go_gc_duration_seconds summary +# UNIT go_gc_duration_seconds seconds +go_gc_duration_seconds{quantile="0"} 4.9351e-05 +go_gc_duration_seconds{quantile="0.25"} 7.424100000000001e-05 +go_gc_duration_seconds{quantile="0.5",a="b"} 8.3835e-05 +# HELP nohelp1 +# HELP help2 escape \ \n \\ \" \x chars +# UNIT nounit +go_gc_duration_seconds{quantile="1.0",a="b"} 8.3835e-05 +go_gc_duration_seconds_count 99 +some:aggregate:rate5m{a_b="c"} 1 +# HELP go_goroutines Number of goroutines that currently exist. 
+# TYPE go_goroutines gauge +go_goroutines 33 123.123 +# TYPE hh histogram +hh_bucket{le="+Inf"} 1 +# TYPE gh gaugehistogram +gh_bucket{le="+Inf"} 1 +# TYPE hhh histogram +hhh_bucket{le="+Inf"} 1 # {id="histogram-bucket-test"} 4 +hhh_count 1 # {id="histogram-count-test"} 4 +# TYPE ggh gaugehistogram +ggh_bucket{le="+Inf"} 1 # {id="gaugehistogram-bucket-test",xx="yy"} 4 123.123 +ggh_count 1 # {id="gaugehistogram-count-test",xx="yy"} 4 123.123 +# TYPE smr_seconds summary +smr_seconds_count 2.0 # {id="summary-count-test"} 1 123.321 +smr_seconds_sum 42.0 # {id="summary-sum-test"} 1 123.321 +# TYPE ii info +ii{foo="bar"} 1 +# TYPE ss stateset +ss{ss="foo"} 1 +ss{ss="bar"} 0 +ss{A="a"} 0 +# TYPE un unknown +_metric_starting_with_underscore 1 +testmetric{_label_starting_with_underscore="foo"} 1 +testmetric{label="\"bar\""} 1 +# HELP foo Counter with and without labels to certify CT is parsed for both cases +# TYPE foo counter +foo_total 17.0 1520879607.789 # {id="counter-test"} 5 +foo_created 1520872607.123 +foo_total{a="b"} 17.0 1520879607.789 # {id="counter-test"} 5 +foo_created{a="b"} 1520872607.123 +# HELP bar Summary with CT at the end, making sure we find CT even if it's multiple lines a far +# TYPE bar summary +bar_count 17.0 +bar_sum 324789.3 +bar{quantile="0.95"} 123.7 +bar{quantile="0.99"} 150.0 +bar_created 1520872608.124 +# HELP baz Histogram with the same objective as above's summary +# TYPE baz histogram +baz_bucket{le="0.0"} 0 +baz_bucket{le="+Inf"} 17 +baz_count 17 +baz_sum 324789.3 +baz_created 1520872609.125 +# HELP fizz_created Gauge which shouldn't be parsed as CT +# TYPE fizz_created gauge +fizz_created 17.0 +# HELP something Histogram with _created between buckets and summary +# TYPE something histogram +something_count 18 +something_sum 324789.4 +something_created 1520430001 +something_bucket{le="0.0"} 1 +something_bucket{le="+Inf"} 18 +something_count{a="b"} 9 +something_sum{a="b"} 42123.0 +something_bucket{a="b",le="0.0"} 8 +something_bucket{a="b",le="+Inf"} 9 +something_created{a="b"} 1520430002 +# HELP yum Summary with _created between sum and quantiles +# TYPE yum summary +yum_count 20 +yum_sum 324789.5 +yum_created 1520430003 +yum{quantile="0.95"} 123.7 +yum{quantile="0.99"} 150.0 +# HELP foobar Summary with _created as the first line +# TYPE foobar summary +foobar_count 21 +foobar_created 1520430004 +foobar_sum 324789.6 +foobar{quantile="0.95"} 123.8 +foobar{quantile="0.99"} 150.1` + + input += "\n# HELP metric foo\x00bar" + input += "\nnull_byte_metric{a=\"abc\x00\"} 1" + input += "\n# EOF\n" + + exp := []parsedEntry{ + { + m: "go_gc_duration_seconds", + help: "A summary of the GC invocation durations.", + }, { + m: "go_gc_duration_seconds", + typ: model.MetricTypeSummary, + }, { + m: "go_gc_duration_seconds", + unit: "seconds", + }, { + m: `go_gc_duration_seconds{quantile="0"}`, + v: 4.9351e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"), + }, { + m: `go_gc_duration_seconds{quantile="0.25"}`, + v: 7.424100000000001e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.25"), + }, { + m: `go_gc_duration_seconds{quantile="0.5",a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.5", "a", "b"), + }, { + m: "nohelp1", + help: "", + }, { + m: "help2", + help: "escape \\ \n \\ \" \\x chars", + }, { + m: "nounit", + unit: "", + }, { + m: `go_gc_duration_seconds{quantile="1.0",a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", 
"go_gc_duration_seconds", "quantile", "1.0", "a", "b"), + }, { + m: `go_gc_duration_seconds_count`, + v: 99, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds_count"), + }, { + m: `some:aggregate:rate5m{a_b="c"}`, + v: 1, + lset: labels.FromStrings("__name__", "some:aggregate:rate5m", "a_b", "c"), + }, { + m: "go_goroutines", + help: "Number of goroutines that currently exist.", + }, { + m: "go_goroutines", + typ: model.MetricTypeGauge, + }, { + m: `go_goroutines`, + v: 33, + t: int64p(123123), + lset: labels.FromStrings("__name__", "go_goroutines"), + }, { + m: "hh", + typ: model.MetricTypeHistogram, + }, { + m: `hh{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1, + Sum: 0.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + // Custom values are empty as we do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "hh"), + }, { + m: "gh", + typ: model.MetricTypeGaugeHistogram, + }, { + m: `gh_bucket{le="+Inf"}`, + v: 1, + lset: labels.FromStrings("__name__", "gh_bucket", "le", "+Inf"), + }, { + m: "hhh", + typ: model.MetricTypeHistogram, + }, { + m: `hhh{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1, + Sum: 0.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + // Custom values are empty as we do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "hhh"), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "histogram-bucket-test"), Value: 4}, + {Labels: labels.FromStrings("id", "histogram-count-test"), Value: 4}, + }, + }, { + m: "ggh", + typ: model.MetricTypeGaugeHistogram, + }, { + m: `ggh_bucket{le="+Inf"}`, + v: 1, + lset: labels.FromStrings("__name__", "ggh_bucket", "le", "+Inf"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "gaugehistogram-bucket-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}}, + }, { + m: `ggh_count`, + v: 1, + lset: labels.FromStrings("__name__", "ggh_count"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "gaugehistogram-count-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}}, + }, { + m: "smr_seconds", + typ: model.MetricTypeSummary, + }, { + m: `smr_seconds_count`, + v: 2, + lset: labels.FromStrings("__name__", "smr_seconds_count"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "summary-count-test"), Value: 1, HasTs: true, Ts: 123321}}, + }, { + m: `smr_seconds_sum`, + v: 42, + lset: labels.FromStrings("__name__", "smr_seconds_sum"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "summary-sum-test"), Value: 1, HasTs: true, Ts: 123321}}, + }, { + m: "ii", + typ: model.MetricTypeInfo, + }, { + m: `ii{foo="bar"}`, + v: 1, + lset: labels.FromStrings("__name__", "ii", "foo", "bar"), + }, { + m: "ss", + typ: model.MetricTypeStateset, + }, { + m: `ss{ss="foo"}`, + v: 1, + lset: labels.FromStrings("__name__", "ss", "ss", "foo"), + }, { + m: `ss{ss="bar"}`, + v: 0, + lset: labels.FromStrings("__name__", "ss", "ss", "bar"), + }, { + m: `ss{A="a"}`, + v: 0, + lset: labels.FromStrings("A", "a", "__name__", "ss"), + }, { + m: "un", + typ: model.MetricTypeUnknown, + }, { + m: "_metric_starting_with_underscore", + v: 1, + lset: labels.FromStrings("__name__", "_metric_starting_with_underscore"), + }, { + m: "testmetric{_label_starting_with_underscore=\"foo\"}", + v: 1, + lset: labels.FromStrings("__name__", "testmetric", "_label_starting_with_underscore", "foo"), + }, { + m: 
"testmetric{label=\"\\\"bar\\\"\"}", + v: 1, + lset: labels.FromStrings("__name__", "testmetric", "label", `"bar"`), + }, { + m: "foo", + help: "Counter with and without labels to certify CT is parsed for both cases", + }, { + m: "foo", + typ: model.MetricTypeCounter, + }, { + m: "foo_total", + v: 17, + lset: labels.FromStrings("__name__", "foo_total"), + t: int64p(1520879607789), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "counter-test"), Value: 5}}, + ct: int64p(1520872607123), + }, { + m: `foo_total{a="b"}`, + v: 17.0, + lset: labels.FromStrings("__name__", "foo_total", "a", "b"), + t: int64p(1520879607789), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "counter-test"), Value: 5}}, + ct: int64p(1520872607123), + }, { + m: "bar", + help: "Summary with CT at the end, making sure we find CT even if it's multiple lines a far", + }, { + m: "bar", + typ: model.MetricTypeSummary, + }, { + m: "bar_count", + v: 17.0, + lset: labels.FromStrings("__name__", "bar_count"), + ct: int64p(1520872608124), + }, { + m: "bar_sum", + v: 324789.3, + lset: labels.FromStrings("__name__", "bar_sum"), + ct: int64p(1520872608124), + }, { + m: `bar{quantile="0.95"}`, + v: 123.7, + lset: labels.FromStrings("__name__", "bar", "quantile", "0.95"), + ct: int64p(1520872608124), + }, { + m: `bar{quantile="0.99"}`, + v: 150.0, + lset: labels.FromStrings("__name__", "bar", "quantile", "0.99"), + ct: int64p(1520872608124), + }, { + m: "baz", + help: "Histogram with the same objective as above's summary", + }, { + m: "baz", + typ: model.MetricTypeHistogram, + }, { + m: `baz{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 17, + Sum: 324789.3, + PositiveSpans: []histogram.Span{{Offset: 1, Length: 1}}, // The first bucket has 0 count so we don't store it and Offset is 1. + PositiveBuckets: []int64{17}, + CustomValues: []float64{0.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "baz"), + ct: int64p(1520872609125), + }, { + m: "fizz_created", + help: "Gauge which shouldn't be parsed as CT", + }, { + m: "fizz_created", + typ: model.MetricTypeGauge, + }, { + m: `fizz_created`, + v: 17, + lset: labels.FromStrings("__name__", "fizz_created"), + }, { + m: "something", + help: "Histogram with _created between buckets and summary", + }, { + m: "something", + typ: model.MetricTypeHistogram, + }, { + m: `something{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 18, + Sum: 324789.4, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, + PositiveBuckets: []int64{1, 16}, + CustomValues: []float64{0.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "something"), + ct: int64p(1520430001000), + }, { + m: `something{a="b"}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 9, + Sum: 42123.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, + PositiveBuckets: []int64{8, -7}, + CustomValues: []float64{0.0}, // We do not store the +Inf boundary. 
+ }, + lset: labels.FromStrings("__name__", "something", "a", "b"), + ct: int64p(1520430002000), + }, { + m: "yum", + help: "Summary with _created between sum and quantiles", + }, { + m: "yum", + typ: model.MetricTypeSummary, + }, { + m: `yum_count`, + v: 20, + lset: labels.FromStrings("__name__", "yum_count"), + ct: int64p(1520430003000), + }, { + m: `yum_sum`, + v: 324789.5, + lset: labels.FromStrings("__name__", "yum_sum"), + ct: int64p(1520430003000), + }, { + m: `yum{quantile="0.95"}`, + v: 123.7, + lset: labels.FromStrings("__name__", "yum", "quantile", "0.95"), + ct: int64p(1520430003000), + }, { + m: `yum{quantile="0.99"}`, + v: 150.0, + lset: labels.FromStrings("__name__", "yum", "quantile", "0.99"), + ct: int64p(1520430003000), + }, { + m: "foobar", + help: "Summary with _created as the first line", + }, { + m: "foobar", + typ: model.MetricTypeSummary, + }, { + m: `foobar_count`, + v: 21, + lset: labels.FromStrings("__name__", "foobar_count"), + ct: int64p(1520430004000), + }, { + m: `foobar_sum`, + v: 324789.6, + lset: labels.FromStrings("__name__", "foobar_sum"), + ct: int64p(1520430004000), + }, { + m: `foobar{quantile="0.95"}`, + v: 123.8, + lset: labels.FromStrings("__name__", "foobar", "quantile", "0.95"), + ct: int64p(1520430004000), + }, { + m: `foobar{quantile="0.99"}`, + v: 150.1, + lset: labels.FromStrings("__name__", "foobar", "quantile", "0.99"), + ct: int64p(1520430004000), + }, { + m: "metric", + help: "foo\x00bar", + }, { + m: "null_byte_metric{a=\"abc\x00\"}", + v: 1, + lset: labels.FromStrings("__name__", "null_byte_metric", "a", "abc\x00"), + }, + } + + p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + p = NewNHCBParser(p, labels.NewSymbolTable(), false) + got := testParse(t, p) + requireEntries(t, exp, got) +} + +func TestNHCBParserOMParser_MultipleHistograms(t *testing.T) { + // The input is taken originally from TestOpenMetricsParse, with additional tests for the NHCBParser. + + input := `# HELP something Histogram with _created between buckets and summary +# TYPE something histogram +something_count 18 +something_sum 324789.4 +something_bucket{le="0.0"} 1 # {id="something-test"} -2.0 +something_bucket{le="1.0"} 16 # {id="something-test"} 0.5 +something_bucket{le="+Inf"} 18 # {id="something-test"} 8 +something_count{a="b"} 9 +something_sum{a="b"} 42123.0 +something_bucket{a="b",le="0.0"} 8 # {id="something-test"} 0.0 123.321 +something_bucket{a="b",le="1.0"} 8 +something_bucket{a="b",le="+Inf"} 9 # {id="something-test"} 2e100 123.000 +# EOF +` + + exp := []parsedEntry{ + { + m: "something", + help: "Histogram with _created between buckets and summary", + }, { + m: "something", + typ: model.MetricTypeHistogram, + }, { + m: `something{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 18, + Sum: 324789.4, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 3}}, + PositiveBuckets: []int64{1, 14, -13}, + CustomValues: []float64{0.0, 1.0}, // We do not store the +Inf boundary. 
+ }, + lset: labels.FromStrings("__name__", "something"), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "something-test"), Value: -2.0}, + {Labels: labels.FromStrings("id", "something-test"), Value: 0.5}, + {Labels: labels.FromStrings("id", "something-test"), Value: 8.0}, + }, + }, { + m: `something{a="b"}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 9, + Sum: 42123.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}, {Offset: 1, Length: 1}}, + PositiveBuckets: []int64{8, -7}, + CustomValues: []float64{0.0, 1.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "something", "a", "b"), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "something-test"), Value: 0.0, HasTs: true, Ts: 123321}, + {Labels: labels.FromStrings("id", "something-test"), Value: 2e100, HasTs: true, Ts: 123000}, + }, + }, + } + + p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + + p = NewNHCBParser(p, labels.NewSymbolTable(), false) + got := testParse(t, p) + requireEntries(t, exp, got) +} + +// Verify the requirement tables from +// https://github.com/prometheus/prometheus/issues/13532 . +// "classic" means the option "always_scrape_classic_histograms". +// "nhcb" means the option "convert_classic_histograms_to_nhcb". +// +// Case 1. Only classic histogram is exposed. +// +// | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. +// | classic=false, nhcb=false | YES | NO | NO |. +// | classic=true, nhcb=false | YES | NO | NO |. +// | classic=false, nhcb=true | NO | NO | YES |. +// | classic=true, nhcb=true | YES | NO | YES |. +// +// Case 2. Both classic and exponential histograms are exposed. +// +// | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. +// | classic=false, nhcb=false | NO | YES | NO |. +// | classic=true, nhcb=false | YES | YES | NO |. +// | classic=false, nhcb=true | NO | YES | NO |. +// | classic=true, nhcb=true | YES | YES | NO |. +// +// Case 3. Only exponential histogram is exposed. +// +// | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. +// | classic=false, nhcb=false | NO | YES | NO |. +// | classic=true, nhcb=false | NO | YES | NO |. +// | classic=false, nhcb=true | NO | YES | NO |. +// | classic=true, nhcb=true | NO | YES | NO |. +func TestNHCBParser_NoNHCBWhenExponential(t *testing.T) { + type requirement struct { + expectClassic bool + expectExponential bool + expectNHCB bool + } + + cases := []map[string]requirement{ + // Case 1. + { + "classic=false, nhcb=false": {expectClassic: true, expectExponential: false, expectNHCB: false}, + "classic=true, nhcb=false": {expectClassic: true, expectExponential: false, expectNHCB: false}, + "classic=false, nhcb=true": {expectClassic: false, expectExponential: false, expectNHCB: true}, + "classic=true, nhcb=true": {expectClassic: true, expectExponential: false, expectNHCB: true}, + }, + // Case 2. + { + "classic=false, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=false": {expectClassic: true, expectExponential: true, expectNHCB: false}, + "classic=false, nhcb=true": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=true": {expectClassic: true, expectExponential: true, expectNHCB: false}, + }, + // Case 3. 
+ { + "classic=false, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=false, nhcb=true": {expectClassic: false, expectExponential: true, expectNHCB: false}, + "classic=true, nhcb=true": {expectClassic: false, expectExponential: true, expectNHCB: false}, + }, + } + + // Create parser from keep classic option. + type parserFactory func(bool) Parser + + type testCase struct { + name string + parser parserFactory + classic bool + nhcb bool + exp []parsedEntry + } + + type parserOptions struct { + useUTF8sep bool + hasCreatedTimeStamp bool + } + // Defines the parser name, the Parser factory and the test cases + // supported by the parser and parser options. + parsers := []func() (string, parserFactory, []int, parserOptions){ + func() (string, parserFactory, []int, parserOptions) { + factory := func(keepClassic bool) Parser { + inputBuf := createTestProtoBufHistogram(t) + return NewProtobufParser(inputBuf.Bytes(), keepClassic, labels.NewSymbolTable()) + } + return "ProtoBuf", factory, []int{1, 2, 3}, parserOptions{useUTF8sep: true, hasCreatedTimeStamp: true} + }, + func() (string, parserFactory, []int, parserOptions) { + factory := func(keepClassic bool) Parser { + input := createTestOpenMetricsHistogram() + return NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + } + return "OpenMetrics", factory, []int{1}, parserOptions{hasCreatedTimeStamp: true} + }, + func() (string, parserFactory, []int, parserOptions) { + factory := func(keepClassic bool) Parser { + input := createTestPromHistogram() + return NewPromParser([]byte(input), labels.NewSymbolTable()) + } + return "Prometheus", factory, []int{1}, parserOptions{} + }, + } + + testCases := []testCase{} + for _, parser := range parsers { + for _, classic := range []bool{false, true} { + for _, nhcb := range []bool{false, true} { + parserName, parser, supportedCases, options := parser() + requirementName := "classic=" + strconv.FormatBool(classic) + ", nhcb=" + strconv.FormatBool(nhcb) + tc := testCase{ + name: "parser=" + parserName + ", " + requirementName, + parser: parser, + classic: classic, + nhcb: nhcb, + exp: []parsedEntry{}, + } + for _, caseNumber := range supportedCases { + caseI := cases[caseNumber-1] + req, ok := caseI[requirementName] + require.True(t, ok, "Case %d does not have requirement %s", caseNumber, requirementName) + metric := "test_histogram" + strconv.Itoa(caseNumber) + tc.exp = append(tc.exp, parsedEntry{ + m: metric, + help: "Test histogram " + strconv.Itoa(caseNumber), + }) + tc.exp = append(tc.exp, parsedEntry{ + m: metric, + typ: model.MetricTypeHistogram, + }) + + var ct *int64 + if options.hasCreatedTimeStamp { + ct = int64p(1000) + } + + var bucketForMetric func(string) string + if options.useUTF8sep { + bucketForMetric = func(s string) string { + return "_bucket\xffle\xff" + s + } + } else { + bucketForMetric = func(s string) string { + return "_bucket{le=\"" + s + "\"}" + } + } + + if req.expectExponential { + // Always expect exponential histogram first. 
+ exponentialSeries := []parsedEntry{ + { + m: metric, + shs: &histogram.Histogram{ + Schema: 3, + Count: 175, + Sum: 0.0008280461746287094, + ZeroThreshold: 2.938735877055719e-39, + ZeroCount: 2, + PositiveSpans: []histogram.Span{{Offset: -161, Length: 1}, {Offset: 8, Length: 3}}, + NegativeSpans: []histogram.Span{{Offset: -162, Length: 1}, {Offset: 23, Length: 4}}, + PositiveBuckets: []int64{1, 2, -1, -1}, + NegativeBuckets: []int64{1, 3, -2, -1, 1}, + }, + lset: labels.FromStrings("__name__", metric), + t: int64p(1234568), + ct: ct, + }, + } + tc.exp = append(tc.exp, exponentialSeries...) + } + if req.expectClassic { + // Always expect classic histogram series after exponential. + classicSeries := []parsedEntry{ + { + m: metric + "_count", + v: 175, + lset: labels.FromStrings("__name__", metric+"_count"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + "_sum", + v: 0.0008280461746287094, + lset: labels.FromStrings("__name__", metric+"_sum"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("-0.0004899999999999998"), + v: 2, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0004899999999999998"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("-0.0003899999999999998"), + v: 4, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0003899999999999998"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("-0.0002899999999999998"), + v: 16, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0002899999999999998"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("+Inf"), + v: 175, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "+Inf"), + t: int64p(1234568), + ct: ct, + }, + } + tc.exp = append(tc.exp, classicSeries...) + } + if req.expectNHCB { + // Always expect NHCB series after classic. + nhcbSeries := []parsedEntry{ + { + m: metric + "{}", + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 175, + Sum: 0.0008280461746287094, + PositiveSpans: []histogram.Span{{Length: 4}}, + PositiveBuckets: []int64{2, 0, 10, 147}, + CustomValues: []float64{-0.0004899999999999998, -0.0003899999999999998, -0.0002899999999999998}, + }, + lset: labels.FromStrings("__name__", metric), + t: int64p(1234568), + ct: ct, + }, + } + tc.exp = append(tc.exp, nhcbSeries...) 
+ } + } + testCases = append(testCases, tc) + } + } + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p := tc.parser(tc.classic) + if tc.nhcb { + p = NewNHCBParser(p, labels.NewSymbolTable(), tc.classic) + } + got := testParse(t, p) + requireEntries(t, tc.exp, got) + }) + } +} + +func createTestProtoBufHistogram(t *testing.T) *bytes.Buffer { + testMetricFamilies := []string{`name: "test_histogram1" +help: "Test histogram 1" +type: HISTOGRAM +metric: < + histogram: < + created_timestamp: < + seconds: 1 + nanos: 1 + > + sample_count: 175 + sample_sum: 0.0008280461746287094 + bucket: < + cumulative_count: 2 + upper_bound: -0.0004899999999999998 + > + bucket: < + cumulative_count: 4 + upper_bound: -0.0003899999999999998 + > + bucket: < + cumulative_count: 16 + upper_bound: -0.0002899999999999998 + > + > + timestamp_ms: 1234568 +>`, `name: "test_histogram2" +help: "Test histogram 2" +type: HISTOGRAM +metric: < + histogram: < + created_timestamp: < + seconds: 1 + nanos: 1 + > + sample_count: 175 + sample_sum: 0.0008280461746287094 + bucket: < + cumulative_count: 2 + upper_bound: -0.0004899999999999998 + > + bucket: < + cumulative_count: 4 + upper_bound: -0.0003899999999999998 + > + bucket: < + cumulative_count: 16 + upper_bound: -0.0002899999999999998 + > + schema: 3 + zero_threshold: 2.938735877055719e-39 + zero_count: 2 + negative_span: < + offset: -162 + length: 1 + > + negative_span: < + offset: 23 + length: 4 + > + negative_delta: 1 + negative_delta: 3 + negative_delta: -2 + negative_delta: -1 + negative_delta: 1 + positive_span: < + offset: -161 + length: 1 + > + positive_span: < + offset: 8 + length: 3 + > + positive_delta: 1 + positive_delta: 2 + positive_delta: -1 + positive_delta: -1 + > + timestamp_ms: 1234568 +>`, `name: "test_histogram3" +help: "Test histogram 3" +type: HISTOGRAM +metric: < + histogram: < + created_timestamp: < + seconds: 1 + nanos: 1 + > + sample_count: 175 + sample_sum: 0.0008280461746287094 + schema: 3 + zero_threshold: 2.938735877055719e-39 + zero_count: 2 + negative_span: < + offset: -162 + length: 1 + > + negative_span: < + offset: 23 + length: 4 + > + negative_delta: 1 + negative_delta: 3 + negative_delta: -2 + negative_delta: -1 + negative_delta: 1 + positive_span: < + offset: -161 + length: 1 + > + positive_span: < + offset: 8 + length: 3 + > + positive_delta: 1 + positive_delta: 2 + positive_delta: -1 + positive_delta: -1 + > + timestamp_ms: 1234568 +> +`} + + varintBuf := make([]byte, binary.MaxVarintLen32) + buf := &bytes.Buffer{} + + for _, tmf := range testMetricFamilies { + pb := &dto.MetricFamily{} + // From text to proto message. + require.NoError(t, proto.UnmarshalText(tmf, pb)) + // From proto message to binary protobuf. + protoBuf, err := proto.Marshal(pb) + require.NoError(t, err) + + // Write first length, then binary protobuf. 
+ varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf))) + buf.Write(varintBuf[:varintLength]) + buf.Write(protoBuf) + } + + return buf +} + +func createTestOpenMetricsHistogram() string { + return `# HELP test_histogram1 Test histogram 1 +# TYPE test_histogram1 histogram +test_histogram1_count 175 1234.568 +test_histogram1_sum 0.0008280461746287094 1234.568 +test_histogram1_bucket{le="-0.0004899999999999998"} 2 1234.568 +test_histogram1_bucket{le="-0.0003899999999999998"} 4 1234.568 +test_histogram1_bucket{le="-0.0002899999999999998"} 16 1234.568 +test_histogram1_bucket{le="+Inf"} 175 1234.568 +test_histogram1_created 1 +# EOF` +} + +func createTestPromHistogram() string { + return `# HELP test_histogram1 Test histogram 1 +# TYPE test_histogram1 histogram +test_histogram1_count 175 1234568 +test_histogram1_sum 0.0008280461746287094 1234768 +test_histogram1_bucket{le="-0.0004899999999999998"} 2 1234568 +test_histogram1_bucket{le="-0.0003899999999999998"} 4 1234568 +test_histogram1_bucket{le="-0.0002899999999999998"} 16 1234568 +test_histogram1_bucket{le="+Inf"} 175 1234568` +} diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go index 5f0415d3ee..3ae9c7ddfc 100644 --- a/model/textparse/openmetricsparse.go +++ b/model/textparse/openmetricsparse.go @@ -17,13 +17,16 @@ package textparse import ( + "bytes" "errors" "fmt" "io" "math" + "strconv" "strings" "unicode/utf8" + "github.com/cespare/xxhash/v2" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/exemplar" @@ -72,15 +75,16 @@ func (l *openMetricsLexer) Error(es string) { // OpenMetrics text exposition format. // This is based on the working draft https://docs.google.com/document/u/1/d/1KwV0mAXwwbvvifBvDKH_LU1YjyXE_wxCkHNoCGq1GX0/edit type OpenMetricsParser struct { - l *openMetricsLexer - builder labels.ScratchBuilder - series []byte - text []byte - mtype model.MetricType - val float64 - ts int64 - hasTS bool - start int + l *openMetricsLexer + builder labels.ScratchBuilder + series []byte + mfNameLen int // length of metric family name to get from series. + text []byte + mtype model.MetricType + val float64 + ts int64 + hasTS bool + start int // offsets is a list of offsets into series that describe the positions // of the metric name and label names and values for this series. // p.offsets[0] is the start character of the metric name. @@ -95,7 +99,15 @@ type OpenMetricsParser struct { exemplarTs int64 hasExemplarTs bool - skipCTSeries bool + // Created timestamp parsing state. + ct int64 + ctHashSet uint64 + // ignoreExemplar instructs the parser to not overwrite exemplars (to keep them while peeking ahead). + ignoreExemplar bool + // visitedMFName is the metric family name of the last visited metric when peeking ahead + // for _created series during the execution of the CreatedTimestamp method. + visitedMFName []byte + skipCTSeries bool } type openMetricsParserOptions struct { @@ -201,7 +213,7 @@ func (p *OpenMetricsParser) Metric(l *labels.Labels) string { label := unreplace(s[a:b]) c := p.offsets[i+2] - p.start d := p.offsets[i+3] - p.start - value := unreplace(s[c:d]) + value := normalizeFloatsInLabelValues(p.mtype, label, unreplace(s[c:d])) p.builder.Add(label, value) } @@ -252,87 +264,144 @@ func (p *OpenMetricsParser) Exemplar(e *exemplar.Exemplar) bool { // CreatedTimestamp returns the created timestamp for a current Metric if exists or nil. 
// NOTE(Maniktherana): Might use additional CPU/mem resources due to deep copy of parser required for peeking given 1.0 OM specification on _created series. func (p *OpenMetricsParser) CreatedTimestamp() *int64 { - if !TypeRequiresCT(p.mtype) { + if !typeRequiresCT(p.mtype) { // Not a CT supported metric type, fast path. + p.ctHashSet = 0 // Use ctHashSet as a single way of telling "empty cache" return nil } var ( - currLset labels.Labels - buf []byte - peekWithoutNameLsetHash uint64 + buf []byte + currName []byte ) - p.Metric(&currLset) - currFamilyLsetHash, buf := currLset.HashWithoutLabels(buf, labels.MetricName, "le", "quantile") - // Search for the _created line for the currFamilyLsetHash using ephemeral parser until - // we see EOF or new metric family. We have to do it as we don't know where (and if) - // that CT line is. - // TODO(bwplotka): Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this. - peek := deepCopy(p) + if len(p.series) > 1 && p.series[0] == '{' && p.series[1] == '"' { + // special case for UTF-8 encoded metric family names. + currName = p.series[p.offsets[0]-p.start : p.mfNameLen+2] + } else { + currName = p.series[p.offsets[0]-p.start : p.mfNameLen] + } + + currHash := p.seriesHash(&buf, currName) + // Check cache, perhaps we fetched something already. + if currHash == p.ctHashSet && p.ct > 0 { + return &p.ct + } + + // Create a new lexer to reset the parser once this function is done executing. + resetLexer := &openMetricsLexer{ + b: p.l.b, + i: p.l.i, + start: p.l.start, + err: p.l.err, + state: p.l.state, + } + + p.skipCTSeries = false + + p.ignoreExemplar = true + savedStart := p.start + defer func() { + p.ignoreExemplar = false + p.start = savedStart + p.l = resetLexer + }() + for { - eType, err := peek.Next() + eType, err := p.Next() if err != nil { - // This means peek will give error too later on, so def no CT line found. + // This means p.Next() will give error too later on, so def no CT line found. // This might result in partial scrape with wrong/missing CT, but only // spec improvement would help. - // TODO(bwplotka): Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this. + // TODO: Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this. + p.resetCTParseValues() return nil } if eType != EntrySeries { // Assume we hit different family, no CT line found. + p.resetCTParseValues() return nil } - var peekedLset labels.Labels - peek.Metric(&peekedLset) - peekedName := peekedLset.Get(model.MetricNameLabel) - if !strings.HasSuffix(peekedName, "_created") { + peekedName := p.series[p.offsets[0]-p.start : p.offsets[1]-p.start] + if len(peekedName) < 8 || string(peekedName[len(peekedName)-8:]) != "_created" { // Not a CT line, search more. continue } - // We got a CT line here, but let's search if CT line is actually for our series, edge case. - peekWithoutNameLsetHash, _ = peekedLset.HashWithoutLabels(buf, labels.MetricName, "le", "quantile") - if peekWithoutNameLsetHash != currFamilyLsetHash { - // CT line for a different series, for our series no CT. + // Remove _created suffix. + peekedHash := p.seriesHash(&buf, peekedName[:len(peekedName)-8]) + if peekedHash != currHash { + // Found CT line for a different series, for our series no CT. + p.resetCTParseValues() return nil } - ct := int64(peek.val) + + // All timestamps in OpenMetrics are Unix Epoch in seconds. Convert to milliseconds. 
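+		// OpenMetrics timestamps may carry fractional seconds, hence the float multiplication.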
+ // https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#timestamps + ct := int64(p.val * 1000.0) + p.setCTParseValues(ct, currHash, currName, true) return &ct } } -// TypeRequiresCT returns true if the metric type requires a _created timestamp. -func TypeRequiresCT(t model.MetricType) bool { - switch t { - case model.MetricTypeCounter, model.MetricTypeSummary, model.MetricTypeHistogram: - return true - default: - return false +var ( + leBytes = []byte{108, 101} + quantileBytes = []byte{113, 117, 97, 110, 116, 105, 108, 101} +) + +// seriesHash generates a hash based on the metric family name and the offsets +// of label names and values from the parsed OpenMetrics data. It skips quantile +// and le labels for summaries and histograms respectively. +func (p *OpenMetricsParser) seriesHash(offsetsArr *[]byte, metricFamilyName []byte) uint64 { + // Iterate through p.offsets to find the label names and values. + for i := 2; i < len(p.offsets); i += 4 { + lStart := p.offsets[i] - p.start + lEnd := p.offsets[i+1] - p.start + label := p.series[lStart:lEnd] + // Skip quantile and le labels for summaries and histograms. + if p.mtype == model.MetricTypeSummary && bytes.Equal(label, quantileBytes) { + continue + } + if p.mtype == model.MetricTypeHistogram && bytes.Equal(label, leBytes) { + continue + } + *offsetsArr = append(*offsetsArr, p.series[lStart:lEnd]...) + vStart := p.offsets[i+2] - p.start + vEnd := p.offsets[i+3] - p.start + *offsetsArr = append(*offsetsArr, p.series[vStart:vEnd]...) } + + *offsetsArr = append(*offsetsArr, metricFamilyName...) + hashedOffsets := xxhash.Sum64(*offsetsArr) + + // Reset the offsets array for later reuse. + *offsetsArr = (*offsetsArr)[:0] + return hashedOffsets } -// deepCopy creates a copy of a parser without re-using the slices' original memory addresses. -func deepCopy(p *OpenMetricsParser) OpenMetricsParser { - newB := make([]byte, len(p.l.b)) - copy(newB, p.l.b) +// setCTParseValues sets the parser to the state after CreatedTimestamp method was called and CT was found. +// This is useful to prevent re-parsing the same series again and early return the CT value. +func (p *OpenMetricsParser) setCTParseValues(ct int64, ctHashSet uint64, mfName []byte, skipCTSeries bool) { + p.ct = ct + p.ctHashSet = ctHashSet + p.visitedMFName = mfName + p.skipCTSeries = skipCTSeries // Do we need to set it? +} - newLexer := &openMetricsLexer{ - b: newB, - i: p.l.i, - start: p.l.start, - err: p.l.err, - state: p.l.state, - } +// resetCtParseValues resets the parser to the state before CreatedTimestamp method was called. +func (p *OpenMetricsParser) resetCTParseValues() { + p.ctHashSet = 0 + p.skipCTSeries = true +} - newParser := OpenMetricsParser{ - l: newLexer, - builder: p.builder, - mtype: p.mtype, - val: p.val, - skipCTSeries: false, +// typeRequiresCT returns true if the metric type requires a _created timestamp. +func typeRequiresCT(t model.MetricType) bool { + switch t { + case model.MetricTypeCounter, model.MetricTypeSummary, model.MetricTypeHistogram: + return true + default: + return false } - return newParser } // nextToken returns the next token from the openMetricsLexer. 
@@ -356,10 +425,12 @@ func (p *OpenMetricsParser) Next() (Entry, error) { p.start = p.l.i p.offsets = p.offsets[:0] - p.eOffsets = p.eOffsets[:0] - p.exemplar = p.exemplar[:0] - p.exemplarVal = 0 - p.hasExemplarTs = false + if !p.ignoreExemplar { + p.eOffsets = p.eOffsets[:0] + p.exemplar = p.exemplar[:0] + p.exemplarVal = 0 + p.hasExemplarTs = false + } switch t := p.nextToken(); t { case tEOFWord: @@ -378,6 +449,7 @@ func (p *OpenMetricsParser) Next() (Entry, error) { mStart++ mEnd-- } + p.mfNameLen = mEnd - mStart p.offsets = append(p.offsets, mStart, mEnd) default: return EntryInvalid, p.parseError("expected metric name after "+t.String(), t2) @@ -483,6 +555,16 @@ func (p *OpenMetricsParser) Next() (Entry, error) { func (p *OpenMetricsParser) parseComment() error { var err error + + if p.ignoreExemplar { + for t := p.nextToken(); t != tLinebreak; t = p.nextToken() { + if t == tEOF { + return errors.New("data does not end with # EOF") + } + } + return nil + } + // Parse the labels. p.eOffsets, err = p.parseLVals(p.eOffsets, true) if err != nil { @@ -591,10 +673,9 @@ func (p *OpenMetricsParser) parseLVals(offsets []int, isExemplar bool) ([]int, e // isCreatedSeries returns true if the current series is a _created series. func (p *OpenMetricsParser) isCreatedSeries() bool { - var newLbs labels.Labels - p.Metric(&newLbs) - name := newLbs.Get(model.MetricNameLabel) - if TypeRequiresCT(p.mtype) && strings.HasSuffix(name, "_created") { + metricName := p.series[p.offsets[0]-p.start : p.offsets[1]-p.start] + // check length so the metric is longer than len("_created") + if typeRequiresCT(p.mtype) && len(metricName) >= 8 && string(metricName[len(metricName)-8:]) == "_created" { return true } return false @@ -663,3 +744,15 @@ func (p *OpenMetricsParser) getFloatValue(t token, after string) (float64, error } return val, nil } + +// normalizeFloatsInLabelValues ensures that values of the "le" labels of classic histograms and "quantile" labels +// of summaries follow OpenMetrics formatting rules. 
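+// For example, le="1" is normalized to le="1.0" and quantile="0" to quantile="0.0".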
+func normalizeFloatsInLabelValues(t model.MetricType, l, v string) string { + if (t == model.MetricTypeSummary && l == model.QuantileLabel) || (t == model.MetricTypeHistogram && l == model.BucketLabel) { + f, err := strconv.ParseFloat(v, 64) + if err == nil { + return formatOpenMetricsFloat(f) + } + } + return v +} diff --git a/model/textparse/openmetricsparse_test.go b/model/textparse/openmetricsparse_test.go index cadaabc99f..9c3c679ab5 100644 --- a/model/textparse/openmetricsparse_test.go +++ b/model/textparse/openmetricsparse_test.go @@ -14,7 +14,7 @@ package textparse import ( - "errors" + "fmt" "io" "testing" @@ -69,32 +69,57 @@ testmetric{label="\"bar\""} 1 # HELP foo Counter with and without labels to certify CT is parsed for both cases # TYPE foo counter foo_total 17.0 1520879607.789 # {id="counter-test"} 5 -foo_created 1000 +foo_created 1520872607.123 foo_total{a="b"} 17.0 1520879607.789 # {id="counter-test"} 5 -foo_created{a="b"} 1000 +foo_created{a="b"} 1520872607.123 +foo_total{le="c"} 21.0 +foo_created{le="c"} 1520872621.123 +foo_total{le="1"} 10.0 # HELP bar Summary with CT at the end, making sure we find CT even if it's multiple lines a far # TYPE bar summary bar_count 17.0 bar_sum 324789.3 bar{quantile="0.95"} 123.7 bar{quantile="0.99"} 150.0 -bar_created 1520430000 +bar_created 1520872608.124 # HELP baz Histogram with the same objective as above's summary # TYPE baz histogram baz_bucket{le="0.0"} 0 baz_bucket{le="+Inf"} 17 baz_count 17 baz_sum 324789.3 -baz_created 1520430000 +baz_created 1520872609.125 # HELP fizz_created Gauge which shouldn't be parsed as CT # TYPE fizz_created gauge -fizz_created 17.0` +fizz_created 17.0 +# HELP something Histogram with _created between buckets and summary +# TYPE something histogram +something_count 18 +something_sum 324789.4 +something_created 1520430001 +something_bucket{le="0.0"} 1 +something_bucket{le="1"} 2 +something_bucket{le="+Inf"} 18 +# HELP yum Summary with _created between sum and quantiles +# TYPE yum summary +yum_count 20 +yum_sum 324789.5 +yum_created 1520430003 +yum{quantile="0.95"} 123.7 +yum{quantile="0.99"} 150.0 +# HELP foobar Summary with _created as the first line +# TYPE foobar summary +foobar_count 21 +foobar_created 1520430004 +foobar_sum 324789.6 +foobar{quantile="0.95"} 123.8 +foobar{quantile="0.99"} 150.1` input += "\n# HELP metric foo\x00bar" input += "\nnull_byte_metric{a=\"abc\x00\"} 1" input += "\n# EOF\n" - exp := []expectedParse{ + exp := []parsedEntry{ { m: "go_gc_duration_seconds", help: "A summary of the GC invocation durations.", @@ -107,7 +132,7 @@ fizz_created 17.0` }, { m: `go_gc_duration_seconds{quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"), }, { m: `go_gc_duration_seconds{quantile="0.25"}`, v: 7.424100000000001e-05, @@ -169,12 +194,16 @@ fizz_created 17.0` m: `hhh_bucket{le="+Inf"}`, v: 1, lset: labels.FromStrings("__name__", "hhh_bucket", "le", "+Inf"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "histogram-bucket-test"), Value: 4}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "histogram-bucket-test"), Value: 4}, + }, }, { m: `hhh_count`, v: 1, lset: labels.FromStrings("__name__", "hhh_count"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "histogram-count-test"), Value: 4}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "histogram-count-test"), Value: 4}, + }, }, { m: "ggh", typ: 
model.MetricTypeGaugeHistogram, @@ -182,12 +211,16 @@ fizz_created 17.0` m: `ggh_bucket{le="+Inf"}`, v: 1, lset: labels.FromStrings("__name__", "ggh_bucket", "le", "+Inf"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "gaugehistogram-bucket-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "gaugehistogram-bucket-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}, + }, }, { m: `ggh_count`, v: 1, lset: labels.FromStrings("__name__", "ggh_count"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "gaugehistogram-count-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "gaugehistogram-count-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}, + }, }, { m: "smr_seconds", typ: model.MetricTypeSummary, @@ -195,12 +228,16 @@ fizz_created 17.0` m: `smr_seconds_count`, v: 2, lset: labels.FromStrings("__name__", "smr_seconds_count"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "summary-count-test"), Value: 1, HasTs: true, Ts: 123321}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "summary-count-test"), Value: 1, HasTs: true, Ts: 123321}, + }, }, { m: `smr_seconds_sum`, v: 42, lset: labels.FromStrings("__name__", "smr_seconds_sum"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "summary-sum-test"), Value: 1, HasTs: true, Ts: 123321}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "summary-sum-test"), Value: 1, HasTs: true, Ts: 123321}, + }, }, { m: "ii", typ: model.MetricTypeInfo, @@ -249,15 +286,28 @@ fizz_created 17.0` v: 17, lset: labels.FromStrings("__name__", "foo_total"), t: int64p(1520879607789), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "counter-test"), Value: 5}, - ct: int64p(1000), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "counter-test"), Value: 5}, + }, + ct: int64p(1520872607123), }, { m: `foo_total{a="b"}`, v: 17.0, lset: labels.FromStrings("__name__", "foo_total", "a", "b"), t: int64p(1520879607789), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "counter-test"), Value: 5}, - ct: int64p(1000), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "counter-test"), Value: 5}, + }, + ct: int64p(1520872607123), + }, { + m: `foo_total{le="c"}`, + v: 21.0, + lset: labels.FromStrings("__name__", "foo_total", "le", "c"), + ct: int64p(1520872621123), + }, { + m: `foo_total{le="1"}`, + v: 10.0, + lset: labels.FromStrings("__name__", "foo_total", "le", "1"), }, { m: "bar", help: "Summary with CT at the end, making sure we find CT even if it's multiple lines a far", @@ -268,22 +318,22 @@ fizz_created 17.0` m: "bar_count", v: 17.0, lset: labels.FromStrings("__name__", "bar_count"), - ct: int64p(1520430000), + ct: int64p(1520872608124), }, { m: "bar_sum", v: 324789.3, lset: labels.FromStrings("__name__", "bar_sum"), - ct: int64p(1520430000), + ct: int64p(1520872608124), }, { m: `bar{quantile="0.95"}`, v: 123.7, lset: labels.FromStrings("__name__", "bar", "quantile", "0.95"), - ct: int64p(1520430000), + ct: int64p(1520872608124), }, { m: `bar{quantile="0.99"}`, v: 150.0, lset: labels.FromStrings("__name__", "bar", "quantile", "0.99"), - ct: int64p(1520430000), + ct: int64p(1520872608124), }, { m: "baz", help: "Histogram with the same objective as above's summary", @@ -294,22 +344,22 @@ fizz_created 17.0` m: `baz_bucket{le="0.0"}`, v: 0, lset: labels.FromStrings("__name__", "baz_bucket", "le", "0.0"), - ct: int64p(1520430000), + ct: 
int64p(1520872609125), }, { m: `baz_bucket{le="+Inf"}`, v: 17, lset: labels.FromStrings("__name__", "baz_bucket", "le", "+Inf"), - ct: int64p(1520430000), + ct: int64p(1520872609125), }, { m: `baz_count`, v: 17, lset: labels.FromStrings("__name__", "baz_count"), - ct: int64p(1520430000), + ct: int64p(1520872609125), }, { m: `baz_sum`, v: 324789.3, lset: labels.FromStrings("__name__", "baz_sum"), - ct: int64p(1520430000), + ct: int64p(1520872609125), }, { m: "fizz_created", help: "Gauge which shouldn't be parsed as CT", @@ -320,6 +370,89 @@ fizz_created 17.0` m: `fizz_created`, v: 17, lset: labels.FromStrings("__name__", "fizz_created"), + }, { + m: "something", + help: "Histogram with _created between buckets and summary", + }, { + m: "something", + typ: model.MetricTypeHistogram, + }, { + m: `something_count`, + v: 18, + lset: labels.FromStrings("__name__", "something_count"), + ct: int64p(1520430001000), + }, { + m: `something_sum`, + v: 324789.4, + lset: labels.FromStrings("__name__", "something_sum"), + ct: int64p(1520430001000), + }, { + m: `something_bucket{le="0.0"}`, + v: 1, + lset: labels.FromStrings("__name__", "something_bucket", "le", "0.0"), + ct: int64p(1520430001000), + }, { + m: `something_bucket{le="1"}`, + v: 2, + lset: labels.FromStrings("__name__", "something_bucket", "le", "1.0"), + ct: int64p(1520430001000), + }, { + m: `something_bucket{le="+Inf"}`, + v: 18, + lset: labels.FromStrings("__name__", "something_bucket", "le", "+Inf"), + ct: int64p(1520430001000), + }, { + m: "yum", + help: "Summary with _created between sum and quantiles", + }, { + m: "yum", + typ: model.MetricTypeSummary, + }, { + m: `yum_count`, + v: 20, + lset: labels.FromStrings("__name__", "yum_count"), + ct: int64p(1520430003000), + }, { + m: `yum_sum`, + v: 324789.5, + lset: labels.FromStrings("__name__", "yum_sum"), + ct: int64p(1520430003000), + }, { + m: `yum{quantile="0.95"}`, + v: 123.7, + lset: labels.FromStrings("__name__", "yum", "quantile", "0.95"), + ct: int64p(1520430003000), + }, { + m: `yum{quantile="0.99"}`, + v: 150.0, + lset: labels.FromStrings("__name__", "yum", "quantile", "0.99"), + ct: int64p(1520430003000), + }, { + m: "foobar", + help: "Summary with _created as the first line", + }, { + m: "foobar", + typ: model.MetricTypeSummary, + }, { + m: `foobar_count`, + v: 21, + lset: labels.FromStrings("__name__", "foobar_count"), + ct: int64p(1520430004000), + }, { + m: `foobar_sum`, + v: 324789.6, + lset: labels.FromStrings("__name__", "foobar_sum"), + ct: int64p(1520430004000), + }, { + m: `foobar{quantile="0.95"}`, + v: 123.8, + lset: labels.FromStrings("__name__", "foobar", "quantile", "0.95"), + ct: int64p(1520430004000), + }, { + m: `foobar{quantile="0.99"}`, + v: 150.1, + lset: labels.FromStrings("__name__", "foobar", "quantile", "0.99"), + ct: int64p(1520430004000), }, { m: "metric", help: "foo\x00bar", @@ -331,7 +464,8 @@ fizz_created 17.0` } p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) - checkParseResultsWithCT(t, p, exp, true) + got := testParse(t, p) + requireEntries(t, exp, got) } func TestUTF8OpenMetricsParse(t *testing.T) { @@ -346,7 +480,7 @@ func TestUTF8OpenMetricsParse(t *testing.T) { # UNIT "go.gc_duration_seconds" seconds {"go.gc_duration_seconds",quantile="0"} 4.9351e-05 {"go.gc_duration_seconds",quantile="0.25"} 7.424100000000001e-05 -{"go.gc_duration_seconds_created"} 12313 +{"go.gc_duration_seconds_created"} 1520872607.123 {"go.gc_duration_seconds",quantile="0.5",a="b"} 8.3835e-05 
{"http.status",q="0.9",a="b"} 8.3835e-05 {"http.status",q="0.9",a="b"} 8.3835e-05 @@ -356,7 +490,7 @@ func TestUTF8OpenMetricsParse(t *testing.T) { input += "\n# EOF\n" - exp := []expectedParse{ + exp := []parsedEntry{ { m: "go.gc_duration_seconds", help: "A summary of the GC invocation durations.", @@ -369,13 +503,13 @@ func TestUTF8OpenMetricsParse(t *testing.T) { }, { m: `{"go.gc_duration_seconds",quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"), - ct: int64p(12313), + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.0"), + ct: int64p(1520872607123), }, { m: `{"go.gc_duration_seconds",quantile="0.25"}`, v: 7.424100000000001e-05, lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.25"), - ct: int64p(12313), + ct: int64p(1520872607123), }, { m: `{"go.gc_duration_seconds",quantile="0.5",a="b"}`, v: 8.3835e-05, @@ -405,7 +539,8 @@ choices}`, "strange©™\n'quoted' \"name\"", "6"), } p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) - checkParseResultsWithCT(t, p, exp, true) + got := testParse(t, p) + requireEntries(t, exp, got) } func TestOpenMetricsParseErrors(t *testing.T) { @@ -699,7 +834,7 @@ func TestOpenMetricsParseErrors(t *testing.T) { } for i, c := range cases { - p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable()) + p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) var err error for err == nil { _, err = p.Next() @@ -764,231 +899,121 @@ func TestOMNullByteHandling(t *testing.T) { } for i, c := range cases { - p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable()) + p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) var err error for err == nil { _, err = p.Next() } if c.err == "" { - require.Equal(t, io.EOF, err, "test %d", i) + require.ErrorIs(t, err, io.EOF, "test %d", i) continue } - require.Equal(t, c.err, err.Error(), "test %d", i) + require.EqualError(t, err, c.err, "test %d", i) } } -// While not desirable, there are cases were CT fails to parse and -// these tests show them. +// TestCTParseFailures tests known failure edge cases, we know does not work due +// current OM spec limitations or clients with broken OM format. // TODO(maniktherana): Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this. 
func TestCTParseFailures(t *testing.T) { - input := `# HELP something Histogram with _created between buckets and summary -# TYPE something histogram -something_count 17 -something_sum 324789.3 -something_created 1520430001 -something_bucket{le="0.0"} 0 -something_bucket{le="+Inf"} 17 -# HELP thing Histogram with _created as first line + for _, tcase := range []struct { + name string + input string + expected []parsedEntry + }{ + { + name: "_created line is a first one", + input: `# HELP thing histogram with _created as first line # TYPE thing histogram -thing_created 1520430002 +thing_created 1520872607.123 thing_count 17 thing_sum 324789.3 thing_bucket{le="0.0"} 0 thing_bucket{le="+Inf"} 17 -# HELP yum Summary with _created between sum and quantiles -# TYPE yum summary -yum_count 17.0 -yum_sum 324789.3 -yum_created 1520430003 -yum{quantile="0.95"} 123.7 -yum{quantile="0.99"} 150.0 -# HELP foobar Summary with _created as the first line -# TYPE foobar summary -foobar_created 1520430004 -foobar_count 17.0 -foobar_sum 324789.3 -foobar{quantile="0.95"} 123.7 -foobar{quantile="0.99"} 150.0` - - input += "\n# EOF\n" - - int64p := func(x int64) *int64 { return &x } - - type expectCT struct { - m string - ct *int64 - typ model.MetricType - help string - isErr bool - } - - exp := []expectCT{ - { - m: "something", - help: "Histogram with _created between buckets and summary", - isErr: false, - }, { - m: "something", - typ: model.MetricTypeHistogram, - isErr: false, - }, { - m: `something_count`, - ct: int64p(1520430001), - isErr: false, - }, { - m: `something_sum`, - ct: int64p(1520430001), - isErr: false, - }, { - m: `something_bucket{le="0.0"}`, - ct: int64p(1520430001), - isErr: true, - }, { - m: `something_bucket{le="+Inf"}`, - ct: int64p(1520430001), - isErr: true, - }, { - m: "thing", - help: "Histogram with _created as first line", - isErr: false, - }, { - m: "thing", - typ: model.MetricTypeHistogram, - isErr: false, - }, { - m: `thing_count`, - ct: int64p(1520430002), - isErr: true, - }, { - m: `thing_sum`, - ct: int64p(1520430002), - isErr: true, - }, { - m: `thing_bucket{le="0.0"}`, - ct: int64p(1520430002), - isErr: true, - }, { - m: `thing_bucket{le="+Inf"}`, - ct: int64p(1520430002), - isErr: true, - }, { - m: "yum", - help: "Summary with _created between summary and quantiles", - isErr: false, - }, { - m: "yum", - typ: model.MetricTypeSummary, - isErr: false, - }, { - m: "yum_count", - ct: int64p(1520430003), - isErr: false, - }, { - m: "yum_sum", - ct: int64p(1520430003), - isErr: false, - }, { - m: `yum{quantile="0.95"}`, - ct: int64p(1520430003), - isErr: true, - }, { - m: `yum{quantile="0.99"}`, - ct: int64p(1520430003), - isErr: true, - }, { - m: "foobar", - help: "Summary with _created as the first line", - isErr: false, - }, { - m: "foobar", - typ: model.MetricTypeSummary, - isErr: false, - }, { - m: "foobar_count", - ct: int64p(1520430004), - isErr: true, - }, { - m: "foobar_sum", - ct: int64p(1520430004), - isErr: true, - }, { - m: `foobar{quantile="0.95"}`, - ct: int64p(1520430004), - isErr: true, - }, { - m: `foobar{quantile="0.99"}`, - ct: int64p(1520430004), - isErr: true, - }, - } - - p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) - i := 0 - - var res labels.Labels - for { - et, err := p.Next() - if errors.Is(err, io.EOF) { - break - } - require.NoError(t, err) - - switch et { - case EntrySeries: - p.Metric(&res) - - if ct := p.CreatedTimestamp(); exp[i].isErr { - require.Nil(t, ct) - } else { - require.Equal(t, 
*exp[i].ct, *ct) - } - default: - i++ - continue - } - i++ +# HELP thing_c counter with _created as first line +# TYPE thing_c counter +thing_c_created 1520872607.123 +thing_c_total 14123.232 +# EOF +`, + expected: []parsedEntry{ + { + m: "thing", + help: "histogram with _created as first line", + }, + { + m: "thing", + typ: model.MetricTypeHistogram, + }, + { + m: `thing_count`, + ct: nil, // Should be int64p(1520872607123). + }, + { + m: `thing_sum`, + ct: nil, // Should be int64p(1520872607123). + }, + { + m: `thing_bucket{le="0.0"}`, + ct: nil, // Should be int64p(1520872607123). + }, + { + m: `thing_bucket{le="+Inf"}`, + ct: nil, // Should be int64p(1520872607123), + }, + { + m: "thing_c", + help: "counter with _created as first line", + }, + { + m: "thing_c", + typ: model.MetricTypeCounter, + }, + { + m: `thing_c_total`, + ct: nil, // Should be int64p(1520872607123). + }, + }, + }, + { + // TODO(bwplotka): Kind of correct bevaviour? If yes, let's move to the OK tests above. + name: "maybe counter with no meta", + input: `foo_total 17.0 +foo_created 1520872607.123 +foo_total{a="b"} 17.0 +foo_created{a="b"} 1520872608.123 +# EOF +`, + expected: []parsedEntry{ + { + m: `foo_total`, + }, + { + m: `foo_created`, + }, + { + m: `foo_total{a="b"}`, + }, + { + m: `foo_created{a="b"}`, + }, + }, + }, + } { + t.Run(fmt.Sprintf("case=%v", tcase.name), func(t *testing.T) { + p := NewOpenMetricsParser([]byte(tcase.input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + got := testParse(t, p) + resetValAndLset(got) // Keep this test focused on metric, basic entries and CT only. + requireEntries(t, tcase.expected, got) + }) } } -func TestDeepCopy(t *testing.T) { - input := []byte(`# HELP go_goroutines A gauge goroutines. -# TYPE go_goroutines gauge -go_goroutines 33 123.123 -# TYPE go_gc_duration_seconds summary -go_gc_duration_seconds -go_gc_duration_seconds_created`) - - st := labels.NewSymbolTable() - parser := NewOpenMetricsParser(input, st, WithOMParserCTSeriesSkipped()).(*OpenMetricsParser) - - // Modify the original parser state - _, err := parser.Next() - require.NoError(t, err) - require.Equal(t, "go_goroutines", string(parser.l.b[parser.offsets[0]:parser.offsets[1]])) - require.True(t, parser.skipCTSeries) - - // Create a deep copy of the parser - copyParser := deepCopy(parser) - etype, err := copyParser.Next() - require.NoError(t, err) - require.Equal(t, EntryType, etype) - require.True(t, parser.skipCTSeries) - require.False(t, copyParser.skipCTSeries) - - // Modify the original parser further - parser.Next() - parser.Next() - parser.Next() - require.Equal(t, "go_gc_duration_seconds", string(parser.l.b[parser.offsets[0]:parser.offsets[1]])) - require.Equal(t, "summary", string(parser.mtype)) - require.False(t, copyParser.skipCTSeries) - require.True(t, parser.skipCTSeries) - - // Ensure the copy remains unchanged - copyParser.Next() - copyParser.Next() - require.Equal(t, "go_gc_duration_seconds", string(copyParser.l.b[copyParser.offsets[0]:copyParser.offsets[1]])) - require.False(t, copyParser.skipCTSeries) +func resetValAndLset(e []parsedEntry) { + for i := range e { + e[i].v = 0 + e[i].lset = labels.EmptyLabels() + } } diff --git a/model/textparse/promparse.go b/model/textparse/promparse.go index a611f3aea7..0ab932c665 100644 --- a/model/textparse/promparse.go +++ b/model/textparse/promparse.go @@ -239,7 +239,8 @@ func (p *PromParser) Metric(l *labels.Labels) string { label := unreplace(s[a:b]) c := p.offsets[i+2] - p.start d := p.offsets[i+3] - p.start - value := 
unreplace(s[c:d]) + value := normalizeFloatsInLabelValues(p.mtype, label, unreplace(s[c:d])) + p.builder.Add(label, value) } @@ -502,7 +503,7 @@ func unreplace(s string) string { } func yoloString(b []byte) string { - return *((*string)(unsafe.Pointer(&b))) + return unsafe.String(unsafe.SliceData(b), len(b)) } func parseFloat(s string) (float64, error) { diff --git a/model/textparse/promparse_test.go b/model/textparse/promparse_test.go index 7971d23b7e..e8cf66f539 100644 --- a/model/textparse/promparse_test.go +++ b/model/textparse/promparse_test.go @@ -14,37 +14,15 @@ package textparse import ( - "bytes" - "errors" "io" - "os" - "strings" "testing" - "github.com/klauspost/compress/gzip" - "github.com/stretchr/testify/require" - - "github.com/prometheus/common/expfmt" "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" - "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/util/testutil" ) -type expectedParse struct { - lset labels.Labels - m string - t *int64 - v float64 - typ model.MetricType - help string - unit string - comment string - e *exemplar.Exemplar - ct *int64 -} - func TestPromParse(t *testing.T) { input := `# HELP go_gc_duration_seconds A summary of the GC invocation durations. # TYPE go_gc_duration_seconds summary @@ -53,6 +31,13 @@ go_gc_duration_seconds{quantile="0.25",} 7.424100000000001e-05 go_gc_duration_seconds{quantile="0.5",a="b"} 8.3835e-05 go_gc_duration_seconds{quantile="0.8", a="b"} 8.3835e-05 go_gc_duration_seconds{ quantile="0.9", a="b"} 8.3835e-05 +# HELP prometheus_http_request_duration_seconds Histogram of latencies for HTTP requests. +# TYPE prometheus_http_request_duration_seconds histogram +prometheus_http_request_duration_seconds_bucket{handler="/",le="1"} 423 +prometheus_http_request_duration_seconds_bucket{handler="/",le="2"} 1423 +prometheus_http_request_duration_seconds_bucket{handler="/",le="+Inf"} 1423 +prometheus_http_request_duration_seconds_sum{handler="/"} 2000 +prometheus_http_request_duration_seconds_count{handler="/"} 1423 # Hrandom comment starting with prefix of HELP # wind_speed{A="2",c="3"} 12345 @@ -72,13 +57,12 @@ some:aggregate:rate5m{a_b="c"} 1 go_goroutines 33 123123 _metric_starting_with_underscore 1 testmetric{_label_starting_with_underscore="foo"} 1 -testmetric{label="\"bar\""} 1` +testmetric{label="\"bar\""} 1 +testmetric{le="10"} 1` input += "\n# HELP metric foo\x00bar" input += "\nnull_byte_metric{a=\"abc\x00\"} 1" - int64p := func(x int64) *int64 { return &x } - - exp := []expectedParse{ + exp := []parsedEntry{ { m: "go_gc_duration_seconds", help: "A summary of the GC invocation durations.", @@ -88,7 +72,7 @@ testmetric{label="\"bar\""} 1` }, { m: `go_gc_duration_seconds{quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"), }, { m: `go_gc_duration_seconds{quantile="0.25",}`, v: 7.424100000000001e-05, @@ -105,6 +89,32 @@ testmetric{label="\"bar\""} 1` m: `go_gc_duration_seconds{ quantile="0.9", a="b"}`, v: 8.3835e-05, lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.9", "a", "b"), + }, { + m: "prometheus_http_request_duration_seconds", + help: "Histogram of latencies for HTTP requests.", + }, { + m: "prometheus_http_request_duration_seconds", + typ: model.MetricTypeHistogram, + }, { + m: 
`prometheus_http_request_duration_seconds_bucket{handler="/",le="1"}`, + v: 423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "1.0"), + }, { + m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="2"}`, + v: 1423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "2.0"), + }, { + m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="+Inf"}`, + v: 1423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "+Inf"), + }, { + m: `prometheus_http_request_duration_seconds_sum{handler="/"}`, + v: 2000, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_sum", "handler", "/"), + }, { + m: `prometheus_http_request_duration_seconds_count{handler="/"}`, + v: 1423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_count", "handler", "/"), }, { comment: "# Hrandom comment starting with prefix of HELP", }, { @@ -140,7 +150,7 @@ testmetric{label="\"bar\""} 1` v: 8.3835e-05, lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "1.0", "a", "b"), }, { - // NOTE: Unlike OpenMetrics, Promparse allows spaces between label terms. This appears to be unintended and should probably be fixed. + // NOTE: Unlike OpenMetrics, PromParser allows spaces between label terms. This appears to be unintended and should probably be fixed. m: `go_gc_duration_seconds { quantile = "2.0" a = "b" }`, v: 8.3835e-05, lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "2.0", "a", "b"), @@ -175,6 +185,10 @@ testmetric{label="\"bar\""} 1` m: "testmetric{label=\"\\\"bar\\\"\"}", v: 1, lset: labels.FromStrings("__name__", "testmetric", "label", `"bar"`), + }, { + m: `testmetric{le="10"}`, + v: 1, + lset: labels.FromStrings("__name__", "testmetric", "le", "10"), }, { m: "metric", help: "foo\x00bar", @@ -186,80 +200,8 @@ testmetric{label="\"bar\""} 1` } p := NewPromParser([]byte(input), labels.NewSymbolTable()) - checkParseResults(t, p, exp) -} - -func checkParseResults(t *testing.T, p Parser, exp []expectedParse) { - checkParseResultsWithCT(t, p, exp, false) -} - -func checkParseResultsWithCT(t *testing.T, p Parser, exp []expectedParse, ctLinesRemoved bool) { - i := 0 - - var res labels.Labels - - for { - et, err := p.Next() - if errors.Is(err, io.EOF) { - break - } - require.NoError(t, err) - - switch et { - case EntrySeries: - m, ts, v := p.Series() - - p.Metric(&res) - - if ctLinesRemoved { - // Are CT series skipped? 
- _, typ := p.Type() - if TypeRequiresCT(typ) && strings.HasSuffix(res.Get(labels.MetricName), "_created") { - t.Fatalf("we exped created lines skipped") - } - } - - require.Equal(t, exp[i].m, string(m)) - require.Equal(t, exp[i].t, ts) - require.Equal(t, exp[i].v, v) - testutil.RequireEqual(t, exp[i].lset, res) - - var e exemplar.Exemplar - found := p.Exemplar(&e) - if exp[i].e == nil { - require.False(t, found) - } else { - require.True(t, found) - testutil.RequireEqual(t, *exp[i].e, e) - } - if ct := p.CreatedTimestamp(); ct != nil { - require.Equal(t, *exp[i].ct, *ct) - } else { - require.Nil(t, exp[i].ct) - } - - case EntryType: - m, typ := p.Type() - require.Equal(t, exp[i].m, string(m)) - require.Equal(t, exp[i].typ, typ) - - case EntryHelp: - m, h := p.Help() - require.Equal(t, exp[i].m, string(m)) - require.Equal(t, exp[i].help, string(h)) - - case EntryUnit: - m, u := p.Unit() - require.Equal(t, exp[i].m, string(m)) - require.Equal(t, exp[i].unit, string(u)) - - case EntryComment: - require.Equal(t, exp[i].comment, string(p.Comment())) - } - - i++ - } - require.Len(t, exp, i) + got := testParse(t, p) + requireEntries(t, exp, got) } func TestUTF8PromParse(t *testing.T) { @@ -283,7 +225,7 @@ func TestUTF8PromParse(t *testing.T) { {"go.gc_duration_seconds_count"} 99 {"Heizölrückstoßabdämpfung 10€ metric with \"interesting\" {character\nchoices}","strange©™\n'quoted' \"name\""="6"} 10.0` - exp := []expectedParse{ + exp := []parsedEntry{ { m: "go.gc_duration_seconds", help: "A summary of the GC invocation durations.", @@ -293,7 +235,7 @@ func TestUTF8PromParse(t *testing.T) { }, { m: `{"go.gc_duration_seconds",quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.0"), }, { m: `{"go.gc_duration_seconds",quantile="0.25",}`, v: 7.424100000000001e-05, @@ -339,7 +281,8 @@ choices}`, "strange©™\n'quoted' \"name\"", "6"), } p := NewPromParser([]byte(input), labels.NewSymbolTable()) - checkParseResults(t, p, exp) + got := testParse(t, p) + requireEntries(t, exp, got) } func TestPromParseErrors(t *testing.T) { @@ -423,8 +366,7 @@ func TestPromParseErrors(t *testing.T) { for err == nil { _, err = p.Next() } - require.Error(t, err) - require.Equal(t, c.err, err.Error(), "test %d", i) + require.EqualError(t, err, c.err, "test %d", i) } } @@ -483,194 +425,6 @@ func TestPromNullByteHandling(t *testing.T) { continue } - require.Error(t, err) - require.Equal(t, c.err, err.Error(), "test %d", i) - } -} - -const ( - promtestdataSampleCount = 410 -) - -func BenchmarkParse(b *testing.B) { - for parserName, parser := range map[string]func([]byte, *labels.SymbolTable) Parser{ - "prometheus": NewPromParser, - "openmetrics": func(b []byte, st *labels.SymbolTable) Parser { - return NewOpenMetricsParser(b, st) - }, - } { - for _, fn := range []string{"promtestdata.txt", "promtestdata.nometa.txt"} { - f, err := os.Open(fn) - require.NoError(b, err) - defer f.Close() - - buf, err := io.ReadAll(f) - require.NoError(b, err) - - b.Run(parserName+"/no-decode-metric/"+fn, func(b *testing.B) { - total := 0 - - b.SetBytes(int64(len(buf) / promtestdataSampleCount)) - b.ReportAllocs() - b.ResetTimer() - - st := labels.NewSymbolTable() - for i := 0; i < b.N; i += promtestdataSampleCount { - p := parser(buf, st) - - Outer: - for i < b.N { - t, err := p.Next() - switch t { - case EntryInvalid: - if errors.Is(err, io.EOF) { - break Outer - } - b.Fatal(err) - case EntrySeries: - m, _, _ 
:= p.Series() - total += len(m) - i++ - } - } - } - _ = total - }) - b.Run(parserName+"/decode-metric/"+fn, func(b *testing.B) { - total := 0 - - b.SetBytes(int64(len(buf) / promtestdataSampleCount)) - b.ReportAllocs() - b.ResetTimer() - - st := labels.NewSymbolTable() - for i := 0; i < b.N; i += promtestdataSampleCount { - p := parser(buf, st) - - Outer: - for i < b.N { - t, err := p.Next() - switch t { - case EntryInvalid: - if errors.Is(err, io.EOF) { - break Outer - } - b.Fatal(err) - case EntrySeries: - m, _, _ := p.Series() - - var res labels.Labels - p.Metric(&res) - - total += len(m) - i++ - } - } - } - _ = total - }) - b.Run(parserName+"/decode-metric-reuse/"+fn, func(b *testing.B) { - total := 0 - var res labels.Labels - - b.SetBytes(int64(len(buf) / promtestdataSampleCount)) - b.ReportAllocs() - b.ResetTimer() - - st := labels.NewSymbolTable() - for i := 0; i < b.N; i += promtestdataSampleCount { - p := parser(buf, st) - - Outer: - for i < b.N { - t, err := p.Next() - switch t { - case EntryInvalid: - if errors.Is(err, io.EOF) { - break Outer - } - b.Fatal(err) - case EntrySeries: - m, _, _ := p.Series() - - p.Metric(&res) - - total += len(m) - i++ - } - } - } - _ = total - }) - b.Run("expfmt-text/"+fn, func(b *testing.B) { - if parserName != "prometheus" { - b.Skip() - } - b.SetBytes(int64(len(buf) / promtestdataSampleCount)) - b.ReportAllocs() - b.ResetTimer() - - total := 0 - - for i := 0; i < b.N; i += promtestdataSampleCount { - decSamples := make(model.Vector, 0, 50) - sdec := expfmt.SampleDecoder{ - Dec: expfmt.NewDecoder(bytes.NewReader(buf), expfmt.NewFormat(expfmt.TypeTextPlain)), - Opts: &expfmt.DecodeOptions{ - Timestamp: model.TimeFromUnixNano(0), - }, - } - - for { - if err = sdec.Decode(&decSamples); err != nil { - break - } - total += len(decSamples) - decSamples = decSamples[:0] - } - } - _ = total - }) - } - } -} - -func BenchmarkGzip(b *testing.B) { - for _, fn := range []string{"promtestdata.txt", "promtestdata.nometa.txt"} { - b.Run(fn, func(b *testing.B) { - f, err := os.Open(fn) - require.NoError(b, err) - defer f.Close() - - var buf bytes.Buffer - gw := gzip.NewWriter(&buf) - - n, err := io.Copy(gw, f) - require.NoError(b, err) - require.NoError(b, gw.Close()) - - gbuf, err := io.ReadAll(&buf) - require.NoError(b, err) - - k := b.N / promtestdataSampleCount - - b.ReportAllocs() - b.SetBytes(n / promtestdataSampleCount) - b.ResetTimer() - - total := 0 - - for i := 0; i < k; i++ { - gr, err := gzip.NewReader(bytes.NewReader(gbuf)) - require.NoError(b, err) - - d, err := io.ReadAll(gr) - require.NoError(b, err) - require.NoError(b, gr.Close()) - - total += len(d) - } - _ = total - }) + require.EqualError(t, err, c.err, "test %d", i) } } diff --git a/model/textparse/protobufparse.go b/model/textparse/protobufparse.go index e384a75fca..a77e1d728f 100644 --- a/model/textparse/protobufparse.go +++ b/model/textparse/protobufparse.go @@ -20,7 +20,9 @@ import ( "fmt" "io" "math" + "strconv" "strings" + "sync" "unicode/utf8" "github.com/gogo/protobuf/proto" @@ -34,6 +36,15 @@ import ( dto "github.com/prometheus/prometheus/prompb/io/prometheus/client" ) +// floatFormatBufPool is exclusively used in formatOpenMetricsFloat. +var floatFormatBufPool = sync.Pool{ + New: func() interface{} { + // To contain at most 17 digits and additional syntax for a float64. 
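+		// 24 bytes also covers the longest possible output of strconv.AppendFloat
+		// with the 'g' format, e.g. "-1.7976931348623157e+308".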
+ b := make([]byte, 0, 24) + return &b + }, +} + // ProtobufParser is a very inefficient way of unmarshaling the old Prometheus // protobuf format and then present it as it if were parsed by a // Prometheus-2-style text parser. This is only done so that we can easily plug @@ -457,6 +468,12 @@ func (p *ProtobufParser) Next() (Entry, error) { p.state = EntryHelp case EntryHelp: + if p.mf.Unit != "" { + p.state = EntryUnit + } else { + p.state = EntryType + } + case EntryUnit: p.state = EntryType case EntryType: t := p.mf.GetType() @@ -604,7 +621,7 @@ func readDelimited(b []byte, mf *dto.MetricFamily) (n int, err error) { return totalLength, mf.Unmarshal(b[varIntLength:totalLength]) } -// formatOpenMetricsFloat works like the usual Go string formatting of a fleat +// formatOpenMetricsFloat works like the usual Go string formatting of a float // but appends ".0" if the resulting number would otherwise contain neither a // "." nor an "e". func formatOpenMetricsFloat(f float64) string { @@ -623,11 +640,15 @@ func formatOpenMetricsFloat(f float64) string { case math.IsInf(f, -1): return "-Inf" } - s := fmt.Sprint(f) - if strings.ContainsAny(s, "e.") { - return s + bp := floatFormatBufPool.Get().(*[]byte) + defer floatFormatBufPool.Put(bp) + + *bp = strconv.AppendFloat((*bp)[:0], f, 'g', -1, 64) + if bytes.ContainsAny(*bp, "e.") { + return string(*bp) } - return s + ".0" + *bp = append(*bp, '.', '0') + return string(*bp) } // isNativeHistogram returns false iff the provided histograms has no spans at diff --git a/model/textparse/protobufparse_test.go b/model/textparse/protobufparse_test.go index cf34ae52df..065459a69a 100644 --- a/model/textparse/protobufparse_test.go +++ b/model/textparse/protobufparse_test.go @@ -16,8 +16,6 @@ package textparse import ( "bytes" "encoding/binary" - "errors" - "io" "testing" "github.com/gogo/protobuf/proto" @@ -27,12 +25,12 @@ import ( "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/util/testutil" - dto "github.com/prometheus/prometheus/prompb/io/prometheus/client" ) -func createTestProtoBuf(t *testing.T) *bytes.Buffer { +func createTestProtoBuf(t testing.TB) *bytes.Buffer { + t.Helper() + testMetricFamilies := []string{ `name: "go_build_info" help: "Build information about the main Go module." @@ -411,6 +409,49 @@ metric: < > > +`, + `name: "test_histogram3" +help: "Similar histogram as before but now with integer buckets." +type: HISTOGRAM +metric: < + histogram: < + sample_count: 6 + sample_sum: 50 + bucket: < + cumulative_count: 2 + upper_bound: -20 + > + bucket: < + cumulative_count: 4 + upper_bound: 20 + exemplar: < + label: < + name: "dummyID" + value: "59727" + > + value: 15 + timestamp: < + seconds: 1625851153 + nanos: 146848499 + > + > + > + bucket: < + cumulative_count: 6 + upper_bound: 30 + exemplar: < + label: < + name: "dummyID" + value: "5617" + > + value: 25 + > + > + schema: 0 + zero_threshold: 0 + > +> + `, `name: "test_histogram_family" help: "Test histogram metric family with two very simple histograms." 
@@ -783,32 +824,17 @@ metric: < } func TestProtobufParse(t *testing.T) { - type parseResult struct { - lset labels.Labels - m string - t int64 - v float64 - typ model.MetricType - help string - unit string - comment string - shs *histogram.Histogram - fhs *histogram.FloatHistogram - e []exemplar.Exemplar - ct int64 - } - inputBuf := createTestProtoBuf(t) scenarios := []struct { name string parser Parser - expected []parseResult + expected []parsedEntry }{ { name: "ignore classic buckets of native histograms", parser: NewProtobufParser(inputBuf.Bytes(), false, labels.NewSymbolTable()), - expected: []parseResult{ + expected: []parsedEntry{ { m: "go_build_info", help: "Build information about the main Go module.", @@ -830,6 +856,9 @@ func TestProtobufParse(t *testing.T) { { m: "go_memstats_alloc_bytes_total", help: "Total number of bytes allocated, even if freed.", + }, + { + m: "go_memstats_alloc_bytes_total", unit: "bytes", }, { @@ -842,7 +871,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "go_memstats_alloc_bytes_total", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "42"), Value: 12, HasTs: true, Ts: 1625851151233}, }, }, @@ -856,7 +885,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "something_untyped", - t: 1234567, + t: int64p(1234567), v: 42, lset: labels.FromStrings( "__name__", "something_untyped", @@ -872,7 +901,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -893,7 +922,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, @@ -907,7 +936,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_gauge_histogram", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ CounterResetHint: histogram.GaugeType, Count: 175, @@ -929,7 +958,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_gauge_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, @@ -943,7 +972,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_float_histogram", - t: 1234568, + t: int64p(1234568), fhs: &histogram.FloatHistogram{ Count: 175.0, ZeroCount: 2.0, @@ -964,7 +993,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_float_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, @@ -978,7 +1007,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_gauge_float_histogram", - t: 1234568, + t: int64p(1234568), fhs: &histogram.FloatHistogram{ CounterResetHint: histogram.GaugeType, Count: 175.0, @@ -1000,7 +1029,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_gauge_float_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, @@ -1041,7 +1070,7 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_histogram2_bucket", "le", "-0.00038", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), 
Value: -0.00038, HasTs: true, Ts: 1625851153146}, }, }, @@ -1052,7 +1081,7 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_histogram2_bucket", "le", "1.0", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.000295, HasTs: false}, }, }, @@ -1064,6 +1093,66 @@ func TestProtobufParse(t *testing.T) { "le", "+Inf", ), }, + { + m: "test_histogram3", + help: "Similar histogram as before but now with integer buckets.", + }, + { + m: "test_histogram3", + typ: model.MetricTypeHistogram, + }, + { + m: "test_histogram3_count", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_count", + ), + }, + { + m: "test_histogram3_sum", + v: 50, + lset: labels.FromStrings( + "__name__", "test_histogram3_sum", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff-20.0", + v: 2, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "-20.0", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff20.0", + v: 4, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "20.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "59727"), Value: 15, HasTs: true, Ts: 1625851153146}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff30.0", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "30.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "5617"), Value: 25, HasTs: false}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff+Inf", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "+Inf", + ), + }, { m: "test_histogram_family", help: "Test histogram metric family with two very simple histograms.", @@ -1235,7 +1324,7 @@ func TestProtobufParse(t *testing.T) { { m: "test_counter_with_createdtimestamp", v: 42, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_counter_with_createdtimestamp", ), @@ -1251,7 +1340,7 @@ func TestProtobufParse(t *testing.T) { { m: "test_summary_with_createdtimestamp_count", v: 42, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_count", ), @@ -1259,7 +1348,7 @@ func TestProtobufParse(t *testing.T) { { m: "test_summary_with_createdtimestamp_sum", v: 1.234, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_sum", ), @@ -1274,7 +1363,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram_with_createdtimestamp", - ct: 1000, + ct: int64p(1000), shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, PositiveSpans: []histogram.Span{}, @@ -1294,7 +1383,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_gaugehistogram_with_createdtimestamp", - ct: 1000, + ct: int64p(1000), shs: &histogram.Histogram{ CounterResetHint: histogram.GaugeType, PositiveSpans: []histogram.Span{}, @@ -1314,7 +1403,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram_with_native_histogram_exemplars", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -1335,7 +1424,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59780"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, {Labels: labels.FromStrings("dummyID", "59772"), Value: -0.00052, HasTs: true, Ts: 1625851160156}, }, @@ -1350,7 
+1439,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram_with_native_histogram_exemplars2", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -1371,7 +1460,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59780"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, @@ -1380,16 +1469,16 @@ func TestProtobufParse(t *testing.T) { { name: "parse classic and native buckets", parser: NewProtobufParser(inputBuf.Bytes(), true, labels.NewSymbolTable()), - expected: []parseResult{ - { // 0 + expected: []parsedEntry{ + { m: "go_build_info", help: "Build information about the main Go module.", }, - { // 1 + { m: "go_build_info", typ: model.MetricTypeGauge, }, - { // 2 + { m: "go_build_info\xFFchecksum\xFF\xFFpath\xFFgithub.com/prometheus/client_golang\xFFversion\xFF(devel)", v: 1, lset: labels.FromStrings( @@ -1399,51 +1488,55 @@ func TestProtobufParse(t *testing.T) { "version", "(devel)", ), }, - { // 3 + { m: "go_memstats_alloc_bytes_total", help: "Total number of bytes allocated, even if freed.", }, - { // 4 + { + m: "go_memstats_alloc_bytes_total", + unit: "bytes", + }, + { m: "go_memstats_alloc_bytes_total", typ: model.MetricTypeCounter, }, - { // 5 + { m: "go_memstats_alloc_bytes_total", v: 1.546544e+06, lset: labels.FromStrings( "__name__", "go_memstats_alloc_bytes_total", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "42"), Value: 12, HasTs: true, Ts: 1625851151233}, }, }, - { // 6 + { m: "something_untyped", help: "Just to test the untyped type.", }, - { // 7 + { m: "something_untyped", typ: model.MetricTypeUnknown, }, - { // 8 + { m: "something_untyped", - t: 1234567, + t: int64p(1234567), v: 42, lset: labels.FromStrings( "__name__", "something_untyped", ), }, - { // 9 + { m: "test_histogram", help: "Test histogram with many buckets removed to keep it manageable in size.", }, - { // 10 + { m: "test_histogram", typ: model.MetricTypeHistogram, }, - { // 11 + { m: "test_histogram", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -1464,79 +1557,79 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 12 + { m: "test_histogram_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_count", ), }, - { // 13 + { m: "test_histogram_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_histogram_sum", ), }, - { // 14 + { m: "test_histogram_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_histogram_bucket", "le", "-0.0004899999999999998", ), }, - { // 15 + { m: "test_histogram_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_histogram_bucket", "le", "-0.0003899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 16 + { m: "test_histogram_bucket\xffle\xff-0.0002899999999999998", 
- t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_histogram_bucket", "le", "-0.0002899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 17 + { m: "test_histogram_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_bucket", "le", "+Inf", ), }, - { // 18 + { m: "test_gauge_histogram", help: "Like test_histogram but as gauge histogram.", }, - { // 19 + { m: "test_gauge_histogram", typ: model.MetricTypeGaugeHistogram, }, - { // 20 + { m: "test_gauge_histogram", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ CounterResetHint: histogram.GaugeType, Count: 175, @@ -1558,79 +1651,79 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_gauge_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 21 + { m: "test_gauge_histogram_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_gauge_histogram_count", ), }, - { // 22 + { m: "test_gauge_histogram_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_gauge_histogram_sum", ), }, - { // 23 + { m: "test_gauge_histogram_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_gauge_histogram_bucket", "le", "-0.0004899999999999998", ), }, - { // 24 + { m: "test_gauge_histogram_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_gauge_histogram_bucket", "le", "-0.0003899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 25 + { m: "test_gauge_histogram_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_gauge_histogram_bucket", "le", "-0.0002899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 26 + { m: "test_gauge_histogram_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_gauge_histogram_bucket", "le", "+Inf", ), }, - { // 27 + { m: "test_float_histogram", help: "Test float histogram with many buckets removed to keep it manageable in size.", }, - { // 28 + { m: "test_float_histogram", typ: model.MetricTypeHistogram, }, - { // 29 + { m: "test_float_histogram", - t: 1234568, + t: int64p(1234568), fhs: &histogram.FloatHistogram{ Count: 175.0, ZeroCount: 2.0, @@ -1651,79 +1744,79 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_float_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 30 + { m: "test_float_histogram_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_float_histogram_count", ), }, - { // 31 + { m: "test_float_histogram_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", 
"test_float_histogram_sum", ), }, - { // 32 + { m: "test_float_histogram_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_float_histogram_bucket", "le", "-0.0004899999999999998", ), }, - { // 33 + { m: "test_float_histogram_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_float_histogram_bucket", "le", "-0.0003899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 34 + { m: "test_float_histogram_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_float_histogram_bucket", "le", "-0.0002899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 35 + { m: "test_float_histogram_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_float_histogram_bucket", "le", "+Inf", ), }, - { // 36 + { m: "test_gauge_float_histogram", help: "Like test_float_histogram but as gauge histogram.", }, - { // 37 + { m: "test_gauge_float_histogram", typ: model.MetricTypeGaugeHistogram, }, - { // 38 + { m: "test_gauge_float_histogram", - t: 1234568, + t: int64p(1234568), fhs: &histogram.FloatHistogram{ CounterResetHint: histogram.GaugeType, Count: 175.0, @@ -1745,91 +1838,91 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_gauge_float_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 39 + { m: "test_gauge_float_histogram_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_count", ), }, - { // 40 + { m: "test_gauge_float_histogram_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_sum", ), }, - { // 41 + { m: "test_gauge_float_histogram_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_bucket", "le", "-0.0004899999999999998", ), }, - { // 42 + { m: "test_gauge_float_histogram_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_bucket", "le", "-0.0003899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 43 + { m: "test_gauge_float_histogram_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_bucket", "le", "-0.0002899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 44 + { m: "test_gauge_float_histogram_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_bucket", "le", "+Inf", ), }, - { // 45 + { m: "test_histogram2", help: "Similar histogram as before but now without sparse 
buckets.", }, - { // 46 + { m: "test_histogram2", typ: model.MetricTypeHistogram, }, - { // 47 + { m: "test_histogram2_count", v: 175, lset: labels.FromStrings( "__name__", "test_histogram2_count", ), }, - { // 48 + { m: "test_histogram2_sum", v: 0.000828, lset: labels.FromStrings( "__name__", "test_histogram2_sum", ), }, - { // 49 + { m: "test_histogram2_bucket\xffle\xff-0.00048", v: 2, lset: labels.FromStrings( @@ -1837,29 +1930,29 @@ func TestProtobufParse(t *testing.T) { "le", "-0.00048", ), }, - { // 50 + { m: "test_histogram2_bucket\xffle\xff-0.00038", v: 4, lset: labels.FromStrings( "__name__", "test_histogram2_bucket", "le", "-0.00038", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00038, HasTs: true, Ts: 1625851153146}, }, }, - { // 51 + { m: "test_histogram2_bucket\xffle\xff1.0", v: 16, lset: labels.FromStrings( "__name__", "test_histogram2_bucket", "le", "1.0", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.000295, HasTs: false}, }, }, - { // 52 + { m: "test_histogram2_bucket\xffle\xff+Inf", v: 175, lset: labels.FromStrings( @@ -1867,15 +1960,75 @@ func TestProtobufParse(t *testing.T) { "le", "+Inf", ), }, - { // 53 + { + m: "test_histogram3", + help: "Similar histogram as before but now with integer buckets.", + }, + { + m: "test_histogram3", + typ: model.MetricTypeHistogram, + }, + { + m: "test_histogram3_count", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_count", + ), + }, + { + m: "test_histogram3_sum", + v: 50, + lset: labels.FromStrings( + "__name__", "test_histogram3_sum", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff-20.0", + v: 2, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "-20.0", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff20.0", + v: 4, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "20.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "59727"), Value: 15, HasTs: true, Ts: 1625851153146}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff30.0", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "30.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "5617"), Value: 25, HasTs: false}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff+Inf", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "+Inf", + ), + }, + { m: "test_histogram_family", help: "Test histogram metric family with two very simple histograms.", }, - { // 54 + { m: "test_histogram_family", typ: model.MetricTypeHistogram, }, - { // 55 + { m: "test_histogram_family\xfffoo\xffbar", shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, @@ -1893,7 +2046,7 @@ func TestProtobufParse(t *testing.T) { "foo", "bar", ), }, - { // 56 + { m: "test_histogram_family_count\xfffoo\xffbar", v: 5, lset: labels.FromStrings( @@ -1901,7 +2054,7 @@ func TestProtobufParse(t *testing.T) { "foo", "bar", ), }, - { // 57 + { m: "test_histogram_family_sum\xfffoo\xffbar", v: 12.1, lset: labels.FromStrings( @@ -1909,7 +2062,7 @@ func TestProtobufParse(t *testing.T) { "foo", "bar", ), }, - { // 58 + { m: "test_histogram_family_bucket\xfffoo\xffbar\xffle\xff1.1", v: 2, lset: labels.FromStrings( @@ -1918,7 +2071,7 @@ func TestProtobufParse(t *testing.T) { "le", "1.1", ), }, - { // 59 + { m: "test_histogram_family_bucket\xfffoo\xffbar\xffle\xff2.2", v: 3, lset: 
labels.FromStrings( @@ -1927,7 +2080,7 @@ func TestProtobufParse(t *testing.T) { "le", "2.2", ), }, - { // 60 + { m: "test_histogram_family_bucket\xfffoo\xffbar\xffle\xff+Inf", v: 5, lset: labels.FromStrings( @@ -1936,7 +2089,7 @@ func TestProtobufParse(t *testing.T) { "le", "+Inf", ), }, - { // 61 + { m: "test_histogram_family\xfffoo\xffbaz", shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, @@ -1954,7 +2107,7 @@ func TestProtobufParse(t *testing.T) { "foo", "baz", ), }, - { // 62 + { m: "test_histogram_family_count\xfffoo\xffbaz", v: 6, lset: labels.FromStrings( @@ -1962,7 +2115,7 @@ func TestProtobufParse(t *testing.T) { "foo", "baz", ), }, - { // 63 + { m: "test_histogram_family_sum\xfffoo\xffbaz", v: 13.1, lset: labels.FromStrings( @@ -1970,7 +2123,7 @@ func TestProtobufParse(t *testing.T) { "foo", "baz", ), }, - { // 64 + { m: "test_histogram_family_bucket\xfffoo\xffbaz\xffle\xff1.1", v: 1, lset: labels.FromStrings( @@ -1979,7 +2132,7 @@ func TestProtobufParse(t *testing.T) { "le", "1.1", ), }, - { // 65 + { m: "test_histogram_family_bucket\xfffoo\xffbaz\xffle\xff2.2", v: 5, lset: labels.FromStrings( @@ -1988,7 +2141,7 @@ func TestProtobufParse(t *testing.T) { "le", "2.2", ), }, - { // 66 + { m: "test_histogram_family_bucket\xfffoo\xffbaz\xffle\xff+Inf", v: 6, lset: labels.FromStrings( @@ -1997,15 +2150,15 @@ func TestProtobufParse(t *testing.T) { "le", "+Inf", ), }, - { // 67 + { m: "test_float_histogram_with_zerothreshold_zero", help: "Test float histogram with a zero threshold of zero.", }, - { // 68 + { m: "test_float_histogram_with_zerothreshold_zero", typ: model.MetricTypeHistogram, }, - { // 69 + { m: "test_float_histogram_with_zerothreshold_zero", fhs: &histogram.FloatHistogram{ Count: 5.0, @@ -2021,15 +2174,15 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_float_histogram_with_zerothreshold_zero", ), }, - { // 70 + { m: "rpc_durations_seconds", help: "RPC latency distributions.", }, - { // 71 + { m: "rpc_durations_seconds", typ: model.MetricTypeSummary, }, - { // 72 + { m: "rpc_durations_seconds_count\xffservice\xffexponential", v: 262, lset: labels.FromStrings( @@ -2037,7 +2190,7 @@ func TestProtobufParse(t *testing.T) { "service", "exponential", ), }, - { // 73 + { m: "rpc_durations_seconds_sum\xffservice\xffexponential", v: 0.00025551262820703587, lset: labels.FromStrings( @@ -2045,7 +2198,7 @@ func TestProtobufParse(t *testing.T) { "service", "exponential", ), }, - { // 74 + { m: "rpc_durations_seconds\xffservice\xffexponential\xffquantile\xff0.5", v: 6.442786329648548e-07, lset: labels.FromStrings( @@ -2054,7 +2207,7 @@ func TestProtobufParse(t *testing.T) { "service", "exponential", ), }, - { // 75 + { m: "rpc_durations_seconds\xffservice\xffexponential\xffquantile\xff0.9", v: 1.9435742936658396e-06, lset: labels.FromStrings( @@ -2063,7 +2216,7 @@ func TestProtobufParse(t *testing.T) { "service", "exponential", ), }, - { // 76 + { m: "rpc_durations_seconds\xffservice\xffexponential\xffquantile\xff0.99", v: 4.0471608667037015e-06, lset: labels.FromStrings( @@ -2072,37 +2225,37 @@ func TestProtobufParse(t *testing.T) { "service", "exponential", ), }, - { // 77 + { m: "without_quantiles", help: "A summary without quantiles.", }, - { // 78 + { m: "without_quantiles", typ: model.MetricTypeSummary, }, - { // 79 + { m: "without_quantiles_count", v: 42, lset: labels.FromStrings( "__name__", "without_quantiles_count", ), }, - { // 80 + { m: "without_quantiles_sum", v: 1.234, lset: labels.FromStrings( "__name__", "without_quantiles_sum", ), 
}, - { // 81 + { m: "empty_histogram", help: "A histogram without observations and with a zero threshold of zero but with a no-op span to identify it as a native histogram.", }, - { // 82 + { m: "empty_histogram", typ: model.MetricTypeHistogram, }, - { // 83 + { m: "empty_histogram", shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, @@ -2113,57 +2266,57 @@ func TestProtobufParse(t *testing.T) { "__name__", "empty_histogram", ), }, - { // 84 + { m: "test_counter_with_createdtimestamp", help: "A counter with a created timestamp.", }, - { // 85 + { m: "test_counter_with_createdtimestamp", typ: model.MetricTypeCounter, }, - { // 86 + { m: "test_counter_with_createdtimestamp", v: 42, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_counter_with_createdtimestamp", ), }, - { // 87 + { m: "test_summary_with_createdtimestamp", help: "A summary with a created timestamp.", }, - { // 88 + { m: "test_summary_with_createdtimestamp", typ: model.MetricTypeSummary, }, - { // 89 + { m: "test_summary_with_createdtimestamp_count", v: 42, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_count", ), }, - { // 90 + { m: "test_summary_with_createdtimestamp_sum", v: 1.234, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_sum", ), }, - { // 91 + { m: "test_histogram_with_createdtimestamp", help: "A histogram with a created timestamp.", }, - { // 92 + { m: "test_histogram_with_createdtimestamp", typ: model.MetricTypeHistogram, }, - { // 93 + { m: "test_histogram_with_createdtimestamp", - ct: 1000, + ct: int64p(1000), shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, PositiveSpans: []histogram.Span{}, @@ -2173,17 +2326,17 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_histogram_with_createdtimestamp", ), }, - { // 94 + { m: "test_gaugehistogram_with_createdtimestamp", help: "A gauge histogram with a created timestamp.", }, - { // 95 + { m: "test_gaugehistogram_with_createdtimestamp", typ: model.MetricTypeGaugeHistogram, }, - { // 96 + { m: "test_gaugehistogram_with_createdtimestamp", - ct: 1000, + ct: int64p(1000), shs: &histogram.Histogram{ CounterResetHint: histogram.GaugeType, PositiveSpans: []histogram.Span{}, @@ -2193,17 +2346,17 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_gaugehistogram_with_createdtimestamp", ), }, - { // 97 + { m: "test_histogram_with_native_histogram_exemplars", help: "A histogram with native histogram exemplars.", }, - { // 98 + { m: "test_histogram_with_native_histogram_exemplars", typ: model.MetricTypeHistogram, }, - { // 99 + { m: "test_histogram_with_native_histogram_exemplars", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -2224,80 +2377,80 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59780"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, {Labels: labels.FromStrings("dummyID", "59772"), Value: -0.00052, HasTs: true, Ts: 1625851160156}, }, }, - { // 100 + { m: "test_histogram_with_native_histogram_exemplars_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_count", ), }, - { // 101 + { m: "test_histogram_with_native_histogram_exemplars_sum", - t: 1234568, + 
t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_sum", ), }, - { // 102 + { m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_bucket", "le", "-0.0004899999999999998", ), }, - { // 103 + { m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_bucket", "le", "-0.0003899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 104 + { m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_bucket", "le", "-0.0002899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 105 + { m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_bucket", "le", "+Inf", ), }, - { // 106 + { m: "test_histogram_with_native_histogram_exemplars2", help: "Another histogram with native histogram exemplars.", }, - { // 107 + { m: "test_histogram_with_native_histogram_exemplars2", typ: model.MetricTypeHistogram, }, - { // 108 + { m: "test_histogram_with_native_histogram_exemplars2", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -2318,56 +2471,56 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59780"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 109 + { m: "test_histogram_with_native_histogram_exemplars2_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_count", ), }, - { // 110 + { m: "test_histogram_with_native_histogram_exemplars2_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_sum", ), }, - { // 111 + { m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_bucket", "le", "-0.0004899999999999998", ), }, - { // 112 + { m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_bucket", "le", "-0.0003899999999999998", ), }, - { // 113 + { m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_bucket", "le", "-0.0002899999999999998", ), }, - { // 114 
+ { m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_bucket", @@ -2381,94 +2534,11 @@ func TestProtobufParse(t *testing.T) { for _, scenario := range scenarios { t.Run(scenario.name, func(t *testing.T) { var ( - i int - res labels.Labels p = scenario.parser exp = scenario.expected ) - - for { - et, err := p.Next() - if errors.Is(err, io.EOF) { - break - } - require.NoError(t, err) - - switch et { - case EntrySeries: - m, ts, v := p.Series() - - var e exemplar.Exemplar - p.Metric(&res) - eFound := p.Exemplar(&e) - ct := p.CreatedTimestamp() - require.Equal(t, exp[i].m, string(m), "i: %d", i) - if ts != nil { - require.Equal(t, exp[i].t, *ts, "i: %d", i) - } else { - require.Equal(t, int64(0), exp[i].t, "i: %d", i) - } - require.Equal(t, exp[i].v, v, "i: %d", i) - testutil.RequireEqual(t, exp[i].lset, res, "i: %d", i) - if len(exp[i].e) == 0 { - require.False(t, eFound, "i: %d", i) - } else { - require.True(t, eFound, "i: %d", i) - testutil.RequireEqual(t, exp[i].e[0], e, "i: %d", i) - require.False(t, p.Exemplar(&e), "too many exemplars returned, i: %d", i) - } - if exp[i].ct != 0 { - require.NotNilf(t, ct, "i: %d", i) - require.Equal(t, exp[i].ct, *ct, "i: %d", i) - } else { - require.Nilf(t, ct, "i: %d", i) - } - - case EntryHistogram: - m, ts, shs, fhs := p.Histogram() - p.Metric(&res) - require.Equal(t, exp[i].m, string(m), "i: %d", i) - if ts != nil { - require.Equal(t, exp[i].t, *ts, "i: %d", i) - } else { - require.Equal(t, int64(0), exp[i].t, "i: %d", i) - } - testutil.RequireEqual(t, exp[i].lset, res, "i: %d", i) - require.Equal(t, exp[i].m, string(m), "i: %d", i) - if shs != nil { - require.Equal(t, exp[i].shs, shs, "i: %d", i) - } else { - require.Equal(t, exp[i].fhs, fhs, "i: %d", i) - } - j := 0 - for e := (exemplar.Exemplar{}); p.Exemplar(&e); j++ { - testutil.RequireEqual(t, exp[i].e[j], e, "i: %d", i) - e = exemplar.Exemplar{} - } - require.Len(t, exp[i].e, j, "not enough exemplars found, i: %d", i) - - case EntryType: - m, typ := p.Type() - require.Equal(t, exp[i].m, string(m), "i: %d", i) - require.Equal(t, exp[i].typ, typ, "i: %d", i) - - case EntryHelp: - m, h := p.Help() - require.Equal(t, exp[i].m, string(m), "i: %d", i) - require.Equal(t, exp[i].help, string(h), "i: %d", i) - - case EntryUnit: - m, u := p.Unit() - require.Equal(t, exp[i].m, string(m), "i: %d", i) - require.Equal(t, exp[i].unit, string(u), "i: %d", i) - - case EntryComment: - require.Equal(t, exp[i].comment, string(p.Comment()), "i: %d", i) - } - - i++ - } - require.Len(t, exp, i) + got := testParse(t, p) + requireEntries(t, exp, got) }) } } diff --git a/model/textparse/testdata/omhistogramdata.txt b/model/textparse/testdata/omhistogramdata.txt new file mode 100644 index 0000000000..1876168355 --- /dev/null +++ b/model/textparse/testdata/omhistogramdata.txt @@ -0,0 +1,45 @@ +# HELP golang_manual_histogram_seconds This is a histogram with manually selected parameters +# TYPE golang_manual_histogram_seconds histogram +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.005"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.01"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.025"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.05"} 0 
+golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.1"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.25"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="1.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="2.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="5.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="10.0"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="+Inf"} 1 +golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5001"} 10.0 +golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5001"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.005"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.01"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.025"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.05"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.1"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.25"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="1.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="2.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="5.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="10.0"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="+Inf"} 1 +golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5002"} 10.0 +golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5002"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.005"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.01"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.025"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.05"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.1"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.25"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="1.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="2.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="5.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="10.0"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="+Inf"} 1 
+golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5003"} 10.0 +golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5003"} 1 +# EOF \ No newline at end of file diff --git a/model/textparse/testdata/omtestdata.txt b/model/textparse/testdata/omtestdata.txt new file mode 100644 index 0000000000..0f5f78b8b9 --- /dev/null +++ b/model/textparse/testdata/omtestdata.txt @@ -0,0 +1,64 @@ +# HELP go_build_info Build information about the main Go module. +# TYPE go_build_info gauge +go_build_info{checksum="",path="",version=""} 1.0 +# HELP promhttp_metric_handler_errors Total number of internal errors encountered by the promhttp metric handler. +# TYPE promhttp_metric_handler_errors counter +promhttp_metric_handler_errors_total{cause="encoding"} 0.0 +promhttp_metric_handler_errors_created{cause="encoding"} 1.726839813016397e+09 +promhttp_metric_handler_errors_total{cause="gathering"} 0.0 +promhttp_metric_handler_errors_created{cause="gathering"} 1.726839813016395e+09 +# HELP rpc_durations_histogram_seconds RPC latency distributions. +# TYPE rpc_durations_histogram_seconds histogram +rpc_durations_histogram_seconds_bucket{le="-0.00099"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.00089"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0007899999999999999"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0006899999999999999"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0005899999999999998"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0004899999999999998"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0003899999999999998"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0002899999999999998"} 3 # {dummyID="17783"} -0.0003825067330956884 1.7268398142239082e+09 +rpc_durations_histogram_seconds_bucket{le="-0.0001899999999999998"} 5 # {dummyID="84741"} -0.00020178290006788965 1.726839814829977e+09 +rpc_durations_histogram_seconds_bucket{le="-8.999999999999979e-05"} 5 +rpc_durations_histogram_seconds_bucket{le="1.0000000000000216e-05"} 8 # {dummyID="19206"} -4.6156147425468016e-05 1.7268398151337721e+09 +rpc_durations_histogram_seconds_bucket{le="0.00011000000000000022"} 9 # {dummyID="3974"} 9.528436760156754e-05 1.726839814526797e+09 +rpc_durations_histogram_seconds_bucket{le="0.00021000000000000023"} 11 # {dummyID="29640"} 0.00017459624183458996 1.7268398139220061e+09 +rpc_durations_histogram_seconds_bucket{le="0.0003100000000000002"} 15 # {dummyID="9818"} 0.0002791130914009552 1.7268398149821382e+09 +rpc_durations_histogram_seconds_bucket{le="0.0004100000000000002"} 15 +rpc_durations_histogram_seconds_bucket{le="0.0005100000000000003"} 15 +rpc_durations_histogram_seconds_bucket{le="0.0006100000000000003"} 15 +rpc_durations_histogram_seconds_bucket{le="0.0007100000000000003"} 15 +rpc_durations_histogram_seconds_bucket{le="0.0008100000000000004"} 15 +rpc_durations_histogram_seconds_bucket{le="0.0009100000000000004"} 15 +rpc_durations_histogram_seconds_bucket{le="+Inf"} 15 +rpc_durations_histogram_seconds_sum -8.452185437166741e-05 +rpc_durations_histogram_seconds_count 15 +rpc_durations_histogram_seconds_created 1.726839813016302e+09 +# HELP rpc_durations_seconds RPC latency distributions. 
+# TYPE rpc_durations_seconds summary +rpc_durations_seconds{service="exponential",quantile="0.5"} 7.689368882420941e-07 +rpc_durations_seconds{service="exponential",quantile="0.9"} 1.6537614174305048e-06 +rpc_durations_seconds{service="exponential",quantile="0.99"} 2.0965499063061924e-06 +rpc_durations_seconds_sum{service="exponential"} 2.0318666372575776e-05 +rpc_durations_seconds_count{service="exponential"} 22 +rpc_durations_seconds_created{service="exponential"} 1.7268398130168908e+09 +rpc_durations_seconds{service="normal",quantile="0.5"} -5.066758674917046e-06 +rpc_durations_seconds{service="normal",quantile="0.9"} 0.0002935723711788224 +rpc_durations_seconds{service="normal",quantile="0.99"} 0.0003023094636293776 +rpc_durations_seconds_sum{service="normal"} -8.452185437166741e-05 +rpc_durations_seconds_count{service="normal"} 15 +rpc_durations_seconds_created{service="normal"} 1.726839813016714e+09 +rpc_durations_seconds{service="uniform",quantile="0.5"} 9.005014931474918e-05 +rpc_durations_seconds{service="uniform",quantile="0.9"} 0.00017801230208182325 +rpc_durations_seconds{service="uniform",quantile="0.99"} 0.00018641524538180192 +rpc_durations_seconds_sum{service="uniform"} 0.0011666095700533677 +rpc_durations_seconds_count{service="uniform"} 11 +rpc_durations_seconds_created{service="uniform"} 1.72683981301684e+09 +# HELP rpc_requests Total number of RPC requests received. +# TYPE rpc_requests counter +rpc_requests_total{service="exponential"} 22.0 +rpc_requests_created{service="exponential"} 1.726839813016893e+09 +rpc_requests_total{service="normal"} 15.0 +rpc_requests_created{service="normal"} 1.726839813016717e+09 +rpc_requests_total{service="uniform"} 11.0 +rpc_requests_created{service="uniform"} 1.7268398130168471e+09 +# EOF diff --git a/model/textparse/promtestdata.nometa.txt b/model/textparse/testdata/promtestdata.nometa.txt similarity index 100% rename from model/textparse/promtestdata.nometa.txt rename to model/textparse/testdata/promtestdata.nometa.txt diff --git a/model/textparse/promtestdata.txt b/model/textparse/testdata/promtestdata.txt similarity index 100% rename from model/textparse/promtestdata.txt rename to model/textparse/testdata/promtestdata.txt diff --git a/notifier/notifier.go b/notifier/notifier.go index 218e4cb8c7..e970b67e6d 100644 --- a/notifier/notifier.go +++ b/notifier/notifier.go @@ -16,25 +16,28 @@ package notifier import ( "bytes" "context" + "crypto/md5" + "encoding/hex" "encoding/json" "fmt" "io" + "log/slog" "net/http" "net/url" "path" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/go-openapi/strfmt" "github.com/prometheus/alertmanager/api/v2/models" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/sigv4" "github.com/prometheus/common/version" "go.uber.org/atomic" + "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -117,7 +120,7 @@ type Manager struct { stopRequested chan struct{} alertmanagers map[string]*alertmanagerSet - logger log.Logger + logger *slog.Logger } // Options are the configurable parameters of a Handler. @@ -218,12 +221,12 @@ func do(ctx context.Context, client *http.Client, req *http.Request) (*http.Resp } // NewManager is the manager constructor. 
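Since NewManager now takes a *slog.Logger (the signature change follows just below), callers construct the logger either from the standard library or via the promslog helpers that the new imports above provide. A minimal caller-side sketch, not part of this patch, with an arbitrary queue capacity:

    package main

    import (
        "log/slog"
        "os"

        "github.com/prometheus/prometheus/notifier"
    )

    func main() {
        // Any *slog.Logger works; promslog.NewNopLogger() discards output instead.
        logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
        n := notifier.NewManager(&notifier.Options{QueueCapacity: 10000}, logger)
        _ = n // Run, Send and Stop behave as before.
    }

Passing nil remains valid; as before, the constructor falls back to a no-op logger, now promslog.NewNopLogger().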
-func NewManager(o *Options, logger log.Logger) *Manager { +func NewManager(o *Options, logger *slog.Logger) *Manager { if o.Do == nil { o.Do = do } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } n := &Manager{ @@ -257,6 +260,16 @@ func (n *Manager) ApplyConfig(conf *config.Config) error { n.opts.RelabelConfigs = conf.AlertingConfig.AlertRelabelConfigs amSets := make(map[string]*alertmanagerSet) + // configToAlertmanagers maps alertmanager sets for each unique AlertmanagerConfig, + // helping to avoid dropping known alertmanagers and re-use them without waiting for SD updates when applying the config. + configToAlertmanagers := make(map[string]*alertmanagerSet, len(n.alertmanagers)) + for _, oldAmSet := range n.alertmanagers { + hash, err := oldAmSet.configHash() + if err != nil { + return err + } + configToAlertmanagers[hash] = oldAmSet + } for k, cfg := range conf.AlertingConfig.AlertmanagerConfigs.ToMap() { ams, err := newAlertmanagerSet(cfg, n.logger, n.metrics) @@ -264,6 +277,16 @@ func (n *Manager) ApplyConfig(conf *config.Config) error { return err } + hash, err := ams.configHash() + if err != nil { + return err + } + + if oldAmSet, ok := configToAlertmanagers[hash]; ok { + ams.ams = oldAmSet.ams + ams.droppedAms = oldAmSet.droppedAms + } + amSets[k] = ams } @@ -319,7 +342,7 @@ func (n *Manager) Run(tsets <-chan map[string][]*targetgroup.Group) { }() wg.Wait() - level.Info(n.logger).Log("msg", "Notification manager stopped") + n.logger.Info("Notification manager stopped") } // sendLoop continuously consumes the notifications queue and sends alerts to @@ -376,20 +399,20 @@ func (n *Manager) sendOneBatch() { func (n *Manager) drainQueue() { if !n.opts.DrainOnShutdown { if n.queueLen() > 0 { - level.Warn(n.logger).Log("msg", "Draining remaining notifications on shutdown is disabled, and some notifications have been dropped", "count", n.queueLen()) + n.logger.Warn("Draining remaining notifications on shutdown is disabled, and some notifications have been dropped", "count", n.queueLen()) n.metrics.dropped.Add(float64(n.queueLen())) } return } - level.Info(n.logger).Log("msg", "Draining any remaining notifications...") + n.logger.Info("Draining any remaining notifications...") for n.queueLen() > 0 { n.sendOneBatch() } - level.Info(n.logger).Log("msg", "Remaining notifications drained") + n.logger.Info("Remaining notifications drained") } func (n *Manager) reload(tgs map[string][]*targetgroup.Group) { @@ -399,7 +422,7 @@ func (n *Manager) reload(tgs map[string][]*targetgroup.Group) { for id, tgroup := range tgs { am, ok := n.alertmanagers[id] if !ok { - level.Error(n.logger).Log("msg", "couldn't sync alert manager set", "err", fmt.Sprintf("invalid id:%v", id)) + n.logger.Error("couldn't sync alert manager set", "err", fmt.Sprintf("invalid id:%v", id)) continue } am.sync(tgroup) @@ -422,7 +445,7 @@ func (n *Manager) Send(alerts ...*Alert) { if d := len(alerts) - n.opts.QueueCapacity; d > 0 { alerts = alerts[d:] - level.Warn(n.logger).Log("msg", "Alert batch larger than queue capacity, dropping alerts", "num_dropped", d) + n.logger.Warn("Alert batch larger than queue capacity, dropping alerts", "num_dropped", d) n.metrics.dropped.Add(float64(d)) } @@ -431,7 +454,7 @@ func (n *Manager) Send(alerts ...*Alert) { if d := (len(n.queue) + len(alerts)) - n.opts.QueueCapacity; d > 0 { n.queue = n.queue[d:] - level.Warn(n.logger).Log("msg", "Alert notification queue full, dropping alerts", "num_dropped", d) + n.logger.Warn("Alert notification queue full, 
dropping alerts", "num_dropped", d) n.metrics.dropped.Add(float64(d)) } n.queue = append(n.queue, alerts...) @@ -519,10 +542,10 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { begin := time.Now() - // v1Payload and v2Payload represent 'alerts' marshaled for Alertmanager API - // v1 or v2. Marshaling happens below. Reference here is for caching between + // cachedPayload represent 'alerts' marshaled for Alertmanager API v2. + // Marshaling happens below. Reference here is for caching between // for loop iterations. - var v1Payload, v2Payload []byte + var cachedPayload []byte n.mtx.RLock() amSets := n.alertmanagers @@ -553,42 +576,29 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { continue } // We can't use the cached values from previous iteration. - v1Payload, v2Payload = nil, nil + cachedPayload = nil } switch ams.cfg.APIVersion { - case config.AlertmanagerAPIVersionV1: - { - if v1Payload == nil { - v1Payload, err = json.Marshal(amAlerts) - if err != nil { - level.Error(n.logger).Log("msg", "Encoding alerts for Alertmanager API v1 failed", "err", err) - ams.mtx.RUnlock() - return false - } - } - - payload = v1Payload - } case config.AlertmanagerAPIVersionV2: { - if v2Payload == nil { + if cachedPayload == nil { openAPIAlerts := alertsToOpenAPIAlerts(amAlerts) - v2Payload, err = json.Marshal(openAPIAlerts) + cachedPayload, err = json.Marshal(openAPIAlerts) if err != nil { - level.Error(n.logger).Log("msg", "Encoding alerts for Alertmanager API v2 failed", "err", err) + n.logger.Error("Encoding alerts for Alertmanager API v2 failed", "err", err) ams.mtx.RUnlock() return false } } - payload = v2Payload + payload = cachedPayload } default: { - level.Error(n.logger).Log( - "msg", fmt.Sprintf("Invalid Alertmanager API version '%v', expected one of '%v'", ams.cfg.APIVersion, config.SupportedAlertmanagerAPIVersions), + n.logger.Error( + fmt.Sprintf("Invalid Alertmanager API version '%v', expected one of '%v'", ams.cfg.APIVersion, config.SupportedAlertmanagerAPIVersions), "err", err, ) ams.mtx.RUnlock() @@ -598,7 +608,7 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { if len(ams.cfg.AlertRelabelConfigs) > 0 { // We can't use the cached values on the next iteration. - v1Payload, v2Payload = nil, nil + cachedPayload = nil } for _, am := range ams.ams { @@ -609,7 +619,7 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { go func(ctx context.Context, client *http.Client, url string, payload []byte, count int) { if err := n.sendOne(ctx, client, url, payload); err != nil { - level.Error(n.logger).Log("alertmanager", url, "count", count, "msg", "Error sending alert", "err", err) + n.logger.Error("Error sending alert", "alertmanager", url, "count", count, "err", err) n.metrics.errors.WithLabelValues(url).Inc() } else { numSuccess.Inc() @@ -689,7 +699,7 @@ func (n *Manager) sendOne(ctx context.Context, c *http.Client, url string, b []b // // Stop is safe to call multiple times. 
func (n *Manager) Stop() { - level.Info(n.logger).Log("msg", "Stopping notification manager...") + n.logger.Info("Stopping notification manager...") n.stopOnce.Do(func() { close(n.stopRequested) @@ -724,10 +734,10 @@ type alertmanagerSet struct { mtx sync.RWMutex ams []alertmanager droppedAms []alertmanager - logger log.Logger + logger *slog.Logger } -func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger log.Logger, metrics *alertMetrics) (*alertmanagerSet, error) { +func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger *slog.Logger, metrics *alertMetrics) (*alertmanagerSet, error) { client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, "alertmanager") if err != nil { return nil, err @@ -761,7 +771,7 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) { for _, tg := range tgs { ams, droppedAms, err := AlertmanagerFromGroup(tg, s.cfg) if err != nil { - level.Error(s.logger).Log("msg", "Creating discovered Alertmanagers failed", "err", err) + s.logger.Error("Creating discovered Alertmanagers failed", "err", err) continue } allAms = append(allAms, ams...) @@ -770,6 +780,7 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) { s.mtx.Lock() defer s.mtx.Unlock() + previousAms := s.ams // Set new Alertmanagers and deduplicate them along their unique URL. s.ams = []alertmanager{} s.droppedAms = []alertmanager{} @@ -789,6 +800,26 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) { seen[us] = struct{}{} s.ams = append(s.ams, am) } + // Now remove counters for any removed Alertmanagers. + for _, am := range previousAms { + us := am.url().String() + if _, ok := seen[us]; ok { + continue + } + s.metrics.latency.DeleteLabelValues(us) + s.metrics.sent.DeleteLabelValues(us) + s.metrics.errors.DeleteLabelValues(us) + seen[us] = struct{}{} + } +} + +func (s *alertmanagerSet) configHash() (string, error) { + b, err := yaml.Marshal(s.cfg) + if err != nil { + return "", err + } + hash := md5.Sum(b) + return hex.EncodeToString(hash[:]), nil } func postPath(pre string, v config.AlertmanagerAPIVersion) string { diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go index cf922a537c..97b0274f29 100644 --- a/notifier/notifier_test.go +++ b/notifier/notifier_test.go @@ -26,11 +26,11 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/alertmanager/api/v2/models" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/atomic" "gopkg.in/yaml.v2" @@ -38,6 +38,7 @@ import ( "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/config" + _ "github.com/prometheus/prometheus/discovery/file" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" @@ -49,27 +50,27 @@ func TestPostPath(t *testing.T) { }{ { in: "", - out: "/api/v1/alerts", + out: "/api/v2/alerts", }, { in: "/", - out: "/api/v1/alerts", + out: "/api/v2/alerts", }, { in: "/prefix", - out: "/prefix/api/v1/alerts", + out: "/prefix/api/v2/alerts", }, { in: "/prefix//", - out: "/prefix/api/v1/alerts", + out: "/prefix/api/v2/alerts", }, { in: "prefix//", - out: "/prefix/api/v1/alerts", + out: "/prefix/api/v2/alerts", }, } for _, c := range cases { - require.Equal(t, c.out, postPath(c.in, config.AlertmanagerAPIVersionV1)) + require.Equal(t, c.out, postPath(c.in, 
config.AlertmanagerAPIVersionV2)) } } @@ -743,7 +744,7 @@ func TestHangingNotifier(t *testing.T) { // Initialize the discovery manager // This is relevant as the updates aren't sent continually in real life, but only each updatert. - // The old implementation of TestHangingNotifier didn't take that into acount. + // The old implementation of TestHangingNotifier didn't take that into account. ctx, cancel := context.WithCancel(context.Background()) defer cancel() reg := prometheus.NewRegistry() @@ -751,7 +752,7 @@ func TestHangingNotifier(t *testing.T) { require.NoError(t, err) sdManager := discovery.NewManager( ctx, - log.NewNopLogger(), + promslog.NewNopLogger(), reg, sdMetrics, discovery.Name("sd-manager"), @@ -1017,3 +1018,107 @@ func TestStop_DrainingEnabled(t *testing.T) { require.Equal(t, int64(2), alertsReceived.Load()) } + +func TestApplyConfig(t *testing.T) { + targetURL := "alertmanager:9093" + targetGroup := &targetgroup.Group{ + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue(targetURL), + }, + }, + } + alertmanagerURL := fmt.Sprintf("http://%s/api/v2/alerts", targetURL) + + n := NewManager(&Options{}, nil) + cfg := &config.Config{} + s := ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json +` + // 1. Ensure known alertmanagers are not dropped during ApplyConfig. + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 1) + + // First, apply the config and reload. + require.NoError(t, n.ApplyConfig(cfg)) + tgs := map[string][]*targetgroup.Group{"config-0": {targetGroup}} + n.reload(tgs) + require.Len(t, n.Alertmanagers(), 1) + require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String()) + + // Reapply the config. + require.NoError(t, n.ApplyConfig(cfg)) + // Ensure the known alertmanagers are not dropped. + require.Len(t, n.Alertmanagers(), 1) + require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String()) + + // 2. Ensure known alertmanagers are not dropped during ApplyConfig even when + // the config order changes. + s = ` +alerting: + alertmanagers: + - static_configs: + - file_sd_configs: + - files: + - foo.json +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2) + + require.NoError(t, n.ApplyConfig(cfg)) + require.Len(t, n.Alertmanagers(), 1) + // Ensure no unnecessary alertmanagers are injected. + require.Empty(t, n.alertmanagers["config-0"].ams) + // Ensure the config order is taken into account. + ams := n.alertmanagers["config-1"].ams + require.Len(t, ams, 1) + require.Equal(t, alertmanagerURL, ams[0].url().String()) + + // 3. Ensure known alertmanagers are reused for new config with identical AlertmanagerConfig. + s = ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json + - file_sd_configs: + - files: + - foo.json +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2) + + require.NoError(t, n.ApplyConfig(cfg)) + require.Len(t, n.Alertmanagers(), 2) + for cfgIdx := range 2 { + ams := n.alertmanagers[fmt.Sprintf("config-%d", cfgIdx)].ams + require.Len(t, ams, 1) + require.Equal(t, alertmanagerURL, ams[0].url().String()) + } + + // 4. Ensure known alertmanagers are reused only for identical AlertmanagerConfig. 
+ s = ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json + path_prefix: /bar + - file_sd_configs: + - files: + - foo.json + relabel_configs: + - source_labels: ['__address__'] + regex: 'doesntmatter:1234' + action: drop +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2) + + require.NoError(t, n.ApplyConfig(cfg)) + require.Empty(t, n.Alertmanagers()) +} diff --git a/promql/bench_test.go b/promql/bench_test.go index 74e85b0548..943baceecb 100644 --- a/promql/bench_test.go +++ b/promql/bench_test.go @@ -21,6 +21,8 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" @@ -117,7 +119,7 @@ func rangeQueryCases() []benchCase { }, // Holt-Winters and long ranges. { - expr: "holt_winters(a_X[1d], 0.3, 0.3)", + expr: "double_exponential_smoothing(a_X[1d], 0.3, 0.3)", }, { expr: "changes(a_X[1d])", @@ -380,6 +382,126 @@ func BenchmarkNativeHistograms(b *testing.B) { } } +func BenchmarkInfoFunction(b *testing.B) { + // Initialize test storage and generate test series data. + testStorage := teststorage.New(b) + defer testStorage.Close() + + start := time.Unix(0, 0) + end := start.Add(2 * time.Hour) + step := 30 * time.Second + + // Generate time series data for the benchmark. + generateInfoFunctionTestSeries(b, testStorage, 100, 2000, 3600) + + // Define test cases with queries to benchmark. + cases := []struct { + name string + query string + }{ + { + name: "Joining info metrics with other metrics with group_left example 1", + query: "rate(http_server_request_duration_seconds_count[2m]) * on (job, instance) group_left (k8s_cluster_name) target_info{k8s_cluster_name=\"us-east\"}", + }, + { + name: "Joining info metrics with other metrics with info() example 1", + query: `info(rate(http_server_request_duration_seconds_count[2m]), {k8s_cluster_name="us-east"})`, + }, + { + name: "Joining info metrics with other metrics with group_left example 2", + query: "sum by (k8s_cluster_name, http_status_code) (rate(http_server_request_duration_seconds_count[2m]) * on (job, instance) group_left (k8s_cluster_name) target_info)", + }, + { + name: "Joining info metrics with other metrics with info() example 2", + query: `sum by (k8s_cluster_name, http_status_code) (info(rate(http_server_request_duration_seconds_count[2m]), {k8s_cluster_name=~".+"}))`, + }, + } + + // Benchmark each query type. + for _, tc := range cases { + // Initialize the PromQL engine once for all benchmarks. + opts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 50000000, + Timeout: 100 * time.Second, + EnableAtModifier: true, + EnableNegativeOffset: true, + } + engine := promql.NewEngine(opts) + b.Run(tc.name, func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() // Stop the timer to exclude setup time. + qry, err := engine.NewRangeQuery(context.Background(), testStorage, nil, tc.query, start, end, step) + require.NoError(b, err) + b.StartTimer() + result := qry.Exec(context.Background()) + require.NoError(b, result.Err) + } + }) + } + + // Report allocations. + b.ReportAllocs() +} + +// Helper function to generate target_info and http_server_request_duration_seconds_count series for info function benchmarking. 
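As an aside on the comparison above: the new benchmark can be exercised in isolation with the usual Go tooling, for example `go test -run '^$' -bench BenchmarkInfoFunction -benchmem ./promql/` from the repository root (the exact flags are illustrative; any standard `go test -bench` invocation against this package works), which reports per-query timings and allocations for both the group_left joins and their info() equivalents.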
+func generateInfoFunctionTestSeries(tb testing.TB, stor *teststorage.TestStorage, infoSeriesNum, interval, numIntervals int) { + tb.Helper() + + ctx := context.Background() + statusCodes := []string{"200", "400", "500"} + + // Generate target_info metrics with instance and job labels, and k8s_cluster_name label. + // Generate http_server_request_duration_seconds_count metrics with instance and job labels, and http_status_code label. + // the classic target_info metrics is gauge type. + metrics := make([]labels.Labels, 0, infoSeriesNum+len(statusCodes)) + for i := 0; i < infoSeriesNum; i++ { + clusterName := "us-east" + if i >= infoSeriesNum/2 { + clusterName = "eu-south" + } + metrics = append(metrics, labels.FromStrings( + "__name__", "target_info", + "instance", "instance"+strconv.Itoa(i), + "job", "job"+strconv.Itoa(i), + "k8s_cluster_name", clusterName, + )) + } + + for _, statusCode := range statusCodes { + metrics = append(metrics, labels.FromStrings( + "__name__", "http_server_request_duration_seconds_count", + "instance", "instance0", + "job", "job0", + "http_status_code", statusCode, + )) + } + + // Append the generated metrics and samples to the storage. + refs := make([]storage.SeriesRef, len(metrics)) + + for i := 0; i < numIntervals; i++ { + a := stor.Appender(context.Background()) + ts := int64(i * interval) + for j, metric := range metrics[:infoSeriesNum] { + ref, _ := a.Append(refs[j], metric, ts, 1) + refs[j] = ref + } + + for j, metric := range metrics[infoSeriesNum:] { + ref, _ := a.Append(refs[j+infoSeriesNum], metric, ts, float64(i)) + refs[j+infoSeriesNum] = ref + } + + require.NoError(tb, a.Commit()) + } + + stor.DB.ForceHeadMMap() // Ensure we have at most one head chunk for every series. + stor.DB.Compact(ctx) +} + func generateNativeHistogramSeries(app storage.Appender, numSeries int) error { commonLabels := []string{labels.MetricName, "native_histogram_series", "foo", "bar"} series := make([][]*histogram.Histogram, numSeries) diff --git a/promql/engine.go b/promql/engine.go index 9454f5b5dc..55ac4be913 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -20,6 +20,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math" "reflect" "runtime" @@ -30,10 +31,9 @@ import ( "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" @@ -43,6 +43,7 @@ import ( "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/util/annotations" @@ -125,7 +126,11 @@ type QueryEngine interface { // QueryLogger is an interface that can be used to log all the queries logged // by the engine. type QueryLogger interface { - Log(...interface{}) error + Error(msg string, args ...any) + Info(msg string, args ...any) + Debug(msg string, args ...any) + Warn(msg string, args ...any) + With(args ...any) Close() error } @@ -288,7 +293,7 @@ type QueryTracker interface { // EngineOpts contains configuration options used when creating a new Engine. 
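The reworked QueryLogger interface above drops the single Log(...) error method in favour of slog-style leveled calls plus a mutating With, which matches how exec() uses it further down (a chain of l.With(...) calls followed by l.Info("promql query logged")). A hypothetical adapter over a plain *slog.Logger, shown only to illustrate the expected shape; the real query logger lives elsewhere in the tree and is not part of this diff:

    package querylog // illustrative placement only

    import (
        "io"
        "log/slog"
    )

    // slogQueryLogger is a made-up name; it satisfies the QueryLogger interface
    // declared above by delegating to an underlying *slog.Logger.
    type slogQueryLogger struct {
        logger *slog.Logger
        closer io.Closer // e.g. the query log file
    }

    func (q *slogQueryLogger) Error(msg string, args ...any) { q.logger.Error(msg, args...) }
    func (q *slogQueryLogger) Info(msg string, args ...any)  { q.logger.Info(msg, args...) }
    func (q *slogQueryLogger) Debug(msg string, args ...any) { q.logger.Debug(msg, args...) }
    func (q *slogQueryLogger) Warn(msg string, args ...any)  { q.logger.Warn(msg, args...) }

    // With returns nothing in the interface, so attributes accumulate by
    // replacing the wrapped logger; exec() relies on this when it attaches
    // params, stats and span IDs before the final Info call.
    func (q *slogQueryLogger) With(args ...any) { q.logger = q.logger.With(args...) }

    func (q *slogQueryLogger) Close() error { return q.closer.Close() }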
type EngineOpts struct { - Logger log.Logger + Logger *slog.Logger Reg prometheus.Registerer MaxSamples int Timeout time.Duration @@ -326,7 +331,7 @@ type EngineOpts struct { // Engine handles the lifetime of queries from beginning to end. // It is connected to a querier. type Engine struct { - logger log.Logger + logger *slog.Logger metrics *engineMetrics timeout time.Duration maxSamplesPerQuery int @@ -344,7 +349,7 @@ type Engine struct { // NewEngine returns a new engine. func NewEngine(opts EngineOpts) *Engine { if opts.Logger == nil { - opts.Logger = log.NewNopLogger() + opts.Logger = promslog.NewNopLogger() } queryResultSummary := prometheus.NewSummaryVec(prometheus.SummaryOpts{ @@ -403,7 +408,7 @@ func NewEngine(opts EngineOpts) *Engine { if opts.LookbackDelta == 0 { opts.LookbackDelta = defaultLookbackDelta if l := opts.Logger; l != nil { - level.Debug(l).Log("msg", "Lookback delta is zero, setting to default value", "value", defaultLookbackDelta) + l.Debug("Lookback delta is zero, setting to default value", "value", defaultLookbackDelta) } } @@ -435,6 +440,10 @@ func NewEngine(opts EngineOpts) *Engine { // Close closes ng. func (ng *Engine) Close() error { + if ng == nil { + return nil + } + if ng.activeQueryTracker != nil { return ng.activeQueryTracker.Close() } @@ -451,7 +460,7 @@ func (ng *Engine) SetQueryLogger(l QueryLogger) { // not make reload fail; only log a warning. err := ng.queryLogger.Close() if err != nil { - level.Warn(ng.logger).Log("msg", "Error while closing the previous query log file", "err", err) + ng.logger.Warn("Error while closing the previous query log file", "err", err) } } @@ -629,23 +638,23 @@ func (ng *Engine) exec(ctx context.Context, q *query) (v parser.Value, ws annota // The step provided by the user is in seconds. params["step"] = int64(eq.Interval / (time.Second / time.Nanosecond)) } - f := []interface{}{"params", params} + l.With("params", params) if err != nil { - f = append(f, "error", err) + l.With("error", err) } - f = append(f, "stats", stats.NewQueryStats(q.Stats())) + l.With("stats", stats.NewQueryStats(q.Stats())) if span := trace.SpanFromContext(ctx); span != nil { - f = append(f, "spanID", span.SpanContext().SpanID()) + l.With("spanID", span.SpanContext().SpanID()) } if origin := ctx.Value(QueryOrigin{}); origin != nil { for k, v := range origin.(map[string]interface{}) { - f = append(f, k, v) + l.With(k, v) } } - if err := l.Log(f...); err != nil { - ng.metrics.queryLogFailures.Inc() - level.Error(ng.logger).Log("msg", "can't log query", "err", err) - } + l.Info("promql query logged") + // TODO: @tjhop -- do we still need this metric/error log if logger doesn't return errors? 
+ // ng.metrics.queryLogFailures.Inc() + // ng.logger.Error("can't log query", "err", err) } ng.queryLoggerLock.RUnlock() }() @@ -731,6 +740,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval samplesStats: query.sampleStats, noStepSubqueryIntervalFn: ng.noStepSubqueryIntervalFn, enableDelayedNameRemoval: ng.enableDelayedNameRemoval, + querier: querier, } query.sampleStats.InitStepTracking(start, start, 1) @@ -789,6 +799,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval samplesStats: query.sampleStats, noStepSubqueryIntervalFn: ng.noStepSubqueryIntervalFn, enableDelayedNameRemoval: ng.enableDelayedNameRemoval, + querier: querier, } query.sampleStats.InitStepTracking(evaluator.startTimestamp, evaluator.endTimestamp, evaluator.interval) val, warnings, err := evaluator.Eval(ctxInnerEval, s.Expr) @@ -906,11 +917,17 @@ func getTimeRangesForSelector(s *parser.EvalStmt, n *parser.VectorSelector, path } if evalRange == 0 { - start -= durationMilliseconds(s.LookbackDelta) + // Reduce the start by one fewer ms than the lookback delta + // because wo want to exclude samples that are precisely the + // lookback delta before the eval time. + start -= durationMilliseconds(s.LookbackDelta) - 1 } else { - // For all matrix queries we want to ensure that we have (end-start) + range selected - // this way we have `range` data before the start time - start -= durationMilliseconds(evalRange) + // For all matrix queries we want to ensure that we have + // (end-start) + range selected this way we have `range` data + // before the start time. We subtract one from the range to + // exclude samples positioned directly at the lower boundary of + // the range. + start -= durationMilliseconds(evalRange) - 1 } offsetMilliseconds := durationMilliseconds(n.OriginalOffset) @@ -995,6 +1012,8 @@ func extractGroupsFromPath(p []parser.Node) (bool, []string) { return false, nil } +// checkAndExpandSeriesSet expands expr's UnexpandedSeriesSet into expr's Series. +// If the Series field is already non-nil, it's a no-op. func checkAndExpandSeriesSet(ctx context.Context, expr parser.Expr) (annotations.Annotations, error) { switch e := expr.(type) { case *parser.MatrixSelector: @@ -1048,11 +1067,12 @@ type evaluator struct { maxSamples int currentSamples int - logger log.Logger + logger *slog.Logger lookbackDelta time.Duration samplesStats *stats.QuerySamples noStepSubqueryIntervalFn func(rangeMillis int64) int64 enableDelayedNameRemoval bool + querier storage.Querier } // errorf causes a panic with the input formatted into an error. @@ -1078,7 +1098,7 @@ func (ev *evaluator) recover(expr parser.Expr, ws *annotations.Annotations, errp buf := make([]byte, 64<<10) buf = buf[:runtime.Stack(buf, false)] - level.Error(ev.logger).Log("msg", "runtime panic during query evaluation", "expr", expr.String(), "err", e, "stacktrace", string(buf)) + ev.logger.Error("runtime panic during query evaluation", "expr", expr.String(), "err", e, "stacktrace", string(buf)) *errp = fmt.Errorf("unexpected error: %w", err) case errWithWarnings: *errp = err.err @@ -1214,38 +1234,17 @@ func (ev *evaluator) rangeEval(ctx context.Context, prepSeries func(labels.Label ev.currentSamples = tempNumSamples // Gather input vectors for this timestamp. 
for i := range exprs { - vectors[i] = vectors[i][:0] - + var bh []EvalSeriesHelper + var sh []EvalSeriesHelper if prepSeries != nil { - bufHelpers[i] = bufHelpers[i][:0] - } - - for si, series := range matrixes[i] { - switch { - case len(series.Floats) > 0 && series.Floats[0].T == ts: - vectors[i] = append(vectors[i], Sample{Metric: series.Metric, F: series.Floats[0].F, T: ts, DropName: series.DropName}) - // Move input vectors forward so we don't have to re-scan the same - // past points at the next step. - matrixes[i][si].Floats = series.Floats[1:] - case len(series.Histograms) > 0 && series.Histograms[0].T == ts: - vectors[i] = append(vectors[i], Sample{Metric: series.Metric, H: series.Histograms[0].H, T: ts, DropName: series.DropName}) - matrixes[i][si].Histograms = series.Histograms[1:] - default: - continue - } - if prepSeries != nil { - bufHelpers[i] = append(bufHelpers[i], seriesHelpers[i][si]) - } - // Don't add histogram size here because we only - // copy the pointer above, not the whole - // histogram. - ev.currentSamples++ - if ev.currentSamples > ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } + bh = bufHelpers[i][:0] + sh = seriesHelpers[i] } + vectors[i], bh = ev.gatherVector(ts, matrixes[i], vectors[i], bh, sh) args[i] = vectors[i] - ev.samplesStats.UpdatePeak(ev.currentSamples) + if prepSeries != nil { + bufHelpers[i] = bh + } } // Make the function call. @@ -1446,6 +1445,79 @@ func (ev *evaluator) rangeEvalAgg(ctx context.Context, aggExpr *parser.Aggregate return result, warnings } +// evalSeries generates a Matrix between ev.startTimestamp and ev.endTimestamp (inclusive), each point spaced ev.interval apart, from series given offset. +// For every storage.Series iterator in series, the method iterates in ev.interval sized steps from ev.startTimestamp until and including ev.endTimestamp, +// collecting every corresponding sample (obtained via ev.vectorSelectorSingle) into a Series. +// All of the generated Series are collected into a Matrix, that gets returned. +func (ev *evaluator) evalSeries(ctx context.Context, series []storage.Series, offset time.Duration, recordOrigT bool) Matrix { + numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1 + + mat := make(Matrix, 0, len(series)) + var prevSS *Series + it := storage.NewMemoizedEmptyIterator(durationMilliseconds(ev.lookbackDelta)) + var chkIter chunkenc.Iterator + for _, s := range series { + if err := contextDone(ctx, "expression evaluation"); err != nil { + ev.error(err) + } + + chkIter = s.Iterator(chkIter) + it.Reset(chkIter) + ss := Series{ + Metric: s.Labels(), + } + + for ts, step := ev.startTimestamp, -1; ts <= ev.endTimestamp; ts += ev.interval { + step++ + origT, f, h, ok := ev.vectorSelectorSingle(it, offset, ts) + if !ok { + continue + } + + if h == nil { + ev.currentSamples++ + ev.samplesStats.IncrementSamplesAtStep(step, 1) + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + if ss.Floats == nil { + ss.Floats = reuseOrGetFPointSlices(prevSS, numSteps) + } + if recordOrigT { + // This is an info metric, where we want to track the original sample timestamp. + // Info metric values should be 1 by convention, therefore we can re-use this + // space in the sample. 
+ f = float64(origT) + } + ss.Floats = append(ss.Floats, FPoint{F: f, T: ts}) + } else { + if recordOrigT { + ev.error(fmt.Errorf("this should be an info metric, with float samples: %s", ss.Metric)) + } + + point := HPoint{H: h, T: ts} + histSize := point.size() + ev.currentSamples += histSize + ev.samplesStats.IncrementSamplesAtStep(step, int64(histSize)) + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + if ss.Histograms == nil { + ss.Histograms = reuseOrGetHPointSlices(prevSS, numSteps) + } + ss.Histograms = append(ss.Histograms, point) + } + } + + if len(ss.Floats)+len(ss.Histograms) > 0 { + mat = append(mat, ss) + prevSS = &mat[len(mat)-1] + } + } + ev.samplesStats.UpdatePeak(ev.currentSamples) + return mat +} + // evalSubquery evaluates given SubqueryExpr and returns an equivalent // evaluated MatrixSelector in its place. Note that the Name and LabelMatchers are not set. func (ev *evaluator) evalSubquery(ctx context.Context, subq *parser.SubqueryExpr) (*parser.MatrixSelector, int, annotations.Annotations) { @@ -1592,6 +1664,8 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, return ev.evalLabelReplace(ctx, e.Args) case "label_join": return ev.evalLabelJoin(ctx, e.Args) + case "info": + return ev.evalInfo(ctx, e.Args) } if !matrixArg { @@ -1850,20 +1924,20 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, }, e.LHS, e.RHS) default: return ev.rangeEval(ctx, initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - vec, err := ev.VectorBinop(e.Op, v[0].(Vector), v[1].(Vector), e.VectorMatching, e.ReturnBool, sh[0], sh[1], enh) + vec, err := ev.VectorBinop(e.Op, v[0].(Vector), v[1].(Vector), e.VectorMatching, e.ReturnBool, sh[0], sh[1], enh, e.PositionRange()) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) } case lt == parser.ValueTypeVector && rt == parser.ValueTypeScalar: return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - vec, err := ev.VectorscalarBinop(e.Op, v[0].(Vector), Scalar{V: v[1].(Vector)[0].F}, false, e.ReturnBool, enh) + vec, err := ev.VectorscalarBinop(e.Op, v[0].(Vector), Scalar{V: v[1].(Vector)[0].F}, false, e.ReturnBool, enh, e.PositionRange()) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) case lt == parser.ValueTypeScalar && rt == parser.ValueTypeVector: return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - vec, err := ev.VectorscalarBinop(e.Op, v[1].(Vector), Scalar{V: v[0].(Vector)[0].F}, true, e.ReturnBool, enh) + vec, err := ev.VectorscalarBinop(e.Op, v[1].(Vector), Scalar{V: v[0].(Vector)[0].F}, true, e.ReturnBool, enh, e.PositionRange()) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) } @@ -1883,56 +1957,7 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, if err != nil { ev.error(errWithWarnings{fmt.Errorf("expanding series: %w", err), ws}) } - mat := make(Matrix, 0, len(e.Series)) - var prevSS *Series - it := storage.NewMemoizedEmptyIterator(durationMilliseconds(ev.lookbackDelta)) - var chkIter chunkenc.Iterator - for i, s := range e.Series { - if err := contextDone(ctx, "expression evaluation"); err != nil { - ev.error(err) - } - chkIter = s.Iterator(chkIter) - it.Reset(chkIter) - ss := Series{ - Metric: e.Series[i].Labels(), - } - - for ts, step := ev.startTimestamp, 
-1; ts <= ev.endTimestamp; ts += ev.interval { - step++ - _, f, h, ok := ev.vectorSelectorSingle(it, e, ts) - if ok { - if h == nil { - ev.currentSamples++ - ev.samplesStats.IncrementSamplesAtStep(step, 1) - if ev.currentSamples > ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } - if ss.Floats == nil { - ss.Floats = reuseOrGetFPointSlices(prevSS, numSteps) - } - ss.Floats = append(ss.Floats, FPoint{F: f, T: ts}) - } else { - point := HPoint{H: h, T: ts} - histSize := point.size() - ev.currentSamples += histSize - ev.samplesStats.IncrementSamplesAtStep(step, int64(histSize)) - if ev.currentSamples > ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } - if ss.Histograms == nil { - ss.Histograms = reuseOrGetHPointSlices(prevSS, numSteps) - } - ss.Histograms = append(ss.Histograms, point) - } - } - } - - if len(ss.Floats)+len(ss.Histograms) > 0 { - mat = append(mat, ss) - prevSS = &mat[len(mat)-1] - } - } - ev.samplesStats.UpdatePeak(ev.currentSamples) + mat := ev.evalSeries(ctx, e.Series, e.Offset, false) return mat, ws case *parser.MatrixSelector: @@ -1953,6 +1978,7 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, samplesStats: ev.samplesStats.NewChild(), noStepSubqueryIntervalFn: ev.noStepSubqueryIntervalFn, enableDelayedNameRemoval: ev.enableDelayedNameRemoval, + querier: ev.querier, } if e.Step != 0 { @@ -1997,6 +2023,7 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, samplesStats: ev.samplesStats.NewChild(), noStepSubqueryIntervalFn: ev.noStepSubqueryIntervalFn, enableDelayedNameRemoval: ev.enableDelayedNameRemoval, + querier: ev.querier, } res, ws := newEv.eval(ctx, e.Expr) ev.currentSamples = newEv.currentSamples @@ -2083,7 +2110,7 @@ func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(ctx context.Co seriesIterators := make([]*storage.MemoizedSeriesIterator, len(vs.Series)) for i, s := range vs.Series { it := s.Iterator(nil) - seriesIterators[i] = storage.NewMemoizedIterator(it, durationMilliseconds(ev.lookbackDelta)) + seriesIterators[i] = storage.NewMemoizedIterator(it, durationMilliseconds(ev.lookbackDelta)-1) } return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { @@ -2097,7 +2124,7 @@ func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(ctx context.Co vec := make(Vector, 0, len(vs.Series)) for i, s := range vs.Series { it := seriesIterators[i] - t, _, _, ok := ev.vectorSelectorSingle(it, vs, enh.Ts) + t, _, _, ok := ev.vectorSelectorSingle(it, vs.Offset, enh.Ts) if !ok { continue } @@ -2121,10 +2148,10 @@ func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(ctx context.Co } // vectorSelectorSingle evaluates an instant vector for the iterator of one time series. 
-func (ev *evaluator) vectorSelectorSingle(it *storage.MemoizedSeriesIterator, node *parser.VectorSelector, ts int64) ( +func (ev *evaluator) vectorSelectorSingle(it *storage.MemoizedSeriesIterator, offset time.Duration, ts int64) ( int64, float64, *histogram.FloatHistogram, bool, ) { - refTime := ts - durationMilliseconds(node.Offset) + refTime := ts - durationMilliseconds(offset) var t int64 var v float64 var h *histogram.FloatHistogram @@ -2145,7 +2172,7 @@ func (ev *evaluator) vectorSelectorSingle(it *storage.MemoizedSeriesIterator, no if valueType == chunkenc.ValNone || t > refTime { var ok bool t, v, h, ok = it.PeekPrev() - if !ok || t < refTime-durationMilliseconds(ev.lookbackDelta) { + if !ok || t <= refTime-durationMilliseconds(ev.lookbackDelta) { return 0, 0, nil, false } } @@ -2279,20 +2306,20 @@ func (ev *evaluator) matrixIterSlice( mintFloats, mintHistograms := mint, mint // First floats... - if len(floats) > 0 && floats[len(floats)-1].T >= mint { + if len(floats) > 0 && floats[len(floats)-1].T > mint { // There is an overlap between previous and current ranges, retain common // points. In most such cases: // (a) the overlap is significantly larger than the eval step; and/or // (b) the number of samples is relatively small. // so a linear search will be as fast as a binary search. var drop int - for drop = 0; floats[drop].T < mint; drop++ { + for drop = 0; floats[drop].T <= mint; drop++ { } ev.currentSamples -= drop copy(floats, floats[drop:]) floats = floats[:len(floats)-drop] // Only append points with timestamps after the last timestamp we have. - mintFloats = floats[len(floats)-1].T + 1 + mintFloats = floats[len(floats)-1].T } else { ev.currentSamples -= len(floats) if floats != nil { @@ -2301,14 +2328,14 @@ func (ev *evaluator) matrixIterSlice( } // ...then the same for histograms. TODO(beorn7): Use generics? - if len(histograms) > 0 && histograms[len(histograms)-1].T >= mint { + if len(histograms) > 0 && histograms[len(histograms)-1].T > mint { // There is an overlap between previous and current ranges, retain common // points. In most such cases: // (a) the overlap is significantly larger than the eval step; and/or // (b) the number of samples is relatively small. // so a linear search will be as fast as a binary search. var drop int - for drop = 0; histograms[drop].T < mint; drop++ { + for drop = 0; histograms[drop].T <= mint; drop++ { } // Rotate the buffer around the drop index so that points before mint can be // reused to store new histograms. @@ -2319,7 +2346,7 @@ func (ev *evaluator) matrixIterSlice( histograms = histograms[:len(histograms)-drop] ev.currentSamples -= totalHPointSize(histograms) // Only append points with timestamps after the last timestamp we have. - mintHistograms = histograms[len(histograms)-1].T + 1 + mintHistograms = histograms[len(histograms)-1].T } else { ev.currentSamples -= totalHPointSize(histograms) if histograms != nil { @@ -2343,7 +2370,7 @@ loop: case chunkenc.ValFloatHistogram, chunkenc.ValHistogram: t := buf.AtT() // Values in the buffer are guaranteed to be smaller than maxt. - if t >= mintHistograms { + if t > mintHistograms { if histograms == nil { histograms = getMatrixSelectorHPoints() } @@ -2369,7 +2396,7 @@ loop: continue loop } // Values in the buffer are guaranteed to be smaller than maxt. 
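// A minimal sketch, not part of this diff, of the boundary rule the changes above and below implement:
// lookback and range windows are now half-open on the left, i.e. (refTime-lookbackDelta, refTime] and
// (maxt-range, maxt], so a sample lying exactly on the lower bound is no longer selected (which is why
// the Start values in the SelectHints tests move from N to N+1).
//
//    func inLookbackWindow(t, refTime, lookbackDeltaMs int64) bool {
//        return t > refTime-lookbackDeltaMs && t <= refTime
//    }
//
//    // With refTime = 10000 and a 5000ms lookback delta:
//    //   inLookbackWindow(5000, 10000, 5000)  == false // exactly lookbackDelta old: excluded
//    //   inLookbackWindow(5001, 10000, 5000)  == true
//    //   inLookbackWindow(10000, 10000, 5000) == true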
- if t >= mintFloats { + if t > mintFloats { ev.currentSamples++ if ev.currentSamples > ev.maxSamples { ev.error(ErrTooManySamples(env)) @@ -2504,7 +2531,7 @@ func (ev *evaluator) VectorUnless(lhs, rhs Vector, matching *parser.VectorMatchi } // VectorBinop evaluates a binary operation between two Vectors, excluding set operators. -func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *parser.VectorMatching, returnBool bool, lhsh, rhsh []EvalSeriesHelper, enh *EvalNodeHelper) (Vector, error) { +func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *parser.VectorMatching, returnBool bool, lhsh, rhsh []EvalSeriesHelper, enh *EvalNodeHelper, pos posrange.PositionRange) (Vector, error) { if matching.Card == parser.CardManyToMany { panic("many-to-many only allowed for set operators") } @@ -2578,7 +2605,7 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * fl, fr = fr, fl hl, hr = hr, hl } - floatValue, histogramValue, keep, err := vectorElemBinop(op, fl, fr, hl, hr) + floatValue, histogramValue, keep, err := vectorElemBinop(op, fl, fr, hl, hr, pos) if err != nil { lastErr = err } @@ -2687,7 +2714,7 @@ func resultMetric(lhs, rhs labels.Labels, op parser.ItemType, matching *parser.V } // VectorscalarBinop evaluates a binary operation between a Vector and a Scalar. -func (ev *evaluator) VectorscalarBinop(op parser.ItemType, lhs Vector, rhs Scalar, swap, returnBool bool, enh *EvalNodeHelper) (Vector, error) { +func (ev *evaluator) VectorscalarBinop(op parser.ItemType, lhs Vector, rhs Scalar, swap, returnBool bool, enh *EvalNodeHelper, pos posrange.PositionRange) (Vector, error) { var lastErr error for _, lhsSample := range lhs { lf, rf := lhsSample.F, rhs.V @@ -2699,7 +2726,7 @@ func (ev *evaluator) VectorscalarBinop(op parser.ItemType, lhs Vector, rhs Scala lf, rf = rf, lf lh, rh = rh, lh } - float, histogram, keep, err := vectorElemBinop(op, lf, rf, lh, rh) + float, histogram, keep, err := vectorElemBinop(op, lf, rf, lh, rh, pos) if err != nil { lastErr = err } @@ -2766,7 +2793,7 @@ func scalarBinop(op parser.ItemType, lhs, rhs float64) float64 { } // vectorElemBinop evaluates a binary operation between two Vector elements. 
-func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram.FloatHistogram) (float64, *histogram.FloatHistogram, bool, error) { +func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram.FloatHistogram, pos posrange.PositionRange) (float64, *histogram.FloatHistogram, bool, error) { switch op { case parser.ADD: if hlhs != nil && hrhs != nil { @@ -2776,7 +2803,13 @@ func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram } return 0, res.Compact(0), true, nil } - return lhs + rhs, nil, true, nil + if hlhs == nil && hrhs == nil { + return lhs + rhs, nil, true, nil + } + if hlhs != nil { + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("histogram", "+", "float", pos) + } + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("float", "+", "histogram", pos) case parser.SUB: if hlhs != nil && hrhs != nil { res, err := hlhs.Copy().Sub(hrhs) @@ -2785,7 +2818,13 @@ func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram } return 0, res.Compact(0), true, nil } - return lhs - rhs, nil, true, nil + if hlhs == nil && hrhs == nil { + return lhs - rhs, nil, true, nil + } + if hlhs != nil { + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("histogram", "-", "float", pos) + } + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("float", "-", "histogram", pos) case parser.MUL: if hlhs != nil && hrhs == nil { return 0, hlhs.Copy().Mul(rhs), true, nil @@ -2793,11 +2832,20 @@ func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram if hlhs == nil && hrhs != nil { return 0, hrhs.Copy().Mul(lhs), true, nil } + if hlhs != nil && hrhs != nil { + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("histogram", "*", "histogram", pos) + } return lhs * rhs, nil, true, nil case parser.DIV: if hlhs != nil && hrhs == nil { return 0, hlhs.Copy().Div(rhs), true, nil } + if hrhs != nil { + if hlhs != nil { + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("histogram", "/", "histogram", pos) + } + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("float", "/", "histogram", pos) + } return lhs / rhs, nil, true, nil case parser.POW: return math.Pow(lhs, rhs), nil, true, nil @@ -2874,7 +2922,15 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix group.hasHistogram = true } case parser.STDVAR, parser.STDDEV: - group.floatValue = 0 + switch { + case h != nil: + // Ignore histograms for STDVAR and STDDEV. 
+ group.seen = false + case math.IsNaN(f), math.IsInf(f, 0): + group.floatValue = math.NaN() + default: + group.floatValue = 0 + } case parser.QUANTILE: group.heap = make(vectorByValueHeap, 1) group.heap[0] = Sample{F: f} @@ -3335,6 +3391,9 @@ func handleVectorBinopError(err error, e *parser.BinaryExpr) annotations.Annotat } metricName := "" pos := e.PositionRange() + if errors.Is(err, annotations.PromQLInfo) || errors.Is(err, annotations.PromQLWarning) { + return annotations.New().Add(err) + } if errors.Is(err, histogram.ErrHistogramsIncompatibleSchema) { return annotations.New().Add(annotations.NewMixedExponentialCustomHistogramsWarning(metricName, pos)) } else if errors.Is(err, histogram.ErrHistogramsIncompatibleBounds) { @@ -3661,3 +3720,41 @@ func newHistogramStatsSeries(series storage.Series) *histogramStatsSeries { func (s histogramStatsSeries) Iterator(it chunkenc.Iterator) chunkenc.Iterator { return NewHistogramStatsIterator(s.Series.Iterator(it)) } + +// gatherVector gathers a Vector for ts from the series in input. +// output is used as a buffer. +// If bufHelpers and seriesHelpers are provided, seriesHelpers[i] is appended to bufHelpers for every input index i. +// The gathered Vector and bufHelper are returned. +func (ev *evaluator) gatherVector(ts int64, input Matrix, output Vector, bufHelpers, seriesHelpers []EvalSeriesHelper) (Vector, []EvalSeriesHelper) { + output = output[:0] + for i, series := range input { + switch { + case len(series.Floats) > 0 && series.Floats[0].T == ts: + s := series.Floats[0] + output = append(output, Sample{Metric: series.Metric, F: s.F, T: ts, DropName: series.DropName}) + // Move input vectors forward so we don't have to re-scan the same + // past points at the next step. + input[i].Floats = series.Floats[1:] + case len(series.Histograms) > 0 && series.Histograms[0].T == ts: + s := series.Histograms[0] + output = append(output, Sample{Metric: series.Metric, H: s.H, T: ts, DropName: series.DropName}) + input[i].Histograms = series.Histograms[1:] + default: + continue + } + if len(seriesHelpers) > 0 { + bufHelpers = append(bufHelpers, seriesHelpers[i]) + } + + // Don't add histogram size here because we only + // copy the pointer above, not the whole + // histogram. + ev.currentSamples++ + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + } + ev.samplesStats.UpdatePeak(ev.currentSamples) + + return output, bufHelpers +} diff --git a/promql/engine_internal_test.go b/promql/engine_internal_test.go index cb501b2fdf..0962c218c7 100644 --- a/promql/engine_internal_test.go +++ b/promql/engine_internal_test.go @@ -14,22 +14,21 @@ package promql import ( + "bytes" "errors" "testing" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/util/annotations" ) func TestRecoverEvaluatorRuntime(t *testing.T) { - var output []interface{} - logger := log.Logger(log.LoggerFunc(func(keyvals ...interface{}) error { - output = append(output, keyvals...) 
- return nil - })) + var output bytes.Buffer + logger := promslog.New(&promslog.Config{Writer: &output}) ev := &evaluator{logger: logger} expr, _ := parser.ParseExpr("sum(up)") @@ -38,7 +37,7 @@ func TestRecoverEvaluatorRuntime(t *testing.T) { defer func() { require.EqualError(t, err, "unexpected error: runtime error: index out of range [123] with length 0") - require.Contains(t, output, "sum(up)") + require.Contains(t, output.String(), "sum(up)") }() defer ev.recover(expr, nil, &err) @@ -48,7 +47,7 @@ func TestRecoverEvaluatorRuntime(t *testing.T) { } func TestRecoverEvaluatorError(t *testing.T) { - ev := &evaluator{logger: log.NewNopLogger()} + ev := &evaluator{logger: promslog.NewNopLogger()} var err error e := errors.New("custom error") @@ -62,7 +61,7 @@ func TestRecoverEvaluatorError(t *testing.T) { } func TestRecoverEvaluatorErrorWithWarnings(t *testing.T) { - ev := &evaluator{logger: log.NewNopLogger()} + ev := &evaluator{logger: promslog.NewNopLogger()} var err error var ws annotations.Annotations diff --git a/promql/engine_test.go b/promql/engine_test.go index 4493a66106..7c398029f5 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -325,271 +325,271 @@ func TestSelectHintsSetCorrectly(t *testing.T) { { query: "foo", start: 10000, expected: []*storage.SelectHints{ - {Start: 5000, End: 10000}, + {Start: 5001, End: 10000}, }, }, { query: "foo @ 15", start: 10000, expected: []*storage.SelectHints{ - {Start: 10000, End: 15000}, + {Start: 10001, End: 15000}, }, }, { query: "foo @ 1", start: 10000, expected: []*storage.SelectHints{ - {Start: -4000, End: 1000}, + {Start: -3999, End: 1000}, }, }, { query: "foo[2m]", start: 200000, expected: []*storage.SelectHints{ - {Start: 80000, End: 200000, Range: 120000}, + {Start: 80001, End: 200000, Range: 120000}, }, }, { query: "foo[2m] @ 180", start: 200000, expected: []*storage.SelectHints{ - {Start: 60000, End: 180000, Range: 120000}, + {Start: 60001, End: 180000, Range: 120000}, }, }, { query: "foo[2m] @ 300", start: 200000, expected: []*storage.SelectHints{ - {Start: 180000, End: 300000, Range: 120000}, + {Start: 180001, End: 300000, Range: 120000}, }, }, { query: "foo[2m] @ 60", start: 200000, expected: []*storage.SelectHints{ - {Start: -60000, End: 60000, Range: 120000}, + {Start: -59999, End: 60000, Range: 120000}, }, }, { query: "foo[2m] offset 2m", start: 300000, expected: []*storage.SelectHints{ - {Start: 60000, End: 180000, Range: 120000}, + {Start: 60001, End: 180000, Range: 120000}, }, }, { query: "foo[2m] @ 200 offset 2m", start: 300000, expected: []*storage.SelectHints{ - {Start: -40000, End: 80000, Range: 120000}, + {Start: -39999, End: 80000, Range: 120000}, }, }, { query: "foo[2m:1s]", start: 300000, expected: []*storage.SelectHints{ - {Start: 175000, End: 300000, Step: 1000}, + {Start: 175001, End: 300000, Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s])", start: 300000, expected: []*storage.SelectHints{ - {Start: 175000, End: 300000, Func: "count_over_time", Step: 1000}, + {Start: 175001, End: 300000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s] @ 300)", start: 200000, expected: []*storage.SelectHints{ - {Start: 175000, End: 300000, Func: "count_over_time", Step: 1000}, + {Start: 175001, End: 300000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s] @ 200)", start: 200000, expected: []*storage.SelectHints{ - {Start: 75000, End: 200000, Func: "count_over_time", Step: 1000}, + {Start: 75001, End: 200000, Func: "count_over_time", Step: 
1000}, }, }, { query: "count_over_time(foo[2m:1s] @ 100)", start: 200000, expected: []*storage.SelectHints{ - {Start: -25000, End: 100000, Func: "count_over_time", Step: 1000}, + {Start: -24999, End: 100000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s] offset 10s)", start: 300000, expected: []*storage.SelectHints{ - {Start: 165000, End: 290000, Func: "count_over_time", Step: 1000}, + {Start: 165001, End: 290000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time((foo offset 10s)[2m:1s] offset 10s)", start: 300000, expected: []*storage.SelectHints{ - {Start: 155000, End: 280000, Func: "count_over_time", Step: 1000}, + {Start: 155001, End: 280000, Func: "count_over_time", Step: 1000}, }, }, { // When the @ is on the vector selector, the enclosing subquery parameters // don't affect the hint ranges. query: "count_over_time((foo @ 200 offset 10s)[2m:1s] offset 10s)", start: 300000, expected: []*storage.SelectHints{ - {Start: 185000, End: 190000, Func: "count_over_time", Step: 1000}, + {Start: 185001, End: 190000, Func: "count_over_time", Step: 1000}, }, }, { // When the @ is on the vector selector, the enclosing subquery parameters // don't affect the hint ranges. query: "count_over_time((foo @ 200 offset 10s)[2m:1s] @ 100 offset 10s)", start: 300000, expected: []*storage.SelectHints{ - {Start: 185000, End: 190000, Func: "count_over_time", Step: 1000}, + {Start: 185001, End: 190000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time((foo offset 10s)[2m:1s] @ 100 offset 10s)", start: 300000, expected: []*storage.SelectHints{ - {Start: -45000, End: 80000, Func: "count_over_time", Step: 1000}, + {Start: -44999, End: 80000, Func: "count_over_time", Step: 1000}, }, }, { query: "foo", start: 10000, end: 20000, expected: []*storage.SelectHints{ - {Start: 5000, End: 20000, Step: 1000}, + {Start: 5001, End: 20000, Step: 1000}, }, }, { query: "foo @ 15", start: 10000, end: 20000, expected: []*storage.SelectHints{ - {Start: 10000, End: 15000, Step: 1000}, + {Start: 10001, End: 15000, Step: 1000}, }, }, { query: "foo @ 1", start: 10000, end: 20000, expected: []*storage.SelectHints{ - {Start: -4000, End: 1000, Step: 1000}, + {Start: -3999, End: 1000, Step: 1000}, }, }, { query: "rate(foo[2m] @ 180)", start: 200000, end: 500000, expected: []*storage.SelectHints{ - {Start: 60000, End: 180000, Range: 120000, Func: "rate", Step: 1000}, + {Start: 60001, End: 180000, Range: 120000, Func: "rate", Step: 1000}, }, }, { query: "rate(foo[2m] @ 300)", start: 200000, end: 500000, expected: []*storage.SelectHints{ - {Start: 180000, End: 300000, Range: 120000, Func: "rate", Step: 1000}, + {Start: 180001, End: 300000, Range: 120000, Func: "rate", Step: 1000}, }, }, { query: "rate(foo[2m] @ 60)", start: 200000, end: 500000, expected: []*storage.SelectHints{ - {Start: -60000, End: 60000, Range: 120000, Func: "rate", Step: 1000}, + {Start: -59999, End: 60000, Range: 120000, Func: "rate", Step: 1000}, }, }, { query: "rate(foo[2m])", start: 200000, end: 500000, expected: []*storage.SelectHints{ - {Start: 80000, End: 500000, Range: 120000, Func: "rate", Step: 1000}, + {Start: 80001, End: 500000, Range: 120000, Func: "rate", Step: 1000}, }, }, { query: "rate(foo[2m] offset 2m)", start: 300000, end: 500000, expected: []*storage.SelectHints{ - {Start: 60000, End: 380000, Range: 120000, Func: "rate", Step: 1000}, + {Start: 60001, End: 380000, Range: 120000, Func: "rate", Step: 1000}, }, }, { query: "rate(foo[2m:1s])", start: 300000, end: 500000, expected: 
[]*storage.SelectHints{ - {Start: 175000, End: 500000, Func: "rate", Step: 1000}, + {Start: 175001, End: 500000, Func: "rate", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s])", start: 300000, end: 500000, expected: []*storage.SelectHints{ - {Start: 175000, End: 500000, Func: "count_over_time", Step: 1000}, + {Start: 175001, End: 500000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s] offset 10s)", start: 300000, end: 500000, expected: []*storage.SelectHints{ - {Start: 165000, End: 490000, Func: "count_over_time", Step: 1000}, + {Start: 165001, End: 490000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s] @ 300)", start: 200000, end: 500000, expected: []*storage.SelectHints{ - {Start: 175000, End: 300000, Func: "count_over_time", Step: 1000}, + {Start: 175001, End: 300000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s] @ 200)", start: 200000, end: 500000, expected: []*storage.SelectHints{ - {Start: 75000, End: 200000, Func: "count_over_time", Step: 1000}, + {Start: 75001, End: 200000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s] @ 100)", start: 200000, end: 500000, expected: []*storage.SelectHints{ - {Start: -25000, End: 100000, Func: "count_over_time", Step: 1000}, + {Start: -24999, End: 100000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time((foo offset 10s)[2m:1s] offset 10s)", start: 300000, end: 500000, expected: []*storage.SelectHints{ - {Start: 155000, End: 480000, Func: "count_over_time", Step: 1000}, + {Start: 155001, End: 480000, Func: "count_over_time", Step: 1000}, }, }, { // When the @ is on the vector selector, the enclosing subquery parameters // don't affect the hint ranges. query: "count_over_time((foo @ 200 offset 10s)[2m:1s] offset 10s)", start: 300000, end: 500000, expected: []*storage.SelectHints{ - {Start: 185000, End: 190000, Func: "count_over_time", Step: 1000}, + {Start: 185001, End: 190000, Func: "count_over_time", Step: 1000}, }, }, { // When the @ is on the vector selector, the enclosing subquery parameters // don't affect the hint ranges. 
query: "count_over_time((foo @ 200 offset 10s)[2m:1s] @ 100 offset 10s)", start: 300000, end: 500000, expected: []*storage.SelectHints{ - {Start: 185000, End: 190000, Func: "count_over_time", Step: 1000}, + {Start: 185001, End: 190000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time((foo offset 10s)[2m:1s] @ 100 offset 10s)", start: 300000, end: 500000, expected: []*storage.SelectHints{ - {Start: -45000, End: 80000, Func: "count_over_time", Step: 1000}, + {Start: -44999, End: 80000, Func: "count_over_time", Step: 1000}, }, }, { query: "sum by (dim1) (foo)", start: 10000, expected: []*storage.SelectHints{ - {Start: 5000, End: 10000, Func: "sum", By: true, Grouping: []string{"dim1"}}, + {Start: 5001, End: 10000, Func: "sum", By: true, Grouping: []string{"dim1"}}, }, }, { query: "sum without (dim1) (foo)", start: 10000, expected: []*storage.SelectHints{ - {Start: 5000, End: 10000, Func: "sum", Grouping: []string{"dim1"}}, + {Start: 5001, End: 10000, Func: "sum", Grouping: []string{"dim1"}}, }, }, { query: "sum by (dim1) (avg_over_time(foo[1s]))", start: 10000, expected: []*storage.SelectHints{ - {Start: 9000, End: 10000, Func: "avg_over_time", Range: 1000}, + {Start: 9001, End: 10000, Func: "avg_over_time", Range: 1000}, }, }, { query: "sum by (dim1) (max by (dim2) (foo))", start: 10000, expected: []*storage.SelectHints{ - {Start: 5000, End: 10000, Func: "max", By: true, Grouping: []string{"dim2"}}, + {Start: 5001, End: 10000, Func: "max", By: true, Grouping: []string{"dim2"}}, }, }, { query: "(max by (dim1) (foo))[5s:1s]", start: 10000, expected: []*storage.SelectHints{ - {Start: 0, End: 10000, Func: "max", By: true, Grouping: []string{"dim1"}, Step: 1000}, + {Start: 1, End: 10000, Func: "max", By: true, Grouping: []string{"dim1"}, Step: 1000}, }, }, { query: "(sum(http_requests{group=~\"p.*\"})+max(http_requests{group=~\"c.*\"}))[20s:5s]", start: 120000, expected: []*storage.SelectHints{ - {Start: 95000, End: 120000, Func: "sum", By: true, Step: 5000}, - {Start: 95000, End: 120000, Func: "max", By: true, Step: 5000}, + {Start: 95001, End: 120000, Func: "sum", By: true, Step: 5000}, + {Start: 95001, End: 120000, Func: "max", By: true, Step: 5000}, }, }, { query: "foo @ 50 + bar @ 250 + baz @ 900", start: 100000, end: 500000, expected: []*storage.SelectHints{ - {Start: 45000, End: 50000, Step: 1000}, - {Start: 245000, End: 250000, Step: 1000}, - {Start: 895000, End: 900000, Step: 1000}, + {Start: 45001, End: 50000, Step: 1000}, + {Start: 245001, End: 250000, Step: 1000}, + {Start: 895001, End: 900000, Step: 1000}, }, }, { query: "foo @ 50 + bar + baz @ 900", start: 100000, end: 500000, expected: []*storage.SelectHints{ - {Start: 45000, End: 50000, Step: 1000}, - {Start: 95000, End: 500000, Step: 1000}, - {Start: 895000, End: 900000, Step: 1000}, + {Start: 45001, End: 50000, Step: 1000}, + {Start: 95001, End: 500000, Step: 1000}, + {Start: 895001, End: 900000, Step: 1000}, }, }, { query: "rate(foo[2s] @ 50) + bar @ 250 + baz @ 900", start: 100000, end: 500000, expected: []*storage.SelectHints{ - {Start: 48000, End: 50000, Step: 1000, Func: "rate", Range: 2000}, - {Start: 245000, End: 250000, Step: 1000}, - {Start: 895000, End: 900000, Step: 1000}, + {Start: 48001, End: 50000, Step: 1000, Func: "rate", Range: 2000}, + {Start: 245001, End: 250000, Step: 1000}, + {Start: 895001, End: 900000, Step: 1000}, }, }, { query: "rate(foo[2s:1s] @ 50) + bar + baz", start: 100000, end: 500000, expected: []*storage.SelectHints{ - {Start: 43000, End: 50000, Step: 1000, Func: "rate"}, - 
{Start: 95000, End: 500000, Step: 1000}, - {Start: 95000, End: 500000, Step: 1000}, + {Start: 43001, End: 50000, Step: 1000, Func: "rate"}, + {Start: 95001, End: 500000, Step: 1000}, + {Start: 95001, End: 500000, Step: 1000}, }, }, { query: "rate(foo[2s:1s] @ 50) + bar + rate(baz[2m:1s] @ 900 offset 2m) ", start: 100000, end: 500000, expected: []*storage.SelectHints{ - {Start: 43000, End: 50000, Step: 1000, Func: "rate"}, - {Start: 95000, End: 500000, Step: 1000}, - {Start: 655000, End: 780000, Step: 1000, Func: "rate"}, + {Start: 43001, End: 50000, Step: 1000, Func: "rate"}, + {Start: 95001, End: 500000, Step: 1000}, + {Start: 655001, End: 780000, Step: 1000, Func: "rate"}, }, }, { // Hints are based on the inner most subquery timestamp. query: `sum_over_time(sum_over_time(metric{job="1"}[100s])[100s:25s] @ 50)[3s:1s] @ 3000`, start: 100000, expected: []*storage.SelectHints{ - {Start: -150000, End: 50000, Range: 100000, Func: "sum_over_time", Step: 25000}, + {Start: -149999, End: 50000, Range: 100000, Func: "sum_over_time", Step: 25000}, }, }, { // Hints are based on the inner most subquery timestamp. query: `sum_over_time(sum_over_time(metric{job="1"}[100s])[100s:25s] @ 3000)[3s:1s] @ 50`, expected: []*storage.SelectHints{ - {Start: 2800000, End: 3000000, Range: 100000, Func: "sum_over_time", Step: 25000}, + {Start: 2800001, End: 3000000, Range: 100000, Func: "sum_over_time", Step: 25000}, }, }, } { @@ -939,22 +939,20 @@ load 10s }, }, { - Query: "max_over_time(metricWith1SampleEvery10Seconds[59s])[20s:5s]", + Query: "max_over_time(metricWith1SampleEvery10Seconds[60s])[20s:5s]", Start: time.Unix(201, 0), PeakSamples: 10, - TotalSamples: 24, // (1 sample / 10 seconds * 60 seconds) * 20/5 (using 59s so we always return 6 samples - // as if we run a query on 00 looking back 60 seconds we will return 7 samples; - // see next test). + TotalSamples: 24, // (1 sample / 10 seconds * 60 seconds) * 4 TotalSamplesPerStep: stats.TotalSamplesPerStep{ 201000: 24, }, }, { - Query: "max_over_time(metricWith1SampleEvery10Seconds[60s])[20s:5s]", + Query: "max_over_time(metricWith1SampleEvery10Seconds[61s])[20s:5s]", Start: time.Unix(201, 0), PeakSamples: 11, TotalSamples: 26, // (1 sample / 10 seconds * 60 seconds) * 4 + 2 as - // max_over_time(metricWith1SampleEvery10Seconds[60s]) @ 190 and 200 will return 7 samples. + // max_over_time(metricWith1SampleEvery10Seconds[61s]) @ 190 and 200 will return 7 samples. TotalSamplesPerStep: stats.TotalSamplesPerStep{ 201000: 26, }, @@ -963,10 +961,9 @@ load 10s Query: "max_over_time(metricWith1HistogramEvery10Seconds[60s])[20s:5s]", Start: time.Unix(201, 0), PeakSamples: 78, - TotalSamples: 338, // (1 histogram (size 13 HPoint) / 10 seconds * 60 seconds) * 4 + 2 * 13 as - // max_over_time(metricWith1SampleEvery10Seconds[60s]) @ 190 and 200 will return 7 samples. + TotalSamples: 312, // (1 histogram (size 13) / 10 seconds * 60 seconds) * 4 TotalSamplesPerStep: stats.TotalSamplesPerStep{ - 201000: 338, + 201000: 312, }, }, { @@ -1431,23 +1428,23 @@ load 10s }, { // The peak samples in memory is during the first evaluation: - // - Subquery takes 22 samples, 11 for each bigmetric, - // - Result is calculated per series where the series samples is buffered, hence 11 more here. + // - Subquery takes 22 samples, 11 for each bigmetric, but samples on the left bound won't be evaluated. + // - Result is calculated per series where the series samples is buffered, hence 10 more here. 
// - The result of two series is added before the last series buffer is discarded, so 2 more here. - // Hence at peak it is 22 (subquery) + 11 (buffer of a series) + 2 (result from 2 series). + // Hence at peak it is 22 (subquery) + 10 (buffer of a series) + 2 (result from 2 series). // The subquery samples and the buffer is discarded before duplicating. Query: `rate(bigmetric[10s:1s] @ 10)`, - MaxSamples: 35, + MaxSamples: 34, Start: time.Unix(0, 0), End: time.Unix(10, 0), Interval: 5 * time.Second, }, { // Here the reasoning is same as above. But LHS and RHS are done one after another. - // So while one of them takes 35 samples at peak, we need to hold the 2 sample + // So while one of them takes 34 samples at peak, we need to hold the 2 sample // result of the other till then. Query: `rate(bigmetric[10s:1s] @ 10) + rate(bigmetric[10s:1s] @ 30)`, - MaxSamples: 37, + MaxSamples: 36, Start: time.Unix(0, 0), End: time.Unix(10, 0), Interval: 5 * time.Second, @@ -1456,25 +1453,25 @@ load 10s // promql.Sample as above but with only 1 part as step invariant. // Here the peak is caused by the non-step invariant part as it touches more time range. // Hence at peak it is 2*21 (subquery from 0s to 20s) - // + 11 (buffer of a series per evaluation) + // + 10 (buffer of a series per evaluation) // + 6 (result from 2 series at 3 eval times). Query: `rate(bigmetric[10s:1s]) + rate(bigmetric[10s:1s] @ 30)`, - MaxSamples: 59, + MaxSamples: 58, Start: time.Unix(10, 0), End: time.Unix(20, 0), Interval: 5 * time.Second, }, { // Nested subquery. - // We saw that innermost rate takes 35 samples which is still the peak + // We saw that innermost rate takes 34 samples which is still the peak // since the other two subqueries just duplicate the result. Query: `rate(rate(bigmetric[10s:1s] @ 10)[100s:25s] @ 1000)[100s:20s] @ 2000`, - MaxSamples: 35, + MaxSamples: 34, Start: time.Unix(10, 0), }, { // Nested subquery. - // Now the outmost subquery produces more samples than inner most rate. + // Now the outermost subquery produces more samples than inner most rate. 
Query: `rate(rate(bigmetric[10s:1s] @ 10)[100s:25s] @ 1000)[17s:1s] @ 2000`, MaxSamples: 36, Start: time.Unix(10, 0), @@ -1583,11 +1580,11 @@ load 1ms start: 10, result: promql.Matrix{ promql.Series{ - Floats: []promql.FPoint{{F: 28, T: 280000}, {F: 29, T: 290000}, {F: 30, T: 300000}}, + Floats: []promql.FPoint{{F: 29, T: 290000}, {F: 30, T: 300000}}, Metric: lbls1, }, promql.Series{ - Floats: []promql.FPoint{{F: 56, T: 280000}, {F: 58, T: 290000}, {F: 60, T: 300000}}, + Floats: []promql.FPoint{{F: 58, T: 290000}, {F: 60, T: 300000}}, Metric: lbls2, }, }, @@ -1596,7 +1593,7 @@ load 1ms start: 100, result: promql.Matrix{ promql.Series{ - Floats: []promql.FPoint{{F: 3, T: -2000}, {F: 2, T: -1000}, {F: 1, T: 0}}, + Floats: []promql.FPoint{{F: 2, T: -1000}, {F: 1, T: 0}}, Metric: lblsneg, }, }, @@ -1605,7 +1602,7 @@ load 1ms start: 100, result: promql.Matrix{ promql.Series{ - Floats: []promql.FPoint{{F: 504, T: -503000}, {F: 503, T: -502000}, {F: 502, T: -501000}, {F: 501, T: -500000}}, + Floats: []promql.FPoint{{F: 503, T: -502000}, {F: 502, T: -501000}, {F: 501, T: -500000}}, Metric: lblsneg, }, }, @@ -1614,7 +1611,7 @@ load 1ms start: 100, result: promql.Matrix{ promql.Series{ - Floats: []promql.FPoint{{F: 2342, T: 2342}, {F: 2343, T: 2343}, {F: 2344, T: 2344}, {F: 2345, T: 2345}}, + Floats: []promql.FPoint{{F: 2343, T: 2343}, {F: 2344, T: 2344}, {F: 2345, T: 2345}}, Metric: lblsms, }, }, @@ -2021,23 +2018,58 @@ func TestSubquerySelector(t *testing.T) { type FakeQueryLogger struct { closed bool logs []interface{} + attrs []any } func NewFakeQueryLogger() *FakeQueryLogger { return &FakeQueryLogger{ closed: false, logs: make([]interface{}, 0), + attrs: make([]any, 0), } } +// It implements the promql.QueryLogger interface. func (f *FakeQueryLogger) Close() error { f.closed = true return nil } -func (f *FakeQueryLogger) Log(l ...interface{}) error { - f.logs = append(f.logs, l...) - return nil +// It implements the promql.QueryLogger interface. +func (f *FakeQueryLogger) Info(msg string, args ...any) { + log := append([]any{msg}, args...) + log = append(log, f.attrs...) + f.attrs = f.attrs[:0] + f.logs = append(f.logs, log...) +} + +// It implements the promql.QueryLogger interface. +func (f *FakeQueryLogger) Error(msg string, args ...any) { + log := append([]any{msg}, args...) + log = append(log, f.attrs...) + f.attrs = f.attrs[:0] + f.logs = append(f.logs, log...) +} + +// It implements the promql.QueryLogger interface. +func (f *FakeQueryLogger) Warn(msg string, args ...any) { + log := append([]any{msg}, args...) + log = append(log, f.attrs...) + f.attrs = f.attrs[:0] + f.logs = append(f.logs, log...) +} + +// It implements the promql.QueryLogger interface. +func (f *FakeQueryLogger) Debug(msg string, args ...any) { + log := append([]any{msg}, args...) + log = append(log, f.attrs...) + f.attrs = f.attrs[:0] + f.logs = append(f.logs, log...) +} + +// It implements the promql.QueryLogger interface. +func (f *FakeQueryLogger) With(args ...any) { + f.attrs = append(f.attrs, args...) 
} func TestQueryLogger_basic(t *testing.T) { @@ -2065,9 +2097,8 @@ func TestQueryLogger_basic(t *testing.T) { f1 := NewFakeQueryLogger() engine.SetQueryLogger(f1) queryExec() - for i, field := range []interface{}{"params", map[string]interface{}{"query": "test statement"}} { - require.Equal(t, field, f1.logs[i]) - } + require.Contains(t, f1.logs, `params`) + require.Contains(t, f1.logs, map[string]interface{}{"query": "test statement"}) l := len(f1.logs) queryExec() @@ -2113,11 +2144,8 @@ func TestQueryLogger_fields(t *testing.T) { res := query.Exec(ctx) require.NoError(t, res.Err) - expected := []string{"foo", "bar"} - for i, field := range expected { - v := f1.logs[len(f1.logs)-len(expected)+i].(string) - require.Equal(t, field, v) - } + require.Contains(t, f1.logs, `foo`) + require.Contains(t, f1.logs, `bar`) } func TestQueryLogger_error(t *testing.T) { @@ -2143,9 +2171,10 @@ func TestQueryLogger_error(t *testing.T) { res := query.Exec(ctx) require.Error(t, res.Err, "query should have failed") - for i, field := range []interface{}{"params", map[string]interface{}{"query": "test statement"}, "error", testErr} { - require.Equal(t, f1.logs[i], field) - } + require.Contains(t, f1.logs, `params`) + require.Contains(t, f1.logs, map[string]interface{}{"query": "test statement"}) + require.Contains(t, f1.logs, `error`) + require.Contains(t, f1.logs, testErr) } func TestPreprocessAndWrapWithStepInvariantExpr(t *testing.T) { @@ -3023,6 +3052,29 @@ func TestEngineOptsValidation(t *testing.T) { } } +func TestEngine_Close(t *testing.T) { + t.Run("nil engine", func(t *testing.T) { + var ng *promql.Engine + require.NoError(t, ng.Close()) + }) + + t.Run("non-nil engine", func(t *testing.T) { + ng := promql.NewEngine(promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 0, + Timeout: 100 * time.Second, + NoStepSubqueryIntervalFn: nil, + EnableAtModifier: true, + EnableNegativeOffset: true, + EnablePerStepStats: false, + LookbackDelta: 0, + EnableDelayedNameRemoval: true, + }) + require.NoError(t, ng.Close()) + }) +} + func TestInstantQueryWithRangeVectorSelector(t *testing.T) { engine := newTestEngine(t) @@ -3041,7 +3093,7 @@ func TestInstantQueryWithRangeVectorSelector(t *testing.T) { ts time.Time }{ "matches series with points in range": { - expr: "some_metric[1m]", + expr: "some_metric[2m]", ts: baseT.Add(2 * time.Minute), expected: promql.Matrix{ { @@ -3077,7 +3129,6 @@ func TestInstantQueryWithRangeVectorSelector(t *testing.T) { { Metric: labels.FromStrings("__name__", "some_metric_with_stale_marker"), Floats: []promql.FPoint{ - {T: timestamp.FromTime(baseT), F: 0}, {T: timestamp.FromTime(baseT.Add(time.Minute)), F: 1}, {T: timestamp.FromTime(baseT.Add(3 * time.Minute)), F: 3}, }, @@ -3099,6 +3150,217 @@ func TestInstantQueryWithRangeVectorSelector(t *testing.T) { } } +func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { + // TODO(codesome): Integrate histograms into the PromQL testing framework + // and write more tests there. 
+ cases := []struct { + histograms []histogram.Histogram + expected histogram.FloatHistogram + expectedAvg histogram.FloatHistogram + }{ + { + histograms: []histogram.Histogram{ + { + CounterResetHint: histogram.GaugeType, + Schema: 0, + Count: 25, + Sum: 1234.5, + ZeroThreshold: 0.001, + ZeroCount: 4, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{1, 1, -1, 0}, + NegativeSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 2, Length: 2}, + }, + NegativeBuckets: []int64{2, 2, -3, 8}, + }, + { + CounterResetHint: histogram.GaugeType, + Schema: 0, + Count: 41, + Sum: 2345.6, + ZeroThreshold: 0.001, + ZeroCount: 5, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 4}, + {Offset: 0, Length: 0}, + {Offset: 0, Length: 3}, + }, + PositiveBuckets: []int64{1, 2, -2, 1, -1, 0, 0}, + NegativeSpans: []histogram.Span{ + {Offset: 1, Length: 4}, + {Offset: 2, Length: 0}, + {Offset: 2, Length: 3}, + }, + NegativeBuckets: []int64{1, 3, -2, 5, -2, 0, -3}, + }, + { + CounterResetHint: histogram.GaugeType, + Schema: 0, + Count: 41, + Sum: 1111.1, + ZeroThreshold: 0.001, + ZeroCount: 5, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 4}, + {Offset: 0, Length: 0}, + {Offset: 0, Length: 3}, + }, + PositiveBuckets: []int64{1, 2, -2, 1, -1, 0, 0}, + NegativeSpans: []histogram.Span{ + {Offset: 1, Length: 4}, + {Offset: 2, Length: 0}, + {Offset: 2, Length: 3}, + }, + NegativeBuckets: []int64{1, 3, -2, 5, -2, 0, -3}, + }, + { + CounterResetHint: histogram.GaugeType, + Schema: 1, // Everything is 0 just to make the count 4 so avg has nicer numbers. + }, + }, + expected: histogram.FloatHistogram{ + CounterResetHint: histogram.GaugeType, + Schema: 0, + ZeroThreshold: 0.001, + ZeroCount: 14, + Count: 107, + Sum: 4691.2, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 7}, + }, + PositiveBuckets: []float64{3, 8, 2, 5, 3, 2, 2}, + NegativeSpans: []histogram.Span{ + {Offset: 0, Length: 6}, + {Offset: 3, Length: 3}, + }, + NegativeBuckets: []float64{2, 6, 8, 4, 15, 9, 10, 10, 4}, + }, + expectedAvg: histogram.FloatHistogram{ + CounterResetHint: histogram.GaugeType, + Schema: 0, + ZeroThreshold: 0.001, + ZeroCount: 3.5, + Count: 26.75, + Sum: 1172.8, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 7}, + }, + PositiveBuckets: []float64{0.75, 2, 0.5, 1.25, 0.75, 0.5, 0.5}, + NegativeSpans: []histogram.Span{ + {Offset: 0, Length: 6}, + {Offset: 3, Length: 3}, + }, + NegativeBuckets: []float64{0.5, 1.5, 2, 1, 3.75, 2.25, 2.5, 2.5, 1}, + }, + }, + } + + idx0 := int64(0) + for _, c := range cases { + for _, floatHisto := range []bool{true, false} { + t.Run(fmt.Sprintf("floatHistogram=%t %d", floatHisto, idx0), func(t *testing.T) { + storage := teststorage.New(t) + t.Cleanup(func() { storage.Close() }) + + seriesName := "sparse_histogram_series" + seriesNameOverTime := "sparse_histogram_series_over_time" + + engine := newTestEngine(t) + + ts := idx0 * int64(10*time.Minute/time.Millisecond) + app := storage.Appender(context.Background()) + _, err := app.Append(0, labels.FromStrings("__name__", "float_series", "idx", "0"), ts, 42) + require.NoError(t, err) + for idx1, h := range c.histograms { + lbls := labels.FromStrings("__name__", seriesName, "idx", strconv.Itoa(idx1)) + // Since we mutate h later, we need to create a copy here. 
+ var err error + if floatHisto { + _, err = app.AppendHistogram(0, lbls, ts, nil, h.Copy().ToFloat(nil)) + } else { + _, err = app.AppendHistogram(0, lbls, ts, h.Copy(), nil) + } + require.NoError(t, err) + + lbls = labels.FromStrings("__name__", seriesNameOverTime) + newTs := ts + int64(idx1)*int64(time.Minute/time.Millisecond) + // Since we mutate h later, we need to create a copy here. + if floatHisto { + _, err = app.AppendHistogram(0, lbls, newTs, nil, h.Copy().ToFloat(nil)) + } else { + _, err = app.AppendHistogram(0, lbls, newTs, h.Copy(), nil) + } + require.NoError(t, err) + } + require.NoError(t, app.Commit()) + + queryAndCheck := func(queryString string, ts int64, exp promql.Vector) { + qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) + require.NoError(t, err) + + res := qry.Exec(context.Background()) + require.NoError(t, res.Err) + require.Empty(t, res.Warnings) + + vector, err := res.Vector() + require.NoError(t, err) + + testutil.RequireEqual(t, exp, vector) + } + queryAndCheckAnnotations := func(queryString string, ts int64, expWarnings annotations.Annotations) { + qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) + require.NoError(t, err) + + res := qry.Exec(context.Background()) + require.NoError(t, res.Err) + require.Equal(t, expWarnings, res.Warnings) + } + + // sum(). + queryString := fmt.Sprintf("sum(%s)", seriesName) + queryAndCheck(queryString, ts, []promql.Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) + + queryString = `sum({idx="0"})` + var annos annotations.Annotations + annos.Add(annotations.NewMixedFloatsHistogramsAggWarning(posrange.PositionRange{Start: 4, End: 13})) + queryAndCheckAnnotations(queryString, ts, annos) + + // + operator. + queryString = fmt.Sprintf(`%s{idx="0"}`, seriesName) + for idx := 1; idx < len(c.histograms); idx++ { + queryString += fmt.Sprintf(` + ignoring(idx) %s{idx="%d"}`, seriesName, idx) + } + queryAndCheck(queryString, ts, []promql.Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) + + // count(). + queryString = fmt.Sprintf("count(%s)", seriesName) + queryAndCheck(queryString, ts, []promql.Sample{{T: ts, F: 4, Metric: labels.EmptyLabels()}}) + + // avg(). + queryString = fmt.Sprintf("avg(%s)", seriesName) + queryAndCheck(queryString, ts, []promql.Sample{{T: ts, H: &c.expectedAvg, Metric: labels.EmptyLabels()}}) + + offset := int64(len(c.histograms) - 1) + newTs := ts + offset*int64(time.Minute/time.Millisecond) + + // sum_over_time(). + queryString = fmt.Sprintf("sum_over_time(%s[%dm:1m])", seriesNameOverTime, offset+1) + queryAndCheck(queryString, newTs, []promql.Sample{{T: newTs, H: &c.expected, Metric: labels.EmptyLabels(), DropName: true}}) + + // avg_over_time(). + queryString = fmt.Sprintf("avg_over_time(%s[%dm:1m])", seriesNameOverTime, offset+1) + queryAndCheck(queryString, newTs, []promql.Sample{{T: newTs, H: &c.expectedAvg, Metric: labels.EmptyLabels(), DropName: true}}) + }) + idx0++ + } + } +} + func TestNativeHistogram_SubOperator(t *testing.T) { // TODO(codesome): Integrate histograms into the PromQL testing framework // and write more tests there. 
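// A minimal sketch, not part of this diff, of the histogram arithmetic that the sum() and avg() cases in
// TestNativeHistogram_Sum_Count_Add_AvgOperator above rely on (and that the Sub counterpart in the test
// below mirrors): FloatHistogram.Add to sum, then Div by the series count for the average; the test's
// expectedAvg is exactly expected divided by 4. The helper name is illustrative.
//
//    func sumAndAvg(hs []*histogram.FloatHistogram) (sum, avg *histogram.FloatHistogram, err error) {
//        sum = hs[0].Copy()
//        for _, h := range hs[1:] {
//            if sum, err = sum.Add(h); err != nil {
//                return nil, nil, err // e.g. incompatible bucket layouts
//            }
//        }
//        sum = sum.Compact(0)
//        avg = sum.Copy().Div(float64(len(hs)))
//        return sum, avg, nil
//    }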
@@ -3351,43 +3613,43 @@ metric 0 1 2 }{ { name: "default lookback delta", - ts: lastDatapointTs.Add(defaultLookbackDelta), + ts: lastDatapointTs.Add(defaultLookbackDelta - time.Millisecond), expectSamples: true, }, { name: "outside default lookback delta", - ts: lastDatapointTs.Add(defaultLookbackDelta + time.Millisecond), + ts: lastDatapointTs.Add(defaultLookbackDelta), expectSamples: false, }, { name: "custom engine lookback delta", - ts: lastDatapointTs.Add(10 * time.Minute), + ts: lastDatapointTs.Add(10*time.Minute - time.Millisecond), engineLookback: 10 * time.Minute, expectSamples: true, }, { name: "outside custom engine lookback delta", - ts: lastDatapointTs.Add(10*time.Minute + time.Millisecond), + ts: lastDatapointTs.Add(10 * time.Minute), engineLookback: 10 * time.Minute, expectSamples: false, }, { name: "custom query lookback delta", - ts: lastDatapointTs.Add(20 * time.Minute), + ts: lastDatapointTs.Add(20*time.Minute - time.Millisecond), engineLookback: 10 * time.Minute, queryLookback: 20 * time.Minute, expectSamples: true, }, { name: "outside custom query lookback delta", - ts: lastDatapointTs.Add(20*time.Minute + time.Millisecond), + ts: lastDatapointTs.Add(20 * time.Minute), engineLookback: 10 * time.Minute, queryLookback: 20 * time.Minute, expectSamples: false, }, { name: "negative custom query lookback delta", - ts: lastDatapointTs.Add(20 * time.Minute), + ts: lastDatapointTs.Add(20*time.Minute - time.Millisecond), engineLookback: -10 * time.Minute, queryLookback: 20 * time.Minute, expectSamples: true, @@ -3454,18 +3716,18 @@ histogram {{sum:4 count:4 buckets:[2 2]}} {{sum:6 count:6 buckets:[3 3]}} {{sum: } } - qry, err := engine.NewRangeQuery(context.Background(), storage, nil, "increase(histogram[60s])", time.Unix(0, 0), time.Unix(0, 0).Add(1*time.Minute), time.Minute) + qry, err := engine.NewRangeQuery(context.Background(), storage, nil, "increase(histogram[90s])", time.Unix(0, 0), time.Unix(0, 0).Add(60*time.Second), time.Minute) require.NoError(t, err) verify(t, qry, []histogram.FloatHistogram{ { - Count: 2, - Sum: 2, // Increase from 4 to 6 is 2. + Count: 3, + Sum: 3, // Increase from 4 to 6 is 2. Interpolation adds 1. PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, // Two buckets changed between the first and second histogram. - PositiveBuckets: []float64{1, 1}, // Increase from 2 to 3 is 1 in both buckets. + PositiveBuckets: []float64{1.5, 1.5}, // Increase from 2 to 3 is 1 in both buckets. Interpolation adds 0.5. }, }) - qry, err = engine.NewInstantQuery(context.Background(), storage, nil, "histogram[60s]", time.Unix(0, 0).Add(2*time.Minute)) + qry, err = engine.NewInstantQuery(context.Background(), storage, nil, "histogram[61s]", time.Unix(0, 0).Add(2*time.Minute)) require.NoError(t, err) verify(t, qry, []histogram.FloatHistogram{ { diff --git a/promql/functions.go b/promql/functions.go index 182b69b080..e93a4cdc5b 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -350,7 +350,7 @@ func calcTrendValue(i int, tf, s0, s1, b float64) float64 { // data. A lower smoothing factor increases the influence of historical data. The trend factor (0 < tf < 1) affects // how trends in historical data will affect the current data. A higher trend factor increases the influence. // of trends. Algorithm taken from https://en.wikipedia.org/wiki/Exponential_smoothing titled: "Double exponential smoothing". 
-func funcHoltWinters(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { +func funcDoubleExponentialSmoothing(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { samples := vals[0].(Matrix)[0] // The smoothing factor argument. @@ -415,22 +415,12 @@ func funcSortDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHel // === sort_by_label(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) === func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - // First, sort by the full label set. This ensures a consistent ordering in case sorting by the - // labels provided as arguments is not conclusive. + lbls := stringSliceFromArgs(args[1:]) slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - return labels.Compare(a.Metric, b.Metric) - }) - - labels := stringSliceFromArgs(args[1:]) - // Next, sort by the labels provided as arguments. - slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - // Iterate over each given label. - for _, label := range labels { + for _, label := range lbls { lv1 := a.Metric.Get(label) lv2 := b.Metric.Get(label) - // If we encounter multiple samples with the same label values, the sorting which was - // performed in the first step will act as a "tie breaker". if lv1 == lv2 { continue } @@ -442,7 +432,8 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode return +1 } - return 0 + // If all labels provided as arguments were equal, sort by the full label set. This ensures a consistent ordering. + return labels.Compare(a.Metric, b.Metric) }) return vals[0].(Vector), nil @@ -450,22 +441,12 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode // === sort_by_label_desc(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) === func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - // First, sort by the full label set. This ensures a consistent ordering in case sorting by the - // labels provided as arguments is not conclusive. - slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - return labels.Compare(b.Metric, a.Metric) - }) - - labels := stringSliceFromArgs(args[1:]) - // Next, sort by the labels provided as arguments. + lbls := stringSliceFromArgs(args[1:]) slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - // Iterate over each given label. - for _, label := range labels { + for _, label := range lbls { lv1 := a.Metric.Get(label) lv2 := b.Metric.Get(label) - // If we encounter multiple samples with the same label values, the sorting which was - // performed in the first step will act as a "tie breaker". if lv1 == lv2 { continue } @@ -477,7 +458,8 @@ func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *Eval return -1 } - return 0 + // If all labels provided as arguments were equal, sort by the full label set. This ensures a consistent ordering. + return -labels.Compare(a.Metric, b.Metric) }) return vals[0].(Vector), nil @@ -551,6 +533,10 @@ func funcRound(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper toNearestInverse := 1.0 / toNearest for _, el := range vec { + if el.H != nil { + // Process only float samples. 
+ continue + } f := math.Floor(el.F*toNearestInverse+0.5) / toNearestInverse enh.Out = append(enh.Out, Sample{ Metric: el.Metric, @@ -1480,7 +1466,7 @@ func (ev *evaluator) evalLabelReplace(ctx context.Context, args parser.Expressio regexStr = stringFromArg(args[4]) ) - regex, err := regexp.Compile("^(?:" + regexStr + ")$") + regex, err := regexp.Compile("^(?s:" + regexStr + ")$") if err != nil { panic(fmt.Errorf("invalid regular expression in label_replace(): %s", regexStr)) } @@ -1514,11 +1500,6 @@ func (ev *evaluator) evalLabelReplace(ctx context.Context, args parser.Expressio return matrix, ws } -// === label_replace(Vector parser.ValueTypeVector, dst_label, replacement, src_labelname, regex parser.ValueTypeString) (Vector, Annotations) === -func funcLabelReplace(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - panic("funcLabelReplace wrong implementation called") -} - // === Vector(s Scalar) (Vector, Annotations) === func funcVector(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return append(enh.Out, @@ -1570,11 +1551,6 @@ func (ev *evaluator) evalLabelJoin(ctx context.Context, args parser.Expressions) return matrix, ws } -// === label_join(vector model.ValVector, dest_labelname, separator, src_labelname...) (Vector, Annotations) === -func funcLabelJoin(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - panic("funcLabelReplace wrong implementation called") -} - // Common code for date related functions. func dateWrapper(vals []parser.Value, enh *EvalNodeHelper, f func(time.Time) float64) Vector { if len(vals) == 0 { @@ -1657,82 +1633,83 @@ func funcYear(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) // FunctionCalls is a list of all functions supported by PromQL, including their types. 
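// Editorial note: entries that map to nil in the table below ("info", "label_replace", "label_join")
// are not dispatched through FunctionCalls at all; the evaluator handles them directly via
// evalInfo, evalLabelReplace and evalLabelJoin, as the in-line comments on those entries note.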
var FunctionCalls = map[string]FunctionCall{ - "abs": funcAbs, - "absent": funcAbsent, - "absent_over_time": funcAbsentOverTime, - "acos": funcAcos, - "acosh": funcAcosh, - "asin": funcAsin, - "asinh": funcAsinh, - "atan": funcAtan, - "atanh": funcAtanh, - "avg_over_time": funcAvgOverTime, - "ceil": funcCeil, - "changes": funcChanges, - "clamp": funcClamp, - "clamp_max": funcClampMax, - "clamp_min": funcClampMin, - "cos": funcCos, - "cosh": funcCosh, - "count_over_time": funcCountOverTime, - "days_in_month": funcDaysInMonth, - "day_of_month": funcDayOfMonth, - "day_of_week": funcDayOfWeek, - "day_of_year": funcDayOfYear, - "deg": funcDeg, - "delta": funcDelta, - "deriv": funcDeriv, - "exp": funcExp, - "floor": funcFloor, - "histogram_avg": funcHistogramAvg, - "histogram_count": funcHistogramCount, - "histogram_fraction": funcHistogramFraction, - "histogram_quantile": funcHistogramQuantile, - "histogram_sum": funcHistogramSum, - "histogram_stddev": funcHistogramStdDev, - "histogram_stdvar": funcHistogramStdVar, - "holt_winters": funcHoltWinters, - "hour": funcHour, - "idelta": funcIdelta, - "increase": funcIncrease, - "irate": funcIrate, - "label_replace": funcLabelReplace, - "label_join": funcLabelJoin, - "ln": funcLn, - "log10": funcLog10, - "log2": funcLog2, - "last_over_time": funcLastOverTime, - "mad_over_time": funcMadOverTime, - "max_over_time": funcMaxOverTime, - "min_over_time": funcMinOverTime, - "minute": funcMinute, - "month": funcMonth, - "pi": funcPi, - "predict_linear": funcPredictLinear, - "present_over_time": funcPresentOverTime, - "quantile_over_time": funcQuantileOverTime, - "rad": funcRad, - "rate": funcRate, - "resets": funcResets, - "round": funcRound, - "scalar": funcScalar, - "sgn": funcSgn, - "sin": funcSin, - "sinh": funcSinh, - "sort": funcSort, - "sort_desc": funcSortDesc, - "sort_by_label": funcSortByLabel, - "sort_by_label_desc": funcSortByLabelDesc, - "sqrt": funcSqrt, - "stddev_over_time": funcStddevOverTime, - "stdvar_over_time": funcStdvarOverTime, - "sum_over_time": funcSumOverTime, - "tan": funcTan, - "tanh": funcTanh, - "time": funcTime, - "timestamp": funcTimestamp, - "vector": funcVector, - "year": funcYear, + "abs": funcAbs, + "absent": funcAbsent, + "absent_over_time": funcAbsentOverTime, + "acos": funcAcos, + "acosh": funcAcosh, + "asin": funcAsin, + "asinh": funcAsinh, + "atan": funcAtan, + "atanh": funcAtanh, + "avg_over_time": funcAvgOverTime, + "ceil": funcCeil, + "changes": funcChanges, + "clamp": funcClamp, + "clamp_max": funcClampMax, + "clamp_min": funcClampMin, + "cos": funcCos, + "cosh": funcCosh, + "count_over_time": funcCountOverTime, + "days_in_month": funcDaysInMonth, + "day_of_month": funcDayOfMonth, + "day_of_week": funcDayOfWeek, + "day_of_year": funcDayOfYear, + "deg": funcDeg, + "delta": funcDelta, + "deriv": funcDeriv, + "exp": funcExp, + "floor": funcFloor, + "histogram_avg": funcHistogramAvg, + "histogram_count": funcHistogramCount, + "histogram_fraction": funcHistogramFraction, + "histogram_quantile": funcHistogramQuantile, + "histogram_sum": funcHistogramSum, + "histogram_stddev": funcHistogramStdDev, + "histogram_stdvar": funcHistogramStdVar, + "double_exponential_smoothing": funcDoubleExponentialSmoothing, + "hour": funcHour, + "idelta": funcIdelta, + "increase": funcIncrease, + "info": nil, + "irate": funcIrate, + "label_replace": nil, // evalLabelReplace not called via this map. + "label_join": nil, // evalLabelJoin not called via this map. 
+ "ln": funcLn, + "log10": funcLog10, + "log2": funcLog2, + "last_over_time": funcLastOverTime, + "mad_over_time": funcMadOverTime, + "max_over_time": funcMaxOverTime, + "min_over_time": funcMinOverTime, + "minute": funcMinute, + "month": funcMonth, + "pi": funcPi, + "predict_linear": funcPredictLinear, + "present_over_time": funcPresentOverTime, + "quantile_over_time": funcQuantileOverTime, + "rad": funcRad, + "rate": funcRate, + "resets": funcResets, + "round": funcRound, + "scalar": funcScalar, + "sgn": funcSgn, + "sin": funcSin, + "sinh": funcSinh, + "sort": funcSort, + "sort_desc": funcSortDesc, + "sort_by_label": funcSortByLabel, + "sort_by_label_desc": funcSortByLabelDesc, + "sqrt": funcSqrt, + "stddev_over_time": funcStddevOverTime, + "stdvar_over_time": funcStdvarOverTime, + "sum_over_time": funcSumOverTime, + "tan": funcTan, + "tanh": funcTanh, + "time": funcTime, + "timestamp": funcTimestamp, + "vector": funcVector, + "year": funcYear, } // AtModifierUnsafeFunctions are the functions whose result diff --git a/promql/fuzz.go b/promql/fuzz.go index 5f08e6a72c..759055fb0d 100644 --- a/promql/fuzz.go +++ b/promql/fuzz.go @@ -61,17 +61,13 @@ const ( var symbolTable = labels.NewSymbolTable() func fuzzParseMetricWithContentType(in []byte, contentType string) int { - p, warning := textparse.New(in, contentType, false, symbolTable) - if warning != nil { + p, warning := textparse.New(in, contentType, "", false, false, symbolTable) + if p == nil || warning != nil { // An invalid content type is being passed, which should not happen // in this context. panic(warning) } - if contentType == "application/openmetrics-text" { - p = textparse.NewOpenMetricsParser(in, symbolTable) - } - var err error for { _, err = p.Next() @@ -95,7 +91,7 @@ func fuzzParseMetricWithContentType(in []byte, contentType string) int { // Note that this is not the parser for the text-based exposition-format; that // lives in github.com/prometheus/client_golang/text. func FuzzParseMetric(in []byte) int { - return fuzzParseMetricWithContentType(in, "") + return fuzzParseMetricWithContentType(in, "text/plain") } func FuzzParseOpenMetric(in []byte) int { diff --git a/promql/fuzz_test.go b/promql/fuzz_test.go index 1f0bbaa662..4a26798ded 100644 --- a/promql/fuzz_test.go +++ b/promql/fuzz_test.go @@ -29,7 +29,7 @@ func TestfuzzParseMetricWithContentTypePanicOnInvalid(t *testing.T) { } else { err, ok := p.(error) require.True(t, ok) - require.Contains(t, err.Error(), "duplicate parameter name") + require.ErrorContains(t, err, "duplicate parameter name") } }() diff --git a/promql/info.go b/promql/info.go new file mode 100644 index 0000000000..1a9f7eb18e --- /dev/null +++ b/promql/info.go @@ -0,0 +1,454 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package promql + +import ( + "context" + "errors" + "fmt" + "slices" + "strings" + + "github.com/grafana/regexp" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/annotations" +) + +const targetInfo = "target_info" + +// identifyingLabels are the labels we consider as identifying for info metrics. +// Currently hard coded, so we don't need knowledge of individual info metrics. +var identifyingLabels = []string{"instance", "job"} + +// evalInfo implements the info PromQL function. +func (ev *evaluator) evalInfo(ctx context.Context, args parser.Expressions) (parser.Value, annotations.Annotations) { + val, annots := ev.eval(ctx, args[0]) + mat := val.(Matrix) + // Map from data label name to matchers. + dataLabelMatchers := map[string][]*labels.Matcher{} + var infoNameMatchers []*labels.Matcher + if len(args) > 1 { + // TODO: Introduce a dedicated LabelSelector type. + labelSelector := args[1].(*parser.VectorSelector) + for _, m := range labelSelector.LabelMatchers { + dataLabelMatchers[m.Name] = append(dataLabelMatchers[m.Name], m) + if m.Name == labels.MetricName { + infoNameMatchers = append(infoNameMatchers, m) + } + } + } else { + infoNameMatchers = []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, targetInfo)} + } + + // Don't try to enrich info series. + ignoreSeries := map[int]struct{}{} +loop: + for i, s := range mat { + name := s.Metric.Get(labels.MetricName) + for _, m := range infoNameMatchers { + if m.Matches(name) { + ignoreSeries[i] = struct{}{} + continue loop + } + } + } + + selectHints := ev.infoSelectHints(args[0]) + infoSeries, ws, err := ev.fetchInfoSeries(ctx, mat, ignoreSeries, dataLabelMatchers, selectHints) + if err != nil { + ev.error(err) + } + annots.Merge(ws) + + res, ws := ev.combineWithInfoSeries(ctx, mat, infoSeries, ignoreSeries, dataLabelMatchers) + annots.Merge(ws) + return res, annots +} + +// infoSelectHints calculates the storage.SelectHints for selecting info series, given expr (first argument to info call). +func (ev *evaluator) infoSelectHints(expr parser.Expr) storage.SelectHints { + var nodeTimestamp *int64 + var offset int64 + parser.Inspect(expr, func(node parser.Node, path []parser.Node) error { + switch n := node.(type) { + case *parser.VectorSelector: + if n.Timestamp != nil { + nodeTimestamp = n.Timestamp + } + offset = durationMilliseconds(n.OriginalOffset) + return fmt.Errorf("end traversal") + default: + return nil + } + }) + + start := ev.startTimestamp + end := ev.endTimestamp + if nodeTimestamp != nil { + // The timestamp on the selector overrides everything. + start = *nodeTimestamp + end = *nodeTimestamp + } + // Reduce the start by one fewer ms than the lookback delta + // because wo want to exclude samples that are precisely the + // lookback delta before the eval time. + start -= durationMilliseconds(ev.lookbackDelta) - 1 + start -= offset + end -= offset + + return storage.SelectHints{ + Start: start, + End: end, + Step: ev.interval, + Func: "info", + } +} + +// fetchInfoSeries fetches info series given matching identifying labels in mat. +// Series in ignoreSeries are not fetched. +// dataLabelMatchers may be mutated. 
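+// Editorial summary of the strategy used below: collect every "instance" and "job" value seen in
+// mat, build one regexp matcher per identifying label that ORs those (quoted) values, append any
+// data label matchers supplied by the caller, and select the matching info series (target_info
+// unless a __name__ matcher says otherwise).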
+func (ev *evaluator) fetchInfoSeries(ctx context.Context, mat Matrix, ignoreSeries map[int]struct{}, dataLabelMatchers map[string][]*labels.Matcher, selectHints storage.SelectHints) (Matrix, annotations.Annotations, error) { + // A map of values for all identifying labels we are interested in. + idLblValues := map[string]map[string]struct{}{} + for i, s := range mat { + if _, exists := ignoreSeries[i]; exists { + continue + } + + // Register relevant values per identifying label for this series. + for _, l := range identifyingLabels { + val := s.Metric.Get(l) + if val == "" { + continue + } + + if idLblValues[l] == nil { + idLblValues[l] = map[string]struct{}{} + } + idLblValues[l][val] = struct{}{} + } + } + if len(idLblValues) == 0 { + return nil, nil, nil + } + + // Generate regexps for every interesting value per identifying label. + var sb strings.Builder + idLblRegexps := make(map[string]string, len(idLblValues)) + for name, vals := range idLblValues { + sb.Reset() + i := 0 + for v := range vals { + if i > 0 { + sb.WriteRune('|') + } + sb.WriteString(regexp.QuoteMeta(v)) + i++ + } + idLblRegexps[name] = sb.String() + } + + var infoLabelMatchers []*labels.Matcher + for name, re := range idLblRegexps { + infoLabelMatchers = append(infoLabelMatchers, labels.MustNewMatcher(labels.MatchRegexp, name, re)) + } + var nameMatcher *labels.Matcher + for name, ms := range dataLabelMatchers { + for i, m := range ms { + if m.Name == labels.MetricName { + nameMatcher = m + ms = slices.Delete(ms, i, i+1) + } + infoLabelMatchers = append(infoLabelMatchers, m) + } + if len(ms) > 0 { + dataLabelMatchers[name] = ms + } else { + delete(dataLabelMatchers, name) + } + } + if nameMatcher == nil { + // Default to using the target_info metric. + infoLabelMatchers = append([]*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, targetInfo)}, infoLabelMatchers...) + } + + infoIt := ev.querier.Select(ctx, false, &selectHints, infoLabelMatchers...) + infoSeries, ws, err := expandSeriesSet(ctx, infoIt) + if err != nil { + return nil, ws, err + } + + infoMat := ev.evalSeries(ctx, infoSeries, 0, true) + return infoMat, ws, nil +} + +// combineWithInfoSeries combines mat with select data labels from infoMat. +func (ev *evaluator) combineWithInfoSeries(ctx context.Context, mat, infoMat Matrix, ignoreSeries map[int]struct{}, dataLabelMatchers map[string][]*labels.Matcher) (Matrix, annotations.Annotations) { + buf := make([]byte, 0, 1024) + lb := labels.NewScratchBuilder(0) + sigFunction := func(name string) func(labels.Labels) string { + return func(lset labels.Labels) string { + lb.Reset() + lb.Add(labels.MetricName, name) + lset.MatchLabels(true, identifyingLabels...).Range(func(l labels.Label) { + lb.Add(l.Name, l.Value) + }) + lb.Sort() + return string(lb.Labels().Bytes(buf)) + } + } + + infoMetrics := map[string]struct{}{} + for _, is := range infoMat { + lblMap := is.Metric.Map() + infoMetrics[lblMap[labels.MetricName]] = struct{}{} + } + sigfs := make(map[string]func(labels.Labels) string, len(infoMetrics)) + for name := range infoMetrics { + sigfs[name] = sigFunction(name) + } + + // Keep a copy of the original point slices so they can be returned to the pool. 
+ origMatrices := []Matrix{ + make(Matrix, len(mat)), + make(Matrix, len(infoMat)), + } + copy(origMatrices[0], mat) + copy(origMatrices[1], infoMat) + + numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1 + originalNumSamples := ev.currentSamples + + // Create an output vector that is as big as the input matrix with + // the most time series. + biggestLen := max(len(mat), len(infoMat)) + baseVector := make(Vector, 0, len(mat)) + infoVector := make(Vector, 0, len(infoMat)) + enh := &EvalNodeHelper{ + Out: make(Vector, 0, biggestLen), + } + type seriesAndTimestamp struct { + Series + ts int64 + } + seriess := make(map[uint64]seriesAndTimestamp, biggestLen) // Output series by series hash. + tempNumSamples := ev.currentSamples + + // For every base series, compute signature per info metric. + baseSigs := make([]map[string]string, 0, len(mat)) + for _, s := range mat { + sigs := make(map[string]string, len(infoMetrics)) + for infoName := range infoMetrics { + sigs[infoName] = sigfs[infoName](s.Metric) + } + baseSigs = append(baseSigs, sigs) + } + + infoSigs := make([]string, 0, len(infoMat)) + for _, s := range infoMat { + name := s.Metric.Map()[labels.MetricName] + infoSigs = append(infoSigs, sigfs[name](s.Metric)) + } + + var warnings annotations.Annotations + for ts := ev.startTimestamp; ts <= ev.endTimestamp; ts += ev.interval { + if err := contextDone(ctx, "expression evaluation"); err != nil { + ev.error(err) + } + + // Reset number of samples in memory after each timestamp. + ev.currentSamples = tempNumSamples + // Gather input vectors for this timestamp. + baseVector, _ = ev.gatherVector(ts, mat, baseVector, nil, nil) + infoVector, _ = ev.gatherVector(ts, infoMat, infoVector, nil, nil) + + enh.Ts = ts + result, err := ev.combineWithInfoVector(baseVector, infoVector, ignoreSeries, baseSigs, infoSigs, enh, dataLabelMatchers) + if err != nil { + ev.error(err) + } + enh.Out = result[:0] // Reuse result vector. + + vecNumSamples := result.TotalSamples() + ev.currentSamples += vecNumSamples + // When we reset currentSamples to tempNumSamples during the next iteration of the loop it also + // needs to include the samples from the result here, as they're still in memory. + tempNumSamples += vecNumSamples + ev.samplesStats.UpdatePeak(ev.currentSamples) + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + + // Add samples in result vector to output series. + for _, sample := range result { + h := sample.Metric.Hash() + ss, exists := seriess[h] + if exists { + if ss.ts == ts { // If we've seen this output series before at this timestamp, it's a duplicate. + ev.errorf("vector cannot contain metrics with the same labelset") + } + ss.ts = ts + } else { + ss = seriesAndTimestamp{Series{Metric: sample.Metric}, ts} + } + addToSeries(&ss.Series, enh.Ts, sample.F, sample.H, numSteps) + seriess[h] = ss + } + } + + // Reuse the original point slices. + for _, m := range origMatrices { + for _, s := range m { + putFPointSlice(s.Floats) + putHPointSlice(s.Histograms) + } + } + // Assemble the output matrix. By the time we get here we know we don't have too many samples. + numSamples := 0 + output := make(Matrix, 0, len(seriess)) + for _, ss := range seriess { + numSamples += len(ss.Floats) + totalHPointSize(ss.Histograms) + output = append(output, ss.Series) + } + ev.currentSamples = originalNumSamples + numSamples + ev.samplesStats.UpdatePeak(ev.currentSamples) + return output, warnings +} + +// combineWithInfoVector combines base and info Vectors. 
+// Base series in ignoreSeries are not combined. +func (ev *evaluator) combineWithInfoVector(base, info Vector, ignoreSeries map[int]struct{}, baseSigs []map[string]string, infoSigs []string, enh *EvalNodeHelper, dataLabelMatchers map[string][]*labels.Matcher) (Vector, error) { + if len(base) == 0 { + return nil, nil // Short-circuit: nothing is going to match. + } + + // All samples from the info Vector hashed by the matching label/values. + if enh.rightSigs == nil { + enh.rightSigs = make(map[string]Sample, len(enh.Out)) + } else { + clear(enh.rightSigs) + } + + for i, s := range info { + if s.H != nil { + ev.error(errors.New("info sample should be float")) + } + // We encode original info sample timestamps via the float value. + origT := int64(s.F) + + sig := infoSigs[i] + if existing, exists := enh.rightSigs[sig]; exists { + // We encode original info sample timestamps via the float value. + existingOrigT := int64(existing.F) + switch { + case existingOrigT > origT: + // Keep the other info sample, since it's newer. + case existingOrigT < origT: + // Keep this info sample, since it's newer. + enh.rightSigs[sig] = s + default: + // The two info samples have the same timestamp - conflict. + name := s.Metric.Map()[labels.MetricName] + ev.errorf("found duplicate series for info metric %s", name) + } + } else { + enh.rightSigs[sig] = s + } + } + + for i, bs := range base { + if _, exists := ignoreSeries[i]; exists { + // This series should not be enriched with info metric data labels. + enh.Out = append(enh.Out, Sample{ + Metric: bs.Metric, + F: bs.F, + H: bs.H, + }) + continue + } + + baseLabels := bs.Metric.Map() + enh.resetBuilder(labels.Labels{}) + + // For every info metric name, try to find an info series with the same signature. + seenInfoMetrics := map[string]struct{}{} + for infoName, sig := range baseSigs[i] { + is, exists := enh.rightSigs[sig] + if !exists { + continue + } + if _, exists := seenInfoMetrics[infoName]; exists { + continue + } + + err := is.Metric.Validate(func(l labels.Label) error { + if l.Name == labels.MetricName { + return nil + } + if _, exists := dataLabelMatchers[l.Name]; len(dataLabelMatchers) > 0 && !exists { + // Not among the specified data label matchers. + return nil + } + + if v := enh.lb.Get(l.Name); v != "" && v != l.Value { + return fmt.Errorf("conflicting label: %s", l.Name) + } + if _, exists := baseLabels[l.Name]; exists { + // Skip labels already on the base metric. + return nil + } + + enh.lb.Set(l.Name, l.Value) + return nil + }) + if err != nil { + return nil, err + } + seenInfoMetrics[infoName] = struct{}{} + } + + infoLbls := enh.lb.Labels() + if infoLbls.Len() == 0 { + // If there's at least one data label matcher not matching the empty string, + // we have to ignore this series as there are no matching info series. 
+ allMatchersMatchEmpty := true + for _, ms := range dataLabelMatchers { + for _, m := range ms { + if !m.Matches("") { + allMatchersMatchEmpty = false + break + } + } + } + if !allMatchersMatchEmpty { + continue + } + } + + enh.resetBuilder(bs.Metric) + infoLbls.Range(func(l labels.Label) { + enh.lb.Set(l.Name, l.Value) + }) + + enh.Out = append(enh.Out, Sample{ + Metric: enh.lb.Labels(), + F: bs.F, + H: bs.H, + }) + } + return enh.Out, nil +} diff --git a/promql/info_test.go b/promql/info_test.go new file mode 100644 index 0000000000..2e7a67172f --- /dev/null +++ b/promql/info_test.go @@ -0,0 +1,140 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package promql_test + +import ( + "testing" + + "github.com/prometheus/prometheus/promql/promqltest" +) + +// The "info" function is experimental. This is why we write those tests here for now instead of promqltest/testdata/info.test. +func TestInfo(t *testing.T) { + engine := promqltest.NewTestEngine(t, false, 0, promqltest.DefaultMaxSamplesPerQuery) + promqltest.RunTest(t, ` +load 5m + metric{instance="a", job="1", label="value"} 0 1 2 + metric_not_matching_target_info{instance="a", job="2", label="value"} 0 1 2 + metric_with_overlapping_label{instance="a", job="1", label="value", data="base"} 0 1 2 + target_info{instance="a", job="1", data="info", another_data="another info"} 1 1 1 + build_info{instance="a", job="1", build_data="build"} 1 1 1 + +# Include one info metric data label. +eval range from 0m to 10m step 5m info(metric, {data=~".+"}) + metric{data="info", instance="a", job="1", label="value"} 0 1 2 + +# Include all info metric data labels. +eval range from 0m to 10m step 5m info(metric) + metric{data="info", instance="a", job="1", label="value", another_data="another info"} 0 1 2 + +# Try including all info metric data labels, but non-matching identifying labels. +eval range from 0m to 10m step 5m info(metric_not_matching_target_info) + metric_not_matching_target_info{instance="a", job="2", label="value"} 0 1 2 + +# Try including a certain info metric data label with a non-matching matcher not accepting empty labels. +# Metric is ignored, due there being a data label matcher not matching empty labels, +# and there being no info series matches. +eval range from 0m to 10m step 5m info(metric, {non_existent=~".+"}) + +# Include a certain info metric data label together with a non-matching matcher accepting empty labels. +# Since the non_existent matcher matches empty labels, it's simply ignored when there's no match. +# XXX: This case has to include a matcher not matching empty labels, due the PromQL limitation +# that vector selectors have to contain at least one matcher not accepting empty labels. +# We might need another construct than vector selector to get around this limitation. 
+eval range from 0m to 10m step 5m info(metric, {data=~".+", non_existent=~".*"}) + metric{data="info", instance="a", job="1", label="value"} 0 1 2 + +# Info series data labels overlapping with those of base series are ignored. +eval range from 0m to 10m step 5m info(metric_with_overlapping_label) + metric_with_overlapping_label{data="base", instance="a", job="1", label="value", another_data="another info"} 0 1 2 + +# Include data labels from target_info specifically. +eval range from 0m to 10m step 5m info(metric, {__name__="target_info"}) + metric{data="info", instance="a", job="1", label="value", another_data="another info"} 0 1 2 + +# Try to include all data labels from a non-existent info metric. +eval range from 0m to 10m step 5m info(metric, {__name__="non_existent"}) + metric{instance="a", job="1", label="value"} 0 1 2 + +# Try to include a certain data label from a non-existent info metric. +eval range from 0m to 10m step 5m info(metric, {__name__="non_existent", data=~".+"}) + +# Include data labels from build_info. +eval range from 0m to 10m step 5m info(metric, {__name__="build_info"}) + metric{instance="a", job="1", label="value", build_data="build"} 0 1 2 + +# Include data labels from build_info and target_info. +eval range from 0m to 10m step 5m info(metric, {__name__=~".+_info"}) + metric{instance="a", job="1", label="value", build_data="build", data="info", another_data="another info"} 0 1 2 + +# Info metrics themselves are ignored when it comes to enriching with info metric data labels. +eval range from 0m to 10m step 5m info(build_info, {__name__=~".+_info", build_data=~".+"}) + build_info{instance="a", job="1", build_data="build"} 1 1 1 + +clear + +# Overlapping target_info series. +load 5m + metric{instance="a", job="1", label="value"} 0 1 2 + target_info{instance="a", job="1", data="info", another_data="another info"} 1 1 _ + target_info{instance="a", job="1", data="updated info", another_data="another info"} _ _ 1 + +# Conflicting info series are resolved through picking the latest sample. +eval range from 0m to 10m step 5m info(metric) + metric{data="info", instance="a", job="1", label="value", another_data="another info"} 0 1 _ + metric{data="updated info", instance="a", job="1", label="value", another_data="another info"} _ _ 2 + +clear + +# Non-overlapping target_info series. +load 5m + metric{instance="a", job="1", label="value"} 0 1 2 + target_info{instance="a", job="1", data="info"} 1 1 stale + target_info{instance="a", job="1", data="updated info"} _ _ 1 + +# Include info metric data labels from a metric which data labels change over time. +eval range from 0m to 10m step 5m info(metric) + metric{data="info", instance="a", job="1", label="value"} 0 1 _ + metric{data="updated info", instance="a", job="1", label="value"} _ _ 2 + +clear + +# Info series selector matches histogram series, info metrics should be float type. +load 5m + metric{instance="a", job="1", label="value"} 0 1 2 + histogram{instance="a", job="1"} {{schema:1 sum:3 count:22 buckets:[5 10 7]}} + +eval_fail range from 0m to 10m step 5m info(metric, {__name__="histogram"}) + +clear + +# Series with skipped scrape. +load 1m + metric{instance="a", job="1", label="value"} 0 _ 2 3 4 + target_info{instance="a", job="1", data="info"} 1 _ 1 1 1 + +# Lookback works also for the info series. +eval range from 1m to 4m step 1m info(metric) + metric{data="info", instance="a", job="1", label="value"} 0 2 3 4 + +# @ operator works also with info. 
+# Note that we pick the timestamp missing a sample, lookback should pick previous sample. +eval range from 1m to 4m step 1m info(metric @ 60) + metric{data="info", instance="a", job="1", label="value"} 0 0 0 0 + +# offset operator works also with info. +eval range from 1m to 4m step 1m info(metric offset 1m) + metric{data="info", instance="a", job="1", label="value"} 0 0 2 3 +`, engine) +} diff --git a/promql/parser/ast.go b/promql/parser/ast.go index 162d7817ab..132ef3f0d2 100644 --- a/promql/parser/ast.go +++ b/promql/parser/ast.go @@ -208,6 +208,10 @@ type VectorSelector struct { UnexpandedSeriesSet storage.SeriesSet Series []storage.Series + // BypassEmptyMatcherCheck is true when the VectorSelector isn't required to have at least one matcher matching the empty string. + // This is the case when VectorSelector is used to represent the info function's second argument. + BypassEmptyMatcherCheck bool + PosRange posrange.PositionRange } diff --git a/promql/parser/functions.go b/promql/parser/functions.go index 99b41321fe..aa65aca275 100644 --- a/promql/parser/functions.go +++ b/promql/parser/functions.go @@ -202,10 +202,11 @@ var Functions = map[string]*Function{ ArgTypes: []ValueType{ValueTypeScalar, ValueTypeVector}, ReturnType: ValueTypeVector, }, - "holt_winters": { - Name: "holt_winters", - ArgTypes: []ValueType{ValueTypeMatrix, ValueTypeScalar, ValueTypeScalar}, - ReturnType: ValueTypeVector, + "double_exponential_smoothing": { + Name: "double_exponential_smoothing", + ArgTypes: []ValueType{ValueTypeMatrix, ValueTypeScalar, ValueTypeScalar}, + ReturnType: ValueTypeVector, + Experimental: true, }, "hour": { Name: "hour", @@ -223,6 +224,13 @@ var Functions = map[string]*Function{ ArgTypes: []ValueType{ValueTypeMatrix}, ReturnType: ValueTypeVector, }, + "info": { + Name: "info", + ArgTypes: []ValueType{ValueTypeVector, ValueTypeVector}, + ReturnType: ValueTypeVector, + Experimental: true, + Variadic: 1, + }, "irate": { Name: "irate", ArgTypes: []ValueType{ValueTypeMatrix}, diff --git a/promql/parser/parse.go b/promql/parser/parse.go index 6dda270529..3f50cf2b8a 100644 --- a/promql/parser/parse.go +++ b/promql/parser/parse.go @@ -786,6 +786,19 @@ func (p *parser) checkAST(node Node) (typ ValueType) { } } + if n.Func.Name == "info" && len(n.Args) > 1 { + // Check the type is correct first + if n.Args[1].Type() != ValueTypeVector { + p.addParseErrf(node.PositionRange(), "expected type %s in %s, got %s", DocumentedType(ValueTypeVector), fmt.Sprintf("call to function %q", n.Func.Name), DocumentedType(n.Args[1].Type())) + } + // Check the vector selector in the input doesn't contain a metric name + if n.Args[1].(*VectorSelector).Name != "" { + p.addParseErrf(n.Args[1].PositionRange(), "expected label selectors only, got vector selector instead") + } + // Set Vector Selector flag to bypass empty matcher check + n.Args[1].(*VectorSelector).BypassEmptyMatcherCheck = true + } + for i, arg := range n.Args { if i >= len(n.Func.ArgTypes) { if n.Func.Variadic == 0 { @@ -832,17 +845,19 @@ func (p *parser) checkAST(node Node) (typ ValueType) { // metric name is a non-empty matcher. break } - // A Vector selector must contain at least one non-empty matcher to prevent - // implicit selection of all metrics (e.g. by a typo). - notEmpty := false - for _, lm := range n.LabelMatchers { - if lm != nil && !lm.Matches("") { - notEmpty = true - break + if !n.BypassEmptyMatcherCheck { + // A Vector selector must contain at least one non-empty matcher to prevent + // implicit selection of all metrics (e.g. 
by a typo). + notEmpty := false + for _, lm := range n.LabelMatchers { + if lm != nil && !lm.Matches("") { + notEmpty = true + break + } + } + if !notEmpty { + p.addParseErrf(n.PositionRange(), "vector selector must contain at least one non-empty matcher") } - } - if !notEmpty { - p.addParseErrf(n.PositionRange(), "vector selector must contain at least one non-empty matcher") } case *NumberLiteral, *StringLiteral: diff --git a/promql/parser/parse_test.go b/promql/parser/parse_test.go index f50137b6de..0e5e2f638b 100644 --- a/promql/parser/parse_test.go +++ b/promql/parser/parse_test.go @@ -3872,6 +3872,81 @@ var testExpr = []struct { }, }, }, + { + input: `info(rate(http_request_counter_total{}[5m]))`, + expected: &Call{ + Func: MustGetFunction("info"), + Args: Expressions{ + &Call{ + Func: MustGetFunction("rate"), + PosRange: posrange.PositionRange{ + Start: 5, + End: 43, + }, + Args: Expressions{ + &MatrixSelector{ + VectorSelector: &VectorSelector{ + Name: "http_request_counter_total", + OriginalOffset: 0, + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "http_request_counter_total"), + }, + PosRange: posrange.PositionRange{ + Start: 10, + End: 38, + }, + }, + EndPos: 42, + Range: 5 * time.Minute, + }, + }, + }, + }, + PosRange: posrange.PositionRange{ + Start: 0, + End: 44, + }, + }, + }, + { + input: `info(rate(http_request_counter_total{}[5m]), target_info{foo="bar"})`, + fail: true, + errMsg: `1:46: parse error: expected label selectors only, got vector selector instead`, + }, + { + input: `info(http_request_counter_total{namespace="zzz"}, {foo="bar", bar="baz"})`, + expected: &Call{ + Func: MustGetFunction("info"), + Args: Expressions{ + &VectorSelector{ + Name: "http_request_counter_total", + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, "namespace", "zzz"), + MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "http_request_counter_total"), + }, + PosRange: posrange.PositionRange{ + Start: 5, + End: 48, + }, + }, + &VectorSelector{ + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, "foo", "bar"), + MustLabelMatcher(labels.MatchEqual, "bar", "baz"), + }, + PosRange: posrange.PositionRange{ + Start: 50, + End: 72, + }, + BypassEmptyMatcherCheck: true, + }, + }, + PosRange: posrange.PositionRange{ + Start: 0, + End: 73, + }, + }, + }, } func makeInt64Pointer(val int64) *int64 { @@ -3889,6 +3964,12 @@ func readable(s string) string { } func TestParseExpressions(t *testing.T) { + // Enable experimental functions testing. 
+ EnableExperimentalFunctions = true + t.Cleanup(func() { + EnableExperimentalFunctions = false + }) + model.NameValidationScheme = model.UTF8Validation for _, test := range testExpr { t.Run(readable(test.input), func(t *testing.T) { @@ -3925,8 +4006,7 @@ func TestParseExpressions(t *testing.T) { require.Equal(t, expected, expr, "error on input '%s'", test.input) } else { - require.Error(t, err) - require.Contains(t, err.Error(), test.errMsg, "unexpected error on input '%s', expected '%s', got '%s'", test.input, test.errMsg, err.Error()) + require.ErrorContains(t, err, test.errMsg, "unexpected error on input '%s', expected '%s', got '%s'", test.input, test.errMsg, err.Error()) var errorList ParseErrors ok := errors.As(err, &errorList) @@ -4468,7 +4548,7 @@ func TestRecoverParserError(t *testing.T) { e := errors.New("custom error") defer func() { - require.Equal(t, e.Error(), err.Error()) + require.EqualError(t, err, e.Error()) }() defer p.recover(&err) diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index f0649a77a8..e078bcb60b 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -39,6 +39,7 @@ import ( "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/util/almost" + "github.com/prometheus/prometheus/util/convertnhcb" "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" ) @@ -46,8 +47,8 @@ import ( var ( patSpace = regexp.MustCompile("[\t ]+") patLoad = regexp.MustCompile(`^load(?:_(with_nhcb))?\s+(.+?)$`) - patEvalInstant = regexp.MustCompile(`^eval(?:_(fail|warn|ordered))?\s+instant\s+(?:at\s+(.+?))?\s+(.+)$`) - patEvalRange = regexp.MustCompile(`^eval(?:_(fail|warn))?\s+range\s+from\s+(.+)\s+to\s+(.+)\s+step\s+(.+?)\s+(.+)$`) + patEvalInstant = regexp.MustCompile(`^eval(?:_(fail|warn|ordered|info))?\s+instant\s+(?:at\s+(.+?))?\s+(.+)$`) + patEvalRange = regexp.MustCompile(`^eval(?:_(fail|warn|info))?\s+range\s+from\s+(.+)\s+to\s+(.+)\s+step\s+(.+?)\s+(.+)$`) ) const ( @@ -321,6 +322,8 @@ func (t *test) parseEval(lines []string, i int) (int, *evalCmd, error) { cmd.fail = true case "warn": cmd.warn = true + case "info": + cmd.info = true } for j := 1; i+1 < len(lines); j++ { @@ -477,43 +480,22 @@ func (cmd *loadCmd) append(a storage.Appender) error { return nil } -func getHistogramMetricBase(m labels.Labels, suffix string) (labels.Labels, uint64) { - mName := m.Get(labels.MetricName) - baseM := labels.NewBuilder(m). - Set(labels.MetricName, strings.TrimSuffix(mName, suffix)). - Del(labels.BucketLabel). 
- Labels() - hash := baseM.Hash() - return baseM, hash -} - type tempHistogramWrapper struct { metric labels.Labels upperBounds []float64 - histogramByTs map[int64]tempHistogram + histogramByTs map[int64]convertnhcb.TempHistogram } func newTempHistogramWrapper() tempHistogramWrapper { return tempHistogramWrapper{ upperBounds: []float64{}, - histogramByTs: map[int64]tempHistogram{}, + histogramByTs: map[int64]convertnhcb.TempHistogram{}, } } -type tempHistogram struct { - bucketCounts map[float64]float64 - count float64 - sum float64 -} - -func newTempHistogram() tempHistogram { - return tempHistogram{ - bucketCounts: map[float64]float64{}, - } -} - -func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogramWrapper func(*tempHistogramWrapper), updateHistogram func(*tempHistogram, float64)) { - m2, m2hash := getHistogramMetricBase(m, suffix) +func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogramWrapper func(*tempHistogramWrapper), updateHistogram func(*convertnhcb.TempHistogram, float64)) { + m2 := convertnhcb.GetHistogramMetricBase(m, suffix) + m2hash := m2.Hash() histogramWrapper, exists := histogramMap[m2hash] if !exists { histogramWrapper = newTempHistogramWrapper() @@ -528,7 +510,7 @@ func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap } histogram, exists := histogramWrapper.histogramByTs[s.T] if !exists { - histogram = newTempHistogram() + histogram = convertnhcb.NewTempHistogram() } updateHistogram(&histogram, s.F) histogramWrapper.histogramByTs[s.T] = histogram @@ -536,34 +518,6 @@ func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap histogramMap[m2hash] = histogramWrapper } -func processUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64) ([]float64, *histogram.FloatHistogram) { - sort.Float64s(upperBounds0) - upperBounds := make([]float64, 0, len(upperBounds0)) - prevLE := math.Inf(-1) - for _, le := range upperBounds0 { - if le != prevLE { // deduplicate - upperBounds = append(upperBounds, le) - prevLE = le - } - } - var customBounds []float64 - if upperBounds[len(upperBounds)-1] == math.Inf(1) { - customBounds = upperBounds[:len(upperBounds)-1] - } else { - customBounds = upperBounds - } - return upperBounds, &histogram.FloatHistogram{ - Count: 0, - Sum: 0, - Schema: histogram.CustomBucketsSchema, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: uint32(len(upperBounds))}, - }, - PositiveBuckets: make([]float64, len(upperBounds)), - CustomValues: customBounds, - } -} - // If classic histograms are defined, convert them into native histograms with custom // bounds and append the defined time series to the storage. 
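// Editorial note: the conversion mechanics now live in util/convertnhcb (TempHistogram,
// GetHistogramMetricBase, ProcessUpperBoundsAndCreateBaseHistogram, NewHistogram); the loader here
// only collates the classic _bucket/_count/_sum series and hands them over, as the hunk below shows.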
func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { @@ -582,16 +536,16 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { } processClassicHistogramSeries(m, "_bucket", histogramMap, smpls, func(histogramWrapper *tempHistogramWrapper) { histogramWrapper.upperBounds = append(histogramWrapper.upperBounds, le) - }, func(histogram *tempHistogram, f float64) { - histogram.bucketCounts[le] = f + }, func(histogram *convertnhcb.TempHistogram, f float64) { + histogram.BucketCounts[le] = f }) case strings.HasSuffix(mName, "_count"): - processClassicHistogramSeries(m, "_count", histogramMap, smpls, nil, func(histogram *tempHistogram, f float64) { - histogram.count = f + processClassicHistogramSeries(m, "_count", histogramMap, smpls, nil, func(histogram *convertnhcb.TempHistogram, f float64) { + histogram.Count = f }) case strings.HasSuffix(mName, "_sum"): - processClassicHistogramSeries(m, "_sum", histogramMap, smpls, nil, func(histogram *tempHistogram, f float64) { - histogram.sum = f + processClassicHistogramSeries(m, "_sum", histogramMap, smpls, nil, func(histogram *convertnhcb.TempHistogram, f float64) { + histogram.Sum = f }) } } @@ -599,30 +553,21 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { // Convert the collated classic histogram data into native histograms // with custom bounds and append them to the storage. for _, histogramWrapper := range histogramMap { - upperBounds, fhBase := processUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds) + upperBounds, hBase := convertnhcb.ProcessUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds, true) + fhBase := hBase.ToFloat(nil) samples := make([]promql.Sample, 0, len(histogramWrapper.histogramByTs)) for t, histogram := range histogramWrapper.histogramByTs { - fh := fhBase.Copy() - var prevCount, total float64 - for i, le := range upperBounds { - currCount, exists := histogram.bucketCounts[le] - if !exists { - currCount = 0 + h, fh := convertnhcb.NewHistogram(histogram, upperBounds, hBase, fhBase) + if fh == nil { + if err := h.Validate(); err != nil { + return err } - count := currCount - prevCount - fh.PositiveBuckets[i] = count - total += count - prevCount = currCount - } - fh.Sum = histogram.sum - if histogram.count != 0 { - total = histogram.count + fh = h.ToFloat(nil) } - fh.Count = total - s := promql.Sample{T: t, H: fh.Compact(0)} - if err := s.H.Validate(); err != nil { + if err := fh.Validate(); err != nil { return err } + s := promql.Sample{T: t, H: fh} samples = append(samples, s) } sort.Slice(samples, func(i, j int) bool { return samples[i].T < samples[j].T }) @@ -657,10 +602,10 @@ type evalCmd struct { step time.Duration line int - isRange bool // if false, instant query - fail, warn, ordered bool - expectedFailMessage string - expectedFailRegexp *regexp.Regexp + isRange bool // if false, instant query + fail, warn, ordered, info bool + expectedFailMessage string + expectedFailRegexp *regexp.Regexp metrics map[uint64]labels.Labels expectScalar bool @@ -1208,13 +1153,16 @@ func (t *test) runInstantQuery(iq atModifierTestCase, cmd *evalCmd, engine promq if res.Err == nil && cmd.fail { return fmt.Errorf("expected error evaluating query %q (line %d) but got none", iq.expr, cmd.line) } - countWarnings, _ := res.Warnings.CountWarningsAndInfo() + countWarnings, countInfo := res.Warnings.CountWarningsAndInfo() if !cmd.warn && countWarnings > 0 { return fmt.Errorf("unexpected warnings evaluating query %q (line %d): %v", iq.expr, cmd.line, res.Warnings) } if 
cmd.warn && countWarnings == 0 { return fmt.Errorf("expected warnings evaluating query %q (line %d) but got none", iq.expr, cmd.line) } + if cmd.info && countInfo == 0 { + return fmt.Errorf("expected info annotations evaluating query %q (line %d) but got none", iq.expr, cmd.line) + } err = cmd.compareResult(res.Value) if err != nil { return fmt.Errorf("error in %s %s (line %d): %w", cmd, iq.expr, cmd.line, err) diff --git a/promql/promqltest/test_test.go b/promql/promqltest/test_test.go index eabd8b1847..5da924e9a5 100644 --- a/promql/promqltest/test_test.go +++ b/promql/promqltest/test_test.go @@ -237,7 +237,7 @@ eval instant at 5m sum by (group) (http_requests) load 5m testmetric {{}} -eval instant at 5m testmetric +eval instant at 0m testmetric `, expectedError: `error in eval testmetric (line 5): unexpected metric {__name__="testmetric"} in result, has value {count:0, sum:0}`, }, diff --git a/promql/promqltest/testdata/aggregators.test b/promql/promqltest/testdata/aggregators.test index 68d2e735b3..e2eb381dbc 100644 --- a/promql/promqltest/testdata/aggregators.test +++ b/promql/promqltest/testdata/aggregators.test @@ -250,7 +250,7 @@ clear load 5m http_requests{job="api-server", instance="0", group="production"} 0+10x10 http_requests{job="api-server", instance="1", group="production"} 0+20x10 - http_requests{job="api-server", instance="2", group="production"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN + http_requests{job="api-server", instance="2", group="production"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN http_requests{job="api-server", instance="0", group="canary"} 0+30x10 http_requests{job="api-server", instance="1", group="canary"} 0+40x10 http_requests{job="app-server", instance="0", group="production"} 0+50x10 @@ -337,32 +337,32 @@ load 5m version{job="app-server", instance="0", group="canary"} 7 version{job="app-server", instance="1", group="canary"} 7 -eval instant at 5m count_values("version", version) +eval instant at 1m count_values("version", version) {version="6"} 5 {version="7"} 2 {version="8"} 2 -eval instant at 5m count_values(((("version"))), version) +eval instant at 1m count_values(((("version"))), version) {version="6"} 5 {version="7"} 2 {version="8"} 2 -eval instant at 5m count_values without (instance)("version", version) +eval instant at 1m count_values without (instance)("version", version) {job="api-server", group="production", version="6"} 3 {job="api-server", group="canary", version="8"} 2 {job="app-server", group="production", version="6"} 2 {job="app-server", group="canary", version="7"} 2 # Overwrite label with output. Don't do this. -eval instant at 5m count_values without (instance)("job", version) +eval instant at 1m count_values without (instance)("job", version) {job="6", group="production"} 5 {job="8", group="canary"} 2 {job="7", group="canary"} 2 # Overwrite label with output. Don't do this. -eval instant at 5m count_values by (job, group)("job", version) +eval instant at 1m count_values by (job, group)("job", version) {job="6", group="production"} 5 {job="8", group="canary"} 2 {job="7", group="canary"} 2 @@ -572,3 +572,160 @@ clear # #eval instant at 1m count(topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without())) # {} 1 + +clear + +# Test stddev produces consistent results regardless the order the data is loaded in. 
+load 5m + series{label="a"} 1 + series{label="b"} 2 + series{label="c"} {{schema:1 sum:15 count:10 buckets:[3 2 5 7 9]}} + +eval instant at 0m stddev(series) + {} 0.5 + +eval instant at 0m stdvar(series) + {} 0.25 + +eval instant at 0m stddev by (label) (series) + {label="a"} 0 + {label="b"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="a"} 0 + {label="b"} 0 + +clear + +load 5m + series{label="a"} {{schema:1 sum:15 count:10 buckets:[3 2 5 7 9]}} + series{label="b"} 1 + series{label="c"} 2 + +eval instant at 0m stddev(series) + {} 0.5 + +eval instant at 0m stdvar(series) + {} 0.25 + +eval instant at 0m stddev by (label) (series) + {label="b"} 0 + {label="c"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="b"} 0 + {label="c"} 0 + +clear + +load 5m + series{label="a"} 1 + series{label="b"} 2 + series{label="c"} NaN + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +eval instant at 0m stdvar by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +clear + +load 5m + series{label="a"} NaN + series{label="b"} 1 + series{label="c"} 2 + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +clear + +load 5m + series NaN + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +clear + +load 5m + series{label="a"} 1 + series{label="b"} 2 + series{label="c"} inf + +eval instant at 0m stddev (series) + {} NaN + +eval instant at 0m stdvar (series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +eval instant at 0m stdvar by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +clear + +load 5m + series{label="a"} inf + series{label="b"} 1 + series{label="c"} 2 + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +clear + +load 5m + series inf + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN diff --git a/promql/promqltest/testdata/at_modifier.test b/promql/promqltest/testdata/at_modifier.test index 35f90ee671..4091f7eabf 100644 --- a/promql/promqltest/testdata/at_modifier.test +++ b/promql/promqltest/testdata/at_modifier.test @@ -121,45 +121,43 @@ eval instant at 25s sum_over_time(metric{job="1"}[100:1] offset 20 @ 100) # Since vector selector has timestamp, the result value does not depend on the timestamp of subqueries. # Inner most sum=1+2+...+10=55. -# With [100s:25s] subquery, it's 55*5. +# With [100s:25s] subquery, it's 55*4. eval instant at 100s sum_over_time(sum_over_time(metric{job="1"}[100s] @ 100)[100s:25s] @ 50) - {job="1"} 275 + {job="1"} 220 # Nested subqueries with different timestamps on both. # Since vector selector has timestamp, the result value does not depend on the timestamp of subqueries. -# Sum of innermost subquery is 275 as above. The outer subquery repeats it 4 times. +# Sum of innermost subquery is 220 as above. The outer subquery repeats it 3 times. 
eval instant at 0s sum_over_time(sum_over_time(sum_over_time(metric{job="1"}[100s] @ 100)[100s:25s] @ 50)[3s:1s] @ 3000) - {job="1"} 1100 + {job="1"} 660 # Testing the inner subquery timestamp since vector selector does not have @. # Inner sum for subquery [100s:25s] @ 50 are -# at -50 nothing, at -25 nothing, at 0=0, at 25=2, at 50=4+5=9. -# This sum of 11 is repeated 4 times by outer subquery. +# at -50 nothing, at -25 nothing, at 0=0, at 25=2, at 50=5. +# This sum of 7 is repeated 3 times by outer subquery. eval instant at 0s sum_over_time(sum_over_time(sum_over_time(metric{job="1"}[10s])[100s:25s] @ 50)[3s:1s] @ 200) - {job="1"} 44 + {job="1"} 21 # Inner sum for subquery [100s:25s] @ 200 are -# at 100=9+10, at 125=12, at 150=14+15, at 175=17, at 200=19+20. -# This sum of 116 is repeated 4 times by outer subquery. +# at 125=12, at 150=15, at 175=17, at 200=20. +# This sum of 64 is repeated 3 times by outer subquery. eval instant at 0s sum_over_time(sum_over_time(sum_over_time(metric{job="1"}[10s])[100s:25s] @ 200)[3s:1s] @ 50) - {job="1"} 464 + {job="1"} 192 # Nested subqueries with timestamp only on outer subquery. # Outer most subquery: -# at 900=783 -# inner subquery: at 870=87+86+85, at 880=88+87+86, at 890=89+88+87 -# at 925=537 -# inner subquery: at 895=89+88, at 905=90+89, at 915=90+91 -# at 950=828 -# inner subquery: at 920=92+91+90, at 930=93+92+91, at 940=94+93+92 -# at 975=567 -# inner subquery: at 945=94+93, at 955=95+94, at 965=96+95 -# at 1000=873 -# inner subquery: at 970=97+96+95, at 980=98+97+96, at 990=99+98+97 +# at 925=360 +# inner subquery: at 905=90+89, at 915=91+90 +# at 950=372 +# inner subquery: at 930=93+92, at 940=94+93 +# at 975=380 +# inner subquery: at 955=95+94, at 965=96+95 +# at 1000=392 +# inner subquery: at 980=98+97, at 990=99+98 eval instant at 0s sum_over_time(sum_over_time(sum_over_time(metric{job="1"}[20s])[20s:10s] offset 10s)[100s:25s] @ 1000) - {job="1"} 3588 + {job="1"} 1504 # minute is counted on the value of the sample. eval instant at 10s minute(metric @ 1500) @@ -182,32 +180,32 @@ eval instant at 15m timestamp(timestamp(metric{job="1"} @ 10)) # minute is counted on the value of the sample. eval instant at 0s sum_over_time(minute(metric @ 1500)[100s:10s]) - {job="1"} 22 - {job="2"} 55 + {job="1"} 20 + {job="2"} 50 # If nothing passed, minute() takes eval time. # Here the eval time is determined by the subquery. # [50m:1m] at 6000, i.e. 100m, is 50m to 100m. -# sum=50+51+52+...+59+0+1+2+...+40. +# sum=51+52+...+59+0+1+2+...+40. eval instant at 0s sum_over_time(minute()[50m:1m] @ 6000) - {} 1365 + {} 1315 -# sum=45+46+47+...+59+0+1+2+...+35. +# sum=46+47+...+59+0+1+2+...+35. eval instant at 0s sum_over_time(minute()[50m:1m] @ 6000 offset 5m) - {} 1410 + {} 1365 # time() is the eval time which is determined by subquery here. -# 2900+2901+...+3000 = (3000*3001 - 2899*2900)/2. +# 2901+...+3000 = (3000*3001 - 2899*2900)/2. eval instant at 0s sum_over_time(vector(time())[100s:1s] @ 3000) - {} 297950 + {} 295050 -# 2300+2301+...+2400 = (2400*2401 - 2299*2300)/2. +# 2301+...+2400 = (2400*2401 - 2299*2300)/2. eval instant at 0s sum_over_time(vector(time())[100s:1s] @ 3000 offset 600s) - {} 237350 + {} 235050 # timestamp() takes the time of the sample and not the evaluation time. eval instant at 0s sum_over_time(timestamp(metric{job="1"} @ 10)[100s:10s] @ 3000) - {job="1"} 110 + {job="1"} 100 # The result of inner timestamp() will have the timestamp as the # eval time, hence entire expression is not step invariant and depends on eval time. 
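Editorial note on the expected-value changes in at_modifier.test above (and on the lookback-delta
boundary cases in engine_test.go earlier in this diff): the new numbers are what you get when range
selectors, subquery ranges and the lookback window are treated as left-open intervals, i.e. a sample
lying exactly at the left boundary is no longer included. Worked example, assuming (as the comments
above imply) samples every 10 seconds with value t/10: sum_over_time(metric{job="1"}[100s] @ 100)
still sums the samples at t=10s..100s, giving 1+2+...+10 = 55, but the subquery [100s:25s] @ 50 now
has evaluation points only at t=-25s, 0s, 25s and 50s, since the point exactly 100s back (t=-50s) is
excluded, so the outer sum becomes 55*4 = 220 instead of 55*5 = 275. The engine_test.go cases moved
their "still visible" timestamps back by one millisecond for the same reason: a sample that is
exactly one lookback delta old no longer appears in an instant vector.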
diff --git a/promql/promqltest/testdata/functions.test b/promql/promqltest/testdata/functions.test index 6e2b3630bc..fb1d169624 100644 --- a/promql/promqltest/testdata/functions.test +++ b/promql/promqltest/testdata/functions.test @@ -6,11 +6,13 @@ load 5m # Tests for resets(). eval instant at 50m resets(http_requests[5m]) + +eval instant at 50m resets(http_requests[10m]) {path="/foo"} 0 {path="/bar"} 0 {path="/biz"} 0 -eval instant at 50m resets(http_requests[300]) +eval instant at 50m resets(http_requests[600]) {path="/foo"} 0 {path="/bar"} 0 {path="/biz"} 0 @@ -21,6 +23,11 @@ eval instant at 50m resets(http_requests[20m]) {path="/biz"} 0 eval instant at 50m resets(http_requests[30m]) + {path="/foo"} 1 + {path="/bar"} 0 + {path="/biz"} 0 + +eval instant at 50m resets(http_requests[32m]) {path="/foo"} 2 {path="/bar"} 1 {path="/biz"} 0 @@ -34,28 +41,30 @@ eval instant at 50m resets(nonexistent_metric[50m]) # Tests for changes(). eval instant at 50m changes(http_requests[5m]) + +eval instant at 50m changes(http_requests[6m]) {path="/foo"} 0 {path="/bar"} 0 {path="/biz"} 0 eval instant at 50m changes(http_requests[20m]) - {path="/foo"} 3 - {path="/bar"} 3 + {path="/foo"} 2 + {path="/bar"} 2 {path="/biz"} 0 eval instant at 50m changes(http_requests[30m]) - {path="/foo"} 4 - {path="/bar"} 5 - {path="/biz"} 1 + {path="/foo"} 3 + {path="/bar"} 4 + {path="/biz"} 0 eval instant at 50m changes(http_requests[50m]) - {path="/foo"} 8 - {path="/bar"} 9 + {path="/foo"} 7 + {path="/bar"} 8 {path="/biz"} 1 eval instant at 50m changes((http_requests[50m])) - {path="/foo"} 8 - {path="/bar"} 9 + {path="/foo"} 7 + {path="/bar"} 8 {path="/biz"} 1 eval instant at 50m changes(nonexistent_metric[50m]) @@ -66,7 +75,7 @@ load 5m x{a="b"} NaN NaN NaN x{a="c"} 0 NaN 0 -eval instant at 15m changes(x[15m]) +eval instant at 15m changes(x[20m]) {a="b"} 0 {a="c"} 2 @@ -75,14 +84,14 @@ clear # Tests for increase(). load 5m http_requests{path="/foo"} 0+10x10 - http_requests{path="/bar"} 0+10x5 0+10x5 + http_requests{path="/bar"} 0+18x5 0+18x5 http_requests{path="/dings"} 10+10x10 http_requests{path="/bumms"} 1+10x10 # Tests for increase(). eval instant at 50m increase(http_requests[50m]) {path="/foo"} 100 - {path="/bar"} 90 + {path="/bar"} 160 {path="/dings"} 100 {path="/bumms"} 100 @@ -95,7 +104,7 @@ eval instant at 50m increase(http_requests[50m]) # value, and therefore the extrapolation happens only by 30s. eval instant at 50m increase(http_requests[100m]) {path="/foo"} 100 - {path="/bar"} 90 + {path="/bar"} 162 {path="/dings"} 105 {path="/bumms"} 101 @@ -115,15 +124,17 @@ clear # Tests for rate(). load 5m - testcounter_reset_middle 0+10x4 0+10x5 + testcounter_reset_middle 0+27x4 0+27x5 testcounter_reset_end 0+10x9 0 10 # Counter resets at in the middle of range are handled correctly by rate(). eval instant at 50m rate(testcounter_reset_middle[50m]) - {} 0.03 + {} 0.08 # Counter resets at end of range are ignored by rate(). 
eval instant at 50m rate(testcounter_reset_end[5m]) + +eval instant at 50m rate(testcounter_reset_end[6m]) {} 0 clear @@ -242,24 +253,24 @@ eval instant at 50m deriv(testcounter_reset_middle[100m]) # intercept at t=3000: 38.63636363636364 # intercept at t=3000+3600: 76.81818181818181 eval instant at 50m predict_linear(testcounter_reset_middle[50m], 3600) - {} 76.81818181818181 + {} 70 eval instant at 50m predict_linear(testcounter_reset_middle[50m], 1h) - {} 76.81818181818181 + {} 70 # intercept at t = 3000+3600 = 6600 -eval instant at 50m predict_linear(testcounter_reset_middle[50m] @ 3000, 3600) +eval instant at 50m predict_linear(testcounter_reset_middle[55m] @ 3000, 3600) {} 76.81818181818181 -eval instant at 50m predict_linear(testcounter_reset_middle[50m] @ 3000, 1h) +eval instant at 50m predict_linear(testcounter_reset_middle[55m] @ 3000, 1h) {} 76.81818181818181 # intercept at t = 600+3600 = 4200 -eval instant at 10m predict_linear(testcounter_reset_middle[50m] @ 3000, 3600) +eval instant at 10m predict_linear(testcounter_reset_middle[55m] @ 3000, 3600) {} 51.36363636363637 # intercept at t = 4200+3600 = 7800 -eval instant at 70m predict_linear(testcounter_reset_middle[50m] @ 3000, 3600) +eval instant at 70m predict_linear(testcounter_reset_middle[55m] @ 3000, 3600) {} 89.54545454545455 # With http_requests, there is a sample value exactly at the end of @@ -467,7 +478,7 @@ load 5m http_requests{job="api-server", instance="1", group="production"} 0+20x10 http_requests{job="api-server", instance="0", group="canary"} 0+30x10 http_requests{job="api-server", instance="1", group="canary"} 0+40x10 - http_requests{job="api-server", instance="2", group="canary"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN + http_requests{job="api-server", instance="2", group="canary"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN http_requests{job="app-server", instance="0", group="production"} 0+50x10 http_requests{job="app-server", instance="1", group="production"} 0+60x10 http_requests{job="app-server", instance="0", group="canary"} 0+70x10 @@ -502,7 +513,7 @@ load 5m http_requests{job="api-server", instance="1", group="production"} 0+20x10 http_requests{job="api-server", instance="0", group="canary"} 0+30x10 http_requests{job="api-server", instance="1", group="canary"} 0+40x10 - http_requests{job="api-server", instance="2", group="canary"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN + http_requests{job="api-server", instance="2", group="canary"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN http_requests{job="app-server", instance="0", group="production"} 0+50x10 http_requests{job="app-server", instance="1", group="production"} 0+60x10 http_requests{job="app-server", instance="0", group="canary"} 0+70x10 @@ -640,7 +651,7 @@ eval_ordered instant at 50m sort_by_label(node_uname_info, "release") node_uname_info{job="node_exporter", instance="4m5", release="1.11.3"} 100 node_uname_info{job="node_exporter", instance="4m1000", release="1.111.3"} 100 -# Tests for holt_winters +# Tests for double_exponential_smoothing clear # positive trends @@ -650,7 +661,7 @@ load 10s http_requests{job="api-server", instance="0", group="canary"} 0+30x1000 300+80x1000 http_requests{job="api-server", instance="1", group="canary"} 0+40x2000 -eval instant at 8000s holt_winters(http_requests[1m], 0.01, 0.1) +eval instant at 8000s double_exponential_smoothing(http_requests[1m], 0.01, 0.1) {job="api-server", instance="0", group="production"} 8000 {job="api-server", instance="1", group="production"} 16000 {job="api-server", instance="0", 
group="canary"} 24000 @@ -664,7 +675,7 @@ load 10s http_requests{job="api-server", instance="0", group="canary"} 0+30x1000 300-80x1000 http_requests{job="api-server", instance="1", group="canary"} 0-40x1000 0+40x1000 -eval instant at 8000s holt_winters(http_requests[1m], 0.01, 0.1) +eval instant at 8000s double_exponential_smoothing(http_requests[1m], 0.01, 0.1) {job="api-server", instance="0", group="production"} 0 {job="api-server", instance="1", group="production"} -16000 {job="api-server", instance="0", group="canary"} 24000 @@ -688,10 +699,10 @@ load 10s metric9 -9.988465674311579e+307 -9.988465674311579e+307 -9.988465674311579e+307 metric10 -9.988465674311579e+307 9.988465674311579e+307 -eval instant at 1m avg_over_time(metric[1m]) +eval instant at 55s avg_over_time(metric[1m]) {} 3 -eval instant at 1m sum_over_time(metric[1m])/count_over_time(metric[1m]) +eval instant at 55s sum_over_time(metric[1m])/count_over_time(metric[1m]) {} 3 eval instant at 1m avg_over_time(metric2[1m]) @@ -758,8 +769,8 @@ eval instant at 1m avg_over_time(metric8[1m]) {} 9.988465674311579e+307 # This overflows float64. -eval instant at 1m sum_over_time(metric8[1m])/count_over_time(metric8[1m]) - {} Inf +eval instant at 1m sum_over_time(metric8[2m])/count_over_time(metric8[2m]) + {} +Inf eval instant at 1m avg_over_time(metric9[1m]) {} -9.988465674311579e+307 @@ -768,10 +779,16 @@ eval instant at 1m avg_over_time(metric9[1m]) eval instant at 1m sum_over_time(metric9[1m])/count_over_time(metric9[1m]) {} -Inf -eval instant at 1m avg_over_time(metric10[1m]) +eval instant at 45s avg_over_time(metric10[1m]) + {} 0 + +eval instant at 1m avg_over_time(metric10[2m]) {} 0 -eval instant at 1m sum_over_time(metric10[1m])/count_over_time(metric10[1m]) +eval instant at 45s sum_over_time(metric10[1m])/count_over_time(metric10[1m]) + {} 0 + +eval instant at 1m sum_over_time(metric10[2m])/count_over_time(metric10[2m]) {} 0 # Test if very big intermediate values cause loss of detail. @@ -779,10 +796,10 @@ clear load 10s metric 1 1e100 1 -1e100 -eval instant at 1m sum_over_time(metric[1m]) +eval instant at 1m sum_over_time(metric[2m]) {} 2 -eval instant at 1m avg_over_time(metric[1m]) +eval instant at 1m avg_over_time(metric[2m]) {} 0.5 # Tests for stddev_over_time and stdvar_over_time. @@ -790,13 +807,13 @@ clear load 10s metric 0 8 8 2 3 -eval instant at 1m stdvar_over_time(metric[1m]) +eval instant at 1m stdvar_over_time(metric[2m]) {} 10.56 -eval instant at 1m stddev_over_time(metric[1m]) +eval instant at 1m stddev_over_time(metric[2m]) {} 3.249615 -eval instant at 1m stddev_over_time((metric[1m])) +eval instant at 1m stddev_over_time((metric[2m])) {} 3.249615 # Tests for stddev_over_time and stdvar_over_time #4927. 
@@ -826,42 +843,42 @@ load 10s data{test="three samples"} 0 1 2 data{test="uneven samples"} 0 1 4 -eval instant at 1m quantile_over_time(0, data[1m]) +eval instant at 1m quantile_over_time(0, data[2m]) {test="two samples"} 0 {test="three samples"} 0 {test="uneven samples"} 0 -eval instant at 1m quantile_over_time(0.5, data[1m]) +eval instant at 1m quantile_over_time(0.5, data[2m]) {test="two samples"} 0.5 {test="three samples"} 1 {test="uneven samples"} 1 -eval instant at 1m quantile_over_time(0.75, data[1m]) +eval instant at 1m quantile_over_time(0.75, data[2m]) {test="two samples"} 0.75 {test="three samples"} 1.5 {test="uneven samples"} 2.5 -eval instant at 1m quantile_over_time(0.8, data[1m]) +eval instant at 1m quantile_over_time(0.8, data[2m]) {test="two samples"} 0.8 {test="three samples"} 1.6 {test="uneven samples"} 2.8 -eval instant at 1m quantile_over_time(1, data[1m]) +eval instant at 1m quantile_over_time(1, data[2m]) {test="two samples"} 1 {test="three samples"} 2 {test="uneven samples"} 4 -eval_warn instant at 1m quantile_over_time(-1, data[1m]) +eval_warn instant at 1m quantile_over_time(-1, data[2m]) {test="two samples"} -Inf {test="three samples"} -Inf {test="uneven samples"} -Inf -eval_warn instant at 1m quantile_over_time(2, data[1m]) +eval_warn instant at 1m quantile_over_time(2, data[2m]) {test="two samples"} +Inf {test="three samples"} +Inf {test="uneven samples"} +Inf -eval_warn instant at 1m (quantile_over_time(2, (data[1m]))) +eval_warn instant at 1m (quantile_over_time(2, (data[2m]))) {test="two samples"} +Inf {test="three samples"} +Inf {test="uneven samples"} +Inf @@ -969,21 +986,21 @@ load 10s data{type="some_nan3"} NaN 0 1 data{type="only_nan"} NaN NaN NaN -eval instant at 1m min_over_time(data[1m]) +eval instant at 1m min_over_time(data[2m]) {type="numbers"} 0 {type="some_nan"} 0 {type="some_nan2"} 1 {type="some_nan3"} 0 {type="only_nan"} NaN -eval instant at 1m max_over_time(data[1m]) +eval instant at 1m max_over_time(data[2m]) {type="numbers"} 3 {type="some_nan"} 2 {type="some_nan2"} 2 {type="some_nan3"} 1 {type="only_nan"} NaN -eval instant at 1m last_over_time(data[1m]) +eval instant at 1m last_over_time(data[2m]) data{type="numbers"} 3 data{type="some_nan"} NaN data{type="some_nan2"} 1 @@ -1076,13 +1093,19 @@ eval instant at 1m absent_over_time(httpd_log_lines_total[30s]) {} 1 eval instant at 15m absent_over_time(http_requests[5m]) - -eval instant at 16m absent_over_time(http_requests[5m]) {} 1 +eval instant at 15m absent_over_time(http_requests[10m]) + eval instant at 16m absent_over_time(http_requests[6m]) + {} 1 + +eval instant at 16m absent_over_time(http_requests[16m]) eval instant at 16m absent_over_time(httpd_handshake_failures_total[1m]) + {} 1 + +eval instant at 16m absent_over_time(httpd_handshake_failures_total[2m]) eval instant at 16m absent_over_time({instance="127.0.0.1"}[5m]) @@ -1138,17 +1161,18 @@ eval instant at 0m present_over_time(httpd_log_lines_total[30s]) eval instant at 1m present_over_time(httpd_log_lines_total[30s]) eval instant at 15m present_over_time(http_requests[5m]) + +eval instant at 15m present_over_time(http_requests[10m]) {instance="127.0.0.1", job="httpd", path="/bar"} 1 {instance="127.0.0.1", job="httpd", path="/foo"} 1 -eval instant at 16m present_over_time(http_requests[5m]) - eval instant at 16m present_over_time(http_requests[6m]) + +eval instant at 16m present_over_time(http_requests[16m]) {instance="127.0.0.1", job="httpd", path="/bar"} 1 {instance="127.0.0.1", job="httpd", path="/foo"} 1 eval instant at 16m 
present_over_time(httpd_handshake_failures_total[1m]) - {instance="127.0.0.1", job="node"} 1 eval instant at 16m present_over_time({instance="127.0.0.1"}[5m]) {instance="127.0.0.1",job="node"} 1 @@ -1169,59 +1193,59 @@ load 5m exp_root_log{l="x"} 10 exp_root_log{l="y"} 20 -eval instant at 5m exp(exp_root_log) +eval instant at 1m exp(exp_root_log) {l="x"} 22026.465794806718 {l="y"} 485165195.4097903 -eval instant at 5m exp(exp_root_log - 10) +eval instant at 1m exp(exp_root_log - 10) {l="y"} 22026.465794806718 {l="x"} 1 -eval instant at 5m exp(exp_root_log - 20) +eval instant at 1m exp(exp_root_log - 20) {l="x"} 4.5399929762484854e-05 {l="y"} 1 -eval instant at 5m ln(exp_root_log) +eval instant at 1m ln(exp_root_log) {l="x"} 2.302585092994046 {l="y"} 2.995732273553991 -eval instant at 5m ln(exp_root_log - 10) +eval instant at 1m ln(exp_root_log - 10) {l="y"} 2.302585092994046 {l="x"} -Inf -eval instant at 5m ln(exp_root_log - 20) +eval instant at 1m ln(exp_root_log - 20) {l="y"} -Inf {l="x"} NaN -eval instant at 5m exp(ln(exp_root_log)) +eval instant at 1m exp(ln(exp_root_log)) {l="y"} 20 {l="x"} 10 -eval instant at 5m sqrt(exp_root_log) +eval instant at 1m sqrt(exp_root_log) {l="x"} 3.1622776601683795 {l="y"} 4.47213595499958 -eval instant at 5m log2(exp_root_log) +eval instant at 1m log2(exp_root_log) {l="x"} 3.3219280948873626 {l="y"} 4.321928094887363 -eval instant at 5m log2(exp_root_log - 10) +eval instant at 1m log2(exp_root_log - 10) {l="y"} 3.3219280948873626 {l="x"} -Inf -eval instant at 5m log2(exp_root_log - 20) +eval instant at 1m log2(exp_root_log - 20) {l="x"} NaN {l="y"} -Inf -eval instant at 5m log10(exp_root_log) +eval instant at 1m log10(exp_root_log) {l="x"} 1 {l="y"} 1.301029995663981 -eval instant at 5m log10(exp_root_log - 10) +eval instant at 1m log10(exp_root_log - 10) {l="y"} 1 {l="x"} -Inf -eval instant at 5m log10(exp_root_log - 20) +eval instant at 1m log10(exp_root_log - 20) {l="x"} NaN {l="y"} -Inf @@ -1234,3 +1258,12 @@ load 1m # We expect the value to be 0 for t=0s to t=59s (inclusive), then 60 for t=60s and t=61s. 
eval range from 0 to 61s step 1s timestamp(metric) {} 0x59 60 60 + +clear + +# Check round with mixed data types +load 1m + mixed_metric {{schema:0 sum:5 count:4 buckets:[1 2 1]}} 1 2 3 {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:8 count:6 buckets:[1 4 1]}} + +eval range from 0 to 5m step 1m round(mixed_metric) + {} _ 1 2 3 diff --git a/promql/promqltest/testdata/histograms.test b/promql/promqltest/testdata/histograms.test index 47cba79935..6089fd01d2 100644 --- a/promql/promqltest/testdata/histograms.test +++ b/promql/promqltest/testdata/histograms.test @@ -108,8 +108,8 @@ eval instant at 50m histogram_stdvar(testhistogram3) eval instant at 50m histogram_fraction(0, 0.2, testhistogram3) {start="positive"} 0.6363636363636364 {start="negative"} 0 - -eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3[5m])) + +eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3[10m])) {start="positive"} 0.6363636363636364 {start="negative"} 0 @@ -118,8 +118,8 @@ eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3[5m])) eval instant at 50m testhistogram3_bucket{le=".2"} / ignoring(le) testhistogram3_count {start="positive"} 0.6363636363636364 - -eval instant at 50m rate(testhistogram3_bucket{le=".2"}[5m]) / ignoring(le) rate(testhistogram3_count[5m]) + +eval instant at 50m rate(testhistogram3_bucket{le=".2"}[10m]) / ignoring(le) rate(testhistogram3_count[10m]) {start="positive"} 0.6363636363636364 # Test histogram_quantile, native and classic. @@ -241,28 +241,27 @@ eval instant at 50m histogram_quantile(0.8, testhistogram_bucket) {start="negative"} 0.3 # More realistic with rates. - -eval instant at 50m histogram_quantile(0.2, rate(testhistogram[5m])) +eval instant at 50m histogram_quantile(0.2, rate(testhistogram[10m])) {start="positive"} 0.048 {start="negative"} -0.2 -eval instant at 50m histogram_quantile(0.2, rate(testhistogram_bucket[5m])) +eval instant at 50m histogram_quantile(0.2, rate(testhistogram_bucket[10m])) {start="positive"} 0.048 {start="negative"} -0.2 -eval instant at 50m histogram_quantile(0.5, rate(testhistogram[5m])) +eval instant at 50m histogram_quantile(0.5, rate(testhistogram[10m])) {start="positive"} 0.15 {start="negative"} -0.15 -eval instant at 50m histogram_quantile(0.5, rate(testhistogram_bucket[5m])) +eval instant at 50m histogram_quantile(0.5, rate(testhistogram_bucket[10m])) {start="positive"} 0.15 {start="negative"} -0.15 -eval instant at 50m histogram_quantile(0.8, rate(testhistogram[5m])) +eval instant at 50m histogram_quantile(0.8, rate(testhistogram[10m])) {start="positive"} 0.72 {start="negative"} 0.3 -eval instant at 50m histogram_quantile(0.8, rate(testhistogram_bucket[5m])) +eval instant at 50m histogram_quantile(0.8, rate(testhistogram_bucket[10m])) {start="positive"} 0.72 {start="negative"} 0.3 @@ -307,115 +306,112 @@ eval instant at 47m histogram_quantile(5./6., rate(testhistogram2_bucket[15m])) # Aggregated histogram: Everything in one. Note how native histograms # don't require aggregation by le. 
-eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[5m]))) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[10m]))) {} 0.075 -eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le)) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[10m])) by (le)) {} 0.075 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[5m]))) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[10m]))) {} 0.1277777777777778 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le)) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[10m])) by (le)) {} 0.1277777777777778 # Aggregated histogram: Everything in one. Now with avg, which does not change anything. -eval instant at 50m histogram_quantile(0.3, avg(rate(request_duration_seconds[5m]))) +eval instant at 50m histogram_quantile(0.3, avg(rate(request_duration_seconds[10m]))) {} 0.075 -eval instant at 50m histogram_quantile(0.3, avg(rate(request_duration_seconds_bucket[5m])) by (le)) +eval instant at 50m histogram_quantile(0.3, avg(rate(request_duration_seconds_bucket[10m])) by (le)) {} 0.075 -eval instant at 50m histogram_quantile(0.5, avg(rate(request_duration_seconds[5m]))) +eval instant at 50m histogram_quantile(0.5, avg(rate(request_duration_seconds[10m]))) {} 0.12777777777777778 -eval instant at 50m histogram_quantile(0.5, avg(rate(request_duration_seconds_bucket[5m])) by (le)) +eval instant at 50m histogram_quantile(0.5, avg(rate(request_duration_seconds_bucket[10m])) by (le)) {} 0.12777777777777778 # Aggregated histogram: By instance. -eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[5m])) by (instance)) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[10m])) by (instance)) {instance="ins1"} 0.075 {instance="ins2"} 0.075 -eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, instance)) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[10m])) by (le, instance)) {instance="ins1"} 0.075 {instance="ins2"} 0.075 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[5m])) by (instance)) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[10m])) by (instance)) {instance="ins1"} 0.1333333333 {instance="ins2"} 0.125 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, instance)) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[10m])) by (le, instance)) {instance="ins1"} 0.1333333333 {instance="ins2"} 0.125 # Aggregated histogram: By job. 
- -eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[5m])) by (job)) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[10m])) by (job)) {job="job1"} 0.1 {job="job2"} 0.0642857142857143 -eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, job)) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[10m])) by (le, job)) {job="job1"} 0.1 {job="job2"} 0.0642857142857143 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[5m])) by (job)) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[10m])) by (job)) {job="job1"} 0.14 {job="job2"} 0.1125 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, job)) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[10m])) by (le, job)) {job="job1"} 0.14 {job="job2"} 0.1125 # Aggregated histogram: By job and instance. - -eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[5m])) by (job, instance)) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[10m])) by (job, instance)) {instance="ins1", job="job1"} 0.11 {instance="ins2", job="job1"} 0.09 {instance="ins1", job="job2"} 0.06 {instance="ins2", job="job2"} 0.0675 -eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, job, instance)) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[10m])) by (le, job, instance)) {instance="ins1", job="job1"} 0.11 {instance="ins2", job="job1"} 0.09 {instance="ins1", job="job2"} 0.06 {instance="ins2", job="job2"} 0.0675 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[5m])) by (job, instance)) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[10m])) by (job, instance)) {instance="ins1", job="job1"} 0.15 {instance="ins2", job="job1"} 0.1333333333333333 {instance="ins1", job="job2"} 0.1 {instance="ins2", job="job2"} 0.1166666666666667 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, job, instance)) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[10m])) by (le, job, instance)) {instance="ins1", job="job1"} 0.15 {instance="ins2", job="job1"} 0.1333333333333333 {instance="ins1", job="job2"} 0.1 {instance="ins2", job="job2"} 0.1166666666666667 # The unaggregated histogram for comparison. Same result as the previous one. 
- -eval instant at 50m histogram_quantile(0.3, rate(request_duration_seconds[5m])) +eval instant at 50m histogram_quantile(0.3, rate(request_duration_seconds[10m])) {instance="ins1", job="job1"} 0.11 {instance="ins2", job="job1"} 0.09 {instance="ins1", job="job2"} 0.06 {instance="ins2", job="job2"} 0.0675 -eval instant at 50m histogram_quantile(0.3, rate(request_duration_seconds_bucket[5m])) +eval instant at 50m histogram_quantile(0.3, rate(request_duration_seconds_bucket[10m])) {instance="ins1", job="job1"} 0.11 {instance="ins2", job="job1"} 0.09 {instance="ins1", job="job2"} 0.06 {instance="ins2", job="job2"} 0.0675 -eval instant at 50m histogram_quantile(0.5, rate(request_duration_seconds[5m])) +eval instant at 50m histogram_quantile(0.5, rate(request_duration_seconds[10m])) {instance="ins1", job="job1"} 0.15 {instance="ins2", job="job1"} 0.13333333333333333 {instance="ins1", job="job2"} 0.1 {instance="ins2", job="job2"} 0.11666666666666667 -eval instant at 50m histogram_quantile(0.5, rate(request_duration_seconds_bucket[5m])) +eval instant at 50m histogram_quantile(0.5, rate(request_duration_seconds_bucket[10m])) {instance="ins1", job="job1"} 0.15 {instance="ins2", job="job1"} 0.13333333333333333 {instance="ins1", job="job2"} 0.1 @@ -425,6 +421,25 @@ eval instant at 50m histogram_quantile(0.5, rate(request_duration_seconds_bucket eval instant at 50m sum(request_duration_seconds) {} {{schema:-53 count:250 custom_values:[0.1 0.2] buckets:[100 90 60]}} +eval instant at 50m sum(request_duration_seconds{job="job1",instance="ins1"} + ignoring(job,instance) request_duration_seconds{job="job1",instance="ins2"} + ignoring(job,instance) request_duration_seconds{job="job2",instance="ins1"} + ignoring(job,instance) request_duration_seconds{job="job2",instance="ins2"}) + {} {{schema:-53 count:250 custom_values:[0.1 0.2] buckets:[100 90 60]}} + +eval instant at 50m avg(request_duration_seconds) + {} {{schema:-53 count:62.5 custom_values:[0.1 0.2] buckets:[25 22.5 15]}} + +# To verify the result above, calculate from classic histogram as well. +eval instant at 50m avg (request_duration_seconds_bucket{le="0.1"}) + {} 25 + +eval instant at 50m avg (request_duration_seconds_bucket{le="0.2"}) - avg (request_duration_seconds_bucket{le="0.1"}) + {} 22.5 + +eval instant at 50m avg (request_duration_seconds_bucket{le="+Inf"}) - avg (request_duration_seconds_bucket{le="0.2"}) + {} 15 + +eval instant at 50m count(request_duration_seconds) + {} 4 + # A histogram with nonmonotonic bucket counts. This may happen when recording # rule evaluation or federation races scrape ingestion, causing some buckets # counts to be derived from fewer samples. @@ -448,19 +463,19 @@ eval instant at 50m histogram_quantile(0.99, nonmonotonic_bucket) {} 979.75 # Buckets with different representations of the same upper bound. 
-eval instant at 50m histogram_quantile(0.5, rate(mixed_bucket[5m])) +eval instant at 50m histogram_quantile(0.5, rate(mixed_bucket[10m])) {instance="ins1", job="job1"} 0.15 {instance="ins2", job="job1"} NaN -eval instant at 50m histogram_quantile(0.5, rate(mixed[5m])) +eval instant at 50m histogram_quantile(0.5, rate(mixed[10m])) {instance="ins1", job="job1"} 0.2 {instance="ins2", job="job1"} NaN -eval instant at 50m histogram_quantile(0.75, rate(mixed_bucket[5m])) +eval instant at 50m histogram_quantile(0.75, rate(mixed_bucket[10m])) {instance="ins1", job="job1"} 0.2 {instance="ins2", job="job1"} NaN -eval instant at 50m histogram_quantile(1, rate(mixed_bucket[5m])) +eval instant at 50m histogram_quantile(1, rate(mixed_bucket[10m])) {instance="ins1", job="job1"} 0.2 {instance="ins2", job="job1"} NaN @@ -469,7 +484,7 @@ load_with_nhcb 5m empty_bucket{le="0.2", job="job1", instance="ins1"} 0x10 empty_bucket{le="+Inf", job="job1", instance="ins1"} 0x10 -eval instant at 50m histogram_quantile(0.2, rate(empty_bucket[5m])) +eval instant at 50m histogram_quantile(0.2, rate(empty_bucket[10m])) {instance="ins1", job="job1"} NaN # Load a duplicate histogram with a different name to test failure scenario on multiple histograms with the same label set. @@ -508,3 +523,36 @@ eval instant at 5m histogram_quantile(1.0, sum by (le) (rate(const_histogram_buc eval instant at 5m histogram_quantile(1.0, sum(rate(const_histogram[5m]))) {} NaN + +load_with_nhcb 1m + histogram_over_time_bucket{le="0"} 0 1 3 9 + histogram_over_time_bucket{le="1"} 2 3 3 9 + histogram_over_time_bucket{le="2"} 3 8 5 10 + histogram_over_time_bucket{le="4"} 3 10 6 18 + +# Test custom buckets with sum_over_time, avg_over_time. +eval instant at 3m sum_over_time(histogram_over_time[4m:1m]) + {} {{schema:-53 count:37 custom_values:[0 1 2 4] buckets:[13 4 9 11]}} + +eval instant at 3m avg_over_time(histogram_over_time[4m:1m]) + {} {{schema:-53 count:9.25 custom_values:[0 1 2 4] buckets:[3.25 1 2.25 2.75]}} + +# Test custom buckets with counter reset +load_with_nhcb 5m + histogram_with_reset_bucket{le="1"} 1 3 9 + histogram_with_reset_bucket{le="2"} 3 3 9 + histogram_with_reset_bucket{le="4"} 8 5 12 + histogram_with_reset_bucket{le="8"} 10 6 18 + histogram_with_reset_sum{} 36 16 61 + +eval instant at 10m increase(histogram_with_reset[15m]) + {} {{schema:-53 count:27 sum:91.5 custom_values:[1 2 4 8] counter_reset_hint:gauge buckets:[13.5 0 4.5 9]}} + +eval instant at 10m resets(histogram_with_reset[15m]) + {} 1 + +eval instant at 10m histogram_count(increase(histogram_with_reset[15m])) + {} 27 + +eval instant at 10m histogram_sum(increase(histogram_with_reset[15m])) + {} 91.5 diff --git a/promql/promqltest/testdata/name_label_dropping.test b/promql/promqltest/testdata/name_label_dropping.test index 1f1dac3602..5f5dcd5e4d 100644 --- a/promql/promqltest/testdata/name_label_dropping.test +++ b/promql/promqltest/testdata/name_label_dropping.test @@ -4,81 +4,81 @@ load 5m another_metric{env="1"} 60 120 180 # Does not drop __name__ for vector selector -eval instant at 15m metric{env="1"} +eval instant at 10m metric{env="1"} metric{env="1"} 120 # Drops __name__ for unary operators -eval instant at 15m -metric +eval instant at 10m -metric {env="1"} -120 # Drops __name__ for binary operators -eval instant at 15m metric + another_metric +eval instant at 10m metric + another_metric {env="1"} 300 # Does not drop __name__ for binary comparison operators -eval instant at 15m metric <= another_metric +eval instant at 10m metric <= another_metric 
metric{env="1"} 120 # Drops __name__ for binary comparison operators with "bool" modifier -eval instant at 15m metric <= bool another_metric +eval instant at 10m metric <= bool another_metric {env="1"} 1 # Drops __name__ for vector-scalar operations -eval instant at 15m metric * 2 +eval instant at 10m metric * 2 {env="1"} 240 # Drops __name__ for instant-vector functions -eval instant at 15m clamp(metric, 0, 100) +eval instant at 10m clamp(metric, 0, 100) {env="1"} 100 # Drops __name__ for range-vector functions -eval instant at 15m rate(metric{env="1"}[10m]) +eval instant at 10m rate(metric{env="1"}[10m]) {env="1"} 0.2 # Does not drop __name__ for last_over_time function -eval instant at 15m last_over_time(metric{env="1"}[10m]) +eval instant at 10m last_over_time(metric{env="1"}[10m]) metric{env="1"} 120 # Drops name for other _over_time functions -eval instant at 15m max_over_time(metric{env="1"}[10m]) +eval instant at 10m max_over_time(metric{env="1"}[10m]) {env="1"} 120 # Allows relabeling (to-be-dropped) __name__ via label_replace -eval instant at 15m label_replace(rate({env="1"}[10m]), "my_name", "rate_$1", "__name__", "(.+)") +eval instant at 10m label_replace(rate({env="1"}[10m]), "my_name", "rate_$1", "__name__", "(.+)") {my_name="rate_metric", env="1"} 0.2 {my_name="rate_another_metric", env="1"} 0.2 # Allows preserving __name__ via label_replace -eval instant at 15m label_replace(rate({env="1"}[10m]), "__name__", "rate_$1", "__name__", "(.+)") +eval instant at 10m label_replace(rate({env="1"}[10m]), "__name__", "rate_$1", "__name__", "(.+)") rate_metric{env="1"} 0.2 rate_another_metric{env="1"} 0.2 # Allows relabeling (to-be-dropped) __name__ via label_join -eval instant at 15m label_join(rate({env="1"}[10m]), "my_name", "_", "__name__") +eval instant at 10m label_join(rate({env="1"}[10m]), "my_name", "_", "__name__") {my_name="metric", env="1"} 0.2 {my_name="another_metric", env="1"} 0.2 # Allows preserving __name__ via label_join -eval instant at 15m label_join(rate({env="1"}[10m]), "__name__", "_", "__name__", "env") +eval instant at 10m label_join(rate({env="1"}[10m]), "__name__", "_", "__name__", "env") metric_1{env="1"} 0.2 another_metric_1{env="1"} 0.2 # Does not drop metric names fro aggregation operators -eval instant at 15m sum by (__name__, env) (metric{env="1"}) +eval instant at 10m sum by (__name__, env) (metric{env="1"}) metric{env="1"} 120 # Aggregation operators by __name__ lead to duplicate labelset errors (aggregation is partitioned by not yet removed __name__ label) # This is an accidental side effect of delayed __name__ label dropping -eval_fail instant at 15m sum by (__name__) (rate({env="1"}[10m])) +eval_fail instant at 10m sum by (__name__) (rate({env="1"}[10m])) # Aggregation operators aggregate metrics with same labelset and to-be-dropped names # This is an accidental side effect of delayed __name__ label dropping -eval instant at 15m sum(rate({env="1"}[10m])) by (env) +eval instant at 10m sum(rate({env="1"}[10m])) by (env) {env="1"} 0.4 # Aggregationk operators propagate __name__ label dropping information -eval instant at 15m topk(10, sum by (__name__, env) (metric{env="1"})) +eval instant at 10m topk(10, sum by (__name__, env) (metric{env="1"})) metric{env="1"} 120 -eval instant at 15m topk(10, sum by (__name__, env) (rate(metric{env="1"}[10m]))) +eval instant at 10m topk(10, sum by (__name__, env) (rate(metric{env="1"}[10m]))) {env="1"} 0.2 diff --git a/promql/promqltest/testdata/native_histograms.test 
b/promql/promqltest/testdata/native_histograms.test index 7d2eec32cf..8c5814ae8a 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -2,55 +2,58 @@ load 5m empty_histogram {{}} -eval instant at 5m empty_histogram +eval instant at 1m empty_histogram {__name__="empty_histogram"} {{}} -eval instant at 5m histogram_count(empty_histogram) +eval instant at 1m histogram_count(empty_histogram) {} 0 -eval instant at 5m histogram_sum(empty_histogram) +eval instant at 1m histogram_sum(empty_histogram) {} 0 -eval instant at 5m histogram_avg(empty_histogram) +eval instant at 1m histogram_avg(empty_histogram) {} NaN -eval instant at 5m histogram_fraction(-Inf, +Inf, empty_histogram) +eval instant at 1m histogram_fraction(-Inf, +Inf, empty_histogram) {} NaN -eval instant at 5m histogram_fraction(0, 8, empty_histogram) +eval instant at 1m histogram_fraction(0, 8, empty_histogram) {} NaN - +clear # buckets:[1 2 1] means 1 observation in the 1st bucket, 2 observations in the 2nd and 1 observation in the 3rd (total 4). load 5m single_histogram {{schema:0 sum:5 count:4 buckets:[1 2 1]}} # histogram_count extracts the count property from the histogram. -eval instant at 5m histogram_count(single_histogram) +eval instant at 1m histogram_count(single_histogram) {} 4 # histogram_sum extracts the sum property from the histogram. -eval instant at 5m histogram_sum(single_histogram) +eval instant at 1m histogram_sum(single_histogram) {} 5 # histogram_avg calculates the average from sum and count properties. -eval instant at 5m histogram_avg(single_histogram) +eval instant at 1m histogram_avg(single_histogram) {} 1.25 # We expect half of the values to fall in the range 1 < x <= 2. -eval instant at 5m histogram_fraction(1, 2, single_histogram) +eval instant at 1m histogram_fraction(1, 2, single_histogram) {} 0.5 # We expect all values to fall in the range 0 < x <= 8. -eval instant at 5m histogram_fraction(0, 8, single_histogram) +eval instant at 1m histogram_fraction(0, 8, single_histogram) {} 1 -# Median is 1.5 due to linear estimation of the midpoint of the middle bucket, whose values are within range 1 < x <= 2. -eval instant at 5m histogram_quantile(0.5, single_histogram) - {} 1.5 - +# Median is 1.414213562373095 (2**2**-1, or sqrt(2)) due to +# exponential interpolation, i.e. the "midpoint" within range 1 < x <= +# 2 is assumed where the bucket boundary would be if we increased the +# resolution of the histogram by one step. +eval instant at 1m histogram_quantile(0.5, single_histogram) + {} 1.414213562373095 +clear # Repeat the same histogram 10 times. load 5m @@ -68,8 +71,9 @@ eval instant at 5m histogram_avg(multi_histogram) eval instant at 5m histogram_fraction(1, 2, multi_histogram) {} 0.5 +# See explanation for exponential interpolation above. eval instant at 5m histogram_quantile(0.5, multi_histogram) - {} 1.5 + {} 1.414213562373095 # Each entry should look the same as the first. @@ -85,10 +89,11 @@ eval instant at 50m histogram_avg(multi_histogram) eval instant at 50m histogram_fraction(1, 2, multi_histogram) {} 0.5 +# See explanation for exponential interpolation above. eval instant at 50m histogram_quantile(0.5, multi_histogram) - {} 1.5 - + {} 1.414213562373095 +clear # Accumulate the histogram addition for 10 iterations, offset is a bucket position where offset:0 is always the bucket # with an upper limit of 1 and offset:1 is the bucket which follows to the right. 
Negative offsets represent bucket @@ -109,8 +114,9 @@ eval instant at 5m histogram_avg(incr_histogram) eval instant at 5m histogram_fraction(1, 2, incr_histogram) {} 0.6 +# See explanation for exponential interpolation above. eval instant at 5m histogram_quantile(0.5, incr_histogram) - {} 1.5 + {} 1.414213562373095 eval instant at 50m incr_histogram @@ -129,18 +135,20 @@ eval instant at 50m histogram_avg(incr_histogram) eval instant at 50m histogram_fraction(1, 2, incr_histogram) {} 0.8571428571428571 +# See explanation for exponential interpolation above. eval instant at 50m histogram_quantile(0.5, incr_histogram) - {} 1.5 + {} 1.414213562373095 # Per-second average rate of increase should be 1/(5*60) for count and buckets, then 2/(5*60) for sum. -eval instant at 50m rate(incr_histogram[5m]) - {} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}} +eval instant at 50m rate(incr_histogram[10m]) + {} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}} # Calculate the 50th percentile of observations over the last 10m. +# See explanation for exponential interpolation above. eval instant at 50m histogram_quantile(0.5, rate(incr_histogram[10m])) - {} 1.5 - + {} 1.414213562373095 +clear # Schema represents the histogram resolution, different schema have compatible bucket boundaries, e.g.: # 0: 1 2 4 8 16 32 64 (higher resolution) @@ -166,77 +174,79 @@ eval instant at 5m histogram_avg(low_res_histogram) eval instant at 5m histogram_fraction(1, 4, low_res_histogram) {} 1 - +clear # z_bucket:1 means there is one observation in the zero bucket and z_bucket_w:0.5 means the zero bucket has the range # 0 < x <= 0.5. Sum and count are expected to represent all observations in the histogram, including those in the zero bucket. load 5m single_zero_histogram {{schema:0 z_bucket:1 z_bucket_w:0.5 sum:0.25 count:1}} -eval instant at 5m histogram_count(single_zero_histogram) +eval instant at 1m histogram_count(single_zero_histogram) {} 1 -eval instant at 5m histogram_sum(single_zero_histogram) +eval instant at 1m histogram_sum(single_zero_histogram) {} 0.25 -eval instant at 5m histogram_avg(single_zero_histogram) +eval instant at 1m histogram_avg(single_zero_histogram) {} 0.25 # When only the zero bucket is populated, or there are negative buckets, the distribution is assumed to be equally # distributed around zero; i.e. that there are an equal number of positive and negative observations. Therefore the # entire distribution must lie within the full range of the zero bucket, in this case: -0.5 < x <= +0.5. -eval instant at 5m histogram_fraction(-0.5, 0.5, single_zero_histogram) +eval instant at 1m histogram_fraction(-0.5, 0.5, single_zero_histogram) {} 1 # Half of the observations are estimated to be zero, as this is the midpoint between -0.5 and +0.5. -eval instant at 5m histogram_quantile(0.5, single_zero_histogram) +eval instant at 1m histogram_quantile(0.5, single_zero_histogram) {} 0 - +clear # Let's turn single_histogram upside-down. load 5m negative_histogram {{schema:0 sum:-5 count:4 n_buckets:[1 2 1]}} -eval instant at 5m histogram_count(negative_histogram) +eval instant at 1m histogram_count(negative_histogram) {} 4 -eval instant at 5m histogram_sum(negative_histogram) +eval instant at 1m histogram_sum(negative_histogram) {} -5 -eval instant at 5m histogram_avg(negative_histogram) +eval instant at 1m histogram_avg(negative_histogram) {} -1.25 # We expect half of the values to fall in the range -2 < x <= -1. 
-eval instant at 5m histogram_fraction(-2, -1, negative_histogram) +eval instant at 1m histogram_fraction(-2, -1, negative_histogram) {} 0.5 -eval instant at 5m histogram_quantile(0.5, negative_histogram) - {} -1.5 - +# Exponential interpolation works the same as for positive buckets, just mirrored. +eval instant at 1m histogram_quantile(0.5, negative_histogram) + {} -1.414213562373095 +clear # Two histogram samples. load 5m two_samples_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}} {{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}} # We expect to see the newest sample. -eval instant at 10m histogram_count(two_samples_histogram) +eval instant at 5m histogram_count(two_samples_histogram) {} 4 -eval instant at 10m histogram_sum(two_samples_histogram) +eval instant at 5m histogram_sum(two_samples_histogram) {} -4 -eval instant at 10m histogram_avg(two_samples_histogram) +eval instant at 5m histogram_avg(two_samples_histogram) {} -1 -eval instant at 10m histogram_fraction(-2, -1, two_samples_histogram) +eval instant at 5m histogram_fraction(-2, -1, two_samples_histogram) {} 0.5 -eval instant at 10m histogram_quantile(0.5, two_samples_histogram) - {} -1.5 - +# See explanation for exponential interpolation above. +eval instant at 5m histogram_quantile(0.5, two_samples_histogram) + {} -1.414213562373095 +clear # Add two histograms with negated data. load 5m @@ -259,6 +269,8 @@ eval instant at 5m histogram_fraction(0, 4, balanced_histogram) eval instant at 5m histogram_quantile(0.5, balanced_histogram) {} 0.5 +clear + # Add histogram to test sum(last_over_time) regression load 5m incr_sum_histogram{number="1"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:1 count:1 buckets:[1]}}x10 @@ -270,6 +282,8 @@ eval instant at 50m histogram_sum(sum(incr_sum_histogram)) eval instant at 50m histogram_sum(sum(last_over_time(incr_sum_histogram[5m]))) {} 30 +clear + # Apply rate function to histogram. load 15s histogram_rate {{schema:1 count:12 sum:18.4 z_bucket:2 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[1 2 0 1 1]}}+{{schema:1 count:9 sum:18.4 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 0 1 1] n_buckets:[1 1 0 1 1]}}x100 @@ -280,6 +294,8 @@ eval instant at 5m rate(histogram_rate[45s]) eval range from 5m to 5m30s step 30s rate(histogram_rate[45s]) {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}}x1 +clear + # Apply count and sum function to histogram. load 10m histogram_count_sum_2 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 @@ -290,6 +306,8 @@ eval instant at 10m histogram_count(histogram_count_sum_2) eval instant at 10m histogram_sum(histogram_count_sum_2) {} 100 +clear + # Apply stddev and stdvar function to histogram with {1, 2, 3, 4} (low res). load 10m histogram_stddev_stdvar_1 {{schema:2 count:4 sum:10 buckets:[1 0 0 0 1 0 0 1 1]}}x1 @@ -300,6 +318,8 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_1) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_1) {} 1.163807968526718 +clear + # Apply stddev and stdvar function to histogram with {1, 1, 1, 1} (high res). 
load 10m histogram_stddev_stdvar_2 {{schema:8 count:10 sum:10 buckets:[1 2 3 4]}}x1 @@ -310,6 +330,8 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_2) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_2) {} 2.3971123370139447e-05 +clear + # Apply stddev and stdvar function to histogram with {-50, -8, 0, 3, 8, 9}. load 10m histogram_stddev_stdvar_3 {{schema:3 count:7 sum:62 z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 @@ -320,6 +342,8 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_3) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_3) {} 1844.4651144196398 +clear + # Apply stddev and stdvar function to histogram with {-100000, -10000, -1000, -888, -888, -100, -50, -9, -8, -3}. load 10m histogram_stddev_stdvar_4 {{schema:0 count:10 sum:-112946 z_bucket:0 n_buckets:[0 0 1 1 1 0 1 1 0 0 3 0 0 0 1 0 0 1]}}x1 @@ -330,6 +354,8 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_4) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_4) {} 759352122.1939945 +clear + # Apply stddev and stdvar function to histogram with {-10x10}. load 10m histogram_stddev_stdvar_5 {{schema:0 count:10 sum:-100 z_bucket:0 n_buckets:[0 0 0 0 10]}}x1 @@ -340,6 +366,8 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_5) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_5) {} 1.725830020304794 +clear + # Apply stddev and stdvar function to histogram with {-50, -8, 0, 3, 8, 9, NaN}. load 10m histogram_stddev_stdvar_6 {{schema:3 count:7 sum:NaN z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 @@ -350,6 +378,8 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_6) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_6) {} NaN +clear + # Apply stddev and stdvar function to histogram with {-50, -8, 0, 3, 8, 9, Inf}. load 10m histogram_stddev_stdvar_7 {{schema:3 count:7 sum:Inf z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 @@ -360,6 +390,8 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_7) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_7) {} Inf +clear + # Apply quantile function to histogram with all positive buckets with zero bucket. load 10m histogram_quantile_1 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 buckets:[2 3 0 1 4]}}x1 @@ -370,20 +402,24 @@ eval_warn instant at 10m histogram_quantile(1.001, histogram_quantile_1) eval instant at 10m histogram_quantile(1, histogram_quantile_1) {} 16 +# The following quantiles are within a bucket. Exponential +# interpolation is applied (rather than linear, as it is done for +# classic histograms), leading to slightly different quantile values. 
eval instant at 10m histogram_quantile(0.99, histogram_quantile_1) - {} 15.759999999999998 + {} 15.67072476139083 eval instant at 10m histogram_quantile(0.9, histogram_quantile_1) - {} 13.600000000000001 + {} 12.99603834169977 eval instant at 10m histogram_quantile(0.6, histogram_quantile_1) - {} 4.799999999999997 + {} 4.594793419988138 eval instant at 10m histogram_quantile(0.5, histogram_quantile_1) - {} 1.6666666666666665 + {} 1.5874010519681994 +# Linear interpolation within the zero bucket after all. eval instant at 10m histogram_quantile(0.1, histogram_quantile_1) - {} 0.0006000000000000001 + {} 0.0006 eval instant at 10m histogram_quantile(0, histogram_quantile_1) {} 0 @@ -391,6 +427,8 @@ eval instant at 10m histogram_quantile(0, histogram_quantile_1) eval_warn instant at 10m histogram_quantile(-1, histogram_quantile_1) {} -Inf +clear + # Apply quantile function to histogram with all negative buckets with zero bucket. load 10m histogram_quantile_2 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 n_buckets:[2 3 0 1 4]}}x1 @@ -401,17 +439,20 @@ eval_warn instant at 10m histogram_quantile(1.001, histogram_quantile_2) eval instant at 10m histogram_quantile(1, histogram_quantile_2) {} 0 +# Again, the quantile values here are slightly different from what +# they would be with linear interpolation. Note that quantiles +# ending up in the zero bucket are linearly interpolated after all. eval instant at 10m histogram_quantile(0.99, histogram_quantile_2) - {} -6.000000000000048e-05 + {} -0.00006 eval instant at 10m histogram_quantile(0.9, histogram_quantile_2) - {} -0.0005999999999999996 + {} -0.0006 eval instant at 10m histogram_quantile(0.5, histogram_quantile_2) - {} -1.6666666666666667 + {} -1.5874010519681996 eval instant at 10m histogram_quantile(0.1, histogram_quantile_2) - {} -13.6 + {} -12.996038341699768 eval instant at 10m histogram_quantile(0, histogram_quantile_2) {} -16 @@ -419,7 +460,11 @@ eval instant at 10m histogram_quantile(0, histogram_quantile_2) eval_warn instant at 10m histogram_quantile(-1, histogram_quantile_2) {} -Inf -# Apply quantile function to histogram with both positive and negative buckets with zero bucket. +clear + +# Apply quantile function to histogram with both positive and negative +# buckets with zero bucket. +# First positive buckets with exponential interpolation. load 10m histogram_quantile_3 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 @@ -430,31 +475,34 @@ eval instant at 10m histogram_quantile(1, histogram_quantile_3) {} 16 eval instant at 10m histogram_quantile(0.99, histogram_quantile_3) - {} 15.519999999999996 + {} 15.34822590920423 eval instant at 10m histogram_quantile(0.9, histogram_quantile_3) - {} 11.200000000000003 + {} 10.556063286183155 eval instant at 10m histogram_quantile(0.7, histogram_quantile_3) - {} 1.2666666666666657 + {} 1.2030250360821164 +# Linear interpolation in the zero bucket, symmetrically centered around +# the zero point. eval instant at 10m histogram_quantile(0.55, histogram_quantile_3) - {} 0.0006000000000000005 + {} 0.0006 eval instant at 10m histogram_quantile(0.5, histogram_quantile_3) {} 0 eval instant at 10m histogram_quantile(0.45, histogram_quantile_3) - {} -0.0005999999999999996 + {} -0.0006 +# Finally negative buckets with mirrored exponential interpolation. 
eval instant at 10m histogram_quantile(0.3, histogram_quantile_3) - {} -1.266666666666667 + {} -1.2030250360821169 eval instant at 10m histogram_quantile(0.1, histogram_quantile_3) - {} -11.2 + {} -10.556063286183155 eval instant at 10m histogram_quantile(0.01, histogram_quantile_3) - {} -15.52 + {} -15.34822590920423 eval instant at 10m histogram_quantile(0, histogram_quantile_3) {} -16 @@ -462,6 +510,92 @@ eval instant at 10m histogram_quantile(0, histogram_quantile_3) eval_warn instant at 10m histogram_quantile(-1, histogram_quantile_3) {} -Inf +clear + +# Try different schemas. (The interpolation logic must not depend on the schema.) +clear +load 1m + var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 buckets:[0 5]}} + var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 buckets:[0 5]}} + var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 buckets:[0 5]}} + +eval instant at 1m histogram_quantile(0.5, var_res_histogram) + {schema="-1"} 2.0 + {schema="0"} 1.4142135623730951 + {schema="+1"} 1.189207 + +eval instant at 1m histogram_fraction(0, 2, var_res_histogram{schema="-1"}) + {schema="-1"} 0.5 + +eval instant at 1m histogram_fraction(0, 1.4142135623730951, var_res_histogram{schema="0"}) + {schema="0"} 0.5 + +eval instant at 1m histogram_fraction(0, 1.189207, var_res_histogram{schema="+1"}) + {schema="+1"} 0.5 + +# The same as above, but one bucket "further to the right". +clear +load 1m + var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 buckets:[0 0 5]}} + var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 buckets:[0 0 5]}} + var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 buckets:[0 0 5]}} + +eval instant at 1m histogram_quantile(0.5, var_res_histogram) + {schema="-1"} 8.0 + {schema="0"} 2.82842712474619 + {schema="+1"} 1.6817928305074292 + +eval instant at 1m histogram_fraction(0, 8, var_res_histogram{schema="-1"}) + {schema="-1"} 0.5 + +eval instant at 1m histogram_fraction(0, 2.82842712474619, var_res_histogram{schema="0"}) + {schema="0"} 0.5 + +eval instant at 1m histogram_fraction(0, 1.6817928305074292, var_res_histogram{schema="+1"}) + {schema="+1"} 0.5 + +# And everything again but for negative buckets. 
+clear +load 1m + var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 n_buckets:[0 5]}} + var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 n_buckets:[0 5]}} + var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 n_buckets:[0 5]}} + +eval instant at 1m histogram_quantile(0.5, var_res_histogram) + {schema="-1"} -2.0 + {schema="0"} -1.4142135623730951 + {schema="+1"} -1.189207 + +eval instant at 1m histogram_fraction(-2, 0, var_res_histogram{schema="-1"}) + {schema="-1"} 0.5 + +eval instant at 1m histogram_fraction(-1.4142135623730951, 0, var_res_histogram{schema="0"}) + {schema="0"} 0.5 + +eval instant at 1m histogram_fraction(-1.189207, 0, var_res_histogram{schema="+1"}) + {schema="+1"} 0.5 + +clear +load 1m + var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 n_buckets:[0 0 5]}} + var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 n_buckets:[0 0 5]}} + var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 n_buckets:[0 0 5]}} + +eval instant at 1m histogram_quantile(0.5, var_res_histogram) + {schema="-1"} -8.0 + {schema="0"} -2.82842712474619 + {schema="+1"} -1.6817928305074292 + +eval instant at 1m histogram_fraction(-8, 0, var_res_histogram{schema="-1"}) + {schema="-1"} 0.5 + +eval instant at 1m histogram_fraction(-2.82842712474619, 0, var_res_histogram{schema="0"}) + {schema="0"} 0.5 + +eval instant at 1m histogram_fraction(-1.6817928305074292, 0, var_res_histogram{schema="+1"}) + {schema="+1"} 0.5 + + # Apply fraction function to empty histogram. load 10m histogram_fraction_1 {{}}x1 @@ -469,6 +603,8 @@ load 10m eval instant at 10m histogram_fraction(3.1415, 42, histogram_fraction_1) {} NaN +clear + # Apply fraction function to histogram with positive and zero buckets. load 10m histogram_fraction_2 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 buckets:[2 3 0 1 4]}}x1 @@ -485,11 +621,18 @@ eval instant at 10m histogram_fraction(-0.001, 0, histogram_fraction_2) eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_2) {} 0.16666666666666666 +# Note that this result and the one above add up to 1. +eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_2) + {} 0.8333333333333334 + +# We are in the zero bucket, resulting in linear interpolation eval instant at 10m histogram_fraction(0, 0.0005, histogram_fraction_2) {} 0.08333333333333333 -eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_2) - {} 0.8333333333333334 +# Demonstrate that the inverse operation with histogram_quantile yields +# the original value with the non-trivial result above. +eval instant at 10m histogram_quantile(0.08333333333333333, histogram_fraction_2) + {} 0.0005 eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_2) {} 0 @@ -497,17 +640,30 @@ eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_2) eval instant at 10m histogram_fraction(1, 2, histogram_fraction_2) {} 0.25 +# More non-trivial results with interpolation involved below, including +# some round-trips via histogram_quantile to prove that the inverse +# operation leads to the same results. 
+ +eval instant at 10m histogram_fraction(0, 1.5, histogram_fraction_2) + {} 0.4795739585136224 + eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_2) - {} 0.125 + {} 0.10375937481971091 eval instant at 10m histogram_fraction(1, 8, histogram_fraction_2) {} 0.3333333333333333 +eval instant at 10m histogram_fraction(0, 6, histogram_fraction_2) + {} 0.6320802083934297 + +eval instant at 10m histogram_quantile(0.6320802083934297, histogram_fraction_2) + {} 6 + eval instant at 10m histogram_fraction(1, 6, histogram_fraction_2) - {} 0.2916666666666667 + {} 0.29874687506009634 eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_2) - {} 0.16666666666666666 + {} 0.15250624987980724 eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_2) {} 0 @@ -570,6 +726,12 @@ eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_3) eval instant at 10m histogram_fraction(-0.0005, 0, histogram_fraction_3) {} 0.08333333333333333 +eval instant at 10m histogram_fraction(-inf, -0.0005, histogram_fraction_3) + {} 0.9166666666666666 + +eval instant at 10m histogram_quantile(0.9166666666666666, histogram_fraction_3) + {} -0.0005 + eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_3) {} 0 @@ -595,16 +757,22 @@ eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_3) {} 0.25 eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_3) - {} 0.125 + {} 0.10375937481971091 eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_3) {} 0.3333333333333333 +eval instant at 10m histogram_fraction(-inf, -6, histogram_fraction_3) + {} 0.36791979160657035 + +eval instant at 10m histogram_quantile(0.36791979160657035, histogram_fraction_3) + {} -6 + eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_3) - {} 0.2916666666666667 + {} 0.29874687506009634 eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_3) - {} 0.16666666666666666 + {} 0.15250624987980724 eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_3) {} 0 @@ -633,6 +801,8 @@ eval instant at 10m histogram_fraction(NaN, NaN, histogram_fraction_3) eval instant at 10m histogram_fraction(-Inf, +Inf, histogram_fraction_3) {} 1 +clear + # Apply fraction function to histogram with both positive, negative and zero buckets. 
load 10m histogram_fraction_4 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 @@ -652,6 +822,18 @@ eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_4) eval instant at 10m histogram_fraction(-0.0005, 0.0005, histogram_fraction_4) {} 0.08333333333333333 +eval instant at 10m histogram_fraction(-inf, 0.0005, histogram_fraction_4) + {} 0.5416666666666666 + +eval instant at 10m histogram_quantile(0.5416666666666666, histogram_fraction_4) + {} 0.0005 + +eval instant at 10m histogram_fraction(-inf, -0.0005, histogram_fraction_4) + {} 0.4583333333333333 + +eval instant at 10m histogram_quantile(0.4583333333333333, histogram_fraction_4) + {} -0.0005 + eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_4) {} 0.4166666666666667 @@ -662,31 +844,31 @@ eval instant at 10m histogram_fraction(1, 2, histogram_fraction_4) {} 0.125 eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_4) - {} 0.0625 + {} 0.051879687409855414 eval instant at 10m histogram_fraction(1, 8, histogram_fraction_4) {} 0.16666666666666666 eval instant at 10m histogram_fraction(1, 6, histogram_fraction_4) - {} 0.14583333333333334 + {} 0.14937343753004825 eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_4) - {} 0.08333333333333333 + {} 0.07625312493990366 eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_4) {} 0.125 eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_4) - {} 0.0625 + {} 0.051879687409855456 eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_4) {} 0.16666666666666666 eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_4) - {} 0.14583333333333334 + {} 0.14937343753004817 eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_4) - {} 0.08333333333333333 + {} 0.07625312493990362 eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_4) {} 0 @@ -766,18 +948,40 @@ eval instant at 10m histogram_mul_div*float_series_0 eval instant at 10m float_series_0*histogram_mul_div {} {{schema:0 count:0 sum:0 z_bucket:0 z_bucket_w:0.001 buckets:[0 0 0] n_buckets:[0 0 0]}} -# TODO: (NeerajGartia21) remove all the histogram buckets in case of division with zero. See: https://github.com/prometheus/prometheus/issues/13934 eval instant at 10m histogram_mul_div/0 - {} {{schema:0 count:Inf sum:Inf z_bucket:Inf z_bucket_w:0.001 buckets:[Inf Inf Inf] n_buckets:[Inf Inf Inf]}} + {} {{schema:0 count:Inf sum:Inf z_bucket_w:0.001 z_bucket:Inf}} eval instant at 10m histogram_mul_div/float_series_0 - {} {{schema:0 count:Inf sum:Inf z_bucket:Inf z_bucket_w:0.001 buckets:[Inf Inf Inf] n_buckets:[Inf Inf Inf]}} + {} {{schema:0 count:Inf sum:Inf z_bucket_w:0.001 z_bucket:Inf}} eval instant at 10m histogram_mul_div*0/0 - {} {{schema:0 count:NaN sum:NaN z_bucket:NaN z_bucket_w:0.001 buckets:[NaN NaN NaN] n_buckets:[NaN NaN NaN]}} + {} {{schema:0 count:NaN sum:NaN z_bucket_w:0.001 z_bucket:NaN}} + +eval_info instant at 10m histogram_mul_div*histogram_mul_div + +eval_info instant at 10m histogram_mul_div/histogram_mul_div + +eval_info instant at 10m float_series_3/histogram_mul_div + +eval_info instant at 10m 0/histogram_mul_div clear +# Apply binary operators to mixed histogram and float samples. +# TODO:(NeerajGartia21) move these tests to their respective locations when tests from engine_test.go are moved here.
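+# Addition and subtraction between a float sample and a histogram sample are not +# supported; they are expected to produce no result and raise an info annotation, +# which is why the eval_info expectations below are empty.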
+ +load 10m + histogram_sample {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 + float_sample 0x1 + +eval_info instant at 10m float_sample+histogram_sample + +eval_info instant at 10m histogram_sample+float_sample + +eval_info instant at 10m float_sample-histogram_sample + +eval_info instant at 10m histogram_sample-float_sample + # Counter reset only noticeable in a single bucket. load 5m reset_in_bucket {{schema:0 count:4 sum:5 buckets:[1 2 1]}} {{schema:0 count:5 sum:6 buckets:[1 1 3]}} {{schema:0 count:6 sum:7 buckets:[1 2 3]}} @@ -814,7 +1018,7 @@ load 30s some_metric {{schema:0 sum:1 count:1 buckets:[1] counter_reset_hint:gauge}} {{schema:0 sum:2 count:2 buckets:[2] counter_reset_hint:gauge}} {{schema:0 sum:3 count:3 buckets:[3] counter_reset_hint:gauge}} # Test the case where we only have two points for rate -eval_warn instant at 30s rate(some_metric[30s]) +eval_warn instant at 30s rate(some_metric[1m]) {} {{count:0.03333333333333333 sum:0.03333333333333333 buckets:[0.03333333333333333]}} # Test the case where we have more than two points for rate @@ -836,11 +1040,11 @@ eval_warn instant at 1m30s rate(some_metric[1m]) # Should produce no results. # Start with custom, end with exponential. -eval_warn instant at 1m rate(some_metric[30s]) +eval_warn instant at 1m rate(some_metric[1m]) # Should produce no results. # Start with exponential, end with custom. -eval_warn instant at 30s rate(some_metric[30s]) +eval_warn instant at 30s rate(some_metric[1m]) # Should produce no results. clear @@ -975,8 +1179,8 @@ clear load 1m histogram_sum_over_time {{schema:0 count:25 sum:1234.5 z_bucket:4 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[2 4 0 0 1 9]}} {{schema:0 count:41 sum:2345.6 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}} {{schema:0 count:41 sum:1111.1 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}} {{schema:1 count:0}} -eval instant at 3m sum_over_time(histogram_sum_over_time[3m:1m]) +eval instant at 3m sum_over_time(histogram_sum_over_time[4m:1m]) {} {{schema:0 count:107 sum:4691.2 z_bucket:14 z_bucket_w:0.001 buckets:[3 8 2 5 3 2 2] n_buckets:[2 6 8 4 15 9 0 0 0 10 10 4]}} -eval instant at 3m avg_over_time(histogram_sum_over_time[3m:1m]) +eval instant at 3m avg_over_time(histogram_sum_over_time[4m:1m]) {} {{schema:0 count:26.75 sum:1172.8 z_bucket:3.5 z_bucket_w:0.001 buckets:[0.75 2 0.5 1.25 0.75 0.5 0.5] n_buckets:[0.5 1.5 2 1 3.75 2.25 0 0 0 2.5 2.5 1]}} diff --git a/promql/promqltest/testdata/operators.test b/promql/promqltest/testdata/operators.test index df2311b9ba..645cca88b8 100644 --- a/promql/promqltest/testdata/operators.test +++ b/promql/promqltest/testdata/operators.test @@ -113,7 +113,7 @@ eval instant at 50m http_requests{job="api-server", group="canary"} http_requests{group="canary", instance="0", job="api-server"} 300 http_requests{group="canary", instance="1", job="api-server"} 400 -eval instant at 50m http_requests{job="api-server", group="canary"} + rate(http_requests{job="api-server"}[5m]) * 5 * 60 +eval instant at 50m http_requests{job="api-server", group="canary"} + rate(http_requests{job="api-server"}[10m]) * 5 * 60 {group="canary", instance="0", job="api-server"} 330 {group="canary", instance="1", job="api-server"} 440 @@ -308,65 +308,65 @@ load 5m threshold{instance="abc",job="node",target="a@b.com"} 0 # Copy machine role to node variable. 
-eval instant at 5m node_role * on (instance) group_right (role) node_var +eval instant at 1m node_role * on (instance) group_right (role) node_var {instance="abc",job="node",role="prometheus"} 2 -eval instant at 5m node_var * on (instance) group_left (role) node_role +eval instant at 1m node_var * on (instance) group_left (role) node_role {instance="abc",job="node",role="prometheus"} 2 -eval instant at 5m node_var * ignoring (role) group_left (role) node_role +eval instant at 1m node_var * ignoring (role) group_left (role) node_role {instance="abc",job="node",role="prometheus"} 2 -eval instant at 5m node_role * ignoring (role) group_right (role) node_var +eval instant at 1m node_role * ignoring (role) group_right (role) node_var {instance="abc",job="node",role="prometheus"} 2 # Copy machine role to node variable with instrumentation labels. -eval instant at 5m node_cpu * ignoring (role, mode) group_left (role) node_role +eval instant at 1m node_cpu * ignoring (role, mode) group_left (role) node_role {instance="abc",job="node",mode="idle",role="prometheus"} 3 {instance="abc",job="node",mode="user",role="prometheus"} 1 -eval instant at 5m node_cpu * on (instance) group_left (role) node_role +eval instant at 1m node_cpu * on (instance) group_left (role) node_role {instance="abc",job="node",mode="idle",role="prometheus"} 3 {instance="abc",job="node",mode="user",role="prometheus"} 1 # Ratio of total. -eval instant at 5m node_cpu / on (instance) group_left sum by (instance,job)(node_cpu) +eval instant at 1m node_cpu / on (instance) group_left sum by (instance,job)(node_cpu) {instance="abc",job="node",mode="idle"} .75 {instance="abc",job="node",mode="user"} .25 {instance="def",job="node",mode="idle"} .80 {instance="def",job="node",mode="user"} .20 -eval instant at 5m sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu) +eval instant at 1m sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu) {job="node",mode="idle"} 0.7857142857142857 {job="node",mode="user"} 0.21428571428571427 -eval instant at 5m sum(sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu)) +eval instant at 1m sum(sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu)) {} 1.0 -eval instant at 5m node_cpu / ignoring (mode) group_left sum without (mode)(node_cpu) +eval instant at 1m node_cpu / ignoring (mode) group_left sum without (mode)(node_cpu) {instance="abc",job="node",mode="idle"} .75 {instance="abc",job="node",mode="user"} .25 {instance="def",job="node",mode="idle"} .80 {instance="def",job="node",mode="user"} .20 -eval instant at 5m node_cpu / ignoring (mode) group_left(dummy) sum without (mode)(node_cpu) +eval instant at 1m node_cpu / ignoring (mode) group_left(dummy) sum without (mode)(node_cpu) {instance="abc",job="node",mode="idle"} .75 {instance="abc",job="node",mode="user"} .25 {instance="def",job="node",mode="idle"} .80 {instance="def",job="node",mode="user"} .20 -eval instant at 5m sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu) +eval instant at 1m sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu) {job="node",mode="idle"} 0.7857142857142857 {job="node",mode="user"} 0.21428571428571427 -eval instant at 5m sum(sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu)) +eval instant at 1m sum(sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu)) {} 
1.0 # Copy over label from metric with no matching labels, without having to list cross-job target labels ('job' here). -eval instant at 5m node_cpu + on(dummy) group_left(foo) random*0 +eval instant at 1m node_cpu + on(dummy) group_left(foo) random*0 {instance="abc",job="node",mode="idle",foo="bar"} 3 {instance="abc",job="node",mode="user",foo="bar"} 1 {instance="def",job="node",mode="idle",foo="bar"} 8 @@ -374,12 +374,12 @@ eval instant at 5m node_cpu + on(dummy) group_left(foo) random*0 # Use threshold from metric, and copy over target. -eval instant at 5m node_cpu > on(job, instance) group_left(target) threshold +eval instant at 1m node_cpu > on(job, instance) group_left(target) threshold node_cpu{instance="abc",job="node",mode="idle",target="a@b.com"} 3 node_cpu{instance="abc",job="node",mode="user",target="a@b.com"} 1 # Use threshold from metric, and a default (1) if it's not present. -eval instant at 5m node_cpu > on(job, instance) group_left(target) (threshold or on (job, instance) (sum by (job, instance)(node_cpu) * 0 + 1)) +eval instant at 1m node_cpu > on(job, instance) group_left(target) (threshold or on (job, instance) (sum by (job, instance)(node_cpu) * 0 + 1)) node_cpu{instance="abc",job="node",mode="idle",target="a@b.com"} 3 node_cpu{instance="abc",job="node",mode="user",target="a@b.com"} 1 node_cpu{instance="def",job="node",mode="idle"} 8 @@ -387,37 +387,37 @@ eval instant at 5m node_cpu > on(job, instance) group_left(target) (threshold or # Check that binops drop the metric name. -eval instant at 5m node_cpu + 2 +eval instant at 1m node_cpu + 2 {instance="abc",job="node",mode="idle"} 5 {instance="abc",job="node",mode="user"} 3 {instance="def",job="node",mode="idle"} 10 {instance="def",job="node",mode="user"} 4 -eval instant at 5m node_cpu - 2 +eval instant at 1m node_cpu - 2 {instance="abc",job="node",mode="idle"} 1 {instance="abc",job="node",mode="user"} -1 {instance="def",job="node",mode="idle"} 6 {instance="def",job="node",mode="user"} 0 -eval instant at 5m node_cpu / 2 +eval instant at 1m node_cpu / 2 {instance="abc",job="node",mode="idle"} 1.5 {instance="abc",job="node",mode="user"} 0.5 {instance="def",job="node",mode="idle"} 4 {instance="def",job="node",mode="user"} 1 -eval instant at 5m node_cpu * 2 +eval instant at 1m node_cpu * 2 {instance="abc",job="node",mode="idle"} 6 {instance="abc",job="node",mode="user"} 2 {instance="def",job="node",mode="idle"} 16 {instance="def",job="node",mode="user"} 4 -eval instant at 5m node_cpu ^ 2 +eval instant at 1m node_cpu ^ 2 {instance="abc",job="node",mode="idle"} 9 {instance="abc",job="node",mode="user"} 1 {instance="def",job="node",mode="idle"} 64 {instance="def",job="node",mode="user"} 4 -eval instant at 5m node_cpu % 2 +eval instant at 1m node_cpu % 2 {instance="abc",job="node",mode="idle"} 1 {instance="abc",job="node",mode="user"} 1 {instance="def",job="node",mode="idle"} 0 @@ -432,14 +432,14 @@ load 5m metricB{baz="meh"} 4 # On with no labels, for metrics with no common labels. -eval instant at 5m random + on() metricA +eval instant at 1m random + on() metricA {} 5 # Ignoring with no labels is the same as no ignoring. 
-eval instant at 5m metricA + ignoring() metricB +eval instant at 1m metricA + ignoring() metricB {baz="meh"} 7 -eval instant at 5m metricA + metricB +eval instant at 1m metricA + metricB {baz="meh"} 7 clear @@ -457,16 +457,16 @@ load 5m test_total{instance="localhost"} 50 test_smaller{instance="localhost"} 10 -eval instant at 5m test_total > bool test_smaller +eval instant at 1m test_total > bool test_smaller {instance="localhost"} 1 -eval instant at 5m test_total > test_smaller +eval instant at 1m test_total > test_smaller test_total{instance="localhost"} 50 -eval instant at 5m test_total < bool test_smaller +eval instant at 1m test_total < bool test_smaller {instance="localhost"} 0 -eval instant at 5m test_total < test_smaller +eval instant at 1m test_total < test_smaller clear @@ -476,14 +476,14 @@ load 5m trigx{} 20 trigNaN{} NaN -eval instant at 5m trigy atan2 trigx +eval instant at 1m trigy atan2 trigx {} 0.4636476090008061 -eval instant at 5m trigy atan2 trigNaN +eval instant at 1m trigy atan2 trigNaN {} NaN -eval instant at 5m 10 atan2 20 +eval instant at 1m 10 atan2 20 0.4636476090008061 -eval instant at 5m 10 atan2 NaN +eval instant at 1m 10 atan2 NaN NaN diff --git a/promql/promqltest/testdata/range_queries.test b/promql/promqltest/testdata/range_queries.test index e695109602..3bfe2ce4cb 100644 --- a/promql/promqltest/testdata/range_queries.test +++ b/promql/promqltest/testdata/range_queries.test @@ -1,18 +1,18 @@ # sum_over_time with all values -load 30s +load 15s bar 0 1 10 100 1000 -eval range from 0 to 2m step 1m sum_over_time(bar[30s]) +eval range from 0 to 1m step 30s sum_over_time(bar[30s]) {} 0 11 1100 clear # sum_over_time with trailing values -load 30s +load 15s bar 0 1 10 100 1000 0 0 0 0 eval range from 0 to 2m step 1m sum_over_time(bar[30s]) - {} 0 11 1100 + {} 0 1100 0 clear @@ -21,15 +21,15 @@ load 30s bar 0 1 10 100 1000 10000 100000 1000000 10000000 eval range from 0 to 4m step 1m sum_over_time(bar[30s]) - {} 0 11 1100 110000 11000000 + {} 0 10 1000 100000 10000000 clear # sum_over_time with all values random -load 30s +load 15s bar 5 17 42 2 7 905 51 -eval range from 0 to 3m step 1m sum_over_time(bar[30s]) +eval range from 0 to 90s step 30s sum_over_time(bar[30s]) {} 5 59 9 956 clear diff --git a/promql/promqltest/testdata/staleness.test b/promql/promqltest/testdata/staleness.test index 4fdbc997b7..a48473d439 100644 --- a/promql/promqltest/testdata/staleness.test +++ b/promql/promqltest/testdata/staleness.test @@ -14,10 +14,10 @@ eval instant at 40s metric {__name__="metric"} 2 # It goes stale 5 minutes after the last sample. -eval instant at 330s metric +eval instant at 329s metric {__name__="metric"} 2 -eval instant at 331s metric +eval instant at 330s metric # Range vector ignores stale sample. 
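+# Note: the staleness boundary above moves by one second because the lookback +# window is now left-open, so a sample that is exactly 5 minutes old is no +# longer returned.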
@@ -30,9 +30,13 @@ eval instant at 10s count_over_time(metric[1s]) eval instant at 20s count_over_time(metric[1s]) eval instant at 20s count_over_time(metric[10s]) + +eval instant at 20s count_over_time(metric[20s]) {} 1 eval instant at 20s count_over_time(metric[10]) + +eval instant at 20s count_over_time(metric[20]) {} 1 @@ -48,7 +52,7 @@ eval instant at 0s metric eval instant at 150s metric {__name__="metric"} 0 -eval instant at 300s metric +eval instant at 299s metric {__name__="metric"} 0 -eval instant at 301s metric +eval instant at 300s metric diff --git a/promql/promqltest/testdata/subquery.test b/promql/promqltest/testdata/subquery.test index 1d338d9764..3ac547a2b5 100644 --- a/promql/promqltest/testdata/subquery.test +++ b/promql/promqltest/testdata/subquery.test @@ -10,18 +10,18 @@ eval instant at 10s sum_over_time(metric[50s:5s]) # Every evaluation yields the last value, i.e. 2 eval instant at 5m sum_over_time(metric[50s:10s]) - {} 12 + {} 10 # Series becomes stale at 5m10s (5m after last sample) -# Hence subquery gets a single sample at 6m-50s=5m10s. -eval instant at 6m sum_over_time(metric[50s:10s]) +# Hence subquery gets a single sample at 5m10s. +eval instant at 5m59s sum_over_time(metric[60s:10s]) {} 2 eval instant at 10s rate(metric[20s:10s]) {} 0.1 eval instant at 20s rate(metric[20s:5s]) - {} 0.05 + {} 0.06666666666666667 clear @@ -49,16 +49,16 @@ load 10s metric3 0+3x1000 eval instant at 1000s sum_over_time(metric1[30s:10s]) - {} 394 + {} 297 -# This is (394*2 - 100), because other than the last 100 at 1000s, +# This is (97 + 98*2 + 99*2 + 100), because other than 97@975s and 100@1000s, # everything else is repeated with the 5s step. eval instant at 1000s sum_over_time(metric1[30s:5s]) - {} 688 + {} 591 -# Offset is aligned with the step. +# Offset is aligned with the step, so this is from [98@980s, 99@990s, 100@1000s]. eval instant at 1010s sum_over_time(metric1[30s:10s] offset 10s) - {} 394 + {} 297 # Same result for different offsets due to step alignment. 
eval instant at 1010s sum_over_time(metric1[30s:10s] offset 9s) @@ -93,16 +93,16 @@ eval instant at 1010s sum_over_time((metric1)[30:10] offset 3) # Nested subqueries eval instant at 1000s rate(sum_over_time(metric1[30s:10s])[50s:10s]) - {} 0.4 + {} 0.30000000000000004 eval instant at 1000s rate(sum_over_time(metric2[30s:10s])[50s:10s]) - {} 0.8 + {} 0.6000000000000001 eval instant at 1000s rate(sum_over_time(metric3[30s:10s])[50s:10s]) - {} 1.2 + {} 0.9 eval instant at 1000s rate(sum_over_time((metric1+metric2+metric3)[30s:10s])[30s:10s]) - {} 2.4 + {} 1.8 clear @@ -115,16 +115,20 @@ load 7s eval instant at 80s rate(metric[1m]) {} 2.517857143 -# No extrapolation, [2@20, 144@80]: (144 - 2) / 60 -eval instant at 80s rate(metric[1m:10s]) - {} 2.366666667 +# Extrapolated to range start for counter, [2@20, 144@80]: (144 - 2) / (80 - 20) +eval instant at 80s rate(metric[1m500ms:10s]) + {} 2.3666666666666667 + +# Extrapolated to zero value for counter, [2@20, 144@80]: (144 - 0) / 61 +eval instant at 80s rate(metric[1m1s:10s]) + {} 2.360655737704918 # Only one value between 10s and 20s, 2@14 eval instant at 20s min_over_time(metric[10s]) {} 2 -# min(1@10, 2@20) -eval instant at 20s min_over_time(metric[10s:10s]) +# min(2@20) +eval instant at 20s min_over_time(metric[15s:10s]) {} 1 eval instant at 20m min_over_time(rate(metric[5m])[20m:1m]) diff --git a/promql/promqltest/testdata/trig_functions.test b/promql/promqltest/testdata/trig_functions.test index fa5f94651b..036621193d 100644 --- a/promql/promqltest/testdata/trig_functions.test +++ b/promql/promqltest/testdata/trig_functions.test @@ -5,92 +5,92 @@ load 5m trig{l="y"} 20 trig{l="NaN"} NaN -eval instant at 5m sin(trig) +eval instant at 1m sin(trig) {l="x"} -0.5440211108893699 {l="y"} 0.9129452507276277 {l="NaN"} NaN -eval instant at 5m cos(trig) +eval instant at 1m cos(trig) {l="x"} -0.8390715290764524 {l="y"} 0.40808206181339196 {l="NaN"} NaN -eval instant at 5m tan(trig) +eval instant at 1m tan(trig) {l="x"} 0.6483608274590867 {l="y"} 2.2371609442247427 {l="NaN"} NaN -eval instant at 5m asin(trig - 10.1) +eval instant at 1m asin(trig - 10.1) {l="x"} -0.10016742116155944 {l="y"} NaN {l="NaN"} NaN -eval instant at 5m acos(trig - 10.1) +eval instant at 1m acos(trig - 10.1) {l="x"} 1.670963747956456 {l="y"} NaN {l="NaN"} NaN -eval instant at 5m atan(trig) +eval instant at 1m atan(trig) {l="x"} 1.4711276743037345 {l="y"} 1.5208379310729538 {l="NaN"} NaN -eval instant at 5m sinh(trig) +eval instant at 1m sinh(trig) {l="x"} 11013.232920103324 {l="y"} 2.4258259770489514e+08 {l="NaN"} NaN -eval instant at 5m cosh(trig) +eval instant at 1m cosh(trig) {l="x"} 11013.232920103324 {l="y"} 2.4258259770489514e+08 {l="NaN"} NaN -eval instant at 5m tanh(trig) +eval instant at 1m tanh(trig) {l="x"} 0.9999999958776927 {l="y"} 1 {l="NaN"} NaN -eval instant at 5m asinh(trig) +eval instant at 1m asinh(trig) {l="x"} 2.99822295029797 {l="y"} 3.6895038689889055 {l="NaN"} NaN -eval instant at 5m acosh(trig) +eval instant at 1m acosh(trig) {l="x"} 2.993222846126381 {l="y"} 3.6882538673612966 {l="NaN"} NaN -eval instant at 5m atanh(trig - 10.1) +eval instant at 1m atanh(trig - 10.1) {l="x"} -0.10033534773107522 {l="y"} NaN {l="NaN"} NaN -eval instant at 5m rad(trig) +eval instant at 1m rad(trig) {l="x"} 0.17453292519943295 {l="y"} 0.3490658503988659 {l="NaN"} NaN -eval instant at 5m rad(trig - 10) +eval instant at 1m rad(trig - 10) {l="x"} 0 {l="y"} 0.17453292519943295 {l="NaN"} NaN -eval instant at 5m rad(trig - 20) +eval instant at 1m rad(trig - 20) {l="x"} 
-0.17453292519943295 {l="y"} 0 {l="NaN"} NaN -eval instant at 5m deg(trig) +eval instant at 1m deg(trig) {l="x"} 572.9577951308232 {l="y"} 1145.9155902616465 {l="NaN"} NaN -eval instant at 5m deg(trig - 10) +eval instant at 1m deg(trig - 10) {l="x"} 0 {l="y"} 572.9577951308232 {l="NaN"} NaN -eval instant at 5m deg(trig - 20) +eval instant at 1m deg(trig - 20) {l="x"} -572.9577951308232 {l="y"} 0 {l="NaN"} NaN diff --git a/promql/quantile.go b/promql/quantile.go index 7ddb76acba..06775d3ae6 100644 --- a/promql/quantile.go +++ b/promql/quantile.go @@ -153,19 +153,31 @@ func bucketQuantile(q float64, buckets buckets) (float64, bool, bool) { // histogramQuantile calculates the quantile 'q' based on the given histogram. // -// The quantile value is interpolated assuming a linear distribution within a -// bucket. -// TODO(beorn7): Find an interpolation method that is a better fit for -// exponential buckets (and think about configurable interpolation). +// For custom buckets, the result is interpolated linearly, i.e. it is assumed +// the observations are uniformly distributed within each bucket. (This is a +// quite blunt assumption, but it is consistent with the interpolation method +// used for classic histograms so far.) +// +// For exponential buckets, the interpolation is done under the assumption that +// the samples within each bucket are distributed in a way that they would +// uniformly populate the buckets in a hypothetical histogram with higher +// resolution. For example, if the rank calculation suggests that the requested +// quantile is right in the middle of the population of the (1,2] bucket, we +// assume the quantile would be right at the bucket boundary between the two +// buckets the (1,2] bucket would be divided into if the histogram had double +// the resolution, which is 2**2**-1 = 1.4142... We call this exponential +// interpolation. +// +// However, for a quantile that ends up in the zero bucket, this method isn't +// very helpful (because there is an infinite number of buckets close to zero, +// so we would have to assume zero as the result). Therefore, we return to +// linear interpolation in the zero bucket. // // A natural lower bound of 0 is assumed if the histogram has only positive // buckets. Likewise, a natural upper bound of 0 is assumed if the histogram has // only negative buckets. -// TODO(beorn7): Come to terms if we want that. // -// There are a number of special cases (once we have a way to report errors -// happening during evaluations of AST functions, we should report those -// explicitly): +// There are a number of special cases: // // If the histogram has 0 observations, NaN is returned. // @@ -193,9 +205,9 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 { rank float64 ) - // if there are NaN observations in the histogram (h.Sum is NaN), use the forward iterator - // if the q < 0.5, use the forward iterator - // if the q >= 0.5, use the reverse iterator + // If there are NaN observations in the histogram (h.Sum is NaN), use the forward iterator. + // If q < 0.5, use the forward iterator. + // If q >= 0.5, use the reverse iterator. if math.IsNaN(h.Sum) || q < 0.5 { it = h.AllBucketIterator() rank = q * h.Count @@ -260,8 +272,29 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 { rank = count - rank } - // TODO(codesome): Use a better estimation than linear. - return bucket.Lower + (bucket.Upper-bucket.Lower)*(rank/bucket.Count) + // The fraction of how far we are into the current bucket. 
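+ // For example, if the rank falls exactly halfway into the (1,2] bucket, + // fraction is 0.5 and the exponential interpolation below returns + // 2^0.5 = 1.4142..., the boundary at which that bucket would be split if + // the histogram had twice the resolution (see the doc comment above).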
+ fraction := rank / bucket.Count + + // Return linear interpolation for custom buckets and for quantiles that + // end up in the zero bucket. + if h.UsesCustomBuckets() || (bucket.Lower <= 0 && bucket.Upper >= 0) { + return bucket.Lower + (bucket.Upper-bucket.Lower)*fraction + } + + // For exponential buckets, we interpolate on a logarithmic scale. On a + // logarithmic scale, the exponential bucket boundaries (for any schema) + // become linear (every bucket has the same width). Therefore, after + // taking the logarithm of both bucket boundaries, we can use the + // calculated fraction in the same way as for linear interpolation (see + // above). Finally, we return to the normal scale by applying the + // exponential function to the result. + logLower := math.Log2(math.Abs(bucket.Lower)) + logUpper := math.Log2(math.Abs(bucket.Upper)) + if bucket.Lower > 0 { // Positive bucket. + return math.Exp2(logLower + (logUpper-logLower)*fraction) + } + // Otherwise, we are in a negative bucket and have to mirror things. + return -math.Exp2(logUpper + (logLower-logUpper)*(1-fraction)) } // histogramFraction calculates the fraction of observations between the @@ -271,8 +304,8 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 { // histogramQuantile(0.9, h) returns 123.4, then histogramFraction(-Inf, 123.4, h) // returns 0.9. // -// The same notes (and TODOs) with regard to interpolation and assumptions about -// the zero bucket boundaries apply as for histogramQuantile. +// The same notes with regard to interpolation and assumptions about the zero +// bucket boundaries apply as for histogramQuantile. // // Whether either boundary is inclusive or exclusive doesn’t actually matter as // long as interpolation has to be performed anyway. In the case of a boundary @@ -310,7 +343,35 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6 ) for it.Next() { b := it.At() - if b.Lower < 0 && b.Upper > 0 { + zeroBucket := false + + // interpolateLinearly is used for custom buckets to be + // consistent with the linear interpolation known from classic + // histograms. It is also used for the zero bucket. + interpolateLinearly := func(v float64) float64 { + return rank + b.Count*(v-b.Lower)/(b.Upper-b.Lower) + } + + // interpolateExponentially is using the same exponential + // interpolation method as above for histogramQuantile. This + // method is a better fit for exponential bucketing. + interpolateExponentially := func(v float64) float64 { + var ( + logLower = math.Log2(math.Abs(b.Lower)) + logUpper = math.Log2(math.Abs(b.Upper)) + logV = math.Log2(math.Abs(v)) + fraction float64 + ) + if v > 0 { + fraction = (logV - logLower) / (logUpper - logLower) + } else { + fraction = 1 - ((logV - logUpper) / (logLower - logUpper)) + } + return rank + b.Count*fraction + } + + if b.Lower <= 0 && b.Upper >= 0 { + zeroBucket = true switch { case len(h.NegativeBuckets) == 0 && len(h.PositiveBuckets) > 0: // This is the zero bucket and the histogram has only @@ -325,10 +386,12 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6 } } if !lowerSet && b.Lower >= lower { + // We have hit the lower value at the lower bucket boundary. lowerRank = rank lowerSet = true } if !upperSet && b.Lower >= upper { + // We have hit the upper value at the lower bucket boundary. 
upperRank = rank upperSet = true } @@ -336,11 +399,21 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6 break } if !lowerSet && b.Lower < lower && b.Upper > lower { - lowerRank = rank + b.Count*(lower-b.Lower)/(b.Upper-b.Lower) + // The lower value is in this bucket. + if h.UsesCustomBuckets() || zeroBucket { + lowerRank = interpolateLinearly(lower) + } else { + lowerRank = interpolateExponentially(lower) + } lowerSet = true } if !upperSet && b.Lower < upper && b.Upper > upper { - upperRank = rank + b.Count*(upper-b.Lower)/(b.Upper-b.Lower) + // The upper value is in this bucket. + if h.UsesCustomBuckets() || zeroBucket { + upperRank = interpolateLinearly(upper) + } else { + upperRank = interpolateExponentially(upper) + } upperSet = true } if lowerSet && upperSet { diff --git a/promql/query_logger.go b/promql/query_logger.go index 7e06ebb97f..c0a70b66d7 100644 --- a/promql/query_logger.go +++ b/promql/query_logger.go @@ -19,6 +19,7 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "strings" @@ -26,14 +27,12 @@ import ( "unicode/utf8" "github.com/edsrzf/mmap-go" - "github.com/go-kit/log" - "github.com/go-kit/log/level" ) type ActiveQueryTracker struct { - mmapedFile []byte + mmappedFile []byte getNextIndex chan int - logger log.Logger + logger *slog.Logger closer io.Closer maxConcurrent int } @@ -63,11 +62,11 @@ func parseBrokenJSON(brokenJSON []byte) (string, bool) { return queries, true } -func logUnfinishedQueries(filename string, filesize int, logger log.Logger) { +func logUnfinishedQueries(filename string, filesize int, logger *slog.Logger) { if _, err := os.Stat(filename); err == nil { fd, err := os.Open(filename) if err != nil { - level.Error(logger).Log("msg", "Failed to open query log file", "err", err) + logger.Error("Failed to open query log file", "err", err) return } defer fd.Close() @@ -75,7 +74,7 @@ func logUnfinishedQueries(filename string, filesize int, logger log.Logger) { brokenJSON := make([]byte, filesize) _, err = fd.Read(brokenJSON) if err != nil { - level.Error(logger).Log("msg", "Failed to read query log file", "err", err) + logger.Error("Failed to read query log file", "err", err) return } @@ -83,72 +82,72 @@ func logUnfinishedQueries(filename string, filesize int, logger log.Logger) { if !queriesExist { return } - level.Info(logger).Log("msg", "These queries didn't finish in prometheus' last run:", "queries", queries) + logger.Info("These queries didn't finish in prometheus' last run:", "queries", queries) } } -type mmapedFile struct { +type mmappedFile struct { f io.Closer m mmap.MMap } -func (f *mmapedFile) Close() error { +func (f *mmappedFile) Close() error { err := f.m.Unmap() if err != nil { - err = fmt.Errorf("mmapedFile: unmapping: %w", err) + err = fmt.Errorf("mmappedFile: unmapping: %w", err) } if fErr := f.f.Close(); fErr != nil { - return errors.Join(fmt.Errorf("close mmapedFile.f: %w", fErr), err) + return errors.Join(fmt.Errorf("close mmappedFile.f: %w", fErr), err) } return err } -func getMMapedFile(filename string, filesize int, logger log.Logger) ([]byte, io.Closer, error) { +func getMMappedFile(filename string, filesize int, logger *slog.Logger) ([]byte, io.Closer, error) { file, err := os.OpenFile(filename, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0o666) if err != nil { absPath, pathErr := filepath.Abs(filename) if pathErr != nil { absPath = filename } - level.Error(logger).Log("msg", "Error opening query log file", "file", absPath, "err", err) + logger.Error("Error opening query log file", "file", 
absPath, "err", err) return nil, nil, err } err = file.Truncate(int64(filesize)) if err != nil { file.Close() - level.Error(logger).Log("msg", "Error setting filesize.", "filesize", filesize, "err", err) + logger.Error("Error setting filesize.", "filesize", filesize, "err", err) return nil, nil, err } fileAsBytes, err := mmap.Map(file, mmap.RDWR, 0) if err != nil { file.Close() - level.Error(logger).Log("msg", "Failed to mmap", "file", filename, "Attempted size", filesize, "err", err) + logger.Error("Failed to mmap", "file", filename, "Attempted size", filesize, "err", err) return nil, nil, err } - return fileAsBytes, &mmapedFile{f: file, m: fileAsBytes}, err + return fileAsBytes, &mmappedFile{f: file, m: fileAsBytes}, err } -func NewActiveQueryTracker(localStoragePath string, maxConcurrent int, logger log.Logger) *ActiveQueryTracker { +func NewActiveQueryTracker(localStoragePath string, maxConcurrent int, logger *slog.Logger) *ActiveQueryTracker { err := os.MkdirAll(localStoragePath, 0o777) if err != nil { - level.Error(logger).Log("msg", "Failed to create directory for logging active queries") + logger.Error("Failed to create directory for logging active queries") } filename, filesize := filepath.Join(localStoragePath, "queries.active"), 1+maxConcurrent*entrySize logUnfinishedQueries(filename, filesize, logger) - fileAsBytes, closer, err := getMMapedFile(filename, filesize, logger) + fileAsBytes, closer, err := getMMappedFile(filename, filesize, logger) if err != nil { panic("Unable to create mmap-ed active query log") } copy(fileAsBytes, "[") activeQueryTracker := ActiveQueryTracker{ - mmapedFile: fileAsBytes, + mmappedFile: fileAsBytes, closer: closer, getNextIndex: make(chan int, maxConcurrent), logger: logger, @@ -174,18 +173,18 @@ func trimStringByBytes(str string, size int) string { return string(bytesStr[:trimIndex]) } -func _newJSONEntry(query string, timestamp int64, logger log.Logger) []byte { +func _newJSONEntry(query string, timestamp int64, logger *slog.Logger) []byte { entry := Entry{query, timestamp} jsonEntry, err := json.Marshal(entry) if err != nil { - level.Error(logger).Log("msg", "Cannot create json of query", "query", query) + logger.Error("Cannot create json of query", "query", query) return []byte{} } return jsonEntry } -func newJSONEntry(query string, logger log.Logger) []byte { +func newJSONEntry(query string, logger *slog.Logger) []byte { timestamp := time.Now().Unix() minEntryJSON := _newJSONEntry("", timestamp, logger) @@ -206,14 +205,14 @@ func (tracker ActiveQueryTracker) GetMaxConcurrent() int { } func (tracker ActiveQueryTracker) Delete(insertIndex int) { - copy(tracker.mmapedFile[insertIndex:], strings.Repeat("\x00", entrySize)) + copy(tracker.mmappedFile[insertIndex:], strings.Repeat("\x00", entrySize)) tracker.getNextIndex <- insertIndex } func (tracker ActiveQueryTracker) Insert(ctx context.Context, query string) (int, error) { select { case i := <-tracker.getNextIndex: - fileBytes := tracker.mmapedFile + fileBytes := tracker.mmappedFile entry := newJSONEntry(query, tracker.logger) start, end := i, i+entrySize diff --git a/promql/query_logger_test.go b/promql/query_logger_test.go index 7bd93781ec..eb06e513ef 100644 --- a/promql/query_logger_test.go +++ b/promql/query_logger_test.go @@ -26,7 +26,7 @@ import ( func TestQueryLogging(t *testing.T) { fileAsBytes := make([]byte, 4096) queryLogger := ActiveQueryTracker{ - mmapedFile: fileAsBytes, + mmappedFile: fileAsBytes, logger: nil, getNextIndex: make(chan int, 4), } @@ -70,7 +70,7 @@ func 
TestQueryLogging(t *testing.T) { func TestIndexReuse(t *testing.T) { queryBytes := make([]byte, 1+3*entrySize) queryLogger := ActiveQueryTracker{ - mmapedFile: queryBytes, + mmappedFile: queryBytes, logger: nil, getNextIndex: make(chan int, 3), } @@ -106,10 +106,10 @@ func TestIndexReuse(t *testing.T) { func TestMMapFile(t *testing.T) { dir := t.TempDir() - fpath := filepath.Join(dir, "mmapedFile") + fpath := filepath.Join(dir, "mmappedFile") const data = "ab" - fileAsBytes, closer, err := getMMapedFile(fpath, 2, nil) + fileAsBytes, closer, err := getMMappedFile(fpath, 2, nil) require.NoError(t, err) copy(fileAsBytes, data) require.NoError(t, closer.Close()) diff --git a/promql/value.go b/promql/value.go index f25dbcd780..f19c0b5b58 100644 --- a/promql/value.go +++ b/promql/value.go @@ -526,7 +526,7 @@ func (ssi *storageSeriesIterator) Next() chunkenc.ValueType { ssi.currH = p.H return chunkenc.ValFloatHistogram default: - panic("storageSeriesIterater.Next failed to pick value type") + panic("storageSeriesIterator.Next failed to pick value type") } } diff --git a/rules/alerting.go b/rules/alerting.go index 038c49a697..b94f3c5ff5 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -16,13 +16,12 @@ package rules import ( "context" "fmt" + "log/slog" "net/url" "strings" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" "go.uber.org/atomic" "gopkg.in/yaml.v2" @@ -141,7 +140,7 @@ type AlertingRule struct { // the fingerprint of the labelset they correspond to. active map[uint64]*Alert - logger log.Logger + logger *slog.Logger noDependentRules *atomic.Bool noDependencyRules *atomic.Bool @@ -151,7 +150,7 @@ type AlertingRule struct { func NewAlertingRule( name string, vec parser.Expr, hold, keepFiringFor time.Duration, labels, annotations, externalLabels labels.Labels, externalURL string, - restored bool, logger log.Logger, + restored bool, logger *slog.Logger, ) *AlertingRule { el := externalLabels.Map() @@ -381,7 +380,7 @@ func (r *AlertingRule) Eval(ctx context.Context, queryOffset time.Duration, ts t result, err := tmpl.Expand() if err != nil { result = fmt.Sprintf("", err) - level.Warn(r.logger).Log("msg", "Expanding alert template failed", "err", err, "data", tmplData) + r.logger.Warn("Expanding alert template failed", "err", err, "data", tmplData) } return result } diff --git a/rules/alerting_test.go b/rules/alerting_test.go index 67d683c851..f0aa339cc7 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -19,8 +19,8 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/histogram" @@ -276,7 +276,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) ruleWithExternalLabels := NewAlertingRule( "ExternalLabelExists", @@ -287,7 +287,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { labels.EmptyLabels(), labels.FromStrings("foo", "bar", "dings", "bums"), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) result := promql.Vector{ promql.Sample{ @@ -371,7 +371,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) ruleWithExternalURL := NewAlertingRule( "ExternalURLExists", @@ -382,7 +382,7 
@@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "http://localhost:1234", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) result := promql.Vector{ promql.Sample{ @@ -466,7 +466,7 @@ func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) result := promql.Vector{ promql.Sample{ @@ -527,7 +527,7 @@ instance: {{ $v.Labels.instance }}, value: {{ printf "%.0f" $v.Value }}; `), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) evalTime := time.Unix(0, 0) @@ -607,7 +607,7 @@ func TestAlertingRuleDuplicate(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) _, err := rule.Eval(ctx, 0, now, EngineQueryFunc(engine, storage), nil, 0) require.Error(t, err) @@ -651,7 +651,7 @@ func TestAlertingRuleLimit(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) evalTime := time.Unix(0, 0) @@ -779,7 +779,7 @@ func TestSendAlertsDontAffectActiveAlerts(t *testing.T) { }, }, } - nm := notifier.NewManager(&opts, log.NewNopLogger()) + nm := notifier.NewManager(&opts, promslog.NewNopLogger()) f := SendAlerts(nm, "") notifyFunc := func(ctx context.Context, expr string, alerts ...*Alert) { @@ -986,7 +986,7 @@ func TestAlertingEvalWithOrigin(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) _, err = rule.Eval(ctx, 0, now, func(ctx context.Context, qs string, _ time.Time) (promql.Vector, error) { @@ -1008,7 +1008,7 @@ func TestAlertingRule_SetNoDependentRules(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) require.False(t, rule.NoDependentRules()) @@ -1029,7 +1029,7 @@ func TestAlertingRule_SetNoDependencyRules(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) require.False(t, rule.NoDependencyRules()) diff --git a/rules/fixtures/rules1.yaml b/rules/fixtures/rules1.yaml new file mode 100644 index 0000000000..76fbf71f3b --- /dev/null +++ b/rules/fixtures/rules1.yaml @@ -0,0 +1,5 @@ +groups: + - name: test_1 + rules: + - record: test_2 + expr: vector(2) diff --git a/rules/group.go b/rules/group.go index 539149da4c..865e7dcc28 100644 --- a/rules/group.go +++ b/rules/group.go @@ -16,9 +16,9 @@ package rules import ( "context" "errors" + "log/slog" "math" "slices" - "sort" "strings" "sync" "time" @@ -27,10 +27,9 @@ import ( "github.com/prometheus/prometheus/promql/parser" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/codes" @@ -67,7 +66,7 @@ type Group struct { terminated chan struct{} managerDone chan struct{} - logger log.Logger + logger *slog.Logger metrics *Metrics @@ -76,8 +75,8 @@ type Group struct { evalIterationFunc GroupEvalIterationFunc // concurrencyController controls the rules evaluation concurrency. 
- concurrencyController RuleConcurrencyController - + concurrencyController RuleConcurrencyController + appOpts *storage.AppendOptions alignEvaluationTimeOnInterval bool } @@ -130,25 +129,29 @@ func NewGroup(o GroupOptions) *Group { concurrencyController = sequentialRuleEvalController{} } - return &Group{ - name: o.Name, - file: o.File, - interval: o.Interval, - queryOffset: o.QueryOffset, - limit: o.Limit, - rules: o.Rules, - shouldRestore: o.ShouldRestore, - opts: o.Opts, - sourceTenants: o.SourceTenants, - seriesInPreviousEval: make([]map[string]labels.Labels, len(o.Rules)), - done: make(chan struct{}), - managerDone: o.done, - terminated: make(chan struct{}), - logger: log.With(o.Opts.Logger, "file", o.File, "group", o.Name), - metrics: metrics, - evalIterationFunc: evalIterationFunc, - concurrencyController: concurrencyController, + if o.Opts.Logger == nil { + o.Opts.Logger = promslog.NewNopLogger() + } + return &Group{ + name: o.Name, + file: o.File, + interval: o.Interval, + queryOffset: o.QueryOffset, + limit: o.Limit, + rules: o.Rules, + shouldRestore: o.ShouldRestore, + opts: o.Opts, + sourceTenants: o.SourceTenants, + seriesInPreviousEval: make([]map[string]labels.Labels, len(o.Rules)), + done: make(chan struct{}), + managerDone: o.done, + terminated: make(chan struct{}), + logger: o.Opts.Logger.With("file", o.File, "group", o.Name), + metrics: metrics, + evalIterationFunc: evalIterationFunc, + concurrencyController: concurrencyController, + appOpts: &storage.AppendOptions{DiscardOutOfOrder: true}, + alignEvaluationTimeOnInterval: o.AlignEvaluationTimeOnInterval, } } @@ -197,7 +200,7 @@ func matchesMatcherSets(matcherSets [][]*labels.Matcher, lbls labels.Labels) boo return ok } -// Queryable returns the group's querable. +// Queryable returns the group's queryable. func (g *Group) Queryable() storage.Queryable { return g.opts.Queryable } // Context returns the group's context. @@ -213,7 +216,7 @@ func (g *Group) Limit() int { return g.limit } // If it's empty or nil, then the owning user/tenant is considered to be the source tenant.
func (g *Group) SourceTenants() []string { return g.sourceTenants } -func (g *Group) Logger() log.Logger { return g.logger } +func (g *Group) Logger() *slog.Logger { return g.logger } func (g *Group) run(ctx context.Context) { defer close(g.terminated) @@ -285,7 +288,7 @@ func (g *Group) run(ctx context.Context) { g.RestoreForState(restoreStartTime) totalRestoreTimeSeconds := time.Since(restoreStartTime).Seconds() g.metrics.GroupLastRestoreDuration.WithLabelValues(GroupKey(g.file, g.name)).Set(totalRestoreTimeSeconds) - level.Debug(g.logger).Log("msg", "'for' state restoration completed", "duration_seconds", totalRestoreTimeSeconds) + g.logger.Debug("'for' state restoration completed", "duration_seconds", totalRestoreTimeSeconds) g.shouldRestore = false } @@ -514,7 +517,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { defer cleanup() } - logger := log.WithPrefix(g.logger, "name", rule.Name(), "index", i) + logger := g.logger.With("name", rule.Name(), "index", i) ctx, sp := otel.Tracer("").Start(ctx, "rule") sp.SetAttributes(attribute.String("name", rule.Name())) defer func(t time.Time) { @@ -527,7 +530,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { }(time.Now()) if sp.SpanContext().IsSampled() && sp.SpanContext().HasTraceID() { - logger = log.WithPrefix(logger, "trace_id", sp.SpanContext().TraceID()) + logger = logger.With("trace_id", sp.SpanContext().TraceID()) } g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() @@ -543,7 +546,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { // happens on shutdown and thus we skip logging of any errors here. var eqc promql.ErrQueryCanceled if !errors.As(err, &eqc) { - level.Warn(logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err) + logger.Warn("Evaluating rule failed", "rule", rule, "err", err) } return } @@ -569,7 +572,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { sp.SetStatus(codes.Error, err.Error()) g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() - level.Warn(logger).Log("msg", "Rule sample appending failed", "err", err) + logger.Warn("Rule sample appending failed", "err", err) return } g.seriesInPreviousEval[i] = seriesReturned @@ -579,6 +582,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { if s.H != nil { _, err = app.AppendHistogram(0, s.Metric, s.T, nil, s.H) } else { + app.SetOptions(g.appOpts) _, err = app.Append(0, s.Metric, s.T, s.F) } @@ -593,15 +597,15 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { switch { case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample): numOutOfOrder++ - level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s) + logger.Warn("Rule evaluation result discarded", "err", err, "sample", s) case errors.Is(unwrappedErr, storage.ErrTooOldSample): numTooOld++ - level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s) + logger.Warn("Rule evaluation result discarded", "err", err, "sample", s) case errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp): numDuplicates++ - level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s) + logger.Warn("Rule evaluation result discarded", "err", err, "sample", s) default: - level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s) + logger.Warn("Rule evaluation result discarded", "err", err, "sample", s) } } else { buf := [1024]byte{} @@ -609,13 +613,13 @@ func (g *Group) Eval(ctx 
context.Context, ts time.Time) { } } if numOutOfOrder > 0 { - level.Warn(logger).Log("msg", "Error on ingesting out-of-order result from rule evaluation", "num_dropped", numOutOfOrder) + logger.Warn("Error on ingesting out-of-order result from rule evaluation", "num_dropped", numOutOfOrder) } if numTooOld > 0 { - level.Warn(logger).Log("msg", "Error on ingesting too old result from rule evaluation", "num_dropped", numTooOld) + logger.Warn("Error on ingesting too old result from rule evaluation", "num_dropped", numTooOld) } if numDuplicates > 0 { - level.Warn(logger).Log("msg", "Error on ingesting results from rule evaluation with different value but same timestamp", "num_dropped", numDuplicates) + logger.Warn("Error on ingesting results from rule evaluation with different value but same timestamp", "num_dropped", numDuplicates) } for metric, lset := range g.seriesInPreviousEval[i] { @@ -634,7 +638,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { // Do not count these in logging, as this is expected if series // is exposed from a different rule. default: - level.Warn(logger).Log("msg", "Adding stale sample failed", "sample", lset.String(), "err", err) + logger.Warn("Adding stale sample failed", "sample", lset.String(), "err", err) } } } @@ -675,6 +679,7 @@ func (g *Group) cleanupStaleSeries(ctx context.Context, ts time.Time) { return } app := g.opts.Appendable.Appender(ctx) + app.SetOptions(g.appOpts) queryOffset := g.QueryOffset() for _, s := range g.staleSeries { // Rule that produced series no longer configured, mark it stale. @@ -691,11 +696,11 @@ func (g *Group) cleanupStaleSeries(ctx context.Context, ts time.Time) { // Do not count these in logging, as this is expected if series // is exposed from a different rule. default: - level.Warn(g.logger).Log("msg", "Adding stale sample for previous configuration failed", "sample", s, "err", err) + g.logger.Warn("Adding stale sample for previous configuration failed", "sample", s, "err", err) } } if err := app.Commit(); err != nil { - level.Warn(g.logger).Log("msg", "Stale sample appending for previous configuration failed", "err", err) + g.logger.Warn("Stale sample appending for previous configuration failed", "err", err) } else { g.staleSeries = nil } @@ -710,12 +715,12 @@ func (g *Group) RestoreForState(ts time.Time) { mintMS := int64(model.TimeFromUnixNano(mint.UnixNano())) q, err := g.opts.Queryable.Querier(mintMS, maxtMS) if err != nil { - level.Error(g.logger).Log("msg", "Failed to get Querier", "err", err) + g.logger.Error("Failed to get Querier", "err", err) return } defer func() { if err := q.Close(); err != nil { - level.Error(g.logger).Log("msg", "Failed to close Querier", "err", err) + g.logger.Error("Failed to close Querier", "err", err) } }() @@ -736,8 +741,8 @@ func (g *Group) RestoreForState(ts time.Time) { sset, err := alertRule.QueryForStateSeries(g.opts.Context, q) if err != nil { - level.Error(g.logger).Log( - "msg", "Failed to restore 'for' state", + g.logger.Error( + "Failed to restore 'for' state", labels.AlertName, alertRule.Name(), "stage", "Select", "err", err, @@ -756,7 +761,7 @@ func (g *Group) RestoreForState(ts time.Time) { // No results for this alert rule. 
if len(seriesByLabels) == 0 { - level.Debug(g.logger).Log("msg", "No series found to restore the 'for' state of the alert rule", labels.AlertName, alertRule.Name()) + g.logger.Debug("No series found to restore the 'for' state of the alert rule", labels.AlertName, alertRule.Name()) alertRule.SetRestored(true) continue } @@ -776,7 +781,7 @@ func (g *Group) RestoreForState(ts time.Time) { t, v = it.At() } if it.Err() != nil { - level.Error(g.logger).Log("msg", "Failed to restore 'for' state", + g.logger.Error("Failed to restore 'for' state", labels.AlertName, alertRule.Name(), "stage", "Iterator", "err", it.Err()) return } @@ -818,7 +823,7 @@ func (g *Group) RestoreForState(ts time.Time) { } a.ActiveAt = restoredActiveAt - level.Debug(g.logger).Log("msg", "'for' state restored", + g.logger.Debug("'for' state restored", labels.AlertName, alertRule.Name(), "restored_time", a.ActiveAt.Format(time.RFC850), "labels", a.Labels.String()) }) @@ -871,7 +876,7 @@ func (g *Group) Equals(ng *Group) bool { copyAndSort := func(x []string) []string { copied := make([]string, len(x)) copy(copied, x) - sort.Strings(copied) + slices.Sort(copied) return copied } diff --git a/rules/manager.go b/rules/manager.go index a2421a4d2d..b5bb015116 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -17,15 +17,15 @@ import ( "context" "errors" "fmt" + "log/slog" "net/url" "slices" "strings" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "golang.org/x/sync/semaphore" "github.com/prometheus/prometheus/model/labels" @@ -96,7 +96,7 @@ type Manager struct { done chan struct{} restored bool - logger log.Logger + logger *slog.Logger } // NotifyFunc sends notifications about a set of alerts generated by the given expression. @@ -112,7 +112,7 @@ type ManagerOptions struct { Context context.Context Appendable storage.Appendable Queryable storage.Queryable - Logger log.Logger + Logger *slog.Logger Registerer prometheus.Registerer OutageTolerance time.Duration ForGracePeriod time.Duration @@ -158,6 +158,10 @@ func NewManager(o *ManagerOptions) *Manager { o.RuleDependencyController = ruleDependencyController{} } + if o.Logger == nil { + o.Logger = promslog.NewNopLogger() + } + m := &Manager{ groups: map[string]*Group{}, opts: o, @@ -171,7 +175,7 @@ func NewManager(o *ManagerOptions) *Manager { // Run starts processing of the rule manager. It is blocking. func (m *Manager) Run() { - level.Info(m.logger).Log("msg", "Starting rule manager...") + m.logger.Info("Starting rule manager...") m.start() <-m.done } @@ -185,7 +189,7 @@ func (m *Manager) Stop() { m.mtx.Lock() defer m.mtx.Unlock() - level.Info(m.logger).Log("msg", "Stopping rule manager...") + m.logger.Info("Stopping rule manager...") for _, eg := range m.groups { eg.stop() @@ -195,7 +199,7 @@ func (m *Manager) Stop() { // staleness markers. close(m.done) - level.Info(m.logger).Log("msg", "Rule manager stopped") + m.logger.Info("Rule manager stopped") } // Update the rule manager's state as the config requires. 
If @@ -216,7 +220,7 @@ func (m *Manager) Update(interval time.Duration, files []string, externalLabels if errs != nil { for _, e := range errs { - level.Error(m.logger).Log("msg", "loading groups failed", "err", e) + m.logger.Error("loading groups failed", "err", e) } return errors.New("error loading rules, previous rule set restored") } @@ -327,25 +331,27 @@ func (m *Manager) LoadGroups( return nil, []error{fmt.Errorf("%s: %w", fn, err)} } + mLabels := FromMaps(rg.Labels, r.Labels) + if r.Alert.Value != "" { rules = append(rules, NewAlertingRule( r.Alert.Value, expr, time.Duration(r.For), time.Duration(r.KeepFiringFor), - labels.FromMap(r.Labels), + mLabels, labels.FromMap(r.Annotations), externalLabels, externalURL, !shouldRestore, - log.With(m.logger, "alert", r.Alert), + m.logger.With("alert", r.Alert), )) continue } rules = append(rules, NewRecordingRule( r.Record.Value, expr, - labels.FromMap(r.Labels), + mLabels, )) } @@ -530,3 +536,16 @@ func (c sequentialRuleEvalController) Allow(_ context.Context, _ *Group, _ Rule) } func (c sequentialRuleEvalController) Done(_ context.Context) {} + +// FromMaps returns new sorted Labels from the given maps, overriding each other in order. +func FromMaps(maps ...map[string]string) labels.Labels { + mLables := make(map[string]string) + + for _, m := range maps { + for k, v := range m { + mLables[k] = v + } + } + + return labels.FromMap(mLables) +} diff --git a/rules/manager_test.go b/rules/manager_test.go index c30d12eefe..bafcf2dc16 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -26,10 +26,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/atomic" "gopkg.in/yaml.v2" @@ -374,7 +374,7 @@ func TestForStateRestore(t *testing.T) { Appendable: storage, Queryable: storage, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), NotifyFunc: func(ctx context.Context, expr string, alerts ...*Alert) {}, OutageTolerance: 30 * time.Minute, ForGracePeriod: 10 * time.Minute, @@ -547,7 +547,7 @@ func TestStaleness(t *testing.T) { Appendable: st, Queryable: st, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("a + 1") @@ -641,7 +641,7 @@ groups: require.NoError(t, err) m := NewManager(&ManagerOptions{ - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), DefaultRuleQueryOffset: func() time.Duration { return time.Minute }, @@ -781,7 +781,7 @@ func TestUpdate(t *testing.T) { Queryable: st, QueryFunc: EngineQueryFunc(engine, st), Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) ruleManager.start() defer ruleManager.Stop() @@ -862,7 +862,7 @@ func TestUpdate_AlwaysRestore(t *testing.T) { Appendable: st, Queryable: st, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), AlwaysRestoreAlertState: true, }) ruleManager.start() @@ -894,7 +894,7 @@ func TestUpdate_AlwaysRestoreDoesntAffectUnchangedGroups(t *testing.T) { Appendable: st, Queryable: st, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), AlwaysRestoreAlertState: true, }) ruleManager.start() @@ -933,7 +933,7 @@ func TestUpdateSetsSourceTenants(t *testing.T) { Queryable: st, QueryFunc: 
EngineQueryFunc(engine, st), Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) ruleManager.start() defer ruleManager.Stop() @@ -975,7 +975,7 @@ func TestAlignEvaluationTimeOnInterval(t *testing.T) { Queryable: st, QueryFunc: EngineQueryFunc(engine, st), Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) ruleManager.start() defer ruleManager.Stop() @@ -1047,7 +1047,7 @@ func TestGroupEvaluationContextFuncIsCalledWhenSupplied(t *testing.T) { Queryable: st, QueryFunc: mockQueryFunc, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), GroupEvaluationContextFunc: mockContextWrapFunc, }) @@ -1080,12 +1080,13 @@ type ruleGroupsTest struct { // ruleGroupTest forms a testing struct for running tests over rules. type ruleGroupTest struct { - Name string `yaml:"name"` - Interval model.Duration `yaml:"interval,omitempty"` - Limit int `yaml:"limit,omitempty"` - Rules []rulefmt.Rule `yaml:"rules"` - SourceTenants []string `yaml:"source_tenants,omitempty"` - AlignEvaluationTimeOnInterval bool `yaml:"align_evaluation_time_on_interval,omitempty"` + Name string `yaml:"name"` + Interval model.Duration `yaml:"interval,omitempty"` + Limit int `yaml:"limit,omitempty"` + Rules []rulefmt.Rule `yaml:"rules"` + Labels map[string]string `yaml:"labels,omitempty"` + SourceTenants []string `yaml:"source_tenants,omitempty"` + AlignEvaluationTimeOnInterval bool `yaml:"align_evaluation_time_on_interval,omitempty"` } func formatRules(r *rulefmt.RuleGroups) ruleGroupsTest { @@ -1108,6 +1109,7 @@ func formatRules(r *rulefmt.RuleGroups) ruleGroupsTest { Interval: g.Interval, Limit: g.Limit, Rules: rtmp, + Labels: g.Labels, SourceTenants: g.SourceTenants, AlignEvaluationTimeOnInterval: g.AlignEvaluationTimeOnInterval, }) @@ -1160,14 +1162,14 @@ func TestNotify(t *testing.T) { Appendable: storage, Queryable: storage, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), NotifyFunc: notifyFunc, ResendDelay: 2 * time.Second, } expr, err := parser.ParseExpr("a > 1") require.NoError(t, err) - rule := NewAlertingRule("aTooHigh", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, log.NewNopLogger()) + rule := NewAlertingRule("aTooHigh", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, promslog.NewNopLogger()) group := NewGroup(GroupOptions{ Name: "alert", Interval: time.Second, @@ -1231,7 +1233,7 @@ func TestMetricsUpdate(t *testing.T) { Queryable: storage, QueryFunc: EngineQueryFunc(engine, storage), Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), Registerer: registry, }) ruleManager.start() @@ -1305,7 +1307,7 @@ func TestGroupStalenessOnRemoval(t *testing.T) { Queryable: storage, QueryFunc: EngineQueryFunc(engine, storage), Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) var stopped bool ruleManager.start() @@ -1382,7 +1384,7 @@ func TestMetricsStalenessOnManagerShutdown(t *testing.T) { Queryable: storage, QueryFunc: EngineQueryFunc(engine, storage), Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) var stopped bool ruleManager.start() @@ -1430,6 +1432,53 @@ func countStaleNaN(t *testing.T, st storage.Storage) int { return c } +func TestRuleMovedBetweenGroups(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + 
storage := teststorage.New(t, 600000) + defer storage.Close() + opts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 10, + Timeout: 10 * time.Second, + } + engine := promql.NewEngine(opts) + ruleManager := NewManager(&ManagerOptions{ + Appendable: storage, + Queryable: storage, + QueryFunc: EngineQueryFunc(engine, storage), + Context: context.Background(), + Logger: promslog.NewNopLogger(), + }) + var stopped bool + ruleManager.start() + defer func() { + if !stopped { + ruleManager.Stop() + } + }() + + rule2 := "fixtures/rules2.yaml" + rule1 := "fixtures/rules1.yaml" + + // Load initial configuration of rules2 + require.NoError(t, ruleManager.Update(1*time.Second, []string{rule2}, labels.EmptyLabels(), "", nil)) + + // Wait for rule to be evaluated + time.Sleep(3 * time.Second) + + // Reload configuration of rules1 + require.NoError(t, ruleManager.Update(1*time.Second, []string{rule1}, labels.EmptyLabels(), "", nil)) + + // Wait for rule to be evaluated in new location and potential staleness marker + time.Sleep(3 * time.Second) + + require.Equal(t, 0, countStaleNaN(t, storage)) // Not expecting any stale markers. +} + func TestGroupHasAlertingRules(t *testing.T) { tests := []struct { group *Group @@ -1484,7 +1533,7 @@ func TestRuleHealthUpdates(t *testing.T) { Appendable: st, Queryable: st, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("a + 1") @@ -1582,7 +1631,7 @@ func TestRuleGroupEvalIterationFunc(t *testing.T) { Appendable: storage, Queryable: storage, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), NotifyFunc: func(ctx context.Context, expr string, alerts ...*Alert) {}, OutageTolerance: 30 * time.Minute, ForGracePeriod: 10 * time.Minute, @@ -1668,7 +1717,7 @@ func TestNativeHistogramsInRecordingRules(t *testing.T) { Appendable: storage, Queryable: storage, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("sum(histogram_metric)") @@ -1716,7 +1765,7 @@ func TestManager_LoadGroups_ShouldCheckWhetherEachRuleHasDependentsAndDependenci ruleManager := NewManager(&ManagerOptions{ Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), Appendable: storage, QueryFunc: func(ctx context.Context, q string, ts time.Time) (promql.Vector, error) { return nil, nil }, }) @@ -1772,7 +1821,7 @@ func TestDependencyMap(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("sum by (user) (rate(requests[1m]))") @@ -1781,7 +1830,7 @@ func TestDependencyMap(t *testing.T) { expr, err = parser.ParseExpr("user:requests:rate1m <= 0") require.NoError(t, err) - rule2 := NewAlertingRule("ZeroRequests", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, log.NewNopLogger()) + rule2 := NewAlertingRule("ZeroRequests", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, promslog.NewNopLogger()) expr, err = parser.ParseExpr("sum by (user) (rate(requests[5m]))") require.NoError(t, err) @@ -1821,7 +1870,7 @@ func TestNoDependency(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("sum by (user) (rate(requests[1m]))") @@ -1844,7 +1893,7 @@ func 
TestDependenciesEdgeCases(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } t.Run("empty group", func(t *testing.T) { @@ -2002,7 +2051,7 @@ func TestNoMetricSelector(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("sum by (user) (rate(requests[1m]))") @@ -2031,7 +2080,7 @@ func TestDependentRulesWithNonMetricExpression(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("sum by (user) (rate(requests[1m]))") @@ -2040,7 +2089,7 @@ func TestDependentRulesWithNonMetricExpression(t *testing.T) { expr, err = parser.ParseExpr("user:requests:rate1m <= 0") require.NoError(t, err) - rule2 := NewAlertingRule("ZeroRequests", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, log.NewNopLogger()) + rule2 := NewAlertingRule("ZeroRequests", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, promslog.NewNopLogger()) expr, err = parser.ParseExpr("3") require.NoError(t, err) @@ -2063,7 +2112,7 @@ func TestRulesDependentOnMetaMetrics(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } // This rule is not dependent on any other rules in its group but it does depend on `ALERTS`, which is produced by @@ -2092,7 +2141,7 @@ func TestDependencyMapUpdatesOnGroupUpdate(t *testing.T) { files := []string{"fixtures/rules.yaml"} ruleManager := NewManager(&ManagerOptions{ Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) ruleManager.start() @@ -2344,7 +2393,7 @@ func TestUpdateWhenStopped(t *testing.T) { files := []string{"fixtures/rules.yaml"} ruleManager := NewManager(&ManagerOptions{ Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) ruleManager.start() err := ruleManager.Update(10*time.Second, files, labels.EmptyLabels(), "", nil) @@ -2401,7 +2450,7 @@ func optsFactory(storage storage.Storage, maxInflight, inflightQueries *atomic.I return &ManagerOptions{ Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), ConcurrentEvalsEnabled: concurrent, MaxConcurrentEvals: maxConcurrent, Appendable: storage, @@ -2430,3 +2479,18 @@ func optsFactory(storage storage.Storage, maxInflight, inflightQueries *atomic.I }, } } + +func TestLabels_FromMaps(t *testing.T) { + mLabels := FromMaps( + map[string]string{"aaa": "101", "bbb": "222"}, + map[string]string{"aaa": "111", "ccc": "333"}, + ) + + expected := labels.New( + labels.Label{Name: "aaa", Value: "111"}, + labels.Label{Name: "bbb", Value: "222"}, + labels.Label{Name: "ccc", Value: "333"}, + ) + + require.Equal(t, expected, mLabels, "unexpected labelset") +} diff --git a/rules/origin_test.go b/rules/origin_test.go index 75c83f9a4e..0bf428f3c1 100644 --- a/rules/origin_test.go +++ b/rules/origin_test.go @@ -19,9 +19,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" @@ -96,7 +97,7 @@ func TestNewRuleDetail(t *testing.T) 
{ labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) detail := NewRuleDetail(rule) diff --git a/scrape/helpers_test.go b/scrape/helpers_test.go index 116fa5c94b..12a56d7071 100644 --- a/scrape/helpers_test.go +++ b/scrape/helpers_test.go @@ -43,6 +43,8 @@ func (a nopAppendable) Appender(_ context.Context) storage.Appender { type nopAppender struct{} +func (a nopAppender) SetOptions(opts *storage.AppendOptions) {} + func (a nopAppender) Append(storage.SeriesRef, labels.Labels, int64, float64) (storage.SeriesRef, error) { return 0, nil } @@ -55,6 +57,10 @@ func (a nopAppender) AppendHistogram(storage.SeriesRef, labels.Labels, int64, *h return 0, nil } +func (a nopAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + return 0, nil +} + func (a nopAppender) UpdateMetadata(storage.SeriesRef, labels.Labels, metadata.Metadata) (storage.SeriesRef, error) { return 0, nil } @@ -78,9 +84,10 @@ func equalFloatSamples(a, b floatSample) bool { } type histogramSample struct { - t int64 - h *histogram.Histogram - fh *histogram.FloatHistogram + metric labels.Labels + t int64 + h *histogram.Histogram + fh *histogram.FloatHistogram } type collectResultAppendable struct { @@ -109,6 +116,8 @@ type collectResultAppender struct { pendingMetadata []metadata.Metadata } +func (a *collectResultAppender) SetOptions(opts *storage.AppendOptions) {} + func (a *collectResultAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { a.mtx.Lock() defer a.mtx.Unlock() @@ -146,7 +155,7 @@ func (a *collectResultAppender) AppendExemplar(ref storage.SeriesRef, l labels.L func (a *collectResultAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { a.mtx.Lock() defer a.mtx.Unlock() - a.pendingHistograms = append(a.pendingHistograms, histogramSample{h: h, fh: fh, t: t}) + a.pendingHistograms = append(a.pendingHistograms, histogramSample{h: h, fh: fh, t: t, metric: l}) if a.next == nil { return 0, nil } @@ -154,6 +163,13 @@ func (a *collectResultAppender) AppendHistogram(ref storage.SeriesRef, l labels. 
return a.next.AppendHistogram(ref, l, t, h, fh) } +func (a *collectResultAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if h != nil { + return a.AppendHistogram(ref, l, ct, &histogram.Histogram{}, nil) + } + return a.AppendHistogram(ref, l, ct, nil, &histogram.FloatHistogram{}) +} + func (a *collectResultAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { a.mtx.Lock() defer a.mtx.Unlock() diff --git a/scrape/manager.go b/scrape/manager.go index e3dba5f0ee..f3dad2a048 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -17,31 +17,32 @@ import ( "errors" "fmt" "hash/fnv" + "log/slog" "reflect" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/logging" "github.com/prometheus/prometheus/util/osutil" "github.com/prometheus/prometheus/util/pool" ) // NewManager is the Manager constructor. -func NewManager(o *Options, logger log.Logger, app storage.Appendable, registerer prometheus.Registerer) (*Manager, error) { +func NewManager(o *Options, logger *slog.Logger, newScrapeFailureLogger func(string) (*logging.JSONFileLogger, error), app storage.Appendable, registerer prometheus.Registerer) (*Manager, error) { if o == nil { o = &Options{} } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } sm, err := newScrapeMetrics(registerer) @@ -50,15 +51,16 @@ func NewManager(o *Options, logger log.Logger, app storage.Appendable, registere } m := &Manager{ - append: app, - opts: o, - logger: logger, - scrapeConfigs: make(map[string]*config.ScrapeConfig), - scrapePools: make(map[string]*scrapePool), - graceShut: make(chan struct{}), - triggerReload: make(chan struct{}, 1), - metrics: sm, - buffers: pool.New(1e3, 100e6, 3, func(sz int) interface{} { return make([]byte, 0, sz) }), + append: app, + opts: o, + logger: logger, + newScrapeFailureLogger: newScrapeFailureLogger, + scrapeConfigs: make(map[string]*config.ScrapeConfig), + scrapePools: make(map[string]*scrapePool), + graceShut: make(chan struct{}), + triggerReload: make(chan struct{}, 1), + metrics: sm, + buffers: pool.New(1e3, 100e6, 3, func(sz int) interface{} { return make([]byte, 0, sz) }), } m.metrics.setTargetMetadataCacheGatherer(m) @@ -68,8 +70,7 @@ func NewManager(o *Options, logger log.Logger, app storage.Appendable, registere // Options are the configuration parameters to the scrape manager. type Options struct { - ExtraMetrics bool - NoDefaultPort bool + ExtraMetrics bool // Option used by downstream scraper users like OpenTelemetry Collector // to help lookup metric metadata. Should be false for Prometheus. PassMetadataInContext bool @@ -99,16 +100,18 @@ const DefaultNameEscapingScheme = model.ValueEncodingEscaping // when receiving new target groups from the discovery manager. 
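scrape.NewManager now takes a *slog.Logger and, as its new third argument, a scrape-failure-logger factory. A minimal sketch of the updated call (illustration only; nopAppendable is a stand-in defined here, and passing nil for the factory disables per-job failure-log files, as the updated tests do):

    package main

    import (
        "context"

        "github.com/prometheus/client_golang/prometheus"
        "github.com/prometheus/common/promslog"

        "github.com/prometheus/prometheus/scrape"
        "github.com/prometheus/prometheus/storage"
    )

    // nopAppendable stands in for a real storage.Appendable in this sketch.
    type nopAppendable struct{}

    func (nopAppendable) Appender(context.Context) storage.Appender { return nil }

    func main() {
        logger := promslog.New(&promslog.Config{})

        // Signature: Options, *slog.Logger, failure-logger factory
        // (func(string) (*logging.JSONFileLogger, error)), Appendable, Registerer.
        m, err := scrape.NewManager(&scrape.Options{}, logger, nil, nopAppendable{}, prometheus.NewRegistry())
        if err != nil {
            panic(err)
        }
        _ = m
    }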
type Manager struct { opts *Options - logger log.Logger + logger *slog.Logger append storage.Appendable graceShut chan struct{} - offsetSeed uint64 // Global offsetSeed seed is used to spread scrape workload across HA setup. - mtxScrape sync.Mutex // Guards the fields below. - scrapeConfigs map[string]*config.ScrapeConfig - scrapePools map[string]*scrapePool - targetSets map[string][]*targetgroup.Group - buffers *pool.Pool + offsetSeed uint64 // Global offsetSeed seed is used to spread scrape workload across HA setup. + mtxScrape sync.Mutex // Guards the fields below. + scrapeConfigs map[string]*config.ScrapeConfig + scrapePools map[string]*scrapePool + newScrapeFailureLogger func(string) (*logging.JSONFileLogger, error) + scrapeFailureLoggers map[string]*logging.JSONFileLogger + targetSets map[string][]*targetgroup.Group + buffers *pool.Pool triggerReload chan struct{} @@ -172,17 +175,27 @@ func (m *Manager) reload() { if _, ok := m.scrapePools[setName]; !ok { scrapeConfig, ok := m.scrapeConfigs[setName] if !ok { - level.Error(m.logger).Log("msg", "error reloading target set", "err", "invalid config id:"+setName) + m.logger.Error("error reloading target set", "err", "invalid config id:"+setName) + continue + } + if scrapeConfig.ConvertClassicHistogramsToNHCB && m.opts.EnableCreatedTimestampZeroIngestion { + // TODO(krajorama): fix https://github.com/prometheus/prometheus/issues/15137 + m.logger.Error("error reloading target set", "err", "cannot convert classic histograms to native histograms with custom buckets and ingest created timestamp zero samples at the same time due to https://github.com/prometheus/prometheus/issues/15137") continue } m.metrics.targetScrapePools.Inc() - sp, err := newScrapePool(scrapeConfig, m.append, m.offsetSeed, log.With(m.logger, "scrape_pool", setName), m.buffers, m.opts, m.metrics) + sp, err := newScrapePool(scrapeConfig, m.append, m.offsetSeed, m.logger.With("scrape_pool", setName), m.buffers, m.opts, m.metrics) if err != nil { m.metrics.targetScrapePoolsFailed.Inc() - level.Error(m.logger).Log("msg", "error creating new scrape pool", "err", err, "scrape_pool", setName) + m.logger.Error("error creating new scrape pool", "err", err, "scrape_pool", setName) continue } m.scrapePools[setName] = sp + if l, ok := m.scrapeFailureLoggers[scrapeConfig.ScrapeFailureLogFile]; ok { + sp.SetScrapeFailureLogger(l) + } else { + sp.logger.Error("No logger found. This is a bug in Prometheus that should be reported upstream.", "scrape_pool", setName) + } } wg.Add(1) @@ -238,11 +251,36 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error { } c := make(map[string]*config.ScrapeConfig) + scrapeFailureLoggers := map[string]*logging.JSONFileLogger{ + "": nil, // Emptying the file name sets the scrape logger to nil. + } for _, scfg := range scfgs { c[scfg.JobName] = scfg + if _, ok := scrapeFailureLoggers[scfg.ScrapeFailureLogFile]; !ok { + // We promise to reopen the file on each reload. 
+ var ( + logger *logging.JSONFileLogger + err error + ) + if m.newScrapeFailureLogger != nil { + if logger, err = m.newScrapeFailureLogger(scfg.ScrapeFailureLogFile); err != nil { + return err + } + } + scrapeFailureLoggers[scfg.ScrapeFailureLogFile] = logger + } } m.scrapeConfigs = c + oldScrapeFailureLoggers := m.scrapeFailureLoggers + for _, s := range oldScrapeFailureLoggers { + if s != nil { + defer s.Close() + } + } + + m.scrapeFailureLoggers = scrapeFailureLoggers + if err := m.setOffsetSeed(cfg.GlobalConfig.ExternalLabels); err != nil { return err } @@ -257,9 +295,16 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error { case !reflect.DeepEqual(sp.config, cfg): err := sp.reload(cfg) if err != nil { - level.Error(m.logger).Log("msg", "error reloading scrape pool", "err", err, "scrape_pool", name) + m.logger.Error("error reloading scrape pool", "err", err, "scrape_pool", name) failed = true } + fallthrough + case ok: + if l, ok := m.scrapeFailureLoggers[cfg.ScrapeFailureLogFile]; ok { + sp.SetScrapeFailureLogger(l) + } else { + sp.logger.Error("No logger found. This is a bug in Prometheus that should be reported upstream.", "scrape_pool", name) + } } } diff --git a/scrape/manager_test.go b/scrape/manager_test.go index c71691c95d..c3544f6344 100644 --- a/scrape/manager_test.go +++ b/scrape/manager_test.go @@ -14,6 +14,7 @@ package scrape import ( + "bytes" "context" "fmt" "net/http" @@ -26,33 +27,42 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/expfmt" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "google.golang.org/protobuf/types/known/timestamppb" "gopkg.in/yaml.v2" + "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" _ "github.com/prometheus/prometheus/discovery/file" "github.com/prometheus/prometheus/discovery/targetgroup" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" + "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/runutil" "github.com/prometheus/prometheus/util/testutil" ) +func init() { + // This can be removed when the default validation scheme in common is updated. + model.NameValidationScheme = model.UTF8Validation +} + func TestPopulateLabels(t *testing.T) { cases := []struct { - in labels.Labels - cfg *config.ScrapeConfig - noDefaultPort bool - res labels.Labels - resOrig labels.Labels - err string + in labels.Labels + cfg *config.ScrapeConfig + res labels.Labels + resOrig labels.Labels + err string }{ // Regular population of scrape config options. 
{ @@ -106,8 +116,8 @@ func TestPopulateLabels(t *testing.T) { ScrapeTimeout: model.Duration(time.Second), }, res: labels.FromMap(map[string]string{ - model.AddressLabel: "1.2.3.4:80", - model.InstanceLabel: "1.2.3.4:80", + model.AddressLabel: "1.2.3.4", + model.InstanceLabel: "1.2.3.4", model.SchemeLabel: "http", model.MetricsPathLabel: "/custom", model.JobLabel: "custom-job", @@ -137,7 +147,7 @@ func TestPopulateLabels(t *testing.T) { ScrapeTimeout: model.Duration(time.Second), }, res: labels.FromMap(map[string]string{ - model.AddressLabel: "[::1]:443", + model.AddressLabel: "[::1]", model.InstanceLabel: "custom-instance", model.SchemeLabel: "https", model.MetricsPathLabel: "/metrics", @@ -360,7 +370,6 @@ func TestPopulateLabels(t *testing.T) { ScrapeInterval: model.Duration(time.Second), ScrapeTimeout: model.Duration(time.Second), }, - noDefaultPort: true, res: labels.FromMap(map[string]string{ model.AddressLabel: "1.2.3.4", model.InstanceLabel: "1.2.3.4", @@ -379,7 +388,7 @@ func TestPopulateLabels(t *testing.T) { model.ScrapeTimeoutLabel: "1s", }), }, - // Remove default port (http). + // verify that the default port is not removed (http). { in: labels.FromMap(map[string]string{ model.AddressLabel: "1.2.3.4:80", @@ -391,9 +400,8 @@ func TestPopulateLabels(t *testing.T) { ScrapeInterval: model.Duration(time.Second), ScrapeTimeout: model.Duration(time.Second), }, - noDefaultPort: true, res: labels.FromMap(map[string]string{ - model.AddressLabel: "1.2.3.4", + model.AddressLabel: "1.2.3.4:80", model.InstanceLabel: "1.2.3.4:80", model.SchemeLabel: "http", model.MetricsPathLabel: "/metrics", @@ -410,7 +418,7 @@ func TestPopulateLabels(t *testing.T) { model.ScrapeTimeoutLabel: "1s", }), }, - // Remove default port (https). + // verify that the default port is not removed (https). { in: labels.FromMap(map[string]string{ model.AddressLabel: "1.2.3.4:443", @@ -422,9 +430,8 @@ func TestPopulateLabels(t *testing.T) { ScrapeInterval: model.Duration(time.Second), ScrapeTimeout: model.Duration(time.Second), }, - noDefaultPort: true, res: labels.FromMap(map[string]string{ - model.AddressLabel: "1.2.3.4", + model.AddressLabel: "1.2.3.4:443", model.InstanceLabel: "1.2.3.4:443", model.SchemeLabel: "https", model.MetricsPathLabel: "/metrics", @@ -445,7 +452,7 @@ func TestPopulateLabels(t *testing.T) { for _, c := range cases { in := c.in.Copy() - res, orig, err := PopulateLabels(labels.NewBuilder(c.in), c.cfg, c.noDefaultPort) + res, orig, err := PopulateLabels(labels.NewBuilder(c.in), c.cfg) if c.err != "" { require.EqualError(t, err, c.err) } else { @@ -511,7 +518,7 @@ scrape_configs: ) opts := Options{} - scrapeManager, err := NewManager(&opts, nil, nil, testRegistry) + scrapeManager, err := NewManager(&opts, nil, nil, nil, testRegistry) require.NoError(t, err) newLoop := func(scrapeLoopOptions) loop { ch <- struct{}{} @@ -576,7 +583,7 @@ scrape_configs: func TestManagerTargetsUpdates(t *testing.T) { opts := Options{} testRegistry := prometheus.NewRegistry() - m, err := NewManager(&opts, nil, nil, testRegistry) + m, err := NewManager(&opts, nil, nil, nil, testRegistry) require.NoError(t, err) ts := make(chan map[string][]*targetgroup.Group) @@ -629,7 +636,7 @@ global: opts := Options{} testRegistry := prometheus.NewRegistry() - scrapeManager, err := NewManager(&opts, nil, nil, testRegistry) + scrapeManager, err := NewManager(&opts, nil, nil, nil, testRegistry) require.NoError(t, err) // Load the first config. 
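With the no-default-scrape-port flag removed, PopulateLabels loses its noDefaultPort parameter and leaves target addresses untouched in both directions: an explicit port is kept and a bare host no longer has one appended. A small sketch of the new behaviour (illustration only; the config values mirror the test cases above):

    package main

    import (
        "fmt"
        "time"

        "github.com/prometheus/common/model"

        "github.com/prometheus/prometheus/config"
        "github.com/prometheus/prometheus/model/labels"
        "github.com/prometheus/prometheus/scrape"
    )

    func main() {
        cfg := &config.ScrapeConfig{
            JobName:        "custom-job",
            Scheme:         "http",
            MetricsPath:    "/metrics",
            ScrapeInterval: model.Duration(time.Second),
            ScrapeTimeout:  model.Duration(time.Second),
        }
        lb := labels.NewBuilder(labels.FromMap(map[string]string{
            model.AddressLabel: "1.2.3.4:80",
        }))

        // No noDefaultPort argument any more; ":80" is neither stripped here nor
        // appended when the input address has no port at all.
        res, _, err := scrape.PopulateLabels(lb, cfg)
        fmt.Println(res.Get(model.AddressLabel), res.Get(model.InstanceLabel), err)
        // Expected: 1.2.3.4:80 1.2.3.4:80 <nil>
    }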
@@ -706,7 +713,7 @@ scrape_configs: } opts := Options{} - scrapeManager, err := NewManager(&opts, nil, nil, testRegistry) + scrapeManager, err := NewManager(&opts, nil, nil, nil, testRegistry) require.NoError(t, err) reload(scrapeManager, cfg1) @@ -716,37 +723,256 @@ scrape_configs: require.ElementsMatch(t, []string{"job1", "job3"}, scrapeManager.ScrapePools()) } -// TestManagerCTZeroIngestion tests scrape manager for CT cases. +func setupScrapeManager(t *testing.T, honorTimestamps, enableCTZeroIngestion bool) (*collectResultAppender, *Manager) { + app := &collectResultAppender{} + scrapeManager, err := NewManager( + &Options{ + EnableCreatedTimestampZeroIngestion: enableCTZeroIngestion, + skipOffsetting: true, + }, + promslog.New(&promslog.Config{}), + nil, + &collectResultAppendable{app}, + prometheus.NewRegistry(), + ) + require.NoError(t, err) + + require.NoError(t, scrapeManager.ApplyConfig(&config.Config{ + GlobalConfig: config.GlobalConfig{ + // Disable regular scrapes. + ScrapeInterval: model.Duration(9999 * time.Minute), + ScrapeTimeout: model.Duration(5 * time.Second), + ScrapeProtocols: []config.ScrapeProtocol{config.OpenMetricsText1_0_0, config.PrometheusProto}, + }, + ScrapeConfigs: []*config.ScrapeConfig{{JobName: "test", HonorTimestamps: honorTimestamps}}, + })) + + return app, scrapeManager +} + +func setupTestServer(t *testing.T, typ string, toWrite []byte) *httptest.Server { + once := sync.Once{} + + server := httptest.NewServer( + http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fail := true + once.Do(func() { + fail = false + w.Header().Set("Content-Type", typ) + w.Write(toWrite) + }) + + if fail { + w.WriteHeader(http.StatusInternalServerError) + } + }), + ) + + t.Cleanup(func() { server.Close() }) + + return server +} + +// TestManagerCTZeroIngestion tests scrape manager for various CT cases. func TestManagerCTZeroIngestion(t *testing.T) { - const mName = "expected_counter" + const ( + // _total suffix is required, otherwise expfmt with OMText will mark metric as "unknown" + expectedMetricName = "expected_metric_total" + expectedCreatedMetricName = "expected_metric_created" + expectedSampleValue = 17.0 + ) + + for _, testFormat := range []config.ScrapeProtocol{config.PrometheusProto, config.OpenMetricsText1_0_0} { + t.Run(fmt.Sprintf("format=%s", testFormat), func(t *testing.T) { + for _, testWithCT := range []bool{false, true} { + t.Run(fmt.Sprintf("withCT=%v", testWithCT), func(t *testing.T) { + for _, testCTZeroIngest := range []bool{false, true} { + t.Run(fmt.Sprintf("ctZeroIngest=%v", testCTZeroIngest), func(t *testing.T) { + sampleTs := time.Now() + ctTs := time.Time{} + if testWithCT { + ctTs = sampleTs.Add(-2 * time.Minute) + } + + // TODO(bwplotka): Add more types than just counter? + encoded := prepareTestEncodedCounter(t, testFormat, expectedMetricName, expectedSampleValue, sampleTs, ctTs) + app, scrapeManager := setupScrapeManager(t, true, testCTZeroIngest) + + // Perform the test. + doOneScrape(t, scrapeManager, app, setupTestServer(t, config.ScrapeProtocolsHeaders[testFormat], encoded)) + + // Verify results. + // Verify what we got vs expectations around CT injection. 
+ samples := findSamplesForMetric(app.resultFloats, expectedMetricName) + if testWithCT && testCTZeroIngest { + require.Len(t, samples, 2) + require.Equal(t, 0.0, samples[0].f) + require.Equal(t, timestamp.FromTime(ctTs), samples[0].t) + require.Equal(t, expectedSampleValue, samples[1].f) + require.Equal(t, timestamp.FromTime(sampleTs), samples[1].t) + } else { + require.Len(t, samples, 1) + require.Equal(t, expectedSampleValue, samples[0].f) + require.Equal(t, timestamp.FromTime(sampleTs), samples[0].t) + } + + // Verify what we got vs expectations around additional _created series for OM text. + // enableCTZeroInjection also kills that _created line. + createdSeriesSamples := findSamplesForMetric(app.resultFloats, expectedCreatedMetricName) + if testFormat == config.OpenMetricsText1_0_0 && testWithCT && !testCTZeroIngest { + // For OM Text, when counter has CT, and feature flag disabled we should see _created lines. + require.Len(t, createdSeriesSamples, 1) + // Conversion taken from common/expfmt.writeOpenMetricsFloat. + // We don't check the ct timestamp as explicit ts was not implemented in expfmt.Encoder, + // but exists in OM https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#:~:text=An%20example%20with%20a%20Metric%20with%20no%20labels%2C%20and%20a%20MetricPoint%20with%20a%20timestamp%20and%20a%20created + // We can implement this, but we want to potentially get rid of OM 1.0 CT lines + require.Equal(t, float64(timestamppb.New(ctTs).AsTime().UnixNano())/1e9, createdSeriesSamples[0].f) + } else { + require.Empty(t, createdSeriesSamples) + } + }) + } + }) + } + }) + } +} + +func prepareTestEncodedCounter(t *testing.T, format config.ScrapeProtocol, mName string, v float64, ts, ct time.Time) (encoded []byte) { + t.Helper() + + counter := &dto.Counter{Value: proto.Float64(v)} + if !ct.IsZero() { + counter.CreatedTimestamp = timestamppb.New(ct) + } + ctrType := dto.MetricType_COUNTER + inputMetric := &dto.MetricFamily{ + Name: proto.String(mName), + Type: &ctrType, + Metric: []*dto.Metric{{ + TimestampMs: proto.Int64(timestamp.FromTime(ts)), + Counter: counter, + }}, + } + switch format { + case config.PrometheusProto: + return protoMarshalDelimited(t, inputMetric) + case config.OpenMetricsText1_0_0: + buf := &bytes.Buffer{} + require.NoError(t, expfmt.NewEncoder(buf, expfmt.NewFormat(expfmt.TypeOpenMetrics), expfmt.WithCreatedLines(), expfmt.WithUnit()).Encode(inputMetric)) + _, _ = buf.WriteString("# EOF") + + t.Log("produced OM text to expose:", buf.String()) + return buf.Bytes() + default: + t.Fatalf("not implemented format: %v", format) + return nil + } +} + +func doOneScrape(t *testing.T, manager *Manager, appender *collectResultAppender, server *httptest.Server) { + t.Helper() + + serverURL, err := url.Parse(server.URL) + require.NoError(t, err) + + // Add fake target directly into tsets + reload + manager.updateTsets(map[string][]*targetgroup.Group{ + "test": {{ + Targets: []model.LabelSet{{ + model.SchemeLabel: model.LabelValue(serverURL.Scheme), + model.AddressLabel: model.LabelValue(serverURL.Host), + }}, + }}, + }) + manager.reload() + + // Wait for one scrape. + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer cancel() + require.NoError(t, runutil.Retry(100*time.Millisecond, ctx.Done(), func() error { + appender.mtx.Lock() + defer appender.mtx.Unlock() + + // Check if scrape happened and grab the relevant samples. 
+ if len(appender.resultFloats) > 0 { + return nil + } + return fmt.Errorf("expected some float samples, got none") + }), "after 1 minute") + manager.Stop() +} + +func findSamplesForMetric(floats []floatSample, metricName string) (ret []floatSample) { + for _, f := range floats { + if f.metric.Get(model.MetricNameLabel) == metricName { + ret = append(ret, f) + } + } + return ret +} + +// generateTestHistogram generates the same thing as tsdbutil.GenerateTestHistogram, +// but in the form of dto.Histogram. +func generateTestHistogram(i int) *dto.Histogram { + helper := tsdbutil.GenerateTestHistogram(i) + h := &dto.Histogram{} + h.SampleCount = proto.Uint64(helper.Count) + h.SampleSum = proto.Float64(helper.Sum) + h.Schema = proto.Int32(helper.Schema) + h.ZeroThreshold = proto.Float64(helper.ZeroThreshold) + h.ZeroCount = proto.Uint64(helper.ZeroCount) + h.PositiveSpan = make([]*dto.BucketSpan, len(helper.PositiveSpans)) + for i, span := range helper.PositiveSpans { + h.PositiveSpan[i] = &dto.BucketSpan{ + Offset: proto.Int32(span.Offset), + Length: proto.Uint32(span.Length), + } + } + h.PositiveDelta = helper.PositiveBuckets + h.NegativeSpan = make([]*dto.BucketSpan, len(helper.NegativeSpans)) + for i, span := range helper.NegativeSpans { + h.NegativeSpan[i] = &dto.BucketSpan{ + Offset: proto.Int32(span.Offset), + Length: proto.Uint32(span.Length), + } + } + h.NegativeDelta = helper.NegativeBuckets + return h +} + +func TestManagerCTZeroIngestionHistogram(t *testing.T) { + const mName = "expected_histogram" for _, tc := range []struct { name string - counterSample *dto.Counter + inputHistSample *dto.Histogram enableCTZeroIngestion bool }{ { - name: "disabled with CT on counter", - counterSample: &dto.Counter{ - Value: proto.Float64(1.0), - // Timestamp does not matter as long as it exists in this test. - CreatedTimestamp: timestamppb.Now(), - }, + name: "disabled with CT on histogram", + inputHistSample: func() *dto.Histogram { + h := generateTestHistogram(0) + h.CreatedTimestamp = timestamppb.Now() + return h + }(), + enableCTZeroIngestion: false, }, { - name: "enabled with CT on counter", - counterSample: &dto.Counter{ - Value: proto.Float64(1.0), - // Timestamp does not matter as long as it exists in this test. - CreatedTimestamp: timestamppb.Now(), - }, + name: "enabled with CT on histogram", + inputHistSample: func() *dto.Histogram { + h := generateTestHistogram(0) + h.CreatedTimestamp = timestamppb.Now() + return h + }(), enableCTZeroIngestion: true, }, { - name: "enabled without CT on counter", - counterSample: &dto.Counter{ - Value: proto.Float64(1.0), - }, + name: "enabled without CT on histogram", + inputHistSample: func() *dto.Histogram { + h := generateTestHistogram(0) + return h + }(), enableCTZeroIngestion: true, }, } { @@ -755,9 +981,11 @@ func TestManagerCTZeroIngestion(t *testing.T) { scrapeManager, err := NewManager( &Options{ EnableCreatedTimestampZeroIngestion: tc.enableCTZeroIngestion, + EnableNativeHistogramsIngestion: true, skipOffsetting: true, }, - log.NewLogfmtLogger(os.Stderr), + promslog.New(&promslog.Config{}), + nil, &collectResultAppendable{app}, prometheus.NewRegistry(), ) @@ -779,16 +1007,16 @@ func TestManagerCTZeroIngestion(t *testing.T) { // Start fake HTTP target to that allow one scrape only. server := httptest.NewServer( http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - fail := true + fail := true // TODO(bwplotka): Kill or use? 
once.Do(func() { fail = false w.Header().Set("Content-Type", `application/vnd.google.protobuf; proto=io.prometheus.client.MetricFamily; encoding=delimited`) - ctrType := dto.MetricType_COUNTER + ctrType := dto.MetricType_HISTOGRAM w.Write(protoMarshalDelimited(t, &dto.MetricFamily{ Name: proto.String(mName), Type: &ctrType, - Metric: []*dto.Metric{{Counter: tc.counterSample}}, + Metric: []*dto.Metric{{Histogram: tc.inputHistSample}}, })) }) @@ -814,7 +1042,8 @@ func TestManagerCTZeroIngestion(t *testing.T) { }) scrapeManager.reload() - var got []float64 + var got []histogramSample + // Wait for one scrape. ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) defer cancel() @@ -822,32 +1051,35 @@ func TestManagerCTZeroIngestion(t *testing.T) { app.mtx.Lock() defer app.mtx.Unlock() - // Check if scrape happened and grab the relevant samples, they have to be there - or it's a bug + // Check if scrape happened and grab the relevant histograms, they have to be there - or it's a bug // and it's not worth waiting. - for _, f := range app.resultFloats { - if f.metric.Get(model.MetricNameLabel) == mName { - got = append(got, f.f) + for _, h := range app.resultHistograms { + if h.metric.Get(model.MetricNameLabel) == mName { + got = append(got, h) } } - if len(app.resultFloats) > 0 { + if len(app.resultHistograms) > 0 { return nil } - return fmt.Errorf("expected some samples, got none") + return fmt.Errorf("expected some histogram samples, got none") }), "after 1 minute") scrapeManager.Stop() - // Check for zero samples, assuming we only injected always one sample. + // Check for zero samples, assuming we only injected always one histogram sample. // Did it contain CT to inject? If yes, was CT zero enabled? - if tc.counterSample.CreatedTimestamp.IsValid() && tc.enableCTZeroIngestion { + if tc.inputHistSample.CreatedTimestamp.IsValid() && tc.enableCTZeroIngestion { require.Len(t, got, 2) - require.Equal(t, 0.0, got[0]) - require.Equal(t, tc.counterSample.GetValue(), got[1]) + // Zero sample. + require.Equal(t, histogram.Histogram{}, *got[0].h) + // Quick soft check to make sure it's the same sample or at least not zero. + require.Equal(t, tc.inputHistSample.GetSampleSum(), got[1].h.Sum) return } // Expect only one, valid sample. require.Len(t, got, 1) - require.Equal(t, tc.counterSample.GetValue(), got[0]) + // Quick soft check to make sure it's the same sample or at least not zero. + require.Equal(t, tc.inputHistSample.GetSampleSum(), got[0].h.Sum) }) } } @@ -857,7 +1089,7 @@ func TestUnregisterMetrics(t *testing.T) { // Check that all metrics can be unregistered, allowing a second manager to be created. for i := 0; i < 2; i++ { opts := Options{} - manager, err := NewManager(&opts, nil, nil, reg) + manager, err := NewManager(&opts, nil, nil, nil, reg) require.NotNil(t, manager) require.NoError(t, err) // Unregister all metrics. 
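When created-timestamp-zero-ingestion and native histogram ingestion are both enabled, the scrape loop now emits a synthetic zero histogram at the created timestamp through the new AppendHistogramCTZeroSample before appending the scraped sample. A sketch of that ordering (illustration only; appendHistogramWithCT is a hypothetical helper and app is whatever storage.Appender the caller supplies):

    package scrapesketch

    import (
        "errors"

        "github.com/prometheus/prometheus/model/histogram"
        "github.com/prometheus/prometheus/model/labels"
        "github.com/prometheus/prometheus/storage"
    )

    // appendHistogramWithCT appends a zero histogram at the created timestamp ct,
    // then the scraped histogram h at the scrape timestamp t.
    func appendHistogramWithCT(app storage.Appender, lset labels.Labels, t, ct int64, h *histogram.Histogram) (storage.SeriesRef, error) {
        ref, err := app.AppendHistogramCTZeroSample(0, lset, t, ct, h, nil)
        if err != nil && !errors.Is(err, storage.ErrOutOfOrderCT) {
            // The scrape loop ignores out-of-order CTs entirely and only logs other
            // errors at debug level; this sketch surfaces them to the caller instead.
            return 0, err
        }
        return app.AppendHistogram(ref, lset, t, h, nil)
    }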
@@ -893,7 +1125,7 @@ func runManagers(t *testing.T, ctx context.Context) (*discovery.Manager, *Manage require.NoError(t, err) discoveryManager := discovery.NewManager( ctx, - log.NewNopLogger(), + promslog.NewNopLogger(), reg, sdMetrics, discovery.Updatert(100*time.Millisecond), @@ -901,6 +1133,7 @@ func runManagers(t *testing.T, ctx context.Context) (*discovery.Manager, *Manage scrapeManager, err := NewManager( &Options{DiscoveryReloadInterval: model.Duration(100 * time.Millisecond)}, nil, + nil, nopAppendable{}, prometheus.NewRegistry(), ) @@ -1179,7 +1412,7 @@ scrape_configs: } // TestOnlyStaleTargetsAreDropped makes sure that when a job has multiple providers, when one of them should no -// longer discover targets, only the stale targets of that provier are dropped. +// longer discover targets, only the stale targets of that provider are dropped. func TestOnlyStaleTargetsAreDropped(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() diff --git a/scrape/scrape.go b/scrape/scrape.go index 2abd4691d6..7e270bb3a3 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -20,6 +20,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math" "net/http" "reflect" @@ -29,11 +30,10 @@ import ( "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/klauspost/compress/gzip" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/version" "github.com/prometheus/prometheus/config" @@ -47,6 +47,7 @@ import ( "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/logging" "github.com/prometheus/prometheus/util/pool" ) @@ -63,7 +64,7 @@ var errNameLabelMandatory = fmt.Errorf("missing metric name (%s label)", labels. // scrapePool manages scrapes for sets of targets. type scrapePool struct { appendable storage.Appendable - logger log.Logger + logger *slog.Logger cancel context.CancelFunc httpOpts []config_util.HTTPClientOption @@ -87,9 +88,10 @@ type scrapePool struct { // Constructor for new scrape loops. This is settable for testing convenience. newLoop func(scrapeLoopOptions) loop - noDefaultPort bool - metrics *scrapeMetrics + + scrapeFailureLogger *logging.JSONFileLogger + scrapeFailureLoggerMtx sync.RWMutex } type labelLimits struct { @@ -110,8 +112,10 @@ type scrapeLoopOptions struct { trackTimestampsStaleness bool interval time.Duration timeout time.Duration - scrapeClassicHistograms bool + alwaysScrapeClassicHist bool + convertClassicHistToNHCB bool validationScheme model.ValidationScheme + fallbackScrapeProtocol string mrc []*relabel.Config cache *scrapeCache @@ -123,9 +127,9 @@ const maxAheadTime = 10 * time.Minute // returning an empty label set is interpreted as "drop". type labelsMutator func(labels.Labels) labels.Labels -func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed uint64, logger log.Logger, buffers *pool.Pool, options *Options, metrics *scrapeMetrics) (*scrapePool, error) { +func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed uint64, logger *slog.Logger, buffers *pool.Pool, options *Options, metrics *scrapeMetrics) (*scrapePool, error) { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName, options.HTTPClientOptions...) 
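The scrape loop options now carry the fallback scrape protocol, the always-scrape-classic-histograms switch, and the classic-to-NHCB conversion switch, and UTF-8 metric name validation is the default unless a job opts back into the legacy scheme. A sketch of setting the corresponding per-job config fields (illustration only; any YAML spellings such as fallback_scrape_protocol are assumptions inferred from the Go field names):

    package scrapesketch

    import "github.com/prometheus/prometheus/config"

    // classicHistogramJob shows the new per-job knobs on config.ScrapeConfig.
    func classicHistogramJob() *config.ScrapeConfig {
        return &config.ScrapeConfig{
            JobName: "classic-histograms",
            // Used when a target responds with a missing or unparsable Content-Type.
            ScrapeFallbackProtocol: config.PrometheusText0_0_4,
            // Keep scraping the classic series even when a native histogram is exposed too.
            AlwaysScrapeClassicHistograms: true,
            // Convert classic histograms to native histograms with custom buckets (NHCB).
            ConvertClassicHistogramsToNHCB: true,
            // UTF-8 validation is now the default; opt back into the legacy scheme per job.
            MetricNameValidationScheme: config.LegacyValidationConfig,
        }
    }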
@@ -146,7 +150,6 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed logger: logger, metrics: metrics, httpOpts: options.HTTPClientOptions, - noDefaultPort: options.NoDefaultPort, } sp.newLoop = func(opts scrapeLoopOptions) loop { // Update the targets retrieval function for metadata to a new scrape cache. @@ -159,7 +162,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed return newScrapeLoop( ctx, opts.scraper, - log.With(logger, "target", opts.target), + logger.With("target", opts.target), buffers, func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, opts.target, opts.honorLabels, opts.mrc) @@ -178,7 +181,8 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed opts.labelLimits, opts.interval, opts.timeout, - opts.scrapeClassicHistograms, + opts.alwaysScrapeClassicHist, + opts.convertClassicHistToNHCB, options.EnableNativeHistogramsIngestion, options.EnableCreatedTimestampZeroIngestion, options.ExtraMetrics, @@ -188,6 +192,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed metrics, options.skipOffsetting, opts.validationScheme, + opts.fallbackScrapeProtocol, ) } sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit)) @@ -218,6 +223,27 @@ func (sp *scrapePool) DroppedTargetsCount() int { return sp.droppedTargetsCount } +func (sp *scrapePool) SetScrapeFailureLogger(l *logging.JSONFileLogger) { + sp.scrapeFailureLoggerMtx.Lock() + defer sp.scrapeFailureLoggerMtx.Unlock() + if l != nil { + l.With("job_name", sp.config.JobName) + } + sp.scrapeFailureLogger = l + + sp.targetMtx.Lock() + defer sp.targetMtx.Unlock() + for _, s := range sp.loops { + s.setScrapeFailureLogger(sp.scrapeFailureLogger) + } +} + +func (sp *scrapePool) getScrapeFailureLogger() *logging.JSONFileLogger { + sp.scrapeFailureLoggerMtx.RLock() + defer sp.scrapeFailureLoggerMtx.RUnlock() + return sp.scrapeFailureLogger +} + // stop terminates all scrape loops and returns after they all terminated. 
func (sp *scrapePool) stop() { sp.mtx.Lock() @@ -303,11 +329,12 @@ func (sp *scrapePool) restartLoops(reuseCache bool) { enableCompression = sp.config.EnableCompression trackTimestampsStaleness = sp.config.TrackTimestampsStaleness mrc = sp.config.MetricRelabelConfigs + fallbackScrapeProtocol = sp.config.ScrapeFallbackProtocol.HeaderMediaType() ) - validationScheme := model.LegacyValidation - if sp.config.MetricNameValidationScheme == config.UTF8ValidationConfig { - validationScheme = model.UTF8Validation + validationScheme := model.UTF8Validation + if sp.config.MetricNameValidationScheme == config.LegacyValidationConfig { + validationScheme = model.LegacyValidation } sp.targetMtx.Lock() @@ -349,6 +376,7 @@ func (sp *scrapePool) restartLoops(reuseCache bool) { interval: interval, timeout: timeout, validationScheme: validationScheme, + fallbackScrapeProtocol: fallbackScrapeProtocol, }) ) if err != nil { @@ -361,6 +389,7 @@ func (sp *scrapePool) restartLoops(reuseCache bool) { wg.Done() newLoop.setForcedError(forcedErr) + newLoop.setScrapeFailureLogger(sp.getScrapeFailureLogger()) newLoop.run(nil) }(oldLoop, newLoop) @@ -404,9 +433,9 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) { sp.droppedTargets = []*Target{} sp.droppedTargetsCount = 0 for _, tg := range tgs { - targets, failures := TargetsFromGroup(tg, sp.config, sp.noDefaultPort, targets, lb) + targets, failures := TargetsFromGroup(tg, sp.config, targets, lb) for _, err := range failures { - level.Error(sp.logger).Log("msg", "Creating target failed", "err", err) + sp.logger.Error("Creating target failed", "err", err) } sp.metrics.targetSyncFailed.WithLabelValues(sp.config.JobName).Add(float64(len(failures))) for _, t := range targets { @@ -457,12 +486,14 @@ func (sp *scrapePool) sync(targets []*Target) { enableCompression = sp.config.EnableCompression trackTimestampsStaleness = sp.config.TrackTimestampsStaleness mrc = sp.config.MetricRelabelConfigs - scrapeClassicHistograms = sp.config.ScrapeClassicHistograms + fallbackScrapeProtocol = sp.config.ScrapeFallbackProtocol.HeaderMediaType() + alwaysScrapeClassicHist = sp.config.AlwaysScrapeClassicHistograms + convertClassicHistToNHCB = sp.config.ConvertClassicHistogramsToNHCB ) - validationScheme := model.LegacyValidation - if sp.config.MetricNameValidationScheme == config.UTF8ValidationConfig { - validationScheme = model.UTF8Validation + validationScheme := model.UTF8Validation + if sp.config.MetricNameValidationScheme == config.LegacyValidationConfig { + validationScheme = model.LegacyValidation } sp.targetMtx.Lock() @@ -498,11 +529,15 @@ func (sp *scrapePool) sync(targets []*Target) { mrc: mrc, interval: interval, timeout: timeout, - scrapeClassicHistograms: scrapeClassicHistograms, + alwaysScrapeClassicHist: alwaysScrapeClassicHist, + convertClassicHistToNHCB: convertClassicHistToNHCB, + validationScheme: validationScheme, + fallbackScrapeProtocol: fallbackScrapeProtocol, }) if err != nil { l.setForcedError(err) } + l.setScrapeFailureLogger(sp.scrapeFailureLogger) sp.activeTargets[hash] = t sp.loops[hash] = l @@ -825,6 +860,7 @@ func (s *targetScraper) readResponse(ctx context.Context, resp *http.Response, w type loop interface { run(errc chan<- error) setForcedError(err error) + setScrapeFailureLogger(*logging.JSONFileLogger) stop() getCache() *scrapeCache disableEndOfRunStalenessMarkers() @@ -839,7 +875,9 @@ type cacheEntry struct { type scrapeLoop struct { scraper scraper - l log.Logger + l *slog.Logger + scrapeFailureLogger *logging.JSONFileLogger + scrapeFailureLoggerMtx 
sync.RWMutex cache *scrapeCache lastScrapeSize int buffers *pool.Pool @@ -855,8 +893,10 @@ type scrapeLoop struct { labelLimits *labelLimits interval time.Duration timeout time.Duration - scrapeClassicHistograms bool + alwaysScrapeClassicHist bool + convertClassicHistToNHCB bool validationScheme model.ValidationScheme + fallbackScrapeProtocol string // Feature flagged options. enableNativeHistogramIngestion bool @@ -1138,7 +1178,7 @@ func (c *scrapeCache) LengthMetadata() int { func newScrapeLoop(ctx context.Context, sc scraper, - l log.Logger, + l *slog.Logger, buffers *pool.Pool, sampleMutator labelsMutator, reportSampleMutator labelsMutator, @@ -1155,7 +1195,8 @@ func newScrapeLoop(ctx context.Context, labelLimits *labelLimits, interval time.Duration, timeout time.Duration, - scrapeClassicHistograms bool, + alwaysScrapeClassicHist bool, + convertClassicHistToNHCB bool, enableNativeHistogramIngestion bool, enableCTZeroIngestion bool, reportExtraMetrics bool, @@ -1165,9 +1206,10 @@ func newScrapeLoop(ctx context.Context, metrics *scrapeMetrics, skipOffsetting bool, validationScheme model.ValidationScheme, + fallbackScrapeProtocol string, ) *scrapeLoop { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } if buffers == nil { buffers = pool.New(1e3, 1e6, 3, func(sz int) interface{} { return make([]byte, 0, sz) }) @@ -1209,7 +1251,8 @@ func newScrapeLoop(ctx context.Context, labelLimits: labelLimits, interval: interval, timeout: timeout, - scrapeClassicHistograms: scrapeClassicHistograms, + alwaysScrapeClassicHist: alwaysScrapeClassicHist, + convertClassicHistToNHCB: convertClassicHistToNHCB, enableNativeHistogramIngestion: enableNativeHistogramIngestion, enableCTZeroIngestion: enableCTZeroIngestion, reportExtraMetrics: reportExtraMetrics, @@ -1217,12 +1260,22 @@ func newScrapeLoop(ctx context.Context, metrics: metrics, skipOffsetting: skipOffsetting, validationScheme: validationScheme, + fallbackScrapeProtocol: fallbackScrapeProtocol, } sl.ctx, sl.cancel = context.WithCancel(ctx) return sl } +func (sl *scrapeLoop) setScrapeFailureLogger(l *logging.JSONFileLogger) { + sl.scrapeFailureLoggerMtx.Lock() + defer sl.scrapeFailureLoggerMtx.Unlock() + if ts, ok := sl.scraper.(fmt.Stringer); ok && l != nil { + l.With("target", ts.String()) + } + sl.scrapeFailureLogger = l +} + func (sl *scrapeLoop) run(errc chan<- error) { if !sl.skipOffsetting { select { @@ -1316,13 +1369,13 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er } err = app.Commit() if err != nil { - level.Error(sl.l).Log("msg", "Scrape commit failed", "err", err) + sl.l.Error("Scrape commit failed", "err", err) } }() defer func() { if err = sl.report(app, appendTime, time.Since(start), total, added, seriesAdded, bytesRead, scrapeErr); err != nil { - level.Warn(sl.l).Log("msg", "Appending scrape report failed", "err", err) + sl.l.Warn("Appending scrape report failed", "err", err) } }() @@ -1332,7 +1385,7 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er if _, _, _, err := sl.append(app, []byte{}, "", appendTime); err != nil { app.Rollback() app = sl.appender(sl.appenderCtx) - level.Warn(sl.l).Log("msg", "Append failed", "err", err) + sl.l.Warn("Append failed", "err", err) } if errc != nil { errc <- forcedErr @@ -1365,7 +1418,12 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er } bytesRead = len(b) } else { - level.Debug(sl.l).Log("msg", "Scrape failed", "err", scrapeErr) + sl.l.Debug("Scrape failed", "err", scrapeErr) 
+ sl.scrapeFailureLoggerMtx.RLock() + if sl.scrapeFailureLogger != nil { + sl.scrapeFailureLogger.Error("err", scrapeErr) + } + sl.scrapeFailureLoggerMtx.RUnlock() if errc != nil { errc <- scrapeErr } @@ -1380,13 +1438,13 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er if appErr != nil { app.Rollback() app = sl.appender(sl.appenderCtx) - level.Debug(sl.l).Log("msg", "Append failed", "err", appErr) + sl.l.Debug("Append failed", "err", appErr) // The append failed, probably due to a parse error or sample limit. // Call sl.append again with an empty scrape to trigger stale markers. if _, _, _, err := sl.append(app, []byte{}, "", appendTime); err != nil { app.Rollback() app = sl.appender(sl.appenderCtx) - level.Warn(sl.l).Log("msg", "Append failed", "err", err) + sl.l.Warn("Append failed", "err", err) } } @@ -1459,16 +1517,16 @@ func (sl *scrapeLoop) endOfRunStaleness(last time.Time, ticker *time.Ticker, int } err = app.Commit() if err != nil { - level.Warn(sl.l).Log("msg", "Stale commit failed", "err", err) + sl.l.Warn("Stale commit failed", "err", err) } }() if _, _, _, err = sl.append(app, []byte{}, "", staleTime); err != nil { app.Rollback() app = sl.appender(sl.appenderCtx) - level.Warn(sl.l).Log("msg", "Stale append failed", "err", err) + sl.l.Warn("Stale append failed", "err", err) } if err = sl.reportStale(app, staleTime); err != nil { - level.Warn(sl.l).Log("msg", "Stale report failed", "err", err) + sl.l.Warn("Stale report failed", "err", err) } } @@ -1495,11 +1553,24 @@ type appendErrors struct { } func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, ts time.Time) (total, added, seriesAdded int, err error) { - p, err := textparse.New(b, contentType, sl.scrapeClassicHistograms, sl.symbolTable) + p, err := textparse.New(b, contentType, sl.fallbackScrapeProtocol, sl.alwaysScrapeClassicHist, sl.enableCTZeroIngestion, sl.symbolTable) + if p == nil { + sl.l.Error( + "Failed to determine correct type of scrape target.", + "content_type", contentType, + "fallback_media_type", sl.fallbackScrapeProtocol, + "err", err, + ) + return + } + if sl.convertClassicHistToNHCB { + p = textparse.NewNHCBParser(p, sl.symbolTable, sl.alwaysScrapeClassicHist) + } if err != nil { - level.Debug(sl.l).Log( - "msg", "Invalid content type on scrape, using prometheus parser as fallback.", + sl.l.Debug( + "Invalid content type on scrape, using fallback setting.", "content_type", contentType, + "fallback_media_type", sl.fallbackScrapeProtocol, "err", err, ) } @@ -1515,7 +1586,7 @@ func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, metadataChanged bool ) - exemplars := make([]exemplar.Exemplar, 1) + exemplars := make([]exemplar.Exemplar, 0, 1) // updateMetadata updates the current iteration's metadata object and the // metadataChanged value if we have metadata in the scrape cache AND the @@ -1657,11 +1728,19 @@ loop: } else { if sl.enableCTZeroIngestion { if ctMs := p.CreatedTimestamp(); ctMs != nil { - ref, err = app.AppendCTZeroSample(ref, lset, t, *ctMs) + if isHistogram && sl.enableNativeHistogramIngestion { + if h != nil { + ref, err = app.AppendHistogramCTZeroSample(ref, lset, t, *ctMs, h, nil) + } else { + ref, err = app.AppendHistogramCTZeroSample(ref, lset, t, *ctMs, nil, fh) + } + } else { + ref, err = app.AppendCTZeroSample(ref, lset, t, *ctMs) + } if err != nil && !errors.Is(err, storage.ErrOutOfOrderCT) { // OOO is a common case, ignoring completely for now. // CT is an experimental feature. 
For now, we don't need to fail the // scrape on errors updating the created timestamp, log debug. - level.Debug(sl.l).Log("msg", "Error when appending CT in scrape loop", "series", string(met), "ct", *ctMs, "t", t, "err", err) + sl.l.Debug("Error when appending CT in scrape loop", "series", string(met), "ct", *ctMs, "t", t, "err", err) } } } @@ -1686,7 +1765,7 @@ loop: sampleAdded, err = sl.checkAddError(met, err, &sampleLimitErr, &bucketLimitErr, &appErrs) if err != nil { if !errors.Is(err, storage.ErrNotFound) { - level.Debug(sl.l).Log("msg", "Unexpected error", "series", string(met), "err", err) + sl.l.Debug("Unexpected error", "series", string(met), "err", err) } break loop } @@ -1738,21 +1817,21 @@ loop: outOfOrderExemplars++ default: // Since exemplar storage is still experimental, we don't fail the scrape on ingestion errors. - level.Debug(sl.l).Log("msg", "Error while adding exemplar in AddExemplar", "exemplar", fmt.Sprintf("%+v", e), "err", exemplarErr) + sl.l.Debug("Error while adding exemplar in AddExemplar", "exemplar", fmt.Sprintf("%+v", e), "err", exemplarErr) } } if outOfOrderExemplars > 0 && outOfOrderExemplars == len(exemplars) { // Only report out of order exemplars if all are out of order, otherwise this was a partial update // to some existing set of exemplars. appErrs.numExemplarOutOfOrder += outOfOrderExemplars - level.Debug(sl.l).Log("msg", "Out of order exemplars", "count", outOfOrderExemplars, "latest", fmt.Sprintf("%+v", exemplars[len(exemplars)-1])) + sl.l.Debug("Out of order exemplars", "count", outOfOrderExemplars, "latest", fmt.Sprintf("%+v", exemplars[len(exemplars)-1])) sl.metrics.targetScrapeExemplarOutOfOrder.Add(float64(outOfOrderExemplars)) } if sl.appendMetadataToWAL && metadataChanged { if _, merr := app.UpdateMetadata(ref, lset, meta); merr != nil { // No need to fail the scrape on errors appending metadata. - level.Debug(sl.l).Log("msg", "Error when appending metadata in scrape loop", "ref", fmt.Sprintf("%d", ref), "metadata", fmt.Sprintf("%+v", meta), "err", merr) + sl.l.Debug("Error when appending metadata in scrape loop", "ref", fmt.Sprintf("%d", ref), "metadata", fmt.Sprintf("%+v", meta), "err", merr) } } } @@ -1771,21 +1850,23 @@ loop: sl.metrics.targetScrapeNativeHistogramBucketLimit.Inc() } if appErrs.numOutOfOrder > 0 { - level.Warn(sl.l).Log("msg", "Error on ingesting out-of-order samples", "num_dropped", appErrs.numOutOfOrder) + sl.l.Warn("Error on ingesting out-of-order samples", "num_dropped", appErrs.numOutOfOrder) } if appErrs.numDuplicates > 0 { - level.Warn(sl.l).Log("msg", "Error on ingesting samples with different value but same timestamp", "num_dropped", appErrs.numDuplicates) + sl.l.Warn("Error on ingesting samples with different value but same timestamp", "num_dropped", appErrs.numDuplicates) } if appErrs.numOutOfBounds > 0 { - level.Warn(sl.l).Log("msg", "Error on ingesting samples that are too old or are too far into the future", "num_dropped", appErrs.numOutOfBounds) + sl.l.Warn("Error on ingesting samples that are too old or are too far into the future", "num_dropped", appErrs.numOutOfBounds) } if appErrs.numExemplarOutOfOrder > 0 { - level.Warn(sl.l).Log("msg", "Error on ingesting out-of-order exemplars", "num_dropped", appErrs.numExemplarOutOfOrder) + sl.l.Warn("Error on ingesting out-of-order exemplars", "num_dropped", appErrs.numExemplarOutOfOrder) } if err == nil { sl.cache.forEachStale(func(lset labels.Labels) bool { // Series no longer exposed, mark it stale. 
+ app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true}) _, err = app.Append(0, lset, defTime, math.Float64frombits(value.StaleNaN)) + app.SetOptions(nil) switch { case errors.Is(err, storage.ErrOutOfOrderSample), errors.Is(err, storage.ErrDuplicateSampleForTimestamp): // Do not count these in logging, as this is expected if a target @@ -1808,17 +1889,17 @@ func (sl *scrapeLoop) checkAddError(met []byte, err error, sampleLimitErr, bucke return false, storage.ErrNotFound case errors.Is(err, storage.ErrOutOfOrderSample): appErrs.numOutOfOrder++ - level.Debug(sl.l).Log("msg", "Out of order sample", "series", string(met)) + sl.l.Debug("Out of order sample", "series", string(met)) sl.metrics.targetScrapeSampleOutOfOrder.Inc() return false, nil case errors.Is(err, storage.ErrDuplicateSampleForTimestamp): appErrs.numDuplicates++ - level.Debug(sl.l).Log("msg", "Duplicate sample for timestamp", "series", string(met)) + sl.l.Debug("Duplicate sample for timestamp", "series", string(met)) sl.metrics.targetScrapeSampleDuplicate.Inc() return false, nil case errors.Is(err, storage.ErrOutOfBounds): appErrs.numOutOfBounds++ - level.Debug(sl.l).Log("msg", "Out of bounds metric", "series", string(met)) + sl.l.Debug("Out of bounds metric", "series", string(met)) sl.metrics.targetScrapeSampleOutOfBounds.Inc() return false, nil case errors.Is(err, errSampleLimit): @@ -1891,7 +1972,7 @@ func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration tim func (sl *scrapeLoop) reportStale(app storage.Appender, start time.Time) (err error) { ts := timestamp.FromTime(start) - + app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true}) stale := math.Float64frombits(value.StaleNaN) b := labels.NewBuilder(labels.EmptyLabels()) diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index b703f21d46..f75e1db89a 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -29,16 +29,18 @@ import ( "strings" "sync" "testing" + "text/template" "time" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/google/go-cmp/cmp" + "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" dto "github.com/prometheus/client_model/go" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/config" @@ -53,6 +55,7 @@ import ( "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" + "github.com/prometheus/prometheus/util/logging" "github.com/prometheus/prometheus/util/pool" "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" @@ -83,6 +86,97 @@ func TestNewScrapePool(t *testing.T) { require.NotNil(t, sp.newLoop, "newLoop function not initialized.") } +func TestStorageHandlesOutOfOrderTimestamps(t *testing.T) { + // Test with default OutOfOrderTimeWindow (0) + t.Run("Out-Of-Order Sample Disabled", func(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + + runScrapeLoopTest(t, s, false) + }) + + // Test with specific OutOfOrderTimeWindow (600000) + t.Run("Out-Of-Order Sample Enabled", func(t *testing.T) { + s := teststorage.New(t, 600000) + defer s.Close() + + runScrapeLoopTest(t, s, true) + }) +} + +func runScrapeLoopTest(t *testing.T, s *teststorage.TestStorage, expectOutOfOrder bool) { + // Create an appender for 
adding samples to the storage. + app := s.Appender(context.Background()) + capp := &collectResultAppender{next: app} + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return capp }, 0) + + // Current time for generating timestamps. + now := time.Now() + + // Calculate timestamps for the samples based on the current time. + now = now.Truncate(time.Minute) // round down the now timestamp to the nearest minute + timestampInorder1 := now + timestampOutOfOrder := now.Add(-5 * time.Minute) + timestampInorder2 := now.Add(5 * time.Minute) + + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte(`metric_a{a="1",b="1"} 1`), "", timestampInorder1) + require.NoError(t, err) + + _, _, _, err = sl.append(slApp, []byte(`metric_a{a="1",b="1"} 2`), "", timestampOutOfOrder) + require.NoError(t, err) + + _, _, _, err = sl.append(slApp, []byte(`metric_a{a="1",b="1"} 3`), "", timestampInorder2) + require.NoError(t, err) + + require.NoError(t, slApp.Commit()) + + // Query the samples back from the storage. + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := s.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + + // Use a matcher to filter the metric name. + series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "metric_a")) + + var results []floatSample + for series.Next() { + it := series.At().Iterator(nil) + for it.Next() == chunkenc.ValFloat { + t, v := it.At() + results = append(results, floatSample{ + metric: series.At().Labels(), + t: t, + f: v, + }) + } + require.NoError(t, it.Err()) + } + require.NoError(t, series.Err()) + + // Define the expected results + want := []floatSample{ + { + metric: labels.FromStrings("__name__", "metric_a", "a", "1", "b", "1"), + t: timestamp.FromTime(timestampInorder1), + f: 1, + }, + { + metric: labels.FromStrings("__name__", "metric_a", "a", "1", "b", "1"), + t: timestamp.FromTime(timestampInorder2), + f: 3, + }, + } + + if expectOutOfOrder { + require.NotEqual(t, want, results, "Expected results to include out-of-order sample:\n%s", results) + } else { + require.Equal(t, want, results, "Appended samples not as expected:\n%s", results) + } +} + func TestDroppedTargetsList(t *testing.T) { var ( app = &nopAppendable{} @@ -158,6 +252,9 @@ type testLoop struct { timeout time.Duration } +func (l *testLoop) setScrapeFailureLogger(*logging.JSONFileLogger) { +} + func (l *testLoop) run(errc chan<- error) { if l.runOnce { panic("loop must be started only once") @@ -392,7 +489,7 @@ func TestScrapePoolTargetLimit(t *testing.T) { activeTargets: map[uint64]*Target{}, loops: map[uint64]loop{}, newLoop: newLoop, - logger: log.NewNopLogger(), + logger: promslog.NewNopLogger(), client: http.DefaultClient, metrics: newTestScrapeMetrics(t), symbolTable: labels.NewSymbolTable(), @@ -437,7 +534,7 @@ func TestScrapePoolTargetLimit(t *testing.T) { lerr := l.(*testLoop).getForcedError() if shouldErr { require.Error(t, lerr, "error was expected for %d targets with a limit of %d", targets, limit) - require.Equal(t, fmt.Sprintf("target_limit exceeded (number of targets: %d, limit: %d)", targets, limit), lerr.Error()) + require.EqualError(t, lerr, fmt.Sprintf("target_limit exceeded (number of targets: %d, limit: %d)", targets, limit)) } else { require.NoError(t, lerr) } @@ -680,11 +777,13 @@ func newBasicScrapeLoop(t testing.TB, ctx context.Context, scraper scraper, app false, false, false, + false, 
nil, false, newTestScrapeMetrics(t), false, model.LegacyValidation, + "text/plain", ) } @@ -823,11 +922,13 @@ func TestScrapeLoopRun(t *testing.T) { false, false, false, + false, nil, false, scrapeMetrics, false, model.LegacyValidation, + "text/plain", ) // The loop must terminate during the initial offset if the context @@ -968,11 +1069,13 @@ func TestScrapeLoopMetadata(t *testing.T) { false, false, false, + false, nil, false, scrapeMetrics, false, model.LegacyValidation, + "text/plain", ) defer cancel() @@ -1040,6 +1143,7 @@ func TestScrapeLoopSeriesAdded(t *testing.T) { } func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { + model.NameValidationScheme = model.LegacyValidation s := teststorage.New(t) defer s.Close() ctx, cancel := context.WithCancel(context.Background()) @@ -1144,6 +1248,87 @@ func BenchmarkScrapeLoopAppendOM(b *testing.B) { } } +func TestSetOptionsHandlingStaleness(t *testing.T) { + s := teststorage.New(t, 600000) + defer s.Close() + + signal := make(chan struct{}, 1) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Function to run the scrape loop + runScrapeLoop := func(ctx context.Context, t *testing.T, cue int, action func(*scrapeLoop)) { + var ( + scraper = &testScraper{} + app = func(ctx context.Context) storage.Appender { + return s.Appender(ctx) + } + ) + sl := newBasicScrapeLoop(t, ctx, scraper, app, 10*time.Millisecond) + numScrapes := 0 + scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { + numScrapes++ + if numScrapes == cue { + action(sl) + } + w.Write([]byte(fmt.Sprintf("metric_a{a=\"1\",b=\"1\"} %d\n", 42+numScrapes))) + return nil + } + sl.run(nil) + } + go func() { + runScrapeLoop(ctx, t, 2, func(sl *scrapeLoop) { + go sl.stop() + // Wait a bit then start a new target. 
+ time.Sleep(100 * time.Millisecond) + go func() { + runScrapeLoop(ctx, t, 4, func(_ *scrapeLoop) { + cancel() + }) + signal <- struct{}{} + }() + }) + }() + + select { + case <-signal: + case <-time.After(10 * time.Second): + t.Fatalf("Scrape wasn't stopped.") + } + + ctx1, cancel := context.WithCancel(context.Background()) + defer cancel() + + q, err := s.Querier(0, time.Now().UnixNano()) + + require.NoError(t, err) + defer q.Close() + + series := q.Select(ctx1, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "metric_a")) + + var results []floatSample + for series.Next() { + it := series.At().Iterator(nil) + for it.Next() == chunkenc.ValFloat { + t, v := it.At() + results = append(results, floatSample{ + metric: series.At().Labels(), + t: t, + f: v, + }) + } + require.NoError(t, it.Err()) + } + require.NoError(t, series.Err()) + var c int + for _, s := range results { + if value.IsStaleNaN(s.f) { + c++ + } + } + require.Equal(t, 0, c, "invalid count of staleness markers after stopping the engine") +} + func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) { appender := &collectResultAppender{} var ( @@ -1520,7 +1705,8 @@ func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) { fakeRef := storage.SeriesRef(1) expValue := float64(1) metric := []byte(`metric{n="1"} 1`) - p, warning := textparse.New(metric, "", false, labels.NewSymbolTable()) + p, warning := textparse.New(metric, "text/plain", "", false, false, labels.NewSymbolTable()) + require.NotNil(t, p) require.NoError(t, warning) var lset labels.Labels @@ -1840,7 +2026,7 @@ func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T) { func TestScrapeLoopAppendExemplar(t *testing.T) { tests := []struct { title string - scrapeClassicHistograms bool + alwaysScrapeClassicHist bool enableNativeHistogramsIngestion bool scrapeText string contentType string @@ -1994,7 +2180,8 @@ metric: < `, contentType: "application/vnd.google.protobuf", histograms: []histogramSample{{ - t: 1234568, + t: 1234568, + metric: labels.FromStrings("__name__", "test_histogram"), h: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -2108,7 +2295,7 @@ metric: < > `, - scrapeClassicHistograms: true, + alwaysScrapeClassicHist: true, contentType: "application/vnd.google.protobuf", floats: []floatSample{ {metric: labels.FromStrings("__name__", "test_histogram_count"), t: 1234568, f: 175}, @@ -2120,7 +2307,8 @@ metric: < {metric: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), t: 1234568, f: 175}, }, histograms: []histogramSample{{ - t: 1234568, + t: 1234568, + metric: labels.FromStrings("__name__", "test_histogram"), h: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -2169,7 +2357,7 @@ metric: < sl.reportSampleMutator = func(l labels.Labels) labels.Labels { return mutateReportSampleLabels(l, discoveryLabels) } - sl.scrapeClassicHistograms = test.scrapeClassicHistograms + sl.alwaysScrapeClassicHist = test.alwaysScrapeClassicHist now := time.Now() @@ -2376,8 +2564,11 @@ func TestTargetScraperScrapeOK(t *testing.T) { expectedTimeout = "1.5" ) - var protobufParsing bool - var allowUTF8 bool + var ( + protobufParsing bool + allowUTF8 bool + qValuePattern = regexp.MustCompile(`q=([0-9]+(\.\d+)?)`) + ) server := httptest.NewServer( http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -2390,6 +2581,17 @@ func TestTargetScraperScrapeOK(t *testing.T) { "Expected Accept header to prefer application/vnd.google.protobuf.") } + contentTypes := strings.Split(accept, ",") + for _, ct := 
range contentTypes { + match := qValuePattern.FindStringSubmatch(ct) + require.Len(t, match, 3) + qValue, err := strconv.ParseFloat(match[1], 64) + require.NoError(t, err, "Error parsing q value") + require.GreaterOrEqual(t, qValue, float64(0)) + require.LessOrEqual(t, qValue, float64(1)) + require.LessOrEqual(t, len(strings.Split(match[1], ".")[1]), 3, "q value should have at most 3 decimal places") + } + timeout := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds") require.Equal(t, expectedTimeout, timeout, "Expected scrape timeout header.") @@ -2528,7 +2730,7 @@ func TestTargetScrapeScrapeNotFound(t *testing.T) { resp, err := ts.scrape(context.Background()) require.NoError(t, err) _, err = ts.readResponse(context.Background(), resp, io.Discard) - require.Contains(t, err.Error(), "404", "Expected \"404 NotFound\" error but got: %s", err) + require.ErrorContains(t, err, "404", "Expected \"404 NotFound\" error but got: %s", err) } func TestTargetScraperBodySizeLimit(t *testing.T) { @@ -3040,7 +3242,7 @@ func TestReuseCacheRace(t *testing.T) { func TestCheckAddError(t *testing.T) { var appErrs appendErrors - sl := scrapeLoop{l: log.NewNopLogger(), metrics: newTestScrapeMetrics(t)} + sl := scrapeLoop{l: promslog.NewNopLogger(), metrics: newTestScrapeMetrics(t)} sl.checkAddError(nil, storage.ErrOutOfOrderSample, nil, nil, &appErrs) require.Equal(t, 1, appErrs.numOutOfOrder) } @@ -3111,18 +3313,7 @@ func TestScrapeReportLimit(t *testing.T) { ScrapeTimeout: model.Duration(100 * time.Millisecond), } - var ( - scrapes int - scrapedTwice = make(chan bool) - ) - - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - fmt.Fprint(w, "metric_a 44\nmetric_b 44\nmetric_c 44\nmetric_d 44\n") - scrapes++ - if scrapes == 2 { - close(scrapedTwice) - } - })) + ts, scrapedTwice := newScrapableServer("metric_a 44\nmetric_b 44\nmetric_c 44\nmetric_d 44\n") defer ts.Close() sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) @@ -3165,6 +3356,52 @@ func TestScrapeReportLimit(t *testing.T) { require.True(t, found) } +func TestScrapeUTF8(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + model.NameValidationScheme = model.UTF8Validation + t.Cleanup(func() { model.NameValidationScheme = model.LegacyValidation }) + + cfg := &config.ScrapeConfig{ + JobName: "test", + Scheme: "http", + ScrapeInterval: model.Duration(100 * time.Millisecond), + ScrapeTimeout: model.Duration(100 * time.Millisecond), + MetricNameValidationScheme: config.UTF8ValidationConfig, + } + ts, scrapedTwice := newScrapableServer("{\"with.dots\"} 42\n") + defer ts.Close() + + sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + require.NoError(t, err) + defer sp.stop() + + testURL, err := url.Parse(ts.URL) + require.NoError(t, err) + sp.Sync([]*targetgroup.Group{ + { + Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(testURL.Host)}}, + }, + }) + + select { + case <-time.After(5 * time.Second): + t.Fatalf("target was not scraped twice") + case <-scrapedTwice: + // If the target has been scraped twice, report samples from the first + // scrape have been inserted in the database. 
+ } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := s.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "with.dots")) + + require.True(t, series.Next(), "series not found in tsdb") +} + func TestScrapeLoopLabelLimit(t *testing.T) { tests := []struct { title string @@ -3361,16 +3598,7 @@ test_summary_count 199 // The expected "quantile" values do not have the trailing ".0". expectedQuantileValues := []string{"0.5", "0.9", "0.95", "0.99", "1"} - scrapeCount := 0 - scraped := make(chan bool) - - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - fmt.Fprint(w, metricsText) - scrapeCount++ - if scrapeCount > 2 { - close(scraped) - } - })) + ts, scrapedTwice := newScrapableServer(metricsText) defer ts.Close() sp, err := newScrapePool(config, simpleStorage, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) @@ -3389,7 +3617,7 @@ test_summary_count 199 select { case <-time.After(5 * time.Second): t.Fatalf("target was not scraped") - case <-scraped: + case <-scrapedTwice: } ctx, cancel := context.WithCancel(context.Background()) @@ -3421,6 +3649,524 @@ test_summary_count 199 checkValues("quantile", expectedQuantileValues, series) } +// Testing whether we can automatically convert scraped classic histograms into native histograms with custom buckets. +func TestConvertClassicHistogramsToNHCB(t *testing.T) { + genTestCounterText := func(name string, value int, withMetadata bool) string { + if withMetadata { + return fmt.Sprintf(` +# HELP %s some help text +# TYPE %s counter +%s{address="0.0.0.0",port="5001"} %d +`, name, name, name, value) + } + return fmt.Sprintf(` +%s %d +`, name, value) + } + genTestHistText := func(name string, withMetadata bool) string { + data := map[string]interface{}{ + "name": name, + } + b := &bytes.Buffer{} + if withMetadata { + template.Must(template.New("").Parse(` +# HELP {{.name}} This is a histogram with default buckets +# TYPE {{.name}} histogram +`)).Execute(b, data) + } + template.Must(template.New("").Parse(` +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.005"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.01"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.025"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.05"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.1"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.25"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.5"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="1"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="2.5"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="5"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="10"} 1 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="+Inf"} 1 +{{.name}}_sum{address="0.0.0.0",port="5001"} 10 +{{.name}}_count{address="0.0.0.0",port="5001"} 1 +`)).Execute(b, data) + return b.String() + } + genTestCounterProto := func(name string, value int) string { + return fmt.Sprintf(` +name: "%s" +help: "some help text" +type: COUNTER +metric: < + label: < + name: "address" + value: "0.0.0.0" + > + label: < + name: "port" + value: "5001" + > + counter: < + value: %d + > +> +`, name, value) + } + genTestHistProto := func(name string, hasClassic, hasExponential bool) string { + var classic string + if hasClassic { + classic = ` +bucket: < + cumulative_count: 0 + upper_bound: 0.005 +> 
+bucket: < + cumulative_count: 0 + upper_bound: 0.01 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.025 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.05 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.1 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.25 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.5 +> +bucket: < + cumulative_count: 0 + upper_bound: 1 +> +bucket: < + cumulative_count: 0 + upper_bound: 2.5 +> +bucket: < + cumulative_count: 0 + upper_bound: 5 +> +bucket: < + cumulative_count: 1 + upper_bound: 10 +>` + } + var expo string + if hasExponential { + expo = ` +schema: 3 +zero_threshold: 2.938735877055719e-39 +zero_count: 0 +positive_span: < + offset: 2 + length: 1 +> +positive_delta: 1` + } + return fmt.Sprintf(` +name: "%s" +help: "This is a histogram with default buckets" +type: HISTOGRAM +metric: < + label: < + name: "address" + value: "0.0.0.0" + > + label: < + name: "port" + value: "5001" + > + histogram: < + sample_count: 1 + sample_sum: 10 + %s + %s + > + timestamp_ms: 1234568 +> +`, name, classic, expo) + } + + metricsTexts := map[string]struct { + text []string + contentType string + hasClassic bool + hasExponential bool + }{ + "text": { + text: []string{ + genTestCounterText("test_metric_1", 1, true), + genTestCounterText("test_metric_1_count", 1, true), + genTestCounterText("test_metric_1_sum", 1, true), + genTestCounterText("test_metric_1_bucket", 1, true), + genTestHistText("test_histogram_1", true), + genTestCounterText("test_metric_2", 1, true), + genTestCounterText("test_metric_2_count", 1, true), + genTestCounterText("test_metric_2_sum", 1, true), + genTestCounterText("test_metric_2_bucket", 1, true), + genTestHistText("test_histogram_2", true), + genTestCounterText("test_metric_3", 1, true), + genTestCounterText("test_metric_3_count", 1, true), + genTestCounterText("test_metric_3_sum", 1, true), + genTestCounterText("test_metric_3_bucket", 1, true), + genTestHistText("test_histogram_3", true), + }, + hasClassic: true, + }, + "text, in different order": { + text: []string{ + genTestCounterText("test_metric_1", 1, true), + genTestCounterText("test_metric_1_count", 1, true), + genTestCounterText("test_metric_1_sum", 1, true), + genTestCounterText("test_metric_1_bucket", 1, true), + genTestHistText("test_histogram_1", true), + genTestCounterText("test_metric_2", 1, true), + genTestCounterText("test_metric_2_count", 1, true), + genTestCounterText("test_metric_2_sum", 1, true), + genTestCounterText("test_metric_2_bucket", 1, true), + genTestHistText("test_histogram_2", true), + genTestHistText("test_histogram_3", true), + genTestCounterText("test_metric_3", 1, true), + genTestCounterText("test_metric_3_count", 1, true), + genTestCounterText("test_metric_3_sum", 1, true), + genTestCounterText("test_metric_3_bucket", 1, true), + }, + hasClassic: true, + }, + "protobuf": { + text: []string{ + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_1", true, false), + genTestCounterProto("test_metric_2", 1), + genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_2", true, false), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + 
genTestCounterProto("test_metric_3_bucket", 1), + genTestHistProto("test_histogram_3", true, false), + }, + contentType: "application/vnd.google.protobuf", + hasClassic: true, + }, + "protobuf, in different order": { + text: []string{ + genTestHistProto("test_histogram_1", true, false), + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_2", true, false), + genTestCounterProto("test_metric_2", 1), + genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_3", true, false), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + genTestCounterProto("test_metric_3_bucket", 1), + }, + contentType: "application/vnd.google.protobuf", + hasClassic: true, + }, + "protobuf, with additional native exponential histogram": { + text: []string{ + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_1", true, true), + genTestCounterProto("test_metric_2", 1), + genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_2", true, true), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + genTestCounterProto("test_metric_3_bucket", 1), + genTestHistProto("test_histogram_3", true, true), + }, + contentType: "application/vnd.google.protobuf", + hasClassic: true, + hasExponential: true, + }, + "protobuf, with only native exponential histogram": { + text: []string{ + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_1", false, true), + genTestCounterProto("test_metric_2", 1), + genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_2", false, true), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + genTestCounterProto("test_metric_3_bucket", 1), + genTestHistProto("test_histogram_3", false, true), + }, + contentType: "application/vnd.google.protobuf", + hasExponential: true, + }, + } + + checkBucketValues := func(expectedCount int, series storage.SeriesSet) { + labelName := "le" + var expectedValues []string + if expectedCount > 0 { + expectedValues = []string{"0.005", "0.01", "0.025", "0.05", "0.1", "0.25", "0.5", "1.0", "2.5", "5.0", "10.0", "+Inf"} + } + foundLeValues := map[string]bool{} + + for series.Next() { + s := series.At() + v := s.Labels().Get(labelName) + require.NotContains(t, foundLeValues, v, "duplicate label value found") + foundLeValues[v] = true + } + + require.Equal(t, len(expectedValues), len(foundLeValues), "unexpected number of label values, expected %v but found %v", expectedValues, foundLeValues) + for _, v := range expectedValues { + require.Contains(t, 
foundLeValues, v, "label value not found") + } + } + + // Checks that the expected series is present and runs a basic sanity check of the float values. + checkFloatSeries := func(series storage.SeriesSet, expectedCount int, expectedFloat float64) { + count := 0 + for series.Next() { + i := series.At().Iterator(nil) + loop: + for { + switch i.Next() { + case chunkenc.ValNone: + break loop + case chunkenc.ValFloat: + _, f := i.At() + require.Equal(t, expectedFloat, f) + case chunkenc.ValHistogram: + panic("unexpected value type: histogram") + case chunkenc.ValFloatHistogram: + panic("unexpected value type: float histogram") + default: + panic("unexpected value type") + } + } + count++ + } + require.Equal(t, expectedCount, count, "number of float series not as expected") + } + + // Checks that the expected series is present and runs a basic sanity check of the histogram values. + checkHistSeries := func(series storage.SeriesSet, expectedCount int, expectedSchema int32) { + count := 0 + for series.Next() { + i := series.At().Iterator(nil) + loop: + for { + switch i.Next() { + case chunkenc.ValNone: + break loop + case chunkenc.ValFloat: + panic("unexpected value type: float") + case chunkenc.ValHistogram: + _, h := i.AtHistogram(nil) + require.Equal(t, expectedSchema, h.Schema) + require.Equal(t, uint64(1), h.Count) + require.Equal(t, 10.0, h.Sum) + case chunkenc.ValFloatHistogram: + _, h := i.AtFloatHistogram(nil) + require.Equal(t, expectedSchema, h.Schema) + require.Equal(t, uint64(1), h.Count) + require.Equal(t, 10.0, h.Sum) + default: + panic("unexpected value type") + } + } + count++ + } + require.Equal(t, expectedCount, count, "number of histogram series not as expected") + } + + for metricsTextName, metricsText := range metricsTexts { + for name, tc := range map[string]struct { + alwaysScrapeClassicHistograms bool + convertClassicHistToNHCB bool + }{ + "convert with scrape": { + alwaysScrapeClassicHistograms: true, + convertClassicHistToNHCB: true, + }, + "convert without scrape": { + alwaysScrapeClassicHistograms: false, + convertClassicHistToNHCB: true, + }, + "scrape without convert": { + alwaysScrapeClassicHistograms: true, + convertClassicHistToNHCB: false, + }, + "neither scrape nor convert": { + alwaysScrapeClassicHistograms: false, + convertClassicHistToNHCB: false, + }, + } { + var expectedClassicHistCount, expectedNativeHistCount int + var expectCustomBuckets bool + if metricsText.hasExponential { + expectedNativeHistCount = 1 + expectCustomBuckets = false + expectedClassicHistCount = 0 + if metricsText.hasClassic && tc.alwaysScrapeClassicHistograms { + expectedClassicHistCount = 1 + } + } else if metricsText.hasClassic { + switch { + case tc.alwaysScrapeClassicHistograms && tc.convertClassicHistToNHCB: + expectedClassicHistCount = 1 + expectedNativeHistCount = 1 + expectCustomBuckets = true + case !tc.alwaysScrapeClassicHistograms && tc.convertClassicHistToNHCB: + expectedClassicHistCount = 0 + expectedNativeHistCount = 1 + expectCustomBuckets = true + case !tc.convertClassicHistToNHCB: + expectedClassicHistCount = 1 + expectedNativeHistCount = 0 + } + } + + t.Run(fmt.Sprintf("%s with %s", name, metricsTextName), func(t *testing.T) { + simpleStorage := teststorage.New(t) + defer simpleStorage.Close() + + config := &config.ScrapeConfig{ + JobName: "test", + SampleLimit: 100, + Scheme: "http", + ScrapeInterval: model.Duration(50 * time.Millisecond), + ScrapeTimeout: model.Duration(25 * time.Millisecond), + AlwaysScrapeClassicHistograms: tc.alwaysScrapeClassicHistograms, + 
ConvertClassicHistogramsToNHCB: tc.convertClassicHistToNHCB, + } + + scrapeCount := 0 + scraped := make(chan bool) + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if metricsText.contentType != "" { + w.Header().Set("Content-Type", `application/vnd.google.protobuf; proto=io.prometheus.client.MetricFamily; encoding=delimited`) + for _, text := range metricsText.text { + buf := &bytes.Buffer{} + // In case of protobuf, we have to create the binary representation. + pb := &dto.MetricFamily{} + // From text to proto message. + require.NoError(t, proto.UnmarshalText(text, pb)) + // From proto message to binary protobuf. + protoBuf, err := proto.Marshal(pb) + require.NoError(t, err) + + // Write first length, then binary protobuf. + varintBuf := binary.AppendUvarint(nil, uint64(len(protoBuf))) + buf.Write(varintBuf) + buf.Write(protoBuf) + w.Write(buf.Bytes()) + } + } else { + for _, text := range metricsText.text { + fmt.Fprint(w, text) + } + } + scrapeCount++ + if scrapeCount > 2 { + close(scraped) + } + })) + defer ts.Close() + + sp, err := newScrapePool(config, simpleStorage, 0, nil, nil, &Options{DiscoveryReloadInterval: model.Duration(10 * time.Millisecond), EnableNativeHistogramsIngestion: true}, newTestScrapeMetrics(t)) + require.NoError(t, err) + defer sp.stop() + + testURL, err := url.Parse(ts.URL) + require.NoError(t, err) + sp.Sync([]*targetgroup.Group{ + { + Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(testURL.Host)}}, + }, + }) + require.Len(t, sp.ActiveTargets(), 1) + + select { + case <-time.After(5 * time.Second): + t.Fatalf("target was not scraped") + case <-scraped: + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := simpleStorage.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + + var series storage.SeriesSet + + for i := 1; i <= 3; i++ { + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d_count", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d_sum", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d_bucket", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d_count", i))) + checkFloatSeries(series, expectedClassicHistCount, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d_sum", i))) + checkFloatSeries(series, expectedClassicHistCount, 10.) 
+ + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d_bucket", i))) + checkBucketValues(expectedClassicHistCount, series) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d", i))) + + var expectedSchema int32 + if expectCustomBuckets { + expectedSchema = histogram.CustomBucketsSchema + } else { + expectedSchema = 3 + } + checkHistSeries(series, expectedNativeHistCount, expectedSchema) + } + }) + } + } +} + func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t *testing.T) { appender := &collectResultAppender{} var ( @@ -3458,7 +4204,6 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t * case <-time.After(5 * time.Second): t.Fatalf("Scrape wasn't stopped.") } - // 1 successfully scraped sample, 1 stale marker after first fail, 5 report samples for // each scrape successful or not. require.Len(t, appender.resultFloats, 27, "Appended samples not as expected:\n%s", appender) @@ -3726,7 +4471,9 @@ func TestNativeHistogramMaxSchemaSet(t *testing.T) { }, } for name, tc := range testcases { + tc := tc t.Run(name, func(t *testing.T) { + t.Parallel() testNativeHistogramMaxSchemaSet(t, tc.minBucketFactor, tc.expectedSchema) }) } @@ -3768,8 +4515,9 @@ func testNativeHistogramMaxSchemaSet(t *testing.T, minBucketFactor string, expec // Create a scrape loop with the HTTP server as the target. configStr := fmt.Sprintf(` global: - scrape_interval: 1s - scrape_timeout: 1s + metric_name_validation_scheme: legacy + scrape_interval: 50ms + scrape_timeout: 25ms scrape_configs: - job_name: test %s @@ -3782,9 +4530,9 @@ scrape_configs: s.DB.EnableNativeHistograms() reg := prometheus.NewRegistry() - mng, err := NewManager(&Options{EnableNativeHistogramsIngestion: true}, nil, s, reg) + mng, err := NewManager(&Options{DiscoveryReloadInterval: model.Duration(10 * time.Millisecond), EnableNativeHistogramsIngestion: true}, nil, nil, s, reg) require.NoError(t, err) - cfg, err := config.Load(configStr, false, log.NewNopLogger()) + cfg, err := config.Load(configStr, promslog.NewNopLogger()) require.NoError(t, err) mng.ApplyConfig(cfg) tsets := make(chan map[string][]*targetgroup.Group) @@ -3813,7 +4561,7 @@ scrape_configs: countSeries++ } return countSeries > 0 - }, 15*time.Second, 100*time.Millisecond) + }, 5*time.Second, 100*time.Millisecond) // Check that native histogram schema is as expected. 
q, err := s.Querier(0, math.MaxInt64) @@ -3838,3 +4586,174 @@ scrape_configs: require.Equal(t, expectedSchema, h.Schema) } } + +func TestTargetScrapeConfigWithLabels(t *testing.T) { + const ( + configTimeout = 1500 * time.Millisecond + expectedTimeout = "1.5" + expectedTimeoutLabel = "1s500ms" + secondTimeout = 500 * time.Millisecond + secondTimeoutLabel = "500ms" + expectedParam = "value1" + secondParam = "value2" + expectedPath = "/metric-ok" + secondPath = "/metric-nok" + httpScheme = "http" + paramLabel = "__param_param" + jobName = "test" + ) + + createTestServer := func(t *testing.T, done chan struct{}) *url.URL { + server := httptest.NewServer( + http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + defer close(done) + require.Equal(t, expectedTimeout, r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds")) + require.Equal(t, expectedParam, r.URL.Query().Get("param")) + require.Equal(t, expectedPath, r.URL.Path) + + w.Header().Set("Content-Type", `text/plain; version=0.0.4`) + w.Write([]byte("metric_a 1\nmetric_b 2\n")) + }), + ) + t.Cleanup(server.Close) + serverURL, err := url.Parse(server.URL) + require.NoError(t, err) + return serverURL + } + + run := func(t *testing.T, cfg *config.ScrapeConfig, targets []*targetgroup.Group) chan struct{} { + done := make(chan struct{}) + srvURL := createTestServer(t, done) + + // Update target addresses to use the dynamically created server URL. + for _, target := range targets { + for i := range target.Targets { + target.Targets[i][model.AddressLabel] = model.LabelValue(srvURL.Host) + } + } + + sp, err := newScrapePool(cfg, &nopAppendable{}, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + require.NoError(t, err) + t.Cleanup(sp.stop) + + sp.Sync(targets) + return done + } + + cases := []struct { + name string + cfg *config.ScrapeConfig + targets []*targetgroup.Group + }{ + { + name: "Everything in scrape config", + cfg: &config.ScrapeConfig{ + ScrapeInterval: model.Duration(2 * time.Second), + ScrapeTimeout: model.Duration(configTimeout), + Params: url.Values{"param": []string{expectedParam}}, + JobName: jobName, + Scheme: httpScheme, + MetricsPath: expectedPath, + }, + targets: []*targetgroup.Group{ + { + Targets: []model.LabelSet{ + {model.AddressLabel: model.LabelValue("")}, + }, + }, + }, + }, + { + name: "Overridden in target", + cfg: &config.ScrapeConfig{ + ScrapeInterval: model.Duration(2 * time.Second), + ScrapeTimeout: model.Duration(secondTimeout), + JobName: jobName, + Scheme: httpScheme, + MetricsPath: secondPath, + Params: url.Values{"param": []string{secondParam}}, + }, + targets: []*targetgroup.Group{ + { + Targets: []model.LabelSet{ + { + model.AddressLabel: model.LabelValue(""), + model.ScrapeTimeoutLabel: expectedTimeoutLabel, + model.MetricsPathLabel: expectedPath, + paramLabel: expectedParam, + }, + }, + }, + }, + }, + { + name: "Overridden in relabel_config", + cfg: &config.ScrapeConfig{ + ScrapeInterval: model.Duration(2 * time.Second), + ScrapeTimeout: model.Duration(secondTimeout), + JobName: jobName, + Scheme: httpScheme, + MetricsPath: secondPath, + Params: url.Values{"param": []string{secondParam}}, + RelabelConfigs: []*relabel.Config{ + { + Action: relabel.DefaultRelabelConfig.Action, + Regex: relabel.DefaultRelabelConfig.Regex, + SourceLabels: relabel.DefaultRelabelConfig.SourceLabels, + TargetLabel: model.ScrapeTimeoutLabel, + Replacement: expectedTimeoutLabel, + }, + { + Action: relabel.DefaultRelabelConfig.Action, + Regex: relabel.DefaultRelabelConfig.Regex, + SourceLabels: 
relabel.DefaultRelabelConfig.SourceLabels, + TargetLabel: paramLabel, + Replacement: expectedParam, + }, + { + Action: relabel.DefaultRelabelConfig.Action, + Regex: relabel.DefaultRelabelConfig.Regex, + SourceLabels: relabel.DefaultRelabelConfig.SourceLabels, + TargetLabel: model.MetricsPathLabel, + Replacement: expectedPath, + }, + }, + }, + targets: []*targetgroup.Group{ + { + Targets: []model.LabelSet{ + { + model.AddressLabel: model.LabelValue(""), + model.ScrapeTimeoutLabel: secondTimeoutLabel, + model.MetricsPathLabel: secondPath, + paramLabel: secondParam, + }, + }, + }, + }, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + select { + case <-run(t, c.cfg, c.targets): + case <-time.After(10 * time.Second): + t.Fatal("timeout after 10 seconds") + } + }) + } +} + +func newScrapableServer(scrapeText string) (s *httptest.Server, scrapedTwice chan bool) { + var scrapes int + scrapedTwice = make(chan bool) + + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprint(w, scrapeText) + scrapes++ + if scrapes == 2 { + close(scrapedTwice) + } + })), scrapedTwice +} diff --git a/scrape/target.go b/scrape/target.go index 9ef4471fbd..06d4737ff9 100644 --- a/scrape/target.go +++ b/scrape/target.go @@ -17,7 +17,6 @@ import ( "errors" "fmt" "hash/fnv" - "net" "net/url" "strings" "sync" @@ -424,7 +423,7 @@ func (app *maxSchemaAppender) AppendHistogram(ref storage.SeriesRef, lset labels // PopulateLabels builds a label set from the given label set and scrape configuration. // It returns a label set before relabeling was applied as the second return value. // Returns the original discovered label set found before relabelling was applied if the target is dropped during relabeling. -func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort bool) (res, orig labels.Labels, err error) { +func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig) (res, orig labels.Labels, err error) { // Copy labels into the labelset for the target if they are not set already. scrapeLabels := []labels.Label{ {Name: model.JobLabel, Value: cfg.JobName}, @@ -441,8 +440,8 @@ func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort } // Encode scrape query parameters as labels. for k, v := range cfg.Params { - if len(v) > 0 { - lb.Set(model.ParamLabelPrefix+k, v[0]) + if name := model.ParamLabelPrefix + k; len(v) > 0 && lb.Get(name) == "" { + lb.Set(name, v[0]) } } @@ -457,51 +456,7 @@ func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort return labels.EmptyLabels(), labels.EmptyLabels(), errors.New("no address") } - // addPort checks whether we should add a default port to the address. - // If the address is not valid, we don't append a port either. - addPort := func(s string) (string, string, bool) { - // If we can split, a port exists and we don't have to add one. - if host, port, err := net.SplitHostPort(s); err == nil { - return host, port, false - } - // If adding a port makes it valid, the previous error - // was not due to an invalid address and we can append a port. - _, _, err := net.SplitHostPort(s + ":1234") - return "", "", err == nil - } - addr := lb.Get(model.AddressLabel) - scheme := lb.Get(model.SchemeLabel) - host, port, add := addPort(addr) - // If it's an address with no trailing port, infer it based on the used scheme - // unless the no-default-scrape-port feature flag is present. 
- if !noDefaultPort && add { - // Addresses reaching this point are already wrapped in [] if necessary. - switch scheme { - case "http", "": - addr += ":80" - case "https": - addr += ":443" - default: - return labels.EmptyLabels(), labels.EmptyLabels(), fmt.Errorf("invalid scheme: %q", cfg.Scheme) - } - lb.Set(model.AddressLabel, addr) - } - - if noDefaultPort { - // If it's an address with a trailing default port and the - // no-default-scrape-port flag is present, remove the port. - switch port { - case "80": - if scheme == "http" { - lb.Set(model.AddressLabel, host) - } - case "443": - if scheme == "https" { - lb.Set(model.AddressLabel, host) - } - } - } if err := config.CheckTargetAddress(model.LabelValue(addr)); err != nil { return labels.EmptyLabels(), labels.EmptyLabels(), err @@ -557,7 +512,7 @@ func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort } // TargetsFromGroup builds targets based on the given TargetGroup and config. -func TargetsFromGroup(tg *targetgroup.Group, cfg *config.ScrapeConfig, noDefaultPort bool, targets []*Target, lb *labels.Builder) ([]*Target, []error) { +func TargetsFromGroup(tg *targetgroup.Group, cfg *config.ScrapeConfig, targets []*Target, lb *labels.Builder) ([]*Target, []error) { targets = targets[:0] failures := []error{} @@ -573,7 +528,7 @@ func TargetsFromGroup(tg *targetgroup.Group, cfg *config.ScrapeConfig, noDefault } } - lset, origLabels, err := PopulateLabels(lb, cfg, noDefaultPort) + lset, origLabels, err := PopulateLabels(lb, cfg) if err != nil { failures = append(failures, fmt.Errorf("instance %d in group %s: %w", i, tg, err)) } diff --git a/scrape/target_test.go b/scrape/target_test.go index 84fe078b2b..bd27952874 100644 --- a/scrape/target_test.go +++ b/scrape/target_test.go @@ -348,7 +348,7 @@ func TestTargetsFromGroup(t *testing.T) { ScrapeInterval: model.Duration(1 * time.Minute), } lb := labels.NewBuilder(labels.EmptyLabels()) - targets, failures := TargetsFromGroup(&targetgroup.Group{Targets: []model.LabelSet{{}, {model.AddressLabel: "localhost:9090"}}}, &cfg, false, nil, lb) + targets, failures := TargetsFromGroup(&targetgroup.Group{Targets: []model.LabelSet{{}, {model.AddressLabel: "localhost:9090"}}}, &cfg, nil, lb) require.Len(t, targets, 1) require.Len(t, failures, 1) require.EqualError(t, failures[0], expectedError) @@ -435,7 +435,7 @@ scrape_configs: lb := labels.NewBuilder(labels.EmptyLabels()) group := &targetgroup.Group{Targets: targets} for i := 0; i < b.N; i++ { - tgets, _ = TargetsFromGroup(group, config.ScrapeConfigs[0], false, tgets, lb) + tgets, _ = TargetsFromGroup(group, config.ScrapeConfigs[0], tgets, lb) if len(targets) != nTargets { b.Fatalf("Expected %d targets, got %d", nTargets, len(targets)) } diff --git a/scripts/compress_assets.sh b/scripts/compress_assets.sh index 6608677bbf..19e1e22486 100755 --- a/scripts/compress_assets.sh +++ b/scripts/compress_assets.sh @@ -4,6 +4,12 @@ set -euo pipefail +export STATIC_DIR=static +PREBUILT_ASSETS_STATIC_DIR=${PREBUILT_ASSETS_STATIC_DIR:-} +if [ -n "$PREBUILT_ASSETS_STATIC_DIR" ]; then + STATIC_DIR=$(realpath $PREBUILT_ASSETS_STATIC_DIR) +fi + cd web/ui cp embed.go.tmpl embed.go @@ -11,6 +17,19 @@ GZIP_OPTS="-fk" # gzip option '-k' may not always exist in the latest gzip available on different distros. if ! 
gzip -k -h &>/dev/null; then GZIP_OPTS="-f"; fi +mkdir -p static find static -type f -name '*.gz' -delete -find static -type f -exec gzip $GZIP_OPTS '{}' \; -print0 | xargs -0 -I % echo %.gz | sort | xargs echo //go:embed >> embed.go + +# Compress files from the prebuilt static directory and replicate the structure in the current static directory +find "${STATIC_DIR}" -type f ! -name '*.gz' -exec bash -c ' + for file; do + dest="${file#${STATIC_DIR}}" + mkdir -p "static/$(dirname "$dest")" + gzip '"$GZIP_OPTS"' "$file" -c > "static/${dest}.gz" + done +' bash {} + + +# Append the paths of gzipped files to embed.go +find static -type f -name '*.gz' -print0 | sort -z | xargs -0 echo //go:embed >> embed.go + echo var EmbedFS embed.FS >> embed.go diff --git a/scripts/get_module_version.sh b/scripts/get_module_version.sh new file mode 100755 index 0000000000..e385587044 --- /dev/null +++ b/scripts/get_module_version.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +# if no version string is passed as an argument, read VERSION file +if [ $# -eq 0 ]; then + VERSION="$(< VERSION)" +else + VERSION=$1 +fi + + +# Remove leading 'v' if present +VERSION="${VERSION#v}" + +# Extract MAJOR, MINOR, and REST +MAJOR="${VERSION%%.*}" +MINOR="${VERSION#*.}"; MINOR="${MINOR%%.*}" +REST="${VERSION#*.*.}" + +# Format and output based on MAJOR version +if [[ "$MAJOR" == "2" ]]; then + echo "0.$MINOR.$REST" +elif [[ "$MAJOR" == "3" ]]; then + printf "0.3%02d.$REST\n" "$MINOR" +fi diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml index a15cfc97f0..1c099932ba 100644 --- a/scripts/golangci-lint.yml +++ b/scripts/golangci-lint.yml @@ -24,7 +24,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - name: Install Go uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 with: diff --git a/scripts/ui_release.sh b/scripts/ui_release.sh index ea4423d257..c1b872fd37 100755 --- a/scripts/ui_release.sh +++ b/scripts/ui_release.sh @@ -30,8 +30,8 @@ function publish() { cmd+=" --dry-run" fi for workspace in ${workspaces}; do - # package "app" is private so we shouldn't try to publish it. - if [[ "${workspace}" != "react-app" ]]; then + # package "mantine-ui" is private so we shouldn't try to publish it. + if [[ "${workspace}" != "mantine-ui" ]]; then cd "${workspace}" eval "${cmd}" cd "${root_ui_folder}" diff --git a/storage/buffer.go b/storage/buffer.go index 4c9b004ab6..e847c10e61 100644 --- a/storage/buffer.go +++ b/storage/buffer.go @@ -253,9 +253,9 @@ type sampleRing struct { delta int64 // Lookback buffers. We use iBuf for mixed samples, but one of the three - // concrete ones for homogenous samples. (Only one of the four bufs is + // concrete ones for homogeneous samples. (Only one of the four bufs is // allowed to be populated!) This avoids the overhead of the interface - // wrapper for the happy (and by far most common) case of homogenous + // wrapper for the happy (and by far most common) case of homogeneous // samples. iBuf []chunks.Sample fBuf []fSample @@ -280,7 +280,7 @@ const ( fhBuf ) -// newSampleRing creates a new sampleRing. If you do not know the prefereed +// newSampleRing creates a new sampleRing. If you do not know the preferred // value type yet, use a size of 0 (in which case the provided typ doesn't // matter). 
On the first add, a buffer of size 16 will be allocated with the // preferred type being the type of the first added sample. @@ -626,7 +626,7 @@ func addF(s fSample, buf []fSample, r *sampleRing) []fSample { return buf } -// addF adds an hSample to a (specialized) hSample buffer. +// addH adds an hSample to a (specialized) hSample buffer. func addH(s hSample, buf []hSample, r *sampleRing) []hSample { l := len(buf) // Grow the ring buffer if it fits no more elements. diff --git a/storage/fanout.go b/storage/fanout.go index e52342bc7e..4d076788a7 100644 --- a/storage/fanout.go +++ b/storage/fanout.go @@ -15,9 +15,8 @@ package storage import ( "context" + "log/slog" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/exemplar" @@ -28,7 +27,7 @@ import ( ) type fanout struct { - logger log.Logger + logger *slog.Logger primary Storage secondaries []Storage @@ -43,7 +42,7 @@ type fanout struct { // and the error from the secondary querier will be returned as a warning. // // NOTE: In the case of Prometheus, it treats all remote storages as secondary / best effort. -func NewFanout(logger log.Logger, primary Storage, secondaries ...Storage) Storage { +func NewFanout(logger *slog.Logger, primary Storage, secondaries ...Storage) Storage { return &fanout{ logger: logger, primary: primary, @@ -142,12 +141,22 @@ func (f *fanout) Close() error { // fanoutAppender implements Appender. type fanoutAppender struct { - logger log.Logger + logger *slog.Logger primary Appender secondaries []Appender } +// SetOptions propagates the hints to both primary and secondary appenders. +func (f *fanoutAppender) SetOptions(opts *AppendOptions) { + if f.primary != nil { + f.primary.SetOptions(opts) + } + for _, appender := range f.secondaries { + appender.SetOptions(opts) + } +} + func (f *fanoutAppender) Append(ref SeriesRef, l labels.Labels, t int64, v float64) (SeriesRef, error) { ref, err := f.primary.Append(ref, l, t, v) if err != nil { @@ -190,6 +199,20 @@ func (f *fanoutAppender) AppendHistogram(ref SeriesRef, l labels.Labels, t int64 return ref, nil } +func (f *fanoutAppender) AppendHistogramCTZeroSample(ref SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (SeriesRef, error) { + ref, err := f.primary.AppendHistogramCTZeroSample(ref, l, t, ct, h, fh) + if err != nil { + return ref, err + } + + for _, appender := range f.secondaries { + if _, err := appender.AppendHistogramCTZeroSample(ref, l, t, ct, h, fh); err != nil { + return 0, err + } + } + return ref, nil +} + func (f *fanoutAppender) UpdateMetadata(ref SeriesRef, l labels.Labels, m metadata.Metadata) (SeriesRef, error) { ref, err := f.primary.UpdateMetadata(ref, l, m) if err != nil { @@ -226,7 +249,7 @@ func (f *fanoutAppender) Commit() (err error) { err = appender.Commit() } else { if rollbackErr := appender.Rollback(); rollbackErr != nil { - level.Error(f.logger).Log("msg", "Squashed rollback error on commit", "err", rollbackErr) + f.logger.Error("Squashed rollback error on commit", "err", rollbackErr) } } } @@ -242,7 +265,7 @@ func (f *fanoutAppender) Rollback() (err error) { case err == nil: err = rollbackErr case rollbackErr != nil: - level.Error(f.logger).Log("msg", "Squashed rollback error on rollback", "err", rollbackErr) + f.logger.Error("Squashed rollback error on rollback", "err", rollbackErr) } } return nil diff --git a/storage/fanout_test.go b/storage/fanout_test.go index 4613fe7572..3eef9e3cd0 100644 --- 
a/storage/fanout_test.go +++ b/storage/fanout_test.go @@ -173,16 +173,13 @@ func TestFanoutErrors(t *testing.T) { } if tc.err != nil { - require.Error(t, ss.Err()) - require.Equal(t, tc.err.Error(), ss.Err().Error()) + require.EqualError(t, ss.Err(), tc.err.Error()) } if tc.warning != nil { - require.NotEmpty(t, ss.Warnings(), "warnings expected") w := ss.Warnings() - require.Error(t, w.AsErrors()[0]) - warn, _ := w.AsStrings("", 0, 0) - require.Equal(t, tc.warning.Error(), warn[0]) + require.NotEmpty(t, w, "warnings expected") + require.EqualError(t, w.AsErrors()[0], tc.warning.Error()) } }) t.Run("chunks", func(t *testing.T) { @@ -200,16 +197,13 @@ func TestFanoutErrors(t *testing.T) { } if tc.err != nil { - require.Error(t, ss.Err()) - require.Equal(t, tc.err.Error(), ss.Err().Error()) + require.EqualError(t, ss.Err(), tc.err.Error()) } if tc.warning != nil { - require.NotEmpty(t, ss.Warnings(), "warnings expected") w := ss.Warnings() - require.Error(t, w.AsErrors()[0]) - warn, _ := w.AsStrings("", 0, 0) - require.Equal(t, tc.warning.Error(), warn[0]) + require.NotEmpty(t, w, "warnings expected") + require.EqualError(t, w.AsErrors()[0], tc.warning.Error()) } }) } diff --git a/storage/interface.go b/storage/interface.go index 8035caaa6d..0634acb092 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -50,7 +50,8 @@ var ( // NOTE(bwplotka): This can be both an instrumentation failure or commonly expected // behaviour, and we currently don't have a way to determine this. As a result // it's recommended to ignore this error for now. - ErrOutOfOrderCT = fmt.Errorf("created timestamp out of order, ignoring") + ErrOutOfOrderCT = fmt.Errorf("created timestamp out of order, ignoring") + ErrCTNewerThanSample = fmt.Errorf("CT is newer or the same as sample's timestamp, ignoring") ) // SeriesRef is a generic series reference. In prometheus it is either a @@ -113,6 +114,8 @@ type Querier interface { LabelQuerier // Select returns a set of series that matches the given label matchers. + // Results are not checked whether they match. Results that do not match + // may cause undefined behavior. // Caller can specify if it requires returned series to be sorted. Prefer not requiring sorting for better performance. // It allows passing hints that can help in optimising select, but it's up to implementation how this is used if used at all. Select(ctx context.Context, sortSeries bool, hints *SelectHints, matchers ...*labels.Matcher) SeriesSet @@ -151,6 +154,8 @@ type ChunkQuerier interface { LabelQuerier // Select returns a set of series that matches the given label matchers. + // Results are not checked whether they match. Results that do not match + // may cause undefined behavior. // Caller can specify if it requires returned series to be sorted. Prefer not requiring sorting for better performance. // It allows passing hints that can help in optimising select, but it's up to implementation how this is used if used at all. Select(ctx context.Context, sortSeries bool, hints *SelectHints, matchers ...*labels.Matcher) ChunkSeriesSet @@ -158,7 +163,7 @@ type ChunkQuerier interface { // LabelQuerier provides querying access over labels. type LabelQuerier interface { - // LabelValues returns all potential values for a label name. + // LabelValues returns all potential values for a label name in sorted order. // It is not safe to use the strings beyond the lifetime of the querier. // If matchers are specified the returned result set is reduced // to label values of metrics matching the matchers. 
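Editorial aside, not part of the patch: the next hunk in storage/interface.go introduces a storage.AppendOptions type and a SetOptions method on Appender, which the scrape loop above uses to discard out-of-order samples while writing stale markers. Below is a minimal sketch of how a caller might combine these pieces; it only uses identifiers visible in this diff (SetOptions, AppendOptions.DiscardOutOfOrder, Append, value.StaleNaN), and the helper name appendStaleMarker is invented purely for illustration.

package example

import (
	"math"

	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/model/value"
	"github.com/prometheus/prometheus/storage"
)

// appendStaleMarker writes a StaleNaN sample for the given series at ts,
// asking the appender to silently drop it if it would be out of order,
// mirroring how reportStale and the end-of-run staleness path in this
// patch use the new option.
func appendStaleMarker(app storage.Appender, lset labels.Labels, ts int64) error {
	app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true})
	defer app.SetOptions(nil) // restore default appender behaviour afterwards

	_, err := app.Append(0, lset, ts, math.Float64frombits(value.StaleNaN))
	return err
}

In the patch itself the same pattern appears inline in scrape.go (forEachStale and reportStale); the fanout appender hunk further down simply forwards SetOptions to the primary and all secondary appenders.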
@@ -238,6 +243,10 @@ func (f QueryableFunc) Querier(mint, maxt int64) (Querier, error) { return f(mint, maxt) } +type AppendOptions struct { + DiscardOutOfOrder bool +} + // Appender provides batched appends against a storage. // It must be completed with a call to Commit or Rollback and must not be reused afterwards. // @@ -266,6 +275,10 @@ type Appender interface { // Appender has to be discarded after rollback. Rollback() error + // SetOptions configures the appender with specific append options such as + // discarding out-of-order samples even if out-of-order is enabled in the TSDB. + SetOptions(opts *AppendOptions) + ExemplarAppender HistogramAppender MetadataUpdater @@ -313,6 +326,20 @@ type HistogramAppender interface { // pointer. AppendHistogram won't mutate the histogram, but in turn // depends on the caller to not mutate it either. AppendHistogram(ref SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (SeriesRef, error) + // AppendHistogramCTZeroSample adds synthetic zero sample for the given ct timestamp, + // which will be associated with given series, labels and the incoming + // sample's t (timestamp). AppendHistogramCTZeroSample returns error if zero sample can't be + // appended, for example when ct is too old, or when it would collide with + // incoming sample (sample has priority). + // + // AppendHistogramCTZeroSample has to be called before the corresponding histogram AppendHistogram. + // A series reference number is returned which can be used to modify the + // CT for the given series in the same or later transactions. + // Returned reference numbers are ephemeral and may be rejected in calls + // to AppendHistogramCTZeroSample() at any point. + // + // If the reference is 0 it must not be used for caching. + AppendHistogramCTZeroSample(ref SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (SeriesRef, error) } // MetadataUpdater provides an interface for associating metadata to stored series. diff --git a/storage/merge.go b/storage/merge.go index 318f22696e..3144a0f648 100644 --- a/storage/merge.go +++ b/storage/merge.go @@ -153,13 +153,18 @@ func (q *mergeGenericQuerier) Select(ctx context.Context, sortSeries bool, hints ) // Schedule all Selects for all queriers we know about. for _, querier := range q.queriers { + // copy the matchers as some queriers may alter the slice. + // See https://github.com/prometheus/prometheus/issues/14723 + matchersCopy := make([]*labels.Matcher, len(matchers)) + copy(matchersCopy, matchers) + wg.Add(1) - go func(qr genericQuerier) { + go func(qr genericQuerier, m []*labels.Matcher) { defer wg.Done() // We need to sort for NewMergeSeriesSet to work. - seriesSetChan <- qr.Select(ctx, true, hints, matchers...) - }(querier) + seriesSetChan <- qr.Select(ctx, true, hints, m...) + }(querier, matchersCopy) } go func() { wg.Wait() diff --git a/storage/remote/azuread/azuread_test.go b/storage/remote/azuread/azuread_test.go index 7c97138120..08870382ec 100644 --- a/storage/remote/azuread/azuread_test.go +++ b/storage/remote/azuread/azuread_test.go @@ -68,7 +68,7 @@ func (ad *AzureAdTestSuite) TestAzureAdRoundTripper() { cases := []struct { cfg *AzureADConfig }{ - // AzureAd roundtripper with Managedidentity. + // AzureAd roundtripper with ManagedIdentity. 
{ cfg: &AzureADConfig{ Cloud: "AzurePublic", diff --git a/storage/remote/chunked_test.go b/storage/remote/chunked_test.go index 7c3993ca62..82ed866345 100644 --- a/storage/remote/chunked_test.go +++ b/storage/remote/chunked_test.go @@ -86,7 +86,7 @@ func TestChunkedReader_Overflow(t *testing.T) { _, err = NewChunkedReader(bytes.NewReader(b2), 11, nil).Next() require.Error(t, err, "expect exceed limit error") - require.Equal(t, "chunkedReader: message size exceeded the limit 11 bytes; got: 12 bytes", err.Error()) + require.EqualError(t, err, "chunkedReader: message size exceeded the limit 11 bytes; got: 12 bytes") } func TestChunkedReader_CorruptedFrame(t *testing.T) { @@ -102,5 +102,5 @@ func TestChunkedReader_CorruptedFrame(t *testing.T) { _, err = NewChunkedReader(bytes.NewReader(bs), 20, nil).Next() require.Error(t, err, "expected malformed frame") - require.Equal(t, "chunkedReader: corrupted frame; checksum mismatch", err.Error()) + require.EqualError(t, err, "chunkedReader: corrupted frame; checksum mismatch") } diff --git a/storage/remote/client.go b/storage/remote/client.go index 62218cfba9..23775122e5 100644 --- a/storage/remote/client.go +++ b/storage/remote/client.go @@ -20,6 +20,7 @@ import ( "fmt" "io" "net/http" + "net/http/httptrace" "strconv" "strings" "time" @@ -31,6 +32,7 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/common/sigv4" "github.com/prometheus/common/version" + "go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/trace" @@ -213,8 +215,11 @@ func NewWriteClient(name string, conf *ClientConfig) (WriteClient, error) { if conf.WriteProtoMsg != "" { writeProtoMsg = conf.WriteProtoMsg } - - httpClient.Transport = otelhttp.NewTransport(t) + httpClient.Transport = otelhttp.NewTransport( + t, + otelhttp.WithClientTrace(func(ctx context.Context) *httptrace.ClientTrace { + return otelhttptrace.NewClientTrace(ctx, otelhttptrace.WithoutSubSpans()) + })) return &Client{ remoteName: name, urlString: conf.URL.String(), diff --git a/storage/remote/codec_test.go b/storage/remote/codec_test.go index 404f1add75..c2fe6186ce 100644 --- a/storage/remote/codec_test.go +++ b/storage/remote/codec_test.go @@ -20,9 +20,9 @@ import ( "sync" "testing" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/config" @@ -253,8 +253,7 @@ func TestValidateLabelsAndMetricName(t *testing.T) { t.Run(test.description, func(t *testing.T) { err := validateLabelsAndMetricName(test.input) if test.expectedErr != "" { - require.Error(t, err) - require.Equal(t, test.expectedErr, err.Error()) + require.EqualError(t, err, test.expectedErr) } else { require.NoError(t, err) } @@ -551,7 +550,7 @@ func TestNegotiateResponseType(t *testing.T) { _, err = NegotiateResponseType([]prompb.ReadRequest_ResponseType{20}) require.Error(t, err, "expected error due to not supported requested response types") - require.Equal(t, "server does not support any of the requested response types: [20]; supported: map[SAMPLES:{} STREAMED_XOR_CHUNKS:{}]", err.Error()) + require.EqualError(t, err, "server does not support any of the requested response types: [20]; supported: map[SAMPLES:{} STREAMED_XOR_CHUNKS:{}]") } func TestMergeLabels(t *testing.T) { @@ -583,7 +582,7 @@ func TestDecodeWriteRequest(t *testing.T) { } 
func TestDecodeWriteV2Request(t *testing.T) { - buf, _, _, err := buildV2WriteRequest(log.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") + buf, _, _, err := buildV2WriteRequest(promslog.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") require.NoError(t, err) actual, err := DecodeWriteV2Request(bytes.NewReader(buf)) diff --git a/storage/remote/metadata_watcher.go b/storage/remote/metadata_watcher.go index fdcd668f56..9306dcb4c2 100644 --- a/storage/remote/metadata_watcher.go +++ b/storage/remote/metadata_watcher.go @@ -16,11 +16,11 @@ package remote import ( "context" "errors" + "log/slog" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/scrape" ) @@ -44,7 +44,7 @@ func (noop *noopScrapeManager) Get() (*scrape.Manager, error) { // MetadataWatcher watches the Scrape Manager for a given WriteMetadataTo. type MetadataWatcher struct { name string - logger log.Logger + logger *slog.Logger managerGetter ReadyScrapeManager manager Watchable @@ -62,9 +62,9 @@ type MetadataWatcher struct { } // NewMetadataWatcher builds a new MetadataWatcher. -func NewMetadataWatcher(l log.Logger, mg ReadyScrapeManager, name string, w MetadataAppender, interval model.Duration, deadline time.Duration) *MetadataWatcher { +func NewMetadataWatcher(l *slog.Logger, mg ReadyScrapeManager, name string, w MetadataAppender, interval model.Duration, deadline time.Duration) *MetadataWatcher { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } if mg == nil { @@ -87,7 +87,7 @@ func NewMetadataWatcher(l log.Logger, mg ReadyScrapeManager, name string, w Meta // Start the MetadataWatcher. func (mw *MetadataWatcher) Start() { - level.Info(mw.logger).Log("msg", "Starting scraped metadata watcher") + mw.logger.Info("Starting scraped metadata watcher") mw.hardShutdownCtx, mw.hardShutdownCancel = context.WithCancel(context.Background()) mw.softShutdownCtx, mw.softShutdownCancel = context.WithCancel(mw.hardShutdownCtx) go mw.loop() @@ -95,15 +95,15 @@ func (mw *MetadataWatcher) Start() { // Stop the MetadataWatcher. func (mw *MetadataWatcher) Stop() { - level.Info(mw.logger).Log("msg", "Stopping metadata watcher...") - defer level.Info(mw.logger).Log("msg", "Scraped metadata watcher stopped") + mw.logger.Info("Stopping metadata watcher...") + defer mw.logger.Info("Scraped metadata watcher stopped") mw.softShutdownCancel() select { case <-mw.done: return case <-time.After(mw.deadline): - level.Error(mw.logger).Log("msg", "Failed to flush metadata") + mw.logger.Error("Failed to flush metadata") } mw.hardShutdownCancel() diff --git a/storage/remote/otlptranslator/prometheus/normalize_label.go b/storage/remote/otlptranslator/prometheus/normalize_label.go index a112b9bbce..c22c761320 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_label.go +++ b/storage/remote/otlptranslator/prometheus/normalize_label.go @@ -49,7 +49,7 @@ func NormalizeLabel(label string) string { // Return '_' for anything non-alphanumeric. 
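metadata_watcher.go above is the first of many files in this patch migrated from github.com/go-kit/log to Go's log/slog via github.com/prometheus/common/promslog. The mechanical pattern, sketched on a throwaway logger:

package main

import "github.com/prometheus/common/promslog"

func main() {
	// promslog returns a *log/slog.Logger configured with Prometheus defaults;
	// NewNopLogger replaces go-kit's log.NewNopLogger in the nil-logger guards.
	logger := promslog.NewNopLogger()

	// go-kit:  logger = log.With(logger, "component", "metadata_watcher")
	// slog:    With takes alternating key/value arguments directly.
	logger = logger.With("component", "metadata_watcher")

	// go-kit:  level.Info(logger).Log("msg", "Starting scraped metadata watcher")
	// slog:    the message is the first argument, followed by key/value pairs.
	logger.Info("Starting scraped metadata watcher")
	logger.Error("Failed to flush metadata", "err", "deadline exceeded")
}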
func sanitizeRune(r rune) rune { - if unicode.IsLetter(r) || unicode.IsDigit(r) { + if unicode.IsLower(r) || unicode.IsUpper(r) || unicode.IsDigit(r) { return r } return '_' diff --git a/storage/remote/otlptranslator/prometheus/normalize_label_test.go b/storage/remote/otlptranslator/prometheus/normalize_label_test.go new file mode 100644 index 0000000000..3ceb8760c5 --- /dev/null +++ b/storage/remote/otlptranslator/prometheus/normalize_label_test.go @@ -0,0 +1,45 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package prometheus + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNormalizeLabel(t *testing.T) { + tests := []struct { + label string + expected string + }{ + {"", ""}, + {"label:with:colons", "label_with_colons"}, // Without UTF-8 support, colons are only allowed in metric names + {"LabelWithCapitalLetters", "LabelWithCapitalLetters"}, + {"label!with&special$chars)", "label_with_special_chars_"}, + {"label_with_foreign_characteres_字符", "label_with_foreign_characteres___"}, + {"label.with.dots", "label_with_dots"}, + {"123label", "key_123label"}, + {"_label_starting_with_underscore", "key_label_starting_with_underscore"}, + {"__label_starting_with_2underscores", "__label_starting_with_2underscores"}, + } + + for i, test := range tests { + t.Run(fmt.Sprintf("test_%d", i), func(t *testing.T) { + result := NormalizeLabel(test.label) + require.Equal(t, test.expected, result) + }) + } +} diff --git a/storage/remote/otlptranslator/prometheus/normalize_name.go b/storage/remote/otlptranslator/prometheus/normalize_name.go index 0f472b80a0..36b647f510 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name.go @@ -237,11 +237,13 @@ func removeSuffix(tokens []string, suffix string) []string { // Clean up specified string so it's Prometheus compliant func CleanUpString(s string) string { - return strings.Join(strings.FieldsFunc(s, func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) }), "_") + return strings.Join(strings.FieldsFunc(s, func(r rune) bool { return !unicode.IsUpper(r) && !unicode.IsLower(r) && !unicode.IsDigit(r) }), "_") } func RemovePromForbiddenRunes(s string) string { - return strings.Join(strings.FieldsFunc(s, func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' && r != ':' }), "_") + return strings.Join(strings.FieldsFunc(s, func(r rune) bool { + return !unicode.IsUpper(r) && !unicode.IsLower(r) && !unicode.IsDigit(r) && r != '_' && r != ':' + }), "_") } // Retrieve the Prometheus "basic" unit corresponding to the specified "basic" unit diff --git a/storage/remote/otlptranslator/prometheus/normalize_name_test.go b/storage/remote/otlptranslator/prometheus/normalize_name_test.go index 07b9b0a784..4e55209410 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name_test.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name_test.go @@ -202,4 
+202,5 @@ func TestBuildCompliantNameWithoutSuffixes(t *testing.T) { require.Equal(t, ":foo::bar", BuildCompliantName(createCounter(":foo::bar", ""), "", false)) require.Equal(t, "foo_bar", BuildCompliantName(createGauge("foo.bar", "1"), "", false)) require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "foo/bar"), "", false)) + require.Equal(t, "metric_with___foreign_characteres", BuildCompliantName(createCounter("metric_with_字符_foreign_characteres", ""), "", false)) } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index 84cd309d6b..08d72c52f4 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -21,6 +21,7 @@ import ( "encoding/hex" "fmt" "log" + "log/slog" "math" "slices" "sort" @@ -28,8 +29,6 @@ import ( "unicode/utf8" "github.com/cespare/xxhash/v2" - gokitlog "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" @@ -245,7 +244,7 @@ func isValidAggregationTemporality(metric pmetric.Metric) bool { // However, work is under way to resolve this shortcoming through a feature called native histograms custom buckets: // https://github.com/prometheus/prometheus/issues/13485. func (c *PrometheusConverter) addHistogramDataPoints(ctx context.Context, dataPoints pmetric.HistogramDataPointSlice, - resource pcommon.Resource, settings Settings, baseName string, logger gokitlog.Logger) error { + resource pcommon.Resource, settings Settings, baseName string, logger *slog.Logger) error { for x := 0; x < dataPoints.Len(); x++ { if err := c.everyN.checkContext(ctx); err != nil { return err @@ -337,7 +336,7 @@ func (c *PrometheusConverter) addHistogramDataPoints(ctx context.Context, dataPo labels := createLabels(baseName+createdSuffix, baseLabels) c.addTimeSeriesIfNeeded(labels, startTimestampMs, pt.Timestamp()) } - level.Debug(logger).Log("labels", labelsStringer(createLabels(baseName, baseLabels)), "start_ts", startTimestampMs, "sample_ts", timestamp, "type", "histogram") + logger.Debug("addHistogramDataPoints", "labels", labelsStringer(createLabels(baseName, baseLabels)), "start_ts", startTimestampMs, "sample_ts", timestamp, "type", "histogram") } return nil @@ -359,9 +358,17 @@ func getPromExemplars[T exemplarType](ctx context.Context, everyN *everyNTimes, exemplarRunes := 0 promExemplar := prompb.Exemplar{ - Value: exemplar.DoubleValue(), Timestamp: timestamp.FromTime(exemplar.Timestamp().AsTime()), } + switch exemplar.ValueType() { + case pmetric.ExemplarValueTypeInt: + promExemplar.Value = float64(exemplar.IntValue()) + case pmetric.ExemplarValueTypeDouble: + promExemplar.Value = exemplar.DoubleValue() + default: + return nil, fmt.Errorf("unsupported exemplar value type: %v", exemplar.ValueType()) + } + if traceID := exemplar.TraceID(); !traceID.IsEmpty() { val := hex.EncodeToString(traceID[:]) exemplarRunes += utf8.RuneCountInString(traceIDKey) + utf8.RuneCountInString(val) @@ -443,7 +450,7 @@ func mostRecentTimestampInMetric(metric pmetric.Metric) pcommon.Timestamp { } func (c *PrometheusConverter) addSummaryDataPoints(ctx context.Context, dataPoints pmetric.SummaryDataPointSlice, resource pcommon.Resource, - settings Settings, baseName string, logger gokitlog.Logger) error { + settings Settings, baseName string, logger *slog.Logger) error { for x := 0; x < 
dataPoints.Len(); x++ { if err := c.everyN.checkContext(ctx); err != nil { return err @@ -500,7 +507,7 @@ func (c *PrometheusConverter) addSummaryDataPoints(ctx context.Context, dataPoin c.addTimeSeriesIfNeeded(createdLabels, startTimestampMs, pt.Timestamp()) } - level.Debug(logger).Log("labels", labelsStringer(createLabels(baseName, baseLabels)), "start_ts", startTimestampMs, "sample_ts", timestamp, "type", "summary") + logger.Debug("addSummaryDataPoints", "labels", labelsStringer(createLabels(baseName, baseLabels)), "start_ts", startTimestampMs, "sample_ts", timestamp, "type", "summary") } return nil @@ -584,7 +591,7 @@ const defaultIntervalForStartTimestamps = int64(300_000) // make use of its direct support fort Created Timestamps instead. // See https://opentelemetry.io/docs/specs/otel/metrics/data-model/#resets-and-gaps to know more about how OTel handles // resets for cumulative metrics. -func (c *PrometheusConverter) handleStartTime(startTs, ts int64, labels []prompb.Label, settings Settings, typ string, value float64, logger gokitlog.Logger) { +func (c *PrometheusConverter) handleStartTime(startTs, ts int64, labels []prompb.Label, settings Settings, typ string, value float64, logger *slog.Logger) { if !settings.EnableCreatedTimestampZeroIngestion { return } @@ -606,7 +613,7 @@ func (c *PrometheusConverter) handleStartTime(startTs, ts int64, labels []prompb return } - level.Debug(logger).Log("msg", "adding zero value at start_ts", "type", typ, "labels", labelsStringer(labels), "start_ts", startTs, "sample_ts", ts, "sample_value", value) + logger.Debug("adding zero value at start_ts", "type", typ, "labels", labelsStringer(labels), "start_ts", startTs, "sample_ts", ts, "sample_value", value) // See https://github.com/prometheus/prometheus/issues/14600 for context. 
c.addSample(&prompb.Sample{Timestamp: startTs}, labels) @@ -682,10 +689,10 @@ func addResourceTargetInfo(resource pcommon.Resource, settings Settings, timesta return } - ts := convertTimeStamp(timestamp) sample := &prompb.Sample{ - Value: float64(1), - Timestamp: ts, + Value: float64(1), + // convert ns to ms + Timestamp: convertTimeStamp(timestamp), } converter.addSample(sample, labels) } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go index 71731e5a4d..051d0d8e86 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go @@ -21,13 +21,13 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/prompb" ) @@ -196,10 +196,10 @@ func TestPrometheusConverter_AddSummaryDataPoints(t *testing.T) { nowMinus6m := pcommon.Timestamp(now.Add(-20 * time.Second).UnixNano()) nowMinus1h := pcommon.Timestamp(now.Add(-1 * time.Hour).UnixNano()) tests := []struct { + name string overrideValidInterval time.Duration metric func() pmetric.Metric want func() map[uint64]*prompb.TimeSeries - name string }{ { name: "summary with start time equal to sample timestamp", @@ -437,7 +437,7 @@ func TestPrometheusConverter_AddSummaryDataPoints(t *testing.T) { ValidIntervalCreatedTimestampZeroIngestion: tt.overrideValidInterval, }, metric.Name(), - log.NewNopLogger(), + promslog.NewNopLogger(), ) require.NoError(t, err) @@ -551,7 +551,7 @@ func TestPrometheusConverter_AddHistogramDataPoints(t *testing.T) { EnableCreatedTimestampZeroIngestion: true, }, metric.Name(), - log.NewNopLogger(), + promslog.NewNopLogger(), ) require.NoError(t, err) @@ -785,3 +785,38 @@ func TestPrometheusConverter_AddExponentialHistogramDataPoints(t *testing.T) { }) } } + +func TestGetPromExemplars(t *testing.T) { + ctx := context.Background() + everyN := &everyNTimes{n: 1} + + t.Run("Exemplars with int value", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + exemplar.SetIntValue(42) + exemplars, err := getPromExemplars(ctx, everyN, pt) + assert.NoError(t, err) + assert.Len(t, exemplars, 1) + assert.Equal(t, float64(42), exemplars[0].Value) + }) + + t.Run("Exemplars with double value", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + exemplar.SetDoubleValue(69.420) + exemplars, err := getPromExemplars(ctx, everyN, pt) + assert.NoError(t, err) + assert.Len(t, exemplars, 1) + assert.Equal(t, 69.420, exemplars[0].Value) + }) + + t.Run("Exemplars with unsupported value type", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + _, err := getPromExemplars(ctx, everyN, pt) + assert.Error(t, err) + }) +} diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go index f12ba26f0f..8349d4f907 100644 --- 
a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go @@ -60,7 +60,6 @@ func (c *PrometheusConverter) addExponentialHistogramDataPoints(ctx context.Cont promName, ) ts, _ := c.getOrCreateTimeSeries(lbls) - ts.Histograms = append(ts.Histograms, histogram) exemplars, err := getPromExemplars[pmetric.ExponentialHistogramDataPoint](ctx, &c.everyN, pt) @@ -73,8 +72,8 @@ func (c *PrometheusConverter) addExponentialHistogramDataPoints(ctx context.Cont return annots, nil } -// exponentialToNativeHistogram translates OTel Exponential Histogram data point -// to Prometheus Native Histogram. +// exponentialToNativeHistogram translates an OTel Exponential Histogram data point +// to a Prometheus Native Histogram. func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, annotations.Annotations, error) { var annots annotations.Annotations scale := p.Scale() diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go index 902bee399c..5c02dd8b39 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go @@ -23,12 +23,13 @@ import ( "time" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/prompb" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" + "github.com/prometheus/prometheus/prompb" + prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" ) @@ -171,7 +172,7 @@ func TestConvertBucketsLayout(t *testing.T) { }, // Downscale: // 4+2+0+2, 0+0+0+0, 0+0+0+0, 0+0+0+0, 1+0+0+0 = 8, 0, 0, 0, 1 - // Check from sclaing from previous: 6+2, 0+0, 0+0, 0+0, 1+0 = 8, 0, 0, 0, 1 + // Check from scaling from previous: 6+2, 0+0, 0+0, 0+0, 1+0 = 8, 0, 0, 0, 1 wantDeltas: []int64{8, -7}, }, }, @@ -222,7 +223,7 @@ func TestConvertBucketsLayout(t *testing.T) { }, // Downscale: // 4+2+0+2, 0+0+0+0, 0+0+0+0, 1+0+0+0 = 8, 0, 0, 1 - // Check from sclaing from previous: 6+2, 0+0, 0+0, 1+0 = 8, 0, 0, 1 + // Check from scaling from previous: 6+2, 0+0, 0+0, 1+0 = 8, 0, 0, 1 wantDeltas: []int64{8, -8, 0, 1}, }, }, diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index a74caabc58..a7f41a6316 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -20,11 +20,11 @@ import ( "context" "errors" "fmt" + "log/slog" "sort" "strings" "time" - "github.com/go-kit/log" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" "go.uber.org/multierr" @@ -67,7 +67,7 @@ func NewPrometheusConverter() *PrometheusConverter { } // FromMetrics converts pmetric.Metrics to Prometheus remote write format. 
-func (c *PrometheusConverter) FromMetrics(ctx context.Context, md pmetric.Metrics, settings Settings, logger log.Logger) (annots annotations.Annotations, errs error) { +func (c *PrometheusConverter) FromMetrics(ctx context.Context, md pmetric.Metrics, settings Settings, logger *slog.Logger) (annots annotations.Annotations, errs error) { c.everyN = everyNTimes{n: 128} resourceMetricsSlice := md.ResourceMetrics() for i := 0; i < resourceMetricsSlice.Len(); i++ { diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go index 8e7e50a8e6..9d75c83dd0 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go @@ -22,7 +22,7 @@ import ( "testing" "time" - "github.com/go-kit/log" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" @@ -30,6 +30,15 @@ import ( ) func TestFromMetrics(t *testing.T) { + t.Run("successful", func(t *testing.T) { + converter := NewPrometheusConverter() + payload := createExportRequest(5, 128, 128, 2, 0) + + annots, err := converter.FromMetrics(context.Background(), payload.Metrics(), Settings{}, promslog.NewNopLogger()) + require.NoError(t, err) + require.Empty(t, annots) + }) + t.Run("context cancellation", func(t *testing.T) { converter := NewPrometheusConverter() ctx, cancel := context.WithCancel(context.Background()) @@ -37,8 +46,9 @@ func TestFromMetrics(t *testing.T) { cancel() payload := createExportRequest(5, 128, 128, 2, 0) - _, err := converter.FromMetrics(ctx, payload.Metrics(), Settings{}, log.NewNopLogger()) + annots, err := converter.FromMetrics(ctx, payload.Metrics(), Settings{}, promslog.NewNopLogger()) require.ErrorIs(t, err, context.Canceled) + require.Empty(t, annots) }) t.Run("context timeout", func(t *testing.T) { @@ -48,8 +58,9 @@ func TestFromMetrics(t *testing.T) { t.Cleanup(cancel) payload := createExportRequest(5, 128, 128, 2, 0) - _, err := converter.FromMetrics(ctx, payload.Metrics(), Settings{}, log.NewNopLogger()) + annots, err := converter.FromMetrics(ctx, payload.Metrics(), Settings{}, promslog.NewNopLogger()) require.ErrorIs(t, err, context.DeadlineExceeded) + require.Empty(t, annots) }) t.Run("exponential histogram warnings for zero count and non-zero sum", func(t *testing.T) { @@ -75,7 +86,7 @@ func TestFromMetrics(t *testing.T) { } converter := NewPrometheusConverter() - annots, err := converter.FromMetrics(context.Background(), request.Metrics(), Settings{}, log.NewNopLogger()) + annots, err := converter.FromMetrics(context.Background(), request.Metrics(), Settings{}, promslog.NewNopLogger()) require.NoError(t, err) require.NotEmpty(t, annots) ws, infos := annots.AsStrings("", 0, 0) @@ -108,7 +119,7 @@ func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) { for i := 0; i < b.N; i++ { converter := NewPrometheusConverter() - annots, err := converter.FromMetrics(context.Background(), payload.Metrics(), Settings{}, log.NewNopLogger()) + annots, err := converter.FromMetrics(context.Background(), payload.Metrics(), Settings{}, promslog.NewNopLogger()) require.NoError(b, err) require.Empty(b, annots) require.NotNil(b, converter.TimeSeries()) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go 
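FromMetrics now takes a *slog.Logger, and the new TestFromMetrics cases above call it with promslog.NewNopLogger(). A minimal sketch of driving the converter outside the test suite; the empty pmetric.Metrics payload is only a placeholder for real decoded OTLP data:

package main

import (
	"context"
	"fmt"

	"github.com/prometheus/common/promslog"
	"go.opentelemetry.io/collector/pdata/pmetric"

	"github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite"
)

func main() {
	converter := prometheusremotewrite.NewPrometheusConverter()

	// Placeholder payload; a real caller would pass decoded OTLP metrics.
	md := pmetric.NewMetrics()

	annots, err := converter.FromMetrics(context.Background(), md, prometheusremotewrite.Settings{}, promslog.NewNopLogger())
	if err != nil {
		panic(err)
	}
	fmt.Println("translation warnings:", annots, "series:", len(converter.TimeSeries()))
}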
index af0c7a478b..30385f8fd2 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go @@ -18,10 +18,9 @@ package prometheusremotewrite import ( "context" + "log/slog" "math" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" @@ -47,9 +46,9 @@ func (c *PrometheusConverter) addGaugeNumberDataPoints(ctx context.Context, data model.MetricNameLabel, name, ) - timestamp := convertTimeStamp(pt.Timestamp()) sample := &prompb.Sample{ - Timestamp: timestamp, + // convert ns to ms + Timestamp: convertTimeStamp(pt.Timestamp()), } switch pt.ValueType() { case pmetric.NumberDataPointValueTypeInt: @@ -67,7 +66,7 @@ func (c *PrometheusConverter) addGaugeNumberDataPoints(ctx context.Context, data } func (c *PrometheusConverter) addSumNumberDataPoints(ctx context.Context, dataPoints pmetric.NumberDataPointSlice, - resource pcommon.Resource, metric pmetric.Metric, settings Settings, name string, logger log.Logger) error { + resource pcommon.Resource, metric pmetric.Metric, settings Settings, name string, logger *slog.Logger) error { for x := 0; x < dataPoints.Len(); x++ { if err := c.everyN.checkContext(ctx); err != nil { return err @@ -85,8 +84,8 @@ func (c *PrometheusConverter) addSumNumberDataPoints(ctx context.Context, dataPo model.MetricNameLabel, name, ) - sample := &prompb.Sample{ + // convert ns to ms Timestamp: timestamp, } switch pt.ValueType() { @@ -127,7 +126,7 @@ func (c *PrometheusConverter) addSumNumberDataPoints(ctx context.Context, dataPo } c.addTimeSeriesIfNeeded(createdLabels, startTimestampMs, pt.Timestamp()) } - level.Debug(logger).Log("labels", labelsStringer(lbls), "start_ts", startTimestampMs, "sample_ts", timestamp, "type", "sum") + logger.Debug("addSumNumberDataPoints", "labels", labelsStringer(lbls), "start_ts", startTimestampMs, "sample_ts", timestamp, "type", "sum") } return nil diff --git a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go index fcd8922d92..a0f4bda706 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go @@ -21,8 +21,8 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/assert" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" @@ -256,7 +256,7 @@ func TestPrometheusConverter_addSumNumberDataPoints(t *testing.T) { EnableCreatedTimestampZeroIngestion: true, }, metric.Name(), - log.NewNopLogger(), + promslog.NewNopLogger(), ) assert.Equal(t, tt.want(), converter.unique) diff --git a/storage/remote/queue_manager.go b/storage/remote/queue_manager.go index b1c8997268..9f27c333a6 100644 --- a/storage/remote/queue_manager.go +++ b/storage/remote/queue_manager.go @@ -17,17 +17,17 @@ import ( "context" "errors" "fmt" + "log/slog" "math" "strconv" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/gogo/protobuf/proto" "github.com/golang/snappy" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "go.opentelemetry.io/otel" 
"go.opentelemetry.io/otel/attribute" semconv "go.opentelemetry.io/otel/semconv/v1.21.0" @@ -407,7 +407,7 @@ type QueueManager struct { reshardDisableStartTimestamp atomic.Int64 // Time that reshard was disabled. reshardDisableEndTimestamp atomic.Int64 // Time that reshard is disabled until. - logger log.Logger + logger *slog.Logger flushDeadline time.Duration cfg config.QueueConfig mcfg config.MetadataConfig @@ -454,7 +454,7 @@ func NewQueueManager( metrics *queueManagerMetrics, watcherMetrics *wlog.WatcherMetrics, readerMetrics *wlog.LiveReaderMetrics, - logger log.Logger, + logger *slog.Logger, dir string, samplesIn *ewmaRate, cfg config.QueueConfig, @@ -471,7 +471,7 @@ func NewQueueManager( protoMsg config.RemoteWriteProtoMsg, ) *QueueManager { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } // Copy externalLabels into a slice, which we need for processExternalLabels. @@ -480,7 +480,7 @@ func NewQueueManager( extLabelsSlice = append(extLabelsSlice, l) }) - logger = log.With(logger, remoteName, client.Name(), endpoint, client.Endpoint()) + logger = logger.With(remoteName, client.Name(), endpoint, client.Endpoint()) t := &QueueManager{ logger: logger, flushDeadline: flushDeadline, @@ -526,7 +526,7 @@ func NewQueueManager( // ships them alongside series. If both mechanisms are set, the new one // takes precedence by implicitly disabling the older one. if t.mcfg.Send && t.protoMsg != config.RemoteWriteProtoMsgV1 { - level.Warn(logger).Log("msg", "usage of 'metadata_config.send' is redundant when using remote write v2 (or higher) as metadata will always be gathered from the WAL and included for every series within each write request") + logger.Warn("usage of 'metadata_config.send' is redundant when using remote write v2 (or higher) as metadata will always be gathered from the WAL and included for every series within each write request") t.mcfg.Send = false } @@ -567,7 +567,7 @@ func (t *QueueManager) AppendWatcherMetadata(ctx context.Context, metadata []scr err := t.sendMetadataWithBackoff(ctx, mm[i*t.mcfg.MaxSamplesPerSend:last], pBuf) if err != nil { t.metrics.failedMetadataTotal.Add(float64(last - (i * t.mcfg.MaxSamplesPerSend))) - level.Error(t.logger).Log("msg", "non-recoverable error while sending metadata", "count", last-(i*t.mcfg.MaxSamplesPerSend), "err", err) + t.logger.Error("non-recoverable error while sending metadata", "count", last-(i*t.mcfg.MaxSamplesPerSend), "err", err) } } } @@ -706,7 +706,7 @@ outer: if !ok { t.dataDropped.incr(1) if _, ok := t.droppedSeries[s.Ref]; !ok { - level.Info(t.logger).Log("msg", "Dropped sample for series that was not explicitly dropped via relabelling", "ref", s.Ref) + t.logger.Info("Dropped sample for series that was not explicitly dropped via relabelling", "ref", s.Ref) t.metrics.droppedSamplesTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc() } else { t.metrics.droppedSamplesTotal.WithLabelValues(reasonDroppedSeries).Inc() @@ -769,7 +769,7 @@ outer: // Track dropped exemplars in the same EWMA for sharding calc. 
t.dataDropped.incr(1) if _, ok := t.droppedSeries[e.Ref]; !ok { - level.Info(t.logger).Log("msg", "Dropped exemplar for series that was not explicitly dropped via relabelling", "ref", e.Ref) + t.logger.Info("Dropped exemplar for series that was not explicitly dropped via relabelling", "ref", e.Ref) t.metrics.droppedExemplarsTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc() } else { t.metrics.droppedExemplarsTotal.WithLabelValues(reasonDroppedSeries).Inc() @@ -825,7 +825,7 @@ outer: if !ok { t.dataDropped.incr(1) if _, ok := t.droppedSeries[h.Ref]; !ok { - level.Info(t.logger).Log("msg", "Dropped histogram for series that was not explicitly dropped via relabelling", "ref", h.Ref) + t.logger.Info("Dropped histogram for series that was not explicitly dropped via relabelling", "ref", h.Ref) t.metrics.droppedHistogramsTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc() } else { t.metrics.droppedHistogramsTotal.WithLabelValues(reasonDroppedSeries).Inc() @@ -880,7 +880,7 @@ outer: if !ok { t.dataDropped.incr(1) if _, ok := t.droppedSeries[h.Ref]; !ok { - level.Info(t.logger).Log("msg", "Dropped histogram for series that was not explicitly dropped via relabelling", "ref", h.Ref) + t.logger.Info("Dropped histogram for series that was not explicitly dropped via relabelling", "ref", h.Ref) t.metrics.droppedHistogramsTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc() } else { t.metrics.droppedHistogramsTotal.WithLabelValues(reasonDroppedSeries).Inc() @@ -944,8 +944,8 @@ func (t *QueueManager) Start() { // Stop stops sending samples to the remote storage and waits for pending // sends to complete. func (t *QueueManager) Stop() { - level.Info(t.logger).Log("msg", "Stopping remote storage...") - defer level.Info(t.logger).Log("msg", "Remote storage stopped.") + t.logger.Info("Stopping remote storage...") + defer t.logger.Info("Remote storage stopped.") close(t.quit) t.wg.Wait() @@ -1093,10 +1093,10 @@ func (t *QueueManager) updateShardsLoop() { // to stay close to shardUpdateDuration. 
select { case t.reshardChan <- desiredShards: - level.Info(t.logger).Log("msg", "Remote storage resharding", "from", t.numShards, "to", desiredShards) + t.logger.Info("Remote storage resharding", "from", t.numShards, "to", desiredShards) t.numShards = desiredShards default: - level.Info(t.logger).Log("msg", "Currently resharding, skipping.") + t.logger.Info("Currently resharding, skipping.") } case <-t.quit: return @@ -1114,14 +1114,14 @@ func (t *QueueManager) shouldReshard(desiredShards int) bool { minSendTimestamp := time.Now().Add(-1 * shardUpdateDuration).Unix() lsts := t.lastSendTimestamp.Load() if lsts < minSendTimestamp { - level.Warn(t.logger).Log("msg", "Skipping resharding, last successful send was beyond threshold", "lastSendTimestamp", lsts, "minSendTimestamp", minSendTimestamp) + t.logger.Warn("Skipping resharding, last successful send was beyond threshold", "lastSendTimestamp", lsts, "minSendTimestamp", minSendTimestamp) return false } if disableTimestamp := t.reshardDisableEndTimestamp.Load(); time.Now().Unix() < disableTimestamp { disabledAt := time.Unix(t.reshardDisableStartTimestamp.Load(), 0) disabledFor := time.Until(time.Unix(disableTimestamp, 0)) - level.Warn(t.logger).Log("msg", "Skipping resharding, resharding is disabled while waiting for recoverable errors", "disabled_at", disabledAt, "disabled_for", disabledFor) + t.logger.Warn("Skipping resharding, resharding is disabled while waiting for recoverable errors", "disabled_at", disabledAt, "disabled_for", disabledFor) return false } return true @@ -1164,7 +1164,7 @@ func (t *QueueManager) calculateDesiredShards() int { desiredShards = timePerSample * (dataInRate*dataKeptRatio + backlogCatchup) ) t.metrics.desiredNumShards.Set(desiredShards) - level.Debug(t.logger).Log("msg", "QueueManager.calculateDesiredShards", + t.logger.Debug("QueueManager.calculateDesiredShards", "dataInRate", dataInRate, "dataOutRate", dataOutRate, "dataKeptRatio", dataKeptRatio, @@ -1182,7 +1182,7 @@ func (t *QueueManager) calculateDesiredShards() int { lowerBound = float64(t.numShards) * (1. - shardToleranceFraction) upperBound = float64(t.numShards) * (1. + shardToleranceFraction) ) - level.Debug(t.logger).Log("msg", "QueueManager.updateShardsLoop", + t.logger.Debug("QueueManager.updateShardsLoop", "lowerBound", lowerBound, "desiredShards", desiredShards, "upperBound", upperBound) desiredShards = math.Ceil(desiredShards) // Round up to be on the safe side. @@ -1193,7 +1193,7 @@ func (t *QueueManager) calculateDesiredShards() int { numShards := int(desiredShards) // Do not downshard if we are more than ten seconds back. if numShards < t.numShards && delay > 10.0 { - level.Debug(t.logger).Log("msg", "Not downsharding due to being too far behind") + t.logger.Debug("Not downsharding due to being too far behind") return t.numShards } @@ -1321,7 +1321,7 @@ func (s *shards) stop() { // Log error for any dropped samples, exemplars, or histograms. 
logDroppedError := func(t string, counter atomic.Uint32) { if dropped := counter.Load(); dropped > 0 { - level.Error(s.qm.logger).Log("msg", fmt.Sprintf("Failed to flush all %s on shutdown", t), "count", dropped) + s.qm.logger.Error(fmt.Sprintf("Failed to flush all %s on shutdown", t), "count", dropped) } } logDroppedError("samples", s.samplesDroppedOnHardShutdown) @@ -1564,7 +1564,7 @@ func (s *shards) runShard(ctx context.Context, shardID int, queue *queue) { nPendingSamples, nPendingExemplars, nPendingHistograms := populateTimeSeries(batch, pendingData, s.qm.sendExemplars, s.qm.sendNativeHistograms) n := nPendingSamples + nPendingExemplars + nPendingHistograms if timer { - level.Debug(s.qm.logger).Log("msg", "runShard timer ticked, sending buffered data", "samples", nPendingSamples, + s.qm.logger.Debug("runShard timer ticked, sending buffered data", "samples", nPendingSamples, "exemplars", nPendingExemplars, "shard", shardNum, "histograms", nPendingHistograms) } _ = s.sendSamples(ctx, pendingData[:n], nPendingSamples, nPendingExemplars, nPendingHistograms, pBuf, &buf, enc) @@ -1691,9 +1691,9 @@ func (s *shards) updateMetrics(_ context.Context, err error, sampleCount, exempl s.qm.metrics.failedExemplarsTotal.Add(float64(exemplarDiff)) } if err != nil { - level.Error(s.qm.logger).Log("msg", "non-recoverable error", "failedSampleCount", sampleDiff, "failedHistogramCount", histogramDiff, "failedExemplarCount", exemplarDiff, "err", err) + s.qm.logger.Error("non-recoverable error", "failedSampleCount", sampleDiff, "failedHistogramCount", histogramDiff, "failedExemplarCount", exemplarDiff, "err", err) } else if sampleDiff+exemplarDiff+histogramDiff > 0 { - level.Error(s.qm.logger).Log("msg", "we got 2xx status code from the Receiver yet statistics indicate some dat was not written; investigation needed", "failedSampleCount", sampleDiff, "failedHistogramCount", histogramDiff, "failedExemplarCount", exemplarDiff) + s.qm.logger.Error("we got 2xx status code from the Receiver yet statistics indicate some dat was not written; investigation needed", "failedSampleCount", sampleDiff, "failedHistogramCount", histogramDiff, "failedExemplarCount", exemplarDiff) } // These counters are used to calculate the dynamic sharding, and as such @@ -2018,16 +2018,16 @@ func (t *QueueManager) sendWriteRequestWithBackoff(ctx context.Context, attempt switch { case backoffErr.retryAfter > 0: sleepDuration = backoffErr.retryAfter - level.Info(t.logger).Log("msg", "Retrying after duration specified by Retry-After header", "duration", sleepDuration) + t.logger.Info("Retrying after duration specified by Retry-After header", "duration", sleepDuration) case backoffErr.retryAfter < 0: - level.Debug(t.logger).Log("msg", "retry-after cannot be in past, retrying using default backoff mechanism") + t.logger.Debug("retry-after cannot be in past, retrying using default backoff mechanism") } // We should never reshard for a recoverable error; increasing shards could // make the problem worse, particularly if we're getting rate limited. // // reshardDisableTimestamp holds the unix timestamp until which resharding - // is diableld. We'll update that timestamp if the period we were just told + // is disabled. We'll update that timestamp if the period we were just told // to sleep for is newer than the existing disabled timestamp. 
reshardWaitPeriod := time.Now().Add(time.Duration(sleepDuration) * 2) if oldTS, updated := setAtomicToNewer(&t.reshardDisableEndTimestamp, reshardWaitPeriod.Unix()); updated { @@ -2047,7 +2047,7 @@ func (t *QueueManager) sendWriteRequestWithBackoff(ctx context.Context, attempt // If we make it this far, we've encountered a recoverable error and will retry. onRetry() - level.Warn(t.logger).Log("msg", "Failed to send batch, retrying", "err", err) + t.logger.Warn("Failed to send batch, retrying", "err", err) backoff = sleepDuration * 2 @@ -2147,12 +2147,12 @@ func compressPayload(tmpbuf *[]byte, inp []byte, enc Compression) (compressed [] } } -func buildWriteRequest(logger log.Logger, timeSeries []prompb.TimeSeries, metadata []prompb.MetricMetadata, pBuf *proto.Buffer, buf *[]byte, filter func(prompb.TimeSeries) bool, enc Compression) (compressed []byte, highest, lowest int64, _ error) { +func buildWriteRequest(logger *slog.Logger, timeSeries []prompb.TimeSeries, metadata []prompb.MetricMetadata, pBuf *proto.Buffer, buf *[]byte, filter func(prompb.TimeSeries) bool, enc Compression) (compressed []byte, highest, lowest int64, _ error) { highest, lowest, timeSeries, droppedSamples, droppedExemplars, droppedHistograms := buildTimeSeries(timeSeries, filter) if droppedSamples > 0 || droppedExemplars > 0 || droppedHistograms > 0 { - level.Debug(logger).Log("msg", "dropped data due to their age", "droppedSamples", droppedSamples, "droppedExemplars", droppedExemplars, "droppedHistograms", droppedHistograms) + logger.Debug("dropped data due to their age", "droppedSamples", droppedSamples, "droppedExemplars", droppedExemplars, "droppedHistograms", droppedHistograms) } req := &prompb.WriteRequest{ @@ -2185,11 +2185,11 @@ func buildWriteRequest(logger log.Logger, timeSeries []prompb.TimeSeries, metada return compressed, highest, lowest, nil } -func buildV2WriteRequest(logger log.Logger, samples []writev2.TimeSeries, labels []string, pBuf, buf *[]byte, filter func(writev2.TimeSeries) bool, enc Compression) (compressed []byte, highest, lowest int64, _ error) { +func buildV2WriteRequest(logger *slog.Logger, samples []writev2.TimeSeries, labels []string, pBuf, buf *[]byte, filter func(writev2.TimeSeries) bool, enc Compression) (compressed []byte, highest, lowest int64, _ error) { highest, lowest, timeSeries, droppedSamples, droppedExemplars, droppedHistograms := buildV2TimeSeries(samples, filter) if droppedSamples > 0 || droppedExemplars > 0 || droppedHistograms > 0 { - level.Debug(logger).Log("msg", "dropped data due to their age", "droppedSamples", droppedSamples, "droppedExemplars", droppedExemplars, "droppedHistograms", droppedHistograms) + logger.Debug("dropped data due to their age", "droppedSamples", droppedSamples, "droppedExemplars", droppedExemplars, "droppedHistograms", droppedHistograms) } req := &writev2.Request{ diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go index 032a1a92f7..4b7c5a4e90 100644 --- a/storage/remote/queue_manager_test.go +++ b/storage/remote/queue_manager_test.go @@ -28,13 +28,13 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/golang/snappy" "github.com/google/go-cmp/cmp" "github.com/prometheus/client_golang/prometheus" client_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/atomic" @@ -351,7 +351,7 @@ func TestMetadataDelivery(t *testing.T) { 
require.Equal(t, 0.0, client_testutil.ToFloat64(m.metrics.failedMetadataTotal)) require.Len(t, c.receivedMetadata, numMetadata) - // One more write than the rounded qoutient should be performed in order to get samples that didn't + // One more write than the rounded quotient should be performed in order to get samples that didn't // fit into MaxSamplesPerSend. require.Equal(t, numMetadata/config.DefaultMetadataConfig.MaxSamplesPerSend+1, c.writesReceived) // Make sure the last samples were sent. @@ -1326,21 +1326,25 @@ func BenchmarkSampleSend(b *testing.B) { cfg.MaxShards = 20 // todo: test with new proto type(s) - m := newTestQueueManager(b, cfg, mcfg, defaultFlushDeadline, c, config.RemoteWriteProtoMsgV1) - m.StoreSeries(series, 0) + for _, format := range []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1, config.RemoteWriteProtoMsgV2} { + b.Run(string(format), func(b *testing.B) { + m := newTestQueueManager(b, cfg, mcfg, defaultFlushDeadline, c, format) + m.StoreSeries(series, 0) - // These should be received by the client. - m.Start() - defer m.Stop() + // These should be received by the client. + m.Start() + defer m.Stop() - b.ResetTimer() - for i := 0; i < b.N; i++ { - m.Append(samples) - m.UpdateSeriesSegment(series, i+1) // simulate what wlog.Watcher.garbageCollectSeries does - m.SeriesReset(i + 1) + b.ResetTimer() + for i := 0; i < b.N; i++ { + m.Append(samples) + m.UpdateSeriesSegment(series, i+1) // simulate what wlog.Watcher.garbageCollectSeries does + m.SeriesReset(i + 1) + } + // Do not include shutdown + b.StopTimer() + }) } - // Do not include shutdown - b.StopTimer() } // Check how long it takes to add N series, including external labels processing. @@ -1414,8 +1418,7 @@ func BenchmarkStartup(b *testing.B) { } sort.Ints(segments) - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stdout)) - logger = log.With(logger, "caller", log.DefaultCaller) + logger := promslog.New(&promslog.Config{}) cfg := testDefaultQueueConfig() mcfg := config.DefaultMetadataConfig @@ -1849,7 +1852,7 @@ func createDummyTimeSeries(instances int) []timeSeries { } func BenchmarkBuildWriteRequest(b *testing.B) { - noopLogger := log.NewNopLogger() + noopLogger := promslog.NewNopLogger() bench := func(b *testing.B, batch []timeSeries) { buff := make([]byte, 0) seriesBuff := make([]prompb.TimeSeries, len(batch)) @@ -1859,13 +1862,6 @@ func BenchmarkBuildWriteRequest(b *testing.B) { } pBuf := proto.NewBuffer(nil) - // Warmup buffers - for i := 0; i < 10; i++ { - populateTimeSeries(batch, seriesBuff, true, true) - buildWriteRequest(noopLogger, seriesBuff, nil, pBuf, &buff, nil, "snappy") - } - - b.ResetTimer() totalSize := 0 for i := 0; i < b.N; i++ { populateTimeSeries(batch, seriesBuff, true, true) @@ -1896,46 +1892,44 @@ func BenchmarkBuildWriteRequest(b *testing.B) { } func BenchmarkBuildV2WriteRequest(b *testing.B) { - noopLogger := log.NewNopLogger() - type testcase struct { - batch []timeSeries - } - testCases := []testcase{ - {createDummyTimeSeries(2)}, - {createDummyTimeSeries(10)}, - {createDummyTimeSeries(100)}, - } - for _, tc := range testCases { + noopLogger := promslog.NewNopLogger() + bench := func(b *testing.B, batch []timeSeries) { symbolTable := writev2.NewSymbolTable() buff := make([]byte, 0) - seriesBuff := make([]writev2.TimeSeries, len(tc.batch)) + seriesBuff := make([]writev2.TimeSeries, len(batch)) for i := range seriesBuff { seriesBuff[i].Samples = []writev2.Sample{{}} seriesBuff[i].Exemplars = []writev2.Exemplar{{}} } pBuf := []byte{} - // Warmup buffers - for i := 0; i 
< 10; i++ { - populateV2TimeSeries(&symbolTable, tc.batch, seriesBuff, true, true) - buildV2WriteRequest(noopLogger, seriesBuff, symbolTable.Symbols(), &pBuf, &buff, nil, "snappy") - } - - b.Run(fmt.Sprintf("%d-instances", len(tc.batch)), func(b *testing.B) { - totalSize := 0 - for j := 0; j < b.N; j++ { - populateV2TimeSeries(&symbolTable, tc.batch, seriesBuff, true, true) - b.ResetTimer() - req, _, _, err := buildV2WriteRequest(noopLogger, seriesBuff, symbolTable.Symbols(), &pBuf, &buff, nil, "snappy") - if err != nil { - b.Fatal(err) - } - symbolTable.Reset() - totalSize += len(req) - b.ReportMetric(float64(totalSize)/float64(b.N), "compressedSize/op") + totalSize := 0 + for i := 0; i < b.N; i++ { + populateV2TimeSeries(&symbolTable, batch, seriesBuff, true, true) + req, _, _, err := buildV2WriteRequest(noopLogger, seriesBuff, symbolTable.Symbols(), &pBuf, &buff, nil, "snappy") + if err != nil { + b.Fatal(err) } - }) + totalSize += len(req) + b.ReportMetric(float64(totalSize)/float64(b.N), "compressedSize/op") + } } + + twoBatch := createDummyTimeSeries(2) + tenBatch := createDummyTimeSeries(10) + hundredBatch := createDummyTimeSeries(100) + + b.Run("2 instances", func(b *testing.B) { + bench(b, twoBatch) + }) + + b.Run("10 instances", func(b *testing.B) { + bench(b, tenBatch) + }) + + b.Run("1k instances", func(b *testing.B) { + bench(b, hundredBatch) + }) } func TestDropOldTimeSeries(t *testing.T) { diff --git a/storage/remote/read_handler.go b/storage/remote/read_handler.go index ffc64c9c3f..8f2945f974 100644 --- a/storage/remote/read_handler.go +++ b/storage/remote/read_handler.go @@ -16,13 +16,12 @@ package remote import ( "context" "errors" + "log/slog" "net/http" "slices" "strings" "sync" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/config" @@ -34,7 +33,7 @@ import ( ) type readHandler struct { - logger log.Logger + logger *slog.Logger queryable storage.SampleAndChunkQueryable config func() config.Config remoteReadSampleLimit int @@ -46,7 +45,7 @@ type readHandler struct { // NewReadHandler creates a http.Handler that accepts remote read requests and // writes them to the provided queryable. 
-func NewReadHandler(logger log.Logger, r prometheus.Registerer, queryable storage.SampleAndChunkQueryable, config func() config.Config, remoteReadSampleLimit, remoteReadConcurrencyLimit, remoteReadMaxBytesInFrame int) http.Handler { +func NewReadHandler(logger *slog.Logger, r prometheus.Registerer, queryable storage.SampleAndChunkQueryable, config func() config.Config, remoteReadSampleLimit, remoteReadConcurrencyLimit, remoteReadMaxBytesInFrame int) http.Handler { h := &readHandler{ logger: logger, queryable: queryable, @@ -140,7 +139,7 @@ func (h *readHandler) remoteReadSamples( } defer func() { if err := querier.Close(); err != nil { - level.Warn(h.logger).Log("msg", "Error on querier close", "err", err.Error()) + h.logger.Warn("Error on querier close", "err", err.Error()) } }() @@ -163,7 +162,7 @@ func (h *readHandler) remoteReadSamples( return err } for _, w := range ws { - level.Warn(h.logger).Log("msg", "Warnings on remote read query", "err", w.Error()) + h.logger.Warn("Warnings on remote read query", "err", w.Error()) } for _, ts := range resp.Results[i].Timeseries { ts.Labels = MergeLabels(ts.Labels, sortedExternalLabels) @@ -208,7 +207,7 @@ func (h *readHandler) remoteReadStreamedXORChunks(ctx context.Context, w http.Re } defer func() { if err := querier.Close(); err != nil { - level.Warn(h.logger).Log("msg", "Error on chunk querier close", "err", err.Error()) + h.logger.Warn("Error on chunk querier close", "err", err.Error()) } }() @@ -239,7 +238,7 @@ func (h *readHandler) remoteReadStreamedXORChunks(ctx context.Context, w http.Re } for _, w := range ws { - level.Warn(h.logger).Log("msg", "Warnings on chunked remote read query", "warnings", w.Error()) + h.logger.Warn("Warnings on chunked remote read query", "warnings", w.Error()) } return nil }(); err != nil { diff --git a/storage/remote/read_handler_test.go b/storage/remote/read_handler_test.go index 4cd4647e72..fd7f3ad48d 100644 --- a/storage/remote/read_handler_test.go +++ b/storage/remote/read_handler_test.go @@ -334,7 +334,7 @@ func TestStreamReadEndpoint(t *testing.T) { Type: prompb.Chunk_XOR, MinTimeMs: 7200000, MaxTimeMs: 7200000, - Data: []byte("\000\001\200\364\356\006@\307p\000\000\000\000\000\000"), + Data: []byte("\000\001\200\364\356\006@\307p\000\000\000\000\000"), }, }, }, @@ -381,7 +381,7 @@ func TestStreamReadEndpoint(t *testing.T) { Type: prompb.Chunk_XOR, MinTimeMs: 14400000, MaxTimeMs: 14400000, - Data: []byte("\000\001\200\350\335\r@\327p\000\000\000\000\000\000"), + Data: []byte("\000\001\200\350\335\r@\327p\000\000\000\000\000"), }, }, }, diff --git a/storage/remote/read_test.go b/storage/remote/read_test.go index d63cefc3fe..b78a8c6215 100644 --- a/storage/remote/read_test.go +++ b/storage/remote/read_test.go @@ -475,7 +475,9 @@ func TestSampleAndChunkQueryableClient(t *testing.T) { ) q, err := c.Querier(tc.mint, tc.maxt) require.NoError(t, err) - defer require.NoError(t, q.Close()) + defer func() { + require.NoError(t, q.Close()) + }() ss := q.Select(context.Background(), true, nil, tc.matchers...) 
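	// Note: with `defer require.NoError(t, q.Close())` the q.Close() argument
	// is evaluated as soon as the defer statement executes, so the querier
	// would already be closed before Select runs; deferring a closure (as
	// above) postpones Close until the test function returns.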
require.NoError(t, err) diff --git a/storage/remote/storage.go b/storage/remote/storage.go index 05634f1798..14c3c87d93 100644 --- a/storage/remote/storage.go +++ b/storage/remote/storage.go @@ -18,12 +18,13 @@ import ( "crypto/md5" "encoding/hex" "fmt" + "log/slog" "sync" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/config" @@ -51,8 +52,9 @@ type startTimeCallback func() (int64, error) // Storage represents all the remote read and write endpoints. It implements // storage.Storage. type Storage struct { - logger *logging.Deduper - mtx sync.Mutex + deduper *logging.Deduper + logger *slog.Logger + mtx sync.Mutex rws *WriteStorage @@ -62,14 +64,16 @@ type Storage struct { } // NewStorage returns a remote.Storage. -func NewStorage(l log.Logger, reg prometheus.Registerer, stCallback startTimeCallback, walDir string, flushDeadline time.Duration, sm ReadyScrapeManager, metadataInWAL bool) *Storage { +func NewStorage(l *slog.Logger, reg prometheus.Registerer, stCallback startTimeCallback, walDir string, flushDeadline time.Duration, sm ReadyScrapeManager, metadataInWAL bool) *Storage { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } - logger := logging.Dedupe(l, 1*time.Minute) + deduper := logging.Dedupe(l, 1*time.Minute) + logger := slog.New(deduper) s := &Storage{ logger: logger, + deduper: deduper, localStartTimeCallback: stCallback, } s.rws = NewWriteStorage(s.logger, reg, walDir, flushDeadline, sm, metadataInWAL) @@ -196,7 +200,7 @@ func (s *Storage) LowestSentTimestamp() int64 { // Close the background processing of the storage queues. func (s *Storage) Close() error { - s.logger.Stop() + s.deduper.Stop() s.mtx.Lock() defer s.mtx.Unlock() return s.rws.Close() diff --git a/storage/remote/write.go b/storage/remote/write.go index 3d2f1fdfcd..639f344520 100644 --- a/storage/remote/write.go +++ b/storage/remote/write.go @@ -17,13 +17,14 @@ import ( "context" "errors" "fmt" + "log/slog" "math" "sync" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/exemplar" @@ -57,7 +58,7 @@ var ( // WriteStorage represents all the remote write storage. type WriteStorage struct { - logger log.Logger + logger *slog.Logger reg prometheus.Registerer mtx sync.Mutex @@ -78,9 +79,9 @@ type WriteStorage struct { } // NewWriteStorage creates and runs a WriteStorage. 
-func NewWriteStorage(logger log.Logger, reg prometheus.Registerer, dir string, flushDeadline time.Duration, sm ReadyScrapeManager, metadataInWal bool) *WriteStorage { +func NewWriteStorage(logger *slog.Logger, reg prometheus.Registerer, dir string, flushDeadline time.Duration, sm ReadyScrapeManager, metadataInWal bool) *WriteStorage { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } rws := &WriteStorage{ queues: make(map[string]*QueueManager), @@ -277,6 +278,7 @@ func (rws *WriteStorage) Close() error { type timestampTracker struct { writeStorage *WriteStorage + appendOptions *storage.AppendOptions samples int64 exemplars int64 histograms int64 @@ -284,6 +286,10 @@ type timestampTracker struct { highestRecvTimestamp *maxTimestamp } +func (t *timestampTracker) SetOptions(opts *storage.AppendOptions) { + t.appendOptions = opts +} + // Append implements storage.Appender. func (t *timestampTracker) Append(_ storage.SeriesRef, _ labels.Labels, ts int64, _ float64) (storage.SeriesRef, error) { t.samples++ @@ -306,14 +312,29 @@ func (t *timestampTracker) AppendHistogram(_ storage.SeriesRef, _ labels.Labels, return 0, nil } -func (t *timestampTracker) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, _ metadata.Metadata) (storage.SeriesRef, error) { - // TODO: Add and increment a `metadata` field when we get around to wiring metadata in remote_write. - // UpadteMetadata is no-op for remote write (where timestampTracker is being used) for now. +func (t *timestampTracker) AppendCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, ct int64) (storage.SeriesRef, error) { + t.samples++ + if ct > t.highestTimestamp { + // Theoretically, we should never see a CT zero sample with a timestamp higher than the highest timestamp we've seen so far. + // However, we're not going to enforce that here, as it is not the responsibility of the tracker to enforce this. + t.highestTimestamp = ct + } return 0, nil } -func (t *timestampTracker) AppendCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, _ int64) (storage.SeriesRef, error) { - // AppendCTZeroSample is no-op for remote-write for now. +func (t *timestampTracker) AppendHistogramCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, ct int64, _ *histogram.Histogram, _ *histogram.FloatHistogram) (storage.SeriesRef, error) { + t.histograms++ + if ct > t.highestTimestamp { + // Theoretically, we should never see a CT zero sample with a timestamp higher than the highest timestamp we've seen so far. + // However, we're not going to enforce that here, as it is not the responsibility of the tracker to enforce this. + t.highestTimestamp = ct + } + return 0, nil +} + +func (t *timestampTracker) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, _ metadata.Metadata) (storage.SeriesRef, error) { + // TODO: Add and increment a `metadata` field when we get around to wiring metadata in remote_write. + // UpdateMetadata is no-op for remote write (where timestampTracker is being used) for now. 
return 0, nil } diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go index 43c72a3571..99e4392ff5 100644 --- a/storage/remote/write_handler.go +++ b/storage/remote/write_handler.go @@ -18,12 +18,11 @@ import ( "errors" "fmt" "io" + "log/slog" "net/http" "strings" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/gogo/protobuf/proto" "github.com/golang/snappy" "github.com/prometheus/client_golang/prometheus" @@ -42,7 +41,7 @@ import ( ) type writeHandler struct { - logger log.Logger + logger *slog.Logger appendable storage.Appendable samplesWithInvalidLabelsTotal prometheus.Counter @@ -58,7 +57,7 @@ const maxAheadTime = 10 * time.Minute // // NOTE(bwplotka): When accepting v2 proto and spec, partial writes are possible // as per https://prometheus.io/docs/specs/remote_write_spec_2_0/#partial-write. -func NewWriteHandler(logger log.Logger, reg prometheus.Registerer, appendable storage.Appendable, acceptedProtoMsgs []config.RemoteWriteProtoMsg) http.Handler { +func NewWriteHandler(logger *slog.Logger, reg prometheus.Registerer, appendable storage.Appendable, acceptedProtoMsgs []config.RemoteWriteProtoMsg) http.Handler { protoMsgs := map[config.RemoteWriteProtoMsg]struct{}{} for _, acc := range acceptedProtoMsgs { protoMsgs[acc] = struct{}{} @@ -119,7 +118,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { msgType, err := h.parseProtoMsg(contentType) if err != nil { - level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err) + h.logger.Error("Error decoding remote write request", "err", err) http.Error(w, err.Error(), http.StatusUnsupportedMediaType) return } @@ -131,7 +130,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } return ret }()) - level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err) + h.logger.Error("Error decoding remote write request", "err", err) http.Error(w, err.Error(), http.StatusUnsupportedMediaType) } @@ -142,14 +141,14 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { // We could give http.StatusUnsupportedMediaType, but let's assume snappy by default. } else if enc != string(SnappyBlockCompression) { err := fmt.Errorf("%v encoding (compression) is not accepted by this server; only %v is acceptable", enc, SnappyBlockCompression) - level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err) + h.logger.Error("Error decoding remote write request", "err", err) http.Error(w, err.Error(), http.StatusUnsupportedMediaType) } // Read the request body. body, err := io.ReadAll(r.Body) if err != nil { - level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err.Error()) + h.logger.Error("Error decoding remote write request", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -157,7 +156,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { decompressed, err := snappy.Decode(nil, body) if err != nil { // TODO(bwplotka): Add more context to responded error? 
- level.Error(h.logger).Log("msg", "Error decompressing remote write request", "err", err.Error()) + h.logger.Error("Error decompressing remote write request", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -169,7 +168,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { var req prompb.WriteRequest if err := proto.Unmarshal(decompressed, &req); err != nil { // TODO(bwplotka): Add more context to responded error? - level.Error(h.logger).Log("msg", "Error decoding v1 remote write request", "protobuf_message", msgType, "err", err.Error()) + h.logger.Error("Error decoding v1 remote write request", "protobuf_message", msgType, "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -180,7 +179,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusBadRequest) return default: - level.Error(h.logger).Log("msg", "Error while remote writing the v1 request", "err", err.Error()) + h.logger.Error("Error while remote writing the v1 request", "err", err.Error()) http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -193,7 +192,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { var req writev2.Request if err := proto.Unmarshal(decompressed, &req); err != nil { // TODO(bwplotka): Add more context to responded error? - level.Error(h.logger).Log("msg", "Error decoding v2 remote write request", "protobuf_message", msgType, "err", err.Error()) + h.logger.Error("Error decoding v2 remote write request", "protobuf_message", msgType, "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -205,7 +204,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { if err != nil { if errHTTPCode/5 == 100 { // 5xx - level.Error(h.logger).Log("msg", "Error while remote writing the v2 request", "err", err.Error()) + h.logger.Error("Error while remote writing the v2 request", "err", err.Error()) } http.Error(w, err.Error(), errHTTPCode) return @@ -241,11 +240,11 @@ func (h *writeHandler) write(ctx context.Context, req *prompb.WriteRequest) (err // TODO(bwplotka): Even as per 1.0 spec, this should be a 400 error, while other samples are // potentially written. Perhaps unify with fixed writeV2 implementation a bit. 
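The call-site rewrites above all follow the same mechanical pattern: go-kit's level.X(logger).Log("msg", ...) pairs become slog's leveled methods with the message as the first argument. A small stdlib-only illustration; logger and err are local names for this sketch:

package main

import (
	"errors"
	"log/slog"
	"os"
)

func main() {
	logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))
	err := errors.New("snappy: corrupt input")

	// go-kit style (removed above):
	//   level.Error(logger).Log("msg", "Error decoding remote write request", "err", err)
	// slog style (added above): the message is positional, the rest are key/value pairs.
	logger.Error("Error decoding remote write request", "err", err)
	logger.Warn("Error on ingesting out-of-order exemplars", "num_dropped", 3)
	logger.Debug("Out of order exemplar", "series", `{__name__="up"}`)
}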
if !ls.Has(labels.MetricName) || !ls.IsValid(model.NameValidationScheme) { - level.Warn(h.logger).Log("msg", "Invalid metric names or labels", "got", ls.String()) + h.logger.Warn("Invalid metric names or labels", "got", ls.String()) samplesWithInvalidLabels++ continue } else if duplicateLabel, hasDuplicate := ls.HasDuplicateLabelNames(); hasDuplicate { - level.Warn(h.logger).Log("msg", "Invalid labels for series.", "labels", ls.String(), "duplicated_label", duplicateLabel) + h.logger.Warn("Invalid labels for series.", "labels", ls.String(), "duplicated_label", duplicateLabel) samplesWithInvalidLabels++ continue } @@ -261,10 +260,10 @@ func (h *writeHandler) write(ctx context.Context, req *prompb.WriteRequest) (err switch { case errors.Is(err, storage.ErrOutOfOrderExemplar): outOfOrderExemplarErrs++ - level.Debug(h.logger).Log("msg", "Out of order exemplar", "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) + h.logger.Debug("Out of order exemplar", "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) default: // Since exemplar storage is still experimental, we don't fail the request on ingestion errors - level.Debug(h.logger).Log("msg", "Error while adding exemplar in AppendExemplar", "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e), "err", err) + h.logger.Debug("Error while adding exemplar in AppendExemplar", "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e), "err", err) } } } @@ -276,7 +275,7 @@ func (h *writeHandler) write(ctx context.Context, req *prompb.WriteRequest) (err } if outOfOrderExemplarErrs > 0 { - _ = level.Warn(h.logger).Log("msg", "Error on ingesting out-of-order exemplars", "num_dropped", outOfOrderExemplarErrs) + h.logger.Warn("Error on ingesting out-of-order exemplars", "num_dropped", outOfOrderExemplarErrs) } if samplesWithInvalidLabels > 0 { h.samplesWithInvalidLabelsTotal.Add(float64(samplesWithInvalidLabels)) @@ -293,7 +292,7 @@ func (h *writeHandler) appendV1Samples(app storage.Appender, ss []prompb.Sample, if errors.Is(err, storage.ErrOutOfOrderSample) || errors.Is(err, storage.ErrOutOfBounds) || errors.Is(err, storage.ErrDuplicateSampleForTimestamp) { - level.Error(h.logger).Log("msg", "Out of order sample from remote write", "err", err.Error(), "series", labels.String(), "timestamp", s.Timestamp) + h.logger.Error("Out of order sample from remote write", "err", err.Error(), "series", labels.String(), "timestamp", s.Timestamp) } return err } @@ -315,7 +314,7 @@ func (h *writeHandler) appendV1Histograms(app storage.Appender, hh []prompb.Hist if errors.Is(err, storage.ErrOutOfOrderSample) || errors.Is(err, storage.ErrOutOfBounds) || errors.Is(err, storage.ErrDuplicateSampleForTimestamp) { - level.Error(h.logger).Log("msg", "Out of order histogram from remote write", "err", err.Error(), "series", labels.String(), "timestamp", hp.Timestamp) + h.logger.Error("Out of order histogram from remote write", "err", err.Error(), "series", labels.String(), "timestamp", hp.Timestamp) } return err } @@ -345,7 +344,7 @@ func (h *writeHandler) writeV2(ctx context.Context, req *writev2.Request) (_ Wri // On 5xx, we always rollback, because we expect // sender to retry and TSDB is not idempotent. 
if rerr := app.Rollback(); rerr != nil { - level.Error(h.logger).Log("msg", "writev2 rollback failed on retry-able error", "err", rerr) + h.logger.Error("writev2 rollback failed on retry-able error", "err", rerr) } return WriteResponseStats{}, errHTTPCode, err } @@ -407,7 +406,7 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * errors.Is(err, storage.ErrDuplicateSampleForTimestamp) || errors.Is(err, storage.ErrTooOldSample) { // TODO(bwplotka): Not too spammy log? - level.Error(h.logger).Log("msg", "Out of order sample from remote write", "err", err.Error(), "series", ls.String(), "timestamp", s.Timestamp) + h.logger.Error("Out of order sample from remote write", "err", err.Error(), "series", ls.String(), "timestamp", s.Timestamp) badRequestErrs = append(badRequestErrs, fmt.Errorf("%w for series %v", err, ls.String())) continue } @@ -432,7 +431,7 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * errors.Is(err, storage.ErrOutOfBounds) || errors.Is(err, storage.ErrDuplicateSampleForTimestamp) { // TODO(bwplotka): Not too spammy log? - level.Error(h.logger).Log("msg", "Out of order histogram from remote write", "err", err.Error(), "series", ls.String(), "timestamp", hp.Timestamp) + h.logger.Error("Out of order histogram from remote write", "err", err.Error(), "series", ls.String(), "timestamp", hp.Timestamp) badRequestErrs = append(badRequestErrs, fmt.Errorf("%w for series %v", err, ls.String())) continue } @@ -450,18 +449,18 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * // Handle append error. if errors.Is(err, storage.ErrOutOfOrderExemplar) { outOfOrderExemplarErrs++ // Maintain old metrics, but technically not needed, given we fail here. - level.Error(h.logger).Log("msg", "Out of order exemplar", "err", err.Error(), "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) + h.logger.Error("Out of order exemplar", "err", err.Error(), "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) badRequestErrs = append(badRequestErrs, fmt.Errorf("%w for series %v", err, ls.String())) continue } // TODO(bwplotka): Add strict mode which would trigger rollback of everything if needed. // For now we keep the previously released flow (just error not debug leve) of dropping them without rollback and 5xx. - level.Error(h.logger).Log("msg", "failed to ingest exemplar, emitting error log, but no error for PRW caller", "err", err.Error(), "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) + h.logger.Error("failed to ingest exemplar, emitting error log, but no error for PRW caller", "err", err.Error(), "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) } m := ts.ToMetadata(req.Symbols) if _, err = app.UpdateMetadata(ref, ls, m); err != nil { - level.Debug(h.logger).Log("msg", "error while updating metadata from remote write", "err", err) + h.logger.Debug("error while updating metadata from remote write", "err", err) // Metadata is attached to each series, so since Prometheus does not reject sample without metadata information, // we don't report remote write error either. We increment metric instead. 
samplesWithoutMetadata += rs.AllSamples() - allSamplesSoFar @@ -469,7 +468,7 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * } if outOfOrderExemplarErrs > 0 { - level.Warn(h.logger).Log("msg", "Error on ingesting out-of-order exemplars", "num_dropped", outOfOrderExemplarErrs) + h.logger.Warn("Error on ingesting out-of-order exemplars", "num_dropped", outOfOrderExemplarErrs) } h.samplesWithInvalidLabelsTotal.Add(float64(samplesWithInvalidLabels)) @@ -482,7 +481,7 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * // NewOTLPWriteHandler creates a http.Handler that accepts OTLP write requests and // writes them to the provided appendable. -func NewOTLPWriteHandler(logger log.Logger, appendable storage.Appendable, configFunc func() config.Config, enableCTZeroIngestion bool, validIntervalCTZeroIngestion time.Duration) http.Handler { +func NewOTLPWriteHandler(logger *slog.Logger, appendable storage.Appendable, configFunc func() config.Config, enableCTZeroIngestion bool, validIntervalCTZeroIngestion time.Duration) http.Handler { rwHandler := &writeHandler{ logger: logger, appendable: appendable, @@ -498,7 +497,7 @@ func NewOTLPWriteHandler(logger log.Logger, appendable storage.Appendable, confi } type otlpWriteHandler struct { - logger log.Logger + logger *slog.Logger rwHandler *writeHandler configFunc func() config.Config enableCTZeroIngestion bool @@ -508,7 +507,7 @@ type otlpWriteHandler struct { func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { req, err := DecodeOTLPWriteRequest(r) if err != nil { - level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err.Error()) + h.logger.Error("Error decoding remote write request", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -523,11 +522,11 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { ValidIntervalCreatedTimestampZeroIngestion: h.validIntervalCTZeroIngestion, }, h.logger) if err != nil { - level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", err) + h.logger.Warn("Error translating OTLP metrics to Prometheus write request", "err", err) } ws, _ := annots.AsStrings("", 0, 0) if len(ws) > 0 { - level.Warn(h.logger).Log("msg", "Warnings translating OTLP metrics to Prometheus write request", "warnings", ws) + h.logger.Warn("Warnings translating OTLP metrics to Prometheus write request", "warnings", ws) } err = h.rwHandler.write(r.Context(), &prompb.WriteRequest{ @@ -541,7 +540,7 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusBadRequest) return default: - level.Error(h.logger).Log("msg", "Error appending remote write", "err", err.Error()) + h.logger.Error("Error appending remote write", "err", err.Error()) http.Error(w, err.Error(), http.StatusInternalServerError) return } diff --git a/storage/remote/write_handler_test.go b/storage/remote/write_handler_test.go index 5c89a1ab95..580c7c143e 100644 --- a/storage/remote/write_handler_test.go +++ b/storage/remote/write_handler_test.go @@ -27,11 +27,12 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" @@ -129,7 
+130,7 @@ func TestRemoteWriteHandlerHeadersHandling_V1Message(t *testing.T) { } appendable := &mockAppendable{} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -144,7 +145,7 @@ func TestRemoteWriteHandlerHeadersHandling_V1Message(t *testing.T) { } func TestRemoteWriteHandlerHeadersHandling_V2Message(t *testing.T) { - payload, _, _, err := buildV2WriteRequest(log.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") + payload, _, _, err := buildV2WriteRequest(promslog.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") require.NoError(t, err) for _, tc := range []struct { @@ -230,7 +231,7 @@ func TestRemoteWriteHandlerHeadersHandling_V2Message(t *testing.T) { } appendable := &mockAppendable{} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -255,7 +256,7 @@ func TestRemoteWriteHandler_V1Message(t *testing.T) { // in Prometheus, so keeping like this to not break existing 1.0 clients. appendable := &mockAppendable{} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -428,7 +429,7 @@ func TestRemoteWriteHandler_V2Message(t *testing.T) { }, } { t.Run(tc.desc, func(t *testing.T) { - payload, _, _, err := buildV2WriteRequest(log.NewNopLogger(), tc.input, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") + payload, _, _, err := buildV2WriteRequest(promslog.NewNopLogger(), tc.input, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") require.NoError(t, err) req, err := http.NewRequest("", "", bytes.NewReader(payload)) @@ -445,7 +446,7 @@ func TestRemoteWriteHandler_V2Message(t *testing.T) { appendExemplarErr: tc.appendExemplarErr, updateMetadataErr: tc.updateMetadataErr, } - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -544,7 +545,7 @@ func TestOutOfOrderSample_V1Message(t *testing.T) { require.NoError(t, err) appendable := &mockAppendable{latestSample: map[uint64]int64{labels.FromStrings("__name__", "test_metric").Hash(): 100}} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -586,7 +587,7 @@ func TestOutOfOrderExemplar_V1Message(t *testing.T) { require.NoError(t, err) appendable := &mockAppendable{latestSample: 
map[uint64]int64{labels.FromStrings("__name__", "test_metric").Hash(): 100}} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -624,7 +625,7 @@ func TestOutOfOrderHistogram_V1Message(t *testing.T) { require.NoError(t, err) appendable := &mockAppendable{latestSample: map[uint64]int64{labels.FromStrings("__name__", "test_metric").Hash(): 100}} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -655,7 +656,7 @@ func BenchmarkRemoteWriteHandler(b *testing.B) { appendable := &mockAppendable{} // TODO: test with other proto format(s) - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() b.ResetTimer() @@ -672,7 +673,7 @@ func TestCommitErr_V1Message(t *testing.T) { require.NoError(t, err) appendable := &mockAppendable{commitErr: fmt.Errorf("commit error")} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -685,7 +686,7 @@ func TestCommitErr_V1Message(t *testing.T) { } func TestCommitErr_V2Message(t *testing.T) { - payload, _, _, err := buildV2WriteRequest(log.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") + payload, _, _, err := buildV2WriteRequest(promslog.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") require.NoError(t, err) req, err := http.NewRequest("", "", bytes.NewReader(payload)) @@ -696,7 +697,7 @@ func TestCommitErr_V2Message(t *testing.T) { req.Header.Set(RemoteWriteVersionHeader, RemoteWriteVersion20HeaderValue) appendable := &mockAppendable{commitErr: fmt.Errorf("commit error")} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -723,7 +724,7 @@ func BenchmarkRemoteWriteOOOSamples(b *testing.B) { require.NoError(b, db.Close()) }) // TODO: test with other proto format(s) - handler := NewWriteHandler(log.NewNopLogger(), nil, db.Head(), []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, db.Head(), []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) buf, _, _, err := buildWriteRequest(nil, genSeriesWithSample(1000, 200*time.Minute.Milliseconds()), nil, nil, nil, nil, "snappy") require.NoError(b, err) @@ -832,6 +833,10 @@ func (m *mockAppendable) Appender(_ context.Context) 
storage.Appender { return m } +func (m *mockAppendable) SetOptions(opts *storage.AppendOptions) { + panic("unimplemented") +} + func (m *mockAppendable) Append(_ storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { if m.appendSampleErr != nil { return 0, m.appendSampleErr @@ -915,6 +920,13 @@ func (m *mockAppendable) AppendHistogram(_ storage.SeriesRef, l labels.Labels, t return 0, nil } +func (m *mockAppendable) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + // AppendCTZeroSample is no-op for remote-write for now. + // TODO(bwplotka/arthursens): Add support for PRW 2.0 for CT zero feature (but also we might + // replace this with in-metadata CT storage, see https://github.com/prometheus/prometheus/issues/14218). + return 0, nil +} + func (m *mockAppendable) UpdateMetadata(_ storage.SeriesRef, l labels.Labels, mp metadata.Metadata) (storage.SeriesRef, error) { if m.updateMetadataErr != nil { return 0, m.updateMetadataErr diff --git a/storage/remote/write_test.go b/storage/remote/write_test.go index 17b35ec9e4..7d05f5b912 100644 --- a/storage/remote/write_test.go +++ b/storage/remote/write_test.go @@ -22,10 +22,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" common_config "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" @@ -380,7 +380,7 @@ func TestOTLPWriteHandler(t *testing.T) { req.Header.Set("Content-Type", "application/x-protobuf") appendable := &mockAppendable{} - handler := NewOTLPWriteHandler(log.NewNopLogger(), appendable, func() config.Config { + handler := NewOTLPWriteHandler(promslog.NewNopLogger(), appendable, func() config.Config { return config.Config{ OTLPConfig: config.DefaultOTLPConfig, } diff --git a/tracing/tracing.go b/tracing/tracing.go index 6b9319ecbd..4fdedf505b 100644 --- a/tracing/tracing.go +++ b/tracing/tracing.go @@ -16,11 +16,10 @@ package tracing import ( "context" "fmt" + "log/slog" "reflect" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/version" "go.opentelemetry.io/otel" @@ -43,14 +42,14 @@ const serviceName = "prometheus" // Manager is capable of building, (re)installing and shutting down // the tracer provider. type Manager struct { - logger log.Logger + logger *slog.Logger done chan struct{} config config.TracingConfig shutdownFunc func() error } // NewManager creates a new tracing manager. 
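With the tracing manager now taking a *slog.Logger, the tests pass promslog.NewNopLogger() and production callers can build a logger via promslog.New. A minimal usage sketch of the new signature; the endpoint is a placeholder copied from the test fixtures below, and it assumes Stop unblocks Run as in the current implementation:

package main

import (
	"github.com/prometheus/common/promslog"

	"github.com/prometheus/prometheus/config"
	"github.com/prometheus/prometheus/tracing"
)

func main() {
	// promslog.New returns a *slog.Logger; promslog.NewNopLogger() discards output.
	logger := promslog.New(&promslog.Config{})

	m := tracing.NewManager(logger)
	go m.Run() // Blocks until the manager is stopped.

	cfg := config.Config{
		TracingConfig: config.TracingConfig{Endpoint: "localhost:1234"}, // Placeholder endpoint.
	}
	if err := m.ApplyConfig(&cfg); err != nil {
		logger.Error("applying tracing config", "err", err)
	}

	m.Stop() // Shuts the provider down and logs "Tracing manager stopped".
}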
-func NewManager(logger log.Logger) *Manager { +func NewManager(logger *slog.Logger) *Manager { return &Manager{ logger: logger, done: make(chan struct{}), @@ -62,7 +61,7 @@ func NewManager(logger log.Logger) *Manager { func (m *Manager) Run() { otel.SetTextMapPropagator(propagation.TraceContext{}) otel.SetErrorHandler(otelErrHandler(func(err error) { - level.Error(m.logger).Log("msg", "OpenTelemetry handler returned an error", "err", err) + m.logger.Error("OpenTelemetry handler returned an error", "err", err.Error()) })) <-m.done } @@ -89,7 +88,7 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error { m.config = cfg.TracingConfig m.shutdownFunc = nil otel.SetTracerProvider(noop.NewTracerProvider()) - level.Info(m.logger).Log("msg", "Tracing provider uninstalled.") + m.logger.Info("Tracing provider uninstalled.") return nil } @@ -102,7 +101,7 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error { m.config = cfg.TracingConfig otel.SetTracerProvider(tp) - level.Info(m.logger).Log("msg", "Successfully installed a new tracer provider.") + m.logger.Info("Successfully installed a new tracer provider.") return nil } @@ -115,10 +114,10 @@ func (m *Manager) Stop() { } if err := m.shutdownFunc(); err != nil { - level.Error(m.logger).Log("msg", "failed to shut down the tracer provider", "err", err) + m.logger.Error("failed to shut down the tracer provider", "err", err) } - level.Info(m.logger).Log("msg", "Tracing manager stopped") + m.logger.Info("Tracing manager stopped") } type otelErrHandler func(err error) diff --git a/tracing/tracing_test.go b/tracing/tracing_test.go index b7996c6104..e735e1a18a 100644 --- a/tracing/tracing_test.go +++ b/tracing/tracing_test.go @@ -16,8 +16,8 @@ package tracing import ( "testing" - "github.com/go-kit/log" config_util "github.com/prometheus/common/config" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/trace/noop" @@ -28,7 +28,7 @@ import ( func TestInstallingNewTracerProvider(t *testing.T) { tpBefore := otel.GetTracerProvider() - m := NewManager(log.NewNopLogger()) + m := NewManager(promslog.NewNopLogger()) cfg := config.Config{ TracingConfig: config.TracingConfig{ Endpoint: "localhost:1234", @@ -41,7 +41,7 @@ func TestInstallingNewTracerProvider(t *testing.T) { } func TestReinstallingTracerProvider(t *testing.T) { - m := NewManager(log.NewNopLogger()) + m := NewManager(promslog.NewNopLogger()) cfg := config.Config{ TracingConfig: config.TracingConfig{ Endpoint: "localhost:1234", @@ -76,7 +76,7 @@ func TestReinstallingTracerProvider(t *testing.T) { } func TestReinstallingTracerProviderWithTLS(t *testing.T) { - m := NewManager(log.NewNopLogger()) + m := NewManager(promslog.NewNopLogger()) cfg := config.Config{ TracingConfig: config.TracingConfig{ Endpoint: "localhost:1234", @@ -96,7 +96,7 @@ func TestReinstallingTracerProviderWithTLS(t *testing.T) { } func TestUninstallingTracerProvider(t *testing.T) { - m := NewManager(log.NewNopLogger()) + m := NewManager(promslog.NewNopLogger()) cfg := config.Config{ TracingConfig: config.TracingConfig{ Endpoint: "localhost:1234", @@ -118,7 +118,7 @@ func TestUninstallingTracerProvider(t *testing.T) { } func TestTracerProviderShutdown(t *testing.T) { - m := NewManager(log.NewNopLogger()) + m := NewManager(promslog.NewNopLogger()) cfg := config.Config{ TracingConfig: config.TracingConfig{ Endpoint: "localhost:1234", diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 9697739e00..3863e6cd99 100644 --- a/tsdb/agent/db.go +++ 
b/tsdb/agent/db.go @@ -17,14 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "math" "path/filepath" "sync" "time" "unicode/utf8" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "go.uber.org/atomic" @@ -226,7 +225,7 @@ func (m *dbMetrics) Unregister() { // DB represents a WAL-only storage. It implements storage.DB. type DB struct { mtx sync.RWMutex - logger log.Logger + logger *slog.Logger opts *Options rs *remote.Storage @@ -251,7 +250,7 @@ type DB struct { } // Open returns a new agent.DB in the given directory. -func Open(l log.Logger, reg prometheus.Registerer, rs *remote.Storage, dir string, opts *Options) (*DB, error) { +func Open(l *slog.Logger, reg prometheus.Registerer, rs *remote.Storage, dir string, opts *Options) (*DB, error) { opts = validateOptions(opts) locker, err := tsdbutil.NewDirLocker(dir, "agent", l, reg) @@ -306,11 +305,11 @@ func Open(l log.Logger, reg prometheus.Registerer, rs *remote.Storage, dir strin } if err := db.replayWAL(); err != nil { - level.Warn(db.logger).Log("msg", "encountered WAL read error, attempting repair", "err", err) + db.logger.Warn("encountered WAL read error, attempting repair", "err", err) if err := w.Repair(err); err != nil { return nil, fmt.Errorf("repair corrupted WAL: %w", err) } - level.Info(db.logger).Log("msg", "successfully repaired WAL") + db.logger.Info("successfully repaired WAL") } go db.run() @@ -335,7 +334,7 @@ func validateOptions(opts *Options) *Options { opts.WALCompression = wlog.CompressionNone } - // Revert Stripesize to DefaultStripsize if Stripsize is either 0 or not a power of 2. + // Revert StripeSize to DefaultStripeSize if StripeSize is either 0 or not a power of 2. if opts.StripeSize <= 0 || ((opts.StripeSize & (opts.StripeSize - 1)) != 0) { opts.StripeSize = tsdb.DefaultStripeSize } @@ -359,7 +358,7 @@ func validateOptions(opts *Options) *Options { } func (db *DB) replayWAL() error { - level.Info(db.logger).Log("msg", "replaying WAL, this may take a while", "dir", db.wal.Dir()) + db.logger.Info("replaying WAL, this may take a while", "dir", db.wal.Dir()) start := time.Now() dir, startFrom, err := wlog.LastCheckpoint(db.wal.Dir()) @@ -376,7 +375,7 @@ func (db *DB) replayWAL() error { } defer func() { if err := sr.Close(); err != nil { - level.Warn(db.logger).Log("msg", "error while closing the wal segments reader", "err", err) + db.logger.Warn("error while closing the wal segments reader", "err", err) } }() @@ -386,7 +385,7 @@ func (db *DB) replayWAL() error { return fmt.Errorf("backfill checkpoint: %w", err) } startFrom++ - level.Info(db.logger).Log("msg", "WAL checkpoint loaded") + db.logger.Info("WAL checkpoint loaded") } // Find the last segment. @@ -395,7 +394,7 @@ func (db *DB) replayWAL() error { return fmt.Errorf("finding WAL segments: %w", err) } - // Backfil segments from the most recent checkpoint onwards. + // Backfill segments from the most recent checkpoint onwards. 
for i := startFrom; i <= last; i++ { seg, err := wlog.OpenReadSegment(wlog.SegmentName(db.wal.Dir(), i)) if err != nil { @@ -405,12 +404,12 @@ func (db *DB) replayWAL() error { sr := wlog.NewSegmentBufReader(seg) err = db.loadWAL(wlog.NewReader(sr), multiRef) if err := sr.Close(); err != nil { - level.Warn(db.logger).Log("msg", "error while closing the wal segments reader", "err", err) + db.logger.Warn("error while closing the wal segments reader", "err", err) } if err != nil { return err } - level.Info(db.logger).Log("msg", "WAL segment loaded", "segment", i, "maxSegment", last) + db.logger.Info("WAL segment loaded", "segment", i, "maxSegment", last) } walReplayDuration := time.Since(start) @@ -571,7 +570,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H } if v := nonExistentSeriesRefs.Load(); v > 0 { - level.Warn(db.logger).Log("msg", "found sample referencing non-existing series", "skipped_series", v) + db.logger.Warn("found sample referencing non-existing series", "skipped_series", v) } db.nextRef.Store(uint64(lastRef)) @@ -616,9 +615,9 @@ Loop: ts = maxTS } - level.Debug(db.logger).Log("msg", "truncating the WAL", "ts", ts) + db.logger.Debug("truncating the WAL", "ts", ts) if err := db.truncate(ts); err != nil { - level.Warn(db.logger).Log("msg", "failed to truncate WAL", "err", err) + db.logger.Warn("failed to truncate WAL", "err", err) } } } @@ -631,7 +630,7 @@ func (db *DB) truncate(mint int64) error { start := time.Now() db.gc(mint) - level.Info(db.logger).Log("msg", "series GC completed", "duration", time.Since(start)) + db.logger.Info("series GC completed", "duration", time.Since(start)) first, last, err := wlog.Segments(db.wal.Dir()) if err != nil { @@ -679,7 +678,7 @@ func (db *DB) truncate(mint int64) error { // If truncating fails, we'll just try it again at the next checkpoint. // Leftover segments will still just be ignored in the future if there's a // checkpoint that supersedes them. - level.Error(db.logger).Log("msg", "truncating segments failed", "err", err) + db.logger.Error("truncating segments failed", "err", err) } // The checkpoint is written and segments before it are truncated, so we @@ -696,13 +695,13 @@ func (db *DB) truncate(mint int64) error { // Leftover old checkpoints do not cause problems down the line beyond // occupying disk space. They will just be ignored since a newer checkpoint // exists. - level.Error(db.logger).Log("msg", "delete old checkpoints", "err", err) + db.logger.Error("delete old checkpoints", "err", err) db.metrics.checkpointDeleteFail.Inc() } db.metrics.walTruncateDuration.Observe(time.Since(start).Seconds()) - level.Info(db.logger).Log("msg", "WAL checkpoint complete", "first", first, "last", last, "duration", time.Since(start)) + db.logger.Info("WAL checkpoint complete", "first", first, "last", last, "duration", time.Since(start)) return nil } @@ -764,6 +763,7 @@ func (db *DB) Close() error { type appender struct { *DB + hints *storage.AppendOptions pendingSeries []record.RefSeries pendingSamples []record.RefSample @@ -784,6 +784,10 @@ type appender struct { floatHistogramSeries []*memSeries } +func (a *appender) SetOptions(opts *storage.AppendOptions) { + a.hints = opts +} + func (a *appender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { // series references and chunk references are identical for agent mode. 
headRef := chunks.HeadSeriesRef(ref) @@ -977,9 +981,134 @@ func (a *appender) UpdateMetadata(storage.SeriesRef, labels.Labels, metadata.Met return 0, nil } -func (a *appender) AppendCTZeroSample(storage.SeriesRef, labels.Labels, int64, int64) (storage.SeriesRef, error) { - // TODO(bwplotka): Wire metadata in the Agent's appender. - return 0, nil +func (a *appender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if h != nil { + if err := h.Validate(); err != nil { + return 0, err + } + } + if fh != nil { + if err := fh.Validate(); err != nil { + return 0, err + } + } + if ct >= t { + return 0, storage.ErrCTNewerThanSample + } + + series := a.series.GetByID(chunks.HeadSeriesRef(ref)) + if series == nil { + // Ensure no empty labels have gotten through. + l = l.WithoutEmpty() + if l.IsEmpty() { + return 0, fmt.Errorf("empty labelset: %w", tsdb.ErrInvalidSample) + } + + if lbl, dup := l.HasDuplicateLabelNames(); dup { + return 0, fmt.Errorf(`label name "%s" is not unique: %w`, lbl, tsdb.ErrInvalidSample) + } + + var created bool + series, created = a.getOrCreate(l) + if created { + a.pendingSeries = append(a.pendingSeries, record.RefSeries{ + Ref: series.ref, + Labels: l, + }) + a.metrics.numActiveSeries.Inc() + } + } + + series.Lock() + defer series.Unlock() + + if ct <= a.minValidTime(series.lastTs) { + return 0, storage.ErrOutOfOrderCT + } + + if ct > series.lastTs { + series.lastTs = ct + } else { + // discard the sample if it's out of order. + return 0, storage.ErrOutOfOrderCT + } + + switch { + case h != nil: + zeroHistogram := &histogram.Histogram{} + a.pendingHistograms = append(a.pendingHistograms, record.RefHistogramSample{ + Ref: series.ref, + T: ct, + H: zeroHistogram, + }) + a.histogramSeries = append(a.histogramSeries, series) + case fh != nil: + a.pendingFloatHistograms = append(a.pendingFloatHistograms, record.RefFloatHistogramSample{ + Ref: series.ref, + T: ct, + FH: &histogram.FloatHistogram{}, + }) + a.floatHistogramSeries = append(a.floatHistogramSeries, series) + } + + a.metrics.totalAppendedSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() + return storage.SeriesRef(series.ref), nil +} + +func (a *appender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64) (storage.SeriesRef, error) { + if ct >= t { + return 0, storage.ErrCTNewerThanSample + } + + series := a.series.GetByID(chunks.HeadSeriesRef(ref)) + if series == nil { + l = l.WithoutEmpty() + if l.IsEmpty() { + return 0, fmt.Errorf("empty labelset: %w", tsdb.ErrInvalidSample) + } + + if lbl, dup := l.HasDuplicateLabelNames(); dup { + return 0, fmt.Errorf(`label name "%s" is not unique: %w`, lbl, tsdb.ErrInvalidSample) + } + + newSeries, created := a.getOrCreate(l) + if created { + a.pendingSeries = append(a.pendingSeries, record.RefSeries{ + Ref: newSeries.ref, + Labels: l, + }) + a.metrics.numActiveSeries.Inc() + } + + series = newSeries + } + + series.Lock() + defer series.Unlock() + + if t <= a.minValidTime(series.lastTs) { + a.metrics.totalOutOfOrderSamples.Inc() + return 0, storage.ErrOutOfOrderSample + } + + if ct > series.lastTs { + series.lastTs = ct + } else { + // discard the sample if it's out of order. + return 0, storage.ErrOutOfOrderCT + } + + // NOTE: always modify pendingSamples and sampleSeries together. 
+ a.pendingSamples = append(a.pendingSamples, record.RefSample{ + Ref: series.ref, + T: ct, + V: 0, + }) + a.sampleSeries = append(a.sampleSeries, series) + + a.metrics.totalAppendedSamples.WithLabelValues(sampleMetricTypeFloat).Inc() + + return storage.SeriesRef(series.ref), nil } // Commit submits the collected samples and purges the batch. diff --git a/tsdb/agent/db_test.go b/tsdb/agent/db_test.go index b31041b1b9..b28c29095c 100644 --- a/tsdb/agent/db_test.go +++ b/tsdb/agent/db_test.go @@ -15,21 +15,23 @@ package agent import ( "context" + "errors" "fmt" + "io" "math" "path/filepath" "strconv" "testing" "time" - "github.com/go-kit/log" - "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage/remote" @@ -89,12 +91,12 @@ func createTestAgentDB(t testing.TB, reg prometheus.Registerer, opts *Options) * t.Helper() dbDir := t.TempDir() - rs := remote.NewStorage(log.NewNopLogger(), reg, startTime, dbDir, time.Second*30, nil, false) + rs := remote.NewStorage(promslog.NewNopLogger(), reg, startTime, dbDir, time.Second*30, nil, false) t.Cleanup(func() { require.NoError(t, rs.Close()) }) - db, err := Open(log.NewNopLogger(), reg, rs, dbDir, opts) + db, err := Open(promslog.NewNopLogger(), reg, rs, dbDir, opts) require.NoError(t, err) return db } @@ -583,7 +585,7 @@ func TestWALReplay(t *testing.T) { func TestLockfile(t *testing.T) { tsdbutil.TestDirLockerUsage(t, func(t *testing.T, data string, createLock bool) (*tsdbutil.DirLocker, testutil.Closer) { - logger := log.NewNopLogger() + logger := promslog.NewNopLogger() reg := prometheus.NewRegistry() rs := remote.NewStorage(logger, reg, startTime, data, time.Second*30, nil, false) t.Cleanup(func() { @@ -605,12 +607,12 @@ func TestLockfile(t *testing.T) { func Test_ExistingWAL_NextRef(t *testing.T) { dbDir := t.TempDir() - rs := remote.NewStorage(log.NewNopLogger(), nil, startTime, dbDir, time.Second*30, nil, false) + rs := remote.NewStorage(promslog.NewNopLogger(), nil, startTime, dbDir, time.Second*30, nil, false) defer func() { require.NoError(t, rs.Close()) }() - db, err := Open(log.NewNopLogger(), nil, rs, dbDir, DefaultOptions()) + db, err := Open(promslog.NewNopLogger(), nil, rs, dbDir, DefaultOptions()) require.NoError(t, err) seriesCount := 10 @@ -638,9 +640,11 @@ func Test_ExistingWAL_NextRef(t *testing.T) { require.NoError(t, db.Close()) // Create a new storage and see what nextRef is initialized to. 
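The new agent appender paths above mirror how created-timestamp (CT) ingestion is meant to be driven: the caller appends a synthetic zero sample at the created timestamp first, then the real sample, and treats storage.ErrOutOfOrderCT as non-fatal. A hedged caller-side sketch; appendWithCT is an illustrative helper, not Prometheus code:

package ctingest

import (
	"errors"

	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/storage"
)

// appendWithCT ingests one float sample; when the created timestamp ct is
// known and strictly older than the sample timestamp t, it first appends a
// zero sample at ct so the series visibly starts there.
func appendWithCT(app storage.Appender, lset labels.Labels, t, ct int64, v float64) error {
	var ref storage.SeriesRef
	if ct != 0 && ct < t {
		var err error
		ref, err = app.AppendCTZeroSample(ref, lset, t, ct)
		// ErrOutOfOrderCT means a sample at or after ct already exists for this
		// series; the real sample below should still be appended.
		if err != nil && !errors.Is(err, storage.ErrOutOfOrderCT) {
			return err
		}
	}
	_, err := app.Append(ref, lset, t, v)
	return err
}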
- db, err = Open(log.NewNopLogger(), nil, rs, dbDir, DefaultOptions()) + db, err = Open(promslog.NewNopLogger(), nil, rs, dbDir, DefaultOptions()) require.NoError(t, err) - defer require.NoError(t, db.Close()) + defer func() { + require.NoError(t, db.Close()) + }() require.Equal(t, uint64(seriesCount+histogramCount), db.nextRef.Load(), "nextRef should be equal to the number of series written across the entire WAL") } @@ -932,6 +936,249 @@ func TestDBOutOfOrderTimeWindow(t *testing.T) { } } +type walSample struct { + t int64 + f float64 + h *histogram.Histogram + lbls labels.Labels + ref storage.SeriesRef +} + +func TestDBCreatedTimestampSamplesIngestion(t *testing.T) { + t.Parallel() + + type appendableSample struct { + t int64 + ct int64 + v float64 + lbls labels.Labels + h *histogram.Histogram + expectsError bool + } + + testHistogram := tsdbutil.GenerateTestHistograms(1)[0] + zeroHistogram := &histogram.Histogram{} + + lbls := labelsForTest(t.Name(), 1) + defLbls := labels.New(lbls[0]...) + + testCases := []struct { + name string + inputSamples []appendableSample + expectedSamples []*walSample + expectedSeriesCount int + }{ + { + name: "in order ct+normal sample/floatSamples", + inputSamples: []appendableSample{ + {t: 100, ct: 1, v: 10, lbls: defLbls}, + {t: 101, ct: 1, v: 10, lbls: defLbls}, + }, + expectedSamples: []*walSample{ + {t: 1, f: 0, lbls: defLbls}, + {t: 100, f: 10, lbls: defLbls}, + {t: 101, f: 10, lbls: defLbls}, + }, + }, + { + name: "CT+float && CT+histogram samples", + inputSamples: []appendableSample{ + { + t: 100, + ct: 30, + v: 20, + lbls: defLbls, + }, + { + t: 300, + ct: 230, + h: testHistogram, + lbls: defLbls, + }, + }, + expectedSamples: []*walSample{ + {t: 30, f: 0, lbls: defLbls}, + {t: 100, f: 20, lbls: defLbls}, + {t: 230, h: zeroHistogram, lbls: defLbls}, + {t: 300, h: testHistogram, lbls: defLbls}, + }, + expectedSeriesCount: 1, + }, + { + name: "CT+float && CT+histogram samples with error", + inputSamples: []appendableSample{ + { + // invalid CT + t: 100, + ct: 100, + v: 10, + lbls: defLbls, + expectsError: true, + }, + { + // invalid CT histogram + t: 300, + ct: 300, + h: testHistogram, + lbls: defLbls, + expectsError: true, + }, + }, + expectedSamples: []*walSample{ + {t: 100, f: 10, lbls: defLbls}, + {t: 300, h: testHistogram, lbls: defLbls}, + }, + expectedSeriesCount: 0, + }, + { + name: "In order ct+normal sample/histogram", + inputSamples: []appendableSample{ + {t: 100, h: testHistogram, ct: 1, lbls: defLbls}, + {t: 101, h: testHistogram, ct: 1, lbls: defLbls}, + }, + expectedSamples: []*walSample{ + {t: 1, h: &histogram.Histogram{}}, + {t: 100, h: testHistogram}, + {t: 101, h: &histogram.Histogram{CounterResetHint: histogram.NotCounterReset}}, + }, + }, + { + name: "ct+normal then OOO sample/float", + inputSamples: []appendableSample{ + {t: 60_000, ct: 40_000, v: 10, lbls: defLbls}, + {t: 120_000, ct: 40_000, v: 10, lbls: defLbls}, + {t: 180_000, ct: 40_000, v: 10, lbls: defLbls}, + {t: 50_000, ct: 40_000, v: 10, lbls: defLbls}, + }, + expectedSamples: []*walSample{ + {t: 40_000, f: 0, lbls: defLbls}, + {t: 50_000, f: 10, lbls: defLbls}, + {t: 60_000, f: 10, lbls: defLbls}, + {t: 120_000, f: 10, lbls: defLbls}, + {t: 180_000, f: 10, lbls: defLbls}, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + reg := prometheus.NewRegistry() + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 360_000 + s := createTestAgentDB(t, reg, opts) + app := s.Appender(context.TODO()) + + for _, sample := range 
tc.inputSamples { + // We supposed to write a Histogram to the WAL + if sample.h != nil { + _, err := app.AppendHistogramCTZeroSample(0, sample.lbls, sample.t, sample.ct, zeroHistogram, nil) + if !errors.Is(err, storage.ErrOutOfOrderCT) { + require.Equal(t, sample.expectsError, err != nil, "expected error: %v, got: %v", sample.expectsError, err) + } + + _, err = app.AppendHistogram(0, sample.lbls, sample.t, sample.h, nil) + require.NoError(t, err) + } else { + // We supposed to write a float sample to the WAL + _, err := app.AppendCTZeroSample(0, sample.lbls, sample.t, sample.ct) + if !errors.Is(err, storage.ErrOutOfOrderCT) { + require.Equal(t, sample.expectsError, err != nil, "expected error: %v, got: %v", sample.expectsError, err) + } + + _, err = app.Append(0, sample.lbls, sample.t, sample.v) + require.NoError(t, err) + } + } + + require.NoError(t, app.Commit()) + // Close the DB to ensure all data is flushed to the WAL + require.NoError(t, s.Close()) + + // Check that we dont have any OOO samples in the WAL by checking metrics + families, err := reg.Gather() + require.NoError(t, err, "failed to gather metrics") + for _, f := range families { + if f.GetName() == "prometheus_agent_out_of_order_samples_total" { + t.Fatalf("unexpected metric %s", f.GetName()) + } + } + + outputSamples := readWALSamples(t, s.wal.Dir()) + + require.Equal(t, len(tc.expectedSamples), len(outputSamples), "Expected %d samples", len(tc.expectedSamples)) + + for i, expectedSample := range tc.expectedSamples { + for _, sample := range outputSamples { + if sample.t == expectedSample.t && sample.lbls.String() == expectedSample.lbls.String() { + if expectedSample.h != nil { + require.Equal(t, expectedSample.h, sample.h, "histogram value mismatch (sample index %d)", i) + } else { + require.Equal(t, expectedSample.f, sample.f, "value mismatch (sample index %d)", i) + } + } + } + } + }) + } +} + +func readWALSamples(t *testing.T, walDir string) []*walSample { + t.Helper() + sr, err := wlog.NewSegmentsReader(walDir) + require.NoError(t, err) + defer func(sr io.ReadCloser) { + err := sr.Close() + require.NoError(t, err) + }(sr) + + r := wlog.NewReader(sr) + dec := record.NewDecoder(labels.NewSymbolTable()) + + var ( + samples []record.RefSample + histograms []record.RefHistogramSample + + lastSeries record.RefSeries + outputSamples = make([]*walSample, 0) + ) + + for r.Next() { + rec := r.Record() + switch dec.Type(rec) { + case record.Series: + series, err := dec.Series(rec, nil) + require.NoError(t, err) + lastSeries = series[0] + case record.Samples: + samples, err = dec.Samples(rec, samples[:0]) + require.NoError(t, err) + for _, s := range samples { + outputSamples = append(outputSamples, &walSample{ + t: s.T, + f: s.V, + lbls: lastSeries.Labels.Copy(), + ref: storage.SeriesRef(lastSeries.Ref), + }) + } + case record.HistogramSamples: + histograms, err = dec.HistogramSamples(rec, histograms[:0]) + require.NoError(t, err) + for _, h := range histograms { + outputSamples = append(outputSamples, &walSample{ + t: h.T, + h: h.H, + lbls: lastSeries.Labels.Copy(), + ref: storage.SeriesRef(lastSeries.Ref), + }) + } + } + } + + return outputSamples +} + func BenchmarkCreateSeries(b *testing.B) { s := createTestAgentDB(b, nil, DefaultOptions()) defer s.Close() diff --git a/tsdb/block.go b/tsdb/block.go index 20d66763c3..bfb04c3f43 100644 --- a/tsdb/block.go +++ b/tsdb/block.go @@ -20,16 +20,17 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "slices" "sync" "time" - "github.com/go-kit/log" - 
"github.com/go-kit/log/level" "github.com/oklog/ulid" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" @@ -282,7 +283,7 @@ func readMetaFile(dir string) (*BlockMeta, int64, error) { return &m, int64(len(b)), nil } -func writeMetaFile(logger log.Logger, dir string, meta *BlockMeta) (int64, error) { +func writeMetaFile(logger *slog.Logger, dir string, meta *BlockMeta) (int64, error) { meta.Version = metaVersion1 // Make any changes to the file appear atomic. @@ -290,7 +291,7 @@ func writeMetaFile(logger log.Logger, dir string, meta *BlockMeta) (int64, error tmp := path + ".tmp" defer func() { if err := os.RemoveAll(tmp); err != nil { - level.Error(logger).Log("msg", "remove tmp file", "err", err.Error()) + logger.Error("remove tmp file", "err", err.Error()) } }() @@ -336,7 +337,7 @@ type Block struct { indexr IndexReader tombstones tombstones.Reader - logger log.Logger + logger *slog.Logger numBytesChunks int64 numBytesIndex int64 @@ -346,14 +347,14 @@ type Block struct { // OpenBlock opens the block in the directory. It can be passed a chunk pool, which is used // to instantiate chunk structs. -func OpenBlock(logger log.Logger, dir string, pool chunkenc.Pool) (pb *Block, err error) { +func OpenBlock(logger *slog.Logger, dir string, pool chunkenc.Pool) (pb *Block, err error) { return OpenBlockWithOptions(logger, dir, pool, nil, DefaultPostingsForMatchersCacheTTL, DefaultPostingsForMatchersCacheMaxItems, DefaultPostingsForMatchersCacheMaxBytes, DefaultPostingsForMatchersCacheForce) } // OpenBlockWithOptions is like OpenBlock but allows to pass a cache provider and sharding function. -func OpenBlockWithOptions(logger log.Logger, dir string, pool chunkenc.Pool, cache index.ReaderCacheProvider, postingsCacheTTL time.Duration, postingsCacheMaxItems int, postingsCacheMaxBytes int64, postingsCacheForce bool) (pb *Block, err error) { +func OpenBlockWithOptions(logger *slog.Logger, dir string, pool chunkenc.Pool, cache index.ReaderCacheProvider, postingsCacheTTL time.Duration, postingsCacheMaxItems int, postingsCacheMaxBytes int64, postingsCacheForce bool) (pb *Block, err error) { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } var closers []io.Closer defer func() { diff --git a/tsdb/block_test.go b/tsdb/block_test.go index 0f6d1811ce..a2f64ac4fb 100644 --- a/tsdb/block_test.go +++ b/tsdb/block_test.go @@ -22,12 +22,13 @@ import ( "math/rand" "os" "path/filepath" + "slices" "sort" "strconv" "testing" - "github.com/go-kit/log" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/histogram" @@ -46,7 +47,7 @@ import ( func TestBlockMetaMustNeverBeVersion2(t *testing.T) { dir := t.TempDir() - _, err := writeMetaFile(log.NewNopLogger(), dir, &BlockMeta{}) + _, err := writeMetaFile(promslog.NewNopLogger(), dir, &BlockMeta{}) require.NoError(t, err) meta, _, err := readMetaFile(dir) @@ -151,7 +152,7 @@ func TestCorruptedChunk(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Truncate(fi.Size()-1)) }, - iterErr: errors.New("cannot populate chunk 8 from block 00000000000000000000000000: segment doesn't include enough bytes to read the chunk - required:26, available:25"), + iterErr: errors.New("cannot populate chunk 8 from block 00000000000000000000000000: segment doesn't include enough bytes 
to read the chunk - required:25, available:24"), }, { name: "checksum mismatch", @@ -169,7 +170,7 @@ func TestCorruptedChunk(t *testing.T) { require.NoError(t, err) require.Equal(t, 1, n) }, - iterErr: errors.New("cannot populate chunk 8 from block 00000000000000000000000000: checksum mismatch expected:cfc0526c, actual:34815eae"), + iterErr: errors.New("cannot populate chunk 8 from block 00000000000000000000000000: checksum mismatch expected:231bddcf, actual:d85ad10d"), }, } { t.Run(tc.name, func(t *testing.T) { @@ -191,7 +192,7 @@ func TestCorruptedChunk(t *testing.T) { // Check open err. b, err := OpenBlock(nil, blockDir, nil) if tc.openErr != nil { - require.Equal(t, tc.openErr.Error(), err.Error()) + require.EqualError(t, err, tc.openErr.Error()) return } defer func() { require.NoError(t, b.Close()) }() @@ -205,7 +206,7 @@ func TestCorruptedChunk(t *testing.T) { require.True(t, set.Next()) it := set.At().Iterator(nil) require.Equal(t, chunkenc.ValNone, it.Next()) - require.Equal(t, tc.iterErr.Error(), it.Err().Error()) + require.EqualError(t, it.Err(), tc.iterErr.Error()) }) } } @@ -323,6 +324,33 @@ func TestLabelValuesWithMatchers(t *testing.T) { } } +func TestBlockQuerierReturnsSortedLabelValues(t *testing.T) { + tmpdir := t.TempDir() + ctx := context.Background() + + var seriesEntries []storage.Series + for i := 100; i > 0; i-- { + seriesEntries = append(seriesEntries, storage.NewListSeries(labels.FromStrings( + "__name__", fmt.Sprintf("value%d", i), + ), []chunks.Sample{sample{100, 0, nil, nil}})) + } + + blockDir := createBlock(t, tmpdir, seriesEntries) + + // Check open err. + block, err := OpenBlock(nil, blockDir, nil) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, block.Close()) }) + + q, err := newBlockBaseQuerier(block, 0, 100) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, q.Close()) }) + + res, _, err := q.LabelValues(ctx, "__name__", nil) + require.NoError(t, err) + require.True(t, slices.IsSorted(res)) +} + // TestBlockSize ensures that the block size is calculated correctly. func TestBlockSize(t *testing.T) { tmpdir := t.TempDir() @@ -357,7 +385,7 @@ func TestBlockSize(t *testing.T) { require.NoError(t, err) require.Equal(t, expAfterDelete, actAfterDelete, "after a delete reported block size doesn't match actual disk size") - c, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{0}, nil, nil) + c, err := NewLeveledCompactor(context.Background(), nil, promslog.NewNopLogger(), []int64{0}, nil, nil) require.NoError(t, err) blockDirsAfterCompact, err := c.Compact(tmpdir, []string{blockInit.Dir()}, nil) require.NoError(t, err) @@ -609,13 +637,13 @@ func testPostingsForLabelMatching(t *testing.T, offset storage.SeriesRef, setUp // createBlock creates a block with given set of series and returns its dir. 
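For context on the OpenBlock signature change above, a small hedged sketch of opening a block with the new logger type; the block directory is a placeholder, and passing nil for the logger or pool is assumed to fall back to promslog.NewNopLogger() and a default chunk pool, as the OpenBlock(nil, blockDir, nil) calls in the tests above suggest:

package main

import (
	"fmt"

	"github.com/prometheus/common/promslog"

	"github.com/prometheus/prometheus/tsdb"
)

func main() {
	logger := promslog.New(&promslog.Config{})

	// "data/01EXAMPLEULID" is a placeholder for an existing block directory.
	block, err := tsdb.OpenBlock(logger, "data/01EXAMPLEULID", nil)
	if err != nil {
		logger.Error("open block", "err", err)
		return
	}
	defer block.Close()

	meta := block.Meta()
	fmt.Println("ulid:", meta.ULID, "series:", meta.Stats.NumSeries)
}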
func createBlock(tb testing.TB, dir string, series []storage.Series) string { - blockDir, err := CreateBlock(series, dir, 0, log.NewNopLogger()) + blockDir, err := CreateBlock(series, dir, 0, promslog.NewNopLogger()) require.NoError(tb, err) return blockDir } func createBlockFromHead(tb testing.TB, dir string, head *Head) string { - compactor, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{1000000}, nil, nil) + compactor, err := NewLeveledCompactor(context.Background(), nil, promslog.NewNopLogger(), []int64{1000000}, nil, nil) require.NoError(tb, err) require.NoError(tb, os.MkdirAll(dir, 0o777)) @@ -629,7 +657,7 @@ func createBlockFromHead(tb testing.TB, dir string, head *Head) string { } func createBlockFromOOOHead(tb testing.TB, dir string, head *OOOCompactionHead) string { - compactor, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{1000000}, nil, nil) + compactor, err := NewLeveledCompactor(context.Background(), nil, promslog.NewNopLogger(), []int64{1000000}, nil, nil) require.NoError(tb, err) require.NoError(tb, os.MkdirAll(dir, 0o777)) diff --git a/tsdb/blockwriter.go b/tsdb/blockwriter.go index 232ec2b914..63f82e28df 100644 --- a/tsdb/blockwriter.go +++ b/tsdb/blockwriter.go @@ -17,11 +17,10 @@ import ( "context" "errors" "fmt" + "log/slog" "math" "os" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/oklog/ulid" "github.com/prometheus/prometheus/model/timestamp" @@ -31,7 +30,7 @@ import ( // BlockWriter is a block writer that allows appending and flushing series to disk. type BlockWriter struct { - logger log.Logger + logger *slog.Logger destinationDir string head *Head @@ -50,7 +49,7 @@ var ErrNoSeriesAppended = errors.New("no series appended, aborting") // contains anything at all. It is the caller's responsibility to // ensure that the resulting blocks do not overlap etc. // Writer ensures the block flush is atomic (via rename). -func NewBlockWriter(logger log.Logger, dir string, blockSize int64) (*BlockWriter, error) { +func NewBlockWriter(logger *slog.Logger, dir string, blockSize int64) (*BlockWriter, error) { w := &BlockWriter{ logger: logger, destinationDir: dir, @@ -95,7 +94,7 @@ func (w *BlockWriter) Flush(ctx context.Context) (ulid.ULID, error) { // Add +1 millisecond to block maxt because block intervals are half-open: [b.MinTime, b.MaxTime). // Because of this block intervals are always +1 than the total samples it includes. 
maxt := w.head.MaxTime() + 1 - level.Info(w.logger).Log("msg", "flushing", "series_count", w.head.NumSeries(), "mint", timestamp.Time(mint), "maxt", timestamp.Time(maxt)) + w.logger.Info("flushing", "series_count", w.head.NumSeries(), "mint", timestamp.Time(mint), "maxt", timestamp.Time(maxt)) compactor, err := NewLeveledCompactor(ctx, nil, @@ -121,7 +120,7 @@ func (w *BlockWriter) Flush(ctx context.Context) (ulid.ULID, error) { func (w *BlockWriter) Close() error { defer func() { if err := os.RemoveAll(w.chunkDir); err != nil { - level.Error(w.logger).Log("msg", "error in deleting BlockWriter files", "err", err) + w.logger.Error("error in deleting BlockWriter files", "err", err) } }() return w.head.Close() diff --git a/tsdb/blockwriter_test.go b/tsdb/blockwriter_test.go index d8240b53c6..4ec25df70a 100644 --- a/tsdb/blockwriter_test.go +++ b/tsdb/blockwriter_test.go @@ -19,9 +19,10 @@ import ( "path/filepath" "testing" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/tsdb/chunks" ) @@ -29,7 +30,7 @@ import ( func TestBlockWriter(t *testing.T) { ctx := context.Background() outputDir := t.TempDir() - w, err := NewBlockWriter(log.NewNopLogger(), outputDir, DefaultBlockDuration) + w, err := NewBlockWriter(promslog.NewNopLogger(), outputDir, DefaultBlockDuration) require.NoError(t, err) // Add some series. diff --git a/tsdb/chunkenc/bstream.go b/tsdb/chunkenc/bstream.go index 8cc59f3ea7..6e01798f72 100644 --- a/tsdb/chunkenc/bstream.go +++ b/tsdb/chunkenc/bstream.go @@ -86,8 +86,8 @@ func (b *bstream) writeBit(bit bit) { func (b *bstream) writeByte(byt byte) { if b.count == 0 { - b.stream = append(b.stream, 0) - b.count = 8 + b.stream = append(b.stream, byt) + return } i := len(b.stream) - 1 @@ -95,10 +95,8 @@ func (b *bstream) writeByte(byt byte) { // Complete the last byte with the leftmost b.count bits from byt. b.stream[i] |= byt >> (8 - b.count) - b.stream = append(b.stream, 0) - i++ // Write the remainder, if any. - b.stream[i] = byt << b.count + b.stream = append(b.stream, byt<<b.count) } - if j > 250 { - break - } a.Append(p.t, p.v) - i++ - j++ } - chunks = append(chunks, c) } - - fmt.Println("num", b.N, "created chunks", len(chunks)) } diff --git a/tsdb/chunkenc/histogram_meta_test.go b/tsdb/chunkenc/histogram_meta_test.go index fdbd1825aa..1774dee867 100644 --- a/tsdb/chunkenc/histogram_meta_test.go +++ b/tsdb/chunkenc/histogram_meta_test.go @@ -14,7 +14,7 @@ // The code in this file was largely written by Damian Gryski as part of // https://github.com/dgryski/go-tsz and published under the license below. // It was modified to accommodate reading from byte slices without modifying -// the underlying bytes, which would panic when reading from mmap'd +// the underlying bytes, which would panic when reading from mmapped // read-only byte slices. package chunkenc diff --git a/tsdb/chunkenc/xor.go b/tsdb/chunkenc/xor.go index 3177762f81..ac75a5994b 100644 --- a/tsdb/chunkenc/xor.go +++ b/tsdb/chunkenc/xor.go @@ -14,7 +14,7 @@ // The code in this file was largely written by Damian Gryski as part of // https://github.com/dgryski/go-tsz and published under the license below. // It was modified to accommodate reading from byte slices without modifying -// the underlying bytes, which would panic when reading from mmap'd +// the underlying bytes, which would panic when reading from mmapped // read-only byte slices.
// Copyright (c) 2015,2016 Damian Gryski @@ -191,8 +191,8 @@ func (a *xorAppender) Append(t int64, v float64) { case dod == 0: a.b.writeBit(zero) case bitRange(dod, 14): - a.b.writeBits(0b10, 2) - a.b.writeBits(uint64(dod), 14) + a.b.writeByte(0b10<<6 | (uint8(dod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. + a.b.writeByte(uint8(dod)) // Bottom 8 bits of dod. case bitRange(dod, 17): a.b.writeBits(0b110, 3) a.b.writeBits(uint64(dod), 17) diff --git a/tsdb/chunks/chunk_write_queue.go b/tsdb/chunks/chunk_write_queue.go index 86bd60381f..19ff19ce13 100644 --- a/tsdb/chunks/chunk_write_queue.go +++ b/tsdb/chunks/chunk_write_queue.go @@ -24,7 +24,7 @@ import ( ) const ( - // Minimum recorded peak since the last shrinking of chunkWriteQueue.chunkrefMap to shrink it again. + // Minimum recorded peak since the last shrinking of chunkWriteQueue.chunkRefMap to shrink it again. chunkRefMapShrinkThreshold = 1000 // Minimum interval between shrinking of chunkWriteQueue.chunkRefMap. diff --git a/tsdb/compact.go b/tsdb/compact.go index 9b57f28edd..c176c80483 100644 --- a/tsdb/compact.go +++ b/tsdb/compact.go @@ -19,16 +19,16 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "slices" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "go.uber.org/atomic" "golang.org/x/sync/semaphore" @@ -88,7 +88,7 @@ type Compactor interface { // LeveledCompactor implements the Compactor interface. type LeveledCompactor struct { metrics *CompactorMetrics - logger log.Logger + logger *slog.Logger ranges []int64 chunkPool chunkenc.Pool ctx context.Context @@ -176,7 +176,7 @@ type LeveledCompactorOptions struct { EnableOverlappingCompaction bool } -func NewLeveledCompactorWithChunkSize(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, maxBlockChunkSegmentSize int64, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) { +func NewLeveledCompactorWithChunkSize(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, maxBlockChunkSegmentSize int64, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) { return NewLeveledCompactorWithOptions(ctx, r, l, ranges, pool, LeveledCompactorOptions{ MaxBlockChunkSegmentSize: maxBlockChunkSegmentSize, MergeFunc: mergeFunc, @@ -184,14 +184,14 @@ func NewLeveledCompactorWithChunkSize(ctx context.Context, r prometheus.Register }) } -func NewLeveledCompactor(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) { +func NewLeveledCompactor(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) { return NewLeveledCompactorWithOptions(ctx, r, l, ranges, pool, LeveledCompactorOptions{ MergeFunc: mergeFunc, EnableOverlappingCompaction: true, }) } -func NewLeveledCompactorWithOptions(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, opts LeveledCompactorOptions) (*LeveledCompactor, error) { +func NewLeveledCompactorWithOptions(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, opts LeveledCompactorOptions) (*LeveledCompactor, error) { if len(ranges) == 0 { return nil, 
fmt.Errorf("at least one range must be provided") } @@ -199,7 +199,7 @@ func NewLeveledCompactorWithOptions(ctx context.Context, r prometheus.Registerer pool = chunkenc.NewPool() } if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } mergeFunc := opts.MergeFunc if mergeFunc == nil { @@ -550,8 +550,8 @@ func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string, meta := outBlocks[ix].meta if meta.Stats.NumSamples == 0 { - level.Info(c.logger).Log( - "msg", "compact blocks resulted in empty block", + c.logger.Info( + "compact blocks resulted in empty block", "count", len(blocks), "sources", fmt.Sprintf("%v", uids), "duration", time.Since(start), @@ -561,8 +561,8 @@ func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string, allOutputBlocksAreEmpty = false ulids[ix] = outBlocks[ix].meta.ULID - level.Info(c.logger).Log( - "msg", "compact blocks", + c.logger.Info( + "compact blocks", "count", len(blocks), "mint", meta.MinTime, "maxt", meta.MaxTime, @@ -580,8 +580,8 @@ func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string, b.meta.Compaction.Deletable = true n, err := writeMetaFile(c.logger, b.dir, &b.meta) if err != nil { - level.Error(c.logger).Log( - "msg", "Failed to write 'Deletable' to meta file after compaction", + c.logger.Error( + "Failed to write 'Deletable' to meta file after compaction", "ulid", b.meta.ULID, ) } @@ -680,12 +680,12 @@ func (c *LeveledCompactor) compactOOO(dest string, oooHead *OOOCompactionHead, s for _, ob := range obs { if ob.tmpDir != "" { if removeErr := os.RemoveAll(ob.tmpDir); removeErr != nil { - level.Error(c.logger).Log("msg", "Failed to remove temp folder after failed compaction", "dir", ob.tmpDir, "err", removeErr.Error()) + c.logger.Error("Failed to remove temp folder after failed compaction", "dir", ob.tmpDir, "err", removeErr.Error()) } } if ob.blockDir != "" { if removeErr := os.RemoveAll(ob.blockDir); removeErr != nil { - level.Error(c.logger).Log("msg", "Failed to remove block folder after failed compaction", "dir", ob.blockDir, "err", removeErr.Error()) + c.logger.Error("Failed to remove block folder after failed compaction", "dir", ob.blockDir, "err", removeErr.Error()) } } } @@ -700,8 +700,8 @@ func (c *LeveledCompactor) compactOOO(dest string, oooHead *OOOCompactionHead, s meta := outBlocks[ix][jx].meta if meta.Stats.NumSamples != 0 { noOOOBlock = false - level.Info(c.logger).Log( - "msg", "compact ooo head", + c.logger.Info( + "compact ooo head", "mint", meta.MinTime, "maxt", meta.MaxTime, "ulid", meta.ULID, @@ -716,8 +716,8 @@ func (c *LeveledCompactor) compactOOO(dest string, oooHead *OOOCompactionHead, s } if noOOOBlock { - level.Info(c.logger).Log( - "msg", "compact ooo head resulted in no blocks", + c.logger.Info( + "compact ooo head resulted in no blocks", "duration", time.Since(start), ) return nil, nil @@ -754,8 +754,8 @@ func (c *LeveledCompactor) Write(dest string, b BlockReader, mint, maxt int64, b } if meta.Stats.NumSamples == 0 { - level.Info(c.logger).Log( - "msg", "write block resulted in empty block", + c.logger.Info( + "write block resulted in empty block", "mint", meta.MinTime, "maxt", meta.MaxTime, "duration", time.Since(start), @@ -763,8 +763,8 @@ func (c *LeveledCompactor) Write(dest string, b BlockReader, mint, maxt int64, b return nil, nil } - level.Info(c.logger).Log( - "msg", "write block", + c.logger.Info( + "write block", "mint", meta.MinTime, "maxt", meta.MaxTime, "ulid", meta.ULID, @@ -804,7 +804,7 @@ func (c *LeveledCompactor) 
write(dest string, outBlocks []shardedBlock, blockPop if ob.tmpDir != "" { // RemoveAll returns no error when tmp doesn't exist so it is safe to always run it. if removeErr := os.RemoveAll(ob.tmpDir); removeErr != nil { - level.Error(c.logger).Log("msg", "Failed to remove temp folder after failed compaction", "dir", ob.tmpDir, "err", removeErr.Error()) + c.logger.Error("Failed to remove temp folder after failed compaction", "dir", ob.tmpDir, "err", removeErr.Error()) } } @@ -814,7 +814,7 @@ func (c *LeveledCompactor) write(dest string, outBlocks []shardedBlock, blockPop if err != nil && ob.blockDir != "" { // RemoveAll returns no error when tmp doesn't exist so it is safe to always run it. if removeErr := os.RemoveAll(ob.blockDir); removeErr != nil { - level.Error(c.logger).Log("msg", "Failed to remove block folder after failed compaction", "dir", ob.blockDir, "err", removeErr.Error()) + c.logger.Error("Failed to remove block folder after failed compaction", "dir", ob.blockDir, "err", removeErr.Error()) } } } @@ -939,7 +939,7 @@ func (c *LeveledCompactor) write(dest string, outBlocks []shardedBlock, blockPop return nil } -func debugOutOfOrderChunks(lbls labels.Labels, chks []chunks.Meta, logger log.Logger) { +func debugOutOfOrderChunks(lbls labels.Labels, chks []chunks.Meta, logger *slog.Logger) { if len(chks) <= 1 { return } @@ -955,7 +955,6 @@ func debugOutOfOrderChunks(lbls labels.Labels, chks []chunks.Meta, logger log.Lo // Looks like the chunk is out of order. logValues := []any{ - "msg", "found out-of-order chunk when compacting", "num_chunks_for_series", len(chks), "index", i, "labels", lbls.String(), @@ -983,7 +982,7 @@ func debugOutOfOrderChunks(lbls labels.Labels, chks []chunks.Meta, logger log.Lo ) } - level.Warn(logger).Log(logValues...) + logger.Warn("found out-of-order chunk when compacting", logValues...) } } @@ -992,7 +991,7 @@ func timeFromMillis(ms int64) time.Time { } type BlockPopulator interface { - PopulateBlock(ctx context.Context, metrics *CompactorMetrics, logger log.Logger, chunkPool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc, concurrencyOpts LeveledCompactorConcurrencyOptions, blocks []BlockReader, minT, maxT int64, outBlocks []shardedBlock, postingsFunc IndexReaderPostingsFunc) error + PopulateBlock(ctx context.Context, metrics *CompactorMetrics, logger *slog.Logger, chunkPool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc, concurrencyOpts LeveledCompactorConcurrencyOptions, blocks []BlockReader, minT, maxT int64, outBlocks []shardedBlock, postingsFunc IndexReaderPostingsFunc) error } // IndexReaderPostingsFunc is a function to get a sorted posting iterator from a given index reader. @@ -1015,7 +1014,7 @@ type DefaultBlockPopulator struct{} // It expects sorted blocks input by mint. // If there is more than 1 output block, each output block will only contain series that hash into its shard // (based on total number of output blocks). 
-func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *CompactorMetrics, logger log.Logger, chunkPool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc, concurrencyOpts LeveledCompactorConcurrencyOptions, blocks []BlockReader, minT, maxT int64, outBlocks []shardedBlock, postingsFunc IndexReaderPostingsFunc) (err error) { +func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *CompactorMetrics, logger *slog.Logger, chunkPool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc, concurrencyOpts LeveledCompactorConcurrencyOptions, blocks []BlockReader, minT, maxT int64, outBlocks []shardedBlock, postingsFunc IndexReaderPostingsFunc) (err error) { if len(blocks) == 0 { return errors.New("cannot populate block(s) from no readers") } @@ -1049,7 +1048,7 @@ func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa if i > 0 && b.Meta().MinTime < globalMaxt { metrics.OverlappingBlocks.Inc() overlapping = true - level.Info(logger).Log("msg", "Found overlapping blocks during compaction") + logger.Info("Found overlapping blocks during compaction") } if b.Meta().MaxTime > globalMaxt { globalMaxt = b.Meta().MaxTime @@ -1319,7 +1318,7 @@ func populateSymbols(ctx context.Context, mergeFunc storage.VerticalChunkSeriesM } // Returns opened blocks, and blocks that should be closed (also returned in case of error). -func openBlocksForCompaction(dirs []string, open []*Block, logger log.Logger, pool chunkenc.Pool, concurrency int) (blocks, blocksToClose []*Block, _ error) { +func openBlocksForCompaction(dirs []string, open []*Block, logger *slog.Logger, pool chunkenc.Pool, concurrency int) (blocks, blocksToClose []*Block, _ error) { blocks = make([]*Block, 0, len(dirs)) blocksToClose = make([]*Block, 0, len(dirs)) diff --git a/tsdb/compact_test.go b/tsdb/compact_test.go index 1e85aec88a..f66ec062ef 100644 --- a/tsdb/compact_test.go +++ b/tsdb/compact_test.go @@ -29,9 +29,9 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/oklog/ulid" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "golang.org/x/sync/semaphore" @@ -436,7 +436,7 @@ func TestRangeWithFailedCompactionWontGetSelected(t *testing.T) { } func TestCompactionFailWillCleanUpTempDir(t *testing.T) { - compactor, err := NewLeveledCompactorWithChunkSize(context.Background(), nil, log.NewNopLogger(), []int64{ + compactor, err := NewLeveledCompactorWithChunkSize(context.Background(), nil, promslog.NewNopLogger(), []int64{ 20, 60, 240, @@ -537,7 +537,7 @@ func TestCompaction_CompactWithSplitting(t *testing.T) { for _, shardCount := range shardCounts { t.Run(fmt.Sprintf("series=%d, shards=%d", series, shardCount), func(t *testing.T) { - c, err := NewLeveledCompactorWithChunkSize(ctx, nil, log.NewNopLogger(), []int64{0}, nil, chunks.DefaultChunkSegmentSize, nil) + c, err := NewLeveledCompactorWithChunkSize(ctx, nil, promslog.NewNopLogger(), []int64{0}, nil, chunks.DefaultChunkSegmentSize, nil) require.NoError(t, err) blockIDs, err := c.CompactWithSplitting(dir, blockDirs, openBlocks, shardCount) @@ -572,7 +572,7 @@ func TestCompaction_CompactWithSplitting(t *testing.T) { // Our splitting compaction preserves it too. 
seriesSymbols[""] = struct{}{} - block, err := OpenBlock(log.NewNopLogger(), filepath.Join(dir, blockID.String()), nil) + block, err := OpenBlock(promslog.NewNopLogger(), filepath.Join(dir, blockID.String()), nil) require.NoError(t, err) defer func() { @@ -658,7 +658,7 @@ func TestCompaction_CompactEmptyBlocks(t *testing.T) { require.NoError(t, os.Mkdir(bdir, 0o777)) require.NoError(t, os.Mkdir(chunkDir(bdir), 0o777)) - _, err := writeMetaFile(log.NewNopLogger(), bdir, m) + _, err := writeMetaFile(promslog.NewNopLogger(), bdir, m) require.NoError(t, err) iw, err := index.NewWriter(context.Background(), filepath.Join(bdir, indexFilename)) @@ -671,7 +671,7 @@ func TestCompaction_CompactEmptyBlocks(t *testing.T) { blockDirs = append(blockDirs, bdir) } - c, err := NewLeveledCompactorWithChunkSize(context.Background(), nil, log.NewNopLogger(), []int64{0}, nil, chunks.DefaultChunkSegmentSize, nil) + c, err := NewLeveledCompactorWithChunkSize(context.Background(), nil, promslog.NewNopLogger(), []int64{0}, nil, chunks.DefaultChunkSegmentSize, nil) require.NoError(t, err) blockIDs, err := c.CompactWithSplitting(dir, blockDirs, nil, 5) @@ -1247,8 +1247,7 @@ func TestCompaction_populateBlock(t *testing.T) { } err = blockPopulator.PopulateBlock(c.ctx, c.metrics, c.logger, c.chunkPool, c.mergeFunc, c.concurrencyOpts, blocks, meta.MinTime, meta.MaxTime, []shardedBlock{ob}, irPostingsFunc) if tc.expErr != nil { - require.Error(t, err) - require.Equal(t, tc.expErr.Error(), err.Error()) + require.EqualError(t, err, tc.expErr.Error()) return } require.NoError(t, err) @@ -1365,7 +1364,7 @@ func BenchmarkCompaction(b *testing.B) { blockDirs = append(blockDirs, block.Dir()) } - c, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{0}, nil, nil) + c, err := NewLeveledCompactor(context.Background(), nil, promslog.NewNopLogger(), []int64{0}, nil, nil) require.NoError(b, err) b.ResetTimer() @@ -1521,7 +1520,7 @@ func TestCancelCompactions(t *testing.T) { // Measure the compaction time without interrupting it. var timeCompactionUninterrupted time.Duration { - db, err := open(tmpdir, log.NewNopLogger(), nil, DefaultOptions(), []int64{1, 2000}, nil) + db, err := open(tmpdir, promslog.NewNopLogger(), nil, DefaultOptions(), []int64{1, 2000}, nil) require.NoError(t, err) require.Len(t, db.Blocks(), 3, "initial block count mismatch") require.Equal(t, 0.0, prom_testutil.ToFloat64(db.compactor.(*LeveledCompactor).metrics.Ran), "initial compaction counter mismatch") @@ -1540,7 +1539,7 @@ func TestCancelCompactions(t *testing.T) { } // Measure the compaction time when closing the db in the middle of compaction. { - db, err := open(tmpdirCopy, log.NewNopLogger(), nil, DefaultOptions(), []int64{1, 2000}, nil) + db, err := open(tmpdirCopy, promslog.NewNopLogger(), nil, DefaultOptions(), []int64{1, 2000}, nil) require.NoError(t, err) require.Len(t, db.Blocks(), 3, "initial block count mismatch") require.Equal(t, 0.0, prom_testutil.ToFloat64(db.compactor.(*LeveledCompactor).metrics.Ran), "initial compaction counter mismatch") @@ -1561,7 +1560,7 @@ func TestCancelCompactions(t *testing.T) { // This checks that the `context.Canceled` error is properly checked at all levels: // - tsdb_errors.NewMulti() should have the Is() method implemented for correct checks. // - callers should check with errors.Is() instead of ==. 
- readOnlyDB, err := OpenDBReadOnly(tmpdirCopy, "", log.NewNopLogger()) + readOnlyDB, err := OpenDBReadOnly(tmpdirCopy, "", promslog.NewNopLogger()) require.NoError(t, err) blocks, err := readOnlyDB.Blocks() require.NoError(t, err) @@ -1573,7 +1572,7 @@ func TestCancelCompactions(t *testing.T) { } // TestDeleteCompactionBlockAfterFailedReload ensures that a failed reloadBlocks immediately after a compaction -// deletes the resulting block to avoid creatings blocks with the same time range. +// deletes the resulting block to avoid creating blocks with the same time range. func TestDeleteCompactionBlockAfterFailedReload(t *testing.T) { tests := map[string]func(*DB) int{ "Test Head Compaction": func(db *DB) int { @@ -1660,7 +1659,7 @@ func TestOpenBlocksForCompaction(t *testing.T) { // Open subset of blocks first. const blocksToOpen = 2 - opened, toClose, err := openBlocksForCompaction(blockDirs[:blocksToOpen], nil, log.NewNopLogger(), nil, 10) + opened, toClose, err := openBlocksForCompaction(blockDirs[:blocksToOpen], nil, promslog.NewNopLogger(), nil, 10) for _, b := range toClose { defer func(b *Block) { require.NoError(t, b.Close()) }(b) } @@ -1670,7 +1669,7 @@ func TestOpenBlocksForCompaction(t *testing.T) { checkBlocks(t, toClose, blockDirs[:blocksToOpen]...) // Open all blocks, but provide previously opened blocks. - opened2, toClose2, err := openBlocksForCompaction(blockDirs, opened, log.NewNopLogger(), nil, 10) + opened2, toClose2, err := openBlocksForCompaction(blockDirs, opened, promslog.NewNopLogger(), nil, 10) for _, b := range toClose2 { defer func(b *Block) { require.NoError(t, b.Close()) }(b) } @@ -1696,11 +1695,11 @@ func TestOpenBlocksForCompactionErrorsNoMeta(t *testing.T) { } // open block[0] - b0, err := OpenBlock(log.NewNopLogger(), blockDirs[0], nil) + b0, err := OpenBlock(promslog.NewNopLogger(), blockDirs[0], nil) require.NoError(t, err) defer func() { require.NoError(t, b0.Close()) }() - _, toClose, err := openBlocksForCompaction(blockDirs, []*Block{b0}, log.NewNopLogger(), nil, 10) + _, toClose, err := openBlocksForCompaction(blockDirs, []*Block{b0}, promslog.NewNopLogger(), nil, 10) require.Error(t, err) // We didn't get to opening more blocks, because we found invalid dir, so there is nothing to close. @@ -1723,7 +1722,7 @@ func TestOpenBlocksForCompactionErrorsMissingIndex(t *testing.T) { } // open block[1] - b1, err := OpenBlock(log.NewNopLogger(), blockDirs[1], nil) + b1, err := OpenBlock(promslog.NewNopLogger(), blockDirs[1], nil) require.NoError(t, err) defer func() { require.NoError(t, b1.Close()) }() @@ -1733,7 +1732,7 @@ func TestOpenBlocksForCompactionErrorsMissingIndex(t *testing.T) { // Block[2] will be opened correctly. // Block[3] is invalid and will cause error. // Block[4] will not be opened at all. 
- opened, toClose, err := openBlocksForCompaction(blockDirs, []*Block{b1}, log.NewNopLogger(), nil, 1) + opened, toClose, err := openBlocksForCompaction(blockDirs, []*Block{b1}, promslog.NewNopLogger(), nil, 1) for _, b := range toClose { defer func(b *Block) { require.NoError(t, b.Close()) }(b) } @@ -2540,7 +2539,7 @@ func TestCompactEmptyResultBlockWithTombstone(t *testing.T) { err = block.Delete(ctx, 0, 10, labels.MustNewMatcher(labels.MatchEqual, defaultLabelName, "0")) require.NoError(t, err) - c, err := NewLeveledCompactor(ctx, nil, log.NewNopLogger(), []int64{0}, nil, nil) + c, err := NewLeveledCompactor(ctx, nil, promslog.NewNopLogger(), []int64{0}, nil, nil) require.NoError(t, err) ulids, err := c.Compact(tmpdir, []string{blockDir}, []*Block{block}) @@ -2736,7 +2735,7 @@ func TestDelayedCompactionDoesNotBlockUnrelatedOps(t *testing.T) { t.Parallel() tmpdir := t.TempDir() - // Some blocks that need compation are present. + // Some blocks that need compaction are present. createBlock(t, tmpdir, genSeries(1, 1, 0, 100)) createBlock(t, tmpdir, genSeries(1, 1, 100, 200)) createBlock(t, tmpdir, genSeries(1, 1, 200, 300)) @@ -2744,7 +2743,7 @@ func TestDelayedCompactionDoesNotBlockUnrelatedOps(t *testing.T) { options := DefaultOptions() // This will make the test timeout if compaction really waits for it. options.CompactionDelay = time.Hour - db, err := open(tmpdir, log.NewNopLogger(), nil, options, []int64{10, 200}, nil) + db, err := open(tmpdir, promslog.NewNopLogger(), nil, options, []int64{10, 200}, nil) require.NoError(t, err) defer func() { require.NoError(t, db.Close()) diff --git a/tsdb/db.go b/tsdb/db.go index 9f40a2c499..4d721dca05 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -20,6 +20,7 @@ import ( "fmt" "io" "io/fs" + "log/slog" "math" "math/rand" "os" @@ -29,10 +30,9 @@ import ( "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "go.uber.org/atomic" "golang.org/x/sync/errgroup" @@ -274,7 +274,7 @@ type Options struct { BlockChunkQuerierFunc BlockChunkQuerierFunc } -type NewCompactorFunc func(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) +type NewCompactorFunc func(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) type BlocksToDeleteFunc func(blocks []*Block) map[ulid.ULID]struct{} @@ -288,7 +288,7 @@ type DB struct { dir string locker *tsdbutil.DirLocker - logger log.Logger + logger *slog.Logger metrics *dbMetrics opts *Options chunkPool chunkenc.Pool @@ -479,7 +479,7 @@ var ErrClosed = errors.New("db already closed") // Current implementation doesn't support concurrency so // all API calls should happen in the same go routine. type DBReadOnly struct { - logger log.Logger + logger *slog.Logger dir string sandboxDir string closers []io.Closer @@ -487,7 +487,7 @@ type DBReadOnly struct { } // OpenDBReadOnly opens DB in the given directory for read only operations. 
-func OpenDBReadOnly(dir, sandboxDirRoot string, l log.Logger) (*DBReadOnly, error) { +func OpenDBReadOnly(dir, sandboxDirRoot string, l *slog.Logger) (*DBReadOnly, error) { if _, err := os.Stat(dir); err != nil { return nil, fmt.Errorf("opening the db dir: %w", err) } @@ -501,7 +501,7 @@ func OpenDBReadOnly(dir, sandboxDirRoot string, l log.Logger) (*DBReadOnly, erro } if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } return &DBReadOnly{ @@ -700,7 +700,7 @@ func (db *DBReadOnly) Blocks() ([]BlockReader, error) { if len(corrupted) > 0 { for _, b := range loadable { if err := b.Close(); err != nil { - level.Warn(db.logger).Log("msg", "Closing block failed", "err", err, "block", b) + db.logger.Warn("Closing block failed", "err", err, "block", b) } } errs := tsdb_errors.NewMulti() @@ -732,7 +732,7 @@ func (db *DBReadOnly) Blocks() ([]BlockReader, error) { blockMetas = append(blockMetas, b.Meta()) } if overlaps := OverlappingBlocks(blockMetas); len(overlaps) > 0 { - level.Warn(db.logger).Log("msg", "Overlapping blocks found during opening", "detail", overlaps.String()) + db.logger.Warn("Overlapping blocks found during opening", "detail", overlaps.String()) } // Close all previously open readers and add the new ones to the cache. @@ -810,7 +810,7 @@ func (db *DBReadOnly) Close() error { defer func() { // Delete the temporary sandbox directory that was created when opening the DB. if err := os.RemoveAll(db.sandboxDir); err != nil { - level.Error(db.logger).Log("msg", "delete sandbox dir", "err", err) + db.logger.Error("delete sandbox dir", "err", err) } }() select { @@ -824,7 +824,7 @@ func (db *DBReadOnly) Close() error { } // Open returns a new DB in the given directory. If options are empty, DefaultOptions will be used. -func Open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, stats *DBStats) (db *DB, err error) { +func Open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, stats *DBStats) (db *DB, err error) { var rngs []int64 opts, rngs = validateOpts(opts, nil) @@ -877,12 +877,12 @@ func validateOpts(opts *Options, rngs []int64) (*Options, []int64) { // open returns a new DB in the given directory. // It initializes the lockfile, WAL, compactor, and Head (by replaying the WAL), and runs the database. // It is not safe to open more than one DB in the same directory. 
-func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs []int64, stats *DBStats) (_ *DB, returnedErr error) { +func open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, rngs []int64, stats *DBStats) (_ *DB, returnedErr error) { if err := os.MkdirAll(dir, 0o777); err != nil { return nil, err } if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } if stats == nil { stats = NewDBStats() @@ -1067,17 +1067,17 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs db.head.metrics.walCorruptionsTotal.Inc() var e *errLoadWbl if errors.As(initErr, &e) { - level.Warn(db.logger).Log("msg", "Encountered WBL read error, attempting repair", "err", initErr) + db.logger.Warn("Encountered WBL read error, attempting repair", "err", initErr) if err := wbl.Repair(e.err); err != nil { return nil, fmt.Errorf("repair corrupted WBL: %w", err) } - level.Info(db.logger).Log("msg", "Successfully repaired WBL") + db.logger.Info("Successfully repaired WBL") } else { - level.Warn(db.logger).Log("msg", "Encountered WAL read error, attempting repair", "err", initErr) + db.logger.Warn("Encountered WAL read error, attempting repair", "err", initErr) if err := wal.Repair(initErr); err != nil { return nil, fmt.Errorf("repair corrupted WAL: %w", err) } - level.Info(db.logger).Log("msg", "Successfully repaired WAL") + db.logger.Info("Successfully repaired WAL") } } @@ -1095,7 +1095,7 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs return db, nil } -func removeBestEffortTmpDirs(l log.Logger, dir string) error { +func removeBestEffortTmpDirs(l *slog.Logger, dir string) error { files, err := os.ReadDir(dir) if os.IsNotExist(err) { return nil @@ -1106,10 +1106,10 @@ func removeBestEffortTmpDirs(l log.Logger, dir string) error { for _, f := range files { if isTmpDir(f) { if err := os.RemoveAll(filepath.Join(dir, f.Name())); err != nil { - level.Error(l).Log("msg", "failed to delete tmp block dir", "dir", filepath.Join(dir, f.Name()), "err", err) + l.Error("failed to delete tmp block dir", "dir", filepath.Join(dir, f.Name()), "err", err) continue } - level.Info(l).Log("msg", "Found and deleted tmp block dir", "dir", filepath.Join(dir, f.Name())) + l.Info("Found and deleted tmp block dir", "dir", filepath.Join(dir, f.Name())) } } return nil @@ -1147,7 +1147,7 @@ func (db *DB) run(ctx context.Context) { case <-time.After(1 * time.Minute): db.cmtx.Lock() if err := db.reloadBlocks(); err != nil { - level.Error(db.logger).Log("msg", "reloadBlocks", "err", err) + db.logger.Error("reloadBlocks", "err", err) } db.cmtx.Unlock() @@ -1163,7 +1163,7 @@ func (db *DB) run(ctx context.Context) { db.autoCompactMtx.Lock() if db.autoCompact { if err := db.Compact(ctx); err != nil { - level.Error(db.logger).Log("msg", "compaction failed", "err", err) + db.logger.Error("compaction failed", "err", err) backoff = exponential(backoff, 1*time.Second, 1*time.Minute) } else { backoff = 0 @@ -1377,8 +1377,8 @@ func (db *DB) Compact(ctx context.Context) (returnErr error) { compactionDuration := time.Since(start) if compactionDuration.Milliseconds() > db.head.chunkRange.Load() { - level.Warn(db.logger).Log( - "msg", "Head compaction took longer than the block time range, compactions are falling behind and won't be able to catch up", + db.logger.Warn( + "Head compaction took longer than the block time range, compactions are falling behind and won't be able to catch up", "duration", compactionDuration.String(), "block_range", 
db.head.chunkRange.Load(), ) @@ -1514,15 +1514,15 @@ func (db *DB) compactOOO(dest string, oooHead *OOOCompactionHead) (_ []ulid.ULID } if len(ulids) == 0 { - level.Info(db.logger).Log( - "msg", "compact ooo head resulted in no blocks", + db.logger.Info( + "compact ooo head resulted in no blocks", "duration", time.Since(start), ) return nil, nil } - level.Info(db.logger).Log( - "msg", "out-of-order compaction completed", + db.logger.Info( + "out-of-order compaction completed", "duration", time.Since(start), "ulids", fmt.Sprintf("%v", ulids), ) @@ -1567,7 +1567,7 @@ func (db *DB) compactBlocks() (err error) { // long enough that we end up with a HEAD block that needs to be written. // Check if that's the case and stop compactions early. if db.head.compactable() && !db.waitingForCompactionDelay() { - level.Warn(db.logger).Log("msg", "aborting block compactions to persit the head block") + db.logger.Warn("aborting block compactions to persist the head block") return nil } @@ -1663,7 +1663,7 @@ func (db *DB) reloadBlocks() (err error) { for _, b := range block.Meta().Compaction.Parents { if _, ok := corrupted[b.ULID]; ok { delete(corrupted, b.ULID) - level.Warn(db.logger).Log("msg", "Found corrupted block, but replaced by compacted one so it's safe to delete. This should not happen with atomic deletes.", "block", b.ULID) + db.logger.Warn("Found corrupted block, but replaced by compacted one so it's safe to delete. This should not happen with atomic deletes.", "block", b.ULID) } deletable[b.ULID] = nil } @@ -1725,7 +1725,7 @@ func (db *DB) reloadBlocks() (err error) { blockMetas = append(blockMetas, b.Meta()) } if overlaps := OverlappingBlocks(blockMetas); len(overlaps) > 0 { - level.Warn(db.logger).Log("msg", "Overlapping blocks found during reloadBlocks", "detail", overlaps.String()) + db.logger.Warn("Overlapping blocks found during reloadBlocks", "detail", overlaps.String()) } } @@ -1741,7 +1741,7 @@ func (db *DB) reloadBlocks() (err error) { return nil } -func openBlocks(l log.Logger, dir string, loaded []*Block, chunkPool chunkenc.Pool, cache *hashcache.SeriesHashCache, postingsCacheTTL time.Duration, postingsCacheMaxItems int, postingsCacheMaxBytes int64, postingsCacheForce bool) (blocks []*Block, corrupted map[ulid.ULID]error, err error) { +func openBlocks(l *slog.Logger, dir string, loaded []*Block, chunkPool chunkenc.Pool, cache *hashcache.SeriesHashCache, postingsCacheTTL time.Duration, postingsCacheMaxItems int, postingsCacheMaxBytes int64, postingsCacheForce bool) (blocks []*Block, corrupted map[ulid.ULID]error, err error) { bDirs, err := blockDirs(dir) if err != nil { return nil, nil, fmt.Errorf("find blocks: %w", err) } @@ -1751,7 +1751,7 @@ func openBlocks(l *slog.Logger, dir string, loaded []*Block, chunkPool chunkenc.Po for _, bDir := range bDirs { meta, _, err := readMetaFile(bDir) if err != nil { - level.Error(l).Log("msg", "Failed to read meta.json for a block during reloadBlocks. 
Skipping", "dir", bDir, "err", err) continue } @@ -1873,7 +1873,7 @@ func (db *DB) deleteBlocks(blocks map[ulid.ULID]*Block) error { for ulid, block := range blocks { if block != nil { if err := block.Close(); err != nil { - level.Warn(db.logger).Log("msg", "Closing block failed", "err", err, "block", ulid) + db.logger.Warn("Closing block failed", "err", err, "block", ulid) } } @@ -1894,7 +1894,7 @@ func (db *DB) deleteBlocks(blocks map[ulid.ULID]*Block) error { if err := os.RemoveAll(tmpToDelete); err != nil { return fmt.Errorf("delete obsolete block %s: %w", ulid, err) } - level.Info(db.logger).Log("msg", "Deleting obsolete block", "block", ulid) + db.logger.Info("Deleting obsolete block", "block", ulid) } return nil @@ -2062,7 +2062,7 @@ func (db *DB) DisableCompactions() { defer db.autoCompactMtx.Unlock() db.autoCompact = false - level.Info(db.logger).Log("msg", "Compactions disabled") + db.logger.Info("Compactions disabled") } // EnableCompactions enables auto compactions. @@ -2071,7 +2071,7 @@ func (db *DB) EnableCompactions() { defer db.autoCompactMtx.Unlock() db.autoCompact = true - level.Info(db.logger).Log("msg", "Compactions enabled") + db.logger.Info("Compactions enabled") } func (db *DB) generateCompactionDelay() time.Duration { @@ -2101,7 +2101,7 @@ func (db *DB) Snapshot(dir string, withHead bool) error { defer db.mtx.RUnlock() for _, b := range db.blocks { - level.Info(db.logger).Log("msg", "Snapshotting block", "block", b) + db.logger.Info("Snapshotting block", "block", b) if err := b.Snapshot(dir); err != nil { return fmt.Errorf("error snapshotting block: %s: %w", b.Dir(), err) @@ -2372,7 +2372,7 @@ func (db *DB) CleanTombstones() (err error) { for _, uid := range uids { dir := filepath.Join(db.Dir(), uid.String()) if err := os.RemoveAll(dir); err != nil { - level.Error(db.logger).Log("msg", "failed to delete block after failed `CleanTombstones`", "dir", dir, "err", err) + db.logger.Error("failed to delete block after failed `CleanTombstones`", "dir", dir, "err", err) } } if err != nil { diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 9a5dc0e2af..ed8dfa0014 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -15,13 +15,17 @@ package tsdb import ( "bufio" + "bytes" "context" "encoding/binary" "flag" "fmt" "hash/crc32" + "log/slog" "math" "math/rand" + "net/http" + "net/http/httptest" "os" "path" "path/filepath" @@ -32,14 +36,20 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/atomic" "go.uber.org/goleak" + "github.com/prometheus/prometheus/prompb" + "github.com/prometheus/prometheus/storage/remote" + + "github.com/gogo/protobuf/proto" + "github.com/golang/snappy" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -235,8 +245,8 @@ func TestDataAvailableOnlyAfterCommit(t *testing.T) { func TestNoPanicAfterWALCorruption(t *testing.T) { db := openTestDB(t, &Options{WALSegmentSize: 32 * 1024}, nil) - // Append until the first mmaped head chunk. - // This is to ensure that all samples can be read from the mmaped chunks when the WAL is corrupted. + // Append until the first mmapped head chunk. + // This is to ensure that all samples can be read from the mmapped chunks when the WAL is corrupted. 
var expSamples []chunks.Sample var maxt int64 ctx := context.Background() @@ -255,7 +265,7 @@ func TestNoPanicAfterWALCorruption(t *testing.T) { // Corrupt the WAL after the first sample of the series so that it has at least one sample and // it is not garbage collected. - // The repair deletes all WAL records after the corrupted record and these are read from the mmaped chunk. + // The repair deletes all WAL records after the corrupted record and these are read from the mmapped chunk. { walFiles, err := os.ReadDir(path.Join(db.Dir(), "wal")) require.NoError(t, err) @@ -1130,7 +1140,7 @@ func testWALReplayRaceOnSamplesLoggedBeforeSeries(t *testing.T, numSamplesBefore require.NoError(t, db.Close()) // Reopen the DB, replaying the WAL. - reopenDB, err := Open(db.Dir(), log.NewLogfmtLogger(os.Stderr), nil, nil, nil) + reopenDB, err := Open(db.Dir(), promslog.New(&promslog.Config{}), nil, nil, nil) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, reopenDB.Close()) @@ -1599,7 +1609,7 @@ func TestSizeRetention(t *testing.T) { // Create a WAL checkpoint, and compare sizes. first, last, err := wlog.Segments(db.Head().wal.Dir()) require.NoError(t, err) - _, err = wlog.Checkpoint(log.NewNopLogger(), db.Head().wal, first, last-1, func(x chunks.HeadSeriesRef) bool { return false }, 0) + _, err = wlog.Checkpoint(promslog.NewNopLogger(), db.Head().wal, first, last-1, func(x chunks.HeadSeriesRef) bool { return false }, 0) require.NoError(t, err) blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics. walSize, err = db.Head().wal.Size() @@ -2340,7 +2350,7 @@ func TestCorrectNumTombstones(t *testing.T) { // This ensures that a snapshot that includes the head and creates a block with a custom time range // will not overlap with the first block created by the next compaction. func TestBlockRanges(t *testing.T) { - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) + logger := promslog.New(&promslog.Config{}) ctx := context.Background() dir := t.TempDir() @@ -2425,7 +2435,7 @@ func TestBlockRanges(t *testing.T) { func TestDBReadOnly(t *testing.T) { var ( dbDir string - logger = log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) + logger = promslog.New(&promslog.Config{}) expBlocks []*Block expBlock *Block expSeries map[string][]chunks.Sample @@ -2543,7 +2553,7 @@ func TestDBReadOnly(t *testing.T) { // all api methods return an ErrClosed. func TestDBReadOnlyClosing(t *testing.T) { sandboxDir := t.TempDir() - db, err := OpenDBReadOnly(t.TempDir(), sandboxDir, log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr))) + db, err := OpenDBReadOnly(t.TempDir(), sandboxDir, promslog.New(&promslog.Config{})) require.NoError(t, err) // The sandboxDir was there. require.DirExists(t, db.sandboxDir) @@ -2560,7 +2570,7 @@ func TestDBReadOnlyClosing(t *testing.T) { func TestDBReadOnly_FlushWAL(t *testing.T) { var ( dbDir string - logger = log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) + logger = promslog.New(&promslog.Config{}) err error maxt int ctx = context.Background() @@ -2640,7 +2650,7 @@ func TestDBReadOnly_Querier_NoAlteration(t *testing.T) { spinUpQuerierAndCheck := func(dir, sandboxDir string, chunksCount int) { dBDirHash := dirHash(dir) - // Bootsrap a RO db from the same dir and set up a querier. + // Bootstrap a RO db from the same dir and set up a querier. 
dbReadOnly, err := OpenDBReadOnly(dir, sandboxDir, nil) require.NoError(t, err) require.Equal(t, chunksCount, countChunks(dir)) @@ -2659,7 +2669,7 @@ func TestDBReadOnly_Querier_NoAlteration(t *testing.T) { require.NoError(t, db.Close()) }() - // Append until the first mmaped head chunk. + // Append until the first mmapped head chunk. for i := 0; i < 121; i++ { app := db.Appender(context.Background()) _, err := app.Append(0, labels.FromStrings("foo", "bar"), int64(i), 0) @@ -3105,7 +3115,7 @@ func TestCompactHead(t *testing.T) { WALCompression: wlog.CompressionSnappy, } - db, err := Open(dbDir, log.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) + db, err := Open(dbDir, promslog.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) require.NoError(t, err) ctx := context.Background() app := db.Appender(ctx) @@ -3126,7 +3136,7 @@ func TestCompactHead(t *testing.T) { // Delete everything but the new block and // reopen the db to query it to ensure it includes the head data. require.NoError(t, deleteNonBlocks(db.Dir())) - db, err = Open(dbDir, log.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) + db, err = Open(dbDir, promslog.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) require.NoError(t, err) require.Len(t, db.Blocks(), 1) require.Equal(t, int64(maxt), db.Head().MinTime()) @@ -3153,7 +3163,7 @@ func TestCompactHead(t *testing.T) { // TestCompactHeadWithDeletion tests https://github.com/prometheus/prometheus/issues/11585. func TestCompactHeadWithDeletion(t *testing.T) { - db, err := Open(t.TempDir(), log.NewNopLogger(), prometheus.NewRegistry(), nil, nil) + db, err := Open(t.TempDir(), promslog.NewNopLogger(), prometheus.NewRegistry(), nil, nil) require.NoError(t, err) ctx := context.Background() @@ -3266,7 +3276,7 @@ func TestOpen_VariousBlockStates(t *testing.T) { // Regression test: Already removed parent can be still in list, which was causing Open errors. 
m.Compaction.Parents = append(m.Compaction.Parents, BlockDesc{ULID: ulid.MustParse(filepath.Base(compacted))}) m.Compaction.Parents = append(m.Compaction.Parents, BlockDesc{ULID: ulid.MustParse(filepath.Base(compacted))}) - _, err = writeMetaFile(log.NewLogfmtLogger(os.Stderr), dir, m) + _, err = writeMetaFile(promslog.New(&promslog.Config{}), dir, m) require.NoError(t, err) } tmpCheckpointDir := path.Join(tmpDir, "wal/checkpoint.00000001.tmp") @@ -3278,7 +3288,7 @@ func TestOpen_VariousBlockStates(t *testing.T) { opts := DefaultOptions() opts.RetentionDuration = 0 - db, err := Open(tmpDir, log.NewLogfmtLogger(os.Stderr), nil, opts, nil) + db, err := Open(tmpDir, promslog.New(&promslog.Config{}), nil, opts, nil) require.NoError(t, err) loadedBlocks := db.Blocks() @@ -3322,7 +3332,7 @@ func TestOneCheckpointPerCompactCall(t *testing.T) { tmpDir := t.TempDir() ctx := context.Background() - db, err := Open(tmpDir, log.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) + db, err := Open(tmpDir, promslog.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, db.Close()) @@ -3384,7 +3394,7 @@ func TestOneCheckpointPerCompactCall(t *testing.T) { createBlock(t, db.dir, genSeries(1, 1, newBlockMint, newBlockMaxt)) - db, err = Open(db.dir, log.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) + db, err = Open(db.dir, promslog.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) require.NoError(t, err) db.DisableCompactions() @@ -3433,7 +3443,7 @@ func TestNoPanicOnTSDBOpenError(t *testing.T) { tmpdir := t.TempDir() // Taking the lock will cause a TSDB startup error. - l, err := tsdbutil.NewDirLocker(tmpdir, "tsdb", log.NewNopLogger(), nil) + l, err := tsdbutil.NewDirLocker(tmpdir, "tsdb", promslog.NewNopLogger(), nil) require.NoError(t, err) require.NoError(t, l.Lock()) @@ -4588,7 +4598,7 @@ func TestMetadataCheckpointingOnlyKeepsLatestEntry(t *testing.T) { keep := func(id chunks.HeadSeriesRef) bool { return id != 3 } - _, err = wlog.Checkpoint(log.NewNopLogger(), w, first, last-1, keep, 0) + _, err = wlog.Checkpoint(promslog.NewNopLogger(), w, first, last-1, keep, 0) require.NoError(t, err) // Confirm there's been a checkpoint. @@ -5613,7 +5623,7 @@ func testQuerierOOOQuery(t *testing.T, }, }, { - name: "query inorder contain ooo mmaped samples returns all ingested samples at the beginning of the interval", + name: "query inorder contain ooo mmapped samples returns all ingested samples at the beginning of the interval", oooCap: 5, queryMinT: minutes(0), queryMaxT: minutes(200), @@ -5626,7 +5636,7 @@ func testQuerierOOOQuery(t *testing.T, }, { minT: minutes(101), - maxT: minutes(101 + (5-1)*2), // Append samples to fit in a single mmmaped OOO chunk and fit inside the first in-order mmaped chunk. + maxT: minutes(101 + (5-1)*2), // Append samples to fit in a single mmapped OOO chunk and fit inside the first in-order mmapped chunk. 
filter: func(t int64) bool { return t%2 == 1 }, isOOO: true, }, @@ -5639,7 +5649,7 @@ func testQuerierOOOQuery(t *testing.T, }, }, { - name: "query overlapping inorder and ooo mmaped samples returns all ingested samples at the beginning of the interval", + name: "query overlapping inorder and ooo mmapped samples returns all ingested samples at the beginning of the interval", oooCap: 30, queryMinT: minutes(0), queryMaxT: minutes(200), @@ -5652,7 +5662,7 @@ func testQuerierOOOQuery(t *testing.T, }, { minT: minutes(101), - maxT: minutes(101 + (30-1)*2), // Append samples to fit in a single mmmaped OOO chunk and overlap the first in-order mmaped chunk. + maxT: minutes(101 + (30-1)*2), // Append samples to fit in a single mmapped OOO chunk and overlap the first in-order mmapped chunk. filter: func(t int64) bool { return t%2 == 1 }, isOOO: true, }, @@ -5944,7 +5954,7 @@ func testChunkQuerierOOOQuery(t *testing.T, }, }, { - name: "query inorder contain ooo mmaped samples returns all ingested samples at the beginning of the interval", + name: "query inorder contain ooo mmapped samples returns all ingested samples at the beginning of the interval", oooCap: 5, queryMinT: minutes(0), queryMaxT: minutes(200), @@ -5957,7 +5967,7 @@ func testChunkQuerierOOOQuery(t *testing.T, }, { minT: minutes(101), - maxT: minutes(101 + (5-1)*2), // Append samples to fit in a single mmmaped OOO chunk and fit inside the first in-order mmaped chunk. + maxT: minutes(101 + (5-1)*2), // Append samples to fit in a single mmapped OOO chunk and fit inside the first in-order mmapped chunk. filter: func(t int64) bool { return t%2 == 1 }, isOOO: true, }, @@ -5970,7 +5980,7 @@ func testChunkQuerierOOOQuery(t *testing.T, }, }, { - name: "query overlapping inorder and ooo mmaped samples returns all ingested samples at the beginning of the interval", + name: "query overlapping inorder and ooo mmapped samples returns all ingested samples at the beginning of the interval", oooCap: 30, queryMinT: minutes(0), queryMaxT: minutes(200), @@ -5983,7 +5993,7 @@ func testChunkQuerierOOOQuery(t *testing.T, }, { minT: minutes(101), - maxT: minutes(101 + (30-1)*2), // Append samples to fit in a single mmmaped OOO chunk and overlap the first in-order mmaped chunk. + maxT: minutes(101 + (30-1)*2), // Append samples to fit in a single mmapped OOO chunk and overlap the first in-order mmapped chunk. filter: func(t int64) bool { return t%2 == 1 }, isOOO: true, }, @@ -6607,7 +6617,7 @@ func testWBLAndMmapReplay(t *testing.T, scenario sampleTypeScenario) { resetMmapToOriginal() // We neet to reset because new duplicate chunks can be written above. // Removing m-map markers in WBL by rewriting it. 
- newWbl, err := wlog.New(log.NewNopLogger(), nil, filepath.Join(t.TempDir(), "new_wbl"), wlog.CompressionNone) + newWbl, err := wlog.New(promslog.NewNopLogger(), nil, filepath.Join(t.TempDir(), "new_wbl"), wlog.CompressionNone) require.NoError(t, err) sr, err := wlog.NewSegmentsReader(originalWblDir) require.NoError(t, err) @@ -8749,7 +8759,7 @@ func TestAbortBlockCompactions(t *testing.T) { defer func() { require.NoError(t, db.Close()) }() - // It should NOT be compactible at the beginning of the test + // It should NOT be compactable at the beginning of the test require.False(t, db.head.compactable(), "head should NOT be compactable") // Track the number of compactions run inside db.compactBlocks() @@ -8759,7 +8769,7 @@ func TestAbortBlockCompactions(t *testing.T) { db.compactor = &mockCompactorFn{ planFn: func() ([]string, error) { // On every Plan() run increment compactions. After 4 compactions - // update HEAD to make it compactible to force an exit from db.compactBlocks() loop. + // update HEAD to make it compactable to force an exit from db.compactBlocks() loop. compactions++ if compactions > 3 { chunkRange := db.head.chunkRange.Load() @@ -8788,7 +8798,7 @@ func TestNewCompactorFunc(t *testing.T) { opts := DefaultOptions() block1 := ulid.MustNew(1, nil) block2 := ulid.MustNew(2, nil) - opts.NewCompactorFunc = func(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) { + opts.NewCompactorFunc = func(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) { return &mockCompactorFn{ planFn: func() ([]string, error) { return []string{block1.String(), block2.String()}, nil @@ -8970,3 +8980,110 @@ func TestGenerateCompactionDelay(t *testing.T) { assertDelay(db.generateCompactionDelay()) } } + +type blockedResponseRecorder struct { + r *httptest.ResponseRecorder + + // writeblocked is used to block writing until the test wants it to resume. + writeBlocked chan struct{} + // writeStarted is closed by blockedResponseRecorder to signal that writing has started. + writeStarted chan struct{} +} + +func (br *blockedResponseRecorder) Write(buf []byte) (int, error) { + select { + case <-br.writeStarted: + default: + close(br.writeStarted) + } + + <-br.writeBlocked + return br.r.Write(buf) +} + +func (br *blockedResponseRecorder) Header() http.Header { return br.r.Header() } + +func (br *blockedResponseRecorder) WriteHeader(code int) { br.r.WriteHeader(code) } + +func (br *blockedResponseRecorder) Flush() { br.r.Flush() } + +// TestBlockClosingBlockedDuringRemoteRead ensures that a TSDB Block is not closed while it is being queried +// through remote read. This is a regression test for https://github.com/prometheus/prometheus/issues/14422. +// TODO: Ideally, this should reside in storage/remote/read_handler_test.go once the necessary TSDB utils are accessible there. +func TestBlockClosingBlockedDuringRemoteRead(t *testing.T) { + dir := t.TempDir() + + createBlock(t, dir, genSeries(2, 1, 0, 10)) + db, err := Open(dir, nil, nil, nil, nil) + require.NoError(t, err) + // No error checking as manually closing the block is supposed to make this fail. 
+ defer db.Close() + + readAPI := remote.NewReadHandler(nil, nil, db, func() config.Config { + return config.Config{} + }, + 0, 1, 0, + ) + + matcher, err := labels.NewMatcher(labels.MatchRegexp, "__name__", ".*") + require.NoError(t, err) + + query, err := remote.ToQuery(0, 10, []*labels.Matcher{matcher}, nil) + require.NoError(t, err) + + req := &prompb.ReadRequest{ + Queries: []*prompb.Query{query}, + AcceptedResponseTypes: []prompb.ReadRequest_ResponseType{prompb.ReadRequest_STREAMED_XOR_CHUNKS}, + } + data, err := proto.Marshal(req) + require.NoError(t, err) + + request, err := http.NewRequest(http.MethodPost, "", bytes.NewBuffer(snappy.Encode(nil, data))) + require.NoError(t, err) + + blockedRecorder := &blockedResponseRecorder{ + r: httptest.NewRecorder(), + writeBlocked: make(chan struct{}), + writeStarted: make(chan struct{}), + } + + readDone := make(chan struct{}) + go func() { + readAPI.ServeHTTP(blockedRecorder, request) + require.Equal(t, http.StatusOK, blockedRecorder.r.Code) + close(readDone) + }() + + // Wait for the read API to start streaming data. + <-blockedRecorder.writeStarted + + // Try to close the queried block. + blockClosed := make(chan struct{}) + go func() { + for _, block := range db.Blocks() { + block.Close() + } + close(blockClosed) + }() + + // Closing the queried block should block. + // Wait a little bit to make sure of that. + select { + case <-time.After(100 * time.Millisecond): + case <-readDone: + require.Fail(t, "read API should still be streaming data.") + case <-blockClosed: + require.Fail(t, "Block shouldn't get closed while being queried.") + } + + // Resume the read API data streaming. + close(blockedRecorder.writeBlocked) + <-readDone + + // The block should no longer be needed and closing it should end. + select { + case <-time.After(10 * time.Millisecond): + require.Fail(t, "Closing the block timed out.") + case <-blockClosed: + } +} diff --git a/tsdb/docs/format/chunks.md b/tsdb/docs/format/chunks.md index 8318e0a540..7eb0820e44 100644 --- a/tsdb/docs/format/chunks.md +++ b/tsdb/docs/format/chunks.md @@ -29,14 +29,15 @@ in-file offset (lower 4 bytes) and segment sequence number (upper 4 bytes). # Chunk ``` -┌───────────────┬───────────────────┬──────────────┬────────────────┐ -│ len <uvarint> │ encoding <1 byte> │ data <bytes> │ CRC32 <4 byte> │ -└───────────────┴───────────────────┴──────────────┴────────────────┘ +┌───────────────┬───────────────────┬─────────────┬────────────────┐ +│ len <uvarint> │ encoding <1 byte> │ data <bytes> │ CRC32 <4 byte> │ +└───────────────┴───────────────────┴─────────────┴────────────────┘ ``` Notes: * `<uvarint>` has 1 to 10 bytes. -* `encoding`: Currently either `XOR` or `histogram`. +* `encoding`: Currently either `XOR`, `histogram`, or `floathistogram`, see + [code for numerical values](https://github.com/prometheus/prometheus/blob/02d0de9987ad99dee5de21853715954fadb3239f/tsdb/chunkenc/chunk.go#L28-L47). * `data`: See below for each encoding.
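A minimal sketch of how a reader could walk this framing; it is not the actual tsdb/chunks implementation, and it assumes the CRC32 uses the Castagnoli table and covers the encoding byte plus the data (all names are illustrative):

```go
package chunkformat

import (
	"encoding/binary"
	"errors"
	"fmt"
	"hash/crc32"
)

var castagnoli = crc32.MakeTable(crc32.Castagnoli)

// readChunk decodes one chunk framed as: len <uvarint> | encoding <1 byte> | data | CRC32 <4 byte>.
// buf is assumed to start at the chunk's in-file offset.
func readChunk(buf []byte) (encoding byte, data []byte, err error) {
	dataLen, n := binary.Uvarint(buf) // len: 1 to 10 bytes
	if n <= 0 {
		return 0, nil, errors.New("invalid chunk length")
	}
	rest := buf[n:]
	if uint64(len(rest)) < dataLen+1+4 {
		return 0, nil, errors.New("chunk truncated")
	}
	encoding = rest[0]         // XOR, histogram or floathistogram
	data = rest[1 : 1+dataLen] // interpreted according to the per-encoding sections below
	want := binary.BigEndian.Uint32(rest[1+dataLen : 1+dataLen+4])
	if got := crc32.Checksum(rest[:1+dataLen], castagnoli); got != want {
		return 0, nil, fmt.Errorf("checksum mismatch expected:%x, actual:%x", want, got)
	}
	return encoding, data, nil
}
```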
## XOR chunk data @@ -67,9 +68,9 @@ Notes: ## Histogram chunk data ``` -┌──────────────────────┬──────────────────────────┬───────────────────────────────┬─────────────────────┬──────────────────┬──────────────────┬────────────────┬──────────────────┐ -│ num_samples │ histogram_flags <1 byte> │ zero_threshold <1 or 9 bytes> │ schema │ pos_spans │ neg_spans │ samples │ padding │ -└──────────────────────┴──────────────────────────┴───────────────────────────────┴─────────────────────┴──────────────────┴──────────────────┴────────────────┴──────────────────┘ +┌──────────────────────┬──────────────────────────┬───────────────────────────────┬─────────────────────┬──────────────────┬──────────────────┬──────────────────────┬────────────────┬──────────────────┐ +│ num_samples │ histogram_flags <1 byte> │ zero_threshold <1 or 9 bytes> │ schema │ pos_spans │ neg_spans │ custom_values │ samples │ padding │ +└──────────────────────┴──────────────────────────┴───────────────────────────────┴─────────────────────┴──────────────────┴──────────────────┴──────────────────────┴────────────────┴──────────────────┘ ``` ### Positive and negative spans data: @@ -80,6 +81,16 @@ Notes: └─────────────────────────┴────────────────────────┴───────────────────────┴────────────────────────┴───────────────────────┴─────┴────────────────────────┴───────────────────────┘ ``` +### Custom values data: + +The `custom_values` data is currently only used for schema -53 (custom bucket boundaries). For other schemas, it is empty (length of zero). + +``` +┌──────────────────────────┬──────────────────┬──────────────────┬─────┬──────────────────┐ +│ num_values │ value_0 │ value_1 │ ... │ value_n │ +└──────────────────────────┴─────────────────────────────────────┴─────┴──────────────────┘ +``` + ### Samples data: ``` @@ -92,7 +103,7 @@ Notes: ├──────────────────────────┤ │ ... │ ├──────────────────────────┤ -│ Sample_n │ +│ sample_n │ └──────────────────────────┘ ``` @@ -107,9 +118,9 @@ Notes: #### Sample 1 data: ``` -┌────────────────────────┬───────────────────────────┬────────────────────────────────┬──────────────────────┬─────────────────────────────────┬─────┬─────────────────────────────────┬─────────────────────────────────┬─────┬─────────────────────────────────┐ -│ ts_delta │ count_delta │ zero_count_delta │ sum_xor │ pos_bucket_0_delta │ ... │ pos_bucket_n_delta │ neg_bucket_0_delta │ ... │ neg_bucket_n_delta │ -└────────────────────────┴───────────────────────────┴────────────────────────────────┴──────────────────────┴─────────────────────────────────┴─────┴─────────────────────────────────┴─────────────────────────────────┴─────┴─────────────────────────────────┘ +┌───────────────────────┬──────────────────────────┬───────────────────────────────┬──────────────────────┬─────────────────────────────────┬─────┬─────────────────────────────────┬─────────────────────────────────┬─────┬─────────────────────────────────┐ +│ ts_delta │ count_delta │ zero_count_delta │ sum_xor │ pos_bucket_0_delta │ ... │ pos_bucket_n_delta │ neg_bucket_0_delta │ ... │ neg_bucket_n_delta │ +└───────────────────────┴──────────────────────────┴───────────────────────────────┴──────────────────────┴─────────────────────────────────┴─────┴─────────────────────────────────┴─────────────────────────────────┴─────┴─────────────────────────────────┘ ``` #### Sample 2 data and following: @@ -131,7 +142,9 @@ Notes: * If 0, it is a single zero byte. * If a power of two between 2^-243 and 2^10, it is a single byte between 1 and 254. 
* Otherwise, it is a byte with all bits set (255), followed by a float64, resulting in 9 bytes length. -* `schema` is a specific value defined by the exposition format. Currently valid values are -4 <= n <= 8. +* `schema` is a specific value defined by the exposition format. Currently + valid values are either -4 <= n <= 8 (standard exponential schemas) or -53 + (custom bucket boundaries). * `` is a variable bitwidth encoding for signed integers, optimized for “delta of deltas” of bucket deltas. It has between 1 bit and 9 bytes. See [code for details](https://github.com/prometheus/prometheus/blob/8c1507ebaa4ca552958ffb60c2d1b21afb7150e4/tsdb/chunkenc/varbit.go#L31-L60). * `` is a variable bitwidth encoding for unsigned integers with the same bit-bucketing as ``. @@ -142,3 +155,69 @@ Notes: * Note that buckets are inherently deltas between the current bucket and the previous bucket. Only `bucket_0` is an absolute count. * The chunk can have as few as one sample, i.e. sample 1 and following are optional. * Similarly, there could be down to zero spans and down to zero buckets. + +The `` encoding within the custom values data depends on the schema. +For schema -53 (custom bucket boundaries, currently the only use case for +custom values), the values to encode are bucket boundaries in the form of +floats. The encoding of a given float value _x_ works as follows: + +1. Create an intermediate value _y_ = _x_ * 1000. +2. If 0 ≤ _y_ ≤ 33554430 _and_ if the decimal value of _y_ is integer, store + _y_ + 1 as ``. +3. Otherwise, store a 0 bit, followed by the 64 bit of the original _x_ + encoded as plain ``. + +Note that values stored as per (2) will always start with a 1 bit, which allow +decoders to recognize this case in contrast to values stores as per (3), which +always start with a 0 bit. + +The rational behind this encoding is that most custom bucket boundaries are set +by humans as decimal numbers with not very many decimal places. In most cases, +the encoding will therefore result in a short varbit representation. The upper +bound of 33554430 is picked so that the varbit encoded value will take at most +4 bytes. + + +## Float histogram chunk data + +Float histograms have the same layout as histograms apart from the encoding of samples. + +### Samples data: + +``` +┌──────────────────────────┐ +│ sample_0 │ +├──────────────────────────┤ +│ sample_1 │ +├──────────────────────────┤ +│ sample_2 │ +├──────────────────────────┤ +│ ... │ +├──────────────────────────┤ +│ sample_n │ +└──────────────────────────┘ +``` + +#### Sample 0 data: + +``` +┌─────────────────┬─────────────────┬──────────────────────┬───────────────┬────────────────────────┬─────┬────────────────────────┬────────────────────────┬─────┬────────────────────────┐ +│ ts │ count │ zero_count │ sum │ pos_bucket_0 │ ... │ pos_bucket_n │ neg_bucket_0 │ ... │ neg_bucket_n │ +└─────────────────┴─────────────────┴──────────────────────┴───────────────┴────────────────────────┴─────┴────────────────────────┴────────────────────────┴─────┴────────────────────────┘ +``` + +#### Sample 1 data: + +``` +┌───────────────────────┬────────────────────────┬─────────────────────────────┬──────────────────────┬───────────────────────────────┬─────┬───────────────────────────────┬───────────────────────────────┬─────┬───────────────────────────────┐ +│ ts_delta │ count_xor │ zero_count_xor │ sum_xor │ pos_bucket_0_xor │ ... │ pos_bucket_n_xor │ neg_bucket_0_xor │ ... 
│ neg_bucket_n_xor │ +└───────────────────────┴────────────────────────┴─────────────────────────────┴──────────────────────┴───────────────────────────────┴─────┴───────────────────────────────┴───────────────────────────────┴─────┴───────────────────────────────┘ +``` + +#### Sample 2 data and following: + +``` +┌─────────────────────┬────────────────────────┬─────────────────────────────┬──────────────────────┬───────────────────────────────┬─────┬───────────────────────────────┬───────────────────────────────┬─────┬───────────────────────────────┐ +│ ts_dod │ count_xor │ zero_count_xor │ sum_xor │ pos_bucket_0_xor │ ... │ pos_bucket_n_xor │ neg_bucket_0_xor │ ... │ neg_bucket_n_xor │ +└─────────────────────┴────────────────────────┴─────────────────────────────┴──────────────────────┴───────────────────────────────┴─────┴───────────────────────────────┴───────────────────────────────┴─────┴───────────────────────────────┘ +``` diff --git a/tsdb/docs/format/head_chunks.md b/tsdb/docs/format/head_chunks.md index 5737f42058..7040dcf41a 100644 --- a/tsdb/docs/format/head_chunks.md +++ b/tsdb/docs/format/head_chunks.md @@ -37,3 +37,7 @@ is used while replaying the chunks. | series ref <8 byte> | mint <8 byte, uint64> | maxt <8 byte, uint64> | encoding <1 byte> | len | data │ CRC32 <4 byte> │ └─────────────────────┴───────────────────────┴───────────────────────┴───────────────────┴───────────────┴──────────────┴────────────────┘ ``` + +## OOO encoding + +Head chunks use the highest bit of the `encoding` field to indicate whether it is out-of-order (1) or not (0). This bit is not set for chunks in the on-disk blocks. diff --git a/tsdb/encoding/encoding.go b/tsdb/encoding/encoding.go index 88fdd30c85..cc7d0990f6 100644 --- a/tsdb/encoding/encoding.go +++ b/tsdb/encoding/encoding.go @@ -20,7 +20,6 @@ import ( "hash" "hash/crc32" "math" - "unsafe" "github.com/dennwc/varint" ) @@ -75,8 +74,7 @@ func (e *Encbuf) PutVarint64(x int64) { // PutUvarintStr writes a string to the buffer prefixed by its varint length (in bytes!). func (e *Encbuf) PutUvarintStr(s string) { - b := *(*[]byte)(unsafe.Pointer(&s)) - e.PutUvarint(len(b)) + e.PutUvarint(len(s)) e.PutString(s) } @@ -201,8 +199,9 @@ func (d *Decbuf) UvarintStr() string { return string(d.UvarintBytes()) } -// UvarintBytes returns invalid values if the byte slice goes away. -// Compared to UvarintStr, it avoid allocations. +// UvarintBytes returns a pointer to internal data; +// the return value becomes invalid if the byte slice goes away. +// Compared to UvarintStr, this avoids allocations. func (d *Decbuf) UvarintBytes() []byte { l := d.Uvarint64() if d.E != nil { diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index 8f39377de0..31d461bed9 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -29,7 +29,7 @@ import ( ) const ( - // Indicates that there is no index entry for an exmplar. + // Indicates that there is no index entry for an exemplar. noExemplar = -1 // Estimated number of exemplars per series, for sizing the index. 
estimatedExemplarsPerSeries = 16 diff --git a/tsdb/head.go b/tsdb/head.go index 9f175b446d..324b0a6060 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math" "path/filepath" "runtime" @@ -25,10 +26,9 @@ import ( "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "go.uber.org/atomic" "github.com/prometheus/prometheus/config" @@ -83,7 +83,7 @@ type Head struct { wal, wbl *wlog.WL exemplarMetrics *ExemplarMetrics exemplars ExemplarStorage - logger log.Logger + logger *slog.Logger appendPool zeropool.Pool[[]record.RefSample] exemplarsPool zeropool.Pool[[]exemplarWithSeriesRef] histogramsPool zeropool.Pool[[]record.RefHistogramSample] @@ -157,10 +157,6 @@ type HeadOptions struct { // OutOfOrderTimeWindow is > 0 EnableOOONativeHistograms atomic.Bool - // EnableCreatedTimestampZeroIngestion enables the ingestion of the created timestamp as a synthetic zero sample. - // See: https://github.com/prometheus/proposals/blob/main/proposals/2023-06-13_created-timestamp.md - EnableCreatedTimestampZeroIngestion bool - ChunkRange int64 // ChunkDirRoot is the parent directory of the chunks directory. ChunkDirRoot string @@ -248,10 +244,10 @@ type SeriesLifecycleCallback interface { } // NewHead opens the head block in dir. -func NewHead(r prometheus.Registerer, l log.Logger, wal, wbl *wlog.WL, opts *HeadOptions, stats *HeadStats) (*Head, error) { +func NewHead(r prometheus.Registerer, l *slog.Logger, wal, wbl *wlog.WL, opts *HeadOptions, stats *HeadStats) (*Head, error) { var err error if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } if opts.OutOfOrderTimeWindow.Load() < 0 { @@ -597,7 +593,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { }, func() float64 { val, err := h.chunkDiskMapper.Size() if err != nil { - level.Error(h.logger).Log("msg", "Failed to calculate size of \"chunks_head\" dir", + h.logger.Error("Failed to calculate size of \"chunks_head\" dir", "err", err.Error()) } return float64(val) @@ -660,7 +656,7 @@ func (h *Head) Init(minValidTime int64) error { } }() - level.Info(h.logger).Log("msg", "Replaying on-disk memory mappable chunks if any") + h.logger.Info("Replaying on-disk memory mappable chunks if any") start := time.Now() snapIdx, snapOffset := -1, 0 @@ -669,7 +665,7 @@ func (h *Head) Init(minValidTime int64) error { snapshotLoaded := false var chunkSnapshotLoadDuration time.Duration if h.opts.EnableMemorySnapshotOnShutdown { - level.Info(h.logger).Log("msg", "Chunk snapshot is enabled, replaying from the snapshot") + h.logger.Info("Chunk snapshot is enabled, replaying from the snapshot") // If there are any WAL files, there should be at least one WAL file with an index that is current or newer // than the snapshot index. If the WAL index is behind the snapshot index somehow, the snapshot is assumed // to be outdated. 
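The surrounding head.go hunks (and most of the remaining files in this diff) apply one mechanical change: go-kit's leveled logger is replaced by Go's standard log/slog, so the explicit "msg" key becomes the first argument of the level-named method. A minimal sketch of the pattern, using an illustrative stand-in logger rather than the one the code actually wires up:

```go
package main

import "log/slog"

func main() {
	// Stand-in logger; the TSDB code receives a *slog.Logger and falls back
	// to promslog.NewNopLogger() when none is given.
	logger := slog.Default()

	// Before (go-kit/log), as removed throughout this diff:
	//   level.Info(h.logger).Log("msg", "WAL segment loaded", "segment", i, "maxSegment", endAt)
	// After (log/slog):
	logger.Info("WAL segment loaded", "segment", 3, "maxSegment", 10)

	// Error-level calls translate the same way; key/value pairs are kept,
	// only the "msg" key disappears because the message is now positional.
	logger.Error("Loading on-disk chunks failed", "err", "example error")
}
```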
@@ -682,14 +678,14 @@ func (h *Head) Init(minValidTime int64) error { _, idx, _, err := LastChunkSnapshot(h.opts.ChunkDirRoot) if err != nil && !errors.Is(err, record.ErrNotFound) { - level.Error(h.logger).Log("msg", "Could not find last snapshot", "err", err) + h.logger.Error("Could not find last snapshot", "err", err) } if err == nil && endAt < idx { loadSnapshot = false - level.Warn(h.logger).Log("msg", "Last WAL file is behind snapshot, removing snapshots") + h.logger.Warn("Last WAL file is behind snapshot, removing snapshots") if err := DeleteChunkSnapshots(h.opts.ChunkDirRoot, math.MaxInt, math.MaxInt); err != nil { - level.Error(h.logger).Log("msg", "Error while deleting snapshot directories", "err", err) + h.logger.Error("Error while deleting snapshot directories", "err", err) } } } @@ -699,14 +695,14 @@ func (h *Head) Init(minValidTime int64) error { if err == nil { snapshotLoaded = true chunkSnapshotLoadDuration = time.Since(start) - level.Info(h.logger).Log("msg", "Chunk snapshot loading time", "duration", chunkSnapshotLoadDuration.String()) + h.logger.Info("Chunk snapshot loading time", "duration", chunkSnapshotLoadDuration.String()) } if err != nil { snapIdx, snapOffset = -1, 0 refSeries = make(map[chunks.HeadSeriesRef]*memSeries) h.metrics.snapshotReplayErrorTotal.Inc() - level.Error(h.logger).Log("msg", "Failed to load chunk snapshot", "err", err) + h.logger.Error("Failed to load chunk snapshot", "err", err) // We clear the partially loaded data to replay fresh from the WAL. if err := h.resetInMemoryState(); err != nil { return err @@ -730,7 +726,7 @@ func (h *Head) Init(minValidTime int64) error { mmappedChunks, oooMmappedChunks, lastMmapRef, err = h.loadMmappedChunks(refSeries) if err != nil { // TODO(codesome): clear out all m-map chunks here for refSeries. - level.Error(h.logger).Log("msg", "Loading on-disk chunks failed", "err", err) + h.logger.Error("Loading on-disk chunks failed", "err", err) var cerr *chunks.CorruptionErr if errors.As(err, &cerr) { h.metrics.mmapChunkCorruptionTotal.Inc() @@ -747,15 +743,15 @@ func (h *Head) Init(minValidTime int64) error { } } mmapChunkReplayDuration = time.Since(mmapChunkReplayStart) - level.Info(h.logger).Log("msg", "On-disk memory mappable chunks replay completed", "duration", mmapChunkReplayDuration.String()) + h.logger.Info("On-disk memory mappable chunks replay completed", "duration", mmapChunkReplayDuration.String()) } if h.wal == nil { - level.Info(h.logger).Log("msg", "WAL not found") + h.logger.Info("WAL not found") return nil } - level.Info(h.logger).Log("msg", "Replaying WAL, this may take a while") + h.logger.Info("Replaying WAL, this may take a while") checkpointReplayStart := time.Now() // Backfill the checkpoint first if it exists. 
@@ -781,7 +777,7 @@ func (h *Head) Init(minValidTime int64) error { } defer func() { if err := sr.Close(); err != nil { - level.Warn(h.logger).Log("msg", "Error while closing the wal segments reader", "err", err) + h.logger.Warn("Error while closing the wal segments reader", "err", err) } }() @@ -792,7 +788,7 @@ func (h *Head) Init(minValidTime int64) error { } h.updateWALReplayStatusRead(startFrom) startFrom++ - level.Info(h.logger).Log("msg", "WAL checkpoint loaded") + h.logger.Info("WAL checkpoint loaded") } checkpointReplayDuration := time.Since(checkpointReplayStart) @@ -822,12 +818,12 @@ func (h *Head) Init(minValidTime int64) error { } err = h.loadWAL(wlog.NewReader(sr), syms, multiRef, mmappedChunks, oooMmappedChunks) if err := sr.Close(); err != nil { - level.Warn(h.logger).Log("msg", "Error while closing the wal segments reader", "err", err) + h.logger.Warn("Error while closing the wal segments reader", "err", err) } if err != nil { return err } - level.Info(h.logger).Log("msg", "WAL segment loaded", "segment", i, "maxSegment", endAt) + h.logger.Info("WAL segment loaded", "segment", i, "maxSegment", endAt) h.updateWALReplayStatusRead(i) } walReplayDuration := time.Since(walReplayStart) @@ -850,12 +846,12 @@ func (h *Head) Init(minValidTime int64) error { sr := wlog.NewSegmentBufReader(s) err = h.loadWBL(wlog.NewReader(sr), syms, multiRef, lastMmapRef) if err := sr.Close(); err != nil { - level.Warn(h.logger).Log("msg", "Error while closing the wbl segments reader", "err", err) + h.logger.Warn("Error while closing the wbl segments reader", "err", err) } if err != nil { return &errLoadWbl{err} } - level.Info(h.logger).Log("msg", "WBL segment loaded", "segment", i, "maxSegment", endAt) + h.logger.Info("WBL segment loaded", "segment", i, "maxSegment", endAt) h.updateWALReplayStatusRead(i) } } @@ -864,8 +860,8 @@ func (h *Head) Init(minValidTime int64) error { totalReplayDuration := time.Since(start) h.metrics.dataTotalReplayDuration.Set(totalReplayDuration.Seconds()) - level.Info(h.logger).Log( - "msg", "WAL replay completed", + h.logger.Info( + "WAL replay completed", "checkpoint_replay_duration", checkpointReplayDuration.String(), "wal_replay_duration", walReplayDuration.String(), "wbl_replay_duration", wblReplayDuration.String(), @@ -975,28 +971,28 @@ func (h *Head) loadMmappedChunks(refSeries map[chunks.HeadSeriesRef]*memSeries) // removeCorruptedMmappedChunks attempts to delete the corrupted mmapped chunks and if it fails, it clears all the previously // loaded mmapped chunks. func (h *Head) removeCorruptedMmappedChunks(err error) (map[chunks.HeadSeriesRef][]*mmappedChunk, map[chunks.HeadSeriesRef][]*mmappedChunk, chunks.ChunkDiskMapperRef, error) { - level.Info(h.logger).Log("msg", "Deleting mmapped chunk files") + h.logger.Info("Deleting mmapped chunk files") // We never want to preserve the in-memory series from snapshots if we are repairing m-map chunks. 
if err := h.resetInMemoryState(); err != nil { return map[chunks.HeadSeriesRef][]*mmappedChunk{}, map[chunks.HeadSeriesRef][]*mmappedChunk{}, 0, err } - level.Info(h.logger).Log("msg", "Deleting mmapped chunk files") + h.logger.Info("Deleting mmapped chunk files") if err := h.chunkDiskMapper.DeleteCorrupted(err); err != nil { - level.Info(h.logger).Log("msg", "Deletion of corrupted mmap chunk files failed, discarding chunk files completely", "err", err) + h.logger.Info("Deletion of corrupted mmap chunk files failed, discarding chunk files completely", "err", err) if err := h.chunkDiskMapper.Truncate(math.MaxUint32); err != nil { - level.Error(h.logger).Log("msg", "Deletion of all mmap chunk files failed", "err", err) + h.logger.Error("Deletion of all mmap chunk files failed", "err", err) } return map[chunks.HeadSeriesRef][]*mmappedChunk{}, map[chunks.HeadSeriesRef][]*mmappedChunk{}, 0, nil } - level.Info(h.logger).Log("msg", "Deletion of mmap chunk files successful, reattempting m-mapping the on-disk chunks") + h.logger.Info("Deletion of mmap chunk files successful, reattempting m-mapping the on-disk chunks") mmappedChunks, oooMmappedChunks, lastRef, err := h.loadMmappedChunks(make(map[chunks.HeadSeriesRef]*memSeries)) if err != nil { - level.Error(h.logger).Log("msg", "Loading on-disk chunks failed, discarding chunk files completely", "err", err) + h.logger.Error("Loading on-disk chunks failed, discarding chunk files completely", "err", err) if err := h.chunkDiskMapper.Truncate(math.MaxUint32); err != nil { - level.Error(h.logger).Log("msg", "Deletion of all mmap chunk files failed after failed loading", "err", err) + h.logger.Error("Deletion of all mmap chunk files failed after failed loading", "err", err) } mmappedChunks = map[chunks.HeadSeriesRef][]*mmappedChunk{} } @@ -1031,7 +1027,7 @@ func (h *Head) ApplyConfig(cfg *config.Config, wbl *wlog.WL) { } migrated := h.exemplars.(*CircularExemplarStorage).Resize(newSize) - level.Info(h.logger).Log("msg", "Exemplar storage resized", "from", prevSize, "to", newSize, "migrated", migrated) + h.logger.Info("Exemplar storage resized", "from", prevSize, "to", newSize, "migrated", migrated) } // SetOutOfOrderTimeWindow updates the out of order related parameters. @@ -1342,7 +1338,7 @@ func (h *Head) truncateWAL(mint int64) error { // If truncating fails, we'll just try again at the next checkpoint. // Leftover segments will just be ignored in the future if there's a checkpoint // that supersedes them. - level.Error(h.logger).Log("msg", "truncating segments failed", "err", err) + h.logger.Error("truncating segments failed", "err", err) } // The checkpoint is written and segments before it is truncated, so we no @@ -1360,12 +1356,12 @@ func (h *Head) truncateWAL(mint int64) error { // Leftover old checkpoints do not cause problems down the line beyond // occupying disk space. // They will just be ignored since a higher checkpoint exists. 
- level.Error(h.logger).Log("msg", "delete old checkpoints", "err", err) + h.logger.Error("delete old checkpoints", "err", err) h.metrics.checkpointDeleteFail.Inc() } h.metrics.walTruncateDuration.Observe(time.Since(start).Seconds()) - level.Info(h.logger).Log("msg", "WAL checkpoint complete", + h.logger.Info("WAL checkpoint complete", "first", first, "last", last, "duration", time.Since(start)) return nil @@ -1403,7 +1399,7 @@ func (h *Head) truncateSeriesAndChunkDiskMapper(caller string) error { start := time.Now() headMaxt := h.MaxTime() actualMint, minOOOTime, minMmapFile := h.gc() - level.Info(h.logger).Log("msg", "Head GC completed", "caller", caller, "duration", time.Since(start)) + h.logger.Info("Head GC completed", "caller", caller, "duration", time.Since(start)) h.metrics.gcDuration.Observe(time.Since(start).Seconds()) if actualMint > h.minTime.Load() { @@ -1555,7 +1551,7 @@ func (h *Head) Delete(ctx context.Context, mint, maxt int64, ms ...*labels.Match series := h.series.getByID(chunks.HeadSeriesRef(p.At())) if series == nil { - level.Debug(h.logger).Log("msg", "Series not found in Head.Delete") + h.logger.Debug("Series not found in Head.Delete") continue } @@ -2156,7 +2152,7 @@ type memSeries struct { // before compaction: mmappedChunks=[p5,p6,p7,p8,p9] firstChunkID=5 // after compaction: mmappedChunks=[p7,p8,p9] firstChunkID=7 // - // pN is the pointer to the mmappedChunk referered to by HeadChunkID=N + // pN is the pointer to the mmappedChunk referred to by HeadChunkID=N mmappedChunks []*mmappedChunk // Most recent chunks in memory that are still being built or waiting to be mmapped. // This is a linked list, headChunks points to the most recent chunk, headChunks.next points diff --git a/tsdb/head_append.go b/tsdb/head_append.go index 6a308abb14..b545e97ae3 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -17,11 +17,9 @@ import ( "context" "errors" "fmt" + "log/slog" "math" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -42,6 +40,12 @@ type initAppender struct { var _ storage.GetRef = &initAppender{} +func (a *initAppender) SetOptions(opts *storage.AppendOptions) { + if a.app != nil { + a.app.SetOptions(opts) + } +} + func (a *initAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { if a.app != nil { return a.app.Append(ref, lset, t, v) @@ -79,6 +83,16 @@ func (a *initAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t return a.app.AppendHistogram(ref, l, t, h, fh) } +func (a *initAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if a.app != nil { + return a.app.AppendHistogramCTZeroSample(ref, l, t, ct, h, fh) + } + a.head.initTime(t) + a.app = a.head.appender() + + return a.app.AppendHistogramCTZeroSample(ref, l, t, ct, h, fh) +} + func (a *initAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { if a.app != nil { return a.app.UpdateMetadata(ref, l, m) @@ -318,6 +332,11 @@ type headAppender struct { appendID, cleanupAppendIDsBelow uint64 closed bool + hints *storage.AppendOptions +} + +func (a *headAppender) SetOptions(opts *storage.AppendOptions) { + a.hints = opts } func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t 
int64, v float64) (storage.SeriesRef, error) { @@ -351,13 +370,18 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 } s.Lock() + + defer s.Unlock() // TODO(codesome): If we definitely know at this point that the sample is ooo, then optimise // to skip that sample from the WAL and write only in the WBL. - _, delta, err := s.appendable(t, v, a.headMaxt, a.minValidTime, a.oooTimeWindow) + isOOO, delta, err := s.appendable(t, v, a.headMaxt, a.minValidTime, a.oooTimeWindow) if err == nil { + if isOOO && a.hints != nil && a.hints.DiscardOutOfOrder { + a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Inc() + return 0, storage.ErrOutOfOrderSample + } s.pendingCommit = true } - s.Unlock() if delta > 0 { a.head.metrics.oooHistogram.Observe(float64(delta) / 1000) } @@ -392,7 +416,7 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 // storage.CreatedTimestampAppender.AppendCTZeroSample for further documentation. func (a *headAppender) AppendCTZeroSample(ref storage.SeriesRef, lset labels.Labels, t, ct int64) (storage.SeriesRef, error) { if ct >= t { - return 0, fmt.Errorf("CT is newer or the same as sample's timestamp, ignoring") + return 0, storage.ErrCTNewerThanSample } s := a.head.series.getByID(chunks.HeadSeriesRef(ref)) @@ -450,9 +474,10 @@ func (a *headAppender) getOrCreate(lset labels.Labels) (s *memSeries, created bo return s, created, nil } -// appendable checks whether the given sample is valid for appending to the series. (if we return false and no error) -// The sample belongs to the out of order chunk if we return true and no error. -// An error signifies the sample cannot be handled. +// appendable checks whether the given sample is valid for appending to the series. +// If the sample is valid and in-order, it returns false with no error. +// If the sample belongs to the out-of-order chunk, it returns true with no error. +// If the sample cannot be handled, it returns an error. func (s *memSeries) appendable(t int64, v float64, headMaxt, minValidTime, oooTimeWindow int64) (isOOO bool, oooDelta int64, err error) { // Check if we can append in the in-order chunk. if t >= minValidTime { @@ -743,6 +768,102 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels return storage.SeriesRef(s.ref), nil } +func (a *headAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, lset labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if !a.head.opts.EnableNativeHistograms.Load() { + return 0, storage.ErrNativeHistogramsDisabled + } + + if ct >= t { + return 0, storage.ErrCTNewerThanSample + } + + var created bool + s := a.head.series.getByID(chunks.HeadSeriesRef(ref)) + if s == nil { + var err error + s, created, err = a.getOrCreate(lset) + if err != nil { + return 0, err + } + } + + switch { + case h != nil: + zeroHistogram := &histogram.Histogram{} + s.Lock() + + // TODO(krajorama): reorganize Commit() to handle samples in append order + // not floats first and then histograms. Then we would not need to do this. + // This whole "if" should be removed. + if created && s.lastHistogramValue == nil && s.lastFloatHistogramValue == nil { + s.lastHistogramValue = zeroHistogram + } + + // Although we call `appendableHistogram` with oooHistogramsEnabled=true, for CTZeroSamples OOO is not allowed. + // We set it to true to make this implementation as close as possible to the float implementation. 
+ isOOO, _, err := s.appendableHistogram(ct, zeroHistogram, a.headMaxt, a.minValidTime, a.oooTimeWindow, true) + if err != nil { + s.Unlock() + if errors.Is(err, storage.ErrOutOfOrderSample) { + return 0, storage.ErrOutOfOrderCT + } + } + // OOO is not allowed because after the first scrape, CT will be the same for most (if not all) future samples. + // This is to prevent the injected zero from being marked as OOO forever. + if isOOO { + s.Unlock() + return 0, storage.ErrOutOfOrderCT + } + s.pendingCommit = true + s.Unlock() + a.histograms = append(a.histograms, record.RefHistogramSample{ + Ref: s.ref, + T: ct, + H: zeroHistogram, + }) + a.histogramSeries = append(a.histogramSeries, s) + case fh != nil: + zeroFloatHistogram := &histogram.FloatHistogram{} + s.Lock() + + // TODO(krajorama): reorganize Commit() to handle samples in append order + // not floats first and then histograms. Then we would not need to do this. + // This whole "if" should be removed. + if created && s.lastHistogramValue == nil && s.lastFloatHistogramValue == nil { + s.lastFloatHistogramValue = zeroFloatHistogram + } + + // Although we call `appendableFloatHistogram` with oooHistogramsEnabled=true, for CTZeroSamples OOO is not allowed. + // We set it to true to make this implementation as close as possible to the float implementation. + isOOO, _, err := s.appendableFloatHistogram(ct, zeroFloatHistogram, a.headMaxt, a.minValidTime, a.oooTimeWindow, true) // OOO is not allowed for CTZeroSamples. + if err != nil { + s.Unlock() + if errors.Is(err, storage.ErrOutOfOrderSample) { + return 0, storage.ErrOutOfOrderCT + } + } + // OOO is not allowed because after the first scrape, CT will be the same for most (if not all) future samples. + // This is to prevent the injected zero from being marked as OOO forever. + if isOOO { + s.Unlock() + return 0, storage.ErrOutOfOrderCT + } + s.pendingCommit = true + s.Unlock() + a.floatHistograms = append(a.floatHistograms, record.RefFloatHistogramSample{ + Ref: s.ref, + T: ct, + FH: zeroFloatHistogram, + }) + a.floatHistogramSeries = append(a.floatHistogramSeries, s) + } + + if ct > a.maxt { + a.maxt = ct + } + return storage.SeriesRef(s.ref), nil +} + // UpdateMetadata for headAppender assumes the series ref already exists, and so it doesn't // use getOrCreate or make any of the lset sanity checks that Append does. func (a *headAppender) UpdateMetadata(ref storage.SeriesRef, lset labels.Labels, meta metadata.Metadata) (storage.SeriesRef, error) { @@ -863,23 +984,38 @@ func exemplarsForEncoding(es []exemplarWithSeriesRef) []record.RefExemplar { return ret } -// Commit writes to the WAL and adds the data to the Head. -// TODO(codesome): Refactor this method to reduce indentation and make it more readable. -func (a *headAppender) Commit() (err error) { - if a.closed { - return ErrAppenderClosed - } - defer func() { a.closed = true }() - - if err := a.log(); err != nil { - _ = a.Rollback() // Most likely the same error will happen again. - return fmt.Errorf("write to WAL: %w", err) - } - - if a.head.writeNotified != nil { - a.head.writeNotified.Notify() - } +type appenderCommitContext struct { + floatsAppended int + histogramsAppended int + // Number of samples out of order but accepted: with ooo enabled and within time window. + oooFloatsAccepted int + oooHistogramAccepted int + // Number of samples rejected due to: out of order but OOO support disabled. 
+ floatOOORejected int + histoOOORejected int + // Number of samples rejected due to: out of order but too old (OOO support enabled, but outside time window). + floatTooOldRejected int + histoTooOldRejected int + // Number of samples rejected due to: out of bounds: with t < minValidTime (OOO support disabled). + floatOOBRejected int + histoOOBRejected int + inOrderMint int64 + inOrderMaxt int64 + oooMinT int64 + oooMaxT int64 + wblSamples []record.RefSample + wblHistograms []record.RefHistogramSample + wblFloatHistograms []record.RefFloatHistogramSample + oooMmapMarkers map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef + oooMmapMarkersCount int + oooRecords [][]byte + oooCapMax int64 + appendChunkOpts chunkOpts + enc record.Encoder +} +// commitExemplars adds all exemplars from headAppender to the head's exemplar storage. +func (a *headAppender) commitExemplars() { // No errors logging to WAL, so pass the exemplars along to the in memory storage. for _, e := range a.exemplars { s := a.head.series.getByID(chunks.HeadSeriesRef(e.ref)) @@ -894,128 +1030,117 @@ func (a *headAppender) Commit() (err error) { if errors.Is(err, storage.ErrOutOfOrderExemplar) { continue } - level.Debug(a.head.logger).Log("msg", "Unknown error while adding exemplar", "err", err) + a.head.logger.Debug("Unknown error while adding exemplar", "err", err) } } +} - defer a.head.metrics.activeAppenders.Dec() - defer a.head.putAppendBuffer(a.samples) - defer a.head.putSeriesBuffer(a.sampleSeries) - defer a.head.putExemplarBuffer(a.exemplars) - defer a.head.putHistogramBuffer(a.histograms) - defer a.head.putFloatHistogramBuffer(a.floatHistograms) - defer a.head.putMetadataBuffer(a.metadata) - defer a.head.iso.closeAppend(a.appendID) +func (acc *appenderCommitContext) collectOOORecords(a *headAppender) { + if a.head.wbl == nil { + // WBL is not enabled. So no need to collect. + acc.wblSamples = nil + acc.wblHistograms = nil + acc.wblFloatHistograms = nil + acc.oooMmapMarkers = nil + acc.oooMmapMarkersCount = 0 + return + } - var ( - floatsAppended = len(a.samples) - histogramsAppended = len(a.histograms) + len(a.floatHistograms) - // number of samples out of order but accepted: with ooo enabled and within time window - oooFloatsAccepted int - oooHistogramAccepted int - // number of samples rejected due to: out of order but OOO support disabled. - floatOOORejected int - histoOOORejected int - // number of samples rejected due to: that are out of order but too old (OOO support enabled, but outside time window) - floatTooOldRejected int - histoTooOldRejected int - // number of samples rejected due to: out of bounds: with t < minValidTime (OOO support disabled) - floatOOBRejected int - histoOOBRejected int - inOrderMint int64 = math.MaxInt64 - inOrderMaxt int64 = math.MinInt64 - oooMinT int64 = math.MaxInt64 - oooMaxT int64 = math.MinInt64 - wblSamples []record.RefSample - wblHistograms []record.RefHistogramSample - wblFloatHistograms []record.RefFloatHistogramSample - oooMmapMarkers map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef - oooMmapMarkersCount int - oooRecords [][]byte - oooCapMax = a.head.opts.OutOfOrderCapMax.Load() - series *memSeries - appendChunkOpts = chunkOpts{ - chunkDiskMapper: a.head.chunkDiskMapper, - chunkRange: a.head.chunkRange.Load(), - samplesPerChunk: a.head.opts.SamplesPerChunk, - } - enc record.Encoder - ) - defer func() { - for i := range oooRecords { - a.head.putBytesBuffer(oooRecords[i][:0]) - } - }() - collectOOORecords := func() { - if a.head.wbl == nil { - // WBL is not enabled. 
So no need to collect. - wblSamples = nil - wblHistograms = nil - wblFloatHistograms = nil - oooMmapMarkers = nil - oooMmapMarkersCount = 0 - return - } - // The m-map happens before adding a new sample. So we collect - // the m-map markers first, and then samples. - // WBL Graphically: - // WBL Before this Commit(): [old samples before this commit for chunk 1] - // WBL After this Commit(): [old samples before this commit for chunk 1][new samples in this commit for chunk 1]mmapmarker1[samples for chunk 2]mmapmarker2[samples for chunk 3] - if oooMmapMarkers != nil { - markers := make([]record.RefMmapMarker, 0, oooMmapMarkersCount) - for ref, mmapRefs := range oooMmapMarkers { - for _, mmapRef := range mmapRefs { - markers = append(markers, record.RefMmapMarker{ - Ref: ref, - MmapRef: mmapRef, - }) - } + // The m-map happens before adding a new sample. So we collect + // the m-map markers first, and then samples. + // WBL Graphically: + // WBL Before this Commit(): [old samples before this commit for chunk 1] + // WBL After this Commit(): [old samples before this commit for chunk 1][new samples in this commit for chunk 1]mmapmarker1[samples for chunk 2]mmapmarker2[samples for chunk 3] + if acc.oooMmapMarkers != nil { + markers := make([]record.RefMmapMarker, 0, acc.oooMmapMarkersCount) + for ref, mmapRefs := range acc.oooMmapMarkers { + for _, mmapRef := range mmapRefs { + markers = append(markers, record.RefMmapMarker{ + Ref: ref, + MmapRef: mmapRef, + }) } - r := enc.MmapMarkers(markers, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) } + r := acc.enc.MmapMarkers(markers, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } - if len(wblSamples) > 0 { - r := enc.Samples(wblSamples, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) - } - if len(wblHistograms) > 0 { - r := enc.HistogramSamples(wblHistograms, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) - } - if len(wblFloatHistograms) > 0 { - r := enc.FloatHistogramSamples(wblFloatHistograms, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) - } + if len(acc.wblSamples) > 0 { + r := acc.enc.Samples(acc.wblSamples, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + if len(acc.wblHistograms) > 0 { + r := acc.enc.HistogramSamples(acc.wblHistograms, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + if len(acc.wblFloatHistograms) > 0 { + r := acc.enc.FloatHistogramSamples(acc.wblFloatHistograms, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + + acc.wblSamples = nil + acc.wblHistograms = nil + acc.wblFloatHistograms = nil + acc.oooMmapMarkers = nil +} - wblSamples = nil - wblHistograms = nil - wblFloatHistograms = nil - oooMmapMarkers = nil +// handleAppendableError processes errors encountered during sample appending and updates +// the provided counters accordingly. +// +// Parameters: +// - err: The error encountered during appending. +// - appended: Pointer to the counter tracking the number of successfully appended samples. +// - oooRejected: Pointer to the counter tracking the number of out-of-order samples rejected. +// - oobRejected: Pointer to the counter tracking the number of out-of-bounds samples rejected. +// - tooOldRejected: Pointer to the counter tracking the number of too-old samples rejected. 
+func handleAppendableError(err error, appended, oooRejected, oobRejected, tooOldRejected *int) { + switch { + case errors.Is(err, storage.ErrOutOfOrderSample): + *appended-- + *oooRejected++ + case errors.Is(err, storage.ErrOutOfBounds): + *appended-- + *oobRejected++ + case errors.Is(err, storage.ErrTooOldSample): + *appended-- + *tooOldRejected++ + default: + *appended-- } +} + +// commitSamples processes and commits the samples in the headAppender to the series. +// It handles both in-order and out-of-order samples, updating the appenderCommitContext +// with the results of the append operations. +// +// The function iterates over the samples in the headAppender and attempts to append each sample +// to its corresponding series. It handles various error cases such as out-of-order samples, +// out-of-bounds samples, and too-old samples, updating the appenderCommitContext accordingly. +// +// For out-of-order samples, it checks if the sample can be inserted into the series and updates +// the out-of-order mmap markers if necessary. It also updates the write-ahead log (WBL) samples +// and the minimum and maximum timestamps for out-of-order samples. +// +// For in-order samples, it attempts to append the sample to the series and updates the minimum +// and maximum timestamps for in-order samples. +// +// The function also increments the chunk metrics if a new chunk is created and performs cleanup +// operations on the series after appending the samples. +// +// There are also specific functions to commit histograms and float histograms. +func (a *headAppender) commitSamples(acc *appenderCommitContext) { + var ok, chunkCreated bool + var series *memSeries + for i, s := range a.samples { series = a.sampleSeries[i] series.Lock() oooSample, _, err := series.appendable(s.T, s.V, a.headMaxt, a.minValidTime, a.oooTimeWindow) - switch { - case err == nil: - // Do nothing. - case errors.Is(err, storage.ErrOutOfOrderSample): - floatsAppended-- - floatOOORejected++ - case errors.Is(err, storage.ErrOutOfBounds): - floatsAppended-- - floatOOBRejected++ - case errors.Is(err, storage.ErrTooOldSample): - floatsAppended-- - floatTooOldRejected++ - default: - floatsAppended-- + if err != nil { + handleAppendableError(err, &acc.floatsAppended, &acc.floatOOORejected, &acc.floatOOBRejected, &acc.floatTooOldRejected) } - var ok, chunkCreated bool - switch { case err != nil: // Do nothing here. @@ -1023,9 +1148,9 @@ func (a *headAppender) Commit() (err error) { // Sample is OOO and OOO handling is enabled // and the delta is within the OOO tolerance. var mmapRefs []chunks.ChunkDiskMapperRef - ok, chunkCreated, mmapRefs = series.insert(s.T, s.V, nil, nil, a.head.chunkDiskMapper, oooCapMax, a.head.logger) + ok, chunkCreated, mmapRefs = series.insert(s.T, s.V, nil, nil, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger) if chunkCreated { - r, ok := oooMmapMarkers[series.ref] + r, ok := acc.oooMmapMarkers[series.ref] if !ok || r != nil { // !ok means there are no markers collected for these samples yet. So we first flush the samples // before setting this m-map marker. @@ -1033,49 +1158,49 @@ func (a *headAppender) Commit() (err error) { // r != nil means we have already m-mapped a chunk for this series in the same Commit(). // Hence, before we m-map again, we should add the samples and m-map markers // seen till now to the WBL records. 
- collectOOORecords() + acc.collectOOORecords(a) } - if oooMmapMarkers == nil { - oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) + if acc.oooMmapMarkers == nil { + acc.oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) } if len(mmapRefs) > 0 { - oooMmapMarkers[series.ref] = mmapRefs - oooMmapMarkersCount += len(mmapRefs) + acc.oooMmapMarkers[series.ref] = mmapRefs + acc.oooMmapMarkersCount += len(mmapRefs) } else { // No chunk was written to disk, so we need to set an initial marker for this series. - oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} - oooMmapMarkersCount++ + acc.oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} + acc.oooMmapMarkersCount++ } } if ok { - wblSamples = append(wblSamples, s) - if s.T < oooMinT { - oooMinT = s.T + acc.wblSamples = append(acc.wblSamples, s) + if s.T < acc.oooMinT { + acc.oooMinT = s.T } - if s.T > oooMaxT { - oooMaxT = s.T + if s.T > acc.oooMaxT { + acc.oooMaxT = s.T } - oooFloatsAccepted++ + acc.oooFloatsAccepted++ } else { // Sample is an exact duplicate of the last sample. // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, // not with samples in already flushed OOO chunks. // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. - floatsAppended-- + acc.floatsAppended-- } default: - ok, chunkCreated = series.append(s.T, s.V, a.appendID, appendChunkOpts) + ok, chunkCreated = series.append(s.T, s.V, a.appendID, acc.appendChunkOpts) if ok { - if s.T < inOrderMint { - inOrderMint = s.T + if s.T < acc.inOrderMint { + acc.inOrderMint = s.T } - if s.T > inOrderMaxt { - inOrderMaxt = s.T + if s.T > acc.inOrderMaxt { + acc.inOrderMaxt = s.T } } else { // The sample is an exact duplicate, and should be silently dropped. - floatsAppended-- + acc.floatsAppended-- } } @@ -1088,30 +1213,22 @@ func (a *headAppender) Commit() (err error) { series.pendingCommit = false series.Unlock() } +} + +// For details on the commitHistograms function, see the commitSamples docs. +func (a *headAppender) commitHistograms(acc *appenderCommitContext) { + var ok, chunkCreated bool + var series *memSeries for i, s := range a.histograms { series = a.histogramSeries[i] series.Lock() oooSample, _, err := series.appendableHistogram(s.T, s.H, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load()) - switch { - case err == nil: - // Do nothing. - case errors.Is(err, storage.ErrOutOfOrderSample): - histogramsAppended-- - histoOOORejected++ - case errors.Is(err, storage.ErrOutOfBounds): - histogramsAppended-- - histoOOBRejected++ - case errors.Is(err, storage.ErrTooOldSample): - histogramsAppended-- - histoTooOldRejected++ - default: - histogramsAppended-- + if err != nil { + handleAppendableError(err, &acc.histogramsAppended, &acc.histoOOORejected, &acc.histoOOBRejected, &acc.histoTooOldRejected) } - var ok, chunkCreated bool - switch { case err != nil: // Do nothing here. @@ -1119,9 +1236,9 @@ func (a *headAppender) Commit() (err error) { // Sample is OOO and OOO handling is enabled // and the delta is within the OOO tolerance. 
var mmapRefs []chunks.ChunkDiskMapperRef - ok, chunkCreated, mmapRefs = series.insert(s.T, 0, s.H, nil, a.head.chunkDiskMapper, oooCapMax, a.head.logger) + ok, chunkCreated, mmapRefs = series.insert(s.T, 0, s.H, nil, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger) if chunkCreated { - r, ok := oooMmapMarkers[series.ref] + r, ok := acc.oooMmapMarkers[series.ref] if !ok || r != nil { // !ok means there are no markers collected for these samples yet. So we first flush the samples // before setting this m-map marker. @@ -1129,49 +1246,49 @@ func (a *headAppender) Commit() (err error) { // r != 0 means we have already m-mapped a chunk for this series in the same Commit(). // Hence, before we m-map again, we should add the samples and m-map markers // seen till now to the WBL records. - collectOOORecords() + acc.collectOOORecords(a) } - if oooMmapMarkers == nil { - oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) + if acc.oooMmapMarkers == nil { + acc.oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) } if len(mmapRefs) > 0 { - oooMmapMarkers[series.ref] = mmapRefs - oooMmapMarkersCount += len(mmapRefs) + acc.oooMmapMarkers[series.ref] = mmapRefs + acc.oooMmapMarkersCount += len(mmapRefs) } else { // No chunk was written to disk, so we need to set an initial marker for this series. - oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} - oooMmapMarkersCount++ + acc.oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} + acc.oooMmapMarkersCount++ } } if ok { - wblHistograms = append(wblHistograms, s) - if s.T < oooMinT { - oooMinT = s.T + acc.wblHistograms = append(acc.wblHistograms, s) + if s.T < acc.oooMinT { + acc.oooMinT = s.T } - if s.T > oooMaxT { - oooMaxT = s.T + if s.T > acc.oooMaxT { + acc.oooMaxT = s.T } - oooHistogramAccepted++ + acc.oooHistogramAccepted++ } else { // Sample is an exact duplicate of the last sample. // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, // not with samples in already flushed OOO chunks. // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. - histogramsAppended-- + acc.histogramsAppended-- } default: - ok, chunkCreated = series.appendHistogram(s.T, s.H, a.appendID, appendChunkOpts) + ok, chunkCreated = series.appendHistogram(s.T, s.H, a.appendID, acc.appendChunkOpts) if ok { - if s.T < inOrderMint { - inOrderMint = s.T + if s.T < acc.inOrderMint { + acc.inOrderMint = s.T } - if s.T > inOrderMaxt { - inOrderMaxt = s.T + if s.T > acc.inOrderMaxt { + acc.inOrderMaxt = s.T } } else { - histogramsAppended-- - histoOOORejected++ + acc.histogramsAppended-- + acc.histoOOORejected++ } } @@ -1184,30 +1301,22 @@ func (a *headAppender) Commit() (err error) { series.pendingCommit = false series.Unlock() } +} + +// For details on the commitFloatHistograms function, see the commitSamples docs. +func (a *headAppender) commitFloatHistograms(acc *appenderCommitContext) { + var ok, chunkCreated bool + var series *memSeries for i, s := range a.floatHistograms { series = a.floatHistogramSeries[i] series.Lock() oooSample, _, err := series.appendableFloatHistogram(s.T, s.FH, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load()) - switch { - case err == nil: - // Do nothing. 
- case errors.Is(err, storage.ErrOutOfOrderSample): - histogramsAppended-- - histoOOORejected++ - case errors.Is(err, storage.ErrOutOfBounds): - histogramsAppended-- - histoOOBRejected++ - case errors.Is(err, storage.ErrTooOldSample): - histogramsAppended-- - histoTooOldRejected++ - default: - histogramsAppended-- + if err != nil { + handleAppendableError(err, &acc.histogramsAppended, &acc.histoOOORejected, &acc.histoOOBRejected, &acc.histoTooOldRejected) } - var ok, chunkCreated bool - switch { case err != nil: // Do nothing here. @@ -1215,9 +1324,9 @@ func (a *headAppender) Commit() (err error) { // Sample is OOO and OOO handling is enabled // and the delta is within the OOO tolerance. var mmapRefs []chunks.ChunkDiskMapperRef - ok, chunkCreated, mmapRefs = series.insert(s.T, 0, nil, s.FH, a.head.chunkDiskMapper, oooCapMax, a.head.logger) + ok, chunkCreated, mmapRefs = series.insert(s.T, 0, nil, s.FH, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger) if chunkCreated { - r, ok := oooMmapMarkers[series.ref] + r, ok := acc.oooMmapMarkers[series.ref] if !ok || r != nil { // !ok means there are no markers collected for these samples yet. So we first flush the samples // before setting this m-map marker. @@ -1225,49 +1334,49 @@ func (a *headAppender) Commit() (err error) { // r != 0 means we have already m-mapped a chunk for this series in the same Commit(). // Hence, before we m-map again, we should add the samples and m-map markers // seen till now to the WBL records. - collectOOORecords() + acc.collectOOORecords(a) } - if oooMmapMarkers == nil { - oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) + if acc.oooMmapMarkers == nil { + acc.oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) } if len(mmapRefs) > 0 { - oooMmapMarkers[series.ref] = mmapRefs - oooMmapMarkersCount += len(mmapRefs) + acc.oooMmapMarkers[series.ref] = mmapRefs + acc.oooMmapMarkersCount += len(mmapRefs) } else { // No chunk was written to disk, so we need to set an initial marker for this series. - oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} - oooMmapMarkersCount++ + acc.oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} + acc.oooMmapMarkersCount++ } } if ok { - wblFloatHistograms = append(wblFloatHistograms, s) - if s.T < oooMinT { - oooMinT = s.T + acc.wblFloatHistograms = append(acc.wblFloatHistograms, s) + if s.T < acc.oooMinT { + acc.oooMinT = s.T } - if s.T > oooMaxT { - oooMaxT = s.T + if s.T > acc.oooMaxT { + acc.oooMaxT = s.T } - oooHistogramAccepted++ + acc.oooHistogramAccepted++ } else { // Sample is an exact duplicate of the last sample. // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, // not with samples in already flushed OOO chunks. // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. 
- histogramsAppended-- + acc.histogramsAppended-- } default: - ok, chunkCreated = series.appendFloatHistogram(s.T, s.FH, a.appendID, appendChunkOpts) + ok, chunkCreated = series.appendFloatHistogram(s.T, s.FH, a.appendID, acc.appendChunkOpts) if ok { - if s.T < inOrderMint { - inOrderMint = s.T + if s.T < acc.inOrderMint { + acc.inOrderMint = s.T } - if s.T > inOrderMaxt { - inOrderMaxt = s.T + if s.T > acc.inOrderMaxt { + acc.inOrderMaxt = s.T } } else { - histogramsAppended-- - histoOOORejected++ + acc.histogramsAppended-- + acc.histoOOORejected++ } } @@ -1280,40 +1389,102 @@ func (a *headAppender) Commit() (err error) { series.pendingCommit = false series.Unlock() } +} +// commitMetadata commits the metadata for each series in the headAppender. +// It iterates over the metadata slice and updates the corresponding series +// with the new metadata information. The series is locked during the update +// to ensure thread safety. +func (a *headAppender) commitMetadata() { + var series *memSeries for i, m := range a.metadata { series = a.metadataSeries[i] series.Lock() series.meta = &metadata.Metadata{Type: record.ToMetricType(m.Type), Unit: m.Unit, Help: m.Help} series.Unlock() } +} - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatOOORejected)) - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Add(float64(histoOOORejected)) - a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatOOBRejected)) - a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatTooOldRejected)) - a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatsAppended)) - a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(histogramsAppended)) - a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(oooFloatsAccepted)) - a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(oooHistogramAccepted)) - a.head.updateMinMaxTime(inOrderMint, inOrderMaxt) - a.head.updateMinOOOMaxOOOTime(oooMinT, oooMaxT) +// Commit writes to the WAL and adds the data to the Head. +// TODO(codesome): Refactor this method to reduce indentation and make it more readable. +func (a *headAppender) Commit() (err error) { + if a.closed { + return ErrAppenderClosed + } + defer func() { a.closed = true }() + + if err := a.log(); err != nil { + _ = a.Rollback() // Most likely the same error will happen again. 
+ return fmt.Errorf("write to WAL: %w", err) + } + + if a.head.writeNotified != nil { + a.head.writeNotified.Notify() + } + + a.commitExemplars() + + defer a.head.metrics.activeAppenders.Dec() + defer a.head.putAppendBuffer(a.samples) + defer a.head.putSeriesBuffer(a.sampleSeries) + defer a.head.putExemplarBuffer(a.exemplars) + defer a.head.putHistogramBuffer(a.histograms) + defer a.head.putFloatHistogramBuffer(a.floatHistograms) + defer a.head.putMetadataBuffer(a.metadata) + defer a.head.iso.closeAppend(a.appendID) + + acc := &appenderCommitContext{ + floatsAppended: len(a.samples), + histogramsAppended: len(a.histograms) + len(a.floatHistograms), + inOrderMint: math.MaxInt64, + inOrderMaxt: math.MinInt64, + oooMinT: math.MaxInt64, + oooMaxT: math.MinInt64, + oooCapMax: a.head.opts.OutOfOrderCapMax.Load(), + appendChunkOpts: chunkOpts{ + chunkDiskMapper: a.head.chunkDiskMapper, + chunkRange: a.head.chunkRange.Load(), + samplesPerChunk: a.head.opts.SamplesPerChunk, + }, + } + + defer func() { + for i := range acc.oooRecords { + a.head.putBytesBuffer(acc.oooRecords[i][:0]) + } + }() - collectOOORecords() + a.commitSamples(acc) + a.commitHistograms(acc) + a.commitFloatHistograms(acc) + a.commitMetadata() + + a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatOOORejected)) + a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.histoOOORejected)) + a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatOOBRejected)) + a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatTooOldRejected)) + a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatsAppended)) + a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.histogramsAppended)) + a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.oooFloatsAccepted)) + a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.oooHistogramAccepted)) + a.head.updateMinMaxTime(acc.inOrderMint, acc.inOrderMaxt) + a.head.updateMinOOOMaxOOOTime(acc.oooMinT, acc.oooMaxT) + + acc.collectOOORecords(a) if a.head.wbl != nil { - if err := a.head.wbl.Log(oooRecords...); err != nil { + if err := a.head.wbl.Log(acc.oooRecords...); err != nil { // TODO(codesome): Currently WBL logging of ooo samples is best effort here since we cannot try logging // until we have found what samples become OOO. We can try having a metric for this failure. // Returning the error here is not correct because we have already put the samples into the memory, // hence the append/insert was a success. - level.Error(a.head.logger).Log("msg", "Failed to log out of order samples into the WAL", "err", err) + a.head.logger.Error("Failed to log out of order samples into the WAL", "err", err) } } return nil } // insert is like append, except it inserts. Used for OOO samples. 
-func (s *memSeries) insert(t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, chunkDiskMapper *chunks.ChunkDiskMapper, oooCapMax int64, logger log.Logger) (inserted, chunkCreated bool, mmapRefs []chunks.ChunkDiskMapperRef) { +func (s *memSeries) insert(t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, chunkDiskMapper *chunks.ChunkDiskMapper, oooCapMax int64, logger *slog.Logger) (inserted, chunkCreated bool, mmapRefs []chunks.ChunkDiskMapperRef) { if s.ooo == nil { s.ooo = &memSeriesOOOFields{} } @@ -1702,7 +1873,7 @@ func (s *memSeries) cutNewHeadChunk(mint int64, e chunkenc.Encoding, chunkRange // cutNewOOOHeadChunk cuts a new OOO chunk and m-maps the old chunk. // The caller must ensure that s is locked and s.ooo is not nil. -func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.ChunkDiskMapper, logger log.Logger) (*oooHeadChunk, []chunks.ChunkDiskMapperRef) { +func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.ChunkDiskMapper, logger *slog.Logger) (*oooHeadChunk, []chunks.ChunkDiskMapperRef) { ref := s.mmapCurrentOOOHeadChunk(chunkDiskMapper, logger) s.ooo.oooHeadChunk = &oooHeadChunk{ @@ -1715,7 +1886,7 @@ func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.Chunk } // s must be locked when calling. -func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMapper, logger log.Logger) []chunks.ChunkDiskMapperRef { +func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMapper, logger *slog.Logger) []chunks.ChunkDiskMapperRef { if s.ooo == nil || s.ooo.oooHeadChunk == nil { // OOO is not enabled or there is no head chunk, so nothing to m-map here. return nil @@ -1728,7 +1899,7 @@ func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMap chunkRefs := make([]chunks.ChunkDiskMapperRef, 0, len(chks)) for _, memchunk := range chks { if len(s.ooo.oooMmappedChunks) >= (oooChunkIDMask - 1) { - level.Error(logger).Log("msg", "Too many OOO chunks, dropping data", "series", s.lset.String()) + logger.Error("Too many OOO chunks, dropping data", "series", s.lset.String()) break } chunkRef := chunkDiskMapper.WriteChunk(s.ref, memchunk.minTime, memchunk.maxTime, memchunk.chunk, true, handleChunkWriteError) diff --git a/tsdb/head_bench_test.go b/tsdb/head_bench_test.go index a037948100..dc682602b1 100644 --- a/tsdb/head_bench_test.go +++ b/tsdb/head_bench_test.go @@ -14,15 +14,22 @@ package tsdb import ( + "context" "errors" + "fmt" + "math/rand" "strconv" "testing" "github.com/stretchr/testify/require" "go.uber.org/atomic" + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunks" + "github.com/prometheus/prometheus/tsdb/wlog" ) func BenchmarkHeadStripeSeriesCreate(b *testing.B) { @@ -79,6 +86,86 @@ func BenchmarkHeadStripeSeriesCreate_PreCreationFailure(b *testing.B) { } } +func BenchmarkHead_WalCommit(b *testing.B) { + seriesCounts := []int{100, 1000, 10000} + series := genSeries(10000, 10, 0, 0) // Only using the generated labels. + + appendSamples := func(b *testing.B, app storage.Appender, seriesCount int, ts int64) { + var err error + for i, s := range series[:seriesCount] { + var ref storage.SeriesRef + // if i is even, append a sample, else append a histogram. 
+ if i%2 == 0 { + ref, err = app.Append(ref, s.Labels(), ts, float64(ts)) + } else { + h := &histogram.Histogram{ + Count: 7 + uint64(ts*5), + ZeroCount: 2 + uint64(ts), + ZeroThreshold: 0.001, + Sum: 18.4 * rand.Float64(), + Schema: 1, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{ts + 1, 1, -1, 0}, + } + ref, err = app.AppendHistogram(ref, s.Labels(), ts, h, nil) + } + require.NoError(b, err) + + _, err = app.AppendExemplar(ref, s.Labels(), exemplar.Exemplar{ + Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())), + Value: rand.Float64(), + Ts: ts, + }) + require.NoError(b, err) + } + } + + for _, seriesCount := range seriesCounts { + b.Run(fmt.Sprintf("%d series", seriesCount), func(b *testing.B) { + for _, commits := range []int64{1, 2} { // To test commits that create new series and when the series already exists. + b.Run(fmt.Sprintf("%d commits", commits), func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + b.StopTimer() + h, w := newTestHead(b, 10000, wlog.CompressionNone, false) + b.Cleanup(func() { + if h != nil { + h.Close() + } + if w != nil { + w.Close() + } + }) + app := h.Appender(context.Background()) + + appendSamples(b, app, seriesCount, 0) + + b.StartTimer() + require.NoError(b, app.Commit()) + if commits == 2 { + b.StopTimer() + app = h.Appender(context.Background()) + appendSamples(b, app, seriesCount, 1) + b.StartTimer() + require.NoError(b, app.Commit()) + } + b.StopTimer() + h.Close() + h = nil + w.Close() + w = nil + } + }) + } + }) + } +} + type failingSeriesLifecycleCallback struct{} func (failingSeriesLifecycleCallback) PreCreation(labels.Labels) error { return errors.New("failed") } diff --git a/tsdb/head_dedupelabels.go b/tsdb/head_dedupelabels.go index a16d907261..a75f337224 100644 --- a/tsdb/head_dedupelabels.go +++ b/tsdb/head_dedupelabels.go @@ -16,8 +16,7 @@ package tsdb import ( - "github.com/go-kit/log" - "github.com/go-kit/log/level" + "log/slog" "github.com/prometheus/prometheus/model/labels" ) @@ -31,8 +30,8 @@ func (s *memSeries) labels() labels.Labels { // RebuildSymbolTable goes through all the series in h, build a SymbolTable with all names and values, // replace each series' Labels with one using that SymbolTable. -func (h *Head) RebuildSymbolTable(logger log.Logger) *labels.SymbolTable { - level.Info(logger).Log("msg", "RebuildSymbolTable starting") +func (h *Head) RebuildSymbolTable(logger *slog.Logger) *labels.SymbolTable { + logger.Info("RebuildSymbolTable starting") st := labels.NewSymbolTable() builder := labels.NewScratchBuilderWithSymbolTable(st, 0) rebuildLabels := func(lbls labels.Labels) labels.Labels { @@ -66,7 +65,7 @@ func (h *Head) RebuildSymbolTable(logger log.Logger) *labels.SymbolTable { if e, ok := h.exemplars.(withReset); ok { e.ResetSymbolTable(st) } - level.Info(logger).Log("msg", "RebuildSymbolTable finished", "size", st.Len()) + logger.Info("RebuildSymbolTable finished", "size", st.Len()) return st } diff --git a/tsdb/head_other.go b/tsdb/head_other.go index fea91530dc..c73872c12e 100644 --- a/tsdb/head_other.go +++ b/tsdb/head_other.go @@ -16,7 +16,7 @@ package tsdb import ( - "github.com/go-kit/log" + "log/slog" "github.com/prometheus/prometheus/model/labels" ) @@ -27,6 +27,6 @@ func (s *memSeries) labels() labels.Labels { } // RebuildSymbolTable is a no-op when not using dedupelabels. 
-func (h *Head) RebuildSymbolTable(logger log.Logger) *labels.SymbolTable { +func (h *Head) RebuildSymbolTable(logger *slog.Logger) *labels.SymbolTable { return nil } diff --git a/tsdb/head_read.go b/tsdb/head_read.go index f85120a97f..f6215c5206 100644 --- a/tsdb/head_read.go +++ b/tsdb/head_read.go @@ -21,8 +21,6 @@ import ( "slices" "sync" - "github.com/go-kit/log/level" - "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" @@ -136,7 +134,7 @@ func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings { for p.Next() { s := h.head.series.getByID(chunks.HeadSeriesRef(p.At())) if s == nil { - level.Debug(h.head.logger).Log("msg", "Looked up series not found") + h.head.logger.Debug("Looked up series not found") } else { series = append(series, s) } @@ -169,7 +167,7 @@ func (h *headIndexReader) ShardedPostings(p index.Postings, shardIndex, shardCou for p.Next() { s := h.head.series.getByID(chunks.HeadSeriesRef(p.At())) if s == nil { - level.Debug(h.head.logger).Log("msg", "Looked up series not found") + h.head.logger.Debug("Looked up series not found") continue } diff --git a/tsdb/head_test.go b/tsdb/head_test.go index e58abe28a9..b92480b67c 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -23,6 +23,7 @@ import ( "path" "path/filepath" "reflect" + "slices" "sort" "strconv" "strings" @@ -515,9 +516,9 @@ func TestHead_HighConcurrencyReadAndWrite(t *testing.T) { workerReadyWg.Add(writeConcurrency + readConcurrency) // Start the write workers. - for workerID := 0; workerID < writeConcurrency; workerID++ { + for wid := 0; wid < writeConcurrency; wid++ { // Create copy of workerID to be used by worker routine. - workerID := workerID + workerID := wid g.Go(func() error { // The label sets which this worker will write. @@ -559,9 +560,9 @@ func TestHead_HighConcurrencyReadAndWrite(t *testing.T) { readerTsCh := make(chan uint64) // Start the read workers. - for workerID := 0; workerID < readConcurrency; workerID++ { + for wid := 0; wid < readConcurrency; wid++ { // Create copy of threadID to be used by worker routine. 
- workerID := workerID + workerID := wid g.Go(func() error { querySeriesRef := (seriesCnt / readConcurrency) * workerID @@ -1060,7 +1061,7 @@ func TestMemSeries_truncateChunks_scenarios(t *testing.T) { tests := []struct { name string - headChunks int // the number of head chubks to create on memSeries by appending enough samples + headChunks int // the number of head chunks to create on memSeries by appending enough samples mmappedChunks int // the number of mmapped chunks to create on memSeries by appending enough samples truncateBefore int64 // the mint to pass to truncateChunksBefore() expectedTruncated int // the number of chunks that we're expecting be truncated and returned by truncateChunksBefore() @@ -2101,6 +2102,36 @@ func TestHead_LogRollback(t *testing.T) { } } +func TestHead_ReturnsSortedLabelValues(t *testing.T) { + h, _ := newTestHead(t, 1000, wlog.CompressionNone, false) + defer func() { + require.NoError(t, h.Close()) + }() + + h.initTime(0) + + app := h.appender() + for i := 100; i > 0; i-- { + for j := 0; j < 10; j++ { + lset := labels.FromStrings( + "__name__", fmt.Sprintf("metric_%d", i), + "label", fmt.Sprintf("value_%d", j), + ) + _, err := app.Append(0, lset, 2100, 1) + require.NoError(t, err) + } + } + + q, err := NewBlockQuerier(h, 1500, 2500) + require.NoError(t, err) + + res, _, err := q.LabelValues(context.Background(), "__name__", nil) + require.NoError(t, err) + + require.True(t, slices.IsSorted(res)) + require.NoError(t, q.Close()) +} + // TestWalRepair_DecodingError ensures that a repair is run for an error // when decoding a record. func TestWalRepair_DecodingError(t *testing.T) { @@ -2383,8 +2414,7 @@ func TestAddDuplicateLabelName(t *testing.T) { add := func(labels labels.Labels, labelName string) { app := h.Appender(context.Background()) _, err := app.Append(0, labels, 0, 0) - require.Error(t, err) - require.Equal(t, fmt.Sprintf(`label name "%s" is not unique: invalid sample`, labelName), err.Error()) + require.EqualError(t, err, fmt.Sprintf(`label name "%s" is not unique: invalid sample`, labelName)) } add(labels.FromStrings("a", "c", "a", "b"), "a") @@ -6411,11 +6441,15 @@ func TestHeadAppender_AppendFloatWithSameTimestampAsPreviousHistogram(t *testing require.ErrorIs(t, err, storage.NewDuplicateHistogramToFloatErr(2_000, 10.0)) } -func TestHeadAppender_AppendCTZeroSample(t *testing.T) { +func TestHeadAppender_AppendCT(t *testing.T) { + testHistogram := tsdbutil.GenerateTestHistogram(1) + testFloatHistogram := tsdbutil.GenerateTestFloatHistogram(1) type appendableSamples struct { - ts int64 - val float64 - ct int64 + ts int64 + fSample float64 + h *histogram.Histogram + fh *histogram.FloatHistogram + ct int64 } for _, tc := range []struct { name string @@ -6423,20 +6457,54 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { expectedSamples []chunks.Sample }{ { - name: "In order ct+normal sample", + name: "In order ct+normal sample/floatSample", appendableSamples: []appendableSamples{ - {ts: 100, val: 10, ct: 1}, + {ts: 100, fSample: 10, ct: 1}, + {ts: 101, fSample: 10, ct: 1}, }, expectedSamples: []chunks.Sample{ sample{t: 1, f: 0}, sample{t: 100, f: 10}, + sample{t: 101, f: 10}, + }, + }, + { + name: "In order ct+normal sample/histogram", + appendableSamples: []appendableSamples{ + {ts: 100, h: testHistogram, ct: 1}, + {ts: 101, h: testHistogram, ct: 1}, + }, + expectedSamples: func() []chunks.Sample { + hNoCounterReset := *testHistogram + hNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, h: 
&histogram.Histogram{}}, + sample{t: 100, h: testHistogram}, + sample{t: 101, h: &hNoCounterReset}, + } + }(), + }, + { + name: "In order ct+normal sample/floathistogram", + appendableSamples: []appendableSamples{ + {ts: 100, fh: testFloatHistogram, ct: 1}, + {ts: 101, fh: testFloatHistogram, ct: 1}, }, + expectedSamples: func() []chunks.Sample { + fhNoCounterReset := *testFloatHistogram + fhNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, fh: &histogram.FloatHistogram{}}, + sample{t: 100, fh: testFloatHistogram}, + sample{t: 101, fh: &fhNoCounterReset}, + } + }(), }, { - name: "Consecutive appends with same ct ignore ct", + name: "Consecutive appends with same ct ignore ct/floatSample", appendableSamples: []appendableSamples{ - {ts: 100, val: 10, ct: 1}, - {ts: 101, val: 10, ct: 1}, + {ts: 100, fSample: 10, ct: 1}, + {ts: 101, fSample: 10, ct: 1}, }, expectedSamples: []chunks.Sample{ sample{t: 1, f: 0}, @@ -6445,10 +6513,42 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { }, }, { - name: "Consecutive appends with newer ct do not ignore ct", + name: "Consecutive appends with same ct ignore ct/histogram", + appendableSamples: []appendableSamples{ + {ts: 100, h: testHistogram, ct: 1}, + {ts: 101, h: testHistogram, ct: 1}, + }, + expectedSamples: func() []chunks.Sample { + hNoCounterReset := *testHistogram + hNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, h: &histogram.Histogram{}}, + sample{t: 100, h: testHistogram}, + sample{t: 101, h: &hNoCounterReset}, + } + }(), + }, + { + name: "Consecutive appends with same ct ignore ct/floathistogram", appendableSamples: []appendableSamples{ - {ts: 100, val: 10, ct: 1}, - {ts: 102, val: 10, ct: 101}, + {ts: 100, fh: testFloatHistogram, ct: 1}, + {ts: 101, fh: testFloatHistogram, ct: 1}, + }, + expectedSamples: func() []chunks.Sample { + fhNoCounterReset := *testFloatHistogram + fhNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, fh: &histogram.FloatHistogram{}}, + sample{t: 100, fh: testFloatHistogram}, + sample{t: 101, fh: &fhNoCounterReset}, + } + }(), + }, + { + name: "Consecutive appends with newer ct do not ignore ct/floatSample", + appendableSamples: []appendableSamples{ + {ts: 100, fSample: 10, ct: 1}, + {ts: 102, fSample: 10, ct: 101}, }, expectedSamples: []chunks.Sample{ sample{t: 1, f: 0}, @@ -6458,10 +6558,36 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { }, }, { - name: "CT equals to previous sample timestamp is ignored", + name: "Consecutive appends with newer ct do not ignore ct/histogram", + appendableSamples: []appendableSamples{ + {ts: 100, h: testHistogram, ct: 1}, + {ts: 102, h: testHistogram, ct: 101}, + }, + expectedSamples: []chunks.Sample{ + sample{t: 1, h: &histogram.Histogram{}}, + sample{t: 100, h: testHistogram}, + sample{t: 101, h: &histogram.Histogram{CounterResetHint: histogram.CounterReset}}, + sample{t: 102, h: testHistogram}, + }, + }, + { + name: "Consecutive appends with newer ct do not ignore ct/floathistogram", + appendableSamples: []appendableSamples{ + {ts: 100, fh: testFloatHistogram, ct: 1}, + {ts: 102, fh: testFloatHistogram, ct: 101}, + }, + expectedSamples: []chunks.Sample{ + sample{t: 1, fh: &histogram.FloatHistogram{}}, + sample{t: 100, fh: testFloatHistogram}, + sample{t: 101, fh: &histogram.FloatHistogram{CounterResetHint: histogram.CounterReset}}, + sample{t: 102, fh: testFloatHistogram}, + }, + }, + { + name: "CT equals to 
previous sample timestamp is ignored/floatSample", appendableSamples: []appendableSamples{ - {ts: 100, val: 10, ct: 1}, - {ts: 101, val: 10, ct: 100}, + {ts: 100, fSample: 10, ct: 1}, + {ts: 101, fSample: 10, ct: 100}, }, expectedSamples: []chunks.Sample{ sample{t: 1, f: 0}, @@ -6469,6 +6595,38 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { sample{t: 101, f: 10}, }, }, + { + name: "CT equals to previous sample timestamp is ignored/histogram", + appendableSamples: []appendableSamples{ + {ts: 100, h: testHistogram, ct: 1}, + {ts: 101, h: testHistogram, ct: 100}, + }, + expectedSamples: func() []chunks.Sample { + hNoCounterReset := *testHistogram + hNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, h: &histogram.Histogram{}}, + sample{t: 100, h: testHistogram}, + sample{t: 101, h: &hNoCounterReset}, + } + }(), + }, + { + name: "CT equals to previous sample timestamp is ignored/floathistogram", + appendableSamples: []appendableSamples{ + {ts: 100, fh: testFloatHistogram, ct: 1}, + {ts: 101, fh: testFloatHistogram, ct: 100}, + }, + expectedSamples: func() []chunks.Sample { + fhNoCounterReset := *testFloatHistogram + fhNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, fh: &histogram.FloatHistogram{}}, + sample{t: 100, fh: testFloatHistogram}, + sample{t: 101, fh: &fhNoCounterReset}, + } + }(), + }, } { t.Run(tc.name, func(t *testing.T) { h, _ := newTestHead(t, DefaultBlockDuration, wlog.CompressionNone, false) @@ -6478,10 +6636,21 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { a := h.Appender(context.Background()) lbls := labels.FromStrings("foo", "bar") for _, sample := range tc.appendableSamples { - _, err := a.AppendCTZeroSample(0, lbls, sample.ts, sample.ct) - require.NoError(t, err) - _, err = a.Append(0, lbls, sample.ts, sample.val) - require.NoError(t, err) + // Append float if it's a float test case + if sample.fSample != 0 { + _, err := a.AppendCTZeroSample(0, lbls, sample.ts, sample.ct) + require.NoError(t, err) + _, err = a.Append(0, lbls, sample.ts, sample.fSample) + require.NoError(t, err) + } + + // Append histograms if it's a histogram test case + if sample.h != nil || sample.fh != nil { + ref, err := a.AppendHistogramCTZeroSample(0, lbls, sample.ts, sample.ct, sample.h, sample.fh) + require.NoError(t, err) + _, err = a.AppendHistogram(ref, lbls, sample.ts, sample.h, sample.fh) + require.NoError(t, err) + } } require.NoError(t, a.Commit()) diff --git a/tsdb/head_wal.go b/tsdb/head_wal.go index 674d708012..6729d77090 100644 --- a/tsdb/head_wal.go +++ b/tsdb/head_wal.go @@ -24,7 +24,6 @@ import ( "sync" "time" - "github.com/go-kit/log/level" "go.uber.org/atomic" "github.com/prometheus/prometheus/model/exemplar" @@ -128,7 +127,7 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch // replaying the WAL, so lets just log the error if it's not that type. 
err = h.exemplars.AddExemplar(ms.labels(), exemplar.Exemplar{Ts: e.T, Value: e.V, Labels: e.Labels}) if err != nil && errors.Is(err, storage.ErrOutOfOrderExemplar) { - level.Warn(h.logger).Log("msg", "Unexpected error when replaying WAL on exemplar record", "err", err) + h.logger.Warn("Unexpected error when replaying WAL on exemplar record", "err", err) } } }(exemplarsInput) @@ -421,8 +420,8 @@ Outer: } if unknownRefs.Load()+unknownExemplarRefs.Load()+unknownHistogramRefs.Load()+unknownMetadataRefs.Load() > 0 { - level.Warn(h.logger).Log( - "msg", "Unknown series references", + h.logger.Warn( + "Unknown series references", "samples", unknownRefs.Load(), "exemplars", unknownExemplarRefs.Load(), "histograms", unknownHistogramRefs.Load(), @@ -430,7 +429,7 @@ Outer: ) } if count := mmapOverlappingChunks.Load(); count > 0 { - level.Info(h.logger).Log("msg", "Overlapping m-map chunks on duplicate series records", "count", count) + h.logger.Info("Overlapping m-map chunks on duplicate series records", "count", count) } return nil } @@ -446,8 +445,8 @@ func (h *Head) resetSeriesWithMMappedChunks(mSeries *memSeries, mmc, oooMmc []*m mmc[0].minTime, mmc[len(mmc)-1].maxTime, ) { - level.Debug(h.logger).Log( - "msg", "M-mapped chunks overlap on a duplicate series record", + h.logger.Debug( + "M-mapped chunks overlap on a duplicate series record", "series", mSeries.labels().String(), "oldref", mSeries.ref, "oldmint", mSeries.mmappedChunks[0].minTime, @@ -913,7 +912,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } if unknownRefs.Load() > 0 || mmapMarkerUnknownRefs.Load() > 0 { - level.Warn(h.logger).Log("msg", "Unknown series references for ooo WAL replay", "samples", unknownRefs.Load(), "mmap_markers", mmapMarkerUnknownRefs.Load()) + h.logger.Warn("Unknown series references for ooo WAL replay", "samples", unknownRefs.Load(), "mmap_markers", mmapMarkerUnknownRefs.Load()) } return nil } @@ -1214,7 +1213,7 @@ const chunkSnapshotPrefix = "chunk_snapshot." func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) { if h.wal == nil { // If we are not storing any WAL, does not make sense to take a snapshot too. - level.Warn(h.logger).Log("msg", "skipping chunk snapshotting as WAL is disabled") + h.logger.Warn("skipping chunk snapshotting as WAL is disabled") return &ChunkSnapshotStats{}, nil } h.chunkSnapshotMtx.Lock() @@ -1363,7 +1362,7 @@ func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) { // Leftover old chunk snapshots do not cause problems down the line beyond // occupying disk space. // They will just be ignored since a higher chunk snapshot exists. 
- level.Error(h.logger).Log("msg", "delete old chunk snapshots", "err", err) + h.logger.Error("delete old chunk snapshots", "err", err) } return stats, nil } @@ -1373,12 +1372,12 @@ func chunkSnapshotDir(wlast, woffset int) string { } func (h *Head) performChunkSnapshot() error { - level.Info(h.logger).Log("msg", "creating chunk snapshot") + h.logger.Info("creating chunk snapshot") startTime := time.Now() stats, err := h.ChunkSnapshot() elapsed := time.Since(startTime) if err == nil { - level.Info(h.logger).Log("msg", "chunk snapshot complete", "duration", elapsed.String(), "num_series", stats.TotalSeries, "dir", stats.Dir) + h.logger.Info("chunk snapshot complete", "duration", elapsed.String(), "num_series", stats.TotalSeries, "dir", stats.Dir) } if err != nil { return fmt.Errorf("chunk snapshot: %w", err) @@ -1493,7 +1492,7 @@ func (h *Head) loadChunkSnapshot() (int, int, map[chunks.HeadSeriesRef]*memSerie } defer func() { if err := sr.Close(); err != nil { - level.Warn(h.logger).Log("msg", "error while closing the wal segments reader", "err", err) + h.logger.Warn("error while closing the wal segments reader", "err", err) } }() @@ -1682,9 +1681,9 @@ Outer: } elapsed := time.Since(start) - level.Info(h.logger).Log("msg", "chunk snapshot loaded", "dir", dir, "num_series", numSeries, "duration", elapsed.String()) + h.logger.Info("chunk snapshot loaded", "dir", dir, "num_series", numSeries, "duration", elapsed.String()) if unknownRefs > 0 { - level.Warn(h.logger).Log("msg", "unknown series references during chunk snapshot replay", "count", unknownRefs) + h.logger.Warn("unknown series references during chunk snapshot replay", "count", unknownRefs) } return snapIdx, snapOffset, refSeries, nil diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 9eaa0a97d2..957106b604 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -44,10 +44,12 @@ const ( // HeaderLen represents number of bytes reserved of index for header. HeaderLen = 5 - // FormatV1 represents 1 version of index. + // FormatV1 represents version 1 of index. FormatV1 = 1 - // FormatV2 represents 2 version of index. + // FormatV2 represents version 2 of index. FormatV2 = 2 + // FormatV3 represents version 3 of index. + FormatV3 = 3 indexFilename = "index" @@ -437,7 +439,7 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... 
return err } if labels.Compare(lset, w.lastSeries) <= 0 { - return fmt.Errorf("out-of-order series added with label set %q", lset) + return fmt.Errorf("out-of-order series added with label set %q, last label set %q", lset, w.lastSeries) } if ref < w.lastSeriesRef && !w.lastSeries.IsEmpty() { @@ -1212,7 +1214,9 @@ func newReader(b ByteSlice, c io.Closer, cacheProvider ReaderCacheProvider) (*Re } r.version = int(r.b.Range(4, 5)[0]) - if r.version != FormatV1 && r.version != FormatV2 { + switch r.version { + case FormatV1, FormatV2, FormatV3: + default: return nil, fmt.Errorf("unknown index file version %d", r.version) } @@ -1370,7 +1374,9 @@ func (s Symbols) Lookup(o uint32) (string, error) { B: s.bs.Range(0, s.bs.Len()), } - if s.version == FormatV2 { + if s.version == FormatV1 { + d.Skip(int(o)) + } else { if int(o) >= s.seen { return "", fmt.Errorf("unknown symbol offset %d", o) } @@ -1379,8 +1385,6 @@ func (s Symbols) Lookup(o uint32) (string, error) { for i := o - (o / symbolFactor * symbolFactor); i > 0; i-- { d.UvarintBytes() } - } else { - d.Skip(int(o)) } sym := d.UvarintStr() if d.Err() != nil { @@ -1426,10 +1430,10 @@ func (s Symbols) ReverseLookup(sym string) (uint32, error) { if lastSymbol != sym { return 0, fmt.Errorf("unknown symbol %q", sym) } - if s.version == FormatV2 { - return uint32(res), nil + if s.version == FormatV1 { + return uint32(s.bs.Len() - lastLen), nil } - return uint32(s.bs.Len() - lastLen), nil + return uint32(res), nil } func (s Symbols) Size() int { @@ -1588,7 +1592,7 @@ func (r *Reader) LabelNamesFor(ctx context.Context, postings Postings) ([]string offset := id // In version 2 series IDs are no longer exact references but series are 16-byte padded // and the ID is the multiple of 16 of the actual position. - if r.version == FormatV2 { + if r.version != FormatV1 { offset = id * seriesByteAlign } @@ -1627,7 +1631,7 @@ func (r *Reader) LabelValueFor(ctx context.Context, id storage.SeriesRef, label offset := id // In version 2 series IDs are no longer exact references but series are 16-byte padded // and the ID is the multiple of 16 of the actual position. - if r.version == FormatV2 { + if r.version != FormatV1 { offset = id * seriesByteAlign } d := encoding.NewDecbufUvarintAt(r.b, int(offset), castagnoliTable) @@ -1653,7 +1657,7 @@ func (r *Reader) Series(id storage.SeriesRef, builder *labels.ScratchBuilder, ch offset := id // In version 2 series IDs are no longer exact references but series are 16-byte padded // and the ID is the multiple of 16 of the actual position. - if r.version == FormatV2 { + if r.version != FormatV1 { offset = id * seriesByteAlign } d := encoding.NewDecbufUvarintAt(r.b, int(offset), castagnoliTable) @@ -2106,5 +2110,5 @@ func (dec *Decoder) Series(b []byte, builder *labels.ScratchBuilder, chks *[]chu } func yoloString(b []byte) string { - return *((*string)(unsafe.Pointer(&b))) + return unsafe.String(unsafe.SliceData(b), len(b)) } diff --git a/tsdb/ooo_head_test.go b/tsdb/ooo_head_test.go index b9badfea21..b1641e29ba 100644 --- a/tsdb/ooo_head_test.go +++ b/tsdb/ooo_head_test.go @@ -27,7 +27,6 @@ import ( const testMaxSize int = 32 -// Formulas chosen to make testing easy. // Formulas chosen to make testing easy. func valEven(pos int) int64 { return int64(pos*2 + 2) } // s[0]=2, s[1]=4, s[2]=6, ..., s[31]=64 - Predictable pre-existing values func valOdd(pos int) int64 { return int64(pos*2 + 1) } // s[0]=1, s[1]=3, s[2]=5, ..., s[31]=63 - New values will interject at chosen position because they sort before the pre-existing vals. 
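Aside on the yoloString change in tsdb/index/index.go above: a minimal standalone sketch of the unsafe.String/unsafe.SliceData idiom it switches to. This is an illustration, not part of the patch; the conversion aliases the byte slice's backing array instead of copying, so it is only safe if the slice is never mutated while the returned string is in use. The names in main are made up for the example.

package main

import (
	"fmt"
	"unsafe"
)

// yoloString converts a byte slice to a string without copying (Go 1.20+).
// Safe only while b is not modified afterwards.
func yoloString(b []byte) string {
	return unsafe.String(unsafe.SliceData(b), len(b))
}

func main() {
	buf := []byte("http_requests_total")
	s := yoloString(buf) // zero-copy view over buf's bytes
	fmt.Println(s, len(s))
}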
diff --git a/tsdb/querier.go b/tsdb/querier.go index 6005ad83ce..e506153eba 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -253,6 +253,10 @@ func PostingsForMatchers(ctx context.Context, ix IndexPostingsReader, ms ...*lab return nil, err } its = append(its, allPostings) + case m.Type == labels.MatchRegexp && m.Value == ".*": + // .* regexp matches any string: do nothing. + case m.Type == labels.MatchNotRegexp && m.Value == ".*": + return index.EmptyPostings(), nil case labelMustBeSet[m.Name]: // If this matcher must be non-empty, we can be smarter. matchesEmpty := m.Matches("") diff --git a/tsdb/querier_bench_test.go b/tsdb/querier_bench_test.go index adcc163849..93b1d9a712 100644 --- a/tsdb/querier_bench_test.go +++ b/tsdb/querier_bench_test.go @@ -106,17 +106,17 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) { jFoo := labels.MustNewMatcher(labels.MatchEqual, "j", "foo") jNotFoo := labels.MustNewMatcher(labels.MatchNotEqual, "j", "foo") - iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$") - i1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.*$") - iStar1 := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1$") - iStar1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1.*$") - iPlus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.+$") - i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.+$") - iEmptyRe := labels.MustNewMatcher(labels.MatchRegexp, "i", "^$") + iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", ".*") + i1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "1.*") + iStar1 := labels.MustNewMatcher(labels.MatchRegexp, "i", ".*1") + iStar1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", ".*1.*") + iPlus := labels.MustNewMatcher(labels.MatchRegexp, "i", ".+") + i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "1.+") + iEmptyRe := labels.MustNewMatcher(labels.MatchRegexp, "i", "") iNotEmpty := labels.MustNewMatcher(labels.MatchNotEqual, "i", "") iNot2 := labels.MustNewMatcher(labels.MatchNotEqual, "i", "2"+postingsBenchSuffix) - iNot2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^2.*$") - iNotStar2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^.*2.*$") + iNot2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "2.*") + iNotStar2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", ".*2.*") jFooBar := labels.MustNewMatcher(labels.MatchRegexp, "j", "foo|bar") jXXXYYY := labels.MustNewMatcher(labels.MatchRegexp, "j", "XXX|YYY") jXplus := labels.MustNewMatcher(labels.MatchRegexp, "j", "X.+") @@ -189,13 +189,13 @@ func benchmarkLabelValuesWithMatchers(b *testing.B, ir IndexReader) { i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "1.+") i1PostingsBenchSuffix := labels.MustNewMatcher(labels.MatchEqual, "i", "1"+postingsBenchSuffix) iSuffix := labels.MustNewMatcher(labels.MatchRegexp, "i", ".+ddd") - iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$") + iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", ".*") jNotFoo := labels.MustNewMatcher(labels.MatchNotEqual, "j", "foo") jXXXYYY := labels.MustNewMatcher(labels.MatchRegexp, "j", "XXX|YYY") jXplus := labels.MustNewMatcher(labels.MatchRegexp, "j", "X.+") n1 := labels.MustNewMatcher(labels.MatchEqual, "n", "1"+postingsBenchSuffix) nX := labels.MustNewMatcher(labels.MatchNotEqual, "n", "X"+postingsBenchSuffix) - nPlus := labels.MustNewMatcher(labels.MatchRegexp, "n", "^.+$") + nPlus := labels.MustNewMatcher(labels.MatchRegexp, "n", ".+") primesTimes := 
labels.MustNewMatcher(labels.MatchEqual, "i_times_n", "533701") // = 76243*7, ie. multiplication of primes. It will match single i*n combination. nonPrimesTimes := labels.MustNewMatcher(labels.MatchEqual, "i_times_n", "20") // 1*20, 2*10, 4*5, 5*4 times12 := labels.MustNewMatcher(labels.MatchRegexp, "i_times_n", "12.*") @@ -211,12 +211,12 @@ func benchmarkLabelValuesWithMatchers(b *testing.B, ir IndexReader) { {`i with i="1"`, "i", []*labels.Matcher{i1}}, // i has 100k values. {`i with n="1"`, "i", []*labels.Matcher{n1}}, - {`i with n="^.+$"`, "i", []*labels.Matcher{nPlus}}, + {`i with n=".+"`, "i", []*labels.Matcher{nPlus}}, {`i with n="1",j!="foo"`, "i", []*labels.Matcher{n1, jNotFoo}}, {`i with n="1",j=~"X.+"`, "i", []*labels.Matcher{n1, jXplus}}, {`i with n="1",j=~"XXX|YYY"`, "i", []*labels.Matcher{n1, jXXXYYY}}, {`i with n="X",j!="foo"`, "i", []*labels.Matcher{nX, jNotFoo}}, - {`i with n="1",i=~"^.*$",j!="foo"`, "i", []*labels.Matcher{n1, iStar, jNotFoo}}, + {`i with n="1",i=~".*",j!="foo"`, "i", []*labels.Matcher{n1, iStar, jNotFoo}}, {`i with i_times_n=533701`, "i", []*labels.Matcher{primesTimes}}, {`i with i_times_n=20`, "i", []*labels.Matcher{nonPrimesTimes}}, {`i with i_times_n=~"12.*""`, "i", []*labels.Matcher{times12}}, diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index f9d3460774..b06ba1f263 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -2758,6 +2758,7 @@ func TestPostingsForMatchers(t *testing.T) { app.Append(0, labels.FromStrings("n", "1"), 0, 0) app.Append(0, labels.FromStrings("n", "1", "i", "a"), 0, 0) app.Append(0, labels.FromStrings("n", "1", "i", "b"), 0, 0) + app.Append(0, labels.FromStrings("n", "1", "i", "\n"), 0, 0) app.Append(0, labels.FromStrings("n", "2"), 0, 0) app.Append(0, labels.FromStrings("n", "2.5"), 0, 0) require.NoError(t, app.Commit()) @@ -2773,6 +2774,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2791,6 +2793,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), labels.FromStrings("n", "2"), labels.FromStrings("n", "2.5"), }, @@ -2808,6 +2811,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2819,6 +2823,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2826,6 +2831,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, // Regex. 
@@ -2835,6 +2841,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2870,6 +2877,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2877,6 +2885,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, // Not regex. @@ -2885,6 +2894,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2918,12 +2928,14 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "n", "1"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^a?$")}, exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2931,6 +2943,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2964,6 +2977,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), labels.FromStrings("n", "2"), }, }, @@ -3011,6 +3025,57 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "2.5"), }, }, + // Test shortcut for i=~".*" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "i", ".*")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), + labels.FromStrings("n", "2"), + labels.FromStrings("n", "2.5"), + }, + }, + // Test shortcut for n=~".*" and i=~"^.*$" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", ".*"), labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), + labels.FromStrings("n", "2"), + labels.FromStrings("n", "2.5"), + }, + }, + // Test shortcut for n=~"^.*$" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", "^.*$"), labels.MustNewMatcher(labels.MatchEqual, "i", "a")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "a"), + }, + }, + // Test shortcut for i!~".*" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "i", ".*")}, + exp: []labels.Labels{}, + }, + // Test shortcut for n!~"^.*$", i!~".*". First one triggers empty result. 
+ { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "n", "^.*$"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", ".*")}, + exp: []labels.Labels{}, + }, + // Test shortcut i!~".*" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", ".*"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", ".*")}, + exp: []labels.Labels{}, + }, + // Test shortcut i!~"^.*$" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "n", "1"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^.*$")}, + exp: []labels.Labels{}, + }, } ir, err := h.Index() @@ -3304,7 +3369,7 @@ func (m mockMatcherIndex) LabelValueFor(context.Context, storage.SeriesRef, stri } func (m mockMatcherIndex) LabelNamesFor(ctx context.Context, postings index.Postings) ([]string, error) { - return nil, errors.New("label names for for called") + return nil, errors.New("label names for called") } func (m mockMatcherIndex) Postings(context.Context, string, ...string) (index.Postings, error) { @@ -3937,3 +4002,35 @@ func (m mockReaderOfLabels) LabelValuesFor(index.Postings, string) storage.Label func (m mockReaderOfLabels) PostingsForMatchers(context.Context, bool, ...*labels.Matcher) (index.Postings, error) { panic("PostingsForMatchers called") } + +// TestMergeQuerierConcurrentSelectMatchers reproduces the data race bug from +// https://github.com/prometheus/prometheus/issues/14723, when one of the queriers (blockQuerier in this case) +// alters the passed matchers. +func TestMergeQuerierConcurrentSelectMatchers(t *testing.T) { + block, err := OpenBlock(nil, createBlock(t, t.TempDir(), genSeries(1, 1, 0, 1)), nil) + require.NoError(t, err) + defer func() { + require.NoError(t, block.Close()) + }() + p, err := NewBlockQuerier(block, 0, 1) + require.NoError(t, err) + + // A secondary querier is required to enable concurrent select; a blockQuerier is used for simplicity. + s, err := NewBlockQuerier(block, 0, 1) + require.NoError(t, err) + + originalMatchers := []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "baz", ".*"), + labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"), + } + matchers := append([]*labels.Matcher{}, originalMatchers...) + + mergedQuerier := storage.NewMergeQuerier([]storage.Querier{p}, []storage.Querier{s}, storage.ChainedSeriesMerge) + defer func() { + require.NoError(t, mergedQuerier.Close()) + }() + + mergedQuerier.Select(context.Background(), false, nil, matchers...) + + require.Equal(t, originalMatchers, matchers) +} diff --git a/tsdb/record/record_test.go b/tsdb/record/record_test.go index da7748e187..f3a657aecb 100644 --- a/tsdb/record/record_test.go +++ b/tsdb/record/record_test.go @@ -166,7 +166,7 @@ func TestRecord_EncodeDecode(t *testing.T) { require.NoError(t, err) require.Equal(t, floatHistograms, decFloatHistograms) - // Gauge ingeger histograms. + // Gauge integer histograms. for i := range histograms { histograms[i].H.CounterResetHint = histogram.GaugeType } diff --git a/tsdb/repair.go b/tsdb/repair.go index 9d2c5738d1..8bdc645b5e 100644 --- a/tsdb/repair.go +++ b/tsdb/repair.go @@ -17,19 +17,17 @@ import ( "encoding/json" "fmt" "io" + "log/slog" "os" "path/filepath" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" "github.com/prometheus/prometheus/tsdb/fileutil" ) // repairBadIndexVersion repairs an issue in index and meta.json persistence introduced in // commit 129773b41a565fde5156301e37f9a87158030443. 
-func repairBadIndexVersion(logger log.Logger, dir string) error { +func repairBadIndexVersion(logger *slog.Logger, dir string) error { // All blocks written by Prometheus 2.1 with a meta.json version of 2 are affected. // We must actually set the index file version to 2 and revert the meta.json version back to 1. dirs, err := blockDirs(dir) @@ -41,7 +39,7 @@ func repairBadIndexVersion(logger log.Logger, dir string) error { defer func() { for _, tmp := range tmpFiles { if err := os.RemoveAll(tmp); err != nil { - level.Error(logger).Log("msg", "remove tmp file", "err", err.Error()) + logger.Error("remove tmp file", "err", err.Error()) } } }() @@ -49,20 +47,20 @@ func repairBadIndexVersion(logger log.Logger, dir string) error { for _, d := range dirs { meta, err := readBogusMetaFile(d) if err != nil { - level.Error(logger).Log("msg", "failed to read meta.json for a block during repair process; skipping", "dir", d, "err", err) + logger.Error("failed to read meta.json for a block during repair process; skipping", "dir", d, "err", err) continue } if meta.Version == metaVersion1 { - level.Info(logger).Log( - "msg", "Found healthy block", + logger.Info( + "Found healthy block", "mint", meta.MinTime, "maxt", meta.MaxTime, "ulid", meta.ULID, ) continue } - level.Info(logger).Log( - "msg", "Fixing broken block", + logger.Info( + "Fixing broken block", "mint", meta.MinTime, "maxt", meta.MaxTime, "ulid", meta.ULID, diff --git a/tsdb/tombstones/tombstones.go b/tsdb/tombstones/tombstones.go index 4cea5005db..dcba298f3b 100644 --- a/tsdb/tombstones/tombstones.go +++ b/tsdb/tombstones/tombstones.go @@ -19,15 +19,13 @@ import ( "fmt" "hash" "hash/crc32" + "log/slog" "math" "os" "path/filepath" "sort" "sync" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/encoding" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" @@ -76,7 +74,7 @@ type Reader interface { Close() error } -func WriteFile(logger log.Logger, dir string, tr Reader) (int64, error) { +func WriteFile(logger *slog.Logger, dir string, tr Reader) (int64, error) { path := filepath.Join(dir, TombstonesFilename) tmp := path + ".tmp" hash := newCRC32() @@ -89,11 +87,11 @@ func WriteFile(logger log.Logger, dir string, tr Reader) (int64, error) { defer func() { if f != nil { if err := f.Close(); err != nil { - level.Error(logger).Log("msg", "close tmp file", "err", err.Error()) + logger.Error("close tmp file", "err", err.Error()) } } if err := os.RemoveAll(tmp); err != nil { - level.Error(logger).Log("msg", "remove tmp file", "err", err.Error()) + logger.Error("remove tmp file", "err", err.Error()) } }() diff --git a/tsdb/tombstones/tombstones_test.go b/tsdb/tombstones/tombstones_test.go index 627f3f7e75..2cfede9491 100644 --- a/tsdb/tombstones/tombstones_test.go +++ b/tsdb/tombstones/tombstones_test.go @@ -20,9 +20,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/util/testutil" ) @@ -50,7 +51,7 @@ func TestWriteAndReadbackTombstones(t *testing.T) { stones.AddInterval(storage.SeriesRef(ref), dranges...) 
} - _, err := WriteFile(log.NewNopLogger(), tmpdir, stones) + _, err := WriteFile(promslog.NewNopLogger(), tmpdir, stones) require.NoError(t, err) restr, _, err := ReadTombstones(tmpdir) diff --git a/tsdb/tsdbblockutil.go b/tsdb/tsdbblockutil.go index f7b27c2e08..b49757223f 100644 --- a/tsdb/tsdbblockutil.go +++ b/tsdb/tsdbblockutil.go @@ -16,10 +16,9 @@ package tsdb import ( "context" "fmt" + "log/slog" "path/filepath" - "github.com/go-kit/log" - "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" ) @@ -27,7 +26,7 @@ import ( var ErrInvalidTimes = fmt.Errorf("max time is lesser than min time") // CreateBlock creates a chunkrange block from the samples passed to it, and writes it to disk. -func CreateBlock(series []storage.Series, dir string, chunkRange int64, logger log.Logger) (string, error) { +func CreateBlock(series []storage.Series, dir string, chunkRange int64, logger *slog.Logger) (string, error) { if chunkRange == 0 { chunkRange = DefaultBlockDuration } @@ -41,7 +40,7 @@ func CreateBlock(series []storage.Series, dir string, chunkRange int64, logger l } defer func() { if err := w.Close(); err != nil { - logger.Log("err closing blockwriter", err.Error()) + logger.Error("err closing blockwriter", "err", err.Error()) } }() diff --git a/tsdb/tsdbutil/dir_locker.go b/tsdb/tsdbutil/dir_locker.go index fa939879ca..4b69e1f9d6 100644 --- a/tsdb/tsdbutil/dir_locker.go +++ b/tsdb/tsdbutil/dir_locker.go @@ -16,11 +16,10 @@ package tsdbutil import ( "errors" "fmt" + "log/slog" "os" "path/filepath" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" @@ -34,7 +33,7 @@ const ( ) type DirLocker struct { - logger log.Logger + logger *slog.Logger createdCleanly prometheus.Gauge @@ -43,7 +42,7 @@ type DirLocker struct { } // NewDirLocker creates a DirLocker that can obtain an exclusive lock on dir. -func NewDirLocker(dir, subsystem string, l log.Logger, r prometheus.Registerer) (*DirLocker, error) { +func NewDirLocker(dir, subsystem string, l *slog.Logger, r prometheus.Registerer) (*DirLocker, error) { lock := &DirLocker{ logger: l, createdCleanly: prometheus.NewGauge(prometheus.GaugeOpts{ @@ -74,7 +73,7 @@ func (l *DirLocker) Lock() error { } if _, err := os.Stat(l.path); err == nil { - level.Warn(l.logger).Log("msg", "A lockfile from a previous execution already existed. It was replaced", "file", l.path) + l.logger.Warn("A lockfile from a previous execution already existed. 
It was replaced", "file", l.path) l.createdCleanly.Set(lockfileReplaced) } else { diff --git a/tsdb/tsdbutil/dir_locker_test.go b/tsdb/tsdbutil/dir_locker_test.go index fc7d905b2d..65e2761692 100644 --- a/tsdb/tsdbutil/dir_locker_test.go +++ b/tsdb/tsdbutil/dir_locker_test.go @@ -16,15 +16,16 @@ package tsdbutil import ( "testing" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/util/testutil" ) func TestLockfile(t *testing.T) { TestDirLockerUsage(t, func(t *testing.T, data string, createLock bool) (*DirLocker, testutil.Closer) { - locker, err := NewDirLocker(data, "tsdbutil", log.NewNopLogger(), nil) + locker, err := NewDirLocker(data, "tsdbutil", promslog.NewNopLogger(), nil) require.NoError(t, err) if createLock { diff --git a/tsdb/tsdbutil/dir_locker_testutil.go b/tsdb/tsdbutil/dir_locker_testutil.go index a4cf5abd68..7228dbafed 100644 --- a/tsdb/tsdbutil/dir_locker_testutil.go +++ b/tsdb/tsdbutil/dir_locker_testutil.go @@ -18,8 +18,8 @@ import ( "os" "testing" - "github.com/go-kit/log" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/util/testutil" @@ -68,7 +68,7 @@ func TestDirLockerUsage(t *testing.T, open func(t *testing.T, data string, creat // Test preconditions (file already exists + lockfile option) if c.fileAlreadyExists { - tmpLocker, err := NewDirLocker(tmpdir, "tsdb", log.NewNopLogger(), nil) + tmpLocker, err := NewDirLocker(tmpdir, "tsdb", promslog.NewNopLogger(), nil) require.NoError(t, err) err = os.WriteFile(tmpLocker.path, []byte{}, 0o644) require.NoError(t, err) diff --git a/tsdb/wlog/checkpoint.go b/tsdb/wlog/checkpoint.go index a16cd5fc74..58e11c770e 100644 --- a/tsdb/wlog/checkpoint.go +++ b/tsdb/wlog/checkpoint.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math" "os" "path/filepath" @@ -25,9 +26,6 @@ import ( "strconv" "strings" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/tsdb/chunks" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" @@ -94,11 +92,11 @@ const checkpointPrefix = "checkpoint." // segmented format as the original WAL itself. // This makes it easy to read it through the WAL package and concatenate // it with the original WAL. 
-func Checkpoint(logger log.Logger, w *WL, from, to int, keep func(id chunks.HeadSeriesRef) bool, mint int64) (*CheckpointStats, error) { +func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.HeadSeriesRef) bool, mint int64) (*CheckpointStats, error) { stats := &CheckpointStats{} var sgmReader io.ReadCloser - level.Info(logger).Log("msg", "Creating checkpoint", "from_segment", from, "to_segment", to, "mint", mint) + logger.Info("Creating checkpoint", "from_segment", from, "to_segment", to, "mint", mint) { var sgmRange []SegmentRange diff --git a/tsdb/wlog/checkpoint_test.go b/tsdb/wlog/checkpoint_test.go index a9786454de..8ee193f5ac 100644 --- a/tsdb/wlog/checkpoint_test.go +++ b/tsdb/wlog/checkpoint_test.go @@ -23,9 +23,10 @@ import ( "strings" "testing" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/tsdb/chunks" @@ -244,7 +245,7 @@ func TestCheckpoint(t *testing.T) { } require.NoError(t, w.Close()) - stats, err := Checkpoint(log.NewNopLogger(), w, 100, 106, func(x chunks.HeadSeriesRef) bool { + stats, err := Checkpoint(promslog.NewNopLogger(), w, 100, 106, func(x chunks.HeadSeriesRef) bool { return x%2 == 0 }, last/2) require.NoError(t, err) @@ -354,7 +355,7 @@ func TestCheckpointNoTmpFolderAfterError(t *testing.T) { require.NoError(t, f.Close()) // Run the checkpoint and since the wlog contains corrupt data this should return an error. - _, err = Checkpoint(log.NewNopLogger(), w, 0, 1, nil, 0) + _, err = Checkpoint(promslog.NewNopLogger(), w, 0, 1, nil, 0) require.Error(t, err) // Walk the wlog dir to make sure there are no tmp folder left behind after the error. diff --git a/tsdb/wlog/live_reader.go b/tsdb/wlog/live_reader.go index 6eaef5f396..a017d362d1 100644 --- a/tsdb/wlog/live_reader.go +++ b/tsdb/wlog/live_reader.go @@ -20,9 +20,8 @@ import ( "fmt" "hash/crc32" "io" + "log/slog" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/golang/snappy" "github.com/klauspost/compress/zstd" "github.com/prometheus/client_golang/prometheus" @@ -51,7 +50,7 @@ func NewLiveReaderMetrics(reg prometheus.Registerer) *LiveReaderMetrics { } // NewLiveReader returns a new live reader. -func NewLiveReader(logger log.Logger, metrics *LiveReaderMetrics, r io.Reader) *LiveReader { +func NewLiveReader(logger *slog.Logger, metrics *LiveReaderMetrics, r io.Reader) *LiveReader { // Calling zstd.NewReader with a nil io.Reader and no options cannot return an error. zstdReader, _ := zstd.NewReader(nil) @@ -73,7 +72,7 @@ func NewLiveReader(logger log.Logger, metrics *LiveReaderMetrics, r io.Reader) * // that are still in the process of being written, and returns records as soon // as they can be read. 
type LiveReader struct { - logger log.Logger + logger *slog.Logger rdr io.Reader err error rec []byte @@ -311,7 +310,7 @@ func (r *LiveReader) readRecord() ([]byte, int, error) { return nil, 0, fmt.Errorf("record would overflow current page: %d > %d", r.readIndex+recordHeaderSize+length, pageSize) } r.metrics.readerCorruptionErrors.WithLabelValues("record_span_page").Inc() - level.Warn(r.logger).Log("msg", "Record spans page boundaries", "start", r.readIndex, "end", recordHeaderSize+length, "pageSize", pageSize) + r.logger.Warn("Record spans page boundaries", "start", r.readIndex, "end", recordHeaderSize+length, "pageSize", pageSize) } if recordHeaderSize+length > pageSize { return nil, 0, fmt.Errorf("record length greater than a single page: %d > %d", recordHeaderSize+length, pageSize) diff --git a/tsdb/wlog/reader_test.go b/tsdb/wlog/reader_test.go index 484eff3664..2ac63cbf15 100644 --- a/tsdb/wlog/reader_test.go +++ b/tsdb/wlog/reader_test.go @@ -29,11 +29,11 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" - "github.com/prometheus/prometheus/util/testutil" ) type reader interface { @@ -53,7 +53,7 @@ var readerConstructors = map[string]func(io.Reader) reader{ return NewReader(r) }, "LiveReader": func(r io.Reader) reader { - lr := NewLiveReader(log.NewNopLogger(), NewLiveReaderMetrics(nil), r) + lr := NewLiveReader(promslog.NewNopLogger(), NewLiveReaderMetrics(nil), r) lr.eofNonErr = true return lr }, @@ -196,7 +196,7 @@ func TestReader(t *testing.T) { } func TestReader_Live(t *testing.T) { - logger := testutil.NewLogger(t) + logger := promslog.NewNopLogger() for i := range testReaderCases { t.Run(strconv.Itoa(i), func(t *testing.T) { @@ -353,7 +353,7 @@ func TestReaderFuzz(t *testing.T) { } func TestReaderFuzz_Live(t *testing.T) { - logger := testutil.NewLogger(t) + logger := promslog.NewNopLogger() for _, compress := range []CompressionType{CompressionNone, CompressionSnappy, CompressionZstd} { t.Run(fmt.Sprintf("compress=%s", compress), func(t *testing.T) { dir := t.TempDir() @@ -441,7 +441,7 @@ func TestReaderFuzz_Live(t *testing.T) { func TestLiveReaderCorrupt_ShortFile(t *testing.T) { // Write a corrupt WAL segment, there is one record of pageSize in length, // but the segment is only half written. - logger := testutil.NewLogger(t) + logger := promslog.NewNopLogger() dir := t.TempDir() w, err := NewSize(nil, nil, dir, pageSize, CompressionNone) @@ -481,7 +481,7 @@ func TestLiveReaderCorrupt_ShortFile(t *testing.T) { func TestLiveReaderCorrupt_RecordTooLongAndShort(t *testing.T) { // Write a corrupt WAL segment, when record len > page size. 
- logger := testutil.NewLogger(t) + logger := promslog.NewNopLogger() dir := t.TempDir() w, err := NewSize(nil, nil, dir, pageSize*2, CompressionNone) diff --git a/tsdb/wlog/watcher.go b/tsdb/wlog/watcher.go index ac5041e87b..d68ef2accb 100644 --- a/tsdb/wlog/watcher.go +++ b/tsdb/wlog/watcher.go @@ -17,6 +17,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math" "os" "path/filepath" @@ -24,9 +25,8 @@ import ( "strings" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" @@ -84,7 +84,7 @@ type WatcherMetrics struct { type Watcher struct { name string writer WriteTo - logger log.Logger + logger *slog.Logger walDir string lastCheckpoint string sendExemplars bool @@ -172,9 +172,9 @@ func NewWatcherMetrics(reg prometheus.Registerer) *WatcherMetrics { } // NewWatcher creates a new WAL watcher for a given WriteTo. -func NewWatcher(metrics *WatcherMetrics, readerMetrics *LiveReaderMetrics, logger log.Logger, name string, writer WriteTo, dir string, sendExemplars, sendHistograms, sendMetadata bool) *Watcher { +func NewWatcher(metrics *WatcherMetrics, readerMetrics *LiveReaderMetrics, logger *slog.Logger, name string, writer WriteTo, dir string, sendExemplars, sendHistograms, sendMetadata bool) *Watcher { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } return &Watcher{ logger: logger, @@ -222,7 +222,7 @@ func (w *Watcher) setMetrics() { // Start the Watcher. func (w *Watcher) Start() { w.setMetrics() - level.Info(w.logger).Log("msg", "Starting WAL watcher", "queue", w.name) + w.logger.Info("Starting WAL watcher", "queue", w.name) go w.loop() } @@ -241,7 +241,7 @@ func (w *Watcher) Stop() { w.metrics.currentSegment.DeleteLabelValues(w.name) } - level.Info(w.logger).Log("msg", "WAL watcher stopped", "queue", w.name) + w.logger.Info("WAL watcher stopped", "queue", w.name) } func (w *Watcher) loop() { @@ -251,7 +251,7 @@ func (w *Watcher) loop() { for !isClosed(w.quit) { w.SetStartTime(time.Now()) if err := w.Run(); err != nil { - level.Error(w.logger).Log("msg", "error tailing WAL", "err", err) + w.logger.Error("error tailing WAL", "err", err) } select { @@ -274,7 +274,7 @@ func (w *Watcher) Run() error { // Run will be called again if there was a failure to read the WAL. w.sendSamples = false - level.Info(w.logger).Log("msg", "Replaying WAL", "queue", w.name) + w.logger.Info("Replaying WAL", "queue", w.name) // Backfill from the checkpoint first if it exists. lastCheckpoint, checkpointIndex, err := LastCheckpoint(w.walDir) @@ -294,13 +294,13 @@ func (w *Watcher) Run() error { return err } - level.Debug(w.logger).Log("msg", "Tailing WAL", "lastCheckpoint", lastCheckpoint, "checkpointIndex", checkpointIndex, "currentSegment", currentSegment, "lastSegment", lastSegment) + w.logger.Debug("Tailing WAL", "lastCheckpoint", lastCheckpoint, "checkpointIndex", checkpointIndex, "currentSegment", currentSegment, "lastSegment", lastSegment) for !isClosed(w.quit) { w.currentSegmentMetric.Set(float64(currentSegment)) // On start, after reading the existing WAL for series records, we have a pointer to what is the latest segment. // On subsequent calls to this function, currentSegment will have been incremented and we should open that segment. 
- level.Debug(w.logger).Log("msg", "Processing segment", "currentSegment", currentSegment) + w.logger.Debug("Processing segment", "currentSegment", currentSegment) if err := w.watch(currentSegment, currentSegment >= lastSegment); err != nil && !errors.Is(err, ErrIgnorable) { return err } @@ -338,9 +338,9 @@ func (w *Watcher) readAndHandleError(r *LiveReader, segmentNum int, tail bool, s // Ignore all errors reading to end of segment whilst replaying the WAL. if !tail { if err != nil && !errors.Is(err, io.EOF) { - level.Warn(w.logger).Log("msg", "Ignoring error reading to end of segment, may have dropped data", "segment", segmentNum, "err", err) + w.logger.Warn("Ignoring error reading to end of segment, may have dropped data", "segment", segmentNum, "err", err) } else if r.Offset() != size { - level.Warn(w.logger).Log("msg", "Expected to have read whole segment, may have dropped data", "segment", segmentNum, "read", r.Offset(), "size", size) + w.logger.Warn("Expected to have read whole segment, may have dropped data", "segment", segmentNum, "read", r.Offset(), "size", size) } return ErrIgnorable } @@ -403,7 +403,7 @@ func (w *Watcher) watch(segmentNum int, tail bool) error { <-gcSem }() if err := w.garbageCollectSeries(segmentNum); err != nil { - level.Warn(w.logger).Log("msg", "Error process checkpoint", "err", err) + w.logger.Warn("Error process checkpoint", "err", err) } }() default: @@ -424,7 +424,7 @@ func (w *Watcher) watch(segmentNum int, tail bool) error { // we haven't read due to a notification in quite some time, try reading anyways case <-readTicker.C: - level.Debug(w.logger).Log("msg", "Watcher is reading the WAL due to timeout, haven't received any write notifications recently", "timeout", readTimeout) + w.logger.Debug("Watcher is reading the WAL due to timeout, haven't received any write notifications recently", "timeout", readTimeout) err := w.readAndHandleError(reader, segmentNum, tail, size) if err != nil { return err @@ -460,11 +460,11 @@ func (w *Watcher) garbageCollectSeries(segmentNum int) error { } if index >= segmentNum { - level.Debug(w.logger).Log("msg", "Current segment is behind the checkpoint, skipping reading of checkpoint", "current", fmt.Sprintf("%08d", segmentNum), "checkpoint", dir) + w.logger.Debug("Current segment is behind the checkpoint, skipping reading of checkpoint", "current", fmt.Sprintf("%08d", segmentNum), "checkpoint", dir) return nil } - level.Debug(w.logger).Log("msg", "New checkpoint detected", "new", dir, "currentSegment", segmentNum) + w.logger.Debug("New checkpoint detected", "new", dir, "currentSegment", segmentNum) if err = w.readCheckpoint(dir, (*Watcher).readSegmentForGC); err != nil { return fmt.Errorf("readCheckpoint: %w", err) @@ -519,7 +519,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { if !w.sendSamples { w.sendSamples = true duration := time.Since(w.startTime) - level.Info(w.logger).Log("msg", "Done replaying WAL", "duration", duration) + w.logger.Info("Done replaying WAL", "duration", duration) } samplesToSend = append(samplesToSend, s) } @@ -564,7 +564,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { if !w.sendSamples { w.sendSamples = true duration := time.Since(w.startTime) - level.Info(w.logger).Log("msg", "Done replaying WAL", "duration", duration) + w.logger.Info("Done replaying WAL", "duration", duration) } histogramsToSend = append(histogramsToSend, h) } @@ -592,7 +592,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) 
error { if !w.sendSamples { w.sendSamples = true duration := time.Since(w.startTime) - level.Info(w.logger).Log("msg", "Done replaying WAL", "duration", duration) + w.logger.Info("Done replaying WAL", "duration", duration) } floatHistogramsToSend = append(floatHistogramsToSend, fh) } @@ -670,7 +670,7 @@ type segmentReadFn func(w *Watcher, r *LiveReader, segmentNum int, tail bool) er // Read all the series records from a Checkpoint directory. func (w *Watcher) readCheckpoint(checkpointDir string, readFn segmentReadFn) error { - level.Debug(w.logger).Log("msg", "Reading checkpoint", "dir", checkpointDir) + w.logger.Debug("Reading checkpoint", "dir", checkpointDir) index, err := checkpointNum(checkpointDir) if err != nil { return fmt.Errorf("checkpointNum: %w", err) @@ -704,7 +704,7 @@ func (w *Watcher) readCheckpoint(checkpointDir string, readFn segmentReadFn) err } } - level.Debug(w.logger).Log("msg", "Read series references from checkpoint", "checkpoint", checkpointDir) + w.logger.Debug("Read series references from checkpoint", "checkpoint", checkpointDir) return nil } diff --git a/tsdb/wlog/watcher_test.go b/tsdb/wlog/watcher_test.go index dc0314e8c9..68c2c5afda 100644 --- a/tsdb/wlog/watcher_test.go +++ b/tsdb/wlog/watcher_test.go @@ -22,9 +22,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" + "golang.org/x/sync/errgroup" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -52,6 +53,13 @@ func retry(t *testing.T, interval time.Duration, n int, f func() bool) { t.Logf("function returned false") } +// Overwrite readTimeout defined in watcher.go. +func overwriteReadTimeout(t *testing.T, val time.Duration) { + initialVal := readTimeout + readTimeout = val + t.Cleanup(func() { readTimeout = initialVal }) +} + type writeToMock struct { samplesAppended int exemplarsAppended int @@ -302,7 +310,7 @@ func TestReadToEndNoCheckpoint(t *testing.T) { } } require.NoError(t, w.Log(recs...)) - readTimeout = time.Second + overwriteReadTimeout(t, time.Second) _, _, err = Segments(w.Dir()) require.NoError(t, err) @@ -367,7 +375,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { } } - Checkpoint(log.NewNopLogger(), w, 0, 1, func(x chunks.HeadSeriesRef) bool { return true }, 0) + Checkpoint(promslog.NewNopLogger(), w, 0, 1, func(x chunks.HeadSeriesRef) bool { return true }, 0) w.Truncate(1) // Write more records after checkpointing. 
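The tsdb/wlog changes above apply one mechanical pattern: go-kit call sites such as level.Info(w.logger).Log("msg", "Starting WAL watcher", "queue", w.name) become direct log/slog calls, and nil loggers now default to promslog.NewNopLogger(). A minimal standalone sketch of that call-site pattern, assuming only the promslog helpers this diff already imports (the function and variable names below are illustrative, not part of the patch):

package main

import (
	"log/slog"
	"os"

	"github.com/prometheus/common/promslog"
)

// startWatcher mirrors the constructor convention used by NewWatcher/NewSize:
// accept a *slog.Logger and fall back to a no-op logger when it is nil.
func startWatcher(logger *slog.Logger, queue string) {
	if logger == nil {
		logger = promslog.NewNopLogger()
	}
	// Before: level.Info(logger).Log("msg", "Starting WAL watcher", "queue", queue)
	// After:
	logger.Info("Starting WAL watcher", "queue", queue)
}

func main() {
	// promslog.New wraps log/slog with the Prometheus defaults; Writer is any io.Writer.
	logger := promslog.New(&promslog.Config{Writer: os.Stderr})
	startWatcher(logger, "example")
}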
@@ -394,7 +402,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { _, _, err = Segments(w.Dir()) require.NoError(t, err) - readTimeout = time.Second + overwriteReadTimeout(t, time.Second) wt := newWriteToMock(0) watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) go watcher.Start() @@ -458,7 +466,7 @@ func TestReadCheckpoint(t *testing.T) { } _, err = w.NextSegmentSync() require.NoError(t, err) - _, err = Checkpoint(log.NewNopLogger(), w, 30, 31, func(x chunks.HeadSeriesRef) bool { return true }, 0) + _, err = Checkpoint(promslog.NewNopLogger(), w, 30, 31, func(x chunks.HeadSeriesRef) bool { return true }, 0) require.NoError(t, err) require.NoError(t, w.Truncate(32)) @@ -607,7 +615,7 @@ func TestCheckpointSeriesReset(t *testing.T) { _, _, err = Segments(w.Dir()) require.NoError(t, err) - readTimeout = time.Second + overwriteReadTimeout(t, time.Second) wt := newWriteToMock(0) watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) watcher.MaxSegment = -1 @@ -621,7 +629,7 @@ func TestCheckpointSeriesReset(t *testing.T) { return wt.checkNumSeries() == seriesCount }, 10*time.Second, 1*time.Second) - _, err = Checkpoint(log.NewNopLogger(), w, 2, 4, func(x chunks.HeadSeriesRef) bool { return true }, 0) + _, err = Checkpoint(promslog.NewNopLogger(), w, 2, 4, func(x chunks.HeadSeriesRef) bool { return true }, 0) require.NoError(t, err) err = w.Truncate(5) @@ -742,9 +750,6 @@ func TestRun_AvoidNotifyWhenBehind(t *testing.T) { const seriesCount = 10 const samplesCount = 50 - // This test can take longer than intended to finish in cloud CI. - readTimeout := 10 * time.Second - for _, compress := range []CompressionType{CompressionNone, CompressionSnappy, CompressionZstd} { t.Run(string(compress), func(t *testing.T) { dir := t.TempDir() @@ -755,36 +760,50 @@ func TestRun_AvoidNotifyWhenBehind(t *testing.T) { w, err := NewSize(nil, nil, wdir, segmentSize, compress) require.NoError(t, err) - var wg sync.WaitGroup - // Generate one segment initially to ensure that watcher.Run() finds at least one segment on disk. + // Write to 00000000, the watcher will read series from it. require.NoError(t, generateWALRecords(w, 0, seriesCount, samplesCount)) - w.NextSegment() // Force creation of the next segment - wg.Add(1) - go func() { - defer wg.Done() - for i := 1; i < segmentsToWrite; i++ { - require.NoError(t, generateWALRecords(w, i, seriesCount, samplesCount)) - w.NextSegment() - } - }() + // Create 00000001, the watcher will tail it once started. + w.NextSegment() + // Set up the watcher and run it in the background. wt := newWriteToMock(time.Millisecond) watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) + watcher.setMetrics() watcher.MaxSegment = segmentsToRead - watcher.setMetrics() - startTime := time.Now() - err = watcher.Run() - wg.Wait() - require.Less(t, time.Since(startTime), readTimeout) + var g errgroup.Group + g.Go(func() error { + startTime := time.Now() + err = watcher.Run() + if err != nil { + return err + } + // If the watcher was to wait for readTicker to read every new segment, it would need readTimeout * segmentsToRead. + d := time.Since(startTime) + if d > readTimeout { + return fmt.Errorf("watcher ran for %s, it shouldn't rely on readTicker=%s to read the new segments", d, readTimeout) + } + return nil + }) - // But samples records shouldn't get dropped + // The watcher went through 00000000 and is tailing the next one. 
retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumSeries() > 0 + return wt.checkNumSeries() == seriesCount }) - require.Equal(t, segmentsToRead*seriesCount*samplesCount, wt.samplesAppended) - require.NoError(t, err) + // In the meantime, add some new segments in bulk. + // We should end up with segmentsToWrite + 1 segments now. + for i := 1; i < segmentsToWrite; i++ { + require.NoError(t, generateWALRecords(w, i, seriesCount, samplesCount)) + w.NextSegment() + } + + // Wait for the watcher. + require.NoError(t, g.Wait()) + + // All series and samples were read. + require.Equal(t, (segmentsToRead+1)*seriesCount, wt.checkNumSeries()) // Series from 00000000 are also read. + require.Equal(t, segmentsToRead*seriesCount*samplesCount, wt.samplesAppended) require.NoError(t, w.Close()) }) } diff --git a/tsdb/wlog/wlog.go b/tsdb/wlog/wlog.go index b14521f358..54c257d61a 100644 --- a/tsdb/wlog/wlog.go +++ b/tsdb/wlog/wlog.go @@ -21,6 +21,7 @@ import ( "fmt" "hash/crc32" "io" + "log/slog" "os" "path/filepath" "slices" @@ -28,11 +29,10 @@ import ( "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/golang/snappy" "github.com/klauspost/compress/zstd" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/tsdb/fileutil" ) @@ -121,7 +121,7 @@ func (e *CorruptionErr) Unwrap() error { } // OpenWriteSegment opens segment k in dir. The returned segment is ready for new appends. -func OpenWriteSegment(logger log.Logger, dir string, k int) (*Segment, error) { +func OpenWriteSegment(logger *slog.Logger, dir string, k int) (*Segment, error) { segName := SegmentName(dir, k) f, err := os.OpenFile(segName, os.O_WRONLY|os.O_APPEND, 0o666) if err != nil { @@ -138,7 +138,7 @@ func OpenWriteSegment(logger log.Logger, dir string, k int) (*Segment, error) { // If it was torn mid-record, a full read (which the caller should do anyway // to ensure integrity) will detect it as a corruption by the end. if d := stat.Size() % pageSize; d != 0 { - level.Warn(logger).Log("msg", "Last page of the wlog is torn, filling it with zeros", "segment", segName) + logger.Warn("Last page of the wlog is torn, filling it with zeros", "segment", segName) if _, err := f.Write(make([]byte, pageSize-d)); err != nil { f.Close() return nil, fmt.Errorf("zero-pad torn page: %w", err) @@ -201,7 +201,7 @@ func ParseCompressionType(compress bool, compressType string) CompressionType { // beyond the most recent segment. type WL struct { dir string - logger log.Logger + logger *slog.Logger segmentSize int mtx sync.RWMutex segment *Segment // Active segment. @@ -286,7 +286,7 @@ func newWLMetrics(w *WL, r prometheus.Registerer) *wlMetrics { }, func() float64 { val, err := w.Size() if err != nil { - level.Error(w.logger).Log("msg", "Failed to calculate size of \"wal\" dir", + w.logger.Error("Failed to calculate size of \"wal\" dir", "err", err.Error()) } return float64(val) @@ -309,13 +309,13 @@ func newWLMetrics(w *WL, r prometheus.Registerer) *wlMetrics { } // New returns a new WAL over the given directory. -func New(logger log.Logger, reg prometheus.Registerer, dir string, compress CompressionType) (*WL, error) { +func New(logger *slog.Logger, reg prometheus.Registerer, dir string, compress CompressionType) (*WL, error) { return NewSize(logger, reg, dir, DefaultSegmentSize, compress) } // NewSize returns a new write log over the given directory. // New segments are created with the specified size. 
-func NewSize(logger log.Logger, reg prometheus.Registerer, dir string, segmentSize int, compress CompressionType) (*WL, error) { +func NewSize(logger *slog.Logger, reg prometheus.Registerer, dir string, segmentSize int, compress CompressionType) (*WL, error) { if segmentSize%pageSize != 0 { return nil, errors.New("invalid segment size") } @@ -323,7 +323,7 @@ func NewSize(logger log.Logger, reg prometheus.Registerer, dir string, segmentSi return nil, fmt.Errorf("create dir: %w", err) } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } var zstdWriter *zstd.Encoder @@ -378,9 +378,9 @@ func NewSize(logger log.Logger, reg prometheus.Registerer, dir string, segmentSi } // Open an existing WAL. -func Open(logger log.Logger, dir string) (*WL, error) { +func Open(logger *slog.Logger, dir string) (*WL, error) { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } zstdWriter, err := zstd.NewWriter(nil) if err != nil { @@ -443,7 +443,7 @@ func (w *WL) Repair(origErr error) error { if cerr.Segment < 0 { return errors.New("corruption error does not specify position") } - level.Warn(w.logger).Log("msg", "Starting corruption repair", + w.logger.Warn("Starting corruption repair", "segment", cerr.Segment, "offset", cerr.Offset) // All segments behind the corruption can no longer be used. @@ -451,7 +451,7 @@ func (w *WL) Repair(origErr error) error { if err != nil { return fmt.Errorf("list segments: %w", err) } - level.Warn(w.logger).Log("msg", "Deleting all segments newer than corrupted segment", "segment", cerr.Segment) + w.logger.Warn("Deleting all segments newer than corrupted segment", "segment", cerr.Segment) for _, s := range segs { if w.segment.i == s.index { @@ -473,7 +473,7 @@ func (w *WL) Repair(origErr error) error { // Regardless of the corruption offset, no record reaches into the previous segment. // So we can safely repair the WAL by removing the segment and re-inserting all // its records up to the corruption. - level.Warn(w.logger).Log("msg", "Rewrite corrupted segment", "segment", cerr.Segment) + w.logger.Warn("Rewrite corrupted segment", "segment", cerr.Segment) fn := SegmentName(w.Dir(), cerr.Segment) tmpfn := fn + ".repair" @@ -583,10 +583,10 @@ func (w *WL) nextSegment(async bool) (int, error) { // Don't block further writes by fsyncing the last segment. 
f := func() { if err := w.fsync(prev); err != nil { - level.Error(w.logger).Log("msg", "sync previous segment", "err", err) + w.logger.Error("sync previous segment", "err", err) } if err := prev.Close(); err != nil { - level.Error(w.logger).Log("msg", "close previous segment", "err", err) + w.logger.Error("close previous segment", "err", err) } } if async { @@ -890,10 +890,10 @@ func (w *WL) Close() (err error) { <-donec if err = w.fsync(w.segment); err != nil { - level.Error(w.logger).Log("msg", "sync previous segment", "err", err) + w.logger.Error("sync previous segment", "err", err) } if err := w.segment.Close(); err != nil { - level.Error(w.logger).Log("msg", "close previous segment", "err", err) + w.logger.Error("close previous segment", "err", err) } w.metrics.Unregister() diff --git a/tsdb/wlog/wlog_test.go b/tsdb/wlog/wlog_test.go index 39bc815e9a..5380798b42 100644 --- a/tsdb/wlog/wlog_test.go +++ b/tsdb/wlog/wlog_test.go @@ -23,9 +23,9 @@ import ( "path/filepath" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" client_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/tsdb/fileutil" @@ -214,7 +214,7 @@ func TestCorruptAndCarryOn(t *testing.T) { dir := t.TempDir() var ( - logger = testutil.NewLogger(t) + logger = promslog.NewNopLogger() segmentSize = pageSize * 3 recordSize = (pageSize / 3) - recordHeaderSize ) @@ -567,7 +567,7 @@ func TestUnregisterMetrics(t *testing.T) { reg := prometheus.NewRegistry() for i := 0; i < 2; i++ { - wl, err := New(log.NewNopLogger(), reg, t.TempDir(), CompressionNone) + wl, err := New(promslog.NewNopLogger(), reg, t.TempDir(), CompressionNone) require.NoError(t, err) require.NoError(t, wl.Close()) } diff --git a/ui-commits b/ui-commits new file mode 100644 index 0000000000..7f34e1f95a --- /dev/null +++ b/ui-commits @@ -0,0 +1,12 @@ +dfec29d8e Fix border color for target pools with one target that is failing +65743bf9b ui: drop template readme +a7c1a951d Add general Mantine overrides CSS file +0757fbbec Make sure that alert element table headers are not wrapped +0180cf31a Factor out common icon and card styles +50af7d589 Fix tree line drawing by using a callback ref +ac01dc903 Explain, vector-to-vector: Do not compute results for set operators +9b0dc68d0 PromQL explain view: Support set operators +57898c792 Refactor and fix time formatting functions, add tests +091fc403c Fiddle with targets table styles to try and improve things a bit +a1908df92 Don't wrap action buttons below metric name in metrics explorer +ac5377873 mantine UI: Distinguish between Not Ready and Stopping diff --git a/util/annotations/annotations.go b/util/annotations/annotations.go index b0272b7fee..ebe74ecd11 100644 --- a/util/annotations/annotations.go +++ b/util/annotations/annotations.go @@ -146,6 +146,7 @@ var ( PossibleNonCounterInfo = fmt.Errorf("%w: metric might not be a counter, name does not end in _total/_sum/_count/_bucket:", PromQLInfo) HistogramQuantileForcedMonotonicityInfo = fmt.Errorf("%w: input to histogram_quantile needed to be fixed for monotonicity (see https://prometheus.io/docs/prometheus/latest/querying/functions/#histogram_quantile) for metric name", PromQLInfo) + IncompatibleTypesInBinOpInfo = fmt.Errorf("%w: incompatible sample types encountered for binary operator", PromQLInfo) ) type annoErr struct { @@ -273,3 +274,12 @@ func NewHistogramQuantileForcedMonotonicityInfo(metricName string, pos 
posrange. Err: fmt.Errorf("%w %q", HistogramQuantileForcedMonotonicityInfo, metricName), } } + +// NewIncompatibleTypesInBinOpInfo is used if binary operators act on a +// combination of types that doesn't work and therefore returns no result. +func NewIncompatibleTypesInBinOpInfo(lhsType, operator, rhsType string, pos posrange.PositionRange) error { + return annoErr{ + PositionRange: pos, + Err: fmt.Errorf("%w %q: %s %s %s", IncompatibleTypesInBinOpInfo, operator, lhsType, operator, rhsType), + } +} diff --git a/util/convertnhcb/convertnhcb.go b/util/convertnhcb/convertnhcb.go new file mode 100644 index 0000000000..5e08422aa0 --- /dev/null +++ b/util/convertnhcb/convertnhcb.go @@ -0,0 +1,173 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package convertnhcb + +import ( + "fmt" + "math" + "sort" + "strings" + + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" +) + +// TempHistogram is used to collect information about classic histogram +// samples incrementally before creating a histogram.Histogram or +// histogram.FloatHistogram based on the values collected. +type TempHistogram struct { + BucketCounts map[float64]float64 + Count float64 + Sum float64 + HasFloat bool +} + +// NewTempHistogram creates a new TempHistogram to +// collect information about classic histogram samples. +func NewTempHistogram() TempHistogram { + return TempHistogram{ + BucketCounts: map[float64]float64{}, + } +} + +func (h TempHistogram) getIntBucketCounts() (map[float64]int64, error) { + bucketCounts := map[float64]int64{} + for le, count := range h.BucketCounts { + intCount := int64(math.Round(count)) + if float64(intCount) != count { + return nil, fmt.Errorf("bucket count %f for le %g is not an integer", count, le) + } + bucketCounts[le] = intCount + } + return bucketCounts, nil +} + +// ProcessUpperBoundsAndCreateBaseHistogram prepares an integer native +// histogram with custom buckets based on the provided upper bounds. +// Everything is set except the bucket counts. +// The sorted upper bounds are also returned. 
+func ProcessUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64, needsDedup bool) ([]float64, *histogram.Histogram) { + sort.Float64s(upperBounds0) + var upperBounds []float64 + if needsDedup { + upperBounds = make([]float64, 0, len(upperBounds0)) + prevLE := math.Inf(-1) + for _, le := range upperBounds0 { + if le != prevLE { + upperBounds = append(upperBounds, le) + prevLE = le + } + } + } else { + upperBounds = upperBounds0 + } + var customBounds []float64 + if upperBounds[len(upperBounds)-1] == math.Inf(1) { + customBounds = upperBounds[:len(upperBounds)-1] + } else { + customBounds = upperBounds + } + return upperBounds, &histogram.Histogram{ + Count: 0, + Sum: 0, + Schema: histogram.CustomBucketsSchema, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: uint32(len(upperBounds))}, + }, + PositiveBuckets: make([]int64, len(upperBounds)), + CustomValues: customBounds, + } +} + +// NewHistogram fills the bucket counts in the provided histogram.Histogram +// or histogram.FloatHistogram based on the provided temporary histogram and +// upper bounds. +func NewHistogram(histogram TempHistogram, upperBounds []float64, hBase *histogram.Histogram, fhBase *histogram.FloatHistogram) (*histogram.Histogram, *histogram.FloatHistogram) { + intBucketCounts, err := histogram.getIntBucketCounts() + if err != nil { + return nil, newFloatHistogram(histogram, upperBounds, histogram.BucketCounts, fhBase) + } + return newIntegerHistogram(histogram, upperBounds, intBucketCounts, hBase), nil +} + +func newIntegerHistogram(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]int64, hBase *histogram.Histogram) *histogram.Histogram { + h := hBase.Copy() + absBucketCounts := make([]int64, len(h.PositiveBuckets)) + var prevCount, total int64 + for i, le := range upperBounds { + currCount, exists := bucketCounts[le] + if !exists { + currCount = 0 + } + count := currCount - prevCount + absBucketCounts[i] = count + total += count + prevCount = currCount + } + h.PositiveBuckets[0] = absBucketCounts[0] + for i := 1; i < len(h.PositiveBuckets); i++ { + h.PositiveBuckets[i] = absBucketCounts[i] - absBucketCounts[i-1] + } + h.Sum = histogram.Sum + if histogram.Count != 0 { + total = int64(histogram.Count) + } + h.Count = uint64(total) + return h.Compact(0) +} + +func newFloatHistogram(histogram TempHistogram, upperBounds []float64, bucketCounts map[float64]float64, fhBase *histogram.FloatHistogram) *histogram.FloatHistogram { + fh := fhBase.Copy() + var prevCount, total float64 + for i, le := range upperBounds { + currCount, exists := bucketCounts[le] + if !exists { + currCount = 0 + } + count := currCount - prevCount + fh.PositiveBuckets[i] = count + total += count + prevCount = currCount + } + fh.Sum = histogram.Sum + if histogram.Count != 0 { + total = histogram.Count + } + fh.Count = total + return fh.Compact(0) +} + +func GetHistogramMetricBase(m labels.Labels, suffix string) labels.Labels { + mName := m.Get(labels.MetricName) + return labels.NewBuilder(m). + Set(labels.MetricName, strings.TrimSuffix(mName, suffix)). + Del(labels.BucketLabel). + Labels() +} + +// GetHistogramMetricBaseName removes the suffixes _bucket, _sum, _count from +// the metric name. We specifically do not remove the _created suffix as that +// should be removed by the caller. 
+func GetHistogramMetricBaseName(s string) string { + if r, ok := strings.CutSuffix(s, "_bucket"); ok { + return r + } + if r, ok := strings.CutSuffix(s, "_sum"); ok { + return r + } + if r, ok := strings.CutSuffix(s, "_count"); ok { + return r + } + return s +} diff --git a/util/fmtutil/format.go b/util/fmtutil/format.go index 9034a90fa7..a10908bb8c 100644 --- a/util/fmtutil/format.go +++ b/util/fmtutil/format.go @@ -113,7 +113,7 @@ func makeTimeseries(wr *prompb.WriteRequest, labels map[string]string, m *dto.Me toTimeseries(wr, labels, timestamp, m.GetCounter().GetValue()) case m.Summary != nil: metricName := labels[model.MetricNameLabel] - // Preserve metric name order with first quantile labels timeseries then sum suffix timeserie and finally count suffix timeserie + // Preserve metric name order with first quantile labels timeseries then sum suffix timeseries and finally count suffix timeseries // Add Summary quantile timeseries quantileLabels := make(map[string]string, len(labels)+1) for key, value := range labels { @@ -125,16 +125,16 @@ func makeTimeseries(wr *prompb.WriteRequest, labels map[string]string, m *dto.Me toTimeseries(wr, quantileLabels, timestamp, q.GetValue()) } // Overwrite label model.MetricNameLabel for count and sum metrics - // Add Summary sum timeserie + // Add Summary sum timeseries labels[model.MetricNameLabel] = metricName + sumStr toTimeseries(wr, labels, timestamp, m.GetSummary().GetSampleSum()) - // Add Summary count timeserie + // Add Summary count timeseries labels[model.MetricNameLabel] = metricName + countStr toTimeseries(wr, labels, timestamp, float64(m.GetSummary().GetSampleCount())) case m.Histogram != nil: metricName := labels[model.MetricNameLabel] - // Preserve metric name order with first bucket suffix timeseries then sum suffix timeserie and finally count suffix timeserie + // Preserve metric name order with first bucket suffix timeseries then sum suffix timeseries and finally count suffix timeseries // Add Histogram bucket timeseries bucketLabels := make(map[string]string, len(labels)+1) for key, value := range labels { @@ -146,10 +146,10 @@ func makeTimeseries(wr *prompb.WriteRequest, labels map[string]string, m *dto.Me toTimeseries(wr, bucketLabels, timestamp, float64(b.GetCumulativeCount())) } // Overwrite label model.MetricNameLabel for count and sum metrics - // Add Histogram sum timeserie + // Add Histogram sum timeseries labels[model.MetricNameLabel] = metricName + sumStr toTimeseries(wr, labels, timestamp, m.GetHistogram().GetSampleSum()) - // Add Histogram count timeserie + // Add Histogram count timeseries labels[model.MetricNameLabel] = metricName + countStr toTimeseries(wr, labels, timestamp, float64(m.GetHistogram().GetSampleCount())) diff --git a/util/logging/dedupe.go b/util/logging/dedupe.go index d490a6afdf..d5aee5c095 100644 --- a/util/logging/dedupe.go +++ b/util/logging/dedupe.go @@ -14,12 +14,10 @@ package logging import ( - "bytes" + "context" + "log/slog" "sync" "time" - - "github.com/go-kit/log" - "github.com/go-logfmt/logfmt" ) const ( @@ -28,22 +26,9 @@ const ( maxEntries = 1024 ) -type logfmtEncoder struct { - *logfmt.Encoder - buf bytes.Buffer -} - -var logfmtEncoderPool = sync.Pool{ - New: func() interface{} { - var enc logfmtEncoder - enc.Encoder = logfmt.NewEncoder(&enc.buf) - return &enc - }, -} - -// Deduper implement log.Logger, dedupes log lines. +// Deduper implements *slog.Handler, dedupes log lines based on a time duration. 
type Deduper struct { - next log.Logger + next *slog.Logger repeat time.Duration quit chan struct{} mtx sync.RWMutex @@ -51,7 +36,7 @@ type Deduper struct { } // Dedupe log lines to next, only repeating every repeat duration. -func Dedupe(next log.Logger, repeat time.Duration) *Deduper { +func Dedupe(next *slog.Logger, repeat time.Duration) *Deduper { d := &Deduper{ next: next, repeat: repeat, @@ -62,6 +47,63 @@ func Dedupe(next log.Logger, repeat time.Duration) *Deduper { return d } +// Enabled returns true if the Deduper's internal slog.Logger is enabled at the +// provided context and log level, and returns false otherwise. It implements +// slog.Handler. +func (d *Deduper) Enabled(ctx context.Context, level slog.Level) bool { + return d.next.Enabled(ctx, level) +} + +// Handle uses the provided context and slog.Record to deduplicate messages +// every 1m. Log records received within the interval are not acted on, and +// thus dropped. Log records that pass deduplication and need action invoke the +// Handle() method on the Deduper's internal slog.Logger's handler, effectively +// chaining log calls to the internal slog.Logger. +func (d *Deduper) Handle(ctx context.Context, r slog.Record) error { + line := r.Message + d.mtx.RLock() + last, ok := d.seen[line] + d.mtx.RUnlock() + + if ok && time.Since(last) < d.repeat { + return nil + } + + d.mtx.Lock() + if len(d.seen) < maxEntries { + d.seen[line] = time.Now() + } + d.mtx.Unlock() + + return d.next.Handler().Handle(ctx, r.Clone()) +} + +// WithAttrs adds the provided attributes to the Deduper's internal +// slog.Logger. It implements slog.Handler. +func (d *Deduper) WithAttrs(attrs []slog.Attr) slog.Handler { + return &Deduper{ + next: slog.New(d.next.Handler().WithAttrs(attrs)), + repeat: d.repeat, + quit: d.quit, + seen: d.seen, + } +} + +// WithGroup adds the provided group name to the Deduper's internal +// slog.Logger. It implements slog.Handler. +func (d *Deduper) WithGroup(name string) slog.Handler { + if name == "" { + return d + } + + return &Deduper{ + next: slog.New(d.next.Handler().WithGroup(name)), + repeat: d.repeat, + quit: d.quit, + seen: d.seen, + } +} + // Stop the Deduper. func (d *Deduper) Stop() { close(d.quit) @@ -87,44 +129,3 @@ func (d *Deduper) run() { } } } - -// Log implements log.Logger. -func (d *Deduper) Log(keyvals ...interface{}) error { - line, err := encode(keyvals...) - if err != nil { - return err - } - - d.mtx.RLock() - last, ok := d.seen[line] - d.mtx.RUnlock() - - if ok && time.Since(last) < d.repeat { - return nil - } - - d.mtx.Lock() - if len(d.seen) < maxEntries { - d.seen[line] = time.Now() - } - d.mtx.Unlock() - - return d.next.Log(keyvals...) 
-} - -func encode(keyvals ...interface{}) (string, error) { - enc := logfmtEncoderPool.Get().(*logfmtEncoder) - enc.buf.Reset() - defer logfmtEncoderPool.Put(enc) - - if err := enc.EncodeKeyvals(keyvals...); err != nil { - return "", err - } - - // Add newline to the end of the buffer - if err := enc.EndRecord(); err != nil { - return "", err - } - - return enc.buf.String(), nil -} diff --git a/util/logging/dedupe_test.go b/util/logging/dedupe_test.go index e05d6454c5..5baa90b038 100644 --- a/util/logging/dedupe_test.go +++ b/util/logging/dedupe_test.go @@ -14,34 +14,45 @@ package logging import ( + "bytes" + "log/slog" + "strings" "testing" "time" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" ) -type counter int - -func (c *counter) Log(...interface{}) error { - (*c)++ - return nil -} - func TestDedupe(t *testing.T) { - var c counter - d := Dedupe(&c, 100*time.Millisecond) + var buf bytes.Buffer + d := Dedupe(promslog.New(&promslog.Config{Writer: &buf}), 100*time.Millisecond) + dlog := slog.New(d) defer d.Stop() // Log 10 times quickly, ensure they are deduped. for i := 0; i < 10; i++ { - err := d.Log("msg", "hello") - require.NoError(t, err) + dlog.Info("test", "hello", "world") } - require.Equal(t, 1, int(c)) + + // Trim empty lines + lines := []string{} + for _, line := range strings.Split(buf.String(), "\n") { + if line != "" { + lines = append(lines, line) + } + } + require.Len(t, lines, 1) // Wait, then log again, make sure it is logged. time.Sleep(200 * time.Millisecond) - err := d.Log("msg", "hello") - require.NoError(t, err) - require.Equal(t, 2, int(c)) + dlog.Info("test", "hello", "world") + // Trim empty lines + lines = []string{} + for _, line := range strings.Split(buf.String(), "\n") { + if line != "" { + lines = append(lines, line) + } + } + require.Len(t, lines, 2) } diff --git a/util/logging/file.go b/util/logging/file.go index 2afa828547..f20927beda 100644 --- a/util/logging/file.go +++ b/util/logging/file.go @@ -15,20 +15,15 @@ package logging import ( "fmt" + "log/slog" "os" - "time" - "github.com/go-kit/log" + "github.com/prometheus/common/promslog" ) -var timestampFormat = log.TimestampFormat( - func() time.Time { return time.Now().UTC() }, - "2006-01-02T15:04:05.000Z07:00", -) - -// JSONFileLogger represents a logger that writes JSON to a file. +// JSONFileLogger represents a logger that writes JSON to a file. It implements the promql.QueryLogger interface. type JSONFileLogger struct { - logger log.Logger + logger *slog.Logger file *os.File } @@ -40,21 +35,48 @@ func NewJSONFileLogger(s string) (*JSONFileLogger, error) { f, err := os.OpenFile(s, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o666) if err != nil { - return nil, fmt.Errorf("can't create json logger: %w", err) + return nil, fmt.Errorf("can't create json log file: %w", err) } + jsonFmt := &promslog.AllowedFormat{} + _ = jsonFmt.Set("json") return &JSONFileLogger{ - logger: log.With(log.NewJSONLogger(f), "ts", timestampFormat), + logger: promslog.New(&promslog.Config{Format: jsonFmt, Writer: f}), file: f, }, nil } -// Close closes the underlying file. +// Close closes the underlying file. It implements the promql.QueryLogger interface. func (l *JSONFileLogger) Close() error { return l.file.Close() } -// Log calls the Log function of the underlying logger. -func (l *JSONFileLogger) Log(i ...interface{}) error { - return l.logger.Log(i...) +// With calls the `With()` method on the underlying `log/slog.Logger` with the +// provided msg and args. 
It implements the promql.QueryLogger interface. +func (l *JSONFileLogger) With(args ...any) { + l.logger = l.logger.With(args...) +} + +// Info calls the `Info()` method on the underlying `log/slog.Logger` with the +// provided msg and args. It implements the promql.QueryLogger interface. +func (l *JSONFileLogger) Info(msg string, args ...any) { + l.logger.Info(msg, args...) +} + +// Error calls the `Error()` method on the underlying `log/slog.Logger` with the +// provided msg and args. It implements the promql.QueryLogger interface. +func (l *JSONFileLogger) Error(msg string, args ...any) { + l.logger.Error(msg, args...) +} + +// Debug calls the `Debug()` method on the underlying `log/slog.Logger` with the +// provided msg and args. It implements the promql.QueryLogger interface. +func (l *JSONFileLogger) Debug(msg string, args ...any) { + l.logger.Debug(msg, args...) +} + +// Warn calls the `Warn()` method on the underlying `log/slog.Logger` with the +// provided msg and args. It implements the promql.QueryLogger interface. +func (l *JSONFileLogger) Warn(msg string, args ...any) { + l.logger.Warn(msg, args...) } diff --git a/util/logging/file_test.go b/util/logging/file_test.go index 0e760a4848..8ab4754339 100644 --- a/util/logging/file_test.go +++ b/util/logging/file_test.go @@ -34,12 +34,13 @@ func TestJSONFileLogger_basic(t *testing.T) { require.NoError(t, err) require.NotNil(t, l, "logger can't be nil") - err = l.Log("test", "yes") + l.Info("test", "hello", "world") require.NoError(t, err) r := make([]byte, 1024) _, err = f.Read(r) require.NoError(t, err) - result, err := regexp.Match(`^{"test":"yes","ts":"[^"]+"}\n`, r) + + result, err := regexp.Match(`^{"time":"[^"]+","level":"INFO","source":\{.+\},"msg":"test","hello":"world"}\n`, r) require.NoError(t, err) require.True(t, result, "unexpected content: %s", r) @@ -63,14 +64,14 @@ func TestJSONFileLogger_parallel(t *testing.T) { require.NoError(t, err) require.NotNil(t, l, "logger can't be nil") - err = l.Log("test", "yes") + l.Info("test", "hello", "world") require.NoError(t, err) l2, err := NewJSONFileLogger(f.Name()) require.NoError(t, err) require.NotNil(t, l, "logger can't be nil") - err = l2.Log("test", "yes") + l2.Info("test", "hello", "world") require.NoError(t, err) err = l.Close() diff --git a/util/logging/ratelimit.go b/util/logging/ratelimit.go deleted file mode 100644 index 32d1e249e6..0000000000 --- a/util/logging/ratelimit.go +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2019 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package logging - -import ( - "github.com/go-kit/log" - "golang.org/x/time/rate" -) - -type ratelimiter struct { - limiter *rate.Limiter - next log.Logger -} - -// RateLimit write to a logger. -func RateLimit(next log.Logger, limit rate.Limit) log.Logger { - return &ratelimiter{ - limiter: rate.NewLimiter(limit, int(limit)), - next: next, - } -} - -func (r *ratelimiter) Log(keyvals ...interface{}) error { - if r.limiter.Allow() { - return r.next.Log(keyvals...) 
- } - return nil -} diff --git a/util/notifications/notifications.go b/util/notifications/notifications.go new file mode 100644 index 0000000000..4888a0b664 --- /dev/null +++ b/util/notifications/notifications.go @@ -0,0 +1,185 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package notifications + +import ( + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" +) + +const ( + ConfigurationUnsuccessful = "Configuration reload has failed." + StartingUp = "Prometheus is starting and replaying the write-ahead log (WAL)." + ShuttingDown = "Prometheus is shutting down and gracefully stopping all operations." +) + +// Notification represents an individual notification message. +type Notification struct { + Text string `json:"text"` + Date time.Time `json:"date"` + Active bool `json:"active"` +} + +// Notifications stores a list of Notification objects. +// It also manages live subscribers that receive notifications via channels. +type Notifications struct { + mu sync.Mutex + notifications []Notification + subscribers map[chan Notification]struct{} // Active subscribers. + maxSubscribers int + + subscriberGauge prometheus.Gauge + notificationsSent prometheus.Counter + notificationsDropped prometheus.Counter +} + +// NewNotifications creates a new Notifications instance. +func NewNotifications(maxSubscribers int, reg prometheus.Registerer) *Notifications { + n := &Notifications{ + subscribers: make(map[chan Notification]struct{}), + maxSubscribers: maxSubscribers, + subscriberGauge: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "prometheus", + Subsystem: "api", + Name: "notification_active_subscribers", + Help: "The current number of active notification subscribers.", + }), + notificationsSent: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "api", + Name: "notification_updates_sent_total", + Help: "Total number of notification updates sent.", + }), + notificationsDropped: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "api", + Name: "notification_updates_dropped_total", + Help: "Total number of notification updates dropped.", + }), + } + + if reg != nil { + reg.MustRegister(n.subscriberGauge, n.notificationsSent, n.notificationsDropped) + } + + return n +} + +// AddNotification adds a new notification or updates the timestamp if it already exists. +func (n *Notifications) AddNotification(text string) { + n.mu.Lock() + defer n.mu.Unlock() + + for i, notification := range n.notifications { + if notification.Text == text { + n.notifications[i].Date = time.Now() + + n.notifySubscribers(n.notifications[i]) + return + } + } + + newNotification := Notification{ + Text: text, + Date: time.Now(), + Active: true, + } + n.notifications = append(n.notifications, newNotification) + + n.notifySubscribers(newNotification) +} + +// notifySubscribers sends a notification to all active subscribers. 
+func (n *Notifications) notifySubscribers(notification Notification) { + for sub := range n.subscribers { + // Non-blocking send to avoid subscriber blocking issues. + n.notificationsSent.Inc() + select { + case sub <- notification: + // Notification sent to the subscriber. + default: + // Drop the notification if the subscriber's channel is full. + n.notificationsDropped.Inc() + } + } +} + +// DeleteNotification removes the first notification that matches the provided text. +// The deleted notification is sent to subscribers with Active: false before being removed. +func (n *Notifications) DeleteNotification(text string) { + n.mu.Lock() + defer n.mu.Unlock() + + // Iterate through the notifications to find the matching text. + for i, notification := range n.notifications { + if notification.Text == text { + // Mark the notification as inactive and notify subscribers. + notification.Active = false + n.notifySubscribers(notification) + + // Remove the notification from the list. + n.notifications = append(n.notifications[:i], n.notifications[i+1:]...) + return + } + } +} + +// Get returns a copy of the list of notifications for safe access outside the struct. +func (n *Notifications) Get() []Notification { + n.mu.Lock() + defer n.mu.Unlock() + + // Return a copy of the notifications slice to avoid modifying the original slice outside. + notificationsCopy := make([]Notification, len(n.notifications)) + copy(notificationsCopy, n.notifications) + return notificationsCopy +} + +// Sub allows a client to subscribe to live notifications. +// It returns a channel where the subscriber will receive notifications and a function to unsubscribe. +// Each subscriber has its own goroutine to handle notifications and prevent blocking. +func (n *Notifications) Sub() (<-chan Notification, func(), bool) { + n.mu.Lock() + defer n.mu.Unlock() + + if len(n.subscribers) >= n.maxSubscribers { + return nil, nil, false + } + + ch := make(chan Notification, 10) // Buffered channel to prevent blocking. + + // Add the new subscriber to the list. + n.subscribers[ch] = struct{}{} + n.subscriberGauge.Set(float64(len(n.subscribers))) + + // Send all current notifications to the new subscriber. + for _, notification := range n.notifications { + ch <- notification + } + + // Unsubscribe function to remove the channel from subscribers. + unsubscribe := func() { + n.mu.Lock() + defer n.mu.Unlock() + + // Close the channel and remove it from the subscribers map. + close(ch) + delete(n.subscribers, ch) + n.subscriberGauge.Set(float64(len(n.subscribers))) + } + + return ch, unsubscribe, true +} diff --git a/util/notifications/notifications_test.go b/util/notifications/notifications_test.go new file mode 100644 index 0000000000..e487e9ce54 --- /dev/null +++ b/util/notifications/notifications_test.go @@ -0,0 +1,223 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package notifications + +import ( + "sync" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +// TestNotificationLifecycle tests adding, modifying, and deleting notifications. +func TestNotificationLifecycle(t *testing.T) { + notifs := NewNotifications(10, nil) + + // Add a notification. + notifs.AddNotification("Test Notification 1") + + // Check if the notification was added. + notifications := notifs.Get() + require.Len(t, notifications, 1, "Expected 1 notification after addition.") + require.Equal(t, "Test Notification 1", notifications[0].Text, "Notification text mismatch.") + require.True(t, notifications[0].Active, "Expected notification to be active.") + + // Modify the notification. + notifs.AddNotification("Test Notification 1") + notifications = notifs.Get() + require.Len(t, notifications, 1, "Expected 1 notification after modification.") + + // Delete the notification. + notifs.DeleteNotification("Test Notification 1") + notifications = notifs.Get() + require.Empty(t, notifications, "Expected no notifications after deletion.") +} + +// TestSubscriberReceivesNotifications tests that a subscriber receives notifications, including modifications and deletions. +func TestSubscriberReceivesNotifications(t *testing.T) { + notifs := NewNotifications(10, nil) + + // Subscribe to notifications. + sub, unsubscribe, ok := notifs.Sub() + require.True(t, ok) + + var wg sync.WaitGroup + wg.Add(1) + + receivedNotifications := make([]Notification, 0) + + // Goroutine to listen for notifications. + go func() { + defer wg.Done() + for notification := range sub { + receivedNotifications = append(receivedNotifications, notification) + } + }() + + // Add notifications. + notifs.AddNotification("Test Notification 1") + notifs.AddNotification("Test Notification 2") + + // Modify a notification. + notifs.AddNotification("Test Notification 1") + + // Delete a notification. + notifs.DeleteNotification("Test Notification 2") + + // Wait for notifications to propagate. + time.Sleep(100 * time.Millisecond) + + unsubscribe() + wg.Wait() // Wait for the subscriber goroutine to finish. + + // Verify that we received the expected number of notifications. + require.Len(t, receivedNotifications, 4, "Expected 4 notifications (2 active, 1 modified, 1 deleted).") + + // Check the content and state of received notifications. + expected := []struct { + Text string + Active bool + }{ + {"Test Notification 1", true}, + {"Test Notification 2", true}, + {"Test Notification 1", true}, + {"Test Notification 2", false}, + } + + for i, n := range receivedNotifications { + require.Equal(t, expected[i].Text, n.Text, "Notification text mismatch at index %d.", i) + require.Equal(t, expected[i].Active, n.Active, "Notification active state mismatch at index %d.", i) + } +} + +// TestMultipleSubscribers tests that multiple subscribers receive notifications independently. +func TestMultipleSubscribers(t *testing.T) { + notifs := NewNotifications(10, nil) + + // Subscribe two subscribers to notifications. + sub1, unsubscribe1, ok1 := notifs.Sub() + require.True(t, ok1) + + sub2, unsubscribe2, ok2 := notifs.Sub() + require.True(t, ok2) + + var wg sync.WaitGroup + wg.Add(2) + + receivedSub1 := make([]Notification, 0) + receivedSub2 := make([]Notification, 0) + + // Goroutine for subscriber 1. + go func() { + defer wg.Done() + for notification := range sub1 { + receivedSub1 = append(receivedSub1, notification) + } + }() + + // Goroutine for subscriber 2. 
+ go func() { + defer wg.Done() + for notification := range sub2 { + receivedSub2 = append(receivedSub2, notification) + } + }() + + // Add and delete notifications. + notifs.AddNotification("Test Notification 1") + notifs.DeleteNotification("Test Notification 1") + + // Wait for notifications to propagate. + time.Sleep(100 * time.Millisecond) + + // Unsubscribe both. + unsubscribe1() + unsubscribe2() + + wg.Wait() + + // Both subscribers should have received the same 2 notifications. + require.Len(t, receivedSub1, 2, "Expected 2 notifications for subscriber 1.") + require.Len(t, receivedSub2, 2, "Expected 2 notifications for subscriber 2.") + + // Verify that both subscribers received the same notifications. + for i := 0; i < 2; i++ { + require.Equal(t, receivedSub1[i], receivedSub2[i], "Subscriber notification mismatch at index %d.", i) + } +} + +// TestUnsubscribe tests that unsubscribing prevents further notifications from being received. +func TestUnsubscribe(t *testing.T) { + notifs := NewNotifications(10, nil) + + // Subscribe to notifications. + sub, unsubscribe, ok := notifs.Sub() + require.True(t, ok) + + var wg sync.WaitGroup + wg.Add(1) + + receivedNotifications := make([]Notification, 0) + + // Goroutine to listen for notifications. + go func() { + defer wg.Done() + for notification := range sub { + receivedNotifications = append(receivedNotifications, notification) + } + }() + + // Add a notification and then unsubscribe. + notifs.AddNotification("Test Notification 1") + time.Sleep(100 * time.Millisecond) // Allow time for notification delivery. + unsubscribe() // Unsubscribe. + + // Add another notification after unsubscribing. + notifs.AddNotification("Test Notification 2") + + // Wait for the subscriber goroutine to finish. + wg.Wait() + + // Only the first notification should have been received. + require.Len(t, receivedNotifications, 1, "Expected 1 notification before unsubscribe.") + require.Equal(t, "Test Notification 1", receivedNotifications[0].Text, "Unexpected notification text.") +} + +// TestMaxSubscribers tests that exceeding the max subscribers limit prevents additional subscriptions. +func TestMaxSubscribers(t *testing.T) { + maxSubscribers := 2 + notifs := NewNotifications(maxSubscribers, nil) + + // Subscribe the maximum number of subscribers. + _, unsubscribe1, ok1 := notifs.Sub() + require.True(t, ok1, "Expected first subscription to succeed.") + + _, unsubscribe2, ok2 := notifs.Sub() + require.True(t, ok2, "Expected second subscription to succeed.") + + // Try to subscribe more than the max allowed. + _, _, ok3 := notifs.Sub() + require.False(t, ok3, "Expected third subscription to fail due to max subscriber limit.") + + // Unsubscribe one subscriber and try again. + unsubscribe1() + + _, unsubscribe4, ok4 := notifs.Sub() + require.True(t, ok4, "Expected subscription to succeed after unsubscribing a subscriber.") + + // Clean up the subscriptions. + unsubscribe2() + unsubscribe4() +} diff --git a/util/runtime/limits_default.go b/util/runtime/limits_default.go index 0126adb1a8..156747d450 100644 --- a/util/runtime/limits_default.go +++ b/util/runtime/limits_default.go @@ -23,7 +23,7 @@ import ( // syscall.RLIM_INFINITY is a constant. // Its type is int on most architectures but there are exceptions such as loong64. -// Uniform it to uint accorind to the standard. +// Uniform it to uint according to the standard. 
// https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/sys_resource.h.html var unlimited uint64 = syscall.RLIM_INFINITY & math.MaxUint64 diff --git a/util/teststorage/storage.go b/util/teststorage/storage.go index 7d1f9dda24..e15d591e0c 100644 --- a/util/teststorage/storage.go +++ b/util/teststorage/storage.go @@ -30,15 +30,15 @@ import ( // New returns a new TestStorage for testing purposes // that removes all associated files on closing. -func New(t testutil.T) *TestStorage { - stor, err := NewWithError() +func New(t testutil.T, outOfOrderTimeWindow ...int64) *TestStorage { + stor, err := NewWithError(outOfOrderTimeWindow...) require.NoError(t, err) return stor } // NewWithError returns a new TestStorage for user facing tests, which reports // errors directly. -func NewWithError() (*TestStorage, error) { +func NewWithError(outOfOrderTimeWindow ...int64) (*TestStorage, error) { dir, err := os.MkdirTemp("", "test_storage") if err != nil { return nil, fmt.Errorf("opening test directory: %w", err) @@ -51,6 +51,14 @@ func NewWithError() (*TestStorage, error) { opts.MaxBlockDuration = int64(24 * time.Hour / time.Millisecond) opts.RetentionDuration = 0 opts.EnableNativeHistograms = true + + // Set OutOfOrderTimeWindow if provided, otherwise use default (0) + if len(outOfOrderTimeWindow) > 0 { + opts.OutOfOrderTimeWindow = outOfOrderTimeWindow[0] + } else { + opts.OutOfOrderTimeWindow = 0 // Default value is zero + } + db, err := tsdb.Open(dir, nil, nil, opts, tsdb.NewDBStats()) if err != nil { return nil, fmt.Errorf("opening test storage: %w", err) diff --git a/util/testutil/port.go b/util/testutil/port.go index 1e449b123d..7cf4cf1ccc 100644 --- a/util/testutil/port.go +++ b/util/testutil/port.go @@ -15,21 +15,56 @@ package testutil import ( "net" + "sync" "testing" ) +var ( + mu sync.Mutex + usedPorts []int +) + // RandomUnprivilegedPort returns valid unprivileged random port number which can be used for testing. func RandomUnprivilegedPort(t *testing.T) int { t.Helper() + mu.Lock() + defer mu.Unlock() + + port, err := getPort() + if err != nil { + t.Fatal(err) + } + + for portWasUsed(port) { + port, err = getPort() + if err != nil { + t.Fatal(err) + } + } + + usedPorts = append(usedPorts, port) + + return port +} + +func portWasUsed(port int) bool { + for _, usedPort := range usedPorts { + if port == usedPort { + return true + } + } + return false +} +func getPort() (int, error) { listener, err := net.Listen("tcp", ":0") if err != nil { - t.Fatalf("Listening on random port: %v", err) + return 0, err } if err := listener.Close(); err != nil { - t.Fatalf("Closing listener: %v", err) + return 0, err } - return listener.Addr().(*net.TCPAddr).Port + return listener.Addr().(*net.TCPAddr).Port, nil } diff --git a/util/treecache/treecache.go b/util/treecache/treecache.go index bbbaaf3d6e..4d4b6f544c 100644 --- a/util/treecache/treecache.go +++ b/util/treecache/treecache.go @@ -17,12 +17,11 @@ import ( "bytes" "errors" "fmt" + "log/slog" "strings" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/go-zookeeper/zk" "github.com/prometheus/client_golang/prometheus" ) @@ -47,19 +46,19 @@ func init() { prometheus.MustRegister(numWatchers) } -// ZookeeperLogger wraps a log.Logger into a zk.Logger. +// ZookeeperLogger wraps a *slog.Logger into a zk.Logger. type ZookeeperLogger struct { - logger log.Logger + logger *slog.Logger } // NewZookeeperLogger is a constructor for ZookeeperLogger. 
-func NewZookeeperLogger(logger log.Logger) ZookeeperLogger { +func NewZookeeperLogger(logger *slog.Logger) ZookeeperLogger { return ZookeeperLogger{logger: logger} } // Printf implements zk.Logger. func (zl ZookeeperLogger) Printf(s string, i ...interface{}) { - level.Info(zl.logger).Log("msg", fmt.Sprintf(s, i...)) + zl.logger.Info(s, i...) } // A ZookeeperTreeCache keeps data from all children of a Zookeeper path @@ -72,7 +71,7 @@ type ZookeeperTreeCache struct { wg *sync.WaitGroup head *zookeeperTreeCacheNode - logger log.Logger + logger *slog.Logger } // A ZookeeperTreeCacheEvent models a Zookeeper event for a path. @@ -90,7 +89,7 @@ type zookeeperTreeCacheNode struct { } // NewZookeeperTreeCache creates a new ZookeeperTreeCache for a given path. -func NewZookeeperTreeCache(conn *zk.Conn, path string, events chan ZookeeperTreeCacheEvent, logger log.Logger) *ZookeeperTreeCache { +func NewZookeeperTreeCache(conn *zk.Conn, path string, events chan ZookeeperTreeCacheEvent, logger *slog.Logger) *ZookeeperTreeCache { tc := &ZookeeperTreeCache{ conn: conn, prefix: path, @@ -144,20 +143,20 @@ func (tc *ZookeeperTreeCache) loop(path string) { err := tc.recursiveNodeUpdate(path, tc.head) if err != nil { - level.Error(tc.logger).Log("msg", "Error during initial read of Zookeeper", "err", err) + tc.logger.Error("Error during initial read of Zookeeper", "err", err) failure() } for { select { case ev := <-tc.head.events: - level.Debug(tc.logger).Log("msg", "Received Zookeeper event", "event", ev) + tc.logger.Debug("Received Zookeeper event", "event", ev) if failureMode { continue } if ev.Type == zk.EventNotWatching { - level.Info(tc.logger).Log("msg", "Lost connection to Zookeeper.") + tc.logger.Info("Lost connection to Zookeeper.") failure() } else { path := strings.TrimPrefix(ev.Path, tc.prefix) @@ -178,15 +177,15 @@ func (tc *ZookeeperTreeCache) loop(path string) { switch err := tc.recursiveNodeUpdate(ev.Path, node); { case err != nil: - level.Error(tc.logger).Log("msg", "Error during processing of Zookeeper event", "err", err) + tc.logger.Error("Error during processing of Zookeeper event", "err", err) failure() case tc.head.data == nil: - level.Error(tc.logger).Log("msg", "Error during processing of Zookeeper event", "err", "path no longer exists", "path", tc.prefix) + tc.logger.Error("Error during processing of Zookeeper event", "err", "path no longer exists", "path", tc.prefix) failure() } } case <-retryChan: - level.Info(tc.logger).Log("msg", "Attempting to resync state with Zookeeper") + tc.logger.Info("Attempting to resync state with Zookeeper") previousState := &zookeeperTreeCacheNode{ children: tc.head.children, } @@ -194,13 +193,13 @@ func (tc *ZookeeperTreeCache) loop(path string) { tc.head.children = make(map[string]*zookeeperTreeCacheNode) if err := tc.recursiveNodeUpdate(tc.prefix, tc.head); err != nil { - level.Error(tc.logger).Log("msg", "Error during Zookeeper resync", "err", err) + tc.logger.Error("Error during Zookeeper resync", "err", err) // Revert to our previous state. 
tc.head.children = previousState.children failure() } else { tc.resyncState(tc.prefix, tc.head, previousState) - level.Info(tc.logger).Log("msg", "Zookeeper resync successful") + tc.logger.Info("Zookeeper resync successful") failureMode = false } case <-tc.stop: diff --git a/util/zeropool/pool_test.go b/util/zeropool/pool_test.go index fea8200226..e9793f64d7 100644 --- a/util/zeropool/pool_test.go +++ b/util/zeropool/pool_test.go @@ -81,7 +81,7 @@ func TestPool(t *testing.T) { t.Run("does not allocate", func(t *testing.T) { pool := zeropool.New(func() []byte { return make([]byte, 1024) }) - // Warm up, this will alloate one slice. + // Warm up, this will allocate one slice. slice := pool.Get() pool.Put(slice) diff --git a/web/api/v1/api.go b/web/api/v1/api.go index ff700d835f..63ffe62760 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -15,8 +15,12 @@ package v1 import ( "context" + "crypto/sha1" + "encoding/hex" + "encoding/json" "errors" "fmt" + "log/slog" "math" "math/rand" "net" @@ -30,8 +34,6 @@ import ( "strings" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/grafana/regexp" jsoniter "github.com/json-iterator/go" "github.com/munnerz/goautoneg" @@ -53,6 +55,7 @@ import ( "github.com/prometheus/prometheus/tsdb/index" "github.com/prometheus/prometheus/util/annotations" "github.com/prometheus/prometheus/util/httputil" + "github.com/prometheus/prometheus/util/notifications" "github.com/prometheus/prometheus/util/stats" ) @@ -202,16 +205,18 @@ type API struct { ready func(http.HandlerFunc) http.HandlerFunc globalURLOptions GlobalURLOptions - db TSDBAdminStats - dbDir string - enableAdmin bool - logger log.Logger - CORSOrigin *regexp.Regexp - buildInfo *PrometheusVersion - runtimeInfo func() (RuntimeInfo, error) - gatherer prometheus.Gatherer - isAgent bool - statsRenderer StatsRenderer + db TSDBAdminStats + dbDir string + enableAdmin bool + logger *slog.Logger + CORSOrigin *regexp.Regexp + buildInfo *PrometheusVersion + runtimeInfo func() (RuntimeInfo, error) + gatherer prometheus.Gatherer + isAgent bool + statsRenderer StatsRenderer + notificationsGetter func() []notifications.Notification + notificationsSub func() (<-chan notifications.Notification, func(), bool) remoteWriteHandler http.Handler remoteReadHandler http.Handler @@ -236,7 +241,7 @@ func NewAPI( db TSDBAdminStats, dbDir string, enableAdmin bool, - logger log.Logger, + logger *slog.Logger, rr func(context.Context) RulesRetriever, remoteReadSampleLimit int, remoteReadConcurrencyLimit int, @@ -245,6 +250,8 @@ func NewAPI( corsOrigin *regexp.Regexp, runtimeInfo func() (RuntimeInfo, error), buildInfo *PrometheusVersion, + notificationsGetter func() []notifications.Notification, + notificationsSub func() (<-chan notifications.Notification, func(), bool), gatherer prometheus.Gatherer, registerer prometheus.Registerer, statsRenderer StatsRenderer, @@ -263,22 +270,24 @@ func NewAPI( targetRetriever: tr, alertmanagerRetriever: ar, - now: time.Now, - config: configFunc, - flagsMap: flagsMap, - ready: readyFunc, - globalURLOptions: globalURLOptions, - db: db, - dbDir: dbDir, - enableAdmin: enableAdmin, - rulesRetriever: rr, - logger: logger, - CORSOrigin: corsOrigin, - runtimeInfo: runtimeInfo, - buildInfo: buildInfo, - gatherer: gatherer, - isAgent: isAgent, - statsRenderer: DefaultStatsRenderer, + now: time.Now, + config: configFunc, + flagsMap: flagsMap, + ready: readyFunc, + globalURLOptions: globalURLOptions, + db: db, + dbDir: dbDir, + enableAdmin: enableAdmin, + rulesRetriever: rr, + logger: 
logger, + CORSOrigin: corsOrigin, + runtimeInfo: runtimeInfo, + buildInfo: buildInfo, + gatherer: gatherer, + isAgent: isAgent, + statsRenderer: DefaultStatsRenderer, + notificationsGetter: notificationsGetter, + notificationsSub: notificationsSub, remoteReadHandler: remote.NewReadHandler(logger, registerer, q, configFunc, remoteReadSampleLimit, remoteReadConcurrencyLimit, remoteReadMaxBytesInFrame), } @@ -368,6 +377,9 @@ func (api *API) Register(r *route.Router) { r.Get("/format_query", wrapAgent(api.formatQuery)) r.Post("/format_query", wrapAgent(api.formatQuery)) + r.Get("/parse_query", wrapAgent(api.parseQuery)) + r.Post("/parse_query", wrapAgent(api.parseQuery)) + r.Get("/labels", wrapAgent(api.labelNames)) r.Post("/labels", wrapAgent(api.labelNames)) r.Get("/label/:name/values", wrapAgent(api.labelValues)) @@ -389,6 +401,8 @@ func (api *API) Register(r *route.Router) { r.Get("/status/flags", wrap(api.serveFlags)) r.Get("/status/tsdb", wrapAgent(api.serveTSDBStatus)) r.Get("/status/walreplay", api.serveWALReplayStatus) + r.Get("/notifications", api.notifications) + r.Get("/notifications/live", api.notificationsSSE) r.Post("/read", api.ready(api.remoteRead)) r.Post("/write", api.ready(api.remoteWrite)) r.Post("/otlp/v1/metrics", api.ready(api.otlpWrite)) @@ -487,6 +501,15 @@ func (api *API) formatQuery(r *http.Request) (result apiFuncResult) { return apiFuncResult{expr.Pretty(0), nil, nil, nil} } +func (api *API) parseQuery(r *http.Request) apiFuncResult { + expr, err := parser.ParseExpr(r.FormValue("query")) + if err != nil { + return invalidParamError(err, "query") + } + + return apiFuncResult{data: translateAST(expr), err: nil, warnings: nil, finalizer: nil} +} + func extractQueryOpts(r *http.Request) (promql.QueryOpts, error) { var duration time.Duration @@ -814,12 +837,22 @@ func (api *API) labelValues(r *http.Request) (result apiFuncResult) { } var ( - // MinTime is the default timestamp used for the begin of optional time ranges. - // Exposed to let downstream projects to reference it. + // MinTime is the default timestamp used for the start of optional time ranges. + // Exposed to let downstream projects reference it. + // + // Historical note: This should just be time.Unix(math.MinInt64/1000, 0).UTC(), + // but it was set to a higher value in the past due to a misunderstanding. + // The value is still low enough for practical purposes, so we don't want + // to change it now, avoiding confusion for importers of this variable. MinTime = time.Unix(math.MinInt64/1000+62135596801, 0).UTC() // MaxTime is the default timestamp used for the end of optional time ranges. // Exposed to let downstream projects to reference it. + // + // Historical note: This should just be time.Unix(math.MaxInt64/1000, 0).UTC(), + // but it was set to a lower value in the past due to a misunderstanding. + // The value is still high enough for practical purposes, so we don't want + // to change it now, avoiding confusion for importers of this variable. MaxTime = time.Unix(math.MaxInt64/1000-62135596801, 999999999).UTC() minTimeFormatted = MinTime.Format(time.RFC3339Nano) @@ -1342,7 +1375,8 @@ func (api *API) metricMetadata(r *http.Request) apiFuncResult { // RuleDiscovery has info for all rules. type RuleDiscovery struct { - RuleGroups []*RuleGroup `json:"groups"` + RuleGroups []*RuleGroup `json:"groups"` + GroupNextToken string `json:"groupNextToken:omitempty"` } // RuleGroup has info for rules which are part of a group. 
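The new `GroupNextToken` field above, together with the `group_limit` and `group_next_token` parameters handled in the hunks below, lets API clients page through rule groups one batch at a time. A rough sketch of how a client could drive this (the server address and the `sha1sum`/`awk` utilities are assumptions, not part of this change; the file and group names are the ones used in the new test fixtures):

```bash
# First page: return at most one rule group; the response also advertises a
# continuation token (GroupNextToken) pointing at the next group.
curl -s 'http://localhost:9090/api/v1/rules?group_limit=1'

# Continuation tokens are the hex-encoded SHA-1 of "<rule file>;<group name>"
# (see getRuleGroupNextToken below), so the token for group "grp2" defined in
# /path/to/file can be computed as:
token=$(printf '%s' '/path/to/file;grp2' | sha1sum | awk '{print $1}')

# Next page: resume listing from that group.
curl -s "http://localhost:9090/api/v1/rules?group_limit=1&group_next_token=${token}"
```

Requests that pass `group_next_token` without `group_limit`, a non-positive `group_limit`, or a token that no longer matches any rule group are rejected as bad data, as the new test cases further down exercise.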
@@ -1429,8 +1463,23 @@ func (api *API) rules(r *http.Request) apiFuncResult { return invalidParamError(err, "exclude_alerts") } + maxGroups, nextToken, parseErr := parseListRulesPaginationRequest(r) + if parseErr != nil { + return *parseErr + } + rgs := make([]*RuleGroup, 0, len(ruleGroups)) + + foundToken := false + for _, grp := range ruleGroups { + if maxGroups > 0 && nextToken != "" && !foundToken { + if nextToken != getRuleGroupNextToken(grp.File(), grp.Name()) { + continue + } + foundToken = true + } + if len(rgSet) > 0 { if _, ok := rgSet[grp.Name()]; !ok { continue @@ -1475,6 +1524,7 @@ func (api *API) rules(r *http.Request) apiFuncResult { if !excludeAlerts { activeAlerts = rulesAlertsToAPIAlerts(rule.ActiveAlerts()) } + enrichedRule = AlertingRule{ State: rule.State().String(), Name: rule.Name(), @@ -1490,6 +1540,7 @@ func (api *API) rules(r *http.Request) apiFuncResult { LastEvaluation: rule.GetEvaluationTimestamp(), Type: "alerting", } + case *rules.RecordingRule: if !returnRecording { break @@ -1516,9 +1567,20 @@ func (api *API) rules(r *http.Request) apiFuncResult { // If the rule group response has no rules, skip it - this means we filtered all the rules of this group. if len(apiRuleGroup.Rules) > 0 { + if maxGroups > 0 && len(rgs) == int(maxGroups) { + // We've reached the capacity of our page plus one. That means that for sure there will be at least one + // rule group in a subsequent request. Therefore a next token is required. + res.GroupNextToken = getRuleGroupNextToken(grp.File(), grp.Name()) + break + } rgs = append(rgs, apiRuleGroup) } } + + if maxGroups > 0 && nextToken != "" && !foundToken { + return invalidParamError(fmt.Errorf("invalid group_next_token '%v'. were rule groups changed?", nextToken), "group_next_token") + } + res.RuleGroups = rgs return apiFuncResult{res, nil, nil, nil} } @@ -1537,6 +1599,44 @@ func parseExcludeAlerts(r *http.Request) (bool, error) { return excludeAlerts, nil } +func parseListRulesPaginationRequest(r *http.Request) (int64, string, *apiFuncResult) { + var ( + parsedMaxGroups int64 = -1 + err error + ) + maxGroups := r.URL.Query().Get("group_limit") + nextToken := r.URL.Query().Get("group_next_token") + + if nextToken != "" && maxGroups == "" { + errResult := invalidParamError(fmt.Errorf("group_limit needs to be present in order to paginate over the groups"), "group_next_token") + return -1, "", &errResult + } + + if maxGroups != "" { + parsedMaxGroups, err = strconv.ParseInt(maxGroups, 10, 32) + if err != nil { + errResult := invalidParamError(fmt.Errorf("group_limit needs to be a valid number: %w", err), "group_limit") + return -1, "", &errResult + } + if parsedMaxGroups <= 0 { + errResult := invalidParamError(fmt.Errorf("group_limit needs to be greater than 0"), "group_limit") + return -1, "", &errResult + } + } + + if parsedMaxGroups > 0 { + return parsedMaxGroups, nextToken, nil + } + + return -1, "", nil +} + +func getRuleGroupNextToken(file, group string) string { + h := sha1.New() + h.Write([]byte(file + ";" + group)) + return hex.EncodeToString(h.Sum(nil)) +} + type prometheusConfig struct { YAML string `json:"yaml"` } @@ -1658,6 +1758,57 @@ func (api *API) serveWALReplayStatus(w http.ResponseWriter, r *http.Request) { }, nil, "") } +func (api *API) notifications(w http.ResponseWriter, r *http.Request) { + httputil.SetCORS(w, api.CORSOrigin, r) + api.respond(w, r, api.notificationsGetter(), nil, "") +} + +func (api *API) notificationsSSE(w http.ResponseWriter, r *http.Request) { + httputil.SetCORS(w, api.CORSOrigin, r) + 
w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + + // Subscribe to notifications. + notifications, unsubscribe, ok := api.notificationsSub() + if !ok { + w.WriteHeader(http.StatusNoContent) + return + } + defer unsubscribe() + + // Set up a flusher to push the response to the client. + flusher, ok := w.(http.Flusher) + if !ok { + http.Error(w, "Streaming unsupported", http.StatusInternalServerError) + return + } + + // Flush the response to ensure the headers are immediately and eventSource + // onopen is triggered client-side. + flusher.Flush() + + for { + select { + case notification := <-notifications: + // Marshal the notification to JSON. + jsonData, err := json.Marshal(notification) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + continue + } + + // Write the event data in SSE format with JSON content. + fmt.Fprintf(w, "data: %s\n\n", jsonData) + + // Flush the response to ensure the data is sent immediately. + flusher.Flush() + case <-r.Context().Done(): + return + } + } +} + func (api *API) remoteRead(w http.ResponseWriter, r *http.Request) { // This is only really for tests - this will never be nil IRL. if api.remoteReadHandler != nil { @@ -1679,7 +1830,7 @@ func (api *API) otlpWrite(w http.ResponseWriter, r *http.Request) { if api.otlpWriteHandler != nil { api.otlpWriteHandler.ServeHTTP(w, r) } else { - http.Error(w, "otlp write receiver needs to be enabled with --enable-feature=otlp-write-receiver", http.StatusNotFound) + http.Error(w, "otlp write receiver needs to be enabled with --web.enable-otlp-receiver", http.StatusNotFound) } } @@ -1782,7 +1933,7 @@ func (api *API) respond(w http.ResponseWriter, req *http.Request, data interface b, err := codec.Encode(resp) if err != nil { - level.Error(api.logger).Log("msg", "error marshaling response", "url", req.URL, "err", err) + api.logger.Error("error marshaling response", "url", req.URL, "err", err) http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -1790,7 +1941,7 @@ func (api *API) respond(w http.ResponseWriter, req *http.Request, data interface w.Header().Set("Content-Type", codec.ContentType().String()) w.WriteHeader(http.StatusOK) if n, err := w.Write(b); err != nil { - level.Error(api.logger).Log("msg", "error writing response", "url", req.URL, "bytesWritten", n, "err", err) + api.logger.Error("error writing response", "url", req.URL, "bytesWritten", n, "err", err) } } @@ -1820,7 +1971,7 @@ func (api *API) respondError(w http.ResponseWriter, apiErr *apiError, data inter Data: data, }) if err != nil { - level.Error(api.logger).Log("msg", "error marshaling json response", "err", err) + api.logger.Error("error marshaling json response", "err", err) http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -1848,7 +1999,7 @@ func (api *API) respondError(w http.ResponseWriter, apiErr *apiError, data inter w.Header().Set("Content-Type", "application/json") w.WriteHeader(code) if n, err := w.Write(b); err != nil { - level.Error(api.logger).Log("msg", "error writing response", "bytesWritten", n, "err", err) + api.logger.Error("error writing response", "bytesWritten", n, "err", err) } } diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index ef9d53dd9d..35ad4a9ad3 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -34,12 +34,11 @@ import ( "github.com/prometheus/prometheus/util/stats" "github.com/prometheus/prometheus/util/testutil" - 
"github.com/go-kit/log" jsoniter "github.com/json-iterator/go" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" - "github.com/prometheus/common/promlog" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/route" "github.com/stretchr/testify/require" @@ -238,7 +237,7 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { labels.Labels{}, "", true, - log.NewNopLogger(), + promslog.NewNopLogger(), ) rule2 := rules.NewAlertingRule( "test_metric4", @@ -250,7 +249,7 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { labels.Labels{}, "", true, - log.NewNopLogger(), + promslog.NewNopLogger(), ) rule3 := rules.NewAlertingRule( "test_metric5", @@ -262,7 +261,7 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { labels.FromStrings("name", "tm5"), "", false, - log.NewNopLogger(), + promslog.NewNopLogger(), ) rule4 := rules.NewAlertingRule( "test_metric6", @@ -274,7 +273,7 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { labels.Labels{}, "", true, - log.NewNopLogger(), + promslog.NewNopLogger(), ) rule5 := rules.NewAlertingRule( "test_metric7", @@ -286,7 +285,7 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { labels.Labels{}, "", true, - log.NewNopLogger(), + promslog.NewNopLogger(), ) var r []*rules.AlertingRule r = append(r, rule1) @@ -314,7 +313,7 @@ func (m *rulesRetrieverMock) CreateRuleGroups() { QueryFunc: rules.EngineQueryFunc(engine, storage), Appendable: storage, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), NotifyFunc: func(ctx context.Context, expr string, alerts ...*rules.Alert) {}, } @@ -339,7 +338,15 @@ func (m *rulesRetrieverMock) CreateRuleGroups() { ShouldRestore: false, Opts: opts, }) - m.ruleGroups = []*rules.Group{group} + group2 := rules.NewGroup(rules.GroupOptions{ + Name: "grp2", + File: "/path/to/file", + Interval: time.Second, + Rules: []rules.Rule{r[0]}, + ShouldRestore: false, + Opts: opts, + }) + m.ruleGroups = []*rules.Group{group, group2} } func (m *rulesRetrieverMock) AlertingRules() []*rules.AlertingRule { @@ -471,20 +478,20 @@ func TestEndpoints(t *testing.T) { u, err := url.Parse(server.URL) require.NoError(t, err) - al := promlog.AllowedLevel{} + al := promslog.AllowedLevel{} require.NoError(t, al.Set("debug")) - af := promlog.AllowedFormat{} + af := promslog.AllowedFormat{} require.NoError(t, af.Set("logfmt")) - promlogConfig := promlog.Config{ + promslogConfig := promslog.Config{ Level: &al, Format: &af, } dbDir := t.TempDir() - remote := remote.NewStorage(promlog.New(&promlogConfig), prometheus.DefaultRegisterer, func() (int64, error) { + remote := remote.NewStorage(promslog.New(&promslogConfig), prometheus.DefaultRegisterer, func() (int64, error) { return 0, nil }, dbDir, 1*time.Second, nil, false) @@ -2242,6 +2249,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2330,6 +2356,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + 
State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: nil, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2411,6 +2456,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2682,6 +2746,159 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, zeroFunc: rulesZeroFunc, }, + { + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + }, + response: &RuleDiscovery{ + GroupNextToken: getRuleGroupNextToken("/path/to/file", "grp2"), + RuleGroups: []*RuleGroup{ + { + Name: "grp", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric4", + Query: "up == 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "pending", + Name: "test_metric5", + Query: "vector(1)", + Duration: 1, + Labels: labels.FromStrings("name", "tm5"), + Annotations: labels.Labels{}, + Alerts: []*Alert{ + { + Labels: labels.FromStrings("alertname", "test_metric5", "name", "tm5"), + Annotations: labels.Labels{}, + State: "pending", + Value: "1e+00", + }, + }, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric6", + Query: "up == 1", + Duration: 1, + Labels: labels.FromStrings("testlabel", "rule"), + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric7", + Query: "up == 1", + Duration: 1, + Labels: labels.FromStrings("templatedlabel", "{{ $externalURL }}"), + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + RecordingRule{ + Name: "recording-rule-1", + Query: "vector(1)", + Labels: labels.Labels{}, + Health: "ok", + Type: "recording", + }, + RecordingRule{ + Name: "recording-rule-2", + Query: "vector(1)", + Labels: labels.FromStrings("testlabel", "rule"), + Health: "ok", + Type: "recording", + }, + }, + }, + }, + }, + zeroFunc: rulesZeroFunc, + }, + { + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + response: &RuleDiscovery{ + RuleGroups: []*RuleGroup{ + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, + }, + }, + zeroFunc: rulesZeroFunc, + }, + { // invalid pagination request + endpoint: api.rules, + query: url.Values{ + 
"group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, + { // invalid group_limit + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"0"}, + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, + { // Pagination token is invalid due to changes in the rule groups + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + "group_next_token": []string{getRuleGroupNextToken("/removed/file", "notfound")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, { endpoint: api.queryExemplars, query: url.Values{ @@ -3530,7 +3747,7 @@ func TestAdminEndpoints(t *testing.T) { func TestRespondSuccess(t *testing.T) { api := API{ - logger: log.NewNopLogger(), + logger: promslog.NewNopLogger(), } api.ClearCodecs() @@ -3622,7 +3839,7 @@ func TestRespondSuccess(t *testing.T) { func TestRespondSuccess_DefaultCodecCannotEncodeResponse(t *testing.T) { api := API{ - logger: log.NewNopLogger(), + logger: promslog.NewNopLogger(), } api.ClearCodecs() @@ -4034,13 +4251,13 @@ func TestGetGlobalURL(t *testing.T) { false, }, { - mustParseURL(t, "http://exemple.com"), + mustParseURL(t, "http://example.com"), GlobalURLOptions{ ListenAddress: "127.0.0.1:9090", Host: "prometheus.io", Scheme: "https", }, - mustParseURL(t, "http://exemple.com"), + mustParseURL(t, "http://example.com"), false, }, { @@ -4176,7 +4393,7 @@ func TestExtractQueryOpts(t *testing.T) { if test.err == nil { require.NoError(t, err) } else { - require.Equal(t, test.err.Error(), err.Error()) + require.EqualError(t, err, test.err.Error()) } }) } diff --git a/web/api/v1/errors_test.go b/web/api/v1/errors_test.go index 2076c72383..2b1098e218 100644 --- a/web/api/v1/errors_test.go +++ b/web/api/v1/errors_test.go @@ -23,9 +23,9 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/route" "github.com/stretchr/testify/require" @@ -105,7 +105,7 @@ func createPrometheusAPI(t *testing.T, q storage.SampleAndChunkQueryable) *route t.Helper() engine := promqltest.NewTestEngineWithOpts(t, promql.EngineOpts{ - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), Reg: nil, ActiveQueryTracker: nil, MaxSamples: 100, @@ -127,13 +127,15 @@ func createPrometheusAPI(t *testing.T, q storage.SampleAndChunkQueryable) *route nil, // Only needed for admin APIs. "", // This is for snapshots, which is disabled when admin APIs are disabled. Hence empty. false, // Disable admin APIs. - log.NewNopLogger(), + promslog.NewNopLogger(), func(context.Context) RulesRetriever { return &DummyRulesRetriever{} }, 0, 0, 0, // Remote read samples and concurrency limit. false, // Not an agent. regexp.MustCompile(".*"), func() (RuntimeInfo, error) { return RuntimeInfo{}, errors.New("not implemented") }, &PrometheusVersion{}, + nil, + nil, prometheus.DefaultGatherer, nil, nil, diff --git a/web/api/v1/translate_ast.go b/web/api/v1/translate_ast.go new file mode 100644 index 0000000000..afa11f16b9 --- /dev/null +++ b/web/api/v1/translate_ast.go @@ -0,0 +1,157 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import ( + "strconv" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql/parser" +) + +// Take a Go PromQL AST and translate it to an object that's nicely JSON-serializable +// for the tree view in the UI. +// TODO: Could it make sense to do this via the normal JSON marshalling methods? Maybe +// too UI-specific though. +func translateAST(node parser.Expr) interface{} { + if node == nil { + return nil + } + + switch n := node.(type) { + case *parser.AggregateExpr: + return map[string]interface{}{ + "type": "aggregation", + "op": n.Op.String(), + "expr": translateAST(n.Expr), + "param": translateAST(n.Param), + "grouping": sanitizeList(n.Grouping), + "without": n.Without, + } + case *parser.BinaryExpr: + var matching interface{} + if m := n.VectorMatching; m != nil { + matching = map[string]interface{}{ + "card": m.Card.String(), + "labels": sanitizeList(m.MatchingLabels), + "on": m.On, + "include": sanitizeList(m.Include), + } + } + + return map[string]interface{}{ + "type": "binaryExpr", + "op": n.Op.String(), + "lhs": translateAST(n.LHS), + "rhs": translateAST(n.RHS), + "matching": matching, + "bool": n.ReturnBool, + } + case *parser.Call: + args := []interface{}{} + for _, arg := range n.Args { + args = append(args, translateAST(arg)) + } + + return map[string]interface{}{ + "type": "call", + "func": map[string]interface{}{ + "name": n.Func.Name, + "argTypes": n.Func.ArgTypes, + "variadic": n.Func.Variadic, + "returnType": n.Func.ReturnType, + }, + "args": args, + } + case *parser.MatrixSelector: + vs := n.VectorSelector.(*parser.VectorSelector) + return map[string]interface{}{ + "type": "matrixSelector", + "name": vs.Name, + "range": n.Range.Milliseconds(), + "offset": vs.OriginalOffset.Milliseconds(), + "matchers": translateMatchers(vs.LabelMatchers), + "timestamp": vs.Timestamp, + "startOrEnd": getStartOrEnd(vs.StartOrEnd), + } + case *parser.SubqueryExpr: + return map[string]interface{}{ + "type": "subquery", + "expr": translateAST(n.Expr), + "range": n.Range.Milliseconds(), + "offset": n.OriginalOffset.Milliseconds(), + "step": n.Step.Milliseconds(), + "timestamp": n.Timestamp, + "startOrEnd": getStartOrEnd(n.StartOrEnd), + } + case *parser.NumberLiteral: + return map[string]string{ + "type": "numberLiteral", + "val": strconv.FormatFloat(n.Val, 'f', -1, 64), + } + case *parser.ParenExpr: + return map[string]interface{}{ + "type": "parenExpr", + "expr": translateAST(n.Expr), + } + case *parser.StringLiteral: + return map[string]interface{}{ + "type": "stringLiteral", + "val": n.Val, + } + case *parser.UnaryExpr: + return map[string]interface{}{ + "type": "unaryExpr", + "op": n.Op.String(), + "expr": translateAST(n.Expr), + } + case *parser.VectorSelector: + return map[string]interface{}{ + "type": "vectorSelector", + "name": n.Name, + "offset": n.OriginalOffset.Milliseconds(), + "matchers": translateMatchers(n.LabelMatchers), + "timestamp": n.Timestamp, + "startOrEnd": getStartOrEnd(n.StartOrEnd), + } + } + panic("unsupported node type") +} + +func sanitizeList(l []string) []string { + if l == nil { + 
return []string{} + } + return l +} + +func translateMatchers(in []*labels.Matcher) interface{} { + out := []map[string]interface{}{} + for _, m := range in { + out = append(out, map[string]interface{}{ + "name": m.Name, + "value": m.Value, + "type": m.Type.String(), + }) + } + return out +} + +func getStartOrEnd(startOrEnd parser.ItemType) interface{} { + if startOrEnd == 0 { + return nil + } + + return startOrEnd.String() +} diff --git a/web/federate.go b/web/federate.go index 8176eba365..8e20a60f0f 100644 --- a/web/federate.go +++ b/web/federate.go @@ -21,7 +21,6 @@ import ( "sort" "strings" - "github.com/go-kit/log/level" "github.com/gogo/protobuf/proto" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" @@ -157,7 +156,7 @@ Loop: }) } if ws := set.Warnings(); len(ws) > 0 { - level.Debug(h.logger).Log("msg", "Federation select returned warnings", "warnings", ws) + h.logger.Debug("Federation select returned warnings", "warnings", ws) federationWarnings.Add(float64(len(ws))) } if set.Err() != nil { @@ -253,11 +252,11 @@ Loop: }) if err != nil { federationErrors.Inc() - level.Error(h.logger).Log("msg", "federation failed", "err", err) + h.logger.Error("federation failed", "err", err) return } if !nameSeen { - level.Warn(h.logger).Log("msg", "Ignoring nameless metric during federation", "metric", s.Metric) + h.logger.Warn("Ignoring nameless metric during federation", "metric", s.Metric) continue } // Attach global labels if they do not exist yet. @@ -314,7 +313,7 @@ Loop: if protMetricFam != nil { if err := enc.Encode(protMetricFam); err != nil { federationErrors.Inc() - level.Error(h.logger).Log("msg", "federation failed", "err", err) + h.logger.Error("federation failed", "err", err) } } } diff --git a/web/ui/README.md b/web/ui/README.md index 465adf5372..49ec27d8b4 100644 --- a/web/ui/README.md +++ b/web/ui/README.md @@ -1,4 +1,5 @@ ## Overview + The `ui` directory contains static files and templates used in the web UI. For easier distribution they are compressed (c.f. Makefile) and statically compiled into the Prometheus binary using the embed package. @@ -11,19 +12,48 @@ in `.promu.yml`, and then `make build` (or build Prometheus using This will serve all files from your local filesystem. This is for development purposes only. +### Using Prebuilt UI Assets + +If you are only working on the go backend, for faster builds, you can use +prebuilt web UI assets available with each Prometheus release +(`prometheus-web-ui-.tar.gz`). This allows you to skip building the UI +from source. + +1. Download and extract the prebuilt UI tarball: + ```bash + tar -xvf prometheus-web-ui-.tar.gz -C web/ui + ``` + +2. Build Prometheus using the prebuilt assets by passing the following parameter + to `make`: + ```bash + make PREBUILT_ASSETS_STATIC_DIR=web/ui/static build + ``` + +This will include the prebuilt UI files directly in the Prometheus binary, +avoiding the need to install npm or rebuild the frontend from source. + ## React-app ### Introduction -The react application is a monorepo composed by multiple different npm packages. The main one is `react-app` which -contains the code of the react application. +This directory contains two generations of Prometheus' React-based web UI: + +* `react-app`: The old 2.x web UI +* `mantine-ui`: The new 3.x web UI + +Both UIs are built and compiled into Prometheus. The new UI is served by default, but a feature flag +(`--enable-feature=old-ui`) can be used to switch back to serving the old UI. 
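For instance (a minimal sketch; only the `--enable-feature=old-ui` flag comes from this change, the binary and config file paths are placeholders):

```bash
# Serve the new mantine-ui web UI (the default in 3.x):
./prometheus --config.file=prometheus.yml

# Switch back to serving the old 2.x React UI via the feature flag:
./prometheus --config.file=prometheus.yml --enable-feature=old-ui
```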
Then you have different npm packages located in the folder `modules`. These packages are supposed to be used by the -react-app and also by others consumers (like Thanos) +two React apps and also by others consumers (like Thanos). + +While most of these applications / modules are part of the same npm workspace, the old UI in the `react-app` directory +has been separated out of the workspace setup, since its dependencies were too incompatible. ### Pre-requisite -To be able to build the react application you need: +To be able to build either of the React applications, you need: * npm >= v7 * node >= v20 @@ -38,46 +68,50 @@ need to move to the directory `web/ui` and then download and install them locall npm consults the `package.json` and `package-lock.json` files for dependencies to install. It creates a `node_modules` directory with all installed dependencies. -**NOTE**: Do not run `npm install` in the `react-app` folder or in any sub folder of the `module` directory. +**NOTE**: Do not run `npm install` in the `react-app` / `mantine-ui` folder or in any sub folder of the `module` directory. ### Upgrading npm dependencies -As it is a monorepo, when upgrading a dependency, you have to upgrade it in every packages that composed this monorepo ( -aka, in all sub folder of `module` and in `react-app`) +As it is a monorepo, when upgrading a dependency, you have to upgrade it in every packages that composed this monorepo +(aka, in all sub folders of `module` and `react-app` / `mantine-ui`) Then you have to run the command `npm install` in `web/ui` and not in a sub folder / sub package. It won't simply work. ### Running a local development server -You can start a development server for the React UI outside of a running Prometheus server by running: +You can start a development server for the new React UI outside of a running Prometheus server by running: npm start -This will open a browser window with the React app running on http://localhost:3000/. The page will reload if you make +(For the old UI, you will have to run the same command from the `react-app` subdirectory.) + +This will open a browser window with the React app running on http://localhost:5173/. The page will reload if you make edits to the source code. You will also see any lint errors in the console. -**NOTE**: It will reload only if you change the code in `react-app` folder. Any code changes in the folder `module` is +**NOTE**: It will reload only if you change the code in `mantine-ui` folder. Any code changes in the folder `module` is not considered by the command `npm start`. In order to see the changes in the react-app you will have to run `npm run build:module` -Due to a `"proxy": "http://localhost:9090"` setting in the `package.json` file, any API requests from the React UI are +Due to a `"proxy": "http://localhost:9090"` setting in the `mantine-ui/vite.config.ts` file, any API requests from the React UI are proxied to `localhost` on port `9090` by the development server. This allows you to run a normal Prometheus server to handle API requests, while iterating separately on the UI. 
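As a quick sanity check of that proxy wiring (a sketch only: it assumes a Prometheus server is already running on `localhost:9090`, that `npm start` is serving the dev UI, and that the Vite proxy covers the `/api` prefix as the old `package.json` proxy did):

```bash
# An API request against the Vite dev server...
curl -s http://localhost:5173/api/v1/status/buildinfo
# ...should return the same JSON as asking Prometheus directly:
curl -s http://localhost:9090/api/v1/status/buildinfo
```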
- [browser] ----> [localhost:3000 (dev server)] --(proxy API requests)--> [localhost:9090 (Prometheus)] + [browser] ----> [localhost:5173 (dev server)] --(proxy API requests)--> [localhost:9090 (Prometheus)] ### Running tests -To run the test for the react-app and for all modules, you can simply run: +To run the test for the new React app and for all modules, you can simply run: ```bash npm test ``` -if you want to run the test only for a specific module, you need to go to the folder of the module and run +(For the old UI, you will have to run the same command from the `react-app` subdirectory.) + +If you want to run the test only for a specific module, you need to go to the folder of the module and run again `npm test`. -For example, in case you only want to run the test of the react-app, go to `web/ui/react-app` and run `npm test` +For example, in case you only want to run the test of the new React app, go to `web/ui/mantine-ui` and run `npm test` To generate an HTML-based test coverage report, run: @@ -93,7 +127,7 @@ running tests. ### Building the app for production -To build a production-optimized version of the React app to a `build` subdirectory, run: +To build a production-optimized version of both React app versions to a `static/{react-app,mantine-ui}` subdirectory, run: npm run build @@ -102,10 +136,10 @@ Prometheus `Makefile` when building the full binary. ### Integration into Prometheus -To build a Prometheus binary that includes a compiled-in version of the production build of the React app, change to the +To build a Prometheus binary that includes a compiled-in version of the production build of both React app versions, change to the root of the repository and run: make build -This installs dependencies via npm, builds a production build of the React app, and then finally compiles in all web +This installs dependencies via npm, builds a production build of both React apps, and then finally compiles in all web assets into the Prometheus binary. diff --git a/web/ui/build_ui.sh b/web/ui/build_ui.sh index 8761bfa1e3..360ec33ea8 100644 --- a/web/ui/build_ui.sh +++ b/web/ui/build_ui.sh @@ -21,6 +21,7 @@ then fi buildOrder=(lezer-promql codemirror-promql) +assetsDir="./static" function buildModule() { for module in "${buildOrder[@]}"; do @@ -31,9 +32,18 @@ function buildModule() { function buildReactApp() { echo "build react-app" - npm run build -w @prometheus-io/app - rm -rf ./static/react - mv ./react-app/build ./static/react + (cd react-app && npm run build) + mkdir -p ${assetsDir} + rm -rf ${assetsDir}/react-app + mv ./react-app/build ${assetsDir}/react-app +} + +function buildMantineUI() { + echo "build mantine-ui" + npm run build -w @prometheus-io/mantine-ui + mkdir -p ${assetsDir} + rm -rf ${assetsDir}/mantine-ui + mv ./mantine-ui/dist ${assetsDir}/mantine-ui } for i in "$@"; do @@ -41,6 +51,7 @@ for i in "$@"; do --all) buildModule buildReactApp + buildMantineUI shift ;; --build-module) diff --git a/web/ui/mantine-ui/.gitignore b/web/ui/mantine-ui/.gitignore new file mode 100644 index 0000000000..a547bf36d8 --- /dev/null +++ b/web/ui/mantine-ui/.gitignore @@ -0,0 +1,24 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +dist +dist-ssr +*.local + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? 
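Putting the README and `build_ui.sh` changes above together, the build paths now look roughly like this (commands and paths as described above; nothing here is new to this change):

```bash
# Full build from the repository root: installs npm dependencies, builds both
# React apps into web/ui/static/{react-app,mantine-ui}, and embeds them into
# the Prometheus binary:
make build

# UI-only rebuild, e.g. while iterating on the frontend (from web/ui):
cd web/ui && npm install && npm run build

# Or skip the npm toolchain entirely and reuse prebuilt release assets that
# were extracted into web/ui beforehand (see "Using Prebuilt UI Assets" above):
make PREBUILT_ASSETS_STATIC_DIR=web/ui/static build
```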
diff --git a/web/ui/mantine-ui/eslint.config.mjs b/web/ui/mantine-ui/eslint.config.mjs new file mode 100644 index 0000000000..c3cc58920e --- /dev/null +++ b/web/ui/mantine-ui/eslint.config.mjs @@ -0,0 +1,71 @@ +import { fixupConfigRules } from '@eslint/compat'; +import reactRefresh from 'eslint-plugin-react-refresh'; +import globals from 'globals'; +import tsParser from '@typescript-eslint/parser'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import js from '@eslint/js'; +import { FlatCompat } from '@eslint/eslintrc'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const compat = new FlatCompat({ + baseDirectory: __dirname, + recommendedConfig: js.configs.recommended, + allConfig: js.configs.all +}); + +export default [{ + ignores: ['**/dist', '**/.eslintrc.cjs'], +}, ...fixupConfigRules(compat.extends( + 'eslint:recommended', + 'plugin:@typescript-eslint/recommended', + 'plugin:react-hooks/recommended', +)), { + plugins: { + 'react-refresh': reactRefresh, + }, + + languageOptions: { + globals: { + ...globals.browser, + }, + + parser: tsParser, + }, + + rules: { + 'react-refresh/only-export-components': ['warn', { + allowConstantExport: true, + }], + + // Disable the base rule as it can report incorrect errors + 'no-unused-vars': 'off', + + // Use the TypeScript-specific rule for unused vars + '@typescript-eslint/no-unused-vars': ['warn', { + argsIgnorePattern: '^_', + varsIgnorePattern: '^_', + caughtErrorsIgnorePattern: '^_', + }], + + 'prefer-const': ['error', { + destructuring: 'all', + }], + }, + }, + // Override for Node.js-based config files + { + files: ['postcss.config.cjs'], // Specify any other config files + languageOptions: { + ecmaVersion: 2021, // Optional, set ECMAScript version + sourceType: 'script', // For CommonJS (non-ESM) modules + globals: { + module: 'readonly', + require: 'readonly', + process: 'readonly', + __dirname: 'readonly', // Include other Node.js globals if needed + }, + }, + }, +]; diff --git a/web/ui/mantine-ui/index.html b/web/ui/mantine-ui/index.html new file mode 100644 index 0000000000..d2723488ac --- /dev/null +++ b/web/ui/mantine-ui/index.html @@ -0,0 +1,37 @@ + + + + + + + + + + + + TITLE_PLACEHOLDER + + +
+ + + diff --git a/web/ui/mantine-ui/package.json b/web/ui/mantine-ui/package.json new file mode 100644 index 0000000000..4e886e482f --- /dev/null +++ b/web/ui/mantine-ui/package.json @@ -0,0 +1,73 @@ +{ + "name": "@prometheus-io/mantine-ui", + "private": true, + "version": "0.300.0-beta.1", + "type": "module", + "scripts": { + "start": "vite", + "build": "tsc && vite build", + "lint": "eslint . --report-unused-disable-directives --max-warnings 0", + "lint:fix": "eslint . --report-unused-disable-directives --max-warnings 0 --fix", + "preview": "vite preview", + "test": "vitest" + }, + "dependencies": { + "@codemirror/autocomplete": "^6.18.1", + "@codemirror/language": "^6.10.2", + "@codemirror/lint": "^6.8.1", + "@codemirror/state": "^6.4.1", + "@codemirror/view": "^6.34.1", + "@floating-ui/dom": "^1.6.7", + "@lezer/common": "^1.2.1", + "@lezer/highlight": "^1.2.1", + "@mantine/code-highlight": "^7.13.1", + "@mantine/core": "^7.11.2", + "@mantine/dates": "^7.13.1", + "@mantine/hooks": "^7.11.2", + "@mantine/notifications": "^7.13.1", + "@microsoft/fetch-event-source": "^2.0.1", + "@nexucis/fuzzy": "^0.5.1", + "@nexucis/kvsearch": "^0.9.1", + "@prometheus-io/codemirror-promql": "0.300.0-beta.1", + "@reduxjs/toolkit": "^2.2.1", + "@tabler/icons-react": "^3.19.0", + "@tanstack/react-query": "^5.59.0", + "@testing-library/jest-dom": "^6.5.0", + "@testing-library/react": "^16.0.1", + "@types/lodash": "^4.17.9", + "@types/sanitize-html": "^2.13.0", + "@uiw/react-codemirror": "^4.23.3", + "clsx": "^2.1.1", + "dayjs": "^1.11.10", + "lodash": "^4.17.21", + "react": "^18.3.1", + "react-dom": "^18.3.1", + "react-infinite-scroll-component": "^6.1.0", + "react-redux": "^9.1.2", + "react-router-dom": "^6.26.2", + "sanitize-html": "^2.13.0", + "uplot": "^1.6.30", + "uplot-react": "^1.2.2", + "use-query-params": "^2.2.1" + }, + "devDependencies": { + "@eslint/compat": "^1.1.1", + "@eslint/eslintrc": "^3.1.0", + "@eslint/js": "^9.11.1", + "@types/react": "^18.3.5", + "@types/react-dom": "^18.3.0", + "@typescript-eslint/eslint-plugin": "^6.21.0", + "@typescript-eslint/parser": "^6.21.0", + "@vitejs/plugin-react": "^4.2.1", + "eslint": "^9.11.1", + "eslint-plugin-react-hooks": "^5.1.0-rc-e56f4ae3-20240830", + "eslint-plugin-react-refresh": "^0.4.12", + "globals": "^15.10.0", + "jsdom": "^25.0.1", + "postcss": "^8.4.47", + "postcss-preset-mantine": "^1.17.0", + "postcss-simple-vars": "^7.0.1", + "vite": "^5.4.8", + "vitest": "^2.1.1" + } +} diff --git a/web/ui/mantine-ui/postcss.config.cjs b/web/ui/mantine-ui/postcss.config.cjs new file mode 100644 index 0000000000..bfba0ddfae --- /dev/null +++ b/web/ui/mantine-ui/postcss.config.cjs @@ -0,0 +1,14 @@ +module.exports = { + plugins: { + 'postcss-preset-mantine': {}, + 'postcss-simple-vars': { + variables: { + 'mantine-breakpoint-xs': '36em', + 'mantine-breakpoint-sm': '48em', + 'mantine-breakpoint-md': '62em', + 'mantine-breakpoint-lg': '75em', + 'mantine-breakpoint-xl': '88em', + }, + }, + }, +}; diff --git a/web/ui/mantine-ui/public/favicon.svg b/web/ui/mantine-ui/public/favicon.svg new file mode 100644 index 0000000000..5c51f66d90 --- /dev/null +++ b/web/ui/mantine-ui/public/favicon.svg @@ -0,0 +1,50 @@ + + + +image/svg+xml \ No newline at end of file diff --git a/web/ui/mantine-ui/src/App.module.css b/web/ui/mantine-ui/src/App.module.css new file mode 100644 index 0000000000..2a66b14f36 --- /dev/null +++ b/web/ui/mantine-ui/src/App.module.css @@ -0,0 +1,40 @@ +.control { + display: block; + padding: var(--mantine-spacing-xs) var(--mantine-spacing-md); + 
border-radius: var(--mantine-radius-md); + font-weight: 500; + + @mixin hover { + background-color: var(--mantine-color-gray-8); + } +} + +.link { + display: block; + line-height: 1; + padding: rem(8px) rem(12px); + border-radius: var(--mantine-radius-sm); + text-decoration: none; + color: var(--mantine-color-gray-0); + font-size: var(--mantine-font-size-sm); + font-weight: 500; + background-color: transparent; + + @mixin hover { + background-color: var(--mantine-color-gray-6); + color: var(--mantine-color-gray-0); + } + + [data-mantine-color-scheme] &[aria-current="page"] { + background-color: var(--mantine-color-blue-filled); + color: var(--mantine-color-white); + } +} + +/* Font used for autocompletion item icons. */ +@font-face { + font-family: "codicon"; + src: + local("codicon"), + url(./fonts/codicon.ttf) format("truetype"); +} diff --git a/web/ui/mantine-ui/src/App.tsx b/web/ui/mantine-ui/src/App.tsx new file mode 100644 index 0000000000..3bec30fa31 --- /dev/null +++ b/web/ui/mantine-ui/src/App.tsx @@ -0,0 +1,449 @@ +import "@mantine/core/styles.css"; +import "@mantine/code-highlight/styles.css"; +import "@mantine/notifications/styles.css"; +import "@mantine/dates/styles.css"; +import "./mantine-overrides.css"; +import classes from "./App.module.css"; +import PrometheusLogo from "./images/prometheus-logo.svg"; + +import { + ActionIcon, + AppShell, + Box, + Burger, + Button, + Group, + MantineProvider, + Menu, + Skeleton, + Text, + createTheme, + rem, +} from "@mantine/core"; +import { useDisclosure } from "@mantine/hooks"; +import { + IconBell, + IconBellFilled, + IconBook, + IconChevronDown, + IconChevronRight, + IconCloudDataConnection, + IconDatabase, + IconDeviceDesktopAnalytics, + IconFlag, + IconHeartRateMonitor, + IconInfoCircle, + IconSearch, + IconServer, + IconServerCog, +} from "@tabler/icons-react"; +import { + BrowserRouter, + Link, + NavLink, + Navigate, + Route, + Routes, +} from "react-router-dom"; +import { IconTable } from "@tabler/icons-react"; +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import QueryPage from "./pages/query/QueryPage"; +import AlertsPage from "./pages/AlertsPage"; +import RulesPage from "./pages/RulesPage"; +import TargetsPage from "./pages/targets/TargetsPage"; +import StatusPage from "./pages/StatusPage"; +import TSDBStatusPage from "./pages/TSDBStatusPage"; +import FlagsPage from "./pages/FlagsPage"; +import ConfigPage from "./pages/ConfigPage"; +import AgentPage from "./pages/AgentPage"; +import { Suspense } from "react"; +import ErrorBoundary from "./components/ErrorBoundary"; +import { ThemeSelector } from "./components/ThemeSelector"; +import { Notifications } from "@mantine/notifications"; +import { useSettings } from "./state/settingsSlice"; +import SettingsMenu from "./components/SettingsMenu"; +import ReadinessWrapper from "./components/ReadinessWrapper"; +import NotificationsProvider from "./components/NotificationsProvider"; +import NotificationsIcon from "./components/NotificationsIcon"; +import { QueryParamProvider } from "use-query-params"; +import { ReactRouter6Adapter } from "use-query-params/adapters/react-router-6"; +import ServiceDiscoveryPage from "./pages/service-discovery/ServiceDiscoveryPage"; +import AlertmanagerDiscoveryPage from "./pages/AlertmanagerDiscoveryPage"; +import { actionIconStyle, navIconStyle } from "./styles"; + +const queryClient = new QueryClient(); + +const mainNavPages = [ + { + title: "Query", + path: "/query", + icon: , + element: , + inAgentMode: false, + }, + { + 
title: "Alerts", + path: "/alerts", + icon: , + element: , + inAgentMode: false, + }, +]; + +const monitoringStatusPages = [ + { + title: "Target health", + path: "/targets", + icon: , + element: , + inAgentMode: true, + }, + { + title: "Rule health", + path: "/rules", + icon: , + element: , + inAgentMode: false, + }, + { + title: "Service discovery", + path: "/service-discovery", + icon: , + element: , + inAgentMode: true, + }, +]; + +const serverStatusPages = [ + { + title: "Runtime & build information", + path: "/status", + icon: , + element: , + inAgentMode: true, + }, + { + title: "TSDB status", + path: "/tsdb-status", + icon: , + element: , + inAgentMode: false, + }, + { + title: "Command-line flags", + path: "/flags", + icon: , + element: , + inAgentMode: true, + }, + { + title: "Configuration", + path: "/config", + icon: , + element: , + inAgentMode: true, + }, + { + title: "Alertmanager discovery", + path: "/alertmanager-discovery", + icon: , + element: , + inAgentMode: false, + }, +]; + +const allStatusPages = [...monitoringStatusPages, ...serverStatusPages]; + +const theme = createTheme({ + colors: { + "codebox-bg": [ + "#f5f5f5", + "#e7e7e7", + "#cdcdcd", + "#b2b2b2", + "#9a9a9a", + "#8b8b8b", + "#848484", + "#717171", + "#656565", + "#575757", + ], + }, +}); + +const navLinkXPadding = "md"; + +function App() { + const [opened, { toggle }] = useDisclosure(); + + const { agentMode, consolesLink, pathPrefix } = useSettings(); + + const navLinks = ( + <> + {consolesLink && ( + + )} + + {mainNavPages + .filter((p) => !agentMode || p.inAgentMode) + .map((p) => ( + + ))} + + + + {allStatusPages + .filter((p) => !agentMode || p.inAgentMode) + .map((p) => ( + + + + } + /> + ))} + + + + } + /> + + + + Monitoring status + {monitoringStatusPages + .filter((p) => !agentMode || p.inAgentMode) + .map((p) => ( + + {p.title} + + ))} + + + Server status + {serverStatusPages + .filter((p) => !agentMode || p.inAgentMode) + .map((p) => ( + + {p.title} + + ))} + + + + ); + + const navActionIcons = ( + <> + + + + + + + + ); + + return ( + + + + + + + + + + + + + + + + Prometheus{agentMode && " Agent"} + + + + {navLinks} + + + + {navActionIcons} + + + + + + + + {navLinks} + + {navActionIcons} + + + + + + + + {Array.from(Array(10), (_, i) => ( + + ))} + + } + > + + + } + /> + {agentMode ? 
( + + + + } + /> + ) : ( + <> + + + + } + /> + + + + } + /> + + )} + {allStatusPages.map((p) => ( + {p.element} + } + /> + ))} + + + + + + {/* */} + + + + + ); +} + +export default App; diff --git a/web/ui/mantine-ui/src/Badge.module.css b/web/ui/mantine-ui/src/Badge.module.css new file mode 100644 index 0000000000..afa27cea2e --- /dev/null +++ b/web/ui/mantine-ui/src/Badge.module.css @@ -0,0 +1,58 @@ +.statsBadge { + background-color: light-dark( + var(--mantine-color-gray-1), + var(--mantine-color-gray-8) + ); + color: light-dark(var(--mantine-color-gray-7), var(--mantine-color-gray-5)); +} + +.labelBadge { + background-color: light-dark( + var(--mantine-color-gray-1), + var(--mantine-color-gray-8) + ); + color: light-dark(var(--mantine-color-gray-7), var(--mantine-color-gray-5)); +} + +.healthOk { + background-color: light-dark( + var(--mantine-color-green-1), + var(--mantine-color-green-9) + ); + color: light-dark(var(--mantine-color-green-9), var(--mantine-color-green-1)); +} + +.healthErr { + background-color: light-dark( + var(--mantine-color-red-1), + darken(var(--mantine-color-red-9), 0.25) + ); + color: light-dark(var(--mantine-color-red-9), var(--mantine-color-red-1)); +} + +.healthWarn { + background-color: light-dark( + var(--mantine-color-yellow-1), + var(--mantine-color-yellow-9) + ); + color: light-dark( + var(--mantine-color-yellow-9), + var(--mantine-color-yellow-1) + ); +} + +.healthInfo { + background-color: light-dark( + var(--mantine-color-blue-1), + var(--mantine-color-blue-9) + ); + color: light-dark(var(--mantine-color-blue-9), var(--mantine-color-blue-1)); +} + +.healthUnknown { + background-color: light-dark( + var(--mantine-color-gray-2), + var(--mantine-color-gray-7) + ); + color: light-dark(var(--mantine-color-gray-7), var(--mantine-color-gray-4)); +} diff --git a/web/ui/mantine-ui/src/Panel.module.css b/web/ui/mantine-ui/src/Panel.module.css new file mode 100644 index 0000000000..0c90b70de0 --- /dev/null +++ b/web/ui/mantine-ui/src/Panel.module.css @@ -0,0 +1,19 @@ +.panelHealthOk { + border-left: 5px solid + light-dark(var(--mantine-color-green-3), var(--mantine-color-green-8)) !important; +} + +.panelHealthErr { + border-left: 5px solid + light-dark(var(--mantine-color-red-3), var(--mantine-color-red-9)) !important; +} + +.panelHealthWarn { + border-left: 5px solid + light-dark(var(--mantine-color-orange-3), var(--mantine-color-yellow-9)) !important; +} + +.panelHealthUnknown { + border-left: 5px solid + light-dark(var(--mantine-color-gray-3), var(--mantine-color-gray-6)) !important; +} diff --git a/web/ui/mantine-ui/src/api/api.ts b/web/ui/mantine-ui/src/api/api.ts new file mode 100644 index 0000000000..f1dd2b8c0c --- /dev/null +++ b/web/ui/mantine-ui/src/api/api.ts @@ -0,0 +1,131 @@ +import { QueryKey, useQuery, useSuspenseQuery } from "@tanstack/react-query"; +import { useSettings } from "../state/settingsSlice"; + +export const API_PATH = "api/v1"; + +export type SuccessAPIResponse = { + status: "success"; + data: T; + warnings?: string[]; + infos?: string[]; +}; + +export type ErrorAPIResponse = { + status: "error"; + errorType: string; + error: string; +}; + +export type APIResponse = SuccessAPIResponse | ErrorAPIResponse; + +const createQueryFn = + ({ + pathPrefix, + path, + params, + recordResponseTime, + }: { + pathPrefix: string; + path: string; + params?: Record; + recordResponseTime?: (time: number) => void; + }) => + async ({ signal }: { signal: AbortSignal }) => { + const queryString = params + ? 
`?${new URLSearchParams(params).toString()}` + : ""; + + try { + const startTime = Date.now(); + + const res = await fetch( + `${pathPrefix}/${API_PATH}${path}${queryString}`, + { + cache: "no-store", + credentials: "same-origin", + signal, + } + ); + + if ( + !res.ok && + !res.headers.get("content-type")?.startsWith("application/json") + ) { + // For example, Prometheus may send a 503 Service Unavailable response + // with a "text/plain" content type when it's starting up. But the API + // may also respond with a JSON error message and the same error code. + throw new Error(res.statusText); + } + + const apiRes = (await res.json()) as APIResponse; + + if (recordResponseTime) { + recordResponseTime(Date.now() - startTime); + } + + if (apiRes.status === "error") { + throw new Error( + apiRes.error !== undefined + ? apiRes.error + : 'missing "error" field in response JSON' + ); + } + + return apiRes as SuccessAPIResponse; + } catch (error) { + if (!(error instanceof Error)) { + throw new Error("Unknown error"); + } + + switch (error.name) { + case "TypeError": + throw new Error("Network error or unable to reach the server"); + case "SyntaxError": + throw new Error("Invalid JSON response"); + default: + throw error; + } + } + }; + +type QueryOptions = { + key?: QueryKey; + path: string; + params?: Record; + enabled?: boolean; + refetchInterval?: false | number; + recordResponseTime?: (time: number) => void; +}; + +export const useAPIQuery = ({ + key, + path, + params, + enabled, + recordResponseTime, + refetchInterval, +}: QueryOptions) => { + const { pathPrefix } = useSettings(); + + return useQuery>({ + queryKey: key !== undefined ? key : [path, params], + retry: false, + refetchOnWindowFocus: false, + refetchInterval: refetchInterval, + gcTime: 0, + enabled, + queryFn: createQueryFn({ pathPrefix, path, params, recordResponseTime }), + }); +}; + +export const useSuspenseAPIQuery = ({ key, path, params }: QueryOptions) => { + const { pathPrefix } = useSettings(); + + return useSuspenseQuery>({ + queryKey: key !== undefined ? key : [path, params], + retry: false, + refetchOnWindowFocus: false, + gcTime: 0, + queryFn: createQueryFn({ pathPrefix, path, params }), + }); +}; diff --git a/web/ui/mantine-ui/src/api/responseTypes/alertmanagers.ts b/web/ui/mantine-ui/src/api/responseTypes/alertmanagers.ts new file mode 100644 index 0000000000..2fcb8c23a3 --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/alertmanagers.ts @@ -0,0 +1,10 @@ +export type AlertmanagerTarget = { + url: string; +}; + +// Result type for /api/v1/alertmanagers endpoint. +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#alertmanagers +export type AlertmanagersResult = { + activeAlertmanagers: AlertmanagerTarget[]; + droppedAlertmanagers: AlertmanagerTarget[]; +}; diff --git a/web/ui/mantine-ui/src/api/responseTypes/config.ts b/web/ui/mantine-ui/src/api/responseTypes/config.ts new file mode 100644 index 0000000000..4f509f9e00 --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/config.ts @@ -0,0 +1,5 @@ +// Result type for /api/v1/status/config endpoint. 
+// See: https://prometheus.io/docs/prometheus/latest/querying/api/#config +export default interface ConfigResult { + yaml: string; +} diff --git a/web/ui/mantine-ui/src/api/responseTypes/labelValues.ts b/web/ui/mantine-ui/src/api/responseTypes/labelValues.ts new file mode 100644 index 0000000000..3c69b79b5a --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/labelValues.ts @@ -0,0 +1,3 @@ +// Result type for /api/v1/label//values endpoint. +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values +export type LabelValuesResult = string[]; diff --git a/web/ui/mantine-ui/src/api/responseTypes/metadata.ts b/web/ui/mantine-ui/src/api/responseTypes/metadata.ts new file mode 100644 index 0000000000..1bd168455c --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/metadata.ts @@ -0,0 +1,6 @@ +// Result type for /api/v1/alerts endpoint. +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#querying-target-metadata +export type MetadataResult = Record< + string, + { type: string; help: string; unit: string }[] +>; diff --git a/web/ui/mantine-ui/src/api/responseTypes/notifications.ts b/web/ui/mantine-ui/src/api/responseTypes/notifications.ts new file mode 100644 index 0000000000..d6ebf68d41 --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/notifications.ts @@ -0,0 +1,8 @@ +export interface Notification { + text: string; + date: string; + active: boolean; + modified: boolean; +} + +export type NotificationsResult = Notification[]; diff --git a/web/ui/mantine-ui/src/api/responseTypes/query.ts b/web/ui/mantine-ui/src/api/responseTypes/query.ts new file mode 100644 index 0000000000..327b049b5f --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/query.ts @@ -0,0 +1,51 @@ +export interface Metric { + [key: string]: string; +} + +export interface Histogram { + count: string; + sum: string; + buckets?: [number, string, string, string][]; +} + +export interface InstantSample { + metric: Metric; + value?: SampleValue; + histogram?: SampleHistogram; +} + +export interface RangeSamples { + metric: Metric; + values?: SampleValue[]; + histograms?: SampleHistogram[]; +} + +export type SampleValue = [number, string]; +export type SampleHistogram = [number, Histogram]; + +// Result type for /api/v1/query endpoint. +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries +export type InstantQueryResult = + | { + resultType: "vector"; + result: InstantSample[]; + } + | { + resultType: "matrix"; + result: RangeSamples[]; + } + | { + resultType: "scalar"; + result: SampleValue; + } + | { + resultType: "string"; + result: SampleValue; + }; + +// Result type for /api/v1/query_range endpoint. 
+// See: https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries +export type RangeQueryResult = { + resultType: "matrix"; + result: RangeSamples[]; +}; diff --git a/web/ui/mantine-ui/src/api/responseTypes/rules.ts b/web/ui/mantine-ui/src/api/responseTypes/rules.ts new file mode 100644 index 0000000000..13535315c7 --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/rules.ts @@ -0,0 +1,63 @@ +type RuleState = "pending" | "firing" | "inactive"; + +export interface Alert { + labels: Record; + state: RuleState; + value: string; + annotations: Record; + activeAt: string; + keepFiringSince: string; +} + +type CommonRuleFields = { + name: string; + query: string; + evaluationTime: string; + health: "ok" | "unknown" | "err"; + lastError?: string; + lastEvaluation: string; +}; + +export type AlertingRule = { + type: "alerting"; + // For alerting rules, the 'labels' field is always present, even when there are no labels. + labels: Record; + annotations: Record; + duration: number; + keepFiringFor: number; + state: RuleState; + alerts: Alert[]; +} & CommonRuleFields; + +type RecordingRule = { + type: "recording"; + // For recording rules, the 'labels' field is only present when there are labels. + labels?: Record; +} & CommonRuleFields; + +export type Rule = AlertingRule | RecordingRule; + +interface RuleGroup { + name: string; + file: string; + interval: string; + rules: Rule[]; + evaluationTime: string; + lastEvaluation: string; +} + +export type AlertingRuleGroup = Omit & { + rules: AlertingRule[]; +}; + +// Result type for /api/v1/alerts endpoint. +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#alerts +export interface RulesResult { + groups: RuleGroup[]; +} + +// Same as RulesResult above, but can be used when the caller ensures via a +// "type=alert" query parameter that all rules are alerting rules. +export interface AlertingRulesResult { + groups: AlertingRuleGroup[]; +} diff --git a/web/ui/mantine-ui/src/api/responseTypes/scrapePools.ts b/web/ui/mantine-ui/src/api/responseTypes/scrapePools.ts new file mode 100644 index 0000000000..793ad71571 --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/scrapePools.ts @@ -0,0 +1,2 @@ +// Result type for /api/v1/scrape_pools endpoint. +export type ScrapePoolsResult = { scrapePools: string[] }; diff --git a/web/ui/mantine-ui/src/api/responseTypes/series.ts b/web/ui/mantine-ui/src/api/responseTypes/series.ts new file mode 100644 index 0000000000..0c07def223 --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/series.ts @@ -0,0 +1,6 @@ +// Result type for /api/v1/series endpoint. + +import { Metric } from "./query"; + +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers +export type SeriesResult = Metric[]; diff --git a/web/ui/mantine-ui/src/api/responseTypes/targets.ts b/web/ui/mantine-ui/src/api/responseTypes/targets.ts new file mode 100644 index 0000000000..cc0e891f05 --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/targets.ts @@ -0,0 +1,29 @@ +export interface Labels { + [key: string]: string; +} + +export type Target = { + discoveredLabels: Labels; + labels: Labels; + scrapePool: string; + scrapeUrl: string; + globalUrl: string; + lastError: string; + lastScrape: string; + lastScrapeDuration: number; + health: string; + scrapeInterval: string; + scrapeTimeout: string; +}; + +export interface DroppedTarget { + discoveredLabels: Labels; +} + +// Result type for /api/v1/targets endpoint. 
+// See: https://prometheus.io/docs/prometheus/latest/querying/api/#targets +export type TargetsResult = { + activeTargets: Target[]; + droppedTargets: DroppedTarget[]; + droppedTargetCounts: Record; +}; diff --git a/web/ui/mantine-ui/src/api/responseTypes/tsdbStatus.ts b/web/ui/mantine-ui/src/api/responseTypes/tsdbStatus.ts new file mode 100644 index 0000000000..255249757f --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/tsdbStatus.ts @@ -0,0 +1,22 @@ +interface Stats { + name: string; + value: number; +} + +interface HeadStats { + numSeries: number; + numLabelPairs: number; + chunkCount: number; + minTime: number; + maxTime: number; +} + +// Result type for /api/v1/status/tsdb endpoint. +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats +export interface TSDBStatusResult { + headStats: HeadStats; + seriesCountByMetricName: Stats[]; + labelValueCountByLabelName: Stats[]; + memoryInBytesByLabelName: Stats[]; + seriesCountByLabelValuePair: Stats[]; +} diff --git a/web/ui/mantine-ui/src/api/responseTypes/walreplay.ts b/web/ui/mantine-ui/src/api/responseTypes/walreplay.ts new file mode 100644 index 0000000000..b881ba750d --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/walreplay.ts @@ -0,0 +1,7 @@ +// Result type for /api/v1/status/walreplay endpoint. +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#wal-replay-stats +export interface WALReplayStatus { + min: number; + max: number; + current: number; +} diff --git a/web/ui/mantine-ui/src/codemirror/theme.ts b/web/ui/mantine-ui/src/codemirror/theme.ts new file mode 100644 index 0000000000..4649837593 --- /dev/null +++ b/web/ui/mantine-ui/src/codemirror/theme.ts @@ -0,0 +1,323 @@ +import { HighlightStyle } from "@codemirror/language"; +import { EditorView } from "@codemirror/view"; +import { tags } from "@lezer/highlight"; + +export const baseTheme = EditorView.theme({ + ".cm-content": { + paddingTop: "3px", + paddingBottom: "0px", + }, + "&.cm-editor": { + "&.cm-focused": { + outline: "none", + outline_fallback: "none", + }, + backgroundColor: "transparent", + }, + ".cm-scroller": { + overflow: "hidden", + fontFamily: '"DejaVu Sans Mono", monospace', + }, + ".cm-placeholder": { + fontFamily: + '-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans","Liberation Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji"', + }, + + ".cm-matchingBracket": { + fontWeight: "bold", + outline: "1px dashed transparent", + }, + ".cm-nonmatchingBracket": { borderColor: "red" }, + + ".cm-tooltip.cm-tooltip-autocomplete": { + "& > ul": { + maxHeight: "350px", + fontFamily: '"DejaVu Sans Mono", monospace', + maxWidth: "unset", + }, + "& > ul > li": { + padding: "2px 1em 2px 3px", + }, + minWidth: "30%", + }, + + ".cm-completionDetail": { + float: "right", + color: "#999", + }, + + ".cm-tooltip.cm-completionInfo": { + padding: "10px", + fontFamily: + "'Open Sans', 'Lucida Sans Unicode', 'Lucida Grande', sans-serif;", + border: "none", + minWidth: "250px", + maxWidth: "min-content", + }, + + ".cm-completionInfo.cm-completionInfo-right": { + "&:before": { + content: "' '", + height: "0", + position: "absolute", + width: "0", + left: "-20px", + borderWidth: "10px", + borderStyle: "solid", + borderColor: "transparent", + }, + marginTop: "-11px", + marginLeft: "12px", + }, + ".cm-completionInfo.cm-completionInfo-left": { + "&:before": { + content: "' '", + height: "0", + position: "absolute", + width: "0", + right: "-20px", + 
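A replay progress fraction can be derived from the `min`/`current`/`max` segment numbers in `WALReplayStatus`, which is roughly what the readiness UI further down in this diff feeds into its progress bar. A sketch under that assumption (import path assumed):

```ts
import { WALReplayStatus } from "./walreplay";

// Percentage of WAL segments replayed so far, clamped to [0, 100].
// Guards against max <= min to avoid dividing by zero.
export function walReplayPercent(s: WALReplayStatus): number {
  if (s.max <= s.min) {
    return 100;
  }
  const pct = ((s.current - s.min) / (s.max - s.min)) * 100;
  return Math.min(100, Math.max(0, pct));
}
```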
borderWidth: "10px", + borderStyle: "solid", + borderColor: "transparent", + }, + marginTop: "-11px", + marginRight: "12px", + }, + ".cm-completionInfo.cm-completionInfo-right-narrow": { + "&:before": { + content: "' '", + height: "0", + position: "absolute", + width: "0", + top: "-20px", + borderWidth: "10px", + borderStyle: "solid", + borderColor: "transparent", + }, + marginTop: "10px", + marginLeft: "150px", + }, + ".cm-completionMatchedText": { + textDecoration: "none", + fontWeight: "bold", + }, + + ".cm-selectionMatch": { + backgroundColor: "#e6f3ff", + }, + + ".cm-diagnostic": { + "&.cm-diagnostic-error": { + borderLeft: "3px solid #e65013", + }, + }, + + ".cm-completionIcon": { + boxSizing: "content-box", + fontSize: "16px", + lineHeight: "1", + marginRight: "10px", + verticalAlign: "top", + "&:after": { content: "'\\ea88'" }, + fontFamily: "codicon", + paddingRight: "0", + opacity: "1", + }, + + ".cm-completionIcon-function, .cm-completionIcon-method": { + "&:after": { content: "'\\ea8c'" }, + }, + ".cm-completionIcon-class": { + "&:after": { content: "'○'" }, + }, + ".cm-completionIcon-interface": { + "&:after": { content: "'◌'" }, + }, + ".cm-completionIcon-variable": { + "&:after": { content: "'𝑥'" }, + }, + ".cm-completionIcon-constant": { + "&:after": { content: "'\\eb5f'" }, + }, + ".cm-completionIcon-type": { + "&:after": { content: "'𝑡'" }, + }, + ".cm-completionIcon-enum": { + "&:after": { content: "'∪'" }, + }, + ".cm-completionIcon-property": { + "&:after": { content: "'□'" }, + }, + ".cm-completionIcon-keyword": { + "&:after": { content: "'\\eb62'" }, + }, + ".cm-completionIcon-namespace": { + "&:after": { content: "'▢'" }, + }, + ".cm-completionIcon-text": { + "&:after": { content: "'\\ea95'" }, + color: "#ee9d28", + }, +}); + +export const lightTheme = EditorView.theme( + { + ".cm-tooltip": { + backgroundColor: "#f8f8f8", + borderColor: "rgba(52, 79, 113, 0.2)", + }, + + ".cm-tooltip.cm-tooltip-autocomplete": { + "& li:hover": { + backgroundColor: "#ddd", + }, + "& > ul > li[aria-selected]": { + backgroundColor: "#d6ebff", + color: "unset", + }, + }, + + ".cm-tooltip.cm-completionInfo": { + backgroundColor: "#d6ebff", + }, + + ".cm-tooltip > .cm-completionInfo.cm-completionInfo-right": { + "&:before": { + borderRightColor: "#d6ebff", + }, + }, + ".cm-tooltip > .cm-completionInfo.cm-completionInfo-right-narrow": { + "&:before": { + borderBottomColor: "#d6ebff", + }, + }, + ".cm-tooltip > .cm-completionInfo.cm-completionInfo-left": { + "&:before": { + borderLeftColor: "#d6ebff", + }, + }, + + ".cm-line": { + "&::selection": { + backgroundColor: "#add6ff", + }, + "& > span::selection": { + backgroundColor: "#add6ff", + }, + }, + + ".cm-matchingBracket": { + color: "#000", + backgroundColor: "#dedede", + }, + + ".cm-completionMatchedText": { + color: "#0066bf", + }, + + ".cm-completionIcon": { + color: "#007acc", + }, + + ".cm-completionIcon-constant": { + color: "#007acc", + }, + + ".cm-completionIcon-function, .cm-completionIcon-method": { + color: "#652d90", + }, + + ".cm-completionIcon-keyword": { + color: "#616161", + }, + }, + { dark: false } +); + +export const darkTheme = EditorView.theme( + { + ".cm-content": { + caretColor: "#fff", + }, + + ".cm-tooltip.cm-completionInfo": { + backgroundColor: "#333338", + }, + + ".cm-tooltip > .cm-completionInfo.cm-completionInfo-right": { + "&:before": { + borderRightColor: "#333338", + }, + }, + ".cm-tooltip > .cm-completionInfo.cm-completionInfo-right-narrow": { + "&:before": { + borderBottomColor: "#333338", + }, + }, + 
".cm-tooltip > .cm-completionInfo.cm-completionInfo-left": { + "&:before": { + borderLeftColor: "#333338", + }, + }, + + ".cm-line": { + "&::selection": { + backgroundColor: "#767676", + }, + "& > span::selection": { + backgroundColor: "#767676", + }, + }, + + ".cm-matchingBracket, &.cm-focused .cm-matchingBracket": { + backgroundColor: "#616161", + }, + + ".cm-completionMatchedText": { + color: "#7dd3fc", + }, + + ".cm-completionIcon, .cm-completionIcon-constant": { + color: "#7dd3fc", + }, + + ".cm-completionIcon-function, .cm-completionIcon-method": { + color: "#d8b4fe", + }, + + ".cm-completionIcon-keyword": { + color: "#cbd5e1 !important", + }, + }, + { dark: true } +); + +export const promqlHighlighter = HighlightStyle.define([ + { tag: tags.number, color: "#09885a" }, + { tag: tags.string, color: "#a31515" }, + { tag: tags.keyword, color: "#008080" }, + { tag: tags.function(tags.variableName), color: "#008080" }, + { tag: tags.labelName, color: "#800000" }, + { tag: tags.operator }, + { tag: tags.modifier, color: "#008080" }, + { tag: tags.paren }, + { tag: tags.squareBracket }, + { tag: tags.brace }, + { tag: tags.invalid, color: "red" }, + { tag: tags.comment, color: "#888", fontStyle: "italic" }, +]); + +export const darkPromqlHighlighter = HighlightStyle.define([ + { tag: tags.number, color: "#22c55e" }, + { tag: tags.string, color: "#fca5a5" }, + { tag: tags.keyword, color: "#14bfad" }, + { tag: tags.function(tags.variableName), color: "#14bfad" }, + { tag: tags.labelName, color: "#ff8585" }, + { tag: tags.operator }, + { tag: tags.modifier, color: "#14bfad" }, + { tag: tags.paren }, + { tag: tags.squareBracket }, + { tag: tags.brace }, + { tag: tags.invalid, color: "#ff3d3d" }, + { tag: tags.comment, color: "#9ca3af", fontStyle: "italic" }, +]); diff --git a/web/ui/mantine-ui/src/components/CustomInfiniteScroll.tsx b/web/ui/mantine-ui/src/components/CustomInfiniteScroll.tsx new file mode 100644 index 0000000000..f926c299c5 --- /dev/null +++ b/web/ui/mantine-ui/src/components/CustomInfiniteScroll.tsx @@ -0,0 +1,54 @@ +import { ComponentType, useEffect, useState } from "react"; +import InfiniteScroll from "react-infinite-scroll-component"; + +const initialNumberOfItemsDisplayed = 50; + +export interface InfiniteScrollItemsProps { + items: T[]; +} + +interface CustomInfiniteScrollProps { + allItems: T[]; + child: ComponentType>; +} + +const CustomInfiniteScroll = ({ + allItems, + child, +}: CustomInfiniteScrollProps) => { + const [items, setItems] = useState(allItems.slice(0, 50)); + const [index, setIndex] = useState(initialNumberOfItemsDisplayed); + const [hasMore, setHasMore] = useState( + allItems.length > initialNumberOfItemsDisplayed + ); + const Child = child; + + useEffect(() => { + setItems(allItems.slice(0, initialNumberOfItemsDisplayed)); + setHasMore(allItems.length > initialNumberOfItemsDisplayed); + }, [allItems]); + + const fetchMoreData = () => { + if (items.length === allItems.length) { + setHasMore(false); + } else { + const newIndex = index + initialNumberOfItemsDisplayed; + setIndex(newIndex); + setItems(allItems.slice(0, newIndex)); + } + }; + + return ( + loading...} + dataLength={items.length} + height={items.length > 25 ? 
"75vh" : ""} + > + + + ); +}; + +export default CustomInfiniteScroll; diff --git a/web/ui/mantine-ui/src/components/EndpointLink.tsx b/web/ui/mantine-ui/src/components/EndpointLink.tsx new file mode 100644 index 0000000000..c9b6a8989c --- /dev/null +++ b/web/ui/mantine-ui/src/components/EndpointLink.tsx @@ -0,0 +1,58 @@ +import { Anchor, Badge, Group, Stack } from "@mantine/core"; +import { FC } from "react"; + +export interface EndpointLinkProps { + endpoint: string; + globalUrl: string; +} + +const EndpointLink: FC = ({ endpoint, globalUrl }) => { + let url: URL; + let search = ""; + let invalidURL = false; + try { + url = new URL(endpoint); + } catch (err: unknown) { + // In cases of IPv6 addresses with a Zone ID, URL may not be parseable. + // See https://github.com/prometheus/prometheus/issues/9760 + // In this case, we attempt to prepare a synthetic URL with the + // same query parameters, for rendering purposes. + invalidURL = true; + if (endpoint.indexOf("?") > -1) { + search = endpoint.substring(endpoint.indexOf("?")); + } + url = new URL("http://0.0.0.0" + search); + } + + const { host, pathname, protocol, searchParams }: URL = url; + const params = Array.from(searchParams.entries()); + const displayLink = invalidURL + ? endpoint.replace(search, "") + : `${protocol}//${host}${pathname}`; + return ( + + + {displayLink} + + {params.length > 0 && ( + + {params.map(([labelName, labelValue]: [string, string]) => { + return ( + + {`${labelName}="${labelValue}"`} + + ); + })} + + )} + + ); +}; + +export default EndpointLink; diff --git a/web/ui/mantine-ui/src/components/ErrorBoundary.tsx b/web/ui/mantine-ui/src/components/ErrorBoundary.tsx new file mode 100644 index 0000000000..6e7e4c0cb5 --- /dev/null +++ b/web/ui/mantine-ui/src/components/ErrorBoundary.tsx @@ -0,0 +1,58 @@ +import { Alert } from "@mantine/core"; +import { IconAlertTriangle } from "@tabler/icons-react"; +import { Component, ErrorInfo, ReactNode } from "react"; +import { useLocation } from "react-router-dom"; + +interface Props { + children?: ReactNode; + title?: string; +} + +interface State { + error: Error | null; +} + +class ErrorBoundary extends Component { + public state: State = { + error: null, + }; + + public static getDerivedStateFromError(error: Error): State { + // Update state so the next render will show the fallback UI. 
+ return { error }; + } + + public componentDidCatch(error: Error, errorInfo: ErrorInfo) { + console.error("Uncaught error:", error, errorInfo); + } + + public render() { + if (this.state.error !== null) { + return ( + } + maw={500} + mx="auto" + mt="lg" + > + Error: {this.state.error.message} + + ); + } + + return this.props.children; + } +} + +const ResettingErrorBoundary = (props: Props) => { + const location = useLocation(); + return ( + + {props.children} + + ); +}; + +export default ResettingErrorBoundary; diff --git a/web/ui/mantine-ui/src/components/InfoPageCard.tsx b/web/ui/mantine-ui/src/components/InfoPageCard.tsx new file mode 100644 index 0000000000..f6797133c4 --- /dev/null +++ b/web/ui/mantine-ui/src/components/InfoPageCard.tsx @@ -0,0 +1,32 @@ +import { Card, Group } from "@mantine/core"; +import { IconProps } from "@tabler/icons-react"; +import { FC, ReactNode } from "react"; +import { infoPageCardTitleIconStyle } from "../styles"; + +const InfoPageCard: FC<{ + children: ReactNode; + title?: string; + icon?: React.ComponentType; +}> = ({ children, title, icon: Icon }) => { + return ( + + {title && ( + + {Icon && } + {title} + + )} + {children} + + ); +}; + +export default InfoPageCard; diff --git a/web/ui/mantine-ui/src/components/InfoPageStack.tsx b/web/ui/mantine-ui/src/components/InfoPageStack.tsx new file mode 100644 index 0000000000..eacc4f8318 --- /dev/null +++ b/web/ui/mantine-ui/src/components/InfoPageStack.tsx @@ -0,0 +1,12 @@ +import { Stack } from "@mantine/core"; +import { FC, ReactNode } from "react"; + +const InfoPageStack: FC<{ children: ReactNode }> = ({ children }) => { + return ( + + {children} + + ); +}; + +export default InfoPageStack; diff --git a/web/ui/mantine-ui/src/components/LabelBadges.tsx b/web/ui/mantine-ui/src/components/LabelBadges.tsx new file mode 100644 index 0000000000..f60a37f032 --- /dev/null +++ b/web/ui/mantine-ui/src/components/LabelBadges.tsx @@ -0,0 +1,38 @@ +import { Badge, BadgeVariant, Group, MantineColor, Stack } from "@mantine/core"; +import { FC } from "react"; +import { escapeString } from "../lib/escapeString"; +import badgeClasses from "../Badge.module.css"; + +export interface LabelBadgesProps { + labels: Record; + variant?: BadgeVariant; + color?: MantineColor; + wrapper?: typeof Group | typeof Stack; +} + +export const LabelBadges: FC = ({ + labels, + variant, + color, + wrapper: Wrapper = Group, +}) => ( + + {Object.entries(labels).map(([k, v]) => { + return ( + + {k}="{escapeString(v)}" + + ); + })} + +); diff --git a/web/ui/mantine-ui/src/components/NotificationsIcon.tsx b/web/ui/mantine-ui/src/components/NotificationsIcon.tsx new file mode 100644 index 0000000000..6d5afa1901 --- /dev/null +++ b/web/ui/mantine-ui/src/components/NotificationsIcon.tsx @@ -0,0 +1,106 @@ +import { + ActionIcon, + Indicator, + Popover, + Card, + Text, + Stack, + ScrollArea, + Group, + rem, +} from "@mantine/core"; +import { + IconAlertTriangle, + IconNetworkOff, + IconMessageExclamation, +} from "@tabler/icons-react"; +import { useNotifications } from "../state/useNotifications"; +import { actionIconStyle } from "../styles"; +import { useSettings } from "../state/settingsSlice"; +import { formatTimestamp } from "../lib/formatTime"; + +const NotificationsIcon = () => { + const { notifications, isConnectionError } = useNotifications(); + const { useLocalTime } = useSettings(); + + return notifications.length === 0 && !isConnectionError ? null : ( + + + + + + + + + + + + Notifications + + + {isConnectionError ? 
( + + + + + + Real-time notifications interrupted. + + + Please refresh the page or check your connection. + + + + + ) : notifications.length === 0 ? ( + + No notifications + + ) : ( + notifications.map((notification, index) => ( + + + + + + {notification.text} + + + {formatTimestamp( + new Date(notification.date).valueOf() / 1000, + useLocalTime + )} + + + + + )) + )} + + + + + + ); +}; + +export default NotificationsIcon; diff --git a/web/ui/mantine-ui/src/components/NotificationsProvider.tsx b/web/ui/mantine-ui/src/components/NotificationsProvider.tsx new file mode 100644 index 0000000000..a331e524b0 --- /dev/null +++ b/web/ui/mantine-ui/src/components/NotificationsProvider.tsx @@ -0,0 +1,79 @@ +import React, { useEffect, useState } from 'react'; +import { useSettings } from '../state/settingsSlice'; +import { NotificationsContext } from '../state/useNotifications'; +import { Notification, NotificationsResult } from "../api/responseTypes/notifications"; +import { useAPIQuery } from '../api/api'; +import { fetchEventSource } from '@microsoft/fetch-event-source'; + +export const NotificationsProvider: React.FC<{ children: React.ReactNode }> = ({ children }) => { + const { pathPrefix } = useSettings(); + const [notifications, setNotifications] = useState([]); + const [isConnectionError, setIsConnectionError] = useState(false); + const [shouldFetchFromAPI, setShouldFetchFromAPI] = useState(false); + + const { data, isError } = useAPIQuery({ + path: '/notifications', + enabled: shouldFetchFromAPI, + refetchInterval: 10000, + }); + + useEffect(() => { + if (data && data.data) { + setNotifications(data.data); + } + setIsConnectionError(isError); + }, [data, isError]); + + useEffect(() => { + const controller = new AbortController(); + fetchEventSource(`${pathPrefix}/api/v1/notifications/live`, { + signal: controller.signal, + async onopen(response) { + if (response.ok) { + if (response.status === 200) { + setNotifications([]); + setIsConnectionError(false); + } else if (response.status === 204) { + controller.abort(); + setShouldFetchFromAPI(true); + } + } else { + setIsConnectionError(true); + throw new Error(`Unexpected response: ${response.status} ${response.statusText}`); + } + }, + onmessage(event) { + const notification: Notification = JSON.parse(event.data); + + setNotifications((prev: Notification[]) => { + const updatedNotifications = [...prev.filter((n: Notification) => n.text !== notification.text)]; + + if (notification.active) { + updatedNotifications.push(notification); + } + + return updatedNotifications; + }); + }, + onclose() { + throw new Error("Server closed the connection"); + }, + onerror() { + setIsConnectionError(true); + return 5000; + }, + }); + + return () => { + controller.abort(); + }; + }, [pathPrefix]); + + return ( + + {children} + + ); +}; + +export default NotificationsProvider; diff --git a/web/ui/mantine-ui/src/components/ReadinessWrapper.tsx b/web/ui/mantine-ui/src/components/ReadinessWrapper.tsx new file mode 100644 index 0000000000..2e471de5e3 --- /dev/null +++ b/web/ui/mantine-ui/src/components/ReadinessWrapper.tsx @@ -0,0 +1,122 @@ +import { FC, PropsWithChildren, useEffect, useState } from "react"; +import { IconAlertTriangle } from "@tabler/icons-react"; +import { useAppDispatch } from "../state/hooks"; +import { updateSettings, useSettings } from "../state/settingsSlice"; +import { useSuspenseAPIQuery } from "../api/api"; +import { WALReplayStatus } from "../api/responseTypes/walreplay"; +import { Progress, Alert, Stack } from "@mantine/core"; +import 
{ useSuspenseQuery } from "@tanstack/react-query"; + +const STATUS_STARTING = "is starting up..."; +const STATUS_STOPPING = "is shutting down..."; +const STATUS_LOADING = "is not ready..."; + +const ReadinessLoader: FC = () => { + const { pathPrefix, agentMode } = useSettings(); + const dispatch = useAppDispatch(); + + // Query key is incremented every second to retrigger the status fetching. + const [queryKey, setQueryKey] = useState(0); + const [statusMessage, setStatusMessage] = useState(""); + + // Query readiness status. + const { data: ready } = useSuspenseQuery({ + queryKey: ["ready", queryKey], + retry: false, + refetchOnWindowFocus: false, + gcTime: 0, + queryFn: async ({ signal }: { signal: AbortSignal }) => { + try { + const res = await fetch(`${pathPrefix}/-/ready`, { + cache: "no-store", + credentials: "same-origin", + signal, + }); + switch (res.status) { + case 200: + setStatusMessage(""); // Clear any status message when ready. + return true; + case 503: + // Check the custom header `X-Prometheus-Stopping` for stopping information. + if (res.headers.get("X-Prometheus-Stopping") === "true") { + setStatusMessage(STATUS_STOPPING); + } else { + setStatusMessage(STATUS_STARTING); + } + + return false; + default: + throw new Error(res.statusText); + } + } catch (error) { + throw new Error("Unexpected error while fetching ready status"); + } + }, + }); + + // Only call WAL replay status API if the service is starting up. + const shouldQueryWALReplay = statusMessage === STATUS_STARTING; + + const { data: walData, isSuccess: walSuccess } = + useSuspenseAPIQuery({ + path: "/status/walreplay", + key: ["walreplay", queryKey], + enabled: shouldQueryWALReplay, // Only enabled when service is starting up. + }); + + useEffect(() => { + if (ready) { + dispatch(updateSettings({ ready: ready })); + } + }, [ready, dispatch]); + + useEffect(() => { + const interval = setInterval(() => setQueryKey((v) => v + 1), 1000); + return () => clearInterval(interval); + }, []); + + return ( + } + maw={500} + mx="auto" + mt="lg" + > + {shouldQueryWALReplay && walSuccess && walData && ( + + + Replaying WAL ({walData.data.current}/{walData.data.max}) + + + + )} + + ); +}; + +export const ReadinessWrapper: FC = ({ children }) => { + const { ready } = useSettings(); + + if (ready) { + return <>{children}; + } + + return ; +}; + +export default ReadinessWrapper; diff --git a/web/ui/mantine-ui/src/components/RuleDefinition.module.css b/web/ui/mantine-ui/src/components/RuleDefinition.module.css new file mode 100644 index 0000000000..15a8d4a420 --- /dev/null +++ b/web/ui/mantine-ui/src/components/RuleDefinition.module.css @@ -0,0 +1,15 @@ +.codebox { + background-color: light-dark( + var(--mantine-color-gray-1), + var(--mantine-color-gray-9) + ); +} + +.queryButton { + opacity: 0; + transition: opacity 0.1s ease-in-out; +} + +.codebox:hover .queryButton { + opacity: 1; +} diff --git a/web/ui/mantine-ui/src/components/RuleDefinition.tsx b/web/ui/mantine-ui/src/components/RuleDefinition.tsx new file mode 100644 index 0000000000..f3c4c88485 --- /dev/null +++ b/web/ui/mantine-ui/src/components/RuleDefinition.tsx @@ -0,0 +1,116 @@ +import { + ActionIcon, + Badge, + Box, + Card, + Group, + Table, + Tooltip, + useComputedColorScheme, +} from "@mantine/core"; +import { IconClockPause, IconClockPlay, IconSearch } from "@tabler/icons-react"; +import { FC } from "react"; +import { formatPrometheusDuration } from "../lib/formatTime"; +import codeboxClasses from "./RuleDefinition.module.css"; +import { Rule } from 
"../api/responseTypes/rules"; +import CodeMirror, { EditorView } from "@uiw/react-codemirror"; +import { syntaxHighlighting } from "@codemirror/language"; +import { + baseTheme, + darkPromqlHighlighter, + lightTheme, + promqlHighlighter, +} from "../codemirror/theme"; +import { PromQLExtension } from "@prometheus-io/codemirror-promql"; +import { LabelBadges } from "./LabelBadges"; +import { useSettings } from "../state/settingsSlice"; +import { actionIconStyle, badgeIconStyle } from "../styles"; + +const promqlExtension = new PromQLExtension(); + +const RuleDefinition: FC<{ rule: Rule }> = ({ rule }) => { + const theme = useComputedColorScheme(); + const { pathPrefix } = useSettings(); + + return ( + <> + + + + + { + window.open( + `${pathPrefix}/query?g0.expr=${encodeURIComponent(rule.query)}&g0.tab=1`, + "_blank" + ); + }} + className={codeboxClasses.queryButton} + > + + + + + {rule.type === "alerting" && ( + + {rule.duration && ( + } + > + for: {formatPrometheusDuration(rule.duration * 1000)} + + )} + {rule.keepFiringFor && ( + } + > + keep_firing_for: {formatPrometheusDuration(rule.duration * 1000)} + + )} + + )} + {rule.labels && Object.keys(rule.labels).length > 0 && ( + + + + )} + {rule.type === "alerting" && Object.keys(rule.annotations).length > 0 && ( + + + {Object.entries(rule.annotations).map(([k, v]) => ( + + + {k} + + {v} + + ))} + +
+ )} + + ); +}; + +export default RuleDefinition; diff --git a/web/ui/mantine-ui/src/components/SettingsMenu.tsx b/web/ui/mantine-ui/src/components/SettingsMenu.tsx new file mode 100644 index 0000000000..ada252d57c --- /dev/null +++ b/web/ui/mantine-ui/src/components/SettingsMenu.tsx @@ -0,0 +1,113 @@ +import { Popover, ActionIcon, Fieldset, Checkbox, Stack } from "@mantine/core"; +import { IconSettings } from "@tabler/icons-react"; +import { FC } from "react"; +import { useAppDispatch } from "../state/hooks"; +import { updateSettings, useSettings } from "../state/settingsSlice"; +import { actionIconStyle } from "../styles"; + +const SettingsMenu: FC = () => { + const { + useLocalTime, + enableQueryHistory, + enableAutocomplete, + enableSyntaxHighlighting, + enableLinter, + showAnnotations, + } = useSettings(); + const dispatch = useAppDispatch(); + + return ( + + + + + + + + +
+ + dispatch( + updateSettings({ useLocalTime: event.currentTarget.checked }) + ) + } + /> +
+ +
+ + + dispatch( + updateSettings({ + enableQueryHistory: event.currentTarget.checked, + }) + ) + } + /> + + dispatch( + updateSettings({ + enableAutocomplete: event.currentTarget.checked, + }) + ) + } + /> + + dispatch( + updateSettings({ + enableSyntaxHighlighting: event.currentTarget.checked, + }) + ) + } + /> + + dispatch( + updateSettings({ + enableLinter: event.currentTarget.checked, + }) + ) + } + /> + +
+ +
+ + dispatch( + updateSettings({ + showAnnotations: event.currentTarget.checked, + }) + ) + } + /> +
+
+
+
+ ); +}; + +export default SettingsMenu; diff --git a/web/ui/mantine-ui/src/components/StateMultiSelect.tsx b/web/ui/mantine-ui/src/components/StateMultiSelect.tsx new file mode 100644 index 0000000000..4621c5b9e4 --- /dev/null +++ b/web/ui/mantine-ui/src/components/StateMultiSelect.tsx @@ -0,0 +1,143 @@ +import { FC } from "react"; +import { + CheckIcon, + Combobox, + ComboboxChevron, + ComboboxClearButton, + Group, + Pill, + PillsInput, + useCombobox, +} from "@mantine/core"; +import { IconHeartRateMonitor } from "@tabler/icons-react"; +import { inputIconStyle } from "../styles"; + +interface StatePillProps extends React.ComponentPropsWithoutRef<"div"> { + value: string; + onRemove?: () => void; +} + +export function StatePill({ value, onRemove, ...others }: StatePillProps) { + return ( + + {value} + + ); +} + +interface StateMultiSelectProps { + options: string[]; + optionClass: (option: string) => string; + optionCount?: (option: string) => number; + placeholder: string; + values: string[]; + onChange: (values: string[]) => void; +} + +export const StateMultiSelect: FC = ({ + options, + optionClass, + optionCount, + placeholder, + values, + onChange, +}) => { + const combobox = useCombobox({ + onDropdownClose: () => combobox.resetSelectedOption(), + onDropdownOpen: () => combobox.updateSelectedOptionIndex("active"), + }); + + const handleValueSelect = (val: string) => + onChange( + values.includes(val) ? values.filter((v) => v !== val) : [...values, val] + ); + + const handleValueRemove = (val: string) => + onChange(values.filter((v) => v !== val)); + + const renderedValues = values.map((item) => ( + handleValueRemove(item)} + key={item} + /> + )); + + return ( + + + combobox.toggleDropdown()} + miw={200} + leftSection={} + rightSection={ + values.length > 0 ? ( + onChange([])} /> + ) : ( + + ) + } + > + + {renderedValues.length > 0 ? ( + renderedValues + ) : ( + + )} + + + combobox.closeDropdown()} + onKeyDown={(event) => { + if (event.key === "Backspace") { + event.preventDefault(); + handleValueRemove(values[values.length - 1]); + } + }} + /> + + + + + + + + {options.map((value) => { + return ( + + + {values.includes(value) ? 
( + + ) : null} + + + + ); + })} + + + + ); +}; diff --git a/web/ui/mantine-ui/src/components/ThemeSelector.tsx b/web/ui/mantine-ui/src/components/ThemeSelector.tsx new file mode 100644 index 0000000000..a7f5d1dcaa --- /dev/null +++ b/web/ui/mantine-ui/src/components/ThemeSelector.tsx @@ -0,0 +1,64 @@ +import { + useMantineColorScheme, + SegmentedControl, + rem, + MantineColorScheme, + Tooltip, +} from "@mantine/core"; +import { + IconMoonFilled, + IconSunFilled, + IconUserFilled, +} from "@tabler/icons-react"; +import { FC } from "react"; + +export const ThemeSelector: FC = () => { + const { colorScheme, setColorScheme } = useMantineColorScheme(); + const iconProps = { + style: { width: rem(20), height: rem(20), display: "block" }, + stroke: 1.5, + }; + + return ( + setColorScheme(v as MantineColorScheme)} + data={[ + { + value: "light", + label: ( + + + + ), + }, + { + value: "dark", + label: ( + + + + ), + }, + { + value: "auto", + label: ( + + + + ), + }, + ]} + /> + ); +}; diff --git a/web/ui/mantine-ui/src/fonts/codicon.ttf b/web/ui/mantine-ui/src/fonts/codicon.ttf new file mode 100644 index 0000000000..82acc8995b Binary files /dev/null and b/web/ui/mantine-ui/src/fonts/codicon.ttf differ diff --git a/web/ui/mantine-ui/src/images/prometheus-logo.svg b/web/ui/mantine-ui/src/images/prometheus-logo.svg new file mode 100644 index 0000000000..b914095ecf --- /dev/null +++ b/web/ui/mantine-ui/src/images/prometheus-logo.svg @@ -0,0 +1,19 @@ + + + + + + + + + + diff --git a/web/ui/mantine-ui/src/lib/escapeString.ts b/web/ui/mantine-ui/src/lib/escapeString.ts new file mode 100644 index 0000000000..5fc1955de8 --- /dev/null +++ b/web/ui/mantine-ui/src/lib/escapeString.ts @@ -0,0 +1,4 @@ +// Used for escaping escape sequences and double quotes in double-quoted strings. 
+export const escapeString = (str: string) => { + return str.replace(/([\\"])/g, "\\$1"); +}; diff --git a/web/ui/mantine-ui/src/lib/formatFloatValue.ts b/web/ui/mantine-ui/src/lib/formatFloatValue.ts new file mode 100644 index 0000000000..6b45e626f3 --- /dev/null +++ b/web/ui/mantine-ui/src/lib/formatFloatValue.ts @@ -0,0 +1,21 @@ +export const parsePrometheusFloat = (str: string): number => { + switch (str) { + case "+Inf": + return Infinity; + case "-Inf": + return -Infinity; + default: + return parseFloat(str); + } +}; + +export const formatPrometheusFloat = (num: number): string => { + switch (num) { + case Infinity: + return "+Inf"; + case -Infinity: + return "-Inf"; + default: + return num.toString(); + } +}; diff --git a/web/ui/mantine-ui/src/lib/formatSeries.ts b/web/ui/mantine-ui/src/lib/formatSeries.ts new file mode 100644 index 0000000000..b79c40076d --- /dev/null +++ b/web/ui/mantine-ui/src/lib/formatSeries.ts @@ -0,0 +1,12 @@ +import { escapeString } from "./escapeString"; + +export const formatSeries = (labels: { [key: string]: string }): string => { + if (labels === null) { + return "scalar"; + } + + return `${labels.__name__ || ""}{${Object.entries(labels) + .filter(([k]) => k !== "__name__") + .map(([k, v]) => `${k}="${escapeString(v)}"`) + .join(", ")}}`; +}; diff --git a/web/ui/mantine-ui/src/lib/formatTime.test.ts b/web/ui/mantine-ui/src/lib/formatTime.test.ts new file mode 100644 index 0000000000..597d6909b9 --- /dev/null +++ b/web/ui/mantine-ui/src/lib/formatTime.test.ts @@ -0,0 +1,110 @@ +import { humanizeDuration, formatPrometheusDuration } from "./formatTime"; + +describe("formatPrometheusDuration", () => { + test('returns "0s" for 0 milliseconds', () => { + expect(formatPrometheusDuration(0)).toBe("0s"); + }); + + test("formats milliseconds correctly", () => { + expect(formatPrometheusDuration(1)).toBe("1ms"); + expect(formatPrometheusDuration(999)).toBe("999ms"); + }); + + test("formats seconds correctly", () => { + expect(formatPrometheusDuration(1000)).toBe("1s"); + expect(formatPrometheusDuration(1500)).toBe("1s500ms"); + expect(formatPrometheusDuration(59999)).toBe("59s999ms"); + }); + + test("formats minutes correctly", () => { + expect(formatPrometheusDuration(60000)).toBe("1m"); + expect(formatPrometheusDuration(120000)).toBe("2m"); + expect(formatPrometheusDuration(3599999)).toBe("59m59s999ms"); + }); + + test("formats hours correctly", () => { + expect(formatPrometheusDuration(3600000)).toBe("1h"); + expect(formatPrometheusDuration(7200000)).toBe("2h"); + expect(formatPrometheusDuration(86399999)).toBe("23h59m59s999ms"); + }); + + test("formats days correctly", () => { + expect(formatPrometheusDuration(86400000)).toBe("1d"); + expect(formatPrometheusDuration(172800000)).toBe("2d"); + expect(formatPrometheusDuration(86400000 * 365 - 1)).toBe( + "364d23h59m59s999ms" + ); + }); + + test("handles negative durations", () => { + expect(formatPrometheusDuration(-1000)).toBe("-1s"); + expect(formatPrometheusDuration(-86400000)).toBe("-1d"); + }); + + test("combines multiple units correctly", () => { + expect( + formatPrometheusDuration(86400000 + 3600000 + 60000 + 1000 + 1) + ).toBe("1d1h1m1s1ms"); + }); + + test("omits zero values", () => { + expect(formatPrometheusDuration(86400000 + 1000)).toBe("1d1s"); + }); +}); + +describe("humanizeDuration", () => { + test('returns "0s" for 0 milliseconds', () => { + expect(humanizeDuration(0)).toBe("0s"); + }); + + test("formats submilliseconds correctly", () => { + expect(humanizeDuration(0.1)).toBe("0ms"); + 
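Taken together, `formatSeries` and `escapeString` produce the selector-style rendering of a label set, and `parsePrometheusFloat`/`formatPrometheusFloat` mirror the `+Inf`/`-Inf` encoding used by the API. A small usage sketch, not part of the diff (module paths assumed):

```ts
import { formatSeries } from "./formatSeries";
import {
  parsePrometheusFloat,
  formatPrometheusFloat,
} from "./formatFloatValue";

// The metric name is pulled out front and quotes/backslashes are escaped:
//   up{job="node", quote="a\"b"}
const series = formatSeries({ __name__: "up", job: "node", quote: 'a"b' });

// "+Inf" round-trips through the special-case handling:
const inf = parsePrometheusFloat("+Inf"); // Infinity
const encoded = formatPrometheusFloat(inf); // "+Inf"

console.log(series, inf, encoded);
```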
expect(humanizeDuration(0.6)).toBe("1ms"); + expect(humanizeDuration(0.000001)).toBe("0ms"); + }); + + test("formats milliseconds correctly", () => { + expect(humanizeDuration(1)).toBe("1ms"); + expect(humanizeDuration(999)).toBe("999ms"); + }); + + test("formats seconds correctly", () => { + expect(humanizeDuration(1000)).toBe("1s"); + expect(humanizeDuration(1500)).toBe("1.5s"); + expect(humanizeDuration(59999)).toBe("59.999s"); + }); + + test("formats minutes correctly", () => { + expect(humanizeDuration(60000)).toBe("1m"); + expect(humanizeDuration(120000)).toBe("2m"); + expect(humanizeDuration(3599999)).toBe("59m 59.999s"); + }); + + test("formats hours correctly", () => { + expect(humanizeDuration(3600000)).toBe("1h"); + expect(humanizeDuration(7200000)).toBe("2h"); + expect(humanizeDuration(86399999)).toBe("23h 59m 59.999s"); + }); + + test("formats days correctly", () => { + expect(humanizeDuration(86400000)).toBe("1d"); + expect(humanizeDuration(172800000)).toBe("2d"); + expect(humanizeDuration(86400000 * 365 - 1)).toBe("364d 23h 59m 59.999s"); + expect(humanizeDuration(86400000 * 365 - 1)).toBe("364d 23h 59m 59.999s"); + }); + + test("handles negative durations", () => { + expect(humanizeDuration(-1000)).toBe("-1s"); + expect(humanizeDuration(-86400000)).toBe("-1d"); + }); + + test("combines multiple units correctly", () => { + expect(humanizeDuration(86400000 + 3600000 + 60000 + 1000 + 1)).toBe( + "1d 1h 1m 1.001s" + ); + }); + + test("omits zero values", () => { + expect(humanizeDuration(86400000 + 1000)).toBe("1d 1s"); + }); +}); diff --git a/web/ui/mantine-ui/src/lib/formatTime.ts b/web/ui/mantine-ui/src/lib/formatTime.ts new file mode 100644 index 0000000000..95beb21d6b --- /dev/null +++ b/web/ui/mantine-ui/src/lib/formatTime.ts @@ -0,0 +1,127 @@ +import dayjs from "dayjs"; +import duration from "dayjs/plugin/duration"; +dayjs.extend(duration); +import utc from "dayjs/plugin/utc"; +dayjs.extend(utc); + +// Parse Prometheus-specific duration strings such as "5m" or "1d2h3m4s" into milliseconds. +export const parsePrometheusDuration = (durationStr: string): number | null => { + if (durationStr === "") { + return null; + } + if (durationStr === "0") { + // Allow 0 without a unit. + return 0; + } + + const durationRE = new RegExp( + "^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$" + ); + const matches = durationStr.match(durationRE); + if (!matches) { + return null; + } + + let dur = 0; + + // Parse the match at pos `pos` in the regex and use `mult` to turn that + // into ms, then add that value to the total parsed duration. + const m = (pos: number, mult: number) => { + if (matches[pos] === undefined) { + return; + } + const n = parseInt(matches[pos]); + dur += n * mult; + }; + + m(2, 1000 * 60 * 60 * 24 * 365); // y + m(4, 1000 * 60 * 60 * 24 * 7); // w + m(6, 1000 * 60 * 60 * 24); // d + m(8, 1000 * 60 * 60); // h + m(10, 1000 * 60); // m + m(12, 1000); // s + m(14, 1); // ms + + return dur; +}; + +// Used by: +// - formatPrometheusDuration() => "5d5m2s123ms" +// - humanizeDuration() => "5d 5m 2.123s" +const formatDuration = ( + d: number, + componentSeparator?: string, + showFractionalSeconds?: boolean +): string => { + if (d === 0) { + return "0s"; + } + + const sign = d < 0 ? "-" : ""; + let ms = Math.abs(d); + const r: string[] = []; + + for (const { unit, mult, exact } of [ + // Only format years and weeks if the remainder is zero, as it is often + // easier to read 90d than 12w6d. 
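`parsePrometheusDuration` (above) and `formatPrometheusDuration` (defined just below) round-trip Prometheus duration strings, matching the expectations in the test file. A minimal sketch, assuming the module path:

```ts
import {
  parsePrometheusDuration,
  formatPrometheusDuration,
} from "./formatTime";

// "1d2h3m4s" -> 93784000 ms, and formatting it again yields the same string.
const ms = parsePrometheusDuration("1d2h3m4s"); // 93784000
if (ms !== null) {
  console.log(formatPrometheusDuration(ms)); // "1d2h3m4s"
}

// Invalid input returns null rather than throwing.
console.log(parsePrometheusDuration("five minutes")); // null
```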
+ { unit: "y", mult: 1000 * 60 * 60 * 24 * 365, exact: true }, + { unit: "w", mult: 1000 * 60 * 60 * 24 * 7, exact: true }, + { unit: "d", mult: 1000 * 60 * 60 * 24, exact: false }, + { unit: "h", mult: 1000 * 60 * 60, exact: false }, + { unit: "m", mult: 1000 * 60, exact: false }, + { unit: "s", mult: 1000, exact: false }, + { unit: "ms", mult: 1, exact: false }, + ]) { + if (exact && ms % mult !== 0) { + continue; + } + const v = Math.floor(ms / mult); + if (v > 0) { + ms -= v * mult; + if (showFractionalSeconds && unit === "s" && ms > 0) { + // Show "2.34s" instead of "2s 340ms". + r.push(`${parseFloat((v + ms / 1000).toFixed(3))}s`); + break; + } else { + r.push(`${v}${unit}`); + } + } + if (r.length == 0 && unit == "ms") { + r.push(`${Math.round(ms)}ms`) + } + } + + return sign + r.join(componentSeparator || ""); +}; + +// Format a duration in milliseconds into a Prometheus duration string like "1d2h3m4s". +export const formatPrometheusDuration = (d: number): string => { + return formatDuration(d); +}; + +export function parseTime(timeText: string): number { + return dayjs.utc(timeText).valueOf(); +} + +export const now = (): number => dayjs().valueOf(); + +export const humanizeDuration = (milliseconds: number): string => { + return formatDuration(milliseconds, " ", true); +}; + +export const humanizeDurationRelative = ( + startStr: string, + end: number, + suffix: string = " ago" +): string => { + const start = parseTime(startStr); + if (start < 0) { + return "never"; + } + return humanizeDuration(end - start) + suffix; +}; + +export const formatTimestamp = (t: number, useLocalTime: boolean) => + useLocalTime + ? dayjs.unix(t).tz(dayjs.tz.guess()).format() + : dayjs.unix(t).utc().format(); diff --git a/web/ui/mantine-ui/src/main.tsx b/web/ui/mantine-ui/src/main.tsx new file mode 100644 index 0000000000..71b3cf937f --- /dev/null +++ b/web/ui/mantine-ui/src/main.tsx @@ -0,0 +1,15 @@ +import React from "react"; +import ReactDOM from "react-dom/client"; +import App from "./App.tsx"; +import store from "./state/store.ts"; +import { Provider } from "react-redux"; +import "./fonts/codicon.ttf"; +import "./promql.css"; + +ReactDOM.createRoot(document.getElementById("root")!).render( + + + + + +); diff --git a/web/ui/mantine-ui/src/mantine-overrides.css b/web/ui/mantine-ui/src/mantine-overrides.css new file mode 100644 index 0000000000..b9662f8d52 --- /dev/null +++ b/web/ui/mantine-ui/src/mantine-overrides.css @@ -0,0 +1,4 @@ +.mantine-Badge-label { + overflow: unset; + text-overflow: unset; +} diff --git a/web/ui/mantine-ui/src/pages/AgentPage.tsx b/web/ui/mantine-ui/src/pages/AgentPage.tsx new file mode 100644 index 0000000000..d11af49582 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/AgentPage.tsx @@ -0,0 +1,28 @@ +import { Text } from "@mantine/core"; +import { IconSpy } from "@tabler/icons-react"; +import { FC } from "react"; +import InfoPageStack from "../components/InfoPageStack"; +import InfoPageCard from "../components/InfoPageCard"; + +const AgentPage: FC = () => { + return ( + + + + This Prometheus instance is running in agent mode. In + this mode, Prometheus is only used to scrape discovered targets and + forward the scraped metrics to remote write endpoints. + + + Some features are not available in this mode, such as querying and + alerting. 
+ + + + ); +}; + +export default AgentPage; diff --git a/web/ui/mantine-ui/src/pages/AlertmanagerDiscoveryPage.tsx b/web/ui/mantine-ui/src/pages/AlertmanagerDiscoveryPage.tsx new file mode 100644 index 0000000000..d1ffb2f5fe --- /dev/null +++ b/web/ui/mantine-ui/src/pages/AlertmanagerDiscoveryPage.tsx @@ -0,0 +1,70 @@ +import { Alert, Table } from "@mantine/core"; +import { IconBell, IconBellOff, IconInfoCircle } from "@tabler/icons-react"; + +import { useSuspenseAPIQuery } from "../api/api"; +import { AlertmanagersResult } from "../api/responseTypes/alertmanagers"; +import EndpointLink from "../components/EndpointLink"; +import InfoPageCard from "../components/InfoPageCard"; +import InfoPageStack from "../components/InfoPageStack"; + +export const targetPoolDisplayLimit = 20; + +export default function AlertmanagerDiscoveryPage() { + // Load the list of all available scrape pools. + const { + data: { + data: { activeAlertmanagers, droppedAlertmanagers }, + }, + } = useSuspenseAPIQuery({ + path: `/alertmanagers`, + }); + + return ( + + + {activeAlertmanagers.length === 0 ? ( + }> + No active alertmanagers found. + + ) : ( + + + {activeAlertmanagers.map((alertmanager) => ( + + + + + + ))} + +
+ )} +
+ + {droppedAlertmanagers.length === 0 ? ( + }> + No dropped alertmanagers found. + + ) : ( + + + {droppedAlertmanagers.map((alertmanager) => ( + + + + + + ))} + +
+ )} +
+
+ ); +} diff --git a/web/ui/mantine-ui/src/pages/AlertsPage.tsx b/web/ui/mantine-ui/src/pages/AlertsPage.tsx new file mode 100644 index 0000000000..1513f73d58 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/AlertsPage.tsx @@ -0,0 +1,414 @@ +import { + Card, + Group, + Table, + Text, + Accordion, + Badge, + Tooltip, + Box, + Stack, + Alert, + TextInput, + Anchor, +} from "@mantine/core"; +import { useSuspenseAPIQuery } from "../api/api"; +import { AlertingRule, AlertingRulesResult } from "../api/responseTypes/rules"; +import badgeClasses from "../Badge.module.css"; +import panelClasses from "../Panel.module.css"; +import RuleDefinition from "../components/RuleDefinition"; +import { humanizeDurationRelative, now } from "../lib/formatTime"; +import { Fragment, useMemo } from "react"; +import { StateMultiSelect } from "../components/StateMultiSelect"; +import { IconInfoCircle, IconSearch } from "@tabler/icons-react"; +import { LabelBadges } from "../components/LabelBadges"; +import { useSettings } from "../state/settingsSlice"; +import { + ArrayParam, + BooleanParam, + StringParam, + useQueryParam, + withDefault, +} from "use-query-params"; +import { useDebouncedValue } from "@mantine/hooks"; +import { KVSearch } from "@nexucis/kvsearch"; +import { inputIconStyle } from "../styles"; + +type AlertsPageData = { + // How many rules are in each state across all groups. + globalCounts: { + inactive: number; + pending: number; + firing: number; + }; + groups: { + name: string; + file: string; + // How many rules are in each state for this group. + counts: { + total: number; + inactive: number; + pending: number; + firing: number; + }; + rules: { + rule: AlertingRule; + // How many alerts are in each state for this rule. + counts: { + firing: number; + pending: number; + }; + }[]; + }[]; +}; + +const kvSearch = new KVSearch({ + shouldSort: true, + indexedKeys: ["name", "labels", ["labels", /.*/]], +}); + +const buildAlertsPageData = ( + data: AlertingRulesResult, + search: string, + stateFilter: (string | null)[] +) => { + const pageData: AlertsPageData = { + globalCounts: { + inactive: 0, + pending: 0, + firing: 0, + }, + groups: [], + }; + + for (const group of data.groups) { + const groupCounts = { + total: 0, + inactive: 0, + pending: 0, + firing: 0, + }; + + for (const r of group.rules) { + groupCounts.total++; + switch (r.state) { + case "inactive": + pageData.globalCounts.inactive++; + groupCounts.inactive++; + break; + case "firing": + pageData.globalCounts.firing++; + groupCounts.firing++; + break; + case "pending": + pageData.globalCounts.pending++; + groupCounts.pending++; + break; + default: + throw new Error(`Unknown rule state: ${r.state}`); + } + } + + const filteredRules: AlertingRule[] = ( + search === "" + ? group.rules + : kvSearch.filter(search, group.rules).map((value) => value.original) + ).filter((r) => stateFilter.length === 0 || stateFilter.includes(r.state)); + + pageData.groups.push({ + name: group.name, + file: group.file, + counts: groupCounts, + rules: filteredRules.map((r) => ({ + rule: r, + counts: { + firing: r.alerts.filter((a) => a.state === "firing").length, + pending: r.alerts.filter((a) => a.state === "pending").length, + }, + })), + }); + } + + return pageData; +}; + +export default function AlertsPage() { + // Fetch the alerting rules data. + const { data } = useSuspenseAPIQuery({ + path: `/rules`, + params: { + type: "alert", + }, + }); + + const { showAnnotations } = useSettings(); + + // Define URL query params. 
+ const [stateFilter, setStateFilter] = useQueryParam( + "state", + withDefault(ArrayParam, []) + ); + const [searchFilter, setSearchFilter] = useQueryParam( + "search", + withDefault(StringParam, "") + ); + const [debouncedSearch] = useDebouncedValue(searchFilter.trim(), 250); + const [showEmptyGroups, setShowEmptyGroups] = useQueryParam( + "showEmptyGroups", + withDefault(BooleanParam, true) + ); + + // Update the page data whenever the fetched data or filters change. + const alertsPageData: AlertsPageData = useMemo( + () => buildAlertsPageData(data.data, debouncedSearch, stateFilter), + [data, stateFilter, debouncedSearch] + ); + + const shownGroups = showEmptyGroups + ? alertsPageData.groups + : alertsPageData.groups.filter((g) => g.rules.length > 0); + + return ( + + + + o === "inactive" + ? badgeClasses.healthOk + : o === "pending" + ? badgeClasses.healthWarn + : badgeClasses.healthErr + } + optionCount={(o) => + alertsPageData.globalCounts[ + o as keyof typeof alertsPageData.globalCounts + ] + } + placeholder="Filter by rule state" + values={(stateFilter?.filter((v) => v !== null) as string[]) || []} + onChange={(values) => setStateFilter(values)} + /> + } + placeholder="Filter by rule name or labels" + value={searchFilter || ""} + onChange={(event) => + setSearchFilter(event.currentTarget.value || null) + } + > + + {alertsPageData.groups.length === 0 ? ( + }> + No rules found. + + ) : ( + !showEmptyGroups && + alertsPageData.groups.length !== shownGroups.length && ( + } + > + Hiding {alertsPageData.groups.length - shownGroups.length} empty + groups due to filters or no rules. + setShowEmptyGroups(true)}> + Show empty groups + + + ) + )} + + {shownGroups.map((g, i) => { + return ( + + + + + {g.name} + + + {g.file} + + + + {g.counts.firing > 0 && ( + + firing ({g.counts.firing}) + + )} + {g.counts.pending > 0 && ( + + pending ({g.counts.pending}) + + )} + {g.counts.inactive > 0 && ( + + inactive ({g.counts.inactive}) + + )} + + + {g.counts.total === 0 ? ( + }> + No rules in this group. + setShowEmptyGroups(false)} + > + Hide empty groups + + + ) : g.rules.length === 0 ? ( + }> + No rules in this group match your filter criteria (omitted{" "} + {g.counts.total} filtered rules). + setShowEmptyGroups(false)} + > + Hide empty groups + + + ) : ( + + {g.rules.map((r, j) => { + return ( + 0 + ? panelClasses.panelHealthErr + : r.counts.pending > 0 + ? panelClasses.panelHealthWarn + : panelClasses.panelHealthOk + } + > + + + {r.rule.name} + + {r.counts.firing > 0 && ( + + firing ({r.counts.firing}) + + )} + {r.counts.pending > 0 && ( + + pending ({r.counts.pending}) + + )} + + + + + + {r.rule.alerts.length > 0 && ( + + + + Alert labels + State + Active Since + Value + + + + {r.rule.type === "alerting" && + r.rule.alerts.map((a, k) => ( + + + + + + + + {a.state} + + + + + + {humanizeDurationRelative( + a.activeAt, + now(), + "" + )} + + + + + {isNaN(Number(a.value)) + ? a.value + : Number(a.value)} + + + {showAnnotations && ( + + +
+ + {Object.entries( + a.annotations + ).map(([k, v]) => ( + + + {k} + + {v} + + ))} + +
+ + + )} + + ))} + + + )} +
+
+ ); + })} +
+ )} +
+ ); + })} +
+
+ ); +} diff --git a/web/ui/mantine-ui/src/pages/ConfigPage.tsx b/web/ui/mantine-ui/src/pages/ConfigPage.tsx new file mode 100644 index 0000000000..a8419a8db4 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/ConfigPage.tsx @@ -0,0 +1,23 @@ +import { CodeHighlight } from "@mantine/code-highlight"; +import { useSuspenseAPIQuery } from "../api/api"; +import ConfigResult from "../api/responseTypes/config"; + +export default function ConfigPage() { + const { + data: { + data: { yaml }, + }, + } = useSuspenseAPIQuery({ path: `/status/config` }); + + return ( + + ); +} diff --git a/web/ui/mantine-ui/src/pages/FlagsPage.module.css b/web/ui/mantine-ui/src/pages/FlagsPage.module.css new file mode 100644 index 0000000000..221b2de028 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/FlagsPage.module.css @@ -0,0 +1,21 @@ +.th { + padding: 0; +} + +.control { + width: 100%; + padding: var(--mantine-spacing-xs) var(--mantine-spacing-md); + + @mixin hover { + background-color: light-dark( + var(--mantine-color-gray-0), + var(--mantine-color-dark-6) + ); + } +} + +.icon { + width: rem(21px); + height: rem(21px); + border-radius: rem(21px); +} diff --git a/web/ui/mantine-ui/src/pages/FlagsPage.tsx b/web/ui/mantine-ui/src/pages/FlagsPage.tsx new file mode 100644 index 0000000000..856ab9a40d --- /dev/null +++ b/web/ui/mantine-ui/src/pages/FlagsPage.tsx @@ -0,0 +1,181 @@ +import { useState } from "react"; +import { + Table, + UnstyledButton, + Group, + Text, + Center, + TextInput, + rem, + keys, +} from "@mantine/core"; +import { + IconSelector, + IconChevronDown, + IconChevronUp, + IconSearch, +} from "@tabler/icons-react"; +import classes from "./FlagsPage.module.css"; +import { useSuspenseAPIQuery } from "../api/api"; +import InfoPageStack from "../components/InfoPageStack"; +import InfoPageCard from "../components/InfoPageCard"; +import { inputIconStyle } from "../styles"; + +interface RowData { + flag: string; + value: string; +} + +interface ThProps { + children: React.ReactNode; + reversed: boolean; + sorted: boolean; + onSort(): void; +} + +function Th({ children, reversed, sorted, onSort }: ThProps) { + const Icon = sorted + ? reversed + ? IconChevronUp + : IconChevronDown + : IconSelector; + return ( + + + + + {children} + +
+ +
+
+
+
+ ); +} + +function filterData(data: RowData[], search: string) { + const query = search.toLowerCase().trim(); + return data.filter((item) => + keys(data[0]).some((key) => item[key].toLowerCase().includes(query)) + ); +} + +function sortData( + data: RowData[], + payload: { sortBy: keyof RowData | null; reversed: boolean; search: string } +) { + const { sortBy } = payload; + + if (!sortBy) { + return filterData(data, payload.search); + } + + return filterData( + [...data].sort((a, b) => { + if (payload.reversed) { + return b[sortBy].localeCompare(a[sortBy]); + } + + return a[sortBy].localeCompare(b[sortBy]); + }), + payload.search + ); +} + +export default function FlagsPage() { + const { data } = useSuspenseAPIQuery>({ + path: `/status/flags`, + }); + + const flags = Object.entries(data.data).map(([flag, value]) => ({ + flag, + value, + })); + + const [search, setSearch] = useState(""); + const [sortedData, setSortedData] = useState(flags); + const [sortBy, setSortBy] = useState(null); + const [reverseSortDirection, setReverseSortDirection] = useState(false); + + const setSorting = (field: keyof RowData) => { + const reversed = field === sortBy ? !reverseSortDirection : false; + setReverseSortDirection(reversed); + setSortBy(field); + setSortedData(sortData(flags, { sortBy: field, reversed, search })); + }; + + const handleSearchChange = (event: React.ChangeEvent) => { + const { value } = event.currentTarget; + setSearch(value); + setSortedData( + sortData(flags, { sortBy, reversed: reverseSortDirection, search: value }) + ); + }; + + const rows = sortedData.map((row) => ( + + + --{row.flag} + + + {row.value} + + + )); + + return ( + + + } + value={search} + onChange={handleSearchChange} + /> + + + + + + + + + + {rows.length > 0 ? ( + rows + ) : ( + + + + Nothing found + + + + )} + +
setSorting("flag")} + > + Flag + setSorting("value")} + > + Value +
+
+
+ ); +} diff --git a/web/ui/mantine-ui/src/pages/RulesPage.tsx b/web/ui/mantine-ui/src/pages/RulesPage.tsx new file mode 100644 index 0000000000..a4ed44e7c2 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/RulesPage.tsx @@ -0,0 +1,209 @@ +import { + Accordion, + Alert, + Badge, + Card, + Group, + rem, + Stack, + Text, + Tooltip, +} from "@mantine/core"; +// import { useQuery } from "react-query"; +import { + humanizeDurationRelative, + humanizeDuration, + now, +} from "../lib/formatTime"; +import { + IconAlertTriangle, + IconBell, + IconHourglass, + IconInfoCircle, + IconRefresh, + IconRepeat, + IconTimeline, +} from "@tabler/icons-react"; +import { useSuspenseAPIQuery } from "../api/api"; +import { RulesResult } from "../api/responseTypes/rules"; +import badgeClasses from "../Badge.module.css"; +import RuleDefinition from "../components/RuleDefinition"; +import { badgeIconStyle } from "../styles"; + +const healthBadgeClass = (state: string) => { + switch (state) { + case "ok": + return badgeClasses.healthOk; + case "err": + return badgeClasses.healthErr; + case "unknown": + return badgeClasses.healthUnknown; + default: + throw new Error("Unknown rule health state"); + } +}; + +export default function RulesPage() { + const { data } = useSuspenseAPIQuery({ path: `/rules` }); + + return ( + + {data.data.groups.length === 0 && ( + }> + No rule groups configured. + + )} + {data.data.groups.map((g, i) => ( + + + + + {g.name} + + + {g.file} + + + + + } + > + last run {humanizeDurationRelative(g.lastEvaluation, now())} + + + + } + > + took {humanizeDuration(parseFloat(g.evaluationTime) * 1000)} + + + + } + > + every {humanizeDuration(parseFloat(g.interval) * 1000)}{" "} + + + + + {g.rules.length === 0 && ( + }> + No rules in rule group. + + )} + + {g.rules.map((r, j) => ( + + + + + {r.type === "alerting" ? ( + + + + ) : ( + + + + )} + {r.name} + + + + + } + > + {humanizeDurationRelative(r.lastEvaluation, now())} + + + + + + } + > + {humanizeDuration( + parseFloat(r.evaluationTime) * 1000 + )} + + + + + {r.health} + + + + + + + {r.lastError && ( + } + > + Error: {r.lastError} + + )} + + + ))} + + + ))} + + ); +} diff --git a/web/ui/mantine-ui/src/pages/StatusPage.tsx b/web/ui/mantine-ui/src/pages/StatusPage.tsx new file mode 100644 index 0000000000..71dc476a2d --- /dev/null +++ b/web/ui/mantine-ui/src/pages/StatusPage.tsx @@ -0,0 +1,80 @@ +import { Table } from "@mantine/core"; +import { useSuspenseAPIQuery } from "../api/api"; +import { IconRun, IconWall } from "@tabler/icons-react"; +import { formatTimestamp } from "../lib/formatTime"; +import { useSettings } from "../state/settingsSlice"; +import InfoPageCard from "../components/InfoPageCard"; +import InfoPageStack from "../components/InfoPageStack"; + +export default function StatusPage() { + const { data: buildinfo } = useSuspenseAPIQuery>({ + path: `/status/buildinfo`, + }); + const { data: runtimeinfo } = useSuspenseAPIQuery>({ + path: `/status/runtimeinfo`, + }); + + const { useLocalTime } = useSettings(); + + const statusConfig: Record< + string, + { + title?: string; + formatValue?: (v: string | boolean) => string; + } + > = { + startTime: { + title: "Start time", + formatValue: (v: string | boolean) => + formatTimestamp(new Date(v as string).valueOf() / 1000, useLocalTime), + }, + CWD: { title: "Working directory" }, + reloadConfigSuccess: { + title: "Configuration reload", + formatValue: (v: string | boolean) => (v ? 
"Successful" : "Unsuccessful"), + }, + lastConfigTime: { + title: "Last successful configuration reload", + formatValue: (v: string | boolean) => + formatTimestamp(new Date(v as string).valueOf() / 1000, useLocalTime), + }, + corruptionCount: { title: "WAL corruptions" }, + goroutineCount: { title: "Goroutines" }, + storageRetention: { title: "Storage retention" }, + }; + + return ( + + + + + {Object.entries(buildinfo.data).map(([k, v]) => ( + + {k} + {v} + + ))} + +
+
+ + + + {Object.entries(runtimeinfo.data).map(([k, v]) => { + const { title = k, formatValue = (val: string) => val } = + statusConfig[k] || {}; + return ( + + + {title} + + {formatValue(v)} + + ); + })} + +
+
+
+ ); +} diff --git a/web/ui/mantine-ui/src/pages/TSDBStatusPage.tsx b/web/ui/mantine-ui/src/pages/TSDBStatusPage.tsx new file mode 100644 index 0000000000..cf7851c165 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/TSDBStatusPage.tsx @@ -0,0 +1,102 @@ +import { Table } from "@mantine/core"; +import { useSuspenseAPIQuery } from "../api/api"; +import { TSDBStatusResult } from "../api/responseTypes/tsdbStatus"; +import { formatTimestamp } from "../lib/formatTime"; +import { useSettings } from "../state/settingsSlice"; +import InfoPageStack from "../components/InfoPageStack"; +import InfoPageCard from "../components/InfoPageCard"; + +export default function TSDBStatusPage() { + const { + data: { + data: { + headStats, + labelValueCountByLabelName, + seriesCountByMetricName, + memoryInBytesByLabelName, + seriesCountByLabelValuePair, + }, + }, + } = useSuspenseAPIQuery({ path: `/status/tsdb` }); + + const { useLocalTime } = useSettings(); + + const unixToTime = (unix: number): string => { + const formatted = formatTimestamp(unix, useLocalTime); + if (formatted === "Invalid Date") { + if (numSeries === 0) { + return "No datapoints yet"; + } + return `Error parsing time (${unix})`; + } + + return formatted; + }; + + const { chunkCount, numSeries, numLabelPairs, minTime, maxTime } = headStats; + const stats = [ + { name: "Number of Series", value: numSeries }, + { name: "Number of Chunks", value: chunkCount }, + { name: "Number of Label Pairs", value: numLabelPairs }, + { name: "Current Min Time", value: `${unixToTime(minTime / 1000)}` }, + { name: "Current Max Time", value: `${unixToTime(maxTime / 1000)}` }, + ]; + + return ( + + {[ + { + title: "TSDB Head Status", + stats, + formatAsCode: false, + }, + { + title: "Top 10 label names with value count", + stats: labelValueCountByLabelName, + formatAsCode: true, + }, + { + title: "Top 10 series count by metric names", + stats: seriesCountByMetricName, + formatAsCode: true, + }, + { + title: "Top 10 label names with high memory usage", + unit: "Bytes", + stats: memoryInBytesByLabelName, + formatAsCode: true, + }, + { + title: "Top 10 series count by label value pairs", + stats: seriesCountByLabelValuePair, + formatAsCode: true, + }, + ].map(({ title, unit = "Count", stats, formatAsCode }) => ( + + + + + Name + {unit} + + + + {stats.map(({ name, value }) => ( + + + {formatAsCode ? {name} : name} + + {value} + + ))} + +
+
+ ))} +
+ ); +} diff --git a/web/ui/mantine-ui/src/pages/query/DataTable.module.css b/web/ui/mantine-ui/src/pages/query/DataTable.module.css new file mode 100644 index 0000000000..255173d9bd --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/DataTable.module.css @@ -0,0 +1,10 @@ +.tableWrapper { + border: 1px solid + light-dark(var(--mantine-color-gray-3), var(--mantine-color-dark-5)); + border-radius: var(--mantine-radius-default); +} + +.numberCell { + text-align: right; + font-variant-numeric: tabular-nums; +} diff --git a/web/ui/mantine-ui/src/pages/query/DataTable.tsx b/web/ui/mantine-ui/src/pages/query/DataTable.tsx new file mode 100644 index 0000000000..375b033eda --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/DataTable.tsx @@ -0,0 +1,211 @@ +import { FC, ReactNode, useState } from "react"; +import { + Table, + Alert, + Box, + SegmentedControl, + ScrollArea, + Group, + Stack, + Text, + Anchor, +} from "@mantine/core"; +import { IconAlertTriangle, IconInfoCircle } from "@tabler/icons-react"; +import { + InstantQueryResult, + InstantSample, + RangeSamples, +} from "../../api/responseTypes/query"; +import SeriesName from "./SeriesName"; +import classes from "./DataTable.module.css"; +import dayjs from "dayjs"; +import timezone from "dayjs/plugin/timezone"; +import { formatTimestamp } from "../../lib/formatTime"; +import HistogramChart from "./HistogramChart"; +import { Histogram } from "../../types/types"; +import { bucketRangeString } from "./HistogramHelpers"; +import { useSettings } from "../../state/settingsSlice"; +dayjs.extend(timezone); + +const maxFormattableSeries = 1000; +const maxDisplayableSeries = 10000; + +const limitSeries = ( + series: S[], + limit: boolean +): S[] => { + if (limit && series.length > maxDisplayableSeries) { + return series.slice(0, maxDisplayableSeries); + } + return series; +}; + +export interface DataTableProps { + data: InstantQueryResult; + limitResults: boolean; + setLimitResults: (limit: boolean) => void; +} + +const DataTable: FC = ({ + data, + limitResults, + setLimitResults, +}) => { + const [scale, setScale] = useState("exponential"); + const { useLocalTime } = useSettings(); + + const { result, resultType } = data; + const doFormat = result.length <= maxFormattableSeries; + + return ( + + {limitResults && + ["vector", "matrix"].includes(resultType) && + result.length > maxDisplayableSeries && ( + } + title="Showing limited results" + > + Fetched {data.result.length} metrics, only displaying first{" "} + {maxDisplayableSeries} for performance reasons. + setLimitResults(false)}> + Show all results + + + )} + + {!doFormat && ( + }> + Showing more than {maxFormattableSeries} series, turning off label + formatting to improve rendering performance. + + )} + + + + + {resultType === "vector" ? ( + limitSeries(result, limitResults).map((s, idx) => ( + + + + + + {s.value && s.value[1]} + {s.histogram && ( + + + + + + Count: {s.histogram[1].count} + + + Sum: {s.histogram[1].sum} + + + + x-axis scale: + + + + {histogramTable(s.histogram[1])} + + )} + + + )) + ) : resultType === "matrix" ? ( + limitSeries(result, limitResults).map((s, idx) => ( + + + + + + {s.values && + s.values.map((v, idx) => ( +
+ {v[1]}{" "} + + @ {v[0]} + +
+ ))} +
+
+ )) + ) : resultType === "scalar" ? ( + + Scalar value + {result[1]} + + ) : resultType === "string" ? ( + + String value + {result[1]} + + ) : ( + } + > + Invalid result value type + + )} +
+
+
+
+ ); +}; + +const histogramTable = (h: Histogram): ReactNode => ( + + + + Bucket range + Count + + + {h.buckets?.map((b, i) => ( + + + {bucketRangeString(b)} + + {b[3]} + + ))} + + +
+); + +export default DataTable; diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/Aggregation.tsx b/web/ui/mantine-ui/src/pages/query/ExplainViews/Aggregation.tsx new file mode 100644 index 0000000000..3facd692c1 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/Aggregation.tsx @@ -0,0 +1,159 @@ +import { FC } from "react"; +import ASTNode, { Aggregation, aggregationType } from "../../../promql/ast"; +import { labelNameList } from "../../../promql/format"; +import { parsePrometheusFloat } from "../../../lib/formatFloatValue"; +import { Card, Text } from "@mantine/core"; + +const describeAggregationType = ( + aggrType: aggregationType, + param: ASTNode | null +) => { + switch (aggrType) { + case "sum": + return "sums over the sample values of the input series"; + case "min": + return "takes the minimum of the sample values of the input series"; + case "max": + return "takes the maximum of the sample values of the input series"; + case "avg": + return "calculates the average of the sample values of the input series"; + case "stddev": + return "calculates the population standard deviation of the sample values of the input series"; + case "stdvar": + return "calculates the population standard variation of the sample values of the input series"; + case "count": + return "counts the number of input series"; + case "group": + return "groups the input series by the supplied grouping labels, while setting the sample value to 1"; + case "count_values": + if (param === null) { + throw new Error( + "encountered count_values() node without label parameter" + ); + } + if (param.type !== "stringLiteral") { + throw new Error( + "encountered count_values() node without string literal label parameter" + ); + } + return ( + <> + outputs one time series for each unique sample value in the input + series (each counting the number of occurrences of that value and + indicating the original value in the {labelNameList([param.val])}{" "} + label) + + ); + case "bottomk": + if (param === null || param.type !== "numberLiteral") { + return ( + <> + returns the bottom{" "} + K series by value + + ); + } + return ( + <> + returns the bottom{" "} + {param.val} series + by value + + ); + case "topk": + if (param === null || param.type !== "numberLiteral") { + return ( + <> + returns the top K{" "} + series by value + + ); + } + return ( + <> + returns the top{" "} + {param.val} series + by value + + ); + case "quantile": + if (param === null) { + throw new Error( + "encountered quantile() node without quantile parameter" + ); + } + if (param.type === "numberLiteral") { + return `calculates the ${param.val}th quantile (${ + parsePrometheusFloat(param.val) * 100 + }th percentile) over the sample values of the input series`; + } + return "calculates a quantile over the sample values of the input series"; + + case "limitk": + if (param === null || param.type !== "numberLiteral") { + return ( + <> + limits the output to{" "} + K series + + ); + } + return ( + <> + limits the output to{" "} + {param.val} series + + ); + case "limit_ratio": + if (param === null || param.type !== "numberLiteral") { + return "limits the output to a ratio of the input series"; + } + return ( + <> + limits the output to a ratio of{" "} + {param.val} ( + {parsePrometheusFloat(param.val) * 100}%) of the input series + + ); + default: + throw new Error(`invalid aggregation type ${aggrType}`); + } +}; + +const describeAggregationGrouping = (grouping: string[], without: boolean) => { + if (without) { + return ( + 
<>aggregating away the [{labelNameList(grouping)}] label dimensions + ); + } + + if (grouping.length === 1) { + return <>grouped by their {labelNameList(grouping)} label dimension; + } + + if (grouping.length > 1) { + return <>grouped by their [{labelNameList(grouping)}] label dimensions; + } + + return "aggregating away any label dimensions"; +}; + +interface AggregationExplainViewProps { + node: Aggregation; +} + +const AggregationExplainView: FC = ({ node }) => { + return ( + + + Aggregation + + + This node {describeAggregationType(node.op, node.param)},{" "} + {describeAggregationGrouping(node.grouping, node.without)}. + + + ); +}; + +export default AggregationExplainView; diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/BinaryExpr.tsx b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/BinaryExpr.tsx new file mode 100644 index 0000000000..837b84da31 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/BinaryExpr.tsx @@ -0,0 +1,106 @@ +import { FC } from "react"; +import { BinaryExpr } from "../../../../promql/ast"; +import serializeNode from "../../../../promql/serialize"; +import VectorScalarBinaryExprExplainView from "./VectorScalar"; +import VectorVectorBinaryExprExplainView from "./VectorVector"; +import ScalarScalarBinaryExprExplainView from "./ScalarScalar"; +import { nodeValueType } from "../../../../promql/utils"; +import { useSuspenseAPIQuery } from "../../../../api/api"; +import { InstantQueryResult } from "../../../../api/responseTypes/query"; +import { Card, Text } from "@mantine/core"; + +interface BinaryExprExplainViewProps { + node: BinaryExpr; +} + +const BinaryExprExplainView: FC = ({ node }) => { + const { data: lhs } = useSuspenseAPIQuery({ + path: `/query`, + params: { + query: serializeNode(node.lhs), + }, + }); + const { data: rhs } = useSuspenseAPIQuery({ + path: `/query`, + params: { + query: serializeNode(node.rhs), + }, + }); + + if ( + lhs.data.resultType !== nodeValueType(node.lhs) || + rhs.data.resultType !== nodeValueType(node.rhs) + ) { + // This can happen for a brief transitionary render when "node" has changed, but "lhs" and "rhs" + // haven't switched back to loading yet (leading to a crash in e.g. the vector-vector explain view). + return null; + } + + // Scalar-scalar binops. + if (lhs.data.resultType === "scalar" && rhs.data.resultType === "scalar") { + return ( + + + Scalar-to-scalar binary operation + + + + ); + } + + // Vector-scalar binops. + if (lhs.data.resultType === "scalar" && rhs.data.resultType === "vector") { + return ( + + + Scalar-to-vector binary operation + + + + ); + } + if (lhs.data.resultType === "vector" && rhs.data.resultType === "scalar") { + return ( + + + Vector-to-scalar binary operation + + + + ); + } + + // Vector-vector binops. 
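The explain sub-views that this component dispatches to all revolve around how PromQL binary operators treat their operands. As a rough illustration of the comparison semantics they describe (an editor's sketch with made-up names, not the app's real promql/binOp helpers), here is how a filter comparison such as `metric > 5` behaves for a single sample with and without the `bool` modifier:

// Sketch only: simplified stand-in for the comparison behaviour explained by the views below.
function compareSample(value: number, threshold: number, withBool: boolean): { value: number; keep: boolean } {
  const matches = value > threshold;
  if (withBool) {
    // `bool` keeps every series and returns 1/0 instead of filtering.
    return { value: matches ? 1 : 0, keep: true };
  }
  // Without `bool`, non-matching series are dropped; matching ones keep their sample value.
  return { value, keep: matches };
}

console.log(compareSample(7, 5, false)); // { value: 7, keep: true }
console.log(compareSample(3, 5, false)); // { value: 3, keep: false } -> series dropped
console.log(compareSample(3, 5, true));  // { value: 0, keep: true }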
+ if (lhs.data.resultType === "vector" && rhs.data.resultType === "vector") { + return ( + + + Vector-to-vector binary operation + + + + ); + } + + throw new Error("invalid binary operator argument types"); +}; + +export default BinaryExprExplainView; diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/ScalarScalar.tsx b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/ScalarScalar.tsx new file mode 100644 index 0000000000..874d824486 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/ScalarScalar.tsx @@ -0,0 +1,54 @@ +import { FC } from "react"; +import { BinaryExpr } from "../../../../promql/ast"; +import { scalarBinOp } from "../../../../promql/binOp"; +import { Table } from "@mantine/core"; +import { SampleValue } from "../../../../api/responseTypes/query"; +import { + formatPrometheusFloat, + parsePrometheusFloat, +} from "../../../../lib/formatFloatValue"; + +interface ScalarScalarBinaryExprExplainViewProps { + node: BinaryExpr; + lhs: SampleValue; + rhs: SampleValue; +} + +const ScalarScalarBinaryExprExplainView: FC< + ScalarScalarBinaryExprExplainViewProps +> = ({ node, lhs, rhs }) => { + const [lhsVal, rhsVal] = [ + parsePrometheusFloat(lhs[1]), + parsePrometheusFloat(rhs[1]), + ]; + + return ( + + + + Left value + Operator + Right value + + Result + + + + + {lhs[1]} + + {node.op} + {node.bool && " bool"} + + {rhs[1]} + = + + {formatPrometheusFloat(scalarBinOp(node.op, lhsVal, rhsVal))} + + + +
+ ); +}; + +export default ScalarScalarBinaryExprExplainView; diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/VectorScalar.tsx b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/VectorScalar.tsx new file mode 100644 index 0000000000..3a0652f818 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/VectorScalar.tsx @@ -0,0 +1,104 @@ +import { FC } from "react"; +import { BinaryExpr } from "../../../../promql/ast"; +// import SeriesName from '../../../../utils/SeriesName'; +import { isComparisonOperator } from "../../../../promql/utils"; +import { vectorElemBinop } from "../../../../promql/binOp"; +import { + InstantSample, + SampleValue, +} from "../../../../api/responseTypes/query"; +import { Alert, Table, Text } from "@mantine/core"; +import { + formatPrometheusFloat, + parsePrometheusFloat, +} from "../../../../lib/formatFloatValue"; +import SeriesName from "../../SeriesName"; + +interface VectorScalarBinaryExprExplainViewProps { + node: BinaryExpr; + scalar: SampleValue; + vector: InstantSample[]; + scalarLeft: boolean; +} + +const VectorScalarBinaryExprExplainView: FC< + VectorScalarBinaryExprExplainViewProps +> = ({ node, scalar, vector, scalarLeft }) => { + if (vector.length === 0) { + return ( + + One side of the binary operation produces 0 results, no matching + information shown. + + ); + } + + return ( + + + + {!scalarLeft && Left labels} + Left value + Operator + {scalarLeft && Right labels} + Right value + + Result + + + + {vector.map((sample: InstantSample, idx) => { + if (!sample.value) { + // TODO: Handle native histograms or show a better error message. + throw new Error("Native histograms are not supported yet"); + } + + const vecVal = parsePrometheusFloat(sample.value[1]); + const scalVal = parsePrometheusFloat(scalar[1]); + + let { value, keep } = scalarLeft + ? vectorElemBinop(node.op, scalVal, vecVal) + : vectorElemBinop(node.op, vecVal, scalVal); + if (isComparisonOperator(node.op) && scalarLeft) { + value = vecVal; + } + if (node.bool) { + value = Number(keep); + keep = true; + } + + const scalarCell = {scalar[1]}; + const vectorCells = ( + <> + + + + {sample.value[1]} + + ); + + return ( + + {scalarLeft ? scalarCell : vectorCells} + + {node.op} + {node.bool && " bool"} + + {scalarLeft ? vectorCells : scalarCell} + = + + {keep ? ( + formatPrometheusFloat(value) + ) : ( + dropped + )} + + + ); + })} + +
+ ); +}; + +export default VectorScalarBinaryExprExplainView; diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/VectorVector.tsx b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/VectorVector.tsx new file mode 100644 index 0000000000..e70b7a3f3e --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/VectorVector.tsx @@ -0,0 +1,658 @@ +import React, { FC, useState } from "react"; +import { BinaryExpr, vectorMatchCardinality } from "../../../../promql/ast"; +import { InstantSample, Metric } from "../../../../api/responseTypes/query"; +import { isComparisonOperator, isSetOperator } from "../../../../promql/utils"; +import { + VectorMatchError, + BinOpMatchGroup, + MatchErrorType, + computeVectorVectorBinOp, + filteredSampleValue, +} from "../../../../promql/binOp"; +import { formatNode, labelNameList } from "../../../../promql/format"; +import { + Alert, + Anchor, + Box, + Group, + List, + Switch, + Table, + Text, +} from "@mantine/core"; +import { useLocalStorage } from "@mantine/hooks"; +import { IconAlertTriangle } from "@tabler/icons-react"; +import SeriesName from "../../SeriesName"; + +// We use this color pool for two purposes: +// +// 1. To distinguish different match groups from each other. +// 2. To distinguish multiple series within one match group from each other. +const colorPool = [ + "#1f77b4", + "#ff7f0e", + "#2ca02c", + "#d62728", + "#9467bd", + "#8c564b", + "#e377c2", + "#7f7f7f", + "#bcbd22", + "#17becf", + "#393b79", + "#637939", + "#8c6d31", + "#843c39", + "#d6616b", + "#7b4173", + "#ce6dbd", + "#9c9ede", + "#c5b0d5", + "#c49c94", + "#f7b6d2", + "#c7c7c7", + "#dbdb8d", + "#9edae5", + "#393b79", + "#637939", + "#8c6d31", + "#843c39", + "#d6616b", + "#7b4173", + "#ce6dbd", + "#9c9ede", + "#c5b0d5", + "#c49c94", + "#f7b6d2", + "#c7c7c7", + "#dbdb8d", + "#9edae5", + "#17becf", + "#393b79", + "#637939", + "#8c6d31", + "#843c39", + "#d6616b", + "#7b4173", + "#ce6dbd", + "#9c9ede", + "#c5b0d5", + "#c49c94", + "#f7b6d2", +]; + +const rhsColorOffset = colorPool.length / 2 + 3; +const colorForIndex = (idx: number, offset?: number) => + `${colorPool[(idx + (offset || 0)) % colorPool.length]}80`; + +const seriesSwatch = (color: string) => ( + +); +interface VectorVectorBinaryExprExplainViewProps { + node: BinaryExpr; + lhs: InstantSample[]; + rhs: InstantSample[]; +} + +const noMatchLabels = ( + metric: Metric, + on: boolean, + labels: string[] +): Metric => { + const result: Metric = {}; + for (const name in metric) { + if (!(labels.includes(name) === on && (on || name !== "__name__"))) { + result[name] = metric[name]; + } + } + return result; +}; + +const explanationText = (node: BinaryExpr): React.ReactNode => { + const matching = node.matching!; + const [oneSide, manySide] = + matching.card === vectorMatchCardinality.oneToMany + ? ["left", "right"] + : ["right", "left"]; + + return ( + <> + + {isComparisonOperator(node.op) ? ( + <> + This node filters the series from the left-hand side based on the + result of a " + {node.op}" + comparison with matching series from the right-hand side. + + ) : ( + <> + This node calculates the result of applying the " + {node.op}" + operator between the sample values of matching series from two sets + of time series. + + )} + + + {(matching.labels.length > 0 || matching.on) && + (matching.on ? ( + + on( + {labelNameList(matching.labels)}):{" "} + {matching.labels.length > 0 ? 
( + <> + series on both sides are matched on the labels{" "} + {labelNameList(matching.labels)} + + ) : ( + <> + all series from one side are matched to all series on the + other side. + + )} + + ) : ( + + ignoring( + {labelNameList(matching.labels)}): series on both sides are + matched on all of their labels, except{" "} + {labelNameList(matching.labels)}. + + ))} + {matching.card === vectorMatchCardinality.oneToOne ? ( + + One-to-one match. Each series from the left-hand side is allowed to + match with at most one series on the right-hand side, and vice + versa. + + ) : ( + + + group_{manySide}({labelNameList(matching.include)}) + + : {matching.card} match. Each series from the {oneSide}-hand side is + allowed to match with multiple series from the {manySide}-hand side. + {matching.include.length !== 0 && ( + <> + {" "} + Any {labelNameList(matching.include)} labels found on the{" "} + {oneSide}-hand side are propagated into the result, in addition + to the match group's labels. + + )} + + )} + {node.bool && ( + + bool: Instead of + filtering series based on the outcome of the comparison for matched + series, keep all series, but return the comparison outcome as a + boolean 0 or{" "} + 1 sample value. + + )} + + + ); +}; + +const explainError = ( + binOp: BinaryExpr, + _mg: BinOpMatchGroup, + err: VectorMatchError +) => { + const fixes = ( + <> + + Possible fixes: + + + {err.type === MatchErrorType.multipleMatchesForOneToOneMatching && ( + + + + Allow {err.dupeSide === "left" ? "many-to-one" : "one-to-many"}{" "} + matching + + : If you want to allow{" "} + {err.dupeSide === "left" ? "many-to-one" : "one-to-many"}{" "} + matching, you need to explicitly request it by adding a{" "} + + group_{err.dupeSide}() + {" "} + modifier to the operator: + + + {formatNode( + { + ...binOp, + matching: { + ...(binOp.matching + ? binOp.matching + : { labels: [], on: false, include: [] }), + card: + err.dupeSide === "left" + ? vectorMatchCardinality.manyToOne + : vectorMatchCardinality.oneToMany, + }, + }, + true, + 1 + )} + + + )} + + Update your matching parameters: Consider including + more differentiating labels in your matching modifiers (via{" "} + on() /{" "} + ignoring()) to + split multiple series into distinct match groups. + + + Aggregate the input: Consider aggregating away the + extra labels that create multiple series per group before applying the + binary operation. + + + + ); + + switch (err.type) { + case MatchErrorType.multipleMatchesForOneToOneMatching: + return ( + <> + + Binary operators only allow one-to-one matching by + default, but we found{" "} + multiple series on the {err.dupeSide} side for this + match group. + + {fixes} + + ); + case MatchErrorType.multipleMatchesOnBothSides: + return ( + <> + + We found multiple series on both sides for this + match group. Since many-to-many matching is not + supported, you need to ensure that at least one of the sides only + yields a single series. + + {fixes} + + ); + case MatchErrorType.multipleMatchesOnOneSide: { + const [oneSide, manySide] = + binOp.matching!.card === vectorMatchCardinality.oneToMany + ? ["left", "right"] + : ["right", "left"]; + return ( + <> + + You requested{" "} + + {oneSide === "right" ? "many-to-one" : "one-to-many"} matching + {" "} + via{" "} + + group_{manySide}() + + , but we also found{" "} + multiple series on the {oneSide} side of the match + group. Make sure that the {oneSide} side only contains a single + series. 
+ + {fixes} + + ); + } + default: + throw new Error("unknown match error"); + } +}; + +const VectorVectorBinaryExprExplainView: FC< + VectorVectorBinaryExprExplainViewProps +> = ({ node, lhs, rhs }) => { + // TODO: Don't use Mantine's local storage as a one-off here. Decide whether we + // want to keep Redux, and then do it only via one or the other everywhere. + const [showSampleValues, setShowSampleValues] = useLocalStorage({ + key: "queryPage.explain.binaryOperators.showSampleValues", + defaultValue: false, + }); + + const [maxGroups, setMaxGroups] = useState(100); + const [maxSeriesPerGroup, setMaxSeriesPerGroup] = useState< + number | undefined + >(100); + + const { matching } = node; + if (matching === null) { + // The parent should make sure to only pass in vector-vector binops that have their "matching" field filled out. + throw new Error("missing matching parameters in vector-to-vector binop"); + } + + const { groups: matchGroups, numGroups } = computeVectorVectorBinOp( + node.op, + matching, + node.bool, + lhs, + rhs, + { + maxGroups: maxGroups, + maxSeriesPerGroup: maxSeriesPerGroup, + } + ); + const errCount = Object.values(matchGroups).filter((mg) => mg.error).length; + + return ( + <> + {explanationText(node)} + + + setShowSampleValues(event.currentTarget.checked)} + /> + + + {numGroups > Object.keys(matchGroups).length && ( + }> + Too many match groups to display, only showing{" "} + {Object.keys(matchGroups).length} out of {numGroups} groups. +
+
+ setMaxGroups(undefined)}> + Show all groups + +
+ )} + + {errCount > 0 && ( + }> + Found matching issues in {errCount} match group + {errCount > 1 ? "s" : ""}. See below for per-group error details. + + )} + + + + {Object.values(matchGroups).map((mg, mgIdx) => { + const { groupLabels, lhs, lhsCount, rhs, rhsCount, result, error } = + mg; + + const matchGroupTitleRow = (color: string) => ( + + + + + + ); + + const matchGroupTable = ( + series: InstantSample[], + seriesCount: number, + color: string, + colorOffset?: number + ) => ( + +
+ + {seriesCount === 0 ? ( + + + no matching series + + + ) : ( + <> + {matchGroupTitleRow(color)} + {series.map((s, sIdx) => { + if (s.value === undefined) { + // TODO: Figure out how to handle native histograms. + throw new Error( + "Native histograms are not supported yet" + ); + } + + return ( + + + + {seriesSwatch( + colorForIndex(sIdx, colorOffset) + )} + + + + + {showSampleValues && ( + {s.value[1]} + )} + + ); + })} + + )} + {seriesCount > series.length && ( + + + {seriesCount - series.length} more series omitted +   –   + setMaxSeriesPerGroup(undefined)} + > + Show all series + + + + )} + +
+
+ ); + + const groupColor = colorPool[mgIdx % colorPool.length]; + + const lhsTable = matchGroupTable(lhs, lhsCount, groupColor); + const rhsTable = matchGroupTable( + rhs, + rhsCount, + groupColor, + rhsColorOffset + ); + + const resultTable = ( + + + + {error !== null ? ( + + + error, result omitted + + + ) : result.length === 0 ? ( + + + dropped + + + ) : error !== null ? ( + + + error, result omitted + + + ) : ( + <> + {result.map(({ sample, manySideIdx }, resIdx) => { + if (sample.value === undefined) { + // TODO: Figure out how to handle native histograms. + throw new Error( + "Native histograms are not supported yet" + ); + } + + const filtered = + sample.value[1] === filteredSampleValue; + const [lIdx, rIdx] = + matching.card === vectorMatchCardinality.oneToMany + ? [0, manySideIdx] + : [manySideIdx, 0]; + + return ( + + + + + {seriesSwatch(colorForIndex(lIdx))} + + {seriesSwatch( + colorForIndex(rIdx, rhsColorOffset) + )} + + + + + + {showSampleValues && ( + + {filtered ? ( + + filtered + + ) : ( + {sample.value[1]} + )} + + )} + + ); + })} + + )} + +
+
+ ); + + return ( + + {mgIdx !== 0 && } + + + {error && ( + } + > + {explainError(node, mg, error)} + + )} + + + + + {lhsTable} + + + {node.op} + {node.bool && " bool"} + + + {rhsTable} + + = + + {resultTable} + + + + ); + })} + + + + ); +}; + +export default VectorVectorBinaryExprExplainView; diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/ExplainView.module.css b/web/ui/mantine-ui/src/pages/query/ExplainViews/ExplainView.module.css new file mode 100644 index 0000000000..7cb36c7624 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/ExplainView.module.css @@ -0,0 +1,8 @@ +.funcDoc code { + background-color: light-dark( + var(--mantine-color-gray-1), + var(--mantine-color-dark-5) + ); + padding: 0.05em 0.2em; + border-radius: 0.2em; +} diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/ExplainView.tsx b/web/ui/mantine-ui/src/pages/query/ExplainViews/ExplainView.tsx new file mode 100644 index 0000000000..e089c7851a --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/ExplainView.tsx @@ -0,0 +1,201 @@ +import { FC } from "react"; +import { Alert, Text, Anchor, Card, Divider } from "@mantine/core"; +import ASTNode, { nodeType } from "../../../promql/ast"; +// import AggregationExplainView from "./Aggregation"; +// import BinaryExprExplainView from "./BinaryExpr/BinaryExpr"; +// import SelectorExplainView from "./Selector"; +import funcDocs from "../../../promql/functionDocs"; +import { escapeString } from "../../../promql/utils"; +import { formatPrometheusDuration } from "../../../lib/formatTime"; +import classes from "./ExplainView.module.css"; +import SelectorExplainView from "./Selector"; +import AggregationExplainView from "./Aggregation"; +import BinaryExprExplainView from "./BinaryExpr/BinaryExpr"; +import { IconInfoCircle } from "@tabler/icons-react"; +interface ExplainViewProps { + node: ASTNode | null; + treeShown: boolean; + showTree: () => void; +} + +const ExplainView: FC = ({ + node, + treeShown, + showTree: setShowTree, +}) => { + if (node === null) { + return ( + }> + This tab can help you understand the behavior of individual components + of a query. +
+
+ To use the Explain view,{" "} + {!treeShown && ( + <> + + enable the query tree view + {" "} + (also available via the expression input menu) and then + + )}{" "} + select a node in the tree above. +
+ ); + } + + switch (node.type) { + case nodeType.aggregation: + return ; + case nodeType.binaryExpr: + return ; + case nodeType.call: + return ( + + + Function call + + + This node calls the{" "} + + + {node.func.name}() + + {" "} + function{node.args.length > 0 ? " on the provided inputs" : ""}. + + + {/* TODO: Some docs, like x_over_time, have relative links pointing back to the Prometheus docs, + make sure to modify those links in the docs extraction so they work from the explain view */} + + {funcDocs[node.func.name]} + + + ); + case nodeType.matrixSelector: + return ; + case nodeType.subquery: + return ( + + + Subquery + + + This node evaluates the passed expression as a subquery over the + last{" "} + + {formatPrometheusDuration(node.range)} + {" "} + at a query resolution{" "} + {node.step > 0 ? ( + <> + of{" "} + + {formatPrometheusDuration(node.step)} + + + ) : ( + "equal to the default rule evaluation interval" + )} + {node.timestamp !== null ? ( + <> + , evaluated relative to an absolute evaluation timestamp of{" "} + + {(node.timestamp / 1000).toFixed(3)} + + + ) : node.startOrEnd !== null ? ( + <> + , evaluated relative to the {node.startOrEnd} of the query range + + ) : ( + <> + )} + {node.offset === 0 ? ( + <> + ) : node.offset > 0 ? ( + <> + , time-shifted{" "} + + {formatPrometheusDuration(node.offset)} + {" "} + into the past + + ) : ( + <> + , time-shifted{" "} + + {formatPrometheusDuration(-node.offset)} + {" "} + into the future + + )} + . + + + ); + case nodeType.numberLiteral: + return ( + + + Number literal + + + A scalar number literal with the value{" "} + {node.val}. + + + ); + case nodeType.parenExpr: + return ( + + + Parentheses + + + Parentheses that contain a sub-expression to be evaluated. + + + ); + case nodeType.stringLiteral: + return ( + + + String literal + + + A string literal with the value{" "} + + "{escapeString(node.val)}" + + . + + + ); + case nodeType.unaryExpr: + return ( + + + Unary expression + + + A unary expression that{" "} + {node.op === "+" + ? "does not affect the expression it is applied to" + : "changes the sign of the expression it is applied to"} + . + + + ); + case nodeType.vectorSelector: + return ; + default: + throw new Error("invalid node type"); + } +}; + +export default ExplainView; diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/Selector.tsx b/web/ui/mantine-ui/src/pages/query/ExplainViews/Selector.tsx new file mode 100644 index 0000000000..1b71e3591d --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/Selector.tsx @@ -0,0 +1,232 @@ +import { FC, ReactNode } from "react"; +import { + VectorSelector, + MatrixSelector, + nodeType, + LabelMatcher, + matchType, +} from "../../../promql/ast"; +import { escapeString } from "../../../promql/utils"; +import { useSuspenseAPIQuery } from "../../../api/api"; +import { Card, Text, Divider, List } from "@mantine/core"; +import { MetadataResult } from "../../../api/responseTypes/metadata"; +import { formatPrometheusDuration } from "../../../lib/formatTime"; +import { useSettings } from "../../../state/settingsSlice"; + +interface SelectorExplainViewProps { + node: VectorSelector | MatrixSelector; +} + +const matchingCriteriaList = ( + name: string, + matchers: LabelMatcher[] +): ReactNode => { + return ( + + {name.length > 0 && ( + + The metric name is{" "} + {name}. 
+ + )} + {matchers + .filter((m) => !(m.name === "__name__")) + .map((m) => { + switch (m.type) { + case matchType.equal: + return ( + + + {m.name} + + {m.type} + + "{escapeString(m.value)}" + + : The label{" "} + + {m.name} + {" "} + is exactly{" "} + + "{escapeString(m.value)}" + + . + + ); + case matchType.notEqual: + return ( + + + {m.name} + + {m.type} + + "{escapeString(m.value)}" + + : The label{" "} + + {m.name} + {" "} + is not{" "} + + "{escapeString(m.value)}" + + . + + ); + case matchType.matchRegexp: + return ( + + + {m.name} + + {m.type} + + "{escapeString(m.value)}" + + : The label{" "} + + {m.name} + {" "} + matches the regular expression{" "} + + "{escapeString(m.value)}" + + . + + ); + case matchType.matchNotRegexp: + return ( + + + {m.name} + + {m.type} + + "{escapeString(m.value)}" + + : The label{" "} + + {m.name} + {" "} + does not match the regular expression{" "} + + "{escapeString(m.value)}" + + . + + ); + default: + throw new Error("invalid matcher type"); + } + })} + + ); +}; + +const SelectorExplainView: FC = ({ node }) => { + const baseMetricName = node.name.replace(/(_count|_sum|_bucket)$/, ""); + const { lookbackDelta } = useSettings(); + const { data: metricMeta } = useSuspenseAPIQuery({ + path: `/metadata`, + params: { + metric: baseMetricName, + }, + }); + + return ( + + + {node.type === nodeType.vectorSelector ? "Instant" : "Range"} vector + selector + + + {metricMeta.data === undefined || + metricMeta.data[baseMetricName] === undefined || + metricMeta.data[baseMetricName].length < 1 ? ( + <>No metric metadata found. + ) : ( + <> + Metric help:{" "} + {metricMeta.data[baseMetricName][0].help} +
+ Metric type:{" "} + {metricMeta.data[baseMetricName][0].type} + + )} +
+ + + {node.type === nodeType.vectorSelector ? ( + <> + This node selects the latest (non-stale) sample value within the + last {lookbackDelta} + + ) : ( + <> + This node selects{" "} + + {formatPrometheusDuration(node.range)} + {" "} + of data going backward from the evaluation timestamp + + )} + {node.timestamp !== null ? ( + <> + , evaluated relative to an absolute evaluation timestamp of{" "} + + {(node.timestamp / 1000).toFixed(3)} + + + ) : node.startOrEnd !== null ? ( + <>, evaluated relative to the {node.startOrEnd} of the query range + ) : ( + <> + )} + {node.offset === 0 ? ( + <> + ) : node.offset > 0 ? ( + <> + , time-shifted{" "} + + {formatPrometheusDuration(node.offset)} + {" "} + into the past, + + ) : ( + <> + , time-shifted{" "} + + {formatPrometheusDuration(-node.offset)} + {" "} + into the future, + + )}{" "} + for any series that match all of the following criteria: + + {matchingCriteriaList(node.name, node.matchers)} + + If a series has no values in the last{" "} + + {node.type === nodeType.vectorSelector + ? lookbackDelta + : formatPrometheusDuration(node.range)} + + {node.offset > 0 && ( + <> + {" "} + (relative to the time-shifted instant{" "} + + {formatPrometheusDuration(node.offset)} + {" "} + in the past) + + )} + , the series will not be returned. + +
+ ); +}; + +export default SelectorExplainView; diff --git a/web/ui/mantine-ui/src/pages/query/ExpressionInput.module.css b/web/ui/mantine-ui/src/pages/query/ExpressionInput.module.css new file mode 100644 index 0000000000..027919f2e0 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExpressionInput.module.css @@ -0,0 +1,13 @@ +.input { + /* border: calc(0.0625rem * var(--mantine-scale)) solid var(--input-bd); */ + border-radius: var(--mantine-radius-default); + flex: auto; + /* padding: 4px 0 0 8px; */ + /* font-size: 15px; */ + /* font-family: "DejaVu Sans Mono"; */ + + &:focus-within { + outline: rem(1.3px) solid var(--mantine-color-blue-filled); + border-color: transparent; + } +} diff --git a/web/ui/mantine-ui/src/pages/query/ExpressionInput.tsx b/web/ui/mantine-ui/src/pages/query/ExpressionInput.tsx new file mode 100644 index 0000000000..a4b26cd910 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExpressionInput.tsx @@ -0,0 +1,374 @@ +import { + ActionIcon, + Box, + Button, + Group, + InputBase, + Loader, + Menu, + Modal, + Skeleton, + useComputedColorScheme, +} from "@mantine/core"; +import { + CompleteStrategy, + PromQLExtension, + newCompleteStrategy, +} from "@prometheus-io/codemirror-promql"; +import { FC, Suspense, useEffect, useRef, useState } from "react"; +import CodeMirror, { + EditorState, + EditorView, + Prec, + ReactCodeMirrorRef, + highlightSpecialChars, + keymap, + placeholder, +} from "@uiw/react-codemirror"; +import { + baseTheme, + darkPromqlHighlighter, + darkTheme, + lightTheme, + promqlHighlighter, +} from "../../codemirror/theme"; +import { + bracketMatching, + indentOnInput, + syntaxHighlighting, + syntaxTree, +} from "@codemirror/language"; +import classes from "./ExpressionInput.module.css"; +import { + CompletionContext, + CompletionResult, + autocompletion, + closeBrackets, + closeBracketsKeymap, + completionKeymap, +} from "@codemirror/autocomplete"; +import { + defaultKeymap, + history, + historyKeymap, + insertNewlineAndIndent, +} from "@codemirror/commands"; +import { highlightSelectionMatches } from "@codemirror/search"; +import { lintKeymap } from "@codemirror/lint"; +import { + IconAlignJustified, + IconBinaryTree, + IconDotsVertical, + IconSearch, + IconTerminal, + IconTrash, +} from "@tabler/icons-react"; +import { useAPIQuery } from "../../api/api"; +import { notifications } from "@mantine/notifications"; +import { useSettings } from "../../state/settingsSlice"; +import MetricsExplorer from "./MetricsExplorer/MetricsExplorer"; +import ErrorBoundary from "../../components/ErrorBoundary"; +import { useAppSelector } from "../../state/hooks"; +import { inputIconStyle, menuIconStyle } from "../../styles"; + +const promqlExtension = new PromQLExtension(); + +// Autocompletion strategy that wraps the main one and enriches +// it with past query items. +export class HistoryCompleteStrategy implements CompleteStrategy { + private complete: CompleteStrategy; + private queryHistory: string[]; + constructor(complete: CompleteStrategy, queryHistory: string[]) { + this.complete = complete; + this.queryHistory = queryHistory; + } + + promQL( + context: CompletionContext + ): Promise | CompletionResult | null { + return Promise.resolve(this.complete.promQL(context)).then((res) => { + const { state, pos } = context; + const tree = syntaxTree(state).resolve(pos, -1); + const start = res != null ? 
res.from : tree.from; + + if (start !== 0) { + return res; + } + + const historyItems: CompletionResult = { + from: start, + to: pos, + options: this.queryHistory.map((q) => ({ + label: q.length < 80 ? q : q.slice(0, 76).concat("..."), + detail: "past query", + apply: q, + info: q.length < 80 ? undefined : q, + })), + validFor: /^[a-zA-Z0-9_:]+$/, + }; + + if (res !== null) { + historyItems.options = historyItems.options.concat(res.options); + } + return historyItems; + }); + } +} + +interface ExpressionInputProps { + initialExpr: string; + metricNames: string[]; + executeQuery: (expr: string) => void; + treeShown: boolean; + setShowTree: (showTree: boolean) => void; + removePanel: () => void; +} + +const ExpressionInput: FC = ({ + initialExpr, + metricNames, + executeQuery, + removePanel, + treeShown, + setShowTree, +}) => { + const theme = useComputedColorScheme(); + const { queryHistory } = useAppSelector((state) => state.queryPage); + const { + pathPrefix, + enableAutocomplete, + enableSyntaxHighlighting, + enableLinter, + enableQueryHistory, + } = useSettings(); + const [expr, setExpr] = useState(initialExpr); + useEffect(() => { + setExpr(initialExpr); + }, [initialExpr]); + + const { + data: formatResult, + error: formatError, + isFetching: isFormatting, + refetch: formatQuery, + } = useAPIQuery({ + path: "/format_query", + params: { + query: expr, + }, + enabled: false, + }); + + useEffect(() => { + if (formatError) { + notifications.show({ + color: "red", + title: "Error formatting query", + message: formatError.message, + }); + return; + } + + if (formatResult) { + setExpr(formatResult.data); + notifications.show({ + title: "Expression formatted", + message: "Expression formatted successfully!", + }); + } + }, [formatResult, formatError]); + + const cmRef = useRef(null); + + const [showMetricsExplorer, setShowMetricsExplorer] = useState(false); + + // (Re)initialize editor based on settings / setting changes. + useEffect(() => { + // Build the dynamic part of the config. + promqlExtension + .activateCompletion(enableAutocomplete) + .activateLinter(enableLinter) + .setComplete({ + completeStrategy: new HistoryCompleteStrategy( + newCompleteStrategy({ + remote: { + url: pathPrefix, + cache: { initialMetricList: metricNames }, + }, + }), + enableQueryHistory ? queryHistory : [] + ), + }); + }, [ + pathPrefix, + metricNames, + enableAutocomplete, + enableLinter, + enableQueryHistory, + queryHistory, + ]); // TODO: Maybe use dynamic config compartment again as in the old UI? + + return ( + + {/* eslint-disable-next-line @typescript-eslint/no-explicit-any */} + + leftSection={ + isFormatting ? : + } + rightSection={ + + + + + + + + Query options + } + onClick={() => setShowMetricsExplorer(true)} + > + Explore metrics + + } + onClick={() => formatQuery()} + disabled={ + isFormatting || expr === "" || expr === formatResult?.data + } + > + Format expression + + } + onClick={() => setShowTree(!treeShown)} + > + {treeShown ? 
"Hide" : "Show"} tree view + + } + onClick={removePanel} + > + Remove query + + + + } + component={CodeMirror} + className={classes.input} + basicSetup={false} + value={expr} + onChange={setExpr} + autoFocus + ref={cmRef} + extensions={[ + baseTheme, + highlightSpecialChars(), + history(), + EditorState.allowMultipleSelections.of(true), + indentOnInput(), + bracketMatching(), + closeBrackets(), + autocompletion(), + highlightSelectionMatches(), + EditorView.lineWrapping, + keymap.of([ + ...closeBracketsKeymap, + ...defaultKeymap, + ...historyKeymap, + ...completionKeymap, + ...lintKeymap, + ]), + placeholder("Enter expression (press Shift+Enter for newlines)"), + enableSyntaxHighlighting + ? syntaxHighlighting( + theme === "light" ? promqlHighlighter : darkPromqlHighlighter + ) + : [], + promqlExtension.asExtension(), + theme === "light" ? lightTheme : darkTheme, + keymap.of([ + { + key: "Escape", + run: (v: EditorView): boolean => { + v.contentDOM.blur(); + return false; + }, + }, + ]), + Prec.highest( + keymap.of([ + { + key: "Enter", + run: (): boolean => { + executeQuery(expr); + return true; + }, + }, + { + key: "Shift-Enter", + run: insertNewlineAndIndent, + }, + ]) + ), + ]} + multiline + /> + + + setShowMetricsExplorer(false)} + title="Explore metrics" + > + + + {Array.from(Array(20), (_, i) => ( + + ))} + + } + > + { + if (cmRef.current && cmRef.current.view) { + const view = cmRef.current.view; + view.dispatch( + view.state.update({ + changes: { + from: view.state.selection.ranges[0].from, + to: view.state.selection.ranges[0].to, + insert: text, + }, + }) + ); + } + }} + close={() => setShowMetricsExplorer(false)} + /> + + + + + ); +}; + +export default ExpressionInput; diff --git a/web/ui/mantine-ui/src/pages/query/ExpressionInput.tsx.old b/web/ui/mantine-ui/src/pages/query/ExpressionInput.tsx.old new file mode 100644 index 0000000000..52b10f6713 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExpressionInput.tsx.old @@ -0,0 +1,367 @@ +import React, { FC, useState, useEffect, useRef } from "react"; + +import { + EditorView, + highlightSpecialChars, + keymap, + ViewUpdate, + placeholder, +} from "@codemirror/view"; +import { EditorState, Prec, Compartment } from "@codemirror/state"; +import { + bracketMatching, + indentOnInput, + syntaxHighlighting, + syntaxTree, +} from "@codemirror/language"; +import { + defaultKeymap, + history, + historyKeymap, + insertNewlineAndIndent, +} from "@codemirror/commands"; +import { highlightSelectionMatches } from "@codemirror/search"; +import { lintKeymap } from "@codemirror/lint"; +import { + autocompletion, + completionKeymap, + CompletionContext, + CompletionResult, + closeBrackets, + closeBracketsKeymap, +} from "@codemirror/autocomplete"; +import { + baseTheme, + lightTheme, + darkTheme, + promqlHighlighter, + darkPromqlHighlighter, +} from "../../codemirror/theme"; + +import { + CompleteStrategy, + PromQLExtension, +} from "@prometheus-io/codemirror-promql"; +import { newCompleteStrategy } from "@prometheus-io/codemirror-promql/dist/esm/complete"; + +const promqlExtension = new PromQLExtension(); + +interface ExpressionInputProps { + value: string; + onChange: (expr: string) => void; + queryHistory: string[]; + metricNames: string[]; + executeQuery: () => void; +} + +const dynamicConfigCompartment = new Compartment(); + +// Autocompletion strategy that wraps the main one and enriches +// it with past query items. 
+export class HistoryCompleteStrategy implements CompleteStrategy { + private complete: CompleteStrategy; + private queryHistory: string[]; + constructor(complete: CompleteStrategy, queryHistory: string[]) { + this.complete = complete; + this.queryHistory = queryHistory; + } + + promQL( + context: CompletionContext + ): Promise | CompletionResult | null { + return Promise.resolve(this.complete.promQL(context)).then((res) => { + const { state, pos } = context; + const tree = syntaxTree(state).resolve(pos, -1); + const start = res != null ? res.from : tree.from; + + if (start !== 0) { + return res; + } + + const historyItems: CompletionResult = { + from: start, + to: pos, + options: this.queryHistory.map((q) => ({ + label: q.length < 80 ? q : q.slice(0, 76).concat("..."), + detail: "past query", + apply: q, + info: q.length < 80 ? undefined : q, + })), + validFor: /^[a-zA-Z0-9_:]+$/, + }; + + if (res !== null) { + historyItems.options = historyItems.options.concat(res.options); + } + return historyItems; + }); + } +} + +const ExpressionInput: FC = ({ + value, + onChange, + queryHistory, + metricNames, +}) => { + const containerRef = useRef(null); + const viewRef = useRef(null); + const [showMetricsExplorer, setShowMetricsExplorer] = + useState(false); + const pathPrefix = usePathPrefix(); + const { theme } = useTheme(); + + const [formatError, setFormatError] = useState(null); + const [isFormatting, setIsFormatting] = useState(false); + const [exprFormatted, setExprFormatted] = useState(false); + + // (Re)initialize editor based on settings / setting changes. + useEffect(() => { + // Build the dynamic part of the config. + promqlExtension + .activateCompletion(enableAutocomplete) + .activateLinter(enableLinter) + .setComplete({ + completeStrategy: new HistoryCompleteStrategy( + newCompleteStrategy({ + remote: { + url: pathPrefix, + cache: { initialMetricList: metricNames }, + }, + }), + queryHistory + ), + }); + + let highlighter = syntaxHighlighting( + theme === "dark" ? darkPromqlHighlighter : promqlHighlighter + ); + if (theme === "dark") { + highlighter = syntaxHighlighting(darkPromqlHighlighter); + } + + const dynamicConfig = [ + enableHighlighting ? highlighter : [], + promqlExtension.asExtension(), + theme === "dark" ? darkTheme : lightTheme, + ]; + + // Create or reconfigure the editor. + const view = viewRef.current; + if (view === null) { + // If the editor does not exist yet, create it. + if (!containerRef.current) { + throw new Error("expected CodeMirror container element to exist"); + } + + const startState = EditorState.create({ + doc: value, + extensions: [ + baseTheme, + highlightSpecialChars(), + history(), + EditorState.allowMultipleSelections.of(true), + indentOnInput(), + bracketMatching(), + closeBrackets(), + autocompletion(), + highlightSelectionMatches(), + EditorView.lineWrapping, + keymap.of([ + ...closeBracketsKeymap, + ...defaultKeymap, + ...historyKeymap, + ...completionKeymap, + ...lintKeymap, + ]), + placeholder("Expression (press Shift+Enter for newlines)"), + dynamicConfigCompartment.of(dynamicConfig), + // This keymap is added without precedence so that closing the autocomplete dropdown + // via Escape works without blurring the editor. 
+ keymap.of([ + { + key: "Escape", + run: (v: EditorView): boolean => { + v.contentDOM.blur(); + return false; + }, + }, + ]), + Prec.highest( + keymap.of([ + { + key: "Enter", + run: (v: EditorView): boolean => { + executeQuery(); + return true; + }, + }, + { + key: "Shift-Enter", + run: insertNewlineAndIndent, + }, + ]) + ), + EditorView.updateListener.of((update: ViewUpdate): void => { + if (update.docChanged) { + onExpressionChange(update.state.doc.toString()); + setExprFormatted(false); + } + }), + ], + }); + + const view = new EditorView({ + state: startState, + parent: containerRef.current, + }); + + viewRef.current = view; + + view.focus(); + } else { + // The editor already exists, just reconfigure the dynamically configured parts. + view.dispatch( + view.state.update({ + effects: dynamicConfigCompartment.reconfigure(dynamicConfig), + }) + ); + } + // "value" is only used in the initial render, so we don't want to + // re-run this effect every time that "value" changes. + // + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [ + enableAutocomplete, + enableHighlighting, + enableLinter, + executeQuery, + onExpressionChange, + queryHistory, + theme, + ]); + + const insertAtCursor = (value: string) => { + const view = viewRef.current; + if (view === null) { + return; + } + const { from, to } = view.state.selection.ranges[0]; + view.dispatch( + view.state.update({ + changes: { from, to, insert: value }, + }) + ); + }; + + const formatExpression = () => { + setFormatError(null); + setIsFormatting(true); + + fetch( + `${pathPrefix}/${API_PATH}/format_query?${new URLSearchParams({ + query: value, + })}`, + { + cache: "no-store", + credentials: "same-origin", + } + ) + .then((resp) => { + if (!resp.ok && resp.status !== 400) { + throw new Error(`format HTTP request failed: ${resp.statusText}`); + } + + return resp.json(); + }) + .then((json) => { + if (json.status !== "success") { + throw new Error(json.error || "invalid response JSON"); + } + + const view = viewRef.current; + if (view === null) { + return; + } + + view.dispatch( + view.state.update({ + changes: { from: 0, to: view.state.doc.length, insert: json.data }, + }) + ); + setExprFormatted(true); + }) + .catch((err) => { + setFormatError(err.message); + }) + .finally(() => { + setIsFormatting(false); + }); + }; + + return ( + <> + + + + {loading ? ( + + ) : ( + + )} + + +
+ + + + + + + + {formatError && ( + Error formatting expression: {formatError} + )} + + + + ); +}; + +export default ExpressionInput; diff --git a/web/ui/mantine-ui/src/pages/query/Graph.module.css b/web/ui/mantine-ui/src/pages/query/Graph.module.css new file mode 100644 index 0000000000..5b7133058c --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/Graph.module.css @@ -0,0 +1,11 @@ +.chartWrapper { + border: 1px solid + light-dark(var(--mantine-color-gray-3), var(--mantine-color-dark-5)); + border-radius: var(--mantine-radius-default); +} + +.uplotChart { + width: 100%; + height: 100%; + padding: 15px; +} diff --git a/web/ui/mantine-ui/src/pages/query/Graph.tsx b/web/ui/mantine-ui/src/pages/query/Graph.tsx new file mode 100644 index 0000000000..c25d5a98eb --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/Graph.tsx @@ -0,0 +1,192 @@ +import { FC, useEffect, useId, useState } from "react"; +import { Alert, Skeleton, Box, LoadingOverlay, Stack } from "@mantine/core"; +import { IconAlertTriangle, IconInfoCircle } from "@tabler/icons-react"; +import { RangeQueryResult } from "../../api/responseTypes/query"; +import { SuccessAPIResponse, useAPIQuery } from "../../api/api"; +import classes from "./Graph.module.css"; +import { + GraphDisplayMode, + GraphResolution, + getEffectiveResolution, +} from "../../state/queryPageSlice"; +import "uplot/dist/uPlot.min.css"; +import "./uplot.css"; +import { useElementSize } from "@mantine/hooks"; +import UPlotChart, { UPlotChartRange } from "./UPlotChart"; +import ASTNode, { nodeType } from "../../promql/ast"; +import serializeNode from "../../promql/serialize"; + +export interface GraphProps { + expr: string; + node: ASTNode | null; + endTime: number | null; + range: number; + resolution: GraphResolution; + showExemplars: boolean; + displayMode: GraphDisplayMode; + retriggerIdx: number; + onSelectRange: (start: number, end: number) => void; +} + +const Graph: FC = ({ + expr, + node, + endTime, + range, + resolution, + showExemplars, + displayMode, + retriggerIdx, + onSelectRange, +}) => { + const { ref, width } = useElementSize(); + const [rerender, setRerender] = useState(true); + + const effectiveExpr = + node === null + ? expr + : serializeNode( + node.type === nodeType.matrixSelector + ? { + type: nodeType.vectorSelector, + name: node.name, + matchers: node.matchers, + offset: node.offset, + timestamp: node.timestamp, + startOrEnd: node.startOrEnd, + } + : node + ); + + const effectiveEndTime = (endTime !== null ? endTime : Date.now()) / 1000; + const startTime = effectiveEndTime - range / 1000; + const effectiveResolution = getEffectiveResolution(resolution, range) / 1000; + + const { data, error, isFetching, isLoading, refetch } = + useAPIQuery({ + key: [useId()], + path: "/query_range", + params: { + query: effectiveExpr, + step: effectiveResolution.toString(), + start: startTime.toString(), + end: effectiveEndTime.toString(), + }, + enabled: effectiveExpr !== "", + }); + + // Bundle the chart data and the displayed range together. This has two purposes: + // 1. If we update them separately, we cause unnecessary rerenders of the uPlot chart itself. + // 2. We want to keep displaying the old range in the chart while a query for a new range + // is still in progress. 
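The bundling described in the comment above boils down to keeping the fetched data and the range it was queried for in a single state value, so the chart never renders a new range paired with stale data. A minimal sketch of that pattern with a placeholder range type (not the component's real types):

// Sketch: one state object updated atomically; while a new query is in flight,
// the previously displayed data/range pair stays on screen unchanged.
import { useState } from "react";

type ChartRange = { startTime: number; endTime: number; resolution: number };

function useChartState<T>() {
  const [chart, setChart] = useState<{ data: T; range: ChartRange } | null>(null);
  return { chart, setChart };
}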
+ const [dataAndRange, setDataAndRange] = useState<{ + data: SuccessAPIResponse; + range: UPlotChartRange; + } | null>(null); + + useEffect(() => { + if (data !== undefined) { + setDataAndRange({ + data: data, + range: { + startTime: startTime, + endTime: effectiveEndTime, + resolution: effectiveResolution, + }, + }); + } + // We actually want to update the displayed range only once the new data is there, + // so we don't want to include any of the range-related parameters in the dependencies. + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [data]); + + // Re-execute the query when the user presses Enter (or hits the Execute button). + useEffect(() => { + effectiveExpr !== "" && refetch(); + }, [retriggerIdx, refetch, effectiveExpr, endTime, range, resolution]); + + // The useElementSize hook above only gets a valid size on the second render, so this + // is a workaround to make the component render twice after mount. + useEffect(() => { + if (dataAndRange !== null && rerender) { + setRerender(false); + } + }, [dataAndRange, rerender, setRerender]); + + // TODO: Share all the loading/error/empty data notices with the DataTable? + + // Show a skeleton only on the first load, not on subsequent ones. + if (isLoading) { + return ( + + {Array.from(Array(5), (_, i) => ( + + ))} + + ); + } + + if (error) { + return ( + } + > + {error.message} + + ); + } + + if (dataAndRange === null) { + return No data queried yet; + } + + const { result } = dataAndRange.data.data; + + if (result.length === 0) { + return ( + }> + This query returned no data. + + ); + } + + return ( + + {node !== null && node.type === nodeType.matrixSelector && ( + } + > + Note: Range vector selectors can't be graphed, so + graphing the equivalent instant vector selector instead. 
+ + )} + + , + // }} + // styles={{ loader: { width: "100%", height: "100%" } }} + /> + + + + ); +}; + +export default Graph; diff --git a/web/ui/mantine-ui/src/pages/query/HistogramChart.module.css b/web/ui/mantine-ui/src/pages/query/HistogramChart.module.css new file mode 100644 index 0000000000..61b9546af7 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/HistogramChart.module.css @@ -0,0 +1,106 @@ +.histogramYWrapper { + display: flex; + flex-wrap: nowrap; + align-items: flex-start; + box-sizing: border-box; + margin: 15px 0; + width: 100%; +} + +.histogramYLabels { + height: 200px; + display: flex; + flex-direction: column; +} + +.histogramYLabel { + margin-right: 8px; + height: 25%; + text-align: right; +} + +.histogramXWrapper { + flex: 1 1 auto; + display: flex; + flex-direction: column; + margin-right: 8px; +} + +.histogramXLabels { + display: flex; +} + +.histogramXLabel { + position: relative; + margin-top: 5px; + width: 100%; + text-align: right; +} + +.histogramContainer { + margin-top: 9px; + position: relative; + height: 200px; +} + +.histogramAxes { + position: absolute; + width: 100%; + height: 100%; + border-bottom: 1px solid var(--mantine-color-gray-7); + border-left: 1px solid var(--mantine-color-gray-7); + pointer-events: none; +} + +.histogramYGrid { + position: absolute; + border-bottom: 1px dashed var(--mantine-color-gray-6); + width: 100%; +} + +.histogramYTick { + position: absolute; + border-bottom: 1px solid var(--mantine-color-gray-7); + left: -5px; + height: 0px; + width: 5px; +} + +.histogramXGrid { + position: absolute; + border-left: 1px dashed var(--mantine-color-gray-6); + height: 100%; + width: 0; +} + +.histogramXTick { + position: absolute; + border-left: 1px solid var(--mantine-color-gray-7); + height: 5px; + width: 0; + bottom: -5px; +} + +.histogramBucketSlot { + position: absolute; + bottom: 0; + top: 0; +} + +.histogramBucket { + position: absolute; + width: 100%; + bottom: 0; + background-color: #2db453; + border: 1px solid #77de94; + pointer-events: none; +} + +.histogramBucketSlot:hover { + background-color: var(--mantine-color-gray-4); +} + +.histogramBucketSlot:hover .histogramBucket { + background-color: #88e1a1; + border: 1px solid #77de94; +} diff --git a/web/ui/mantine-ui/src/pages/query/HistogramChart.tsx b/web/ui/mantine-ui/src/pages/query/HistogramChart.tsx new file mode 100644 index 0000000000..a574d1df44 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/HistogramChart.tsx @@ -0,0 +1,309 @@ +import React, { FC } from "react"; +import { Histogram } from "../../types/types"; +import { + calculateDefaultExpBucketWidth, + findMinPositive, + findMaxNegative, + findZeroAxisLeft, + showZeroAxis, + findZeroBucket, + bucketRangeString, +} from "./HistogramHelpers"; +import classes from "./HistogramChart.module.css"; +import { Tooltip } from "@mantine/core"; + +interface HistogramChartProps { + histogram: Histogram; + index: number; + scale: string; +} + +const HistogramChart: FC = ({ + index, + histogram, + scale, +}) => { + const { buckets } = histogram; + if (!buckets || buckets.length === 0) { + return
No data
; + } + const formatter = Intl.NumberFormat("en", { notation: "compact" }); + + // For linear scales, the count of a histogram bucket is represented by its area rather than its height. This means it considers + // both the count and the range (width) of the bucket. For this, we can set the height of the bucket proportional + // to its frequency density (fd). The fd is the count of the bucket divided by the width of the bucket. + const fds = []; + for (const bucket of buckets) { + const left = parseFloat(bucket[1]); + const right = parseFloat(bucket[2]); + const count = parseFloat(bucket[3]); + const width = right - left; + + // This happens when a user want observations of precisely zero to be included in the zero bucket + if (width === 0) { + fds.push(0); + continue; + } + fds.push(count / width); + } + const fdMax = Math.max(...fds); + + const first = buckets[0]; + const last = buckets[buckets.length - 1]; + + const rangeMax = parseFloat(last[2]); + const rangeMin = parseFloat(first[1]); + const countMax = Math.max(...buckets.map((b) => parseFloat(b[3]))); + + const defaultExpBucketWidth = calculateDefaultExpBucketWidth(last, buckets); + + const maxPositive = rangeMax > 0 ? rangeMax : 0; + const minPositive = findMinPositive(buckets); + const maxNegative = findMaxNegative(buckets); + const minNegative = parseFloat(first[1]) < 0 ? parseFloat(first[1]) : 0; + + // Calculate the borders of positive and negative buckets in the exponential scale from left to right + const startNegative = + minNegative !== 0 ? -Math.log(Math.abs(minNegative)) : 0; + const endNegative = maxNegative !== 0 ? -Math.log(Math.abs(maxNegative)) : 0; + const startPositive = minPositive !== 0 ? Math.log(minPositive) : 0; + const endPositive = maxPositive !== 0 ? Math.log(maxPositive) : 0; + + // Calculate the width of negative, positive, and all exponential bucket ranges on the x-axis + const xWidthNegative = endNegative - startNegative; + const xWidthPositive = endPositive - startPositive; + const xWidthTotal = xWidthNegative + defaultExpBucketWidth + xWidthPositive; + + const zeroBucketIdx = findZeroBucket(buckets); + const zeroAxisLeft = findZeroAxisLeft( + scale, + rangeMin, + rangeMax, + minPositive, + maxNegative, + zeroBucketIdx, + xWidthNegative, + xWidthTotal, + defaultExpBucketWidth, + ); + const zeroAxis = showZeroAxis(zeroAxisLeft); + + return ( +
+
+ {[1, 0.75, 0.5, 0.25].map((i) => ( +
+ {scale === "linear" ? "" : formatter.format(countMax * i)} +
+ ))} +
+ 0 +
+
+
+
+ {[0, 0.25, 0.5, 0.75, 1].map((i) => ( + +
+
+
+
+ ))} +
+
+
+
+ + + +
+
+
+
+ +
+ {formatter.format(rangeMin)} +
+ {rangeMin < 0 && zeroAxis && ( +
+ 0 +
+ )} +
+ {formatter.format(rangeMax)} +
+
+
+
+
+
+ ); +}; + +interface RenderHistogramProps { + buckets: [number, string, string, string][]; + scale: string; + rangeMin: number; + rangeMax: number; + index: number; + fds: number[]; + fdMax: number; + countMax: number; + defaultExpBucketWidth: number; + minPositive: number; + maxNegative: number; + startPositive: number; + startNegative: number; + xWidthNegative: number; + xWidthTotal: number; +} + +const RenderHistogramBars: FC = ({ + buckets, + scale, + rangeMin, + rangeMax, + index, + fds, + fdMax, + countMax, + defaultExpBucketWidth, + minPositive, + maxNegative, + startPositive, + startNegative, + xWidthNegative, + xWidthTotal, +}) => { + return ( + + {buckets.map((b, bIdx) => { + const left = parseFloat(b[1]); + const right = parseFloat(b[2]); + const count = parseFloat(b[3]); + const bucketIdx = `bucket-${index}-${bIdx}-${Math.ceil(parseFloat(b[3]) * 100)}`; + + const logWidth = Math.abs( + Math.log(Math.abs(right)) - Math.log(Math.abs(left)), + ); + const expBucketWidth = + logWidth === 0 ? defaultExpBucketWidth : logWidth; + + let bucketWidth = ""; + let bucketLeft = ""; + let bucketHeight = ""; + + switch (scale) { + case "linear": { + bucketWidth = ((right - left) / (rangeMax - rangeMin)) * 100 + "%"; + bucketLeft = + ((left - rangeMin) / (rangeMax - rangeMin)) * 100 + "%"; + if (left === 0 && right === 0) { + bucketLeft = "0%"; // do not render zero-width zero bucket + bucketWidth = "0%"; + } + bucketHeight = (fds[bIdx] / fdMax) * 100 + "%"; + break; + } + case "exponential": { + let adjust = 0; // if buckets are all positive/negative, we need to remove the width of the zero bucket + if (minPositive === 0 || maxNegative === 0) { + adjust = defaultExpBucketWidth; + } + bucketWidth = (expBucketWidth / (xWidthTotal - adjust)) * 100 + "%"; + if (left < 0) { + // negative buckets boundary + bucketLeft = + (-(Math.log(Math.abs(left)) + startNegative) / + (xWidthTotal - adjust)) * + 100 + + "%"; + } else { + // positive buckets boundary + bucketLeft = + ((Math.log(left) - + startPositive + + defaultExpBucketWidth + + xWidthNegative - + adjust) / + (xWidthTotal - adjust)) * + 100 + + "%"; + } + if (left < 0 && right > 0) { + // if the bucket crosses the zero axis + bucketLeft = (xWidthNegative / xWidthTotal) * 100 + "%"; + } + if (left === 0 && right === 0) { + // do not render zero width zero bucket + bucketLeft = "0%"; + bucketWidth = "0%"; + } + + bucketHeight = (count / countMax) * 100 + "%"; + break; + } + default: + throw new Error("Invalid scale"); + } + + return ( + +
+
+
+
+ ); + })} +
+ ); +}; + +export default HistogramChart; diff --git a/web/ui/mantine-ui/src/pages/query/HistogramHelpers.ts b/web/ui/mantine-ui/src/pages/query/HistogramHelpers.ts new file mode 100644 index 0000000000..12dd3e1bd2 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/HistogramHelpers.ts @@ -0,0 +1,144 @@ +// Calculates a default width of exponential histogram bucket ranges. If the last bucket is [0, 0], +// the width is calculated using the second to last bucket. returns error if the last bucket is [-0, 0], +export function calculateDefaultExpBucketWidth( + last: [number, string, string, string], + buckets: [number, string, string, string][] +): number { + if (parseFloat(last[2]) === 0 || parseFloat(last[1]) === 0) { + if (buckets.length > 1) { + return Math.abs( + Math.log(Math.abs(parseFloat(buckets[buckets.length - 2][2]))) - + Math.log(Math.abs(parseFloat(buckets[buckets.length - 2][1]))) + ); + } else { + throw new Error( + "Only one bucket in histogram ([-0, 0]). Cannot calculate defaultExpBucketWidth." + ); + } + } else { + return Math.abs( + Math.log(Math.abs(parseFloat(last[2]))) - + Math.log(Math.abs(parseFloat(last[1]))) + ); + } +} + +// Finds the lowest positive value from the bucket ranges +// Returns 0 if no positive values are found or if there are no buckets. +export function findMinPositive(buckets: [number, string, string, string][]) { + if (!buckets || buckets.length === 0) { + return 0; // no buckets + } + for (let i = 0; i < buckets.length; i++) { + const right = parseFloat(buckets[i][2]); + const left = parseFloat(buckets[i][1]); + + if (left > 0) { + return left; + } + if (left < 0 && right > 0) { + return right; + } + if (i === buckets.length - 1) { + if (right > 0) { + return right; + } + } + } + return 0; // all buckets are negative +} + +// Finds the lowest negative value from the bucket ranges +// Returns 0 if no negative values are found or if there are no buckets. +export function findMaxNegative(buckets: [number, string, string, string][]) { + if (!buckets || buckets.length === 0) { + return 0; // no buckets + } + for (let i = 0; i < buckets.length; i++) { + const right = parseFloat(buckets[i][2]); + const left = parseFloat(buckets[i][1]); + const prevRight = i > 0 ? parseFloat(buckets[i - 1][2]) : 0; + + if (right >= 0) { + if (i === 0) { + if (left < 0) { + return left; // return the first negative bucket + } + return 0; // all buckets are positive + } + return prevRight; // return the last negative bucket + } + } + console.log("findmaxneg returning: ", buckets[buckets.length - 1][2]); + return parseFloat(buckets[buckets.length - 1][2]); // all buckets are negative +} + +// Calculates the left position of the zero axis as a percentage string. 
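+// Worked example (illustrative, not part of the original code): on a linear scale the zero axis
+// sits at ((0 - rangeMin) / (rangeMax - rangeMin)) * 100 percent from the left. For a histogram
+// spanning rangeMin = -2 and rangeMax = 8 this yields ((0 - (-2)) / (8 - (-2))) * 100 = 20, i.e. "20%".
+// On the exponential scale the position is instead derived from the log-widths computed above; note
+// that findMaxNegative() returns the negative bucket boundary closest to zero, which together with
+// findMinPositive() brackets the zero bucket.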
+export function findZeroAxisLeft( + scale: string, + rangeMin: number, + rangeMax: number, + minPositive: number, + maxNegative: number, + zeroBucketIdx: number, + widthNegative: number, + widthTotal: number, + expBucketWidth: number +): string { + if (scale === "linear") { + return ((0 - rangeMin) / (rangeMax - rangeMin)) * 100 + "%"; + } else { + if (maxNegative === 0) { + return "0%"; + } + if (minPositive === 0) { + return "100%"; + } + if (zeroBucketIdx === -1) { + // if there is no zero bucket, we must zero axis between buckets around zero + return (widthNegative / widthTotal) * 100 + "%"; + } + if ((widthNegative + 0.5 * expBucketWidth) / widthTotal > 0) { + return ((widthNegative + 0.5 * expBucketWidth) / widthTotal) * 100 + "%"; + } else { + return "0%"; + } + } +} + +// Determines if the zero axis should be shown such that the zero label does not overlap with the range labels. +// The zero axis is shown if it is between 5% and 95% of the graph. +export function showZeroAxis(zeroAxisLeft: string) { + const axisNumber = parseFloat(zeroAxisLeft.slice(0, -1)); + if (5 < axisNumber && axisNumber < 95) { + return true; + } + return false; +} + +// Finds the index of the bucket whose range includes zero +export function findZeroBucket( + buckets: [number, string, string, string][] +): number { + for (let i = 0; i < buckets.length; i++) { + const left = parseFloat(buckets[i][1]); + const right = parseFloat(buckets[i][2]); + if (left <= 0 && right >= 0) { + return i; + } + } + return -1; +} + +const leftDelim = (br: number): string => (br === 3 || br === 1 ? "[" : "("); +const rightDelim = (br: number): string => (br === 3 || br === 0 ? "]" : ")"); + +export const bucketRangeString = ([ + boundaryRule, + leftBoundary, + rightBoundary, + + _, +]: [number, string, string, string]): string => { + return `${leftDelim(boundaryRule)}${leftBoundary} -> ${rightBoundary}${rightDelim(boundaryRule)}`; +}; diff --git a/web/ui/mantine-ui/src/pages/query/MetricsExplorer/LabelsExplorer.module.css b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/LabelsExplorer.module.css new file mode 100644 index 0000000000..cd058e6134 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/LabelsExplorer.module.css @@ -0,0 +1,18 @@ +.labelValue { + cursor: pointer; +} + +.labelValue:hover { + background-color: light-dark( + var(--mantine-color-gray-2), + var(--mantine-color-gray-8) + ); + border-radius: var(--mantine-radius-sm); +} + +.promqlPill { + background-color: light-dark( + var(--mantine-color-gray-1), + var(--mantine-color-dark-5) + ); +} diff --git a/web/ui/mantine-ui/src/pages/query/MetricsExplorer/LabelsExplorer.tsx b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/LabelsExplorer.tsx new file mode 100644 index 0000000000..d18c017b18 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/LabelsExplorer.tsx @@ -0,0 +1,423 @@ +import { FC, useMemo, useState } from "react"; +import { + LabelMatcher, + matchType, + nodeType, + VectorSelector, +} from "../../../promql/ast"; +import { + Alert, + Anchor, + Autocomplete, + Box, + Button, + CopyButton, + Group, + List, + Pill, + Text, + SegmentedControl, + Select, + Skeleton, + Stack, + Table, + rem, +} from "@mantine/core"; +import { escapeString } from "../../../lib/escapeString"; +import serializeNode from "../../../promql/serialize"; +import { SeriesResult } from "../../../api/responseTypes/series"; +import { useAPIQuery } from "../../../api/api"; +import { Metric } from "../../../api/responseTypes/query"; +import { + 
IconAlertTriangle, + IconArrowLeft, + IconCheck, + IconCodePlus, + IconCopy, + IconX, +} from "@tabler/icons-react"; +import { formatNode } from "../../../promql/format"; +import classes from "./LabelsExplorer.module.css"; +import { buttonIconStyle } from "../../../styles"; + +type LabelsExplorerProps = { + metricName: string; + insertText: (_text: string) => void; + hideLabelsExplorer: () => void; +}; + +const LabelsExplorer: FC = ({ + metricName, + insertText, + hideLabelsExplorer, +}) => { + const [expandedLabels, setExpandedLabels] = useState([]); + const [matchers, setMatchers] = useState([]); + const [newMatcher, setNewMatcher] = useState(null); + const [sortByCard, setSortByCard] = useState(true); + + const removeMatcher = (name: string) => { + setMatchers(matchers.filter((m) => m.name !== name)); + }; + + const addMatcher = () => { + if (newMatcher === null) { + throw new Error("tried to add null label matcher"); + } + + setMatchers([...matchers, newMatcher]); + setNewMatcher(null); + }; + + const matcherBadge = (m: LabelMatcher) => ( + { + removeMatcher(m.name); + }} + className={classes.promqlPill} + > + + {m.name} + {m.type} + "{escapeString(m.value)}" + + + ); + + const selector: VectorSelector = { + type: nodeType.vectorSelector, + name: metricName, + matchers, + offset: 0, + timestamp: null, + startOrEnd: null, + }; + + // Based on the selected pool (if any), load the list of targets. + const { data, error, isLoading } = useAPIQuery({ + path: `/series`, + params: { + "match[]": serializeNode(selector), + }, + }); + + // When new series data is loaded, update the corresponding label cardinality and example data. + const [numSeries, sortedLabelCards, labelExamples] = useMemo(() => { + const labelCardinalities: Record = {}; + const labelExamples: Record = + {}; + + const labelValuesByName: Record> = {}; + + if (data !== undefined) { + data.data.forEach((series: Metric) => { + Object.entries(series).forEach(([ln, lv]) => { + if (ln !== "__name__") { + if (!(ln in labelValuesByName)) { + labelValuesByName[ln] = { [lv]: 1 }; + } else { + if (!(lv in labelValuesByName[ln])) { + labelValuesByName[ln][lv] = 1; + } else { + labelValuesByName[ln][lv]++; + } + } + } + }); + }); + + Object.entries(labelValuesByName).forEach(([ln, lvs]) => { + labelCardinalities[ln] = Object.keys(lvs).length; + // labelExamples[ln] = Array.from({ length: Math.min(5, lvs.size) }, (i => () => i.next().value)(lvs.keys())); + // Sort label values by their number of occurrences within this label name. + labelExamples[ln] = Object.entries(lvs) + .sort(([, aCnt], [, bCnt]) => bCnt - aCnt) + .map(([lv, cnt]) => ({ value: lv, count: cnt })); + }); + } + + // Sort labels by cardinality if desired, so the labels with the most values are at the top. + const sortedLabelCards = Object.entries(labelCardinalities).sort((a, b) => + sortByCard ? b[1] - a[1] : 0 + ); + + return [data?.data.length, sortedLabelCards, labelExamples]; + }, [data, sortByCard]); + + if (error) { + return ( + } + > + Error: {error.message} + + ); + } + + return ( + + + {/* Selector */} + + + Selector: + + + + + {formatNode(selector, false)} + + + + + + + {({ copied, copy }) => ( + + )} + + + + {/* Filters */} + + + Filters: + + + {matchers.length > 0 ? ( + {matchers.map((m) => matcherBadge(m))} + ) : ( + <>No label filters + )} + + {/* Number of series */} + + + Results: + + <>{numSeries !== undefined ? 
`${numSeries} series` : "loading..."} + + + {/* Sort order */} + + + + + setSortByCard(value === "cardinality")} + data={[ + { label: "By cardinality", value: "cardinality" }, + { label: "Alphabetic", value: "alphabetic" }, + ]} + /> + + + {/* Labels and their values */} + {isLoading ? ( + + {Array.from(Array(10), (_, i) => ( + + ))} + + ) : ( + + + + Label + Values + + + + {sortedLabelCards.map(([ln, card]) => ( + + +
{ + // Without this, the page gets reloaded for forms that only have a single input field, see + // https://stackoverflow.com/questions/1370021/why-does-forms-with-single-input-field-submit-upon-pressing-enter-key-in-input. + e.preventDefault(); + }} + > + + + {ln} + + {matchers.some((m) => m.name === ln) ? ( + matcherBadge(matchers.find((m) => m.name === ln)!) + ) : newMatcher?.name === ln ? ( + +
+ )} +
+ ); +}; + +export default LabelsExplorer; diff --git a/web/ui/mantine-ui/src/pages/query/MetricsExplorer/MetricsExplorer.module.css b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/MetricsExplorer.module.css new file mode 100644 index 0000000000..5be156d804 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/MetricsExplorer.module.css @@ -0,0 +1,7 @@ +.typeLabel { + color: light-dark(#008080, #14bfad); +} + +.helpLabel { + color: light-dark(#800000, #ff8585); +} diff --git a/web/ui/mantine-ui/src/pages/query/MetricsExplorer/MetricsExplorer.tsx b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/MetricsExplorer.tsx new file mode 100644 index 0000000000..f0f176c15c --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/MetricsExplorer.tsx @@ -0,0 +1,185 @@ +import { FC, useMemo, useState } from "react"; +import { useSuspenseAPIQuery } from "../../../api/api"; +import { MetadataResult } from "../../../api/responseTypes/metadata"; +import { ActionIcon, Group, Stack, Table, TextInput } from "@mantine/core"; +import React from "react"; +import { Fuzzy } from "@nexucis/fuzzy"; +import sanitizeHTML from "sanitize-html"; +import { IconCodePlus, IconCopy, IconZoomCode } from "@tabler/icons-react"; +import LabelsExplorer from "./LabelsExplorer"; +import { useDebouncedValue } from "@mantine/hooks"; +import classes from "./MetricsExplorer.module.css"; +import CustomInfiniteScroll from "../../../components/CustomInfiniteScroll"; + +const fuz = new Fuzzy({ + pre: '', + post: "", + shouldSort: true, +}); + +const sanitizeOpts = { + allowedTags: ["b"], + allowedAttributes: { b: ["style"] }, +}; + +type MetricsExplorerProps = { + metricNames: string[]; + insertText: (text: string) => void; + close: () => void; +}; + +const getSearchMatches = (input: string, expressions: string[]) => + fuz.filter(input.replace(/ /g, ""), expressions, { + pre: '', + post: "", + }); + +const MetricsExplorer: FC = ({ + metricNames, + insertText, + close, +}) => { + // Fetch the alerting rules data. + const { data } = useSuspenseAPIQuery({ + path: `/metadata`, + }); + const [selectedMetric, setSelectedMetric] = useState(null); + + const [filterText, setFilterText] = useState(""); + const [debouncedFilterText] = useDebouncedValue(filterText, 250); + + const searchMatches = useMemo(() => { + if (debouncedFilterText === "") { + return metricNames.map((m) => ({ original: m, rendered: m })); + } + return getSearchMatches(debouncedFilterText, metricNames); + }, [debouncedFilterText, metricNames]); + + const getMeta = (m: string) => + data.data[m.replace(/(_count|_sum|_bucket)$/, "")] || [ + { help: "unknown", type: "unknown", unit: "unknown" }, + ]; + + if (selectedMetric !== null) { + return ( + { + insertText(text); + close(); + }} + hideLabelsExplorer={() => setSelectedMetric(null)} + /> + ); + } + + return ( + + ) => + setFilterText(e.target.value) + } + autoFocus + /> + ( + + + + Metric + Type + Help + + + + {items.map((m) => ( + + + + {debouncedFilterText === "" ? ( + m.original + ) : ( +
+ )} + + { + setSelectedMetric(m.original); + }} + > + + + { + insertText(m.original); + close(); + }} + > + + + { + navigator.clipboard.writeText(m.original); + }} + > + + + + + + + {getMeta(m.original).map((meta, idx) => ( + + {meta.type} +
+
+ ))} +
+ + {getMeta(m.original).map((meta, idx) => ( + + {meta.help} +
+
+ ))} +
+ + ))} + +
+ )} + /> +
+ ); +}; + +export default MetricsExplorer; diff --git a/web/ui/mantine-ui/src/pages/query/QueryPage.tsx b/web/ui/mantine-ui/src/pages/query/QueryPage.tsx new file mode 100644 index 0000000000..71c969daf8 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/QueryPage.tsx @@ -0,0 +1,136 @@ +import { Alert, Box, Button, Stack } from "@mantine/core"; +import { + IconAlertCircle, + IconAlertTriangle, + IconPlus, +} from "@tabler/icons-react"; +import { useAppDispatch, useAppSelector } from "../../state/hooks"; +import { + addPanel, + newDefaultPanel, + setPanels, +} from "../../state/queryPageSlice"; +import Panel from "./QueryPanel"; +import { LabelValuesResult } from "../../api/responseTypes/labelValues"; +import { useAPIQuery } from "../../api/api"; +import { useEffect, useState } from "react"; +import { InstantQueryResult } from "../../api/responseTypes/query"; +import { humanizeDuration } from "../../lib/formatTime"; +import { decodePanelOptionsFromURLParams } from "./urlStateEncoding"; +import { buttonIconStyle } from "../../styles"; + +export default function QueryPage() { + const panels = useAppSelector((state) => state.queryPage.panels); + const dispatch = useAppDispatch(); + const [timeDelta, setTimeDelta] = useState(0); + + // Update the panels whenever the URL params change. + useEffect(() => { + const handleURLChange = () => { + const panels = decodePanelOptionsFromURLParams(window.location.search); + if (panels.length > 0) { + dispatch(setPanels(panels)); + } + }; + + handleURLChange(); + + window.addEventListener("popstate", handleURLChange); + + return () => { + window.removeEventListener("popstate", handleURLChange); + }; + }, [dispatch]); + + // Clear the query page when navigating away from it. + useEffect(() => { + return () => { + dispatch(setPanels([newDefaultPanel()])); + }; + }, [dispatch]); + + const { data: metricNamesResult, error: metricNamesError } = + useAPIQuery({ + path: "/label/__name__/values", + }); + + const { data: timeResult, error: timeError } = + useAPIQuery({ + path: "/query", + params: { + query: "time()", + }, + }); + + useEffect(() => { + if (!timeResult) { + return; + } + + if (timeResult.data.resultType !== "scalar") { + throw new Error("Unexpected result type from time query"); + } + + const browserTime = new Date().getTime() / 1000; + const serverTime = timeResult.data.result[0]; + setTimeDelta(Math.abs(browserTime - serverTime)); + }, [timeResult]); + + return ( + + {metricNamesError && ( + } + color="red" + title="Error fetching metrics list" + > + Unable to fetch list of metric names: {metricNamesError.message} + + )} + {timeError && ( + } + color="red" + title="Error fetching server time" + > + {timeError.message} + + )} + {timeDelta > 30 && ( + } + onClose={() => setTimeDelta(0)} + > + Detected a time difference of{" "} + {humanizeDuration(timeDelta * 1000)} between your + browser and the server. You may see unexpected time-shifted query + results due to the time drift. 
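+          {/* Illustrative example (not in the original code): time() returns the server's Unix time
+              in seconds, so a browser clock running two minutes ahead yields timeDelta = 120 and the
+              alert shows humanizeDuration(120 * 1000), i.e. roughly "2m". */}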
+ + )} + + + {panels.map((p, idx) => ( + + ))} + + + + + ); +} diff --git a/web/ui/mantine-ui/src/pages/query/QueryPanel.module.css b/web/ui/mantine-ui/src/pages/query/QueryPanel.module.css new file mode 100644 index 0000000000..c42dfcb0ec --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/QueryPanel.module.css @@ -0,0 +1,14 @@ +.input { + font-family: "DejaVu Sans Mono"; + padding-top: 7px; + transition: none; + + &:focus-within { + outline: rem(2px) solid var(--mantine-color-blue-filled); + border-color: transparent; + } + + &:placeholder-shown { + font-family: unset; + } +} diff --git a/web/ui/mantine-ui/src/pages/query/QueryPanel.tsx b/web/ui/mantine-ui/src/pages/query/QueryPanel.tsx new file mode 100644 index 0000000000..24eb8c59cb --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/QueryPanel.tsx @@ -0,0 +1,337 @@ +import { + Group, + Tabs, + Center, + Space, + Box, + SegmentedControl, + Stack, + Skeleton, +} from "@mantine/core"; +import { + IconChartAreaFilled, + IconChartLine, + IconGraph, + IconInfoCircle, + IconTable, +} from "@tabler/icons-react"; +import { FC, Suspense, useCallback, useMemo, useState } from "react"; +import { useAppDispatch, useAppSelector } from "../../state/hooks"; +import { + addQueryToHistory, + GraphDisplayMode, + GraphResolution, + removePanel, + setExpr, + setShowTree, + setVisualizer, +} from "../../state/queryPageSlice"; +import TimeInput from "./TimeInput"; +import RangeInput from "./RangeInput"; +import ExpressionInput from "./ExpressionInput"; +import Graph from "./Graph"; +import ResolutionInput from "./ResolutionInput"; +import TableTab from "./TableTab"; +import TreeView from "./TreeView"; +import ErrorBoundary from "../../components/ErrorBoundary"; +import ASTNode from "../../promql/ast"; +import serializeNode from "../../promql/serialize"; +import ExplainView from "./ExplainViews/ExplainView"; + +export interface PanelProps { + idx: number; + metricNames: string[]; +} + +// TODO: This is duplicated everywhere, unify it. +const iconStyle = { width: "0.9rem", height: "0.9rem" }; + +const QueryPanel: FC = ({ idx, metricNames }) => { + // Used to indicate to the selected display component that it should retrigger + // the query, even if the expression has not changed (e.g. when the user presses + // the "Execute" button or hits again). + const [retriggerIdx, setRetriggerIdx] = useState(0); + + const panel = useAppSelector((state) => state.queryPage.panels[idx]); + const dispatch = useAppDispatch(); + + const [selectedNode, setSelectedNode] = useState<{ + id: string; + node: ASTNode; + } | null>(null); + + const expr = useMemo( + () => + selectedNode !== null ? serializeNode(selectedNode.node) : panel.expr, + [selectedNode, panel.expr] + ); + + const onSelectRange = useCallback( + (start: number, end: number) => + dispatch( + setVisualizer({ + idx, + visualizer: { + ...panel.visualizer, + range: (end - start) * 1000, + endTime: end * 1000, + }, + }) + ), + // TODO: How to have panel.visualizer in the dependencies, but not re-create + // the callback every time it changes by the callback's own update? This leads + // to extra renders of the plot further down. 
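+    // Illustrative example (not part of the original code): onSelectRange receives Unix timestamps
+    // in seconds. Selecting a region from start = 1700000000 to end = 1700000300 stores
+    // range = (end - start) * 1000 = 300000 ms (5m) and endTime = end * 1000 = 1700000300000 ms.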
+ [dispatch, idx, panel.visualizer] + ); + + return ( + + { + setRetriggerIdx((idx) => idx + 1); + dispatch(setExpr({ idx, expr })); + + if (!metricNames.includes(expr) && expr.trim() !== "") { + dispatch(addQueryToHistory(expr)); + } + }} + treeShown={panel.showTree} + setShowTree={(showTree: boolean) => { + dispatch(setShowTree({ idx, showTree })); + if (!showTree) { + setSelectedNode(null); + } + }} + removePanel={() => { + dispatch(removePanel(idx)); + }} + /> + {panel.expr.trim() !== "" && panel.showTree && ( + + + {Array.from(Array(20), (_, i) => ( + + ))} + + } + > + { + dispatch(setShowTree({ idx, showTree: false })); + setSelectedNode(null); + }} + /> + + + )} + + dispatch( + setVisualizer({ + idx, + visualizer: { + ...panel.visualizer, + activeTab: v as "table" | "graph", + }, + }) + ) + } + keepMounted={false} + > + + }> + Table + + }> + Graph + + } + > + Explain + + + + + + + + + + dispatch( + setVisualizer({ + idx, + visualizer: { ...panel.visualizer, range }, + }) + ) + } + /> + + dispatch( + setVisualizer({ + idx, + visualizer: { ...panel.visualizer, endTime: time }, + }) + ) + } + /> + { + dispatch( + setVisualizer({ + idx, + visualizer: { + ...panel.visualizer, + resolution: res, + }, + }) + ); + }} + /> + + + + {/* */} + + + dispatch( + setVisualizer({ + idx, + visualizer: { + ...panel.visualizer, + displayMode: value as GraphDisplayMode, + }, + }) + ) + } + value={panel.visualizer.displayMode} + data={[ + { + value: GraphDisplayMode.Lines, + label: ( +
+ + Unstacked +
+ ), + }, + { + value: GraphDisplayMode.Stacked, + label: ( +
+ + Stacked +
+ ), + }, + // { + // value: GraphDisplayMode.Heatmap, + // label: ( + //
+ // + // Heatmap + //
+ // ), + // }, + ]} + /> +
+
+ + +
+ + + + {Array.from(Array(20), (_, i) => ( + + ))} + + } + > + { + dispatch(setShowTree({ idx, showTree: true })); + }} + /> + + + +
+
+ ); +}; + +export default QueryPanel; diff --git a/web/ui/mantine-ui/src/pages/query/RangeInput.tsx b/web/ui/mantine-ui/src/pages/query/RangeInput.tsx new file mode 100644 index 0000000000..ec70cff969 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/RangeInput.tsx @@ -0,0 +1,120 @@ +import { FC, useEffect, useState } from "react"; +import { ActionIcon, Group, TextInput } from "@mantine/core"; +import { IconMinus, IconPlus } from "@tabler/icons-react"; +import { + formatPrometheusDuration, + parsePrometheusDuration, +} from "../../lib/formatTime"; + +interface RangeInputProps { + range: number; + onChangeRange: (range: number) => void; +} + +const iconStyle = { width: "0.9rem", height: "0.9rem" }; + +const rangeSteps = [ + 1, + 10, + 60, + 5 * 60, + 15 * 60, + 30 * 60, + 60 * 60, + 2 * 60 * 60, + 6 * 60 * 60, + 12 * 60 * 60, + 24 * 60 * 60, + 48 * 60 * 60, + 7 * 24 * 60 * 60, + 14 * 24 * 60 * 60, + 28 * 24 * 60 * 60, + 56 * 24 * 60 * 60, + 112 * 24 * 60 * 60, + 182 * 24 * 60 * 60, + 365 * 24 * 60 * 60, + 730 * 24 * 60 * 60, +].map((s) => s * 1000); + +const RangeInput: FC = ({ range, onChangeRange }) => { + // TODO: Make sure that when "range" changes externally (like via the URL), + // the input is updated, either via useEffect() or some better architecture. + const [rangeInput, setRangeInput] = useState( + formatPrometheusDuration(range) + ); + + useEffect(() => { + setRangeInput(formatPrometheusDuration(range)); + }, [range]); + + const onChangeRangeInput = (rangeText: string): void => { + const newRange = parsePrometheusDuration(rangeText); + if (newRange === null) { + setRangeInput(formatPrometheusDuration(range)); + } else { + onChangeRange(newRange); + } + }; + + const increaseRange = (): void => { + for (const step of rangeSteps) { + if (range < step) { + setRangeInput(formatPrometheusDuration(step)); + onChangeRange(step); + return; + } + } + }; + + const decreaseRange = (): void => { + for (const step of rangeSteps.slice().reverse()) { + if (range > step) { + setRangeInput(formatPrometheusDuration(step)); + onChangeRange(step); + return; + } + } + }; + + return ( + + setRangeInput(event.currentTarget.value)} + onBlur={() => onChangeRangeInput(rangeInput)} + onKeyDown={(event) => + event.key === "Enter" && onChangeRangeInput(rangeInput) + } + aria-label="Range" + style={{ width: `calc(44px + ${rangeInput.length + 3}ch)` }} + leftSection={ + + + + } + rightSection={ + + + + } + leftSectionPointerEvents="all" + rightSectionPointerEvents="all" + /> + + ); +}; + +export default RangeInput; diff --git a/web/ui/mantine-ui/src/pages/query/ResolutionInput.tsx b/web/ui/mantine-ui/src/pages/query/ResolutionInput.tsx new file mode 100644 index 0000000000..8d19deb197 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ResolutionInput.tsx @@ -0,0 +1,142 @@ +import { FC, useState } from "react"; +import { Select, TextInput } from "@mantine/core"; +import { + formatPrometheusDuration, + parsePrometheusDuration, +} from "../../lib/formatTime"; +import { + GraphResolution, + getEffectiveResolution, +} from "../../state/queryPageSlice"; + +interface ResolutionInputProps { + resolution: GraphResolution; + range: number; + onChangeResolution: (resolution: GraphResolution) => void; +} + +const ResolutionInput: FC = ({ + resolution, + range, + onChangeResolution, +}) => { + const [customResolutionInput, setCustomResolutionInput] = useState( + formatPrometheusDuration(getEffectiveResolution(resolution, range)) + ); + + const onChangeCustomResolutionInput = (resText: string): void => { + const 
newResolution = parsePrometheusDuration(resText); + + if (resolution.type === "custom" && newResolution === resolution.step) { + // Nothing changed. + return; + } + + if (newResolution === null) { + setCustomResolutionInput( + formatPrometheusDuration(getEffectiveResolution(resolution, range)) + ); + } else { + onChangeResolution({ type: "custom", step: newResolution }); + } + }; + + return ( + <> + { + setScrapePool(value); + showLimitAlert && dispatch(setShowLimitAlert(false)); + }} + searchable + /> + + o === "active" ? badgeClasses.healthOk : badgeClasses.healthInfo + } + placeholder="Filter by state" + values={(stateFilter?.filter((v) => v !== null) as string[]) || []} + onChange={(values) => setStateFilter(values)} + /> + } + placeholder="Filter by labels" + value={searchFilter || ""} + onChange={(event) => setSearchFilter(event.currentTarget.value)} + > + 0 + ? "Expand all pools" + : "Collapse all pools" + } + variant="light" + onClick={() => + dispatch( + setCollapsedPools(collapsedPools.length > 0 ? [] : scrapePools) + ) + } + > + {collapsedPools.length > 0 ? ( + + ) : ( + + )} + + + + + + {Array.from(Array(10), (_, i) => ( + + ))} + + } + > + + + + + ); +} diff --git a/web/ui/mantine-ui/src/pages/service-discovery/ServiceDiscoveryPoolsList.tsx b/web/ui/mantine-ui/src/pages/service-discovery/ServiceDiscoveryPoolsList.tsx new file mode 100644 index 0000000000..798783da2f --- /dev/null +++ b/web/ui/mantine-ui/src/pages/service-discovery/ServiceDiscoveryPoolsList.tsx @@ -0,0 +1,356 @@ +import { + Accordion, + Alert, + Anchor, + Group, + RingProgress, + Stack, + Table, + Text, +} from "@mantine/core"; +import { KVSearch } from "@nexucis/kvsearch"; +import { IconInfoCircle } from "@tabler/icons-react"; +import { useSuspenseAPIQuery } from "../../api/api"; +import { + DroppedTarget, + Labels, + Target, + TargetsResult, +} from "../../api/responseTypes/targets"; +import { FC, useMemo } from "react"; +import { useAppDispatch, useAppSelector } from "../../state/hooks"; +import { + setCollapsedPools, + setShowLimitAlert, +} from "../../state/serviceDiscoveryPageSlice"; +import CustomInfiniteScroll from "../../components/CustomInfiniteScroll"; + +import { useDebouncedValue } from "@mantine/hooks"; +import { targetPoolDisplayLimit } from "./ServiceDiscoveryPage"; +import { BooleanParam, useQueryParam, withDefault } from "use-query-params"; +import { LabelBadges } from "../../components/LabelBadges"; + +type TargetLabels = { + discoveredLabels: Labels; + labels: Labels; + isDropped: boolean; +}; + +type ScrapePool = { + targets: TargetLabels[]; + active: number; + total: number; +}; + +type ScrapePools = { + [scrapePool: string]: ScrapePool; +}; + +const activeTargetKVSearch = new KVSearch({ + shouldSort: true, + indexedKeys: [ + "labels", + "discoveredLabels", + ["discoveredLabels", /.*/], + ["labels", /.*/], + ], +}); + +const droppedTargetKVSearch = new KVSearch({ + shouldSort: true, + indexedKeys: ["discoveredLabels", ["discoveredLabels", /.*/]], +}); + +const buildPoolsData = ( + poolNames: string[], + activeTargets: Target[], + droppedTargets: DroppedTarget[], + search: string, + stateFilter: (string | null)[] +): ScrapePools => { + const pools: ScrapePools = {}; + + for (const pn of poolNames) { + pools[pn] = { + targets: [], + active: 0, + total: 0, + }; + } + + for (const target of activeTargets) { + const pool = pools[target.scrapePool]; + if (!pool) { + // TODO: Should we do better here? 
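+      // Illustrative note (not part of the original code): `pools` is keyed by the pool names from
+      // /scrape_pools, e.g. { "node": { targets: [], active: 0, total: 0 } }, so a target whose
+      // scrapePool is missing from that list (the two responses were fetched at different times)
+      // ends up in this branch.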
+ throw new Error( + "Received target information for an unknown scrape pool, likely the list of scrape pools has changed. Please reload the page." + ); + } + + pool.active++; + pool.total++; + } + + const filteredActiveTargets = + stateFilter.length !== 0 && !stateFilter.includes("active") + ? [] + : search === "" + ? activeTargets + : activeTargetKVSearch + .filter(search, activeTargets) + .map((value) => value.original); + + for (const target of filteredActiveTargets) { + pools[target.scrapePool].targets.push({ + discoveredLabels: target.discoveredLabels, + labels: target.labels, + isDropped: false, + }); + } + + for (const target of droppedTargets) { + const { job: poolName } = target.discoveredLabels; + const pool = pools[poolName]; + if (!pool) { + // TODO: Should we do better here? + throw new Error( + "Received target information for an unknown scrape pool, likely the list of scrape pools has changed. Please reload the page." + ); + } + + pool.total++; + } + + const filteredDroppedTargets = + stateFilter.length !== 0 && !stateFilter.includes("dropped") + ? [] + : search === "" + ? droppedTargets + : droppedTargetKVSearch + .filter(search, droppedTargets) + .map((value) => value.original); + + for (const target of filteredDroppedTargets) { + pools[target.discoveredLabels.job].targets.push({ + discoveredLabels: target.discoveredLabels, + isDropped: true, + labels: {}, + }); + } + + return pools; +}; + +type ScrapePoolListProp = { + poolNames: string[]; + selectedPool: string | null; + stateFilter: string[]; + searchFilter: string; +}; + +const ScrapePoolList: FC = ({ + poolNames, + selectedPool, + stateFilter, + searchFilter, +}) => { + const dispatch = useAppDispatch(); + const [showEmptyPools, setShowEmptyPools] = useQueryParam( + "showEmptyPools", + withDefault(BooleanParam, true) + ); + + // Based on the selected pool (if any), load the list of targets. + const { + data: { + data: { activeTargets, droppedTargets }, + }, + } = useSuspenseAPIQuery({ + path: `/targets`, + params: { + scrapePool: selectedPool === null ? "" : selectedPool, + }, + }); + + const { collapsedPools, showLimitAlert } = useAppSelector( + (state) => state.serviceDiscoveryPage + ); + + const [debouncedSearch] = useDebouncedValue(searchFilter, 250); + + const allPools = useMemo( + () => + buildPoolsData( + selectedPool ? [selectedPool] : poolNames, + activeTargets, + droppedTargets, + debouncedSearch, + stateFilter + ), + [ + selectedPool, + poolNames, + activeTargets, + droppedTargets, + debouncedSearch, + stateFilter, + ] + ); + const allPoolNames = Object.keys(allPools); + const shownPoolNames = showEmptyPools + ? allPoolNames + : allPoolNames.filter((pn) => allPools[pn].targets.length !== 0); + + return ( + + {allPoolNames.length === 0 ? ( + }> + No scrape pools found. + + ) : ( + !showEmptyPools && + allPoolNames.length !== shownPoolNames.length && ( + } + > + Hiding {allPoolNames.length - shownPoolNames.length} empty pools due + to filters or no targets. + setShowEmptyPools(true)}> + Show empty pools + + + ) + )} + {showLimitAlert && ( + } + withCloseButton + onClose={() => dispatch(setShowLimitAlert(false))} + > + There are more than {targetPoolDisplayLimit} scrape pools. Showing + only the first one. Use the dropdown to select a different pool. 
+ + )} + !collapsedPools.includes(p))} + onChange={(value) => + dispatch( + setCollapsedPools(allPoolNames.filter((p) => !value.includes(p))) + ) + } + > + {shownPoolNames.map((poolName) => { + const pool = allPools[poolName]; + return ( + + + + {poolName} + + + {pool.active} / {pool.total} + + + + + + + {pool.total === 0 ? ( + }> + No targets in this scrape pool. + setShowEmptyPools(false)} + > + Hide empty pools + + + ) : pool.targets.length === 0 ? ( + }> + No targets in this pool match your filter criteria (omitted{" "} + {pool.total} filtered targets). + setShowEmptyPools(false)} + > + Hide empty pools + + + ) : ( + ( + + + + Discovered labels + Target labels + + + + {items.map((target, i) => ( + // TODO: Find a stable and definitely unique key. + + + + + + {target.isDropped ? ( + + dropped due to relabeling rules + + ) : ( + + )} + + + ))} + +
+ )} + /> + )} +
+
+ ); + })} +
+
+ ); +}; + +export default ScrapePoolList; diff --git a/web/ui/mantine-ui/src/pages/targets/ScrapePoolsList.tsx b/web/ui/mantine-ui/src/pages/targets/ScrapePoolsList.tsx new file mode 100644 index 0000000000..be91b149bb --- /dev/null +++ b/web/ui/mantine-ui/src/pages/targets/ScrapePoolsList.tsx @@ -0,0 +1,422 @@ +import { + Accordion, + Alert, + Anchor, + Badge, + Group, + RingProgress, + Stack, + Table, + Text, + Tooltip, +} from "@mantine/core"; +import { KVSearch } from "@nexucis/kvsearch"; +import { + IconAlertTriangle, + IconHourglass, + IconInfoCircle, + IconRefresh, +} from "@tabler/icons-react"; +import { useSuspenseAPIQuery } from "../../api/api"; +import { Target, TargetsResult } from "../../api/responseTypes/targets"; +import React, { FC, useMemo } from "react"; +import { + humanizeDurationRelative, + humanizeDuration, + now, +} from "../../lib/formatTime"; +import { useAppDispatch, useAppSelector } from "../../state/hooks"; +import { + setCollapsedPools, + setShowLimitAlert, +} from "../../state/targetsPageSlice"; +import EndpointLink from "../../components/EndpointLink"; +import CustomInfiniteScroll from "../../components/CustomInfiniteScroll"; + +import badgeClasses from "../../Badge.module.css"; +import panelClasses from "../../Panel.module.css"; +import TargetLabels from "./TargetLabels"; +import { useDebouncedValue } from "@mantine/hooks"; +import { targetPoolDisplayLimit } from "./TargetsPage"; +import { BooleanParam, useQueryParam, withDefault } from "use-query-params"; +import { badgeIconStyle } from "../../styles"; + +type ScrapePool = { + targets: Target[]; + count: number; + upCount: number; + downCount: number; + unknownCount: number; +}; + +type ScrapePools = { + [scrapePool: string]: ScrapePool; +}; + +const poolPanelHealthClass = (pool: ScrapePool) => + pool.count > 1 && pool.downCount === pool.count + ? panelClasses.panelHealthErr + : pool.downCount >= 1 + ? panelClasses.panelHealthWarn + : pool.unknownCount === pool.count + ? panelClasses.panelHealthUnknown + : panelClasses.panelHealthOk; + +const healthBadgeClass = (state: string) => { + switch (state.toLowerCase()) { + case "up": + return badgeClasses.healthOk; + case "down": + return badgeClasses.healthErr; + case "unknown": + return badgeClasses.healthUnknown; + default: + // Should never happen. + return badgeClasses.healthWarn; + } +}; + +const kvSearch = new KVSearch({ + shouldSort: true, + indexedKeys: ["labels", "scrapePool", ["labels", /.*/]], +}); + +const buildPoolsData = ( + poolNames: string[], + targets: Target[], + search: string, + healthFilter: (string | null)[] +): ScrapePools => { + const pools: ScrapePools = {}; + + for (const pn of poolNames) { + pools[pn] = { + targets: [], + count: 0, + upCount: 0, + downCount: 0, + unknownCount: 0, + }; + } + + for (const target of targets) { + if (!pools[target.scrapePool]) { + // TODO: Should we do better here? + throw new Error( + "Received target information for an unknown scrape pool, likely the list of scrape pools has changed. Please reload the page." + ); + } + + pools[target.scrapePool].count++; + + switch (target.health.toLowerCase()) { + case "up": + pools[target.scrapePool].upCount++; + break; + case "down": + pools[target.scrapePool].downCount++; + break; + case "unknown": + pools[target.scrapePool].unknownCount++; + break; + } + } + + const filteredTargets: Target[] = ( + search === "" + ? 
targets + : kvSearch.filter(search, targets).map((value) => value.original) + ).filter( + (target) => + healthFilter.length === 0 || healthFilter.includes(target.health) + ); + + for (const target of filteredTargets) { + pools[target.scrapePool].targets.push(target); + } + + return pools; +}; + +type ScrapePoolListProp = { + poolNames: string[]; + selectedPool: string | null; + healthFilter: string[]; + searchFilter: string; +}; + +const ScrapePoolList: FC = ({ + poolNames, + selectedPool, + healthFilter, + searchFilter, +}) => { + // Based on the selected pool (if any), load the list of targets. + const { + data: { + data: { activeTargets }, + }, + } = useSuspenseAPIQuery({ + path: `/targets`, + params: { + state: "active", + scrapePool: selectedPool === null ? "" : selectedPool, + }, + }); + + const dispatch = useAppDispatch(); + const [showEmptyPools, setShowEmptyPools] = useQueryParam( + "showEmptyPools", + withDefault(BooleanParam, true) + ); + + const { collapsedPools, showLimitAlert } = useAppSelector( + (state) => state.targetsPage + ); + + const [debouncedSearch] = useDebouncedValue(searchFilter.trim(), 250); + + const allPools = useMemo( + () => + buildPoolsData( + selectedPool ? [selectedPool] : poolNames, + activeTargets, + debouncedSearch, + healthFilter + ), + [selectedPool, poolNames, activeTargets, debouncedSearch, healthFilter] + ); + + const allPoolNames = Object.keys(allPools); + const shownPoolNames = showEmptyPools + ? allPoolNames + : allPoolNames.filter((pn) => allPools[pn].targets.length !== 0); + + return ( + + {allPoolNames.length === 0 ? ( + }> + No scrape pools found. + + ) : ( + !showEmptyPools && + allPoolNames.length !== shownPoolNames.length && ( + } + > + Hiding {allPoolNames.length - shownPoolNames.length} empty pools due + to filters or no targets. + setShowEmptyPools(true)}> + Show empty pools + + + ) + )} + {showLimitAlert && ( + } + withCloseButton + onClose={() => dispatch(setShowLimitAlert(false))} + > + There are more than {targetPoolDisplayLimit} scrape pools. Showing + only the first one. Use the dropdown to select a different pool. + + )} + !collapsedPools.includes(p))} + onChange={(value) => + dispatch( + setCollapsedPools(allPoolNames.filter((p) => !value.includes(p))) + ) + } + > + {shownPoolNames.map((poolName) => { + const pool = allPools[poolName]; + return ( + + + + {poolName} + + + {pool.upCount} / {pool.count} up + + + + + + + {pool.count === 0 ? ( + }> + No active targets in this scrape pool. + setShowEmptyPools(false)} + > + Hide empty pools + + + ) : pool.targets.length === 0 ? ( + }> + No targets in this pool match your filter criteria (omitted{" "} + {pool.count} filtered targets). + setShowEmptyPools(false)} + > + Hide empty pools + + + ) : ( + ( + + + + Endpoint + Labels + Last scrape + State + + + + {items.map((target, i) => ( + // TODO: Find a stable and definitely unique key. + + + + + + + + + + + + + + } + > + {humanizeDurationRelative( + target.lastScrape, + now() + )} + + + + + + } + > + {humanizeDuration( + target.lastScrapeDuration * 1000 + )} + + + + + + + {target.health} + + + + {target.lastError && ( + + + } + > + Error scraping target:{" "} + {target.lastError} + + + + )} + + ))} + +
+ )} + /> + )} +
+
+ ); + })} +
+
+ ); +}; + +export default ScrapePoolList; diff --git a/web/ui/mantine-ui/src/pages/targets/TargetLabels.tsx b/web/ui/mantine-ui/src/pages/targets/TargetLabels.tsx new file mode 100644 index 0000000000..472eded0f3 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/targets/TargetLabels.tsx @@ -0,0 +1,47 @@ +import { FC } from "react"; +import { Labels } from "../../api/responseTypes/targets"; +import { LabelBadges } from "../../components/LabelBadges"; +import { ActionIcon, Collapse, Group, Stack, Text } from "@mantine/core"; +import { useDisclosure } from "@mantine/hooks"; +import { IconChevronDown, IconChevronUp } from "@tabler/icons-react"; +import { actionIconStyle } from "../../styles"; + +type TargetLabelsProps = { + labels: Labels; + discoveredLabels: Labels; +}; + +const TargetLabels: FC = ({ discoveredLabels, labels }) => { + const [showDiscovered, { toggle: toggleDiscovered }] = useDisclosure(false); + + return ( + + + + + + {showDiscovered ? ( + + ) : ( + + )} + + + + + + Discovered labels: + + + + + ); +}; + +export default TargetLabels; diff --git a/web/ui/mantine-ui/src/pages/targets/TargetsPage.tsx b/web/ui/mantine-ui/src/pages/targets/TargetsPage.tsx new file mode 100644 index 0000000000..60dae60541 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/targets/TargetsPage.tsx @@ -0,0 +1,154 @@ +import { + ActionIcon, + Box, + Group, + Select, + Skeleton, + TextInput, +} from "@mantine/core"; +import { + IconLayoutNavbarCollapse, + IconLayoutNavbarExpand, + IconSearch, +} from "@tabler/icons-react"; +import { StateMultiSelect } from "../../components/StateMultiSelect"; +import { Suspense } from "react"; +import badgeClasses from "../../Badge.module.css"; +import { useAppDispatch, useAppSelector } from "../../state/hooks"; +import { + setCollapsedPools, + setShowLimitAlert, +} from "../../state/targetsPageSlice"; +import { + ArrayParam, + StringParam, + useQueryParam, + withDefault, +} from "use-query-params"; +import ErrorBoundary from "../../components/ErrorBoundary"; +import ScrapePoolList from "./ScrapePoolsList"; +import { useSuspenseAPIQuery } from "../../api/api"; +import { ScrapePoolsResult } from "../../api/responseTypes/scrapePools"; +import { expandIconStyle, inputIconStyle } from "../../styles"; + +export const targetPoolDisplayLimit = 20; + +export default function TargetsPage() { + // Load the list of all available scrape pools. + const { + data: { + data: { scrapePools }, + }, + } = useSuspenseAPIQuery({ + path: `/scrape_pools`, + }); + + const dispatch = useAppDispatch(); + + const [scrapePool, setScrapePool] = useQueryParam("pool", StringParam); + const [healthFilter, setHealthFilter] = useQueryParam( + "health", + withDefault(ArrayParam, []) + ); + const [searchFilter, setSearchFilter] = useQueryParam( + "search", + withDefault(StringParam, "") + ); + + const { collapsedPools, showLimitAlert } = useAppSelector( + (state) => state.targetsPage + ); + + // When we have more than X targets, we want to limit the display by selecting the first + // scrape pool and reflecting that in the URL as well. We also want to show an alert + // about the fact that we are limiting the display, but the tricky bit is that this + // alert should only be shown once, upon the first "redirect" that causes the limiting, + // not again when the page is reloaded with the same URL parameters. 
That's why we remember + // `showLimitAlert` in Redux (just useState() doesn't work properly, because the component + // for some Suspense-related reasons seems to be mounted/unmounted multiple times, so the + // state cell would get initialized multiple times as well). + const limited = + scrapePools.length > targetPoolDisplayLimit && scrapePool === undefined; + if (limited) { + setScrapePool(scrapePools[0]); + dispatch(setShowLimitAlert(true)); + } + + return ( + <> + + s in some browsers, due to the limited stylability of ``s in IE10+.\n &::-ms-expand {\n background-color: transparent;\n border: 0;\n }\n\n // Remove select outline from select box in FF\n &:-moz-focusring {\n color: transparent;\n text-shadow: 0 0 0 $input-color;\n }\n\n // Customize the `:focus` state to imitate native WebKit styles.\n @include form-control-focus($ignore-warning: true);\n\n // Placeholder\n &::placeholder {\n color: $input-placeholder-color;\n // Override Firefox's unusual default opacity; see https://github.com/twbs/bootstrap/pull/11526.\n opacity: 1;\n }\n\n // Disabled and read-only inputs\n //\n // HTML5 says that controls under a fieldset > legend:first-child won't be\n // disabled if the fieldset is disabled. Due to implementation difficulty, we\n // don't honor that edge case; we style them as disabled anyway.\n &:disabled,\n &[readonly] {\n background-color: $input-disabled-bg;\n // iOS fix for unreadable disabled content; see https://github.com/twbs/bootstrap/issues/11655.\n opacity: 1;\n }\n}\n\ninput[type=\"date\"],\ninput[type=\"time\"],\ninput[type=\"datetime-local\"],\ninput[type=\"month\"] {\n &.form-control {\n appearance: none; // Fix appearance for date inputs in Safari\n }\n}\n\nselect.form-control {\n &:focus::-ms-value {\n // Suppress the nested default white text on blue background highlight given to\n // the selected option text when the (still closed) receives focus\n // in IE and (under certain conditions) Edge.\n // See https://github.com/twbs/bootstrap/issues/19398.\n color: $input-color;\n background-color: $input-bg;\n }\n }\n\n &[multiple],\n &[size]:not([size=\"1\"]) {\n height: auto;\n padding-right: $custom-select-padding-x;\n background-image: none;\n }\n\n &:disabled {\n color: $custom-select-disabled-color;\n background-color: $custom-select-disabled-bg;\n }\n\n // Hides the default caret in IE11\n &::-ms-expand {\n display: none;\n }\n\n // Remove outline from select box in FF\n &:-moz-focusring {\n color: transparent;\n text-shadow: 0 0 0 $custom-select-color;\n }\n}\n\n.custom-select-sm {\n height: $custom-select-height-sm;\n padding-top: $custom-select-padding-y-sm;\n padding-bottom: $custom-select-padding-y-sm;\n padding-left: $custom-select-padding-x-sm;\n @include font-size($custom-select-font-size-sm);\n}\n\n.custom-select-lg {\n height: $custom-select-height-lg;\n padding-top: $custom-select-padding-y-lg;\n padding-bottom: $custom-select-padding-y-lg;\n padding-left: $custom-select-padding-x-lg;\n @include font-size($custom-select-font-size-lg);\n}\n\n\n// File\n//\n// Custom file input.\n\n.custom-file {\n position: relative;\n display: inline-block;\n width: 100%;\n height: $custom-file-height;\n margin-bottom: 0;\n}\n\n.custom-file-input {\n position: relative;\n z-index: 2;\n width: 100%;\n height: $custom-file-height;\n margin: 0;\n opacity: 0;\n\n &:focus ~ .custom-file-label {\n border-color: $custom-file-focus-border-color;\n box-shadow: $custom-file-focus-box-shadow;\n }\n\n // Use [disabled] and :disabled to work around 
https://github.com/twbs/bootstrap/issues/28247\n &[disabled] ~ .custom-file-label,\n &:disabled ~ .custom-file-label {\n background-color: $custom-file-disabled-bg;\n }\n\n @each $lang, $value in $custom-file-text {\n &:lang(#{$lang}) ~ .custom-file-label::after {\n content: $value;\n }\n }\n\n ~ .custom-file-label[data-browse]::after {\n content: attr(data-browse);\n }\n}\n\n.custom-file-label {\n position: absolute;\n top: 0;\n right: 0;\n left: 0;\n z-index: 1;\n height: $custom-file-height;\n padding: $custom-file-padding-y $custom-file-padding-x;\n font-family: $custom-file-font-family;\n font-weight: $custom-file-font-weight;\n line-height: $custom-file-line-height;\n color: $custom-file-color;\n background-color: $custom-file-bg;\n border: $custom-file-border-width solid $custom-file-border-color;\n @include border-radius($custom-file-border-radius);\n @include box-shadow($custom-file-box-shadow);\n\n &::after {\n position: absolute;\n top: 0;\n right: 0;\n bottom: 0;\n z-index: 3;\n display: block;\n height: $custom-file-height-inner;\n padding: $custom-file-padding-y $custom-file-padding-x;\n line-height: $custom-file-line-height;\n color: $custom-file-button-color;\n content: \"Browse\";\n @include gradient-bg($custom-file-button-bg);\n border-left: inherit;\n @include border-radius(0 $custom-file-border-radius $custom-file-border-radius 0);\n }\n}\n\n// Range\n//\n// Style range inputs the same across browsers. Vendor-specific rules for pseudo\n// elements cannot be mixed. As such, there are no shared styles for focus or\n// active states on prefixed selectors.\n\n.custom-range {\n width: 100%;\n height: add($custom-range-thumb-height, $custom-range-thumb-focus-box-shadow-width * 2);\n padding: 0; // Need to reset padding\n background-color: transparent;\n appearance: none;\n\n &:focus {\n outline: none;\n\n // Pseudo-elements must be split across multiple rulesets to have an effect.\n // No box-shadow() mixin for focus accessibility.\n &::-webkit-slider-thumb { box-shadow: $custom-range-thumb-focus-box-shadow; }\n &::-moz-range-thumb { box-shadow: $custom-range-thumb-focus-box-shadow; }\n &::-ms-thumb { box-shadow: $custom-range-thumb-focus-box-shadow; }\n }\n\n &::-moz-focus-outer {\n border: 0;\n }\n\n &::-webkit-slider-thumb {\n width: $custom-range-thumb-width;\n height: $custom-range-thumb-height;\n margin-top: ($custom-range-track-height - $custom-range-thumb-height) / 2; // Webkit specific\n @include gradient-bg($custom-range-thumb-bg);\n border: $custom-range-thumb-border;\n @include border-radius($custom-range-thumb-border-radius);\n @include box-shadow($custom-range-thumb-box-shadow);\n @include transition($custom-forms-transition);\n appearance: none;\n\n &:active {\n @include gradient-bg($custom-range-thumb-active-bg);\n }\n }\n\n &::-webkit-slider-runnable-track {\n width: $custom-range-track-width;\n height: $custom-range-track-height;\n color: transparent; // Why?\n cursor: $custom-range-track-cursor;\n background-color: $custom-range-track-bg;\n border-color: transparent;\n @include border-radius($custom-range-track-border-radius);\n @include box-shadow($custom-range-track-box-shadow);\n }\n\n &::-moz-range-thumb {\n width: $custom-range-thumb-width;\n height: $custom-range-thumb-height;\n @include gradient-bg($custom-range-thumb-bg);\n border: $custom-range-thumb-border;\n @include border-radius($custom-range-thumb-border-radius);\n @include box-shadow($custom-range-thumb-box-shadow);\n @include transition($custom-forms-transition);\n appearance: none;\n\n 
&:active {\n @include gradient-bg($custom-range-thumb-active-bg);\n }\n }\n\n &::-moz-range-track {\n width: $custom-range-track-width;\n height: $custom-range-track-height;\n color: transparent;\n cursor: $custom-range-track-cursor;\n background-color: $custom-range-track-bg;\n border-color: transparent; // Firefox specific?\n @include border-radius($custom-range-track-border-radius);\n @include box-shadow($custom-range-track-box-shadow);\n }\n\n &::-ms-thumb {\n width: $custom-range-thumb-width;\n height: $custom-range-thumb-height;\n margin-top: 0; // Edge specific\n margin-right: $custom-range-thumb-focus-box-shadow-width; // Workaround that overflowed box-shadow is hidden.\n margin-left: $custom-range-thumb-focus-box-shadow-width; // Workaround that overflowed box-shadow is hidden.\n @include gradient-bg($custom-range-thumb-bg);\n border: $custom-range-thumb-border;\n @include border-radius($custom-range-thumb-border-radius);\n @include box-shadow($custom-range-thumb-box-shadow);\n @include transition($custom-forms-transition);\n appearance: none;\n\n &:active {\n @include gradient-bg($custom-range-thumb-active-bg);\n }\n }\n\n &::-ms-track {\n width: $custom-range-track-width;\n height: $custom-range-track-height;\n color: transparent;\n cursor: $custom-range-track-cursor;\n background-color: transparent;\n border-color: transparent;\n border-width: $custom-range-thumb-height / 2;\n @include box-shadow($custom-range-track-box-shadow);\n }\n\n &::-ms-fill-lower {\n background-color: $custom-range-track-bg;\n @include border-radius($custom-range-track-border-radius);\n }\n\n &::-ms-fill-upper {\n margin-right: 15px; // arbitrary?\n background-color: $custom-range-track-bg;\n @include border-radius($custom-range-track-border-radius);\n }\n\n &:disabled {\n &::-webkit-slider-thumb {\n background-color: $custom-range-thumb-disabled-bg;\n }\n\n &::-webkit-slider-runnable-track {\n cursor: default;\n }\n\n &::-moz-range-thumb {\n background-color: $custom-range-thumb-disabled-bg;\n }\n\n &::-moz-range-track {\n cursor: default;\n }\n\n &::-ms-thumb {\n background-color: $custom-range-thumb-disabled-bg;\n }\n }\n}\n\n.custom-control-label::before,\n.custom-file-label,\n.custom-select {\n @include transition($custom-forms-transition);\n}\n","// Base class\n//\n// Kickstart any navigation component with a set of style resets. Works with\n// `