diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index bd258389f9..098c088b12 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -121,7 +121,7 @@ jobs: with: role-duration-seconds: 21600 # 6 hours - - uses: google-github-actions/get-secretmanager-secrets@e5bb06c2ca53b244f978d33348d18317a7f263ce # v2.2.2 + - uses: google-github-actions/get-secretmanager-secrets@a8440875e1c2892062aef9061228d4f1af8f919b # v2.2.3 with: export_to_environment: true secrets: |- @@ -257,7 +257,7 @@ jobs: # there is a high chance things will stop working # This is trying to reduce the chances of that happening. # See https://github.com/elastic/observability-test-environments/actions/workflows/cluster-rotate-api-keys.yml - - uses: google-github-actions/get-secretmanager-secrets@e5bb06c2ca53b244f978d33348d18317a7f263ce # v2.2.2 + - uses: google-github-actions/get-secretmanager-secrets@a8440875e1c2892062aef9061228d4f1af8f919b # v2.2.3 if: always() with: export_to_environment: true diff --git a/.github/workflows/functional-tests.yml b/.github/workflows/functional-tests.yml index 17ee4f4edb..97eb0b93e7 100644 --- a/.github/workflows/functional-tests.yml +++ b/.github/workflows/functional-tests.yml @@ -39,7 +39,7 @@ jobs: - uses: elastic/oblt-actions/google/auth@v1 - - uses: google-github-actions/get-secretmanager-secrets@e5bb06c2ca53b244f978d33348d18317a7f263ce # v2.2.2 + - uses: google-github-actions/get-secretmanager-secrets@a8440875e1c2892062aef9061228d4f1af8f919b # v2.2.3 with: export_to_environment: true secrets: |- diff --git a/.github/workflows/smoke-tests-ess.yml b/.github/workflows/smoke-tests-ess.yml index 6ee49bdc04..7b7be9723f 100644 --- a/.github/workflows/smoke-tests-ess.yml +++ b/.github/workflows/smoke-tests-ess.yml @@ -68,7 +68,7 @@ jobs: - uses: elastic/oblt-actions/google/auth@v1 - - uses: google-github-actions/get-secretmanager-secrets@e5bb06c2ca53b244f978d33348d18317a7f263ce # v2.2.2 + - uses: google-github-actions/get-secretmanager-secrets@a8440875e1c2892062aef9061228d4f1af8f919b # v2.2.3 with: export_to_environment: true secrets: |- @@ -81,7 +81,7 @@ jobs: # there is a high chance things will stop working # This is trying to reduce the chances of that happening. # See https://github.com/elastic/observability-test-environments/actions/workflows/cluster-rotate-api-keys.yml - - uses: google-github-actions/get-secretmanager-secrets@e5bb06c2ca53b244f978d33348d18317a7f263ce # v2.2.2 + - uses: google-github-actions/get-secretmanager-secrets@a8440875e1c2892062aef9061228d4f1af8f919b # v2.2.3 if: always() with: export_to_environment: true diff --git a/.github/workflows/smoke-tests-os.yml b/.github/workflows/smoke-tests-os.yml index a0b3ba20a0..f2b06db092 100644 --- a/.github/workflows/smoke-tests-os.yml +++ b/.github/workflows/smoke-tests-os.yml @@ -63,7 +63,7 @@ jobs: - uses: elastic/oblt-actions/google/auth@v1 - - uses: google-github-actions/get-secretmanager-secrets@e5bb06c2ca53b244f978d33348d18317a7f263ce # v2.2.2 + - uses: google-github-actions/get-secretmanager-secrets@a8440875e1c2892062aef9061228d4f1af8f919b # v2.2.3 with: export_to_environment: true secrets: |- diff --git a/changelogs/9.0.asciidoc b/changelogs/9.0.asciidoc index b328011e5b..f6c059c601 100644 --- a/changelogs/9.0.asciidoc +++ b/changelogs/9.0.asciidoc @@ -14,7 +14,7 @@ https://github.com/elastic/apm-server/compare/v\...v9.0.0[View commits] [float] ==== Breaking Changes -- Change `sampling.tail.storage_limit` default to `0`. While `0` means unlimited local tail-sampling database size, it now enforces a max 90% disk usage on the disk where the data directory is located. Any tail sampling writes after this threshold will be rejected, similar to what happens when tail-sampling database size exceeds a non-0 storage limit. Setting `sampling.tail.storage_limit` to non-0 maintains the existing behavior which limits the tail-sampling database size to `sampling.tail.storage_limit` and does not have the new disk usage threshold check. {pull}15467[15467] +- Change `sampling.tail.storage_limit` default to `0`. While `0` means unlimited local tail-sampling database size, it now enforces a max 80% disk usage on the disk where the data directory is located. Any tail sampling writes after this threshold will be rejected, similar to what happens when tail-sampling database size exceeds a non-0 storage limit. Setting `sampling.tail.storage_limit` to non-0 maintains the existing behavior which limits the tail-sampling database size to `sampling.tail.storage_limit` and does not have the new disk usage threshold check. {pull}15467[15467] {pull}15524[15524] [float] ==== Deprecations diff --git a/changelogs/all-breaking-changes.asciidoc b/changelogs/all-breaking-changes.asciidoc index cfd4f68427..dbbce5cbc4 100644 --- a/changelogs/all-breaking-changes.asciidoc +++ b/changelogs/all-breaking-changes.asciidoc @@ -16,13 +16,14 @@ The following breaking changes are introduced in APM version 9.0.0: - Change `sampling.tail.storage_limit` default to `0`. While `0` means unlimited local tail-sampling database size, -it now enforces a max 90% disk usage on the disk where the data directory is located. +it now enforces a max 80% disk usage on the disk where the data directory is located. Any tail sampling writes after this threshold will be rejected, similar to what happens when tail-sampling database size exceeds a non-0 storage limit. Setting `sampling.tail.storage_limit` to non-0 maintains the existing behavior which limits the tail-sampling database size to `sampling.tail.storage_limit` and does not have the new disk usage threshold check. -For more details, see https://github.com/elastic/apm-server/pull/15467[PR #15467] +For more details, see https://github.com/elastic/apm-server/pull/15467[PR #15467] and +https://github.com/elastic/apm-server/pull/15524[PR #15524] // end::90-bc[] // tag::811-bc[] diff --git a/internal/beater/config/config_test.go b/internal/beater/config/config_test.go index 7590210134..5c3e98fbcd 100644 --- a/internal/beater/config/config_test.go +++ b/internal/beater/config/config_test.go @@ -364,7 +364,7 @@ func TestUnpackConfig(t *testing.T) { IngestRateDecayFactor: 0.25, StorageLimit: "0", StorageLimitParsed: 0, - DiskUsageThreshold: 0.9, + DiskUsageThreshold: 0.8, TTL: 30 * time.Minute, }, }, diff --git a/internal/beater/config/sampling.go b/internal/beater/config/sampling.go index 83823c3ef4..7986068f25 100644 --- a/internal/beater/config/sampling.go +++ b/internal/beater/config/sampling.go @@ -161,7 +161,7 @@ func defaultTailSamplingConfig() TailSamplingConfig { IngestRateDecayFactor: 0.25, TTL: 30 * time.Minute, StorageLimit: "0", - DiskUsageThreshold: 0.9, + DiskUsageThreshold: 0.8, DiscardOnWriteFailure: false, } parsed, err := humanize.ParseBytes(cfg.StorageLimit) diff --git a/x-pack/apm-server/sampling/eventstorage/storage_manager.go b/x-pack/apm-server/sampling/eventstorage/storage_manager.go index dadd11d31d..c1d7f22130 100644 --- a/x-pack/apm-server/sampling/eventstorage/storage_manager.go +++ b/x-pack/apm-server/sampling/eventstorage/storage_manager.go @@ -449,7 +449,7 @@ func (sm *StorageManager) NewReadWriter(storageLimit uint64, diskUsageThreshold return uint64(float64(sm.cachedDiskStat.total.Load()) * diskUsageThreshold) } // the total disk space could change in runtime, but it is still useful to print it out in logs. - sm.logger.Infof("setting disk usage threshold to %.2f of total disk space of %0.1fgb", diskUsageThreshold, float64(sm.cachedDiskStat.total.Load())/gb) + sm.logger.Infof("setting disk usage threshold to %.0f%% of total disk space of %0.1fgb", diskUsageThreshold*100, float64(sm.cachedDiskStat.total.Load())/gb) diskThresholdChecker := NewStorageLimitCheckerFunc(sm.diskUsed, diskThreshold) rw = NewStorageLimitReadWriter( fmt.Sprintf("disk usage threshold %.2f", diskUsageThreshold),