From 6b642ff779bfd5b3281f722fe55d405957cc6ece Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Mazur?= Date: Mon, 15 Sep 2025 14:39:01 +0200 Subject: [PATCH] Receive: fix capnproto replication in endless loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mazur --- CHANGELOG.md | 9 ++++++--- pkg/receive/writecapnp/client.go | 17 ++++++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9887a683244..027c2900b35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,10 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ## Unreleased +### Fixed + +- [#8254](https://github.com/thanos-io/thanos/issues/8254) Receive: Endless loop of retried replication with capnproto and distributors + ## [v0.40.0](https://github.com/thanos-io/thanos/tree/release-0.40) - 2025 10 20 (in progress) ### Fixed @@ -22,7 +26,6 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#8366](https://github.com/thanos-io/thanos/pull/8366) Store: optionally ignore Parquet migrated blocks - [#8359](https://github.com/thanos-io/thanos/pull/8359) Tools: add `--shipper.upload-compacted` flag for uploading compacted blocks to bucket upload-blocks -- [#8484](https://github.com/thanos-io/thanos/pull/8484) Query: add `/api/v1/status/tsdb` API endpoint. 
### Changed @@ -30,14 +33,14 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Removed -## [v0.39.2](https://github.com/thanos-io/thanos/tree/release-0.39) - 2025 07 17 +### [v0.39.2](https://github.com/thanos-io/thanos/tree/release-0.39) - 2025 07 17 ### Fixed - [#8374](https://github.com/thanos-io/thanos/pull/8374) Query: fix panic when concurrently accessing annotations map - [#8375](https://github.com/thanos-io/thanos/pull/8375) Query: fix native histogram buckets in distributed queries -## [v0.39.1](https://github.com/thanos-io/thanos/tree/release-0.39) - 2025 07 01 +### [v0.39.1](https://github.com/thanos-io/thanos/tree/release-0.39) - 2025 07 01 Fixes a memory leak issue on query-frontend. The bug only affects v0.39.0. diff --git a/pkg/receive/writecapnp/client.go b/pkg/receive/writecapnp/client.go index 3cd9f2d0820..b15785ef4e3 100644 --- a/pkg/receive/writecapnp/client.go +++ b/pkg/receive/writecapnp/client.go @@ -117,17 +117,28 @@ func (r *RemoteWriteClient) connect(ctx context.Context) error { return errors.Wrap(err, "failed to dial peer") } r.conn = rpc.NewConn(rpc.NewPackedStreamTransport(conn), nil) - r.writer = Writer(r.conn.Bootstrap(ctx)) + writer := Writer(r.conn.Bootstrap(ctx)) + if err := writer.Resolve(ctx); err != nil { + level.Warn(r.logger).Log("msg", "failed to bootstrap capnp writer, closing connection", "err", err) + r.closeUnlocked() + return errors.Wrap(err, "failed to bootstrap capnp writer") + } + + r.writer = writer return nil } func (r *RemoteWriteClient) Close() error { r.mu.Lock() + r.closeUnlocked() + r.mu.Unlock() + return nil +} + +func (r *RemoteWriteClient) closeUnlocked() { if r.conn != nil { conn := r.conn r.conn = nil go conn.Close() } - r.mu.Unlock() - return nil }