From c5a0b8a5566897ec973e4d563bf10bad37ddb308 Mon Sep 17 00:00:00 2001 From: Matthew McPherrin Date: Tue, 7 Oct 2025 14:30:17 -0400 Subject: [PATCH 1/5] Add an extra timeout waiting for RVAs Once we have enough perspectives, we want to wait a little bit more to see if the last perspective replies, but we don't want to hold up all of issuance on a perspective that's hanging or being slow. --- va/va.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/va/va.go b/va/va.go index 4993aec3602..6e020062cb7 100644 --- a/va/va.go +++ b/va/va.go @@ -620,6 +620,15 @@ func (va *ValidationAuthorityImpl) doRemoteOperation(ctx context.Context, op rem firstProb = currProb } + // If enough perspectives have passed, start a timer for how long we'll wait for remaining perspectives + if len(passed) >= required && len(passedRIRs) >= requiredRIRs { + // TODO: configurable + go func() { + time.Sleep(2 * time.Second) + cancel() + }() + } + // Once all the VAs have returned a result, break the loop. if len(passed)+len(failed) >= remoteVACount { break From 08afa19afccb8b682c500c59b7a85ff710e27d20 Mon Sep 17 00:00:00 2001 From: Aaron Gable Date: Wed, 15 Oct 2025 16:25:58 -0700 Subject: [PATCH 2/5] Use time.AfterFunc and make it configurable --- cmd/boulder-va/main.go | 12 +++++++----- cmd/remoteva/main.go | 4 +++- va/va.go | 14 +++++++------- va/va_test.go | 2 ++ 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/cmd/boulder-va/main.go b/cmd/boulder-va/main.go index dd3fe9b393d..54d030481ca 100644 --- a/cmd/boulder-va/main.go +++ b/cmd/boulder-va/main.go @@ -10,6 +10,7 @@ import ( "github.com/letsencrypt/boulder/bdns" "github.com/letsencrypt/boulder/cmd" + "github.com/letsencrypt/boulder/config" "github.com/letsencrypt/boulder/features" bgrpc "github.com/letsencrypt/boulder/grpc" "github.com/letsencrypt/boulder/iana" @@ -50,10 +51,9 @@ type RemoteVAGRPCClientConfig struct { type Config struct { VA struct { vaConfig.Common - RemoteVAs []RemoteVAGRPCClientConfig `validate:"omitempty,dive"` - // Deprecated and ignored - MaxRemoteValidationFailures int `validate:"omitempty,min=0,required_with=RemoteVAs"` - Features features.Config + RemoteVAs []RemoteVAGRPCClientConfig `validate:"omitempty,dive"` + SlowRemoteTimeout config.Duration + Features features.Config } Syslog cmd.SyslogConfig @@ -149,7 +149,9 @@ func main() { c.VA.AccountURIPrefixes, va.PrimaryPerspective, "", - iana.IsReservedAddr) + iana.IsReservedAddr, + c.VA.SlowRemoteTimeout.Duration, + ) cmd.FailOnError(err, "Unable to create VA server") start, err := bgrpc.NewServer(c.VA.GRPC, logger).Add( diff --git a/cmd/remoteva/main.go b/cmd/remoteva/main.go index d049ba126d9..43b68d621fd 100644 --- a/cmd/remoteva/main.go +++ b/cmd/remoteva/main.go @@ -140,7 +140,9 @@ func main() { c.RVA.AccountURIPrefixes, c.RVA.Perspective, c.RVA.RIR, - iana.IsReservedAddr) + iana.IsReservedAddr, + 0, + ) cmd.FailOnError(err, "Unable to create Remote-VA server") start, err := bgrpc.NewServer(c.RVA.GRPC, logger).Add( diff --git a/va/va.go b/va/va.go index 6e020062cb7..80e131d8109 100644 --- a/va/va.go +++ b/va/va.go @@ -216,6 +216,7 @@ type ValidationAuthorityImpl struct { maxRemoteFailures int accountURIPrefixes []string singleDialTimeout time.Duration + slowRemoteTimeout time.Duration perspective string rir string isReservedIPFunc func(netip.Addr) error @@ -239,6 +240,7 @@ func NewValidationAuthorityImpl( perspective string, rir string, reservedIPChecker func(netip.Addr) error, + slowRemoteTimeout time.Duration, ) (*ValidationAuthorityImpl, error) { if len(accountURIPrefixes) == 0 { @@ -620,13 +622,11 @@ func (va *ValidationAuthorityImpl) doRemoteOperation(ctx context.Context, op rem firstProb = currProb } - // If enough perspectives have passed, start a timer for how long we'll wait for remaining perspectives - if len(passed) >= required && len(passedRIRs) >= requiredRIRs { - // TODO: configurable - go func() { - time.Sleep(2 * time.Second) - cancel() - }() + // If enough perspectives have passed, set a tighter deadline for the + // remaining perspectives. + if len(passed) >= required && len(passedRIRs) >= requiredRIRs && va.slowRemoteTimeout != 0 { + timer := time.AfterFunc(va.slowRemoteTimeout, cancel) + defer timer.Stop() } // Once all the VAs have returned a result, break the loop. diff --git a/va/va_test.go b/va/va_test.go index 9ce91aafa41..196ca97dc01 100644 --- a/va/va_test.go +++ b/va/va_test.go @@ -145,6 +145,7 @@ func setup(srv *httptest.Server, userAgent string, remoteVAs []RemoteVA, mockDNS perspective, "", isNonLoopbackReservedIP, + time.Second, ) if err != nil { panic(fmt.Sprintf("Failed to create validation authority: %v", err)) @@ -323,6 +324,7 @@ func TestNewValidationAuthorityImplWithDuplicateRemotes(t *testing.T) { "example perspective", "", isNonLoopbackReservedIP, + time.Second, ) test.AssertError(t, err, "NewValidationAuthorityImpl allowed duplicate remote perspectives") test.AssertContains(t, err.Error(), "duplicate remote VA perspective \"dadaist\"") From 541c0299f8db9bca44b323bcfb237ccf996e8a56 Mon Sep 17 00:00:00 2001 From: Aaron Gable Date: Wed, 15 Oct 2025 16:34:19 -0700 Subject: [PATCH 3/5] Set it in config-next --- test/config-next/va.json | 1 + 1 file changed, 1 insertion(+) diff --git a/test/config-next/va.json b/test/config-next/va.json index 3b7dba6768e..21cd801760e 100644 --- a/test/config-next/va.json +++ b/test/config-next/va.json @@ -63,6 +63,7 @@ "http://boulder.service.consul:4001/acme/acct/", "http://boulder.service.consul:4000/acme/reg/" ], + "slowRemoteTimeout": "2s", "features": { "DNSAccount01Enabled": true } From 93cdf6d75e345cb5788cfe7caa837741711ba255 Mon Sep 17 00:00:00 2001 From: Aaron Gable Date: Thu, 16 Oct 2025 14:54:20 -0700 Subject: [PATCH 4/5] Review comments --- cmd/boulder-va/main.go | 6 +++++- va/va.go | 7 ++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cmd/boulder-va/main.go b/cmd/boulder-va/main.go index 54d030481ca..fecf2ed7fdf 100644 --- a/cmd/boulder-va/main.go +++ b/cmd/boulder-va/main.go @@ -51,7 +51,11 @@ type RemoteVAGRPCClientConfig struct { type Config struct { VA struct { vaConfig.Common - RemoteVAs []RemoteVAGRPCClientConfig `validate:"omitempty,dive"` + RemoteVAs []RemoteVAGRPCClientConfig `validate:"omitempty,dive"` + // SlowRemoteTimeout sets how long the VA is willing to wait for slow + // RemoteVA instances to finish their work. It starts counting from + // when the VA first gets a quorum of (un)successful remote results. + // Leaving this value zero means the VA won't early-cancel slow remotes. SlowRemoteTimeout config.Duration Features features.Config } diff --git a/va/va.go b/va/va.go index 80e131d8109..26b18947655 100644 --- a/va/va.go +++ b/va/va.go @@ -622,9 +622,10 @@ func (va *ValidationAuthorityImpl) doRemoteOperation(ctx context.Context, op rem firstProb = currProb } - // If enough perspectives have passed, set a tighter deadline for the - // remaining perspectives. - if len(passed) >= required && len(passedRIRs) >= requiredRIRs && va.slowRemoteTimeout != 0 { + // If enough perspectives have passed, or enough perspectives have + // failed, set a tighter deadline for the remaining perspectives. + if va.slowRemoteTimeout != 0 && ((len(passed) >= required && len(passedRIRs) >= requiredRIRs) || + (len(failed) > remoteVACount-required)) { timer := time.AfterFunc(va.slowRemoteTimeout, cancel) defer timer.Stop() } From 163af3548ae02b0e75f5bd406cf8537a189758b2 Mon Sep 17 00:00:00 2001 From: Aaron Gable Date: Thu, 16 Oct 2025 15:50:19 -0700 Subject: [PATCH 5/5] Simplify conditional --- va/va.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/va/va.go b/va/va.go index 26b18947655..55211e27bcc 100644 --- a/va/va.go +++ b/va/va.go @@ -622,12 +622,14 @@ func (va *ValidationAuthorityImpl) doRemoteOperation(ctx context.Context, op rem firstProb = currProb } - // If enough perspectives have passed, or enough perspectives have - // failed, set a tighter deadline for the remaining perspectives. - if va.slowRemoteTimeout != 0 && ((len(passed) >= required && len(passedRIRs) >= requiredRIRs) || - (len(failed) > remoteVACount-required)) { - timer := time.AfterFunc(va.slowRemoteTimeout, cancel) - defer timer.Stop() + if va.slowRemoteTimeout != 0 { + // If enough perspectives have passed, or enough perspectives have + // failed, set a tighter deadline for the remaining perspectives. + if (len(passed) >= required && len(passedRIRs) >= requiredRIRs) || + (len(failed) > remoteVACount-required) { + timer := time.AfterFunc(va.slowRemoteTimeout, cancel) + defer timer.Stop() + } } // Once all the VAs have returned a result, break the loop.