Skip to content

Commit f4d9857

Browse files
committed
Improve nodes startup in systests (#6706)
## Motivation

I observed bootnodes being killed by the k8s startup probe in systests:

```
Normal Killing 38s kubelet Container smesher failed startup probe, will be restarted
```

This happens because bootnodes are deployed before the poets: they try to reach the poets, retry a few times, and the startup probe times out.
1 parent d85f2ca commit f4d9857

File tree

6 files changed

+32
-22
lines changed

6 files changed

+32
-22
lines changed

node/node.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,12 @@ func GetCommand() *cobra.Command {
199199

200200
// This blocks until the context is finished or until an error is produced
201201
err = app.Start(ctx)
202+
if err != nil {
203+
app.log.With().Error("app failed", log.Err(err))
204+
} else {
205+
app.log.With().Info("app stopped", log.Err(ctx.Err()))
206+
}
207+
202208
cleanupCtx, cleanupCancel := context.WithTimeout(
203209
context.Background(),
204210
30*time.Second,

systest/cluster/cluster.go

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -173,17 +173,6 @@ func Default(cctx *testcontext.Context, opts ...Opt) (*Cluster, error) {
173173
zap.Int("remote", cctx.RemoteSize),
174174
)
175175

176-
keys := make([]ed25519.PrivateKey, cctx.ClusterSize)
177-
for i := range keys {
178-
keys[i] = cl.accounts.Private(i)
179-
}
180-
181-
if err := cl.AddBootnodes(cctx, cctx.BootnodeSize); err != nil {
182-
return nil, err
183-
}
184-
if err := cl.AddBootstrappers(cctx); err != nil {
185-
return nil, err
186-
}
187176
pubkey, privkey, err := ed25519.GenerateKey(nil)
188177
if err != nil {
189178
return nil, fmt.Errorf("generating keys for certifier: %w", err)
@@ -198,6 +187,18 @@ func Default(cctx *testcontext.Context, opts ...Opt) (*Cluster, error) {
198187
return nil, err
199188
}
200189

190+
keys := make([]ed25519.PrivateKey, cctx.ClusterSize)
191+
for i := range keys {
192+
keys[i] = cl.accounts.Private(i)
193+
}
194+
195+
if err := cl.AddBootnodes(cctx, cctx.BootnodeSize); err != nil {
196+
return nil, err
197+
}
198+
if err := cl.AddBootstrappers(cctx); err != nil {
199+
return nil, err
200+
}
201+
201202
smesherKeys := keys[cctx.BootnodeSize : cctx.BootnodeSize+smeshers]
202203
if err := cl.AddSmeshers(cctx, smeshers, WithSmeshers(smesherKeys)); err != nil {
203204
return nil, err

systest/cluster/nodes.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -902,9 +902,11 @@ func deployNode(
902902
WithLimits(smesherResources.Get(ctx.Parameters).Limits),
903903
).
904904
WithStartupProbe(
905-
corev1.Probe().WithTCPSocket(
906-
corev1.TCPSocketAction().WithPort(intstr.FromInt32(9092)),
907-
).WithInitialDelaySeconds(10).WithPeriodSeconds(10),
905+
corev1.Probe().
906+
WithTCPSocket(corev1.TCPSocketAction().WithPort(intstr.FromInt32(9092))).
907+
WithInitialDelaySeconds(10).
908+
WithPeriodSeconds(5).
909+
WithFailureThreshold(12),
908910
).
909911
WithEnv(
910912
corev1.EnvVar().WithName("GOMAXPROCS").WithValue("4"),

systest/tests/distributed_post_verification_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,9 @@ func TestPostMalfeasanceProof(t *testing.T) {
7777
ctx.PoetSize = 1 // one poet guarantees everybody gets the same proof
7878
ctx.ClusterSize = 8
7979
cl := cluster.New(ctx, cluster.WithKeys(10))
80+
require.NoError(t, cl.AddPoets(ctx))
8081
require.NoError(t, cl.AddBootnodes(ctx, 1))
8182
require.NoError(t, cl.AddBootstrappers(ctx))
82-
require.NoError(t, cl.AddPoets(ctx))
8383
require.NoError(t, cl.AddSmeshers(ctx, ctx.ClusterSize-cl.Total(), cluster.WithFlags(cluster.PostK3(1))))
8484

8585
logger := ctx.Log.Desugar().WithOptions(zap.IncreaseLevel(zap.InfoLevel), zap.WithCaller(false))

systest/tests/equivocation_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ func TestEquivocation(t *testing.T) {
2929

3030
const bootnodes = 2
3131
cl := cluster.New(cctx, cluster.WithKeys(cctx.ClusterSize))
32+
require.NoError(t, cl.AddPoets(cctx))
3233
require.NoError(t, cl.AddBootnodes(cctx, bootnodes))
3334
require.NoError(t, cl.AddBootstrappers(cctx))
34-
require.NoError(t, cl.AddPoets(cctx))
3535

3636
smeshers := cctx.ClusterSize - cl.Total()
3737
honest := int(float64(smeshers) * 0.6)

systest/tests/poets_test.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,9 @@ func TestNodesUsingDifferentPoets(t *testing.T) {
131131
}
132132

133133
cl := cluster.New(tctx, cluster.WithKeys(tctx.ClusterSize))
134+
require.NoError(t, cl.AddPoets(tctx))
134135
require.NoError(t, cl.AddBootnodes(tctx, 2))
135136
require.NoError(t, cl.AddBootstrappers(tctx))
136-
require.NoError(t, cl.AddPoets(tctx))
137137

138138
for i := 0; i < tctx.ClusterSize-2; i++ {
139139
poetId := i % tctx.PoetSize
@@ -215,15 +215,12 @@ func TestRegisteringInPoetWithPowAndCert(t *testing.T) {
215215
tctx := testcontext.New(t)
216216

217217
cl := cluster.New(tctx, cluster.WithKeys(10))
218-
require.NoError(t, cl.AddBootnodes(tctx, 2))
219-
require.NoError(t, cl.AddBootstrappers(tctx))
220218

221-
pubkey, privkey, err := ed25519.GenerateKey(nil)
222-
require.NoError(t, err)
223-
require.NoError(t, cl.AddCertifier(tctx, base64.StdEncoding.EncodeToString(privkey.Seed())))
224219
// First poet supports PoW only (legacy)
225220
require.NoError(t, cl.AddPoet(tctx))
226221
// Second poet supports certs
222+
pubkey, privkey, err := ed25519.GenerateKey(nil)
223+
require.NoError(t, err)
227224
require.NoError(
228225
t,
229226
cl.AddPoet(
@@ -232,6 +229,10 @@ func TestRegisteringInPoetWithPowAndCert(t *testing.T) {
232229
cluster.PoetCertifierPubkey(base64.StdEncoding.EncodeToString(pubkey)),
233230
),
234231
)
232+
233+
require.NoError(t, cl.AddBootnodes(tctx, 2))
234+
require.NoError(t, cl.AddBootstrappers(tctx))
235+
require.NoError(t, cl.AddCertifier(tctx, base64.StdEncoding.EncodeToString(privkey.Seed())))
235236
require.NoError(t, cl.AddSmeshers(tctx, tctx.ClusterSize-2))
236237
require.NoError(t, cl.WaitAll(tctx))
237238

0 commit comments

Comments
 (0)