Skip to content

Commit cdbd44f

Browse files
authored
feat: delete draining timeout pod and watch claim on pool (#468)
* feat: delete draining timeout pod and watch claim on pool Signed-off-by: Aylei <[email protected]> * feat: enable cnclaim rolling-update Signed-off-by: Aylei <[email protected]> --------- Signed-off-by: Aylei <[email protected]>
1 parent 01a5010 commit cdbd44f

File tree

9 files changed

+409
-49
lines changed

9 files changed

+409
-49
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ test: api-test unit
9191

9292
# Run unit tests
9393
unit: generate fmt vet manifests
94-
go test ./pkg/... -coverprofile cover.out
94+
CGO_ENABLED=0 go test ./pkg/... -coverprofile cover.out
9595

9696
api-test:
9797
cd api && make test

api/core/v1alpha1/cnclaim_helper.go

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// Copyright 2024 Matrix Origin
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package v1alpha1
16+
17+
func (c *CNClaim) IsReady() bool {
18+
return c.Status.Phase == CNClaimPhaseBound || c.Status.Phase == CNClaimPhaseOutdated
19+
}
20+
21+
func (c *CNClaim) IsUpdated() bool {
22+
return c.Status.Phase == CNClaimPhaseBound
23+
}

api/core/v1alpha1/cnclaim_types.go

+2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ const (
2525
CNClaimPhaseBound CNClaimPhase = "Bound"
2626
CNClaimPhaseLost CNClaimPhase = "Lost"
2727

28+
CNClaimPhaseOutdated CNClaimPhase = "Outdated"
29+
2830
ClaimOwnerNameLabel = "matrixorigin.io/claim-owner"
2931
)
3032

pkg/controllers/cnclaim/controller.go

+58-4
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,20 @@ import (
2828
apierrors "k8s.io/apimachinery/pkg/api/errors"
2929
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3030
"k8s.io/apimachinery/pkg/labels"
31+
"k8s.io/apimachinery/pkg/types"
32+
"sigs.k8s.io/controller-runtime/pkg/builder"
3133
"sigs.k8s.io/controller-runtime/pkg/client"
34+
"sigs.k8s.io/controller-runtime/pkg/handler"
3235
"sigs.k8s.io/controller-runtime/pkg/manager"
36+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3337
"slices"
3438
"time"
3539
)
3640

41+
const (
42+
waitCacheTimeout = 10 * time.Second
43+
)
44+
3745
// Actor reconciles CN Claim
3846
type Actor struct {
3947
clientMgr *hacli.HAKeeperClientManager
@@ -146,7 +154,7 @@ func (r *Actor) doClaimCN(ctx *recon.Context[*v1alpha1.CNClaim], orphans []corev
146154
return nil, errors.Wrap(err, "error list idle Pods")
147155
}
148156

149-
slices.SortFunc(idleCNs, priorityFunc(c))
157+
sortCNByPriority(c, idleCNs)
150158
for i := range idleCNs {
151159
pod := &idleCNs[i]
152160
pod.Labels[v1alpha1.CNPodPhaseLabel] = v1alpha1.CNPodPhaseBound
@@ -191,7 +199,32 @@ func (r *Actor) bindPod(ctx *recon.Context[*v1alpha1.CNClaim], pod *corev1.Pod,
191199
}
192200

193201
func (r *Actor) Sync(ctx *recon.Context[*v1alpha1.CNClaim]) error {
194-
// TODO: monitor pod health
202+
c := ctx.Obj
203+
switch c.Status.Phase {
204+
case v1alpha1.CNClaimPhasePending:
205+
return errors.Errorf("CN Claim %s/%s is pending, should bind it first", c.Namespace, c.Name)
206+
case v1alpha1.CNClaimPhaseLost:
207+
return nil
208+
case v1alpha1.CNClaimPhaseBound, v1alpha1.CNClaimPhaseOutdated:
209+
// noop
210+
default:
211+
return errors.Errorf("CN Claim %s/%s is in unknown phase %s", c.Namespace, c.Name, c.Status.Phase)
212+
}
213+
pod := &corev1.Pod{}
214+
err := ctx.Get(types.NamespacedName{Namespace: c.Namespace, Name: c.Spec.PodName}, pod)
215+
if err != nil {
216+
if apierrors.IsNotFound(err) {
217+
if c.Status.BoundTime != nil && time.Since(c.Status.BoundTime.Time) < waitCacheTimeout {
218+
return recon.ErrReSync("pod status may be not update to date, wait", waitCacheTimeout)
219+
}
220+
c.Status.Phase = v1alpha1.CNClaimPhaseLost
221+
return nil
222+
}
223+
}
224+
if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodUnknown {
225+
c.Status.Phase = v1alpha1.CNClaimPhaseLost
226+
return nil
227+
}
195228
return nil
196229
}
197230

@@ -223,7 +256,7 @@ func (r *Actor) Finalize(ctx *recon.Context[*v1alpha1.CNClaim]) (bool, error) {
223256
// set the CN Pod to draining phase and let the draining process handle recycling
224257
if err := ctx.Patch(&cn, func() error {
225258
cn.Labels[v1alpha1.CNPodPhaseLabel] = v1alpha1.CNPodPhaseDraining
226-
delete(cn.Labels, v1alpha1.ClaimOwnerNameLabel)
259+
delete(cn.Labels, v1alpha1.PodClaimedByLabel)
227260
if cn.Annotations == nil {
228261
cn.Annotations = map[string]string{}
229262
}
@@ -268,7 +301,24 @@ func (r *Actor) patchStore(ctx *recon.Context[*v1alpha1.CNClaim], pod *corev1.Po
268301
}
269302

270303
func (r *Actor) Start(mgr manager.Manager) error {
271-
return recon.Setup[*v1alpha1.CNClaim](&v1alpha1.CNClaim{}, "cn-claim-manager", mgr, r)
304+
return recon.Setup[*v1alpha1.CNClaim](&v1alpha1.CNClaim{}, "cn-claim-manager", mgr, r, recon.WithBuildFn(func(b *builder.Builder) {
305+
b.Watches(&corev1.Pod{}, handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, object client.Object) []reconcile.Request {
306+
pod, ok := object.(*corev1.Pod)
307+
if !ok {
308+
return nil
309+
}
310+
claimName, ok := pod.Labels[v1alpha1.PodClaimedByLabel]
311+
if !ok {
312+
return nil
313+
}
314+
return []reconcile.Request{{
315+
NamespacedName: types.NamespacedName{
316+
Namespace: pod.Namespace,
317+
Name: claimName,
318+
},
319+
}}
320+
}))
321+
}))
272322
}
273323

274324
func toStoreStatus(cn *metadata.CNService) v1alpha1.CNStoreStatus {
@@ -293,6 +343,10 @@ func toStoreStatus(cn *metadata.CNService) v1alpha1.CNStoreStatus {
293343
}
294344
}
295345

346+
func sortCNByPriority(c *v1alpha1.CNClaim, pods []corev1.Pod) {
347+
slices.SortFunc(pods, priorityFunc(c))
348+
}
349+
296350
func priorityFunc(c *v1alpha1.CNClaim) func(a, b corev1.Pod) int {
297351
return func(a, b corev1.Pod) int {
298352
// 1. claim the previously used pod first
+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// Copyright 2024 Matrix Origin
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package cnclaim
16+
17+
import (
18+
"github.com/matrixorigin/matrixone-operator/api/core/v1alpha1"
19+
corev1 "k8s.io/api/core/v1"
20+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
"k8s.io/utils/pointer"
22+
"math/rand"
23+
"testing"
24+
25+
. "github.com/onsi/gomega"
26+
)
27+
28+
func Test_sortCNByPriority(t *testing.T) {
29+
tests := []struct {
30+
name string
31+
c *v1alpha1.CNClaim
32+
pods []corev1.Pod
33+
order []string
34+
}{{
35+
name: "basic",
36+
c: &v1alpha1.CNClaim{
37+
Spec: v1alpha1.CNClaimSpec{
38+
OwnerName: pointer.String("set1"),
39+
},
40+
},
41+
pods: []corev1.Pod{
42+
{
43+
ObjectMeta: metav1.ObjectMeta{
44+
Name: "not-claimed-but-older",
45+
Labels: map[string]string{
46+
v1alpha1.CNPodPhaseLabel: v1alpha1.CNPodPhaseIdle,
47+
},
48+
CreationTimestamp: metav1.Unix(0, 0),
49+
},
50+
},
51+
{
52+
ObjectMeta: metav1.ObjectMeta{
53+
Name: "previously-claimed",
54+
Labels: map[string]string{
55+
v1alpha1.CNPodPhaseLabel: v1alpha1.CNPodPhaseIdle,
56+
v1alpha1.ClaimOwnerNameLabel: "set1",
57+
},
58+
CreationTimestamp: metav1.Unix(10, 0),
59+
},
60+
},
61+
{
62+
ObjectMeta: metav1.ObjectMeta{
63+
Name: "previously-claimed-by-other-set",
64+
Labels: map[string]string{
65+
v1alpha1.CNPodPhaseLabel: v1alpha1.CNPodPhaseIdle,
66+
v1alpha1.ClaimOwnerNameLabel: "set2",
67+
},
68+
CreationTimestamp: metav1.Unix(10, 0),
69+
},
70+
},
71+
},
72+
order: []string{
73+
"previously-claimed",
74+
"not-claimed-but-older",
75+
"previously-claimed-by-other-set",
76+
},
77+
}}
78+
for _, tt := range tests {
79+
t.Run(tt.name, func(t *testing.T) {
80+
rand.Shuffle(len(tt.pods), func(i, j int) {
81+
tt.pods[i], tt.pods[j] = tt.pods[j], tt.pods[i]
82+
})
83+
sortCNByPriority(tt.c, tt.pods)
84+
g := NewGomegaWithT(t)
85+
var res []string
86+
for _, po := range tt.pods {
87+
res = append(res, po.Name)
88+
}
89+
g.Expect(res).To(Equal(tt.order))
90+
})
91+
}
92+
}

0 commit comments

Comments
 (0)