@@ -119,16 +119,20 @@ func (r *SyncReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
119
119
}
120
120
121
121
builder := builder .NewSyncPodBuilder (sync , from , to )
122
+ l .Info ("start to prepare sync worker pods" , "replicas" , sync .Spec .Replicas )
122
123
if err := r .prepareWorkerPod (ctx , sync , builder ); err != nil {
123
124
l .Error (err , "failed to prepare worker pod" )
124
125
return ctrl.Result {}, err
125
126
}
127
+ l .Info ("prepare worker pod done" , "replicas" , sync .Spec .Replicas )
126
128
129
+ l .Info ("start to prepare sync manager pod" )
127
130
// prepare manager pod
128
131
if err := r .prepareManagerPod (ctx , sync , builder ); err != nil {
129
132
l .Error (err , "failed to prepare manager pod" )
130
133
return ctrl.Result {}, err
131
134
}
135
+ l .Info ("prepare manager pod done, ready to sync" )
132
136
133
137
sync .Status .StartAt = & metav1.Time {Time : time .Now ()}
134
138
sync .Status .Phase = juicefsiov1 .SyncPhaseProgressing
@@ -139,13 +143,11 @@ func (r *SyncReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
139
143
140
144
if sync .Status .Phase == juicefsiov1 .SyncPhaseCompleted {
141
145
// delete worker pod
142
- labelSelector := client.MatchingLabels {
143
- common .LabelSync : sync .Name ,
144
- common .LabelAppType : common .LabelSyncWorkerValue ,
145
- }
146
- if err := r .DeleteAllOf (ctx , & corev1.Pod {}, client .InNamespace (sync .Namespace ), labelSelector ); err != nil {
146
+ if err := r .deleteWorkerPods (ctx , sync , true ); err != nil {
147
+ l .Error (err , "failed to delete worker pods" )
147
148
return ctrl.Result {}, err
148
149
}
150
+
149
151
if sync .Spec .TTLSecondsAfterFinished != nil {
150
152
completedAt := sync .Status .CompletedAt
151
153
if completedAt == nil {
@@ -163,29 +165,40 @@ func (r *SyncReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
163
165
}
164
166
165
167
if sync .Status .Phase == juicefsiov1 .SyncPhaseFailed {
166
- labelSelector := client.MatchingLabels {
167
- common .LabelSync : sync .Name ,
168
- common .LabelAppType : common .LabelSyncWorkerValue ,
169
- }
170
- if err := r .DeleteAllOf (ctx , & corev1.Pod {}, client .InNamespace (sync .Namespace ), labelSelector ); err != nil {
171
- return ctrl.Result {}, client .IgnoreNotFound (err )
168
+ if err := r .deleteWorkerPods (ctx , sync , true ); err != nil {
169
+ l .Error (err , "failed to delete worker pods" )
170
+ return ctrl.Result {}, err
172
171
}
173
- return ctrl.Result {}, nil
174
172
}
175
173
176
174
if sync .Status .Phase == juicefsiov1 .SyncPhaseProgressing {
177
175
// get manager pod
178
176
managerPod := & corev1.Pod {}
179
177
if err := r .Get (ctx , client.ObjectKey {Namespace : sync .Namespace , Name : common .GenSyncManagerName (sync .Name )}, managerPod ); err != nil {
180
- return ctrl.Result {}, client .IgnoreNotFound (err )
178
+ if apierrors .IsNotFound (err ) {
179
+ sync .Status .Phase = juicefsiov1 .SyncPhaseFailed
180
+ sync .Status .Reason = "manager pod not found"
181
+ return ctrl.Result {}, r .Status ().Update (ctx , sync )
182
+ }
183
+ l .Error (err , "failed to get manager pod" )
184
+ return ctrl.Result {}, err
181
185
}
182
- status , err := r .calculateSyncStats (ctx , sync , managerPod )
183
- if err != nil {
186
+
187
+ // delete worker completed pod
188
+ if err := r .deleteWorkerPods (ctx , sync , false ); err != nil {
189
+ l .Error (err , "failed to delete worker pods" )
184
190
return ctrl.Result {}, err
185
191
}
192
+
193
+ status , err := r .calculateSyncStats (ctx , sync , managerPod )
186
194
if ! reflect .DeepEqual (sync .Status , status ) {
187
195
sync .Status = status
188
- return ctrl.Result {RequeueAfter : 3 * time .Second }, utils .IgnoreConflict (r .Status ().Update (ctx , sync ))
196
+ if err := r .Status ().Update (ctx , sync ); err != nil {
197
+ return ctrl.Result {}, err
198
+ }
199
+ }
200
+ if err != nil {
201
+ return ctrl.Result {}, err
189
202
}
190
203
return ctrl.Result {RequeueAfter : 3 * time .Second }, nil
191
204
}
@@ -197,22 +210,24 @@ func (r *SyncReconciler) calculateSyncStats(ctx context.Context, sync *juicefsio
197
210
l := log .FromContext (ctx )
198
211
status := sync .Status
199
212
if managerPod .Status .Phase == corev1 .PodSucceeded || managerPod .Status .Phase == corev1 .PodFailed {
213
+ if managerPod .Status .Phase == corev1 .PodFailed {
214
+ status .Phase = juicefsiov1 .SyncPhaseFailed
215
+ } else {
216
+ status .Phase = juicefsiov1 .SyncPhaseCompleted
217
+ }
218
+ status .CompletedAt = & metav1.Time {Time : time .Now ()}
200
219
finishLog , err := utils .LogPod (ctx , sync .Namespace , common .GenSyncManagerName (sync .Name ), common .SyncNamePrefix , 5 )
201
220
if err != nil {
221
+ status .Reason = "failed to get manager pod last logs\n error: " + err .Error ()
202
222
l .Error (err , "failed to get manager pod last logs" )
203
223
return status , err
204
224
}
205
225
if len (finishLog ) > 0 {
206
226
status .FinishLog = finishLog
207
227
}
208
- if managerPod .Status .Phase == corev1 .PodFailed {
209
- status .Phase = juicefsiov1 .SyncPhaseFailed
210
- } else {
211
- status .Phase = juicefsiov1 .SyncPhaseCompleted
212
- }
213
- status .CompletedAt = & metav1.Time {Time : time .Now ()}
214
228
statsMap , err := utils .ParseLog (status .FinishLog )
215
229
if err != nil {
230
+ status .Reason = "failed to parse log\n error: " + err .Error ()
216
231
l .Error (err , "failed to parse log" )
217
232
} else {
218
233
stats := juicefsiov1.SyncStats {}
@@ -347,7 +362,7 @@ func (r *SyncReconciler) prepareWorkerPod(ctx context.Context, sync *juicefsiov1
347
362
}
348
363
}
349
364
if len (ips ) == int (* sync .Spec .Replicas )- 1 {
350
- log .V ( 1 ). Info ("sync worker pod ready" , "ips" , ips )
365
+ log .Info ("sync worker pod ready" , "ips" , ips )
351
366
builder .UpdateWorkerIPs (ips )
352
367
return nil
353
368
}
@@ -356,15 +371,63 @@ func (r *SyncReconciler) prepareWorkerPod(ctx context.Context, sync *juicefsiov1
356
371
}
357
372
}
358
373
374
+ func (r * SyncReconciler ) deleteWorkerPods (ctx context.Context , sync * juicefsiov1.Sync , all bool ) error {
375
+ labelSelector := client.MatchingLabels {
376
+ common .LabelSync : sync .Name ,
377
+ common .LabelAppType : common .LabelSyncWorkerValue ,
378
+ }
379
+ var fieldSelector client.MatchingFields
380
+ if ! all {
381
+ fieldSelector = client.MatchingFields {
382
+ "status.phase" : string (corev1 .PodSucceeded ),
383
+ }
384
+ }
385
+ return client .IgnoreNotFound (
386
+ r .DeleteAllOf (ctx , & corev1.Pod {},
387
+ client .InNamespace (sync .Namespace ),
388
+ labelSelector ,
389
+ fieldSelector ,
390
+ ))
391
+ }
392
+
359
393
func (r * SyncReconciler ) prepareManagerPod (ctx context.Context , sync * juicefsiov1.Sync , builder * builder.SyncPodBuilder ) error {
360
394
managerPod := builder .NewManagerPod ()
361
- if err := r .Get (ctx , client.ObjectKey {Namespace : sync .Namespace , Name : managerPod .Name }, & corev1.Pod {}); err != nil {
362
- if apierrors .IsNotFound (err ) {
363
- return r .Create (ctx , managerPod )
364
- }
395
+ err := r .Get (ctx , client.ObjectKey {Namespace : sync .Namespace , Name : managerPod .Name }, & corev1.Pod {})
396
+ if err != nil && ! apierrors .IsNotFound (err ) {
365
397
return err
366
398
}
367
- return nil
399
+ if apierrors .IsNotFound (err ) {
400
+ if err := r .Create (ctx , managerPod ); err != nil {
401
+ return err
402
+ }
403
+ }
404
+ // waiting for manager pod ready
405
+ ctx , cancel := context .WithTimeout (ctx , 1 * time .Minute )
406
+ defer cancel ()
407
+ for {
408
+ select {
409
+ case <- ctx .Done ():
410
+ return fmt .Errorf ("timeout waiting for manager pod ready" )
411
+ default :
412
+ err := r .Get (ctx , client.ObjectKey {Namespace : sync .Namespace , Name : managerPod .Name }, managerPod )
413
+ if err != nil {
414
+ if apierrors .IsNotFound (err ) {
415
+ time .Sleep (1 * time .Second )
416
+ continue
417
+ }
418
+ return err
419
+ }
420
+ if utils .IsPodReady (* managerPod ) {
421
+ log .FromContext (ctx ).Info ("sync manager pod ready" )
422
+ return nil
423
+ }
424
+ // It may have failed/successed immediately after starting, also returns success at this time.
425
+ if managerPod .Status .Phase == corev1 .PodFailed || managerPod .Status .Phase == corev1 .PodSucceeded {
426
+ return nil
427
+ }
428
+ time .Sleep (5 * time .Second )
429
+ }
430
+ }
368
431
}
369
432
370
433
// SetupWithManager sets up the controller with the Manager.
@@ -373,8 +436,7 @@ func (r *SyncReconciler) SetupWithManager(mgr ctrl.Manager) error {
373
436
For (& juicefsiov1.Sync {}).
374
437
Owns (& corev1.Pod {}).
375
438
WithOptions (controller.Options {
376
- // TODO: configable
377
- MaxConcurrentReconciles : 5 ,
439
+ MaxConcurrentReconciles : common .MaxSyncConcurrentReconciles ,
378
440
}).
379
441
Named ("sync" ).
380
442
Complete (r )
0 commit comments