@@ -20,10 +20,13 @@ import (
20
20
"context"
21
21
22
22
"github.com/go-logr/logr"
23
+ "k8s.io/apimachinery/pkg/runtime/schema"
23
24
"k8s.io/client-go/tools/record"
24
25
"k8s.io/klog/v2"
26
+ "k8s.io/utils/ptr"
25
27
ctrl "sigs.k8s.io/controller-runtime"
26
28
"sigs.k8s.io/controller-runtime/pkg/client"
29
+ "sigs.k8s.io/controller-runtime/pkg/client/apiutil"
27
30
28
31
kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1"
29
32
runtime "github.com/kubeflow/training-operator/pkg/runtime.v2"
@@ -33,13 +36,15 @@ type TrainJobReconciler struct {
33
36
log logr.Logger
34
37
client client.Client
35
38
recorder record.EventRecorder
39
+ runtimes map [string ]runtime.Runtime
36
40
}
37
41
38
- func NewTrainJobReconciler (client client.Client , recorder record.EventRecorder ) * TrainJobReconciler {
42
+ func NewTrainJobReconciler (client client.Client , recorder record.EventRecorder , runs map [ string ]runtime. Runtime ) * TrainJobReconciler {
39
43
return & TrainJobReconciler {
40
44
log : ctrl .Log .WithName ("trainjob-controller" ),
41
45
client : client ,
42
46
recorder : recorder ,
47
+ runtimes : runs ,
43
48
}
44
49
}
45
50
@@ -49,15 +54,70 @@ func (r *TrainJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
49
54
return ctrl.Result {}, client .IgnoreNotFound (err )
50
55
}
51
56
log := ctrl .LoggerFrom (ctx ).WithValues ("trainJob" , klog .KObj (& trainJob ))
52
- ctrl .LoggerInto (ctx , log )
57
+ ctx = ctrl .LoggerInto (ctx , log )
53
58
log .V (2 ).Info ("Reconciling TrainJob" )
59
+ if err := r .createOrUpdateObjs (ctx , & trainJob ); err != nil {
60
+ return ctrl.Result {}, err
61
+ }
62
+ // TODO (tenzen-y): Do update the status.
54
63
return ctrl.Result {}, nil
55
64
}
56
65
57
- func (r * TrainJobReconciler ) SetupWithManager (mgr ctrl.Manager , runtimes map [string ]runtime.Runtime ) error {
66
+ func (r * TrainJobReconciler ) createOrUpdateObjs (ctx context.Context , trainJob * kubeflowv2.TrainJob ) error {
67
+ log := ctrl .LoggerFrom (ctx )
68
+
69
+ // Controller assumes the runtime existence has already verified in the webhook on TrainJob creation.
70
+ run := r .runtimes [runtimeRefToGroupKind (trainJob .Spec .RuntimeRef ).String ()]
71
+ objs , err := run .NewObjects (ctx , trainJob )
72
+ if err != nil {
73
+ return err
74
+ }
75
+ for _ , obj := range objs {
76
+ var gvk schema.GroupVersionKind
77
+ if gvk , err = apiutil .GVKForObject (obj .DeepCopyObject (), r .client .Scheme ()); err != nil {
78
+ return err
79
+ }
80
+ logKeysAndValues := []any {
81
+ "groupVersionKind" , gvk .String (),
82
+ "namespace" , obj .GetNamespace (),
83
+ "name" , obj .GetName (),
84
+ }
85
+ // TODO (tenzen-y): Ideally, we should use the SSA instead of checking existence.
86
+ // Non-empty resourceVersion indicates UPDATE operation.
87
+ var creationErr error
88
+ var created bool
89
+ if obj .GetResourceVersion () == "" {
90
+ creationErr = r .client .Create (ctx , obj )
91
+ created = creationErr == nil
92
+ }
93
+ switch {
94
+ case created :
95
+ log .V (5 ).Info ("Succeeded to create object" , logKeysAndValues )
96
+ continue
97
+ case client .IgnoreAlreadyExists (creationErr ) != nil :
98
+ return creationErr
99
+ default :
100
+ // This indicates CREATE operation has not been performed or the object has already existed in the cluster.
101
+ if err = r .client .Update (ctx , obj ); err != nil {
102
+ return err
103
+ }
104
+ log .V (5 ).Info ("Succeeded to update object" , logKeysAndValues )
105
+ }
106
+ }
107
+ return nil
108
+ }
109
+
110
+ func runtimeRefToGroupKind (runtimeRef kubeflowv2.RuntimeRef ) schema.GroupKind {
111
+ return schema.GroupKind {
112
+ Group : ptr .Deref (runtimeRef .APIGroup , "" ),
113
+ Kind : ptr .Deref (runtimeRef .Kind , "" ),
114
+ }
115
+ }
116
+
117
+ func (r * TrainJobReconciler ) SetupWithManager (mgr ctrl.Manager ) error {
58
118
b := ctrl .NewControllerManagedBy (mgr ).
59
119
For (& kubeflowv2.TrainJob {})
60
- for _ , run := range runtimes {
120
+ for _ , run := range r . runtimes {
61
121
for _ , registrar := range run .EventHandlerRegistrars () {
62
122
if registrar != nil {
63
123
b = registrar (b , mgr .GetClient ())
0 commit comments