Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use token based approach for system-agent #769

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,12 @@ rules:
- patch
- update
- watch
- apiGroups:
- ""
resources:
- serviceaccounts/token
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
Expand Down
2 changes: 1 addition & 1 deletion exp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ export CLUSTER_NAMESPACE=default
export CLUSTER_NAME=rke2
export ETCD_MACHINE_SNAPSHOT_NAME="<snapshot_name_from_the_output>"

envsubst < etcdrestore/examples/etcd-restore.yaml | kubectl apply -f -
envsubst < exp/etcdrestore/examples/etcd-restore.yaml | kubectl apply -f -
```

## Cleanup
Expand Down
6 changes: 6 additions & 0 deletions exp/etcdrestore/config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ rules:
- patch
- update
- watch
- apiGroups:
- ""
resources:
- serviceaccounts/token
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
Expand Down
45 changes: 31 additions & 14 deletions exp/etcdrestore/controllers/etcdsnapshotrestore_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ import (
snapshotrestorev1 "github.com/rancher/turtles/exp/etcdrestore/api/v1alpha1"
)

// InitMachine is a filter matching on init machine of the ETCD snapshot
func InitMachine(etcdMachineSnapshot *snapshotrestorev1.ETCDMachineSnapshot) collections.Func {
// initMachine returns a filter that matches the init machine of the ETCD snapshot.
func initMachine(etcdMachineSnapshot *snapshotrestorev1.ETCDMachineSnapshot) collections.Func {
return func(machine *clusterv1.Machine) bool {
return machine.Name == etcdMachineSnapshot.Spec.MachineName
}
Expand Down Expand Up @@ -104,6 +104,7 @@ type scope struct {
//+kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters/status,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups="",resources=secrets;events;configmaps;serviceaccounts,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups="",resources=serviceaccounts/token,verbs=create
//+kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=roles;rolebindings,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups="management.cattle.io",resources=*,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=rke2configs;rke2configs/status;rke2configs/finalizers,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -159,7 +160,7 @@ func (r *ETCDSnapshotRestoreReconciler) reconcileNormal(ctx context.Context, etc
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}

if scope.machines.Filter(InitMachine(scope.etcdMachineSnapshot)).Len() != 1 {
if scope.machines.Filter(initMachine(scope.etcdMachineSnapshot)).Len() != 1 {
return ctrl.Result{}, fmt.Errorf(
"init machine %s for snapshot %s is not found",
scope.etcdMachineSnapshot.Spec.MachineName,
Expand Down Expand Up @@ -191,15 +192,13 @@ func (r *ETCDSnapshotRestoreReconciler) reconcileNormal(ctx context.Context, etc

return ctrl.Result{}, nil
case snapshotrestorev1.ETCDSnapshotRestorePhaseStarted:
etcdSnapshotRestore.Status.Phase = snapshotrestorev1.ETCDSnapshotRestorePhaseShutdown

return ctrl.Result{}, nil
return r.preparePlanPermissions(ctx, scope, etcdSnapshotRestore)
case snapshotrestorev1.ETCDSnapshotRestorePhaseShutdown:
// Stop RKE2 on all the machines.
return r.stopRKE2OnAllMachines(ctx, scope, etcdSnapshotRestore)
case snapshotrestorev1.ETCDSnapshotRestorePhaseRunning:
// Restore the etcd snapshot on the init machine.
return r.restoreSnaphotOnInitMachine(ctx, scope, etcdSnapshotRestore)
return r.restoreSnapshotOnInitMachine(ctx, scope, etcdSnapshotRestore)
case snapshotrestorev1.ETCDSnapshotRestorePhaseAgentRestart:
// Start RKE2 on all the machines.
return r.startRKE2OnAllMachines(ctx, scope, etcdSnapshotRestore)
Expand All @@ -212,7 +211,7 @@ func (r *ETCDSnapshotRestoreReconciler) reconcileNormal(ctx context.Context, etc
case snapshotrestorev1.ETCDSnapshotRestorePhaseJoinAgents:
return r.waitForMachinesToJoin(ctx, scope, etcdSnapshotRestore)
case snapshotrestorev1.ETCDSnapshotRestorePhaseFinished, snapshotrestorev1.ETCDSnapshotRestorePhaseFailed:
return ctrl.Result{}, nil
return r.revokePlanPermissions(ctx, scope, etcdSnapshotRestore)
}

return ctrl.Result{}, nil
Expand Down Expand Up @@ -251,6 +250,24 @@ func initScope(ctx context.Context, c client.Client, etcdSnapshotRestore *snapsh
}, nil
}

// preparePlanPermissions creates the RBAC objects for the restore plan by
// calling Permit on a Planner named "restore<restore name>", built from the
// newest machine in scope and the full machine collection. On success it moves
// the restore to the Shutdown phase.
//
// It returns an error when the scope holds no machines or when Permit fails;
// in both cases the phase is left unchanged.
func (r *ETCDSnapshotRestoreReconciler) preparePlanPermissions(ctx context.Context, scope *scope, etcdSnapshotRestore *snapshotrestorev1.ETCDSnapshotRestore) (ctrl.Result, error) {
	// Plan hands the machine to initSecret, which dereferences it, so a nil
	// machine (nothing in the collection) must be rejected up front.
	newest := scope.machines.Newest()
	if newest == nil {
		return ctrl.Result{}, fmt.Errorf("no machines found to prepare plan permissions for restore %s", etcdSnapshotRestore.Name)
	}

	planner := Plan(ctx, r.Client, "restore"+etcdSnapshotRestore.Name, newest, scope.machines)
	if err := planner.Permit(ctx); err != nil {
		return ctrl.Result{}, err
	}

	etcdSnapshotRestore.Status.Phase = snapshotrestorev1.ETCDSnapshotRestorePhaseShutdown

	return ctrl.Result{}, nil
}

// revokePlanPermissions removes the RBAC objects of the restore plan by calling
// Revoke on a Planner named "restore<restore name>", built from the newest
// machine in scope and the full machine collection.
//
// It returns an error when the scope holds no machines or when Revoke fails.
func (r *ETCDSnapshotRestoreReconciler) revokePlanPermissions(ctx context.Context, scope *scope, etcdSnapshotRestore *snapshotrestorev1.ETCDSnapshotRestore) (ctrl.Result, error) {
	// Plan hands the machine to initSecret, which dereferences it, so a nil
	// machine (nothing in the collection) must be rejected up front.
	newest := scope.machines.Newest()
	if newest == nil {
		return ctrl.Result{}, fmt.Errorf("no machines found to revoke plan permissions for restore %s", etcdSnapshotRestore.Name)
	}

	planner := Plan(ctx, r.Client, "restore"+etcdSnapshotRestore.Name, newest, scope.machines)
	if err := planner.Revoke(ctx); err != nil {
		return ctrl.Result{}, err
	}

	return ctrl.Result{}, nil
}

func (r *ETCDSnapshotRestoreReconciler) stopRKE2OnAllMachines(ctx context.Context, scope *scope, etcdSnapshotRestore *snapshotrestorev1.ETCDSnapshotRestore) (ctrl.Result, error) {
log := log.FromContext(ctx)

Expand All @@ -259,7 +276,7 @@ func (r *ETCDSnapshotRestoreReconciler) stopRKE2OnAllMachines(ctx context.Contex
log.Info("Stopping RKE2 on machine", "machine", machine.Name)

// Get the plan secret for the machine.
applied, err := Plan(ctx, r.Client, machine).Apply(ctx, RKE2KillAll())
applied, err := Plan(ctx, r.Client, "restore"+etcdSnapshotRestore.Name, machine, scope.machines).Apply(ctx, RKE2KillAll())
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to get plan secret for machine: %w", err)
}
Expand All @@ -286,15 +303,15 @@ func (r *ETCDSnapshotRestoreReconciler) stopRKE2OnAllMachines(ctx context.Contex
return ctrl.Result{}, nil
}

func (r *ETCDSnapshotRestoreReconciler) restoreSnaphotOnInitMachine(ctx context.Context, scope *scope, etcdSnapshotRestore *snapshotrestorev1.ETCDSnapshotRestore) (ctrl.Result, error) {
func (r *ETCDSnapshotRestoreReconciler) restoreSnapshotOnInitMachine(ctx context.Context, scope *scope, etcdSnapshotRestore *snapshotrestorev1.ETCDSnapshotRestore) (ctrl.Result, error) {
log := log.FromContext(ctx)

initMachine := scope.machines.Filter(InitMachine(scope.etcdMachineSnapshot)).UnsortedList()[0]
initMachine := scope.machines.Filter(initMachine(scope.etcdMachineSnapshot)).UnsortedList()[0]

log.Info("Filling plan secret with etcd restore instructions", "machine", initMachine.Name)

// Get the plan secret for the machine.
applied, err := Plan(ctx, r.Client, initMachine).Apply(
applied, err := Plan(ctx, r.Client, "restore"+etcdSnapshotRestore.Name, initMachine, scope.machines).Apply(
ctx,
RemoveServerURL(),
ManifestRemoval(),
Expand All @@ -318,7 +335,7 @@ func (r *ETCDSnapshotRestoreReconciler) restoreSnaphotOnInitMachine(ctx context.
func (r *ETCDSnapshotRestoreReconciler) startRKE2OnAllMachines(ctx context.Context, scope *scope, etcdSnapshotRestore *snapshotrestorev1.ETCDSnapshotRestore) (ctrl.Result, error) {
log := log.FromContext(ctx)

initMachine := scope.machines.Filter(InitMachine(scope.etcdMachineSnapshot)).UnsortedList()[0]
initMachine := scope.machines.Filter(initMachine(scope.etcdMachineSnapshot)).UnsortedList()[0]

// TODO: other registration methods
initMachineIP := getInternalMachineIP(initMachine)
Expand Down Expand Up @@ -350,7 +367,7 @@ func (r *ETCDSnapshotRestoreReconciler) startRKE2OnAllMachines(ctx context.Conte
StartRKE2())
}

applied, err := Plan(ctx, r.Client, machine).Apply(ctx, instructions...)
applied, err := Plan(ctx, r.Client, "restore"+etcdSnapshotRestore.Name, machine, scope.machines).Apply(ctx, instructions...)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to patch plan secret: %w", err)
} else if !applied.Finished {
Expand Down
98 changes: 91 additions & 7 deletions exp/etcdrestore/controllers/planner.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,21 @@ import (
bootstrapv1 "github.com/rancher/cluster-api-provider-rke2/bootstrap/api/v1beta1"
snapshotrestorev1 "github.com/rancher/turtles/exp/etcdrestore/api/v1alpha1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/util/collections"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
)

// Planner is responsible for executing instructions on the underlying machine host
// in the specified order, and collecting output from executed steps.
type Planner struct {
Name string
client.Client
machine *clusterv1.Machine
secret *corev1.Secret
machine *clusterv1.Machine
machines collections.Machines
secret *corev1.Secret
}

// Instructions is a one time operation, used to perform shell commands on the host
Expand All @@ -64,16 +68,20 @@ type plan struct {
}

// Plan initializes a Planner, used to perform instructions in a specific order and collect results.
func Plan(ctx context.Context, c client.Client, machine *clusterv1.Machine) *Planner {
func Plan(ctx context.Context, c client.Client, name string, machine *clusterv1.Machine, machines collections.Machines) *Planner {
return &Planner{
Client: c,
machine: machine,
secret: initSecret(machine, map[string][]byte{}),
Client: c,
Name: name,
machine: machine,
machines: machines,
secret: initSecret(machine, map[string][]byte{}),
}
}

func initSecret(machine *clusterv1.Machine, data map[string][]byte) *corev1.Secret {
planSecretName := strings.Join([]string{machine.Spec.Bootstrap.ConfigRef.Name, "rke2config", "plan"}, "-")
kind := strings.ToLower(machine.Spec.Bootstrap.ConfigRef.Kind)
name := machine.Spec.Bootstrap.ConfigRef.Name
planSecretName := strings.Join([]string{name, kind, "plan"}, "-")

return &corev1.Secret{
TypeMeta: metav1.TypeMeta{
Expand All @@ -83,6 +91,12 @@ func initSecret(machine *clusterv1.Machine, data map[string][]byte) *corev1.Secr
ObjectMeta: metav1.ObjectMeta{
Namespace: machine.Namespace,
Name: planSecretName,
OwnerReferences: []metav1.OwnerReference{{
Name: machine.Name,
Kind: "Machine",
UID: machine.UID,
APIVersion: clusterv1.GroupVersion.String(),
}},
},
Data: data,
}
Expand Down Expand Up @@ -247,6 +261,76 @@ func (p *Planner) applied(plan, appliedChecksum []byte) bool {
return planHash == string(appliedChecksum)
}

// planRole builds the Role that grants access to the plan secrets of the
// machines held by the Planner.
//
// The Role is named "<cluster name>-<plan name>", with the cluster name read
// from the ClusterNameLabel of p.machine, and is placed in p.machine's
// namespace. When at least one plan secret name can be derived, the Role
// carries a single rule allowing watch/get/update/list on exactly those
// secrets.
//
// Plan secret names are "<bootstrap config name>-<lowercased bootstrap config
// kind>-plan", matching the name initSecret derives for the plan secret.
// Machines without a bootstrap config reference are skipped.
func (p *Planner) planRole() *rbacv1.Role {
	machines := p.machines.UnsortedList()
	secrets := make([]string, 0, len(machines))

	for _, machine := range machines {
		ref := machine.Spec.Bootstrap.ConfigRef
		if ref == nil {
			// Without a bootstrap config reference there is no plan secret name to derive.
			continue
		}

		secrets = append(secrets, strings.Join([]string{ref.Name, strings.ToLower(ref.Kind), "plan"}, "-"))
	}

	role := &rbacv1.Role{
		ObjectMeta: metav1.ObjectMeta{
			Name:      p.machine.Labels[clusterv1.ClusterNameLabel] + "-" + p.Name,
			Namespace: p.machine.Namespace,
		},
	}

	// A rule with empty ResourceNames applies to every secret in the namespace,
	// so only emit the rule when there is at least one name to restrict it to.
	if len(secrets) > 0 {
		role.Rules = []rbacv1.PolicyRule{
			{
				Verbs:         []string{"watch", "get", "update", "list"},
				APIGroups:     []string{""},
				Resources:     []string{"secrets"},
				ResourceNames: secrets,
			},
		}
	}

	return role
}

// planRoleBinding builds (but does not create) the RoleBinding for the plan.
//
// The binding shares its name, "<cluster name>-<plan name>", and namespace with
// the Role it references, and its only subject is the ServiceAccount named
// "<cluster name>-system-agent" in the same namespace. The cluster name is read
// from the ClusterNameLabel of p.machine.
func (p *Planner) planRoleBinding() *rbacv1.RoleBinding {
	clusterName := p.machine.Labels[clusterv1.ClusterNameLabel]
	namespace := p.machine.Namespace
	roleName := clusterName + "-" + p.Name

	agent := rbacv1.Subject{
		Kind:      "ServiceAccount",
		Name:      clusterName + "-system-agent",
		Namespace: namespace,
	}

	roleRef := rbacv1.RoleRef{
		APIGroup: rbacv1.GroupName,
		Kind:     "Role",
		Name:     roleName,
	}

	return &rbacv1.RoleBinding{
		ObjectMeta: metav1.ObjectMeta{
			Name:      roleName,
			Namespace: namespace,
		},
		Subjects: []rbacv1.Subject{agent},
		RoleRef:  roleRef,
	}
}

// Permit creates the plan Role followed by the plan RoleBinding.
//
// An AlreadyExists response from either create call is ignored, so an existing
// object is left as it is. Any other error is wrapped and returned, and the
// RoleBinding is not attempted when creating the Role fails.
func (p *Planner) Permit(ctx context.Context) error {
	role := p.planRole()
	if err := client.IgnoreAlreadyExists(p.Create(ctx, role)); err != nil {
		return fmt.Errorf("unable to create plan role: %w", err)
	}

	binding := p.planRoleBinding()
	if err := client.IgnoreAlreadyExists(p.Create(ctx, binding)); err != nil {
		return fmt.Errorf("unable to create plan role binding: %w", err)
	}

	return nil
}

// Revoke deletes the plan Role followed by the plan RoleBinding.
//
// A NotFound response from either delete call is ignored, so revoking objects
// that are already gone succeeds. Any other error is wrapped and returned, and
// the RoleBinding is not attempted when deleting the Role fails.
func (p *Planner) Revoke(ctx context.Context) error {
	role := p.planRole()
	if err := client.IgnoreNotFound(p.Delete(ctx, role)); err != nil {
		return fmt.Errorf("unable to delete plan role: %w", err)
	}

	binding := p.planRoleBinding()
	if err := client.IgnoreNotFound(p.Delete(ctx, binding)); err != nil {
		return fmt.Errorf("unable to delete plan role binding: %w", err)
	}

	return nil
}

func (p *Planner) updatePlanSecret(ctx context.Context, data []byte) error {
log := log.FromContext(ctx)

Expand Down
2 changes: 1 addition & 1 deletion exp/etcdrestore/examples/etcd-restore.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
apiversion: turtles-capi.cattle.io/v1alpha1
apiVersion: turtles-capi.cattle.io/v1alpha1
kind: ETCDSnapshotRestore
metadata:
name: example-restore
Expand Down
3 changes: 1 addition & 2 deletions exp/etcdrestore/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ require (
k8s.io/client-go v0.29.9
k8s.io/component-base v0.29.9
k8s.io/klog/v2 v2.110.1
k8s.io/utils v0.0.0-20231127182322-b307cd553661
sigs.k8s.io/cluster-api v1.7.7
sigs.k8s.io/cluster-api-operator v0.14.0
sigs.k8s.io/controller-runtime v0.17.6
Expand All @@ -33,7 +34,6 @@ require (
github.com/coreos/vcontext v0.0.0-20230201181013-d72178a18687 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/emicklei/go-restful/v3 v3.12.0 // indirect
github.com/evanphx/json-patch v5.7.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.9.0 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
Expand Down Expand Up @@ -85,7 +85,6 @@ require (
k8s.io/apiextensions-apiserver v0.29.9 // indirect
k8s.io/cluster-bootstrap v0.29.3 // indirect
k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect
k8s.io/utils v0.0.0-20231127182322-b307cd553661 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
Expand Down
Loading
Loading