Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v23.2.x] k8s: Error out when annotation can not be set #13901

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ func (r *TopicReconciler) createKafkaClient(ctx context.Context, topic *v1alpha1
func (r *TopicReconciler) recordErrorEvent(err error, topic *v1alpha1.Topic, eventType, message string, args ...any) error {
if r.EventRecorder != nil {
var eventArgs []any
copy(args, eventArgs)
copy(eventArgs, args)
eventArgs = append(eventArgs, err.Error())
r.EventRecorder.AnnotatedEventf(topic,
map[string]string{v2.GroupVersion.Group + "/revision": topic.ResourceVersion},
Expand Down
24 changes: 16 additions & 8 deletions src/go/k8s/controllers/redpanda/cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ func (r *ClusterReconciler) Reconcile(

if vectorizedCluster.Status.CurrentReplicas >= 1 {
if err = r.setPodNodeIDAnnotation(ctx, &vectorizedCluster, log, ar); err != nil {
return ctrl.Result{}, fmt.Errorf("setting pod node_id annotation: %w", err)
log.Error(err, "setting pod node_id annotation")
}
}

Expand Down Expand Up @@ -265,7 +265,10 @@ func (r *ClusterReconciler) Reconcile(

// The following should be at the last part as it requires AdminAPI to be running
if err := r.setPodNodeIDAnnotation(ctx, &vectorizedCluster, log, ar); err != nil {
return ctrl.Result{}, fmt.Errorf("setting pod node_id annotation: %w", err)
log.Error(err, "setting pod node_id annotation after reconciling resources")
return ctrl.Result{
RequeueAfter: 4 * time.Second,
}, nil
}
if err := r.setPodNodeIDLabel(ctx, &vectorizedCluster, log, ar); err != nil {
return ctrl.Result{}, fmt.Errorf("setting pod node_id label: %w", err)
Expand Down Expand Up @@ -338,7 +341,7 @@ func (r *ClusterReconciler) handlePodFinalizer(
// if the pod is not being deleted, set the finalizer
if err = r.setPodFinalizer(ctx, pod, log); err != nil {
//nolint:goerr113 // not going to use wrapped static error here this time
return fmt.Errorf(`unable to set the finalizer on pod "%s": %d`, pod.Name, err)
return fmt.Errorf(`unable to set the finalizer on pod "%s": %w`, pod.Name, err)
}
continue
}
Expand Down Expand Up @@ -464,6 +467,8 @@ func (r *ClusterReconciler) setPodNodeIDAnnotation(
if err != nil {
return fmt.Errorf("unable to fetch PodList: %w", err)
}

var combinedErrors error
for i := range pods.Items {
pod := &pods.Items[i]
if pod.Annotations == nil {
Expand All @@ -473,7 +478,7 @@ func (r *ClusterReconciler) setPodNodeIDAnnotation(

nodeID, err := r.fetchAdminNodeID(ctx, rp, pod, ar)
if err != nil {
log.Error(err, `cannot fetch node id for node-id annotation`)
combinedErrors = errors.Join(combinedErrors, fmt.Errorf(`cannot fetch node id for "%s" node-id annotation: %w`, pod.Name, err))
continue
}

Expand All @@ -487,22 +492,25 @@ func (r *ClusterReconciler) setPodNodeIDAnnotation(
if annotationExist {
oldNodeID, err = strconv.Atoi(nodeIDStrAnnotation)
if err != nil {
return fmt.Errorf("unable to convert node ID (%s) to int: %w", nodeIDStrAnnotation, err)
combinedErrors = errors.Join(combinedErrors, fmt.Errorf("unable to convert node ID (%s) to int: %w", nodeIDStrAnnotation, err))
continue
}

log.WithValues("pod-name", pod.Name, "old-node-id", oldNodeID).Info("decommission old node-id")
if err = r.decommissionBroker(ctx, rp, oldNodeID, log, ar); err != nil {
return fmt.Errorf("unable to decommission broker: %w", err)
combinedErrors = errors.Join(combinedErrors, fmt.Errorf("unable to decommission broker: %w", err))
continue
}
}

log.WithValues("pod-name", pod.Name, "new-node-id", nodeID).Info("setting node-id annotation")
pod.Annotations[resources.PodAnnotationNodeIDKey] = realNodeIDStr
if err := r.Update(ctx, pod, &client.UpdateOptions{}); err != nil {
return fmt.Errorf(`unable to update pod "%s" with node-id annotation: %w`, pod.Name, err)
combinedErrors = errors.Join(combinedErrors, fmt.Errorf(`unable to update pod "%s" with node-id annotation: %w`, pod.Name, err))
continue
}
}
return nil
return combinedErrors
}

func (r *ClusterReconciler) setPodNodeIDLabel(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ var _ = Describe("Redpanda cluster scale resource", func() {

By("Scaling down only when decommissioning is done")
Expect(testAdminAPI.RemoveBroker(2)).To(BeTrue())
testAdminAPI.AddGhostBroker(admin.Broker{NodeID: 2, MembershipStatus: admin.MembershipStatusDraining})
Eventually(resourceDataGetter(key, &sts, func() interface{} {
return *sts.Spec.Replicas
}), timeout, interval).Should(Equal(int32(2)))
Expand All @@ -142,6 +143,7 @@ var _ = Describe("Redpanda cluster scale resource", func() {

By("Removing the other node as well when done")
Expect(testAdminAPI.RemoveBroker(1)).To(BeTrue())
testAdminAPI.AddGhostBroker(admin.Broker{NodeID: 1, MembershipStatus: admin.MembershipStatusDraining})
Eventually(resourceDataGetter(key, &sts, func() interface{} {
return *sts.Spec.Replicas
}), timeout, interval).Should(Equal(int32(1)))
Expand Down
11 changes: 10 additions & 1 deletion src/go/k8s/hack/get-redpanda-info.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,16 @@ for cl in $(kubectl -n $script_namespace get cluster --output=jsonpath='{.items.
done
done

kubectl get events --sort-by metadata.creationTimestamp >$ARTIFACTS_PATH/events.txt
kubectl get -n $script_namespace certificates -o yaml >$ARTIFACTS_PATH/certificates.yaml
kubectl get -n $script_namespace certificatesigningrequests -o yaml >$ARTIFACTS_PATH/certificatesigningrequests.yaml
kubectl get -n $script_namespace issuers -o yaml >$ARTIFACTS_PATH/issuers.yaml
kubectl get clusterissuers -o yaml >$ARTIFACTS_PATH/all-clusterissuers.yaml
kubectl get -n $script_namespace certificaterequests -o yaml >$ARTIFACTS_PATH/certificaterequests.yaml

kubectl get -n $script_namespace sts -o yaml >$ARTIFACTS_PATH/sts.yaml
kubectl get -n $script_namespace redpanda -o yaml >$ARTIFACTS_PATH/redpandas.yaml

kubectl get events -n $script_namespace --sort-by metadata.creationTimestamp >$ARTIFACTS_PATH/events.txt
kubectl get events --sort-by metadata.creationTimestamp -A >$ARTIFACTS_PATH/all-events.txt
kubectl describe node >$ARTIFACTS_PATH/described-nodes.txt
kubectl get pod -A -o yaml >$ARTIFACTS_PATH/all-pods.yaml
15 changes: 15 additions & 0 deletions src/go/k8s/kind-for-v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# kind cluster definition: one control-plane node followed by five worker nodes.
# Every node runs the same kindest/node v1.24.7 image, pinned by sha256 digest.
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315
- role: worker
image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315
- role: worker
image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315
- role: worker
image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315
- role: worker
image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315
- role: worker
image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315
4 changes: 0 additions & 4 deletions src/go/k8s/kind.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,3 @@ nodes:
image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315
- role: worker
image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315
- role: worker
image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315
- role: worker
image: kindest/node:v1.24.7@sha256:577c630ce8e509131eab1aea12c022190978dd2f745aac5eb1fe65c0807eb315
7 changes: 3 additions & 4 deletions src/go/k8s/kuttl-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@ testDirs:
kindConfig: ./kind.yaml
kindNodeCache: false
commands:
- command: kubectl taint node kind-control-plane node-role.kubernetes.io/master- node-role.kubernetes.io/control-plane-
- command: "mkdir -p tests/_e2e_artifacts"
- command: "kubectl taint nodes -l node-role.kubernetes.io/master= node-role.kubernetes.io/master:NoSchedule-"
ignoreFailure: true
- command: "./hack/install-cert-manager.sh tests/_e2e_artifacts"
background: true
ignoreFailure: true
Expand All @@ -23,9 +22,9 @@ commands:
background: true
- command: "./hack/wait-for-webhook-ready.sh"
artifactsDir: tests/_e2e_artifacts
timeout: 390
timeout: 300
reportFormat: xml
parallel: 4
parallel: 2
namespace: redpanda-system
suppress:
- events
2 changes: 1 addition & 1 deletion src/go/k8s/kuttl-v2-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ kindContainers:
- localhost/redpanda-operator:dev
testDirs:
- ./tests/e2e-v2
kindConfig: ./kind.yaml
kindConfig: ./kind-for-v2.yaml
kindNodeCache: false
commands:
- command: "mkdir -p tests/_e2e_artifacts_v2"
Expand Down
17 changes: 17 additions & 0 deletions src/go/k8s/pkg/admin/mock_admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ type MockAdminAPI struct {
unknown []string
directValidation bool
brokers []admin.Broker
ghostBrokers []admin.Broker
monitor sync.Mutex
Log logr.Logger
clusterHealth bool
Expand Down Expand Up @@ -338,6 +339,13 @@ func (s *ScopedMockAdminAPI) GetNodeConfig(
if err != nil {
return admin.NodeConfig{}, err
}
for _, b := range s.ghostBrokers {
if b.NodeID == int(s.Ordinal) {
return admin.NodeConfig{
NodeID: b.NodeID,
}, nil
}
}
if len(brokers) <= int(s.Ordinal) {
return admin.NodeConfig{}, fmt.Errorf("broker not registered")
}
Expand All @@ -361,6 +369,15 @@ func (m *MockAdminAPI) AddBroker(broker admin.Broker) {
m.brokers = append(m.brokers, broker)
}

// AddGhostBroker appends broker to the mock's ghostBrokers list, which is kept
// separately from the regular brokers slice. The call is logged first, and the
// append is performed while holding m.monitor. It always returns true.
func (m *MockAdminAPI) AddGhostBroker(broker admin.Broker) bool {
	callLog := m.Log.WithName("AddGhostBroker").WithValues("broker", broker)
	callLog.Info("called")

	// Guard the slice mutation with the mock's mutex.
	m.monitor.Lock()
	m.ghostBrokers = append(m.ghostBrokers, broker)
	m.monitor.Unlock()

	return true
}

func (m *MockAdminAPI) RemoveBroker(id int) bool {
m.Log.WithName("RemoveBroker").WithValues("id", id).Info("called")
m.monitor.Lock()
Expand Down
5 changes: 4 additions & 1 deletion src/go/k8s/pkg/resources/certmanager/pki.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package certmanager

import (
"context"
"errors"
"fmt"

"github.com/go-logr/logr"
Expand Down Expand Up @@ -79,14 +80,16 @@ func (r *PkiReconciler) Ensure(ctx context.Context) error {
}
toApply = append(toApply, res...)

var joinedErrors error
for _, res := range toApply {
err := res.Ensure(ctx)
if err != nil {
r.logger.Error(err, "Failed to reconcile pki")
joinedErrors = errors.Join(joinedErrors, err)
}
}

return nil
return joinedErrors
}

func (r *PkiReconciler) Key() types.NamespacedName {
Expand Down
5 changes: 5 additions & 0 deletions src/go/k8s/tests/e2e/admin-api-tls/02-clean.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,8 @@ delete:
kind: Job
name: call-admin-api-tls
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: call-admin-api-tls
namespace: redpanda-system
5 changes: 5 additions & 0 deletions src/go/k8s/tests/e2e/admin-api/02-clean.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,8 @@ delete:
kind: Job
name: call-admin-api
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: call-admin-api
namespace: redpanda-system
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,17 @@ delete:
kind: Job
name: get-centralized-config-bootstrap-1
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: get-centralized-config-bootstrap-1
namespace: redpanda-system
- apiVersion: batch/v1
kind: Job
name: get-centralized-config-bootstrap-2
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: get-centralized-config-bootstrap-2
namespace: redpanda-system
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,26 @@ delete:
kind: Job
name: external-change
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: external-change
namespace: redpanda-system
- apiVersion: batch/v1
kind: Job
name: get-centralized-config-1-drift
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: get-centralized-config-1-drift
namespace: redpanda-system
- apiVersion: batch/v1
kind: Job
name: get-centralized-config-2-drift
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: get-centralized-config-2-drift
namespace: redpanda-system
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,17 @@ delete:
kind: Job
name: get-centralized-config-tls-1
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: get-centralized-config-tls-1
namespace: redpanda-system
- apiVersion: batch/v1
kind: Job
name: get-centralized-config-tls-2
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: get-centralized-config-tls-2
namespace: redpanda-system
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,8 @@ delete:
kind: Job
name: get-centralized-config
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: get-centralized-config
namespace: redpanda-system
10 changes: 10 additions & 0 deletions src/go/k8s/tests/e2e/centralized-configuration/05-clean.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,17 @@ delete:
kind: Job
name: get-centralized-config-1
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: get-centralized-config-1
namespace: redpanda-system
- apiVersion: batch/v1
kind: Job
name: get-centralized-config-2
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: get-centralized-config-2
namespace: redpanda-system
10 changes: 10 additions & 0 deletions src/go/k8s/tests/e2e/confluent-schema-registry/04-clean.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,17 @@ delete:
kind: Job
name: create-schema
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: create-schema
namespace: redpanda-system
- apiVersion: batch/v1
kind: Job
name: retrive-schema
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: retrive-schema
namespace: redpanda-system
10 changes: 10 additions & 0 deletions src/go/k8s/tests/e2e/console-admin-api/04-cleanup.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,17 @@ delete:
kind: Job
name: call-console-admin-api
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: call-console-admin-api
namespace: redpanda-system
- apiVersion: batch/v1
kind: Job
name: check-version
namespace: redpanda-system
- apiVersion: v1
kind: Pod
labels:
job-name: check-version
namespace: redpanda-system
Loading
Loading