From ce1c549271480bb12fd1b3083f0916a4a9a6d86b Mon Sep 17 00:00:00 2001 From: JeremiahUy Date: Thu, 17 Aug 2023 14:21:44 +0200 Subject: [PATCH] UPDATE: namespace and team --- .github/workflows/default.yml | 4 ++-- .nais/alerts.yaml | 23 +++++++++++++++++++++++ .nais/nais.yaml | 34 +++++----------------------------- .nais/topic.yaml | 16 +++++----------- 4 files changed, 35 insertions(+), 42 deletions(-) create mode 100644 .nais/alerts.yaml diff --git a/.github/workflows/default.yml b/.github/workflows/default.yml index 9f90979..c3b7672 100644 --- a/.github/workflows/default.yml +++ b/.github/workflows/default.yml @@ -30,7 +30,7 @@ jobs: if: github.ref == 'refs/heads/master' && github.event_name != 'release' strategy: matrix: - cluster: ["dev-sbs", "dev-fss", "dev-gcp"] + cluster: ["dev-fss", "dev-gcp"] steps: - name: Checkout uses: actions/checkout@master @@ -47,7 +47,7 @@ jobs: if: github.event_name == 'release' strategy: matrix: - cluster: ["prod-sbs", "prod-fss", "prod-gcp"] + cluster: ["prod-fss", "prod-gcp"] steps: - name: Checkout uses: actions/checkout@master diff --git a/.nais/alerts.yaml b/.nais/alerts.yaml new file mode 100644 index 0000000..19c2082 --- /dev/null +++ b/.nais/alerts.yaml @@ -0,0 +1,23 @@ +apiVersion: "nais.io/v1" +kind: "Alert" +metadata: + name: topic-collector + namespace: dataplattform + labels: + team: dataplattform +spec: + receivers: + slack: + channel: '#nada-airflow-alerts' + prependText: ' | ' + alerts: + - alert: ingress-collector stream restarts too often + expr: increase(ingress_collector_counter_k8s_stream_timeout_total[5m])>10 + for: 1m + description: "{{ $labels.app }} stream restarts to often in namespace {{ $labels.kubernetes_namespace }}" + action: "`kubectl logs {{ $labels.kubernetes_pod_name }} -n {{ $labels.kubernetes_namespace }}`" + - alert: ingress-collector pod restarts too often + expr: increase(kube_pod_container_status_restarts_total{container="ingress-collector", namespace="dataplattform"}[20m])>10 + for: 1m +
description: "{{ $labels.app }} pod restarts to often in namespace {{ $labels.kubernetes_namespace }}" + action: "`kubectl describe pod {{ $labels.kubernetes_pod_name }} -n {{ $labels.kubernetes_namespace }}` for events, and `kubectl logs {{ $labels.kubernetes_pod_name }} -n {{ $labels.kubernetes_namespace }}` for logs" diff --git a/.nais/nais.yaml b/.nais/nais.yaml index b1cebc9..4c9fb06 100644 --- a/.nais/nais.yaml +++ b/.nais/nais.yaml @@ -2,9 +2,9 @@ apiVersion: "nais.io/v1alpha1" kind: "Application" metadata: labels: - team: dataplattform + team: team-researchops name: ingress-collector - namespace: dataplattform + namespace: team-researchops spec: image: {{ image }} port: 8000 @@ -47,13 +47,13 @@ spec: - name: SSL_CERT_FILE value: /etc/pki/tls/certs/ca-bundle.crt - name: KAFKA_TOPIC - value: dataplattform.ingress-topic-v2 + value: team-researchops.ingress-topic-v2 --- apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: name: ingress-collector-apiserver - namespace: dataplattform + namespace: team-researchops spec: egress: - to: @@ -63,28 +63,4 @@ spec: matchLabels: app: ingress-collector policyTypes: - - Egress ---- -apiVersion: "nais.io/v1" -kind: "Alert" -metadata: - name: topic-collector - namespace: dataplattform - labels: - team: dataplattform -spec: - receivers: - slack: - channel: '#nada-airflow-alerts' - prependText: ' | ' - alerts: - - alert: ingress-collector stream restarts too often - expr: increase(ingress_collector_counter_k8s_stream_timeout_total[5m])>10 - for: 1m - description: "{{ $labels.app }} stream restarts to often in namespace {{ $labels.kubernetes_namespace }}" - action: "`kubectl logs {{ $labels.kubernetes_pod_name }} -n {{ $labels.kubernetes_namespace }}`" - - alert: ingress-collector pod restarts too often - expr: increase(kube_pod_container_status_restarts_total{container="ingress-collector", namespace="dataplattform"}[20m])>10 - for: 1m - description: "{{ $labels.app }} pod restarts to often in namespace {{ 
$labels.kubernetes_namespace }}" - action: "`kubectl describe pod {{ $labels.kubernetes_pod_name }} -n {{ $labels.kubernetes_namespace }}` for events, and `kubectl logs {{ $labels.kubernetes_pod_name }} -n {{ $labels.kubernetes_namespace }}` for logs" + - Egress \ No newline at end of file diff --git a/.nais/topic.yaml b/.nais/topic.yaml index fc2c6c3..65c729a 100644 --- a/.nais/topic.yaml +++ b/.nais/topic.yaml @@ -2,9 +2,9 @@ apiVersion: kafka.nais.io/v1 kind: Topic metadata: name: ingress-topic-v2 - namespace: dataplattform + namespace: team-researchops labels: - team: dataplattform + team: team-researchops spec: pool: nav-infrastructure config: # optional; all fields are optional too; defaults shown @@ -15,15 +15,9 @@ spec: retentionBytes: -1 retentionHours: 72 acl: - - team: dataplattform + - team: team-researchops application: ingress-collector access: write - - team: dataplattform + - team: team-researchops application: amplitude-proxy - access: read - - team: dataplattform - application: dekoratoren-bigquery - access: read - - team: dataplattform - application: dakan-ingress-indexer - access: read + access: read \ No newline at end of file