From a92272a839a477e84a999111c853454b9f873257 Mon Sep 17 00:00:00 2001
From: Ryan
Date: Wed, 15 May 2024 14:55:47 +0100
Subject: [PATCH] * Chronicle with embedded liveness * Statefulset probes now use prometheus metrics rather than direct interaction

Signed-off-by: Ryan
---
Review notes (free text in this position is not part of the diff):

* Both charts: the added `--liveness-interval` / `--liveness-deadline`
  arguments of `serve-api` now end with a shell line continuation, like
  the arguments around them. Without it the command ends after
  `--liveness-interval <n>` and `--liveness-deadline <n>` is executed as
  a separate command.
* NOTE(review): the new `readinessProbe` is rendered only when
  `.Values.startUpProbe.enabled` is true - confirm that the readiness
  probe is meant to be controlled by the start-up probe switch.
* NOTE(review): the copy this was restored from had lost its line breaks
  and indentation, and the removed probe scripts look incomplete (the
  `@@ -152,107 +175,10 @@` hunks announce more old lines than are
  present). Indentation below is reconstructed - verify against commit
  a92272a before applying.

 charts/chronicle-on-sawtooth/Chart.yaml       |   4 +-
 .../templates/check_metrics_available.yaml    |  23 +++
 .../templates/statefulset.yaml                | 133 ++++--------------
 charts/chronicle-on-sawtooth/values.yaml      |  31 +---
 charts/chronicle/Chart.yaml                   |   2 +-
 .../templates/check_metrics_available.yaml    |  23 +++
 charts/chronicle/templates/statefulset.yaml   | 133 ++++--------------
 charts/chronicle/values.yaml                  |  30 +---
 8 files changed, 124 insertions(+), 255 deletions(-)
 create mode 100644 charts/chronicle-on-sawtooth/templates/check_metrics_available.yaml
 create mode 100644 charts/chronicle/templates/check_metrics_available.yaml

diff --git a/charts/chronicle-on-sawtooth/Chart.yaml b/charts/chronicle-on-sawtooth/Chart.yaml
index 10396da..e841b85 100644
--- a/charts/chronicle-on-sawtooth/Chart.yaml
+++ b/charts/chronicle-on-sawtooth/Chart.yaml
@@ -17,11 +17,11 @@ keywords:
 # This is the chart version. This version number should be incremented each
 # time you make changes to the chart and its templates, including the app
 # version.
-version: 0.1.24
+version: 0.1.25
 
 # This is the version number of Chronicle being deployed. This version
 # number should be incremented each time you make changes to Chronicle.
-appVersion: 0.7.6
+appVersion: 0.7.7
 
 dependencies:
   - name: standard-defs
diff --git a/charts/chronicle-on-sawtooth/templates/check_metrics_available.yaml b/charts/chronicle-on-sawtooth/templates/check_metrics_available.yaml
new file mode 100644
index 0000000..f9965fe
--- /dev/null
+++ b/charts/chronicle-on-sawtooth/templates/check_metrics_available.yaml
@@ -0,0 +1,23 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "common.names.fullname" . }}-scripts
+data:
+  first_depth_charge.sh: |
+    #!/bin/bash
+    metrics=$(curl -s http://localhost:9000/metrics)
+    count=$(echo "$metrics" | grep '^depth_charge_round_trip_count' | awk '{print $2}')
+    if [[ -z "$count" ]] || [[ $count -eq 0 ]]; then
+      exit 1
+    fi
+  check_timeouts.sh: |
+    #!/bin/bash
+    metrics=$(curl -s http://localhost:9000/metrics)
+    timeouts=$(echo "$metrics" | grep '^depth_charge_timeouts' | awk '{print $2}' | tr -d '\r')
+    if [[ "$timeouts" =~ ^[0-9]+$ ]] && [[ "$timeouts" -ne 0 ]]; then
+      echo "Non-zero depth_charge_timeouts detected: $timeouts"
+      exit 1
+    else
+      echo "No non-zero depth_charge_timeouts detected."
+      exit 0
+    fi
diff --git a/charts/chronicle-on-sawtooth/templates/statefulset.yaml b/charts/chronicle-on-sawtooth/templates/statefulset.yaml
index ce3124c..c07cdba 100644
--- a/charts/chronicle-on-sawtooth/templates/statefulset.yaml
+++ b/charts/chronicle-on-sawtooth/templates/statefulset.yaml
@@ -123,13 +123,16 @@ spec:
                 {{- end }}
                 serve-api \
                   --interface 0.0.0.0:{{ .Values.port }} \
+                  {{- if .Values.livenessProbe.enabled }}
+                  --liveness-interval {{ .Values.livenessProbe.periodSeconds }} \
+                  --liveness-deadline {{ .Values.livenessProbe.timeoutSeconds }} \
+                  {{- end }}
                   {{- if .Values.auth.required }}
                   --require-auth \
                   {{- end }}
                   {{ include "chronicle.jwks-url.cli" . }}
                   {{ include "chronicle.userinfo-url.cli" . }}
-                  {{ include "chronicle.id-claims" . }}
-                ;
+                  {{ include "chronicle.id-claims" . }};
           env: {{ include "lib.safeToYaml" .Values.env | nindent 12 }}
             - name: RUST_LOG
               value: {{ .Values.logLevel }}
@@ -144,6 +147,26 @@ spec:
             {{- end }}
             {{- include "lib.safeToYaml" .Values.postgres.env | nindent 12 }}
           resources: {{- include "lib.safeToYaml" .Values.resources | nindent 12 }}
+          {{- if .Values.livenessProbe.enabled }}
+          livenessProbe:
+            exec:
+              command:
+                - /bin/bash
+                - /scripts/check_timeouts.sh
+            initialDelaySeconds: 1
+            periodSeconds: 1
+            failureThreshold: 1
+          {{- end}}
+          {{- if .Values.startUpProbe.enabled }}
+          readinessProbe:
+            exec:
+              command:
+                - /bin/bash
+                - /scripts/first_depth_charge.sh
+            initialDelaySeconds: 1
+            periodSeconds: 1
+            failureThreshold: 600
+          {{- end}}
           volumeMounts:
             - name: chronicle-config
               mountPath: /etc/chronicle/config/
@@ -152,107 +175,10 @@ spec:
               readOnly: true
             - name: chronicle-data
               mountPath: /var/lib/chronicle/store/
+            - name: check-metrics-available
+              mountPath: /scripts/
+              readOnly: true
             {{- include "lib.volumeMounts" .Values.extraVolumeMounts | nindent 12 }}
-          {{- if .Values.livenessProbe.enabled }}
-          livenessProbe:
-            exec:
-              command:
-                - bash
-                - -c
-                - |
-                  PROBE_ID="liveness_$(LC_ALL=C tr -dc A-Za-z0-9 /tmp/import.json &&
-                  echo "Probe ID: $PROBE_ID" &&
-                  RUST_LOG=error chronicle \
-                    -c /etc/chronicle/config/config.toml \
-                    --console-logging json \
-                    --sawtooth tcp://{{ include "chronicle.sawtooth.service" . }}:{{ include "chronicle.sawtooth.sawcomp" . }} \
-                    --remote-database \
-                    --database-name {{ .Values.postgres.database }} \
-                    --database-username {{ .Values.postgres.user }} \
-                    --database-host {{ .Values.postgres.host }} \
-                    {{- if not .Values.opa.enabled }}
-                    --embedded-opa-policy \
-                    {{- end }}
-                    import {{ .Values.livenessProbe.namespaceName }} {{ .Values.livenessProbe.namespaceUuid }} /tmp/import.json
-            initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
-            periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
-            timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }}
-            failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
-          {{- end }}
-          {{- if .Values.startUpProbe.enabled }}
-          startupProbe:
-            exec:
-              command:
-                - bash
-                - -c
-                - |
-                  PROBE_ID="startup_$(LC_ALL=C tr -dc A-Za-z0-9 /tmp/import.json &&
-                  echo "Probe ID: $PROBE_ID" &&
-                  RUST_LOG=error chronicle \
-                    -c /etc/chronicle/config/config.toml \
-                    --console-logging json \
-                    --sawtooth tcp://{{ include "chronicle.sawtooth.service" . }}:{{ include "chronicle.sawtooth.sawcomp" . }} \
-                    --remote-database \
-                    --database-name {{ .Values.postgres.database }} \
-                    --database-username {{ .Values.postgres.user }} \
-                    --database-host {{ .Values.postgres.host }} \
-                    {{- if not .Values.opa.enabled }}
-                    --embedded-opa-policy \
-                    {{- end }}
-                    import {{ .Values.startUpProbe.namespaceName }} {{ .Values.startUpProbe.namespaceUuid }} /tmp/import.json
-            initialDelaySeconds: {{ .Values.startUpProbe.initialDelaySeconds }}
-            periodSeconds: {{ .Values.startUpProbe.periodSeconds }}
-            timeoutSeconds: {{ .Values.startUpProbe.timeoutSeconds }}
-            failureThreshold: {{ .Values.startUpProbe.failureThreshold }}
-          {{- end }}
       volumes:
         - name: chronicle-secrets
           persistentVolumeClaim:
@@ -263,6 +189,9 @@ spec:
         - name: chronicle-config
           configMap:
             name: {{ .Release.Name }}-chronicle-config
+        - name: check-metrics-available
+          configMap:
+            name: {{ include "common.names.fullname" . }}-scripts
         {{- if not .Values.postgres.persistence.enabled }}
         - name: "pgdata"
           emptyDir: {}
diff --git a/charts/chronicle-on-sawtooth/values.yaml b/charts/chronicle-on-sawtooth/values.yaml
index ad58b28..d2003eb 100644
--- a/charts/chronicle-on-sawtooth/values.yaml
+++ b/charts/chronicle-on-sawtooth/values.yaml
@@ -29,28 +29,11 @@ livenessProbe:
   timeoutSeconds: 20
   ## @md | `livenessProbe.periodSeconds` | how often (in seconds) to perform the probe | 60 |
   periodSeconds: 60
-  ## @md | `livenessProbe.failureThreshold` | when a probe fails, Kubernetes will try failureThreshold times before giving up | 1 |
-  failureThreshold: 1
-  ## @md | `livenessProbe.namespaceName` | the Chronicle namespace in which the probe operates | default |
-  namespaceName: default
-  ## @md | `livenessProbe.namespaceUuid` | the UUID of the Chronicle namespace in which the probe operates | fd717fd6-70f1-44c1-81de-287d5e101089 |
-  namespaceUuid: fd717fd6-70f1-44c1-81de-287d5e101089
 
 ## @md | `startUpProbe.enabled` | if true, enables the startup probe | true |
 startUpProbe:
   enabled: false
-  ## @md | `startUpProbe.initialDelaySeconds` | number of seconds after which the probe starts | 5 |
-  initialDelaySeconds: 5
-  ## @md | `startUpProbe.failureThreshold` | when a probe fails, Kubernetes will try failureThreshold times before giving up | 30 |
-  failureThreshold: 30
-  ## @md | `startUpProbe.periodSeconds` | how often (in seconds) to perform the probe | 10 |
-  periodSeconds: 10
-  ## @md | `startUpProbe.timeoutSeconds` | number of seconds after which the probe times out | 3 |
-  timeoutSeconds: 30
-  ## @md | `startUpProbe.namespaceName` | the Chronicle namespace in which the probe operates | default |
-  namespaceName: default
-  ## @md | `startUpProbe.namespaceUuid` | the UUID of the Chronicle namespace in which the probe operates | fd717fd6-70f1-44c1-81de-287d5e101089 |
-  namespaceUuid: fd717fd6-70f1-44c1-81de-287d5e101089
+
 
 ## @md | `backtraceLevel` | backtrace level for Chronicle | nil |
 backtraceLevel: full
@@ -64,7 +47,7 @@ devIdProvider:
     ## @md | `devIdProvider.image.repository` | the image repository | blockchaintp/id-provider |
     repository: blockchaintp/id-provider-amd64
     ## @md | `devIdProvider.image.tag` | the image tag | latest |
-    tag: BTP2.1.0-0.7.6
+    tag: BTP2.1.0-0.7.7
 
 ## @md | `extraVolumes` | a list of additional volumes to add to chronicle | [] |
 extraVolumes: []
@@ -75,7 +58,7 @@ image:
   ## @md | `image.repository` | the repository of the image | blockchaintp/chronicle |
   repository: blockchaintp/chronicle-amd64
   ## @md | `image.tag`| the tag of the image to use | latest |
-  tag: BTP2.1.0-0.7.6
+  tag: BTP2.1.0-0.7.7
   ## @md | `image.pullPolicy` | the image pull policy to use | IfNotPresent |
   pullPolicy: IfNotPresent
 
@@ -128,7 +111,7 @@ opa:
       ## @md | `image.repository` | the repository of the image | blockchaintp/chronicle |
       repository: blockchaintp/opactl-amd64
       ## @md | `image.tag`| the tag of the image to use | latest |
-      tag: BTP2.1.0-0.7.6
+      tag: BTP2.1.0-0.7.7
   policy:
     entrypoint: allow_transactions.allowed_users
     id: allow_transactions
@@ -138,7 +121,7 @@ opa:
       ## @md | `image.repository` | the repository of the image | blockchaintp/chronicle |
       repository: blockchaintp/opa-tp-amd64
       ## @md | `image.tag`| the tag of the image to use | latest |
-      tag: BTP2.1.0-0.7.6
+      tag: BTP2.1.0-0.7.7
       ## @md | `image.pullPolicy` | the image pull policy to use | IfNotPresent |
       pullPolicy: IfNotPresent
     ## @md | `opa.tp.resources` | resources | map | nil |
@@ -172,7 +155,7 @@ test:
       ## @md | `test.api.image.repository` | the image repository | blockchaintp/chronicle-helm-api-test |
       repository: blockchaintp/chronicle-helm-api-test-amd64
       ## @md | `test.api.image.tag` | the image tag | latest |
-      tag: BTP2.1.0-0.7.6
+      tag: BTP2.1.0-0.7.7
   ## @md | `test.auth` | test the chronicle auth server API |
   auth:
     ## @md | `test.auth.enabled` | true to enable auth-related testing | false |
@@ -248,7 +231,7 @@ tp:
     ## @md | `tp.image.repository` | the image repository | blockchaintp/chronicle-tp |
     repository: blockchaintp/chronicle-tp-amd64
     ## @md | `tp.image.tag` | the image tag | latest |
-    tag: BTP2.1.0-0.7.6
+    tag: BTP2.1.0-0.7.7
   ## @md | `tp.extraVolumes` | extra volumes declarations for the chronicle-tp deployment | list | nil
   extraVolumes:
   ## @md | `tp.extraVolumeMounts` | extra volume mounts for chronicle-tp deployment | list | nil
diff --git a/charts/chronicle/Chart.yaml b/charts/chronicle/Chart.yaml
index 9ca4cf2..4a00e16 100644
--- a/charts/chronicle/Chart.yaml
+++ b/charts/chronicle/Chart.yaml
@@ -17,7 +17,7 @@ keywords:
 # This is the chart version. This version number should be incremented each
 # time you make changes to the chart and its templates, including the app
 # version.
-version: 0.1.24
+version: 0.1.25
 
 # This is the version number of Chronicle being deployed. This version
 # number should be incremented each time you make changes to Chronicle.
diff --git a/charts/chronicle/templates/check_metrics_available.yaml b/charts/chronicle/templates/check_metrics_available.yaml
new file mode 100644
index 0000000..f9965fe
--- /dev/null
+++ b/charts/chronicle/templates/check_metrics_available.yaml
@@ -0,0 +1,23 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "common.names.fullname" . }}-scripts
+data:
+  first_depth_charge.sh: |
+    #!/bin/bash
+    metrics=$(curl -s http://localhost:9000/metrics)
+    count=$(echo "$metrics" | grep '^depth_charge_round_trip_count' | awk '{print $2}')
+    if [[ -z "$count" ]] || [[ $count -eq 0 ]]; then
+      exit 1
+    fi
+  check_timeouts.sh: |
+    #!/bin/bash
+    metrics=$(curl -s http://localhost:9000/metrics)
+    timeouts=$(echo "$metrics" | grep '^depth_charge_timeouts' | awk '{print $2}' | tr -d '\r')
+    if [[ "$timeouts" =~ ^[0-9]+$ ]] && [[ "$timeouts" -ne 0 ]]; then
+      echo "Non-zero depth_charge_timeouts detected: $timeouts"
+      exit 1
+    else
+      echo "No non-zero depth_charge_timeouts detected."
+      exit 0
+    fi
diff --git a/charts/chronicle/templates/statefulset.yaml b/charts/chronicle/templates/statefulset.yaml
index ce3124c..c07cdba 100644
--- a/charts/chronicle/templates/statefulset.yaml
+++ b/charts/chronicle/templates/statefulset.yaml
@@ -123,13 +123,16 @@ spec:
                 {{- end }}
                 serve-api \
                   --interface 0.0.0.0:{{ .Values.port }} \
+                  {{- if .Values.livenessProbe.enabled }}
+                  --liveness-interval {{ .Values.livenessProbe.periodSeconds }} \
+                  --liveness-deadline {{ .Values.livenessProbe.timeoutSeconds }} \
+                  {{- end }}
                   {{- if .Values.auth.required }}
                   --require-auth \
                   {{- end }}
                   {{ include "chronicle.jwks-url.cli" . }}
                   {{ include "chronicle.userinfo-url.cli" . }}
-                  {{ include "chronicle.id-claims" . }}
-                ;
+                  {{ include "chronicle.id-claims" . }};
           env: {{ include "lib.safeToYaml" .Values.env | nindent 12 }}
             - name: RUST_LOG
               value: {{ .Values.logLevel }}
@@ -144,6 +147,26 @@ spec:
             {{- end }}
             {{- include "lib.safeToYaml" .Values.postgres.env | nindent 12 }}
           resources: {{- include "lib.safeToYaml" .Values.resources | nindent 12 }}
+          {{- if .Values.livenessProbe.enabled }}
+          livenessProbe:
+            exec:
+              command:
+                - /bin/bash
+                - /scripts/check_timeouts.sh
+            initialDelaySeconds: 1
+            periodSeconds: 1
+            failureThreshold: 1
+          {{- end}}
+          {{- if .Values.startUpProbe.enabled }}
+          readinessProbe:
+            exec:
+              command:
+                - /bin/bash
+                - /scripts/first_depth_charge.sh
+            initialDelaySeconds: 1
+            periodSeconds: 1
+            failureThreshold: 600
+          {{- end}}
           volumeMounts:
             - name: chronicle-config
               mountPath: /etc/chronicle/config/
@@ -152,107 +175,10 @@ spec:
               readOnly: true
             - name: chronicle-data
               mountPath: /var/lib/chronicle/store/
+            - name: check-metrics-available
+              mountPath: /scripts/
+              readOnly: true
             {{- include "lib.volumeMounts" .Values.extraVolumeMounts | nindent 12 }}
-          {{- if .Values.livenessProbe.enabled }}
-          livenessProbe:
-            exec:
-              command:
-                - bash
-                - -c
-                - |
-                  PROBE_ID="liveness_$(LC_ALL=C tr -dc A-Za-z0-9 /tmp/import.json &&
-                  echo "Probe ID: $PROBE_ID" &&
-                  RUST_LOG=error chronicle \
-                    -c /etc/chronicle/config/config.toml \
-                    --console-logging json \
-                    --sawtooth tcp://{{ include "chronicle.sawtooth.service" . }}:{{ include "chronicle.sawtooth.sawcomp" . }} \
-                    --remote-database \
-                    --database-name {{ .Values.postgres.database }} \
-                    --database-username {{ .Values.postgres.user }} \
-                    --database-host {{ .Values.postgres.host }} \
-                    {{- if not .Values.opa.enabled }}
-                    --embedded-opa-policy \
-                    {{- end }}
-                    import {{ .Values.livenessProbe.namespaceName }} {{ .Values.livenessProbe.namespaceUuid }} /tmp/import.json
-            initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
-            periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
-            timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }}
-            failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
-          {{- end }}
-          {{- if .Values.startUpProbe.enabled }}
-          startupProbe:
-            exec:
-              command:
-                - bash
-                - -c
-                - |
-                  PROBE_ID="startup_$(LC_ALL=C tr -dc A-Za-z0-9 /tmp/import.json &&
-                  echo "Probe ID: $PROBE_ID" &&
-                  RUST_LOG=error chronicle \
-                    -c /etc/chronicle/config/config.toml \
-                    --console-logging json \
-                    --sawtooth tcp://{{ include "chronicle.sawtooth.service" . }}:{{ include "chronicle.sawtooth.sawcomp" . }} \
-                    --remote-database \
-                    --database-name {{ .Values.postgres.database }} \
-                    --database-username {{ .Values.postgres.user }} \
-                    --database-host {{ .Values.postgres.host }} \
-                    {{- if not .Values.opa.enabled }}
-                    --embedded-opa-policy \
-                    {{- end }}
-                    import {{ .Values.startUpProbe.namespaceName }} {{ .Values.startUpProbe.namespaceUuid }} /tmp/import.json
-            initialDelaySeconds: {{ .Values.startUpProbe.initialDelaySeconds }}
-            periodSeconds: {{ .Values.startUpProbe.periodSeconds }}
-            timeoutSeconds: {{ .Values.startUpProbe.timeoutSeconds }}
-            failureThreshold: {{ .Values.startUpProbe.failureThreshold }}
-          {{- end }}
       volumes:
         - name: chronicle-secrets
           persistentVolumeClaim:
@@ -263,6 +189,9 @@ spec:
         - name: chronicle-config
           configMap:
             name: {{ .Release.Name }}-chronicle-config
+        - name: check-metrics-available
+          configMap:
+            name: {{ include "common.names.fullname" . }}-scripts
         {{- if not .Values.postgres.persistence.enabled }}
         - name: "pgdata"
           emptyDir: {}
diff --git a/charts/chronicle/values.yaml b/charts/chronicle/values.yaml
index 12d4c6d..2706e08 100644
--- a/charts/chronicle/values.yaml
+++ b/charts/chronicle/values.yaml
@@ -29,28 +29,10 @@ livenessProbe:
   timeoutSeconds: 20
   ## @md | `livenessProbe.periodSeconds` | how often (in seconds) to perform the probe | 60 |
   periodSeconds: 60
-  ## @md | `livenessProbe.failureThreshold` | when a probe fails, Kubernetes will try failureThreshold times before giving up | 1 |
-  failureThreshold: 1
-  ## @md | `livenessProbe.namespaceName` | the Chronicle namespace in which the probe operates | default |
-  namespaceName: default
-  ## @md | `livenessProbe.namespaceUuid` | the UUID of the Chronicle namespace in which the probe operates | fd717fd6-70f1-44c1-81de-287d5e101089 |
-  namespaceUuid: fd717fd6-70f1-44c1-81de-287d5e101089
 
 ## @md | `startUpProbe.enabled` | if true, enables the startup probe | true |
 startUpProbe:
   enabled: false
-  ## @md | `startUpProbe.initialDelaySeconds` | number of seconds after which the probe starts | 5 |
-  initialDelaySeconds: 5
-  ## @md | `startUpProbe.failureThreshold` | when a probe fails, Kubernetes will try failureThreshold times before giving up | 30 |
-  failureThreshold: 30
-  ## @md | `startUpProbe.periodSeconds` | how often (in seconds) to perform the probe | 10 |
-  periodSeconds: 10
-  ## @md | `startUpProbe.timeoutSeconds` | number of seconds after which the probe times out | 3 |
-  timeoutSeconds: 3
-  ## @md | `startUpProbe.namespaceName` | the Chronicle namespace in which the probe operates | default |
-  namespaceName: default
-  ## @md | `startUpProbe.namespaceUuid` | the UUID of the Chronicle namespace in which the probe operates | fd717fd6-70f1-44c1-81de-287d5e101089 |
-  namespaceUuid: fd717fd6-70f1-44c1-81de-287d5e101089
 
 ## @md | `backtraceLevel` | backtrace level for Chronicle | nil |
 backtraceLevel: full
@@ -64,7 +46,7 @@ devIdProvider:
     ## @md | `devIdProvider.image.repository` | the image repository | blockchaintp/id-provider |
     repository: blockchaintp/id-provider-amd64
     ## @md | `devIdProvider.image.tag` | the image tag | latest |
-    tag: BTP2.1.0-0.7.6
+    tag: BTP2.1.0-0.7.7
 
 ## @md | `extraVolumes` | a list of additional volumes to add to chronicle | [] |
 extraVolumes: []
@@ -75,7 +57,7 @@ image:
   ## @md | `image.repository` | the repository of the image | blockchaintp/chronicle |
   repository: blockchaintp/chronicle-amd64
   ## @md | `image.tag`| the tag of the image to use | latest |
-  tag: BTP2.1.0-0.7.6
+  tag: BTP2.1.0-0.7.7
   ## @md | `image.pullPolicy` | the image pull policy to use | IfNotPresent |
   pullPolicy: IfNotPresent
 
@@ -128,7 +110,7 @@ opa:
       ## @md | `image.repository` | the repository of the image | blockchaintp/chronicle |
       repository: blockchaintp/opactl-amd64
       ## @md | `image.tag`| the tag of the image to use | latest |
-      tag: BTP2.1.0-0.7.6
+      tag: BTP2.1.0-0.7.7
   policy:
     entrypoint: allow_transactions.allowed_users
     id: allow_transactions
@@ -138,7 +120,7 @@ opa:
       ## @md | `image.repository` | the repository of the image | blockchaintp/chronicle |
       repository: blockchaintp/opa-tp-amd64
       ## @md | `image.tag`| the tag of the image to use | latest |
-      tag: BTP2.1.0-0.7.6
+      tag: BTP2.1.0-0.7.7
       ## @md | `image.pullPolicy` | the image pull policy to use | IfNotPresent |
       pullPolicy: IfNotPresent
     ## @md | `opa.tp.resources` | resources | map | nil |
@@ -172,7 +154,7 @@ test:
       ## @md | `test.api.image.repository` | the image repository | blockchaintp/chronicle-helm-api-test |
       repository: blockchaintp/chronicle-helm-api-test-amd64
       ## @md | `test.api.image.tag` | the image tag | latest |
-      tag: BTP2.1.0-0.7.6
+      tag: BTP2.1.0-0.7.7
   ## @md | `test.auth` | test the chronicle auth server API |
   auth:
     ## @md | `test.auth.enabled` | true to enable auth-related testing | true |
@@ -248,7 +230,7 @@ tp:
     ## @md | `tp.image.repository` | the image repository | blockchaintp/chronicle-tp |
     repository: blockchaintp/chronicle-tp-amd64
     ## @md | `tp.image.tag` | the image tag | latest |
-    tag: BTP2.1.0-0.7.6
+    tag: BTP2.1.0-0.7.7
   ## @md | `tp.extraVolumes` | extra volumes declarations for the chronicle-tp deployment | list | nil
   extraVolumes:
   ## @md | `tp.extraVolumeMounts` | extra volume mounts for chronicle-tp deployment | list | nil