Skip to content

Commit

Permalink
feat: Add grafana and kube-prometheus-stack HelmReleases (#758)
Browse files Browse the repository at this point in the history
  • Loading branch information
onedr0p authored May 31, 2023
1 parent 55b3bdd commit d5107d6
Show file tree
Hide file tree
Showing 14 changed files with 345 additions and 12 deletions.
5 changes: 5 additions & 0 deletions .config.sample.env
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ export BOOTSTRAP_FLUX_GITHUB_WEBHOOK_SECRET="generated" # NOTE: Must only contai
# Set this to any other string and it will be used for the secret
export BOOTSTRAP_WEAVE_GITOPS_ADMIN_PASSWORD="generated" # NOTE: Must only contain alphanumeric characters and dashes

# The Grafana admin password
# `generated` - this will generate a token and print it in the logs
# Set this to any other string and it will be used for the secret
export BOOTSTRAP_GRAFANA_ADMIN_PASSWORD="generated" # NOTE: Must only contain alphanumeric characters and dashes

# Age Public Key - string should start with age
# e.g. age15uzrw396e67z9wdzsxzdk7ka0g2gr3l460e0slaea563zll3hdfqwqxdta
export BOOTSTRAP_AGE_PUBLIC_KEY=""
Expand Down
34 changes: 32 additions & 2 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ main() {
generate_ansible_host_secrets
setup_github_webhook
setup_weave_gitops
setup_grafana_admin
setup_prometheus_endpoints
success
fi
}
Expand Down Expand Up @@ -474,11 +476,26 @@ generate_ansible_hosts() {
} > "${PROJECT_DIR}/ansible/inventory/hosts.yml"
}

#######################################
# Write the address of every control node into the kube-prometheus-stack
# HelmRelease (.spec.values.kubeControllerManager.endpoints).
#
# The list shipped in the HelmRelease is cleared exactly once, right before
# the first control-node address is appended. Clearing on every iteration
# would discard the addresses appended for earlier nodes and leave only the
# last control node in the list. When no control node is found the file is
# left untouched.
#
# Globals:
#   BOOTSTRAP_ANSIBLE_HOST_ADDR_<id>     (read) address of node <id>
#   BOOTSTRAP_ANSIBLE_CONTROL_NODE_<id>  (read) "true" marks a control node
#   PROJECT_DIR                          (read) root of the repository
# Arguments:
#   None
# Outputs:
#   Edits the HelmRelease file in place via yq; logs one line per endpoint.
#######################################
setup_prometheus_endpoints() {
    local helmrelease="${PROJECT_DIR}/kubernetes/apps/monitoring/kube-prometheus-stack/app/helmrelease.yaml"
    local var node_id node_control node_addr
    local cleared="false"

    for var in "${!BOOTSTRAP_ANSIBLE_HOST_ADDR_@}"; do
        # Everything after the common prefix is the node id.
        node_id="${var#BOOTSTRAP_ANSIBLE_HOST_ADDR_}"
        node_control="BOOTSTRAP_ANSIBLE_CONTROL_NODE_${node_id}"
        # ':-' keeps the lookup safe when the matching CONTROL_NODE variable is unset.
        if [[ "${!node_control:-}" != "true" ]]; then
            continue
        fi
        node_addr="${!var}"
        _log "INFO(${FUNCNAME[0]})" "Setting up Prometheus endpoint for '${node_addr}'"
        if [[ "${cleared}" == "false" ]]; then
            # Drop the existing entries once, before the first real address is added.
            yq --inplace 'del(.spec.values.kubeControllerManager.endpoints[])' \
                "${helmrelease}"
            cleared="true"
        fi
        yq --inplace ".spec.values.kubeControllerManager.endpoints += \"${node_addr}\"" \
            "${helmrelease}"
    done
}

setup_github_webhook() {
_has_envar "BOOTSTRAP_FLUX_GITHUB_WEBHOOK_SECRET"
WEBHOOK_SECRET="${BOOTSTRAP_FLUX_GITHUB_WEBHOOK_SECRET}"
if [[ "${WEBHOOK_SECRET}" == "generated" ]]; then
WEBHOOK_SECRET="$(openssl rand -base64 30)"
WEBHOOK_SECRET="$(openssl rand -hex 12)"
fi
export BOOTSTRAP_FLUX_GITHUB_WEBHOOK_SECRET="${WEBHOOK_SECRET}"
_log "INFO(${FUNCNAME[0]})" "Using GitHub Token '${WEBHOOK_SECRET}' for Flux"
Expand All @@ -491,7 +508,7 @@ setup_weave_gitops() {
_has_envar "BOOTSTRAP_WEAVE_GITOPS_ADMIN_PASSWORD"
WEAVE_GITOPS_ADMIN_PASSWORD="${BOOTSTRAP_WEAVE_GITOPS_ADMIN_PASSWORD}"
if [[ "${WEAVE_GITOPS_ADMIN_PASSWORD}" == "generated" ]]; then
WEAVE_GITOPS_ADMIN_PASSWORD="$(openssl rand -base64 30)"
WEAVE_GITOPS_ADMIN_PASSWORD="$(openssl rand -hex 12)"
fi
export BOOTSTRAP_WEAVE_GITOPS_ADMIN_PASSWORD="${WEAVE_GITOPS_ADMIN_PASSWORD}"
_log "INFO(${FUNCNAME[0]})" "Using admin password '${WEAVE_GITOPS_ADMIN_PASSWORD}' for Weave Gitops"
Expand All @@ -503,6 +520,19 @@ setup_weave_gitops() {
sops --encrypt --in-place "${PROJECT_DIR}/kubernetes/apps/flux-system/weave-gitops/app/secret.sops.yaml"
}

#######################################
# Resolve the Grafana admin password and render the encrypted secret file.
#
# When BOOTSTRAP_GRAFANA_ADMIN_PASSWORD is the literal string "generated" a
# value is produced with `openssl rand -hex 12`; any other value is used
# verbatim. The resolved value is re-exported, logged, substituted into the
# secret template with envsubst and the result is encrypted in place by sops.
#
# Globals:
#   BOOTSTRAP_GRAFANA_ADMIN_PASSWORD  (read, then exported with the final value)
#   GRAFANA_ADMIN_PASSWORD            (written)
#   PROJECT_DIR                       (read)
# Arguments:
#   None
#######################################
setup_grafana_admin() {
    local secret_tmpl="${PROJECT_DIR}/tmpl/kubernetes/grafana-admin-secret.sops.yaml"
    local secret_file="${PROJECT_DIR}/kubernetes/apps/monitoring/grafana/app/secret.sops.yaml"

    _has_envar "BOOTSTRAP_GRAFANA_ADMIN_PASSWORD"

    case "${BOOTSTRAP_GRAFANA_ADMIN_PASSWORD}" in
        generated)
            GRAFANA_ADMIN_PASSWORD="$(openssl rand -hex 12)"
            ;;
        *)
            GRAFANA_ADMIN_PASSWORD="${BOOTSTRAP_GRAFANA_ADMIN_PASSWORD}"
            ;;
    esac
    export BOOTSTRAP_GRAFANA_ADMIN_PASSWORD="${GRAFANA_ADMIN_PASSWORD}"

    _log "INFO(${FUNCNAME[0]})" "Using password '${GRAFANA_ADMIN_PASSWORD}' for Grafana"

    # Render the template first, then encrypt the rendered file where it lies.
    envsubst < "${secret_tmpl}" > "${secret_file}"
    sops --encrypt --in-place "${secret_file}"
}

success() {
_log "INFO(${FUNCNAME[0]})" "All files have been templated, proceed to the next steps outlined in the README"
exit 0
Expand Down
10 changes: 0 additions & 10 deletions kubernetes/apps/cert-manager/cert-manager/app/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,3 @@ namespace: cert-manager
resources:
- ./helmrelease.yaml
- ./prometheusrule.yaml
configMapGenerator:
- name: cert-manager-dashboard
files:
- cert-manager-dashboard.json=https://gitlab.com/uneeq-oss/cert-manager-mixin/-/raw/master/dashboards/cert-manager.json
generatorOptions:
disableNameSuffixHash: true
annotations:
kustomize.toolkit.fluxcd.io/substitute: disabled
labels:
grafana_dashboard: "true"
93 changes: 93 additions & 0 deletions kubernetes/apps/monitoring/grafana/app/helmrelease.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
---
# Flux HelmRelease for Grafana: installs the `grafana` chart, pinned to 6.56.6,
# from the `grafana` HelmRepository in flux-system into the `monitoring` namespace.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
name: grafana
namespace: monitoring
spec:
interval: 30m
chart:
spec:
chart: grafana
version: 6.56.6
sourceRef:
kind: HelmRepository
name: grafana
namespace: flux-system
maxHistory: 2
# Install and upgrade both retry up to 3 times.
install:
createNamespace: true
remediation:
retries: 3
upgrade:
cleanupOnFail: true
remediation:
retries: 3
uninstall:
keepHistory: false
# Chart values start here.
values:
admin:
# Admin credentials are taken from a Secret instead of being set inline.
# NOTE(review): presumably the Secret rendered by configure's setup_grafana_admin
# into secret.sops.yaml - confirm the template uses this exact name.
existingSecret: grafana-admin-secret
env:
GF_EXPLORE_ENABLED: true
# ${SECRET_DOMAIN} is not defined in this file.
# NOTE(review): presumably filled in by Flux variable substitution - confirm.
GF_SERVER_ROOT_URL: "https://grafana.${SECRET_DOMAIN}"
grafana.ini:
analytics:
check_for_updates: false
check_for_plugin_updates: false
# A single file-based dashboard provider named `default`; its path matches the
# `default` group under `dashboards` below.
dashboardProviders:
dashboardproviders.yaml:
apiVersion: 1
providers:
- name: default
orgId: 1
folder: ""
type: file
disableDeletion: false
editable: true
options:
path: /var/lib/grafana/dashboards/default
datasources:
datasources.yaml:
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
# NOTE(review): assumes the kube-prometheus-stack release exposes a Service named
# kube-prometheus-stack-prometheus in the monitoring namespace on port 9090 - confirm.
url: http://kube-prometheus-stack-prometheus.monitoring.svc.cluster.local:9090
isDefault: true
# Dashboards referenced by URL; each one is bound to the `Prometheus` datasource
# declared above.
dashboards:
default:
cert-manager:
url: https://raw.githubusercontent.com/monitoring-mixins/website/master/assets/cert-manager/dashboards/cert-manager.json
datasource: Prometheus
flux-cluster:
url: https://raw.githubusercontent.com/fluxcd/flux2/main/manifests/monitoring/monitoring-config/dashboards/cluster.json
datasource: Prometheus
flux-control-plane:
url: https://raw.githubusercontent.com/fluxcd/flux2/main/manifests/monitoring/monitoring-config/dashboards/control-plane.json
datasource: Prometheus
# Dashboard and datasource sidecars are enabled and search ALL namespaces.
sidecar:
dashboards:
enabled: true
searchNamespace: ALL
datasources:
enabled: true
searchNamespace: ALL
serviceMonitor:
enabled: true
ingress:
enabled: true
ingressClassName: nginx
annotations:
hajimari.io/icon: simple-icons:grafana
hosts:
# The &host anchor is reused by the tls entry below so both always carry the same name.
- &host "grafana.${SECRET_DOMAIN}"
tls:
- hosts:
- *host
# Persistent storage on the local-path storage class.
persistence:
enabled: true
storageClassName: local-path
testFramework:
enabled: false
7 changes: 7 additions & 0 deletions kubernetes/apps/monitoring/grafana/app/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# Kustomize entry point for the Grafana app: applies the resources below in the
# `monitoring` namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: monitoring
resources:
# Written and sops-encrypted by the configure script (setup_grafana_admin).
- ./secret.sops.yaml
- ./helmrelease.yaml
18 changes: 18 additions & 0 deletions kubernetes/apps/monitoring/grafana/ks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
# Flux Kustomization that reconciles ./kubernetes/apps/monitoring/grafana/app
# from the `home-kubernetes` GitRepository.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: cluster-apps-grafana
namespace: flux-system
spec:
# The Grafana HelmRelease requests persistence on the `local-path` storage class,
# hence the dependency on the local-path provisioner.
dependsOn:
- name: cluster-apps-local-path-provisioner
path: ./kubernetes/apps/monitoring/grafana/app
prune: true
sourceRef:
kind: GitRepository
name: home-kubernetes
wait: true
interval: 30m
retryInterval: 1m
timeout: 5m
134 changes: 134 additions & 0 deletions kubernetes/apps/monitoring/kube-prometheus-stack/app/helmrelease.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
---
# Flux HelmRelease for kube-prometheus-stack: installs chart version 46.5.0 from the
# `prometheus-community` HelmRepository in flux-system into the `monitoring` namespace.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
name: kube-prometheus-stack
namespace: monitoring
spec:
interval: 30m
timeout: 15m
chart:
spec:
chart: kube-prometheus-stack
version: 46.5.0
sourceRef:
kind: HelmRepository
name: prometheus-community
namespace: flux-system
maxHistory: 2
# Install and upgrade both use the CreateReplace CRD policy and retry up to 3 times.
install:
createNamespace: true
crds: CreateReplace
remediation:
retries: 3
upgrade:
cleanupOnFail: true
crds: CreateReplace
remediation:
retries: 3
uninstall:
keepHistory: false
# Chart values start here.
values:
cleanPrometheusOperatorObjectNames: true
alertmanager:
enabled: false
# The bundled Grafana is disabled while forceDeployDashboards stays on.
# NOTE(review): Grafana itself is presumably provided by the separate `grafana`
# HelmRelease, whose dashboard sidecar searches ALL namespaces - confirm.
grafana:
enabled: false
forceDeployDashboards: true
sidecar:
dashboards:
multicluster:
etcd:
enabled: true
kube-state-metrics:
metricLabelsAllowlist:
- "pods=[*]"
- "deployments=[*]"
- "persistentvolumeclaims=[*]"
prometheus:
monitor:
enabled: true
relabelings:
# Copies __meta_kubernetes_pod_node_name into the `kubernetes_node` label.
- action: replace
regex: (.*)
replacement: $1
sourceLabels: ["__meta_kubernetes_pod_node_name"]
targetLabel: kubernetes_node
kubelet:
enabled: true
serviceMonitor:
metricRelabelings:
# Remove duplicate metrics
# (action `keep`: only series whose __name__ matches <listed prefix>_<rest> are kept)
- sourceLabels: ["__name__"]
regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|authentication_token|cadvisor_version|container_blkio|container_cpu|container_fs|container_last|container_memory|container_network|container_oom|container_processes|container|csi_operations|disabled_metric|get_token|go|hidden_metric|kubelet_certificate|kubelet_cgroup|kubelet_container|kubelet_containers|kubelet_cpu|kubelet_device|kubelet_graceful|kubelet_http|kubelet_lifecycle|kubelet_managed|kubelet_node|kubelet_pleg|kubelet_pod|kubelet_run|kubelet_running|kubelet_runtime|kubelet_server|kubelet_started|kubelet_volume|kubernetes_build|kubernetes_feature|machine_cpu|machine_memory|machine_nvm|machine_scrape|node_namespace|plugin_manager|prober_probe|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scrape_duration|scrape_samples|scrape_series|storage_operation|volume_manager|volume_operation|workqueue)_(.+)"
action: keep
# Overwrites `instance` with the value of the `node` label.
- sourceLabels: ["node"]
targetLabel: instance
action: replace
kubeApiServer:
enabled: true
serviceMonitor:
metricRelabelings:
# Remove duplicate metrics
# (action `keep`: only series whose __name__ matches <listed prefix>_<rest> are kept)
- sourceLabels: ["__name__"]
regex: "(aggregator_openapi|aggregator_unavailable|apiextensions_openapi|apiserver_admission|apiserver_audit|apiserver_cache|apiserver_cel|apiserver_client|apiserver_crd|apiserver_current|apiserver_envelope|apiserver_flowcontrol|apiserver_init|apiserver_kube|apiserver_longrunning|apiserver_request|apiserver_requested|apiserver_response|apiserver_selfrequest|apiserver_storage|apiserver_terminated|apiserver_tls|apiserver_watch|apiserver_webhooks|authenticated_user|authentication|disabled_metric|etcd_bookmark|etcd_lease|etcd_request|field_validation|get_token|go|grpc_client|hidden_metric|kube_apiserver|kubernetes_build|kubernetes_feature|node_authorizer|pod_security|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scrape_duration|scrape_samples|scrape_series|serviceaccount_legacy|serviceaccount_stale|serviceaccount_valid|watch_cache|workqueue)_(.+)"
action: keep
# Remove high cardinality metrics
- sourceLabels: ["__name__"]
regex: (apiserver|etcd|rest_client)_request(|_sli|_slo)_duration_seconds_bucket
action: drop
- sourceLabels: ["__name__"]
regex: (apiserver_response_sizes_bucket|apiserver_watch_events_sizes_bucket)
action: drop
kubeControllerManager:
enabled: true
# The &cp anchor is aliased by kubeEtcd and kubeScheduler below, so this one list
# feeds all three. The configure script (setup_prometheus_endpoints) edits it with yq.
endpoints: &cp
- 127.0.0.1 # Replaced by configure
serviceMonitor:
metricRelabelings:
# Remove duplicate metrics
# (action `keep`: only series whose __name__ matches <listed prefix>_<rest> are kept)
- sourceLabels: ["__name__"]
regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|attachdetach_controller|authenticated_user|authentication|cronjob_controller|disabled_metric|endpoint_slice|ephemeral_volume|garbagecollector_controller|get_token|go|hidden_metric|job_controller|kubernetes_build|kubernetes_feature|leader_election|node_collector|node_ipam|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|pv_collector|registered_metric|replicaset_controller|rest_client|retroactive_storageclass|root_ca|running_managed|scrape_duration|scrape_samples|scrape_series|service_controller|storage_count|storage_operation|ttl_after|volume_operation|workqueue)_(.+)"
action: keep
kubeEtcd:
enabled: true
endpoints: *cp
kubeProxy:
enabled: false # Disabled because eBPF
kubeScheduler:
enabled: true
endpoints: *cp
serviceMonitor:
metricRelabelings:
# Remove duplicate metrics
# (action `keep`: only series whose __name__ matches <listed prefix>_<rest> are kept)
- sourceLabels: ["__name__"]
regex: "(apiserver_audit|apiserver_client|apiserver_delegated|apiserver_envelope|apiserver_storage|apiserver_webhooks|authenticated_user|authentication|disabled_metric|go|hidden_metric|kubernetes_build|kubernetes_feature|leader_election|process_cpu|process_max|process_open|process_resident|process_start|process_virtual|registered_metric|rest_client|scheduler|scrape_duration|scrape_samples|scrape_series|workqueue)_(.+)"
action: keep
prometheus:
ingress:
enabled: true
ingressClassName: nginx
annotations:
hajimari.io/appName: Prometheus
hajimari.io/icon: simple-icons:prometheus
pathType: Prefix
hosts:
# The &host anchor is reused by the tls entry below so both always carry the same name.
- &host "prometheus.${SECRET_DOMAIN}"
tls:
- hosts:
- *host
prometheusSpec:
ruleSelectorNilUsesHelmValues: false
serviceMonitorSelectorNilUsesHelmValues: false
podMonitorSelectorNilUsesHelmValues: false
probeSelectorNilUsesHelmValues: false
# NOTE(review): the admin API is enabled while Prometheus is also published through
# the ingress above - confirm access to that host is restricted.
enableAdminAPI: true
walCompression: true
# retentionSize (8GB) is set below the 10Gi volume requested underneath.
retentionSize: 8GB
storageSpec:
volumeClaimTemplate:
spec:
storageClassName: local-path
resources:
requests:
storage: 10Gi
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# Kustomize entry point for kube-prometheus-stack: applies the HelmRelease in the
# `monitoring` namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: monitoring
resources:
- ./helmrelease.yaml
18 changes: 18 additions & 0 deletions kubernetes/apps/monitoring/kube-prometheus-stack/ks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
# Flux Kustomization that reconciles ./kubernetes/apps/monitoring/kube-prometheus-stack/app
# from the `home-kubernetes` GitRepository.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: cluster-apps-kube-prometheus-stack
namespace: flux-system
spec:
# The HelmRelease requests a Prometheus volume on the `local-path` storage class,
# hence the dependency on the local-path provisioner.
dependsOn:
- name: cluster-apps-local-path-provisioner
path: ./kubernetes/apps/monitoring/kube-prometheus-stack/app
prune: true
sourceRef:
kind: GitRepository
name: home-kubernetes
wait: true
interval: 30m
retryInterval: 1m
# NOTE(review): with `wait: true` this 5m timeout is shorter than the 15m timeout set
# on the kube-prometheus-stack HelmRelease - confirm a slow first install cannot make
# this Kustomization report a timeout.
timeout: 5m
2 changes: 2 additions & 0 deletions kubernetes/apps/monitoring/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./namespace.yaml
- ./grafana/ks.yaml
- ./kube-prometheus-stack/ks.yaml
- ./kubernetes-dashboard/ks.yaml
9 changes: 9 additions & 0 deletions kubernetes/flux/repositories/helm/grafana.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# Flux HelmRepository for the Grafana charts; referenced by name from the grafana
# HelmRelease's chart sourceRef.
apiVersion: source.toolkit.fluxcd.io/v1beta2
kind: HelmRepository
metadata:
name: grafana
namespace: flux-system
spec:
interval: 1h
url: https://grafana.github.io/helm-charts
Loading

0 comments on commit d5107d6

Please sign in to comment.