diff --git a/add-ons/prometheus-3.7/README.md b/add-ons/prometheus-3.7/README.md new file mode 100644 index 0000000..f47e0ff --- /dev/null +++ b/add-ons/prometheus-3.7/README.md @@ -0,0 +1,31 @@ +# Prometheus Add-on +An add-on that deploys Prometheus, Node-Exporter and AlertManager. + +NOTE: Requires Origin >= 3.7.0-rc.0 + +Verify that you have installed these add-ons by following the [general readme](../../README.adoc#download-and-use-community-add-ons). + +## Deploy prometheus +To deploy Prometheus, run: + +``` +minishift addon apply prometheus-3.7 --addon-env prometheus_namespace=kube-system +``` + +_NOTE_: You must provide the namespace where Prometheus will be installed through the `prometheus_namespace` add-on variable (`--addon-env`), as shown above. + +## Use prometheus +Prometheus will be available at: + +``` +minishift openshift service prometheus -n <namespace> +``` + +_NOTE_: The service is exposed through SSL, so use https to access it. + +## Delete prometheus +Delete prometheus with: + +``` +oc delete sa,clusterrolebinding,route,svc,secret,deployment,configmap,daemonset,statefulset -l 'app in (prometheus,prometheus-node-exporter)' -n <namespace> --as=system:admin +``` diff --git a/add-ons/prometheus-3.7/node-exporter.yaml b/add-ons/prometheus-3.7/node-exporter.yaml new file mode 100644 index 0000000..01d1914 --- /dev/null +++ b/add-ons/prometheus-3.7/node-exporter.yaml @@ -0,0 +1,79 @@ +# node-exporter is an optional component that collects host level metrics from the nodes +# in the cluster. This group of resources will require the 'hostaccess' level of privilege, which +# should only be granted to namespaces that administrators can access.
+apiVersion: v1 +kind: List +items: +- apiVersion: v1 + kind: ServiceAccount + metadata: + name: prometheus-node-exporter + # You must grant hostaccess via: oc adm policy add-scc-to-user -z prometheus-node-exporter hostaccess + # in order for the node-exporter to access the host network and mount /proc and /sys from the host +- apiVersion: v1 + kind: Service + metadata: + annotations: + prometheus.io/scrape: "true" + labels: + app: prometheus-node-exporter + name: prometheus-node-exporter + spec: + clusterIP: None + ports: + - name: scrape + port: 9100 + protocol: TCP + targetPort: 9100 + selector: + app: prometheus-node-exporter +- apiVersion: extensions/v1beta1 + kind: DaemonSet + metadata: + name: prometheus-node-exporter + labels: + app: prometheus-node-exporter + role: monitoring + spec: + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app: prometheus-node-exporter + role: monitoring + name: prometheus-exporter + spec: + serviceAccountName: prometheus-node-exporter + hostNetwork: true + hostPID: true + containers: + - image: openshift/prometheus-node-exporter:v0.14.0 + args: + - "--collector.procfs=/host/proc" + - "--collector.sysfs=/host/sys" + name: node-exporter + ports: + - containerPort: 9100 + name: scrape + resources: + requests: + memory: 30Mi + cpu: 100m + limits: + memory: 50Mi + cpu: 200m + volumeMounts: + - name: proc + readOnly: true + mountPath: /host/proc + - name: sys + readOnly: true + mountPath: /host/sys + volumes: + - name: proc + hostPath: + path: /proc + - name: sys + hostPath: + path: /sys \ No newline at end of file diff --git a/add-ons/prometheus-3.7/prometheus.addon b/add-ons/prometheus-3.7/prometheus.addon new file mode 100644 index 0000000..6e1d33b --- /dev/null +++ b/add-ons/prometheus-3.7/prometheus.addon @@ -0,0 +1,30 @@ +# Name: prometheus-3.7 +# Description: This template creates a Prometheus instance preconfigured to gather OpenShift and Kubernetes platform and node metrics and report them to admins.
It is protected by an OAuth proxy that only allows access for users who have view access to the prometheus namespace. You may customize where the images (built from openshift/prometheus and openshift/oauth-proxy) are pulled from via template parameters. +# Url: https://raw.githubusercontent.com/openshift/origin/master/examples/prometheus/prometheus.yaml +# OpenShift-Version: >=3.7.0 +# Required-Vars: prometheus_namespace + +oc new-app -f prometheus.yaml -p NAMESPACE=#{prometheus_namespace} -n #{prometheus_namespace} +oc create -f node-exporter.yaml -n #{prometheus_namespace} +oc adm policy add-scc-to-user -z prometheus-node-exporter -n #{prometheus_namespace} hostaccess + +# Label the created resources with app=prometheus so removal can select them all with one label selector +oc label svc/prometheus app=prometheus -n #{prometheus_namespace} +oc label svc/alerts app=prometheus -n #{prometheus_namespace} +oc label configmap/prometheus app=prometheus -n #{prometheus_namespace} +oc label secret/alerts-proxy app=prometheus -n #{prometheus_namespace} +oc label secret/prometheus-proxy app=prometheus -n #{prometheus_namespace} +oc label secret/prometheus-tls app=prometheus -n #{prometheus_namespace} +oc label sa/prometheus app=prometheus -n #{prometheus_namespace} +oc label sa/prometheus-node-exporter app=prometheus -n #{prometheus_namespace} +oc label routes/prometheus app=prometheus -n #{prometheus_namespace} +oc label routes/alerts app=prometheus -n #{prometheus_namespace} +oc label clusterrolebinding/prometheus-cluster-reader app=prometheus -n #{prometheus_namespace} +oc label configmaps/prometheus-alerts app=prometheus -n #{prometheus_namespace} + +echo You have installed #{addon-name} +echo To access #{addon-name} go to https://prometheus-#{prometheus_namespace}.#{routing-suffix} +echo +echo To delete: +echo minishift addon remove prometheus-3.7 --addon-env prometheus_namespace=#{prometheus_namespace} +echo oc delete sa,clusterrolebinding,route,svc,secret,deployment,configmap,daemonset,statefulset -l 'app in (prometheus,prometheus-node-exporter)' -n #{prometheus_namespace}
--as=system:admin diff --git a/add-ons/prometheus-3.7/prometheus.addon.remove b/add-ons/prometheus-3.7/prometheus.addon.remove new file mode 100644 index 0000000..0dd87c5 --- /dev/null +++ b/add-ons/prometheus-3.7/prometheus.addon.remove @@ -0,0 +1,10 @@ +# Name: prometheus-3.7 +# Description: Remove add on +# Url: https://raw.githubusercontent.com/openshift/origin/master/examples/prometheus/prometheus.yaml +# Required-Vars: prometheus_namespace + +echo [prometheus] removing all resources +# The delete below interpolates prometheus_namespace, hence the Required-Vars entry in the header +oc delete sa,clusterrolebinding,route,svc,secret,deployment,configmap,daemonset,statefulset -l 'app in (prometheus,prometheus-node-exporter)' -n #{prometheus_namespace} --as=system:admin + +echo prometheus addon removed diff --git a/add-ons/prometheus-3.7/prometheus.yaml b/add-ons/prometheus-3.7/prometheus.yaml new file mode 100644 index 0000000..309de9f --- /dev/null +++ b/add-ons/prometheus-3.7/prometheus.yaml @@ -0,0 +1,504 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + name: prometheus + annotations: + "openshift.io/display-name": Prometheus + description: | + A monitoring solution for an OpenShift cluster - collect and gather metrics and alerts from nodes, services, and the infrastructure. This is a tech preview feature. + iconClass: icon-cogs + tags: "monitoring,prometheus, alertmanager,time-series" +parameters: +- description: The namespace to instantiate prometheus under. Defaults to 'kube-system'.
+ name: NAMESPACE + value: kube-system +- description: The location of the proxy image + name: IMAGE_PROXY + value: openshift/oauth-proxy:v1.0.0 +- description: The location of the prometheus image + name: IMAGE_PROMETHEUS + value: openshift/prometheus:v2.0.0-dev.3 +- description: The location of the alertmanager image + name: IMAGE_ALERTMANAGER + value: openshift/prometheus-alertmanager:v0.9.1 +- description: The location of alert-buffer image + name: IMAGE_ALERT_BUFFER + value: openshift/prometheus-alert-buffer:v0.0.2 +- description: The session secret for the proxy + name: SESSION_SECRET + generate: expression + from: "[a-zA-Z0-9]{43}" +objects: +# Authorize the prometheus service account to read data about the cluster +- apiVersion: v1 + kind: ServiceAccount + metadata: + name: prometheus + namespace: "${NAMESPACE}" + annotations: + serviceaccounts.openshift.io/oauth-redirectreference.prom: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"prometheus"}}' + serviceaccounts.openshift.io/oauth-redirectreference.alerts: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"alerts"}}' +- apiVersion: authorization.openshift.io/v1 + kind: ClusterRoleBinding + metadata: + name: prometheus-cluster-reader + roleRef: + name: cluster-reader + subjects: + - kind: ServiceAccount + name: prometheus + namespace: "${NAMESPACE}" + +# Create a fully end-to-end TLS connection to the prometheus proxy +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + name: prometheus + namespace: "${NAMESPACE}" + spec: + to: + name: prometheus + tls: + termination: Reencrypt + insecureEdgeTerminationPolicy: Redirect +- apiVersion: v1 + kind: Service + metadata: + annotations: + prometheus.io/scrape: "true" + prometheus.io/scheme: https + service.alpha.openshift.io/serving-cert-secret-name: prometheus-tls + labels: + name: prometheus + name: prometheus + namespace: "${NAMESPACE}" + spec: + ports: + - name: 
prometheus + port: 443 + protocol: TCP + targetPort: 8443 + selector: + app: prometheus +- apiVersion: v1 + kind: Secret + metadata: + name: prometheus-proxy + namespace: "${NAMESPACE}" + stringData: + session_secret: "${SESSION_SECRET}=" +- apiVersion: apps/v1beta1 + kind: StatefulSet + metadata: + labels: + app: prometheus + name: prometheus + namespace: "${NAMESPACE}" + spec: + updateStrategy: + type: RollingUpdate + podManagementPolicy: Parallel + selector: + matchLabels: + app: prometheus + template: + metadata: + labels: + app: prometheus + name: prometheus + spec: + serviceAccountName: prometheus + containers: + # Deploy Prometheus behind an oauth proxy + - name: prom-proxy + image: ${IMAGE_PROXY} + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8443 + name: web + args: + - -provider=openshift + - -https-address=:8443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:9090 + - -client-id=system:serviceaccount:${NAMESPACE}:prometheus + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret-file=/etc/proxy/secrets/session_secret + - -skip-auth-regex=^/metrics + volumeMounts: + - mountPath: /etc/tls/private + name: prometheus-tls + - mountPath: /etc/proxy/secrets + name: prometheus-secrets + - mountPath: /prometheus + name: prometheus-data + + - name: prometheus + args: + - --storage.tsdb.retention=6h + - --storage.tsdb.min-block-duration=2m + - --config.file=/etc/prometheus/prometheus.yml + - --web.listen-address=localhost:9090 + image: 
${IMAGE_PROMETHEUS} + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /etc/prometheus + name: prometheus-config + - mountPath: /prometheus + name: prometheus-data + + # Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy + # serves https on port 9443 (http-address is left empty) to differ from prom-proxy + - name: alerts-proxy + image: ${IMAGE_PROXY} + imagePullPolicy: IfNotPresent + ports: + - containerPort: 9443 + name: web + args: + - -provider=openshift + - -https-address=:9443 + - -http-address= + - -email-domain=* + - -upstream=http://localhost:9099 + - -client-id=system:serviceaccount:${NAMESPACE}:prometheus + - -openshift-ca=/etc/pki/tls/cert.pem + - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt + - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}' + - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "${NAMESPACE}", "namespace": "${NAMESPACE}"}}' + - -tls-cert=/etc/tls/private/tls.crt + - -tls-key=/etc/tls/private/tls.key + - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token + - -cookie-secret-file=/etc/proxy/secrets/session_secret + volumeMounts: + - mountPath: /etc/tls/private + name: alerts-tls + - mountPath: /etc/proxy/secrets + name: alerts-secrets + + - name: alert-buffer + args: + - --storage-path=/alert-buffer/messages.db + image: ${IMAGE_ALERT_BUFFER} + imagePullPolicy: IfNotPresent + volumeMounts: + - mountPath: /alert-buffer + name: alert-buffer-data + ports: + - containerPort: 9099 + name: alert-buf + + - name: alertmanager + args: + - -config.file=/etc/alertmanager/alertmanager.yml + image: ${IMAGE_ALERTMANAGER} + imagePullPolicy: IfNotPresent + ports: + - containerPort: 9093 + name: web + volumeMounts: + - mountPath: /etc/alertmanager + name: alertmanager-config + - mountPath: /alertmanager + name: alertmanager-data + + restartPolicy: Always + volumes: + - name:
prometheus-config + configMap: + defaultMode: 420 + name: prometheus + - name: prometheus-secrets + secret: + secretName: prometheus-proxy + - name: prometheus-tls + secret: + secretName: prometheus-tls + - name: prometheus-data + emptyDir: {} + - name: alertmanager-config + configMap: + defaultMode: 420 + name: prometheus-alerts + - name: alerts-secrets + secret: + secretName: alerts-proxy + - name: alerts-tls + secret: + secretName: prometheus-alerts-tls + - name: alertmanager-data + emptyDir: {} + - name: alert-buffer-data #TODO: make persistent + emptyDir: {} + +- apiVersion: v1 + kind: ConfigMap + metadata: + name: prometheus + namespace: "${NAMESPACE}" + data: + prometheus.rules: | + groups: + - name: example-rules + interval: 30s # defaults to global interval + rules: + - alert: Node Down + expr: up{job="kubernetes-nodes"} == 0 + annotations: + miqTarget: "ContainerNode" + severity: "HIGH" + message: "{{$labels.instance}} is down" + prometheus.yml: | + rule_files: + - 'prometheus.rules' + + # A scrape configuration for running Prometheus on a Kubernetes cluster. + # This uses separate scrape configs for cluster components (i.e. API server, node) + # and services to allow each to use different authentication configs. + # + # Kubernetes labels will be added as Prometheus labels on metrics via the + # `labelmap` relabeling action. + + # Scrape config for API servers. + # + # Kubernetes exposes API servers as endpoints to the default/kubernetes + # service so this uses `endpoints` role and uses relabelling to only keep + # the endpoints associated with the default/kubernetes service using the + # default named port `https`. This works for single API server deployments as + # well as HA API server deployments. 
+ scrape_configs: + - job_name: 'kubernetes-apiservers' + + kubernetes_sd_configs: + - role: endpoints + + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + # Keep only the default/kubernetes service endpoints for the https port. This + # will add targets for each API server which Kubernetes adds an endpoint to + # the default/kubernetes service. + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: default;kubernetes;https + + # Scrape config for nodes. + # + # Each node exposes a /metrics endpoint that contains operational metrics for + # the Kubelet and other components. + - job_name: 'kubernetes-nodes' + + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + + # Scrape config for controllers. + # + # Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for + # the controllers. + # + # TODO: move this to a pure endpoints based metrics gatherer when controllers are exposed via + # endpoints. + - job_name: 'kubernetes-controllers' + + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: endpoints + + # Keep only the default/kubernetes service endpoints for the https port, and then + # set the port to 8444. This is the default configuration for the controllers on OpenShift + # masters. 
+ relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: default;kubernetes;https + - source_labels: [__address__] + action: replace + target_label: __address__ + regex: (.+)(?::\d+) + replacement: $1:8444 + + # Scrape config for cAdvisor. + # + # Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that + # reports container metrics for each running pod. Scrape those by default. + - job_name: 'kubernetes-cadvisor' + + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + metrics_path: /metrics/cadvisor + + kubernetes_sd_configs: + - role: node + + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + + # Scrape config for service endpoints. + # + # The relabeling allows the actual service scrape endpoint to be configured + # via the following annotations: + # + # * `prometheus.io/scrape`: Only scrape services that have a value of `true` + # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need + # to set this to `https` & most likely set the `tls_config` of the scrape config. + # * `prometheus.io/path`: If the metrics path is not `/metrics` override this. + # * `prometheus.io/port`: If the metrics are exposed on a different port to the + # service then set this appropriately. + - job_name: 'kubernetes-service-endpoints' + + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + # TODO: this should be per target + insecure_skip_verify: true + + kubernetes_sd_configs: + - role: endpoints + + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] + action: replace + target_label: __scheme__ + regex: (https?) 
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] + action: replace + target_label: __address__ + regex: (.+)(?::\d+);(\d+) + replacement: $1:$2 + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username] + action: replace + target_label: __basic_auth_username__ + regex: (.+) + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password] + action: replace + target_label: __basic_auth_password__ + regex: (.+) + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_service_name] + action: replace + target_label: kubernetes_name + + - job_name: 'openshift-template-service-broker' + + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt + server_name: apiserver.openshift-template-service-broker.svc + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: endpoints + + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: openshift-template-service-broker;apiserver;https + + alerting: + alertmanagers: + - scheme: http + static_configs: + - targets: + - "localhost:9093" + +# Create a fully end-to-end TLS connection to the alert proxy +- apiVersion: route.openshift.io/v1 + kind: Route + metadata: + name: alerts + namespace: "${NAMESPACE}" + spec: + to: + name: alerts + tls: + termination: Reencrypt + insecureEdgeTerminationPolicy: Redirect +- apiVersion: v1 + kind: Service + metadata: + annotations: + service.alpha.openshift.io/serving-cert-secret-name: prometheus-alerts-tls + labels: + name: alerts + 
name: alerts + namespace: "${NAMESPACE}" + spec: + ports: + - name: alerts + port: 443 + protocol: TCP + targetPort: 9443 + selector: + app: prometheus +- apiVersion: v1 + kind: Secret + metadata: + name: alerts-proxy + namespace: "${NAMESPACE}" + stringData: + session_secret: "${SESSION_SECRET}=" + +- apiVersion: v1 + kind: ConfigMap + metadata: + name: prometheus-alerts + namespace: "${NAMESPACE}" + data: + alertmanager.yml: | + global: + + # The root route on which each incoming alert enters. + route: + # default route if none match + receiver: alert-buffer-wh + + # The labels by which incoming alerts are grouped together. For example, + # multiple alerts coming in for cluster=A and alertname=LatencyHigh would + # be batched into a single group. + # TODO: + group_by: [] + + # All the above attributes are inherited by all child routes and can + # overwritten on each. + + receivers: + - name: alert-buffer-wh + webhook_configs: + - url: http://localhost:9099/topics/alerts