From 429a4edd86719206e880790724a058857d5df0b0 Mon Sep 17 00:00:00 2001 From: Steve Wilkerson Date: Fri, 3 Nov 2017 10:59:08 -0500 Subject: [PATCH] Prometheus monitoring for OSH infra This will move Prometheus to OSH-infra to be included as part of the basic infrastructure deploy for openstack-helm. It includes charts for Prometheus, Node Exporter, Kube-State-Metrics, and Alertmanager. It provides a base for monitoring and alerting for the underlying infrastructure. Partially Implements: blueprint osh-monitoring Change-Id: Ie453373b54c5f1825339ce0566e4b5d0f74abc20 --- alertmanager/Chart.yaml | 24 + alertmanager/requirements.yaml | 18 + .../templates/bin/_alertmanager.sh.tpl | 32 + .../templates/clusterrolebinding.yaml | 31 + alertmanager/templates/configmap-bin.yaml | 29 + alertmanager/templates/configmap-etc.yaml | 27 + .../templates/ingress-alertmanager.yaml | 60 ++ .../templates/job-image-repo-sync.yaml | 65 ++ alertmanager/templates/pvc.yaml | 31 + alertmanager/templates/rbac-entrypoint.yaml | 20 + .../service-ingress-alertmanager.yaml | 32 + alertmanager/templates/service.yaml | 36 + alertmanager/templates/serviceaccount.yaml | 22 + alertmanager/templates/statefulset.yaml | 106 ++ alertmanager/values.yaml | 254 +++++ .../_prometheus_metadata_annotations.tpl | 48 + kube-state-metrics/Chart.yaml | 24 + kube-state-metrics/requirements.yaml | 19 + kube-state-metrics/templates/clusterrole.yaml | 64 ++ .../templates/clusterrolebinding.yaml | 32 + .../templates/configmap-bin.yaml | 27 + kube-state-metrics/templates/deployment.yaml | 52 + .../templates/job-image-repo-sync.yaml | 65 ++ .../templates/rbac-entrypoint.yaml | 20 + .../templates/service-controller-manager.yaml | 39 + .../templates/service-kube-metrics.yaml | 34 + .../templates/service-scheduler.yaml | 39 + .../templates/serviceaccount.yaml | 24 + kube-state-metrics/values.yaml | 149 +++ node-exporter/Chart.yaml | 24 + node-exporter/requirements.yaml | 19 + .../templates/clusterrolebinding.yaml | 32 + 
node-exporter/templates/configmap-bin.yaml | 27 + node-exporter/templates/daemonset.yaml | 68 ++ .../templates/job-image-repo-sync.yaml | 65 ++ node-exporter/templates/rbac-entrypoint.yaml | 20 + node-exporter/templates/service.yaml | 37 + node-exporter/templates/serviceaccount.yaml | 24 + node-exporter/values.yaml | 136 +++ prometheus/Chart.yaml | 24 + prometheus/requirements.yaml | 18 + prometheus/templates/bin/_helm-tests.sh.tpl | 59 ++ prometheus/templates/bin/_prometheus.sh.tpl | 38 + prometheus/templates/clusterrole.yaml | 46 + prometheus/templates/clusterrolebinding.yaml | 32 + prometheus/templates/configmap-bin.yaml | 31 + prometheus/templates/configmap-etc.yaml | 27 + prometheus/templates/configmap-rules.yaml | 47 + prometheus/templates/ingress-prometheus.yaml | 60 ++ prometheus/templates/job-image-repo-sync.yaml | 65 ++ prometheus/templates/pod-helm-tests.yaml | 46 + prometheus/templates/pvc.yaml | 31 + prometheus/templates/rbac-entrypoint.yaml | 20 + .../templates/service-ingress-prometheus.yaml | 32 + prometheus/templates/service.yaml | 39 + prometheus/templates/serviceaccount.yaml | 22 + prometheus/templates/statefulset.yaml | 158 +++ prometheus/values.yaml | 907 ++++++++++++++++++ tools/gate/chart-deploys/default.yaml | 63 ++ 59 files changed, 3640 insertions(+) create mode 100644 alertmanager/Chart.yaml create mode 100644 alertmanager/requirements.yaml create mode 100644 alertmanager/templates/bin/_alertmanager.sh.tpl create mode 100644 alertmanager/templates/clusterrolebinding.yaml create mode 100644 alertmanager/templates/configmap-bin.yaml create mode 100644 alertmanager/templates/configmap-etc.yaml create mode 100644 alertmanager/templates/ingress-alertmanager.yaml create mode 100644 alertmanager/templates/job-image-repo-sync.yaml create mode 100644 alertmanager/templates/pvc.yaml create mode 100644 alertmanager/templates/rbac-entrypoint.yaml create mode 100644 alertmanager/templates/service-ingress-alertmanager.yaml create mode 100644 
alertmanager/templates/service.yaml create mode 100644 alertmanager/templates/serviceaccount.yaml create mode 100644 alertmanager/templates/statefulset.yaml create mode 100644 alertmanager/values.yaml create mode 100644 helm-toolkit/templates/snippets/_prometheus_metadata_annotations.tpl create mode 100644 kube-state-metrics/Chart.yaml create mode 100644 kube-state-metrics/requirements.yaml create mode 100644 kube-state-metrics/templates/clusterrole.yaml create mode 100644 kube-state-metrics/templates/clusterrolebinding.yaml create mode 100644 kube-state-metrics/templates/configmap-bin.yaml create mode 100644 kube-state-metrics/templates/deployment.yaml create mode 100644 kube-state-metrics/templates/job-image-repo-sync.yaml create mode 100644 kube-state-metrics/templates/rbac-entrypoint.yaml create mode 100644 kube-state-metrics/templates/service-controller-manager.yaml create mode 100644 kube-state-metrics/templates/service-kube-metrics.yaml create mode 100644 kube-state-metrics/templates/service-scheduler.yaml create mode 100644 kube-state-metrics/templates/serviceaccount.yaml create mode 100644 kube-state-metrics/values.yaml create mode 100644 node-exporter/Chart.yaml create mode 100644 node-exporter/requirements.yaml create mode 100644 node-exporter/templates/clusterrolebinding.yaml create mode 100644 node-exporter/templates/configmap-bin.yaml create mode 100644 node-exporter/templates/daemonset.yaml create mode 100644 node-exporter/templates/job-image-repo-sync.yaml create mode 100644 node-exporter/templates/rbac-entrypoint.yaml create mode 100644 node-exporter/templates/service.yaml create mode 100644 node-exporter/templates/serviceaccount.yaml create mode 100644 node-exporter/values.yaml create mode 100644 prometheus/Chart.yaml create mode 100644 prometheus/requirements.yaml create mode 100644 prometheus/templates/bin/_helm-tests.sh.tpl create mode 100644 prometheus/templates/bin/_prometheus.sh.tpl create mode 100644 prometheus/templates/clusterrole.yaml 
create mode 100644 prometheus/templates/clusterrolebinding.yaml create mode 100644 prometheus/templates/configmap-bin.yaml create mode 100644 prometheus/templates/configmap-etc.yaml create mode 100644 prometheus/templates/configmap-rules.yaml create mode 100644 prometheus/templates/ingress-prometheus.yaml create mode 100644 prometheus/templates/job-image-repo-sync.yaml create mode 100644 prometheus/templates/pod-helm-tests.yaml create mode 100644 prometheus/templates/pvc.yaml create mode 100644 prometheus/templates/rbac-entrypoint.yaml create mode 100644 prometheus/templates/service-ingress-prometheus.yaml create mode 100644 prometheus/templates/service.yaml create mode 100644 prometheus/templates/serviceaccount.yaml create mode 100644 prometheus/templates/statefulset.yaml create mode 100644 prometheus/values.yaml diff --git a/alertmanager/Chart.yaml b/alertmanager/Chart.yaml new file mode 100644 index 000000000..dc3f51f82 --- /dev/null +++ b/alertmanager/Chart.yaml @@ -0,0 +1,24 @@ +# Copyright 2017 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: v1 +description: OpenStack-Helm Alertmanager +name: alertmanager +version: 0.1.0 +home: https://prometheus.io/docs/alerting/alertmanager/ +sources: + - https://github.com/prometheus/alertmanager + - https://git.openstack.org/cgit/openstack/openstack-helm-infra +maintainers: + - name: OpenStack-Helm Authors diff --git a/alertmanager/requirements.yaml b/alertmanager/requirements.yaml new file mode 100644 index 000000000..53782e69b --- /dev/null +++ b/alertmanager/requirements.yaml @@ -0,0 +1,18 @@ +# Copyright 2017 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +dependencies: + - name: helm-toolkit + repository: http://localhost:8879/charts + version: 0.1.0 diff --git a/alertmanager/templates/bin/_alertmanager.sh.tpl b/alertmanager/templates/bin/_alertmanager.sh.tpl new file mode 100644 index 000000000..0e208388b --- /dev/null +++ b/alertmanager/templates/bin/_alertmanager.sh.tpl @@ -0,0 +1,32 @@ +#!/bin/sh + +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/}} + +set -ex +COMMAND="${@:-start}" + +function start () { + exec /bin/alertmanager \ + -config.file=/etc/config/alertmanager.yml \ + -storage.path=/var/lib/alertmanager/data +} + +function stop () { + kill -TERM 1 +} + +$COMMAND diff --git a/alertmanager/templates/clusterrolebinding.yaml b/alertmanager/templates/clusterrolebinding.yaml new file mode 100644 index 000000000..f82b65b2e --- /dev/null +++ b/alertmanager/templates/clusterrolebinding.yaml @@ -0,0 +1,31 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.clusterrolebinding }} +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: run-alertmanager +subjects: + - kind: ServiceAccount + name: alertmanager + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: cluster-admin + apiGroup: rbac.authorization.k8s.io +{{- end }} diff --git a/alertmanager/templates/configmap-bin.yaml b/alertmanager/templates/configmap-bin.yaml new file mode 100644 index 000000000..5ccd918c7 --- /dev/null +++ b/alertmanager/templates/configmap-bin.yaml @@ -0,0 +1,29 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.configmap_bin }} +{{- $envAll := . }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: alertmanager-bin +data: + alertmanager.sh: | +{{ tuple "bin/_alertmanager.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} + image-repo-sync.sh: |+ +{{- include "helm-toolkit.scripts.image_repo_sync" . | indent 4 }} +{{- end }} diff --git a/alertmanager/templates/configmap-etc.yaml b/alertmanager/templates/configmap-etc.yaml new file mode 100644 index 000000000..35bab917e --- /dev/null +++ b/alertmanager/templates/configmap-etc.yaml @@ -0,0 +1,27 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.configmap_etc }} +{{- $envAll := . 
}} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: alertmanager-etc +data: + alertmanager.yml: +{{- toYaml .Values.conf.alertmanager | indent 4 }} +{{- end }} diff --git a/alertmanager/templates/ingress-alertmanager.yaml b/alertmanager/templates/ingress-alertmanager.yaml new file mode 100644 index 000000000..490aa780c --- /dev/null +++ b/alertmanager/templates/ingress-alertmanager.yaml @@ -0,0 +1,60 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.ingress }} +{{- $envAll := . 
}} +{{- if .Values.network.alertmanager.ingress.public }} +{{- $backendServiceType := "alerts" }} +{{- $backendPort := "alerts-api" }} +{{- $ingressName := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} +{{- $backendName := tuple $backendServiceType "internal" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} +{{- $hostName := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} +{{- $hostNameNamespaced := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_namespaced_endpoint_lookup" }} +{{- $hostNameFull := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }} +--- +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: {{ $ingressName }} + annotations: + kubernetes.io/ingress.class: "nginx" + ingress.kubernetes.io/rewrite-target: / + ingress.kubernetes.io/proxy-body-size: {{ .Values.network.alertmanager.ingress.proxy_body_size }} +spec: + rules: +{{ if ne $hostNameNamespaced $hostNameFull }} +{{- range $key1, $vHost := tuple $hostName $hostNameNamespaced $hostNameFull }} + - host: {{ $vHost }} + http: + paths: + - path: / + backend: + serviceName: {{ $backendName }} + servicePort: {{ $backendPort }} +{{- end }} +{{- else }} +{{- range $key1, $vHost := tuple $hostName $hostNameNamespaced }} + - host: {{ $vHost }} + http: + paths: + - path: / + backend: + serviceName: {{ $backendName }} + servicePort: {{ $backendPort }} +{{- end }} +{{- end }} +{{- end }} +{{- end }} diff --git a/alertmanager/templates/job-image-repo-sync.yaml b/alertmanager/templates/job-image-repo-sync.yaml new file mode 100644 index 000000000..4179f7824 --- /dev/null +++ b/alertmanager/templates/job-image-repo-sync.yaml @@ -0,0 +1,65 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.job_image_repo_sync }} +{{- $envAll := . }} +{{- if .Values.images.local_registry.active -}} +{{- $_ := set .Values "pod_dependency" .Values.dependencies.image_repo_sync -}} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: alertmanager-image-repo-sync +spec: + template: + metadata: + labels: +{{ tuple $envAll "alertmanager" "image-repo-sync" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }} + spec: + restartPolicy: OnFailure + nodeSelector: + {{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }} + initContainers: +{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }} + containers: + - name: image-repo-sync +{{ tuple $envAll "image_repo_sync" | include "helm-toolkit.snippets.image" | indent 10 }} +{{ tuple $envAll $envAll.Values.pod.resources.jobs.image_repo_sync | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} + env: + - name: LOCAL_REPO + value: "{{ tuple "local_image_registry" "node" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}:{{ tuple "local_image_registry" "node" "registry" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}" + - name: IMAGE_SYNC_LIST + value: "{{ include "helm-toolkit.utils.image_sync_list" . 
}}" + command: + - /tmp/image-repo-sync.sh + volumeMounts: + - name: alertmanager-bin + mountPath: /tmp/image-repo-sync.sh + subPath: image-repo-sync.sh + readOnly: true + - name: docker-socket + mountPath: /var/run/docker.sock + volumes: + - name: alertmanager-bin + configMap: + name: alertmanager-bin + defaultMode: 0555 + - name: docker-socket + hostPath: + path: /var/run/docker.sock +{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }} +{{- end }} +{{- end }} diff --git a/alertmanager/templates/pvc.yaml b/alertmanager/templates/pvc.yaml new file mode 100644 index 000000000..7bf281b8d --- /dev/null +++ b/alertmanager/templates/pvc.yaml @@ -0,0 +1,31 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.pvc }} +{{- $envAll := . }} +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: {{ .Values.storage.pvc.name }} +spec: + accessModes: + - {{ .Values.storage.pvc.access_mode }} + resources: + requests: + storage: {{ .Values.storage.requests.storage }} + storageClassName: {{ .Values.storage.storage_class }} +{{- end }} diff --git a/alertmanager/templates/rbac-entrypoint.yaml b/alertmanager/templates/rbac-entrypoint.yaml new file mode 100644 index 000000000..64d1b45ab --- /dev/null +++ b/alertmanager/templates/rbac-entrypoint.yaml @@ -0,0 +1,20 @@ + +{{/* +Copyright 2017 The Openstack-Helm Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.rbac_entrypoint }} +{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_rbac"}} +{{- end }} diff --git a/alertmanager/templates/service-ingress-alertmanager.yaml b/alertmanager/templates/service-ingress-alertmanager.yaml new file mode 100644 index 000000000..826f0e5f0 --- /dev/null +++ b/alertmanager/templates/service-ingress-alertmanager.yaml @@ -0,0 +1,32 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.service_ingress }} +{{- $envAll := . }} +{{- if .Values.network.alertmanager.ingress.public }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ tuple "alerts" "public" . 
| include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} +spec: + ports: + - name: http + port: 80 + selector: + app: ingress-api +{{- end }} +{{- end }} diff --git a/alertmanager/templates/service.yaml b/alertmanager/templates/service.yaml new file mode 100644 index 000000000..fb17dfca3 --- /dev/null +++ b/alertmanager/templates/service.yaml @@ -0,0 +1,36 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.service }} +{{- $envAll := . }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ tuple "alerts" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} +spec: + ports: + - name: alerts-api + {{ if .Values.network.alertmanager.node_port.enabled }} + nodePort: {{ .Values.network.alertmanager.node_port.port }} + {{ end }} + port: {{ .Values.network.alertmanager.port }} + selector: +{{ tuple $envAll "alertmanager" "server" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }} + {{ if .Values.network.alertmanager.node_port.enabled }} + type: NodePort + {{ end }} +{{- end }} diff --git a/alertmanager/templates/serviceaccount.yaml b/alertmanager/templates/serviceaccount.yaml new file mode 100644 index 000000000..9800fc214 --- /dev/null +++ b/alertmanager/templates/serviceaccount.yaml @@ -0,0 +1,22 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.serviceaccount }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: alertmanager +{{- end }} diff --git a/alertmanager/templates/statefulset.yaml b/alertmanager/templates/statefulset.yaml new file mode 100644 index 000000000..fea043160 --- /dev/null +++ b/alertmanager/templates/statefulset.yaml @@ -0,0 +1,106 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.statefulset }} +{{- $envAll := . 
}} +{{- if .Values.images.local_registry.active -}} +{{- $_ := set .Values "pod_dependency" (merge .Values.dependencies.alertmanager .Values.conditional_dependencies.local_image_registry) -}} +{{- else -}} +{{- $_ := set .Values "pod_dependency" .Values.dependencies.alertmanager -}} +{{- end -}} +{{- $mounts_alertmanager := .Values.pod.mounts.alertmanager.alertmanager }} +{{- $mounts_alertmanager_init := .Values.pod.mounts.alertmanager.init_container }} +--- +apiVersion: apps/v1beta1 +kind: StatefulSet +metadata: + name: alertmanager +spec: + serviceName: {{ tuple "alerts" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} + replicas: {{ .Values.pod.replicas.alertmanager }} + template: + metadata: + labels: +{{ tuple $envAll "alertmanager" "server" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }} + annotations: + configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }} + configmap-etc-hash: {{ tuple "configmap-etc.yaml" . 
| include "helm-toolkit.utils.hash" }} + spec: + serviceAccount: alertmanager + affinity: +{{ tuple $envAll "alertmanager" "server" | include "helm-toolkit.snippets.kubernetes_pod_anti_affinity" | indent 8 }} + nodeSelector: + {{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }} + terminationGracePeriodSeconds: {{ .Values.pod.lifecycle.termination_grace_period.alertmanager.timeout | default "30" }} + initContainers: +{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }} + containers: + - name: alertmanager +{{ tuple $envAll "alertmanager" | include "helm-toolkit.snippets.image" | indent 10 }} + command: + - /tmp/alertmanager.sh + - start + lifecycle: + preStop: + exec: + command: + - /tmp/alertmanager.sh + - stop +{{ tuple $envAll $envAll.Values.pod.resources.alertmanager | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} + ports: + - name: alerts-api + containerPort: {{ .Values.network.alertmanager.port }} + readinessProbe: + httpGet: + path: /#/status + port: {{ .Values.network.alertmanager.port }} + initialDelaySeconds: 30 + timeoutSeconds: 30 + volumeMounts: + - name: etc-alertmanager + mountPath: /etc/config + - name: alertmanager-etc + mountPath: /etc/config/alertmanager.yml + subPath: alertmanager.yml + readOnly: true + - name: alertmanager-bin + mountPath: /tmp/alertmanager.sh + subPath: alertmanager.sh + readOnly: true + - name: storage + mountPath: /var/lib/alertmanager/data +{{ if $mounts_alertmanager.volumeMounts }}{{ toYaml $mounts_alertmanager.volumeMounts | indent 12 }}{{ end }} + volumes: +{{ tuple . 
| include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }} + - name: etc-alertmanager + emptyDir: {} + - name: alertmanager-etc + configMap: + name: alertmanager-etc + - name: alertmanager-bin + configMap: + name: alertmanager-bin + defaultMode: 0555 + {{- if .Values.storage.enabled }} + - name: storage + persistentVolumeClaim: + claimName: {{ .Values.storage.pvc.name }} + {{- else }} + - name: storage + emptyDir: {} + {{- end }} +{{ if $mounts_alertmanager.volumes }}{{ toYaml $mounts_alertmanager.volumes | indent 8 }}{{ end }} +{{- end }} diff --git a/alertmanager/values.yaml b/alertmanager/values.yaml new file mode 100644 index 000000000..0b1ffbb24 --- /dev/null +++ b/alertmanager/values.yaml @@ -0,0 +1,254 @@ +# Copyright 2017 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default values for alertmanager. +# This is a YAML-formatted file. +# Declare name/value pairs to be passed into your templates. 
+# name: value + +images: + tags: + alertmanager: docker.io/prom/alertmanager:v0.11.0 + dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.2.1 + image_repo_sync: docker.io/docker:17.07.0 + pull_policy: IfNotPresent + local_registry: + active: false + exclude: + - dep_check + - image_repo_sync + +labels: + node_selector_key: openstack-control-plane + node_selector_value: enabled + +pod: + affinity: + anti: + type: + default: preferredDuringSchedulingIgnoredDuringExecution + topologyKey: + default: kubernetes.io/hostname + mounts: + alertmanager: + alertmanager: + init_container: null + replicas: + alertmanager: 1 + lifecycle: + upgrades: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 1 + max_surge: 3 + termination_grace_period: + alertmanager: + timeout: 30 + resources: + enabled: false + alertmanager: + limits: + memory: "1024Mi" + cpu: "2000m" + requests: + memory: "128Mi" + cpu: "500m" + +endpoints: + cluster_domain_suffix: cluster.local + local_image_registry: + name: docker-registry + namespace: docker-registry + hosts: + default: localhost + internal: docker-registry + node: localhost + host_fqdn_override: + default: null + port: + registry: + node: 5000 + alerts: + name: alertmanager + namespace: null + hosts: + default: alerts-api + public: alertmanager + host_fqdn_override: + default: null + path: + default: null + scheme: + default: 'http' + port: + api: + default: 9093 + public: 80 + +dependencies: + alertmanager: + services: null + image_repo_sync: + services: + - service: local_image_registry + endpoint: internal + +conditional_dependencies: + local_image_registry: + jobs: + - alertmanager-image-repo-sync + services: + - service: local_image_registry + endpoint: node + +network: + alertmanager: + ingress: + public: true + proxy_body_size: 1024M + node_port: + enabled: false + port: 30903 + port: 9093 + +storage: + enabled: true + pvc: + name: alertmanager-pvc + access_mode: ReadWriteMany + 
requests: + storage: 5Gi + storage_class: general + +manifests: + clusterrolebinding: true + configmap_bin: true + configmap_etc: true + ingress: true + job_image_repo_sync: true + pvc: true + rbac_entrypoint: true + service: true + service_ingress: true + serviceaccount: true + statefulset: true + +conf: + alertmanager: | + global: + # The smarthost and SMTP sender used for mail notifications. + smtp_smarthost: 'localhost:25' + smtp_from: 'alertmanager@example.org' + smtp_auth_username: 'alertmanager' + smtp_auth_password: 'password' + # The auth token for Hipchat. + hipchat_auth_token: '1234556789' + # Alternative host for Hipchat. + hipchat_api_url: 'https://hipchat.foobar.org/' + # The directory from which notification templates are read. + templates: + - '/etc/alertmanager/template/*.tmpl' + # The root route on which each incoming alert enters. + route: + # The labels by which incoming alerts are grouped together. For example, + # multiple alerts coming in for cluster=A and alertname=LatencyHigh would + # be batched into a single group. + group_by: ['alertname', 'cluster', 'service'] + # When a new group of alerts is created by an incoming alert, wait at + # least 'group_wait' to send the initial notification. + # This ensures that multiple alerts for the same group that start + # firing shortly after one another are batched together on the first + # notification. + group_wait: 30s + # When the first notification was sent, wait 'group_interval' to send a batch + # of new alerts that started firing for that group. + group_interval: 5m + # If an alert has successfully been sent, wait 'repeat_interval' to + # resend them. + repeat_interval: 3h + # A default receiver + receiver: team-X-mails + # All the above attributes are inherited by all child routes and can + # be overwritten on each. + # The child route trees. + routes: + # This route performs a regular expression match on alert labels to + # catch alerts that are related to a list of services. 
+ - match_re: + service: ^(foo1|foo2|baz)$ + receiver: team-X-mails + # The service has a sub-route for critical alerts; any alerts + # that do not match, i.e. severity != critical, fall back to the + # parent node and are sent to 'team-X-mails' + routes: + - match: + severity: critical + receiver: team-X-pager + - match: + service: files + receiver: team-Y-mails + routes: + - match: + severity: critical + receiver: team-Y-pager + # This route handles all alerts coming from a database service. If there's + # no team to handle it, it defaults to the DB team. + - match: + service: database + receiver: team-DB-pager + # Also group alerts by affected database. + group_by: [alertname, cluster, database] + routes: + - match: + owner: team-X + receiver: team-X-pager + - match: + owner: team-Y + receiver: team-Y-pager + # Inhibition rules allow muting a set of alerts while another alert is + # firing. + # We use this to mute any warning-level notifications if the same alert is + # already critical. + inhibit_rules: + - source_match: + severity: 'critical' + target_match: + severity: 'warning' + # Apply inhibition if the alertname is the same.
+ equal: ['alertname', 'cluster', 'service'] + receivers: + - name: 'team-X-mails' + email_configs: + - to: 'team-X+alerts@example.org' + - name: 'team-X-pager' + email_configs: + - to: 'team-X+alerts-critical@example.org' + pagerduty_configs: + - service_key: + - name: 'team-Y-mails' + email_configs: + - to: 'team-Y+alerts@example.org' + - name: 'team-Y-pager' + pagerduty_configs: + - service_key: + - name: 'team-DB-pager' + pagerduty_configs: + - service_key: + - name: 'team-X-hipchat' + hipchat_configs: + - auth_token: + room_id: 85 + message_format: html + notify: true diff --git a/helm-toolkit/templates/snippets/_prometheus_metadata_annotations.tpl b/helm-toolkit/templates/snippets/_prometheus_metadata_annotations.tpl new file mode 100644 index 000000000..9f54f4470 --- /dev/null +++ b/helm-toolkit/templates/snippets/_prometheus_metadata_annotations.tpl @@ -0,0 +1,48 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +# Appends annotations for configuring prometheus scrape endpoints via +# annotations. The required annotations are: +# * `prometheus.io/scrape`: Only scrape services that have a value of `true` +# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need +# to set this to `https` & most likely set the `tls_config` of the scrape config. +# * `prometheus.io/path`: If the metrics path is not `/metrics` override this. 
+# * `prometheus.io/port`: If the metrics are exposed on a different port to the +# service then set this appropriately. + +{{- define "helm-toolkit.snippets.prometheus_service_annotations" -}} +{{- $endpoint := index . 0 -}} +{{- $context := index . 1 -}} +prometheus.io/scrape: {{ $endpoint.scrape | quote }} +prometheus.io/scheme: {{ $endpoint.scheme.default | quote }} +prometheus.io/path: {{ $endpoint.path.default | quote }} +prometheus.io/port: {{ $endpoint.scrape_port | quote }} +{{- end -}} + +# Appends annotations for configuring prometheus scrape jobs via pod +# annotations. The required annotations are: +# * `prometheus.io/scrape`: Only scrape pods that have a value of `true` +# * `prometheus.io/path`: If the metrics path is not `/metrics` override this. +# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the +# pod's declared ports (default is a port-free target if none are declared). + +{{- define "helm-toolkit.snippets.prometheus_pod_annotations" -}} +{{- $pod := index . 0 -}} +{{- $context := index . 1 -}} +prometheus.io/scrape: {{ $pod.scrape | quote }} +prometheus.io/path: {{ $pod.path.default | quote }} +prometheus.io/port: {{ $pod.scrape_port | quote }} +{{- end -}} diff --git a/kube-state-metrics/Chart.yaml b/kube-state-metrics/Chart.yaml new file mode 100644 index 000000000..008c05d5a --- /dev/null +++ b/kube-state-metrics/Chart.yaml @@ -0,0 +1,24 @@ +# Copyright 2017 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v1 +description: OpenStack-Helm Kube-State-Metrics +name: kube-state-metrics +version: 0.1.0 +home: https://github.com/kubernetes/kube-state-metrics +sources: + - https://github.com/kubernetes/kube-state-metrics + - https://git.openstack.org/cgit/openstack/openstack-helm-infra +maintainers: + - name: OpenStack-Helm Authors diff --git a/kube-state-metrics/requirements.yaml b/kube-state-metrics/requirements.yaml new file mode 100644 index 000000000..00a045b4e --- /dev/null +++ b/kube-state-metrics/requirements.yaml @@ -0,0 +1,19 @@ + +# Copyright 2017 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +dependencies: + - name: helm-toolkit + repository: http://localhost:8879/charts + version: 0.1.0 diff --git a/kube-state-metrics/templates/clusterrole.yaml b/kube-state-metrics/templates/clusterrole.yaml new file mode 100644 index 000000000..c772d777b --- /dev/null +++ b/kube-state-metrics/templates/clusterrole.yaml @@ -0,0 +1,64 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.clusterrole }} +{{- $envAll := . }} +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRole +metadata: + name: kube-state-metrics-runner +rules: + - apiGroups: + - "" + resources: + - namespaces + - nodes + - persistentvolumeclaims + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + verbs: + - list + - watch + - apiGroups: + - extensions + resources: + - daemonsets + - deployments + - replicasets + verbs: + - list + - watch + - apiGroups: + - apps + resources: + - statefulsets + verbs: + - get + - list + - watch + - apiGroups: + - batch + resources: + - cronjobs + - jobs + verbs: + - list + - watch +{{- end }} diff --git a/kube-state-metrics/templates/clusterrolebinding.yaml b/kube-state-metrics/templates/clusterrolebinding.yaml new file mode 100644 index 000000000..434222010 --- /dev/null +++ b/kube-state-metrics/templates/clusterrolebinding.yaml @@ -0,0 +1,32 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/}} + +{{- if .Values.manifests.clusterrolebinding }} +{{- $envAll := . }} +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: run-kube-state-metrics +subjects: + - kind: ServiceAccount + name: kube-state-metrics + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: kube-state-metrics-runner + apiGroup: rbac.authorization.k8s.io +{{- end }} diff --git a/kube-state-metrics/templates/configmap-bin.yaml b/kube-state-metrics/templates/configmap-bin.yaml new file mode 100644 index 000000000..c360c8f58 --- /dev/null +++ b/kube-state-metrics/templates/configmap-bin.yaml @@ -0,0 +1,27 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.configmap_bin }} +{{- $envAll := . }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: kube-metrics-bin +data: + image-repo-sync.sh: |+ +{{- include "helm-toolkit.scripts.image_repo_sync" . | indent 4 }} +{{- end }} diff --git a/kube-state-metrics/templates/deployment.yaml b/kube-state-metrics/templates/deployment.yaml new file mode 100644 index 000000000..60ce56d63 --- /dev/null +++ b/kube-state-metrics/templates/deployment.yaml @@ -0,0 +1,52 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.deployment }} +{{- $envAll := . }} +{{- if .Values.images.local_registry.active -}} +{{- $_ := set .Values "pod_dependency" (merge .Values.dependencies.kube_state_metrics .Values.conditional_dependencies.local_image_registry) -}} +{{- else -}} +{{- $_ := set .Values "pod_dependency" .Values.dependencies.kube_state_metrics -}} +{{- end -}} +--- +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: kube-state-metrics +spec: + replicas: {{ .Values.pod.replicas.kube_state_metrics }} +{{ tuple $envAll | include "helm-toolkit.snippets.kubernetes_upgrades_deployment" | indent 2 }} + template: + metadata: + labels: +{{ tuple $envAll "kube-state-metrics" "exporter" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }} + spec: + serviceAccount: kube-state-metrics + nodeSelector: + {{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }} + terminationGracePeriodSeconds: {{ .Values.pod.lifecycle.termination_grace_period.kube_state_metrics.timeout | default "30" }} + initContainers: +{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }} + containers: + - name: kube-state-metrics +{{ tuple $envAll "kube_state_metrics" | include "helm-toolkit.snippets.image" | indent 10 }} +{{ tuple $envAll $envAll.Values.pod.resources.kube_state_metrics | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} + ports: + - name: metrics + containerPort: {{ .Values.network.kube_state_metrics.port }} + volumes: +{{ tuple . 
| include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }} +{{- end }} diff --git a/kube-state-metrics/templates/job-image-repo-sync.yaml b/kube-state-metrics/templates/job-image-repo-sync.yaml new file mode 100644 index 000000000..854e74f28 --- /dev/null +++ b/kube-state-metrics/templates/job-image-repo-sync.yaml @@ -0,0 +1,65 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.job_image_repo_sync }} +{{- $envAll := . 
}} +{{- if .Values.images.local_registry.active -}} +{{- $_ := set .Values "pod_dependency" .Values.dependencies.image_repo_sync -}} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: kube-metrics-image-repo-sync +spec: + template: + metadata: + labels: +{{ tuple $envAll "kube-metrics" "image-repo-sync" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }} + spec: + restartPolicy: OnFailure + nodeSelector: + {{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }} + initContainers: +{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }} + containers: + - name: image-repo-sync +{{ tuple $envAll "image_repo_sync" | include "helm-toolkit.snippets.image" | indent 10 }} +{{ tuple $envAll $envAll.Values.pod.resources.jobs.image_repo_sync | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} + env: + - name: LOCAL_REPO + value: "{{ tuple "local_image_registry" "node" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}:{{ tuple "local_image_registry" "node" "registry" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}" + - name: IMAGE_SYNC_LIST + value: "{{ include "helm-toolkit.utils.image_sync_list" . }}" + command: + - /tmp/image-repo-sync.sh + volumeMounts: + - name: kube-metrics-bin + mountPath: /tmp/image-repo-sync.sh + subPath: image-repo-sync.sh + readOnly: true + - name: docker-socket + mountPath: /var/run/docker.sock + volumes: + - name: kube-metrics-bin + configMap: + name: kube-metrics-bin + defaultMode: 0555 + - name: docker-socket + hostPath: + path: /var/run/docker.sock +{{ tuple . 
| include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }} +{{- end }} +{{- end }} diff --git a/kube-state-metrics/templates/rbac-entrypoint.yaml b/kube-state-metrics/templates/rbac-entrypoint.yaml new file mode 100644 index 000000000..82b9916e8 --- /dev/null +++ b/kube-state-metrics/templates/rbac-entrypoint.yaml @@ -0,0 +1,20 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.rbac_entrypoint }} +{{- $envAll := . }} +{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_rbac"}} +{{- end }} diff --git a/kube-state-metrics/templates/service-controller-manager.yaml b/kube-state-metrics/templates/service-controller-manager.yaml new file mode 100644 index 000000000..47ccd24c7 --- /dev/null +++ b/kube-state-metrics/templates/service-controller-manager.yaml @@ -0,0 +1,39 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/}} + +{{- if .Values.manifests.service_controller_manager }} +{{- $envAll := . }} +{{- $endpoint := $envAll.Values.endpoints.kube_controller_manager }} +--- +apiVersion: v1 +kind: Service +metadata: + name: kube-controller-manager-discovery + labels: + component: kube-controller-manager + annotations: +{{ tuple $endpoint $envAll | include "helm-toolkit.snippets.prometheus_service_annotations" | indent 4 }} +spec: + selector: + component: kube-controller-manager + type: ClusterIP + clusterIP: None + ports: + - name: http-metrics + port: 10252 + targetPort: 10252 + protocol: TCP +{{- end }} diff --git a/kube-state-metrics/templates/service-kube-metrics.yaml b/kube-state-metrics/templates/service-kube-metrics.yaml new file mode 100644 index 000000000..9ea5d9168 --- /dev/null +++ b/kube-state-metrics/templates/service-kube-metrics.yaml @@ -0,0 +1,34 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.service_kube_metrics }} +{{- $envAll := . }} +{{- $endpoint := $envAll.Values.endpoints.kube_metrics }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ tuple "kube_metrics" "internal" . 
| include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} + annotations: +{{ tuple $endpoint $envAll | include "helm-toolkit.snippets.prometheus_service_annotations" | indent 4 }} +spec: + ports: + - name: http + port: {{ .Values.network.kube_state_metrics.port }} + targetPort: 8080 + selector: +{{ tuple $envAll "kube-state-metrics" "exporter" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }} +{{- end }} diff --git a/kube-state-metrics/templates/service-scheduler.yaml b/kube-state-metrics/templates/service-scheduler.yaml new file mode 100644 index 000000000..7b47e9c0d --- /dev/null +++ b/kube-state-metrics/templates/service-scheduler.yaml @@ -0,0 +1,39 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.service_scheduler }} +{{- $envAll := . 
}} +{{- $endpoint := $envAll.Values.endpoints.kube_scheduler }} +--- +apiVersion: v1 +kind: Service +metadata: + name: kube-scheduler-discovery + labels: + component: kube-scheduler + annotations: +{{ tuple $endpoint $envAll | include "helm-toolkit.snippets.prometheus_service_annotations" | indent 4 }} +spec: + selector: + component: kube-scheduler + type: ClusterIP + clusterIP: None + ports: + - name: http-metrics + port: 10251 + targetPort: 10251 + protocol: TCP +{{- end }} diff --git a/kube-state-metrics/templates/serviceaccount.yaml b/kube-state-metrics/templates/serviceaccount.yaml new file mode 100644 index 000000000..6269e7169 --- /dev/null +++ b/kube-state-metrics/templates/serviceaccount.yaml @@ -0,0 +1,24 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.serviceaccount }} +{{- $envAll := . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-state-metrics +{{- end }} diff --git a/kube-state-metrics/values.yaml b/kube-state-metrics/values.yaml new file mode 100644 index 000000000..4900684c3 --- /dev/null +++ b/kube-state-metrics/values.yaml @@ -0,0 +1,149 @@ +# Copyright 2017 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default values for kube-state-metrics. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +images: + tags: + kube_state_metrics: quay.io/coreos/kube-state-metrics:v1.0.1 + dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.2.1 + image_repo_sync: docker.io/docker:17.07.0 + pull_policy: IfNotPresent + local_registry: + active: false + exclude: + - dep_check + - image_repo_sync + +labels: + node_selector_key: openstack-control-plane + node_selector_value: enabled + +pod: + affinity: + anti: + type: + default: preferredDuringSchedulingIgnoredDuringExecution + topologyKey: + default: kubernetes.io/hostname + mounts: + kube_state_metrics: + kube_state_metrics: + init_container: null + replicas: + kube_state_metrics: 1 + prometheus: 1 + lifecycle: + upgrades: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 1 + max_surge: 3 + termination_grace_period: + kube_state_metrics: + timeout: 30 + resources: + enabled: false + kube_state_metrics: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + jobs: + image_repo_sync: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + +dependencies: + image_repo_sync: + services: + - service: local_image_registry + endpoint: internal + +conditional_dependencies: + local_image_registry: + jobs: + - kube-metrics-image-repo-sync + services: + - service: local_image_registry + endpoint: node + +endpoints: + cluster_domain_suffix: cluster.local + 
local_image_registry: + name: docker-registry + namespace: docker-registry + hosts: + default: localhost + internal: docker-registry + node: localhost + host_fqdn_override: + default: null + port: + registry: + node: 5000 + kube_metrics: + namespace: null + hosts: + default: kube-metrics + host_fqdn_override: + default: null + path: + default: null + scheme: + default: 'http' + port: + http: + default: 8080 + scrape: true + scrape_port: 8080 + kube_scheduler: + scheme: + default: 'http' + path: + default: /metrics + scrape: true + scrape_port: 10251 + kube_controller_manager: + scheme: + default: 'http' + path: + default: /metrics + scrape: true + scrape_port: 10252 + +network: + kube_state_metrics: + port: 8080 + +manifests: + configmap_bin: true + clusterrole: true + clusterrolebinding: true + deployment: true + job_image_repo_sync: true + rbac_entrypoint: true + service_kube_metrics: true + service_controller_manager: true + service_scheduler: true + serviceaccount: true diff --git a/node-exporter/Chart.yaml b/node-exporter/Chart.yaml new file mode 100644 index 000000000..202cd4c54 --- /dev/null +++ b/node-exporter/Chart.yaml @@ -0,0 +1,24 @@ +# Copyright 2017 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: v1 +description: OpenStack-Helm Node Exporter +name: node-exporter +version: 0.1.0 +home: https://github.com/prometheus/node_exporter +sources: + - https://github.com/prometheus/node_exporter + - https://git.openstack.org/cgit/openstack/openstack-helm-infra +maintainers: + - name: OpenStack-Helm Authors diff --git a/node-exporter/requirements.yaml b/node-exporter/requirements.yaml new file mode 100644 index 000000000..00a045b4e --- /dev/null +++ b/node-exporter/requirements.yaml @@ -0,0 +1,19 @@ + +# Copyright 2017 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +dependencies: + - name: helm-toolkit + repository: http://localhost:8879/charts + version: 0.1.0 diff --git a/node-exporter/templates/clusterrolebinding.yaml b/node-exporter/templates/clusterrolebinding.yaml new file mode 100644 index 000000000..d6873b42f --- /dev/null +++ b/node-exporter/templates/clusterrolebinding.yaml @@ -0,0 +1,32 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.clusterrolebinding }} +{{- $envAll := . }} +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: run-node-exporter +subjects: + - kind: ServiceAccount + name: node-exporter + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: cluster-admin + apiGroup: rbac.authorization.k8s.io +{{- end }} diff --git a/node-exporter/templates/configmap-bin.yaml b/node-exporter/templates/configmap-bin.yaml new file mode 100644 index 000000000..9ffae3c66 --- /dev/null +++ b/node-exporter/templates/configmap-bin.yaml @@ -0,0 +1,27 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.configmap_bin }} +{{- $envAll := . }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: node-exporter-bin +data: + image-repo-sync.sh: |+ +{{- include "helm-toolkit.scripts.image_repo_sync" . | indent 4 }} +{{- end }} diff --git a/node-exporter/templates/daemonset.yaml b/node-exporter/templates/daemonset.yaml new file mode 100644 index 000000000..3cbce45c8 --- /dev/null +++ b/node-exporter/templates/daemonset.yaml @@ -0,0 +1,68 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.daemonset }} +{{- $envAll := . }} +{{- if .Values.images.local_registry.active -}} +{{- $_ := set .Values "pod_dependency" (merge .Values.dependencies.node_exporter .Values.conditional_dependencies.local_image_registry) -}} +{{- else -}} +{{- $_ := set .Values "pod_dependency" .Values.dependencies.node_exporter -}} +{{- end -}} +--- +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: + name: node-exporter + namespace: {{ .Values.endpoints.node_metrics.namespace }} +spec: +{{ tuple $envAll "node_exporter" | include "helm-toolkit.snippets.kubernetes_upgrades_daemonset" | indent 2 }} + template: + metadata: + labels: +{{ tuple $envAll "node_exporter" "metrics" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }} + namespace: {{ .Values.endpoints.node_metrics.namespace }} + spec: + serviceAccount: node-exporter + nodeSelector: + {{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }} + hostNetwork: true + hostPID: true + initContainers: +{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }} + containers: + - name: node-exporter +{{ tuple $envAll "node_exporter" | include "helm-toolkit.snippets.image" | indent 10 }} + ports: + - name: metrics + containerPort: {{ .Values.network.node_exporter.port }} + hostPort: {{ .Values.network.node_exporter.port }} +{{ tuple $envAll $envAll.Values.pod.resources.node_exporter | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} + volumeMounts: + - name: proc + 
mountPath: /host/proc + readOnly: true + - name: sys + mountPath: /host/sys + readOnly: true + volumes: +{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }} + - name: proc + hostPath: + path: /proc + - name: sys + hostPath: + path: /sys +{{- end }} diff --git a/node-exporter/templates/job-image-repo-sync.yaml b/node-exporter/templates/job-image-repo-sync.yaml new file mode 100644 index 000000000..eaeef8f7b --- /dev/null +++ b/node-exporter/templates/job-image-repo-sync.yaml @@ -0,0 +1,65 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.job_image_repo_sync }} +{{- $envAll := . 
}} +{{- if .Values.images.local_registry.active -}} +{{- $_ := set .Values "pod_dependency" .Values.dependencies.image_repo_sync -}} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: node-exporter-image-repo-sync +spec: + template: + metadata: + labels: +{{ tuple $envAll "node-exporter" "image-repo-sync" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }} + spec: + restartPolicy: OnFailure + nodeSelector: + {{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }} + initContainers: +{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }} + containers: + - name: image-repo-sync +{{ tuple $envAll "image_repo_sync" | include "helm-toolkit.snippets.image" | indent 10 }} +{{ tuple $envAll $envAll.Values.pod.resources.jobs.image_repo_sync | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} + env: + - name: LOCAL_REPO + value: "{{ tuple "local_image_registry" "node" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}:{{ tuple "local_image_registry" "node" "registry" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}" + - name: IMAGE_SYNC_LIST + value: "{{ include "helm-toolkit.utils.image_sync_list" . }}" + command: + - /tmp/image-repo-sync.sh + volumeMounts: + - name: node-exporter-bin + mountPath: /tmp/image-repo-sync.sh + subPath: image-repo-sync.sh + readOnly: true + - name: docker-socket + mountPath: /var/run/docker.sock + volumes: + - name: node-exporter-bin + configMap: + name: node-exporter-bin + defaultMode: 0555 + - name: docker-socket + hostPath: + path: /var/run/docker.sock +{{ tuple . 
| include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }} +{{- end }} +{{- end }} diff --git a/node-exporter/templates/rbac-entrypoint.yaml b/node-exporter/templates/rbac-entrypoint.yaml new file mode 100644 index 000000000..82b9916e8 --- /dev/null +++ b/node-exporter/templates/rbac-entrypoint.yaml @@ -0,0 +1,20 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.rbac_entrypoint }} +{{- $envAll := . }} +{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_rbac"}} +{{- end }} diff --git a/node-exporter/templates/service.yaml b/node-exporter/templates/service.yaml new file mode 100644 index 000000000..9568c544a --- /dev/null +++ b/node-exporter/templates/service.yaml @@ -0,0 +1,37 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.service }} +{{- $envAll := . 
}} +{{- $endpoint := $envAll.Values.endpoints.node_metrics }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ tuple "node_metrics" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} + namespace: {{ .Values.endpoints.node_metrics.namespace }} + annotations: +{{ tuple $endpoint $envAll | include "helm-toolkit.snippets.prometheus_service_annotations" | indent 4 }} +spec: + type: ClusterIP + clusterIP: None + ports: + - name: metrics + port: {{ .Values.network.node_exporter.port }} + targetPort: {{ .Values.network.node_exporter.port }} + selector: +{{ tuple $envAll "node_exporter" "metrics" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }} +{{- end }} diff --git a/node-exporter/templates/serviceaccount.yaml b/node-exporter/templates/serviceaccount.yaml new file mode 100644 index 000000000..e036edd7a --- /dev/null +++ b/node-exporter/templates/serviceaccount.yaml @@ -0,0 +1,24 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.serviceaccount }} +{{- $envAll := . }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: node-exporter +{{- end }} diff --git a/node-exporter/values.yaml b/node-exporter/values.yaml new file mode 100644 index 000000000..05ff92d24 --- /dev/null +++ b/node-exporter/values.yaml @@ -0,0 +1,136 @@ +# Copyright 2017 The Openstack-Helm Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default values for node-exporter. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +images: + tags: + node_exporter: docker.io/prom/node-exporter:v0.15.0 + dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.2.1 + image_repo_sync: docker.io/docker:17.07.0 + pull_policy: IfNotPresent + local_registry: + active: false + exclude: + - dep_check + - image_repo_sync + +labels: + node_selector_key: openstack-control-plane + node_selector_value: enabled + +pod: + affinity: + anti: + type: + default: preferredDuringSchedulingIgnoredDuringExecution + topologyKey: + default: kubernetes.io/hostname + mounts: + node_exporter: + node_exporter: + init_container: null + lifecycle: + upgrades: + daemonsets: + pod_replacement_strategy: RollingUpdate + node_exporter: + enabled: true + min_ready_seconds: 0 + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 1 + max_surge: 3 + termination_grace_period: + node_exporter: + timeout: 30 + resources: + enabled: false + node_exporter: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + jobs: + image_repo_sync: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + +dependencies: + node_exporter: + services: null + image_repo_sync: + services: + - service: local_image_registry + endpoint: internal + +conditional_dependencies: + 
local_image_registry: + jobs: + - node-exporter-image-repo-sync + services: + - service: local_image_registry + endpoint: node + +network: + node_exporter: + port: 9100 + +endpoints: + cluster_domain_suffix: cluster.local + local_image_registry: + name: docker-registry + namespace: docker-registry + hosts: + default: localhost + internal: docker-registry + node: localhost + host_fqdn_override: + default: null + port: + registry: + node: 5000 + node_metrics: + namespace: null + hosts: + default: node-exporter + host_fqdn_override: + default: null + path: + default: null + scheme: + default: 'http' + port: + metrics: + default: 9100 + scrape: true + scrape_port: 9100 + +manifests: + configmap_bin: true + clusterrolebinding: true + daemonset: true + job_image_repo_sync: true + rbac_entrypoint: true + service: true + serviceaccount: true diff --git a/prometheus/Chart.yaml b/prometheus/Chart.yaml new file mode 100644 index 000000000..3bd9d57b0 --- /dev/null +++ b/prometheus/Chart.yaml @@ -0,0 +1,24 @@ +# Copyright 2017 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: v1 +description: OpenStack-Helm Prometheus +name: prometheus +version: 0.1.0 +home: https://prometheus.io/ +sources: + - https://github.com/prometheus/prometheus + - https://git.openstack.org/cgit/openstack/openstack-helm-infra +maintainers: + - name: OpenStack-Helm Authors diff --git a/prometheus/requirements.yaml b/prometheus/requirements.yaml new file mode 100644 index 000000000..53782e69b --- /dev/null +++ b/prometheus/requirements.yaml @@ -0,0 +1,18 @@ +# Copyright 2017 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +dependencies: + - name: helm-toolkit + repository: http://localhost:8879/charts + version: 0.1.0 diff --git a/prometheus/templates/bin/_helm-tests.sh.tpl b/prometheus/templates/bin/_helm-tests.sh.tpl new file mode 100644 index 000000000..1c9933e9a --- /dev/null +++ b/prometheus/templates/bin/_helm-tests.sh.tpl @@ -0,0 +1,59 @@ +#!/bin/bash +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/}} + + +set -ex + +function endpoints_up () { # Pass when an instant query for the "up" metric returns status "success". + endpoints_result=$(curl "${PROMETHEUS_ENDPOINT}/api/v1/query?query=up" \ + | python -c "import sys, json; print(json.load(sys.stdin)['status'])") # print() form runs under Python 2 and 3 + if [ "$endpoints_result" = "success" ]; + then + echo "PASS: Endpoints successfully queried!" + else + echo "FAIL: Endpoints not queried!"; + exit 1; + fi +} + +function get_targets () { # Pass when the targets API returns status "success". + targets_result=$(curl "${PROMETHEUS_ENDPOINT}/api/v1/targets" \ + | python -c "import sys, json; print(json.load(sys.stdin)['status'])") + if [ "$targets_result" = "success" ]; + then + echo "PASS: Targets successfully queried!" + else + echo "FAIL: Targets not queried!"; + exit 1; + fi +} + +function get_alertmanagers () { # Pass when the alertmanagers API returns status "success". + alertmanager=$(curl "${PROMETHEUS_ENDPOINT}/api/v1/alertmanagers" \ + | python -c "import sys, json; print(json.load(sys.stdin)['status'])") + if [ "$alertmanager" = "success" ]; + then + echo "PASS: Alertmanager successfully queried!" + else + echo "FAIL: Alertmanager not queried!"; + exit 1; + fi +} + +endpoints_up +get_targets +get_alertmanagers diff --git a/prometheus/templates/bin/_prometheus.sh.tpl b/prometheus/templates/bin/_prometheus.sh.tpl new file mode 100644 index 000000000..2b95c973c --- /dev/null +++ b/prometheus/templates/bin/_prometheus.sh.tpl @@ -0,0 +1,38 @@ +#!/bin/sh + +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+*/}} + +set -ex +COMMAND="${@:-start}" # sub-command comes from the script arguments; defaults to "start" + +start () { # POSIX-style definition (no "function" keyword); exec replaces this shell with the Prometheus server + exec /bin/prometheus \ + -config.file=/etc/config/prometheus.yml \ + -alertmanager.url={{ tuple "alerts" "internal" "api" . | include "helm-toolkit.endpoints.host_and_port_endpoint_uri_lookup" }} \ + -storage.local.path={{ .Values.conf.prometheus.storage.local.path }} \ + -storage.local.retention={{ .Values.conf.prometheus.storage.local.retention }} \ + -log.format={{ .Values.conf.prometheus.log.format | quote }} \ + -log.level={{ .Values.conf.prometheus.log.level | quote }} \ + -query.max-concurrency={{ .Values.conf.prometheus.query.max_concurrency }} \ + -query.timeout={{ .Values.conf.prometheus.query.timeout }} +} + +stop () { # send SIGTERM to PID 1 + kill -TERM 1 +} + +$COMMAND diff --git a/prometheus/templates/clusterrole.yaml b/prometheus/templates/clusterrole.yaml new file mode 100644 index 000000000..6883aef35 --- /dev/null +++ b/prometheus/templates/clusterrole.yaml @@ -0,0 +1,46 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/}} + +{{- if .Values.manifests.clusterrole }} +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRole +metadata: + name: prometheus-runner +rules: + - apiGroups: + - "" + resources: + - nodes + - nodes/proxy + - services + - endpoints + - pods + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - nonResourceURLs: + - "/metrics" + verbs: + - get +{{- end }} diff --git a/prometheus/templates/clusterrolebinding.yaml b/prometheus/templates/clusterrolebinding.yaml new file mode 100644 index 000000000..c59589ca4 --- /dev/null +++ b/prometheus/templates/clusterrolebinding.yaml @@ -0,0 +1,32 @@ + +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.clusterrolebinding }} +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: run-prometheus +subjects: + - kind: ServiceAccount + name: prometheus + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: prometheus-runner + apiGroup: rbac.authorization.k8s.io +{{- end }} diff --git a/prometheus/templates/configmap-bin.yaml b/prometheus/templates/configmap-bin.yaml new file mode 100644 index 000000000..8aaf24e49 --- /dev/null +++ b/prometheus/templates/configmap-bin.yaml @@ -0,0 +1,31 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.configmap_bin }} +{{- $envAll := . }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-bin +data: + prometheus.sh: | +{{ tuple "bin/_prometheus.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} + helm-tests.sh: | +{{ tuple "bin/_helm-tests.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} + image-repo-sync.sh: |+ +{{- include "helm-toolkit.scripts.image_repo_sync" . | indent 4 }} +{{- end }} diff --git a/prometheus/templates/configmap-etc.yaml b/prometheus/templates/configmap-etc.yaml new file mode 100644 index 000000000..29c472822 --- /dev/null +++ b/prometheus/templates/configmap-etc.yaml @@ -0,0 +1,27 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.configmap_etc }} +{{- $envAll := . 
}} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-etc +data: + prometheus.yml: +{{- toYaml .Values.conf.prometheus.scrape_configs | indent 4 }} +{{- end }} diff --git a/prometheus/templates/configmap-rules.yaml b/prometheus/templates/configmap-rules.yaml new file mode 100644 index 000000000..d3ed93a02 --- /dev/null +++ b/prometheus/templates/configmap-rules.yaml @@ -0,0 +1,47 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.configmap_rules }} +{{- $envAll := . 
}} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-rules +data: + alertmanager.rules: +{{ toYaml .Values.conf.prometheus.rules.alertmanager | indent 4 }} + etcd3.rules: +{{ toYaml .Values.conf.prometheus.rules.etcd3 | indent 4 }} + kube-apiserver.rules: +{{ toYaml .Values.conf.prometheus.rules.kube_apiserver | indent 4 }} + kube-controller-manager.rules: +{{ toYaml .Values.conf.prometheus.rules.kube_controller_manager | indent 4 }} + kubelet.rules: +{{ toYaml .Values.conf.prometheus.rules.kubelet | indent 4 }} + kubernetes.rules: +{{ toYaml .Values.conf.prometheus.rules.kubernetes | indent 4 }} + rabbitmq.rules: +{{ toYaml .Values.conf.prometheus.rules.rabbitmq | indent 4 }} + mysql.rules: +{{ toYaml .Values.conf.prometheus.rules.mysql | indent 4 }} + ceph.rules: +{{ toYaml .Values.conf.prometheus.rules.ceph | indent 4 }} + openstack.rules: +{{ toYaml .Values.conf.prometheus.rules.openstack | indent 4 }} + custom.rules: +{{ toYaml .Values.conf.prometheus.rules.custom | indent 4 }} +{{- end }} diff --git a/prometheus/templates/ingress-prometheus.yaml b/prometheus/templates/ingress-prometheus.yaml new file mode 100644 index 000000000..6a62a94ec --- /dev/null +++ b/prometheus/templates/ingress-prometheus.yaml @@ -0,0 +1,60 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.ingress_prometheus }} +{{- $envAll := . 
}} +{{- if .Values.network.prometheus.ingress.public }} +{{- $backendServiceType := "monitoring" }} +{{- $backendPort := "prom-metrics" }} +{{- $ingressName := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} +{{- $backendName := tuple $backendServiceType "internal" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} +{{- $hostName := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} +{{- $hostNameNamespaced := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_namespaced_endpoint_lookup" }} +{{- $hostNameFull := tuple $backendServiceType "public" $envAll | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }} +--- +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: {{ $ingressName }} + annotations: + kubernetes.io/ingress.class: "nginx" + ingress.kubernetes.io/rewrite-target: / + ingress.kubernetes.io/proxy-body-size: {{ .Values.network.prometheus.ingress.proxy_body_size | quote }} # quoted: annotation values must be strings, even for numeric overrides such as 0 +spec: + rules: +{{ if ne $hostNameNamespaced $hostNameFull }} +{{- range $key1, $vHost := tuple $hostName $hostNameNamespaced $hostNameFull }} + - host: {{ $vHost }} + http: + paths: + - path: / + backend: + serviceName: {{ $backendName }} + servicePort: {{ $backendPort }} +{{- end }} +{{- else }} +{{- range $key1, $vHost := tuple $hostName $hostNameNamespaced }} + - host: {{ $vHost }} + http: + paths: + - path: / + backend: + serviceName: {{ $backendName }} + servicePort: {{ $backendPort }} +{{- end }} +{{- end }} +{{- end }} +{{- end }} diff --git a/prometheus/templates/job-image-repo-sync.yaml b/prometheus/templates/job-image-repo-sync.yaml new file mode 100644 index 000000000..57c58f830 --- /dev/null +++ b/prometheus/templates/job-image-repo-sync.yaml @@ -0,0 +1,65 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.job_image_repo_sync }} +{{- $envAll := . }} +{{- if .Values.images.local_registry.active -}} +{{- $_ := set .Values "pod_dependency" .Values.dependencies.image_repo_sync -}} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: prometheus-image-repo-sync +spec: + template: + metadata: + labels: +{{ tuple $envAll "prometheus" "image-repo-sync" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }} + spec: + restartPolicy: OnFailure + nodeSelector: + {{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }} + initContainers: +{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }} + containers: + - name: image-repo-sync +{{ tuple $envAll "image_repo_sync" | include "helm-toolkit.snippets.image" | indent 10 }} +{{ tuple $envAll $envAll.Values.pod.resources.jobs.image_repo_sync | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} + env: + - name: LOCAL_REPO + value: "{{ tuple "local_image_registry" "node" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}:{{ tuple "local_image_registry" "node" "registry" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}" + - name: IMAGE_SYNC_LIST + value: "{{ include "helm-toolkit.utils.image_sync_list" . 
}}" + command: + - /tmp/image-repo-sync.sh + volumeMounts: + - name: prometheus-bin + mountPath: /tmp/image-repo-sync.sh + subPath: image-repo-sync.sh + readOnly: true + - name: docker-socket + mountPath: /var/run/docker.sock + volumes: + - name: prometheus-bin + configMap: + name: prometheus-bin + defaultMode: 0555 + - name: docker-socket + hostPath: + path: /var/run/docker.sock +{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }} +{{- end }} +{{- end }} diff --git a/prometheus/templates/pod-helm-tests.yaml b/prometheus/templates/pod-helm-tests.yaml new file mode 100644 index 000000000..96a717519 --- /dev/null +++ b/prometheus/templates/pod-helm-tests.yaml @@ -0,0 +1,46 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.helm_tests }} +{{- $envAll := . 
}} +--- +apiVersion: v1 +kind: Pod +metadata: + name: "{{.Release.Name}}-test" + annotations: + "helm.sh/hook": test-success +spec: + restartPolicy: Never + containers: + - name: {{.Release.Name}}-helm-tests +{{ tuple $envAll "helm_tests" | include "helm-toolkit.snippets.image" | indent 6 }} + command: + - /tmp/helm-tests.sh + env: + - name: PROMETHEUS_ENDPOINT + value: {{ tuple "monitoring" "internal" "api" $envAll | include "helm-toolkit.endpoints.host_and_port_endpoint_uri_lookup" }} + volumeMounts: + - name: prometheus-bin + mountPath: /tmp/helm-tests.sh + subPath: helm-tests.sh + readOnly: true + volumes: + - name: prometheus-bin + configMap: + name: prometheus-bin + defaultMode: 0555 +{{- end }} diff --git a/prometheus/templates/pvc.yaml b/prometheus/templates/pvc.yaml new file mode 100644 index 000000000..7bf281b8d --- /dev/null +++ b/prometheus/templates/pvc.yaml @@ -0,0 +1,31 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.pvc }} +{{- $envAll := . 
}} +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: {{ .Values.storage.pvc.name }} +spec: + accessModes: + - {{ .Values.storage.pvc.access_mode }} + resources: + requests: + storage: {{ .Values.storage.requests.storage }} + storageClassName: {{ .Values.storage.storage_class }} +{{- end }} diff --git a/prometheus/templates/rbac-entrypoint.yaml b/prometheus/templates/rbac-entrypoint.yaml new file mode 100644 index 000000000..64d1b45ab --- /dev/null +++ b/prometheus/templates/rbac-entrypoint.yaml @@ -0,0 +1,20 @@ + +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.rbac_entrypoint }} +{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_rbac"}} +{{- end }} diff --git a/prometheus/templates/service-ingress-prometheus.yaml b/prometheus/templates/service-ingress-prometheus.yaml new file mode 100644 index 000000000..62bc2511b --- /dev/null +++ b/prometheus/templates/service-ingress-prometheus.yaml @@ -0,0 +1,32 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.service_ingress_prometheus }} +{{- if .Values.network.prometheus.ingress.public }} +{{- $envAll := . }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ tuple "monitoring" "public" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} +spec: + ports: + - name: http + port: 80 + selector: + app: ingress-api +{{- end }} +{{- end }} diff --git a/prometheus/templates/service.yaml b/prometheus/templates/service.yaml new file mode 100644 index 000000000..b28de8f95 --- /dev/null +++ b/prometheus/templates/service.yaml @@ -0,0 +1,39 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.service }} +{{- $envAll := . }} +{{- $endpoint := $envAll.Values.endpoints.monitoring }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ tuple "monitoring" "internal" . 
| include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} + annotations: +{{ tuple $endpoint $envAll | include "helm-toolkit.snippets.prometheus_service_annotations" | indent 4 }} +spec: + ports: + - name: prom-metrics + port: {{ .Values.network.prometheus.port }} + {{ if .Values.network.prometheus.node_port.enabled }} + nodePort: {{ .Values.network.prometheus.node_port.port }} + {{ end }} + selector: +{{ tuple $envAll "prometheus" "api" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }} + {{ if .Values.network.prometheus.node_port.enabled }} + type: NodePort + {{ end }} +{{- end }} diff --git a/prometheus/templates/serviceaccount.yaml b/prometheus/templates/serviceaccount.yaml new file mode 100644 index 000000000..dd8d7fef6 --- /dev/null +++ b/prometheus/templates/serviceaccount.yaml @@ -0,0 +1,22 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.serviceaccount }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus +{{- end }} diff --git a/prometheus/templates/statefulset.yaml b/prometheus/templates/statefulset.yaml new file mode 100644 index 000000000..3dda9d4f8 --- /dev/null +++ b/prometheus/templates/statefulset.yaml @@ -0,0 +1,158 @@ +{{/* +Copyright 2017 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.statefulset_prometheus }} +{{- $envAll := . }} +{{- if .Values.images.local_registry.active -}} +{{- $_ := set .Values "pod_dependency" (merge .Values.dependencies.prometheus .Values.conditional_dependencies.local_image_registry) -}} +{{- else -}} +{{- $_ := set .Values "pod_dependency" .Values.dependencies.prometheus -}} +{{- end -}} +{{- $mounts_prometheus := .Values.pod.mounts.prometheus.prometheus }} +{{- $mounts_prometheus_init := .Values.pod.mounts.prometheus.init_container }} +--- +apiVersion: apps/v1beta1 +kind: StatefulSet +metadata: + name: prometheus +spec: + serviceName: {{ tuple "monitoring" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} + replicas: {{ .Values.pod.replicas.prometheus }} + template: + metadata: + labels: +{{ tuple $envAll "prometheus" "api" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }} + annotations: + configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }} + configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }} + configmap-rules-hash: {{ tuple "configmap-rules.yaml" . 
| include "helm-toolkit.utils.hash" }} + spec: + serviceAccount: prometheus + affinity: +{{ tuple $envAll "prometheus" "api" | include "helm-toolkit.snippets.kubernetes_pod_anti_affinity" | indent 8 }} + nodeSelector: + {{ .Values.labels.node_selector_key }}: {{ .Values.labels.node_selector_value }} + terminationGracePeriodSeconds: {{ .Values.pod.lifecycle.termination_grace_period.prometheus.timeout | default "30" }} + initContainers: +{{ tuple $envAll .Values.pod_dependency list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }} + containers: + - name: prometheus +{{ tuple $envAll "prometheus" | include "helm-toolkit.snippets.image" | indent 10 }} + command: + - /tmp/prometheus.sh + - start + lifecycle: + preStop: + exec: + command: + - /tmp/prometheus.sh + - stop +{{ tuple $envAll $envAll.Values.pod.resources.prometheus | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} + ports: + - name: prom-metrics + containerPort: {{ .Values.network.prometheus.port }} + readinessProbe: + httpGet: + path: /status + port: {{ .Values.network.prometheus.port }} + initialDelaySeconds: 30 + timeoutSeconds: 30 + volumeMounts: + - name: etcprometheus + mountPath: /etc/config + - name: rulesprometheus + mountPath: /etc/config/rules + - name: prometheus-rules + mountPath: /etc/config/rules/alertmanager.rules + subPath: alertmanager.rules + readOnly: true + - name: prometheus-rules + mountPath: /etc/config/rules/etcd3.rules + subPath: etcd3.rules + readOnly: true + - name: prometheus-rules + mountPath: /etc/config/rules/kubernetes.rules + subPath: kubernetes.rules + readOnly: true + - name: prometheus-rules + mountPath: /etc/config/rules/kube-apiserver.rules + subPath: kube-apiserver.rules + readOnly: true + - name: prometheus-rules + mountPath: /etc/config/rules/kube-controller-manager.rules + subPath: kube-controller-manager.rules + readOnly: true + - name: prometheus-rules + mountPath: /etc/config/rules/kubelet.rules + subPath: 
kubelet.rules + readOnly: true + - name: prometheus-rules + mountPath: /etc/config/rules/rabbitmq.rules + subPath: rabbitmq.rules + readOnly: true + - name: prometheus-rules + mountPath: /etc/config/rules/mysql.rules + subPath: mysql.rules + readOnly: true + - name: prometheus-rules + mountPath: /etc/config/rules/ceph.rules + subPath: ceph.rules + readOnly: true + - name: prometheus-rules + mountPath: /etc/config/rules/openstack.rules + subPath: openstack.rules + readOnly: true + - name: prometheus-rules + mountPath: /etc/config/rules/custom.rules + subPath: custom.rules + readOnly: true + - name: prometheus-etc + mountPath: /etc/config/prometheus.yml + subPath: prometheus.yml + readOnly: true + - name: prometheus-bin + mountPath: /tmp/prometheus.sh + subPath: prometheus.sh + readOnly: true + - name: storage + mountPath: /var/lib/prometheus/data +{{ if $mounts_prometheus.volumeMounts }}{{ toYaml $mounts_prometheus.volumeMounts | indent 12 }}{{ end }} + volumes: +{{ tuple . | include "helm-toolkit.snippets.kubernetes_entrypoint_secret_mount" | indent 8 }} + - name: etcprometheus + emptyDir: {} + - name: rulesprometheus + emptyDir: {} + - name: prometheus-rules + configMap: + name: prometheus-rules + - name: prometheus-etc + configMap: + name: prometheus-etc + - name: prometheus-bin + configMap: + name: prometheus-bin + defaultMode: 0555 + {{- if .Values.storage.enabled }} + - name: storage + persistentVolumeClaim: + claimName: {{ .Values.storage.pvc.name }} + {{- else }} + - name: storage + emptyDir: {} + {{- end }} +{{ if $mounts_prometheus.volumes }}{{ toYaml $mounts_prometheus.volumes | indent 8 }}{{ end }} +{{- end }} diff --git a/prometheus/values.yaml b/prometheus/values.yaml new file mode 100644 index 000000000..d6eec3c52 --- /dev/null +++ b/prometheus/values.yaml @@ -0,0 +1,907 @@ +# Copyright 2017 The Openstack-Helm Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default values for prometheus. +# This is a YAML-formatted file. +# Declare name/value pairs to be passed into your templates. +# name: value + +images: + tags: + prometheus: docker.io/prom/prometheus:v1.7.1 + helm_tests: docker.io/kolla/ubuntu-source-kolla-toolbox:3.0.3 + dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.2.1 + image_repo_sync: docker.io/docker:17.07.0 + pull_policy: IfNotPresent + local_registry: + active: false + exclude: + - dep_check + - image_repo_sync + +labels: + node_selector_key: openstack-control-plane + node_selector_value: enabled + +pod: + affinity: + anti: + type: + default: preferredDuringSchedulingIgnoredDuringExecution + topologyKey: + default: kubernetes.io/hostname + mounts: + prometheus: + prometheus: + init_container: null + replicas: + prometheus: 1 + lifecycle: + upgrades: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 1 + max_surge: 3 + termination_grace_period: + prometheus: + timeout: 30 + resources: + enabled: false + prometheus: + limits: + memory: "1024Mi" + cpu: "2000m" + requests: + memory: "128Mi" + cpu: "500m" + +endpoints: + cluster_domain_suffix: cluster.local + local_image_registry: + name: docker-registry + namespace: docker-registry + hosts: + default: localhost + internal: docker-registry + node: localhost + host_fqdn_override: + default: null + port: + registry: + node: 5000 + monitoring: + name: 
prometheus + namespace: null + hosts: + default: prom-metrics + public: prometheus + host_fqdn_override: + default: null + path: + default: null + scheme: + default: 'http' + port: + api: + default: 9090 + public: 80 + scrape: true + scrape_port: 9090 + alerts: + name: alertmanager + namespace: null + hosts: + default: alerts-api + public: alertmanager + host_fqdn_override: + default: null + path: + default: null + scheme: + default: 'http' + port: + api: + default: 9093 + public: 80 + +dependencies: + prometheus: + services: null + image_repo_sync: + services: + - service: local_image_registry + endpoint: internal + +conditional_dependencies: + local_image_registry: + jobs: + - prometheus-image-repo-sync + services: + - service: local_image_registry + endpoint: node + +network: + prometheus: + ingress: + public: true + proxy_body_size: 1024M + node_port: + enabled: false + port: 30900 + port: 9090 + +storage: + enabled: true + pvc: + name: prometheus-pvc + access_mode: ReadWriteMany + requests: + storage: 5Gi + storage_class: general + +manifests: + clusterrole: true + clusterrolebinding: true + configmap_bin: true + configmap_etc: true + configmap_rules: true + ingress_prometheus: true + helm_tests: true + job_image_repo_sync: true + pvc: true + rbac_entrypoint: true + service_ingress_prometheus: true + service: true + serviceaccount: true + statefulset_prometheus: true + +conf: + prometheus: + storage: + local: + path: /var/lib/prometheus/data + retention: 168h0m0s + log: + format: logger:stdout?json=true + level: info + query: + max_concurrency: 20 + timeout: 2m0s + scrape_configs: | + global: + scrape_interval: 25s + evaluation_interval: 10s + rule_files: + - /etc/config/rules/alertmanager.rules + - /etc/config/rules/etcd3.rules + - /etc/config/rules/kubernetes.rules + - /etc/config/rules/kube-apiserver.rules + - /etc/config/rules/kube-controller-manager.rules + - /etc/config/rules/kubelet.rules + - /etc/config/rules/kube-scheduler.rules + - 
/etc/config/rules/rabbitmq.rules + - /etc/config/rules/mysql.rules + - /etc/config/rules/ceph.rules + - /etc/config/rules/openstack.rules + - /etc/config/rules/custom.rules + scrape_configs: + - job_name: kubelet + scheme: https + # This TLS & bearer token file config is used to connect to the actual scrape + # endpoints for cluster components. This is separate to discovery auth + # configuration because discovery & scraping are two separate concerns in + # Prometheus. The discovery auth config is automatic if Prometheus runs inside + # the cluster. Otherwise, more config options have to be provided within the + # `kubernetes_sd_configs` section. + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + scrape_interval: 45s + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + - source_labels: [__meta_kubernetes_node_name] + action: replace + target_label: kubernetes_io_hostname + # Scrape config for Kubelet cAdvisor. + # + # This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics + # (those whose names begin with 'container_') have been removed from the + # Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to + # retrieve those metrics. + # + # In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor + # HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics" + # in that case (and ensure cAdvisor's HTTP server hasn't been disabled with + # the --cadvisor-port=0 Kubelet flag). + # + # This job is not necessary and should be removed in Kubernetes 1.6 and + # earlier versions, or it will cause the metrics to be scraped twice.
+ - job_name: 'kubernetes-cadvisor' + # Default to scraping over https. If required, just disable this or change to + # `http`. + scheme: https + # This TLS & bearer token file config is used to connect to the actual scrape + # endpoints for cluster components. This is separate to discovery auth + # configuration because discovery & scraping are two separate concerns in + # Prometheus. The discovery auth config is automatic if Prometheus runs inside + # the cluster. Otherwise, more config options have to be provided within the + # `kubernetes_sd_configs` section. + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + scrape_interval: 45s + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor + - source_labels: [__meta_kubernetes_node_name] + action: replace + target_label: kubernetes_io_hostname + metric_relabel_configs: + - action: replace + source_labels: [id] + regex: '^/machine\.slice/machine-rkt\\x2d([^\\]+)\\.+/([^/]+)\.service$' + target_label: rkt_container_name + replacement: '${2}-${1}' + - action: replace + source_labels: [id] + regex: '^/system\.slice/(.+)\.service$' + target_label: systemd_service_name + replacement: '${1}' + # Scrape config for API servers. + # + # Kubernetes exposes API servers as endpoints to the default/kubernetes + # service so this uses `endpoints` role and uses relabelling to only keep + # the endpoints associated with the default/kubernetes service using the + # default named port `https`. This works for single API server deployments as + # well as HA API server deployments.
+ - job_name: 'apiserver' + kubernetes_sd_configs: + - role: endpoints + scrape_interval: 45s + # Default to scraping over https. If required, just disable this or change to + # `http`. + scheme: https + # This TLS & bearer token file config is used to connect to the actual scrape + # endpoints for cluster components. This is separate to discovery auth + # configuration because discovery & scraping are two separate concerns in + # Prometheus. The discovery auth config is automatic if Prometheus runs inside + # the cluster. Otherwise, more config options have to be provided within the + # `kubernetes_sd_configs` section. + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + # If your node certificates are self-signed or use a different CA to the + # master CA, then disable certificate verification below. Note that + # certificate verification is an integral part of a secure infrastructure + # so this should only be disabled in a controlled environment. You can + # disable certificate verification by uncommenting the line below. + # + # insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + # Keep only the default/kubernetes service endpoints for the https port. This + # will add targets for each API server which Kubernetes adds an endpoint to + # the default/kubernetes service. + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: default;kubernetes;https + # Scrape config for service endpoints. + # + # The relabeling allows the actual service scrape endpoint to be configured + # via the following annotations: + # + # * `prometheus.io/scrape`: Only scrape services that have a value of `true` + # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need + # to set this to `https` & most likely set the `tls_config` of the scrape config.
+ # * `prometheus.io/path`: If the metrics path is not `/metrics` override this. + # * `prometheus.io/port`: If the metrics are exposed on a different port to the + # service then set this appropriately. + - job_name: 'kubernetes-service-endpoints' + kubernetes_sd_configs: + - role: endpoints + scrape_interval: 60s + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] + action: replace + target_label: __scheme__ + regex: (https?) + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] + action: replace + target_label: __address__ + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_service_name] + action: replace + target_label: kubernetes_name + - source_labels: + - __meta_kubernetes_service_name + target_label: job + replacement: ${1} + - job_name: calico-etcd + honor_labels: false + kubernetes_sd_configs: + - role: service + scrape_interval: 20s + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - action: keep + source_labels: + - __meta_kubernetes_service_name + regex: "calico-etcd" + - action: keep + source_labels: + - __meta_kubernetes_namespace + regex: kube-system + target_label: namespace + - source_labels: + - __meta_kubernetes_pod_name + target_label: pod + - source_labels: + - __meta_kubernetes_service_name + target_label: service + - source_labels: + - __meta_kubernetes_service_name + target_label: job + replacement: ${1} + - source_labels: + - __meta_kubernetes_service_label + 
target_label: job + regex: calico-etcd + replacement: ${1} + - target_label: endpoint + replacement: "calico-etcd" + alerting: + alertmanagers: + - kubernetes_sd_configs: + - role: endpoints + scheme: http + relabel_configs: + - action: keep + source_labels: + - __meta_kubernetes_service_name + regex: alerts-api + - action: keep + source_labels: + - __meta_kubernetes_namespace + regex: monitoring + - action: keep + source_labels: + - __meta_kubernetes_endpoint_port_name + regex: alerts-api + rules: + alertmanager: |- + ALERT AlertmanagerConfigInconsistent + IF count_values by (service) ("config_hash", alertmanager_config_hash) + / on(service) group_left + label_replace(prometheus_operator_alertmanager_spec_replicas, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1 + FOR 5m + LABELS { + severity = "critical" + } + ANNOTATIONS { + summary = "Alertmanager configurations are inconsistent", + description = "The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync." + } + + ALERT AlertmanagerDownOrMissing + IF label_replace(prometheus_operator_alertmanager_spec_replicas, "job", "alertmanager-$1", "alertmanager", "(.*)") + / on(job) group_right + sum by(job) (up) != 1 + FOR 5m + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "Alertmanager down or not discovered", + description = "An unexpected number of Alertmanagers are scraped or Alertmanagers disappeared from discovery." + } + + ALERT FailedReload + IF alertmanager_config_last_reload_successful == 0 + FOR 10m + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "Alertmanager configuration reload has failed", + description = "Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}." 
+ } + etcd3: |- + # general cluster availability + # alert if another failed member will result in an unavailable cluster + ALERT InsufficientMembers + + IF count(up{job="etcd"} == 0) > (count(up{job="etcd"}) / 2 - 1) + FOR 3m + LABELS { + severity = "critical" + } + ANNOTATIONS { + summary = "etcd cluster insufficient members", + description = "If one more etcd member goes down the cluster will be unavailable", + } + + # etcd leader alerts + # ================== + # alert if any etcd instance has no leader + ALERT NoLeader + IF etcd_server_has_leader{job="etcd"} == 0 + FOR 1m + LABELS { + severity = "critical" + } + ANNOTATIONS { + summary = "etcd member has no leader", + description = "etcd member {{ $labels.instance }} has no leader", + } + + # alert if there are lots of leader changes + ALERT HighNumberOfLeaderChanges + IF increase(etcd_server_leader_changes_seen_total{job="etcd"}[1h]) > 3 + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "a high number of leader changes within the etcd cluster are happening", + description = "etcd instance {{ $labels.instance }} has seen {{ $value }} leader changes within the last hour", + } + + # gRPC request alerts + # =================== + # alert if more than 1% of gRPC method calls have failed within the last 5 minutes + ALERT HighNumberOfFailedGRPCRequests + IF sum by(grpc_method) (rate(etcd_grpc_requests_failed_total{job="etcd"}[5m])) + / sum by(grpc_method) (rate(etcd_grpc_total{job="etcd"}[5m])) > 0.01 + FOR 10m + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "a high number of gRPC requests are failing", + description = "{{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}", + } + + # alert if more than 5% of gRPC method calls have failed within the last 5 minutes + ALERT HighNumberOfFailedGRPCRequests + IF sum by(grpc_method) (rate(etcd_grpc_requests_failed_total{job="etcd"}[5m])) + / sum by(grpc_method) 
(rate(etcd_grpc_total{job="etcd"}[5m])) > 0.05 + FOR 5m + LABELS { + severity = "critical" + } + ANNOTATIONS { + summary = "a high number of gRPC requests are failing", + description = "{{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}", + } + + # alert if the 99th percentile of gRPC method calls take more than 150ms + ALERT GRPCRequestsSlow + IF histogram_quantile(0.99, rate(etcd_grpc_unary_requests_duration_seconds_bucket[5m])) > 0.15 + FOR 10m + LABELS { + severity = "critical" + } + ANNOTATIONS { + summary = "slow gRPC requests", + description = "on etcd instance {{ $labels.instance }} gRPC requests to {{ $labels.grpc_method }} are slow", + } + + # HTTP requests alerts + # ==================== + # alert if more than 1% of requests to an HTTP endpoint have failed within the last 5 minutes + ALERT HighNumberOfFailedHTTPRequests + IF sum by(method) (rate(etcd_http_failed_total{job="etcd"}[5m])) + / sum by(method) (rate(etcd_http_received_total{job="etcd"}[5m])) > 0.01 + FOR 10m + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "a high number of HTTP requests are failing", + description = "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}", + } + + # alert if more than 5% of requests to an HTTP endpoint have failed within the last 5 minutes + ALERT HighNumberOfFailedHTTPRequests + IF sum by(method) (rate(etcd_http_failed_total{job="etcd"}[5m])) + / sum by(method) (rate(etcd_http_received_total{job="etcd"}[5m])) > 0.05 + FOR 5m + LABELS { + severity = "critical" + } + ANNOTATIONS { + summary = "a high number of HTTP requests are failing", + description = "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}", + } + + # alert if the 99th percentile of HTTP requests take more than 150ms + ALERT HTTPRequestsSlow + IF histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m])) > 0.15 + FOR 
10m + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "slow HTTP requests", + description = "on etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow", + } + + # etcd member communication alerts + # ================================ + # alert if 99th percentile of round trips take 150ms + ALERT EtcdMemberCommunicationSlow + IF histogram_quantile(0.99, rate(etcd_network_member_round_trip_time_seconds_bucket[5m])) > 0.15 + FOR 10m + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "etcd member communication is slow", + description = "etcd instance {{ $labels.instance }} member communication with {{ $labels.To }} is slow", + } + + # etcd proposal alerts + # ==================== + # alert if there are several failed proposals within an hour + ALERT HighNumberOfFailedProposals + IF increase(etcd_server_proposals_failed_total{job="etcd"}[1h]) > 5 + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "a high number of proposals within the etcd cluster are failing", + description = "etcd instance {{ $labels.instance }} has seen {{ $value }} proposal failures within the last hour", + } + + # etcd disk io latency alerts + # =========================== + # alert if 99th percentile of fsync durations is higher than 500ms + ALERT HighFsyncDurations + IF histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) > 0.5 + FOR 10m + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "high fsync durations", + description = "etcd instance {{ $labels.instance }} fsync durations are high", + } + + # alert if 99th percentile of commit durations is higher than 250ms + ALERT HighCommitDurations + IF histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) > 0.25 + FOR 10m + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "high commit durations", + description = "etcd instance {{ $labels.instance }} commit durations are high", + } +
kube_apiserver: |- + ALERT K8SApiserverDown + IF absent(up{job="apiserver"} == 1) + FOR 5m + LABELS { + severity = "critical" + } + ANNOTATIONS { + summary = "API server unreachable", + description = "Prometheus failed to scrape API server(s), or all API servers have disappeared from service discovery.", + } + + # Some verbs excluded because they are expected to be long-lasting: + # WATCHLIST is long-poll, CONNECT is `kubectl exec`. + # + # apiserver_request_latencies' unit is microseconds + ALERT K8SApiServerLatency + IF histogram_quantile( + 0.99, + sum without (instance,resource) (apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST|WATCH|PROXY"}) + ) / 1e6 > 1.0 + FOR 10m + LABELS { + severity = "warning" + } + ANNOTATIONS { + summary = "Kubernetes apiserver latency is high", + description = "99th percentile Latency for {{ $labels.verb }} requests to the kube-apiserver is higher than 1s.", + } + + kube_controller_manager: |- + ALERT K8SControllerManagerDown + IF absent(up{job="kube-controller-manager"} == 1) + FOR 5m + LABELS { + severity = "critical", + } + ANNOTATIONS { + summary = "Controller manager is down", + description = "There is no running K8S controller manager. 
Deployments and replication controllers are not making progress.", + runbook = "https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-controller-manager", + } + + kubelet: |- + ALERT K8SNodeNotReady + IF kube_node_status_ready{condition="true"} == 0 + FOR 1h + LABELS { + severity = "warning", + } + ANNOTATIONS { + summary = "Node status is NotReady", + description = "The Kubelet on {{ $labels.node }} has not checked in with the API, or has set itself to NotReady, for more than an hour", + } + + ALERT K8SManyNodesNotReady + IF + count(kube_node_status_ready{condition="true"} == 0) > 1 + AND + ( + count(kube_node_status_ready{condition="true"} == 0) + / + count(kube_node_status_ready{condition="true"}) + ) > 0.2 + FOR 1m + LABELS { + severity = "critical", + } + ANNOTATIONS { + summary = "Many Kubernetes nodes are Not Ready", + description = "{{ $value }} Kubernetes nodes (more than 20% are in the NotReady state).", + } + + ALERT K8SKubeletDown + IF count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.03 + FOR 1h + LABELS { + severity = "warning", + } + ANNOTATIONS { + summary = "Many Kubelets cannot be scraped", + description = "Prometheus failed to scrape {{ $value }}% of kubelets.", + } + + ALERT K8SKubeletDown + IF absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.1 + FOR 1h + LABELS { + severity = "critical", + } + ANNOTATIONS { + summary = "Many Kubelets cannot be scraped", + description = "Prometheus failed to scrape {{ $value }}% of kubelets, or all Kubelets have disappeared from service discovery.", + } + + ALERT K8SKubeletTooManyPods + IF kubelet_running_pod_count > 100 + LABELS { + severity = "warning", + } + ANNOTATIONS { + summary = "Kubelet is close to pod limit", + description = "Kubelet {{$labels.instance}} is running {{$value}} pods, close to the limit of 110", + } + + kubernetes: |- + # NOTE: These rules were kindly contributed by the SoundCloud engineering
team. + + ### Container resources ### + + cluster_namespace_controller_pod_container:spec_memory_limit_bytes = + sum by (cluster,namespace,controller,pod_name,container_name) ( + label_replace( + container_spec_memory_limit_bytes{container_name!=""}, + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + + cluster_namespace_controller_pod_container:spec_cpu_shares = + sum by (cluster,namespace,controller,pod_name,container_name) ( + label_replace( + container_spec_cpu_shares{container_name!=""}, + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + + cluster_namespace_controller_pod_container:cpu_usage:rate = + sum by (cluster,namespace,controller,pod_name,container_name) ( + label_replace( + irate( + container_cpu_usage_seconds_total{container_name!=""}[5m] + ), + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + + cluster_namespace_controller_pod_container:memory_usage:bytes = + sum by (cluster,namespace,controller,pod_name,container_name) ( + label_replace( + container_memory_usage_bytes{container_name!=""}, + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + + cluster_namespace_controller_pod_container:memory_working_set:bytes = + sum by (cluster,namespace,controller,pod_name,container_name) ( + label_replace( + container_memory_working_set_bytes{container_name!=""}, + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + + cluster_namespace_controller_pod_container:memory_rss:bytes = + sum by (cluster,namespace,controller,pod_name,container_name) ( + label_replace( + container_memory_rss{container_name!=""}, + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + + cluster_namespace_controller_pod_container:memory_cache:bytes = + sum by (cluster,namespace,controller,pod_name,container_name) ( + label_replace( + container_memory_cache{container_name!=""}, + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + + cluster_namespace_controller_pod_container:disk_usage:bytes = + sum by 
(cluster,namespace,controller,pod_name,container_name) ( + label_replace( + container_disk_usage_bytes{container_name!=""}, + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + + cluster_namespace_controller_pod_container:memory_pagefaults:rate = + sum by (cluster,namespace,controller,pod_name,container_name,scope,type) ( + label_replace( + irate( + container_memory_failures_total{container_name!=""}[5m] + ), + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + + cluster_namespace_controller_pod_container:memory_oom:rate = + sum by (cluster,namespace,controller,pod_name,container_name,scope,type) ( + label_replace( + irate( + container_memory_failcnt{container_name!=""}[5m] + ), + "controller", "$1", + "pod_name", "^(.*)-[a-z0-9]+" + ) + ) + + ### Cluster resources ### + + cluster:memory_allocation:percent = + 100 * sum by (cluster) ( + container_spec_memory_limit_bytes{pod_name!=""} + ) / sum by (cluster) ( + machine_memory_bytes + ) + + cluster:memory_used:percent = + 100 * sum by (cluster) ( + container_memory_usage_bytes{pod_name!=""} + ) / sum by (cluster) ( + machine_memory_bytes + ) + + cluster:cpu_allocation:percent = + 100 * sum by (cluster) ( + container_spec_cpu_shares{pod_name!=""} + ) / sum by (cluster) ( + container_spec_cpu_shares{id="/"} * on(cluster,instance) machine_cpu_cores + ) + + cluster:node_cpu_use:percent = + 100 * sum by (cluster) ( + rate(node_cpu{mode!="idle"}[5m]) + ) / sum by (cluster) ( + machine_cpu_cores + ) + + ### API latency ### + + # Raw metrics are in microseconds. Convert to seconds. 
+ cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.99"} = + histogram_quantile( + 0.99, + sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket) + ) / 1e6 + cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.9"} = + histogram_quantile( + 0.9, + sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket) + ) / 1e6 + cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.5"} = + histogram_quantile( + 0.5, + sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket) + ) / 1e6 + + ### Scheduling latency ### + + cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.99"} = + histogram_quantile(0.99,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6 + cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.9"} = + histogram_quantile(0.9,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6 + cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.5"} = + histogram_quantile(0.5,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6 + + cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.99"} = + histogram_quantile(0.99,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6 + cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.9"} = + histogram_quantile(0.9,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6 + cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.5"} = + histogram_quantile(0.5,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6 + + cluster:scheduler_binding_latency:quantile_seconds{quantile="0.99"} = + histogram_quantile(0.99,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6 + 
cluster:scheduler_binding_latency:quantile_seconds{quantile="0.9"} = + histogram_quantile(0.9,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6 + cluster:scheduler_binding_latency:quantile_seconds{quantile="0.5"} = + histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6 + rabbitmq: |- + + mysql: |- + + ceph: |- + + openstack: |- + + custom: |- diff --git a/tools/gate/chart-deploys/default.yaml b/tools/gate/chart-deploys/default.yaml index 4987ac4ef..d2d3b7f8d 100644 --- a/tools/gate/chart-deploys/default.yaml +++ b/tools/gate/chart-deploys/default.yaml @@ -20,6 +20,14 @@ chart_groups: - docker_registry_redis - docker_registry + - name: infra_monitoring + timeout: 600 + charts: + - prometheus + - node_exporter + - kube_state_metrics + - alertmanager + charts: docker_registry_nfs_provisioner: chart_name: nfs-provisioner @@ -59,3 +67,58 @@ charts: node_selector_value: primary volume: class_name: openstack-helm-bootstrap + + prometheus: + chart_name: prometheus + release: prometheus + namespace: openstack + timeout: 300 + test: + enabled: true + timeout: 300 + output: false + values: + storage: + enabled: false + manifests: + pvc: false + network: + prometheus: + ingress: + public: false + + kube_state_metrics: + chart_name: kube-state-metrics + release: prometheus-kube-metrics + namespace: kube-system + test: + enabled: false + timeout: 300 + output: false + + node_exporter: + chart_name: node-exporter + release: prometheus-node-exporter + namespace: kube-system + test: + enabled: false + timeout: 300 + output: false + + alertmanager: + chart_name: alertmanager + release: prometheus-alertmanager + namespace: openstack + test: + enabled: false + timeout: 300 + output: false + values: + storage: + enabled: false + manifests: + pvc: false + network: + alertmanager: + ingress: + public: false