From e0c4469fdfc5e8b731230a0c0a747a16a54d1708 Mon Sep 17 00:00:00 2001 From: Steve Wilkerson Date: Wed, 21 Feb 2018 08:57:15 -0600 Subject: [PATCH] Prometheus: Update Alertmanager discovery, fix rules entry Updates the service discovery mechanism used by Prometheus to identify Alertmanager instances to push alerts to. It moves to use the 'application' label to identify Alertmanager pods instead of searching for pods by the label 'name', as the previous definition was resulting in empty results for Alertmanager targets. This also fixes the name of the Prometheus label used to track alerts for kube-controller-manager, as it was defined incorrectly previously. Change-Id: I1fb194550baf803435722e3a01892e49b44259d1 --- prometheus/values.yaml | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/prometheus/values.yaml b/prometheus/values.yaml index 0e73d90f8..126683542 100644 --- a/prometheus/values.yaml +++ b/prometheus/values.yaml @@ -120,8 +120,9 @@ endpoints: name: alertmanager namespace: null hosts: - default: alerts-api + default: alerts-engine public: alertmanager + discovery: alertmanager-discovery host_fqdn_override: default: null path: @@ -132,6 +133,8 @@ endpoints: api: default: 9093 public: 80 + mesh: + default: 6783 dependencies: dynamic: @@ -452,20 +455,23 @@ conf: alerting: alertmanagers: - kubernetes_sd_configs: - - role: pod + - role: pod + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token relabel_configs: - - source_labels: - - __meta_kubernetes_pod_label_name + - source_labels: [__meta_kubernetes_pod_label_application] regex: alertmanager action: keep - - source_labels: - - __meta_kubernetes_namespace + - source_labels: [__meta_kubernetes_pod_container_port_name] + regex: alerts-api + action: keep + - source_labels: [__meta_kubernetes_pod_container_port_name] + regex: peer-mesh + action: drop + - source_labels: 
[__meta_kubernetes_namespace] regex: openstack action: keep - - source_labels: - - __meta_kubernetes_pod_container_port_number - regex: - action: drop rules: alertmanager: groups: @@ -626,7 +632,7 @@ conf: - name: kube-controller-manager.rules rules: - alert: K8SControllerManagerDown - expr: absent(up{job="kube-controller-manager"} == 1) + expr: absent(up{job="kube-controller-manager-discovery"} == 1) for: 5m labels: severity: critical