Revert "Prometheus: Render Rules as Templates"
This reverts commit fb7fc87d23.
I first submitted that as a way to add dynamic capability to the
prometheus rules (they infamously don't support ENV variable
substitution there). However, this can be done easily with another
solution, and reverting cleans up the prometheus chart values
significantly.
Change-Id: Ibec512d92490798ae5522468b915b49e7746806a
parent f4bdb713c1
commit cdd0f33d0c
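
For context before the diff: the reverted change rendered each rules file
through the helm-toolkit values_template_renderer snippet, while this revert
restores the original behaviour of serializing the rules values and
base64-encoding them directly into the Secret. A minimal sketch of the two
approaches, using only the snippets that appear in the diff below:

    # reverted approach: render each rules file as a Helm template first
    {{ range $name, $config := .Values.conf.prometheus.rules }}
    {{- $filename := printf "%s.rules" $name }}
    {{- include "helm-toolkit.snippets.values_template_renderer" (dict "envAll" $envAll "template" $config "key" $filename "format" "Secret") | indent 2 }}
    {{ end }}

    # restored approach: encode the rules values verbatim
    {{ range $key, $value := .Values.conf.prometheus.rules }}
      {{ $key }}.rules: {{ toYaml $value | b64enc }}
    {{ end }}
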
@@ -15,7 +15,7 @@ apiVersion: v1
 appVersion: v2.12.0
 description: OpenStack-Helm Prometheus
 name: prometheus
-version: 0.1.2
+version: 0.1.3
 home: https://prometheus.io/
 sources:
   - https://github.com/prometheus/prometheus
@@ -22,9 +22,8 @@ metadata:
 type: Opaque
 data:
 {{- include "helm-toolkit.snippets.values_template_renderer" (dict "envAll" $envAll "template" .Values.conf.prometheus.scrape_configs.template "key" "prometheus.yml" "format" "Secret") | indent 2 }}
-{{ range $name, $config := .Values.conf.prometheus.rules }}
-{{- $filename := printf "%s.rules" $name}}
-{{- include "helm-toolkit.snippets.values_template_renderer" (dict "envAll" $envAll "template" $config "key" $filename "format" "Secret") | indent 2 }}
+{{ range $key, $value := .Values.conf.prometheus.rules }}
+  {{ $key }}.rules: {{ toYaml $value | b64enc }}
 {{ end }}
 # NOTE(srwilkers): this must be last, to work round helm ~2.7 bug.
 {{- include "helm-toolkit.snippets.values_template_renderer" (dict "envAll" $envAll "template" .Values.conf.httpd "key" "httpd.conf" "format" "Secret") | indent 2 }}
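A note on the annotation churn in the rule files below: while the rules were
rendered as Helm templates, every Prometheus template expression had to be
wrapped in a Go-template raw string, {{`...`}}, so that Helm would emit the
inner {{ $labels.* }} and {{ $value }} expressions verbatim for Prometheus to
expand at alert time. With the rules encoded verbatim again, that escaping
becomes unnecessary and the plain strings are restored; a representative pair
from the etcd rules below:

    # removed (rules rendered as Helm templates):
    description: "{{`etcd member {{ $labels.instance }} has no leader`}}"
    # restored (rules encoded verbatim):
    description: etcd member {{ $labels.instance }} has no leader
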
@@ -12,7 +12,7 @@ conf:
                 labels:
                   severity: critical
                 annotations:
-                  description: "{{`The configuration of the instances of the Alertmanager cluster {{$labels.service}} are out of sync.`}}"
+                  description: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.
                   summary: Alertmanager configurations are inconsistent
               - alert: AlertmanagerDownOrMissing
                 expr: label_replace(prometheus_operator_alertmanager_spec_replicas, "job", "alertmanager-$1", "alertmanager", "(.*)") / ON(job) GROUP_RIGHT() sum(up) BY (job) != 1
@@ -20,7 +20,7 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`An unexpected number of Alertmanagers are scraped or Alertmanagers disappeared from discovery.`}}"
+                  description: An unexpected number of Alertmanagers are scraped or Alertmanagers disappeared from discovery.
                   summary: Alertmanager down or not discovered
               - alert: FailedReload
                 expr: alertmanager_config_last_reload_successful == 0
@@ -28,6 +28,6 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod }}.`}}"
+                  description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod }}.
                   summary: Alertmanager configuration reload has failed
 ...
@@ -29,56 +29,56 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`no ceph active mgr is present or all ceph mgr are down`}}"
-                  summary: "{{`no ceph active mgt is present`}}"
+                  description: 'no ceph active mgr is present or all ceph mgr are down'
+                  summary: 'no ceph active mgt is present'
               - alert: ceph_monitor_quorum_low
                 expr: ceph_mon_quorum_count < 3
                 for: 5m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`ceph monitor quorum has been less than 3 for more than 5 minutes`}}"
-                  summary: "{{`ceph high availability is at risk`}}"
+                  description: 'ceph monitor quorum has been less than 3 for more than 5 minutes'
+                  summary: 'ceph high availability is at risk'
               - alert: ceph_monitor_quorum_absent
                 expr: absent(avg_over_time(ceph_mon_quorum_status[5m]))
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`ceph monitor quorum has been gone for more than 5 minutes`}}"
-                  summary: "{{`ceph high availability is at risk`}}"
+                  description: 'ceph monitor quorum has been gone for more than 5 minutes'
+                  summary: 'ceph high availability is at risk'
               - alert: ceph_cluster_usage_high
                 expr: avg_over_time(ceph_cluster_usage_percent[5m]) > 80
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`ceph cluster capacity usage more than 80 percent`}}"
-                  summary: "{{`ceph cluster usage is more than 80 percent`}}"
+                  description: 'ceph cluster capacity usage more than 80 percent'
+                  summary: 'ceph cluster usage is more than 80 percent'
               - alert: ceph_placement_group_degrade_pct_high
                 expr: avg_over_time(ceph_placement_group_degrade_percent[5m]) > 80
                 labels:
                   severity: critical
                 annotations:
-                  description: "{{`ceph placement group degradation is more than 80 percent`}}"
-                  summary: "{{`ceph placement groups degraded`}}"
+                  description: 'ceph placement group degradation is more than 80 percent'
+                  summary: 'ceph placement groups degraded'
               - alert: ceph_osd_down_pct_high
                 expr: avg_over_time(ceph_osd_down_percent[5m]) > 80
                 labels:
                   severity: critical
                 annotations:
-                  description: "{{`ceph OSDs down percent is more than 80 percent`}}"
-                  summary: "{{`ceph OSDs down percent is high`}}"
+                  description: 'ceph OSDs down percent is more than 80 percent'
+                  summary: 'ceph OSDs down percent is high'
               - alert: ceph_osd_down
                 expr: avg_over_time(ceph_osd_up[5m]) == 0
                 labels:
                   severity: critical
                 annotations:
-                  description: "{{`ceph OSD {{ $labels.ceph_daemon }} is down in instance {{ $labels.instance }}.`}}"
-                  summary: "{{`ceph OSD {{ $labels.ceph_daemon }} is down in instance {{ $labels.instance }}.`}}"
+                  description: 'ceph OSD {{ $labels.ceph_daemon }} is down in instance {{ $labels.instance }}.'
+                  summary: 'ceph OSD {{ $labels.ceph_daemon }} is down in instance {{ $labels.instance }}.'
               - alert: ceph_osd_out
                 expr: avg_over_time(ceph_osd_in[5m]) == 0
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`ceph OSD {{ $labels.ceph_daemon }} is out in instance {{ $labels.instance }}.`}}"
-                  summary: "{{`ceph OSD {{ $labels.ceph_daemon }} is out in instance {{ $labels.instance }}.`}}"
+                  description: 'ceph OSD {{ $labels.ceph_daemon }} is out in instance {{ $labels.instance }}.'
+                  summary: 'ceph OSD {{ $labels.ceph_daemon }} is out in instance {{ $labels.instance }}.'
 ...
@@ -20,72 +20,72 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Elasticsearch at {{ $labels.host }} has more than 64000 process open file count.`}}"
-                  summary: Elasticsearch has a very high process open file count.
+                  description: 'Elasticsearch at {{ $labels.host }} has more than 64000 process open file count.'
+                  summary: 'Elasticsearch has a very high process open file count.'
               - alert: es_high_process_cpu_percent
                 expr: elasticsearch_process_cpu_percent > 95
                 for: 10m
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Elasticsearch at {{ $labels.instance }} has high process cpu percent of {{ $value }}.`}}"
-                  summary: Elasticsearch process cpu usage is more than 95 percent.
+                  description: 'Elasticsearch at {{ $labels.instance }} has high process cpu percent of {{ $value }}.'
+                  summary: 'Elasticsearch process cpu usage is more than 95 percent.'
               - alert: es_fs_usage_high
                 expr: (100 * (elasticsearch_filesystem_data_size_bytes - elasticsearch_filesystem_data_free_bytes) / elasticsearch_filesystem_data_size_bytes) > 80
                 for: 10m
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Elasticsearch at {{ $labels.instance }} has filesystem usage of {{ $value }}.`}}"
-                  summary: Elasticsearch filesystem usage is high.
+                  description: 'Elasticsearch at {{ $labels.instance }} has filesystem usage of {{ $value }}.'
+                  summary: 'Elasticsearch filesystem usage is high.'
               - alert: es_unassigned_shards
                 expr: elasticsearch_cluster_health_unassigned_shards > 0
                 for: 10m
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Elasticsearch has {{ $value }} unassigned shards.`}}"
-                  summary: Elasticsearch has unassigned shards and hence a unhealthy cluster state.
+                  description: 'Elasticsearch has {{ $value }} unassigned shards.'
+                  summary: 'Elasticsearch has unassigned shards and hence a unhealthy cluster state.'
               - alert: es_cluster_health_timed_out
                 expr: elasticsearch_cluster_health_timed_out > 0
                 for: 10m
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Elasticsearch cluster health status call timedout {{ $value }} times.`}}"
-                  summary: Elasticsearch cluster health status calls are timing out.
+                  description: 'Elasticsearch cluster health status call timedout {{ $value }} times.'
+                  summary: 'Elasticsearch cluster health status calls are timing out.'
               - alert: es_cluster_health_status_alert
                 expr: (sum(elasticsearch_cluster_health_status{color="green"})*2)+sum(elasticsearch_cluster_health_status{color="yellow"}) < 2
                 for: 10m
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Elasticsearch cluster health status is {{ $value }}, not 2 (green). One or more shards or replicas are unallocated.`}}"
-                  summary: Elasticsearch cluster health status is not green.
+                  description: 'Elasticsearch cluster health status is {{ $value }}, not 2 (green). One or more shards or replicas are unallocated.'
+                  summary: 'Elasticsearch cluster health status is not green.'
               - alert: es_cluster_health_too_few_nodes_running
                 expr: elasticsearch_cluster_health_number_of_nodes < 3
                 for: 10m
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`There are only {{$value}} < 3 ElasticSearch nodes running`}}"
-                  summary: ElasticSearch running on less than 3 nodes
+                  description: 'There are only {{$value}} < 3 ElasticSearch nodes running'
+                  summary: 'ElasticSearch running on less than 3 nodes'
               - alert: es_cluster_health_too_few_data_nodes_running
                 expr: elasticsearch_cluster_health_number_of_data_nodes < 3
                 for: 10m
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`There are only {{$value}} < 3 ElasticSearch data nodes running`}}"
-                  summary: ElasticSearch running on less than 3 data nodes
+                  description: 'There are only {{$value}} < 3 ElasticSearch data nodes running'
+                  summary: 'ElasticSearch running on less than 3 data nodes'
               - alert: es_cluster_health_too_few_data_nodes_running
                 expr: elasticsearch_cluster_health_number_of_data_nodes < 3
                 for: 10m
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`There are only {{$value}} < 3 ElasticSearch data nodes running`}}"
-                  summary: ElasticSearch running on less than 3 data nodes
+                  description: 'There are only {{$value}} < 3 ElasticSearch data nodes running'
+                  summary: 'ElasticSearch running on less than 3 data nodes'
       fluentd:
         groups:
           - name: fluentd.alerting_rules
@@ -19,45 +19,45 @@ conf:
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`Felix instance {{ $labels.instance }} has seen {{ $value }} dataplane failures within the last hour`}}"
-                  summary: A high number of dataplane failures within Felix are happening
+                  description: 'Felix instance {{ $labels.instance }} has seen {{ $value }} dataplane failures within the last hour'
+                  summary: 'A high number of dataplane failures within Felix are happening'
               - alert: calico_datapane_address_msg_batch_size_high_5m
                 expr: absent(felix_int_dataplane_addr_msg_batch_size_sum) OR absent(felix_int_dataplane_addr_msg_batch_size_count) OR (felix_int_dataplane_addr_msg_batch_size_sum/felix_int_dataplane_addr_msg_batch_size_count) > 5
                 for: 5m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`Felix instance {{ $labels.instance }} has seen a high value of {{ $value }} dataplane address message batch size`}}"
-                  summary: Felix address message batch size is higher
+                  description: 'Felix instance {{ $labels.instance }} has seen a high value of {{ $value }} dataplane address message batch size'
+                  summary: 'Felix address message batch size is higher'
               - alert: calico_datapane_iface_msg_batch_size_high_5m
                 expr: absent(felix_int_dataplane_iface_msg_batch_size_sum) OR absent(felix_int_dataplane_iface_msg_batch_size_count) OR (felix_int_dataplane_iface_msg_batch_size_sum/felix_int_dataplane_iface_msg_batch_size_count) > 5
                 for: 5m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`Felix instance {{ $labels.instance }} has seen a high value of {{ $value }} dataplane interface message batch size`}}"
-                  summary: Felix interface message batch size is higher
+                  description: 'Felix instance {{ $labels.instance }} has seen a high value of {{ $value }} dataplane interface message batch size'
+                  summary: 'Felix interface message batch size is higher'
               - alert: calico_ipset_errors_high_1h
                 expr: absent(felix_ipset_errors) OR increase(felix_ipset_errors[1h]) > 5
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`Felix instance {{ $labels.instance }} has seen {{ $value }} ipset errors within the last hour`}}"
-                  summary: A high number of ipset errors within Felix are happening
+                  description: 'Felix instance {{ $labels.instance }} has seen {{ $value }} ipset errors within the last hour'
+                  summary: 'A high number of ipset errors within Felix are happening'
               - alert: calico_iptable_save_errors_high_1h
                 expr: absent(felix_iptables_save_errors) OR increase(felix_iptables_save_errors[1h]) > 5
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`Felix instance {{ $labels.instance }} has seen {{ $value }} iptable save errors within the last hour`}}"
-                  summary: A high number of iptable save errors within Felix are happening
+                  description: 'Felix instance {{ $labels.instance }} has seen {{ $value }} iptable save errors within the last hour'
+                  summary: 'A high number of iptable save errors within Felix are happening'
               - alert: calico_iptable_restore_errors_high_1h
                 expr: absent(felix_iptables_restore_errors) OR increase(felix_iptables_restore_errors[1h]) > 5
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`Felix instance {{ $labels.instance }} has seen {{ $value }} iptable restore errors within the last hour`}}"
-                  summary: A high number of iptable restore errors within Felix are happening
+                  description: 'Felix instance {{ $labels.instance }} has seen {{ $value }} iptable restore errors within the last hour'
+                  summary: 'A high number of iptable restore errors within Felix are happening'
           - name: etcd3.rules
             rules:
               - alert: etcd_InsufficientMembers
@@ -74,14 +74,14 @@ conf:
                 labels:
                   severity: critical
                 annotations:
-                  description: "{{`etcd member {{ $labels.instance }} has no leader`}}"
+                  description: etcd member {{ $labels.instance }} has no leader
                   summary: etcd member has no leader
               - alert: etcd_HighNumberOfLeaderChanges
                 expr: increase(etcd_server_leader_changes_seen_total{job="etcd"}[1h]) > 3
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`etcd instance {{ $labels.instance }} has seen {{ $value }} leader changes within the last hour`}}"
+                  description: etcd instance {{ $labels.instance }} has seen {{ $value }} leader changes within the last hour
                   summary: a high number of leader changes within the etcd cluster are happening
               - alert: etcd_HighNumberOfFailedGRPCRequests
                 expr: sum(rate(etcd_grpc_requests_failed_total{job="etcd"}[5m])) BY (grpc_method) / sum(rate(etcd_grpc_total{job="etcd"}[5m])) BY (grpc_method) > 0.01
@@ -89,7 +89,7 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`{{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}`}}"
+                  description: '{{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}'
                   summary: a high number of gRPC requests are failing
               - alert: etcd_HighNumberOfFailedGRPCRequests
                 expr: sum(rate(etcd_grpc_requests_failed_total{job="etcd"}[5m])) BY (grpc_method) / sum(rate(etcd_grpc_total{job="etcd"}[5m])) BY (grpc_method) > 0.05
@@ -97,7 +97,7 @@ conf:
                 labels:
                   severity: critical
                 annotations:
-                  description: "{{`{{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}`}}"
+                  description: '{{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}'
                   summary: a high number of gRPC requests are failing
               - alert: etcd_GRPCRequestsSlow
                 expr: histogram_quantile(0.99, rate(etcd_grpc_unary_requests_duration_seconds_bucket[5m])) > 0.15
@@ -105,7 +105,7 @@ conf:
                 labels:
                   severity: critical
                 annotations:
-                  description: "{{`on etcd instance {{ $labels.instance }} gRPC requests to {{ $labels.grpc_method }} are slow`}}"
+                  description: on etcd instance {{ $labels.instance }} gRPC requests to {{ $labels.grpc_method }} are slow
                   summary: slow gRPC requests
               - alert: etcd_HighNumberOfFailedHTTPRequests
                 expr: sum(rate(etcd_http_failed_total{job="etcd"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job="etcd"}[5m])) BY (method) > 0.01
@@ -113,7 +113,7 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}`}}"
+                  description: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}'
                   summary: a high number of HTTP requests are failing
               - alert: etcd_HighNumberOfFailedHTTPRequests
                 expr: sum(rate(etcd_http_failed_total{job="etcd"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job="etcd"}[5m])) BY (method) > 0.05
@@ -121,7 +121,7 @@ conf:
                 labels:
                   severity: critical
                 annotations:
-                  description: "{{`{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}`}}"
+                  description: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}'
                   summary: a high number of HTTP requests are failing
               - alert: etcd_HTTPRequestsSlow
                 expr: histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m])) > 0.15
@@ -129,7 +129,7 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`on etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow`}}"
+                  description: on etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow
                   summary: slow HTTP requests
               - alert: etcd_EtcdMemberCommunicationSlow
                 expr: histogram_quantile(0.99, rate(etcd_network_member_round_trip_time_seconds_bucket[5m])) > 0.15
@@ -137,14 +137,14 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`etcd instance {{ $labels.instance }} member communication with {{ $labels.To }} is slow`}}"
+                  description: etcd instance {{ $labels.instance }} member communication with {{ $labels.To }} is slow
                   summary: etcd member communication is slow
               - alert: etcd_HighNumberOfFailedProposals
                 expr: increase(etcd_server_proposals_failed_total{job="etcd"}[1h]) > 5
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`etcd instance {{ $labels.instance }} has seen {{ $value }} proposal failures within the last hour`}}"
+                  description: etcd instance {{ $labels.instance }} has seen {{ $value }} proposal failures within the last hour
                   summary: a high number of proposals within the etcd cluster are failing
               - alert: etcd_HighFsyncDurations
                 expr: histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) > 0.5
@@ -152,7 +152,7 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`etcd instance {{ $labels.instance }} fync durations are high`}}"
+                  description: etcd instance {{ $labels.instance }} fync durations are high
                   summary: high fsync durations
               - alert: etcd_HighCommitDurations
                 expr: histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) > 0.25
@@ -160,7 +160,7 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`etcd instance {{ $labels.instance }} commit durations are high`}}"
+                  description: etcd instance {{ $labels.instance }} commit durations are high
                   summary: high commit durations
           - name: kubelet.rules
             rules:
@@ -170,15 +170,15 @@ conf:
                 labels:
                   severity: critical
                 annotations:
-                  description: "{{`The Kubelet on {{ $labels.node }} has not checked in with the API, or has set itself to NotReady, for more than a minute`}}"
-                  summary: "{{`{{ $labels.node }} Node status is NotReady and {{ $labels.status }}`}}"
+                  description: The Kubelet on {{ $labels.node }} has not checked in with the API, or has set itself to NotReady, for more than a minute
+                  summary: '{{ $labels.node }} Node status is NotReady and {{ $labels.status }}'
               - alert: K8SManyNodesNotReady
                 expr: count(kube_node_status_condition{condition="Ready", status="unknown"} == 1) > 1 and (count(kube_node_status_condition{condition="Ready", status="unknown"} == 1) / count(kube_node_status_condition{condition="Ready", status="unknown"})) > 0.2
                 for: 1m
                 labels:
                   severity: critical
                 annotations:
-                  description: "{{`{{ $value }} Kubernetes nodes (more than 10% are in the NotReady state).`}}"
+                  description: '{{ $value }} Kubernetes nodes (more than 10% are in the NotReady state).'
                   summary: Many Kubernetes nodes are Not Ready
               - alert: K8SManyNodesNotReady
                 expr: count(kube_node_status_condition{condition="Ready", status="false"} == 1) > 1 and (count(kube_node_status_condition{condition="Ready", status="false"} == 1) / count(kube_node_status_condition{condition="Ready", status="false"})) > 0.2
@@ -186,7 +186,7 @@ conf:
                 labels:
                   severity: critical
                 annotations:
-                  description: "{{`{{ $value }} Kubernetes nodes (more than 10% are in the NotReady state).`}}"
+                  description: '{{ $value }} Kubernetes nodes (more than 10% are in the NotReady state).'
                   summary: Many Kubernetes nodes are Not Ready
               - alert: K8SNodesNotReady
                 expr: count(kube_node_status_condition{condition="Ready", status="false"} == 1) > 0 or count(kube_node_status_condition{condition="Ready", status="unknown"} == 1) > 0
@@ -194,7 +194,7 @@ conf:
                 labels:
                   severity: critical
                 annotations:
-                  description: "{{`{{ $value }} nodes are notReady state.`}}"
+                  description: '{{ $value }} nodes are notReady state.'
                   summary: One or more Kubernetes nodes are Not Ready
               - alert: K8SKubeletDown
                 expr: count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.03
@@ -202,7 +202,7 @@ conf:
                 labels:
                   severity: critical
                 annotations:
-                  description: "{{`Prometheus failed to scrape {{ $value }}% of kubelets.`}}"
+                  description: Prometheus failed to scrape {{ $value }}% of kubelets.
                   summary: Many Kubelets cannot be scraped
               - alert: K8SKubeletDown
                 expr: absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.1
@@ -210,14 +210,14 @@ conf:
                 labels:
                   severity: critical
                 annotations:
-                  description: "{{`Prometheus failed to scrape {{ $value }}% of kubelets, or all Kubelets have disappeared from service discovery.`}}"
+                  description: Prometheus failed to scrape {{ $value }}% of kubelets, or all Kubelets have disappeared from service discovery.
                   summary: Many Kubelets cannot be scraped
               - alert: K8SKubeletTooManyPods
                 expr: kubelet_running_pod_count > 100
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Kubelet {{$labels.instance}} is running {{$value}} pods, close to the limit of 110`}}"
+                  description: Kubelet {{$labels.instance}} is running {{$value}} pods, close to the limit of 110
                   summary: Kubelet is close to pod limit
           - name: kube-apiserver.rules
             rules:
@@ -235,7 +235,7 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`99th percentile Latency for {{ $labels.verb }} requests to the kube-apiserver is higher than 1s.`}}"
+                  description: 99th percentile Latency for {{ $labels.verb }} requests to the kube-apiserver is higher than 1s.
                   summary: Kubernetes apiserver latency is high
           - name: kube-controller-manager.rules
             rules:
@@ -264,118 +264,118 @@ conf:
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`statefulset {{$labels.statefulset}} has {{$value}} replicas, which is less than desired`}}"
-                  summary: "{{`{{$labels.statefulset}}: has inssuficient replicas.`}}"
+                  description: 'statefulset {{$labels.statefulset}} has {{$value}} replicas, which is less than desired'
+                  summary: '{{$labels.statefulset}}: has inssuficient replicas.'
               - alert: daemonsets_misscheduled
                 expr: kube_daemonset_status_number_misscheduled > 0
                 for: 10m
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Daemonset {{$labels.daemonset}} is running where it is not supposed to run`}}"
-                  summary: Daemonsets not scheduled correctly
+                  description: 'Daemonset {{$labels.daemonset}} is running where it is not supposed to run'
+                  summary: 'Daemonsets not scheduled correctly'
               - alert: daemonsets_not_scheduled
                 expr: kube_daemonset_status_desired_number_scheduled - kube_daemonset_status_current_number_scheduled > 0
                 for: 10m
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`{{ $value }} of Daemonset {{$labels.daemonset}} scheduled which is less than desired number`}}"
-                  summary: Less than desired number of daemonsets scheduled
+                  description: '{{ $value }} of Daemonset {{$labels.daemonset}} scheduled which is less than desired number'
+                  summary: 'Less than desired number of daemonsets scheduled'
               - alert: daemonset_pods_unavailable
                 expr: kube_daemonset_status_number_unavailable > 0
                 for: 10m
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Daemonset {{$labels.daemonset}} currently has pods unavailable`}}"
-                  summary: Daemonset pods unavailable, due to one of many reasons
+                  description: 'Daemonset {{$labels.daemonset}} currently has pods unavailable'
+                  summary: 'Daemonset pods unavailable, due to one of many reasons'
               - alert: deployment_replicas_unavailable
                 expr: kube_deployment_status_replicas_unavailable > 0
                 for: 10m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`deployment {{$labels.deployment}} has {{$value}} replicas unavailable`}}"
-                  summary: "{{`{{$labels.deployment}}: has inssuficient replicas.`}}"
+                  description: 'deployment {{$labels.deployment}} has {{$value}} replicas unavailable'
+                  summary: '{{$labels.deployment}}: has inssuficient replicas.'
               - alert: rollingupdate_deployment_replica_less_than_spec_max_unavailable
                 expr: kube_deployment_status_replicas_available - kube_deployment_spec_strategy_rollingupdate_max_unavailable < 0
                 for: 10m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`deployment {{$labels.deployment}} has {{$value}} replicas available which is less than specified as max unavailable during a rolling update`}}"
-                  summary: "{{`{{$labels.deployment}}: has inssuficient replicas during a rolling update.`}}"
+                  description: 'deployment {{$labels.deployment}} has {{$value}} replicas available which is less than specified as max unavailable during a rolling update'
+                  summary: '{{$labels.deployment}}: has inssuficient replicas during a rolling update.'
               - alert: job_status_failed
                 expr: kube_job_status_failed > 0
                 for: 10m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`Job {{$labels.exported_job}} is in failed status`}}"
-                  summary: "{{`{{$labels.exported_job}} has failed status`}}"
+                  description: 'Job {{$labels.exported_job}} is in failed status'
+                  summary: '{{$labels.exported_job}} has failed status'
               - alert: pod_status_pending
                 expr: kube_pod_status_phase{phase="Pending"} == 1
                 for: 10m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`Pod {{$labels.pod}} in namespace {{$labels.namespace}} has been in pending status for more than 10 minutes`}}"
-                  summary: "{{`Pod {{$labels.pod}} in namespace {{$labels.namespace}} in pending status`}}"
+                  description: 'Pod {{$labels.pod}} in namespace {{$labels.namespace}} has been in pending status for more than 10 minutes'
+                  summary: 'Pod {{$labels.pod}} in namespace {{$labels.namespace}} in pending status'
               - alert: pod_status_error_image_pull
                 expr: kube_pod_container_status_waiting_reason {reason="ErrImagePull"} == 1
                 for: 10m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`Pod {{$labels.pod}} in namespace {{$labels.namespace}} has an Image pull error for more than 10 minutes`}}"
-                  summary: "{{`Pod {{$labels.pod}} in namespace {{$labels.namespace}} in error status`}}"
+                  description: 'Pod {{$labels.pod}} in namespace {{$labels.namespace}} has an Image pull error for more than 10 minutes'
+                  summary: 'Pod {{$labels.pod}} in namespace {{$labels.namespace}} in error status'
               - alert: pod_status_error_image_pull_backoff
                 expr: kube_pod_container_status_waiting_reason {reason="ImagePullBackOff"} == 1
                 for: 10m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`Pod {{$labels.pod}} in namespace {{$labels.namespace}} has an ImagePullBackOff error for more than 10 minutes`}}"
-                  summary: "{{`Pod {{$labels.pod}} in namespace {{$labels.namespace}} in error status`}}"
+                  description: 'Pod {{$labels.pod}} in namespace {{$labels.namespace}} has an ImagePullBackOff error for more than 10 minutes'
+                  summary: 'Pod {{$labels.pod}} in namespace {{$labels.namespace}} in error status'
               - alert: pod_error_crash_loop_back_off
                 expr: kube_pod_container_status_waiting_reason {reason="CrashLoopBackOff"} == 1
                 for: 10m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`Pod {{$labels.pod}} in namespace {{$labels.namespace}} has an CrashLoopBackOff error for more than 10 minutes`}}"
-                  summary: "{{`Pod {{$labels.pod}} in namespace {{$labels.namespace}} in error status`}}"
+                  description: 'Pod {{$labels.pod}} in namespace {{$labels.namespace}} has an CrashLoopBackOff error for more than 10 minutes'
+                  summary: 'Pod {{$labels.pod}} in namespace {{$labels.namespace}} in error status'
               - alert: pod_error_config_error
                 expr: kube_pod_container_status_waiting_reason {reason="CreateContainerConfigError"} == 1
                 for: 10m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`Pod {{$labels.pod}} in namespace {{$labels.namespace}} has a CreateContainerConfigError error for more than 10 minutes`}}"
-                  summary: "{{`Pod {{$labels.pod}} in namespace {{$labels.namespace}} in error status`}}"
+                  description: 'Pod {{$labels.pod}} in namespace {{$labels.namespace}} has a CreateContainerConfigError error for more than 10 minutes'
+                  summary: 'Pod {{$labels.pod}} in namespace {{$labels.namespace}} in error status'
               - alert: replicaset_missing_replicas
                 expr: kube_replicaset_spec_replicas - kube_replicaset_status_ready_replicas > 0
                 for: 10m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`Replicaset {{$labels.replicaset}} is missing desired number of replicas for more than 10 minutes`}}"
-                  summary: "{{`Replicaset {{$labels.replicaset}} is missing replicas`}}"
+                  description: 'Replicaset {{$labels.replicaset}} is missing desired number of replicas for more than 10 minutes'
+                  summary: 'Replicaset {{$labels.replicaset}} is missing replicas'
               - alert: pod_container_terminated
                 expr: kube_pod_container_status_terminated_reason{reason=~"OOMKilled|Error|ContainerCannotRun"} > 0
                 for: 10m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`Pod {{$labels.pod}} in namespace {{$labels.namespace}} has a container terminated for more than 10 minutes`}}"
-                  summary: "{{`Pod {{$labels.pod}} in namespace {{$labels.namespace}} in error status`}}"
+                  description: 'Pod {{$labels.pod}} in namespace {{$labels.namespace}} has a container terminated for more than 10 minutes'
+                  summary: 'Pod {{$labels.pod}} in namespace {{$labels.namespace}} in error status'
               - alert: volume_claim_capacity_high_utilization
                 expr: 100 * kubelet_volume_stats_used_bytes / kubelet_volume_stats_capacity_bytes > 80
                 for: 5m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`volume claim {{$labels.persistentvolumeclaim}} usage has exceeded 80% of total capacity`}}"
-                  summary: "{{`{{$labels.persistentvolumeclaim}} usage has exceeded 80% of total capacity.`}}"
+                  description: 'volume claim {{$labels.persistentvolumeclaim}} usage has exceeded 80% of total capacity'
+                  summary: '{{$labels.persistentvolumeclaim}} usage has exceeded 80% of total capacity.'
 ...
@@ -28,71 +28,80 @@ conf:
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.alias}} device {{$labels.device}} on {{$labels.mountpoint}} has less than 20% free space left.`}}"
-                  summary: "{{`{{$labels.alias}}: Filesystem is running out of space soon.`}}"
+                  description: '{{$labels.alias}} device {{$labels.device}} on {{$labels.mountpoint}}
+                    has less than 20% free space left.'
+                  summary: '{{$labels.alias}}: Filesystem is running out of space soon.'
               - alert: node_filesystem_full_in_4h
                 expr: predict_linear(node_filesystem_free{fstype =~ "xfs|ext[34]"}[1h], 4 * 3600) <= 0
                 for: 5m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.alias}} device {{$labels.device}} on {{$labels.mountpoint}} is running out of space of in approx. 4 hours`}}"
-                  summary: "{{`{{$labels.alias}}: Filesystem is running out of space in 4 hours.`}}"
+                  description: '{{$labels.alias}} device {{$labels.device}} on {{$labels.mountpoint}}
+                    is running out of space of in approx. 4 hours'
+                  summary: '{{$labels.alias}}: Filesystem is running out of space in 4 hours.'
               - alert: node_filedescriptors_full_in_3h
                 expr: predict_linear(node_filefd_allocated[1h], 3 * 3600) >= node_filefd_maximum
                 for: 20m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.alias}} is running out of available file descriptors in approx. 3 hours`}}"
-                  summary: "{{`{{$labels.alias}} is running out of available file descriptors in 3 hours.`}}"
+                  description: '{{$labels.alias}} is running out of available file descriptors
+                    in approx. 3 hours'
+                  summary: '{{$labels.alias}} is running out of available file descriptors in
+                    3 hours.'
               - alert: node_load1_90percent
                 expr: node_load1 / ON(alias) count(node_cpu{mode="system"}) BY (alias) >= 0.9
                 for: 1h
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.alias}} is running with > 90% total load for at least 1h.`}}"
-                  summary: "{{`{{$labels.alias}}: Running on high load.`}}"
+                  description: '{{$labels.alias}} is running with > 90% total load for at least
+                    1h.'
+                  summary: '{{$labels.alias}}: Running on high load.'
               - alert: node_cpu_util_90percent
                 expr: 100 - (avg(irate(node_cpu{mode="idle"}[5m])) BY (alias) * 100) >= 90
                 for: 1h
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.alias}} has total CPU utilization over 90% for at least 1h.`}}"
-                  summary: "{{`{{$labels.alias}}: High CPU utilization.`}}"
+                  description: '{{$labels.alias}} has total CPU utilization over 90% for at least
+                    1h.'
+                  summary: '{{$labels.alias}}: High CPU utilization.'
               - alert: node_ram_using_90percent
                 expr: avg_over_time(node_ram_usage_percent[2m]) > 90
                 for: 30m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.alias}} is using at least 90% of its RAM for at least 30 minutes now.`}}"
-                  summary: "{{`{{$labels.alias}}: Using lots of RAM.`}}"
+                  description: '{{$labels.alias}} is using at least 90% of its RAM for at least
+                    30 minutes now.'
+                  summary: '{{$labels.alias}}: Using lots of RAM.'
               - alert: node_swap_using_80percent
                 expr: avg_over_time(node_swap_usage_percent[2m]) > 80
                 for: 10m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.alias}} is using 80% of its swap space for at least 10 minutes now.`}}"
-                  summary: "{{`{{$labels.alias}}: Running out of swap soon.`}}"
+                  description: '{{$labels.alias}} is using 80% of its swap space for at least
+                    10 minutes now.'
+                  summary: '{{$labels.alias}}: Running out of swap soon.'
               - alert: node_high_cpu_load
                 expr: node_load15 / on(alias) count(node_cpu{mode="system"}) by (alias) >= 0
                 for: 1m
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`{{$labels.alias}} is running with load15 > 1 for at least 5 minutes: {{$value}}`}}"
-                  summary: "{{`{{$labels.alias}}: Running on high load: {{$value}}`}}"
+                  description: '{{$labels.alias}} is running with load15 > 1 for at least 5 minutes: {{$value}}'
+                  summary: '{{$labels.alias}}: Running on high load: {{$value}}'
               - alert: node_high_memory_load
                 expr: avg_over_time(node_ram_usage_percent[2m]) > 85
                 for: 1m
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Host memory usage is {{ humanize $value }}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.`}}"
+                  description: Host memory usage is {{ humanize $value }}%. Reported by
+                    instance {{ $labels.instance }} of job {{ $labels.job }}.
                   summary: Server memory is almost full
               - alert: node_high_storage_load
                 expr: avg_over_time(node_storage_usage_percent{mountpoint="/"}[2m]) > 85
@@ -100,7 +109,8 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Host storage usage is {{ humanize $value }}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.`}}"
+                  description: Host storage usage is {{ humanize $value }}%. Reported by
+                    instance {{ $labels.instance }} of job {{ $labels.job }}.
                   summary: Server storage is almost full
               - alert: node_high_swap
                 expr: (node_memory_SwapTotal - node_memory_SwapFree) < (node_memory_SwapTotal
@@ -109,7 +119,8 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Host system has a high swap usage of {{ humanize $value }}. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.`}}"
+                  description: Host system has a high swap usage of {{ humanize $value }}. Reported
+                    by instance {{ $labels.instance }} of job {{ $labels.job }}.
                   summary: Server has a high swap usage
               - alert: node_high_network_drop_rcv
                 expr: node_network_receive_drop{device!="lo"} > 3000
@@ -117,7 +128,9 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Host system has an unusally high drop in network reception ({{ humanize $value }}). Reported by instance {{ $labels.instance }} of job {{ $labels.job }}`}}"
+                  description: Host system has an unusally high drop in network reception ({{
+                    humanize $value }}). Reported by instance {{ $labels.instance }} of job {{
+                    $labels.job }}
                   summary: Server has a high receive drop
               - alert: node_high_network_drop_send
                 expr: node_network_transmit_drop{device!="lo"} > 3000
@@ -125,7 +138,9 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Host system has an unusally high drop in network transmission ({{ humanize $value }}). Reported by instance {{ $labels.instance }} of job {{$labels.job }}`}}"
+                  description: Host system has an unusally high drop in network transmission ({{
+                    humanize $value }}). Reported by instance {{ $labels.instance }} of job {{
+                    $labels.job }}
                   summary: Server has a high transmit drop
               - alert: node_high_network_errs_rcv
                 expr: node_network_receive_errs{device!="lo"} > 3000
@@ -133,7 +148,9 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Host system has an unusally high error rate in network reception ({{ humanize $value }}). Reported by instance {{ $labels.instance }} of job {{ $labels.job }}`}}"
+                  description: Host system has an unusally high error rate in network reception
+                    ({{ humanize $value }}). Reported by instance {{ $labels.instance }} of job
+                    {{ $labels.job }}
                   summary: Server has unusual high reception errors
               - alert: node_high_network_errs_send
                 expr: node_network_transmit_errs{device!="lo"} > 3000
@@ -141,7 +158,9 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Host system has an unusally high error rate in network transmission ({{ humanize $value }}). Reported by instance {{ $labels.instance }} of job {{ $labels.job }}`}}"
+                  description: Host system has an unusally high error rate in network transmission
+                    ({{ humanize $value }}). Reported by instance {{ $labels.instance }} of job
+                    {{ $labels.job }}
                   summary: Server has unusual high transmission errors
               - alert: node_network_conntrack_usage_80percent
                 expr: sort(node_nf_conntrack_entries{job="node-exporter"} > node_nf_conntrack_entries_limit{job="node-exporter"} * 0.8)
@@ -149,78 +168,78 @@ conf:
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.instance}} has network conntrack entries of {{ $value }} which is more than 80% of maximum limit`}}"
-                  summary: "{{`{{$labels.instance}}: available network conntrack entries are low.`}}"
+                  description: '{{$labels.instance}} has network conntrack entries of {{ $value }} which is more than 80% of maximum limit'
+                  summary: '{{$labels.instance}}: available network conntrack entries are low.'
               - alert: node_entropy_available_low
                 expr: node_entropy_available_bits < 300
                 for: 5m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.instance}} has available entropy bits of {{ $value }} which is less than required of 300`}}"
-                  summary: "{{`{{$labels.instance}}: is low on entropy bits.`}}"
+                  description: '{{$labels.instance}} has available entropy bits of {{ $value }} which is less than required of 300'
+                  summary: '{{$labels.instance}}: is low on entropy bits.'
               - alert: node_hwmon_high_cpu_temp
                 expr: node_hwmon_temp_crit_celsius*0.9 - node_hwmon_temp_celsius < 0 OR node_hwmon_temp_max_celsius*0.95 - node_hwmon_temp_celsius < 0
                 for: 5m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.alias}} reports hwmon sensor {{$labels.sensor}}/{{$labels.chip}} temperature value is nearly critical: {{$value}}`}}"
-                  summary: "{{`{{$labels.alias}}: Sensor {{$labels.sensor}}/{{$labels.chip}} temp is high: {{$value}}`}}"
+                  description: '{{$labels.alias}} reports hwmon sensor {{$labels.sensor}}/{{$labels.chip}} temperature value is nearly critical: {{$value}}'
+                  summary: '{{$labels.alias}}: Sensor {{$labels.sensor}}/{{$labels.chip}} temp is high: {{$value}}'
               - alert: node_vmstat_paging_rate_high
                 expr: irate(node_vmstat_pgpgin[5m]) > 80
                 for: 5m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.alias}} has a memory paging rate of change higher than 80%: {{$value}}`}}"
-                  summary: "{{`{{$labels.alias}}: memory paging rate is high: {{$value}}`}}"
+                  description: '{{$labels.alias}} has a memory paging rate of change higher than 80%: {{$value}}'
+                  summary: '{{$labels.alias}}: memory paging rate is high: {{$value}}'
               - alert: node_xfs_block_allocation_high
                 expr: 100*(node_xfs_extent_allocation_blocks_allocated_total{job="node-exporter", instance=~"172.17.0.1.*"} / (node_xfs_extent_allocation_blocks_freed_total{job="node-exporter", instance=~"172.17.0.1.*"} + node_xfs_extent_allocation_blocks_allocated_total{job="node-exporter", instance=~"172.17.0.1.*"})) > 80
                 for: 5m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.alias}} has xfs allocation blocks higher than 80%: {{$value}}`}}"
-                  summary: "{{`{{$labels.alias}}: xfs block allocation high: {{$value}}`}}"
+                  description: '{{$labels.alias}} has xfs allocation blocks higher than 80%: {{$value}}'
+                  summary: '{{$labels.alias}}: xfs block allocation high: {{$value}}'
               - alert: node_network_bond_slaves_down
                 expr: node_net_bonding_slaves - node_net_bonding_slaves_active > 0
                 for: 5m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{ $labels.master }} is missing {{ $value }} slave interface(s).`}}"
-                  summary: "{{`Instance {{ $labels.instance }}: {{ $labels.master }} missing {{ $value }} slave interface(s)`}}"
+                  description: '{{ $labels.master }} is missing {{ $value }} slave interface(s).'
+                  summary: 'Instance {{ $labels.instance }}: {{ $labels.master }} missing {{ $value }} slave interface(s)'
               - alert: node_numa_memory_used
                 expr: 100*node_memory_numa_MemUsed / node_memory_numa_MemTotal > 80
                 for: 5m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.alias}} has more than 80% NUMA memory usage: {{ $value }}`}}"
-                  summary: "{{`{{$labels.alias}}: has high NUMA memory usage: {{$value}}`}}"
+                  description: '{{$labels.alias}} has more than 80% NUMA memory usage: {{ $value }}'
+                  summary: '{{$labels.alias}}: has high NUMA memory usage: {{$value}}'
               - alert: node_ntp_clock_skew_high
                 expr: abs(node_ntp_drift_seconds) > 2
                 for: 5m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.alias}} has time difference of more than 2 seconds compared to NTP server: {{ $value }}`}}"
-                  summary: "{{`{{$labels.alias}}: time is skewed by : {{$value}} seconds`}}"
+                  description: '{{$labels.alias}} has time difference of more than 2 seconds compared to NTP server: {{ $value }}'
+                  summary: '{{$labels.alias}}: time is skewed by : {{$value}} seconds'
               - alert: node_disk_read_latency
                 expr: (rate(node_disk_read_time_ms[5m]) / rate(node_disk_reads_completed[5m])) > 40
                 for: 5m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.device}} has a high read latency of {{ $value }}`}}"
-                  summary: "{{`High read latency observed for device {{ $labels.device }}`}}"
+                  description: '{{$labels.device}} has a high read latency of {{ $value }}'
+                  summary: 'High read latency observed for device {{ $labels.device }}'
               - alert: node_disk_write_latency
                 expr: (rate(node_disk_write_time_ms[5m]) / rate(node_disk_writes_completed[5m])) > 40
                 for: 5m
                 labels:
                   severity: page
                 annotations:
-                  description: "{{`{{$labels.device}} has a high write latency of {{ $value }}`}}"
-                  summary: "{{`High write latency observed for device {{ $labels.device }}`}}"
+                  description: '{{$labels.device}} has a high write latency of {{ $value }}'
+                  summary: 'High write latency observed for device {{ $labels.device }}'
 ...
@@ -12,7 +12,7 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`MariaDB exporter in {{ $labels.kubernetes_namespace }} is not collecting metrics or is not available for past 10 minutes`}}"
+                  description: MariaDB exporter in {{ $labels.kubernetes_namespace }} is not collecting metrics or is not available for past 10 minutes
                   title: MariaDB exporter is not collecting metrics or is not available
               - alert: prom_exporter_mariadb_osh_infra_unavailable
                 expr: avg_over_time(up{job="mysql-exporter",kubernetes_namespace="osh-infra"}[5m]) == 0
@@ -20,7 +20,7 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`MariaDB exporter in {{ $labels.kubernetes_namespace }} is not collecting metrics or is not available for past 10 minutes`}}"
+                  description: MariaDB exporter in {{ $labels.kubernetes_namespace }} is not collecting metrics or is not available for past 10 minutes
                   title: MariaDB exporter is not collecting metrics or is not available
               - alert: mariadb_table_lock_wait_high
                 expr: 100 * mysql_global_status_table_locks_waited/(mysql_global_status_table_locks_waited + mysql_global_status_table_locks_immediate) > 30
@@ -28,32 +28,32 @@ conf:
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`Mariadb has high table lock waits of {{ $value }} percentage`}}"
-                  summary: Mariadb table lock waits are high
+                  description: 'Mariadb has high table lock waits of {{ $value }} percentage'
+                  summary: 'Mariadb table lock waits are high'
               - alert: mariadb_node_not_ready
                 expr: mysql_global_status_wsrep_ready != 1
                 for: 10m
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`{{$labels.job}} on {{$labels.instance}} is not ready.`}}"
-                  summary: Galera cluster node not ready
+                  description: '{{$labels.job}} on {{$labels.instance}} is not ready.'
+                  summary: 'Galera cluster node not ready'
               - alert: mariadb_galera_node_out_of_sync
                 expr: mysql_global_status_wsrep_local_state != 4 AND mysql_global_variables_wsrep_desync == 0
                 for: 10m
                 labels:
                   severity: warning
                 annotations:
-                  description: "{{`{{$labels.job}} on {{$labels.instance}} is not in sync ({{$value}} != 4)`}}"
-                  summary: Galera cluster node out of sync
+                  description: '{{$labels.job}} on {{$labels.instance}} is not in sync ({{$value}} != 4)'
+                  summary: 'Galera cluster node out of sync'
               - alert: mariadb_innodb_replication_fallen_behind
                 expr: (mysql_global_variables_innodb_replication_delay > 30) AND on (instance) (predict_linear(mysql_global_variables_innodb_replication_delay[5m], 60*2) > 0)
                 for: 10m
                 labels:
                   severity: warning
                 annotations:
-                  description: The mysql innodb replication has fallen behind and is not recovering
-                  summary: MySQL innodb replication is lagging
+                  description: 'The mysql innodb replication has fallen behind and is not recovering'
+                  summary: 'MySQL innodb replication is lagging'
           - name: openstack.rules
             rules:
              - alert: prom_exporter_openstack_unavailable
@ -70,184 +70,184 @@ conf:
|
||||
labels:
|
||||
severity: page
|
||||
annotations:
|
||||
description: "{{`Glance API is not available at {{$labels.url}} for more than 5 minutes`}}"
|
||||
summary: "{{`Glance API is not available at {{$labels.url}}`}}"
|
||||
description: 'Glance API is not available at {{$labels.url}} for more than 5 minutes'
|
||||
summary: 'Glance API is not available at {{$labels.url}}'
|
||||
- alert: os_nova_api_availability
|
||||
expr: openstack_check_nova_api != 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: page
|
||||
annotations:
|
||||
description: "{{`Nova API is not available at {{$labels.url}} for more than 5 minutes`}}"
|
||||
summary: "{{`Nova API is not available at {{$labels.url}}`}}"
|
||||
description: 'Nova API is not available at {{$labels.url}} for more than 5 minutes'
|
||||
summary: 'Nova API is not available at {{$labels.url}}'
|
||||
- alert: os_keystone_api_availability
|
||||
expr: openstack_check_keystone_api != 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: page
|
||||
annotations:
|
||||
description: "{{`Keystone API is not available at {{$labels.url}} for more than 5 minutes`}}"
|
||||
summary: "{{`Keystone API is not available at {{$labels.url}}`}}"
|
||||
description: 'Keystone API is not available at {{$labels.url}} for more than 5 minutes'
|
||||
summary: 'Keystone API is not available at {{$labels.url}}'
|
||||
- alert: os_neutron_api_availability
|
||||
expr: openstack_check_neutron_api != 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: page
|
||||
annotations:
|
||||
description: "{{`Neutron API is not available at {{$labels.url}} for more than 5 minutes`}}"
|
||||
summary: "{{`Neutron API is not available at {{$labels.url}}`}}"
|
||||
description: 'Neutron API is not available at {{$labels.url}} for more than 5 minutes'
|
||||
summary: 'Neutron API is not available at {{$labels.url}}'
|
||||
- alert: os_neutron_metadata_agent_availability
|
||||
expr: openstack_services_neutron_metadata_agent_down_total > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: page
|
||||
annotations:
|
||||
description: One or more neutron metadata_agents are not available for more than 5 minutes
|
||||
summary: One or more neutron metadata_agents are not available
|
||||
description: 'One or more neutron metadata_agents are not available for more than 5 minutes'
|
||||
summary: 'One or more neutron metadata_agents are not available'
|
||||
- alert: os_neutron_openvswitch_agent_availability
expr: openstack_services_neutron_openvswitch_agent_down_total > 0
for: 5m
labels:
severity: page
annotations:
description: One or more neutron openvswitch agents are not available for more than 5 minutes
summary: One or more neutron openvswitch agents are not available
description: 'One or more neutron openvswitch agents are not available for more than 5 minutes'
summary: 'One or more neutron openvswitch agents are not available'
- alert: os_neutron_dhcp_agent_availability
expr: openstack_services_neutron_dhcp_agent_down_total > 0
for: 5m
labels:
severity: page
annotations:
description: One or more neutron dhcp agents are not available for more than 5 minutes
summary: One or more neutron dhcp agents are not available
description: 'One or more neutron dhcp agents are not available for more than 5 minutes'
summary: 'One or more neutron dhcp agents are not available'
- alert: os_neutron_l3_agent_availability
expr: openstack_services_neutron_l3_agent_down_total > 0
for: 5m
labels:
severity: page
annotations:
description: One or more neutron L3 agents are not available for more than 5 minutes
summary: One or more neutron L3 agents are not available
description: 'One or more neutron L3 agents are not available for more than 5 minutes'
summary: 'One or more neutron L3 agents are not available'
- alert: os_swift_api_availability
expr: openstack_check_swift_api != 1
for: 5m
labels:
severity: page
annotations:
description: "{{`Swift API is not available at {{$labels.url}} for more than 5 minutes`}}"
summary: "{{`Swift API is not available at {{$labels.url}}`}}"
description: 'Swift API is not available at {{$labels.url}} for more than 5 minutes'
summary: 'Swift API is not available at {{$labels.url}}'
- alert: os_cinder_api_availability
expr: openstack_check_cinder_api != 1
for: 5m
labels:
severity: page
annotations:
description: "{{`Cinder API is not available at {{$labels.url}} for more than 5 minutes`}}"
summary: "{{`Cinder API is not available at {{$labels.url}}`}}"
description: 'Cinder API is not available at {{$labels.url}} for more than 5 minutes'
summary: 'Cinder API is not available at {{$labels.url}}'
- alert: os_cinder_scheduler_availability
expr: openstack_services_cinder_cinder_scheduler != 1
for: 5m
labels:
severity: page
annotations:
description: Cinder scheduler is not available for more than 5 minutes
summary: Cinder scheduler is not available
description: 'Cinder scheduler is not available for more than 5 minutes'
summary: 'Cinder scheduler is not available'
- alert: os_heat_api_availability
expr: openstack_check_heat_api != 1
for: 5m
labels:
severity: page
annotations:
description: "{{`Heat API is not available at {{$labels.url}} for more than 5 minutes`}}"
summary: "{{`Heat API is not available at {{$labels.url}}`}}"
description: 'Heat API is not available at {{$labels.url}} for more than 5 minutes'
summary: 'Heat API is not available at {{$labels.url}}'
- alert: os_nova_compute_disabled
expr: openstack_services_nova_compute_disabled_total > 0
for: 5m
labels:
severity: page
annotations:
description: nova-compute is disabled on certain hosts for more than 5 minutes
summary: OpenStack compute service nova-compute is disabled on some hosts
description: 'nova-compute is disabled on certain hosts for more than 5 minutes'
summary: 'OpenStack compute service nova-compute is disabled on some hosts'
- alert: os_nova_conductor_disabled
expr: openstack_services_nova_conductor_disabled_total > 0
for: 5m
labels:
severity: page
annotations:
description: nova-conductor is disabled on certain hosts for more than 5 minutes
summary: OpenStack compute service nova-conductor is disabled on some hosts
description: 'nova-conductor is disabled on certain hosts for more than 5 minutes'
summary: 'OpenStack compute service nova-conductor is disabled on some hosts'
- alert: os_nova_consoleauth_disabled
expr: openstack_services_nova_consoleauth_disabled_total > 0
for: 5m
labels:
severity: page
annotations:
description: nova-consoleauth is disabled on certain hosts for more than 5 minutes
summary: OpenStack compute service nova-consoleauth is disabled on some hosts
description: 'nova-consoleauth is disabled on certain hosts for more than 5 minutes'
summary: 'OpenStack compute service nova-consoleauth is disabled on some hosts'
- alert: os_nova_scheduler_disabled
expr: openstack_services_nova_scheduler_disabled_total > 0
for: 5m
labels:
severity: page
annotations:
description: nova-scheduler is disabled on certain hosts for more than 5 minutes
summary: OpenStack compute service nova-scheduler is disabled on some hosts
description: 'nova-scheduler is disabled on certain hosts for more than 5 minutes'
summary: 'OpenStack compute service nova-scheduler is disabled on some hosts'
- alert: os_nova_compute_down
expr: openstack_services_nova_compute_down_total > 0
for: 5m
labels:
severity: page
annotations:
description: nova-compute is down on certain hosts for more than 5 minutes
summary: OpenStack compute service nova-compute is down on some hosts
description: 'nova-compute is down on certain hosts for more than 5 minutes'
summary: 'OpenStack compute service nova-compute is down on some hosts'
- alert: os_nova_conductor_down
expr: openstack_services_nova_conductor_down_total > 0
for: 5m
labels:
severity: page
annotations:
description: nova-conductor is down on certain hosts for more than 5 minutes
summary: OpenStack compute service nova-conductor is down on some hosts
description: 'nova-conductor is down on certain hosts for more than 5 minutes'
summary: 'OpenStack compute service nova-conductor is down on some hosts'
- alert: os_nova_consoleauth_down
expr: openstack_services_nova_consoleauth_down_total > 0
for: 5m
labels:
severity: page
annotations:
description: nova-consoleauth is down on certain hosts for more than 5 minutes
summary: OpenStack compute service nova-consoleauth is down on some hosts
description: 'nova-consoleauth is down on certain hosts for more than 5 minutes'
summary: 'OpenStack compute service nova-consoleauth is down on some hosts'
- alert: os_nova_scheduler_down
expr: openstack_services_nova_scheduler_down_total > 0
for: 5m
labels:
severity: page
annotations:
description: nova-scheduler is down on certain hosts for more than 5 minutes
summary: OpenStack compute service nova-scheduler is down on some hosts
description: 'nova-scheduler is down on certain hosts for more than 5 minutes'
summary: 'OpenStack compute service nova-scheduler is down on some hosts'
- alert: os_vm_vcpu_usage_high
expr: openstack_total_used_vcpus * 100 / (openstack_total_used_vcpus + openstack_total_free_vcpus) > 80
for: 5m
labels:
severity: page
annotations:
description: "{{`OpenStack VM vcpu usage is high at {{$value}} percent`}}"
summary: OpenStack VM vcpu usage is high
description: 'OpenStack VM vcpu usage is high at {{$value}} percent'
summary: 'OpenStack VM vcpu usage is high'
- alert: os_vm_ram_usage_high
expr: openstack_total_used_ram_MB * 100 / (openstack_total_used_ram_MB + openstack_total_free_ram_MB) > 80
for: 5m
labels:
severity: page
annotations:
description: "{{`OpenStack VM RAM usage is high at {{$value}} percent`}}"
summary: OpenStack VM RAM usage is high
description: 'OpenStack VM RAM usage is high at {{$value}} percent'
summary: 'OpenStack VM RAM usage is high'
- alert: os_vm_disk_usage_high
expr: openstack_total_used_disk_GB * 100 / (openstack_total_used_disk_GB + openstack_total_free_disk_GB) > 80
for: 5m
labels:
severity: page
annotations:
description: "{{`OpenStack VM disk usage is high at {{$value}} percent`}}"
summary: OpenStack VM disk usage is high
description: 'OpenStack VM disk usage is high at {{$value}} percent'
summary: 'OpenStack VM disk usage is high'
- name: rabbitmq.rules
rules:
- alert: rabbitmq_network_partitions_detected
@ -256,70 +256,70 @@ conf:
labels:
severity: warning
annotations:
description: "{{`RabbitMQ at {{ $labels.instance }} has {{ $value }} partitions`}}"
summary: RabbitMQ Network partitions detected
description: 'RabbitMQ at {{ $labels.instance }} has {{ $value }} partitions'
summary: 'RabbitMQ Network partitions detected'
- alert: rabbitmq_down
expr: min(rabbitmq_up) by(instance) != 1
for: 10m
labels:
severity: page
annotations:
description: "{{`RabbitMQ Server instance {{ $labels.instance }} is down`}}"
summary: "{{`The RabbitMQ Server instance at {{ $labels.instance }} has been down for the last 10 mins`}}"
description: 'RabbitMQ Server instance {{ $labels.instance }} is down'
summary: 'The RabbitMQ Server instance at {{ $labels.instance }} has been down for the last 10 mins'
- alert: rabbitmq_file_descriptor_usage_high
expr: fd_used * 100 / fd_total > 80
for: 10m
labels:
severity: warning
annotations:
description: "{{`RabbitMQ Server instance {{ $labels.instance }} has high file descriptor usage of {{ $value }} percent.`}}"
summary: RabbitMQ file descriptor usage has been high for the last 10 mins
description: 'RabbitMQ Server instance {{ $labels.instance }} has high file descriptor usage of {{ $value }} percent.'
summary: 'RabbitMQ file descriptor usage has been high for the last 10 mins'
- alert: rabbitmq_node_disk_free_alarm
expr: node_disk_free_alarm > 0
for: 10m
labels:
severity: warning
annotations:
description: "{{`RabbitMQ Server instance {{ $labels.instance }} has low disk free space available.`}}"
summary: RabbitMQ disk space usage is high
description: 'RabbitMQ Server instance {{ $labels.instance }} has low disk free space available.'
summary: 'RabbitMQ disk space usage is high'
- alert: rabbitmq_node_memory_alarm
expr: node_mem_alarm > 0
for: 10m
labels:
severity: warning
annotations:
description: "{{`RabbitMQ Server instance {{ $labels.instance }} has low free memory.`}}"
summary: RabbitMQ memory usage is high
description: 'RabbitMQ Server instance {{ $labels.instance }} has low free memory.'
summary: 'RabbitMQ memory usage is high'
- alert: rabbitmq_less_than_3_nodes
expr: running < 3
for: 10m
labels:
severity: warning
annotations:
description: RabbitMQ Server has less than 3 nodes running.
summary: RabbitMQ server is at risk of losing data
description: 'RabbitMQ Server has less than 3 nodes running.'
summary: 'RabbitMQ server is at risk of losing data'
- alert: rabbitmq_queue_messages_returned_high
expr: queue_messages_returned_total/queue_messages_published_total * 100 > 50
for: 5m
labels:
severity: warning
annotations:
description: RabbitMQ Server is returning more than 50 percent of messages received.
summary: RabbitMQ server is returning more than 50 percent of messages received.
description: 'RabbitMQ Server is returning more than 50 percent of messages received.'
summary: 'RabbitMQ server is returning more than 50 percent of messages received.'
- alert: rabbitmq_consumers_low_utilization
expr: queue_consumer_utilisation < .4
for: 5m
labels:
severity: warning
annotations:
description: RabbitMQ consumers message consumption speed is low
summary: RabbitMQ consumers message consumption speed is low
description: 'RabbitMQ consumers message consumption speed is low'
summary: 'RabbitMQ consumers message consumption speed is low'
- alert: rabbitmq_high_message_load
expr: queue_messages_total > 17000 or increase(queue_messages_total[5m]) > 4000
for: 5m
labels:
severity: warning
annotations:
description: RabbitMQ has a high message load. Total queue depth is over 17000 or grew by more than 4000 messages in 5 minutes.
summary: RabbitMQ has a high message load
description: 'RabbitMQ has a high message load. Total queue depth is over 17000 or grew by more than 4000 messages in 5 minutes.'
summary: 'RabbitMQ has a high message load'
...
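With the revert, these rule documents are copied into the secret verbatim instead of passing through the chart's template renderer, so Prometheus expressions such as {{ $labels.instance }} and {{ $value }} no longer need the Helm escaping seen on the removed lines. As a rough sketch, a rules file decoded from the secret should look something like the following (the top-level groups wrapper is an assumption about the final file layout, not part of this diff), and a file in this shape can be validated with promtool check rules before it ships:

groups:
- name: rabbitmq.rules
  rules:
  - alert: rabbitmq_down
    expr: min(rabbitmq_up) by(instance) != 1
    for: 10m
    annotations:
      # reaches Prometheus verbatim; expanded per-alert at evaluation time
      description: 'RabbitMQ Server instance {{ $labels.instance }} is down'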
@ -20,7 +20,7 @@ conf:
labels:
severity: warning
annotations:
description: "{{`Replication lag on server {{$labels.instance}} is currently {{$value | humanizeDuration }}`}}"
description: Replication lag on server {{$labels.instance}} is currently {{$value | humanizeDuration }}
title: Postgres Replication lag is over 2 minutes
- alert: pg_connections_too_high
expr: sum(pg_stat_activity_count) BY (environment, fqdn) > ON(fqdn) pg_settings_max_connections * 0.95
@ -29,13 +29,13 @@ conf:
severity: warn
channel: database
annotations:
description: "{{`Postgresql has {{$value}} connections on {{$labels.fqdn}} which is close to the maximum`}}"
description: Postgresql has {{$value}} connections on {{$labels.fqdn}} which is close to the maximum
title: Postgresql has {{$value}} connections on {{$labels.fqdn}} which is close to the maximum
- alert: pg_deadlocks_detected
expr: sum by(datname) (rate(pg_stat_database_deadlocks[1m])) > 0
for: 5m
labels:
severity: warn
annotations:
description: "{{`postgresql at {{$labels.instance}} is showing {{$value}} rate of deadlocks for database {{$labels.datname}}`}}"
description: postgresql at {{$labels.instance}} is showing {{$value}} rate of deadlocks for database {{$labels.datname}}
title: Postgres server is experiencing deadlocks
...
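For reference, the escaping idiom removed above relies on Go template raw strings: inside {{` ... `}}, Helm emits the backticked text literally, which is how {{$labels.instance}} used to survive chart rendering. With the rules now stored as-is, the plain form carries the same bytes to Prometheus. An illustrative pair (hypothetical lines, not taken from this change):

# pre-revert values style: escaped so the template renderer passes it through
description: "{{`Replication lag on server {{$labels.instance}} is high`}}"
# post-revert values style: stored verbatim; Prometheus expands it at alert time
description: Replication lag on server {{$labels.instance}} is high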