From 1c01274207393aa15a4591d8a9d709748d5e9754 Mon Sep 17 00:00:00 2001
From: Steve Wilkerson <wilkers.steve@gmail.com>
Date: Mon, 14 May 2018 17:27:48 -0500
Subject: [PATCH] Update prometheus rule for terminated containers in pods

This updates the prometheus rule for checking for terminated
containers in pods. The previous rule checked for any terminations,
which raised alarms because completed containers in jobs were
included, which isn't desired behavior. This changes the
expression to check only for containers that have terminated with
a status other than completed (OOMKilled, Error or ContainerCannotRun).

Change-Id: I88e533a56f81f81bd1a81420ecfb7d43ac9e2d0b
---
 prometheus/values.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prometheus/values.yaml b/prometheus/values.yaml
index a940b9cbc..41a24d587 100644
--- a/prometheus/values.yaml
+++ b/prometheus/values.yaml
@@ -841,7 +841,7 @@ conf:
               description: 'Replicaset {{$labels.replicaset}} is missing desired number of replicas for more than 10 minutes'
               summary: 'Replicaset {{$labels.replicaset}} is missing replicas'
           - alert: kube_pod_container_terminated
-            expr: kube_pod_container_status_terminated > 0
+            expr: kube_pod_container_status_terminated_reason{reason=~"OOMKilled|Error|ContainerCannotRun"} > 0
             for: 10m
             labels:
               severity: page