From d7808468fc53cfbb2dd19f55c596646080235b7b Mon Sep 17 00:00:00 2001 From: Chris Wedgwood Date: Mon, 28 Jan 2019 23:10:02 +0000 Subject: [PATCH] [Prometheus] Relax disk IO constraints Relax the timing constraints for disk IO to accommodate rotating disks; a "measured IO" might be the result of a small number of physical IOs, so allow for enough time for a small number of disk rotations (this isn't perfect but seems to be about right in testing under load). Change-Id: Ifb067a2218528e5918d2f4b2ba169b6e739084e0 --- prometheus/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prometheus/values.yaml b/prometheus/values.yaml index f28e657c5..1d92467b0 100644 --- a/prometheus/values.yaml +++ b/prometheus/values.yaml @@ -1610,7 +1610,7 @@ conf: description: '{{$labels.alias}} has time difference of more than 2 seconds compared to NTP server: {{ $value }}' summary: '{{$labels.alias}}: time is skewed by : {{$value}} seconds' - alert: node_disk_read_latency - expr: (rate(node_disk_read_time_ms[5m]) / rate(node_disk_reads_completed[5m])) > 10 + expr: (rate(node_disk_read_time_ms[5m]) / rate(node_disk_reads_completed[5m])) > 40 for: 5m labels: severity: page @@ -1618,7 +1618,7 @@ conf: description: '{{$labels.device}} has a high read latency of {{ $value }}' summary: 'High read latency observed for device {{ $labels.device }}' - alert: node_disk_write_latency - expr: (rate(node_disk_write_time_ms[5m]) / rate(node_disk_writes_completed[5m])) > 10 + expr: (rate(node_disk_write_time_ms[5m]) / rate(node_disk_writes_completed[5m])) > 40 for: 5m labels: severity: page