3a6df3b544
This removes the default dashboards from the Grafana chart and instead places them in the values_overrides directory, similar to what was done for the Prometheus rules. As Grafana dashboards will likely be heavily dependent upon end-user needs, the old default dashboard configs should only be used as a reference instead of opinionated defaults that are difficult to override. The previous defaults made using specialized labels for dashboard variables difficult, as they were making dangerous assumptions about deployed namespaces and host FQDNs. By removing the defaults entirely, end users can define their own dashboards to meet their specialized needs. Change-Id: I7def8df68371deda0b75a685363c8a73b818dd45 Signed-off-by: Steve Wilkerson <sw5822@att.com>
1562 lines
39 KiB
YAML
1562 lines
39 KiB
YAML
# NOTE(srwilkers): This overrides file provides a reference for dashboards that
|
|
# reflect the overall state of a Kubernetes deployment
|
|
conf:
|
|
dashboards:
|
|
kubernetes_capacity_planning:
|
|
__inputs:
|
|
- name: DS_PROMETHEUS
|
|
label: prometheus
|
|
description: ''
|
|
type: datasource
|
|
pluginId: prometheus
|
|
pluginName: Prometheus
|
|
__requires:
|
|
- type: grafana
|
|
id: grafana
|
|
name: Grafana
|
|
version: 4.4.1
|
|
- type: panel
|
|
id: graph
|
|
name: Graph
|
|
version: ''
|
|
- type: datasource
|
|
id: prometheus
|
|
name: Prometheus
|
|
version: 1.0.0
|
|
- type: panel
|
|
id: singlestat
|
|
name: Singlestat
|
|
version: ''
|
|
annotations:
|
|
list: []
|
|
description: ''
|
|
editable: true
|
|
gnetId: 22
|
|
graphTooltip: 0
|
|
hideControls: false
|
|
id:
|
|
links: []
|
|
refresh: false
|
|
rows:
|
|
- collapse: false
|
|
height: 250px
|
|
panels:
|
|
- alerting: {}
|
|
aliasColors: {}
|
|
bars: false
|
|
dashLength: 10
|
|
dashes: false
|
|
datasource: "${DS_PROMETHEUS}"
|
|
editable: true
|
|
error: false
|
|
fill: 1
|
|
grid: {}
|
|
id: 3
|
|
legend:
|
|
avg: false
|
|
current: false
|
|
max: false
|
|
min: false
|
|
show: true
|
|
total: false
|
|
values: false
|
|
lines: true
|
|
linewidth: 2
|
|
links: []
|
|
nullPointMode: connected
|
|
percentage: false
|
|
pointradius: 5
|
|
points: false
|
|
renderer: flot
|
|
seriesOverrides: []
|
|
spaceLength: 10
|
|
span: 6
|
|
stack: false
|
|
steppedLine: false
|
|
targets:
|
|
- expr: sum(rate(node_cpu{mode="idle"}[2m])) * 100
|
|
hide: false
|
|
intervalFactor: 10
|
|
legendFormat: ''
|
|
refId: A
|
|
step: 50
|
|
thresholds: []
|
|
timeFrom:
|
|
timeShift:
|
|
title: Idle cpu
|
|
tooltip:
|
|
msResolution: false
|
|
shared: true
|
|
sort: 0
|
|
value_type: cumulative
|
|
type: graph
|
|
xaxis:
|
|
buckets:
|
|
mode: time
|
|
name:
|
|
show: true
|
|
values: []
|
|
yaxes:
|
|
- format: percent
|
|
label: cpu usage
|
|
logBase: 1
|
|
max:
|
|
min: 0
|
|
show: true
|
|
- format: short
|
|
label:
|
|
logBase: 1
|
|
max:
|
|
min:
|
|
show: true
|
|
- alerting: {}
|
|
aliasColors: {}
|
|
bars: false
|
|
dashLength: 10
|
|
dashes: false
|
|
datasource: "${DS_PROMETHEUS}"
|
|
editable: true
|
|
error: false
|
|
fill: 1
|
|
grid: {}
|
|
id: 9
|
|
legend:
|
|
avg: false
|
|
current: false
|
|
max: false
|
|
min: false
|
|
show: true
|
|
total: false
|
|
values: false
|
|
lines: true
|
|
linewidth: 2
|
|
links: []
|
|
nullPointMode: connected
|
|
percentage: false
|
|
pointradius: 5
|
|
points: false
|
|
renderer: flot
|
|
seriesOverrides: []
|
|
spaceLength: 10
|
|
span: 6
|
|
stack: false
|
|
steppedLine: false
|
|
targets:
|
|
- expr: sum(node_load1)
|
|
intervalFactor: 4
|
|
legendFormat: load 1m
|
|
refId: A
|
|
step: 20
|
|
target: ''
|
|
- expr: sum(node_load5)
|
|
intervalFactor: 4
|
|
legendFormat: load 5m
|
|
refId: B
|
|
step: 20
|
|
target: ''
|
|
- expr: sum(node_load15)
|
|
intervalFactor: 4
|
|
legendFormat: load 15m
|
|
refId: C
|
|
step: 20
|
|
target: ''
|
|
thresholds: []
|
|
timeFrom:
|
|
timeShift:
|
|
title: System load
|
|
tooltip:
|
|
msResolution: false
|
|
shared: true
|
|
sort: 0
|
|
value_type: cumulative
|
|
type: graph
|
|
xaxis:
|
|
buckets:
|
|
mode: time
|
|
name:
|
|
show: true
|
|
values: []
|
|
yaxes:
|
|
- format: percentunit
|
|
label:
|
|
logBase: 1
|
|
max:
|
|
min:
|
|
show: true
|
|
- format: short
|
|
label:
|
|
logBase: 1
|
|
max:
|
|
min:
|
|
show: true
|
|
repeat:
|
|
repeatIteration:
|
|
repeatRowId:
|
|
showTitle: false
|
|
title: New row
|
|
titleSize: h6
|
|
- collapse: false
|
|
height: 250px
|
|
panels:
|
|
- alerting: {}
|
|
aliasColors: {}
|
|
bars: false
|
|
dashLength: 10
|
|
dashes: false
|
|
datasource: "${DS_PROMETHEUS}"
|
|
editable: true
|
|
error: false
|
|
fill: 1
|
|
grid: {}
|
|
id: 4
|
|
legend:
|
|
avg: false
|
|
current: false
|
|
max: false
|
|
min: false
|
|
show: true
|
|
total: false
|
|
values: false
|
|
lines: true
|
|
linewidth: 2
|
|
links: []
|
|
nullPointMode: connected
|
|
percentage: false
|
|
pointradius: 5
|
|
points: false
|
|
renderer: flot
|
|
seriesOverrides:
|
|
- alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"}
|
|
yaxis: 2
|
|
spaceLength: 10
|
|
span: 9
|
|
stack: true
|
|
steppedLine: false
|
|
targets:
|
|
- expr: sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers)
|
|
- sum(node_memory_Cached)
|
|
intervalFactor: 2
|
|
legendFormat: memory usage
|
|
metric: memo
|
|
refId: A
|
|
step: 10
|
|
target: ''
|
|
- expr: sum(node_memory_Buffers)
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: memory buffers
|
|
metric: memo
|
|
refId: B
|
|
step: 10
|
|
target: ''
|
|
- expr: sum(node_memory_Cached)
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: memory cached
|
|
metric: memo
|
|
refId: C
|
|
step: 10
|
|
target: ''
|
|
- expr: sum(node_memory_MemFree)
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: memory free
|
|
metric: memo
|
|
refId: D
|
|
step: 10
|
|
target: ''
|
|
thresholds: []
|
|
timeFrom:
|
|
timeShift:
|
|
title: Memory usage
|
|
tooltip:
|
|
msResolution: false
|
|
shared: true
|
|
sort: 0
|
|
value_type: individual
|
|
type: graph
|
|
xaxis:
|
|
buckets:
|
|
mode: time
|
|
name:
|
|
show: true
|
|
values: []
|
|
yaxes:
|
|
- format: bytes
|
|
label:
|
|
logBase: 1
|
|
max:
|
|
min: '0'
|
|
show: true
|
|
- format: short
|
|
label:
|
|
logBase: 1
|
|
max:
|
|
min:
|
|
show: true
|
|
- cacheTimeout:
|
|
colorBackground: false
|
|
colorValue: false
|
|
colors:
|
|
- rgba(50, 172, 45, 0.97)
|
|
- rgba(237, 129, 40, 0.89)
|
|
- rgba(245, 54, 54, 0.9)
|
|
datasource: "${DS_PROMETHEUS}"
|
|
editable: true
|
|
error: false
|
|
format: percent
|
|
gauge:
|
|
maxValue: 100
|
|
minValue: 0
|
|
show: true
|
|
thresholdLabels: false
|
|
thresholdMarkers: true
|
|
id: 5
|
|
interval:
|
|
links: []
|
|
mappingType: 1
|
|
mappingTypes:
|
|
- name: value to text
|
|
value: 1
|
|
- name: range to text
|
|
value: 2
|
|
maxDataPoints: 100
|
|
nullPointMode: connected
|
|
nullText:
|
|
postfix: ''
|
|
postfixFontSize: 50%
|
|
prefix: ''
|
|
prefixFontSize: 50%
|
|
rangeMaps:
|
|
- from: 'null'
|
|
text: N/A
|
|
to: 'null'
|
|
span: 3
|
|
sparkline:
|
|
fillColor: rgba(31, 118, 189, 0.18)
|
|
full: false
|
|
lineColor: rgb(31, 120, 193)
|
|
show: false
|
|
tableColumn: ''
|
|
targets:
|
|
- expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers)
|
|
- sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100"
|
|
intervalFactor: 2
|
|
metric: ''
|
|
refId: A
|
|
step: 60
|
|
target: ''
|
|
thresholds: 80, 90
|
|
title: Memory usage
|
|
type: singlestat
|
|
valueFontSize: 80%
|
|
valueMaps:
|
|
- op: "="
|
|
text: N/A
|
|
value: 'null'
|
|
valueName: avg
|
|
repeat:
|
|
repeatIteration:
|
|
repeatRowId:
|
|
showTitle: false
|
|
title: New row
|
|
titleSize: h6
|
|
- collapse: false
|
|
height: 246
|
|
panels:
|
|
- alerting: {}
|
|
aliasColors: {}
|
|
bars: false
|
|
dashLength: 10
|
|
dashes: false
|
|
datasource: "${DS_PROMETHEUS}"
|
|
editable: true
|
|
error: false
|
|
fill: 1
|
|
grid: {}
|
|
id: 6
|
|
legend:
|
|
avg: false
|
|
current: false
|
|
max: false
|
|
min: false
|
|
show: true
|
|
total: false
|
|
values: false
|
|
lines: true
|
|
linewidth: 2
|
|
links: []
|
|
nullPointMode: connected
|
|
percentage: false
|
|
pointradius: 5
|
|
points: false
|
|
renderer: flot
|
|
seriesOverrides:
|
|
- alias: read
|
|
yaxis: 1
|
|
- alias: '{instance="172.17.0.1:9100"}'
|
|
yaxis: 2
|
|
- alias: io time
|
|
yaxis: 2
|
|
spaceLength: 10
|
|
span: 9
|
|
stack: false
|
|
steppedLine: false
|
|
targets:
|
|
- expr: sum(rate(node_disk_bytes_read[5m]))
|
|
hide: false
|
|
intervalFactor: 4
|
|
legendFormat: read
|
|
refId: A
|
|
step: 20
|
|
target: ''
|
|
- expr: sum(rate(node_disk_bytes_written[5m]))
|
|
intervalFactor: 4
|
|
legendFormat: written
|
|
refId: B
|
|
step: 20
|
|
- expr: sum(rate(node_disk_io_time_ms[5m]))
|
|
intervalFactor: 4
|
|
legendFormat: io time
|
|
refId: C
|
|
step: 20
|
|
thresholds: []
|
|
timeFrom:
|
|
timeShift:
|
|
title: Disk I/O
|
|
tooltip:
|
|
msResolution: false
|
|
shared: true
|
|
sort: 0
|
|
value_type: cumulative
|
|
type: graph
|
|
xaxis:
|
|
buckets:
|
|
mode: time
|
|
name:
|
|
show: true
|
|
values: []
|
|
yaxes:
|
|
- format: bytes
|
|
label:
|
|
logBase: 1
|
|
max:
|
|
min:
|
|
show: true
|
|
- format: ms
|
|
label:
|
|
logBase: 1
|
|
max:
|
|
min:
|
|
show: true
|
|
- cacheTimeout:
|
|
colorBackground: false
|
|
colorValue: false
|
|
colors:
|
|
- rgba(50, 172, 45, 0.97)
|
|
- rgba(237, 129, 40, 0.89)
|
|
- rgba(245, 54, 54, 0.9)
|
|
datasource: "${DS_PROMETHEUS}"
|
|
editable: true
|
|
error: false
|
|
format: percentunit
|
|
gauge:
|
|
maxValue: 1
|
|
minValue: 0
|
|
show: true
|
|
thresholdLabels: false
|
|
thresholdMarkers: true
|
|
id: 12
|
|
interval:
|
|
links: []
|
|
mappingType: 1
|
|
mappingTypes:
|
|
- name: value to text
|
|
value: 1
|
|
- name: range to text
|
|
value: 2
|
|
maxDataPoints: 100
|
|
nullPointMode: connected
|
|
nullText:
|
|
postfix: ''
|
|
postfixFontSize: 50%
|
|
prefix: ''
|
|
prefixFontSize: 50%
|
|
rangeMaps:
|
|
- from: 'null'
|
|
text: N/A
|
|
to: 'null'
|
|
span: 3
|
|
sparkline:
|
|
fillColor: rgba(31, 118, 189, 0.18)
|
|
full: false
|
|
lineColor: rgb(31, 120, 193)
|
|
show: false
|
|
tableColumn: ''
|
|
targets:
|
|
- expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"}))
|
|
/ sum(node_filesystem_size{device!="rootfs"})
|
|
intervalFactor: 2
|
|
refId: A
|
|
step: 60
|
|
target: ''
|
|
thresholds: 0.75, 0.9
|
|
title: Disk space usage
|
|
type: singlestat
|
|
valueFontSize: 80%
|
|
valueMaps:
|
|
- op: "="
|
|
text: N/A
|
|
value: 'null'
|
|
valueName: current
|
|
repeat:
|
|
repeatIteration:
|
|
repeatRowId:
|
|
showTitle: false
|
|
title: New row
|
|
titleSize: h6
|
|
- collapse: false
|
|
height: 250px
|
|
panels:
|
|
- alerting: {}
|
|
aliasColors: {}
|
|
bars: false
|
|
dashLength: 10
|
|
dashes: false
|
|
datasource: "${DS_PROMETHEUS}"
|
|
editable: true
|
|
error: false
|
|
fill: 1
|
|
grid: {}
|
|
id: 8
|
|
legend:
|
|
avg: false
|
|
current: false
|
|
max: false
|
|
min: false
|
|
show: true
|
|
total: false
|
|
values: false
|
|
lines: true
|
|
linewidth: 2
|
|
links: []
|
|
nullPointMode: connected
|
|
percentage: false
|
|
pointradius: 5
|
|
points: false
|
|
renderer: flot
|
|
seriesOverrides:
|
|
- alias: 'transmitted '
|
|
yaxis: 2
|
|
spaceLength: 10
|
|
span: 6
|
|
stack: false
|
|
steppedLine: false
|
|
targets:
|
|
- expr: sum(rate(node_network_receive_bytes{device!~"lo"}[5m]))
|
|
hide: false
|
|
intervalFactor: 2
|
|
legendFormat: ''
|
|
refId: A
|
|
step: 10
|
|
target: ''
|
|
thresholds: []
|
|
timeFrom:
|
|
timeShift:
|
|
title: Network received
|
|
tooltip:
|
|
msResolution: false
|
|
shared: true
|
|
sort: 0
|
|
value_type: cumulative
|
|
type: graph
|
|
xaxis:
|
|
buckets:
|
|
mode: time
|
|
name:
|
|
show: true
|
|
values: []
|
|
yaxes:
|
|
- format: bytes
|
|
label:
|
|
logBase: 1
|
|
max:
|
|
min:
|
|
show: true
|
|
- format: bytes
|
|
label:
|
|
logBase: 1
|
|
max:
|
|
min:
|
|
show: true
|
|
- alerting: {}
|
|
aliasColors: {}
|
|
bars: false
|
|
dashLength: 10
|
|
dashes: false
|
|
datasource: "${DS_PROMETHEUS}"
|
|
editable: true
|
|
error: false
|
|
fill: 1
|
|
grid: {}
|
|
id: 10
|
|
legend:
|
|
avg: false
|
|
current: false
|
|
max: false
|
|
min: false
|
|
show: true
|
|
total: false
|
|
values: false
|
|
lines: true
|
|
linewidth: 2
|
|
links: []
|
|
nullPointMode: connected
|
|
percentage: false
|
|
pointradius: 5
|
|
points: false
|
|
renderer: flot
|
|
seriesOverrides:
|
|
- alias: 'transmitted '
|
|
yaxis: 2
|
|
spaceLength: 10
|
|
span: 6
|
|
stack: false
|
|
steppedLine: false
|
|
targets:
|
|
- expr: sum(rate(node_network_transmit_bytes{device!~"lo"}[5m]))
|
|
hide: false
|
|
intervalFactor: 2
|
|
legendFormat: ''
|
|
refId: B
|
|
step: 10
|
|
target: ''
|
|
thresholds: []
|
|
timeFrom:
|
|
timeShift:
|
|
title: Network transmitted
|
|
tooltip:
|
|
msResolution: false
|
|
shared: true
|
|
sort: 0
|
|
value_type: cumulative
|
|
type: graph
|
|
xaxis:
|
|
buckets:
|
|
mode: time
|
|
name:
|
|
show: true
|
|
values: []
|
|
yaxes:
|
|
- format: bytes
|
|
label:
|
|
logBase: 1
|
|
max:
|
|
min:
|
|
show: true
|
|
- format: bytes
|
|
label:
|
|
logBase: 1
|
|
max:
|
|
min:
|
|
show: true
|
|
repeat:
|
|
repeatIteration:
|
|
repeatRowId:
|
|
showTitle: false
|
|
title: New row
|
|
titleSize: h6
|
|
- collapse: false
|
|
height: 276
|
|
panels:
|
|
- aliasColors: {}
|
|
bars: false
|
|
dashLength: 10
|
|
dashes: false
|
|
datasource: "${DS_PROMETHEUS}"
|
|
fill: 1
|
|
id: 11
|
|
legend:
|
|
avg: false
|
|
current: false
|
|
max: false
|
|
min: false
|
|
show: true
|
|
total: false
|
|
values: false
|
|
lines: true
|
|
linewidth: 1
|
|
links: []
|
|
nullPointMode: 'null'
|
|
percentage: false
|
|
pointradius: 5
|
|
points: false
|
|
renderer: flot
|
|
seriesOverrides: []
|
|
spaceLength: 10
|
|
span: 9
|
|
stack: false
|
|
steppedLine: false
|
|
targets:
|
|
- expr: sum(kube_pod_info)
|
|
format: time_series
|
|
intervalFactor: 2
|
|
legendFormat: Current number of Pods
|
|
refId: A
|
|
step: 10
|
|
- expr: sum(kube_node_status_capacity_pods)
|
|
format: time_series
|
|
intervalFactor: 2
|
|
legendFormat: Maximum capacity of pods
|
|
refId: B
|
|
step: 10
|
|
thresholds: []
|
|
timeFrom:
|
|
timeShift:
|
|
title: Cluster Pod Utilization
|
|
tooltip:
|
|
shared: true
|
|
sort: 0
|
|
value_type: individual
|
|
type: graph
|
|
xaxis:
|
|
buckets:
|
|
mode: time
|
|
name:
|
|
show: true
|
|
values: []
|
|
yaxes:
|
|
- format: short
|
|
label:
|
|
logBase: 1
|
|
max:
|
|
min:
|
|
show: true
|
|
- format: short
|
|
label:
|
|
logBase: 1
|
|
max:
|
|
min:
|
|
show: true
|
|
- cacheTimeout:
|
|
colorBackground: false
|
|
colorValue: false
|
|
colors:
|
|
- rgba(50, 172, 45, 0.97)
|
|
- rgba(237, 129, 40, 0.89)
|
|
- rgba(245, 54, 54, 0.9)
|
|
datasource: "${DS_PROMETHEUS}"
|
|
editable: true
|
|
error: false
|
|
format: percent
|
|
gauge:
|
|
maxValue: 100
|
|
minValue: 0
|
|
show: true
|
|
thresholdLabels: false
|
|
thresholdMarkers: true
|
|
id: 7
|
|
interval:
|
|
links: []
|
|
mappingType: 1
|
|
mappingTypes:
|
|
- name: value to text
|
|
value: 1
|
|
- name: range to text
|
|
value: 2
|
|
maxDataPoints: 100
|
|
nullPointMode: connected
|
|
nullText:
|
|
postfix: ''
|
|
postfixFontSize: 50%
|
|
prefix: ''
|
|
prefixFontSize: 50%
|
|
rangeMaps:
|
|
- from: 'null'
|
|
text: N/A
|
|
to: 'null'
|
|
span: 3
|
|
sparkline:
|
|
fillColor: rgba(31, 118, 189, 0.18)
|
|
full: false
|
|
lineColor: rgb(31, 120, 193)
|
|
show: false
|
|
tableColumn: ''
|
|
targets:
|
|
- expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods)
|
|
* 100
|
|
format: time_series
|
|
intervalFactor: 2
|
|
legendFormat: ''
|
|
refId: A
|
|
step: 60
|
|
target: ''
|
|
thresholds: '80,90'
|
|
title: Pod Utilization
|
|
type: singlestat
|
|
valueFontSize: 80%
|
|
valueMaps:
|
|
- op: "="
|
|
text: N/A
|
|
value: 'null'
|
|
valueName: current
|
|
repeat:
|
|
repeatIteration:
|
|
repeatRowId:
|
|
showTitle: false
|
|
title: Dashboard Row
|
|
titleSize: h6
|
|
schemaVersion: 14
|
|
style: dark
|
|
tags: []
|
|
templating:
|
|
list:
|
|
- current:
|
|
text: Prometheus
|
|
value: Prometheus
|
|
hide: 0
|
|
label: Prometheus datasource
|
|
name: DS_PROMETHEUS
|
|
options: []
|
|
query: prometheus
|
|
refresh: 1
|
|
regex: ''
|
|
type: datasource
|
|
time:
|
|
from: now-1h
|
|
to: now
|
|
timepicker:
|
|
refresh_intervals:
|
|
- 5s
|
|
- 10s
|
|
- 30s
|
|
- 1m
|
|
- 5m
|
|
- 15m
|
|
- 30m
|
|
- 1h
|
|
- 2h
|
|
- 1d
|
|
time_options:
|
|
- 5m
|
|
- 15m
|
|
- 1h
|
|
- 6h
|
|
- 12h
|
|
- 24h
|
|
- 2d
|
|
- 7d
|
|
- 30d
|
|
timezone: browser
|
|
title: Kubernetes Capacity Planning
|
|
version: 4
|
|
inputs:
|
|
- name: prometheus
|
|
pluginId: prometheus
|
|
type: datasource
|
|
value: prometheus
|
|
overwrite: true
|
|
kubernetes_cluster_status:
|
|
__inputs:
|
|
- name: prometheus
|
|
label: prometheus
|
|
description: ''
|
|
type: datasource
|
|
pluginId: prometheus
|
|
pluginName: Prometheus
|
|
__requires:
|
|
- type: grafana
|
|
id: grafana
|
|
name: Grafana
|
|
version: 4.4.1
|
|
- type: datasource
|
|
id: prometheus
|
|
name: Prometheus
|
|
version: 1.0.0
|
|
- type: panel
|
|
id: singlestat
|
|
name: Singlestat
|
|
version: ''
|
|
annotations:
|
|
list: []
|
|
editable: true
|
|
gnetId:
|
|
graphTooltip: 0
|
|
hideControls: false
|
|
id:
|
|
links: []
|
|
rows:
|
|
- collapse: false
|
|
height: 129
|
|
panels:
|
|
- cacheTimeout:
|
|
colorBackground: false
|
|
colorValue: true
|
|
colors:
|
|
- rgba(50, 172, 45, 0.97)
|
|
- rgba(237, 129, 40, 0.89)
|
|
- rgba(245, 54, 54, 0.9)
|
|
datasource: "${DS_PROMETHEUS}"
|
|
format: none
|
|
gauge:
|
|
maxValue: 100
|
|
minValue: 0
|
|
show: false
|
|
thresholdLabels: false
|
|
thresholdMarkers: true
|
|
id: 5
|
|
interval:
|
|
links: []
|
|
mappingType: 1
|
|
mappingTypes:
|
|
- name: value to text
|
|
value: 1
|
|
- name: range to text
|
|
value: 2
|
|
maxDataPoints: 100
|
|
nullPointMode: connected
|
|
nullText:
|
|
postfix: ''
|
|
postfixFontSize: 50%
|
|
prefix: ''
|
|
prefixFontSize: 50%
|
|
rangeMaps:
|
|
- from: 'null'
|
|
text: N/A
|
|
to: 'null'
|
|
span: 6
|
|
sparkline:
|
|
fillColor: rgba(31, 118, 189, 0.18)
|
|
full: false
|
|
lineColor: rgb(31, 120, 193)
|
|
show: false
|
|
tableColumn: ''
|
|
targets:
|
|
- expr: count(up{job=~"apiserver|kube-scheduler|kube-controller-manager"} == 0)
|
|
format: time_series
|
|
intervalFactor: 2
|
|
legendFormat: ''
|
|
refId: A
|
|
step: 600
|
|
thresholds: '1,3'
|
|
title: Control Plane UP
|
|
type: singlestat
|
|
valueFontSize: 80%
|
|
valueMaps:
|
|
- op: "="
|
|
text: UP
|
|
value: 'null'
|
|
valueName: total
|
|
- cacheTimeout:
|
|
colorBackground: false
|
|
colorValue: true
|
|
colors:
|
|
- rgba(50, 172, 45, 0.97)
|
|
- rgba(237, 129, 40, 0.89)
|
|
- rgba(245, 54, 54, 0.9)
|
|
datasource: "${DS_PROMETHEUS}"
|
|
format: none
|
|
gauge:
|
|
maxValue: 100
|
|
minValue: 0
|
|
show: false
|
|
thresholdLabels: false
|
|
thresholdMarkers: true
|
|
id: 6
|
|
interval:
|
|
links: []
|
|
mappingType: 1
|
|
mappingTypes:
|
|
- name: value to text
|
|
value: 1
|
|
- name: range to text
|
|
value: 2
|
|
maxDataPoints: 100
|
|
nullPointMode: connected
|
|
nullText:
|
|
postfix: ''
|
|
postfixFontSize: 50%
|
|
prefix: ''
|
|
prefixFontSize: 50%
|
|
rangeMaps:
|
|
- from: 'null'
|
|
text: N/A
|
|
to: 'null'
|
|
span: 6
|
|
sparkline:
|
|
fillColor: rgba(31, 118, 189, 0.18)
|
|
full: false
|
|
lineColor: rgb(31, 120, 193)
|
|
show: false
|
|
tableColumn: ''
|
|
targets:
|
|
- expr: sum(ALERTS{alertstate="firing",alertname!="DeadMansSwitch"})
|
|
format: time_series
|
|
intervalFactor: 2
|
|
legendFormat: ''
|
|
refId: A
|
|
step: 600
|
|
thresholds: '3,5'
|
|
title: Alerts Firing
|
|
type: singlestat
|
|
valueFontSize: 80%
|
|
valueMaps:
|
|
- op: "="
|
|
text: '0'
|
|
value: 'null'
|
|
valueName: current
|
|
repeat:
|
|
repeatIteration:
|
|
repeatRowId:
|
|
showTitle: true
|
|
title: Cluster Health
|
|
titleSize: h6
|
|
- collapse: false
|
|
height: 168
|
|
panels:
|
|
- cacheTimeout:
|
|
colorBackground: false
|
|
colorValue: false
|
|
colors:
|
|
- rgba(245, 54, 54, 0.9)
|
|
- rgba(237, 129, 40, 0.89)
|
|
- rgba(50, 172, 45, 0.97)
|
|
datasource: "${DS_PROMETHEUS}"
|
|
decimals:
|
|
format: percent
|
|
gauge:
|
|
maxValue: 100
|
|
minValue: 0
|
|
show: true
|
|
thresholdLabels: false
|
|
thresholdMarkers: true
|
|
id: 1
|
|
interval:
|
|
links: []
|
|
mappingType: 1
|
|
mappingTypes:
|
|
- name: value to text
|
|
value: 1
|
|
- name: range to text
|
|
value: 2
|
|
maxDataPoints: 100
|
|
nullPointMode: connected
|
|
nullText:
|
|
postfix: ''
|
|
postfixFontSize: 50%
|
|
prefix: ''
|
|
prefixFontSize: 50%
|
|
rangeMaps:
|
|
- from: 'null'
|
|
text: N/A
|
|
to: 'null'
|
|
span: 3
|
|
sparkline:
|
|
fillColor: rgba(31, 118, 189, 0.18)
|
|
full: false
|
|
lineColor: rgb(31, 120, 193)
|
|
show: false
|
|
tableColumn: ''
|
|
targets:
|
|
- expr: (sum(up{job="apiserver"} == 1) / count(up{job="apiserver"})) * 100
|
|
format: time_series
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: ''
|
|
refId: A
|
|
step: 600
|
|
thresholds: '50,80'
|
|
title: API Servers UP
|
|
type: singlestat
|
|
valueFontSize: 80%
|
|
valueMaps:
|
|
- op: "="
|
|
text: N/A
|
|
value: 'null'
|
|
valueName: current
|
|
- cacheTimeout:
|
|
colorBackground: false
|
|
colorValue: false
|
|
colors:
|
|
- rgba(245, 54, 54, 0.9)
|
|
- rgba(237, 129, 40, 0.89)
|
|
- rgba(50, 172, 45, 0.97)
|
|
datasource: "${DS_PROMETHEUS}"
|
|
decimals:
|
|
format: percent
|
|
gauge:
|
|
maxValue: 100
|
|
minValue: 0
|
|
show: true
|
|
thresholdLabels: false
|
|
thresholdMarkers: true
|
|
id: 2
|
|
interval:
|
|
links: []
|
|
mappingType: 1
|
|
mappingTypes:
|
|
- name: value to text
|
|
value: 1
|
|
- name: range to text
|
|
value: 2
|
|
maxDataPoints: 100
|
|
nullPointMode: connected
|
|
nullText:
|
|
postfix: ''
|
|
postfixFontSize: 50%
|
|
prefix: ''
|
|
prefixFontSize: 50%
|
|
rangeMaps:
|
|
- from: 'null'
|
|
text: N/A
|
|
to: 'null'
|
|
span: 3
|
|
sparkline:
|
|
fillColor: rgba(31, 118, 189, 0.18)
|
|
full: false
|
|
lineColor: rgb(31, 120, 193)
|
|
show: false
|
|
tableColumn: ''
|
|
targets:
|
|
- expr: (sum(up{job="kube-controller-manager-discovery"} == 1) / count(up{job="kube-controller-manager-discovery"}))
|
|
* 100
|
|
format: time_series
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: ''
|
|
refId: A
|
|
step: 600
|
|
thresholds: '50,80'
|
|
title: Controller Managers UP
|
|
type: singlestat
|
|
valueFontSize: 80%
|
|
valueMaps:
|
|
- op: "="
|
|
text: N/A
|
|
value: 'null'
|
|
valueName: current
|
|
- cacheTimeout:
|
|
colorBackground: false
|
|
colorValue: false
|
|
colors:
|
|
- rgba(245, 54, 54, 0.9)
|
|
- rgba(237, 129, 40, 0.89)
|
|
- rgba(50, 172, 45, 0.97)
|
|
datasource: "${DS_PROMETHEUS}"
|
|
decimals:
|
|
format: percent
|
|
gauge:
|
|
maxValue: 100
|
|
minValue: 0
|
|
show: true
|
|
thresholdLabels: false
|
|
thresholdMarkers: true
|
|
id: 3
|
|
interval:
|
|
links: []
|
|
mappingType: 1
|
|
mappingTypes:
|
|
- name: value to text
|
|
value: 1
|
|
- name: range to text
|
|
value: 2
|
|
maxDataPoints: 100
|
|
nullPointMode: connected
|
|
nullText:
|
|
postfix: ''
|
|
postfixFontSize: 50%
|
|
prefix: ''
|
|
prefixFontSize: 50%
|
|
rangeMaps:
|
|
- from: 'null'
|
|
text: N/A
|
|
to: 'null'
|
|
span: 3
|
|
sparkline:
|
|
fillColor: rgba(31, 118, 189, 0.18)
|
|
full: false
|
|
lineColor: rgb(31, 120, 193)
|
|
show: false
|
|
tableColumn: ''
|
|
targets:
|
|
- expr: (sum(up{job="kube-scheduler-discovery"} == 1) / count(up{job="kube-scheduler-discovery"}))
|
|
* 100
|
|
format: time_series
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: ''
|
|
refId: A
|
|
step: 600
|
|
thresholds: '50,80'
|
|
title: Schedulers UP
|
|
type: singlestat
|
|
valueFontSize: 80%
|
|
valueMaps:
|
|
- op: "="
|
|
text: N/A
|
|
value: 'null'
|
|
valueName: current
|
|
- cacheTimeout:
|
|
colorBackground: false
|
|
colorValue: true
|
|
colors:
|
|
- rgba(50, 172, 45, 0.97)
|
|
- rgba(237, 129, 40, 0.89)
|
|
- rgba(245, 54, 54, 0.9)
|
|
datasource: "${DS_PROMETHEUS}"
|
|
decimals:
|
|
format: none
|
|
gauge:
|
|
maxValue: 100
|
|
minValue: 0
|
|
show: false
|
|
thresholdLabels: false
|
|
thresholdMarkers: true
|
|
hideTimeOverride: false
|
|
id: 4
|
|
interval:
|
|
links: []
|
|
mappingType: 1
|
|
mappingTypes:
|
|
- name: value to text
|
|
value: 1
|
|
- name: range to text
|
|
value: 2
|
|
maxDataPoints: 100
|
|
nullPointMode: connected
|
|
nullText:
|
|
postfix: ''
|
|
postfixFontSize: 50%
|
|
prefix: ''
|
|
prefixFontSize: 50%
|
|
rangeMaps:
|
|
- from: 'null'
|
|
text: N/A
|
|
to: 'null'
|
|
span: 3
|
|
sparkline:
|
|
fillColor: rgba(31, 118, 189, 0.18)
|
|
full: false
|
|
lineColor: rgb(31, 120, 193)
|
|
show: false
|
|
tableColumn: ''
|
|
targets:
|
|
- expr: count(increase(kube_pod_container_status_restarts{namespace=~"kube-system|tectonic-system"}[1h])
|
|
> 5)
|
|
format: time_series
|
|
interval: ''
|
|
intervalFactor: 2
|
|
legendFormat: ''
|
|
refId: A
|
|
step: 600
|
|
thresholds: '1,3'
|
|
title: Crashlooping Control Plane Pods
|
|
type: singlestat
|
|
valueFontSize: 80%
|
|
valueMaps:
|
|
- op: "="
|
|
text: '0'
|
|
value: 'null'
|
|
valueName: current
|
|
repeat:
|
|
repeatIteration:
|
|
repeatRowId:
|
|
showTitle: true
|
|
title: Control Plane Status
|
|
titleSize: h6
|
|
- collapse: false
|
|
height: 158
|
|
panels:
|
|
- cacheTimeout:
|
|
colorBackground: false
|
|
colorValue: false
|
|
colors:
|
|
- rgba(50, 172, 45, 0.97)
|
|
- rgba(237, 129, 40, 0.89)
|
|
- rgba(245, 54, 54, 0.9)
|
|
datasource: "${DS_PROMETHEUS}"
|
|
format: percent
|
|
gauge:
|
|
maxValue: 100
|
|
minValue: 0
|
|
show: true
|
|
thresholdLabels: false
|
|
thresholdMarkers: true
|
|
id: 8
|
|
interval:
|
|
links: []
|
|
mappingType: 1
|
|
mappingTypes:
|
|
- name: value to text
|
|
value: 1
|
|
- name: range to text
|
|
value: 2
|
|
maxDataPoints: 100
|
|
nullPointMode: connected
|
|
nullText:
|
|
postfix: ''
|
|
postfixFontSize: 50%
|
|
prefix: ''
|
|
prefixFontSize: 50%
|
|
rangeMaps:
|
|
- from: 'null'
|
|
text: N/A
|
|
to: 'null'
|
|
span: 3
|
|
sparkline:
|
|
fillColor: rgba(31, 118, 189, 0.18)
|
|
full: false
|
|
lineColor: rgb(31, 120, 193)
|
|
show: false
|
|
tableColumn: ''
|
|
targets:
|
|
- expr: sum(100 - (avg by (instance) (rate(node_cpu{job="node-exporter",mode="idle"}[5m]))
|
|
* 100)) / count(node_cpu{job="node-exporter",mode="idle"})
|
|
format: time_series
|
|
intervalFactor: 2
|
|
legendFormat: ''
|
|
refId: A
|
|
step: 600
|
|
thresholds: '80,90'
|
|
title: CPU Utilization
|
|
type: singlestat
|
|
valueFontSize: 80%
|
|
valueMaps:
|
|
- op: "="
|
|
text: N/A
|
|
value: 'null'
|
|
valueName: avg
|
|
- cacheTimeout:
|
|
colorBackground: false
|
|
colorValue: false
|
|
colors:
|
|
- rgba(50, 172, 45, 0.97)
|
|
- rgba(237, 129, 40, 0.89)
|
|
- rgba(245, 54, 54, 0.9)
|
|
datasource: "${DS_PROMETHEUS}"
|
|
format: percent
|
|
gauge:
|
|
maxValue: 100
|
|
minValue: 0
|
|
show: true
|
|
thresholdLabels: false
|
|
thresholdMarkers: true
|
|
id: 7
|
|
interval:
|
|
links: []
|
|
mappingType: 1
|
|
mappingTypes:
|
|
- name: value to text
|
|
value: 1
|
|
- name: range to text
|
|
value: 2
|
|
maxDataPoints: 100
|
|
nullPointMode: connected
|
|
nullText:
|
|
postfix: ''
|
|
postfixFontSize: 50%
|
|
prefix: ''
|
|
prefixFontSize: 50%
|
|
rangeMaps:
|
|
- from: 'null'
|
|
text: N/A
|
|
to: 'null'
|
|
span: 3
|
|
sparkline:
|
|
fillColor: rgba(31, 118, 189, 0.18)
|
|
full: false
|
|
lineColor: rgb(31, 120, 193)
|
|
show: false
|
|
tableColumn: ''
|
|
targets:
|
|
- expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers)
|
|
- sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100"
|
|
format: time_series
|
|
intervalFactor: 2
|
|
legendFormat: ''
|
|
refId: A
|
|
step: 600
|
|
thresholds: '80,90'
|
|
title: Memory Utilization
|
|
type: singlestat
|
|
valueFontSize: 80%
|
|
valueMaps:
|
|
- op: "="
|
|
text: N/A
|
|
value: 'null'
|
|
valueName: avg
|
|
- cacheTimeout:
|
|
colorBackground: false
|
|
colorValue: false
|
|
colors:
|
|
- rgba(50, 172, 45, 0.97)
|
|
- rgba(237, 129, 40, 0.89)
|
|
- rgba(245, 54, 54, 0.9)
|
|
datasource: "${DS_PROMETHEUS}"
|
|
format: percent
|
|
gauge:
|
|
maxValue: 100
|
|
minValue: 0
|
|
show: true
|
|
thresholdLabels: false
|
|
thresholdMarkers: true
|
|
id: 9
|
|
interval:
|
|
links: []
|
|
mappingType: 1
|
|
mappingTypes:
|
|
- name: value to text
|
|
value: 1
|
|
- name: range to text
|
|
value: 2
|
|
maxDataPoints: 100
|
|
nullPointMode: connected
|
|
nullText:
|
|
postfix: ''
|
|
postfixFontSize: 50%
|
|
prefix: ''
|
|
prefixFontSize: 50%
|
|
rangeMaps:
|
|
- from: 'null'
|
|
text: N/A
|
|
to: 'null'
|
|
span: 3
|
|
sparkline:
|
|
fillColor: rgba(31, 118, 189, 0.18)
|
|
full: false
|
|
lineColor: rgb(31, 120, 193)
|
|
show: false
|
|
tableColumn: ''
|
|
targets:
|
|
- expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"}))
|
|
/ sum(node_filesystem_size{device!="rootfs"}) * 100
|
|
format: time_series
|
|
intervalFactor: 2
|
|
legendFormat: ''
|
|
refId: A
|
|
step: 600
|
|
thresholds: '80,90'
|
|
title: Filesystem Utilization
|
|
type: singlestat
|
|
valueFontSize: 80%
|
|
valueMaps:
|
|
- op: "="
|
|
text: N/A
|
|
value: 'null'
|
|
valueName: avg
|
|
- cacheTimeout:
|
|
colorBackground: false
|
|
colorValue: false
|
|
colors:
|
|
- rgba(50, 172, 45, 0.97)
|
|
- rgba(237, 129, 40, 0.89)
|
|
- rgba(245, 54, 54, 0.9)
|
|
datasource: "${DS_PROMETHEUS}"
|
|
format: percent
|
|
gauge:
|
|
maxValue: 100
|
|
minValue: 0
|
|
show: true
|
|
thresholdLabels: false
|
|
thresholdMarkers: true
|
|
id: 10
|
|
interval:
|
|
links: []
|
|
mappingType: 1
|
|
mappingTypes:
|
|
- name: value to text
|
|
value: 1
|
|
- name: range to text
|
|
value: 2
|
|
maxDataPoints: 100
|
|
nullPointMode: connected
|
|
nullText:
|
|
postfix: ''
|
|
postfixFontSize: 50%
|
|
prefix: ''
|
|
prefixFontSize: 50%
|
|
rangeMaps:
|
|
- from: 'null'
|
|
text: N/A
|
|
to: 'null'
|
|
span: 3
|
|
sparkline:
|
|
fillColor: rgba(31, 118, 189, 0.18)
|
|
full: false
|
|
lineColor: rgb(31, 120, 193)
|
|
show: false
|
|
tableColumn: ''
|
|
targets:
|
|
- expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods)
|
|
* 100
|
|
format: time_series
|
|
intervalFactor: 2
|
|
legendFormat: ''
|
|
refId: A
|
|
step: 600
|
|
thresholds: '80,90'
|
|
title: Pod Utilization
|
|
type: singlestat
|
|
valueFontSize: 80%
|
|
valueMaps:
|
|
- op: "="
|
|
text: N/A
|
|
value: 'null'
|
|
valueName: avg
|
|
repeat:
|
|
repeatIteration:
|
|
repeatRowId:
|
|
showTitle: true
|
|
title: Capacity Planning
|
|
titleSize: h6
|
|
schemaVersion: 14
|
|
style: dark
|
|
tags: []
|
|
templating:
|
|
list:
|
|
- current:
|
|
text: Prometheus
|
|
value: Prometheus
|
|
hide: 0
|
|
label: Prometheus datasource
|
|
name: DS_PROMETHEUS
|
|
options: []
|
|
query: prometheus
|
|
refresh: 1
|
|
regex: ''
|
|
type: datasource
|
|
time:
|
|
from: now-6h
|
|
to: now
|
|
timepicker:
|
|
refresh_intervals:
|
|
- 5s
|
|
- 10s
|
|
- 30s
|
|
- 1m
|
|
- 5m
|
|
- 15m
|
|
- 30m
|
|
- 1h
|
|
- 2h
|
|
- 1d
|
|
time_options:
|
|
- 5m
|
|
- 15m
|
|
- 1h
|
|
- 6h
|
|
- 12h
|
|
- 24h
|
|
- 2d
|
|
- 7d
|
|
- 30d
|
|
timezone: ''
|
|
title: Kubernetes Cluster Status
|
|
version: 3
|
|
inputs:
|
|
- name: prometheus
|
|
pluginId: prometheus
|
|
type: datasource
|
|
value: prometheus
|
|
overwrite: true
|