# Steve Wilkerson a1f608ed74 Add Grafana chart to OSH infra
# Moves the grafana chart to OSH infra along with basic rbac rules
# that may be tightened with future work.
#
# Change-Id: Ie14627530a73d4b7b01eb93ca5f7174d99d9caec
# 2018-01-04 09:10:55 -06:00
#
# 11974 lines
# 302 KiB
# YAML

# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for grafana
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# Container images referenced by this chart, keyed by the role each one plays.
images:
  tags:
    grafana: docker.io/grafana/grafana:4.5.2
    datasource: docker.io/kolla/ubuntu-source-heat-engine:3.0.3
    dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.2.1
    image_repo_sync: docker.io/docker:17.07.0
  pull_policy: IfNotPresent
  local_registry:
    active: false
    # Entries name keys from `tags` above; presumably these images are never
    # redirected to the local registry -- confirm against helm-toolkit.
    exclude:
      - dep_check
      - image_repo_sync
# Node-selector label (key/value pair) for this chart's workloads.
# NOTE(review): the selector pair is nested under `jobs` because that is the
# only layout in which `jobs:` is not an empty key -- confirm against the
# templates that read `.Values.labels`.
labels:
  jobs:
    node_selector_key: openstack-control-plane
    node_selector_value: enabled
# Pod-level settings: anti-affinity, extra mounts, replica count, upgrade
# strategy, termination grace period and resource requests/limits.
pod:
  affinity:
    anti:
      type:
        default: preferredDuringSchedulingIgnoredDuringExecution
      topologyKey:
        default: kubernetes.io/hostname
  mounts:
    grafana:
      # No additional mounts are declared by default for either container.
      init_container: null
      grafana: null
  replicas:
    grafana: 1
  lifecycle:
    upgrades:
      deployments:
        revision_history: 3
        pod_replacement_strategy: RollingUpdate
        rolling_update:
          max_unavailable: 1
          max_surge: 3
    termination_grace_period:
      grafana:
        timeout: 600
  resources:
    # Shipped disabled; presumably the requests/limits below are only applied
    # when this is switched to true -- confirm against helm-toolkit.
    enabled: false
    jobs:
      image_repo_sync:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      # NOTE(review): `bootstrap` is placed under `jobs` on the assumption
      # that it is a job like `image_repo_sync` -- confirm against templates.
      bootstrap:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
    grafana:
      requests:
        memory: "128Mi"
        cpu: "100m"
      limits:
        memory: "1024Mi"
        cpu: "2000m"
# Service endpoint definitions: one entry for Grafana itself and one for the
# Prometheus instance it is pointed at.
endpoints:
  cluster_domain_suffix: cluster.local
  grafana:
    name: grafana
    namespace: null
    auth:
      admin:
        # NOTE(review): default credentials are admin/admin; override these
        # in any real deployment.
        username: admin
        password: admin
    hosts:
      default: grafana-dashboard
      public: grafana
    host_fqdn_override:
      default: null
    path:
      default: null
    scheme:
      default: http
    port:
      grafana:
        default: 3000
  monitoring:
    name: prometheus
    namespace: null
    hosts:
      default: prom-metrics
      public: prometheus
    host_fqdn_override:
      default: null
    path:
      default: null
    scheme:
      default: http
    port:
      api:
        default: 9090
        public: 80
# Per-component dependency lists (jobs and services); presumably consumed by
# the dep_check init container named under `images.tags` -- confirm.
dependencies:
  register_datasource:
    # No job dependencies; the datasource registration only lists the
    # grafana service.
    jobs: null
    services:
      - service: grafana
        endpoint: internal
  grafana:
    services: null
  image_repo_sync:
    services:
      - service: local_image_registry
        endpoint: internal
# Extra dependencies keyed by feature name; presumably only applied when the
# matching feature (here the local image registry) is active -- confirm.
conditional_dependencies:
  local_image_registry:
    jobs:
      - grafana-image-repo-sync
    services:
      - service: local_image_registry
        endpoint: node
# Network exposure settings for the grafana service: service port, optional
# NodePort and ingress options.
network:
  grafana:
    port: 3000
    node_port:
      enabled: false
      port: 30902
    ingress:
      public: true
      proxy_body_size: 1024M
# One boolean per manifest name, all true by default; presumably each flag
# gates rendering of the matching template -- confirm against templates/.
manifests:
  configmap_bin: true
  configmap_dashboards: true
  configmap_etc: true
  deployment: true
  ingress: true
  job_datasource: true
  job_image_repo_sync: true
  secret_admin: true
  service: true
  service_ingress: true
conf:
datasource:
name: prometheus
type: prometheus
database:
access: proxy
isDefault: true
grafana:
paths:
data: /var/lib/grafana/data
plugins: /var/lib/grafana/plugins
server:
protocol: http
http_port: 3000
session:
provider: file
provider_config: sessions
cookie_name: grafana_sess
cookie_secure: false
session_life_time: 86400
security:
admin_user: ${GF_SECURITY_ADMIN_USER}
admin_password: ${GF_SECURITY_ADMIN_PASSWORD}
cookie_username: grafana_user
cookie_remember_name: grafana_remember
login_remember_days: 7
users:
allow_sign_up: false
allow_org_create: false
auto_assign_org: true
auto_assign_org_role: Admin
default_theme: dark
log:
mode: console
level: info
log.console:
level: info
format: console
dashboards.json:
enabled: true
path: /var/lib/grafana/dashboards
grafana_net:
url: https://grafana.net
dashboards:
ceph_cluster:
__inputs:
- name: prometheus
label: Prometheus
description: Prometheus.IO
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: panel
id: graph
name: Graph
version: ''
- type: grafana
id: grafana
name: Grafana
version: 3.1.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
id:
title: Ceph - Cluster
tags:
- ceph
- cluster
style: dark
timezone: browser
editable: true
hideControls: false
sharedCrosshair: false
rows:
- collapse: false
editable: true
height: 150px
panels:
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 21
interval: 1m
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: count(ceph_health_status)
interval: "$interval"
intervalFactor: 1
refId: A
step: 60
thresholds: '0,1'
title: Status
transparent: false
type: singlestat
valueFontSize: 100%
valueMaps:
- op: "="
text: N/A
value: 'null'
- op: "="
text: WARNING
value: '0'
- op: "="
text: HEALTHY
value: '1'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 14
interval: 1m
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_monitor_quorum_count
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: '2,3'
title: Monitors In Quorum
transparent: false
type: singlestat
valueFontSize: 100%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 22
interval: 1m
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: count(ceph_pool_available_bytes)
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: ''
title: Pools
transparent: false
type: singlestat
valueFontSize: 100%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 33
interval: 1m
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: ceph_cluster_capacity_bytes
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: 0.025,0.1
title: Cluster Capacity
transparent: false
type: singlestat
valueFontSize: 100%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 34
interval: 1m
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: ceph_cluster_used_bytes
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: 0.025,0.1
title: Used Capacity
transparent: false
type: singlestat
valueFontSize: 100%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percentunit
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 23
interval: 1m
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_cluster_available_bytes/ceph_cluster_capacity_bytes
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: '70,80'
title: Available Capacity
transparent: false
type: singlestat
valueFontSize: 100%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
title: New row
- collapse: false
editable: true
height: 100px
panels:
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 26
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_osds_in
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: ''
title: OSDs IN
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 40, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 27
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_osds - ceph_osds_in
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: '1,1'
title: OSDs OUT
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 28
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum(ceph_osd_up)
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: ''
title: OSDs UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 40, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 29
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_osds_down
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: '1,1'
title: OSDs DOWN
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 30
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: avg(ceph_osd_pgs)
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: '250,300'
title: Average PGs per OSD
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: s
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 31
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: avg(ceph_osd_perf_apply_latency_seconds)
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: 0.01,0.05
title: Average OSD Apply Latency
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: s
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 32
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: avg(ceph_osd_perf_commit_latency_seconds)
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: 0.01,0.05
title: Average OSD Commit Latency
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: s
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 24
interval: 1m
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
repeat:
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: true
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: avg(ceph_monitor_latency_seconds)
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: '70,80'
title: Average Monitor Latency
transparent: false
type: singlestat
valueFontSize: 100%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
title: New row
- collapse: false
editable: true
height: 250px
panels:
- aliasColors:
Available: "#EAB839"
Total Capacity: "#447EBC"
Used: "#BF1B00"
total_avail: "#6ED0E0"
total_space: "#7EB26D"
total_used: "#890F02"
bars: false
datasource: prometheus
editable: true
error: false
fill: 4
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: '300'
id: 1
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 0
links: []
minSpan:
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: Total Capacity
fill: 0
linewidth: 3
stack: false
span: 4
stack: true
steppedLine: false
targets:
- expr: ceph_cluster_available_bytes
interval: "$interval"
intervalFactor: 1
legendFormat: Available
refId: A
step: 60
- expr: ceph_cluster_used_bytes
interval: "$interval"
intervalFactor: 1
legendFormat: Used
refId: B
step: 60
- expr: ceph_cluster_capacity_bytes
interval: "$interval"
intervalFactor: 1
legendFormat: Total Capacity
refId: C
step: 60
timeFrom:
timeShift:
title: Capacity
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors:
Total Capacity: "#7EB26D"
Used: "#BF1B00"
total_avail: "#6ED0E0"
total_space: "#7EB26D"
total_used: "#890F02"
bars: false
datasource: prometheus
decimals: 0
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
thresholdLine: false
height: '300'
id: 3
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
minSpan:
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: true
steppedLine: false
targets:
- expr: ceph_client_io_write_ops
interval: "$interval"
intervalFactor: 1
legendFormat: Write
refId: A
step: 60
- expr: ceph_client_io_read_ops
interval: "$interval"
intervalFactor: 1
legendFormat: Read
refId: B
step: 60
timeFrom:
timeShift:
title: IOPS
tooltip:
msResolution: true
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: none
label: ''
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: '300'
id: 7
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: true
steppedLine: false
targets:
- expr: ceph_client_io_write_bytes
interval: "$interval"
intervalFactor: 1
legendFormat: Write
refId: A
step: 60
- expr: ceph_client_io_read_bytes
interval: "$interval"
intervalFactor: 1
legendFormat: Read
refId: B
step: 60
timeFrom:
timeShift:
title: Throughput
tooltip:
msResolution: false
shared: true
sort: 2
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
repeat:
showTitle: true
title: CLUSTER
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 18
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^Total.*$/"
stack: false
span: 12
stack: true
steppedLine: false
targets:
- expr: ceph_cluster_objects
interval: "$interval"
intervalFactor: 1
legendFormat: Total
refId: A
step: 60
- expr: ceph_degraded_objects
interval: "$interval"
intervalFactor: 1
legendFormat: Degraded
refId: B
step: 60
- expr: ceph_misplaced_objects
interval: "$interval"
intervalFactor: 1
legendFormat: Misplaced
refId: C
step: 60
timeFrom:
timeShift:
title: Objects in the Cluster
tooltip:
msResolution: false
shared: true
sort: 1
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 19
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^Total.*$/"
stack: false
span: 6
stack: true
steppedLine: false
targets:
- expr: sum(ceph_osd_pgs)
interval: "$interval"
intervalFactor: 1
legendFormat: Total
refId: A
step: 60
- expr: ceph_degraded_pgs
interval: "$interval"
intervalFactor: 1
legendFormat: Degraded
refId: B
step: 60
- expr: ceph_stale_pgs
interval: "$interval"
intervalFactor: 1
legendFormat: Stale
refId: C
step: 60
- expr: ceph_unclean_pgs
interval: "$interval"
intervalFactor: 1
legendFormat: Unclean
refId: D
step: 60
- expr: ceph_undersized_pgs
interval: "$interval"
intervalFactor: 1
legendFormat: Undersized
refId: E
step: 60
- expr: ceph_stuck_degraded_pgs + ceph_stuck_stale_pgs + ceph_stuck_unclean_pgs
+ ceph_stuck_undersized_pgs
interval: "$interval"
intervalFactor: 1
legendFormat: Stuck
refId: F
step: 60
timeFrom:
timeShift:
title: PGs
tooltip:
msResolution: false
shared: true
sort: 1
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 20
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^Total.*$/"
stack: false
span: 6
stack: true
steppedLine: false
targets:
- expr: ceph_stuck_degraded_pgs
interval: "$interval"
intervalFactor: 1
legendFormat: Degraded
refId: F
step: 60
- expr: ceph_stuck_stale_pgs
interval: "$interval"
intervalFactor: 1
legendFormat: Stale
refId: A
step: 60
- expr: ceph_stuck_unclean_pgs
interval: "$interval"
intervalFactor: 1
legendFormat: Unclean
refId: B
step: 60
- expr: ceph_stuck_undersized_pgs
interval: "$interval"
intervalFactor: 1
legendFormat: Undersized
refId: C
step: 60
timeFrom:
timeShift:
title: Stuck PGs
tooltip:
msResolution: false
shared: true
sort: 1
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
title: New row
- collapse: false
editable: true
height: 150px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 15
isNew: true
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
targets:
- expr: ceph_recovery_io_bytes
interval: "$interval"
intervalFactor: 1
legendFormat: Bytes
refId: A
step: 60
timeFrom:
timeShift:
title: Bytes
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 16
isNew: true
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^.*/"
color: "#E0752D"
span: 4
stack: false
steppedLine: false
targets:
- expr: ceph_recovery_io_keys
interval: "$interval"
intervalFactor: 1
legendFormat: Keys
refId: A
step: 60
timeFrom:
timeShift:
title: Keys
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 17
isNew: true
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^.*$/"
color: "#890F02"
span: 4
stack: false
steppedLine: false
targets:
- expr: ceph_recovery_io_objects
interval: "$interval"
intervalFactor: 1
legendFormat: Objects
refId: A
step: 60
timeFrom:
timeShift:
title: Objects
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
showTitle: true
title: Recovery
time:
from: now-1h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
templating:
list:
- auto: true
auto_count: 10
auto_min: 1m
current:
tags: []
text: 1m
value: 1m
datasource:
hide: 0
includeAll: false
label: Interval
multi: false
name: interval
options:
- selected: false
text: auto
value: "$__auto_interval"
- selected: true
text: 1m
value: 1m
- selected: false
text: 10m
value: 10m
- selected: false
text: 30m
value: 30m
- selected: false
text: 1h
value: 1h
- selected: false
text: 6h
value: 6h
- selected: false
text: 12h
value: 12h
- selected: false
text: 1d
value: 1d
- selected: false
text: 7d
value: 7d
- selected: false
text: 14d
value: 14d
- selected: false
text: 30d
value: 30d
query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d
refresh: 0
type: interval
annotations:
list: []
refresh: 1m
schemaVersion: 12
version: 26
links: []
gnetId: 917
description: "Ceph Cluster overview.\r\n"
ceph_osd:
__inputs:
- name: prometheus
label: Prometheus
description: Prometheus.IO
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: panel
id: graph
name: Graph
version: ''
- type: grafana
id: grafana
name: Grafana
version: 3.1.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
id:
title: Ceph - OSD
tags:
- ceph
- osd
style: dark
timezone: browser
editable: true
hideControls: false
sharedCrosshair: false
rows:
- collapse: false
editable: true
height: 100px
panels:
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 40, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 6
interval:
isNew: true
links: []
mappingType: 2
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
- from: '0'
text: DOWN
to: '0.99'
- from: '0.99'
text: UP
to: '1'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_osd_up{osd="$osd"}
interval: "$interval"
intervalFactor: 1
refId: A
step: 60
thresholds: '0,1'
timeFrom:
title: Status
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: DOWN
value: '0'
- op: "="
text: UP
value: '1'
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 40, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 8
interval:
isNew: true
links: []
mappingType: 2
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
- from: '0'
text: OUT
to: '0.99'
- from: '0.99'
text: IN
to: '1'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_osd_in{osd="$osd"}
interval: "$interval"
intervalFactor: 1
refId: A
step: 60
thresholds: '0,1'
timeFrom:
title: Available
transparent: false
type: singlestat
valueFontSize: 80%
# Value-to-text labels for the ceph_osd_in ("Available") singlestat.
# Worded OUT/IN so they agree with this panel's rangeMaps; the previous
# DOWN/UP labels were the ones used by the ceph_osd_up ("Status") panel.
valueMaps:
  - op: "="
    text: OUT
    value: '0'
  - op: "="
    text: IN
    value: '1'
  - op: "="
    text: N/A
    value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 10
interval:
isNew: true
links: []
mappingType: 2
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_osds
interval: "$interval"
intervalFactor: 1
refId: A
step: 60
thresholds: '0,1'
timeFrom:
title: Total OSDs
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: DOWN
value: '0'
- op: "="
text: UP
value: '1'
- op: "="
text: N/A
value: 'null'
valueName: current
title: New row
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1: 250
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2: 300
threshold2Color: rgba(234, 112, 112, 0.22)
thresholdLine: true
id: 5
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^Average.*/"
fill: 0
stack: false
span: 10
stack: true
steppedLine: false
targets:
- expr: ceph_osd_pgs{osd=~"$osd"}
interval: "$interval"
intervalFactor: 1
legendFormat: Number of PGs - {{ osd }}
refId: A
step: 60
- expr: avg(ceph_osd_pgs)
interval: "$interval"
intervalFactor: 1
legendFormat: Average Number of PGs in the Cluster
refId: B
step: 60
timeFrom:
timeShift:
title: PGs
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 7
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: ceph_osd_utilization{osd="$osd"}
interval: "$interval"
intervalFactor: 1
legendFormat: ''
refId: A
step: 60
thresholds: '60,80'
timeFrom:
title: Utilization
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
showTitle: true
title: 'OSD: $osd'
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
thresholdLine: false
id: 4
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: false
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 2
points: true
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
targets:
- expr: ceph_osd_perf_apply_latency_seconds{osd=~"$osd"}
interval: "$interval"
intervalFactor: 1
legendFormat: Apply Latency (s) - {{ osd }}
refId: A
step: 60
- expr: ceph_osd_perf_commit_latency_seconds{osd=~"$osd"}
interval: "$interval"
intervalFactor: 1
legendFormat: Commit Latency (s) - {{ osd }}
refId: B
step: 60
timeFrom:
timeShift:
title: Latency
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: s
label:
logBase: 1
max:
min: 0
show: true
- format: s
label:
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 2
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: true
steppedLine: false
targets:
- expr: ceph_osd_avail_bytes{osd=~"$osd"}
hide: false
interval: "$interval"
intervalFactor: 1
legendFormat: Available - {{ osd }}
metric: ceph_osd_avail_bytes
refId: A
step: 60
# Used bytes for the OSD(s) matching $osd.
# `metric` now names the same metric that `expr` queries, as the sibling
# "Available" target does; it previously pointed at ceph_osd_avail_bytes.
- expr: ceph_osd_used_bytes{osd=~"$osd"}
  interval: "$interval"
  intervalFactor: 1
  legendFormat: Used - {{ osd }}
  metric: ceph_osd_used_bytes
  refId: B
  step: 60
timeFrom:
timeShift:
title: OSD Storage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 5
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 9
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: false
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 2
points: true
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
targets:
# Utilization variance for the OSD(s) matching $osd.
# Legend and `metric` describe ceph_osd_variance, the metric actually
# queried; both previously carried the "Available"/ceph_osd_avail_bytes
# wording of the OSD Storage panel.
- expr: ceph_osd_variance{osd=~"$osd"}
  interval: "$interval"
  intervalFactor: 1
  legendFormat: Variance - {{ osd }}
  metric: ceph_osd_variance
  refId: A
  step: 60
timeFrom:
timeShift:
title: Utilization Variance
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: none
label:
logBase: 1
max:
min:
show: true
- format: none
label:
logBase: 1
max:
min:
show: true
title: New row
time:
from: now-1h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
templating:
list:
- auto: true
auto_count: 10
auto_min: 1m
current:
selected: true
text: 1m
value: 1m
datasource:
hide: 0
includeAll: false
label: Interval
multi: false
name: interval
options:
- selected: false
text: auto
value: "$__auto_interval"
- selected: true
text: 1m
value: 1m
- selected: false
text: 10m
value: 10m
- selected: false
text: 30m
value: 30m
- selected: false
text: 1h
value: 1h
- selected: false
text: 6h
value: 6h
- selected: false
text: 12h
value: 12h
- selected: false
text: 1d
value: 1d
- selected: false
text: 7d
value: 7d
- selected: false
text: 14d
value: 14d
- selected: false
text: 30d
value: 30d
query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d
refresh: 0
type: interval
- current: {}
datasource: prometheus
hide: 0
includeAll: false
label: OSD
multi: false
name: osd
options: []
query: label_values(ceph_osd_up, osd)
refresh: 1
regex: ''
type: query
annotations:
list: []
refresh: 15m
schemaVersion: 12
version: 18
links: []
gnetId: 923
description: CEPH OSD Status.
ceph_pool:
__inputs:
- name: prometheus
label: Prometheus
description: Prometheus.IO
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: panel
id: graph
name: Graph
version: ''
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: grafana
id: grafana
name: Grafana
version: 3.1.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
id:
title: Ceph - Pools
tags:
- ceph
- pools
style: dark
timezone: browser
editable: true
hideControls: false
sharedCrosshair: false
rows:
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 4
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: ''
id: 2
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
rightSide: true
show: true
total: false
values: true
lines: true
linewidth: 0
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: "/^Total.*$/"
fill: 0
linewidth: 4
stack: false
- alias: "/^Raw.*$/"
color: "#BF1B00"
fill: 0
linewidth: 4
span: 10
stack: true
steppedLine: false
targets:
# Available bytes for the pool(s) matching $pool.
# Legend spelling corrected ("Avilable" -> "Available").
- expr: ceph_pool_available_bytes{pool=~"$pool"}
  interval: "$interval"
  intervalFactor: 1
  legendFormat: Available - {{ pool }}
  metric: ceph_pool_available_bytes
  refId: A
  step: 60
- expr: ceph_pool_used_bytes{pool=~"$pool"}
interval: "$interval"
intervalFactor: 1
legendFormat: Used - {{ pool }}
metric: ceph_pool
refId: B
step: 60
- expr: ceph_pool_used_bytes{pool=~"$pool"} + ceph_pool_available_bytes{pool=~"$pool"}
interval: "$interval"
intervalFactor: 1
legendFormat: Total - {{ pool }}
metric: ceph_pool
refId: C
step: 60
- expr: ceph_pool_raw_used_bytes{pool=~"$pool"}
interval: "$interval"
intervalFactor: 1
legendFormat: Raw - {{ pool }}
metric: ceph_pool
refId: D
step: 60
timeFrom:
timeShift:
title: Pool Storage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
decimals: 2
editable: true
error: false
format: percentunit
gauge:
maxValue: 1
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 10
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ' ceph_pool_used_bytes{pool="$pool"} / (ceph_pool_available_bytes{pool="$pool"}
+ ceph_pool_used_bytes{pool="$pool"})'
interval: "$interval"
intervalFactor: 1
refId: A
step: 60
thresholds: ''
title: Usage
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
showTitle: true
title: 'Pool: $pool'
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: ''
id: 7
isNew: true
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
targets:
- expr: ceph_pool_objects_total{pool=~"$pool"}
interval: "$interval"
intervalFactor: 1
legendFormat: Objects - {{ pool }}
refId: A
step: 60
- expr: ceph_pool_dirty_objects_total{pool=~"$pool"}
interval: "$interval"
intervalFactor: 1
legendFormat: Dirty Objects - {{ pool }}
refId: B
step: 60
timeFrom:
timeShift:
title: Objects in Pool
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: short
label:
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min: 0
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
thresholdLine: false
id: 4
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: true
steppedLine: false
targets:
- expr: irate(ceph_pool_read_total{pool=~"$pool"}[3m])
interval: "$interval"
intervalFactor: 1
legendFormat: Read - {{ pool }}
refId: B
step: 60
- expr: irate(ceph_pool_write_total{pool=~"$pool"}[3m])
interval: "$interval"
intervalFactor: 1
legendFormat: Write - {{ pool }}
refId: A
step: 60
timeFrom:
timeShift:
title: IOPS
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: none
label: IOPS
logBase: 1
max:
min: 0
show: true
- format: short
label: IOPS
logBase: 1
max:
min: 0
show: false
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 5
interval: "$interval"
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: true
steppedLine: false
targets:
- expr: irate(ceph_pool_read_bytes_total{pool="$pool"}[3m])
interval: "$interval"
intervalFactor: 1
legendFormat: Read Bytes - {{ pool }}
refId: A
step: 60
- expr: irate(ceph_pool_write_bytes_total{pool="$pool"}[3m])
interval: "$interval"
intervalFactor: 1
legendFormat: Written Bytes - {{ pool }}
refId: B
step: 60
timeFrom:
timeShift:
title: Throughput
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min: 0
show: true
- format: Bps
label:
logBase: 1
max:
min: 0
show: true
title: New row
time:
from: now-3h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
templating:
list:
- auto: true
auto_count: 10
auto_min: 1m
current:
selected: true
text: 1m
value: 1m
datasource:
hide: 0
includeAll: false
label: Interval
multi: false
name: interval
options:
- selected: false
text: auto
value: "$__auto_interval"
- selected: true
text: 1m
value: 1m
- selected: false
text: 10m
value: 10m
- selected: false
text: 30m
value: 30m
- selected: false
text: 1h
value: 1h
- selected: false
text: 6h
value: 6h
- selected: false
text: 12h
value: 12h
- selected: false
text: 1d
value: 1d
- selected: false
text: 7d
value: 7d
- selected: false
text: 14d
value: 14d
- selected: false
text: 30d
value: 30d
query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d
refresh: 0
type: interval
- current: {}
datasource: prometheus
hide: 0
includeAll: false
label: Pool
multi: false
name: pool
options: []
query: label_values(ceph_pool_objects_total, pool)
refresh: 1
regex: ''
type: query
annotations:
list: []
refresh: 1m
schemaVersion: 12
version: 22
links: []
gnetId: 926
description: Ceph Pools dashboard.
etcd:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.4.1
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list: []
editable: true
gnetId: 3070
graphTooltip: 0
hideControls: false
id:
links: []
rows:
- collapse: false
height: 250
panels:
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 44
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: max(etcd_server_has_leader)
format: time_series
intervalFactor: 2
refId: A
step: 600
thresholds: '0,1'
title: Etcd has a leader?
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: 'YES'
value: '1'
- op: "="
text: 'NO'
value: '0'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 42
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: max(etcd_server_leader_changes_seen_total)
format: time_series
intervalFactor: 2
refId: A
step: 600
thresholds: ''
title: The number of leader changes seen
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 43
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
# Query backing the "The total number of failed proposals seen" singlestat.
# Uses the failed-proposals counter (the same metric the "Raft Proposals"
# graph rates); the expression was previously a copy of the leader-changes
# query from the preceding panel, so both panels showed the same number.
- expr: max(etcd_server_proposals_failed_total)
  format: time_series
  intervalFactor: 2
  refId: A
  step: 600
thresholds: ''
title: The total number of failed proposals seen
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
- collapse: false
height: 252
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 0
id: 23
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(rate(grpc_server_started_total{grpc_type="unary"}[5m]))
format: time_series
intervalFactor: 2
legendFormat: RPC Rate
metric: grpc_server_started_total
refId: A
step: 60
- expr: sum(rate(grpc_server_handled_total{grpc_type="unary",grpc_code!="OK"}[5m]))
format: time_series
intervalFactor: 2
legendFormat: RPC Failed Rate
metric: grpc_server_handled_total
refId: B
step: 60
thresholds: []
timeFrom:
timeShift:
title: RPC Rate
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: ops
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 0
id: 41
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: true
steppedLine: false
targets:
- expr: sum(grpc_server_started_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})
- sum(grpc_server_handled_total{grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})
format: time_series
intervalFactor: 2
legendFormat: Watch Streams
metric: grpc_server_handled_total
refId: A
step: 60
- expr: sum(grpc_server_started_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})
- sum(grpc_server_handled_total{grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})
format: time_series
intervalFactor: 2
legendFormat: Lease Streams
metric: grpc_server_handled_total
refId: B
step: 60
thresholds: []
timeFrom:
timeShift:
title: Active Streams
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: ''
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Row
titleSize: h6
- collapse: false
height: 250px
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
decimals:
editable: true
error: false
fill: 0
grid: {}
id: 1
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: etcd_debugging_mvcc_db_total_size_in_bytes
format: time_series
hide: false
interval: ''
intervalFactor: 2
legendFormat: "{{instance}} DB Size"
metric: ''
refId: A
step: 120
thresholds: []
timeFrom:
timeShift:
title: DB Size
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
logBase: 1
max:
min:
show: true
- format: short
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 0
grid: {}
id: 3
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 1
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: true
targets:
- expr: histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m]))
by (instance, le))
format: time_series
hide: false
intervalFactor: 2
legendFormat: "{{instance}} WAL fsync"
metric: etcd_disk_wal_fsync_duration_seconds_bucket
refId: A
step: 120
- expr: histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[5m]))
by (instance, le))
format: time_series
intervalFactor: 2
legendFormat: "{{instance}} DB fsync"
metric: etcd_disk_backend_commit_duration_seconds_bucket
refId: B
step: 120
thresholds: []
timeFrom:
timeShift:
title: Disk Sync Duration
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: s
logBase: 1
max:
min:
show: true
- format: short
logBase: 1
max:
min:
show: false
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 0
id: 29
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
- expr: process_resident_memory_bytes
format: time_series
intervalFactor: 2
legendFormat: "{{instance}} Resident Memory"
metric: process_resident_memory_bytes
refId: A
step: 120
thresholds: []
timeFrom:
timeShift:
title: Memory
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 5
id: 22
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: rate(etcd_network_client_grpc_received_bytes_total[5m])
format: time_series
intervalFactor: 2
legendFormat: "{{instance}} Client Traffic In"
metric: etcd_network_client_grpc_received_bytes_total
refId: A
step: 120
thresholds: []
timeFrom:
timeShift:
title: Client Traffic In
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# Y axes for the "Client Traffic In" graph.
# The panel plots rate(etcd_network_client_grpc_received_bytes_total[5m]),
# so the left axis uses the Bps unit, matching the "Client Traffic Out"
# panel; it was previously the unit-less `short` format.
yaxes:
  - format: Bps
    label:
    logBase: 1
    max:
    min:
    show: true
  - format: short
    label:
    logBase: 1
    max:
    min:
    show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 5
id: 21
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: true
steppedLine: false
targets:
- expr: rate(etcd_network_client_grpc_sent_bytes_total[5m])
format: time_series
intervalFactor: 2
legendFormat: "{{instance}} Client Traffic Out"
metric: etcd_network_client_grpc_sent_bytes_total
refId: A
step: 120
thresholds: []
timeFrom:
timeShift:
title: Client Traffic Out
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 0
id: 20
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(rate(etcd_network_peer_received_bytes_total[5m])) by (instance)
format: time_series
intervalFactor: 2
legendFormat: "{{instance}} Peer Traffic In"
metric: etcd_network_peer_received_bytes_total
refId: A
step: 120
thresholds: []
timeFrom:
timeShift:
title: Peer Traffic In
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
decimals:
editable: true
error: false
fill: 0
grid: {}
id: 16
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 3
stack: false
steppedLine: false
targets:
- expr: sum(rate(etcd_network_peer_sent_bytes_total[5m])) by (instance)
format: time_series
hide: false
interval: ''
intervalFactor: 2
legendFormat: "{{instance}} Peer Traffic Out"
metric: etcd_network_peer_sent_bytes_total
refId: A
step: 120
thresholds: []
timeFrom:
timeShift:
title: Peer Traffic Out
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: Bps
logBase: 1
max:
min:
show: true
- format: short
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 0
id: 40
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(rate(etcd_server_proposals_failed_total[5m]))
format: time_series
intervalFactor: 2
legendFormat: Proposal Failure Rate
metric: etcd_server_proposals_failed_total
refId: A
step: 60
- expr: sum(etcd_server_proposals_pending)
format: time_series
intervalFactor: 2
legendFormat: Proposal Pending Total
metric: etcd_server_proposals_pending
refId: B
step: 60
- expr: sum(rate(etcd_server_proposals_committed_total[5m]))
format: time_series
intervalFactor: 2
legendFormat: Proposal Commit Rate
metric: etcd_server_proposals_committed_total
refId: C
step: 60
- expr: sum(rate(etcd_server_proposals_applied_total[5m]))
format: time_series
intervalFactor: 2
legendFormat: Proposal Apply Rate
refId: D
step: 60
thresholds: []
timeFrom:
timeShift:
title: Raft Proposals
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label: ''
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
decimals: 0
editable: true
error: false
fill: 0
id: 19
legend:
alignAsTable: false
avg: false
current: false
max: false
min: false
rightSide: false
show: false
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: changes(etcd_server_leader_changes_seen_total[1d])
format: time_series
intervalFactor: 2
legendFormat: "{{instance}} Total Leader Elections Per Day"
metric: etcd_server_leader_changes_seen_total
refId: A
step: 60
thresholds: []
timeFrom:
timeShift:
title: Total Leader Elections Per Day
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
description: |-
proposals_committed_total records the total number of consensus proposals committed. This gauge should increase over time if the cluster is healthy. Several healthy members of an etcd cluster may have different total committed proposals at once. This discrepancy may be due to recovering from peers after starting, lagging behind the leader, or being the leader and therefore having the most commits. It is important to monitor this metric across all the members in the cluster; a consistently large lag between a single member and its leader indicates that member is slow or unhealthy.
proposals_applied_total records the total number of consensus proposals applied. The etcd server applies every committed proposal asynchronously. The difference between proposals_committed_total and proposals_applied_total should usually be small (within a few thousands even under high load). If the difference between them continues to rise, it indicates that the etcd server is overloaded. This might happen when applying expensive queries like heavy range queries or large txn operations.
fill: 1
id: 2
legend:
alignAsTable: true
avg: true
current: true
max: true
min: false
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(rate(etcd_server_proposals_committed_total[5m]))
format: time_series
intervalFactor: 2
legendFormat: total number of consensus proposals committed
metric: ''
refId: A
step: 60
- expr: sum(rate(etcd_server_proposals_applied_total[5m]))
format: time_series
intervalFactor: 2
legendFormat: total number of consensus proposals applied
metric: ''
refId: B
step: 60
thresholds: []
timeFrom:
timeShift:
title: The total number of consensus proposals committed
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label: ''
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
description: indicates how many proposals are queued to commit. Rising pending
proposals suggests there is a high client load or the member cannot commit proposals.
fill: 1
id: 5
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum(etcd_server_proposals_pending)
format: time_series
intervalFactor: 2
legendFormat: Proposals pending
refId: A
step: 60
thresholds: []
timeFrom:
timeShift:
title: Proposals pending
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 7
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 12
stack: false
steppedLine: false
targets:
# Series A of the "Disks operations" panel: per-second rate (1m window) of the
# WAL fsync duration sum, added up across all series.
- expr: sum(rate(etcd_disk_wal_fsync_duration_seconds_sum[1m]))
format: time_series
intervalFactor: 2
# Plain legend text with no leading tab, consistent with series B.
legendFormat: The latency distributions of fsync called by wal
refId: A
step: 30
- expr: sum(rate(etcd_disk_backend_commit_duration_seconds_sum[1m]))
format: time_series
intervalFactor: 2
legendFormat: The latency distributions of commit called by backend
refId: B
step: 30
thresholds: []
timeFrom:
timeShift:
title: Disks operations
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 8
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 12
stack: false
steppedLine: false
targets:
- expr: sum(rate(etcd_network_client_grpc_received_bytes_total[1m]))
format: time_series
intervalFactor: 2
legendFormat: The total number of bytes received by grpc clients
refId: A
step: 30
- expr: sum(rate(etcd_network_client_grpc_sent_bytes_total[1m]))
format: time_series
intervalFactor: 2
legendFormat: The total number of bytes sent to grpc clients
refId: B
step: 30
thresholds: []
timeFrom:
timeShift:
title: Network
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
description: Abnormally high snapshot duration (snapshot_save_total_duration_seconds)
indicates disk issues and might cause the cluster to be unstable.
fill: 1
id: 9
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 12
stack: false
steppedLine: false
targets:
- expr: sum(rate(etcd_debugging_snap_save_total_duration_seconds_sum[1m]))
format: time_series
intervalFactor: 2
legendFormat: The total latency distributions of save called by snapshot
refId: A
step: 30
thresholds: []
timeFrom:
timeShift:
title: Snapshot duration
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
list: []
time:
from: now-6h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: Etcd by Prometheus
version: 2
description: Etcd Dashboard for Prometheus metrics scraper
hosts_containers:
__inputs:
- name: prometheus
label: Prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: panel
id: graph
name: Graph
version: ''
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: grafana
id: grafana
name: Grafana
version: 3.1.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.3.0
id:
title: Kubernetes cluster monitoring (via Prometheus)
description: Monitors Kubernetes cluster using Prometheus. Shows overall cluster CPU
/ Memory / Filesystem usage as well as individual pod, containers, systemd services
statistics. Uses cAdvisor metrics only.
tags:
- kubernetes
style: dark
timezone: browser
editable: true
hideControls: false
sharedCrosshair: false
rows:
- collapse: false
editable: true
height: 200px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
thresholdLine: false
height: 200px
id: 32
isNew: true
legend:
alignAsTable: false
avg: true
current: true
max: false
min: false
rightSide: false
show: false
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: false
targets:
- expr: sum (rate (container_network_receive_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m]))
interval: 10s
intervalFactor: 1
legendFormat: Received
metric: network
refId: A
step: 10
# Transmit rate is negated (leading "- " inside the quoted expression), so its
# values carry the opposite sign to the "Received" series in the same panel.
# The whole expression is quoted because it starts with "- ".
- expr: '- sum (rate (container_network_transmit_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m]))'
interval: 10s
intervalFactor: 1
legendFormat: Sent
metric: network
refId: B
step: 10
timeFrom:
timeShift:
title: Network I/O pressure
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
transparent: false
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: Bps
label:
logBase: 1
max:
min:
show: false
title: Network I/O pressure
- collapse: false
editable: true
height: 250px
panels:
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
height: 180px
id: 4
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
# Working-set bytes of the id="/" series divided by machine memory, times 100,
# for the hosts matched by $Node. The expression is one plain scalar folded
# across two lines; keep the continuation line free of comments.
- expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"})
/ sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"}) * 100
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: 65, 90
title: Cluster memory usage
transparent: false
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
height: 180px
id: 6
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m]))
/ sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"}) * 100
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: 65, 90
title: Cluster CPU usage (5m avg)
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
height: 180px
id: 7
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 4
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
# Filesystem bytes used as a percentage of the filesystem limit, for the hosts
# matched by $Node.
# NOTE(review): the device regex only matches /dev/sdaN and /dev/vdaN with a
# single-digit N, so any other device name is left out of both sums -- confirm
# this covers the disks on the target nodes.
- expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"})
/ sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"})
* 100
interval: 10s
intervalFactor: 1
legendFormat: ''
metric: ''
refId: A
step: 10
thresholds: 65, 90
title: Cluster filesystem usage
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 9
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 20%
prefix: ''
prefixFontSize: 20%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"})
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Used
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 10
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"})
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Total
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 11
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: " cores"
postfixFontSize: 30%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m]))
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Used
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 12
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: " cores"
postfixFontSize: 30%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"})
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Total
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 13
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"})
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Used
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals: 2
editable: true
error: false
format: bytes
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
height: 1px
id: 14
interval:
isNew: true
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 2
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"})
interval: 10s
intervalFactor: 1
refId: A
step: 10
thresholds: ''
title: Total
type: singlestat
valueFontSize: 50%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
showTitle: false
title: Total usage
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 3
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: ''
id: 17
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (pod_name)
interval: 10s
intervalFactor: 1
legendFormat: "{{ pod_name }}"
metric: container_cpu
refId: A
step: 10
timeFrom:
timeShift:
title: Pods CPU usage (5m avg)
tooltip:
msResolution: true
shared: true
sort: 2
value_type: cumulative
transparent: false
type: graph
xaxis:
show: true
yaxes:
- format: none
label: cores
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
showTitle: false
title: Pods CPU usage
- collapse: true
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 3
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: ''
id: 23
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (systemd_service_name)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "{{ systemd_service_name }}"
metric: container_cpu
refId: A
step: 10
timeFrom:
timeShift:
title: System services CPU usage (5m avg)
tooltip:
msResolution: true
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: none
label: cores
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: System services CPU usage
- collapse: true
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 3
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
height: ''
id: 24
isNew: true
legend:
alignAsTable: true
avg: true
current: true
hideEmpty: false
hideZero: false
max: false
min: false
rightSide: true
show: true
sideWidth:
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",container_name!="POD",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (container_name, pod_name)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: 'pod: {{ pod_name }} | {{ container_name }}'
metric: container_cpu
refId: A
step: 10
- expr: sum (rate (container_cpu_usage_seconds_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, name, image)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})'
metric: container_cpu
refId: B
step: 10
- expr: sum (rate (container_cpu_usage_seconds_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, rkt_container_name)
interval: 10s
intervalFactor: 1
legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}'
metric: container_cpu
refId: C
step: 10
timeFrom:
timeShift:
title: Containers CPU usage (5m avg)
tooltip:
msResolution: true
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: none
label: cores
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: Containers CPU usage
- collapse: true
editable: true
height: 500px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 3
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 20
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: false
show: true
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (rate (container_cpu_usage_seconds_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (id)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "{{ id }}"
metric: container_cpu
refId: A
step: 10
timeFrom:
timeShift:
title: All processes CPU usage (5m avg)
tooltip:
msResolution: true
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: none
label: cores
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
repeat:
showTitle: false
title: All processes CPU usage
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 25
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"})
by (pod_name)
interval: 10s
intervalFactor: 1
legendFormat: "{{ pod_name }}"
metric: container_memory_usage:sort_desc
refId: A
step: 10
timeFrom:
timeShift:
title: Pods memory usage
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: Pods memory usage
- collapse: true
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 26
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (container_memory_working_set_bytes{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"})
by (systemd_service_name)
interval: 10s
intervalFactor: 1
legendFormat: "{{ systemd_service_name }}"
metric: container_memory_usage:sort_desc
refId: A
step: 10
timeFrom:
timeShift:
title: System services memory usage
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: System services memory usage
- collapse: true
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 27
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",container_name!="POD",kubernetes_io_hostname=~"^$Node$"})
by (container_name, pod_name)
interval: 10s
intervalFactor: 1
legendFormat: 'pod: {{ pod_name }} | {{ container_name }}'
metric: container_memory_usage:sort_desc
refId: A
step: 10
- expr: sum (container_memory_working_set_bytes{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"})
by (kubernetes_io_hostname, name, image)
interval: 10s
intervalFactor: 1
legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})'
metric: container_memory_usage:sort_desc
refId: B
step: 10
- expr: sum (container_memory_working_set_bytes{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"})
by (kubernetes_io_hostname, rkt_container_name)
interval: 10s
intervalFactor: 1
legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}'
metric: container_memory_usage:sort_desc
refId: C
step: 10
timeFrom:
timeShift:
title: Containers memory usage
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: Containers memory usage
- collapse: true
editable: true
height: 500px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 0
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 28
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: false
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: true
targets:
- expr: sum (container_memory_working_set_bytes{id!="/",kubernetes_io_hostname=~"^$Node$"})
by (id)
interval: 10s
intervalFactor: 1
legendFormat: "{{ id }}"
metric: container_memory_usage:sort_desc
refId: A
step: 10
timeFrom:
timeShift:
title: All processes memory usage
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: All processes memory usage
- collapse: false
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 16
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: false
targets:
- expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (pod_name)
interval: 10s
intervalFactor: 1
legendFormat: "-> {{ pod_name }}"
metric: network
refId: A
step: 10
- expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (pod_name)'
interval: 10s
intervalFactor: 1
legendFormat: "<- {{ pod_name }}"
metric: network
refId: B
step: 10
timeFrom:
timeShift:
title: Pods network I/O (5m avg)
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: Pods network I/O
- collapse: true
editable: true
height: 250px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 30
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: true
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: false
# Queries for the per-container network panel. Every query is a 5m byte rate
# restricted to the node(s) matched by the $Node template variable. Series
# labelled "->" are inbound (receive counter); series labelled "<-" are
# outbound (transmit counter, negated so they yield negative values).
targets:
# Containers whose name matches ^k8s_.*, inbound, per container and pod.
- expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (container_name, pod_name)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "-> pod: {{ pod_name }} | {{ container_name }}"
metric: network
refId: B
step: 10
# Containers whose name matches ^k8s_.*, outbound.
- expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (container_name, pod_name)'
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "<- pod: {{ pod_name }} | {{ container_name }}"
metric: network
refId: D
step: 10
# Containers with an image whose name does NOT match ^k8s_.*, inbound.
- expr: sum (rate (container_network_receive_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, name, image)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name
}})"
metric: network
refId: A
step: 10
# Containers with an image whose name does NOT match ^k8s_.*, outbound.
- expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, name, image)'
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name
}})"
metric: network
refId: C
step: 10
# rkt containers, inbound. This must read the receive counter; the outbound
# query below reads the transmit counter.
- expr: sum (rate (container_network_receive_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, rkt_container_name)
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name
}}"
metric: network
refId: E
step: 10
# rkt containers, outbound.
- expr: '- sum (rate (container_network_transmit_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (kubernetes_io_hostname, rkt_container_name)'
hide: false
interval: 10s
intervalFactor: 1
legendFormat: "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name
}}"
metric: network
refId: F
step: 10
timeFrom:
timeShift:
title: Containers network I/O (5m avg)
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: Containers network I/O
- collapse: true
editable: true
height: 500px
panels:
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 2
editable: true
error: false
fill: 1
grid:
threshold1:
threshold1Color: rgba(216, 200, 27, 0.27)
threshold2:
threshold2Color: rgba(234, 112, 112, 0.22)
id: 29
isNew: true
legend:
alignAsTable: true
avg: true
current: true
max: false
min: false
rightSide: false
show: true
sideWidth: 200
sort: current
sortDesc: true
total: false
values: true
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 12
stack: false
steppedLine: false
# Queries for the all-processes network panel: 5m byte rates grouped by the
# `id` label, excluding id "/", on the node(s) matched by $Node.
targets:
# Inbound: receive rate per id.
- expr: sum (rate (container_network_receive_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (id)
interval: 10s
intervalFactor: 1
legendFormat: "-> {{ id }}"
metric: network
refId: A
step: 10
# Outbound: transmit rate per id, negated so it yields negative values.
- expr: '- sum (rate (container_network_transmit_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m]))
by (id)'
interval: 10s
intervalFactor: 1
legendFormat: "<- {{ id }}"
metric: network
refId: B
step: 10
timeFrom:
timeShift:
title: All processes network I/O (5m avg)
tooltip:
msResolution: false
shared: true
sort: 2
value_type: cumulative
type: graph
xaxis:
show: true
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: false
title: All processes network I/O
time:
from: now-5m
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
# Dashboard template variables.
templating:
list:
# "Node": a query variable filled from the values of the
# kubernetes_io_hostname label on the prometheus datasource. The panel
# queries reference it as kubernetes_io_hostname=~"^$Node$"; with includeAll
# enabled, the "All" choice expands to the regex ".*".
- allValue: ".*"
current: {}
datasource: prometheus
hide: 0
includeAll: true
multi: false
name: Node
options: []
query: label_values(kubernetes_io_hostname)
refresh: 1
type: query
annotations:
list: []
refresh: 10s
schemaVersion: 12
version: 13
links: []
gnetId: 315
rabbitmq:
__inputs:
- name: prometheus
label: Prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.2.0
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list: []
editable: true
gnetId: 2121
graphTooltip: 0
hideControls: false
id:
links: []
refresh: 5s
rows:
- collapse: false
height: 266
panels:
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 13
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
# Single query: the raw rabbitmq_up series. This panel's valueMaps translate
# 1 to "Up", 0 to "Down" and null to "N/A".
targets:
- expr: rabbitmq_up
intervalFactor: 2
metric: rabbitmq_up
refId: A
step: 2
thresholds: Up,Down
timeFrom: 30s
title: RabbitMQ Server
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
- op: "="
text: Down
value: '0'
- op: "="
text: Up
value: '1'
valueName: current
# Alert for the "Node up Stats" panel, evaluated every 60s against query A
# (rabbitmq_running) over the window from 10s ago to now, reduced with `last`.
# It fires when either condition holds:
#   1. the last value is below 1, or
#   2. the query has no value.
# The second condition is joined with `or`: joined with `and`, both would have
# to hold at once, which a single series cannot satisfy (its last value cannot
# be both below 1 and absent).
- alert:
conditions:
- evaluator:
params:
- 1
type: lt
operator:
type: and
query:
params:
- A
- 10s
- now
reducer:
params: []
type: last
type: query
- evaluator:
params: []
type: no_value
operator:
type: or
query:
params:
- A
- 10s
- now
reducer:
params: []
type: last
type: query
executionErrorState: alerting
frequency: 60s
handler: 1
message: Some of the RabbitMQ node is down
name: Node Stats alert
noDataState: no_data
notifications: []
aliasColors: {}
bars: true
datasource: prometheus
decimals: 0
fill: 1
id: 12
legend:
alignAsTable: true
avg: false
current: true
max: false
min: false
show: true
total: false
values: true
lines: false
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 9
stack: false
steppedLine: false
# Single query: rabbitmq_running, one legend entry per `node` label. This is
# query A referenced by the panel's alert conditions.
targets:
- expr: rabbitmq_running
intervalFactor: 2
legendFormat: "{{node}}"
metric: rabbitmq_running
refId: A
step: 2
thresholds:
- colorMode: critical
fill: true
line: true
op: lt
value: 1
timeFrom: 30s
timeShift:
title: Node up Stats
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 6
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
# Single query: rabbitmq_exchangesTotal, labelled per scrape instance.
targets:
- expr: rabbitmq_exchangesTotal
intervalFactor: 2
legendFormat: "{{instance}}:exchanges"
metric: rabbitmq_exchangesTotal
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Exchanges
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 4
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
# Single query: rabbitmq_channelsTotal, labelled per scrape instance.
targets:
- expr: rabbitmq_channelsTotal
intervalFactor: 2
legendFormat: "{{instance}}:channels"
metric: rabbitmq_channelsTotal
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Channels
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 3
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
# Single query: rabbitmq_consumersTotal, labelled per scrape instance.
targets:
- expr: rabbitmq_consumersTotal
intervalFactor: 2
legendFormat: "{{instance}}:consumers"
metric: rabbitmq_consumersTotal
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Consumers
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 5
legend:
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
# Single query: rabbitmq_connectionsTotal, labelled per scrape instance.
targets:
- expr: rabbitmq_connectionsTotal
intervalFactor: 2
legendFormat: "{{instance}}:connections"
metric: rabbitmq_connectionsTotal
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Connections
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
fill: 1
id: 7
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 4
stack: false
steppedLine: false
# Single query: rabbitmq_queuesTotal, labelled per scrape instance.
targets:
- expr: rabbitmq_queuesTotal
intervalFactor: 2
legendFormat: "{{instance}}:queues"
metric: rabbitmq_queuesTotal
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Queues
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 8
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
# Four queue-message series, each summed per vhost: ready, published_total,
# delivered_total and unacknowledged.
# NOTE(review): the *_total series are plotted as raw values (no rate()) next
# to the ready/unacknowledged series; confirm that is the intended view.
targets:
- expr: sum by (vhost)(rabbitmq_queue_messages_ready)
intervalFactor: 2
legendFormat: "{{vhost}}:ready"
metric: rabbitmq_queue_messages_ready
refId: A
step: 2
- expr: sum by (vhost)(rabbitmq_queue_messages_published_total)
intervalFactor: 2
legendFormat: "{{vhost}}:published"
metric: rabbitmq_queue_messages_published_total
refId: B
step: 2
- expr: sum by (vhost)(rabbitmq_queue_messages_delivered_total)
intervalFactor: 2
legendFormat: "{{vhost}}:delivered"
metric: rabbitmq_queue_messages_delivered_total
refId: C
step: 2
- expr: sum by (vhost)(rabbitmq_queue_messages_unacknowledged)
intervalFactor: 2
legendFormat: "{{vhost}}:unack"
metric: ack
refId: D
step: 2
thresholds: []
timeFrom:
timeShift:
title: Messages/host
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
decimals: 0
fill: 1
id: 2
legend:
alignAsTable: true
avg: false
current: true
max: false
min: false
rightSide: false
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
# Single query: rabbitmq_queue_messages, one series per queue; the legend
# shows the `queue` and `durable` labels.
targets:
- expr: rabbitmq_queue_messages
intervalFactor: 2
legendFormat: "{{queue}}:{{durable}}"
metric: rabbitmq_queue_messages
refId: A
step: 2
thresholds: []
timeFrom:
timeShift:
title: Messages / Queue
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
fill: 1
id: 9
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
# Memory used by each RabbitMQ node, plotted against that node's limit.
targets:
# Memory in use, per `node` label.
- expr: rabbitmq_node_mem_used
intervalFactor: 2
legendFormat: "{{node}}:used"
metric: rabbitmq_node_mem_used
refId: A
step: 2
# Memory limit, per `node` label.
- expr: rabbitmq_node_mem_limit
intervalFactor: 2
legendFormat: "{{node}}:limit"
metric: node_mem
refId: B
step: 2
thresholds: []
timeFrom:
timeShift:
title: Memory
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: decbytes
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
fill: 1
id: 10
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
# File descriptors used by each RabbitMQ node, plotted against the total.
targets:
# Descriptors in use, per `node` label.
- expr: rabbitmq_fd_used
intervalFactor: 2
legendFormat: "{{node}}:used"
metric: ''
refId: A
step: 2
# Total descriptors, per `node` label. The metric lookup hint names this
# query's own metric rather than the unrelated memory one.
- expr: rabbitmq_fd_total
intervalFactor: 2
legendFormat: "{{node}}:total"
metric: rabbitmq_fd_total
refId: B
step: 2
thresholds: []
timeFrom:
timeShift:
title: File descriptors
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
datasource: prometheus
fill: 1
id: 11
legend:
alignAsTable: true
avg: true
current: true
max: true
min: true
show: true
total: false
values: true
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
span: 6
stack: false
steppedLine: false
# Sockets used by each RabbitMQ node, plotted against the total.
targets:
# Sockets in use, per `node` label.
- expr: rabbitmq_sockets_used
intervalFactor: 2
legendFormat: "{{node}}:used"
metric: ''
refId: A
step: 2
# Total sockets, per `node` label.
- expr: rabbitmq_sockets_total
intervalFactor: 2
legendFormat: "{{node}}:total"
metric: ''
refId: B
step: 2
thresholds: []
timeFrom:
timeShift:
title: Sockets
tooltip:
shared: true
sort: 0
value_type: individual
transparent: false
type: graph
xaxis:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
schemaVersion: 14
style: dark
tags: []
# Dashboard template variables.
templating:
list:
# "datasource": a datasource-type variable that queries for prometheus
# datasources.
# NOTE(review): the panels in this dashboard set `datasource: prometheus`
# directly instead of referencing this variable, so changing it does not
# appear to affect them; confirm.
- current:
tags: []
text: Prometheus
value: Prometheus
hide: 0
label:
name: datasource
options: []
query: prometheus
refresh: 1
regex: ''
type: datasource
time:
from: now-5m
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: RabbitMQ Metrics
version: 17
description: 'Basic rabbitmq host stats: Node Stats, Exchanges, Channels, Consumers, Connections,
Queues, Messages, Messages per Queue, Memory, File Descriptors, Sockets.'
kubernetes_capacity_planning:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.4.1
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list: []
description: ''
editable: true
gnetId: 22
graphTooltip: 0
hideControls: false
id:
links: []
refresh: false
rows:
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 3
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
# Single query: the 2m rate of node_cpu in mode "idle", summed over every
# matching series and multiplied by 100.
# NOTE(review): because the sum spans all matching series, the result can
# exceed 100 when more than one series matches; confirm that is intended for a
# panel formatted as percent.
targets:
- expr: sum(rate(node_cpu{mode="idle"}[2m])) * 100
hide: false
intervalFactor: 10
legendFormat: ''
refId: A
step: 50
thresholds: []
timeFrom:
timeShift:
title: Idle cpu
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: percent
label: cpu idle
logBase: 1
max:
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 9
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
# The three load-average series (node_load1, node_load5, node_load15), each
# summed over every matching series.
targets:
- expr: sum(node_load1)
intervalFactor: 4
legendFormat: load 1m
refId: A
step: 20
target: ''
- expr: sum(node_load5)
intervalFactor: 4
legendFormat: load 5m
refId: B
step: 20
target: ''
- expr: sum(node_load15)
intervalFactor: 4
legendFormat: load 15m
refId: C
step: 20
target: ''
thresholds: []
timeFrom:
timeShift:
title: System load
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# Left axis carries the load averages. They are plain numbers, not ratios, so
# the unit-less `short` format is used; `percentunit` would render a load of
# 1.5 as 150%.
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
# Right axis: this panel's seriesOverrides list is empty, so no series is
# assigned to it.
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 4
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"}
yaxis: 2
spaceLength: 10
span: 9
stack: true
steppedLine: false
# Memory breakdown summed over every matching series: usage, buffers, cached
# and free.
targets:
# Usage = total - free - buffers - cached.
- expr: sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers)
- sum(node_memory_Cached)
intervalFactor: 2
legendFormat: memory usage
metric: memo
refId: A
step: 10
target: ''
# Buffers.
- expr: sum(node_memory_Buffers)
interval: ''
intervalFactor: 2
legendFormat: memory buffers
metric: memo
refId: B
step: 10
target: ''
# Page cache.
- expr: sum(node_memory_Cached)
interval: ''
intervalFactor: 2
legendFormat: memory cached
metric: memo
refId: C
step: 10
target: ''
# Free memory.
- expr: sum(node_memory_MemFree)
interval: ''
intervalFactor: 2
legendFormat: memory free
metric: memo
refId: D
step: 10
target: ''
thresholds: []
timeFrom:
timeShift:
title: Memory usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 5
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
# Single query: memory in use as a percentage of the total,
# (total - free - buffers - cached) / total * 100.
targets:
- expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers)
- sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100"
intervalFactor: 2
metric: ''
refId: A
step: 60
target: ''
thresholds: 80, 90
title: Memory usage
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 246
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 6
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: read
yaxis: 1
- alias: '{instance="172.17.0.1:9100"}'
yaxis: 2
- alias: io time
yaxis: 2
spaceLength: 10
span: 9
stack: false
steppedLine: false
# 5m rates summed over every matching series: bytes read, bytes written and
# disk I/O time. This panel's seriesOverrides put the "io time" series on the
# second y-axis.
targets:
- expr: sum(rate(node_disk_bytes_read[5m]))
hide: false
intervalFactor: 4
legendFormat: read
refId: A
step: 20
target: ''
- expr: sum(rate(node_disk_bytes_written[5m]))
intervalFactor: 4
legendFormat: written
refId: B
step: 20
- expr: sum(rate(node_disk_io_time_ms[5m]))
intervalFactor: 4
legendFormat: io time
refId: C
step: 20
thresholds: []
timeFrom:
timeShift:
title: Disk I/O
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# Left axis: the read/written series are per-second byte rates (rate() over
# 5m), so they are formatted as bytes per second.
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
# Right axis: the "io time" series, which the seriesOverrides assign to
# y-axis 2.
- format: ms
label:
logBase: 1
max:
min:
show: true
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percentunit
gauge:
maxValue: 1
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 12
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
# Single query: used fraction of filesystem space, (size - free) / size,
# summed over every filesystem whose device is not "rootfs".
targets:
- expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"}))
/ sum(node_filesystem_size{device!="rootfs"})
intervalFactor: 2
refId: A
step: 60
target: ''
thresholds: 0.75, 0.9
title: Disk space usage
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 8
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: 'transmitted '
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
# Single query: 5m receive byte rate summed over every device whose name does
# not match "lo".
targets:
- expr: sum(rate(node_network_receive_bytes{device!~"lo"}[5m]))
hide: false
intervalFactor: 2
legendFormat: ''
refId: A
step: 10
target: ''
thresholds: []
timeFrom:
timeShift:
title: Network received
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# The plotted series is a per-second byte rate (rate() over 5m), so both axes
# are formatted as bytes per second.
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: Bps
label:
logBase: 1
max:
min:
show: true
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 10
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides:
- alias: 'transmitted '
yaxis: 2
spaceLength: 10
span: 6
stack: false
steppedLine: false
# Single query: 5m transmit byte rate summed over every device whose name does
# not match "lo".
targets:
- expr: sum(rate(node_network_transmit_bytes{device!~"lo"}[5m]))
hide: false
intervalFactor: 2
legendFormat: ''
refId: B
step: 10
target: ''
thresholds: []
timeFrom:
timeShift:
title: Network transmitted
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# The plotted series is a per-second byte rate (rate() over 5m), so both axes
# are formatted as bytes per second.
yaxes:
- format: Bps
label:
logBase: 1
max:
min:
show: true
- format: Bps
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 276
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 11
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 9
stack: false
steppedLine: false
# Current pod count plotted against the summed pod capacity.
targets:
# Sum of the kube_pod_info series.
- expr: sum(kube_pod_info)
format: time_series
intervalFactor: 2
legendFormat: Current number of Pods
refId: A
step: 10
# Sum of kube_node_status_capacity_pods.
- expr: sum(kube_node_status_capacity_pods)
format: time_series
intervalFactor: 2
legendFormat: Maximum capacity of pods
refId: B
step: 10
thresholds: []
timeFrom:
timeShift:
title: Cluster Pod Utilization
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 7
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
# Single query: pod utilization in percent, computed as 100 minus the
# percentage of pod capacity still free ((capacity - pods) / capacity * 100).
targets:
- expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods)
* 100
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 60
target: ''
thresholds: '80,90'
title: Pod Utilization
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
list: []
time:
from: now-1h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: Kubernetes Capacity Planning
version: 4
inputs:
- name: prometheus
pluginId: prometheus
type: datasource
value: prometheus
overwrite: true
kubernetes_cluster_health:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.4.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list: []
editable: true
gnetId:
graphTooltip: 0
hideControls: false
id:
links: []
rows:
- collapse: false
height: 254
panels:
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 1
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
# Number of control-plane scrape targets that are currently down.
# `up == 0` keeps only the down targets, each with sample value 0, so they are
# counted rather than summed: a sum of zeros is always 0 and would never reach
# the panel's 1,3 thresholds. When nothing is down the query returns no data,
# which the panel's valueMaps render as "Everything UP and healthy".
targets:
- expr: count(up{job=~"kube-apiserver|kube-scheduler|kube-controller-manager"} == 0)
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '1,3'
title: Control Plane Components Down
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: Everything UP and healthy
value: 'null'
- op: "="
text: ''
value: ''
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals:
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 2
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
# Single query: sum of the ALERTS series in state "firing", excluding the
# alert named DeadMansSwitch.
targets:
- expr: sum(ALERTS{alertstate="firing",alertname!="DeadMansSwitch"})
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '3,5'
title: Alerts Firing
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: '0'
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals:
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 3
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
# Single query: sum of the ALERTS series in state "pending", excluding the
# alert named DeadMansSwitch.
targets:
- expr: sum(ALERTS{alertstate="pending",alertname!="DeadMansSwitch"})
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '3,5'
title: Alerts Pending
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: '0'
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals:
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 4
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: count(increase(kube_pod_container_status_restarts[1h]) > 5)
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '1,3'
title: Crashlooping Pods
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: '0'
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
- collapse: false
height: 250
panels:
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals:
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 5
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: sum(kube_node_status_condition{condition="Ready",status!="true"})
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '1,3'
title: Node Not Ready
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals:
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 6
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: sum(kube_node_status_condition{condition="DiskPressure",status="true"})
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '1,3'
title: Node Disk Pressure
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals:
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 7
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: sum(kube_node_status_condition{condition="MemoryPressure",status="true"})
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '1,3'
title: Node Memory Pressure
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals:
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 8
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: sum(kube_node_spec_unschedulable)
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '1,3'
title: Nodes Unschedulable
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
list: []
time:
from: now-6h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: ''
title: Kubernetes Cluster Health
version: 9
inputs:
- name: prometheus
pluginId: prometheus
type: datasource
value: prometheus
overwrite: true
kubernetes_cluster_status:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.4.1
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list: []
editable: true
gnetId:
graphTooltip: 0
hideControls: false
id:
links: []
rows:
- collapse: false
height: 129
panels:
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 5
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 6
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
# Number of control-plane scrape targets currently reporting up == 0.
# count() is required: the comparison keeps only series whose value is 0, so
# sum() over them is always 0 and the panel thresholds could never trigger.
# When nothing is down the result is empty, which this panel's valueMaps
# render as "UP".
# NOTE(review): other panels in this file select jobs named
# "kube-scheduler-discovery" / "kube-controller-manager-discovery"; confirm
# which job labels the Prometheus scrape config actually produces.
targets:
- expr: count(up{job=~"apiserver|kube-scheduler|kube-controller-manager"} == 0)
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '1,3'
title: Control Plane UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: UP
value: 'null'
valueName: total
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 6
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 6
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: sum(ALERTS{alertstate="firing",alertname!="DeadMansSwitch"})
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '3,5'
title: Alerts Firing
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: '0'
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Cluster Health
titleSize: h6
- collapse: false
height: 168
panels:
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
decimals:
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 1
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: (sum(up{job="apiserver"} == 1) / count(up{job="apiserver"})) * 100
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '50,80'
title: API Servers UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
decimals:
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 2
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: (sum(up{job="kube-controller-manager-discovery"} == 1) / count(up{job="kube-controller-manager-discovery"}))
* 100
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '50,80'
title: Controller Managers UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
decimals:
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 3
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: (sum(up{job="kube-scheduler-discovery"} == 1) / count(up{job="kube-scheduler-discovery"}))
* 100
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '50,80'
title: Schedulers UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
- cacheTimeout:
colorBackground: false
colorValue: true
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
decimals:
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
hideTimeOverride: false
id: 4
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
# Counts container restart series whose counter grew by more than 5 over the
# last hour, restricted to the kube-system and tectonic-system namespaces.
# NOTE(review): "tectonic-system" looks like a leftover from the dashboard's
# origin; confirm whether that namespace exists in this deployment.
targets:
- expr: count(increase(kube_pod_container_status_restarts{namespace=~"kube-system|tectonic-system"}[1h])
> 5)
format: time_series
interval: ''
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '1,3'
title: Crashlooping Control Plane Pods
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: '0'
value: 'null'
valueName: current
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Control Plane Status
titleSize: h6
- collapse: false
height: 158
panels:
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 8
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
# Cluster-wide CPU utilisation in percent: per-instance busy percentage
# (100 - idle share over 5m), averaged across instances.
# The inner "avg by (instance)" already collapses the per-CPU series, so the
# outer aggregation must average over instances; dividing the per-instance sum
# by the count of per-CPU node_cpu series under-reports by the core count.
targets:
- expr: avg(100 - (avg by (instance) (rate(node_cpu{job="node-exporter",mode="idle"}[5m])) * 100))
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '80,90'
title: CPU Utilization
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 7
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers)
- sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100"
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '80,90'
title: Memory Utilization
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 9
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
# Used share of all non-rootfs filesystems, scaled to 0..100 because this
# panel uses "format: percent", a 0..100 gauge and thresholds of 80,90.
# Without the "* 100" the 0..1 ratio never reaches the thresholds.
targets:
- expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"}))
/ sum(node_filesystem_size{device!="rootfs"}) * 100
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '80,90'
title: Filesystem Utilization
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 10
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods)
* 100
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '80,90'
title: Pod Utilization
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
# Row-level settings for the capacity gauges (CPU, memory, filesystem, pods).
# The row title is displayed (showTitle: true), so its spelling is user-visible.
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Capacity Planning
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
list: []
time:
from: now-6h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: ''
title: Kubernetes Cluster Status
version: 3
inputs:
- name: prometheus
pluginId: prometheus
type: datasource
value: prometheus
overwrite: true
kubernetes_control_plane:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.4.1
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list: []
editable: true
gnetId:
graphTooltip: 0
hideControls: false
id:
links: []
rows:
- collapse: false
height: 250px
panels:
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 1
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
# Percentage of apiserver scrape targets that are up.
# The denominator must be count(up): sum(up) adds the 0/1 values and therefore
# equals the number of UP targets, which pins the ratio at 100%.
targets:
- expr: (sum(up{job="apiserver"} == 1) / count(up{job="apiserver"})) * 100
format: time_series
intervalFactor: 2
refId: A
step: 600
thresholds: '50,80'
title: API Servers UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 2
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
# Percentage of kube-controller-manager scrape targets that are up.
# The denominator must be count(up): sum(up) adds the 0/1 values and therefore
# equals the number of UP targets, which pins the ratio at 100%.
targets:
- expr: (sum(up{job="kube-controller-manager-discovery"} == 1) / count(up{job="kube-controller-manager-discovery"}))
* 100
format: time_series
intervalFactor: 2
refId: A
step: 600
thresholds: '50,80'
title: Controller Managers UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(245, 54, 54, 0.9)
- rgba(237, 129, 40, 0.89)
- rgba(50, 172, 45, 0.97)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 3
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
# Percentage of kube-scheduler scrape targets that are up.
# The denominator must be count(up): sum(up) adds the 0/1 values and therefore
# equals the number of UP targets, which pins the ratio at 100%.
targets:
- expr: (sum(up{job="kube-scheduler-discovery"} == 1) / count(up{job="kube-scheduler-discovery"}))
* 100
format: time_series
intervalFactor: 2
refId: A
step: 600
thresholds: '50,80'
title: Schedulers UP
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 4
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: max(sum by(instance) (rate(apiserver_request_count{code=~"5.."}[5m]))
/ sum by(instance) (rate(apiserver_request_count[5m]))) * 100
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 600
thresholds: '5,10'
title: API Server Request Error Rate
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: '0'
value: 'null'
valueName: avg
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 7
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 12
stack: false
steppedLine: false
# One series per verb: 5m rate of apiserver_latency_seconds:quantile, keeping
# only non-negative samples, summed by verb.
# NOTE(review): the metric name suggests a pre-computed quantile series rather
# than a counter; rate() over such a series may not yield a latency value.
# Confirm against the Prometheus rule that produces it.
targets:
- expr: sum by(verb) (rate(apiserver_latency_seconds:quantile[5m]) >= 0)
format: time_series
intervalFactor: 2
legendFormat: ''
refId: A
step: 30
thresholds: []
timeFrom:
timeShift:
title: API Server Request Latency
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
- collapse: false
height: 250
panels:
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 5
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
format: time_series
intervalFactor: 2
refId: A
step: 60
thresholds: []
timeFrom:
timeShift:
title: End to end scheduling latency
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: dtdurations
label:
logBase: 1
max:
min:
show: true
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
fill: 1
id: 6
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: 'null'
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: sum by(instance) (rate(apiserver_request_count{code!~"2.."}[5m]))
format: time_series
intervalFactor: 2
legendFormat: Error Rate
refId: A
step: 60
- expr: sum by(instance) (rate(apiserver_request_count[5m]))
format: time_series
intervalFactor: 2
legendFormat: Request Rate
refId: B
step: 60
thresholds: []
timeFrom:
timeShift:
title: API Server Request Rates
tooltip:
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: Dashboard Row
titleSize: h6
schemaVersion: 14
style: dark
tags: []
templating:
list: []
time:
from: now-6h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: ''
title: Kubernetes Control Plane Status
version: 3
inputs:
- name: prometheus
pluginId: prometheus
type: datasource
value: prometheus
overwrite: true
nodes:
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.4.1
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
annotations:
list: []
description: Dashboard to get an overview of one server
editable: true
gnetId: 22
graphTooltip: 0
hideControls: false
id:
links: []
refresh: false
rows:
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 3
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
# Per-CPU busy percentage for the selected $server: 100 minus the idle share
# (irate over a 5m window), one series per "cpu" label.
# The plotted value is usage, not idle time, so the title says "CPU usage" to
# agree with the panel's y-axis label ("cpu usage").
targets:
- expr: 100 - (avg by (cpu) (irate(node_cpu{mode="idle", instance="$server"}[5m]))
* 100)
hide: false
intervalFactor: 10
legendFormat: "{{cpu}}"
refId: A
step: 50
thresholds: []
timeFrom:
timeShift:
title: CPU usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: percent
label: cpu usage
logBase: 1
max: 100
min: 0
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 9
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: node_load1{instance="$server"}
intervalFactor: 4
legendFormat: load 1m
refId: A
step: 20
target: ''
- expr: node_load5{instance="$server"}
intervalFactor: 4
legendFormat: load 5m
refId: B
step: 20
target: ''
- expr: node_load15{instance="$server"}
intervalFactor: 4
legendFormat: load 15m
refId: C
step: 20
target: ''
thresholds: []
timeFrom:
timeShift:
title: System load
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# Axes for the System load graph. The panel plots node_load1/5/15, which are
# plain load averages rather than 0..1 ratios, so the left axis uses the
# generic "short" unit; "percentunit" would display a load of 1.5 as 150%.
yaxes:
- format: short
label:
logBase: 1
max:
min:
show: true
# Right axis: no series in this panel is assigned to it.
- format: short
label:
logBase: 1
max:
min:
show: true
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 4
legend:
alignAsTable: false
avg: false
current: false
hideEmpty: false
hideZero: false
max: false
min: false
rightSide: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
# Routes a series with this exact alias to the right-hand y-axis.
# NOTE(review): the alias hard-codes one instance address and a SwapFree
# metric, while this panel's targets use the legend names "memory used",
# "memory buffers", "memory cached" and "memory free"; the override appears
# to match nothing here. Confirm and drop it if unused.
seriesOverrides:
- alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"}
yaxis: 2
spaceLength: 10
span: 9
stack: true
steppedLine: false
targets:
- expr: node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"}
- node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"}
hide: false
interval: ''
intervalFactor: 2
legendFormat: memory used
metric: ''
refId: C
step: 10
- expr: node_memory_Buffers{instance="$server"}
interval: ''
intervalFactor: 2
legendFormat: memory buffers
metric: ''
refId: E
step: 10
- expr: node_memory_Cached{instance="$server"}
intervalFactor: 2
legendFormat: memory cached
metric: ''
refId: F
step: 10
- expr: node_memory_MemFree{instance="$server"}
intervalFactor: 2
legendFormat: memory free
metric: ''
refId: D
step: 10
thresholds: []
timeFrom:
timeShift:
title: Memory usage
tooltip:
msResolution: false
shared: true
sort: 0
value_type: individual
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min: '0'
show: true
- format: short
label:
logBase: 1
max:
min:
show: true
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
format: percent
gauge:
maxValue: 100
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 5
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
- expr: ((node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"} -
node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"})
/ node_memory_MemTotal{instance="$server"}) * 100
intervalFactor: 2
refId: A
step: 60
target: ''
thresholds: 80, 90
title: Memory usage
type: singlestat
valueFontSize: 80%
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: avg
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
- collapse: false
height: 250px
panels:
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 6
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
# Axis routing for the Disk I/O graph: "read" stays on the left (bytes) axis,
# "io time" goes to the right (ms) axis. "written" has no override.
# NOTE(review): the middle alias hard-codes one instance address and does not
# equal any legendFormat defined by this panel's targets ("read", "written",
# "io time"); it appears to be unused. Confirm before removing.
seriesOverrides:
- alias: read
yaxis: 1
- alias: '{instance="172.17.0.1:9100"}'
yaxis: 2
- alias: io time
yaxis: 2
spaceLength: 10
span: 9
stack: false
steppedLine: false
targets:
- expr: sum by (instance) (rate(node_disk_bytes_read{instance="$server"}[2m]))
hide: false
intervalFactor: 4
legendFormat: read
refId: A
step: 20
target: ''
- expr: sum by (instance) (rate(node_disk_bytes_written{instance="$server"}[2m]))
intervalFactor: 4
legendFormat: written
refId: B
step: 20
- expr: sum by (instance) (rate(node_disk_io_time_ms{instance="$server"}[2m]))
intervalFactor: 4
legendFormat: io time
refId: C
step: 20
thresholds: []
timeFrom:
timeShift:
title: Disk I/O
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: ms
label:
logBase: 1
max:
min:
show: true
# Singlestat gauge "Disk space usage": used fraction of filesystem space on
# the node chosen by $server, excluding the `rootfs` device.
- cacheTimeout:
colorBackground: false
colorValue: false
colors:
- rgba(50, 172, 45, 0.97)
- rgba(237, 129, 40, 0.89)
- rgba(245, 54, 54, 0.9)
datasource: prometheus
editable: true
error: false
# The query yields a 0..1 ratio, hence `percentunit` and the 0..1 gauge range.
format: percentunit
gauge:
maxValue: 1
minValue: 0
show: true
thresholdLabels: false
thresholdMarkers: true
id: 7
interval:
links: []
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 3
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
# (total size - total free) / total size over all non-rootfs filesystems.
- expr: (sum(node_filesystem_size{device!="rootfs",instance="$server"}) - sum(node_filesystem_free{device!="rootfs",instance="$server"}))
/ sum(node_filesystem_size{device!="rootfs",instance="$server"})
intervalFactor: 2
refId: A
step: 60
target: ''
# Threshold values are ratios (75% and 90%), matching the 0..1 query result.
thresholds: 0.75, 0.9
title: Disk space usage
type: singlestat
valueFontSize: 80%
# A null result is displayed as "N/A".
valueMaps:
- op: "="
text: N/A
value: 'null'
valueName: current
# Row-level settings closing the preceding row: no repeat, title hidden.
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
# Next dashboard row (expanded, 250px high); its panels follow under `panels`.
- collapse: false
height: 250px
panels:
# Graph panel "Network received": per-device receive byte rate (5m window)
# for the node chosen by $server, excluding the loopback device.
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 8
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
# No overrides: series are named after the device (legendFormat below), so
# the former override keyed on the alias 'transmitted ' never matched anything.
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: rate(node_network_receive_bytes{instance="$server",device!~"lo"}[5m])
hide: false
intervalFactor: 2
legendFormat: "{{device}}"
refId: A
step: 10
target: ''
thresholds: []
timeFrom:
timeShift:
title: Network received
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: bytes
label:
logBase: 1
max:
min:
show: true
# Graph panel "Network transmitted": per-device transmit byte rate (5m window)
# for the node chosen by $server, excluding the loopback device.
- alerting: {}
aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 1
grid: {}
id: 10
legend:
avg: false
current: false
max: false
min: false
show: true
total: false
values: false
lines: true
linewidth: 2
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
# No overrides: series are named after the device (legendFormat below), so
# the former override keyed on the alias 'transmitted ' never matched anything.
seriesOverrides: []
spaceLength: 10
span: 6
stack: false
steppedLine: false
targets:
- expr: rate(node_network_transmit_bytes{instance="$server",device!~"lo"}[5m])
hide: false
intervalFactor: 2
legendFormat: "{{device}}"
refId: B
step: 10
target: ''
thresholds: []
timeFrom:
timeShift:
title: Network transmitted
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
yaxes:
- format: bytes
label:
logBase: 1
max:
min:
show: true
- format: bytes
label:
logBase: 1
max:
min:
show: true
# Row-level settings closing the preceding row: no repeat, title hidden.
repeat:
repeatIteration:
repeatRowId:
showTitle: false
title: New row
titleSize: h6
# Dashboard-level settings for the dashboard titled "Nodes".
schemaVersion: 14
style: dark
tags: []
templating:
list:
# Template variable `server` (referenced as $server in the panel queries):
# populated from the `instance` label values of the node_boot_time metric.
- allValue:
current: {}
datasource: prometheus
hide: 0
includeAll: false
label:
multi: false
name: server
options: []
query: label_values(node_boot_time, instance)
refresh: 1
regex: ''
sort: 0
tagValuesQuery: ''
tags: []
tagsQuery: ''
type: query
useTags: false
# Default time window: the last hour.
time:
from: now-1h
to: now
timepicker:
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
timezone: browser
title: Nodes
version: 2
# Maps the datasource input named `prometheus` to the value `prometheus`.
# NOTE(review): presumably consumed when the dashboard is imported, with
# `overwrite` replacing an existing dashboard - confirm against the import job.
inputs:
- name: prometheus
pluginId: prometheus
type: datasource
value: prometheus
overwrite: true
# Dashboard definition keyed `openstack_control_plane`: status tiles for
# OpenStack and supporting services, followed by compute resource graphs.
openstack_control_plane:
# Declares one datasource input named `prometheus` (Prometheus plugin).
__inputs:
- name: prometheus
label: prometheus
description: ''
type: datasource
pluginId: prometheus
pluginName: Prometheus
# Components the dashboard depends on: Grafana 4.5.2, the Prometheus
# datasource, and the graph, singlestat and text panel types.
__requires:
- type: grafana
id: grafana
name: Grafana
version: 4.5.2
- type: panel
id: graph
name: Graph
version: ''
- type: datasource
id: prometheus
name: Prometheus
version: 1.0.0
- type: panel
id: singlestat
name: Singlestat
version: ''
- type: panel
id: text
name: Text
version: ''
annotations:
list: []
editable: true
gnetId:
graphTooltip: 1
hideControls: false
id:
links: []
# The dashboard refreshes every minute.
refresh: 1m
rows:
# First row (expanded, 250px high); its status tiles follow under `panels`.
- collapse: false
height: 250px
panels:
# Singlestat status tile "Keystone": shows the current value of
# check_keystone_api for the selected $region as text, with a coloured background.
- cacheTimeout:
colorBackground: true
colorValue: false
# NOTE(review): with thresholds '1,2' the three colours presumably apply to
# values below 1, from 1 to 2, and 2 or above - confirm against Grafana docs.
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 24
interval: "> 60s"
# Drilldown link to the dashboard named "Keystone".
links:
- dashboard: Keystone
name: Drilldown dashboard
title: Keystone
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
# `expr` is the Prometheus query. NOTE(review): column/dsType/function/
# groupBy/policy/rawQuery look like InfluxDB query-editor fields and are
# presumably ignored by the Prometheus datasource - confirm.
- column: value
condition: ''
dsType: influxdb
expr: check_keystone_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Keystone
type: singlestat
valueFontSize: 50%
# Value-to-text mapping: null -> "no data", 0 -> CRIT, 1 -> OK, 2 -> UNKW.
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
# Singlestat status tile "Glance": shows the current value of
# check_glance_api for the selected $region as text, with a coloured background.
- cacheTimeout:
colorBackground: true
colorValue: false
# NOTE(review): with thresholds '1,2' the three colours presumably apply to
# values below 1, from 1 to 2, and 2 or above - confirm against Grafana docs.
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 23
interval: "> 60s"
# Drilldown link to the dashboard named "Glance".
links:
- dashboard: Glance
name: Drilldown dashboard
title: Glance
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
# `expr` is the Prometheus query. NOTE(review): column/dsType/function/
# groupBy/policy/rawQuery look like InfluxDB query-editor fields and are
# presumably ignored by the Prometheus datasource - confirm.
- column: value
condition: ''
dsType: influxdb
expr: check_glance_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Glance
type: singlestat
valueFontSize: 50%
# Value-to-text mapping: null -> "no data", 0 -> CRIT, 1 -> OK, 2 -> UNKW.
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
# Singlestat status tile "Heat": shows the current value of
# check_heat_api for the selected $region as text, with a coloured background.
- cacheTimeout:
colorBackground: true
colorValue: false
# NOTE(review): with thresholds '1,2' the three colours presumably apply to
# values below 1, from 1 to 2, and 2 or above - confirm against Grafana docs.
colors:
- rgba(202, 58, 40, 0.86)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 22
interval: "> 60s"
# Drilldown link to the dashboard named "Heat".
links:
- dashboard: Heat
name: Drilldown dashboard
title: Heat
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
# `expr` is the Prometheus query. NOTE(review): column/dsType/function/
# groupBy/policy/rawQuery look like InfluxDB query-editor fields and are
# presumably ignored by the Prometheus datasource - confirm.
- column: value
condition: ''
dsType: influxdb
expr: check_heat_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Heat
type: singlestat
valueFontSize: 50%
# Value-to-text mapping: null -> "no data", 0 -> CRIT, 1 -> OK, 2 -> UNKW.
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
# Singlestat status tile "Neutron": shows the current value of
# check_neutron_api for the selected $region as text, with a coloured background.
- cacheTimeout:
colorBackground: true
colorValue: false
# NOTE(review): with thresholds '1,2' the three colours presumably apply to
# values below 1, from 1 to 2, and 2 or above - confirm against Grafana docs.
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 21
interval: "> 60s"
# Drilldown link to the dashboard named "Neutron".
links:
- dashboard: Neutron
name: Drilldown dashboard
title: Neutron
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
# `expr` is the Prometheus query. NOTE(review): column/dsType/function/
# groupBy/policy/rawQuery look like InfluxDB query-editor fields and are
# presumably ignored by the Prometheus datasource - confirm.
- column: value
condition: ''
dsType: influxdb
expr: check_neutron_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Neutron
type: singlestat
valueFontSize: 50%
# Value-to-text mapping: null -> "no data", 0 -> CRIT, 1 -> OK, 2 -> UNKW.
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
# Singlestat status tile "Nova": shows the current value of
# check_nova_api for the selected $region as text, with a coloured background.
- cacheTimeout:
colorBackground: true
colorValue: false
# NOTE(review): with thresholds '1,2' the three colours presumably apply to
# values below 1, from 1 to 2, and 2 or above - confirm against Grafana docs.
colors:
- rgba(208, 53, 34, 0.82)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 5
interval: "> 60s"
# Drilldown link to the dashboard named "Nova".
links:
- dashboard: Nova
name: Drilldown dashboard
title: Nova
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
# `expr` is the Prometheus query. NOTE(review): column/dsType/function/
# groupBy/policy/rawQuery look like InfluxDB query-editor fields and are
# presumably ignored by the Prometheus datasource - confirm.
- column: value
condition: ''
dsType: influxdb
expr: check_nova_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Nova
type: singlestat
valueFontSize: 50%
# Value-to-text mapping: null -> "no data", 0 -> CRIT, 1 -> OK, 2 -> UNKW.
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
# Singlestat status tile "Ceph": shows the current value of check_swift_api
# for the selected $region as text, with a coloured background.
# NOTE(review): the tile is titled "Ceph" but queries check_swift_api -
# presumably the Swift-compatible object API is served by Ceph here; confirm.
- cacheTimeout:
colorBackground: true
colorValue: false
# NOTE(review): with thresholds '1,2' the three colours presumably apply to
# values below 1, from 1 to 2, and 2 or above - confirm against Grafana docs.
colors:
- rgba(200, 54, 35, 0.88)
- rgba(118, 245, 40, 0.73)
- rgba(225, 177, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: none
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 25
interval: "> 60s"
# Drilldown link to the dashboard named "Ceph".
links:
- dashboard: Ceph
name: Drilldown dashboard
title: Ceph
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
# `expr` is the Prometheus query. NOTE(review): column/dsType/function/
# groupBy/policy/rawQuery look like InfluxDB query-editor fields and are
# presumably ignored by the Prometheus datasource - confirm.
- column: value
condition: ''
dsType: influxdb
expr: check_swift_api{job="openstack-metrics", region="$region"}
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
groupby_field: ''
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
thresholds: '1,2'
title: Ceph
type: singlestat
valueFontSize: 50%
# Value-to-text mapping: null -> "no data", 0 -> CRIT, 1 -> OK, 2 -> UNKW.
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: CRIT
value: '0'
- op: "="
text: OK
value: '1'
- op: "="
text: UNKW
value: '2'
valueName: current
# Text panel with empty content and no title, one grid column wide.
# NOTE(review): appears to act as a visual spacer between the API tiles and
# the infrastructure service tiles - confirm.
- content: ''
editable: true
error: false
id: 20
links: []
mode: markdown
span: 1
style: {}
title: ''
type: text
# Singlestat status tile "RabbitMQ" with a drilldown link to the RabbitMQ
# dashboard.
# NOTE(review): the only target has an empty `expr`, so no Prometheus query is
# defined here; the tile presumably shows "no data" until one is set - confirm.
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(71, 212, 59, 0.4)
- rgba(245, 150, 40, 0.73)
- rgba(225, 40, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: short
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 16
interval: ">60s"
links:
- dashboard: RabbitMQ
name: Drilldown dashboard
title: RabbitMQ
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
# NOTE(review): column/dsType/function/groupBy/policy/rawQuery look like
# InfluxDB query-editor fields, presumably ignored by Prometheus - confirm.
- column: value
dsType: influxdb
expr: ''
fill: ''
format: time_series
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
interval: ''
intervalFactor: 2
policy: default
rawQuery: false
refId: A
resultFormat: time_series
# No thresholds are configured for this tile.
thresholds: ''
title: RabbitMQ
type: singlestat
valueFontSize: 50%
# Value-to-text mapping: null -> "no data", 0 -> OKAY, 1 -> WARN, 2 -> UNKW,
# 3 -> CRIT, 4 -> DOWN.
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: OKAY
value: '0'
- op: "="
text: WARN
value: '1'
- op: "="
text: UNKW
value: '2'
- op: "="
text: CRIT
value: '3'
- op: "="
text: DOWN
value: '4'
valueName: current
# Singlestat status tile "MySQL" with a drilldown link to the MySQL dashboard.
# NOTE(review): the only target has no `expr` key, so no Prometheus query is
# defined here; the tile presumably shows "no data" until one is set - confirm.
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(71, 212, 59, 0.4)
- rgba(245, 150, 40, 0.73)
- rgba(225, 40, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: short
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 15
interval: ">60s"
links:
- dashboard: MySQL
name: Drilldown dashboard
title: MySQL
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
# NOTE(review): column/dsType/function/groupBy/policy/rawQuery look like
# InfluxDB query-editor fields, presumably ignored by Prometheus - confirm.
- column: value
dsType: influxdb
fill: ''
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
interval: ''
policy: default
rawQuery: false
refId: A
resultFormat: time_series
# No thresholds are configured for this tile.
thresholds: ''
title: MySQL
type: singlestat
valueFontSize: 50%
# Value-to-text mapping: null -> "no data", 0 -> OKAY, 1 -> WARN, 2 -> UNKW,
# 3 -> CRIT, 4 -> DOWN.
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: OKAY
value: '0'
- op: "="
text: WARN
value: '1'
- op: "="
text: UNKW
value: '2'
- op: "="
text: CRIT
value: '3'
- op: "="
text: DOWN
value: '4'
valueName: current
# Singlestat status tile "Apache" with a drilldown link to the Apache
# dashboard (dashUri db/apache).
# NOTE(review): the only target has no `expr` key, so no Prometheus query is
# defined here; the tile presumably shows "no data" until one is set - confirm.
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(71, 212, 59, 0.4)
- rgba(245, 150, 40, 0.73)
- rgba(225, 40, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: short
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 18
interval: ">60s"
links:
- dashUri: db/apache
dashboard: Apache
name: Drilldown dashboard
title: Apache
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
# NOTE(review): column/dsType/function/groupBy/policy/rawQuery look like
# InfluxDB query-editor fields, presumably ignored by Prometheus - confirm.
- column: value
dsType: influxdb
fill: ''
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
interval: ''
policy: default
rawQuery: false
refId: A
resultFormat: time_series
# No thresholds are configured for this tile.
thresholds: ''
title: Apache
type: singlestat
valueFontSize: 50%
# Value-to-text mapping: null -> "no data", 0 -> OKAY, 1 -> WARN, 2 -> UNKW,
# 3 -> CRIT, 4 -> DOWN.
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: OKAY
value: '0'
- op: "="
text: WARN
value: '1'
- op: "="
text: UNKW
value: '2'
- op: "="
text: CRIT
value: '3'
- op: "="
text: DOWN
value: '4'
valueName: current
# Singlestat status tile "HAProxy" with a drilldown link to the HAProxy
# dashboard (dashUri db/haproxy).
# NOTE(review): the only target has no `expr` key, so no Prometheus query is
# defined here; the tile presumably shows "no data" until one is set - confirm.
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(71, 212, 59, 0.4)
- rgba(245, 150, 40, 0.73)
- rgba(225, 40, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: short
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 10
interval: ">60s"
links:
- dashUri: db/haproxy
dashboard: HAProxy
name: Drilldown dashboard
title: HAProxy
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
# NOTE(review): column/dsType/function/groupBy/policy/rawQuery look like
# InfluxDB query-editor fields, presumably ignored by Prometheus - confirm.
- column: value
dsType: influxdb
fill: ''
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
interval: ''
policy: default
rawQuery: false
refId: A
resultFormat: time_series
# No thresholds are configured for this tile.
thresholds: ''
# Title capitalised to match the drilldown link title and the sibling tiles.
title: HAProxy
type: singlestat
valueFontSize: 50%
# Value-to-text mapping: null -> "no data", 0 -> OKAY, 1 -> WARN, 2 -> UNKW,
# 3 -> CRIT, 4 -> DOWN.
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: OKAY
value: '0'
- op: "="
text: WARN
value: '1'
- op: "="
text: UNKW
value: '2'
- op: "="
text: CRIT
value: '3'
- op: "="
text: DOWN
value: '4'
valueName: current
# Singlestat status tile "Memcached" with a drilldown link to the Memcached
# dashboard (dashUri db/memcached).
# NOTE(review): the only target has no `expr` key, so no Prometheus query is
# defined here; the tile presumably shows "no data" until one is set - confirm.
- cacheTimeout:
colorBackground: true
colorValue: false
colors:
- rgba(71, 212, 59, 0.4)
- rgba(245, 150, 40, 0.73)
- rgba(225, 40, 40, 0.59)
datasource: prometheus
editable: true
error: false
format: short
gauge:
maxValue: 100
minValue: 0
show: false
thresholdLabels: false
thresholdMarkers: true
id: 17
interval: ">60s"
links:
- dashUri: db/memcached
dashboard: Memcached
name: Drilldown dashboard
title: Memcached
type: dashboard
mappingType: 1
mappingTypes:
- name: value to text
value: 1
- name: range to text
value: 2
maxDataPoints: 100
nullPointMode: connected
nullText:
postfix: ''
postfixFontSize: 50%
prefix: ''
prefixFontSize: 50%
rangeMaps:
- from: 'null'
text: N/A
to: 'null'
span: 1
sparkline:
fillColor: rgba(31, 118, 189, 0.18)
full: false
lineColor: rgb(31, 120, 193)
show: false
tableColumn: ''
targets:
# NOTE(review): column/dsType/function/groupBy/policy/rawQuery look like
# InfluxDB query-editor fields, presumably ignored by Prometheus - confirm.
- column: value
dsType: influxdb
fill: ''
function: last
groupBy:
- params:
- "$interval"
type: time
- params:
- 'null'
type: fill
groupByTags: []
interval: ''
policy: default
rawQuery: false
refId: A
resultFormat: time_series
# No thresholds are configured for this tile.
thresholds: ''
# Title capitalised to match the drilldown link title and the sibling tiles.
title: Memcached
type: singlestat
valueFontSize: 50%
# Value-to-text mapping: null -> "no data", 0 -> OKAY, 1 -> WARN, 2 -> UNKW,
# 3 -> CRIT, 4 -> DOWN.
valueMaps:
- op: "="
text: no data
value: 'null'
- op: "="
text: OKAY
value: '0'
- op: "="
text: WARN
value: '1'
- op: "="
text: UNKW
value: '2'
- op: "="
text: CRIT
value: '3'
- op: "="
text: DOWN
value: '4'
valueName: current
# Row-level settings closing the status-tile row: no repeat, title shown as
# "OpenStack Services".
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: OpenStack Services
titleSize: h6
# Next dashboard row (expanded, 250px high); its graph panels follow under `panels`.
- collapse: false
height: 250px
panels:
# Graph panel "VCPUs (total vs used)": total vCPUs (used + free) against used
# vCPUs for the selected $region.
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 0
grid: {}
id: 11
interval: "> 60s"
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
# Target A is the total (used + free); it was previously aliased `free`.
# `legendFormat` names the series for the Prometheus datasource, so the
# shared tooltip reads "total"/"used". NOTE(review): alias/column/dsType/
# function/groupBy/policy look like InfluxDB query-editor fields and are
# presumably ignored by Prometheus - confirm.
- alias: total
column: value
dsType: influxdb
expr: total_used_vcpus{job="openstack-metrics", region="$region"} + total_free_vcpus{job="openstack-metrics",
region="$region"}
format: time_series
function: min
groupBy:
- params:
- "$interval"
type: time
- params:
- '0'
type: fill
groupByTags: []
intervalFactor: 2
legendFormat: total
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
- alias: used
column: value
dsType: influxdb
expr: total_used_vcpus{job="openstack-metrics", region="$region"}
format: time_series
function: max
groupBy:
- params:
- "$interval"
type: time
- params:
- '0'
type: fill
groupByTags: []
intervalFactor: 2
legendFormat: used
policy: default
rawQuery: false
refId: B
resultFormat: time_series
step: 120
thresholds: []
timeFrom:
timeShift:
title: VCPUs (total vs used)
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# Left axis is a plain count starting at 0.
yaxes:
- format: short
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min:
show: true
# Graph panel "RAM (total vs used)": total RAM (used + free) against used RAM,
# in megabytes, for the selected $region.
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 0
grid: {}
id: 12
interval: "> 60s"
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
# Target A is the total (used + free); it was previously aliased `free`.
# `legendFormat` names the series for the Prometheus datasource, so the
# shared tooltip reads "total"/"used". NOTE(review): alias/column/dsType/
# function/groupBy/policy look like InfluxDB query-editor fields and are
# presumably ignored by Prometheus - confirm.
- alias: total
column: value
dsType: influxdb
expr: total_used_ram_MB{job="openstack-metrics", region="$region"} + total_free_ram_MB{job="openstack-metrics",
region="$region"}
format: time_series
function: mean
groupBy:
- params:
- "$interval"
type: time
- params:
- '0'
type: fill
groupByTags: []
intervalFactor: 2
legendFormat: total
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
- alias: used
column: value
dsType: influxdb
expr: total_used_ram_MB{job="openstack-metrics", region="$region"}
format: time_series
function: mean
groupBy:
- params:
- "$interval"
type: time
- params:
- '0'
type: fill
groupByTags: []
interval: ''
intervalFactor: 2
legendFormat: used
policy: default
rawQuery: false
refId: B
resultFormat: time_series
step: 120
thresholds: []
timeFrom:
timeShift:
title: RAM (total vs used)
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# Left axis is formatted as megabytes (the metrics are *_MB) and starts at 0.
yaxes:
- format: mbytes
label: ''
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min:
show: true
# Graph panel "Disk (used vs total)": total disk (used + free) against used
# disk, in gigabytes, for the selected $region.
- aliasColors: {}
bars: false
dashLength: 10
dashes: false
datasource: prometheus
editable: true
error: false
fill: 0
grid: {}
id: 13
interval: "> 60s"
legend:
avg: false
current: false
max: false
min: false
show: false
total: false
values: false
lines: true
linewidth: 1
links: []
nullPointMode: connected
percentage: false
pointradius: 5
points: false
renderer: flot
seriesOverrides: []
spaceLength: 10
span: 4
stack: false
steppedLine: false
targets:
# Target A is the total (used + free); it was previously aliased `free`.
# `legendFormat` names the series for the Prometheus datasource, so the
# shared tooltip reads "total"/"used". NOTE(review): alias/column/dsType/
# function/groupBy/policy look like InfluxDB query-editor fields and are
# presumably ignored by Prometheus - confirm.
- alias: total
column: value
dsType: influxdb
expr: total_used_disk_GB{job="openstack-metrics", region="$region"} + total_free_disk_GB{job="openstack-metrics",
region="$region"}
format: time_series
function: mean
groupBy:
- params:
- "$interval"
type: time
- params:
- '0'
type: fill
groupByTags: []
intervalFactor: 2
legendFormat: total
policy: default
rawQuery: false
refId: A
resultFormat: time_series
step: 120
- alias: used
column: value
dsType: influxdb
expr: total_used_disk_GB{job="openstack-metrics", region="$region"}
format: time_series
function: mean
groupBy:
- params:
- "$interval"
type: time
- params:
- '0'
type: fill
groupByTags: []
intervalFactor: 2
legendFormat: used
policy: default
rawQuery: false
refId: B
resultFormat: time_series
step: 120
thresholds: []
timeFrom:
timeShift:
title: Disk (used vs total)
tooltip:
msResolution: false
shared: true
sort: 0
value_type: cumulative
type: graph
xaxis:
buckets:
mode: time
name:
show: true
values: []
# Left axis is formatted as gigabytes (the metrics are *_GB) and starts at 0.
yaxes:
- format: gbytes
logBase: 1
max:
min: 0
show: true
- format: short
logBase: 1
max:
min:
show: true
# Row-level settings closing the graph row: no repeat, title shown as
# "Virtual compute resources".
repeat:
repeatIteration:
repeatRowId:
showTitle: true
title: Virtual compute resources
titleSize: h6
# Dashboard-level settings for the dashboard titled "Openstack Main1".
schemaVersion: 14
style: dark
tags: []
templating:
enable: true
list:
# Template variable `region` (referenced as $region in the panel queries):
# populated from the `region` label values of the
# openstack_exporter_cache_refresh_duration_seconds metric.
- allValue:
current: {}
datasource: prometheus
hide: 0
includeAll: false
label:
multi: false
name: region
options: []
query: label_values(openstack_exporter_cache_refresh_duration_seconds, region)
refresh: 1
regex: ''
sort: 0
tagValuesQuery: ''
tags: []
tagsQuery: ''
type: query
useTags: false
# Default time window: the last hour.
time:
from: now-1h
to: now
timepicker:
collapse: false
enable: true
notice: false
now: true
refresh_intervals:
- 5s
- 10s
- 30s
- 1m
- 5m
- 15m
- 30m
- 1h
- 2h
- 1d
status: Stable
time_options:
- 5m
- 15m
- 1h
- 6h
- 12h
- 24h
- 2d
- 7d
- 30d
type: timepicker
timezone: browser
# NOTE(review): the trailing "1" in the title looks like an export artifact;
# left unchanged because dashboard links/URLs may depend on the title - confirm.
title: Openstack Main1
version: 2