Change noout to be a CRITICAL alert instead of WARNING.
When the noout flag is set in a Ceph cluster, the Nagios check currently marks this as a warning (as Ceph itself does). However, reporting it as CRITICAL will raise visibility and indicate to the operator that this should be a temporary state. Closes-Bug: 1926551 Change-Id: I9831cfea3f63e82fbc8bfebc938a9795b69111c7
This commit is contained in:
@@ -200,6 +200,10 @@ def check_ceph_status(args):
|
||||
if args.raise_nodeepscrub:
|
||||
if re.match("nodeep-scrub flag", status):
|
||||
status_critical = True
|
||||
# Check if noout is set
|
||||
if re.match("noout flag", status):
|
||||
status_critical = True
|
||||
status_msg.append("noout flag is set")
|
||||
if overall_status == 'HEALTH_CRITICAL' or \
|
||||
overall_status == 'HEALTH_ERR':
|
||||
# HEALTH_ERR, report critical
|
||||
|
206
unit_tests/ceph_noout.json
Normal file
206
unit_tests/ceph_noout.json
Normal file
@@ -0,0 +1,206 @@
|
||||
{
|
||||
"health": {
|
||||
"health": {
|
||||
"health_services": [
|
||||
{
|
||||
"mons": [
|
||||
{
|
||||
"name": "juju-c62a41-21-lxd-0",
|
||||
"kb_total": 334602320,
|
||||
"kb_used": 2127960,
|
||||
"kb_avail": 315454468,
|
||||
"avail_percent": 94,
|
||||
"last_updated": "2018-11-08 09:47:09.932189",
|
||||
"store_stats": {
|
||||
"bytes_total": 34880542,
|
||||
"bytes_sst": 0,
|
||||
"bytes_log": 1647123,
|
||||
"bytes_misc": 33233419,
|
||||
"last_updated": "0.000000"
|
||||
},
|
||||
"health": "HEALTH_WARN"
|
||||
},
|
||||
{
|
||||
"name": "juju-c62a41-24-lxd-0",
|
||||
"kb_total": 334602320,
|
||||
"kb_used": 2128116,
|
||||
"kb_avail": 315454312,
|
||||
"avail_percent": 94,
|
||||
"last_updated": "2018-11-08 09:47:16.418007",
|
||||
"store_stats": {
|
||||
"bytes_total": 36811676,
|
||||
"bytes_sst": 0,
|
||||
"bytes_log": 3574345,
|
||||
"bytes_misc": 33237331,
|
||||
"last_updated": "0.000000"
|
||||
},
|
||||
"health": "HEALTH_OK"
|
||||
},
|
||||
{
|
||||
"name": "juju-c62a41-25-lxd-0",
|
||||
"kb_total": 334602320,
|
||||
"kb_used": 2128860,
|
||||
"kb_avail": 315453568,
|
||||
"avail_percent": 94,
|
||||
"last_updated": "2018-11-08 09:47:21.198816",
|
||||
"store_stats": {
|
||||
"bytes_total": 37388424,
|
||||
"bytes_sst": 0,
|
||||
"bytes_log": 4151569,
|
||||
"bytes_misc": 33236855,
|
||||
"last_updated": "0.000000"
|
||||
},
|
||||
"health": "HEALTH_OK"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"timechecks": {
|
||||
"epoch": 14,
|
||||
"round": 4480,
|
||||
"round_status": "finished",
|
||||
"mons": [
|
||||
{
|
||||
"name": "juju-c62a41-21-lxd-0",
|
||||
"skew": 0.000000,
|
||||
"latency": 0.000000,
|
||||
"health": "HEALTH_OK"
|
||||
},
|
||||
{
|
||||
"name": "juju-c62a41-24-lxd-0",
|
||||
"skew": 0.000282,
|
||||
"latency": 0.000989,
|
||||
"health": "HEALTH_OK"
|
||||
},
|
||||
{
|
||||
"name": "juju-c62a41-25-lxd-0",
|
||||
"skew": -0.001223,
|
||||
"latency": 0.000776,
|
||||
"health": "HEALTH_OK"
|
||||
}
|
||||
]
|
||||
},
|
||||
"summary": [
|
||||
{
|
||||
"severity": "HEALTH_WARN",
|
||||
"summary": "noout flag(s) set"
|
||||
},
|
||||
{
|
||||
"severity": "HEALTH_WARN",
|
||||
"summary": "19 pgs backfill_wait"
|
||||
},
|
||||
{
|
||||
"severity": "HEALTH_WARN",
|
||||
"summary": "4 pgs backfilling"
|
||||
},
|
||||
{
|
||||
"severity": "HEALTH_WARN",
|
||||
"summary": "1 pgs peering"
|
||||
},
|
||||
{
|
||||
"severity": "HEALTH_WARN",
|
||||
"summary": "24 pgs stuck unclean"
|
||||
},
|
||||
{
|
||||
"severity": "HEALTH_WARN",
|
||||
"summary": "recovery 17386\/112794 objects misplaced (15.414%)"
|
||||
},
|
||||
{
|
||||
"severity": "HEALTH_WARN",
|
||||
"summary": "pool pool1 has many more objects per pg than average (too few pgs?)"
|
||||
},
|
||||
{
|
||||
"severity": "HEALTH_WARN",
|
||||
"summary": "nodeep-scrub flag(s) set"
|
||||
}
|
||||
],
|
||||
"overall_status": "HEALTH_WARN",
|
||||
"detail": []
|
||||
},
|
||||
"fsid": "66af7af5-2f60-4e0e-94dc-49f49bd37284",
|
||||
"election_epoch": 14,
|
||||
"quorum": [
|
||||
0,
|
||||
1,
|
||||
2
|
||||
],
|
||||
"quorum_names": [
|
||||
"juju-c62a41-21-lxd-0",
|
||||
"juju-c62a41-24-lxd-0",
|
||||
"juju-c62a41-25-lxd-0"
|
||||
],
|
||||
"monmap": {
|
||||
"epoch": 2,
|
||||
"fsid": "66af7af5-2f60-4e0e-94dc-49f49bd37284",
|
||||
"modified": "2018-10-31 15:37:56.902830",
|
||||
"created": "2018-10-31 15:37:40.288870",
|
||||
"mons": [
|
||||
{
|
||||
"rank": 0,
|
||||
"name": "juju-c62a41-21-lxd-0",
|
||||
"addr": "100.84.195.4:6789\/0"
|
||||
},
|
||||
{
|
||||
"rank": 1,
|
||||
"name": "juju-c62a41-24-lxd-0",
|
||||
"addr": "100.84.196.4:6789\/0"
|
||||
},
|
||||
{
|
||||
"rank": 2,
|
||||
"name": "juju-c62a41-25-lxd-0",
|
||||
"addr": "100.84.196.5:6789\/0"
|
||||
}
|
||||
]
|
||||
},
|
||||
"osdmap": {
|
||||
"osdmap": {
|
||||
"epoch": 316,
|
||||
"num_osds": 48,
|
||||
"num_up_osds": 48,
|
||||
"num_in_osds": 48,
|
||||
"full": false,
|
||||
"nearfull": false,
|
||||
"num_remapped_pgs": 22
|
||||
}
|
||||
},
|
||||
"pgmap": {
|
||||
"pgs_by_state": [
|
||||
{
|
||||
"state_name": "active+clean",
|
||||
"count": 3448
|
||||
},
|
||||
{
|
||||
"state_name": "active+remapped+wait_backfill",
|
||||
"count": 19
|
||||
},
|
||||
{
|
||||
"state_name": "active+remapped+backfilling",
|
||||
"count": 4
|
||||
},
|
||||
{
|
||||
"state_name": "peering",
|
||||
"count": 1
|
||||
}
|
||||
],
|
||||
"version": 141480,
|
||||
"num_pgs": 3472,
|
||||
"data_bytes": 157009583781,
|
||||
"bytes_used": 487185850368,
|
||||
"bytes_avail": 75282911256576,
|
||||
"bytes_total": 75770097106944,
|
||||
"misplaced_objects": 17386,
|
||||
"misplaced_total": 112794,
|
||||
"misplaced_ratio": 0.154139,
|
||||
"recovering_objects_per_sec": 436,
|
||||
"recovering_bytes_per_sec": 1832614589,
|
||||
"recovering_keys_per_sec": 0,
|
||||
"num_objects_recovered": 446,
|
||||
"num_bytes_recovered": 1870659584,
|
||||
"num_keys_recovered": 0
|
||||
},
|
||||
"fsmap": {
|
||||
"epoch": 1,
|
||||
"by_rank": []
|
||||
}
|
||||
}
|
102
unit_tests/ceph_noout_luminous.json
Normal file
102
unit_tests/ceph_noout_luminous.json
Normal file
@@ -0,0 +1,102 @@
|
||||
{
|
||||
"fsid": "b03a2900-e297-11e8-a7db-00163ed10659",
|
||||
"health": {
|
||||
"checks": {
|
||||
"OSDMAP_FLAGS": {
|
||||
"severity": "HEALTH_WARN",
|
||||
"summary": {
|
||||
"message": "noout flag(s) set"
|
||||
}
|
||||
}
|
||||
},
|
||||
"status": "HEALTH_WARN"
|
||||
},
|
||||
"election_epoch": 5,
|
||||
"quorum": [
|
||||
0
|
||||
],
|
||||
"quorum_names": [
|
||||
"juju-460e0f-11"
|
||||
],
|
||||
"monmap": {
|
||||
"epoch": 1,
|
||||
"fsid": "b03a2900-e297-11e8-a7db-00163ed10659",
|
||||
"modified": "2018-11-07 14:17:12.324408",
|
||||
"created": "2018-11-07 14:17:12.324408",
|
||||
"features": {
|
||||
"persistent": [
|
||||
"kraken",
|
||||
"luminous"
|
||||
],
|
||||
"optional": []
|
||||
},
|
||||
"mons": [
|
||||
{
|
||||
"rank": 0,
|
||||
"name": "juju-460e0f-11",
|
||||
"addr": "192.168.100.81:6789/0",
|
||||
"public_addr": "192.168.100.81:6789/0"
|
||||
}
|
||||
]
|
||||
},
|
||||
"osdmap": {
|
||||
"osdmap": {
|
||||
"epoch": 518,
|
||||
"num_osds": 9,
|
||||
"num_up_osds": 9,
|
||||
"num_in_osds": 9,
|
||||
"full": false,
|
||||
"nearfull": false,
|
||||
"num_remapped_pgs": 0
|
||||
}
|
||||
},
|
||||
"pgmap": {
|
||||
"pgs_by_state": [
|
||||
{
|
||||
"state_name": "active+clean",
|
||||
"count": 128
|
||||
}
|
||||
],
|
||||
"num_pgs": 128,
|
||||
"num_pools": 1,
|
||||
"num_objects": 14896,
|
||||
"data_bytes": 62440603919,
|
||||
"bytes_used": 14225776640,
|
||||
"bytes_avail": 9450938368,
|
||||
"bytes_total": 23676715008
|
||||
},
|
||||
"fsmap": {
|
||||
"epoch": 1,
|
||||
"by_rank": []
|
||||
},
|
||||
"mgrmap": {
|
||||
"epoch": 5,
|
||||
"active_gid": 14097,
|
||||
"active_name": "juju-460e0f-11",
|
||||
"active_addr": "192.168.100.81:6800/204",
|
||||
"available": true,
|
||||
"standbys": [],
|
||||
"modules": [
|
||||
"balancer",
|
||||
"restful",
|
||||
"status"
|
||||
],
|
||||
"available_modules": [
|
||||
"balancer",
|
||||
"dashboard",
|
||||
"influx",
|
||||
"localpool",
|
||||
"prometheus",
|
||||
"restful",
|
||||
"selftest",
|
||||
"status",
|
||||
"zabbix"
|
||||
],
|
||||
"services": {}
|
||||
},
|
||||
"servicemap": {
|
||||
"epoch": 1,
|
||||
"modified": "0.000000",
|
||||
"services": {}
|
||||
}
|
||||
}
|
@@ -120,6 +120,17 @@ class NagiosTestCase(unittest.TestCase):
|
||||
self.assertRaises(check_ceph_status.CriticalError,
|
||||
lambda: check_ceph_status.check_ceph_status(args))
|
||||
|
||||
# Error, pre-luminous, noout
|
||||
@patch('check_ceph_status.get_ceph_version')
def test_health_crit_noout(self, mock_ceph_version, mock_subprocess):
    # Report Ceph version 10.2.9 (the pre-luminous case) and feed the
    # check the canned status whose summary contains "noout flag(s) set".
    mock_ceph_version.return_value = [10, 2, 9]
    with open('unit_tests/ceph_noout.json') as fixture:
        mock_subprocess.return_value = fixture.read().encode('UTF-8')
    args = check_ceph_status.parse_args("")
    # With noout set, the check must raise CriticalError.
    with self.assertRaises(check_ceph_status.CriticalError):
        check_ceph_status.check_ceph_status(args)
|
||||
|
||||
# All OK, luminous
|
||||
@patch('check_ceph_status.get_ceph_version')
|
||||
def test_health_ok_luminous(self, mock_ceph_version, mock_subprocess):
|
||||
@@ -209,6 +220,19 @@ class NagiosTestCase(unittest.TestCase):
|
||||
self.assertRaises(check_ceph_status.CriticalError,
|
||||
lambda: check_ceph_status.check_ceph_status(args))
|
||||
|
||||
# Error, luminous, noout
|
||||
@patch('check_ceph_status.get_ceph_version')
def test_health_crit_noout_luminous(
        self, mock_ceph_version, mock_subprocess):
    # Report Ceph version 12.2.0 (the luminous case) and feed the check
    # the canned status whose OSDMAP_FLAGS message is "noout flag(s) set".
    mock_ceph_version.return_value = [12, 2, 0]
    with open('unit_tests/ceph_noout_luminous.json') as fixture:
        mock_subprocess.return_value = fixture.read().encode('UTF-8')
    args = check_ceph_status.parse_args("")
    # With noout set, the check must raise CriticalError.
    with self.assertRaises(check_ceph_status.CriticalError):
        check_ceph_status.check_ceph_status(args)
|
||||
|
||||
# Additional Ok, luminous, deepscrub
|
||||
@patch('check_ceph_status.get_ceph_version')
|
||||
def test_additional_ok_deepscrub_luminous(self,
|
||||
|
Reference in New Issue
Block a user