From 0a467936621009418a9c94f636b33bfb6d947a89 Mon Sep 17 00:00:00 2001 From: Lorcan Date: Thu, 12 Mar 2015 15:40:39 +0000 Subject: [PATCH] Add swift-recon feature to track swift-drive-audit error count This is a follow-on from a previous commit which added recon info for swift-drive-audit (https://review.openstack.org/#/c/122468/). Here, the "--driveaudit" option is added to the swift-recon tool. This feature gives the statistics for the system-wide drive errors flagged by swift-drive-audit. An example of the output is as follows: (verbose mode) swift-recon --driveaudit -v =============================================================================== --> Starting reconnaissance on 5 hosts =============================================================================== [2015-03-11 17:13:39] Checking drive-audit errors -> http://1.2.3.4:6000/recon/driveaudit: {'drive_audit_errors': 14} -> http://1.2.3.5:6000/recon/driveaudit: {'drive_audit_errors': 0} -> http://1.2.3.6:6000/recon/driveaudit: {'drive_audit_errors': 37} -> http://1.2.3.7:6000/recon/driveaudit: {'drive_audit_errors': 101} -> http://1.2.3.8:6000/recon/driveaudit: {'drive_audit_errors': 0} [drive_audit_errors] low: 0, high: 101, avg: 30.4, total: 152, Failed: 0.0%, no_result: 0, reported: 5 =============================================================================== Change-Id: Ia16c52a9d613eeb3de1a5a428d88dd1233631912 --- bin/swift-drive-audit | 3 ++ swift/cli/recon.py | 26 ++++++++++++++++ swift/common/middleware/recon.py | 9 ++++++ test/unit/cli/test_recon.py | 37 +++++++++++++++++++++++ test/unit/common/middleware/test_recon.py | 20 ++++++++++++ 5 files changed, 95 insertions(+) diff --git a/bin/swift-drive-audit b/bin/swift-drive-audit index 589b255f22..ea17357998 100755 --- a/bin/swift-drive-audit +++ b/bin/swift-drive-audit @@ -176,6 +176,7 @@ if __name__ == '__main__': if not devices: logger.error("Error: No devices found!") recon_errors = {} + total_errors = 0 for device in devices: 
recon_errors[device['mount_point']] = 0 errors = get_errors(error_re, log_file_pattern, minutes, logger) @@ -198,8 +199,10 @@ if __name__ == '__main__': comment_fstab(mount_point) unmounts += 1 recon_errors[mount_point] = count + total_errors += count recon_file = recon_cache_path + "/drive.recon" dump_recon_cache(recon_errors, recon_file, logger) + dump_recon_cache({'drive_audit_errors': total_errors}, recon_file, logger) if unmounts == 0: logger.info("No drives were unmounted") diff --git a/swift/cli/recon.py b/swift/cli/recon.py index 676973c410..b67e2678d7 100755 --- a/swift/cli/recon.py +++ b/swift/cli/recon.py @@ -330,6 +330,27 @@ class SwiftRecon(object): print("[async_pending] - No hosts returned valid data.") print("=" * 79) + def driveaudit_check(self, hosts): + """ + Obtain and print drive audit error statistics + + :param hosts: set of hosts to check. in the format of: + set([('127.0.0.1', 6020), ('127.0.0.2', 6030)] + """ + scan = {} + recon = Scout("driveaudit", self.verbose, self.suppress_errors, + self.timeout) + print("[%s] Checking drive-audit errors" % self._ptime()) + for url, response, status in self.pool.imap(recon.scout, hosts): + if status == 200: + scan[url] = response['drive_audit_errors'] + stats = self._gen_stats(scan.values(), 'drive_audit_errors') + if stats['reported'] > 0: + self._print_stats(stats) + else: + print("[drive_audit_errors] - No hosts returned valid data.") + print("=" * 79) + def umount_check(self, hosts): """ Check for and print unmounted drives @@ -930,6 +951,8 @@ class SwiftRecon(object): "local copy") args.add_option('--sockstat', action="store_true", help="Get cluster socket usage stats") + args.add_option('--driveaudit', action="store_true", + help="Get drive audit error stats") args.add_option('--top', type='int', metavar='COUNT', default=0, help='Also show the top COUNT entries in rank order.') args.add_option('--all', action="store_true", @@ -992,6 +1015,7 @@ class SwiftRecon(object): 
self.quarantine_check(hosts) self.socket_usage(hosts) self.server_type_check(hosts) + self.driveaudit_check(hosts) else: if options.async: if self.server_type == 'object': @@ -1033,6 +1057,8 @@ class SwiftRecon(object): self.quarantine_check(hosts) if options.sockstat: self.socket_usage(hosts) + if options.driveaudit: + self.driveaudit_check(hosts) def main(): diff --git a/swift/common/middleware/recon.py b/swift/common/middleware/recon.py index c512493354..88d5243a4d 100644 --- a/swift/common/middleware/recon.py +++ b/swift/common/middleware/recon.py @@ -53,6 +53,8 @@ class ReconMiddleware(object): 'container.recon') self.account_recon_cache = os.path.join(self.recon_cache_path, 'account.recon') + self.drive_recon_cache = os.path.join(self.recon_cache_path, + 'drive.recon') self.account_ring_path = os.path.join(swift_dir, 'account.ring.gz') self.container_ring_path = os.path.join(swift_dir, 'container.ring.gz') self.rings = [self.account_ring_path, self.container_ring_path] @@ -124,6 +126,11 @@ class ReconMiddleware(object): return self._from_recon_cache(['async_pending'], self.object_recon_cache) + def get_driveaudit_error(self): + """get # of drive audit errors""" + return self._from_recon_cache(['drive_audit_errors'], + self.drive_recon_cache) + def get_replication_info(self, recon_type): """get replication info""" if recon_type == 'account': @@ -359,6 +366,8 @@ class ReconMiddleware(object): content = self.get_socket_info() elif rcheck == "version": content = self.get_version() + elif rcheck == "driveaudit": + content = self.get_driveaudit_error() else: content = "Invalid path: %s" % req.path return Response(request=req, status="404 Not Found", diff --git a/test/unit/cli/test_recon.py b/test/unit/cli/test_recon.py index e9ad45d2c8..7009be851c 100644 --- a/test/unit/cli/test_recon.py +++ b/test/unit/cli/test_recon.py @@ -293,6 +293,43 @@ class TestRecon(unittest.TestCase): % ex) self.assertFalse(expected) + def test_drive_audit_check(self): + hosts = 
[('127.0.0.1', 6010), ('127.0.0.1', 6020), + ('127.0.0.1', 6030), ('127.0.0.1', 6040)] + # sample json response from http://:/recon/driveaudit + responses = {6010: {'drive_audit_errors': 15}, + 6020: {'drive_audit_errors': 0}, + 6030: {'drive_audit_errors': 257}, + 6040: {'drive_audit_errors': 56}} + # + expected = (0, 257, 82.0, 328, 0.0, 0, 4) + + def mock_scout_driveaudit(app, host): + url = 'http://%s:%s/recon/driveaudit' % host + response = responses[host[1]] + status = 200 + return url, response, status + + stdout = StringIO() + patches = [ + mock.patch('swift.cli.recon.Scout.scout', mock_scout_driveaudit), + mock.patch('sys.stdout', new=stdout), + ] + with nested(*patches): + self.recon_instance.driveaudit_check(hosts) + + output = stdout.getvalue() + r = re.compile("\[drive_audit_errors(.*)\](.*)") + lines = output.splitlines() + self.assertTrue(lines) + for line in lines: + m = r.match(line) + if m: + self.assertEquals(m.group(2), + " low: %s, high: %s, avg: %s, total: %s," + " Failed: %s%%, no_result: %s, reported: %s" + % expected) + class TestReconCommands(unittest.TestCase): def setUp(self): diff --git a/test/unit/common/middleware/test_recon.py b/test/unit/common/middleware/test_recon.py index 66e97c3088..2032c62d16 100644 --- a/test/unit/common/middleware/test_recon.py +++ b/test/unit/common/middleware/test_recon.py @@ -172,6 +172,9 @@ class FakeRecon(object): def fake_sockstat(self): return {'sockstattest': "1"} + def fake_driveaudit(self): + return {'driveaudittest': "1"} + def nocontent(self): return None @@ -829,6 +832,15 @@ class TestReconSuccess(TestCase): (('/proc/net/sockstat', 'r'), {}), (('/proc/net/sockstat6', 'r'), {})]) + def test_get_driveaudit_info(self): + from_cache_response = {'drive_audit_errors': 7} + self.fakecache.fakeout = from_cache_response + rv = self.app.get_driveaudit_error() + self.assertEquals(self.fakecache.fakeout_calls, + [((['drive_audit_errors'], + '/var/cache/swift/drive.recon'), {})]) + self.assertEquals(rv, 
{'drive_audit_errors': 7}) + class TestReconMiddleware(unittest.TestCase): @@ -857,6 +869,7 @@ class TestReconMiddleware(unittest.TestCase): self.app.get_swift_conf_md5 = self.frecon.fake_swiftconfmd5 self.app.get_quarantine_count = self.frecon.fake_quarantined self.app.get_socket_info = self.frecon.fake_sockstat + self.app.get_driveaudit_error = self.frecon.fake_driveaudit def test_recon_get_mem(self): get_mem_resp = ['{"memtest": "1"}'] @@ -1084,5 +1097,12 @@ class TestReconMiddleware(unittest.TestCase): resp = self.app(req.environ, start_response) self.assertEquals(resp, 'FAKE APP') + def test_recon_get_driveaudit(self): + get_driveaudit_resp = ['{"driveaudittest": "1"}'] + req = Request.blank('/recon/driveaudit', + environ={'REQUEST_METHOD': 'GET'}) + resp = self.app(req.environ, start_response) + self.assertEquals(resp, get_driveaudit_resp) + if __name__ == '__main__': unittest.main()