From 0a467936621009418a9c94f636b33bfb6d947a89 Mon Sep 17 00:00:00 2001
From: Lorcan <lorcan.browne@hp.com>
Date: Thu, 12 Mar 2015 15:40:39 +0000
Subject: [PATCH] Add swift-recon feature to track swift-drive-audit error
 count

This is a follow-on from a previous commit which added recon info
for swift-drive-audit (https://review.openstack.org/#/c/122468/).

Here, the "--driveaudit" option is added to the swift-recon tool. This
feature gives the statistics for the system-wide drive errors flagged
by swift-drive-audit. An example of the output is as follows:
(verbose mode)

swift-recon --driveaudit -v
===============================================================================
--> Starting reconnaissance on 5 hosts
===============================================================================
[2015-03-11 17:13:39] Checking drive-audit errors
-> http://1.2.3.4:6000/recon/driveaudit: {'drive_audit_errors': 14}
-> http://1.2.3.5:6000/recon/driveaudit: {'drive_audit_errors': 0}
-> http://1.2.3.6:6000/recon/driveaudit: {'drive_audit_errors': 37}
-> http://1.2.3.7:6000/recon/driveaudit: {'drive_audit_errors': 101}
-> http://1.2.3.8:6000/recon/driveaudit: {'drive_audit_errors': 0}
[drive_audit_errors] low: 0, high: 101, avg: 30.4, total: 152, Failed: 0.0%, no_result: 0, reported: 5
===============================================================================

Change-Id: Ia16c52a9d613eeb3de1a5a428d88dd1233631912
---
 bin/swift-drive-audit                     |  3 ++
 swift/cli/recon.py                        | 26 ++++++++++++++++
 swift/common/middleware/recon.py          |  9 ++++++
 test/unit/cli/test_recon.py               | 37 +++++++++++++++++++++++
 test/unit/common/middleware/test_recon.py | 20 ++++++++++++
 5 files changed, 95 insertions(+)

diff --git a/bin/swift-drive-audit b/bin/swift-drive-audit
index 589b255f22..ea17357998 100755
--- a/bin/swift-drive-audit
+++ b/bin/swift-drive-audit
@@ -176,6 +176,7 @@ if __name__ == '__main__':
     if not devices:
         logger.error("Error: No devices found!")
     recon_errors = {}
+    total_errors = 0
     for device in devices:
         recon_errors[device['mount_point']] = 0
     errors = get_errors(error_re, log_file_pattern, minutes, logger)
@@ -198,8 +199,10 @@ if __name__ == '__main__':
                         comment_fstab(mount_point)
                         unmounts += 1
                     recon_errors[mount_point] = count
+                    total_errors += count
     recon_file = recon_cache_path + "/drive.recon"
     dump_recon_cache(recon_errors, recon_file, logger)
+    dump_recon_cache({'drive_audit_errors': total_errors}, recon_file, logger)
 
     if unmounts == 0:
         logger.info("No drives were unmounted")
diff --git a/swift/cli/recon.py b/swift/cli/recon.py
index 676973c410..b67e2678d7 100755
--- a/swift/cli/recon.py
+++ b/swift/cli/recon.py
@@ -330,6 +330,27 @@ class SwiftRecon(object):
             print("[async_pending] - No hosts returned valid data.")
         print("=" * 79)
 
+    def driveaudit_check(self, hosts):
+        """
+        Obtain and print drive audit error statistics
+
+        :param hosts: set of hosts to check. in the format of:
+            set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
+        """
+        scan = {}
+        recon = Scout("driveaudit", self.verbose, self.suppress_errors,
+                      self.timeout)
+        print("[%s] Checking drive-audit errors" % self._ptime())
+        for url, response, status in self.pool.imap(recon.scout, hosts):
+            if status == 200:
+                scan[url] = response['drive_audit_errors']
+        stats = self._gen_stats(scan.values(), 'drive_audit_errors')
+        if stats['reported'] > 0:
+            self._print_stats(stats)
+        else:
+            print("[drive_audit_errors] - No hosts returned valid data.")
+        print("=" * 79)
+
     def umount_check(self, hosts):
         """
         Check for and print unmounted drives
@@ -930,6 +951,8 @@ class SwiftRecon(object):
                         "local copy")
         args.add_option('--sockstat', action="store_true",
                         help="Get cluster socket usage stats")
+        args.add_option('--driveaudit', action="store_true",
+                        help="Get drive audit error stats")
         args.add_option('--top', type='int', metavar='COUNT', default=0,
                         help='Also show the top COUNT entries in rank order.')
         args.add_option('--all', action="store_true",
@@ -992,6 +1015,7 @@ class SwiftRecon(object):
             self.quarantine_check(hosts)
             self.socket_usage(hosts)
             self.server_type_check(hosts)
+            self.driveaudit_check(hosts)
         else:
             if options.async:
                 if self.server_type == 'object':
@@ -1033,6 +1057,8 @@ class SwiftRecon(object):
                 self.quarantine_check(hosts)
             if options.sockstat:
                 self.socket_usage(hosts)
+            if options.driveaudit:
+                self.driveaudit_check(hosts)
 
 
 def main():
diff --git a/swift/common/middleware/recon.py b/swift/common/middleware/recon.py
index c512493354..88d5243a4d 100644
--- a/swift/common/middleware/recon.py
+++ b/swift/common/middleware/recon.py
@@ -53,6 +53,8 @@ class ReconMiddleware(object):
                                                   'container.recon')
         self.account_recon_cache = os.path.join(self.recon_cache_path,
                                                 'account.recon')
+        self.drive_recon_cache = os.path.join(self.recon_cache_path,
+                                              'drive.recon')
         self.account_ring_path = os.path.join(swift_dir, 'account.ring.gz')
         self.container_ring_path = os.path.join(swift_dir, 'container.ring.gz')
         self.rings = [self.account_ring_path, self.container_ring_path]
@@ -124,6 +126,11 @@ class ReconMiddleware(object):
         return self._from_recon_cache(['async_pending'],
                                       self.object_recon_cache)
 
+    def get_driveaudit_error(self):
+        """get the total drive audit error count from the drive recon cache"""
+        return self._from_recon_cache(['drive_audit_errors'],
+                                      self.drive_recon_cache)
+
     def get_replication_info(self, recon_type):
         """get replication info"""
         if recon_type == 'account':
@@ -359,6 +366,8 @@ class ReconMiddleware(object):
             content = self.get_socket_info()
         elif rcheck == "version":
             content = self.get_version()
+        elif rcheck == "driveaudit":
+            content = self.get_driveaudit_error()
         else:
             content = "Invalid path: %s" % req.path
             return Response(request=req, status="404 Not Found",
diff --git a/test/unit/cli/test_recon.py b/test/unit/cli/test_recon.py
index e9ad45d2c8..7009be851c 100644
--- a/test/unit/cli/test_recon.py
+++ b/test/unit/cli/test_recon.py
@@ -293,6 +293,43 @@ class TestRecon(unittest.TestCase):
                                   % ex)
         self.assertFalse(expected)
 
+    def test_drive_audit_check(self):
+        hosts = [('127.0.0.1', 6010), ('127.0.0.1', 6020),
+                 ('127.0.0.1', 6030), ('127.0.0.1', 6040)]
+        # sample json response from http://<host>:<port>/recon/driveaudit
+        responses = {6010: {'drive_audit_errors': 15},
+                     6020: {'drive_audit_errors': 0},
+                     6030: {'drive_audit_errors': 257},
+                     6040: {'drive_audit_errors': 56}}
+        # <low> <high> <avg> <total> <Failed> <no_result> <reported>
+        expected = (0, 257, 82.0, 328, 0.0, 0, 4)
+
+        def mock_scout_driveaudit(app, host):
+            url = 'http://%s:%s/recon/driveaudit' % host
+            response = responses[host[1]]
+            status = 200
+            return url, response, status
+
+        stdout = StringIO()
+        patches = [
+            mock.patch('swift.cli.recon.Scout.scout', mock_scout_driveaudit),
+            mock.patch('sys.stdout', new=stdout),
+        ]
+        with nested(*patches):
+            self.recon_instance.driveaudit_check(hosts)
+
+        output = stdout.getvalue()
+        r = re.compile(r"\[drive_audit_errors(.*)\](.*)")
+        lines = output.splitlines()
+        # the stats line must be present, otherwise nothing gets verified
+        matches = [m for m in map(r.match, lines) if m]
+        self.assertTrue(matches)
+        for m in matches:
+            self.assertEquals(m.group(2),
+                              " low: %s, high: %s, avg: %s, total: %s,"
+                              " Failed: %s%%, no_result: %s, reported: %s"
+                              % expected)
+
 
 class TestReconCommands(unittest.TestCase):
     def setUp(self):
diff --git a/test/unit/common/middleware/test_recon.py b/test/unit/common/middleware/test_recon.py
index 66e97c3088..2032c62d16 100644
--- a/test/unit/common/middleware/test_recon.py
+++ b/test/unit/common/middleware/test_recon.py
@@ -172,6 +172,9 @@ class FakeRecon(object):
     def fake_sockstat(self):
         return {'sockstattest': "1"}
 
+    def fake_driveaudit(self):
+        return {'driveaudittest': "1"}
+
     def nocontent(self):
         return None
 
@@ -829,6 +832,15 @@ class TestReconSuccess(TestCase):
             (('/proc/net/sockstat', 'r'), {}),
             (('/proc/net/sockstat6', 'r'), {})])
 
+    def test_get_driveaudit_info(self):
+        from_cache_response = {'drive_audit_errors': 7}
+        self.fakecache.fakeout = from_cache_response
+        rv = self.app.get_driveaudit_error()
+        self.assertEquals(self.fakecache.fakeout_calls,
+                          [((['drive_audit_errors'],
+                             '/var/cache/swift/drive.recon'), {})])
+        self.assertEquals(rv, {'drive_audit_errors': 7})
+
 
 class TestReconMiddleware(unittest.TestCase):
 
@@ -857,6 +869,7 @@ class TestReconMiddleware(unittest.TestCase):
         self.app.get_swift_conf_md5 = self.frecon.fake_swiftconfmd5
         self.app.get_quarantine_count = self.frecon.fake_quarantined
         self.app.get_socket_info = self.frecon.fake_sockstat
+        self.app.get_driveaudit_error = self.frecon.fake_driveaudit
 
     def test_recon_get_mem(self):
         get_mem_resp = ['{"memtest": "1"}']
@@ -1084,5 +1097,12 @@ class TestReconMiddleware(unittest.TestCase):
         resp = self.app(req.environ, start_response)
         self.assertEquals(resp, 'FAKE APP')
 
+    def test_recon_get_driveaudit(self):
+        get_driveaudit_resp = ['{"driveaudittest": "1"}']
+        req = Request.blank('/recon/driveaudit',
+                            environ={'REQUEST_METHOD': 'GET'})
+        resp = self.app(req.environ, start_response)
+        self.assertEquals(resp, get_driveaudit_resp)
+
 if __name__ == '__main__':
     unittest.main()