From c2e59b9b8b43e5846c0fe20cfd7810bbc0291cad Mon Sep 17 00:00:00 2001
From: Alistair Coles <alistairncoles@gmail.com>
Date: Mon, 17 Jul 2017 14:29:53 +0100
Subject: [PATCH] Make dict deletion idempotent in dump_recon_cache

Calling dump_recon_cache with a key mapped to an empty dict value
causes the key to be removed from the cache entry. Doing the same
again causes the key to be added back and mapped to an empty dict, and
the key continues to toggle as calls are repeated. This behavior is
seen on the Related-Bug report.

This patch fixes dump_recon_cache to make deletion of a key
idempotent. This fix is needed for the Related-Change which makes use
of empty dicts with dump_recon_cache to clear unwanted keys from the
cache.

The only caller that currently sets empty dict values is
obj/auditor.py, where the intended behavior appears to be
as per this patch.

Related-Change: I28925a37f3985c9082b5a06e76af4dc3ec813abe
Related-Bug: #1704858
Change-Id: If9638b4e7dba0ec2c7bd95809cec6c5e18e9301e
---
 swift/common/utils.py          |  23 +++++--
 test/unit/common/test_utils.py |  54 ++++++++++++---
 test/unit/obj/test_auditor.py  | 119 +++++++++++++++++++++++++++++++++
 3 files changed, 178 insertions(+), 18 deletions(-)

diff --git a/swift/common/utils.py b/swift/common/utils.py
index 1c06598f37..cbe27fbd17 100644
--- a/swift/common/utils.py
+++ b/swift/common/utils.py
@@ -3060,18 +3060,27 @@ def human_readable(value):
 
 def put_recon_cache_entry(cache_entry, key, item):
     """
-    Function that will check if item is a dict, and if so put it under
-    cache_entry[key].  We use nested recon cache entries when the object
-    auditor runs in parallel or else in 'once' mode with a specified
-    subset of devices.
+    Update a recon cache entry item.
+
+    If ``item`` is an empty dict then any existing ``key`` in ``cache_entry``
+    will be deleted. Similarly if ``item`` is a dict and any of its values are
+    empty dicts then the corresponding key will be deleted from the nested dict
+    in ``cache_entry``.
+
+    We use nested recon cache entries when the object auditor
+    runs in parallel or else in 'once' mode with a specified subset of devices.
+
+    :param cache_entry: a dict of existing cache entries
+    :param key: key for item to update
+    :param item: value for item to update
     """
     if isinstance(item, dict):
+        if not item:
+            cache_entry.pop(key, None)
+            return
         if key not in cache_entry or key in cache_entry and not \
                 isinstance(cache_entry[key], dict):
             cache_entry[key] = {}
-        elif key in cache_entry and item == {}:
-            cache_entry.pop(key, None)
-            return
         for k, v in item.items():
             if v == {}:
                 cache_entry[key].pop(k, None)
diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py
index ea788c61a7..076cc10ba0 100644
--- a/test/unit/common/test_utils.py
+++ b/test/unit/common/test_utils.py
@@ -1361,21 +1361,53 @@ class TestUtils(unittest.TestCase):
         testcache_file = os.path.join(testdir_base, 'cache.recon')
         logger = utils.get_logger(None, 'server', log_route='server')
         try:
-            submit_dict = {'key1': {'value1': 1, 'value2': 2}}
+            submit_dict = {'key0': 99,
+                           'key1': {'value1': 1, 'value2': 2}}
             utils.dump_recon_cache(submit_dict, testcache_file, logger)
-            fd = open(testcache_file)
-            file_dict = json.loads(fd.readline())
-            fd.close()
+            with open(testcache_file) as fd:
+                file_dict = json.loads(fd.readline())
             self.assertEqual(submit_dict, file_dict)
             # Use a nested entry
-            submit_dict = {'key1': {'key2': {'value1': 1, 'value2': 2}}}
-            result_dict = {'key1': {'key2': {'value1': 1, 'value2': 2},
-                           'value1': 1, 'value2': 2}}
+            submit_dict = {'key0': 101,
+                           'key1': {'key2': {'value1': 1, 'value2': 2}}}
+            expect_dict = {'key0': 101,
+                           'key1': {'key2': {'value1': 1, 'value2': 2},
+                                    'value1': 1, 'value2': 2}}
             utils.dump_recon_cache(submit_dict, testcache_file, logger)
-            fd = open(testcache_file)
-            file_dict = json.loads(fd.readline())
-            fd.close()
-            self.assertEqual(result_dict, file_dict)
+            with open(testcache_file) as fd:
+                file_dict = json.loads(fd.readline())
+            self.assertEqual(expect_dict, file_dict)
+            # cached entries are sticky
+            submit_dict = {}
+            utils.dump_recon_cache(submit_dict, testcache_file, logger)
+            with open(testcache_file) as fd:
+                file_dict = json.loads(fd.readline())
+            self.assertEqual(expect_dict, file_dict)
+            # nested dicts can be erased...
+            submit_dict = {'key1': {'key2': {}}}
+            expect_dict = {'key0': 101,
+                           'key1': {'value1': 1, 'value2': 2}}
+            utils.dump_recon_cache(submit_dict, testcache_file, logger)
+            with open(testcache_file) as fd:
+                file_dict = json.loads(fd.readline())
+            self.assertEqual(expect_dict, file_dict)
+            # ... and erasure is idempotent
+            utils.dump_recon_cache(submit_dict, testcache_file, logger)
+            with open(testcache_file) as fd:
+                file_dict = json.loads(fd.readline())
+            self.assertEqual(expect_dict, file_dict)
+            # top level dicts can be erased...
+            submit_dict = {'key1': {}}
+            expect_dict = {'key0': 101}
+            utils.dump_recon_cache(submit_dict, testcache_file, logger)
+            with open(testcache_file) as fd:
+                file_dict = json.loads(fd.readline())
+            self.assertEqual(expect_dict, file_dict)
+            # ... and erasure is idempotent
+            utils.dump_recon_cache(submit_dict, testcache_file, logger)
+            with open(testcache_file) as fd:
+                file_dict = json.loads(fd.readline())
+            self.assertEqual(expect_dict, file_dict)
         finally:
             rmtree(testdir_base)
 
diff --git a/test/unit/obj/test_auditor.py b/test/unit/obj/test_auditor.py
index 1bc076ff2a..8482abb927 100644
--- a/test/unit/obj/test_auditor.py
+++ b/test/unit/obj/test_auditor.py
@@ -12,6 +12,7 @@
 # implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import json
 
 from test import unit
 import unittest
@@ -616,6 +617,77 @@ class TestAuditor(unittest.TestCase):
         self.assertGreater(len(log_lines), 0)
         self.assertTrue(log_lines[0].index('ZBF - sda'))
 
+    def test_object_run_recon_cache(self):
+        ts = Timestamp(time.time())
+        data = 'test_data'
+
+        with self.disk_file.create() as writer:
+            writer.write(data)
+            metadata = {
+                'ETag': md5(data).hexdigest(),
+                'X-Timestamp': ts.normal,
+                'Content-Length': str(os.fstat(writer._fd).st_size),
+            }
+            writer.put(metadata)
+            writer.commit(ts)
+
+        # all devices
+        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
+                                               self.rcache, self.devices)
+        auditor_worker.audit_all_objects()
+        with open(self.rcache) as fd:
+            actual_rcache = json.load(fd)
+        expected = {'object_auditor_stats_ALL':
+                    {'passes': 1, 'errors': 0, 'audit_time': mock.ANY,
+                     'start_time': mock.ANY, 'quarantined': 0,
+                     'bytes_processed': 9}}
+        with open(self.rcache) as fd:
+            actual_rcache = json.load(fd)
+        self.assertEqual(expected, actual_rcache)
+
+        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
+                                               self.rcache, self.devices,
+                                               zero_byte_only_at_fps=50)
+        auditor_worker.audit_all_objects()
+        self.assertEqual(expected, actual_rcache)
+        with open(self.rcache) as fd:
+            actual_rcache = json.load(fd)
+        expected.update({
+            'object_auditor_stats_ZBF':
+            {'passes': 1, 'errors': 0, 'audit_time': mock.ANY,
+             'start_time': mock.ANY, 'quarantined': 0,
+             'bytes_processed': 0}})
+        self.assertEqual(expected, actual_rcache)
+
+        # specific devices
+        os.unlink(self.rcache)
+        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
+                                               self.rcache, self.devices)
+        auditor_worker.audit_all_objects(device_dirs=['sda'])
+        with open(self.rcache) as fd:
+            actual_rcache = json.load(fd)
+        expected = {'object_auditor_stats_ALL':
+                    {'sda': {'passes': 1, 'errors': 0, 'audit_time': mock.ANY,
+                             'start_time': mock.ANY, 'quarantined': 0,
+                             'bytes_processed': 9}}}
+        with open(self.rcache) as fd:
+            actual_rcache = json.load(fd)
+        self.assertEqual(expected, actual_rcache)
+
+        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
+                                               self.rcache, self.devices,
+                                               zero_byte_only_at_fps=50)
+        auditor_worker.audit_all_objects(device_dirs=['sda'])
+        self.assertEqual(expected, actual_rcache)
+        with open(self.rcache) as fd:
+            actual_rcache = json.load(fd)
+        expected.update({
+            'object_auditor_stats_ZBF':
+            {'sda': {'passes': 1, 'errors': 0, 'audit_time': mock.ANY,
+                     'start_time': mock.ANY, 'quarantined': 0,
+                     'bytes_processed': 0}}})
+        self.assertEqual(expected, actual_rcache)
+
     def test_object_run_once_no_sda(self):
         auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                                self.rcache, self.devices)
@@ -1237,6 +1309,53 @@ class TestAuditor(unittest.TestCase):
 
         self.assertEqual(sorted(forked_pids), [2, 1001])
 
+    def test_run_audit_once_zbfps(self):
+        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
+                                                mount_check='false',
+                                                zero_byte_files_per_second=89,
+                                                concurrency=1,
+                                                recon_cache_path=self.testdir))
+
+        with mock.patch.object(my_auditor, '_sleep', lambda *a: None):
+            my_auditor.run_once(zero_byte_fps=50)
+
+        with open(self.rcache) as fd:
+            # there's no objects to audit so expect no stats; this assertion
+            # may change if https://bugs.launchpad.net/swift/+bug/1704858 is
+            # fixed
+            self.assertEqual({}, json.load(fd))
+
+        # check recon cache stays clean after a second run
+        with mock.patch.object(my_auditor, '_sleep', lambda *a: None):
+            my_auditor.run_once(zero_byte_fps=50)
+
+        with open(self.rcache) as fd:
+            self.assertEqual({}, json.load(fd))
+
+        ts = Timestamp(time.time())
+        with self.disk_file.create() as writer:
+            metadata = {
+                'ETag': md5('').hexdigest(),
+                'X-Timestamp': ts.normal,
+                'Content-Length': str(os.fstat(writer._fd).st_size),
+            }
+            writer.put(metadata)
+            writer.commit(ts)
+
+        # check recon cache stays clean after a second run
+        with mock.patch.object(my_auditor, '_sleep', lambda *a: None):
+            my_auditor.run_once(zero_byte_fps=50)
+        with open(self.rcache) as fd:
+            self.assertEqual({
+                'object_auditor_stats_ZBF': {
+                    'audit_time': 0,
+                    'bytes_processed': 0,
+                    'errors': 0,
+                    'passes': 1,
+                    'quarantined': 0,
+                    'start_time': mock.ANY}},
+                json.load(fd))
+
     def test_run_parallel_audit_once(self):
         my_auditor = auditor.ObjectAuditor(
             dict(devices=self.devices, mount_check='false',