From c2e59b9b8b43e5846c0fe20cfd7810bbc0291cad Mon Sep 17 00:00:00 2001
From: Alistair Coles
Date: Mon, 17 Jul 2017 14:29:53 +0100
Subject: [PATCH] Make dict deletion idempotent in dump_recon_cache

Calling dump_recon_cache with a key mapped to an empty dict value
causes the key to be removed from the cache entry. Doing the same
again causes the key to be added back, mapped to an empty dict, and
the key continues to toggle as calls are repeated. This behavior is
seen in the Related-Bug report.

This patch fixes dump_recon_cache to make deletion of a key
idempotent. This fix is needed for the Related-Change, which makes
use of empty dicts with dump_recon_cache to clear unwanted keys from
the cache.

The only caller that currently sets empty dict values is
obj/auditor.py, where the intended behavior appears to be as per this
patch.

Related-Change: I28925a37f3985c9082b5a06e76af4dc3ec813abe
Related-Bug: #1704858
Change-Id: If9638b4e7dba0ec2c7bd95809cec6c5e18e9301e
---
 swift/common/utils.py          |  23 +++++--
 test/unit/common/test_utils.py |  54 ++++++++++++---
 test/unit/obj/test_auditor.py  | 113 +++++++++++++++++++++++++++++++
 3 files changed, 172 insertions(+), 18 deletions(-)

diff --git a/swift/common/utils.py b/swift/common/utils.py
index 1c06598f37..cbe27fbd17 100644
--- a/swift/common/utils.py
+++ b/swift/common/utils.py
@@ -3060,18 +3060,27 @@ def human_readable(value):
 
 def put_recon_cache_entry(cache_entry, key, item):
     """
-    Function that will check if item is a dict, and if so put it under
-    cache_entry[key]. We use nested recon cache entries when the object
-    auditor runs in parallel or else in 'once' mode with a specified
-    subset of devices.
+    Update a recon cache entry item.
+
+    If ``item`` is an empty dict then any existing ``key`` in ``cache_entry``
+    will be deleted. Similarly, if ``item`` is a dict and any of its values
+    are empty dicts then the corresponding key will be deleted from the
+    nested dict in ``cache_entry``.
+
+    We use nested recon cache entries when the object auditor
+    runs in parallel or else in 'once' mode with a specified subset of devices.
+
+    :param cache_entry: a dict of existing cache entries
+    :param key: key for the item to update
+    :param item: new value for the item to update
     """
     if isinstance(item, dict):
+        if not item:
+            cache_entry.pop(key, None)
+            return
         if key not in cache_entry or key in cache_entry and not \
                 isinstance(cache_entry[key], dict):
             cache_entry[key] = {}
-        elif key in cache_entry and item == {}:
-            cache_entry.pop(key, None)
-            return
         for k, v in item.items():
             if v == {}:
                 cache_entry[key].pop(k, None)
diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py
index ea788c61a7..076cc10ba0 100644
--- a/test/unit/common/test_utils.py
+++ b/test/unit/common/test_utils.py
@@ -1361,21 +1361,53 @@ class TestUtils(unittest.TestCase):
         testcache_file = os.path.join(testdir_base, 'cache.recon')
         logger = utils.get_logger(None, 'server', log_route='server')
         try:
-            submit_dict = {'key1': {'value1': 1, 'value2': 2}}
+            submit_dict = {'key0': 99,
+                           'key1': {'value1': 1, 'value2': 2}}
             utils.dump_recon_cache(submit_dict, testcache_file, logger)
-            fd = open(testcache_file)
-            file_dict = json.loads(fd.readline())
-            fd.close()
+            with open(testcache_file) as fd:
+                file_dict = json.loads(fd.readline())
             self.assertEqual(submit_dict, file_dict)
             # Use a nested entry
-            submit_dict = {'key1': {'key2': {'value1': 1, 'value2': 2}}}
-            result_dict = {'key1': {'key2': {'value1': 1, 'value2': 2},
-                                    'value1': 1, 'value2': 2}}
+            submit_dict = {'key0': 101,
+                           'key1': {'key2': {'value1': 1, 'value2': 2}}}
+            expect_dict = {'key0': 101,
+                           'key1': {'key2': {'value1': 1, 'value2': 2},
+                                    'value1': 1, 'value2': 2}}
             utils.dump_recon_cache(submit_dict, testcache_file, logger)
-            fd = open(testcache_file)
-            file_dict = json.loads(fd.readline())
-            fd.close()
-            self.assertEqual(result_dict, file_dict)
+            with open(testcache_file) as fd:
+                file_dict = json.loads(fd.readline())
+            self.assertEqual(expect_dict, file_dict)
+            # cached entries are sticky
+            submit_dict = {}
+            utils.dump_recon_cache(submit_dict, testcache_file, logger)
+            with open(testcache_file) as fd:
+                file_dict = json.loads(fd.readline())
+            self.assertEqual(expect_dict, file_dict)
+            # nested dicts can be erased...
+            submit_dict = {'key1': {'key2': {}}}
+            expect_dict = {'key0': 101,
+                           'key1': {'value1': 1, 'value2': 2}}
+            utils.dump_recon_cache(submit_dict, testcache_file, logger)
+            with open(testcache_file) as fd:
+                file_dict = json.loads(fd.readline())
+            self.assertEqual(expect_dict, file_dict)
+            # ... and erasure is idempotent
+            utils.dump_recon_cache(submit_dict, testcache_file, logger)
+            with open(testcache_file) as fd:
+                file_dict = json.loads(fd.readline())
+            self.assertEqual(expect_dict, file_dict)
+            # top level dicts can be erased...
+            submit_dict = {'key1': {}}
+            expect_dict = {'key0': 101}
+            utils.dump_recon_cache(submit_dict, testcache_file, logger)
+            with open(testcache_file) as fd:
+                file_dict = json.loads(fd.readline())
+            self.assertEqual(expect_dict, file_dict)
+            # ... and erasure is idempotent
+            utils.dump_recon_cache(submit_dict, testcache_file, logger)
+            with open(testcache_file) as fd:
+                file_dict = json.loads(fd.readline())
+            self.assertEqual(expect_dict, file_dict)
         finally:
             rmtree(testdir_base)
 
diff --git a/test/unit/obj/test_auditor.py b/test/unit/obj/test_auditor.py
index 1bc076ff2a..8482abb927 100644
--- a/test/unit/obj/test_auditor.py
+++ b/test/unit/obj/test_auditor.py
@@ -12,6 +12,7 @@
 #    implied.
 #    See the License for the specific language governing permissions and
 #    limitations under the License.
 
+import json
 from test import unit
 import unittest
@@ -616,6 +617,71 @@ class TestAuditor(unittest.TestCase):
         self.assertGreater(len(log_lines), 0)
         self.assertTrue(log_lines[0].index('ZBF - sda'))
 
+    def test_object_run_recon_cache(self):
+        ts = Timestamp(time.time())
+        data = 'test_data'
+
+        with self.disk_file.create() as writer:
+            writer.write(data)
+            metadata = {
+                'ETag': md5(data).hexdigest(),
+                'X-Timestamp': ts.normal,
+                'Content-Length': str(os.fstat(writer._fd).st_size),
+            }
+            writer.put(metadata)
+            writer.commit(ts)
+
+        # all devices
+        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
+                                               self.rcache, self.devices)
+        auditor_worker.audit_all_objects()
+        expected = {'object_auditor_stats_ALL':
+                    {'passes': 1, 'errors': 0, 'audit_time': mock.ANY,
+                     'start_time': mock.ANY, 'quarantined': 0,
+                     'bytes_processed': 9}}
+        with open(self.rcache) as fd:
+            actual_rcache = json.load(fd)
+        self.assertEqual(expected, actual_rcache)
+
+        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
+                                               self.rcache, self.devices,
+                                               zero_byte_only_at_fps=50)
+        auditor_worker.audit_all_objects()
+        expected.update({
+            'object_auditor_stats_ZBF':
+            {'passes': 1, 'errors': 0, 'audit_time': mock.ANY,
+             'start_time': mock.ANY, 'quarantined': 0,
+             'bytes_processed': 0}})
+        with open(self.rcache) as fd:
+            actual_rcache = json.load(fd)
+        self.assertEqual(expected, actual_rcache)
+
+        # specific devices
+        os.unlink(self.rcache)
+        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
+                                               self.rcache, self.devices)
+        auditor_worker.audit_all_objects(device_dirs=['sda'])
+        expected = {'object_auditor_stats_ALL':
+                    {'sda': {'passes': 1, 'errors': 0, 'audit_time': mock.ANY,
+                             'start_time': mock.ANY, 'quarantined': 0,
+                             'bytes_processed': 9}}}
+        with open(self.rcache) as fd:
+            actual_rcache = json.load(fd)
+        self.assertEqual(expected, actual_rcache)
+
+        auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
+                                               self.rcache, self.devices,
+                                               zero_byte_only_at_fps=50)
+        auditor_worker.audit_all_objects(device_dirs=['sda'])
+        expected.update({
+            'object_auditor_stats_ZBF':
+            {'sda': {'passes': 1, 'errors': 0, 'audit_time': mock.ANY,
+                     'start_time': mock.ANY, 'quarantined': 0,
+                     'bytes_processed': 0}}})
+        with open(self.rcache) as fd:
+            actual_rcache = json.load(fd)
+        self.assertEqual(expected, actual_rcache)
+
     def test_object_run_once_no_sda(self):
         auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                                self.rcache, self.devices)
@@ -1237,6 +1303,53 @@ class TestAuditor(unittest.TestCase):
 
         self.assertEqual(sorted(forked_pids), [2, 1001])
 
+    def test_run_audit_once_zbfps(self):
+        my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
+                                                mount_check='false',
+                                                zero_byte_files_per_second=89,
+                                                concurrency=1,
+                                                recon_cache_path=self.testdir))
+
+        with mock.patch.object(my_auditor, '_sleep', lambda *a: None):
+            my_auditor.run_once(zero_byte_fps=50)
+
+        with open(self.rcache) as fd:
+            # there are no objects to audit so expect no stats; this
+            # assertion may change if
+            # https://bugs.launchpad.net/swift/+bug/1704858 is fixed
+            self.assertEqual({}, json.load(fd))
+
+        # check recon cache stays clean after a second run
+        with mock.patch.object(my_auditor, '_sleep', lambda *a: None):
+            my_auditor.run_once(zero_byte_fps=50)
+
+        with open(self.rcache) as fd:
+            self.assertEqual({}, json.load(fd))
+
+        ts = Timestamp(time.time())
+        with self.disk_file.create() as writer:
+            metadata = {
+                'ETag': md5('').hexdigest(),
+                'X-Timestamp': ts.normal,
+                'Content-Length': str(os.fstat(writer._fd).st_size),
+            }
+            writer.put(metadata)
+            writer.commit(ts)
+
+        # there is now a zero byte file to audit; check the cache is updated
+        with mock.patch.object(my_auditor, '_sleep', lambda *a: None):
+            my_auditor.run_once(zero_byte_fps=50)
+        with open(self.rcache) as fd:
+            self.assertEqual({
+                'object_auditor_stats_ZBF': {
+                    'audit_time': 0,
+                    'bytes_processed': 0,
+                    'errors': 0,
+                    'passes': 1,
+                    'quarantined': 0,
+                    'start_time': mock.ANY}},
+                json.load(fd))
+
     def test_run_parallel_audit_once(self):
         my_auditor = auditor.ObjectAuditor(
             dict(devices=self.devices, mount_check='false',
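
For reference, the fixed semantics can be exercised in isolation. The
sketch below mirrors put_recon_cache_entry as patched above; the trailing
else branches, which this diff does not show, are assumed from the
unchanged code around the hunk. The asserts walk through nested deletion,
top-level deletion, and the idempotent re-deletion this change guarantees;
before this patch, the second empty-dict call would have re-created
'stats' mapped to {}.

    def put_recon_cache_entry(cache_entry, key, item):
        # mirrors the patched swift.common.utils.put_recon_cache_entry;
        # the trailing else branches are assumed from the unchanged code
        # around the hunk shown above
        if isinstance(item, dict):
            if not item:
                # an empty dict deletes the key; repeating the call is a
                # no-op because pop() tolerates a missing key
                cache_entry.pop(key, None)
                return
            if key not in cache_entry or key in cache_entry and not \
                    isinstance(cache_entry[key], dict):
                cache_entry[key] = {}
            for k, v in item.items():
                if v == {}:
                    # an empty nested value deletes the nested key
                    cache_entry[key].pop(k, None)
                else:
                    cache_entry[key][k] = v
        else:
            cache_entry[key] = item

    cache = {'stats': {'passes': 1, 'errors': 0}}
    put_recon_cache_entry(cache, 'stats', {'errors': {}})  # drop nested key
    assert cache == {'stats': {'passes': 1}}
    put_recon_cache_entry(cache, 'stats', {})  # drop the whole key ...
    put_recon_cache_entry(cache, 'stats', {})  # ... again: now a no-op
    assert cache == {}

The key point of the fix is ordering: deletion is handled before the key
would be (re)created, so a repeated call finds nothing to delete and
returns without adding the key back.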