Make dict deletion idempotent in dump_recon_cache

Calling dump_recon_cache with a key mapped to an empty dict value
causes the key to be removed from the cache entry. Doing the same
again causes the key to be added back and mapped to an empty dict, and
the key continues to toggle as calls are repeated. This behavior is
seen on the Related-Bug report.

This patch fixes dump_recon_cache to make deletion of a key
idempotent. This fix is needed for the Related-Change which makes use
of empty dicts with dump_recon_cache to clear unwanted keys from the
cache.

The only caller that currently sets empty dict values is
obj/auditor.py where the current intended behavior would appear to be
as per this patch.

Related-Change: I28925a37f3985c9082b5a06e76af4dc3ec813abe
Related-Bug: #1704858
Change-Id: If9638b4e7dba0ec2c7bd95809cec6c5e18e9301e
This commit is contained in:
Alistair Coles 2017-07-17 14:29:53 +01:00 committed by Clay Gerrard
parent e1a2c28ec3
commit c2e59b9b8b
3 changed files with 178 additions and 18 deletions
swift/common
test/unit

@ -3060,18 +3060,27 @@ def human_readable(value):
def put_recon_cache_entry(cache_entry, key, item):
    """
    Update a recon cache entry item.

    If ``item`` is an empty dict then any existing ``key`` in ``cache_entry``
    will be deleted. Similarly if ``item`` is a dict and any of its values are
    empty dicts then the corresponding key will be deleted from the nested
    dict in ``cache_entry``.

    We use nested recon cache entries when the object auditor runs in
    parallel or else in 'once' mode with a specified subset of devices.

    :param cache_entry: a dict of existing cache entries
    :param key: key for item to update
    :param item: value for item to update
    """
    if isinstance(item, dict):
        if not item:
            # an empty dict is a marker to remove the key entirely; pop with
            # a default so repeated deletion is idempotent (Related-Bug
            # #1704858: the key must not toggle back on a second call)
            cache_entry.pop(key, None)
            return
        # ensure we have a nested dict to merge into (replaces any existing
        # non-dict value for this key)
        if not isinstance(cache_entry.get(key), dict):
            cache_entry[key] = {}
        for k, v in item.items():
            if v == {}:
                # empty dict value deletes the nested key, idempotently
                cache_entry[key].pop(k, None)
            else:
                cache_entry[key][k] = v
    else:
        cache_entry[key] = item

@ -1361,21 +1361,53 @@ class TestUtils(unittest.TestCase):
testcache_file = os.path.join(testdir_base, 'cache.recon')
logger = utils.get_logger(None, 'server', log_route='server')
try:
submit_dict = {'key1': {'value1': 1, 'value2': 2}}
submit_dict = {'key0': 99,
'key1': {'value1': 1, 'value2': 2}}
utils.dump_recon_cache(submit_dict, testcache_file, logger)
fd = open(testcache_file)
file_dict = json.loads(fd.readline())
fd.close()
with open(testcache_file) as fd:
file_dict = json.loads(fd.readline())
self.assertEqual(submit_dict, file_dict)
# Use a nested entry
submit_dict = {'key1': {'key2': {'value1': 1, 'value2': 2}}}
result_dict = {'key1': {'key2': {'value1': 1, 'value2': 2},
'value1': 1, 'value2': 2}}
submit_dict = {'key0': 101,
'key1': {'key2': {'value1': 1, 'value2': 2}}}
expect_dict = {'key0': 101,
'key1': {'key2': {'value1': 1, 'value2': 2},
'value1': 1, 'value2': 2}}
utils.dump_recon_cache(submit_dict, testcache_file, logger)
fd = open(testcache_file)
file_dict = json.loads(fd.readline())
fd.close()
self.assertEqual(result_dict, file_dict)
with open(testcache_file) as fd:
file_dict = json.loads(fd.readline())
self.assertEqual(expect_dict, file_dict)
# cached entries are sticky
submit_dict = {}
utils.dump_recon_cache(submit_dict, testcache_file, logger)
with open(testcache_file) as fd:
file_dict = json.loads(fd.readline())
self.assertEqual(expect_dict, file_dict)
# nested dicts can be erased...
submit_dict = {'key1': {'key2': {}}}
expect_dict = {'key0': 101,
'key1': {'value1': 1, 'value2': 2}}
utils.dump_recon_cache(submit_dict, testcache_file, logger)
with open(testcache_file) as fd:
file_dict = json.loads(fd.readline())
self.assertEqual(expect_dict, file_dict)
# ... and erasure is idempotent
utils.dump_recon_cache(submit_dict, testcache_file, logger)
with open(testcache_file) as fd:
file_dict = json.loads(fd.readline())
self.assertEqual(expect_dict, file_dict)
# top level dicts can be erased...
submit_dict = {'key1': {}}
expect_dict = {'key0': 101}
utils.dump_recon_cache(submit_dict, testcache_file, logger)
with open(testcache_file) as fd:
file_dict = json.loads(fd.readline())
self.assertEqual(expect_dict, file_dict)
# ... and erasure is idempotent
utils.dump_recon_cache(submit_dict, testcache_file, logger)
with open(testcache_file) as fd:
file_dict = json.loads(fd.readline())
self.assertEqual(expect_dict, file_dict)
finally:
rmtree(testdir_base)

@ -12,6 +12,7 @@
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from test import unit
import unittest
@ -616,6 +617,77 @@ class TestAuditor(unittest.TestCase):
self.assertGreater(len(log_lines), 0)
self.assertTrue(log_lines[0].index('ZBF - sda'))
def test_object_run_recon_cache(self):
    # Verify recon cache contents after auditing all devices and after
    # auditing a specific device subset, for both the ALL and ZBF workers.
    # NOTE(review): indentation reconstructed from a flattened diff paste;
    # confirm against the original file before applying.
    ts = Timestamp(time.time())
    data = 'test_data'
    # create one 9-byte object so the ALL audit pass has work to do
    with self.disk_file.create() as writer:
        writer.write(data)
        metadata = {
            'ETag': md5(data).hexdigest(),
            'X-Timestamp': ts.normal,
            'Content-Length': str(os.fstat(writer._fd).st_size),
        }
        writer.put(metadata)
        writer.commit(ts)
    # all devices
    auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                           self.rcache, self.devices)
    auditor_worker.audit_all_objects()
    with open(self.rcache) as fd:
        actual_rcache = json.load(fd)
    expected = {'object_auditor_stats_ALL':
                {'passes': 1, 'errors': 0, 'audit_time': mock.ANY,
                 'start_time': mock.ANY, 'quarantined': 0,
                 'bytes_processed': 9}}
    # NOTE(review): the cache is read twice here; the first read appears
    # redundant
    with open(self.rcache) as fd:
        actual_rcache = json.load(fd)
    self.assertEqual(expected, actual_rcache)
    auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                           self.rcache, self.devices,
                                           zero_byte_only_at_fps=50)
    auditor_worker.audit_all_objects()
    # the previously read ALL stats are unchanged by the ZBF run
    self.assertEqual(expected, actual_rcache)
    with open(self.rcache) as fd:
        actual_rcache = json.load(fd)
    # after the ZBF run the cache holds both ALL and ZBF stats
    expected.update({
        'object_auditor_stats_ZBF':
            {'passes': 1, 'errors': 0, 'audit_time': mock.ANY,
             'start_time': mock.ANY, 'quarantined': 0,
             'bytes_processed': 0}})
    self.assertEqual(expected, actual_rcache)
    # specific devices
    os.unlink(self.rcache)
    auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                           self.rcache, self.devices)
    auditor_worker.audit_all_objects(device_dirs=['sda'])
    with open(self.rcache) as fd:
        actual_rcache = json.load(fd)
    # stats are nested per-device when a device subset is audited
    expected = {'object_auditor_stats_ALL':
                {'sda': {'passes': 1, 'errors': 0, 'audit_time': mock.ANY,
                         'start_time': mock.ANY, 'quarantined': 0,
                         'bytes_processed': 9}}}
    with open(self.rcache) as fd:
        actual_rcache = json.load(fd)
    self.assertEqual(expected, actual_rcache)
    auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
                                           self.rcache, self.devices,
                                           zero_byte_only_at_fps=50)
    auditor_worker.audit_all_objects(device_dirs=['sda'])
    self.assertEqual(expected, actual_rcache)
    with open(self.rcache) as fd:
        actual_rcache = json.load(fd)
    expected.update({
        'object_auditor_stats_ZBF':
            {'sda': {'passes': 1, 'errors': 0, 'audit_time': mock.ANY,
                     'start_time': mock.ANY, 'quarantined': 0,
                     'bytes_processed': 0}}})
    self.assertEqual(expected, actual_rcache)
def test_object_run_once_no_sda(self):
auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
self.rcache, self.devices)
@ -1237,6 +1309,53 @@ class TestAuditor(unittest.TestCase):
self.assertEqual(sorted(forked_pids), [2, 1001])
def test_run_audit_once_zbfps(self):
    # Verify ZBF-only run_once behavior against the recon cache: empty
    # cache while there are no objects, then ZBF stats once a zero-byte
    # object exists.
    # NOTE(review): indentation reconstructed from a flattened diff paste;
    # confirm against the original file before applying.
    my_auditor = auditor.ObjectAuditor(dict(devices=self.devices,
                                            mount_check='false',
                                            zero_byte_files_per_second=89,
                                            concurrency=1,
                                            recon_cache_path=self.testdir))
    with mock.patch.object(my_auditor, '_sleep', lambda *a: None):
        my_auditor.run_once(zero_byte_fps=50)
    with open(self.rcache) as fd:
        # there's no objects to audit so expect no stats; this assertion
        # may change if https://bugs.launchpad.net/swift/+bug/1704858 is
        # fixed
        self.assertEqual({}, json.load(fd))
    # check recon cache stays clean after a second run
    with mock.patch.object(my_auditor, '_sleep', lambda *a: None):
        my_auditor.run_once(zero_byte_fps=50)
    with open(self.rcache) as fd:
        self.assertEqual({}, json.load(fd))
    # create a zero-byte object (no body is written before put)
    ts = Timestamp(time.time())
    with self.disk_file.create() as writer:
        metadata = {
            'ETag': md5('').hexdigest(),
            'X-Timestamp': ts.normal,
            'Content-Length': str(os.fstat(writer._fd).st_size),
        }
        writer.put(metadata)
        writer.commit(ts)
    # now a ZBF run should record stats for the zero-byte object
    with mock.patch.object(my_auditor, '_sleep', lambda *a: None):
        my_auditor.run_once(zero_byte_fps=50)
    with open(self.rcache) as fd:
        self.assertEqual({
            'object_auditor_stats_ZBF': {
                'audit_time': 0,
                'bytes_processed': 0,
                'errors': 0,
                'passes': 1,
                'quarantined': 0,
                'start_time': mock.ANY}},
            json.load(fd))
def test_run_parallel_audit_once(self):
my_auditor = auditor.ObjectAuditor(
dict(devices=self.devices, mount_check='false',