From 609b5182c4282f8965cd6c04e5b2181d0ed08ef8 Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Fri, 2 Dec 2016 18:05:06 +0000 Subject: [PATCH] Refactor recon to use single md5_hash_for_file function There were several implementations of hashing the content of a file in cli/recon.py and common/middleware/recon.py. This patch relocates one implementation (_hash_for_ringfile, introduced in the Related Change) to common/utils.py and refactors recon cli and middleware to use that function. Also improves use of mocking in the unit tests to eliminate passing custom file opener functions to the ReconMiddleware get_ring_md5 and get_swift_conf_md5 methods. Related-Change: I9623752c3cd2361f57864f3e938e1baf5e9292d7 Change-Id: Iaad88e49aadeb28f614aafa1e9596fe07ce9793a --- swift/cli/recon.py | 31 +----- swift/common/middleware/recon.py | 37 ++----- swift/common/utils.py | 17 +++ test/unit/cli/test_recon.py | 12 ++- test/unit/common/middleware/test_recon.py | 122 ++++------------------ test/unit/common/test_utils.py | 92 ++++++++++++++++ 6 files changed, 150 insertions(+), 161 deletions(-) diff --git a/swift/cli/recon.py b/swift/cli/recon.py index 1bd7ecabeb..49df596942 100644 --- a/swift/cli/recon.py +++ b/swift/cli/recon.py @@ -19,10 +19,10 @@ from __future__ import print_function from eventlet.green import socket from six.moves.urllib.parse import urlparse -from swift.common.utils import SWIFT_CONF_FILE + +from swift.common.utils import SWIFT_CONF_FILE, md5_hash_for_file from swift.common.ring import Ring from swift.common.storage_policy import POLICIES -from hashlib import md5 import eventlet import json import optparse @@ -194,21 +194,6 @@ class SwiftRecon(object): else: return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()) - def _md5_file(self, path): - """ - Get the MD5 checksum of a file. - - :param path: path to file - :returns: MD5 checksum, hex encoded - """ - md5sum = md5() - with open(path, 'rb') as f: - block = f.read(4096) - while block: - md5sum.update(block) - block = f.read(4096) - return md5sum.hexdigest() - def get_hosts(self, region_filter, zone_filter, swift_dir, ring_names): """ Get a list of hosts in the rings. @@ -249,14 +234,8 @@ class SwiftRecon(object): ring_names.add(ring_name) rings = {} for ring_name in ring_names: - md5sum = md5() - with open(os.path.join(swift_dir, ring_name), 'rb') as f: - block = f.read(4096) - while block: - md5sum.update(block) - block = f.read(4096) - ring_sum = md5sum.hexdigest() - rings[ring_name] = ring_sum + rings[ring_name] = md5_hash_for_file( + os.path.join(swift_dir, ring_name)) recon = Scout("ringmd5", self.verbose, self.suppress_errors, self.timeout) print("[%s] Checking ring md5sums" % self._ptime()) @@ -298,7 +277,7 @@ class SwiftRecon(object): """ matches = 0 errors = 0 - conf_sum = self._md5_file(SWIFT_CONF_FILE) + conf_sum = md5_hash_for_file(SWIFT_CONF_FILE) recon = Scout("swiftconfmd5", self.verbose, self.suppress_errors, self.timeout) printfn("[%s] Checking swift.conf md5sum" % self._ptime()) diff --git a/swift/common/middleware/recon.py b/swift/common/middleware/recon.py index ba4e4fbfd4..c9c994fe72 100644 --- a/swift/common/middleware/recon.py +++ b/swift/common/middleware/recon.py @@ -17,26 +17,15 @@ import errno import json import os import time -from swift import gettext_ as _ +from resource import getpagesize from swift import __version__ as swiftver +from swift import gettext_ as _ +from swift.common.constraints import check_mount from swift.common.storage_policy import POLICIES from swift.common.swob import Request, Response from swift.common.utils import get_logger, config_true_value, \ - SWIFT_CONF_FILE -from swift.common.constraints import check_mount -from resource import getpagesize -from hashlib import md5 - - -MD5_BLOCK_READ_BYTES = 4096 - - -def _hash_for_ringfile(f): - md5sum = md5() - for block in iter(lambda: f.read(MD5_BLOCK_READ_BYTES), ''): - md5sum.update(block) - return md5sum.hexdigest() + SWIFT_CONF_FILE, md5_hash_for_file class ReconMiddleware(object): @@ -256,35 +245,27 @@ class ReconMiddleware(object): 'size': '', 'used': '', 'avail': ''}) return devices - def get_ring_md5(self, openr=open): + def get_ring_md5(self): """get all ring md5sum's""" sums = {} for ringfile in self.rings: if os.path.exists(ringfile): try: - with openr(ringfile, 'rb') as f: - sums[ringfile] = _hash_for_ringfile(f) + sums[ringfile] = md5_hash_for_file(ringfile) except IOError as err: sums[ringfile] = None if err.errno != errno.ENOENT: self.logger.exception(_('Error reading ringfile')) return sums - def get_swift_conf_md5(self, openr=open): + def get_swift_conf_md5(self): """get md5 of swift.conf""" - md5sum = md5() + hexsum = None try: - with openr(SWIFT_CONF_FILE, 'r') as fh: - chunk = fh.read(4096) - while chunk: - md5sum.update(chunk) - chunk = fh.read(4096) + hexsum = md5_hash_for_file(SWIFT_CONF_FILE) except IOError as err: if err.errno != errno.ENOENT: self.logger.exception(_('Error reading swift.conf')) - hexsum = None - else: - hexsum = md5sum.hexdigest() return {SWIFT_CONF_FILE: hexsum} def get_quarantine_count(self): diff --git a/swift/common/utils.py b/swift/common/utils.py index 44f93f7bc2..1cb1699ddd 100644 --- a/swift/common/utils.py +++ b/swift/common/utils.py @@ -4200,3 +4200,20 @@ def safe_json_loads(value): except (TypeError, ValueError): pass return None + + +MD5_BLOCK_READ_BYTES = 4096 + + +def md5_hash_for_file(fname): + """ + Get the MD5 checksum of a file. + + :param fname: path to file + :returns: MD5 checksum, hex encoded + """ + with open(fname, 'rb') as f: + md5sum = md5() + for block in iter(lambda: f.read(MD5_BLOCK_READ_BYTES), ''): + md5sum.update(block) + return md5sum.hexdigest() diff --git a/test/unit/cli/test_recon.py b/test/unit/cli/test_recon.py index 560a201cd5..141cfc5aa2 100644 --- a/test/unit/cli/test_recon.py +++ b/test/unit/cli/test_recon.py @@ -291,7 +291,7 @@ class TestRecon(unittest.TestCase): self.recon_instance.server_type = server_type stdout = StringIO() with mock.patch('sys.stdout', new=stdout), \ - mock.patch('swift.cli.recon.md5', new=mock_hash): + mock.patch('swift.common.utils.md5', new=mock_hash): mock_hash.return_value.hexdigest.return_value = \ empty_file_hash self.recon_instance.get_ringmd5(hosts, self.swift_dir) @@ -319,7 +319,7 @@ class TestRecon(unittest.TestCase): mock_scout.return_value = scout_instance stdout = StringIO() with mock.patch('sys.stdout', new=stdout), \ - mock.patch('swift.cli.recon.md5', new=mock_hash): + mock.patch('swift.common.utils.md5', new=mock_hash): mock_hash.return_value.hexdigest.return_value = \ empty_file_hash self.recon_instance.get_ringmd5(hosts, self.swift_dir) @@ -339,7 +339,7 @@ class TestRecon(unittest.TestCase): self.recon_instance.server_type = 'object' stdout = StringIO() with mock.patch('sys.stdout', new=stdout), \ - mock.patch('swift.cli.recon.md5', new=mock_hash): + mock.patch('swift.common.utils.md5', new=mock_hash): mock_hash.return_value.hexdigest.return_value = \ empty_file_hash self.recon_instance.get_ringmd5(hosts, self.swift_dir) @@ -729,7 +729,8 @@ class TestReconCommands(unittest.TestCase): printed = [] with self.mock_responses(responses): - with mock.patch.object(self.recon, '_md5_file', lambda _: cksum): + with mock.patch('swift.cli.recon.md5_hash_for_file', + lambda _: cksum): self.recon.get_swiftconfmd5(hosts, printfn=printed.append) output = '\n'.join(printed) + '\n' @@ -748,7 +749,8 @@ class TestReconCommands(unittest.TestCase): printed = [] with self.mock_responses(responses): - with mock.patch.object(self.recon, '_md5_file', lambda _: cksum): + with mock.patch('swift.cli.recon.md5_hash_for_file', + lambda _: cksum): self.recon.get_swiftconfmd5(hosts, printfn=printed.append) output = '\n'.join(printed) + '\n' diff --git a/test/unit/common/middleware/test_recon.py b/test/unit/common/middleware/test_recon.py index 6d9b53c778..a21c61dc61 100644 --- a/test/unit/common/middleware/test_recon.py +++ b/test/unit/common/middleware/test_recon.py @@ -21,7 +21,6 @@ from posix import stat_result, statvfs_result from shutil import rmtree import unittest from unittest import TestCase -from six import StringIO from swift import __version__ as swiftver from swift.common import ring, utils @@ -232,12 +231,12 @@ class TestReconSuccess(TestCase): self.app._from_recon_cache = self.fakecache.fake_from_recon_cache self.frecon = FakeRecon() - # replace hash md5 implementation of the _hash_for_ringfile function - mock_hash_for_ringfile = mock.patch( - 'swift.common.middleware.recon._hash_for_ringfile', - lambda f: 'hash-' + os.path.basename(f.name)) - self.addCleanup(mock_hash_for_ringfile.stop) - mock_hash_for_ringfile.start() + # replace hash md5 implementation of the md5_hash_for_file function + mock_hash_for_file = mock.patch( + 'swift.common.middleware.recon.md5_hash_for_file', + lambda f, **kwargs: 'hash-' + os.path.basename(f)) + self.addCleanup(mock_hash_for_file.stop) + mock_hash_for_file.start() self.ring_part_shift = 5 self.ring_devs = [{'id': 0, 'zone': 0, 'weight': 1.0, @@ -347,14 +346,12 @@ class TestReconSuccess(TestCase): # still produce a ringmd5 entry with a None for the hash. Note that # this is different than if an expected ring file simply doesn't exist, # in which case it is excluded altogether from the ringmd5 response. - - def fake_open(fn, fmode): - raise IOError - expt_out = {'%s/account.ring.gz' % self.tempdir: None, '%s/container.ring.gz' % self.tempdir: None, '%s/object.ring.gz' % self.tempdir: None} - ringmd5 = self.app.get_ring_md5(openr=fake_open) + with mock.patch('swift.common.middleware.recon.md5_hash_for_file', + side_effect=IOError): + ringmd5 = self.app.get_ring_md5() self.assertEqual(sorted(ringmd5.items()), sorted(expt_out.items())) @@ -362,30 +359,30 @@ class TestReconSuccess(TestCase): # Ring files that are present but produce an IOError on read will # show a None hash, but if they can be read later their hash # should become available in the ringmd5 response. - - def fake_open(fn, fmode): - raise IOError - expt_out = {'%s/account.ring.gz' % self.tempdir: None, '%s/container.ring.gz' % self.tempdir: None, '%s/object.ring.gz' % self.tempdir: None} - ringmd5 = self.app.get_ring_md5(openr=fake_open) + with mock.patch('swift.common.middleware.recon.md5_hash_for_file', + side_effect=IOError): + ringmd5 = self.app.get_ring_md5() self.assertEqual(sorted(ringmd5.items()), sorted(expt_out.items())) # If we fix a ring and it can be read again, its hash should then # appear using the same app instance - def fake_open_objonly(fn, fmode): + def fake_hash_for_file(fn): if 'object' not in fn: raise IOError - return open(fn, fmode) + return 'hash-' + os.path.basename(fn) expt_out = {'%s/account.ring.gz' % self.tempdir: None, '%s/container.ring.gz' % self.tempdir: None, '%s/object.ring.gz' % self.tempdir: 'hash-object.ring.gz'} - ringmd5 = self.app.get_ring_md5(openr=fake_open_objonly) + with mock.patch('swift.common.middleware.recon.md5_hash_for_file', + fake_hash_for_file): + ringmd5 = self.app.get_ring_md5() self.assertEqual(sorted(ringmd5.items()), sorted(expt_out.items())) @@ -1414,90 +1411,11 @@ class TestReconMiddleware(unittest.TestCase): def test_get_swift_conf_md5_fail(self): """Test get_swift_conf_md5 failure by failing file open""" - resp = self.real_app_get_swift_conf_md5(fail_io_open) + with mock.patch('swift.common.middleware.recon.md5_hash_for_file', + side_effect=IOError): + resp = self.real_app_get_swift_conf_md5() self.assertIsNone(resp['/etc/swift/swift.conf']) -class TestReconUtilityFunctions(unittest.TestCase): - - def test_hash_for_ringfile_on_filelike_smallish(self): - stub_data = 'some data' - stub_filelike = StringIO(stub_data) - with mock.patch('swift.common.middleware.recon.md5') as mock_md5: - mock_hasher = mock_md5.return_value - rv = recon._hash_for_ringfile(stub_filelike) - self.assertTrue(mock_hasher.hexdigest.called) - self.assertEqual(rv, mock_hasher.hexdigest.return_value) - self.assertEqual([mock.call(stub_data)], - mock_hasher.update.call_args_list) - - def test_hash_for_ringfile_on_filelike_big(self): - num_blocks = 10 - block_size = recon.MD5_BLOCK_READ_BYTES - truncate = 523 - start_char = ord('a') - expected_blocks = [chr(i) * block_size - for i in range(start_char, start_char + num_blocks)] - full_data = ''.join(expected_blocks) - trimmed_data = full_data[:-truncate] - # sanity - self.assertEqual(len(trimmed_data), block_size * num_blocks - truncate) - stub_filelike = StringIO(trimmed_data) - with mock.patch('swift.common.middleware.recon.md5') as mock_md5: - mock_hasher = mock_md5.return_value - rv = recon._hash_for_ringfile(stub_filelike) - self.assertTrue(mock_hasher.hexdigest.called) - self.assertEqual(rv, mock_hasher.hexdigest.return_value) - self.assertEqual(num_blocks, len(mock_hasher.update.call_args_list)) - expected_block = 'a' * block_size - found_blocks = [] - for i, (expected_block, call) in enumerate(zip( - expected_blocks, mock_hasher.update.call_args_list)): - args, kwargs = call - self.assertEqual(kwargs, {}) - self.assertEqual(1, len(args)) - block = args[0] - if i < num_blocks - 1: - self.assertEqual(block, expected_block) - else: - self.assertEqual(block, expected_block[:-truncate]) - found_blocks.append(block) - self.assertEqual(''.join(found_blocks), trimmed_data) - - def test_hash_for_ringfile_on_filelike_empty(self): - stub_filelike = StringIO('') - with mock.patch('swift.common.middleware.recon.md5') as mock_md5: - mock_hasher = mock_md5.return_value - rv = recon._hash_for_ringfile(stub_filelike) - self.assertTrue(mock_hasher.hexdigest.called) - self.assertEqual(rv, mock_hasher.hexdigest.return_value) - self.assertEqual([], mock_hasher.update.call_args_list) - - def test_hash_for_ringfile_on_filelike_brittle(self): - data_to_expected_hash = { - '': 'd41d8cd98f00b204e9800998ecf8427e', - 'some data': '1e50210a0202497fb79bc38b6ade6c34', - ('a' * 4096 * 10)[:-523]: '06a41551609656c85f14f659055dc6d3', - } - # unlike some other places where the concrete implementation really - # matters for backwards compatibility these brittle tests are probably - # not needed or justified, if a future maintainer rips them out later - # they're probably doing the right thing - failures = [] - for stub_data, expected_hash in data_to_expected_hash.items(): - rv = recon._hash_for_ringfile(StringIO(stub_data)) - try: - self.assertEqual(expected_hash, rv) - except AssertionError: - trim_cap = 80 - if len(stub_data) > trim_cap: - stub_data = '%s...' % stub_data[:trim_cap] - failures.append('hash for %r was %s instead of expected %s' % ( - stub_data, rv, expected_hash)) - if failures: - self.fail('Some data did not compute expected hash:\n' + - '\n'.join(failures)) - - if __name__ == '__main__': unittest.main() diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index 0b25a2bf1c..bbe220a4eb 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -5979,5 +5979,97 @@ class TestSocketStringParser(unittest.TestCase): utils.parse_socket_string(addr, default) +class TestHashForFileFunction(unittest.TestCase): + def setUp(self): + self.tempfilename = tempfile.mktemp() + + def tearDown(self): + try: + os.unlink(self.tempfilename) + except OSError: + pass + + def test_hash_for_file_smallish(self): + stub_data = 'some data' + with open(self.tempfilename, 'wb') as fd: + fd.write(stub_data) + with mock.patch('swift.common.utils.md5') as mock_md5: + mock_hasher = mock_md5.return_value + rv = utils.md5_hash_for_file(self.tempfilename) + self.assertTrue(mock_hasher.hexdigest.called) + self.assertEqual(rv, mock_hasher.hexdigest.return_value) + self.assertEqual([mock.call(stub_data)], + mock_hasher.update.call_args_list) + + def test_hash_for_file_big(self): + num_blocks = 10 + block_size = utils.MD5_BLOCK_READ_BYTES + truncate = 523 + start_char = ord('a') + expected_blocks = [chr(i) * block_size + for i in range(start_char, start_char + num_blocks)] + full_data = ''.join(expected_blocks) + trimmed_data = full_data[:-truncate] + # sanity + self.assertEqual(len(trimmed_data), block_size * num_blocks - truncate) + with open(self.tempfilename, 'wb') as fd: + fd.write(trimmed_data) + with mock.patch('swift.common.utils.md5') as mock_md5: + mock_hasher = mock_md5.return_value + rv = utils.md5_hash_for_file(self.tempfilename) + self.assertTrue(mock_hasher.hexdigest.called) + self.assertEqual(rv, mock_hasher.hexdigest.return_value) + self.assertEqual(num_blocks, len(mock_hasher.update.call_args_list)) + found_blocks = [] + for i, (expected_block, call) in enumerate(zip( + expected_blocks, mock_hasher.update.call_args_list)): + args, kwargs = call + self.assertEqual(kwargs, {}) + self.assertEqual(1, len(args)) + block = args[0] + if i < num_blocks - 1: + self.assertEqual(block, expected_block) + else: + self.assertEqual(block, expected_block[:-truncate]) + found_blocks.append(block) + self.assertEqual(''.join(found_blocks), trimmed_data) + + def test_hash_for_file_empty(self): + with open(self.tempfilename, 'wb'): + pass + with mock.patch('swift.common.utils.md5') as mock_md5: + mock_hasher = mock_md5.return_value + rv = utils.md5_hash_for_file(self.tempfilename) + self.assertTrue(mock_hasher.hexdigest.called) + self.assertEqual(rv, mock_hasher.hexdigest.return_value) + self.assertEqual([], mock_hasher.update.call_args_list) + + def test_hash_for_file_brittle(self): + data_to_expected_hash = { + '': 'd41d8cd98f00b204e9800998ecf8427e', + 'some data': '1e50210a0202497fb79bc38b6ade6c34', + ('a' * 4096 * 10)[:-523]: '06a41551609656c85f14f659055dc6d3', + } + # unlike some other places where the concrete implementation really + # matters for backwards compatibility these brittle tests are probably + # not needed or justified, if a future maintainer rips them out later + # they're probably doing the right thing + failures = [] + for stub_data, expected_hash in data_to_expected_hash.items(): + with open(self.tempfilename, 'wb') as fd: + fd.write(stub_data) + rv = utils.md5_hash_for_file(self.tempfilename) + try: + self.assertEqual(expected_hash, rv) + except AssertionError: + trim_cap = 80 + if len(stub_data) > trim_cap: + stub_data = '%s...' % stub_data[:trim_cap] + failures.append('hash for %r was %s instead of expected %s' % ( + stub_data, rv, expected_hash)) + if failures: + self.fail('Some data did not compute expected hash:\n' + + '\n'.join(failures)) + if __name__ == '__main__': unittest.main()