Refactor recon to use single md5_hash_for_file function

There were several implementations of hashing the content
of a file in cli/recon.py and common/middleware/recon.py.
This patch relocates one implementation (_hash_for_ringfile,
introduced in the Related Change) to common/utils.py and
refactors recon cli and middleware to use that function.

Also improves use of mocking in the unit tests to eliminate passing
custom file opener functions to the ReconMiddleware get_ring_md5
and get_swift_conf_md5 methods.

Related-Change: I9623752c3cd2361f57864f3e938e1baf5e9292d7

Change-Id: Iaad88e49aadeb28f614aafa1e9596fe07ce9793a
This commit is contained in:
Alistair Coles 2016-12-02 18:05:06 +00:00
parent 053b625f42
commit 609b5182c4
6 changed files with 150 additions and 161 deletions

View File

@ -19,10 +19,10 @@ from __future__ import print_function
from eventlet.green import socket from eventlet.green import socket
from six.moves.urllib.parse import urlparse from six.moves.urllib.parse import urlparse
from swift.common.utils import SWIFT_CONF_FILE
from swift.common.utils import SWIFT_CONF_FILE, md5_hash_for_file
from swift.common.ring import Ring from swift.common.ring import Ring
from swift.common.storage_policy import POLICIES from swift.common.storage_policy import POLICIES
from hashlib import md5
import eventlet import eventlet
import json import json
import optparse import optparse
@ -194,21 +194,6 @@ class SwiftRecon(object):
else: else:
return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()) return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
def _md5_file(self, path):
"""
Get the MD5 checksum of a file.
:param path: path to file
:returns: MD5 checksum, hex encoded
"""
md5sum = md5()
with open(path, 'rb') as f:
block = f.read(4096)
while block:
md5sum.update(block)
block = f.read(4096)
return md5sum.hexdigest()
def get_hosts(self, region_filter, zone_filter, swift_dir, ring_names): def get_hosts(self, region_filter, zone_filter, swift_dir, ring_names):
""" """
Get a list of hosts in the rings. Get a list of hosts in the rings.
@ -249,14 +234,8 @@ class SwiftRecon(object):
ring_names.add(ring_name) ring_names.add(ring_name)
rings = {} rings = {}
for ring_name in ring_names: for ring_name in ring_names:
md5sum = md5() rings[ring_name] = md5_hash_for_file(
with open(os.path.join(swift_dir, ring_name), 'rb') as f: os.path.join(swift_dir, ring_name))
block = f.read(4096)
while block:
md5sum.update(block)
block = f.read(4096)
ring_sum = md5sum.hexdigest()
rings[ring_name] = ring_sum
recon = Scout("ringmd5", self.verbose, self.suppress_errors, recon = Scout("ringmd5", self.verbose, self.suppress_errors,
self.timeout) self.timeout)
print("[%s] Checking ring md5sums" % self._ptime()) print("[%s] Checking ring md5sums" % self._ptime())
@ -298,7 +277,7 @@ class SwiftRecon(object):
""" """
matches = 0 matches = 0
errors = 0 errors = 0
conf_sum = self._md5_file(SWIFT_CONF_FILE) conf_sum = md5_hash_for_file(SWIFT_CONF_FILE)
recon = Scout("swiftconfmd5", self.verbose, self.suppress_errors, recon = Scout("swiftconfmd5", self.verbose, self.suppress_errors,
self.timeout) self.timeout)
printfn("[%s] Checking swift.conf md5sum" % self._ptime()) printfn("[%s] Checking swift.conf md5sum" % self._ptime())

View File

@ -17,26 +17,15 @@ import errno
import json import json
import os import os
import time import time
from swift import gettext_ as _ from resource import getpagesize
from swift import __version__ as swiftver from swift import __version__ as swiftver
from swift import gettext_ as _
from swift.common.constraints import check_mount
from swift.common.storage_policy import POLICIES from swift.common.storage_policy import POLICIES
from swift.common.swob import Request, Response from swift.common.swob import Request, Response
from swift.common.utils import get_logger, config_true_value, \ from swift.common.utils import get_logger, config_true_value, \
SWIFT_CONF_FILE SWIFT_CONF_FILE, md5_hash_for_file
from swift.common.constraints import check_mount
from resource import getpagesize
from hashlib import md5
MD5_BLOCK_READ_BYTES = 4096
def _hash_for_ringfile(f):
md5sum = md5()
for block in iter(lambda: f.read(MD5_BLOCK_READ_BYTES), ''):
md5sum.update(block)
return md5sum.hexdigest()
class ReconMiddleware(object): class ReconMiddleware(object):
@ -256,35 +245,27 @@ class ReconMiddleware(object):
'size': '', 'used': '', 'avail': ''}) 'size': '', 'used': '', 'avail': ''})
return devices return devices
def get_ring_md5(self, openr=open): def get_ring_md5(self):
"""get all ring md5sum's""" """get all ring md5sum's"""
sums = {} sums = {}
for ringfile in self.rings: for ringfile in self.rings:
if os.path.exists(ringfile): if os.path.exists(ringfile):
try: try:
with openr(ringfile, 'rb') as f: sums[ringfile] = md5_hash_for_file(ringfile)
sums[ringfile] = _hash_for_ringfile(f)
except IOError as err: except IOError as err:
sums[ringfile] = None sums[ringfile] = None
if err.errno != errno.ENOENT: if err.errno != errno.ENOENT:
self.logger.exception(_('Error reading ringfile')) self.logger.exception(_('Error reading ringfile'))
return sums return sums
def get_swift_conf_md5(self, openr=open): def get_swift_conf_md5(self):
"""get md5 of swift.conf""" """get md5 of swift.conf"""
md5sum = md5() hexsum = None
try: try:
with openr(SWIFT_CONF_FILE, 'r') as fh: hexsum = md5_hash_for_file(SWIFT_CONF_FILE)
chunk = fh.read(4096)
while chunk:
md5sum.update(chunk)
chunk = fh.read(4096)
except IOError as err: except IOError as err:
if err.errno != errno.ENOENT: if err.errno != errno.ENOENT:
self.logger.exception(_('Error reading swift.conf')) self.logger.exception(_('Error reading swift.conf'))
hexsum = None
else:
hexsum = md5sum.hexdigest()
return {SWIFT_CONF_FILE: hexsum} return {SWIFT_CONF_FILE: hexsum}
def get_quarantine_count(self): def get_quarantine_count(self):

View File

@ -4200,3 +4200,20 @@ def safe_json_loads(value):
except (TypeError, ValueError): except (TypeError, ValueError):
pass pass
return None return None
MD5_BLOCK_READ_BYTES = 4096
def md5_hash_for_file(fname):
"""
Get the MD5 checksum of a file.
:param fname: path to file
:returns: MD5 checksum, hex encoded
"""
with open(fname, 'rb') as f:
md5sum = md5()
for block in iter(lambda: f.read(MD5_BLOCK_READ_BYTES), ''):
md5sum.update(block)
return md5sum.hexdigest()

View File

@ -291,7 +291,7 @@ class TestRecon(unittest.TestCase):
self.recon_instance.server_type = server_type self.recon_instance.server_type = server_type
stdout = StringIO() stdout = StringIO()
with mock.patch('sys.stdout', new=stdout), \ with mock.patch('sys.stdout', new=stdout), \
mock.patch('swift.cli.recon.md5', new=mock_hash): mock.patch('swift.common.utils.md5', new=mock_hash):
mock_hash.return_value.hexdigest.return_value = \ mock_hash.return_value.hexdigest.return_value = \
empty_file_hash empty_file_hash
self.recon_instance.get_ringmd5(hosts, self.swift_dir) self.recon_instance.get_ringmd5(hosts, self.swift_dir)
@ -319,7 +319,7 @@ class TestRecon(unittest.TestCase):
mock_scout.return_value = scout_instance mock_scout.return_value = scout_instance
stdout = StringIO() stdout = StringIO()
with mock.patch('sys.stdout', new=stdout), \ with mock.patch('sys.stdout', new=stdout), \
mock.patch('swift.cli.recon.md5', new=mock_hash): mock.patch('swift.common.utils.md5', new=mock_hash):
mock_hash.return_value.hexdigest.return_value = \ mock_hash.return_value.hexdigest.return_value = \
empty_file_hash empty_file_hash
self.recon_instance.get_ringmd5(hosts, self.swift_dir) self.recon_instance.get_ringmd5(hosts, self.swift_dir)
@ -339,7 +339,7 @@ class TestRecon(unittest.TestCase):
self.recon_instance.server_type = 'object' self.recon_instance.server_type = 'object'
stdout = StringIO() stdout = StringIO()
with mock.patch('sys.stdout', new=stdout), \ with mock.patch('sys.stdout', new=stdout), \
mock.patch('swift.cli.recon.md5', new=mock_hash): mock.patch('swift.common.utils.md5', new=mock_hash):
mock_hash.return_value.hexdigest.return_value = \ mock_hash.return_value.hexdigest.return_value = \
empty_file_hash empty_file_hash
self.recon_instance.get_ringmd5(hosts, self.swift_dir) self.recon_instance.get_ringmd5(hosts, self.swift_dir)
@ -729,7 +729,8 @@ class TestReconCommands(unittest.TestCase):
printed = [] printed = []
with self.mock_responses(responses): with self.mock_responses(responses):
with mock.patch.object(self.recon, '_md5_file', lambda _: cksum): with mock.patch('swift.cli.recon.md5_hash_for_file',
lambda _: cksum):
self.recon.get_swiftconfmd5(hosts, printfn=printed.append) self.recon.get_swiftconfmd5(hosts, printfn=printed.append)
output = '\n'.join(printed) + '\n' output = '\n'.join(printed) + '\n'
@ -748,7 +749,8 @@ class TestReconCommands(unittest.TestCase):
printed = [] printed = []
with self.mock_responses(responses): with self.mock_responses(responses):
with mock.patch.object(self.recon, '_md5_file', lambda _: cksum): with mock.patch('swift.cli.recon.md5_hash_for_file',
lambda _: cksum):
self.recon.get_swiftconfmd5(hosts, printfn=printed.append) self.recon.get_swiftconfmd5(hosts, printfn=printed.append)
output = '\n'.join(printed) + '\n' output = '\n'.join(printed) + '\n'

View File

@ -21,7 +21,6 @@ from posix import stat_result, statvfs_result
from shutil import rmtree from shutil import rmtree
import unittest import unittest
from unittest import TestCase from unittest import TestCase
from six import StringIO
from swift import __version__ as swiftver from swift import __version__ as swiftver
from swift.common import ring, utils from swift.common import ring, utils
@ -232,12 +231,12 @@ class TestReconSuccess(TestCase):
self.app._from_recon_cache = self.fakecache.fake_from_recon_cache self.app._from_recon_cache = self.fakecache.fake_from_recon_cache
self.frecon = FakeRecon() self.frecon = FakeRecon()
# replace hash md5 implementation of the _hash_for_ringfile function # replace hash md5 implementation of the md5_hash_for_file function
mock_hash_for_ringfile = mock.patch( mock_hash_for_file = mock.patch(
'swift.common.middleware.recon._hash_for_ringfile', 'swift.common.middleware.recon.md5_hash_for_file',
lambda f: 'hash-' + os.path.basename(f.name)) lambda f, **kwargs: 'hash-' + os.path.basename(f))
self.addCleanup(mock_hash_for_ringfile.stop) self.addCleanup(mock_hash_for_file.stop)
mock_hash_for_ringfile.start() mock_hash_for_file.start()
self.ring_part_shift = 5 self.ring_part_shift = 5
self.ring_devs = [{'id': 0, 'zone': 0, 'weight': 1.0, self.ring_devs = [{'id': 0, 'zone': 0, 'weight': 1.0,
@ -347,14 +346,12 @@ class TestReconSuccess(TestCase):
# still produce a ringmd5 entry with a None for the hash. Note that # still produce a ringmd5 entry with a None for the hash. Note that
# this is different than if an expected ring file simply doesn't exist, # this is different than if an expected ring file simply doesn't exist,
# in which case it is excluded altogether from the ringmd5 response. # in which case it is excluded altogether from the ringmd5 response.
def fake_open(fn, fmode):
raise IOError
expt_out = {'%s/account.ring.gz' % self.tempdir: None, expt_out = {'%s/account.ring.gz' % self.tempdir: None,
'%s/container.ring.gz' % self.tempdir: None, '%s/container.ring.gz' % self.tempdir: None,
'%s/object.ring.gz' % self.tempdir: None} '%s/object.ring.gz' % self.tempdir: None}
ringmd5 = self.app.get_ring_md5(openr=fake_open) with mock.patch('swift.common.middleware.recon.md5_hash_for_file',
side_effect=IOError):
ringmd5 = self.app.get_ring_md5()
self.assertEqual(sorted(ringmd5.items()), self.assertEqual(sorted(ringmd5.items()),
sorted(expt_out.items())) sorted(expt_out.items()))
@ -362,30 +359,30 @@ class TestReconSuccess(TestCase):
# Ring files that are present but produce an IOError on read will # Ring files that are present but produce an IOError on read will
# show a None hash, but if they can be read later their hash # show a None hash, but if they can be read later their hash
# should become available in the ringmd5 response. # should become available in the ringmd5 response.
def fake_open(fn, fmode):
raise IOError
expt_out = {'%s/account.ring.gz' % self.tempdir: None, expt_out = {'%s/account.ring.gz' % self.tempdir: None,
'%s/container.ring.gz' % self.tempdir: None, '%s/container.ring.gz' % self.tempdir: None,
'%s/object.ring.gz' % self.tempdir: None} '%s/object.ring.gz' % self.tempdir: None}
ringmd5 = self.app.get_ring_md5(openr=fake_open) with mock.patch('swift.common.middleware.recon.md5_hash_for_file',
side_effect=IOError):
ringmd5 = self.app.get_ring_md5()
self.assertEqual(sorted(ringmd5.items()), self.assertEqual(sorted(ringmd5.items()),
sorted(expt_out.items())) sorted(expt_out.items()))
# If we fix a ring and it can be read again, its hash should then # If we fix a ring and it can be read again, its hash should then
# appear using the same app instance # appear using the same app instance
def fake_open_objonly(fn, fmode): def fake_hash_for_file(fn):
if 'object' not in fn: if 'object' not in fn:
raise IOError raise IOError
return open(fn, fmode) return 'hash-' + os.path.basename(fn)
expt_out = {'%s/account.ring.gz' % self.tempdir: None, expt_out = {'%s/account.ring.gz' % self.tempdir: None,
'%s/container.ring.gz' % self.tempdir: None, '%s/container.ring.gz' % self.tempdir: None,
'%s/object.ring.gz' % self.tempdir: '%s/object.ring.gz' % self.tempdir:
'hash-object.ring.gz'} 'hash-object.ring.gz'}
ringmd5 = self.app.get_ring_md5(openr=fake_open_objonly) with mock.patch('swift.common.middleware.recon.md5_hash_for_file',
fake_hash_for_file):
ringmd5 = self.app.get_ring_md5()
self.assertEqual(sorted(ringmd5.items()), self.assertEqual(sorted(ringmd5.items()),
sorted(expt_out.items())) sorted(expt_out.items()))
@ -1414,90 +1411,11 @@ class TestReconMiddleware(unittest.TestCase):
def test_get_swift_conf_md5_fail(self): def test_get_swift_conf_md5_fail(self):
"""Test get_swift_conf_md5 failure by failing file open""" """Test get_swift_conf_md5 failure by failing file open"""
resp = self.real_app_get_swift_conf_md5(fail_io_open) with mock.patch('swift.common.middleware.recon.md5_hash_for_file',
side_effect=IOError):
resp = self.real_app_get_swift_conf_md5()
self.assertIsNone(resp['/etc/swift/swift.conf']) self.assertIsNone(resp['/etc/swift/swift.conf'])
class TestReconUtilityFunctions(unittest.TestCase):
def test_hash_for_ringfile_on_filelike_smallish(self):
stub_data = 'some data'
stub_filelike = StringIO(stub_data)
with mock.patch('swift.common.middleware.recon.md5') as mock_md5:
mock_hasher = mock_md5.return_value
rv = recon._hash_for_ringfile(stub_filelike)
self.assertTrue(mock_hasher.hexdigest.called)
self.assertEqual(rv, mock_hasher.hexdigest.return_value)
self.assertEqual([mock.call(stub_data)],
mock_hasher.update.call_args_list)
def test_hash_for_ringfile_on_filelike_big(self):
num_blocks = 10
block_size = recon.MD5_BLOCK_READ_BYTES
truncate = 523
start_char = ord('a')
expected_blocks = [chr(i) * block_size
for i in range(start_char, start_char + num_blocks)]
full_data = ''.join(expected_blocks)
trimmed_data = full_data[:-truncate]
# sanity
self.assertEqual(len(trimmed_data), block_size * num_blocks - truncate)
stub_filelike = StringIO(trimmed_data)
with mock.patch('swift.common.middleware.recon.md5') as mock_md5:
mock_hasher = mock_md5.return_value
rv = recon._hash_for_ringfile(stub_filelike)
self.assertTrue(mock_hasher.hexdigest.called)
self.assertEqual(rv, mock_hasher.hexdigest.return_value)
self.assertEqual(num_blocks, len(mock_hasher.update.call_args_list))
expected_block = 'a' * block_size
found_blocks = []
for i, (expected_block, call) in enumerate(zip(
expected_blocks, mock_hasher.update.call_args_list)):
args, kwargs = call
self.assertEqual(kwargs, {})
self.assertEqual(1, len(args))
block = args[0]
if i < num_blocks - 1:
self.assertEqual(block, expected_block)
else:
self.assertEqual(block, expected_block[:-truncate])
found_blocks.append(block)
self.assertEqual(''.join(found_blocks), trimmed_data)
def test_hash_for_ringfile_on_filelike_empty(self):
stub_filelike = StringIO('')
with mock.patch('swift.common.middleware.recon.md5') as mock_md5:
mock_hasher = mock_md5.return_value
rv = recon._hash_for_ringfile(stub_filelike)
self.assertTrue(mock_hasher.hexdigest.called)
self.assertEqual(rv, mock_hasher.hexdigest.return_value)
self.assertEqual([], mock_hasher.update.call_args_list)
def test_hash_for_ringfile_on_filelike_brittle(self):
data_to_expected_hash = {
'': 'd41d8cd98f00b204e9800998ecf8427e',
'some data': '1e50210a0202497fb79bc38b6ade6c34',
('a' * 4096 * 10)[:-523]: '06a41551609656c85f14f659055dc6d3',
}
# unlike some other places where the concrete implementation really
# matters for backwards compatibility these brittle tests are probably
# not needed or justified, if a future maintainer rips them out later
# they're probably doing the right thing
failures = []
for stub_data, expected_hash in data_to_expected_hash.items():
rv = recon._hash_for_ringfile(StringIO(stub_data))
try:
self.assertEqual(expected_hash, rv)
except AssertionError:
trim_cap = 80
if len(stub_data) > trim_cap:
stub_data = '%s...<truncated>' % stub_data[:trim_cap]
failures.append('hash for %r was %s instead of expected %s' % (
stub_data, rv, expected_hash))
if failures:
self.fail('Some data did not compute expected hash:\n' +
'\n'.join(failures))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -5979,5 +5979,97 @@ class TestSocketStringParser(unittest.TestCase):
utils.parse_socket_string(addr, default) utils.parse_socket_string(addr, default)
class TestHashForFileFunction(unittest.TestCase):
def setUp(self):
self.tempfilename = tempfile.mktemp()
def tearDown(self):
try:
os.unlink(self.tempfilename)
except OSError:
pass
def test_hash_for_file_smallish(self):
stub_data = 'some data'
with open(self.tempfilename, 'wb') as fd:
fd.write(stub_data)
with mock.patch('swift.common.utils.md5') as mock_md5:
mock_hasher = mock_md5.return_value
rv = utils.md5_hash_for_file(self.tempfilename)
self.assertTrue(mock_hasher.hexdigest.called)
self.assertEqual(rv, mock_hasher.hexdigest.return_value)
self.assertEqual([mock.call(stub_data)],
mock_hasher.update.call_args_list)
def test_hash_for_file_big(self):
num_blocks = 10
block_size = utils.MD5_BLOCK_READ_BYTES
truncate = 523
start_char = ord('a')
expected_blocks = [chr(i) * block_size
for i in range(start_char, start_char + num_blocks)]
full_data = ''.join(expected_blocks)
trimmed_data = full_data[:-truncate]
# sanity
self.assertEqual(len(trimmed_data), block_size * num_blocks - truncate)
with open(self.tempfilename, 'wb') as fd:
fd.write(trimmed_data)
with mock.patch('swift.common.utils.md5') as mock_md5:
mock_hasher = mock_md5.return_value
rv = utils.md5_hash_for_file(self.tempfilename)
self.assertTrue(mock_hasher.hexdigest.called)
self.assertEqual(rv, mock_hasher.hexdigest.return_value)
self.assertEqual(num_blocks, len(mock_hasher.update.call_args_list))
found_blocks = []
for i, (expected_block, call) in enumerate(zip(
expected_blocks, mock_hasher.update.call_args_list)):
args, kwargs = call
self.assertEqual(kwargs, {})
self.assertEqual(1, len(args))
block = args[0]
if i < num_blocks - 1:
self.assertEqual(block, expected_block)
else:
self.assertEqual(block, expected_block[:-truncate])
found_blocks.append(block)
self.assertEqual(''.join(found_blocks), trimmed_data)
def test_hash_for_file_empty(self):
with open(self.tempfilename, 'wb'):
pass
with mock.patch('swift.common.utils.md5') as mock_md5:
mock_hasher = mock_md5.return_value
rv = utils.md5_hash_for_file(self.tempfilename)
self.assertTrue(mock_hasher.hexdigest.called)
self.assertEqual(rv, mock_hasher.hexdigest.return_value)
self.assertEqual([], mock_hasher.update.call_args_list)
def test_hash_for_file_brittle(self):
data_to_expected_hash = {
'': 'd41d8cd98f00b204e9800998ecf8427e',
'some data': '1e50210a0202497fb79bc38b6ade6c34',
('a' * 4096 * 10)[:-523]: '06a41551609656c85f14f659055dc6d3',
}
# unlike some other places where the concrete implementation really
# matters for backwards compatibility these brittle tests are probably
# not needed or justified, if a future maintainer rips them out later
# they're probably doing the right thing
failures = []
for stub_data, expected_hash in data_to_expected_hash.items():
with open(self.tempfilename, 'wb') as fd:
fd.write(stub_data)
rv = utils.md5_hash_for_file(self.tempfilename)
try:
self.assertEqual(expected_hash, rv)
except AssertionError:
trim_cap = 80
if len(stub_data) > trim_cap:
stub_data = '%s...<truncated>' % stub_data[:trim_cap]
failures.append('hash for %r was %s instead of expected %s' % (
stub_data, rv, expected_hash))
if failures:
self.fail('Some data did not compute expected hash:\n' +
'\n'.join(failures))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()