Refactor recon to use single md5_hash_for_file function
There were several implementations of hashing the content of a file in cli/recon.py and common/middleware/recon.py. This patch relocates one implementation (_hash_for_ringfile, introduced in the Related Change) to common/utils.py and refactors recon cli and middleware to use that function. Also improves use of mocking in the unit tests to eliminate passing custom file opener functions to the ReconMiddleware get_ring_md5 and get_swift_conf_md5 methods. Related-Change: I9623752c3cd2361f57864f3e938e1baf5e9292d7 Change-Id: Iaad88e49aadeb28f614aafa1e9596fe07ce9793a
This commit is contained in:
parent
053b625f42
commit
609b5182c4
@ -19,10 +19,10 @@ from __future__ import print_function
|
||||
|
||||
from eventlet.green import socket
|
||||
from six.moves.urllib.parse import urlparse
|
||||
from swift.common.utils import SWIFT_CONF_FILE
|
||||
|
||||
from swift.common.utils import SWIFT_CONF_FILE, md5_hash_for_file
|
||||
from swift.common.ring import Ring
|
||||
from swift.common.storage_policy import POLICIES
|
||||
from hashlib import md5
|
||||
import eventlet
|
||||
import json
|
||||
import optparse
|
||||
@ -194,21 +194,6 @@ class SwiftRecon(object):
|
||||
else:
|
||||
return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
|
||||
|
||||
def _md5_file(self, path):
|
||||
"""
|
||||
Get the MD5 checksum of a file.
|
||||
|
||||
:param path: path to file
|
||||
:returns: MD5 checksum, hex encoded
|
||||
"""
|
||||
md5sum = md5()
|
||||
with open(path, 'rb') as f:
|
||||
block = f.read(4096)
|
||||
while block:
|
||||
md5sum.update(block)
|
||||
block = f.read(4096)
|
||||
return md5sum.hexdigest()
|
||||
|
||||
def get_hosts(self, region_filter, zone_filter, swift_dir, ring_names):
|
||||
"""
|
||||
Get a list of hosts in the rings.
|
||||
@ -249,14 +234,8 @@ class SwiftRecon(object):
|
||||
ring_names.add(ring_name)
|
||||
rings = {}
|
||||
for ring_name in ring_names:
|
||||
md5sum = md5()
|
||||
with open(os.path.join(swift_dir, ring_name), 'rb') as f:
|
||||
block = f.read(4096)
|
||||
while block:
|
||||
md5sum.update(block)
|
||||
block = f.read(4096)
|
||||
ring_sum = md5sum.hexdigest()
|
||||
rings[ring_name] = ring_sum
|
||||
rings[ring_name] = md5_hash_for_file(
|
||||
os.path.join(swift_dir, ring_name))
|
||||
recon = Scout("ringmd5", self.verbose, self.suppress_errors,
|
||||
self.timeout)
|
||||
print("[%s] Checking ring md5sums" % self._ptime())
|
||||
@ -298,7 +277,7 @@ class SwiftRecon(object):
|
||||
"""
|
||||
matches = 0
|
||||
errors = 0
|
||||
conf_sum = self._md5_file(SWIFT_CONF_FILE)
|
||||
conf_sum = md5_hash_for_file(SWIFT_CONF_FILE)
|
||||
recon = Scout("swiftconfmd5", self.verbose, self.suppress_errors,
|
||||
self.timeout)
|
||||
printfn("[%s] Checking swift.conf md5sum" % self._ptime())
|
||||
|
@ -17,26 +17,15 @@ import errno
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from swift import gettext_ as _
|
||||
from resource import getpagesize
|
||||
|
||||
from swift import __version__ as swiftver
|
||||
from swift import gettext_ as _
|
||||
from swift.common.constraints import check_mount
|
||||
from swift.common.storage_policy import POLICIES
|
||||
from swift.common.swob import Request, Response
|
||||
from swift.common.utils import get_logger, config_true_value, \
|
||||
SWIFT_CONF_FILE
|
||||
from swift.common.constraints import check_mount
|
||||
from resource import getpagesize
|
||||
from hashlib import md5
|
||||
|
||||
|
||||
MD5_BLOCK_READ_BYTES = 4096
|
||||
|
||||
|
||||
def _hash_for_ringfile(f):
|
||||
md5sum = md5()
|
||||
for block in iter(lambda: f.read(MD5_BLOCK_READ_BYTES), ''):
|
||||
md5sum.update(block)
|
||||
return md5sum.hexdigest()
|
||||
SWIFT_CONF_FILE, md5_hash_for_file
|
||||
|
||||
|
||||
class ReconMiddleware(object):
|
||||
@ -256,35 +245,27 @@ class ReconMiddleware(object):
|
||||
'size': '', 'used': '', 'avail': ''})
|
||||
return devices
|
||||
|
||||
def get_ring_md5(self, openr=open):
|
||||
def get_ring_md5(self):
|
||||
"""get all ring md5sum's"""
|
||||
sums = {}
|
||||
for ringfile in self.rings:
|
||||
if os.path.exists(ringfile):
|
||||
try:
|
||||
with openr(ringfile, 'rb') as f:
|
||||
sums[ringfile] = _hash_for_ringfile(f)
|
||||
sums[ringfile] = md5_hash_for_file(ringfile)
|
||||
except IOError as err:
|
||||
sums[ringfile] = None
|
||||
if err.errno != errno.ENOENT:
|
||||
self.logger.exception(_('Error reading ringfile'))
|
||||
return sums
|
||||
|
||||
def get_swift_conf_md5(self, openr=open):
|
||||
def get_swift_conf_md5(self):
|
||||
"""get md5 of swift.conf"""
|
||||
md5sum = md5()
|
||||
hexsum = None
|
||||
try:
|
||||
with openr(SWIFT_CONF_FILE, 'r') as fh:
|
||||
chunk = fh.read(4096)
|
||||
while chunk:
|
||||
md5sum.update(chunk)
|
||||
chunk = fh.read(4096)
|
||||
hexsum = md5_hash_for_file(SWIFT_CONF_FILE)
|
||||
except IOError as err:
|
||||
if err.errno != errno.ENOENT:
|
||||
self.logger.exception(_('Error reading swift.conf'))
|
||||
hexsum = None
|
||||
else:
|
||||
hexsum = md5sum.hexdigest()
|
||||
return {SWIFT_CONF_FILE: hexsum}
|
||||
|
||||
def get_quarantine_count(self):
|
||||
|
@ -4200,3 +4200,20 @@ def safe_json_loads(value):
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
MD5_BLOCK_READ_BYTES = 4096
|
||||
|
||||
|
||||
def md5_hash_for_file(fname):
|
||||
"""
|
||||
Get the MD5 checksum of a file.
|
||||
|
||||
:param fname: path to file
|
||||
:returns: MD5 checksum, hex encoded
|
||||
"""
|
||||
with open(fname, 'rb') as f:
|
||||
md5sum = md5()
|
||||
for block in iter(lambda: f.read(MD5_BLOCK_READ_BYTES), ''):
|
||||
md5sum.update(block)
|
||||
return md5sum.hexdigest()
|
||||
|
@ -291,7 +291,7 @@ class TestRecon(unittest.TestCase):
|
||||
self.recon_instance.server_type = server_type
|
||||
stdout = StringIO()
|
||||
with mock.patch('sys.stdout', new=stdout), \
|
||||
mock.patch('swift.cli.recon.md5', new=mock_hash):
|
||||
mock.patch('swift.common.utils.md5', new=mock_hash):
|
||||
mock_hash.return_value.hexdigest.return_value = \
|
||||
empty_file_hash
|
||||
self.recon_instance.get_ringmd5(hosts, self.swift_dir)
|
||||
@ -319,7 +319,7 @@ class TestRecon(unittest.TestCase):
|
||||
mock_scout.return_value = scout_instance
|
||||
stdout = StringIO()
|
||||
with mock.patch('sys.stdout', new=stdout), \
|
||||
mock.patch('swift.cli.recon.md5', new=mock_hash):
|
||||
mock.patch('swift.common.utils.md5', new=mock_hash):
|
||||
mock_hash.return_value.hexdigest.return_value = \
|
||||
empty_file_hash
|
||||
self.recon_instance.get_ringmd5(hosts, self.swift_dir)
|
||||
@ -339,7 +339,7 @@ class TestRecon(unittest.TestCase):
|
||||
self.recon_instance.server_type = 'object'
|
||||
stdout = StringIO()
|
||||
with mock.patch('sys.stdout', new=stdout), \
|
||||
mock.patch('swift.cli.recon.md5', new=mock_hash):
|
||||
mock.patch('swift.common.utils.md5', new=mock_hash):
|
||||
mock_hash.return_value.hexdigest.return_value = \
|
||||
empty_file_hash
|
||||
self.recon_instance.get_ringmd5(hosts, self.swift_dir)
|
||||
@ -729,7 +729,8 @@ class TestReconCommands(unittest.TestCase):
|
||||
|
||||
printed = []
|
||||
with self.mock_responses(responses):
|
||||
with mock.patch.object(self.recon, '_md5_file', lambda _: cksum):
|
||||
with mock.patch('swift.cli.recon.md5_hash_for_file',
|
||||
lambda _: cksum):
|
||||
self.recon.get_swiftconfmd5(hosts, printfn=printed.append)
|
||||
|
||||
output = '\n'.join(printed) + '\n'
|
||||
@ -748,7 +749,8 @@ class TestReconCommands(unittest.TestCase):
|
||||
|
||||
printed = []
|
||||
with self.mock_responses(responses):
|
||||
with mock.patch.object(self.recon, '_md5_file', lambda _: cksum):
|
||||
with mock.patch('swift.cli.recon.md5_hash_for_file',
|
||||
lambda _: cksum):
|
||||
self.recon.get_swiftconfmd5(hosts, printfn=printed.append)
|
||||
|
||||
output = '\n'.join(printed) + '\n'
|
||||
|
@ -21,7 +21,6 @@ from posix import stat_result, statvfs_result
|
||||
from shutil import rmtree
|
||||
import unittest
|
||||
from unittest import TestCase
|
||||
from six import StringIO
|
||||
|
||||
from swift import __version__ as swiftver
|
||||
from swift.common import ring, utils
|
||||
@ -232,12 +231,12 @@ class TestReconSuccess(TestCase):
|
||||
self.app._from_recon_cache = self.fakecache.fake_from_recon_cache
|
||||
self.frecon = FakeRecon()
|
||||
|
||||
# replace hash md5 implementation of the _hash_for_ringfile function
|
||||
mock_hash_for_ringfile = mock.patch(
|
||||
'swift.common.middleware.recon._hash_for_ringfile',
|
||||
lambda f: 'hash-' + os.path.basename(f.name))
|
||||
self.addCleanup(mock_hash_for_ringfile.stop)
|
||||
mock_hash_for_ringfile.start()
|
||||
# replace hash md5 implementation of the md5_hash_for_file function
|
||||
mock_hash_for_file = mock.patch(
|
||||
'swift.common.middleware.recon.md5_hash_for_file',
|
||||
lambda f, **kwargs: 'hash-' + os.path.basename(f))
|
||||
self.addCleanup(mock_hash_for_file.stop)
|
||||
mock_hash_for_file.start()
|
||||
|
||||
self.ring_part_shift = 5
|
||||
self.ring_devs = [{'id': 0, 'zone': 0, 'weight': 1.0,
|
||||
@ -347,14 +346,12 @@ class TestReconSuccess(TestCase):
|
||||
# still produce a ringmd5 entry with a None for the hash. Note that
|
||||
# this is different than if an expected ring file simply doesn't exist,
|
||||
# in which case it is excluded altogether from the ringmd5 response.
|
||||
|
||||
def fake_open(fn, fmode):
|
||||
raise IOError
|
||||
|
||||
expt_out = {'%s/account.ring.gz' % self.tempdir: None,
|
||||
'%s/container.ring.gz' % self.tempdir: None,
|
||||
'%s/object.ring.gz' % self.tempdir: None}
|
||||
ringmd5 = self.app.get_ring_md5(openr=fake_open)
|
||||
with mock.patch('swift.common.middleware.recon.md5_hash_for_file',
|
||||
side_effect=IOError):
|
||||
ringmd5 = self.app.get_ring_md5()
|
||||
self.assertEqual(sorted(ringmd5.items()),
|
||||
sorted(expt_out.items()))
|
||||
|
||||
@ -362,30 +359,30 @@ class TestReconSuccess(TestCase):
|
||||
# Ring files that are present but produce an IOError on read will
|
||||
# show a None hash, but if they can be read later their hash
|
||||
# should become available in the ringmd5 response.
|
||||
|
||||
def fake_open(fn, fmode):
|
||||
raise IOError
|
||||
|
||||
expt_out = {'%s/account.ring.gz' % self.tempdir: None,
|
||||
'%s/container.ring.gz' % self.tempdir: None,
|
||||
'%s/object.ring.gz' % self.tempdir: None}
|
||||
ringmd5 = self.app.get_ring_md5(openr=fake_open)
|
||||
with mock.patch('swift.common.middleware.recon.md5_hash_for_file',
|
||||
side_effect=IOError):
|
||||
ringmd5 = self.app.get_ring_md5()
|
||||
self.assertEqual(sorted(ringmd5.items()),
|
||||
sorted(expt_out.items()))
|
||||
|
||||
# If we fix a ring and it can be read again, its hash should then
|
||||
# appear using the same app instance
|
||||
def fake_open_objonly(fn, fmode):
|
||||
def fake_hash_for_file(fn):
|
||||
if 'object' not in fn:
|
||||
raise IOError
|
||||
return open(fn, fmode)
|
||||
return 'hash-' + os.path.basename(fn)
|
||||
|
||||
expt_out = {'%s/account.ring.gz' % self.tempdir: None,
|
||||
'%s/container.ring.gz' % self.tempdir: None,
|
||||
'%s/object.ring.gz' % self.tempdir:
|
||||
'hash-object.ring.gz'}
|
||||
|
||||
ringmd5 = self.app.get_ring_md5(openr=fake_open_objonly)
|
||||
with mock.patch('swift.common.middleware.recon.md5_hash_for_file',
|
||||
fake_hash_for_file):
|
||||
ringmd5 = self.app.get_ring_md5()
|
||||
self.assertEqual(sorted(ringmd5.items()),
|
||||
sorted(expt_out.items()))
|
||||
|
||||
@ -1414,90 +1411,11 @@ class TestReconMiddleware(unittest.TestCase):
|
||||
|
||||
def test_get_swift_conf_md5_fail(self):
|
||||
"""Test get_swift_conf_md5 failure by failing file open"""
|
||||
resp = self.real_app_get_swift_conf_md5(fail_io_open)
|
||||
with mock.patch('swift.common.middleware.recon.md5_hash_for_file',
|
||||
side_effect=IOError):
|
||||
resp = self.real_app_get_swift_conf_md5()
|
||||
self.assertIsNone(resp['/etc/swift/swift.conf'])
|
||||
|
||||
|
||||
class TestReconUtilityFunctions(unittest.TestCase):
|
||||
|
||||
def test_hash_for_ringfile_on_filelike_smallish(self):
|
||||
stub_data = 'some data'
|
||||
stub_filelike = StringIO(stub_data)
|
||||
with mock.patch('swift.common.middleware.recon.md5') as mock_md5:
|
||||
mock_hasher = mock_md5.return_value
|
||||
rv = recon._hash_for_ringfile(stub_filelike)
|
||||
self.assertTrue(mock_hasher.hexdigest.called)
|
||||
self.assertEqual(rv, mock_hasher.hexdigest.return_value)
|
||||
self.assertEqual([mock.call(stub_data)],
|
||||
mock_hasher.update.call_args_list)
|
||||
|
||||
def test_hash_for_ringfile_on_filelike_big(self):
|
||||
num_blocks = 10
|
||||
block_size = recon.MD5_BLOCK_READ_BYTES
|
||||
truncate = 523
|
||||
start_char = ord('a')
|
||||
expected_blocks = [chr(i) * block_size
|
||||
for i in range(start_char, start_char + num_blocks)]
|
||||
full_data = ''.join(expected_blocks)
|
||||
trimmed_data = full_data[:-truncate]
|
||||
# sanity
|
||||
self.assertEqual(len(trimmed_data), block_size * num_blocks - truncate)
|
||||
stub_filelike = StringIO(trimmed_data)
|
||||
with mock.patch('swift.common.middleware.recon.md5') as mock_md5:
|
||||
mock_hasher = mock_md5.return_value
|
||||
rv = recon._hash_for_ringfile(stub_filelike)
|
||||
self.assertTrue(mock_hasher.hexdigest.called)
|
||||
self.assertEqual(rv, mock_hasher.hexdigest.return_value)
|
||||
self.assertEqual(num_blocks, len(mock_hasher.update.call_args_list))
|
||||
expected_block = 'a' * block_size
|
||||
found_blocks = []
|
||||
for i, (expected_block, call) in enumerate(zip(
|
||||
expected_blocks, mock_hasher.update.call_args_list)):
|
||||
args, kwargs = call
|
||||
self.assertEqual(kwargs, {})
|
||||
self.assertEqual(1, len(args))
|
||||
block = args[0]
|
||||
if i < num_blocks - 1:
|
||||
self.assertEqual(block, expected_block)
|
||||
else:
|
||||
self.assertEqual(block, expected_block[:-truncate])
|
||||
found_blocks.append(block)
|
||||
self.assertEqual(''.join(found_blocks), trimmed_data)
|
||||
|
||||
def test_hash_for_ringfile_on_filelike_empty(self):
|
||||
stub_filelike = StringIO('')
|
||||
with mock.patch('swift.common.middleware.recon.md5') as mock_md5:
|
||||
mock_hasher = mock_md5.return_value
|
||||
rv = recon._hash_for_ringfile(stub_filelike)
|
||||
self.assertTrue(mock_hasher.hexdigest.called)
|
||||
self.assertEqual(rv, mock_hasher.hexdigest.return_value)
|
||||
self.assertEqual([], mock_hasher.update.call_args_list)
|
||||
|
||||
def test_hash_for_ringfile_on_filelike_brittle(self):
|
||||
data_to_expected_hash = {
|
||||
'': 'd41d8cd98f00b204e9800998ecf8427e',
|
||||
'some data': '1e50210a0202497fb79bc38b6ade6c34',
|
||||
('a' * 4096 * 10)[:-523]: '06a41551609656c85f14f659055dc6d3',
|
||||
}
|
||||
# unlike some other places where the concrete implementation really
|
||||
# matters for backwards compatibility these brittle tests are probably
|
||||
# not needed or justified, if a future maintainer rips them out later
|
||||
# they're probably doing the right thing
|
||||
failures = []
|
||||
for stub_data, expected_hash in data_to_expected_hash.items():
|
||||
rv = recon._hash_for_ringfile(StringIO(stub_data))
|
||||
try:
|
||||
self.assertEqual(expected_hash, rv)
|
||||
except AssertionError:
|
||||
trim_cap = 80
|
||||
if len(stub_data) > trim_cap:
|
||||
stub_data = '%s...<truncated>' % stub_data[:trim_cap]
|
||||
failures.append('hash for %r was %s instead of expected %s' % (
|
||||
stub_data, rv, expected_hash))
|
||||
if failures:
|
||||
self.fail('Some data did not compute expected hash:\n' +
|
||||
'\n'.join(failures))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -5979,5 +5979,97 @@ class TestSocketStringParser(unittest.TestCase):
|
||||
utils.parse_socket_string(addr, default)
|
||||
|
||||
|
||||
class TestHashForFileFunction(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.tempfilename = tempfile.mktemp()
|
||||
|
||||
def tearDown(self):
|
||||
try:
|
||||
os.unlink(self.tempfilename)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def test_hash_for_file_smallish(self):
|
||||
stub_data = 'some data'
|
||||
with open(self.tempfilename, 'wb') as fd:
|
||||
fd.write(stub_data)
|
||||
with mock.patch('swift.common.utils.md5') as mock_md5:
|
||||
mock_hasher = mock_md5.return_value
|
||||
rv = utils.md5_hash_for_file(self.tempfilename)
|
||||
self.assertTrue(mock_hasher.hexdigest.called)
|
||||
self.assertEqual(rv, mock_hasher.hexdigest.return_value)
|
||||
self.assertEqual([mock.call(stub_data)],
|
||||
mock_hasher.update.call_args_list)
|
||||
|
||||
def test_hash_for_file_big(self):
|
||||
num_blocks = 10
|
||||
block_size = utils.MD5_BLOCK_READ_BYTES
|
||||
truncate = 523
|
||||
start_char = ord('a')
|
||||
expected_blocks = [chr(i) * block_size
|
||||
for i in range(start_char, start_char + num_blocks)]
|
||||
full_data = ''.join(expected_blocks)
|
||||
trimmed_data = full_data[:-truncate]
|
||||
# sanity
|
||||
self.assertEqual(len(trimmed_data), block_size * num_blocks - truncate)
|
||||
with open(self.tempfilename, 'wb') as fd:
|
||||
fd.write(trimmed_data)
|
||||
with mock.patch('swift.common.utils.md5') as mock_md5:
|
||||
mock_hasher = mock_md5.return_value
|
||||
rv = utils.md5_hash_for_file(self.tempfilename)
|
||||
self.assertTrue(mock_hasher.hexdigest.called)
|
||||
self.assertEqual(rv, mock_hasher.hexdigest.return_value)
|
||||
self.assertEqual(num_blocks, len(mock_hasher.update.call_args_list))
|
||||
found_blocks = []
|
||||
for i, (expected_block, call) in enumerate(zip(
|
||||
expected_blocks, mock_hasher.update.call_args_list)):
|
||||
args, kwargs = call
|
||||
self.assertEqual(kwargs, {})
|
||||
self.assertEqual(1, len(args))
|
||||
block = args[0]
|
||||
if i < num_blocks - 1:
|
||||
self.assertEqual(block, expected_block)
|
||||
else:
|
||||
self.assertEqual(block, expected_block[:-truncate])
|
||||
found_blocks.append(block)
|
||||
self.assertEqual(''.join(found_blocks), trimmed_data)
|
||||
|
||||
def test_hash_for_file_empty(self):
|
||||
with open(self.tempfilename, 'wb'):
|
||||
pass
|
||||
with mock.patch('swift.common.utils.md5') as mock_md5:
|
||||
mock_hasher = mock_md5.return_value
|
||||
rv = utils.md5_hash_for_file(self.tempfilename)
|
||||
self.assertTrue(mock_hasher.hexdigest.called)
|
||||
self.assertEqual(rv, mock_hasher.hexdigest.return_value)
|
||||
self.assertEqual([], mock_hasher.update.call_args_list)
|
||||
|
||||
def test_hash_for_file_brittle(self):
|
||||
data_to_expected_hash = {
|
||||
'': 'd41d8cd98f00b204e9800998ecf8427e',
|
||||
'some data': '1e50210a0202497fb79bc38b6ade6c34',
|
||||
('a' * 4096 * 10)[:-523]: '06a41551609656c85f14f659055dc6d3',
|
||||
}
|
||||
# unlike some other places where the concrete implementation really
|
||||
# matters for backwards compatibility these brittle tests are probably
|
||||
# not needed or justified, if a future maintainer rips them out later
|
||||
# they're probably doing the right thing
|
||||
failures = []
|
||||
for stub_data, expected_hash in data_to_expected_hash.items():
|
||||
with open(self.tempfilename, 'wb') as fd:
|
||||
fd.write(stub_data)
|
||||
rv = utils.md5_hash_for_file(self.tempfilename)
|
||||
try:
|
||||
self.assertEqual(expected_hash, rv)
|
||||
except AssertionError:
|
||||
trim_cap = 80
|
||||
if len(stub_data) > trim_cap:
|
||||
stub_data = '%s...<truncated>' % stub_data[:trim_cap]
|
||||
failures.append('hash for %r was %s instead of expected %s' % (
|
||||
stub_data, rv, expected_hash))
|
||||
if failures:
|
||||
self.fail('Some data did not compute expected hash:\n' +
|
||||
'\n'.join(failures))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
Loading…
Reference in New Issue
Block a user