Merge "diskfile: Prevent get_hashes from creating missing partition dirs"

This commit is contained in:
Zuul 2021-03-30 11:11:01 +00:00 committed by Gerrit Code Review
commit 0dd057ea5f
4 changed files with 116 additions and 56 deletions

View File

@ -341,6 +341,12 @@ def quarantine_renamer(device_path, corrupted_file_path):
return to_dir
def valid_suffix(value):
if not isinstance(value, str) or len(value) != 3:
return False
return all(c in '0123456789abcdef' for c in value)
def read_hashes(partition_dir):
"""
Read the existing hashes.pkl
@ -365,8 +371,8 @@ def read_hashes(partition_dir):
pass
# Check for corrupted data that could break os.listdir()
for suffix in hashes.keys():
if not suffix.isalnum():
if not all(valid_suffix(key) or key in ('valid', 'updated')
for key in hashes):
return {'valid': False}
# hashes.pkl w/o valid updated key is "valid" but "forever old"
@ -1292,6 +1298,8 @@ class BaseDiskFileManager(object):
self.logger.debug('Run listdir on %s', partition_path)
hashes.update((suffix, None) for suffix in recalculate)
for suffix, hash_ in list(hashes.items()):
if suffix in ('valid', 'updated'):
continue
if not hash_:
suffix_dir = join(partition_path, suffix)
try:
@ -1539,12 +1547,14 @@ class BaseDiskFileManager(object):
if not dev_path:
raise DiskFileDeviceUnavailable()
partition_path = get_part_path(dev_path, policy, partition)
if not os.path.exists(partition_path):
mkdirs(partition_path)
suffixes = [suf for suf in suffixes or [] if valid_suffix(suf)]
if skip_rehash:
for suffix in suffixes or []:
invalidate_hash(os.path.join(partition_path, suffix))
return None
for suffix in suffixes:
self.invalidate_hash(os.path.join(partition_path, suffix))
hashes = None
elif not os.path.exists(partition_path):
hashes = {}
else:
_junk, hashes = tpool.execute(
self._get_hashes, device, partition, policy,

View File

@ -453,6 +453,11 @@ class TestRelinker(unittest.TestCase):
self.assertEqual('foo', hashes[self._hash[-3:]])
self.assertFalse(os.path.exists(
os.path.join(self.part_dir, 'hashes.invalid')))
# Check that only the dirty partition in upper half of next part power
# has been created and rehashed
other_next_part = self.next_part ^ 1
other_next_part_dir = os.path.join(self.objects, str(other_next_part))
self.assertFalse(os.path.exists(other_next_part_dir))
def _do_link_test(self, command, old_file_specs, new_file_specs,
conflict_file_specs, exp_old_specs, exp_new_specs,

View File

@ -6743,6 +6743,15 @@ class TestSuffixHashes(unittest.TestCase):
if suffix_dir != suffix_dir2:
return df2
def test_valid_suffix(self):
self.assertTrue(diskfile.valid_suffix('000'))
self.assertTrue(diskfile.valid_suffix('123'))
self.assertTrue(diskfile.valid_suffix('fff'))
self.assertFalse(diskfile.valid_suffix(list('123')))
self.assertFalse(diskfile.valid_suffix(123))
self.assertFalse(diskfile.valid_suffix(' 12'))
self.assertFalse(diskfile.valid_suffix('-00'))
def check_cleanup_ondisk_files(self, policy, input_files, output_files):
orig_unlink = os.unlink
file_list = list(input_files)
@ -7104,7 +7113,12 @@ class TestSuffixHashes(unittest.TestCase):
def test_invalidate_hash_empty_file_exists(self):
for policy in self.iter_policies():
df_mgr = self.df_router[policy]
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
hashes = df_mgr.get_hashes('sda1', '0', [], policy)
pkl_path = os.path.join(part_path, diskfile.HASH_FILE)
self.assertTrue(os.path.exists(pkl_path))
self.assertEqual(hashes, {})
# create something to hash
df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
@ -7127,6 +7141,7 @@ class TestSuffixHashes(unittest.TestCase):
df_mgr = self.df_router[policy]
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
inv_file = os.path.join(
part_path, diskfile.HASH_INVALIDATIONS_FILE)
hash_file = os.path.join(
@ -7162,9 +7177,14 @@ class TestSuffixHashes(unittest.TestCase):
# get_hashes call
for policy in self.iter_policies():
df_mgr = self.df_router[policy]
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
if existing:
mkdirs(part_path)
# force hashes.pkl to exist
df_mgr.get_hashes('sda1', '0', [], policy)
self.assertTrue(os.path.exists(os.path.join(
part_path, diskfile.HASH_FILE)))
orig_listdir = os.listdir
df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
policy=policy)
@ -7184,10 +7204,15 @@ class TestSuffixHashes(unittest.TestCase):
df2.delete(self.ts())
return result
if not existing:
self.assertFalse(os.path.exists(os.path.join(
part_path, diskfile.HASH_FILE)))
with mock.patch('swift.obj.diskfile.os.listdir',
mock_listdir):
# creates pkl file
# creates pkl file if not already there
hashes = df_mgr.get_hashes('sda1', '0', [], policy)
self.assertTrue(os.path.exists(os.path.join(
part_path, diskfile.HASH_FILE)))
# second suffix added after directory listing, it's added later
self.assertIn(suffix, hashes)
@ -7216,7 +7241,12 @@ class TestSuffixHashes(unittest.TestCase):
orig_hash_suffix = df_mgr._hash_suffix
if existing:
# create hashes.pkl
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
df_mgr.get_hashes('sda1', '0', [], policy)
self.assertTrue(os.path.exists(os.path.join(
part_path, diskfile.HASH_FILE)))
df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
policy=policy)
@ -7257,7 +7287,7 @@ class TestSuffixHashes(unittest.TestCase):
return result
with mock.patch.object(df_mgr, '_hash_suffix', mock_hash_suffix):
# creates pkl file and repeats listing when pkl modified
# repeats listing when pkl modified
hashes = df_mgr.get_hashes('sda1', '0', [], policy)
# first get_hashes should complete with suffix1 state
@ -7452,6 +7482,12 @@ class TestSuffixHashes(unittest.TestCase):
# verify that if consolidate_hashes raises an exception then suffixes
# are rehashed and a hashes.pkl is written
for policy in self.iter_policies():
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
invalidations_file = os.path.join(
part_path, diskfile.HASH_INVALIDATIONS_FILE)
self.logger.clear()
df_mgr = self.df_router[policy]
# create something to hash
@ -7461,10 +7497,14 @@ class TestSuffixHashes(unittest.TestCase):
# avoid getting O_TMPFILE warning in logs
if not utils.o_tmpfile_in_tmpdir_supported():
df.manager.use_linkat = False
self.assertFalse(os.path.exists(part_path))
df.delete(self.ts())
self.assertTrue(os.path.exists(invalidations_file))
suffix_dir = os.path.dirname(df._datadir)
suffix = os.path.basename(suffix_dir)
# no pre-existing hashes.pkl
self.assertFalse(os.path.exists(hashes_file))
with mock.patch.object(df_mgr, '_hash_suffix',
return_value='fake hash'):
with mock.patch.object(df_mgr, 'consolidate_hashes',
@ -7473,10 +7513,6 @@ class TestSuffixHashes(unittest.TestCase):
self.assertEqual({suffix: 'fake hash'}, hashes)
# sanity check hashes file
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
with open(hashes_file, 'rb') as f:
found_hashes = pickle.load(f)
found_hashes.pop('updated')
@ -7497,9 +7533,6 @@ class TestSuffixHashes(unittest.TestCase):
self.assertEqual({suffix: 'new fake hash'}, hashes)
# sanity check hashes file
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
with open(hashes_file, 'rb') as f:
found_hashes = pickle.load(f)
found_hashes.pop('updated')
@ -8185,6 +8218,9 @@ class TestSuffixHashes(unittest.TestCase):
def test_hash_suffix_listdir_enoent(self):
for policy in self.iter_policies():
df_mgr = self.df_router[policy]
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
mkdirs(part_path) # ensure we'll bother writing a pkl at all
orig_listdir = os.listdir
listdir_calls = []
@ -8203,9 +8239,6 @@ class TestSuffixHashes(unittest.TestCase):
# does not exist!
df_mgr.get_hashes('sda1', '0', ['123'], policy)
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
self.assertEqual(listdir_calls, [
# part path gets created automatically
(part_path, True),
@ -8340,7 +8373,7 @@ class TestSuffixHashes(unittest.TestCase):
# get_hashes tests - behaviors
def test_get_hashes_creates_partition_and_pkl(self):
def test_get_hashes_does_not_create_partition(self):
for policy in self.iter_policies():
df_mgr = self.df_router[policy]
hashes = df_mgr.get_hashes(self.existing_device, '0', [],
@ -8348,6 +8381,18 @@ class TestSuffixHashes(unittest.TestCase):
self.assertEqual(hashes, {})
part_path = os.path.join(
self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
self.assertFalse(os.path.exists(part_path))
def test_get_hashes_creates_pkl(self):
# like above, but -- if the partition already exists, make the pickle
for policy in self.iter_policies():
part_path = os.path.join(
self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
df_mgr = self.df_router[policy]
hashes = df_mgr.get_hashes(self.existing_device, '0', [],
policy)
self.assertEqual(hashes, {})
self.assertTrue(os.path.exists(part_path))
hashes_file = os.path.join(part_path,
diskfile.HASH_FILE)
@ -8438,6 +8483,7 @@ class TestSuffixHashes(unittest.TestCase):
# create an empty stale pickle
part_path = os.path.join(
self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
hashes_file = os.path.join(part_path,
diskfile.HASH_FILE)
hashes = df_mgr.get_hashes(self.existing_device, '0', [], policy)
@ -8641,6 +8687,7 @@ class TestSuffixHashes(unittest.TestCase):
suffix2 = os.path.basename(os.path.dirname(df2._datadir))
part_path = os.path.dirname(os.path.dirname(
os.path.join(df._datadir)))
mkdirs(part_path)
hashfile_path = os.path.join(part_path, diskfile.HASH_FILE)
# create hashes.pkl
hashes = df_mgr.get_hashes(self.existing_device, '0', [],
@ -8745,8 +8792,13 @@ class TestSuffixHashes(unittest.TestCase):
def test_get_hashes_modified_recursive_retry(self):
for policy in self.iter_policies():
df_mgr = self.df_router[policy]
part_path = os.path.join(self.devices, self.existing_device,
diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
# first create an empty pickle
df_mgr.get_hashes(self.existing_device, '0', [], policy)
self.assertTrue(os.path.exists(os.path.join(
part_path, diskfile.HASH_FILE)))
non_local = {'suffix_count': 1}
calls = []
@ -8788,19 +8840,19 @@ class TestHashesHelpers(unittest.TestCase):
rmtree(self.testdir, ignore_errors=1)
def test_read_legacy_hashes(self):
hashes = {'stub': 'fake'}
hashes = {'fff': 'fake'}
hashes_file = os.path.join(self.testdir, diskfile.HASH_FILE)
with open(hashes_file, 'wb') as f:
pickle.dump(hashes, f)
expected = {
'stub': 'fake',
'fff': 'fake',
'updated': -1,
'valid': True,
}
self.assertEqual(expected, diskfile.read_hashes(self.testdir))
def test_write_hashes_valid_updated(self):
hashes = {'stub': 'fake', 'valid': True}
hashes = {'888': 'fake', 'valid': True}
now = time()
with mock.patch('swift.obj.diskfile.time.time', return_value=now):
diskfile.write_hashes(self.testdir, hashes)
@ -8808,7 +8860,7 @@ class TestHashesHelpers(unittest.TestCase):
with open(hashes_file, 'rb') as f:
data = pickle.load(f)
expected = {
'stub': 'fake',
'888': 'fake',
'updated': now,
'valid': True,
}
@ -8843,7 +8895,7 @@ class TestHashesHelpers(unittest.TestCase):
self.assertEqual(expected, data)
def test_read_write_valid_hashes_mutation_and_transative_equality(self):
hashes = {'stub': 'fake', 'valid': True}
hashes = {'000': 'fake', 'valid': True}
diskfile.write_hashes(self.testdir, hashes)
# write_hashes mutates the passed in hashes, it adds the updated key
self.assertIn('updated', hashes)

View File

@ -7158,11 +7158,11 @@ class TestObjectController(unittest.TestCase):
def my_tpool_execute(func, *args, **kwargs):
return func(*args, **kwargs)
was_get_hashes = diskfile.DiskFileManager._get_hashes
was_tpool_exe = tpool.execute
try:
diskfile.DiskFileManager._get_hashes = fake_get_hashes
tpool.execute = my_tpool_execute
with mock.patch.object(diskfile.DiskFileManager, '_get_hashes',
fake_get_hashes), \
mock.patch.object(tpool, 'execute', my_tpool_execute), \
mock.patch('swift.obj.diskfile.os.path.exists',
return_value=True):
req = Request.blank('/sda1/p/',
environ={'REQUEST_METHOD': 'REPLICATE'},
headers={})
@ -7170,9 +7170,6 @@ class TestObjectController(unittest.TestCase):
self.assertEqual(resp.status_int, 200)
p_data = pickle.loads(resp.body)
self.assertEqual(p_data, {1: 2})
finally:
tpool.execute = was_tpool_exe
diskfile.DiskFileManager._get_hashes = was_get_hashes
def test_REPLICATE_pickle_protocol(self):
@ -7182,15 +7179,15 @@ class TestObjectController(unittest.TestCase):
def my_tpool_execute(func, *args, **kwargs):
return func(*args, **kwargs)
was_get_hashes = diskfile.DiskFileManager._get_hashes
was_tpool_exe = tpool.execute
try:
diskfile.DiskFileManager._get_hashes = fake_get_hashes
tpool.execute = my_tpool_execute
with mock.patch.object(diskfile.DiskFileManager, '_get_hashes',
fake_get_hashes), \
mock.patch.object(tpool, 'execute', my_tpool_execute), \
mock.patch('swift.obj.server.pickle.dumps') as fake_pickle, \
mock.patch('swift.obj.diskfile.os.path.exists',
return_value=True):
req = Request.blank('/sda1/p/',
environ={'REQUEST_METHOD': 'REPLICATE'},
headers={})
with mock.patch('swift.obj.server.pickle.dumps') as fake_pickle:
fake_pickle.return_value = b''
req.get_response(self.object_controller)
# This is the key assertion: starting in Python 3.0, the
@ -7198,9 +7195,6 @@ class TestObjectController(unittest.TestCase):
# on Python 2. As long as we may need to talk to a Python 2
# process, we need to cap our protocol version.
fake_pickle.assert_called_once_with({1: 2}, protocol=2)
finally:
tpool.execute = was_tpool_exe
diskfile.DiskFileManager._get_hashes = was_get_hashes
def test_REPLICATE_timeout(self):
@ -7210,18 +7204,17 @@ class TestObjectController(unittest.TestCase):
def my_tpool_execute(func, *args, **kwargs):
return func(*args, **kwargs)
was_get_hashes = diskfile.DiskFileManager._get_hashes
was_tpool_exe = tpool.execute
try:
with mock.patch.object(diskfile.DiskFileManager, '_get_hashes',
fake_get_hashes), \
mock.patch.object(tpool, 'execute', my_tpool_execute), \
mock.patch('swift.obj.diskfile.os.path.exists',
return_value=True):
diskfile.DiskFileManager._get_hashes = fake_get_hashes
tpool.execute = my_tpool_execute
req = Request.blank('/sda1/p/',
environ={'REQUEST_METHOD': 'REPLICATE'},
headers={})
self.assertRaises(Timeout, self.object_controller.REPLICATE, req)
finally:
tpool.execute = was_tpool_exe
diskfile.DiskFileManager._get_hashes = was_get_hashes
def test_REPLICATE_reclaims_tombstones(self):
conf = {'devices': self.testdir, 'mount_check': False,