Merge "diskfile: Prevent get_hashes from creating missing partition dirs"
commit 0dd057ea5f
@@ -341,6 +341,12 @@ def quarantine_renamer(device_path, corrupted_file_path):
     return to_dir


+def valid_suffix(value):
+    if not isinstance(value, str) or len(value) != 3:
+        return False
+    return all(c in '0123456789abcdef' for c in value)
+
+
 def read_hashes(partition_dir):
     """
     Read the existing hashes.pkl
@@ -365,9 +371,9 @@ def read_hashes(partition_dir):
             pass

     # Check for corrupted data that could break os.listdir()
-    for suffix in hashes.keys():
-        if not suffix.isalnum():
-            return {'valid': False}
+    if not all(valid_suffix(key) or key in ('valid', 'updated')
+               for key in hashes):
+        return {'valid': False}

     # hashes.pkl w/o valid updated key is "valid" but "forever old"
     hashes.setdefault('valid', True)
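The new check replaces the old per-key isalnum() test with the stricter valid_suffix() rule. A minimal standalone sketch of that rule follows; the helper and the sample dicts are restated here purely for illustration, not imported from Swift:

```python
def valid_suffix(value):
    # Same rule as the helper added in this commit: exactly three
    # lowercase hex characters.
    if not isinstance(value, str) or len(value) != 3:
        return False
    return all(c in '0123456789abcdef' for c in value)


def keys_look_sane(hashes):
    # read_hashes() now refuses any pickle whose keys are not valid
    # suffixes or the 'valid'/'updated' bookkeeping keys.
    return all(valid_suffix(key) or key in ('valid', 'updated')
               for key in hashes)


assert keys_look_sane({'abc': 'fake', 'valid': True, 'updated': -1})
# 'abcd1234' is alphanumeric, so the old isalnum() check would have let it
# through; the new rule rejects anything that is not exactly three hex chars
# before it can ever be joined into a path and handed to os.listdir().
assert not keys_look_sane({'abcd1234': 'fake', 'valid': True})
```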
@@ -1292,6 +1298,8 @@ class BaseDiskFileManager(object):
             self.logger.debug('Run listdir on %s', partition_path)
         hashes.update((suffix, None) for suffix in recalculate)
         for suffix, hash_ in list(hashes.items()):
+            if suffix in ('valid', 'updated'):
+                continue
             if not hash_:
                 suffix_dir = join(partition_path, suffix)
                 try:
@@ -1539,12 +1547,14 @@ class BaseDiskFileManager(object):
         if not dev_path:
             raise DiskFileDeviceUnavailable()
         partition_path = get_part_path(dev_path, policy, partition)
-        if not os.path.exists(partition_path):
-            mkdirs(partition_path)
+        suffixes = [suf for suf in suffixes or [] if valid_suffix(suf)]
+
         if skip_rehash:
-            for suffix in suffixes or []:
-                invalidate_hash(os.path.join(partition_path, suffix))
-            return None
+            for suffix in suffixes:
+                self.invalidate_hash(os.path.join(partition_path, suffix))
+            hashes = None
+        elif not os.path.exists(partition_path):
+            hashes = {}
         else:
             _junk, hashes = tpool.execute(
                 self._get_hashes, device, partition, policy,
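Taken together, these branches change get_hashes from "create the partition, then hash it" to "only hash what already exists". A rough standalone sketch of the resulting control flow, with stand-in callables for self.invalidate_hash and the tpool-backed _get_hashes (a paraphrase, not the Swift source):

```python
import os


def valid_suffix(value):
    # Mirrors the new diskfile.valid_suffix() helper.
    return (isinstance(value, str) and len(value) == 3
            and all(c in '0123456789abcdef' for c in value))


def get_hashes_flow(partition_path, suffixes, skip_rehash, invalidate, rehash):
    # Drop anything that is not a well-formed suffix before touching disk.
    suffixes = [suf for suf in suffixes or [] if valid_suffix(suf)]
    if skip_rehash:
        # Only record invalidations; no rehash and no directory creation.
        for suffix in suffixes:
            invalidate(os.path.join(partition_path, suffix))
        return None
    if not os.path.exists(partition_path):
        # Previously this branch would mkdirs() the partition as a side
        # effect; now a missing partition simply reports "no hashes".
        return {}
    return rehash(partition_path, suffixes)


# Missing partition: nothing is created on disk and the result is empty.
missing = '/tmp/swift-example/objects/123'  # hypothetical path, assumed absent
result = get_hashes_flow(missing, ['abc', 'not-a-suffix'], False,
                         invalidate=print, rehash=lambda path, sufs: {})
assert result == {} and not os.path.exists(missing)
```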
@@ -453,6 +453,11 @@ class TestRelinker(unittest.TestCase):
         self.assertEqual('foo', hashes[self._hash[-3:]])
         self.assertFalse(os.path.exists(
             os.path.join(self.part_dir, 'hashes.invalid')))
+        # Check that only the dirty partition in upper half of next part power
+        # has been created and rehashed
+        other_next_part = self.next_part ^ 1
+        other_next_part_dir = os.path.join(self.objects, str(other_next_part))
+        self.assertFalse(os.path.exists(other_next_part_dir))

     def _do_link_test(self, command, old_file_specs, new_file_specs,
                       conflict_file_specs, exp_old_specs, exp_new_specs,
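The `^ 1` above relies on how a partition-power increase splits partitions: each old partition P maps to the two sibling partitions 2P and 2P+1 in the next power, which differ only in the lowest bit. A small standalone illustration of that arithmetic (plain Python, independent of the test fixtures; the sample partition number is made up):

```python
def next_power_parts(old_part):
    # Raising the part power by one splits each old partition into two
    # consecutive new partitions: 2 * old_part and 2 * old_part + 1.
    return 2 * old_part, 2 * old_part + 1


lower, upper = next_power_parts(497)
assert (lower, upper) == (994, 995)
# The two siblings differ only in the least significant bit, so XOR with 1
# flips between them -- which is what `self.next_part ^ 1` does in the test
# above to find the partition that should *not* have been created.
assert lower ^ 1 == upper and upper ^ 1 == lower
```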
@@ -6743,6 +6743,15 @@ class TestSuffixHashes(unittest.TestCase):
         if suffix_dir != suffix_dir2:
             return df2

+    def test_valid_suffix(self):
+        self.assertTrue(diskfile.valid_suffix('000'))
+        self.assertTrue(diskfile.valid_suffix('123'))
+        self.assertTrue(diskfile.valid_suffix('fff'))
+        self.assertFalse(diskfile.valid_suffix(list('123')))
+        self.assertFalse(diskfile.valid_suffix(123))
+        self.assertFalse(diskfile.valid_suffix(' 12'))
+        self.assertFalse(diskfile.valid_suffix('-00'))
+
     def check_cleanup_ondisk_files(self, policy, input_files, output_files):
         orig_unlink = os.unlink
         file_list = list(input_files)
@@ -7104,7 +7113,12 @@ class TestSuffixHashes(unittest.TestCase):
     def test_invalidate_hash_empty_file_exists(self):
         for policy in self.iter_policies():
             df_mgr = self.df_router[policy]
+            part_path = os.path.join(self.devices, 'sda1',
+                                     diskfile.get_data_dir(policy), '0')
+            mkdirs(part_path)
             hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            pkl_path = os.path.join(part_path, diskfile.HASH_FILE)
+            self.assertTrue(os.path.exists(pkl_path))
             self.assertEqual(hashes, {})
             # create something to hash
             df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
@@ -7127,6 +7141,7 @@ class TestSuffixHashes(unittest.TestCase):
             df_mgr = self.df_router[policy]
             part_path = os.path.join(self.devices, 'sda1',
                                      diskfile.get_data_dir(policy), '0')
+            mkdirs(part_path)
             inv_file = os.path.join(
                 part_path, diskfile.HASH_INVALIDATIONS_FILE)
             hash_file = os.path.join(
@@ -7162,9 +7177,14 @@ class TestSuffixHashes(unittest.TestCase):
         # get_hashes call
         for policy in self.iter_policies():
             df_mgr = self.df_router[policy]
+            part_path = os.path.join(self.devices, 'sda1',
+                                     diskfile.get_data_dir(policy), '0')
             if existing:
+                mkdirs(part_path)
                 # force hashes.pkl to exist
                 df_mgr.get_hashes('sda1', '0', [], policy)
+                self.assertTrue(os.path.exists(os.path.join(
+                    part_path, diskfile.HASH_FILE)))
             orig_listdir = os.listdir
             df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
                                      policy=policy)
@@ -7184,10 +7204,15 @@ class TestSuffixHashes(unittest.TestCase):
                 df2.delete(self.ts())
                 return result

+            if not existing:
+                self.assertFalse(os.path.exists(os.path.join(
+                    part_path, diskfile.HASH_FILE)))
             with mock.patch('swift.obj.diskfile.os.listdir',
                             mock_listdir):
-                # creates pkl file
+                # creates pkl file if not already there
                 hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertTrue(os.path.exists(os.path.join(
+                part_path, diskfile.HASH_FILE)))

             # second suffix added after directory listing, it's added later
             self.assertIn(suffix, hashes)
@@ -7216,7 +7241,12 @@ class TestSuffixHashes(unittest.TestCase):
             orig_hash_suffix = df_mgr._hash_suffix
             if existing:
                 # create hashes.pkl
+                part_path = os.path.join(self.devices, 'sda1',
+                                         diskfile.get_data_dir(policy), '0')
+                mkdirs(part_path)
                 df_mgr.get_hashes('sda1', '0', [], policy)
+                self.assertTrue(os.path.exists(os.path.join(
+                    part_path, diskfile.HASH_FILE)))

             df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
                                      policy=policy)
@@ -7257,7 +7287,7 @@ class TestSuffixHashes(unittest.TestCase):
                 return result

             with mock.patch.object(df_mgr, '_hash_suffix', mock_hash_suffix):
-                # creates pkl file and repeats listing when pkl modified
+                # repeats listing when pkl modified
                 hashes = df_mgr.get_hashes('sda1', '0', [], policy)

             # first get_hashes should complete with suffix1 state
@@ -7452,6 +7482,12 @@ class TestSuffixHashes(unittest.TestCase):
         # verify that if consolidate_hashes raises an exception then suffixes
         # are rehashed and a hashes.pkl is written
         for policy in self.iter_policies():
+            part_path = os.path.join(self.devices, 'sda1',
+                                     diskfile.get_data_dir(policy), '0')
+            hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
+            invalidations_file = os.path.join(
+                part_path, diskfile.HASH_INVALIDATIONS_FILE)
+
             self.logger.clear()
             df_mgr = self.df_router[policy]
             # create something to hash
@@ -7461,10 +7497,14 @@ class TestSuffixHashes(unittest.TestCase):
             # avoid getting O_TMPFILE warning in logs
            if not utils.o_tmpfile_in_tmpdir_supported():
                 df.manager.use_linkat = False

+            self.assertFalse(os.path.exists(part_path))
             df.delete(self.ts())
+            self.assertTrue(os.path.exists(invalidations_file))
             suffix_dir = os.path.dirname(df._datadir)
             suffix = os.path.basename(suffix_dir)
+            # no pre-existing hashes.pkl
+            self.assertFalse(os.path.exists(hashes_file))
             with mock.patch.object(df_mgr, '_hash_suffix',
                                    return_value='fake hash'):
                 with mock.patch.object(df_mgr, 'consolidate_hashes',
@@ -7473,10 +7513,6 @@ class TestSuffixHashes(unittest.TestCase):
                     self.assertEqual({suffix: 'fake hash'}, hashes)

             # sanity check hashes file
-            part_path = os.path.join(self.devices, 'sda1',
-                                     diskfile.get_data_dir(policy), '0')
-            hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
-
             with open(hashes_file, 'rb') as f:
                 found_hashes = pickle.load(f)
                 found_hashes.pop('updated')
@@ -7497,9 +7533,6 @@ class TestSuffixHashes(unittest.TestCase):
                     self.assertEqual({suffix: 'new fake hash'}, hashes)

             # sanity check hashes file
-            part_path = os.path.join(self.devices, 'sda1',
-                                     diskfile.get_data_dir(policy), '0')
-            hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
             with open(hashes_file, 'rb') as f:
                 found_hashes = pickle.load(f)
                 found_hashes.pop('updated')
@@ -8185,6 +8218,9 @@ class TestSuffixHashes(unittest.TestCase):
     def test_hash_suffix_listdir_enoent(self):
         for policy in self.iter_policies():
             df_mgr = self.df_router[policy]
+            part_path = os.path.join(self.devices, 'sda1',
+                                     diskfile.get_data_dir(policy), '0')
+            mkdirs(part_path)  # ensure we'll bother writing a pkl at all
             orig_listdir = os.listdir
             listdir_calls = []

@@ -8203,9 +8239,6 @@ class TestSuffixHashes(unittest.TestCase):
                 # does not exist!
                 df_mgr.get_hashes('sda1', '0', ['123'], policy)

-            part_path = os.path.join(self.devices, 'sda1',
-                                     diskfile.get_data_dir(policy), '0')
-
             self.assertEqual(listdir_calls, [
                 # part path gets created automatically
                 (part_path, True),
@@ -8340,7 +8373,7 @@ class TestSuffixHashes(unittest.TestCase):

     # get_hashes tests - behaviors

-    def test_get_hashes_creates_partition_and_pkl(self):
+    def test_get_hashes_does_not_create_partition(self):
         for policy in self.iter_policies():
             df_mgr = self.df_router[policy]
             hashes = df_mgr.get_hashes(self.existing_device, '0', [],
@@ -8348,6 +8381,18 @@ class TestSuffixHashes(unittest.TestCase):
             self.assertEqual(hashes, {})
             part_path = os.path.join(
                 self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
+            self.assertFalse(os.path.exists(part_path))
+
+    def test_get_hashes_creates_pkl(self):
+        # like above, but -- if the partition already exists, make the pickle
+        for policy in self.iter_policies():
+            part_path = os.path.join(
+                self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
+            mkdirs(part_path)
+            df_mgr = self.df_router[policy]
+            hashes = df_mgr.get_hashes(self.existing_device, '0', [],
+                                       policy)
+            self.assertEqual(hashes, {})
             self.assertTrue(os.path.exists(part_path))
             hashes_file = os.path.join(part_path,
                                        diskfile.HASH_FILE)
@@ -8438,6 +8483,7 @@ class TestSuffixHashes(unittest.TestCase):
             # create an empty stale pickle
             part_path = os.path.join(
                 self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
+            mkdirs(part_path)
             hashes_file = os.path.join(part_path,
                                        diskfile.HASH_FILE)
             hashes = df_mgr.get_hashes(self.existing_device, '0', [], policy)
@@ -8641,6 +8687,7 @@ class TestSuffixHashes(unittest.TestCase):
             suffix2 = os.path.basename(os.path.dirname(df2._datadir))
             part_path = os.path.dirname(os.path.dirname(
                 os.path.join(df._datadir)))
+            mkdirs(part_path)
             hashfile_path = os.path.join(part_path, diskfile.HASH_FILE)
             # create hashes.pkl
             hashes = df_mgr.get_hashes(self.existing_device, '0', [],
@@ -8745,8 +8792,13 @@ class TestSuffixHashes(unittest.TestCase):
     def test_get_hashes_modified_recursive_retry(self):
         for policy in self.iter_policies():
             df_mgr = self.df_router[policy]
+            part_path = os.path.join(self.devices, self.existing_device,
+                                     diskfile.get_data_dir(policy), '0')
+            mkdirs(part_path)
             # first create an empty pickle
             df_mgr.get_hashes(self.existing_device, '0', [], policy)
+            self.assertTrue(os.path.exists(os.path.join(
+                part_path, diskfile.HASH_FILE)))
             non_local = {'suffix_count': 1}
             calls = []

@@ -8788,19 +8840,19 @@ class TestHashesHelpers(unittest.TestCase):
         rmtree(self.testdir, ignore_errors=1)

     def test_read_legacy_hashes(self):
-        hashes = {'stub': 'fake'}
+        hashes = {'fff': 'fake'}
         hashes_file = os.path.join(self.testdir, diskfile.HASH_FILE)
         with open(hashes_file, 'wb') as f:
             pickle.dump(hashes, f)
         expected = {
-            'stub': 'fake',
+            'fff': 'fake',
             'updated': -1,
             'valid': True,
         }
         self.assertEqual(expected, diskfile.read_hashes(self.testdir))

     def test_write_hashes_valid_updated(self):
-        hashes = {'stub': 'fake', 'valid': True}
+        hashes = {'888': 'fake', 'valid': True}
         now = time()
         with mock.patch('swift.obj.diskfile.time.time', return_value=now):
             diskfile.write_hashes(self.testdir, hashes)
@@ -8808,7 +8860,7 @@ class TestHashesHelpers(unittest.TestCase):
         with open(hashes_file, 'rb') as f:
             data = pickle.load(f)
         expected = {
-            'stub': 'fake',
+            '888': 'fake',
             'updated': now,
             'valid': True,
         }
@@ -8843,7 +8895,7 @@ class TestHashesHelpers(unittest.TestCase):
         self.assertEqual(expected, data)

     def test_read_write_valid_hashes_mutation_and_transative_equality(self):
-        hashes = {'stub': 'fake', 'valid': True}
+        hashes = {'000': 'fake', 'valid': True}
         diskfile.write_hashes(self.testdir, hashes)
         # write_hashes mutates the passed in hashes, it adds the updated key
         self.assertIn('updated', hashes)
@@ -7158,11 +7158,11 @@ class TestObjectController(unittest.TestCase):
         def my_tpool_execute(func, *args, **kwargs):
             return func(*args, **kwargs)

-        was_get_hashes = diskfile.DiskFileManager._get_hashes
-        was_tpool_exe = tpool.execute
-        try:
-            diskfile.DiskFileManager._get_hashes = fake_get_hashes
-            tpool.execute = my_tpool_execute
+        with mock.patch.object(diskfile.DiskFileManager, '_get_hashes',
+                               fake_get_hashes), \
+                mock.patch.object(tpool, 'execute', my_tpool_execute), \
+                mock.patch('swift.obj.diskfile.os.path.exists',
+                           return_value=True):
             req = Request.blank('/sda1/p/',
                                 environ={'REQUEST_METHOD': 'REPLICATE'},
                                 headers={})
@@ -7170,9 +7170,6 @@ class TestObjectController(unittest.TestCase):
             self.assertEqual(resp.status_int, 200)
             p_data = pickle.loads(resp.body)
             self.assertEqual(p_data, {1: 2})
-        finally:
-            tpool.execute = was_tpool_exe
-            diskfile.DiskFileManager._get_hashes = was_get_hashes

     def test_REPLICATE_pickle_protocol(self):
@@ -7182,25 +7179,22 @@ class TestObjectController(unittest.TestCase):
         def my_tpool_execute(func, *args, **kwargs):
             return func(*args, **kwargs)

-        was_get_hashes = diskfile.DiskFileManager._get_hashes
-        was_tpool_exe = tpool.execute
-        try:
-            diskfile.DiskFileManager._get_hashes = fake_get_hashes
-            tpool.execute = my_tpool_execute
+        with mock.patch.object(diskfile.DiskFileManager, '_get_hashes',
+                               fake_get_hashes), \
+                mock.patch.object(tpool, 'execute', my_tpool_execute), \
+                mock.patch('swift.obj.server.pickle.dumps') as fake_pickle, \
+                mock.patch('swift.obj.diskfile.os.path.exists',
+                           return_value=True):
             req = Request.blank('/sda1/p/',
                                 environ={'REQUEST_METHOD': 'REPLICATE'},
                                 headers={})
-            with mock.patch('swift.obj.server.pickle.dumps') as fake_pickle:
-                fake_pickle.return_value = b''
-                req.get_response(self.object_controller)
-                # This is the key assertion: starting in Python 3.0, the
-                # default protocol version is 3, but such pickles can't be read
-                # on Python 2. As long as we may need to talk to a Python 2
-                # process, we need to cap our protocol version.
-                fake_pickle.assert_called_once_with({1: 2}, protocol=2)
-        finally:
-            tpool.execute = was_tpool_exe
-            diskfile.DiskFileManager._get_hashes = was_get_hashes
+            fake_pickle.return_value = b''
+            req.get_response(self.object_controller)
+            # This is the key assertion: starting in Python 3.0, the
+            # default protocol version is 3, but such pickles can't be read
+            # on Python 2. As long as we may need to talk to a Python 2
+            # process, we need to cap our protocol version.
+            fake_pickle.assert_called_once_with({1: 2}, protocol=2)

     def test_REPLICATE_timeout(self):
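The comment above is the motivation for pinning protocol=2 in the REPLICATE handler. A small standalone illustration using only the stdlib pickle module (independent of Swift's code paths):

```python
import pickle

data = {1: 2}

# On Python 3 the default protocol is at least 3 (3.0-3.7) or 4 (3.8+),
# and such pickles cannot be read by a Python 2 peer.
default_blob = pickle.dumps(data)
capped_blob = pickle.dumps(data, protocol=2)

# Protocol >= 2 pickles start with the PROTO opcode (0x80) followed by the
# protocol number, so the second byte shows which version was used.
assert capped_blob[:2] == b'\x80\x02'
assert pickle.loads(capped_blob) == data
print(pickle.DEFAULT_PROTOCOL, default_blob[1], capped_blob[1])
```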
@@ -7210,18 +7204,17 @@ class TestObjectController(unittest.TestCase):
         def my_tpool_execute(func, *args, **kwargs):
             return func(*args, **kwargs)

-        was_get_hashes = diskfile.DiskFileManager._get_hashes
-        was_tpool_exe = tpool.execute
-        try:
+        with mock.patch.object(diskfile.DiskFileManager, '_get_hashes',
+                               fake_get_hashes), \
+                mock.patch.object(tpool, 'execute', my_tpool_execute), \
+                mock.patch('swift.obj.diskfile.os.path.exists',
+                           return_value=True):
             diskfile.DiskFileManager._get_hashes = fake_get_hashes
             tpool.execute = my_tpool_execute
             req = Request.blank('/sda1/p/',
                                 environ={'REQUEST_METHOD': 'REPLICATE'},
                                 headers={})
             self.assertRaises(Timeout, self.object_controller.REPLICATE, req)
-        finally:
-            tpool.execute = was_tpool_exe
-            diskfile.DiskFileManager._get_hashes = was_get_hashes

     def test_REPLICATE_reclaims_tombstones(self):
         conf = {'devices': self.testdir, 'mount_check': False,