Simplify empty suffix handling
We really only need to have one way to cleanup empty suffix dirs, and that's normally during suffix hashing which only happens when invalid suffixes get rehashed. When we iterate a suffix tree using yield hashes, we may discover an expired or otherwise reapable hashdir - when this happens we will now simply invalidate the suffix so that the next rehash can clean it up. This simplification removes an mis-behavior in the handling between the normal suffix rehashing cleanup and what was implemented in ssync. Change-Id: I5629de9f2e9b2331ed3f455d253efc69d030df72 Related-Change-Id: I2849a757519a30684646f3a6f4467c21e9281707 Closes-Bug: 1816501
This commit is contained in:
parent
703249d6cb
commit
585bf40cc0
@ -1565,12 +1565,10 @@ class BaseDiskFileManager(object):
|
||||
partition_path = get_part_path(dev_path, policy, partition)
|
||||
if suffixes is None:
|
||||
suffixes = self.yield_suffixes(device, partition, policy)
|
||||
considering_all_suffixes = True
|
||||
else:
|
||||
suffixes = (
|
||||
(os.path.join(partition_path, suffix), suffix)
|
||||
for suffix in suffixes)
|
||||
considering_all_suffixes = False
|
||||
|
||||
key_preference = (
|
||||
('ts_meta', 'meta_info', 'timestamp'),
|
||||
@ -1581,17 +1579,16 @@ class BaseDiskFileManager(object):
|
||||
|
||||
# We delete as many empty directories as we can.
|
||||
# cleanup_ondisk_files() takes care of the hash dirs, and we take
|
||||
# care of the suffix dirs and possibly even the partition dir.
|
||||
have_nonempty_suffix = False
|
||||
# care of the suffix dirs and invalidate them
|
||||
for suffix_path, suffix in suffixes:
|
||||
have_nonempty_hashdir = False
|
||||
found_files = False
|
||||
for object_hash in self._listdir(suffix_path):
|
||||
object_path = os.path.join(suffix_path, object_hash)
|
||||
try:
|
||||
results = self.cleanup_ondisk_files(
|
||||
object_path, **kwargs)
|
||||
if results['files']:
|
||||
have_nonempty_hashdir = True
|
||||
found_files = True
|
||||
timestamps = {}
|
||||
for ts_key, info_key, info_ts_key in key_preference:
|
||||
if info_key not in results:
|
||||
@ -1611,38 +1608,8 @@ class BaseDiskFileManager(object):
|
||||
'Invalid diskfile filename in %r (%s)' % (
|
||||
object_path, err))
|
||||
|
||||
if have_nonempty_hashdir:
|
||||
have_nonempty_suffix = True
|
||||
else:
|
||||
try:
|
||||
os.rmdir(suffix_path)
|
||||
except OSError as err:
|
||||
if err.errno not in (errno.ENOENT, errno.ENOTEMPTY):
|
||||
self.logger.debug(
|
||||
'Error cleaning up empty suffix directory %s: %s',
|
||||
suffix_path, err)
|
||||
# cleanup_ondisk_files tries to remove empty hash dirs,
|
||||
# but if it fails, so will we. An empty directory
|
||||
# structure won't cause errors (just slowdown), so we
|
||||
# ignore the exception.
|
||||
if considering_all_suffixes and not have_nonempty_suffix:
|
||||
# There's nothing of interest in the partition, so delete it
|
||||
try:
|
||||
# Remove hashes.pkl *then* hashes.invalid; otherwise, if we
|
||||
# remove hashes.invalid but leave hashes.pkl, that makes it
|
||||
# look as though the invalidations in hashes.invalid never
|
||||
# occurred.
|
||||
_unlink_if_present(os.path.join(partition_path, HASH_FILE))
|
||||
_unlink_if_present(os.path.join(partition_path,
|
||||
HASH_INVALIDATIONS_FILE))
|
||||
# This lock is only held by people dealing with the hashes
|
||||
# or the hash invalidations, and we've just removed those.
|
||||
_unlink_if_present(os.path.join(partition_path, ".lock"))
|
||||
_unlink_if_present(os.path.join(partition_path,
|
||||
".lock-replication"))
|
||||
os.rmdir(partition_path)
|
||||
except OSError as err:
|
||||
self.logger.debug("Error cleaning up empty partition: %s", err)
|
||||
if not found_files:
|
||||
self.invalidate_hash(suffix_path)
|
||||
|
||||
|
||||
class BaseDiskFileWriter(object):
|
||||
|
@ -1415,7 +1415,8 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin):
|
||||
df2_suffix, df2_hash,
|
||||
"1525354556.65758.ts")))
|
||||
|
||||
# Expire the tombstones
|
||||
# Cache the hashes and expire the tombstones
|
||||
self.df_mgr.get_hashes(self.existing_device, '0', [], POLICIES[0])
|
||||
the_time[0] += 2 * self.df_mgr.reclaim_age
|
||||
|
||||
hashes = list(self.df_mgr.yield_hashes(
|
||||
@ -1440,7 +1441,30 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin):
|
||||
self.testdir, self.existing_device, 'objects', '0',
|
||||
df2_suffix, df2_hash)))
|
||||
|
||||
# The empty suffix dirs are gone
|
||||
# The empty suffix dirs, and partition are still there
|
||||
self.assertTrue(os.path.isdir(os.path.join(
|
||||
self.testdir, self.existing_device, 'objects', '0',
|
||||
df1_suffix)))
|
||||
self.assertTrue(os.path.isdir(os.path.join(
|
||||
self.testdir, self.existing_device, 'objects', '0',
|
||||
df2_suffix)))
|
||||
|
||||
# but the suffixes is invalid
|
||||
part_dir = os.path.join(
|
||||
self.testdir, self.existing_device, 'objects', '0')
|
||||
invalidations_file = os.path.join(
|
||||
part_dir, diskfile.HASH_INVALIDATIONS_FILE)
|
||||
with open(invalidations_file) as f:
|
||||
self.assertEqual('%s\n%s' % (df1_suffix, df2_suffix),
|
||||
f.read().strip('\n')) # sanity
|
||||
|
||||
# next time get hashes runs
|
||||
with mock.patch('time.time', mock_time):
|
||||
hashes = self.df_mgr.get_hashes(
|
||||
self.existing_device, '0', [], POLICIES[0])
|
||||
self.assertEqual(hashes, {})
|
||||
|
||||
# ... suffixes will get cleanup
|
||||
self.assertFalse(os.path.exists(os.path.join(
|
||||
self.testdir, self.existing_device, 'objects', '0',
|
||||
df1_suffix)))
|
||||
@ -1448,8 +1472,9 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin):
|
||||
self.testdir, self.existing_device, 'objects', '0',
|
||||
df2_suffix)))
|
||||
|
||||
# The empty partition dir is gone
|
||||
self.assertFalse(os.path.exists(os.path.join(
|
||||
# but really it's not diskfile's jobs to decide if a partition belongs
|
||||
# on a node or not
|
||||
self.assertTrue(os.path.isdir(os.path.join(
|
||||
self.testdir, self.existing_device, 'objects', '0')))
|
||||
|
||||
def test_focused_yield_hashes_does_not_clean_up(self):
|
||||
|
@ -1327,12 +1327,31 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
|
||||
for stat_key, expected in expected_stats.items():
|
||||
stat_method, stat_prefix = stat_key
|
||||
self.assertStatCount(stat_method, stat_prefix, expected)
|
||||
|
||||
stub_data = self.reconstructor._get_hashes(
|
||||
'sda1', 2, self.policy, do_listdir=True)
|
||||
stub_data.update({'7ca': {None: '8f19c38e1cf8e2390d4ca29051407ae3'}})
|
||||
pickle_path = os.path.join(part_path, 'hashes.pkl')
|
||||
with open(pickle_path, 'w') as f:
|
||||
pickle.dump(stub_data, f)
|
||||
|
||||
# part 2 should be totally empty
|
||||
hash_gen = self.reconstructor._df_router[self.policy].yield_hashes(
|
||||
'sda1', '2', self.policy)
|
||||
'sda1', '2', self.policy, suffixes=stub_data.keys())
|
||||
for path, hash_, ts in hash_gen:
|
||||
self.fail('found %s with %s in %s' % (hash_, ts, path))
|
||||
# even the partition directory is gone
|
||||
|
||||
new_hashes = self.reconstructor._get_hashes(
|
||||
'sda1', 2, self.policy, do_listdir=True)
|
||||
self.assertFalse(new_hashes)
|
||||
|
||||
# N.B. the partition directory is removed next pass
|
||||
ssync_calls = []
|
||||
with mocked_http_conn() as request_log:
|
||||
with mock.patch('swift.obj.reconstructor.ssync_sender',
|
||||
self._make_fake_ssync(ssync_calls)):
|
||||
self.reconstructor.reconstruct(override_partitions=[2])
|
||||
self.assertEqual([], ssync_calls)
|
||||
self.assertFalse(os.access(part_path, os.F_OK))
|
||||
|
||||
def test_process_job_all_success(self):
|
||||
|
Loading…
x
Reference in New Issue
Block a user