Simplify empty suffix handling

We really only need one way to clean up empty suffix dirs, and that's
normally during suffix hashing, which only happens when invalid
suffixes get rehashed.

When we iterate a suffix tree using yield_hashes, we may discover an
expired or otherwise reapable hashdir; when this happens we now simply
invalidate the suffix so that the next rehash can clean it up.
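
To make the mechanism concrete, here is a minimal sketch of the
invalidation step, assuming a per-partition hashes.invalid file as in
the diff below. It is a simplified stand-in for Swift's real
invalidate_hash, which also takes a partition lock:

    import os

    HASH_INVALIDATIONS_FILE = 'hashes.invalid'  # per partition, as in the diff

    def invalidate_hash(suffix_path):
        # Record that this suffix needs rehashing by appending its name
        # to the partition's hashes.invalid file; the next rehash will
        # either recompute the suffix or, if it is empty, remove it.
        partition_dir = os.path.dirname(suffix_path)
        suffix = os.path.basename(suffix_path)
        invalidations_file = os.path.join(
            partition_dir, HASH_INVALIDATIONS_FILE)
        with open(invalidations_file, 'a') as f:
            f.write(suffix + '\n')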

This simplification removes a behavioral inconsistency between the
normal suffix rehashing cleanup and the cleanup that ssync had
implemented.

Change-Id: I5629de9f2e9b2331ed3f455d253efc69d030df72
Related-Change-Id: I2849a757519a30684646f3a6f4467c21e9281707
Closes-Bug: 1816501
Clay Gerrard 2019-02-18 20:05:46 -06:00
parent 703249d6cb
commit 585bf40cc0
3 changed files with 55 additions and 44 deletions

swift/obj/diskfile.py

@@ -1565,12 +1565,10 @@ class BaseDiskFileManager(object):
         partition_path = get_part_path(dev_path, policy, partition)
         if suffixes is None:
             suffixes = self.yield_suffixes(device, partition, policy)
-            considering_all_suffixes = True
         else:
             suffixes = (
                 (os.path.join(partition_path, suffix), suffix)
                 for suffix in suffixes)
-            considering_all_suffixes = False

         key_preference = (
             ('ts_meta', 'meta_info', 'timestamp'),
@@ -1581,17 +1579,16 @@
         # We delete as many empty directories as we can.
         # cleanup_ondisk_files() takes care of the hash dirs, and we take
-        # care of the suffix dirs and possibly even the partition dir.
-        have_nonempty_suffix = False
+        # care of the suffix dirs and invalidate them
         for suffix_path, suffix in suffixes:
-            have_nonempty_hashdir = False
+            found_files = False
             for object_hash in self._listdir(suffix_path):
                 object_path = os.path.join(suffix_path, object_hash)
                 try:
                     results = self.cleanup_ondisk_files(
                         object_path, **kwargs)
                     if results['files']:
-                        have_nonempty_hashdir = True
+                        found_files = True
                     timestamps = {}
                     for ts_key, info_key, info_ts_key in key_preference:
                         if info_key not in results:
@@ -1611,38 +1608,8 @@
                         'Invalid diskfile filename in %r (%s)' % (
                             object_path, err))
-            if have_nonempty_hashdir:
-                have_nonempty_suffix = True
-            else:
-                try:
-                    os.rmdir(suffix_path)
-                except OSError as err:
-                    if err.errno not in (errno.ENOENT, errno.ENOTEMPTY):
-                        self.logger.debug(
-                            'Error cleaning up empty suffix directory %s: %s',
-                            suffix_path, err)
-                    # cleanup_ondisk_files tries to remove empty hash dirs,
-                    # but if it fails, so will we. An empty directory
-                    # structure won't cause errors (just slowdown), so we
-                    # ignore the exception.
-        if considering_all_suffixes and not have_nonempty_suffix:
-            # There's nothing of interest in the partition, so delete it
-            try:
-                # Remove hashes.pkl *then* hashes.invalid; otherwise, if we
-                # remove hashes.invalid but leave hashes.pkl, that makes it
-                # look as though the invalidations in hashes.invalid never
-                # occurred.
-                _unlink_if_present(os.path.join(partition_path, HASH_FILE))
-                _unlink_if_present(os.path.join(partition_path,
-                                                HASH_INVALIDATIONS_FILE))
-                # This lock is only held by people dealing with the hashes
-                # or the hash invalidations, and we've just removed those.
-                _unlink_if_present(os.path.join(partition_path, ".lock"))
-                _unlink_if_present(os.path.join(partition_path,
-                                                ".lock-replication"))
-                os.rmdir(partition_path)
-            except OSError as err:
-                self.logger.debug("Error cleaning up empty partition: %s", err)
+            if not found_files:
+                self.invalidate_hash(suffix_path)

 class BaseDiskFileWriter(object):

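For context on where those invalidations get consumed, here is a rough
sketch of the rehash-side consolidation. The helper name and details
are hypothetical (the real get_hashes recomputes per-suffix hashes
under a partition lock), but the shape matches the diff above:
suffixes listed in hashes.invalid are folded into hashes.pkl as None,
and invalidated suffixes that turn out to be empty are removed.

    import errno
    import os
    import pickle

    def consolidate_and_cleanup(partition_dir):
        # Fold pending suffix invalidations into the cached hashes, then
        # remove any invalidated suffix dir that turns out to be empty.
        pkl_path = os.path.join(partition_dir, 'hashes.pkl')
        inv_path = os.path.join(partition_dir, 'hashes.invalid')
        hashes = {}
        if os.path.exists(pkl_path):
            with open(pkl_path, 'rb') as f:
                hashes = pickle.load(f)
        if os.path.exists(inv_path):
            with open(inv_path) as f:
                for suffix in f.read().split('\n'):
                    if suffix:
                        hashes[suffix] = None  # None means "needs rehash"
            os.unlink(inv_path)
        for suffix in [s for s, h in hashes.items() if h is None]:
            try:
                os.rmdir(os.path.join(partition_dir, suffix))
            except OSError as err:
                if err.errno == errno.ENOTEMPTY:
                    continue  # a real rehash would recompute this suffix
                if err.errno != errno.ENOENT:
                    raise
            del hashes[suffix]
        with open(pkl_path, 'wb') as f:
            pickle.dump(hashes, f)
        return hashes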
test/unit/obj/test_diskfile.py

@@ -1415,7 +1415,8 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin):
             df2_suffix, df2_hash,
             "1525354556.65758.ts")))
-        # Expire the tombstones
+        # Cache the hashes and expire the tombstones
+        self.df_mgr.get_hashes(self.existing_device, '0', [], POLICIES[0])
         the_time[0] += 2 * self.df_mgr.reclaim_age
         hashes = list(self.df_mgr.yield_hashes(
@@ -1440,7 +1441,30 @@
             self.testdir, self.existing_device, 'objects', '0',
             df2_suffix, df2_hash)))
-        # The empty suffix dirs are gone
+        # The empty suffix dirs, and the partition, are still there
+        self.assertTrue(os.path.isdir(os.path.join(
+            self.testdir, self.existing_device, 'objects', '0',
+            df1_suffix)))
+        self.assertTrue(os.path.isdir(os.path.join(
+            self.testdir, self.existing_device, 'objects', '0',
+            df2_suffix)))
+        # but the suffixes are invalid
+        part_dir = os.path.join(
+            self.testdir, self.existing_device, 'objects', '0')
+        invalidations_file = os.path.join(
+            part_dir, diskfile.HASH_INVALIDATIONS_FILE)
+        with open(invalidations_file) as f:
+            self.assertEqual('%s\n%s' % (df1_suffix, df2_suffix),
+                             f.read().strip('\n'))  # sanity
+        # next time get_hashes runs
+        with mock.patch('time.time', mock_time):
+            hashes = self.df_mgr.get_hashes(
+                self.existing_device, '0', [], POLICIES[0])
+        self.assertEqual(hashes, {})
+        # ... the suffixes will get cleaned up
+        self.assertFalse(os.path.exists(os.path.join(
             self.testdir, self.existing_device, 'objects', '0',
             df1_suffix)))
@@ -1448,8 +1472,9 @@
             self.testdir, self.existing_device, 'objects', '0',
             df2_suffix)))
-        # The empty partition dir is gone
-        self.assertFalse(os.path.exists(os.path.join(
+        # but really it's not diskfile's job to decide if a partition
+        # belongs on a node or not
+        self.assertTrue(os.path.isdir(os.path.join(
             self.testdir, self.existing_device, 'objects', '0')))

     def test_focused_yield_hashes_does_not_clean_up(self):

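Stripped of the test harness, the state transitions asserted above
look roughly like this (illustrative partition path;
consolidate_and_cleanup is the hypothetical helper sketched earlier):

    import os

    part_dir = '/srv/node/sda1/objects/0'  # hypothetical partition path

    # After yield_hashes reaps the expired tombstones, the empty suffix
    # dirs still exist on disk but are listed in hashes.invalid:
    with open(os.path.join(part_dir, 'hashes.invalid')) as f:
        pending = [s for s in f.read().split('\n') if s]
    assert all(os.path.isdir(os.path.join(part_dir, s)) for s in pending)

    # After the next rehash consolidates those invalidations, the empty
    # suffix dirs are gone, but the partition dir itself is left alone:
    hashes = consolidate_and_cleanup(part_dir)
    assert hashes == {}
    assert not any(os.path.isdir(os.path.join(part_dir, s)) for s in pending)
    assert os.path.isdir(part_dir)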
test/unit/obj/test_reconstructor.py

@@ -1327,12 +1327,31 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
         for stat_key, expected in expected_stats.items():
             stat_method, stat_prefix = stat_key
             self.assertStatCount(stat_method, stat_prefix, expected)
+        stub_data = self.reconstructor._get_hashes(
+            'sda1', 2, self.policy, do_listdir=True)
+        stub_data.update({'7ca': {None: '8f19c38e1cf8e2390d4ca29051407ae3'}})
+        pickle_path = os.path.join(part_path, 'hashes.pkl')
+        with open(pickle_path, 'w') as f:
+            pickle.dump(stub_data, f)
         # part 2 should be totally empty
         hash_gen = self.reconstructor._df_router[self.policy].yield_hashes(
-            'sda1', '2', self.policy)
+            'sda1', '2', self.policy, suffixes=stub_data.keys())
         for path, hash_, ts in hash_gen:
             self.fail('found %s with %s in %s' % (hash_, ts, path))
-        # even the partition directory is gone
+        new_hashes = self.reconstructor._get_hashes(
+            'sda1', 2, self.policy, do_listdir=True)
+        self.assertFalse(new_hashes)
+        # N.B. the partition directory is removed next pass
+        ssync_calls = []
+        with mocked_http_conn() as request_log:
+            with mock.patch('swift.obj.reconstructor.ssync_sender',
+                            self._make_fake_ssync(ssync_calls)):
+                self.reconstructor.reconstruct(override_partitions=[2])
+        self.assertEqual([], ssync_calls)
         self.assertFalse(os.access(part_path, os.F_OK))

     def test_process_job_all_success(self):
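
One detail worth noting in the stub above: for an EC policy, each
suffix entry in the cached hashes maps a fragment index (or None, for
files not tied to a fragment) to an md5 hexdigest. A hypothetical
helper that writes such a stub, should other tests need one:

    import os
    import pickle

    def stub_hashes_pkl(part_path, suffix_md5_map):
        # Hypothetical helper mirroring what the test does inline: write
        # a fake cached-hashes file claiming each suffix has content.
        # (Binary mode here; the py2-era test above opens in text mode.)
        stub_data = {suffix: {None: md5_hex}
                     for suffix, md5_hex in suffix_md5_map.items()}
        with open(os.path.join(part_path, 'hashes.pkl'), 'wb') as f:
            pickle.dump(stub_data, f)
        return stub_data

    # e.g. the equivalent of the test's stub for suffix '7ca':
    # stub_hashes_pkl(part_path, {'7ca': '8f19c38e1cf8e2390d4ca29051407ae3'})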