Add handoffs_first and handoff_delete to obj-repl
If handoffs_first is True, then the object replicator will give partitions that are not supposed to be on the node priority. If handoff_delete is set to a number (n), then it will delete a handoff partition if at least n replicas were successfully replicated Also fixed a couple of things in the object replicator unit tests and added some more DocImpact Change-Id: Icb9968953cf467be2a52046fb16f4b84eb5604e4
This commit is contained in:
parent
cb114e5ecf
commit
a30a7ced9c
@ -414,6 +414,19 @@ stats_interval 3600 Interval in seconds between logging
|
|||||||
replication statistics
|
replication statistics
|
||||||
reclaim_age 604800 Time elapsed in seconds before an
|
reclaim_age 604800 Time elapsed in seconds before an
|
||||||
object can be reclaimed
|
object can be reclaimed
|
||||||
|
handoffs_first false If set to True, partitions that are
|
||||||
|
not supposed to be on the node will be
|
||||||
|
replicated first. The default setting
|
||||||
|
should not be changed, except for
|
||||||
|
extreme situations.
|
||||||
|
handoff_delete auto By default handoff partitions will be
|
||||||
|
removed when it has successfully
|
||||||
|
replicated to all the cannonical nodes.
|
||||||
|
If set to an integer n, it will remove
|
||||||
|
the partition if it is successfully
|
||||||
|
replicated to n nodes. The default
|
||||||
|
setting should not be changed, except
|
||||||
|
for extremem situations.
|
||||||
================== ================= =======================================
|
================== ================= =======================================
|
||||||
|
|
||||||
[object-updater]
|
[object-updater]
|
||||||
|
@ -31,7 +31,7 @@ from swift.common.ring import Ring
|
|||||||
from swift.common.utils import whataremyips, unlink_older_than, \
|
from swift.common.utils import whataremyips, unlink_older_than, \
|
||||||
compute_eta, get_logger, dump_recon_cache, \
|
compute_eta, get_logger, dump_recon_cache, \
|
||||||
rsync_ip, mkdirs, config_true_value, list_from_csv, get_hub, \
|
rsync_ip, mkdirs, config_true_value, list_from_csv, get_hub, \
|
||||||
tpool_reraise
|
tpool_reraise, config_auto_int_value
|
||||||
from swift.common.bufferedhttp import http_connect
|
from swift.common.bufferedhttp import http_connect
|
||||||
from swift.common.daemon import Daemon
|
from swift.common.daemon import Daemon
|
||||||
from swift.common.http import HTTP_OK, HTTP_INSUFFICIENT_STORAGE
|
from swift.common.http import HTTP_OK, HTTP_INSUFFICIENT_STORAGE
|
||||||
@ -83,6 +83,10 @@ class ObjectReplicator(Daemon):
|
|||||||
'user-agent': 'obj-replicator %s' % os.getpid()}
|
'user-agent': 'obj-replicator %s' % os.getpid()}
|
||||||
self.rsync_error_log_line_length = \
|
self.rsync_error_log_line_length = \
|
||||||
int(conf.get('rsync_error_log_line_length', 0))
|
int(conf.get('rsync_error_log_line_length', 0))
|
||||||
|
self.handoffs_first = config_true_value(conf.get('handoffs_first',
|
||||||
|
False))
|
||||||
|
self.handoff_delete = config_auto_int_value(
|
||||||
|
conf.get('handoff_delete', 'auto'), 0)
|
||||||
|
|
||||||
def _rsync(self, args):
|
def _rsync(self, args):
|
||||||
"""
|
"""
|
||||||
@ -212,8 +216,15 @@ class ObjectReplicator(Daemon):
|
|||||||
'/' + '-'.join(suffixes), headers=self.headers)
|
'/' + '-'.join(suffixes), headers=self.headers)
|
||||||
conn.getresponse().read()
|
conn.getresponse().read()
|
||||||
responses.append(success)
|
responses.append(success)
|
||||||
if not suffixes or (len(responses) ==
|
if self.handoff_delete:
|
||||||
len(job['nodes']) and all(responses)):
|
# delete handoff if we have had handoff_delete successes
|
||||||
|
delete_handoff = len([resp for resp in responses if resp]) >= \
|
||||||
|
self.handoff_delete
|
||||||
|
else:
|
||||||
|
# delete handoff if all syncs were successful
|
||||||
|
delete_handoff = len(responses) == len(job['nodes']) and \
|
||||||
|
all(responses)
|
||||||
|
if not suffixes or delete_handoff:
|
||||||
self.logger.info(_("Removing partition: %s"), job['path'])
|
self.logger.info(_("Removing partition: %s"), job['path'])
|
||||||
tpool.execute(shutil.rmtree, job['path'], ignore_errors=True)
|
tpool.execute(shutil.rmtree, job['path'], ignore_errors=True)
|
||||||
except (Exception, Timeout):
|
except (Exception, Timeout):
|
||||||
@ -412,6 +423,9 @@ class ObjectReplicator(Daemon):
|
|||||||
except (ValueError, OSError):
|
except (ValueError, OSError):
|
||||||
continue
|
continue
|
||||||
random.shuffle(jobs)
|
random.shuffle(jobs)
|
||||||
|
if self.handoffs_first:
|
||||||
|
# Move the handoff parts to the front of the list
|
||||||
|
jobs.sort(key=lambda job: not job['delete'])
|
||||||
self.job_count = len(jobs)
|
self.job_count = len(jobs)
|
||||||
return jobs
|
return jobs
|
||||||
|
|
||||||
|
@ -236,7 +236,7 @@ class TestObjectReplicator(unittest.TestCase):
|
|||||||
for job in jobs:
|
for job in jobs:
|
||||||
jobs_by_part[job['partition']] = job
|
jobs_by_part[job['partition']] = job
|
||||||
self.assertEquals(len(jobs_to_delete), 1)
|
self.assertEquals(len(jobs_to_delete), 1)
|
||||||
self.assertTrue('1', jobs_to_delete[0]['partition'])
|
self.assertEquals('1', jobs_to_delete[0]['partition'])
|
||||||
self.assertEquals(
|
self.assertEquals(
|
||||||
[node['id'] for node in jobs_by_part['0']['nodes']], [1, 2])
|
[node['id'] for node in jobs_by_part['0']['nodes']], [1, 2])
|
||||||
self.assertEquals(
|
self.assertEquals(
|
||||||
@ -251,6 +251,12 @@ class TestObjectReplicator(unittest.TestCase):
|
|||||||
self.assertEquals(jobs_by_part[part]['path'],
|
self.assertEquals(jobs_by_part[part]['path'],
|
||||||
os.path.join(self.objects, part))
|
os.path.join(self.objects, part))
|
||||||
|
|
||||||
|
def test_collect_jobs_handoffs_first(self):
|
||||||
|
self.replicator.handoffs_first = True
|
||||||
|
jobs = self.replicator.collect_jobs()
|
||||||
|
self.assertTrue(jobs[0]['delete'])
|
||||||
|
self.assertEquals('1', jobs[0]['partition'])
|
||||||
|
|
||||||
def test_collect_jobs_removes_zbf(self):
|
def test_collect_jobs_removes_zbf(self):
|
||||||
"""
|
"""
|
||||||
After running xfs_repair, a partition directory could become a
|
After running xfs_repair, a partition directory could become a
|
||||||
@ -292,13 +298,137 @@ class TestObjectReplicator(unittest.TestCase):
|
|||||||
with mock.patch('swift.obj.replicator.http_connect',
|
with mock.patch('swift.obj.replicator.http_connect',
|
||||||
mock_http_connect(200)):
|
mock_http_connect(200)):
|
||||||
df = diskfile.DiskFile(self.devices,
|
df = diskfile.DiskFile(self.devices,
|
||||||
'sda', '0', 'a', 'c', 'o', FakeLogger())
|
'sda', '1', 'a', 'c', 'o', FakeLogger())
|
||||||
mkdirs(df.datadir)
|
mkdirs(df.datadir)
|
||||||
|
print df.datadir
|
||||||
|
f = open(os.path.join(df.datadir,
|
||||||
|
normalize_timestamp(time.time()) + '.data'),
|
||||||
|
'wb')
|
||||||
|
f.write('1234567890')
|
||||||
|
f.close()
|
||||||
|
ohash = hash_path('a', 'c', 'o')
|
||||||
|
data_dir = ohash[-3:]
|
||||||
|
whole_path_from = os.path.join(self.objects, '1', data_dir)
|
||||||
part_path = os.path.join(self.objects, '1')
|
part_path = os.path.join(self.objects, '1')
|
||||||
self.assertTrue(os.access(part_path, os.F_OK))
|
self.assertTrue(os.access(part_path, os.F_OK))
|
||||||
|
nodes = [node for node in
|
||||||
|
self.ring.get_part_nodes(1)
|
||||||
|
if node['ip'] not in _ips()]
|
||||||
|
process_arg_checker = []
|
||||||
|
for node in nodes:
|
||||||
|
rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
|
||||||
|
process_arg_checker.append(
|
||||||
|
(0, '', ['rsync', whole_path_from, rsync_mod]))
|
||||||
|
with _mock_process(process_arg_checker):
|
||||||
self.replicator.replicate()
|
self.replicator.replicate()
|
||||||
self.assertFalse(os.access(part_path, os.F_OK))
|
self.assertFalse(os.access(part_path, os.F_OK))
|
||||||
|
|
||||||
|
def test_delete_partition_with_failures(self):
|
||||||
|
with mock.patch('swift.obj.replicator.http_connect',
|
||||||
|
mock_http_connect(200)):
|
||||||
|
df = diskfile.DiskFile(self.devices,
|
||||||
|
'sda', '1', 'a', 'c', 'o', FakeLogger())
|
||||||
|
mkdirs(df.datadir)
|
||||||
|
print df.datadir
|
||||||
|
f = open(os.path.join(df.datadir,
|
||||||
|
normalize_timestamp(time.time()) + '.data'),
|
||||||
|
'wb')
|
||||||
|
f.write('1234567890')
|
||||||
|
f.close()
|
||||||
|
ohash = hash_path('a', 'c', 'o')
|
||||||
|
data_dir = ohash[-3:]
|
||||||
|
whole_path_from = os.path.join(self.objects, '1', data_dir)
|
||||||
|
part_path = os.path.join(self.objects, '1')
|
||||||
|
self.assertTrue(os.access(part_path, os.F_OK))
|
||||||
|
nodes = [node for node in
|
||||||
|
self.ring.get_part_nodes(1)
|
||||||
|
if node['ip'] not in _ips()]
|
||||||
|
process_arg_checker = []
|
||||||
|
for i, node in enumerate(nodes):
|
||||||
|
rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
|
||||||
|
if i == 0:
|
||||||
|
# force one of the rsync calls to fail
|
||||||
|
ret_code = 1
|
||||||
|
else:
|
||||||
|
ret_code = 0
|
||||||
|
process_arg_checker.append(
|
||||||
|
(ret_code, '', ['rsync', whole_path_from, rsync_mod]))
|
||||||
|
with _mock_process(process_arg_checker):
|
||||||
|
self.replicator.replicate()
|
||||||
|
# The path should still exist
|
||||||
|
self.assertTrue(os.access(part_path, os.F_OK))
|
||||||
|
|
||||||
|
def test_delete_partition_with_handoff_delete(self):
|
||||||
|
with mock.patch('swift.obj.replicator.http_connect',
|
||||||
|
mock_http_connect(200)):
|
||||||
|
self.replicator.handoff_delete = 2
|
||||||
|
df = diskfile.DiskFile(self.devices,
|
||||||
|
'sda', '1', 'a', 'c', 'o', FakeLogger())
|
||||||
|
mkdirs(df.datadir)
|
||||||
|
print df.datadir
|
||||||
|
f = open(os.path.join(df.datadir,
|
||||||
|
normalize_timestamp(time.time()) + '.data'),
|
||||||
|
'wb')
|
||||||
|
f.write('1234567890')
|
||||||
|
f.close()
|
||||||
|
ohash = hash_path('a', 'c', 'o')
|
||||||
|
data_dir = ohash[-3:]
|
||||||
|
whole_path_from = os.path.join(self.objects, '1', data_dir)
|
||||||
|
part_path = os.path.join(self.objects, '1')
|
||||||
|
self.assertTrue(os.access(part_path, os.F_OK))
|
||||||
|
nodes = [node for node in
|
||||||
|
self.ring.get_part_nodes(1)
|
||||||
|
if node['ip'] not in _ips()]
|
||||||
|
process_arg_checker = []
|
||||||
|
for i, node in enumerate(nodes):
|
||||||
|
rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
|
||||||
|
if i == 0:
|
||||||
|
# force one of the rsync calls to fail
|
||||||
|
ret_code = 1
|
||||||
|
else:
|
||||||
|
ret_code = 0
|
||||||
|
process_arg_checker.append(
|
||||||
|
(ret_code, '', ['rsync', whole_path_from, rsync_mod]))
|
||||||
|
with _mock_process(process_arg_checker):
|
||||||
|
self.replicator.replicate()
|
||||||
|
self.assertFalse(os.access(part_path, os.F_OK))
|
||||||
|
|
||||||
|
def test_delete_partition_with_handoff_delete_failures(self):
|
||||||
|
with mock.patch('swift.obj.replicator.http_connect',
|
||||||
|
mock_http_connect(200)):
|
||||||
|
self.replicator.handoff_delete = 2
|
||||||
|
df = diskfile.DiskFile(self.devices,
|
||||||
|
'sda', '1', 'a', 'c', 'o', FakeLogger())
|
||||||
|
mkdirs(df.datadir)
|
||||||
|
print df.datadir
|
||||||
|
f = open(os.path.join(df.datadir,
|
||||||
|
normalize_timestamp(time.time()) + '.data'),
|
||||||
|
'wb')
|
||||||
|
f.write('1234567890')
|
||||||
|
f.close()
|
||||||
|
ohash = hash_path('a', 'c', 'o')
|
||||||
|
data_dir = ohash[-3:]
|
||||||
|
whole_path_from = os.path.join(self.objects, '1', data_dir)
|
||||||
|
part_path = os.path.join(self.objects, '1')
|
||||||
|
self.assertTrue(os.access(part_path, os.F_OK))
|
||||||
|
nodes = [node for node in
|
||||||
|
self.ring.get_part_nodes(1)
|
||||||
|
if node['ip'] not in _ips()]
|
||||||
|
process_arg_checker = []
|
||||||
|
for i, node in enumerate(nodes):
|
||||||
|
rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
|
||||||
|
if i in (0, 1):
|
||||||
|
# force two of the rsync calls to fail
|
||||||
|
ret_code = 1
|
||||||
|
else:
|
||||||
|
ret_code = 0
|
||||||
|
process_arg_checker.append(
|
||||||
|
(ret_code, '', ['rsync', whole_path_from, rsync_mod]))
|
||||||
|
with _mock_process(process_arg_checker):
|
||||||
|
self.replicator.replicate()
|
||||||
|
# The file should still exist
|
||||||
|
self.assertTrue(os.access(part_path, os.F_OK))
|
||||||
|
|
||||||
def test_delete_partition_override_params(self):
|
def test_delete_partition_override_params(self):
|
||||||
df = diskfile.DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o',
|
df = diskfile.DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o',
|
||||||
FakeLogger())
|
FakeLogger())
|
||||||
|
Loading…
Reference in New Issue
Block a user