Make rsync ignore it's own temporary files
In situations where rsync may inadvertently be unable to cleanup it's temporary files we shouldn't spread them around the cluster. By asking our rsync subexec to --exclude patterns that match it's own convention for temporary naming we'll only ever transfer real replicated artifacts and never temporary artifacts which should always be ignored until they are fully transfered. Cleanup of stale rsync droppings should be performed by the auditor and will be addressed in a separate change related to lp bug #1554005. Closes-Bug: #1553995 Change-Id: Ibe598b339af024d05e4d89c34d696e972d8189ff
This commit is contained in:
parent
9e90d30962
commit
f87a5487b5
@ -227,6 +227,7 @@ class ObjectReplicator(Daemon):
|
||||
'--timeout=%s' % self.rsync_io_timeout,
|
||||
'--contimeout=%s' % self.rsync_io_timeout,
|
||||
'--bwlimit=%s' % self.rsync_bwlimit,
|
||||
'--exclude=.*.%s' % ''.join('[0-9a-zA-Z]' for i in range(6))
|
||||
]
|
||||
if self.rsync_compress and \
|
||||
job['region'] != node['region']:
|
||||
|
@ -19,6 +19,7 @@ from uuid import uuid4
|
||||
import random
|
||||
from hashlib import md5
|
||||
from collections import defaultdict
|
||||
import os
|
||||
|
||||
from swiftclient import client
|
||||
|
||||
@ -82,6 +83,22 @@ class TestObjectHandoff(ReplProbeTest):
|
||||
raise Exception('Direct object GET did not return VERIFY, instead '
|
||||
'it returned: %s' % repr(odata))
|
||||
|
||||
# drop a tempfile in the handoff's datadir, like it might have
|
||||
# had if there was an rsync failure while it was previously a
|
||||
# primary
|
||||
handoff_device_path = self.device_dir('object', another_onode)
|
||||
data_filename = None
|
||||
for root, dirs, files in os.walk(handoff_device_path):
|
||||
for filename in files:
|
||||
if filename.endswith('.data'):
|
||||
data_filename = filename
|
||||
temp_filename = '.%s.6MbL6r' % data_filename
|
||||
temp_filepath = os.path.join(root, temp_filename)
|
||||
if not data_filename:
|
||||
self.fail('Did not find any data files on %r' %
|
||||
handoff_device_path)
|
||||
open(temp_filepath, 'w')
|
||||
|
||||
# Assert container listing (via proxy and directly) has container/obj
|
||||
objs = [o['name'] for o in
|
||||
client.get_container(self.url, self.token, container)[1]]
|
||||
@ -134,6 +151,20 @@ class TestObjectHandoff(ReplProbeTest):
|
||||
raise Exception('Direct object GET did not return VERIFY, instead '
|
||||
'it returned: %s' % repr(odata))
|
||||
|
||||
# and that it does *not* have a temporary rsync dropping!
|
||||
found_data_filename = False
|
||||
primary_device_path = self.device_dir('object', onode)
|
||||
for root, dirs, files in os.walk(primary_device_path):
|
||||
for filename in files:
|
||||
if filename.endswith('.6MbL6r'):
|
||||
self.fail('Found unexpected file %s' %
|
||||
os.path.join(root, filename))
|
||||
if filename == data_filename:
|
||||
found_data_filename = True
|
||||
self.assertTrue(found_data_filename,
|
||||
'Did not find data file %r on %r' % (
|
||||
data_filename, primary_device_path))
|
||||
|
||||
# Assert the handoff server no longer has container/obj
|
||||
try:
|
||||
direct_client.direct_get_object(
|
||||
|
@ -910,6 +910,49 @@ class TestObjectReplicator(unittest.TestCase):
|
||||
jobs = self.replicator.collect_jobs()
|
||||
self.assertEqual(len(jobs), 0)
|
||||
|
||||
def test_replicator_skips_rsync_temp_files(self):
|
||||
# the empty pre-setup dirs aren't that useful to us
|
||||
device_path = os.path.join(self.devices, 'sda')
|
||||
rmtree(device_path, ignore_errors=1)
|
||||
os.mkdir(device_path)
|
||||
# create a real data file to trigger rsync
|
||||
df = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o',
|
||||
policy=POLICIES.legacy)
|
||||
ts = next(self.ts)
|
||||
with df.create() as w:
|
||||
w.write('asdf')
|
||||
w.put({'X-Timestamp': ts.internal})
|
||||
w.commit(ts)
|
||||
# pre-flight and post sync request for both other primaries
|
||||
expected_replicate_requests = 4
|
||||
process_arg_checker = [
|
||||
# (return_code, stdout, <each in capture rsync args>)
|
||||
(0, '', []),
|
||||
(0, '', []),
|
||||
]
|
||||
stub_body = pickle.dumps({})
|
||||
with _mock_process(process_arg_checker) as rsync_log, \
|
||||
mock.patch('swift.obj.replicator.whataremyips',
|
||||
side_effect=_ips), \
|
||||
mocked_http_conn(*[200] * expected_replicate_requests,
|
||||
body=stub_body) as conn_log:
|
||||
self.replicator.replicate()
|
||||
self.assertEqual(['REPLICATE'] * expected_replicate_requests,
|
||||
[r['method'] for r in conn_log.requests])
|
||||
# expect one rsync to each other primary node
|
||||
self.assertEqual(2, len(rsync_log))
|
||||
expected = '--exclude=.*.[0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]' \
|
||||
'[0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]'
|
||||
for subprocess_info in rsync_log:
|
||||
rsync_args = subprocess_info['rsync_args']
|
||||
for arg in rsync_args:
|
||||
if arg.startswith('--exclude'):
|
||||
self.assertEqual(arg, expected)
|
||||
break
|
||||
else:
|
||||
self.fail('Did not find --exclude argument in %r' %
|
||||
rsync_args)
|
||||
|
||||
def test_replicator_removes_zbf(self):
|
||||
# After running xfs_repair, a partition directory could become a
|
||||
# zero-byte file. If this happens, the replicator should clean it
|
||||
|
Loading…
x
Reference in New Issue
Block a user