Fixed bug with container reclaim/report race
Previously, a badly lagging cluster might fail to send the final report for a deleted container database to its corresponding account database. In that case, the container database file would be permanently deleted while the container was still listed in the account database, and the listing would never be updated because the actual container database file was gone. The only way to fix such a situation was to recreate and then re-delete the container. Now, the container database file is not permanently deleted until it has successfully sent its final report to its corresponding account database.

Change-Id: I1f42202455e7ecb0533b84ce7f45fcc7b98aeaa3
This commit is contained in:
parent
783f16035a
commit
213f385348
@ -361,6 +361,7 @@ class Replicator(Daemon):
|
|||||||
broker.reclaim(time.time() - self.reclaim_age,
|
broker.reclaim(time.time() - self.reclaim_age,
|
||||||
time.time() - (self.reclaim_age * 2))
|
time.time() - (self.reclaim_age * 2))
|
||||||
info = broker.get_replication_info()
|
info = broker.get_replication_info()
|
||||||
|
full_info = broker.get_info()
|
||||||
except (Exception, Timeout), e:
|
except (Exception, Timeout), e:
|
||||||
if 'no such table' in str(e):
|
if 'no such table' in str(e):
|
||||||
self.logger.error(_('Quarantining DB %s'), object_file)
|
self.logger.error(_('Quarantining DB %s'), object_file)
|
||||||
@ -385,10 +386,11 @@ class Replicator(Daemon):
|
|||||||
if delete_timestamp < (time.time() - self.reclaim_age) and \
|
if delete_timestamp < (time.time() - self.reclaim_age) and \
|
||||||
delete_timestamp > put_timestamp and \
|
delete_timestamp > put_timestamp and \
|
||||||
info['count'] in (None, '', 0, '0'):
|
info['count'] in (None, '', 0, '0'):
|
||||||
with lock_parent_directory(object_file):
|
if self.report_up_to_date(full_info):
|
||||||
shutil.rmtree(os.path.dirname(object_file), True)
|
with lock_parent_directory(object_file):
|
||||||
self.stats['remove'] += 1
|
shutil.rmtree(os.path.dirname(object_file), True)
|
||||||
self.logger.increment('removes')
|
self.stats['remove'] += 1
|
||||||
|
self.logger.increment('removes')
|
||||||
self.logger.timing_since('timing', start_time)
|
self.logger.timing_since('timing', start_time)
|
||||||
return
|
return
|
||||||
responses = []
|
responses = []
|
||||||
@ -422,6 +424,9 @@ class Replicator(Daemon):
|
|||||||
self.logger.increment('removes')
|
self.logger.increment('removes')
|
||||||
self.logger.timing_since('timing', start_time)
|
self.logger.timing_since('timing', start_time)
|
||||||
|
|
||||||
|
def report_up_to_date(self, full_info):
|
||||||
|
return True
|
||||||
|
|
||||||
def roundrobin_datadirs(self, datadirs):
|
def roundrobin_datadirs(self, datadirs):
|
||||||
"""
|
"""
|
||||||
Generator to walk the data dirs in a round robin manner, evenly
|
Generator to walk the data dirs in a round robin manner, evenly
|
||||||
|
@ -22,3 +22,10 @@ class ContainerReplicator(db_replicator.Replicator):
|
|||||||
brokerclass = db.ContainerBroker
|
brokerclass = db.ContainerBroker
|
||||||
datadir = container_server.DATADIR
|
datadir = container_server.DATADIR
|
||||||
default_port = 6001
|
default_port = 6001
|
||||||
|
|
||||||
|
def report_up_to_date(self, full_info):
|
||||||
|
for key in ('put_timestamp', 'delete_timestamp', 'object_count',
|
||||||
|
'bytes_used'):
|
||||||
|
if full_info['reported_' + key] != full_info[key]:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
@ -15,17 +15,45 @@
|
|||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
from swift.container import replicator
|
from swift.container import replicator
|
||||||
|
from swift.common.utils import normalize_timestamp
|
||||||
|
|
||||||
|
|
||||||
class TestReplicator(unittest.TestCase):
|
class TestReplicator(unittest.TestCase):
|
||||||
"""
|
|
||||||
swift.container.replicator is currently just a subclass with some class
|
|
||||||
variables overridden, but at least this test stub will ensure proper Python
|
|
||||||
syntax.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def test_placeholder(self):
|
def setUp(self):
|
||||||
pass
|
self.orig_ring = replicator.db_replicator.ring.Ring
|
||||||
|
replicator.db_replicator.ring.Ring = lambda *args, **kwargs: None
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
replicator.db_replicator.ring.Ring = self.orig_ring
|
||||||
|
|
||||||
|
def test_report_up_to_date(self):
|
||||||
|
repl = replicator.ContainerReplicator({})
|
||||||
|
info = {'put_timestamp': normalize_timestamp(1),
|
||||||
|
'delete_timestamp': normalize_timestamp(0),
|
||||||
|
'object_count': 0,
|
||||||
|
'bytes_used': 0,
|
||||||
|
'reported_put_timestamp': normalize_timestamp(1),
|
||||||
|
'reported_delete_timestamp': normalize_timestamp(0),
|
||||||
|
'reported_object_count': 0,
|
||||||
|
'reported_bytes_used': 0}
|
||||||
|
self.assertTrue(repl.report_up_to_date(info))
|
||||||
|
info['delete_timestamp'] = normalize_timestamp(2)
|
||||||
|
self.assertFalse(repl.report_up_to_date(info))
|
||||||
|
info['reported_delete_timestamp'] = normalize_timestamp(2)
|
||||||
|
self.assertTrue(repl.report_up_to_date(info))
|
||||||
|
info['object_count'] = 1
|
||||||
|
self.assertFalse(repl.report_up_to_date(info))
|
||||||
|
info['reported_object_count'] = 1
|
||||||
|
self.assertTrue(repl.report_up_to_date(info))
|
||||||
|
info['bytes_used'] = 1
|
||||||
|
self.assertFalse(repl.report_up_to_date(info))
|
||||||
|
info['reported_bytes_used'] = 1
|
||||||
|
self.assertTrue(repl.report_up_to_date(info))
|
||||||
|
info['put_timestamp'] = normalize_timestamp(3)
|
||||||
|
self.assertFalse(repl.report_up_to_date(info))
|
||||||
|
info['reported_put_timestamp'] = normalize_timestamp(3)
|
||||||
|
self.assertTrue(repl.report_up_to_date(info))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
x
Reference in New Issue
Block a user