Fix account reaper for > 3 replicas

There's a pre-existing IndexError in the pop from the container node
list in reaper's reap_object method for object rings with a replica
count greater than the container replica count.  Which is more likely
on EC storage policies.

When making the backend direct delete requests to the nodes once the
container node's list is exhausted the generic exception handler logs
the error and breaks out of any other backend object requests - but
the reaper marches forward and eventually the tombstones are
replicated.

This change just cycles the container headers across all the nodes -
which seems reasonable enough - but could certainly garner
bikeshedding.

Change-Id: I5897d00b0a8c1e05884945dd93d9ce891b207001
This commit is contained in:
Clay Gerrard 2015-04-03 16:23:14 -07:00
parent 61a9d35fd5
commit b2189ef47a
2 changed files with 47 additions and 22 deletions

View File

@ -19,6 +19,7 @@ from swift import gettext_ as _
from logging import DEBUG from logging import DEBUG
from math import sqrt from math import sqrt
from time import time from time import time
import itertools
from eventlet import GreenPool, sleep, Timeout from eventlet import GreenPool, sleep, Timeout
@ -432,7 +433,7 @@ class AccountReaper(Daemon):
* See also: :func:`swift.common.ring.Ring.get_nodes` for a description * See also: :func:`swift.common.ring.Ring.get_nodes` for a description
of the container node dicts. of the container node dicts.
""" """
container_nodes = list(container_nodes) cnodes = itertools.cycle(container_nodes)
try: try:
ring = self.get_object_ring(policy_index) ring = self.get_object_ring(policy_index)
except PolicyError: except PolicyError:
@ -443,7 +444,7 @@ class AccountReaper(Daemon):
successes = 0 successes = 0
failures = 0 failures = 0
for node in nodes: for node in nodes:
cnode = container_nodes.pop() cnode = next(cnodes)
try: try:
direct_delete_object( direct_delete_object(
node, part, account, container, obj, node, part, account, container, obj,

View File

@ -141,7 +141,7 @@ cont_nodes = [{'device': 'sda1',
@unit.patch_policies([StoragePolicy(0, 'zero', False, @unit.patch_policies([StoragePolicy(0, 'zero', False,
object_ring=unit.FakeRing()), object_ring=unit.FakeRing()),
StoragePolicy(1, 'one', True, StoragePolicy(1, 'one', True,
object_ring=unit.FakeRing())]) object_ring=unit.FakeRing(replicas=4))])
class TestReaper(unittest.TestCase): class TestReaper(unittest.TestCase):
def setUp(self): def setUp(self):
@ -215,7 +215,7 @@ class TestReaper(unittest.TestCase):
r.stats_objects_possibly_remaining = 0 r.stats_objects_possibly_remaining = 0
r.myips = myips r.myips = myips
if fakelogger: if fakelogger:
r.logger = FakeLogger() r.logger = unit.debug_logger('test-reaper')
return r return r
def fake_reap_account(self, *args, **kwargs): def fake_reap_account(self, *args, **kwargs):
@ -287,7 +287,7 @@ class TestReaper(unittest.TestCase):
policy.idx) policy.idx)
for i, call_args in enumerate( for i, call_args in enumerate(
fake_direct_delete.call_args_list): fake_direct_delete.call_args_list):
cnode = cont_nodes[i] cnode = cont_nodes[i % len(cont_nodes)]
host = '%(ip)s:%(port)s' % cnode host = '%(ip)s:%(port)s' % cnode
device = cnode['device'] device = cnode['device']
headers = { headers = {
@ -302,7 +302,8 @@ class TestReaper(unittest.TestCase):
headers=headers, conn_timeout=0.5, headers=headers, conn_timeout=0.5,
response_timeout=10) response_timeout=10)
self.assertEqual(call_args, expected) self.assertEqual(call_args, expected)
self.assertEqual(r.stats_objects_deleted, 3) self.assertEqual(r.stats_objects_deleted,
policy.object_ring.replicas)
def test_reap_object_fail(self): def test_reap_object_fail(self):
r = self.init_reaper({}, fakelogger=True) r = self.init_reaper({}, fakelogger=True)
@ -313,7 +314,26 @@ class TestReaper(unittest.TestCase):
self.fake_direct_delete_object): self.fake_direct_delete_object):
r.reap_object('a', 'c', 'partition', cont_nodes, 'o', r.reap_object('a', 'c', 'partition', cont_nodes, 'o',
policy.idx) policy.idx)
self.assertEqual(r.stats_objects_deleted, 1) # IMHO, the stat handling in the node loop of reap object is
# over indented, but no one has complained, so I'm not inclined
# to move it. However it's worth noting we're currently keeping
# stats on deletes per *replica* - which is rather obvious from
# these tests, but this results is surprising because of some
# funny logic to *skip* increments on successful deletes of
# replicas until we have more successful responses than
# failures. This means that while the first replica doesn't
# increment deleted because of the failure, the second one
# *does* get successfully deleted, but *also does not* increment
# the counter (!?).
#
# In the three replica case this leaves only the last deleted
# object incrementing the counter - in the four replica case
# this leaves the last two.
#
# Basically this test will always result in:
# deleted == num_replicas - 2
self.assertEqual(r.stats_objects_deleted,
policy.object_ring.replicas - 2)
self.assertEqual(r.stats_objects_remaining, 1) self.assertEqual(r.stats_objects_remaining, 1)
self.assertEqual(r.stats_objects_possibly_remaining, 1) self.assertEqual(r.stats_objects_possibly_remaining, 1)
@ -348,7 +368,7 @@ class TestReaper(unittest.TestCase):
mocks['direct_get_container'].side_effect = fake_get_container mocks['direct_get_container'].side_effect = fake_get_container
r.reap_container('a', 'partition', acc_nodes, 'c') r.reap_container('a', 'partition', acc_nodes, 'c')
mock_calls = mocks['direct_delete_object'].call_args_list mock_calls = mocks['direct_delete_object'].call_args_list
self.assertEqual(3, len(mock_calls)) self.assertEqual(policy.object_ring.replicas, len(mock_calls))
for call_args in mock_calls: for call_args in mock_calls:
_args, kwargs = call_args _args, kwargs = call_args
self.assertEqual(kwargs['headers'] self.assertEqual(kwargs['headers']
@ -356,7 +376,7 @@ class TestReaper(unittest.TestCase):
policy.idx) policy.idx)
self.assertEquals(mocks['direct_delete_container'].call_count, 3) self.assertEquals(mocks['direct_delete_container'].call_count, 3)
self.assertEqual(r.stats_objects_deleted, 3) self.assertEqual(r.stats_objects_deleted, policy.object_ring.replicas)
def test_reap_container_get_object_fail(self): def test_reap_container_get_object_fail(self):
r = self.init_reaper({}, fakelogger=True) r = self.init_reaper({}, fakelogger=True)
@ -374,7 +394,7 @@ class TestReaper(unittest.TestCase):
self.fake_reap_object)] self.fake_reap_object)]
with nested(*ctx): with nested(*ctx):
r.reap_container('a', 'partition', acc_nodes, 'c') r.reap_container('a', 'partition', acc_nodes, 'c')
self.assertEqual(r.logger.inc['return_codes.4'], 1) self.assertEqual(r.logger.get_increment_counts()['return_codes.4'], 1)
self.assertEqual(r.stats_containers_deleted, 1) self.assertEqual(r.stats_containers_deleted, 1)
def test_reap_container_partial_fail(self): def test_reap_container_partial_fail(self):
@ -393,7 +413,7 @@ class TestReaper(unittest.TestCase):
self.fake_reap_object)] self.fake_reap_object)]
with nested(*ctx): with nested(*ctx):
r.reap_container('a', 'partition', acc_nodes, 'c') r.reap_container('a', 'partition', acc_nodes, 'c')
self.assertEqual(r.logger.inc['return_codes.4'], 2) self.assertEqual(r.logger.get_increment_counts()['return_codes.4'], 2)
self.assertEqual(r.stats_containers_possibly_remaining, 1) self.assertEqual(r.stats_containers_possibly_remaining, 1)
def test_reap_container_full_fail(self): def test_reap_container_full_fail(self):
@ -412,7 +432,7 @@ class TestReaper(unittest.TestCase):
self.fake_reap_object)] self.fake_reap_object)]
with nested(*ctx): with nested(*ctx):
r.reap_container('a', 'partition', acc_nodes, 'c') r.reap_container('a', 'partition', acc_nodes, 'c')
self.assertEqual(r.logger.inc['return_codes.4'], 3) self.assertEqual(r.logger.get_increment_counts()['return_codes.4'], 3)
self.assertEqual(r.stats_containers_remaining, 1) self.assertEqual(r.stats_containers_remaining, 1)
@patch('swift.account.reaper.Ring', @patch('swift.account.reaper.Ring',
@ -437,8 +457,8 @@ class TestReaper(unittest.TestCase):
mocks['direct_get_container'].side_effect = fake_get_container mocks['direct_get_container'].side_effect = fake_get_container
r.reap_container('a', 'partition', acc_nodes, 'c') r.reap_container('a', 'partition', acc_nodes, 'c')
self.assertEqual(r.logger.msg, self.assertEqual(r.logger.get_lines_for_level('error'), [
'ERROR: invalid storage policy index: 2') 'ERROR: invalid storage policy index: 2'])
def fake_reap_container(self, *args, **kwargs): def fake_reap_container(self, *args, **kwargs):
self.called_amount += 1 self.called_amount += 1
@ -463,13 +483,16 @@ class TestReaper(unittest.TestCase):
nodes = r.get_account_ring().get_part_nodes() nodes = r.get_account_ring().get_part_nodes()
self.assertTrue(r.reap_account(broker, 'partition', nodes)) self.assertTrue(r.reap_account(broker, 'partition', nodes))
self.assertEqual(self.called_amount, 4) self.assertEqual(self.called_amount, 4)
self.assertEqual(r.logger.msg.find('Completed pass'), 0) info_lines = r.logger.get_lines_for_level('info')
self.assertTrue(r.logger.msg.find('1 containers deleted')) self.assertEqual(len(info_lines), 2)
self.assertTrue(r.logger.msg.find('1 objects deleted')) start_line, stat_line = info_lines
self.assertTrue(r.logger.msg.find('1 containers remaining')) self.assertEqual(start_line, 'Beginning pass on account a')
self.assertTrue(r.logger.msg.find('1 objects remaining')) self.assertTrue(stat_line.find('1 containers deleted'))
self.assertTrue(r.logger.msg.find('1 containers possibly remaining')) self.assertTrue(stat_line.find('1 objects deleted'))
self.assertTrue(r.logger.msg.find('1 objects possibly remaining')) self.assertTrue(stat_line.find('1 containers remaining'))
self.assertTrue(stat_line.find('1 objects remaining'))
self.assertTrue(stat_line.find('1 containers possibly remaining'))
self.assertTrue(stat_line.find('1 objects possibly remaining'))
def test_reap_account_no_container(self): def test_reap_account_no_container(self):
broker = FakeAccountBroker(tuple()) broker = FakeAccountBroker(tuple())
@ -483,7 +506,8 @@ class TestReaper(unittest.TestCase):
with nested(*ctx): with nested(*ctx):
nodes = r.get_account_ring().get_part_nodes() nodes = r.get_account_ring().get_part_nodes()
self.assertTrue(r.reap_account(broker, 'partition', nodes)) self.assertTrue(r.reap_account(broker, 'partition', nodes))
self.assertEqual(r.logger.msg.find('Completed pass'), 0) self.assertTrue(r.logger.get_lines_for_level(
'info')[-1].startswith('Completed pass'))
self.assertEqual(self.called_amount, 0) self.assertEqual(self.called_amount, 0)
def test_reap_device(self): def test_reap_device(self):