Don't delete misplaced dbs if not replicated

If one uses only a single replica and a database file is placed on a
wrong partition, it will be removed instead of replicated to the correct
partition.

There are two reasons for this:
1. The list of nodes is empty when there is only a single replica
2. all(responses) is True even if there is no response at all, and the
latter is always True if there is no node to replicate to.

This patch fixes this by adding a special case if used with only one
replica to the node selection loop and ensures that the list of
responses is not empty.  Also adds a test that fails on current master
and passes with this change.

Closes-Bug: 1568591

Change-Id: I028ea8c1928e8c9a401db31fb266ff82606f8371
This commit is contained in:
Christian Schwede 2016-04-11 11:56:07 +02:00
parent b6c3ab26a1
commit 9729bc83eb
2 changed files with 36 additions and 6 deletions

View File

@ -525,10 +525,13 @@ class Replicator(Daemon):
if shouldbehere:
shouldbehere = bool([n for n in nodes if n['id'] == node_id])
# See Footnote [1] for an explanation of the repl_nodes assignment.
i = 0
while i < len(nodes) and nodes[i]['id'] != node_id:
i += 1
repl_nodes = nodes[i + 1:] + nodes[:i]
if len(nodes) > 1:
i = 0
while i < len(nodes) and nodes[i]['id'] != node_id:
i += 1
repl_nodes = nodes[i + 1:] + nodes[:i]
else: # Special case if using only a single replica
repl_nodes = nodes
more_nodes = self.ring.get_more_nodes(int(partition))
if not local_dev:
# Check further if local device is a handoff node
@ -563,7 +566,7 @@ class Replicator(Daemon):
except (Exception, Timeout):
self.logger.exception('UNHANDLED EXCEPTION: in post replicate '
'hook for %s', broker.db_file)
if not shouldbehere and all(responses):
if not shouldbehere and responses and all(responses):
# If the db shouldn't be on this node and has been successfully
# synced to all of its peers, it can be removed.
if not self.delete_db(broker):

View File

@ -73,7 +73,7 @@ class FakeRingWithSingleNode(object):
class Ring(object):
devs = [dict(
id=1, weight=10.0, zone=1, ip='1.1.1.1', port=6200, device='sdb',
meta='', replication_ip='1.1.1.1', replication_port=6200
meta='', replication_ip='1.1.1.1', replication_port=6200, region=1
)]
def __init__(self, path, reload_time=15, ring_name=None):
@ -633,6 +633,9 @@ class TestDBReplicator(unittest.TestCase):
def test_replicate_object_delete_because_not_shouldbehere(self):
replicator = TestReplicator({})
replicator.ring = FakeRingWithNodes().Ring('path')
replicator.brokerclass = FakeAccountBroker
replicator._repl_to_node = lambda *args: True
replicator.delete_db = self.stub_delete_db
replicator._replicate_object('0', '/path/to/file', 'node_id')
self.assertEqual(['/path/to/file'], self.delete_db_calls)
@ -669,6 +672,30 @@ class TestDBReplicator(unittest.TestCase):
[(('Found /path/to/file for /a%20c%20t/c%20o%20n when it should '
'be on partition 0; will replicate out and remove.',), {})])
def test_replicate_container_out_of_place_no_node(self):
replicator = TestReplicator({}, logger=unit.FakeLogger())
replicator.ring = FakeRingWithSingleNode().Ring('path')
replicator._repl_to_node = lambda *args: True
replicator.delete_db = self.stub_delete_db
# Correct node_id, wrong part
part = replicator.ring.get_part(
TEST_ACCOUNT_NAME, TEST_CONTAINER_NAME) + 1
node_id = replicator.ring.get_part_nodes(part)[0]['id']
replicator._replicate_object(str(part), '/path/to/file', node_id)
self.assertEqual(['/path/to/file'], self.delete_db_calls)
self.delete_db_calls = []
# No nodes this time
replicator.ring.get_part_nodes = lambda *args: []
replicator.delete_db = self.stub_delete_db
# Correct node_id, wrong part
part = replicator.ring.get_part(
TEST_ACCOUNT_NAME, TEST_CONTAINER_NAME) + 1
replicator._replicate_object(str(part), '/path/to/file', node_id)
self.assertEqual([], self.delete_db_calls)
def test_replicate_object_different_region(self):
db_replicator.ring = FakeRingWithNodes()
replicator = TestReplicator({})