Use smaller quorum size in proxy for even numbers of replicas

Requiring 2/2 backends for PUT requests means that the cluster can't
tolerate a single failure. Likewise, if you have 4 replicas in 2
regions, requiring 3/4 on a POST request means you cannot POST with
your inter-region link down or congested.

This changes the (replication) quorum size in the proxy to be at least
half the nodes instead of a majority of the nodes.

Daemons that were looking for a majority remain unchanged. The
container reconciler, replicator, and updater still require majorities
so their functioning is unchanged.

Odd numbers of replicas are unaffected by this commit.

Change-Id: I3b07ff0222aba6293ad7d60afe1747acafbe6ce4
This commit is contained in:
Samuel Merritt 2016-04-27 16:59:00 -05:00
parent ce6c850258
commit 29544a9e17
9 changed files with 65 additions and 48 deletions

View File

@ -2937,6 +2937,10 @@ def public(func):
return func return func
def majority_size(n):
return (n // 2) + 1
def quorum_size(n): def quorum_size(n):
""" """
quorum size as it applies to services that use 'replication' for data quorum size as it applies to services that use 'replication' for data
@ -2946,7 +2950,7 @@ def quorum_size(n):
Number of successful backend requests needed for the proxy to consider Number of successful backend requests needed for the proxy to consider
the client request successful. the client request successful.
""" """
return (n // 2) + 1 return (n + 1) // 2
def rsync_ip(ip): def rsync_ip(ip):

View File

@ -25,7 +25,7 @@ from swift.common.direct_client import (
direct_head_container, direct_delete_container_object, direct_head_container, direct_delete_container_object,
direct_put_container_object, ClientException) direct_put_container_object, ClientException)
from swift.common.internal_client import InternalClient, UnexpectedResponse from swift.common.internal_client import InternalClient, UnexpectedResponse
from swift.common.utils import get_logger, split_path, quorum_size, \ from swift.common.utils import get_logger, split_path, majority_size, \
FileLikeIter, Timestamp, last_modified_date_to_timestamp, \ FileLikeIter, Timestamp, last_modified_date_to_timestamp, \
LRUCache, decode_timestamps LRUCache, decode_timestamps
@ -194,7 +194,7 @@ def add_to_reconciler_queue(container_ring, account, container, obj,
server server
:returns: .misplaced_object container name, False on failure. "Success" :returns: .misplaced_object container name, False on failure. "Success"
means a quorum of containers got the update. means a majority of containers got the update.
""" """
container_name = get_reconciler_container_name(obj_timestamp) container_name = get_reconciler_container_name(obj_timestamp)
object_name = get_reconciler_obj_name(obj_policy_index, account, object_name = get_reconciler_obj_name(obj_policy_index, account,
@ -232,7 +232,7 @@ def add_to_reconciler_queue(container_ring, account, container, obj,
response_timeout=response_timeout) response_timeout=response_timeout)
successes = sum(pile) successes = sum(pile)
if successes >= quorum_size(len(nodes)): if successes >= majority_size(len(nodes)):
return container_name return container_name
else: else:
return False return False
@ -289,7 +289,7 @@ def direct_get_container_policy_index(container_ring, account_name,
:param container_ring: ring in which to look up the container locations :param container_ring: ring in which to look up the container locations
:param account_name: name of the container's account :param account_name: name of the container's account
:param container_name: name of the container :param container_name: name of the container
:returns: storage policy index, or None if it couldn't get a quorum :returns: storage policy index, or None if it couldn't get a majority
""" """
def _eat_client_exception(*args): def _eat_client_exception(*args):
try: try:
@ -307,7 +307,7 @@ def direct_get_container_policy_index(container_ring, account_name,
container_name) container_name)
headers = [x for x in pile if x is not None] headers = [x for x in pile if x is not None]
if len(headers) < quorum_size(len(nodes)): if len(headers) < majority_size(len(nodes)):
return return
return best_policy_index(headers) return best_policy_index(headers)

View File

@ -31,7 +31,7 @@ from swift.common.exceptions import DeviceUnavailable
from swift.common.http import is_success from swift.common.http import is_success
from swift.common.db import DatabaseAlreadyExists from swift.common.db import DatabaseAlreadyExists
from swift.common.utils import (Timestamp, hash_path, from swift.common.utils import (Timestamp, hash_path,
storage_directory, quorum_size) storage_directory, majority_size)
class ContainerReplicator(db_replicator.Replicator): class ContainerReplicator(db_replicator.Replicator):
@ -202,9 +202,9 @@ class ContainerReplicator(db_replicator.Replicator):
broker.update_reconciler_sync(info['max_row']) broker.update_reconciler_sync(info['max_row'])
return return
max_sync = self.dump_to_reconciler(broker, point) max_sync = self.dump_to_reconciler(broker, point)
success = responses.count(True) >= quorum_size(len(responses)) success = responses.count(True) >= majority_size(len(responses))
if max_sync > point and success: if max_sync > point and success:
# to be safe, only slide up the sync point with a quorum on # to be safe, only slide up the sync point with a majority on
# replication # replication
broker.update_reconciler_sync(max_sync) broker.update_reconciler_sync(max_sync)

View File

@ -31,7 +31,7 @@ from swift.common.bufferedhttp import http_connect
from swift.common.exceptions import ConnectionTimeout from swift.common.exceptions import ConnectionTimeout
from swift.common.ring import Ring from swift.common.ring import Ring
from swift.common.utils import get_logger, config_true_value, ismount, \ from swift.common.utils import get_logger, config_true_value, ismount, \
dump_recon_cache, quorum_size, Timestamp dump_recon_cache, majority_size, Timestamp
from swift.common.daemon import Daemon from swift.common.daemon import Daemon
from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR
@ -238,7 +238,7 @@ class ContainerUpdater(Daemon):
for event in events: for event in events:
if is_success(event.wait()): if is_success(event.wait()):
successes += 1 successes += 1
if successes >= quorum_size(len(events)): if successes >= majority_size(len(events)):
self.logger.increment('successes') self.logger.increment('successes')
self.successes += 1 self.successes += 1
self.logger.debug( self.logger.debug(

View File

@ -1119,9 +1119,9 @@ class TestStoragePolicies(unittest.TestCase):
def test_quorum_size_replication(self): def test_quorum_size_replication(self):
expected_sizes = {1: 1, expected_sizes = {1: 1,
2: 2, 2: 1,
3: 2, 3: 2,
4: 3, 4: 2,
5: 3} 5: 3}
for n, expected in expected_sizes.items(): for n, expected in expected_sizes.items():
policy = StoragePolicy(0, 'zero', policy = StoragePolicy(0, 'zero',

View File

@ -2504,13 +2504,23 @@ cluster_dfw1 = http://dfw1.host/v1/
self.assertFalse(utils.streq_const_time('a', 'aaaaa')) self.assertFalse(utils.streq_const_time('a', 'aaaaa'))
self.assertFalse(utils.streq_const_time('ABC123', 'abc123')) self.assertFalse(utils.streq_const_time('ABC123', 'abc123'))
def test_replication_quorum_size(self): def test_quorum_size(self):
expected_sizes = {1: 1,
2: 1,
3: 2,
4: 2,
5: 3}
got_sizes = dict([(n, utils.quorum_size(n))
for n in expected_sizes])
self.assertEqual(expected_sizes, got_sizes)
def test_majority_size(self):
expected_sizes = {1: 1, expected_sizes = {1: 1,
2: 2, 2: 2,
3: 2, 3: 2,
4: 3, 4: 3,
5: 3} 5: 3}
got_sizes = dict([(n, utils.quorum_size(n)) got_sizes = dict([(n, utils.majority_size(n))
for n in expected_sizes]) for n in expected_sizes])
self.assertEqual(expected_sizes, got_sizes) self.assertEqual(expected_sizes, got_sizes)

View File

@ -320,16 +320,16 @@ class TestAccountController4Replicas(TestAccountController):
((201, 201, 201, 201), 201), ((201, 201, 201, 201), 201),
((201, 201, 201, 404), 201), ((201, 201, 201, 404), 201),
((201, 201, 201, 503), 201), ((201, 201, 201, 503), 201),
((201, 201, 404, 404), 503), ((201, 201, 404, 404), 201),
((201, 201, 404, 503), 503), ((201, 201, 404, 503), 201),
((201, 201, 503, 503), 503), ((201, 201, 503, 503), 201),
((201, 404, 404, 404), 404), ((201, 404, 404, 404), 404),
((201, 404, 404, 503), 503), ((201, 404, 404, 503), 404),
((201, 404, 503, 503), 503), ((201, 404, 503, 503), 503),
((201, 503, 503, 503), 503), ((201, 503, 503, 503), 503),
((404, 404, 404, 404), 404), ((404, 404, 404, 404), 404),
((404, 404, 404, 503), 404), ((404, 404, 404, 503), 404),
((404, 404, 503, 503), 503), ((404, 404, 503, 503), 404),
((404, 503, 503, 503), 503), ((404, 503, 503, 503), 503),
((503, 503, 503, 503), 503) ((503, 503, 503, 503), 503)
] ]
@ -340,16 +340,16 @@ class TestAccountController4Replicas(TestAccountController):
((204, 204, 204, 204), 204), ((204, 204, 204, 204), 204),
((204, 204, 204, 404), 204), ((204, 204, 204, 404), 204),
((204, 204, 204, 503), 204), ((204, 204, 204, 503), 204),
((204, 204, 404, 404), 503), ((204, 204, 404, 404), 204),
((204, 204, 404, 503), 503), ((204, 204, 404, 503), 204),
((204, 204, 503, 503), 503), ((204, 204, 503, 503), 204),
((204, 404, 404, 404), 404), ((204, 404, 404, 404), 404),
((204, 404, 404, 503), 503), ((204, 404, 404, 503), 404),
((204, 404, 503, 503), 503), ((204, 404, 503, 503), 503),
((204, 503, 503, 503), 503), ((204, 503, 503, 503), 503),
((404, 404, 404, 404), 404), ((404, 404, 404, 404), 404),
((404, 404, 404, 503), 404), ((404, 404, 404, 503), 404),
((404, 404, 503, 503), 503), ((404, 404, 503, 503), 404),
((404, 503, 503, 503), 503), ((404, 503, 503, 503), 503),
((503, 503, 503, 503), 503) ((503, 503, 503, 503), 503)
] ]
@ -360,16 +360,16 @@ class TestAccountController4Replicas(TestAccountController):
((204, 204, 204, 204), 204), ((204, 204, 204, 204), 204),
((204, 204, 204, 404), 204), ((204, 204, 204, 404), 204),
((204, 204, 204, 503), 204), ((204, 204, 204, 503), 204),
((204, 204, 404, 404), 503), ((204, 204, 404, 404), 204),
((204, 204, 404, 503), 503), ((204, 204, 404, 503), 204),
((204, 204, 503, 503), 503), ((204, 204, 503, 503), 204),
((204, 404, 404, 404), 404), ((204, 404, 404, 404), 404),
((204, 404, 404, 503), 503), ((204, 404, 404, 503), 404),
((204, 404, 503, 503), 503), ((204, 404, 503, 503), 503),
((204, 503, 503, 503), 503), ((204, 503, 503, 503), 503),
((404, 404, 404, 404), 404), ((404, 404, 404, 404), 404),
((404, 404, 404, 503), 404), ((404, 404, 404, 503), 404),
((404, 404, 503, 503), 503), ((404, 404, 503, 503), 404),
((404, 503, 503, 503), 503), ((404, 503, 503, 503), 503),
((503, 503, 503, 503), 503) ((503, 503, 503, 503), 503)
] ]

View File

@ -662,12 +662,15 @@ class TestFuncs(unittest.TestCase):
base = Controller(self.app) base = Controller(self.app)
# just throw a bunch of test cases at it # just throw a bunch of test cases at it
self.assertEqual(base.have_quorum([201, 404], 3), False) self.assertEqual(base.have_quorum([201, 404], 3), False)
self.assertEqual(base.have_quorum([201, 201], 4), False) self.assertEqual(base.have_quorum([201, 201], 4), True)
self.assertEqual(base.have_quorum([201, 201, 404, 404], 4), False) self.assertEqual(base.have_quorum([201], 4), False)
self.assertEqual(base.have_quorum([201, 503, 503, 201], 4), False) self.assertEqual(base.have_quorum([201, 201, 404, 404], 4), True)
self.assertEqual(base.have_quorum([201, 302, 418, 503], 4), False)
self.assertEqual(base.have_quorum([201, 503, 503, 201], 4), True)
self.assertEqual(base.have_quorum([201, 201], 3), True) self.assertEqual(base.have_quorum([201, 201], 3), True)
self.assertEqual(base.have_quorum([404, 404], 3), True) self.assertEqual(base.have_quorum([404, 404], 3), True)
self.assertEqual(base.have_quorum([201, 201], 2), True) self.assertEqual(base.have_quorum([201, 201], 2), True)
self.assertEqual(base.have_quorum([201, 404], 2), True)
self.assertEqual(base.have_quorum([404, 404], 2), True) self.assertEqual(base.have_quorum([404, 404], 2), True)
self.assertEqual(base.have_quorum([201, 404, 201, 201], 4), True) self.assertEqual(base.have_quorum([201, 404, 201, 201], 4), True)

View File

@ -278,16 +278,16 @@ class TestContainerController4Replicas(TestContainerController):
((201, 201, 201, 201), 201), ((201, 201, 201, 201), 201),
((201, 201, 201, 404), 201), ((201, 201, 201, 404), 201),
((201, 201, 201, 503), 201), ((201, 201, 201, 503), 201),
((201, 201, 404, 404), 503), ((201, 201, 404, 404), 201),
((201, 201, 404, 503), 503), ((201, 201, 404, 503), 201),
((201, 201, 503, 503), 503), ((201, 201, 503, 503), 201),
((201, 404, 404, 404), 404), ((201, 404, 404, 404), 404),
((201, 404, 404, 503), 503), ((201, 404, 404, 503), 404),
((201, 404, 503, 503), 503), ((201, 404, 503, 503), 503),
((201, 503, 503, 503), 503), ((201, 503, 503, 503), 503),
((404, 404, 404, 404), 404), ((404, 404, 404, 404), 404),
((404, 404, 404, 503), 404), ((404, 404, 404, 503), 404),
((404, 404, 503, 503), 503), ((404, 404, 503, 503), 404),
((404, 503, 503, 503), 503), ((404, 503, 503, 503), 503),
((503, 503, 503, 503), 503) ((503, 503, 503, 503), 503)
] ]
@ -298,16 +298,16 @@ class TestContainerController4Replicas(TestContainerController):
((204, 204, 204, 204), 204), ((204, 204, 204, 204), 204),
((204, 204, 204, 404), 204), ((204, 204, 204, 404), 204),
((204, 204, 204, 503), 204), ((204, 204, 204, 503), 204),
((204, 204, 404, 404), 503), ((204, 204, 404, 404), 204),
((204, 204, 404, 503), 503), ((204, 204, 404, 503), 204),
((204, 204, 503, 503), 503), ((204, 204, 503, 503), 204),
((204, 404, 404, 404), 404), ((204, 404, 404, 404), 404),
((204, 404, 404, 503), 503), ((204, 404, 404, 503), 404),
((204, 404, 503, 503), 503), ((204, 404, 503, 503), 503),
((204, 503, 503, 503), 503), ((204, 503, 503, 503), 503),
((404, 404, 404, 404), 404), ((404, 404, 404, 404), 404),
((404, 404, 404, 503), 404), ((404, 404, 404, 503), 404),
((404, 404, 503, 503), 503), ((404, 404, 503, 503), 404),
((404, 503, 503, 503), 503), ((404, 503, 503, 503), 503),
((503, 503, 503, 503), 503) ((503, 503, 503, 503), 503)
] ]
@ -318,16 +318,16 @@ class TestContainerController4Replicas(TestContainerController):
((204, 204, 204, 204), 204), ((204, 204, 204, 204), 204),
((204, 204, 204, 404), 204), ((204, 204, 204, 404), 204),
((204, 204, 204, 503), 204), ((204, 204, 204, 503), 204),
((204, 204, 404, 404), 503), ((204, 204, 404, 404), 204),
((204, 204, 404, 503), 503), ((204, 204, 404, 503), 204),
((204, 204, 503, 503), 503), ((204, 204, 503, 503), 204),
((204, 404, 404, 404), 404), ((204, 404, 404, 404), 404),
((204, 404, 404, 503), 503), ((204, 404, 404, 503), 404),
((204, 404, 503, 503), 503), ((204, 404, 503, 503), 503),
((204, 503, 503, 503), 503), ((204, 503, 503, 503), 503),
((404, 404, 404, 404), 404), ((404, 404, 404, 404), 404),
((404, 404, 404, 503), 404), ((404, 404, 404, 503), 404),
((404, 404, 503, 503), 503), ((404, 404, 503, 503), 404),
((404, 503, 503, 503), 503), ((404, 503, 503, 503), 503),
((503, 503, 503, 503), 503) ((503, 503, 503, 503), 503)
] ]