Use smaller quorum size in proxy for even numbers of replicas
Requiring 2/2 backends for PUT requests means that the cluster can't tolerate a single failure. Likewise, if you have 4 replicas in 2 regions, requiring 3/4 on a POST request means you cannot POST with your inter-region link down or congested. This changes the (replication) quorum size in the proxy to be at least half the nodes instead of a majority of the nodes. Daemons that were looking for a majority remain unchanged. The container reconciler, replicator, and updater still require majorities so their functioning is unchanged. Odd numbers of replicas are unaffected by this commit. Change-Id: I3b07ff0222aba6293ad7d60afe1747acafbe6ce4
This commit is contained in:
parent
ce6c850258
commit
29544a9e17
@ -2937,6 +2937,10 @@ def public(func):
|
||||
return func
|
||||
|
||||
|
||||
def majority_size(n):
|
||||
return (n // 2) + 1
|
||||
|
||||
|
||||
def quorum_size(n):
|
||||
"""
|
||||
quorum size as it applies to services that use 'replication' for data
|
||||
@ -2946,7 +2950,7 @@ def quorum_size(n):
|
||||
Number of successful backend requests needed for the proxy to consider
|
||||
the client request successful.
|
||||
"""
|
||||
return (n // 2) + 1
|
||||
return (n + 1) // 2
|
||||
|
||||
|
||||
def rsync_ip(ip):
|
||||
|
@ -25,7 +25,7 @@ from swift.common.direct_client import (
|
||||
direct_head_container, direct_delete_container_object,
|
||||
direct_put_container_object, ClientException)
|
||||
from swift.common.internal_client import InternalClient, UnexpectedResponse
|
||||
from swift.common.utils import get_logger, split_path, quorum_size, \
|
||||
from swift.common.utils import get_logger, split_path, majority_size, \
|
||||
FileLikeIter, Timestamp, last_modified_date_to_timestamp, \
|
||||
LRUCache, decode_timestamps
|
||||
|
||||
@ -194,7 +194,7 @@ def add_to_reconciler_queue(container_ring, account, container, obj,
|
||||
server
|
||||
|
||||
:returns: .misplaced_object container name, False on failure. "Success"
|
||||
means a quorum of containers got the update.
|
||||
means a majority of containers got the update.
|
||||
"""
|
||||
container_name = get_reconciler_container_name(obj_timestamp)
|
||||
object_name = get_reconciler_obj_name(obj_policy_index, account,
|
||||
@ -232,7 +232,7 @@ def add_to_reconciler_queue(container_ring, account, container, obj,
|
||||
response_timeout=response_timeout)
|
||||
|
||||
successes = sum(pile)
|
||||
if successes >= quorum_size(len(nodes)):
|
||||
if successes >= majority_size(len(nodes)):
|
||||
return container_name
|
||||
else:
|
||||
return False
|
||||
@ -289,7 +289,7 @@ def direct_get_container_policy_index(container_ring, account_name,
|
||||
:param container_ring: ring in which to look up the container locations
|
||||
:param account_name: name of the container's account
|
||||
:param container_name: name of the container
|
||||
:returns: storage policy index, or None if it couldn't get a quorum
|
||||
:returns: storage policy index, or None if it couldn't get a majority
|
||||
"""
|
||||
def _eat_client_exception(*args):
|
||||
try:
|
||||
@ -307,7 +307,7 @@ def direct_get_container_policy_index(container_ring, account_name,
|
||||
container_name)
|
||||
|
||||
headers = [x for x in pile if x is not None]
|
||||
if len(headers) < quorum_size(len(nodes)):
|
||||
if len(headers) < majority_size(len(nodes)):
|
||||
return
|
||||
return best_policy_index(headers)
|
||||
|
||||
|
@ -31,7 +31,7 @@ from swift.common.exceptions import DeviceUnavailable
|
||||
from swift.common.http import is_success
|
||||
from swift.common.db import DatabaseAlreadyExists
|
||||
from swift.common.utils import (Timestamp, hash_path,
|
||||
storage_directory, quorum_size)
|
||||
storage_directory, majority_size)
|
||||
|
||||
|
||||
class ContainerReplicator(db_replicator.Replicator):
|
||||
@ -202,9 +202,9 @@ class ContainerReplicator(db_replicator.Replicator):
|
||||
broker.update_reconciler_sync(info['max_row'])
|
||||
return
|
||||
max_sync = self.dump_to_reconciler(broker, point)
|
||||
success = responses.count(True) >= quorum_size(len(responses))
|
||||
success = responses.count(True) >= majority_size(len(responses))
|
||||
if max_sync > point and success:
|
||||
# to be safe, only slide up the sync point with a quorum on
|
||||
# to be safe, only slide up the sync point with a majority on
|
||||
# replication
|
||||
broker.update_reconciler_sync(max_sync)
|
||||
|
||||
|
@ -31,7 +31,7 @@ from swift.common.bufferedhttp import http_connect
|
||||
from swift.common.exceptions import ConnectionTimeout
|
||||
from swift.common.ring import Ring
|
||||
from swift.common.utils import get_logger, config_true_value, ismount, \
|
||||
dump_recon_cache, quorum_size, Timestamp
|
||||
dump_recon_cache, majority_size, Timestamp
|
||||
from swift.common.daemon import Daemon
|
||||
from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR
|
||||
|
||||
@ -238,7 +238,7 @@ class ContainerUpdater(Daemon):
|
||||
for event in events:
|
||||
if is_success(event.wait()):
|
||||
successes += 1
|
||||
if successes >= quorum_size(len(events)):
|
||||
if successes >= majority_size(len(events)):
|
||||
self.logger.increment('successes')
|
||||
self.successes += 1
|
||||
self.logger.debug(
|
||||
|
@ -1119,9 +1119,9 @@ class TestStoragePolicies(unittest.TestCase):
|
||||
|
||||
def test_quorum_size_replication(self):
|
||||
expected_sizes = {1: 1,
|
||||
2: 2,
|
||||
2: 1,
|
||||
3: 2,
|
||||
4: 3,
|
||||
4: 2,
|
||||
5: 3}
|
||||
for n, expected in expected_sizes.items():
|
||||
policy = StoragePolicy(0, 'zero',
|
||||
|
@ -2504,13 +2504,23 @@ cluster_dfw1 = http://dfw1.host/v1/
|
||||
self.assertFalse(utils.streq_const_time('a', 'aaaaa'))
|
||||
self.assertFalse(utils.streq_const_time('ABC123', 'abc123'))
|
||||
|
||||
def test_replication_quorum_size(self):
|
||||
def test_quorum_size(self):
|
||||
expected_sizes = {1: 1,
|
||||
2: 1,
|
||||
3: 2,
|
||||
4: 2,
|
||||
5: 3}
|
||||
got_sizes = dict([(n, utils.quorum_size(n))
|
||||
for n in expected_sizes])
|
||||
self.assertEqual(expected_sizes, got_sizes)
|
||||
|
||||
def test_majority_size(self):
|
||||
expected_sizes = {1: 1,
|
||||
2: 2,
|
||||
3: 2,
|
||||
4: 3,
|
||||
5: 3}
|
||||
got_sizes = dict([(n, utils.quorum_size(n))
|
||||
got_sizes = dict([(n, utils.majority_size(n))
|
||||
for n in expected_sizes])
|
||||
self.assertEqual(expected_sizes, got_sizes)
|
||||
|
||||
|
@ -320,16 +320,16 @@ class TestAccountController4Replicas(TestAccountController):
|
||||
((201, 201, 201, 201), 201),
|
||||
((201, 201, 201, 404), 201),
|
||||
((201, 201, 201, 503), 201),
|
||||
((201, 201, 404, 404), 503),
|
||||
((201, 201, 404, 503), 503),
|
||||
((201, 201, 503, 503), 503),
|
||||
((201, 201, 404, 404), 201),
|
||||
((201, 201, 404, 503), 201),
|
||||
((201, 201, 503, 503), 201),
|
||||
((201, 404, 404, 404), 404),
|
||||
((201, 404, 404, 503), 503),
|
||||
((201, 404, 404, 503), 404),
|
||||
((201, 404, 503, 503), 503),
|
||||
((201, 503, 503, 503), 503),
|
||||
((404, 404, 404, 404), 404),
|
||||
((404, 404, 404, 503), 404),
|
||||
((404, 404, 503, 503), 503),
|
||||
((404, 404, 503, 503), 404),
|
||||
((404, 503, 503, 503), 503),
|
||||
((503, 503, 503, 503), 503)
|
||||
]
|
||||
@ -340,16 +340,16 @@ class TestAccountController4Replicas(TestAccountController):
|
||||
((204, 204, 204, 204), 204),
|
||||
((204, 204, 204, 404), 204),
|
||||
((204, 204, 204, 503), 204),
|
||||
((204, 204, 404, 404), 503),
|
||||
((204, 204, 404, 503), 503),
|
||||
((204, 204, 503, 503), 503),
|
||||
((204, 204, 404, 404), 204),
|
||||
((204, 204, 404, 503), 204),
|
||||
((204, 204, 503, 503), 204),
|
||||
((204, 404, 404, 404), 404),
|
||||
((204, 404, 404, 503), 503),
|
||||
((204, 404, 404, 503), 404),
|
||||
((204, 404, 503, 503), 503),
|
||||
((204, 503, 503, 503), 503),
|
||||
((404, 404, 404, 404), 404),
|
||||
((404, 404, 404, 503), 404),
|
||||
((404, 404, 503, 503), 503),
|
||||
((404, 404, 503, 503), 404),
|
||||
((404, 503, 503, 503), 503),
|
||||
((503, 503, 503, 503), 503)
|
||||
]
|
||||
@ -360,16 +360,16 @@ class TestAccountController4Replicas(TestAccountController):
|
||||
((204, 204, 204, 204), 204),
|
||||
((204, 204, 204, 404), 204),
|
||||
((204, 204, 204, 503), 204),
|
||||
((204, 204, 404, 404), 503),
|
||||
((204, 204, 404, 503), 503),
|
||||
((204, 204, 503, 503), 503),
|
||||
((204, 204, 404, 404), 204),
|
||||
((204, 204, 404, 503), 204),
|
||||
((204, 204, 503, 503), 204),
|
||||
((204, 404, 404, 404), 404),
|
||||
((204, 404, 404, 503), 503),
|
||||
((204, 404, 404, 503), 404),
|
||||
((204, 404, 503, 503), 503),
|
||||
((204, 503, 503, 503), 503),
|
||||
((404, 404, 404, 404), 404),
|
||||
((404, 404, 404, 503), 404),
|
||||
((404, 404, 503, 503), 503),
|
||||
((404, 404, 503, 503), 404),
|
||||
((404, 503, 503, 503), 503),
|
||||
((503, 503, 503, 503), 503)
|
||||
]
|
||||
|
@ -662,12 +662,15 @@ class TestFuncs(unittest.TestCase):
|
||||
base = Controller(self.app)
|
||||
# just throw a bunch of test cases at it
|
||||
self.assertEqual(base.have_quorum([201, 404], 3), False)
|
||||
self.assertEqual(base.have_quorum([201, 201], 4), False)
|
||||
self.assertEqual(base.have_quorum([201, 201, 404, 404], 4), False)
|
||||
self.assertEqual(base.have_quorum([201, 503, 503, 201], 4), False)
|
||||
self.assertEqual(base.have_quorum([201, 201], 4), True)
|
||||
self.assertEqual(base.have_quorum([201], 4), False)
|
||||
self.assertEqual(base.have_quorum([201, 201, 404, 404], 4), True)
|
||||
self.assertEqual(base.have_quorum([201, 302, 418, 503], 4), False)
|
||||
self.assertEqual(base.have_quorum([201, 503, 503, 201], 4), True)
|
||||
self.assertEqual(base.have_quorum([201, 201], 3), True)
|
||||
self.assertEqual(base.have_quorum([404, 404], 3), True)
|
||||
self.assertEqual(base.have_quorum([201, 201], 2), True)
|
||||
self.assertEqual(base.have_quorum([201, 404], 2), True)
|
||||
self.assertEqual(base.have_quorum([404, 404], 2), True)
|
||||
self.assertEqual(base.have_quorum([201, 404, 201, 201], 4), True)
|
||||
|
||||
|
@ -278,16 +278,16 @@ class TestContainerController4Replicas(TestContainerController):
|
||||
((201, 201, 201, 201), 201),
|
||||
((201, 201, 201, 404), 201),
|
||||
((201, 201, 201, 503), 201),
|
||||
((201, 201, 404, 404), 503),
|
||||
((201, 201, 404, 503), 503),
|
||||
((201, 201, 503, 503), 503),
|
||||
((201, 201, 404, 404), 201),
|
||||
((201, 201, 404, 503), 201),
|
||||
((201, 201, 503, 503), 201),
|
||||
((201, 404, 404, 404), 404),
|
||||
((201, 404, 404, 503), 503),
|
||||
((201, 404, 404, 503), 404),
|
||||
((201, 404, 503, 503), 503),
|
||||
((201, 503, 503, 503), 503),
|
||||
((404, 404, 404, 404), 404),
|
||||
((404, 404, 404, 503), 404),
|
||||
((404, 404, 503, 503), 503),
|
||||
((404, 404, 503, 503), 404),
|
||||
((404, 503, 503, 503), 503),
|
||||
((503, 503, 503, 503), 503)
|
||||
]
|
||||
@ -298,16 +298,16 @@ class TestContainerController4Replicas(TestContainerController):
|
||||
((204, 204, 204, 204), 204),
|
||||
((204, 204, 204, 404), 204),
|
||||
((204, 204, 204, 503), 204),
|
||||
((204, 204, 404, 404), 503),
|
||||
((204, 204, 404, 503), 503),
|
||||
((204, 204, 503, 503), 503),
|
||||
((204, 204, 404, 404), 204),
|
||||
((204, 204, 404, 503), 204),
|
||||
((204, 204, 503, 503), 204),
|
||||
((204, 404, 404, 404), 404),
|
||||
((204, 404, 404, 503), 503),
|
||||
((204, 404, 404, 503), 404),
|
||||
((204, 404, 503, 503), 503),
|
||||
((204, 503, 503, 503), 503),
|
||||
((404, 404, 404, 404), 404),
|
||||
((404, 404, 404, 503), 404),
|
||||
((404, 404, 503, 503), 503),
|
||||
((404, 404, 503, 503), 404),
|
||||
((404, 503, 503, 503), 503),
|
||||
((503, 503, 503, 503), 503)
|
||||
]
|
||||
@ -318,16 +318,16 @@ class TestContainerController4Replicas(TestContainerController):
|
||||
((204, 204, 204, 204), 204),
|
||||
((204, 204, 204, 404), 204),
|
||||
((204, 204, 204, 503), 204),
|
||||
((204, 204, 404, 404), 503),
|
||||
((204, 204, 404, 503), 503),
|
||||
((204, 204, 503, 503), 503),
|
||||
((204, 204, 404, 404), 204),
|
||||
((204, 204, 404, 503), 204),
|
||||
((204, 204, 503, 503), 204),
|
||||
((204, 404, 404, 404), 404),
|
||||
((204, 404, 404, 503), 503),
|
||||
((204, 404, 404, 503), 404),
|
||||
((204, 404, 503, 503), 503),
|
||||
((204, 503, 503, 503), 503),
|
||||
((404, 404, 404, 404), 404),
|
||||
((404, 404, 404, 503), 404),
|
||||
((404, 404, 503, 503), 503),
|
||||
((404, 404, 503, 503), 404),
|
||||
((404, 503, 503, 503), 503),
|
||||
((503, 503, 503, 503), 503)
|
||||
]
|
||||
|
Loading…
Reference in New Issue
Block a user