From cfd9d055a66b975e9e1bd2e08cd38cfc906d3b15 Mon Sep 17 00:00:00 2001 From: Samuel Merritt Date: Thu, 19 Dec 2013 19:03:06 -0800 Subject: [PATCH] Sanify handoff search depth with non-integer replica counts On GET, the proxy will go search the primary nodes plus some number of handoffs for the account/container/object before giving up and returning a 404. That number is, by default, twice the ring's replica count. This was fine if your ring had an integral number of replicas, but could lead to some slightly-odd behavior if you have fractional replicas. For example, imagine that you have 3.49 replicas in your object ring; perhaps you're migrating a cluster from 3 replicas to 4, and you're being smart and doing it a bit at a time. On an object GET that all the primary nodes 404ed, the proxy would then compute 2 * 3.49 = 6.98, round it up to 7, and go look at 7 handoff nodes. This is sort of weird; the intent was to look at 6 handoffs for objects with 3 replicas, and 8 handoffs for objects with 4, but the effect is 7 for everybody. You also get little latency cliffs as you scale up replica counts. If, instead of 3.49, you had 3.51 replicas, then the proxy would look at 8 handoff nodes in every case [ceil(2 * 3.51) = 8], so there'd be a small-but-noticeable jump in the time it takes to produce a 404. The fix is to compute the number of handoffs based on the number of primary nodes for the partition, not the ring's replica count. This gets rid of the little latency cliffs and makes the behavior more like what you get with integral replica counts. If your ring has an integral number of replicas, there's no behavior change here. Change-Id: I50538941e571135299fd6b86ecd9dc780cf649f5 --- swift/proxy/controllers/obj.py | 2 +- swift/proxy/server.py | 10 +++++----- test/unit/proxy/controllers/test_obj.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/swift/proxy/controllers/obj.py b/swift/proxy/controllers/obj.py index b230269e48..6a99aa6004 100644 --- a/swift/proxy/controllers/obj.py +++ b/swift/proxy/controllers/obj.py @@ -501,7 +501,7 @@ class ObjectController(Controller): """ primary_nodes = ring.get_part_nodes(partition) - num_locals = self.app.write_affinity_node_count(ring) + num_locals = self.app.write_affinity_node_count(len(primary_nodes)) is_local = self.app.write_affinity_is_local_fn if is_local is None: diff --git a/swift/proxy/server.py b/swift/proxy/server.py index ee6890e7e4..8cc92e97a4 100644 --- a/swift/proxy/server.py +++ b/swift/proxy/server.py @@ -114,10 +114,10 @@ class Application(object): value = conf.get('request_node_count', '2 * replicas').lower().split() if len(value) == 1: value = int(value[0]) - self.request_node_count = lambda r: value + self.request_node_count = lambda replicas: value elif len(value) == 3 and value[1] == '*' and value[2] == 'replicas': value = int(value[0]) - self.request_node_count = lambda r: value * r.replica_count + self.request_node_count = lambda replicas: value * replicas else: raise ValueError( 'Invalid request_node_count value: %r' % ''.join(value)) @@ -140,10 +140,10 @@ class Application(object): '2 * replicas').lower().split() if len(value) == 1: value = int(value[0]) - self.write_affinity_node_count = lambda r: value + self.write_affinity_node_count = lambda replicas: value elif len(value) == 3 and value[1] == '*' and value[2] == 'replicas': value = int(value[0]) - self.write_affinity_node_count = lambda r: value * r.replica_count + self.write_affinity_node_count = lambda replicas: value * replicas else: raise ValueError( 'Invalid write_affinity_node_count value: %r' % ''.join(value)) @@ -439,7 +439,7 @@ class Application(object): primary_nodes = self.sort_nodes( list(itertools.islice(node_iter, num_primary_nodes))) handoff_nodes = node_iter - nodes_left = self.request_node_count(ring) + nodes_left = self.request_node_count(len(primary_nodes)) for node in primary_nodes: if not self.error_limited(node): diff --git a/test/unit/proxy/controllers/test_obj.py b/test/unit/proxy/controllers/test_obj.py index cae62b08cf..69c02cf8a8 100755 --- a/test/unit/proxy/controllers/test_obj.py +++ b/test/unit/proxy/controllers/test_obj.py @@ -44,7 +44,7 @@ class TestObjControllerWriteAffinity(unittest.TestCase): self.app = proxy_server.Application( None, FakeMemcache(), account_ring=FakeRing(), container_ring=FakeRing(), object_ring=FakeRing(max_more_nodes=9)) - self.app.request_node_count = lambda ring: 10000000 + self.app.request_node_count = lambda replicas: 10000000 self.app.sort_nodes = lambda l: l # stop shuffling the primary nodes def test_iter_nodes_local_first_noops_when_no_affinity(self):