Fix missing container update

At PUT object request, proxy server makes backend headers (e.g.
X-Container-Partition) which help object-servers to determine
the container-server they should update. In addition, the backend
headers are created as many as the number of container replicas.
(i.e. 3 replica in container ring, 3 backend headers will be created)

On EC case, Swift fans out fragment archives to backend object-servers.
Basically the number of fragment archives will be more than the container
replica number and proxy-server assumes a request as success when quorum
number of object-server succeeded to store. That would cause to make an
orphaned object which is stored but not container updated.

For example, assuming k=10, m=4, container replica=3 case:

Assuming, proxy-server attempts to make 14 backend streams but
unfortunately first 3 nodes returns 507 (disk failure) and then
the Swift doesn't have any other disks.

In the case, proxy keeps 11 backend streams to store and current Swift
assumes it as sufficient because it is more than or equals quorum (right
now k+1 is sufficient i.e. 11 backend streams are enough to store)
However, in the case, the 11 streams doesn't have the container update
header so that the request will succeed but container will be never updated.

This patch allows to extract container updates up to object quorum_size
+ 1 to more nodes to ensure the updates. This approach sacrifices the
container update cost a bit because duplicated updates will be there but
quorum sizes + 1 seems reasonable (even if it's reaplicated case) to pay
to ensure that instead of whole objects incude the update headers.

Now Swift will work like as follows:

For example:
k=10, m=4, qurum_size=11 (k+1), 3 replica for container.
CU: container update
CA: commit ack

That result in like as
 CU   CU   CU   CU   CU   CU   CU   CU   CU   CU   CU   CU
[507, 507, 507, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201, 201]
                                              CA   CA   CA   CA   CA

In this case, at least 3 container updates are saved.

For another example:
7 replicated objects, qurum_size=4 (7//2+1), 3 replica for container.
CU: container update
CA: commit ack (201s for successful PUT on replicated)

 CU   CU   CU   CU   CU
[507, 507, 507, 201, 201, 201, 201]
                 CA   CA   CA   CA

In this replicated case, at least 2 container updates are saved.

Cleaned up some unit tests so that modifying policies doesn't leak
between tests.

Co-Authored-By: John Dickinson <me@not.mn>
Co-Authored-By: Sam Merritt <sam@swiftstack.com>

Closes-Bug: #1460920
Change-Id: I04132858f44b42ee7ecf3b7994cb22a19d001d70
This commit is contained in:
Kota Tsuyuzaki 2015-05-22 16:53:12 -07:00 committed by John Dickinson
parent 25d5e686a1
commit 3f943cfcf2
4 changed files with 92 additions and 27 deletions

View File

@ -276,19 +276,33 @@ class BaseObjectController(Controller):
container_partition, containers, container_partition, containers,
delete_at_container=None, delete_at_partition=None, delete_at_container=None, delete_at_partition=None,
delete_at_nodes=None): delete_at_nodes=None):
policy_index = req.headers['X-Backend-Storage-Policy-Index']
policy = POLICIES.get_by_index(policy_index)
headers = [self.generate_request_headers(req, additional=req.headers) headers = [self.generate_request_headers(req, additional=req.headers)
for _junk in range(n_outgoing)] for _junk in range(n_outgoing)]
def set_container_update(index, container):
headers[index]['X-Container-Partition'] = container_partition
headers[index]['X-Container-Host'] = csv_append(
headers[index].get('X-Container-Host'),
'%(ip)s:%(port)s' % container)
headers[index]['X-Container-Device'] = csv_append(
headers[index].get('X-Container-Device'),
container['device'])
for i, container in enumerate(containers): for i, container in enumerate(containers):
i = i % len(headers) i = i % len(headers)
set_container_update(i, container)
headers[i]['X-Container-Partition'] = container_partition # if # of container_updates is not enough against # of replicas
headers[i]['X-Container-Host'] = csv_append( # (or fragments). Fill them like as pigeon hole problem.
headers[i].get('X-Container-Host'), # TODO?: apply these to X-Delete-At-Container?
'%(ip)s:%(port)s' % container) n_updates_needed = min(policy.quorum + 1, n_outgoing)
headers[i]['X-Container-Device'] = csv_append( container_iter = itertools.cycle(containers)
headers[i].get('X-Container-Device'), existing_updates = len(containers)
container['device']) while existing_updates < n_updates_needed:
set_container_update(existing_updates, next(container_iter))
existing_updates += 1
for i, node in enumerate(delete_at_nodes or []): for i, node in enumerate(delete_at_nodes or []):
i = i % len(headers) i = i % len(headers)

View File

@ -229,9 +229,7 @@ class FakeRing(Ring):
return [dict(node, index=i) for i, node in enumerate(list(self._devs))] return [dict(node, index=i) for i, node in enumerate(list(self._devs))]
def get_more_nodes(self, part): def get_more_nodes(self, part):
# replicas^2 is the true cap for x in range(self.replicas, (self.replicas + self.max_more_nodes)):
for x in range(self.replicas, min(self.replicas + self.max_more_nodes,
self.replicas * self.replicas)):
yield {'ip': '10.0.0.%s' % x, yield {'ip': '10.0.0.%s' % x,
'replication_ip': '10.0.0.%s' % x, 'replication_ip': '10.0.0.%s' % x,
'port': self._base_port + x, 'port': self._base_port + x,

View File

@ -34,7 +34,7 @@ from swift.common import utils, swob
from swift.proxy import server as proxy_server from swift.proxy import server as proxy_server
from swift.proxy.controllers import obj from swift.proxy.controllers import obj
from swift.proxy.controllers.base import get_info as _real_get_info from swift.proxy.controllers.base import get_info as _real_get_info
from swift.common.storage_policy import POLICIES, ECDriverError from swift.common.storage_policy import POLICIES, ECDriverError, StoragePolicy
from test.unit import FakeRing, FakeMemcache, fake_http_connect, \ from test.unit import FakeRing, FakeMemcache, fake_http_connect, \
debug_logger, patch_policies, SlowBody, FakeStatus debug_logger, patch_policies, SlowBody, FakeStatus
@ -168,16 +168,15 @@ class BaseObjectControllerMixin(object):
# make our fake ring have plenty of nodes, and not get limited # make our fake ring have plenty of nodes, and not get limited
# artificially by the proxy max request node count # artificially by the proxy max request node count
object_ring.max_more_nodes = 100000 object_ring.max_more_nodes = 100000
self.app.request_node_count = lambda r: 100000 # nothing magic about * 2 + 3, just a way to make it bigger
self.app.request_node_count = lambda r: r * 2 + 3
all_nodes = object_ring.get_part_nodes(1) all_nodes = object_ring.get_part_nodes(1)
all_nodes.extend(object_ring.get_more_nodes(1)) all_nodes.extend(object_ring.get_more_nodes(1))
# i guess fake_ring wants the get_more_nodes iter to more safely be # limit to the number we're going to look at in this request
# converted to a list with a smallish sort of limit which *can* be nodes_requested = self.app.request_node_count(object_ring.replicas)
# lower than max_more_nodes all_nodes = all_nodes[:nodes_requested]
fake_rings_real_max_more_nodes_value = object_ring.replicas ** 2
self.assertEqual(len(all_nodes), fake_rings_real_max_more_nodes_value)
# make sure we have enough local nodes (sanity) # make sure we have enough local nodes (sanity)
all_local_nodes = [n for n in all_nodes if all_local_nodes = [n for n in all_nodes if
@ -240,6 +239,10 @@ class BaseObjectControllerMixin(object):
self.assertEqual(resp.status_int, 204) self.assertEqual(resp.status_int, 204)
def test_DELETE_missing_one(self): def test_DELETE_missing_one(self):
# Obviously this test doesn't work if we're testing 1 replica.
# In that case, we don't have any failovers to check.
if self.replicas() == 1:
return
req = swift.common.swob.Request.blank('/v1/a/c/o', method='DELETE') req = swift.common.swob.Request.blank('/v1/a/c/o', method='DELETE')
codes = [404] + [204] * (self.replicas() - 1) codes = [404] + [204] * (self.replicas() - 1)
random.shuffle(codes) random.shuffle(codes)
@ -248,6 +251,10 @@ class BaseObjectControllerMixin(object):
self.assertEqual(resp.status_int, 204) self.assertEqual(resp.status_int, 204)
def test_DELETE_not_found(self): def test_DELETE_not_found(self):
# Obviously this test doesn't work if we're testing 1 replica.
# In that case, we don't have any failovers to check.
if self.replicas() == 1:
return
req = swift.common.swob.Request.blank('/v1/a/c/o', method='DELETE') req = swift.common.swob.Request.blank('/v1/a/c/o', method='DELETE')
codes = [404] * (self.replicas() - 1) + [204] codes = [404] * (self.replicas() - 1) + [204]
with set_http_connect(*codes): with set_http_connect(*codes):
@ -387,7 +394,7 @@ class BaseObjectControllerMixin(object):
def test_HEAD_x_newest(self): def test_HEAD_x_newest(self):
req = swift.common.swob.Request.blank('/v1/a/c/o', method='HEAD', req = swift.common.swob.Request.blank('/v1/a/c/o', method='HEAD',
headers={'X-Newest': 'true'}) headers={'X-Newest': 'true'})
with set_http_connect(200, 200, 200): with set_http_connect(*([200] * self.replicas())):
resp = req.get_response(self.app) resp = req.get_response(self.app)
self.assertEqual(resp.status_int, 200) self.assertEqual(resp.status_int, 200)
@ -395,14 +402,14 @@ class BaseObjectControllerMixin(object):
req = swob.Request.blank('/v1/a/c/o', method='HEAD', req = swob.Request.blank('/v1/a/c/o', method='HEAD',
headers={'X-Newest': 'true'}) headers={'X-Newest': 'true'})
ts = (utils.Timestamp(t) for t in itertools.count(int(time.time()))) ts = (utils.Timestamp(t) for t in itertools.count(int(time.time())))
timestamps = [next(ts) for i in range(3)] timestamps = [next(ts) for i in range(self.replicas())]
newest_timestamp = timestamps[-1] newest_timestamp = timestamps[-1]
random.shuffle(timestamps) random.shuffle(timestamps)
backend_response_headers = [{ backend_response_headers = [{
'X-Backend-Timestamp': t.internal, 'X-Backend-Timestamp': t.internal,
'X-Timestamp': t.normal 'X-Timestamp': t.normal
} for t in timestamps] } for t in timestamps]
with set_http_connect(200, 200, 200, with set_http_connect(*([200] * self.replicas()),
headers=backend_response_headers): headers=backend_response_headers):
resp = req.get_response(self.app) resp = req.get_response(self.app)
self.assertEqual(resp.status_int, 200) self.assertEqual(resp.status_int, 200)
@ -413,14 +420,14 @@ class BaseObjectControllerMixin(object):
headers={'X-Newest': 'true'}) headers={'X-Newest': 'true'})
ts = (utils.Timestamp(time.time(), offset=offset) ts = (utils.Timestamp(time.time(), offset=offset)
for offset in itertools.count()) for offset in itertools.count())
timestamps = [next(ts) for i in range(3)] timestamps = [next(ts) for i in range(self.replicas())]
newest_timestamp = timestamps[-1] newest_timestamp = timestamps[-1]
random.shuffle(timestamps) random.shuffle(timestamps)
backend_response_headers = [{ backend_response_headers = [{
'X-Backend-Timestamp': t.internal, 'X-Backend-Timestamp': t.internal,
'X-Timestamp': t.normal 'X-Timestamp': t.normal
} for t in timestamps] } for t in timestamps]
with set_http_connect(200, 200, 200, with set_http_connect(*([200] * self.replicas()),
headers=backend_response_headers): headers=backend_response_headers):
resp = req.get_response(self.app) resp = req.get_response(self.app)
self.assertEqual(resp.status_int, 200) self.assertEqual(resp.status_int, 200)
@ -478,6 +485,40 @@ class BaseObjectControllerMixin(object):
resp = req.get_response(self.app) resp = req.get_response(self.app)
self.assertEqual(resp.status_int, 411) self.assertEqual(resp.status_int, 411)
def test_container_update_backend_requests(self):
for policy in POLICIES:
req = swift.common.swob.Request.blank(
'/v1/a/c/o', method='PUT',
headers={'Content-Length': '0',
'X-Backend-Storage-Policy-Index': int(policy)})
controller = self.controller_cls(self.app, 'a', 'c', 'o')
# This is the number of container updates we're doing, simulating
# 1 to 15 container replicas.
for num_containers in range(1, 16):
containers = [{'ip': '1.0.0.%s' % i,
'port': '60%s' % str(i).zfill(2),
'device': 'sdb'} for i in range(num_containers)]
backend_headers = controller._backend_requests(
req, self.replicas(policy), 1, containers)
# how many of the backend headers have a container update
container_updates = len(
[headers for headers in backend_headers
if 'X-Container-Partition' in headers])
if num_containers <= self.quorum(policy):
# filling case
expected = min(self.quorum(policy) + 1,
self.replicas(policy))
else:
# container updates >= object replicas
expected = min(num_containers,
self.replicas(policy))
self.assertEqual(container_updates, expected)
# end of BaseObjectControllerMixin # end of BaseObjectControllerMixin
@ -889,6 +930,18 @@ class TestReplicatedObjController(BaseObjectControllerMixin,
self.assertEqual(req.environ.get('swift.log_info'), None) self.assertEqual(req.environ.get('swift.log_info'), None)
@patch_policies(
[StoragePolicy(0, '1-replica', True),
StoragePolicy(1, '5-replica', False),
StoragePolicy(2, '8-replica', False),
StoragePolicy(3, '15-replica', False)],
fake_ring_args=[
{'replicas': 1}, {'replicas': 5}, {'replicas': 8}, {'replicas': 15}])
class TestReplicatedObjControllerVariousReplicas(BaseObjectControllerMixin,
unittest.TestCase):
controller_cls = obj.ReplicatedObjectController
@patch_policies(legacy_only=True) @patch_policies(legacy_only=True)
class TestObjControllerLegacyCache(TestReplicatedObjController): class TestObjControllerLegacyCache(TestReplicatedObjController):
""" """

View File

@ -3650,7 +3650,7 @@ class TestObjectController(unittest.TestCase):
collected_nodes.append(node) collected_nodes.append(node)
self.assertEqual(len(collected_nodes), 5) self.assertEqual(len(collected_nodes), 5)
object_ring.max_more_nodes = 20 object_ring.max_more_nodes = 6
self.app.request_node_count = lambda r: 20 self.app.request_node_count = lambda r: 20
partition, nodes = object_ring.get_nodes('account', partition, nodes = object_ring.get_nodes('account',
'container', 'container',
@ -5829,15 +5829,15 @@ class TestObjectController(unittest.TestCase):
self.assertEqual( self.assertEqual(
seen_headers, [ seen_headers, [
{'X-Container-Host': '10.0.0.0:1000',
'X-Container-Partition': '0',
'X-Container-Device': 'sda'},
{'X-Container-Host': '10.0.0.0:1000', {'X-Container-Host': '10.0.0.0:1000',
'X-Container-Partition': '0', 'X-Container-Partition': '0',
'X-Container-Device': 'sda'}, 'X-Container-Device': 'sda'},
{'X-Container-Host': '10.0.0.1:1001', {'X-Container-Host': '10.0.0.1:1001',
'X-Container-Partition': '0', 'X-Container-Partition': '0',
'X-Container-Device': 'sdb'}, 'X-Container-Device': 'sdb'}])
{'X-Container-Host': None,
'X-Container-Partition': None,
'X-Container-Device': None}])
def test_PUT_x_container_headers_with_more_container_replicas(self): def test_PUT_x_container_headers_with_more_container_replicas(self):
self.app.container_ring.set_replicas(4) self.app.container_ring.set_replicas(4)