Consistently apply node error limiting rules in proxy

All GET or HEAD requests consistently error limit nodes that return 507
and increment errors for nodes responding with any other 5XX.

There were two places in the object PUT path where the proxy was error
limiting nodes and their behavior was inconsistent.  During expect-100
connect we would only error_limit nodes on 507, and during response we
would increment errors for all 5XX series responses.  This was pretty
hard to reason about and the divergence in behavior of questionable
value.

An audit of base controller highlighted where make_requests would apply
error_limit's on 507 but not increment errors on other 5XX responses.

Now anywhere we track errors on nodes we use error_limit on 507 and
error_occurred on any other 5XX series request.  Additionally a Timeout
or Exception that is logged through exception_occurred will bump errors -
which is consistent with the approach in "Add Error Limiting to slow
nodes" [1].

1. https://review.openstack.org/#/c/112424/

Change-Id: I67e489d18afd6bdfc730bfdba76f85a2e3ca74f0
This commit is contained in:
Clay Gerrard 2014-11-17 20:29:45 -08:00
parent e429cd81be
commit 99d501831e
6 changed files with 194 additions and 11 deletions

View File

@ -1033,6 +1033,14 @@ class Controller(object):
elif resp.status == HTTP_INSUFFICIENT_STORAGE:
self.app.error_limit(node,
_('ERROR Insufficient Storage'))
elif is_server_error(resp.status):
self.app.error_occurred(
node, _('ERROR %(status)d '
'Trying to %(method)s %(path)s'
'From Container Server') % {
'status': resp.status,
'method': method,
'path': path})
except (Exception, Timeout):
self.app.exception_occurred(
node, self.server_type,

View File

@ -47,10 +47,11 @@ from swift.common import constraints
from swift.common.exceptions import ChunkReadTimeout, \
ChunkWriteTimeout, ConnectionTimeout, ListingIterNotFound, \
ListingIterNotAuthorized, ListingIterError
from swift.common.http import is_success, is_client_error, HTTP_CONTINUE, \
HTTP_CREATED, HTTP_MULTIPLE_CHOICES, HTTP_NOT_FOUND, \
HTTP_INTERNAL_SERVER_ERROR, HTTP_SERVICE_UNAVAILABLE, \
HTTP_INSUFFICIENT_STORAGE, HTTP_PRECONDITION_FAILED
from swift.common.http import (
is_success, is_client_error, is_server_error, HTTP_CONTINUE, HTTP_CREATED,
HTTP_MULTIPLE_CHOICES, HTTP_NOT_FOUND, HTTP_INTERNAL_SERVER_ERROR,
HTTP_SERVICE_UNAVAILABLE, HTTP_INSUFFICIENT_STORAGE,
HTTP_PRECONDITION_FAILED)
from swift.proxy.controllers.base import Controller, delay_denial, \
cors_validation
from swift.common.swob import HTTPAccepted, HTTPBadRequest, HTTPNotFound, \
@ -372,6 +373,11 @@ class ObjectController(Controller):
return conn
elif resp.status == HTTP_INSUFFICIENT_STORAGE:
self.app.error_limit(node, _('ERROR Insufficient Storage'))
elif is_server_error(resp.status):
self.app.error_occurred(
node, _('ERROR %(status)d Expect: 100-continue '
'From Object Server') % {
'status': resp.status})
except (Exception, Timeout):
self.app.exception_occurred(
node, _('Object'),
@ -404,7 +410,10 @@ class ObjectController(Controller):
statuses.append(response.status)
reasons.append(response.reason)
bodies.append(response.read())
if response.status >= HTTP_INTERNAL_SERVER_ERROR:
if response.status == HTTP_INSUFFICIENT_STORAGE:
self.app.error_limit(conn.node,
_('ERROR Insufficient Storage'))
elif response.status >= HTTP_INTERNAL_SERVER_ERROR:
self.app.error_occurred(
conn.node,
_('ERROR %(status)d %(body)s From Object Server '

View File

@ -452,6 +452,12 @@ class Application(object):
{'msg': msg, 'ip': node['ip'],
'port': node['port'], 'device': node['device']})
def _incr_node_errors(self, node):
node_key = self._error_limit_node_key(node)
error_stats = self._error_limiting.setdefault(node_key, {})
error_stats['errors'] = error_stats.get('errors', 0) + 1
error_stats['last_error'] = time()
def error_occurred(self, node, msg):
"""
Handle logging, and handling of errors.
@ -459,10 +465,7 @@ class Application(object):
:param node: dictionary of node to handle errors for
:param msg: error message
"""
node_key = self._error_limit_node_key(node)
error_stats = self._error_limiting.setdefault(node_key, {})
error_stats['errors'] = error_stats.get('errors', 0) + 1
error_stats['last_error'] = time()
self._incr_node_errors(node)
self.logger.error(_('%(msg)s %(ip)s:%(port)s/%(device)s'),
{'msg': msg, 'ip': node['ip'],
'port': node['port'], 'device': node['device']})
@ -531,6 +534,7 @@ class Application(object):
:param typ: server type
:param additional_info: additional information to log
"""
self._incr_node_errors(node)
self.logger.exception(
_('ERROR with %(type)s server %(ip)s:%(port)s/%(device)s re: '
'%(info)s'),

View File

@ -772,7 +772,25 @@ def fake_http_connect(*code_iter, **kwargs):
@contextmanager
def mocked_http_conn(*args, **kwargs):
requests = []
def capture_requests(ip, port, method, path, headers, qs, ssl):
req = {
'ip': ip,
'port': port,
'method': method,
'path': path,
'headers': headers,
'qs': qs,
'ssl': ssl,
}
requests.append(req)
kwargs.setdefault('give_connect', capture_requests)
fake_conn = fake_http_connect(*args, **kwargs)
fake_conn.requests = requests
with mocklib.patch('swift.common.bufferedhttp.http_connect_raw',
new=fake_conn):
yield fake_conn
left_over_status = list(fake_conn.code_iter)
if left_over_status:
raise AssertionError('left over status %r' % left_over_status)

View File

@ -16,6 +16,8 @@
import mock
import unittest
from eventlet import Timeout
from swift.common.swob import Request
from swift.proxy import server as proxy_server
from swift.proxy.controllers.base import headers_to_container_info
@ -23,17 +25,48 @@ from test.unit import fake_http_connect, FakeRing, FakeMemcache
from swift.common.storage_policy import StoragePolicy
from swift.common.request_helpers import get_sys_meta_prefix
from test.unit import patch_policies
from test.unit import patch_policies, mocked_http_conn, debug_logger
from test.unit.common.ring.test_ring import TestRingBase
from test.unit.proxy.test_server import node_error_count
@patch_policies([StoragePolicy(0, 'zero', True, object_ring=FakeRing())])
class TestContainerController(TestRingBase):
def setUp(self):
TestRingBase.setUp(self)
self.logger = debug_logger()
self.container_ring = FakeRing(max_more_nodes=9)
self.app = proxy_server.Application(None, FakeMemcache(),
logger=self.logger,
account_ring=FakeRing(),
container_ring=FakeRing())
container_ring=self.container_ring)
self.account_info = {
'status': 200,
'container_count': '10',
'total_object_count': '100',
'bytes': '1000',
'meta': {},
'sysmeta': {},
}
class FakeAccountInfoContainerController(
proxy_server.ContainerController):
def account_info(controller, *args, **kwargs):
patch_path = 'swift.proxy.controllers.base.get_info'
with mock.patch(patch_path) as mock_get_info:
mock_get_info.return_value = dict(self.account_info)
return super(FakeAccountInfoContainerController,
controller).account_info(
*args, **kwargs)
_orig_get_controller = self.app.get_controller
def wrapped_get_controller(*args, **kwargs):
with mock.patch('swift.proxy.server.ContainerController',
new=FakeAccountInfoContainerController):
return _orig_get_controller(*args, **kwargs)
self.app.get_controller = wrapped_get_controller
def test_container_info_in_response_env(self):
controller = proxy_server.ContainerController(self.app, 'a', 'c')
@ -123,6 +156,53 @@ class TestContainerController(TestRingBase):
self.assertEqual(context['headers'][user_meta_key], 'bar')
self.assertNotEqual(context['headers']['x-timestamp'], '1.0')
def test_node_errors(self):
self.app.sort_nodes = lambda n: n
for method in ('PUT', 'DELETE', 'POST'):
def test_status_map(statuses, expected):
self.app._error_limiting = {}
req = Request.blank('/v1/a/c', method=method)
with mocked_http_conn(*statuses) as fake_conn:
print 'a' * 50
resp = req.get_response(self.app)
self.assertEqual(resp.status_int, expected)
for req in fake_conn.requests:
self.assertEqual(req['method'], method)
self.assert_(req['path'].endswith('/a/c'))
base_status = [201] * 3
# test happy path
test_status_map(list(base_status), 201)
for i in range(3):
self.assertEqual(node_error_count(
self.app, self.container_ring.devs[i]), 0)
# single node errors and test isolation
for i in range(3):
status_list = list(base_status)
status_list[i] = 503
status_list.append(201)
test_status_map(status_list, 201)
for j in range(3):
expected = 1 if j == i else 0
self.assertEqual(node_error_count(
self.app, self.container_ring.devs[j]), expected)
# timeout
test_status_map((201, Timeout(), 201, 201), 201)
self.assertEqual(node_error_count(
self.app, self.container_ring.devs[1]), 1)
# exception
test_status_map((Exception('kaboom!'), 201, 201, 201), 201)
self.assertEqual(node_error_count(
self.app, self.container_ring.devs[0]), 1)
# insufficient storage
test_status_map((201, 201, 507, 201), 201)
self.assertEqual(node_error_count(
self.app, self.container_ring.devs[2]),
self.app.error_suppression_limit + 1)
if __name__ == '__main__':
unittest.main()

View File

@ -20,6 +20,7 @@ import unittest
from contextlib import contextmanager
import mock
from eventlet import Timeout
import swift
from swift.common import utils, swob
@ -28,6 +29,7 @@ from swift.common.storage_policy import StoragePolicy, POLICIES
from test.unit import FakeRing, FakeMemcache, fake_http_connect, \
debug_logger, patch_policies
from test.unit.proxy.test_server import node_error_count
@contextmanager
@ -51,6 +53,11 @@ def set_http_connect(*args, **kwargs):
class PatchedObjControllerApp(proxy_server.Application):
"""
This patch is just a hook over handle_request to ensure that when
get_controller is called the ObjectController class is patched to
return a (possibly stubbed) ObjectController class.
"""
object_controller = proxy_server.ObjectController
@ -154,6 +161,10 @@ class TestObjController(unittest.TestCase):
return super(FakeContainerInfoObjController,
controller).container_info(*args, **kwargs)
# this is taking advantage of the fact that self.app is a
# PachedObjControllerApp, so handle_response will route into an
# instance of our FakeContainerInfoObjController just by
# overriding the class attribute for object_controller
self.app.object_controller = FakeContainerInfoObjController
def test_PUT_simple(self):
@ -187,6 +198,59 @@ class TestObjController(unittest.TestCase):
resp = req.get_response(self.app)
self.assertEquals(resp.status_int, 400)
def test_PUT_connect_exceptions(self):
object_ring = self.app.get_object_ring(None)
self.app.sort_nodes = lambda n: n # disable shuffle
def test_status_map(statuses, expected):
self.app._error_limiting = {}
req = swob.Request.blank('/v1/a/c/o.jpg', method='PUT',
body='test body')
with set_http_connect(*statuses):
resp = req.get_response(self.app)
self.assertEqual(resp.status_int, expected)
base_status = [201] * 3
# test happy path
test_status_map(list(base_status), 201)
for i in range(3):
self.assertEqual(node_error_count(
self.app, object_ring.devs[i]), 0)
# single node errors and test isolation
for i in range(3):
status_list = list(base_status)
status_list[i] = 503
test_status_map(status_list, 201)
for j in range(3):
self.assertEqual(node_error_count(
self.app, object_ring.devs[j]), 1 if j == i else 0)
# connect errors
test_status_map((201, Timeout(), 201, 201), 201)
self.assertEqual(node_error_count(
self.app, object_ring.devs[1]), 1)
test_status_map((Exception('kaboom!'), 201, 201, 201), 201)
self.assertEqual(node_error_count(
self.app, object_ring.devs[0]), 1)
# expect errors
test_status_map((201, 201, (503, None), 201), 201)
self.assertEqual(node_error_count(
self.app, object_ring.devs[2]), 1)
test_status_map(((507, None), 201, 201, 201), 201)
self.assertEqual(
node_error_count(self.app, object_ring.devs[0]),
self.app.error_suppression_limit + 1)
# response errors
test_status_map(((100, Timeout()), 201, 201), 201)
self.assertEqual(
node_error_count(self.app, object_ring.devs[0]), 1)
test_status_map((201, 201, (100, Exception())), 201)
self.assertEqual(
node_error_count(self.app, object_ring.devs[2]), 1)
test_status_map((201, (100, 507), 201), 201)
self.assertEqual(
node_error_count(self.app, object_ring.devs[1]),
self.app.error_suppression_limit + 1)
def test_GET_simple(self):
req = swift.common.swob.Request.blank('/v1/a/c/o')
with set_http_connect(200):