proxy-server: include suppression time in error limit log

Also, add some more test coverage and tidy up some tests.

Change-Id: Icf3a1df87d52dc5f95e15e676da229a3323326a2
This commit is contained in:
Alistair Coles 2022-11-10 13:00:08 +00:00
parent c82c73122b
commit 7a0118fca0
3 changed files with 55 additions and 78 deletions

View File

@ -675,8 +675,8 @@ class Application(object):
self.error_limiter.limit(node) self.error_limiter.limit(node)
self.logger.increment('error_limiter.forced_limit') self.logger.increment('error_limiter.forced_limit')
self.logger.error( self.logger.error(
'Node will be error limited from now: %s, error: %s', 'Node will be error limited for %.2fs: %s, error: %s',
node_to_string(node), self.error_limiter.suppression_interval, node_to_string(node),
msg) msg)
def _error_increment(self, node): def _error_increment(self, node):
@ -689,8 +689,8 @@ class Application(object):
if self.error_limiter.increment(node): if self.error_limiter.increment(node):
self.logger.increment('error_limiter.incremented_limit') self.logger.increment('error_limiter.incremented_limit')
self.logger.error( self.logger.error(
'Node will be error limited from now: %s', 'Node will be error limited for %.2fs: %s',
node_to_string(node)) self.error_limiter.suppression_interval, node_to_string(node))
def error_occurred(self, node, msg): def error_occurred(self, node, msg):
""" """

View File

@ -71,24 +71,21 @@ class TestErrorLimiter(unittest.TestCase):
def test_increment(self): def test_increment(self):
node = self.ring.devs[-1] node = self.ring.devs[-1]
limiter = ErrorLimiter(suppression_interval=60, suppression_limit=10) limiter = ErrorLimiter(suppression_interval=60, suppression_limit=10)
last_errors = 0
node_key = limiter.node_key(node) node_key = limiter.node_key(node)
for _ in range(limiter.suppression_limit): for i in range(limiter.suppression_limit):
self.assertFalse(limiter.increment(node)) self.assertFalse(limiter.increment(node))
node_errors = limiter.stats.get(node_key) self.assertEqual(i + 1, limiter.stats.get(node_key)['errors'])
self.assertGreater(node_errors['errors'], last_errors)
self.assertFalse(limiter.is_limited(node)) self.assertFalse(limiter.is_limited(node))
last_errors = node_errors['errors']
# One more to make sure it is > suppression_limit # A few more to make sure it is > suppression_limit
for i in range(1, 4):
self.assertTrue(limiter.increment(node)) self.assertTrue(limiter.increment(node))
node_errors = limiter.stats.get(node_key) self.assertEqual(limiter.suppression_limit + i,
self.assertEqual(limiter.suppression_limit + 1, limiter.stats.get(node_key)['errors'])
node_errors['errors'])
self.assertTrue(limiter.is_limited(node)) self.assertTrue(limiter.is_limited(node))
last_time = node_errors['last_error']
# Simulate time with no errors have gone by. # Simulate time with no errors have gone by.
last_time = limiter.stats.get(node_key)['last_error']
now = last_time + limiter.suppression_interval + 1 now = last_time + limiter.suppression_interval + 1
with mock.patch('swift.common.error_limiter.time', with mock.patch('swift.common.error_limiter.time',
return_value=now): return_value=now):

View File

@ -1182,7 +1182,9 @@ class TestProxyServer(unittest.TestCase):
expected_info = additional_info.decode('utf8') expected_info = additional_info.decode('utf8')
else: else:
expected_info = additional_info expected_info = additional_info
for i in range(suppression_limit):
incremented_limit_samples = []
for i in range(suppression_limit + 1):
try: try:
raise Exception('kaboom1!') raise Exception('kaboom1!')
except Exception as err: except Exception as err:
@ -1190,42 +1192,23 @@ class TestProxyServer(unittest.TestCase):
app.exception_occurred( app.exception_occurred(
node, 'server-type', additional_info) node, 'server-type', additional_info)
self.assertEqual(i + 1, node_error_count(app, node)) self.assertEqual(i + 1, node_error_count(app, node))
line = logger.get_lines_for_level('error')[-1] line = logger.get_lines_for_level('error')[i]
self.assertIn('server-type server', line) self.assertIn('server-type server', line)
self.assertIn(expected_info, line) self.assertIn(expected_info, line)
self.assertIn(node['ip'], line) self.assertIn(node['ip'], line)
self.assertIn(str(node['port']), line) self.assertIn(str(node['port']), line)
self.assertIn(node['device'], line) self.assertIn(node['device'], line)
log_args, log_kwargs = logger.log_dict['error'][-1] log_args, log_kwargs = logger.log_dict['error'][i]
self.assertTrue(log_kwargs['exc_info']) self.assertTrue(log_kwargs['exc_info'])
self.assertIs(caught_exc, log_kwargs['exc_info'][1]) self.assertIs(caught_exc, log_kwargs['exc_info'][1])
incremented_limit_samples.append(
self.assertEqual(0, logger.get_increment_counts().get( logger.get_increment_counts().get(
'error_limiter.incremented_limit', 0)) 'error_limiter.incremented_limit', 0))
self.assertEqual([0] * 10 + [1], incremented_limit_samples)
# One more error to go beyond suppression_limit.
try:
raise Exception('kaboom1!')
except Exception as err:
caught_exc = err
app.exception_occurred(node, 'server-type', additional_info)
self.assertEqual(suppression_limit + 1,
node_error_count(app, node))
lines = logger.get_lines_for_level('error')
self.assertIn('server-type server', lines[-2])
self.assertIn(expected_info, lines[-2])
self.assertIn(node['ip'], lines[-2])
self.assertIn(str(node['port']), lines[-2])
self.assertIn(node['device'], lines[-2])
log_args, log_kwargs = logger.log_dict['error'][-2]
self.assertTrue(log_kwargs['exc_info'])
self.assertIs(caught_exc, log_kwargs['exc_info'][1])
self.assertEqual( self.assertEqual(
('Node will be error limited from now: %s' % ('Node will be error limited for 60.00s: %s' %
node_to_string(node)), node_to_string(node)),
lines[-1]) logger.get_lines_for_level('error')[suppression_limit + 1])
self.assertEqual(1, logger.get_increment_counts().get(
'error_limiter.incremented_limit', 0))
do_test('success') do_test('success')
do_test('succès') do_test('succès')
@ -1248,33 +1231,36 @@ class TestProxyServer(unittest.TestCase):
expected_msg = msg.decode('utf8') expected_msg = msg.decode('utf8')
else: else:
expected_msg = msg expected_msg = msg
for i in range(suppression_limit): incremented_limit_samples = []
for i in range(suppression_limit + 1):
app.error_occurred(node, msg) app.error_occurred(node, msg)
self.assertEqual(i + 1, node_error_count(app, node)) self.assertEqual(i + 1, node_error_count(app, node))
line = logger.get_lines_for_level('error')[-1] line = logger.get_lines_for_level('error')[i]
self.assertIn(expected_msg, line) self.assertIn(expected_msg, line)
self.assertIn(node['ip'], line) self.assertIn(node_to_string(node), line)
self.assertIn(str(node['port']), line) incremented_limit_samples.append(
self.assertIn(node['device'], line) logger.get_increment_counts().get(
self.assertEqual(0, logger.get_increment_counts().get(
'error_limiter.incremented_limit', 0)) 'error_limiter.incremented_limit', 0))
# One more error to go beyond suppression_limit. self.assertEqual([0] * 10 + [1], incremented_limit_samples)
app.error_occurred(node, msg)
self.assertEqual(suppression_limit + 1,
node_error_count(app, node))
lines = logger.get_lines_for_level('error')
self.assertIn(expected_msg, lines[-2])
self.assertIn(node['ip'], lines[-2])
self.assertIn(str(node['port']), lines[-2])
self.assertIn(node['device'], lines[-2])
self.assertEqual( self.assertEqual(
('Node will be error limited from now: %s' % ('Node will be error limited for 60.00s: %s' %
node_to_string(node)), node_to_string(node)),
lines[-1]) logger.get_lines_for_level('error')[-1])
self.assertEqual(1, logger.get_increment_counts().get(
# error limiting is extended if another error occurs
app.error_occurred(node, msg)
self.assertEqual(suppression_limit + 2,
node_error_count(app, node))
line = logger.get_lines_for_level('error')[-2]
self.assertIn(expected_msg, line)
self.assertIn(node_to_string(node), line)
self.assertEqual(2, logger.get_increment_counts().get(
'error_limiter.incremented_limit', 0)) 'error_limiter.incremented_limit', 0))
self.assertEqual(
('Node will be error limited for 60.00s: %s' %
node_to_string(node)),
logger.get_lines_for_level('error')[-1])
do_test('success') do_test('success')
do_test('succès') do_test('succès')
@ -1368,9 +1354,8 @@ class TestProxyServer(unittest.TestCase):
error_lines = app.logger.get_lines_for_level('error') error_lines = app.logger.get_lines_for_level('error')
self.assertEqual(1, len(error_lines)) self.assertEqual(1, len(error_lines))
self.assertEqual( self.assertEqual(
'Node will be error limited from now: 10.0.0.0:1000/sda, ' 'Node will be error limited for 60.00s: 10.0.0.0:1000/sda, '
'error: ERROR Insufficient Storage', 'error: ERROR Insufficient Storage', error_lines[0])
error_lines[0])
self.assertEqual(11, node_error_count(app, node)) self.assertEqual(11, node_error_count(app, node))
self.assertTrue(app.error_limited(node)) self.assertTrue(app.error_limited(node))
@ -1381,9 +1366,8 @@ class TestProxyServer(unittest.TestCase):
error_lines = app.logger.get_lines_for_level('error') error_lines = app.logger.get_lines_for_level('error')
self.assertEqual(1, len(error_lines)) self.assertEqual(1, len(error_lines))
self.assertEqual( self.assertEqual(
'Node will be error limited from now: 10.0.0.0:1000/sda, ' 'Node will be error limited for 60.00s: 10.0.0.0:1000/sda, '
'error: ERROR Insufficient Storage', 'error: ERROR Insufficient Storage', error_lines[0])
error_lines[0])
self.assertEqual(11, node_error_count(app, node)) self.assertEqual(11, node_error_count(app, node))
self.assertTrue(app.error_limited(node)) self.assertTrue(app.error_limited(node))
@ -3475,10 +3459,8 @@ class TestReplicatedObjectController(
'error_limiter.forced_limit', 0)) 'error_limiter.forced_limit', 0))
line = self.logger.get_lines_for_level('error')[-1] line = self.logger.get_lines_for_level('error')[-1]
self.assertEqual( self.assertEqual(
('Node will be error limited from now: %s, error: %s' % ('Node will be error limited for 60.00s: %s, error: %s'
(node_to_string(error_node), % (node_to_string(error_node), 'test')), line)
'test')),
line)
# no error limited checking yet. # no error limited checking yet.
self.assertEqual( self.assertEqual(
@ -5339,10 +5321,8 @@ class TestReplicatedObjectController(
'error_limiter.forced_limit', 0)) 'error_limiter.forced_limit', 0))
line = self.logger.get_lines_for_level('error')[-1] line = self.logger.get_lines_for_level('error')[-1]
self.assertEqual( self.assertEqual(
('Node will be error limited from now: %s, error: %s' % ('Node will be error limited for 60.00s: %s, error: %s'
(node_to_string(first_nodes[0]), % (node_to_string(first_nodes[0]), 'test')), line)
'test')),
line)
second_nodes = list(self.app.iter_nodes( second_nodes = list(self.app.iter_nodes(
object_ring, 0, self.logger)) object_ring, 0, self.logger))