Improve exponential backoff for wrap_db_retry
The @oslo_db.api.wrap_db_retry decorator retries db.api methods. When a db_error is raised, it calls the wrapped method again after a delay (precisely, 2**retry_times seconds). When the db_error is a deadlock error, this fixed schedule is no longer a good fit: many deadlocks arise because several methods (transactions) run concurrently, and if every caller retries after the same 2**retry_times seconds, the calls simply collide again. To minimize the chance of regenerating a deadlock and to reduce the average sleep time, add random jitter to the delay period by default whenever a deadlock error is detected.

Change-Id: I206745708570f1f292529ff58eee9b83fc09a9f2
Closes-bug: #1737869
parent 85cf42e841
commit 4c20534179
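To make the motivation concrete, here is a small illustrative sketch (not part of the commit) comparing the fixed 2**retry_times schedule with a jittered one, using the decorator's default cap of max_retry_interval=10. The worker labels and helper names are invented for the example:

    import random

    MAX_RETRY_INTERVAL = 10  # default max_retry_interval in wrap_db_retry

    def fixed_schedule(retries):
        # Without jitter every caller sleeps exactly 2 ** retry_times seconds
        # (capped), so concurrent callers wake up together and can deadlock again.
        return [min(2 ** i, MAX_RETRY_INTERVAL) for i in range(retries)]

    def jittered_schedule(retries):
        # With jitter each caller sleeps a random amount in [0, 2 ** retry_times),
        # so concurrent callers are unlikely to wake up at the same moment.
        return [min(random.uniform(0, 2 ** i), MAX_RETRY_INTERVAL)
                for i in range(retries)]

    print("worker A (fixed):   ", fixed_schedule(5))   # [1, 2, 4, 8, 10]
    print("worker B (fixed):   ", fixed_schedule(5))   # identical -> collision
    print("worker A (jittered):", jittered_schedule(5))
    print("worker B (jittered):", jittered_schedule(5))  # almost surely different

The diff itself follows: first the hunks in the wrap_db_retry implementation, then the new unit tests in DBRetryRequestCase.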
@@ -24,6 +24,7 @@ API methods.
 """
 
 import logging
+import random
 import threading
 import time
 
@@ -103,15 +104,21 @@ class wrap_db_retry(object):
 
     :param exception_checker: checks if an exception should trigger a retry
    :type exception_checker: callable
+
+    :param jitter: determine increase retry interval use jitter or not, jitter
+        is always interpreted as True for a DBDeadlockError
+    :type jitter: bool
    """

    def __init__(self, retry_interval=1, max_retries=20,
                 inc_retry_interval=True,
                 max_retry_interval=10, retry_on_disconnect=False,
                 retry_on_deadlock=False,
-                 exception_checker=lambda exc: False):
+                 exception_checker=lambda exc: False,
+                 jitter=False):
        super(wrap_db_retry, self).__init__()

+        self.jitter = jitter
        self.db_error = (exception.RetryRequest, )
        # default is that we re-raise anything unexpected
        self.exception_checker = exception_checker
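For context, a minimal usage sketch of the new keyword (the function name and body below are invented; only the decorator parameters come from this change). jitter=True randomizes every retry delay, while a DBDeadlock is always jittered regardless of the flag:

    from oslo_db import api as oslo_db_api

    # Hypothetical db.api method used only to illustrate the new flag.
    @oslo_db_api.wrap_db_retry(max_retries=5, retry_on_deadlock=True, jitter=True)
    def update_quota(context, project_id, values):
        # open a session, apply the UPDATE, commit; retried on RetryRequest,
        # on deadlocks (retry_on_deadlock=True), etc., with jittered backoff
        pass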
@@ -127,7 +134,7 @@ class wrap_db_retry(object):
     def __call__(self, f):
        @six.wraps(f)
        def wrapper(*args, **kwargs):
-            next_interval = self.retry_interval
+            sleep_time = next_interval = self.retry_interval
            remaining = self.max_retries

            while True:
@@ -150,12 +157,20 @@ class wrap_db_retry(object):
                     # NOTE(vsergeyev): We are using patched time module, so
                    #                  this effectively yields the execution
                    #                  context to another green thread.
-                    time.sleep(next_interval)
+                    time.sleep(sleep_time)
                    if self.inc_retry_interval:
-                        next_interval = min(
-                            next_interval * 2,
-                            self.max_retry_interval
-                        )
+                        # NOTE(jiangyikun): In order to minimize the chance of
+                        # regenerating a deadlock and reduce the average sleep
+                        # time, we are using jitter by default when the
+                        # deadlock is detected. With the jitter,
+                        # sleep_time = [0, next_interval), otherwise, without
+                        # the jitter, sleep_time = next_interval.
+                        if isinstance(e, exception.DBDeadlock):
+                            jitter = True
+                        else:
+                            jitter = self.jitter
+                        sleep_time, next_interval = self._get_inc_interval(
+                            next_interval, jitter)
                    remaining -= 1

        return wrapper
@@ -170,6 +185,18 @@ class wrap_db_retry(object):
             return True
        return self.exception_checker(exc)

+    def _get_inc_interval(self, n, jitter):
+        # NOTE(jiangyikun): The "n" help us to record the 2 ** retry_times.
+        # The "sleep_time" means the real time to sleep:
+        #   - Without jitter: sleep_time = 2 ** retry_times = n
+        #   - With jitter: sleep_time = [0, 2 ** retry_times) < n
+        n = n * 2
+        if jitter:
+            sleep_time = random.uniform(0, n)
+        else:
+            sleep_time = n
+        return min(sleep_time, self.max_retry_interval), n
+

 class DBAPI(object):
    """Initialize the chosen DB API backend.
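Note that the wrapper now tracks two values: next_interval keeps the uncapped doubling base (2 ** retry_times), while sleep_time is what is actually slept. A standalone sketch (not part of the commit) that mirrors _get_inc_interval with the default max_retry_interval=10 shows the resulting schedule:

    import random

    MAX_RETRY_INTERVAL = 10  # default max_retry_interval

    def get_inc_interval(n, jitter):
        # Mirrors wrap_db_retry._get_inc_interval: "n" doubles every call,
        # "sleep_time" is capped at the maximum retry interval.
        n = n * 2
        sleep_time = random.uniform(0, n) if jitter else n
        return min(sleep_time, MAX_RETRY_INTERVAL), n

    n = 1
    for attempt in range(5):
        sleep_time, n = get_inc_interval(n, jitter=False)
        print(attempt, sleep_time, n)
    # Without jitter the sleeps are 2, 4, 8, 10, 10 while n keeps doubling
    # (2, 4, 8, 16, 32); with jitter=True each sleep is instead drawn
    # uniformly from [0, n) and then capped at 10.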
@@ -253,3 +253,59 @@ class DBRetryRequestCase(DBAPITestCase):
 
        self.assertRaises(AttributeError, some_method)
        self.assertFalse(mock_log.called)
+
+    @mock.patch('oslo_db.api.time.sleep', return_value=None)
+    def test_retry_wrapper_deadlock(self, mock_sleep):
+
+        # Tests that jitter is False, if the retry wrapper hits a
+        # non-deadlock error
+        @api.wrap_db_retry(max_retries=1, retry_on_deadlock=True)
+        def some_method_no_deadlock():
+            raise exception.RetryRequest(ValueError())
+        with mock.patch(
+                'oslo_db.api.wrap_db_retry._get_inc_interval') as mock_get:
+            mock_get.return_value = 2, 2
+            self.assertRaises(ValueError, some_method_no_deadlock)
+            mock_get.assert_called_once_with(1, False)
+
+        # Tests that jitter is True, if the retry wrapper hits a deadlock
+        # error.
+        @api.wrap_db_retry(max_retries=1, retry_on_deadlock=True)
+        def some_method_deadlock():
+            raise exception.DBDeadlock('test')
+        with mock.patch(
+                'oslo_db.api.wrap_db_retry._get_inc_interval') as mock_get:
+            mock_get.return_value = 0.1, 2
+            self.assertRaises(exception.DBDeadlock, some_method_deadlock)
+            mock_get.assert_called_once_with(1, True)
+
+        # Tests that jitter is True, if the jitter is enable by user
+        @api.wrap_db_retry(max_retries=1, retry_on_deadlock=True, jitter=True)
+        def some_method_no_deadlock_exp():
+            raise exception.RetryRequest(ValueError())
+        with mock.patch(
+                'oslo_db.api.wrap_db_retry._get_inc_interval') as mock_get:
+            mock_get.return_value = 0.1, 2
+            self.assertRaises(ValueError, some_method_no_deadlock_exp)
+            mock_get.assert_called_once_with(1, True)
+
+    def test_wrap_db_retry_get_interval(self):
+        x = api.wrap_db_retry(max_retries=5, retry_on_deadlock=True,
+                              max_retry_interval=11)
+        self.assertEqual(11, x.max_retry_interval)
+        for i in (1, 2, 4):
+            # With jitter: sleep_time = [0, 2 ** retry_times)
+            sleep_time, n = x._get_inc_interval(i, True)
+            self.assertEqual(2 * i, n)
+            self.assertTrue(2 * i > sleep_time)
+            # Without jitter: sleep_time = 2 ** retry_times
+            sleep_time, n = x._get_inc_interval(i, False)
+            self.assertEqual(2 * i, n)
+            self.assertEqual(2 * i, sleep_time)
+        for i in (8, 16, 32):
+            sleep_time, n = x._get_inc_interval(i, False)
+            self.assertEqual(x.max_retry_interval, sleep_time)
+            self.assertEqual(2 * i, n)
+            sleep_time, n = x._get_inc_interval(i, True)
+            self.assertTrue(x.max_retry_interval >= sleep_time)
+            self.assertEqual(2 * i, n)
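These tests keep the assertions deterministic by mocking _get_inc_interval itself. Another option (not part of the commit) would be to pin random.uniform so the jittered value can be asserted exactly; a hypothetical sketch:

    import unittest
    from unittest import mock

    from oslo_db import api


    class DeterministicJitterTest(unittest.TestCase):
        # Hypothetical extra test: force random.uniform to a known value so
        # the jittered sleep_time can be checked exactly.
        def test_get_inc_interval_with_pinned_random(self):
            x = api.wrap_db_retry(max_retries=5, retry_on_deadlock=True)
            with mock.patch('oslo_db.api.random.uniform', return_value=3.5):
                sleep_time, n = x._get_inc_interval(4, True)
            # n doubles to 8; sleep_time is the mocked draw, capped at the
            # default max_retry_interval of 10.
            self.assertEqual(3.5, sleep_time)
            self.assertEqual(8, n)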