Merge "Add allocation_conflict_retry_count conf setting"

Zuul
2019-11-14 21:07:05 +00:00
committed by Gerrit Code Review
5 changed files with 61 additions and 32 deletions


@@ -72,6 +72,14 @@ a project or user identifier for the consumer. In cleaning up the data
 modeling, we no longer allow missing project and user information. If an older
 client makes an allocation, we'll use this in place of the information it
 doesn't provide.
 """),
+    cfg.IntOpt(
+        'allocation_conflict_retry_count',
+        default=10,
+        help="""
+The number of times to retry, server-side, writing allocations when there is
+a resource provider generation conflict. Raising this value may be useful
+when many concurrent allocations to the same resource provider are expected.
+"""),
 ]
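
For illustration, a minimal standalone sketch (not code from this commit) of
how an IntOpt of this shape is registered and read with oslo.config, plus the
placement.conf override an operator would use::

    from oslo_config import cfg

    conf = cfg.ConfigOpts()
    conf.register_opts(
        [cfg.IntOpt('allocation_conflict_retry_count', default=10)],
        group='placement')
    conf([])  # parse an empty command line so option access works

    # Operators would override the default in placement.conf:
    #   [placement]
    #   allocation_conflict_retry_count = 20
    print(conf.placement.allocation_conflict_retry_count)  # -> 10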


@@ -35,10 +35,6 @@ _USER_TBL = models.User.__table__

 LOG = logging.getLogger(__name__)

-# The number of times to retry set_allocations if there has
-# been a resource provider (not consumer) generation coflict.
-RP_CONFLICT_RETRY_COUNT = 10

 class Allocation(object):
@@ -499,7 +495,7 @@ def replace_all(context, alloc_list):
     # and try again. For sake of simplicity (and because we don't have
     # easy access to the information) we reload all the resource
     # providers that may be present.
-    retries = RP_CONFLICT_RETRY_COUNT
+    retries = context.config.placement.allocation_conflict_retry_count
     while retries:
         retries -= 1
         try:
@@ -526,7 +522,7 @@ def replace_all(context, alloc_list):
     # information from the allocations is not coherent as this
     # could be multiple consumers and providers.
     LOG.warning('Exceeded retry limit of %d on allocations write',
-                RP_CONFLICT_RETRY_COUNT)
+                context.config.placement.allocation_conflict_retry_count)
     raise exception.ResourceProviderConcurrentUpdateDetected()
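
Reduced to a sketch, the control flow after this change looks roughly like the
following. This is simplified from replace_all as shown in the hunks above;
the provider-reload step and module setup are elided, so treat it as
illustrative rather than the exact implementation::

    def _replace_all_sketch(context, alloc_list):
        # The retry budget now comes from configuration, not a constant.
        retries = context.config.placement.allocation_conflict_retry_count
        while retries:
            retries -= 1
            try:
                _set_allocations(context, alloc_list)
                return
            except exception.ResourceProviderConcurrentUpdateDetected:
                LOG.debug('Retrying allocations write on resource provider '
                          'generation conflict')
                # ...reload the affected resource providers and try again...
        LOG.warning('Exceeded retry limit of %d on allocations write',
                    context.config.placement.allocation_conflict_retry_count)
        raise exception.ResourceProviderConcurrentUpdateDetected()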


@@ -624,40 +624,42 @@ class TestAllocationListCreateDelete(tb.PlacementDbBaseTestCase):
         ]
         # Make sure the right exception happens when the retry loop expires.
-        with mock.patch.object(alloc_obj, 'RP_CONFLICT_RETRY_COUNT', 0):
-            self.assertRaises(
-                exception.ResourceProviderConcurrentUpdateDetected,
-                alloc_obj.replace_all, self.ctx, alloc_list)
-            mock_log.warning.assert_called_with(
-                'Exceeded retry limit of %d on allocations write', 0)
+        self.conf_fixture.config(allocation_conflict_retry_count=0,
+                                 group='placement')
+        self.assertRaises(
+            exception.ResourceProviderConcurrentUpdateDetected,
+            alloc_obj.replace_all, self.ctx, alloc_list)
+        mock_log.warning.assert_called_with(
+            'Exceeded retry limit of %d on allocations write', 0)

         # Make sure the right thing happens after a small number of failures.
         # There's a bit of mock magic going on here to ensure that we can
         # both do some side effects on _set_allocations as well as have the
         # real behavior. Two generation conflicts and then a success.
         mock_log.reset_mock()
-        with mock.patch.object(alloc_obj, 'RP_CONFLICT_RETRY_COUNT', 3):
-            unmocked_set = alloc_obj._set_allocations
-            with mock.patch('placement.objects.allocation.'
-                            '_set_allocations') as mock_set:
-                exceptions = iter([
-                    exception.ResourceProviderConcurrentUpdateDetected(),
-                    exception.ResourceProviderConcurrentUpdateDetected(),
-                ])
-
-                def side_effect(*args, **kwargs):
-                    try:
-                        raise next(exceptions)
-                    except StopIteration:
-                        return unmocked_set(*args, **kwargs)
-
-                mock_set.side_effect = side_effect
-                alloc_obj.replace_all(self.ctx, alloc_list)
-                self.assertEqual(2, mock_log.debug.call_count)
-                mock_log.debug.called_with(
-                    'Retrying allocations write on resource provider '
-                    'generation conflict')
-                self.assertEqual(3, mock_set.call_count)
+        self.conf_fixture.config(allocation_conflict_retry_count=3,
+                                 group='placement')
+        unmocked_set = alloc_obj._set_allocations
+        with mock.patch('placement.objects.allocation.'
+                        '_set_allocations') as mock_set:
+            exceptions = iter([
+                exception.ResourceProviderConcurrentUpdateDetected(),
+                exception.ResourceProviderConcurrentUpdateDetected(),
+            ])

+            def side_effect(*args, **kwargs):
+                try:
+                    raise next(exceptions)
+                except StopIteration:
+                    return unmocked_set(*args, **kwargs)

+            mock_set.side_effect = side_effect
+            alloc_obj.replace_all(self.ctx, alloc_list)
+            self.assertEqual(2, mock_log.debug.call_count)
+            mock_log.debug.called_with(
+                'Retrying allocations write on resource provider '
+                'generation conflict')
+            self.assertEqual(3, mock_set.call_count)

         # Confirm we're using a different rp object after the change
         # and that it has a higher generation.
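
The test now tunes the retry budget through the config fixture instead of
patching a module constant, so it exercises the same lookup path the
production code uses. A minimal standalone sketch of that fixture pattern,
assuming the standard oslo_config fixture rather than the repo's test base
classes::

    from oslo_config import cfg
    from oslo_config import fixture as config_fixture

    conf = cfg.ConfigOpts()
    conf.register_opts(
        [cfg.IntOpt('allocation_conflict_retry_count', default=10)],
        group='placement')
    conf([])

    conf_fixture = config_fixture.Config(conf)
    conf_fixture.setUp()
    conf_fixture.config(allocation_conflict_retry_count=3, group='placement')
    assert conf.placement.allocation_conflict_retry_count == 3
    conf_fixture.cleanUp()  # restores the overridden value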


@@ -79,6 +79,11 @@ class APIFixture(fixture.GabbiFixture):
         self.placement_db_fixture.setUp()
         self.context = context.RequestContext()
+        # Some database interaction methods require access to the oslo config
+        # via the context. Within the WSGI application this is taken care of
+        # but here in the fixtures we use some of those methods to create
+        # entities.
+        self.context.config = self.conf_fixture.conf
         # Set default policy opts, otherwise the deploy module can
         # NoSuchOptError.
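
Outside the WSGI stack nothing attaches configuration to the request context,
so the fixture wires it up by hand. A hedged standalone sketch of that wiring
(the registration boilerplate repeats the earlier sketches; it is not code
from this commit)::

    from oslo_config import cfg

    from placement import context

    conf = cfg.ConfigOpts()
    conf.register_opts(
        [cfg.IntOpt('allocation_conflict_retry_count', default=10)],
        group='placement')
    conf([])

    ctx = context.RequestContext()
    ctx.config = conf  # what the WSGI deploy layer otherwise provides
    retries = ctx.config.placement.allocation_conflict_retry_count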


@@ -0,0 +1,18 @@
+---
+fixes:
+  - |
+    When a single resource provider receives many concurrent allocation
+    writes, retries may be performed server side when there is a resource
+    provider generation conflict. When those retries are all consumed, the
+    client receives an HTTP 409 response and may choose to try the request
+    again. In an environment where high levels of concurrent allocation
+    writes are common, such as a busy clustered hypervisor, the default
+    retry count may be too low. See story 2006467_.
+
+    A new configuration setting,
+    ``[placement]/allocation_conflict_retry_count``, has been added to
+    address this situation. It defines the number of times to retry,
+    server-side, writing allocations when there is a resource provider
+    generation conflict.
+
+    .. _2006467: https://storyboard.openstack.org/#!/story/2006467
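
As the note says, a client that still receives HTTP 409 can retry the request
itself. A hedged client-side sketch follows; the endpoint shape matches the
placement allocations API, but the function, headers, and backoff policy are
illustrative assumptions, not part of this commit::

    import time

    import requests

    def put_allocations(base_url, consumer_uuid, payload, headers,
                        attempts=3, backoff=0.5):
        """PUT an allocation, retrying a few times on HTTP 409 conflict."""
        resp = None
        for attempt in range(attempts):
            resp = requests.put(
                '%s/allocations/%s' % (base_url, consumer_uuid),
                json=payload, headers=headers)
            if resp.status_code != 409:
                break
            time.sleep(backoff * (attempt + 1))  # simple linear backoff
        return resp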