From 6616254449baf8e86905dbec3299acfbcf85abe6 Mon Sep 17 00:00:00 2001 From: Adit Sarfaty Date: Thu, 28 Mar 2019 11:32:39 +0200 Subject: [PATCH] Improve wait-for-realization - Use tenacity retry methods with random wait - Add config for sleep time between retries Change-Id: Iec470d5019b554435d7797a4e94ae73ef6179aa4 --- vmware_nsxlib/v3/config.py | 8 ++++-- vmware_nsxlib/v3/policy/core_resources.py | 34 ++++++++++------------- vmware_nsxlib/v3/utils.py | 11 ++++++-- 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/vmware_nsxlib/v3/config.py b/vmware_nsxlib/v3/config.py index 7d462fa1..198f9f53 100644 --- a/vmware_nsxlib/v3/config.py +++ b/vmware_nsxlib/v3/config.py @@ -84,7 +84,8 @@ class NsxLibConfig(object): not supported by the policy manager api. :param realization_max_attempts: Maximum number of times to retry while waiting for a resource to be realized. -. + :param realization_wait_sec: Number of seconds to wait between attempts + for a resource to be realized. """ def __init__(self, @@ -111,8 +112,8 @@ class NsxLibConfig(object): rate_limit_retry=True, cluster_unavailable_retry=False, allow_passthrough=False, - # TODO(asarfaty): reduce the default once plugin is stable - realization_max_attempts=50): + realization_max_attempts=50, + realization_wait_sec=1.0): self.nsx_api_managers = nsx_api_managers self._username = username @@ -137,6 +138,7 @@ class NsxLibConfig(object): self.cluster_unavailable_retry = cluster_unavailable_retry self.allow_passthrough = allow_passthrough self.realization_max_attempts = realization_max_attempts + self.realization_wait_sec = realization_wait_sec if dhcp_profile_uuid: # this is deprecated, and never used. 
diff --git a/vmware_nsxlib/v3/policy/core_resources.py b/vmware_nsxlib/v3/policy/core_resources.py index 39f810a0..b96f2822 100644 --- a/vmware_nsxlib/v3/policy/core_resources.py +++ b/vmware_nsxlib/v3/policy/core_resources.py @@ -215,33 +215,29 @@ class NsxPolicyResourceBase(object): Return the realization info, or raise an error """ if sleep is None: - sleep = 0.5 + sleep = self.nsxlib_config.realization_wait_sec if max_attempts is None: max_attempts = self.nsxlib_config.realization_max_attempts - test_num = 0 - while test_num < max_attempts: + @utils.retry_upon_none_result(max_attempts, delay=sleep, random=True) + def get_info(): info = self._get_realization_info( resource_def, entity_type=entity_type) if info and info['state'] == constants.STATE_REALIZED: - # TODO(asarfaty): why sometimes realization takes so long? - if test_num > 5: - LOG.warning("Waited %(time)s seconds for realization of " - "%(type)s %(id)s", - {'time': test_num * sleep, - 'type': resource_def.resource_type(), - 'id': resource_def.get_id()}) return info - eventlet.sleep(sleep) - test_num += 1 - err_msg = (_("%(type)s ID %(id)s was not realized after %(attempts)s " - "attempts with %(sleep)s seconds sleep") % - {'type': resource_def.resource_type(), - 'id': resource_def.get_id(), - 'attempts': max_attempts, - 'sleep': sleep}) - raise exceptions.ManagerError(details=err_msg) + try: + return get_info() + except Exception: + # max retries reached + err_msg = (_("%(type)s ID %(id)s was not realized after " + "%(attempts)s attempts with %(sleep)s seconds " + "sleep") % + {'type': resource_def.resource_type(), + 'id': resource_def.get_id(), + 'attempts': max_attempts, + 'sleep': sleep}) + raise exceptions.ManagerError(details=err_msg) def _get_extended_attr_from_realized_info(self, realization_info, requested_attr): diff --git a/vmware_nsxlib/v3/utils.py b/vmware_nsxlib/v3/utils.py index d08380ee..e86d1739 100644 --- a/vmware_nsxlib/v3/utils.py +++ b/vmware_nsxlib/v3/utils.py @@ -184,11 +184,16 @@ 
def retry_random_upon_exception(exc, delay=0.5, max_delay=5,
                           before=_log_before_retry, after=_log_after_retry)
 
 
-def retry_upon_none_result(max_attempts, delay=0.5, max_delay=2):
+def retry_upon_none_result(max_attempts, delay=0.5, max_delay=2, random=False):
+    if random:  # randomized (jittered) exponential wait
+        wait_func = tenacity.wait_random_exponential(
+            multiplier=delay, max=max_delay)
+    else:  # deterministic exponential wait (the pre-existing behaviour)
+        wait_func = tenacity.wait_exponential(
+            multiplier=delay, max=max_delay)
     return tenacity.retry(reraise=True,
                           retry=tenacity.retry_if_result(lambda x: x is None),
-                          wait=tenacity.wait_exponential(
-                              multiplier=delay, max=max_delay),
+                          wait=wait_func,
                           stop=tenacity.stop_after_attempt(max_attempts),
                           before=_log_before_retry, after=_log_after_retry)
 