Add basic retries for inspection

Transitory network conditions, such as a port being held down
for traffic forwarding, can cause intermittent connectivity
failures which result in failed introspections.

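The agent now retries posting the inspection data when a connection
error occurs, making up to five attempts with a five second wait
between them.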

Change-Id: I72c5e3aca000d3854a17f8a461b1a2935e5c0d9b
This commit is contained in:
Julia Kreger 2020-08-10 15:26:29 -07:00
parent f45d2cc0e7
commit bb27badf76
3 changed files with 29 additions and 2 deletions

View File

@ -24,6 +24,7 @@ from oslo_serialization import jsonutils
from oslo_utils import excutils
import requests
import stevedore
import tenacity
from ironic_python_agent import config
from ironic_python_agent import encoding
@ -115,6 +116,17 @@ def inspect():
return resp.get('uuid')
@tenacity.retry(
    retry=tenacity.retry_if_exception_type(
        requests.exceptions.ConnectionError),
    stop=tenacity.stop_after_attempt(5),
    wait=tenacity.wait_fixed(5),
    reraise=True)
def _post_to_inspector(url, data, verify, cert):
    """POST inspection data, retrying on connection errors.

    The call is attempted up to five times with a fixed five second wait
    between attempts. Only ``requests.exceptions.ConnectionError`` triggers
    a retry; once the attempts are exhausted the last error is re-raised
    to the caller unchanged.

    :param url: URL to send the POST request to.
    :param data: request body, passed to ``requests.post`` as ``data``.
    :param verify: passed to ``requests.post`` as ``verify``.
    :param cert: passed to ``requests.post`` as ``cert``.
    :returns: the response object returned by ``requests.post``.
    :raises: requests.exceptions.ConnectionError if every attempt fails
        with a connection error.
    """
    # Honour the URL supplied by the caller instead of silently ignoring
    # the parameter and always reading the global configuration.
    return requests.post(url, data=data, verify=verify, cert=cert)
def call_inspector(data, failures):
"""Post data to inspector."""
data['error'] = failures.get_error()
@ -127,8 +139,8 @@ def call_inspector(data, failures):
data = encoder.encode(data)
verify, cert = utils.get_ssl_client_options(CONF)
resp = requests.post(CONF.inspection_callback_url, data=data,
verify=verify, cert=cert)
resp = _post_to_inspector(CONF.inspection_callback_url, data=data,
verify=verify, cert=cert)
if resp.status_code >= 400:
LOG.error('inspector %s error %d: %s, proceeding with lookup',
CONF.inspection_callback_url,

View File

@ -191,6 +191,15 @@ class TestCallInspector(base.IronicAgentTest):
data='{"data": 42, "error": null}')
self.assertIsNone(res)
def test_inspector_retries(self, mock_post):
    """call_inspector re-raises ConnectionError after five POST attempts."""
    # Every attempt made through the mocked POST fails the same way.
    mock_post.side_effect = requests.exceptions.ConnectionError
    payload = collections.OrderedDict(data=42)
    accumulated = utils.AccumulatedFailures()
    with self.assertRaises(requests.exceptions.ConnectionError):
        inspector.call_inspector(payload, accumulated)
    # One initial attempt plus the retries adds up to five calls in total.
    self.assertEqual(5, mock_post.call_count)
class BaseDiscoverTest(base.IronicAgentTest):
def setUp(self):

View File

@ -0,0 +1,6 @@
---
fixes:
- |
Fixes an issue where intermittent or transitory connection issues can cause
inspection to fail. The ramdisk now makes up to five attempts to report to
inspector, waiting five seconds between attempts.