Merge "Simplify heartbeating by removing use of select()"

This commit is contained in:
Zuul 2020-09-29 15:47:08 +00:00 committed by Gerrit Code Review
commit faeb9441d3
3 changed files with 40 additions and 66 deletions

View File

@ -14,9 +14,7 @@
import collections
import ipaddress
import os
import random
import select
import socket
import threading
import time
@ -79,13 +77,6 @@ class IronicPythonAgentHeartbeater(threading.Thread):
min_jitter_multiplier = 0.3
max_jitter_multiplier = 0.6
# Exponential backoff values used in case of an error. In reality we will
# only wait a portion of either of these delays based on the jitter
# multipliers.
initial_delay = 1.0
max_delay = 300.0
backoff_factor = 2.7
def __init__(self, agent):
"""Initialize the heartbeat thread.
@ -94,39 +85,19 @@ class IronicPythonAgentHeartbeater(threading.Thread):
"""
super(IronicPythonAgentHeartbeater, self).__init__()
self.agent = agent
self.stop_event = threading.Event()
self.api = agent.api_client
self.error_delay = self.initial_delay
self.reader = None
self.writer = None
self.interval = 0
def run(self):
"""Start the heartbeat thread."""
# The first heartbeat happens immediately
LOG.info('starting heartbeater')
interval = 0
LOG.info('Starting heartbeater')
self.agent.set_agent_advertise_addr()
self.reader, self.writer = os.pipe()
p = select.poll()
p.register(self.reader, select.POLLIN)
try:
while True:
if p.poll(interval * 1000):
if os.read(self.reader, 1).decode() == 'a':
break
while not self.stop_event.wait(self.interval):
self.do_heartbeat()
interval_multiplier = random.uniform(
self.min_jitter_multiplier,
self.max_jitter_multiplier)
interval = self.agent.heartbeat_timeout * interval_multiplier
log_msg = 'sleeping before next heartbeat, interval: {}'
LOG.info(log_msg.format(interval))
finally:
os.close(self.reader)
os.close(self.writer)
self.reader = None
self.writer = None
eventlet.sleep(0)
def do_heartbeat(self):
"""Send a heartbeat to Ironic."""
@ -137,27 +108,27 @@ class IronicPythonAgentHeartbeater(threading.Thread):
advertise_protocol=self.agent.advertise_protocol,
generated_cert=self.agent.generated_cert,
)
self.error_delay = self.initial_delay
LOG.info('heartbeat successful')
except errors.HeartbeatConflictError:
LOG.warning('conflict error sending heartbeat to {}'.format(
self.agent.api_url))
self.error_delay = min(self.error_delay * self.backoff_factor,
self.max_delay)
except Exception:
LOG.exception('error sending heartbeat to {}'.format(
self.agent.api_url))
self.error_delay = min(self.error_delay * self.backoff_factor,
self.max_delay)
finally:
interval_multiplier = random.uniform(self.min_jitter_multiplier,
self.max_jitter_multiplier)
self.interval = self.agent.heartbeat_timeout * interval_multiplier
log_msg = 'sleeping before next heartbeat, interval: {0}'
LOG.info(log_msg.format(self.interval))
def force_heartbeat(self):
os.write(self.writer, b'b')
self.do_heartbeat()
def stop(self):
"""Stop the heartbeat thread."""
if self.writer is not None:
LOG.info('stopping heartbeater')
os.write(self.writer, b'a')
self.stop_event.set()
return self.join()

View File

@ -61,21 +61,19 @@ class TestHeartbeater(ironic_agent_base.IronicAgentTest):
hardware.HardwareManager)
self.heartbeater.stop_event = mock.Mock()
@mock.patch('os.read', autospec=True)
@mock.patch('select.poll', autospec=True)
@mock.patch('ironic_python_agent.agent._time', autospec=True)
@mock.patch('random.uniform', autospec=True)
def test_heartbeat(self, mock_uniform, mock_time, mock_poll, mock_read):
def test_heartbeat(self, mock_uniform, mock_time):
time_responses = []
uniform_responses = []
heartbeat_responses = []
poll_responses = []
expected_poll_calls = []
wait_responses = []
expected_stop_calls = []
# FIRST RUN:
# initial delay is 0
expected_poll_calls.append(mock.call(0))
poll_responses.append(False)
expected_stop_calls.append(mock.call(0))
wait_responses.append(False)
# next heartbeat due at t=100
heartbeat_responses.append(100)
# random interval multiplier is 0.5
@ -85,8 +83,8 @@ class TestHeartbeater(ironic_agent_base.IronicAgentTest):
# SECOND RUN:
# 50 * .5 = 25
expected_poll_calls.append(mock.call(1000 * 25.0))
poll_responses.append(False)
expected_stop_calls.append(mock.call(25.0))
wait_responses.append(False)
# next heartbeat due at t=180
heartbeat_responses.append(180)
# random interval multiplier is 0.4
@ -96,36 +94,34 @@ class TestHeartbeater(ironic_agent_base.IronicAgentTest):
# THIRD RUN:
# 50 * .4 = 20
expected_poll_calls.append(mock.call(1000 * 20.0))
poll_responses.append(False)
expected_stop_calls.append(mock.call(20.0))
wait_responses.append(False)
# this heartbeat attempt fails
heartbeat_responses.append(Exception('uh oh!'))
# we check the time to generate a fake deadline, now t=125
time_responses.append(125)
# random interval multiplier is 0.5
uniform_responses.append(0.5)
# we check the time to generate a fake deadline, now t=125
time_responses.append(125)
# time is now 125.5
time_responses.append(125.5)
# FOURTH RUN:
# 50 * .5 = 25
expected_poll_calls.append(mock.call(1000 * 25.0))
# 50 * .5 = 20
expected_stop_calls.append(mock.call(25.0))
# Stop now
poll_responses.append(True)
mock_read.return_value = b'a'
wait_responses.append(True)
# Hook it up and run it
mock_time.side_effect = time_responses
mock_uniform.side_effect = uniform_responses
self.mock_agent.heartbeat_timeout = 50
self.heartbeater.api.heartbeat.side_effect = heartbeat_responses
mock_poll.return_value.poll.side_effect = poll_responses
self.heartbeater.stop_event.wait.side_effect = wait_responses
self.heartbeater.run()
# Validate expectations
self.assertEqual(expected_poll_calls,
mock_poll.return_value.poll.call_args_list)
self.assertEqual(2.7, self.heartbeater.error_delay)
self.assertEqual(expected_stop_calls,
self.heartbeater.stop_event.wait.call_args_list)
@mock.patch.object(hardware, '_md_scan_and_assemble', lambda: None)

View File

@ -0,0 +1,7 @@
---
upgrade:
- |
IPA heartbeat intervals now rely on accurate clock time. Any clean or
deploy steps which attempt to sync the clock may cause heartbeats to not
be emitted. IPA syncs time at startup and shutdown, so these steps should
not be required.