Improve Health Manager error handling
It was reported that the Health Manager process could be crashed by malformed heartbeat packets. I was unable to reproduce the issue (I suspect oslo_utils fixed the root cause), but I can see how it could happen and how our error handling could be improved. This is a lower-severity issue because the port is intended to be accessible only from the private lb-mgmt-net network. This patch adds exception handling to the Health Manager listener routines so that problems with heartbeat packets are handled more gracefully. Change-Id: I2da6fa394f5152148237d0986fd969b7950815ba Story: 2001959 Task: 15081
This commit is contained in:
parent
b4632d7fe9
commit
8e2f7512c2
@ -184,7 +184,13 @@ class UDPStatusGetter(object):
|
||||
"""
|
||||
(data, srcaddr) = self.sock.recvfrom(UDP_MAX_SIZE)
|
||||
LOG.debug('Received packet from %s', srcaddr)
|
||||
obj = status_message.unwrap_envelope(data, self.key)
|
||||
try:
|
||||
obj = status_message.unwrap_envelope(data, self.key)
|
||||
except Exception as e:
|
||||
LOG.warning('Health Manager experienced an exception processing a '
|
||||
'heartbeat message from %s. Ignoring this packet. '
|
||||
'Exception: %s', srcaddr, e)
|
||||
raise exceptions.InvalidHMACException()
|
||||
obj['recv_time'] = time.time()
|
||||
return obj, srcaddr
|
||||
|
||||
@ -194,6 +200,10 @@ class UDPStatusGetter(object):
|
||||
except exceptions.InvalidHMACException:
|
||||
# Pass here as the packet was dropped and logged already
|
||||
pass
|
||||
except Exception as e:
|
||||
LOG.warning('Health Manager experienced an exception processing a'
|
||||
'heartbeat packet. Ignoring this packet. '
|
||||
'Exception: %s', e)
|
||||
else:
|
||||
self.executor.submit(update_health, obj)
|
||||
self.executor.submit(update_stats, obj)
|
||||
|
@ -38,7 +38,11 @@ def hm_listener(exit_event):
|
||||
signal.signal(signal.SIGINT, signal.SIG_IGN)
|
||||
udp_getter = heartbeat_udp.UDPStatusGetter()
|
||||
while not exit_event.is_set():
|
||||
udp_getter.check()
|
||||
try:
|
||||
udp_getter.check()
|
||||
except Exception as e:
|
||||
LOG.error('Health Manager listener experienced unknown error: %s',
|
||||
e)
|
||||
|
||||
|
||||
def hm_health_check(exit_event):
|
||||
|
@ -123,6 +123,29 @@ class TestHeartbeatUDP(base.TestCase):
|
||||
self.assertIsNotNone(obj.pop('recv_time'))
|
||||
self.assertEqual({"testkey": "TEST"}, obj)
|
||||
|
||||
@mock.patch('octavia.amphorae.backends.health_daemon.status_message.'
|
||||
'unwrap_envelope')
|
||||
@mock.patch('socket.getaddrinfo')
|
||||
@mock.patch('socket.socket')
|
||||
def test_dorecv_bad_packet(self, mock_socket, mock_getaddrinfo,
|
||||
mock_unwrap):
|
||||
socket_mock = mock.MagicMock()
|
||||
mock_socket.return_value = socket_mock
|
||||
mock_unwrap.side_effect = Exception('boom')
|
||||
mock_getaddrinfo.return_value = [range(1, 6)]
|
||||
recvfrom = mock.MagicMock()
|
||||
socket_mock.recvfrom = recvfrom
|
||||
|
||||
getter = heartbeat_udp.UDPStatusGetter()
|
||||
|
||||
# key = 'TEST' msg = {"testkey": "TEST"}
|
||||
sample_msg = ('78daab562a492d2ec94ead54b252500a710d0e5'
|
||||
'1aa050041b506245806e5c1971e79951818394e'
|
||||
'a6e71ad989ff950945f9573f4ab6f83e25db8ed7')
|
||||
bin_msg = binascii.unhexlify(sample_msg)
|
||||
recvfrom.return_value = bin_msg, 2
|
||||
self.assertRaises(exceptions.InvalidHMACException, getter.dorecv)
|
||||
|
||||
@mock.patch('socket.getaddrinfo')
|
||||
@mock.patch('socket.socket')
|
||||
def test_check(self, mock_socket, mock_getaddrinfo):
|
||||
|
@ -30,14 +30,13 @@ class TestHealthManagerCMD(base.TestCase):
|
||||
'heartbeat_udp.UDPStatusGetter')
|
||||
def test_hm_listener(self, mock_getter,
|
||||
mock_event):
|
||||
mock_event.is_set.side_effect = [False, False]
|
||||
mock_event.is_set.side_effect = [False, False, True]
|
||||
getter_mock = mock.MagicMock()
|
||||
check_mock = mock.MagicMock()
|
||||
getter_mock.check = check_mock
|
||||
getter_mock.check.side_effect = [None, Exception('break')]
|
||||
mock_getter.return_value = getter_mock
|
||||
self.assertRaisesRegexp(Exception, 'break',
|
||||
health_manager.hm_listener, mock_event)
|
||||
health_manager.hm_listener(mock_event)
|
||||
mock_getter.assert_called_once()
|
||||
self.assertEqual(2, getter_mock.check.call_count)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user