Improve Health Manager error handling

It was reported that the Health Manager process could be crashed with
malformed heartbeat packets. I was unable to reproduce the issue
(I suspect oslo_utils fixed the root cause), but I could see how this
could happen and our error handling could be improved.
This is a lower-severity issue, as this port is intended to be accessible
only from the private lb-mgmt-net network.
This patch adds additional exception handling to the Health Manager
listener routines to better handle heartbeat packet issues.

Change-Id: I2da6fa394f5152148237d0986fd969b7950815ba
Story: 2001959
Task: 15081
This commit is contained in:
Michael Johnson 2018-05-03 17:56:52 -07:00
parent b4632d7fe9
commit 8e2f7512c2
4 changed files with 41 additions and 5 deletions

View File

@ -184,7 +184,13 @@ class UDPStatusGetter(object):
"""
(data, srcaddr) = self.sock.recvfrom(UDP_MAX_SIZE)
LOG.debug('Received packet from %s', srcaddr)
obj = status_message.unwrap_envelope(data, self.key)
try:
obj = status_message.unwrap_envelope(data, self.key)
except Exception as e:
LOG.warning('Health Manager experienced an exception processing a '
'heartbeat message from %s. Ignoring this packet. '
'Exception: %s', srcaddr, e)
raise exceptions.InvalidHMACException()
obj['recv_time'] = time.time()
return obj, srcaddr
@ -194,6 +200,10 @@ class UDPStatusGetter(object):
except exceptions.InvalidHMACException:
# Pass here as the packet was dropped and logged already
pass
except Exception as e:
LOG.warning('Health Manager experienced an exception processing a'
'heartbeat packet. Ignoring this packet. '
'Exception: %s', e)
else:
self.executor.submit(update_health, obj)
self.executor.submit(update_stats, obj)

View File

@ -38,7 +38,11 @@ def hm_listener(exit_event):
signal.signal(signal.SIGINT, signal.SIG_IGN)
udp_getter = heartbeat_udp.UDPStatusGetter()
while not exit_event.is_set():
udp_getter.check()
try:
udp_getter.check()
except Exception as e:
LOG.error('Health Manager listener experienced unknown error: %s',
e)
def hm_health_check(exit_event):

View File

@ -123,6 +123,29 @@ class TestHeartbeatUDP(base.TestCase):
self.assertIsNotNone(obj.pop('recv_time'))
self.assertEqual({"testkey": "TEST"}, obj)
@mock.patch('octavia.amphorae.backends.health_daemon.status_message.'
            'unwrap_envelope')
@mock.patch('socket.getaddrinfo')
@mock.patch('socket.socket')
def test_dorecv_bad_packet(self, mock_socket, mock_getaddrinfo,
                           mock_unwrap):
    """Check that dorecv raises InvalidHMACException when unwrapping fails.

    unwrap_envelope is patched to raise a generic Exception; the test
    asserts that calling dorecv surfaces InvalidHMACException instead.
    """
    # Force the envelope unwrapping step to blow up with a generic error.
    mock_unwrap.side_effect = Exception('boom')
    mock_getaddrinfo.return_value = [range(1, 6)]

    # key = 'TEST' msg = {"testkey": "TEST"}
    hex_payload = ('78daab562a492d2ec94ead54b252500a710d0e5'
                   '1aa050041b506245806e5c1971e79951818394e'
                   'a6e71ad989ff950945f9573f4ab6f83e25db8ed7')
    packet = binascii.unhexlify(hex_payload)

    # Socket double whose recvfrom hands back the packet and a source of 2.
    fake_socket = mock.MagicMock()
    fake_socket.recvfrom = mock.MagicMock(return_value=(packet, 2))
    mock_socket.return_value = fake_socket

    status_getter = heartbeat_udp.UDPStatusGetter()
    self.assertRaises(exceptions.InvalidHMACException,
                      status_getter.dorecv)
@mock.patch('socket.getaddrinfo')
@mock.patch('socket.socket')
def test_check(self, mock_socket, mock_getaddrinfo):

View File

@ -30,14 +30,13 @@ class TestHealthManagerCMD(base.TestCase):
'heartbeat_udp.UDPStatusGetter')
def test_hm_listener(self, mock_getter,
mock_event):
mock_event.is_set.side_effect = [False, False]
mock_event.is_set.side_effect = [False, False, True]
getter_mock = mock.MagicMock()
check_mock = mock.MagicMock()
getter_mock.check = check_mock
getter_mock.check.side_effect = [None, Exception('break')]
mock_getter.return_value = getter_mock
self.assertRaisesRegexp(Exception, 'break',
health_manager.hm_listener, mock_event)
health_manager.hm_listener(mock_event)
mock_getter.assert_called_once()
self.assertEqual(2, getter_mock.check.call_count)