From 12135dcc922210b0ce27f28af890a7e9b1329d4b Mon Sep 17 00:00:00 2001 From: Adam Harwell Date: Wed, 26 Jul 2017 19:35:24 -0700 Subject: [PATCH] Properly handle more states from HAProxy health messages DRAIN / MAINT are handled now. Also, transitional UP/DOWN states report correctly now. Change-Id: I8b2bf54de6f79c98822689e48f6ec0b65c6296c5 Closes-Bug: #1708042 --- api-ref/source/v2/general.inc | 2 + .../amphorae/backends/utils/haproxy_query.py | 10 +- octavia/common/constants.py | 11 +- octavia/controller/healthmanager/update_db.py | 12 +- ...b9e23ad43_add_draining_operating_status.py | 34 +++++ .../backends/utils/test_haproxy_query.py | 19 ++- .../healthmanager/test_update_db.py | 138 +++++++++++++++++- ...ate-haproxy-statuses-7e995bb4c7cc0dd6.yaml | 9 ++ 8 files changed, 215 insertions(+), 20 deletions(-) create mode 100644 octavia/db/migration/alembic_migrations/versions/4aeb9e23ad43_add_draining_operating_status.py create mode 100644 releasenotes/notes/Report-more-accurate-haproxy-statuses-7e995bb4c7cc0dd6.yaml diff --git a/api-ref/source/v2/general.inc b/api-ref/source/v2/general.inc index 6f1ebf81d1..a1d45f707c 100644 --- a/api-ref/source/v2/general.inc +++ b/api-ref/source/v2/general.inc @@ -466,6 +466,8 @@ Operating Status Codes | ONLINE | - Entity is operating normally | | | - All pool members are healthy | +------------+--------------------------------------------------------------+ +| DRAINING | - The member is not accepting new connections | ++------------+--------------------------------------------------------------+ | OFFLINE | - Entity is administratively disabled | +------------+--------------------------------------------------------------+ | DEGRADED | - One or more of the entity's components are in ERROR | diff --git a/octavia/amphorae/backends/utils/haproxy_query.py b/octavia/amphorae/backends/utils/haproxy_query.py index 2a51b5cd1d..44605eb9ad 100644 --- a/octavia/amphorae/backends/utils/haproxy_query.py +++ b/octavia/amphorae/backends/utils/haproxy_query.py @@ -105,7 +105,7 @@ class HAProxyQuery(object): {: { 'uuid': , 'status': 'UP'|'DOWN', - 'members': [: 'UP'|'DOWN'] }} + 'members': [: 'UP'|'DOWN'|'DRAIN'|'no check'] }} """ results = self.show_stat(object_type=6) # servers + pool @@ -114,10 +114,10 @@ class HAProxyQuery(object): for line in results: # pxname: pool, svname: server_name, status: status - # All the way up is UP, otherwise call it DOWN - if (line['status'] != consts.UP and - line['status'] != consts.NO_CHECK): - line['status'] = consts.DOWN + # Due to a bug in some versions of HAProxy, DRAIN mode isn't + # calculated correctly, but we can spoof the correct value here. + if line['status'] == consts.UP and line['weight'] == 0: + line['status'] = consts.DRAIN if line['pxname'] not in final_results: final_results[line['pxname']] = dict(members={}) diff --git a/octavia/common/constants.py b/octavia/common/constants.py index 909992de2f..e24f92b0f4 100644 --- a/octavia/common/constants.py +++ b/octavia/common/constants.py @@ -105,10 +105,12 @@ ONLINE = 'ONLINE' OFFLINE = 'OFFLINE' DEGRADED = 'DEGRADED' ERROR = 'ERROR' +DRAINING = 'DRAINING' NO_MONITOR = 'NO_MONITOR' OPERATING_STATUS = 'operating_status' PROVISIONING_STATUS = 'provisioning_status' -SUPPORTED_OPERATING_STATUSES = (ONLINE, OFFLINE, DEGRADED, ERROR, NO_MONITOR) +SUPPORTED_OPERATING_STATUSES = (ONLINE, OFFLINE, DEGRADED, ERROR, DRAINING, + NO_MONITOR) AMPHORA_VM = 'VM' SUPPORTED_AMPHORA_TYPES = (AMPHORA_VM,) @@ -345,9 +347,14 @@ DOWN = 'DOWN' HAPROXY_BACKEND_STATUSES = (UP, DOWN) +DRAIN = 'DRAIN' +MAINT = 'MAINT' NO_CHECK = 'no check' -HAPROXY_MEMBER_STATUSES = (UP, DOWN, NO_CHECK) +# DRAIN = member is weight 0 and is in draining mode +# MAINT = member is downed for maintenance? not sure when this happens +# NO_CHECK = no health monitor is enabled +HAPROXY_MEMBER_STATUSES = (UP, DOWN, DRAIN, MAINT, NO_CHECK) # Quota Constants QUOTA_UNLIMITED = -1 diff --git a/octavia/controller/healthmanager/update_db.py b/octavia/controller/healthmanager/update_db.py index 14381256e6..fe2b8c9e55 100644 --- a/octavia/controller/healthmanager/update_db.py +++ b/octavia/controller/healthmanager/update_db.py @@ -58,6 +58,8 @@ class UpdateHealthDb(object): entity_type, entity_id, entity.operating_status, new_op_status) repo.update(session, entity_id, operating_status=new_op_status) + if new_op_status == constants.DRAINING: + new_op_status = constants.ONLINE message.update({constants.OPERATING_STATUS: new_op_status}) if self.sync_prv_status: LOG.debug("%s %s provisioning_status %s. Updating db and sending" @@ -167,14 +169,20 @@ class UpdateHealthDb(object): for member_id, status in members.items(): member_status = None - if status == constants.UP: + # Member status can be "UP" or "UP #/#" (transitional) + if status.startswith(constants.UP): member_status = constants.ONLINE - elif status == constants.DOWN: + # Member status can be "DOWN" or "DOWN #/#" (transitional) + elif status.startswith(constants.DOWN): member_status = constants.ERROR if pool_status == constants.ONLINE: pool_status = constants.DEGRADED if lb_status == constants.ONLINE: lb_status = constants.DEGRADED + elif status == constants.DRAIN: + member_status = constants.DRAINING + elif status == constants.MAINT: + member_status = constants.OFFLINE elif status == constants.NO_CHECK: member_status = constants.NO_MONITOR else: diff --git a/octavia/db/migration/alembic_migrations/versions/4aeb9e23ad43_add_draining_operating_status.py b/octavia/db/migration/alembic_migrations/versions/4aeb9e23ad43_add_draining_operating_status.py new file mode 100644 index 0000000000..e521b3bdf3 --- /dev/null +++ b/octavia/db/migration/alembic_migrations/versions/4aeb9e23ad43_add_draining_operating_status.py @@ -0,0 +1,34 @@ +# Copyright 2017 GoDaddy +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +"""Add DRAINING operating status + +Revision ID: 4aeb9e23ad43 +Revises: e6672bda93bf +Create Date: 2017-07-27 00:54:07.128617 + +""" + +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = '4aeb9e23ad43' +down_revision = 'e6672bda93bf' + + +def upgrade(): + bind = op.get_bind() + md = sa.MetaData() + sa.Table('operating_status', md, autoload=True, autoload_with=bind) + op.bulk_insert(md.tables['operating_status'], [{'name': 'DRAINING'}]) diff --git a/octavia/tests/unit/amphorae/backends/utils/test_haproxy_query.py b/octavia/tests/unit/amphorae/backends/utils/test_haproxy_query.py index 67589a9e31..3d2457a47a 100644 --- a/octavia/tests/unit/amphorae/backends/utils/test_haproxy_query.py +++ b/octavia/tests/unit/amphorae/backends/utils/test_haproxy_query.py @@ -18,6 +18,7 @@ import mock import six from octavia.amphorae.backends.utils import haproxy_query as query +from octavia.common import constants import octavia.tests.unit.base as base STATS_SOCKET_SAMPLE = ( @@ -38,6 +39,10 @@ STATS_SOCKET_SAMPLE = ( "1,5,1,,0,,2,0,,0,L4TOUT,,30000,,,,,,,0,,,,0,0,,,,,-1,,,0,0,0,0,\n" "tcp-servers,id-34836,0,0,0,0,,0,0,0,,0,,0,0,0,0,UP,1,1,0,1,1,552,552,," "1,5,2,,0,,2,0,,0,L4TOUT,,30001,,,,,,,0,,,,0,0,,,,,-1,,,0,0,0,0,\n" + "tcp-servers,id-34839,0,0,0,0,,0,0,0,,0,,0,0,0,0,DRAIN,0,1,0,0,0,552,0,," + "1,5,2,,0,,2,0,,0,L7OK,,30001,,,,,,,0,,,,0,0,,,,,-1,,,0,0,0,0,\n" + "tcp-servers,id-34842,0,0,0,0,,0,0,0,,0,,0,0,0,0,MAINT,0,1,0,0,0,552,0,," + "1,5,2,,0,,2,0,,0,L7OK,,30001,,,,,,,0,,,,0,0,,,,,-1,,,0,0,0,0,\n" "tcp-servers,BACKEND,0,0,0,0,200,0,0,0,0,0,,0,0,0,0,UP,0,0,0,,1,552,552" ",,1,5,0,,0,,1,0,,0,,,,,,,,,,,,,,0,0,0,0,0,0,-1,,,0,0,0,0," ) @@ -90,17 +95,19 @@ class QueryTestCase(base.TestCase): query_mock.return_value = STATS_SOCKET_SAMPLE self.assertEqual( {'tcp-servers': { - 'status': 'UP', + 'status': constants.UP, 'uuid': 'tcp-servers', 'members': - {'id-34833': 'UP', - 'id-34836': 'UP'}}, + {'id-34833': constants.UP, + 'id-34836': constants.UP, + 'id-34839': constants.DRAIN, + 'id-34842': constants.MAINT}}, 'http-servers': { - 'status': 'DOWN', + 'status': constants.DOWN, 'uuid': 'http-servers', 'members': - {'id-34821': 'DOWN', - 'id-34824': 'DOWN'}}}, + {'id-34821': constants.DOWN, + 'id-34824': constants.DOWN}}}, self.q.get_pool_status() ) diff --git a/octavia/tests/unit/controller/healthmanager/test_update_db.py b/octavia/tests/unit/controller/healthmanager/test_update_db.py index b1b9d76130..68338c560a 100644 --- a/octavia/tests/unit/controller/healthmanager/test_update_db.py +++ b/octavia/tests/unit/controller/healthmanager/test_update_db.py @@ -145,6 +145,120 @@ class TestUpdateHealthDb(base.TestCase): self.hm.update_health(health) + def test_update_health_member_drain(self): + + health = { + "id": self.FAKE_UUID_1, + "listeners": { + "listener-id-1": { + "status": constants.OPEN, + "pools": { + "pool-id-1": { + "status": constants.UP, + "members": {"member-id-1": constants.DRAIN}}}}}} + + self.mock_session.return_value = 'blah' + + self.hm.update_health(health) + self.assertTrue(self.amphora_health_repo.replace.called) + + # test listener, member + for listener_id, listener in six.iteritems( + health.get('listeners', {})): + + self.listener_repo.update.assert_any_call( + 'blah', listener_id, operating_status=constants.ONLINE) + + for pool_id, pool in six.iteritems(listener.get('pools', {})): + + self.hm.pool_repo.update.assert_any_call( + 'blah', pool_id, operating_status=constants.ONLINE) + + for member_id, member in six.iteritems( + pool.get('members', {})): + + self.member_repo.update.assert_any_call( + 'blah', member_id, + operating_status=constants.DRAINING) + + self.hm.listener_repo.count.return_value = 2 + + self.hm.update_health(health) + + def test_update_health_member_maint(self): + + health = { + "id": self.FAKE_UUID_1, + "listeners": { + "listener-id-1": { + "status": constants.OPEN, + "pools": { + "pool-id-1": { + "status": constants.UP, + "members": {"member-id-1": constants.MAINT}}}}}} + + self.mock_session.return_value = 'blah' + + self.hm.update_health(health) + self.assertTrue(self.amphora_health_repo.replace.called) + + # test listener, member + for listener_id, listener in six.iteritems( + health.get('listeners', {})): + + self.listener_repo.update.assert_any_call( + 'blah', listener_id, operating_status=constants.ONLINE) + + for pool_id, pool in six.iteritems(listener.get('pools', {})): + + self.hm.pool_repo.update.assert_any_call( + 'blah', pool_id, operating_status=constants.ONLINE) + + for member_id, member in six.iteritems( + pool.get('members', {})): + + self.member_repo.update.assert_any_call( + 'blah', member_id, + operating_status=constants.OFFLINE) + + self.hm.listener_repo.count.return_value = 2 + + self.hm.update_health(health) + + def test_update_health_member_unknown(self): + + health = { + "id": self.FAKE_UUID_1, + "listeners": { + "listener-id-1": { + "status": constants.OPEN, + "pools": { + "pool-id-1": { + "status": constants.UP, + "members": {"member-id-1": "blah"}}}}}} + + self.mock_session.return_value = 'blah' + + self.hm.update_health(health) + self.assertTrue(self.amphora_health_repo.replace.called) + + # test listener, member + for listener_id, listener in six.iteritems( + health.get('listeners', {})): + + self.listener_repo.update.assert_any_call( + 'blah', listener_id, operating_status=constants.ONLINE) + + for pool_id, pool in six.iteritems(listener.get('pools', {})): + + self.hm.pool_repo.update.assert_any_call( + 'blah', pool_id, operating_status=constants.ONLINE) + self.assertTrue(not self.member_repo.update.called) + + self.hm.listener_repo.count.return_value = 2 + + self.hm.update_health(health) + def test_update_health_member_down(self): health = { @@ -334,11 +448,23 @@ class TestUpdateHealthDb(base.TestCase): } } }, - "listener-id-4": {"status": "bogus", "pools": { - "pool-id-4": {"status": "bogus", - "members": {"member-id-4": "bogus"} - } - } + "listener-id-4": { + "status": constants.OPEN, + "pools": { + "pool-id-4": { + "status": constants.UP, + "members": {"member-id-4": constants.DRAINING} + } + } + }, + "listener-id-5": { + "status": "bogus", + "pools": { + "pool-id-5": { + "status": "bogus", + "members": {"member-id-5": "bogus"} + } + } } } } @@ -358,6 +484,8 @@ class TestUpdateHealthDb(base.TestCase): 'blah', "pool-id-2", operating_status=constants.ONLINE) self.pool_repo.update.assert_any_call( 'blah', "pool-id-3", operating_status=constants.DEGRADED) + self.pool_repo.update.assert_any_call( + 'blah', "pool-id-4", operating_status=constants.ONLINE) # Test code paths where objects are not found in the database def test_update_health_not_found(self): diff --git a/releasenotes/notes/Report-more-accurate-haproxy-statuses-7e995bb4c7cc0dd6.yaml b/releasenotes/notes/Report-more-accurate-haproxy-statuses-7e995bb4c7cc0dd6.yaml new file mode 100644 index 0000000000..59b27795a0 --- /dev/null +++ b/releasenotes/notes/Report-more-accurate-haproxy-statuses-7e995bb4c7cc0dd6.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + Some versions of HAProxy incorrectly reported nodes in DRAIN status as + being UP, and Octavia code was written around this incorrect reporting. + This has been fixed in some versions of HAProxy and is now handled + properly in Octavia as well. Now it is possible for members to be in the + status DRAINING. Note that this is masked when statuses are forwarded to + neutron-lbaas in the eventstream, so no compatibility change is necessary.