From 029645562c919a6665075e275ddef81eaa95a608 Mon Sep 17 00:00:00 2001 From: Kevin Benton Date: Fri, 18 Nov 2016 03:04:38 -0700 Subject: [PATCH] Get rid of create_router override in l3_ha mixin Switch to callback events for the create router processing of HA routers. This maintains the same transactional semantics as before. However, if an error is encountered after creating the router during scheduling, it is no longer deleted but it is instead put into the ERROR state. Related: blueprint multi-l3-backends Change-Id: Ib7444408286841e0e3dfdc76339652ceb2e6db94 --- neutron/common/constants.py | 1 + neutron/db/l3_db.py | 4 +- neutron/db/l3_hamode_db.py | 76 ++++++++++++++-------- neutron/extensions/l3_ext_ha_mode.py | 4 ++ neutron/tests/unit/db/test_l3_hamode_db.py | 24 +++---- 5 files changed, 69 insertions(+), 40 deletions(-) diff --git a/neutron/common/constants.py b/neutron/common/constants.py index f22e7afd6c7..a56e7089d7a 100644 --- a/neutron/common/constants.py +++ b/neutron/common/constants.py @@ -20,6 +20,7 @@ ROUTER_PORT_OWNERS = lib_constants.ROUTER_INTERFACE_OWNERS_SNAT + \ (lib_constants.DEVICE_OWNER_ROUTER_GW,) ROUTER_STATUS_ACTIVE = 'ACTIVE' +ROUTER_STATUS_ERROR = 'ERROR' DEVICE_ID_RESERVED_DHCP_PORT = "reserved_dhcp_port" diff --git a/neutron/db/l3_db.py b/neutron/db/l3_db.py index 04398a739b3..6775c9331f2 100644 --- a/neutron/db/l3_db.py +++ b/neutron/db/l3_db.py @@ -197,13 +197,13 @@ class L3_NAT_dbonly_mixin(l3.RouterPluginBase, def _create_router_db(self, context, router, tenant_id): """Create the DB object.""" + router.setdefault('id', uuidutils.generate_uuid()) + router['tenant_id'] = tenant_id registry.notify(resources.ROUTER, events.BEFORE_CREATE, self, context=context, router=router) with context.session.begin(subtransactions=True): # pre-generate id so it will be available when # configuring external gw port - router.setdefault('id', uuidutils.generate_uuid()) - router['tenant_id'] = tenant_id router_db = l3_models.Router( id=router['id'], tenant_id=router['tenant_id'], diff --git a/neutron/db/l3_hamode_db.py b/neutron/db/l3_hamode_db.py index fc06eda232a..4ab931203ec 100644 --- a/neutron/db/l3_hamode_db.py +++ b/neutron/db/l3_hamode_db.py @@ -29,7 +29,7 @@ import sqlalchemy as sa from sqlalchemy import exc as sql_exc from sqlalchemy import orm -from neutron._i18n import _, _LI +from neutron._i18n import _, _LE, _LI, _LW from neutron.api.v2 import attributes from neutron.callbacks import events from neutron.callbacks import registry @@ -116,8 +116,12 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin, resources.ROUTER, events.PRECOMMIT_DELETE) registry.subscribe(inst._cleanup_ha_network, resources.ROUTER, events.AFTER_DELETE) - registry.subscribe(inst._set_ha_flag, + registry.subscribe(inst._precommit_router_create, resources.ROUTER, events.PRECOMMIT_CREATE) + registry.subscribe(inst._before_router_create, + resources.ROUTER, events.BEFORE_CREATE) + registry.subscribe(inst._after_router_create, + resources.ROUTER, events.AFTER_CREATE) return inst def get_ha_network(self, context, tenant_id): @@ -379,35 +383,55 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin, return n_utils.create_object_with_dependency( creator, dep_getter, dep_creator, dep_id_attr, dep_deleter)[1] - def _set_ha_flag(self, resource, event, trigger, context, router, - router_db, **kwargs): + @db_api.retry_if_session_inactive() + def _before_router_create(self, resource, event, trigger, + context, router, **kwargs): + """Event handler to create HA resources before router creation.""" + if not self._is_ha(router): + return + # ensure the HA network exists before we start router creation so + # we can provide meaningful errors back to the user if no network + # can be allocated + if not self.get_ha_network(context, router['tenant_id']): + self._create_ha_network(context, router['tenant_id']) + + def _precommit_router_create(self, resource, event, trigger, context, + router, router_db, **kwargs): """Event handler to set ha flag and status on creation.""" is_ha = self._is_ha(router) router['ha'] = is_ha self.set_extra_attr_value(context, router_db, 'ha', is_ha) + if not is_ha: + return + # This will throw an exception if there aren't enough agents to + # handle this HA router + self.get_number_of_agents_for_scheduling(context) + ha_net = self.get_ha_network(context, router['tenant_id']) + if not ha_net: + # net was deleted, throw a retry to start over to create another + raise db_exc.RetryRequest( + l3_ha.HANetworkConcurrentDeletion( + tenant_id=router['tenant_id'])) - @db_api.retry_if_session_inactive() - def create_router(self, context, router): - is_ha = self._is_ha(router['router']) - if is_ha: - # This will throw an exception if there aren't enough agents to - # handle this HA router - self.get_number_of_agents_for_scheduling(context) - - router_dict = super(L3_HA_NAT_db_mixin, - self).create_router(context, router) - if is_ha: - try: - router_db = self._get_router(context, router_dict['id']) - - self.schedule_router(context, router_dict['id']) - - router_dict['ha_vr_id'] = router_db.extra_attributes.ha_vr_id - self._notify_router_updated(context, router_db.id) - except Exception: - with excutils.save_and_reraise_exception(): - self.delete_router(context, router_dict['id']) - return router_dict + def _after_router_create(self, resource, event, trigger, context, + router_id, router, router_db, **kwargs): + if not router['ha']: + return + try: + self.schedule_router(context, router_id) + router['ha_vr_id'] = router_db.extra_attributes.ha_vr_id + self._notify_router_updated(context, router_id) + except Exception as e: + with excutils.save_and_reraise_exception() as ctx: + if isinstance(e, l3_ha.NoVRIDAvailable): + ctx.reraise = False + LOG.warning(_LW("No more VRIDs for router: %s"), e) + else: + LOG.exception(_LE("Failed to schedule HA router %s."), + router_id) + router['status'] = self._update_router_db( + context, router_id, + {'status': n_const.ROUTER_STATUS_ERROR})['status'] @db_api.retry_if_session_inactive() def _update_router_db(self, context, router_id, data): diff --git a/neutron/extensions/l3_ext_ha_mode.py b/neutron/extensions/l3_ext_ha_mode.py index bd2a2a7dfda..d1e5f375fa9 100644 --- a/neutron/extensions/l3_ext_ha_mode.py +++ b/neutron/extensions/l3_ext_ha_mode.py @@ -70,6 +70,10 @@ class NoVRIDAvailable(exceptions.Conflict): "of HA Routers per tenant is 254.") +class HANetworkConcurrentDeletion(exceptions.Conflict): + message = _("Network for tenant %(tenant_id)s concurrently deleted.") + + class HANetworkCIDRNotValid(exceptions.NeutronException): message = _("The HA Network CIDR specified in the configuration file " "isn't valid; %(cidr)s.") diff --git a/neutron/tests/unit/db/test_l3_hamode_db.py b/neutron/tests/unit/db/test_l3_hamode_db.py index dd621900edb..20040a751a0 100644 --- a/neutron/tests/unit/db/test_l3_hamode_db.py +++ b/neutron/tests/unit/db/test_l3_hamode_db.py @@ -25,6 +25,7 @@ import testtools from neutron.agent.common import utils as agent_utils from neutron.api.rpc.handlers import l3_rpc +from neutron.callbacks import exceptions as c_exc from neutron.common import constants as n_const from neutron import context from neutron.db import agents_db @@ -420,7 +421,8 @@ class L3HATestCase(L3HATestFramework): @mock.patch('neutron.db.l3_hamode_db.VR_ID_RANGE', new=set(range(1, 1))) def test_vr_id_depleted(self): - self.assertRaises(l3_ext_ha_mode.NoVRIDAvailable, self._create_router) + self.assertEqual(n_const.ROUTER_STATUS_ERROR, + self._create_router()['status']) @mock.patch('neutron.db.l3_hamode_db.VR_ID_RANGE', new=set(range(1, 2))) def test_vr_id_unique_range_per_tenant(self): @@ -633,17 +635,13 @@ class L3HATestCase(L3HATestFramework): self.plugin._create_ha_network_tenant_binding( self.admin_ctx, 't1', network['network_id']) - def test_create_router_db_ha_attribute_failure_rolls_back_router(self): - routers_before = self.plugin.get_routers(self.admin_ctx) - + def test_create_router_db_vr_id_allocation_goes_to_error(self): for method in ('_ensure_vr_id', '_notify_router_updated'): with mock.patch.object(self.plugin, method, side_effect=ValueError): - self.assertRaises(ValueError, self._create_router) - - routers_after = self.plugin.get_routers(self.admin_ctx) - self.assertEqual(routers_before, routers_after) + self.assertEqual(n_const.ROUTER_STATUS_ERROR, + self._create_router()['status']) def test_get_active_host_for_ha_router(self): router = self._create_router() @@ -875,10 +873,12 @@ class L3HATestCase(L3HATestFramework): # Unable to create HA network with mock.patch.object(self.core_plugin, 'create_network', side_effect=n_exc.NoNetworkAvailable): - self.assertRaises(n_exc.NoNetworkAvailable, - self._create_router, - True, - tenant_id) + e = self.assertRaises(c_exc.CallbackFailure, + self._create_router, + True, + tenant_id) + self.assertIsInstance(e.inner_exceptions[0], + n_exc.NoNetworkAvailable) nets_after = self.core_plugin.get_networks(self.admin_ctx) self.assertEqual(nets_before, nets_after) self.assertNotIn('HA network tenant %s' % tenant_id,