diff --git a/neutron/common/constants.py b/neutron/common/constants.py index f22e7afd6c7..a56e7089d7a 100644 --- a/neutron/common/constants.py +++ b/neutron/common/constants.py @@ -20,6 +20,7 @@ ROUTER_PORT_OWNERS = lib_constants.ROUTER_INTERFACE_OWNERS_SNAT + \ (lib_constants.DEVICE_OWNER_ROUTER_GW,) ROUTER_STATUS_ACTIVE = 'ACTIVE' +ROUTER_STATUS_ERROR = 'ERROR' DEVICE_ID_RESERVED_DHCP_PORT = "reserved_dhcp_port" diff --git a/neutron/db/l3_db.py b/neutron/db/l3_db.py index ffc37dd5587..2ba7f57293a 100644 --- a/neutron/db/l3_db.py +++ b/neutron/db/l3_db.py @@ -197,13 +197,13 @@ class L3_NAT_dbonly_mixin(l3.RouterPluginBase, def _create_router_db(self, context, router, tenant_id): """Create the DB object.""" + router.setdefault('id', uuidutils.generate_uuid()) + router['tenant_id'] = tenant_id registry.notify(resources.ROUTER, events.BEFORE_CREATE, self, context=context, router=router) with context.session.begin(subtransactions=True): # pre-generate id so it will be available when # configuring external gw port - router.setdefault('id', uuidutils.generate_uuid()) - router['tenant_id'] = tenant_id router_db = l3_models.Router( id=router['id'], tenant_id=router['tenant_id'], diff --git a/neutron/db/l3_hamode_db.py b/neutron/db/l3_hamode_db.py index 49f0a867f2d..4c43365488c 100644 --- a/neutron/db/l3_hamode_db.py +++ b/neutron/db/l3_hamode_db.py @@ -29,7 +29,7 @@ import sqlalchemy as sa from sqlalchemy import exc as sql_exc from sqlalchemy import orm -from neutron._i18n import _, _LI +from neutron._i18n import _, _LE, _LI, _LW from neutron.api.v2 import attributes from neutron.callbacks import events from neutron.callbacks import registry @@ -116,8 +116,12 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin, resources.ROUTER, events.PRECOMMIT_DELETE) registry.subscribe(inst._cleanup_ha_network, resources.ROUTER, events.AFTER_DELETE) - registry.subscribe(inst._set_ha_flag, + registry.subscribe(inst._precommit_router_create, resources.ROUTER, events.PRECOMMIT_CREATE) + registry.subscribe(inst._before_router_create, + resources.ROUTER, events.BEFORE_CREATE) + registry.subscribe(inst._after_router_create, + resources.ROUTER, events.AFTER_CREATE) return inst def get_ha_network(self, context, tenant_id): @@ -379,35 +383,55 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin, return n_utils.create_object_with_dependency( creator, dep_getter, dep_creator, dep_id_attr, dep_deleter)[1] - def _set_ha_flag(self, resource, event, trigger, context, router, - router_db, **kwargs): + @db_api.retry_if_session_inactive() + def _before_router_create(self, resource, event, trigger, + context, router, **kwargs): + """Event handler to create HA resources before router creation.""" + if not self._is_ha(router): + return + # ensure the HA network exists before we start router creation so + # we can provide meaningful errors back to the user if no network + # can be allocated + if not self.get_ha_network(context, router['tenant_id']): + self._create_ha_network(context, router['tenant_id']) + + def _precommit_router_create(self, resource, event, trigger, context, + router, router_db, **kwargs): """Event handler to set ha flag and status on creation.""" is_ha = self._is_ha(router) router['ha'] = is_ha self.set_extra_attr_value(context, router_db, 'ha', is_ha) + if not is_ha: + return + # This will throw an exception if there aren't enough agents to + # handle this HA router + self.get_number_of_agents_for_scheduling(context) + ha_net = self.get_ha_network(context, router['tenant_id']) + if not ha_net: + # net was deleted, throw a retry to start over to create another + raise db_exc.RetryRequest( + l3_ha.HANetworkConcurrentDeletion( + tenant_id=router['tenant_id'])) - @db_api.retry_if_session_inactive() - def create_router(self, context, router): - is_ha = self._is_ha(router['router']) - if is_ha: - # This will throw an exception if there aren't enough agents to - # handle this HA router - self.get_number_of_agents_for_scheduling(context) - - router_dict = super(L3_HA_NAT_db_mixin, - self).create_router(context, router) - if is_ha: - try: - router_db = self._get_router(context, router_dict['id']) - - self.schedule_router(context, router_dict['id']) - - router_dict['ha_vr_id'] = router_db.extra_attributes.ha_vr_id - self._notify_router_updated(context, router_db.id) - except Exception: - with excutils.save_and_reraise_exception(): - self.delete_router(context, router_dict['id']) - return router_dict + def _after_router_create(self, resource, event, trigger, context, + router_id, router, router_db, **kwargs): + if not router['ha']: + return + try: + self.schedule_router(context, router_id) + router['ha_vr_id'] = router_db.extra_attributes.ha_vr_id + self._notify_router_updated(context, router_id) + except Exception as e: + with excutils.save_and_reraise_exception() as ctx: + if isinstance(e, l3_ha.NoVRIDAvailable): + ctx.reraise = False + LOG.warning(_LW("No more VRIDs for router: %s"), e) + else: + LOG.exception(_LE("Failed to schedule HA router %s."), + router_id) + router['status'] = self._update_router_db( + context, router_id, + {'status': n_const.ROUTER_STATUS_ERROR})['status'] @db_api.retry_if_session_inactive() def _update_router_db(self, context, router_id, data): diff --git a/neutron/extensions/l3_ext_ha_mode.py b/neutron/extensions/l3_ext_ha_mode.py index bd2a2a7dfda..d1e5f375fa9 100644 --- a/neutron/extensions/l3_ext_ha_mode.py +++ b/neutron/extensions/l3_ext_ha_mode.py @@ -70,6 +70,10 @@ class NoVRIDAvailable(exceptions.Conflict): "of HA Routers per tenant is 254.") +class HANetworkConcurrentDeletion(exceptions.Conflict): + message = _("Network for tenant %(tenant_id)s concurrently deleted.") + + class HANetworkCIDRNotValid(exceptions.NeutronException): message = _("The HA Network CIDR specified in the configuration file " "isn't valid; %(cidr)s.") diff --git a/neutron/tests/unit/db/test_l3_hamode_db.py b/neutron/tests/unit/db/test_l3_hamode_db.py index c2fa47d6064..89f1fc9c642 100644 --- a/neutron/tests/unit/db/test_l3_hamode_db.py +++ b/neutron/tests/unit/db/test_l3_hamode_db.py @@ -25,6 +25,7 @@ import testtools from neutron.agent.common import utils as agent_utils from neutron.api.rpc.handlers import l3_rpc +from neutron.callbacks import exceptions as c_exc from neutron.common import constants as n_const from neutron import context from neutron.db import agents_db @@ -442,7 +443,8 @@ class L3HATestCase(L3HATestFramework): @mock.patch('neutron.db.l3_hamode_db.VR_ID_RANGE', new=set(range(1, 1))) def test_vr_id_depleted(self): - self.assertRaises(l3_ext_ha_mode.NoVRIDAvailable, self._create_router) + self.assertEqual(n_const.ROUTER_STATUS_ERROR, + self._create_router()['status']) @mock.patch('neutron.db.l3_hamode_db.VR_ID_RANGE', new=set(range(1, 2))) def test_vr_id_unique_range_per_tenant(self): @@ -655,17 +657,13 @@ class L3HATestCase(L3HATestFramework): self.plugin._create_ha_network_tenant_binding( self.admin_ctx, 't1', network['network_id']) - def test_create_router_db_ha_attribute_failure_rolls_back_router(self): - routers_before = self.plugin.get_routers(self.admin_ctx) - + def test_create_router_db_vr_id_allocation_goes_to_error(self): for method in ('_ensure_vr_id', '_notify_router_updated'): with mock.patch.object(self.plugin, method, side_effect=ValueError): - self.assertRaises(ValueError, self._create_router) - - routers_after = self.plugin.get_routers(self.admin_ctx) - self.assertEqual(routers_before, routers_after) + self.assertEqual(n_const.ROUTER_STATUS_ERROR, + self._create_router()['status']) def test_get_active_host_for_ha_router(self): router = self._create_router() @@ -897,10 +895,12 @@ class L3HATestCase(L3HATestFramework): # Unable to create HA network with mock.patch.object(self.core_plugin, 'create_network', side_effect=n_exc.NoNetworkAvailable): - self.assertRaises(n_exc.NoNetworkAvailable, - self._create_router, - True, - tenant_id) + e = self.assertRaises(c_exc.CallbackFailure, + self._create_router, + True, + tenant_id) + self.assertIsInstance(e.inner_exceptions[0], + n_exc.NoNetworkAvailable) nets_after = self.core_plugin.get_networks(self.admin_ctx) self.assertEqual(nets_before, nets_after) self.assertNotIn('HA network tenant %s' % tenant_id,