Initialize dist_fip_count after agent restart

Runtime router variable dist_fip_count has been
used to keep track of FIPs for DVR routers.
This variable is not re-initialized correctly on
agent restart and can get stale from other errors
which cause problems with namespace and port cleanup.

This patch will initialize the ri.dist_fip_count
once in process_router for dvr routers only.  This
method was selected instead of the _router_added or
_router_removed path because it is the one central
entry point for rotuer add, delete, and update.

The object self.agent_gateway_port also needs to be
properly handled after an agent restart and this
patch will handle that as well.

When needed, the system will be read via system
calls to determine the state of namespaces and ports
since the variables cannot be relied on.
System calls will be kept to a minimum to reduce
and possible performance hits.

Change-Id: Iae5ebf5249f8e16ab57df78e042293ca2855ddf1
Closes-bug: #1367039
This commit is contained in:
Michael Smith 2014-09-10 16:59:14 -07:00
parent 46161134f8
commit ee4bae2113
4 changed files with 75 additions and 12 deletions

View File

@ -581,6 +581,7 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback,
# TODO(mrsmith) - we shouldn't need to check here
if 'distributed' not in ri.router:
ri.router['distributed'] = False
self.scan_fip_ports(ri)
self._process_internal_ports(ri)
self._process_external(ri)
# Process static routes for router
@ -657,14 +658,19 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback,
self._create_agent_gateway_port(ri, floating_ips[0]
['floating_network_id'])
if self.agent_gateway_port:
if floating_ips and ri.dist_fip_count == 0:
self.create_rtr_2_fip_link(ri, floating_ips[0]
['floating_network_id'])
if self.agent_gateway_port and floating_ips:
fip_net_id = floating_ips[0]['floating_network_id']
self.create_rtr_2_fip_link(ri, fip_net_id)
def _get_external_device_interface_name(self, ri, ex_gw_port):
if ri.router['distributed']:
if self.agent_gateway_port:
fip_int = self.get_fip_int_device_name(ri.router_id)
# TODO(mrsmith) refactor for multiple ext nets
fip_ns = self.get_fip_ns_name(str(self._fetch_external_net_id()))
if ip_lib.device_exists(fip_int,
root_helper=self.root_helper,
namespace=fip_ns):
return self.get_rtr_int_device_name(ri.router_id)
else:
return self.get_external_device_name(ex_gw_port['id'])

View File

@ -19,6 +19,7 @@ from neutron.agent.l3 import link_local_allocator as lla
from neutron.agent.linux import ip_lib
from neutron.agent.linux import iptables_manager
from neutron.common import constants as l3_constants
from neutron.common import utils as common_utils
from neutron.i18n import _LE
from neutron.openstack.common import log as logging
@ -119,6 +120,24 @@ class AgentMixin(object):
if f['subnet_id'] == subnet_id:
return port
def scan_fip_ports(self, ri):
# don't scan if not dvr or count is not None
if not ri.router.get('distributed') or ri.dist_fip_count is not None:
return
# scan system for any existing fip ports
ri.dist_fip_count = 0
rtr_2_fip_interface = self.get_rtr_int_device_name(ri.router_id)
if ip_lib.device_exists(rtr_2_fip_interface,
root_helper=self.root_helper,
namespace=ri.ns_name):
device = ip_lib.IPDevice(rtr_2_fip_interface, self.root_helper,
namespace=ri.ns_name)
existing_cidrs = [addr['cidr'] for addr in device.addr.list()]
fip_cidrs = [c for c in existing_cidrs if
common_utils.is_cidr_host(c)]
ri.dist_fip_count = len(fip_cidrs)
def get_fip_ext_device_name(self, port_id):
return (FIP_EXT_DEV_PREFIX +
port_id)[:self.driver.DEV_NAME_LEN]
@ -337,17 +356,17 @@ class AgentMixin(object):
floating_ip = fip_cidr.split('/')[0]
rtr_2_fip_name = self.get_rtr_int_device_name(ri.router_id)
fip_2_rtr_name = self.get_fip_int_device_name(ri.router_id)
if ri.rtr_fip_subnet is None:
ri.rtr_fip_subnet = self.local_subnets.allocate(ri.router_id)
rtr_2_fip, fip_2_rtr = ri.rtr_fip_subnet.get_pair()
fip_ns_name = self.get_fip_ns_name(str(self._fetch_external_net_id()))
ip_rule_rtr = ip_lib.IpRule(self.root_helper, namespace=ri.ns_name)
if floating_ip in ri.floating_ips_dict:
rule_pr = ri.floating_ips_dict[floating_ip]
#TODO(rajeev): Handle else case - exception/log?
else:
rule_pr = None
ip_rule_rtr.delete_rule_priority(rule_pr)
self.fip_priorities.add(rule_pr)
#TODO(rajeev): Handle else case - exception/log?
device = ip_lib.IPDevice(fip_2_rtr_name, self.root_helper,
namespace=fip_ns_name)

View File

@ -23,4 +23,4 @@ class DvrRouter(router.RouterInfo):
self.snat_iptables_manager = None
# Linklocal subnet for router and floating IP namespace link
self.rtr_fip_subnet = None
self.dist_fip_count = 0
self.dist_fip_count = None

View File

@ -794,6 +794,42 @@ class TestBasicRouterOperations(base.BaseTestCase):
4, '1.5.25.15', '00:44:33:22:11:55')
agent.router_deleted(None, router['id'])
@mock.patch('neutron.agent.linux.ip_lib.IPDevice')
def _test_scan_fip_ports(self, ri, ip_list, IPDevice):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
self.device_exists.return_value = True
IPDevice.return_value = device = mock.Mock()
device.addr.list.return_value = ip_list
agent.scan_fip_ports(ri)
def test_scan_fip_ports_restart_fips(self):
router = prepare_router_data()
ri = dvr_router.DvrRouter(router['id'], self.conf.root_helper,
router=router)
ri.router['distributed'] = True
ip_list = [{'cidr': '111.2.3.4/32'}, {'cidr': '111.2.3.5/32'}]
self._test_scan_fip_ports(ri, ip_list)
self.assertEqual(ri.dist_fip_count, 2)
def test_scan_fip_ports_restart_none(self):
router = prepare_router_data()
ri = dvr_router.DvrRouter(router['id'], self.conf.root_helper,
router=router)
ri.router['distributed'] = True
ip_list = []
self._test_scan_fip_ports(ri, ip_list)
self.assertEqual(ri.dist_fip_count, 0)
def test_scan_fip_ports_restart_zero(self):
router = prepare_router_data()
ri = dvr_router.DvrRouter(router['id'], self.conf.root_helper,
router=router)
ri.router['distributed'] = True
ri.dist_fip_count = 0
ip_list = None
self._test_scan_fip_ports(ri, ip_list)
self.assertEqual(ri.dist_fip_count, 0)
def test_process_cent_router(self):
router = prepare_router_data()
ri = l3router.RouterInfo(router['id'], self.conf.root_helper,
@ -802,7 +838,7 @@ class TestBasicRouterOperations(base.BaseTestCase):
def test_process_dist_router(self):
router = prepare_router_data()
ri = l3router.RouterInfo(router['id'], self.conf.root_helper,
ri = dvr_router.DvrRouter(router['id'], self.conf.root_helper,
router=router)
subnet_id = _get_subnet_id(router[l3_constants.INTERFACE_KEY][0])
ri.router['distributed'] = True
@ -969,6 +1005,7 @@ class TestBasicRouterOperations(base.BaseTestCase):
ri = dvr_router.DvrRouter(router['id'], self.conf.root_helper,
router=router)
ri.iptables_manager.ipv4['nat'] = mock.MagicMock()
ri.dist_fip_count = 0
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
agent.host = HOSTNAME
agent.agent_gateway_port = (
@ -1924,6 +1961,7 @@ class TestBasicRouterOperations(base.BaseTestCase):
'port_id': _uuid()}
agent.agent_gateway_port = agent_gw_port
ri.rtr_fip_subnet = lla.LinkLocalAddressPair('169.254.30.42/31')
ri.dist_fip_count = 0
ip_cidr = common_utils.ip_to_cidr(fip['floating_ip_address'])
agent.floating_ip_added_dist(ri, fip, ip_cidr)
self.mock_rule.add_rule_from.assert_called_with('192.168.0.1',