From a7d43ec2753de12fa06a10946e79da967d7a1071 Mon Sep 17 00:00:00 2001 From: Adit Sarfaty Date: Tue, 31 Jan 2017 16:50:38 +0200 Subject: [PATCH] [Admin-Util] NSXv: fix plugin issues 1. Since the nsxv plugin was defined globally in one of the utilities files, it was actually initialized for all the nsxv utilities, which has some side affects (like deploying backup edges during unrelated admin utilities in case some of the pools are not full), and also takes a long time. Now the plugin is initialized only when needed. 2. When the plugin is initialized during an admin-util call, we mustn't exit while spawn jobs for creating edges are still running, or else - those edges will be in PENDING-CREATE state for ever. Initializing the plugin using "with" and adding an exit method which waits for the jobs to finish solved this issue. Change-Id: Ia1fa557a8da149f79a46b3ca49b122f991b2ca9b --- .../plugins/nsxv/resources/dhcp_binding.py | 109 ++++++------ .../admin/plugins/nsxv/resources/routers.py | 84 ++++----- .../plugins/nsxv/resources/securitygroups.py | 168 ++++++++++-------- .../admin/plugins/nsxv/resources/utils.py | 42 +++++ .../tests/unit/shell/test_admin_utils.py | 2 +- 5 files changed, 233 insertions(+), 172 deletions(-) diff --git a/vmware_nsx/shell/admin/plugins/nsxv/resources/dhcp_binding.py b/vmware_nsx/shell/admin/plugins/nsxv/resources/dhcp_binding.py index 90907d292f..38249ee272 100644 --- a/vmware_nsx/shell/admin/plugins/nsxv/resources/dhcp_binding.py +++ b/vmware_nsx/shell/admin/plugins/nsxv/resources/dhcp_binding.py @@ -118,15 +118,15 @@ def nsx_update_dhcp_edge_binding(resource, event, trigger, **kwargs): LOG.info(_LI("Updating NSXv Edge: %s"), edge_id) # Need to create a plugin object; so that we are able to # do neutron list-ports. - plugin = utils.NsxVPluginWrapper() - nsxv_manager = vcns_driver.VcnsDriver( - edge_utils.NsxVCallbacks(plugin)) - edge_manager = edge_utils.EdgeManager(nsxv_manager, plugin) - try: - edge_manager.update_dhcp_service_config( - neutron_db.context, edge_id) - except exceptions.ResourceNotFound: - LOG.error(_LE("Edge %s not found"), edge_id) + with utils.NsxVPluginWrapper() as plugin: + nsxv_manager = vcns_driver.VcnsDriver( + edge_utils.NsxVCallbacks(plugin)) + edge_manager = edge_utils.EdgeManager(nsxv_manager, plugin) + try: + edge_manager.update_dhcp_service_config( + neutron_db.context, edge_id) + except exceptions.ResourceNotFound: + LOG.error(_LE("Edge %s not found"), edge_id) def delete_old_dhcp_edge(context, old_edge_id, bindings): @@ -260,13 +260,6 @@ def nsx_recreate_dhcp_edge(resource, event, trigger, **kwargs): return LOG.info(_LI("ReCreating NSXv Edge: %s"), old_edge_id) - # init the plugin and edge manager - cfg.CONF.set_override('core_plugin', - 'vmware_nsx.shell.admin.plugins.nsxv.resources' - '.utils.NsxVPluginWrapper') - plugin = utils.NsxVPluginWrapper() - nsxv_manager = vcns_driver.VcnsDriver(edge_utils.NsxVCallbacks(plugin)) - edge_manager = edge_utils.EdgeManager(nsxv_manager, plugin) context = n_context.get_admin_context() # verify that this is a DHCP edge @@ -279,29 +272,38 @@ def nsx_recreate_dhcp_edge(resource, event, trigger, **kwargs): {'edge_id': old_edge_id}) return - # find the networks bound to this DHCP edge - networks_binding = nsxv_db.get_edge_vnic_bindings_by_edge( - context.session, old_edge_id) - network_ids = [binding['network_id'] for binding in networks_binding] + # init the plugin and edge manager + cfg.CONF.set_override('core_plugin', + 'vmware_nsx.shell.admin.plugins.nsxv.resources' + '.utils.NsxVPluginWrapper') + with utils.NsxVPluginWrapper() as plugin: + nsxv_manager = vcns_driver.VcnsDriver( + edge_utils.NsxVCallbacks(plugin)) + edge_manager = edge_utils.EdgeManager(nsxv_manager, plugin) - # Find out the vdr router, if this is a vdr DHCP edge - vdr_binding = nsxv_db.get_vdr_dhcp_binding_by_edge( - context.session, old_edge_id) - vdr_router_id = vdr_binding['vdr_router_id'] if vdr_binding else None + # find the networks bound to this DHCP edge + networks_binding = nsxv_db.get_edge_vnic_bindings_by_edge( + context.session, old_edge_id) + network_ids = [binding['network_id'] for binding in networks_binding] - # Delete the old edge - delete_old_dhcp_edge(context, old_edge_id, bindings) + # Find out the vdr router, if this is a vdr DHCP edge + vdr_binding = nsxv_db.get_vdr_dhcp_binding_by_edge( + context.session, old_edge_id) + vdr_router_id = vdr_binding['vdr_router_id'] if vdr_binding else None - if vdr_router_id: - # recreate the edge as a VDR DHCP edge - recreate_vdr_dhcp_edge(context, plugin, edge_manager, - vdr_router_id) - else: - # This is a regular DHCP edge: - # Move all the networks to other (new or existing) edge - for net_id in network_ids: - recreate_network_dhcp(context, plugin, edge_manager, - old_edge_id, net_id) + # Delete the old edge + delete_old_dhcp_edge(context, old_edge_id, bindings) + + if vdr_router_id: + # recreate the edge as a VDR DHCP edge + recreate_vdr_dhcp_edge(context, plugin, edge_manager, + vdr_router_id) + else: + # This is a regular DHCP edge: + # Move all the networks to other (new or existing) edge + for net_id in network_ids: + recreate_network_dhcp(context, plugin, edge_manager, + old_edge_id, net_id) def _get_net_vdr_router_id(plugin, context, net_id): @@ -322,13 +324,6 @@ def nsx_recreate_dhcp_edge_by_net_id(net_id): """Recreate a dhcp edge for a specific network without an edge""" LOG.info(_LI("ReCreating NSXv Edge for network: %s"), net_id) - # init the plugin and edge manager - cfg.CONF.set_override('core_plugin', - 'vmware_nsx.shell.admin.plugins.nsxv.resources' - '.utils.NsxVPluginWrapper') - plugin = utils.NsxVPluginWrapper() - nsxv_manager = vcns_driver.VcnsDriver(edge_utils.NsxVCallbacks(plugin)) - edge_manager = edge_utils.EdgeManager(nsxv_manager, plugin) context = n_context.get_admin_context() # verify that there is no DHCP edge for this network at the moment @@ -346,16 +341,24 @@ def nsx_recreate_dhcp_edge_by_net_id(net_id): # delete this old entry nsxv_db.delete_nsxv_router_binding(context.session, resource_id) - # check if this network is attached to a distributed router - vdr_router_id = _get_net_vdr_router_id(plugin, context, net_id) - if vdr_router_id: - # recreate the edge as a VDR DHCP edge - recreate_vdr_dhcp_edge(context, plugin, edge_manager, - vdr_router_id) - else: - # This is a regular DHCP edge: - recreate_network_dhcp(context, plugin, edge_manager, - None, net_id) + # init the plugin and edge manager + cfg.CONF.set_override('core_plugin', + 'vmware_nsx.shell.admin.plugins.nsxv.resources' + '.utils.NsxVPluginWrapper') + with utils.NsxVPluginWrapper() as plugin: + nsxv_manager = vcns_driver.VcnsDriver(edge_utils.NsxVCallbacks(plugin)) + edge_manager = edge_utils.EdgeManager(nsxv_manager, plugin) + + # check if this network is attached to a distributed router + vdr_router_id = _get_net_vdr_router_id(plugin, context, net_id) + if vdr_router_id: + # recreate the edge as a VDR DHCP edge + recreate_vdr_dhcp_edge(context, plugin, edge_manager, + vdr_router_id) + else: + # This is a regular DHCP edge: + recreate_network_dhcp(context, plugin, edge_manager, + None, net_id) registry.subscribe(list_missing_dhcp_bindings, diff --git a/vmware_nsx/shell/admin/plugins/nsxv/resources/routers.py b/vmware_nsx/shell/admin/plugins/nsxv/resources/routers.py index 49d99fca76..ea19df33bc 100644 --- a/vmware_nsx/shell/admin/plugins/nsxv/resources/routers.py +++ b/vmware_nsx/shell/admin/plugins/nsxv/resources/routers.py @@ -76,53 +76,55 @@ def nsx_recreate_router_edge(resource, event, trigger, **kwargs): cfg.CONF.set_override('core_plugin', 'vmware_nsx.shell.admin.plugins.nsxv.resources' '.utils.NsxVPluginWrapper') - plugin = utils.NsxVPluginWrapper() - nsxv_manager = vcns_driver.VcnsDriver(edge_utils.NsxVCallbacks(plugin)) - edge_manager = edge_utils.EdgeManager(nsxv_manager, plugin) - context = n_context.get_admin_context() + with utils.NsxVPluginWrapper() as plugin: + nsxv_manager = vcns_driver.VcnsDriver( + edge_utils.NsxVCallbacks(plugin)) + edge_manager = edge_utils.EdgeManager(nsxv_manager, plugin) + context = n_context.get_admin_context() - # verify that this is a Router edge - router_ids = edge_manager.get_routers_on_edge(context, old_edge_id) - if not router_ids: - LOG.error(_LE("Edge %(edge_id)s is not a router edge"), - {'edge_id': old_edge_id}) - return + # verify that this is a Router edge + router_ids = edge_manager.get_routers_on_edge(context, old_edge_id) + if not router_ids: + LOG.error(_LE("Edge %(edge_id)s is not a router edge"), + {'edge_id': old_edge_id}) + return - # all the routers on the same edge have the same type, so it - # is ok to check the type once - example_router = plugin.get_router(context, router_ids[0]) - router_driver = plugin._router_managers.get_tenant_router_driver( - context, example_router['router_type']) - if router_driver.get_type() == "distributed": - LOG.error(_LE("Recreating a distributed driver edge is not " - "supported")) - return + # all the routers on the same edge have the same type, so it + # is ok to check the type once + example_router = plugin.get_router(context, router_ids[0]) + router_driver = plugin._router_managers.get_tenant_router_driver( + context, example_router['router_type']) + if router_driver.get_type() == "distributed": + LOG.error(_LE("Recreating a distributed driver edge is not " + "supported")) + return - # load all the routers before deleting their binding - routers = [] - for router_id in router_ids: - routers.append(plugin.get_router(context, router_id)) + # load all the routers before deleting their binding + routers = [] + for router_id in router_ids: + routers.append(plugin.get_router(context, router_id)) - # delete the backend edge and all the relevant DB entries - delete_old_edge(context, old_edge_id) + # delete the backend edge and all the relevant DB entries + delete_old_edge(context, old_edge_id) - # Go over all the relevant routers - for router in routers: - router_id = router['id'] - # clean up other objects related to this router - if plugin.metadata_proxy_handler: - plugin.metadata_proxy_handler.cleanup_router_edge( + # Go over all the relevant routers + for router in routers: + router_id = router['id'] + # clean up other objects related to this router + if plugin.metadata_proxy_handler: + plugin.metadata_proxy_handler.cleanup_router_edge( + context, router_id) + + # attach the router to a new edge + appliance_size = router.get(routersize.ROUTER_SIZE) + router_driver.attach_router(context, router_id, + {'router': router}, + appliance_size=appliance_size) + # find out who is the new edge to print it + new_edge_id = router_driver._get_edge_id_or_raise( context, router_id) - - # attach the router to a new edge - appliance_size = router.get(routersize.ROUTER_SIZE) - router_driver.attach_router(context, router_id, - {'router': router}, - appliance_size=appliance_size) - # find out who is the new edge to print it - new_edge_id = router_driver._get_edge_id_or_raise(context, router_id) - LOG.info(_LI("Router %(router)s was attached to edge %(edge)s"), - {'router': router_id, 'edge': new_edge_id}) + LOG.info(_LI("Router %(router)s was attached to edge %(edge)s"), + {'router': router_id, 'edge': new_edge_id}) registry.subscribe(nsx_recreate_router_edge, diff --git a/vmware_nsx/shell/admin/plugins/nsxv/resources/securitygroups.py b/vmware_nsx/shell/admin/plugins/nsxv/resources/securitygroups.py index cc8c2e3038..844ea52eef 100644 --- a/vmware_nsx/shell/admin/plugins/nsxv/resources/securitygroups.py +++ b/vmware_nsx/shell/admin/plugins/nsxv/resources/securitygroups.py @@ -86,16 +86,17 @@ class NeutronSecurityGroupDB(utils.NeutronDbClient, self.context.session.delete(sg_mapping) def get_vnics_in_security_group(self, security_group_id): - vnics = [] - query = self.context.session.query( - models_v2.Port.id, models_v2.Port.device_id - ).join(sg_models.SecurityGroupPortBinding).filter_by( - security_group_id=security_group_id).all() - for p in query: - vnic_index = plugin._get_port_vnic_index(self.context, p.id) - vnic_id = plugin._get_port_vnic_id(vnic_index, p.device_id) - vnics.append(vnic_id) - return vnics + with utils.NsxVPluginWrapper() as plugin: + vnics = [] + query = self.context.session.query( + models_v2.Port.id, models_v2.Port.device_id + ).join(sg_models.SecurityGroupPortBinding).filter_by( + security_group_id=security_group_id).all() + for p in query: + vnic_index = plugin._get_port_vnic_index(self.context, p.id) + vnic_id = plugin._get_port_vnic_id(vnic_index, p.device_id) + vnics.append(vnic_id) + return vnics class NsxFirewallAPI(object): @@ -104,6 +105,8 @@ class NsxFirewallAPI(object): def list_security_groups(self): h, secgroups = self.vcns.list_security_groups() + if not secgroups: + return [] root = et.fromstring(secgroups) secgroups = [] for sg in root.iter('securitygroup'): @@ -117,6 +120,8 @@ class NsxFirewallAPI(object): def list_fw_sections(self): h, firewall_config = self.vcns.get_dfw_config() + if not firewall_config: + return [] root = et.fromstring(firewall_config) sections = [] for sec in root.iter('section'): @@ -131,6 +136,10 @@ class NsxFirewallAPI(object): def reorder_fw_sections(self): # read all the sections h, firewall_config = self.vcns.get_dfw_config() + if not firewall_config: + LOG.info(_LI("No firewall sections were found.")) + return + root = et.fromstring(firewall_config) for child in root: @@ -163,7 +172,6 @@ class NsxFirewallAPI(object): neutron_sg = NeutronSecurityGroupDB() nsxv_firewall = NsxFirewallAPI() -plugin = utils.NsxVPluginWrapper() def _log_info(resource, data, attrs=['name', 'id']): @@ -263,29 +271,33 @@ def fix_security_groups(resource, event, trigger, **kwargs): context_ = context.get_admin_context() sgs_with_missing_section = _find_missing_sections() sgs_with_missing_nsx_group = _find_missing_security_groups() - plugin = utils.NsxVPluginWrapper() - # If only the fw section is missing then create it. - for sg_id in (set(sgs_with_missing_section.keys()) - - set(sgs_with_missing_nsx_group.keys())): - neutron_sg.delete_security_group_section_mapping(sg_id) - secgroup = plugin.get_security_group(context_, sg_id) - plugin._create_fw_section_for_security_group( - context_, secgroup, - sgs_with_missing_section[sg_id]['nsx-securitygroup-id']) + if not sgs_with_missing_section and not sgs_with_missing_nsx_group: + # no mismatches + return - # If nsx security-group is missing then create both nsx security-group and - # a new fw section (remove old one). - for sg_id, sg in sgs_with_missing_nsx_group.items(): - secgroup = plugin.get_security_group(context_, sg_id) - if sg_id not in sgs_with_missing_section: - plugin._delete_section(sg['section-uri']) - neutron_sg.delete_security_group_section_mapping(sg_id) - neutron_sg.delete_security_group_backend_mapping(sg_id) - plugin._process_security_group_create_backend_resources(context_, - secgroup) - nsx_id = nsx_db.get_nsx_security_group_id(context_.session, sg_id) - for vnic_id in neutron_sg.get_vnics_in_security_group(sg_id): - plugin._add_member_to_security_group(nsx_id, vnic_id) + with utils.NsxVPluginWrapper() as plugin: + # If only the fw section is missing then create it. + for sg_id in (set(sgs_with_missing_section.keys()) - + set(sgs_with_missing_nsx_group.keys())): + neutron_sg.delete_security_group_section_mapping(sg_id) + secgroup = plugin.get_security_group(context_, sg_id) + plugin._create_fw_section_for_security_group( + context_, secgroup, + sgs_with_missing_section[sg_id]['nsx-securitygroup-id']) + + # If nsx security-group is missing then create both nsx security-group + # and a new fw section (remove old one). + for sg_id, sg in sgs_with_missing_nsx_group.items(): + secgroup = plugin.get_security_group(context_, sg_id) + if sg_id not in sgs_with_missing_section: + plugin._delete_section(sg['section-uri']) + neutron_sg.delete_security_group_section_mapping(sg_id) + neutron_sg.delete_security_group_backend_mapping(sg_id) + plugin._process_security_group_create_backend_resources(context_, + secgroup) + nsx_id = nsx_db.get_nsx_security_group_id(context_.session, sg_id) + for vnic_id in neutron_sg.get_vnics_in_security_group(sg_id): + plugin._add_member_to_security_group(nsx_id, vnic_id) @admin_utils.output_header @@ -309,55 +321,57 @@ def migrate_sg_to_policy(resource, event, trigger, **kwargs): # validate that the security group exist and contains rules and no policy context_ = context.get_admin_context() - plugin = utils.NsxVPluginWrapper() - try: - secgroup = plugin.get_security_group(context_, sg_id) - except ext_sg.SecurityGroupNotFound: - LOG.error(_LE("Security group %s was not found"), sg_id) - return - if secgroup.get('policy'): - LOG.error(_LE("Security group %s already uses a policy"), sg_id) - return - - # validate that the policy exists - if not plugin.nsx_v.vcns.validate_inventory(policy_id): - LOG.error(_LE("NSX policy %s was not found"), policy_id) - return - - # Delete the rules from the security group - LOG.info(_LI("Deleting the rules of security group: %s"), sg_id) - for rule in secgroup.get('security_group_rules', []): + with utils.NsxVPluginWrapper() as plugin: try: - plugin.delete_security_group_rule(context_, rule['id']) + secgroup = plugin.get_security_group(context_, sg_id) + except ext_sg.SecurityGroupNotFound: + LOG.error(_LE("Security group %s was not found"), sg_id) + return + if secgroup.get('policy'): + LOG.error(_LE("Security group %s already uses a policy"), sg_id) + return + + # validate that the policy exists + if not plugin.nsx_v.vcns.validate_inventory(policy_id): + LOG.error(_LE("NSX policy %s was not found"), policy_id) + return + + # Delete the rules from the security group + LOG.info(_LI("Deleting the rules of security group: %s"), sg_id) + for rule in secgroup.get('security_group_rules', []): + try: + plugin.delete_security_group_rule(context_, rule['id']) + except Exception as e: + LOG.warning(_LW("Failed to delete rule %(r)s from security " + "group %(sg)s: %(e)s"), + {'r': rule['id'], 'sg': sg_id, 'e': e}) + # continue anyway + + # Delete the security group FW section + LOG.info(_LI("Deleting the section of security group: %s"), sg_id) + try: + section_uri = plugin._get_section_uri(context_.session, sg_id) + plugin._delete_section(section_uri) + nsxv_db.delete_neutron_nsx_section_mapping( + context_.session, sg_id) except Exception as e: - LOG.warning(_LW("Failed to delete rule %(r)s from security group " - "%(sg)s: %(e)s"), - {'r': rule['id'], 'sg': sg_id, 'e': e}) + LOG.warning(_LW("Failed to delete firewall section of security " + "group %(sg)s: %(e)s"), + {'sg': sg_id, 'e': e}) # continue anyway - # Delete the security group FW section - LOG.info(_LI("Deleting the section of security group: %s"), sg_id) - try: - section_uri = plugin._get_section_uri(context_.session, sg_id) - plugin._delete_section(section_uri) - nsxv_db.delete_neutron_nsx_section_mapping(context_.session, sg_id) - except Exception as e: - LOG.warning(_LW("Failed to delete firewall section of security group " - "%(sg)s: %(e)s"), - {'sg': sg_id, 'e': e}) - # continue anyway + # bind this security group to the policy in the backend and DB + nsx_sg_id = nsx_db.get_nsx_security_group_id(context_.session, sg_id) + LOG.info(_LI("Binding the NSX security group %(nsx)s to policy " + "%(pol)s"), + {'nsx': nsx_sg_id, 'pol': policy_id}) + plugin._update_nsx_security_group_policies( + policy_id, None, nsx_sg_id) + prop = plugin._get_security_group_properties(context_, sg_id) + with context_.session.begin(subtransactions=True): + prop.update({sg_policy.POLICY: policy_id}) - # bind this security group to the policy in the backend and DB - nsx_sg_id = nsx_db.get_nsx_security_group_id(context_.session, sg_id) - LOG.info(_LI("Binding the NSX security group %(nsx)s to policy %(pol)s"), - {'nsx': nsx_sg_id, 'pol': policy_id}) - plugin._update_nsx_security_group_policies( - policy_id, None, nsx_sg_id) - prop = plugin._get_security_group_properties(context_, sg_id) - with context_.session.begin(subtransactions=True): - prop.update({sg_policy.POLICY: policy_id}) - - LOG.info(_LI("Done.")) + LOG.info(_LI("Done.")) registry.subscribe(migrate_sg_to_policy, diff --git a/vmware_nsx/shell/admin/plugins/nsxv/resources/utils.py b/vmware_nsx/shell/admin/plugins/nsxv/resources/utils.py index 8a058ade26..3728265417 100644 --- a/vmware_nsx/shell/admin/plugins/nsxv/resources/utils.py +++ b/vmware_nsx/shell/admin/plugins/nsxv/resources/utils.py @@ -12,16 +12,21 @@ # License for the specific language governing permissions and limitations # under the License. +import time from oslo_config import cfg +from oslo_log import log as logging from neutron import context as neutron_context from neutron.db import common_db_mixin as common_db +from vmware_nsx._i18n import _LW from vmware_nsx.common import config from vmware_nsx import plugin from vmware_nsx.plugins.nsx_v.vshield import vcns +LOG = logging.getLogger(__name__) + def get_nsxv_client(): return vcns.Vcns( @@ -55,6 +60,43 @@ class NsxVPluginWrapper(plugin.NsxVPlugin): # skip getting the Qos policy ID because get_object calls # plugin init again on admin-util environment + def count_spawn_jobs(self): + # check if there are any spawn jobs running + return self.edge_manager._get_worker_pool().running() + + # Define enter & exit to be used in with statements + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + """Wait until no more jobs are pending + + We want to wait until all spawn edge creation are done, or else the + edges might be in PERNDING_CREATE state in the nsx DB + """ + if not self.count_spawn_jobs(): + return + + LOG.warning(_LW("Waiting for plugin jobs to finish properly...")) + sleep_time = 1 + print_time = 20 + max_loop = 600 + for print_index in range(1, max_loop): + n_jobs = self.count_spawn_jobs() + if n_jobs > 0: + if (print_index % print_time) == 0: + LOG.warning(_LW("Still Waiting on %(jobs)s " + "job%(plural)s"), + {'jobs': n_jobs, + 'plural': 's' if n_jobs > 1 else ''}) + time.sleep(sleep_time) + else: + LOG.warning(_LW("Done.")) + return + + LOG.warning(_LW("Sorry. Waited for too long. Some jobs are still " + "running.")) + def get_nsxv_backend_edges(): """Get a list of all the backend edges and some of their attributes diff --git a/vmware_nsx/tests/unit/shell/test_admin_utils.py b/vmware_nsx/tests/unit/shell/test_admin_utils.py index 2403ed0ceb..80c1d40e5d 100644 --- a/vmware_nsx/tests/unit/shell/test_admin_utils.py +++ b/vmware_nsx/tests/unit/shell/test_admin_utils.py @@ -118,7 +118,7 @@ class TestNsxvAdminUtils(AbstractTestAdminUtils, # This is an example how to test a specific utility with arguments def test_with_args(self): args = {'property': ["xxx=yyy"]} - self._test_resource('networks', 'list', **args) + self._test_resource('security-groups', 'fix-mismatch', **args) def test_bad_args(self): args = {'property': ["xxx"]}