From a1aa15282ea91eafb4afe476336949aec965add2 Mon Sep 17 00:00:00 2001 From: Lingxian Kong Date: Wed, 28 Aug 2019 22:11:27 +1200 Subject: [PATCH] Support management security group Allow the cloud admin to control the security groups on the management port of Trove instance, a new config option `management_security_groups` is introduced for that purpose. Change-Id: I4b22b87d37792be700d4ec7f78a7ea479ddb5814 Story: 2006466 Task: 36395 --- devstack/plugin.sh | 85 +++++++------- devstack/settings | 1 + doc/source/admin/trovestack.rst | 8 ++ integration/scripts/conf/test_begin.conf | 1 + .../train-02-management-security-group.yaml | 11 ++ trove/common/cfg.py | 8 +- trove/instance/tasks.py | 3 + trove/taskmanager/models.py | 104 ++++++++++++++---- trove/tests/scenario/runners/test_runners.py | 34 +++--- .../unittests/taskmanager/test_models.py | 55 +++++++++ 10 files changed, 227 insertions(+), 83 deletions(-) create mode 100644 releasenotes/notes/train-02-management-security-group.yaml diff --git a/devstack/plugin.sh b/devstack/plugin.sh index b60bf4ba4b..0908ee2784 100644 --- a/devstack/plugin.sh +++ b/devstack/plugin.sh @@ -186,28 +186,6 @@ function _config_trove_apache_wsgi { tail_log trove-api /var/log/${APACHE_NAME}/trove-api.log } -function _config_nova_keypair { - export SSH_DIR=${SSH_DIR:-"$HOME/.ssh"} - - if [[ ! -f ${SSH_DIR}/id_rsa.pub ]]; then - mkdir -p ${SSH_DIR} - /usr/bin/ssh-keygen -f ${SSH_DIR}/id_rsa -q -N "" - # This is to allow guest agent ssh into the controller in dev mode. - cat ${SSH_DIR}/id_rsa.pub >> ${SSH_DIR}/authorized_keys - else - # This is to allow guest agent ssh into the controller in dev mode. - cat ${SSH_DIR}/id_rsa.pub >> ${SSH_DIR}/authorized_keys - sort ${SSH_DIR}/authorized_keys | uniq > ${SSH_DIR}/authorized_keys.uniq - mv ${SSH_DIR}/authorized_keys.uniq ${SSH_DIR}/authorized_keys - chmod 600 ${SSH_DIR}/authorized_keys - fi - - echo "Creating Trove management keypair ${TROVE_MGMT_KEYPAIR_NAME}" - openstack --os-region-name RegionOne --os-password ${SERVICE_PASSWORD} --os-project-name service --os-username trove \ - keypair create --public-key ${SSH_DIR}/id_rsa.pub ${TROVE_MGMT_KEYPAIR_NAME} - - iniset $TROVE_CONF DEFAULT nova_keypair ${TROVE_MGMT_KEYPAIR_NAME} -} # configure_trove() - Set config files, create data dirs, etc function configure_trove { setup_develop $TROVE_DIR @@ -477,15 +455,6 @@ function finalize_trove_network { mgmt_net_id=$(openstack network show ${TROVE_MGMT_NETWORK_NAME} -c id -f value) echo "Created Trove management network ${TROVE_MGMT_NETWORK_NAME}(${mgmt_net_id})" - # Create security group for trove management network. For testing purpose, - # we allow everything. In production, the security group should be managed - # by the cloud admin. - SG_NAME=trove-mgmt - openstack security group create --project ${trove_service_project_id} ${SG_NAME} - openstack security group rule create --proto icmp --project ${trove_service_project_id} ${SG_NAME} - openstack security group rule create --protocol tcp --dst-port 1:65535 --project ${trove_service_project_id} ${SG_NAME} - openstack security group rule create --protocol udp --dst-port 1:65535 --project ${trove_service_project_id} ${SG_NAME} - # Share the private network to other projects for testing purpose. We make # the private network accessible to control plane below so that we could # reach the private network for integration tests without floating ips @@ -501,7 +470,7 @@ function finalize_trove_network { # recommended to config the router in the cloud infrastructure for the # communication between Trove control plane and service VMs. INTERFACE=trove-mgmt - MGMT_PORT_ID=$(openstack port create --project ${trove_service_project_id} --security-group ${SG_NAME} --device-owner trove --network ${TROVE_MGMT_NETWORK_NAME} --host=$(hostname) -c id -f value ${INTERFACE}-port) + MGMT_PORT_ID=$(openstack port create --project ${trove_service_project_id} --security-group ${TROVE_MGMT_SECURITY_GROUP} --device-owner trove --network ${TROVE_MGMT_NETWORK_NAME} --host=$(hostname) -c id -f value ${INTERFACE}-port) MGMT_PORT_MAC=$(openstack port show -c mac_address -f value $MGMT_PORT_ID) MGMT_PORT_IP=$(openstack port show -f value -c fixed_ips $MGMT_PORT_ID | awk '{FS=",| "; gsub(",",""); gsub("'\''",""); for(i = 1; i <= NF; ++i) {if ($i ~ /^ip_address/) {n=index($i, "="); if (substr($i, n+1) ~ "\\.") print substr($i, n+1)}}}') sudo ovs-vsctl -- --may-exist add-port ${OVS_BRIDGE:-br-int} $INTERFACE -- set Interface $INTERFACE type=internal -- set Interface $INTERFACE external-ids:iface-status=active -- set Interface $INTERFACE external-ids:attached-mac=$MGMT_PORT_MAC -- set Interface $INTERFACE external-ids:iface-id=$MGMT_PORT_ID -- set Interface $INTERFACE external-ids:skip_cleanup=true @@ -697,6 +666,45 @@ function _setup_minimal_image { fi } +function _config_nova_keypair { + export SSH_DIR=${SSH_DIR:-"$HOME/.ssh"} + + if [[ ! -f ${SSH_DIR}/id_rsa.pub ]]; then + mkdir -p ${SSH_DIR} + /usr/bin/ssh-keygen -f ${SSH_DIR}/id_rsa -q -N "" + # This is to allow guest agent ssh into the controller in dev mode. + cat ${SSH_DIR}/id_rsa.pub >> ${SSH_DIR}/authorized_keys + else + # This is to allow guest agent ssh into the controller in dev mode. + cat ${SSH_DIR}/id_rsa.pub >> ${SSH_DIR}/authorized_keys + sort ${SSH_DIR}/authorized_keys | uniq > ${SSH_DIR}/authorized_keys.uniq + mv ${SSH_DIR}/authorized_keys.uniq ${SSH_DIR}/authorized_keys + chmod 600 ${SSH_DIR}/authorized_keys + fi + + echo "Creating Trove management keypair ${TROVE_MGMT_KEYPAIR_NAME}" + openstack --os-region-name RegionOne --os-password ${SERVICE_PASSWORD} --os-project-name service --os-username trove \ + keypair create --public-key ${SSH_DIR}/id_rsa.pub ${TROVE_MGMT_KEYPAIR_NAME} + + iniset $TROVE_CONF DEFAULT nova_keypair ${TROVE_MGMT_KEYPAIR_NAME} +} + +function _config_mgmt_security_group { + local sgid + + echo "Creating Trove management security group." + sgid=$(openstack --os-region-name RegionOne --os-password ${SERVICE_PASSWORD} --os-project-name service --os-username trove security group create ${TROVE_MGMT_SECURITY_GROUP} -f value -c id) + + # Allow ICMP + openstack --os-region-name RegionOne --os-password ${SERVICE_PASSWORD} --os-project-name service --os-username trove \ + security group rule create --proto icmp $sgid + # Allow SSH + openstack --os-region-name RegionOne --os-password ${SERVICE_PASSWORD} --os-project-name service --os-username trove \ + security group rule create --protocol tcp --dst-port 22 $sgid + + iniset $TROVE_CONF DEFAULT management_security_groups $sgid +} + # Dispatcher for trove plugin if is_service_enabled trove; then if [[ "$1" == "stack" && "$2" == "install" ]]; then @@ -714,17 +722,12 @@ if is_service_enabled trove; then # Initialize trove init_trove + _config_nova_keypair + _config_mgmt_security_group + # finish the last step in trove network configuration echo_summary "Finalizing Trove Network Configuration" - - if is_service_enabled neutron; then - echo "finalize_trove_network: Neutron is enabled." - finalize_trove_network - else - echo "finalize_trove_network: Neutron is not enabled. Nothing to do." - fi - - _config_nova_keypair + finalize_trove_network # Start the trove API and trove taskmgr components echo_summary "Starting Trove" diff --git a/devstack/settings b/devstack/settings index f0636da62b..9e650e67f7 100644 --- a/devstack/settings +++ b/devstack/settings @@ -59,6 +59,7 @@ else fi TROVE_SHARE_NETWORKS=$(trueorfalse TRUE TROVE_SHARE_NETWORKS) TROVE_MGMT_KEYPAIR_NAME=${TROVE_MGMT_KEYPAIR_NAME:-"trove-mgmt"} +TROVE_MGMT_SECURITY_GROUP=${TROVE_MGMT_SECURITY_GROUP:-"trove-mgmt"} # Support entry points installation of console scripts if [[ -d $TROVE_DIR/bin ]]; then diff --git a/doc/source/admin/trovestack.rst b/doc/source/admin/trovestack.rst index a22b1332f2..942535ab45 100644 --- a/doc/source/admin/trovestack.rst +++ b/doc/source/admin/trovestack.rst @@ -54,6 +54,14 @@ The trove guest agent image could be created by running the following command: image at the building time. Now ``dev_mode=false`` is still in experimental and not considered production ready yet. +* Some other global variables: + + * ``HOST_SCP_USERNAME``: only used in dev mode, this is the user name used by + guest agent to connect to the controller host, e.g. in devstack + environment, it should be the ``stack`` user. + * ``GUEST_WORKING_DIR``: The place to save the guest image, default value is + ``$HOME/images``. + For example, in order to build a MySQL image for Ubuntu Xenial operating system in development mode: diff --git a/integration/scripts/conf/test_begin.conf b/integration/scripts/conf/test_begin.conf index 0c5f6caf61..370eba6068 100644 --- a/integration/scripts/conf/test_begin.conf +++ b/integration/scripts/conf/test_begin.conf @@ -96,3 +96,4 @@ "instance_fault_1_eph_flavor_name": "test.eph.fault_1-1", "instance_fault_2_flavor_name": "test.fault_2-7", "instance_fault_2_eph_flavor_name": "test.eph.fault_2-7", + "instance_log_on_failure": false, diff --git a/releasenotes/notes/train-02-management-security-group.yaml b/releasenotes/notes/train-02-management-security-group.yaml new file mode 100644 index 0000000000..a1613688a9 --- /dev/null +++ b/releasenotes/notes/train-02-management-security-group.yaml @@ -0,0 +1,11 @@ +--- +features: + - The cloud admin is able to apply a security group to management port(with + purpose of communicating with control plane and other management tasks) of + the Trove instance, by setting the ``management_security_groups`` config + option. The cloud admin is responsible for managing the security group + rules. The security group and its rules need to be created before deploying + Trove. +upgrade: + - The management security group won't affect the Trove instances created + before upgrade. \ No newline at end of file diff --git a/trove/common/cfg.py b/trove/common/cfg.py index 22dde22e95..a0a6a6a6b9 100644 --- a/trove/common/cfg.py +++ b/trove/common/cfg.py @@ -283,7 +283,7 @@ common_opts = [ deprecated_name='hostname_require_ipv4'), cfg.BoolOpt('trove_security_groups_support', default=True, help='Whether Trove should add Security Groups on create.'), - cfg.StrOpt('trove_security_group_name_prefix', default='SecGroup', + cfg.StrOpt('trove_security_group_name_prefix', default='trove_sg', help='Prefix to use when creating Security Groups.'), cfg.StrOpt('trove_security_group_rule_cidr', default='0.0.0.0/0', help='CIDR to use when creating Security Group Rules.'), @@ -425,7 +425,11 @@ common_opts = [ deprecated_name='default_neutron_networks', help='List of IDs for management networks which should be ' 'attached to the instance regardless of what NICs ' - 'are specified in the create API call.'), + 'are specified in the create API call. Currently only ' + 'one management network is allowed.'), + cfg.ListOpt('management_security_groups', default=[], + help='List of the security group IDs that are applied on the ' + 'management port of the database instance.'), cfg.IntOpt('max_header_line', default=16384, help='Maximum line size of message headers to be accepted. ' 'max_header_line may need to be increased when using ' diff --git a/trove/instance/tasks.py b/trove/instance/tasks.py index 6ca913e1cc..2fd650e54b 100644 --- a/trove/instance/tasks.py +++ b/trove/instance/tasks.py @@ -118,6 +118,9 @@ class InstanceTasks(object): 'Build error: ' 'guestagent timeout.', is_error=True) + BUILDING_ERROR_PORT = InstanceTask(0x5c, 'BUILDING', + 'Build error: Management port.', + is_error=True) # Dissuade further additions at run-time. InstanceTask.__init__ = None diff --git a/trove/taskmanager/models.py b/trove/taskmanager/models.py index f4c8b8bc87..9e7a76c3cb 100755 --- a/trove/taskmanager/models.py +++ b/trove/taskmanager/models.py @@ -22,6 +22,7 @@ from eventlet.timeout import Timeout from novaclient import exceptions as nova_exceptions from oslo_log import log as logging from oslo_utils import netutils +import six from swiftclient.client import ClientException from trove.backup import models as bkup_models @@ -58,6 +59,7 @@ import trove.common.remote as remote from trove.common.remote import create_cinder_client from trove.common.remote import create_dns_client from trove.common.remote import create_guest_client +from trove.common.remote import create_neutron_client from trove.common import server_group as srv_grp from trove.common.strategies.cluster import strategy from trove.common import template @@ -466,6 +468,35 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin): self.id, error_message, error_details, skip_delta=CONF.usage_sleep_time + 1) + def _create_management_port(self, network, default_sgs=[]): + """Create port in the management network.""" + security_groups = default_sgs + if len(CONF.management_security_groups) > 0: + security_groups = CONF.management_security_groups + + try: + neutron_client = create_neutron_client(self.context) + + body = { + 'port': { + 'name': 'trove-%s' % self.id, + 'description': ('Management port for Trove instance %s' + % self.id), + 'network_id': network, + 'admin_state_up': True, + 'security_groups': security_groups + } + } + port = neutron_client.create_port(body) + return port['port']['id'] + except Exception: + error = "Failed to create management port." + LOG.exception(error) + self.update_db( + task_status=inst_models.InstanceTasks.BUILDING_ERROR_PORT + ) + raise TroveError(message=error) + def create_instance(self, flavor, image_id, databases, users, datastore_manager, packages, volume_size, backup_id, availability_zone, root_password, nics, @@ -487,8 +518,20 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin): err = inst_models.InstanceTasks.BUILDING_ERROR_SEC_GROUP self._log_and_raise(e, log_fmt, exc_fmt, self.id, err) else: - LOG.debug("Successfully created security group for " - "instance: %s", self.id) + LOG.info("Successfully created security group %s for " + "instance: %s", security_groups, self.id) + + if CONF.management_networks: + # The management network is always the last one + nics.pop(-1) + port_id = self._create_management_port( + CONF.management_networks[-1], + security_groups + ) + + LOG.info("Management port %s created for instance: %s", + port_id, self.id) + nics.append({"port-id": port_id}) files = self.get_injected_files(datastore_manager) cinder_volume_type = volume_type or CONF.cinder_volume_type @@ -502,7 +545,8 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin): nics, files, cinder_volume_type, - scheduler_hints) + scheduler_hints + ) config = self._render_config(flavor) @@ -1035,30 +1079,42 @@ class BuiltInstanceTasks(BuiltInstance, NotifyMixin, ConfigurationMixin): LOG.debug("Stopping datastore on instance %s before deleting " "any resources.", self.id) self.guest.stop_db() - except Exception: - LOG.exception("Error stopping the datastore before attempting " - "to delete instance id %s.", self.id) + except Exception as e: + LOG.exception("Failed to stop the datastore before attempting " + "to delete instance id %s, error: %s", self.id, + six.text_type(e)) + try: self.server.delete() - except Exception as ex: - LOG.exception("Error during delete compute server %s", - self.server.id) + except Exception as e: + LOG.exception("Failed to delete compute server %s", self.server.id, + six.text_type(e)) + + try: + neutron_client = create_neutron_client(self.context) + ret = neutron_client.list_ports(name='trove-%s' % self.id) + if ret.get("ports", []): + neutron_client.delete_port(ret["ports"][0]["id"]) + except Exception as e: + LOG.error("Failed to delete management port of instance %s, " + "error: %s", self.id, six.text_type(e)) + try: dns_support = CONF.trove_dns_support LOG.debug("trove dns support = %s", dns_support) if dns_support: dns_api = create_dns_client(self.context) - dns_api.delete_instance_entry(instance_id=self.db_info.id) - except Exception as ex: - LOG.exception("Error during dns entry of instance %(id)s: " - "%(ex)s", {'id': self.db_info.id, 'ex': ex}) + dns_api.delete_instance_entry(instance_id=self.id) + except Exception as e: + LOG.error("Failed to delete dns entry of instance %s, error: %s", + self.id, six.text_type(e)) + try: srv_grp.ServerGroup.delete(self.context, self.server_group) - except Exception: - LOG.exception("Error during delete server group for %s", - self.id) + except Exception as e: + LOG.error("Failed to delete server group for %s, error: %s", + self.id, six.text_type(e)) - # Poll until the server is gone. def server_is_finished(): try: server = self.nova_client.servers.get(server_id) @@ -1075,11 +1131,11 @@ class BuiltInstanceTasks(BuiltInstance, NotifyMixin, ConfigurationMixin): utils.poll_until(server_is_finished, sleep_time=2, time_out=CONF.server_delete_time_out) except PollTimeOut: - LOG.exception("Failed to delete instance %(instance_id)s: " - "Timeout deleting compute server %(server_id)s", - {'instance_id': self.id, 'server_id': server_id}) + LOG.error("Failed to delete instance %(instance_id)s: " + "Timeout deleting compute server %(server_id)s", + {'instance_id': self.id, 'server_id': server_id}) - # If volume has been resized it must be manually removed in cinder + # If volume has been resized it must be manually removed try: if self.volume_id: volume_client = create_cinder_client(self.context, @@ -1089,9 +1145,9 @@ class BuiltInstanceTasks(BuiltInstance, NotifyMixin, ConfigurationMixin): LOG.info("Deleting volume %(v)s for instance: %(i)s.", {'v': self.volume_id, 'i': self.id}) volume.delete() - except Exception: - LOG.exception("Error deleting volume of instance %(id)s.", - {'id': self.db_info.id}) + except Exception as e: + LOG.error("Failed to delete volume of instance %s, error: %s", + self.id, six.text_type(e)) TroveInstanceDelete(instance=self, deleted_at=timeutils.isotime(deleted_at), diff --git a/trove/tests/scenario/runners/test_runners.py b/trove/tests/scenario/runners/test_runners.py index 221c7a60fe..5886dc71c5 100644 --- a/trove/tests/scenario/runners/test_runners.py +++ b/trove/tests/scenario/runners/test_runners.py @@ -260,22 +260,24 @@ class LogOnFail(type): report.log(msg_prefix + "Exception detected, " "but no instance IDs are registered to log.") - for inst_id in inst_ids: - try: - client.instances.get(inst_id) - except Exception as ex: - report.log(msg_prefix + "Error in instance show " - "for %s:\n%s" % (inst_id, ex)) - try: - log_gen = client.instances.log_generator( - inst_id, 'guest', - publish=True, lines=0, swift=None) - log_contents = "".join([chunk for chunk in log_gen()]) - report.log(msg_prefix + "Guest log for %s:\n%s" % - (inst_id, log_contents)) - except Exception as ex: - report.log(msg_prefix + "Error in guest log " - "retrieval for %s:\n%s" % (inst_id, ex)) + if CONFIG.instance_log_on_failure: + for inst_id in inst_ids: + try: + client.instances.get(inst_id) + except Exception as ex: + report.log("%s Error in instance show for %s:\n%s" + % (msg_prefix, inst_id, ex)) + try: + log_gen = client.instances.log_generator( + inst_id, 'guest', + publish=True, lines=0, swift=None) + log_contents = "".join( + [chunk for chunk in log_gen()]) + report.log("%s Guest log for %s:\n%s" % + (msg_prefix, inst_id, log_contents)) + except Exception as ex: + report.log("%s Error in guest log retrieval for " + "%s:\n%s" % (msg_prefix, inst_id, ex)) # Only report on the first error that occurs mcs.reset_inst_ids() diff --git a/trove/tests/unittests/taskmanager/test_models.py b/trove/tests/unittests/taskmanager/test_models.py index e91ef38392..cbe830f63d 100644 --- a/trove/tests/unittests/taskmanager/test_models.py +++ b/trove/tests/unittests/taskmanager/test_models.py @@ -406,6 +406,61 @@ class FreshInstanceTasksTest(BaseFreshInstanceTasksTest): 'mysql', mock_build_volume_info()['block_device'], None, None, mock_get_injected_files(), {'group': 'sg-id'}) + @patch.object(BaseInstance, 'update_db') + @patch.object(taskmanager_models.FreshInstanceTasks, '_create_dns_entry') + @patch.object(taskmanager_models.FreshInstanceTasks, 'get_injected_files') + @patch.object(taskmanager_models.FreshInstanceTasks, '_create_server') + @patch.object(taskmanager_models.FreshInstanceTasks, '_create_secgroup') + @patch.object(taskmanager_models.FreshInstanceTasks, '_build_volume_info') + @patch.object(taskmanager_models.FreshInstanceTasks, '_guest_prepare') + @patch.object(template, 'SingleInstanceConfigTemplate') + @patch( + "trove.taskmanager.models.FreshInstanceTasks._create_management_port" + ) + def test_create_instance_with_mgmt_port(self, + mock_create_mgmt_port, + mock_single_instance_template, + mock_guest_prepare, + mock_build_volume_info, + mock_create_secgroup, + mock_create_server, + mock_get_injected_files, + *args): + self.patch_conf_property('management_networks', ['fake-mgmt-uuid']) + mock_create_secgroup.return_value = ['fake-sg'] + mock_create_mgmt_port.return_value = 'fake-port-id' + + mock_flavor = {'id': 8, 'ram': 768, 'name': 'bigger_flavor'} + config_content = {'config_contents': 'some junk'} + mock_single_instance_template.return_value.config_contents = ( + config_content) + overrides = Mock() + + self.freshinstancetasks.create_instance( + mock_flavor, 'mysql-image-id', + None, None, 'mysql', + 'mysql-server', 2, + None, None, None, + [{'net-id': 'fake-net-uuid'}, {'net-id': 'fake-mgmt-uuid'}], + overrides, None, None, + 'volume_type', None, + {'group': 'sg-id'} + ) + + mock_create_secgroup.assert_called_with('mysql') + mock_create_mgmt_port.assert_called_once_with('fake-mgmt-uuid', + ['fake-sg']) + mock_build_volume_info.assert_called_with('mysql', volume_size=2, + volume_type='volume_type') + mock_guest_prepare.assert_called_with( + 768, mock_build_volume_info(), 'mysql-server', None, None, None, + config_content, None, overrides, None, None, None) + mock_create_server.assert_called_with( + 8, 'mysql-image-id', ['fake-sg'], + 'mysql', mock_build_volume_info()['block_device'], None, + [{'net-id': 'fake-net-uuid'}, {'port-id': 'fake-port-id'}], + mock_get_injected_files(), {'group': 'sg-id'}) + @patch.object(BaseInstance, 'update_db') @patch.object(taskmanager_models, 'create_cinder_client') @patch.object(taskmanager_models.FreshInstanceTasks, 'device_path',