From fb8085894b69f56091bde19683a919cb15d502cc Mon Sep 17 00:00:00 2001 From: Gorka Eguileor Date: Wed, 7 Feb 2018 17:07:47 +0100 Subject: [PATCH] Fix AZ not matching backend after migration When we migrate, or retype with migration, a volume between availability zones we are setting the AZ of the migrated volume to the same AZ as the source volume, so we end up with a volume where the AZ does not match the backend's AZ. This patch fixes this issue for the generic migration code as well as the optimized driver migration. Change-Id: Ia1bf3ae0b7cd6d209354024848aef6029179d8e4 Closes-Bug: #1747949 --- cinder/tests/unit/utils.py | 1 + .../unit/volume/test_volume_migration.py | 61 ++++++++++++++++++ cinder/volume/manager.py | 64 ++++++++----------- ...x-cross-az-migration-ce97eff61280e1c7.yaml | 6 ++ 4 files changed, 95 insertions(+), 37 deletions(-) create mode 100644 releasenotes/notes/fix-cross-az-migration-ce97eff61280e1c7.yaml diff --git a/cinder/tests/unit/utils.py b/cinder/tests/unit/utils.py index b28b9319950..1938a134567 100644 --- a/cinder/tests/unit/utils.py +++ b/cinder/tests/unit/utils.py @@ -483,6 +483,7 @@ def default_service_values(): 'topic': 'fake_topic', 'report_count': 3, 'disabled': False, + 'availability_zone': 'nova', } diff --git a/cinder/tests/unit/volume/test_volume_migration.py b/cinder/tests/unit/volume/test_volume_migration.py index 5b939466f54..a6a417d0b4c 100644 --- a/cinder/tests/unit/volume/test_volume_migration.py +++ b/cinder/tests/unit/volume/test_volume_migration.py @@ -24,6 +24,7 @@ from oslo_concurrency import processutils from oslo_config import cfg from oslo_utils import imageutils +from cinder.common import constants from cinder import context from cinder import db from cinder import exception @@ -75,6 +76,9 @@ class VolumeMigrationTestCase(base.BaseVolumeTestCase): autospec=True) self._clear_patch.start() self.expected_status = 'available' + self._service = tests_utils.create_service( + self.context, + values={'host': 'newhost', 'binary': constants.VOLUME_BINARY}) def tearDown(self): super(VolumeMigrationTestCase, self).tearDown() @@ -99,6 +103,28 @@ class VolumeMigrationTestCase(base.BaseVolumeTestCase): self.assertEqual('newhost', volume.host) self.assertEqual('success', volume.migration_status) + def test_migrate_volume_driver_cross_az(self): + """Test volume migration done by driver.""" + # Mock driver and rpc functions + self.mock_object(self.volume.driver, 'migrate_volume', + lambda x, y, z, new_type_id=None: ( + True, {'user_id': fake.USER_ID})) + dst_az = 'AZ2' + db.service_update(self.context, self._service.id, + {'availability_zone': dst_az}) + + volume = tests_utils.create_volume(self.context, size=0, + host=CONF.host, + migration_status='migrating') + host_obj = {'host': 'newhost', 'capabilities': {}} + self.volume.migrate_volume(self.context, volume, host_obj, False) + + # check volume properties + volume.refresh() + self.assertEqual('newhost', volume.host) + self.assertEqual('success', volume.migration_status) + self.assertEqual(dst_az, volume.availability_zone) + def _fake_create_volume(self, ctxt, volume, req_spec, filters, allow_reschedule=True): return db.volume_update(ctxt, volume['id'], @@ -154,6 +180,41 @@ class VolumeMigrationTestCase(base.BaseVolumeTestCase): self.context, volume, new_volume_obj, error=False) self.assertFalse(update_server_volume.called) + @mock.patch('cinder.compute.API') + @mock.patch('cinder.volume.manager.VolumeManager.' + 'migrate_volume_completion') + def test_migrate_volume_generic_cross_az(self, migrate_volume_completion, + nova_api): + """Test that we set the right AZ in cross AZ migrations.""" + original_create = objects.Volume.create + dst_az = 'AZ2' + db.service_update(self.context, self._service.id, + {'availability_zone': dst_az}) + + def my_create(self, *args, **kwargs): + self.status = 'available' + original_create(self, *args, **kwargs) + + volume = tests_utils.create_volume(self.context, size=1, + host=CONF.host) + + host_obj = {'host': 'newhost', 'capabilities': {}} + create_vol = self.patch('cinder.objects.Volume.create', + side_effect=my_create, autospec=True) + + with mock.patch.object(self.volume, '_copy_volume_data') as copy_mock: + self.volume._migrate_volume_generic(self.context, volume, host_obj, + None) + copy_mock.assert_called_with(self.context, volume, mock.ANY, + remote='dest') + migrate_volume_completion.assert_called_with( + self.context, volume, mock.ANY, error=False) + + nova_api.return_value.update_server_volume.assert_not_called() + + self.assertEqual(dst_az, + create_vol.call_args[0][0]['availability_zone']) + @mock.patch('cinder.compute.API') @mock.patch('cinder.volume.manager.VolumeManager.' 'migrate_volume_completion') diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 765e5f0d7b9..d6fecd446c8 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -198,6 +198,12 @@ class VolumeManager(manager.CleanableManager, 'attach_status', 'migration_status', 'volume_type', 'consistencygroup', 'volume_attachment', 'group'} + def _get_service(self, host=None, binary=constants.VOLUME_BINARY): + host = host or self.host + ctxt = context.get_admin_context() + svc_host = vol_utils.extract_host(host, 'backend') + return objects.Service.get_by_args(ctxt, svc_host, binary) + def __init__(self, volume_driver=None, service_name=None, *args, **kwargs): """Load the driver from the one specified in args, or from flags.""" @@ -227,12 +233,8 @@ class VolumeManager(manager.CleanableManager, # We pass the current setting for service.active_backend_id to # the driver on init, in case there was a restart or something curr_active_backend_id = None - svc_host = vol_utils.extract_host(self.host, 'backend') try: - service = objects.Service.get_by_args( - context.get_admin_context(), - svc_host, - constants.VOLUME_BINARY) + service = self._get_service() except exception.ServiceNotFound: # NOTE(jdg): This is to solve problems with unit tests LOG.info("Service not found for updating " @@ -530,12 +532,8 @@ class VolumeManager(manager.CleanableManager, return stats = self.driver.get_volume_stats(refresh=True) - svc_host = vol_utils.extract_host(self.host, 'backend') try: - service = objects.Service.get_by_args( - context.get_admin_context(), - svc_host, - constants.VOLUME_BINARY) + service = self._get_service() except exception.ServiceNotFound: with excutils.save_and_reraise_exception(): LOG.error("Service not found for updating replication_status.") @@ -2068,8 +2066,10 @@ class VolumeManager(manager.CleanableManager, # Create new volume on remote host tmp_skip = {'snapshot_id', 'source_volid'} - skip = self._VOLUME_CLONE_SKIP_PROPERTIES | tmp_skip | {'host', - 'cluster_name'} + skip = {'host', 'cluster_name', 'availability_zone'} + skip.update(tmp_skip) + skip.update(self._VOLUME_CLONE_SKIP_PROPERTIES) + new_vol_values = {k: volume[k] for k in set(volume.keys()) - skip} if new_type_id: new_vol_values['volume_type_id'] = new_type_id @@ -2079,9 +2079,11 @@ class VolumeManager(manager.CleanableManager, ctxt, self.key_manager, new_type_id) new_vol_values['encryption_key_id'] = encryption_key_id + dst_service = self._get_service(backend['host']) new_volume = objects.Volume( context=ctxt, host=backend['host'], + availability_zone=dst_service.availability_zone, cluster_name=backend.get('cluster_name'), status='creating', attach_status=fields.VolumeAttachStatus.DETACHED, @@ -2368,10 +2370,14 @@ class VolumeManager(manager.CleanableManager, volume, host) if moved: - updates = {'host': host['host'], - 'cluster_name': host.get('cluster_name'), - 'migration_status': 'success', - 'previous_status': volume.status} + dst_service = self._get_service(host['host']) + updates = { + 'host': host['host'], + 'cluster_name': host.get('cluster_name'), + 'migration_status': 'success', + 'availability_zone': dst_service.availability_zone, + 'previous_status': volume.status, + } if status_update: updates.update(status_update) if model_update: @@ -2404,13 +2410,9 @@ class VolumeManager(manager.CleanableManager, # value isn't set (we didn't restart services), so we'll go ahead # and make this a part of the service periodic if not self.service_uuid: - svc_host = vol_utils.extract_host(self.host, 'backend') # We hack this with a try/except for unit tests temporarily try: - service = objects.Service.get_by_args( - context, - svc_host, - constants.VOLUME_BINARY) + service = self._get_service() self.service_uuid = service.uuid except exception.ServiceNotFound: LOG.warning("Attempt to update service_uuid " @@ -3994,9 +3996,7 @@ class VolumeManager(manager.CleanableManager, updates = {} repl_status = fields.ReplicationStatus - svc_host = vol_utils.extract_host(self.host, 'backend') - service = objects.Service.get_by_args(context, svc_host, - constants.VOLUME_BINARY) + service = self._get_service() # TODO(geguileo): We should optimize these updates by doing them # directly on the DB with just 3 queries, one to change the volumes @@ -4168,9 +4168,7 @@ class VolumeManager(manager.CleanableManager, doing the failover of the volumes after finished processing the volumes. """ - svc_host = vol_utils.extract_host(self.host, 'backend') - service = objects.Service.get_by_args(context, svc_host, - constants.VOLUME_BINARY) + service = self._get_service() service.update(updates) try: self.driver.failover_completed(context, service.active_backend_id) @@ -4206,12 +4204,8 @@ class VolumeManager(manager.CleanableManager, LOG.warning('Error encountered on Cinder backend during ' 'freeze operation, service is frozen, however ' 'notification to driver has failed.') - svc_host = vol_utils.extract_host(self.host, 'backend') - service = objects.Service.get_by_args( - context, - svc_host, - constants.VOLUME_BINARY) + service = self._get_service() service.disabled = True service.disabled_reason = "frozen" service.save() @@ -4239,12 +4233,8 @@ class VolumeManager(manager.CleanableManager, LOG.error('Error encountered on Cinder backend during ' 'thaw operation, service will remain frozen.') return False - svc_host = vol_utils.extract_host(self.host, 'backend') - service = objects.Service.get_by_args( - context, - svc_host, - constants.VOLUME_BINARY) + service = self._get_service() service.disabled = False service.disabled_reason = "" service.save() diff --git a/releasenotes/notes/fix-cross-az-migration-ce97eff61280e1c7.yaml b/releasenotes/notes/fix-cross-az-migration-ce97eff61280e1c7.yaml new file mode 100644 index 00000000000..3f1088df1da --- /dev/null +++ b/releasenotes/notes/fix-cross-az-migration-ce97eff61280e1c7.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Resolve issue with cross AZ migrations and retypes where the destination + volume kept the source volume's AZ, so we ended up with a volume where the + AZ does not match the backend. (bug 1747949)