diff --git a/cinder/backup/chunkeddriver.py b/cinder/backup/chunkeddriver.py index 685c5386411..2af9c6a0a64 100644 --- a/cinder/backup/chunkeddriver.py +++ b/cinder/backup/chunkeddriver.py @@ -654,8 +654,14 @@ class ChunkedBackupDriver(driver.BackupDriver): self._finalize_backup(backup, container, object_meta, object_sha256) - def _restore_v1(self, backup, volume_id, metadata, volume_file): - """Restore a v1 volume backup.""" + def _restore_v1(self, backup, volume_id, metadata, volume_file, + requested_backup): + """Restore a v1 volume backup. + + Raises BackupRestoreCancel on any requested_backup status change, we + ignore the backup parameter for this check since that's only the + current data source from the list of backup sources. + """ backup_id = backup['id'] LOG.debug('v1 volume backup restore of %s started.', backup_id) extra_metadata = metadata.get('extra_metadata') @@ -676,6 +682,13 @@ class ChunkedBackupDriver(driver.BackupDriver): raise exception.InvalidBackup(reason=err) for metadata_object in metadata_objects: + # Abort when status changes to error, available, or anything else + with requested_backup.as_read_deleted(): + requested_backup.refresh() + if requested_backup.status != fields.BackupStatus.RESTORING: + raise exception.BackupRestoreCancel(back_id=backup.id, + vol_id=volume_id) + object_name, obj = list(metadata_object.items())[0] LOG.debug('restoring object. backup: %(backup_id)s, ' 'container: %(container)s, object name: ' @@ -722,7 +735,10 @@ class ChunkedBackupDriver(driver.BackupDriver): backup_id) def restore(self, backup, volume_id, volume_file): - """Restore the given volume backup from backup repository.""" + """Restore the given volume backup from backup repository. + + Raises BackupRestoreCancel on any backup status change. + """ backup_id = backup['id'] container = backup['container'] object_prefix = backup['service_metadata'] @@ -764,7 +780,7 @@ class ChunkedBackupDriver(driver.BackupDriver): backup1 = backup_list[index] index = index - 1 metadata = self._read_metadata(backup1) - restore_func(backup1, volume_id, metadata, volume_file) + restore_func(backup1, volume_id, metadata, volume_file, backup) volume_meta = metadata.get('volume_meta', None) try: diff --git a/cinder/backup/driver.py b/cinder/backup/driver.py index b4158e4f5e7..c7000425fd9 100644 --- a/cinder/backup/driver.py +++ b/cinder/backup/driver.py @@ -383,6 +383,9 @@ class BackupDriver(base.Base): starvation parameter volume_file will be a proxy that will execute all methods in native threads, so the method implementation doesn't need to worry about that.. + + May raise BackupRestoreCancel to indicate that the restoration of a + volume has been aborted by changing the backup status. """ return diff --git a/cinder/backup/manager.py b/cinder/backup/manager.py index b935f72c488..75b8472efeb 100644 --- a/cinder/backup/manager.py +++ b/cinder/backup/manager.py @@ -550,7 +550,10 @@ class BackupManager(manager.ThreadPoolManager): raise exception.InvalidBackup(reason=err) try: + canceled = False self._run_restore(context, backup, volume) + except exception.BackupRestoreCancel: + canceled = True except Exception: with excutils.save_and_reraise_exception(): self.db.volume_update(context, volume_id, @@ -558,12 +561,15 @@ class BackupManager(manager.ThreadPoolManager): backup.status = fields.BackupStatus.AVAILABLE backup.save() - self.db.volume_update(context, volume_id, {'status': 'available'}) + volume.status = 'error' if canceled else 'available' + volume.save() backup.status = fields.BackupStatus.AVAILABLE backup.save() - LOG.info('Restore backup finished, backup %(backup_id)s restored' - ' to volume %(volume_id)s.', - {'backup_id': backup.id, 'volume_id': volume_id}) + LOG.info('%(result)s restoring backup %(backup_id)s to volume ' + '%(volume_id)s.', + {'result': 'Canceled' if canceled else 'Finished', + 'backup_id': backup.id, + 'volume_id': volume_id}) self._notify_about_backup_usage(context, backup, "restore.end") def _run_restore(self, context, backup, volume): diff --git a/cinder/exception.py b/cinder/exception.py index fb48ebd6e79..ed8e50d3135 100644 --- a/cinder/exception.py +++ b/cinder/exception.py @@ -144,6 +144,10 @@ class BackupDriverException(CinderException): message = _("Backup driver reported an error: %(message)s") +class BackupRestoreCancel(CinderException): + message = _("Canceled backup %(back_id)s restore on volume %(vol_id)s") + + class GlanceConnectionFailed(CinderException): message = _("Connection to glance failed: %(reason)s") diff --git a/cinder/tests/unit/backup/drivers/test_backup_google.py b/cinder/tests/unit/backup/drivers/test_backup_google.py index 9e2eeddb1a8..f266112ab49 100644 --- a/cinder/tests/unit/backup/drivers/test_backup_google.py +++ b/cinder/tests/unit/backup/drivers/test_backup_google.py @@ -125,6 +125,7 @@ class GoogleBackupDriverTestCase(test.TestCase): volume_id=_DEFAULT_VOLUME_ID, container=google_dr.CONF.backup_gcs_bucket, parent_id=None, + status=None, service_metadata=None): try: @@ -138,6 +139,7 @@ class GoogleBackupDriverTestCase(test.TestCase): 'parent_id': parent_id, 'user_id': fake.USER_ID, 'project_id': fake.PROJECT_ID, + 'status': status, 'service_metadata': service_metadata, } backup = objects.Backup(context=self.ctxt, **kwargs) @@ -476,7 +478,9 @@ class GoogleBackupDriverTestCase(test.TestCase): @gcs_client def test_restore(self): volume_id = 'c2a81f09-f480-4325-8424-00000071685b' - backup = self._create_backup_db_entry(volume_id=volume_id) + backup = self._create_backup_db_entry( + volume_id=volume_id, + status=objects.fields.BackupStatus.RESTORING) service = google_dr.GoogleBackupDriver(self.ctxt) with tempfile.NamedTemporaryFile() as volume_file: @@ -514,9 +518,11 @@ class GoogleBackupDriverTestCase(test.TestCase): self.volume_file.seek(20 * units.Ki) self.volume_file.write(os.urandom(units.Ki)) - deltabackup = self._create_backup_db_entry(volume_id=volume_id, - container=container_name, - parent_id=backup.id) + deltabackup = self._create_backup_db_entry( + volume_id=volume_id, + status=objects.fields.BackupStatus.RESTORING, + container=container_name, + parent_id=backup.id) self.volume_file.seek(0) service2 = google_dr.GoogleBackupDriver(self.ctxt) service2.backup(deltabackup, self.volume_file, True) diff --git a/cinder/tests/unit/backup/drivers/test_backup_nfs.py b/cinder/tests/unit/backup/drivers/test_backup_nfs.py index 5d1a887b01d..c4695d184e8 100644 --- a/cinder/tests/unit/backup/drivers/test_backup_nfs.py +++ b/cinder/tests/unit/backup/drivers/test_backup_nfs.py @@ -160,7 +160,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase): volume_id=_DEFAULT_VOLUME_ID, container='test-container', backup_id=fake.BACKUP_ID, - parent_id=None): + parent_id=None, + status=None): try: db.volume_get(self.ctxt, volume_id) @@ -174,6 +175,7 @@ class BackupNFSSwiftBasedTestCase(test.TestCase): 'parent_id': parent_id, 'user_id': fake.USER_ID, 'project_id': fake.PROJECT_ID, + 'status': status, } return db.backup_create(self.ctxt, backup)['id'] @@ -638,6 +640,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase): with tempfile.NamedTemporaryFile() as restored_file: backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID) + backup.status = objects.fields.BackupStatus.RESTORING + backup.save() service.restore(backup, volume_id, restored_file) self.assertTrue(filecmp.cmp(self.volume_file.name, restored_file.name)) @@ -659,6 +663,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase): with tempfile.NamedTemporaryFile() as restored_file: backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID) + backup.status = objects.fields.BackupStatus.RESTORING + backup.save() service.restore(backup, volume_id, restored_file) self.assertTrue(filecmp.cmp(self.volume_file.name, restored_file.name)) @@ -679,6 +685,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase): service = nfs.NFSBackupDriver(self.ctxt) self._write_effective_compression_file(file_size) backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID) + backup.status = objects.fields.BackupStatus.RESTORING + backup.save() service.backup(backup, self.volume_file) with tempfile.NamedTemporaryFile() as restored_file: @@ -690,6 +698,70 @@ class BackupNFSSwiftBasedTestCase(test.TestCase): self.assertNotEqual(threading.current_thread(), self.thread_dict['thread']) + def test_restore_abort_delta(self): + volume_id = fake.VOLUME_ID + count = set() + + def _fake_generate_object_name_prefix(self, backup): + az = 'az_fake' + backup_name = '%s_backup_%s' % (az, backup['id']) + volume = 'volume_%s' % (backup['volume_id']) + prefix = volume + '_' + backup_name + return prefix + + def my_refresh(): + # This refresh method will abort the backup after 1 chunk + count.add(len(count) + 1) + if len(count) == 2: + backup.status = objects.fields.BackupStatus.AVAILABLE + backup.save() + original_refresh() + + self.mock_object(nfs.NFSBackupDriver, + '_generate_object_name_prefix', + _fake_generate_object_name_prefix) + + self.flags(backup_file_size=(1024 * 8)) + self.flags(backup_sha_block_size_bytes=1024) + + container_name = self.temp_dir.replace(tempfile.gettempdir() + '/', + '', 1) + self._create_backup_db_entry(volume_id=volume_id, + container=container_name, + backup_id=fake.BACKUP_ID) + service = nfs.NFSBackupDriver(self.ctxt) + self.volume_file.seek(0) + backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID) + service.backup(backup, self.volume_file) + + # Create incremental backup with no change to contents + self.volume_file.seek(16 * 1024) + self.volume_file.write(os.urandom(1024)) + self.volume_file.seek(20 * 1024) + self.volume_file.write(os.urandom(1024)) + + self._create_backup_db_entry( + volume_id=volume_id, + status=objects.fields.BackupStatus.RESTORING, + container=container_name, + backup_id=fake.BACKUP2_ID, + parent_id=fake.BACKUP_ID) + self.volume_file.seek(0) + deltabackup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID) + service.backup(deltabackup, self.volume_file, True) + deltabackup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID) + + backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID) + original_refresh = backup.refresh + + with tempfile.NamedTemporaryFile() as restored_file, \ + mock.patch('cinder.objects.Backup.refresh', + side_effect=my_refresh): + + self.assertRaises(exception.BackupRestoreCancel, + service.restore, backup, volume_id, + restored_file) + def test_restore_delta(self): volume_id = fake.VOLUME_ID @@ -723,10 +795,12 @@ class BackupNFSSwiftBasedTestCase(test.TestCase): self.volume_file.seek(20 * 1024) self.volume_file.write(os.urandom(1024)) - self._create_backup_db_entry(volume_id=volume_id, - container=container_name, - backup_id=fake.BACKUP2_ID, - parent_id=fake.BACKUP_ID) + self._create_backup_db_entry( + volume_id=volume_id, + status=objects.fields.BackupStatus.RESTORING, + container=container_name, + backup_id=fake.BACKUP2_ID, + parent_id=fake.BACKUP_ID) self.volume_file.seek(0) deltabackup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID) service.backup(deltabackup, self.volume_file, True) diff --git a/cinder/tests/unit/backup/drivers/test_backup_swift.py b/cinder/tests/unit/backup/drivers/test_backup_swift.py index c31d4f9bc82..4bc6993f2e2 100644 --- a/cinder/tests/unit/backup/drivers/test_backup_swift.py +++ b/cinder/tests/unit/backup/drivers/test_backup_swift.py @@ -701,6 +701,8 @@ class BackupSwiftTestCase(test.TestCase): with tempfile.NamedTemporaryFile() as volume_file: backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID) + backup.status = objects.fields.BackupStatus.RESTORING + backup.save() service.restore(backup, volume_id, volume_file) def test_restore_delta(self): @@ -749,6 +751,8 @@ class BackupSwiftTestCase(test.TestCase): with tempfile.NamedTemporaryFile() as restored_file: backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID) + backup.status = objects.fields.BackupStatus.RESTORING + backup.save() service.restore(backup, volume_id, restored_file) self.assertTrue(filecmp.cmp(self.volume_file.name, diff --git a/cinder/tests/unit/backup/test_backup.py b/cinder/tests/unit/backup/test_backup.py index a3b559fe651..94e3e5ed8e2 100644 --- a/cinder/tests/unit/backup/test_backup.py +++ b/cinder/tests/unit/backup/test_backup.py @@ -1085,6 +1085,26 @@ class BackupTestCase(BaseBackupTest): self.assertEqual(fields.BackupStatus.AVAILABLE, backup['status']) self.assertTrue(mock_run_restore.called) + def test_restore_backup_with_driver_cancellation(self): + """Test error handling when a restore is cancelled.""" + vol_id = self._create_volume_db_entry(status='restoring-backup', + size=1) + backup = self._create_backup_db_entry( + status=fields.BackupStatus.RESTORING, volume_id=vol_id) + + mock_run_restore = self.mock_object( + self.backup_mgr, + '_run_restore') + mock_run_restore.side_effect = exception.BackupRestoreCancel( + vol_id=vol_id, back_id=backup.id) + # We shouldn't raise an exception on the call, it's OK to cancel + self.backup_mgr.restore_backup(self.ctxt, backup, vol_id) + vol = objects.Volume.get_by_id(self.ctxt, vol_id) + self.assertEqual('error', vol.status) + backup.refresh() + self.assertEqual(fields.BackupStatus.AVAILABLE, backup.status) + self.assertTrue(mock_run_restore.called) + def test_restore_backup_with_bad_service(self): """Test error handling. @@ -1144,6 +1164,8 @@ class BackupTestCase(BaseBackupTest): mock_open.assert_called_once_with('/dev/null', exp_open_mode) mock_temporary_chown.assert_called_once_with('/dev/null') mock_get_conn.assert_called_once_with() + vol.status = 'available' + vol.obj_reset_changes() mock_secure_enabled.assert_called_once_with(self.ctxt, vol) mock_attach_device.assert_called_once_with(self.ctxt, vol, properties) diff --git a/doc/source/admin/blockstorage-volume-backups.rst b/doc/source/admin/blockstorage-volume-backups.rst index d9c9aa41686..4409aa7db0d 100644 --- a/doc/source/admin/blockstorage-volume-backups.rst +++ b/doc/source/admin/blockstorage-volume-backups.rst @@ -203,3 +203,21 @@ the status of the source resource to see when it stops being "backing-up". operation on the snapshot that followed a cancellation could result in an error if the snapshot was still mapped. Polling on the volume to stop being "backing-up" prior to the deletion is required to ensure success. + +Since Rocky it is also possible to cancel an ongoing restoring operation on any +of the Chunked Backup type of drivers. + +To issue a backup restoration cancellation we need to alter its status to +anything other than `restoring`. We strongly recommend using the "error" state +to avoid any confusion on whether the restore was successful or not. + +.. code-block:: console + + $ openstack volume backup set --state error BACKUP_ID + +.. warning:: + + After a restore operation has started, if it is then cancelled, the + destination volume is useless, as there is no way of knowing how much data, + or if any, was actually restored, hence our recommendation of using the + "error" state. diff --git a/releasenotes/notes/feature-abort-restore-fe1252288c59e105.yaml b/releasenotes/notes/feature-abort-restore-fe1252288c59e105.yaml new file mode 100644 index 00000000000..1fa7e592721 --- /dev/null +++ b/releasenotes/notes/feature-abort-restore-fe1252288c59e105.yaml @@ -0,0 +1,5 @@ +--- +features: + - | + Support backup restore cancelation by changing the backup status to + anything other than `restoring` using `cinder backup-reset-state`.