Add backup restoration cancellation support

Backups can be cancelled by force deleting the in-progress backup, but
there is no mechanism to delete the restoring backups.

We will now monitor the status of the backup and on any status change
will abort the restoration and change the volume status to error.

The reason to diverge from the backup creation cancellation
implementation (deleting the new resource) is that the restoring may be
done into an already existing volume and deleting it is not feasable for
the user, for example when an instance is booted from a Cinder volume
and there's a license linked to the System ID.

Cinder backup drivers may now raise BackupRestoreCancel in `restore`
method when a restoring operation is cancelled.

Change-Id: If2f143d7ba56ae2d74b3bb571237cc053f63054e
This commit is contained in:
Gorka Eguileor 2018-02-01 13:39:56 +01:00
parent fbf51eea48
commit 89f6291ee3
10 changed files with 175 additions and 17 deletions

View File

@ -615,8 +615,14 @@ class ChunkedBackupDriver(driver.BackupDriver):
self._finalize_backup(backup, container, object_meta, object_sha256) self._finalize_backup(backup, container, object_meta, object_sha256)
def _restore_v1(self, backup, volume_id, metadata, volume_file): def _restore_v1(self, backup, volume_id, metadata, volume_file,
"""Restore a v1 volume backup.""" requested_backup):
"""Restore a v1 volume backup.
Raises BackupRestoreCancel on any requested_backup status change, we
ignore the backup parameter for this check since that's only the
current data source from the list of backup sources.
"""
backup_id = backup['id'] backup_id = backup['id']
LOG.debug('v1 volume backup restore of %s started.', backup_id) LOG.debug('v1 volume backup restore of %s started.', backup_id)
extra_metadata = metadata.get('extra_metadata') extra_metadata = metadata.get('extra_metadata')
@ -637,6 +643,13 @@ class ChunkedBackupDriver(driver.BackupDriver):
raise exception.InvalidBackup(reason=err) raise exception.InvalidBackup(reason=err)
for metadata_object in metadata_objects: for metadata_object in metadata_objects:
# Abort when status changes to error, available, or anything else
with requested_backup.as_read_deleted():
requested_backup.refresh()
if requested_backup.status != fields.BackupStatus.RESTORING:
raise exception.BackupRestoreCancel(back_id=backup.id,
vol_id=volume_id)
object_name, obj = list(metadata_object.items())[0] object_name, obj = list(metadata_object.items())[0]
LOG.debug('restoring object. backup: %(backup_id)s, ' LOG.debug('restoring object. backup: %(backup_id)s, '
'container: %(container)s, object name: ' 'container: %(container)s, object name: '
@ -683,7 +696,10 @@ class ChunkedBackupDriver(driver.BackupDriver):
backup_id) backup_id)
def restore(self, backup, volume_id, volume_file): def restore(self, backup, volume_id, volume_file):
"""Restore the given volume backup from backup repository.""" """Restore the given volume backup from backup repository.
Raises BackupRestoreCancel on any backup status change.
"""
backup_id = backup['id'] backup_id = backup['id']
container = backup['container'] container = backup['container']
object_prefix = backup['service_metadata'] object_prefix = backup['service_metadata']
@ -725,7 +741,7 @@ class ChunkedBackupDriver(driver.BackupDriver):
backup1 = backup_list[index] backup1 = backup_list[index]
index = index - 1 index = index - 1
metadata = self._read_metadata(backup1) metadata = self._read_metadata(backup1)
restore_func(backup1, volume_id, metadata, volume_file) restore_func(backup1, volume_id, metadata, volume_file, backup)
volume_meta = metadata.get('volume_meta', None) volume_meta = metadata.get('volume_meta', None)
try: try:

View File

@ -391,6 +391,9 @@ class BackupDriver(base.Base):
starvation parameter volume_file will be a proxy that will execute all starvation parameter volume_file will be a proxy that will execute all
methods in native threads, so the method implementation doesn't need to methods in native threads, so the method implementation doesn't need to
worry about that.. worry about that..
May raise BackupRestoreCancel to indicate that the restoration of a
volume has been aborted by changing the backup status.
""" """
return return

View File

@ -530,7 +530,10 @@ class BackupManager(manager.ThreadPoolManager):
raise exception.InvalidBackup(reason=err) raise exception.InvalidBackup(reason=err)
try: try:
canceled = False
self._run_restore(context, backup, volume) self._run_restore(context, backup, volume)
except exception.BackupRestoreCancel:
canceled = True
except Exception: except Exception:
with excutils.save_and_reraise_exception(): with excutils.save_and_reraise_exception():
self.db.volume_update(context, volume_id, self.db.volume_update(context, volume_id,
@ -538,12 +541,15 @@ class BackupManager(manager.ThreadPoolManager):
backup.status = fields.BackupStatus.AVAILABLE backup.status = fields.BackupStatus.AVAILABLE
backup.save() backup.save()
self.db.volume_update(context, volume_id, {'status': 'available'}) volume.status = 'error' if canceled else 'available'
volume.save()
backup.status = fields.BackupStatus.AVAILABLE backup.status = fields.BackupStatus.AVAILABLE
backup.save() backup.save()
LOG.info('Restore backup finished, backup %(backup_id)s restored' LOG.info('%(result)s restoring backup %(backup_id)s to volume '
' to volume %(volume_id)s.', '%(volume_id)s.',
{'backup_id': backup.id, 'volume_id': volume_id}) {'result': 'Canceled' if canceled else 'Finished',
'backup_id': backup.id,
'volume_id': volume_id})
self._notify_about_backup_usage(context, backup, "restore.end") self._notify_about_backup_usage(context, backup, "restore.end")
def _run_restore(self, context, backup, volume): def _run_restore(self, context, backup, volume):

View File

@ -155,6 +155,10 @@ class BackupDriverException(CinderException):
message = _("Backup driver reported an error: %(message)s") message = _("Backup driver reported an error: %(message)s")
class BackupRestoreCancel(CinderException):
message = _("Canceled backup %(back_id)s restore on volume %(vol_id)s")
class GlanceConnectionFailed(CinderException): class GlanceConnectionFailed(CinderException):
message = _("Connection to glance failed: %(reason)s") message = _("Connection to glance failed: %(reason)s")

View File

@ -125,6 +125,7 @@ class GoogleBackupDriverTestCase(test.TestCase):
volume_id=_DEFAULT_VOLUME_ID, volume_id=_DEFAULT_VOLUME_ID,
container=google_dr.CONF.backup_gcs_bucket, container=google_dr.CONF.backup_gcs_bucket,
parent_id=None, parent_id=None,
status=None,
service_metadata=None): service_metadata=None):
try: try:
@ -138,6 +139,7 @@ class GoogleBackupDriverTestCase(test.TestCase):
'parent_id': parent_id, 'parent_id': parent_id,
'user_id': fake.USER_ID, 'user_id': fake.USER_ID,
'project_id': fake.PROJECT_ID, 'project_id': fake.PROJECT_ID,
'status': status,
'service_metadata': service_metadata, 'service_metadata': service_metadata,
} }
backup = objects.Backup(context=self.ctxt, **kwargs) backup = objects.Backup(context=self.ctxt, **kwargs)
@ -476,7 +478,9 @@ class GoogleBackupDriverTestCase(test.TestCase):
@gcs_client @gcs_client
def test_restore(self): def test_restore(self):
volume_id = 'c2a81f09-f480-4325-8424-00000071685b' volume_id = 'c2a81f09-f480-4325-8424-00000071685b'
backup = self._create_backup_db_entry(volume_id=volume_id) backup = self._create_backup_db_entry(
volume_id=volume_id,
status=objects.fields.BackupStatus.RESTORING)
service = google_dr.GoogleBackupDriver(self.ctxt) service = google_dr.GoogleBackupDriver(self.ctxt)
with tempfile.NamedTemporaryFile() as volume_file: with tempfile.NamedTemporaryFile() as volume_file:
@ -514,9 +518,11 @@ class GoogleBackupDriverTestCase(test.TestCase):
self.volume_file.seek(20 * units.Ki) self.volume_file.seek(20 * units.Ki)
self.volume_file.write(os.urandom(units.Ki)) self.volume_file.write(os.urandom(units.Ki))
deltabackup = self._create_backup_db_entry(volume_id=volume_id, deltabackup = self._create_backup_db_entry(
container=container_name, volume_id=volume_id,
parent_id=backup.id) status=objects.fields.BackupStatus.RESTORING,
container=container_name,
parent_id=backup.id)
self.volume_file.seek(0) self.volume_file.seek(0)
service2 = google_dr.GoogleBackupDriver(self.ctxt) service2 = google_dr.GoogleBackupDriver(self.ctxt)
service2.backup(deltabackup, self.volume_file, True) service2.backup(deltabackup, self.volume_file, True)

View File

@ -127,7 +127,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
volume_id=_DEFAULT_VOLUME_ID, volume_id=_DEFAULT_VOLUME_ID,
container='test-container', container='test-container',
backup_id=fake.BACKUP_ID, backup_id=fake.BACKUP_ID,
parent_id=None): parent_id=None,
status=None):
try: try:
db.volume_get(self.ctxt, volume_id) db.volume_get(self.ctxt, volume_id)
@ -141,6 +142,7 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
'parent_id': parent_id, 'parent_id': parent_id,
'user_id': fake.USER_ID, 'user_id': fake.USER_ID,
'project_id': fake.PROJECT_ID, 'project_id': fake.PROJECT_ID,
'status': status,
} }
return db.backup_create(self.ctxt, backup)['id'] return db.backup_create(self.ctxt, backup)['id']
@ -608,6 +610,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
with tempfile.NamedTemporaryFile() as restored_file: with tempfile.NamedTemporaryFile() as restored_file:
backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID) backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID)
backup.status = objects.fields.BackupStatus.RESTORING
backup.save()
service.restore(backup, volume_id, restored_file) service.restore(backup, volume_id, restored_file)
self.assertTrue(filecmp.cmp(self.volume_file.name, self.assertTrue(filecmp.cmp(self.volume_file.name,
restored_file.name)) restored_file.name))
@ -629,6 +633,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
with tempfile.NamedTemporaryFile() as restored_file: with tempfile.NamedTemporaryFile() as restored_file:
backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID) backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID)
backup.status = objects.fields.BackupStatus.RESTORING
backup.save()
service.restore(backup, volume_id, restored_file) service.restore(backup, volume_id, restored_file)
self.assertTrue(filecmp.cmp(self.volume_file.name, self.assertTrue(filecmp.cmp(self.volume_file.name,
restored_file.name)) restored_file.name))
@ -649,6 +655,8 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
service = nfs.NFSBackupDriver(self.ctxt) service = nfs.NFSBackupDriver(self.ctxt)
self._write_effective_compression_file(file_size) self._write_effective_compression_file(file_size)
backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID) backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID)
backup.status = objects.fields.BackupStatus.RESTORING
backup.save()
service.backup(backup, self.volume_file) service.backup(backup, self.volume_file)
with tempfile.NamedTemporaryFile() as restored_file: with tempfile.NamedTemporaryFile() as restored_file:
@ -660,6 +668,70 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
self.assertNotEqual(threading.current_thread(), self.assertNotEqual(threading.current_thread(),
self.thread_dict['thread']) self.thread_dict['thread'])
def test_restore_abort_delta(self):
volume_id = fake.VOLUME_ID
count = set()
def _fake_generate_object_name_prefix(self, backup):
az = 'az_fake'
backup_name = '%s_backup_%s' % (az, backup['id'])
volume = 'volume_%s' % (backup['volume_id'])
prefix = volume + '_' + backup_name
return prefix
def my_refresh():
# This refresh method will abort the backup after 1 chunk
count.add(len(count) + 1)
if len(count) == 2:
backup.status = objects.fields.BackupStatus.AVAILABLE
backup.save()
original_refresh()
self.mock_object(nfs.NFSBackupDriver,
'_generate_object_name_prefix',
_fake_generate_object_name_prefix)
self.flags(backup_file_size=(1024 * 8))
self.flags(backup_sha_block_size_bytes=1024)
container_name = self.temp_dir.replace(tempfile.gettempdir() + '/',
'', 1)
self._create_backup_db_entry(volume_id=volume_id,
container=container_name,
backup_id=fake.BACKUP_ID)
service = nfs.NFSBackupDriver(self.ctxt)
self.volume_file.seek(0)
backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID)
service.backup(backup, self.volume_file)
# Create incremental backup with no change to contents
self.volume_file.seek(16 * 1024)
self.volume_file.write(os.urandom(1024))
self.volume_file.seek(20 * 1024)
self.volume_file.write(os.urandom(1024))
self._create_backup_db_entry(
volume_id=volume_id,
status=objects.fields.BackupStatus.RESTORING,
container=container_name,
backup_id=fake.BACKUP2_ID,
parent_id=fake.BACKUP_ID)
self.volume_file.seek(0)
deltabackup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID)
service.backup(deltabackup, self.volume_file, True)
deltabackup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID)
backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID)
original_refresh = backup.refresh
with tempfile.NamedTemporaryFile() as restored_file, \
mock.patch('cinder.objects.Backup.refresh',
side_effect=my_refresh):
self.assertRaises(exception.BackupRestoreCancel,
service.restore, backup, volume_id,
restored_file)
def test_restore_delta(self): def test_restore_delta(self):
volume_id = fake.VOLUME_ID volume_id = fake.VOLUME_ID
@ -693,10 +765,12 @@ class BackupNFSSwiftBasedTestCase(test.TestCase):
self.volume_file.seek(20 * 1024) self.volume_file.seek(20 * 1024)
self.volume_file.write(os.urandom(1024)) self.volume_file.write(os.urandom(1024))
self._create_backup_db_entry(volume_id=volume_id, self._create_backup_db_entry(
container=container_name, volume_id=volume_id,
backup_id=fake.BACKUP2_ID, status=objects.fields.BackupStatus.RESTORING,
parent_id=fake.BACKUP_ID) container=container_name,
backup_id=fake.BACKUP2_ID,
parent_id=fake.BACKUP_ID)
self.volume_file.seek(0) self.volume_file.seek(0)
deltabackup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID) deltabackup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID)
service.backup(deltabackup, self.volume_file, True) service.backup(deltabackup, self.volume_file, True)

View File

@ -700,6 +700,8 @@ class BackupSwiftTestCase(test.TestCase):
with tempfile.NamedTemporaryFile() as volume_file: with tempfile.NamedTemporaryFile() as volume_file:
backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID) backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP_ID)
backup.status = objects.fields.BackupStatus.RESTORING
backup.save()
service.restore(backup, volume_id, volume_file) service.restore(backup, volume_id, volume_file)
def test_restore_delta(self): def test_restore_delta(self):
@ -748,6 +750,8 @@ class BackupSwiftTestCase(test.TestCase):
with tempfile.NamedTemporaryFile() as restored_file: with tempfile.NamedTemporaryFile() as restored_file:
backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID) backup = objects.Backup.get_by_id(self.ctxt, fake.BACKUP2_ID)
backup.status = objects.fields.BackupStatus.RESTORING
backup.save()
service.restore(backup, volume_id, service.restore(backup, volume_id,
restored_file) restored_file)
self.assertTrue(filecmp.cmp(self.volume_file.name, self.assertTrue(filecmp.cmp(self.volume_file.name,

View File

@ -1011,6 +1011,26 @@ class BackupTestCase(BaseBackupTest):
self.assertEqual(fields.BackupStatus.AVAILABLE, backup['status']) self.assertEqual(fields.BackupStatus.AVAILABLE, backup['status'])
self.assertTrue(mock_run_restore.called) self.assertTrue(mock_run_restore.called)
def test_restore_backup_with_driver_cancellation(self):
"""Test error handling when a restore is cancelled."""
vol_id = self._create_volume_db_entry(status='restoring-backup',
size=1)
backup = self._create_backup_db_entry(
status=fields.BackupStatus.RESTORING, volume_id=vol_id)
mock_run_restore = self.mock_object(
self.backup_mgr,
'_run_restore')
mock_run_restore.side_effect = exception.BackupRestoreCancel(
vol_id=vol_id, back_id=backup.id)
# We shouldn't raise an exception on the call, it's OK to cancel
self.backup_mgr.restore_backup(self.ctxt, backup, vol_id)
vol = objects.Volume.get_by_id(self.ctxt, vol_id)
self.assertEqual('error', vol.status)
backup.refresh()
self.assertEqual(fields.BackupStatus.AVAILABLE, backup.status)
self.assertTrue(mock_run_restore.called)
def test_restore_backup_with_bad_service(self): def test_restore_backup_with_bad_service(self):
"""Test error handling. """Test error handling.
@ -1065,6 +1085,8 @@ class BackupTestCase(BaseBackupTest):
mock_temporary_chown.assert_called_once_with('/dev/null') mock_temporary_chown.assert_called_once_with('/dev/null')
mock_get_conn.assert_called_once_with() mock_get_conn.assert_called_once_with()
vol.status = 'available'
vol.obj_reset_changes()
mock_secure_enabled.assert_called_once_with(self.ctxt, vol) mock_secure_enabled.assert_called_once_with(self.ctxt, vol)
mock_attach_device.assert_called_once_with(self.ctxt, vol, mock_attach_device.assert_called_once_with(self.ctxt, vol,
properties) properties)

View File

@ -203,3 +203,21 @@ the status of the source resource to see when it stops being "backing-up".
operation on the snapshot that followed a cancellation could result in an operation on the snapshot that followed a cancellation could result in an
error if the snapshot was still mapped. Polling on the volume to stop being error if the snapshot was still mapped. Polling on the volume to stop being
"backing-up" prior to the deletion is required to ensure success. "backing-up" prior to the deletion is required to ensure success.
Since Rocky it is also possible to cancel an ongoing restoring operation on any
of the Chunked Backup type of drivers.
To issue a backup restoration cancellation we need to alter its status to
anything other than `restoring`. We strongly recommend using the "error" state
to avoid any confusion on whether the restore was successful or not.
.. code-block:: console
$ openstack volume backup set --state error BACKUP_ID
.. warning::
After a restore operation has started, if it is then cancelled, the
destination volume is useless, as there is no way of knowing how much data,
or if any, was actually restored, hence our recommendation of using the
"error" state.

View File

@ -0,0 +1,5 @@
---
features:
- |
Support backup restore cancelation by changing the backup status to
anything other than `restoring` using `cinder backup-reset-state`.