RBD: Call trash operation when plain deletion fails

Currently RBD doesn't allow deleting volumes with snapshots or volume
dependencies. This causes Cinder API errors on delete calls that should
succeed.

When using the RBD v2 clone api, deleting a volume that has a snapshot
in the trash space raises a busy exception.

In order to solve this, this patch removes the proactive VolumeIsBusy
exception raise and calls the trash operation which should succeed when
the volume has dependencies.

In addition to this code it's important to enable the Ceph Trash auto
purge. Otherwise Ceph may end up with a couple of images in trash
namespace for a while. However, this approach is the lesser of 2 evils
because the user will be able to delete volumes with dependencies
while the operator could check the trash namespace and manually purge
the images. It is definitely better to potentially trouble 1 person
(operator) that didn't read the release notes once than troubling
every single user.

Closes-Bug: #1941815
Co-Author: Eric Harney <eharney@redhat.com>
Change-Id: I5dbbcca780017b358600016afca8a9424aa137fd
This commit is contained in:
Sofia Enriquez 2021-08-24 21:02:31 +00:00
parent 4a5ff4eb75
commit ac2f8a4d89
3 changed files with 170 additions and 11 deletions

View File

@ -80,6 +80,10 @@ class MockPermissionError(MockException):
errno = errno.EPERM
class MockImageHasSnapshotsException(MockException):
"""Used as mock for rbd.ImageHasSnapshots."""
class KeyObject(object):
def get_encoded(arg):
return "asdf".encode('utf-8')
@ -108,6 +112,7 @@ def common_mocks(f):
inst.mock_rbd.ImageBusy = MockImageBusyException
inst.mock_rbd.ImageNotFound = MockImageNotFoundException
inst.mock_rbd.ImageExists = MockImageExistsException
inst.mock_rbd.ImageHasSnapshots = MockImageHasSnapshotsException
inst.mock_rbd.InvalidArgument = MockImageNotFoundException
inst.mock_rbd.PermissionError = MockPermissionError
@ -694,6 +699,42 @@ class RBDTestCase(test.TestCase):
self.assertEqual(
1, self.driver.rbd.RBD.return_value.remove.call_count)
@common_mocks
def test_delete_volume_clone_info_return_parent(self):
client = self.mock_client.return_value
self.driver.rbd.Image.return_value.list_snaps.return_value = []
pool = 'volumes'
parent = True
parent_snap = self.snapshot_b
mock_get_clone_info = self.mock_object(self.driver, '_get_clone_info',
return_value=(pool,
parent,
parent_snap))
m_del_clone_parent_refs = self.mock_object(self.driver,
'_delete_clone_parent_refs')
m_del_back_snaps = self.mock_object(self.driver,
'_delete_backup_snaps')
self.driver.delete_volume(self.volume_a)
mock_get_clone_info.assert_called_once_with(
self.mock_rbd.Image.return_value,
self.volume_a.name,
None)
m_del_clone_parent_refs.assert_called_once()
(self.driver.rbd.Image.return_value
.list_snaps.assert_called_once_with())
client.__enter__.assert_called_once_with()
client.__exit__.assert_called_once_with(None, None, None)
m_del_back_snaps.assert_called_once_with(
self.mock_rbd.Image.return_value)
self.assertFalse(
self.driver.rbd.Image.return_value.unprotect_snap.called)
self.assertEqual(
1, self.driver.rbd.RBD.return_value.remove.call_count)
@common_mocks
def test_deferred_deletion(self):
drv = self._make_drv({'enable_deferred_deletion': True,
@ -814,8 +855,7 @@ class RBDTestCase(test.TestCase):
mock_delete_backup_snaps:
with mock.patch.object(driver, 'RADOSClient') as \
mock_rados_client:
self.assertRaises(exception.VolumeIsBusy,
self.driver.delete_volume, self.volume_a)
self.driver.delete_volume(self.volume_a)
mock_get_clone_info.assert_called_once_with(
self.mock_rbd.Image.return_value,
@ -829,11 +869,58 @@ class RBDTestCase(test.TestCase):
self.assertFalse(
self.mock_rbd.Image.return_value.unprotect_snap.called)
self.assertEqual(
3, self.mock_rbd.RBD.return_value.remove.call_count)
self.assertEqual(3, len(RAISED_EXCEPTIONS))
1, self.mock_rbd.RBD.return_value.remove.call_count)
self.assertEqual(1, len(RAISED_EXCEPTIONS))
# Make sure the exception was raised
self.assertIn(self.mock_rbd.ImageBusy, RAISED_EXCEPTIONS)
self.mock_rbd.RBD.return_value.trash_move.\
assert_called_once_with(
mock.ANY,
self.volume_a.name,
0)
@common_mocks
def test_delete_volume_has_snapshots(self):
self.mock_rbd.Image.return_value.list_snaps.return_value = []
self.mock_rbd.RBD.return_value.remove.side_effect = (
self.mock_rbd.ImageHasSnapshots)
mock_get_clone_info = self.mock_object(self.driver,
'_get_clone_info',
return_value=(None,
None,
None))
m_del_backup_snaps = self.mock_object(self.driver,
'_delete_backup_snaps')
with mock.patch.object(driver, 'RADOSClient') as mock_rados_client:
self.driver.delete_volume(self.volume_a)
mock_get_clone_info.assert_called_once_with(
self.mock_rbd.Image.return_value,
self.volume_a.name,
None)
(self.mock_rbd.Image.return_value.list_snaps
.assert_called_once_with())
mock_rados_client.assert_called_once_with(self.driver)
m_del_backup_snaps.assert_called_once_with(
self.mock_rbd.Image.return_value)
self.assertFalse(
self.mock_rbd.Image.return_value.unprotect_snap.called)
self.assertEqual(
1, self.mock_rbd.RBD.return_value.remove.call_count)
self.assertEqual(1, len(RAISED_EXCEPTIONS))
# Make sure the exception was raised
self.assertIn(self.mock_rbd.ImageHasSnapshots,
RAISED_EXCEPTIONS)
self.mock_rbd.RBD.return_value.trash_move.\
assert_called_once_with(
mock.ANY,
self.volume_a.name,
0)
@common_mocks
def test_delete_volume_not_found(self):
self.mock_rbd.Image.return_value.list_snaps.return_value = []
@ -866,6 +953,46 @@ class RBDTestCase(test.TestCase):
self.assertEqual([self.mock_rbd.ImageNotFound],
RAISED_EXCEPTIONS)
@common_mocks
def test_delete_volume_w_clone_snaps(self):
client = self.mock_client.return_value
snapshots = [
{'id': 1, 'name': 'snapshot-00000000-0000-0000-0000-000000000000',
'size': 2147483648},
{'id': 2, 'name': 'snap1', 'size': 6442450944},
{'id': 3, 'size': 8589934592,
'name':
'volume-22222222-2222-2222-2222-222222222222.clone_snap'},
{'id': 4, 'size': 5368709120,
'name':
'backup.33333333-3333-3333-3333-333333333333.snap.123'}]
self.mock_rbd.Image.return_value.list_snaps.return_value = snapshots
mock_get_clone_info = self.mock_object(self.driver,
'_get_clone_info',
return_value=(None,
None,
None))
with mock.patch.object(self.driver, '_delete_backup_snaps') as \
mock_delete_backup_snaps:
self.driver.delete_volume(self.volume_a)
mock_get_clone_info.assert_called_once_with(
self.mock_rbd.Image.return_value,
self.volume_a.name,
snapshots[2]['name'])
(self.driver.rbd.Image.return_value
.list_snaps.assert_called_once_with())
client.__enter__.assert_called_once_with()
client.__exit__.assert_called_once_with(None, None, None)
mock_delete_backup_snaps.assert_called_once_with(
self.mock_rbd.Image.return_value)
self.assertFalse(
self.driver.rbd.Image.return_value.unprotect_snap.called)
self.assertEqual(
1, self.driver.rbd.RBD.return_value.rename.call_count)
@common_mocks
@mock.patch('cinder.objects.Volume.get_by_id')
def test_create_snapshot(self, volume_get_by_id):

View File

@ -1211,8 +1211,6 @@ class RBDDriver(driver.CloneableImageVD, driver.MigrateVD,
clone_snap = snap['name']
break
raise exception.VolumeIsBusy(volume_name=volume_name)
# Determine if this volume is itself a clone
_pool, parent, parent_snap = self._get_clone_info(rbd_image,
volume_name,
@ -1225,13 +1223,23 @@ class RBDDriver(driver.CloneableImageVD, driver.MigrateVD,
self.configuration.rados_connection_retries)
def _try_remove_volume(client, volume_name):
if self.configuration.enable_deferred_deletion:
LOG.debug("moving volume %s to trash", volume_name)
delay = self.configuration.deferred_deletion_delay
self.RBDProxy().trash_move(client.ioctx,
volume_name,
delay)
else:
self.RBDProxy().remove(client.ioctx, volume_name)
try:
self.RBDProxy().remove(client.ioctx, volume_name)
return
except (self.rbd.ImageHasSnapshots, self.rbd.ImageBusy):
delay = 0
LOG.debug("moving volume %s to trash", volume_name)
# When using the RBD v2 clone api, deleting a volume
# that has a snapshot in the trash space raises a
# busy exception.
# In order to solve this, call the trash operation
# which should succeed when the volume has
# dependencies.
self.RBDProxy().trash_move(client.ioctx,
volume_name,
delay)
if clone_snap is None:
LOG.debug("deleting rbd volume %s", volume_name)

View File

@ -0,0 +1,24 @@
---
fixes:
- |
RBD driver `bug #1941815
<https://bugs.launchpad.net/cinder/+bug/1941815>`_: Fixed
deleting volumes with snapshots/volumes in the ceph trash space.
upgrade:
- |
**RBD driver: Enable Ceph V2 Clone API and Ceph Trash auto purge**
In light of the fix for RBD driver `bug #1941815
<https://bugs.launchpad.net/cinder/+bug/1941815>`_, we want to
bring the following information to your attention.
Using the v2 clone format for cloned volumes allows volumes with
dependent images to be moved to the trash - where they remain
until purged - and allow the RBD driver to postpone the deletion
until the volume has no dependent images. Configuring the trash
purge is recommended to avoid wasting space with these trashed
volumes. Since the Ceph Octopus release, the trash can be
configured to automatically purge on a defined schedule.
See the ``rbd trash purge schedule`` commands in the `rbd manpage
<https://docs.ceph.com/en/octopus/man/8/rbd/>`_.