From ac2f8a4d895b41ef107f88b7d22e954ed156e093 Mon Sep 17 00:00:00 2001 From: Sofia Enriquez Date: Tue, 24 Aug 2021 21:02:31 +0000 Subject: [PATCH] RBD: Call trash operation when plain deletion fails Currently RBD doesn't allow deleting volumes with snapshots or volume dependencies. This causes Cinder API errors on delete calls that should succeed. When using the RBD v2 clone api, deleting a volume that has a snapshot in the trash space raises a busy exception. In order to solve this, this patch removes the proactive VolumeIsBusy exception raise and calls the trash operation which should succeed when the volume has dependencies. In addition to this code it's important to enable the Ceph Trash auto purge. Otherwise Ceph may end up with a couple of images in trash namespace for a while. However, this approach is the lesser of 2 evils because the user will be able to delete volumes with dependencies while the operator could check the trash namespace and manually purge the images. It is definitely better to potentially trouble 1 person (operator) that didn't read the release notes once than troubling every single user. Closes-Bug: #1941815 Co-Author: Eric Harney Change-Id: I5dbbcca780017b358600016afca8a9424aa137fd --- cinder/tests/unit/volume/drivers/test_rbd.py | 135 +++++++++++++++++- cinder/volume/drivers/rbd.py | 22 ++- ...plain-deletion-fails-50cef4a8a8010ba9.yaml | 24 ++++ 3 files changed, 170 insertions(+), 11 deletions(-) create mode 100644 releasenotes/notes/bug-1941815-RBD-call-trash-operation-when-plain-deletion-fails-50cef4a8a8010ba9.yaml diff --git a/cinder/tests/unit/volume/drivers/test_rbd.py b/cinder/tests/unit/volume/drivers/test_rbd.py index 5ccee4eefeb..a85b840e01f 100644 --- a/cinder/tests/unit/volume/drivers/test_rbd.py +++ b/cinder/tests/unit/volume/drivers/test_rbd.py @@ -80,6 +80,10 @@ class MockPermissionError(MockException): errno = errno.EPERM +class MockImageHasSnapshotsException(MockException): + """Used as mock for rbd.ImageHasSnapshots.""" + + class KeyObject(object): def get_encoded(arg): return "asdf".encode('utf-8') @@ -108,6 +112,7 @@ def common_mocks(f): inst.mock_rbd.ImageBusy = MockImageBusyException inst.mock_rbd.ImageNotFound = MockImageNotFoundException inst.mock_rbd.ImageExists = MockImageExistsException + inst.mock_rbd.ImageHasSnapshots = MockImageHasSnapshotsException inst.mock_rbd.InvalidArgument = MockImageNotFoundException inst.mock_rbd.PermissionError = MockPermissionError @@ -694,6 +699,42 @@ class RBDTestCase(test.TestCase): self.assertEqual( 1, self.driver.rbd.RBD.return_value.remove.call_count) + @common_mocks + def test_delete_volume_clone_info_return_parent(self): + client = self.mock_client.return_value + self.driver.rbd.Image.return_value.list_snaps.return_value = [] + pool = 'volumes' + parent = True + parent_snap = self.snapshot_b + + mock_get_clone_info = self.mock_object(self.driver, '_get_clone_info', + return_value=(pool, + parent, + parent_snap)) + + m_del_clone_parent_refs = self.mock_object(self.driver, + '_delete_clone_parent_refs') + m_del_back_snaps = self.mock_object(self.driver, + '_delete_backup_snaps') + + self.driver.delete_volume(self.volume_a) + + mock_get_clone_info.assert_called_once_with( + self.mock_rbd.Image.return_value, + self.volume_a.name, + None) + m_del_clone_parent_refs.assert_called_once() + (self.driver.rbd.Image.return_value + .list_snaps.assert_called_once_with()) + client.__enter__.assert_called_once_with() + client.__exit__.assert_called_once_with(None, None, None) + m_del_back_snaps.assert_called_once_with( + self.mock_rbd.Image.return_value) + self.assertFalse( + self.driver.rbd.Image.return_value.unprotect_snap.called) + self.assertEqual( + 1, self.driver.rbd.RBD.return_value.remove.call_count) + @common_mocks def test_deferred_deletion(self): drv = self._make_drv({'enable_deferred_deletion': True, @@ -814,8 +855,7 @@ class RBDTestCase(test.TestCase): mock_delete_backup_snaps: with mock.patch.object(driver, 'RADOSClient') as \ mock_rados_client: - self.assertRaises(exception.VolumeIsBusy, - self.driver.delete_volume, self.volume_a) + self.driver.delete_volume(self.volume_a) mock_get_clone_info.assert_called_once_with( self.mock_rbd.Image.return_value, @@ -829,11 +869,58 @@ class RBDTestCase(test.TestCase): self.assertFalse( self.mock_rbd.Image.return_value.unprotect_snap.called) self.assertEqual( - 3, self.mock_rbd.RBD.return_value.remove.call_count) - self.assertEqual(3, len(RAISED_EXCEPTIONS)) + 1, self.mock_rbd.RBD.return_value.remove.call_count) + self.assertEqual(1, len(RAISED_EXCEPTIONS)) # Make sure the exception was raised self.assertIn(self.mock_rbd.ImageBusy, RAISED_EXCEPTIONS) + self.mock_rbd.RBD.return_value.trash_move.\ + assert_called_once_with( + mock.ANY, + self.volume_a.name, + 0) + + @common_mocks + def test_delete_volume_has_snapshots(self): + self.mock_rbd.Image.return_value.list_snaps.return_value = [] + + self.mock_rbd.RBD.return_value.remove.side_effect = ( + self.mock_rbd.ImageHasSnapshots) + mock_get_clone_info = self.mock_object(self.driver, + '_get_clone_info', + return_value=(None, + None, + None)) + m_del_backup_snaps = self.mock_object(self.driver, + '_delete_backup_snaps') + + with mock.patch.object(driver, 'RADOSClient') as mock_rados_client: + self.driver.delete_volume(self.volume_a) + + mock_get_clone_info.assert_called_once_with( + self.mock_rbd.Image.return_value, + self.volume_a.name, + None) + (self.mock_rbd.Image.return_value.list_snaps + .assert_called_once_with()) + mock_rados_client.assert_called_once_with(self.driver) + m_del_backup_snaps.assert_called_once_with( + self.mock_rbd.Image.return_value) + self.assertFalse( + self.mock_rbd.Image.return_value.unprotect_snap.called) + self.assertEqual( + 1, self.mock_rbd.RBD.return_value.remove.call_count) + self.assertEqual(1, len(RAISED_EXCEPTIONS)) + # Make sure the exception was raised + self.assertIn(self.mock_rbd.ImageHasSnapshots, + RAISED_EXCEPTIONS) + + self.mock_rbd.RBD.return_value.trash_move.\ + assert_called_once_with( + mock.ANY, + self.volume_a.name, + 0) + @common_mocks def test_delete_volume_not_found(self): self.mock_rbd.Image.return_value.list_snaps.return_value = [] @@ -866,6 +953,46 @@ class RBDTestCase(test.TestCase): self.assertEqual([self.mock_rbd.ImageNotFound], RAISED_EXCEPTIONS) + @common_mocks + def test_delete_volume_w_clone_snaps(self): + client = self.mock_client.return_value + snapshots = [ + {'id': 1, 'name': 'snapshot-00000000-0000-0000-0000-000000000000', + 'size': 2147483648}, + {'id': 2, 'name': 'snap1', 'size': 6442450944}, + {'id': 3, 'size': 8589934592, + 'name': + 'volume-22222222-2222-2222-2222-222222222222.clone_snap'}, + {'id': 4, 'size': 5368709120, + 'name': + 'backup.33333333-3333-3333-3333-333333333333.snap.123'}] + + self.mock_rbd.Image.return_value.list_snaps.return_value = snapshots + mock_get_clone_info = self.mock_object(self.driver, + '_get_clone_info', + return_value=(None, + None, + None)) + with mock.patch.object(self.driver, '_delete_backup_snaps') as \ + mock_delete_backup_snaps: + + self.driver.delete_volume(self.volume_a) + + mock_get_clone_info.assert_called_once_with( + self.mock_rbd.Image.return_value, + self.volume_a.name, + snapshots[2]['name']) + (self.driver.rbd.Image.return_value + .list_snaps.assert_called_once_with()) + client.__enter__.assert_called_once_with() + client.__exit__.assert_called_once_with(None, None, None) + mock_delete_backup_snaps.assert_called_once_with( + self.mock_rbd.Image.return_value) + self.assertFalse( + self.driver.rbd.Image.return_value.unprotect_snap.called) + self.assertEqual( + 1, self.driver.rbd.RBD.return_value.rename.call_count) + @common_mocks @mock.patch('cinder.objects.Volume.get_by_id') def test_create_snapshot(self, volume_get_by_id): diff --git a/cinder/volume/drivers/rbd.py b/cinder/volume/drivers/rbd.py index 4fd0ea9f988..d73c8582e6e 100644 --- a/cinder/volume/drivers/rbd.py +++ b/cinder/volume/drivers/rbd.py @@ -1211,8 +1211,6 @@ class RBDDriver(driver.CloneableImageVD, driver.MigrateVD, clone_snap = snap['name'] break - raise exception.VolumeIsBusy(volume_name=volume_name) - # Determine if this volume is itself a clone _pool, parent, parent_snap = self._get_clone_info(rbd_image, volume_name, @@ -1225,13 +1223,23 @@ class RBDDriver(driver.CloneableImageVD, driver.MigrateVD, self.configuration.rados_connection_retries) def _try_remove_volume(client, volume_name): if self.configuration.enable_deferred_deletion: - LOG.debug("moving volume %s to trash", volume_name) delay = self.configuration.deferred_deletion_delay - self.RBDProxy().trash_move(client.ioctx, - volume_name, - delay) else: - self.RBDProxy().remove(client.ioctx, volume_name) + try: + self.RBDProxy().remove(client.ioctx, volume_name) + return + except (self.rbd.ImageHasSnapshots, self.rbd.ImageBusy): + delay = 0 + LOG.debug("moving volume %s to trash", volume_name) + # When using the RBD v2 clone api, deleting a volume + # that has a snapshot in the trash space raises a + # busy exception. + # In order to solve this, call the trash operation + # which should succeed when the volume has + # dependencies. + self.RBDProxy().trash_move(client.ioctx, + volume_name, + delay) if clone_snap is None: LOG.debug("deleting rbd volume %s", volume_name) diff --git a/releasenotes/notes/bug-1941815-RBD-call-trash-operation-when-plain-deletion-fails-50cef4a8a8010ba9.yaml b/releasenotes/notes/bug-1941815-RBD-call-trash-operation-when-plain-deletion-fails-50cef4a8a8010ba9.yaml new file mode 100644 index 00000000000..6d4aae88e0e --- /dev/null +++ b/releasenotes/notes/bug-1941815-RBD-call-trash-operation-when-plain-deletion-fails-50cef4a8a8010ba9.yaml @@ -0,0 +1,24 @@ +--- +fixes: + - | + RBD driver `bug #1941815 + `_: Fixed + deleting volumes with snapshots/volumes in the ceph trash space. + +upgrade: + - | + **RBD driver: Enable Ceph V2 Clone API and Ceph Trash auto purge** + + In light of the fix for RBD driver `bug #1941815 + `_, we want to + bring the following information to your attention. + + Using the v2 clone format for cloned volumes allows volumes with + dependent images to be moved to the trash - where they remain + until purged - and allow the RBD driver to postpone the deletion + until the volume has no dependent images. Configuring the trash + purge is recommended to avoid wasting space with these trashed + volumes. Since the Ceph Octopus release, the trash can be + configured to automatically purge on a defined schedule. + See the ``rbd trash purge schedule`` commands in the `rbd manpage + `_.