Merge "Add a workaround config toggle to refuse ceph image upload"
This commit is contained in:
@@ -246,6 +246,30 @@ candidates if necessary. This has a slight performance impact and is not
|
|||||||
necessary on new or upgraded deployments where the new configuration has been
|
necessary on new or upgraded deployments where the new configuration has been
|
||||||
set on all hosts. By setting this option, the second lookup is disabled and the
|
set on all hosts. By setting this option, the second lookup is disabled and the
|
||||||
scheduler will only request ``PCPU``-based allocations.
|
scheduler will only request ``PCPU``-based allocations.
|
||||||
|
"""),
|
||||||
|
cfg.BoolOpt(
|
||||||
|
'never_download_image_if_on_rbd',
|
||||||
|
default=False,
|
||||||
|
help="""
|
||||||
|
When booting from an image on a ceph-backed compute node, if the image does not
|
||||||
|
already reside on the ceph cluster (as would be the case if glance is
|
||||||
|
also using the same cluster), nova will download the image from glance and
|
||||||
|
upload it to ceph itself. If using multiple ceph clusters, this may cause nova
|
||||||
|
to unintentionally duplicate the image in a non-COW-able way in the local
|
||||||
|
ceph deployment, wasting space.
|
||||||
|
|
||||||
|
For more information, refer to the bug report:
|
||||||
|
|
||||||
|
https://bugs.launchpad.net/nova/+bug/1858877
|
||||||
|
|
||||||
|
Enabling this option will cause nova to *refuse* to boot an instance if it
|
||||||
|
would require downloading the image from glance and uploading it to ceph
|
||||||
|
itself.
|
||||||
|
|
||||||
|
Related options:
|
||||||
|
|
||||||
|
* ``compute_driver`` (libvirt)
|
||||||
|
* ``[libvirt]/images_type`` (rbd)
|
||||||
"""),
|
"""),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@@ -21454,6 +21454,52 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
|||||||
None)
|
None)
|
||||||
self.assertFalse(mock_inject.called)
|
self.assertFalse(mock_inject.called)
|
||||||
|
|
||||||
|
@mock.patch('nova.virt.libvirt.utils.fetch_image')
|
||||||
|
@mock.patch('nova.virt.libvirt.storage.rbd_utils.RBDDriver')
|
||||||
|
@mock.patch.object(imagebackend, 'IMAGE_API')
|
||||||
|
def test_create_fetch_image_ceph_workaround(self, mock_image, mock_rbd,
|
||||||
|
mock_fetch):
|
||||||
|
# Make sure that rbd clone will fail as un-clone-able
|
||||||
|
mock_rbd.is_cloneable.return_value = False
|
||||||
|
# Make sure the rbd code thinks the image does not already exist
|
||||||
|
mock_rbd.return_value.exists.return_value = False
|
||||||
|
# Make sure the rbd code says the image is small
|
||||||
|
mock_rbd.return_value.size.return_value = 128 * units.Mi
|
||||||
|
# Make sure IMAGE_API.get() returns a raw image
|
||||||
|
mock_image.get.return_value = {'locations': [], 'disk_format': 'raw'}
|
||||||
|
|
||||||
|
instance = self._create_instance()
|
||||||
|
disk_images = {'image_id': 'foo'}
|
||||||
|
self.flags(images_type='rbd', group='libvirt')
|
||||||
|
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
|
||||||
|
|
||||||
|
def do_create():
|
||||||
|
# Reset the fetch mock and run our driver method so we can
|
||||||
|
# check for called-ness after each attempt
|
||||||
|
mock_fetch.reset_mock()
|
||||||
|
drvr._create_and_inject_local_root(self.context,
|
||||||
|
instance,
|
||||||
|
False,
|
||||||
|
'',
|
||||||
|
disk_images,
|
||||||
|
get_injection_info(),
|
||||||
|
None)
|
||||||
|
|
||||||
|
# Do an image create with rbd
|
||||||
|
do_create()
|
||||||
|
# Make sure it tried fetch, which implies that it tried and
|
||||||
|
# failed to clone.
|
||||||
|
mock_fetch.assert_called()
|
||||||
|
|
||||||
|
# Enable the workaround
|
||||||
|
self.flags(never_download_image_if_on_rbd=True,
|
||||||
|
group='workarounds')
|
||||||
|
# Ensure that we raise the original ImageUnacceptable from the
|
||||||
|
# failed clone...
|
||||||
|
self.assertRaises(exception.ImageUnacceptable, do_create)
|
||||||
|
# ...and ensure that we did _not_ try to fetch
|
||||||
|
mock_fetch.assert_not_called()
|
||||||
|
|
||||||
@mock.patch('nova.virt.netutils.get_injected_network_template')
|
@mock.patch('nova.virt.netutils.get_injected_network_template')
|
||||||
@mock.patch('nova.virt.disk.api.inject_data')
|
@mock.patch('nova.virt.disk.api.inject_data')
|
||||||
@mock.patch.object(libvirt_driver.LibvirtDriver, "_conn")
|
@mock.patch.object(libvirt_driver.LibvirtDriver, "_conn")
|
||||||
|
@@ -3867,9 +3867,24 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||||||
backend.create_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
|
backend.create_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
|
||||||
if backend.SUPPORTS_CLONE:
|
if backend.SUPPORTS_CLONE:
|
||||||
def clone_fallback_to_fetch(*args, **kwargs):
|
def clone_fallback_to_fetch(*args, **kwargs):
|
||||||
|
refuse_fetch = (
|
||||||
|
CONF.libvirt.images_type == 'rbd' and
|
||||||
|
CONF.workarounds.never_download_image_if_on_rbd)
|
||||||
try:
|
try:
|
||||||
backend.clone(context, disk_images['image_id'])
|
backend.clone(context, disk_images['image_id'])
|
||||||
except exception.ImageUnacceptable:
|
except exception.ImageUnacceptable:
|
||||||
|
if refuse_fetch:
|
||||||
|
# Re-raise the exception from the failed
|
||||||
|
# ceph clone. The compute manager expects
|
||||||
|
# ImageUnacceptable as a possible result
|
||||||
|
# of spawn(), from which this is called.
|
||||||
|
with excutils.save_and_reraise_exception():
|
||||||
|
LOG.warning(
|
||||||
|
'Image %s is not on my ceph and '
|
||||||
|
'[workarounds]/'
|
||||||
|
'never_download_image_if_on_rbd=True;'
|
||||||
|
' refusing to fetch and upload.',
|
||||||
|
disk_images['image_id'])
|
||||||
libvirt_utils.fetch_image(*args, **kwargs)
|
libvirt_utils.fetch_image(*args, **kwargs)
|
||||||
fetch_func = clone_fallback_to_fetch
|
fetch_func = clone_fallback_to_fetch
|
||||||
else:
|
else:
|
||||||
|
@@ -0,0 +1,19 @@
|
|||||||
|
---
|
||||||
|
other:
|
||||||
|
- |
|
||||||
|
Nova now has a config option called
|
||||||
|
``[workarounds]/never_download_image_if_on_rbd`` which helps to
|
||||||
|
avoid pathological storage behavior with multiple ceph clusters.
|
||||||
|
Currently, Nova does *not* support multiple ceph clusters
|
||||||
|
properly, but Glance can be configured with them. If an instance
|
||||||
|
is booted from an image residing in a ceph cluster other than the
|
||||||
|
one Nova knows about, Nova will silently download it from Glance and
|
||||||
|
re-upload the image to the local ceph privately for that
|
||||||
|
instance. Unlike the behavior you expect when configuring Nova and
|
||||||
|
Glance for ceph, Nova will continue to do this over and over for
|
||||||
|
the same image when subsequent instances are booted, consuming a
|
||||||
|
large amount of storage unexpectedly. The new workaround option
|
||||||
|
will cause Nova to refuse to do this download/upload behavior and
|
||||||
|
instead fail the instance boot. It is simply a stop-gap effort to
|
||||||
|
prevent unsupported deployments with multiple ceph clusters from
|
||||||
|
silently consuming large amounts of disk space.
|
Reference in New Issue
Block a user