From 0be2737d66ea90b64270e09402ea5b088a28f415 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Tue, 27 Aug 2024 08:21:00 -0700 Subject: [PATCH] Use format_inspector from oslo This removes glance's in-tree format_inspector implementation and makes it use the newly released implementation from oslo_utils. We need very few changes for this to work; in summary: - Oslo's safety_check() raises instead of returning False. - Oslo uses InspectWrapper, which does in-flight detection that we need to handle slightly differently. - Oslo detects GPT-formatted disks as 'gpt' format, which we need to consider as 'raw' for compatibility reasons. Depends-On: https://review.opendev.org/c/openstack/tempest/+/927395 Change-Id: I7e32d5ae717224b504e06f7fccb854f68c713643 --- .../async_/flows/plugins/image_conversion.py | 22 +- glance/common/format_inspector.py | 1022 ----------------- glance/location.py | 60 +- .../flows/plugins/test_image_conversion.py | 22 +- .../unit/common/test_format_inspector.py | 648 ----------- glance/tests/unit/test_store_image.py | 28 +- requirements.txt | 2 +- 7 files changed, 80 insertions(+), 1724 deletions(-) delete mode 100755 glance/common/format_inspector.py delete mode 100644 glance/tests/unit/common/test_format_inspector.py diff --git a/glance/async_/flows/plugins/image_conversion.py b/glance/async_/flows/plugins/image_conversion.py index a2fed6ea02..fa0e412faa 100644 --- a/glance/async_/flows/plugins/image_conversion.py +++ b/glance/async_/flows/plugins/image_conversion.py @@ -21,11 +21,11 @@ from oslo_config import cfg from oslo_log import log as logging from oslo_utils import encodeutils from oslo_utils import excutils +from oslo_utils.imageutils import format_inspector from taskflow.patterns import linear_flow as lf from taskflow import task from glance.async_ import utils -from glance.common import format_inspector from glance.i18n import _, _LI LOG = logging.getLogger(__name__) @@ -99,12 +99,11 @@ class _ConvertImage(task.Task): # See https://bugs.launchpad.net/nova/+bug/2059809 for details.
try: inspector = format_inspector.detect_file_format(self.src_path) - if not inspector.safety_check(): - LOG.error('Image failed %s safety check; aborting conversion', - source_format) - raise RuntimeError('Image has disallowed configuration') - except RuntimeError: - raise + inspector.safety_check() + except format_inspector.SafetyCheckFailed: + LOG.error('Image failed %s safety check; aborting conversion', + source_format) + raise RuntimeError('Image has disallowed configuration') except format_inspector.ImageFormatError as e: LOG.error('Image claimed to be %s format failed format ' 'inspection: %s', source_format, e) @@ -113,7 +112,12 @@ class _ConvertImage(task.Task): LOG.exception('Unknown error inspecting image format: %s', e) raise RuntimeError('Unable to inspect image') - if str(inspector) == 'iso': + detected_format = str(inspector) + if detected_format == 'gpt': + # FIXME(danms): We need to consider GPT to be raw for compatibility + detected_format = 'raw' + + if detected_format == 'iso': if source_format == 'iso': # NOTE(abhishekk): Excluding conversion and preserving image # disk_format as `iso` only @@ -126,7 +130,7 @@ class _ConvertImage(task.Task): LOG.error('Image claimed to be %s format but format ' 'inspection found: ISO', source_format) raise RuntimeError("Image has disallowed configuration") - elif str(inspector) != source_format: + elif detected_format != source_format: LOG.error('Image claimed to be %s format failed format ' 'inspection', source_format) raise RuntimeError('Image format mismatch') diff --git a/glance/common/format_inspector.py b/glance/common/format_inspector.py deleted file mode 100755 index c083448ac1..0000000000 --- a/glance/common/format_inspector.py +++ /dev/null @@ -1,1022 +0,0 @@ -# Copyright 2020 Red Hat, Inc -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -""" -This is a python implementation of virtual disk format inspection routines -gathered from various public specification documents, as well as qemu disk -driver code. It attempts to store and parse the minimum amount of data -required, and in a streaming-friendly manner to collect metadata about -complex-format images. -""" - -import struct - -from oslo_log import log as logging -from oslo_utils import units - -LOG = logging.getLogger(__name__) - - -def chunked_reader(fileobj, chunk_size=512): - while True: - chunk = fileobj.read(chunk_size) - if not chunk: - break - yield chunk - - -class CaptureRegion(object): - """Represents a region of a file we want to capture. - - A region of a file we want to capture requires a byte offset into - the file and a length. This is expected to be used by a data - processing loop, calling capture() with the most recently-read - chunk. This class handles the task of grabbing the desired region - of data across potentially multiple fractional and unaligned reads. 
- - :param offset: Byte offset into the file starting the region - :param length: The length of the region - """ - def __init__(self, offset, length): - self.offset = offset - self.length = length - self.data = b'' - - @property - def complete(self): - """Returns True when we have captured the desired data.""" - return self.length == len(self.data) - - def capture(self, chunk, current_position): - """Process a chunk of data. - - This should be called for each chunk in the read loop, at least - until complete returns True. - - :param chunk: A chunk of bytes in the file - :param current_position: The position of the file processed by the - read loop so far. Note that this will be - the position in the file *after* the chunk - being presented. - """ - read_start = current_position - len(chunk) - if (read_start <= self.offset <= current_position or - self.offset <= read_start <= (self.offset + self.length)): - if read_start < self.offset: - lead_gap = self.offset - read_start - else: - lead_gap = 0 - self.data += chunk[lead_gap:] - self.data = self.data[:self.length] - - -class ImageFormatError(Exception): - """An unrecoverable image format error that aborts the process.""" - pass - - -class TraceDisabled(object): - """A logger-like thing that swallows tracing when we do not want it.""" - def debug(self, *a, **k): - pass - - info = debug - warning = debug - error = debug - - -class FileInspector(object): - """A stream-based disk image inspector. - - This base class works on raw images and is subclassed for more - complex types. It is to be presented with the file to be examined - one chunk at a time, during read processing and will only store - as much data as necessary to determine required attributes of - the file. - """ - - def __init__(self, tracing=False): - self._total_count = 0 - - # NOTE(danms): The logging in here is extremely verbose for a reason, - # but should never really be enabled at that level at runtime. To - # retain all that work and assist in future debug, we have a separate - # debug flag that can be passed from a manual tool to turn it on. - if tracing: - self._log = logging.getLogger(str(self)) - else: - self._log = TraceDisabled() - self._capture_regions = {} - - def _capture(self, chunk, only=None): - for name, region in self._capture_regions.items(): - if only and name not in only: - continue - if not region.complete: - region.capture(chunk, self._total_count) - - def eat_chunk(self, chunk): - """Call this to present chunks of the file to the inspector.""" - pre_regions = set(self._capture_regions.keys()) - - # Increment our position-in-file counter - self._total_count += len(chunk) - - # Run through the regions we know of to see if they want this - # data - self._capture(chunk) - - # Let the format do some post-read processing of the stream - self.post_process() - - # Check to see if the post-read processing added new regions - # which may require the current chunk. - new_regions = set(self._capture_regions.keys()) - pre_regions - if new_regions: - self._capture(chunk, only=new_regions) - - def post_process(self): - """Post-read hook to process what has been read so far. - - This will be called after each chunk is read and potentially captured - by the defined regions. If any regions are defined by this call, - those regions will be presented with the current chunk in case it - is within one of the new regions. 
- """ - pass - - def region(self, name): - """Get a CaptureRegion by name.""" - return self._capture_regions[name] - - def new_region(self, name, region): - """Add a new CaptureRegion by name.""" - if self.has_region(name): - # This is a bug, we tried to add the same region twice - raise ImageFormatError('Inspector re-added region %s' % name) - self._capture_regions[name] = region - - def has_region(self, name): - """Returns True if named region has been defined.""" - return name in self._capture_regions - - @property - def format_match(self): - """Returns True if the file appears to be the expected format.""" - return True - - @property - def virtual_size(self): - """Returns the virtual size of the disk image, or zero if unknown.""" - return self._total_count - - @property - def actual_size(self): - """Returns the total size of the file, usually smaller than - virtual_size. NOTE: this will only be accurate if the entire - file is read and processed. - """ - return self._total_count - - @property - def complete(self): - """Returns True if we have all the information needed.""" - return all(r.complete for r in self._capture_regions.values()) - - def __str__(self): - """The string name of this file format.""" - return 'raw' - - @property - def context_info(self): - """Return info on amount of data held in memory for auditing. - - This is a dict of region:sizeinbytes items that the inspector - uses to examine the file. - """ - return {name: len(region.data) for name, region in - self._capture_regions.items()} - - @classmethod - def from_file(cls, filename): - """Read as much of a file as necessary to complete inspection. - - NOTE: Because we only read as much of the file as necessary, the - actual_size property will not reflect the size of the file, but the - amount of data we read before we satisfied the inspector. - - Raises ImageFormatError if we cannot parse the file. - """ - inspector = cls() - with open(filename, 'rb') as f: - for chunk in chunked_reader(f): - inspector.eat_chunk(chunk) - if inspector.complete: - # No need to eat any more data - break - if not inspector.complete or not inspector.format_match: - raise ImageFormatError('File is not in requested format') - return inspector - - def safety_check(self): - """Perform some checks to determine if this file is safe. - - Returns True if safe, False otherwise. It may raise ImageFormatError - if safety cannot be guaranteed because of parsing or other errors. - """ - return True - - -# The qcow2 format consists of a big-endian 72-byte header, of which -# only a small portion has information we care about: -# -# Dec Hex Name -# 0 0x00 Magic 4-bytes 'QFI\xfb' -# 4 0x04 Version (uint32_t, should always be 2 for modern files) -# . . . -# 8 0x08 Backing file offset (uint64_t) -# 24 0x18 Size in bytes (unint64_t) -# . . . -# 72 0x48 Incompatible features bitfield (6 bytes) -# -# https://gitlab.com/qemu-project/qemu/-/blob/master/docs/interop/qcow2.txt -class QcowInspector(FileInspector): - """QEMU QCOW2 Format - - This should only require about 32 bytes of the beginning of the file - to determine the virtual size, and 104 bytes to perform the safety check. 
- """ - - BF_OFFSET = 0x08 - BF_OFFSET_LEN = 8 - I_FEATURES = 0x48 - I_FEATURES_LEN = 8 - I_FEATURES_DATAFILE_BIT = 3 - I_FEATURES_MAX_BIT = 4 - - def __init__(self, *a, **k): - super(QcowInspector, self).__init__(*a, **k) - self.new_region('header', CaptureRegion(0, 512)) - - def _qcow_header_data(self): - magic, version, bf_offset, bf_sz, cluster_bits, size = ( - struct.unpack('>4sIQIIQ', self.region('header').data[:32])) - return magic, size - - @property - def has_header(self): - return self.region('header').complete - - @property - def virtual_size(self): - if not self.region('header').complete: - return 0 - if not self.format_match: - return 0 - magic, size = self._qcow_header_data() - return size - - @property - def format_match(self): - if not self.region('header').complete: - return False - magic, size = self._qcow_header_data() - return magic == b'QFI\xFB' - - @property - def has_backing_file(self): - if not self.region('header').complete: - return None - if not self.format_match: - return False - bf_offset_bytes = self.region('header').data[ - self.BF_OFFSET:self.BF_OFFSET + self.BF_OFFSET_LEN] - # nonzero means "has a backing file" - bf_offset, = struct.unpack('>Q', bf_offset_bytes) - return bf_offset != 0 - - @property - def has_unknown_features(self): - if not self.region('header').complete: - return None - if not self.format_match: - return False - i_features = self.region('header').data[ - self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN] - - # This is the maximum byte number we should expect any bits to be set - max_byte = self.I_FEATURES_MAX_BIT // 8 - - # The flag bytes are in big-endian ordering, so if we process - # them in index-order, they're reversed - for i, byte_num in enumerate(reversed(range(self.I_FEATURES_LEN))): - if byte_num == max_byte: - # If we're in the max-allowed byte, allow any bits less than - # the maximum-known feature flag bit to be set - allow_mask = ((1 << self.I_FEATURES_MAX_BIT) - 1) - elif byte_num > max_byte: - # If we're above the byte with the maximum known feature flag - # bit, then we expect all zeroes - allow_mask = 0x0 - else: - # Any earlier-than-the-maximum byte can have any of the flag - # bits set - allow_mask = 0xFF - - if i_features[i] & ~allow_mask: - LOG.warning('Found unknown feature bit in byte %i: %s/%s', - byte_num, bin(i_features[byte_num] & ~allow_mask), - bin(allow_mask)) - return True - - return False - - @property - def has_data_file(self): - if not self.region('header').complete: - return None - if not self.format_match: - return False - i_features = self.region('header').data[ - self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN] - - # First byte of bitfield, which is i_features[7] - byte = self.I_FEATURES_LEN - 1 - self.I_FEATURES_DATAFILE_BIT // 8 - # Third bit of bitfield, which is 0x04 - bit = 1 << (self.I_FEATURES_DATAFILE_BIT - 1 % 8) - return bool(i_features[byte] & bit) - - def __str__(self): - return 'qcow2' - - def safety_check(self): - return (not self.has_backing_file and - not self.has_data_file and - not self.has_unknown_features) - - -class QEDInspector(FileInspector): - def __init__(self, tracing=False): - super().__init__(tracing) - self.new_region('header', CaptureRegion(0, 512)) - - @property - def format_match(self): - if not self.region('header').complete: - return False - return self.region('header').data.startswith(b'QED\x00') - - def safety_check(self): - # QED format is not supported by anyone, but we want to detect it - # and mark it as just always unsafe. 
- return False - - -# The VHD (or VPC as QEMU calls it) format consists of a big-endian -# 512-byte "footer" at the beginning of the file with various -# information, most of which does not matter to us: -# -# Dec Hex Name -# 0 0x00 Magic string (8-bytes, always 'conectix') -# 40 0x28 Disk size (uint64_t) -# -# https://github.com/qemu/qemu/blob/master/block/vpc.c -class VHDInspector(FileInspector): - """Connectix/MS VPC VHD Format - - This should only require about 512 bytes of the beginning of the file - to determine the virtual size. - """ - def __init__(self, *a, **k): - super(VHDInspector, self).__init__(*a, **k) - self.new_region('header', CaptureRegion(0, 512)) - - @property - def format_match(self): - return self.region('header').data.startswith(b'conectix') - - @property - def virtual_size(self): - if not self.region('header').complete: - return 0 - - if not self.format_match: - return 0 - - return struct.unpack('>Q', self.region('header').data[40:48])[0] - - def __str__(self): - return 'vhd' - - -# The VHDX format consists of a complex dynamic little-endian -# structure with multiple regions of metadata and data, linked by -# offsets with in the file (and within regions), identified by MSFT -# GUID strings. The header is a 320KiB structure, only a few pieces of -# which we actually need to capture and interpret: -# -# Dec Hex Name -# 0 0x00000 Identity (Technically 9-bytes, padded to 64KiB, the first -# 8 bytes of which are 'vhdxfile') -# 196608 0x30000 The Region table (64KiB of a 32-byte header, followed -# by up to 2047 36-byte region table entry structures) -# -# The region table header includes two items we need to read and parse, -# which are: -# -# 196608 0x30000 4-byte signature ('regi') -# 196616 0x30008 Entry count (uint32-t) -# -# The region table entries follow the region table header immediately -# and are identified by a 16-byte GUID, and provide an offset of the -# start of that region. We care about the "metadata region", identified -# by the METAREGION class variable. The region table entry is (offsets -# from the beginning of the entry, since it could be in multiple places): -# -# 0 0x00000 16-byte MSFT GUID -# 16 0x00010 Offset of the actual metadata region (uint64_t) -# -# When we find the METAREGION table entry, we need to grab that offset -# and start examining the region structure at that point. That -# consists of a metadata table of structures, which point to places in -# the data in an unstructured space that follows. The header is -# (offsets relative to the region start): -# -# 0 0x00000 8-byte signature ('metadata') -# . . . -# 16 0x00010 2-byte entry count (up to 2047 entries max) -# -# This header is followed by the specified number of metadata entry -# structures, identified by GUID: -# -# 0 0x00000 16-byte MSFT GUID -# 16 0x00010 4-byte offset (uint32_t, relative to the beginning of -# the metadata region) -# -# We need to find the "Virtual Disk Size" metadata item, identified by -# the GUID in the VIRTUAL_DISK_SIZE class variable, grab the offset, -# add it to the offset of the metadata region, and examine that 8-byte -# chunk of data that follows. -# -# The "Virtual Disk Size" is a naked uint64_t which contains the size -# of the virtual disk, and is our ultimate target here. -# -# https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-vhdx/83e061f8-f6e2-4de1-91bd-5d518a43d477 -class VHDXInspector(FileInspector): - """MS VHDX Format - - This requires some complex parsing of the stream. 
The first 256KiB - of the image is stored to get the header and region information, - and then we capture the first metadata region to read those - records, find the location of the virtual size data and parse - it. This needs to store the metadata table entries up until the - VDS record, which may consist of up to 2047 32-byte entries at - max. Finally, it must store a chunk of data at the offset of the - actual VDS uint64. - - """ - METAREGION = '8B7CA206-4790-4B9A-B8FE-575F050F886E' - VIRTUAL_DISK_SIZE = '2FA54224-CD1B-4876-B211-5DBED83BF4B8' - VHDX_METADATA_TABLE_MAX_SIZE = 32 * 2048 # From qemu - - def __init__(self, *a, **k): - super(VHDXInspector, self).__init__(*a, **k) - self.new_region('ident', CaptureRegion(0, 32)) - self.new_region('header', CaptureRegion(192 * 1024, 64 * 1024)) - - def post_process(self): - # After reading a chunk, we may have the following conditions: - # - # 1. We may have just completed the header region, and if so, - # we need to immediately read and calculate the location of - # the metadata region, as it may be starting in the same - # read we just did. - # 2. We may have just completed the metadata region, and if so, - # we need to immediately calculate the location of the - # "virtual disk size" record, as it may be starting in the - # same read we just did. - if self.region('header').complete and not self.has_region('metadata'): - region = self._find_meta_region() - if region: - self.new_region('metadata', region) - elif self.has_region('metadata') and not self.has_region('vds'): - region = self._find_meta_entry(self.VIRTUAL_DISK_SIZE) - if region: - self.new_region('vds', region) - - @property - def format_match(self): - return self.region('ident').data.startswith(b'vhdxfile') - - @staticmethod - def _guid(buf): - """Format a MSFT GUID from the 16-byte input buffer.""" - guid_format = '= 2048: - raise ImageFormatError('Region count is %i (limit 2047)' % count) - - # Process the regions until we find the metadata one; grab the - # offset and return - self._log.debug('Region entry first is %x', region_entry_first) - self._log.debug('Region entries %i', count) - meta_offset = 0 - for i in range(0, count): - entry_start = region_entry_first + (i * 32) - entry_end = entry_start + 32 - entry = self.region('header').data[entry_start:entry_end] - self._log.debug('Entry offset is %x', entry_start) - - # GUID is the first 16 bytes - guid = self._guid(entry[:16]) - if guid == self.METAREGION: - # This entry is the metadata region entry - meta_offset, meta_len, meta_req = struct.unpack( - '= 2048: - raise ImageFormatError( - 'Metadata item count is %i (limit 2047)' % count) - - for i in range(0, count): - entry_offset = 32 + (i * 32) - guid = self._guid(meta_buffer[entry_offset:entry_offset + 16]) - if guid == desired_guid: - # Found the item we are looking for by id. 
- # Stop our region from capturing - item_offset, item_length, _reserved = struct.unpack( - ' 1: - all_formats = [str(inspector) for inspector in detections] - raise ImageFormatError( - 'Multiple formats detected: %s' % ', '.join(all_formats)) - return inspectors['raw'] if not detections else detections[0] diff --git a/glance/location.py b/glance/location.py index 60120717d8..935984a071 100644 --- a/glance/location.py +++ b/glance/location.py @@ -25,9 +25,9 @@ from oslo_config import cfg from oslo_log import log as logging from oslo_utils import encodeutils from oslo_utils import excutils +from oslo_utils.imageutils import format_inspector from glance.common import exception -from glance.common import format_inspector from glance.common import store_utils from glance.common import utils import glance.domain.proxy @@ -580,37 +580,55 @@ class ImageProxy(glance.domain.proxy.Image): img_signature_key_type=key_type ) - if not self.image.virtual_size: - inspector = format_inspector.get_inspector(self.image.disk_format) - else: - # No need to do this again - inspector = None - - if inspector and self.image.container_format == 'bare': - fmt = inspector() - data = format_inspector.InfoWrapper(data, fmt) - LOG.debug('Enabling in-flight format inspection for %s', fmt) - else: - fmt = None + if self.image.container_format == 'bare': + # FIXME(danms): We do not pass an expected_format here because + # we do not (currently) want to interrupt the data pipeline if + # the format does not match. + data = format_inspector.InspectWrapper(data) + LOG.debug('Enabling in-flight format inspection for %s', + self.image.disk_format) self._upload_to_store(data, verifier, backend, size) + try: + data.close() + except AttributeError: + # We did not get a closeable or file-like object as data and/or + # did not wrap it because of container_format + pass + virtual_size = 0 - if fmt and fmt.format_match: - try: - virtual_size = fmt.virtual_size + try: + inspector = data.format + format = str(inspector) + # format_inspector detects GPT, which we need to treat as raw for + # compatibility reasons + if format == 'gpt': + format = 'raw' + if format == self.image.disk_format: + virtual_size = inspector.virtual_size LOG.info('Image format matched and virtual size computed: %i', virtual_size) - except Exception as e: - LOG.error(_LE('Unable to determine virtual_size because: %s'), - e) - elif fmt: + else: + LOG.info('Image declared as %s but detected as %s, ' + 'not updating virtual_size', + self.image.disk_format, inspector) + except AttributeError: + # We must not have wrapped this for inspection because of + # container_format + pass + except format_inspector.ImageFormatError: + # No format matched! 
+ # FIXME(danms): This should be an error in the future for most + # cases LOG.warning('Image format %s did not match; ' 'unable to calculate virtual size', self.image.disk_format) + except Exception as e: + LOG.error(_LE('Unable to determine virtual_size because: %s'), e) if virtual_size: - self.image.virtual_size = fmt.virtual_size + self.image.virtual_size = virtual_size if set_active and self.image.status != 'active': self.image.status = 'active' diff --git a/glance/tests/unit/async_/flows/plugins/test_image_conversion.py b/glance/tests/unit/async_/flows/plugins/test_image_conversion.py index 861a36ed07..cf25aa47ff 100644 --- a/glance/tests/unit/async_/flows/plugins/test_image_conversion.py +++ b/glance/tests/unit/async_/flows/plugins/test_image_conversion.py @@ -21,11 +21,11 @@ from unittest import mock import glance_store from oslo_concurrency import processutils from oslo_config import cfg +from oslo_utils.imageutils import format_inspector import glance.async_.flows.api_image_import as import_flow import glance.async_.flows.plugins.image_conversion as image_conversion from glance.async_ import utils as async_utils -from glance.common import format_inspector from glance.common import utils from glance import domain from glance import gateway @@ -97,9 +97,9 @@ class TestConvertImageTask(test_utils.BaseTestCase): self.task.task_id) self.detect_file_format_mock = mock.MagicMock() - self.useFixture(fixtures.MockPatch('glance.common.format_inspector.' - 'detect_file_format', - self.detect_file_format_mock)) + self.useFixture(fixtures.MockPatch( + 'oslo_utils.imageutils.format_inspector.detect_file_format', + self.detect_file_format_mock)) @mock.patch.object(os, 'stat') @mock.patch.object(os, 'remove') @@ -126,7 +126,6 @@ class TestConvertImageTask(test_utils.BaseTestCase): 'virtual-size': 456} inspector = self.detect_file_format_mock.return_value inspector.__str__.return_value = 'raw' - inspector.safety_check.return_value = True image_convert.execute('file:///test/path.raw') # NOTE(hemanthm): Asserting that the source format is passed @@ -164,7 +163,6 @@ class TestConvertImageTask(test_utils.BaseTestCase): self.img_repo.get.return_value = image inspector = self.detect_file_format_mock.return_value inspector.__str__.return_value = 'iso' - inspector.safety_check.return_value = True image_convert.execute('file:///test/path.iso') self.assertEqual(fmt, image.disk_format) @@ -206,7 +204,6 @@ class TestConvertImageTask(test_utils.BaseTestCase): with mock.patch.object(processutils, 'execute') as exc_mock: inspector = self.detect_file_format_mock.return_value inspector.__str__.return_value = 'qcow2' - inspector.safety_check.return_value = True exc_mock.side_effect = OSError('fail') self.assertRaises(OSError, convert.execute, 'file:///test/path.raw') @@ -227,7 +224,6 @@ class TestConvertImageTask(test_utils.BaseTestCase): exc_mock.return_value = '', 'some error' inspector = self.detect_file_format_mock.return_value inspector.__str__.return_value = 'qcow2' - inspector.safety_check.return_value = True self.assertRaises(RuntimeError, convert.execute, 'file:///test/path.raw') exc_mock.assert_called_once_with( @@ -249,7 +245,6 @@ class TestConvertImageTask(test_utils.BaseTestCase): with mock.patch.object(processutils, 'execute') as exc_mock: inspector = self.detect_file_format_mock.return_value inspector.__str__.return_value = 'qcow2' - inspector.safety_check.return_value = True exc_mock.return_value = json.dumps(data), '' e = self.assertRaises(RuntimeError, convert.execute, 'file:///test/path.qcow') 
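The glance/location.py hunks above wrap the incoming data in oslo's InspectWrapper so that format detection happens in-flight while the store consumes the stream, and only afterwards ask the wrapper for the detected format. A minimal sketch of that pattern outside the glance data path might look like the following; the probe_stream_format helper and the 64 KiB chunk size are illustrative assumptions, not part of this patch or of oslo.utils.

from oslo_utils.imageutils import format_inspector


def probe_stream_format(fileobj, chunk_size=64 * 1024):
    """Drain a readable stream and report the detected disk format."""
    wrapper = format_inspector.InspectWrapper(fileobj)
    while wrapper.read(chunk_size):
        # In glance the store backend consumes these chunks; here we just
        # drain the stream so the inspectors see all of the data.
        pass
    wrapper.close()

    inspector = wrapper.format  # may raise ImageFormatError if nothing matched
    detected = str(inspector)
    if detected == 'gpt':
        # As in location.py above, GPT-partitioned disks are treated as raw
        # for compatibility.
        detected = 'raw'
    return detected, inspector.virtual_size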
@@ -269,7 +264,6 @@ class TestConvertImageTask(test_utils.BaseTestCase): exc_mock.return_value = json.dumps(data), '' inspector = self.detect_file_format_mock.return_value inspector.__str__.return_value = 'qcow2' - inspector.safety_check.return_value = True e = self.assertRaises(RuntimeError, convert.execute, 'file:///test/path.qcow') self.assertEqual('QCOW images with data-file set are not allowed', @@ -284,7 +278,8 @@ class TestConvertImageTask(test_utils.BaseTestCase): def test_image_convert_fails_inspection_safety_check(self): convert = self._setup_image_convert_info_fail() inspector = self.detect_file_format_mock.return_value - inspector.safety_check.return_value = False + inspector.safety_check.side_effect = ( + format_inspector.SafetyCheckFailed({})) self.assertRaisesRegex(RuntimeError, 'Image has disallowed configuration', convert.execute, 'file:///test/path.qcow') @@ -315,7 +310,6 @@ class TestConvertImageTask(test_utils.BaseTestCase): with mock.patch.object(processutils, 'execute') as exc_mock: inspector = self.detect_file_format_mock.return_value inspector.__str__.return_value = 'vmdk' - inspector.safety_check.return_value = True exc_mock.return_value = json.dumps(data), '' convert.execute('file:///test/path.vmdk') @@ -347,7 +341,6 @@ class TestConvertImageTask(test_utils.BaseTestCase): with mock.patch.object(processutils, 'execute') as exc_mock: inspector = self.detect_file_format_mock.return_value inspector.__str__.return_value = 'raw' - inspector.safety_check.return_value = True exc_mock.side_effect = [('{"format":"raw"}', ''), OSError('convert_fail')] self.assertRaises(OSError, @@ -373,7 +366,6 @@ class TestConvertImageTask(test_utils.BaseTestCase): ('', 'some error')] inspector = self.detect_file_format_mock.return_value inspector.__str__.return_value = 'raw' - inspector.safety_check.return_value = True self.assertRaises(RuntimeError, convert.execute, 'file:///test/path.raw') exc_mock.assert_has_calls( @@ -396,7 +388,6 @@ class TestConvertImageTask(test_utils.BaseTestCase): exc_mock.return_value = ('{}', '') inspector = self.detect_file_format_mock.return_value inspector.__str__.return_value = 'qcow2' - inspector.safety_check.return_value = True exc = self.assertRaises(RuntimeError, convert.execute, 'file:///test/path.raw') self.assertIn('Image metadata disagrees about format', str(exc)) @@ -429,7 +420,6 @@ class TestConvertImageTask(test_utils.BaseTestCase): '{"format": "qcow2", "virtual-size": 123}', '') inspector = self.detect_file_format_mock.return_value inspector.__str__.return_value = 'qcow2' - inspector.safety_check.return_value = True convert.execute('file:///test/path.qcow') # Make sure we only called qemu-img for inspection, not conversion exc_mock.assert_called_once_with( diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py deleted file mode 100644 index 421c561004..0000000000 --- a/glance/tests/unit/common/test_format_inspector.py +++ /dev/null @@ -1,648 +0,0 @@ -# Copyright 2020 Red Hat, Inc -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the -# License for the specific language governing permissions and limitations -# under the License. - -import io -import os -import re -import struct -import subprocess -import tempfile -from unittest import mock - -from oslo_utils import units - -from glance.common import format_inspector -from glance.tests import utils as test_utils - - -TEST_IMAGE_PREFIX = 'glance-unittest-formatinspector-' - - -def get_size_from_qemu_img(filename): - output = subprocess.check_output('qemu-img info "%s"' % filename, - shell=True) - for line in output.split(b'\n'): - m = re.search(b'^virtual size: .* .([0-9]+) bytes', line.strip()) - if m: - return int(m.group(1)) - - raise Exception('Could not find virtual size with qemu-img') - - -class TestFormatInspectors(test_utils.BaseTestCase): - def setUp(self): - super(TestFormatInspectors, self).setUp() - self._created_files = [] - - def tearDown(self): - super(TestFormatInspectors, self).tearDown() - for fn in self._created_files: - try: - os.remove(fn) - except Exception: - pass - - def _create_iso(self, image_size, subformat='9660'): - """Create an ISO file of the given size. - :param image_size: The size of the image to create in bytes - :param subformat: The subformat to use, if any - """ - # these tests depend on mkisofs - # being installed and in the path, - # if it is not installed, skip - try: - subprocess.check_output('mkisofs --version', shell=True) - except Exception: - self.skipTest('mkisofs not installed') - size = image_size // units.Mi - base_cmd = "mkisofs" - if subformat == 'udf': - # depending on the distribution mkisofs may not support udf - # and may be provided by genisoimage instead. As a result we - # need to check if the command supports udf via help - # instead of checking the installed version. - # mkisofs --help outputs to stderr so we need to - # redirect it to stdout to use grep. - try: - subprocess.check_output( - 'mkisofs --help 2>&1 | grep udf', shell=True) - except Exception: - self.skipTest('mkisofs does not support udf format') - base_cmd += " -udf" - prefix = TEST_IMAGE_PREFIX - prefix += '-%s-' % subformat - fn = tempfile.mktemp(prefix=prefix, suffix='.iso') - self._created_files.append(fn) - subprocess.check_output( - 'dd if=/dev/zero of=%s bs=1M count=%i' % (fn, size), - shell=True) - subprocess.check_output( - '%s -V "TEST" -o %s %s' % (base_cmd, fn, fn), - shell=True) - return fn - - def _generate_bad_iso(self): - # we want to emulate a malicious user who uploads a an - # ISO file has a qcow2 header in the system area - # of the ISO file - # we will create a qcow2 image and an ISO file - # and then copy the qcow2 header to the ISO file - # e.g. 
- # mkisofs -o orig.iso /etc/resolv.conf - # qemu-img create orig.qcow2 -f qcow2 64M - # dd if=orig.qcow2 of=outcome bs=32K count=1 - # dd if=orig.iso of=outcome bs=32K skip=1 seek=1 - qcow = self._create_img('qcow2', 10 * units.Mi) - iso = self._create_iso(64 * units.Mi, subformat='9660') - # first ensure the files are valid - iso_fmt = self._test_format_at_block_size('iso', iso, 4 * units.Ki) - self.assertTrue(iso_fmt.format_match) - qcow_fmt = self._test_format_at_block_size('qcow2', qcow, 4 * units.Ki) - self.assertTrue(qcow_fmt.format_match) - # now copy the qcow2 header to an ISO file - prefix = TEST_IMAGE_PREFIX - prefix += '-bad-' - fn = tempfile.mktemp(prefix=prefix, suffix='.iso') - self._created_files.append(fn) - subprocess.check_output( - 'dd if=%s of=%s bs=32K count=1' % (qcow, fn), - shell=True) - subprocess.check_output( - 'dd if=%s of=%s bs=32K skip=1 seek=1' % (iso, fn), - shell=True) - return qcow, iso, fn - - def _create_img(self, fmt, size, subformat=None, options=None, - backing_file=None): - """Create an image file of the given format and size. - :param fmt: The format to create - :param size: The size of the image to create in bytes - :param subformat: The subformat to use, if any - :param options: A dictionary of options to pass to the format - :param backing_file: The backing file to use, if any - """ - - if fmt == 'iso': - return self._create_iso(size, subformat) - # these tests depend on qemu-img - # being installed and in the path, - # if it is not installed, skip - try: - subprocess.check_output('qemu-img --version', shell=True) - except Exception: - self.skipTest('qemu-img not installed') - - if fmt == 'vhd': - # QEMU calls the vhd format vpc - fmt = 'vpc' - - if options is None: - options = {} - opt = '' - prefix = TEST_IMAGE_PREFIX - - if subformat: - options['subformat'] = subformat - prefix += subformat + '-' - - if options: - opt += '-o ' + ','.join('%s=%s' % (k, v) - for k, v in options.items()) - - if backing_file is not None: - opt += ' -b %s -F raw' % backing_file - - fn = tempfile.mktemp(prefix=prefix, - suffix='.%s' % fmt) - self._created_files.append(fn) - subprocess.check_output( - 'qemu-img create -f %s %s %s %i' % (fmt, opt, fn, size), - shell=True) - return fn - - def _create_allocated_vmdk(self, size_mb, subformat=None): - # We need a "big" VMDK file to exercise some parts of the code of the - # format_inspector. A way to create one is to first create an empty - # file, and then to convert it with the -S 0 option. 
- - if subformat is None: - # Matches qemu-img default, see `qemu-img convert -O vmdk -o help` - subformat = 'monolithicSparse' - - prefix = TEST_IMAGE_PREFIX - prefix += '-%s-' % subformat - fn = tempfile.mktemp(prefix=prefix, suffix='.vmdk') - self._created_files.append(fn) - raw = tempfile.mktemp(prefix=prefix, suffix='.raw') - self._created_files.append(raw) - - # Create a file with pseudo-random data, otherwise it will get - # compressed in the streamOptimized format - subprocess.check_output( - 'dd if=/dev/urandom of=%s bs=1M count=%i' % (raw, size_mb), - shell=True) - - # Convert it to VMDK - subprocess.check_output( - 'qemu-img convert -f raw -O vmdk -o subformat=%s -S 0 %s %s' % ( - subformat, raw, fn), - shell=True) - return fn - - def _test_format_at_block_size(self, format_name, img, block_size): - fmt = format_inspector.get_inspector(format_name)() - self.assertIsNotNone(fmt, - 'Did not get format inspector for %s' % ( - format_name)) - wrapper = format_inspector.InfoWrapper(open(img, 'rb'), fmt) - - while True: - chunk = wrapper.read(block_size) - if not chunk: - break - - wrapper.close() - return fmt - - def _test_format_at_image_size(self, format_name, image_size, - subformat=None): - """Test the format inspector for the given format at the - given image size. - :param format_name: The format to test - :param image_size: The size of the image to create in bytes - :param subformat: The subformat to use, if any - """ - img = self._create_img(format_name, image_size, subformat=subformat) - - # Some formats have internal alignment restrictions making this not - # always exactly like image_size, so get the real value for comparison - virtual_size = get_size_from_qemu_img(img) - - # Read the format in various sizes, some of which will read whole - # sections in a single read, others will be completely unaligned, etc. - block_sizes = [64 * units.Ki, 1 * units.Mi] - # ISO images have a 32KB system area at the beginning of the image - # as a result reading that in 17 or 512 byte blocks takes too long, - # causing the test to fail. The 64KiB block size is enough to read - # the system area and header in a single read. the 1MiB block size - # adds very little time to the test so we include it. 
- if format_name != 'iso': - block_sizes.extend([17, 512]) - for block_size in block_sizes: - fmt = self._test_format_at_block_size(format_name, img, block_size) - self.assertTrue(fmt.format_match, - 'Failed to match %s at size %i block %i' % ( - format_name, image_size, block_size)) - self.assertEqual(virtual_size, fmt.virtual_size, - ('Failed to calculate size for %s at size %i ' - 'block %i') % (format_name, image_size, - block_size)) - memory = sum(fmt.context_info.values()) - self.assertLess(memory, 512 * units.Ki, - 'Format used more than 512KiB of memory: %s' % ( - fmt.context_info)) - - def _test_format(self, format_name, subformat=None): - # Try a few different image sizes, including some odd and very small - # sizes - for image_size in (512, 513, 2057, 7): - self._test_format_at_image_size(format_name, image_size * units.Mi, - subformat=subformat) - - def test_qcow2(self): - self._test_format('qcow2') - - def test_iso_9660(self): - self._test_format('iso', subformat='9660') - - def test_udf(self): - self._test_format('iso', subformat='udf') - - def test_bad_iso_qcow2(self): - _, _, fn = self._generate_bad_iso() - iso_check = self._test_format_at_block_size('iso', fn, 4 * units.Ki) - qcow_check = self._test_format_at_block_size('qcow2', fn, 4 * units.Ki) - # this system area of the ISO file is not considered part of the format - # the qcow2 header is in the system area of the ISO file - # so the ISO file is still valid - self.assertTrue(iso_check.format_match) - # the qcow2 header is in the system area of the ISO file - # but that will be parsed by the qcow2 format inspector - # and it will match - self.assertTrue(qcow_check.format_match) - # if we call format_inspector.detect_file_format it should detect - # and raise an exception because both match internally. - e = self.assertRaises( - format_inspector.ImageFormatError, - format_inspector.detect_file_format, fn) - self.assertIn('Multiple formats detected', str(e)) - - def test_vhd(self): - self._test_format('vhd') - - def test_vhdx(self): - self._test_format('vhdx') - - def test_vmdk(self): - self._test_format('vmdk') - - def test_vmdk_stream_optimized(self): - self._test_format('vmdk', 'streamOptimized') - - def test_from_file_reads_minimum(self): - img = self._create_img('qcow2', 10 * units.Mi) - file_size = os.stat(img).st_size - fmt = format_inspector.QcowInspector.from_file(img) - # We know everything we need from the first 512 bytes of a QCOW image, - # so make sure that we did not read the whole thing when we inspect - # a local file. 
- self.assertLess(fmt.actual_size, file_size) - - def qed_supported(self): - output = subprocess.check_output('qemu-img create --help', shell=True) - return b' qed ' in output - - def test_qed_always_unsafe(self): - if not self.qed_supported(): - raise self.skipException('qed format not supported by qemu-img') - - img = self._create_img('qed', 10 * units.Mi) - fmt = format_inspector.get_inspector('qed').from_file(img) - self.assertTrue(fmt.format_match) - self.assertFalse(fmt.safety_check()) - - def _test_vmdk_bad_descriptor_offset(self, subformat=None): - format_name = 'vmdk' - image_size = 10 * units.Mi - descriptorOffsetAddr = 0x1c - BAD_ADDRESS = 0x400 - img = self._create_img(format_name, image_size, subformat=subformat) - - # Corrupt the header - fd = open(img, 'r+b') - fd.seek(descriptorOffsetAddr) - fd.write(struct.pack('=8.1.0 # Apache-2.0 oslo.concurrency>=4.5.1 # Apache-2.0 oslo.context>=2.22.0 # Apache-2.0 oslo.upgradecheck>=1.3.0 # Apache-2.0 -oslo.utils>=4.7.0 # Apache-2.0 +oslo.utils>=7.3.0 # Apache-2.0 stevedore!=3.0.0,>=1.20.0 # Apache-2.0 futurist>=1.2.0 # Apache-2.0 taskflow>=4.0.0 # Apache-2.0
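To summarize the conversion-side behaviour this patch switches to: the image_conversion plugin now relies on detect_file_format() plus a raising safety_check(), and maps oslo's 'gpt' detection back to 'raw'. The sketch below restates that flow using only the oslo_utils.imageutils.format_inspector names that appear in the diff; the check_local_image helper and its error strings are illustrative, and the ISO special-casing from the real plugin is omitted.

from oslo_utils.imageutils import format_inspector


def check_local_image(path, claimed_format):
    """Return the detected format of the file at path, or raise RuntimeError."""
    try:
        inspector = format_inspector.detect_file_format(path)
        # Oslo's safety_check() raises on failure instead of returning False.
        inspector.safety_check()
    except format_inspector.SafetyCheckFailed:
        raise RuntimeError('Image has disallowed configuration')
    except format_inspector.ImageFormatError as e:
        raise RuntimeError('Image format inspection failed: %s' % e)

    detected = str(inspector)
    if detected == 'gpt':
        # GPT-formatted disks are reported as 'gpt' but considered 'raw'
        # for compatibility, matching the image_conversion.py change above.
        detected = 'raw'
    if detected != claimed_format:
        raise RuntimeError('Image format mismatch')
    return detected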