diff --git a/cinder/image/format_inspector.py b/cinder/image/format_inspector.py new file mode 100644 index 00000000000..ede09005ea0 --- /dev/null +++ b/cinder/image/format_inspector.py @@ -0,0 +1,938 @@ +# Copyright 2020 Red Hat, Inc +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +This is a python implementation of virtual disk format inspection routines +gathered from various public specification documents, as well as qemu disk +driver code. It attempts to store and parse the minimum amount of data +required, and in a streaming-friendly manner to collect metadata about +complex-format images. +""" + +import struct + +from oslo_log import log as logging + +LOG = logging.getLogger(__name__) + + +def chunked_reader(fileobj, chunk_size=512): + while True: + chunk = fileobj.read(chunk_size) + if not chunk: + break + yield chunk + + +class CaptureRegion(object): + """Represents a region of a file we want to capture. + + A region of a file we want to capture requires a byte offset into + the file and a length. This is expected to be used by a data + processing loop, calling capture() with the most recently-read + chunk. This class handles the task of grabbing the desired region + of data across potentially multiple fractional and unaligned reads. + + :param offset: Byte offset into the file starting the region + :param length: The length of the region + """ + def __init__(self, offset, length): + self.offset = offset + self.length = length + self.data = b'' + + @property + def complete(self): + """Returns True when we have captured the desired data.""" + return self.length == len(self.data) + + def capture(self, chunk, current_position): + """Process a chunk of data. + + This should be called for each chunk in the read loop, at least + until complete returns True. + + :param chunk: A chunk of bytes in the file + :param current_position: The position of the file processed by the + read loop so far. Note that this will be + the position in the file *after* the chunk + being presented. + """ + read_start = current_position - len(chunk) + if (read_start <= self.offset <= current_position or + self.offset <= read_start <= (self.offset + self.length)): + if read_start < self.offset: + lead_gap = self.offset - read_start + else: + lead_gap = 0 + self.data += chunk[lead_gap:] + self.data = self.data[:self.length] + + +class ImageFormatError(Exception): + """An unrecoverable image format error that aborts the process.""" + pass + + +class TraceDisabled(object): + """A logger-like thing that swallows tracing when we do not want it.""" + def debug(self, *a, **k): + pass + + info = debug + warning = debug + error = debug + + +class FileInspector(object): + """A stream-based disk image inspector. + + This base class works on raw images and is subclassed for more + complex types. It is to be presented with the file to be examined + one chunk at a time, during read processing and will only store + as much data as necessary to determine required attributes of + the file. + """ + + def __init__(self, tracing=False): + self._total_count = 0 + + # NOTE(danms): The logging in here is extremely verbose for a reason, + # but should never really be enabled at that level at runtime. To + # retain all that work and assist in future debug, we have a separate + # debug flag that can be passed from a manual tool to turn it on. + if tracing: + self._log = logging.getLogger(str(self)) + else: + self._log = TraceDisabled() + self._capture_regions = {} + + def _capture(self, chunk, only=None): + for name, region in self._capture_regions.items(): + if only and name not in only: + continue + if not region.complete: + region.capture(chunk, self._total_count) + + def eat_chunk(self, chunk): + """Call this to present chunks of the file to the inspector.""" + pre_regions = set(self._capture_regions.keys()) + + # Increment our position-in-file counter + self._total_count += len(chunk) + + # Run through the regions we know of to see if they want this + # data + self._capture(chunk) + + # Let the format do some post-read processing of the stream + self.post_process() + + # Check to see if the post-read processing added new regions + # which may require the current chunk. + new_regions = set(self._capture_regions.keys()) - pre_regions + if new_regions: + self._capture(chunk, only=new_regions) + + def post_process(self): + """Post-read hook to process what has been read so far. + + This will be called after each chunk is read and potentially captured + by the defined regions. If any regions are defined by this call, + those regions will be presented with the current chunk in case it + is within one of the new regions. + """ + pass + + def region(self, name): + """Get a CaptureRegion by name.""" + return self._capture_regions[name] + + def new_region(self, name, region): + """Add a new CaptureRegion by name.""" + if self.has_region(name): + # This is a bug, we tried to add the same region twice + raise ImageFormatError('Inspector re-added region %s' % name) + self._capture_regions[name] = region + + def has_region(self, name): + """Returns True if named region has been defined.""" + return name in self._capture_regions + + @property + def format_match(self): + """Returns True if the file appears to be the expected format.""" + return True + + @property + def virtual_size(self): + """Returns the virtual size of the disk image, or zero if unknown.""" + return self._total_count + + @property + def actual_size(self): + """Returns the total size of the file. + + This is usually smaller than virtual_size. NOTE: this will only be + accurate if the entire file is read and processed. + """ + return self._total_count + + @property + def complete(self): + """Returns True if we have all the information needed.""" + return all(r.complete for r in self._capture_regions.values()) + + def __str__(self): + """The string name of this file format.""" + return 'raw' + + @property + def context_info(self): + """Return info on amount of data held in memory for auditing. + + This is a dict of region:sizeinbytes items that the inspector + uses to examine the file. + """ + return {name: len(region.data) for name, region in + self._capture_regions.items()} + + @classmethod + def from_file(cls, filename): + """Read as much of a file as necessary to complete inspection. + + NOTE: Because we only read as much of the file as necessary, the + actual_size property will not reflect the size of the file, but the + amount of data we read before we satisfied the inspector. + + Raises ImageFormatError if we cannot parse the file. + """ + inspector = cls() + with open(filename, 'rb') as f: + for chunk in chunked_reader(f): + inspector.eat_chunk(chunk) + if inspector.complete: + # No need to eat any more data + break + if not inspector.complete or not inspector.format_match: + raise ImageFormatError('File is not in requested format') + return inspector + + def safety_check(self): + """Perform some checks to determine if this file is safe. + + Returns True if safe, False otherwise. It may raise ImageFormatError + if safety cannot be guaranteed because of parsing or other errors. + """ + return True + + +# The qcow2 format consists of a big-endian 72-byte header, of which +# only a small portion has information we care about: +# +# Dec Hex Name +# 0 0x00 Magic 4-bytes 'QFI\xfb' +# 4 0x04 Version (uint32_t, should always be 2 for modern files) +# . . . +# 8 0x08 Backing file offset (uint64_t) +# 24 0x18 Size in bytes (unint64_t) +# . . . +# 72 0x48 Incompatible features bitfield (6 bytes) +# +# https://gitlab.com/qemu-project/qemu/-/blob/master/docs/interop/qcow2.txt +class QcowInspector(FileInspector): + """QEMU QCOW2 Format + + This should only require about 32 bytes of the beginning of the file + to determine the virtual size, and 104 bytes to perform the safety check. + """ + + BF_OFFSET = 0x08 + BF_OFFSET_LEN = 8 + I_FEATURES = 0x48 + I_FEATURES_LEN = 8 + I_FEATURES_DATAFILE_BIT = 3 + I_FEATURES_MAX_BIT = 4 + + def __init__(self, *a, **k): + super(QcowInspector, self).__init__(*a, **k) + self.new_region('header', CaptureRegion(0, 512)) + + def _qcow_header_data(self): + magic, version, bf_offset, bf_sz, cluster_bits, size = ( + struct.unpack('>4sIQIIQ', self.region('header').data[:32])) + return magic, size + + @property + def has_header(self): + return self.region('header').complete + + @property + def virtual_size(self): + if not self.region('header').complete: + return 0 + if not self.format_match: + return 0 + magic, size = self._qcow_header_data() + return size + + @property + def format_match(self): + if not self.region('header').complete: + return False + magic, size = self._qcow_header_data() + return magic == b'QFI\xFB' + + @property + def has_backing_file(self): + if not self.region('header').complete: + return None + if not self.format_match: + return False + bf_offset_bytes = self.region('header').data[ + self.BF_OFFSET:self.BF_OFFSET + self.BF_OFFSET_LEN] + # nonzero means "has a backing file" + bf_offset, = struct.unpack('>Q', bf_offset_bytes) + return bf_offset != 0 + + @property + def has_unknown_features(self): + if not self.region('header').complete: + return None + if not self.format_match: + return False + i_features = self.region('header').data[ + self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN] + + # This is the maximum byte number we should expect any bits to be set + max_byte = self.I_FEATURES_MAX_BIT // 8 + + # The flag bytes are in big-endian ordering, so if we process + # them in index-order, they're reversed + for i, byte_num in enumerate(reversed(range(self.I_FEATURES_LEN))): + if byte_num == max_byte: + # If we're in the max-allowed byte, allow any bits less than + # the maximum-known feature flag bit to be set + allow_mask = ((1 << self.I_FEATURES_MAX_BIT) - 1) + elif byte_num > max_byte: + # If we're above the byte with the maximum known feature flag + # bit, then we expect all zeroes + allow_mask = 0x0 + else: + # Any earlier-than-the-maximum byte can have any of the flag + # bits set + allow_mask = 0xFF + + if i_features[i] & ~allow_mask: + LOG.warning('Found unknown feature bit in byte %i: %s/%s', + byte_num, bin(i_features[byte_num] & ~allow_mask), + bin(allow_mask)) + return True + + return False + + @property + def has_data_file(self): + if not self.region('header').complete: + return None + if not self.format_match: + return False + i_features = self.region('header').data[ + self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN] + + # First byte of bitfield, which is i_features[7] + byte = self.I_FEATURES_LEN - 1 - self.I_FEATURES_DATAFILE_BIT // 8 + # Third bit of bitfield, which is 0x04 + bit = 1 << (self.I_FEATURES_DATAFILE_BIT - 1 % 8) + return bool(i_features[byte] & bit) + + def __str__(self): + return 'qcow2' + + def safety_check(self): + return (not self.has_backing_file and + not self.has_data_file and + not self.has_unknown_features) + + def safety_check_allow_backing_file(self): + return (not self.has_data_file and + not self.has_unknown_features) + + +class QEDInspector(FileInspector): + def __init__(self, tracing=False): + super().__init__(tracing) + self.new_region('header', CaptureRegion(0, 512)) + + @property + def format_match(self): + if not self.region('header').complete: + return False + return self.region('header').data.startswith(b'QED\x00') + + def safety_check(self): + # QED format is not supported by anyone, but we want to detect it + # and mark it as just always unsafe. + return False + + +# The VHD (or VPC as QEMU calls it) format consists of a big-endian +# 512-byte "footer" at the beginning of the file with various +# information, most of which does not matter to us: +# +# Dec Hex Name +# 0 0x00 Magic string (8-bytes, always 'conectix') +# 40 0x28 Disk size (uint64_t) +# +# https://github.com/qemu/qemu/blob/master/block/vpc.c +class VHDInspector(FileInspector): + """Connectix/MS VPC VHD Format + + This should only require about 512 bytes of the beginning of the file + to determine the virtual size. + """ + def __init__(self, *a, **k): + super(VHDInspector, self).__init__(*a, **k) + self.new_region('header', CaptureRegion(0, 512)) + + @property + def format_match(self): + return self.region('header').data.startswith(b'conectix') + + @property + def virtual_size(self): + if not self.region('header').complete: + return 0 + + if not self.format_match: + return 0 + + return struct.unpack('>Q', self.region('header').data[40:48])[0] + + def __str__(self): + return 'vhd' + + +# The VHDX format consists of a complex dynamic little-endian +# structure with multiple regions of metadata and data, linked by +# offsets with in the file (and within regions), identified by MSFT +# GUID strings. The header is a 320KiB structure, only a few pieces of +# which we actually need to capture and interpret: +# +# Dec Hex Name +# 0 0x00000 Identity (Technically 9-bytes, padded to 64KiB, the first +# 8 bytes of which are 'vhdxfile') +# 196608 0x30000 The Region table (64KiB of a 32-byte header, followed +# by up to 2047 36-byte region table entry structures) +# +# The region table header includes two items we need to read and parse, +# which are: +# +# 196608 0x30000 4-byte signature ('regi') +# 196616 0x30008 Entry count (uint32-t) +# +# The region table entries follow the region table header immediately +# and are identified by a 16-byte GUID, and provide an offset of the +# start of that region. We care about the "metadata region", identified +# by the METAREGION class variable. The region table entry is (offsets +# from the beginning of the entry, since it could be in multiple places): +# +# 0 0x00000 16-byte MSFT GUID +# 16 0x00010 Offset of the actual metadata region (uint64_t) +# +# When we find the METAREGION table entry, we need to grab that offset +# and start examining the region structure at that point. That +# consists of a metadata table of structures, which point to places in +# the data in an unstructured space that follows. The header is +# (offsets relative to the region start): +# +# 0 0x00000 8-byte signature ('metadata') +# . . . +# 16 0x00010 2-byte entry count (up to 2047 entries max) +# +# This header is followed by the specified number of metadata entry +# structures, identified by GUID: +# +# 0 0x00000 16-byte MSFT GUID +# 16 0x00010 4-byte offset (uint32_t, relative to the beginning of +# the metadata region) +# +# We need to find the "Virtual Disk Size" metadata item, identified by +# the GUID in the VIRTUAL_DISK_SIZE class variable, grab the offset, +# add it to the offset of the metadata region, and examine that 8-byte +# chunk of data that follows. +# +# The "Virtual Disk Size" is a naked uint64_t which contains the size +# of the virtual disk, and is our ultimate target here. +# +# https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-vhdx/83e061f8-f6e2-4de1-91bd-5d518a43d477 +class VHDXInspector(FileInspector): + """MS VHDX Format + + This requires some complex parsing of the stream. The first 256KiB + of the image is stored to get the header and region information, + and then we capture the first metadata region to read those + records, find the location of the virtual size data and parse + it. This needs to store the metadata table entries up until the + VDS record, which may consist of up to 2047 32-byte entries at + max. Finally, it must store a chunk of data at the offset of the + actual VDS uint64. + + """ + METAREGION = '8B7CA206-4790-4B9A-B8FE-575F050F886E' + VIRTUAL_DISK_SIZE = '2FA54224-CD1B-4876-B211-5DBED83BF4B8' + VHDX_METADATA_TABLE_MAX_SIZE = 32 * 2048 # From qemu + + def __init__(self, *a, **k): + super(VHDXInspector, self).__init__(*a, **k) + self.new_region('ident', CaptureRegion(0, 32)) + self.new_region('header', CaptureRegion(192 * 1024, 64 * 1024)) + + def post_process(self): + # After reading a chunk, we may have the following conditions: + # + # 1. We may have just completed the header region, and if so, + # we need to immediately read and calculate the location of + # the metadata region, as it may be starting in the same + # read we just did. + # 2. We may have just completed the metadata region, and if so, + # we need to immediately calculate the location of the + # "virtual disk size" record, as it may be starting in the + # same read we just did. + if self.region('header').complete and not self.has_region('metadata'): + region = self._find_meta_region() + if region: + self.new_region('metadata', region) + elif self.has_region('metadata') and not self.has_region('vds'): + region = self._find_meta_entry(self.VIRTUAL_DISK_SIZE) + if region: + self.new_region('vds', region) + + @property + def format_match(self): + return self.region('ident').data.startswith(b'vhdxfile') + + @staticmethod + def _guid(buf): + """Format a MSFT GUID from the 16-byte input buffer.""" + guid_format = '= 2048: + raise ImageFormatError('Region count is %i (limit 2047)' % count) + + # Process the regions until we find the metadata one; grab the + # offset and return + self._log.debug('Region entry first is %x', region_entry_first) + self._log.debug('Region entries %i', count) + meta_offset = 0 + for i in range(0, count): + entry_start = region_entry_first + (i * 32) + entry_end = entry_start + 32 + entry = self.region('header').data[entry_start:entry_end] + self._log.debug('Entry offset is %x', entry_start) + + # GUID is the first 16 bytes + guid = self._guid(entry[:16]) + if guid == self.METAREGION: + # This entry is the metadata region entry + meta_offset, meta_len, meta_req = struct.unpack( + '= 2048: + raise ImageFormatError( + 'Metadata item count is %i (limit 2047)' % count) + + for i in range(0, count): + entry_offset = 32 + (i * 32) + guid = self._guid(meta_buffer[entry_offset:entry_offset + 16]) + if guid == desired_guid: + # Found the item we are looking for by id. + # Stop our region from capturing + item_offset, item_length, _reserved = struct.unpack( + ' str: return QEMU_IMG_FORMAT_MAP_INV.get(disk_format, disk_format) -def qemu_img_info(path: str, - run_as_root: bool = True, - force_share: bool = False) -> imageutils.QemuImgInfo: +def qemu_img_info( + path: str, + run_as_root: bool = True, + force_share: bool = False, + allow_qcow2_backing_file: bool = False) -> imageutils.QemuImgInfo: """Return an object containing the parsed output from qemu-img info.""" - cmd = ['env', 'LC_ALL=C', 'qemu-img', 'info', '--output=json'] + + format_name = cinder.privsep.format_inspector.get_format_if_safe( + path=path, + allow_qcow2_backing_file=allow_qcow2_backing_file) + if format_name is None: + LOG.warning('Image/Volume %s failed safety check', path) + # NOTE(danms): This is the same exception as would be raised + # by qemu_img_info() if the disk format was unreadable or + # otherwise unsuitable. + raise exception.Invalid( + reason=_('Image/Volume failed safety check')) + + cmd = ['env', 'LC_ALL=C', 'qemu-img', 'info', + '-f', format_name, '--output=json'] if force_share: cmd.append('--force-share') cmd.append(path) @@ -173,8 +189,32 @@ def qemu_img_info(path: str, prlimit=QEMU_IMG_LIMITS) info = imageutils.QemuImgInfo(out, format='json') + # FIXME: figure out a more elegant way to do this + if info.file_format == 'raw': + # The format_inspector will detect a luks image as 'raw', and then when + # we call qemu-img info -f raw above, we don't get any of the luks + # format-specific info (some of which is used in the create_volume + # flow). So we need to check if this is really a luks container. + # (We didn't have to do this in the past because we called + # qemu-img info without -f.) + cmd = ['env', 'LC_ALL=C', 'qemu-img', 'info', + '-f', 'luks', '--output=json'] + if force_share: + cmd.append('--force-share') + cmd.append(path) + if os.name == 'nt': + cmd = cmd[2:] + try: + out, _err = utils.execute(*cmd, run_as_root=run_as_root, + prlimit=QEMU_IMG_LIMITS) + info = imageutils.QemuImgInfo(out, format='json') + except processutils.ProcessExecutionError: + # we'll just use the info object we already got earlier + pass + # From Cinder's point of view, any 'luks' formatted images - # should be treated as 'raw'. + # should be treated as 'raw'. (This changes the file_format, but + # not any of the format-specific information.) if info.file_format == 'luks': info.file_format = 'raw' @@ -681,6 +721,35 @@ def get_qemu_data(image_id: str, return data +def check_qcow2_image(image_id: str, data: imageutils.QemuImgInfo) -> None: + """Check some rules about qcow2 images. + + Does not check for a backing_file, because cinder has some legitimate + use cases for qcow2 backing files. + + Makes sure the image: + + - does not have a data_file + + :param image_id: the image id + :param data: an imageutils.QemuImgInfo object + :raises ImageUnacceptable: when the image fails the check + """ + try: + data_file = data.format_specific['data'].get('data-file') + except (KeyError, TypeError): + LOG.debug('Unexpected response from qemu-img info when processing ' + 'image %s: missing format-specific info for a qcow2 image', + image_id) + msg = _('Cannot determine format-specific information') + raise exception.ImageUnacceptable(image_id=image_id, reason=msg) + if data_file: + LOG.warning("Refusing to process qcow2 file with data-file '%s'", + data_file) + msg = _('A qcow2 format image is not allowed to have a data file') + raise exception.ImageUnacceptable(image_id=image_id, reason=msg) + + def check_vmdk_image(image_id: str, data: imageutils.QemuImgInfo) -> None: """Check some rules about VMDK images. @@ -771,6 +840,8 @@ def check_image_format(source: str, if data.file_format == 'vmdk': check_vmdk_image(image_id, data) + if data.file_format == 'qcow2': + check_qcow2_image(image_id, data) def fetch_verify_image(context: context.RequestContext, @@ -813,6 +884,11 @@ def fetch_verify_image(context: context.RequestContext, if fmt == 'vmdk': check_vmdk_image(image_id, data) + # Bug #2059809: a qcow2 can have a data file that's similar + # to a backing file and is also unacceptable + if fmt == 'qcow2': + check_qcow2_image(image_id, data) + def fetch_to_vhd(context: context.RequestContext, image_service: glance.GlanceImageService, diff --git a/cinder/privsep/format_inspector.py b/cinder/privsep/format_inspector.py new file mode 100644 index 00000000000..b89f5009492 --- /dev/null +++ b/cinder/privsep/format_inspector.py @@ -0,0 +1,38 @@ +# Copyright 2024 Red Hat, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +Helpers for the image format_inspector. +""" + + +from cinder.image import format_inspector +import cinder.privsep + + +@cinder.privsep.sys_admin_pctxt.entrypoint +def get_format_if_safe(path, allow_qcow2_backing_file): + """Returns a str format name if the format is safe, otherwise None""" + return _get_format_if_safe(path, allow_qcow2_backing_file) + + +def _get_format_if_safe(path, allow_qcow2_backing_file): + """Returns a str format name if the format is safe, otherwise None""" + inspector = format_inspector.detect_file_format(path) + format_name = str(inspector) + safe = inspector.safety_check() + if not safe and format_name == 'qcow2' and allow_qcow2_backing_file: + safe = inspector.safety_check_allow_backing_file() + if safe: + return format_name diff --git a/cinder/tests/unit/image/test_format_inspector.py b/cinder/tests/unit/image/test_format_inspector.py new file mode 100644 index 00000000000..2687461b4a1 --- /dev/null +++ b/cinder/tests/unit/image/test_format_inspector.py @@ -0,0 +1,515 @@ +# Copyright 2020 Red Hat, Inc +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import io +import os +import re +import struct +import subprocess +import tempfile +from unittest import mock + +from oslo_utils import units + +from cinder.image import format_inspector +from cinder.tests.unit import test + + +def get_size_from_qemu_img(filename): + output = subprocess.check_output('qemu-img info "%s"' % filename, + shell=True) + for line in output.split(b'\n'): + m = re.search(b'^virtual size: .* .([0-9]+) bytes', line.strip()) + if m: + return int(m.group(1)) + + raise Exception('Could not find virtual size with qemu-img') + + +class TestFormatInspectors(test.TestCase): + def setUp(self): + super(TestFormatInspectors, self).setUp() + self._created_files = [] + + def tearDown(self): + super(TestFormatInspectors, self).tearDown() + for fn in self._created_files: + try: + os.remove(fn) + except Exception: + pass + + def _create_img(self, fmt, size, subformat=None, options=None, + backing_file=None): + if fmt == 'vhd': + # QEMU calls the vhd format vpc + fmt = 'vpc' + + if options is None: + options = {} + opt = '' + prefix = 'glance-unittest-formatinspector-' + + if subformat: + options['subformat'] = subformat + prefix += subformat + '-' + + if options: + opt += '-o ' + ','.join('%s=%s' % (k, v) + for k, v in options.items()) + + if backing_file is not None: + opt += ' -b %s -F raw' % backing_file + + fn = tempfile.mktemp(prefix=prefix, + suffix='.%s' % fmt) + self._created_files.append(fn) + subprocess.check_output( + 'qemu-img create -f %s %s %s %i' % (fmt, opt, fn, size), + shell=True) + return fn + + def _create_allocated_vmdk(self, size_mb, subformat=None): + # We need a "big" VMDK file to exercise some parts of the code of the + # format_inspector. A way to create one is to first create an empty + # file, and then to convert it with the -S 0 option. + + if subformat is None: + # Matches qemu-img default, see `qemu-img convert -O vmdk -o help` + subformat = 'monolithicSparse' + + prefix = 'glance-unittest-formatinspector-%s-' % subformat + fn = tempfile.mktemp(prefix=prefix, suffix='.vmdk') + self._created_files.append(fn) + raw = tempfile.mktemp(prefix=prefix, suffix='.raw') + self._created_files.append(raw) + + # Create a file with pseudo-random data, otherwise it will get + # compressed in the streamOptimized format + subprocess.check_output( + 'dd if=/dev/urandom of=%s bs=1M count=%i' % (raw, size_mb), + shell=True) + + # Convert it to VMDK + subprocess.check_output( + 'qemu-img convert -f raw -O vmdk -o subformat=%s -S 0 %s %s' % ( + subformat, raw, fn), + shell=True) + return fn + + def _test_format_at_block_size(self, format_name, img, block_size): + fmt = format_inspector.get_inspector(format_name)() + self.assertIsNotNone(fmt, + 'Did not get format inspector for %s' % ( + format_name)) + wrapper = format_inspector.InfoWrapper(open(img, 'rb'), fmt) + + while True: + chunk = wrapper.read(block_size) + if not chunk: + break + + wrapper.close() + return fmt + + def _test_format_at_image_size(self, format_name, image_size, + subformat=None): + img = self._create_img(format_name, image_size, subformat=subformat) + + # Some formats have internal alignment restrictions making this not + # always exactly like image_size, so get the real value for comparison + virtual_size = get_size_from_qemu_img(img) + + # Read the format in various sizes, some of which will read whole + # sections in a single read, others will be completely unaligned, etc. + for block_size in (64 * units.Ki, 512, 17, 1 * units.Mi): + fmt = self._test_format_at_block_size(format_name, img, block_size) + self.assertTrue(fmt.format_match, + 'Failed to match %s at size %i block %i' % ( + format_name, image_size, block_size)) + self.assertEqual(virtual_size, fmt.virtual_size, + ('Failed to calculate size for %s at size %i ' + 'block %i') % (format_name, image_size, + block_size)) + memory = sum(fmt.context_info.values()) + self.assertLess(memory, 512 * units.Ki, + 'Format used more than 512KiB of memory: %s' % ( + fmt.context_info)) + + def _test_format(self, format_name, subformat=None): + # Try a few different image sizes, including some odd and very small + # sizes + for image_size in (512, 513, 2057, 7): + self._test_format_at_image_size(format_name, image_size * units.Mi, + subformat=subformat) + + def test_qcow2(self): + self._test_format('qcow2') + + def test_vhd(self): + self._test_format('vhd') + + def test_vhdx(self): + self._test_format('vhdx') + + def test_vmdk(self): + self._test_format('vmdk') + + def test_vmdk_stream_optimized(self): + self._test_format('vmdk', 'streamOptimized') + + def test_from_file_reads_minimum(self): + img = self._create_img('qcow2', 10 * units.Mi) + file_size = os.stat(img).st_size + fmt = format_inspector.QcowInspector.from_file(img) + # We know everything we need from the first 512 bytes of a QCOW image, + # so make sure that we did not read the whole thing when we inspect + # a local file. + self.assertLess(fmt.actual_size, file_size) + + def test_qed_always_unsafe(self): + img = self._create_img('qed', 10 * units.Mi) + fmt = format_inspector.get_inspector('qed').from_file(img) + self.assertTrue(fmt.format_match) + self.assertFalse(fmt.safety_check()) + + def _test_vmdk_bad_descriptor_offset(self, subformat=None): + format_name = 'vmdk' + image_size = 10 * units.Mi + descriptorOffsetAddr = 0x1c + BAD_ADDRESS = 0x400 + img = self._create_img(format_name, image_size, subformat=subformat) + + # Corrupt the header + fd = open(img, 'r+b') + fd.seek(descriptorOffsetAddr) + fd.write(struct.pack('`_ for details. +fixes: + - | + `Bug #2059809 `_: + Fixed issue where a qcow2 format image with an external data file + could expose host information. Such an image is now rejected with + an ``ImageUnacceptable`` error if it is used to create a volume. + Given that qcow2 external data files were never supported by + Cinder, the only use for such an image previously was to attempt + to steal host information, and hence this change should have no + impact on users.