Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Synchronise yoga with upstream #70

Open
wants to merge 11 commits into
base: stackhpc/yoga
Choose a base branch
from
8 changes: 7 additions & 1 deletion .zuul.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,8 @@
The regular tempest-integrated-storage job but with glance metadata injection
post-run: playbooks/post-check-metadata-injection.yaml
vars:
configure_swap_size: 8192
tempest_concurrency: 3
zuul_copy_output:
/etc/glance-remote: logs
devstack_localrc:
Expand All @@ -236,6 +238,11 @@
"glance_devstack_test":"doyouseeme?"
image_conversion:
output_format: raw
test-config:
"$TEMPEST_CONFIG":
image:
image_caching_enabled: True
disk_formats: qcow2,ari,aki,vhd,vmdk,raw,ami,vdi,iso,vhdx

- job:
name: glance-multistore-cinder-import
Expand Down Expand Up @@ -306,7 +313,6 @@
- release-notes-jobs-python3
check:
jobs:
- openstack-tox-functional-py36-fips
- openstack-tox-functional-py39
- glance-tox-functional-py39-rbac-defaults
- glance-ceph-thin-provisioning:
Expand Down
10 changes: 10 additions & 0 deletions glance/async_/flows/base_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,16 @@ def execute(self, image_id):
'bfile': backing_file}
raise RuntimeError(msg)

try:
data_file = metadata['format-specific']['data']['data-file']
except KeyError:
data_file = None
if data_file is not None:
msg = _("File %(path)s has invalid data-file "
"%(dfile)s, aborting.") % {"path": path,
"dfile": data_file}
raise RuntimeError(msg)

return path

def revert(self, image_id, result, **kwargs):
Expand Down
8 changes: 8 additions & 0 deletions glance/async_/flows/plugins/image_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,14 @@ def _execute(self, action, file_path, **kwargs):
raise RuntimeError(
'QCOW images with backing files are not allowed')

try:
data_file = metadata['format-specific']['data']['data-file']
except KeyError:
data_file = None
if data_file is not None:
raise RuntimeError(
'QCOW images with data-file set are not allowed')

if metadata.get('format') == 'vmdk':
create_type = metadata.get(
'format-specific', {}).get(
Expand Down
222 changes: 212 additions & 10 deletions glance/common/format_inspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@
LOG = logging.getLogger(__name__)


def chunked_reader(fileobj, chunk_size=512):
    """Yield the contents of a file-like object one chunk at a time.

    :param fileobj: An object providing a ``read(size)`` method
    :param chunk_size: The size passed to every ``read()`` call
    :returns: A generator of the values returned by ``read()``, ending
              at the first empty (falsy) read
    """
    chunk = fileobj.read(chunk_size)
    while chunk:
        yield chunk
        # Only go back to the file once the consumer asks for more
        chunk = fileobj.read(chunk_size)


class CaptureRegion(object):
"""Represents a region of a file we want to capture.

Expand Down Expand Up @@ -176,10 +184,16 @@ def virtual_size(self):
@property
def actual_size(self):
"""Returns the total size of the file, usually smaller than
virtual_size.
virtual_size. NOTE: this will only be accurate if the entire
file is read and processed.
"""
return self._total_count

@property
def complete(self):
    """Returns True once every registered capture region is complete.

    With no capture regions registered this is trivially True.
    """
    for captured in self._capture_regions.values():
        if not captured.complete:
            # At least one region is still waiting for data
            return False
    return True

def __str__(self):
"""The string name of this file format."""
return 'raw'
Expand All @@ -194,6 +208,35 @@ def context_info(self):
return {name: len(region.data) for name, region in
self._capture_regions.items()}

@classmethod
def from_file(cls, filename):
    """Build an inspector by reading just enough of a file.

    The file is opened in binary mode and fed to a fresh inspector one
    chunk at a time; reading stops as soon as the inspector reports
    that it is complete.

    NOTE: Because reading stops early, the actual_size property of the
    returned inspector reflects the amount of data consumed, not the
    size of the file.

    :param filename: Path of the file to inspect
    :returns: The populated inspector instance
    :raises: ImageFormatError if the inspector never completes or the
             content does not match this format
    """
    inspector = cls()
    with open(filename, 'rb') as image_file:
        for piece in chunked_reader(image_file):
            inspector.eat_chunk(piece)
            if inspector.complete:
                # The inspector is satisfied, leave the rest unread
                break

    if inspector.complete and inspector.format_match:
        return inspector
    raise ImageFormatError('File is not in requested format')

def safety_check(self):
"""Perform some checks to determine if this file is safe.

Returns True if safe, False otherwise. It may raise ImageFormatError
if safety cannot be guaranteed because of parsing or other errors.
"""
return True


# The qcow2 format consists of a big-endian 72-byte header, of which
# only a small portion has information we care about:
Expand All @@ -202,15 +245,26 @@ def context_info(self):
# 0 0x00 Magic 4-bytes 'QFI\xfb'
# 4 0x04 Version (uint32_t, should always be 2 for modern files)
# . . .
# 8 0x08 Backing file offset (uint64_t)
# 24 0x18 Size in bytes (uint64_t)
# . . .
# 72 0x48 Incompatible features bitfield (8 bytes)
#
# https://people.gnome.org/~markmc/qcow-image-format.html
# https://gitlab.com/qemu-project/qemu/-/blob/master/docs/interop/qcow2.txt
class QcowInspector(FileInspector):
"""QEMU QCOW2 Format

This should only require about 32 bytes of the beginning of the file
to determine the virtual size.
to determine the virtual size, and 104 bytes to perform the safety check.
"""

BF_OFFSET = 0x08
BF_OFFSET_LEN = 8
I_FEATURES = 0x48
I_FEATURES_LEN = 8
I_FEATURES_DATAFILE_BIT = 3
I_FEATURES_MAX_BIT = 4

def __init__(self, *a, **k):
super(QcowInspector, self).__init__(*a, **k)
self.new_region('header', CaptureRegion(0, 512))
Expand All @@ -220,6 +274,10 @@ def _qcow_header_data(self):
struct.unpack('>4sIQIIQ', self.region('header').data[:32]))
return magic, size

@property
def has_header(self):
    """Whether the 'header' capture region has been fully captured."""
    header_region = self.region('header')
    return header_region.complete

@property
def virtual_size(self):
if not self.region('header').complete:
Expand All @@ -236,9 +294,77 @@ def format_match(self):
magic, size = self._qcow_header_data()
return magic == b'QFI\xFB'

@property
def has_backing_file(self):
    """Whether the header declares a backing file.

    Returns None while the header region is incomplete, False if the
    data is not in this format, otherwise True when the big-endian
    64-bit backing file offset in the header is nonzero.
    """
    if not self.region('header').complete:
        return None
    if not self.format_match:
        return False

    start = self.BF_OFFSET
    stop = self.BF_OFFSET + self.BF_OFFSET_LEN
    raw_offset = self.region('header').data[start:stop]
    # A zero offset means no backing file is referenced
    (backing_offset,) = struct.unpack('>Q', raw_offset)
    return backing_offset != 0

@property
def has_unknown_features(self):
    """Whether any unrecognized incompatible-feature bit is set.

    Returns None while the header region is incomplete, False if the
    data is not in this format, otherwise True when a bit at or above
    I_FEATURES_MAX_BIT is set in the incompatible features bitfield
    and False when only lower bits (or none) are set.
    """
    if not self.region('header').complete:
        return None
    if not self.format_match:
        return False
    i_features = self.region('header').data[
        self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN]

    # This is the maximum byte number we should expect any bits to be set
    max_byte = self.I_FEATURES_MAX_BIT // 8
    # Position of the maximum bit within that byte. Without reducing it
    # modulo 8, a maximum bit of 8 or more would produce a mask wider
    # than a byte and silently allow every bit in the max byte.
    max_bit_in_byte = self.I_FEATURES_MAX_BIT % 8

    # The flag bytes are in big-endian ordering, so if we process
    # them in index-order, they're reversed
    for i, byte_num in enumerate(reversed(range(self.I_FEATURES_LEN))):
        if byte_num == max_byte:
            # If we're in the max-allowed byte, allow any bits less than
            # the maximum-known feature flag bit to be set
            allow_mask = (1 << max_bit_in_byte) - 1
        elif byte_num > max_byte:
            # If we're above the byte with the maximum known feature flag
            # bit, then we expect all zeroes
            allow_mask = 0x0
        else:
            # Any earlier-than-the-maximum byte can have any of the flag
            # bits set
            allow_mask = 0xFF

        # Report the same byte we tested (index i), not the byte at the
        # logical byte number, which lives at the opposite end
        unknown_bits = i_features[i] & ~allow_mask
        if unknown_bits:
            LOG.warning('Found unknown feature bit in byte %i: %s/%s',
                        byte_num, bin(unknown_bits), bin(allow_mask))
            return True

    return False

@property
def has_data_file(self):
    """Whether the external data file feature bit is set.

    Returns None while the header region is incomplete, False if the
    data is not in this format, otherwise whether the bit numbered
    I_FEATURES_DATAFILE_BIT (counting from 1 at the least significant
    bit) is set in the incompatible features bitfield.
    """
    if not self.region('header').complete:
        return None
    if not self.format_match:
        return False
    i_features = self.region('header').data[
        self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN]

    # I_FEATURES_DATAFILE_BIT is 1-based (the third bit is 0x04), so
    # convert to a zero-based index before splitting into byte and bit.
    # Parenthesize explicitly: "BIT - 1 % 8" parses as "BIT - (1 % 8)".
    bit_index = self.I_FEATURES_DATAFILE_BIT - 1

    # The bitfield is big-endian, so the first (least significant) byte
    # of the bitfield is i_features[7]
    byte = self.I_FEATURES_LEN - 1 - bit_index // 8
    # Third bit of bitfield, which is 0x04
    bit = 1 << (bit_index % 8)
    return bool(i_features[byte] & bit)

def __str__(self):
return 'qcow2'

def safety_check(self):
    """Returns True if none of the unsafe header indicators are set.

    The file is rejected if it declares a backing file, an external
    data file, or any unknown incompatible feature bit.
    """
    if self.has_backing_file:
        return False
    if self.has_data_file:
        return False
    return not self.has_unknown_features


# The VHD (or VPC as QEMU calls it) format consists of a big-endian
# 512-byte "footer" at the beginning of the file with various
Expand Down Expand Up @@ -345,6 +471,7 @@ class VHDXInspector(FileInspector):
"""
METAREGION = '8B7CA206-4790-4B9A-B8FE-575F050F886E'
VIRTUAL_DISK_SIZE = '2FA54224-CD1B-4876-B211-5DBED83BF4B8'
VHDX_METADATA_TABLE_MAX_SIZE = 32 * 2048 # From qemu

def __init__(self, *a, **k):
super(VHDXInspector, self).__init__(*a, **k)
Expand Down Expand Up @@ -459,6 +586,8 @@ def _find_meta_entry(self, desired_guid):
item_offset, item_length, _reserved = struct.unpack(
'<III',
meta_buffer[entry_offset + 16:entry_offset + 28])
item_length = min(item_length,
self.VHDX_METADATA_TABLE_MAX_SIZE)
self.region('metadata').length = len(meta_buffer)
self._log.debug('Found entry at offset %x', item_offset)
# Metadata item offset is from the beginning of the metadata
Expand Down Expand Up @@ -509,13 +638,20 @@ def __str__(self):
#
# https://www.vmware.com/app/vmdk/?src=vmdk
class VMDKInspector(FileInspector):
"""vmware VMDK format (monolithicSparse variant only)
"""vmware VMDK format (monolithicSparse and streamOptimized variants only)

This needs to store the 512 byte header and the descriptor region
which should be just after that. The descriptor region is some
variable number of 512 byte sectors, but is just text defining the
layout of the disk.
"""

# The beginning and max size of the descriptor is also hardcoded in Qemu
# at 0x200 and 1MB - 1
DESC_OFFSET = 0x200
DESC_MAX_SIZE = (1 << 20) - 1
GD_AT_END = 0xffffffffffffffff

def __init__(self, *a, **k):
super(VMDKInspector, self).__init__(*a, **k)
self.new_region('header', CaptureRegion(0, 512))
Expand All @@ -527,20 +663,33 @@ def post_process(self):
if not self.region('header').complete:
return

sig, ver, _flags, _sectors, _grain, desc_sec, desc_num = struct.unpack(
'<4sIIQQQQ', self.region('header').data[:44])
(sig, ver, _flags, _sectors, _grain, desc_sec, desc_num,
_numGTEsperGT, _rgdOffset, gdOffset) = struct.unpack(
'<4sIIQQQQIQQ', self.region('header').data[:64])

if sig != b'KDMV':
raise ImageFormatError('Signature KDMV not found: %r' % sig)
return

if ver not in (1, 2, 3):
raise ImageFormatError('Unsupported format version %i' % ver)
return

if gdOffset == self.GD_AT_END:
# This means we have a footer, which takes precedence over the
# header, which we cannot support since we stream.
raise ImageFormatError('Unsupported VMDK footer')

# Since we parse both desc_sec and desc_num (the location of the
# VMDK's descriptor, expressed in 512 bytes sectors) we enforce a
# check on the bounds to create a reasonable CaptureRegion. This
# is similar to how it's done in qemu.
desc_offset = desc_sec * 512
desc_size = min(desc_num * 512, self.DESC_MAX_SIZE)
if desc_offset != self.DESC_OFFSET:
raise ImageFormatError("Wrong descriptor location")

if not self.has_region('descriptor'):
self.new_region('descriptor', CaptureRegion(
desc_sec * 512, desc_num * 512))
desc_offset, desc_size))

@property
def format_match(self):
Expand All @@ -566,7 +715,7 @@ def virtual_size(self):
vmdktype = descriptor[type_idx:type_end]
else:
vmdktype = b'formatnotfound'
if vmdktype != b'monolithicSparse':
if vmdktype not in (b'monolithicSparse', b'streamOptimized'):
LOG.warning('Unsupported VMDK format %s', vmdktype)
return 0

Expand All @@ -576,6 +725,59 @@ def virtual_size(self):

return sectors * 512

def safety_check(self):
    """Cautiously validate the VMDK descriptor text.

    Returns False if the descriptor region is missing or incomplete, if
    any extent line contains a '/' character, or if no extent lines are
    present; returns True otherwise.

    Raises ImageFormatError if the descriptor is not ASCII or contains a
    line that is not a comment, a ddb entry, a header field or an extent.
    """
    have_descriptor = (self.has_region('descriptor') and
                       self.region('descriptor').complete)
    if not have_descriptor:
        return False

    try:
        # The descriptor is NUL-padded out to a 512 byte boundary
        raw_desc = self.region('descriptor').data.rstrip(b'\x00')
        # It is case-insensitive ASCII text, so normalize to lowercase
        text = raw_desc.decode('ascii').lower()
    except UnicodeDecodeError:
        LOG.error('VMDK descriptor failed to decode as ASCII')
        raise ImageFormatError('Invalid VMDK descriptor data')

    access_modes = ('rw', 'rdonly', 'noaccess')
    extent_lines = []

    # Every line must be of a kind we recognize, otherwise the whole
    # descriptor is refused.
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('#'):
            # Blank and comment lines carry no information
            continue
        if line.startswith('ddb'):
            # DDB entries are permitted but not examined
            continue
        if '=' in line and ' ' not in line.split('=')[0]:
            # Header field: a single word, then '=' and a value
            continue
        if line.split(' ')[0] in access_modes:
            # Extent lines begin with one of the three access modes
            extent_lines.append(line)
            continue
        LOG.error('Unsupported line %r in VMDK descriptor', line)
        raise ImageFormatError('Invalid VMDK descriptor data')

    # Refuse any extent whose text contains a path separator
    for extent in extent_lines:
        if '/' in extent:
            LOG.error('Extent line %r contains unsafe characters',
                      extent)
            return False

    if not extent_lines:
        LOG.error('VMDK file specified no extents')
        return False

    return True

def __str__(self):
return 'vmdk'

Expand Down
Loading