Avoid requesting DISK_GB allocation for root_gb on BFV instances

Right now, we still ask placement for a disk allocation covering
swap, ephemeral, and root disks for the instance even if the instance
is going to be volume-backed. This patch makes us not include the
root size in that calculation for placement, avoiding a false
failure because the volume size is counted against the compute's
available space.

To do this, we need another flag in request_spec to track the
BFV-ness of the instance. Right now, this patch just sets that on
new builds and the scheduler client assumes a lack of said flag
as "I don't know, so assume not-BFV" for compatibility. A
subsequent patch can calculate that flag for existing instances
so that we will be able to heal over time by migrating instances
or re-writing their allocations to reflect reality.

Partial-Bug: #1469179
Change-Id: I9c2111f7377df65c1fc3c72323f85483b3295989
This commit is contained in:
Dan Smith
2018-07-06 09:09:05 -07:00
committed by Eric Fried
parent eb4f65a795
commit 03c596a9f4
8 changed files with 120 additions and 9 deletions

View File

@@ -885,6 +885,15 @@ class API(base.Base):
base_options['pci_requests'], filter_properties,
instance_group, base_options['availability_zone'],
security_groups=security_groups)
if block_device_mapping:
# Record whether or not we are a BFV instance
root = block_device_mapping.root_bdm()
req_spec.is_bfv = bool(root and root.is_volume)
else:
# If we have no BDMs, we're clearly not BFV
req_spec.is_bfv = False
# NOTE(danms): We need to record num_instances on the request
# spec as this is how the conductor knows how many were in this
# batch.

View File

@@ -44,7 +44,8 @@ class RequestSpec(base.NovaObject):
# Version 1.8: Added security_groups
# Version 1.9: Added user_id
# Version 1.10: Added network_metadata
VERSION = '1.10'
# Version 1.11: Added is_bfv
VERSION = '1.11'
fields = {
'id': fields.IntegerField(),
@@ -82,11 +83,14 @@ class RequestSpec(base.NovaObject):
'instance_uuid': fields.UUIDField(),
'security_groups': fields.ObjectField('SecurityGroupList'),
'network_metadata': fields.ObjectField('NetworkMetadata'),
'is_bfv': fields.BooleanField(),
}
def obj_make_compatible(self, primitive, target_version):
super(RequestSpec, self).obj_make_compatible(primitive, target_version)
target_version = versionutils.convert_version_to_tuple(target_version)
if target_version < (1, 11) and 'is_bfv' in primitive:
del primitive['is_bfv']
if target_version < (1, 10):
if 'network_metadata' in primitive:
del primitive['network_metadata']
@@ -473,7 +477,7 @@ class RequestSpec(base.NovaObject):
# though they should match.
if key in ['id', 'instance_uuid']:
setattr(spec, key, db_spec[key])
else:
elif key in spec_obj:
setattr(spec, key, getattr(spec_obj, key))
spec._context = context

View File

@@ -397,9 +397,14 @@ def resources_from_request_spec(spec_obj):
fields.ResourceClass.MEMORY_MB: spec_obj.memory_mb,
}
requested_disk_mb = (1024 * (spec_obj.root_gb +
spec_obj.ephemeral_gb) +
requested_disk_mb = ((1024 * spec_obj.ephemeral_gb) +
spec_obj.swap)
if 'is_bfv' not in spec_obj or not spec_obj.is_bfv:
# Either this is not a BFV instance, or we are not sure,
# so ask for root_gb allocation
requested_disk_mb += (1024 * spec_obj.root_gb)
# NOTE(sbauza): Disk request is expressed in MB but we count
# resources in GB. Since there could be a remainder of the division
# by 1024, we need to ceil the result to the next bigger Gb so we

View File

@@ -3578,6 +3578,85 @@ class ServerSoftDeleteTests(integrated_helpers.ProviderUsageBaseTestCase):
self._delete_and_check_allocations(server)
class VolumeBackedServerTest(integrated_helpers.ProviderUsageBaseTestCase):
"""Tests for volume-backed servers."""
compute_driver = 'fake.SmallFakeDriver'
def setUp(self):
super(VolumeBackedServerTest, self).setUp()
self.compute1 = self._start_compute('host1')
self.compute2 = self._start_compute('host2')
self.flavor_id = self._create_flavor()
def _create_flavor(self):
body = {
'flavor': {
'id': 'vbst',
'name': 'special',
'ram': 512,
'vcpus': 1,
'disk': 10,
'OS-FLV-EXT-DATA:ephemeral': 20,
'swap': 5 * 1024,
'rxtx_factor': 1.0,
'os-flavor-access:is_public': True,
},
}
self.admin_api.post_flavor(body)
return body['flavor']['id']
def _create_server(self):
with nova.utils.temporary_mutation(self.api, microversion='2.35'):
image_id = self.api.get_images()[0]['id']
server_req = self._build_minimal_create_server_request(
self.api, 'trait-based-server',
image_uuid=image_id,
flavor_id=self.flavor_id, networks='none')
server = self.api.post_server({'server': server_req})
server = self._wait_for_state_change(self.api, server, 'ACTIVE')
return server
def _create_volume_backed_server(self):
self.useFixture(nova_fixtures.CinderFixtureNewAttachFlow(self))
volume_id = nova_fixtures.CinderFixtureNewAttachFlow.IMAGE_BACKED_VOL
server_req_body = {
# There is no imageRef because this is boot from volume.
'server': {
'flavorRef': self.flavor_id,
'name': 'test_volume_backed',
# We don't care about networking for this test. This
# requires microversion >= 2.37.
'networks': 'none',
'block_device_mapping_v2': [{
'boot_index': 0,
'uuid': volume_id,
'source_type': 'volume',
'destination_type': 'volume'
}]
}
}
server = self.api.post_server(server_req_body)
server = self._wait_for_state_change(self.api, server, 'ACTIVE')
return server
def test_ephemeral_has_disk_allocation(self):
server = self._create_server()
allocs = self._get_allocations_by_server_uuid(server['id'])
resources = list(allocs.values())[0]['resources']
self.assertIn('MEMORY_MB', resources)
# 10gb root, 20gb ephemeral, 5gb swap
self.assertEqual(35, resources['DISK_GB'])
def test_volume_backed_no_disk_allocation(self):
server = self._create_volume_backed_server()
allocs = self._get_allocations_by_server_uuid(server['id'])
resources = list(allocs.values())[0]['resources']
self.assertIn('MEMORY_MB', resources)
# 0gb root, 20gb ephemeral, 5gb swap
self.assertEqual(25, resources['DISK_GB'])
class TraitsBasedSchedulingTest(integrated_helpers.ProviderUsageBaseTestCase):
"""Tests for requesting a server with required traits in Placement"""

View File

@@ -4461,7 +4461,8 @@ class _ComputeAPIUnitTestMixIn(object):
'numa_topology': None,
'pci_requests': None}
security_groups = {}
block_device_mapping = [objects.BlockDeviceMapping(
block_device_mapping = objects.BlockDeviceMappingList(
objects=[objects.BlockDeviceMapping(
**fake_block_device.FakeDbBlockDeviceDict(
{
'id': 1,
@@ -4470,7 +4471,7 @@ class _ComputeAPIUnitTestMixIn(object):
'destination_type': 'volume',
'device_name': 'vda',
'boot_index': 0,
}))]
}))])
shutdown_terminate = True
instance_group = None
check_server_group_quota = False

View File

@@ -1142,7 +1142,7 @@ object_data = {
'PowerVMLiveMigrateData': '1.3-79c635ecf61d1d70b5b9fa04bf778a91',
'Quotas': '1.3-40fcefe522111dddd3e5e6155702cf4e',
'QuotasNoOp': '1.3-347a039fc7cfee7b225b68b5181e0733',
'RequestSpec': '1.10-afe714bc445ab7cb791150a775f3b779',
'RequestSpec': '1.11-851a690dbf116fb5165cc94ea6c85629',
'S3ImageMapping': '1.0-7dd7366a890d82660ed121de9092276e',
'SchedulerLimits': '1.0-249c4bd8e62a9b327b7026b7f19cc641',
'SchedulerRetries': '1.1-3c9c8b16143ebbb6ad7030e999d14cc0',

View File

@@ -299,7 +299,7 @@ class _TestRequestSpecObject(object):
spec = objects.RequestSpec.from_primitives(ctxt, spec_dict, filt_props)
mock_limits.assert_called_once_with({})
# Make sure that all fields are set using that helper method
skip = ['id', 'security_groups', 'network_metadata']
skip = ['id', 'security_groups', 'network_metadata', 'is_bfv']
for field in [f for f in spec.obj_fields if f not in skip]:
self.assertTrue(spec.obj_attr_is_set(field),
'Field: %s is not set' % field)
@@ -329,7 +329,7 @@ class _TestRequestSpecObject(object):
filter_properties, instance_group, instance.availability_zone,
objects.SecurityGroupList())
# Make sure that all fields are set using that helper method
skip = ['id', 'network_metadata']
skip = ['id', 'network_metadata', 'is_bfv']
for field in [f for f in spec.obj_fields if f not in skip]:
self.assertTrue(spec.obj_attr_is_set(field),
'Field: %s is not set' % field)

View File

@@ -0,0 +1,13 @@
---
fixes:
- |
Booting volume-backed instances no longer includes an incorrect allocation
against the compute node for the root disk. Historically, this has been
quite broken behavior in Nova, where volume-backed instances would count
against available space on the compute node, even though their storage
was provided by the volume service. Now, newly-booted volume-backed
instances will not create allocations of ``DISK_GB`` against the compute
node for the ``root_gb`` quantity in the flavor. Note that if you are
still using a scheduler configured with the (now deprecated)
DiskFilter (including deployments using CachingScheduler), the
above change will not apply to you.