ARQ bind and unbind support for vGPU
This patch is part of the vGPU support feature in Cyborg. It implements ARQ bind and unbind for vGPU resources. Co-Authored-By: Wenping Song <songwenping@inspur.com> Change-Id: I32c3b81345c6ce83834a83c64b88e37926724f16
This commit is contained in:
parent
79e1928554
commit
4b34d897d2
@ -19,6 +19,7 @@ from oslo_log import log as logging
|
||||
|
||||
import re
|
||||
|
||||
import cyborg.common.exception as exception
|
||||
import cyborg.conf
|
||||
import cyborg.privsep
|
||||
|
||||
@ -41,6 +42,27 @@ def lspci_privileged():
|
||||
return processutils.execute(*cmd)
|
||||
|
||||
|
||||
@cyborg.privsep.sys_admin_pctxt.entrypoint
def create_mdev_privileged(pci_addr, mdev_type, ah_uuid):
    """Create a mediated device by writing its UUID to a sysfs node.

    Registered as an entrypoint of the ``sys_admin_pctxt`` privsep context.

    :param pci_addr: name of the parent device directory under
        ``/sys/class/mdev_bus``.
    :param mdev_type: name of the type directory under the parent
        device's ``mdev_supported_types``.
    :param ah_uuid: attach handle UUID; this string is written to the
        type's ``create`` file.
    :returns: ``ah_uuid``, unchanged.
    :raises AttachHandleUUIDNeeded: if ``ah_uuid`` is None.
    """
    # Reject a missing UUID before touching the filesystem.
    if ah_uuid is None:
        raise exception.AttachHandleUUIDNeeded()

    template = '/sys/class/mdev_bus/{0}/mdev_supported_types/{1}/create'
    create_node = template.format(pci_addr, mdev_type)
    with open(create_node, 'w') as sysfs_file:
        sysfs_file.write(ah_uuid)
    return ah_uuid
|
||||
|
||||
|
||||
@cyborg.privsep.sys_admin_pctxt.entrypoint
def remove_mdev_privileged(physical_device, mdev_type, medv_uuid):
    """Remove a mediated device by writing "1" to its sysfs remove node.

    Registered as an entrypoint of the ``sys_admin_pctxt`` privsep context.

    :param physical_device: name of the parent device directory under
        ``/sys/class/mdev_bus``.
    :param mdev_type: name of the type directory under the parent
        device's ``mdev_supported_types``.
    :param medv_uuid: name of the mediated device directory under the
        type's ``devices`` directory.
    """
    remove_node = (
        '/sys/class/mdev_bus/{0}/mdev_supported_types/'
        '{1}/devices/{2}/remove'
    ).format(physical_device, mdev_type, medv_uuid)
    with open(remove_node, 'w') as sysfs_file:
        sysfs_file.write("1")
|
||||
|
||||
|
||||
def get_pci_devices(pci_flags, vendor_id=None):
|
||||
device_for_vendor_out = []
|
||||
all_device_out = []
|
||||
|
@ -21,6 +21,7 @@ from oslo_service import periodic_task
|
||||
from oslo_utils import uuidutils
|
||||
|
||||
from cyborg.accelerator.drivers.fpga.base import FPGADriver
|
||||
from cyborg.accelerator.drivers.gpu import utils as gpu_utils
|
||||
from cyborg.agent.resource_tracker import ResourceTracker
|
||||
from cyborg.agent.rpcapi import AgentAPI
|
||||
from cyborg.common import exception
|
||||
@ -80,3 +81,11 @@ class AgentManager(periodic_task.PeriodicTasks):
|
||||
def update_available_resource(self, context, startup=True):
|
||||
"""Update all kinds of accelerator resources from their drivers."""
|
||||
self._rt.update_usage(context)
|
||||
|
||||
def create_vgpu_mdev(self, context, pci_addr, asked_type, ah_uuid):
    """Create a vGPU mediated device through the privileged GPU helper.

    :param context: request context; not used by this method.
    :param pci_addr: forwarded as the helper's PCI address argument.
    :param asked_type: forwarded as the helper's mdev type argument.
    :param ah_uuid: forwarded as the helper's attach handle UUID.
    """
    LOG.debug('Instantiate a mediated device')
    mdev_args = (pci_addr, asked_type, ah_uuid)
    gpu_utils.create_mdev_privileged(*mdev_args)
|
||||
|
||||
def remove_vgpu_mdev(self, context, pci_addr, asked_type, ah_uuid):
    """Remove a vGPU mediated device through the privileged GPU helper.

    :param context: request context; not used by this method.
    :param pci_addr: forwarded as the helper's physical device argument.
    :param asked_type: forwarded as the helper's mdev type argument.
    :param ah_uuid: forwarded as the helper's mediated device UUID.
    """
    LOG.debug('Remove a vgpu mdev')
    mdev_args = (pci_addr, asked_type, ah_uuid)
    gpu_utils.remove_mdev_privileged(*mdev_args)
|
||||
|
@ -61,3 +61,25 @@ class AgentAPI(object):
|
||||
controlpath_id=controlpath_id,
|
||||
bitstream_uuid=bitstream_uuid,
|
||||
driver_name=driver_name)
|
||||
|
||||
def create_vgpu_mdev(self, context, hostname, pci_addr,
                     asked_type, ah_uuid):
    """Issue a 'create_vgpu_mdev' RPC call to the agent on a host.

    :param context: request context passed through to the RPC call.
    :param hostname: used as the ``server`` of the prepared RPC client.
    :param pci_addr: sent as the ``pci_addr`` RPC argument.
    :param asked_type: sent as the ``asked_type`` RPC argument.
    :param ah_uuid: sent as the ``ah_uuid`` RPC argument.
    :returns: whatever the RPC call returns.
    """
    # Adjacent string literals are joined with no separator, so the first
    # piece must end with a space to keep the two fields apart in the log.
    LOG.debug('Agent create_vgpu_mdev: hostname: (%s), pci_address: (%s) '
              'gpu_id: (%s)', hostname, pci_addr, ah_uuid)
    version = '1.0'
    cctxt = self.client.prepare(server=hostname, version=version)
    return cctxt.call(context, 'create_vgpu_mdev',
                      pci_addr=pci_addr,
                      asked_type=asked_type,
                      ah_uuid=ah_uuid)
|
||||
|
||||
def remove_vgpu_mdev(self, context, hostname, pci_addr,
                     asked_type, ah_uuid):
    """Issue a 'remove_vgpu_mdev' RPC call to the agent on a host.

    :param context: request context passed through to the RPC call.
    :param hostname: used as the ``server`` of the prepared RPC client.
    :param pci_addr: sent as the ``pci_addr`` RPC argument.
    :param asked_type: sent as the ``asked_type`` RPC argument.
    :param ah_uuid: sent as the ``ah_uuid`` RPC argument.
    :returns: whatever the RPC call returns.
    """
    LOG.debug('Agent remove_vgpu_mdev: hostname: (%s) '
              'gpu_id: (%s)', hostname, ah_uuid)
    rpc_kwargs = {
        'pci_addr': pci_addr,
        'asked_type': asked_type,
        'ah_uuid': ah_uuid,
    }
    target = self.client.prepare(server=hostname, version='1.0')
    return target.call(context, 'remove_vgpu_mdev', **rpc_kwargs)
|
||||
|
@ -60,6 +60,7 @@ class ARQ(base.APIBase):
|
||||
"""The UUID of the instance project_id associated with this ARQ, if any"""
|
||||
|
||||
attach_handle_type = wtypes.text
|
||||
attach_handle_uuid = wtypes.text
|
||||
attach_handle_info = {wtypes.text: wtypes.text}
|
||||
|
||||
links = wsme.wsattr([link.Link], readonly=True)
|
||||
|
@ -92,6 +92,10 @@ class AttachHandleAlreadyExists(CyborgException):
|
||||
_msg_fmt = _("AttachHandle with uuid %(uuid)s already exists.")
|
||||
|
||||
|
||||
class AttachHandleUUIDNeeded(CyborgException):
    """Error for an operation that was not given an attach handle UUID."""

    _msg_fmt = _("Need to provide AttachHandle uuid.")
|
||||
|
||||
|
||||
class ControlpathIDAlreadyExists(CyborgException):
|
||||
_msg_fmt = _("ControlpathID with uuid %(uuid)s already exists.")
|
||||
|
||||
|
@ -370,7 +370,7 @@ class ConductorManager(object):
|
||||
"resource_providers?name=" + hostname).json()
|
||||
pr_uuid = provider["resource_providers"][0]["uuid"]
|
||||
return pr_uuid
|
||||
except IndexError:
|
||||
except (IndexError, KeyError):
|
||||
raise exception.PlacementResourceProviderNotFound(
|
||||
resource_provider=hostname)
|
||||
|
||||
|
@ -48,6 +48,7 @@ class ARQ(base.CyborgObject, object_base.VersionedObjectDictCompat):
|
||||
|
||||
# Fields populated by Cyborg after binding
|
||||
'attach_handle_type': object_fields.StringField(nullable=True),
|
||||
'attach_handle_uuid': object_fields.StringField(nullable=True),
|
||||
'attach_handle_info': object_fields.DictOfStringsField(nullable=True),
|
||||
}
|
||||
|
||||
|
@ -13,11 +13,14 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import json
|
||||
|
||||
from openstack import connection
|
||||
from oslo_log import log as logging
|
||||
from oslo_utils import versionutils
|
||||
from oslo_versionedobjects import base as object_base
|
||||
|
||||
from cyborg.agent.rpcapi import AgentAPI
|
||||
from cyborg.common import constants
|
||||
from cyborg.common.constants import ARQ_STATES_TRANSFORM_MATRIX
|
||||
from cyborg.common import exception
|
||||
@ -78,6 +81,10 @@ class ExtARQ(base.CyborgObject, object_base.VersionedObjectDictCompat,
|
||||
if target_version < (1, 2) and 'deployable_id' in primitive:
|
||||
del primitive['deployable_id']
|
||||
|
||||
def __init__(self, *args, **kwargs):
    """Initialize the object and give it an agent RPC API client.

    All positional and keyword arguments are forwarded unchanged to the
    parent initializer.
    """
    super(ExtARQ, self).__init__(*args, **kwargs)
    # Stored on the instance so other methods can reach the agent API.
    agent_api = AgentAPI()
    self.agent = agent_api
|
||||
|
||||
def create(self, context, device_profile_id=None):
|
||||
"""Create an ExtARQ record in the DB."""
|
||||
if 'device_profile_name' not in self.arq and not device_profile_id:
|
||||
@ -213,6 +220,16 @@ class ExtARQ(base.CyborgObject, object_base.VersionedObjectDictCompat,
|
||||
try:
|
||||
ah = AttachHandle.allocate(context, deployable.id)
|
||||
self.attach_handle_id = ah.id
|
||||
# if attach_handle is a vgpu, create the mdev in the sys path
|
||||
if ah.attach_type == 'MDEV':
|
||||
attach_info = json.loads(ah.attach_info)
|
||||
pci_addr = "{}:{}:{}.{}".format(
|
||||
attach_info['domain'], attach_info['bus'],
|
||||
attach_info['device'], attach_info['function'])
|
||||
hostname = self.arq.hostname
|
||||
asked_type = attach_info['asked_type']
|
||||
self.agent.create_vgpu_mdev(
|
||||
context, hostname, pci_addr, asked_type, ah.uuid)
|
||||
except Exception as e:
|
||||
LOG.error("Failed to allocate attach handle for ARQ %s"
|
||||
"from deployable %s. Reason: %s",
|
||||
@ -237,9 +254,17 @@ class ExtARQ(base.CyborgObject, object_base.VersionedObjectDictCompat,
|
||||
# if (self.arq.state == constants.ARQ_DELETING
|
||||
# or self.arq.state == ARQ_UNBOUND):
|
||||
|
||||
def _deallocate_attach_handle(self, context, ah_id):
|
||||
def _deallocate_attach_handle(self, context, ah_id, hostname):
|
||||
try:
|
||||
attach_handle = AttachHandle.get_by_id(context, ah_id)
|
||||
if attach_handle.attach_type == 'MDEV':
|
||||
attach_info = json.loads(attach_handle.attach_info)
|
||||
pci_addr = "{}:{}:{}.{}".format(
|
||||
attach_info['domain'], attach_info['bus'],
|
||||
attach_info['device'], attach_info['function'])
|
||||
self.agent.remove_vgpu_mdev(
|
||||
context, hostname, pci_addr,
|
||||
attach_info['asked_type'], attach_handle.uuid)
|
||||
attach_handle.deallocate(context)
|
||||
except Exception as e:
|
||||
LOG.error("Failed to deallocate attach handle %s for ARQ %s."
|
||||
@ -252,6 +277,7 @@ class ExtARQ(base.CyborgObject, object_base.VersionedObjectDictCompat,
|
||||
|
||||
def unbind(self, context):
|
||||
arq = self.arq
|
||||
hostname = arq.hostname
|
||||
arq.hostname = None
|
||||
arq.device_rp_uuid = None
|
||||
arq.instance_uuid = None
|
||||
@ -260,7 +286,7 @@ class ExtARQ(base.CyborgObject, object_base.VersionedObjectDictCompat,
|
||||
# Unbind: mark attach handles as freed
|
||||
ah_id = self.attach_handle_id
|
||||
if ah_id:
|
||||
self._deallocate_attach_handle(context, ah_id)
|
||||
self._deallocate_attach_handle(context, ah_id, hostname)
|
||||
self.attach_handle_id = None
|
||||
self.deployable_id = None
|
||||
self.save(context)
|
||||
@ -285,6 +311,7 @@ class ExtARQ(base.CyborgObject, object_base.VersionedObjectDictCompat,
|
||||
if db_ah is not None:
|
||||
db_extarq['attach_handle_type'] = db_ah['attach_type']
|
||||
db_extarq['attach_handle_info'] = db_ah['attach_info']
|
||||
db_extarq['attach_handle_uuid'] = db_ah['uuid']
|
||||
else:
|
||||
raise exception.ResourceNotFound(
|
||||
resource='Attach Handle',
|
||||
|
@ -352,7 +352,8 @@ class TestExtARQObject(base.DbTestCase):
|
||||
self, mock_deallocate, mock_ah, mock_check_state):
|
||||
obj_extarq = self.fake_obj_extarqs[0]
|
||||
mock_ah.return_value = self.fake_obj_ahs[0]
|
||||
obj_extarq._deallocate_attach_handle(self.context, mock_ah.id)
|
||||
obj_extarq._deallocate_attach_handle(
|
||||
self.context, mock_ah.id, obj_extarq.arq.hostname)
|
||||
mock_check_state.assert_not_called()
|
||||
|
||||
@mock.patch('logging.LoggerAdapter.error')
|
||||
@ -370,7 +371,8 @@ class TestExtARQObject(base.DbTestCase):
|
||||
mock_deallocate.side_effect = e
|
||||
self.assertRaises(
|
||||
exception.ResourceNotFound,
|
||||
obj_extarq._deallocate_attach_handle, self.context, mock_ah.id)
|
||||
obj_extarq._deallocate_attach_handle, self.context, mock_ah.id,
|
||||
obj_extarq.arq.hostname)
|
||||
mock_log.assert_called_once_with(
|
||||
msg, mock_ah.id, obj_extarq.arq.uuid, str(e))
|
||||
mock_check_state.assert_called_once_with(
|
||||
|
Loading…
Reference in New Issue
Block a user