From 07efd624b8f957a9288f5d833550a1a93b804b5d Mon Sep 17 00:00:00 2001 From: Xinran WANG Date: Tue, 7 May 2019 10:56:02 +0800 Subject: [PATCH] Placement report 1. Create resource providers, resource classes, and traits. 2. Support nested resource providers. 3. According to the review comments and replies in: https://review.opendev.org/#/c/626057/10/cyborg/common/placement_client.py, we should have a class that invokes the Placement API. This patch adds it. 4. Convert cyborg/objects/driver_objects/driver_device.py line endings from DOS to Unix, to avoid ^M at the end of each line. 5. Get rid of the legacy ProviderTree class. 6. More info for driver developers: each driver should report its resource class and traits in the driver_attribute field so that cyborg-conductor can parse them and report to Placement. 7. Update the deployable rp_uuid field after getting rp_uuid from Placement. Change-Id: Ib8f682255d2ae60eb615c90de0a0f7c83d1af54d --- .../accelerator/drivers/fpga/intel/sysinfo.py | 74 +- cyborg/agent/provider_tree.py | 670 ------------------ cyborg/agent/rc_fields.py | 71 -- cyborg/common/constants.py | 7 +- cyborg/common/exception.py | 12 + cyborg/common/placement_client.py | 248 ++++++- cyborg/conductor/manager.py | 167 ++++- cyborg/conf/default.py | 1 - cyborg/objects/deployable.py | 15 + cyborg/objects/device.py | 8 + .../driver_objects/driver_attribute.py | 8 + .../driver_objects/driver_deployable.py | 14 + .../objects/driver_objects/driver_device.py | 288 ++++---- .../drivers/fpga/intel/prepare_test_data.py | 4 +- test-requirements.txt | 1 + 15 files changed, 640 insertions(+), 948 deletions(-) delete mode 100644 cyborg/agent/provider_tree.py delete mode 100644 cyborg/agent/rc_fields.py diff --git a/cyborg/accelerator/drivers/fpga/intel/sysinfo.py b/cyborg/accelerator/drivers/fpga/intel/sysinfo.py index 700fdaf0..b4c2f977 100644 --- a/cyborg/accelerator/drivers/fpga/intel/sysinfo.py +++ b/cyborg/accelerator/drivers/fpga/intel/sysinfo.py @@ -24,7 +24,6 @@ 
import re from oslo_serialization import jsonutils from cyborg.accelerator.common import utils -from cyborg.agent import rc_fields from cyborg.objects.driver_objects import driver_deployable, driver_device,\ driver_attach_handle, driver_controlpath_id, driver_attribute from cyborg.common import constants @@ -32,8 +31,8 @@ from cyborg.common import constants PCI_DEVICES_PATH = "/sys/bus/pci/devices" PCI_DEVICES_PATH_PATTERN = "/sys/bus/pci/devices/*" -# TODO(shaohe) The KNOW_FPGAS can be configurable. -KNOW_FPGAS = [("0x8086", "0x09c4")] +# TODO(shaohe) The KNOWN_FPGAS can be configurable. +KNOWN_FPGAS = [("0x8086", "0x09c4")] INTEL_FPGA_DEV_PREFIX = "intel-fpga-dev" SYS_FPGA = "/sys/class/fpga" @@ -48,12 +47,7 @@ DEVICE_FILE_MAP = {"vendor": "vendor", DEVICE_FILE_HANDLER = {} DEVICE_EXPOSED = ["vendor", "device"] -RC_FPGA = rc_fields.ResourceClass.normalize_name( - rc_fields.ResourceClass.FPGA) - -RESOURCES = { - "fpga": RC_FPGA -} +PRODUCT_MAP = {"0x09c4": "PAC_ARRIA10"} DRIVER_NAME = "intel" @@ -66,7 +60,7 @@ def read_line(filename): def is_fpga(p): infos = (read_line(os.path.join(p, "vendor")), read_line(os.path.join(p, "device"))) - if infos in KNOW_FPGAS: + if infos in KNOWN_FPGAS: return os.path.realpath(p) @@ -80,7 +74,7 @@ def find_fpgas_by_know_list(): lambda p: ( read_line(os.path.join(p, "vendor")), read_line(os.path.join(p, "device")) - ) in KNOW_FPGAS, + ) in KNOWN_FPGAS, glob.glob(PCI_DEVICES_PATH_PATTERN)) @@ -165,24 +159,47 @@ def get_pf_bdf(bdf): return bdf -def get_afu_ids(name): +def get_afu_ids(device_name): return map( read_line, glob.glob( os.path.join( PCI_DEVICES_PATH_PATTERN, "fpga", - name, "intel-fpga-port.*", "afu_id") + device_name, "intel-fpga-port.*", "afu_id") ) ) -def get_traits(name, product_id): +def get_region_ids(device_name): + return map( + read_line, + glob.glob( + os.path.join( + SYS_FPGA, device_name, "device/physfn/fpga", + "intel-fpga-dev.*", "intel-fpga-fme.*", "pr/interface_id") + ) + ) + + +def get_traits(device_name, 
product_id, vf=True): + """Generate traits for devices. + : param devices_name: name of PF/VF, for example, "intel-fpga-dev.0". + : param product_id: product id of PF/VF, for example, "0x09c4". + : param vf: True if device_name is a VF, otherwise False. + """ # "region_id" not support at present, "CUSTOM_FPGA_REGION_INTEL_UUID" # "CUSTOM_PROGRAMMABLE" not support at present - traits = ["CUSTOM_FPGA_INTEL"] - for i in get_afu_ids(name): - l = "CUSTOM_FPGA_INTEL_FUNCTION_" + i.upper() - traits.append(l) + traits = [] + if not vf: + traits.append("CUSTOM_FPGA_INTEL") + traits.append("CUSTOM_FPGA_INTEL_" + PRODUCT_MAP.get(product_id)) + else: + for i in get_afu_ids(device_name): + l = "CUSTOM_FPGA_FUNCTION_ID_INTEL_" + i.upper() + traits.append(l) + for i in get_region_ids(device_name): + l = "CUSTOM_FPGA_REGION_INTEL_" + i.upper() + traits.append(l) return {"traits": traits} @@ -216,9 +233,9 @@ def fpga_tree(): if names: name = names[0] fpga["stub"] = False - traits = get_traits(name, fpga["product_id"]) + traits = get_traits(name, fpga["product_id"], vf) fpga.update(traits) - fpga["rc"] = RESOURCES["fpga"] + fpga["rc"] = constants.RESOURCES["FPGA"] return fpga devs = [] @@ -294,17 +311,28 @@ def _generate_attach_handle(fpga): def _generate_attribute_list(fpga): attr_list = [] + index = 0 for k, v in fpga.items(): if k == "rc": driver_attr = driver_attribute.DriverAttribute() - driver_attr.key = k - driver_attr.value = fpga.get(k, None) + driver_attr.key, driver_attr.value = k, v attr_list.append(driver_attr) if k == "traits": values = fpga.get(k, None) for val in values: driver_attr = driver_attribute.DriverAttribute() - driver_attr.key = "trait" + str(values.index(val)) + driver_attr.key = "trait" + str(index) + index = index + 1 driver_attr.value = val attr_list.append(driver_attr) + if fpga.get("regions"): + for vf in fpga["regions"]: + for k, values in vf.items(): + if k == "traits": + for val in values: + driver_attr = driver_attribute.DriverAttribute( + 
key="trait" + str(index), value=val) + index = index + 1 + # driver_attr.value = "CUSTOM_UPDATED_TRAITS2" + attr_list.append(driver_attr) return attr_list diff --git a/cyborg/agent/provider_tree.py b/cyborg/agent/provider_tree.py deleted file mode 100644 index 0ae558dc..00000000 --- a/cyborg/agent/provider_tree.py +++ /dev/null @@ -1,670 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -"""An object describing a tree of resource providers and their inventories. - -This object is not stored in the Nova API or cell databases; rather, this -object is constructed and used by the scheduler report client to track state -changes for resources on the hypervisor or baremetal node. As such, there are -no remoteable methods nor is there any interaction with the nova.db modules. -""" - -import collections -import copy - -import os_traits -from oslo_concurrency import lockutils -from oslo_log import log as logging -from oslo_utils import uuidutils - -from cyborg.common.i18n import _ - -LOG = logging.getLogger(__name__) -_LOCK_NAME = 'provider-tree-lock' - -# Point-in-time representation of a resource provider in the tree. -# Note that, whereas namedtuple enforces read-only-ness of instances as a -# whole, nothing prevents modification of the internals of attributes of -# complex types (children/inventory/traits/aggregates). However, any such -# modifications still have no effect on the ProviderTree the instance came -# from. 
Like, you can Sharpie a moustache on a Polaroid of my face, but that -# doesn't make a moustache appear on my actual face. -ProviderData = collections.namedtuple( - 'ProviderData', ['uuid', 'name', 'generation', 'parent_uuid', 'inventory', - 'traits', 'aggregates']) - - -class _Provider(object): - """Represents a resource provider in the tree. - - All operations against the tree should be done using the ProviderTree - interface, since it controls thread-safety. - """ - - def __init__(self, name, uuid=None, generation=None, parent_uuid=None): - if uuid is None: - uuid = uuidutils.generate_uuid() - self.uuid = uuid - self.name = name - self.generation = generation - self.parent_uuid = parent_uuid - # Contains a dict, keyed by uuid of child resource providers having - # this provider as a parent - self.children = {} - # dict of inventory records, keyed by resource class - self.inventory = {} - # Set of trait names - self.traits = set() - # Set of aggregate UUIDs - self.aggregates = set() - - @classmethod - def from_dict(cls, pdict): - """Factory method producing a _Provider based on a dict with - appropriate keys. - - :param pdict: Dictionary representing a provider, with keys 'name', - 'uuid', 'generation', 'parent_provider_uuid'. Of these, - only 'name' is mandatory. - """ - return cls(pdict['name'], uuid=pdict.get('uuid'), - generation=pdict.get('generation'), - parent_uuid=pdict.get('parent_provider_uuid')) - - def data(self): - """A collection of all informations of a provider. - - :Return: a collections.namedtuple - include inventory, traits, aggregates, uuid, name, generation, - and parent_uuid. 
- """ - inventory = copy.deepcopy(self.inventory) - traits = copy.copy(self.traits) - aggregates = copy.copy(self.aggregates) - return ProviderData( - self.uuid, self.name, self.generation, self.parent_uuid, - inventory, traits, aggregates) - - def get_provider_uuids(self): - """Returns a list, in top-down traversal order, of UUIDs of this - provider and all its descendants. - """ - ret = [self.uuid] - for child in self.children.values(): - ret.extend(child.get_provider_uuids()) - return ret - - def find(self, search): - """Find an expect one in the provider tree by match the serach. - - :param search: it can be the either name or uuid of an expect provider. - :return: the expect _Provider object or None. - """ - if self.name == search or self.uuid == search: - return self - if search in self.children: - return self.children[search] - if self.children: - for child in self.children.values(): - # We already searched for the child by UUID above, so here we - # just check for a child name match - if child.name == search: - return child - subchild = child.find(search) - if subchild: - return subchild - return None - - def add_child(self, provider): - self.children[provider.uuid] = provider - - def remove_child(self, provider): - if provider.uuid in self.children: - del self.children[provider.uuid] - - def has_inventory(self): - """Returns whether the provider has any inventory records at all.""" - return self.inventory != {} - - def has_inventory_changed(self, new): - """Returns whether the inventory has changed for the provider.""" - cur = self.inventory - if set(cur) != set(new): - return True - for key, cur_rec in cur.items(): - new_rec = new[key] - # If the new record contains new fields (e.g. 
we're adding on - # `reserved` or `allocation_ratio`) we want to make sure to pick - # them up - if set(new_rec) - set(cur_rec): - return True - for rec_key, cur_val in cur_rec.items(): - if rec_key not in new_rec: - # Deliberately don't want to compare missing keys in the - # *new* inventory record. For instance, we will be passing - # in fields like allocation_ratio in the current dict but - # the resource tracker may only pass in the total field. We - # want to return that inventory didn't change when the - # total field values are the same even if the - # allocation_ratio field is missing from the new record. - continue - if new_rec[rec_key] != cur_val: - return True - return False - - def _update_generation(self, generation): - if generation is not None and generation != self.generation: - msg_args = { - 'rp_uuid': self.uuid, - 'old': self.generation, - 'new': generation, - } - LOG.debug("Updating resource provider %(rp_uuid)s generation " - "from %(old)s to %(new)s", msg_args) - self.generation = generation - - def update_inventory(self, inventory, generation): - """Update the stored inventory for the provider along with a resource - provider generation to set the provider to. The method returns whether - the inventory has changed. - """ - self._update_generation(generation) - if self.has_inventory_changed(inventory): - self.inventory = copy.deepcopy(inventory) - return True - return False - - def have_traits_changed(self, new): - """Returns whether the provider's traits have changed.""" - return set(new) != self.traits - - def update_traits(self, new, generation=None): - """Update the stored traits for the provider along with a resource - provider generation to set the provider to. The method returns whether - the traits have changed. 
- """ - self._update_generation(generation) - if self.have_traits_changed(new): - self.traits = set(new) # create a copy of the new traits - return True - return False - - def has_traits(self, traits): - """Query whether the provider has certain traits. - - :param traits: Iterable of string trait names to look for. - :return: True if this provider has *all* of the specified traits; False - if any of the specified traits are absent. Returns True if - the traits parameter is empty. - """ - return not bool(set(traits) - self.traits) - - def have_aggregates_changed(self, new): - """Returns whether the provider's aggregates have changed.""" - return set(new) != self.aggregates - - def update_aggregates(self, new, generation=None): - """Update the stored aggregates for the provider along with a resource - provider generation to set the provider to. The method returns whether - the aggregates have changed. - """ - self._update_generation(generation) - if self.have_aggregates_changed(new): - self.aggregates = set(new) # create a copy of the new aggregates - return True - return False - - def in_aggregates(self, aggregates): - """Query whether the provider is a member of certain aggregates. - - :param aggregates: Iterable of string aggregate UUIDs to look for. - :return: True if this provider is a member of *all* of the specified - aggregates; False if any of the specified aggregates are - absent. Returns True if the aggregates parameter is empty. - """ - return not bool(set(aggregates) - self.aggregates) - - -class ProviderTree(object): - - def __init__(self): - """Create an empty provider tree.""" - self.lock = lockutils.internal_lock(_LOCK_NAME) - self.roots = [] - - def get_provider_uuids(self, name_or_uuid=None): - """Return a list, in top-down traversable order, of the UUIDs of all - providers (in a subtree). - - :param name_or_uuid: Provider name or UUID representing the root of a - subtree for which to return UUIDs. 
If not - specified, the method returns all UUIDs in the - ProviderTree. - """ - if name_or_uuid is not None: - with self.lock: - return self._find_with_lock(name_or_uuid).get_provider_uuids() - - # If no name_or_uuid, get UUIDs for all providers recursively. - ret = [] - with self.lock: - for root in self.roots: - ret.extend(root.get_provider_uuids()) - return ret - - def populate_from_iterable(self, provider_dicts): - """Populates this ProviderTree from an iterable of provider dicts. - - This method will ADD providers to the tree if provider_dicts contains - providers that do not exist in the tree already and will REPLACE - providers in the tree if provider_dicts contains providers that are - already in the tree. This method will NOT remove providers from the - tree that are not in provider_dicts. But if a parent provider is in - provider_dicts and the descendents are not, this method will remove the - descendents from the tree. - - :param provider_dicts: An iterable of dicts of resource provider - information. If a provider is present in - provider_dicts, all its descendants must also be - present. - :raises: ValueError if any provider in provider_dicts has a parent that - is not in this ProviderTree or elsewhere in provider_dicts. - """ - if not provider_dicts: - return - - # Map of provider UUID to provider dict for the providers we're - # *adding* via this method. - to_add_by_uuid = {pd['uuid']: pd for pd in provider_dicts} - - with self.lock: - # Sanity check for orphans. Every parent UUID must either be None - # (the provider is a root), or be in the tree already, or exist as - # a key in to_add_by_uuid (we're adding it). - all_parents = set([None]) | set(to_add_by_uuid) - # NOTE(efried): Can't use get_provider_uuids directly because we're - # already under lock. 
- for root in self.roots: - all_parents |= set(root.get_provider_uuids()) - missing_parents = set() - for pd in to_add_by_uuid.values(): - parent_uuid = pd.get('parent_provider_uuid') - if parent_uuid not in all_parents: - missing_parents.add(parent_uuid) - if missing_parents: - raise ValueError( - _("The following parents were not found: %s") % - ', '.join(missing_parents)) - - # Ready to do the work. - # Use to_add_by_uuid to keep track of which providers are left to - # be added. - while to_add_by_uuid: - # Find a provider that's suitable to inject. - for uuid, pd in to_add_by_uuid.items(): - # Roots are always okay to inject (None won't be a key in - # to_add_by_uuid). Otherwise, we have to make sure we - # already added the parent (and, by recursion, all - # ancestors) if present in the input. - parent_uuid = pd.get('parent_provider_uuid') - if parent_uuid not in to_add_by_uuid: - break - else: - # This should never happen - we already ensured all parents - # exist in the tree, which means we can't have any branches - # that don't wind up at the root, which means we can't have - # cycles. But to quell the paranoia... - raise ValueError( - _("Unexpectedly failed to find parents already in the" - "tree for any of the following: %s") % - ','.join(set(to_add_by_uuid))) - - # Add or replace the provider, either as a root or under its - # parent - try: - self._remove_with_lock(uuid) - except ValueError: - # Wasn't there in the first place - fine. - pass - - provider = _Provider.from_dict(pd) - if parent_uuid is None: - self.roots.append(provider) - else: - parent = self._find_with_lock(parent_uuid) - parent.add_child(provider) - - # Remove this entry to signify we're done with it. 
- to_add_by_uuid.pop(uuid) - - def _remove_with_lock(self, name_or_uuid): - found = self._find_with_lock(name_or_uuid) - if found.parent_uuid: - parent = self._find_with_lock(found.parent_uuid) - parent.remove_child(found) - else: - self.roots.remove(found) - - def remove(self, name_or_uuid): - """Safely removes the provider identified by the supplied name_or_uuid - parameter and all of its children from the tree. - - :raises ValueError if name_or_uuid points to a non-existing provider. - :param name_or_uuid: Either name or UUID of the resource provider to - remove from the tree. - """ - with self.lock: - self._remove_with_lock(name_or_uuid) - - def new_root(self, name, uuid, generation=None): - """Adds a new root provider to the tree, returning its UUID. - - :param name: The name of the new root provider - :param uuid: The UUID of the new root provider - :param generation: Generation to set for the new root provider - :returns: the UUID of the new provider - :raises: ValueError if a provider with the specified uuid already - exists in the tree. - """ - - with self.lock: - exists = True - try: - self._find_with_lock(uuid) - except ValueError: - exists = False - - if exists: - err = _("Provider %s already exists.") - raise ValueError(err % uuid) - - p = _Provider(name, uuid=uuid, generation=generation) - self.roots.append(p) - return p.uuid - - def _find_with_lock(self, name_or_uuid): - for root in self.roots: - found = root.find(name_or_uuid) - if found: - return found - raise ValueError(_("No such provider %s") % name_or_uuid) - - def data(self, name_or_uuid): - """Return a point-in-time copy of the specified provider's data. - - :param name_or_uuid: Either name or UUID of the resource provider whose - data is to be returned. - :return: ProviderData object representing the specified provider. - :raises: ValueError if a provider with name_or_uuid was not found in - the tree. 
- """ - with self.lock: - return self._find_with_lock(name_or_uuid).data() - - def exists(self, name_or_uuid): - """Given either a name or a UUID, return True if the tree contains the - provider, False otherwise. - """ - with self.lock: - try: - self._find_with_lock(name_or_uuid) - return True - except ValueError: - return False - - def new_child(self, name, parent, uuid=None, generation=None): - """Creates a new child provider with the given name and uuid under the - given parent. - - :param name: The name of the new child provider - :param parent: Either name or UUID of the parent provider - :param uuid: The UUID of the new child provider - :param generation: Generation to set for the new child provider - :returns: the UUID of the new provider - - :raises ValueError if a provider with the specified uuid or name - already exists; or if parent_uuid points to a nonexistent - provider. - """ - with self.lock: - try: - self._find_with_lock(uuid or name) - except ValueError: - pass - else: - err = _("Provider %s already exists.") - raise ValueError(err % (uuid or name)) - - parent_node = self._find_with_lock(parent) - p = _Provider(name, uuid, generation, parent_node.uuid) - parent_node.add_child(p) - return p.uuid - - def has_inventory(self, name_or_uuid): - """Returns True if the provider identified by name_or_uuid has any - inventory records at all. - - :raises: ValueError if a provider with uuid was not found in the tree. - :param name_or_uuid: Either name or UUID of the resource provider - """ - with self.lock: - p = self._find_with_lock(name_or_uuid) - return p.has_inventory() - - def has_inventory_changed(self, name_or_uuid, inventory): - """Returns True if the supplied inventory is different for the provider - with the supplied name or UUID. - - :raises: ValueError if a provider with name_or_uuid was not found in - the tree. - :param name_or_uuid: Either name or UUID of the resource provider to - query inventory for. 
- :param inventory: dict, keyed by resource class, of inventory - information. - """ - with self.lock: - provider = self._find_with_lock(name_or_uuid) - return provider.has_inventory_changed(inventory) - - def update_inventory(self, name_or_uuid, inventory, generation=None): - """Given a name or UUID of a provider and a dict of inventory resource - records, update the provider's inventory and set the provider's - generation. - - :returns: True if the inventory has changed. - - :note: The provider's generation is always set to the supplied - generation, even if there were no changes to the inventory. - - :raises: ValueError if a provider with name_or_uuid was not found in - the tree. - :param name_or_uuid: Either name or UUID of the resource provider to - update inventory for. - :param inventory: dict, keyed by resource class, of inventory - information. - :param generation: The resource provider generation to set. If not - specified, the provider's generation is not changed. - """ - with self.lock: - provider = self._find_with_lock(name_or_uuid) - return provider.update_inventory(inventory, generation) - - def has_sharing_provider(self, resource_class): - """Returns whether the specified provider_tree contains any sharing - providers of inventory of the specified resource_class. - """ - for rp_uuid in self.get_provider_uuids(): - pdata = self.data(rp_uuid) - has_rc = resource_class in pdata.inventory - is_sharing = os_traits.MISC_SHARES_VIA_AGGREGATE in pdata.traits - if has_rc and is_sharing: - return True - return False - - def has_traits(self, name_or_uuid, traits): - """Given a name or UUID of a provider, query whether that provider has - *all* of the specified traits. - - :raises: ValueError if a provider with name_or_uuid was not found in - the tree. - :param name_or_uuid: Either name or UUID of the resource provider to - query for traits. - :param traits: Iterable of string trait names to search for. 
- :return: True if this provider has *all* of the specified traits; False - if any of the specified traits are absent. Returns True if - the traits parameter is empty, even if the provider has no - traits. - """ - with self.lock: - provider = self._find_with_lock(name_or_uuid) - return provider.has_traits(traits) - - def have_traits_changed(self, name_or_uuid, traits): - """Returns True if the specified traits list is different for the - provider with the specified name or UUID. - - :raises: ValueError if a provider with name_or_uuid was not found in - the tree. - :param name_or_uuid: Either name or UUID of the resource provider to - query traits for. - :param traits: Iterable of string trait names to compare against the - provider's traits. - """ - with self.lock: - provider = self._find_with_lock(name_or_uuid) - return provider.have_traits_changed(traits) - - def update_traits(self, name_or_uuid, traits, generation=None): - """Given a name or UUID of a provider and an iterable of string trait - names, update the provider's traits and set the provider's generation. - - :returns: True if the traits list has changed. - - :note: The provider's generation is always set to the supplied - generation, even if there were no changes to the traits. - - :raises: ValueError if a provider with name_or_uuid was not found in - the tree. - :param name_or_uuid: Either name or UUID of the resource provider to - update traits for. - :param traits: Iterable of string trait names to set. - :param generation: The resource provider generation to set. If None, - the provider's generation is not changed. - """ - with self.lock: - provider = self._find_with_lock(name_or_uuid) - return provider.update_traits(traits, generation=generation) - - def add_traits(self, name_or_uuid, *traits): - """Set traits on a provider, without affecting existing traits. - - :param name_or_uuid: The name or UUID of the provider whose traits are - to be affected. 
- :param traits: String names of traits to be added. - """ - with self.lock: - provider = self._find_with_lock(name_or_uuid) - final_traits = provider.traits | set(traits) - provider.update_traits(final_traits) - - def remove_traits(self, name_or_uuid, *traits): - """Unset traits on a provider, without affecting other existing traits. - - :param name_or_uuid: The name or UUID of the provider whose traits are - to be affected. - :param traits: String names of traits to be removed. - """ - with self.lock: - provider = self._find_with_lock(name_or_uuid) - final_traits = provider.traits - set(traits) - provider.update_traits(final_traits) - - def in_aggregates(self, name_or_uuid, aggregates): - """Given a name or UUID of a provider, query whether that provider is a - member of *all* the specified aggregates. - - :raises: ValueError if a provider with name_or_uuid was not found in - the tree. - :param name_or_uuid: Either name or UUID of the resource provider to - query for aggregates. - :param aggregates: Iterable of string aggregate UUIDs to search for. - :return: True if this provider is associated with *all* of the - specified aggregates; False if any of the specified aggregates - are absent. Returns True if the aggregates parameter is - empty, even if the provider has no aggregate associations. - """ - with self.lock: - provider = self._find_with_lock(name_or_uuid) - return provider.in_aggregates(aggregates) - - def have_aggregates_changed(self, name_or_uuid, aggregates): - """Returns True if the specified aggregates list is different for the - provider with the specified name or UUID. - - :raises: ValueError if a provider with name_or_uuid was not found in - the tree. - :param name_or_uuid: Either name or UUID of the resource provider to - query aggregates for. - :param aggregates: Iterable of string aggregate UUIDs to compare - against the provider's aggregates. 
- """ - with self.lock: - provider = self._find_with_lock(name_or_uuid) - return provider.have_aggregates_changed(aggregates) - - def update_aggregates(self, name_or_uuid, aggregates, generation=None): - """Given a name or UUID of a provider and an iterable of string - aggregate UUIDs, update the provider's aggregates and set the - provider's generation. - - :returns: True if the aggregates list has changed. - - :note: The provider's generation is always set to the supplied - generation, even if there were no changes to the aggregates. - - :raises: ValueError if a provider with name_or_uuid was not found in - the tree. - :param name_or_uuid: Either name or UUID of the resource provider to - update aggregates for. - :param aggregates: Iterable of string aggregate UUIDs to set. - :param generation: The resource provider generation to set. If None, - the provider's generation is not changed. - """ - with self.lock: - provider = self._find_with_lock(name_or_uuid) - return provider.update_aggregates(aggregates, - generation=generation) - - def add_aggregates(self, name_or_uuid, *aggregates): - """Set aggregates on a provider, without affecting existing aggregates. - - :param name_or_uuid: The name or UUID of the provider whose aggregates - are to be affected. - :param aggregates: String UUIDs of aggregates to be added. - """ - with self.lock: - provider = self._find_with_lock(name_or_uuid) - final_aggs = provider.aggregates | set(aggregates) - provider.update_aggregates(final_aggs) - - def remove_aggregates(self, name_or_uuid, *aggregates): - """Unset aggregates on a provider, without affecting other existing - aggregates. - - :param name_or_uuid: The name or UUID of the provider whose aggregates - are to be affected. - :param aggregates: String UUIDs of aggregates to be removed. 
- """ - with self.lock: - provider = self._find_with_lock(name_or_uuid) - final_aggs = provider.aggregates - set(aggregates) - provider.update_aggregates(final_aggs) diff --git a/cyborg/agent/rc_fields.py b/cyborg/agent/rc_fields.py deleted file mode 100644 index 44e39b7a..00000000 --- a/cyborg/agent/rc_fields.py +++ /dev/null @@ -1,71 +0,0 @@ -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -"""Standard Resource Class Fields.""" - -# NOTE(cdent): This is kept as its own independent file as it is used by -# both the placement and nova sides of the placement interaction. On the -# placement side we don't want to import all the nova fields, nor all the -# nova objects (which are automatically loaded and registered if the -# nova.objects package is imported). - -import re - -from oslo_versionedobjects import fields - - -class ResourceClass(fields.StringField): - """Classes of resources provided to consumers.""" - - CUSTOM_NAMESPACE = 'CUSTOM_' - """All non-standard resource classes must begin with this string.""" - - VCPU = 'VCPU' - MEMORY_MB = 'MEMORY_MB' - DISK_GB = 'DISK_GB' - PCI_DEVICE = 'PCI_DEVICE' - SRIOV_NET_VF = 'SRIOV_NET_VF' - NUMA_SOCKET = 'NUMA_SOCKET' - NUMA_CORE = 'NUMA_CORE' - NUMA_THREAD = 'NUMA_THREAD' - NUMA_MEMORY_MB = 'NUMA_MEMORY_MB' - IPV4_ADDRESS = 'IPV4_ADDRESS' - VGPU = 'VGPU' - VGPU_DISPLAY_HEAD = 'VGPU_DISPLAY_HEAD' - FPGA = 'ACCELERATOR_FPGA' - - # The ordering here is relevant. If you must add a value, only - # append. 
- STANDARD = (VCPU, MEMORY_MB, DISK_GB, PCI_DEVICE, SRIOV_NET_VF, - NUMA_SOCKET, NUMA_CORE, NUMA_THREAD, NUMA_MEMORY_MB, - IPV4_ADDRESS, VGPU, VGPU_DISPLAY_HEAD) - - # This is the set of standard resource classes that existed before - # we opened up for custom resource classes in version 1.1 of various - # objects in nova/objects/resource_provider.py - V1_0 = (VCPU, MEMORY_MB, DISK_GB, PCI_DEVICE, SRIOV_NET_VF, NUMA_SOCKET, - NUMA_CORE, NUMA_THREAD, NUMA_MEMORY_MB, IPV4_ADDRESS) - - @classmethod - def normalize_name(cls, rc_name): - if rc_name is None: - return None - # Replace non-alphanumeric characters with underscores - norm_name = re.sub('[^0-9A-Za-z]+', '_', rc_name) - # Bug #1762789: Do .upper after replacing non alphanumerics. - norm_name = norm_name.upper() - norm_name = cls.CUSTOM_NAMESPACE + norm_name - return norm_name - - -class ResourceClassField(fields.AutoTypedField): - AUTO_TYPE = ResourceClass() diff --git a/cyborg/common/constants.py b/cyborg/common/constants.py index dc2d8fb9..6adee341 100644 --- a/cyborg/common/constants.py +++ b/cyborg/common/constants.py @@ -12,7 +12,7 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. - +import os_resource_classes as orc CONDUCTOR_TOPIC = 'cyborg-conductor' AGENT_TOPIC = 'cyborg-agent' @@ -31,3 +31,8 @@ DEVICE_TYPE = (DEVICE_GPU, DEVICE_FPGA, DEVICE_AICHIP) # 'TEST_PCI': used by fake driver, ignored by Nova virt driver. 
ATTACH_HANDLE_TYPES = (AH_TYPE_PCI, AH_TYPE_MDEV, AH_TYPE_TEST_PCI) = ( "PCI", "MDEV", "TEST_PCI") + +# Resource Class +RESOURCES = { + "FPGA": orc.FPGA +} diff --git a/cyborg/common/exception.py b/cyborg/common/exception.py index fbebf881..4c2cc528 100644 --- a/cyborg/common/exception.py +++ b/cyborg/common/exception.py @@ -281,6 +281,10 @@ class AttributeAlreadyExists(CyborgException): # An exception with this name is used on both sides of the placement/ # cyborg interaction. +class ResourceClassNotFound(NotFound): + msg_fmt = _("No such resource class %(name_or_uuid)s.") + + class ResourceProviderInUse(CyborgException): msg_fmt = _("Resource provider has allocations.") @@ -341,6 +345,14 @@ class ResourceProviderUpdateConflict(PlacementAPIConflict): "provider %(uuid)s (generation %(generation)d): %(error)s") +class TraitCreationFailed(CyborgException): + msg_fmt = _("Failed to create trait %(name)s: %(error)s") + + +class TraitRetrievalFailed(CyborgException): + msg_fmt = _("Failed to retrieve traits from the placement API: %(error)s") + + class InvalidResourceClass(Invalid): msg_fmt = _("Resource class '%(resource_class)s' invalid.") diff --git a/cyborg/common/placement_client.py b/cyborg/common/placement_client.py index a75e2e2a..0f90762f 100644 --- a/cyborg/common/placement_client.py +++ b/cyborg/common/placement_client.py @@ -13,38 +13,58 @@ # License for the specific language governing permissions and limitations # under the License. 
+from cyborg.common import exception +from cyborg.conf import CONF +from keystoneauth1 import exceptions as ks_exc +from oslo_log import log as logging +from oslo_middleware import request_id + from openstack import connection -from oslo_log import log as logging -from cyborg.conf import CONF - -_CONN = None LOG = logging.getLogger(__name__) +NESTED_PROVIDER_API_VERSION = '1.14' +POST_RPS_RETURNS_PAYLOAD_API_VERSION = '1.20' +PLACEMENT_CLIENT_SEMAPHORE = 'placement_client' +_CONN = None -def get_placement(): - return _PlacementClient() - - -class _PlacementClient(object): - +class PlacementClient(object): + """Client class for reporting to placement.""" def __init__(self): global _CONN if _CONN is None: default_user = 'devstack-admin' try: - # TODO() CONF access fails. auth_user = CONF.placement.username or default_user except Exception: auth_user = default_user _CONN = connection.Connection(cloud=auth_user) self._client = _CONN.placement + def get(self, url, version=None, global_request_id=None): + return self._client.get(url, microversion=version, + global_request_id=global_request_id) + + def post(self, url, data, version=None, global_request_id=None): + return self._client.post(url, json=data, microversion=version, + global_request_id=global_request_id) + + def put(self, url, data, version=None, global_request_id=None): + kwargs = {} + if data is not None: + kwargs['json'] = data + return self._client.put(url, microversion=version, + global_request_id=global_request_id, + **kwargs) + + def delete(self, url, version=None, global_request_id=None): + return self._client.delete(url, microversion=version, + global_request_id=global_request_id) + def _get_rp_traits(self, rp_uuid): - placement = self._client - resp = placement.get("/resource_providers/%s/traits" % rp_uuid, - microversion='1.6') + resp = self.get("/resource_providers/%s/traits" % rp_uuid, + version='1.6') if resp.status_code != 200: raise Exception( "Failed to get traits for rp %s: HTTP %d: %s" % @@ 
-52,9 +72,10 @@ class _PlacementClient(object): return resp.json() def _ensure_traits(self, trait_names): - placement = self._client + # TODO(Xinran): maintain a reference count of how many RPs use + # this trait and do the deletion only when the last RP is deleted. for trait in trait_names: - resp = placement.put('/traits/' + trait, microversion='1.6') + resp = self.put("/traits/%s" % trait, None, version='1.6') if resp.status_code == 201: LOG.info("Created trait %(trait)s", {"trait": trait}) elif resp.status_code == 204: @@ -65,9 +86,14 @@ class _PlacementClient(object): (trait, resp.status_code, resp.text)) def _put_rp_traits(self, rp_uuid, traits_json): - placement = self._client - resp = placement.put("/resource_providers/%s/traits" % rp_uuid, - json=traits_json, microversion='1.6') + generation = self.get_resource_provider( + resource_provider_uuid=rp_uuid)['generation'] + payload = { + 'resource_provider_generation': generation, + 'traits': traits_json["traits"], + } + resp = self.put( + "/resource_providers/%s/traits" % rp_uuid, payload, version='1.6') if resp.status_code != 200: raise Exception( "Failed to set traits to %s for rp %s: HTTP %d: %s" % @@ -79,8 +105,15 @@ class _PlacementClient(object): traits = list(set(traits_json['traits'] + trait_names)) traits_json['traits'] = traits self._put_rp_traits(rp_uuid, traits_json) - LOG.info('Added traits %(traits)s to RP %(rp_uuid)s', - {"traits": traits, "rp_uuid": rp_uuid}) + + def delete_trait_by_name(self, rp_uuid, trait_name): + traits_json = self._get_rp_traits(rp_uuid) + traits = [ + trait for trait in traits_json['traits'] + if trait != trait_name + ] + traits_json['traits'] = traits + self._put_rp_traits(rp_uuid, traits_json) def delete_traits_with_prefixes(self, rp_uuid, trait_prefixes): traits_json = self._get_rp_traits(rp_uuid) @@ -90,5 +123,174 @@ class _PlacementClient(object): for prefix in trait_prefixes)] traits_json['traits'] = traits self._put_rp_traits(rp_uuid, traits_json) - 
LOG.info('Deleted traits %(traits)s to RP %(rp_uuid)s', - {"traits": traits, "rp_uuid": rp_uuid}) + + def get_placement_request_id(self, response): + if response is not None: + return response.headers.get(request_id.HTTP_RESP_HEADER_REQUEST_ID) + + def _update_inventory( + self, resource_provider_uuid, inventories, + resource_provider_generation=None): + if resource_provider_generation is None: + resource_provider_generation = self.get_resource_provider( + resource_provider_uuid=resource_provider_uuid)['generation'] + url = '/resource_providers/%s/inventories' % resource_provider_uuid + body = { + 'resource_provider_generation': resource_provider_generation, + 'inventories': inventories + } + try: + return self.put(url, body).json() + except ks_exc.NotFound: + raise exception.PlacementResourceProviderNotFound( + resource_provider=resource_provider_uuid) + + def get_resource_provider(self, resource_provider_uuid): + """Get resource provider by UUID. + + :param resource_provider_uuid: UUID of the resource provider. + :raises PlacementResourceProviderNotFound: For failure to find resource + :returns: The Resource Provider matching the UUID. + """ + url = '/resource_providers/%s' % resource_provider_uuid + try: + return self.get(url).json() + except ks_exc.NotFound: + raise exception.PlacementResourceProviderNotFound( + resource_provider=resource_provider_uuid) + + def _create_resource_provider(self, context, uuid, name, + parent_provider_uuid=None): + """Calls the placement API to create a new resource provider record. + + :param context: The security context + :param uuid: UUID of the new resource provider + :param name: Name of the resource provider + :param parent_provider_uuid: Optional UUID of the immediate parent + :return: A dict of resource provider information object representing + the newly-created resource provider. + :raise: ResourceProviderCreationFailed or + ResourceProviderRetrievalFailed on error. 
+ """ + url = "/resource_providers" + payload = { + 'uuid': uuid, + 'name': name, + } + if parent_provider_uuid is not None: + payload['parent_provider_uuid'] = parent_provider_uuid + + # Bug #1746075: First try the microversion that returns the new + # provider's payload. + resp = self.post(url, payload, + version=POST_RPS_RETURNS_PAYLOAD_API_VERSION, + global_request_id=context.global_id) + + placement_req_id = self.get_placement_request_id(resp) + + if resp: + msg = ("[%(placement_req_id)s] Created resource provider record " + "via placement API for resource provider with UUID " + "%(uuid)s and name %(name)s.") + args = { + 'uuid': uuid, + 'name': name, + 'placement_req_id': placement_req_id, + } + LOG.info(msg, args) + return resp.json() + + def ensure_resource_provider(self, context, uuid, name=None, + parent_provider_uuid=None): + resp = self.get("/resource_providers/%s" % uuid, version='1.6') + if resp.status_code == 200: + LOG.info("Resource Provider %(uuid)s already exists", + {"uuid": uuid}) + else: + LOG.info("Creating resource provider %(provider)s", + {"provider": name or uuid}) + try: + resp = self._create_resource_provider(context, uuid, name, + parent_provider_uuid) + except Exception: + raise exception.ResourceProviderCreationFailed( + name=name or uuid) + return uuid + + def ensure_resource_classes(self, context, names): + """Make sure resource classes exist.""" + version = '1.7' + to_ensure = set(names) + for name in to_ensure: + # no payload on the put request + resp = self.put( + "/resource_classes/%s" % name, None, version=version, + global_request_id=context.global_id) + if not resp: + msg = ("Failed to ensure resource class record with placement " + "API for resource class %(rc_name)s. 
Got " + "%(status_code)d: %(err_text)s.") + args = { + 'rc_name': name, + 'status_code': resp.status_code, + 'err_text': resp.text, + } + LOG.error(msg, args) + raise exception.InvalidResourceClass(resource_class=name) + + def _get_providers_in_tree(self, context, uuid): + """Queries the placement API for a list of the resource providers in + the tree associated with the specified UUID. + + :param context: The security context + :param uuid: UUID identifier for the resource provider to look up + :return: A list of dicts of resource provider information, which may be + empty if no provider exists with the specified UUID. + :raise: ResourceProviderRetrievalFailed on error. + """ + resp = self.get("/resource_providers?in_tree=%s" % uuid, + version=NESTED_PROVIDER_API_VERSION, + global_request_id=context.global_id) + + if resp.status_code == 200: + return resp.json()['resource_providers'] + + # Some unexpected error + placement_req_id = self.get_placement_request_id(resp) + msg = ("[%(placement_req_id)s] Failed to retrieve resource provider " + "tree from placement API for UUID %(uuid)s. Got " + "%(status_code)d: %(err_text)s.") + args = { + 'uuid': uuid, + 'status_code': resp.status_code, + 'err_text': resp.text, + 'placement_req_id': placement_req_id, + } + LOG.error(msg, args) + raise exception.ResourceProviderRetrievalFailed(uuid=uuid) + + def _delete_provider(self, rp_uuid, global_request_id=None): + resp = self.delete('/resource_providers/%s' % rp_uuid, + global_request_id=global_request_id) + # Check for 404 since we don't need to warn/raise if we tried to delete + # something which doesn"t actually exist. + if resp.ok: + LOG.info("Deleted resource provider %s", rp_uuid) + return + + msg = ("[%(placement_req_id)s] Failed to delete resource provider " + "with UUID %(uuid)s from the placement API. 
Got " + "%(status_code)d: %(err_text)s.") + args = { + 'placement_req_id': self.get_placement_request_id(resp), + 'uuid': rp_uuid, + 'status_code': resp.status_code, + 'err_text': resp.text + } + LOG.error(msg, args) + # On conflict, the caller may wish to delete allocations and + # redrive. (Note that this is not the same as a + # PlacementAPIConflict case.) + if resp.status_code == 409: + raise exception.ResourceProviderInUse() + raise exception.ResourceProviderDeletionFailed(uuid=rp_uuid) diff --git a/cyborg/conductor/manager.py b/cyborg/conductor/manager.py index c1c75c50..87c2b35e 100644 --- a/cyborg/conductor/manager.py +++ b/cyborg/conductor/manager.py @@ -15,7 +15,11 @@ from oslo_log import log as logging import oslo_messaging as messaging +from oslo_utils import encodeutils +import uuid +from cyborg.common import exception +from cyborg.common import placement_client from cyborg.conf import CONF from cyborg import objects from cyborg.objects.attach_handle import AttachHandle @@ -23,6 +27,7 @@ from cyborg.objects.attribute import Attribute from cyborg.objects.control_path import ControlpathID from cyborg.objects.deployable import Deployable from cyborg.objects.device import Device +from cyborg.objects.driver_objects.driver_device import DriverDeployable from cyborg.objects.driver_objects.driver_device import DriverDevice LOG = logging.getLogger(__name__) @@ -38,6 +43,7 @@ class ConductorManager(object): super(ConductorManager, self).__init__() self.topic = topic self.host = host or CONF.host + self.placement_client = placement_client.PlacementClient() def periodic_tasks(self, context, raise_on_error=False): pass @@ -133,11 +139,10 @@ class ConductorManager(object): old_driver_device_list = DriverDevice.list(context, hostname) # TODO(wangzhh): Remove invalid driver_devices without controlpath_id. # Then diff two driver device list. 
- self.drv_device_make_diff(context, hostname, old_driver_device_list, - driver_device_list) + self.drv_device_make_diff(context, hostname, + old_driver_device_list, driver_device_list) - @classmethod - def drv_device_make_diff(cls, context, host, old_driver_device_list, + def drv_device_make_diff(self, context, host, old_driver_device_list, new_driver_device_list): """Compare new driver-side device object list with the old one in one host. @@ -155,6 +160,7 @@ class ConductorManager(object): same = set(new_cpid_list) & set(old_cpid_list) - set(stub_cpid_list) added = set(new_cpid_list) - same - set(stub_cpid_list) deleted = set(old_cpid_list) - same - set(stub_cpid_list) + host_rp = self._get_root_provider(context, host) for s in same: # get the driver_dev_obj, diff the driver_device layer new_driver_dev_obj = new_driver_device_list[new_cpid_list.index(s)] @@ -179,21 +185,29 @@ class ConductorManager(object): setattr(dev_obj, c_k, getattr(new_driver_dev_obj, c_k)) dev_obj.save(context) # diff the internal layer: driver_deployable - cls.drv_deployable_make_diff(context, dev_obj.id, cpid_obj.id, - old_driver_dev_obj.deployable_list, - new_driver_dev_obj.deployable_list) + self.drv_deployable_make_diff(context, dev_obj.id, cpid_obj.id, + old_driver_dev_obj.deployable_list, + new_driver_dev_obj.deployable_list, + host_rp) # device is deleted. 
for d in deleted: old_driver_dev_obj = old_driver_device_list[old_cpid_list.index(d)] + for driver_dep_obj in old_driver_dev_obj.deployable_list: + rp_uuid = self.get_pr_uuid_from_obj(driver_dep_obj) + self._delete_provider_and_sub_providers(context, rp_uuid) old_driver_dev_obj.destroy(context, host) # device is added for a in added: new_driver_dev_obj = new_driver_device_list[new_cpid_list.index(a)] new_driver_dev_obj.create(context, host) + for driver_dep_obj in new_driver_dev_obj.deployable_list: + self.get_placement_needed_info_and_report(context, + driver_dep_obj, + host_rp) - @classmethod - def drv_deployable_make_diff(cls, context, device_id, cpid_id, - old_driver_dep_list, new_driver_dep_list): + def drv_deployable_make_diff(self, context, device_id, cpid_id, + old_driver_dep_list, new_driver_dep_list, + host_rp): """Compare new driver-side deployable object list with the old one in one host. """ @@ -216,31 +230,45 @@ class ConductorManager(object): if dep_obj.num_accelerators != new_driver_dep_obj.num_accelerators: dep_obj.num_accelerators = new_driver_dep_obj.num_accelerators dep_obj.save(context) + rp_uuid = self.get_pr_uuid_from_obj(new_driver_dep_obj) + rc = new_driver_dep_obj.name + inv_date = \ + self._gen_resource_inventory( + rc, total=dep_obj.num_accelerators) + self.placement_client._placement_client._update_inventory( + rp_uuid, inv_date) # diff the internal layer: driver_attribute_list new_attribute_list = [] if hasattr(new_driver_dep_obj, 'attribute_list'): new_attribute_list = new_driver_dep_obj.attribute_list - cls.drv_attr_make_diff(context, dep_obj.id, - old_driver_dep_obj.attribute_list, - new_attribute_list) + self.drv_attr_make_diff(context, dep_obj.id, + old_driver_dep_obj.attribute_list, + new_attribute_list) # diff the internal layer: driver_attach_hanle_list - cls.drv_ah_make_diff(context, dep_obj.id, cpid_id, - old_driver_dep_obj.attach_handle_list, - new_driver_dep_obj.attach_handle_list) + self.drv_ah_make_diff(context, 
dep_obj.id, cpid_id, + old_driver_dep_obj.attach_handle_list, + new_driver_dep_obj.attach_handle_list) # name is deleted. for d in deleted: old_driver_dep_obj = old_driver_dep_list[old_name_list.index(d)] + rp_uuid = self.get_pr_uuid_from_obj(old_driver_dep_obj) old_driver_dep_obj.destroy(context, device_id) + self._delete_provider_and_sub_providers(context, rp_uuid) # name is added. for a in added: new_driver_dep_obj = new_driver_dep_list[new_name_list.index(a)] new_driver_dep_obj.create(context, device_id, cpid_id) + self.get_placement_needed_info_and_report(context, + new_driver_dep_obj, + host_rp) - @classmethod - def drv_attr_make_diff(cls, context, dep_id, old_driver_attr_list, + def drv_attr_make_diff(self, context, dep_id, old_driver_attr_list, new_driver_attr_list): """Diff new dirver-side Attribute Object lists with the old one.""" LOG.info("Start differing attributes.") + dep_obj = Deployable.get_by_id(context, dep_id) + driver_dep = DriverDeployable.get_by_name(context, dep_obj.name) + rp_uuid = self.get_pr_uuid_from_obj(driver_dep) new_key_list = [driver_attr_obj.key for driver_attr_obj in new_driver_attr_list] old_key_list = [driver_attr_obj.key for driver_attr_obj in @@ -255,17 +283,26 @@ class ConductorManager(object): attr_obj = Attribute.get_by_dep_key(context, dep_id, s) attr_obj.value = new_driver_attr_obj.value attr_obj.save(context) + # Update traits here. + if new_driver_attr_obj.key.startswith("trait"): + self.placement_client.delete_trait_by_name( + rp_uuid, old_driver_attr_obj.value) + self.placement_client.add_traits_to_rp( + rp_uuid, [new_driver_attr_obj.value]) # key is deleted. 
deleted = set(old_key_list) - same for d in deleted: - old_driver_attr_obj = old_driver_attr_list[ - old_key_list.index(d)] - old_driver_attr_obj.destroy(context, dep_id) + old_driver_attr_obj = old_driver_attr_list[old_key_list.index(d)] + self.placement_client.delete_trait_by_name( + rp_uuid, old_driver_attr_obj.value) + old_driver_attr_obj.delete_by_key(context, dep_id, d) # key is added. added = set(new_key_list) - same for a in added: new_driver_attr_obj = new_driver_attr_list[new_key_list.index(a)] new_driver_attr_obj.create(context, dep_id) + self.placement_client.add_traits_to_rp( + rp_uuid, [new_driver_attr_obj.value]) @classmethod def drv_ah_make_diff(cls, context, dep_id, cpid_id, old_driver_ah_list, @@ -302,3 +339,91 @@ class ConductorManager(object): for a in added: new_driver_ah_obj = new_driver_ah_list[new_info_list.index(a)] new_driver_ah_obj.create(context, dep_id, cpid_id) + + def _get_root_provider(self, context, hostname): + try: + provider = self.placement_client.get( + "resource_providers?name=" + hostname).json() + pr_uuid = provider["resource_providers"][0]["uuid"] + return pr_uuid + except IndexError: + LOG.error("Error, provider %(hostname)s can not be found", + {"hostname": hostname}) + except Exception as e: + LOG.error("Error, could not access placement. 
Details: %(info)s", + {"info": e}) + return + + def _get_sub_provider(self, context, parent, name): + name = encodeutils.safe_encode(name) + old_sub_pr_uuid = str(uuid.uuid3(uuid.NAMESPACE_DNS, name)) + new_sub_pr_uuid = self.placement_client.ensure_resource_provider( + context, old_sub_pr_uuid, + name=name, parent_provider_uuid=parent) + if old_sub_pr_uuid == new_sub_pr_uuid: + return new_sub_pr_uuid + else: + raise exception.Conflict() + + def provider_report(self, context, name, resource_class, traits, total, + parent): + try: + self.placement_client.get("/resource_classes/%s" % + resource_class, + version='1.26') + except Exception as e: + self.placement_client.ensure_resource_classes(context, + [resource_class]) + LOG.error("Error, could not access resource_classes." + "Details: %(info)s", {"info": e}) + + sub_pr_uuid = self._get_sub_provider( + context, parent, name) + result = self._gen_resource_inventory(resource_class, total) + self.placement_client._update_inventory(sub_pr_uuid, result) + # traits = ["CUSTOM_FPGA_INTEL", "CUSTOM_FPGA_INTEL_ARRIA10", + # "CUSTOM_FPGA_INTEL_REGION_UUID", + # "CUSTOM_FPGA_FUNCTION_ID_INTEL_UUID", + # "CUSTOM_PROGRAMMABLE", + # "CUSTOM_FPGA_NETWORK"] + self.placement_client.add_traits_to_rp(sub_pr_uuid, traits) + return sub_pr_uuid + + def get_placement_needed_info_and_report(self, context, obj, + parent_uuid=None): + pr_name = obj.name + attrs = obj.attribute_list + resource_class = [i.value for i in attrs if i.key == 'rc'][0] + traits = [i.value for i in attrs + if encodeutils.safe_encode(i.key).startswith("trait")] + total = obj.num_accelerators + rp_uuid = self.provider_report(context, pr_name, resource_class, + traits, total, parent_uuid) + dep_obj = Deployable.get_by_name(context, pr_name) + dep_obj["rp_uuid"] = rp_uuid + dep_obj.save(context) + + def _gen_resource_inventory(self, name, total=0, max=1, min=1, step=1): + result = {} + result[name] = { + 'total': total, + 'min_unit': min, + 'max_unit': max, + 'step_size': 
step, + } + return result + + def get_pr_uuid_from_obj(self, obj): + pr_name = encodeutils.safe_encode(obj.name) + return str(uuid.uuid3(uuid.NAMESPACE_DNS, pr_name)) + + def _delete_provider_and_sub_providers(self, context, rp_uuid): + rp_in_tree = self.placement_client._get_providers_in_tree(context, + rp_uuid) + for rp in rp_in_tree[::-1]: + if rp["parent_provider_uuid"] == rp_uuid or rp["uuid"] == rp_uuid: + self.placement_client._delete_provider(rp["uuid"]) + LOG.info("Sucessfully delete resource provider %(rp_uuid)s", + {"rp_uuid": rp["uuid"]}) + if rp["uuid"] == rp_uuid: + break diff --git a/cyborg/conf/default.py b/cyborg/conf/default.py index a2fac775..433d52ef 100644 --- a/cyborg/conf/default.py +++ b/cyborg/conf/default.py @@ -106,7 +106,6 @@ def register_placement_opts(cfg=cfg.CONF): DEFAULT_OPTS = (exc_log_opts + service_opts + path_opts) -PLACEMENT_OPTS = (placement_opts) def list_opts(): diff --git a/cyborg/objects/deployable.py b/cyborg/objects/deployable.py index 929f51d7..8e0a4d34 100644 --- a/cyborg/objects/deployable.py +++ b/cyborg/objects/deployable.py @@ -115,6 +115,12 @@ class Deployable(base.CyborgObject, object_base.VersionedObjectDictCompat): def save(self, context): """Update a Deployable record in the DB.""" updates = self.obj_get_changes() + # TODO(Xinran): Will remove this if find some better way. 
+ updates.pop("uuid", None) + updates.pop("created_at", None) + if "updated_at" in updates.keys() and \ + updates["updated_at"] is not None: + updates["updated_at"] = updates["updated_at"].replace(tzinfo=None) db_dep = self.dbapi.deployable_update(context, self.uuid, updates) self.obj_reset_changes() self._from_db_object(self, db_dep) @@ -213,3 +219,12 @@ class Deployable(base.CyborgObject, object_base.VersionedObjectDictCompat): return dep_obj_list[0] else: return None + + @classmethod + def get_by_name(cls, context, name): + dep_filter = {'name': name} + dep_obj_list = Deployable.list(context, dep_filter) + if len(dep_obj_list) != 0: + return dep_obj_list[0] + else: + return None diff --git a/cyborg/objects/device.py b/cyborg/objects/device.py index c83a0044..d1dc0605 100644 --- a/cyborg/objects/device.py +++ b/cyborg/objects/device.py @@ -94,3 +94,11 @@ class Device(base.CyborgObject, object_base.VersionedObjectDictCompat): dev_filter = {'hostname': hostname} device_obj_list = Device.list(context, dev_filter) return device_obj_list + + @classmethod + def get_by_device_id(cls, context, device_id): + """get device object list from the device ID. 
return [] if not + exist.""" + dev_filter = {'device_id': device_id} + device_obj_list = Device.list(context, dev_filter) + return device_obj_list diff --git a/cyborg/objects/driver_objects/driver_attribute.py b/cyborg/objects/driver_objects/driver_attribute.py index 3981c856..c5d71160 100644 --- a/cyborg/objects/driver_objects/driver_attribute.py +++ b/cyborg/objects/driver_objects/driver_attribute.py @@ -45,6 +45,14 @@ class DriverAttribute(base.DriverObjectBase, for attr_obj in attr_obj_list: attr_obj.destroy(context) + @classmethod + def delete_by_key(cls, context, deployable_id, key): + """Delete driver-side attribute list from the DB.""" + attr_obj_list = Attribute.get_by_deployable_id(context, deployable_id) + for attr_obj in attr_obj_list: + if key == attr_obj.key: + attr_obj.destroy(context) + @classmethod def list(cls, context, deployable_id): """Form driver-side attribute list for one deployable.""" diff --git a/cyborg/objects/driver_objects/driver_deployable.py b/cyborg/objects/driver_objects/driver_deployable.py index ae38cdd5..38411766 100644 --- a/cyborg/objects/driver_objects/driver_deployable.py +++ b/cyborg/objects/driver_objects/driver_deployable.py @@ -103,3 +103,17 @@ class DriverDeployable(base.DriverObjectBase, attach_handle_list=driver_ah_obj_list) driver_dep_obj_list.append(driver_dep_obj) return driver_dep_obj_list + + @classmethod + def get_by_name(cls, context, name): + """Form driver-side Deployable object list from DB for one device.""" + # get deployable_obj_list for one device_id + dep_obj = Deployable.get_by_name(context, name) + driver_ah_obj_list = DriverAttachHandle.list(context, dep_obj.id) + # get driver_attr_obj_list fro this dep_obj + driver_attr_obj_list = DriverAttribute.list(context, dep_obj.id) + driver_dep_obj = cls(context=context, name=dep_obj.name, + num_accelerators=dep_obj.num_accelerators, + attribute_list=driver_attr_obj_list, + attach_handle_list=driver_ah_obj_list) + return driver_dep_obj diff --git 
a/cyborg/objects/driver_objects/driver_device.py b/cyborg/objects/driver_objects/driver_device.py index 74bad5ee..b410ab36 100644 --- a/cyborg/objects/driver_objects/driver_device.py +++ b/cyborg/objects/driver_objects/driver_device.py @@ -1,136 +1,152 @@ -# Copyright 2018 Lenovo (Beijing) Co.,LTD. -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -from oslo_versionedobjects import base as object_base -from cyborg.objects import base -from cyborg.objects import fields as object_fields -from cyborg.objects.driver_objects.driver_deployable import DriverDeployable -from cyborg.objects.driver_objects.driver_controlpath_id import \ - DriverControlPathID -from cyborg.objects.device import Device -from cyborg.objects.deployable import Deployable -from cyborg.objects.control_path import ControlpathID -from cyborg.objects.attach_handle import AttachHandle - - -@base.CyborgObjectRegistry.register -class DriverDevice(base.DriverObjectBase, - object_base.VersionedObjectDictCompat): - # Version 1.0: Initial version - VERSION = '1.0' - - fields = { - 'vendor': object_fields.StringField(nullable=False), - 'model': object_fields.StringField(nullable=False), - 'type': object_fields.DeviceTypeField(nullable=False), - 'std_board_info': object_fields.StringField(nullable=True), - # vendor board info should be a dict for driver-specific resource - # provider. 
- 'vendor_board_info': object_fields.StringField(nullable=True), - # hostname will be set by the agent, so driver don't need to report. - # Each controlpath_id corresponds to a different PF. For now - # we are sticking with a single cpid. - 'controlpath_id': object_fields.ObjectField('DriverControlPathID', - nullable=False), - 'deployable_list': object_fields.ListOfObjectsField('DriverDeployable', - default=[], - nullable=False), - 'stub': object_fields.BooleanField(nullable=False, default=False) - } - - def create(self, context, host): - """Create a driver-side Device Object into DB. This object will be - stored in many db tables: device, deployable, attach_handle, - controlpath_id etc. by calling related Object.""" - # first store in device table through Device Object. - - device_obj = Device(context=context, - type=self.type, - vendor=self.vendor, - model=self.model, - hostname=host - ) - if hasattr(self, 'std_board_info'): - device_obj.std_board_info = self.std_board_info - if hasattr(self, 'vendor_board_info'): - device_obj.vendor_board_info = self.vendor_board_info - device_obj.create(context) - - # for the controlpath_id, call driver_controlpath_id to create. - cpid_obj = self.controlpath_id.create(context, device_obj.id) - # for deployable_list, call internal layer object: driver_deployable - # to create. - for driver_deployable in self.deployable_list: - driver_deployable.create(context, device_obj.id, cpid_obj.id) - - def destroy(self, context, host): - """Delete a driver-side Device Object from db. This should - delete the internal layer objects.""" - # get dev_obj_list from hostname - device_obj = self.get_device_obj(context, host) - # delete deployable_list first. 
- for driver_deployable in self.deployable_list: - driver_deployable.destroy(context, device_obj.id) - if hasattr(self.controlpath_id, 'cpid_info'): - cpid_obj = ControlpathID.get_by_device_id_cpidinfo( - context, device_obj.id, self.controlpath_id.cpid_info) - # delete controlpath_id - cpid_obj.destroy(context) - # delete the device - device_obj.destroy(context) - - def get_device_obj(self, context, host): - """ - :param context: requested context. - :param host: hostname of the node. - :return: a device object of current driver device object. It will - return on value because it has controlpath_id. - """ - # get dev_obj_list from hostname - device_obj_list = Device.get_list_by_hostname(context, host) - # use controlpath_id.cpid_info to identiy one Device. - for device_obj in device_obj_list: - # get cpid_obj, could be empty or only one value. - cpid_obj = ControlpathID.get_by_device_id_cpidinfo( - context, device_obj.id, self.controlpath_id.cpid_info) - # find the one cpid_obj with cpid_info - if cpid_obj is not None: - return device_obj - - @classmethod - def list(cls, context, host): - """Form driver-side device object list from DB for one host. - A list may contains driver_device_object without controlpath_id.(In - the case some of controlpath_id can't store successfully but its - devices stores successfully. - )""" - # get dev_obj_list from hostname - dev_obj_list = Device.get_list_by_hostname(context, host) - driver_dev_obj_list = [] - for dev_obj in dev_obj_list: - cpid = DriverControlPathID.get(context, dev_obj.id) - # NOTE: will not return device without controlpath_id. 
- if cpid is not None: - driver_dev_obj = \ - cls(context=context, vendor=dev_obj.vendor, - model=dev_obj.model, type=dev_obj.type, - std_board_info=dev_obj.std_board_info, - vendor_board_info=dev_obj.vendor_board_info, - controlpath_id=cpid, - deployable_list=DriverDeployable.list(context, - dev_obj.id) - ) - driver_dev_obj_list.append(driver_dev_obj) - return driver_dev_obj_list +# Copyright 2018 Lenovo (Beijing) Co.,LTD. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
from oslo_versionedobjects import base as object_base
from cyborg.objects import base
from cyborg.objects import fields as object_fields
from cyborg.objects.driver_objects.driver_deployable import DriverDeployable
from cyborg.objects.driver_objects.driver_controlpath_id import \
    DriverControlPathID
from cyborg.objects.device import Device
from cyborg.objects.deployable import Deployable
from cyborg.objects.control_path import ControlpathID
from cyborg.objects.attach_handle import AttachHandle


@base.CyborgObjectRegistry.register
class DriverDevice(base.DriverObjectBase,
                   object_base.VersionedObjectDictCompat):
    """Driver-side view of one accelerator device.

    A DriverDevice bundles the device description together with its
    control path id and its deployables. ``create``/``destroy`` fan the
    object out to (or remove it from) the underlying ``Device``,
    ``ControlpathID`` and deployable objects.
    """
    # Version 1.0: Initial version
    VERSION = '1.0'

    fields = {
        'vendor': object_fields.StringField(nullable=False),
        'model': object_fields.StringField(nullable=False),
        'type': object_fields.DeviceTypeField(nullable=False),
        'std_board_info': object_fields.StringField(nullable=True),
        # vendor board info should be a dict for driver-specific resource
        # provider.
        'vendor_board_info': object_fields.StringField(nullable=True),
        # hostname will be set by the agent, so drivers don't need to
        # report it.
        # Each controlpath_id corresponds to a different PF. For now
        # we are sticking with a single cpid.
        'controlpath_id': object_fields.ObjectField('DriverControlPathID',
                                                    nullable=False),
        'deployable_list': object_fields.ListOfObjectsField(
            'DriverDeployable', default=[], nullable=False),
        'stub': object_fields.BooleanField(nullable=False, default=False)
    }

    def create(self, context, host):
        """Store this driver-side device and its child objects.

        The device itself is created through a ``Device`` object, then the
        control path id and every deployable in ``deployable_list`` are
        created against the new device id.

        :param context: requested context.
        :param host: hostname stored on the created ``Device``.
        """
        # First store the device itself through the Device object.
        device_obj = Device(context=context,
                            type=self.type,
                            vendor=self.vendor,
                            model=self.model,
                            hostname=host)
        # The board info fields are optional. Probe them with
        # obj_attr_is_set() rather than hasattr(): reading an unset
        # versioned-object field triggers obj_load_attr(), and Python 3's
        # hasattr() only swallows AttributeError.
        for field_name in ('std_board_info', 'vendor_board_info'):
            if self.obj_attr_is_set(field_name):
                setattr(device_obj, field_name, getattr(self, field_name))
        device_obj.create(context)

        # The control path id is created by its own driver object.
        cpid_obj = self.controlpath_id.create(context, device_obj.id)
        # Each deployable is created against the new device and cpid.
        for driver_deployable in self.deployable_list:
            driver_deployable.create(context, device_obj.id, cpid_obj.id)

    def destroy(self, context, host):
        """Delete this driver-side device and its child objects.

        Deployables are removed first, then the control path id, and
        finally the device itself.

        :param context: requested context.
        :param host: hostname used to look the device up.
        """
        device_obj = self.get_device_obj(context, host)
        # Delete the deployables first.
        for driver_deployable in self.deployable_list:
            driver_deployable.destroy(context, device_obj.id)
        # See create() for why obj_attr_is_set() is used instead of
        # hasattr().
        if self.controlpath_id.obj_attr_is_set('cpid_info'):
            cpid_obj = ControlpathID.get_by_device_id_cpidinfo(
                context, device_obj.id, self.controlpath_id.cpid_info)
            # Delete the control path id.
            cpid_obj.destroy(context)
        # Delete the device last.
        device_obj.destroy(context)

    def get_device_obj(self, context, host):
        """Find the ``Device`` that matches this driver device on a host.

        :param context: requested context.
        :param host: hostname of the node.
        :return: the first device on ``host`` for which a control path id
            with this object's ``cpid_info`` exists, or None when no
            device matches.
        """
        # Use controlpath_id.cpid_info to identify one Device among the
        # devices of the host.
        for device_obj in Device.get_list_by_hostname(context, host):
            # The lookup yields None when the device has no such cpid.
            cpid_obj = ControlpathID.get_by_device_id_cpidinfo(
                context, device_obj.id, self.controlpath_id.cpid_info)
            if cpid_obj is not None:
                return device_obj
        return None

    @classmethod
    def list(cls, context, host):
        """Build the driver-side device objects of one host.

        Devices for which no control path id is found are left out of
        the result.

        :param context: requested context.
        :param host: hostname of the node.
        :return: a list of DriverDevice objects.
        """
        driver_dev_obj_list = []
        for dev_obj in Device.get_list_by_hostname(context, host):
            cpid = DriverControlPathID.get(context, dev_obj.id)
            # NOTE: will not return device without controlpath_id.
            if cpid is None:
                continue
            driver_dev_obj_list.append(
                cls(context=context, vendor=dev_obj.vendor,
                    model=dev_obj.model, type=dev_obj.type,
                    std_board_info=dev_obj.std_board_info,
                    vendor_board_info=dev_obj.vendor_board_info,
                    controlpath_id=cpid,
                    deployable_list=DriverDeployable.list(context,
                                                          dev_obj.id)))
        return driver_dev_obj_list

    def get_device_obj_by_device_id(self, context, device_id):
        """Fetch the ``Device`` with the given id.

        :param context: requested context.
        :param device_id: id passed to ``Device.get_by_device_id``.
        :return: the device object returned by that lookup.
        """
        device_obj = Device.get_by_device_id(context, device_id)
        # NOTE(review): the control path id lookup below does not affect
        # the return value. The call is kept because it may raise for a
        # bad device/cpid pair - confirm and drop it if it cannot.
        ControlpathID.get_by_device_id_cpidinfo(
            context, device_obj.id, self.controlpath_id.cpid_info)
        return device_obj