From 39e518456c0fcde8a2111d583e52c5dfe6fa515d Mon Sep 17 00:00:00 2001 From: Gorka Eguileor Date: Tue, 19 Apr 2022 16:29:21 +0200 Subject: [PATCH] Fix reported storage_protocol Cinder has multiple drivers reporting the same storage_protocol in their stats but with different strings: For example we have the following variants: - nfs and NFS - nveof, NVMe-oF, NVMeOF - iSCSI, iscsi - FC, fc, fibre_channel This patch documents the right values (to use existing constants) and makes the scheduler treat all variants as if they were equal. This is done by changing the storage_protocol into a list in the scheduler upon reception of the stats from the volume. Most of the code in the scheduler is already able to handle this, the only changes necessary are on the filter and goodness function code, which will now run the respective functions for all the different protocol alternatives and select the right one, but only when the function actually uses the storage_protocol. The API will now report the preferred protocol name in operations like "cinder get-pools --detail". Closes-Bug: #1966103 Change-Id: I07d74078dbb102490dd722029e32c74cec3aa44c --- cinder/api/views/capabilities.py | 9 ++++- cinder/api/views/scheduler_stats.py | 12 ++++++- cinder/interface/volume_driver.py | 12 ++++++- cinder/scheduler/filters/driver_filter.py | 34 +++++++++++++++--- cinder/scheduler/host_manager.py | 27 ++++++++++++-- cinder/scheduler/weights/goodness.py | 35 ++++++++++++++++--- .../tests/unit/scheduler/test_host_manager.py | 3 +- doc/source/contributor/drivers.rst | 18 ++++++++++ ...fix-storage_protocol-6baf55e13249463c.yaml | 19 ++++++++++ 9 files changed, 152 insertions(+), 17 deletions(-) create mode 100644 releasenotes/notes/fix-storage_protocol-6baf55e13249463c.yaml diff --git a/cinder/api/views/capabilities.py b/cinder/api/views/capabilities.py index a95ab0e1454..71dc4ad79ef 100644 --- a/cinder/api/views/capabilities.py +++ b/cinder/api/views/capabilities.py @@ -27,13 +27,20 @@ class ViewBuilder(common.ViewBuilder): def summary(self, request, capabilities, id): """Summary view of a backend capabilities.""" + # Internally storage_protocol can be a list with all the variants (eg. + # FC, fibre_channel), but we return a single value to users. The first + # value is the preferred variant. + storage_protocol = capabilities.get('storage_protocol') + if isinstance(storage_protocol, list): + storage_protocol = storage_protocol[0] + return { 'namespace': 'OS::Storage::Capabilities::%s' % id, 'vendor_name': capabilities.get('vendor_name'), 'volume_backend_name': capabilities.get('volume_backend_name'), 'pool_name': capabilities.get('pool_name'), 'driver_version': capabilities.get('driver_version'), - 'storage_protocol': capabilities.get('storage_protocol'), + 'storage_protocol': storage_protocol, 'display_name': capabilities.get('display_name'), 'description': capabilities.get('description'), 'visibility': capabilities.get('visibility'), diff --git a/cinder/api/views/scheduler_stats.py b/cinder/api/views/scheduler_stats.py index 0a01a59a23b..1f74b53219c 100644 --- a/cinder/api/views/scheduler_stats.py +++ b/cinder/api/views/scheduler_stats.py @@ -35,10 +35,20 @@ class ViewBuilder(common.ViewBuilder): def detail(self, request, pool): """Detailed view of a single pool.""" + # Internally storage_protocol can be a list with all the variants (eg. + # FC, fibre_channel), but we return a single value to users. The first + # value is the preferred variant. + capabilities = pool.get('capabilities') + if capabilities: + protocol = capabilities.get('storage_protocol') + if isinstance(protocol, list): + capabilities = capabilities.copy() + capabilities['storage_protocol'] = protocol[0] + return { 'pool': { 'name': pool.get('name'), - 'capabilities': pool.get('capabilities'), + 'capabilities': capabilities, } } diff --git a/cinder/interface/volume_driver.py b/cinder/interface/volume_driver.py index d8c4d1759e5..aa44baadadb 100644 --- a/cinder/interface/volume_driver.py +++ b/cinder/interface/volume_driver.py @@ -78,7 +78,17 @@ class VolumeDriverCore(base.CinderInterface): it tends to follow typical major.minor.revision ideas. * storage_protocol The protocol used to connect to the storage, this should be a short - string such as: "iSCSI", "FC", "nfs", "ceph", etc. + string such as: "iSCSI", "FC", "NFS", "ceph", etc. + Available protocols are present in cinder.common.constants and they + must be used instead of string literals. + Variant values only exist for older drivers that were already + reporting those values. New drivers must use non variant versions. + In some cases this may be the same value as the driver_volume_type + returned by the initialize_connection method, but they are not the + same thing, since this one is meant to be used by the scheduler, + while the latter is the os-brick connector identifier used in the + factory method. + * total_capacity_gb The total capacity in gigabytes (GiB) of the storage backend being used to store Cinder volumes. Use keyword 'unknown' if the backend diff --git a/cinder/scheduler/filters/driver_filter.py b/cinder/scheduler/filters/driver_filter.py index 397215d7056..edfeb9f2d1b 100644 --- a/cinder/scheduler/filters/driver_filter.py +++ b/cinder/scheduler/filters/driver_filter.py @@ -34,11 +34,14 @@ class DriverFilter(filters.BaseBackendFilter): stats = self._generate_stats(backend_state, filter_properties) LOG.debug("Checking backend '%s'", - stats['backend_stats']['backend_id']) - result = self._check_filter_function(stats) + stats[0]['backend_stats']['backend_id']) + # Run the filter function for all possible storage_protocol values + # (e.g. FC, fibre_channel) and if any of them passes the filter, then + # the backend passes. + result = any(self._check_filter_function(stat) for stat in stats) LOG.debug("Result: %s", result) LOG.debug("Done checking backend '%s'", - stats['backend_stats']['backend_id']) + stats[0]['backend_stats']['backend_id']) return result @@ -95,7 +98,12 @@ class DriverFilter(filters.BaseBackendFilter): return result def _generate_stats(self, backend_state, filter_properties): - """Generates statistics from backend and volume data.""" + """Generates statistics from backend and volume data. + + Returns a list where each entry corresponds to a different + storage_protocol value for those backends that use a storage protocol + that has variants, but only if the function actually uses the protocol. + """ backend_stats = { 'host': backend_state.host, @@ -116,10 +124,12 @@ class DriverFilter(filters.BaseBackendFilter): backend_caps = backend_state.capabilities filter_function = None + uses_protocol = False if ('filter_function' in backend_caps and backend_caps['filter_function'] is not None): filter_function = str(backend_caps['filter_function']) + uses_protocol = 'storage_protocol' in filter_function qos_specs = filter_properties.get('qos_specs', {}) @@ -139,4 +149,18 @@ class DriverFilter(filters.BaseBackendFilter): 'filter_function': filter_function, } - return stats + # Only create individual entries for the different protocols variants + # if the function uses the protocol and there are variants. + if uses_protocol and isinstance(backend_state.storage_protocol, list): + result = [] + for protocol in backend_state.storage_protocol: + new_stats = stats.copy() + new_stats['backend_stats'] = dict(new_stats['backend_stats']) + new_stats['backend_stats']['storage_protocol'] = protocol + new_stats['backend_caps'] = dict(new_stats['backend_caps']) + new_stats['backend_caps']['storage_protocol'] = protocol + result.append(new_stats) + + else: + result = [stats] + return result diff --git a/cinder/scheduler/host_manager.py b/cinder/scheduler/host_manager.py index 94279d29f45..6d8859866d9 100644 --- a/cinder/scheduler/host_manager.py +++ b/cinder/scheduler/host_manager.py @@ -294,8 +294,13 @@ class BackendState(object): if not pool_cap.get('volume_backend_name', None): pool_cap['volume_backend_name'] = self.volume_backend_name - if not pool_cap.get('storage_protocol', None): - pool_cap['storage_protocol'] = self.storage_protocol + protocol = pool_cap.get('storage_protocol', None) + if protocol: + # Protocols that have variants are replaced with ALL the variants + storage_protocol = self.get_storage_protocol_variants(protocol) + else: # Backend protocol has already been transformed with variants + storage_protocol = self.storage_protocol + pool_cap['storage_protocol'] = storage_protocol if not pool_cap.get('vendor_name', None): pool_cap['vendor_name'] = self.vendor_name @@ -320,7 +325,11 @@ class BackendState(object): self.volume_backend_name = capability.get('volume_backend_name', None) self.vendor_name = capability.get('vendor_name', None) self.driver_version = capability.get('driver_version', None) - self.storage_protocol = capability.get('storage_protocol', None) + self.driver_version = capability.get('driver_version', None) + + # Protocols that have variants are replaced with ALL the variants + protocol = capability.get('storage_protocol', None) + self.storage_protocol = self.get_storage_protocol_variants(protocol) self.updated = capability['timestamp'] def consume_from_volume(self, @@ -370,6 +379,18 @@ class BackendState(object): 'thick': self.thick_provisioning_support, 'pools': self.pools, 'updated': self.updated}) + @staticmethod + def get_storage_protocol_variants(storage_protocol): + if storage_protocol in constants.ISCSI_VARIANTS: + return constants.ISCSI_VARIANTS + if storage_protocol in constants.FC_VARIANTS: + return constants.FC_VARIANTS + if storage_protocol in constants.NFS_VARIANTS: + return constants.NFS_VARIANTS + if storage_protocol in constants.NVMEOF_VARIANTS: + return constants.NVMEOF_VARIANTS + return storage_protocol + class PoolState(BackendState): def __init__(self, diff --git a/cinder/scheduler/weights/goodness.py b/cinder/scheduler/weights/goodness.py index 31d7ee6f2ef..a0618645ba4 100644 --- a/cinder/scheduler/weights/goodness.py +++ b/cinder/scheduler/weights/goodness.py @@ -41,10 +41,14 @@ class GoodnessWeigher(weights.BaseHostWeigher): def _weigh_object(self, host_state, weight_properties): """Determine host's goodness rating based on a goodness_function.""" stats = self._generate_stats(host_state, weight_properties) - LOG.debug("Checking host '%s'", stats['host_stats']['host']) - result = self._check_goodness_function(stats) + LOG.debug("Checking host '%s'", stats[0]['host_stats']['host']) + # Run the goodness function for all possible storage_protocol values + # (e.g. FC, fibre_channel) and use the maximum value, as the function + # may look for an exact match on a protocol and the backend may be + # returning a variant. + result = max(self._check_goodness_function(stat) for stat in stats) LOG.debug("Goodness weight for %(host)s: %(res)s", - {'res': result, 'host': stats['host_stats']['host']}) + {'res': result, 'host': stats[0]['host_stats']['host']}) return result @@ -101,8 +105,12 @@ class GoodnessWeigher(weights.BaseHostWeigher): return result def _generate_stats(self, host_state, weight_properties): - """Generates statistics from host and volume data.""" + """Generates statistics from host and volume data. + Returns a list where each entry corresponds to a different + storage_protocol value for those backends that use a storage protocol + that has variants, but only if the function actually uses the protocol. + """ host_stats = { 'host': host_state.host, 'volume_backend_name': host_state.volume_backend_name, @@ -120,10 +128,12 @@ class GoodnessWeigher(weights.BaseHostWeigher): host_caps = host_state.capabilities goodness_function = None + uses_protocol = False if ('goodness_function' in host_caps and host_caps['goodness_function'] is not None): goodness_function = str(host_caps['goodness_function']) + uses_protocol = 'storage_protocol' in goodness_function qos_specs = weight_properties.get('qos_specs', {}) or {} @@ -143,4 +153,19 @@ class GoodnessWeigher(weights.BaseHostWeigher): 'goodness_function': goodness_function, } - return stats + # Only create individual entries for the different protocols variants + # if the function uses the protocol and there are variants. + if uses_protocol and isinstance(host_state.storage_protocol, list): + result = [] + for protocol in host_state.storage_protocol: + new_stats = stats.copy() + new_stats['host_stats'] = dict(new_stats['host_stats']) + new_stats['host_stats']['storage_protocol'] = protocol + new_stats['host_caps'] = dict(new_stats['host_caps']) + new_stats['host_caps']['storage_protocol'] = protocol + result.append(new_stats) + + else: + result = [stats] + + return result diff --git a/cinder/tests/unit/scheduler/test_host_manager.py b/cinder/tests/unit/scheduler/test_host_manager.py index 5f4980be318..c61346a8e72 100644 --- a/cinder/tests/unit/scheduler/test_host_manager.py +++ b/cinder/tests/unit/scheduler/test_host_manager.py @@ -1346,7 +1346,8 @@ class BackendStateTestCase(test.TestCase): fake_backend.update_from_volume_capability(capability) self.assertEqual('Local iSCSI', fake_backend.volume_backend_name) - self.assertEqual('iSCSI', fake_backend.storage_protocol) + # Storage protocol is changed to include its variants + self.assertEqual(['iSCSI', 'iscsi'], fake_backend.storage_protocol) self.assertEqual('OpenStack', fake_backend.vendor_name) self.assertEqual('1.0.1', fake_backend.driver_version) diff --git a/doc/source/contributor/drivers.rst b/doc/source/contributor/drivers.rst index 9f6ed2ef523..ea96c2b1579 100644 --- a/doc/source/contributor/drivers.rst +++ b/doc/source/contributor/drivers.rst @@ -113,6 +113,24 @@ extending. If it doesn't, it should report 'online_extend_support=False'. Otherwise the scheduler will attempt to perform the operation, and may leave the volume in 'error_extending' state. +Value of ``storage_protocol`` is a single string representing the transport +protocol used by the storage. Existing protocols are present in +``cinder.common.constants`` and should be used by drivers instead of string +literals. + +Variant values only exist for older drivers that were already reporting those +values. New drivers must use non variant versions. + +The ``storage_protocol`` can be used by operators using the +``cinder get-pools --detail`` command, by volume types in their extra specs, +and by the filter and goodness functions. + +We must not mistake the value of the ``storage_protocol`` with the identifier +of the os-brick connector, which is returned by the ``initialize_connection`` +driver method in the ``driver_volume_type`` dictionary key. In some cases they +may have the same value, but they are different things. + + Feature Enforcement ------------------- diff --git a/releasenotes/notes/fix-storage_protocol-6baf55e13249463c.yaml b/releasenotes/notes/fix-storage_protocol-6baf55e13249463c.yaml new file mode 100644 index 00000000000..359fa118428 --- /dev/null +++ b/releasenotes/notes/fix-storage_protocol-6baf55e13249463c.yaml @@ -0,0 +1,19 @@ +--- +upgrade: + - | + The ``storage_protocol`` treats all variants of the protocol name as the + same regarding matches, so for example using FC, fc, or fibre_channel will + be treated equally in the scheduler, be it when filtering using the volume + type's extra specs or when using filter and goodness functions. + + The storage protocol reporting via the REST API will be now the same for + them all, using the preferred naming, FC, NVMe-oF, iSCSI, NFS... + + If your deployment uses ``storage_protocol`` to differentiate between + backends that use the same protocol but report it using different variants, + be aware that they will no longer be differentiated. +fixes: + - | + `Bug #1966103 `_: Fixed + inconsistent behavior of ``storage_protocol`` among different backends that + report variants of the protocol name, such as FC, fc, fibre_channel.