From b4a13281ea26a71fcd982c0a873c635836c178f5 Mon Sep 17 00:00:00 2001 From: Gorka Eguileor Date: Mon, 3 Oct 2016 21:27:53 +0200 Subject: [PATCH] Make Replication support Active-Active This patch adds new methods to our failover mechanism to allow failover to work when a backend is clustered. Adds REST API microversion 3.26 that adds a new `failover` method equivalent to `failover_host` but accepting `cluster` field as well as the `host` field. Thaw and Freeze are updated to update cluster and all services within the cluster. Now cluster listings accepts new filtering fields `replication_status`, `frozen`, and `active_backend_id`. Summary listings return `replication_status` field and detailed listings also return `frozen` and `active_backend_id`. Specs: https://review.openstack.org/401392 APIImpact: New service failover action and new fields in cluster listings. Implements: blueprint cinder-volume-active-active-support Change-Id: Id3291b28242d5814c259283fa629b48f22e70260 --- cinder/api/common.py | 19 +- cinder/api/contrib/services.py | 43 +-- cinder/api/openstack/api_version_request.py | 4 +- .../openstack/rest_api_version_history.rst | 12 + cinder/api/v3/clusters.py | 21 +- cinder/api/v3/views/clusters.py | 21 +- .../088_add_replication_info_to_cluster.py | 33 ++ cinder/db/sqlalchemy/models.py | 4 + cinder/exception.py | 13 +- cinder/objects/base.py | 1 + cinder/objects/cluster.py | 20 +- cinder/service.py | 45 ++- .../tests/unit/api/contrib/test_services.py | 39 +- cinder/tests/unit/api/v3/test_cluster.py | 72 +++- cinder/tests/unit/objects/test_cluster.py | 15 + cinder/tests/unit/objects/test_objects.py | 2 +- cinder/tests/unit/test_migrations.py | 10 + cinder/tests/unit/test_service.py | 107 +++++- cinder/tests/unit/test_volume.py | 346 +++++++++++++++--- cinder/tests/unit/test_volume_rpcapi.py | 46 ++- .../drivers/nexenta/test_nexenta_edge.py | 1 + .../drivers/nexenta/test_nexenta_edge_nbd.py | 1 + cinder/tests/unit/volume/test_driver.py | 120 ++++++ .../unit/volume/test_replication_manager.py | 5 +- cinder/volume/api.py | 160 +++++--- cinder/volume/driver.py | 46 +++ cinder/volume/manager.py | 127 ++++--- cinder/volume/rpcapi.py | 31 +- 28 files changed, 1140 insertions(+), 224 deletions(-) create mode 100644 cinder/db/sqlalchemy/migrate_repo/versions/088_add_replication_info_to_cluster.py create mode 100644 cinder/tests/unit/volume/test_driver.py diff --git a/cinder/api/common.py b/cinder/api/common.py index 09e2e8ed2f3..307ebdda4df 100644 --- a/cinder/api/common.py +++ b/cinder/api/common.py @@ -372,8 +372,23 @@ class ViewBuilder(object): return urllib.parse.urlunsplit(url_parts).rstrip('/') -def get_cluster_host(req, params, cluster_version): - if req.api_version_request.matches(cluster_version): +def get_cluster_host(req, params, cluster_version=None): + """Get cluster and host from the parameters. + + This method checks the presence of cluster and host parameters and returns + them depending on the cluster_version. + + If cluster_version is False we will never return the cluster_name and we + will require the presence of the host parameter. + + If cluster_version is None we will always check for the presence of the + cluster parameter, and if cluster_version is a string with a version we + will only check for the presence of the parameter if the version of the + request is not less than it. In both cases we will require one and only + one parameter, host or cluster. + """ + if (cluster_version is not False and + req.api_version_request.matches(cluster_version)): cluster_name = params.get('cluster') msg = _('One and only one of cluster and host must be set.') else: diff --git a/cinder/api/contrib/services.py b/cinder/api/contrib/services.py index c556a930c2e..fbe13d822ee 100644 --- a/cinder/api/contrib/services.py +++ b/cinder/api/contrib/services.py @@ -20,6 +20,7 @@ from oslo_log import versionutils from oslo_utils import timeutils import webob.exc +from cinder.api import common from cinder.api import extensions from cinder.api.openstack import wsgi from cinder import exception @@ -110,20 +111,23 @@ class ServiceController(wsgi.Controller): return True - def _freeze(self, context, host): - return self.volume_api.freeze_host(context, host) + def _freeze(self, context, req, body): + cluster_name, host = common.get_cluster_host(req, body, '3.26') + return self.volume_api.freeze_host(context, host, cluster_name) - def _thaw(self, context, host): - return self.volume_api.thaw_host(context, host) + def _thaw(self, context, req, body): + cluster_name, host = common.get_cluster_host(req, body, '3.26') + return self.volume_api.thaw_host(context, host, cluster_name) - def _failover(self, context, host, backend_id=None): - return self.volume_api.failover_host(context, host, backend_id) - - def _get_host(self, body): - try: - return body['host'] - except (TypeError, KeyError): - raise exception.MissingRequired(element='host') + def _failover(self, context, req, body, clustered): + # We set version to None to always get the cluster name from the body, + # to False when we don't want to get it, and '3.26' when we only want + # it if the requested version is 3.26 or higher. + version = '3.26' if clustered else False + cluster_name, host = common.get_cluster_host(req, body, version) + self.volume_api.failover(context, host, cluster_name, + body.get('backend_id')) + return webob.Response(status_int=202) def update(self, req, id, body): """Enable/Disable scheduling for a service. @@ -148,20 +152,17 @@ class ServiceController(wsgi.Controller): disabled = True status = "disabled" elif id == "freeze": - return self._freeze(context, self._get_host(body)) + return self._freeze(context, req, body) elif id == "thaw": - return self._thaw(context, self._get_host(body)) + return self._thaw(context, req, body) elif id == "failover_host": - self._failover( - context, - self._get_host(body), - body.get('backend_id', None) - ) - return webob.Response(status_int=202) + return self._failover(context, req, body, False) + elif req.api_version_request.matches('3.26') and id == 'failover': + return self._failover(context, req, body, True) else: raise exception.InvalidInput(reason=_("Unknown action")) - host = self._get_host(body) + host = common.get_cluster_host(req, body, False)[1] ret_val['disabled'] = disabled if id == "disable-log-reason" and ext_loaded: diff --git a/cinder/api/openstack/api_version_request.py b/cinder/api/openstack/api_version_request.py index 667a7d3dc48..164a6625a41 100644 --- a/cinder/api/openstack/api_version_request.py +++ b/cinder/api/openstack/api_version_request.py @@ -75,6 +75,8 @@ REST_API_VERSION_HISTORY = """ * 3.23 - Allow passing force parameter to volume delete. * 3.24 - Add workers/cleanup endpoint. * 3.25 - Add ``volumes`` field to group list/detail and group show. + * 3.26 - Add failover action and cluster listings accept new filters and + return new data. """ # The minimum and maximum versions of the API supported @@ -82,7 +84,7 @@ REST_API_VERSION_HISTORY = """ # minimum version of the API supported. # Explicitly using /v1 or /v2 enpoints will still work _MIN_API_VERSION = "3.0" -_MAX_API_VERSION = "3.25" +_MAX_API_VERSION = "3.26" _LEGACY_API_VERSION1 = "1.0" _LEGACY_API_VERSION2 = "2.0" diff --git a/cinder/api/openstack/rest_api_version_history.rst b/cinder/api/openstack/rest_api_version_history.rst index f0380aab9d1..b343cdb0fb1 100644 --- a/cinder/api/openstack/rest_api_version_history.rst +++ b/cinder/api/openstack/rest_api_version_history.rst @@ -268,3 +268,15 @@ user documentation. 3.25 ---- Add ``volumes`` field to group list/detail and group show. + +3.26 +---- + - New ``failover`` action equivalent to ``failover_host``, but accepting + ``cluster`` parameter as well as the ``host`` cluster that + ``failover_host`` accepts. + + - ``freeze`` and ``thaw`` actions accept ``cluster`` parameter. + + - Cluster listing accepts ``replication_status``, ``frozen`` and + ``active_backend_id`` as filters, and returns additional fields for each + cluster: ``replication_status``, ``frozen``, ``active_backend_id``. diff --git a/cinder/api/v3/clusters.py b/cinder/api/v3/clusters.py index 0048cf25a0c..120f1e6a106 100644 --- a/cinder/api/v3/clusters.py +++ b/cinder/api/v3/clusters.py @@ -22,11 +22,14 @@ from cinder import utils CLUSTER_MICRO_VERSION = '3.7' +REPLICATION_DATA_MICRO_VERSION = '3.26' class ClusterController(wsgi.Controller): allowed_list_keys = {'name', 'binary', 'is_up', 'disabled', 'num_hosts', - 'num_down_hosts', 'binary'} + 'num_down_hosts', 'binary', 'replication_status', + 'frozen', 'active_backend_id'} + replication_fields = {'replication_status', 'frozen', 'active_backend_id'} policy_checker = wsgi.Controller.get_policy_checker('clusters') @@ -38,7 +41,9 @@ class ClusterController(wsgi.Controller): # Let the wsgi middleware convert NotFound exceptions cluster = objects.Cluster.get_by_id(context, None, binary=binary, name=id, services_summary=True) - return clusters_view.ViewBuilder.detail(cluster) + replication_data = req.api_version_request.matches( + REPLICATION_DATA_MICRO_VERSION) + return clusters_view.ViewBuilder.detail(cluster, replication_data) @wsgi.Controller.api_version(CLUSTER_MICRO_VERSION) def index(self, req): @@ -59,9 +64,12 @@ class ClusterController(wsgi.Controller): def _get_clusters(self, req, detail): # Let the wsgi middleware convert NotAuthorized exceptions context = self.policy_checker(req, 'get_all') - + replication_data = req.api_version_request.matches( + REPLICATION_DATA_MICRO_VERSION) filters = dict(req.GET) allowed = self.allowed_list_keys + if not replication_data: + allowed = allowed.difference(self.replication_fields) # Check filters are valid if not allowed.issuperset(filters): @@ -78,7 +86,8 @@ class ClusterController(wsgi.Controller): filters['services_summary'] = detail clusters = objects.ClusterList.get_all(context, **filters) - return clusters_view.ViewBuilder.list(clusters, detail) + return clusters_view.ViewBuilder.list(clusters, detail, + replication_data) @wsgi.Controller.api_version(CLUSTER_MICRO_VERSION) def update(self, req, id, body): @@ -113,7 +122,9 @@ class ClusterController(wsgi.Controller): cluster.save() # We return summary data plus the disabled reason - ret_val = clusters_view.ViewBuilder.summary(cluster) + replication_data = req.api_version_request.matches( + REPLICATION_DATA_MICRO_VERSION) + ret_val = clusters_view.ViewBuilder.summary(cluster, replication_data) ret_val['cluster']['disabled_reason'] = disabled_reason return ret_val diff --git a/cinder/api/v3/views/clusters.py b/cinder/api/v3/views/clusters.py index 2391cc176ed..3cabf61cf4a 100644 --- a/cinder/api/v3/views/clusters.py +++ b/cinder/api/v3/views/clusters.py @@ -28,7 +28,7 @@ class ViewBuilder(object): return '' @classmethod - def detail(cls, cluster, flat=False): + def detail(cls, cluster, replication_data=False, flat=False): """Detailed view of a cluster.""" result = cls.summary(cluster, flat=True) result.update( @@ -37,27 +37,36 @@ class ViewBuilder(object): last_heartbeat=cls._normalize(cluster.last_heartbeat), created_at=cls._normalize(cluster.created_at), updated_at=cls._normalize(cluster.updated_at), - disabled_reason=cluster.disabled_reason + disabled_reason=cluster.disabled_reason, + replication_status=cluster.replication_status, + frozen=cluster.frozen, + active_backend_id=cluster.active_backend_id, ) - + if not replication_data: + for field in ('replication_status', 'frozen', 'active_backend_id'): + del result[field] if flat: return result return {'cluster': result} @staticmethod - def summary(cluster, flat=False): + def summary(cluster, replication_data=False, flat=False): """Generic, non-detailed view of a cluster.""" result = { 'name': cluster.name, 'binary': cluster.binary, 'state': 'up' if cluster.is_up else 'down', 'status': 'disabled' if cluster.disabled else 'enabled', + 'replication_status': cluster.replication_status, } + if not replication_data: + del result['replication_status'] if flat: return result return {'cluster': result} @classmethod - def list(cls, clusters, detail=False): + def list(cls, clusters, detail=False, replication_data=False): func = cls.detail if detail else cls.summary - return {'clusters': [func(n, flat=True) for n in clusters]} + return {'clusters': [func(n, replication_data, flat=True) + for n in clusters]} diff --git a/cinder/db/sqlalchemy/migrate_repo/versions/088_add_replication_info_to_cluster.py b/cinder/db/sqlalchemy/migrate_repo/versions/088_add_replication_info_to_cluster.py new file mode 100644 index 00000000000..0e8ea25aa20 --- /dev/null +++ b/cinder/db/sqlalchemy/migrate_repo/versions/088_add_replication_info_to_cluster.py @@ -0,0 +1,33 @@ +# Copyright (c) 2016 Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from sqlalchemy import Boolean, Column, MetaData, String, Table, text + + +def upgrade(migrate_engine): + """Add replication info to clusters table.""" + meta = MetaData() + meta.bind = migrate_engine + + clusters = Table('clusters', meta, autoload=True) + replication_status = Column('replication_status', String(length=36), + default="not-capable") + active_backend_id = Column('active_backend_id', String(length=255)) + frozen = Column('frozen', Boolean, nullable=False, default=False, + server_default=text('false')) + + clusters.create_column(replication_status) + clusters.create_column(frozen) + clusters.create_column(active_backend_id) diff --git a/cinder/db/sqlalchemy/models.py b/cinder/db/sqlalchemy/models.py index cbed9e183b1..a9556e109c7 100644 --- a/cinder/db/sqlalchemy/models.py +++ b/cinder/db/sqlalchemy/models.py @@ -122,6 +122,10 @@ class Cluster(BASE, CinderBase): disabled_reason = Column(String(255)) race_preventer = Column(Integer, nullable=False, default=0) + replication_status = Column(String(36), default="not-capable") + active_backend_id = Column(String(255)) + frozen = Column(Boolean, nullable=False, default=False) + # Last heartbeat reported by any of the services of this cluster. This is # not deferred since we always want to load this field. last_heartbeat = column_property( diff --git a/cinder/exception.py b/cinder/exception.py index 59656b96bcf..7a16a0980a3 100644 --- a/cinder/exception.py +++ b/cinder/exception.py @@ -422,12 +422,13 @@ class ImageNotFound(NotFound): class ServiceNotFound(NotFound): def __init__(self, message=None, **kwargs): - if kwargs.get('host', None): - self.message = _("Service %(service_id)s could not be " - "found on host %(host)s.") - else: - self.message = _("Service %(service_id)s could not be found.") - super(ServiceNotFound, self).__init__(None, **kwargs) + if not message: + if kwargs.get('host', None): + self.message = _("Service %(service_id)s could not be " + "found on host %(host)s.") + else: + self.message = _("Service %(service_id)s could not be found.") + super(ServiceNotFound, self).__init__(message, **kwargs) class ServiceTooOld(Invalid): diff --git a/cinder/objects/base.py b/cinder/objects/base.py index 478b4434a95..1c22469cc49 100644 --- a/cinder/objects/base.py +++ b/cinder/objects/base.py @@ -124,6 +124,7 @@ OBJ_VERSIONS.add('1.16', {'BackupDeviceInfo': '1.0'}) OBJ_VERSIONS.add('1.17', {'VolumeAttachment': '1.1'}) OBJ_VERSIONS.add('1.18', {'Snapshot': '1.3'}) OBJ_VERSIONS.add('1.19', {'ConsistencyGroup': '1.4', 'CGSnapshot': '1.1'}) +OBJ_VERSIONS.add('1.20', {'Cluster': '1.1'}) class CinderObjectRegistry(base.VersionedObjectRegistry): diff --git a/cinder/objects/cluster.py b/cinder/objects/cluster.py index 6b5842a982f..c72598efd9d 100644 --- a/cinder/objects/cluster.py +++ b/cinder/objects/cluster.py @@ -20,6 +20,7 @@ from cinder import exception from cinder.i18n import _ from cinder import objects from cinder.objects import base +from cinder.objects import fields as c_fields from cinder import utils @@ -37,7 +38,8 @@ class Cluster(base.CinderPersistentObject, base.CinderObject, - Any other cluster field will be used as a filter. """ # Version 1.0: Initial version - VERSION = '1.0' + # Version 1.1: Add replication fields + VERSION = '1.1' OPTIONAL_FIELDS = ('num_hosts', 'num_down_hosts', 'services') # NOTE(geguileo): We don't want to expose race_preventer field at the OVO @@ -54,8 +56,24 @@ class Cluster(base.CinderPersistentObject, base.CinderObject, 'last_heartbeat': fields.DateTimeField(nullable=True, read_only=True), 'services': fields.ObjectField('ServiceList', nullable=True, read_only=True), + # Replication properties + 'replication_status': c_fields.ReplicationStatusField(nullable=True), + 'frozen': fields.BooleanField(default=False), + 'active_backend_id': fields.StringField(nullable=True), } + def obj_make_compatible(self, primitive, target_version): + """Make a cluster representation compatible with a target version.""" + # Convert all related objects + super(Cluster, self).obj_make_compatible(primitive, target_version) + + # Before v1.1 we didn't have relication fields so we have to remove + # them. + if target_version == '1.0': + for obj_field in ('replication_status', 'frozen', + 'active_backend_id'): + primitive.pop(obj_field, None) + @classmethod def _get_expected_attrs(cls, context, *args, **kwargs): """Return expected attributes when getting a cluster. diff --git a/cinder/service.py b/cinder/service.py index e2bc28d1c73..39f061a9227 100644 --- a/cinder/service.py +++ b/cinder/service.py @@ -44,6 +44,7 @@ from cinder import exception from cinder.i18n import _, _LE, _LI, _LW from cinder import objects from cinder.objects import base as objects_base +from cinder.objects import fields from cinder import rpc from cinder.scheduler import rpcapi as scheduler_rpcapi from cinder import version @@ -187,7 +188,7 @@ class Service(service.Service): if self.added_to_cluster: # We pass copy service's disable status in the cluster if we # have to create it. - self._ensure_cluster_exists(ctxt, service_ref.disabled) + self._ensure_cluster_exists(ctxt, service_ref) service_ref.cluster_name = cluster service_ref.save() Service.service_id = service_ref.id @@ -272,7 +273,9 @@ class Service(service.Service): '%(version)s)'), {'topic': self.topic, 'version': version_string, 'cluster': self.cluster}) - target = messaging.Target(topic=self.topic, server=self.cluster) + target = messaging.Target( + topic='%s.%s' % (self.topic, self.cluster), + server=vol_utils.extract_host(self.cluster, 'host')) serializer = objects_base.CinderObjectSerializer(obj_version_cap) self.cluster_rpcserver = rpc.get_server(target, endpoints, serializer) @@ -316,17 +319,34 @@ class Service(service.Service): 'new_down_time': new_down_time}) CONF.set_override('service_down_time', new_down_time) - def _ensure_cluster_exists(self, context, disabled=None): + def _ensure_cluster_exists(self, context, service): if self.cluster: try: - objects.Cluster.get_by_id(context, None, name=self.cluster, - binary=self.binary) + cluster = objects.Cluster.get_by_id(context, None, + name=self.cluster, + binary=self.binary) + # If the cluster already exists, then the service replication + # fields must match those of the cluster unless the service + # is in error status. + error_states = (fields.ReplicationStatus.ERROR, + fields.ReplicationStatus.FAILOVER_ERROR) + if service.replication_status not in error_states: + for attr in ('replication_status', 'active_backend_id', + 'frozen'): + if getattr(service, attr) != getattr(cluster, attr): + setattr(service, attr, getattr(cluster, attr)) + except exception.ClusterNotFound: - cluster = objects.Cluster(context=context, name=self.cluster, - binary=self.binary) - # If disabled has been specified overwrite default value - if disabled is not None: - cluster.disabled = disabled + # Since the cluster didn't exist, we copy replication fields + # from the service. + cluster = objects.Cluster( + context=context, + name=self.cluster, + binary=self.binary, + disabled=service.disabled, + replication_status=service.replication_status, + active_backend_id=service.active_backend_id, + frozen=service.frozen) try: cluster.create() @@ -355,7 +375,10 @@ class Service(service.Service): Service.service_id = service_ref.id # TODO(geguileo): In O unconditionally ensure that the cluster exists if not self.is_upgrading_to_n: - self._ensure_cluster_exists(context) + self._ensure_cluster_exists(context, service_ref) + # If we have updated the service_ref with replication data from + # the cluster it will be saved. + service_ref.save() def __getattr__(self, key): manager = self.__dict__.get('manager', None) diff --git a/cinder/tests/unit/api/contrib/test_services.py b/cinder/tests/unit/api/contrib/test_services.py index 64de68f099f..d5187c277f4 100644 --- a/cinder/tests/unit/api/contrib/test_services.py +++ b/cinder/tests/unit/api/contrib/test_services.py @@ -255,6 +255,36 @@ class ServicesTest(test.TestCase): ]} self.assertEqual(response, res_dict) + def test_failover_old_version(self): + req = FakeRequest(version='3.18') + self.assertRaises(exception.InvalidInput, self.controller.update, req, + 'failover', {'cluster': 'cluster1'}) + + def test_failover_no_values(self): + req = FakeRequest(version='3.26') + self.assertRaises(exception.InvalidInput, self.controller.update, req, + 'failover', {'backend_id': 'replica1'}) + + @ddt.data({'host': 'hostname'}, {'cluster': 'mycluster'}) + @mock.patch('cinder.volume.api.API.failover') + def test_failover(self, body, failover_mock): + req = FakeRequest(version='3.26') + body['backend_id'] = 'replica1' + res = self.controller.update(req, 'failover', body) + self.assertEqual(202, res.status_code) + failover_mock.assert_called_once_with(req.environ['cinder.context'], + body.get('host'), + body.get('cluster'), 'replica1') + + @ddt.data({}, {'host': 'hostname', 'cluster': 'mycluster'}) + @mock.patch('cinder.volume.api.API.failover') + def test_failover_invalid_input(self, body, failover_mock): + req = FakeRequest(version='3.26') + body['backend_id'] = 'replica1' + self.assertRaises(exception.InvalidInput, + self.controller.update, req, 'failover', body) + failover_mock.assert_not_called() + def test_services_list_with_cluster_name(self): req = FakeRequest(version='3.7') res_dict = self.controller.index(req) @@ -756,11 +786,12 @@ class ServicesTest(test.TestCase): req = fakes.HTTPRequest.blank(url) body = {'host': mock.sentinel.host, 'backend_id': mock.sentinel.backend_id} - with mock.patch.object(self.controller.volume_api, 'failover_host') \ + with mock.patch.object(self.controller.volume_api, 'failover') \ as failover_mock: res = self.controller.update(req, 'failover_host', body) failover_mock.assert_called_once_with(req.environ['cinder.context'], mock.sentinel.host, + None, mock.sentinel.backend_id) self.assertEqual(202, res.status_code) @@ -772,7 +803,7 @@ class ServicesTest(test.TestCase): as freeze_mock: res = self.controller.update(req, 'freeze', body) freeze_mock.assert_called_once_with(req.environ['cinder.context'], - mock.sentinel.host) + mock.sentinel.host, None) self.assertEqual(freeze_mock.return_value, res) def test_services_thaw(self): @@ -783,12 +814,12 @@ class ServicesTest(test.TestCase): as thaw_mock: res = self.controller.update(req, 'thaw', body) thaw_mock.assert_called_once_with(req.environ['cinder.context'], - mock.sentinel.host) + mock.sentinel.host, None) self.assertEqual(thaw_mock.return_value, res) @ddt.data('freeze', 'thaw', 'failover_host') def test_services_replication_calls_no_host(self, method): url = '/v2/%s/os-services/%s' % (fake.PROJECT_ID, method) req = fakes.HTTPRequest.blank(url) - self.assertRaises(exception.MissingRequired, + self.assertRaises(exception.InvalidInput, self.controller.update, req, method, {}) diff --git a/cinder/tests/unit/api/v3/test_cluster.py b/cinder/tests/unit/api/v3/test_cluster.py index 27e4e30bba5..e6f280d790f 100644 --- a/cinder/tests/unit/api/v3/test_cluster.py +++ b/cinder/tests/unit/api/v3/test_cluster.py @@ -18,6 +18,7 @@ import datetime import ddt from iso8601 import iso8601 import mock +from oslo_utils import versionutils from cinder.api import extensions from cinder.api.openstack import api_version_request as api_version @@ -31,11 +32,17 @@ from cinder.tests.unit import fake_cluster CLUSTERS = [ fake_cluster.fake_db_cluster( id=1, + replication_status='error', + frozen=False, + active_backend_id='replication1', last_heartbeat=datetime.datetime(2016, 6, 1, 2, 46, 28), updated_at=datetime.datetime(2016, 6, 1, 2, 46, 28), created_at=datetime.datetime(2016, 6, 1, 2, 46, 28)), fake_cluster.fake_db_cluster( id=2, name='cluster2', num_hosts=2, num_down_hosts=1, disabled=True, + replication_status='error', + frozen=True, + active_backend_id='replication2', updated_at=datetime.datetime(2016, 6, 1, 1, 46, 28), created_at=datetime.datetime(2016, 6, 1, 1, 46, 28)) ] @@ -51,6 +58,9 @@ EXPECTED = [{'created_at': datetime.datetime(2016, 6, 1, 2, 46, 28), 'num_hosts': 0, 'state': 'up', 'status': 'enabled', + 'replication_status': 'error', + 'frozen': False, + 'active_backend_id': 'replication1', 'updated_at': datetime.datetime(2016, 6, 1, 2, 46, 28)}, {'created_at': datetime.datetime(2016, 6, 1, 1, 46, 28), 'disabled_reason': None, @@ -61,6 +71,9 @@ EXPECTED = [{'created_at': datetime.datetime(2016, 6, 1, 2, 46, 28), 'num_hosts': 2, 'state': 'down', 'status': 'disabled', + 'replication_status': 'error', + 'frozen': True, + 'active_backend_id': 'replication2', 'updated_at': datetime.datetime(2016, 6, 1, 1, 46, 28)}] @@ -92,6 +105,21 @@ class ClustersTestCase(test.TestCase): {'is_up': True, 'disabled': False, 'num_hosts': 2, 'num_down_hosts': 1, 'binary': 'cinder-volume'}) + REPLICATION_FILTERS = ({'replication_status': 'error'}, {'frozen': True}, + {'active_backend_id': 'replication'}) + + def _get_expected(self, version='3.8'): + if versionutils.convert_version_to_tuple(version) >= (3, 19): + return EXPECTED + + expect = [] + for cluster in EXPECTED: + cluster = cluster.copy() + for key in ('replication_status', 'frozen', 'active_backend_id'): + cluster.pop(key) + expect.append(cluster) + return expect + def setUp(self): super(ClustersTestCase, self).setUp() @@ -101,8 +129,10 @@ class ClustersTestCase(test.TestCase): self.controller = clusters.ClusterController(self.ext_mgr) @mock.patch('cinder.db.cluster_get_all', return_value=CLUSTERS_ORM) - def _test_list(self, get_all_mock, detailed, filters, expected=None): - req = FakeRequest(**filters) + def _test_list(self, get_all_mock, detailed, filters=None, expected=None, + version='3.8'): + filters = filters or {} + req = FakeRequest(version=version, **filters) method = getattr(self.controller, 'detail' if detailed else 'index') clusters = method(req) @@ -119,7 +149,7 @@ class ClustersTestCase(test.TestCase): @ddt.data(*LIST_FILTERS) def test_index_detail(self, filters): """Verify that we get all clusters with detailed data.""" - expected = {'clusters': EXPECTED} + expected = {'clusters': self._get_expected()} self._test_list(detailed=True, filters=filters, expected=expected) @ddt.data(*LIST_FILTERS) @@ -135,6 +165,16 @@ class ClustersTestCase(test.TestCase): 'status': 'disabled'}]} self._test_list(detailed=False, filters=filters, expected=expected) + @ddt.data(*REPLICATION_FILTERS) + def test_index_detail_fail_old(self, filters): + self.assertRaises(exception.InvalidInput, self._test_list, + detailed=True, filters=filters) + + @ddt.data(*REPLICATION_FILTERS) + def test_index_summary_fail_old(self, filters): + self.assertRaises(exception.InvalidInput, self._test_list, + detailed=False, filters=filters) + @ddt.data(True, False) def test_index_unauthorized(self, detailed): """Verify that unauthorized user can't list clusters.""" @@ -147,13 +187,35 @@ class ClustersTestCase(test.TestCase): """Verify the wrong version so that user can't list clusters.""" self.assertRaises(exception.VersionNotFoundForAPIMethod, self._test_list, detailed=detailed, - filters={'version': '3.5'}) + version='3.6') + + @ddt.data(*REPLICATION_FILTERS) + def test_index_detail_replication_new_fields(self, filters): + version = '3.26' + expected = {'clusters': self._get_expected(version)} + self._test_list(detailed=True, filters=filters, expected=expected, + version=version) + + @ddt.data(*REPLICATION_FILTERS) + def test_index_summary_replication_new_fields(self, filters): + expected = {'clusters': [{'name': 'cluster_name', + 'binary': 'cinder-volume', + 'state': 'up', + 'replication_status': 'error', + 'status': 'enabled'}, + {'name': 'cluster2', + 'binary': 'cinder-volume', + 'state': 'down', + 'replication_status': 'error', + 'status': 'disabled'}]} + self._test_list(detailed=False, filters=filters, expected=expected, + version='3.26') @mock.patch('cinder.db.sqlalchemy.api.cluster_get', return_value=CLUSTERS_ORM[0]) def test_show(self, get_mock): req = FakeRequest() - expected = {'cluster': EXPECTED[0]} + expected = {'cluster': self._get_expected()[0]} cluster = self.controller.show(req, mock.sentinel.name, mock.sentinel.binary) self.assertEqual(expected, cluster) diff --git a/cinder/tests/unit/objects/test_cluster.py b/cinder/tests/unit/objects/test_cluster.py index 6a63fca48b2..50d045e8103 100644 --- a/cinder/tests/unit/objects/test_cluster.py +++ b/cinder/tests/unit/objects/test_cluster.py @@ -13,6 +13,7 @@ # License for the specific language governing permissions and limitations # under the License. +import ddt import mock from oslo_utils import timeutils @@ -40,6 +41,7 @@ def _get_filters_sentinel(): 'num_down_hosts': mock.sentinel.num_down_hosts} +@ddt.ddt class TestCluster(test_objects.BaseObjectsTestCase): """Test Cluster Versioned Object methods.""" cluster = fake_cluster.fake_cluster_orm() @@ -111,6 +113,19 @@ class TestCluster(test_objects.BaseObjectsTestCase): last_heartbeat=expired_time) self.assertFalse(cluster.is_up) + @ddt.data('1.0', '1.1') + def tests_obj_make_compatible(self, version): + new_fields = {'replication_status': 'error', 'frozen': True, + 'active_backend_id': 'replication'} + cluster = objects.Cluster(self.context, **new_fields) + primitive = cluster.obj_to_primitive(version) + converted_cluster = objects.Cluster.obj_from_primitive(primitive) + for key, value in new_fields.items(): + if version == '1.0': + self.assertFalse(converted_cluster.obj_attr_is_set(key)) + else: + self.assertEqual(value, getattr(converted_cluster, key)) + class TestClusterList(test_objects.BaseObjectsTestCase): """Test ClusterList Versioned Object methods.""" diff --git a/cinder/tests/unit/objects/test_objects.py b/cinder/tests/unit/objects/test_objects.py index 5bec70ae460..4fdacf03721 100644 --- a/cinder/tests/unit/objects/test_objects.py +++ b/cinder/tests/unit/objects/test_objects.py @@ -28,7 +28,7 @@ object_data = { 'BackupImport': '1.4-c50f7a68bb4c400dd53dd219685b3992', 'BackupList': '1.0-15ecf022a68ddbb8c2a6739cfc9f8f5e', 'CleanupRequest': '1.0-e7c688b893e1d5537ccf65cc3eb10a28', - 'Cluster': '1.0-6f06e867c073e9d31722c53b0a9329b8', + 'Cluster': '1.1-cdb1572b250837933d950cc6662313b8', 'ClusterList': '1.0-15ecf022a68ddbb8c2a6739cfc9f8f5e', 'CGSnapshot': '1.1-3212ac2b4c2811b7134fb9ba2c49ff74', 'CGSnapshotList': '1.0-15ecf022a68ddbb8c2a6739cfc9f8f5e', diff --git a/cinder/tests/unit/test_migrations.py b/cinder/tests/unit/test_migrations.py index 0ff23872327..87984d59bf2 100644 --- a/cinder/tests/unit/test_migrations.py +++ b/cinder/tests/unit/test_migrations.py @@ -1076,6 +1076,16 @@ class MigrationsMixin(test_migrations.WalkVersionsMixin): self.VARCHAR_TYPE) self.assertTrue(messages.c.request_id.nullable) + def _check_088(self, engine, data): + """Test adding replication data to cluster table.""" + clusters = db_utils.get_table(engine, 'clusters') + self.assertIsInstance(clusters.c.replication_status.type, + self.VARCHAR_TYPE) + self.assertIsInstance(clusters.c.active_backend_id.type, + self.VARCHAR_TYPE) + self.assertIsInstance(clusters.c.frozen.type, + self.BOOL_TYPE) + def test_walk_versions(self): self.walk_versions(False, False) diff --git a/cinder/tests/unit/test_service.py b/cinder/tests/unit/test_service.py index c5d6cc2e56f..170992eff86 100644 --- a/cinder/tests/unit/test_service.py +++ b/cinder/tests/unit/test_service.py @@ -30,6 +30,7 @@ from cinder import db from cinder import exception from cinder import manager from cinder import objects +from cinder.objects import fields from cinder import rpc from cinder import service from cinder import test @@ -437,8 +438,9 @@ class ServiceTestCase(test.TestCase): if cluster and added_to_cluster: self.assertIsNotNone(app.cluster_rpcserver) - expected_target_calls.append(mock.call(topic=self.topic, - server=cluster)) + expected_target_calls.append(mock.call( + topic=self.topic + '.' + cluster, + server=cluster.split('@')[0])) expected_rpc_calls.extend(expected_rpc_calls[:]) # Check that we create message targets for host and cluster @@ -465,12 +467,111 @@ class ServiceTestCase(test.TestCase): get_min_obj_mock): """Test that with cluster we create the rpc service.""" get_min_obj_mock.return_value = obj_version - cluster = 'cluster' + cluster = 'cluster@backend#pool' + self.host = 'host@backend#pool' app = service.Service.create(host=self.host, binary='cinder-volume', cluster=cluster, topic=self.topic) self._check_rpc_servers_and_init_host(app, obj_version != '1.3', cluster) + @mock.patch('cinder.service.Service.is_svc_upgrading_to_n', + mock.Mock(return_value=False)) + @mock.patch('cinder.objects.Cluster.get_by_id') + def test_ensure_cluster_exists_no_cluster(self, get_mock): + app = service.Service.create(host=self.host, + binary=self.binary, + topic=self.topic) + svc = objects.Service.get_by_id(self.ctxt, app.service_id) + app._ensure_cluster_exists(self.ctxt, svc) + get_mock.assert_not_called() + self.assertEqual({}, svc.cinder_obj_get_changes()) + + @mock.patch('cinder.service.Service.is_svc_upgrading_to_n', + mock.Mock(return_value=False)) + @mock.patch('cinder.objects.Cluster.get_by_id') + def test_ensure_cluster_exists_cluster_exists_non_relicated(self, + get_mock): + cluster = objects.Cluster( + name='cluster_name', active_backend_id=None, frozen=False, + replication_status=fields.ReplicationStatus.NOT_CAPABLE) + get_mock.return_value = cluster + + app = service.Service.create(host=self.host, + binary=self.binary, + topic=self.topic) + svc = objects.Service.get_by_id(self.ctxt, app.service_id) + app.cluster = cluster.name + app._ensure_cluster_exists(self.ctxt, svc) + get_mock.assert_called_once_with(self.ctxt, None, name=cluster.name, + binary=app.binary) + self.assertEqual({}, svc.cinder_obj_get_changes()) + + @mock.patch('cinder.service.Service.is_svc_upgrading_to_n', + mock.Mock(return_value=False)) + @mock.patch('cinder.objects.Cluster.get_by_id') + def test_ensure_cluster_exists_cluster_change(self, get_mock): + """We copy replication fields from the cluster to the service.""" + changes = dict(replication_status=fields.ReplicationStatus.FAILED_OVER, + active_backend_id='secondary', + frozen=True) + cluster = objects.Cluster(name='cluster_name', **changes) + get_mock.return_value = cluster + + app = service.Service.create(host=self.host, + binary=self.binary, + topic=self.topic) + svc = objects.Service.get_by_id(self.ctxt, app.service_id) + app.cluster = cluster.name + app._ensure_cluster_exists(self.ctxt, svc) + get_mock.assert_called_once_with(self.ctxt, None, name=cluster.name, + binary=app.binary) + self.assertEqual(changes, svc.cinder_obj_get_changes()) + + @mock.patch('cinder.service.Service.is_svc_upgrading_to_n', + mock.Mock(return_value=False)) + @mock.patch('cinder.objects.Cluster.get_by_id') + def test_ensure_cluster_exists_cluster_no_change(self, get_mock): + """Don't copy replication fields from cluster if replication error.""" + changes = dict(replication_status=fields.ReplicationStatus.FAILED_OVER, + active_backend_id='secondary', + frozen=True) + cluster = objects.Cluster(name='cluster_name', **changes) + get_mock.return_value = cluster + + app = service.Service.create(host=self.host, + binary=self.binary, + topic=self.topic) + svc = objects.Service.get_by_id(self.ctxt, app.service_id) + svc.replication_status = fields.ReplicationStatus.ERROR + svc.obj_reset_changes() + app.cluster = cluster.name + app._ensure_cluster_exists(self.ctxt, svc) + get_mock.assert_called_once_with(self.ctxt, None, name=cluster.name, + binary=app.binary) + self.assertEqual({}, svc.cinder_obj_get_changes()) + + @mock.patch('cinder.service.Service.is_svc_upgrading_to_n', + mock.Mock(return_value=False)) + def test_ensure_cluster_exists_cluster_create_replicated_and_non(self): + """We use service replication fields to create the cluster.""" + changes = dict(replication_status=fields.ReplicationStatus.FAILED_OVER, + active_backend_id='secondary', + frozen=True) + + app = service.Service.create(host=self.host, + binary=self.binary, + topic=self.topic) + svc = objects.Service.get_by_id(self.ctxt, app.service_id) + for key, value in changes.items(): + setattr(svc, key, value) + + app.cluster = 'cluster_name' + app._ensure_cluster_exists(self.ctxt, svc) + + cluster = objects.Cluster.get_by_id(self.ctxt, None, name=app.cluster) + for key, value in changes.items(): + self.assertEqual(value, getattr(cluster, key)) + class TestWSGIService(test.TestCase): diff --git a/cinder/tests/unit/test_volume.py b/cinder/tests/unit/test_volume.py index 58d54b43c0f..28951fabb90 100644 --- a/cinder/tests/unit/test_volume.py +++ b/cinder/tests/unit/test_volume.py @@ -40,6 +40,7 @@ from taskflow.engines.action_engine import engine from cinder.api import common from cinder.brick.local_dev import lvm as brick_lvm +from cinder.common import constants from cinder import context from cinder import coordination from cinder import db @@ -5711,107 +5712,356 @@ class VolumeMigrationTestCase(base.BaseVolumeTestCase): self.assertRaises(exception.VolumeNotFound, volume.refresh) +@ddt.ddt class ReplicationTestCase(base.BaseVolumeTestCase): - @mock.patch.object(volume_rpcapi.VolumeAPI, 'failover_host') + @mock.patch('cinder.objects.Service.is_up', True) + @mock.patch.object(volume_rpcapi.VolumeAPI, 'failover') @mock.patch.object(cinder.db, 'conditional_update') - @mock.patch.object(cinder.db, 'service_get') - def test_failover_host(self, mock_db_args, mock_db_update, - mock_failover): - """Test replication failover_host.""" + @mock.patch.object(objects.ServiceList, 'get_all') + def test_failover(self, mock_get_all, mock_db_update, mock_failover): + """Test replication failover.""" - mock_db_args.return_value = fake_service.fake_service_obj( - self.context, - binary='cinder-volume') + service = fake_service.fake_service_obj(self.context, + binary='cinder-volume') + mock_get_all.return_value = [service] mock_db_update.return_value = {'replication_status': 'enabled'} volume_api = cinder.volume.api.API() - volume_api.failover_host(self.context, host=CONF.host) - mock_failover.assert_called_once_with(self.context, CONF.host, None) + volume_api.failover(self.context, host=CONF.host, cluster_name=None) + mock_failover.assert_called_once_with(self.context, service, None) - @mock.patch.object(volume_rpcapi.VolumeAPI, 'failover_host') + @mock.patch.object(volume_rpcapi.VolumeAPI, 'failover') @mock.patch.object(cinder.db, 'conditional_update') - @mock.patch.object(cinder.db, 'service_get') - def test_failover_host_unexpected_status(self, mock_db_args, - mock_db_update, - mock_failover): - """Test replication failover_host unxepected status.""" + @mock.patch.object(cinder.db, 'service_get_all') + def test_failover_unexpected_status(self, mock_db_get_all, mock_db_update, + mock_failover): + """Test replication failover unxepected status.""" - mock_db_args.return_value = fake_service.fake_service_obj( + mock_db_get_all.return_value = [fake_service.fake_service_obj( self.context, - binary='cinder-volume') + binary='cinder-volume')] mock_db_update.return_value = None volume_api = cinder.volume.api.API() self.assertRaises(exception.InvalidInput, - volume_api.failover_host, + volume_api.failover, self.context, - host=CONF.host) + host=CONF.host, + cluster_name=None) @mock.patch.object(volume_rpcapi.VolumeAPI, 'freeze_host') - @mock.patch.object(cinder.db, 'conditional_update') - @mock.patch.object(cinder.db, 'service_get') - def test_freeze_host(self, mock_db_args, mock_db_update, + @mock.patch.object(cinder.db, 'conditional_update', return_value=1) + @mock.patch.object(cinder.objects.ServiceList, 'get_all') + def test_freeze_host(self, mock_get_all, mock_db_update, mock_freeze): """Test replication freeze_host.""" - mock_db_args.return_value = fake_service.fake_service_obj( - self.context, - binary='cinder-volume') - mock_db_update.return_value = {'frozen': False} + service = fake_service.fake_service_obj(self.context, + binary='cinder-volume') + mock_get_all.return_value = [service] + mock_freeze.return_value = True volume_api = cinder.volume.api.API() - volume_api.freeze_host(self.context, host=CONF.host) - mock_freeze.assert_called_once_with(self.context, CONF.host) + volume_api.freeze_host(self.context, host=CONF.host, cluster_name=None) + mock_freeze.assert_called_once_with(self.context, service) @mock.patch.object(volume_rpcapi.VolumeAPI, 'freeze_host') @mock.patch.object(cinder.db, 'conditional_update') - @mock.patch.object(cinder.db, 'service_get') - def test_freeze_host_unexpected_status(self, mock_db_args, + @mock.patch.object(cinder.db, 'service_get_all') + def test_freeze_host_unexpected_status(self, mock_get_all, mock_db_update, mock_freeze): """Test replication freeze_host unexpected status.""" - mock_db_args.return_value = fake_service.fake_service_obj( + mock_get_all.return_value = [fake_service.fake_service_obj( self.context, - binary='cinder-volume') + binary='cinder-volume')] mock_db_update.return_value = None volume_api = cinder.volume.api.API() self.assertRaises(exception.InvalidInput, volume_api.freeze_host, self.context, - host=CONF.host) + host=CONF.host, + cluster_name=None) @mock.patch.object(volume_rpcapi.VolumeAPI, 'thaw_host') - @mock.patch.object(cinder.db, 'conditional_update') - @mock.patch.object(cinder.db, 'service_get') - def test_thaw_host(self, mock_db_args, mock_db_update, + @mock.patch.object(cinder.db, 'conditional_update', return_value=1) + @mock.patch.object(cinder.objects.ServiceList, 'get_all') + def test_thaw_host(self, mock_get_all, mock_db_update, mock_thaw): """Test replication thaw_host.""" - mock_db_args.return_value = fake_service.fake_service_obj( - self.context, - binary='cinder-volume') - mock_db_update.return_value = {'frozen': True} + service = fake_service.fake_service_obj(self.context, + binary='cinder-volume') + mock_get_all.return_value = [service] mock_thaw.return_value = True volume_api = cinder.volume.api.API() - volume_api.thaw_host(self.context, host=CONF.host) - mock_thaw.assert_called_once_with(self.context, CONF.host) + volume_api.thaw_host(self.context, host=CONF.host, cluster_name=None) + mock_thaw.assert_called_once_with(self.context, service) @mock.patch.object(volume_rpcapi.VolumeAPI, 'thaw_host') @mock.patch.object(cinder.db, 'conditional_update') - @mock.patch.object(cinder.db, 'service_get') - def test_thaw_host_unexpected_status(self, mock_db_args, + @mock.patch.object(cinder.db, 'service_get_all') + def test_thaw_host_unexpected_status(self, mock_get_all, mock_db_update, mock_thaw): """Test replication thaw_host unexpected status.""" - mock_db_args.return_value = fake_service.fake_service_obj( + mock_get_all.return_value = [fake_service.fake_service_obj( self.context, - binary='cinder-volume') + binary='cinder-volume')] mock_db_update.return_value = None volume_api = cinder.volume.api.API() self.assertRaises(exception.InvalidInput, volume_api.thaw_host, self.context, - host=CONF.host) + host=CONF.host, cluster_name=None) + + @mock.patch('cinder.volume.driver.BaseVD.failover_completed') + def test_failover_completed(self, completed_mock): + rep_field = fields.ReplicationStatus + svc = objects.Service(self.context, host=self.volume.host, + binary=constants.VOLUME_BINARY, + replication_status=rep_field.ENABLED) + svc.create() + self.volume.failover_completed( + self.context, + {'active_backend_id': 'secondary', + 'replication_status': rep_field.FAILED_OVER}) + service = objects.Service.get_by_id(self.context, svc.id) + self.assertEqual('secondary', service.active_backend_id) + self.assertEqual('failed-over', service.replication_status) + completed_mock.assert_called_once_with(self.context, 'secondary') + + @mock.patch('cinder.volume.driver.BaseVD.failover_completed', wraps=True) + def test_failover_completed_driver_failure(self, completed_mock): + rep_field = fields.ReplicationStatus + svc = objects.Service(self.context, host=self.volume.host, + binary=constants.VOLUME_BINARY, + replication_status=rep_field.ENABLED) + svc.create() + self.volume.failover_completed( + self.context, + {'active_backend_id': 'secondary', + 'replication_status': rep_field.FAILED_OVER}) + service = objects.Service.get_by_id(self.context, svc.id) + self.assertEqual('secondary', service.active_backend_id) + self.assertEqual(rep_field.ERROR, service.replication_status) + self.assertTrue(service.disabled) + self.assertIsNotNone(service.disabled_reason) + completed_mock.assert_called_once_with(self.context, 'secondary') + + @mock.patch('cinder.volume.rpcapi.VolumeAPI.failover_completed') + def test_finish_failover_non_clustered(self, completed_mock): + svc = mock.Mock(is_clustered=None) + self.volume.finish_failover(self.context, svc, mock.sentinel.updates) + svc.update.assert_called_once_with(mock.sentinel.updates) + svc.save.assert_called_once_with() + completed_mock.assert_not_called() + + @mock.patch('cinder.volume.rpcapi.VolumeAPI.failover_completed') + def test_finish_failover_clustered(self, completed_mock): + svc = mock.Mock(cluster_name='cluster_name') + updates = {'status': 'error'} + self.volume.finish_failover(self.context, svc, updates) + completed_mock.assert_called_once_with(self.context, svc, updates) + svc.cluster.status = 'error' + svc.cluster.save.assert_called_once() + + @ddt.data(None, 'cluster_name') + @mock.patch('cinder.volume.manager.VolumeManager.finish_failover') + @mock.patch('cinder.volume.manager.VolumeManager._get_my_volumes') + def test_failover_manager(self, cluster, get_vols_mock, finish_mock): + """Test manager's failover method for clustered and not clustered.""" + rep_field = fields.ReplicationStatus + svc = objects.Service(self.context, host=self.volume.host, + binary=constants.VOLUME_BINARY, + cluster_name=cluster, + replication_status=rep_field.ENABLED) + svc.create() + + vol = objects.Volume(self.context, host=self.volume.host) + vol.create() + + get_vols_mock.return_value = [vol] + + with mock.patch.object(self.volume, 'driver') as driver: + called, not_called = driver.failover_host, driver.failover + if cluster: + called, not_called = not_called, called + + called.return_value = ('secondary', [{'volume_id': vol.id, + 'updates': {'status': 'error'}}]) + + self.volume.failover(self.context, + secondary_backend_id='secondary') + + not_called.assert_not_called() + called.assert_called_once_with(self.context, [vol], + secondary_id='secondary') + + expected_update = {'replication_status': rep_field.FAILED_OVER, + 'active_backend_id': 'secondary', + 'disabled': True, + 'disabled_reason': 'failed-over'} + finish_mock.assert_called_once_with(self.context, svc, expected_update) + + volume = objects.Volume.get_by_id(self.context, vol.id) + self.assertEqual('error', volume.status) + + @ddt.data(('host1', None), (None, 'mycluster')) + @ddt.unpack + def test_failover_api_fail_multiple_results(self, host, cluster): + """Fail if we try to failover multiple backends in the same request.""" + rep_field = fields.ReplicationStatus + clusters = [ + objects.Cluster(self.context, + name='mycluster@backend1', + replication_status=rep_field.ENABLED, + binary=constants.VOLUME_BINARY), + objects.Cluster(self.context, + name='mycluster@backend2', + replication_status=rep_field.ENABLED, + binary=constants.VOLUME_BINARY) + ] + clusters[0].create() + clusters[1].create() + services = [ + objects.Service(self.context, host='host1@backend1', + cluster_name=clusters[0].name, + replication_status=rep_field.ENABLED, + binary=constants.VOLUME_BINARY), + objects.Service(self.context, host='host1@backend2', + cluster_name=clusters[1].name, + replication_status=rep_field.ENABLED, + binary=constants.VOLUME_BINARY), + ] + services[0].create() + services[1].create() + self.assertRaises(exception.Invalid, + self.volume_api.failover, self.context, host, + cluster) + + def test_failover_api_not_found(self): + self.assertRaises(exception.ServiceNotFound, self.volume_api.failover, + self.context, 'host1', None) + + @mock.patch('cinder.volume.rpcapi.VolumeAPI.failover') + def test_failover_api_success_multiple_results(self, failover_mock): + """Succeed to failover multiple services for the same backend.""" + rep_field = fields.ReplicationStatus + cluster_name = 'mycluster@backend1' + cluster = objects.Cluster(self.context, + name=cluster_name, + replication_status=rep_field.ENABLED, + binary=constants.VOLUME_BINARY) + cluster.create() + services = [ + objects.Service(self.context, host='host1@backend1', + cluster_name=cluster_name, + replication_status=rep_field.ENABLED, + binary=constants.VOLUME_BINARY), + objects.Service(self.context, host='host2@backend1', + cluster_name=cluster_name, + replication_status=rep_field.ENABLED, + binary=constants.VOLUME_BINARY), + ] + services[0].create() + services[1].create() + + self.volume_api.failover(self.context, None, cluster_name, + mock.sentinel.secondary_id) + + for service in services + [cluster]: + self.assertEqual(rep_field.ENABLED, service.replication_status) + service.refresh() + self.assertEqual(rep_field.FAILING_OVER, + service.replication_status) + + failover_mock.assert_called_once_with(self.context, mock.ANY, + mock.sentinel.secondary_id) + self.assertEqual(services[0].id, failover_mock.call_args[0][1].id) + + @mock.patch('cinder.volume.rpcapi.VolumeAPI.failover') + def test_failover_api_success_multiple_results_not_updated(self, + failover_mock): + """Succeed to failover even if a service is not updated.""" + rep_field = fields.ReplicationStatus + cluster_name = 'mycluster@backend1' + cluster = objects.Cluster(self.context, + name=cluster_name, + replication_status=rep_field.ENABLED, + binary=constants.VOLUME_BINARY) + cluster.create() + services = [ + objects.Service(self.context, host='host1@backend1', + cluster_name=cluster_name, + replication_status=rep_field.ENABLED, + binary=constants.VOLUME_BINARY), + objects.Service(self.context, host='host2@backend1', + cluster_name=cluster_name, + replication_status=rep_field.ERROR, + binary=constants.VOLUME_BINARY), + ] + services[0].create() + services[1].create() + + self.volume_api.failover(self.context, None, cluster_name, + mock.sentinel.secondary_id) + + for service in services[:1] + [cluster]: + service.refresh() + self.assertEqual(rep_field.FAILING_OVER, + service.replication_status) + + services[1].refresh() + self.assertEqual(rep_field.ERROR, services[1].replication_status) + + failover_mock.assert_called_once_with(self.context, mock.ANY, + mock.sentinel.secondary_id) + self.assertEqual(services[0].id, failover_mock.call_args[0][1].id) + + @mock.patch('cinder.volume.rpcapi.VolumeAPI.failover') + def test_failover_api_fail_multiple_results_not_updated(self, + failover_mock): + """Fail if none of the services could be updated.""" + rep_field = fields.ReplicationStatus + cluster_name = 'mycluster@backend1' + cluster = objects.Cluster(self.context, + name=cluster_name, + replication_status=rep_field.ENABLED, + binary=constants.VOLUME_BINARY) + cluster.create() + down_time = timeutils.datetime.datetime(1970, 1, 1) + services = [ + # This service is down + objects.Service(self.context, host='host1@backend1', + cluster_name=cluster_name, + replication_status=rep_field.ENABLED, + created_at=down_time, + updated_at=down_time, + modified_at=down_time, + binary=constants.VOLUME_BINARY), + # This service is not with the right replication status + objects.Service(self.context, host='host2@backend1', + cluster_name=cluster_name, + replication_status=rep_field.ERROR, + binary=constants.VOLUME_BINARY), + ] + services[0].create() + services[1].create() + + self.assertRaises(exception.InvalidInput, + self.volume_api.failover, self.context, None, + cluster_name, mock.sentinel.secondary_id) + + for service in services: + svc = objects.Service.get_by_id(self.context, service.id) + self.assertEqual(service.replication_status, + svc.replication_status) + + cluster.refresh() + self.assertEqual(rep_field.ENABLED, cluster.replication_status) + + failover_mock.assert_not_called() class CopyVolumeToImageTestCase(base.BaseVolumeTestCase): diff --git a/cinder/tests/unit/test_volume_rpcapi.py b/cinder/tests/unit/test_volume_rpcapi.py index a48cdea7c78..048abaf0155 100644 --- a/cinder/tests/unit/test_volume_rpcapi.py +++ b/cinder/tests/unit/test_volume_rpcapi.py @@ -32,6 +32,7 @@ from cinder.objects import fields from cinder import test from cinder.tests.unit.backup import fake_backup from cinder.tests.unit import fake_constants as fake +from cinder.tests.unit import fake_service from cinder.tests.unit import fake_snapshot from cinder.tests.unit import fake_volume from cinder.tests.unit import utils as tests_utils @@ -135,6 +136,7 @@ class VolumeRpcAPITestCase(test.TestCase): self.assertIn('id', self.fake_volume) def _get_expected_msg(self, kwargs): + update = kwargs.pop('_expected_msg', {}) expected_msg = copy.deepcopy(kwargs) if 'volume' in expected_msg: volume = expected_msg.pop('volume') @@ -172,9 +174,11 @@ class VolumeRpcAPITestCase(test.TestCase): if 'new_volume' in expected_msg: volume = expected_msg['new_volume'] expected_msg['new_volume_id'] = volume['id'] + expected_msg.update(update) return expected_msg - def _test_volume_api(self, method, rpc_method, **kwargs): + def _test_volume_api(self, method, rpc_method, _expected_method=None, + **kwargs): ctxt = context.RequestContext('fake_user', 'fake_project') if 'rpcapi_class' in kwargs: @@ -207,6 +211,8 @@ class VolumeRpcAPITestCase(test.TestCase): host = 'fake_host' elif 'cgsnapshot' in kwargs: host = kwargs['cgsnapshot'].consistencygroup.service_topic_queue + elif 'service' in kwargs: + host = kwargs['service'].service_topic_queue target['server'] = utils.extract_host(host, 'host') target['topic'] = '%s.%s' % (constants.VOLUME_TOPIC, @@ -233,7 +239,7 @@ class VolumeRpcAPITestCase(test.TestCase): retval = getattr(rpcapi, method)(ctxt, **kwargs) self.assertEqual(expected_retval, retval) - expected_args = [ctxt, method] + expected_args = [ctxt, _expected_method or method] for arg, expected_arg in zip(self.fake_args, expected_args): self.assertEqual(expected_arg, arg) @@ -623,18 +629,42 @@ class VolumeRpcAPITestCase(test.TestCase): version='3.0') def test_freeze_host(self): + service = fake_service.fake_service_obj(self.context, + host='fake_host', + binary='cinder-volume') self._test_volume_api('freeze_host', rpc_method='call', - host='fake_host', version='3.0') + service=service, version='3.0') def test_thaw_host(self): - self._test_volume_api('thaw_host', rpc_method='call', host='fake_host', + service = fake_service.fake_service_obj(self.context, + host='fake_host', + binary='cinder-volume') + self._test_volume_api('thaw_host', rpc_method='call', service=service, version='3.0') - def test_failover_host(self): - self._test_volume_api('failover_host', rpc_method='cast', - host='fake_host', + @ddt.data('3.0', '3.8') + @mock.patch('oslo_messaging.RPCClient.can_send_version') + def test_failover(self, version, can_send_version): + can_send_version.side_effect = lambda x: x == version + service = objects.Service(self.context, host='fake_host', + cluster_name=None) + _expected_method = 'failover' if version == '3.8' else 'failover_host' + self._test_volume_api('failover', rpc_method='cast', + service=service, secondary_backend_id='fake_backend', - version='3.0') + version=version, + _expected_method=_expected_method) + + @mock.patch('cinder.volume.rpcapi.VolumeAPI._get_cctxt') + def test_failover_completed(self, cctxt_mock): + service = objects.Service(self.context, host='fake_host', + cluster_name='cluster_name') + rpcapi = volume_rpcapi.VolumeAPI() + rpcapi.failover_completed(self.context, service, mock.sentinel.updates) + cctxt_mock.assert_called_once_with(service.cluster_name, '3.8', + fanout=True) + cctxt_mock.return_value.cast(self.context, 'failover_completed', + updates=mock.sentinel.updates) def test_create_consistencygroup_from_src_cgsnapshot(self): self._test_volume_api('create_consistencygroup_from_src', diff --git a/cinder/tests/unit/volume/drivers/nexenta/test_nexenta_edge.py b/cinder/tests/unit/volume/drivers/nexenta/test_nexenta_edge.py index 129624e9e76..dcf8ddfd77c 100644 --- a/cinder/tests/unit/volume/drivers/nexenta/test_nexenta_edge.py +++ b/cinder/tests/unit/volume/drivers/nexenta/test_nexenta_edge.py @@ -76,6 +76,7 @@ class TestNexentaEdgeISCSIDriver(test.TestCase): self.cfg.nexenta_iscsi_service = NEDGE_SERVICE self.cfg.nexenta_blocksize = NEDGE_BLOCKSIZE self.cfg.nexenta_chunksize = NEDGE_CHUNKSIZE + self.cfg.replication_device = [] mock_exec = mock.Mock() mock_exec.return_value = ('', '') diff --git a/cinder/tests/unit/volume/drivers/nexenta/test_nexenta_edge_nbd.py b/cinder/tests/unit/volume/drivers/nexenta/test_nexenta_edge_nbd.py index ae76f6cab6e..b4a442de70a 100644 --- a/cinder/tests/unit/volume/drivers/nexenta/test_nexenta_edge_nbd.py +++ b/cinder/tests/unit/volume/drivers/nexenta/test_nexenta_edge_nbd.py @@ -78,6 +78,7 @@ class TestNexentaEdgeNBDDriver(test.TestCase): self.cfg.nexenta_blocksize = 512 self.cfg.nexenta_chunksize = 4096 self.cfg.reserved_percentage = 0 + self.cfg.replication_device = [] self.ctx = context.get_admin_context() self.drv = nbd.NexentaEdgeNBDDriver(configuration=self.cfg) diff --git a/cinder/tests/unit/volume/test_driver.py b/cinder/tests/unit/volume/test_driver.py new file mode 100644 index 00000000000..1602bc642d8 --- /dev/null +++ b/cinder/tests/unit/volume/test_driver.py @@ -0,0 +1,120 @@ +# Copyright (c) 2016 Red Hat Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import ddt +import mock + +from cinder import exception +from cinder import test +from cinder.volume import driver +from cinder.volume import manager + + +def my_safe_get(self, value): + if value == 'replication_device': + return ['replication'] + return None + + +@ddt.ddt +class DriverTestCase(test.TestCase): + + @staticmethod + def _get_driver(relicated, version): + class NonReplicatedDriver(driver.VolumeDriver): + pass + + class V21Driver(driver.VolumeDriver): + def failover_host(*args, **kwargs): + pass + + class AADriver(V21Driver): + def failover_completed(*args, **kwargs): + pass + + if not relicated: + return NonReplicatedDriver + + if version == 'v2.1': + return V21Driver + + return AADriver + + @ddt.data('v2.1', 'a/a', 'newfeature') + def test_supports_replication_feature_none(self, rep_version): + my_driver = self._get_driver(False, None) + self.assertFalse(my_driver.supports_replication_feature(rep_version)) + + @ddt.data('v2.1', 'a/a', 'newfeature') + def test_supports_replication_feature_only_21(self, rep_version): + version = 'v2.1' + my_driver = self._get_driver(True, version) + self.assertEqual(rep_version == version, + my_driver.supports_replication_feature(rep_version)) + + @ddt.data('v2.1', 'a/a', 'newfeature') + def test_supports_replication_feature_aa(self, rep_version): + my_driver = self._get_driver(True, 'a/a') + self.assertEqual(rep_version in ('v2.1', 'a/a'), + my_driver.supports_replication_feature(rep_version)) + + def test_init_non_replicated(self): + config = manager.config.Configuration(manager.volume_manager_opts, + config_group='volume') + # No exception raised + self._get_driver(False, None)(configuration=config) + + @ddt.data('v2.1', 'a/a') + @mock.patch('cinder.volume.configuration.Configuration.safe_get', + my_safe_get) + def test_init_replicated_non_clustered(self, version): + def append_config_values(self, volume_opts): + pass + + config = manager.config.Configuration(manager.volume_manager_opts, + config_group='volume') + # No exception raised + self._get_driver(True, version)(configuration=config) + + @mock.patch('cinder.volume.configuration.Configuration.safe_get', + my_safe_get) + def test_init_replicated_clustered_not_supported(self): + config = manager.config.Configuration(manager.volume_manager_opts, + config_group='volume') + # Raises exception because we are trying to run a replicated service + # in clustered mode but the driver doesn't support it. + self.assertRaises(exception.Invalid, self._get_driver(True, 'v2.1'), + configuration=config, cluster_name='mycluster') + + @mock.patch('cinder.volume.configuration.Configuration.safe_get', + my_safe_get) + def test_init_replicated_clustered_supported(self): + config = manager.config.Configuration(manager.volume_manager_opts, + config_group='volume') + # No exception raised + self._get_driver(True, 'a/a')(configuration=config, + cluster_name='mycluster') + + def test_failover(self): + """Test default failover behavior of calling failover_host.""" + my_driver = self._get_driver(True, 'a/a')() + with mock.patch.object(my_driver, 'failover_host') as failover_mock: + res = my_driver.failover(mock.sentinel.context, + mock.sentinel.volumes, + secondary_id=mock.sentinel.secondary_id) + self.assertEqual(failover_mock.return_value, res) + failover_mock.assert_called_once_with(mock.sentinel.context, + mock.sentinel.volumes, + mock.sentinel.secondary_id) diff --git a/cinder/tests/unit/volume/test_replication_manager.py b/cinder/tests/unit/volume/test_replication_manager.py index 3cc2a48e863..3d7486aebc4 100644 --- a/cinder/tests/unit/volume/test_replication_manager.py +++ b/cinder/tests/unit/volume/test_replication_manager.py @@ -31,7 +31,7 @@ class ReplicationTestCase(base.BaseVolumeTestCase): self.host = 'host@backend#pool' self.manager = manager.VolumeManager(host=self.host) - @mock.patch('cinder.objects.VolumeList.get_all_by_host') + @mock.patch('cinder.objects.VolumeList.get_all') @mock.patch('cinder.volume.driver.BaseVD.failover_host', side_effect=exception.InvalidReplicationTarget('')) @ddt.data(('backend2', 'default', fields.ReplicationStatus.FAILED_OVER), @@ -55,7 +55,8 @@ class ReplicationTestCase(base.BaseVolumeTestCase): replication_status=fields.ReplicationStatus.FAILING_OVER) self.manager.failover_host(self.context, new_backend) - mock_getall.assert_called_once_with(self.context, self.host) + mock_getall.assert_called_once_with(self.context, + filters={'host': self.host}) mock_failover.assert_called_once_with(self.context, mock_getall.return_value, secondary_id=new_backend) diff --git a/cinder/volume/api.py b/cinder/volume/api.py index 2c485434b9d..ab13c4bb378 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -26,6 +26,7 @@ from oslo_log import log as logging from oslo_utils import excutils from oslo_utils import strutils from oslo_utils import timeutils +from oslo_utils import versionutils import six from cinder.api import common @@ -1704,70 +1705,137 @@ class API(base.Base): offset, sort_keys, sort_dirs) + def _get_cluster_and_services_for_replication(self, ctxt, host, + cluster_name): + services = objects.ServiceList.get_all( + ctxt, filters={'host': host, 'cluster_name': cluster_name, + 'binary': constants.VOLUME_BINARY}) + + if not services: + msg = _('No service found with ') + ( + 'host=%(host)s' if host else 'cluster=%(cluster_name)s') + raise exception.ServiceNotFound(msg, host=host, + cluster_name=cluster_name) + + cluster = services[0].cluster + # Check that the host or cluster we received only results in 1 host or + # hosts from the same cluster. + if cluster_name: + check_attribute = 'cluster_name' + expected = cluster.name + else: + check_attribute = 'host' + expected = services[0].host + if any(getattr(s, check_attribute) != expected for s in services): + msg = _('Services from different clusters found.') + raise exception.InvalidParameterValue(msg) + + # If we received host parameter but host belongs to a cluster we have + # to change all the services in the cluster, not just one host + if host and cluster: + services = cluster.services + + return cluster, services + + def _replication_db_change(self, ctxt, field, expected_value, new_value, + host, cluster_name, check_up=False): + def _error_msg(service): + expected = utils.build_or_str(six.text_type(expected_value)) + up_msg = 'and must be up ' if check_up else '' + msg = (_('%(field)s in %(service)s must be %(expected)s ' + '%(up_msg)sto failover.') + % {'field': field, 'service': service, + 'expected': expected, 'up_msg': up_msg}) + LOG.error(msg) + return msg + + cluster, services = self._get_cluster_and_services_for_replication( + ctxt, host, cluster_name) + + expect = {field: expected_value} + change = {field: new_value} + + if cluster: + old_value = getattr(cluster, field) + if ((check_up and not cluster.is_up) + or not cluster.conditional_update(change, expect)): + msg = _error_msg(cluster.name) + raise exception.InvalidInput(reason=msg) + + changed = [] + not_changed = [] + for service in services: + if ((not check_up or service.is_up) + and service.conditional_update(change, expect)): + changed.append(service) + else: + not_changed.append(service) + + # If there were some services that couldn't be changed we should at + # least log the error. + if not_changed: + msg = _error_msg([s.host for s in not_changed]) + # If we couldn't change any of the services + if not changed: + # Undo the cluster change + if cluster: + setattr(cluster, field, old_value) + cluster.save() + raise exception.InvalidInput( + reason=_('No service could be changed: %s') % msg) + LOG.warning(_LW('Some services could not be changed: %s'), msg) + + return cluster, services + # FIXME(jdg): Move these Cheesecake methods (freeze, thaw and failover) # to a services API because that's what they are - def failover_host(self, - ctxt, - host, - secondary_id=None): - + def failover(self, ctxt, host, cluster_name, secondary_id=None): check_policy(ctxt, 'failover_host') ctxt = ctxt if ctxt.is_admin else ctxt.elevated() - svc_host = volume_utils.extract_host(host, 'backend') - service = objects.Service.get_by_args( - ctxt, svc_host, constants.VOLUME_BINARY) - expected = {'replication_status': [fields.ReplicationStatus.ENABLED, - fields.ReplicationStatus.FAILED_OVER]} - result = service.conditional_update( - {'replication_status': fields.ReplicationStatus.FAILING_OVER}, - expected) - if not result: - expected_status = utils.build_or_str( - expected['replication_status']) - msg = (_('Host replication_status must be %s to failover.') - % expected_status) - LOG.error(msg) - raise exception.InvalidInput(reason=msg) - self.volume_rpcapi.failover_host(ctxt, host, secondary_id) + # TODO(geguileo): In P - Remove this version check + rpc_version = self.volume_rpcapi.determine_rpc_version_cap() + rpc_version = versionutils.convert_version_to_tuple(rpc_version) + if cluster_name and rpc_version < (3, 5): + msg = _('replication operations with cluster field') + raise exception.UnavailableDuringUpgrade(action=msg) - def freeze_host(self, ctxt, host): + rep_fields = fields.ReplicationStatus + expected_values = [rep_fields.ENABLED, rep_fields.FAILED_OVER] + new_value = rep_fields.FAILING_OVER + cluster, services = self._replication_db_change( + ctxt, 'replication_status', expected_values, new_value, host, + cluster_name, check_up=True) + + self.volume_rpcapi.failover(ctxt, services[0], secondary_id) + + def freeze_host(self, ctxt, host, cluster_name): check_policy(ctxt, 'freeze_host') ctxt = ctxt if ctxt.is_admin else ctxt.elevated() - svc_host = volume_utils.extract_host(host, 'backend') - service = objects.Service.get_by_args( - ctxt, svc_host, constants.VOLUME_BINARY) - expected = {'frozen': False} - result = service.conditional_update( - {'frozen': True}, expected) - if not result: - msg = _('Host is already Frozen.') - LOG.error(msg) - raise exception.InvalidInput(reason=msg) + expected = False + new_value = True + cluster, services = self._replication_db_change( + ctxt, 'frozen', expected, new_value, host, cluster_name, + check_up=False) # Should we set service status to disabled to keep # scheduler calls from being sent? Just use existing # `cinder service-disable reason=freeze` - self.volume_rpcapi.freeze_host(ctxt, host) - - def thaw_host(self, ctxt, host): + self.volume_rpcapi.freeze_host(ctxt, services[0]) + def thaw_host(self, ctxt, host, cluster_name): check_policy(ctxt, 'thaw_host') ctxt = ctxt if ctxt.is_admin else ctxt.elevated() - svc_host = volume_utils.extract_host(host, 'backend') - service = objects.Service.get_by_args( - ctxt, svc_host, constants.VOLUME_BINARY) - expected = {'frozen': True} - result = service.conditional_update( - {'frozen': False}, expected) - if not result: - msg = _('Host is NOT Frozen.') - LOG.error(msg) - raise exception.InvalidInput(reason=msg) - if not self.volume_rpcapi.thaw_host(ctxt, host): + expected = True + new_value = False + cluster, services = self._replication_db_change( + ctxt, 'frozen', expected, new_value, host, cluster_name, + check_up=False) + + if not self.volume_rpcapi.thaw_host(ctxt, services[0]): return "Backend reported error during thaw_host operation." def check_volume_filters(self, filters, strict=False): diff --git a/cinder/volume/driver.py b/cinder/volume/driver.py index 8f722df56ad..c177dd080c1 100644 --- a/cinder/volume/driver.py +++ b/cinder/volume/driver.py @@ -335,6 +335,10 @@ class BaseVD(object): # the unsupported driver started. SUPPORTED = True + # Methods checked to detect a driver implements a replication feature + REPLICATION_FEATURE_CHECKERS = {'v2.1': 'failover_host', + 'a/a': 'failover_completed'} + def __init__(self, execute=utils.execute, *args, **kwargs): # NOTE(vish): db is set by Manager self.db = kwargs.get('db') @@ -347,6 +351,16 @@ class BaseVD(object): self.configuration.append_config_values(iser_opts) utils.setup_tracing(self.configuration.safe_get('trace_flags')) + # NOTE(geguileo): Don't allow to start if we are enabling + # replication on a cluster service with a backend that doesn't + # support the required mechanism for Active-Active. + replication_devices = self.configuration.safe_get( + 'replication_device') + if (self.cluster_name and replication_devices and + not self.supports_replication_feature('a/a')): + raise exception.Invalid(_("Driver doesn't support clustered " + "replication.")) + self.driver_utils = driver_utils.VolumeDriverUtils( self._driver_data_namespace(), self.db) @@ -1701,6 +1715,38 @@ class BaseVD(object): # 'replication_extended_status': 'whatever',...}},] raise NotImplementedError() + def failover(self, context, volumes, secondary_id=None): + """Like failover but for a host that is clustered. + + Most of the time this will be the exact same behavior as failover_host, + so if it's not overwritten, it is assumed to be the case. + """ + return self.failover_host(context, volumes, secondary_id) + + def failover_completed(self, context, active_backend_id=None): + """This method is called after failover for clustered backends.""" + raise NotImplementedError() + + @classmethod + def _is_base_method(cls, method_name): + method = getattr(cls, method_name) + return method.__module__ == getattr(BaseVD, method_name).__module__ + + @classmethod + def supports_replication_feature(cls, feature): + """Check if driver class supports replication features. + + Feature is a string that must be one of: + - v2.1 + - a/a + """ + if feature not in cls.REPLICATION_FEATURE_CHECKERS: + return False + + # Check if method is being implemented/overwritten by the driver + method_name = cls.REPLICATION_FEATURE_CHECKERS[feature] + return not cls._is_base_method(method_name) + def get_replication_updates(self, context): """Old replication update method, deprecate.""" raise NotImplementedError() diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 8e60862804b..d2e05929e63 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -4151,9 +4151,8 @@ class VolumeManager(manager.CleanableManager, volume.update(model_update_default) volume.save() - # Replication V2.1 methods - def failover_host(self, context, - secondary_backend_id=None): + # Replication V2.1 and a/a method + def failover(self, context, secondary_backend_id=None): """Failover a backend to a secondary replication target. Instructs a replication capable/configured backend to failover @@ -4167,30 +4166,33 @@ class VolumeManager(manager.CleanableManager, :param context: security context :param secondary_backend_id: Specifies backend_id to fail over to """ + updates = {} + repl_status = fields.ReplicationStatus + svc_host = vol_utils.extract_host(self.host, 'backend') + service = objects.Service.get_by_args(context, svc_host, + constants.VOLUME_BINARY) + volumes = self._get_my_volumes(context) - service = objects.Service.get_by_args( - context, - svc_host, - constants.VOLUME_BINARY) - volumes = objects.VolumeList.get_all_by_host(context, self.host) - - exception_encountered = False + exception_encountered = True try: + # For non clustered we can call v2.1 failover_host, but for + # clustered we call a/a failover method. We know a/a method + # exists because BaseVD class wouldn't have started if it didn't. + failover = getattr(self.driver, + 'failover' if service.is_clustered + else 'failover_host') # expected form of volume_update_list: # [{volume_id: , updates: {'provider_id': xxxx....}}, # {volume_id: , updates: {'provider_id': xxxx....}}] - (active_backend_id, volume_update_list) = ( - self.driver.failover_host( - context, - volumes, - secondary_id=secondary_backend_id)) + active_backend_id, volume_update_list = failover( + context, + volumes, + secondary_id=secondary_backend_id) + exception_encountered = False except exception.UnableToFailOver: LOG.exception(_LE("Failed to perform replication failover")) - service.replication_status = ( - fields.ReplicationStatus.FAILOVER_ERROR) - service.save() - exception_encountered = True + updates['replication_status'] = repl_status.FAILOVER_ERROR except exception.InvalidReplicationTarget: LOG.exception(_LE("Invalid replication target specified " "for failover")) @@ -4199,12 +4201,9 @@ class VolumeManager(manager.CleanableManager, # secondary to another secondary. In both cases active_backend_id # will be set. if service.active_backend_id: - service.replication_status = ( - fields.ReplicationStatus.FAILED_OVER) + updates['replication_status'] = repl_status.FAILED_OVER else: - service.replication_status = fields.ReplicationStatus.ENABLED - service.save() - exception_encountered = True + updates['replication_status'] = repl_status.ENABLED except exception.VolumeDriverException: # NOTE(jdg): Drivers need to be aware if they fail during # a failover sequence, we're expecting them to cleanup @@ -4212,36 +4211,29 @@ class VolumeManager(manager.CleanableManager, # backend is still set as primary as per driver memory LOG.error(_LE("Driver reported error during " "replication failover.")) - service.replication_status = ( - fields.ReplicationStatus.FAILOVER_ERROR) - service.disabled = True - service.save() - exception_encountered = True + updates.update(disabled=True, + replication_status=repl_status.FAILOVER_ERROR) if exception_encountered: LOG.error( _LE("Error encountered during failover on host: " "%(host)s invalid target ID %(backend_id)s"), {'host': self.host, 'backend_id': secondary_backend_id}) + self.finish_failover(context, service, updates) return if secondary_backend_id == "default": - service.replication_status = fields.ReplicationStatus.ENABLED - service.active_backend_id = "" - if service.frozen: - service.disabled = True - service.disabled_reason = "frozen" - else: - service.disabled = False - service.disabled_reason = "" - service.save() - + updates['replication_status'] = repl_status.ENABLED + updates['active_backend_id'] = '' + updates['disabled'] = service.frozen + updates['disabled_reason'] = 'frozen' if service.frozen else '' else: - service.replication_status = fields.ReplicationStatus.FAILED_OVER - service.active_backend_id = active_backend_id - service.disabled = True - service.disabled_reason = "failed-over" - service.save() + updates['replication_status'] = repl_status.FAILED_OVER + updates['active_backend_id'] = active_backend_id + updates['disabled'] = True + updates['disabled_reason'] = 'failed-over' + + self.finish_failover(context, service, updates) for update in volume_update_list: # Response must include an id key: {volume_id: } @@ -4259,6 +4251,53 @@ class VolumeManager(manager.CleanableManager, LOG.info(_LI("Failed over to replication target successfully.")) + # TODO(geguileo): In P - remove this + failover_host = failover + + def finish_failover(self, context, service, updates): + """Completion of the failover locally or via RPC.""" + # If the service is clustered, broadcast the service changes to all + # volume services, including this one. + if service.is_clustered: + # We have to update the cluster with the same data, and we do it + # before broadcasting the failover_completed RPC call to prevent + # races with services that may be starting.. + for key, value in updates.items(): + setattr(service.cluster, key, value) + service.cluster.save() + rpcapi = volume_rpcapi.VolumeAPI() + rpcapi.failover_completed(context, service, updates) + else: + service.update(updates) + service.save() + + def failover_completed(self, context, updates): + """Finalize failover of this backend. + + When a service is clustered and replicated the failover has 2 stages, + one that does the failover of the volumes and another that finalizes + the failover of the services themselves. + + This method takes care of the last part and is called from the service + doing the failover of the volumes after finished processing the + volumes. + """ + svc_host = vol_utils.extract_host(self.host, 'backend') + service = objects.Service.get_by_args(context, svc_host, + constants.VOLUME_BINARY) + service.update(updates) + try: + self.driver.failover_completed(context, service.active_backend_id) + except Exception: + msg = _('Driver reported error during replication failover ' + 'completion.') + LOG.exception(msg) + service.disabled = True + service.disabled_reason = msg + service.replication_status = ( + fields.ReplicationStatus.ERROR) + service.save() + def freeze_host(self, context): """Freeze management plane on this backend. diff --git a/cinder/volume/rpcapi.py b/cinder/volume/rpcapi.py index 3bb0dd7a692..f7c8135ff1e 100644 --- a/cinder/volume/rpcapi.py +++ b/cinder/volume/rpcapi.py @@ -122,9 +122,10 @@ class VolumeAPI(rpc.RPCAPI): of @backend suffixes in server names. 3.7 - Adds do_cleanup method to do volume cleanups from other nodes that we were doing in init_host. + 3.8 - Make failover_host cluster aware and add failover_completed. """ - RPC_API_VERSION = '3.7' + RPC_API_VERSION = '3.8' RPC_DEFAULT_VERSION = '3.0' TOPIC = constants.VOLUME_TOPIC BINARY = 'cinder-volume' @@ -310,21 +311,31 @@ class VolumeAPI(rpc.RPCAPI): new_volume=new_volume, volume_status=original_volume_status) - def freeze_host(self, ctxt, host): + def freeze_host(self, ctxt, service): """Set backend host to frozen.""" - cctxt = self._get_cctxt(host) + cctxt = self._get_cctxt(service.service_topic_queue) return cctxt.call(ctxt, 'freeze_host') - def thaw_host(self, ctxt, host): + def thaw_host(self, ctxt, service): """Clear the frozen setting on a backend host.""" - cctxt = self._get_cctxt(host) + cctxt = self._get_cctxt(service.service_topic_queue) return cctxt.call(ctxt, 'thaw_host') - def failover_host(self, ctxt, host, secondary_backend_id=None): - """Failover host to the specified backend_id (secondary).""" - cctxt = self._get_cctxt(host) - cctxt.cast(ctxt, 'failover_host', - secondary_backend_id=secondary_backend_id) + def failover(self, ctxt, service, secondary_backend_id=None): + """Failover host to the specified backend_id (secondary). """ + version = '3.8' + method = 'failover' + if not self.client.can_send_version(version): + version = '3.0' + method = 'failover_host' + cctxt = self._get_cctxt(service.service_topic_queue, version) + cctxt.cast(ctxt, method, secondary_backend_id=secondary_backend_id) + + def failover_completed(self, ctxt, service, updates): + """Complete failover on all services of the cluster.""" + cctxt = self._get_cctxt(service.service_topic_queue, '3.8', + fanout=True) + cctxt.cast(ctxt, 'failover_completed', updates=updates) def manage_existing_snapshot(self, ctxt, snapshot, ref, backend): cctxt = self._get_cctxt(backend)