diff --git a/cinder/api/contrib/admin_actions.py b/cinder/api/contrib/admin_actions.py index 46bb3fd8ac2..cb665d2875a 100644 --- a/cinder/api/contrib/admin_actions.py +++ b/cinder/api/contrib/admin_actions.py @@ -255,6 +255,85 @@ class VolumeAdminController(AdminController): new_volume, error) return {'save_volume_id': ret} + @wsgi.action('os-enable_replication') + def _enable_replication(self, req, id, body): + """Enable/Re-enable replication on a replication capable volume. + + Admin only method, used primarily for cases like disabling and + re-enabling the replication process on a replicated volume for + maintenance or testing. + """ + + context = req.environ['cinder.context'] + self.authorize(context, 'enable_replication') + try: + volume = self._get(context, id) + except exception.VolumeNotFound as e: + raise exc.HTTPNotFound(explanation=e.msg) + self.volume_api.enable_replication(context, volume) + return webob.Response(status_int=202) + + @wsgi.action('os-disable_replication') + def _disable_replication(self, req, id, body): + """Disable replication on a replication capable volume. + + Admin only method, used to instruct a backend to + disable the replication process on a replicated volume. + """ + + context = req.environ['cinder.context'] + self.authorize(context, 'disable_replication') + try: + volume = self._get(context, id) + except exception.VolumeNotFound as e: + raise exc.HTTPNotFound(explanation=e.msg) + self.volume_api.disable_replication(context, volume) + return webob.Response(status_int=202) + + @wsgi.action('os-failover_replication') + def _failover_replication(self, req, id, body): + """Failover a replicating volume to its secondary. + + Admin only method, used to force a fail-over to + a replication target. Optional secondary param to + indicate which device to promote in case of multiple + replication targets. + """ + + context = req.environ['cinder.context'] + self.authorize(context, 'failover_replication') + try: + volume = self._get(context, id) + except exception.VolumeNotFound as e: + raise exc.HTTPNotFound(explanation=e.msg) + secondary = body['os-failover_replication'].get('secondary', None) + self.volume_api.failover_replication(context, volume, secondary) + return webob.Response(status_int=202) + + @wsgi.action('os-list_replication_targets') + def _list_replication_targets(self, req, id, body): + """Show replication targets for the specified volume. + + Admin only method, used to display configured + replication target devices for the specified volume. + + """ + + # TODO(jdg): We'll want an equivalent type of command + # to query a backend host (show configuration for a + # specified backend), but priority here is for + # a volume as it's likely to be more useful. + context = req.environ['cinder.context'] + self.authorize(context, 'list_replication_targets') + try: + volume = self._get(context, id) + except exception.VolumeNotFound as e: + raise exc.HTTPNotFound(explanation=e.msg) + + # Expected response is a dict with unknown + # keys. Should be of the form: + # {'volume_id': xx, 'replication_targets':[{k: v, k1: v1...}]} + return self.volume_api.list_replication_targets(context, volume) + class SnapshotAdminController(AdminController): """AdminController for Snapshots."""
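For reference, each of these admin actions is invoked through the standard volume action resource and returns 202 on success. A sketch of a client call follows (the endpoint URL, project/volume IDs and token are illustrative assumptions, not part of this patch)::

    import json

    import requests  # illustrative client library, not used by Cinder itself

    url = ('http://cinder-api:8776/v2/PROJECT_ID'
           '/volumes/VOLUME_ID/action')
    headers = {'X-Auth-Token': 'ADMIN_TOKEN',
               'Content-Type': 'application/json'}

    # Fail over to a specific target; 'secondary' may be omitted to let
    # the driver pick one of its configured targets.
    body = {'os-failover_replication': {'secondary': 'backend-biz'}}
    resp = requests.post(url, data=json.dumps(body), headers=headers)
    assert resp.status_code == 202  # the request is cast asynchronously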
diff --git a/cinder/tests/unit/policy.json b/cinder/tests/unit/policy.json index e656d06859f..0948d3dd0af 100644 --- a/cinder/tests/unit/policy.json +++ b/cinder/tests/unit/policy.json @@ -34,7 +34,10 @@ "volume:update_readonly_flag": "", "volume:retype": "", "volume:copy_volume_to_image": "", - + "volume:enable_replication": "rule:admin_api", + "volume:disable_replication": "rule:admin_api", + "volume:failover_replication": "rule:admin_api", + "volume:list_replication_targets": "rule:admin_api", "volume_extension:volume_admin_actions:reset_status": "rule:admin_api", "volume_extension:snapshot_admin_actions:reset_status": "rule:admin_api", "volume_extension:backup_admin_actions:reset_status": "rule:admin_api", diff --git a/cinder/tests/unit/test_volume.py b/cinder/tests/unit/test_volume.py index 07486b6277b..256c272090b 100644 --- a/cinder/tests/unit/test_volume.py +++ b/cinder/tests/unit/test_volume.py @@ -5849,6 +5849,61 @@ class GenericVolumeDriverTestCase(DriverTestCase): volume_file) self.assertEqual(i, backup_service.restore.call_count) + def test_enable_replication_invalid_state(self): + volume_api = cinder.volume.api.API() + ctxt = context.get_admin_context() + volume = tests_utils.create_volume(ctxt, + size=1, + host=CONF.host, + replication_status='enabled') + + self.assertRaises(exception.InvalidVolume, + volume_api.enable_replication, + ctxt, volume) + + def test_enable_replication(self): + volume_api = cinder.volume.api.API() + ctxt = context.get_admin_context() + + volume = tests_utils.create_volume(self.context, + size=1, + host=CONF.host, + replication_status='disabled') + with mock.patch.object(volume_rpcapi.VolumeAPI, + 'enable_replication') as mock_enable_rep: + volume_api.enable_replication(ctxt, volume) + self.assertTrue(mock_enable_rep.called) + + def test_disable_replication_invalid_state(self): + volume_api = cinder.volume.api.API() + ctxt = context.get_admin_context() + volume = tests_utils.create_volume(ctxt, + size=1, + host=CONF.host, + replication_status='invalid-state') + + self.assertRaises(exception.InvalidVolume, + volume_api.disable_replication, + ctxt, volume) + + def test_disable_replication(self): + volume_api = cinder.volume.api.API() + ctxt = context.get_admin_context() + + volume = tests_utils.create_volume(self.context, + size=1, + host=CONF.host, + replication_status='disabled') + + with mock.patch.object(volume_rpcapi.VolumeAPI, + 'disable_replication') as mock_disable_rep: + volume_api.disable_replication(ctxt, volume) + self.assertTrue(mock_disable_rep.called) + + volume['replication_status'] = 'enabled' + volume_api.disable_replication(ctxt, volume) + self.assertTrue(mock_disable_rep.called) + class LVMISCSIVolumeDriverTestCase(DriverTestCase): """Test case for VolumeDriver""" diff --git a/cinder/tests/unit/test_volume_utils.py b/cinder/tests/unit/test_volume_utils.py index 5502c33baf8..41b1a0add3f 100644 --- a/cinder/tests/unit/test_volume_utils.py +++ b/cinder/tests/unit/test_volume_utils.py @@ -794,3 +794,11 @@ class VolumeUtilsTestCase(test.TestCase): mock_db, 'volume-d8cd1fe') self.assertFalse(result) + + def test_convert_config_string_to_dict(self): + test_string = "{'key-1'='val-1' 'key-2'='val-2' 'key-3'='val-3'}" + expected_dict = {'key-1': 'val-1', 'key-2': 'val-2', 'key-3': 'val-3'} + + 
self.assertEqual( + expected_dict, + volume_utils.convert_config_string_to_dict(test_string)) diff --git a/cinder/volume/api.py b/cinder/volume/api.py index c06895aed82..af64a635329 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -1505,6 +1505,121 @@ class API(base.Base): resource=vol_ref) return vol_ref + # Replication V2 methods ## + + # NOTE(jdg): It might be kinda silly to propagate the named + # args with defaults all the way down through rpc into manager + # but for now the consistency is useful, and there may be + # some usefulness in the future (direct calls in manager?) + + # NOTE(jdg): Relying solely on the volume-type quota mechanism; + # need to consider looking at how we handle configured backends + # WRT quotas, do they count against normal quotas or not? For + # now they're a special resource, so no. + + @wrap_check_policy + def enable_replication(self, ctxt, volume): + + # NOTE(jdg): details like sync vs async + # and replica count are to be set via the + # volume-type and config files. + + # Get a fresh ref from db and check status + volume = self.db.volume_get(ctxt, volume['id']) + + # NOTE(jdg): Set a valid status as a var to minimize errors via typos + # also, use a list, we may want to add to it some day + + # TODO(jdg): Move these up to a global list for each call and ban the + # free form typing of states and state checks going forward + + # NOTE(jdg): There may be a need for some backends to allow this + # call to the driver regardless of replication_status, most likely + # this indicates an issue with the driver, but might be useful + # cases to consider modifying this for in the future. + valid_rep_status = ['disabled'] + rep_status = volume.get('replication_status', valid_rep_status[0]) + + if rep_status not in valid_rep_status: + msg = _("Invalid status to enable replication. " + "Valid states are: %(valid_states)s, " + "current replication-state is: %(curr_state)s.") % { + 'valid_states': valid_rep_status, + 'curr_state': rep_status} + + raise exception.InvalidVolume(reason=msg) + + vref = self.db.volume_update(ctxt, + volume['id'], + {'replication_status': 'enabling'}) + self.volume_rpcapi.enable_replication(ctxt, vref) + + @wrap_check_policy + def disable_replication(self, ctxt, volume): + + valid_disable_status = ['disabled', 'enabled'] + + # NOTE(jdg): Just use disabled here (item 1 in the list); this + # way if someone says disable_rep on a volume that's not being + # replicated we just say "ok, done" + rep_status = volume.get('replication_status', valid_disable_status[0]) + + if rep_status not in valid_disable_status: + msg = _("Invalid status to disable replication. " + "Valid states are: %(valid_states)s, " + "current replication-state is: %(curr_state)s.") % { + 'valid_states': valid_disable_status, + 'curr_state': rep_status} + + raise exception.InvalidVolume(reason=msg) + + vref = self.db.volume_update(ctxt, + volume['id'], + {'replication_status': 'disabling'}) + + self.volume_rpcapi.disable_replication(ctxt, vref)
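Taken together with the manager code later in this patch, these checks implement a small replication_status state machine. A summary sketch (not code from the patch; the state names are those used throughout this change)::

    # The API layer sets the transitional state and casts to the
    # manager, which settles it on success (or 'error' on a failed
    # failover).
    REPLICATION_TRANSITIONS = {
        'enable_replication': {'from': ['disabled'],
                               'via': 'enabling',
                               'to': 'enabled'},
        'disable_replication': {'from': ['disabled', 'enabled'],
                                'via': 'disabling',
                                'to': 'disabled'},
        'failover_replication': {'from': ['enabled'],
                                 'via': 'enabling_secondary',
                                 'to': 'failed-over'},
    }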
" + "valid states are: %(valid_states)s, " + "current replication-state is: %(curr_state)s."), + {'valid_states': valid_failover_status, + 'curr_state': rep_status}) + + raise exception.InvalidVolume(reason=msg) + + vref = self.db.volume_update( + ctxt, + volume['id'], + {'replication_status': 'enabling_secondary'}) + + self.volume_rpcapi.failover_replication(ctxt, + vref, + secondary) + + @wrap_check_policy + def list_replication_targets(self, ctxt, volume): + + # NOTE(jdg): This collects info for the specified volume + # it is NOT an error if the volume is not being replicated + # also, would be worth having something at a backend/host + # level to show an admin how a backend is configured. + return self.volume_rpcapi.list_replication_targets(ctxt, volume) + class HostAPI(base.Base): def __init__(self): diff --git a/cinder/volume/driver.py b/cinder/volume/driver.py index 59983b3a527..7f6c8646e2a 100644 --- a/cinder/volume/driver.py +++ b/cinder/volume/driver.py @@ -221,6 +221,20 @@ volume_opts = [ help='List of options that control which trace info ' 'is written to the DEBUG log level to assist ' 'developers. Valid values are method and api.'), + cfg.BoolOpt('managed_replication_target', + default=True, + help='There are two types of target configurations ' + 'managed (replicate to another configured backend) ' + 'or unmanaged (replicate to a device not managed ' + 'by Cinder).'), + cfg.ListOpt('replication_devices', + default=None, + help="List of k/v pairs representing a replication target " + "for this backend device. For unmanaged the format " + "is: {'key-1'='val1' 'key-2'='val2'...},{...} " + "and for managed devices its simply a list of valid " + "configured backend_names that the driver supports " + "replicating to: backend-a,bakcend-b...") ] # for backward compatibility @@ -291,6 +305,7 @@ class BaseVD(object): self.configuration.append_config_values(volume_opts) self.configuration.append_config_values(iser_opts) utils.setup_tracing(self.configuration.safe_get('trace_flags')) + self.set_execute(execute) self._stats = {} @@ -1384,6 +1399,187 @@ class ManageableVD(object): pass +@six.add_metaclass(abc.ABCMeta) +class ReplicaV2VD(object): + """Cinder replication functionality. + + The Cinder replication functionality is set up primarily through + the use of volume-types in conjunction with the filter scheduler. + This requires: + 1. The driver reports "replication = True" in it's capabilities + 2. The cinder.conf file includes the valid_replication_devices section + + The driver configuration is expected to take one of the following two + forms, see devref replication docs for details. + + Note we provide cinder.volume.utils.convert_config_string_to_dict + to parse this out into a usable proper dictionary. + + """ + + @abc.abstractmethod + def replication_enable(self, context, volume): + """Enable replication on a replication capable volume. + + If the volume was created on a replication_enabled host this method + is used to re-enable replication for the volume. + + Primarily we only want this for testing/admin purposes. The idea + being that the bulk of the replication details are handled by the + type definition and the driver; however disable/enable(re-enable) is + provided for admins to test or do maintenance which is a + requirement by some cloud-providers. + + NOTE: This is intended as an ADMIN only call and is not + intended to be used by end-user to enable replication. We're + leaving that to volume-type info, this is for things like + maintenance or testing. 
+@six.add_metaclass(abc.ABCMeta) +class ReplicaV2VD(object): + """Cinder replication functionality. + + The Cinder replication functionality is set up primarily through + the use of volume-types in conjunction with the filter scheduler. + This requires: + 1. The driver reports "replication = True" in its capabilities + 2. The cinder.conf file includes the replication_devices section + + The driver configuration is expected to take one of the following two + forms, see the devref replication docs for details. + + Note we provide cinder.volume.utils.convert_config_string_to_dict + to parse this into a usable dictionary. + + """ + + @abc.abstractmethod + def replication_enable(self, context, volume): + """Enable replication on a replication capable volume. + + If the volume was created on a replication_enabled host this method + is used to re-enable replication for the volume. + + Primarily we only want this for testing/admin purposes. The idea + being that the bulk of the replication details are handled by the + type definition and the driver; however disable/enable (re-enable) is + provided for admins to test or do maintenance, which is a + requirement for some cloud providers. + + NOTE: This is intended as an ADMIN only call and is not + intended to be used by the end-user to enable replication. We're + leaving that to volume-type info; this is for things like + maintenance or testing. + + + :param context: security context + :param volume: volume object returned by DB + :response: {replication_driver_data: vendor-data} DB update + + The replication_driver_data response is vendor unique + data returned/used by the driver. It is expected that + the response from the driver is in the appropriate db update + format, in the form of a dict, where the vendor data is + stored under the key 'replication_driver_data' + + """ + + # TODO(jdg): Put a check in at API layer to verify the host is + # replication capable before even issuing this call (can just + # check against the volume-type for said volume as well) + + raise NotImplementedError() + + @abc.abstractmethod + def replication_disable(self, context, volume): + """Disable replication on the specified volume. + + If the specified volume is currently replication enabled, + this method can be used to disable the replication process + on the backend. + + Note that we still send this call to a driver whose volume + may report replication-disabled already. We do this as a + safety mechanism to allow a driver to clean up any mismatch + in state between Cinder and itself. + + This is intended as an ADMIN only call to allow for + maintenance and testing. If a driver receives this call + and the process fails for some reason the driver should + return a status update to "replication_status=disable_failed" + + :param context: security context + :param volume: volume object returned by DB + :response: {replication_driver_data: vendor-data} DB update + + The replication_driver_data response is vendor unique + data returned/used by the driver. It is expected that + the response from the driver is in the appropriate db update + format, in the form of a dict, where the vendor data is + stored under the key 'replication_driver_data' + + """ + + raise NotImplementedError()
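Concretely, the failover contract defined next hands back a combined update dict; it might look like this (placeholder values; the shape follows the docstring below)::

    failover_response = {
        'host': 'foo@bar#baz',  # valid cinder host string for the target
        'model_update': {'provider_location': 'new-target-info'},
        'replication_driver_data': 'vendor-session-state',
    }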
+ + @abc.abstractmethod + def replication_failover(self, context, volume, secondary): + """Force failover to a secondary replication target. + + Forces the failover action of a replicated volume to one of its + secondary/target devices. By default the choice of target devices + is left up to the driver. In particular we expect one way + replication here, but are providing a mechanism for 'n' way + if supported/configured. + + Currently we leave it up to the driver to figure out how/what + to do here. Rather than doing things like ID swaps, we instead + just let the driver figure out how/where to route things. + + In cases where we might want to drop a volume-service node and + the replication target is a configured cinder backend, we'll + just update the host column for the volume. + + Very important point here is that in the case of a successful + failover, we want to update the replication_status of the + volume to "failed-over". This way there's an indication that + things worked as expected, and that it's evident that the volume + may no longer be replicating to another backend (primary burst + into flames). This status will be set by the manager. + + :param context: security context + :param volume: volume object returned by DB + :param secondary: Specifies rep target to fail over to + :response: dict of updates + + So the response would take the form: + {host: , + model_update: {standard_model_update_KVs}, + replication_driver_data: xxxxxxx} + + It is expected that these responses are in a format that can + be used directly in a db.update call. + + Additionally we utilize exception catching to report back to the + manager when things went wrong and to inform the caller on how + to proceed. + + """ + + raise NotImplementedError() + + @abc.abstractmethod + def list_replication_targets(self, context, vref): + """Provide a means to obtain replication targets for a volume. + + This method is used to query a backend to get the current + replication config info for the specified volume. + + In the case of a volume that isn't being replicated, + the driver should return an empty list. + + + Example response for replicating to a managed backend: + {'volume_id': volume['id'], + 'targets':[{'type': 'managed', + 'backend_name': 'backend_name'}...] + + Example response for replicating to an unmanaged backend: + {'volume_id': volume['id'], + 'targets':[{'type': 'unmanaged', + 'vendor-key-1': 'value-1'}...] + + NOTE: It's the responsibility of the driver to mask out any + passwords or sensitive information. Also the format of the + response allows mixed (managed/unmanaged) targets, even though + the first iteration does not support configuring the driver in + such a manner. + + """ + + raise NotImplementedError() + + @abc.abstractmethod + def get_replication_updates(self, context): + """Provide a means to obtain status updates from the backend. + + Provides a concise update for backends to report any errors + or problems with replicating volumes. The intent is we only + return something here if there's an error or a problem, and to + notify where the backend thinks the volume is. + + :param context: context of caller (probably don't need) + :returns: [{volid: n, status: ok|error,...}] + """ + # NOTE(jdg): flush this out with implementations so we all + # have something usable here + raise NotImplementedError() + + +@six.add_metaclass(abc.ABCMeta) +class ReplicaVD(object): @abc.abstractmethod @@ -1928,6 +2124,7 @@ class ISCSIDriver(VolumeDriver): data["driver_version"] = '1.0' data["storage_protocol"] = 'iSCSI' data["pools"] = [] + data["replication_enabled"] = False self._update_pools_and_stats(data) diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 0edb0d410db..fa5a2d1eecd 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -189,7 +189,7 @@ def locked_snapshot_operation(f): class VolumeManager(manager.SchedulerDependentManager): """Manages attachable block storage devices.""" - RPC_API_VERSION = '1.26' + RPC_API_VERSION = '1.27' target = messaging.Target(version=RPC_API_VERSION) @@ -405,6 +405,10 @@ class VolumeManager(manager.SchedulerDependentManager): self.publish_service_capabilities(ctxt) # conditionally run replication status task + + # FIXME(jdg): This should go away or be handled differently + # if/when we're ready for V2 replication + stats = self.driver.get_volume_stats(refresh=True) if stats and stats.get('replication', False): @@ -413,6 +417,7 @@ class VolumeManager(manager.SchedulerDependentManager): self._update_replication_relationship_status(ctxt) self.add_periodic_task(run_replication_task) + LOG.info(_LI("Driver initialization completed successfully."), resource={'type': 'driver', 'id': self.driver.__class__.__name__}) @@ -1538,6 +1543,24 @@ class VolumeManager(manager.SchedulerDependentManager): # queue it to be sent to the Schedulers. self.update_service_capabilities(volume_stats) + if volume_stats.get('replication_enabled', False): + # replication_status provides a concise update of + # replicating volumes and any error conditions + # detected by the driver. 
The intent is we don't + # expect/worry about updates so long as nothing + # changes, but if something goes wrong this is a + # handy mechanism to update the manager and the db, + # and possibly let the admin/user be notified + + # TODO(jdg): Refactor the check/update pieces to a + # helper method we can share + # We want to leverage some of the same update model + # that we have in the targets update call + + replication_updates = self.driver.get_replication_updates(context) + for update in replication_updates: + pass + def _append_volume_stats(self, vol_stats): pools = vol_stats.get('pools', None) if pools and isinstance(pools, list): @@ -2706,3 +2729,204 @@ class VolumeManager(manager.SchedulerDependentManager): for key in model_update.iterkeys()} self.db.volume_update(ctxt.elevated(), new_volume['id'], model_update_new) + + # Replication V2 methods + def enable_replication(self, context, volume): + """Enable replication on a replication capable volume. + + If the volume was created on a replication_enabled host this method + is used to enable replication for the volume. Primarily used for + testing and maintenance. + + :param context: security context + :param volume: volume object returned by DB + """ + + # NOTE(jdg): We're going to do a fresh get from the DB and verify that + # we are in an expected state ('enabling') + volume = self.db.volume_get(context, volume['id']) + if volume['replication_status'] != 'enabling': + msg = _("Volume must be in 'enabling' replication state.") + raise exception.InvalidVolume(reason=msg) + + try: + rep_driver_data = self.driver.replication_enable(context, + volume) + except exception.CinderException: + err_msg = (_("Enable replication for volume failed.")) + LOG.exception(err_msg, resource=volume) + raise exception.VolumeBackendAPIException(data=err_msg) + try: + if rep_driver_data: + volume = self.db.volume_update(context, + volume['id'], + rep_driver_data) + except exception.CinderException as ex: + LOG.exception(_LE("Driver replication data update failed."), + resource=volume) + raise exception.VolumeBackendAPIException(data=ex) + self.db.volume_update(context, volume['id'], + {'replication_status': 'enabled'}) + + def disable_replication(self, context, volume): + """Disable replication on the specified volume. + + If the specified volume is currently replication enabled, + this method can be used to disable the replication process + on the backend. This method assumes that we checked + replication status in the API layer to ensure we should + send this call to the driver. + + :param context: security context + :param volume: volume object returned by DB + """ + + volume = self.db.volume_get(context, volume['id']) + if volume['replication_status'] != 'disabling': + msg = _("Volume must be in 'disabling' replication state.") + raise exception.InvalidVolume(reason=msg) + + try: + rep_driver_data = self.driver.replication_disable(context, + volume) + except exception.CinderException: + err_msg = (_("Disable replication for volume failed.")) + LOG.exception(err_msg, resource=volume) + raise exception.VolumeBackendAPIException(data=err_msg) + try: + if rep_driver_data: + volume = self.db.volume_update(context, + volume['id'], + rep_driver_data) + except exception.CinderException as ex: + LOG.exception(_LE("Driver replication data update failed."), + resource=volume) + raise exception.VolumeBackendAPIException(data=ex) + self.db.volume_update(context, + volume['id'], + {'replication_status': 'disabled'})
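For driver authors, the contract these two manager methods consume can be reduced to a small sketch (illustrative only; this class and its return values are not part of the patch)::

    class ExampleReplicationDriver(object):
        """Illustrates the replication_enable/disable contract."""

        def replication_enable(self, context, volume):
            # Start (or resume) vendor specific replication, then hand
            # driver private state back as a db update dict.
            return {'replication_driver_data': 'vendor-session-1234'}

        def replication_disable(self, context, volume):
            # Stop replicating; returning None/{} means the manager only
            # flips replication_status and makes no extra db update.
            return None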
+ + def failover_replication(self, context, volume, secondary=None): + """Force failover to a secondary replication target. + + Forces the failover action of a replicated volume to one of its + secondary/target devices. By default the choice of target devices + is left up to the driver. In particular we expect one way + replication here, but are providing a mechanism for 'n' way + if supported/configured. + + Currently we leave it up to the driver to figure out how/what + to do here. Rather than doing things like ID swaps, we instead + just let the driver figure out how/where to route things. + + In cases where we might want to drop a volume-service node and + the replication target is a configured cinder backend, we'll + just update the host column for the volume. + + :param context: security context + :param volume: volume object returned by DB + :param secondary: Specifies rep target to fail over to + """ + try: + volume_updates = self.driver.replication_failover(context, + volume, + secondary) + + # volume_updates is a dict containing a report of relevant + # items based on the backend and how it operates or what it needs + # {'host': 'secondary-configured-cinder-backend', + # 'model_update': {'update-all-the-provider-info-etc'}, + # 'replication_driver_data': 'driver-specific-stuff-for-db'} + # Where 'host' is a valid cinder host string like + # 'foo@bar#baz' + # model_update and replication_driver_data are required + + except exception.CinderException: + + # FIXME(jdg): We need to create a few different exceptions here + # and handle each differently: + # 1. I couldn't failover, but the original setup is ok so proceed + # as if this were never called + # 2. I ran into a problem and I have no idea what state things + # are in, so set volume to error + # 3. I ran into a problem and a human needs to come fix me up + + err_msg = (_("Replication failover for volume failed.")) + LOG.exception(err_msg, resource=volume) + self.db.volume_update(context, + volume['id'], + {'replication_status': 'error'}) + raise exception.VolumeBackendAPIException(data=err_msg) + + # TODO(jdg): Come back and condense these into a single update + update = {} + model_update = volume_updates.get('model_update', None) + driver_update = volume_updates.get('replication_driver_data', None) + host_update = volume_updates.get('host', None) + + if model_update: + update['model'] = model_update + if driver_update: + update['replication_driver_data'] = driver_update + if host_update: + update['host'] = host_update + + if update: + try: + volume = self.db.volume_update( + context, + volume['id'], + update) + + except exception.CinderException as ex: + LOG.exception(_LE("Driver replication data update failed."), + resource=volume) + raise exception.VolumeBackendAPIException(data=ex) + + # NOTE(jdg): We're setting replication status to failed-over + # which indicates the volume is ok, things went as expected but + # we're likely not replicating any longer because... well we + # did a fail-over. In the case of the admin bringing the primary + # back online he/she can use enable_replication to get this + # state set back to enabled. + + # Also, in the case of multiple targets, the driver can update + # status in the rep-status checks if it still has valid replication + # targets that the volume is being replicated to. + + self.db.volume_update(context, + volume['id'], + {'replication_status': 'failed-over'}) + + def list_replication_targets(self, context, volume): + """Provide a means to obtain replication targets for a volume. + + This method is used to query a backend to get the current + replication config info for the specified volume. 
+ + In the case of a volume that isn't being replicated, + the driver should return an empty list. + + + Example response for replicating to a managed backend: + {'volume_id': volume['id'], + 'targets':[{'managed_host': 'backend_name'}...] + + Example response for replicating to an unmanaged backend: + {'volume_id': volume['id'], 'targets':[{'san_ip': '1.1.1.1', + 'san_login': 'admin'}, + ....]} + + NOTE: It's the responsibility of the driver to mask out any + passwords or sensitive information. + + """ + + try: + replication_targets = self.driver.list_replication_targets(context, + volume) + + except exception.CinderException: + err_msg = (_("Get replication targets failed.")) + LOG.exception(err_msg) + raise exception.VolumeBackendAPIException(data=err_msg) + + return replication_targets diff --git a/cinder/volume/rpcapi.py b/cinder/volume/rpcapi.py index e980b46a4f1..ab3c171803b 100644 --- a/cinder/volume/rpcapi.py +++ b/cinder/volume/rpcapi.py @@ -72,6 +72,7 @@ class VolumeAPI(object): 1.26 - Adds support for sending objects over RPC in create_consistencygroup(), create_consistencygroup_from_src(), update_consistencygroup() and delete_consistencygroup(). + 1.27 - Adds support for replication V2 """ BASE_RPC_API_VERSION = '1.0' @@ -81,7 +82,7 @@ class VolumeAPI(object): target = messaging.Target(topic=CONF.volume_topic, version=self.BASE_RPC_API_VERSION) serializer = objects_base.CinderObjectSerializer() - self.client = rpc.get_client(target, '1.26', serializer=serializer) + self.client = rpc.get_client(target, '1.27', serializer=serializer) def create_consistencygroup(self, ctxt, group, host): new_host = utils.extract_host(host) @@ -260,3 +261,29 @@ class VolumeAPI(object): volume=volume, new_volume=new_volume, volume_status=original_volume_status) + + def enable_replication(self, ctxt, volume): + new_host = utils.extract_host(volume['host']) + cctxt = self.client.prepare(server=new_host, version='1.27') + cctxt.cast(ctxt, 'enable_replication', volume=volume) + + def disable_replication(self, ctxt, volume): + new_host = utils.extract_host(volume['host']) + cctxt = self.client.prepare(server=new_host, version='1.27') + cctxt.cast(ctxt, 'disable_replication', + volume=volume) + + def failover_replication(self, + ctxt, + volume, + secondary=None): + new_host = utils.extract_host(volume['host']) + cctxt = self.client.prepare(server=new_host, version='1.27') + cctxt.cast(ctxt, 'failover_replication', + volume=volume, + secondary=secondary) + + def list_replication_targets(self, ctxt, volume): + new_host = utils.extract_host(volume['host']) + cctxt = self.client.prepare(server=new_host, version='1.27') + return cctxt.call(ctxt, 'list_replication_targets', volume=volume) diff --git a/cinder/volume/utils.py b/cinder/volume/utils.py index c2d297bbd9a..893cf3bef8b 100644 --- a/cinder/volume/utils.py +++ b/cinder/volume/utils.py @@ -15,6 +15,7 @@ """Volume-related Utilities and helpers.""" +import ast import math import re import uuid @@ -569,3 +570,27 @@ def check_already_managed_volume(db, vol_name): except (exception.VolumeNotFound, ValueError): return False return False + + +def convert_config_string_to_dict(config_string): + """Convert config file replication string to a dict. + + The only supported form is as follows: + "{'key-1'='val-1' 'key-2'='val-2'...}" + + :param config_string: Properly formatted string to convert to dict. 
+ :response: dict of string values + """ + + resultant_dict = {} + + try: + st = config_string.replace("=", ":") + st = st.replace(" ", ", ") + resultant_dict = ast.literal_eval(st) + except Exception: + LOG.warning(_LW("Error encountered translating config_string: " + "%(config_string)s to dict"), + {'config_string': config_string}) + + return resultant_dict diff --git a/doc/source/devref/index.rst b/doc/source/devref/index.rst index 00509ebaeec..86e64b578dd 100644 --- a/doc/source/devref/index.rst +++ b/doc/source/devref/index.rst @@ -31,6 +31,7 @@ Programming HowTos and Tutorials addmethod.openstackapi drivers gmr + replication Background Concepts for Cinder diff --git a/doc/source/devref/replication.rst b/doc/source/devref/replication.rst new file mode 100644 index 00000000000..fd7fc41e342 --- /dev/null +++ b/doc/source/devref/replication.rst @@ -0,0 +1,166 @@ +Replication +============ + +How to implement replication features in a backend driver. + +For backend devices that offer replication features, Cinder +provides a common mechanism for exposing that functionality +on a per volume basis while still trying to allow +flexibility for the varying implementation and requirements +of all the different backend devices. + +Most of the configuration is done via the cinder.conf file +under the driver section and through the use of volume types. + +Config file examples +-------------------- + +The cinder.conf file is used to specify replication target +devices for a specific driver. There are two types of target +devices that can be configured: + + 1. Cinder Managed (represented by the volume-backend name) + 2. External devices (require vendor specific data to configure) + +NOTE that it is expected to be an error to have both managed and unmanaged replication +config variables set for a single driver. + +Cinder managed target device +----------------------------- + +In the case of a Cinder managed target device, we simply +use another Cinder configured backend as the replication +target. + +For example if we have two backend devices foo and biz that +can replicate to each other, we can set up backend biz as +a replication target for device foo using the following +config entries:: + + ..... + [driver-biz] + volume_driver=xxxx + volume_backend_name=biz + + [driver-foo] + volume_driver=xxxx + volume_backend_name=foo + managed_replication_target=True + replication_devices=volume_backend_name-1,volume_backend_name-2.... + +Notice that the only change from the usual driver configuration +section here is the addition of the replication_devices option. + + +Unmanaged target device +------------------------ + +In some cases the replication target device may not be a +configured Cinder backend. In this case it's the configured +driver's responsibility to route commands to the active device +and to update provider info to ensure the proper iSCSI targets +are being used. + +This type of config changes only slightly, and instead of using +a backend_name, it takes the vendor unique config options:: + + ..... + [driver-foo] + volume_driver=xxxx + volume_backend_name=foo + managed_replication_target=False + replication_devices={'key1'='val1' 'key2'='val2' ...}, + {'key7'='val7'....},... + +Note that the key/value entries can be whatever the device requires; we treat the actual +variable in the config parser as a comma delimited list, the {} and = notations are +convenient/common parser delimiters, and the K/V entries are space separated. + +We provide a literal evaluator to convert these entries into a proper dict, so +the format is extremely important here.
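For example, the convert_config_string_to_dict helper added to cinder/volume/utils.py in this patch turns one such entry into a dict (this mirrors the unit test above)::

    from cinder.volume import utils as volume_utils

    entry = "{'key-1'='val-1' 'key-2'='val-2'}"
    volume_utils.convert_config_string_to_dict(entry)
    # => {'key-1': 'val-1', 'key-2': 'val-2'}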
+ + +Volume Types / Extra Specs +--------------------------- +In order for a user to specify they'd like a replicated volume, there needs to be +a corresponding Volume Type created by the Cloud Administrator. + +There's a good deal of flexibility in using volume types. The scheduler can +send the create request to a backend that provides replication by simply +providing the replication=enabled key to the extra-specs of the volume type. + +For example, if the type should simply create the volume on any backend that +supports replication (or on the only such backend), the extra-specs entry would be:: + + {replication: enabled} + +If you needed to specify a particular backend device (multiple backends supporting replication):: + {replication: enabled, volume_backend_name: foo} + +Additionally you could provide further details using scoped keys:: + {replication: enabled, volume_backend_name: foo, + replication:replication_type: async} + +Again, it's up to the driver to parse the volume type info on create and set things up +as requested. While the scoping key can be anything, it's strongly recommended that all +backends utilize the same key (replication) for consistency and to make things easier for +the Cloud Administrator. + +Capabilities reporting +---------------------- +The following entries are expected to be added to the stats/capabilities update for +replication configured devices:: + + stats["replication_enabled"] = True|False + stats["replication_type"] = ['async', 'sync'...] + stats["replication_count"] = len(self.cluster_pairs) + +Required methods +----------------- +The number of API methods associated with replication is intentionally very limited; they are +Admin only methods. + +They include:: + replication_enable(self, context, volume) + replication_disable(self, context, volume) + replication_failover(self, context, volume, secondary) + list_replication_targets(self, context, vref) + +**replication_enable** + +Used to notify the driver that we would like to enable replication on a replication capable volume. +NOTE this is NOT used as the initial create replication command; that's handled by the volume-type at +create time. This is provided as a method for an Admin that may have needed to disable replication +on a volume for maintenance or some other reason, to signify that they'd like to "resume" replication on +the given volume. + +**replication_disable** + +Used to notify the driver that we would like to disable replication on a replication capable volume. +This again would be used by a Cloud Administrator for things like maintenance etc. + +**replication_failover** + +Used to instruct the backend to fail over to the secondary/target device on a replication capable volume. +This may be used for triggering a fail-over manually or for testing purposes. + +Note that ideally drivers will know how to update the volume reference properly so that Cinder is now +pointing to the secondary. Also, while it's not required at this time, ideally the command would +act as a toggle, allowing switching back and forth between primary and secondary and back to primary. + +**list_replication_targets** + +Used by the admin to query a volume for a list of configured replication targets. +The return for this call is expected to mimic the form used in the config file.
+ +For a volume replicating to managed replication targets:: + + {'volume_id': volume['id'], 'targets':[{'type': 'managed', + 'backend_name': 'backend_name'}...] + +For a volume replicating to external/unmanaged targets:: + + {'volume_id': volume['id'], 'targets':[{'type': 'unmanaged', + 'san_ip': '127.0.0.1', + 'san_login': 'admin'...}...] + diff --git a/etc/cinder/policy.json b/etc/cinder/policy.json index 5a520c528bd..7bbe497537e 100644 --- a/etc/cinder/policy.json +++ b/etc/cinder/policy.json @@ -64,6 +64,11 @@ "volume_extension:replication:promote": "rule:admin_api", "volume_extension:replication:reenable": "rule:admin_api", + "volume:enable_replication": "rule:admin_api", + "volume:disable_replication": "rule:admin_api", + "volume:failover_replication": "rule:admin_api", + "volume:list_replication_targets": "rule:admin_api", + "backup:create" : "", "backup:delete": "", "backup:get": "",