Cinder replication V2

This adds a scaled back replication implementation
that leaves the bulk of the work up to the driver.
We just provide basic admin API methods to do things
like enable/disable and fail-over.  Set up and
specification of replication targets for a specific
back end are now intended to be part of the cinder.conf
in the driver section itself.

Replication targets are configured via the cinder.conf
file in their associated driver section.  See the devref
doc included in this commit for details on the format.

The next step in configuration is to create a volume-type
with replication info in the extra-specs.
    extra-specs = replication=enable, volume_backend_name=foo

This instructs the driver to utilize replication. The default behavior
is up to the driver, but single-way replication is suggested; in the case
of multiple targets, the driver could choose one or have a default.

If the back end doesn't report replication=enabled in its stats
updates, the scheduler will fail to place the volume due to invalid
host, or no hosts available.

Vendors can easily modify extra-specs or their own config settings
to modify this behavior, any vendor-unique adaptation can be
provided through the use of scoped keys.  Suggested examples
will be published in docs.

See doc/source/devref/replication.rst for more info

Implements BP: replication-v2
DocImpact

Change-Id: I406390e4d5f3c9947df1c4f2de68821e0fd7f75b
This commit is contained in:
John Griffith 2015-07-09 21:11:54 +00:00
parent ec127646d1
commit fe538dc63d
12 changed files with 908 additions and 3 deletions

View File

@ -255,6 +255,85 @@ class VolumeAdminController(AdminController):
new_volume, error) new_volume, error)
return {'save_volume_id': ret} return {'save_volume_id': ret}
@wsgi.action('os-enable_replication')
def _enable_replication(self, req, id, body):
    """Enable (or re-enable) replication on a replication capable volume.

    Admin only action, mainly intended for turning the replication
    process back on for a replicated volume after maintenance or testing.

    Responds with 404 when the volume cannot be found, 202 otherwise.
    """
    ctxt = req.environ['cinder.context']
    self.authorize(ctxt, 'enable_replication')
    try:
        vol = self._get(ctxt, id)
    except exception.VolumeNotFound as err:
        raise exc.HTTPNotFound(explanation=err.msg)
    else:
        # Only reached when the lookup succeeded.
        self.volume_api.enable_replication(ctxt, vol)
    return webob.Response(status_int=202)
@wsgi.action('os-disable_replication')
def _disable_replication(self, req, id, body):
    """Disable replication on a replication capable volume.

    Admin only action that asks the backend to stop the replication
    process for a replicated volume.

    Responds with 404 when the volume cannot be found, 202 otherwise.
    """
    ctxt = req.environ['cinder.context']
    self.authorize(ctxt, 'disable_replication')
    try:
        vol = self._get(ctxt, id)
    except exception.VolumeNotFound as err:
        raise exc.HTTPNotFound(explanation=err.msg)
    else:
        # Only reached when the lookup succeeded.
        self.volume_api.disable_replication(ctxt, vol)
    return webob.Response(status_int=202)
@wsgi.action('os-failover_replication')
def _failover_replication(self, req, id, body):
    """Fail over a replicating volume to its secondary.

    Admin only method, used to force a fail-over to a replication
    target. The action payload may carry an optional 'secondary' entry
    indicating which device to promote when multiple replication targets
    exist; when it is absent None is passed down and the choice is left
    to the lower layers.

    Responds with 404 when the volume cannot be found, 202 otherwise.
    """
    context = req.environ['cinder.context']
    self.authorize(context, 'failover_replication')
    try:
        volume = self._get(context, id)
    except exception.VolumeNotFound as e:
        raise exc.HTTPNotFound(explanation=e.msg)
    # A request such as {"os-failover_replication": null} carries no
    # parameters at all; treat it like an empty parameter dict instead of
    # blowing up with an AttributeError on None.
    params = body['os-failover_replication'] or {}
    secondary = params.get('secondary', None)
    self.volume_api.failover_replication(context, volume, secondary)
    return webob.Response(status_int=202)
@wsgi.action('os-list_replication_targets')
def _list_replication_targets(self, req, id, body):
    """Show replication targets for the specified volume.

    Admin only action that reports the replication target devices
    configured for the given volume. Responds with 404 when the volume
    cannot be found.
    """
    # TODO(jdg): An equivalent command that queries a backend host
    # (showing the configuration of a specified backend) would be
    # useful too, but the per-volume view has priority as it is
    # likely to be more useful.
    ctxt = req.environ['cinder.context']
    self.authorize(ctxt, 'list_replication_targets')
    try:
        vol = self._get(ctxt, id)
    except exception.VolumeNotFound as err:
        raise exc.HTTPNotFound(explanation=err.msg)

    # The expected response is a dict with unknown keys, of the form:
    # {'volume_id': xx, 'replication_targets':[{k: v, k1: v1...}]}
    targets = self.volume_api.list_replication_targets(ctxt, vol)
    return targets
class SnapshotAdminController(AdminController): class SnapshotAdminController(AdminController):
"""AdminController for Snapshots.""" """AdminController for Snapshots."""

View File

@ -34,7 +34,10 @@
"volume:update_readonly_flag": "", "volume:update_readonly_flag": "",
"volume:retype": "", "volume:retype": "",
"volume:copy_volume_to_image": "", "volume:copy_volume_to_image": "",
"volume:enable_replication": "rule:admin_api",
"volume:disable_replication": "rule:admin_api",
"volume:failover_replication": "rule:admin_api",
"volume:list_replication_targets": "rule:admin_api",
"volume_extension:volume_admin_actions:reset_status": "rule:admin_api", "volume_extension:volume_admin_actions:reset_status": "rule:admin_api",
"volume_extension:snapshot_admin_actions:reset_status": "rule:admin_api", "volume_extension:snapshot_admin_actions:reset_status": "rule:admin_api",
"volume_extension:backup_admin_actions:reset_status": "rule:admin_api", "volume_extension:backup_admin_actions:reset_status": "rule:admin_api",

View File

@ -5849,6 +5849,61 @@ class GenericVolumeDriverTestCase(DriverTestCase):
volume_file) volume_file)
self.assertEqual(i, backup_service.restore.call_count) self.assertEqual(i, backup_service.restore.call_count)
def test_enable_replication_invalid_state(self):
    """Enabling is rejected when replication_status is 'enabled'."""
    api = cinder.volume.api.API()
    admin_ctxt = context.get_admin_context()
    vol = tests_utils.create_volume(admin_ctxt,
                                    size=1,
                                    host=CONF.host,
                                    replication_status='enabled')

    self.assertRaises(exception.InvalidVolume,
                      api.enable_replication,
                      admin_ctxt, vol)
def test_enable_replication(self):
    """Enabling a 'disabled' volume is forwarded to the volume RPC API."""
    api = cinder.volume.api.API()
    admin_ctxt = context.get_admin_context()
    vol = tests_utils.create_volume(self.context,
                                    size=1,
                                    host=CONF.host,
                                    replication_status='disabled')

    rpc_patch = mock.patch.object(volume_rpcapi.VolumeAPI,
                                  'enable_replication')
    with rpc_patch as mock_enable_rep:
        api.enable_replication(admin_ctxt, vol)
        self.assertTrue(mock_enable_rep.called)
def test_disable_replication_invalid_state(self):
    """Disabling is rejected for an unrecognised replication_status."""
    api = cinder.volume.api.API()
    admin_ctxt = context.get_admin_context()
    vol = tests_utils.create_volume(admin_ctxt,
                                    size=1,
                                    host=CONF.host,
                                    replication_status='invalid-state')

    self.assertRaises(exception.InvalidVolume,
                      api.disable_replication,
                      admin_ctxt, vol)
def test_disable_replication(self):
    """Disabling is forwarded over RPC from both valid start states.

    The API accepts 'disabled' and 'enabled' as starting states; each of
    the two calls below must result in its own RPC cast.
    """
    volume_api = cinder.volume.api.API()
    ctxt = context.get_admin_context()
    volume = tests_utils.create_volume(self.context,
                                       size=1,
                                       host=CONF.host,
                                       replication_status='disabled')

    with mock.patch.object(volume_rpcapi.VolumeAPI,
                           'disable_replication') as mock_disable_rep:
        volume_api.disable_replication(ctxt, volume)
        self.assertEqual(1, mock_disable_rep.call_count)

        # Checking `.called` again would be vacuous (it is already True
        # from the first call), so count the calls instead.
        volume['replication_status'] = 'enabled'
        volume_api.disable_replication(ctxt, volume)
        self.assertEqual(2, mock_disable_rep.call_count)
class LVMISCSIVolumeDriverTestCase(DriverTestCase): class LVMISCSIVolumeDriverTestCase(DriverTestCase):
"""Test case for VolumeDriver""" """Test case for VolumeDriver"""

View File

@ -794,3 +794,11 @@ class VolumeUtilsTestCase(test.TestCase):
mock_db, 'volume-d8cd1fe') mock_db, 'volume-d8cd1fe')
self.assertFalse(result) self.assertFalse(result)
def test_convert_config_string_to_dict(self):
    """A well-formed replication config string becomes a plain dict."""
    config = "{'key-1'='val-1' 'key-2'='val-2' 'key-3'='val-3'}"
    converted = volume_utils.convert_config_string_to_dict(config)

    self.assertEqual(
        {'key-1': 'val-1', 'key-2': 'val-2', 'key-3': 'val-3'},
        converted)

View File

@ -1505,6 +1505,121 @@ class API(base.Base):
resource=vol_ref) resource=vol_ref)
return vol_ref return vol_ref
# Replication V2 methods ##
# NOTE(jdg): It might be kinda silly to propagate the named
# args with defaults all the way down through rpc into manager
# but for now the consistency is useful, and there may be
# some usefulness in the future (direct calls in manager?)
# NOTE(jdg): Relying solely on the volume-type quota mechanism
# need to consider looking at how we handle configured backends
# WRT quotas, do they count against normal quotas or not? For
# now they're a special resource, so no.
@wrap_check_policy
def enable_replication(self, ctxt, volume):
    """Validate state and ask the volume service to enable replication.

    A fresh copy of the volume is read from the DB; if its
    replication_status is not 'disabled' the request is rejected,
    otherwise the status is moved to 'enabling' and the request is handed
    to the volume RPC API.

    :param ctxt: security context
    :param volume: volume to enable replication on (only its 'id' is used)
    :raises InvalidVolume: if the current replication_status is not valid
                           for enabling replication
    """
    # NOTE(jdg): details like sync vs async
    # and replica count are to be set via the
    # volume-type and config files.

    # Get a fresh ref from db and check status
    volume = self.db.volume_get(ctxt, volume['id'])

    # NOTE(jdg): Set a valid status as a var to minimize errors via typos
    # also, use a list, we may want to add to it some day

    # TODO(jdg): Move these up to a global list for each call and ban the
    # free form typing of states and state checks going forward

    # NOTE(jdg): There may be a need for some backends to allow this
    # call to driver regardless of replication_status, most likely
    # this indicates an issue with the driver, but might be useful
    # cases to consider modifying this for in the future.
    valid_rep_status = ['disabled']
    rep_status = volume.get('replication_status', valid_rep_status[0])

    if rep_status not in valid_rep_status:
        # The template must be interpolated with '%'; separating the
        # template and the dict with a comma builds a tuple and the
        # placeholders are never filled in.
        msg = (_("Invalid status to enable replication. "
                 "valid states are: %(valid_states)s, "
                 "current replication-state is: %(curr_state)s.") %
               {'valid_states': valid_rep_status,
                'curr_state': rep_status})

        raise exception.InvalidVolume(reason=msg)

    vref = self.db.volume_update(ctxt,
                                 volume['id'],
                                 {'replication_status': 'enabling'})
    self.volume_rpcapi.enable_replication(ctxt, vref)
@wrap_check_policy
def disable_replication(self, ctxt, volume):
    """Validate state and ask the volume service to disable replication.

    If the volume's replication_status is 'disabled' or 'enabled' the
    status is moved to 'disabling' and the request is handed to the
    volume RPC API; any other status is rejected.

    :param ctxt: security context
    :param volume: volume to disable replication on
    :raises InvalidVolume: if the current replication_status is not valid
                           for disabling replication
    """
    valid_disable_status = ['disabled', 'enabled']

    # NOTE(jdg): Just use disabled here (item 1 in the list) this
    # way if someone says disable_rep on a volume that's not being
    # replicated we just say "ok, done"
    rep_status = volume.get('replication_status', valid_disable_status[0])

    if rep_status not in valid_disable_status:
        # The template must be interpolated with '%'; separating the
        # template and the dict with a comma builds a tuple and the
        # placeholders are never filled in.
        msg = (_("Invalid status to disable replication. "
                 "valid states are: %(valid_states)s, "
                 "current replication-state is: %(curr_state)s.") %
               {'valid_states': valid_disable_status,
                'curr_state': rep_status})

        raise exception.InvalidVolume(reason=msg)

    vref = self.db.volume_update(ctxt,
                                 volume['id'],
                                 {'replication_status': 'disabling'})

    self.volume_rpcapi.disable_replication(ctxt, vref)
@wrap_check_policy
def failover_replication(self,
                         ctxt,
                         volume,
                         secondary=None):
    """Validate state and ask the volume service to fail over a volume.

    Only a volume whose replication_status is 'enabled' may be failed
    over. On success the status is moved to 'enabling_secondary' and the
    request is handed to the volume RPC API.

    :param ctxt: security context
    :param volume: volume to fail over
    :param secondary: optional replication target to fail over to
    :raises InvalidVolume: if the current replication_status is not valid
                           for a fail-over
    """
    # FIXME(jdg): What is the secondary argument?
    # for managed secondaries that's easy; it's a host
    # for others, it's tricky; will propose a format for
    # secondaries that includes an ID/Name that can be
    # used as a handle
    valid_failover_status = ['enabled']
    rep_status = volume.get('replication_status', 'na')

    if rep_status not in valid_failover_status:
        # The template must be interpolated with '%'; separating the
        # template and the dict with a comma builds a tuple and the
        # placeholders are never filled in.
        msg = (_("Invalid status to failover replication. "
                 "valid states are: %(valid_states)s, "
                 "current replication-state is: %(curr_state)s.") %
               {'valid_states': valid_failover_status,
                'curr_state': rep_status})

        raise exception.InvalidVolume(reason=msg)

    vref = self.db.volume_update(
        ctxt,
        volume['id'],
        {'replication_status': 'enabling_secondary'})

    self.volume_rpcapi.failover_replication(ctxt,
                                            vref,
                                            secondary)
@wrap_check_policy
def list_replication_targets(self, ctxt, volume):
    """Return the replication target info reported for a volume.

    The request is passed straight through to the volume RPC API and its
    result is returned unchanged.
    """
    # NOTE(jdg): This gathers info for the given volume; a volume that
    # is not being replicated is NOT an error. Something at the
    # backend/host level that shows an admin how a backend is
    # configured would also be worth having.
    targets = self.volume_rpcapi.list_replication_targets(ctxt, volume)
    return targets
class HostAPI(base.Base): class HostAPI(base.Base):
def __init__(self): def __init__(self):

View File

@ -221,6 +221,20 @@ volume_opts = [
help='List of options that control which trace info ' help='List of options that control which trace info '
'is written to the DEBUG log level to assist ' 'is written to the DEBUG log level to assist '
'developers. Valid values are method and api.'), 'developers. Valid values are method and api.'),
cfg.BoolOpt('managed_replication_target',
default=True,
help='There are two types of target configurations '
'managed (replicate to another configured backend) '
'or unmanaged (replicate to a device not managed '
'by Cinder).'),
cfg.ListOpt('replication_devices',
default=None,
help="List of k/v pairs representing a replication target "
"for this backend device. For unmanaged the format "
"is: {'key-1'='val1' 'key-2'='val2'...},{...} "
"and for managed devices its simply a list of valid "
"configured backend_names that the driver supports "
"replicating to: backend-a,bakcend-b...")
] ]
# for backward compatibility # for backward compatibility
@ -291,6 +305,7 @@ class BaseVD(object):
self.configuration.append_config_values(volume_opts) self.configuration.append_config_values(volume_opts)
self.configuration.append_config_values(iser_opts) self.configuration.append_config_values(iser_opts)
utils.setup_tracing(self.configuration.safe_get('trace_flags')) utils.setup_tracing(self.configuration.safe_get('trace_flags'))
self.set_execute(execute) self.set_execute(execute)
self._stats = {} self._stats = {}
@ -1384,6 +1399,187 @@ class ManageableVD(object):
pass pass
@six.add_metaclass(abc.ABCMeta)
class ReplicaV2VD(object):
    """Abstract interface for Cinder replication (V2) capable drivers.

    The Cinder replication functionality is set up primarily through
    the use of volume-types in conjunction with the filter scheduler.
    This requires:
    1. The driver reports replication as enabled in its capabilities
       (NOTE(review): elsewhere in this change the stats key that is set
       and checked is 'replication_enabled' -- confirm the exact key).
    2. The cinder.conf file includes the replication device settings in
       the driver section (NOTE(review): the option registered in
       volume_opts is named 'replication_devices').

    The driver configuration is expected to take one of two forms
    (managed or unmanaged targets), see devref replication docs for
    details.

    Note we provide cinder.volume.utils.convert_config_string_to_dict
    to parse this out into a usable proper dictionary.

    Every method below is abstract and raises NotImplementedError here;
    concrete drivers must override all of them.
    """

    @abc.abstractmethod
    def replication_enable(self, context, volume):
        """Enable replication on a replication capable volume.

        If the volume was created on a replication_enabled host this method
        is used to re-enable replication for the volume.

        Primarily we only want this for testing/admin purposes.  The idea
        being that the bulk of the replication details are handled by the
        type definition and the driver; however disable/enable(re-enable) is
        provided for admins to test or do maintenance which is a
        requirement by some cloud-providers.

        NOTE: This is intended as an ADMIN only call and is not
        intended to be used by end-user to enable replication.  We're
        leaving that to volume-type info, this is for things like
        maintenance or testing.

        :param context: security context
        :param volume: volume object returned by DB
        :response: {replication_driver_data: vendor-data} DB update

        The replication_driver_data response is vendor unique,
        data returned/used by the driver.  It is expected that
        the response from the driver is in the appropriate db update
        format, in the form of a dict, where the vendor data is
        stored under the key 'replication_driver_data'
        """

        # TODO(jdg): Put a check in at API layer to verify the host is
        # replication capable before even issuing this call (can just
        # check against the volume-type for said volume as well)

        raise NotImplementedError()

    @abc.abstractmethod
    def replication_disable(self, context, volume):
        """Disable replication on the specified volume.

        If the specified volume is currently replication enabled,
        this method can be used to disable the replication process
        on the backend.

        Note that we still send this call to a driver whose volume
        may report replication-disabled already.  We do this as a
        safety mechanism to allow a driver to cleanup any mismatch
        in state between Cinder and itself.

        This is intended as an ADMIN only call to allow for
        maintenance and testing.  If a driver receives this call
        and the process fails for some reason the driver should
        return a status update to "replication_status=disable_failed"

        :param context: security context
        :param volume: volume object returned by DB
        :response: {replication_driver_data: vendor-data} DB update

        The replication_driver_data response is vendor unique,
        data returned/used by the driver.  It is expected that
        the response from the driver is in the appropriate db update
        format, in the form of a dict, where the vendor data is
        stored under the key 'replication_driver_data'
        """

        raise NotImplementedError()

    @abc.abstractmethod
    def replication_failover(self, context, volume, secondary):
        """Force failover to a secondary replication target.

        Forces the failover action of a replicated volume to one of its
        secondary/target devices.  By default the choice of target devices
        is left up to the driver.  In particular we expect one way
        replication here, but are providing a mechanism for 'n' way
        if supported/configured.

        Currently we leave it up to the driver to figure out how/what
        to do here.  Rather than doing things like ID swaps, we instead
        just let the driver figure out how/where to route things.

        In cases where we might want to drop a volume-service node and
        the replication target is a configured cinder backend, we'll
        just update the host column for the volume.

        Very important point here is that in the case of a successful
        failover, we want to update the replication_status of the
        volume to "failed-over".  This way there's an indication that
        things worked as expected, and that it's evident that the volume
        may no longer be replicating to another backend (primary burst
        in to flames).  This status will be set by the manager.

        :param context: security context
        :param volume: volume object returned by DB
        :param secondary: Specifies rep target to fail over to
        :response: dict of updates

        So the response would take the form:
            {host: <properly formatted host string for db update>,
             model_update: {standard_model_update_KVs},
             replication_driver_data: xxxxxxx}

        It is expected that the format of these responses are in a consumable
        format to be used in a db.update call directly.

        Additionally we utilize exception catching to report back to the
        manager when things went wrong and to inform the caller on how
        to proceed.
        """

        raise NotImplementedError()

    @abc.abstractmethod
    def list_replication_targets(self, context, vref):
        """Provide a means to obtain replication targets for a volume.

        This method is used to query a backend to get the current
        replication config info for the specified volume.

        In the case of a volume that isn't being replicated,
        the driver should return an empty list.

        :param context: security context
        :param vref: volume reference to report replication targets for

        Example response for replicating to a managed backend:
            {'volume_id': volume['id'],
             'targets':[{'type': 'managed',
                         'backend_name': 'backend_name'}...]

        Example response for replicating to an unmanaged backend:
            {'volume_id': volume['id'],
             'targets':[{'type': 'unmanaged',
                         'vendor-key-1': 'value-1'}...]

        NOTE: It's the responsibility of the driver to mask out any
        passwords or sensitive information.  Also the format of the
        response allows mixed (managed/unmanaged) targets, even though
        the first iteration does not support configuring the driver in
        such a manner.
        """

        raise NotImplementedError()

    @abc.abstractmethod
    def get_replication_updates(self, context):
        """Provide a means to obtain status updates from backend.

        Provides a concise update for backends to report any errors
        or problems with replicating volumes.  The intent is we only
        return something here if there's an error or a problem, and to
        notify where the backend thinks the volume is.

        :param context: context of caller (probably don't need)
        :returns: [{volid: n, status: ok|error,...}]
        """
        # NOTE(jdg): flesh this out with implementations so we all
        # have something usable here
        raise NotImplementedError()
@six.add_metaclass(abc.ABCMeta) @six.add_metaclass(abc.ABCMeta)
class ReplicaVD(object): class ReplicaVD(object):
@abc.abstractmethod @abc.abstractmethod
@ -1928,6 +2124,7 @@ class ISCSIDriver(VolumeDriver):
data["driver_version"] = '1.0' data["driver_version"] = '1.0'
data["storage_protocol"] = 'iSCSI' data["storage_protocol"] = 'iSCSI'
data["pools"] = [] data["pools"] = []
data["replication_enabled"] = False
self._update_pools_and_stats(data) self._update_pools_and_stats(data)

View File

@ -189,7 +189,7 @@ def locked_snapshot_operation(f):
class VolumeManager(manager.SchedulerDependentManager): class VolumeManager(manager.SchedulerDependentManager):
"""Manages attachable block storage devices.""" """Manages attachable block storage devices."""
RPC_API_VERSION = '1.26' RPC_API_VERSION = '1.27'
target = messaging.Target(version=RPC_API_VERSION) target = messaging.Target(version=RPC_API_VERSION)
@ -405,6 +405,10 @@ class VolumeManager(manager.SchedulerDependentManager):
self.publish_service_capabilities(ctxt) self.publish_service_capabilities(ctxt)
# conditionally run replication status task # conditionally run replication status task
# FIXME(jdg): This should go away or be handled differently
# if/when we're ready for V2 replication
stats = self.driver.get_volume_stats(refresh=True) stats = self.driver.get_volume_stats(refresh=True)
if stats and stats.get('replication', False): if stats and stats.get('replication', False):
@ -413,6 +417,7 @@ class VolumeManager(manager.SchedulerDependentManager):
self._update_replication_relationship_status(ctxt) self._update_replication_relationship_status(ctxt)
self.add_periodic_task(run_replication_task) self.add_periodic_task(run_replication_task)
LOG.info(_LI("Driver initialization completed successfully."), LOG.info(_LI("Driver initialization completed successfully."),
resource={'type': 'driver', resource={'type': 'driver',
'id': self.driver.__class__.__name__}) 'id': self.driver.__class__.__name__})
@ -1538,6 +1543,24 @@ class VolumeManager(manager.SchedulerDependentManager):
# queue it to be sent to the Schedulers. # queue it to be sent to the Schedulers.
self.update_service_capabilities(volume_stats) self.update_service_capabilities(volume_stats)
if volume_stats.get('replication_enabled', False):
# replciation_status provides a concise update of
# replicating volumes and any error conditions
# detected by the driver. The intent is we don't
# expect/worry about updates so long as nothing
# changes, but if something goes wrong this is a
# handy mechanism to update the manager and the db
# possibly let the admin/user be notified
# TODO(jdg): Refactor the check/update pieces to a
# helper method we can share
# We want to leverage some of the same update model
# that we have in the targets update call
replication_updates = self.driver.get_replication_updates()
for update in replication_updates:
pass
def _append_volume_stats(self, vol_stats): def _append_volume_stats(self, vol_stats):
pools = vol_stats.get('pools', None) pools = vol_stats.get('pools', None)
if pools and isinstance(pools, list): if pools and isinstance(pools, list):
@ -2706,3 +2729,204 @@ class VolumeManager(manager.SchedulerDependentManager):
for key in model_update.iterkeys()} for key in model_update.iterkeys()}
self.db.volume_update(ctxt.elevated(), new_volume['id'], self.db.volume_update(ctxt.elevated(), new_volume['id'],
model_update_new) model_update_new)
# Replication V2 methods
def enable_replication(self, context, volume):
    """Enable replication on a replication capable volume.

    If the volume was created on a replication_enabled host this method
    is used to enable replication for the volume. Primarily used for
    testing and maintenance.

    On success any update dict returned by the driver is written to the
    DB and replication_status is set to 'enabled'. If the driver call
    fails, replication_status is set to 'error' so the volume is not
    left in 'enabling'.

    :param context: security context
    :param volume: volume object returned by DB
    :raises InvalidVolume: if the volume is not in the 'enabling' state
    :raises VolumeBackendAPIException: if the driver call or the DB
                                       update of driver data fails
    """

    # NOTE(jdg): We're going to do fresh get from the DB and verify that
    # we are in an expected state ('enabling')
    volume = self.db.volume_get(context, volume['id'])
    if volume['replication_status'] != 'enabling':
        # InvalidVolume is raised with reason= everywhere else in this
        # change; give it one here too so the error is meaningful.
        msg = (_("Unable to enable replication due to invalid "
                 "replication status: %(status)s.") %
               {'status': volume['replication_status']})
        raise exception.InvalidVolume(reason=msg)

    try:
        rep_driver_data = self.driver.replication_enable(context,
                                                         volume)
    except exception.CinderException:
        err_msg = (_("Enable replication for volume failed."))
        LOG.exception(err_msg, resource=volume)
        # Mirror failover_replication: record the failure instead of
        # leaving the volume stuck in the transitional state.
        self.db.volume_update(context,
                              volume['id'],
                              {'replication_status': 'error'})
        raise exception.VolumeBackendAPIException(data=err_msg)
    try:
        if rep_driver_data:
            volume = self.db.volume_update(context,
                                           volume['id'],
                                           rep_driver_data)
    except exception.CinderException as ex:
        LOG.exception(_LE("Driver replication data update failed."),
                      resource=volume)
        # Use data= here, matching the other raise of this exception
        # above, rather than reason=.
        raise exception.VolumeBackendAPIException(data=ex)
    self.db.volume_update(context, volume['id'],
                          {'replication_status': 'enabled'})
def disable_replication(self, context, volume):
    """Disable replication on the specified volume.

    If the specified volume is currently replication enabled,
    this method can be used to disable the replication process
    on the backend.  This method assumes that we checked
    replication status in the API layer to ensure we should
    send this call to the driver.

    On success any update dict returned by the driver is written to the
    DB and replication_status is set to 'disabled'. If the driver call
    fails, replication_status is set to 'error' so the volume is not
    left in 'disabling'.

    :param context: security context
    :param volume: volume object returned by DB
    :raises InvalidVolume: if the volume is not in the 'disabling' state
    :raises VolumeBackendAPIException: if the driver call or the DB
                                       update of driver data fails
    """

    # Work from a fresh DB copy and verify the expected transitional state.
    volume = self.db.volume_get(context, volume['id'])
    if volume['replication_status'] != 'disabling':
        # InvalidVolume is raised with reason= everywhere else in this
        # change; give it one here too so the error is meaningful.
        msg = (_("Unable to disable replication due to invalid "
                 "replication status: %(status)s.") %
               {'status': volume['replication_status']})
        raise exception.InvalidVolume(reason=msg)

    try:
        rep_driver_data = self.driver.replication_disable(context,
                                                          volume)
    except exception.CinderException:
        err_msg = (_("Disable replication for volume failed."))
        LOG.exception(err_msg, resource=volume)
        # Mirror failover_replication: record the failure instead of
        # leaving the volume stuck in the transitional state.
        self.db.volume_update(context,
                              volume['id'],
                              {'replication_status': 'error'})
        raise exception.VolumeBackendAPIException(data=err_msg)
    try:
        if rep_driver_data:
            volume = self.db.volume_update(context,
                                           volume['id'],
                                           rep_driver_data)
    except exception.CinderException as ex:
        LOG.exception(_LE("Driver replication data update failed."),
                      resource=volume)
        # Use data= here, matching the other raise of this exception
        # above, rather than reason=.
        raise exception.VolumeBackendAPIException(data=ex)
    self.db.volume_update(context,
                          volume['id'],
                          {'replication_status': 'disabled'})
def failover_replication(self, context, volume, secondary=None):
    """Force failover to a secondary replication target.

    Forces the failover action of a replicated volume to one of its
    secondary/target devices.  By default the choice of target devices
    is left up to the driver.  In particular we expect one way
    replication here, but are providing a mechanism for 'n' way
    if supported/configured.

    Currently we leave it up to the driver to figure out how/what
    to do here.  Rather than doing things like ID swaps, we instead
    just let the driver figure out how/where to route things.

    In cases where we might want to drop a volume-service node and
    the replication target is a configured cinder backend, we'll
    just update the host column for the volume.

    :param context: security context
    :param volume: volume object returned by DB
    :param secondary: Specifies rep target to fail over to
    :raises VolumeBackendAPIException: if the driver call or the DB
                                       update fails
    """
    try:
        volume_updates = self.driver.replication_failover(context,
                                                          volume,
                                                          secondary)

        # volume_updates is a dict containing a report of relevant
        # items based on the backend and how it operates or what it needs
        # {'host': 'secondary-configured-cinder-backend',
        #  'model_update': {'update-all-the-provider-info-etc'},
        #  'replication_driver_data': 'driver-specific-stuff-for-db'}
        # Where 'host' is a valid cinder host string like
        #  'foo@bar#baz'
        # model_update and replication_driver_data are required

    except exception.CinderException:

        # FIXME(jdg): We need to create a few different exceptions here
        # and handle each differently:
        # 1. I couldn't failover, but the original setup is ok so proceed
        #    as if this were never called
        # 2. I ran into a problem and I have no idea what state things
        #    are in, so set volume to error
        # 3. I ran into a problem and a human needs to come fix me up

        err_msg = (_("Replication failover for volume failed."))
        LOG.exception(err_msg, resource=volume)
        self.db.volume_update(context,
                              volume['id'],
                              {'replication_status': 'error'})
        raise exception.VolumeBackendAPIException(data=err_msg)

    # Tolerate a driver that returns nothing instead of failing with an
    # AttributeError on None below.
    volume_updates = volume_updates or {}

    # TODO(jdg): Come back and condense these into a single update
    update = {}
    model_update = volume_updates.get('model_update', None)
    driver_update = volume_updates.get('replication_driver_data', None)
    host_update = volume_updates.get('host', None)

    if model_update:
        # model_update already holds column/value pairs meant for a db
        # update, so merge it; nesting it under a 'model' key would hand
        # the DB layer a key that is not a volume field.
        update.update(model_update)
    if driver_update:
        update['replication_driver_data'] = driver_update
    if host_update:
        update['host'] = host_update

    if update:
        try:
            volume = self.db.volume_update(
                context,
                volume['id'],
                update)

        except exception.CinderException as ex:
            LOG.exception(_LE("Driver replication data update failed."),
                          resource=volume)
            # Use data= here, matching the other raise of this
            # exception above, rather than reason=.
            raise exception.VolumeBackendAPIException(data=ex)

    # NOTE(jdg): We're setting replication status to failed-over
    # which indicates the volume is ok, things went as expected but
    # we're likely not replicating any longer because... well we
    # did a fail-over.  In the case of admin bringing primary
    # back online he/she can use enable_replication to get this
    # state set back to enabled.

    # Also, in the case of multiple targets, the driver can update
    # status in the rep-status checks if it still has valid replication
    # targets that the volume is being replicated to.

    self.db.volume_update(context,
                          volume['id'],
                          {'replication_status': 'failed-over'})
def list_replication_targets(self, context, volume):
    """Return the driver's replication target info for a volume.

    Queries the backend driver for the current replication config
    of the given volume. For a volume that isn't being replicated
    the driver should return an empty list.

    Example response for replicating to a managed backend:
        {'volume_id': volume['id'],
         'targets':[{'managed_host': 'backend_name'}...]

    Example response for replicating to an unmanaged backend:
        {'volume_id': volume['id'], 'targets':[{'san_ip': '1.1.1.1',
                                                'san_login': 'admin'},
                                                ....]}

    NOTE: Masking out passwords or other sensitive information is
    the responsibility of the driver.

    :raises VolumeBackendAPIException: if the driver raises a
                                       CinderException
    """
    try:
        targets = self.driver.list_replication_targets(context, volume)
    except exception.CinderException:
        err_msg = (_("Get replication targets failed."))
        LOG.exception(err_msg)
        raise exception.VolumeBackendAPIException(data=err_msg)
    else:
        return targets

View File

@ -72,6 +72,7 @@ class VolumeAPI(object):
1.26 - Adds support for sending objects over RPC in 1.26 - Adds support for sending objects over RPC in
create_consistencygroup(), create_consistencygroup_from_src(), create_consistencygroup(), create_consistencygroup_from_src(),
update_consistencygroup() and delete_consistencygroup(). update_consistencygroup() and delete_consistencygroup().
1.27 - Adds support for replication V2
""" """
BASE_RPC_API_VERSION = '1.0' BASE_RPC_API_VERSION = '1.0'
@ -81,7 +82,7 @@ class VolumeAPI(object):
target = messaging.Target(topic=CONF.volume_topic, target = messaging.Target(topic=CONF.volume_topic,
version=self.BASE_RPC_API_VERSION) version=self.BASE_RPC_API_VERSION)
serializer = objects_base.CinderObjectSerializer() serializer = objects_base.CinderObjectSerializer()
self.client = rpc.get_client(target, '1.26', serializer=serializer) self.client = rpc.get_client(target, '1.27', serializer=serializer)
def create_consistencygroup(self, ctxt, group, host): def create_consistencygroup(self, ctxt, group, host):
new_host = utils.extract_host(host) new_host = utils.extract_host(host)
@ -260,3 +261,29 @@ class VolumeAPI(object):
volume=volume, volume=volume,
new_volume=new_volume, new_volume=new_volume,
volume_status=original_volume_status) volume_status=original_volume_status)
def enable_replication(self, ctxt, volume):
    """Cast 'enable_replication' (RPC 1.27) to the volume's host."""
    server = utils.extract_host(volume['host'])
    self.client.prepare(server=server, version='1.27').cast(
        ctxt, 'enable_replication', volume=volume)
def disable_replication(self, ctxt, volume):
    """Cast 'disable_replication' (RPC 1.27) to the volume's host."""
    server = utils.extract_host(volume['host'])
    self.client.prepare(server=server, version='1.27').cast(
        ctxt, 'disable_replication', volume=volume)
def failover_replication(self,
                         ctxt,
                         volume,
                         secondary=None):
    """Cast 'failover_replication' (RPC 1.27) to the volume's host.

    The optional secondary is forwarded unchanged as a keyword argument.
    """
    server = utils.extract_host(volume['host'])
    rpc_kwargs = {'volume': volume, 'secondary': secondary}
    self.client.prepare(server=server, version='1.27').cast(
        ctxt, 'failover_replication', **rpc_kwargs)
def list_replication_targets(self, ctxt, volume):
    """Call 'list_replication_targets' (RPC 1.27) and return its result."""
    server = utils.extract_host(volume['host'])
    client = self.client.prepare(server=server, version='1.27')
    targets = client.call(ctxt, 'list_replication_targets', volume=volume)
    return targets

View File

@ -15,6 +15,7 @@
"""Volume-related Utilities and helpers.""" """Volume-related Utilities and helpers."""
import ast
import math import math
import re import re
import uuid import uuid
@ -569,3 +570,27 @@ def check_already_managed_volume(db, vol_name):
except (exception.VolumeNotFound, ValueError): except (exception.VolumeNotFound, ValueError):
return False return False
return False return False
def convert_config_string_to_dict(config_string):
    """Convert config file replication string to a dict.

    The only supported form is as follows:
    "{'key-1'='val-1' 'key-2'='val-2'...}"

    Entries are separated by whitespace and each key is joined to its
    value with ``=``.  Both substitutions are applied to the whole
    string, so keys and values must not themselves contain whitespace
    or ``=``.

    :param config_string: Properly formatted string to convert to dict.
    :response: dict of string values; an empty dict (after logging a
               warning) when the string cannot be translated to a dict
    """
    try:
        # Treat any run of whitespace as a single entry separator and drop
        # leading/trailing whitespace; replacing each space individually
        # would turn "  " into ", , " and make the literal unparsable.
        st = ", ".join(config_string.split())
        st = st.replace("=", ":")
        # literal_eval only accepts Python literals, so no code from the
        # config string is ever executed.
        parsed = ast.literal_eval(st)
    except Exception:
        # Deliberately broad: a malformed (or non-string) option must not
        # take the caller down, it is reported below instead.
        parsed = None

    # A syntactically valid literal that is not a dict (e.g. "5") is just
    # as unusable to callers as a parse failure.
    if not isinstance(parsed, dict):
        LOG.warning(_LW("Error encountered translating config_string: "
                        "%(config_string)s to dict"),
                    {'config_string': config_string})
        return {}
    return parsed

View File

@ -31,6 +31,7 @@ Programming HowTos and Tutorials
addmethod.openstackapi addmethod.openstackapi
drivers drivers
gmr gmr
replication
Background Concepts for Cinder Background Concepts for Cinder

View File

@ -0,0 +1,166 @@
Replication
============
How to implement replication features in a backend driver.
For backend devices that offer replication features, Cinder
provides a common mechanism for exposing that functionality
on a per-volume basis while still trying to allow
flexibility for the varying implementations and requirements
of all the different backend devices.
Most of the configuration is done via the cinder.conf file
under the driver section and through the use of volume types.
Config file examples
--------------------
The cinder.conf file is used to specify replication target
devices for a specific driver. There are two types of target
devices that can be configured:
1. Cinder Managed (represented by the volume-backend name)
2. External devices (require vendor specific data to configure)
NOTE that it is expected to be an error to have both managed and unmanaged replication
config variables set for a single driver.
Cinder managed target device
-----------------------------
In the case of a Cinder managed target device, we simply
use another Cinder configured backend as the replication
target.
For example if we have two backend devices foo and biz that
can replicate to each other, we can set up backend biz as
a replication target for device foo using the following
config entries::
.....
[driver-biz]
volume_driver=xxxx
volume_backend_name=biz
[driver-foo]
volume_driver=xxxx
volume_backend_name=foo
managed_replication_target=True
replication_devices=volume_backend_name-1,volume_backend_name-2....
Notice that the only changes from the usual driver configuration
section here are the addition of the managed_replication_target
and replication_devices options.
Unmanaged target device
------------------------
In some cases the replication target device may not be a
configured Cinder backend. In this case it's the configured
driver's responsibility to route commands to the active device
and to update provider info to ensure the proper iSCSI targets
are being used.
This type of config changes only slightly, and instead of using
a backend_name, it takes the vendor unique config options::
.....
[driver-foo]
volume_driver=xxxx
volume_backend_name=foo
managed_replication_target=False
replication_devices={'key1'='val1' 'key2'='val2' ...},
{'key7'='val7'....},...
Note the key/value entries can be whatever the device requires. We treat the actual
variable in the config parser as a comma-delimited list; the {} and = notations are
convenient/common parser delimiters, and the K/V entries are space-separated.
We provide a literal evaluator to convert these entries into a proper dict, thus
format is extremely important here.
Volume Types / Extra Specs
---------------------------
In order for a user to specify they'd like a replicated volume, there needs to be
a corresponding Volume Type created by the Cloud Administrator.
There's a good deal of flexibility by using volume types. The scheduler can
send the create request to a backend that provides replication by simply
providing the replication=enabled key to the extra-specs of the volume type.
For example, if the type was set to simply create the volume on any (or if you only had one)
backend that supports replication, the extra-specs entry would be::
{replication: enabled}
If you needed to provide a specific backend device (multiple backends supporting replication)::
{replication: enabled, volume_backend_name: foo}
Additionally you could provide additional details using scoped keys::
{replication: enabled, volume_backend_name: foo,
replication:replication_type: async}
Again, it's up to the driver to parse the volume type info on create and set things up
as requested. While the scoping key can be anything, it's strongly recommended that all
backends utilize the same key (replication) for consistency and to make things easier for
the Cloud Administrator.
Capabilities reporting
----------------------
The following entries are expected to be added to the stats/capabilities update for
replication configured devices::
stats["replication_enabled"] = True|False
stats["replication_type"] = ['async', 'sync'...]
stats["replication_count"] = len(self.cluster_pairs)
Required methods
-----------------
The number of API methods associated with replication is intentionally very limited, and they are
Admin only methods.
They include::
replication_enable(self, context, volume)
replication_disable(self, context, volume)
replication_failover(self, context, volume)
list_replication_targets(self, context)
**replication_enable**
Used to notify the driver that we would like to enable replication on a replication capable volume.
NOTE this is NOT used as the initial create replication command, that's handled by the volume-type at
create time. This is provided as a method for an Admin that may have needed to disable replication
on a volume for maintenance or whatever reason to signify that they'd like to "resume" replication on
the given volume.
**replication_disable**
Used to notify the driver that we would like to disable replication on a replication capable volume.
This again would be used by a Cloud Administrator for things like maintenance etc.
**replication_failover**
Used to instruct the backend to fail over to the secondary/target device on a replication capable volume.
This may be used for triggering a fail-over manually or for testing purposes.
Note that ideally drivers will know how to update the volume reference properly so that Cinder is now
pointing to the secondary. Also, while it's not required at this time, ideally the command would
act as a toggle, allowing the admin to switch back and forth between primary and secondary.
**list_replication_targets**
Used by the admin to query a volume for a list of configured replication targets.
The return value for this call is expected to mimic the form used in the config file.
For a volume replicating to managed replication targets::
{'volume_id': volume['id'], 'targets':[{'type': 'managed',
'backend_name': 'backend_name'}...]}
For a volume replicating to external/unmanaged targets::
{'volume_id': volume['id'], 'targets':[{'type': 'unmanaged',
'san_ip': '127.0.0.1',
'san_login': 'admin'...}...]}

View File

@ -64,6 +64,11 @@
"volume_extension:replication:promote": "rule:admin_api", "volume_extension:replication:promote": "rule:admin_api",
"volume_extension:replication:reenable": "rule:admin_api", "volume_extension:replication:reenable": "rule:admin_api",
"volume:enable_replication": "rule:admin_api",
"volume:disable_replication": "rule:admin_api",
"volume:failover_replication": "rule:admin_api",
"volume:list_replication_targets": "rule:admin_api",
"backup:create" : "", "backup:create" : "",
"backup:delete": "", "backup:delete": "",
"backup:get": "", "backup:get": "",