Merge "VMAX driver - Replication failover performance improvement"
This commit is contained in:
commit
b89e73a2ac
@ -3240,25 +3240,6 @@ class VMAXProvisionTest(test.TestCase):
|
||||
self.data.rdf_group_no, self.data.device_id2, extra_specs)
|
||||
mock_del_rdf.assert_called_once()
|
||||
|
||||
def test_failover_volume(self):
    """Check that failover and failback both modify the RDF device pair.

    ``provision.failover_volume`` is called once with the failover flag
    set and once with it cleared (local volume state is an empty string
    in both cases). Each call must result in exactly one
    ``rest.modify_rdf_device_pair`` call carrying the array, device id,
    RDF group and extra specs.
    """
    array = self.data.array
    device_id = self.data.device_id
    rdf_group_name = self.data.rdf_group_name
    extra_specs = self.data.extra_specs
    with mock.patch.object(
            self.provision.rest, 'modify_rdf_device_pair') as mod_rdf:
        # Failover direction.
        self.provision.failover_volume(
            array, device_id, rdf_group_name,
            extra_specs, '', True)
        mod_rdf.assert_called_once_with(
            array, device_id, rdf_group_name, extra_specs)
        # Start from a clean call record before checking failback.
        mod_rdf.reset_mock()
        self.provision.failover_volume(
            array, device_id, rdf_group_name,
            extra_specs, '', False)
        mod_rdf.assert_called_once_with(
            array, device_id, rdf_group_name, extra_specs)
|
||||
|
||||
@mock.patch.object(rest.VMAXRest, 'get_storage_group',
|
||||
return_value=None)
|
||||
def test_create_volume_group_success(self, mock_get_sg):
|
||||
@ -7030,127 +7011,19 @@ class VMAXCommonReplicationTest(test.TestCase):
|
||||
|
||||
def test_failover_host(self):
|
||||
volumes = [self.data.test_volume, self.data.test_clone_volume]
|
||||
with mock.patch.object(self.common, '_failover_volume',
|
||||
return_value={}) as mock_fo:
|
||||
with mock.patch.object(self.common, '_failover_replication',
|
||||
return_value=(None, {})) as mock_fo:
|
||||
self.common.failover_host(volumes)
|
||||
self.assertEqual(2, mock_fo.call_count)
|
||||
|
||||
def test_failover_host_exception(self):
|
||||
volumes = [self.data.test_volume, self.data.test_clone_volume]
|
||||
self.assertRaises(exception.VolumeBackendAPIException,
|
||||
self.common.failover_host,
|
||||
volumes, secondary_id="default")
|
||||
mock_fo.assert_called_once()
|
||||
|
||||
@mock.patch.object(common.VMAXCommon, 'failover_replication',
|
||||
return_value=({}, {}))
|
||||
@mock.patch.object(common.VMAXCommon, '_failover_volume',
|
||||
return_value={})
|
||||
def test_failover_host_groups(self, mock_fv, mock_fg):
|
||||
def test_failover_host_groups(self, mock_fg):
|
||||
volumes = [self.data.test_volume_group_member]
|
||||
group1 = self.data.test_group
|
||||
self.common.failover_host(volumes, None, [group1])
|
||||
mock_fv.assert_not_called()
|
||||
mock_fg.assert_called_once()
|
||||
|
||||
def test_failover_volume(self):
    """Verify the model update produced by ``common._failover_volume``.

    The helper is run once as a failover and once as a failback. In both
    cases the returned update must carry the expected replication status,
    with ``replication_driver_data`` equal to ``data.provider_location``
    and ``provider_location`` equal to ``data.provider_location3``.
    """
    scenarios = (
        (True, fields.ReplicationStatus.FAILED_OVER),
        (False, fields.ReplicationStatus.ENABLED),
    )
    for failover, expected_status in scenarios:
        ref_model_update = {
            'volume_id': self.data.test_volume.id,
            'updates':
                {'replication_status': expected_status,
                 'replication_driver_data': self.data.provider_location,
                 'provider_location': self.data.provider_location3}}
        model_update = self.common._failover_volume(
            self.data.test_volume, failover, self.extra_specs)

        # Decode string representations of dicts into dicts, because
        # the string representations are randomly ordered and therefore
        # hard to compare.
        updates = model_update['updates']
        for key in ('replication_driver_data', 'provider_location'):
            updates[key] = ast.literal_eval(updates[key])

        self.assertEqual(ref_model_update, model_update)
|
||||
|
||||
def test_failover_legacy_volume(self):
    """Verify ``_failover_volume`` for a volume with legacy location data.

    Fails over ``data.test_legacy_vol`` and checks that the update reports
    FAILED_OVER, with ``replication_driver_data`` equal to
    ``data.legacy_provider_location`` and ``provider_location`` equal to
    ``data.legacy_provider_location2``.
    """
    ref_model_update = {
        'volume_id': self.data.test_volume.id,
        'updates':
            {'replication_status': fields.ReplicationStatus.FAILED_OVER,
             'replication_driver_data': self.data.legacy_provider_location,
             'provider_location': self.data.legacy_provider_location2}}
    model_update = self.common._failover_volume(
        self.data.test_legacy_vol, True, self.extra_specs)

    # Decode string representations of dicts into dicts, because
    # the string representations are randomly ordered and therefore
    # hard to compare.
    updates = model_update['updates']
    for key in ('replication_driver_data', 'provider_location'):
        updates[key] = ast.literal_eval(updates[key])

    self.assertEqual(ref_model_update, model_update)
|
||||
|
||||
def test_failover_volume_exception(self):
    """Verify the model update when the provisioning failover call raises.

    ``provision.failover_volume`` is patched to raise
    ``VolumeBackendAPIException``. ``_failover_volume`` is then expected
    to return an update with FAILOVER_ERROR status, with
    ``replication_driver_data`` equal to the text form of
    ``data.provider_location3`` and ``provider_location`` equal to the
    text form of ``data.provider_location``.
    """
    with mock.patch.object(
            self.provision, 'failover_volume',
            side_effect=exception.VolumeBackendAPIException):
        ref_model_update = {
            'volume_id': self.data.test_volume.id,
            'updates': {'replication_status':
                        fields.ReplicationStatus.FAILOVER_ERROR,
                        'replication_driver_data': six.text_type(
                            self.data.provider_location3),
                        'provider_location': six.text_type(
                            self.data.provider_location)}}
        model_update = self.common._failover_volume(
            self.data.test_volume, True, self.extra_specs)
        self.assertEqual(ref_model_update, model_update)
|
||||
|
||||
@mock.patch.object(
    common.VMAXCommon, '_find_device_on_array',
    side_effect=[None, VMAXCommonData.device_id,
                 VMAXCommonData.device_id, VMAXCommonData.device_id])
@mock.patch.object(
    common.VMAXCommon, '_get_masking_views_from_volume',
    side_effect=['OS-host-MV', None, exception.VolumeBackendAPIException])
def test_recover_volumes_on_failback(self, mock_mv, mock_dev):
    """Walk ``recover_volumes_on_failback`` through each outcome.

    The two patched helpers return a different value on each successive
    call, so the four calls below hit, in order:

    1. no device found             -> status 'error'
    2. device found, masking view  -> status 'in-use'
    3. device found, no view       -> status 'available'
    4. masking view lookup raises  -> status 'available'
    """
    expected_statuses = ('error', 'in-use', 'available', 'available')
    for expected_status in expected_statuses:
        recovery = self.common.recover_volumes_on_failback(
            self.data.test_volume, self.extra_specs)
        self.assertEqual(expected_status, recovery['updates']['status'])
|
||||
|
||||
def test_get_remote_target_device(self):
|
||||
target_device1, _, _, _, _ = (
|
||||
self.common.get_remote_target_device(
|
||||
@ -7405,6 +7278,13 @@ class VMAXCommonReplicationTest(test.TestCase):
|
||||
self.assertEqual(fields.ReplicationStatus.ERROR,
|
||||
model_update['replication_status'])
|
||||
|
||||
@mock.patch.object(provision.VMAXProvision, 'failover_group')
def test_failover_replication_metro(self, mock_fo):
    """Check that a Metro failover never calls ``failover_group``.

    ``_failover_replication`` is invoked with ``is_metro=True`` and the
    patched ``VMAXProvision.failover_group`` must remain uncalled.
    """
    volumes = [self.data.test_volume]
    # NOTE(review): ``group`` is not defined inside this test; it
    # presumably resolves to a module-level name -- confirm.
    # The two-element result is unpacked but not otherwise inspected.
    _, _ = self.common._failover_replication(
        volumes, group, None, host=True, is_metro=True)
    mock_fo.assert_not_called()
|
||||
|
||||
@mock.patch.object(utils.VMAXUtils, 'get_volume_group_utils',
|
||||
return_value=(VMAXCommonData.array, {}))
|
||||
@mock.patch.object(common.VMAXCommon, '_cleanup_group_replication')
|
||||
@ -7485,16 +7365,13 @@ class VMAXCommonReplicationTest(test.TestCase):
|
||||
|
||||
@mock.patch.object(common.VMAXCommon, '_failover_replication',
|
||||
return_value=({}, {}))
|
||||
@mock.patch.object(common.VMAXCommon, '_failover_volume',
|
||||
return_value={})
|
||||
def test_failover_host_async(self, mock_fv, mock_fg):
|
||||
def test_failover_host_async(self, mock_fg):
|
||||
volumes = [self.data.test_volume]
|
||||
extra_specs = deepcopy(self.extra_specs)
|
||||
extra_specs['rep_mode'] = utils.REP_ASYNC
|
||||
with mock.patch.object(common.VMAXCommon, '_initial_setup',
|
||||
return_value=extra_specs):
|
||||
self.async_driver.common.failover_host(volumes, None, [])
|
||||
mock_fv.assert_not_called()
|
||||
mock_fg.assert_called_once()
|
||||
|
||||
@mock.patch.object(common.VMAXCommon, '_retype_volume', return_value=True)
|
||||
|
@ -2843,8 +2843,7 @@ class VMAXCommon(object):
|
||||
% {'backend': self.configuration.safe_get(
|
||||
'volume_backend_name')})
|
||||
LOG.error(exception_message)
|
||||
raise exception.VolumeBackendAPIException(
|
||||
data=exception_message)
|
||||
return
|
||||
else:
|
||||
if self.failover:
|
||||
self.failover = False
|
||||
@ -2858,8 +2857,7 @@ class VMAXCommon(object):
|
||||
% {'backend': self.configuration.safe_get(
|
||||
'volume_backend_name')})
|
||||
LOG.error(exception_message)
|
||||
raise exception.VolumeBackendAPIException(
|
||||
data=exception_message)
|
||||
return
|
||||
|
||||
if groups:
|
||||
for group in groups:
|
||||
@ -2876,118 +2874,74 @@ class VMAXCommon(object):
|
||||
volume_update_list += vol_updates
|
||||
|
||||
rep_mode = self.rep_config['mode']
|
||||
if rep_mode == utils.REP_ASYNC:
|
||||
|
||||
sync_vol_list, non_rep_vol_list, async_vol_list, metro_list = (
|
||||
[], [], [], [])
|
||||
for volume in volumes:
|
||||
array = ast.literal_eval(volume.provider_location)['array']
|
||||
extra_specs = self._initial_setup(volume)
|
||||
extra_specs[utils.ARRAY] = array
|
||||
if self.utils.is_replication_enabled(extra_specs):
|
||||
device_id = self._find_device_on_array(
|
||||
volume, extra_specs)
|
||||
self._sync_check(
|
||||
array, device_id, volume.name, extra_specs)
|
||||
if rep_mode == utils.REP_SYNC:
|
||||
sync_vol_list.append(volume)
|
||||
elif rep_mode == utils.REP_ASYNC:
|
||||
async_vol_list.append(volume)
|
||||
else:
|
||||
metro_list.append(volume)
|
||||
else:
|
||||
non_rep_vol_list.append(volume)
|
||||
|
||||
if len(async_vol_list) > 0:
|
||||
vol_grp_name = self.utils.get_async_rdf_managed_grp_name(
|
||||
self.rep_config)
|
||||
__, volume_update_list = (
|
||||
__, vol_updates = (
|
||||
self._failover_replication(
|
||||
volumes, None, vol_grp_name,
|
||||
async_vol_list, None, vol_grp_name,
|
||||
secondary_backend_id=group_fo, host=True))
|
||||
volume_update_list += vol_updates
|
||||
|
||||
for volume in volumes:
|
||||
extra_specs = self._initial_setup(volume)
|
||||
if self.utils.is_replication_enabled(extra_specs):
|
||||
if rep_mode == utils.REP_SYNC:
|
||||
model_update = self._failover_volume(
|
||||
volume, self.failover, extra_specs)
|
||||
volume_update_list.append(model_update)
|
||||
else:
|
||||
if self.failover:
|
||||
# Since the array has been failed-over,
|
||||
# volumes without replication should be in error.
|
||||
if len(sync_vol_list) > 0:
|
||||
extra_specs = self._initial_setup(sync_vol_list[0])
|
||||
array = ast.literal_eval(
|
||||
sync_vol_list[0].provider_location)['array']
|
||||
extra_specs[utils.ARRAY] = array
|
||||
temp_grp_name = self.utils.get_temp_failover_grp_name(
|
||||
self.rep_config)
|
||||
self.provision.create_volume_group(
|
||||
array, temp_grp_name, extra_specs)
|
||||
device_ids = self._get_volume_device_ids(sync_vol_list, array)
|
||||
self.masking.add_volumes_to_storage_group(
|
||||
array, device_ids, temp_grp_name, extra_specs)
|
||||
__, vol_updates = (
|
||||
self._failover_replication(
|
||||
sync_vol_list, None, temp_grp_name,
|
||||
secondary_backend_id=group_fo, host=True))
|
||||
volume_update_list += vol_updates
|
||||
self.rest.delete_storage_group(array, temp_grp_name)
|
||||
|
||||
if len(metro_list) > 0:
|
||||
__, vol_updates = (
|
||||
self._failover_replication(
|
||||
sync_vol_list, None, None, secondary_backend_id=group_fo,
|
||||
host=True, is_metro=True))
|
||||
volume_update_list += vol_updates
|
||||
|
||||
if len(non_rep_vol_list) > 0:
|
||||
if self.failover:
|
||||
# Since the array has been failed-over,
|
||||
# volumes without replication should be in error.
|
||||
for vol in non_rep_vol_list:
|
||||
volume_update_list.append({
|
||||
'volume_id': volume.id,
|
||||
'volume_id': vol.id,
|
||||
'updates': {'status': 'error'}})
|
||||
else:
|
||||
# This is a failback, so we will attempt
|
||||
# to recover non-failed over volumes
|
||||
recovery = self.recover_volumes_on_failback(
|
||||
volume, extra_specs)
|
||||
volume_update_list.append(recovery)
|
||||
|
||||
LOG.info("Failover host complete.")
|
||||
return secondary_id, volume_update_list, group_update_list
|
||||
|
||||
def _failover_volume(self, vol, failover, extra_specs):
    """Failover a volume.

    On success the volume's ``provider_location`` and
    ``replication_driver_data`` are swapped in the returned update. On any
    error the original values are returned unchanged and the replication
    status is set to FAILOVER_ERROR; the exception is logged, not raised.

    :param vol: the volume object
    :param failover: flag to indicate failover or failback -- bool
    :param extra_specs: the extra specifications
    :returns: model_update -- dict
    """
    loc = vol.provider_location
    rep_data = vol.replication_driver_data
    try:
        name = ast.literal_eval(loc)
        replication_keybindings = ast.literal_eval(rep_data)
        try:
            array = name['array']
        except KeyError:
            # No 'array' key: derive the array from the
            # 'keybindings' -> 'SystemName' entry instead.
            array = (name['keybindings']
                     ['SystemName'].split('+')[1].strip('-'))
        device_id = self._find_device_on_array(vol, {utils.ARRAY: array})

        # Only the RDF group and local volume state are needed here.
        (_, _, rdf_group, local_vol_state, _) = (
            self.get_remote_target_device(array, vol, device_id))

        self._sync_check(array, device_id, vol.name, extra_specs)
        self.provision.failover_volume(
            array, device_id, rdf_group, extra_specs,
            local_vol_state, failover)

        if failover:
            new_status = REPLICATION_FAILOVER
        else:
            new_status = REPLICATION_ENABLED

        # Transfer ownership to secondary_backend_id and
        # update provider_location field
        loc = six.text_type(replication_keybindings)
        rep_data = six.text_type(name)

    except Exception as ex:
        msg = ('Failed to failover volume %(volume_id)s. '
               'Error: %(error)s.')
        LOG.error(msg, {'volume_id': vol.id,
                        'error': ex})
        new_status = FAILOVER_ERROR

    model_update = {'volume_id': vol.id,
                    'updates':
                        {'replication_status': new_status,
                         'replication_driver_data': rep_data,
                         'provider_location': loc}}
    return model_update
|
||||
|
||||
def recover_volumes_on_failback(self, volume, extra_specs):
    """Recover volumes on failback.

    On failback, attempt to recover non RE(replication enabled)
    volumes from primary array.

    The resulting status is 'error' when the device cannot be found,
    'in-use' when the device has a masking view, and 'available'
    otherwise (including when the masking view lookup itself fails).

    :param volume: the volume object
    :param extra_specs: the extra specifications
    :returns: volume_update
    """
    # Check if volume still exists on the primary
    volume_update = {'volume_id': volume.id}
    device_id = self._find_device_on_array(volume, extra_specs)
    if not device_id:
        volume_update['updates'] = {'status': 'error'}
    else:
        try:
            maskingview = self._get_masking_views_from_volume(
                extra_specs[utils.ARRAY], device_id, None)
        except Exception:
            # Best effort: a failed lookup is treated as "no masking view".
            maskingview = None
            LOG.debug("Unable to determine if volume is in masking view.")
        if not maskingview:
            volume_update['updates'] = {'status': 'available'}
        else:
            volume_update['updates'] = {'status': 'in-use'}
    return volume_update
|
||||
|
||||
def get_remote_target_device(self, array, volume, device_id):
|
||||
"""Get the remote target for a given volume.
|
||||
|
||||
@ -4121,7 +4075,7 @@ class VMAXCommon(object):
|
||||
|
||||
def _failover_replication(
|
||||
self, volumes, group, vol_grp_name,
|
||||
secondary_backend_id=None, host=False):
|
||||
secondary_backend_id=None, host=False, is_metro=False):
|
||||
"""Failover replication for a group.
|
||||
|
||||
:param volumes: the list of volumes
|
||||
@ -4139,7 +4093,8 @@ class VMAXCommon(object):
|
||||
|
||||
try:
|
||||
extra_specs = self._initial_setup(volumes[0])
|
||||
array = extra_specs[utils.ARRAY]
|
||||
array = ast.literal_eval(volumes[0].provider_location)['array']
|
||||
extra_specs[utils.ARRAY] = array
|
||||
if group:
|
||||
volume_group = self._find_volume_group(array, group)
|
||||
if volume_group:
|
||||
@ -4148,12 +4103,13 @@ class VMAXCommon(object):
|
||||
if vol_grp_name is None:
|
||||
raise exception.GroupNotFound(group_id=group.id)
|
||||
|
||||
rdf_group_no, _ = self.get_rdf_details(array)
|
||||
# As we only support a single replication target, ignore
|
||||
# any secondary_backend_id which is not 'default'
|
||||
failover = False if secondary_backend_id == 'default' else True
|
||||
self.provision.failover_group(
|
||||
array, vol_grp_name, rdf_group_no, extra_specs, failover)
|
||||
if not is_metro:
|
||||
rdf_group_no, _ = self.get_rdf_details(array)
|
||||
self.provision.failover_group(
|
||||
array, vol_grp_name, rdf_group_no, extra_specs, failover)
|
||||
if failover:
|
||||
model_update.update({
|
||||
'replication_status':
|
||||
|
@ -593,35 +593,6 @@ class VMAXProvision(object):
|
||||
rc = timer.start(interval=UNLINK_INTERVAL).wait()
|
||||
return rc
|
||||
|
||||
def failover_volume(self, array, device_id, rdf_group,
                    extra_specs, local_vol_state, failover):
    """Failover or back a volume pair.

    Nothing is done when the local volume state equals WRITE_DISABLED;
    that check runs before the ``failover`` flag is looked at, so it
    applies to both directions. Otherwise the RDF device pair is modified
    through the REST client while holding a lock named after the RDF
    group. The ``failover`` flag only selects the wording of the log
    message.

    :param array: the array serial number
    :param device_id: the source device id
    :param rdf_group: the rdf group number
    :param extra_specs: extra specs
    :param local_vol_state: the local volume state
    :param failover: flag to indicate failover or failback -- bool
    """
    if local_vol_state == WRITE_DISABLED:
        LOG.info("Volume %(dev)s is already failed over.",
                 {'dev': device_id})
        return
    if failover:
        action = "Failing over"
    else:
        action = "Failing back"
    LOG.info("%(action)s rdf pair: source device: %(src)s ",
             {'action': action, 'src': device_id})

    # The lock name is built from the rdfg_no argument, so the decorated
    # helper takes the RDF group as its own parameter.
    @coordination.synchronized('emc-rg-{rdfg_no}')
    def _failover_volume(rdfg_no):
        self.rest.modify_rdf_device_pair(
            array, device_id, rdfg_no, extra_specs)

    _failover_volume(rdf_group)
|
||||
|
||||
def get_or_create_volume_group(self, array, group, extra_specs):
|
||||
"""Get or create a volume group.
|
||||
|
||||
|
@ -811,3 +811,16 @@ class VMAXUtils(object):
|
||||
[REP_ASYNC, REP_METRO]):
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
def get_temp_failover_grp_name(rep_config):
    """Get the temporary group name used for failover.

    The name has the form ``OS-<rdf_group_label>-temp-rdf-sg``, where the
    label is read from ``rep_config['rdf_group_label']``.

    :param rep_config: the replication config
    :return: temp_grp_name
    """
    temp_grp_name = ("OS-%(rdf)s-temp-rdf-sg"
                     % {'rdf': rep_config['rdf_group_label']})
    LOG.debug("The temp rdf managed group name is %(name)s",
              {'name': temp_grp_name})
    return temp_grp_name
|
||||
|
Loading…
x
Reference in New Issue
Block a user