More error handling on EMC VNX migration failure

If a LUN migration session is stopped or faulted after it has started,
the current implementation of the VNX Cinder driver only waits for the
timeout, and no logic cleans up the broken migration session.

This patch adds logic to detect and clean up the stopped/faulted
migration session.

Change-Id: If66109ca45fce303390c4144b4120e75f1aae138
Closes-Bug: #1420075
This commit is contained in:
Jeegn Chen 2015-02-10 11:26:24 +08:00
parent e8652b1e92
commit 6713e8f26d
2 changed files with 146 additions and 20 deletions

View File

@ -439,6 +439,26 @@ class EMCVNXCLIDriverTestData():
NDU_LIST_RESULT_WO_LICENSE = (
"Name of the software package: -Unisphere ",
0)
# Canned migration-property listing for a session that is still running
# ("Current State: MIGRATING", 50 percent complete). Used as a fake result
# for the migration verify command in test_volume_migration_stopped.
MIGRATE_PROPERTY_MIGRATING = """\
Source LU Name: volume-f6247ae1-8e1c-4927-aa7e-7f8e272e5c3d
Source LU ID: 63950
Dest LU Name: volume-f6247ae1-8e1c-4927-aa7e-7f8e272e5c3d_dest
Dest LU ID: 136
Migration Rate: high
Current State: MIGRATING
Percent Complete: 50
Time Remaining: 0 second(s)
"""
# Canned migration-property listing for a session that has halted
# ("Current State: STOPPED - Destination full"). The test feeds it as the
# second verify result and expects migrate_volume to raise.
MIGRATE_PROPERTY_STOPPED = """\
Source LU Name: volume-f6247ae1-8e1c-4927-aa7e-7f8e272e5c3d
Source LU ID: 63950
Dest LU Name: volume-f6247ae1-8e1c-4927-aa7e-7f8e272e5c3d_dest
Dest LU ID: 136
Migration Rate: high
Current State: STOPPED - Destination full
Percent Complete: 60
Time Remaining: 0 second(s)
"""
def SNAP_MP_CREATE_CMD(self, name='vol1', source='vol1'):
return ('lun', '-create', '-type', 'snap', '-primaryLunName',
@ -482,6 +502,9 @@ class EMCVNXCLIDriverTestData():
def MIGRATION_VERIFY_CMD(self, src_id):
    """Return the CLI argument tuple that lists the migration of src_id."""
    verify_args = ("migrate", "-list", "-source", src_id)
    return verify_args
def MIGRATION_CANCEL_CMD(self, src_id):
    """Return the CLI argument tuple that cancels the migration of src_id."""
    cancel_args = ("migrate", "-cancel", "-source", src_id, '-o')
    return cancel_args
def GETPORT_CMD(self):
    """Return the argument tuple for 'connection -getport'.

    The tuple carries the -address and -vlanid options.
    """
    port_query_args = ("connection", "-getport", "-address", "-vlanid")
    return port_query_args
@ -1469,6 +1492,52 @@ Time Remaining: 0 second(s)
poll=True)]
fake_cli.assert_has_calls(expect_cmd)
@mock.patch("cinder.volume.drivers.emc.emc_vnx_cli."
            "CommandLineHelper.create_lun_by_cmd",
            mock.Mock(return_value={'lun_id': 1}))
@mock.patch(
    "cinder.volume.drivers.emc.emc_vnx_cli.EMCVnxCliBase.get_lun_id",
    mock.Mock(side_effect=[1, 1]))
@mock.patch(
    "cinder.volume.drivers.emc.emc_vnx_cli.EMCVnxCliBase."
    "get_lun_id_by_name",
    mock.Mock(return_value=1))
def test_volume_migration_stopped(self):
    """migrate_volume raises and cancels when the session reports STOPPED.

    The fake CLI answers the verify command three times in a row:
    MIGRATING, then STOPPED, then the "not currently migrating" error
    (rc 23). The test asserts the expected exception message and that a
    cancel command is issued between the last two verify calls.
    """
    data = self.testData
    migrate_cmd = data.MIGRATION_CMD()
    verify_cmd = data.MIGRATION_VERIFY_CMD(1)
    cancel_cmd = data.MIGRATION_CANCEL_CMD(1)

    # One result entry per command; the verify command gets a sequence of
    # successive answers.
    verify_answers = [
        (data.MIGRATE_PROPERTY_MIGRATING, 0),
        (data.MIGRATE_PROPERTY_STOPPED, 0),
        ('The specified source LUN is not '
         'currently migrating', 23),
    ]
    commands = [migrate_cmd, verify_cmd, cancel_cmd]
    results = [SUCCEED, verify_answers, SUCCEED]
    fake_cli = self.driverSetup(commands, results)

    fake_host = {
        'capabilities': {
            'location_info': "unit_test_pool2|fakeSerial",
            'storage_protocol': 'iSCSI',
        },
    }
    self.assertRaisesRegexp(
        exception.VolumeBackendAPIException,
        "Migration of LUN 1 has been stopped or"
        " faulted.",
        self.driver.migrate_volume,
        None, data.test_volume, fake_host)

    expected_calls = [
        mock.call(*migrate_cmd, retry_disable=True, poll=True),
        mock.call(*verify_cmd, poll=True),
        mock.call(*verify_cmd, poll=False),
        mock.call(*cancel_cmd),
        mock.call(*verify_cmd, poll=False),
    ]
    fake_cli.assert_has_calls(expected_calls)
def test_create_destroy_volume_snapshot(self):
fake_cli = self.driverSetup()
@ -1930,11 +1999,17 @@ Time Remaining: 0 second(s)
cmd_detach_lun = ('lun', '-detach', '-name', 'vol2')
output_migrate = ("", 0)
cmd_migrate_verify = self.testData.MIGRATION_VERIFY_CMD(1)
output_migrate_verify = (r'The specified source LUN '
'is not currently migrating', 23)
cmd_migrate_cancel = self.testData.MIGRATION_CANCEL_CMD(1)
output_migrate_cancel = ("", 0)
commands = [cmd_dest, cmd_dest_np, cmd_migrate,
cmd_migrate_verify]
cmd_migrate_verify, cmd_migrate_cancel]
results = [output_dest, output_dest, output_migrate,
FAKE_ERROR_RETURN]
[FAKE_ERROR_RETURN, output_migrate_verify],
output_migrate_cancel]
fake_cli = self.driverSetup(commands, results)
self.assertRaises(exception.VolumeBackendAPIException,
@ -1962,6 +2037,9 @@ Time Remaining: 0 second(s)
poll=True),
mock.call(*self.testData.MIGRATION_VERIFY_CMD(1),
poll=True),
mock.call(*self.testData.MIGRATION_CANCEL_CMD(1)),
mock.call(*self.testData.MIGRATION_VERIFY_CMD(1),
poll=False),
mock.call(*self.testData.LUN_DELETE_CMD('vol2_dest')),
mock.call(*cmd_detach_lun),
mock.call(*self.testData.LUN_DELETE_CMD('vol2'))]

View File

@ -257,9 +257,11 @@ class CommandLineHelper(object):
CLI_RESP_PATTERN_LUN_NOT_EXIST = 'The (pool lun) may not exist'
CLI_RESP_PATTERN_SMP_NOT_ATTACHED = ('The specified Snapshot mount point '
'is not currently attached.')
CLI_RESP_PATTERN_SG_NAME_IN_USE = "Storage Group name already in use"
CLI_RESP_PATTERN_LUN_IN_SG_1 = "contained in a Storage Group"
CLI_RESP_PATTERN_LUN_IN_SG_2 = "Host LUN/LUN mapping still exists"
CLI_RESP_PATTERN_SG_NAME_IN_USE = 'Storage Group name already in use'
CLI_RESP_PATTERN_LUN_IN_SG_1 = 'contained in a Storage Group'
CLI_RESP_PATTERN_LUN_IN_SG_2 = 'Host LUN/LUN mapping still exists'
CLI_RESP_PATTERN_LUN_NOT_MIGRATING = ('The specified source LUN '
'is not currently migrating')
def __init__(self, configuration):
configuration.append_config_values(san.san_opts)
@ -900,30 +902,76 @@ class CommandLineHelper(object):
LOG.debug("Migration output: %s", out)
if rc == 0:
# parse the percentage
out = re.split(r'\n', out)
log = "Migration in process %s %%." % out[7].split(": ")[1]
LOG.debug(log)
state = re.search(r'Current State:\s*([^\n]+)', out)
percentage = re.search(r'Percent Complete:\s*([^\n]+)', out)
if state is not None:
current_state = state.group(1)
percentage_complete = percentage.group(1)
else:
self._raise_cli_error(cmd_migrate_list, rc, out)
if ("FAULTED" in current_state or
"STOPPED" in current_state):
reason = _("Migration of LUN %s has been stopped or"
" faulted.") % src_id
raise exception.VolumeBackendAPIException(data=reason)
if ("TRANSITIONING" in current_state or
"MIGRATING" in current_state):
LOG.debug("Migration of LUN %(src_id)s in process "
"%(percentage)s %%.",
{"src_id": src_id,
"percentage": percentage_complete})
else:
if re.search(r'The specified source LUN '
'is not currently migrating', out):
if re.search(self.CLI_RESP_PATTERN_LUN_NOT_MIGRATING, out):
LOG.debug("Migration of LUN %s is finished.", src_id)
mig_ready = True
else:
reason = _("Querying migrating status error.")
LOG.error(reason)
raise exception.VolumeBackendAPIException(
data="%(reason)s : %(output)s" %
{'reason': reason, 'output': out})
self._raise_cli_error(cmd_migrate_list, rc, out)
return mig_ready
def migration_disappeared(poll=False):
    """Report whether the migration session of the source LUN is gone.

    Lists the migration for ``src_id`` (taken from the enclosing scope)
    through ``self.command_execute``.

    :param poll: forwarded unchanged as the ``poll`` keyword of
                 ``command_execute``
    :returns: True when the command fails with the "not currently
              migrating" response; False when the listing succeeds
              (rc == 0), meaning a session is still reported
    :raises: whatever ``self._raise_cli_error`` raises for any other
             non-zero return code
    """
    cmd_migrate_list = ('migrate', '-list', '-source', src_id)
    out, rc = self.command_execute(*cmd_migrate_list,
                                   poll=poll)
    if rc != 0:
        if re.search(self.CLI_RESP_PATTERN_LUN_NOT_MIGRATING, out):
            LOG.debug("Migration of LUN %s is finished.", src_id)
            return True
        # The message needs a %s placeholder: logging formats it with
        # src_id, and without one the formatting fails and the LUN id is
        # never reported.
        LOG.error(_LE("Failed to query migration status of LUN %s."),
                  src_id)
        self._raise_cli_error(cmd_migrate_list, rc, out)
    return False
eventlet.sleep(INTERVAL_30_SEC)
if migration_is_ready(True):
return True
self._wait_for_a_condition(migration_is_ready,
interval=INTERVAL_30_SEC)
try:
if migration_is_ready(True):
return True
self._wait_for_a_condition(
migration_is_ready,
interval=INTERVAL_30_SEC,
ignorable_exception_arbiter=lambda ex:
type(ex) is not exception.VolumeBackendAPIException)
# Migration cancellation for clean up
except exception.VolumeBackendAPIException:
with excutils.save_and_reraise_exception():
LOG.error(_LE("Migration of LUN %s failed to complete."),
src_id)
self.migration_cancel(src_id)
self._wait_for_a_condition(migration_disappeared,
interval=INTERVAL_30_SEC)
return True
# Cancel migration in case where status is faulted or stopped
def migration_cancel(self, src_id):
    """Cancel the migration session whose source LUN is ``src_id``.

    Runs ``migrate -cancel -source <src_id> -o`` through
    ``command_execute`` and passes any non-zero return code to
    ``_raise_cli_error``.
    """
    LOG.info(_LI("Cancelling Migration from LUN %s."), src_id)
    cancel_args = ('migrate', '-cancel', '-source', src_id, '-o')
    output, return_code = self.command_execute(*cancel_args)
    if return_code == 0:
        return
    self._raise_cli_error(cancel_args, return_code, output)
def get_storage_group(self, name, poll=True):
# ALU/HLU as key/value map
@ -1604,7 +1652,7 @@ class CommandLineHelper(object):
class EMCVnxCliBase(object):
"""This class defines the functions to use the native CLI functionality."""
VERSION = '05.03.04'
VERSION = '05.03.05'
stats = {'driver_version': VERSION,
'storage_protocol': None,
'vendor_name': 'EMC',