NetApp SolidFire: Add active/active replication

This patch adds support for active/active replication to the
NetApp SolidFire driver.

failover_host has been split into two phases (failover and
failover_completed) in order to allow all nodes in the cluster to
properly switch between the primary and replication back-ends after a
successful failover or failback procedure.

This patch also enables the `SUPPORTS_ACTIVE_ACTIVE` flag on the SolidFire
driver, to allow configuring SolidFire backends when the volume service
is clustered.

Change-Id: I327f61db5d53c0c3ede962ce52f1a3c00e74b86b
This commit is contained in:
Fernando Ferraz 2020-01-14 14:30:27 +00:00
parent f24eb2fc63
commit 6840ddf962
3 changed files with 129 additions and 27 deletions

View File

@ -150,6 +150,26 @@ class SolidFireVolumeTestCase(test.TestCase):
'qos': None, 'qos': None,
'iqn': 'super_fake_iqn'} 'iqn': 'super_fake_iqn'}
self.fake_primary_cluster = (
{'endpoint': {
'passwd': 'admin',
'port': 443,
'url': 'https://192.168.139.11:443',
'svip': '10.10.8.11',
'mvip': '10.10.8.12',
'login': 'admin'},
'name': 'volume-f0632d53-d836-474c-a5bc-478ef18daa32',
'clusterPairID': 33,
'uuid': 'f0632d53-d836-474c-a5bc-478ef18daa32',
'svip': '10.10.8.11',
'mvipNodeID': 1,
'repCount': 1,
'encryptionAtRestState': 'disabled',
'attributes': {},
'mvip': '10.10.8.12',
'ensemble': ['10.10.5.130'],
'svipNodeID': 1})
self.cluster_pairs = ( self.cluster_pairs = (
[{'uniqueID': 'lu9f', [{'uniqueID': 'lu9f',
'endpoint': {'passwd': 'admin', 'port': 443, 'endpoint': {'passwd': 'admin', 'port': 443,
@ -3121,6 +3141,61 @@ class SolidFireVolumeTestCase(test.TestCase):
mock_create_cluster_reference.assert_called() mock_create_cluster_reference.assert_called()
mock_get_sfvol_by_cinder_vref.assert_called() mock_get_sfvol_by_cinder_vref.assert_called()
@mock.patch.object(solidfire.SolidFireDriver, '_set_cluster_pairs')
@mock.patch.object(solidfire.SolidFireDriver, 'failover')
@mock.patch.object(solidfire.SolidFireDriver, 'failover_completed')
def test_failover_host(self, mock_failover_completed,
                       mock_failover,
                       mock_set_cluster_pairs):
    """failover_host delegates to failover, then failover_completed.

    Both phases are mocked, so this test only checks the wiring: the
    arguments forwarded to each phase and the values handed back to the
    caller.
    """
    fake_context = None
    fake_cinder_vols = [{'id': 'testvol1'}, {'id': 'testvol2'}]
    fake_failover_updates = [{'volume_id': 'testvol1',
                              'updates': {
                                  'replication_status': 'failed-over'}},
                             {'volume_id': 'testvol2',
                              'updates': {
                                  'replication_status': 'failed-over'}}]
    mock_failover.return_value = "secondary", fake_failover_updates, []

    drv_args = {'active_backend_id': None}
    sfv = solidfire.SolidFireDriver(configuration=self.configuration,
                                    **drv_args)

    cluster_id, updates, _ = sfv.failover_host(
        fake_context, fake_cinder_vols, secondary_id='secondary',
        groups=None)

    # NOTE: `mock.called_with(...)` is NOT an assertion; on a Mock it is
    # just an auto-created child attribute and always "passes". Use the
    # real assert_* helpers so a wiring regression actually fails.
    mock_failover.assert_called_once_with(
        fake_context, fake_cinder_vols, "secondary", None)
    mock_failover_completed.assert_called_once_with(
        fake_context, "secondary")
    self.assertEqual("secondary", cluster_id)
    self.assertEqual(fake_failover_updates, updates)
# Exercises failover_completed in both directions: first with a secondary
# backend id (failover), then with an empty id (failback).
@mock.patch.object(solidfire.SolidFireDriver, '_set_cluster_pairs')
@mock.patch.object(solidfire.SolidFireDriver, '_create_cluster_reference')
def test_failover_completed(self, mock_create_cluster_reference,
mock_set_cluster_pairs):
ctx = context.get_admin_context()
drv_args = {'active_backend_id': None}
sfv = solidfire.SolidFireDriver(configuration=self.configuration,
**drv_args)
sfv.cluster_pairs = self.cluster_pairs
# Failover: a non-empty backend id must flag the driver as failed over
# and make the first cluster pair the active cluster.
sfv.failover_completed(ctx, "secondary")
self.assertTrue(sfv.failed_over)
self.assertDictEqual(sfv.active_cluster, sfv.cluster_pairs[0])
# Failback: an empty backend id must clear the failed-over flag and
# rebuild the active cluster via _create_cluster_reference (mocked here
# to return the fake primary cluster).
mock_create_cluster_reference.return_value = self.fake_primary_cluster
sfv.failover_completed(ctx, '')
self.assertFalse(sfv.failed_over)
mock_create_cluster_reference.assert_called()
self.assertDictEqual(sfv.active_cluster, self.fake_primary_cluster)
@mock.patch.object(solidfire.SolidFireDriver, '_issue_api_request') @mock.patch.object(solidfire.SolidFireDriver, '_issue_api_request')
@mock.patch.object(solidfire.SolidFireDriver, '_create_cluster_reference') @mock.patch.object(solidfire.SolidFireDriver, '_create_cluster_reference')
@mock.patch.object(solidfire.SolidFireDriver, '_set_cluster_pairs') @mock.patch.object(solidfire.SolidFireDriver, '_set_cluster_pairs')
@ -3130,15 +3205,15 @@ class SolidFireVolumeTestCase(test.TestCase):
@mock.patch.object(solidfire.SolidFireDriver, '_failover_volume') @mock.patch.object(solidfire.SolidFireDriver, '_failover_volume')
@mock.patch.object(solidfire.SolidFireDriver, '_get_create_account') @mock.patch.object(solidfire.SolidFireDriver, '_get_create_account')
@mock.patch.object(solidfire.SolidFireDriver, '_get_remote_info_by_id') @mock.patch.object(solidfire.SolidFireDriver, '_get_remote_info_by_id')
def test_failover_host(self, mock_get_remote_info_by_id, def test_failover(self, mock_get_remote_info_by_id,
mock_get_create_account, mock_get_create_account,
mock_failover_volume, mock_failover_volume,
mock_map_sf_volumes, mock_map_sf_volumes,
mock_get_cluster_info, mock_get_cluster_info,
mock_update_cluster_status, mock_update_cluster_status,
mock_set_cluster_pairs, mock_set_cluster_pairs,
mock_create_cluster_reference, mock_create_cluster_reference,
mock_issue_api_request): mock_issue_api_request):
all_mocks = locals() all_mocks = locals()
@ -3176,7 +3251,7 @@ class SolidFireVolumeTestCase(test.TestCase):
**drv_args) **drv_args)
self.assertRaises(exception.UnableToFailOver, self.assertRaises(exception.UnableToFailOver,
sfv.failover_host, ctx, cinder_vols, 'fake', None) sfv.failover, ctx, cinder_vols, 'fake', None)
mock_map_sf_volumes.assert_not_called() mock_map_sf_volumes.assert_not_called()
fake_replication_device = {'backend_id': 'fake', fake_replication_device = {'backend_id': 'fake',
@ -3187,26 +3262,28 @@ class SolidFireVolumeTestCase(test.TestCase):
self.configuration.replication_device = [fake_replication_device] self.configuration.replication_device = [fake_replication_device]
reset_mocks() reset_mocks()
drv_args = {'active_backend_id': None} drv_args = {'active_backend_id': ''}
sfv = solidfire.SolidFireDriver(configuration=self.configuration, sfv = solidfire.SolidFireDriver(configuration=self.configuration,
**drv_args) **drv_args)
sfv.replication_enabled = True
self.assertRaises(exception.InvalidReplicationTarget, self.assertRaises(exception.InvalidReplicationTarget,
sfv.failover_host, ctx, cinder_vols, 'default', None) sfv.failover, ctx, cinder_vols, 'default', None)
mock_map_sf_volumes.assert_not_called() mock_map_sf_volumes.assert_not_called()
reset_mocks() reset_mocks()
drv_args = {'active_backend_id': None} drv_args = {'active_backend_id': None}
sfv = solidfire.SolidFireDriver(configuration=self.configuration, sfv = solidfire.SolidFireDriver(configuration=self.configuration,
**drv_args) **drv_args)
sfv.replication_enabled = True
self.assertRaises(exception.InvalidReplicationTarget, self.assertRaises(exception.InvalidReplicationTarget,
sfv.failover_host, ctx, cinder_vols, sfv.failover, ctx, cinder_vols,
secondary_id='not_fake_id', groups=None) secondary_id='not_fake_id', groups=None)
mock_map_sf_volumes.assert_not_called() mock_map_sf_volumes.assert_not_called()
mock_create_cluster_reference.return_value = self.cluster_pairs[0] mock_create_cluster_reference.return_value = self.cluster_pairs[0]
reset_mocks() reset_mocks()
drv_args = {'active_backend_id': 'secondary'} drv_args = {'active_backend_id': 'fake'}
sfv = solidfire.SolidFireDriver(configuration=self.configuration, sfv = solidfire.SolidFireDriver(configuration=self.configuration,
**drv_args) **drv_args)
sfv.cluster_pairs = self.cluster_pairs sfv.cluster_pairs = self.cluster_pairs
@ -3223,7 +3300,6 @@ class SolidFireVolumeTestCase(test.TestCase):
mock_failover_volume.assert_called() mock_failover_volume.assert_called()
mock_map_sf_volumes.assert_called() mock_map_sf_volumes.assert_called()
mock_update_cluster_status.assert_called() mock_update_cluster_status.assert_called()
mock_set_cluster_pairs.assert_called()
mock_create_cluster_reference.assert_called() mock_create_cluster_reference.assert_called()
reset_mocks() reset_mocks()
@ -3233,7 +3309,7 @@ class SolidFireVolumeTestCase(test.TestCase):
sfv.cluster_pairs = self.cluster_pairs sfv.cluster_pairs = self.cluster_pairs
sfv.cluster_pairs[0]['backend_id'] = 'fake' sfv.cluster_pairs[0]['backend_id'] = 'fake'
sfv.replication_enabled = True sfv.replication_enabled = True
cluster_id, updates, _ = sfv.failover_host( cluster_id, updates, _ = sfv.failover(
ctx, cinder_vols, secondary_id='fake', groups=None) ctx, cinder_vols, secondary_id='fake', groups=None)
self.assertEqual(5, len(updates)) self.assertEqual(5, len(updates))
for update in updates: for update in updates:
@ -3245,7 +3321,6 @@ class SolidFireVolumeTestCase(test.TestCase):
mock_failover_volume.assert_called() mock_failover_volume.assert_called()
mock_map_sf_volumes.assert_called() mock_map_sf_volumes.assert_called()
mock_update_cluster_status.assert_called() mock_update_cluster_status.assert_called()
mock_set_cluster_pairs.assert_called()
mock_create_cluster_reference.assert_called() mock_create_cluster_reference.assert_called()
@mock.patch.object(solidfire.SolidFireDriver, '_issue_api_request') @mock.patch.object(solidfire.SolidFireDriver, '_issue_api_request')

View File

@ -225,9 +225,14 @@ class SolidFireDriver(san.SanISCSIDriver):
SnapshotsOnly) SnapshotsOnly)
2.0.17 - Fix bug #1859653 SolidFire fails to failback when volume 2.0.17 - Fix bug #1859653 SolidFire fails to failback when volume
service is restarted service is restarted
2.1.0 - Add Cinder Active/Active support
- Enable Active/Active support flag
- Implement Active/Active replication support
""" """
VERSION = '2.0.17' VERSION = '2.1.0'
SUPPORTS_ACTIVE_ACTIVE = True
# ThirdPartySystems wiki page # ThirdPartySystems wiki page
CI_WIKI_NAME = "NetApp_SolidFire_CI" CI_WIKI_NAME = "NetApp_SolidFire_CI"
@ -2347,7 +2352,7 @@ class SolidFireDriver(san.SanISCSIDriver):
self._issue_api_request('ModifyVolume', params, self._issue_api_request('ModifyVolume', params,
endpoint=tgt_cluster['endpoint']) endpoint=tgt_cluster['endpoint'])
def failover_host(self, context, volumes, secondary_id=None, groups=None): def failover(self, context, volumes, secondary_id=None, groups=None):
"""Failover to replication target. """Failover to replication target.
In order to do failback, you MUST specify the original/default cluster In order to do failback, you MUST specify the original/default cluster
@ -2484,10 +2489,7 @@ class SolidFireDriver(san.SanISCSIDriver):
} }
} }
vol_updates['updates'].update(conn_info) vol_updates['updates'].update(conn_info)
volume_updates.append(vol_updates) volume_updates.append(vol_updates)
LOG.debug("Updates for volume: %(id)s %(updates)s",
{'id': v.id, 'updates': vol_updates})
except Exception as e: except Exception as e:
volume_updates.append({'volume_id': v['id'], volume_updates.append({'volume_id': v['id'],
@ -2500,18 +2502,37 @@ class SolidFireDriver(san.SanISCSIDriver):
volume_updates.append({'volume_id': v['id'], volume_updates.append({'volume_id': v['id'],
'updates': {'status': 'error', }}) 'updates': {'status': 'error', }})
self.active_cluster = remote return '' if failback else remote['backend_id'], volume_updates, []
if failback: def failover_completed(self, context, active_backend_id=None):
active_cluster_id = '' """Update volume node when `failover` is completed.
Expects the following scenarios:
1) active_backend_id='' when failing back
2) active_backend_id=<secondary_backend_id> when failing over
3) When `failover` raises an Exception, this will be called
with the previous active_backend_id (Will be empty string
in case backend wasn't in failed-over state).
"""
if not active_backend_id:
LOG.info("Failback completed. "
"Switching active cluster back to default.")
self.active_cluster = self._create_cluster_reference()
self.failed_over = False self.failed_over = False
# Recreating cluster pairs after a successful failback # Recreating cluster pairs after a successful failback
self._set_cluster_pairs() self._set_cluster_pairs()
else: else:
active_cluster_id = remote['backend_id'] LOG.info("Failover completed. "
"Switching active cluster to %s.", active_backend_id)
self.active_cluster = self.cluster_pairs[0]
self.failed_over = True self.failed_over = True
return active_cluster_id, volume_updates, [] def failover_host(self, context, volumes, secondary_id=None, groups=None):
"""Failover to replication target in non-clustered deployment."""
active_cluster_id, volume_updates, group_updates = (
self.failover(context, volumes, secondary_id, groups))
self.failover_completed(context, active_cluster_id)
return active_cluster_id, volume_updates, group_updates
def freeze_backend(self, context): def freeze_backend(self, context):
"""Freeze backend notification.""" """Freeze backend notification."""

View File

@ -0,0 +1,6 @@
---
features:
- |
NetApp SolidFire driver: Added support for Active/Active
configurations (including replication). This allows
users to configure SolidFire backends in clustered environments.