Driver reinitialization after failure

This is a proposal to reinitialize a volume driver
when it fails to initialize during startup.

Change-Id: I01b37a15f03f0a16690745362404d741a225bad7
Co-authored-by: Ovidiu Poncea <ovidiu.poncea@windriver.com>
Implements: blueprint driver-initialization-after-fail
This commit is contained in:
LisaLi 2018-11-19 10:14:56 -05:00 committed by lixiaoy1
parent 0b70bd7046
commit bffd16608a
3 changed files with 54 additions and 1 deletions

View File

@ -17,8 +17,10 @@
import mock import mock
from oslo_config import cfg from oslo_config import cfg
from oslo_utils import importutils
from cinder import context from cinder import context
from cinder import exception
from cinder import objects from cinder import objects
from cinder.tests.unit import utils as tests_utils from cinder.tests.unit import utils as tests_utils
from cinder.tests.unit import volume as base from cinder.tests.unit import volume as base
@ -281,3 +283,30 @@ class VolumeInitHostTestCase(base.BaseVolumeTestCase):
mock_add_threadpool.assert_called_once_with( mock_add_threadpool.assert_called_once_with(
mock_migrate_fixed_key, mock_migrate_fixed_key,
volumes=mock_get_my_volumes()) volumes=mock_get_my_volumes())
@mock.patch('time.sleep')
def test_init_host_retry(self, mock_sleep):
    """Check init_host stops retrying when every driver setup fails.

    ``do_setup`` is mocked to raise on each call, and ``time.sleep`` is
    patched so the test does not actually wait between attempts. The
    test expects four ``do_setup`` calls in total and a manager that
    reports it is not working afterwards.
    """
    kwargs = {'service_id': 2}
    self.volume = importutils.import_object(CONF.volume_manager)
    self.volume.driver.do_setup = mock.MagicMock()
    # One failure per expected attempt: with only three entries the
    # fourth call would fail via StopIteration from the exhausted
    # side_effect list rather than via a deliberate driver error.
    self.volume.driver.do_setup.side_effect = [
        exception.CinderException("Test driver error."),
        exception.InvalidConfigurationValue('Test config error.'),
        ImportError,
        exception.CinderException("Test driver error.")]

    self.volume.init_host(added_to_cluster=False, **kwargs)

    self.assertEqual(4, self.volume.driver.do_setup.call_count)
    self.assertFalse(self.volume.is_working())
@mock.patch('time.sleep')
def test_init_host_retry_once(self, mock_sleep):
    """Check init_host recovers when driver setup succeeds on a retry.

    ``time.sleep`` is patched so the test does not actually wait
    between attempts.
    """
    # The first do_setup call raises ImportError, the second returns None.
    setup_mock = mock.MagicMock()
    setup_mock.side_effect = [ImportError, None]

    self.volume = importutils.import_object(CONF.volume_manager)
    self.volume.driver.do_setup = setup_mock

    init_kwargs = {'service_id': 2}
    self.volume.init_host(added_to_cluster=False, **init_kwargs)

    self.assertEqual(2, self.volume.driver.do_setup.call_count)
    self.assertTrue(self.volume.is_working())

View File

@ -124,6 +124,11 @@ volume_manager_opts = [
cfg.StrOpt('zoning_mode', cfg.StrOpt('zoning_mode',
help="FC Zoning mode configured, only 'fabric' is " help="FC Zoning mode configured, only 'fabric' is "
"supported now."), "supported now."),
# Retry budget read by the re-initialization loop in init_host.
cfg.IntOpt('reinit_driver_count',
           default=3,
           help='Maximum number of times to reinitialize the driver '
                'if volume initialization fails. The retry interval '
                'backs off exponentially: 1s, 2s, 4s, etc.'),
] ]
volume_backend_opts = [ volume_backend_opts = [
@ -409,7 +414,6 @@ class VolumeManager(manager.CleanableManager,
def init_host(self, added_to_cluster=None, **kwargs): def init_host(self, added_to_cluster=None, **kwargs):
"""Perform any required initialization.""" """Perform any required initialization."""
ctxt = context.get_admin_context()
if not self.driver.supported: if not self.driver.supported:
utils.log_unsupported_driver_warning(self.driver) utils.log_unsupported_driver_warning(self.driver)
@ -422,6 +426,20 @@ class VolumeManager(manager.CleanableManager,
'id': self.__class__.__name__}) 'id': self.__class__.__name__})
return return
self._init_host(added_to_cluster, **kwargs)
if not self.driver.initialized:
reinit_count = 0
while reinit_count < CONF.reinit_driver_count:
time.sleep(2 ** reinit_count)
self._init_host(added_to_cluster, **kwargs)
if self.driver.initialized:
return
reinit_count += 1
def _init_host(self, added_to_cluster=None, **kwargs):
ctxt = context.get_admin_context()
# If we have just added this host to a cluster we have to include all # If we have just added this host to a cluster we have to include all
# our resources in that cluster. # our resources in that cluster.
if added_to_cluster: if added_to_cluster:

View File

@ -0,0 +1,6 @@
---
features:
- Added a new config option ``reinit_driver_count`` in volume driver, which
sets the maximum number of times driver initialization is retried when
a volume driver fails to initialize. Its default value is 3. The retry
interval backs off exponentially: 1s, 2s, 4s, etc.