Restart consoles on conductor startup

Some nodes may have console_enabled set to True even though the
corresponding shellinabox services are stopped when conductors start,
so try to start those consoles on conductor startup.

Change-Id: Ida5fda35340d62e08c779655183ef82211cd8703
Closes-Bug: #1525790
This commit is contained in:
Zhenguo Niu
2015-12-14 17:57:31 +08:00
committed by Zhenguo Niu
parent 0ad5b13b5a
commit d27e9e4a8f
5 changed files with 145 additions and 0 deletions

View File

@@ -15,6 +15,7 @@
import inspect import inspect
import threading import threading
import eventlet
import futurist import futurist
from futurist import periodics from futurist import periodics
from futurist import rejection from futurist import rejection
@@ -176,6 +177,13 @@ class BaseConductorManager(object):
states.DEPLOYING, 'provision_updated_at', states.DEPLOYING, 'provision_updated_at',
last_error=last_error) last_error=last_error)
# Start consoles, in a greenthread, for nodes that have them enabled.
try:
self._spawn_worker(self._start_consoles,
ironic_context.get_admin_context())
except exception.NoFreeConductorWorker:
LOG.warning(_LW('Failed to start worker for restarting consoles.'))
# Spawn a dedicated greenthread for the keepalive # Spawn a dedicated greenthread for the keepalive
try: try:
self._spawn_worker(self._conductor_service_record_keepalive) self._spawn_worker(self._conductor_service_record_keepalive)
@@ -371,3 +379,48 @@ class BaseConductorManager(object):
workers_count += 1 workers_count += 1
if workers_count >= CONF.conductor.periodic_max_workers: if workers_count >= CONF.conductor.periodic_max_workers:
break break
def _start_consoles(self, context):
    """Start the console of every node that has its console enabled.

    Walks the nodes yielded by ``self.iter_nodes`` for the
    ``console_enabled=True`` filter, takes an exclusive lock on each one
    and asks the node's console driver interface to start the console.

    If starting the console raises, the failure is logged, stored in the
    node's ``last_error``, and ``console_enabled`` is reset to False.
    Nodes that are locked or can no longer be found are skipped with a
    warning. Control is yielded to other greenthreads after every node.

    :param context: request context
    """
    enabled_nodes = self.iter_nodes(filters={'console_enabled': True})

    # The driver name yielded alongside the UUID is not needed here; the
    # driver is reached through the acquired task instead.
    for node_uuid, _driver in enabled_nodes:
        log_args = {'node': node_uuid}
        try:
            with task_manager.acquire(context, node_uuid, shared=False,
                                      purpose='start console') as task:
                try:
                    LOG.debug('Trying to start console of node %(node)s',
                              log_args)
                    task.driver.console.start_console(task)
                    LOG.info(_LI('Successfully started console of node '
                                 '%(node)s'), log_args)
                except Exception as exc:
                    failure = (_('Failed to start console of node %(node)s '
                                 'while starting the conductor, so changing '
                                 'the console_enabled status to False, error: '
                                 '%(err)s')
                               % {'node': node_uuid, 'err': exc})
                    LOG.error(failure)
                    # The console could not be started: record why on the
                    # node and flip console_enabled back to False so the
                    # stored state matches reality.
                    node = task.node
                    node.last_error = failure
                    node.console_enabled = False
                    node.save()
        except exception.NodeLocked:
            # Someone else holds the node; leave its state untouched.
            LOG.warning(_LW('Node %(node)s is locked while trying to '
                            'start console on conductor startup'),
                        log_args)
        except exception.NodeNotFound:
            LOG.warning(_LW("During starting console on conductor "
                            "startup, node %(node)s was not found"),
                        log_args)
        finally:
            # Yield on every iteration
            eventlet.sleep(0)

View File

@@ -220,6 +220,8 @@ class Connection(api.Connection):
(datetime.timedelta( (datetime.timedelta(
seconds=filters['inspection_started_before']))) seconds=filters['inspection_started_before'])))
query = query.filter(models.Node.inspection_started_at < limit) query = query.filter(models.Node.inspection_started_at < limit)
if 'console_enabled' in filters:
query = query.filter_by(console_enabled=filters['console_enabled'])
return query return query

View File

@@ -544,6 +544,8 @@ class ConsoleInterface(object):
def start_console(self, task): def start_console(self, task):
"""Start a remote console for the task's node. """Start a remote console for the task's node.
This method should not raise an exception if the console is already started.
:param task: a TaskManager instance containing the node to act on. :param task: a TaskManager instance containing the node to act on.
""" """

View File

@@ -18,11 +18,13 @@ from futurist import periodics
import mock import mock
from oslo_config import cfg from oslo_config import cfg
from oslo_db import exception as db_exception from oslo_db import exception as db_exception
from oslo_utils import uuidutils
from ironic.common import driver_factory from ironic.common import driver_factory
from ironic.common import exception from ironic.common import exception
from ironic.conductor import base_manager from ironic.conductor import base_manager
from ironic.conductor import manager from ironic.conductor import manager
from ironic.conductor import task_manager
from ironic.drivers import base as drivers_base from ironic.drivers import base as drivers_base
from ironic import objects from ironic import objects
from ironic.tests import base as tests_base from ironic.tests import base as tests_base
@@ -218,3 +220,84 @@ class ManagerSpawnWorkerTestCase(tests_base.TestCase):
self.assertRaises(exception.NoFreeConductorWorker, self.assertRaises(exception.NoFreeConductorWorker,
self.service._spawn_worker, 'fake') self.service._spawn_worker, 'fake')
class StartConsolesTestCase(mgr_utils.ServiceSetUpMixin,
                            tests_db_base.DbTestCase):
    """Tests for the conductor's ``_start_consoles`` startup routine."""

    def _patch_start_console(self):
        """Return a patcher that mocks the driver's ``start_console``."""
        return mock.patch.object(self.driver.console, 'start_console')

    def test__start_consoles(self):
        """start_console is called once per console-enabled node."""
        # Two nodes are created with console_enabled=True and a third one
        # without the flag; two start_console calls are expected.
        obj_utils.create_test_node(self.context,
                                   driver='fake',
                                   console_enabled=True)
        obj_utils.create_test_node(self.context,
                                   uuid=uuidutils.generate_uuid(),
                                   driver='fake',
                                   console_enabled=True)
        obj_utils.create_test_node(self.context,
                                   uuid=uuidutils.generate_uuid(),
                                   driver='fake')
        self._start_service()

        with self._patch_start_console() as start_console_mock:
            self.service._start_consoles(self.context)
            self.assertEqual(2, start_console_mock.call_count)

    def test__start_consoles_no_console_enabled(self):
        """Nothing is started when the only node has its console disabled."""
        obj_utils.create_test_node(self.context,
                                   driver='fake',
                                   console_enabled=False)
        self._start_service()

        with self._patch_start_console() as start_console_mock:
            self.service._start_consoles(self.context)
            self.assertFalse(start_console_mock.called)

    def test__start_consoles_failed(self):
        """A start failure disables the console and sets last_error."""
        node = obj_utils.create_test_node(self.context,
                                          driver='fake',
                                          console_enabled=True)
        self._start_service()

        with self._patch_start_console() as start_console_mock:
            start_console_mock.side_effect = Exception()
            self.service._start_consoles(self.context)
            start_console_mock.assert_called_once_with(mock.ANY)
            node.refresh()
            self.assertFalse(node.console_enabled)
            self.assertIsNotNone(node.last_error)

    @mock.patch.object(base_manager, 'LOG')
    def test__start_consoles_node_locked(self, log_mock):
        """A reserved node is skipped with a warning and left unchanged."""
        node = obj_utils.create_test_node(self.context,
                                          driver='fake',
                                          console_enabled=True,
                                          reservation='fake-host')
        self._start_service()

        with self._patch_start_console() as start_console_mock:
            self.service._start_consoles(self.context)
            self.assertFalse(start_console_mock.called)
            node.refresh()
            self.assertTrue(node.console_enabled)
            self.assertIsNone(node.last_error)
            self.assertTrue(log_mock.warning.called)

    @mock.patch.object(base_manager, 'LOG')
    def test__start_consoles_node_not_found(self, log_mock):
        """NodeNotFound on acquire is logged and the node is left as is."""
        node = obj_utils.create_test_node(self.context,
                                          driver='fake',
                                          console_enabled=True)
        self._start_service()

        with mock.patch.object(task_manager, 'acquire') as acquire_mock:
            acquire_mock.side_effect = exception.NodeNotFound(
                node='not found')
            with self._patch_start_console() as start_console_mock:
                self.service._start_consoles(self.context)
                self.assertFalse(start_console_mock.called)
                node.refresh()
                self.assertTrue(node.console_enabled)
                self.assertIsNone(node.last_error)
                self.assertTrue(log_mock.warning.called)

View File

@@ -0,0 +1,5 @@
---
fixes:
- Some nodes' consoles may be enabled while the corresponding console
services are stopped when conductors start. The conductor now tries to
start such consoles on startup to make the status consistent.