Fix waiting for target disk to appear
This patch is changing the _wait_for_disks() method behavior to wait to a specific disk if any device hints is specified. There are cases where the deployment might fail or succeed randomly depending on the order and time that the disks shows up. If no root device hints is specified, the method will just wait for any suitable disk to show up, like before. The _wait_for_disks call was made into a proper hardware manager method. It is now also called each time the cached node is updated, not only on start up. This is to ensure that we wait for the device, matching root device hints (which are part of the node). The loop was corrected to avoid redundant sleeps and warnings. Finally, this patch adds more logging around detecting the root device. Co-Authored-By: Dmitry Tantsur <dtantsur@redhat.com> Change-Id: I10ca70d6a390ed802505c0d10d440dfb52beb56c Closes-Bug: #1670916
This commit is contained in:

committed by
Dmitry Tantsur

parent
e87d039111
commit
3189c16a5e
@@ -381,6 +381,33 @@ class HardwareManager(object):
|
||||
erase_results[block_device.name] = result
|
||||
return erase_results
|
||||
|
||||
def wait_for_disks(self):
|
||||
"""Wait for the root disk to appear.
|
||||
|
||||
Wait for at least one suitable disk to show up or a specific disk
|
||||
if any device hint is specified. Otherwise neither inspection
|
||||
not deployment have any chances to succeed.
|
||||
|
||||
"""
|
||||
if not CONF.disk_wait_attempts:
|
||||
return
|
||||
|
||||
for attempt in range(CONF.disk_wait_attempts):
|
||||
try:
|
||||
self.get_os_install_device()
|
||||
except errors.DeviceNotFound:
|
||||
LOG.debug('Still waiting for the root device to appear, '
|
||||
'attempt %d of %d', attempt + 1,
|
||||
CONF.disk_wait_attempts)
|
||||
|
||||
if attempt < CONF.disk_wait_attempts - 1:
|
||||
time.sleep(CONF.disk_wait_delay)
|
||||
else:
|
||||
break
|
||||
else:
|
||||
LOG.warning('The root device was not detected in %d seconds',
|
||||
CONF.disk_wait_delay * CONF.disk_wait_attempts)
|
||||
|
||||
def list_hardware_info(self):
|
||||
"""Return full hardware inventory as a serializable dict.
|
||||
|
||||
@@ -489,32 +516,9 @@ class GenericHardwareManager(HardwareManager):
|
||||
def evaluate_hardware_support(self):
|
||||
# Do some initialization before we declare ourself ready
|
||||
_check_for_iscsi()
|
||||
self._wait_for_disks()
|
||||
self.wait_for_disks()
|
||||
return HardwareSupport.GENERIC
|
||||
|
||||
def _wait_for_disks(self):
|
||||
"""Wait for disk to appear
|
||||
|
||||
Wait for at least one suitable disk to show up, otherwise neither
|
||||
inspection not deployment have any chances to succeed.
|
||||
|
||||
"""
|
||||
|
||||
for attempt in range(CONF.disk_wait_attempts):
|
||||
try:
|
||||
block_devices = self.list_block_devices()
|
||||
utils.guess_root_disk(block_devices)
|
||||
except errors.DeviceNotFound:
|
||||
LOG.debug('Still waiting for at least one disk to appear, '
|
||||
'attempt %d of %d', attempt + 1,
|
||||
CONF.disk_wait_attempts)
|
||||
time.sleep(CONF.disk_wait_delay)
|
||||
else:
|
||||
break
|
||||
else:
|
||||
LOG.warning('No disks detected in %d seconds',
|
||||
CONF.disk_wait_delay * CONF.disk_wait_attempts)
|
||||
|
||||
def collect_lldp_data(self, interface_names):
|
||||
"""Collect and convert LLDP info from the node.
|
||||
|
||||
@@ -705,10 +709,12 @@ class GenericHardwareManager(HardwareManager):
|
||||
root_device_hints = None
|
||||
if cached_node is not None:
|
||||
root_device_hints = cached_node['properties'].get('root_device')
|
||||
LOG.debug('Looking for a device matching root hints %s',
|
||||
root_device_hints)
|
||||
|
||||
block_devices = self.list_block_devices()
|
||||
if not root_device_hints:
|
||||
return utils.guess_root_disk(block_devices).name
|
||||
dev_name = utils.guess_root_disk(block_devices).name
|
||||
else:
|
||||
serialized_devs = [dev.serialize() for dev in block_devices]
|
||||
try:
|
||||
@@ -729,7 +735,13 @@ class GenericHardwareManager(HardwareManager):
|
||||
"No suitable device was found for "
|
||||
"deployment using these hints %s" % root_device_hints)
|
||||
|
||||
return device['name']
|
||||
dev_name = device['name']
|
||||
|
||||
LOG.info('Picked root device %(dev)s for node %(node)s based on '
|
||||
'root device hints %(hints)s',
|
||||
{'dev': dev_name, 'hints': root_device_hints,
|
||||
'node': cached_node['uuid'] if cached_node else None})
|
||||
return dev_name
|
||||
|
||||
def get_system_vendor_info(self):
|
||||
product_name = None
|
||||
@@ -1160,11 +1172,22 @@ def cache_node(node):
|
||||
Stores the node object in the hardware module to facilitate the
|
||||
access of a node information in the hardware extensions.
|
||||
|
||||
If the new node does not match the previously cached one, wait for the
|
||||
expected root device to appear.
|
||||
|
||||
:param node: Ironic node object
|
||||
"""
|
||||
global NODE
|
||||
new_node = NODE is None or NODE['uuid'] != node['uuid']
|
||||
NODE = node
|
||||
|
||||
if new_node:
|
||||
LOG.info('Cached node %s, waiting for its root device to appear',
|
||||
node['uuid'])
|
||||
# Root device hints, stored in the new node, can change the expected
|
||||
# root device. So let us wait for it to appear again.
|
||||
dispatch_to_managers('wait_for_disks')
|
||||
|
||||
|
||||
def get_cached_node():
|
||||
"""Guard function around the module variable NODE."""
|
||||
|
Reference in New Issue
Block a user