Improve disk handling in sysinv conductor

This change improves the method in the sysinv conductor responsible
for handling disks, including creating, updating, and removing disks
from the database. This improvement was necessary to prevent disk
duplication when the conductor receives a disk with the device_path
field with value None.

The improvements are:
 - Update the logic in the is_same_disk function to use device_path
   as the trusted unique identifier to consider the same disk.
 - Add logic to stop iterating over all disks in the database once
   a matching disk has been found.
 - Improve the condition to allow only valid disks to be created.

Test Plan:
 - PASS: Verify fresh install from a new ISO works as expected.
 - PASS: Verify fresh install from a new ISO using MULTIPATH is working
         as expected.
 - PASS: Confirm if, when the re-enumeration occurs, the database is
         updated correctly (e.g: sda->sdb and sdb->sda).
 - PASS: Ensure each host can add new disks, and they are properly
         created in the database.
 - PASS: Ensure each host can remove disks, and they are properly
         removed from the database.
 - PASS: Adding and removing disks simultaneously on each host, and
         they are properly updated in the database.
 - PASS: Replacing a disk on any host properly updates all related
         information in the database.
 - PASS: Verify that all hosts reboot successfully (one at a time).
 - PASS: Confirm Backup & Restore functionality works as expected.
 - PASS: Swap 2 disks between your ports and check if sysinv will
         detect them as a disk replacement action.
 - PASS: Swap 1 disk to a new port that has no disks and check if
         the old database entry is deleted and a new one with a new
         device_path is created.

Note: All the above tests were performed on SX, DX, and STD setups.

Closes-Bug: 2112494

Change-Id: Ibd64680ec1da88bd8e1e5621b33ebcc267817080
Signed-off-by: Gustavo Ornaghi Antunes <gustavo.ornaghiantunes@windriver.com>
This commit is contained in:
Gustavo Ornaghi Antunes
2025-05-30 10:08:41 -03:00
parent 9fe84c43ea
commit 271f611a2d

View File

@@ -4947,28 +4947,13 @@ class ConductorManager(service.PeriodicService):
:returns: pass or fail :returns: pass or fail
""" """
def is_same_disk(i, idisk): def does_disk_path_exist(i, idisk):
# Upgrades R3->R4: An update from an N-1 agent will be missing the
# persistent naming fields. if i.get('device_path') is not None:
if 'device_path' in i: if idisk.device_path == i.get('device_path'):
if i.get('device_path') is not None: LOG.info("The disk is the same, because the %s matches" % "device_path")
if idisk.device_path == i.get('device_path'): return True
# Update from R4 node: Use R4 disk identification logic
return True
elif not idisk.device_path:
# TODO: remove R5. still need to compare device_node
# because not inventoried for R3 node controller-0
if idisk.device_node == i.get('device_node'):
LOG.info("host_uuid=%s idisk.device_path not"
"set, match on device_node %s" %
(ihost_uuid, idisk.device_node))
return True
else:
return False
elif idisk.device_node == i.get('device_node'):
# Update from R3 node: Fall back to R3 disk identification
# logic.
return True
return False return False
ihost_uuid.strip() ihost_uuid.strip()
@@ -4998,6 +4983,9 @@ class ConductorManager(service.PeriodicService):
idisks = self.dbapi.idisk_get_by_ihost(ihost_uuid) idisks = self.dbapi.idisk_get_by_ihost(ihost_uuid)
idisk_dict_array.sort(key=lambda i: i.get('device_path') is None)
LOG.debug(f"Disks received from agent: {str(idisk_dict_array)}")
for i in idisk_dict_array: for i in idisk_dict_array:
disk_dict = {'forihostid': forihostid} disk_dict = {'forihostid': forihostid}
# this could overwrite capabilities - do not overwrite device_function? # this could overwrite capabilities - do not overwrite device_function?
@@ -5009,11 +4997,12 @@ class ConductorManager(service.PeriodicService):
disk = self.dbapi.idisk_create(forihostid, disk_dict) disk = self.dbapi.idisk_create(forihostid, disk_dict)
else: else:
found = False found = False
is_valid_disk = True
for idisk in idisks: for idisk in idisks:
LOG.debug("[DiskEnum] for - current idisk: %s - %s -%s" % LOG.debug("[DiskEnum] for - current idisk: %s - %s -%s" %
(idisk.uuid, idisk.device_node, idisk.device_id)) (idisk.uuid, idisk.device_node, idisk.device_id))
if is_same_disk(i, idisk): if does_disk_path_exist(i, idisk):
found = True found = True
# The disk has been replaced? # The disk has been replaced?
if idisk.serial_id != i.get('serial_id'): if idisk.serial_id != i.get('serial_id'):
@@ -5059,8 +5048,6 @@ class ConductorManager(service.PeriodicService):
idisk.device_id, idisk.capabilities, idisk.device_id, idisk.capabilities,
disk_dict['capabilities'])) disk_dict['capabilities']))
# disk = self.dbapi.idisk_update(idisk['uuid'],
# disk_dict)
disk_dict_capabilities = disk_dict.get('capabilities') disk_dict_capabilities = disk_dict.get('capabilities')
if (disk_dict_capabilities and if (disk_dict_capabilities and
('device_function' not in ('device_function' not in
@@ -5082,15 +5069,25 @@ class ConductorManager(service.PeriodicService):
(idisk['uuid'], str(disk_dict))) (idisk['uuid'], str(disk_dict)))
disk = self.dbapi.idisk_update(idisk['uuid'], disk = self.dbapi.idisk_update(idisk['uuid'],
disk_dict) disk_dict)
elif not idisk.device_path: break
if idisk.device_node == i.get('device_node'): else:
found = True if i.get('device_path') is None:
disk = self.dbapi.idisk_update(idisk['uuid'], # If the disk's `device_path` is still `None` after
disk_dict) # the disks loop, mark it as invalid.
self.dbapi.journal_update_path(disk) # This can happen if pyudev takes longer than normal
# to provide all device information.
# Such missing device information is unexpected and
# usually means something went wrong.
# Marking the disk as invalid helps avoid duplicate
# entries caused by temporary inconsistencies.
is_valid_disk = False
LOG.warn(f"The disk received with device_node {i.get('device_node')} is invalid,"
"because it does not have the device_path value.")
if not found: if not found and is_valid_disk:
LOG.info(f"Creating new disk with device_path {disk_dict.get('device_path')}")
disk = self.dbapi.idisk_create(forihostid, disk_dict) disk = self.dbapi.idisk_create(forihostid, disk_dict)
idisks.append(disk)
# Update the capabilities if the device is a cinder # Update the capabilities if the device is a cinder
# disk # disk