Add rook-ceph monitor operations test

This automation identifies hosts without monitors and ensures that
at least two nodes are available. It then adds two monitors, applies
rook-ceph, and validates the application and health status.
The test proceeds to remove the monitors, lock and unlock nodes,
delete host-fs entries, and re-apply rook-ceph as required.
Finally, it verifies that rook-ceph health returns to a stable state.

Change-Id: Ib2b06c381bca15b6908f591af2f9097dad3b54a9
Signed-off-by: tleal <tiago.leal@windriver.com>
Author: tleal
Date:   2025-09-17 13:21:28 -03:00
Parent: f21a516452
Commit: fa4be5902e

4 changed files with 164 additions and 3 deletions
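Condensed to a single target host, the flow described above exercises the new SystemHostFSKeywords helpers introduced in this change roughly as follows (a sketch only: "compute-1" is an illustrative hostname, 20 is the size used by the test, and the rook-ceph re-applies and health checks between steps are omitted):

# Sketch of the per-host monitor lifecycle driven by the test (hostname illustrative).
ssh = LabConnectionKeywords().get_active_controller_ssh()
fs_kw = SystemHostFSKeywords(ssh)

fs_kw.system_host_fs_add(hostname="compute-1", fs_name="ceph", fs_size=20)        # add the monitor host-fs
# ... rook-ceph re-applied and health verified ...
fs_kw.system_host_fs_modify(hostname="compute-1", fs_name="ceph", functions="")   # clear the monitor function
fs_kw.wait_for_fs_ready(hostname="compute-1", fs_name="ceph")                     # wait for the FS to settle
fs_kw.system_host_fs_delete(hostname="compute-1", fs_name="ceph")                 # drop the host-fs again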


@@ -106,3 +106,27 @@ class SystemHostFSOutput:
        if len(fs_lists) == 0:
            return False
        return True

    def get_host_fs(self, fs_name: str) -> SystemHostFSObject | None:
        """
        Return the FS object with the given name, or None if not found.

        Args:
            fs_name (str): Name of the filesystem to retrieve.

        Returns:
            SystemHostFSObject | None: The filesystem object if found, else None.
        """
        for fs in self.system_host_fs:
            if fs.get_name() == fs_name:
                return fs
        return None

    def has_monitor(self) -> bool:
        """
        Check whether a monitor is configured on the host.

        Returns:
            bool: True if at least one FS has 'monitor' in its functions; False otherwise.
        """
        return any("monitor" in (fs.get_capabilities().get_functions() or []) for fs in self.system_host_fs if fs.get_capabilities() is not None)
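A minimal usage sketch for the two new accessors (illustrative only: `ssh` stands for an established SSHConnection to the active controller, "compute-0" is a placeholder hostname, and `get_system_host_fs_list` is the existing keyword wrapper shown in the next file):

# Sketch only; "compute-0" is an illustrative hostname.
fs_output = SystemHostFSKeywords(ssh).get_system_host_fs_list("compute-0")

ceph_fs = fs_output.get_host_fs("ceph")      # SystemHostFSObject, or None if absent
if ceph_fs is not None:
    print(ceph_fs.get_state())               # e.g. "Ready"

if not fs_output.has_monitor():              # no host-fs carries the "monitor" function
    print("compute-0 has no ceph monitor configured")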


@@ -1,7 +1,10 @@
import time

from framework.ssh.ssh_connection import SSHConnection
from keywords.base_keyword import BaseKeyword
from keywords.cloud_platform.command_wrappers import source_openrc
from keywords.cloud_platform.system.host.objects.system_host_fs_output import SystemHostFSOutput
from keywords.cloud_platform.system.host.system_host_list_keywords import SystemHostListKeywords


class SystemHostFSKeywords(BaseKeyword):
@@ -48,18 +51,27 @@ class SystemHostFSKeywords(BaseKeyword):
self.ssh_connection.send(source_openrc(f"system host-fs-add {hostname} {fs_name}={fs_size}"))
self.validate_success_return_code(self.ssh_connection)
def system_host_fs_modify(self, hostname: str, fs_name: str, fs_size: int):
def system_host_fs_modify(self, hostname: str, fs_name: str, fs_size: int = None, functions: str = None):
"""
Run the "system host-fs-modify" command with the specified arguments.
Args:
hostname (str): Name of the host to modify.
fs_name (str): Name of FS Name to be modified
fs_size (int): Size of FS Name to be modified
fs_size (int, optional): Size of FS Name to be modified
functions (str, optional): Functions to set for the filesystem
Returns: None
"""
self.ssh_connection.send(source_openrc(f"system host-fs-modify {hostname} {fs_name}={fs_size}"))
command = f"system host-fs-modify {hostname} {fs_name}"
if fs_size is not None:
command += f" {fs_size}"
if functions is not None:
command += f" --functions={functions}"
self.ssh_connection.send(source_openrc(command))
self.validate_success_return_code(self.ssh_connection)
def system_host_fs_modify_with_error(self, hostname: str, fs_name: str, fs_size: int) -> list[str]:
@@ -89,3 +101,46 @@ class SystemHostFSKeywords(BaseKeyword):
"""
self.ssh_connection.send(source_openrc(f"system host-fs-delete {hostname} {fs_name}"))
self.validate_success_return_code(self.ssh_connection)
def wait_for_fs_ready(self, hostname: str, fs_name: str, timeout: int = 300, sleep_time: int = 30) -> None:
"""
Wait until the given FS on the host reaches state 'Ready'.
Args:
hostname (str): Host name to check
fs_name (str): FS name to wait for
timeout (int): Max time in seconds to wait
sleep_time (int): Interval between checks
Raises:
TimeoutError: If FS does not reach 'Ready' state within timeout
Returns:
None: This function does not return any value
"""
end_time = time.time() + timeout
while time.time() < end_time:
fs_output = self.get_system_host_fs_list(hostname)
fs = fs_output.get_host_fs(fs_name)
if fs and fs.get_state() == "Ready":
return
time.sleep(sleep_time)
raise TimeoutError(f"FS '{fs_name}' on host '{hostname}' did not reach 'Ready' state within {timeout} seconds")
def get_hosts_without_monitor(self) -> list[str]:
"""
Return a list of hosts that do NOT have a monitor.
Returns:
list[str]: List of hostnames without monitor
"""
no_monitor_hosts = []
all_hosts = SystemHostListKeywords(self.ssh_connection).get_system_host_list().get_controllers_and_computes()
for host in all_hosts:
fs_output = self.get_system_host_fs_list(host.get_host_name())
if not fs_output.has_monitor():
no_monitor_hosts.append(host.get_host_name())
return no_monitor_hosts
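The two additions above are intended to be combined as a host-selection step plus a readiness gate; a rough sketch (again assuming an `ssh` connection to the active controller, with "compute-0" as a placeholder hostname and the default polling values defined above):

# Sketch only: select monitor-free hosts, then gate on the ceph FS state before deleting it.
fs_kw = SystemHostFSKeywords(ssh)

free_hosts = fs_kw.get_hosts_without_monitor()        # controllers/computes with no monitor function
print(f"hosts eligible for a new monitor: {free_hosts}")

try:
    # Re-reads the host-fs list every sleep_time seconds (default 30) and raises
    # TimeoutError if the FS has not reached state "Ready" within timeout seconds (default 300).
    fs_kw.wait_for_fs_ready(hostname="compute-0", fs_name="ceph", timeout=300, sleep_time=30)
except TimeoutError as err:
    print(f"ceph FS on compute-0 did not become Ready: {err}")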


@@ -896,3 +896,84 @@ def test_reboot_active_controller_rook_ceph():
get_logger().log_test_case_step("Checking rook-ceph health after reboot.")
ceph_status_keywords.wait_for_ceph_health_status(expect_health_status=True)
@mark.p2
@mark.lab_rook_ceph
@mark.lab_has_min_3_compute
def test_monitor_operations_rook_ceph():
"""
Test case: Add and remove rook-ceph monitors.
Test Steps:
-Identify hosts without monitors.
-Ensure at least 2 nodes are available for monitor addition.
-Add 2 monitors to the selected nodes.
-Apply rook-ceph and validate the application status.
-Verify rook-ceph health after adding monitors.
-Remove the previously added monitors.
-Lock the target nodes for monitor removal.
-Wait for rook-ceph auto-apply and validate status.
-Apply rook-ceph manually to complete monitor removal.
-Delete host-fs entries for the target nodes.
-Unlock all previously locked nodes.
-Validate rook-ceph application status again.
-Apply rook-ceph one final time.
-Verify rook-ceph health after monitor removal.
Args: None
"""
active_controller_ssh_connection = LabConnectionKeywords().get_active_controller_ssh()
ceph_status_keywords = CephStatusKeywords(active_controller_ssh_connection)
system_host_fs_keywords = SystemHostFSKeywords(active_controller_ssh_connection)
system_application_apply_keywords = SystemApplicationApplyKeywords(active_controller_ssh_connection)
system_application_list_keywords = SystemApplicationListKeywords(active_controller_ssh_connection)
app_status_list = ["applied"]
app_name = "rook-ceph"
no_monitor_hosts = []
get_logger().log_test_case_step("Identifying hosts without monitors.")
no_monitor_hosts = system_host_fs_keywords.get_hosts_without_monitor()
if len(no_monitor_hosts) < 2:
raise AssertionError("Insufficient free nodes: at least 2 nodes are required to add monitors.")
target_hosts = no_monitor_hosts[:2]
get_logger().log_test_case_step(f"Adding monitors to nodes: {target_hosts}")
for host in target_hosts:
system_host_fs_keywords.system_host_fs_add(hostname=host, fs_name="ceph", fs_size=20)
get_logger().log_test_case_step("Wait for rook-ceph auto-apply after monitor addition.")
system_application_list_keywords.validate_app_status_in_list(app_name, app_status_list, timeout=360, polling_sleep_time=10)
get_logger().log_test_case_step("Reapply rook-ceph after adding monitor.")
system_application_apply_keywords.system_application_apply(app_name, timeout=500)
get_logger().log_test_case_step("Validate rook-ceph application status after addition")
system_application_list_keywords.validate_app_status_in_list(app_name, app_status_list, timeout=360, polling_sleep_time=10)
get_logger().log_test_case_step(f"Removing monitors from nodes: {target_hosts}")
for host in target_hosts:
system_host_fs_keywords.system_host_fs_modify(hostname=host, fs_name="ceph", functions="")
get_logger().log_test_case_step("Lock nodes for monitor removal.")
SystemHostLockKeywords.lock_multiple_hosts(active_controller_ssh_connection, target_hosts)
get_logger().log_test_case_step("Wait for rook-ceph auto-apply after monitor removal.")
system_application_list_keywords.validate_app_status_in_list(app_name, app_status_list, timeout=360, polling_sleep_time=20)
get_logger().log_test_case_step("Reapply rook-ceph to complete monitor removal")
system_application_apply_keywords.system_application_apply(app_name, timeout=500)
get_logger().log_test_case_step("Deleting host-fs entries from target hosts.")
for host in target_hosts:
system_host_fs_keywords.wait_for_fs_ready(hostname=host, fs_name="ceph")
system_host_fs_keywords.system_host_fs_delete(hostname=host, fs_name="ceph")
get_logger().log_test_case_step("Unlock all previously locked nodes.")
SystemHostLockKeywords.unlock_multiple_hosts(active_controller_ssh_connection, target_hosts)
get_logger().log_test_case_step("Validate rook-ceph application status after unlocking nodes")
system_application_list_keywords.validate_app_status_in_list(app_name, app_status_list, timeout=360, polling_sleep_time=10)
get_logger().log_test_case_step("Reapply rook-ceph after unlock.")
system_application_apply_keywords.system_application_apply(app_name, timeout=500)
get_logger().log_test_case_step("Verify final rook-ceph health.")
ceph_status_keywords.wait_for_ceph_health_status(expect_health_status=True)


@@ -32,6 +32,7 @@ markers=
    lab_has_subcloud: mark tests that require at least one subcloud
    lab_has_min_2_subclouds: mark tests that require at least 2 subclouds
    lab_has_compute: mark tests that require at least one compute node
    lab_has_min_3_compute: mark tests that require at least 3 compute nodes
    subcloud_lab_has_compute: mark tests that require at least one subcloud containing at least one compute node
    lab_has_secondary_system_controller: mark tests that require a secondary system controller
    lab_has_ptp_configuration_compute: mark tests that require ptp_configuration_expectation_compute.json5
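The new lab_has_min_3_compute marker follows the existing convention: the test above declares it with @mark.lab_has_min_3_compute, and a standard pytest marker expression can then restrict a run to tests that carry it, for example (invocation shown for illustration only):

    pytest -m "lab_rook_ceph and lab_has_min_3_compute"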