From c99230060f27b0100a913ba8201524be02b1f09b Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Thu, 24 Jul 2025 11:51:09 -0700 Subject: [PATCH] Add a suggestive warning around power and sensor syncs While looking at the threading issues related to the removal of eventlet, it dawned on me that it wouldn't be a bad idea for Ironic to log a warning suggesting corrective actions an operator could take for non-ideal performance behavior. For example, if we're not launching enough power sync workers, or enough sensor data collection workers, then the check interval begins to become the minimum, and the task just re-launches after the sweep. That, itself, is not a huge issue except it can begin to reduce the meaningfulness and reliability of the status data in larger clusters, where operators should likely take action, such as increasing the number of workers or adding conductors. Change-Id: Ic9277c5389c7e8f2d68e72bf6338a4f509989e75 Signed-off-by: Julia Kreger --- ironic/conductor/manager.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 0fefc465ea..b81bcf8715 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -1578,10 +1578,18 @@ class ConductorManager(base_manager.BaseConductorManager): METRICS.send_gauge( 'ConductorManager.PowerSyncNodesCount', len(nodes)) - + runtime = time.time() - started LOG.debug('Completed power state sync operation, evaluated %d ' 'nodes with %d workers in %.2f seconds', - len(futures), number_of_workers, time.time() - started) + len(futures), number_of_workers, runtime) + if runtime > (3 * CONF.conductor.sync_power_state_interval): + LOG.warning('The power state sync operation runtime is 3x ' + 'the [conductor]sync_power_state_interval setting. ' + 'This is not ideal. 
You may need to tune the ' + '[conductor]sync_power_state_workers and ' + '[conductor]periodic_max_workers settings, ' + 'or ultimately add more conductors to the ' + 'ironic deployment.') def _sync_power_state_nodes_task(self, context, nodes): """Invokes power state sync on nodes from synchronized queue. @@ -2906,9 +2914,16 @@ class ConductorManager(base_manager.BaseConductorManager): if not_done: LOG.warning("%d workers for send sensors data did not complete", len(not_done)) + runtime = time.time() - started LOG.debug('Completed sending sensor data, evaluated %d ' 'nodes with %d workers in %.2f seconds', - len(done), number_of_threads, time.time() - started) + len(done), number_of_threads, runtime) + if runtime > (3 * CONF.sensor_data.interval): + LOG.warning('The sensor data collection runtime is ' + '3x the [sensor_data]interval setting. ' + 'Please consider tuning the [sensor_data]' + 'workers setting or adding additional ' + 'conductors to the ironic deployment.') def _filter_out_unsupported_types(self, sensors_data): """Filters out sensor data types that aren't specified in the config.