Script not logging all the reverted apps list in software.log

The 21-k8s-app-upgrade script did not correctly log the update
status of all applications when it failed due to a timeout. This
stems from the fact that the script only triggers a bulk application
update processor in sysinv, and the progress of that operation is
reported in batches.

This change analyzes the status of apps in the database when the
timeout expires, reporting the following:
  - Lists of applications that have been updated/reverted.
  - Lists of applications that have not yet started
    updating/reverting.
  - Lists of applications currently in the process of
    updating/reverting.

Test Plan:
PASS: build-pkgs && build-image
PASS: Platform upgrade stx-10 to master
PASS: Platform rollback from master to stx-10
PASS: Simulate script 21 failing due to a timeout and verify that
      all applications are logged with their respective statuses.

Closes-bug: 2121827
Depends-On: https://review.opendev.org/c/starlingx/config/+/962259

Change-Id: I805e0341dae6b50cca0e683d502320161d8368b3
Signed-off-by: David Bastos <david.barbosabastos@windriver.com>
This commit is contained in:
David Bastos
2025-09-25 06:41:28 -03:00
parent ff6fd70e58
commit 24c6e99696

View File

@@ -24,6 +24,11 @@ NO_INFO_STATUS = 'no_info'
ERROR_STATUS = 'error'
TIMEOUT_STATUS = 'timeout'
REVERT_ACTION = 'revert'
# Possible app status values, compared against each app's 'status' field.
# 'uploaded' and 'applied' are treated as settled states when classifying
# apps as updated / not yet updated; 'update-starting' and 'updating' mark
# an app whose update or revert is still in progress.
APP_UPLOAD_SUCCESS = 'uploaded'
APP_UPDATE_STARTING = 'update-starting'
APP_UPDATE_IN_PROGRESS = 'updating'
APP_APPLY_SUCCESS = 'applied'
def get_sysinv_client():
@@ -35,10 +40,80 @@ def get_sysinv_client():
return sysinv_client
def log_apps_progress_via_database(client_and_release_info, action='update'):
    """Log which apps have finished, not started, or are mid update/revert.

    Every app returned by ``client.kube_app.get_all_apps()`` is classified
    from its ``status`` and ``app_version`` fields:

    * finished    - status is applied/uploaded and ``to_release`` occurs in
                    the app version;
    * not started - status is applied/uploaded and ``from_release`` occurs
                    in the app version (checked only when ``to_release``
                    does not);
    * in progress - status is update-starting/updating.

    Each non-empty group is written to the log as a comma-separated list of
    app names, in the order above. The wording says "reverted/reverting"
    when ``action`` equals ``REVERT_ACTION`` and "updated/updating"
    otherwise.

    NOTE(review): an app that matches none of the three conditions (any
    other status, or a settled app whose version contains neither release)
    is not mentioned in the log at all.

    Args:
        client_and_release_info (dict): Must provide the keys
            - 'client': object exposing ``kube_app.get_all_apps()``.
            - 'from_release': release string the apps are moving away from.
            - 'to_release': release string the apps are moving to.
        action (str, optional): 'update' (default) or the revert action.

    Returns:
        None. If the apps cannot be fetched, the error is logged and the
        function returns without reporting anything else.
    """
    client = client_and_release_info['client']
    from_release = client_and_release_info['from_release']
    to_release = client_and_release_info['to_release']

    try:
        apps = client.kube_app.get_all_apps()
    except Exception as e:
        # Best-effort reporting: a failed query must not abort the caller.
        LOG.error(f"Failed to get apps from database: {e}")
        return

    settled_states = (APP_APPLY_SUCCESS, APP_UPLOAD_SUCCESS)
    in_flight_states = (APP_UPDATE_STARTING, APP_UPDATE_IN_PROGRESS)

    finished = []
    pending = []
    in_flight = []
    for app in apps:
        app_name, app_status, version = app['name'], app['status'], app['app_version']
        if app_status in settled_states:
            # The target release is checked first, so it wins if both match.
            if to_release in version:
                finished.append(app_name)
            elif from_release in version:
                pending.append(app_name)
        elif app_status in in_flight_states:
            in_flight.append(app_name)

    # One heading per group, in the order the groups are reported.
    if action == REVERT_ACTION:
        headings = (
            "Reverted apps up to now: ",
            "Applications that have not yet started the reverting process: ",
            "Applications currently in the reverting process: ",
        )
    else:
        headings = (
            "Updated apps up to now: ",
            "Applications that have not yet started the updating process: ",
            "Applications currently in the updating process: ",
        )

    for heading, names in zip(headings, (finished, pending, in_flight)):
        if names:
            LOG.info(f"{heading}{', '.join(names)}")
def log_progress(
max_attempts,
currently_attempt,
status,
client_and_release_info,
failed_apps=[],
updated_apps=[],
error_msg=None,
@@ -73,20 +148,30 @@ def log_progress(
progress_log = status_to_msg[status]
if status in (FAILED_STATUS, ERROR_STATUS, TIMEOUT_STATUS):
if status in (FAILED_STATUS, ERROR_STATUS):
LOG.error(progress_log)
if apps_msg:
LOG.info(apps_msg)
return
elif status == TIMEOUT_STATUS:
log_apps_progress_via_database(client_and_release_info, action=action)
LOG.warning(
"The apps listed above may change as sysinv continues to update/revert apps. "
"If the app that's taking longer than expected to update/revert is resolved, "
"the update will continue for the remaining apps."
)
LOG.error(progress_log)
return
LOG.info(progress_log)
if apps_msg:
LOG.info(apps_msg)
def check_apps_update_progress(client, action='update'):
def check_apps_update_progress(client_and_release_info, action='update'):
max_attempts = int(TIMEOUT_LIMIT_IN_MINUTES*60 / PROGRESS_CHECK_INTERVAL_IN_SECONDS)
currently_attempt = 0
client = client_and_release_info['client']
while currently_attempt < max_attempts:
try:
response = client.kube_app.get_apps_update_status()
@@ -98,6 +183,7 @@ def check_apps_update_progress(client, action='update'):
max_attempts,
currently_attempt,
status,
client_and_release_info,
response['failed_apps'],
response['updated_apps'],
action=action
@@ -116,26 +202,52 @@ def check_apps_update_progress(client, action='update'):
max_attempts,
currently_attempt,
ERROR_STATUS,
client_and_release_info,
error_msg=e,
action=action
)
sleep(PROGRESS_CHECK_INTERVAL_IN_SECONDS)
currently_attempt += 1
log_progress(max_attempts, currently_attempt, TIMEOUT_STATUS)
log_progress(
max_attempts, currently_attempt, TIMEOUT_STATUS, client_and_release_info, action=action)
return False
def main():
action = sys.argv[3]
action = None
from_release = None
to_release = None
arg = 1
while arg < len(sys.argv):
if arg == 1:
from_release = sys.argv[arg]
elif arg == 2:
to_release = sys.argv[arg]
elif arg == 3:
action = sys.argv[arg]
elif arg == 4:
# Optional postgres port parameter for USM upgrade (not used
# by this script).
pass
else:
print("Invalid option %s." % sys.argv[arg])
return 1
arg += 1
if action in ('activate', 'activate-rollback'):
configure_logging()
try:
client = get_sysinv_client()
client_and_release_info = {
'client': client,
'from_release': from_release,
'to_release': to_release
}
update_operation_result = False
if action == 'activate':
client.kube_app.update_all()
sleep(5)
update_operation_result = check_apps_update_progress(client)
update_operation_result = check_apps_update_progress(client_and_release_info)
elif action == 'activate-rollback':
if client.kube_app.get_all_apps_by_status('apply-failed'):
LOG.error(
@@ -145,7 +257,8 @@ def main():
return 1
client.kube_app.rollback_all_apps()
sleep(5)
update_operation_result = check_apps_update_progress(client, REVERT_ACTION)
update_operation_result = check_apps_update_progress(
client_and_release_info, REVERT_ACTION)
if update_operation_result:
return 0
return 1