Add logic to script 21 to support app rollback during the activate-rollback process.

This change introduces adjustments to support application rollback
after the activate step has been executed. It adds the necessary logic
to trigger the rollback operation at the appframework level, using the
AppUpdateManager introduced in:
https://review.opendev.org/c/starlingx/config/+/954298

Test plan:
PASS: build-pkgs && build-image.
PASS: AIO-SX master fresh install.
PASS: check if all apps were correctly installed.

PASS: build-pkgs && build-image.
PASS: AIO-SX 10 fresh install.
PASS: upgrade to starlingx master until deploy done step.

PASS: build-pkgs && build-image.
PASS: AIO-SX 10 fresh install.
PASS: upgrade to starlingx master until activate-done step
PASS: check if all apps were correctly updated.
PASS: run the activate-rollback step and check that all apps roll back
      correctly.
PASS: If any app fails to update, it must recover to the previous
      version.
PASS: If the app fails to update and fails to recover to the previous
      version and is left in apply-failed, the script will fail and
      require manual intervention.
PASS: If the app fails to update and is configured to not perform
      recovery during the update (update_failure_no_rollback: True),
      the script will fail and manual intervention will be required.

PASS: build-pkgs && build-image.
PASS: AIO-SX 10 fresh install.
PASS: install different apps and force two to stay in an apply-failed
      state, and two others in an upload-failed state.
PASS: upgrade to starlingx master until activate-done step
PASS: run the activate-rollback step and check if the process fails.
      The activate-rollback process cannot occur if any app is in
      the apply-failed state.
PASS: If any app fails to roll back, recovery will not be triggered
      and the app will remain in apply-failed status, requiring manual
      intervention.

Depends-on: https://review.opendev.org/c/starlingx/config/+/954298

Story: 2011357
Task: 52492

Change-Id: I64a32aef07471a1ff82abf5827995740abea6775
Signed-off-by: edias <edson.dias@windriver.com>
This commit is contained in:
edias
2025-07-02 17:26:56 -03:00
committed by David Bastos
parent a784b11d57
commit 9fb805b63f

View File

@@ -34,51 +34,44 @@ def get_sysinv_client():
return sysinv_client
def start_update_of_all_apps(client):
try:
client.kube_app.update_all()
return 0
except Exception as e:
LOG.error(f"ERROR: {e}")
return 1
def log_progress(
max_attempts,
currently_attempt,
status,
failed_apps=[],
updated_apps=[],
error_msg=None
error_msg=None,
action='update'
):
attempt_msg = f"Update checking {currently_attempt + 1}/{max_attempts}:"
attempt_msg = f"{action.capitalize()} checking {currently_attempt + 1}/{max_attempts}"
interval_msg = f"Checking again in {PROGRESS_CHECK_INTERVAL_IN_SECONDS} second(s)."
status_to_msg = {
IN_PROGRESS_STATUS: f'{attempt_msg}: Application updates still in progress. {interval_msg}',
FAILED_STATUS: f'{attempt_msg}: The application update process failed',
COMPLETED_STATUS: f'{attempt_msg}: Application updates successfully finished.',
IN_PROGRESS_STATUS: f'{attempt_msg}: Application {action} still in progress. {interval_msg}',
FAILED_STATUS: f'{attempt_msg}: The application {action} process failed',
COMPLETED_STATUS: f'{attempt_msg}: Application {action} successfully finished.',
NO_INFO_STATUS: f'{attempt_msg}: No info from the Application Framework regarding \
application updates. {interval_msg}',
application {action}. {interval_msg}',
ERROR_STATUS: f'{attempt_msg} failed with error: {error_msg}',
TIMEOUT_STATUS: f'{attempt_msg}: Application updates failed due to a timeout. \
TIMEOUT_STATUS: f'{attempt_msg}: Application {action} failed due to a timeout. \
For more details, check the sysinv logs at /var/log/sysinv.log'
}
apps_msg = ''
if updated_apps and status == IN_PROGRESS_STATUS:
apps_msg += f"Updated apps up to now: {', '.join(updated_apps)}."
apps_msg += f"{action.capitalize()}d apps up to now: {', '.join(updated_apps)}."
elif updated_apps and status == COMPLETED_STATUS:
apps_msg += f"Updated apps: {', '.join(updated_apps)}."
apps_msg += f"{action.capitalize()}d apps: {', '.join(updated_apps)}."
if failed_apps:
apps_msg += f"The following apps did not update correctly and require manual \
apps_msg += f"The following apps did not {action} correctly and require manual \
intervention: {', '.join(failed_apps)}."
progress_log = status_to_msg[status]
if status in ('failed', 'timeout', 'error'):
if status in (FAILED_STATUS, ERROR_STATUS, TIMEOUT_STATUS):
LOG.error(progress_log)
if apps_msg:
LOG.info(apps_msg)
@@ -89,7 +82,7 @@ def log_progress(
LOG.info(apps_msg)
def check_apps_update_progress(client):
def check_apps_update_progress(client, action='update'):
max_attempts = int(TIMEOUT_LIMIT_IN_MINUTES*60 / PROGRESS_CHECK_INTERVAL_IN_SECONDS)
currently_attempt = 0
while currently_attempt < max_attempts:
@@ -104,7 +97,8 @@ def check_apps_update_progress(client):
currently_attempt,
status,
response['failed_apps'],
response['updated_apps']
response['updated_apps'],
action=action
)
if status == IN_PROGRESS_STATUS:
sleep(PROGRESS_CHECK_INTERVAL_IN_SECONDS)
@@ -120,7 +114,8 @@ def check_apps_update_progress(client):
max_attempts,
currently_attempt,
ERROR_STATUS,
error_msg=e
error_msg=e,
action=action
)
sleep(PROGRESS_CHECK_INTERVAL_IN_SECONDS)
currently_attempt += 1
@@ -130,15 +125,31 @@ def check_apps_update_progress(client):
def main():
action = sys.argv[3]
if action == 'activate':
if action in ('activate', 'activate-rollback'):
configure_logging()
try:
client = get_sysinv_client()
start_update_of_all_apps(client)
update_operation_result = False
if action == 'activate':
client.kube_app.update_all()
sleep(5)
update_operation_result = check_apps_update_progress(client)
elif action == 'activate-rollback':
if client.kube_app.get_all_apps_by_status('apply-failed'):
LOG.error(
"One or more applications are in 'apply-failed' status."
"Manual intervention is required."
)
return 1
client.kube_app.rollback_all_apps()
sleep(5)
update_operation_result = check_apps_update_progress(client, 'revert')
if update_operation_result:
return 0
return 1
except Exception as e:
LOG.error(e)
return 1
if __name__ == "__main__":