
activate-rollback process. This change introduces adjustments to support application rollback after the activate step has been executed. It adds the necessary logic to trigger the rollback operation at the appframework level, using the AppUpdateManager introduced in: https://review.opendev.org/c/starlingx/config/+/954298 Test plan: PASS: build-pkgs && build-image. PASS: AIO-SX master fresh install. PASS: check if all apps were correctly installed. PASS: build-pkgs && build-image. PASS: AIO-SX 10 fresh install. PASS: upgrade to starlingx master until deploy done step. PASS: build-pkgs && build-image. PASS: AIO-SX 10 fresh install. PASS: upgrade to starlingx master until activate-done step. PASS: check if all apps were correctly updated. PASS: run the activate-rollback step and check that all apps roll back correctly. PASS: If any app fails to update, it must recover to the previous version. PASS: If the app fails to update, fails to recover to the previous version and is left in apply-failed, the script will fail and require manual intervention. PASS: If the app fails to update and is configured to not perform recovery during the update (update_failure_no_rollback: True), the script will fail and manual intervention will be required. PASS: build-pkgs && build-image. PASS: AIO-SX 10 fresh install. PASS: install different apps and force two to stay in an apply-failed state, and two others in an upload-failed state. PASS: upgrade to starlingx master until activate-done step. PASS: run the activate-rollback step and check if the process fails. The activate-rollback process cannot occur if any app is in the apply-failed state. PASS: If any app fails to roll back, recovery will not be triggered and the app will remain in apply-failed status, requiring manual intervention. Depends-on: https://review.opendev.org/c/starlingx/config/+/954298 Story: 2011357 Task: 52492 Change-Id: I64a32aef07471a1ff82abf5827995740abea6775 Signed-off-by: edias <edson.dias@windriver.com>
157 lines
4.9 KiB
Python
157 lines
4.9 KiB
Python
#!/usr/bin/python
|
|
# Copyright (c) 2022-2025 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
|
|
import logging
|
|
import os
|
|
import sys
|
|
from time import sleep
|
|
|
|
from cgtsclient import client as cgts_client
|
|
|
|
from software.utilities.utils import configure_logging
|
|
|
|
|
|
# Logger shared by every function in this script.
LOG = logging.getLogger('main_logger')

# Polling budget: overall time limit and the pause between two status checks.
TIMEOUT_LIMIT_IN_MINUTES = 30
PROGRESS_CHECK_INTERVAL_IN_SECONDS = 20

# Status values compared against the 'status' field of the update-status
# response.
IN_PROGRESS_STATUS = 'in_progress'
FAILED_STATUS = 'failed'
COMPLETED_STATUS = 'completed'

# Status values assigned by this script itself (empty response, exception
# while polling, polling budget exhausted).
NO_INFO_STATUS = 'no_info'
ERROR_STATUS = 'error'
TIMEOUT_STATUS = 'timeout'
|
|
|
|
|
|
def get_sysinv_client():
    """Create a sysinv API client configured from the process environment.

    The auth token and the endpoint URL are read from the ``OS_AUTH_TOKEN``
    and ``SYSTEM_URL`` environment variables; a variable that is not set is
    forwarded as ``None``.

    Returns:
        The object returned by ``cgts_client.get_client`` for API version "1".
    """
    env = os.environ
    return cgts_client.get_client(
        "1",
        os_auth_token=env.get("OS_AUTH_TOKEN"),
        system_url=env.get("SYSTEM_URL"),
    )
|
|
|
|
|
|
def log_progress(
    max_attempts,
    currently_attempt,
    status,
    failed_apps=None,
    updated_apps=None,
    error_msg=None,
    action='update'
):
    """Log one progress line for the application update/revert polling loop.

    Args:
        max_attempts: Total number of status checks that will be performed.
        currently_attempt: Zero-based index of the current check; it is
            displayed one-based.
        status: One of the module-level ``*_STATUS`` constants.
        failed_apps: Names of the apps reported as failed (optional).
        updated_apps: Names of the apps already processed (optional).
        error_msg: Error detail, only used for ``ERROR_STATUS``.
        action: Verb describing the operation, e.g. 'update' or 'revert'.

    Raises:
        KeyError: If ``status`` is not one of the known status constants.
    """
    # None defaults instead of shared mutable list defaults.
    failed_apps = failed_apps or []
    updated_apps = updated_apps or []

    attempt_msg = f"{action.capitalize()} checking {currently_attempt + 1}/{max_attempts}"
    interval_msg = f"Checking again in {PROGRESS_CHECK_INTERVAL_IN_SECONDS} second(s)."

    # Adjacent literals are used for the long messages: a backslash
    # continuation inside a string would embed the source indentation in
    # the logged text.
    status_to_msg = {
        IN_PROGRESS_STATUS: f'{attempt_msg}: Application {action} still in progress. {interval_msg}',
        FAILED_STATUS: f'{attempt_msg}: The application {action} process failed',
        COMPLETED_STATUS: f'{attempt_msg}: Application {action} successfully finished.',
        NO_INFO_STATUS: (
            f'{attempt_msg}: No info from the Application Framework regarding '
            f'application {action}. {interval_msg}'
        ),
        ERROR_STATUS: f'{attempt_msg} failed with error: {error_msg}',
        TIMEOUT_STATUS: (
            f'{attempt_msg}: Application {action} failed due to a timeout. '
            'For more details, check the sysinv logs at /var/log/sysinv.log'
        ),
    }

    # An unknown status deliberately raises KeyError here, before anything
    # is logged, exactly as a direct dictionary lookup always did.
    progress_log = status_to_msg[status]

    # Past tense of the action: 'update' -> 'Updated', 'revert' -> 'Reverted'.
    suffix = 'd' if action.endswith('e') else 'ed'
    done_label = f"{action.capitalize()}{suffix}"

    apps_parts = []
    if updated_apps and status == IN_PROGRESS_STATUS:
        apps_parts.append(f"{done_label} apps up to now: {', '.join(updated_apps)}.")
    elif updated_apps and status == COMPLETED_STATUS:
        apps_parts.append(f"{done_label} apps: {', '.join(updated_apps)}.")

    if failed_apps:
        apps_parts.append(
            f"The following apps did not {action} correctly and require manual "
            f"intervention: {', '.join(failed_apps)}."
        )

    # Join with a space so the two sentences do not run together.
    apps_msg = ' '.join(apps_parts)

    if status in (FAILED_STATUS, ERROR_STATUS, TIMEOUT_STATUS):
        LOG.error(progress_log)
    else:
        LOG.info(progress_log)

    if apps_msg:
        LOG.info(apps_msg)
|
|
|
|
|
|
def check_apps_update_progress(client, action='update'):
    """Poll the application update status until it finishes or times out.

    The status is requested through ``client.kube_app.get_apps_update_status()``
    at most ``TIMEOUT_LIMIT_IN_MINUTES * 60 / PROGRESS_CHECK_INTERVAL_IN_SECONDS``
    times, waiting ``PROGRESS_CHECK_INTERVAL_IN_SECONDS`` between checks.

    Args:
        client: Client object exposing ``kube_app.get_apps_update_status()``.
        action: Verb used in the log messages, e.g. 'update' or 'revert'.

    Returns:
        True when the reported status is ``COMPLETED_STATUS``; False when it
        is ``FAILED_STATUS`` or when all attempts were used up.
    """
    max_attempts = int(TIMEOUT_LIMIT_IN_MINUTES * 60 / PROGRESS_CHECK_INTERVAL_IN_SECONDS)
    currently_attempt = 0

    while currently_attempt < max_attempts:
        try:
            response = client.kube_app.get_apps_update_status()
            if response:
                status = response['status']
                failed_apps = response['failed_apps']
                updated_apps = response['updated_apps']
            else:
                # An empty response is not an error: report it as "no info"
                # instead of failing on a subscript of the empty value.
                status = NO_INFO_STATUS
                failed_apps = []
                updated_apps = []

            log_progress(
                max_attempts,
                currently_attempt,
                status,
                failed_apps,
                updated_apps,
                action=action
            )

            if status == FAILED_STATUS:
                return False
            if status == COMPLETED_STATUS:
                return True
        except Exception as e:
            # Errors while polling are logged and retried until the attempt
            # budget runs out; this also covers an unexpected status value.
            log_progress(
                max_attempts,
                currently_attempt,
                ERROR_STATUS,
                error_msg=e,
                action=action
            )

        # Every non-terminal outcome (in progress, no info, error) waits
        # before the next check, as the logged messages announce.
        sleep(PROGRESS_CHECK_INTERVAL_IN_SECONDS)
        currently_attempt += 1

    # Report the last attempt actually performed (log_progress displays the
    # index one-based) and keep the action name in the timeout message.
    log_progress(max_attempts, max_attempts - 1, TIMEOUT_STATUS, action=action)
    return False
|
|
|
|
|
|
def main():
    """Run the application update or rollback for the requested action.

    The action is read from ``sys.argv[3]``. For 'activate' all applications
    are updated; for 'activate-rollback' they are rolled back, unless some
    application is in 'apply-failed' status. Any other action is a no-op.

    Returns:
        0 on success or when the action is not handled by this script,
        1 on failure.

    Raises:
        IndexError: If fewer than four command-line arguments are present.
    """
    action = sys.argv[3]
    if action not in ('activate', 'activate-rollback'):
        # Nothing to do for other actions; explicit success exit code
        # (equivalent to the implicit None passed to sys.exit()).
        return 0

    configure_logging()
    try:
        client = get_sysinv_client()
        update_operation_result = False

        if action == 'activate':
            client.kube_app.update_all()
            # Short pause before the first status check.
            sleep(5)
            update_operation_result = check_apps_update_progress(client)
        else:
            # action == 'activate-rollback'
            if client.kube_app.get_all_apps_by_status('apply-failed'):
                LOG.error(
                    "One or more applications are in 'apply-failed' status. "
                    "Manual intervention is required."
                )
                return 1
            client.kube_app.rollback_all_apps()
            # Short pause before the first status check.
            sleep(5)
            update_operation_result = check_apps_update_progress(client, 'revert')

        return 0 if update_operation_result else 1
    except Exception as e:
        # Top-level boundary: log the failure and report it via exit code.
        LOG.error(e)
        return 1
|
|
|
|
|
|
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|