Add logic to script 21 to support app rollback during the activate-rollback process.

This change introduces adjustments to support application rollback after the activate step has been executed. It adds the necessary logic to trigger the rollback operation at the appframework level, using the AppUpdateManager introduced in: https://review.opendev.org/c/starlingx/config/+/954298

Test plan:
PASS: build-pkgs && build-image.
PASS: AIO-SX master fresh install.
PASS: check if all apps were correctly installed.
PASS: build-pkgs && build-image.
PASS: AIO-SX 10 fresh install.
PASS: upgrade to starlingx master until deploy done step.
PASS: build-pkgs && build-image.
PASS: AIO-SX 10 fresh install.
PASS: upgrade to starlingx master until activate-done step.
PASS: check if all apps were correctly updated.
PASS: run the activate-rollback step and check if all apps roll back correctly.
PASS: If any app fails to update, it must recover to the previous version.
PASS: If the app fails to update, fails to recover to the previous version and is left in apply-failed, the script will fail and require manual intervention.
PASS: If the app fails to update and is configured to not perform recovery during the update (update_failure_no_rollback: True), the script will fail and manual intervention will be required.
PASS: build-pkgs && build-image.
PASS: AIO-SX 10 fresh install.
PASS: install different apps and force two to stay in an apply-failed state, and two others in an upload-failed state.
PASS: upgrade to starlingx master until activate-done step.
PASS: run the activate-rollback step and check if the process fails. The activate-rollback process cannot occur if any app is in the apply-failed state.
PASS: If any app fails to roll back, recovery will not be triggered and the app will remain in apply-failed status, requiring manual intervention.

Depends-on: https://review.opendev.org/c/starlingx/config/+/954298
Story: 2011357
Task: 52492
Change-Id: I64a32aef07471a1ff82abf5827995740abea6775
Signed-off-by: edias <edson.dias@windriver.com>
This commit is contained in:
@@ -34,51 +34,44 @@ def get_sysinv_client():
|
||||
return sysinv_client
|
||||
|
||||
|
||||
def start_update_of_all_apps(client):
    """Ask the application framework to start updating all applications.

    Calls ``client.kube_app.update_all()`` and turns the outcome into a
    shell-style status code instead of letting the exception propagate.

    Args:
        client: Object exposing ``kube_app.update_all()`` (presumably the
            client returned by ``get_sysinv_client()`` -- confirm at the
            call site).

    Returns:
        int: 0 if the request was issued without raising, 1 otherwise.
    """
    try:
        client.kube_app.update_all()
    except Exception as e:
        # Same "ERROR: ..." message as before, still at ERROR level, but
        # LOG.exception also records the traceback so the failure can be
        # diagnosed from the log alone; %-args defer string formatting to
        # the logging framework.
        LOG.exception("ERROR: %s", e)
        return 1
    return 0
|
||||
|
||||
|
||||
def log_progress(
|
||||
max_attempts,
|
||||
currently_attempt,
|
||||
status,
|
||||
failed_apps=[],
|
||||
updated_apps=[],
|
||||
error_msg=None
|
||||
error_msg=None,
|
||||
action='update'
|
||||
):
|
||||
attempt_msg = f"Update checking {currently_attempt + 1}/{max_attempts}:"
|
||||
|
||||
attempt_msg = f"{action.capitalize()} checking {currently_attempt + 1}/{max_attempts}"
|
||||
interval_msg = f"Checking again in {PROGRESS_CHECK_INTERVAL_IN_SECONDS} second(s)."
|
||||
|
||||
status_to_msg = {
|
||||
IN_PROGRESS_STATUS: f'{attempt_msg}: Application updates still in progress. {interval_msg}',
|
||||
FAILED_STATUS: f'{attempt_msg}: The application update process failed',
|
||||
COMPLETED_STATUS: f'{attempt_msg}: Application updates successfully finished.',
|
||||
IN_PROGRESS_STATUS: f'{attempt_msg}: Application {action} still in progress. {interval_msg}',
|
||||
FAILED_STATUS: f'{attempt_msg}: The application {action} process failed',
|
||||
COMPLETED_STATUS: f'{attempt_msg}: Application {action} successfully finished.',
|
||||
NO_INFO_STATUS: f'{attempt_msg}: No info from the Application Framework regarding \
|
||||
application updates. {interval_msg}',
|
||||
application {action}. {interval_msg}',
|
||||
ERROR_STATUS: f'{attempt_msg} failed with error: {error_msg}',
|
||||
TIMEOUT_STATUS: f'{attempt_msg}: Application updates failed due to a timeout. \
|
||||
TIMEOUT_STATUS: f'{attempt_msg}: Application {action} failed due to a timeout. \
|
||||
For more details, check the sysinv logs at /var/log/sysinv.log'
|
||||
}
|
||||
|
||||
apps_msg = ''
|
||||
|
||||
if updated_apps and status == IN_PROGRESS_STATUS:
|
||||
apps_msg += f"Updated apps up to now: {', '.join(updated_apps)}."
|
||||
apps_msg += f"{action.capitalize()}d apps up to now: {', '.join(updated_apps)}."
|
||||
elif updated_apps and status == COMPLETED_STATUS:
|
||||
apps_msg += f"Updated apps: {', '.join(updated_apps)}."
|
||||
apps_msg += f"{action.capitalize()}d apps: {', '.join(updated_apps)}."
|
||||
|
||||
if failed_apps:
|
||||
apps_msg += f"The following apps did not update correctly and require manual \
|
||||
apps_msg += f"The following apps did not {action} correctly and require manual \
|
||||
intervention: {', '.join(failed_apps)}."
|
||||
|
||||
progress_log = status_to_msg[status]
|
||||
|
||||
if status in ('failed', 'timeout', 'error'):
|
||||
if status in (FAILED_STATUS, ERROR_STATUS, TIMEOUT_STATUS):
|
||||
LOG.error(progress_log)
|
||||
if apps_msg:
|
||||
LOG.info(apps_msg)
|
||||
@@ -89,7 +82,7 @@ def log_progress(
|
||||
LOG.info(apps_msg)
|
||||
|
||||
|
||||
def check_apps_update_progress(client):
|
||||
def check_apps_update_progress(client, action='update'):
|
||||
max_attempts = int(TIMEOUT_LIMIT_IN_MINUTES*60 / PROGRESS_CHECK_INTERVAL_IN_SECONDS)
|
||||
currently_attempt = 0
|
||||
while currently_attempt < max_attempts:
|
||||
@@ -104,7 +97,8 @@ def check_apps_update_progress(client):
|
||||
currently_attempt,
|
||||
status,
|
||||
response['failed_apps'],
|
||||
response['updated_apps']
|
||||
response['updated_apps'],
|
||||
action=action
|
||||
)
|
||||
if status == IN_PROGRESS_STATUS:
|
||||
sleep(PROGRESS_CHECK_INTERVAL_IN_SECONDS)
|
||||
@@ -120,7 +114,8 @@ def check_apps_update_progress(client):
|
||||
max_attempts,
|
||||
currently_attempt,
|
||||
ERROR_STATUS,
|
||||
error_msg=e
|
||||
error_msg=e,
|
||||
action=action
|
||||
)
|
||||
sleep(PROGRESS_CHECK_INTERVAL_IN_SECONDS)
|
||||
currently_attempt += 1
|
||||
@@ -130,15 +125,31 @@ def check_apps_update_progress(client):
|
||||
|
||||
def main():
    """Run the application update or rollback for an upgrade action.

    The action name is read from ``sys.argv[3]``:

    * ``activate``: request an update of all applications, then poll the
      progress with ``check_apps_update_progress``.
    * ``activate-rollback``: refuse to proceed if any application is in the
      ``apply-failed`` status; otherwise request a rollback of all
      applications and poll the progress with the ``'revert'`` action label.

    Any other action is ignored.

    Returns:
        0 when the progress check reports success; 1 when it does not, when
        the rollback pre-check finds ``apply-failed`` applications, or when
        any exception is raised; ``None`` for actions this script does not
        handle.
    """
    action = sys.argv[3]
    if action not in ('activate', 'activate-rollback'):
        # Not an action handled by this script: nothing to do.
        return None

    configure_logging()
    try:
        client = get_sysinv_client()
        update_operation_result = False

        if action == 'activate':
            client.kube_app.update_all()
            # Short pause before the first progress poll.
            sleep(5)
            update_operation_result = check_apps_update_progress(client)
        elif action == 'activate-rollback':
            # A rollback must not start while any app is in 'apply-failed'.
            if client.kube_app.get_all_apps_by_status('apply-failed'):
                # The two literals are concatenated by the parser, so the
                # separating space has to be written explicitly.
                LOG.error(
                    "One or more applications are in 'apply-failed' status. "
                    "Manual intervention is required."
                )
                return 1
            client.kube_app.rollback_all_apps()
            # Short pause before the first progress poll.
            sleep(5)
            update_operation_result = check_apps_update_progress(client, 'revert')

        if update_operation_result:
            return 0
        return 1
    except Exception as e:
        # Top-level boundary of the script: report the failure and signal it
        # through the return code instead of an uncaught traceback.
        LOG.error(e)
        return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
Reference in New Issue
Block a user