
This fix makes the software deploy precheck ignore the subcloud offline alarm (280.001) for patch and upgrade software deployments. Test Plan: PASSED: software deploy precheck ignores alarm 280.001 for software deployment. PASSED: vim sw-deploy strategy creation is successful when alarm 280.001 is present. Closes-Bug: 2088023 Change-Id: Ic31847e2119b0d28bee0897a7d7f36aca1311813 Signed-off-by: Vanathi.Selvaraju <vanathi.selvaraju@windriver.com>
492 lines
20 KiB
Python
492 lines
20 KiB
Python
#!/usr/bin/python3
|
|
# -*- encoding: utf-8 -*-
|
|
#
|
|
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
|
#
|
|
# Copyright (c) 2023-2024 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
|
|
"""
|
|
Run platform upgrade deploy precheck as a standalone executable
|
|
"""
|
|
|
|
import argparse
|
|
import base64
|
|
import json
|
|
import logging as LOG
|
|
import os
|
|
import re
|
|
import requests
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
|
|
from lxml import etree as ElementTree
|
|
from tsconfig.tsconfig import SW_VERSION
|
|
|
|
import upgrade_utils
|
|
|
|
|
|
# TODO(heitormatsui) keep updated for every release
# Kubernetes versions the active cluster may be running for the
# upgrade precheck to pass (see _check_kube_version).
SUPPORTED_K8S_VERSIONS = [
    "v1.24.4", "v1.25.3", "v1.26.1",
    "v1.27.5", "v1.28.4", "v1.29.2",
]

# Exit codes returned by main()
RC_SUCCESS = 0
RC_UNHEALTHY = 3

# Release states compared against the 'state' field of each release
STATE_AVAILABLE = 'available'
STATE_DEPLOYED = 'deployed'
|
|
|
|
class HealthCheck(object):
    """This class represents a general health check object
    that uses sysinv-client to run system health checks"""

    SUCCESS_MSG = 'OK'
    FAIL_MSG = 'Fail'

    def __init__(self, config):
        """
        Store the script configuration and prepare sysinv access
        :param config: dict with the parameters passed to the script
        :raises AttributeError: if the path of this script does not contain
                                a "rel-XX.YY.Z" directory, since the target
                                release is derived from that path
        """
        self._config = config

        # get target release from script directory location; the pattern is
        # a raw string so "\d" is a regex class rather than a string escape,
        # and the dots are escaped so they only match literal dots
        self._target_release = re.match(
            r"^.*/rel-(\d\d\.\d\d\.\d+)/", __file__).group(1)
        # drop the last version component, e.g. "XX.YY.Z" -> "XX.YY"
        self._major_release = self._target_release.rsplit(".", 1)[0]

        # get sysinv token, endpoint and client
        self._sysinv_token, self._sysinv_endpoint = \
            upgrade_utils.get_token_endpoint(config, service_type="platform")
        self._sysinv_client = upgrade_utils.get_sysinv_client(
            self._sysinv_token, self._sysinv_endpoint)

    def _check_license(self, version):
        """
        Validates the current license is valid for the specified version
        :param version: version to be checked against installed license
        :return: True is license is valid for version, False otherwise
        """
        license_dict = self._sysinv_client.license.show()
        if license_dict["error"]:
            return False

        # create temp file with license content to run verify-license binary against it
        with tempfile.NamedTemporaryFile(mode="w", delete=True) as license_file:
            license_file.write(license_dict["content"])
            # the binary opens the file by name, so the buffered content
            # must reach the file before the binary runs
            license_file.flush()
            try:
                subprocess.check_call(["/usr/bin/verify-license",  # pylint: disable=not-callable
                                       license_file.name,
                                       version])
            except subprocess.CalledProcessError:
                return False
        return True

    # TODO(heitormatsui): implement patch precheck targeted against USM
    # and implement patch precheck for subcloud
    def _check_required_patches_state(self, required_patches, patch_health_check=False):
        """
        Check if the required patches are in 'deployed' state, if patch_health_check is
        True, the required_patches can be in 'available' state as well.
        :param required_patches: list of patches to be checked
        :param patch_health_check: boolean if is a patch or upgrade health check
        :return: boolean indicating success/failure and list of patches
                 that are not in the 'deployed' or 'available' state
        """
        # "releases" is a JSON-encoded list; a missing or empty value means
        # no releases (json.loads("") would raise)
        releases = json.loads(self._config.get("releases") or "[]")

        allowed_patches = {
            release['release_id'] for release in releases
            if release['state'] == STATE_DEPLOYED or
            (patch_health_check and release['state'] == STATE_AVAILABLE)
        }

        # de-duplicate while keeping the order given by the caller so the
        # reported list is stable between runs
        missing_patches = [patch for patch in dict.fromkeys(required_patches)
                           if patch not in allowed_patches]

        return not missing_patches, missing_patches

    def run_general_health_check(self):
        """
        Run general health check using sysinv client
        :return: tuple (health_ok, output) where output is the text returned
                 by the sysinv health API plus the patch-current report line
        """
        force = self._config.get("force", False)

        alarm_ignore_list = ["280.001",  # subcloud offline
                             "280.002",  # subcloud resource out-of-sync
                             "280.003",  # subcloud backup failure
                             "280.004",  # subcloud peer group in disconnected state
                             "280.005",  # subcloud peer group managed with lower priority
                             "900.201",  # Software upgrade auto apply in progress
                             ]
        api_cmd = self._sysinv_endpoint + "/health/kube-upgrade"

        if force:
            api_cmd += '/relaxed'

        if alarm_ignore_list:
            # the list is sent as its Python repr
            # NOTE(review): presumably the sysinv API parses this format - confirm
            api_cmd += f'?alarm_ignore_list={alarm_ignore_list}'

        output = upgrade_utils.call_api(self._sysinv_token, 'GET', api_cmd)

        # check hosts are patch current
        deploy_in_progress = json.loads(
            self._config.get("deploy_in_progress") or "{}")
        if deploy_in_progress:
            from_load = deploy_in_progress["from_release"]
            to_load = deploy_in_progress["to_release"]
            output += ('All hosts are patch current: [%s]\n') \
                % (HealthCheck.FAIL_MSG)
            output += ('Deployment in progress: %s to %s\n' % (from_load, to_load))
        else:
            output += ('All hosts are patch current: [%s]\n') \
                % (HealthCheck.SUCCESS_MSG)

        # any failed item, from the API report or from the check above,
        # makes the whole general health check fail
        health_ok = not deploy_in_progress and HealthCheck.FAIL_MSG not in output

        return health_ok, output
|
|
|
|
|
|
class UpgradeHealthCheck(HealthCheck):
    """This class represents a upgrade-specific health check object
    that verifies if system is in a valid state for upgrade"""

    @staticmethod
    def _result_msg(success):
        """Map a boolean check result to the OK/Fail text used in the report"""
        return HealthCheck.SUCCESS_MSG if success else HealthCheck.FAIL_MSG

    # TODO(heitormatsui): switch from using upgrade metadata xml to
    # the new USM metadata format
    def _check_valid_upgrade_path(self):
        """
        Checks if active release to specified release is a valid upgrade path
        :return: tuple (success, active_release, required_patches) where
                 required_patches is the list of patches the metadata
                 requires for the active release (empty if none/unsupported)
        """
        # Get active release
        isystem = self._sysinv_client.isystem.list()[0]
        active_release = isystem.software_version

        # supported_release is a dict with {release: required_patch}
        supported_releases = dict()

        # Parse upgrade metadata file for supported upgrade paths
        root = ElementTree.parse(
            "/var/www/pages/feed/rel-%s/upgrades/metadata.xml" % self._major_release)
        for upgrade in root.find("supported_upgrades").findall("upgrade"):
            version = upgrade.find("version")
            required_patch = upgrade.find("required_patch")
            supported_releases[version.text] = \
                [required_patch.text] if required_patch is not None else []

        success = active_release in supported_releases
        return success, active_release, supported_releases.get(active_release, [])

    # TODO(heitormatsui) do we need this check on USM? Remove if we don't
    def _check_active_is_controller_0(self):
        """Checks that active controller is controller-0"""
        return any(
            host.hostname == "controller-0" and
            "Controller-Active" in host.capabilities["Personality"]
            for host in self._sysinv_client.ihost.list())

    def _check_kube_version(self, supported_versions):
        """
        Check if active k8s version is in a list of supported versions
        :param supported_versions: list of supported k8s versions
        :return: boolean indicating success/failure and active k8s version
                 (None when no version is in "active" state)
        """
        active_version = next(
            (kv.version for kv in self._sysinv_client.kube_version.list()
             if kv.state == "active"),
            None)
        return active_version in supported_versions, active_version

    def _report_upgrade_path(self):
        """Run the upgrade path check and build its report line
        :return: tuple (success, output, required_patches)
        """
        success, active_release, required_patches = self._check_valid_upgrade_path()
        output = 'Valid upgrade path from release %s to %s: [%s]\n' \
            % (active_release, self._major_release, self._result_msg(success))
        return success, output, required_patches

    def _report_required_patches(self, success, missing_patches):
        """Build the report lines for a required patches check result"""
        output = 'Required patches are applied: [%s]\n' % self._result_msg(success)
        if not success:
            output += '-> Patches not applied: [%s]\n' \
                % ', '.join(missing_patches)
        return output

    def _report_platform_issuer(self):
        """Check system-local-ca's private key is RSA and build the report
        :return: tuple (success, output); output is empty on success
        """
        # TODO(mdecastr) Plan is to add support to ECC key, this verification need to
        # be re evaluated in future releases if the support is implemented.
        if upgrade_utils.get_distributed_cloud_role() == 'subcloud':
            # system-local-ca in subclouds either match the systemcontroller's,
            # or it will be changed to match in upgrade activation
            LOG.info("Checking system-local-ca's private key is not required for subclouds.")
            success = True
        else:
            success = self._check_local_issuer_rsa_private_key()

        if success:
            LOG.info("system-local-ca has a valid private key.")
            return True, ""

        LOG.error("system-local-ca's private key is either not RSA or not valid.")
        output = 'Platform Issuer: [%s]\n' % (HealthCheck.FAIL_MSG)
        output += ('-> Platform Issuer (system-local-ca) TLS private key is not valid. '
                   'Only RSA keys are supported.\n'
                   ' Please perform the \'Update system-local-ca or Migrate Platform '
                   'Certificates to use Cert Manager\'\n'
                   ' procedure to update the Platform Issuer, providing a valid RSA '
                   'cert/key to be used by the issuer.\n')
        return False, output

    def run_health_check(self):
        """
        Run specific upgrade health checks
        :return: tuple (health_ok, output)
        """
        # run health check for 22.12
        # TODO(ShawnLi): remove this once upgrade from 22.12 is deprecated
        if SW_VERSION == '22.12':
            return self.run_health_check_in_from_release()

        # every check runs even if an earlier one failed, so the report
        # always lists the full set of results

        # check if it is a valid upgrade path
        health_ok, output, required_patches = self._report_upgrade_path()

        # check if required patches are deployed
        success, missing_patches = self._check_required_patches_state(required_patches)
        output += self._report_required_patches(success, missing_patches)
        health_ok = health_ok and success

        # check if k8s version is valid
        success, active_version = self._check_kube_version(SUPPORTED_K8S_VERSIONS)
        output += 'Active kubernetes version [%s] is a valid supported version: [%s]\n' \
            % (active_version, self._result_msg(success))

        if not active_version:
            output += ('-> Failed to get version info. Upgrade kubernetes to one of the '
                       'supported versions [%s] and ensure that the kubernetes version '
                       'information is available in the kubeadm configmap.\n'
                       'See "system kube-version-list"\n' % ", ".join(SUPPORTED_K8S_VERSIONS))
        elif not success:
            output += ('-> Upgrade active kubernetes version [%s] to one of the '
                       'supported versions [%s]. See "system kube-version-list"\n' %
                       (active_version, ", ".join(SUPPORTED_K8S_VERSIONS)))
        health_ok = health_ok and success

        # TODO(heitormatsui) Do we need the following check on USM?
        # The load is only imported to controller-0. An upgrade can only
        # be started when controller-0 is active.
        success = self._check_active_is_controller_0()
        output += \
            'Active controller is controller-0: [%s]\n' \
            % self._result_msg(success)
        health_ok = health_ok and success

        # check installed license
        # NOTE(nicodemos): We just need to check the license for major release
        success = self._check_license(self._major_release)
        output += 'Installed license is valid: [%s]\n' \
            % self._result_msg(success)
        health_ok = health_ok and success

        # Check if system-local-ca's private key is RSA
        success, issuer_output = self._report_platform_issuer()
        output += issuer_output
        health_ok = health_ok and success

        return health_ok, output

    def run_health_check_in_from_release(self):
        """
        Run the health check in 22.12 release environment
        :return: tuple (success, output)
        """
        health_ok, output, required_patches = self._report_upgrade_path()

        # check if required patches are deployed
        success, missing_patches = self._check_required_patches(required_patches)
        output += self._report_required_patches(success, missing_patches)
        health_ok = health_ok and success

        # check if system-local-ca's private key is RSA
        success, issuer_output = self._report_platform_issuer()
        output += issuer_output
        health_ok = health_ok and success

        return health_ok, output

    def _check_required_patches(self, required_patches):
        """
        Check if required patches are applied using the patching API
        :param required_patches: list of patches to be checked
        :return: tuple (success, missing_patches); when the patching API
                 cannot be queried, success is False and missing_patches is
                 a one-element list holding the error description
        """
        try:
            patch_token, patch_endpoint = upgrade_utils.get_token_endpoint(
                self._config, service_type="patching")
            patch_endpoint += "/v1/query/"
            response = requests.get(patch_endpoint, headers={
                "X-Auth-Token": patch_token}, timeout=10)
        except Exception as e:  # deliberately broad: any failure here fails the check
            # returned as a list because callers join the items with ', ';
            # a bare string would be joined character by character
            return False, ["Failed to connect to patching API: %s" % e]

        try:
            query_patches = response.json()['pd']
        except (ValueError, KeyError, TypeError) as e:
            # non-JSON body or unexpected payload: fail the check instead
            # of aborting the whole precheck with a traceback
            return False, ["Invalid response from patching API: %s" % e]

        applied_patches = {
            patch_key for patch_key, patch in query_patches.items()
            if patch.get('patchstate') in {'Applied', 'Committed'}
        }

        missing_patches = [patch for patch in required_patches
                           if patch not in applied_patches]

        return not missing_patches, missing_patches

    def _check_local_issuer_rsa_private_key(self):
        """
        Check if the private key of the system-local-ca secret is RSA
        :return: False if the secret or its tls.key cannot be retrieved,
                 otherwise the result of upgrade_utils.is_tls_key_rsa
        """
        secret = upgrade_utils.get_secret_data_yaml('system-local-ca', 'cert-manager')
        if secret is None or secret == '':
            LOG.error("Error while retrieving system-local-ca's secret data.")
            return False

        key_b64 = secret.get('data', {}).get('tls.key', None)
        if key_b64 is None:
            LOG.error("Could not retrieve system-local-ca private key.")
            return False
        return upgrade_utils.is_tls_key_rsa(base64.b64decode(key_b64).decode('utf-8'))
|
|
|
|
|
|
class PatchHealthCheck(HealthCheck):
    """This class represents a patch-specific health check object
    that verifies if system is in valid state to apply a patch"""

    def _get_required_patches(self):
        """
        Get required patches for a target release
        :return: list with the "requires" entries of the first release whose
                 sw_version matches the target release, empty if none matches
        """
        # "releases" is a JSON-encoded list; treat a missing or empty value
        # as no releases instead of letting json.loads fail on it
        releases = json.loads(self._config.get("releases") or "[]")
        for release in releases:
            if release["sw_version"] == self._target_release:
                return list(release["requires"])
        return []

    def run_health_check(self):
        """
        Run specific patch health checks
        :return: tuple (health_ok, output)
        """
        # check required patches for target release; for a patch the
        # required patches may be either deployed or available
        required_patches = self._get_required_patches()
        success, missing_patches = self._check_required_patches_state(required_patches, True)

        output = 'Required patches are deployed or available: [%s]\n' \
            % (HealthCheck.SUCCESS_MSG if success else HealthCheck.FAIL_MSG)
        if not success:
            output += '-> Patches not deployed or available: [%s]\n' \
                % ', '.join(missing_patches)

        return success, output
|
|
|
|
|
|
def parse_config(args=None):
    """Parse the parameters passed to the script

    :param args: list of command line arguments; when None the arguments
                 are taken from sys.argv by argparse
    :return: dict mapping each option name to its parsed value
    """
    parser = argparse.ArgumentParser(
        description="Run health checks to verify if the system "
                    "meets the requirements to deploy a specific "
                    "release.")

    # credentials and scope, all mandatory
    mandatory_options = (
        ("--auth_url", "Authentication URL"),
        ("--username", "Username"),
        ("--password", "Password"),
        ("--project_name", "Project Name"),
        ("--user_domain_name", "User Domain Name"),
        ("--project_domain_name", "Project Domain Name"),
    )
    for option, help_text in mandatory_options:
        parser.add_argument(option, help=help_text, required=True)

    parser.add_argument("--region_name", help="Region Name", default="RegionOne")

    # boolean switches, False unless present on the command line
    switch_options = (
        ("--force", "Ignore non-critical health checks"),
        ("--patch", "Set precheck to run against a patch release"),
    )
    for option, help_text in switch_options:
        parser.add_argument(option, help=help_text, action="store_true")

    # JSON-encoded values, kept as strings here and decoded by the checks
    parser.add_argument("--releases", help="Releases", default="[]")
    parser.add_argument("--deploy_in_progress",
                        help="check hosts are patch current",
                        default="{}")

    # if args was not passed will use sys.argv by default
    return vars(parser.parse_args(args))
|
|
|
|
|
|
def main(argv=None):
    """Run the general and the release-specific health checks

    Prints the combined report to stdout.

    :param argv: list of command line arguments, passed to parse_config
    :return: RC_SUCCESS if both checks passed, RC_UNHEALTHY otherwise
    """
    config = parse_config(argv)

    # a patch release gets the patch checks, anything else the upgrade checks
    check_class = PatchHealthCheck if config.get("patch", False) else UpgradeHealthCheck
    health_check = check_class(config)

    # execute general health check
    general_health_ok, general_output = health_check.run_general_health_check()
    # execute release-specific health check
    specific_health_ok, specific_output = health_check.run_health_check()

    # combine health check results removing extra line breaks/blank spaces from the output
    report = "\n".join((general_output.strip(), specific_output.strip()))

    # print health check output and exit
    print(report)
    if general_health_ok and specific_health_ok:
        return RC_SUCCESS
    return RC_UNHEALTHY
|
|
|
|
|
|
if __name__ == "__main__":
    # log to the software log file, then exit with the precheck return code
    upgrade_utils.configure_logging("/var/log/software.log", log_level=LOG.INFO)
    exit_code = main()
    sys.exit(exit_code)
|