USM: Change deploy precheck and start to accept arbitrary arguments.

This change adds arbitrary parameters, to reduce or avoid the need to
patchback when new features are added.
The snapshot parameter was removed to use this new change with -o
snapshot=true.
This commit also add a sanitize function to allow only letters, numbers,
space, - and _.
Usage: software deploy start/precheck <major release> -o snapshot=true
--options test=test

Test Plan:
PASS: Build-pkgs, Build-image, Install/Bootstrap/Unlock
PASS: Check precheck script receives the parameters
(by injecting logs manually)
PASS: Check deploy start receives the parameters by injecting
manual logs.
PASS: Check software.json stores the parameters under options field.
PASS: Deploy start blocked with reserved key in options.
PASS: Check activate-rollback is using deploy options for snapshot.
PASS: Major release deployment stx10 -> stx11 in sx.

Story: 2011357
Task: 52264

Change-Id: I53b8863ceeec74c45831d959fe6cea00990cc156
Signed-off-by: Luis Eduardo Bonatti <luizeduardo.bonatti@windriver.com>
This commit is contained in:
Luis Eduardo Bonatti
2025-05-22 17:02:32 -03:00
parent a8b3c98117
commit fdbb073719
11 changed files with 119 additions and 52 deletions

View File

@@ -433,6 +433,7 @@ confidence=HIGH,
# -Refactoring-
# R0205 useless-object-inheritance
# R0402 consider-using-from-import
# R0801 Similar lines in x files
# R0903 too-few-public-methods
# R0911 too-many-return-statements
# R0912 too-many-branches
@@ -453,7 +454,7 @@ confidence=HIGH,
# W1514 unspecified-encoding
# W3101 missing-timeout
disable= C0103,C0114,C0115,C0116,C0209,C0301,C0302,C0411,
C0415,R0205,R0402,R0903,R0911,R0912,R0914,R0915,
C0415,R0205,R0402,R0801,R0903,R0911,R0912,R0914,R0915,
R1702,R1705,R1714,R1722,R1724,R1725,R1732,R1735,
W0602,W0603,W0718,W1514,W3101,

View File

@@ -30,9 +30,8 @@ class DeployManager(base.Manager):
body = {}
if args.force:
body["force"] = "true"
if args.snapshot:
body["snapshot"] = "true"
if args.options:
body["options"] = args.options
if args.region_name:
body["region_name"] = args.region_name
@@ -51,8 +50,8 @@ class DeployManager(base.Manager):
body = {}
if args.force:
body["force"] = "true"
if args.snapshot:
body["snapshot"] = "true"
if args.options:
body["options"] = args.options
return self._post(path, body=body)

View File

@@ -63,11 +63,11 @@ def do_host_list(cc, args):
default=None,
required=False,
help='Run precheck against a subcloud')
@utils.arg('-s',
'--snapshot',
action='store_true',
@utils.arg('-o',
'--options',
action='append',
required=False,
help='Execute LVM snapshot feature specific health checks (AIO-SX only)')
help='Additional parameters in key=value format.')
def do_precheck(cc, args):
"""Verify whether prerequisites for installing the software deployment are satisfied"""
resp, data = cc.deploy.precheck(args)
@@ -91,11 +91,11 @@ def do_precheck(cc, args):
action='store_true',
required=False,
help='Allow bypassing non-critical checks')
@utils.arg('-s',
'--snapshot',
action='store_true',
@utils.arg('-o',
'--options',
action='append',
required=False,
help='Use LVM snapshots for faster rollback/recovery (AIO-SX only)')
help='Additional parameters in key=value format.')
def do_start(cc, args):
"""Start the software deployment"""
resp, data = cc.deploy.start(args)

View File

@@ -257,6 +257,11 @@ class UpgradeHealthCheck(HealthCheck):
return success, msg
return success, None
def _check_snapshot_option(self):
options = json.loads(self._config.get("options", {}))
snapshot = options.get("snapshot", False)
return upgrade_utils.to_bool(snapshot)
def run_health_check(self):
"""Run specific upgrade health checks"""
@@ -337,7 +342,7 @@ class UpgradeHealthCheck(HealthCheck):
health_ok = health_ok and success
# check for LVM snapshot health checks
use_lvm_snapshot = self._config.get("snapshot", False)
use_lvm_snapshot = self._check_snapshot_option()
if use_lvm_snapshot:
success = self._check_system_simplex()
output += '(LVM snapshots) System is AIO-SX: [%s]\n' \
@@ -456,9 +461,8 @@ def parse_config(args=None):
parser.add_argument("--patch",
help="Set precheck to run against a patch release",
action="store_true")
parser.add_argument("--snapshot",
help="Execute LVM snapshot feature specific health checks (AIO-SX only)",
action="store_true")
parser.add_argument("--options",
help="Additional parameters in key=value format.")
parser.add_argument("--releases",
help="Releases",
default="[]")

View File

@@ -13,6 +13,7 @@
# 5. perform data migration
#
import json
import logging
import os
import shutil
@@ -34,7 +35,7 @@ class DeployStart:
DEPLOY_STATE_START_FAILED = "start-failed"
def __init__(self, from_version, to_version, k8s_version, postgres_port,
feed_ostree_repo_dir, commit_id=None, ignore_errors=False):
feed_ostree_repo_dir, commit_id=None, ignore_errors=False, options=None):
self._from_version = from_version
self._to_version = to_version
self._k8s_version = k8s_version.lstrip("v")
@@ -43,6 +44,7 @@ class DeployStart:
self._feed_ostree_repo_url = f"file://{feed_ostree_repo_dir}"
self._commit_id = commit_id
self._ignore_errors = ignore_errors
self._options = json.loads(options) if options else {}
def _update_deploy_state(self, state):
try:
@@ -167,8 +169,8 @@ class DeployStart:
LOG.info("Cleanup complete")
def _take_lvm_snapshots(self):
deployment = upgrade_utils.get_deployment_data()
if deployment.get("snapshot") is True:
snapshot = upgrade_utils.to_bool(self._options.get("snapshot"))
if snapshot is True:
LOG.info("LVM snapshot option enabled, proceeding to take snapshots...")
script_path = os.path.join(self.SCRIPT_DIR, "manage-lvm-snapshots")
cmd = [script_path, "--create"]
@@ -208,6 +210,7 @@ if __name__ == "__main__":
postgres_port = None
feed_ostree_repo_dir = None
commit_id = None
options = None
for arg in range(1, len(sys.argv)):
if arg == 1:
from_version = sys.argv[arg]
@@ -221,16 +224,18 @@ if __name__ == "__main__":
feed_ostree_repo_dir = sys.argv[arg]
elif arg == 6:
commit_id = sys.argv[arg]
elif arg == 7:
options = sys.argv[arg]
ignore_errors = os.environ.get("IGNORE_ERRORS", False)
if any(x is None for x in [from_version, to_version, k8s_version, postgres_port, feed_ostree_repo_dir]):
usage_msg = (f"usage: {sys.argv[0]} <from_version> <to_version> <k8s_version> "
f"<postgresql_port> <feed_ostree_repo_dir> [commit_id]")
f"<postgresql_port> <feed_ostree_repo_dir> [commit_id] <options>")
print(usage_msg)
LOG.info(usage_msg)
sys.exit(1)
deploy_start = DeployStart(from_version, to_version, k8s_version, postgres_port, feed_ostree_repo_dir,
commit_id=commit_id, ignore_errors=ignore_errors)
commit_id=commit_id, ignore_errors=ignore_errors, options=options)
sys.exit(deploy_start.run())

View File

@@ -280,3 +280,11 @@ def get_deployment_data():
with open("/opt/software/software.json", "r") as fp:
deployment = json.loads(fp.read())
return deployment.get("deploy")[0]
def to_bool(value):
if isinstance(value, bool):
return value
if isinstance(value, str):
return value.lower() == 'true'
return False

View File

@@ -72,26 +72,24 @@ class DeployController(RestController):
return result
@expose(method='POST', template='json')
def precheck(self, release, force=None, region_name=None, snapshot=None):
def precheck(self, release, force=None, region_name=None, **kwargs):
_force = force is not None
_snapshot = snapshot is not None
reload_release_data()
result = sc.software_deploy_precheck_api(release, _force, region_name, _snapshot)
result = sc.software_deploy_precheck_api(release, _force, region_name, **kwargs)
if result and 'error' in result and result["error"] != "":
response.status = 406
return result
@expose(method='POST', template='json')
def start(self, release, force=None, snapshot=None):
def start(self, release, force=None, **kwargs):
reload_release_data()
_force = force is not None
_snapshot = snapshot is not None
if sc.any_patch_host_installing():
raise SoftwareServiceError(error="Rejected: One or more nodes are installing a release.")
result = sc.software_deploy_start_api(release, _force, _snapshot)
result = sc.software_deploy_start_api(release, _force, **kwargs)
if result and 'error' in result and result["error"] != "":
response.status = 406

View File

@@ -238,3 +238,10 @@ LOG_DEFAULT_FORMAT = ('%(asctime)s.%(msecs)03d USM - %(exec)s [%(process)s:%(thr
SOFTWARE_API_SUPPRESS_PATTERNS = [
r"GET /v1/deploy/software_upgrade",
]
# Reserved_words
RESERVED_WORDS_SET = {"OS_AUTH_TOKEN", "OS_AUTH_TYPE", "OS_AUTH_URL", "OS_ENDPOINT_TYPE", "OS_IDENTITY_API_VERSION",
"OS_INTERFACE", "OS_PASSWORD", "OS_PROJECT_DOMAIN_ID", "OS_PROJECT_DOMAIN_NAME", "OS_PROJECT_ID",
"OS_PROJECT_NAME", "OS_REGION_NAME", "OS_SERVICE_TOKEN", "OS_USERNAME", "OS_USER_DOMAIN_NAME",
"OS_SERVICE_TYPE", "OS_TENANT_ID", "OS_TENANT_NAME", "OS_USER_DOMAIN_ID", "SYSTEM_API_VERSION",
"SYSTEM_URL"}

View File

@@ -91,6 +91,7 @@ from software.software_functions import is_deployment_in_progress
from software.software_functions import get_release_from_patch
from software.software_functions import clean_up_deployment_data
from software.software_functions import run_remove_temporary_data_script
from software.software_functions import to_bool
from software.release_state import ReleaseState
from software.utilities.deploy_set_failed import start_set_fail
from software.deploy_host_state import DeployHostState
@@ -2674,6 +2675,39 @@ class PatchController(PatchService):
finally:
self.socket_lock.release()
def _sanitize_extra_options(self, value):
"""
Make sure value have only allowed characters.
"""
# Only letters, numbers, space, -, and _ are allowed.
if not re.match(r'^[\w\s\-]+$', value):
msg_error = f"Invalid value: '{value}'."
raise SoftwareServiceError(msg_error)
return value
def _parse_and_sanitize_extra_options(self, options_list):
"""
Validate, sanitize and convert a 'key=value' to dictionary.
"""
for item in options_list:
if item.count('=') != 1:
msg_error = f"Invalid format: '{item}'. Expected format is key=value"
raise SoftwareServiceError(msg_error)
options = {}
for item in options_list:
key, value = item.split('=', 1)
key = self._sanitize_extra_options(key.strip())
value = self._sanitize_extra_options(value.strip())
if key in constants.RESERVED_WORDS_SET:
msg_error = f"{key} is a reserved word and can't be used."
raise SoftwareServiceError(msg_error)
options[key] = value
return options
def _release_basic_checks(self, deployment):
"""
Does basic sanity checks on the release data
@@ -2696,7 +2730,7 @@ class PatchController(PatchService):
def _deploy_precheck(self, release_version: str, force: bool = False,
region_name: typing.Optional[str] = None, patch: bool = False,
snapshot: bool = False) -> dict:
**kwargs) -> dict:
"""
Verify if system satisfy the requisites to upgrade to a specified deployment.
:param release_version: full release name, e.g. starlingx-MM.mm.pp
@@ -2777,13 +2811,12 @@ class PatchController(PatchService):
"--project_domain_name=%s" % project_domain_name,
"--region_name=%s" % region_name,
"--releases=%s" % json.dumps(releases),
"--options=%s" % json.dumps(kwargs.get("options", {})),
"--deploy_in_progress=%s" % json.dumps(deploy_in_progress)]
if force:
cmd.append("--force")
if patch:
cmd.append("--patch")
if snapshot:
cmd.append("--snapshot")
# Call precheck from the deployment files
precheck_return = subprocess.run(
@@ -2805,7 +2838,7 @@ class PatchController(PatchService):
return dict(info=msg_info, warning=msg_warning, error=msg_error, system_healthy=system_healthy)
def software_deploy_precheck_api(self, deployment: str, force: bool = False, region_name=None,
snapshot: bool = False) -> dict:
**kwargs) -> dict:
"""
Verify if system satisfy the requisites to upgrade to a specified deployment.
:param deployment: full release name, e.g. starlingx-MM.mm.pp
@@ -2821,8 +2854,9 @@ class PatchController(PatchService):
if not is_patch and socket.gethostname() != constants.CONTROLLER_0_HOSTNAME:
raise SoftwareServiceError(f"Deploy precheck for major releases needs to be executed in"
f" {constants.CONTROLLER_0_HOSTNAME} host.")
ret = self._deploy_precheck(release_version, force, region_name, is_patch, snapshot)
if kwargs.get("options"):
kwargs["options"] = self._parse_and_sanitize_extra_options(kwargs.get("options"))
ret = self._deploy_precheck(release_version, force, region_name, is_patch, **kwargs)
if ret:
if ret.get("system_healthy") is None:
ret["error"] = "Fail to perform deploy precheck. Internal error has occurred.\n" + \
@@ -2832,7 +2866,7 @@ class PatchController(PatchService):
"deploying %s\n" % deployment + ret.get("info")
return ret
def _deploy_upgrade_start(self, to_release, commit_id):
def _deploy_upgrade_start(self, to_release, commit_id, **kwargs):
LOG.info("start deploy upgrade to %s from %s" % (to_release, SW_VERSION))
deploy_script_name = constants.DEPLOY_START_SCRIPT
cmd_path = utils.get_software_deploy_script(to_release, deploy_script_name)
@@ -2853,6 +2887,7 @@ class PatchController(PatchService):
feed]
upgrade_start_cmd.append(commit_id if commit_id is not None else 0)
upgrade_start_cmd.append(json.dumps(kwargs.get("options")) if kwargs.get("options") is not None else "")
# pass in keystone auth through environment variables
# OS_AUTH_URL, OS_USERNAME, OS_PASSWORD, OS_PROJECT_NAME, OS_USER_DOMAIN_NAME,
# OS_PROJECT_DOMAIN_NAME, OS_REGION_NAME are in env variables.
@@ -2869,7 +2904,7 @@ class PatchController(PatchService):
try:
LOG.info("starting subprocess %s" % ' '.join(upgrade_start_cmd))
subprocess.Popen(' '.join(upgrade_start_cmd), start_new_session=True, shell=True, env=env)
subprocess.Popen(upgrade_start_cmd, start_new_session=True, shell=False, env=env)
LOG.info("subprocess started")
return True
except subprocess.SubprocessError as e:
@@ -2969,7 +3004,7 @@ class PatchController(PatchService):
msg = "Failed to delete patch script %s. Reason: %s" % (script_path, e)
LOG.error(msg)
def install_releases_thread(self, deployment_list, feed_repo, upgrade=False):
def install_releases_thread(self, deployment_list, feed_repo, upgrade=False, **kwargs):
"""
In a separated thread.
Install the debian packages, create the commit and update the metadata.
@@ -3071,7 +3106,7 @@ class PatchController(PatchService):
base_deployment = deployment_list[0]
base_release = self._release_basic_checks(base_deployment)
upgrade_commit_id = base_release.commit_id
if self._deploy_upgrade_start(base_release.sw_release, upgrade_commit_id):
if self._deploy_upgrade_start(base_release.sw_release, upgrade_commit_id, **kwargs):
LOG.info("Finished releases %s deploy start" % deployment_list)
else:
raise ValueError("_deploy_upgrade_start failed")
@@ -3098,9 +3133,9 @@ class PatchController(PatchService):
thread = threading.Thread(target=run)
thread.start()
def _precheck_before_start(self, deployment, release_version, is_patch, force=False, snapshot=False):
def _precheck_before_start(self, deployment, release_version, is_patch, force=False, **kwargs):
LOG.info("Running deploy precheck.")
precheck_result = self._deploy_precheck(release_version, patch=is_patch, force=force, snapshot=snapshot)
precheck_result = self._deploy_precheck(release_version, patch=is_patch, force=force, **kwargs)
if precheck_result.get('system_healthy') is None:
precheck_result["error"] = (
f"Fail to perform deploy precheck. Internal error has occurred.\n"
@@ -3129,7 +3164,7 @@ class PatchController(PatchService):
with open(precheck_result_file, "w") as f:
json.dump({"healthy": healthy, "timestamp": time.time()}, f)
def _should_run_precheck_prior_deploy_start(self, release_version, force, is_patch, snapshot):
def _should_run_precheck_prior_deploy_start(self, release_version, force, is_patch, **kwargs):
# there is not precheck script in this state
if self.pre_bootstrap:
return False
@@ -3143,7 +3178,7 @@ class PatchController(PatchService):
LOG.info("The precheck result file %s does not exist." % file_path)
return True
if snapshot:
if kwargs:
return True
with open(file_path) as f:
@@ -3158,7 +3193,7 @@ class PatchController(PatchService):
@require_deploy_state([None],
"There is already a deployment in progress ({state.value}). "
"Please complete/delete the current deployment.")
def software_deploy_start_api(self, deployment: str, force: bool, snapshot: bool, **kwargs) -> dict:
def software_deploy_start_api(self, deployment: str, force: bool, **kwargs) -> dict:
"""
to start deploy of a specified release.
The operation implies deploying all undeployed dependency releases of
@@ -3200,15 +3235,16 @@ class PatchController(PatchService):
LOG.warning("Using unknown hostname for local install: %s", hostname)
to_release = deploy_release.sw_release
if self._should_run_precheck_prior_deploy_start(to_release, force, is_patch, snapshot):
if kwargs.get("options"):
kwargs["options"] = self._parse_and_sanitize_extra_options(kwargs.get("options"))
if self._should_run_precheck_prior_deploy_start(to_release, force, is_patch, **kwargs):
LOG.info("Executing software deploy precheck prior to software deploy start")
if precheck_result := self._precheck_before_start(
deployment,
to_release,
is_patch=is_patch,
force=force,
snapshot=snapshot,
**kwargs
):
return precheck_result
self._safe_remove_precheck_result_file(to_release)
@@ -3281,11 +3317,11 @@ class PatchController(PatchService):
deploy_state.start(running_release, to_release, feed_repo, None, reboot_required)
else:
deploy_state.start(running_release, to_release, feed_repo, commit_id,
reboot_required, snapshot=snapshot)
reboot_required, **kwargs)
# Start applying the releases
upgrade = not is_patch
self.install_releases_thread(deployment_list, feed_repo, upgrade)
self.install_releases_thread(deployment_list, feed_repo, upgrade, **kwargs)
msg_info += "%s is now starting, await for the states: " \
"[deploy-start-done | deploy-start-failed] in " \
@@ -3810,7 +3846,8 @@ class PatchController(PatchService):
system_mode = utils.get_platform_conf("system_mode")
if system_mode == constants.SYSTEM_MODE_SIMPLEX:
deploy = self.db_api_instance.get_deploy_all()[0]
enabled_lvm_snapshots = deploy.get("snapshot")
options = deploy.get("options", {})
enabled_lvm_snapshots = to_bool(options.get("snapshot"))
if enabled_lvm_snapshots:
LOG.info("LVM snapshots are enabled")
manager = lvm_snapshot.LVMSnapshotManager()

View File

@@ -137,7 +137,7 @@ class Deploy(ABC):
@abstractmethod
def create(self, from_release: str, to_release: str, feed_repo: str,
commit_id: str, reboot_required: bool, state: DEPLOY_STATES):
commit_id: str, reboot_required: bool, state: DEPLOY_STATES, **kwargs):
"""
Create a new deployment entry.
@@ -267,8 +267,8 @@ class DeployHandler(Deploy):
"commit_id": commit_id,
"reboot_required": reboot_required,
"state": state.value,
"options": kwargs.get("options")
}
new_deploy.update(kwargs)
try:
data = get_software_filesystem_data()

View File

@@ -1704,3 +1704,11 @@ def execute_agent_hooks(software_version, additional_data=None):
except Exception as e:
LOG.exception("Error running agent hooks: %s" % str(e))
raise
def to_bool(value):
if isinstance(value, bool):
return value
if isinstance(value, str):
return value.lower() == 'true'
return False