From fdbb073719e0dd592c39cb092fd855b28fd34fc7 Mon Sep 17 00:00:00 2001 From: Luis Eduardo Bonatti Date: Thu, 22 May 2025 17:02:32 -0300 Subject: [PATCH] USM: Change deploy precheck and start to accept arbitrary arguments. This change adds arbitrary parameters, to reduce or avoid the need to patchback when new features are added. The snapshot parameter was removed to use this new change with -o snapshot=true. This commit also add a sanitize function to allow only letters, numbers, space, - and _. Usage: software deploy start/precheck -o snapshot=true --options test=test Test Plan: PASS: Build-pkgs, Build-image, Install/Bootstrap/Unlock PASS: Check precheck script receives the parameters (by injecting logs manually) PASS: Check deploy start receives the parameters by injecting manual logs. PASS: Check software.json stores the parameters under options field. PASS: Deploy start blocked with reserved key in options. PASS: Check activate-rollback is using deploy options for snapshot. PASS: Major release deployment stx10 -> stx11 in sx. Story: 2011357 Task: 52264 Change-Id: I53b8863ceeec74c45831d959fe6cea00990cc156 Signed-off-by: Luis Eduardo Bonatti --- software-client/pylint.rc | 3 +- software-client/software_client/v1/deploy.py | 9 +-- .../software_client/v1/deploy_shell.py | 16 ++-- software/scripts/deploy-precheck | 12 ++- software/scripts/deploy-start | 15 ++-- software/scripts/upgrade_utils.py | 8 ++ .../software/api/controllers/v1/deploy.py | 10 +-- software/software/constants.py | 7 ++ software/software/software_controller.py | 79 ++++++++++++++----- software/software/software_entities.py | 4 +- software/software/software_functions.py | 8 ++ 11 files changed, 119 insertions(+), 52 deletions(-) diff --git a/software-client/pylint.rc b/software-client/pylint.rc index 089f84b2..72d7540d 100644 --- a/software-client/pylint.rc +++ b/software-client/pylint.rc @@ -433,6 +433,7 @@ confidence=HIGH, # -Refactoring- # R0205 useless-object-inheritance # R0402 consider-using-from-import +# R0801 Similar lines in x files # R0903 too-few-public-methods # R0911 too-many-return-statements # R0912 too-many-branches @@ -453,7 +454,7 @@ confidence=HIGH, # W1514 unspecified-encoding # W3101 missing-timeout disable= C0103,C0114,C0115,C0116,C0209,C0301,C0302,C0411, - C0415,R0205,R0402,R0903,R0911,R0912,R0914,R0915, + C0415,R0205,R0402,R0801,R0903,R0911,R0912,R0914,R0915, R1702,R1705,R1714,R1722,R1724,R1725,R1732,R1735, W0602,W0603,W0718,W1514,W3101, diff --git a/software-client/software_client/v1/deploy.py b/software-client/software_client/v1/deploy.py index 6b17386f..fdc9367f 100644 --- a/software-client/software_client/v1/deploy.py +++ b/software-client/software_client/v1/deploy.py @@ -30,9 +30,8 @@ class DeployManager(base.Manager): body = {} if args.force: body["force"] = "true" - if args.snapshot: - body["snapshot"] = "true" - + if args.options: + body["options"] = args.options if args.region_name: body["region_name"] = args.region_name @@ -51,8 +50,8 @@ class DeployManager(base.Manager): body = {} if args.force: body["force"] = "true" - if args.snapshot: - body["snapshot"] = "true" + if args.options: + body["options"] = args.options return self._post(path, body=body) diff --git a/software-client/software_client/v1/deploy_shell.py b/software-client/software_client/v1/deploy_shell.py index 9baa9624..72a6b62b 100644 --- a/software-client/software_client/v1/deploy_shell.py +++ b/software-client/software_client/v1/deploy_shell.py @@ -63,11 +63,11 @@ def do_host_list(cc, args): default=None, required=False, help='Run precheck against a subcloud') -@utils.arg('-s', - '--snapshot', - action='store_true', +@utils.arg('-o', + '--options', + action='append', required=False, - help='Execute LVM snapshot feature specific health checks (AIO-SX only)') + help='Additional parameters in key=value format.') def do_precheck(cc, args): """Verify whether prerequisites for installing the software deployment are satisfied""" resp, data = cc.deploy.precheck(args) @@ -91,11 +91,11 @@ def do_precheck(cc, args): action='store_true', required=False, help='Allow bypassing non-critical checks') -@utils.arg('-s', - '--snapshot', - action='store_true', +@utils.arg('-o', + '--options', + action='append', required=False, - help='Use LVM snapshots for faster rollback/recovery (AIO-SX only)') + help='Additional parameters in key=value format.') def do_start(cc, args): """Start the software deployment""" resp, data = cc.deploy.start(args) diff --git a/software/scripts/deploy-precheck b/software/scripts/deploy-precheck index 3ad7f1ce..5c5194dd 100644 --- a/software/scripts/deploy-precheck +++ b/software/scripts/deploy-precheck @@ -257,6 +257,11 @@ class UpgradeHealthCheck(HealthCheck): return success, msg return success, None + def _check_snapshot_option(self): + options = json.loads(self._config.get("options", {})) + snapshot = options.get("snapshot", False) + return upgrade_utils.to_bool(snapshot) + def run_health_check(self): """Run specific upgrade health checks""" @@ -337,7 +342,7 @@ class UpgradeHealthCheck(HealthCheck): health_ok = health_ok and success # check for LVM snapshot health checks - use_lvm_snapshot = self._config.get("snapshot", False) + use_lvm_snapshot = self._check_snapshot_option() if use_lvm_snapshot: success = self._check_system_simplex() output += '(LVM snapshots) System is AIO-SX: [%s]\n' \ @@ -456,9 +461,8 @@ def parse_config(args=None): parser.add_argument("--patch", help="Set precheck to run against a patch release", action="store_true") - parser.add_argument("--snapshot", - help="Execute LVM snapshot feature specific health checks (AIO-SX only)", - action="store_true") + parser.add_argument("--options", + help="Additional parameters in key=value format.") parser.add_argument("--releases", help="Releases", default="[]") diff --git a/software/scripts/deploy-start b/software/scripts/deploy-start index 88114bb4..4986c489 100755 --- a/software/scripts/deploy-start +++ b/software/scripts/deploy-start @@ -13,6 +13,7 @@ # 5. perform data migration # +import json import logging import os import shutil @@ -34,7 +35,7 @@ class DeployStart: DEPLOY_STATE_START_FAILED = "start-failed" def __init__(self, from_version, to_version, k8s_version, postgres_port, - feed_ostree_repo_dir, commit_id=None, ignore_errors=False): + feed_ostree_repo_dir, commit_id=None, ignore_errors=False, options=None): self._from_version = from_version self._to_version = to_version self._k8s_version = k8s_version.lstrip("v") @@ -43,6 +44,7 @@ class DeployStart: self._feed_ostree_repo_url = f"file://{feed_ostree_repo_dir}" self._commit_id = commit_id self._ignore_errors = ignore_errors + self._options = json.loads(options) if options else {} def _update_deploy_state(self, state): try: @@ -167,8 +169,8 @@ class DeployStart: LOG.info("Cleanup complete") def _take_lvm_snapshots(self): - deployment = upgrade_utils.get_deployment_data() - if deployment.get("snapshot") is True: + snapshot = upgrade_utils.to_bool(self._options.get("snapshot")) + if snapshot is True: LOG.info("LVM snapshot option enabled, proceeding to take snapshots...") script_path = os.path.join(self.SCRIPT_DIR, "manage-lvm-snapshots") cmd = [script_path, "--create"] @@ -208,6 +210,7 @@ if __name__ == "__main__": postgres_port = None feed_ostree_repo_dir = None commit_id = None + options = None for arg in range(1, len(sys.argv)): if arg == 1: from_version = sys.argv[arg] @@ -221,16 +224,18 @@ if __name__ == "__main__": feed_ostree_repo_dir = sys.argv[arg] elif arg == 6: commit_id = sys.argv[arg] + elif arg == 7: + options = sys.argv[arg] ignore_errors = os.environ.get("IGNORE_ERRORS", False) if any(x is None for x in [from_version, to_version, k8s_version, postgres_port, feed_ostree_repo_dir]): usage_msg = (f"usage: {sys.argv[0]} " - f" [commit_id]") + f" [commit_id] ") print(usage_msg) LOG.info(usage_msg) sys.exit(1) deploy_start = DeployStart(from_version, to_version, k8s_version, postgres_port, feed_ostree_repo_dir, - commit_id=commit_id, ignore_errors=ignore_errors) + commit_id=commit_id, ignore_errors=ignore_errors, options=options) sys.exit(deploy_start.run()) diff --git a/software/scripts/upgrade_utils.py b/software/scripts/upgrade_utils.py index 06685d7c..bf2a284a 100644 --- a/software/scripts/upgrade_utils.py +++ b/software/scripts/upgrade_utils.py @@ -280,3 +280,11 @@ def get_deployment_data(): with open("/opt/software/software.json", "r") as fp: deployment = json.loads(fp.read()) return deployment.get("deploy")[0] + + +def to_bool(value): + if isinstance(value, bool): + return value + if isinstance(value, str): + return value.lower() == 'true' + return False diff --git a/software/software/api/controllers/v1/deploy.py b/software/software/api/controllers/v1/deploy.py index 79841888..cd75984c 100644 --- a/software/software/api/controllers/v1/deploy.py +++ b/software/software/api/controllers/v1/deploy.py @@ -72,26 +72,24 @@ class DeployController(RestController): return result @expose(method='POST', template='json') - def precheck(self, release, force=None, region_name=None, snapshot=None): + def precheck(self, release, force=None, region_name=None, **kwargs): _force = force is not None - _snapshot = snapshot is not None reload_release_data() - result = sc.software_deploy_precheck_api(release, _force, region_name, _snapshot) + result = sc.software_deploy_precheck_api(release, _force, region_name, **kwargs) if result and 'error' in result and result["error"] != "": response.status = 406 return result @expose(method='POST', template='json') - def start(self, release, force=None, snapshot=None): + def start(self, release, force=None, **kwargs): reload_release_data() _force = force is not None - _snapshot = snapshot is not None if sc.any_patch_host_installing(): raise SoftwareServiceError(error="Rejected: One or more nodes are installing a release.") - result = sc.software_deploy_start_api(release, _force, _snapshot) + result = sc.software_deploy_start_api(release, _force, **kwargs) if result and 'error' in result and result["error"] != "": response.status = 406 diff --git a/software/software/constants.py b/software/software/constants.py index 1245f4f5..6c81c0af 100644 --- a/software/software/constants.py +++ b/software/software/constants.py @@ -238,3 +238,10 @@ LOG_DEFAULT_FORMAT = ('%(asctime)s.%(msecs)03d USM - %(exec)s [%(process)s:%(thr SOFTWARE_API_SUPPRESS_PATTERNS = [ r"GET /v1/deploy/software_upgrade", ] + +# Reserved_words +RESERVED_WORDS_SET = {"OS_AUTH_TOKEN", "OS_AUTH_TYPE", "OS_AUTH_URL", "OS_ENDPOINT_TYPE", "OS_IDENTITY_API_VERSION", + "OS_INTERFACE", "OS_PASSWORD", "OS_PROJECT_DOMAIN_ID", "OS_PROJECT_DOMAIN_NAME", "OS_PROJECT_ID", + "OS_PROJECT_NAME", "OS_REGION_NAME", "OS_SERVICE_TOKEN", "OS_USERNAME", "OS_USER_DOMAIN_NAME", + "OS_SERVICE_TYPE", "OS_TENANT_ID", "OS_TENANT_NAME", "OS_USER_DOMAIN_ID", "SYSTEM_API_VERSION", + "SYSTEM_URL"} diff --git a/software/software/software_controller.py b/software/software/software_controller.py index f8e7c2ae..1801c817 100644 --- a/software/software/software_controller.py +++ b/software/software/software_controller.py @@ -91,6 +91,7 @@ from software.software_functions import is_deployment_in_progress from software.software_functions import get_release_from_patch from software.software_functions import clean_up_deployment_data from software.software_functions import run_remove_temporary_data_script +from software.software_functions import to_bool from software.release_state import ReleaseState from software.utilities.deploy_set_failed import start_set_fail from software.deploy_host_state import DeployHostState @@ -2674,6 +2675,39 @@ class PatchController(PatchService): finally: self.socket_lock.release() + def _sanitize_extra_options(self, value): + """ + Make sure value have only allowed characters. + """ + # Only letters, numbers, space, -, and _ are allowed. + if not re.match(r'^[\w\s\-]+$', value): + msg_error = f"Invalid value: '{value}'." + raise SoftwareServiceError(msg_error) + return value + + def _parse_and_sanitize_extra_options(self, options_list): + """ + Validate, sanitize and convert a 'key=value' to dictionary. + """ + + for item in options_list: + if item.count('=') != 1: + msg_error = f"Invalid format: '{item}'. Expected format is key=value" + raise SoftwareServiceError(msg_error) + + options = {} + for item in options_list: + key, value = item.split('=', 1) + key = self._sanitize_extra_options(key.strip()) + value = self._sanitize_extra_options(value.strip()) + + if key in constants.RESERVED_WORDS_SET: + msg_error = f"{key} is a reserved word and can't be used." + raise SoftwareServiceError(msg_error) + + options[key] = value + return options + def _release_basic_checks(self, deployment): """ Does basic sanity checks on the release data @@ -2696,7 +2730,7 @@ class PatchController(PatchService): def _deploy_precheck(self, release_version: str, force: bool = False, region_name: typing.Optional[str] = None, patch: bool = False, - snapshot: bool = False) -> dict: + **kwargs) -> dict: """ Verify if system satisfy the requisites to upgrade to a specified deployment. :param release_version: full release name, e.g. starlingx-MM.mm.pp @@ -2777,13 +2811,12 @@ class PatchController(PatchService): "--project_domain_name=%s" % project_domain_name, "--region_name=%s" % region_name, "--releases=%s" % json.dumps(releases), + "--options=%s" % json.dumps(kwargs.get("options", {})), "--deploy_in_progress=%s" % json.dumps(deploy_in_progress)] if force: cmd.append("--force") if patch: cmd.append("--patch") - if snapshot: - cmd.append("--snapshot") # Call precheck from the deployment files precheck_return = subprocess.run( @@ -2805,7 +2838,7 @@ class PatchController(PatchService): return dict(info=msg_info, warning=msg_warning, error=msg_error, system_healthy=system_healthy) def software_deploy_precheck_api(self, deployment: str, force: bool = False, region_name=None, - snapshot: bool = False) -> dict: + **kwargs) -> dict: """ Verify if system satisfy the requisites to upgrade to a specified deployment. :param deployment: full release name, e.g. starlingx-MM.mm.pp @@ -2821,8 +2854,9 @@ class PatchController(PatchService): if not is_patch and socket.gethostname() != constants.CONTROLLER_0_HOSTNAME: raise SoftwareServiceError(f"Deploy precheck for major releases needs to be executed in" f" {constants.CONTROLLER_0_HOSTNAME} host.") - - ret = self._deploy_precheck(release_version, force, region_name, is_patch, snapshot) + if kwargs.get("options"): + kwargs["options"] = self._parse_and_sanitize_extra_options(kwargs.get("options")) + ret = self._deploy_precheck(release_version, force, region_name, is_patch, **kwargs) if ret: if ret.get("system_healthy") is None: ret["error"] = "Fail to perform deploy precheck. Internal error has occurred.\n" + \ @@ -2832,7 +2866,7 @@ class PatchController(PatchService): "deploying %s\n" % deployment + ret.get("info") return ret - def _deploy_upgrade_start(self, to_release, commit_id): + def _deploy_upgrade_start(self, to_release, commit_id, **kwargs): LOG.info("start deploy upgrade to %s from %s" % (to_release, SW_VERSION)) deploy_script_name = constants.DEPLOY_START_SCRIPT cmd_path = utils.get_software_deploy_script(to_release, deploy_script_name) @@ -2853,6 +2887,7 @@ class PatchController(PatchService): feed] upgrade_start_cmd.append(commit_id if commit_id is not None else 0) + upgrade_start_cmd.append(json.dumps(kwargs.get("options")) if kwargs.get("options") is not None else "") # pass in keystone auth through environment variables # OS_AUTH_URL, OS_USERNAME, OS_PASSWORD, OS_PROJECT_NAME, OS_USER_DOMAIN_NAME, # OS_PROJECT_DOMAIN_NAME, OS_REGION_NAME are in env variables. @@ -2869,7 +2904,7 @@ class PatchController(PatchService): try: LOG.info("starting subprocess %s" % ' '.join(upgrade_start_cmd)) - subprocess.Popen(' '.join(upgrade_start_cmd), start_new_session=True, shell=True, env=env) + subprocess.Popen(upgrade_start_cmd, start_new_session=True, shell=False, env=env) LOG.info("subprocess started") return True except subprocess.SubprocessError as e: @@ -2969,7 +3004,7 @@ class PatchController(PatchService): msg = "Failed to delete patch script %s. Reason: %s" % (script_path, e) LOG.error(msg) - def install_releases_thread(self, deployment_list, feed_repo, upgrade=False): + def install_releases_thread(self, deployment_list, feed_repo, upgrade=False, **kwargs): """ In a separated thread. Install the debian packages, create the commit and update the metadata. @@ -3071,7 +3106,7 @@ class PatchController(PatchService): base_deployment = deployment_list[0] base_release = self._release_basic_checks(base_deployment) upgrade_commit_id = base_release.commit_id - if self._deploy_upgrade_start(base_release.sw_release, upgrade_commit_id): + if self._deploy_upgrade_start(base_release.sw_release, upgrade_commit_id, **kwargs): LOG.info("Finished releases %s deploy start" % deployment_list) else: raise ValueError("_deploy_upgrade_start failed") @@ -3098,9 +3133,9 @@ class PatchController(PatchService): thread = threading.Thread(target=run) thread.start() - def _precheck_before_start(self, deployment, release_version, is_patch, force=False, snapshot=False): + def _precheck_before_start(self, deployment, release_version, is_patch, force=False, **kwargs): LOG.info("Running deploy precheck.") - precheck_result = self._deploy_precheck(release_version, patch=is_patch, force=force, snapshot=snapshot) + precheck_result = self._deploy_precheck(release_version, patch=is_patch, force=force, **kwargs) if precheck_result.get('system_healthy') is None: precheck_result["error"] = ( f"Fail to perform deploy precheck. Internal error has occurred.\n" @@ -3129,7 +3164,7 @@ class PatchController(PatchService): with open(precheck_result_file, "w") as f: json.dump({"healthy": healthy, "timestamp": time.time()}, f) - def _should_run_precheck_prior_deploy_start(self, release_version, force, is_patch, snapshot): + def _should_run_precheck_prior_deploy_start(self, release_version, force, is_patch, **kwargs): # there is not precheck script in this state if self.pre_bootstrap: return False @@ -3143,7 +3178,7 @@ class PatchController(PatchService): LOG.info("The precheck result file %s does not exist." % file_path) return True - if snapshot: + if kwargs: return True with open(file_path) as f: @@ -3158,7 +3193,7 @@ class PatchController(PatchService): @require_deploy_state([None], "There is already a deployment in progress ({state.value}). " "Please complete/delete the current deployment.") - def software_deploy_start_api(self, deployment: str, force: bool, snapshot: bool, **kwargs) -> dict: + def software_deploy_start_api(self, deployment: str, force: bool, **kwargs) -> dict: """ to start deploy of a specified release. The operation implies deploying all undeployed dependency releases of @@ -3200,15 +3235,16 @@ class PatchController(PatchService): LOG.warning("Using unknown hostname for local install: %s", hostname) to_release = deploy_release.sw_release - - if self._should_run_precheck_prior_deploy_start(to_release, force, is_patch, snapshot): + if kwargs.get("options"): + kwargs["options"] = self._parse_and_sanitize_extra_options(kwargs.get("options")) + if self._should_run_precheck_prior_deploy_start(to_release, force, is_patch, **kwargs): LOG.info("Executing software deploy precheck prior to software deploy start") if precheck_result := self._precheck_before_start( deployment, to_release, is_patch=is_patch, force=force, - snapshot=snapshot, + **kwargs ): return precheck_result self._safe_remove_precheck_result_file(to_release) @@ -3281,11 +3317,11 @@ class PatchController(PatchService): deploy_state.start(running_release, to_release, feed_repo, None, reboot_required) else: deploy_state.start(running_release, to_release, feed_repo, commit_id, - reboot_required, snapshot=snapshot) + reboot_required, **kwargs) # Start applying the releases upgrade = not is_patch - self.install_releases_thread(deployment_list, feed_repo, upgrade) + self.install_releases_thread(deployment_list, feed_repo, upgrade, **kwargs) msg_info += "%s is now starting, await for the states: " \ "[deploy-start-done | deploy-start-failed] in " \ @@ -3810,7 +3846,8 @@ class PatchController(PatchService): system_mode = utils.get_platform_conf("system_mode") if system_mode == constants.SYSTEM_MODE_SIMPLEX: deploy = self.db_api_instance.get_deploy_all()[0] - enabled_lvm_snapshots = deploy.get("snapshot") + options = deploy.get("options", {}) + enabled_lvm_snapshots = to_bool(options.get("snapshot")) if enabled_lvm_snapshots: LOG.info("LVM snapshots are enabled") manager = lvm_snapshot.LVMSnapshotManager() diff --git a/software/software/software_entities.py b/software/software/software_entities.py index 7603a3e8..b0b6b2ed 100644 --- a/software/software/software_entities.py +++ b/software/software/software_entities.py @@ -137,7 +137,7 @@ class Deploy(ABC): @abstractmethod def create(self, from_release: str, to_release: str, feed_repo: str, - commit_id: str, reboot_required: bool, state: DEPLOY_STATES): + commit_id: str, reboot_required: bool, state: DEPLOY_STATES, **kwargs): """ Create a new deployment entry. @@ -267,8 +267,8 @@ class DeployHandler(Deploy): "commit_id": commit_id, "reboot_required": reboot_required, "state": state.value, + "options": kwargs.get("options") } - new_deploy.update(kwargs) try: data = get_software_filesystem_data() diff --git a/software/software/software_functions.py b/software/software/software_functions.py index b0e8eec5..8bb6605c 100644 --- a/software/software/software_functions.py +++ b/software/software/software_functions.py @@ -1704,3 +1704,11 @@ def execute_agent_hooks(software_version, additional_data=None): except Exception as e: LOG.exception("Error running agent hooks: %s" % str(e)) raise + + +def to_bool(value): + if isinstance(value, bool): + return value + if isinstance(value, str): + return value.lower() == 'true' + return False