From 75c8b8d13a6eb597c14a6c94408aae5fc3980c6b Mon Sep 17 00:00:00 2001
From: Gustavo Herzmann
Date: Wed, 10 Sep 2025 09:28:26 -0300
Subject: [PATCH] Add support for auto-restore without --with-install

This commit allows running auto-restore operations without
automatically reinstalling the subcloud. Users can now manually
reinstall the subcloud and run only the auto-restore phase. The
pre-installed subcloud must contain the required prestaged registry
images.

The restore overrides file and central backup file are bundled in a
seed ISO, which is used to trigger the auto-restore operation.

Test Plan:
01. PASS - Install a subcloud with a prestaged ISO and then create a
    central subcloud backup. Manually reinstall the prestaged ISO and
    then run the auto-restore without both --with-install and
    --local-only. Verify that the subcloud is restored using the
    bundled central backup file without automatic reinstallation.

Depends-On: https://review.opendev.org/c/starlingx/ansible-playbooks/+/960801

Story: 2011454
Task: 52797

Change-Id: I284392e24df0a9cb5f95654a47e0381c79795fe8
Signed-off-by: Gustavo Herzmann
---
Review notes (ignored by git am, not part of the commit message):
- Line structure and indentation were reconstructed from a collapsed
  copy of this patch; hunk line counts match the @@ headers.
- _generate_auto_restore_seed_iso() returns None on failure, so its
  return annotation is Optional[str] instead of str.

 api-ref/source/parameters.yaml                |   8 +-
 distributedcloud/dccommon/consts.py           |   1 +
 .../api/controllers/v1/subcloud_backup.py     |  11 +-
 distributedcloud/dcmanager/common/utils.py    |   2 +-
 .../dcmanager/manager/subcloud_manager.py     | 281 +++++++++++++++++-
 .../controllers/v1/test_subcloud_backup.py    |   4 +-
 6 files changed, 280 insertions(+), 27 deletions(-)

diff --git a/api-ref/source/parameters.yaml b/api-ref/source/parameters.yaml
index 9f8b2c6c9..ea4537bc4 100644
--- a/api-ref/source/parameters.yaml
+++ b/api-ref/source/parameters.yaml
@@ -116,10 +116,10 @@ association_type:
   type: string
 auto_restore:
   description: |
-    Performs a subcloud restore with installation, automatically triggering the
-    restore process locally in the subcloud after installation completes. Also
-    restores container images.
-    Can be used with or without the local-only option.
+    Performs a subcloud restore by automatically triggering the restore
+    process locally in the subcloud using only BMC connectivity.
+    Also restores container images.
+    Can be used with or without the local-only and with-install options.
     Defaults to False.
   in: body
   required: false
diff --git a/distributedcloud/dccommon/consts.py b/distributedcloud/dccommon/consts.py
index 9eedcc732..3dee6c4df 100644
--- a/distributedcloud/dccommon/consts.py
+++ b/distributedcloud/dccommon/consts.py
@@ -324,6 +324,7 @@ ANSIBLE_SUBCLOUD_INSTALL_PLAYBOOK = (
 )
 
 ENROLL_INIT_SEED_ISO_NAME = "seed.iso"
+AUTO_RESTORE_SEED_ISO_NAME = "auto-restore-seed.iso"
 
 ANSIBLE_SUBCLOUD_ENROLL_INIT_PLAYBOOK = (
     "/usr/share/ansible/stx-ansible/playbooks/enroll_init.yml"
diff --git a/distributedcloud/dcmanager/api/controllers/v1/subcloud_backup.py b/distributedcloud/dcmanager/api/controllers/v1/subcloud_backup.py
index 48146ec18..b4cce8f64 100644
--- a/distributedcloud/dcmanager/api/controllers/v1/subcloud_backup.py
+++ b/distributedcloud/dcmanager/api/controllers/v1/subcloud_backup.py
@@ -398,13 +398,13 @@ class SubcloudBackupController(object):
                 )
 
             if payload.get("release") and not (
-                payload["with_install"] or payload["auto"] or payload["factory"]
+                payload["with_install"] or payload["factory"]
             ):
                 pecan.abort(
                     400,
                     _(
-                        "Option release cannot be used without one of the "
-                        "following options: with_install, auto or factory."
+                        "Option release cannot be used without 'with_install' "
+                        "or 'factory' options."
                     ),
                 )
 
@@ -477,10 +477,11 @@ class SubcloudBackupController(object):
                         % matching_iso
                     )
 
-            # An auto or factory restore implies with-install and registry-images
+            # An auto or factory restore implies registry-images
             if payload.get("auto") or payload.get("factory"):
-                payload["with_install"] = True
                 payload["registry_images"] = True
+                if payload.get("factory"):
+                    payload["with_install"] = True
 
             try:
                 # local update to deploy_status - this is just for CLI response
diff --git a/distributedcloud/dcmanager/common/utils.py b/distributedcloud/dcmanager/common/utils.py
index 6520892e2..2206c8ad9 100644
--- a/distributedcloud/dcmanager/common/utils.py
+++ b/distributedcloud/dcmanager/common/utils.py
@@ -1169,7 +1169,7 @@ def find_central_subcloud_backup(subcloud_name: str, software_version: str) -> P
     if not search_dir.exists():
         raise FileNotFoundError(f"Backup directory does not exist: {search_dir}")
 
-    pattern = f"{re.escape(subcloud_name)}_platform_backup_*.tgz"
+    pattern = f"{subcloud_name}_platform_backup_*.tgz"
     backup_files = list(search_dir.glob(pattern))
 
     if not backup_files:
diff --git a/distributedcloud/dcmanager/manager/subcloud_manager.py b/distributedcloud/dcmanager/manager/subcloud_manager.py
index 57f9f30ba..1696ccbe2 100644
--- a/distributedcloud/dcmanager/manager/subcloud_manager.py
+++ b/distributedcloud/dcmanager/manager/subcloud_manager.py
@@ -34,6 +34,7 @@ import threading
 import time
 from typing import Optional
 
+from eventlet.green import subprocess
 from eventlet import greenpool
 from fm_api import constants as fm_const
 from fm_api import fm_api
@@ -2470,12 +2471,243 @@ class SubcloudManager(manager.Manager):
             )
             raise
 
+    def _create_auto_restore_user_data(self, temp_dir: str, subcloud_name: str) -> None:
+        """Create cloud-init user-data file for auto-restore
+
+        The seed iso will be mounted into the subcloud and the backup archive and
+        restore override values will be copied into the SUBCLOUD_AUTO_RESTORE_DIR.
+        Then the dc-auto-restore service is started, triggering the auto-restore
+        operation inside the subcloud.
+        """
+        runcmd = [
+            [
+                "/bin/bash",
+                "-c",
+                "echo $(date): Starting auto-restore from seed ISO",
+            ],
+            ["mkdir", "-p", "/mnt/seed-iso"],
+            ["mount", "LABEL=CIDATA", "/mnt/seed-iso"],
+            [
+                "cp",
+                "-r",
+                "/mnt/seed-iso/auto-restore",
+                f"{consts.SUBCLOUD_AUTO_RESTORE_DIR}",
+            ],
+            [
+                "/bin/bash",
+                "-c",
+                f"if [ ! -f {consts.SUBCLOUD_AUTO_RESTORE_DIR}/"
+                "backup_restore_values.yml ]; then "
+                "echo 'ERROR: backup_restore_values.yml not found'; "
+                "exit 1; fi",
+            ],
+            [
+                "/bin/bash",
+                "-c",
+                "echo 'Auto-restore files copied:'; "
+                f"ls -la {consts.SUBCLOUD_AUTO_RESTORE_DIR}",
+            ],
+            ["umount", "/mnt/seed-iso"],
+            ["rmdir", "/mnt/seed-iso"],
+            [
+                "/bin/bash",
+                "-c",
+                "echo 'Starting auto-restore service'; "
+                "systemctl start dc-auto-restore.service",
+            ],
+            [
+                "/bin/bash",
+                "-c",
+                "echo $(date): Auto-restore seed processing completed successfully",
+            ],
+        ]
+        user_data_content = {
+            "network": {"config": "disabled"},
+            "runcmd": runcmd,
+            "cloud_config_modules": [["runcmd", "always"]],
+            "cloud_final_modules": [["scripts-user", "always"]],
+        }
+
+        user_data_file = os.path.join(temp_dir, "user-data")
+        with open(user_data_file, "w", encoding="utf-8") as f:
+            f.write("#cloud-config\n")
+            yaml.dump(user_data_content, f, default_flow_style=False, sort_keys=False)
+
+        LOG.info(f"Created user-data for auto-restore seed ISO for {subcloud_name}")
+
+    def _create_auto_restore_meta_data(self, temp_dir: str, subcloud_name: str) -> None:
+        meta_data_content = {"instance-id": f"{subcloud_name}"}
+
+        meta_data_file = os.path.join(temp_dir, "meta-data")
+        with open(meta_data_file, "w", encoding="utf-8") as f:
+            yaml.dump(meta_data_content, f, default_flow_style=False)
+
+        LOG.info(f"Created meta-data for auto-restore seed ISO for {subcloud_name}")
+
+    def _generate_auto_restore_seed_iso(
+        self, subcloud: Subcloud, overrides_file: str, payload: dict
+    ) -> Optional[str]:
+        try:
+            software_version = str(payload.get("software_version"))
+            www_root = os.path.join("/opt/platform/iso", software_version)
+            iso_dir_path = os.path.join(www_root, "nodes", subcloud.name)
+            iso_output_path = os.path.join(
+                iso_dir_path, dccommon_consts.AUTO_RESTORE_SEED_ISO_NAME
+            )
+
+            if not os.path.isdir(www_root):
+                os.mkdir(www_root, 0o755)
+            if not os.path.isdir(iso_dir_path):
+                os.makedirs(iso_dir_path, 0o755, exist_ok=True)
+            elif os.path.exists(iso_output_path):
+                # Clean up iso file if it already exists.
+                LOG.info(
+                    f"Found preexisting seed iso for subcloud {subcloud.name}, "
+                    "cleaning up"
+                )
+                os.remove(iso_output_path)
+
+            LOG.info(
+                f"Generating auto-restore seed ISO for {subcloud.name}: "
+                f"{iso_output_path}"
+            )
+
+            # Create the cloud-init ISO structure in a single temp directory
+            with tempfile.TemporaryDirectory(
+                prefix=f".{subcloud.name}",
+                dir=self._get_auto_restore_temp_dir_location(subcloud, payload),
+            ) as temp_iso_dir:
+                self._create_auto_restore_user_data(temp_iso_dir, subcloud.name)
+                self._create_auto_restore_meta_data(temp_iso_dir, subcloud.name)
+
+                self._stage_auto_restore_files(
+                    Path(temp_iso_dir), Path(overrides_file), payload, subcloud
+                )
+
+                gen_seed_iso_command = [
+                    "genisoimage",
+                    "-o",
+                    iso_output_path,
+                    "-volid",
+                    "CIDATA",
+                    "-untranslated-filenames",
+                    "-joliet",
+                    "-rock",
+                    "-iso-level",
+                    "2",
+                    temp_iso_dir,
+                ]
+
+                LOG.info(f"Running auto-restore ISO generation: {gen_seed_iso_command}")
+                result = subprocess.run(
+                    gen_seed_iso_command,
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.STDOUT,
+                )
+
+                output = result.stdout.decode("utf-8").replace("\n", ", ")
+
+                if result.returncode == 0:
+                    LOG.info(
+                        "Successfully generated auto-restore seed ISO for %s: "
+                        "returncode: %s, output: %s",
+                        subcloud.name,
+                        result.returncode,
+                        output,
+                    )
+                    return iso_output_path
+
+                LOG.error(
+                    "Failed to generate auto-restore seed ISO for %s: "
+                    "returncode: %s, output: %s",
+                    subcloud.name,
+                    result.returncode,
+                    output,
+                )
+                return None
+
+        except Exception as e:
+            LOG.exception(
+                f"Exception generating auto-restore seed ISO for {subcloud.name}: {e}"
+            )
+            return None
+
+    def _cleanup_auto_restore_seed_iso(self, iso_path: str) -> None:
+        try:
+            if iso_path and os.path.exists(iso_path):
+                os.remove(iso_path)
+                LOG.info(f"Cleaned up auto-restore seed ISO: {iso_path}")
+        except Exception as e:
+            LOG.warning(f"Failed to cleanup auto-restore seed ISO {iso_path}: {e}")
+
+    def _create_rvmc_config_for_seed_iso(
+        self, subcloud: Subcloud, payload: dict
+    ) -> str:
+        override_path = os.path.join(
+            dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud.name
+        )
+
+        if not os.path.exists(override_path):
+            os.makedirs(override_path, 0o755)
+
+        sysinv_client = SysinvClient(
+            dccommon_utils.get_region_one_name(),
+            endpoint_cache.EndpointCache.get_admin_session(),
+        )
+
+        https_enabled = sysinv_client.get_system().capabilities.get(
+            "https_enabled", False
+        )
+        subcloud_primary_oam_ip_family = utils.get_primary_oam_address_ip_family(
+            subcloud
+        )
+        image_base_url = SubcloudInstall.get_image_base_url(
+            https_enabled, sysinv_client, subcloud_primary_oam_ip_family
+        )
+
+        install_values = payload.get("install_values", {})
+        bmc_values = {
+            "bmc_username": install_values.get("bmc_username"),
+            "bmc_password": install_values.get("bmc_password"),
+            "bmc_address": install_values.get("bmc_address"),
+            "image": os.path.join(
+                image_base_url,
+                "iso",
+                payload["software_version"],
+                "nodes",
+                subcloud.name,
+                dccommon_consts.AUTO_RESTORE_SEED_ISO_NAME,
+            ),
+        }
+
+        SubcloudInstall.create_rvmc_config_file(override_path, bmc_values)
+
+        rvmc_config_path = os.path.join(
+            override_path, dccommon_consts.RVMC_CONFIG_FILE_NAME
+        )
+        LOG.info(
+            "Created RVMC config for auto-restore seed ISO for "
+            f"subcloud {subcloud.name}: {rvmc_config_path}"
+        )
+        return rvmc_config_path
+
     def _restore_subcloud_backup(self, context, payload, subcloud):
         log_file = (
             os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name)
             + "_playbook_output.log"
         )
 
+        bmc_access_only = True
+        seed_iso_path = None
+
+        if payload.get("factory"):
+            auto_restore_mode = "factory"
+        elif payload.get("auto"):
+            auto_restore_mode = "auto"
+        else:
+            auto_restore_mode = None
+            bmc_access_only = False
+
         # To get the bootstrap_address for the subcloud, we considered
         # the following order:
         # 1) Use the value from restore_values if present
@@ -2516,15 +2748,6 @@ class SubcloudManager(manager.Manager):
                 subcloud, bootstrap_address=bootstrap_address
             )
 
-            bmc_access_only = True
-            if payload.get("factory"):
-                auto_restore_mode = "factory"
-            elif payload.get("auto"):
-                auto_restore_mode = "auto"
-            else:
-                auto_restore_mode = None
-                bmc_access_only = False
-
             # Install wipe_osds parameter is required to determine if
             # the OSDs should be wiped during restore when --with-install
             # subcommand is provided.
@@ -2542,6 +2765,29 @@ class SubcloudManager(manager.Manager):
                 install_wipe_osds,
                 subcloud_region_name=subcloud.region_name,
             )
+
+            # Handle auto-restore without install using seed ISO
+            if auto_restore_mode == "auto" and not payload.get("with_install"):
+                LOG.info(
+                    f"Performing auto-restore without install for {subcloud.name} "
+                    f"using seed ISO approach"
+                )
+
+                seed_iso_path = self._generate_auto_restore_seed_iso(
+                    subcloud, overrides_file, payload
+                )
+
+                if not seed_iso_path:
+                    raise Exception("Failed to generate auto-restore seed ISO")
+
+                data_install = json.loads(subcloud.data_install)
+                if payload.get("install_values"):
+                    payload.get("install_values").update(data_install)
+                else:
+                    payload["install_values"] = data_install
+
+                self._create_rvmc_config_for_seed_iso(subcloud, payload)
+
             restore_command = self.compose_backup_restore_command(
                 subcloud.name, subcloud_inventory_file, auto_restore_mode
             )
@@ -2620,14 +2866,19 @@ class SubcloudManager(manager.Manager):
             if not install_success:
                 return subcloud, False
 
-        success = self._run_subcloud_backup_restore_playbook(
-            subcloud, restore_command, context, log_file, auto_restore_mode
-        )
+        try:
+            success = self._run_subcloud_backup_restore_playbook(
+                subcloud, restore_command, context, log_file, auto_restore_mode
+            )
 
-        if success:
-            utils.delete_subcloud_inventory(overrides_file)
+            if success:
+                utils.delete_subcloud_inventory(overrides_file)
 
-        return subcloud, success
+            return subcloud, success
+
+        finally:
+            if seed_iso_path:
+                self._cleanup_auto_restore_seed_iso(seed_iso_path)
 
     @staticmethod
     def _build_subcloud_operation_notice(
diff --git a/distributedcloud/dcmanager/tests/unit/api/controllers/v1/test_subcloud_backup.py b/distributedcloud/dcmanager/tests/unit/api/controllers/v1/test_subcloud_backup.py
index 04d9b2795..a40ae59e6 100644
--- a/distributedcloud/dcmanager/tests/unit/api/controllers/v1/test_subcloud_backup.py
+++ b/distributedcloud/dcmanager/tests/unit/api/controllers/v1/test_subcloud_backup.py
@@ -1108,8 +1108,8 @@ class TestSubcloudBackupPatchRestoreSubcloud(BaseTestSubcloudBackupPatchRestore)
         self._assert_pecan_and_response(
             response,
             http.client.BAD_REQUEST,
-            "Option release cannot be used without one of the following options: "
-            "with_install, auto or factory.",
+            "Option release cannot be used without 'with_install' or "
+            "'factory' options.",
         )
 
     def test_patch_restore_subcloud_fails_with_install_without_install_values(self):