Files
update/software/software/utilities/utils.py
Bin Qian e63a5d4794 deploy delete action
This commit adds support for the new deploy delete plugin.
This commit also adds a new DeployPluginRunner class in order to unify
execution of deploy actions, with API credential, CLI environment, and
deploy context (options from APIs).

Story: 2011357
Task: 52340

Test Plan:
    passed: execute delete action w/ deploy complete, deploy delete
    passed: execute delete action w/ deploy start, deploy delete.
    passed: execute delete action w/ deploy start, host, abort, rollback
            delete
    passed: complete deploy for upgrade.

Signed-off-by: Bin Qian <bin.qian@windriver.com>
Change-Id: I52aeb3669a4fc61a0941553c1f40c52acc87e868
2025-06-16 15:48:45 +00:00

495 lines
18 KiB
Python

#
# Copyright (c) 2023-2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import keyring
import logging
import os
import psycopg2
from psycopg2.extras import RealDictCursor
import subprocess
import sys
import tempfile
import traceback
import yaml
# WARNING: The first controller upgrade is done before any puppet manifests
# have been applied, so only the static entries from tsconfig can be used.
# (the platform.conf file will not have been updated with dynamic values).
from software.utilities.constants import PLATFORM_PATH
from software.utilities.constants import KEYRING_PERMDIR
from software.utilities import constants
import software.config as cfg
# Main logger; its file handler is attached by configure_logging().
LOG = logging.getLogger('main_logger')
SOFTWARE_LOG_FILE = "/var/log/software.log"
# Dedicated logger used to summarize deploy script failures when the
# IGNORE_ERRORS environment flag is set (see execute_scripts()).
DEPLOY_SCRIPTS_FAILURES_LOG = logging.getLogger('deploy_scripts_failures')
DEPLOY_SCRIPTS_FAILURES_LOG_FILE = "/var/log/deploy_scripts_failures.log"
# well-known default domain name
DEFAULT_DOMAIN_NAME = 'Default'
# Upgrade script actions
ACTION_START = "start"
ACTION_MIGRATE = "migrate"
ACTION_ACTIVATE = "activate"
ACTION_ACTIVATE_ROLLBACK = "activate-rollback"
ACTION_DELETE = "delete"
def configure_logging():
    """Configure the main 'main_logger' to log to SOFTWARE_LOG_FILE.

    Reads the logging format string from the software config and
    substitutes the current executable name for the '%(exec)s' token.
    """
    cfg.read_config()
    my_exec = os.path.basename(sys.argv[0])

    log_format = cfg.logging_default_format_string
    log_format = log_format.replace('%(exec)s', my_exec)
    formatter = logging.Formatter(log_format, datefmt="%FT%T")

    LOG.setLevel(logging.INFO)
    # Guard against attaching a second FileHandler (and thus duplicated
    # log lines) if this function is called more than once.
    if not LOG.handlers:
        main_log_handler = logging.FileHandler(SOFTWARE_LOG_FILE)
        main_log_handler.setFormatter(formatter)
        LOG.addHandler(main_log_handler)
def get_migration_scripts(migration_script_dir):
    """Return the names of executable regular files in migration_script_dir.

    :raises Exception: if migration_script_dir is not a directory
    """
    if not os.path.isdir(migration_script_dir):
        msg = "Folder %s does not exist" % migration_script_dir
        LOG.exception(msg)
        raise Exception(msg)

    scripts = []
    for entry in os.listdir(migration_script_dir):
        full_path = os.path.join(migration_script_dir, entry)
        if os.path.isfile(full_path) and os.access(full_path, os.X_OK):
            scripts.append(entry)
    return scripts
def sort_migration_scripts(scripts, action):
    """Sort scripts in place by their numeric "nnn-" prefix.

    File names must follow the "nnn-*.*" pattern where "nnn" (everything
    before the first "-") is an unsigned integer; anything else raises.
    For rollback-type actions the sorted list is returned reversed.
    """
    reversed_actions = ['activate-rollback']

    def sequence_number(name):
        # The characters before the first "-" must parse as an int.
        return int(name.split("-")[0])

    try:
        scripts.sort(key=sequence_number)
        if action in reversed_actions:
            scripts = scripts[::-1]
            LOG.info(f"Executing deployment scripts for {action} in reversed order")
    except Exception:
        LOG.exception("Deployment script sequence validation failed, invalid "
                      "file name format")
        raise
    return scripts
# This file is currently categorized as independent from framework,
# which is runnable w/ N+1 code on a N runtime environment. The exception class
# is defined here instead of software.exceptions module as result.
# TODO(bqian) move the exception definition to software.exceptions if this code
# becomes part of framework.
class MigrationScriptFailed(Exception):
    """Raised when a deployment/migration script fails to execute.

    Carries the originating exception so callers can inspect or
    re-raise the underlying cause.
    """

    def __init__(self, msg, inner_exception):
        super().__init__(msg)
        self._inner_exception = inner_exception

    @property
    def inner_exception(self):
        # The original exception (e.g. CalledProcessError) behind this one.
        return self._inner_exception
def execute_script(script, from_release, to_release, action, port):
    """Run one deployment script; raise MigrationScriptFailed on error.

    The script is invoked as: script from_release to_release action [port].

    :param script: path of the script to execute
    :param from_release: release being deployed from
    :param to_release: release being deployed to
    :param action: deployment action name (e.g. "start", "migrate")
    :param port: optional extra command-line argument; omitted when None
    :raises MigrationScriptFailed: if the script exits non-zero or any
        other error occurs while executing it
    """
    MSG_SCRIPT_FAILURE = "Deployment script %s failed with return code %d" \
                         "\nScript output:\n%s"
    try:
        LOG.info("Executing deployment script %s" % script)
        cmdline = [script, from_release, to_release, action]
        if port is not None:
            cmdline.append(port)
        # check=True turns a non-zero exit code into CalledProcessError;
        # stderr is folded into stdout so all output is captured together.
        subprocess.run(cmdline,
                       stdout=subprocess.PIPE,
                       stderr=subprocess.STDOUT,
                       text=True,
                       check=True)
    except subprocess.CalledProcessError as e:
        # Normalize line endings and ensure a trailing newline before
        # embedding the script output in the error message.
        output = "\n".join(e.output.splitlines()) + "\n"
        error = MSG_SCRIPT_FAILURE % (script, e.returncode, output)
        raise MigrationScriptFailed(error, e)
    except Exception as ee:
        # Any other failure (e.g. missing or non-executable script) is
        # wrapped so callers only need to handle one exception type.
        error = f"Unexpected error executing {script}: {str(ee)}"
        raise MigrationScriptFailed(error, ee)
    LOG.info(f'Deployment script {script} completed successfully')
def initialize_deploy_failure_log():
    """Attach the failure-log file handler exactly once."""
    if DEPLOY_SCRIPTS_FAILURES_LOG.handlers:
        return
    DEPLOY_SCRIPTS_FAILURES_LOG.setLevel(logging.INFO)
    handler = logging.FileHandler(DEPLOY_SCRIPTS_FAILURES_LOG_FILE)
    handler.setFormatter(logging.Formatter(fmt='%(asctime)s: %(message)s',
                                           datefmt="%FT%T"))
    DEPLOY_SCRIPTS_FAILURES_LOG.addHandler(handler)
def log_exception(msg, exc):
    """Log msg followed by the full formatted traceback of exc at ERROR."""
    formatted = traceback.format_exception(type(exc), exc, exc.__traceback__)
    LOG.error(msg)
    LOG.error(''.join(formatted))
def execute_scripts(scripts, from_release, to_release, action, port, migration_script_dir):
    """Run each deployment script in order, honoring IGNORE_ERRORS.

    When the IGNORE_ERRORS environment variable is set (case-insensitive
    'true'), failing scripts are logged and collected, and a summary is
    written to the dedicated failure log at the end. Otherwise the first
    failure stops processing and the inner exception is re-raised.
    """
    ignore_errors = os.environ.get("IGNORE_ERRORS", 'False').upper() == 'TRUE'

    errors = []
    for name in scripts:
        script_path = os.path.join(migration_script_dir, name)
        try:
            execute_script(script_path, from_release, to_release, action, port)
        except MigrationScriptFailed as e:
            if not ignore_errors:
                log_exception(f"Migrate script error, action {action} stopped.",
                              e.inner_exception)
                raise e.inner_exception
            log_exception(f"Migrate script error, action {action} continue.",
                          e.inner_exception)
            errors.append(str(e))

    if errors and ignore_errors:
        LOG.warning(f"Action {action} completed with errors. Operation continue as IGNORE_ERRORS is set." +
                    f" Summarized error information can be found in {DEPLOY_SCRIPTS_FAILURES_LOG_FILE}")
        initialize_deploy_failure_log()
        # Record the per-script failures in the dedicated failure log.
        DEPLOY_SCRIPTS_FAILURES_LOG.info("%s action partially failed. " % action)
        DEPLOY_SCRIPTS_FAILURES_LOG.info("\n".join(errors))
def execute_migration_scripts(from_release, to_release, action, port=None,
                              migration_script_dir="/usr/local/share/upgrade.d"):
    """Discover, order, and run the deployment scripts for an action."""
    LOG.info("Executing deployment scripts from: %s with from_release: %s, to_release: %s, "
             "action: %s" % (migration_script_dir, from_release, to_release, action))
    ordered = sort_migration_scripts(
        get_migration_scripts(migration_script_dir), action)
    execute_scripts(ordered, from_release, to_release, action, port,
                    migration_script_dir)
def get_db_connection(hiera_db_records, database):
    """Build a postgresql:// connection URL for database on localhost.

    Credentials are taken from hiera_db_records[database].
    """
    creds = hiera_db_records[database]
    return "postgresql://%s:%s@%s/%s" % (
        creds['username'], creds['password'], 'localhost', database)
def get_password_from_keyring(service, username):
    """Retrieve password from keyring.

    Points XDG_DATA_HOME at the permanent keyring directory for the
    duration of the lookup and restores the environment afterwards.

    :param service: keyring service name
    :param username: keyring user name
    :raises: re-raises any exception from the keyring backend
    """
    password = ""
    # Remember any pre-existing value so we can restore it rather than
    # unconditionally deleting the variable (which would clobber an
    # XDG_DATA_HOME set by the environment).
    saved = os.environ.get("XDG_DATA_HOME")
    os.environ["XDG_DATA_HOME"] = KEYRING_PERMDIR
    try:
        password = keyring.get_password(service, username)
    except Exception as e:
        LOG.exception("Received exception when attempting to get password "
                      "for service %s, username %s: %s" %
                      (service, username, e))
        raise
    finally:
        if saved is None:
            del os.environ["XDG_DATA_HOME"]
        else:
            os.environ["XDG_DATA_HOME"] = saved
    return password
def get_upgrade_token(from_release,
                      config,
                      secure_config):
    """Build an upgrade-token command and record it in the hiera configs.

    Reads the from-release system hiera data, constructs an
    'openstack token issue' command that targets the local keystone
    endpoint, and updates `config` and `secure_config` in place with the
    upgrade-token entries consumed by the keystone puppet manifests.

    :param from_release: release whose hieradata supplies the settings
    :param config: dict updated with token file path and keystone URL
    :param secure_config: dict updated with the token-issuing command
    """
    # Get the system hiera data from the from release
    from_hiera_path = os.path.join(PLATFORM_PATH, "puppet", from_release,
                                   "hieradata")
    system_file = os.path.join(from_hiera_path, "system.yaml")
    with open(system_file, 'r') as s_file:
        system_config = yaml.load(s_file, Loader=yaml.Loader)
    # during a data-migration, keystone is running
    # on the controller UNIT IP, however the service catalog
    # that was migrated from controller-0 since lists the
    # floating controller IP. Keystone operations that use
    # the AUTH URL will hit this service URL and fail,
    # therefore we have to issue an Upgrade token for
    # all Keystone operations during an Upgrade. This token
    # will allow us to circumvent the service catalog entry, by
    # providing a bypass endpoint.
    keystone_upgrade_url = "http://{}:5000/{}".format(
        '127.0.0.1',
        system_config['openstack::keystone::params::api_version'])
    admin_user_domain = system_config.get(
        'platform::client::params::admin_user_domain')
    if admin_user_domain is None:
        # This value wasn't present in R2. So may be missing in upgrades from
        # that release
        LOG.info("platform::client::params::admin_user_domain key not found. "
                 "Using Default.")
        admin_user_domain = DEFAULT_DOMAIN_NAME
    admin_project_domain = system_config.get(
        'platform::client::params::admin_project_domain')
    if admin_project_domain is None:
        # This value wasn't present in R2. So may be missing in upgrades from
        # that release
        LOG.info("platform::client::params::admin_project_domain key not "
                 "found. Using Default.")
        admin_project_domain = DEFAULT_DOMAIN_NAME
    admin_password = get_password_from_keyring("CGCS", "admin")
    admin_username = system_config.get(
        'platform::client::params::admin_username')
    # the upgrade token command
    # NOTE(review): the admin password is embedded in this command string;
    # it ends up only in secure hiera data, but confirm consumers never
    # log the command.
    keystone_upgrade_token = (
        "openstack "
        "--os-username {} "
        "--os-password '{}' "
        "--os-auth-url {} "
        "--os-project-name admin "
        "--os-user-domain-name {} "
        "--os-project-domain-name {} "
        "--os-interface internal "
        "--os-identity-api-version 3 "
        "token issue -c id -f value".format(
            admin_username,
            admin_password,
            keystone_upgrade_url,
            admin_user_domain,
            admin_project_domain
        ))
    config.update({
        'openstack::keystone::upgrade::upgrade_token_file':
            '/etc/keystone/upgrade_token',
        'openstack::keystone::upgrade::url': keystone_upgrade_url
    })
    secure_config.update({
        'openstack::keystone::upgrade::upgrade_token_cmd':
            keystone_upgrade_token,
    })
def get_upgrade_data(from_release,
                     system_config,
                     secure_config):
    """Retrieve required data from the from-release, update system_config
    and secure_config with them.
    This function is needed for adding new service account and endpoints
    during upgrade.
    """
    # Load the system hiera data of the from-release.
    hiera_dir = os.path.join(PLATFORM_PATH, "puppet", from_release,
                             "hieradata")
    with open(os.path.join(hiera_dir, "system.yaml"), 'r') as s_file:
        from_release_config = yaml.load(s_file, Loader=yaml.Loader)

    # Carry the keystone region over from the from-release.
    system_config.update({
        'platform::client::params::identity_region':
            from_release_config.get('keystone::endpoint::region'),
        # Retrieve keystone::auth::region from the from-release for the new
        # service.
        # 'newservice::keystone::auth::region': keystone_region,
    })

    # Generate password for the new service
    # password = sysinv_utils.generate_random_password(16)
    secure_config.update({
        # Generate and set the keystone::auth::password for the new service.
        # 'newservice::keystone::auth::password': password,
    })
def _write_yaml_atomically(dirpath, prefix, dest, data):
    """Dump data as YAML to a temp file in dirpath, then rename over dest.

    The rename makes the update atomic so readers never observe a
    partially written file.
    """
    fd, tmppath = tempfile.mkstemp(dir=dirpath, prefix=prefix, text=True)
    with open(tmppath, 'w') as f:
        yaml.dump(data, f, default_flow_style=False)
    os.close(fd)
    os.rename(tmppath, dest)


def add_upgrade_entries_to_hiera_data(from_release):
    """Adds upgrade entries to the hiera data """
    filename = 'static.yaml'
    secure_filename = 'secure_static.yaml'
    path = constants.HIERADATA_PERMDIR

    # Get the hiera data for this release
    filepath = os.path.join(path, filename)
    with open(filepath, 'r') as c_file:
        config = yaml.load(c_file, Loader=yaml.Loader)
    secure_filepath = os.path.join(path, secure_filename)
    with open(secure_filepath, 'r') as s_file:
        secure_config = yaml.load(s_file, Loader=yaml.Loader)

    # File for system.yaml
    # TODO(bqian): This is needed for adding new service account and endpoints
    # during upgrade.
    system_filename = 'system.yaml'
    system_filepath = os.path.join(path, system_filename)

    # Get a token and update the config
    # Below should be removed. Need to ensure during data migration
    get_upgrade_token(from_release, config, secure_config)

    # Get required data from the from-release and add them in system.yaml.
    # We don't carry system.yaml from the from-release.
    # This is needed for adding new service account and endpoints
    # during upgrade.
    # TODO(bqian): Below should be replaced with generating hieradata from
    # migrated to-release database after "deploy host" is verified
    system_config = {}
    get_upgrade_data(from_release, system_config, secure_config)

    # Update the hiera data on disk
    try:
        _write_yaml_atomically(path, filename, filepath, config)
    except Exception:
        LOG.exception("failed to write config file: %s" % filepath)
        raise

    try:
        _write_yaml_atomically(path, secure_filename, secure_filepath,
                               secure_config)
    except Exception:
        LOG.exception("failed to write secure config: %s" % secure_filepath)
        raise

    # Add required hiera data into system.yaml.
    # This is needed for adding new service account and endpoints
    # during upgrade.
    try:
        _write_yaml_atomically(path, system_filename, system_filepath,
                               system_config)
    except Exception:
        LOG.exception("failed to write system config: %s" % system_filepath)
        raise
def apply_upgrade_manifest(controller_address):
    """Apply puppet upgrade manifest files."""
    # puppet-manifest-apply.sh <hieradata dir> <address> <personality> <mode>
    cmd = ["/usr/local/bin/puppet-manifest-apply.sh",
           constants.HIERADATA_PERMDIR,
           str(controller_address),
           constants.CONTROLLER,
           'upgrade']

    logfile = "/tmp/apply_manifest.log"
    try:
        # Both stdout and stderr are captured in the log file.
        with open(logfile, "w") as flog:
            subprocess.check_call(cmd, stdout=flog, stderr=flog)
    except subprocess.CalledProcessError:
        msg = "Failed to execute upgrade manifest"
        print(msg)
        raise Exception(msg)
def get_keystone_user_id(user_name):
    """Return the keystone user id for user_name, or None if not found.

    :param user_name: name in keystone's local_user table
    """
    conn = psycopg2.connect("dbname='keystone' user='postgres'")
    with conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            # Parameterized query: the original %-interpolation was
            # vulnerable to SQL injection and broke on names containing
            # a single quote.
            cur.execute("SELECT user_id FROM local_user WHERE name=%s",
                        (user_name,))
            row = cur.fetchone()
            if row is not None:
                return row['user_id']
            return None
def get_keystone_project_id(project_name):
    """Return the keystone project id for project_name, or None if not found.

    :param project_name: name in keystone's project table
    """
    conn = psycopg2.connect("dbname='keystone' user='postgres'")
    with conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            # Parameterized query: the original %-interpolation was
            # vulnerable to SQL injection and broke on names containing
            # a single quote.
            cur.execute("SELECT id FROM project WHERE name=%s",
                        (project_name,))
            row = cur.fetchone()
            if row is not None:
                return row['id']
            return None
def get_postgres_bin():
    """Get the path to the postgres binaries"""
    try:
        # pg_config prints the bin directory followed by a newline.
        bindir = subprocess.check_output(['pg_config', '--bindir']).decode()
        return bindir.rstrip('\n')
    except subprocess.CalledProcessError:
        LOG.exception("Failed to get postgres bin directory.")
        raise
def create_manifest_runtime_config(filename, config):
    """Write the runtime Puppet configuration to a runtime file.

    Does nothing when config is empty or None.
    """
    if not config:
        return
    try:
        with open(filename, 'w') as out:
            yaml.dump(config, out, default_flow_style=False)
    except Exception:
        LOG.exception("failed to write config file: %s" % filename)
        raise
def create_system_config():
    """Regenerate the puppet hiera system configuration via sysinv-puppet."""
    try:
        subprocess.check_call(["/usr/bin/sysinv-puppet",
                               "create-system-config",
                               constants.HIERADATA_PERMDIR])
    except subprocess.CalledProcessError:
        msg = "Failed to update puppet hiera system config"
        print(msg)
        raise Exception(msg)
def create_host_config(hostname=None):
    """Regenerate the puppet hiera host configuration via sysinv-puppet.

    :param hostname: restrict generation to this host when provided
    """
    cmd = ["/usr/bin/sysinv-puppet",
           "create-host-config",
           constants.HIERADATA_PERMDIR]
    if hostname:
        cmd.append(hostname)
    try:
        subprocess.check_call(cmd)
    except subprocess.CalledProcessError:
        msg = "Failed to update puppet hiera host config"
        print(msg)
        raise Exception(msg)