Move upgrade scripts from config repo to update repo

Since upgrades are the responsibility of USM, the upgrade scripts need
to be moved to this repo. This commit adds the upgrade-scripts
from the config repo; note that the upgrade-scripts will still be
installed under the /usr/local/share/upgrade.d folder.
The upgrade-scripts are also changed to use the log function
from this repo instead of the controllerconfig one.
Also fix a logging error in the deploy scripts.
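
For reference, the logging pattern the scripts converge on looks like
this (a minimal sketch assembled from the diffs below; the message text
is illustrative):

    import logging

    import upgrade_utils

    # module-level logger shared by the upgrade scripts
    LOG = logging.getLogger('main_logger')

    if __name__ == "__main__":
        upgrade_utils.configure_logging("/var/log/software.log",
                                        log_level=logging.INFO)
        LOG.info("example message")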

Test Plan:
PASS: build-pkgs && build-image.
PASS: Upgrade from 24.09 to 25.09 on SX.
PASS: Install/bootstrap 25.09 on SX.
PASS: Check that /usr/local/share/upgrade.d has the same scripts.
PASS: Check that the scripts log as expected.

Story: 2011357
Task: 52196

Change-Id: Iab5e6d6f0348f996daf0adb2447d22c4216e537f
Signed-off-by: Luis Eduardo Bonatti <luizeduardo.bonatti@windriver.com>
Parent: eb6bbe9951
Commit: e93d1686f5
31 changed files with 2879 additions and 17 deletions


@@ -89,9 +89,14 @@ override_dh_install:
		${SCRIPTDIR}/sync-controllers-feed
	install -m 755 scripts/remove-temporary-data \
		${SCRIPTDIR}/remove-temporary-data
+	install -d -m 755 $(ROOT)/usr/local/share/upgrade.d
+	install -p -D -m 755 upgrade-scripts/* $(ROOT)/usr/local/share/upgrade.d
+	install -d -m 755 $(ROOT)/etc/update.d
	# TODO(heitormatsui): following lines are for backward compatibility and can be removed in stx-12
	cd ${SCRIPTDIR} ; ln -sf major-release-upload usm_load_import
	cd ${SCRIPTDIR} ; ln -sf deploy-start software-deploy-start
	dh_install

override_dh_usrlocal:


@@ -9,3 +9,4 @@ usr/lib/python*/dist-packages/*
usr/sbin
usr/share/starlingx/pmon.d/software-controller-daemon.conf
usr/share/starlingx/pmon.d/software-agent.conf
+usr/local/share/upgrade.d/*


@@ -13,7 +13,7 @@
import configparser
import grp
-import logging as LOG
+import logging
import os
import pwd
import shutil
@@ -22,6 +22,7 @@ import sys
import upgrade_utils

+LOG = logging.getLogger('main_logger')

class PostgresDatabase:
    DEFAULT_POSTGRESQL_PORT = 5432
@@ -118,7 +119,7 @@ class PostgresDatabase:
if __name__ == "__main__":
-    upgrade_utils.configure_logging("/var/log/software.log", log_level=LOG.INFO)
+    upgrade_utils.configure_logging("/var/log/software.log", log_level=logging.INFO)
    port = None
    error = False


@@ -15,7 +15,7 @@ Run platform upgrade deploy precheck as a standalone executable
import argparse
import base64
import json
-import logging as LOG
+import logging
import re
import requests
import subprocess
@@ -47,6 +47,8 @@ SYSTEM_MODE_SIMPLEX = "simplex"
# defined in lvm_snapshot.LvmSnapshotManager.LOGICAL_VOLUMES
FREE_SPACE_NEEDED_LVM_SNAPSHOTS_GIB = 24

+LOG = logging.getLogger('main_logger')
+
class HealthCheck(object):
    """This class represents a general health check object
    that uses sysinv-client to run system health checks"""
@@ -499,5 +501,5 @@ def main(argv=None):
if __name__ == "__main__":
-    upgrade_utils.configure_logging("/var/log/software.log", log_level=LOG.INFO)
+    upgrade_utils.configure_logging("/var/log/software.log", log_level=logging.INFO)
    sys.exit(main())


@@ -13,7 +13,7 @@
# 5. perform data migration
#
-import logging as LOG
+import logging
import os
import shutil
import subprocess
@@ -21,6 +21,8 @@ import sys
import upgrade_utils

+LOG = logging.getLogger('main_logger')
+
class DeployStart:
    STAGING_DIR = "/sysroot/upgrade"
@@ -183,7 +185,7 @@ class DeployStart:
if __name__ == "__main__":
-    upgrade_utils.configure_logging("/var/log/software.log", log_level=LOG.INFO)
+    upgrade_utils.configure_logging("/var/log/software.log", log_level=logging.INFO)
    from_version = None
    to_version = None


@@ -17,7 +17,7 @@ to the controller.
import argparse
import configparser
import glob
-import logging as LOG
+import logging
import os
import shutil
import subprocess
@@ -25,6 +25,8 @@ import sys
import upgrade_utils

+LOG = logging.getLogger('main_logger')
+
AVAILABLE_DIR = "/opt/software/metadata/available"
UNAVAILABLE_DIR = "/opt/software/metadata/unavailable"
COMMITTED_DIR = "/opt/software/metadata/committed"
@@ -386,5 +388,5 @@ def main():
if __name__ == "__main__":
-    upgrade_utils.configure_logging('/var/log/software.log', log_level=LOG.INFO)
+    upgrade_utils.configure_logging('/var/log/software.log', log_level=logging.INFO)
    sys.exit(main())


@@ -11,13 +11,15 @@
# to the local filesystem data and configuration.
#
-import logging as LOG
+import logging
import os
import subprocess
import sys

import upgrade_utils

+LOG = logging.getLogger('main_logger')
+
class ChrootMounts:
    DEV_PATH="/dev"
@@ -108,7 +110,7 @@ class ChrootMounts:
if __name__ == "__main__":
-    upgrade_utils.configure_logging("/var/log/software.log", log_level=LOG.INFO)
+    upgrade_utils.configure_logging("/var/log/software.log", log_level=logging.INFO)
    checkout_dir = None
    operation = None


@@ -12,7 +12,7 @@
Run platform upgrade prep data migration as a standalone executable
"""
-import logging as LOG
+import logging
import os
import shutil
import subprocess
@@ -30,6 +30,7 @@ RABBIT_PATH = '/var/lib/rabbitmq'
KUBERNETES_CONF_PATH = "/etc/kubernetes"
KUBERNETES_ADMIN_CONF_FILE = "admin.conf"

+LOG = logging.getLogger('main_logger')

class DataMigration(object):
@@ -335,5 +336,5 @@ def main(sys_argv):
if __name__ == "__main__":
-    upgrade_utils.configure_logging("/var/log/software.log", log_level=LOG.INFO)
+    upgrade_utils.configure_logging("/var/log/software.log", log_level=logging.INFO)
    sys.exit(main(sys.argv))


@@ -16,7 +16,7 @@
#
import configparser
-import logging as LOG
+import logging
import os
import shutil
import subprocess
@@ -24,6 +24,8 @@ import sys
import upgrade_utils

+LOG = logging.getLogger('main_logger')
+
class RemoveTemporaryData:
    def __init__(self, checkout_dir):
@@ -109,7 +111,7 @@ class RemoveTemporaryData:
if __name__ == "__main__":
-    upgrade_utils.configure_logging("/var/log/software.log", log_level=LOG.INFO)
+    upgrade_utils.configure_logging("/var/log/software.log", log_level=logging.INFO)
    checkout_dir = None
    for arg in range(1, len(sys.argv)):


@@ -12,7 +12,7 @@
Run feed synchronization between controllers
"""
-import logging as LOG
+import logging
import os
import socket
import subprocess
@@ -23,6 +23,8 @@ CONTROLLER_0_HOSTNAME = "controller-0"
CONTROLLER_1_HOSTNAME = "controller-1"
SYSTEM_MODE_SIMPLEX = "simplex"

+LOG = logging.getLogger('main_logger')
+
def sync_controllers(feed):
    controller = socket.gethostname()
    controller = CONTROLLER_1_HOSTNAME if \
@@ -96,5 +98,5 @@ def main(sys_argv):
        return 1
if __name__ == "__main__":
-    upgrade_utils.configure_logging('/var/log/software.log', log_level=LOG.INFO)
+    upgrade_utils.configure_logging('/var/log/software.log', log_level=logging.INFO)
    sys.exit(main(sys.argv))


@@ -102,9 +102,10 @@ enable-extensions = H106, H203
# E501 line too long => ~15 instances
# H105 Don't use author tags => 1 instance
# H306 imports not in alphabetical order => ~80 instances
+# H401: docstring should not start with a space => ~5 instances
# H404 multi line docstring should start without a leading new line => ~80 instances
# H405 multi line docstring summary not separated with an empty line => ~930 instances
-extend-ignore = E402, E501, H105, H306, H404, H405
+extend-ignore = E402, E501, H105, H306, H401, H404, H405

[testenv:flake8]
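
For context, H401 flags a docstring whose text starts with a space; a
minimal illustrative offender (not taken from this repo):

    def example():
        """ Leading space here triggers H401."""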


@@ -0,0 +1,72 @@
#!/bin/bash
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This script adds system service parameters for service kubernetes
# for sections kube_scheduler and kube_controller_manager.
#
# This script relies on 'kubernetes-service-parameters-apply.py'
# to apply the parameters to the kube-apiserver, so this script must
# run before it.
#
# As this script does not restart any kubernetes components, we do not
# need to run k8s health check here.
#
NAME=$(basename "$0")
# The migration scripts are passed these parameters:
FROM_RELEASE=$1
TO_RELEASE=$2
ACTION=$3
SOFTWARE_LOG_PATH="/var/log/software.log"
function log {
echo "$(date -Iseconds | cut -d'+' -f1): ${NAME}[$$]: INFO: $*" >> "${SOFTWARE_LOG_PATH}" 2>&1
}
log "Disable leader election for kube-scheduler and kube-controller-manager"\
"from $FROM_RELEASE to $TO_RELEASE with action $ACTION"
if [[ "${ACTION}" == "activate" ]]; then
source /etc/platform/platform.conf
if [[ "${nodetype}" == "controller" ]] && [[ "${system_mode}" == "simplex" ]]; then
source /etc/platform/openrc
for section in kube_scheduler kube_controller_manager; do
value=$(system service-parameter-list --service kubernetes \
--section "${section}" --format value | awk '/leader-elect/ {print $5}')
if [[ "${value}" == "false" ]]; then
log "Service parameter leader-elect=false already exists for section ${section}."\
"Nothing to do."
elif [[ "${value}" == "" ]]; then
system service-parameter-add kubernetes "${section}" leader-elect=false
RC=$?
if [ ${RC} == 0 ]; then
log "Successfully added service parameter leader-elect=false for ${section}"
else
log "Command service-parameter-add failed for section ${section}."\
"Exiting for manual intervention or retry of the activation."
exit 1
fi
else
# 'true' or any garbage value
system service-parameter-modify kubernetes "${section}" leader-elect=false
RC=$?
if [ ${RC} == 0 ]; then
log "Successfully updated service parameter leader-elect=false for ${section}"
else
log "Command service-parameter-modify failed for section ${section}."\
"Exiting for manual intervention or retry of the activation."
exit 1
fi
fi
done
else
log "No actions required for ${system_mode}-${nodetype}"
fi
else
log "No actions required from release ${FROM_RELEASE} to ${TO_RELEASE} with action ${ACTION}"
fi
exit 0
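
For reference, the log helper above appends lines of the following shape
to /var/log/software.log (the script name and PID are illustrative):

    2025-05-15T17:46:48: 10-example-script.sh[12345]: INFO: Successfully added service parameter leader-elect=false for kube_scheduler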


@@ -0,0 +1,158 @@
#!/usr/bin/env python
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This script updates the node IP addresses in sysinv DB tables. Only admin
# network entries and only AIO-SX systems will be updated with the following
# actions:
# - address_pools: update controller0_address_id and controller1_address_id
# to None
# - addresses: update floating address IPv4 and IPv6 entries' interface_id
# with controller-0's entries' interface_id
# - addresses: delete IPv4 and IPv6 controller-0 and controller-1 entries'
# interface_id
#
import logging
import sys
from packaging import version
import psycopg2
from six.moves import configparser
from software.utilities.utils import configure_logging
DEFAULT_POSTGRES_PORT = 5432
LOG = logging.getLogger('main_logger')
def main():
    action = None
    from_release = None
    to_release = None
    postgres_port = DEFAULT_POSTGRES_PORT
    arg = 1
    while arg < len(sys.argv):
        if arg == 1:
            from_release = sys.argv[arg]
        elif arg == 2:
            to_release = sys.argv[arg]
        elif arg == 3:
            action = sys.argv[arg]
        elif arg == 4:
            postgres_port = sys.argv[arg]
            pass
        else:
            print("Invalid option %s." % sys.argv[arg])
            return 1
        arg += 1
    configure_logging()
    LOG.info(
        "%s invoked from_release = %s to_release = %s action = %s"
        % (sys.argv[0], from_release, to_release, action)
    )
    res = 0
    to_release_version = version.Version(to_release)
    target_version = version.Version("25.09")
    if action == 'migrate' and to_release_version == target_version:
        if get_system_mode() == "simplex":
            try:
                conn = psycopg2.connect("dbname=sysinv user=postgres port=%s"
                                        % postgres_port)
                del_admin_node_addresses(conn)
                conn.close()
            except Exception as e:
                LOG.exception("Error: {}".format(e))
                res = 1
    return res


def del_admin_node_addresses(conn):
    query = (
        "SELECT address_pools.id,controller0_address_id,controller1_address_id"
        ",floating_address_id "
        "FROM address_pools "
        "JOIN network_addresspools ON address_pools.id "
        "= network_addresspools.address_pool_id "
        "JOIN networks ON network_addresspools.network_id = networks.id "
        "WHERE networks.type = 'admin';"
    )
    res1 = db_query(conn, query)
    LOG.info("Number of address_pools entries found: %s" % len(res1))
    controller0_ids = ",".join([str(e[1]) for e in res1 if e[1]])
    if not controller0_ids:
        LOG.info("Nothing to change")
        return
    query = (
        "SELECT interface_id "
        "FROM addresses "
        "WHERE id IN (%s);" % controller0_ids
    )
    res2 = db_query(conn, query)
    c0_interface_ids = tuple([e[0] for e in res2])
    LOG.info("interface_id found in addresses: %s" % (c0_interface_ids,))
    idx = 0
    for entry in res1:
        address_pools_id = entry[0]
        node_ids = entry[1:3]
        floating_id = entry[3]
        LOG.info("Found admin controller-0 and controller-1 IDs = %s"
                 % (node_ids,))
        query = (
            "UPDATE address_pools "
            "SET controller0_address_id = NULL, controller1_address_id = NULL "
            "WHERE id = %s;" % address_pools_id
        )
        db_update(conn, query)
        query = (
            "UPDATE addresses "
            "SET interface_id = %s "
            "WHERE id = %s;" % (c0_interface_ids[idx], floating_id)
        )
        db_update(conn, query)
        query = (
            "DELETE FROM addresses "
            "WHERE id IN %s;" % (node_ids,)
        )
        db_update(conn, query)
        idx += 1
    LOG.info("Admin addresses deleted from address_pools and addresses tables "
             "with success")


def db_query(conn, query):
    result = []
    with conn.cursor() as cur:
        cur.execute(query)
        for rec in cur:
            result.append(rec)
    return result


def db_update(conn, query):
    with conn.cursor() as cur:
        cur.execute(query)
    conn.commit()


def get_system_mode():
    ini_str = '[DEFAULT]\n' + open('/etc/platform/platform.conf', 'r').read()
    config_applied = configparser.RawConfigParser()
    config_applied.read_string(ini_str)
    if config_applied.has_option('DEFAULT', 'system_mode'):
        system_mode = config_applied.get('DEFAULT', 'system_mode')
    else:
        system_mode = None
    return system_mode


if __name__ == "__main__":
    sys.exit(main())
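
All of the new migration scripts share the positional-argument convention
seen above, sketched here for reference (the helper name is illustrative,
not part of the commit):

    # <script> <from_release> <to_release> <action> [postgres_port]
    # where action is e.g. "migrate", "activate" or "activate-rollback"
    import sys

    DEFAULT_POSTGRES_PORT = 5432

    def parse_migration_args(argv):
        from_release, to_release, action = argv[1], argv[2], argv[3]
        # optional port parameter used by USM when connecting to postgres
        port = argv[4] if len(argv) > 4 else DEFAULT_POSTGRES_PORT
        return from_release, to_release, action, port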


@@ -0,0 +1,90 @@
#!/usr/bin/env python
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This script will remove the load, host_upgrade and software_upgrade
# database tables
#
import logging
import sys
from packaging import version
import psycopg2
from software.utilities.utils import configure_logging
DEFAULT_POSTGRES_PORT = 5432
LOG = logging.getLogger('main_logger')
def main():
    action = None
    from_release = None
    to_release = None
    postgres_port = DEFAULT_POSTGRES_PORT
    arg = 1
    while arg < len(sys.argv):
        if arg == 1:
            from_release = sys.argv[arg]
        elif arg == 2:
            to_release = sys.argv[arg]
        elif arg == 3:
            action = sys.argv[arg]
        elif arg == 4:
            postgres_port = sys.argv[arg]
            pass
        else:
            print("Invalid option %s." % sys.argv[arg])
            return 1
        arg += 1
    configure_logging()
    LOG.info(
        "%s invoked from_release = %s to_release = %s action = %s"
        % (sys.argv[0], from_release, to_release, action)
    )
    res = 0
    to_release_version = version.Version(to_release)
    minimum_version = version.Version("25.09")
    if action == 'migrate' and to_release_version == minimum_version:
        try:
            conn = psycopg2.connect("dbname=sysinv user=postgres port=%s"
                                    % postgres_port)
            delete_software_upgrade_database(conn)
            delete_host_upgrade_database(conn)
            delete_load_database(conn)
            conn.close()
        except Exception as e:
            LOG.exception("Error: {}".format(e))
            res = 1
    return res


def delete_load_database(conn):
    delete_cmd = "drop table if exists loads;"
    db_update(conn, delete_cmd)
    LOG.info("Loads table removed with success")


def delete_host_upgrade_database(conn):
    delete_cmd = "drop table if exists host_upgrade;"
    db_update(conn, delete_cmd)
    LOG.info("Host_upgrade table removed with success")


def delete_software_upgrade_database(conn):
    delete_cmd = "drop table if exists software_upgrade;"
    db_update(conn, delete_cmd)
    LOG.info("Software_upgrade table removed with success")


def db_update(conn, query):
    with conn.cursor() as cur:
        cur.execute(query)
    conn.commit()


if __name__ == "__main__":
    sys.exit(main())


@@ -0,0 +1,105 @@
#!/usr/bin/env python
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# The purpose of this script is to populate the sw_version
# field of the i_host table.
import logging
import sys
import psycopg2
from six.moves import configparser
from software.utilities.utils import configure_logging
CONTROLLER_0_HOSTNAME = "controller-0"
CONTROLLER_1_HOSTNAME = "controller-1"
DEFAULT_POSTGRES_PORT = 5432
LOG = logging.getLogger('main_logger')
def main():
    action = None
    from_release = None
    to_release = None
    postgres_port = DEFAULT_POSTGRES_PORT
    arg = 1
    while arg < len(sys.argv):
        if arg == 1:
            from_release = sys.argv[arg]
        elif arg == 2:
            to_release = sys.argv[arg]
        elif arg == 3:
            action = sys.argv[arg]
        elif arg == 4:
            postgres_port = sys.argv[arg]
            pass
        else:
            print("Invalid option %s." % sys.argv[arg])
            return 1
        arg += 1
    configure_logging()
    LOG.info(
        "%s invoked from_release = %s to_release = %s action = %s"
        % (sys.argv[0], from_release, to_release, action)
    )
    res = 0
    if action == 'migrate':
        try:
            conn = psycopg2.connect("dbname=sysinv user=postgres port=%s"
                                    % postgres_port)
            populate_ihost_sw_version(conn, to_release)
            conn.close()
        except Exception as e:
            LOG.exception("Error: {}".format(e))
            res = 1
    return res


def populate_ihost_sw_version(conn, to_release):
    """
    Populate the sw_version field of i_host table for simplex
    """
    hostname = CONTROLLER_1_HOSTNAME
    if get_system_mode() == "simplex":
        hostname = CONTROLLER_0_HOSTNAME
    update_query = ("UPDATE i_host set sw_version = %s WHERE "
                    "hostname = '%s'" % (to_release, hostname))
    db_update(conn, update_query)
    LOG.info("Updated sw_version to %s on %s" % (to_release, hostname))


def get_system_mode():
    ini_str = '[DEFAULT]\n' + open('/etc/platform/platform.conf', 'r').read()
    config_applied = configparser.RawConfigParser()
    config_applied.read_string(ini_str)
    if config_applied.has_option('DEFAULT', 'system_mode'):
        system_mode = config_applied.get('DEFAULT', 'system_mode')
    else:
        system_mode = None
    return system_mode


def db_update(conn, query):
    with conn.cursor() as cur:
        cur.execute(query)
    conn.commit()


def db_query(conn, query):
    result = []
    with conn.cursor() as cur:
        cur.execute(query)
        for rec in cur:
            result.append(rec)
    return result


if __name__ == "__main__":
    sys.exit(main())


@@ -0,0 +1,120 @@
#!/usr/bin/env python
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import logging
from oslo_config import cfg
import os
from six.moves import configparser
import sys
import subprocess
from cgtsclient import client as cgts_client
from software.utilities.utils import configure_logging
CONF = cfg.CONF
LOG = logging.getLogger('main_logger')
class CgtsClient(object):
    SYSINV_API_VERSION = "1"

    def __init__(self):
        self._sysinv_client = None

    @property
    def sysinv(self):
        if not self._sysinv_client:
            self._sysinv_client = cgts_client.get_client(
                self.SYSINV_API_VERSION,
                os_auth_token=os.environ.get("OS_AUTH_TOKEN"),
                system_url=os.environ.get("SYSTEM_URL"),
            )
        return self._sysinv_client


def main():
    action = None
    from_release = None
    to_release = None
    arg = 1
    while arg < len(sys.argv):
        if arg == 1:
            from_release = sys.argv[arg]
        elif arg == 2:
            to_release = sys.argv[arg]
        elif arg == 3:
            action = sys.argv[arg]
        elif arg == 4:
            # optional port parameter for USM upgrade
            # port = sys.argv[arg]
            pass
        else:
            print("Invalid option %s." % sys.argv[arg])
            return 1
        arg += 1
    configure_logging()
    LOG.info("%s invoked from_release = %s to_release = %s action = %s"
             % (sys.argv[0], from_release, to_release, action))
    res = 0
    if action == "activate":
        try:
            res = activate_keystone()
        except Exception:
            LOG.error("Activate keystone action failed")
            res = 1
    return res


def get_system_mode():
    ini_str = '[DEFAULT]\n' + open('/etc/platform/platform.conf', 'r').read()
    config_applied = configparser.RawConfigParser()
    config_applied.read_string(ini_str)
    if config_applied.has_option('DEFAULT', 'system_mode'):
        system_mode = config_applied.get('DEFAULT', 'system_mode')
    else:
        system_mode = None
    return system_mode


def get_shared_services():
    client = CgtsClient()
    isystem = client.sysinv.isystem.list()[0]
    shared_services = isystem.capabilities.get('shared_services', '')
    return shared_services


def activate_keystone():
    if get_system_mode() != "simplex":
        try:
            shared_services = get_shared_services()
        except Exception:
            LOG.exception("Failed to get shared services")
            return 1
        if 'identity' not in shared_services:
            keystone_cmd = ('keystone-manage db_sync --contract')
            try:
                subprocess.check_call([keystone_cmd], shell=True)
            except subprocess.CalledProcessError:
                msg = "Failed to contract Keystone databases for upgrade."
                LOG.exception(msg)
                return 1
            except Exception:
                LOG.exception("Failed to execute command %s" % keystone_cmd)
                return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())


@@ -0,0 +1,172 @@
#!/usr/bin/env python
# Copyright (c) 2023-2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import logging
import os
import shutil
import subprocess
import sys
import tempfile
import yaml
from software.utilities.utils import configure_logging
LOG = logging.getLogger('main_logger')
def get_list_of_keys(from_release, to_release):
    keys = {"static": [], "secure_static": []}
    return keys


def main():
    action = None
    from_release = None
    to_release = None
    arg = 1
    while arg < len(sys.argv):
        if arg == 1:
            from_release = sys.argv[arg]
        elif arg == 2:
            to_release = sys.argv[arg]
        elif arg == 3:
            action = sys.argv[arg]
        elif arg == 4:
            # optional port parameter for USM upgrade
            # port = sys.argv[arg]
            pass
        else:
            print("Invalid option %s." % sys.argv[arg])
            return 1
        arg += 1
    configure_logging()
    LOG.info("%s invoked from_release = %s to_release = %s action = %s"
             % (sys.argv[0], from_release, to_release, action))
    res = 0
    if action == "migrate":
        try:
            res = do_update(from_release, to_release)
        except Exception:
            LOG.exception("Updating static hieradata action failed")
            res = 1
    return res


def do_update(from_release, to_release):
    with tempfile.TemporaryDirectory() as tempdir:
        _do_update_under_temp(from_release, to_release, tempdir)


def _do_update_under_temp(from_release, to_release, tempdir):
    SYSTEM_STATIC_FILE = "static.yaml"
    SECURE_STATIC_FILE = "secure_static.yaml"
    HIERADATA_PATH = "/opt/platform/puppet/%s/hieradata"
    # copy static hieradata yaml files to tempdir
    system_static_file = \
        os.path.join(HIERADATA_PATH % to_release, SYSTEM_STATIC_FILE)
    secure_static_file = \
        os.path.join(HIERADATA_PATH % to_release, SECURE_STATIC_FILE)
    tmp_system_static_file = os.path.join(tempdir, SYSTEM_STATIC_FILE)
    tmp_secure_static_file = os.path.join(tempdir, SECURE_STATIC_FILE)
    files_to_copy = {system_static_file: tmp_system_static_file,
                     secure_static_file: tmp_secure_static_file}
    for src in files_to_copy:
        dest = files_to_copy[src]
        try:
            shutil.copyfile(src, dest)
        except IOError as e:
            LOG.error("Failed copying file %s to %s. Error %s", src, dest, e)
            raise
    # generate static config to /opt/platform/puppet/<ver>/hieradata
    cmd = ["sysinv-puppet", "create-static-config"]
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True)
    out, err = process.communicate()
    if process.returncode != 0:
        msg = "Failed to generate static config. Command output: \n%s" % err
        LOG.error(msg)
        raise Exception(msg)
    src_file_mapping = {"static": system_static_file,
                        "secure_static": secure_static_file}
    tmp_file_mapping = {"static": tmp_system_static_file,
                        "secure_static": tmp_secure_static_file}
    list_of_keys = get_list_of_keys(from_release, to_release)
    # find the new generated static data and update the static hieradata
    # from previous release
    for tag in list_of_keys:
        keys = list_of_keys[tag]
        if len(keys) > 0:
            tmp_file = tmp_file_mapping[tag]
            src_file = src_file_mapping[tag]
            # read the key/value from src_file
            # (generated by sysinv-puppet create-static-config)
            # at /opt/platform/puppet/hieradata/<to_release>
            # write the key/value to tmp_file at temp directory
            with open(src_file, "r") as src:
                try:
                    src_data = yaml.load(src, Loader=yaml.Loader)
                except Exception as e:
                    LOG.error("Failed to load %s. Error %s" % (src_file, e))
                    raise
            with open(tmp_file, "r") as dest:
                try:
                    dest_data = yaml.load(dest, Loader=yaml.Loader)
                except Exception as e:
                    LOG.error("Failed to load %s. Error %s" % (tmp_file, e))
                    raise
            for key in keys:
                if key in src_data:
                    dest_data[key] = src_data[key]
                else:
                    LOG.warn("Expect %s generated in %s, but is not found" %
                             (key, src_file))
            with open(tmp_file, "w") as dest:
                try:
                    yaml.dump(dest_data, dest, default_flow_style=False)
                except Exception as e:
                    LOG.error("Failed to update %s. Error %s" %
                              (tmp_file, e))
                    raise
    # copy the updated static hieradata yaml files to hieradata directory
    # of to release
    dest_system_static_file = \
        os.path.join(HIERADATA_PATH % to_release, SYSTEM_STATIC_FILE)
    dest_secure_static_file = \
        os.path.join(HIERADATA_PATH % to_release, SECURE_STATIC_FILE)
    dest_file_mapping = {"static": dest_system_static_file,
                         "secure_static": dest_secure_static_file}
    for tag in ["static", "secure_static"]:
        try:
            shutil.copyfile(tmp_file_mapping[tag], dest_file_mapping[tag])
        except Exception as e:
            msg = "Failed to copy file %s to %s. Error %s" % (
                tmp_file_mapping[tag],
                dest_file_mapping[tag],
                e)
            LOG.error(msg)
            raise Exception(msg)


if __name__ == "__main__":
    sys.exit(main())


@@ -0,0 +1,48 @@
#!/bin/bash
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This script disables NFV-VIM Web Server. From version 25.x onwards,
# the web server will stay disabled by default in order to optimize
# memory and CPU consumption of the host.
#
# The user can manually reactivate it by issuing the command:
# "sm-provision service-group-member vim-services vim-webserver"
#
# shellcheck disable=SC2206
# The script receives these parameters:
FROM_RELEASE=$1
TO_RELEASE=$2
ACTION=$3
SOFTWARE_LOG_PATH="/var/log/software.log"
FROM_RELEASE_ARR=(${FROM_RELEASE//./ })
FROM_RELEASE_MAJOR=${FROM_RELEASE_ARR[0]}
TO_RELEASE_ARR=(${TO_RELEASE//./ })
TO_RELEASE_MAJOR=${TO_RELEASE_ARR[0]}
# Default logging method extracted from script #02
function log {
echo "$(date -Iseconds | cut -d'+' -f1): ${NAME}[$$]: INFO: $*" \
>> "${SOFTWARE_LOG_PATH}" 2>&1
}
if [[ "${ACTION}" == "migrate" ]] && \
[ ${FROM_RELEASE_MAJOR} -lt 25 ] && \
[ ${TO_RELEASE_MAJOR} -ge 25 ]; then
log Disabling the NFV-VIM Web Server...
sm-deprovision service-group-member vim-services vim-webserver
ret_value=$?
[ $ret_value -eq 0 ] && log NFV-VIM Web Server successfully disabled
exit $ret_value
else
log No actions required from $FROM_RELEASE to $TO_RELEASE with action $ACTION
fi


@@ -0,0 +1,315 @@
#!/usr/bin/python
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
import logging
import os
import psycopg2
import subprocess
import sys
import uuid
from cgtsclient import client as cgts_client
from netaddr import valid_ipv4
from netaddr import valid_ipv6
from software.utilities.utils import configure_logging
from sysinv.common import constants as sysinv_constants
from wsme import types as wtypes
DEFAULT_POSTGRES_PORT = 5432
LOG_FILE = "/var/log/software.log"
LOG = logging.getLogger('main_logger')
# CgtsClient class to handle API interactions
class CgtsClient(object):
    SYSINV_API_VERSION = 1

    def __init__(self):
        self.conf = {}
        self._sysinv = None
        # Loading credentials and configurations from environment variables
        # typically set in OpenStack
        source_command = 'source /etc/platform/openrc && env'
        with open(os.devnull, "w") as fnull:
            proc = subprocess.Popen(
                ['bash', '-c', source_command],
                stdout=subprocess.PIPE, stderr=fnull,
                universal_newlines=True)
            # Keep the variables that start with 'OS_', strip the
            # prefix and lower-case the key
            for line in proc.stdout:
                key, _, value = line.partition("=")
                if key.startswith('OS_'):
                    self.conf[key[3:].lower()] = value.strip()
            proc.communicate()

    @property
    def sysinv(self):
        if not self._sysinv:
            self._sysinv = cgts_client.get_client(
                self.SYSINV_API_VERSION,
                os_username=self.conf['username'],
                os_password=self.conf['password'],
                os_auth_url=self.conf['auth_url'],
                os_project_name=self.conf['project_name'],
                os_project_domain_name=self.conf['project_domain_name'],
                os_user_domain_name=self.conf['user_domain_name'],
                os_region_name=self.conf['region_name'],
                os_service_type='platform',
                os_endpoint_type='internal')
        return self._sysinv


def main():
    action = None
    from_release = None
    to_release = None
    postgres_port = DEFAULT_POSTGRES_PORT
    arg = 1
    while arg < len(sys.argv):
        if arg == 1:
            from_release = sys.argv[arg]
        elif arg == 2:
            to_release = sys.argv[arg]
        elif arg == 3:
            action = sys.argv[arg]
        elif arg == 4:
            # optional port parameter for USM upgrade
            postgres_port = sys.argv[arg]
        else:
            print("Invalid option %s." % sys.argv[arg])
            return 1
        arg += 1
    configure_logging()
    LOG.info("%s invoked from_release = %s to_release = %s action = %s"
             % (sys.argv[0], from_release, to_release, action))
    if action == "migrate" and from_release == "24.09":
        LOG.info("Create service parameter dns host record for "
                 "registry.central")
        conn = None
        try:
            client = CgtsClient()
            virtual_system = check_virtual_system(client)
            conn = psycopg2.connect(
                "dbname=sysinv user=postgres port=%s" % postgres_port)
            floating_address_id = get_floating_sc_address_id(
                conn, virtual_system)
            if not floating_address_id:
                LOG.info("System controller address ID not found, exiting.")
                return 0
            registry_central_ip = get_address_by_id(conn, floating_address_id)
            if not registry_central_ip:
                LOG.info("System controller address not found, exiting.")
                return 0
            if virtual_system:
                registry_local_ip = get_controller_mgmt_address(conn)
                update_dns_registry(
                    conn, registry_central_ip, registry_local_ip, to_release)
            else:
                update_dns_registry(conn, registry_central_ip, None,
                                    to_release)
            if not check_dns_resolution(registry_central_ip):
                return 1
        except Exception as ex:
            LOG.exception("Error: %s" % ex)
            print(ex)
            return 1
        finally:
            if conn:
                conn.close()
    return 0


def update_dns_registry(conn, registry_central_ip,
                        registry_local_ip=None, to_release=None):
    try:
        delete_query = (
            "DELETE FROM service_parameter "
            "WHERE service='dns' AND section='host-record' "
            "AND name IN ('registry.central', 'registry.local');"
        )
        db_execute(conn, delete_query)
        created_at = wtypes.datetime.datetime
        central_uuid = str(uuid.uuid4())
        insert_central_query = (
            "INSERT INTO service_parameter "
            "(uuid, service, section, name, value, personality, "
            "resource, created_at) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s);"
        )
        central_values = (
            central_uuid, 'dns', 'host-record', 'registry.central',
            f"registry.central,{registry_central_ip}",
            None, None, created_at.utcnow()
        )
        db_execute(conn, insert_central_query, central_values)
        if registry_local_ip:
            local_uuid = str(uuid.uuid4())
            insert_local_query = (
                "INSERT INTO service_parameter "
                "(uuid, service, section, name, value, personality, "
                "resource, created_at) "
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s);"
            )
            local_values = (
                local_uuid, 'dns', 'host-record', 'registry.local',
                f"registry.local,{registry_local_ip}",
                None, None, created_at.utcnow()
            )
            db_execute(conn, insert_local_query, local_values)
        LOG.info("DNS host records for registry inserted successfully.")
        config_dir = f"/opt/platform/config/{to_release}"
        config_file = os.path.join(config_dir, "dnsmasq.addn_conf")
        os.makedirs(config_dir, exist_ok=True)
        LOG.info("Created config directory: %s" % config_dir)
        existing_lines = []
        if os.path.exists(config_file):
            with open(config_file, "r") as f:
                existing_lines = f.readlines()
        updated_lines = []
        for line in existing_lines:
            if not line.startswith("host-record=registry.central,") and \
                    not line.startswith("host-record=registry.local,"):
                updated_lines.append(line.strip())
        updated_lines.append(
            f"host-record=registry.central,{registry_central_ip}"
        )
        if registry_local_ip:
            updated_lines.append(
                f"host-record=registry.local,{registry_local_ip}"
            )
        with open(config_file, "w") as f:
            for line in updated_lines:
                f.write(line + "\n")
                LOG.info("Updated entry in %s: %s" % (config_file, line))
    except Exception as e:
        LOG.exception("Failed to update DNS records: %s" % e)
        raise


def db_execute(conn, query, params=None):
    try:
        with conn.cursor() as cursor:
            if params:
                cursor.execute(query, params)
            else:
                cursor.execute(query)
        conn.commit()
    except Exception as e:
        conn.rollback()
        LOG.exception("Error executing query: %s" % e)
        raise


def db_query(conn, query):
    try:
        with conn.cursor() as cursor:
            cursor.execute(query)
            result = cursor.fetchone()
            return result[0] if result else None
    except Exception as e:
        LOG.exception("Error executing query: %s" % e)
        raise


def check_virtual_system(client):
    parameters = client.sysinv.service_parameter.list()
    for parameter in parameters:
        if (parameter.name ==
                sysinv_constants.SERVICE_PARAM_NAME_PLAT_CONFIG_VIRTUAL):
            return True
    return False


def get_floating_sc_address_id(conn, virtual_system):
    if virtual_system:
        query = (
            "SELECT floating_address_id FROM address_pools "
            "WHERE name = 'system-controller-subnet';"
        )
    else:
        query = (
            "SELECT floating_address_id FROM address_pools "
            "WHERE name = 'system-controller-oam-subnet';"
        )
    return db_query(conn, query)


def get_controller_mgmt_address(conn):
    query = "SELECT address FROM addresses WHERE name = 'controller-mgmt';"
    return db_query(conn, query)


def get_address_by_id(conn, floating_address_id):
    query = (
        "SELECT address FROM addresses WHERE id = %s;"
        % floating_address_id
    )
    return db_query(conn, query)


def check_dns_resolution(ip_address):
    if valid_ipv4(ip_address):
        record_type = "A"
        ip_type = "IPv4"
    elif valid_ipv6(ip_address):
        record_type = "AAAA"
        ip_type = "IPv6"
    else:
        LOG.error("Invalid IP address: %s" % ip_address)
        return False
    LOG.info("Checking resolution to registry.central")
    result = subprocess.run(
        ["dig", "registry.central", record_type, "+short"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        universal_newlines=True
    )
    if result.returncode != 0 or not result.stdout.strip():
        LOG.error(
            "Failed to resolve %s address %s to a name associated with "
            "the domain (registry.central). No valid DNS record found." %
            (ip_type, ip_address)
        )
        return False
    return True


if __name__ == "__main__":
    sys.exit(main())


@@ -0,0 +1,91 @@
#!/usr/bin/python
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
import logging
import psycopg2
import sys
from software.utilities.utils import configure_logging
DEFAULT_POSTGRES_PORT = 5432
LOG_FILE = "/var/log/software.log"
DB_NAME = "sysinv"
DB_USER = "postgres"
LOG = logging.getLogger('main_logger')
def main():
    action = None
    from_release = None
    to_release = None
    postgres_port = DEFAULT_POSTGRES_PORT
    arg = 1
    while arg < len(sys.argv):
        if arg == 1:
            from_release = sys.argv[arg]
        elif arg == 2:
            to_release = sys.argv[arg]
        elif arg == 3:
            action = sys.argv[arg]
        elif arg == 4:
            # optional port parameter for USM upgrade
            postgres_port = sys.argv[arg]
            pass
        else:
            print("Invalid option %s." % sys.argv[arg])
            return 1
        arg += 1
    configure_logging()
    res = 0
    LOG.info("%s invoked from_release = %s to_release = %s action = %s"
             % (sys.argv[0], from_release, to_release, action))
    if action == "migrate" and from_release == "24.09":
        LOG.info("Updating addresses table entries.")
        try:
            update_address_name_from_db(postgres_port)
        except Exception as ex:
            LOG.exception("Error: {}".format(ex))
            print(ex)
            res = 1
    return res


def update_address_name_from_db(postgres_port):
    query = """
        UPDATE addresses
        SET name = REGEXP_REPLACE(
            name, '^system-controller-gateway-ip-', 'controller-gateway-')
        WHERE name LIKE 'system-controller-gateway-ip-%';
    """
    try:
        with psycopg2.connect(
            dbname=DB_NAME,
            user=DB_USER,
            port=postgres_port
        ) as conn:
            with conn.cursor() as cursor:
                cursor.execute(query)
                rows_updated = cursor.rowcount
            conn.commit()
        if rows_updated:
            LOG.info(
                "Updated %d entries in addresses table.", rows_updated)
        else:
            LOG.info("No entries updated in addresses table.")
    except Exception as e:
        LOG.error(f"Failed to update IP addresses in the "
                  f"database: {e}")
        raise


if __name__ == "__main__":
    sys.exit(main())


@@ -0,0 +1,49 @@
#!/bin/bash
#
# Copyright (c) 2022-2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# Sometimes docker will be in a bad state.
# Check for this and use some recovery logic to get it back to normal.
# Parameters for recovery logic
MAX_ATTEMPTS=5
TIME_STEP=6
# The migration scripts are passed these parameters:
NAME=$(basename $0)
FROM_RELEASE=$1
TO_RELEASE=$2
ACTION=$3
function log {
echo "$(date -Iseconds | cut -d'+' -f1): ${NAME}[$$]: INFO: $*" >> "/var/log/software.log" 2>&1
}
# Script start
if [[ "${ACTION}" != "activate" ]]; then
exit 0
fi
log "Starting docker health check script from release $FROM_RELEASE to $TO_RELEASE with action $ACTION"
# Docker is considered in a "bad state" if the service isn't active or
# if "/var/lib/docker/tmp" doesn't exist, as it won't be able to download images
attempts=0
while [ "$(systemctl is-active docker)" != "active" ] || [ ! -d "/var/lib/docker/tmp" ]; do
attempts=$(( $attempts + 1 ))
if [ "$attempts" -gt "$MAX_ATTEMPTS" ]; then
log "Could not fix docker service."
exit 0
fi
log "Docker in bad state. Restarting docker service. Attempt: $attempts/$MAX_ATTEMPTS"
systemctl restart docker
sleep $TIME_STEP
done
log "Docker service is active and healthy"
exit 0


@@ -0,0 +1,68 @@
#!/usr/bin/env python
# Copyright (c) 2021-2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This script will clear the host config target.
# This is required in order to ensure tracking is aligned with config
# requests in N+1 release and not due to potential stale configuration
# from N release.
import logging
import sys
from psycopg2.extras import RealDictCursor
from controllerconfig import utils
from controllerconfig.common import constants
from software.utilities.utils import configure_logging
LOG = logging.getLogger('main_logger')
def main():
    action = None
    from_release = None
    to_release = None
    postgres_port = constants.POSTGRESQL_DEFAULT_PORT
    arg = 1
    while arg < len(sys.argv):
        if arg == 1:
            from_release = sys.argv[arg]
        elif arg == 2:
            to_release = sys.argv[arg]  # noqa
        elif arg == 3:
            action = sys.argv[arg]
        elif arg == 4:
            postgres_port = sys.argv[arg]
        else:
            print("Invalid option %s." % sys.argv[arg])
            return 1
        arg += 1
    configure_logging()
    LOG.debug("%s invoked with from_release = %s to_release = %s action = %s"
              % (sys.argv[0], from_release, to_release, action))
    # This host table data migration will likely be required for each release
    if action == "migrate":
        try:
            reset_config_target(postgres_port)
        except Exception as ex:
            LOG.exception(ex)
            return 1


def reset_config_target(port):
    conn = utils.connect_to_postgresql(port)
    with conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute("update i_host set config_target=NULL",)
    LOG.info("Reset host config_target completed")


if __name__ == "__main__":
    sys.exit(main())


@@ -0,0 +1,113 @@
#!/usr/bin/python
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This script is responsible for updating the software_version
# in the i_system table during the USM upgrade
import logging
import psycopg2
import sys
from software.utilities.utils import configure_logging
LOG = logging.getLogger('main_logger')
DEFAULT_POSTGRES_PORT = 5432
def main():
    action = None
    from_release = None
    to_release = None  # noqa
    postgres_port = DEFAULT_POSTGRES_PORT
    arg = 1
    while arg < len(sys.argv):
        if arg == 1:
            from_release = sys.argv[arg]
        elif arg == 2:
            to_release = sys.argv[arg]  # noqa
        elif arg == 3:
            action = sys.argv[arg]
        elif arg == 4:
            # optional port parameter for USM upgrade
            postgres_port = sys.argv[arg]
            pass
        else:
            print(f"Invalid option {sys.argv[arg]}.")
            return 1
        arg += 1
    configure_logging()
    if action in ["activate", "activate-rollback"]:
        try:
            # This username/password authentication is required in activate
            # or rollback actions to connect to the database
            # For migration, we don't need username/password and host. Peer
            # authentication is available in the case of migration
            username, password = get_db_credentials()
            conn = psycopg2.connect("dbname=sysinv user=%s password=%s \
                                    host=localhost port=%s"
                                    % (username, password, postgres_port))
        except Exception as e:
            LOG.exception(f"Error connecting to database: {e}")
            return 1
        try:
            LOG.info(f"Updating software_version from {from_release} \
                     to {to_release}\n")
            update_isystem_software_version(conn, to_release)
        except Exception as ex:
            LOG.exception(ex)
            return 1
        finally:
            conn.close()
    return 0


def update_isystem_software_version(conn, new_sw_version):
    """
    This function updates the software_version in isystem table
    """
    update_isystem_software_version_query = \
        f"UPDATE i_system SET software_version='{new_sw_version}';"
    db_update(conn, update_isystem_software_version_query)
    LOG.info(f"Updated software_version to {new_sw_version}")


def get_db_credentials():
    import re
    import configparser
    configparser = configparser.ConfigParser()
    configparser.read('/etc/sysinv/sysinv.conf')
    conn_string = configparser['database']['connection']
    match = re.match(r'postgresql\+psycopg2://([^:]+):([^@]+)@', conn_string)
    if match:
        username = match.group(1)
        password = match.group(2)
        return username, password
    else:
        raise Exception("Failed to get database credentials from sysinv.conf")


def db_query(conn, query):
    result = []
    with conn.cursor() as cur:
        cur.execute(query)
        for rec in cur:
            result.append(rec)
    return result


def db_update(conn, query):
    with conn.cursor() as cur:
        cur.execute(query)
    conn.commit()


if __name__ == "__main__":
    sys.exit(main())


@@ -0,0 +1,101 @@
#!/usr/bin/env python
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This script reconfigures the keystone endpoints using the sysinv
# version (not puppet).
# It needs to run at the end of the upgrade activation to reduce the
# stabilization time after the upgrade concludes (fewer reconfigurations).
import logging
import socket
import sys
from time import sleep
from oslo_config import cfg
from oslo_context import context as mycontext
from six.moves import configparser
from sysinv.conductor import rpcapiproxy as conductor_rpcapi
from software.utilities.utils import configure_logging
LOG = logging.getLogger('main_logger')
CONF = cfg.CONF
SYSINV_CONFIG_FILE = '/etc/sysinv/sysinv.conf'
def get_conductor_rpc_bind_ip():
    ini_str = '[DEFAULT]\n' + open(SYSINV_CONFIG_FILE, 'r').read()
    config_applied = configparser.RawConfigParser()
    config_applied.read_string(ini_str)
    conductor_bind_ip = None
    if config_applied.has_option('DEFAULT', 'rpc_zeromq_conductor_bind_ip'):
        conductor_bind_ip = \
            config_applied.get('DEFAULT', 'rpc_zeromq_conductor_bind_ip')
    return conductor_bind_ip


def main():
    action = None
    from_release = None
    to_release = None
    arg = 1
    while arg < len(sys.argv):
        if arg == 1:
            from_release = sys.argv[arg]
        elif arg == 2:
            to_release = sys.argv[arg]
        elif arg == 3:
            action = sys.argv[arg]
        elif arg == 4:
            # optional port parameter for USM upgrade
            # port = sys.argv[arg]
            pass
        else:
            print("Invalid option %s." % sys.argv[arg])
            return 1
        arg += 1
    configure_logging()
    # Activate
    if action == 'activate':
        LOG.info("%s invoked with from_release = %s to_release = %s "
                 "action = %s"
                 % (sys.argv[0], from_release, to_release, action))
        # Options of bind ip to the rpc call
        rpc_ip_options = [get_conductor_rpc_bind_ip(), 'controller.internal']
        while None in rpc_ip_options:
            rpc_ip_options.remove(None)
        for index, ip in enumerate(rpc_ip_options):
            try:
                CONF.rpc_zeromq_conductor_bind_ip = ip
                context = mycontext.get_admin_context()
                rpcapi = conductor_rpcapi.ConductorAPI(
                    topic=conductor_rpcapi.MANAGER_TOPIC)
                host = rpcapi.get_ihost_by_hostname(
                    context, socket.gethostname())
                LOG.info("Call Conductor to reconfigure keystone endpoints. "
                         "Bind ip: %s." % CONF.rpc_zeromq_conductor_bind_ip)
                rpcapi.reconfigure_service_endpoints(context, host)
            except Exception as e:
                if index == (len(rpc_ip_options) - 1):
                    LOG.error("Error configuring keystone endpoints. "
                              "Please verify logs.")
                    return 1
                else:
                    LOG.exception(e)
                    LOG.error("Exception occurred during script execution, "
                              "retrying after 5 seconds.")
                    sleep(5)
            else:
                return 0


if __name__ == "__main__":
    sys.exit(main())


@@ -0,0 +1,82 @@
#!/usr/bin/python
# Copyright (c) 2022-2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This script installs the fluxcd controllers in the fluxcd-helm
# namespace in kubernetes
#
# This script can be removed in the release that follows stx7
import logging
import subprocess
import sys
from sysinv.common import exception
from sysinv.common.retrying import retry
from sysinv.common.kubernetes import test_k8s_health
from software.utilities.utils import configure_logging
LOG = logging.getLogger('main_logger')
def main():
    action = None
    from_release = None
    to_release = None
    arg = 1
    while arg < len(sys.argv):
        if arg == 1:
            from_release = sys.argv[arg]
        elif arg == 2:
            to_release = sys.argv[arg]
        elif arg == 3:
            action = sys.argv[arg]
        elif arg == 4:
            # postgres_port = sys.argv[arg]
            pass
        else:
            print("Invalid option %s." % sys.argv[arg])
            return 1
        arg += 1
    configure_logging()
    if action == 'activate' and from_release >= '21.12':
        LOG.info("%s invoked with from_release = %s to_release = %s "
                 "action = %s"
                 % (sys.argv[0], from_release, to_release, action))
        enable_fluxcd_controllers(from_release)


@retry(retry_on_exception=lambda x: isinstance(x, exception.SysinvException),
       stop_max_attempt_number=3)
@test_k8s_health
def enable_fluxcd_controllers(from_release):
    """Run fluxcd_controllers ansible playbook to enable fluxcd controllers
    """
    playbooks_root = '/usr/share/ansible/stx-ansible/playbooks'
    upgrade_script = 'upgrade-fluxcd-controllers.yml'
    cmd = 'ansible-playbook {}/{} -e "upgrade_activate_from_release={}"'\
          ''.format(playbooks_root, upgrade_script, from_release)
    try:
        sub = subprocess.Popen(cmd, shell=True,
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = sub.communicate()
        if sub.returncode != 0:
            LOG.error('Command failed:\n %s\n. %s\n%s' % (
                cmd, stdout.decode('utf-8'), stderr.decode('utf-8')))
            raise Exception('Cannot install fluxcd controllers')
        LOG.info('FluxCD controllers enabled. Output: %s' %
                 stdout.decode('utf-8'))
    except Exception as e:
        raise exception.SysinvException(
            f"Error trying to enable fluxcd controllers via {cmd}, reason: {e}"
        )


if __name__ == "__main__":
    sys.exit(main())


@@ -0,0 +1,72 @@
#!/bin/bash
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
NAME=$(basename "$0")
FROM_RELEASE=$1
TO_RELEASE=$2
ACTION=$3
SYSCTL_FILE="/etc/sysctl.conf"
LOG_FILE="/var/log/software.log"
function log {
echo "$(date -Iseconds | cut -d'+' -f1): ${NAME}[$$]: INFO: $*" >> "$LOG_FILE" 2>&1
}
if [[ "${ACTION}" == "activate" ]]; then
log "Ensure CIS Benchmark Standards are met from release $FROM_RELEASE to $TO_RELEASE with action $ACTION"
# Ensure config is set correctly
grep -q "^net.ipv4.conf.default.rp_filter" "$SYSCTL_FILE" && \
sed -i "s/^net.ipv4.conf.default.rp_filter.*/net.ipv4.conf.default.rp_filter=1/" "$SYSCTL_FILE" || \
echo "net.ipv4.conf.default.rp_filter=1" >> "$SYSCTL_FILE"
grep -q "^net.ipv4.conf.all.rp_filter" "$SYSCTL_FILE" && \
sed -i "s/^net.ipv4.conf.all.rp_filter.*/net.ipv4.conf.all.rp_filter=1/" "$SYSCTL_FILE" || \
echo "net.ipv4.conf.all.rp_filter=1" >> "$SYSCTL_FILE"
grep -q "net.ipv4.tcp_syncookies" "$SYSCTL_FILE" && \
sed -i "s/^#*\s*net.ipv4.tcp_syncookies.*/net.ipv4.tcp_syncookies=1/" "$SYSCTL_FILE" || \
echo "net.ipv4.tcp_syncookies=1" >> "$SYSCTL_FILE"
grep -q "net.ipv4.icmp_echo_ignore_broadcasts" "$SYSCTL_FILE" && \
sed -i "s/^#*\s*net.ipv4.icmp_echo_ignore_broadcasts.*/net.ipv4.icmp_echo_ignore_broadcasts=1/" "$SYSCTL_FILE" || \
echo "net.ipv4.icmp_echo_ignore_broadcasts=1" >> "$SYSCTL_FILE"
grep -q "net.ipv4.conf.all.accept_source_route" "$SYSCTL_FILE" && \
sed -i "s/^#*\s*net.ipv4.conf.all.accept_source_route.*/net.ipv4.conf.all.accept_source_route=0/" "$SYSCTL_FILE" || \
echo "net.ipv4.conf.all.accept_source_route=0" >> "$SYSCTL_FILE"
# Apply changes
sysctl --system &>/dev/null
log "Applied CIS Benchmark required config"
elif [[ "${ACTION}" == "activate-rollback" ]]; then
log "Rolling back CIS Benchmark changes from release $FROM_RELEASE to $TO_RELEASE"
# Reverse config
grep -q "^net.ipv4.conf.default.rp_filter" "$SYSCTL_FILE" && \
sed -i "s/^net.ipv4.conf.default.rp_filter.*/net.ipv4.conf.default.rp_filter=0/" "$SYSCTL_FILE"
grep -q "^net.ipv4.conf.all.rp_filter" "$SYSCTL_FILE" && \
sed -i "s/^net.ipv4.conf.all.rp_filter.*/net.ipv4.conf.all.rp_filter=0/" "$SYSCTL_FILE"
grep -q "^net.ipv4.tcp_syncookies" "$SYSCTL_FILE" && \
sed -i "s/^net.ipv4.tcp_syncookies.*/#net.ipv4.tcp_syncookies=1/" "$SYSCTL_FILE"
grep -q "^net.ipv4.icmp_echo_ignore_broadcasts" "$SYSCTL_FILE" && \
sed -i "s/^net.ipv4.icmp_echo_ignore_broadcasts.*/#net.ipv4.icmp_echo_ignore_broadcasts=1/" "$SYSCTL_FILE"
grep -q "^net.ipv4.conf.all.accept_source_route" "$SYSCTL_FILE" && \
sed -i "s/^net.ipv4.conf.all.accept_source_route.*/#net.ipv4.conf.all.accept_source_route=0/" "$SYSCTL_FILE"
# Apply changes
sysctl --system &>/dev/null
log "Rollback applied: Restored previous values"
else
exit 0
fi


@@ -0,0 +1,55 @@
#!/bin/bash
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This migration script is used to create the keystone roles
# operator and configurator during upgrade; it also deletes the
# roles when a rollback is executed
#
# The migration scripts are passed these parameters:
NAME=$(basename $0)
FROM_RELEASE=$1
TO_RELEASE=$2
ACTION=$3
ROLES=("operator" "configurator")
function log {
echo "$(date -Iseconds | cut -d'+' -f1): ${NAME}[$$]: INFO: $*" >> "/var/log/software.log" 2>&1
}
# Only run this script during upgrade-activate and from release 24.09
if [[ "$ACTION" == "activate" && "$FROM_RELEASE" == "24.09" ]]; then
log "creating keystone roles operator,configurator"
for role in "${ROLES[@]}"; do
openstack role show $role
RC=$?
if [ ${RC} == 1 ]; then
openstack role create $role
RC=$?
if [ ${RC} == 0 ]; then
log "Successfully added keystone role ${role}"
else
log "Failed to add keystone role ${role}"
exit 1
fi
fi
done
elif [[ "$ACTION" == "activate-rollback" && "$TO_RELEASE" == "24.09" ]]; then
for role in "${ROLES[@]}"; do
openstack role show $role
RC=$?
if [ ${RC} == 0 ]; then
openstack role delete $role
RC=$?
if [ ${RC} == 0 ]; then
log "Successfully deleted keystone role ${role}"
else
log "Failed to delete keystone role ${role}"
exit 1
fi
fi
done
fi


@@ -0,0 +1,541 @@
#!/bin/bash
#
# Copyright (c) 2020-2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This migration script is used for upgrading platform applications during the
# activate stage of a platform upgrade. It will:
# - Ignore any new applications that are installed in the To-Release and rely on
# any platform-managed application logic to upload/apply it after the upgrade
# has completed.
# - Attempt to delete and upload any apps that were in the uploaded state in the
# From-Release if the version has changed in the To-Release
# - Attempt to update any app that was in the applied state in the From-Release
# if the version has changed in the To-Release
NAME=$(basename $0)
# The migration scripts are passed these parameters:
FROM_RELEASE=$1
TO_RELEASE=$2
ACTION=$3
if (( $# != 3 && $# != 4 )); then
>&2 echo "Error: Missing Arguments!"
>&2 echo "Usage: 65-k8s-app-upgrade.sh FROM_RELEASE TO_RELEASE ACTION"
>&2 echo "Exiting for manual intervention..."
exit 1
fi
UPGRADE_IN_PROGRESS_APPS_FILE='/etc/platform/.upgrade_in_progress_apps'
TIMEOUT=600
KUBE_SYSTEM_NAMESPACE="kube-system"
CERT_MANAGER_NAMESPACE="cert-manager"
RECOVER_RESULT_SLEEP=30
RECOVER_RESULT_ATTEMPTS=30 # ~15 min to recover app
DELETE_RESULT_SLEEP=10
DELETE_RESULT_ATTEMPTS=6 # ~1 min to delete app
UPLOAD_RESULT_SLEEP=10
UPLOAD_RESULT_ATTEMPTS=24 # ~4 min to upload app
UPDATE_RESULT_SLEEP=30
UPDATE_RESULT_ATTEMPTS=30 # ~15 min to update app
COMMAND_RETRY_SLEEP=30
COMMAND_RETRY_ATTEMPTS=10 # ~5 min to wait on a retried command.
SOFTWARE_LOG_PATH='/var/log/software.log'
CRITICAL_APPS='nginx-ingress-controller cert-manager platform-integ-apps'
APPS_NOT_TO_UPDATE=''
TEST_CERT_CM="
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  creationTimestamp: null
  name: system-local-ca
spec:
  ca:
    secretName: system-local-ca
status: {}
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  creationTimestamp: null
  name: stx-test-cm
  namespace: cert-manager
spec:
  commonName: stx-test-cm
  issuerRef:
    kind: ClusterIssuer
    name: system-local-ca
  secretName: stx-test-cm
status: {}
"
function log {
echo "$(date -Iseconds | cut -d'+' -f1): ${NAME}[$$]: INFO: $*" >> "$SOFTWARE_LOG_PATH" 2>&1
}
function get_api_token {
curl -v -X POST "${1}/auth/tokens" \
--header 'Content-Type: application/json' \
--data '{
"auth": {
"identity": {
"methods": [
"password"
],
"password": {
"user": {
"domain": {
"name": "Default"
},
"name": "'${2}'",
"password": "'${3}'"
}
}
},
"scope": {
"project": {
"domain": {
"name": "Default"
},
"name": "admin"
}
}
}
}' 2>&1 | sed -n 's/.*[t|T]oken: \(.*\)/\1/p'
}
function verify_apps_are_not_recovering {
# Scrape app names. Skip header and footer.
APPS=$(system application-list --nowrap | head -n-1 | tail -n+4 | awk '{print $2}')
for a in ${APPS}; do
log "Checking application ${a} current state..."
# If app is being upgraded then ignore
if [[ -f $UPGRADE_IN_PROGRESS_APPS_FILE ]] && grep -q $a $UPGRADE_IN_PROGRESS_APPS_FILE; then
log "${a} is being upgraded."
continue
fi
APP_STATUS=$(system application-show $a --column status --format value)
if [[ "${APP_STATUS}" =~ ^(applying|restore-requested)$ ]]; then
if [ ${system_type} == 'All-in-one' ] && [ ${system_mode} == 'simplex' ]; then
log "$a is in a recovering state: ${APP_STATUS}. Waiting for all applications to be uploaded or applied."
return 1
else
log "$a is in an unexpected state: ${APP_STATUS}. Exiting for manual intervention..."
fi
exit 1
fi
done
return 0
}
function retry_command {
# This function retries the command provided and waits to see if it
# executed successfully or failed.
COMMAND=$1
APPLICATION_NAME=$2
if (( $# != 2 )); then
>&2 echo "Error: Missing Arguments!"
>&2 echo "Usage: retry_command COMMAND APPLICATION_NAME"
>&2 echo "Exiting for manual intervention..."
exit 1
fi
log "Retrying command: ${COMMAND}"
system ${COMMAND} ${APPLICATION_NAME}
# Do an initial sleep before first status check attempt
sleep $COMMAND_RETRY_SLEEP
for tries in $(seq 1 $COMMAND_RETRY_ATTEMPTS); do
APP_STATUS=$(system application-show ${APPLICATION_NAME} --column status --format value)
if [[ "${APP_STATUS}" =~ ^(uploaded|applied|removed)$ ]]; then
# This is if the command succeeded, break here.
log "${APPLICATION_NAME} status is: ${APP_STATUS}. Done!"
break
elif [[ "${APP_STATUS}" =~ ^(upload-failed|apply-failed|remove-failed)$ ]]; then
# The command was retried, but resulted in another failure. Nothing more to be done,
# so exit.
log "${APPLICATION_NAME} status is: ${APP_STATUS}. The retry has failed. Exiting for manual intervention..."
exit 1
elif [ $tries == $COMMAND_RETRY_ATTEMPTS ]; then
log "Exceeded maximum application ${COMMAND} time of $(date -u -d @"$((COMMAND_RETRY_ATTEMPTS*COMMAND_RETRY_SLEEP))" +"%Mm%Ss"). Execute upgrade-activate again when all applications are uploaded or applied."
exit 1
fi
log "${APPLICATION_NAME} status is: ${APP_STATUS}. Will check again in ${COMMAND_RETRY_SLEEP} seconds."
sleep $COMMAND_RETRY_SLEEP
done
log "Retrying command: ${COMMAND} - Succeeded!"
return 0
}
# Check kubernetes health status.
# Exit with status 1 if sysinv-k8s-health command fails
function check_k8s_health {
local k8s_health
sysinv-k8s-health --log-file "${SOFTWARE_LOG_PATH}" check
k8s_health=$?
if [ $k8s_health -eq 1 ]; then
exit 1
fi
}
# As per cert-manager docs, the webhook server can take some time to come up.
# We can ensure the sanity by issuing a certificate as test.
function check_cert_manager {
log "Issue test certificate to assert cert-manager readyness."
RETRIES=60
check_k8s_health
kubectl delete certificate -n cert-manager stx-test-cm --kubeconfig=/etc/kubernetes/admin.conf --ignore-not-found
apply_failed=1
secret_failed=1
for retry in $( seq 1 ${RETRIES} ); do
if [ ${apply_failed} -ne 0 ]; then
log "Apply test certificate CRD..."
kubectl apply -f <(echo "$TEST_CERT_CM") --kubeconfig=/etc/kubernetes/admin.conf
if [ $? -ne 0 ]; then
log "Error applying certificate CRD. Retrying."
sleep 3
continue
fi
apply_failed=0
fi
log "Waiting cert-manager to issue the certificate..."
sleep 3
kubectl get secret -n cert-manager stx-test-cm --kubeconfig=/etc/kubernetes/admin.conf
if [ $? -eq 0 ]; then
log "cert-manager is ready to issue certificates."
secret_failed=0
break
fi
done
check_k8s_health
kubectl delete certificate -n cert-manager stx-test-cm --kubeconfig=/etc/kubernetes/admin.conf
if [ ${secret_failed} -ne 0 ]; then
log "Cert-manager is not ready after the allotted time. Check the pod logs."
exit 1
fi
}
function check_pod_readiness {
# Check the status of nginx-ingress-controller and cert-manager pods
# Wait for the Nginx Ingress Controller pods to be ready in the background
check_k8s_health
log "Waiting for Nginx Ingress Controller Pod Status ..."
kubectl --kubeconfig=/etc/kubernetes/admin.conf wait --for=condition=ready pod --all=true -n $KUBE_SYSTEM_NAMESPACE -lapp.kubernetes.io/name=ingress-nginx --timeout=${TIMEOUT}s
RESULT1=$?
# Wait for the Cert Manager pods to be ready in the background
check_k8s_health
log "Waiting for Cert-manager Pod Status ..."
kubectl --kubeconfig=/etc/kubernetes/admin.conf wait --for=condition=ready pod --all=true -n $CERT_MANAGER_NAMESPACE -lapp=cert-manager --timeout=${TIMEOUT}s
RESULT2=$?
# Check the results and provide specific message
if [ $RESULT1 -eq 0 ] && [ $RESULT2 -eq 0 ]; then
log "All required pods for Ingress Nginx Controller and Cert Manager are ready."
check_cert_manager
elif [ $RESULT1 -ne 0 ] && [ $RESULT2 -eq 0 ]; then
log "ERROR: Ingress NGINX pods did not become ready within the timeout period."
exit 1
elif [ $RESULT1 -eq 0 ] && [ $RESULT2 -ne 0 ]; then
log "ERROR: Cert Manager pods did not become ready within the timeout period."
exit 1
else
log "ERROR: Both Ingress Nginx Ingress Controller and Cert Manager pods did not become ready within the timeout period."
exit 1
fi
}
function update_in_series {
log "App ${EXISTING_APP_NAME} needs to be updated serially"
# Wait on the upload, should be quick
UPDATED=false
for tries in $(seq 1 $UPDATE_RESULT_ATTEMPTS); do
UPDATING_APP_INFO=$(system application-show $UPGRADE_APP_NAME --column name --column app_version --column status --format yaml)
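# Echoing ${UPDATING_APP_INFO} unquoted collapses the YAML output to one
# line, letting each field be extracted with a single sed expression.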
UPDATING_APP_NAME=$(echo ${UPDATING_APP_INFO} | sed 's/.*name:[[:space:]]\(\S*\).*/\1/')
UPDATING_APP_VERSION=$(echo ${UPDATING_APP_INFO} | sed 's/.*app_version:[[:space:]]\(\S*\).*/\1/')
UPDATING_APP_STATUS=$(echo ${UPDATING_APP_INFO} | sed 's/.*status:[[:space:]]\(\S*\).*/\1/')
if [ "${UPDATING_APP_VERSION}" == "${UPGRADE_APP_VERSION}" ] && \
[ "${UPDATING_APP_STATUS}" == "applied" ]; then
ALARMS=$(fm alarm-list --nowrap --uuid --query "alarm_id=750.005;entity_type_id=k8s_application;entity_instance_id=${UPGRADE_APP_NAME}" | head -n-1 | tail -n+4 | awk '{print $2}')
for alarm in ${ALARMS}; do
log "$NAME: [Warning] A stale 750.005 Application Update In Progress alarm was found for ${UPGRADE_APP_NAME}. Clearing it (UUID: ${alarm})."
fm alarm-delete $alarm --yes
done
log "$NAME: ${UPGRADE_APP_NAME} has been updated to version ${UPGRADE_APP_VERSION} from version ${EXISTING_APP_VERSION}"
UPDATED=true
break
fi
sleep $UPDATE_RESULT_SLEEP
done
if [ $UPDATED == false ] && [ $tries == $UPDATE_RESULT_ATTEMPTS ]; then
log "$NAME: ${UPGRADE_APP_NAME}, version ${UPGRADE_APP_VERSION}, was not updated in the alloted time. Exiting for manual intervention..."
exit 1
fi
if [ $tries != $UPDATE_RESULT_ATTEMPTS ] && [ "${UPDATING_APP_VERSION}" == "${EXISTING_APP_VERSION}" ] ; then
log "$NAME: ${UPGRADE_APP_NAME}, version ${UPGRADE_APP_VERSION}, update failed and was rolled back. Exiting for manual intervention..."
exit 1
fi
}
function update_apps {
PATHS_TO_TARBALLS=$1
IS_SERIAL_INSTALLATION=$2
LAST_APP_CHECKED=""
# Get the list of applications installed in the new release
for fqpn_app in $PATHS_TO_TARBALLS; do
# Extract the app name and version from the tarball name: app_name-version.tgz
re='^(.*)-([0-9]+\.[0-9]+-[0-9]+).tgz'
[[ "$(basename $fqpn_app)" =~ $re ]]
UPGRADE_APP_NAME=${BASH_REMATCH[1]}
UPGRADE_APP_VERSION=${BASH_REMATCH[2]}
log "Found application ${UPGRADE_APP_NAME}, version ${UPGRADE_APP_VERSION} at $fqpn_app"
# Confirm application is loaded.
EXISTING_APP_NAME=$(system application-show $UPGRADE_APP_NAME --column name --format value)
if [ -z "${EXISTING_APP_NAME}" ]; then
log "${UPGRADE_APP_NAME} is currently not uploaded in the system. skipping..."
continue
fi
# Check if the app name is in the list of apps that should not be updated.
if [[ " $APPS_NOT_TO_UPDATE " == *" $UPGRADE_APP_NAME "* ]]; then
log "${UPGRADE_APP_NAME} is listed as an app that should not be updated. skipping..."
continue
fi
# If the last iteration for the same app was successful, no further updates are necessary
if [ "${LAST_APP_CHECKED}" == "${UPGRADE_APP_NAME}" ] && [[ "${EXISTING_APP_STATUS}" =~ ^(uploaded|applied)$ ]]; then
continue
fi
# Get the existing application details
EXISTING_APP_INFO=$(system application-show $EXISTING_APP_NAME --column app_version --column status --format yaml)
EXISTING_APP_VERSION=$(echo ${EXISTING_APP_INFO} | sed 's/.*app_version:[[:space:]]\(\S*\).*/\1/')
EXISTING_APP_STATUS=$(echo ${EXISTING_APP_INFO} | sed 's/.*status:[[:space:]]\(\S*\).*/\1/')
log "$EXISTING_APP_NAME, version $EXISTING_APP_VERSION, is currently in the state: $EXISTING_APP_STATUS"
if [ "x${UPGRADE_APP_VERSION}" == "x${EXISTING_APP_VERSION}" ]; then
# If the app is in uploaded or applied state, then we continue with next iteration.
# Else, the code execution proceeds and the script would exit with an unexpected state.
if [[ "${EXISTING_APP_STATUS}" =~ ^(uploaded|applied)$ ]]; then
log "${UPGRADE_APP_NAME}, version ${EXISTING_APP_VERSION}, is already present. Skipping..."
continue
fi
fi
# All applications should be in an 'applied' or 'uploaded' state. Any other state is unexpected
case "${EXISTING_APP_STATUS}" in
# States that are upgradable
uploaded)
check_k8s_health
log "Deleting ${EXISTING_APP_NAME}, version ${EXISTING_APP_VERSION}"
system application-delete ${EXISTING_APP_NAME} --yes
# Wait on the delete, should be quick
for tries in $(seq 1 $DELETE_RESULT_ATTEMPTS); do
EXISTING_APP_STATUS=$(system application-show $EXISTING_APP_NAME --column status --format value)
if [ -z "${EXISTING_APP_STATUS}" ]; then
log "${EXISTING_APP_NAME} has been deleted."
break
fi
sleep $DELETE_RESULT_SLEEP
done
if [ $tries == $DELETE_RESULT_ATTEMPTS ]; then
log "${EXISTING_APP_NAME}, version ${EXISTING_APP_VERSION}, was not deleted in the alloted time. Exiting for manual intervention..."
exit 1
fi
check_k8s_health
log "Uploading ${UPGRADE_APP_NAME}, version ${UPGRADE_APP_VERSION} from $fqpn_app"
system application-upload $fqpn_app
;;
applied)
check_k8s_health
log "Updating ${EXISTING_APP_NAME}, from version ${EXISTING_APP_VERSION} to version ${UPGRADE_APP_VERSION} from $fqpn_app"
system application-update $fqpn_app
if [ "$IS_SERIAL_INSTALLATION" == "true" ]; then
update_in_series
fi
;;
upload-failed)
check_k8s_health
log "${EXISTING_APP_NAME}, version ${EXISTING_APP_VERSION}, upload failed: ${EXISTING_APP_STATUS}. Retrying command..."
retry_command "application-upload" "${EXISTING_APP_NAME}"
;;
apply-failed)
check_k8s_health
log "${EXISTING_APP_NAME}, version ${EXISTING_APP_VERSION}, apply failed: ${EXISTING_APP_STATUS}. Retrying command..."
retry_command "application-apply" "${EXISTING_APP_NAME}"
;;
remove-failed)
check_k8s_health
log "${EXISTING_APP_NAME}, version ${EXISTING_APP_VERSION}, remove failed: ${EXISTING_APP_STATUS}. Retrying command..."
retry_command "application-remove --yes" "${EXISTING_APP_NAME}"
;;
# States that are unexpected
uploading | applying | removing | restore-requested | updating | recovering)
log "${EXISTING_APP_NAME}, version ${EXISTING_APP_VERSION}, is in an unexpected state: ${EXISTING_APP_STATUS}. Exiting for manual intervention..."
exit 1
;;
*)
log "${EXISTING_APP_NAME}, version ${EXISTING_APP_VERSION}, is in an unknown state: ${EXISTING_APP_STATUS}. Exiting for manual intervention..."
exit 1
;;
esac
# Include app in upgrade in progress file
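# Each line has the form "name,from_version,to_version"; the watcher script
# that runs later in the activation chain consumes this file.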
if [[ ! -f $UPGRADE_IN_PROGRESS_APPS_FILE ]] || ! grep -q "${EXISTING_APP_NAME},${EXISTING_APP_VERSION},${UPGRADE_APP_VERSION}" $UPGRADE_IN_PROGRESS_APPS_FILE; then
echo "${EXISTING_APP_NAME},${EXISTING_APP_VERSION},${UPGRADE_APP_VERSION}" >> $UPGRADE_IN_PROGRESS_APPS_FILE
log "Added ${EXISTING_APP_NAME} to upgrade in progress control file."
fi
LAST_APP_CHECKED=${UPGRADE_APP_NAME}
done
}
log "Starting Kubernetes application updates from release $FROM_RELEASE to $TO_RELEASE with action $ACTION"
if [ "$ACTION" == "activate" ]; then
# remove upgrade in progress file
[[ -f $UPGRADE_IN_PROGRESS_APPS_FILE ]] && rm -f $UPGRADE_IN_PROGRESS_APPS_FILE
# move the costly source command in the if branch, so only execute when needed.
source /etc/platform/platform.conf
for tries in $(seq 1 $RECOVER_RESULT_ATTEMPTS); do
if verify_apps_are_not_recovering; then
break
elif [ $tries == $RECOVER_RESULT_ATTEMPTS ]; then
log "Exceeded maximum application recovery time of $(date -u -d @"$((RECOVER_RESULT_ATTEMPTS*RECOVER_RESULT_SLEEP))" +"%Mm%Ss"). Execute upgrade-activate again when all applications are uploaded or applied."
exit 1
fi
sleep $RECOVER_RESULT_SLEEP
done
# Get the current k8s version
check_k8s_health
K8S_VERSIONS=$(system kube-version-list)
ACTIVE_K8S_VERSION=$(echo "$K8S_VERSIONS" | grep ' True ' | grep ' active ' | awk -F '|' '{print $2}' | tr -d ' ')
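# Select the row flagged "True" and "active", then take the second
# pipe-delimited field (the version) and strip the surrounding spaces.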
# Get token
TOKEN=$(get_api_token "${OS_AUTH_URL}" "${OS_USERNAME}" "${OS_PASSWORD}")
# Get list of apps that need to be installed serially due to application dependencies.
REORDER_APPS_ENDPOINT="http://controller:6385/v1/reorder_apps/"
RESPONSE=$(curl --silent --retry $COMMAND_RETRY_ATTEMPTS --retry-delay $COMMAND_RETRY_SLEEP --write-out "HTTPSTATUS:%{http_code}" -X GET "$REORDER_APPS_ENDPOINT" -H "X-Auth-Token: ${TOKEN}")
# Capture the HTTP status code
HTTP_STATUS=$(echo "$RESPONSE" | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
if [[ "$HTTP_STATUS" -ne 200 ]]; then
log "Unable to get order of apps. Received HTTP status code $HTTP_STATUS."
exit 1
fi
ALL_SYSTEM_SERIAL_APPLICATION=$(echo "$RESPONSE" | sed -e 's/HTTPSTATUS:.*//')
# Get paths to tarballs compatible with the current k8s version
QUERY_COMPATIBLE_APPS_ENDPOINT="http://controller:6385/v1/query_compatible_apps/?k8s_ver=${ACTIVE_K8S_VERSION}&include_path=true"
RESPONSE=$(curl --silent --retry $COMMAND_RETRY_ATTEMPTS --retry-delay $COMMAND_RETRY_SLEEP --write-out "HTTPSTATUS:%{http_code}" -X GET "$QUERY_COMPATIBLE_APPS_ENDPOINT" -H "X-Auth-Token: ${TOKEN}")
# Capture the HTTP status code
HTTP_STATUS=$(echo "$RESPONSE" | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
if [[ "$HTTP_STATUS" -ne 200 ]]; then
log "Unable to obtain compatible app list. Received HTTP status code $HTTP_STATUS."
exit 1
fi
PATHS_TO_COMPATIBLE_TARBALLS=$(echo "$RESPONSE" | sed -e 's/HTTPSTATUS:.*//')
# Convert the JSON array responses into newline-separated lists
ALL_SYSTEM_SERIAL_APPLICATION=$(echo $ALL_SYSTEM_SERIAL_APPLICATION | sed 's/\[//;s/\]//;s/", "/\n/g;s/"//g')
PATHS_TO_COMPATIBLE_TARBALLS=$(echo $PATHS_TO_COMPATIBLE_TARBALLS | sed 's/\[//;s/\]//;s/", "/\n/g;s/"//g')
CRITICAL_APPS_PATHS=""
# From PATHS_TO_COMPATIBLE_TARBALLS, separate out the apps that the platform must install first.
for app in $CRITICAL_APPS; do
# Get the first matching path for the app
matched_path=$(echo "$PATHS_TO_COMPATIBLE_TARBALLS" | grep -m 1 "/$app-")
# Add the matched path to MATCHED_PATHS if found
if [ -n "$matched_path" ]; then
CRITICAL_APPS_PATHS+="$matched_path "
# Remove the matched path from PATHS_TO_COMPATIBLE_TARBALLS
PATHS_TO_COMPATIBLE_TARBALLS=$(echo "$PATHS_TO_COMPATIBLE_TARBALLS" | grep -v "$matched_path")
fi
done
APPS_IN_SERIAL_PATH=''
APPS_IN_PARALLEL_PATHS=''
# Find matches between ALL_SYSTEM_SERIAL_APPLICATION and PATHS_TO_COMPATIBLE_TARBALLS and save
# to APPS_IN_SERIAL_PATH
for app in $ALL_SYSTEM_SERIAL_APPLICATION; do
# Find the corresponding path in PATHS_TO_COMPATIBLE_TARBALLS
matched_path=$(echo "$PATHS_TO_COMPATIBLE_TARBALLS" | grep -m 1 "/$app-")
# If a match is found, append it to APPS_IN_SERIAL_PATH
if [ -n "$matched_path" ]; then
APPS_IN_SERIAL_PATH="${APPS_IN_SERIAL_PATH}${matched_path} "
fi
done
# Find unmatched paths between ALL_SYSTEM_SERIAL_APPLICATION and PATHS_TO_COMPATIBLE_TARBALLS
# and save to APPS_IN_PARALLEL_PATHS
for path in $PATHS_TO_COMPATIBLE_TARBALLS; do
if ! echo -e "$APPS_IN_SERIAL_PATH" | grep -q "$path"; then
APPS_IN_PARALLEL_PATHS="${APPS_IN_PARALLEL_PATHS}${path} "
fi
done
update_apps "$CRITICAL_APPS_PATHS" "true"
check_pod_readiness
update_apps "$APPS_IN_PARALLEL_PATHS" "false"
update_apps "$APPS_IN_SERIAL_PATH" "true"
log "Completed Kubernetes application updates for release $FROM_RELEASE to $TO_RELEASE with action $ACTION"
else
log "No actions required for from release $FROM_RELEASE to $TO_RELEASE with action $ACTION"
fi
exit 0

View File

@@ -0,0 +1,105 @@
#!/bin/bash
#
# Copyright (c) 2023-2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This migration script is used to wait for apps that were upgraded by
# previous application upgrade scripts in the chain. It will:
# - Wait for upgraded applications to become either 'applied' or 'uploaded'
# with the new version; these applications are recorded earlier in the
# upgrade-activate process in a file inside /etc/platform/
NAME=$(basename $0)
# The migration scripts are passed these parameters:
FROM_RELEASE=$1
TO_RELEASE=$2
ACTION=$3
CONFIG_PERMDIR="/opt/platform/config/${TO_RELEASE}"
UPGRADE_IN_PROGRESS_APPS_FILE='/etc/platform/.upgrade_in_progress_apps'
UPDATE_RESULT_SLEEP=30
UPDATE_RESULT_ATTEMPTS=45 # ~22.5 min to allow updates to complete.
function log {
echo "$(date -Iseconds | cut -d'+' -f1): ${NAME}[$$]: INFO: $*" >> "/var/log/software.log" 2>&1
}
log "Starting application upgrade watcher script from release $FROM_RELEASE to $TO_RELEASE with action $ACTION"
if [ "$ACTION" == "activate" ]; then
# move the costly source command in the if branch, so only execute when needed.
source /etc/platform/platform.conf
if [ ! -f $UPGRADE_IN_PROGRESS_APPS_FILE ]; then
log "No file with application upgrade in progress found, skipping script."
exit 0
fi
# Loop over upgraded apps and wait for them to become 'applied' or 'uploaded' with the new version
APPS_LIST=$(cat $UPGRADE_IN_PROGRESS_APPS_FILE)
for tries in $(seq 1 $UPDATE_RESULT_ATTEMPTS); do
log "Checking applications status... Retry ${tries} of ${UPDATE_RESULT_ATTEMPTS}"
ALL_UPGRADED="true"
UPGRADE_IN_PROGRESS_APPS_LIST=""
for app in $APPS_LIST; do
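# Entries have the form "name,from_version,to_version", as recorded by the
# preceding application upgrade script (65-k8s-app-upgrade.sh).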
re='[[:space:]]*(\S*),(\S*),(\S*)[[:space:]]*'
[[ $app =~ $re ]]
UPGRADE_APP_NAME=${BASH_REMATCH[1]}
EXISTING_APP_VERSION=${BASH_REMATCH[2]}
UPGRADE_APP_VERSION=${BASH_REMATCH[3]}
UPDATING_APP_INFO=$(system application-show $UPGRADE_APP_NAME --column name --column app_version --column status --format yaml)
UPDATING_APP_NAME=$(echo ${UPDATING_APP_INFO} | sed 's/.*name:[[:space:]]\(\S*\).*/\1/')
UPDATING_APP_VERSION=$(echo ${UPDATING_APP_INFO} | sed 's/.*app_version:[[:space:]]\(\S*\).*/\1/')
UPDATING_APP_STATUS=$(echo ${UPDATING_APP_INFO} | sed 's/.*status:[[:space:]]\(\S*\).*/\1/')
if [ "${UPDATING_APP_NAME}" == "${UPGRADE_APP_NAME}" ] && \
[ "${UPDATING_APP_VERSION}" == "${UPGRADE_APP_VERSION}" ]; then
case "${UPDATING_APP_STATUS}" in
"applied"|"uploaded")
ALARMS=$(fm alarm-list --nowrap --uuid --query "alarm_id=750.005;entity_type_id=k8s_application;entity_instance_id=${UPGRADE_APP_NAME}" | head -n-1 | tail -n+4 | awk '{print $2}')
for alarm in ${ALARMS}; do
log "WARN: A stale 750.005 Application Update In Progress alarm was found for ${UPGRADE_APP_NAME}. Clearing it (UUID: ${alarm})."
fm alarm-delete $alarm --yes
done
log "${UPGRADE_APP_NAME} has been updated to version ${UPGRADE_APP_VERSION} from version ${EXISTING_APP_VERSION}"
;;
*)
log "${UPGRADE_APP_NAME} update in progress to version ${UPGRADE_APP_VERSION} from version ${EXISTING_APP_VERSION}"
UPGRADE_IN_PROGRESS_APPS_LIST="${app} ${UPGRADE_IN_PROGRESS_APPS_LIST}"
ALL_UPGRADED="false"
;;
esac
else
log "WARN: ${UPGRADE_APP_NAME} is on '${UPDATING_APP_STATUS}' state but the version is not updated to ${UPGRADE_APP_VERSION} from version ${EXISTING_APP_VERSION}"
UPGRADE_IN_PROGRESS_APPS_LIST="${app} ${UPGRADE_IN_PROGRESS_APPS_LIST}"
ALL_UPGRADED="false"
fi
done
# Exit loop if all applications are upgraded
[[ $ALL_UPGRADED == "true" ]] && break
# Next iteration will check only apps whose upgrade is still in progress
APPS_LIST=$UPGRADE_IN_PROGRESS_APPS_LIST
sleep $UPDATE_RESULT_SLEEP
done
if [ $tries == $UPDATE_RESULT_ATTEMPTS ]; then
log "One or more apps (${APPS_LIST// /, }) were not updated in the alloted time. Exiting for manual intervention..."
exit 1
fi
# remove upgrade in progress file
log "Removing temporary file: $UPGRADE_IN_PROGRESS_APPS_FILE"
[[ -f $UPGRADE_IN_PROGRESS_APPS_FILE ]] && rm -f $UPGRADE_IN_PROGRESS_APPS_FILE
log "Completed application upgrade watcher script from release $FROM_RELEASE to $TO_RELEASE with action $ACTION"
else
log "No actions required for from release $FROM_RELEASE to $TO_RELEASE with action $ACTION"
fi

View File

@@ -0,0 +1,155 @@
#!/bin/bash
#
# Copyright (c) 2022-2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This script resizes the platform (and, consequently, backup) filesystems
# on a DC System Controller, to allow increased parallelism in subcloud
# deployment (100+ deployments in parallel). This script will:
# - Check if deployment is System Controller DC from distributed_cloud_role variable
# sourced from /etc/platform/platform.conf
# - Check if platform filesystem needs to be resized (i.e. if less than 20GB in size)
# and skip the execution if not
# - Check if there is enough space on cgts-vg to resize on both controllers
# - Resize backup filesystem on each controller and check if resized successfully
# - Resize platform controllerfs and check if resized successfully
# - NOTE: this script has to be idempotent and reentrant, since upgrade-activate can
# be called multiple times during the upgrade
# - NOTE: this script must not fail the upgrade if there is not enough disk
# space to resize; it must only warn the user about the limitation
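# Hypothetical invocation during upgrade-activate (release values are
# illustrative): <path-to-script> 24.09 25.09 activate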
NAME=$(basename $0)
# The migration scripts are passed these parameters:
FROM_RELEASE=$1
TO_RELEASE=$2
ACTION=$3
EXPANDED_PLATFORM_SIZE=20
NODE_LIST=(controller-0 controller-1)
RESIZE_SLEEP_TIME=90
RESIZE_CHECK_MAX_RETRIES=5
source /etc/platform/platform.conf
function log {
echo "$(date -Iseconds | cut -d'+' -f1): ${NAME}[$$]: INFO: $*" >> "/var/log/software.log" 2>&1
}
function verify_fs_need_resizing {
_PLATFORM_SIZE=$(
system controllerfs-list --column name --column size --column state | grep platform | awk '{ print $4; }'
)
echo $_PLATFORM_SIZE # return value so that it can be assigned to a variable
if [[ $_PLATFORM_SIZE -ge $EXPANDED_PLATFORM_SIZE ]]; then
return 1
fi
return 0
}
function verify_space_to_resize {
_PLATFORM_SIZE=$1
_HOSTNAME=$2
_AVAILABLE_DISK_SIZE=$(system host-lvg-list $_HOSTNAME | grep cgts-vg | awk '{ print $12; }')
_INCREASE_DISK_SIZE=$(echo "$EXPANDED_PLATFORM_SIZE - $_PLATFORM_SIZE" | bc)
_TOTAL_INCREASE_DISK_SIZE=$(echo "2 * $_INCREASE_DISK_SIZE" | bc) # need to resize platform and backup
log "[$_HOSTNAME] Available cgts-vg space: ${_AVAILABLE_DISK_SIZE}G, need ${_TOTAL_INCREASE_DISK_SIZE}G to resize."
echo $_INCREASE_DISK_SIZE # return value so that it can be assigned to a variable
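# GNU bc prints 1 when the comparison holds and "!" negates it, so the
# function returns 0 (success) only when cgts-vg can absorb the increase.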
return $(echo "! $_AVAILABLE_DISK_SIZE >= $_TOTAL_INCREASE_DISK_SIZE" | bc)
}
function resize_backup_filesystem {
_INCREASE_DISK_SIZE=$1
_HOSTNAME=$2
_BACKUP_SIZE=$(system host-fs-list $_HOSTNAME | grep backup | awk '{ print $6; }')
_EXPANDED_BACKUP_SIZE=$(echo "$_BACKUP_SIZE + $_INCREASE_DISK_SIZE" | bc)
log "[$_HOSTNAME] Current backup size is ${_BACKUP_SIZE}G, new size will be ${_EXPANDED_BACKUP_SIZE}G."
system host-fs-modify $_HOSTNAME backup=$_EXPANDED_BACKUP_SIZE
sleep 5
_BACKUP_SIZE=$(system host-fs-list $_HOSTNAME | grep backup | awk '{ print $6; }')
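# Same bc idiom: return 0 only if the reported size matches the expanded size.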
return $(echo "! $_BACKUP_SIZE == $_EXPANDED_BACKUP_SIZE" | bc)
}
function resize_platform_controllerfs {
_PLATFORM_SIZE=$1
log "Current platform size is ${_PLATFORM_SIZE}G, new size will be ${EXPANDED_PLATFORM_SIZE}G."
system controllerfs-modify platform=$EXPANDED_PLATFORM_SIZE
for RETRY in $(seq $RESIZE_CHECK_MAX_RETRIES); do
log "Retry $RETRY of $RESIZE_CHECK_MAX_RETRIES, checking if platform filesystem is resized and available..."
OUTPUT=$(system controllerfs-list --column name --column size --column state | grep platform)
_CURRENT_PLATFORM_SIZE=$(echo $OUTPUT | awk '{ print $4; }')
_CURRENT_PLATFORM_STATE=$(echo $OUTPUT | awk '{ print $6; }')
log "Current platform fs size/state: ${_CURRENT_PLATFORM_SIZE}/${_CURRENT_PLATFORM_STATE}"
if [[ ($_CURRENT_PLATFORM_SIZE -eq $EXPANDED_PLATFORM_SIZE) && ($_CURRENT_PLATFORM_STATE == "available") ]]; then
return 0
fi
# if current size is less than the expanded size, retry the resize command
if [[ $_CURRENT_PLATFORM_SIZE -lt $EXPANDED_PLATFORM_SIZE ]]; then
log "Current platform size is less than ${EXPANDED_PLATFORM_SIZE}G, retrying resize command..."
system controllerfs-modify platform=$EXPANDED_PLATFORM_SIZE
fi
sleep $RESIZE_SLEEP_TIME
done
if [[ $_CURRENT_PLATFORM_SIZE -eq $EXPANDED_PLATFORM_SIZE ]]; then
log "[WARNING] platform fs is resized but not yet in available state."
return 0
fi
return 1
}
# Script start
log "Starting filesystems resize on DC System Controller for increased parallel subcloud deployment, from release $FROM_RELEASE to $TO_RELEASE with action $ACTION"
if [[ "$ACTION" == "activate" ]]; then
if [[ $distributed_cloud_role == "systemcontroller" ]]; then
log "Verifying if filesystems need resizing..."
if ! PLATFORM_SIZE=$(verify_fs_need_resizing); then
log "No need to resize, platform filesystem has been resized already."
exit 0
fi
log "Platform filesystem needs resizing, current size is ${PLATFORM_SIZE}G, ideal size is ${EXPANDED_PLATFORM_SIZE}G."
log "Verifying if there is enough available space to resize..."
for NODE in "${NODE_LIST[@]}"; do
if ! INCREASE_DISK_SIZE=$(verify_space_to_resize $PLATFORM_SIZE $NODE); then
log "Not enough space in cgts-vg on $NODE to resize, parallel subcloud deployment will be limited. Resize operations will be skipped."
exit 0
fi
done
log "LVG cgts-vg has enough space for resizing, continuing with resize operations..."
log "Trying to resize host-fs backup for both controllers..."
for NODE in "${NODE_LIST[@]}"; do
if ! resize_backup_filesystem $INCREASE_DISK_SIZE $NODE; then
log "Failed while resizing backup fs on $NODE, resize operation aborted."
exit 0
fi
log "Successfully resized backup filesystem on $NODE."
done
log "Trying to resize controllerfs platform filesystem..."
if ! resize_platform_controllerfs $PLATFORM_SIZE; then
log "Failed while resizing controllerfs platform filesystem, resize operation aborted."
exit 0
fi
log "Successfully resized controllerfs platform filesystem."
else
log "Not a DC System Controller deployment. No filesystem resize needed."
fi
log "Filesystems resizing for DC System Controller finished successfully, from release $FROM_RELEASE to $TO_RELEASE with action $ACTION"
elif [[ "$ACTION" == "activate-rollback" ]]; then
log "The $ACTION action is not reversible for this script."
else
log "No actions required for from release $FROM_RELEASE to $TO_RELEASE with action $ACTION"
fi
exit 0

View File

@@ -0,0 +1,329 @@
#!/usr/bin/env python3
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
# This script performs database update and rollback operations
# for the `interfaces` table in the `sysinv` PostgreSQL database,
# specifically targeting VF interfaces with PCI SR-IOV class.
import sys
import psycopg2
import logging
from psycopg2 import sql
import json
import re
import configparser
import subprocess
import time
from software.utilities.utils import configure_logging
DB_NAME = "sysinv"
DB_HOST = "localhost"
TABLE_NAME = "interfaces"
MAX_TX_RATE = "max_tx_rate"
MAX_RX_RATE = "max_rx_rate"
IFCAPABILITIES = "ifcapabilities"
VF_TYPE = "vf"
PCI_CLASS = "pci-sriov"
DEFAULT_POSTGRES_PORT = "5432"
LOG = logging.getLogger('main_logger')
configure_logging()
def get_db_credentials():
""" Retrieve DB credentials from sysinv.conf """
try:
config = configparser.ConfigParser()
config.read('/etc/sysinv/sysinv.conf')
conn_string = config['database']['connection']
match = re.match(r'postgresql\+psycopg2://([^:]+):([^@]+)@',
conn_string)
if match:
username = match.group(1)
password = match.group(2)
return username, password
else:
raise Exception("Failed to parse DB credentials from sysinv.conf")
except Exception as e:
LOG.error(f"Error getting DB credentials: {e}")
sys.exit(1)
def connect_to_db(port):
""" Establish DB connection """
username, password = get_db_credentials()
try:
conn = psycopg2.connect(
dbname=DB_NAME,
user=username,
password=password,
host=DB_HOST,
port=port
)
return conn
except Exception as e:
LOG.error(f"Database connection failed: {e}")
sys.exit(1)
def db_query(conn, query, params=()):
""" Execute SELECT query and return results """
with conn.cursor() as cur:
cur.execute(query, params)
return cur.fetchall()
def db_update(conn, query, params=(), autocommit=True):
""" Execute UPDATE query """
with conn.cursor() as cur:
cur.execute(query, params)
if autocommit:
conn.commit()
def columns_exist(conn):
""" Verify required columns exist in the table """
query = f"""
SELECT column_name
FROM information_schema.columns
WHERE table_name = '{TABLE_NAME}'
AND column_name IN ('{MAX_TX_RATE}', '{MAX_RX_RATE}',
'{IFCAPABILITIES}');
"""
cols = db_query(conn, query)
existing_cols = {col[0] for col in cols}
if {MAX_TX_RATE, MAX_RX_RATE, IFCAPABILITIES}.issubset(existing_cols):
return True
else:
missing_cols = (
{MAX_TX_RATE, MAX_RX_RATE, IFCAPABILITIES} - existing_cols
)
LOG.error(f"Missing columns: {', '.join(missing_cols)}")
sys.exit(1)
def update_data(conn):
LOG.info("Starting data updation...")
select_query = sql.SQL(f"""
SELECT id, uuid, {IFCAPABILITIES}
FROM {TABLE_NAME}
WHERE iftype = %s AND ifclass = %s;
""")
vf_interfaces = db_query(
conn, select_query, (VF_TYPE, PCI_CLASS)
)
LOG.info(f"Found {len(vf_interfaces)} VF interfaces to update.")
if len(vf_interfaces) == 0:
LOG.info("No VF interfaces found to update. No changes required")
return
updated = False
for iface_id, iface_uuid, ifcapabilities in vf_interfaces:
if ifcapabilities:
try:
capabilities_dict = json.loads(ifcapabilities)
except (json.JSONDecodeError, TypeError) as e:
raise ValueError(
f"Malformed ifcapabilities for UUID {iface_uuid}: {e}"
)
# pop() both reads and removes the key in one step
tx_rate = capabilities_dict.pop("max_tx_rate", None)
cleaned_ifcapabilities = json.dumps(capabilities_dict) if \
capabilities_dict else None
# Only update the database if either tx_rate or
# cleaned_ifcapabilities has a value
if tx_rate is not None or cleaned_ifcapabilities is not None:
update_query = sql.SQL(f"""
UPDATE {TABLE_NAME}
SET {MAX_TX_RATE} = %s, {IFCAPABILITIES} = %s
WHERE id = %s;
""")
db_update(
conn,
update_query,
(tx_rate, cleaned_ifcapabilities, iface_id),
autocommit=False
)
updated = True
LOG.info(f"Updated {TABLE_NAME} for UUID: {iface_uuid} "
f"with max_tx_rate: {tx_rate}")
if updated:
conn.commit()
LOG.info("All applicable records updated successfully.")
else:
LOG.info("No changes were made to the database.")
def rollback_data(conn):
"""Rollback migration by moving data back to ifcapabilities"""
LOG.info("Starting data rollback...")
select_query = sql.SQL(f"""
SELECT id, uuid, {MAX_TX_RATE}, {IFCAPABILITIES}
FROM {TABLE_NAME}
WHERE iftype = %s AND ifclass = %s;
""")
vf_interfaces = db_query(
conn, select_query, (VF_TYPE, PCI_CLASS)
)
LOG.info(f"Found {len(vf_interfaces)} VF interfaces to rollback.")
if len(vf_interfaces) == 0:
LOG.info("No VF interfaces found to rollback. No changes required")
return
updated = False
for iface_id, iface_uuid, max_tx_rate, ifcapabilities in vf_interfaces:
capabilities = {}
if max_tx_rate is not None:
capabilities["max_tx_rate"] = max_tx_rate
if ifcapabilities:
try:
existing = json.loads(ifcapabilities)
capabilities.update(existing)
except (json.JSONDecodeError, TypeError) as e:
raise ValueError(
f"Malformed ifcapabilities for UUID {iface_uuid}: {e}"
)
if not capabilities:
continue
new_ifcap = json.dumps(capabilities) if capabilities else None
if new_ifcap or max_tx_rate is not None:
update_query = sql.SQL(f"""
UPDATE {TABLE_NAME}
SET {IFCAPABILITIES} = %s, {MAX_TX_RATE} = NULL
WHERE id = %s;
""")
db_update(
conn, update_query, (new_ifcap, iface_id), autocommit=False
)
updated = True
LOG.info(
f"Rolled back {TABLE_NAME} for UUID: {iface_uuid} "
f"with ifcapabilities: {new_ifcap}"
)
if updated:
conn.commit()
LOG.info("All applicable records rolled back successfully.")
else:
LOG.info("No changes were made to the database.")
def patch_felix_configuration():
"""Ensure FelixConfiguration chainInsertMode is set to Append."""
LOG.info("Patching FelixConfiguration to Append...")
cmd = [
"kubectl", "--kubeconfig=/etc/kubernetes/admin.conf",
"patch", "felixconfiguration", "default", "--type=merge",
"-p", '{"spec":{"chainInsertMode":"Append"}}'
]
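# kubectl --type=merge only replaces the fields supplied in the patch,
# so other Felix settings are left untouched.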
retries, delay = 3, 5
timeout = 15
for attempt in range(retries):
try:
result = subprocess.run(
cmd,
capture_output=True,
text=True,
check=True,
timeout=timeout
)
LOG.info(f"Patch applied successfully: {result.stdout}")
return
except subprocess.TimeoutExpired:
LOG.warning(f"Attempt {attempt + 1} timed out after {timeout}s.")
except subprocess.CalledProcessError as e:
LOG.warning(f"Attempt {attempt + 1} failed: {e.stderr}")
if attempt < retries - 1:
time.sleep(delay)
else:
LOG.error("FelixConfiguration patch failed after retries.")
def main():
action = None
from_release = None
to_release = None
postgres_port = DEFAULT_POSTGRES_PORT
arg = 1
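# Positional arguments: from_release to_release action [postgres_port]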
while arg < len(sys.argv):
if arg == 1:
from_release = sys.argv[arg]
elif arg == 2:
to_release = sys.argv[arg]
elif arg == 3:
action = sys.argv[arg]
elif arg == 4:
postgres_port = sys.argv[arg]
else:
LOG.error(f"Invalid option {sys.argv[arg]}.")
return 1
arg += 1
if action not in ["activate", "activate-rollback"]:
LOG.warning(f"Action '{action}' is not valid. Skipping...")
return 0
try:
conn = connect_to_db(postgres_port)
columns_exist(conn)
if to_release == "25.09" and action == "activate":
update_data(conn)
patch_felix_configuration()
elif from_release == "25.09" and action == "activate-rollback":
rollback_data(conn)
else:
LOG.error(f"Unknown action: {action}")
return 1
except Exception as e:
LOG.error(f"Exception during {action}: {e}", exc_info=True)
return 1
finally:
if 'conn' in locals():
conn.close()
if __name__ == "__main__":
sys.exit(main())