Upgrade/rollback fixes for mgmt addr reduction
The following issues started after the mgmt address reduction feature was merged: 1. upgrade: on some occasions fm-api and sm-api fail to start. This happened because the processes can start before the dnsmasq.addn_hosts file is updated. The fix consists in restarting both services during upgrade activate. 2. rollback: on some occasions the agent-hooks 'ceph mon start' command times out. This happened because ceph mon start needs the correct IP address to be created in Linux and it will be created only after host unlock. The fix consists in adding the IP temporarily before running the 'ceph mon start' command. 3. ceph alarm after finishing the rollback process. This happened because the last ceph reconfiguration command was not running for rollback. The fix consists in checking the correct state to run the command. Test plan: - SX upgrade - SX rollback Story: 2011191 Task: 52558 Change-Id: Ibfc8d3aaf8f95f5a3250b0826fe40d888bb6d760 Signed-off-by: Caio Bruchert <caio.bruchert@windriver.com>
This commit is contained in:
@@ -29,10 +29,6 @@ from ipaddress import ip_address
|
||||
from ipaddress import IPv6Address
|
||||
import psycopg2
|
||||
|
||||
import software.constants as constants
|
||||
import software.utils as utils
|
||||
|
||||
|
||||
log_format = ('%(asctime)s: ' + '[%(process)s]: '
|
||||
'%(filename)s(%(lineno)s): %(levelname)s: %(message)s')
|
||||
LOG.basicConfig(filename="/var/log/software.log",
|
||||
@@ -877,24 +873,21 @@ class ReconfigureCephMonHook(BaseHook):
|
||||
fields = line.split()
|
||||
if fields[1] == host:
|
||||
ip = fields[0]
|
||||
if isinstance(ip_address(ip), IPv6Address):
|
||||
ip = f"[{ip}]"
|
||||
return ip
|
||||
mon_ip = f"[{ip}]" if isinstance(ip_address(ip), IPv6Address) else ip
|
||||
return mon_ip, ip
|
||||
return None
|
||||
|
||||
def run(self):
|
||||
# Handle both upgrade to 25.09 and rollback to 24.09
|
||||
if self._to_release == "24.09" or self._to_release == "25.09":
|
||||
system_type = utils.get_platform_conf("system_type")
|
||||
system_mode = utils.get_platform_conf("system_mode")
|
||||
if (system_type == constants.SYSTEM_TYPE_ALL_IN_ONE and
|
||||
system_mode == constants.SYSTEM_MODE_SIMPLEX):
|
||||
system_mode = self.get_platform_conf("system_mode")
|
||||
if (system_mode == self.SIMPLEX):
|
||||
if not self.is_ceph_configured():
|
||||
LOG.info("ceph-mon: skipping reconfiguration, bare metal ceph not configured for mgmt")
|
||||
return
|
||||
fsid = self.get_fsid()
|
||||
mon_name = "controller-0"
|
||||
mon_ip = self.get_mon_ip()
|
||||
mon_ip, ip = self.get_mon_ip()
|
||||
if not fsid or not mon_ip:
|
||||
LOG.exception("Invalid fsid or mon_ip")
|
||||
raise ValueError("Invalid params")
|
||||
@@ -911,6 +904,10 @@ class ReconfigureCephMonHook(BaseHook):
|
||||
["/etc/init.d/ceph", "start", "mon"],
|
||||
["ln", "-s", "/etc/ceph/ceph.conf.pmon", "/etc/pmon.d/ceph.conf"],
|
||||
]
|
||||
if self._to_release == "24.09":
|
||||
# For /etc/init.d/ceph start mon to work during rollback, need to add mon_ip temporarily
|
||||
# to the loopback. This will be corrected permanently after host unlock and reboot.
|
||||
cmds.insert(0, ["ip", "address", "replace", f"{ip}", "dev", "lo"])
|
||||
|
||||
try:
|
||||
for cmd in cmds:
|
||||
|
@@ -17,6 +17,11 @@ FROM_RELEASE=$1
|
||||
TO_RELEASE=$2
|
||||
ACTION=$3
|
||||
|
||||
FROM_RELEASE_ARR=(${FROM_RELEASE//./ })
|
||||
FROM_RELEASE_MAJOR=${FROM_RELEASE_ARR[0]}
|
||||
TO_RELEASE_ARR=(${TO_RELEASE//./ })
|
||||
TO_RELEASE_MAJOR=${TO_RELEASE_ARR[0]}
|
||||
|
||||
SOFTWARE_LOG_PATH="/var/log/software.log"
|
||||
|
||||
function log {
|
||||
@@ -26,7 +31,8 @@ function log {
|
||||
log "ceph-mon: enable ceph-mon msgr2"\
|
||||
"from $FROM_RELEASE to $TO_RELEASE with action $ACTION"
|
||||
|
||||
if [[ "$ACTION" == "activate" && "$FROM_RELEASE" == "24.09" ]] || [[ "$ACTION" == "delete" && "$TO_RELEASE" == "24.09" ]]; then
|
||||
if [[ "$ACTION" == "activate" && ${TO_RELEASE_MAJOR} -eq 25 ]] || \
|
||||
[[ "$ACTION" == "delete" && ${TO_RELEASE_MAJOR} -eq 24 ]]; then
|
||||
source /etc/platform/platform.conf
|
||||
if [[ "${system_mode}" == "simplex" ]]; then
|
||||
if [[ -f /etc/platform/.node_ceph_configured ]]; then
|
||||
|
@@ -15,6 +15,7 @@
|
||||
#
|
||||
|
||||
import logging
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from packaging import version
|
||||
@@ -55,8 +56,8 @@ def main():
|
||||
res = 0
|
||||
to_release_version = version.Version(to_release)
|
||||
target_version = version.Version("25.09")
|
||||
if action == 'migrate' and to_release_version == target_version:
|
||||
if get_system_mode() == "simplex":
|
||||
if get_system_mode() == "simplex":
|
||||
if action == 'migrate' and to_release_version == target_version:
|
||||
try:
|
||||
conn = psycopg2.connect("dbname=sysinv user=postgres port=%s"
|
||||
% postgres_port)
|
||||
@@ -65,6 +66,8 @@ def main():
|
||||
except Exception as e:
|
||||
LOG.exception("Error: {}".format(e))
|
||||
res = 1
|
||||
elif action == 'activate' and to_release_version == target_version:
|
||||
restart_services_bound_to_controller0_address()
|
||||
return res
|
||||
|
||||
|
||||
@@ -140,6 +143,21 @@ def db_update(conn, query):
|
||||
conn.commit()
|
||||
|
||||
|
||||
def restart_services_bound_to_controller0_address():
    """Restart the services that must pick up the controller-0 address.

    Runs ``systemctl restart`` for sm-api and fm-api, one after the other.
    This is best effort: a timeout or a non-zero exit status is logged and
    the remaining services are still restarted; nothing is raised for those
    two failure modes.
    """
    services = (
        'sm-api',
        'fm-api',
    )
    for service in services:
        LOG.info(f"Restarting {service}...")
        try:
            # stderr has to be captured explicitly; otherwise
            # CalledProcessError.stderr is None and the failure log below
            # would carry no diagnostic text.
            subprocess.run(['systemctl', 'restart', service], check=True,
                           timeout=15, stderr=subprocess.PIPE,
                           universal_newlines=True)
        except subprocess.TimeoutExpired:
            LOG.error(f"Restarting {service} timed out.")
        except subprocess.CalledProcessError as e:
            LOG.error(f"Restarting {service} failed: {e.stderr}")
|
||||
|
||||
|
||||
def get_system_mode():
|
||||
ini_str = '[DEFAULT]\n' + open('/etc/platform/platform.conf', 'r').read()
|
||||
|
||||
|
Reference in New Issue
Block a user