"""
|
|
Copyright (c) 2023-2025 Wind River Systems, Inc.
|
|
|
|
SPDX-License-Identifier: Apache-2.0
|
|
|
|
"""
|
|
import sys
|
|
|
|
# prevent software_controller from importing osprofiler
|
|
sys.modules['osprofiler'] = None
|
|
|
|
import configparser
|
|
import gc
|
|
import json
|
|
import logging
|
|
import os
|
|
from packaging import version
|
|
import re
|
|
import select
|
|
import sh
|
|
import shutil
|
|
import socket
|
|
import subprocess
|
|
import tempfile
|
|
import threading
|
|
import time
|
|
import typing
|
|
from wsgiref import simple_server
|
|
|
|
from fm_api import fm_api
|
|
from fm_api import constants as fm_constants
|
|
|
|
from oslo_config import cfg as oslo_cfg
|
|
|
|
import software.apt_utils as apt_utils
|
|
import software.lvm_snapshot as lvm_snapshot
|
|
import software.ostree_utils as ostree_utils
|
|
from software.api import app
|
|
from software.authapi import app as auth_app
|
|
from software.constants import CONTROLLER_0_HOSTNAME
|
|
from software.constants import CONTROLLER_1_HOSTNAME
|
|
from software.constants import INSTALL_LOCAL_FLAG
|
|
from software.states import DEPLOY_HOST_STATES
|
|
from software.states import DEPLOY_STATES
|
|
from software.states import INTERRUPTION_RECOVERY_STATES
|
|
from software.base import PatchService
|
|
from software.dc_utils import get_subcloud_groupby_version
|
|
from software.deploy_state import require_deploy_state
|
|
from software.exceptions import APTOSTreeCommandFail
|
|
from software.exceptions import HostNotFound
|
|
from software.exceptions import InternalError
|
|
from software.exceptions import MetadataFail
|
|
from software.exceptions import UpgradeNotSupported
|
|
from software.exceptions import OSTreeCommandFail
|
|
from software.exceptions import OSTreeTarFail
|
|
from software.exceptions import SoftwareError
|
|
from software.exceptions import SoftwareFail
|
|
from software.exceptions import ReleaseInvalidRequest
|
|
from software.exceptions import ReleaseValidationFailure
|
|
from software.exceptions import ReleaseIsoDeleteFailure
|
|
from software.exceptions import SoftwareServiceError
|
|
from software.exceptions import InvalidOperation
|
|
from software.exceptions import HostAgentUnreachable
|
|
from software.exceptions import HostIpNotFound
|
|
from software.exceptions import MaxReleaseExceeded
|
|
from software.exceptions import ServiceParameterNotFound
|
|
from software.plugin import DeployPluginRunner
|
|
from software.release_data import reload_release_data
|
|
from software.release_data import get_SWReleaseCollection
|
|
from software.software_functions import collect_current_load_for_hosts
|
|
from software.software_functions import copy_pxeboot_update_file
|
|
from software.software_functions import copy_pxeboot_cfg_files
|
|
from software.software_functions import create_deploy_hosts
|
|
from software.software_functions import deploy_host_validations
|
|
from software.software_functions import validate_host_deploy_order
|
|
from software.software_functions import parse_release_metadata
|
|
from software.software_functions import configure_logging
|
|
from software.software_functions import mount_iso_load
|
|
from software.software_functions import unmount_iso_load
|
|
from software.software_functions import read_upgrade_support_versions
|
|
from software.software_functions import get_to_release_from_metadata_file
|
|
from software.software_functions import BasePackageData
|
|
from software.software_functions import PatchFile
|
|
from software.software_functions import package_dir
|
|
from software.software_functions import repo_dir
|
|
from software.software_functions import root_scripts_dir
|
|
from software.software_functions import SW_VERSION
|
|
from software.software_functions import audit_log_info
|
|
from software.software_functions import repo_root_dir
|
|
from software.software_functions import is_deploy_state_in_sync
|
|
from software.software_functions import is_deployment_in_progress
|
|
from software.software_functions import get_release_from_patch
|
|
from software.software_functions import run_remove_temporary_data_script
|
|
from software.software_functions import to_bool
|
|
from software.release_state import ReleaseState
|
|
from software.utilities.deploy_set_failed import start_set_fail
|
|
from software.deploy_host_state import DeployHostState
|
|
from software.deploy_state import DeployState
|
|
from software.release_verify import verify_files
|
|
import software.config as cfg
|
|
import software.utils as utils
|
|
from software.sysinv_utils import get_k8s_ver
|
|
from software.sysinv_utils import is_system_controller
|
|
from software.sysinv_utils import update_host_sw_version
|
|
from software.sysinv_utils import are_all_hosts_unlocked_and_online
|
|
from software.sysinv_utils import get_system_info
|
|
from software.sysinv_utils import get_oot_drivers
|
|
from software.sysinv_utils import trigger_evaluate_apps_reapply
|
|
from software.sysinv_utils import trigger_vim_host_audit
|
|
|
|
from software.db.api import get_instance
|
|
|
|
import software.messages as messages
|
|
import software.constants as constants
|
|
from software import states
|
|
|
|
from tsconfig.tsconfig import INITIAL_CONFIG_COMPLETE_FLAG
|
|
from tsconfig.tsconfig import VOLATILE_CONTROLLER_CONFIG_COMPLETE
|
|
import xml.etree.ElementTree as ET
|
|
|
|
|
|
CONF = oslo_cfg.CONF
|
|
|
|
LOG = logging.getLogger('main_logger')
|
|
|
|
pidfile_path = "/var/run/patch_controller.pid"
|
|
|
|
sc = None
|
|
state_file = "%s/.controller.state" % constants.SOFTWARE_STORAGE_DIR
|
|
app_dependency_basename = "app_dependencies.json"
|
|
app_dependency_filename = "%s/%s" % (constants.SOFTWARE_STORAGE_DIR, app_dependency_basename)
|
|
|
|
insvc_patch_restart_controller = "/run/software/.restart.software-controller"
|
|
|
|
ETC_HOSTS_FILE_PATH = "/etc/hosts"
|
|
ETC_HOSTS_BACKUP_FILE_PATH = "/etc/hosts.patchbak"
|
|
PATCH_MIGRATION_SCRIPT_DIR = "/etc/update.d"
|
|
SOFTWARE_LOG_FILE = "/var/log/software.log"
|
|
|
|
stale_hosts = []
|
|
pending_queries = []
|
|
|
|
thread_death = None
|
|
keep_running = True
|
|
system_mode = None
|
|
|
|
# Limit socket blocking to 5 seconds to allow the thread to shut down
api_socket_timeout = 5.0


def is_simplex():
    global system_mode
    if system_mode is None:
        _, system_mode = get_system_info()

    return system_mode == constants.SYSTEM_MODE_SIMPLEX


class ControllerNeighbour(object):
    def __init__(self):
        self.last_ack = 0
        self.synced = False

    def rx_ack(self):
        self.last_ack = time.time()

    def get_age(self):
        return int(time.time() - self.last_ack)

    def rx_synced(self):
        self.synced = True

    def clear_synced(self):
        self.synced = False

    def get_synced(self):
        return self.synced


class AgentNeighbour(object):
    def __init__(self, ip):
        self.ip = ip
        self.last_ack = 0
        self.last_query_id = 0
        self.out_of_date = False
        self.hostname = "n/a"
        self.requires_reboot = False
        self.patch_failed = False
        self.stale = False
        self.pending_query = False
        self.latest_sysroot_commit = None
        self.nodetype = None
        self.sw_version = "unknown"
        self.subfunctions = []
        self.state = None
        self._is_alive = False

    @property
    def is_alive(self):
        return self._is_alive

    @is_alive.setter
    def is_alive(self, value):
        self._is_alive = value

    def rx_ack(self,
               hostname,
               out_of_date,
               requires_reboot,
               query_id,
               patch_failed,
               sw_version,
               state):
        self.last_ack = time.time()
        self.hostname = hostname
        self.patch_failed = patch_failed
        self.sw_version = sw_version
        self.state = state

        if out_of_date != self.out_of_date or requires_reboot != self.requires_reboot:
            self.out_of_date = out_of_date
            self.requires_reboot = requires_reboot
            LOG.info("Agent %s (%s) reporting out_of_date=%s, requires_reboot=%s",
                     self.hostname,
                     self.ip,
                     self.out_of_date,
                     self.requires_reboot)

        if self.last_query_id != query_id:
            self.last_query_id = query_id
            self.stale = True
            if self.ip not in stale_hosts and self.ip not in pending_queries:
                stale_hosts.append(self.ip)

    def get_age(self):
        return int(time.time() - self.last_ack)

    def handle_query_detailed_resp(self,
                                   latest_sysroot_commit,
                                   nodetype,
                                   sw_version,
                                   subfunctions,
                                   state):
        self.latest_sysroot_commit = latest_sysroot_commit
        self.nodetype = nodetype
        self.stale = False
        self.pending_query = False
        self.sw_version = sw_version
        self.subfunctions = subfunctions
        self.state = state

        if self.ip in pending_queries:
            pending_queries.remove(self.ip)

        if self.ip in stale_hosts:
            stale_hosts.remove(self.ip)

    def get_dict(self):
        d = {"ip": self.ip,
             "hostname": self.hostname,
             "deployed": not self.out_of_date,
             "secs_since_ack": self.get_age(),
             "patch_failed": self.patch_failed,
             "stale_details": self.stale,
             "latest_sysroot_commit": self.latest_sysroot_commit,
             "nodetype": self.nodetype,
             "subfunctions": self.subfunctions,
             "sw_version": self.sw_version,
             "state": self.state}

        return d


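# The message classes below implement the UDP control-plane messaging between
# the two controllers and the software agents. Illustrative sketch of the
# hello exchange, as implemented by the handlers in this file (not normative):
#
#   controller A --- PATCHMSG_HELLO (patch_op_counter) --> controller B
#   controller B --- PATCHMSG_HELLO_ACK -----------------> controller A
#
# If a neighbour reports a higher patch_op_counter, the receiving controller
# pulls the newer software metadata and repo from its mate via sync_from_nbr().

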
class PatchMessageHello(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_HELLO)
        self.patch_op_counter = 0

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'patch_op_counter' in data:
            self.patch_op_counter = data['patch_op_counter']

    def encode(self):
        global sc
        messages.PatchMessage.encode(self)
        self.message['patch_op_counter'] = sc.patch_op_counter

    def handle(self, sock, addr):
        global sc
        host = addr[0]
        if host == cfg.get_mgmt_ip():
            # Ignore messages from self
            return

        # Send response
        if self.patch_op_counter > 0:
            sc.handle_nbr_patch_op_counter(host, self.patch_op_counter)

        resp = PatchMessageHelloAck()
        resp.send(sock)

    def send(self, sock):
        global sc
        if sc.install_local:
            return
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class PatchMessageHelloAck(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_HELLO_ACK)

    def encode(self):
        # Nothing to add, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        global sc

        sc.controller_neighbours_lock.acquire()
        if not addr[0] in sc.controller_neighbours:
            sc.controller_neighbours[addr[0]] = ControllerNeighbour()

        sc.controller_neighbours[addr[0]].rx_ack()
        sc.controller_neighbours_lock.release()

    def send(self, sock):
        global sc
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class PatchMessageSyncReq(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_SYNC_REQ)

    def encode(self):
        # Nothing to add to the SYNC_REQ, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        global sc
        host = addr[0]
        if host == cfg.get_mgmt_ip():
            # Ignore messages from self
            return

        # We may need to do this in a separate thread, so that we continue to process hellos
        LOG.info("Handling sync req")

        # NOTE(bqian) sync_from_nbr returns "False" if sync operations failed.
        # Need to consider a retry to deal with the potential failure.
        sc.sync_from_nbr(host)

        resp = PatchMessageSyncComplete()
        resp.send(sock)

    def send(self, sock):
        global sc
        LOG.info("sending sync req")
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class PatchMessageSyncComplete(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_SYNC_COMPLETE)

    def encode(self):
        # Nothing to add to the SYNC_COMPLETE, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        global sc
        LOG.info("Handling sync complete")

        sc.controller_neighbours_lock.acquire()
        if not addr[0] in sc.controller_neighbours:
            sc.controller_neighbours[addr[0]] = ControllerNeighbour()

        sc.controller_neighbours[addr[0]].rx_synced()
        sc.controller_neighbours_lock.release()

    def send(self, sock):
        global sc
        LOG.info("sending sync complete")
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class PatchMessageHelloAgent(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_HELLO_AGENT)

    def encode(self):
        global sc
        messages.PatchMessage.encode(self)
        self.message['patch_op_counter'] = sc.patch_op_counter

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        global sc
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.agent_address, cfg.agent_port))
        if not sc.install_local:
            local_hostname = utils.ip_to_versioned_localhost(cfg.agent_mcast_group)
            sock.sendto(str.encode(message), (local_hostname, cfg.agent_port))


class PatchMessageSendLatestFeedCommit(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_SEND_LATEST_FEED_COMMIT)

    def encode(self):
        global sc
        messages.PatchMessage.encode(self)
        self.message['latest_feed_commit'] = sc.latest_feed_commit

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        global sc
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.agent_address, cfg.agent_port))
        if not sc.install_local:
            local_hostname = utils.ip_to_versioned_localhost(cfg.agent_mcast_group)
            sock.sendto(str.encode(message), (local_hostname, cfg.agent_port))


class PatchMessageHelloAgentAck(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_HELLO_AGENT_ACK)
        self.query_id = 0
        self.agent_out_of_date = False
        self.agent_hostname = "n/a"
        self.agent_requires_reboot = False
        self.agent_patch_failed = False
        self.agent_sw_version = "unknown"
        self.agent_state = "unknown"

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'query_id' in data:
            self.query_id = data['query_id']
        if 'out_of_date' in data:
            self.agent_out_of_date = data['out_of_date']
        if 'hostname' in data:
            self.agent_hostname = data['hostname']
        if 'requires_reboot' in data:
            self.agent_requires_reboot = data['requires_reboot']
        if 'patch_failed' in data:
            self.agent_patch_failed = data['patch_failed']
        if 'sw_version' in data:
            self.agent_sw_version = data['sw_version']
        if 'state' in data:
            self.agent_state = data['state']

    def encode(self):
        # Nothing to add, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        global sc

        sc.hosts_lock.acquire()
        if not addr[0] in sc.hosts:
            sc.hosts[addr[0]] = AgentNeighbour(addr[0])

        sc.hosts[addr[0]].rx_ack(self.agent_hostname,
                                 self.agent_out_of_date,
                                 self.agent_requires_reboot,
                                 self.query_id,
                                 self.agent_patch_failed,
                                 self.agent_sw_version,
                                 self.agent_state)
        sc.hosts_lock.release()

    def send(self, sock):  # pylint: disable=unused-argument
        LOG.error("Should not get here")


class PatchMessageQueryDetailed(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_QUERY_DETAILED)

    def encode(self):
        # Nothing to add to the message, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        self.encode()
        message = json.dumps(self.message)
        sock.sendall(str.encode(message))


class PatchMessageQueryDetailedResp(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_QUERY_DETAILED_RESP)
        self.agent_sw_version = "unknown"
        self.latest_sysroot_commit = "unknown"
        self.subfunctions = []
        self.nodetype = "unknown"
        self.agent_state = "unknown"

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'latest_sysroot_commit' in data:
            self.latest_sysroot_commit = data['latest_sysroot_commit']
        if 'nodetype' in data:
            self.nodetype = data['nodetype']
        if 'sw_version' in data:
            self.agent_sw_version = data['sw_version']
        if 'subfunctions' in data:
            self.subfunctions = data['subfunctions']
        if 'state' in data:
            self.agent_state = data['state']

    def encode(self):
        LOG.error("Should not get here")

    def handle(self, sock, addr):
        global sc

        ip = addr[0]
        sc.hosts_lock.acquire()
        if ip in sc.hosts:
            sc.hosts[ip].handle_query_detailed_resp(self.latest_sysroot_commit,
                                                    self.nodetype,
                                                    self.agent_sw_version,
                                                    self.subfunctions,
                                                    self.agent_state)
            for patch_id in list(sc.interim_state):
                if ip in sc.interim_state[patch_id]:
                    sc.interim_state[patch_id].remove(ip)
                    if len(sc.interim_state[patch_id]) == 0:
                        del sc.interim_state[patch_id]
            sc.hosts_lock.release()
        else:
            sc.hosts_lock.release()

    def send(self, sock):  # pylint: disable=unused-argument
        LOG.error("Should not get here")


class PatchMessageAgentInstallReq(messages.PatchMessage):
    def __init__(self, additional_data=None):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_AGENT_INSTALL_REQ)
        self.ip = None
        self.force = False
        self.major_release = None
        self.commit_id = None
        self.additional_data = additional_data

    def encode(self):
        global sc
        messages.PatchMessage.encode(self)
        self.message['force'] = self.force
        self.message['major_release'] = self.major_release
        self.message['commit_id'] = self.commit_id
        if self.additional_data:
            self.message['additional_data'] = self.additional_data.copy()

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        self.encode()
        message = json.dumps(self.message)
        msg = f"sending install request to node: {self.ip} with {message}"
        LOG.info(msg)
        sock.sendto(str.encode(message), (self.ip, cfg.agent_port))


class PatchMessageAgentInstallResp(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_AGENT_INSTALL_RESP)
        self.status = False
        self.reject_reason = None
        self.reboot_required = False
        reload_release_data()

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'status' in data:
            self.status = data['status']
        if 'reject_reason' in data:
            self.reject_reason = data['reject_reason']
        if 'reboot_required' in data:
            self.reboot_required = data['reboot_required']

    def encode(self):
        # Nothing to add, so just call the super class
        messages.PatchMessage.encode(self)

    def _set_host_install_completed(self, host):
        global sc

        sc.hosts_lock.acquire()
        try:
            host.install_status = self.status
            host.install_pending = False
            host.install_reject_reason = self.reject_reason
        finally:
            sc.hosts_lock.release()

    def handle(self, sock, addr):
        LOG.info("Handling install resp from %s", addr[0])
        global sc

        ip = addr[0]
        sc.hosts_lock.acquire()
        try:
            # NOTE(bqian) this appears to tolerate a failure situation where a
            # host is directed to install a patch but software-controller-daemon
            # gets restarted during the installation. sc.hosts, which is
            # in-memory volatile storage, should be removed and replaced with
            # the permanent deploy-host entity.
            if ip not in sc.hosts:
                sc.hosts[ip] = AgentNeighbour(ip)

            host = sc.hosts[ip]
            hostname = host.hostname
        finally:
            sc.hosts_lock.release()

        dbapi = get_instance()
        deploy = dbapi.get_deploy_all()
        if len(deploy) == 0:
            LOG.info("No deploy in progress, ignoring install resp from %s", addr[0])
            return
        deploy = deploy[0]

        success = False
        deploy_host_state = DeployHostState(hostname)

        try:
            if self.status:
                deploying = ReleaseState(release_state=states.DEPLOYING)
                if deploying.is_major_release_deployment():
                    # For major release deployment, update sysinv ihost.sw_version
                    # so that the right manifest can be generated.
                    sw_version = utils.get_major_release_version(deploy.get("to_release"))
                    msg = f"Update {hostname} to {sw_version}"
                    LOG.info(msg)
                    try:
                        update_host_sw_version(hostname, sw_version)
                    except Exception:
                        # Failed a step, fail the host deploy for reattempt
                        return

                success = True
        finally:
            if success:
                deploy_host_state.deployed()
                if self.reboot_required:
                    sc.manage_software_alarm(fm_constants.FM_ALARM_ID_USM_DEPLOY_HOST_SUCCESS_RR,
                                             fm_constants.FM_ALARM_STATE_SET,
                                             "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST, hostname))
            else:
                deploy_host_state.deploy_failed()
                sc.manage_software_alarm(fm_constants.FM_ALARM_ID_USM_DEPLOY_HOST_FAILURE,
                                         fm_constants.FM_ALARM_STATE_SET,
                                         "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST, hostname))

        self._set_host_install_completed(host)

    def send(self, sock):  # pylint: disable=unused-argument
        LOG.error("Should not get here")


class PatchMessageDropHostReq(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DROP_HOST_REQ)
        self.ip = None

    def encode(self):
        messages.PatchMessage.encode(self)
        self.message['ip'] = self.ip

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'ip' in data:
            self.ip = data['ip']

    def handle(self, sock, addr):
        global sc
        host = addr[0]
        if host == cfg.get_mgmt_ip():
            # Ignore messages from self
            return

        if self.ip is None:
            LOG.error("Received PATCHMSG_DROP_HOST_REQ with no ip: %s", json.dumps(self.data))
            return

        sc.drop_host(self.ip, sync_nbr=False)
        return

    def send(self, sock):
        global sc
        if sc.install_local:
            return
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class SoftwareMessageDeployStateUpdate(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DEPLOY_STATE_UPDATE)
        self.data = None

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        self.data = data

    def encode(self):
        global sc
        messages.PatchMessage.encode(self)
        filesystem_data = utils.get_software_filesystem_data()
        deploys_state = {"deploy_host": filesystem_data.get("deploy_host", {}),
                         "deploy": filesystem_data.get("deploy", {})}
        self.message["deploy_state"] = deploys_state

    def handle(self, sock, addr):
        global sc
        if sc.mgmt_ip == addr[0]:
            # update from localhost, ignore
            return

        filesystem_data = utils.get_software_filesystem_data()
        synced_filesystem_data = utils.get_synced_software_filesystem_data()

        actual_state = {"deploy_host": filesystem_data.get("deploy_host", {}),
                        "deploy": filesystem_data.get("deploy", {})}

        synced_state = {"deploy_host": synced_filesystem_data.get("deploy_host", {}),
                        "deploy": synced_filesystem_data.get("deploy", {})}

        peer_state = {"deploy_host": self.data.get("deploy_state").get("deploy_host", {}),
                      "deploy": self.data.get("deploy_state").get("deploy", {})}

        result = "diverged"
        if actual_state == peer_state:
            result = messages.MSG_ACK_SUCCESS
        elif actual_state == synced_state:
            result = messages.MSG_ACK_SUCCESS
            utils.save_to_json_file(constants.SOFTWARE_JSON_FILE, peer_state)

        if result == messages.MSG_ACK_SUCCESS:
            utils.save_to_json_file(constants.SYNCED_SOFTWARE_JSON_FILE, peer_state)

        resp = SoftwareMessageDeployStateUpdateAck()
        resp.send(sock, result)

    def send(self, sock):
        global sc
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


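# Illustrative summary of the reconciliation in handle() above (comments only):
#   actual == peer        -> controllers already agree; ack success
#   actual == last synced -> peer moved ahead; adopt the peer state locally,
#                            then record it as the new synced baseline
#   otherwise             -> the deploy states have diverged; ack "diverged"

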
class SoftwareMessageDeployStateUpdateAck(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DEPLOY_STATE_UPDATE_ACK)
        self.peer_state_data = {}

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        self.peer_state_data = data

    def encode(self, result):  # pylint: disable=arguments-differ
        messages.PatchMessage.encode(self)
        synced_data = utils.get_synced_software_filesystem_data()
        self.message["result"] = result
        self.message["deploy_state"] = synced_data

    def handle(self, sock, addr):
        global sc
        if sc.mgmt_ip == addr[0]:
            # update from localhost, ignore
            return

        if self.peer_state_data["result"] == messages.MSG_ACK_SUCCESS:
            LOG.debug("Peer controller is synced with value: %s",
                      self.peer_state_data["deploy_state"])
            utils.save_to_json_file(constants.SYNCED_SOFTWARE_JSON_FILE,
                                    self.peer_state_data["deploy_state"])
        else:
            LOG.error("Peer controller deploy state has diverged.")

    def send(self, sock, result):
        self.encode(result)
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class SWMessageDeployStateChanged(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DEPLOY_STATE_CHANGED)
        self.valid = False
        self.agent = None
        self.deploy_state = None
        self.hostname = None
        self.host_state = None

    def decode(self, data):
        """
        The message is a serialized json object:
        {
            "msgtype": "deploy-state-changed",
            "msgversion": 1,
            "agent": "<a valid agent>",
            "deploy-state": "<deploy-state>",
            "hostname": "<hostname>",
            "host-state": "<host-deploy-substate>"
        }
        """

        messages.PatchMessage.decode(self, data)

        self.valid = True
        self.agent = None

        valid_agents = ['deploy-start', 'deploy-activate', 'deploy-activate-rollback', 'admin']
        if 'agent' in data:
            self.agent = data['agent']
        else:
            self.agent = 'unknown'

        if self.agent not in valid_agents:
            # ignore msg from unknown senders
            LOG.info("%s received from unknown agent %s" %
                     (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent))
            self.valid = False

        valid_state = {
            DEPLOY_STATES.START_DONE.value: DEPLOY_STATES.START_DONE,
            DEPLOY_STATES.START_FAILED.value: DEPLOY_STATES.START_FAILED,
            DEPLOY_STATES.ACTIVATE_FAILED.value: DEPLOY_STATES.ACTIVATE_FAILED,
            DEPLOY_STATES.ACTIVATE_DONE.value: DEPLOY_STATES.ACTIVATE_DONE,
            DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE.value: DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE,
            DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED.value: DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED,
            DEPLOY_STATES.HOST_FAILED.value: DEPLOY_STATES.HOST_FAILED
        }
        if 'deploy-state' in data and data['deploy-state']:
            deploy_state = data['deploy-state']
            if deploy_state in valid_state:
                self.deploy_state = valid_state[deploy_state]
                LOG.info("%s received from %s with deploy-state %s" %
                         (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, deploy_state))
            else:
                self.valid = False
                LOG.error("%s received from %s with invalid deploy-state %s" %
                          (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, deploy_state))

        if 'hostname' in data and data['hostname']:
            self.hostname = data['hostname']

        if 'host-state' in data and data['host-state']:
            host_state = states.DEPLOY_HOST_STATES(data['host-state'])
            if host_state not in states.VALID_HOST_DEPLOY_STATE:
                LOG.error("%s received from %s with invalid host-state %s" %
                          (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, host_state))
                self.valid = False
            else:
                self.host_state = host_state

        if self.valid:
            # exactly one of (hostname + host-state) or deploy-state must be present
            self.valid = (bool(self.host_state and self.hostname) != bool(self.deploy_state))

        if not self.valid:
            LOG.error("%s received from %s as invalid %s" %
                      (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, data))

    def handle(self, sock, addr):
        global sc
        if not self.valid:
            # nothing to do
            return

        if self.deploy_state:
            LOG.info("Received deploy state changed to %s, agent %s" %
                     (self.deploy_state, self.agent))
            try:
                sc.deploy_state_changed(self.deploy_state)
            except Exception as e:
                LOG.error("Deploy state change failed: %s" % str(e))
        else:
            LOG.info("Received %s deploy host state changed to %s, agent %s" %
                     (self.hostname, self.host_state, self.agent))
            sc.host_deploy_state_changed(self.hostname, self.host_state)

        sock.sendto(str.encode("OK"), addr)

    def send(self, sock):
        global sc
        LOG.info("sending deploy state changed")
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class SoftwareMessageDeployDeleteCleanupReq(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DEPLOY_DELETE_CLEANUP_REQ)
        self.ip = None
        self.major_release = None

    def encode(self):
        messages.PatchMessage.encode(self)
        self.message["major_release"] = self.major_release

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        global sc
        LOG.info("Sending deploy delete cleanup request to all nodes.")
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.agent_address, cfg.agent_port))


class SoftwareMessageDeployDeleteCleanupResp(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DEPLOY_DELETE_CLEANUP_RESP)
        self.success = None

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'success' in data:
            self.success = data['success']

    def encode(self):
        # Nothing to add, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        ip = addr[0]
        LOG.info("Handling deploy delete cleanup resp from %s", ip)
        global sc
        if self.success:
            LOG.info("Host %s successfully executed deploy delete "
                     "cleanup tasks." % sc.hosts[ip].hostname)
            return
        LOG.error("Host %s failed executing deploy delete "
                  "cleanup tasks." % sc.hosts[ip].hostname)

    def send(self, sock):  # pylint: disable=unused-argument
        LOG.error("Should not get here")


class SoftwareMessageCheckAgentAliveReq(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_CHECK_AGENT_ALIVE_REQ)
        self.ip = None

    def encode(self):
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        LOG.info("Sending check agent alive to %s", self.ip)
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (self.ip, cfg.agent_port))


class SoftwareMessageCheckAgentAliveResp(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_CHECK_AGENT_ALIVE_RESP)
        self.status = False

    def decode(self, data):
        messages.PatchMessage.decode(self, data)

    def encode(self):
        # Nothing to add, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        LOG.info("Handling check agent alive resp from %s", addr[0])
        global sc

        ip = addr[0]
        sc.hosts_lock.acquire()
        sc.hosts[ip].is_alive = True
        sc.hosts_lock.release()
        LOG.info("Agent from %s is reachable and alive." % ip)

    def send(self, sock):  # pylint: disable=unused-argument
        LOG.error("Should not get here")


class PatchController(PatchService):
    def __init__(self):
        PatchService.__init__(self)

        # Locks
        self.socket_lock = threading.RLock()
        self.controller_neighbours_lock = threading.RLock()
        self.hosts_lock = threading.RLock()

        self.hosts = {}
        self.controller_neighbours = {}
        self.host_mgmt_ip = []

        self.db_api_instance = get_instance()

        self.ignore_errors = 'False'

        # interim_state is used to track hosts that have not responded
        # with fresh queries since a patch was applied or removed, on
        # a per-patch basis. This allows the patch controller to move
        # patches immediately into a "Partial" state until all nodes
        # have responded.
        # Illustrative shape: {"<patch_id>": ["<host_ip>", ...]}
        self.interim_state = {}

        self.sock_out = None
        self.sock_in = None
        self.controller_address = None
        self.agent_address = None
        self.patch_op_counter = 1
        reload_release_data()
        try:
            self.latest_feed_commit = ostree_utils.get_feed_latest_commit(SW_VERSION)
        except OSTreeCommandFail:
            LOG.exception("Failure to fetch the feed ostree latest log while "
                          "initializing Patch Controller")
            self.latest_feed_commit = None

        self.base_pkgdata = BasePackageData()

        # Alarm cache, used to store the last raised alarm state
        self.usm_alarm = {constants.LAST_IN_SYNC: False}
        self.hostname = socket.gethostname()
        self.fm_api = fm_api.FaultAPIs()

        self.allow_insvc_patching = True

        if os.path.exists(app_dependency_filename):
            try:
                with open(app_dependency_filename, 'r') as f:
                    self.app_dependencies = json.loads(f.read())
            except Exception:
                LOG.exception("Failed to read app dependencies: %s", app_dependency_filename)
        else:
            self.app_dependencies = {}

        if os.path.isfile(state_file):
            self.read_state_file()
        else:
            self.write_state_file()

        # Create patch activation scripts folder
        if not os.path.exists(PATCH_MIGRATION_SCRIPT_DIR):
            os.makedirs(PATCH_MIGRATION_SCRIPT_DIR, 0o755)

        self.register_deploy_state_change_listeners()

    def _state_changed_sync(self, *args):  # pylint: disable=unused-argument
        if is_simplex():
            # ensure the in-sync state for SX
            # treat it as SX for deploy before bootstrap
            shutil.copyfile(constants.SOFTWARE_JSON_FILE, constants.SYNCED_SOFTWARE_JSON_FILE)
        else:
            self._update_state_to_peer()

    def _notify_vim_on_state_change(self, target_state):
        """Notify VIM of state change.

        This method will notify VIM when one of the following state changes is made:
        - start-done
        - start-failed
        - activate-done
        - activate-failed
        - activate-rollback-done
        - activate-rollback-failed

        If new async states are added they should be added here.

        Args:
            target_state: The new deployment state to notify VIM about
        """

        if self.pre_bootstrap:
            return

        if target_state not in [
            DEPLOY_STATES.START_DONE,
            DEPLOY_STATES.START_FAILED,
            DEPLOY_STATES.ACTIVATE_DONE,
            DEPLOY_STATES.ACTIVATE_FAILED,
            DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE,
            DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED,
        ]:
            return

        # Get local hostname
        LOG.info("Notifying VIM of state change: %s", target_state)
        trigger_vim_host_audit(socket.gethostname())

    def register_deploy_state_change_listeners(self):
        # data sync listener
        DeployState.register_event_listener(self._state_changed_sync)
        DeployHostState.register_event_listener(self._state_changed_sync)

        DeployHostState.register_event_listener(DeployState.host_deploy_updated)
        DeployState.register_event_listener(ReleaseState.deploy_updated)
        DeployState.register_event_listener(self.create_clean_up_deployment_alarm)

        # VIM notifications
        DeployState.register_event_listener(self._notify_vim_on_state_change)
        # TODO(jkraitbe): Add host-deploy when that becomes async

    @property
    def release_collection(self):
        swrc = get_SWReleaseCollection()
        return swrc

    def update_config(self):
        cfg.read_config()

        if self.port != cfg.controller_port:
            self.port = cfg.controller_port

        # Loopback interface does not support multicast messaging, therefore
        # revert to using unicast messaging when configured against the
        # loopback device
        if self.pre_bootstrap:
            mgmt_ip = utils.gethostbyname(constants.PREBOOTSTRAP_HOSTNAME)
            self.mcast_addr = None
            self.controller_address = mgmt_ip
            self.agent_address = mgmt_ip
        elif cfg.get_mgmt_iface() == constants.LOOPBACK_INTERFACE_NAME:
            mgmt_ip = cfg.get_mgmt_ip()
            self.mcast_addr = None
            self.controller_address = mgmt_ip
            self.agent_address = mgmt_ip
        else:
            self.mcast_addr = cfg.controller_mcast_group
            self.controller_address = cfg.controller_mcast_group
            self.agent_address = cfg.agent_mcast_group

    def socket_lock_acquire(self):
        self.socket_lock.acquire()

    def socket_lock_release(self):
        try:
            self.socket_lock.release()
        except Exception:
            pass

    def write_state_file(self):
        config = configparser.ConfigParser(strict=False)

        cfgfile = open(state_file, 'w')

        config.add_section('runtime')
        config.set('runtime', 'patch_op_counter', str(self.patch_op_counter))
        config.write(cfgfile)
        cfgfile.close()

    def read_state_file(self):
        config = configparser.ConfigParser(strict=False)

        config.read(state_file)

        try:
            counter = config.getint('runtime', 'patch_op_counter')
            self.patch_op_counter = counter

            LOG.info("patch_op_counter is: %d", self.patch_op_counter)
        except configparser.Error:
            LOG.exception("Failed to read state info")

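    # Illustrative contents of the .controller.state file written above
    # (counter value assumed):
    #   [runtime]
    #   patch_op_counter = 7
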
    def handle_nbr_patch_op_counter(self, host, nbr_patch_op_counter):
        if self.patch_op_counter >= nbr_patch_op_counter:
            return

        # NOTE(bqian) sync_from_nbr returns "False" if sync operations failed.
        # Need to consider a retry to deal with the potential failure.
        self.sync_from_nbr(host)

    def sync_from_nbr(self, host):
        # Sync the software repo
        host_url = utils.ip_to_url(host)
        try:
            output = subprocess.check_output(["rsync",
                                              "-acv",
                                              "--delete",
                                              "--exclude", "tmp",
                                              "--exclude", "software.json",
                                              "rsync://%s/software/" % host_url,
                                              "%s/" % constants.SOFTWARE_STORAGE_DIR],
                                             stderr=subprocess.STDOUT)
            LOG.info("Synced to mate software via rsync: %s", output)
        except subprocess.CalledProcessError as e:
            LOG.error("Failed to rsync: %s", e.output)
            return False

        try:
            output = subprocess.check_output(["rsync",
                                              "-acv",
                                              "--delete",
                                              "rsync://%s/repo/" % host_url,
                                              "%s/" % repo_root_dir],
                                             stderr=subprocess.STDOUT)
            LOG.info("Synced to mate repo via rsync: %s", output)
        except subprocess.CalledProcessError as e:
            LOG.error("Failed to rsync: %s", e.output)
            return False

        try:
            for neighbour in list(self.hosts):
                if (self.hosts[neighbour].nodetype == "controller" and
                        self.hosts[neighbour].ip == host):
                    LOG.info("Starting sync controllers")
                    # The output is a string that lists the directories
                    # Example output:
                    # >>> dir_names = sh.ls("/var/www/pages/feed/")
                    # >>> dir_names.stdout
                    # b'rel-22.12 rel-22.5\n'
                    dir_names = sh.ls(constants.FEED_OSTREE_BASE_DIR)

                    # Convert the output above into a list that can be iterated
                    # >>> list_of_dirs = dir_names.stdout.decode().rstrip().split()
                    # >>> print(list_of_dirs)
                    # ['rel-22.12', 'rel-22.5']
                    list_of_dirs = dir_names.stdout.decode("utf-8").rstrip().split()

                    for rel_dir in list_of_dirs:

                        # todo(lvieira): The sync of N-1 feed folders is filtered
                        # out. Recheck this if, in the future, an N-1 patch apply
                        # is supported in the system controller.
                        rel_version = rel_dir.split("rel-")[-1]
                        if rel_version < SW_VERSION:
                            LOG.info("Skip syncing %s inactive release", rel_dir)
                            continue

                        feed_repo = "%s/%s/ostree_repo/" % (constants.FEED_OSTREE_BASE_DIR, rel_dir)
                        if not os.path.isdir(feed_repo):
                            LOG.info("Skipping feed dir %s", feed_repo)
                            continue
                        LOG.info("Syncing %s", feed_repo)
                        output = subprocess.check_output(["ostree",
                                                          "--repo=%s" % feed_repo,
                                                          "pull",
                                                          "--depth=-1",
                                                          "--mirror",
                                                          "starlingx"],
                                                         stderr=subprocess.STDOUT)
                        output = subprocess.check_output(["ostree",
                                                          "summary",
                                                          "--update",
                                                          "--repo=%s" % feed_repo],
                                                         stderr=subprocess.STDOUT)
                        LOG.info("Synced to mate feed via ostree pull: %s", output)

                    output = subprocess.check_output(["rsync",
                                                      "-acv",
                                                      "--delete",
                                                      "rsync://%s/update_scripts/" % host_url,
                                                      "%s/" % PATCH_MIGRATION_SCRIPT_DIR],
                                                     stderr=subprocess.STDOUT)
                    LOG.info("Synced %s folder between controllers: %s"
                             % (PATCH_MIGRATION_SCRIPT_DIR, output))

        except subprocess.CalledProcessError as e:
            LOG.error("Failed during controllers sync tasks: %s", e.output)
            return False
        except Exception as e:
            LOG.error("Exception while syncing controllers: %s", e)
            return False

        self.read_state_file()

        self.interim_state = {}
        reload_release_data()

        if os.path.exists(app_dependency_filename):
            try:
                with open(app_dependency_filename, 'r') as f:
                    self.app_dependencies = json.loads(f.read())
            except Exception:
                LOG.exception("Failed to read app dependencies: %s", app_dependency_filename)
        else:
            self.app_dependencies = {}

        return True

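    # Summary of the sync sequence above: rsync the /opt/software metadata
    # (excluding software.json and tmp), rsync the package repo, and, when the
    # mate is a controller, "ostree pull --mirror" each active feed repo,
    # refresh its summary, and rsync the update scripts directory.
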
    def inc_patch_op_counter(self):
        self.patch_op_counter += 1
        self.write_state_file()

    def get_release_dependency_list(self, release_id, preinstalled_patches=None):
        """
        Returns a list of software releases that are required by this release.
        Example: If R5 requires R4 and R1, R4 requires R3 and R1, R3 requires R1
        then for input param release_id='R5', it will return ['R4', 'R1', 'R3']
        :param release_id: The software release ID
        :param preinstalled_patches: A list containing all pre-installed patches
        """

        def get_dependencies(release_id, visited):
            release = self.release_collection.get_release_by_id(release_id)
            if release is None:
                error = f"Not all required releases are uploaded, missing {release_id}"
                raise SoftwareServiceError(error=error)

            dependencies = []
            for req_release in release.requires_release_ids:
                if req_release not in visited:
                    visited.add(req_release)
                    dependencies.append(req_release)
                    if req_release not in preinstalled_patches:
                        dependencies.extend(get_dependencies(req_release, visited))
            return dependencies

        if preinstalled_patches is None:
            preinstalled_patches = []

        return get_dependencies(release_id, set())

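    # Illustrative use of preinstalled_patches with the docstring scenario:
    # get_release_dependency_list('R5', preinstalled_patches=['R4']) still
    # returns R4 itself but stops recursing below it, yielding ['R4', 'R1']
    # instead of ['R4', 'R1', 'R3'].
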
    def get_ostree_tar_filename(self, patch_sw_version, patch_id):
        '''
        Returns the path of the ostree tarball
        :param patch_sw_version: sw version this patch must be applied to
        :param patch_id: The patch ID
        '''
        ostree_tar_dir = package_dir[patch_sw_version]
        ostree_tar_filename = "%s/%s-software.tar" % (ostree_tar_dir, patch_id)
        return ostree_tar_filename

    def delete_start_install_script(self, patch_id):
        '''
        Deletes the start and install scripts associated with the patch
        :param patch_id: The patch ID
        '''
        release = self.release_collection.get_release_by_id(patch_id)
        scripts = ["pre_start", "post_start", "pre_install", "post_install"]

        for script in scripts:
            script_name = getattr(release, script, None)
            if script_name:
                script_path = os.path.join(root_scripts_dir, f"{patch_id}_{script_name}")
                try:
                    os.remove(script_path)
                    LOG.info("Removed %s script" % script_path)
                except OSError:
                    msg = "Failed to remove start/install script for %s" % patch_id
                    LOG.warning(msg)

    def delete_patch_activate_scripts(self, patch_id):
        '''
        Deletes the activate scripts associated with the patch
        :param patch_id: The patch ID
        '''

        release = self.release_collection.get_release_by_id(patch_id)
        activate_scripts_list = release.activation_scripts

        for script in activate_scripts_list:
            full_name_file = "%s_%s" % (patch_id, script)
            script_path = "%s/%s" % (root_scripts_dir, full_name_file)

            try:
                os.remove(script_path)
            except OSError:
                msg = "Failed to remove the activate script for %s" % patch_id
                LOG.warning(msg)

    def run_semantic_check(self, action, patch_list):
        if not os.path.exists(INITIAL_CONFIG_COMPLETE_FLAG):
            # Skip semantic checks if initial configuration isn't complete
            return

        # Pass the current patch state to the semantic check as a series of args
        patch_state_args = []
        for release in self.release_collection.iterate_releases():
            patch_state = '%s=%s' % (release.id, release.state)
            patch_state_args += ['-p', patch_state]

        # Run semantic checks, if any
        for patch_id in patch_list:
            semchk = os.path.join(constants.SEMANTICS_DIR, action, patch_id)

            if os.path.exists(semchk):
                try:
                    LOG.info("Running semantic check: %s", semchk)
                    subprocess.check_output([semchk] + patch_state_args,
                                            stderr=subprocess.STDOUT)
                    LOG.info("Semantic check %s passed", semchk)
                except subprocess.CalledProcessError as e:
                    msg = "Semantic check failed for %s:\n%s" % (patch_id, e.output)
                    LOG.exception(msg)
                    raise SoftwareFail(msg)

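    # Illustrative semantic check invocation (paths and states assumed):
    #   <SEMANTICS_DIR>/<action>/<patch_id> -p stx-24.09.1=deployed -p stx-24.09.2=available
    # A non-zero exit status fails the check and aborts the operation.
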
    def software_install_local_api(self, delete):
        """
        Enable or disable patch installation on the local controller
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        dbapi = get_instance()
        deploy = dbapi.get_deploy_all()
        if len(deploy) > 0:
            msg_info += "Software Deploy operation is in progress.\n"
            msg_info += "Please finish the current deploy before modifying install local mode.\n"
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        if os.path.isfile(INSTALL_LOCAL_FLAG) and delete:
            try:
                os.remove(INSTALL_LOCAL_FLAG)
            except Exception:
                LOG.exception("Failed to clear %s flag", INSTALL_LOCAL_FLAG)
            msg = "Software deployment in local installation mode is stopped"
            msg_info += f"{msg}.\n"
            LOG.info(msg)
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        elif not delete and not os.path.isfile(INSTALL_LOCAL_FLAG):
            open(INSTALL_LOCAL_FLAG, 'a').close()
            msg = "Software deployment in local installation mode is started"
            msg_info += f"{msg}.\n"
            LOG.info(msg)
            return dict(info=msg_info, warning=msg_warning, error=msg_error)
        else:
            mode = 'disabled' if delete else 'enabled'
            msg_info += f"Software deployment in local installation mode is already {mode}.\n"
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

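    # Illustrative return value of software_install_local_api (message assumed):
    #   {"info": "Software deployment in local installation mode is started.\n",
    #    "warning": "", "error": ""}
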
    def major_release_upload_check(self):
        """
        major release upload semantic check
        """
        valid_controllers = ['controller-0']
        if socket.gethostname() not in valid_controllers:
            msg = f"Upload rejected, major release must be uploaded to {valid_controllers}"
            LOG.info(msg)
            raise SoftwareServiceError(error=msg)

        max_major_releases = 2
        major_releases = []
        for rel in self.release_collection.iterate_releases():
            major_rel = rel.sw_version
            if major_rel not in major_releases:
                major_releases.append(major_rel)

        # Only the system controller can hold an extra major release
        # (both N+1 and N-1 alongside N)
        max_releases = max_major_releases + 1 if is_system_controller() else max_major_releases
        if len(major_releases) >= max_releases:
            msg = f"Major releases {major_releases} have already been uploaded{' in system controller' if is_system_controller() else ''}. " + \
                  f"Max major releases is {max_releases}"
            LOG.info(msg)
            raise MaxReleaseExceeded(msg)

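    # Example: on a system controller max_releases is 3 (N-1, N and N+1 may
    # coexist); elsewhere it is 2, so an upload that would add a further major
    # release raises MaxReleaseExceeded.
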
    def _run_load_import(self, from_release, to_release, iso_mount_dir, upgrade_files):
        """
        Run load and import
        :param from_release: From release
        :param to_release: To release
        :param iso_mount_dir: ISO mount directory
        :return: info, warning, error messages, dict of release metadata info
        """
        local_info = ""
        local_warning = ""
        local_error = ""
        release_meta_info = {}

        def run_script_command(cmd):
            LOG.info("Running load import command: %s", " ".join(cmd))
            # check=True raises CalledProcessError on a non-zero exit status,
            # so only the success branch of the conditional below is normally reached
            result = subprocess.run(cmd, stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT, check=True, text=True)
            return (result.stdout, None) if result.returncode == 0 else (None, result.stdout)

        # Check if the major-release-upload script exists in the iso
        has_release_upload_script = os.path.isfile(os.path.join(
            iso_mount_dir, 'upgrades', 'software-deploy', constants.MAJOR_RELEASE_UPLOAD_SCRIPT))

        if has_release_upload_script:
            # major-release-upload script is found. This iso supports upgrade from USM
            try:
                # Copy iso /upgrades/software-deploy/ to /opt/software/rel-<rel>/bin/
                to_release_bin_dir = os.path.join(
                    constants.SOFTWARE_STORAGE_DIR, ("rel-%s" % to_release), "bin")
                if os.path.exists(to_release_bin_dir):
                    shutil.rmtree(to_release_bin_dir)
                shutil.copytree(os.path.join(iso_mount_dir, "upgrades", constants.SOFTWARE_DEPLOY_FOLDER),
                                to_release_bin_dir, symlinks=True)

                # Run the major-release-upload script
                import_script = os.path.join(to_release_bin_dir, constants.MAJOR_RELEASE_UPLOAD_SCRIPT)
                load_import_cmd = [
                    str(import_script),
                    f"--from-release={from_release}",
                    f"--to-release={to_release}",
                    f"--iso-dir={iso_mount_dir}"
                ]

                load_import_info, load_import_error = run_script_command(load_import_cmd)
                local_info += load_import_info or ""
                local_error += load_import_error or ""

                # Copy metadata.xml to /opt/software/rel-<rel>/
                to_file = os.path.join(constants.SOFTWARE_STORAGE_DIR,
                                       ("rel-%s" % to_release), "metadata.xml")
                metadata_file = os.path.join(iso_mount_dir, "upgrades", "metadata.xml")
                shutil.copyfile(metadata_file, to_file)

                # Update the release metadata
                # metadata files have been copied over to the metadata/available directory
                reload_release_data()
                LOG.info("Updated release metadata for %s", to_release)

                release_meta_info = self.get_release_meta_info(iso_mount_dir, upgrade_files)

                return local_info, local_warning, local_error, release_meta_info

            except Exception as e:
                LOG.exception("Error occurred while running load import: %s", str(e))
                raise

        # At this step, the major-release-upload script is not found in the iso.
        # Therefore, we run the local major-release-upload script, which supports
        # importing an N-1 iso that doesn't support the USM feature.
        # This is the special case where *only* the DC system controller can import this iso
        # TODO(ShawnLi): remove the code below when this special case is not supported
        try:
            local_import_script = os.path.join(
                "/usr/sbin/software-deploy/", constants.MAJOR_RELEASE_UPLOAD_SCRIPT)

            load_import_cmd = [local_import_script,
                               "--from-release=%s" % from_release,
                               "--to-release=%s" % to_release,
                               "--iso-dir=%s" % iso_mount_dir,
                               "--is-usm-iso=False"]

            load_import_info, load_import_error = run_script_command(load_import_cmd)
            local_info += load_import_info or ""
            local_error += load_import_error or ""

            # Update the release metadata
            # metadata files have been copied over to the metadata/available directory
            reload_release_data()
            LOG.info("Updated release metadata for %s", to_release)

            release_meta_info = {
                os.path.basename(upgrade_files[constants.ISO_EXTENSION]): {
                    "id": constants.RELEASE_GA_NAME % to_release,
                    "sw_release": to_release,
                },
                os.path.basename(upgrade_files[constants.SIG_EXTENSION]): {
                    "id": None,
                    "sw_release": None,
                }
            }
            return local_info, local_warning, local_error, release_meta_info

        except Exception as e:
            LOG.exception("Error occurred while running local load import script: %s", str(e))
            raise

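    # Illustrative command assembled above (versions and mount point assumed;
    # the script name comes from constants.MAJOR_RELEASE_UPLOAD_SCRIPT):
    #   /opt/software/rel-24.09/bin/usm_load_import \
    #       --from-release=22.12 --to-release=24.09 --iso-dir=/mnt/iso
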
    def get_release_meta_info(self, iso_mount_dir, upgrade_files) -> dict:
        """
        Get release metadata information from metadata.xml
        :param iso_mount_dir: ISO mount directory
        :param upgrade_files: dict of upgrade files
        :return: dict of release metadata info
        """

        # Get release metadata
        # NOTE(bqian) to_release is sw_version (MM.mm), the path isn't correct
        # also prepatched iso needs to be handled.
        # should go through the release_data to find the latest release of major release
        # to_release
        abs_meta_file_dir = os.path.join(iso_mount_dir, 'upgrades')
        release_metadata_file_list = utils.find_file_by_regex(
            abs_meta_file_dir, r'^([a-zA-Z]+)-([\d.]+)-metadata\.xml$')
        if len(release_metadata_file_list) == 0:
            raise SoftwareServiceError("No release metadata file found in %s" % abs_meta_file_dir)
        release_metadata_file = release_metadata_file_list[0]
        abs_stx_release_metadata_file = os.path.join(
            iso_mount_dir, 'upgrades', release_metadata_file)
        all_release_meta_info = parse_release_metadata(abs_stx_release_metadata_file)
        return {
            os.path.basename(upgrade_files[constants.ISO_EXTENSION]): {
                "id": all_release_meta_info.get("id"),
                "sw_release": all_release_meta_info.get("sw_version"),
            },
            os.path.basename(upgrade_files[constants.SIG_EXTENSION]): {
                "id": None,
                "sw_release": None,
            }
        }

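    # Illustrative return value of get_release_meta_info (names assumed):
    #   {"starlingx-24.09.0.iso": {"id": "starlingx-24.09.0", "sw_release": "24.09.0"},
    #    "starlingx-24.09.0.sig": {"id": None, "sw_release": None}}
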
    def _clean_up_load_import(
            self, iso_mount_dir, to_release, iso_file, is_import_completed, is_max_rel_exceeded):
        """
        Clean up load and import
        :param iso_mount_dir: ISO mount directory
        :param to_release: To release
        :param iso_file: ISO file
        :param is_import_completed: Is import completed
        :param is_max_rel_exceeded: Is max release exceeded
        """
        # Unmount the iso file
        if iso_mount_dir:
            unmount_iso_load(iso_mount_dir)
            LOG.info("Unmounted iso file %s", iso_file)

        # remove upload leftover in case of failure
        if to_release and not is_import_completed and not is_max_rel_exceeded:
            to_release_dir = os.path.join(constants.SOFTWARE_STORAGE_DIR, "rel-%s" % to_release)
            shutil.rmtree(to_release_dir, ignore_errors=True)

    def _clean_up_inactive_load_import(self, release_version):
        """
        Clean up inactive load and import
        :param release_version: Release version
        """
        dirs_to_remove = [
            f"{constants.DC_VAULT_PLAYBOOK_DIR}/{release_version}",
            f"{constants.DC_VAULT_LOADS_DIR}/{release_version}"
        ]

        for dir_path in dirs_to_remove:
            if os.path.exists(dir_path):
                shutil.rmtree(dir_path, ignore_errors=True)
                LOG.info("Removed %s", dir_path)

        # TODO(ShawnLi): the code below only cleans up files that were created by the usm_load_import script
        # delete 22.12 iso metadata in /opt/software/metadata/unavailable
        # delete 22.12 patches in /opt/software/metadata/committed
        file_patterns = [
            (states.UNAVAILABLE_DIR, fr'^([a-zA-Z]+)-({release_version})-metadata\.xml$'),
            (states.COMMITTED_DIR, fr'^([a-zA-Z]+)_({release_version})_PATCH_([0-9]+)-metadata\.xml$')
        ]

        # Remove files matching patterns
        for directory, pattern in file_patterns:
            matched_file_names = utils.find_file_by_regex(directory, pattern)
            for filename in matched_file_names:
                abs_filename = os.path.join(directory, filename)
                try:
                    os.remove(abs_filename)
                    LOG.info("Removed: %s", abs_filename)
                except OSError:
                    LOG.warning("Failed to remove: %s", abs_filename)

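    # Illustrative filenames matched by the patterns above (names assumed):
    #   metadata/unavailable: starlingx-22.12-metadata.xml
    #   metadata/committed:   starlingx_22.12_PATCH_0001-metadata.xml
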
    def _process_upload_upgrade_files(
            self, from_release, to_release, iso_mount_dir, supported_from_releases, upgrade_files):
        """
        Process the uploaded upgrade files
        :param from_release: From release
        :param to_release: To release
        :param iso_mount_dir: ISO mount directory
        :param supported_from_releases: List of supported releases
        :param upgrade_files: dict of upgrade files
        :return: info, warning, error messages, dict of release metadata info
        """

        # validate this major release upload
        self.major_release_upload_check()

        try:
            # Validate that the current release is supported to upgrade to the new release
            supported_versions = [v.get("version") for v in supported_from_releases]
            if SW_VERSION not in supported_versions:
                raise UpgradeNotSupported("Current release %s not supported to upgrade to %s"
                                          % (SW_VERSION, to_release))
            # Run the major-release-upload script
            LOG.info("Starting load import from %s", upgrade_files[constants.ISO_EXTENSION])
            return self._run_load_import(from_release, to_release, iso_mount_dir, upgrade_files)
        except Exception as e:
            LOG.exception("Error occurred while processing upload upgrade files: %s", str(e))
            raise

def _process_inactive_upgrade_files(
|
|
self, from_release, to_release, iso_mount_dir, upgrade_files):
|
|
"""
|
|
Process the uploaded inactive upgrade files, aka N-1 release
|
|
:param from_release: From release
|
|
:param to_release: To release
|
|
:param iso_mount_dir: ISO mount directory
|
|
:param upgrade_files: dict of upgrade files
|
|
:return: info, warning, error messages, dict of release metadata info
|
|
"""
|
|
|
|
# validate this major release upload
|
|
self.major_release_upload_check()
|
|
|
|
to_release_maj_ver = utils.get_major_release_version(to_release)
|
|
|
|
try:
|
|
|
|
# Validate the N-1 release from the iso file is supported to upgrade to the current N release
|
|
current_upgrade_supported_versions = read_upgrade_support_versions(
|
|
"/usr/rootdirs/opt/")
|
|
supported_versions = [v.get("version") for v in current_upgrade_supported_versions]
|
|
|
|
# to_release is N-1 release in here
|
|
if to_release_maj_ver not in supported_versions:
|
|
raise UpgradeNotSupported(
|
|
"ISO file release version %s not supported to upgrade to %s" %
|
|
(to_release_maj_ver, SW_VERSION))
|
|
|
|
# iso validation completed
|
|
LOG.info("Starting load import from %s", upgrade_files[constants.ISO_EXTENSION])
|
|
|
|
# from_release is set to None when uploading N-1 load
|
|
return self._run_load_import(from_release, to_release, iso_mount_dir, upgrade_files)
|
|
|
|
except Exception as e:
|
|
LOG.exception("Error occurred while processing inactive upgrade files: %s", str(e))
|
|
raise
|
|
|
|
def _checkout_commit_to_dc_vault_playbook_dir(self, release_version):
|
|
"""
|
|
Checkout commit to dc-vault playbook dir
|
|
:param release_version: release version
|
|
:return: None
|
|
"""
|
|
dc_vault_playbook_dir = f"{constants.DC_VAULT_PLAYBOOK_DIR}/{release_version}"
|
|
|
|
os.makedirs(dc_vault_playbook_dir, exist_ok=True)
|
|
ostree_repo = os.path.join(constants.FEED_DIR,
|
|
"rel-%s/ostree_repo" % release_version)
|
|
|
|
try:
|
|
latest_commit = ostree_utils.get_feed_latest_commit(release_version)
|
|
LOG.info("Getting latest commit for %s: %s", release_version, latest_commit)
|
|
except OSTreeCommandFail as e:
|
|
LOG.exception("Error occurred while getting latest commit for %s: %s",
|
|
release_version, str(e))
|
|
raise
|
|
|
|
try:
|
|
LOG.info("Checking out commit %s to %s", latest_commit, dc_vault_playbook_dir)
|
|
ostree_utils.checkout_commit_to_dir(
|
|
ostree_repo, latest_commit, dc_vault_playbook_dir, sub_path=constants.PLAYBOOKS_PATH)
|
|
except Exception:
|
|
if os.path.exists(dc_vault_playbook_dir):
|
|
shutil.rmtree(dc_vault_playbook_dir)
|
|
raise
|
|
|
|
def _process_upload_patch_files(self, patch_files):
|
|
"""
|
|
Process the uploaded patch files
|
|
:param patch_files: list of patch files
|
|
:return: info, warning, error messages
|
|
"""
|
|
|
|
local_info = ""
|
|
local_warning = ""
|
|
local_error = ""
|
|
upload_patch_info = []
|
|
try:
|
|
# Create the directories
|
|
for state_dir in states.DEPLOY_STATE_METADATA_DIR:
|
|
os.makedirs(state_dir, exist_ok=True)
|
|
except os.error:
|
|
msg = "Failed to create directories"
|
|
LOG.exception(msg)
|
|
raise SoftwareFail(msg)
|
|
|
|
for patch_file in patch_files:
|
|
|
|
base_patch_filename = os.path.basename(patch_file)
|
|
|
|
# Get the release_id from the patch's metadata
|
|
# and check to see if it's already uploaded
|
|
release_id = get_release_from_patch(patch_file, 'id')
|
|
|
|
release = self.release_collection.get_release_by_id(release_id)
|
|
patch_id = None
|
|
thispatch = None
|
|
|
|
try:
|
|
if release:
|
|
if release.state == states.COMMITTED:
|
|
msg = "%s is committed. Metadata not updated" % release_id
|
|
LOG.info(msg)
|
|
local_info += msg + "\n"
|
|
elif release.state != states.AVAILABLE:
|
|
msg = "%s is not currently in available state to be deployed." % release_id
|
|
LOG.info(msg)
|
|
local_info += msg + "\n"
|
|
else:
|
|
# todo(abailey) PatchFile / extract_patch should be renamed
|
|
patch_id, thispatch, error_msg = PatchFile.extract_patch(
|
|
patch_file,
|
|
metadata_dir=states.AVAILABLE_DIR,
|
|
metadata_only=True,
|
|
existing_content=release.contents,
|
|
base_pkgdata=self.base_pkgdata)
|
|
|
|
if error_msg:
|
|
raise ReleaseValidationFailure(error=error_msg)
|
|
|
|
PatchFile.unpack_patch(patch_file)
|
|
reload_release_data()
|
|
msg = "%s is already uploaded. Updated metadata only" % release_id
|
|
LOG.info(msg)
|
|
local_info += msg + "\n"
|
|
else:
|
|
patch_id, thispatch, error_msg = PatchFile.extract_patch(
|
|
patch_file,
|
|
metadata_dir=states.AVAILABLE_DIR,
|
|
base_pkgdata=self.base_pkgdata)
|
|
|
|
if error_msg:
|
|
raise ReleaseValidationFailure(error=error_msg)
|
|
|
|
PatchFile.unpack_patch(patch_file)
|
|
local_info += "%s is now uploaded\n" % release_id
|
|
reload_release_data()
|
|
|
|
                    # NOTE(bqian) The check below and the exception raised should
                    # be revisited; if applicable, they should be applied at the
                    # beginning of all requests.
|
|
if len(self.hosts) == 0:
|
|
msg = "service is running in incorrect state. No registered host"
|
|
raise InternalError(msg)
|
|
except Exception as e:
|
|
msg = "Failed to upload release %s" % release_id
|
|
LOG.exception("%s: %s" % (msg, e))
|
|
local_error += msg + "\n"
|
|
|
|
if patch_id and thispatch:
|
|
PatchFile.delete_extracted_patch(patch_id, thispatch)
|
|
|
|
try:
|
|
release_sw_version = thispatch.metadata[patch_id]["sw_version"]
|
|
pkg_feed_dir = "%s/rel-%s" % (constants.PACKAGE_FEED_DIR, release_sw_version)
|
|
apt_utils.component_remove(pkg_feed_dir, release_sw_version)
|
|
except Exception:
|
|
LOG.info("Could not delete apt-ostree component, does not exist")
|
|
continue
|
|
|
|
release = self.release_collection.get_release_by_id(release_id)
|
|
if release:
|
|
upload_patch_info.append({
|
|
base_patch_filename: {
|
|
"id": release_id,
|
|
"sw_release": release.sw_release, # MM.mm.pp release version
|
|
}
|
|
})
|
|
|
|
# create versioned precheck for uploaded patches
|
|
for patch in upload_patch_info:
|
|
filename, values = list(patch.items())[0]
|
|
LOG.info("Creating precheck for release %s..." % values.get("id"))
|
|
for pf in patch_files:
|
|
if filename in pf:
|
|
patch_file = pf
|
|
|
|
sw_release = values.get("sw_release")
|
|
|
|
required_patches = []
|
|
for dep_id in self.release_collection.get_release_by_id(values.get("id")).requires_release_ids:
|
|
required_patches.append(version.parse(dep_id))
|
|
|
|
# sort the required patches list and get the latest, if available
|
|
req_patch_version = None
|
|
if len(required_patches) > 0:
|
|
req_patch = str(sorted(required_patches)[-1])
|
|
_, req_patch_version, _, _ = utils.get_component_and_versions(req_patch)
|
|
if self.release_collection.get_release_by_id(req_patch) is None:
|
|
LOG.warning("Required patch '%s' is not uploaded." % req_patch)
|
|
|
|
PatchFile.create_versioned_precheck(patch_file, sw_release, req_patch_version=req_patch_version)
|
|
|
|
return local_info, local_warning, local_error, upload_patch_info
|
|
|
|
def software_release_upload(self, release_files):
|
|
"""
|
|
Upload software release files
|
|
:return: dict of info, warning and error messages
|
|
"""
|
|
|
|
msg_info = ""
|
|
msg_warning = ""
|
|
msg_error = ""
|
|
|
|
upload_info = []
|
|
is_importing_inactive_load = False
|
|
|
|
# Refresh data, if needed
|
|
self.base_pkgdata.loaddirs()
|
|
|
|
msg = "Uploading files: %s" % ",".join(release_files)
|
|
audit_log_info(msg)
|
|
|
|
        # Sort the uploaded files into categories (patch or upgrade)
|
|
patch_files = []
|
|
upgrade_files = {}
|
|
|
|
for uploaded_file in release_files:
|
|
(_, ext) = os.path.splitext(uploaded_file)
|
|
if ext in [constants.PATCH_EXTENSION]:
|
|
patch_files.append(uploaded_file)
|
|
elif ext == constants.ISO_EXTENSION:
|
|
upgrade_files[constants.ISO_EXTENSION] = uploaded_file
|
|
elif ext == constants.SIG_EXTENSION:
|
|
upgrade_files[constants.SIG_EXTENSION] = uploaded_file
|
|
else:
|
|
msg = "The file extension is not supported. Supported extensions include .patch, .iso and .sig"
|
|
LOG.exception(msg)
|
|
raise ReleaseValidationFailure(error=msg)
|
|
|
|
if len(upgrade_files) == 1: # Only one upgrade file uploaded
|
|
msg = "Missing upgrade file or signature file"
|
|
LOG.error(msg)
|
|
msg_error += msg + "\n"
|
|
elif upgrade_files.get(constants.ISO_EXTENSION, None) and self.hostname != constants.CONTROLLER_0_HOSTNAME:
|
|
raise SoftwareServiceError("Upload can only be performed on controller-0.")
|
|
elif len(upgrade_files) == 2: # Two upgrade files uploaded
|
|
tmp_info = ""
|
|
tmp_error = ""
|
|
tmp_warning = ""
|
|
tmp_release_meta_info = {}
|
|
is_import_completed = True
|
|
is_max_rel_exceeded = False
|
|
|
|
iso = upgrade_files[constants.ISO_EXTENSION]
|
|
sig = upgrade_files[constants.SIG_EXTENSION]
|
|
|
|
if not verify_files([iso], sig):
|
|
msg = "Software %s:%s signature validation failed" % (iso, sig)
|
|
raise ReleaseValidationFailure(error=msg)
|
|
|
|
LOG.info("iso and signature files upload completed.")
|
|
|
|
try:
|
|
# Mount the iso file after signature verification
|
|
iso_mount_dir = mount_iso_load(iso, constants.TMP_DIR)
|
|
LOG.info("Mounted iso file %s to %s", iso, iso_mount_dir)
|
|
|
|
# Read the metadata from the iso file to get to-release and supported-from-releases
|
|
supported_from_releases = read_upgrade_support_versions(iso_mount_dir)
|
|
to_release = get_to_release_from_metadata_file(iso_mount_dir)
|
|
to_release_maj_ver = utils.get_major_release_version(to_release)
|
|
LOG.info("Reading metadata from iso file %s completed. \nto_release: %s", iso, to_release_maj_ver)
|
|
|
|
# Same release is uploaded, return the metadata info from the iso file
|
|
if to_release_maj_ver == SW_VERSION:
|
|
tmp_info = f"Uploaded release {to_release} is the same as current release on the controller"
|
|
tmp_release_meta_info = self.get_release_meta_info(iso_mount_dir, upgrade_files)
|
|
elif to_release > SW_VERSION:
|
|
# N + 1 release is uploaded, process it regardless
|
|
tmp_info, tmp_warning, tmp_error, tmp_release_meta_info = self._process_upload_upgrade_files(
|
|
SW_VERSION, to_release, iso_mount_dir, supported_from_releases, upgrade_files)
|
|
elif to_release < SW_VERSION and is_system_controller():
|
|
# N - 1 release is uploaded, process it only if the region is system controller
|
|
is_importing_inactive_load = True
|
|
tmp_info, tmp_warning, tmp_error, tmp_release_meta_info = self._process_inactive_upgrade_files(
|
|
None, to_release, iso_mount_dir, upgrade_files)
|
|
# Checkout commit to dc-vault/playbooks directory
|
|
self._checkout_commit_to_dc_vault_playbook_dir(to_release_maj_ver)
|
|
except MaxReleaseExceeded:
|
|
is_max_rel_exceeded = True
|
|
raise
|
|
except Exception as e:
|
|
LOG.error("Error occurred while processing software release upload: %s", str(e))
|
|
is_import_completed = False
|
|
raise
|
|
finally:
|
|
self._clean_up_load_import(iso_mount_dir, to_release, iso,
|
|
is_import_completed, is_max_rel_exceeded)
|
|
if is_importing_inactive_load and not is_import_completed:
|
|
self._clean_up_inactive_load_import(to_release)
|
|
|
|
msg_info += tmp_info
|
|
msg_warning += tmp_warning
|
|
msg_error += tmp_error
|
|
upload_info.append(tmp_release_meta_info)
|
|
|
|
if len(patch_files) > 0:
|
|
tmp_info, tmp_warning, tmp_error, tmp_patch_meta_info = self._process_upload_patch_files(
|
|
patch_files)
|
|
msg_info += tmp_info
|
|
msg_warning += tmp_warning
|
|
msg_error += tmp_error
|
|
upload_info += tmp_patch_meta_info
|
|
|
|
reload_release_data()
|
|
|
|
return dict(info=msg_info, warning=msg_warning, error=msg_error, upload_info=upload_info)
|
|
|
|
def release_apply_order(self, release_id, running_release_sw_version):
|
|
"""
|
|
Determines the order of releases for applying.
|
|
        :param release_id: The release id being applied
|
|
:param running_release_sw_version: The running release major version
|
|
:return: List of releases in the order for applying
|
|
"""
|
|
|
|
deployed_releases_id = []
|
|
preinstalled_patches = []
|
|
for rel in self.release_collection.iterate_releases():
|
|
if rel.state == states.DEPLOYED:
|
|
deployed_releases_id.append(rel.id)
|
|
|
|
if rel.prepatched_iso:
|
|
preinstalled_patches = rel.preinstalled_patches
|
|
|
|
release_dependencies = self.get_release_dependency_list(release_id, preinstalled_patches)
|
|
release_dependencies.append(release_id)
|
|
|
|
# filter release_dependencies to include only releases
|
|
# that matches the major running release version
|
|
# and remove all releases already deployed, including prepatched
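        # e.g. with running version 24.09, a release id like 'starlingx-24.09.1'
        # is kept while 'starlingx-24.03.2' is filtered out (illustrative ids)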
|
|
to_apply_releases = [
|
|
rel_id for rel_id in release_dependencies
|
|
if f"-{running_release_sw_version}." in rel_id and
|
|
rel_id not in deployed_releases_id + preinstalled_patches
|
|
]
|
|
|
|
to_apply_releases.sort()
|
|
return to_apply_releases
|
|
|
|
def release_remove_order(self, target_release_id, running_release_id, running_release_sw_version):
|
|
"""
|
|
Determines the order of releases for removing based on the feed commit order.
|
|
:param target_release_id: The target release id
|
|
:param running_release_id: The running release id
|
|
:param running_release_sw_version: The running release major version
|
|
:return: List of releases in the order for removing
|
|
"""
|
|
|
|
# if removing release is not from the major running version, cannot remove it
|
|
if f"-{running_release_sw_version}." not in target_release_id:
|
|
return []
|
|
|
|
releases = list(self.release_collection.iterate_releases_by_state(states.DEPLOYED))
|
|
release_map = {release.id: release for release in releases}
|
|
|
|
to_remove_releases = []
|
|
current = running_release_id
|
|
|
|
while current != target_release_id:
|
|
to_remove_releases.append(current)
|
|
current_release = release_map.get(current)
|
|
if not current_release:
|
|
error = f"Release {current} not found in releases map"
|
|
raise SoftwareServiceError(error=error)
|
|
|
|
next_release = next((r for r in releases if r.commit_id == current_release.base_commit_id), None)
|
|
if not next_release:
|
|
error = f"Release with commit id {current_release.base_commit_id} not found"
|
|
raise SoftwareServiceError(error=error)
|
|
|
|
current = next_release.id
|
|
|
|
return to_remove_releases
|
|
|
|
def reset_feed_commit(self, release):
|
|
commit_id = release.commit_id
|
|
if commit_id is None:
|
|
LOG.warning("Unable to find the commit id in metadata")
|
|
return
|
|
|
|
LOG.info("Reset feed to commit %s" % commit_id)
|
|
|
|
try:
|
|
feed_ostree_dir = "%s/rel-%s/ostree_repo" % \
|
|
(constants.FEED_OSTREE_BASE_DIR, release.sw_version)
|
|
|
|
apt_utils.run_rollback(feed_ostree_dir, commit_id)
|
|
self.latest_feed_commit = commit_id
|
|
except APTOSTreeCommandFail:
|
|
msg = "Failure when reseting commit %s" % commit_id
|
|
LOG.exception(msg)
|
|
raise APTOSTreeCommandFail(msg)
|
|
|
|
def software_release_delete_api(self, release_ids):
|
|
"""
|
|
Delete release(s)
|
|
:return: dict of info, warning and error messages
|
|
"""
|
|
msg_info = ""
|
|
msg_warning = ""
|
|
msg_error = ""
|
|
|
|
# Protect against duplications
|
|
full_list = sorted(list(set(release_ids)))
|
|
|
|
not_founds = []
|
|
cannot_del = []
|
|
used_by_subcloud = []
|
|
release_list = []
|
|
for rel_id in full_list:
|
|
rel = self.release_collection.get_release_by_id(rel_id)
|
|
if rel is None:
|
|
not_founds.append(rel_id)
|
|
else:
|
|
if not rel.is_deletable:
|
|
cannot_del.append(rel_id)
|
|
elif rel.is_ga_release and is_system_controller():
|
|
subcloud_by_sw_version = get_subcloud_groupby_version()
|
|
if rel.sw_version in subcloud_by_sw_version:
|
|
used_by_subcloud.append(rel_id)
|
|
else:
|
|
release_list.append(rel_id)
|
|
else:
|
|
release_list.append(rel_id)
|
|
|
|
err_msg = ""
|
|
if not_founds:
|
|
list_str = ','.join(not_founds)
|
|
err_msg = f"Release{'' if len(not_founds) == 1 else 's'} {list_str} can not be found\n"
|
|
|
|
if cannot_del:
|
|
list_str = ','.join(cannot_del)
|
|
err_msg += (f"Release{'' if len(cannot_del) == 1 else 's'} {list_str} "
|
|
f"{'is' if len(cannot_del) == 1 else 'are'} not ready to be deleted\n")
|
|
|
|
if used_by_subcloud:
|
|
list_str = ','.join(used_by_subcloud)
|
|
err_msg += f"Release{'' if len(used_by_subcloud) == 1 else 's'} {list_str} still used by subcloud(s)"
|
|
|
|
if len(err_msg) > 0:
|
|
raise SoftwareServiceError(error=err_msg)
|
|
|
|
msg = "Deleting releases: %s" % ",".join(release_list)
|
|
LOG.info(msg)
|
|
audit_log_info(msg)
|
|
|
|
# Handle operation
|
|
for release_id in release_list:
|
|
release = self.release_collection.get_release_by_id(release_id)
|
|
release_sw_version = release.sw_version
|
|
|
|
# Delete ostree content if it exists.
|
|
# RPM based patches (from upgrades) will not have ostree contents
|
|
ostree_tar_filename = self.get_ostree_tar_filename(release_sw_version, release_id)
|
|
if os.path.isfile(ostree_tar_filename):
|
|
try:
|
|
os.remove(ostree_tar_filename)
|
|
except OSError:
|
|
msg = "Failed to remove ostree tarball %s" % ostree_tar_filename
|
|
LOG.exception(msg)
|
|
raise OSTreeTarFail(msg)
|
|
is_major_release = ReleaseState(release_ids=[release.id]).is_major_release_deployment()
|
|
if not is_major_release:
|
|
package_repo_dir = "%s/rel-%s" % (constants.PACKAGE_FEED_DIR, release_sw_version)
|
|
apt_utils.component_remove(package_repo_dir, release.sw_release)
|
|
|
|
# Delete upgrade iso file in folder
|
|
# TODO(heitormatsui): treat the prepatched iso scenario
|
|
metadata_file = "%s-metadata.xml" % release_id
|
|
delete_feed = False
|
|
to_release_iso_dir = os.path.join(constants.FEED_OSTREE_BASE_DIR, ("rel-%s" % release_sw_version))
|
|
|
|
if os.path.isdir(to_release_iso_dir):
|
|
# check if the release being deleted is related to this feed
|
|
if os.path.isfile("%s/upgrades/%s" % (to_release_iso_dir, metadata_file)):
|
|
delete_feed = True
|
|
if delete_feed:
|
|
try:
|
|
shutil.rmtree(to_release_iso_dir)
|
|
except OSError:
|
|
msg = "Failed to remove release iso %s folder" % to_release_iso_dir
|
|
LOG.exception(msg)
|
|
raise ReleaseIsoDeleteFailure(msg)
|
|
msg = "Deleted feed directory %s" % to_release_iso_dir
|
|
LOG.info(msg)
|
|
msg_info += msg + "\n"
|
|
|
|
# TODO(lbonatti): treat the upcoming versioning changes
|
|
PatchFile.delete_versioned_directory(release.sw_release)
|
|
|
|
# Delete N-1 load on system controller
|
|
if is_system_controller():
|
|
self._clean_up_inactive_load_import(release_sw_version)
|
|
|
|
try:
|
|
# Delete the metadata
|
|
metadata_dir = states.RELEASE_STATE_TO_DIR_MAP[release.state]
|
|
os.remove("%s/%s" % (metadata_dir, metadata_file))
|
|
except OSError:
|
|
# When deleting the load from a system controller, the unavailable
|
|
                # and committed directories are cleaned up and, if the metadata file
                # is located in one of those, it will result in an exception since
|
|
# it would have been already removed by the
|
|
# _clean_up_inactive_load_import method
|
|
if (
|
|
is_system_controller() and
|
|
(
|
|
metadata_dir == states.UNAVAILABLE_DIR or
|
|
metadata_dir == states.COMMITTED_DIR
|
|
)
|
|
):
|
|
msg = (
|
|
f"Metadata file already removed: {metadata_dir}/{metadata_file}"
|
|
)
|
|
LOG.warning(msg)
|
|
else:
|
|
msg = "Failed to remove metadata for %s" % release_id
|
|
LOG.exception(msg)
|
|
raise MetadataFail(msg)
|
|
|
|
self.delete_start_install_script(release_id)
|
|
self.delete_patch_activate_scripts(release_id)
|
|
reload_release_data()
|
|
msg = "%s has been deleted" % release_id
|
|
LOG.info(msg)
|
|
msg_info += msg + "\n"
|
|
|
|
# Refresh data, if needed
|
|
self.base_pkgdata.loaddirs()
|
|
|
|
return dict(info=msg_info, warning=msg_warning, error=msg_error)
|
|
|
|
def in_sync_controller_api(self):
|
|
"""
|
|
Check if both controllers are in sync
|
|
by checking the database JSON file
|
|
"""
|
|
is_in_sync = is_deploy_state_in_sync()
|
|
return {"in_sync": is_in_sync}
|
|
|
|
def patch_init_release_api(self, release_id):
|
|
"""
|
|
Create an empty repo for a new release_id
|
|
:return: dict of info, warning and error messages
|
|
"""
|
|
msg_info = ""
|
|
msg_warning = ""
|
|
msg_error = ""
|
|
|
|
msg = "Initializing repo for: %s" % release_id
|
|
LOG.info(msg)
|
|
audit_log_info(msg)
|
|
|
|
if release_id == SW_VERSION:
|
|
msg = "Rejected: Requested release %s is running release" % release_id
|
|
msg_error += msg + "\n"
|
|
LOG.info(msg)
|
|
return dict(info=msg_info, warning=msg_warning, error=msg_error)
|
|
|
|
# Refresh data
|
|
self.base_pkgdata.loaddirs()
|
|
|
|
reload_release_data()
|
|
|
|
repo_dir[release_id] = "%s/rel-%s" % (repo_root_dir, release_id)
|
|
|
|
# Verify the release doesn't already exist
|
|
if os.path.exists(repo_dir[release_id]):
|
|
msg = "Patch repository for %s already exists" % release_id
|
|
msg_info += msg + "\n"
|
|
LOG.info(msg)
|
|
return dict(info=msg_info, warning=msg_warning, error=msg_error)
|
|
|
|
# Create the repo
|
|
try:
|
|
# todo(jcasteli) determine if ostree change needs a createrepo equivalent
|
|
output = "UNDER CONSTRUCTION for OSTREE"
|
|
LOG.info("Repo[%s] updated:\n%s", release_id, output)
|
|
except Exception:
|
|
msg = "Failed to update the repo for %s" % release_id
|
|
LOG.exception(msg)
|
|
|
|
# Wipe out what was created
|
|
shutil.rmtree(repo_dir[release_id])
|
|
del repo_dir[release_id]
|
|
|
|
raise SoftwareFail(msg)
|
|
|
|
return dict(info=msg_info, warning=msg_warning, error=msg_error)
|
|
|
|
def patch_query_what_requires(self, patch_ids):
|
|
"""
|
|
Query the known patches to see which have dependencies on the specified patches
|
|
:return: dict of info, warning and error messages
|
|
"""
|
|
msg_info = ""
|
|
msg_warning = ""
|
|
msg_error = ""
|
|
|
|
msg = "Querying what requires patches: %s" % ",".join(patch_ids)
|
|
LOG.info(msg)
|
|
audit_log_info(msg)
|
|
|
|
# First, verify that all specified patches exist
|
|
id_verification = True
|
|
for patch_id in patch_ids:
|
|
release = self.release_collection.get_release_by_id(patch_id)
|
|
if release is None:
|
|
msg = "Patch %s does not exist" % patch_id
|
|
LOG.error(msg)
|
|
msg_error += msg + "\n"
|
|
id_verification = False
|
|
|
|
if not id_verification:
|
|
return dict(info=msg_info, warning=msg_warning, error=msg_error)
|
|
|
|
required_patches = {}
|
|
for release in self.release_collection.iterate_releases():
|
|
for req_patch in release.requires_release_ids:
|
|
if req_patch not in patch_ids:
|
|
continue
|
|
|
|
if req_patch not in required_patches:
|
|
required_patches[req_patch] = []
|
|
|
|
required_patches[req_patch].append(release.id)
|
|
|
|
for patch_id in patch_ids:
|
|
if patch_id in required_patches:
|
|
iter_patch_list = required_patches[patch_id]
|
|
msg_info += "%s is required by: %s\n" % (patch_id, ", ".join(sorted(iter_patch_list)))
|
|
else:
|
|
msg_info += "%s is not required by any patches.\n" % patch_id
|
|
|
|
return dict(info=msg_info, warning=msg_warning, error=msg_error)
|
|
|
|
def send_latest_feed_commit_to_agent(self):
|
|
"""
|
|
Notify the patch agent that the latest commit on the feed
|
|
repo has been updated
|
|
"""
|
|
# Skip sending messages if host not yet provisioned
|
|
if self.sock_out is None:
|
|
LOG.info("Skipping send feed commit to agent")
|
|
return
|
|
|
|
send_commit_to_agent = PatchMessageSendLatestFeedCommit()
|
|
self.socket_lock.acquire()
|
|
send_commit_to_agent.send(self.sock_out)
|
|
self.socket_lock.release()
|
|
|
|
def software_sync(self):
|
|
# Increment the software_op_counter here
|
|
self.inc_patch_op_counter()
|
|
|
|
if self.sock_out is None or self.install_local:
|
|
return True
|
|
|
|
# Send the sync requests
|
|
|
|
self.controller_neighbours_lock.acquire()
|
|
for n in self.controller_neighbours:
|
|
self.controller_neighbours[n].clear_synced()
|
|
self.controller_neighbours_lock.release()
|
|
|
|
msg = PatchMessageSyncReq()
|
|
self.socket_lock.acquire()
|
|
msg.send(self.sock_out)
|
|
self.socket_lock.release()
|
|
|
|
        # Now we wait, up to two minutes. Future enhancement: wait on a condition.
|
|
my_ip = cfg.get_mgmt_ip()
|
|
sync_rc = False
|
|
max_time = time.time() + 120
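        # Poll the neighbour sync flags every 0.5 seconds until all peers other
        # than this controller report synced, or the two-minute window expires.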
|
|
while time.time() < max_time:
|
|
all_done = True
|
|
self.controller_neighbours_lock.acquire()
|
|
for n in self.controller_neighbours:
|
|
if n != my_ip and not self.controller_neighbours[n].get_synced():
|
|
all_done = False
|
|
self.controller_neighbours_lock.release()
|
|
|
|
if all_done:
|
|
LOG.info("Sync complete")
|
|
sync_rc = True
|
|
break
|
|
|
|
time.sleep(0.5)
|
|
|
|
# Send hellos to the hosts now, to get queries performed
|
|
hello_agent = PatchMessageHelloAgent()
|
|
self.socket_lock.acquire()
|
|
hello_agent.send(self.sock_out)
|
|
self.socket_lock.release()
|
|
|
|
if not sync_rc:
|
|
LOG.info("Timed out waiting for sync completion")
|
|
return sync_rc
|
|
|
|
def software_release_query_cached(self, **kwargs):
|
|
query_state = None
|
|
if "show" in kwargs:
|
|
valid_query_states = [
|
|
states.AVAILABLE,
|
|
states.UNAVAILABLE,
|
|
states.DEPLOYED,
|
|
states.REMOVING,
|
|
states.COMMITTED,
|
|
states.DEPLOYING
|
|
]
|
|
if kwargs["show"] in valid_query_states:
|
|
query_state = kwargs["show"]
|
|
|
|
query_release = None
|
|
if "release" in kwargs:
|
|
query_release = kwargs["release"]
|
|
|
|
results = []
|
|
|
|
def filter_by_version():
|
|
for r in self.release_collection.iterate_releases():
|
|
if r.sw_version in query_release:
|
|
yield r
|
|
|
|
def filter_by_state():
|
|
for rel in self.release_collection.iterate_releases_by_state(query_state):
|
|
yield rel
|
|
|
|
if query_state is not None:
|
|
iterator = filter_by_state
|
|
elif query_release is not None:
|
|
iterator = filter_by_version
|
|
else:
|
|
iterator = self.release_collection.iterate_releases
|
|
|
|
for i in iterator():
|
|
data = i.to_query_dict()
|
|
results.append(data)
|
|
|
|
return results
|
|
|
|
def software_release_query_specific_cached(self, release_ids):
|
|
LOG.info("software release show")
|
|
|
|
results = []
|
|
|
|
for release_id in release_ids:
|
|
release = self.release_collection.get_release_by_id(release_id)
|
|
if release is not None:
|
|
results.append(release.to_query_dict())
|
|
|
|
return results
|
|
|
|
def get_dependencies(self, patch_ids, recursive):
|
|
dependencies = set()
|
|
patch_added = False
|
|
|
|
# Add patches to workset
|
|
for patch_id in sorted(patch_ids):
|
|
dependencies.add(patch_id)
|
|
patch_added = True
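        # Iterate to a fixed point: each pass adds the requirements of patches
        # already in the set. With recursive=False, patch_added stays False after
        # the first pass, so only direct dependencies are returned.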
|
|
|
|
while patch_added:
|
|
patch_added = False
|
|
for patch_id in sorted(dependencies):
|
|
release = self.release_collection.get_release_by_id(patch_id)
|
|
for req in release.requires:
|
|
if req not in dependencies:
|
|
dependencies.add(req)
|
|
patch_added = recursive
|
|
|
|
return sorted(dependencies)
|
|
|
|
def patch_query_dependencies(self, patch_ids, **kwargs):
|
|
msg = "Patch query-dependencies %s" % patch_ids
|
|
LOG.info(msg)
|
|
audit_log_info(msg)
|
|
|
|
failure = False
|
|
|
|
results = {"patches": [],
|
|
"error": ""}
|
|
|
|
recursive = False
|
|
if kwargs.get("recursive") == "yes":
|
|
recursive = True
|
|
|
|
# Verify patch IDs
|
|
for patch_id in sorted(patch_ids):
|
|
release = self.release_collection.get_release_by_id(patch_id)
|
|
if release is None:
|
|
errormsg = "%s is unrecognized\n" % patch_id
|
|
LOG.info("patch_query_dependencies: %s", errormsg)
|
|
results["error"] += errormsg
|
|
failure = True
|
|
|
|
if failure:
|
|
LOG.info("patch_query_dependencies failed")
|
|
return results
|
|
|
|
results["patches"] = self.get_dependencies(patch_ids, recursive)
|
|
|
|
return results
|
|
|
|
def patch_commit(self, patch_ids, dry_run=False):
|
|
msg = "Patch commit %s" % patch_ids
|
|
LOG.info(msg)
|
|
audit_log_info(msg)
|
|
|
|
try:
|
|
if not os.path.exists(states.COMMITTED_DIR):
|
|
os.makedirs(states.COMMITTED_DIR)
|
|
except os.error:
|
|
msg = "Failed to create %s" % states.COMMITTED_DIR
|
|
LOG.exception(msg)
|
|
raise SoftwareFail(msg)
|
|
|
|
failure = False
|
|
recursive = True
|
|
cleanup_files = set()
|
|
results = {"info": "",
|
|
"error": ""}
|
|
|
|
# Ensure there are only REL patches
|
|
non_rel_list = []
|
|
for release in self.release_collection.iterate_releases():
|
|
if release.status != constants.STATUS_RELEASED:
|
|
non_rel_list.append(release.id)
|
|
|
|
if len(non_rel_list) > 0:
|
|
errormsg = "A commit cannot be performed with non-REL status patches in the system:\n"
|
|
for patch_id in non_rel_list:
|
|
errormsg += " %s\n" % patch_id
|
|
LOG.info("patch_commit rejected: %s", errormsg)
|
|
results["error"] += errormsg
|
|
return results
|
|
|
|
# Verify Release IDs
|
|
for patch_id in sorted(patch_ids):
|
|
release = self.release_collection.get_release_by_id(patch_id)
|
|
if release is None:
|
|
errormsg = "%s is unrecognized\n" % patch_id
|
|
LOG.info("patch_commit: %s", errormsg)
|
|
results["error"] += errormsg
|
|
failure = True
|
|
|
|
if failure:
|
|
LOG.info("patch_commit: Failed patch ID check")
|
|
return results
|
|
|
|
commit_list = self.get_dependencies(patch_ids, recursive)
|
|
|
|
# Check patch states
|
|
avail_list = []
|
|
for patch_id in commit_list:
|
|
release = self.release_collection.get_release_by_id(patch_id)
|
|
if release.state not in [states.DEPLOYED, states.COMMITTED]:
|
|
avail_list.append(patch_id)
|
|
|
|
if len(avail_list) > 0:
|
|
errormsg = "The following patches are not applied and cannot be committed:\n"
|
|
for patch_id in avail_list:
|
|
errormsg += " %s\n" % patch_id
|
|
LOG.info("patch_commit rejected: %s", errormsg)
|
|
results["error"] += errormsg
|
|
return results
|
|
|
|
# TODO(ShawnLi): Comment out for 24.09 release. This is gated to 25.03
|
|
# NOTE(lviera): Must include start scripts, refactor like self.delete_start_install_script(patch_id)
|
|
# for patch_id in commit_list:
|
|
# # Fetch file paths that need to be cleaned up to
|
|
# # free patch storage disk space
|
|
# pre_install_filename = self.release_data.metadata[patch_id].get("pre_install")
|
|
# post_install_filename = self.release_data.metadata[patch_id].get("post_install")
|
|
|
|
# if pre_install_filename:
|
|
# pre_install_script_path = "%s/%s_%s" % (root_scripts_dir, patch_id, pre_install_filename)
|
|
# post_install_script_path = "%s/%s_%s" % (root_scripts_dir, patch_id, post_install_filename)
|
|
# if os.path.exists(pre_install_script_path):
|
|
# cleanup_files.add(pre_install_script_path)
|
|
# if os.path.exists(post_install_script_path):
|
|
# cleanup_files.add(post_install_script_path)
|
|
|
|
# patch_sw_version = utils.get_major_release_version(
|
|
# self.release_data.metadata[patch_id]["sw_version"])
|
|
# abs_ostree_tar_dir = package_dir[patch_sw_version]
|
|
# software_tar_path = "%s/%s-software.tar" % (abs_ostree_tar_dir, patch_id)
|
|
# if os.path.exists(software_tar_path):
|
|
# cleanup_files.add(software_tar_path)
|
|
|
|
# Calculate disk space
|
|
disk_space = 0
|
|
for file in cleanup_files:
|
|
statinfo = os.stat(file)
|
|
disk_space += statinfo.st_size
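        # Note: with the cleanup block above commented out, cleanup_files stays
        # empty, so the dry-run estimate below currently reports 0 MiB.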
|
|
|
|
if dry_run:
|
|
results["info"] = "This commit operation would free %0.2f MiB" % (disk_space / (1024.0 * 1024.0))
|
|
return results
|
|
|
|
# Do the commit
|
|
|
|
# Move the metadata to the committed dir
|
|
for patch_id in commit_list:
|
|
metadata_fname = "%s-metadata.xml" % patch_id
|
|
deployed_fname = os.path.join(states.DEPLOYED_DIR, metadata_fname)
|
|
committed_fname = os.path.join(states.COMMITTED_DIR, metadata_fname)
|
|
if os.path.exists(deployed_fname):
|
|
try:
|
|
shutil.move(deployed_fname, committed_fname)
|
|
except shutil.Error:
|
|
msg = "Failed to move the metadata for %s" % patch_id
|
|
LOG.exception(msg)
|
|
raise MetadataFail(msg)
|
|
|
|
# Delete the files
|
|
for file in cleanup_files:
|
|
try:
|
|
os.remove(file)
|
|
except OSError:
|
|
msg = "Failed to remove: %s" % file
|
|
LOG.exception(msg)
|
|
raise MetadataFail(msg)
|
|
|
|
reload_release_data()
|
|
|
|
results["info"] = "The releases have been committed."
|
|
return results
|
|
|
|
def query_host_cache(self):
|
|
output = []
|
|
|
|
self.hosts_lock.acquire()
|
|
for nbr in list(self.hosts):
|
|
host = self.hosts[nbr].get_dict()
|
|
host["interim_state"] = False
|
|
for patch_id in list(sc.interim_state):
|
|
if nbr in sc.interim_state[patch_id]:
|
|
host["interim_state"] = True
|
|
|
|
output.append(host)
|
|
|
|
self.hosts_lock.release()
|
|
|
|
return output
|
|
|
|
def any_patch_host_installing(self):
|
|
rc = False
|
|
|
|
with self.hosts_lock:
|
|
for host in self.hosts.values():
|
|
if host.state == constants.PATCH_AGENT_STATE_INSTALLING:
|
|
rc = True
|
|
break
|
|
return rc
|
|
|
|
def copy_install_scripts(self):
|
|
applying_states = [states.DEPLOYING, states.REMOVING]
|
|
for release in self.release_collection.iterate_releases():
|
|
pre_install = release.pre_install
|
|
post_install = release.post_install
|
|
folder = ["preinstall", "postinstall"]
|
|
if release.state in applying_states:
|
|
try:
|
|
for i, file in enumerate([pre_install, post_install]):
|
|
if file:
|
|
full_name_file = "%s_%s" % (release.id, file)
|
|
script_path = "%s/%s" % (root_scripts_dir, full_name_file)
|
|
dest_path = constants.PATCH_SCRIPTS_STAGING_DIR + "/" + folder[i]
|
|
dest_script_file = "%s/%s" % (dest_path, full_name_file)
|
|
if not os.path.exists(dest_path):
|
|
os.makedirs(dest_path, 0o700)
|
|
shutil.copyfile(script_path, dest_script_file)
|
|
os.chmod(dest_script_file, 0o700)
|
|
msg = "Creating install script %s for %s" % (full_name_file, release.id)
|
|
LOG.info(msg)
|
|
except shutil.Error:
|
|
msg = "Failed to copy the install script %s for %s" % (full_name_file, release.id)
|
|
LOG.exception(msg)
|
|
raise SoftwareError(msg)
|
|
else:
|
|
try:
|
|
                    # Iterate over both slots so the index keeps matching folder[i]
                    # even when only post_install is present
                    for i, file in enumerate((pre_install, post_install)):
                        if not file:
                            continue
                        full_name_file = "%s_%s" % (release.id, file)
                        script_path = "%s/%s/%s" % (constants.PATCH_SCRIPTS_STAGING_DIR, folder[i], full_name_file)
                        if os.path.exists(script_path):
                            os.remove(script_path)
                            msg = "Removing install script %s for %s" % (full_name_file, release.id)
                            LOG.info(msg)
|
|
                except OSError:  # os.remove raises OSError, which shutil.Error would not catch
|
|
msg = "Failed to delete the install script %s for %s" % (full_name_file, release.id)
|
|
LOG.exception(msg)
|
|
|
|
def _update_state_to_peer(self):
|
|
self.socket_lock.acquire()
|
|
try:
|
|
state_update_msg = SoftwareMessageDeployStateUpdate()
|
|
state_update_msg.send(self.sock_out)
|
|
finally:
|
|
self.socket_lock.release()
|
|
|
|
def _sanitize_extra_options(self, value):
|
|
"""
|
|
        Make sure the value contains only allowed characters.
|
|
"""
|
|
# Only letters, numbers, space, -, and _ are allowed.
|
|
if not re.match(r'^[\w\s\-]+$', value):
|
|
msg_error = f"Invalid value: '{value}'."
|
|
raise SoftwareServiceError(msg_error)
|
|
return value
|
|
|
|
def _parse_and_sanitize_extra_options(self, options_list):
|
|
"""
|
|
        Validate and sanitize 'key=value' items and convert them to a dictionary.
|
|
"""
|
|
|
|
for item in options_list:
|
|
if item.count('=') != 1:
|
|
msg_error = f"Invalid format: '{item}'. Expected format is key=value"
|
|
raise SoftwareServiceError(msg_error)
|
|
|
|
options = {}
|
|
for item in options_list:
|
|
key, value = item.split('=', 1)
|
|
key = self._sanitize_extra_options(key.strip())
|
|
value = self._sanitize_extra_options(value.strip())
|
|
|
|
if key in constants.RESERVED_WORDS_SET:
|
|
msg_error = f"{key} is a reserved word and can't be used."
|
|
raise SoftwareServiceError(msg_error)
|
|
|
|
options[key] = value
|
|
return options
|
|
|
|
def _release_basic_checks(self, deployment):
|
|
"""
|
|
        Perform basic sanity checks on the release data
        :param deployment: release to be checked
        :return: the release object if it exists
        :raises SoftwareServiceError: if the release does not exist
|
|
"""
|
|
|
|
# We need to verify that the software release exists
|
|
release = self.release_collection.get_release_by_id(deployment)
|
|
if not release:
|
|
msg = "Software release version corresponding to the specified release " \
|
|
"%s does not exist." % deployment
|
|
LOG.error(msg)
|
|
msg = msg + " Try deleting and re-uploading the software for recovery."
|
|
raise SoftwareServiceError(error=msg)
|
|
|
|
return release
|
|
|
|
def _deploy_precheck(self, release_version: str, force: bool = False,
|
|
region_name: typing.Optional[str] = None, patch: bool = False,
|
|
**kwargs) -> dict:
|
|
"""
|
|
        Verify that the system satisfies the requirements to upgrade to the specified deployment.
|
|
:param release_version: full release name, e.g. starlingx-MM.mm.pp
|
|
:param force: if True will ignore minor alarms during precheck
|
|
:param region_name: region_name
|
|
:param patch: if True then indicate precheck is for patch release
|
|
:return: dict of info, warning and error messages
|
|
"""
|
|
|
|
msg_info = ""
|
|
msg_warning = ""
|
|
msg_error = ""
|
|
|
|
if region_name is None:
|
|
region_name = utils.get_local_region_name()
|
|
precheck_script = utils.get_precheck_script(release_version)
|
|
|
|
if not os.path.isfile(precheck_script) and patch:
|
|
# Precheck script may not be available for some patches
|
|
# In that case, report system as healthy with info message to proceed
|
|
self._save_precheck_result(release_version, healthy=True)
|
|
msg_info = f"No deploy-precheck script available for patch version {release_version}"
|
|
return dict(info=msg_info, warning=msg_warning, error=msg_error, system_healthy=True)
|
|
|
|
if not os.path.isfile(precheck_script):
|
|
msg = "Release files for deployment %s are not present on the system, " \
|
|
"cannot proceed with the precheck." % release_version
|
|
LOG.error(msg)
|
|
msg_error = "Fail to perform deploy precheck. " \
|
|
"Uploaded release may have been damaged. " \
|
|
"Try delete and re-upload the release.\n"
|
|
self._save_precheck_result(release_version, healthy=False)
|
|
return dict(info=msg_info, warning=msg_warning, error=msg_error)
|
|
|
|
if self.pre_bootstrap and not patch:
|
|
# Deploy precheck should be avoided in case of major release.
|
|
msg_info = "Major release precheck is not valid in pre bootstrap scenario.\n"
|
|
self._save_precheck_result(release_version, healthy=True)
|
|
return dict(info=msg_info, warning=msg_info, error=msg_error, system_healthy=True)
|
|
|
|
if self.pre_bootstrap and not force:
|
|
# Deploy precheck may not be supported in prebootstrap environment if
|
|
# script access any of services like sysinv, keystone, etc.
|
|
msg_warning = "Pre-bootstrap environment may not support deploy precheck.\n" \
|
|
"Use --force option to execute deploy precheck script.\n"
|
|
self._save_precheck_result(release_version, healthy=True)
|
|
return dict(info=msg_info, warning=msg_warning, error=msg_error, system_healthy=True)
|
|
|
|
deploy_in_progress = self._get_software_upgrade()
|
|
|
|
# parse local config file to pass parameters to precheck script
|
|
try:
|
|
cp = configparser.ConfigParser(interpolation=None)
|
|
cp.read(constants.SOFTWARE_CONFIG_FILE_LOCAL)
|
|
ks_section = dict(cp["keystone_authtoken"]) if cp.has_section("keystone_authtoken") else {}
|
|
auth_url = ks_section.get("auth_url")
|
|
username = ks_section.get("username")
|
|
password = ks_section.get("password")
|
|
project_name = ks_section.get("project_name")
|
|
user_domain_name = ks_section.get("user_domain_name")
|
|
project_domain_name = ks_section.get("project_domain_name")
|
|
except Exception as e:
|
|
msg = "Error parsing config file: %s." % str(e)
|
|
LOG.error(msg)
|
|
msg_error = "Fail to perform deploy precheck. Internal error has occured." \
|
|
"Try lock and unlock the controller for recovery.\n"
|
|
self._save_precheck_result(release_version, healthy=False)
|
|
return dict(info=msg_info, warning=msg_warning, error=msg_error)
|
|
|
|
# Get releases info required for precheck
|
|
releases = self.software_release_query_cached()
|
|
|
|
preinstalled_patches = []
|
|
for release in releases:
|
|
if release['prepatched_iso']:
|
|
preinstalled_patches = release.get('preinstalled_patches', [])
|
|
break
|
|
|
|
for release in releases:
|
|
keys_to_delete = ['packages', 'summary', 'description',
|
|
'install_instructions', 'warnings', 'component']
|
|
for key in keys_to_delete:
|
|
del release[key]
|
|
|
|
# remove patch from requires if present in preinstalled_patches
|
|
if preinstalled_patches:
|
|
requires = release.get('requires', [])
|
|
common = set(requires) & set(preinstalled_patches)
|
|
if common:
|
|
release['requires'] = [id for id in requires if id not in common]
|
|
LOG.info("Removed %s from %s requires list, since these are prepatched"
|
|
% (common, release['release_id']))
|
|
|
|
cmd = [precheck_script,
|
|
"--auth_url=%s" % auth_url,
|
|
"--username=%s" % username,
|
|
"--password=%s" % password,
|
|
"--project_name=%s" % project_name,
|
|
"--user_domain_name=%s" % user_domain_name,
|
|
"--project_domain_name=%s" % project_domain_name,
|
|
"--region_name=%s" % region_name,
|
|
"--releases=%s" % json.dumps(releases),
|
|
"--options=%s" % json.dumps(kwargs.get("options", {})),
|
|
"--deploy_in_progress=%s" % json.dumps(deploy_in_progress)]
|
|
if force:
|
|
cmd.append("--force")
|
|
if patch:
|
|
cmd.append("--patch")
|
|
|
|
# Call precheck from the deployment files
|
|
precheck_return = subprocess.run(
|
|
cmd,
|
|
stderr=subprocess.STDOUT,
|
|
stdout=subprocess.PIPE,
|
|
check=False,
|
|
text=True,
|
|
)
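        # RC_SUCCESS and RC_UNHEALTHY both mean the precheck script ran to
        # completion (healthy / unhealthy respectively); any other return code
        # is treated as a failure of the script itself.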
|
|
system_healthy = None
|
|
if precheck_return.returncode in [constants.RC_SUCCESS, constants.RC_UNHEALTHY]:
|
|
system_healthy = precheck_return.returncode == constants.RC_SUCCESS
|
|
self._save_precheck_result(release_version, healthy=system_healthy)
|
|
msg_info += precheck_return.stdout
|
|
else:
|
|
self._save_precheck_result(release_version, healthy=False)
|
|
msg_error += precheck_return.stdout
|
|
|
|
return dict(info=msg_info, warning=msg_warning, error=msg_error, system_healthy=system_healthy)
|
|
|
|
def _get_release_additional_info(self, release):
|
|
"""
|
|
Get additional information related to release in precheck api.
|
|
:return: dict with release info.
|
|
"""
|
|
release_info = {}
|
|
running_release = self.release_collection.running_release
|
|
|
|
release_info["major_release"] = utils.is_upgrade_deploy(SW_VERSION, release.sw_release)
|
|
release_info["reboot_required"] = release.reboot_required
|
|
release_info["prepatched_iso"] = release.prepatched_iso
|
|
release_info["apply_operation"] = release > running_release
|
|
|
|
return release_info
|
|
|
|
def software_deploy_precheck_api(self, deployment: str, force: bool = False, region_name=None,
|
|
**kwargs) -> dict:
|
|
"""
|
|
        Verify that the system satisfies the requirements to upgrade to the specified deployment.
|
|
:param deployment: full release name, e.g. starlingx-MM.mm.pp
|
|
:param force: if True will ignore minor alarms during precheck
|
|
:return: dict of info, warning and error messages
|
|
"""
|
|
|
|
release = self._release_basic_checks(deployment)
|
|
release_version = release.sw_release
|
|
|
|
# Check fields (MM.mm) of release_version to set patch flag
|
|
is_patch = (not utils.is_upgrade_deploy(SW_VERSION, release_version))
|
|
if not is_patch and socket.gethostname() != constants.CONTROLLER_0_HOSTNAME:
|
|
raise SoftwareServiceError(f"Deploy precheck for major releases needs to be executed in"
|
|
f" {constants.CONTROLLER_0_HOSTNAME} host.")
|
|
if kwargs.get("options"):
|
|
kwargs["options"] = self._parse_and_sanitize_extra_options(kwargs.get("options"))
|
|
ret = self._deploy_precheck(release_version, force, region_name, is_patch, **kwargs)
|
|
if ret:
|
|
if ret.get("system_healthy") is None:
|
|
ret["error"] = "Fail to perform deploy precheck. Internal error has occurred.\n" + \
|
|
ret.get("error")
|
|
elif not ret.get("system_healthy"):
|
|
ret["error"] = "The following issues have been detected, which prevent " \
|
|
"deploying %s\n" % deployment + ret.get("info")
|
|
release_info = self._get_release_additional_info(release)
|
|
ret.update(release_info)
|
|
return ret
|
|
|
|
def _deploy_upgrade_start(self, to_release, commit_id, **kwargs):
|
|
LOG.info("start deploy upgrade to %s from %s" % (to_release, SW_VERSION))
|
|
deploy_script_name = constants.DEPLOY_START_SCRIPT
|
|
cmd_path = utils.get_software_deploy_script(to_release, deploy_script_name)
|
|
if not os.path.isfile(cmd_path):
|
|
msg = f"{deploy_script_name} was not found"
|
|
LOG.error(msg)
|
|
raise SoftwareServiceError(f"{deploy_script_name} was not found. "
|
|
"The uploaded software could have been damaged. "
|
|
"Please delete the software and re-upload it")
|
|
major_to_release = utils.get_major_release_version(to_release)
|
|
k8s_ver = get_k8s_ver()
|
|
postgresql_port = str(cfg.alt_postgresql_port)
|
|
feed = os.path.join(constants.FEED_DIR,
|
|
"rel-%s/ostree_repo" % major_to_release)
|
|
|
|
LOG.info("k8s version %s" % k8s_ver)
|
|
upgrade_start_cmd = [cmd_path, SW_VERSION, major_to_release, k8s_ver, postgresql_port,
|
|
feed]
|
|
|
|
        upgrade_start_cmd.append(commit_id if commit_id is not None else "0")  # subprocess args must be strings
|
|
upgrade_start_cmd.append(json.dumps(kwargs.get("options")) if kwargs.get("options") is not None else "")
|
|
# pass in keystone auth through environment variables
|
|
# OS_AUTH_URL, OS_USERNAME, OS_PASSWORD, OS_PROJECT_NAME, OS_USER_DOMAIN_NAME,
|
|
# OS_PROJECT_DOMAIN_NAME, OS_REGION_NAME are in env variables.
|
|
keystone_auth = CONF.get('keystone_authtoken')
|
|
env = {}
|
|
env["OS_AUTH_URL"] = keystone_auth["auth_url"] + '/v3'
|
|
env["OS_USERNAME"] = keystone_auth["username"]
|
|
env["OS_PASSWORD"] = keystone_auth["password"]
|
|
env["OS_PROJECT_NAME"] = keystone_auth["project_name"]
|
|
env["OS_USER_DOMAIN_NAME"] = keystone_auth["user_domain_name"]
|
|
env["OS_PROJECT_DOMAIN_NAME"] = keystone_auth["project_domain_name"]
|
|
env["OS_REGION_NAME"] = keystone_auth["region_name"]
|
|
env["IGNORE_ERRORS"] = self.ignore_errors
|
|
|
|
try:
|
|
LOG.info("starting subprocess %s" % ' '.join(upgrade_start_cmd))
|
|
subprocess.Popen(upgrade_start_cmd, start_new_session=True, shell=False, env=env)
|
|
LOG.info("subprocess started")
|
|
return True
|
|
except subprocess.SubprocessError as e:
|
|
LOG.error("Failed to start command: %s. Error %s" % (' '.join(upgrade_start_cmd), e))
|
|
return False
|
|
|
|
def deploy_state_changed(self, new_state):
|
|
'''Handle 'deploy state change' event, invoked when operations complete. '''
|
|
|
|
deploy_state = DeployState.get_instance()
|
|
state_event = {
|
|
DEPLOY_STATES.START_DONE: deploy_state.start_done,
|
|
DEPLOY_STATES.START_FAILED: deploy_state.start_failed,
|
|
DEPLOY_STATES.ACTIVATE_DONE: deploy_state.activate_done,
|
|
DEPLOY_STATES.ACTIVATE_FAILED: deploy_state.activate_failed,
|
|
DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE: deploy_state.activate_rollback_done,
|
|
DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED: deploy_state.activate_rollback_failed,
|
|
DEPLOY_STATES.HOST_FAILED: deploy_state.deploy_host_failed
|
|
}
|
|
if new_state in state_event:
|
|
state_event[new_state]()
|
|
else:
|
|
msg = f"Received invalid deploy state update {deploy_state}"
|
|
LOG.error(msg)
|
|
|
|
def host_deploy_state_changed(self, hostname, host_deploy_state):
|
|
'''Handle 'host deploy state change' event. '''
|
|
|
|
deploy_host_state = DeployHostState(hostname)
|
|
state_event = {
|
|
DEPLOY_HOST_STATES.FAILED: deploy_host_state.failed
|
|
}
|
|
if host_deploy_state in state_event:
|
|
state_event[host_deploy_state]()
|
|
else:
|
|
msg = f"Received invalid deploy host state update {host_deploy_state}"
|
|
LOG.error(msg)
|
|
|
|
def add_text_tag_to_xml(self, parent, tag, text):
|
|
'''Add text to tag. Create it if it does not exist'''
|
|
element = parent.find(tag)
|
|
if element is None:
|
|
element = ET.SubElement(parent, tag)
|
|
element.text = text
|
|
return element
|
|
|
|
def is_deployment_list_reboot_required(self, deployment_list):
|
|
"""Check if any deploy in deployment list is reboot required"""
|
|
for release_id in deployment_list:
|
|
release = self.release_collection.get_release_by_id(release_id)
|
|
if release.reboot_required:
|
|
return True
|
|
return False
|
|
|
|
def copy_patch_activate_scripts(self, release_id, activate_scripts_list):
|
|
"""Copy patch activate scripts to /etc/update.d"""
|
|
|
|
try:
|
|
existing_scripts = list(os.listdir(PATCH_MIGRATION_SCRIPT_DIR))
|
|
|
|
for script in activate_scripts_list:
|
|
full_name_file = "%s_%s" % (release_id, script)
|
|
script_path = "%s/%s" % (root_scripts_dir, full_name_file)
|
|
dest_script_file = "%s/%s" % (PATCH_MIGRATION_SCRIPT_DIR, script)
|
|
|
|
# Do not copy if script already exists in folder
|
|
if script in existing_scripts:
|
|
msg = "Script %s already exists in %s. Skipping copy" \
|
|
% (script, PATCH_MIGRATION_SCRIPT_DIR)
|
|
LOG.info(msg)
|
|
continue
|
|
|
|
shutil.copyfile(script_path, dest_script_file)
|
|
os.chmod(dest_script_file, 0o755)
|
|
msg = "Creating patch activate script %s for %s" \
|
|
% (full_name_file, release_id)
|
|
LOG.info(msg)
|
|
except shutil.Error:
|
|
msg = "Failed to copy patch activate script %s for %s" \
|
|
% (full_name_file, release_id)
|
|
LOG.exception(msg)
|
|
raise SoftwareError(msg)
|
|
|
|
def delete_all_patch_activate_scripts(self):
|
|
"""Delete all patch activate scripts in /etc/update.d"""
|
|
if os.path.exists(PATCH_MIGRATION_SCRIPT_DIR):
|
|
for script_name in os.listdir(PATCH_MIGRATION_SCRIPT_DIR):
|
|
script_path = os.path.join(PATCH_MIGRATION_SCRIPT_DIR, script_name)
|
|
try:
|
|
os.remove(script_path)
|
|
msg = "Deleted patch script: %s" % script_path
|
|
LOG.info(msg)
|
|
except Exception as e:
|
|
msg = "Failed to delete patch script %s. Reason: %s" % (script_path, e)
|
|
LOG.error(msg)
|
|
|
|
def _run_start_script(self, script_name, release_id, operation):
|
|
"""Run pre_start or post_start scripts"""
|
|
script_path = os.path.join(root_scripts_dir, f"{release_id}_{script_name}")
|
|
|
|
if os.path.isfile(script_path):
|
|
LOG.info("Running %s script", script_name)
|
|
try:
|
|
output = subprocess.check_output(
|
|
["sudo", script_path, f"--operation={operation}"],
|
|
stderr=subprocess.STDOUT,
|
|
text=True
|
|
)
|
|
LOG.info("%s output:\n%s" % (script_name, output.strip()))
|
|
except subprocess.CalledProcessError as e:
|
|
msg = "Failed to execute %s for release %s." % (script_name, release_id)
|
|
LOG.exception(msg)
|
|
LOG.error("Command output: %s", e.output)
|
|
raise SoftwareError(msg)
|
|
else:
|
|
LOG.warning("Script %s not found", script_name)
|
|
|
|
def cleanup_old_releases(self, target_commit, all_commits):
|
|
index = 0
|
|
to_delete_releases = []
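        # all_commits is ordered newest-first (see install_releases_thread), so
        # this walks from the current feed head down to, but not including,
        # target_commit, collecting the releases layered on top of it.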
|
|
|
|
while index < len(all_commits) and target_commit != all_commits[index]:
|
|
to_delete_release = self.release_collection.get_release_by_commit_id(all_commits[index])
|
|
if to_delete_release:
|
|
to_delete_releases.append(to_delete_release.id)
|
|
LOG.info("Deleting %s not used after prestage" % to_delete_release.id)
|
|
index += 1
|
|
|
|
# Delete metadata and all associated release files
|
|
self.software_release_delete_api(to_delete_releases)
|
|
|
|
def install_releases_thread(self, deployment_list, feed_repo, upgrade=False, **kwargs):
|
|
"""
|
|
        In a separate thread, install the Debian packages, create the commit
        and update the metadata.
        If it's an upgrade, also run the upgrade script.
|
|
"""
|
|
def run():
|
|
LOG.info("Installing releases on repo: %s" % feed_repo)
|
|
|
|
try:
|
|
deploy_sw_version = None
|
|
for release_id in deployment_list:
|
|
msg = "Starting deployment for: %s" % release_id
|
|
LOG.info(msg)
|
|
audit_log_info(msg)
|
|
|
|
deploy_release = self._release_basic_checks(release_id)
|
|
self.copy_patch_activate_scripts(release_id, deploy_release.activation_scripts)
|
|
|
|
# Run pre_start script
|
|
self._run_start_script(deploy_release.pre_start, release_id, constants.APPLY)
|
|
# Reload release in case pre_start script made some change
|
|
reload_release_data()
|
|
deploy_release = self._release_basic_checks(release_id)
|
|
|
|
deploy_sw_version = deploy_release.sw_version
|
|
|
|
all_commits = ostree_utils.get_all_feed_commits(deploy_release.sw_version)
|
|
latest_commit = all_commits[0]
|
|
target_commit = deploy_release.commit_id
|
|
if target_commit in all_commits:
|
|
# This case is for node with prestaged data where ostree
|
|
# commits have been pulled from system controller
|
|
LOG.info("Commit %s already exists in feed repo for release %s"
|
|
% (deploy_release.commit_id, release_id))
|
|
|
|
                        # If this is the last deployment and it is not the latest
                        # commit in the feed, delete the newer commits and their metadata
|
|
if release_id == deployment_list[-1] and target_commit != latest_commit:
|
|
self.cleanup_old_releases(target_commit, all_commits)
|
|
|
|
# Reset feed to last deployment release
|
|
self.reset_feed_commit(deploy_release)
|
|
|
|
continue
|
|
|
|
packages = [pkg.split("_")[0] for pkg in deploy_release.packages]
|
|
                    if not packages:  # an empty package list also means nothing to install
|
|
msg = "Unable to determine packages to install"
|
|
LOG.error(msg)
|
|
raise MetadataFail(msg)
|
|
|
|
# Install debian package through apt-ostree
|
|
try:
|
|
apt_utils.run_install(
|
|
feed_repo,
|
|
deploy_release.sw_version,
|
|
deploy_release.sw_release,
|
|
packages)
|
|
except APTOSTreeCommandFail:
|
|
msg = "Failed to install Debian packages."
|
|
LOG.exception(msg)
|
|
raise APTOSTreeCommandFail(msg)
|
|
|
|
# Get the latest commit after performing "apt-ostree install".
|
|
self.latest_feed_commit = \
|
|
ostree_utils.get_feed_latest_commit(deploy_release.sw_version)
|
|
|
|
deploystate = deploy_release.state
|
|
metadata_dir = states.RELEASE_STATE_TO_DIR_MAP[deploystate]
|
|
metadata_file = "%s/%s-metadata.xml" % (metadata_dir, release_id)
|
|
|
|
reload_release_data()
|
|
                    # NOTE(bqian) The check below and the exception raised should
                    # be revisited; if applicable, they should be applied at the
                    # beginning of all requests.
|
|
if len(self.hosts) == 0:
|
|
msg = "service is running in incorrect state. No registered host"
|
|
raise InternalError(msg)
|
|
|
|
with self.hosts_lock:
|
|
self.interim_state[release_id] = list(self.hosts)
|
|
|
|
self.latest_feed_commit = \
|
|
ostree_utils.get_feed_latest_commit(deploy_release.sw_version)
|
|
|
|
# Update metadata
|
|
tree = ET.parse(metadata_file)
|
|
root = tree.getroot()
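                    # Record the ostree lineage in the metadata: <base>/<commit>
                    # holds the previous feed head and <commit1>/<commit> the new
                    # commit produced by the apt-ostree install above.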
|
|
|
|
contents = ET.SubElement(root, constants.CONTENTS_TAG)
|
|
ostree = ET.SubElement(contents, constants.OSTREE_TAG)
|
|
self.add_text_tag_to_xml(ostree, constants.NUMBER_OF_COMMITS_TAG, "1")
|
|
base = ET.SubElement(ostree, constants.BASE_TAG)
|
|
self.add_text_tag_to_xml(base, constants.COMMIT_TAG, latest_commit)
|
|
self.add_text_tag_to_xml(base, constants.CHECKSUM_TAG, "")
|
|
commit1 = ET.SubElement(ostree, constants.COMMIT1_TAG)
|
|
self.add_text_tag_to_xml(commit1, constants.COMMIT_TAG, self.latest_feed_commit)
|
|
self.add_text_tag_to_xml(commit1, constants.CHECKSUM_TAG, "")
|
|
|
|
ET.indent(tree, ' ')
|
|
with open(metadata_file, "wb") as outfile:
|
|
                        outfile.write(ET.tostring(root))
|
|
|
|
LOG.info("Latest feed commit: %s added to metadata file" % self.latest_feed_commit)
|
|
|
|
# Run post_start script
|
|
self._run_start_script(deploy_release.post_start, release_id, constants.APPLY)
|
|
|
|
                # For a prepatched iso, add a tombstone commit
|
|
ostree_utils.add_tombstone_commit_if_prepatched(constants.OSTREE_REF, feed_repo)
|
|
|
|
# Update the feed ostree summary
|
|
ostree_utils.update_repo_summary_file(feed_repo)
|
|
self.latest_feed_commit = ostree_utils.get_feed_latest_commit(deploy_sw_version)
|
|
|
|
self.send_latest_feed_commit_to_agent()
|
|
self.software_sync()
|
|
|
|
if upgrade:
|
|
base_deployment = deployment_list[0]
|
|
base_release = self._release_basic_checks(base_deployment)
|
|
upgrade_commit_id = base_release.commit_id
|
|
if self._deploy_upgrade_start(base_release.sw_release, upgrade_commit_id, **kwargs):
|
|
LOG.info("Finished releases %s deploy start" % deployment_list)
|
|
else:
|
|
raise ValueError("_deploy_upgrade_start failed")
|
|
else:
|
|
# move the deploy state to start-done
|
|
deploy_state = DeployState.get_instance()
|
|
deploy_state.start_done(self.latest_feed_commit)
|
|
LOG.info("Finished releases %s deploy start" % deployment_list)
|
|
|
|
except Exception as e:
|
|
msg = "Deploy start applying failed: %s" % str(e)
|
|
LOG.exception(msg)
|
|
audit_log_info(msg)
|
|
|
|
try:
|
|
# set state to failed
|
|
deploy_state = DeployState.get_instance()
|
|
deploy_state.start_failed()
|
|
except Exception as e:
|
|
msg = "Unable to set deploy failed: %s" % str(e)
|
|
LOG.exception(msg)
|
|
audit_log_info(msg)
|
|
|
|
thread = threading.Thread(target=run)
|
|
thread.start()
|
|
|
|
    def _precheck_before_start(self, deployment, release_version, is_patch, force=False, **kwargs):
        LOG.info("Running deploy precheck.")
        precheck_result = self._deploy_precheck(release_version, patch=is_patch, force=force, **kwargs)
        if precheck_result.get('system_healthy') is None:
            precheck_result["error"] = (
                f"Failed to perform deploy precheck. An internal error has occurred.\n"
                f"{precheck_result['error']}"
            )
            return precheck_result
        elif precheck_result.get('system_healthy') is False:
            precheck_result["error"] = (
                f"The following issues have been detected, which prevent deploying {deployment}\n"
                f"{precheck_result['info']}\n"
                "Please fix the above issues, then retry the deploy.\n"
            )
            return precheck_result
        return None

    def _get_precheck_result_file_path(self, release_version):
        return os.path.join("/opt/software/", f"rel-{release_version}", "precheck-result.json")

    def _safe_remove_precheck_result_file(self, release_version):
        precheck_result_file = self._get_precheck_result_file_path(release_version)
        if os.path.isfile(precheck_result_file):
            os.remove(precheck_result_file)

    def _save_precheck_result(self, release_version, healthy):
        precheck_result_file = self._get_precheck_result_file_path(release_version)
        with open(precheck_result_file, "w") as f:
            json.dump({"healthy": healthy, "timestamp": time.time()}, f)

    def _should_run_precheck_prior_deploy_start(self, release_version, force, is_patch, **kwargs):
        # there is no precheck script in this state
        if self.pre_bootstrap:
            return False

        # we should be able to patch an unhealthy system, ignoring the unhealthy state
        if is_patch and force:
            return False

        file_path = self._get_precheck_result_file_path(release_version)
        if not os.path.isfile(file_path):
            LOG.info("The precheck result file %s does not exist." % file_path)
            return True

        if kwargs:
            return True

        with open(file_path) as f:
            last_result = json.load(f)

        if time.time() - last_result["timestamp"] > constants.PRECHECK_RESULT_VALID_PERIOD:
            LOG.info("The precheck result expired.")
            return True

        return not last_result["healthy"]

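    # Illustrative sketch (not part of the service; path and values assumed):
    # the cached precheck result consumed above is a small JSON file, e.g.
    # /opt/software/rel-24.09.1/precheck-result.json, written by
    # _save_precheck_result:
    #
    #   {"healthy": true, "timestamp": 1718000000.0}
    #
    # A fresh file with "healthy": true lets deploy start skip the precheck;
    # a missing, expired, or unhealthy result (or any extra kwargs) re-runs it.
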
    @require_deploy_state([None],
                          "There is already a deployment in progress ({state.value}). "
                          "Please complete/delete the current deployment.")
    def software_deploy_start_api(self, deployment: str, force: bool, **kwargs) -> dict:
        """
        Start the deploy of a specified release.

        The operation implies deploying all undeployed dependency releases of
        the specified release, i.e., deploying release 24.09.1 implies
        deploying 24.09.0 and 24.09.1 when 24.09.0 has not been deployed.

        The operation includes these steps:
        1. find all undeployed dependency releases
        2. ensure all releases (dependency and specified release) are ready to be deployed
        3. precheck, if the last precheck was not executed, was executed and failed, or
           its result expired
        4. transition all involved releases to deploying state
        5. start the deploy subprocess
        """
msg_info = ""
|
|
msg_warning = ""
|
|
msg_error = ""
|
|
deploy_release = self._release_basic_checks(deployment)
|
|
|
|
running_release = self.release_collection.running_release
|
|
deploy_sw_version = deploy_release.sw_version # MM.mm
|
|
is_patch = (not utils.is_upgrade_deploy(SW_VERSION, deploy_sw_version))
|
|
|
|
# pre-bootstrap patch removal case
|
|
if not self.pre_bootstrap:
|
|
if (not is_patch) and socket.gethostname() != constants.CONTROLLER_0_HOSTNAME:
|
|
raise SoftwareServiceError(f"Deploy start for major releases needs to be executed in "
|
|
f"{constants.CONTROLLER_0_HOSTNAME} host.")
|
|
|
|
feed_repo = "%s/rel-%s/ostree_repo" % (constants.FEED_OSTREE_BASE_DIR, deploy_sw_version)
|
|
commit_id = deploy_release.commit_id
|
|
# Set hostname in case of local install
|
|
hostname = None
|
|
if self.pre_bootstrap:
|
|
hostname = constants.PREBOOTSTRAP_HOSTNAME
|
|
elif self.install_local:
|
|
hostname = socket.gethostname()
|
|
valid_hostnames = [constants.CONTROLLER_0_HOSTNAME, constants.CONTROLLER_1_HOSTNAME]
|
|
if hostname not in valid_hostnames:
|
|
LOG.warning("Using unknown hostname for local install: %s", hostname)
|
|
|
|
to_release = deploy_release.sw_release
|
|
if kwargs.get("options"):
|
|
kwargs["options"] = self._parse_and_sanitize_extra_options(kwargs.get("options"))
|
|
if self._should_run_precheck_prior_deploy_start(to_release, force, is_patch, **kwargs):
|
|
LOG.info("Executing software deploy precheck prior to software deploy start")
|
|
if precheck_result := self._precheck_before_start(
|
|
deployment,
|
|
to_release,
|
|
is_patch=is_patch,
|
|
force=force,
|
|
**kwargs
|
|
):
|
|
return precheck_result
|
|
self._safe_remove_precheck_result_file(to_release)
|
|
|
|
        # Patch operation: 'deploy release' major version equals 'running release' major version (MM.mm)

        # TODO(bqian) update references of sw_release (string) to SWRelease object

        if deploy_release > running_release:
            operation = constants.APPLY
        elif running_release > deploy_release:
            operation = constants.REMOVE
        else:
            # NOTE(bqian) The error message doesn't seem right. Software version format
            # or any metadata semantic check should be done during upload. If invalid
            # data is found subsequently, the data is considered damaged; recommend
            # delete and re-upload.
            msg_error += "The software version format for this release is not correct.\n"
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        # NOTE(bqian) Shouldn't patch release deploy and remove be doing the same thing
        # in terms of ostree commit, i.e. deploying to the commit specified by the
        # commit-id associated with the release from the deploy start command?
        # If releases are such that:
        # R2 requires R1, R3 requires R2, R4 requires R3
        # If current running release is R2 and command issued is "software deploy start R4"
        # operation is "apply" with order [R3, R4]
        # If current running release is R4 and command issued is "software deploy start R2"
        # operation is "remove" with order [R4, R3]
        if operation == constants.APPLY:
            deployment_list = self.release_apply_order(deployment, deploy_sw_version)

            collect_current_load_for_hosts(deploy_sw_version, hostname=hostname)
            create_deploy_hosts(hostname=hostname)

            msg = "Deploy start order for apply operation: %s" % ",".join(deployment_list)
            LOG.info(msg)
            audit_log_info(msg)

            # todo(jcasteli) Do we need this block below?
            # Check for patches that can't be applied during an upgrade
            upgrade_check = True
            for release_id in deployment_list:
                release = self.release_collection.get_release_by_id(release_id)
                if release.sw_version != SW_VERSION and release.apply_active_release_only == "Y":
                    msg = "%s cannot be created during an upgrade" % release_id
                    LOG.error(msg)
                    msg_error += msg + "\n"
                    upgrade_check = False

            if not upgrade_check:
                return dict(info=msg_info, warning=msg_warning, error=msg_error)

            if kwargs.get("skip-semantic") != "yes":
                self.run_semantic_check(constants.SEMANTIC_PREAPPLY, deployment_list)

            running_release = self.release_collection.running_release
            to_deploy_release_id = deployment_list[-1]
            to_deploy_release = self.release_collection.get_release_by_id(to_deploy_release_id)
            reboot_required = self.is_deployment_list_reboot_required(deployment_list)

            collect_current_load_for_hosts(to_deploy_release.sw_version, hostname=hostname)
            release_state = ReleaseState(release_ids=deployment_list)
            release_state.start_deploy()

            # Setting deploy state to start, so that it can transition to start-done or start-failed
            deploy_state = DeployState.get_instance()
            to_release = to_deploy_release.sw_release
            if is_patch:
                deploy_state.start(running_release, to_release, feed_repo, None, reboot_required)
            else:
                deploy_state.start(running_release, to_release, feed_repo, commit_id,
                                   reboot_required, **kwargs)

            # Start applying the releases
            upgrade = not is_patch
            self.install_releases_thread(deployment_list, feed_repo, upgrade, **kwargs)

            msg_info += "%s is now starting; wait for the states: " \
                        "[deploy-start-done | deploy-start-failed] in " \
                        "'software deploy show'\n" % deployment_list

        elif operation == constants.REMOVE:
            collect_current_load_for_hosts(deploy_sw_version, hostname=hostname)
            create_deploy_hosts(hostname=hostname)
            deployment_list = self.release_remove_order(deployment, running_release.id, running_release.sw_version)

            msg = "Deploy start order for remove operation: %s" % ",".join(deployment_list)
            LOG.info(msg)
            audit_log_info(msg)

            remove_unremovable = False

            if kwargs.get("removeunremovable") == "yes":
                remove_unremovable = True

            # See if any of the patches are marked as unremovable
            unremovable_verification = True
            for release_id in deployment_list:
                release = self.release_collection.get_release_by_id(release_id)
                if release.unremovable:
                    if remove_unremovable:
                        msg = "Unremovable release %s being removed" % release_id
                        LOG.warning(msg)
                        msg_warning += msg + "\n"
                    else:
                        msg = "Release %s is not removable" % release_id
                        LOG.error(msg)
                        msg_error += msg + "\n"
                        unremovable_verification = False
                elif release.state == states.COMMITTED:
                    msg = "Release %s is committed and cannot be removed" % release_id
                    LOG.error(msg)
                    msg_error += msg + "\n"
                    unremovable_verification = False

            if not unremovable_verification:
                return dict(info=msg_info, warning=msg_warning, error=msg_error)

            if kwargs.get("skipappcheck") != "yes":
                # Check application dependencies before removing
                required_releases = {}
                for release in deployment_list:
                    for appname, iter_release_list in self.app_dependencies.items():
                        if release in iter_release_list:
                            if release not in required_releases:
                                required_releases[release] = []
                            required_releases[release].append(appname)

                if len(required_releases) > 0:
                    for req_release, app_list in required_releases.items():
                        msg = "%s is required by application(s): %s" % (req_release, ", ".join(sorted(app_list)))
                        msg_error += msg + "\n"
                        LOG.info(msg)

                    return dict(info=msg_info, warning=msg_warning, error=msg_error)

            if kwargs.get("skip-semantic") != "yes":
                self.run_semantic_check(constants.SEMANTIC_PREREMOVE, deployment_list)

            collect_current_load_for_hosts(deploy_sw_version, hostname=hostname)
            release_state = ReleaseState(release_ids=deployment_list)
            release_state.start_remove()

            reboot_required = self.is_deployment_list_reboot_required(deployment_list)

            deploy_state = DeployState.get_instance()
            to_release = deploy_release.sw_release
            deploy_state.start(running_release, to_release, feed_repo, commit_id, reboot_required)

            try:
                for release_id in deployment_list:
                    release = self.release_collection.get_release_by_id(release_id)
                    msg = "Removing release: %s" % release_id
                    LOG.info(msg)
                    audit_log_info(msg)

                    # Run pre_start script
                    self._run_start_script(release.pre_start, release_id, constants.REMOVE)
                    # Reload release data in case the pre_start script made some change
                    reload_release_data()
                    release = self.release_collection.get_release_by_id(release_id)

                    if release.state == states.AVAILABLE:
                        msg = "The deployment for %s has not been created" % release_id
                        LOG.info(msg)
                        msg_info += msg + "\n"
                        continue

                    self.copy_patch_activate_scripts(release_id, release.activation_scripts)

                    major_release_sw_version = release.sw_version
                    # this is an ostree patch
                    # Base commit is fetched from the patch metadata.
                    base_commit = release.base_commit_id
                    feed_repo = "%s/rel-%s/ostree_repo" % (constants.FEED_OSTREE_BASE_DIR, major_release_sw_version)
                    try:
                        # Reset the ostree HEAD
                        ostree_utils.reset_ostree_repo_head(base_commit, feed_repo)

                        # Delete all commits that belong to this release
                        # NOTE(bqian) there should be just one commit per release.
                        commit_to_delete = release.commit_id
                        ostree_utils.delete_ostree_repo_commit(commit_to_delete, feed_repo)

                        # Update the feed ostree summary
                        ostree_utils.update_repo_summary_file(feed_repo)

                    except OSTreeCommandFail:
                        LOG.exception("Failure while removing release %s.", release_id)

                    # Remove contents tag from metadata xml
                    self.remove_tags_from_metadata(release, constants.CONTENTS_TAG)

                    try:
                        # Move the metadata to the deleted dir
                        self.release_collection.update_state([release_id], states.REMOVING)
                        msg_info += "%s has been removed from the repo\n" % release_id
                    except shutil.Error:
                        msg = "Failed to move the metadata for %s" % release_id
                        LOG.error(msg)
                        raise MetadataFail(msg)

                    if len(self.hosts) == 0:
                        msg = "service is running in incorrect state. No registered host"
                        raise InternalError(msg)

                    # only update latest_feed_commit if it is an ostree patch
                    if release.base_commit_id is not None:
                        # Base Commit in this release's metadata.xml file represents the latest commit
                        # after this release has been removed from the feed repo
                        self.latest_feed_commit = release.base_commit_id

                    with self.hosts_lock:
                        self.interim_state[release_id] = list(self.hosts)

                    # Run post_start script
                    self._run_start_script(release.post_start, release_id, constants.REMOVE)

                    # If prepatched, add a tombstone commit
                    if ostree_utils.add_tombstone_commit_if_prepatched(constants.OSTREE_REF, feed_repo):
                        ostree_utils.update_repo_summary_file(feed_repo)

                # There is no defined behavior for deploy start for patching releases, so
                # move the deploy state to start-done
                deploy_state = DeployState.get_instance()
                deploy_state.start_done(self.latest_feed_commit)

                self.send_latest_feed_commit_to_agent()
                self.software_sync()
            except Exception as e:
                msg_error = "Deploy start removing failed"
                msg = "%s: %s" % (msg_error, e)
                LOG.exception(msg)
                audit_log_info(msg)

                # set state to failed
                deploy_state = DeployState.get_instance()
                deploy_state.start_failed()

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def remove_tags_from_metadata(self, release, tag):
        LOG.info("Removing %s tag from %s metadata" % (tag, release.id))

        metadata_dir = states.RELEASE_STATE_TO_DIR_MAP[release.state]
        metadata_path = "%s/%s-metadata.xml" % (metadata_dir, release.id)
        tree = ET.parse(metadata_path)
        root = tree.getroot()
        metadata_tag = root.find(tag)

        if metadata_tag is not None:
            root.remove(metadata_tag)

        ET.indent(tree, ' ')
        with open(metadata_path, "wb") as outfile:
            tree = ET.tostring(root)
            outfile.write(tree)

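    # Illustrative sketch (tag names and commit values assumed for illustration;
    # the real element names come from constants.CONTENTS_TAG and friends): a
    # patch metadata file carries a contents element shaped like
    #
    #   <contents>
    #     <ostree>
    #       <number_of_commits>1</number_of_commits>
    #       <base><commit>abc123</commit><checksum></checksum></base>
    #       <commit1><commit>def456</commit><checksum></checksum></commit1>
    #     </ostree>
    #   </contents>
    #
    # remove_tags_from_metadata() finds the named tag under the root, removes
    # the whole subtree, and rewrites the file.
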
    def execute_delete_actions(self):
        deploy = self.db_api_instance.get_current_deploy()
        to_release = deploy.get("to_release")
        from_release = deploy.get("from_release")

        delete_cmd = f"/usr/bin/software-deploy-delete {from_release} {to_release} --is_major_release"

        runner = DeployPluginRunner(deploy)
        runner.execute(delete_cmd)

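    # Illustrative sketch (release values assumed): for a deploy record with
    # from_release "24.09.0" and to_release "25.09.0", execute_delete_actions()
    # hands the plugin runner the command
    #
    #   /usr/bin/software-deploy-delete 24.09.0 25.09.0 --is_major_release
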
    @require_deploy_state([DEPLOY_STATES.HOST_ROLLBACK_DONE, DEPLOY_STATES.COMPLETED, DEPLOY_STATES.START_DONE,
                           DEPLOY_STATES.START_FAILED],
                          "Deploy must be in the following states to be able to delete: %s, %s, %s, %s" % (
                              DEPLOY_STATES.HOST_ROLLBACK_DONE.value, DEPLOY_STATES.COMPLETED.value,
                              DEPLOY_STATES.START_DONE.value, DEPLOY_STATES.START_FAILED.value))
    def software_deploy_delete_api(self) -> dict:
        """
        Delete the deployment and the data generated during the deploy.

        :return: dict of info, warning and error messages
        """

        msg_info = ""
        msg_warning = ""
        msg_error = ""

        deploy = self.db_api_instance.get_current_deploy()
        to_release = deploy.get("to_release")
        from_release = deploy.get("from_release")

        deploy_state_instance = DeployState.get_instance()

        # except in early stages of the deployment, such as deploy start,
        # hosts must be unlocked and online, since during delete deployment
        # a request is sent to all hosts to clear flags and temporary data
        # created during the deployment procedure
        if not self.pre_bootstrap:
            if (deploy_state_instance.get_deploy_state() not in [DEPLOY_STATES.START_DONE,
                                                                 DEPLOY_STATES.START_FAILED] and
                    not are_all_hosts_unlocked_and_online()):
                msg = f"Hosts must be {constants.ADMIN_UNLOCKED} and {constants.AVAILABILITY_ONLINE}."
                raise SoftwareServiceError(error=msg)

        is_major_release = False

        deploy_state = deploy_state_instance.get_deploy_state()
        deploying_release_state = ReleaseState(release_state=states.DEPLOYING)
        is_applying = deploying_release_state.has_release_id()

        if deploy_state in [
                DEPLOY_STATES.START_DONE, DEPLOY_STATES.START_FAILED, DEPLOY_STATES.COMPLETED]:
            is_major_release = deploying_release_state.is_major_release_deployment() if is_applying else False
        elif deploy_state == DEPLOY_STATES.HOST_ROLLBACK_DONE:
            is_major_release = ReleaseState(
                release_state=states.DEPLOYING).is_major_release_deployment()

        # Only a major release is required to be deleted on controller-0
        # Patch deletion can take place on either controller
        if is_major_release and self.hostname != constants.CONTROLLER_0_HOSTNAME:
            raise SoftwareServiceError("Deploy delete can only be performed on controller-0.")

        if DEPLOY_STATES.COMPLETED == deploy_state:
            if is_applying:
                major_release = utils.get_major_release_version(from_release)
                # In case of a major release deployment set all the releases related to from_release to unavailable
                if is_major_release:
                    unavailable_releases = []
                    for release in self.release_collection.iterate_releases():
                        if release.sw_version == major_release:
                            unavailable_releases.append(release.id)
                    ReleaseState(release_ids=unavailable_releases).replaced()

                # Set deploying releases to deployed state.
                deploying_release_state.deploy_completed()
            else:
                removing_release_state = ReleaseState(release_state=states.REMOVING)
                removing_release_state.available()

        elif DEPLOY_STATES.HOST_ROLLBACK_DONE == deploy_state:
            major_release = utils.get_major_release_version(from_release)
            release_state = ReleaseState(release_state=states.DEPLOYING)
            release_state.available()

        elif deploy_state in [DEPLOY_STATES.START_DONE, DEPLOY_STATES.START_FAILED]:
            # TODO(bqian), this check is redundant. there should be no host deployed/deploying
            # when deploy in START_DONE or START_FAILED states
            hosts_states = []
            for host in self.db_api_instance.get_deploy_host():
                hosts_states.append(host.get("state"))
            if (DEPLOY_HOST_STATES.DEPLOYED.value in hosts_states or
                    DEPLOY_HOST_STATES.DEPLOYING.value in hosts_states):
                raise SoftwareServiceError(f"There are hosts already {DEPLOY_HOST_STATES.DEPLOYED.value} "
                                           f"or in {DEPLOY_HOST_STATES.DEPLOYING.value} process")

            if is_applying:
                major_release = utils.get_major_release_version(to_release)

                if is_major_release:
                    try:
                        # TODO(bqian) Move below function to a delete action
                        run_remove_temporary_data_script(to_release)
                    except subprocess.CalledProcessError as e:
                        msg_error = "Failed to delete deploy"
                        LOG.error("%s: %s" % (msg_error, e))
                        raise SoftwareServiceError(msg_error)
                else:
                    deployment_list = deploying_release_state.get_release_ids()
                    for release in self.release_collection.iterate_releases():
                        if release.sw_release == from_release:
                            self.reset_feed_commit(release)

                        if release.id in deployment_list:
                            self.remove_tags_from_metadata(release, constants.CONTENTS_TAG)

                deploying_release_state.available()
            else:
                msg_error = "Delete is not supported while removing a release"
                LOG.error(msg_error)
                raise SoftwareServiceError(msg_error)

        if os.path.isfile(INSTALL_LOCAL_FLAG):
            # Remove the install local flag if enabled
            try:
                os.remove(INSTALL_LOCAL_FLAG)
            except Exception:
                msg_error = "Failed to clear install-local mode flag"
                LOG.error(msg_error)
                raise SoftwareServiceError(msg_error)
            LOG.info("Software deployment in local installation mode is stopped")

        if is_major_release:
            if SW_VERSION == major_release:
                msg_error = (
                    f"Deploy {major_release} can't be deleted as it is still the "
                    "current running software. An error may have occurred during the deploy.")
                LOG.error(msg_error)
                raise SoftwareServiceError(msg_error)

            # Send a message to agents to clean up their ostree environment
            # if the deployment has completed or rolled back successfully
            finished_deploy_states = [DEPLOY_STATES.COMPLETED, DEPLOY_STATES.HOST_ROLLBACK_DONE]
            if deploy_state in finished_deploy_states:
                cleanup_req = SoftwareMessageDeployDeleteCleanupReq()
                cleanup_req.major_release = utils.get_major_release_version(to_release)
                cleanup_req.encode()
                self.socket_lock.acquire()
                cleanup_req.send(self.sock_out)
                self.socket_lock.release()

            self.manage_software_alarm(fm_constants.FM_ALARM_ID_USM_CLEANUP_DEPLOYMENT_DATA,
                                       fm_constants.FM_ALARM_STATE_CLEAR,
                                       "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST, constants.CONTROLLER_FLOATING_HOSTNAME))

            # execute deploy delete plugins
            # NOTE(bqian) implemented for major release deploy delete only, as the delete action
            # for patching is undefined, i.e., in the case a patch is applied, both from and
            # to releases are applied.
            self.execute_delete_actions()
        else:
            self.delete_all_patch_activate_scripts()

        msg_info += "Deploy deleted successfully"
        self.db_api_instance.delete_deploy_host_all()
        self.db_api_instance.delete_deploy()

        LOG.info("Deploy is deleted")
        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def _deploy_complete(self):
        is_all_hosts_in_deployed_state = all(host_state.get("state") == DEPLOY_HOST_STATES.DEPLOYED.value
                                             for host_state in self.db_api_instance.get_deploy_host())
        if not is_all_hosts_in_deployed_state:
            raise SoftwareServiceError(f"Complete not allowed because there are hosts not"
                                       f" in {DEPLOY_HOST_STATES.DEPLOYED.value} state.")
        return True

    @require_deploy_state([DEPLOY_STATES.ACTIVATE_DONE],
                          "Deploy must be in %s state to be able to complete." % DEPLOY_STATES.ACTIVATE_DONE.value)
    def software_deploy_complete_api(self) -> dict:
        """
        Completes a deployment associated with the release
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        deploy_state = DeployState.get_instance()

        if self._deploy_complete():
            deploy_state.completed()
            msg_info += "Deployment has been completed\n"
            try:
                # the sysinv evaluate_apps_reapply function needs to
                # be triggered after the deploy complete.
                trigger_evaluate_apps_reapply({"type": "usm-upgrade-complete"})
            except Exception as e:
                LOG.error("The attempt to trigger the evaluate apps reapply "
                          "failed with message: %s", e)

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def _activate(self):
        deploy = self.db_api_instance.get_deploy_all()
        if deploy:
            deploy = deploy[0]
        else:
            msg = "Deployment is missing unexpectedly"
            raise InvalidOperation(msg)

        cmd_path = "/usr/bin/software-deploy-activate"
        from_release = deploy.get("from_release")
        to_release = deploy.get("to_release")
        if self.pre_bootstrap:
            activate_cmd = [cmd_path, from_release, to_release]
        else:
            activate_cmd = ["source", "/etc/platform/openrc;", cmd_path, from_release, to_release]

        deploying = ReleaseState(release_state=states.DEPLOYING)
        if deploying.is_major_release_deployment():
            activate_cmd.append('--is_major_release')

        env = os.environ.copy()
        env["ANSIBLE_LOG_PATH"] = SOFTWARE_LOG_FILE
        if not self.pre_bootstrap:
            token, endpoint = utils.get_endpoints_token()
            env["OS_AUTH_TOKEN"] = token
            env["SYSTEM_URL"] = re.sub('/v[1-9]$', '', endpoint)  # remove ending /v1

        env["IGNORE_ERRORS"] = self.ignore_errors
        try:
            LOG.info("starting subprocess %s" % ' '.join(activate_cmd))
            subprocess.Popen(' '.join(activate_cmd), start_new_session=True, shell=True, env=env)
            LOG.info("subprocess started")
        except subprocess.SubprocessError as e:
            LOG.error("Failed to start command: %s. Error %s" % (' '.join(activate_cmd), e))
            return False

        return True

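    # Illustrative sketch (release values assumed): outside pre-bootstrap, for
    # a major release deployment _activate() composes and detaches a shell
    # command such as
    #
    #   source /etc/platform/openrc; /usr/bin/software-deploy-activate 24.09.0 25.09.0 --is_major_release
    #
    # with ANSIBLE_LOG_PATH, OS_AUTH_TOKEN and SYSTEM_URL exported into its
    # environment.
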
    def _check_pre_activate(self):
        if not self.pre_bootstrap:
            if not are_all_hosts_unlocked_and_online():
                msg = f"Hosts must be {constants.ADMIN_UNLOCKED} and {constants.AVAILABILITY_ONLINE}."
                raise SoftwareServiceError(error=msg)
        # check the current deployment: deploy to all hosts must have completed,
        # so the deploy state is host-done, or 'activate-failed' as a reattempt
        # from a previous failed activate
        deploy_state = DeployState.get_deploy_state()
        if deploy_state not in [DEPLOY_STATES.HOST_DONE, DEPLOY_STATES.ACTIVATE_FAILED]:
            msg = "Must complete deploying all hosts before activating the deployment"
            raise InvalidOperation(msg)

        deploy_hosts = self.db_api_instance.get_deploy_host()
        invalid_hosts = []
        for deploy_host in deploy_hosts:
            if deploy_host['state'] not in [states.DEPLOYED]:
                invalid_hosts.append(deploy_host)

        if len(invalid_hosts) > 0:
            msg = "All hosts must have completed deployment before activating the deployment\n"
            for invalid_host in invalid_hosts:
                msg += "%s: %s\n" % (invalid_host["hostname"], invalid_host["state"])
            raise InvalidOperation(msg)

    @require_deploy_state([DEPLOY_STATES.ACTIVATE, DEPLOY_STATES.ACTIVATE_DONE, DEPLOY_STATES.ACTIVATE_FAILED,
                           DEPLOY_STATES.COMPLETED, DEPLOY_STATES.HOST, DEPLOY_STATES.HOST_DONE,
                           DEPLOY_STATES.HOST_FAILED],
                          "Deploy must be in the following states to be able to abort: %s, %s, %s, %s, %s, %s, %s" %
                          (DEPLOY_STATES.ACTIVATE.value, DEPLOY_STATES.ACTIVATE_DONE.value,
                           DEPLOY_STATES.ACTIVATE_FAILED.value, DEPLOY_STATES.COMPLETED.value, DEPLOY_STATES.HOST.value,
                           DEPLOY_STATES.HOST_DONE.value, DEPLOY_STATES.HOST_FAILED.value))
    def software_deploy_abort_api(self) -> dict:
        """
        Aborts the deployment associated with the release
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        deploy = self.db_api_instance.get_current_deploy()
        from_release = deploy.get("from_release")
        to_release = deploy.get("to_release")
        from_release_deployment = self.release_collection.get_release_id_by_sw_release(from_release)
        to_release_deployment = self.release_collection.get_release_id_by_sw_release(to_release)

        try:
            is_major_release = ReleaseState(release_state=states.DEPLOYING).is_major_release_deployment()
        except AttributeError:
            release = self.release_collection.get_release_by_id(to_release_deployment)
            is_major_release = ReleaseState(release_ids=[release.id]).is_major_release_deployment()

        if not is_major_release:
            removing_release_state = ReleaseState(release_state=states.REMOVING)
            is_removing = removing_release_state.has_release_id()

            if is_removing:
                raise SoftwareServiceError("Abort operation is not supported in patch removal")

            from_deployment = self.release_collection.get_release_by_id(from_release_deployment)
            self.reset_feed_commit(from_deployment)

            self.send_latest_feed_commit_to_agent()
            self.software_sync()

        major_from_release = utils.get_major_release_version(from_release)
        feed_repo = "%s/rel-%s/ostree_repo" % (constants.FEED_OSTREE_BASE_DIR, major_from_release)
        deploy_release = self._release_basic_checks(from_release_deployment)
        commit_id = deploy_release.commit_id

        # TODO(lbonatti): remove this condition when commit-id is built into GA metadata.
        if is_major_release and commit_id in [constants.COMMIT_DEFAULT_VALUE, None]:
            commit_id = ostree_utils.get_feed_latest_commit(deploy_release.sw_version)

        # Update the deployment
        deploy_state = DeployState.get_instance()
        deploy_state.abort(feed_repo, commit_id)

        # Update the host deployment
        deploy_host = self.db_api_instance.get_deploy_host()
        for host in deploy_host:
            hostname = host.get("hostname")
            deploy_host_state = DeployHostState(hostname)
            deploy_host_state.abort()

        msg_info += "Deployment has been aborted\n"
        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    @require_deploy_state([DEPLOY_STATES.HOST_DONE, DEPLOY_STATES.ACTIVATE_FAILED],
                          "Activate deployment only when current deployment state is {require_states}")
    def software_deploy_activate_api(self) -> dict:
        """
        Activates the deployment associated with the release
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        self._check_pre_activate()

        deploy_state = DeployState.get_instance()
        deploy_state.activate()

        try:
            self._activate()
            msg_info = "Deploy activate has started"
        except Exception:
            deploy_state.activate_failed()
            raise

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def _activate_rollback_major_release(self, deploy):
        cmd_path = "/usr/bin/software-deploy-activate-rollback"
        from_release = utils.get_major_release_version(deploy.get("from_release"))
        to_release = utils.get_major_release_version(deploy.get("to_release"))

        token, endpoint = utils.get_endpoints_token()
        env = os.environ.copy()
        env["ANSIBLE_LOG_PATH"] = SOFTWARE_LOG_FILE
        env["OS_AUTH_TOKEN"] = token
        env["SYSTEM_URL"] = re.sub('/v[1-9]$', '', endpoint)  # remove ending /v1

        env["IGNORE_ERRORS"] = self.ignore_errors
        upgrade_activate_rollback_cmd = [
            "source", "/etc/platform/openrc;", cmd_path, from_release, to_release]

        # check if LVM snapshots are enabled and try to restore them
        # TODO(heitormatsui): we don't really need to verify the system mode
        # as LVM snapshots will only be allowed if the system is AIO-SX
        system_mode = utils.get_platform_conf("system_mode")
        if system_mode == constants.SYSTEM_MODE_SIMPLEX:
            deploy = self.db_api_instance.get_deploy_all()[0]
            options = deploy.get("options", {})
            enabled_lvm_snapshots = to_bool(options.get("snapshot"))
            if enabled_lvm_snapshots:
                LOG.info("LVM snapshots are enabled")
                manager = lvm_snapshot.LVMSnapshotManager()
                success = manager.restore_snapshots()
                if success:
                    LOG.info("LVM snapshots were restored, upgrade scripts with "
                             "action=activate-rollback will be skipped")
                    deploy_state = DeployState.get_instance()
                    deploy_state.activate_rollback_done()
                    return
                else:
                    LOG.warning("Failure restoring LVM snapshots, falling back "
                                "to standard activate-rollback procedure")

        try:
            LOG.info("starting subprocess %s" % ' '.join(upgrade_activate_rollback_cmd))
            subprocess.Popen(' '.join(upgrade_activate_rollback_cmd), start_new_session=True, shell=True, env=env)
            LOG.info("subprocess started")
        except subprocess.SubprocessError as e:
            LOG.error("Failed to start command: %s. Error %s" % (' '.join(upgrade_activate_rollback_cmd), e))
            raise

    def _activate_rollback_patching_release(self):
        deploy_state = DeployState.get_instance()
        # patching release activate-rollback operations go here
        deploy_state.activate_rollback_done()

    def _activate_rollback(self):
        deploy = self.db_api_instance.get_current_deploy()
        if not deploy:
            msg = "Deployment is missing unexpectedly"
            raise InvalidOperation(msg)

        deploying = ReleaseState(release_state=states.DEPLOYING)
        if deploying.is_major_release_deployment():
            self._activate_rollback_major_release(deploy)
        else:
            self._activate_rollback_patching_release()

    @require_deploy_state([DEPLOY_STATES.ACTIVATE_ROLLBACK_PENDING, DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED],
                          "Activate-rollback deployment only when current deployment state is {require_states}")
    def software_deploy_activate_rollback_api(self) -> dict:
        """
        Rolls back the activation of the deployment associated with the release
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        deploy_state = DeployState.get_instance()
        deploy_state.activate_rollback()

        try:
            self._activate_rollback()
            msg_info = "Deploy activate-rollback has started"
        except Exception:
            deploy_state.activate_rollback_failed()
            raise

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def software_deploy_show_api(self, from_release=None, to_release=None):
        # Retrieve deploy state from db
        if from_release and to_release:
            deploy_data = self.db_api_instance.get_deploy(from_release, to_release)
            if not deploy_data:
                return deploy_data
            release_deployment = deploy_data["to_release"]
        else:
            # Retrieve deploy state from db in list format
            deploy_data = self.db_api_instance.get_deploy_all()
            if not deploy_data:
                return deploy_data
            release_deployment = deploy_data[0]["to_release"]

        release_id = self.release_collection.get_release_id_by_sw_release(release_deployment)
        release = self._release_basic_checks(release_id)
        release_info = self._get_release_additional_info(release)

        if isinstance(deploy_data, list):
            deploy_data[0].update(release_info)
        else:
            deploy_data.update(release_info)
        return deploy_data

    def _deploy_host(self, hostname, force, async_req=False, rollback=False):
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        try:
            ip = utils.gethostbyname(hostname)
        except socket.gaierror:
            msg_error += "Host %s not found\n" % hostname
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        # NOTE(bqian) Get the IP address to fulfill the needs of the patching structure.
        # need to review the design
        # ensure the ip is in the table, as in some cases the host has aged out of the hosts table
        if ip not in self.hosts:
            raise HostIpNotFound(hostname)

        # check if the host agent is reachable via message
        self.hosts[ip].is_alive = False
        check_alive_req = SoftwareMessageCheckAgentAliveReq()
        check_alive_req.ip = ip
        self.socket_lock.acquire()
        check_alive_req.send(self.sock_out)
        self.socket_lock.release()
        time.sleep(5)  # sleep 5 seconds for the agent to reply
        if not self.hosts[ip].is_alive:
            raise HostAgentUnreachable(hostname)

        is_major_release = self.check_upgrade_in_progress()
        deploy_host = self.db_api_instance.get_deploy_host_by_hostname(hostname)
        if deploy_host is None:
            raise HostNotFound(hostname)

        deploy = self.db_api_instance.get_deploy_all()[0]
        # Determine reboot required from deployment info
        self.allow_insvc_patching = True
        is_reboot_req = deploy.get(constants.REBOOT_REQUIRED, False)
        if is_reboot_req:
            self.allow_insvc_patching = False

        # for rr patch in pre bootstrap
        if self.pre_bootstrap:
            self.allow_insvc_patching = True

        commit_id = deploy.get("commit_id")
        if not self.install_local:
            deploy_host_validations(
                hostname,
                is_major_release=is_major_release,
                rollback=rollback
            )
        deploy_state = DeployState.get_instance()
        deploy_host_state = DeployHostState(hostname)
        deploy_state.deploy_host()
        deploy_host_state.deploy_started()

        # if in a 'deploy host' reentrant scenario, i.e. retrying after
        # a failure, then clear the failure alarm before retrying
        entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST, hostname)
        self.manage_software_alarm(fm_constants.FM_ALARM_ID_USM_DEPLOY_HOST_FAILURE,
                                   fm_constants.FM_ALARM_STATE_CLEAR,
                                   entity_instance_id)

        msg = "Running software deploy host for %s (%s), force=%s, async_req=%s" % (
            hostname, ip, force, async_req)
        LOG.info(msg)
        audit_log_info(msg)

        if not is_major_release and self.allow_insvc_patching:
            LOG.info("Allowing in-service patching")
            force = True
            self.copy_install_scripts()

        # Check if there is a major release deployment in progress
        # and set agent request parameters accordingly
        major_release = None
        additional_data = {}
        if is_major_release:
            upgrade_release = self.get_software_upgrade()
            major_release = upgrade_release["to_release"]
            force = False
            async_req = False
            msg = "Running major release deployment, major_release=%s, force=%s, async_req=%s, commit_id=%s" % (
                major_release, force, async_req, commit_id)
            msg_info += msg + "\n"
            LOG.info(msg)
            try:
                copy_pxeboot_update_file(major_release, rollback=rollback)
                copy_pxeboot_cfg_files(major_release)
            except Exception:
                LOG.error("Failed to start deploy host")
                deploy_host_state.deploy_failed()
                raise

            # TODO(bqian) The code below is for upgrading to stx-10. Besides being specific
            # to that upgrade path, the solution is also temporary. Need a better design with
            # smooth support of host deploy with predetermined parameters
            impacted_upgrade = ["24.09", "22.12"]
            if upgrade_release["to_release"] in impacted_upgrade and \
                    upgrade_release["from_release"] in impacted_upgrade:
                if rollback:
                    oot_drivers = ""
                else:
                    try:
                        oot_drivers = get_oot_drivers()
                    except ServiceParameterNotFound:
                        # the oot_drivers should be identical to the new default service parameter declared in
                        # config/controllerconfig/controllerconfig/upgrade-scripts/26-add-service-parameter.py#L52
                        oot_drivers = "ice,i40e,iavf"

                additional_data.update({"out-of-tree-drivers": oot_drivers})

        self.hosts_lock.acquire()
        self.hosts[ip].install_pending = True
        self.hosts[ip].install_status = False
        self.hosts[ip].install_reject_reason = None
        self.hosts_lock.release()

        installreq = PatchMessageAgentInstallReq(additional_data)
        installreq.ip = ip
        installreq.force = force
        installreq.major_release = major_release
        installreq.commit_id = commit_id
        installreq.encode()
        self.socket_lock.acquire()
        installreq.send(self.sock_out)
        self.socket_lock.release()

        if async_req:
            # async_req install requested, so return now
            msg = "Host deployment request sent to %s." % self.hosts[ip].hostname
            msg_info += msg + "\n"
            LOG.info("host-install async_req: %s", msg)
            # TODO(bqian) update deploy state to deploy-host

        # Now we wait, up to ten minutes. future enhancement: wait on a condition
        resp_rx = False
        max_time = time.time() + 600
        success = True
        # NOTE(bqian) the loop below blocks the REST API service (slow thread)
        # Consider removing.
        while time.time() < max_time:
            self.hosts_lock.acquire()
            if ip not in self.hosts:
                # The host aged out while we were waiting
                self.hosts_lock.release()
                success = False
                msg = "Agent expired while waiting: %s" % ip
                msg_error += msg + "\n"
                LOG.error("Error in host-install: %s", msg)
                break

            if not self.hosts[ip].install_pending:
                # We got a response
                resp_rx = True
                if self.hosts[ip].install_status:
                    msg = "Host deployment was successful on %s." % self.hosts[ip].hostname
                    msg_info += msg + "\n"
                    LOG.info("host-install: %s", msg)
                elif self.hosts[ip].install_reject_reason:
                    msg = "Host deployment rejected by %s. %s" % (
                        self.hosts[ip].hostname,
                        self.hosts[ip].install_reject_reason)
                    msg_error += msg + "\n"
                    LOG.error("Error in host-install: %s", msg)
                    success = False
                else:
                    msg = "Host deployment failed on %s." % self.hosts[ip].hostname
                    msg_error += msg + "\n"
                    LOG.error("Error in host-install: %s", msg)
                    success = False

                self.hosts_lock.release()
                break

            self.hosts_lock.release()

            time.sleep(0.5)

        if not resp_rx:
            msg = "Timeout occurred while waiting for a response from %s." % ip
            msg_error += msg + "\n"
            LOG.error("Error in host-install: %s", msg)
            success = False

        if not success:
            deploy_host_state.deploy_failed()

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    @require_deploy_state([DEPLOY_STATES.START_DONE, DEPLOY_STATES.HOST, DEPLOY_STATES.HOST_FAILED],
                          "Current deployment ({state.value}) is not ready to deploy host")
    def software_deploy_host_api(self, hostname, force, async_req=False):
        return self._deploy_host(hostname, force, async_req)

    @require_deploy_state([DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE,
                           DEPLOY_STATES.HOST_ROLLBACK, DEPLOY_STATES.HOST_ROLLBACK_FAILED],
                          "Current deployment ({state.value}) is not ready to rollback host")
    def software_deploy_host_rollback_api(self, hostname, force, async_req=False):
        return self._deploy_host(hostname, force, async_req, rollback=True)

    def drop_host(self, host_ip, sync_nbr=True):
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        ip = host_ip

        self.hosts_lock.acquire()
        # If not in hosts table, maybe a hostname was used instead
        if host_ip not in self.hosts:
            try:
                # Because the host may be getting dropped due to deletion,
                # we may be unable to do a hostname lookup. Instead, we'll
                # iterate through the table here.
                for host in list(self.hosts):
                    if host_ip == self.hosts[host].hostname:
                        ip = host
                        break

                if ip not in self.hosts:
                    # Translated successfully, but IP isn't in the table.
                    # Raise an exception to drop out to the failure handling
                    raise SoftwareError("Host IP (%s) not in table" % ip)
            except Exception:
                self.hosts_lock.release()
                msg = "Unknown host specified: %s" % host_ip
                msg_error += msg + "\n"
                LOG.error("Error in drop-host: %s", msg)
                return dict(info=msg_info, warning=msg_warning, error=msg_error)

        msg = "Running drop-host for %s (%s)" % (host_ip, ip)
        LOG.info(msg)
        audit_log_info(msg)

        del self.hosts[ip]
        for patch_id in list(self.interim_state):
            if ip in self.interim_state[patch_id]:
                self.interim_state[patch_id].remove(ip)

        self.hosts_lock.release()

        if sync_nbr:
            sync_msg = PatchMessageDropHostReq()
            sync_msg.ip = ip
            self.socket_lock.acquire()
            sync_msg.send(self.sock_out)
            self.socket_lock.release()

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def check_releases_state(self, release_ids, state):
        """check all releases to be in the specified state"""
        all_matched = True

        for release_id in release_ids:
            release = self.release_collection.get_release_by_id(release_id)
            if release is None:
                all_matched = False
                break

            if release.state != state:
                all_matched = False
                break
        return all_matched

    def is_available(self, release_ids):
        return self.check_releases_state(release_ids, states.AVAILABLE)

    def is_deployed(self, release_ids):
        return self.check_releases_state(release_ids, states.DEPLOYED)

    def is_committed(self, release_ids):
        return self.check_releases_state(release_ids, states.COMMITTED)

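    # Illustrative sketch (release ids assumed): the three helpers above are
    # thin wrappers over check_releases_state, e.g.
    #
    #   sc.is_available(["starlingx-24.09.1", "starlingx-24.09.2"])
    #
    # is True only when every listed release is currently in the AVAILABLE
    # state; an unknown release id makes the check False.
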
    # NOTE(bqian) report_app_dependencies function not being called?
    # which means self.app_dependencies will always be empty and file
    # app_dependency_filename will never exist?
    def report_app_dependencies(self, patch_ids, **kwargs):
        """
        Handle report of application dependencies
        """
        if "app" not in kwargs:
            raise ReleaseInvalidRequest

        appname = kwargs.get("app")

        LOG.info("Handling app dependencies report: app=%s, patch_ids=%s",
                 appname, ','.join(patch_ids))

        if len(patch_ids) == 0:
            if appname in self.app_dependencies:
                del self.app_dependencies[appname]
        else:
            self.app_dependencies[appname] = patch_ids

        try:
            tmpfile, tmpfname = tempfile.mkstemp(
                prefix=app_dependency_basename,
                dir=constants.SOFTWARE_STORAGE_DIR)

            os.write(tmpfile, json.dumps(self.app_dependencies).encode())
            os.close(tmpfile)

            os.rename(tmpfname, app_dependency_filename)
        except Exception:
            LOG.exception("Failed in report_app_dependencies")
            raise SoftwareFail("Internal failure")

        return True

    # NOTE(bqian) unused function query_app_dependencies
    def query_app_dependencies(self):
        """
        Query application dependencies
        """
        data = self.app_dependencies

        return dict(data)

    def is_host_next_to_be_deployed_api(self, hostname):
        is_major_release = ReleaseState(release_state=states.DEPLOYING).is_major_release_deployment()
        deploy_state = DeployState.get_deploy_state()
        # If there's no deploy in progress return False
        if deploy_state is None:
            return False
        is_rollback_action = deploy_state in [DEPLOY_STATES.HOST_ROLLBACK, DEPLOY_STATES.ACTIVATE_ROLLBACK_PENDING,
                                              DEPLOY_STATES.HOST_ROLLBACK_FAILED, DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE,
                                              DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED]
        try:
            validate_host_deploy_order(hostname, is_major_release, is_rollback_action)
            return True
        except SoftwareServiceError:
            return False
        except Exception as err:
            msg_error = "Error checking deploy order"
            LOG.exception("%s: %s" % (msg_error, err))
            return False

    def deploy_host_list(self):
        deploy_hosts = self.db_api_instance.get_deploy_host()
        deploy = self.db_api_instance.get_deploy_all()
        if not deploy:
            return []
        deploy = deploy[0]

        deploy_host_list = []
        for host in deploy_hosts:
            state = host.get("state")
            deploy_host = {
                "hostname": host.get("hostname"),
                "software_release": deploy.get("from_release"),
                "target_release": deploy.get("to_release") if state else None,
                "reboot_required": deploy.get("reboot_required") if state else None,
                "host_state": state
            }
            deploy_host_list.append(deploy_host)
        return deploy_host_list

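    # Illustrative sketch (values assumed): each entry returned by
    # deploy_host_list() is shaped like
    #
    #   {"hostname": "controller-0",
    #    "software_release": "24.09.0",
    #    "target_release": "25.09.0",
    #    "reboot_required": True,
    #    "host_state": "deployed"}
    #
    # target_release and reboot_required stay None until the host has a state.
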
    def manage_software_alarm(self, alarm_id, alarm_state, entity_instance_id, **kwargs):
        try:
            if alarm_id not in constants.SOFTWARE_ALARMS:
                raise Exception("Unknown software alarm '%s'." % alarm_id)

            # deal with the alarm clear scenario
            if alarm_state == fm_constants.FM_ALARM_STATE_CLEAR:
                LOG.info("Clearing alarm: %s for %s" % (alarm_id, entity_instance_id))
                self.fm_api.clear_fault(alarm_id, entity_instance_id)
                return

            # if not clear alarm scenario, create the alarm
            alarm_data = constants.SOFTWARE_ALARMS.get(alarm_id)
            # update the alarm_data if it is present in kwargs
            if kwargs:
                for data in alarm_data:
                    if data in kwargs.keys():
                        alarm_data[data] = kwargs[data]

            alarm = fm_api.Fault(
                alarm_id=alarm_id,
                alarm_state=alarm_state,
                entity_type_id=alarm_data.get("entity_type_id"),
                entity_instance_id=entity_instance_id,
                severity=alarm_data.get("severity"),
                reason_text=alarm_data.get("reason_text"),
                alarm_type=alarm_data.get("alarm_type"),
                probable_cause=alarm_data.get("probable_cause"),
                proposed_repair_action=alarm_data.get("proposed_repair_action"),
                service_affecting=alarm_data.get("service_affecting"),
            )
            LOG.info("Raising alarm: %s for %s" % (alarm_id, entity_instance_id))
            self.fm_api.set_fault(alarm)
        except Exception as e:
            LOG.exception("Failed to manage alarm %s with action %s: %s" % (
                alarm_id, alarm_state, str(e)
            ))

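    # Illustrative sketch (alarm id and entity instance assumed): raising and
    # clearing a software alarm both go through manage_software_alarm, e.g.
    #
    #   sc.manage_software_alarm(fm_constants.FM_ALARM_ID_USM_DEPLOY_HOST_FAILURE,
    #                            fm_constants.FM_ALARM_STATE_SET,
    #                            "host=controller-1")
    #
    # A CLEAR state short-circuits to fm_api.clear_fault(); any other state
    # builds an fm_api.Fault from the template in constants.SOFTWARE_ALARMS.
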
    def get_out_of_sync_alarm(self):
        """Get the out-of-sync alarm instance from fm_api"""
        return self.fm_api.get_fault(fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC,
                                     constants.ALARM_INSTANCE_ID_OUT_OF_SYNC)

    def create_clean_up_deployment_alarm(self, target_state):
        """
        Creates the 900.022 alarm to warn the user to clean up the deployment
        data remaining for the specified release version.
        """
        if target_state in [DEPLOY_STATES.COMPLETED, DEPLOY_STATES.HOST_ROLLBACK_DONE]:
            is_major_release = ReleaseState(release_state=states.DEPLOYING).is_major_release_deployment()
            # Do not create in case of patch release.
            if not is_major_release:
                return
            reason_text = constants.SOFTWARE_ALARMS[fm_constants.FM_ALARM_ID_USM_CLEANUP_DEPLOYMENT_DATA]["reason_text"]
            self.manage_software_alarm(fm_constants.FM_ALARM_ID_USM_CLEANUP_DEPLOYMENT_DATA,
                                       fm_constants.FM_ALARM_STATE_SET,
                                       "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST, constants.CONTROLLER_FLOATING_HOSTNAME),
                                       reason_text=reason_text)

    def handle_deploy_state_sync(self):
        """
        Handle the deploy state sync.
        If the deploy state is in sync, clear the alarm.
        If not, raise the alarm.
        """
        is_in_sync = is_deploy_state_in_sync()

        # Deploy in-sync state has not changed, no need to update the alarm
        if is_in_sync == self.usm_alarm.get(constants.LAST_IN_SYNC):
            return

        try:
            LOG.info("software.json in sync: %s", is_in_sync)
            out_of_sync_alarm_fault = self.get_out_of_sync_alarm()

            if out_of_sync_alarm_fault and is_in_sync:
                # There was an out of sync alarm raised, but local software.json is in sync,
                # so clear the alarm
                self.manage_software_alarm(
                    alarm_id=fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC,
                    alarm_state=fm_constants.FM_ALARM_STATE_CLEAR,
                    entity_instance_id=constants.ALARM_INSTANCE_ID_OUT_OF_SYNC
                )
                # Deploy in-sync state has changed, update the cache
                self.usm_alarm[constants.LAST_IN_SYNC] = is_in_sync
            elif (not out_of_sync_alarm_fault) and (not is_in_sync):
                # There was no out of sync alarm raised, but local software.json is not in sync,
                # so raise the alarm
                self.manage_software_alarm(
                    alarm_id=fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC,
                    alarm_state=fm_constants.FM_ALARM_STATE_SET,
                    entity_instance_id=constants.ALARM_INSTANCE_ID_OUT_OF_SYNC
                )
                # Deploy in-sync state has changed, update the cache
                self.usm_alarm[constants.LAST_IN_SYNC] = is_in_sync
            else:
                # Shouldn't get here
                LOG.error("Unexpected case in handling deploy state sync.")

        except Exception as ex:
            LOG.exception("Failed in handling deploy state sync. Error: %s" % str(ex))

    def _get_software_upgrade(self):
        """
        Get the current software upgrade from/to versions and state
        :return: dict of from_release, to_release and state
        """

        all_deploy = self.db_api_instance.get_deploy_all()

        if not all_deploy:
            return None

        deploy = all_deploy[0]
        from_maj_min_release = utils.get_major_release_version(deploy.get("from_release"))
        to_maj_min_release = utils.get_major_release_version(deploy.get("to_release"))
        state = deploy.get("state")

        return {
            "from_release": from_maj_min_release,
            "to_release": to_maj_min_release,
            "state": state
        }

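    # Illustrative sketch (values assumed): with a deploy record going from
    # "24.09.1" to "25.09.0", _get_software_upgrade() returns
    #
    #   {"from_release": "24.09", "to_release": "25.09", "state": "host"}
    #
    # i.e. both versions are reduced to MM.mm before being reported.
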
    def check_upgrade_in_progress(self):
        """
        Check if a major release upgrade is in progress
        """
        _upgrade_in_progress = False
        upgrade_release = self._get_software_upgrade()
        if not upgrade_release:
            return _upgrade_in_progress
        from_release = version.Version(upgrade_release["from_release"])
        to_release = version.Version(upgrade_release["to_release"])
        if (from_release.major != to_release.major) or (from_release.minor != to_release.minor):
            _upgrade_in_progress = True
        return _upgrade_in_progress

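    # Illustrative sketch (versions assumed): the major/minor comparison above
    # uses packaging.version, e.g.
    #
    #   from packaging import version
    #   v_from = version.Version("24.09")
    #   v_to = version.Version("25.09")
    #   (v_from.major, v_from.minor) != (v_to.major, v_to.minor)  # True -> upgrade
    #
    # so a patch within the same MM.mm stream is not treated as an upgrade in
    # progress.
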
    def get_software_upgrade(self):
        return self._get_software_upgrade()

    def get_all_software_host_upgrade(self):
        """
        Get all software host upgrade from/to versions and state
        :return: list of dict of hostname, current_sw_version, target_sw_version and host_state
        """
        deploy = self._get_software_upgrade()
        deploy_hosts = self.db_api_instance.get_deploy_host()

        if deploy is None or deploy_hosts is None:
            return None

        from_maj_min_release = deploy.get("from_release")
        to_maj_min_release = deploy.get("to_release")

        all_host_upgrades = []
        for deploy_host in deploy_hosts:
            all_host_upgrades.append({
                "hostname": deploy_host.get("hostname"),
                "current_sw_version": to_maj_min_release if deploy_host.get(
                    "state") == states.DEPLOYED else from_maj_min_release,
                "target_sw_version": to_maj_min_release,
                "host_state": deploy_host.get("state")
            })

        return all_host_upgrades

    def get_one_software_host_upgrade(self, hostname):
        """
        Get the given software host upgrade from/to versions and state
        :param hostname: hostname
        :return: list of dict of hostname, current_sw_version, target_sw_version and host_state
        """

        all_host_upgrades = self.get_all_software_host_upgrade()

        if not all_host_upgrades:
            return None

        for host_upgrade in all_host_upgrades:
            if host_upgrade.get("hostname") == hostname:
                return [host_upgrade]

        return None

    def is_host_active_controller(self):
        """
        Check if the current host is the active controller, by checking if the
        floating ip is assigned to the host
        :return: True if it is the active controller, False otherwise
        """
        if not os.path.exists(INITIAL_CONFIG_COMPLETE_FLAG):
            return False

        floating_mgmt_ip = utils.gethostbyname(constants.CONTROLLER_FLOATING_HOSTNAME)
        if not floating_mgmt_ip:
            return False

        ip_family = utils.get_management_family()
        mgmt_iface = cfg.get_mgmt_iface()

        host_mgmt_ip_list = utils.get_iface_ip(mgmt_iface, ip_family)
        return floating_mgmt_ip in host_mgmt_ip_list if host_mgmt_ip_list else False

    def set_interruption_fail_state(self):
        """
        Set the host failed state after an interruption, based on the current deployment state
        """
        upgrade_status = self.get_software_upgrade()
        if self.is_host_active_controller() and os.path.exists(INITIAL_CONFIG_COMPLETE_FLAG) and upgrade_status:

            if upgrade_status.get('state') == DEPLOY_STATES.HOST.value and not is_simplex():
                to_fail_hostname = CONTROLLER_0_HOSTNAME if self.hostname == CONTROLLER_1_HOSTNAME else \
                    CONTROLLER_1_HOSTNAME
                # In DX, when it is in deploy-host state, we can only set the standby controller to fail
                start_set_fail(True, to_fail_hostname)

            elif upgrade_status.get('state') in INTERRUPTION_RECOVERY_STATES:
                # The deployment was interrupted. We need to update the deployment state first
                start_set_fail(True, self.hostname)


class PatchControllerApiThread(threading.Thread):
|
|
def __init__(self):
|
|
threading.Thread.__init__(self)
|
|
self.wsgi = None
|
|
self.name = "PatchControllerApiThread"
|
|
|
|
def run(self):
|
|
global thread_death
|
|
|
|
host = "127.0.0.1"
|
|
port = cfg.api_port
|
|
|
|
try:
|
|
# In order to support IPv6, server_class.address_family must be
|
|
# set to the correct address family. Because the unauthenticated
|
|
# API always uses IPv4 for the loopback address, the address_family
|
|
# variable cannot be set directly in the WSGIServer class, so a
|
|
# local subclass needs to be created for the call to make_server,
|
|
# where the correct address_family can be specified.
|
|
class server_class(simple_server.WSGIServer):
|
|
pass
|
|
|
|
server_class.address_family = socket.AF_INET
|
|
self.wsgi = simple_server.make_server(
|
|
host, port,
|
|
app.VersionSelectorApplication(),
|
|
server_class=server_class)

            self.wsgi.socket.settimeout(api_socket_timeout)
            global keep_running
            while keep_running:
                self.wsgi.handle_request()

                if thread_death.is_set():
                    LOG.info("%s exits as thread death is detected.", self.name)
                    return

                # Call garbage collect after wsgi request is handled,
                # to ensure any open file handles are closed in the case
                # of an upload.
                gc.collect()
        except Exception as ex:
            # Log all exceptions
            LOG.exception("%s: error occurred during request processing: %s" % (self.name, str(ex)))
            thread_death.set()

    def kill(self):
        # Must run from other thread
        if self.wsgi is not None:
            self.wsgi.shutdown()


class PatchControllerAuthApiThread(threading.Thread):
    def __init__(self, port):
        threading.Thread.__init__(self)
        # LOG.info ("Initializing Authenticated API thread")
        self.wsgi = None
        self.port = port
        self.name = f"PatchControllerAuthApiThread_{port}"

    def run(self):
        global thread_death
        host = CONF.auth_api_bind_ip
        if host is None:
            host = utils.get_versioned_address_all()
        try:
            # Can only launch authenticated server post-config
            while not os.path.exists(VOLATILE_CONTROLLER_CONFIG_COMPLETE):
                LOG.info("Authorized API: Waiting for controller config complete.")
                time.sleep(5)

            LOG.info("Authorized API: Initializing")

            # In order to support IPv6, server_class.address_family must be
            # set to the correct address family. Because the unauthenticated
            # API always uses IPv4 for the loopback address, the address_family
            # variable cannot be set directly in the WSGIServer class, so a
            # local subclass needs to be created for the call to make_server,
            # where the correct address_family can be specified.
            class server_class(simple_server.WSGIServer):
                pass

            server_class.address_family = utils.get_management_family()
            self.wsgi = simple_server.make_server(
                host, self.port,
                auth_app.VersionSelectorApplication(),
                server_class=server_class)

            # self.wsgi.serve_forever()
            self.wsgi.socket.settimeout(api_socket_timeout)
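            # Note (comment not in the original source): handle_request() with
            # a socket timeout is used instead of the commented-out
            # serve_forever() so the loop below wakes up periodically and can
            # observe keep_running and thread_death rather than blocking
            # indefinitely on an idle socket.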

            global keep_running
            while keep_running:
                self.wsgi.handle_request()

                if thread_death.is_set():
                    LOG.info("%s exits as thread death is detected.", self.name)
                    return

                # Call garbage collect after wsgi request is handled,
                # to ensure any open file handles are closed in the case
                # of an upload.
                gc.collect()
        except Exception as ex:
            # Log all exceptions
            LOG.exception("%s: error occurred during request processing: %s" % (self.name, str(ex)))
            thread_death.set()

    def kill(self):
        # Must run from other thread
        if self.wsgi is not None:
            self.wsgi.shutdown()

        LOG.info("%s exits as requested", self.name)
        global thread_death
        thread_death.set()


class PatchControllerMainThread(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)
        # LOG.info ("Initializing Main thread")
        self.name = "PatchControllerMainThread"

    def run(self):
        global sc
        global thread_death

        # TODO(jvazhapp) Fix following temporary workaround
        # for eventlet issue resulting in error message:
        # 'Resolver configuration could not be read or
        # specified no nameservers eventlet fix version'
        with open('/etc/resolv.conf', 'a+') as f:
            f.seek(0)
            data = f.read()
            if "nameserver" not in data:
                f.write("nameserver 8.8.8.8\n")

        # Send periodic messages to the agents.
        # We can only use one interval.
        SEND_MSG_INTERVAL_IN_SECONDS = 30.0

        sc.ignore_errors = os.environ.get('IGNORE_ERRORS', 'False')
        LOG.info("IGNORE_ERRORS execution flag is set: %s", sc.ignore_errors)

        LOG.info("software-controller-daemon is starting")

        LOG.info("%s is active controller: %s", sc.hostname, sc.is_host_active_controller())

        sc.set_interruption_fail_state()

        try:
            if sc.pre_bootstrap and cfg.get_mgmt_ip():
                sc.pre_bootstrap = False

            if sc.pre_bootstrap or os.path.isfile(INSTALL_LOCAL_FLAG):
                sc.install_local = True
            else:
                sc.install_local = False

            # Update the out of sync alarm cache when the thread starts
            out_of_sync_alarm_fault = sc.get_out_of_sync_alarm()
            sc.usm_alarm[constants.LAST_IN_SYNC] = not out_of_sync_alarm_fault

            sock_in = sc.setup_socket()

            while sock_in is None:
                # Check every thirty seconds?
                # Once we've got a conf file, tied into packstack,
                # we'll get restarted when the file is updated,
                # and this should be unnecessary.
                time.sleep(30)
                sock_in = sc.setup_socket()

            # Ok, now we've got our socket. Let's start with a hello!
            sc.socket_lock.acquire()

            hello = PatchMessageHello()
            hello.send(sc.sock_out)

            hello_agent = PatchMessageHelloAgent()
            hello_agent.send(sc.sock_out)

            sc.socket_lock.release()
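            # Note (comment not in the original source): judging by the
            # message names, PatchMessageHello announces this controller to
            # its peer, while PatchMessageHelloAgent solicits responses from
            # the host agents; both are re-sent on the interval below.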

            # Send hello every thirty seconds
            hello_timeout = time.time() + SEND_MSG_INTERVAL_IN_SECONDS
            # Send deploy state update every thirty seconds
            deploy_state_update_timeout = time.time() + SEND_MSG_INTERVAL_IN_SECONDS
            remaining = int(SEND_MSG_INTERVAL_IN_SECONDS)

            agent_query_conns = []
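            # Note (comment not in the original source): the select() call in
            # the main loop watches the controller's UDP socket plus any open
            # TCP connections to agents (agent_query_conns), with the
            # 30-second message interval doubling as the select timeout.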

            while True:
                # Check to see if any other thread has died
                if thread_death.is_set():
                    LOG.info("%s exits as thread death is detected.", self.name)
                    return

                # Check for in-service patch restart flag
                if os.path.exists(insvc_patch_restart_controller):
                    LOG.info("In-service patch restart flag detected. Exiting.")
                    global keep_running
                    keep_running = False
                    os.remove(insvc_patch_restart_controller)
                    return

                # If bootstrap is completed, re-initialize the sockets
                if sc.pre_bootstrap and cfg.get_mgmt_ip():
                    sc.pre_bootstrap = False

                    sock_in = sc.setup_socket()
                    while sock_in is None:
                        time.sleep(30)
                        sock_in = sc.setup_socket()

                    sc.socket_lock.acquire()

                    hello = PatchMessageHello()
                    hello.send(sc.sock_out)

                    hello_agent = PatchMessageHelloAgent()
                    hello_agent.send(sc.sock_out)

                    sc.socket_lock.release()
                    for s in agent_query_conns.copy():
                        agent_query_conns.remove(s)
                        s.shutdown(socket.SHUT_RDWR)
                        s.close()

                local_mode = sc.pre_bootstrap or os.path.isfile(INSTALL_LOCAL_FLAG)
                if local_mode and not sc.install_local:
                    sc.install_local = True
                elif not local_mode and sc.install_local:
                    sc.install_local = False

                inputs = [sc.sock_in] + agent_query_conns
                outputs = []

                rlist, wlist, xlist = select.select(
                    inputs, outputs, inputs, SEND_MSG_INTERVAL_IN_SECONDS)

                if (len(rlist) == 0 and
                        len(wlist) == 0 and
                        len(xlist) == 0):
                    # Timeout hit
                    sc.audit_socket()
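
                # Process readable sockets (comment not in the original
                # source): sc.sock_in is the UDP socket carrying datagrams
                # from peers and agents, while entries from agent_query_conns
                # are TCP connections streaming (possibly fragmented) JSON
                # query responses.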
                for s in rlist:
                    data = ''
                    addr = None
                    msg = None

                    if s == sc.sock_in:
                        # Receive from UDP
                        sc.socket_lock.acquire()
                        data, addr = s.recvfrom(1024)
                        sc.socket_lock.release()
                    else:
                        # Receive from TCP
                        while True:
                            try:
                                packet = s.recv(1024)
                            except socket.error:
                                LOG.exception("Socket error on recv")
                                data = ''
                                break

                            if packet:
                                data += packet.decode()

                                if data == '':
                                    break
                                try:
                                    json.loads(data)
                                    break
                                except ValueError:
                                    # Message is incomplete
                                    continue
                            else:
                                LOG.info('End of TCP message received')
                                break
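
                        # Illustrative only (sizes assumed): a 3 KiB JSON
                        # response arrives as three ~1024-byte chunks; the
                        # json.loads() probe above raises ValueError on the
                        # first two partial buffers and succeeds only once the
                        # document is complete, which ends the receive loop.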
                        if data == '':
                            # Connection dropped
                            agent_query_conns.remove(s)
                            s.close()
                            continue

                        # Get the TCP endpoint address
                        addr = s.getpeername()

                    msgdata = json.loads(data)
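                    # Note (comment not in the original source): the block
                    # below checks the protocol version, then dispatches on
                    # msgtype to a concrete handler class; unrecognized types
                    # fall through to the base messages.PatchMessage, which
                    # presumably just logs and drops the message.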

                    # For now, discard any messages that are not msgversion==1
                    if 'msgversion' in msgdata and msgdata['msgversion'] != 1:
                        continue

                    if 'msgtype' in msgdata:
                        if msgdata['msgtype'] == messages.PATCHMSG_HELLO:
                            msg = PatchMessageHello()
                        elif msgdata['msgtype'] == messages.PATCHMSG_HELLO_ACK:
                            msg = PatchMessageHelloAck()
                        elif msgdata['msgtype'] == messages.PATCHMSG_SYNC_REQ:
                            msg = PatchMessageSyncReq()
                        elif msgdata['msgtype'] == messages.PATCHMSG_SYNC_COMPLETE:
                            msg = PatchMessageSyncComplete()
                        elif msgdata['msgtype'] == messages.PATCHMSG_HELLO_AGENT_ACK:
                            msg = PatchMessageHelloAgentAck()
                        elif msgdata['msgtype'] == messages.PATCHMSG_QUERY_DETAILED_RESP:
                            msg = PatchMessageQueryDetailedResp()
                        elif msgdata['msgtype'] == messages.PATCHMSG_AGENT_INSTALL_RESP:
                            msg = PatchMessageAgentInstallResp()
                        elif msgdata['msgtype'] == messages.PATCHMSG_DROP_HOST_REQ:
                            msg = PatchMessageDropHostReq()
                        elif msgdata['msgtype'] == messages.PATCHMSG_DEPLOY_STATE_UPDATE:
                            msg = SoftwareMessageDeployStateUpdate()
                        elif msgdata['msgtype'] == messages.PATCHMSG_DEPLOY_STATE_UPDATE_ACK:
                            msg = SoftwareMessageDeployStateUpdateAck()
                        elif msgdata['msgtype'] == messages.PATCHMSG_DEPLOY_STATE_CHANGED:
                            msg = SWMessageDeployStateChanged()
                        elif msgdata['msgtype'] == messages.PATCHMSG_DEPLOY_DELETE_CLEANUP_RESP:
                            msg = SoftwareMessageDeployDeleteCleanupResp()
                        elif msgdata['msgtype'] == messages.PATCHMSG_CHECK_AGENT_ALIVE_RESP:
                            msg = SoftwareMessageCheckAgentAliveResp()

                    if msg is None:
                        msg = messages.PatchMessage()

                    msg.decode(msgdata)
                    if s == sc.sock_in:
                        msg.handle(sc.sock_out, addr)
                    else:
                        msg.handle(s, addr)

                    # We can drop the connection after a query response
                    if msg.msgtype == messages.PATCHMSG_QUERY_DETAILED_RESP and s != sc.sock_in:
                        agent_query_conns.remove(s)
                        s.shutdown(socket.SHUT_RDWR)
                        s.close()
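
                # Open TCP query connections to stale hosts (comment not in
                # the original source: because the bound is checked before
                # each connect-and-append, up to six connections can be in
                # flight at once).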
                while len(stale_hosts) > 0 and len(agent_query_conns) <= 5:
                    ip = stale_hosts.pop()
                    try:
                        agent_sock = socket.create_connection((ip, cfg.agent_port))
                        query = PatchMessageQueryDetailed()
                        query.send(agent_sock)
                        agent_query_conns.append(agent_sock)
                    except Exception:
                        # Put it back on the list
                        stale_hosts.append(ip)
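
                # Note (comment not in the original source): the hello timer
                # re-arms when the deadline passes; the "remaining > interval"
                # check also re-arms it after a system clock jump, so the
                # deadline cannot end up stuck far in the future.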
                remaining = int(hello_timeout - time.time())
                if remaining <= 0 or remaining > int(SEND_MSG_INTERVAL_IN_SECONDS):
                    hello_timeout = time.time() + SEND_MSG_INTERVAL_IN_SECONDS
                    remaining = int(SEND_MSG_INTERVAL_IN_SECONDS)

                    sc.socket_lock.acquire()

                    hello = PatchMessageHello()
                    hello.send(sc.sock_out)

                    hello_agent = PatchMessageHelloAgent()
                    hello_agent.send(sc.sock_out)

                    sc.socket_lock.release()

                    # Age out neighbours
                    sc.controller_neighbours_lock.acquire()
                    nbrs = list(sc.controller_neighbours)
                    for n in nbrs:
                        # Age out controllers after 2 minutes
                        if sc.controller_neighbours[n].get_age() >= 120:
                            LOG.info("Aging out controller %s from table", n)
                            del sc.controller_neighbours[n]
                    sc.controller_neighbours_lock.release()

                    sc.hosts_lock.acquire()
                    nbrs = list(sc.hosts)
                    for n in nbrs:
                        # Age out hosts after 1 hour
                        if sc.hosts[n].get_age() >= 3600:
                            LOG.info("Aging out host %s from table", n)
                            del sc.hosts[n]
                            for patch_id in list(sc.interim_state):
                                if n in sc.interim_state[patch_id]:
                                    sc.interim_state[patch_id].remove(n)

                    sc.hosts_lock.release()

                deploy_state_update_remaining = int(deploy_state_update_timeout - time.time())
                # Only send the deploy state update from the active controller
                if deploy_state_update_remaining <= 0 or deploy_state_update_remaining > int(
                        SEND_MSG_INTERVAL_IN_SECONDS):
                    deploy_state_update_timeout = time.time() + SEND_MSG_INTERVAL_IN_SECONDS
                    deploy_state_update_remaining = int(
                        SEND_MSG_INTERVAL_IN_SECONDS)

                    if not is_simplex():
                        # Get the out-of-sync alarm, to request a peer sync
                        # even if no deployment is in progress
                        out_of_sync_alarm_fault = sc.get_out_of_sync_alarm()

                        # Data sync always starts only from the active controller
                        if utils.is_active_controller():
                            if out_of_sync_alarm_fault or is_deployment_in_progress():
                                sc.socket_lock.acquire()
                                try:
                                    deploy_state_update = SoftwareMessageDeployStateUpdate()
                                    deploy_state_update.send(sc.sock_out)
                                except Exception as e:
                                    LOG.exception("Failed to send deploy state update. Error: %s", str(e))
                                finally:
                                    sc.socket_lock.release()

                            if not sc.pre_bootstrap:
                                sc.handle_deploy_state_sync()

        except Exception as ex:
            # Log all exceptions
            LOG.exception("%s: error occurred during request processing: %s" % (self.name, str(ex)))
            thread_death.set()


def main():
    software_conf = constants.SOFTWARE_CONFIG_FILE_LOCAL

    pkg_feed = ('"http://controller:8080/updates/debian/rel-%s/ %s updates"'
                % (constants.STARLINGX_RELEASE, constants.DEBIAN_RELEASE))
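    # Illustrative only (release values assumed): with STARLINGX_RELEASE set
    # to "10.0" and DEBIAN_RELEASE to "bullseye", pkg_feed becomes
    #     "http://controller:8080/updates/debian/rel-10.0/ bullseye updates"
    # (including the literal quotes), which is then written into the
    # [runtime] package_feed option below.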

    config = configparser.ConfigParser()
    config.read(software_conf)
    config.set("runtime", "package_feed", pkg_feed)
    with open(software_conf, "w+") as configfile:
        config.write(configfile)

    # The following call to CONF is to ensure the oslo config
    # has been called to specify a valid config dir.
    # Otherwise oslo_policy will fail when it looks for its files.
    CONF(
        (),  # Required to load an anonymous configuration
        default_config_files=['/etc/software/software.conf', ]
    )

    cfg.read_config()

    configure_logging()

    # daemon.pidlockfile.write_pid_to_pidfile(pidfile_path)

    global thread_death
    thread_death = threading.Event()

    # Set the TMPDIR environment variable to /scratch so that any modules
    # that create directories with tempfile will not use /tmp
    os.environ['TMPDIR'] = '/scratch'

    global sc
    sc = PatchController()

    LOG.info("launching")
    api_thread = PatchControllerApiThread()
    auth_api_thread = PatchControllerAuthApiThread(CONF.auth_api_port)
    auth_api_alt_thread = PatchControllerAuthApiThread(CONF.auth_api_alt_port)
    main_thread = PatchControllerMainThread()

    api_thread.start()
    auth_api_thread.start()
    auth_api_alt_thread.start()
    main_thread.start()
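
    # Shutdown choreography (comment not in the original source): any thread
    # that hits a fatal error sets thread_death, which releases the wait
    # below; clearing keep_running then lets each loop exit on its next
    # socket timeout, after which all four threads are joined.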
    thread_death.wait()
    global keep_running
    keep_running = False

    api_thread.join()
    auth_api_thread.join()
    auth_api_alt_thread.join()
    main_thread.join()