
This change:
1) Allows a patch to be applied on top of an unavailable patch
2) Does not allow the system to go back to an unavailable patch
3) Does not raise the obsolete alarm for unavailable patches in the
   current major release
4) After applying a patch on top of an unavailable patch, removes all
   the unavailable patches
5) Adds the prepatched_iso tag and removes the requires tags from the
   lowest sysroot deployed patch

Test-Plan:
PASS: If a patch from the current major release is unavailable, do not
      raise the obsolete release alarm
PASS: Apply a patch on top of an unavailable patch
      - Check the deployment entity is correct
      - Check the ostree commit is correct
      - Check the unavailable patch was deleted
      - Check the lowest applied patch contains the prepatched_iso tag
        and no requires tags in the metadata
PASS: Should not allow removing a patch on top of an unavailable patch
PASS: Should not allow removal of an unavailable patch
PASS: Upgrade from 22.12 to 24.09.300 (with changes)

Closes-Bug: 2119980
Change-Id: Iaf83a55af4c33a9d38eb569c7d614662d60d7a36
Signed-off-by: Lindley Vieira <lindley.vieira@windriver.com>
"""
|
|
Copyright (c) 2023-2025 Wind River Systems, Inc.
|
|
|
|
SPDX-License-Identifier: Apache-2.0
|
|
|
|
"""
|
|
import sys
|
|
|
|
# prevent software_controller from importing osprofiler
|
|
sys.modules['osprofiler'] = None
|
|
|
|
import configparser
|
|
import gc
|
|
import json
|
|
import logging
|
|
import os
|
|
from packaging import version
|
|
import re
|
|
import select
|
|
import sh
|
|
import shutil
|
|
import socket
|
|
import subprocess
|
|
import tempfile
|
|
import threading
|
|
import time
|
|
import typing
|
|
from wsgiref import simple_server
|
|
|
|
from fm_api import fm_api
|
|
from fm_api import constants as fm_constants
|
|
|
|
from oslo_config import cfg as oslo_cfg
|
|
|
|
import software.apt_utils as apt_utils
|
|
import software.lvm_snapshot as lvm_snapshot
|
|
import software.ostree_utils as ostree_utils
|
|
from software.api import app
|
|
from software.authapi import app as auth_app
|
|
from software.constants import CONTROLLER_0_HOSTNAME
|
|
from software.constants import CONTROLLER_1_HOSTNAME
|
|
from software.constants import INSTALL_LOCAL_FLAG
|
|
from software.states import DEPLOY_HOST_STATES
|
|
from software.states import DEPLOY_STATES
|
|
from software.states import INTERRUPTION_RECOVERY_STATES
|
|
from software.base import PatchService
|
|
from software.dc_utils import get_subcloud_groupby_version
|
|
from software.deploy_state import require_deploy_state
|
|
from software.exceptions import APTOSTreeCommandFail
|
|
from software.exceptions import HostNotFound
|
|
from software.exceptions import InternalError
|
|
from software.exceptions import MetadataFail
|
|
from software.exceptions import UpgradeNotSupported
|
|
from software.exceptions import OSTreeCommandFail
|
|
from software.exceptions import OSTreeTarFail
|
|
from software.exceptions import SoftwareError
|
|
from software.exceptions import SoftwareFail
|
|
from software.exceptions import ReleaseInvalidRequest
|
|
from software.exceptions import ReleaseValidationFailure
|
|
from software.exceptions import ReleaseIsoDeleteFailure
|
|
from software.exceptions import SoftwareServiceError
|
|
from software.exceptions import InvalidOperation
|
|
from software.exceptions import HostAgentUnreachable
|
|
from software.exceptions import HostIpNotFound
|
|
from software.exceptions import MaxReleaseExceeded
|
|
from software.exceptions import ServiceParameterNotFound
|
|
from software.plugin import DeployPluginRunner
|
|
from software.release_data import reload_release_data
|
|
from software.release_data import get_SWReleaseCollection
|
|
from software.software_functions import collect_current_load_for_hosts
|
|
from software.software_functions import copy_pxeboot_update_file
|
|
from software.software_functions import copy_pxeboot_cfg_files
|
|
from software.software_functions import create_deploy_hosts
|
|
from software.software_functions import deploy_host_validations
|
|
from software.software_functions import validate_host_deploy_order
|
|
from software.software_functions import parse_release_metadata
|
|
from software.software_functions import configure_logging
|
|
from software.software_functions import mount_iso_load
|
|
from software.software_functions import unmount_iso_load
|
|
from software.software_functions import read_upgrade_support_versions
|
|
from software.software_functions import get_to_release_from_metadata_file
|
|
from software.software_functions import BasePackageData
|
|
from software.software_functions import PatchFile
|
|
from software.software_functions import package_dir
|
|
from software.software_functions import repo_dir
|
|
from software.software_functions import root_scripts_dir
|
|
from software.software_functions import SW_VERSION
|
|
from software.software_functions import audit_log_info
|
|
from software.software_functions import repo_root_dir
|
|
from software.software_functions import is_deploy_state_in_sync
|
|
from software.software_functions import is_deployment_in_progress
|
|
from software.software_functions import get_release_from_patch
|
|
from software.software_functions import run_remove_temporary_data_script
|
|
from software.software_functions import to_bool
|
|
from software.release_state import ReleaseState
|
|
from software.utilities.deploy_set_failed import start_set_fail
|
|
from software.deploy_host_state import DeployHostState
|
|
from software.deploy_state import DeployState
|
|
from software.release_verify import verify_files
|
|
import software.config as cfg
|
|
import software.utils as utils
|
|
from software.sysinv_utils import get_k8s_ver
|
|
from software.sysinv_utils import is_system_controller
|
|
from software.sysinv_utils import update_host_sw_version
|
|
from software.sysinv_utils import are_all_hosts_unlocked_and_online
|
|
from software.sysinv_utils import get_system_info
|
|
from software.sysinv_utils import get_oot_drivers
|
|
from software.sysinv_utils import trigger_evaluate_apps_reapply
|
|
from software.sysinv_utils import trigger_vim_host_audit
|
|
|
|
from software.db.api import get_instance
|
|
|
|
import software.messages as messages
|
|
import software.constants as constants
|
|
from software import states
|
|
|
|
from tsconfig.tsconfig import INITIAL_CONFIG_COMPLETE_FLAG
|
|
from tsconfig.tsconfig import VOLATILE_CONTROLLER_CONFIG_COMPLETE
|
|
import xml.etree.ElementTree as ET
|
|
|
|
|
|


CONF = oslo_cfg.CONF

LOG = logging.getLogger('main_logger')

pidfile_path = "/var/run/patch_controller.pid"

sc = None
state_file = "%s/.controller.state" % constants.SOFTWARE_STORAGE_DIR
app_dependency_basename = "app_dependencies.json"
app_dependency_filename = "%s/%s" % (constants.SOFTWARE_STORAGE_DIR, app_dependency_basename)

insvc_patch_restart_controller = "/run/software/.restart.software-controller"

ETC_HOSTS_FILE_PATH = "/etc/hosts"
ETC_HOSTS_BACKUP_FILE_PATH = "/etc/hosts.patchbak"
PATCH_MIGRATION_SCRIPT_DIR = "/etc/update.d"
SOFTWARE_LOG_FILE = "/var/log/software.log"

stale_hosts = []
pending_queries = []

thread_death = None
keep_running = True
system_mode = None

# Limit socket blocking to 5 seconds to allow for thread to shutdown
api_socket_timeout = 5.0


def is_simplex():
    global system_mode
    if system_mode is None:
        _, system_mode = get_system_info()

    return system_mode == constants.SYSTEM_MODE_SIMPLEX


class ControllerNeighbour(object):
    def __init__(self):
        self.last_ack = 0
        self.synced = False

    def rx_ack(self):
        self.last_ack = time.time()

    def get_age(self):
        return int(time.time() - self.last_ack)

    def rx_synced(self):
        self.synced = True

    def clear_synced(self):
        self.synced = False

    def get_synced(self):
        return self.synced


class AgentNeighbour(object):
    def __init__(self, ip):
        self.ip = ip
        self.last_ack = 0
        self.last_query_id = 0
        self.out_of_date = False
        self.hostname = "n/a"
        self.requires_reboot = False
        self.patch_failed = False
        self.stale = False
        self.pending_query = False
        self.latest_sysroot_commit = None
        self.nodetype = None
        self.sw_version = "unknown"
        self.subfunctions = []
        self.state = None
        self._is_alive = False

    @property
    def is_alive(self):
        return self._is_alive

    @is_alive.setter
    def is_alive(self, value):
        self._is_alive = value

    def rx_ack(self,
               hostname,
               out_of_date,
               requires_reboot,
               query_id,
               patch_failed,
               sw_version,
               state):
        self.last_ack = time.time()
        self.hostname = hostname
        self.patch_failed = patch_failed
        self.sw_version = sw_version
        self.state = state

        if out_of_date != self.out_of_date or requires_reboot != self.requires_reboot:
            self.out_of_date = out_of_date
            self.requires_reboot = requires_reboot
            LOG.info("Agent %s (%s) reporting out_of_date=%s, requires_reboot=%s",
                     self.hostname,
                     self.ip,
                     self.out_of_date,
                     self.requires_reboot)

        if self.last_query_id != query_id:
            self.last_query_id = query_id
            self.stale = True
            if self.ip not in stale_hosts and self.ip not in pending_queries:
                stale_hosts.append(self.ip)

    def get_age(self):
        return int(time.time() - self.last_ack)

    def handle_query_detailed_resp(self,
                                   latest_sysroot_commit,
                                   nodetype,
                                   sw_version,
                                   subfunctions,
                                   state):
        self.latest_sysroot_commit = latest_sysroot_commit
        self.nodetype = nodetype
        self.stale = False
        self.pending_query = False
        self.sw_version = sw_version
        self.subfunctions = subfunctions
        self.state = state

        if self.ip in pending_queries:
            pending_queries.remove(self.ip)

        if self.ip in stale_hosts:
            stale_hosts.remove(self.ip)

    def get_dict(self):
        d = {"ip": self.ip,
             "hostname": self.hostname,
             "deployed": not self.out_of_date,
             "secs_since_ack": self.get_age(),
             "patch_failed": self.patch_failed,
             "stale_details": self.stale,
             "latest_sysroot_commit": self.latest_sysroot_commit,
             "nodetype": self.nodetype,
             "subfunctions": self.subfunctions,
             "sw_version": self.sw_version,
             "state": self.state}

        return d


class PatchMessageHello(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_HELLO)
        self.patch_op_counter = 0

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'patch_op_counter' in data:
            self.patch_op_counter = data['patch_op_counter']

    def encode(self):
        global sc
        messages.PatchMessage.encode(self)
        self.message['patch_op_counter'] = sc.patch_op_counter

    def handle(self, sock, addr):
        global sc
        host = addr[0]
        if host == cfg.get_mgmt_ip():
            # Ignore messages from self
            return

        # Send response
        if self.patch_op_counter > 0:
            sc.handle_nbr_patch_op_counter(host, self.patch_op_counter)

        resp = PatchMessageHelloAck()
        resp.send(sock)

    def send(self, sock):
        global sc
        if sc.install_local:
            return
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))
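
# For illustration only: messages are plain dicts serialized as JSON and
# sent over UDP. A minimal sketch of a round-trip, assuming a test socket
# pair is available (the socket names below are hypothetical, not part of
# this module):
#
#     msg = PatchMessageHello()
#     msg.send(tx_sock)                      # JSON-encodes self.message
#     data = json.loads(rx_sock.recv(4096))  # peer side parses the datagram
#     reply = PatchMessageHello()
#     reply.decode(data)                     # restores patch_op_counter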


class PatchMessageHelloAck(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_HELLO_ACK)

    def encode(self):
        # Nothing to add, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        global sc

        sc.controller_neighbours_lock.acquire()
        if not addr[0] in sc.controller_neighbours:
            sc.controller_neighbours[addr[0]] = ControllerNeighbour()

        sc.controller_neighbours[addr[0]].rx_ack()
        sc.controller_neighbours_lock.release()

    def send(self, sock):
        global sc
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class PatchMessageSyncReq(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_SYNC_REQ)

    def encode(self):
        # Nothing to add to the SYNC_REQ, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        global sc
        host = addr[0]
        if host == cfg.get_mgmt_ip():
            # Ignore messages from self
            return

        # We may need to do this in a separate thread, so that we continue to process hellos
        LOG.info("Handling sync req")

        # NOTE(bqian) sync_from_nbr returns "False" if sync operations failed.
        # need to think of reattempt to deal w/ the potential failure.
        sc.sync_from_nbr(host)

        resp = PatchMessageSyncComplete()
        resp.send(sock)

    def send(self, sock):
        global sc
        LOG.info("sending sync req")
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class PatchMessageSyncComplete(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_SYNC_COMPLETE)

    def encode(self):
        # Nothing to add to the SYNC_COMPLETE, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        global sc
        LOG.info("Handling sync complete")

        sc.controller_neighbours_lock.acquire()
        if not addr[0] in sc.controller_neighbours:
            sc.controller_neighbours[addr[0]] = ControllerNeighbour()

        sc.controller_neighbours[addr[0]].rx_synced()
        sc.controller_neighbours_lock.release()

    def send(self, sock):
        global sc
        LOG.info("sending sync complete")
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class PatchMessageHelloAgent(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_HELLO_AGENT)

    def encode(self):
        global sc
        messages.PatchMessage.encode(self)
        self.message['patch_op_counter'] = sc.patch_op_counter

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        global sc
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.agent_address, cfg.agent_port))
        if not sc.install_local:
            local_hostname = utils.ip_to_versioned_localhost(cfg.agent_mcast_group)
            sock.sendto(str.encode(message), (local_hostname, cfg.agent_port))


class PatchMessageSendLatestFeedCommit(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_SEND_LATEST_FEED_COMMIT)

    def encode(self):
        global sc
        messages.PatchMessage.encode(self)
        self.message['latest_feed_commit'] = sc.latest_feed_commit

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        global sc
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.agent_address, cfg.agent_port))
        if not sc.install_local:
            local_hostname = utils.ip_to_versioned_localhost(cfg.agent_mcast_group)
            sock.sendto(str.encode(message), (local_hostname, cfg.agent_port))


class PatchMessageHelloAgentAck(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_HELLO_AGENT_ACK)
        self.query_id = 0
        self.agent_out_of_date = False
        self.agent_hostname = "n/a"
        self.agent_requires_reboot = False
        self.agent_patch_failed = False
        self.agent_sw_version = "unknown"
        self.agent_state = "unknown"

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'query_id' in data:
            self.query_id = data['query_id']
        if 'out_of_date' in data:
            self.agent_out_of_date = data['out_of_date']
        if 'hostname' in data:
            self.agent_hostname = data['hostname']
        if 'requires_reboot' in data:
            self.agent_requires_reboot = data['requires_reboot']
        if 'patch_failed' in data:
            self.agent_patch_failed = data['patch_failed']
        if 'sw_version' in data:
            self.agent_sw_version = data['sw_version']
        if 'state' in data:
            self.agent_state = data['state']

    def encode(self):
        # Nothing to add, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        global sc

        sc.hosts_lock.acquire()
        if not addr[0] in sc.hosts:
            sc.hosts[addr[0]] = AgentNeighbour(addr[0])

        sc.hosts[addr[0]].rx_ack(self.agent_hostname,
                                 self.agent_out_of_date,
                                 self.agent_requires_reboot,
                                 self.query_id,
                                 self.agent_patch_failed,
                                 self.agent_sw_version,
                                 self.agent_state)
        sc.hosts_lock.release()

    def send(self, sock):  # pylint: disable=unused-argument
        LOG.error("Should not get here")


class PatchMessageQueryDetailed(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_QUERY_DETAILED)

    def encode(self):
        # Nothing to add to the message, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        self.encode()
        message = json.dumps(self.message)
        sock.sendall(str.encode(message))


class PatchMessageQueryDetailedResp(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_QUERY_DETAILED_RESP)
        self.agent_sw_version = "unknown"
        self.latest_sysroot_commit = "unknown"
        self.subfunctions = []
        self.nodetype = "unknown"
        self.agent_state = "unknown"

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'latest_sysroot_commit' in data:
            self.latest_sysroot_commit = data['latest_sysroot_commit']
        if 'nodetype' in data:
            self.nodetype = data['nodetype']
        if 'sw_version' in data:
            self.agent_sw_version = data['sw_version']
        if 'subfunctions' in data:
            self.subfunctions = data['subfunctions']
        if 'state' in data:
            self.agent_state = data['state']

    def encode(self):
        LOG.error("Should not get here")

    def handle(self, sock, addr):
        global sc

        ip = addr[0]
        sc.hosts_lock.acquire()
        if ip in sc.hosts:
            sc.hosts[ip].handle_query_detailed_resp(self.latest_sysroot_commit,
                                                    self.nodetype,
                                                    self.agent_sw_version,
                                                    self.subfunctions,
                                                    self.agent_state)
            for patch_id in list(sc.interim_state):
                if ip in sc.interim_state[patch_id]:
                    sc.interim_state[patch_id].remove(ip)
                    if len(sc.interim_state[patch_id]) == 0:
                        del sc.interim_state[patch_id]
            sc.hosts_lock.release()
        else:
            sc.hosts_lock.release()

    def send(self, sock):  # pylint: disable=unused-argument
        LOG.error("Should not get here")


class PatchMessageAgentInstallReq(messages.PatchMessage):
    def __init__(self, additional_data=None):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_AGENT_INSTALL_REQ)
        self.ip = None
        self.force = False
        self.major_release = None
        self.commit_id = None
        self.additional_data = additional_data

    def encode(self):
        global sc
        messages.PatchMessage.encode(self)
        self.message['force'] = self.force
        self.message['major_release'] = self.major_release
        self.message['commit_id'] = self.commit_id
        if self.additional_data:
            self.message['additional_data'] = self.additional_data.copy()

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        self.encode()
        message = json.dumps(self.message)
        msg = f"sending install request to node: {self.ip} with {message}"
        LOG.info(msg)
        sock.sendto(str.encode(message), (self.ip, cfg.agent_port))


class PatchMessageAgentInstallResp(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_AGENT_INSTALL_RESP)
        self.status = False
        self.reject_reason = None
        self.reboot_required = False
        reload_release_data()

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'status' in data:
            self.status = data['status']
        if 'reject_reason' in data:
            self.reject_reason = data['reject_reason']
        if 'reboot_required' in data:
            self.reboot_required = data['reboot_required']

    def encode(self):
        # Nothing to add, so just call the super class
        messages.PatchMessage.encode(self)

    def _set_host_install_completed(self, host):
        global sc

        sc.hosts_lock.acquire()
        try:
            host.install_status = self.status
            host.install_pending = False
            host.install_reject_reason = self.reject_reason
        finally:
            sc.hosts_lock.release()

    def handle(self, sock, addr):
        LOG.info("Handling install resp from %s", addr[0])
        global sc

        ip = addr[0]
        sc.hosts_lock.acquire()
        try:
            # NOTE(bqian) seems like trying to tolerate a failure situation
            # that a host is directed to install a patch but during the installation
            # software-controller-daemon gets restarted
            # should remove the sc.hosts which is in memory volatile storage and replaced with
            # permanent deploy-host entity
            if ip not in sc.hosts:
                sc.hosts[ip] = AgentNeighbour(ip)

            host = sc.hosts[ip]
            hostname = host.hostname
        finally:
            sc.hosts_lock.release()

        dbapi = get_instance()
        deploy = dbapi.get_deploy_all()
        if len(deploy) == 0:
            LOG.info("No deploy in progress. ignore install resp from %s", addr[0])
            return
        deploy = deploy[0]

        success = False
        deploy_host_state = DeployHostState(hostname)

        try:
            if self.status:
                deploying = ReleaseState(release_state=states.DEPLOYING)
                if deploying.is_major_release_deployment():
                    # For major release deployment, update sysinv ihost.sw_version
                    # so that right manifest can be generated.
                    sw_version = utils.get_major_release_version(deploy.get("to_release"))
                    msg = f"Update {hostname} to {sw_version}"
                    LOG.info(msg)
                    try:
                        update_host_sw_version(hostname, sw_version)
                    except Exception:
                        # Failed a step, fail the host deploy for reattempt
                        return

                success = True
        finally:
            if success:
                deploy_host_state.deployed()
                if self.reboot_required:
                    sc.manage_software_alarm(fm_constants.FM_ALARM_ID_USM_DEPLOY_HOST_SUCCESS_RR,
                                             fm_constants.FM_ALARM_STATE_SET,
                                             "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST, hostname))
            else:
                deploy_host_state.deploy_failed()
                sc.manage_software_alarm(fm_constants.FM_ALARM_ID_USM_DEPLOY_HOST_FAILURE,
                                         fm_constants.FM_ALARM_STATE_SET,
                                         "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST, hostname))

            self._set_host_install_completed(host)

    def send(self, sock):  # pylint: disable=unused-argument
        LOG.error("Should not get here")


class PatchMessageDropHostReq(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DROP_HOST_REQ)
        self.ip = None

    def encode(self):
        messages.PatchMessage.encode(self)
        self.message['ip'] = self.ip

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'ip' in data:
            self.ip = data['ip']

    def handle(self, sock, addr):
        global sc
        host = addr[0]
        if host == cfg.get_mgmt_ip():
            # Ignore messages from self
            return

        if self.ip is None:
            LOG.error("Received PATCHMSG_DROP_HOST_REQ with no ip: %s", json.dumps(self.data))
            return

        sc.drop_host(self.ip, sync_nbr=False)
        return

    def send(self, sock):
        global sc
        if sc.install_local:
            return
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class SoftwareMessageDeployStateUpdate(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DEPLOY_STATE_UPDATE)
        self.data = None

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        self.data = data

    def encode(self):
        global sc
        messages.PatchMessage.encode(self)
        filesystem_data = utils.get_software_filesystem_data()
        deploys_state = {"deploy_host": filesystem_data.get("deploy_host", {}),
                         "deploy": filesystem_data.get("deploy", {})}
        self.message["deploy_state"] = deploys_state

    def handle(self, sock, addr):
        global sc
        if sc.mgmt_ip == addr[0]:
            # update from localhost, ignore
            return

        filesystem_data = utils.get_software_filesystem_data()
        synced_filesystem_data = utils.get_synced_software_filesystem_data()

        actual_state = {"deploy_host": filesystem_data.get("deploy_host", {}),
                        "deploy": filesystem_data.get("deploy", {})}

        synced_state = {"deploy_host": synced_filesystem_data.get("deploy_host", {}),
                        "deploy": synced_filesystem_data.get("deploy", {})}

        peer_state = {"deploy_host": self.data.get("deploy_state").get("deploy_host", {}),
                      "deploy": self.data.get("deploy_state").get("deploy", {})}

        result = "diverged"
        if actual_state == peer_state:
            result = messages.MSG_ACK_SUCCESS
        elif actual_state == synced_state:
            result = messages.MSG_ACK_SUCCESS
            utils.save_to_json_file(constants.SOFTWARE_JSON_FILE, peer_state)

        if result == messages.MSG_ACK_SUCCESS:
            utils.save_to_json_file(constants.SYNCED_SOFTWARE_JSON_FILE, peer_state)

        resp = SoftwareMessageDeployStateUpdateAck()
        resp.send(sock, result)

    def send(self, sock):
        global sc
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class SoftwareMessageDeployStateUpdateAck(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DEPLOY_STATE_UPDATE_ACK)
        self.peer_state_data = {}

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        self.peer_state_data = data

    def encode(self, result):  # pylint: disable=arguments-differ
        messages.PatchMessage.encode(self)
        synced_data = utils.get_synced_software_filesystem_data()
        self.message["result"] = result
        self.message["deploy_state"] = synced_data

    def handle(self, sock, addr):
        global sc
        if sc.mgmt_ip == addr[0]:
            # update from localhost, ignore
            return

        if self.peer_state_data["result"] == messages.MSG_ACK_SUCCESS:
            LOG.debug("Peer controller is synced with value: %s",
                      self.peer_state_data["deploy_state"])
            utils.save_to_json_file(constants.SYNCED_SOFTWARE_JSON_FILE,
                                    self.peer_state_data["deploy_state"])
        else:
            LOG.error("Peer controller deploy state has diverged.")

    def send(self, sock, result):
        self.encode(result)
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class SWMessageDeployStateChanged(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DEPLOY_STATE_CHANGED)
        self.valid = False
        self.agent = None
        self.deploy_state = None
        self.hostname = None
        self.host_state = None

    def decode(self, data):
        """
        The message is a serialized json object:
        {
            "msgtype": "deploy-state-changed",
            "msgversion": 1,
            "agent": "<a valid agent>",
            "deploy-state": "<deploy-state>",
            "hostname": "<hostname>",
            "host-state": "<host-deploy-substate>"
        }
        """

        messages.PatchMessage.decode(self, data)

        self.valid = True
        self.agent = None

        valid_agents = ['deploy-start', 'deploy-activate', 'deploy-activate-rollback', 'admin']
        if 'agent' in data:
            self.agent = data['agent']
        else:
            self.agent = 'unknown'

        if self.agent not in valid_agents:
            # ignore msg from unknown senders
            LOG.info("%s received from unknown agent %s" %
                     (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent))
            self.valid = False

        valid_state = {
            DEPLOY_STATES.START_DONE.value: DEPLOY_STATES.START_DONE,
            DEPLOY_STATES.START_FAILED.value: DEPLOY_STATES.START_FAILED,
            DEPLOY_STATES.ACTIVATE_FAILED.value: DEPLOY_STATES.ACTIVATE_FAILED,
            DEPLOY_STATES.ACTIVATE_DONE.value: DEPLOY_STATES.ACTIVATE_DONE,
            DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE.value: DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE,
            DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED.value: DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED,
            DEPLOY_STATES.HOST_FAILED.value: DEPLOY_STATES.HOST_FAILED
        }
        if 'deploy-state' in data and data['deploy-state']:
            deploy_state = data['deploy-state']
            if deploy_state in valid_state:
                self.deploy_state = valid_state[deploy_state]
                LOG.info("%s received from %s with deploy-state %s" %
                         (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, deploy_state))
            else:
                self.valid = False
                LOG.error("%s received from %s with invalid deploy-state %s" %
                          (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, deploy_state))

        if 'hostname' in data and data['hostname']:
            self.hostname = data['hostname']

        if 'host-state' in data and data['host-state']:
            host_state = states.DEPLOY_HOST_STATES(data['host-state'])
            if host_state not in states.VALID_HOST_DEPLOY_STATE:
                LOG.error("%s received from %s with invalid host-state %s" %
                          (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, host_state))
                self.valid = False
            else:
                self.host_state = host_state

        if self.valid:
            # A valid message carries either a host update (hostname plus
            # host-state) or a deploy update (deploy-state), but not both.
            self.valid = (bool(self.host_state and self.hostname) != bool(self.deploy_state))

        if not self.valid:
            LOG.error("%s received from %s as invalid %s" %
                      (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, data))

    def handle(self, sock, addr):
        global sc
        if not self.valid:
            # nothing to do
            return

        if self.deploy_state:
            LOG.info("Received deploy state changed to %s, agent %s" %
                     (self.deploy_state, self.agent))
            try:
                sc.deploy_state_changed(self.deploy_state)
            except Exception as e:
                LOG.error("Deploy state change failed: %s" % str(e))
        else:
            LOG.info("Received %s deploy host state changed to %s, agent %s" %
                     (self.hostname, self.host_state, self.agent))
            sc.host_deploy_state_changed(self.hostname, self.host_state)

        sock.sendto(str.encode("OK"), addr)

    def send(self, sock):
        global sc
        LOG.info("sending deploy state changed")
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port))


class SoftwareMessageDeployDeleteCleanupReq(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DEPLOY_DELETE_CLEANUP_REQ)
        self.ip = None
        self.major_release = None

    def encode(self):
        messages.PatchMessage.encode(self)
        self.message["major_release"] = self.major_release

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        global sc
        LOG.info("Sending deploy delete cleanup request to all nodes.")
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (sc.agent_address, cfg.agent_port))


class SoftwareMessageDeployDeleteCleanupResp(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_DEPLOY_DELETE_CLEANUP_RESP)
        self.success = None

    def decode(self, data):
        messages.PatchMessage.decode(self, data)
        if 'success' in data:
            self.success = data['success']

    def encode(self):
        # Nothing to add, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        ip = addr[0]
        LOG.info("Handling deploy delete cleanup resp from %s", ip)
        global sc
        if self.success:
            LOG.info("Host %s successfully executed deploy delete "
                     "cleanup tasks." % sc.hosts[ip].hostname)
            return
        LOG.error("Host %s failed executing deploy delete "
                  "cleanup tasks." % sc.hosts[ip].hostname)

    def send(self, sock):  # pylint: disable=unused-argument
        LOG.error("Should not get here")


class SoftwareMessageCheckAgentAliveReq(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_CHECK_AGENT_ALIVE_REQ)
        self.ip = None

    def encode(self):
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        LOG.error("Should not get here")

    def send(self, sock):
        LOG.info("Sending check agent alive to %s", self.ip)
        self.encode()
        message = json.dumps(self.message)
        sock.sendto(str.encode(message), (self.ip, cfg.agent_port))


class SoftwareMessageCheckAgentAliveResp(messages.PatchMessage):
    def __init__(self):
        messages.PatchMessage.__init__(self, messages.PATCHMSG_CHECK_AGENT_ALIVE_RESP)
        self.status = False

    def decode(self, data):
        messages.PatchMessage.decode(self, data)

    def encode(self):
        # Nothing to add, so just call the super class
        messages.PatchMessage.encode(self)

    def handle(self, sock, addr):
        LOG.info("Handling check agent alive resp from %s", addr[0])
        global sc

        ip = addr[0]
        sc.hosts_lock.acquire()
        sc.hosts[ip].is_alive = True
        sc.hosts_lock.release()
        LOG.info("Agent from %s is reachable and alive." % ip)

    def send(self, sock):  # pylint: disable=unused-argument
        LOG.error("Should not get here")


class PatchController(PatchService):
    def __init__(self):
        PatchService.__init__(self)

        # Locks
        self.socket_lock = threading.RLock()
        self.controller_neighbours_lock = threading.RLock()
        self.hosts_lock = threading.RLock()

        self.hosts = {}
        self.controller_neighbours = {}
        self.host_mgmt_ip = []

        self.db_api_instance = get_instance()

        self.ignore_errors = 'False'

        # interim_state is used to track hosts that have not responded
        # with fresh queries since a patch was applied or removed, on
        # a per-patch basis. This allows the patch controller to move
        # patches immediately into a "Partial" state until all nodes
        # have responded.
        #
        self.interim_state = {}
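        # Illustration (hypothetical IDs and IPs): right after an apply,
        # interim_state might look like
        #     {"PATCH_0001": ["192.168.204.2", "192.168.204.3"]}
        # and each IP is removed as the corresponding host sends a fresh
        # detailed query response (see PatchMessageQueryDetailedResp.handle).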

        self.sock_out = None
        self.sock_in = None
        self.controller_address = None
        self.agent_address = None
        self.patch_op_counter = 1
        reload_release_data()
        try:
            self.latest_feed_commit = ostree_utils.get_feed_latest_commit(SW_VERSION)
        except OSTreeCommandFail:
            LOG.exception("Failure to fetch the feed ostree latest log while "
                          "initializing Patch Controller")
            self.latest_feed_commit = None

        self.base_pkgdata = BasePackageData()

        # This is the alarm cache. It will be used to store the last raised alarm id
        self.usm_alarm = {constants.LAST_IN_SYNC: False}
        self.hostname = socket.gethostname()
        self.fm_api = fm_api.FaultAPIs()

        self.allow_insvc_patching = True

        if os.path.exists(app_dependency_filename):
            try:
                with open(app_dependency_filename, 'r') as f:
                    self.app_dependencies = json.loads(f.read())
            except Exception:
                LOG.exception("Failed to read app dependencies: %s", app_dependency_filename)
        else:
            self.app_dependencies = {}

        if os.path.isfile(state_file):
            self.read_state_file()
        else:
            self.write_state_file()

        # Create patch activation scripts folder
        if not os.path.exists(PATCH_MIGRATION_SCRIPT_DIR):
            os.makedirs(PATCH_MIGRATION_SCRIPT_DIR, 0o755)

        self.register_deploy_state_change_listeners()

    def _state_changed_sync(self, *args):  # pylint: disable=unused-argument
        if is_simplex():
            # ensure the in-sync state for SX
            # treat it as SX for deploy before bootstrap
            shutil.copyfile(constants.SOFTWARE_JSON_FILE, constants.SYNCED_SOFTWARE_JSON_FILE)
        else:
            self._update_state_to_peer()

    def _notify_vim_on_state_change(self, target_state):
        """Notify VIM of state change.

        This method will notify VIM when one of the following state changes is made:
        - start-done
        - start-failed
        - activate-done
        - activate-failed
        - activate-rollback-done
        - activate-rollback-failed

        If new async states are added they should be added here.

        Args:
            target_state: The new deployment state to notify VIM about
        """

        if self.pre_bootstrap:
            return

        if target_state not in [
            DEPLOY_STATES.START_DONE,
            DEPLOY_STATES.START_FAILED,
            DEPLOY_STATES.ACTIVATE_DONE,
            DEPLOY_STATES.ACTIVATE_FAILED,
            DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE,
            DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED,
        ]:
            return

        # Get local hostname
        LOG.info("Notifying VIM of state change: %s", target_state)
        trigger_vim_host_audit(socket.gethostname())

    def register_deploy_state_change_listeners(self):
        # data sync listener
        DeployState.register_event_listener(self._state_changed_sync)
        DeployHostState.register_event_listener(self._state_changed_sync)

        DeployHostState.register_event_listener(DeployState.host_deploy_updated)
        DeployState.register_event_listener(ReleaseState.deploy_updated)
        DeployState.register_event_listener(self.create_clean_up_deployment_alarm)

        # VIM notifications
        DeployState.register_event_listener(self._notify_vim_on_state_change)
        # TODO(jkraitbe): Add host-deploy when that becomes async

    @property
    def release_collection(self):
        swrc = get_SWReleaseCollection()
        return swrc

    def update_config(self):
        cfg.read_config()

        if self.port != cfg.controller_port:
            self.port = cfg.controller_port

        # Loopback interface does not support multicast messaging, therefore
        # revert to using unicast messaging when configured against the
        # loopback device
        if self.pre_bootstrap:
            mgmt_ip = utils.gethostbyname(constants.PREBOOTSTRAP_HOSTNAME)
            self.mcast_addr = None
            self.controller_address = mgmt_ip
            self.agent_address = mgmt_ip
        elif cfg.get_mgmt_iface() == constants.LOOPBACK_INTERFACE_NAME:
            mgmt_ip = cfg.get_mgmt_ip()
            self.mcast_addr = None
            self.controller_address = mgmt_ip
            self.agent_address = mgmt_ip
        else:
            self.mcast_addr = cfg.controller_mcast_group
            self.controller_address = cfg.controller_mcast_group
            self.agent_address = cfg.agent_mcast_group

    def socket_lock_acquire(self):
        self.socket_lock.acquire()

    def socket_lock_release(self):
        try:
            self.socket_lock.release()
        except Exception:
            pass

    def write_state_file(self):
        config = configparser.ConfigParser(strict=False)

        cfgfile = open(state_file, 'w')

        config.add_section('runtime')
        config.set('runtime', 'patch_op_counter', str(self.patch_op_counter))
        config.write(cfgfile)
        cfgfile.close()

    def read_state_file(self):
        config = configparser.ConfigParser(strict=False)

        config.read(state_file)

        try:
            counter = config.getint('runtime', 'patch_op_counter')
            self.patch_op_counter = counter

            LOG.info("patch_op_counter is: %d", self.patch_op_counter)
        except configparser.Error:
            LOG.exception("Failed to read state info")

    def handle_nbr_patch_op_counter(self, host, nbr_patch_op_counter):
        if self.patch_op_counter >= nbr_patch_op_counter:
            return

        # NOTE(bqian) sync_from_nbr returns "False" if sync operations failed.
        # need to think of reattempt to deal w/ the potential failure.
        self.sync_from_nbr(host)
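
    # Illustration: patch_op_counter is a monotonically increasing stamp of
    # local patching operations (see inc_patch_op_counter). If this controller
    # is at 5 and the neighbour hellos with 7, this controller is behind and
    # pulls the newer state via sync_from_nbr().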

    def sync_from_nbr(self, host):
        # Sync the software repo
        host_url = utils.ip_to_url(host)
        try:
            output = subprocess.check_output(["rsync",
                                              "-acv",
                                              "--delete",
                                              "--exclude", "tmp",
                                              "--exclude", "software.json",
                                              "rsync://%s/software/" % host_url,
                                              "%s/" % constants.SOFTWARE_STORAGE_DIR],
                                             stderr=subprocess.STDOUT)
            LOG.info("Synced to mate software via rsync: %s", output)
        except subprocess.CalledProcessError as e:
            LOG.error("Failed to rsync: %s", e.output)
            return False

        try:
            output = subprocess.check_output(["rsync",
                                              "-acv",
                                              "--delete",
                                              "rsync://%s/repo/" % host_url,
                                              "%s/" % repo_root_dir],
                                             stderr=subprocess.STDOUT)
            LOG.info("Synced to mate repo via rsync: %s", output)
        except subprocess.CalledProcessError as e:
            LOG.error("Failed to rsync: %s", e.output)
            return False

        try:
            for neighbour in list(self.hosts):
                if (self.hosts[neighbour].nodetype == "controller" and
                        self.hosts[neighbour].ip == host):
                    LOG.info("Starting sync controllers")
                    # The output is a string that lists the directories
                    # Example output:
                    # >>> dir_names = sh.ls("/var/www/pages/feed/")
                    # >>> dir_names.stdout
                    # b'rel-22.12 rel-22.5\n'
                    dir_names = sh.ls(constants.FEED_OSTREE_BASE_DIR)

                    # Convert the output above into a list that can be iterated
                    # >>> list_of_dirs = dir_names.stdout.decode().rstrip().split()
                    # >>> print(list_of_dirs)
                    # ['rel-22.12', 'rel-22.5']
                    list_of_dirs = dir_names.stdout.decode("utf-8").rstrip().split()

                    for rel_dir in list_of_dirs:

                        # todo(lvieira): N-1 feed folders are filtered out of
                        # the sync. Recheck this if, in the future, applying
                        # an N-1 patch is supported on the system controller.
                        rel_version = rel_dir.split("rel-")[-1]
                        if rel_version < SW_VERSION:
                            LOG.info("Skip syncing %s inactive release", rel_dir)
                            continue

                        feed_repo = "%s/%s/ostree_repo/" % (constants.FEED_OSTREE_BASE_DIR, rel_dir)
                        if not os.path.isdir(feed_repo):
                            LOG.info("Skipping feed dir %s", feed_repo)
                            continue
                        LOG.info("Syncing %s", feed_repo)
                        output = subprocess.check_output(["ostree",
                                                          "--repo=%s" % feed_repo,
                                                          "pull",
                                                          "--depth=-1",
                                                          "--mirror",
                                                          "starlingx"],
                                                         stderr=subprocess.STDOUT)
                        output = subprocess.check_output(["ostree",
                                                          "summary",
                                                          "--update",
                                                          "--repo=%s" % feed_repo],
                                                         stderr=subprocess.STDOUT)
                        LOG.info("Synced to mate feed via ostree pull: %s", output)

                    output = subprocess.check_output(["rsync",
                                                      "-acv",
                                                      "--delete",
                                                      "rsync://%s/update_scripts/" % host_url,
                                                      "%s/" % PATCH_MIGRATION_SCRIPT_DIR],
                                                     stderr=subprocess.STDOUT)
                    LOG.info("Synced %s folder between controllers: %s"
                             % (PATCH_MIGRATION_SCRIPT_DIR, output))

        except subprocess.CalledProcessError as e:
            LOG.error("Failed during controllers sync tasks: %s", e.output)
            return False
        except Exception as e:
            LOG.error("Exception while syncing controllers: %s", e)
            return False

        self.read_state_file()

        self.interim_state = {}
        reload_release_data()

        if os.path.exists(app_dependency_filename):
            try:
                with open(app_dependency_filename, 'r') as f:
                    self.app_dependencies = json.loads(f.read())
            except Exception:
                LOG.exception("Failed to read app dependencies: %s", app_dependency_filename)
        else:
            self.app_dependencies = {}

        return True

    def inc_patch_op_counter(self):
        self.patch_op_counter += 1
        self.write_state_file()

    def get_release_dependency_list(self, release_id, preinstalled_patches=None):
        """
        Returns a list of software releases that are required by this release.
        Example: If R5 requires R4 and R1, R4 requires R3 and R1, and R3 requires R1,
        then for input param release_id='R5' it will return ['R4', 'R1', 'R3']
        :param release_id: The software release ID
        :param preinstalled_patches: A list containing all pre-installed patches
        """

        def get_dependencies(release_id, visited):
            release = self.release_collection.get_release_by_id(release_id)
            if release is None:
                error = f"Not all required releases are uploaded, missing {release_id}"
                raise SoftwareServiceError(error=error)

            dependencies = []
            for req_release in release.requires_release_ids:
                if req_release not in visited:
                    visited.add(req_release)
                    dependencies.append(req_release)
                    if req_release not in preinstalled_patches:
                        dependencies.extend(get_dependencies(req_release, visited))
            return dependencies

        if preinstalled_patches is None:
            preinstalled_patches = []

        return get_dependencies(release_id, set())
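
    # A minimal standalone sketch of the traversal above, assuming a
    # hypothetical requires-map instead of the real release collection:
    #
    #     requires = {"R5": ["R4", "R1"], "R4": ["R1", "R3"],
    #                 "R3": ["R1"], "R1": []}
    #
    #     def deps(rid, visited=None):
    #         visited = visited if visited is not None else set()
    #         out = []
    #         for req in requires[rid]:
    #             if req not in visited:
    #                 visited.add(req)
    #                 out.append(req)
    #                 out.extend(deps(req, visited))
    #         return out
    #
    #     deps("R5")  # ['R4', 'R1', 'R3'] -- depth-first, duplicates skipped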

    def get_ostree_tar_filename(self, patch_sw_version, patch_id):
        '''
        Returns the path of the ostree tarball
        :param patch_sw_version: sw version this patch must be applied to
        :param patch_id: The patch ID
        '''
        ostree_tar_dir = package_dir[patch_sw_version]
        ostree_tar_filename = "%s/%s-software.tar" % (ostree_tar_dir, patch_id)
        return ostree_tar_filename

    def delete_start_install_script(self, patch_id):
        '''
        Deletes the start and install scripts associated with the patch
        :param patch_id: The patch ID
        '''
        release = self.release_collection.get_release_by_id(patch_id)
        scripts = ["pre_start", "post_start", "pre_install", "post_install"]

        for script in scripts:
            script_name = getattr(release, script, None)
            if script_name:
                script_path = os.path.join(root_scripts_dir, f"{patch_id}_{script_name}")
                try:
                    os.remove(script_path)
                    LOG.info("Removed %s script" % script_path)
                except OSError:
                    msg = "Failed to remove start/install script for %s" % patch_id
                    LOG.warning(msg)

    def delete_patch_activate_scripts(self, patch_id):
        '''
        Deletes the activate scripts associated with the patch
        :param patch_id: The patch ID
        '''

        release = self.release_collection.get_release_by_id(patch_id)
        activate_scripts_list = release.activation_scripts

        for script in activate_scripts_list:
            full_name_file = "%s_%s" % (patch_id, script)
            script_path = "%s/%s" % (root_scripts_dir, full_name_file)

            try:
                os.remove(script_path)
            except OSError:
                msg = "Failed to remove the activate script for %s" % patch_id
                LOG.warning(msg)

    def run_semantic_check(self, action, patch_list):
        if not os.path.exists(INITIAL_CONFIG_COMPLETE_FLAG):
            # Skip semantic checks if initial configuration isn't complete
            return

        # Pass the current patch state to the semantic check as a series of args
        patch_state_args = []
        for release in self.release_collection.iterate_releases():
            patch_state = '%s=%s' % (release.id, release.state)
            patch_state_args += ['-p', patch_state]

        # Run semantic checks, if any
        for patch_id in patch_list:
            semchk = os.path.join(constants.SEMANTICS_DIR, action, patch_id)

            if os.path.exists(semchk):
                try:
                    LOG.info("Running semantic check: %s", semchk)
                    subprocess.check_output([semchk] + patch_state_args,
                                            stderr=subprocess.STDOUT)
                    LOG.info("Semantic check %s passed", semchk)
                except subprocess.CalledProcessError as e:
                    msg = "Semantic check failed for %s:\n%s" % (patch_id, e.output)
                    LOG.exception(msg)
                    raise SoftwareFail(msg)
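
    # For illustration (hypothetical IDs and paths): for action "apply" and
    # patch "PATCH_0001", run_semantic_check() executes something like
    #     <SEMANTICS_DIR>/apply/PATCH_0001 -p PATCH_0001=available -p PATCH_0002=deployed
    # i.e. one "-p <release-id>=<state>" pair per known release.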

    def software_install_local_api(self, delete):
        """
        Enable patch installation to local controller
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        dbapi = get_instance()
        deploy = dbapi.get_deploy_all()
        if len(deploy) > 0:
            msg_info += "Software Deploy operation is in progress.\n"
            msg_info += "Please finish current deploy before modifying install local mode.\n"
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        if os.path.isfile(INSTALL_LOCAL_FLAG) and delete:
            try:
                os.remove(INSTALL_LOCAL_FLAG)
            except Exception:
                LOG.exception("Failed to clear %s flag", INSTALL_LOCAL_FLAG)
            msg = "Software deployment in local installation mode is stopped"
            msg_info += f"{msg}.\n"
            LOG.info(msg)
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        elif not delete and not os.path.isfile(INSTALL_LOCAL_FLAG):
            open(INSTALL_LOCAL_FLAG, 'a').close()
            msg = "Software deployment in local installation mode is started"
            msg_info += f"{msg}.\n"
            LOG.info(msg)
            return dict(info=msg_info, warning=msg_warning, error=msg_error)
        else:
            mode = 'disabled' if delete else 'enabled'
            msg_info += f"Software deployment in local installation mode is already {mode}.\n"
            return dict(info=msg_info, warning=msg_warning, error=msg_error)
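
    # The install-local toggle reports through a result dict shaped like
    #     {"info": "...", "warning": "", "error": ""}
    # which callers can surface directly.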

    def major_release_upload_check(self):
        """
        major release upload semantic check
        """
        valid_controllers = ['controller-0']
        if socket.gethostname() not in valid_controllers:
            msg = f"Upload rejected, major release must be uploaded to {valid_controllers}"
            LOG.info(msg)
            raise SoftwareServiceError(error=msg)

        max_major_releases = 2
        major_releases = []
        for rel in self.release_collection.iterate_releases():
            major_rel = rel.sw_version
            if major_rel not in major_releases:
                major_releases.append(major_rel)

        # Only the system controller can have 2 extra major releases (N+1 and N-1)
        max_releases = max_major_releases + 1 if is_system_controller() else max_major_releases
        if len(major_releases) >= max_releases:
            msg = f"Major releases {major_releases} have already been uploaded{' in system controller' if is_system_controller() else ''}. " + \
                  f"Max major releases is {max_releases}"
            LOG.info(msg)
            raise MaxReleaseExceeded(msg)
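
    # For illustration (hypothetical versions): with major releases 24.09 and
    # 22.12 already uploaded, uploading a third major release is rejected with
    # MaxReleaseExceeded, unless this host is a DC system controller, which is
    # allowed one extra release (both N+1 and N-1 alongside N).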

    def _run_load_import(self, from_release, to_release, iso_mount_dir, upgrade_files):
        """
        Run load and import
        :param from_release: From release
        :param to_release: To release
        :param iso_mount_dir: ISO mount directory
        :param upgrade_files: dict of upgrade files
        :return: info, warning, error messages, dict of release metadata info
        """
        local_info = ""
        local_warning = ""
        local_error = ""
        release_meta_info = {}

        def run_script_command(cmd):
            LOG.info("Running load import command: %s", " ".join(cmd))
            result = subprocess.run(cmd, stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT, check=True, text=True)
            return (result.stdout, None) if result.returncode == 0 else (None, result.stdout)

        # Check if major-release-upload script exists in the iso
        has_release_upload_script = os.path.isfile(os.path.join(
            iso_mount_dir, 'upgrades', 'software-deploy', constants.MAJOR_RELEASE_UPLOAD_SCRIPT))

        if has_release_upload_script:
            # major-release-upload script is found. This iso supports upgrade from USM
            try:
                # Copy iso /upgrades/software-deploy/ to /opt/software/rel-<rel>/bin/
                to_release_bin_dir = os.path.join(
                    constants.SOFTWARE_STORAGE_DIR, ("rel-%s" % to_release), "bin")
                if os.path.exists(to_release_bin_dir):
                    shutil.rmtree(to_release_bin_dir)
                shutil.copytree(os.path.join(iso_mount_dir, "upgrades", constants.SOFTWARE_DEPLOY_FOLDER),
                                to_release_bin_dir, symlinks=True)

                # Run major-release-upload script
                import_script = os.path.join(to_release_bin_dir, constants.MAJOR_RELEASE_UPLOAD_SCRIPT)
                load_import_cmd = [
                    str(import_script),
                    f"--from-release={from_release}",
                    f"--to-release={to_release}",
                    f"--iso-dir={iso_mount_dir}"
                ]

                load_import_info, load_import_error = run_script_command(load_import_cmd)
                local_info += load_import_info or ""
                local_error += load_import_error or ""

                # Copy metadata.xml to /opt/software/rel-<rel>/
                to_file = os.path.join(constants.SOFTWARE_STORAGE_DIR,
                                       ("rel-%s" % to_release), "metadata.xml")
                metadata_file = os.path.join(iso_mount_dir, "upgrades", "metadata.xml")
                shutil.copyfile(metadata_file, to_file)

                # Update the release metadata
                # metadata files have been copied over to the metadata/available directory
                reload_release_data()
                LOG.info("Updated release metadata for %s", to_release)

                release_meta_info = self.get_release_meta_info(iso_mount_dir, upgrade_files)

                return local_info, local_warning, local_error, release_meta_info

            except Exception as e:
                LOG.exception("Error occurred while running load import: %s", str(e))
                raise

        # At this step, the major-release-upload script was not found in the iso.
        # Therefore, we run the local major-release-upload script, which supports
        # importing an N-1 iso that doesn't support the USM feature.
        # This is the special case where *only* the DC system controller can import this iso.
        # TODO(ShawnLi): remove the code below when this special case is not supported
        try:
            local_import_script = os.path.join(
                "/usr/sbin/software-deploy/", constants.MAJOR_RELEASE_UPLOAD_SCRIPT)

            load_import_cmd = [local_import_script,
                               "--from-release=%s" % from_release,
                               "--to-release=%s" % to_release,
                               "--iso-dir=%s" % iso_mount_dir,
                               "--is-usm-iso=False"]

            load_import_info, load_import_error = run_script_command(load_import_cmd)
            local_info += load_import_info or ""
            local_error += load_import_error or ""

            # Update the release metadata
            # metadata files have been copied over to the metadata/available directory
            reload_release_data()
            LOG.info("Updated release metadata for %s", to_release)

            release_meta_info = {
                os.path.basename(upgrade_files[constants.ISO_EXTENSION]): {
                    "id": constants.RELEASE_GA_NAME % to_release,
                    "sw_release": to_release,
                },
                os.path.basename(upgrade_files[constants.SIG_EXTENSION]): {
                    "id": None,
                    "sw_release": None,
                }
            }
            return local_info, local_warning, local_error, release_meta_info

        except Exception as e:
            LOG.exception("Error occurred while running local load import script: %s", str(e))
            raise

    def get_release_meta_info(self, iso_mount_dir, upgrade_files) -> dict:
        """
        Get release metadata information from metadata.xml
        :param iso_mount_dir: ISO mount directory
        :param upgrade_files: dict of upgrade files
        :return: dict of release metadata info
        """

        # Get release metadata
        # NOTE(bqian) to_release is sw_version (MM.mm), the path isn't correct
        # also prepatched iso needs to be handled.
        # should go through the release_data to find the latest release of major release
        # to_release
        abs_meta_file_dir = os.path.join(iso_mount_dir, 'upgrades')
        # Matches file names of the form "<name>-<version>-metadata.xml"
        release_metadata_file_list = utils.find_file_by_regex(
            abs_meta_file_dir, r'^([a-zA-Z]+)-([\d.]+)-metadata\.xml$')
        if len(release_metadata_file_list) == 0:
            raise SoftwareServiceError("No release metadata file found in %s" % abs_meta_file_dir)
        release_metadata_file = release_metadata_file_list[0]
        abs_stx_release_metadata_file = os.path.join(
            iso_mount_dir, 'upgrades', release_metadata_file)
        all_release_meta_info = parse_release_metadata(abs_stx_release_metadata_file)
        return {
            os.path.basename(upgrade_files[constants.ISO_EXTENSION]): {
                "id": all_release_meta_info.get("id"),
                "sw_release": all_release_meta_info.get("sw_version"),
            },
            os.path.basename(upgrade_files[constants.SIG_EXTENSION]): {
                "id": None,
                "sw_release": None,
            }
        }

    def _clean_up_load_import(
            self, iso_mount_dir, to_release, iso_file, is_import_completed, is_max_rel_exceeded):
        """
        Clean up load and import
        :param iso_mount_dir: ISO mount directory
        :param to_release: To release
        :param iso_file: ISO file
        :param is_import_completed: Is import completed
        :param is_max_rel_exceeded: Is max release exceeded
        """
        # Unmount the iso file
        if iso_mount_dir:
            unmount_iso_load(iso_mount_dir)
            LOG.info("Unmounted iso file %s", iso_file)

        # remove upload leftover in case of failure
        if to_release and not is_import_completed and not is_max_rel_exceeded:
            to_release_dir = os.path.join(constants.SOFTWARE_STORAGE_DIR, "rel-%s" % to_release)
            shutil.rmtree(to_release_dir, ignore_errors=True)
def _clean_up_inactive_load_import(self, release_version):
|
|
"""
|
|
Clean up inactive load and import
|
|
:param release_version: Release version
|
|
"""
|
|
dirs_to_remove = [
|
|
f"{constants.DC_VAULT_PLAYBOOK_DIR}/{release_version}",
|
|
f"{constants.DC_VAULT_LOADS_DIR}/{release_version}"
|
|
]
|
|
|
|
for dir_path in dirs_to_remove:
|
|
if os.path.exists(dir_path):
|
|
shutil.rmtree(dir_path, ignore_errors=True)
|
|
LOG.info("Removed %s", dir_path)
|
|
|
|
# TODO(ShawnLi): the code below is to only clean up those files that were created in usm_laod_import script
|
|
# delete 22.12 iso metadata in /opt/software/metadata/unavailable
|
|
# delete 22.12 patches in /opt/software/metadata/committed
|
|
file_patterns = [
|
|
(states.UNAVAILABLE_DIR, fr'^([a-zA-Z]+)-({release_version})-metadata\.xml$'),
|
|
(states.COMMITTED_DIR, fr'^([a-zA-Z]+)_({release_version})_PATCH_([0-9]+)-metadata\.xml$')
|
|
]
|
|
|
|
# Remove files matching patterns
|
|
for directory, pattern in file_patterns:
|
|
matched_file_names = utils.find_file_by_regex(directory, pattern)
|
|
for filename in matched_file_names:
|
|
abs_filename = os.path.join(directory, filename)
|
|
try:
|
|
os.remove(abs_filename)
|
|
LOG.info("Removed: %s", abs_filename)
|
|
except OSError:
|
|
LOG.warning("Failed to remove: %s", abs_filename)
|
|
|
|
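    # Illustrative example (hypothetical file names): with release_version="22.12",
    # the patterns above would match metadata files such as:
    #
    #   /opt/software/metadata/unavailable/starlingx-22.12-metadata.xml
    #   /opt/software/metadata/committed/starlingx_22.12_PATCH_1-metadata.xml
    #
    # Only files following the naming used by the load import script are matched.
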
    def _process_upload_upgrade_files(
            self, from_release, to_release, iso_mount_dir, supported_from_releases, upgrade_files):
        """
        Process the uploaded upgrade files
        :param from_release: From release
        :param to_release: To release
        :param iso_mount_dir: ISO mount directory
        :param supported_from_releases: List of supported releases
        :param upgrade_files: dict of upgrade files
        :return: info, warning, error messages, dict of release metadata info
        """

        # validate this major release upload
        self.major_release_upload_check()

        try:
            # Validate that the current release is supported to upgrade to the new release
            supported_versions = [v.get("version") for v in supported_from_releases]
            if SW_VERSION not in supported_versions:
                raise UpgradeNotSupported("Current release %s not supported to upgrade to %s"
                                          % (SW_VERSION, to_release))
            # Run major-release-upload script
            LOG.info("Starting load import from %s", upgrade_files[constants.ISO_EXTENSION])
            return self._run_load_import(from_release, to_release, iso_mount_dir, upgrade_files)
        except Exception as e:
            LOG.exception("Error occurred while processing upload upgrade files: %s", str(e))
            raise

    def _process_inactive_upgrade_files(
            self, from_release, to_release, iso_mount_dir, upgrade_files):
        """
        Process the uploaded inactive upgrade files, aka the N-1 release
        :param from_release: From release
        :param to_release: To release
        :param iso_mount_dir: ISO mount directory
        :param upgrade_files: dict of upgrade files
        :return: info, warning, error messages, dict of release metadata info
        """

        # validate this major release upload
        self.major_release_upload_check()

        to_release_maj_ver = utils.get_major_release_version(to_release)

        try:

            # Validate that the N-1 release from the iso file is supported to upgrade to the current N release
            current_upgrade_supported_versions = read_upgrade_support_versions(
                "/usr/rootdirs/opt/")
            supported_versions = [v.get("version") for v in current_upgrade_supported_versions]

            # to_release is the N-1 release in here
            if to_release_maj_ver not in supported_versions:
                raise UpgradeNotSupported(
                    "ISO file release version %s not supported to upgrade to %s" %
                    (to_release_maj_ver, SW_VERSION))

            # iso validation completed
            LOG.info("Starting load import from %s", upgrade_files[constants.ISO_EXTENSION])

            # from_release is set to None when uploading an N-1 load
            return self._run_load_import(from_release, to_release, iso_mount_dir, upgrade_files)

        except Exception as e:
            LOG.exception("Error occurred while processing inactive upgrade files: %s", str(e))
            raise

    def _checkout_commit_to_dc_vault_playbook_dir(self, release_version):
        """
        Checkout commit to dc-vault playbook dir
        :param release_version: release version
        :return: None
        """
        dc_vault_playbook_dir = f"{constants.DC_VAULT_PLAYBOOK_DIR}/{release_version}"

        os.makedirs(dc_vault_playbook_dir, exist_ok=True)
        ostree_repo = os.path.join(constants.FEED_DIR,
                                   "rel-%s/ostree_repo" % release_version)

        try:
            latest_commit = ostree_utils.get_feed_latest_commit(release_version)
            LOG.info("Getting latest commit for %s: %s", release_version, latest_commit)
        except OSTreeCommandFail as e:
            LOG.exception("Error occurred while getting latest commit for %s: %s",
                          release_version, str(e))
            raise

        try:
            LOG.info("Checking out commit %s to %s", latest_commit, dc_vault_playbook_dir)
            ostree_utils.checkout_commit_to_dir(
                ostree_repo, latest_commit, dc_vault_playbook_dir, sub_path=constants.PLAYBOOKS_PATH)
        except Exception:
            if os.path.exists(dc_vault_playbook_dir):
                shutil.rmtree(dc_vault_playbook_dir)
            raise

    def _process_upload_patch_files(self, patch_files):
        """
        Process the uploaded patch files
        :param patch_files: list of patch files
        :return: info, warning, error messages
        """

        local_info = ""
        local_warning = ""
        local_error = ""
        upload_patch_info = []
        try:
            # Create the directories
            for state_dir in states.DEPLOY_STATE_METADATA_DIR:
                os.makedirs(state_dir, exist_ok=True)
        except os.error:
            msg = "Failed to create directories"
            LOG.exception(msg)
            raise SoftwareFail(msg)

        for patch_file in patch_files:

            base_patch_filename = os.path.basename(patch_file)

            # Get the release_id from the patch's metadata
            # and check to see if it's already uploaded
            release_id = get_release_from_patch(patch_file, 'id')

            release = self.release_collection.get_release_by_id(release_id)
            patch_id = None
            thispatch = None

            try:
                if release:
                    if release.state == states.COMMITTED:
                        msg = "%s is committed. Metadata not updated" % release_id
                        LOG.info(msg)
                        local_info += msg + "\n"
                    elif release.state != states.AVAILABLE:
                        msg = "%s is not currently in available state to be deployed." % release_id
                        LOG.info(msg)
                        local_info += msg + "\n"
                    else:
                        # todo(abailey) PatchFile / extract_patch should be renamed
                        patch_id, thispatch, error_msg = PatchFile.extract_patch(
                            patch_file,
                            metadata_dir=states.AVAILABLE_DIR,
                            metadata_only=True,
                            existing_content=release.contents,
                            base_pkgdata=self.base_pkgdata)

                        if error_msg:
                            raise ReleaseValidationFailure(error=error_msg)

                        PatchFile.unpack_patch(patch_file)
                        reload_release_data()
                        msg = "%s is already uploaded. Updated metadata only" % release_id
                        LOG.info(msg)
                        local_info += msg + "\n"
                else:
                    patch_id, thispatch, error_msg = PatchFile.extract_patch(
                        patch_file,
                        metadata_dir=states.AVAILABLE_DIR,
                        base_pkgdata=self.base_pkgdata)

                    if error_msg:
                        raise ReleaseValidationFailure(error=error_msg)

                    PatchFile.unpack_patch(patch_file)
                    local_info += "%s is now uploaded\n" % release_id
                    reload_release_data()

                # NOTE(bqian) The check and exception raise below should be revisited;
                # if applicable, it should be applied at the beginning of all requests.
                if len(self.hosts) == 0:
                    msg = "service is running in incorrect state. No registered host"
                    raise InternalError(msg)
            except Exception as e:
                msg = "Failed to upload release %s" % release_id
                LOG.exception("%s: %s" % (msg, e))
                local_error += msg + "\n"

                if patch_id and thispatch:
                    PatchFile.delete_extracted_patch(patch_id, thispatch)

                try:
                    release_sw_version = thispatch.metadata[patch_id]["sw_version"]
                    pkg_feed_dir = "%s/rel-%s" % (constants.PACKAGE_FEED_DIR, release_sw_version)
                    apt_utils.component_remove(pkg_feed_dir, release_sw_version)
                except Exception:
                    LOG.info("Could not delete apt-ostree component, does not exist")
                continue

            release = self.release_collection.get_release_by_id(release_id)
            if release:
                upload_patch_info.append({
                    base_patch_filename: {
                        "id": release_id,
                        "sw_release": release.sw_release,  # MM.mm.pp release version
                    }
                })

        # create versioned precheck for uploaded patches
        for patch in upload_patch_info:
            filename, values = list(patch.items())[0]
            LOG.info("Creating precheck for release %s..." % values.get("id"))
            for pf in patch_files:
                if filename in pf:
                    patch_file = pf

            sw_release = values.get("sw_release")

            required_patches = []
            for dep_id in self.release_collection.get_release_by_id(values.get("id")).requires_release_ids:
                required_patches.append(version.parse(dep_id))

            # sort the required patches list and get the latest, if available
            req_patch_version = None
            if len(required_patches) > 0:
                req_patch = str(sorted(required_patches)[-1])
                _, req_patch_version, _, _ = utils.get_component_and_versions(req_patch)
                if self.release_collection.get_release_by_id(req_patch) is None:
                    LOG.warning("Required patch '%s' is not uploaded." % req_patch)

            PatchFile.create_versioned_precheck(patch_file, sw_release, req_patch_version=req_patch_version)

        return local_info, local_warning, local_error, upload_patch_info

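    # Illustrative example (hypothetical values): a successful upload of
    # starlingx-24.09.1.patch would contribute an entry to upload_patch_info
    # shaped like:
    #
    #   [{"starlingx-24.09.1.patch": {"id": "starlingx-24.09.1",
    #                                 "sw_release": "24.09.1"}}]
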
    def software_release_upload(self, release_files):
        """
        Upload software release files
        :return: dict of info, warning and error messages
        """

        msg_info = ""
        msg_warning = ""
        msg_error = ""

        upload_info = []
        is_importing_inactive_load = False

        # Refresh data, if needed
        self.base_pkgdata.loaddirs()

        msg = "Uploading files: %s" % ",".join(release_files)
        audit_log_info(msg)

        # We now need to sort the files into categories (patch or upgrade)
        patch_files = []
        upgrade_files = {}

        for uploaded_file in release_files:
            (_, ext) = os.path.splitext(uploaded_file)
            if ext in [constants.PATCH_EXTENSION]:
                patch_files.append(uploaded_file)
            elif ext == constants.ISO_EXTENSION:
                upgrade_files[constants.ISO_EXTENSION] = uploaded_file
            elif ext == constants.SIG_EXTENSION:
                upgrade_files[constants.SIG_EXTENSION] = uploaded_file
            else:
                msg = "The file extension is not supported. Supported extensions include .patch, .iso and .sig"
                LOG.exception(msg)
                raise ReleaseValidationFailure(error=msg)

        if len(upgrade_files) == 1:  # Only one upgrade file uploaded
            msg = "Missing upgrade file or signature file"
            LOG.error(msg)
            msg_error += msg + "\n"
        elif upgrade_files.get(constants.ISO_EXTENSION, None) and self.hostname != constants.CONTROLLER_0_HOSTNAME:
            raise SoftwareServiceError("Upload can only be performed on controller-0.")
        elif len(upgrade_files) == 2:  # Two upgrade files uploaded
            tmp_info = ""
            tmp_error = ""
            tmp_warning = ""
            tmp_release_meta_info = {}
            is_import_completed = True
            is_max_rel_exceeded = False

            iso = upgrade_files[constants.ISO_EXTENSION]
            sig = upgrade_files[constants.SIG_EXTENSION]

            if not verify_files([iso], sig):
                msg = "Software %s:%s signature validation failed" % (iso, sig)
                raise ReleaseValidationFailure(error=msg)

            LOG.info("iso and signature files upload completed.")

            try:
                # Mount the iso file after signature verification
                iso_mount_dir = mount_iso_load(iso, constants.TMP_DIR)
                LOG.info("Mounted iso file %s to %s", iso, iso_mount_dir)

                # Read the metadata from the iso file to get to-release and supported-from-releases
                supported_from_releases = read_upgrade_support_versions(iso_mount_dir)
                to_release = get_to_release_from_metadata_file(iso_mount_dir)
                to_release_maj_ver = utils.get_major_release_version(to_release)
                LOG.info("Reading metadata from iso file %s completed. \nto_release: %s", iso, to_release_maj_ver)

                # Same release is uploaded, return the metadata info from the iso file
                if to_release_maj_ver == SW_VERSION:
                    tmp_info = f"Uploaded release {to_release} is the same as current release on the controller"
                    tmp_release_meta_info = self.get_release_meta_info(iso_mount_dir, upgrade_files)
                elif to_release > SW_VERSION:
                    # N + 1 release is uploaded, process it regardless
                    tmp_info, tmp_warning, tmp_error, tmp_release_meta_info = self._process_upload_upgrade_files(
                        SW_VERSION, to_release, iso_mount_dir, supported_from_releases, upgrade_files)
                elif to_release < SW_VERSION and is_system_controller():
                    # N - 1 release is uploaded, process it only if the region is the system controller
                    is_importing_inactive_load = True
                    tmp_info, tmp_warning, tmp_error, tmp_release_meta_info = self._process_inactive_upgrade_files(
                        None, to_release, iso_mount_dir, upgrade_files)
                    # Checkout commit to dc-vault/playbooks directory
                    self._checkout_commit_to_dc_vault_playbook_dir(to_release_maj_ver)
            except MaxReleaseExceeded:
                is_max_rel_exceeded = True
                raise
            except Exception as e:
                LOG.error("Error occurred while processing software release upload: %s", str(e))
                is_import_completed = False
                raise
            finally:
                self._clean_up_load_import(iso_mount_dir, to_release, iso,
                                           is_import_completed, is_max_rel_exceeded)
                if is_importing_inactive_load and not is_import_completed:
                    self._clean_up_inactive_load_import(to_release)

            msg_info += tmp_info
            msg_warning += tmp_warning
            msg_error += tmp_error
            upload_info.append(tmp_release_meta_info)

        if len(patch_files) > 0:
            tmp_info, tmp_warning, tmp_error, tmp_patch_meta_info = self._process_upload_patch_files(
                patch_files)
            msg_info += tmp_info
            msg_warning += tmp_warning
            msg_error += tmp_error
            upload_info += tmp_patch_meta_info

        reload_release_data()

        return dict(info=msg_info, warning=msg_warning, error=msg_error, upload_info=upload_info)

    def release_apply_order(self, release_id, running_release_sw_version):
        """
        Determines the order of releases for applying.
        :param release_id: The release id being applied
        :param running_release_sw_version: The running release major version
        :return: List of releases in the order for applying
        """

        deployed_or_unavailable_releases_id = []
        preinstalled_patches = []
        for rel in self.release_collection.iterate_releases():
            if rel.state == states.DEPLOYED or rel.state == states.UNAVAILABLE:
                deployed_or_unavailable_releases_id.append(rel.id)

            if rel.prepatched_iso:
                preinstalled_patches = rel.preinstalled_patches

        release_dependencies = self.get_release_dependency_list(release_id, preinstalled_patches)
        release_dependencies.append(release_id)

        # filter release_dependencies to include only releases
        # that match the major running release version
        # and remove all releases already deployed, including prepatched
        to_apply_releases = [
            rel_id for rel_id in release_dependencies
            if f"-{running_release_sw_version}." in rel_id and
            rel_id not in deployed_or_unavailable_releases_id + preinstalled_patches
        ]

        to_apply_releases.sort()
        return to_apply_releases

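    # Worked example (hypothetical release ids): if starlingx-24.09.3 requires
    # starlingx-24.09.1 and starlingx-24.09.2, starlingx-24.09.1 is already
    # deployed, and the running major version is 24.09, then
    # release_apply_order("starlingx-24.09.3", "24.09") yields
    # ["starlingx-24.09.2", "starlingx-24.09.3"]: dependencies are resolved,
    # deployed/unavailable and prepatched releases are filtered out, and the
    # remainder is sorted.
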
    def release_remove_order(self, target_release_id, running_release_id, running_release_sw_version):
        """
        Determines the order of releases for removing based on the feed commit order.
        :param target_release_id: The target release id
        :param running_release_id: The running release id
        :param running_release_sw_version: The running release major version
        :return: List of releases in the order for removing
        """

        # if the release being removed is not from the running major version, it cannot be removed
        if f"-{running_release_sw_version}." not in target_release_id:
            return []

        releases = list(self.release_collection.iterate_releases_by_state(states.DEPLOYED))
        release_map = {release.id: release for release in releases}

        to_remove_releases = []
        current = running_release_id

        while current != target_release_id:
            to_remove_releases.append(current)
            current_release = release_map.get(current)
            if not current_release:
                error = f"Release {current} not found in releases map"
                raise SoftwareServiceError(error=error)

            next_release = next((r for r in releases if r.commit_id == current_release.base_commit_id), None)
            if not next_release:
                error = f"Release with commit id {current_release.base_commit_id} not found"
                raise SoftwareServiceError(error=error)

            current = next_release.id

        return to_remove_releases

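    # Worked example (hypothetical ids and commits): with deployed releases
    # R1 (commit c1), R2 (commit c2, base c1) and R3 (commit c3, base c2),
    # running release R3 and target R1, the walk follows base_commit_id
    # backwards (c3 -> c2 -> c1) and returns ["R3", "R2"], i.e. the releases
    # to remove, ordered from newest to oldest.
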
    def reset_feed_commit(self, release):
        commit_id = release.commit_id
        if commit_id is None:
            LOG.warning("Unable to find the commit id in metadata")
            return

        LOG.info("Reset feed to commit %s" % commit_id)

        try:
            feed_ostree_dir = "%s/rel-%s/ostree_repo" % \
                (constants.FEED_OSTREE_BASE_DIR, release.sw_version)

            apt_utils.run_rollback(feed_ostree_dir, commit_id)
            self.latest_feed_commit = commit_id
        except APTOSTreeCommandFail:
            msg = "Failure when resetting commit %s" % commit_id
            LOG.exception(msg)
            raise APTOSTreeCommandFail(msg)

    def software_release_delete_api(self, release_ids):
        """
        Delete release(s)
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        # Protect against duplications
        full_list = sorted(list(set(release_ids)))

        not_founds = []
        cannot_del = []
        used_by_subcloud = []
        release_list = []
        for rel_id in full_list:
            rel = self.release_collection.get_release_by_id(rel_id)
            if rel is None:
                not_founds.append(rel_id)
            else:
                if not rel.is_deletable:
                    cannot_del.append(rel_id)
                elif rel.is_ga_release and is_system_controller():
                    subcloud_by_sw_version = get_subcloud_groupby_version()
                    if rel.sw_version in subcloud_by_sw_version:
                        used_by_subcloud.append(rel_id)
                    else:
                        release_list.append(rel_id)
                else:
                    release_list.append(rel_id)

        err_msg = ""
        if not_founds:
            list_str = ','.join(not_founds)
            err_msg = f"Release{'' if len(not_founds) == 1 else 's'} {list_str} cannot be found\n"

        if cannot_del:
            list_str = ','.join(cannot_del)
            err_msg += (f"Release{'' if len(cannot_del) == 1 else 's'} {list_str} "
                        f"{'is' if len(cannot_del) == 1 else 'are'} not ready to be deleted\n")

        if used_by_subcloud:
            list_str = ','.join(used_by_subcloud)
            err_msg += f"Release{'' if len(used_by_subcloud) == 1 else 's'} {list_str} still used by subcloud(s)"

        if len(err_msg) > 0:
            raise SoftwareServiceError(error=err_msg)

        msg = "Deleting releases: %s" % ",".join(release_list)
        LOG.info(msg)
        audit_log_info(msg)

        # Handle operation
        for release_id in release_list:
            release = self.release_collection.get_release_by_id(release_id)
            release_sw_version = release.sw_version

            # Delete ostree content if it exists.
            # RPM based patches (from upgrades) will not have ostree contents
            ostree_tar_filename = self.get_ostree_tar_filename(release_sw_version, release_id)
            if os.path.isfile(ostree_tar_filename):
                try:
                    os.remove(ostree_tar_filename)
                except OSError:
                    msg = "Failed to remove ostree tarball %s" % ostree_tar_filename
                    LOG.exception(msg)
                    raise OSTreeTarFail(msg)
            is_major_release = ReleaseState(release_ids=[release.id]).is_major_release_deployment()
            if not is_major_release:
                package_repo_dir = "%s/rel-%s" % (constants.PACKAGE_FEED_DIR, release_sw_version)
                apt_utils.component_remove(package_repo_dir, release.sw_release)

            # Delete upgrade iso file in folder
            # TODO(heitormatsui): treat the prepatched iso scenario
            metadata_file = "%s-metadata.xml" % release_id
            delete_feed = False
            to_release_iso_dir = os.path.join(constants.FEED_OSTREE_BASE_DIR, ("rel-%s" % release_sw_version))

            if os.path.isdir(to_release_iso_dir):
                # check if the release being deleted is related to this feed
                if os.path.isfile("%s/upgrades/%s" % (to_release_iso_dir, metadata_file)):
                    delete_feed = True
                if delete_feed:
                    try:
                        shutil.rmtree(to_release_iso_dir)
                    except OSError:
                        msg = "Failed to remove release iso %s folder" % to_release_iso_dir
                        LOG.exception(msg)
                        raise ReleaseIsoDeleteFailure(msg)
                    msg = "Deleted feed directory %s" % to_release_iso_dir
                    LOG.info(msg)
                    msg_info += msg + "\n"

            # TODO(lbonatti): treat the upcoming versioning changes
            PatchFile.delete_versioned_directory(release.sw_release)

            # Delete N-1 load on system controller
            if is_system_controller():
                self._clean_up_inactive_load_import(release_sw_version)

            try:
                # Delete the metadata
                metadata_dir = states.RELEASE_STATE_TO_DIR_MAP[release.state]
                os.remove("%s/%s" % (metadata_dir, metadata_file))
            except OSError:
                # When deleting the load from a system controller, the unavailable
                # and committed directories are cleaned up and, if the metadata file
                # is located in one of those, it will result in an exception since
                # it would have already been removed by the
                # _clean_up_inactive_load_import method
                if (
                    is_system_controller() and
                    (
                        metadata_dir == states.UNAVAILABLE_DIR or
                        metadata_dir == states.COMMITTED_DIR
                    )
                ):
                    msg = (
                        f"Metadata file already removed: {metadata_dir}/{metadata_file}"
                    )
                    LOG.warning(msg)
                else:
                    msg = "Failed to remove metadata for %s" % release_id
                    LOG.exception(msg)
                    raise MetadataFail(msg)

            self.delete_start_install_script(release_id)
            self.delete_patch_activate_scripts(release_id)
            reload_release_data()
            msg = "%s has been deleted" % release_id
            LOG.info(msg)
            msg_info += msg + "\n"

        # Refresh data, if needed
        self.base_pkgdata.loaddirs()

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def in_sync_controller_api(self):
        """
        Check if both controllers are in sync
        by checking the database JSON file
        """
        is_in_sync = is_deploy_state_in_sync()
        return {"in_sync": is_in_sync}

    def patch_init_release_api(self, release_id):
        """
        Create an empty repo for a new release_id
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        msg = "Initializing repo for: %s" % release_id
        LOG.info(msg)
        audit_log_info(msg)

        if release_id == SW_VERSION:
            msg = "Rejected: Requested release %s is the running release" % release_id
            msg_error += msg + "\n"
            LOG.info(msg)
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        # Refresh data
        self.base_pkgdata.loaddirs()

        reload_release_data()

        repo_dir[release_id] = "%s/rel-%s" % (repo_root_dir, release_id)

        # Verify the release doesn't already exist
        if os.path.exists(repo_dir[release_id]):
            msg = "Patch repository for %s already exists" % release_id
            msg_info += msg + "\n"
            LOG.info(msg)
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        # Create the repo
        try:
            # todo(jcasteli) determine if ostree change needs a createrepo equivalent
            output = "UNDER CONSTRUCTION for OSTREE"
            LOG.info("Repo[%s] updated:\n%s", release_id, output)
        except Exception:
            msg = "Failed to update the repo for %s" % release_id
            LOG.exception(msg)

            # Wipe out what was created
            shutil.rmtree(repo_dir[release_id])
            del repo_dir[release_id]

            raise SoftwareFail(msg)

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def patch_query_what_requires(self, patch_ids):
        """
        Query the known patches to see which have dependencies on the specified patches
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        msg = "Querying what requires patches: %s" % ",".join(patch_ids)
        LOG.info(msg)
        audit_log_info(msg)

        # First, verify that all specified patches exist
        id_verification = True
        for patch_id in patch_ids:
            release = self.release_collection.get_release_by_id(patch_id)
            if release is None:
                msg = "Patch %s does not exist" % patch_id
                LOG.error(msg)
                msg_error += msg + "\n"
                id_verification = False

        if not id_verification:
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        required_patches = {}
        for release in self.release_collection.iterate_releases():
            for req_patch in release.requires_release_ids:
                if req_patch not in patch_ids:
                    continue

                if req_patch not in required_patches:
                    required_patches[req_patch] = []

                required_patches[req_patch].append(release.id)

        for patch_id in patch_ids:
            if patch_id in required_patches:
                iter_patch_list = required_patches[patch_id]
                msg_info += "%s is required by: %s\n" % (patch_id, ", ".join(sorted(iter_patch_list)))
            else:
                msg_info += "%s is not required by any patches.\n" % patch_id

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def send_latest_feed_commit_to_agent(self):
        """
        Notify the patch agent that the latest commit on the feed
        repo has been updated
        """
        # Skip sending messages if host not yet provisioned
        if self.sock_out is None:
            LOG.info("Skipping send feed commit to agent")
            return

        send_commit_to_agent = PatchMessageSendLatestFeedCommit()
        self.socket_lock.acquire()
        send_commit_to_agent.send(self.sock_out)
        self.socket_lock.release()

    def software_sync(self):
        # Increment the software_op_counter here
        self.inc_patch_op_counter()

        if self.sock_out is None or self.install_local:
            return True

        # Send the sync requests

        self.controller_neighbours_lock.acquire()
        for n in self.controller_neighbours:
            self.controller_neighbours[n].clear_synced()
        self.controller_neighbours_lock.release()

        msg = PatchMessageSyncReq()
        self.socket_lock.acquire()
        msg.send(self.sock_out)
        self.socket_lock.release()

        # Now we wait, up to two minutes. Future enhancement: wait on a condition
        my_ip = cfg.get_mgmt_ip()
        sync_rc = False
        max_time = time.time() + 120
        while time.time() < max_time:
            all_done = True
            self.controller_neighbours_lock.acquire()
            for n in self.controller_neighbours:
                if n != my_ip and not self.controller_neighbours[n].get_synced():
                    all_done = False
            self.controller_neighbours_lock.release()

            if all_done:
                LOG.info("Sync complete")
                sync_rc = True
                break

            time.sleep(0.5)

        # Send hellos to the hosts now, to get queries performed
        hello_agent = PatchMessageHelloAgent()
        self.socket_lock.acquire()
        hello_agent.send(self.sock_out)
        self.socket_lock.release()

        if not sync_rc:
            LOG.info("Timed out waiting for sync completion")
        return sync_rc

    def software_release_query_cached(self, **kwargs):
        query_state = None
        if "show" in kwargs:
            valid_query_states = [
                states.AVAILABLE,
                states.UNAVAILABLE,
                states.DEPLOYED,
                states.REMOVING,
                states.COMMITTED,
                states.DEPLOYING
            ]
            if kwargs["show"] in valid_query_states:
                query_state = kwargs["show"]

        query_release = None
        if "release" in kwargs:
            query_release = kwargs["release"]

        results = []

        def filter_by_version():
            for r in self.release_collection.iterate_releases():
                if r.sw_version in query_release:
                    yield r

        def filter_by_state():
            for rel in self.release_collection.iterate_releases_by_state(query_state):
                yield rel

        if query_state is not None:
            iterator = filter_by_state
        elif query_release is not None:
            iterator = filter_by_version
        else:
            iterator = self.release_collection.iterate_releases

        for i in iterator():
            data = i.to_query_dict()
            results.append(data)

        return results

    def software_release_query_specific_cached(self, release_ids):
        LOG.info("software release show")

        results = []

        for release_id in release_ids:
            release = self.release_collection.get_release_by_id(release_id)
            if release is not None:
                results.append(release.to_query_dict())

        return results

    def get_dependencies(self, patch_ids, recursive):
        dependencies = set()
        patch_added = False

        # Add patches to workset
        for patch_id in sorted(patch_ids):
            dependencies.add(patch_id)
            patch_added = True

        while patch_added:
            patch_added = False
            for patch_id in sorted(dependencies):
                release = self.release_collection.get_release_by_id(patch_id)
                for req in release.requires:
                    if req not in dependencies:
                        dependencies.add(req)
                        patch_added = recursive

        return sorted(dependencies)

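    # Illustrative example (hypothetical ids): if P3 requires P2 and P2 requires
    # P1, then get_dependencies(["P3"], recursive=True) returns
    # ["P1", "P2", "P3"], while recursive=False stops after the direct
    # dependencies and returns ["P2", "P3"].
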
    def patch_query_dependencies(self, patch_ids, **kwargs):
        msg = "Patch query-dependencies %s" % patch_ids
        LOG.info(msg)
        audit_log_info(msg)

        failure = False

        results = {"patches": [],
                   "error": ""}

        recursive = False
        if kwargs.get("recursive") == "yes":
            recursive = True

        # Verify patch IDs
        for patch_id in sorted(patch_ids):
            release = self.release_collection.get_release_by_id(patch_id)
            if release is None:
                errormsg = "%s is unrecognized\n" % patch_id
                LOG.info("patch_query_dependencies: %s", errormsg)
                results["error"] += errormsg
                failure = True

        if failure:
            LOG.info("patch_query_dependencies failed")
            return results

        results["patches"] = self.get_dependencies(patch_ids, recursive)

        return results

    def patch_commit(self, patch_ids, dry_run=False):
        msg = "Patch commit %s" % patch_ids
        LOG.info(msg)
        audit_log_info(msg)

        try:
            if not os.path.exists(states.COMMITTED_DIR):
                os.makedirs(states.COMMITTED_DIR)
        except os.error:
            msg = "Failed to create %s" % states.COMMITTED_DIR
            LOG.exception(msg)
            raise SoftwareFail(msg)

        failure = False
        recursive = True
        cleanup_files = set()
        results = {"info": "",
                   "error": ""}

        # Ensure there are only REL patches
        non_rel_list = []
        for release in self.release_collection.iterate_releases():
            if release.status != constants.STATUS_RELEASED:
                non_rel_list.append(release.id)

        if len(non_rel_list) > 0:
            errormsg = "A commit cannot be performed with non-REL status patches in the system:\n"
            for patch_id in non_rel_list:
                errormsg += "    %s\n" % patch_id
            LOG.info("patch_commit rejected: %s", errormsg)
            results["error"] += errormsg
            return results

        # Verify Release IDs
        for patch_id in sorted(patch_ids):
            release = self.release_collection.get_release_by_id(patch_id)
            if release is None:
                errormsg = "%s is unrecognized\n" % patch_id
                LOG.info("patch_commit: %s", errormsg)
                results["error"] += errormsg
                failure = True

        if failure:
            LOG.info("patch_commit: Failed patch ID check")
            return results

        commit_list = self.get_dependencies(patch_ids, recursive)

        # Check patch states
        avail_list = []
        for patch_id in commit_list:
            release = self.release_collection.get_release_by_id(patch_id)
            if release.state not in [states.DEPLOYED, states.COMMITTED]:
                avail_list.append(patch_id)

        if len(avail_list) > 0:
            errormsg = "The following patches are not applied and cannot be committed:\n"
            for patch_id in avail_list:
                errormsg += "    %s\n" % patch_id
            LOG.info("patch_commit rejected: %s", errormsg)
            results["error"] += errormsg
            return results

        # TODO(ShawnLi): Commented out for the 24.09 release. This is gated to 25.03
        # NOTE(lviera): Must include start scripts; refactor like self.delete_start_install_script(patch_id)
        # for patch_id in commit_list:
        #     # Fetch file paths that need to be cleaned up to
        #     # free patch storage disk space
        #     pre_install_filename = self.release_data.metadata[patch_id].get("pre_install")
        #     post_install_filename = self.release_data.metadata[patch_id].get("post_install")

        #     if pre_install_filename:
        #         pre_install_script_path = "%s/%s_%s" % (root_scripts_dir, patch_id, pre_install_filename)
        #         post_install_script_path = "%s/%s_%s" % (root_scripts_dir, patch_id, post_install_filename)
        #         if os.path.exists(pre_install_script_path):
        #             cleanup_files.add(pre_install_script_path)
        #         if os.path.exists(post_install_script_path):
        #             cleanup_files.add(post_install_script_path)

        #     patch_sw_version = utils.get_major_release_version(
        #         self.release_data.metadata[patch_id]["sw_version"])
        #     abs_ostree_tar_dir = package_dir[patch_sw_version]
        #     software_tar_path = "%s/%s-software.tar" % (abs_ostree_tar_dir, patch_id)
        #     if os.path.exists(software_tar_path):
        #         cleanup_files.add(software_tar_path)

        # Calculate disk space
        disk_space = 0
        for file in cleanup_files:
            statinfo = os.stat(file)
            disk_space += statinfo.st_size

        if dry_run:
            results["info"] = "This commit operation would free %0.2f MiB" % (disk_space / (1024.0 * 1024.0))
            return results

        # Do the commit

        # Move the metadata to the committed dir
        for patch_id in commit_list:
            metadata_fname = "%s-metadata.xml" % patch_id
            deployed_fname = os.path.join(states.DEPLOYED_DIR, metadata_fname)
            committed_fname = os.path.join(states.COMMITTED_DIR, metadata_fname)
            if os.path.exists(deployed_fname):
                try:
                    shutil.move(deployed_fname, committed_fname)
                except shutil.Error:
                    msg = "Failed to move the metadata for %s" % patch_id
                    LOG.exception(msg)
                    raise MetadataFail(msg)

        # Delete the files
        for file in cleanup_files:
            try:
                os.remove(file)
            except OSError:
                msg = "Failed to remove: %s" % file
                LOG.exception(msg)
                raise MetadataFail(msg)

        reload_release_data()

        results["info"] = "The releases have been committed."
        return results

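    # Illustrative example (hypothetical sizes, and assuming the commented-out
    # cleanup collection above is re-enabled): with 50 MiB of install scripts
    # and a 150 MiB software tarball eligible for cleanup, a dry run reports
    # "This commit operation would free 200.00 MiB" and leaves all files and
    # metadata untouched; only a real commit moves the metadata to the
    # committed directory and deletes the files.
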
    def query_host_cache(self):
        output = []

        self.hosts_lock.acquire()
        for nbr in list(self.hosts):
            host = self.hosts[nbr].get_dict()
            host["interim_state"] = False
            for patch_id in list(sc.interim_state):
                if nbr in sc.interim_state[patch_id]:
                    host["interim_state"] = True

            output.append(host)

        self.hosts_lock.release()

        return output

    def any_patch_host_installing(self):
        rc = False

        with self.hosts_lock:
            for host in self.hosts.values():
                if host.state == constants.PATCH_AGENT_STATE_INSTALLING:
                    rc = True
                    break
        return rc

    def copy_install_scripts(self):
        applying_states = [states.DEPLOYING, states.REMOVING]
        for release in self.release_collection.iterate_releases():
            pre_install = release.pre_install
            post_install = release.post_install
            folder = ["preinstall", "postinstall"]
            if release.state in applying_states:
                try:
                    for i, file in enumerate([pre_install, post_install]):
                        if file:
                            full_name_file = "%s_%s" % (release.id, file)
                            script_path = "%s/%s" % (root_scripts_dir, full_name_file)
                            dest_path = constants.PATCH_SCRIPTS_STAGING_DIR + "/" + folder[i]
                            dest_script_file = "%s/%s" % (dest_path, full_name_file)
                            if not os.path.exists(dest_path):
                                os.makedirs(dest_path, 0o700)
                            shutil.copyfile(script_path, dest_script_file)
                            os.chmod(dest_script_file, 0o700)
                            msg = "Creating install script %s for %s" % (full_name_file, release.id)
                            LOG.info(msg)
                except shutil.Error:
                    msg = "Failed to copy the install script %s for %s" % (full_name_file, release.id)
                    LOG.exception(msg)
                    raise SoftwareError(msg)
            else:
                try:
                    for i, file in enumerate(file for file in (pre_install, post_install) if file):
                        full_name_file = "%s_%s" % (release.id, file)
                        script_path = "%s/%s/%s" % (constants.PATCH_SCRIPTS_STAGING_DIR, folder[i], full_name_file)
                        if os.path.exists(script_path):
                            os.remove(script_path)
                            msg = "Removing install script %s for %s" % (full_name_file, release.id)
                            LOG.info(msg)
                except shutil.Error:
                    msg = "Failed to delete the install script %s for %s" % (full_name_file, release.id)
                    LOG.exception(msg)

    def _update_state_to_peer(self):
        self.socket_lock.acquire()
        try:
            state_update_msg = SoftwareMessageDeployStateUpdate()
            state_update_msg.send(self.sock_out)
        finally:
            self.socket_lock.release()

    def _sanitize_extra_options(self, value):
        """
        Make sure the value contains only allowed characters.
        """
        # Only letters, numbers, spaces, -, and _ are allowed.
        if not re.match(r'^[\w\s\-]+$', value):
            msg_error = f"Invalid value: '{value}'."
            raise SoftwareServiceError(msg_error)
        return value

    def _parse_and_sanitize_extra_options(self, options_list):
        """
        Validate, sanitize and convert 'key=value' items to a dictionary.
        """

        for item in options_list:
            if item.count('=') != 1:
                msg_error = f"Invalid format: '{item}'. Expected format is key=value"
                raise SoftwareServiceError(msg_error)

        options = {}
        for item in options_list:
            key, value = item.split('=', 1)
            key = self._sanitize_extra_options(key.strip())
            value = self._sanitize_extra_options(value.strip())

            if key in constants.RESERVED_WORDS_SET:
                msg_error = f"{key} is a reserved word and can't be used."
                raise SoftwareServiceError(msg_error)

            options[key] = value
        return options

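    # Illustrative example (hypothetical options): ["region=RegionOne",
    # "mode=simplex"] parses into {"region": "RegionOne", "mode": "simplex"};
    # "mode=a=b" is rejected for containing two '=' characters, and
    # "mode=$(reboot)" is rejected by the character allow-list above.
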
    def _release_basic_checks(self, deployment):
        """
        Performs basic sanity checks on the release data
        :param deployment: release to be checked
        :return: release object, if it exists; raises SoftwareServiceError otherwise
        """

        # We need to verify that the software release exists
        release = self.release_collection.get_release_by_id(deployment)
        if not release:
            msg = "Software release version corresponding to the specified release " \
                  "%s does not exist." % deployment
            LOG.error(msg)
            msg = msg + " Try deleting and re-uploading the software for recovery."
            raise SoftwareServiceError(error=msg)

        return release

    def _deploy_precheck(self, release_version: str, force: bool = False,
                         region_name: typing.Optional[str] = None, patch: bool = False,
                         **kwargs) -> dict:
        """
        Verify that the system satisfies the prerequisites to upgrade to a specified deployment.
        :param release_version: full release name, e.g. starlingx-MM.mm.pp
        :param force: if True will ignore minor alarms during precheck
        :param region_name: region_name
        :param patch: if True then indicate precheck is for patch release
        :return: dict of info, warning and error messages
        """

        msg_info = ""
        msg_warning = ""
        msg_error = ""

        if region_name is None:
            region_name = utils.get_local_region_name()
        precheck_script = utils.get_precheck_script(release_version)

        if not os.path.isfile(precheck_script) and patch:
            # Precheck script may not be available for some patches
            # In that case, report system as healthy with info message to proceed
            self._save_precheck_result(release_version, healthy=True)
            msg_info = f"No deploy-precheck script available for patch version {release_version}"
            return dict(info=msg_info, warning=msg_warning, error=msg_error, system_healthy=True)

        if not os.path.isfile(precheck_script):
            msg = "Release files for deployment %s are not present on the system, " \
                  "cannot proceed with the precheck." % release_version
            LOG.error(msg)
            msg_error = "Failed to perform deploy precheck. " \
                        "The uploaded release may have been damaged. " \
                        "Try deleting and re-uploading the release.\n"
            self._save_precheck_result(release_version, healthy=False)
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        if self.pre_bootstrap and not patch:
            # Deploy precheck should be avoided in case of major release.
            msg_info = "Major release precheck is not valid in pre bootstrap scenario.\n"
            self._save_precheck_result(release_version, healthy=True)
            return dict(info=msg_info, warning=msg_warning, error=msg_error, system_healthy=True)

        if self.pre_bootstrap and not force:
            # Deploy precheck may not be supported in a pre-bootstrap environment if
            # the script accesses any of the services like sysinv, keystone, etc.
            msg_warning = "Pre-bootstrap environment may not support deploy precheck.\n" \
                          "Use --force option to execute deploy precheck script.\n"
            self._save_precheck_result(release_version, healthy=True)
            return dict(info=msg_info, warning=msg_warning, error=msg_error, system_healthy=True)

        deploy_in_progress = self._get_software_upgrade()

        # parse local config file to pass parameters to precheck script
        try:
            cp = configparser.ConfigParser(interpolation=None)
            cp.read(constants.SOFTWARE_CONFIG_FILE_LOCAL)
            ks_section = dict(cp["keystone_authtoken"]) if cp.has_section("keystone_authtoken") else {}
            auth_url = ks_section.get("auth_url")
            username = ks_section.get("username")
            password = ks_section.get("password")
            project_name = ks_section.get("project_name")
            user_domain_name = ks_section.get("user_domain_name")
            project_domain_name = ks_section.get("project_domain_name")
        except Exception as e:
            msg = "Error parsing config file: %s." % str(e)
            LOG.error(msg)
            msg_error = "Failed to perform deploy precheck. An internal error has occurred. " \
                        "Try locking and unlocking the controller for recovery.\n"
            self._save_precheck_result(release_version, healthy=False)
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        # Get releases info required for precheck
        releases = self.software_release_query_cached()

        preinstalled_patches = []
        for release in releases:
            if release['prepatched_iso']:
                preinstalled_patches = release.get('preinstalled_patches', [])
                break

        for release in releases:
            keys_to_delete = ['packages', 'summary', 'description',
                              'install_instructions', 'warnings', 'component']
            for key in keys_to_delete:
                del release[key]

            # remove patch from requires if present in preinstalled_patches
            if preinstalled_patches:
                requires = release.get('requires', [])
                common = set(requires) & set(preinstalled_patches)
                if common:
                    release['requires'] = [id for id in requires if id not in common]
                    LOG.info("Removed %s from %s requires list, since these are prepatched"
                             % (common, release['release_id']))

        cmd = [precheck_script,
               "--auth_url=%s" % auth_url,
               "--username=%s" % username,
               "--password=%s" % password,
               "--project_name=%s" % project_name,
               "--user_domain_name=%s" % user_domain_name,
               "--project_domain_name=%s" % project_domain_name,
               "--region_name=%s" % region_name,
               "--releases=%s" % json.dumps(releases),
               "--options=%s" % json.dumps(kwargs.get("options", {})),
               "--deploy_in_progress=%s" % json.dumps(deploy_in_progress)]
        if force:
            cmd.append("--force")
        if patch:
            cmd.append("--patch")

        # Call precheck from the deployment files
        precheck_return = subprocess.run(
            cmd,
            stderr=subprocess.STDOUT,
            stdout=subprocess.PIPE,
            check=False,
            text=True,
        )
        system_healthy = None
        if precheck_return.returncode in [constants.RC_SUCCESS, constants.RC_UNHEALTHY]:
            system_healthy = precheck_return.returncode == constants.RC_SUCCESS
            self._save_precheck_result(release_version, healthy=system_healthy)
            msg_info += precheck_return.stdout
        else:
            self._save_precheck_result(release_version, healthy=False)
            msg_error += precheck_return.stdout

        return dict(info=msg_info, warning=msg_warning, error=msg_error, system_healthy=system_healthy)

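    # Illustrative sketch (hypothetical script path and credentials): for a
    # patch precheck of release_version "starlingx-24.09.1" the subprocess
    # invocation resembles:
    #
    #   /opt/software/rel-24.09.1/bin/deploy-precheck \
    #       --auth_url=http://... --username=admin --password=... \
    #       --project_name=admin --user_domain_name=Default \
    #       --project_domain_name=Default --region_name=RegionOne \
    #       --releases=<json> --options={} --deploy_in_progress=<json> --patch
    #
    # Return code RC_SUCCESS marks the system healthy, RC_UNHEALTHY marks it
    # unhealthy, and any other code is treated as an internal precheck error.
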
    def _get_release_additional_info(self, release):
        """
        Get additional information related to release in precheck api.
        :return: dict with release info.
        """
        release_info = {}
        running_release = self.release_collection.running_release

        release_info["major_release"] = utils.is_upgrade_deploy(SW_VERSION, release.sw_release)
        release_info["reboot_required"] = release.reboot_required
        release_info["prepatched_iso"] = release.prepatched_iso
        release_info["apply_operation"] = release > running_release

        return release_info

    def software_deploy_precheck_api(self, deployment: str, force: bool = False, region_name=None,
                                     **kwargs) -> dict:
        """
        Verify that the system satisfies the prerequisites to upgrade to a specified deployment.
        :param deployment: full release name, e.g. starlingx-MM.mm.pp
        :param force: if True will ignore minor alarms during precheck
        :return: dict of info, warning and error messages
        """

        release = self._release_basic_checks(deployment)
        release_version = release.sw_release

        # Check fields (MM.mm) of release_version to set patch flag
        is_patch = (not utils.is_upgrade_deploy(SW_VERSION, release_version))
        if not is_patch and socket.gethostname() != constants.CONTROLLER_0_HOSTNAME:
            raise SoftwareServiceError(f"Deploy precheck for major releases needs to be executed on"
                                       f" the {constants.CONTROLLER_0_HOSTNAME} host.")
        if kwargs.get("options"):
            kwargs["options"] = self._parse_and_sanitize_extra_options(kwargs.get("options"))
        ret = self._deploy_precheck(release_version, force, region_name, is_patch, **kwargs)
        if ret:
            if ret.get("system_healthy") is None:
                ret["error"] = "Failed to perform deploy precheck. An internal error has occurred.\n" + \
                               ret.get("error")
            elif not ret.get("system_healthy"):
                ret["error"] = "The following issues have been detected, which prevent " \
                               "deploying %s\n" % deployment + ret.get("info")
            release_info = self._get_release_additional_info(release)
            ret.update(release_info)
        return ret

    def _deploy_upgrade_start(self, to_release, commit_id, **kwargs):
        LOG.info("start deploy upgrade to %s from %s" % (to_release, SW_VERSION))
        deploy_script_name = constants.DEPLOY_START_SCRIPT
        cmd_path = utils.get_software_deploy_script(to_release, deploy_script_name)
        if not os.path.isfile(cmd_path):
            msg = f"{deploy_script_name} was not found"
            LOG.error(msg)
            raise SoftwareServiceError(f"{deploy_script_name} was not found. "
                                       "The uploaded software could have been damaged. "
                                       "Please delete the software and re-upload it")
        major_to_release = utils.get_major_release_version(to_release)
        k8s_ver = get_k8s_ver()
        postgresql_port = str(cfg.alt_postgresql_port)
        feed = os.path.join(constants.FEED_DIR,
                            "rel-%s/ostree_repo" % major_to_release)

        LOG.info("k8s version %s" % k8s_ver)
        upgrade_start_cmd = [cmd_path, SW_VERSION, major_to_release, k8s_ver, postgresql_port,
                             feed]

        # "0" is a sentinel for "no commit id"; all argv entries must be strings
        upgrade_start_cmd.append(commit_id if commit_id is not None else "0")
        upgrade_start_cmd.append(json.dumps(kwargs.get("options")) if kwargs.get("options") is not None else "")
        # pass in keystone auth through environment variables:
        # OS_AUTH_URL, OS_USERNAME, OS_PASSWORD, OS_PROJECT_NAME,
        # OS_USER_DOMAIN_NAME, OS_PROJECT_DOMAIN_NAME and OS_REGION_NAME
        keystone_auth = CONF.get('keystone_authtoken')
        env = {}
        env["OS_AUTH_URL"] = keystone_auth["auth_url"] + '/v3'
        env["OS_USERNAME"] = keystone_auth["username"]
        env["OS_PASSWORD"] = keystone_auth["password"]
        env["OS_PROJECT_NAME"] = keystone_auth["project_name"]
        env["OS_USER_DOMAIN_NAME"] = keystone_auth["user_domain_name"]
        env["OS_PROJECT_DOMAIN_NAME"] = keystone_auth["project_domain_name"]
        env["OS_REGION_NAME"] = keystone_auth["region_name"]
        env["IGNORE_ERRORS"] = self.ignore_errors

        try:
            LOG.info("starting subprocess %s" % ' '.join(upgrade_start_cmd))
            subprocess.Popen(upgrade_start_cmd, start_new_session=True, shell=False, env=env)
            LOG.info("subprocess started")
            return True
        except subprocess.SubprocessError as e:
            LOG.error("Failed to start command: %s. Error %s" % (' '.join(upgrade_start_cmd), e))
            return False

    def deploy_state_changed(self, new_state):
        '''Handle 'deploy state change' event, invoked when operations complete.'''

        deploy_state = DeployState.get_instance()
        state_event = {
            DEPLOY_STATES.START_DONE: deploy_state.start_done,
            DEPLOY_STATES.START_FAILED: deploy_state.start_failed,
            DEPLOY_STATES.ACTIVATE_DONE: deploy_state.activate_done,
            DEPLOY_STATES.ACTIVATE_FAILED: deploy_state.activate_failed,
            DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE: deploy_state.activate_rollback_done,
            DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED: deploy_state.activate_rollback_failed,
            DEPLOY_STATES.HOST_FAILED: deploy_state.deploy_host_failed
        }
        if new_state in state_event:
            state_event[new_state]()
        else:
            msg = f"Received invalid deploy state update {new_state}"
            LOG.error(msg)

    def host_deploy_state_changed(self, hostname, host_deploy_state):
        '''Handle 'host deploy state change' event.'''

        deploy_host_state = DeployHostState(hostname)
        state_event = {
            DEPLOY_HOST_STATES.FAILED: deploy_host_state.failed
        }
        if host_deploy_state in state_event:
            state_event[host_deploy_state]()
        else:
            msg = f"Received invalid deploy host state update {host_deploy_state}"
            LOG.error(msg)

    def add_text_tag_to_xml(self, parent, tag, text):
        '''Add text to tag. Create it if it does not exist'''
        element = parent.find(tag)
        if element is None:
            element = ET.SubElement(parent, tag)
        element.text = text
        return element

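    # Illustrative example: calling add_text_tag_to_xml(ostree, "commit",
    # "abc123") on an empty <ostree> element yields
    # <ostree><commit>abc123</commit></ostree>; if a <commit> child already
    # exists, only its text is overwritten.
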
    def is_deployment_list_reboot_required(self, deployment_list):
        """Check if any deploy in deployment list is reboot required"""
        for release_id in deployment_list:
            release = self.release_collection.get_release_by_id(release_id)
            if release.reboot_required:
                return True
        return False

    def copy_patch_activate_scripts(self, release_id, activate_scripts_list):
        """Copy patch activate scripts to /etc/update.d"""

        try:
            existing_scripts = list(os.listdir(PATCH_MIGRATION_SCRIPT_DIR))

            for script in activate_scripts_list:
                full_name_file = "%s_%s" % (release_id, script)
                script_path = "%s/%s" % (root_scripts_dir, full_name_file)
                dest_script_file = "%s/%s" % (PATCH_MIGRATION_SCRIPT_DIR, script)

                # Do not copy if script already exists in folder
                if script in existing_scripts:
                    msg = "Script %s already exists in %s. Skipping copy" \
                          % (script, PATCH_MIGRATION_SCRIPT_DIR)
                    LOG.info(msg)
                    continue

                shutil.copyfile(script_path, dest_script_file)
                os.chmod(dest_script_file, 0o755)
                msg = "Creating patch activate script %s for %s" \
                      % (full_name_file, release_id)
                LOG.info(msg)
        except shutil.Error:
            msg = "Failed to copy patch activate script %s for %s" \
                  % (full_name_file, release_id)
            LOG.exception(msg)
            raise SoftwareError(msg)

    def delete_all_patch_activate_scripts(self):
        """Delete all patch activate scripts in /etc/update.d"""
        if os.path.exists(PATCH_MIGRATION_SCRIPT_DIR):
            for script_name in os.listdir(PATCH_MIGRATION_SCRIPT_DIR):
                script_path = os.path.join(PATCH_MIGRATION_SCRIPT_DIR, script_name)
                try:
                    os.remove(script_path)
                    msg = "Deleted patch script: %s" % script_path
                    LOG.info(msg)
                except Exception as e:
                    msg = "Failed to delete patch script %s. Reason: %s" % (script_path, e)
                    LOG.error(msg)

    def _run_start_script(self, script_name, release_id, operation):
        """Run pre_start or post_start scripts"""
        script_path = os.path.join(root_scripts_dir, f"{release_id}_{script_name}")

        if os.path.isfile(script_path):
            LOG.info("Running %s script", script_name)
            try:
                output = subprocess.check_output(
                    ["sudo", script_path, f"--operation={operation}"],
                    stderr=subprocess.STDOUT,
                    text=True
                )
                LOG.info("%s output:\n%s" % (script_name, output.strip()))
            except subprocess.CalledProcessError as e:
                msg = "Failed to execute %s for release %s." % (script_name, release_id)
                LOG.exception(msg)
                LOG.error("Command output: %s", e.output)
                raise SoftwareError(msg)
        else:
            LOG.warning("Script %s not found", script_name)

    def cleanup_old_releases(self, target_commit, all_commits):
        index = 0
        to_delete_releases = []

        while index < len(all_commits) and target_commit != all_commits[index]:
            to_delete_release = self.release_collection.get_release_by_commit_id(all_commits[index])
            if to_delete_release:
                to_delete_releases.append(to_delete_release.id)
                LOG.info("Deleting %s not used after prestage" % to_delete_release.id)
            index += 1

        # Delete metadata and all associated release files
        self.software_release_delete_api(to_delete_releases)

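    # Worked example (hypothetical commits): with all_commits = [c3, c2, c1]
    # (newest first) and target_commit = c1, the releases recorded at c3 and
    # c2 are collected and deleted; c1 becomes the feed head once the caller
    # subsequently resets the feed to the target release.
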
def install_releases_thread(self, deployment_list, feed_repo, upgrade=False, **kwargs):
|
|
"""
|
|
In a separated thread.
|
|
Install the debian packages, create the commit and update the metadata.
|
|
IF it's an upgrade, also run the upgrade script
|
|
"""
|
|
def run():
|
|
LOG.info("Installing releases on repo: %s" % feed_repo)
|
|
|
|
try:
|
|
                deploy_sw_version = None
                for release_id in deployment_list:
                    msg = "Starting deployment for: %s" % release_id
                    LOG.info(msg)
                    audit_log_info(msg)

                    deploy_release = self._release_basic_checks(release_id)
                    self.copy_patch_activate_scripts(release_id, deploy_release.activation_scripts)

                    # Run pre_start script
                    self._run_start_script(deploy_release.pre_start, release_id, constants.APPLY)
                    # Reload release in case the pre_start script made some change
                    reload_release_data()
                    deploy_release = self._release_basic_checks(release_id)

                    deploy_sw_version = deploy_release.sw_version

                    all_commits = ostree_utils.get_all_feed_commits(deploy_release.sw_version)
                    latest_commit = all_commits[0]
                    target_commit = deploy_release.commit_id
                    if target_commit in all_commits:
                        # This case is for a node with prestaged data where ostree
                        # commits have been pulled from the system controller
                        LOG.info("Commit %s already exists in feed repo for release %s"
                                 % (deploy_release.commit_id, release_id))

                        # If this is the last deployment and it is not the latest commit
                        # in the feed, delete the newer commits and their metadata
                        if release_id == deployment_list[-1] and target_commit != latest_commit:
                            self.cleanup_old_releases(target_commit, all_commits)

                            # Reset feed to the last deployment release
                            self.reset_feed_commit(deploy_release)

                        continue

                    packages = [pkg.split("_")[0] for pkg in deploy_release.packages]
                    # A list comprehension never yields None; an empty package
                    # list is the failure case here
                    if not packages:
                        msg = "Unable to determine packages to install"
                        LOG.error(msg)
                        raise MetadataFail(msg)

                    # Install Debian packages through apt-ostree
                    try:
                        apt_utils.run_install(
                            feed_repo,
                            deploy_release.sw_version,
                            deploy_release.sw_release,
                            packages)
                    except APTOSTreeCommandFail:
                        msg = "Failed to install Debian packages."
                        LOG.exception(msg)
                        raise APTOSTreeCommandFail(msg)

                    # Get the latest commit after performing "apt-ostree install".
                    self.latest_feed_commit = \
                        ostree_utils.get_feed_latest_commit(deploy_release.sw_version)

                    deploystate = deploy_release.state
                    metadata_dir = states.RELEASE_STATE_TO_DIR_MAP[deploystate]
                    metadata_file = "%s/%s-metadata.xml" % (metadata_dir, release_id)

                    reload_release_data()
                    # NOTE(bqian) The check and exception raised below should be revisited;
                    # if applicable, it should be applied at the beginning of all requests.
                    if len(self.hosts) == 0:
                        msg = "service is running in incorrect state. No registered host"
                        raise InternalError(msg)

                    with self.hosts_lock:
                        self.interim_state[release_id] = list(self.hosts)

                    self.latest_feed_commit = \
                        ostree_utils.get_feed_latest_commit(deploy_release.sw_version)

                    # Update metadata
                    tree = ET.parse(metadata_file)
                    root = tree.getroot()

                    contents = ET.SubElement(root, constants.CONTENTS_TAG)
                    ostree = ET.SubElement(contents, constants.OSTREE_TAG)
                    self.add_text_tag_to_xml(ostree, constants.NUMBER_OF_COMMITS_TAG, "1")
                    base = ET.SubElement(ostree, constants.BASE_TAG)
                    self.add_text_tag_to_xml(base, constants.COMMIT_TAG, latest_commit)
                    self.add_text_tag_to_xml(base, constants.CHECKSUM_TAG, "")
                    commit1 = ET.SubElement(ostree, constants.COMMIT1_TAG)
                    self.add_text_tag_to_xml(commit1, constants.COMMIT_TAG, self.latest_feed_commit)
                    self.add_text_tag_to_xml(commit1, constants.CHECKSUM_TAG, "")

                    ET.indent(tree, '  ')
                    with open(metadata_file, "wb") as outfile:
                        # avoid shadowing the ElementTree object held in "tree"
                        xml_bytes = ET.tostring(root)
                        outfile.write(xml_bytes)
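
                    # NOTE: illustrative only -- with the tags added above, the
                    # written metadata gains a section shaped roughly like this
                    # (assuming the *_TAG constants map to these literal names,
                    # and with hypothetical commit values):
                    #   <contents>
                    #     <ostree>
                    #       <number_of_commits>1</number_of_commits>
                    #       <base>
                    #         <commit>abc123...</commit>
                    #         <checksum></checksum>
                    #       </base>
                    #       <commit1>
                    #         <commit>def456...</commit>
                    #         <checksum></checksum>
                    #       </commit1>
                    #     </ostree>
                    #   </contents>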
LOG.info("Latest feed commit: %s added to metadata file" % self.latest_feed_commit)
|
|
|
|
# Run post_start script
|
|
self._run_start_script(deploy_release.post_start, release_id, constants.APPLY)
|
|
|
|
# In prepatched add tombstone
|
|
ostree_utils.add_tombstone_commit_if_prepatched(constants.OSTREE_REF, feed_repo)
|
|
|
|
# Update the feed ostree summary
|
|
ostree_utils.update_repo_summary_file(feed_repo)
|
|
self.latest_feed_commit = ostree_utils.get_feed_latest_commit(deploy_sw_version)
|
|
|
|
self.send_latest_feed_commit_to_agent()
|
|
self.software_sync()
|
|
|
|
if upgrade:
|
|
base_deployment = deployment_list[0]
|
|
base_release = self._release_basic_checks(base_deployment)
|
|
upgrade_commit_id = base_release.commit_id
|
|
if self._deploy_upgrade_start(base_release.sw_release, upgrade_commit_id, **kwargs):
|
|
LOG.info("Finished releases %s deploy start" % deployment_list)
|
|
else:
|
|
raise ValueError("_deploy_upgrade_start failed")
|
|
else:
|
|
# move the deploy state to start-done
|
|
deploy_state = DeployState.get_instance()
|
|
deploy_state.start_done(self.latest_feed_commit)
|
|
LOG.info("Finished releases %s deploy start" % deployment_list)
|
|
|
|
except Exception as e:
|
|
msg = "Deploy start applying failed: %s" % str(e)
|
|
LOG.exception(msg)
|
|
audit_log_info(msg)
|
|
|
|
try:
|
|
# set state to failed
|
|
deploy_state = DeployState.get_instance()
|
|
deploy_state.start_failed()
|
|
except Exception as e:
|
|
msg = "Unable to set deploy failed: %s" % str(e)
|
|
LOG.exception(msg)
|
|
audit_log_info(msg)
|
|
|
|
thread = threading.Thread(target=run)
|
|
thread.start()
|
|
|
|
def _precheck_before_start(self, deployment, release_version, is_patch, force=False, **kwargs):
|
|
LOG.info("Running deploy precheck.")
|
|
precheck_result = self._deploy_precheck(release_version, patch=is_patch, force=force, **kwargs)
|
|
if precheck_result.get('system_healthy') is None:
|
|
precheck_result["error"] = (
|
|
f"Fail to perform deploy precheck. Internal error has occurred.\n"
|
|
f"{precheck_result['error']}"
|
|
)
|
|
return precheck_result
|
|
elif precheck_result.get('system_healthy') is False:
|
|
precheck_result["error"] = (
|
|
f"The following issues have been detected, which prevent deploying {deployment}\n"
|
|
f"{precheck_result['info']}\n"
|
|
"Please fix above issues then retry the deploy.\n"
|
|
)
|
|
return precheck_result
|
|
return None
|
|
|
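
    # NOTE: for reference (values illustrative) -- when the precheck blocks the
    # deploy, _precheck_before_start returns the precheck dict with "error"
    # filled in, e.g.
    #   {"system_healthy": False, "info": "...", "error": "The following issues ..."}
    # and it returns None when the system is healthy and the deploy may proceed.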

    def _get_precheck_result_file_path(self, release_version):
        return os.path.join("/opt/software/", f"rel-{release_version}", "precheck-result.json")

    def _safe_remove_precheck_result_file(self, release_version):
        precheck_result_file = self._get_precheck_result_file_path(release_version)
        if os.path.isfile(precheck_result_file):
            os.remove(precheck_result_file)

    def _save_precheck_result(self, release_version, healthy):
        precheck_result_file = self._get_precheck_result_file_path(release_version)
        with open(precheck_result_file, "w") as f:
            json.dump({"healthy": healthy, "timestamp": time.time()}, f)
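
    # NOTE: illustrative only -- for a release version such as "24.09.1"
    # (hypothetical), the result file would live at
    #   /opt/software/rel-24.09.1/precheck-result.json
    # with content along the lines of:
    #   {"healthy": true, "timestamp": 1718000000.0}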
    def _should_run_precheck_prior_deploy_start(self, release_version, force, is_patch, **kwargs):
        # there is no precheck script at this stage
        if self.pre_bootstrap:
            return False

        # we should be able to patch an unhealthy system, ignoring the unhealthy state
        if is_patch and force:
            return False

        file_path = self._get_precheck_result_file_path(release_version)
        if not os.path.isfile(file_path):
            LOG.info("The precheck result file %s does not exist." % file_path)
            return True

        if kwargs:
            return True

        with open(file_path) as f:
            last_result = json.load(f)

        if time.time() - last_result["timestamp"] > constants.PRECHECK_RESULT_VALID_PERIOD:
            LOG.info("The precheck result expired.")
            return True

        return not last_result["healthy"]

    @require_deploy_state([None],
                          "There is already a deployment in progress ({state.value}). "
                          "Please complete/delete the current deployment.")
    def software_deploy_start_api(self, deployment: str, force: bool, **kwargs) -> dict:
        """
        Start deploy of a specified release.
        The operation implies deploying all undeployed dependency releases of
        the specified release, i.e. to deploy release 24.09.1, it implies
        deploying 24.09.0 and 24.09.1 when 24.09.0 has not been deployed.
        The operation includes these steps:
        1. find all undeployed dependency releases
        2. ensure all releases (dependency and specified release) are ready to be deployed
        3. precheck, if the last precheck was not executed, was executed and
           failed, or its result expired
        4. transition all involved releases to deploying state
        5. start the deploy subprocess
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""
        deploy_release = self._release_basic_checks(deployment)

        running_release = self.release_collection.running_release
        deploy_sw_version = deploy_release.sw_version  # MM.mm
        is_patch = (not utils.is_upgrade_deploy(SW_VERSION, deploy_sw_version))

        # pre-bootstrap patch removal case
        if not self.pre_bootstrap:
            if (not is_patch) and socket.gethostname() != constants.CONTROLLER_0_HOSTNAME:
                raise SoftwareServiceError(f"Deploy start for major releases needs to be executed on "
                                           f"the {constants.CONTROLLER_0_HOSTNAME} host.")

        feed_repo = "%s/rel-%s/ostree_repo" % (constants.FEED_OSTREE_BASE_DIR, deploy_sw_version)
        commit_id = deploy_release.commit_id
        # Set hostname in case of local install
        hostname = None
        if self.pre_bootstrap:
            hostname = constants.PREBOOTSTRAP_HOSTNAME
        elif self.install_local:
            hostname = socket.gethostname()
            valid_hostnames = [constants.CONTROLLER_0_HOSTNAME, constants.CONTROLLER_1_HOSTNAME]
            if hostname not in valid_hostnames:
                LOG.warning("Using unknown hostname for local install: %s", hostname)

        to_release = deploy_release.sw_release
        if kwargs.get("options"):
            kwargs["options"] = self._parse_and_sanitize_extra_options(kwargs.get("options"))
        if self._should_run_precheck_prior_deploy_start(to_release, force, is_patch, **kwargs):
            LOG.info("Executing software deploy precheck prior to software deploy start")
            if precheck_result := self._precheck_before_start(
                    deployment,
                    to_release,
                    is_patch=is_patch,
                    force=force,
                    **kwargs
            ):
                return precheck_result
        self._safe_remove_precheck_result_file(to_release)

        # Patch operation: 'deploy release' major version equals 'running release' major version (MM.mm)

        # TODO(bqian) update references of sw_release (string) to SWRelease object

        if deploy_release > running_release:
            operation = constants.APPLY
        elif running_release > deploy_release:
            operation = constants.REMOVE
        else:
            # NOTE(bqian) The error message doesn't seem right. Software version
            # format or any metadata semantic check should be done during upload.
            # If invalid data is found subsequently, the data is considered
            # damaged; recommend delete and re-upload.
            msg_error += "The software version format for this release is not correct.\n"
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        # NOTE(bqian) shouldn't patch release deploy and remove be doing the same
        # thing in terms of ostree commit, i.e. deploy to the commit specified by
        # the commit-id associated with the release from the deploy start command?
        # If releases are such that:
        # R2 requires R1, R3 requires R2, R4 requires R3
        # If the current running release is R2 and the command issued is
        # "software deploy start R4", the operation is "apply" with order [R3, R4].
        # If the current running release is R4 and the command issued is
        # "software deploy start R2", the operation is "remove" with order [R4, R3].
        if operation == constants.APPLY:
            deployment_list = self.release_apply_order(deployment, deploy_sw_version)

            collect_current_load_for_hosts(deploy_sw_version, hostname=hostname)
            create_deploy_hosts(hostname=hostname)

            msg = "Deploy start order for apply operation: %s" % ",".join(deployment_list)
            LOG.info(msg)
            audit_log_info(msg)

            # todo(jcasteli) Do we need this block below?
            # Check for patches that can't be applied during an upgrade
            upgrade_check = True
            for release_id in deployment_list:
                release = self.release_collection.get_release_by_id(release_id)
                if release.sw_version != SW_VERSION and release.apply_active_release_only == "Y":
                    msg = "%s cannot be applied during an upgrade" % release_id
                    LOG.error(msg)
                    msg_error += msg + "\n"
                    upgrade_check = False

            if not upgrade_check:
                return dict(info=msg_info, warning=msg_warning, error=msg_error)

            if kwargs.get("skip-semantic") != "yes":
                self.run_semantic_check(constants.SEMANTIC_PREAPPLY, deployment_list)

            running_release = self.release_collection.running_release
            to_deploy_release_id = deployment_list[-1]
            to_deploy_release = self.release_collection.get_release_by_id(to_deploy_release_id)
            reboot_required = self.is_deployment_list_reboot_required(deployment_list)

            collect_current_load_for_hosts(to_deploy_release.sw_version, hostname=hostname)
            release_state = ReleaseState(release_ids=deployment_list)
            release_state.start_deploy()

            # Set the deploy state to start, so that it can transition to start-done or start-failed
            deploy_state = DeployState.get_instance()
            to_release = to_deploy_release.sw_release
            if is_patch:
                deploy_state.start(running_release, to_release, feed_repo, None, reboot_required)
            else:
                deploy_state.start(running_release, to_release, feed_repo, commit_id,
                                   reboot_required, **kwargs)

            # Start applying the releases
            upgrade = not is_patch
            self.install_releases_thread(deployment_list, feed_repo, upgrade, **kwargs)

            msg_info += "%s is now starting, await the states: " \
                        "[deploy-start-done | deploy-start-failed] in " \
                        "'software deploy show'\n" % deployment_list

        elif operation == constants.REMOVE:
            if deploy_release.state == states.UNAVAILABLE:
                msg = "Cannot go back to an unavailable release"
                LOG.error(msg)
                msg_error += msg + "\n"
                return dict(info=msg_info, warning=msg_warning, error=msg_error)

            collect_current_load_for_hosts(deploy_sw_version, hostname=hostname)
            create_deploy_hosts(hostname=hostname)
            deployment_list = self.release_remove_order(deployment, running_release.id, running_release.sw_version)

            msg = "Deploy start order for remove operation: %s" % ",".join(deployment_list)
            LOG.info(msg)
            audit_log_info(msg)

            remove_unremovable = False

            if kwargs.get("removeunremovable") == "yes":
                remove_unremovable = True

            # See if any of the patches are marked as unremovable
            unremovable_verification = True
            for release_id in deployment_list:
                release = self.release_collection.get_release_by_id(release_id)
                if release.unremovable:
                    if remove_unremovable:
                        msg = "Unremovable release %s being removed" % release_id
                        LOG.warning(msg)
                        msg_warning = msg + "\n"
                    else:
                        msg = "Release %s is not removable" % release_id
                        LOG.error(msg)
                        msg_error += msg + "\n"
                        unremovable_verification = False
                elif release.state == states.COMMITTED or release.state == states.UNAVAILABLE:
                    msg = "Release %s is %s and cannot be removed" % (release_id, release.state)
                    LOG.error(msg)
                    msg_error += msg + "\n"
                    unremovable_verification = False

            if not unremovable_verification:
                return dict(info=msg_info, warning=msg_warning, error=msg_error)

            if kwargs.get("skipappcheck") != "yes":
                # Check application dependencies before removing
                required_releases = {}
                for release in deployment_list:
                    for appname, iter_release_list in self.app_dependencies.items():
                        if release in iter_release_list:
                            if release not in required_releases:
                                required_releases[release] = []
                            required_releases[release].append(appname)

                if len(required_releases) > 0:
                    for req_release, app_list in required_releases.items():
                        msg = "%s is required by application(s): %s" % (req_release, ", ".join(sorted(app_list)))
                        msg_error += msg + "\n"
                        LOG.info(msg)

                    return dict(info=msg_info, warning=msg_warning, error=msg_error)

            if kwargs.get("skip-semantic") != "yes":
                self.run_semantic_check(constants.SEMANTIC_PREREMOVE, deployment_list)

            collect_current_load_for_hosts(deploy_sw_version, hostname=hostname)
            release_state = ReleaseState(release_ids=deployment_list)
            release_state.start_remove()

            reboot_required = self.is_deployment_list_reboot_required(deployment_list)

            deploy_state = DeployState.get_instance()
            to_release = deploy_release.sw_release
            deploy_state.start(running_release, to_release, feed_repo, commit_id, reboot_required)

            try:
                for release_id in deployment_list:
                    release = self.release_collection.get_release_by_id(release_id)
                    msg = "Removing release: %s" % release_id
                    LOG.info(msg)
                    audit_log_info(msg)

                    # Run pre_start script
                    self._run_start_script(release.pre_start, release_id, constants.REMOVE)
                    # Reload release in case the pre_start script made some change
                    reload_release_data()
                    release = self.release_collection.get_release_by_id(release_id)

                    if release.state == states.AVAILABLE:
                        msg = "The deployment for %s has not been created" % release_id
                        LOG.info(msg)
                        msg_info += msg + "\n"
                        continue

                    self.copy_patch_activate_scripts(release_id, release.activation_scripts)

                    major_release_sw_version = release.sw_version
                    # this is an ostree patch
                    # Base commit is fetched from the patch metadata.
                    base_commit = release.base_commit_id
                    feed_repo = "%s/rel-%s/ostree_repo" % (constants.FEED_OSTREE_BASE_DIR, major_release_sw_version)
                    try:
                        # Reset the ostree HEAD
                        ostree_utils.reset_ostree_repo_head(base_commit, feed_repo)

                        # Delete all commits that belong to this release
                        # NOTE(bqian) there should be just one commit per release.
                        commit_to_delete = release.commit_id
                        ostree_utils.delete_ostree_repo_commit(commit_to_delete, feed_repo)

                        # Update the feed ostree summary
                        ostree_utils.update_repo_summary_file(feed_repo)

                    except OSTreeCommandFail:
                        LOG.exception("Failure while removing release %s.", release_id)

                    # Remove contents tag from metadata xml
                    self.remove_tags_from_metadata(release, constants.CONTENTS_TAG)

                    try:
                        # Move the metadata to the deleted dir
                        self.release_collection.update_state([release_id], states.REMOVING)
                        msg_info += "%s has been removed from the repo\n" % release_id
                    except shutil.Error:
                        msg = "Failed to move the metadata for %s" % release_id
                        LOG.error(msg)
                        raise MetadataFail(msg)

                    if len(self.hosts) == 0:
                        msg = "service is running in incorrect state. No registered host"
                        raise InternalError(msg)

                    # only update latest_feed_commit if it is an ostree patch
                    if release.base_commit_id is not None:
                        # Base Commit in this release's metadata.xml file represents the latest commit
                        # after this release has been removed from the feed repo
                        self.latest_feed_commit = release.base_commit_id

                    with self.hosts_lock:
                        self.interim_state[release_id] = list(self.hosts)

                    # Run post_start script
                    self._run_start_script(release.post_start, release_id, constants.REMOVE)

                    # In a prepatched system, add a tombstone commit
                    if ostree_utils.add_tombstone_commit_if_prepatched(constants.OSTREE_REF, feed_repo):
                        ostree_utils.update_repo_summary_file(feed_repo)

                # There is no defined behavior for deploy start for patching releases, so
                # move the deploy state to start-done
                deploy_state = DeployState.get_instance()
                deploy_state.start_done(self.latest_feed_commit)

                self.send_latest_feed_commit_to_agent()
                self.software_sync()
            except Exception as e:
                msg_error = "Deploy start removing failed"
                msg = "%s: %s" % (msg_error, e)
                LOG.exception(msg)
                audit_log_info(msg)

                # set state to failed
                deploy_state = DeployState.get_instance()
                deploy_state.start_failed()

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def remove_tags_from_metadata(self, release, tag):
        LOG.info("Removing %s tag from %s metadata" % (tag, release.id))

        metadata_dir = states.RELEASE_STATE_TO_DIR_MAP[release.state]
        metadata_path = "%s/%s-metadata.xml" % (metadata_dir, release.id)
        tree = ET.parse(metadata_path)
        root = tree.getroot()
        metadata_tag = root.find(tag)

        if metadata_tag is not None:
            root.remove(metadata_tag)

        ET.indent(tree, '  ')
        with open(metadata_path, "wb") as outfile:
            # avoid shadowing the ElementTree object held in "tree"
            xml_bytes = ET.tostring(root)
            outfile.write(xml_bytes)

    def execute_delete_actions(self):
        deploy = self.db_api_instance.get_current_deploy()
        to_release = deploy.get("to_release")
        from_release = deploy.get("from_release")

        delete_cmd = f"/usr/bin/software-deploy-delete {from_release} {to_release} --is_major_release"

        runner = DeployPluginRunner(deploy)
        runner.execute(delete_cmd)
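
    # NOTE: illustrative only -- with from_release "24.09.1" and to_release
    # "25.03.0" (hypothetical versions), the command executed above would be:
    #   /usr/bin/software-deploy-delete 24.09.1 25.03.0 --is_major_release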

    @require_deploy_state([DEPLOY_STATES.HOST_ROLLBACK_DONE, DEPLOY_STATES.COMPLETED, DEPLOY_STATES.START_DONE,
                           DEPLOY_STATES.START_FAILED],
                          "Deploy must be in the following states to be able to delete: %s, %s, %s, %s" % (
                              DEPLOY_STATES.HOST_ROLLBACK_DONE.value, DEPLOY_STATES.COMPLETED.value,
                              DEPLOY_STATES.START_DONE.value, DEPLOY_STATES.START_FAILED.value))
    def software_deploy_delete_api(self) -> dict:
        """
        Delete the deployment and the data generated during the deploy.

        :return: dict of info, warning and error messages
        """

        msg_info = ""
        msg_warning = ""
        msg_error = ""

        deploy = self.db_api_instance.get_current_deploy()
        to_release = deploy.get("to_release")
        from_release = deploy.get("from_release")

        deploy_state_instance = DeployState.get_instance()

        # except in early stages of the deployment, such as deploy start,
        # hosts must be unlocked and online, since during deployment deletion
        # a request is sent to all hosts to clear flags and temporary data
        # created during the deployment procedure
        if not self.pre_bootstrap:
            if (deploy_state_instance.get_deploy_state() not in [DEPLOY_STATES.START_DONE,
                                                                 DEPLOY_STATES.START_FAILED] and
                    not are_all_hosts_unlocked_and_online()):
                msg = f"Hosts must be {constants.ADMIN_UNLOCKED} and {constants.AVAILABILITY_ONLINE}."
                raise SoftwareServiceError(error=msg)

        is_major_release = False

        deploy_state = deploy_state_instance.get_deploy_state()
        deploying_release_state = ReleaseState(release_state=states.DEPLOYING)
        is_applying = deploying_release_state.has_release_id()

        if deploy_state in [
                DEPLOY_STATES.START_DONE, DEPLOY_STATES.START_FAILED, DEPLOY_STATES.COMPLETED]:
            is_major_release = deploying_release_state.is_major_release_deployment() if is_applying else False
        elif deploy_state == DEPLOY_STATES.HOST_ROLLBACK_DONE:
            is_major_release = ReleaseState(
                release_state=states.DEPLOYING).is_major_release_deployment()

        # Only a major release is required to be deleted on controller-0.
        # Patch deletion can take place on either controller.
        if is_major_release and self.hostname != constants.CONTROLLER_0_HOSTNAME:
            raise SoftwareServiceError("Deploy delete can only be performed on controller-0.")

        if DEPLOY_STATES.COMPLETED == deploy_state:
            if is_applying:
                major_release = utils.get_major_release_version(from_release)
                # In case of a major release deployment, set all the releases related to from_release to unavailable
                if is_major_release:
                    unavailable_releases = []
                    for release in self.release_collection.iterate_releases():
                        if release.sw_version == major_release:
                            unavailable_releases.append(release.id)
                    ReleaseState(release_ids=unavailable_releases).replaced()
                else:
                    # After deploying a patch, delete previous unavailable releases
                    # from the current major release, if they exist (subcloud prestage case)
                    unavailable_releases = list(self.release_collection.iterate_releases_by_state(states.UNAVAILABLE))
                    if unavailable_releases:
                        for unavailable in unavailable_releases:
                            if unavailable.sw_version == major_release:
                                LOG.info("Deleting unavailable %s from current major release" % unavailable.id)

                                try:
                                    os.remove("%s/%s-metadata.xml" % (states.UNAVAILABLE_DIR, unavailable.id))
                                except OSError:
                                    LOG.error("Error when deleting %s metadata" % unavailable.id)

                        reload_release_data()

                        # Change the lowest deployed release metadata to prepatched.
                        # Applied releases are all from the current major release,
                        # whose commits are deployed in the sysroot.
                        applied_releases_states = {states.DEPLOYED, states.DEPLOYING, states.COMMITTED}
                        for release in self.release_collection.iterate_releases():
                            if release.sw_version == major_release and release.state in applied_releases_states:
                                LOG.info("Changing %s metadata to prepatched" % release.id)

                                metadata_dir = states.RELEASE_STATE_TO_DIR_MAP[release.state]
                                metadata_file = "%s/%s-metadata.xml" % (metadata_dir, release.id)

                                tree = ET.parse(metadata_file)
                                root = tree.getroot()

                                self.add_text_tag_to_xml(root, constants.PREPATCHED_ISO_TAG, "Y")
                                requires_tag = root.find(constants.REQUIRES_TAG)
                                if requires_tag is not None:
                                    root.remove(requires_tag)

                                ET.indent(tree, '  ')
                                with open(metadata_file, "wb") as outfile:
                                    # avoid shadowing the ElementTree object held in "tree"
                                    xml_bytes = ET.tostring(root)
                                    outfile.write(xml_bytes)

                                # Run just for the first (lowest) release
                                break
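
                        # NOTE: illustrative only -- after this rewrite, the lowest
                        # deployed release's metadata would gain a tag like
                        #   <prepatched_iso>Y</prepatched_iso>
                        # and lose its <requires> element (exact element names
                        # depend on constants.PREPATCHED_ISO_TAG and
                        # constants.REQUIRES_TAG).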

                # Set deploying releases to deployed state.
                deploying_release_state.deploy_completed()
            else:
                removing_release_state = ReleaseState(release_state=states.REMOVING)
                removing_release_state.available()

        elif DEPLOY_STATES.HOST_ROLLBACK_DONE == deploy_state:
            major_release = utils.get_major_release_version(from_release)
            release_state = ReleaseState(release_state=states.DEPLOYING)
            release_state.available()

        elif deploy_state in [DEPLOY_STATES.START_DONE, DEPLOY_STATES.START_FAILED]:
            # TODO(bqian) this check is redundant; there should be no host deployed/deploying
            # when the deploy is in START_DONE or START_FAILED states
            hosts_states = []
            for host in self.db_api_instance.get_deploy_host():
                hosts_states.append(host.get("state"))
            if (DEPLOY_HOST_STATES.DEPLOYED.value in hosts_states or
                    DEPLOY_HOST_STATES.DEPLOYING.value in hosts_states):
                raise SoftwareServiceError(f"There are hosts already {DEPLOY_HOST_STATES.DEPLOYED.value} "
                                           f"or in the {DEPLOY_HOST_STATES.DEPLOYING.value} process")

            if is_applying:
                major_release = utils.get_major_release_version(to_release)

                if is_major_release:
                    try:
                        # TODO(bqian) Move the function below to a delete action
                        run_remove_temporary_data_script(to_release)
                    except subprocess.CalledProcessError as e:
                        msg_error = "Failed to delete deploy"
                        LOG.error("%s: %s" % (msg_error, e))
                        raise SoftwareServiceError(msg_error)
                else:
                    deployment_list = deploying_release_state.get_release_ids()
                    for release in self.release_collection.iterate_releases():
                        if release.sw_release == from_release:
                            self.reset_feed_commit(release)

                        if release.id in deployment_list:
                            self.remove_tags_from_metadata(release, constants.CONTENTS_TAG)

                deploying_release_state.available()
            else:
                msg_error = "Delete is not supported while removing a release"
                LOG.error(msg_error)
                raise SoftwareServiceError(msg_error)

        if os.path.isfile(INSTALL_LOCAL_FLAG):
            # Remove the install-local flag if enabled
            try:
                os.remove(INSTALL_LOCAL_FLAG)
            except Exception:
                msg_error = "Failed to clear install-local mode flag"
                LOG.error(msg_error)
                raise SoftwareServiceError(msg_error)
            LOG.info("Software deployment in local installation mode is stopped")

        if is_major_release:
            if SW_VERSION == major_release:
                msg_error = (
                    f"Deploy {major_release} can't be deleted as it is still the "
                    "current running software. An error may have occurred during the deploy.")
                LOG.error(msg_error)
                raise SoftwareServiceError(msg_error)

            # Send a message to the agents to clean up their ostree environment
            # if the deployment has completed or rolled back successfully
            finished_deploy_states = [DEPLOY_STATES.COMPLETED, DEPLOY_STATES.HOST_ROLLBACK_DONE]
            if deploy_state in finished_deploy_states:
                cleanup_req = SoftwareMessageDeployDeleteCleanupReq()
                cleanup_req.major_release = utils.get_major_release_version(to_release)
                cleanup_req.encode()
                self.socket_lock.acquire()
                cleanup_req.send(self.sock_out)
                self.socket_lock.release()

            self.manage_software_alarm(fm_constants.FM_ALARM_ID_USM_CLEANUP_DEPLOYMENT_DATA,
                                       fm_constants.FM_ALARM_STATE_CLEAR,
                                       "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
                                                  constants.CONTROLLER_FLOATING_HOSTNAME))

            # execute deploy delete plugins
            # NOTE(bqian) implemented for major release deploy delete only, as the
            # delete action for patching is undefined, i.e., in the case a patch is
            # applied, both from and to releases are applied.
            self.execute_delete_actions()
        else:
            self.delete_all_patch_activate_scripts()

        msg_info += "Deploy deleted successfully"
        self.db_api_instance.delete_deploy_host_all()
        self.db_api_instance.delete_deploy()

        LOG.info("Deploy is deleted")
        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def _deploy_complete(self):
        is_all_hosts_in_deployed_state = all(host_state.get("state") == DEPLOY_HOST_STATES.DEPLOYED.value
                                             for host_state in self.db_api_instance.get_deploy_host())
        if not is_all_hosts_in_deployed_state:
            raise SoftwareServiceError(f"Complete not allowed because there are hosts not"
                                       f" in the {DEPLOY_HOST_STATES.DEPLOYED.value} state.")
        return True

    @require_deploy_state([DEPLOY_STATES.ACTIVATE_DONE],
                          "Deploy must be in %s state to be able to complete." % DEPLOY_STATES.ACTIVATE_DONE.value)
    def software_deploy_complete_api(self) -> dict:
        """
        Completes a deployment associated with the release
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        deploy_state = DeployState.get_instance()

        if self._deploy_complete():
            deploy_state.completed()
            msg_info += "Deployment has been completed\n"
            try:
                # the sysinv evaluate_apps_reapply function needs to
                # be triggered after the deploy completes.
                trigger_evaluate_apps_reapply({"type": "usm-upgrade-complete"})
            except Exception as e:
                LOG.error("The attempt to trigger the evaluate apps reapply "
                          "failed with message: %s", e)

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def _activate(self):
        deploy = self.db_api_instance.get_deploy_all()
        if deploy:
            deploy = deploy[0]
        else:
            msg = "Deployment is missing unexpectedly"
            raise InvalidOperation(msg)

        cmd_path = "/usr/bin/software-deploy-activate"
        from_release = deploy.get("from_release")
        to_release = deploy.get("to_release")
        if self.pre_bootstrap:
            activate_cmd = [cmd_path, from_release, to_release]
        else:
            activate_cmd = ["source", "/etc/platform/openrc;", cmd_path, from_release, to_release]

        deploying = ReleaseState(release_state=states.DEPLOYING)
        if deploying.is_major_release_deployment():
            activate_cmd.append('--is_major_release')

        env = os.environ.copy()
        env["ANSIBLE_LOG_PATH"] = SOFTWARE_LOG_FILE
        if not self.pre_bootstrap:
            token, endpoint = utils.get_endpoints_token()
            env["OS_AUTH_TOKEN"] = token
            env["SYSTEM_URL"] = re.sub('/v[1,9]$', '', endpoint)  # remove ending /v1

        env["IGNORE_ERRORS"] = self.ignore_errors
        try:
            LOG.info("starting subprocess %s" % ' '.join(activate_cmd))
            subprocess.Popen(' '.join(activate_cmd), start_new_session=True, shell=True, env=env)
            LOG.info("subprocess started")
        except subprocess.SubprocessError as e:
            LOG.error("Failed to start command: %s. Error %s" % (' '.join(activate_cmd), e))
            return False

        return True
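
    # NOTE: illustrative only -- outside pre-bootstrap, with from_release
    # "24.09.1" and to_release "25.03.0" (hypothetical versions) on a major
    # release deployment, _activate ends up launching roughly:
    #   source /etc/platform/openrc; /usr/bin/software-deploy-activate 24.09.1 25.03.0 --is_major_release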

    def _check_pre_activate(self):
        if not self.pre_bootstrap:
            if not are_all_hosts_unlocked_and_online():
                msg = f"Hosts must be {constants.ADMIN_UNLOCKED} and {constants.AVAILABILITY_ONLINE}."
                raise SoftwareServiceError(error=msg)
        # Check the current deployment: deploying to all hosts must have
        # completed, so the deploy state is host-done, or activate-failed
        # as a reattempt from a previous failed activate
        deploy_state = DeployState.get_deploy_state()
        if deploy_state not in [DEPLOY_STATES.HOST_DONE, DEPLOY_STATES.ACTIVATE_FAILED]:
            msg = "Must complete deploying all hosts before activating the deployment"
            raise InvalidOperation(msg)

        deploy_hosts = self.db_api_instance.get_deploy_host()
        invalid_hosts = []
        for deploy_host in deploy_hosts:
            if deploy_host['state'] not in [states.DEPLOYED]:
                invalid_hosts.append(deploy_host)

        if len(invalid_hosts) > 0:
            msg = "All hosts must have completed deployment before activating the deployment\n"
            for invalid_host in invalid_hosts:
                msg += "%s: %s\n" % (invalid_host["hostname"], invalid_host["state"])
            raise InvalidOperation(msg)

    @require_deploy_state([DEPLOY_STATES.ACTIVATE, DEPLOY_STATES.ACTIVATE_DONE, DEPLOY_STATES.ACTIVATE_FAILED,
                           DEPLOY_STATES.COMPLETED, DEPLOY_STATES.HOST, DEPLOY_STATES.HOST_DONE,
                           DEPLOY_STATES.HOST_FAILED],
                          "Deploy must be in the following states to be able to abort: %s, %s, %s, %s, %s, %s, %s" %
                          (DEPLOY_STATES.ACTIVATE.value, DEPLOY_STATES.ACTIVATE_DONE.value,
                           DEPLOY_STATES.ACTIVATE_FAILED.value, DEPLOY_STATES.COMPLETED.value, DEPLOY_STATES.HOST.value,
                           DEPLOY_STATES.HOST_DONE.value, DEPLOY_STATES.HOST_FAILED.value))
    def software_deploy_abort_api(self) -> dict:
        """
        Aborts the deployment associated with the release
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        deploy = self.db_api_instance.get_current_deploy()
        from_release = deploy.get("from_release")
        to_release = deploy.get("to_release")
        from_release_deployment = self.release_collection.get_release_id_by_sw_release(from_release)
        to_release_deployment = self.release_collection.get_release_id_by_sw_release(to_release)

        try:
            is_major_release = ReleaseState(release_state=states.DEPLOYING).is_major_release_deployment()
        except AttributeError:
            release = self.release_collection.get_release_by_id(to_release_deployment)
            is_major_release = ReleaseState(release_ids=[release.id]).is_major_release_deployment()

        if not is_major_release:
            removing_release_state = ReleaseState(release_state=states.REMOVING)
            is_removing = removing_release_state.has_release_id()

            if is_removing:
                raise SoftwareServiceError("Abort operation is not supported in patch removal")

            from_deployment = self.release_collection.get_release_by_id(from_release_deployment)
            self.reset_feed_commit(from_deployment)

            self.send_latest_feed_commit_to_agent()
            self.software_sync()

        major_from_release = utils.get_major_release_version(from_release)
        feed_repo = "%s/rel-%s/ostree_repo" % (constants.FEED_OSTREE_BASE_DIR, major_from_release)
        deploy_release = self._release_basic_checks(from_release_deployment)
        commit_id = deploy_release.commit_id

        # TODO(lbonatti): remove this condition when commit-id is built into GA metadata.
        if is_major_release and commit_id in [constants.COMMIT_DEFAULT_VALUE, None]:
            commit_id = ostree_utils.get_feed_latest_commit(deploy_release.sw_version)

        # Update the deployment
        deploy_state = DeployState.get_instance()
        deploy_state.abort(feed_repo, commit_id)

        # Update the host deployment
        deploy_host = self.db_api_instance.get_deploy_host()
        for host in deploy_host:
            hostname = host.get("hostname")
            deploy_host_state = DeployHostState(hostname)
            deploy_host_state.abort()

        msg_info += "Deployment has been aborted\n"
        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    @require_deploy_state([DEPLOY_STATES.HOST_DONE, DEPLOY_STATES.ACTIVATE_FAILED],
                          "Activate deployment only when current deployment state is {require_states}")
    def software_deploy_activate_api(self) -> dict:
        """
        Activates the deployment associated with the release
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        self._check_pre_activate()

        deploy_state = DeployState.get_instance()
        deploy_state.activate()

        try:
            self._activate()
            msg_info = "Deploy activate has started"
        except Exception:
            deploy_state.activate_failed()
            raise

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def _activate_rollback_major_release(self, deploy):
        cmd_path = "/usr/bin/software-deploy-activate-rollback"
        from_release = utils.get_major_release_version(deploy.get("from_release"))
        to_release = utils.get_major_release_version(deploy.get("to_release"))

        token, endpoint = utils.get_endpoints_token()
        env = os.environ.copy()
        env["ANSIBLE_LOG_PATH"] = SOFTWARE_LOG_FILE
        env["OS_AUTH_TOKEN"] = token
        env["SYSTEM_URL"] = re.sub('/v[1,9]$', '', endpoint)  # remove ending /v1

        env["IGNORE_ERRORS"] = self.ignore_errors
        upgrade_activate_rollback_cmd = [
            "source", "/etc/platform/openrc;", cmd_path, from_release, to_release]

        # check if LVM snapshots are enabled and try to restore them
        # TODO(heitormatsui): we don't really need to verify the system mode,
        # as LVM snapshots will only be allowed if the system is AIO-SX
        system_mode = utils.get_platform_conf("system_mode")
        if system_mode == constants.SYSTEM_MODE_SIMPLEX:
            deploy = self.db_api_instance.get_deploy_all()[0]
            options = deploy.get("options", {})
            enabled_lvm_snapshots = to_bool(options.get("snapshot"))
            if enabled_lvm_snapshots:
                LOG.info("LVM snapshots are enabled")
                manager = lvm_snapshot.LVMSnapshotManager()
                success = manager.restore_snapshots()
                if success:
                    LOG.info("LVM snapshots were restored, upgrade scripts with "
                             "action=activate-rollback will be skipped")
                    deploy_state = DeployState.get_instance()
                    deploy_state.activate_rollback_done()
                    return
                else:
                    LOG.warning("Failure restoring LVM snapshots, falling back "
                                "to standard activate-rollback procedure")

        try:
            LOG.info("starting subprocess %s" % ' '.join(upgrade_activate_rollback_cmd))
            subprocess.Popen(' '.join(upgrade_activate_rollback_cmd), start_new_session=True, shell=True, env=env)
            LOG.info("subprocess started")
        except subprocess.SubprocessError as e:
            LOG.error("Failed to start command: %s. Error %s" % (' '.join(upgrade_activate_rollback_cmd), e))
            raise

    def _activate_rollback_patching_release(self):
        deploy_state = DeployState.get_instance()
        # patching release activate-rollback operations go here
        deploy_state.activate_rollback_done()

    def _activate_rollback(self):
        deploy = self.db_api_instance.get_current_deploy()
        if not deploy:
            msg = "Deployment is missing unexpectedly"
            raise InvalidOperation(msg)

        deploying = ReleaseState(release_state=states.DEPLOYING)
        if deploying.is_major_release_deployment():
            self._activate_rollback_major_release(deploy)
        else:
            self._activate_rollback_patching_release()

    @require_deploy_state([DEPLOY_STATES.ACTIVATE_ROLLBACK_PENDING, DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED],
                          "Activate-rollback deployment only when current deployment state is {require_states}")
    def software_deploy_activate_rollback_api(self) -> dict:
        """
        Rolls back the activation of the deployment associated with the release
        :return: dict of info, warning and error messages
        """
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        deploy_state = DeployState.get_instance()
        deploy_state.activate_rollback()

        try:
            self._activate_rollback()
            msg_info = "Deploy activate-rollback has started"
        except Exception:
            deploy_state.activate_rollback_failed()
            raise

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def software_deploy_show_api(self, from_release=None, to_release=None):
        # Retrieve deploy state from db
        if from_release and to_release:
            deploy_data = self.db_api_instance.get_deploy(from_release, to_release)
            if not deploy_data:
                return deploy_data
            release_deployment = deploy_data["to_release"]
        else:
            # Retrieve deploy state from db in list format
            deploy_data = self.db_api_instance.get_deploy_all()
            if not deploy_data:
                return deploy_data
            release_deployment = deploy_data[0]["to_release"]

        release_id = self.release_collection.get_release_id_by_sw_release(release_deployment)
        release = self._release_basic_checks(release_id)
        release_info = self._get_release_additional_info(release)

        if isinstance(deploy_data, list):
            deploy_data[0].update(release_info)
        else:
            deploy_data.update(release_info)
        return deploy_data

    def _deploy_host(self, hostname, force, async_req=False, rollback=False):
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        try:
            ip = utils.gethostbyname(hostname)
        except socket.gaierror:
            msg_error += "Host %s not found\n" % hostname
            return dict(info=msg_info, warning=msg_warning, error=msg_error)

        # NOTE(bqian) Get the IP address to fulfill the needs of the patching
        # structure. The design needs to be reviewed.
        # ensure the ip is in the table, as in some cases the host has aged out of the hosts table
        if ip not in self.hosts:
            raise HostIpNotFound(hostname)

        # check if the host agent is reachable via message
        self.hosts[ip].is_alive = False
        check_alive_req = SoftwareMessageCheckAgentAliveReq()
        check_alive_req.ip = ip
        self.socket_lock.acquire()
        check_alive_req.send(self.sock_out)
        self.socket_lock.release()
        time.sleep(5)  # sleep 5 seconds for the agent to reply
        if not self.hosts[ip].is_alive:
            raise HostAgentUnreachable(hostname)

        is_major_release = self.check_upgrade_in_progress()
        deploy_host = self.db_api_instance.get_deploy_host_by_hostname(hostname)
        if deploy_host is None:
            raise HostNotFound(hostname)

        deploy = self.db_api_instance.get_deploy_all()[0]
        # Determine reboot required from deployment info
        self.allow_insvc_patching = True
        is_reboot_req = deploy.get(constants.REBOOT_REQUIRED, False)
        if is_reboot_req:
            self.allow_insvc_patching = False

        # for a reboot-required patch in pre-bootstrap
        if self.pre_bootstrap:
            self.allow_insvc_patching = True

        commit_id = deploy.get("commit_id")
        if not self.install_local:
            deploy_host_validations(
                hostname,
                is_major_release=is_major_release,
                rollback=rollback
            )
        deploy_state = DeployState.get_instance()
        deploy_host_state = DeployHostState(hostname)
        deploy_state.deploy_host()
        deploy_host_state.deploy_started()

        # if in a 'deploy host' reentrant scenario, i.e. retrying after
        # a failure, then clear the failure alarm before retrying
        entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST, hostname)
        self.manage_software_alarm(fm_constants.FM_ALARM_ID_USM_DEPLOY_HOST_FAILURE,
                                   fm_constants.FM_ALARM_STATE_CLEAR,
                                   entity_instance_id)

        msg = "Running software deploy host for %s (%s), force=%s, async_req=%s" % (
            hostname, ip, force, async_req)
        LOG.info(msg)
        audit_log_info(msg)

        if not is_major_release and self.allow_insvc_patching:
            LOG.info("Allowing in-service patching")
            force = True
            self.copy_install_scripts()

        # Check if there is a major release deployment in progress
        # and set agent request parameters accordingly
        major_release = None
        additional_data = {}
        if is_major_release:
            upgrade_release = self.get_software_upgrade()
            major_release = upgrade_release["to_release"]
            force = False
            async_req = False
            msg = "Running major release deployment, major_release=%s, force=%s, async_req=%s, commit_id=%s" % (
                major_release, force, async_req, commit_id)
            msg_info += msg + "\n"
            LOG.info(msg)
            try:
                copy_pxeboot_update_file(major_release, rollback=rollback)
                copy_pxeboot_cfg_files(major_release)
            except Exception:
                LOG.error("Failed to start deploy host")
                deploy_host_state.deploy_failed()
                raise

            # TODO(bqian) The code below is for upgrading to stx-10. Besides being
            # specific to that upgrade path, the solution is also temporary. A
            # better design is needed to smoothly support host deploy with
            # predetermined parameters.
            impacted_upgrade = ["24.09", "22.12"]
            if upgrade_release["to_release"] in impacted_upgrade and \
                    upgrade_release["from_release"] in impacted_upgrade:
                if rollback:
                    oot_drivers = ""
                else:
                    try:
                        oot_drivers = get_oot_drivers()
                    except ServiceParameterNotFound:
                        # the oot_drivers should be identical to the new default service parameter declared in
                        # config/controllerconfig/controllerconfig/upgrade-scripts/26-add-service-parameter.py#L52
                        oot_drivers = "ice,i40e,iavf"

                additional_data.update({"out-of-tree-drivers": oot_drivers})

        self.hosts_lock.acquire()
        self.hosts[ip].install_pending = True
        self.hosts[ip].install_status = False
        self.hosts[ip].install_reject_reason = None
        self.hosts_lock.release()

        installreq = PatchMessageAgentInstallReq(additional_data)
        installreq.ip = ip
        installreq.force = force
        installreq.major_release = major_release
        installreq.commit_id = commit_id
        installreq.encode()
        self.socket_lock.acquire()
        installreq.send(self.sock_out)
        self.socket_lock.release()

        if async_req:
            # async_req install requested, so return now
            msg = "Host deployment request sent to %s." % self.hosts[ip].hostname
            msg_info += msg + "\n"
            LOG.info("host-install async_req: %s", msg)
            # TODO(bqian) update deploy state to deploy-host

        # Now we wait, up to ten minutes. Future enhancement: wait on a condition
        resp_rx = False
        max_time = time.time() + 600
        success = True
        # NOTE(bqian) the loop below blocks the REST API service (slow thread).
        # Consider removing.
        while time.time() < max_time:
            self.hosts_lock.acquire()
            if ip not in self.hosts:
                # The host aged out while we were waiting
                self.hosts_lock.release()
                success = False
                msg = "Agent expired while waiting: %s" % ip
                msg_error += msg + "\n"
                LOG.error("Error in host-install: %s", msg)
                break

            if not self.hosts[ip].install_pending:
                # We got a response
                resp_rx = True
                if self.hosts[ip].install_status:
                    msg = "Host deployment was successful on %s." % self.hosts[ip].hostname
                    msg_info += msg + "\n"
                    LOG.info("host-install: %s", msg)
                elif self.hosts[ip].install_reject_reason:
                    msg = "Host deployment rejected by %s. %s" % (
                        self.hosts[ip].hostname,
                        self.hosts[ip].install_reject_reason)
                    msg_error += msg + "\n"
                    LOG.error("Error in host-install: %s", msg)
                    success = False
                else:
                    msg = "Host deployment failed on %s." % self.hosts[ip].hostname
                    msg_error += msg + "\n"
                    LOG.error("Error in host-install: %s", msg)
                    success = False

                self.hosts_lock.release()
                break

            self.hosts_lock.release()

            time.sleep(0.5)

        if not resp_rx:
            msg = "Timeout occurred while waiting for a response from %s." % ip
            msg_error += msg + "\n"
            LOG.error("Error in host-install: %s", msg)
            success = False

        if not success:
            deploy_host_state.deploy_failed()

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    @require_deploy_state([DEPLOY_STATES.START_DONE, DEPLOY_STATES.HOST, DEPLOY_STATES.HOST_FAILED],
                          "Current deployment ({state.value}) is not ready to deploy host")
    def software_deploy_host_api(self, hostname, force, async_req=False):
        return self._deploy_host(hostname, force, async_req)

    @require_deploy_state([DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE,
                           DEPLOY_STATES.HOST_ROLLBACK, DEPLOY_STATES.HOST_ROLLBACK_FAILED],
                          "Current deployment ({state.value}) is not ready to rollback host")
    def software_deploy_host_rollback_api(self, hostname, force, async_req=False):
        return self._deploy_host(hostname, force, async_req, rollback=True)

    def drop_host(self, host_ip, sync_nbr=True):
        msg_info = ""
        msg_warning = ""
        msg_error = ""

        ip = host_ip

        self.hosts_lock.acquire()
        # If not in the hosts table, maybe a hostname was used instead
        if host_ip not in self.hosts:
            try:
                # Because the host may be getting dropped due to deletion,
                # we may be unable to do a hostname lookup. Instead, we'll
                # iterate through the table here.
                for host in list(self.hosts):
                    if host_ip == self.hosts[host].hostname:
                        ip = host
                        break

                if ip not in self.hosts:
                    # Translated successfully, but the IP isn't in the table.
                    # Raise an exception to drop out to the failure handling
                    raise SoftwareError("Host IP (%s) not in table" % ip)
            except Exception:
                self.hosts_lock.release()
                msg = "Unknown host specified: %s" % host_ip
                msg_error += msg + "\n"
                LOG.error("Error in drop-host: %s", msg)
                return dict(info=msg_info, warning=msg_warning, error=msg_error)

        msg = "Running drop-host for %s (%s)" % (host_ip, ip)
        LOG.info(msg)
        audit_log_info(msg)

        del self.hosts[ip]
        for patch_id in list(self.interim_state):
            if ip in self.interim_state[patch_id]:
                self.interim_state[patch_id].remove(ip)

        self.hosts_lock.release()

        if sync_nbr:
            sync_msg = PatchMessageDropHostReq()
            sync_msg.ip = ip
            self.socket_lock.acquire()
            sync_msg.send(self.sock_out)
            self.socket_lock.release()

        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    def check_releases_state(self, release_ids, state):
        """check that all releases are in the specified state"""
        all_matched = True

        for release_id in release_ids:
            release = self.release_collection.get_release_by_id(release_id)
            if release is None:
                all_matched = False
                break

            if release.state != state:
                all_matched = False
                break
        return all_matched

    def is_available(self, release_ids):
        return self.check_releases_state(release_ids, states.AVAILABLE)

    def is_deployed(self, release_ids):
        return self.check_releases_state(release_ids, states.DEPLOYED)

    def is_committed(self, release_ids):
        return self.check_releases_state(release_ids, states.COMMITTED)
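
    # NOTE: illustrative usage -- these helpers are thin wrappers, e.g.
    #   self.is_available(["starlingx-24.09.1", "starlingx-24.09.2"])
    # returns True only if every listed release is currently in the
    # available state (the release IDs here are hypothetical).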

    # NOTE(bqian) report_app_dependencies function not being called?
    # That would mean self.app_dependencies is always empty and the file
    # app_dependency_filename will never exist.
    def report_app_dependencies(self, patch_ids, **kwargs):
        """
        Handle a report of application dependencies
        """
        if "app" not in kwargs:
            raise ReleaseInvalidRequest

        appname = kwargs.get("app")

        LOG.info("Handling app dependencies report: app=%s, patch_ids=%s",
                 appname, ','.join(patch_ids))

        if len(patch_ids) == 0:
            if appname in self.app_dependencies:
                del self.app_dependencies[appname]
        else:
            self.app_dependencies[appname] = patch_ids

        try:
            tmpfile, tmpfname = tempfile.mkstemp(
                prefix=app_dependency_basename,
                dir=constants.SOFTWARE_STORAGE_DIR)

            os.write(tmpfile, json.dumps(self.app_dependencies).encode())
            os.close(tmpfile)

            os.rename(tmpfname, app_dependency_filename)
        except Exception:
            LOG.exception("Failed in report_app_dependencies")
            raise SoftwareFail("Internal failure")

        return True

    # NOTE(bqian) unused function query_app_dependencies
    def query_app_dependencies(self):
        """
        Query application dependencies
        """
        data = self.app_dependencies

        return dict(data)

    def is_host_next_to_be_deployed_api(self, hostname):
        is_major_release = ReleaseState(release_state=states.DEPLOYING).is_major_release_deployment()
        deploy_state = DeployState.get_deploy_state()
        # If there's no deploy in progress, return False
        if deploy_state is None:
            return False
        is_rollback_action = deploy_state in [DEPLOY_STATES.HOST_ROLLBACK, DEPLOY_STATES.ACTIVATE_ROLLBACK_PENDING,
                                              DEPLOY_STATES.HOST_ROLLBACK_FAILED, DEPLOY_STATES.ACTIVATE_ROLLBACK_DONE,
                                              DEPLOY_STATES.ACTIVATE_ROLLBACK_FAILED]
        try:
            validate_host_deploy_order(hostname, is_major_release, is_rollback_action)
            return True
        except SoftwareServiceError:
            return False
        except Exception as err:
            msg_error = "Error checking the deploy order"
            LOG.exception("%s: %s" % (msg_error, err))
            return False

    def deploy_host_list(self):
        deploy_hosts = self.db_api_instance.get_deploy_host()
        deploy = self.db_api_instance.get_deploy_all()
        if not deploy:
            return []
        deploy = deploy[0]

        deploy_host_list = []
        for host in deploy_hosts:
            state = host.get("state")
            deploy_host = {
                "hostname": host.get("hostname"),
                "software_release": deploy.get("from_release"),
                "target_release": deploy.get("to_release") if state else None,
                "reboot_required": deploy.get("reboot_required") if state else None,
                "host_state": state
            }
            deploy_host_list.append(deploy_host)
        return deploy_host_list
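
    # NOTE: illustrative only -- each entry in the returned list looks like
    # (field values hypothetical):
    #   {"hostname": "controller-0", "software_release": "24.09.1",
    #    "target_release": "24.09.2", "reboot_required": True,
    #    "host_state": "deployed"}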

    def manage_software_alarm(self, alarm_id, alarm_state, entity_instance_id, **kwargs):
        try:
            if alarm_id not in constants.SOFTWARE_ALARMS:
                raise Exception("Unknown software alarm '%s'." % alarm_id)

            # deal with the alarm clear scenario
            if alarm_state == fm_constants.FM_ALARM_STATE_CLEAR:
                LOG.info("Clearing alarm: %s for %s" % (alarm_id, entity_instance_id))
                self.fm_api.clear_fault(alarm_id, entity_instance_id)
                return

            # if not the clear alarm scenario, create the alarm
            alarm_data = constants.SOFTWARE_ALARMS.get(alarm_id)
            # update the alarm_data if it is present in kwargs
            if kwargs:
                for data in alarm_data:
                    if data in kwargs.keys():
                        alarm_data[data] = kwargs[data]

            alarm = fm_api.Fault(
                alarm_id=alarm_id,
                alarm_state=alarm_state,
                entity_type_id=alarm_data.get("entity_type_id"),
                entity_instance_id=entity_instance_id,
                severity=alarm_data.get("severity"),
                reason_text=alarm_data.get("reason_text"),
                alarm_type=alarm_data.get("alarm_type"),
                probable_cause=alarm_data.get("probable_cause"),
                proposed_repair_action=alarm_data.get("proposed_repair_action"),
                service_affecting=alarm_data.get("service_affecting"),
            )
            LOG.info("Raising alarm: %s for %s" % (alarm_id, entity_instance_id))
            self.fm_api.set_fault(alarm)
        except Exception as e:
            LOG.exception("Failed to manage alarm %s with action %s: %s" % (
                alarm_id, alarm_state, str(e)
            ))
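
    # NOTE: illustrative usage -- clearing a host deploy-failure alarm would
    # look roughly like:
    #   self.manage_software_alarm(fm_constants.FM_ALARM_ID_USM_DEPLOY_HOST_FAILURE,
    #                              fm_constants.FM_ALARM_STATE_CLEAR,
    #                              "host=controller-0")
    # where the entity instance id string shown is hypothetical.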

    def get_out_of_sync_alarm(self):
        """Get the out-of-sync alarm instance from fm_api"""
        return self.fm_api.get_fault(fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC,
                                     constants.ALARM_INSTANCE_ID_OUT_OF_SYNC)

    def create_clean_up_deployment_alarm(self, target_state):
        """
        Creates the 900.022 alarm to warn the user to clean up the deployment
        data remaining for the specified release version.
        """
        if target_state in [DEPLOY_STATES.COMPLETED, DEPLOY_STATES.HOST_ROLLBACK_DONE]:
            is_major_release = ReleaseState(release_state=states.DEPLOYING).is_major_release_deployment()
            # Do not create in case of a patch release.
            if not is_major_release:
                return
            reason_text = constants.SOFTWARE_ALARMS[fm_constants.FM_ALARM_ID_USM_CLEANUP_DEPLOYMENT_DATA]["reason_text"]
            self.manage_software_alarm(fm_constants.FM_ALARM_ID_USM_CLEANUP_DEPLOYMENT_DATA,
                                       fm_constants.FM_ALARM_STATE_SET,
                                       "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
                                                  constants.CONTROLLER_FLOATING_HOSTNAME),
                                       reason_text=reason_text)

    def handle_deploy_state_sync(self):
        """
        Handle the deploy state sync.
        If the deploy state is in sync, clear the alarm.
        If not, raise the alarm.
        """
        is_in_sync = is_deploy_state_in_sync()

        # Deploy in-sync state has not changed, no need to update the alarm
        if is_in_sync == self.usm_alarm.get(constants.LAST_IN_SYNC):
            return

        try:
            LOG.info("software.json in sync: %s", is_in_sync)
            out_of_sync_alarm_fault = self.get_out_of_sync_alarm()

            if out_of_sync_alarm_fault and is_in_sync:
                # There was an out-of-sync alarm raised, but the local software.json
                # is in sync, so we clear the alarm
                self.manage_software_alarm(
                    alarm_id=fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC,
                    alarm_state=fm_constants.FM_ALARM_STATE_CLEAR,
                    entity_instance_id=constants.ALARM_INSTANCE_ID_OUT_OF_SYNC
                )
                # Deploy in-sync state has changed, update the cache
                self.usm_alarm[constants.LAST_IN_SYNC] = is_in_sync
            elif (not out_of_sync_alarm_fault) and (not is_in_sync):
                # There was no out-of-sync alarm raised, but the local software.json
                # is not in sync, so we raise the alarm
                self.manage_software_alarm(
                    alarm_id=fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC,
                    alarm_state=fm_constants.FM_ALARM_STATE_SET,
                    entity_instance_id=constants.ALARM_INSTANCE_ID_OUT_OF_SYNC
                )
                # Deploy in-sync state has changed, update the cache
                self.usm_alarm[constants.LAST_IN_SYNC] = is_in_sync
            else:
                # Shouldn't get here
                LOG.error("Unexpected case in handling deploy state sync.")

        except Exception as ex:
            LOG.exception("Failed in handling deploy state sync. Error: %s" % str(ex))

    def _get_software_upgrade(self):
        """
        Get the current software upgrade from/to versions and state
        :return: dict of from_release, to_release and state
        """

        all_deploy = self.db_api_instance.get_deploy_all()

        if not all_deploy:
            return None

        deploy = all_deploy[0]
        from_maj_min_release = utils.get_major_release_version(deploy.get("from_release"))
        to_maj_min_release = utils.get_major_release_version(deploy.get("to_release"))
        state = deploy.get("state")

        return {
            "from_release": from_maj_min_release,
            "to_release": to_maj_min_release,
            "state": state
        }
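
    # NOTE: illustrative only -- during a hypothetical 22.12 -> 24.09 upgrade
    # in the deploy-host phase, _get_software_upgrade would return something like
    # (the state value shown is hypothetical):
    #   {"from_release": "22.12", "to_release": "24.09", "state": "deploy-host"}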

    def check_upgrade_in_progress(self):
        """
        Check if a major release upgrade is in progress
        """
        _upgrade_in_progress = False
        upgrade_release = self._get_software_upgrade()
        if not upgrade_release:
            return _upgrade_in_progress
        from_release = version.Version(upgrade_release["from_release"])
        to_release = version.Version(upgrade_release["to_release"])
        if (from_release.major != to_release.major) or (from_release.minor != to_release.minor):
            _upgrade_in_progress = True
        return _upgrade_in_progress
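
    # NOTE: illustrative only -- comparing major.minor components means
    # 22.12 -> 24.09 counts as an upgrade in progress (True), while a patch
    # deploy within the same release, such as 24.09 -> 24.09, does not (False).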
|
def get_software_upgrade(self):
|
|
return self._get_software_upgrade()
|
|
|
|
def get_all_software_host_upgrade(self):
|
|
"""
|
|
Get all software host upgrade from/to versions and state
|
|
:return: list of dict of hostname, current_sw_version, target_sw_version and host_state
|
|
"""
|
|
deploy = self._get_software_upgrade()
|
|
deploy_hosts = self.db_api_instance.get_deploy_host()
|
|
|
|
if deploy is None or deploy_hosts is None:
|
|
return None
|
|
|
|
from_maj_min_release = deploy.get("from_release")
|
|
to_maj_min_release = deploy.get("to_release")
|
|
|
|
all_host_upgrades = []
|
|
for deploy_host in deploy_hosts:
|
|
all_host_upgrades.append({
|
|
"hostname": deploy_host.get("hostname"),
|
|
"current_sw_version": to_maj_min_release if deploy_host.get(
|
|
"state") == states.DEPLOYED else from_maj_min_release,
|
|
"target_sw_version": to_maj_min_release,
|
|
"host_state": deploy_host.get("state")
|
|
})
|
|
|
|
return all_host_upgrades
|
|
|
|
    def get_one_software_host_upgrade(self, hostname):
        """
        Get the given software host upgrade from/to versions and state
        :param hostname: hostname
        :return: list with a single dict of hostname, current_sw_version, target_sw_version and host_state
        """

        all_host_upgrades = self.get_all_software_host_upgrade()

        if not all_host_upgrades:
            return None

        for host_upgrade in all_host_upgrades:
            if host_upgrade.get("hostname") == hostname:
                return [host_upgrade]

        return None

    def is_host_active_controller(self):
        """
        Check if the current host is the active controller, by checking if the
        floating IP is assigned to the host
        :return: True if it is the active controller, False otherwise
        """
        if not os.path.exists(INITIAL_CONFIG_COMPLETE_FLAG):
            return False

        floating_mgmt_ip = utils.gethostbyname(constants.CONTROLLER_FLOATING_HOSTNAME)
        if not floating_mgmt_ip:
            return False

        ip_family = utils.get_management_family()
        mgmt_iface = cfg.get_mgmt_iface()

        host_mgmt_ip_list = utils.get_iface_ip(mgmt_iface, ip_family)
        return floating_mgmt_ip in host_mgmt_ip_list if host_mgmt_ip_list else False

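    # For example (hypothetical addresses): if the floating hostname resolves
    # to 192.168.204.2 and that address is configured on this host's
    # management interface, this host is the active controller.
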
    def set_interruption_fail_state(self):
        """
        Set the host failed state after an interruption based on current deployment state
        """
        upgrade_status = self.get_software_upgrade()
        if self.is_host_active_controller() and os.path.exists(INITIAL_CONFIG_COMPLETE_FLAG) and upgrade_status:

            if upgrade_status.get('state') == DEPLOY_STATES.HOST.value and not is_simplex():
                to_fail_hostname = CONTROLLER_0_HOSTNAME if self.hostname == CONTROLLER_1_HOSTNAME else \
                    CONTROLLER_1_HOSTNAME
                # In DX, when it is in deploy-host state, we can only set the standby controller to fail
                start_set_fail(True, to_fail_hostname)

            elif upgrade_status.get('state') in INTERRUPTION_RECOVERY_STATES:
                # The deployment was interrupted. We need to update the deployment state first
                start_set_fail(True, self.hostname)


class PatchControllerApiThread(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)
        self.wsgi = None
        self.name = "PatchControllerApiThread"

    def run(self):
        global thread_death

        host = "127.0.0.1"
        port = cfg.api_port

        try:
            # In order to support IPv6, server_class.address_family must be
            # set to the correct address family. Because the unauthenticated
            # API always uses IPv4 for the loopback address, the address_family
            # variable cannot be set directly in the WSGIServer class, so a
            # local subclass needs to be created for the call to make_server,
            # where the correct address_family can be specified.
            class server_class(simple_server.WSGIServer):
                pass

            server_class.address_family = socket.AF_INET
            self.wsgi = simple_server.make_server(
                host, port,
                app.VersionSelectorApplication(),
                server_class=server_class)

            self.wsgi.socket.settimeout(api_socket_timeout)
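            # Note: with a timeout set on the listening socket, handle_request()
            # returns periodically even when idle, so the loop below can
            # re-check keep_running and thread_death instead of blocking forever.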
            global keep_running
            while keep_running:
                self.wsgi.handle_request()

                if thread_death.is_set():
                    LOG.info("%s exits as thread death is detected.", self.name)
                    return

                # Call garbage collect after wsgi request is handled,
                # to ensure any open file handles are closed in the case
                # of an upload.
                gc.collect()
        except Exception as ex:
            # Log all exceptions
            LOG.exception("%s: error occurred during request processing: %s" % (self.name, str(ex)))
            thread_death.set()

    def kill(self):
        # Must run from other thread
        if self.wsgi is not None:
            self.wsgi.shutdown()


class PatchControllerAuthApiThread(threading.Thread):
    def __init__(self, port):
        threading.Thread.__init__(self)
        # LOG.info ("Initializing Authenticated API thread")
        self.wsgi = None
        self.port = port
        self.name = f"PatchControllerAuthApiThread_{port}"

    def run(self):
        global thread_death
        host = CONF.auth_api_bind_ip
        if host is None:
            host = utils.get_versioned_address_all()
        try:
            # Can only launch authenticated server post-config
            while not os.path.exists(VOLATILE_CONTROLLER_CONFIG_COMPLETE):
                LOG.info("Authorized API: Waiting for controller config complete.")
                time.sleep(5)

            LOG.info("Authorized API: Initializing")

            # In order to support IPv6, server_class.address_family must be
            # set to the correct address family. Because the unauthenticated
            # API always uses IPv4 for the loopback address, the address_family
            # variable cannot be set directly in the WSGIServer class, so a
            # local subclass needs to be created for the call to make_server,
            # where the correct address_family can be specified.
            class server_class(simple_server.WSGIServer):
                pass

            server_class.address_family = utils.get_management_family()
            self.wsgi = simple_server.make_server(
                host, self.port,
                auth_app.VersionSelectorApplication(),
                server_class=server_class)

            # self.wsgi.serve_forever()
            self.wsgi.socket.settimeout(api_socket_timeout)

            global keep_running
            while keep_running:
                self.wsgi.handle_request()

                if thread_death.is_set():
                    LOG.info("%s exits as thread death is detected.", self.name)
                    return

                # Call garbage collect after wsgi request is handled,
                # to ensure any open file handles are closed in the case
                # of an upload.
                gc.collect()
        except Exception as ex:
            # Log all exceptions
            LOG.exception("%s: error occurred during request processing: %s" % (self.name, str(ex)))
            thread_death.set()

    def kill(self):
        # Must run from other thread
        if self.wsgi is not None:
            self.wsgi.shutdown()

        LOG.info("%s exits as requested", self.name)
        global thread_death
        thread_death.set()


class PatchControllerMainThread(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)
        # LOG.info ("Initializing Main thread")
        self.name = "PatchControllerMainThread"

    def run(self):
        global sc
        global thread_death

        # TODO(jvazhapp) Fix following temporary workaround
        # for eventlet issue resulting in error message:
        # 'Resolver configuration could not be read or
        # specified no nameservers eventlet fix version'
        with open('/etc/resolv.conf', 'a+') as f:
            f.seek(0)
            data = f.read()
            if "nameserver" not in data:
                f.write("nameserver 8.8.8.8\n")

        # Send periodic messages to the agents.
        # We can only use one interval.
        SEND_MSG_INTERVAL_IN_SECONDS = 30.0

        sc.ignore_errors = os.environ.get('IGNORE_ERRORS', 'False')
        LOG.info("IGNORE_ERRORS execution flag is set: %s", sc.ignore_errors)

        LOG.info("software-controller-daemon is starting")

        LOG.info("%s is active controller: %s", sc.hostname, sc.is_host_active_controller())

        sc.set_interruption_fail_state()

        try:
            if sc.pre_bootstrap and cfg.get_mgmt_ip():
                sc.pre_bootstrap = False

            if sc.pre_bootstrap or os.path.isfile(INSTALL_LOCAL_FLAG):
                sc.install_local = True
            else:
                sc.install_local = False

            # Update the out of sync alarm cache when the thread starts
            out_of_sync_alarm_fault = sc.get_out_of_sync_alarm()
            sc.usm_alarm[constants.LAST_IN_SYNC] = not out_of_sync_alarm_fault

            sock_in = sc.setup_socket()

            while sock_in is None:
                # Check every thirty seconds.
                # Once we've got a conf file, tied into packstack,
                # we'll get restarted when the file is updated,
                # and this should be unnecessary.
                time.sleep(30)
                sock_in = sc.setup_socket()

            # Ok, now we've got our socket. Let's start with a hello!
            sc.socket_lock.acquire()

            hello = PatchMessageHello()
            hello.send(sc.sock_out)

            hello_agent = PatchMessageHelloAgent()
            hello_agent.send(sc.sock_out)

            sc.socket_lock.release()

            # Send hello every thirty seconds
            hello_timeout = time.time() + SEND_MSG_INTERVAL_IN_SECONDS
            # Send deploy state update every thirty seconds
            deploy_state_update_timeout = time.time() + SEND_MSG_INTERVAL_IN_SECONDS
            remaining = int(SEND_MSG_INTERVAL_IN_SECONDS)
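            # The two timeout stamps above drive the periodic (30s) hello
            # messages to peer controllers/agents and the deploy state update
            # broadcasts; select() below wakes up at the same interval.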
            agent_query_conns = []

            while True:
                # Check to see if any other thread has died
                if thread_death.is_set():
                    LOG.info("%s exits as thread death is detected.", self.name)
                    return

                # Check for in-service patch restart flag
                if os.path.exists(insvc_patch_restart_controller):
                    LOG.info("In-service patch restart flag detected. Exiting.")
                    global keep_running
                    keep_running = False
                    os.remove(insvc_patch_restart_controller)
                    return

                # If bootstrap is completed re-initialize sockets
                if sc.pre_bootstrap and cfg.get_mgmt_ip():
                    sc.pre_bootstrap = False

                    sock_in = sc.setup_socket()
                    while sock_in is None:
                        time.sleep(30)
                        sock_in = sc.setup_socket()

                    sc.socket_lock.acquire()

                    hello = PatchMessageHello()
                    hello.send(sc.sock_out)

                    hello_agent = PatchMessageHelloAgent()
                    hello_agent.send(sc.sock_out)

                    sc.socket_lock.release()
                    for s in agent_query_conns.copy():
                        agent_query_conns.remove(s)
                        s.shutdown(socket.SHUT_RDWR)
                        s.close()

                local_mode = sc.pre_bootstrap or os.path.isfile(INSTALL_LOCAL_FLAG)
                if local_mode and not sc.install_local:
                    sc.install_local = True
                elif not local_mode and sc.install_local:
                    sc.install_local = False

                inputs = [sc.sock_in] + agent_query_conns
                outputs = []

                rlist, wlist, xlist = select.select(
                    inputs, outputs, inputs, SEND_MSG_INTERVAL_IN_SECONDS)

                if (len(rlist) == 0 and
                        len(wlist) == 0 and
                        len(xlist) == 0):
                    # Timeout hit
                    sc.audit_socket()

                for s in rlist:
                    data = ''
                    addr = None
                    msg = None

                    if s == sc.sock_in:
                        # Receive from UDP
                        sc.socket_lock.acquire()
                        data, addr = s.recvfrom(1024)
                        sc.socket_lock.release()
                    else:
                        # Receive from TCP
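                        # Note: agent replies arrive as bare JSON with no
                        # length prefix or delimiter; the buffer is considered
                        # complete once json.loads() below stops raising ValueError.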
                        while True:
                            try:
                                packet = s.recv(1024)
                            except socket.error:
                                LOG.exception("Socket error on recv")
                                data = ''
                                break

                            if packet:
                                data += packet.decode()

                                if data == '':
                                    break
                                try:
                                    json.loads(data)
                                    break
                                except ValueError:
                                    # Message is incomplete
                                    continue
                            else:
                                LOG.info('End of TCP message received')
                                break

                        if data == '':
                            # Connection dropped
                            agent_query_conns.remove(s)
                            s.close()
                            continue

                        # Get the TCP endpoint address
                        addr = s.getpeername()

                    msgdata = json.loads(data)

                    # For now, discard any messages that are not msgversion==1
                    if 'msgversion' in msgdata and msgdata['msgversion'] != 1:
                        continue

                    if 'msgtype' in msgdata:
                        if msgdata['msgtype'] == messages.PATCHMSG_HELLO:
                            msg = PatchMessageHello()
                        elif msgdata['msgtype'] == messages.PATCHMSG_HELLO_ACK:
                            msg = PatchMessageHelloAck()
                        elif msgdata['msgtype'] == messages.PATCHMSG_SYNC_REQ:
                            msg = PatchMessageSyncReq()
                        elif msgdata['msgtype'] == messages.PATCHMSG_SYNC_COMPLETE:
                            msg = PatchMessageSyncComplete()
                        elif msgdata['msgtype'] == messages.PATCHMSG_HELLO_AGENT_ACK:
                            msg = PatchMessageHelloAgentAck()
                        elif msgdata['msgtype'] == messages.PATCHMSG_QUERY_DETAILED_RESP:
                            msg = PatchMessageQueryDetailedResp()
                        elif msgdata['msgtype'] == messages.PATCHMSG_AGENT_INSTALL_RESP:
                            msg = PatchMessageAgentInstallResp()
                        elif msgdata['msgtype'] == messages.PATCHMSG_DROP_HOST_REQ:
                            msg = PatchMessageDropHostReq()
                        elif msgdata['msgtype'] == messages.PATCHMSG_DEPLOY_STATE_UPDATE:
                            msg = SoftwareMessageDeployStateUpdate()
                        elif msgdata['msgtype'] == messages.PATCHMSG_DEPLOY_STATE_UPDATE_ACK:
                            msg = SoftwareMessageDeployStateUpdateAck()
                        elif msgdata['msgtype'] == messages.PATCHMSG_DEPLOY_STATE_CHANGED:
                            msg = SWMessageDeployStateChanged()
                        elif msgdata['msgtype'] == messages.PATCHMSG_DEPLOY_DELETE_CLEANUP_RESP:
                            msg = SoftwareMessageDeployDeleteCleanupResp()
                        elif msgdata['msgtype'] == messages.PATCHMSG_CHECK_AGENT_ALIVE_RESP:
                            msg = SoftwareMessageCheckAgentAliveResp()

                    if msg is None:
                        msg = messages.PatchMessage()

                    msg.decode(msgdata)
                    if s == sc.sock_in:
                        msg.handle(sc.sock_out, addr)
                    else:
                        msg.handle(s, addr)

                    # We can drop the connection after a query response
                    if msg.msgtype == messages.PATCHMSG_QUERY_DETAILED_RESP and s != sc.sock_in:
                        agent_query_conns.remove(s)
                        s.shutdown(socket.SHUT_RDWR)
                        s.close()

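                # Drain the stale host list by opening detailed-query TCP
                # connections to agents, capped by the agent_query_conns length
                # check below; hosts that fail to connect are pushed back onto
                # the list for a later retry.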
                while len(stale_hosts) > 0 and len(agent_query_conns) <= 5:
                    ip = stale_hosts.pop()
                    try:
                        agent_sock = socket.create_connection((ip, cfg.agent_port))
                        query = PatchMessageQueryDetailed()
                        query.send(agent_sock)
                        agent_query_conns.append(agent_sock)
                    except Exception:
                        # Put it back on the list
                        stale_hosts.append(ip)

                remaining = int(hello_timeout - time.time())
                if remaining <= 0 or remaining > int(SEND_MSG_INTERVAL_IN_SECONDS):
                    hello_timeout = time.time() + SEND_MSG_INTERVAL_IN_SECONDS
                    remaining = int(SEND_MSG_INTERVAL_IN_SECONDS)

                    sc.socket_lock.acquire()

                    hello = PatchMessageHello()
                    hello.send(sc.sock_out)

                    hello_agent = PatchMessageHelloAgent()
                    hello_agent.send(sc.sock_out)

                    sc.socket_lock.release()

                    # Age out neighbours
                    sc.controller_neighbours_lock.acquire()
                    nbrs = list(sc.controller_neighbours)
                    for n in nbrs:
                        # Age out controllers after 2 minutes
                        if sc.controller_neighbours[n].get_age() >= 120:
                            LOG.info("Aging out controller %s from table", n)
                            del sc.controller_neighbours[n]
                    sc.controller_neighbours_lock.release()

                    sc.hosts_lock.acquire()
                    nbrs = list(sc.hosts)
                    for n in nbrs:
                        # Age out hosts after 1 hour
                        if sc.hosts[n].get_age() >= 3600:
                            LOG.info("Aging out host %s from table", n)
                            del sc.hosts[n]
                            for patch_id in list(sc.interim_state):
                                if n in sc.interim_state[patch_id]:
                                    sc.interim_state[patch_id].remove(n)

                    sc.hosts_lock.release()

                deploy_state_update_remaining = int(deploy_state_update_timeout - time.time())
                # Only send the deploy state update from the active controller
                if deploy_state_update_remaining <= 0 or deploy_state_update_remaining > int(
                        SEND_MSG_INTERVAL_IN_SECONDS):
                    deploy_state_update_timeout = time.time() + SEND_MSG_INTERVAL_IN_SECONDS
                    deploy_state_update_remaining = int(
                        SEND_MSG_INTERVAL_IN_SECONDS)

                    if not is_simplex():
                        # Get out-of-sync alarm to request peer sync even if no deployment in progress
                        out_of_sync_alarm_fault = sc.get_out_of_sync_alarm()

                        # data sync always starts only from the active controller
                        if utils.is_active_controller():
                            if out_of_sync_alarm_fault or is_deployment_in_progress():
                                sc.socket_lock.acquire()
                                try:
                                    deploy_state_update = SoftwareMessageDeployStateUpdate()
                                    deploy_state_update.send(sc.sock_out)
                                except Exception as e:
                                    LOG.exception("Failed to send deploy state update. Error: %s", str(e))
                                finally:
                                    sc.socket_lock.release()

                    if not sc.pre_bootstrap:
                        sc.handle_deploy_state_sync()

        except Exception as ex:
            # Log all exceptions
            LOG.exception("%s: error occurred during request processing: %s" % (self.name, str(ex)))
            thread_death.set()


def main():
    software_conf = constants.SOFTWARE_CONFIG_FILE_LOCAL

    pkg_feed = ('"http://controller:8080/updates/debian/rel-%s/ %s updates"'
                % (constants.STARLINGX_RELEASE, constants.DEBIAN_RELEASE))

    config = configparser.ConfigParser()
    config.read(software_conf)
    config.set("runtime", "package_feed", pkg_feed)
    with open(software_conf, "w+") as configfile:
        config.write(configfile)

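    # For illustration only (actual values come from constants): with
    # STARLINGX_RELEASE "25.03" and DEBIAN_RELEASE "bullseye", package_feed
    # becomes "http://controller:8080/updates/debian/rel-25.03/ bullseye updates"
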
    # The following call to CONF is to ensure the oslo config
    # has been called to specify a valid config dir.
    # Otherwise oslo_policy will fail when it looks for its files.
    CONF(
        (),  # Required to load an anonymous configuration
        default_config_files=['/etc/software/software.conf', ]
    )

    cfg.read_config()

    configure_logging()

    # daemon.pidlockfile.write_pid_to_pidfile(pidfile_path)

    global thread_death
    thread_death = threading.Event()

    # Set the TMPDIR environment variable to /scratch so that any modules
    # that create directories with tempfile will not use /tmp
    os.environ['TMPDIR'] = '/scratch'

    global sc
    sc = PatchController()

    LOG.info("launching")
    api_thread = PatchControllerApiThread()
    auth_api_thread = PatchControllerAuthApiThread(CONF.auth_api_port)
    auth_api_alt_thread = PatchControllerAuthApiThread(CONF.auth_api_alt_port)
    main_thread = PatchControllerMainThread()

    api_thread.start()
    auth_api_thread.start()
    auth_api_alt_thread.start()
    main_thread.start()

    thread_death.wait()
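    # Any thread that hits a fatal error sets thread_death; once it fires we
    # flip keep_running so each loop exits on its next socket timeout, then
    # join all threads for an orderly shutdown.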
    global keep_running
    keep_running = False

    api_thread.join()
    auth_api_thread.join()
    auth_api_alt_thread.join()
    main_thread.join()