
This commit adds the command "subcloud redeploy" to dcmanager. The
redeploy operation is similar to "subcloud reinstall", performing a
fresh install, bootstrap and configuration of the subcloud, but it
allows the user to either reuse the install/bootstrap/config values
previously stored on the system controller or supply new values from
provided files. Since config is an optional phase, it is only executed
if the respective parameters are provided in the current request or
were provided in a previous deployment.

Test Plan:

Success cases:
- PASS: Redeploy subcloud without passing any new files and verify the
  redeployment was successful and the final deploy state is "complete".
- PASS: Redeploy subcloud passing new install/bootstrap/config files
  and verify the redeployment was successful and the final deploy state
  is "complete".
- PASS: Redeploy a subcloud with a different management subnet and
  verify that the network reconfiguration was executed during the
  bootstrap phase and the redeployment completed successfully.
- PASS: Redeploy a subcloud that wasn't configured by the
  "deploy config" command, passing a config file, and verify that the
  subcloud was redeployed and configured.
- PASS: Redeploy a subcloud that wasn't configured by the
  "deploy config" command without passing a config file and verify that
  the subcloud was redeployed and no configuration attempt was made.
- PASS: Redeploy a subcloud passing a previous release (21.12) and
  verify the redeployment was successful and the final deploy state is
  "complete".
- PASS: Abort each one of the three deployment phases and verify the
  deployment was successfully aborted.
- PASS: Resume the aborted deployment and verify the subcloud was
  successfully deployed.
- PASS: Repeat the previous tests but call the API directly (using
  curl) instead of using the CLI.

Failure cases:
- PASS: Verify it's not possible to redeploy an online and/or managed
  subcloud.
- PASS: Call the API directly, passing bmc-password and/or
  sysadmin-password as plain text as opposed to b64encoded, and verify
  that the response contains the correct error code and message.

Story: 2010756
Task: 48496

Change-Id: I6148096909adda2b95b6bb964bc7a749ac62c20c
Signed-off-by: Victor Romano <victor.gluzromano@windriver.com>
# Copyright 2017 Ericsson AB.
# Copyright (c) 2017-2023 Wind River Systems, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import division

import collections
import datetime
import filecmp
import functools
import json
import os
import shutil
import threading
import time

from eventlet import greenpool
from fm_api import constants as fm_const
from fm_api import fm_api
import keyring
import netaddr
from oslo_log import log as logging
from oslo_messaging import RemoteError
from tsconfig.tsconfig import CONFIG_PATH
from tsconfig.tsconfig import SW_VERSION

from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon.exceptions import PlaybookExecutionFailed
from dccommon import kubeoperator
from dccommon.subcloud_install import SubcloudInstall
from dccommon.subcloud_install import SubcloudShutdown
from dccommon.utils import run_playbook
from dccommon.utils import RunAnsible
from dcmanager.audit import rpcapi as dcmanager_audit_rpc_client
from dcmanager.common import consts
from dcmanager.common.consts import INVENTORY_FILE_POSTFIX
from dcmanager.common import context as dcmanager_context
from dcmanager.common import exceptions
from dcmanager.common.exceptions import DCManagerException
from dcmanager.common.i18n import _
from dcmanager.common import manager
from dcmanager.common import phased_subcloud_deploy as psd_common
from dcmanager.common import prestage
from dcmanager.common import utils
from dcmanager.db import api as db_api
from dcmanager.db.sqlalchemy.models import Subcloud
from dcmanager.rpc import client as dcmanager_rpc_client
from dcorch.rpc import client as dcorch_rpc_client


LOG = logging.getLogger(__name__)

# Name of our distributed cloud addn_hosts file for dnsmasq
# to read. This file is referenced in dnsmasq.conf
ADDN_HOSTS_DC = 'dnsmasq.addn_hosts_dc'

# Subcloud configuration paths
ANSIBLE_SUBCLOUD_BACKUP_CREATE_PLAYBOOK = \
    '/usr/share/ansible/stx-ansible/playbooks/create_subcloud_backup.yml'
ANSIBLE_SUBCLOUD_BACKUP_DELETE_PLAYBOOK = \
    '/usr/share/ansible/stx-ansible/playbooks/delete_subcloud_backup.yml'
ANSIBLE_SUBCLOUD_BACKUP_RESTORE_PLAYBOOK = \
    '/usr/share/ansible/stx-ansible/playbooks/restore_subcloud_backup.yml'
ANSIBLE_SUBCLOUD_PLAYBOOK = \
    '/usr/share/ansible/stx-ansible/playbooks/bootstrap.yml'
ANSIBLE_SUBCLOUD_REHOME_PLAYBOOK = \
    '/usr/share/ansible/stx-ansible/playbooks/rehome_subcloud.yml'
ANSIBLE_SUBCLOUD_UPDATE_PLAYBOOK = \
    '/usr/share/ansible/stx-ansible/playbooks/update_subcloud.yml'

USERS_TO_REPLICATE = [
    'sysinv',
    'patching',
    'vim',
    'mtce',
    'fm',
    'barbican',
    'dcmanager'
]

# The timeout of the rehome playbook is set to 180 seconds as it takes a
# long time for privilege escalation before resetting the host route and
# LDAP server address in a subcloud.
REHOME_PLAYBOOK_TIMEOUT = "180"  # 180 seconds
UPDATE_PLAYBOOK_TIMEOUT = "180"
SC_INTERMEDIATE_CERT_DURATION = "8760h"  # 1 year = 24 hours x 365
SC_INTERMEDIATE_CERT_RENEW_BEFORE = "720h"  # 30 days
CERT_NAMESPACE = "dc-cert"

TRANSITORY_STATES = {
    consts.DEPLOY_STATE_NONE: consts.DEPLOY_STATE_DEPLOY_PREP_FAILED,
    consts.DEPLOY_STATE_PRE_DEPLOY: consts.DEPLOY_STATE_DEPLOY_PREP_FAILED,
    consts.DEPLOY_STATE_CREATING: consts.DEPLOY_STATE_CREATE_FAILED,
    consts.DEPLOY_STATE_PRE_INSTALL: consts.DEPLOY_STATE_PRE_INSTALL_FAILED,
    consts.DEPLOY_STATE_INSTALLING: consts.DEPLOY_STATE_INSTALL_FAILED,
    consts.DEPLOY_STATE_PRE_BOOTSTRAP: consts.DEPLOY_STATE_PRE_BOOTSTRAP_FAILED,
    consts.DEPLOY_STATE_BOOTSTRAPPING: consts.DEPLOY_STATE_BOOTSTRAP_FAILED,
    consts.DEPLOY_STATE_PRE_CONFIG: consts.DEPLOY_STATE_PRE_CONFIG_FAILED,
    consts.DEPLOY_STATE_CONFIGURING: consts.DEPLOY_STATE_CONFIG_FAILED,
    consts.DEPLOY_STATE_DEPLOYING: consts.DEPLOY_STATE_DEPLOY_FAILED,
    consts.DEPLOY_STATE_ABORTING_INSTALL: consts.DEPLOY_STATE_INSTALL_FAILED,
    consts.DEPLOY_STATE_ABORTING_BOOTSTRAP: consts.DEPLOY_STATE_BOOTSTRAP_FAILED,
    consts.DEPLOY_STATE_ABORTING_CONFIG: consts.DEPLOY_STATE_CONFIG_FAILED,
    consts.DEPLOY_STATE_MIGRATING_DATA: consts.DEPLOY_STATE_DATA_MIGRATION_FAILED,
    consts.DEPLOY_STATE_PRE_RESTORE: consts.DEPLOY_STATE_RESTORE_PREP_FAILED,
    consts.DEPLOY_STATE_RESTORING: consts.DEPLOY_STATE_RESTORE_FAILED,
    consts.PRESTAGE_STATE_PACKAGES: consts.PRESTAGE_STATE_FAILED,
    consts.PRESTAGE_STATE_IMAGES: consts.PRESTAGE_STATE_FAILED,
}


TRANSITORY_BACKUP_STATES = {
    consts.BACKUP_STATE_VALIDATING: consts.BACKUP_STATE_VALIDATE_FAILED,
    consts.BACKUP_STATE_PRE_BACKUP: consts.BACKUP_STATE_PREP_FAILED,
    consts.BACKUP_STATE_IN_PROGRESS: consts.BACKUP_STATE_FAILED
}

MAX_PARALLEL_SUBCLOUD_BACKUP_CREATE = 250
MAX_PARALLEL_SUBCLOUD_BACKUP_DELETE = 250
MAX_PARALLEL_SUBCLOUD_BACKUP_RESTORE = 100
CENTRAL_BACKUP_DIR = '/opt/dc-vault/backups'

ENDPOINT_URLS = {
    dccommon_consts.ENDPOINT_TYPE_PLATFORM: "https://{}:6386/v1",
    dccommon_consts.ENDPOINT_TYPE_IDENTITY: "https://{}:5001/v3",
    dccommon_consts.ENDPOINT_TYPE_PATCHING: "https://{}:5492",
    dccommon_consts.ENDPOINT_TYPE_FM: "https://{}:18003",
    dccommon_consts.ENDPOINT_TYPE_NFV: "https://{}:4546",
    dccommon_consts.ENDPOINT_TYPE_SOFTWARE: "https://{}:5498",
}


class SubcloudManager(manager.Manager):
    """Manages tasks related to subclouds."""

    regionone_data = collections.defaultdict(dict)

    def __init__(self, *args, **kwargs):
        LOG.debug(_('SubcloudManager initialization...'))

        super(SubcloudManager, self).__init__(service_name="subcloud_manager",
                                              *args, **kwargs)
        self.context = dcmanager_context.get_admin_context()
        self.dcorch_rpc_client = dcorch_rpc_client.EngineClient()
        self.fm_api = fm_api.FaultAPIs()
        self.audit_rpc_client = dcmanager_audit_rpc_client.ManagerAuditClient()
        self.state_rpc_client = dcmanager_rpc_client.SubcloudStateClient()

    @staticmethod
    def _get_subcloud_cert_name(subcloud_name):
        cert_name = "%s-adminep-ca-certificate" % subcloud_name
        return cert_name

    @staticmethod
    def _get_subcloud_cert_secret_name(subcloud_name):
        secret_name = "%s-adminep-ca-certificate" % subcloud_name
        return secret_name

    @staticmethod
    def _create_intermediate_ca_cert(payload):
        subcloud_name = payload["name"]
        cert_name = SubcloudManager._get_subcloud_cert_name(subcloud_name)
        secret_name = SubcloudManager._get_subcloud_cert_secret_name(
            subcloud_name)

        cert = {
            "apiVersion": "%s/%s" % (kubeoperator.CERT_MANAGER_GROUP,
                                     kubeoperator.CERT_MANAGER_VERSION),
            "kind": "Certificate",
            "metadata": {
                "namespace": CERT_NAMESPACE,
                "name": cert_name
            },
            "spec": {
                "secretName": secret_name,
                "duration": SC_INTERMEDIATE_CERT_DURATION,
                "renewBefore": SC_INTERMEDIATE_CERT_RENEW_BEFORE,
                "issuerRef": {
                    "kind": "Issuer",
                    "name": "dc-adminep-root-ca-issuer"
                },
                "commonName": cert_name,
                "isCA": True,
            },
        }

        kube = kubeoperator.KubeOperator()
        kube.apply_cert_manager_certificate(CERT_NAMESPACE, cert_name, cert)

        for count in range(1, 20):
            secret = kube.kube_get_secret(secret_name, CERT_NAMESPACE)
            if not hasattr(secret, 'data'):
                time.sleep(1)
                LOG.debug('Wait for %s ... %s' % (secret_name, count))
                continue

            data = secret.data
            if ('ca.crt' not in data or
                    'tls.crt' not in data or 'tls.key' not in data) or \
                    not (data['ca.crt'] and data['tls.crt'] and data['tls.key']):
                # ca cert, certificate and key pair are needed and must exist
                # for creating an intermediate ca. If not, certificate is not
                # ready yet.
                time.sleep(1)
                LOG.debug('Wait for %s ... %s' % (secret_name, count))
                continue

            payload['dc_root_ca_cert'] = data['ca.crt']
            payload['sc_ca_cert'] = data['tls.crt']
            payload['sc_ca_key'] = data['tls.key']
            return

        raise Exception("Secret for certificate %s is not ready." % cert_name)

    # TODO(kmacleod) switch to using utils.get_ansible_filename
    @staticmethod
    def _get_ansible_filename(subcloud_name, postfix='.yml'):
        ansible_filename = os.path.join(
            dccommon_consts.ANSIBLE_OVERRIDES_PATH,
            subcloud_name + postfix)
        return ansible_filename

    def compose_install_command(self, subcloud_name,
                                ansible_subcloud_inventory_file,
                                software_version=None):
        install_command = [
            "ansible-playbook", dccommon_consts.ANSIBLE_SUBCLOUD_INSTALL_PLAYBOOK,
            "-i", ansible_subcloud_inventory_file,
            "--limit", subcloud_name,
            "-e", "@%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" +
                  subcloud_name + '/' + "install_values.yml",
            "-e", "install_release_version=%s" %
                  software_version if software_version else SW_VERSION]
        return install_command

    def compose_bootstrap_command(self, subcloud_name,
                                  ansible_subcloud_inventory_file,
                                  software_version=None):
        bootstrap_command = [
            "ansible-playbook",
            utils.get_playbook_for_software_version(
                ANSIBLE_SUBCLOUD_PLAYBOOK, software_version),
            "-i", ansible_subcloud_inventory_file,
            "--limit", subcloud_name
        ]
        # Add the overrides dir and region_name so the playbook knows
        # which overrides to load
        bootstrap_command += [
            "-e", str("override_files_dir='%s' region_name=%s") % (
                dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name),
            "-e", "install_release_version=%s" %
                  software_version if software_version else SW_VERSION]
        return bootstrap_command

    def compose_config_command(self, subcloud_name, ansible_subcloud_inventory_file, payload):
        config_command = [
            "ansible-playbook", payload[consts.DEPLOY_PLAYBOOK],
            "-e", "@%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" +
                  subcloud_name + '_deploy_values.yml',
            "-i", ansible_subcloud_inventory_file,
            "--limit", subcloud_name
        ]
        return config_command

    def compose_backup_command(self, subcloud_name, ansible_subcloud_inventory_file):
        backup_command = [
            "ansible-playbook", ANSIBLE_SUBCLOUD_BACKUP_CREATE_PLAYBOOK,
            "-i", ansible_subcloud_inventory_file,
            "--limit", subcloud_name,
            "-e", "subcloud_bnr_overrides=%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" +
                  subcloud_name + "_backup_create_values.yml"]

        return backup_command

    def compose_backup_delete_command(self, subcloud_name,
                                      ansible_subcloud_inventory_file):
        backup_command = [
            "ansible-playbook", ANSIBLE_SUBCLOUD_BACKUP_DELETE_PLAYBOOK,
            "-i", ansible_subcloud_inventory_file,
            "--limit", subcloud_name,
            "-e", "subcloud_bnr_overrides=%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" +
                  subcloud_name + "_backup_delete_values.yml"]

        return backup_command

    def compose_backup_restore_command(self, subcloud_name, ansible_subcloud_inventory_file):
        backup_command = [
            "ansible-playbook", ANSIBLE_SUBCLOUD_BACKUP_RESTORE_PLAYBOOK,
            "-i", ansible_subcloud_inventory_file,
            "--limit", subcloud_name,
            "-e", "subcloud_bnr_overrides=%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" +
                  subcloud_name + "_backup_restore_values.yml"]

        return backup_command

    def compose_update_command(self, subcloud_name, ansible_subcloud_inventory_file):
        subcloud_update_command = [
            "ansible-playbook", ANSIBLE_SUBCLOUD_UPDATE_PLAYBOOK,
            "-i", ansible_subcloud_inventory_file,
            "--limit", subcloud_name,
            "--timeout", UPDATE_PLAYBOOK_TIMEOUT,
            "-e", "subcloud_update_overrides=%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" +
                  subcloud_name + "_update_values.yml"]
        return subcloud_update_command

    def compose_rehome_command(self, subcloud_name,
                               ansible_subcloud_inventory_file,
                               software_version):
        rehome_command = [
            "ansible-playbook",
            utils.get_playbook_for_software_version(
                ANSIBLE_SUBCLOUD_REHOME_PLAYBOOK, software_version),
            "-i", ansible_subcloud_inventory_file,
            "--limit", subcloud_name,
            "--timeout", REHOME_PLAYBOOK_TIMEOUT,
            "-e", str("override_files_dir='%s' region_name=%s") % (
                dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name)]
        return rehome_command

    def rehome_subcloud(self, context, subcloud, payload):
        # Ansible inventory filename for the specified subcloud
        ansible_subcloud_inventory_file = self._get_ansible_filename(
            subcloud.name, INVENTORY_FILE_POSTFIX)

        rehome_command = self.compose_rehome_command(
            subcloud.name,
            ansible_subcloud_inventory_file,
            subcloud.software_version)

        self.run_deploy_thread(subcloud, payload, context,
                               rehome_command=rehome_command)

    def add_subcloud(self, context, subcloud_id, payload):
        """Add subcloud and notify orchestrators.

        :param context: request context object
        :param subcloud_id: id of the subcloud
        :param payload: subcloud configuration
        """
        LOG.info(f"Adding subcloud {payload['name']}.")

        rehoming = payload.get('migrate', '').lower() == "true"
        payload['ansible_ssh_pass'] = payload['sysadmin_password']

        # Create the subcloud
        subcloud = self.subcloud_deploy_create(context, subcloud_id,
                                               payload, rehoming,
                                               return_as_dict=False)

        # Return if create failed
        if rehoming:
            success_state = consts.DEPLOY_STATE_PRE_REHOME
        else:
            success_state = consts.DEPLOY_STATE_CREATED
        if subcloud.deploy_status != success_state:
            return

        # Rehome subcloud
        if rehoming:
            self.rehome_subcloud(context, subcloud, payload)
            return

        # Define which deploy phases should be run
        phases_to_run = []
        if consts.INSTALL_VALUES in payload:
            phases_to_run.append(consts.DEPLOY_PHASE_INSTALL)
        phases_to_run.append(consts.DEPLOY_PHASE_BOOTSTRAP)
        if consts.DEPLOY_CONFIG in payload:
            phases_to_run.append(consts.DEPLOY_PHASE_CONFIG)

        # Finish adding the subcloud by running the deploy phases
        succeeded = self.run_deploy_phases(
            context, subcloud_id, payload, phases_to_run)

        if succeeded:
            subcloud = db_api.subcloud_update(
                context, subcloud_id, deploy_status=consts.DEPLOY_STATE_DONE)

        LOG.info(f"Finished adding subcloud {subcloud['name']}.")

    def reconfigure_subcloud(self, context, subcloud_id, payload):
        """Reconfigure subcloud

        :param context: request context object
        :param subcloud_id: id of the subcloud
        :param payload: subcloud configuration
        """
        LOG.info("Reconfiguring subcloud %s." % subcloud_id)

        subcloud = db_api.subcloud_update(
            context, subcloud_id,
            deploy_status=consts.DEPLOY_STATE_PRE_DEPLOY)
        try:
            # Ansible inventory filename for the specified subcloud
            ansible_subcloud_inventory_file = self._get_ansible_filename(
                subcloud.name, INVENTORY_FILE_POSTFIX)

            config_command = None
            if "deploy_playbook" in payload:
                self._prepare_for_deployment(payload, subcloud.name)
                config_command = self.compose_config_command(
                    subcloud.name,
                    ansible_subcloud_inventory_file,
                    payload)

            del payload['sysadmin_password']
            apply_thread = threading.Thread(
                target=self.run_deploy_thread,
                args=(subcloud, payload, context, None, None, config_command))
            apply_thread.start()
            return db_api.subcloud_db_model_to_dict(subcloud)
        except Exception:
            LOG.exception("Failed to create subcloud %s" % subcloud.name)
            # If we failed to create the subcloud, update the
            # deployment status
            db_api.subcloud_update(
                context, subcloud_id,
                deploy_status=consts.DEPLOY_STATE_DEPLOY_PREP_FAILED)

    def reinstall_subcloud(self, context, subcloud_id, payload):
        """Reinstall subcloud

        :param context: request context object
        :param subcloud_id: subcloud id from db
        :param payload: subcloud reinstall
        """

        # Retrieve the subcloud details from the database
        subcloud = db_api.subcloud_get(context, subcloud_id)

        LOG.info("Reinstalling subcloud %s." % subcloud_id)

        try:
            ansible_subcloud_inventory_file = self._get_ansible_filename(
                subcloud.name, INVENTORY_FILE_POSTFIX)

            m_ks_client = OpenStackDriver(
                region_name=dccommon_consts.DEFAULT_REGION_NAME,
                region_clients=None).keystone_client
            cached_regionone_data = self._get_cached_regionone_data(m_ks_client)
            self._populate_payload_with_cached_keystone_data(
                cached_regionone_data, payload)

            payload['install_values']['ansible_ssh_pass'] = \
                payload['sysadmin_password']
            payload['install_values']['ansible_become_pass'] = \
                payload['sysadmin_password']
            payload['bootstrap-address'] = \
                payload['install_values']['bootstrap_address']

            config_command = None
            if "deploy_playbook" in payload:
                self._prepare_for_deployment(payload, subcloud.name)
                config_command = self.compose_config_command(
                    subcloud.name,
                    ansible_subcloud_inventory_file,
                    payload)
            del payload['sysadmin_password']

            payload['users'] = dict()
            for user in USERS_TO_REPLICATE:
                payload['users'][user] = \
                    str(keyring.get_password(
                        user, dccommon_consts.SERVICES_USER_NAME))

            utils.create_subcloud_inventory(payload,
                                            ansible_subcloud_inventory_file)

            self._create_intermediate_ca_cert(payload)

            self._write_subcloud_ansible_config(cached_regionone_data, payload)

            install_command = self.compose_install_command(
                subcloud.name,
                ansible_subcloud_inventory_file,
                payload['software_version'])
            bootstrap_command = self.compose_bootstrap_command(
                subcloud.name,
                ansible_subcloud_inventory_file,
                payload['software_version'])
            network_reconfig = utils.has_network_reconfig(payload, subcloud)
            apply_thread = threading.Thread(
                target=self.run_deploy_thread,
                args=(subcloud, payload, context,
                      install_command, bootstrap_command, config_command,
                      None, network_reconfig))
            apply_thread.start()
            return db_api.subcloud_db_model_to_dict(subcloud)
        except Exception:
            LOG.exception("Failed to reinstall subcloud %s" % subcloud.name)
            # If we failed to reinstall the subcloud, update the
            # deployment status
            db_api.subcloud_update(
                context, subcloud_id,
                deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED)

    def redeploy_subcloud(self, context, subcloud_id, payload):
        """Redeploy subcloud

        :param context: request context object
        :param subcloud_id: subcloud id from db
        :param payload: subcloud redeploy
        """

        # Retrieve the subcloud details from the database
        subcloud = db_api.subcloud_get(context, subcloud_id)

        LOG.info("Redeploying subcloud %s." % subcloud.name)

        # Define which deploy phases to run
        phases_to_run = [consts.DEPLOY_PHASE_INSTALL,
                         consts.DEPLOY_PHASE_BOOTSTRAP]
        if consts.DEPLOY_CONFIG in payload:
            phases_to_run.append(consts.DEPLOY_PHASE_CONFIG)

        succeeded = self.run_deploy_phases(context, subcloud_id, payload,
                                           phases_to_run)

        if succeeded:
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_DONE,
                error_description=consts.ERROR_DESC_EMPTY)
            LOG.info(f"Finished redeploying subcloud {subcloud['name']}.")

    def create_subcloud_backups(self, context, payload):
        """Backup subcloud or group of subclouds

        :param context: request context object
        :param payload: subcloud backup create detail
        """

        subcloud_id = payload.get('subcloud')
        group_id = payload.get('group')

        # Retrieve either a single subcloud or all subclouds in a group
        subclouds = [db_api.subcloud_get(context, subcloud_id)] if subcloud_id\
            else db_api.subcloud_get_for_group(context, group_id)

        self._filter_subclouds_with_ongoing_backup(subclouds)
        self._update_backup_status(context, subclouds,
                                   consts.BACKUP_STATE_INITIAL)

        # Validate the subclouds and filter the ones applicable for backup
        self._update_backup_status(context, subclouds,
                                   consts.BACKUP_STATE_VALIDATING)

        subclouds_to_backup, invalid_subclouds = \
            self._validate_subclouds_for_backup(subclouds, 'create')

        self._mark_invalid_subclouds_for_backup(context, invalid_subclouds)

        # Use thread pool to limit number of operations in parallel
        backup_pool = greenpool.GreenPool(size=MAX_PARALLEL_SUBCLOUD_BACKUP_CREATE)

        # Spawn threads to back up each applicable subcloud
        backup_function = functools.partial(self._backup_subcloud, context,
                                            payload)

        self._run_parallel_group_operation('backup create',
                                           backup_function,
                                           backup_pool,
                                           subclouds_to_backup)

        LOG.info("Subcloud backup operation finished")

    def delete_subcloud_backups(self, context, release_version, payload):
        """Delete backups for subcloud or group of subclouds for a given release

        :param context: request context object
        :param release_version Backup release version to be deleted
        :param payload: subcloud backup delete detail
        """

        local_delete = payload.get('local_only')

        subclouds_to_delete_backup, invalid_subclouds = \
            self._filter_subclouds_for_backup_delete(context, payload, local_delete)

        # Spawn threads to back up each applicable subcloud
        backup_delete_function = functools.partial(
            self._delete_subcloud_backup, context, payload, release_version)

        # Use thread pool to limit number of operations in parallel
        max_parallel_operations = MAX_PARALLEL_SUBCLOUD_BACKUP_DELETE
        backup_delete_pool = greenpool.GreenPool(size=max_parallel_operations)

        failed_subclouds = self._run_parallel_group_operation(
            'backup delete', backup_delete_function, backup_delete_pool,
            subclouds_to_delete_backup)

        LOG.info("Subcloud backup delete operation finished")

        return self._subcloud_operation_notice('delete', subclouds_to_delete_backup,
                                               failed_subclouds, invalid_subclouds)

    def restore_subcloud_backups(self, context, payload):
        """Restore a subcloud or group of subclouds from backup data

        :param context: request context object
        :param payload: restore backup subcloud detail
        """

        subcloud_id = payload.get('subcloud')
        group_id = payload.get('group')

        # Initialize subclouds lists
        restore_subclouds, invalid_subclouds, failed_subclouds = (
            list(), list(), list())

        # Retrieve either a single subcloud or all subclouds in a group
        subclouds = (
            [db_api.subcloud_get(context, subcloud_id)] if subcloud_id
            else db_api.subcloud_get_for_group(context, group_id)
        )

        restore_subclouds, invalid_subclouds = (
            self._validate_subclouds_for_backup(subclouds, 'restore')
        )

        if restore_subclouds:
            # Use thread pool to limit number of operations in parallel
            restore_pool = greenpool.GreenPool(
                size=MAX_PARALLEL_SUBCLOUD_BACKUP_RESTORE)

            # Spawn threads to back up each applicable subcloud
            restore_function = functools.partial(
                self._restore_subcloud_backup, context, payload)

            failed_subclouds = self._run_parallel_group_operation(
                'backup restore', restore_function,
                restore_pool, restore_subclouds
            )

        restored_subclouds = len(restore_subclouds) - len(failed_subclouds)
        LOG.info("Subcloud restore backup operation finished.\n"
                 "Restored subclouds: %s. Invalid subclouds: %s. "
                 "Failed subclouds: %s." % (restored_subclouds,
                                            len(invalid_subclouds),
                                            len(failed_subclouds)))

        return self._subcloud_operation_notice('restore', restore_subclouds,
                                               failed_subclouds, invalid_subclouds)

    def _deploy_bootstrap_prep(self, context, subcloud, payload: dict,
                               ansible_subcloud_inventory_file):
        """Run the preparation steps needed to run the bootstrap operation

        :param context: target request context object
        :param subcloud: subcloud model object
        :param payload: bootstrap request parameters
        :param ansible_subcloud_inventory_file: the ansible inventory file path
        :return: ansible command needed to run the bootstrap playbook
        """
        network_reconfig = utils.has_network_reconfig(payload, subcloud)
        if network_reconfig:
            self._configure_system_controller_network(context, payload, subcloud,
                                                      update_db=False)
            # Regenerate the addn_hosts_dc file
            self._create_addn_hosts_dc(context)

        # Update subcloud
        subcloud = db_api.subcloud_update(
            context,
            subcloud.id,
            description=payload.get("description"),
            management_subnet=utils.get_management_subnet(payload),
            management_gateway_ip=utils.get_management_gateway_address(
                payload),
            management_start_ip=utils.get_management_start_address(
                payload),
            management_end_ip=utils.get_management_end_address(payload),
            systemcontroller_gateway_ip=payload.get(
                "systemcontroller_gateway_address"),
            location=payload.get("location"),
            deploy_status=consts.DEPLOY_STATE_PRE_BOOTSTRAP)

        # Populate payload with passwords
        payload['ansible_become_pass'] = payload['sysadmin_password']
        payload['ansible_ssh_pass'] = payload['sysadmin_password']
        payload['admin_password'] = str(keyring.get_password('CGCS', 'admin'))
        payload_without_sysadmin_password = payload.copy()
        if 'sysadmin_password' in payload_without_sysadmin_password:
            del payload_without_sysadmin_password['sysadmin_password']

        # Update the ansible overrides file
        overrides_file = os.path.join(dccommon_consts.ANSIBLE_OVERRIDES_PATH,
                                      subcloud.name + '.yml')
        utils.update_values_on_yaml_file(overrides_file,
                                         payload_without_sysadmin_password)

        # Update the ansible inventory for the subcloud
        utils.create_subcloud_inventory(payload,
                                        ansible_subcloud_inventory_file)

        bootstrap_command = self.compose_bootstrap_command(
            subcloud.name,
            ansible_subcloud_inventory_file,
            subcloud.software_version)
        return bootstrap_command

    def _deploy_config_prep(self, subcloud, payload: dict,
                            ansible_subcloud_inventory_file):
        """Run the preparation steps needed to run the config operation

        :param subcloud: target subcloud model object
        :param payload: config request parameters
        :param ansible_subcloud_inventory_file: the ansible inventory file path
        :return: ansible command needed to run the config playbook
        """
        self._prepare_for_deployment(payload, subcloud.name)
        config_command = self.compose_config_command(
            subcloud.name,
            ansible_subcloud_inventory_file,
            payload)
        return config_command

    def _deploy_install_prep(self, subcloud, payload: dict,
                             ansible_subcloud_inventory_file):
        """Run the preparation steps needed to run the install operation

        :param subcloud: target subcloud model object
        :param payload: install request parameters
        :param ansible_subcloud_inventory_file: the ansible inventory file path
        :return: ansible command needed to run the install playbook
        """
        payload['install_values']['ansible_ssh_pass'] = \
            payload['sysadmin_password']
        payload['install_values']['ansible_become_pass'] = \
            payload['sysadmin_password']

        # If all update_values already exists on override file or are
        # the same as the existing ones, the update won't happen
        # and the file will remain untouched
        bootstrap_file = psd_common.get_config_file_path(subcloud.name,
                                                         consts.BOOTSTRAP_VALUES)
        update_values = {'software_version': payload['software_version'],
                         'bmc_password': payload['bmc_password'],
                         'ansible_ssh_pass': payload['sysadmin_password'],
                         'ansible_become_pass': payload['sysadmin_password']
                         }
        utils.update_values_on_yaml_file(bootstrap_file,
                                         update_values)

        install_command = self.compose_install_command(
            subcloud.name,
            ansible_subcloud_inventory_file,
            payload['software_version'])

        return install_command

    def subcloud_deploy_abort(self, context, subcloud_id, deploy_status):
        """Abort the subcloud deploy

        :param context: request context object
        :param subcloud_id: subcloud id from db
        :param deploy_status: subcloud deploy status from db
        """

        LOG.info("Aborting deployment of subcloud %s." % subcloud_id)
        subcloud = utils.update_abort_status(context, subcloud_id, deploy_status)

        try:
            run_ansible = RunAnsible()
            aborted = run_ansible.run_abort(subcloud.name)
            if not aborted:
                LOG.warning("Ansible deploy phase subprocess of %s "
                            "was terminated before it could be aborted"
                            % subcloud.name)
                # let the main phase thread handle the state update
                return

            if subcloud.deploy_status == consts.DEPLOY_STATE_ABORTING_INSTALL:
                # First delete the k8s job and pod, stopping the current
                # installation attempt if exists
                # Then send shutdown signal to subcloud
                kube = kubeoperator.KubeOperator()
                shutdown_subcloud = SubcloudShutdown(subcloud.name)
                namespace = dccommon_consts.RVMC_NAME_PREFIX
                jobname = '%s-%s' % (namespace, subcloud.name)
                pod_basename = '%s-' % jobname
                all_pods = kube.get_pods_by_namespace(namespace)
                desired_pod = next((s for s in all_pods if pod_basename in s), None)
                if desired_pod:
                    kube.kube_delete_job(jobname, namespace)
                    kube.kube_delete_pod(desired_pod, namespace)
                    while kube.pod_exists(desired_pod, namespace):
                        time.sleep(2)
                shutdown_subcloud.send_shutdown_signal()
        except Exception as ex:
            LOG.error("Subcloud deploy abort failed for subcloud %s" % subcloud.name)
            utils.update_abort_status(context, subcloud.id, subcloud.deploy_status,
                                      abort_failed=True)
            # exception is logged above
            raise ex
        LOG.info("Successfully aborted deployment of %s" % subcloud.name)
        utils.update_abort_status(context, subcloud.id, subcloud.deploy_status)

    def subcloud_deploy_resume(self, context, subcloud_id, subcloud_name,
                               payload: dict, deploy_states_to_run):
        """Resume the subcloud deployment

        :param context: request context object
        :param subcloud_id: subcloud id from db
        :param subcloud_name: name of the subcloud
        :param payload: subcloud resume payload
        :param deploy_states_to_run: deploy phases pending execution
        """
        LOG.info("Resuming deployment of subcloud %s. Deploy phases to be executed: %s"
                 % (subcloud_name, ', '.join(deploy_states_to_run)))

        self.run_deploy_phases(context, subcloud_id, payload,
                               deploy_states_to_run)

    def subcloud_deploy_create(self, context, subcloud_id, payload,
                               rehoming=False, return_as_dict=True):
        """Create subcloud and notify orchestrators.

        :param context: request context object
        :param subcloud_id: subcloud_id from db
        :param payload: subcloud configuration
        :param rehoming: flag indicating if this is part of a rehoming operation
        :param return_as_dict: converts the subcloud DB object to a dict before returning
        :return: resulting subcloud DB object or dictionary
        """
        LOG.info("Creating subcloud %s." % payload['name'])

        if rehoming:
            deploy_state = consts.DEPLOY_STATE_PRE_REHOME
        else:
            deploy_state = consts.DEPLOY_STATE_CREATING

        subcloud = db_api.subcloud_update(
            context, subcloud_id,
            deploy_status=deploy_state)

        try:
            # Create a new route to this subcloud on the management interface
            # on both controllers.
            m_ks_client = OpenStackDriver(
                region_name=dccommon_consts.DEFAULT_REGION_NAME,
                region_clients=None).keystone_client
            subcloud_subnet = netaddr.IPNetwork(
                utils.get_management_subnet(payload))
            endpoint = m_ks_client.endpoint_cache.get_endpoint('sysinv')
            sysinv_client = SysinvClient(dccommon_consts.DEFAULT_REGION_NAME,
                                         m_ks_client.session,
                                         endpoint=endpoint)
            LOG.debug("Getting cached regionone data for %s" % subcloud.name)
            cached_regionone_data = self._get_cached_regionone_data(
                m_ks_client, sysinv_client)
            for mgmt_if_uuid in cached_regionone_data['mgmt_interface_uuids']:
                sysinv_client.create_route(
                    mgmt_if_uuid,
                    str(subcloud_subnet.ip),
                    subcloud_subnet.prefixlen,
                    payload['systemcontroller_gateway_address'],
                    1)

            # Create endpoints to this subcloud on the
            # management-start-ip of the subcloud which will be allocated
            # as the floating Management IP of the Subcloud if the
            # Address Pool is not shared. In case the endpoint entries
            # are incorrect, or the management IP of the subcloud is changed
            # in the future, it will not go managed or will show up as
            # out of sync. To fix this use Openstack endpoint commands
            # on the SystemController to change the subcloud endpoints.
            # The non-identity endpoints are added to facilitate horizon access
            # from the System Controller to the subcloud.
            endpoint_config = []
            endpoint_ip = utils.get_management_start_address(payload)
            if netaddr.IPAddress(endpoint_ip).version == 6:
                endpoint_ip = '[' + endpoint_ip + ']'

            for service in m_ks_client.services_list:
                admin_endpoint_url = ENDPOINT_URLS.get(service.type, None)
                if admin_endpoint_url:
                    admin_endpoint_url = admin_endpoint_url.format(endpoint_ip)
                    endpoint_config.append(
                        {"id": service.id,
                         "admin_endpoint_url": admin_endpoint_url})

            if len(endpoint_config) < len(ENDPOINT_URLS):
                raise exceptions.BadRequest(
                    resource='subcloud',
                    msg='Missing service in SystemController')

            for endpoint in endpoint_config:
                try:
                    m_ks_client.keystone_client.endpoints.create(
                        endpoint["id"],
                        endpoint['admin_endpoint_url'],
                        interface=dccommon_consts.KS_ENDPOINT_ADMIN,
                        region=subcloud.name)
                except Exception as e:
                    # Keystone service must be temporarily busy, retry
                    LOG.error(str(e))
                    m_ks_client.keystone_client.endpoints.create(
                        endpoint["id"],
                        endpoint['admin_endpoint_url'],
                        interface=dccommon_consts.KS_ENDPOINT_ADMIN,
                        region=subcloud.name)

            # Inform orchestrator that subcloud has been added
            self.dcorch_rpc_client.add_subcloud(
                context, subcloud.name, subcloud.software_version)

            # create entry into alarm summary table, will get real values later
            alarm_updates = {'critical_alarms': -1,
                             'major_alarms': -1,
                             'minor_alarms': -1,
                             'warnings': -1,
                             'cloud_status': consts.ALARMS_DISABLED}
            db_api.subcloud_alarms_create(context, subcloud.name,
                                          alarm_updates)

            # Regenerate the addn_hosts_dc file
            self._create_addn_hosts_dc(context)

            self._populate_payload_with_cached_keystone_data(
                cached_regionone_data, payload, populate_passwords=False)

            if "deploy_playbook" in payload:
                self._prepare_for_deployment(payload, subcloud.name,
                                             populate_passwords=False)

            payload['users'] = {}
            for user in USERS_TO_REPLICATE:
                payload['users'][user] = \
                    str(keyring.get_password(
                        user, dccommon_consts.SERVICES_USER_NAME))

            # Ansible inventory filename for the specified subcloud
            ansible_subcloud_inventory_file = utils.get_ansible_filename(
                subcloud.name, INVENTORY_FILE_POSTFIX)

            # Create the ansible inventory for the new subcloud
            utils.create_subcloud_inventory(payload,
                                            ansible_subcloud_inventory_file)

            # create subcloud intermediate certificate and pass in keys
            self._create_intermediate_ca_cert(payload)

            # Write this subcloud's overrides to file
            # NOTE: This file should not be deleted if subcloud add fails
            # as it is used for debugging
            self._write_subcloud_ansible_config(cached_regionone_data, payload)

            if not rehoming:
                deploy_state = consts.DEPLOY_STATE_CREATED

        except Exception:
            LOG.exception("Failed to create subcloud %s" % payload['name'])
            # If we failed to create the subcloud, update the deployment status

            if rehoming:
                deploy_state = consts.DEPLOY_STATE_REHOME_PREP_FAILED
            else:
                deploy_state = consts.DEPLOY_STATE_CREATE_FAILED

        subcloud = db_api.subcloud_update(
            context, subcloud.id,
            deploy_status=deploy_state)

        # The RPC call must return the subcloud as a dictionary, otherwise it
        # should return the DB object for dcmanager internal use (subcloud add)
        if return_as_dict:
            subcloud = db_api.subcloud_db_model_to_dict(subcloud)

        return subcloud

    def subcloud_deploy_install(self, context, subcloud_id, payload: dict) -> bool:
        """Install subcloud

        :param context: request context object
        :param subcloud_id: subcloud id from db
        :param payload: subcloud Install
        :return: success status
        """

        # Retrieve the subcloud details from the database
        subcloud = db_api.subcloud_update(
            context,
            subcloud_id,
            software_version=payload['software_version'],
            deploy_status=consts.DEPLOY_STATE_PRE_INSTALL,
            data_install=json.dumps(payload['install_values']))

        LOG.info("Installing subcloud %s." % subcloud.name)

        try:
            log_file = (
                os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name)
                + "_playbook_output.log"
            )
            ansible_subcloud_inventory_file = self._get_ansible_filename(
                subcloud.name, INVENTORY_FILE_POSTFIX)

            install_command = self._deploy_install_prep(
                subcloud, payload, ansible_subcloud_inventory_file)
            install_success = self._run_subcloud_install(
                context, subcloud, install_command,
                log_file, payload['install_values'],
                abortable=True)
            if install_success:
                db_api.subcloud_update(
                    context, subcloud.id,
                    deploy_status=consts.DEPLOY_STATE_INSTALLED,
                    error_description=consts.ERROR_DESC_EMPTY)
            return install_success

        except Exception:
            LOG.exception("Failed to install subcloud %s" % subcloud.name)
            # If we failed to install the subcloud,
            # update the deployment status
            db_api.subcloud_update(
                context, subcloud_id,
                deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED)
            return False

    def subcloud_deploy_bootstrap(self, context, subcloud_id, payload):
        """Bootstrap subcloud

        :param context: request context object
        :param subcloud_id: subcloud_id from db
        :param payload: subcloud bootstrap configuration
        :return: success status
        """
        LOG.info("Bootstrapping subcloud %s." % payload['name'])

        # Retrieve the subcloud details from the database
        subcloud = db_api.subcloud_get(context, subcloud_id)

        try:
            log_file = (
                os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name)
                + "_playbook_output.log"
            )
            ansible_subcloud_inventory_file = self._get_ansible_filename(
                subcloud.name, INVENTORY_FILE_POSTFIX)

            bootstrap_command = self._deploy_bootstrap_prep(
                context, subcloud, payload,
                ansible_subcloud_inventory_file)
            bootstrap_success = self._run_subcloud_bootstrap(
                context, subcloud, bootstrap_command, log_file)
            return bootstrap_success

        except Exception:
            LOG.exception("Failed to bootstrap subcloud %s" % payload['name'])
            db_api.subcloud_update(
                context, subcloud_id,
                deploy_status=consts.DEPLOY_STATE_PRE_BOOTSTRAP_FAILED)
            return False

    def subcloud_deploy_config(self, context, subcloud_id, payload: dict) -> bool:
        """Configure subcloud

        :param context: request context object
        :param subcloud_id: subcloud_id from db
        :param payload: subcloud configuration
        :return: success status
        """
        LOG.info("Configuring subcloud %s." % subcloud_id)

        subcloud = db_api.subcloud_update(
            context, subcloud_id,
            deploy_status=consts.DEPLOY_STATE_PRE_CONFIG)
        try:
            log_file = (
                os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name)
                + "_playbook_output.log"
            )
            # Ansible inventory filename for the specified subcloud
            ansible_subcloud_inventory_file = self._get_ansible_filename(
                subcloud.name, INVENTORY_FILE_POSTFIX)

            self._prepare_for_deployment(payload, subcloud.name)
            config_command = self.compose_config_command(
                subcloud.name,
                ansible_subcloud_inventory_file,
                payload)

            config_success = self._run_subcloud_config(subcloud, context,
                                                       config_command, log_file)
            return config_success

        except Exception:
            LOG.exception("Failed to configure %s" % subcloud.name)
            db_api.subcloud_update(
                context, subcloud_id,
                deploy_status=consts.DEPLOY_STATE_PRE_CONFIG_FAILED)
            return False

    def subcloud_deploy_complete(self, context, subcloud_id):
        """Completes the subcloud deployment.

        :param context: request context object
        :param subcloud_id: subcloud_id from db
        :return: resulting subcloud dictionary
        """
        LOG.info("Completing subcloud %s deployment." % subcloud_id)

        # Just update the deploy status
        subcloud = db_api.subcloud_update(context, subcloud_id,
                                          deploy_status=consts.DEPLOY_STATE_DONE)

        LOG.info("Subcloud %s deploy status set to: %s"
                 % (subcloud_id, consts.DEPLOY_STATE_DONE))

        return db_api.subcloud_db_model_to_dict(subcloud)

    def _subcloud_operation_notice(
            self, operation, restore_subclouds, failed_subclouds,
            invalid_subclouds):
        all_failed = ((not set(restore_subclouds) - set(failed_subclouds))
                      and not invalid_subclouds)
        if all_failed:
            LOG.error("Backup %s failed for all applied subclouds" % operation)
            raise exceptions.SubcloudBackupOperationFailed(operation=operation)

        if invalid_subclouds:
            self._warn_for_invalid_subclouds_on_backup_operation(invalid_subclouds)
        if failed_subclouds:
            self._warn_for_failed_subclouds_on_backup_operation(operation,
                                                                failed_subclouds)

        if invalid_subclouds or failed_subclouds:
            return self._build_subcloud_operation_notice(operation,
                                                         failed_subclouds,
                                                         invalid_subclouds)
        return

    def _filter_subclouds_with_ongoing_backup(self, subclouds):
        i = 0
        while i < len(subclouds):
            subcloud = subclouds[i]
            if subcloud.backup_status in consts.STATES_FOR_ONGOING_BACKUP:
                LOG.info(_('Subcloud %s already has a backup operation in '
                           'progress' % subcloud.name))
                subclouds.pop(i)
            else:
                i += 1

    def _validate_subclouds_for_backup(self, subclouds, operation):
        valid_subclouds = []
        invalid_subclouds = []
        for subcloud in subclouds:
            is_valid = False
            try:
                if utils.is_valid_for_backup_operation(operation, subcloud):
                    is_valid = True

            except exceptions.ValidateFail:
                is_valid = False

            if is_valid:
                valid_subclouds.append(subcloud)
            else:
                invalid_subclouds.append(subcloud)

        return valid_subclouds, invalid_subclouds

    @staticmethod
    def _mark_invalid_subclouds_for_backup(context, invalid_subclouds):
        try:
            invalid_ids = {subcloud.id for subcloud in invalid_subclouds}
            invalid_names = {subcloud.name for subcloud in invalid_subclouds}

            if invalid_ids:
                # Set state on subclouds that failed validation
                LOG.warn('The following subclouds are not online and/or managed '
                         'and/or in a valid deploy state, and will not be backed '
                         'up: %s', ', '.join(list(invalid_names)))
                SubcloudManager._update_backup_status_by_ids(
                    context, invalid_ids,
                    consts.BACKUP_STATE_VALIDATE_FAILED)

        except DCManagerException as ex:
            LOG.exception("Subcloud backup validation failed")
            raise ex

    @staticmethod
    def _warn_for_invalid_subclouds_on_backup_operation(invalid_subclouds):
        invalid_names = {subcloud.name for subcloud in invalid_subclouds}
        LOG.warn('The following subclouds were not online and/or in a valid '
                 'deploy/management state, and thus were not be reached '
                 'for backup operation: %s', ', '.join(list(invalid_names)))

    @staticmethod
    def _warn_for_failed_subclouds_on_backup_operation(operation, failed_subclouds):
        failed_names = {subcloud.name for subcloud in failed_subclouds}
        LOG.warn('Backup %s operation failed for some subclouds, '
                 'check previous logs for details. Failed subclouds: %s' %
                 (operation, ', '.join(list(failed_names))))

    @staticmethod
    def _update_backup_status(context, subclouds, backup_status):
        subcloud_ids = [subcloud.id for subcloud in subclouds]
        return SubcloudManager.\
            _update_backup_status_by_ids(context, subcloud_ids,
                                         backup_status)

    @staticmethod
    def _update_backup_status_by_ids(context, subcloud_ids, backup_status):
        validate_state_form = {
            Subcloud.backup_status.name: backup_status
        }
        db_api.subcloud_bulk_update_by_ids(context, subcloud_ids,
                                           validate_state_form)

    @staticmethod
    def _run_parallel_group_operation(op_type, op_function, thread_pool, subclouds):
        failed_subclouds = []
        processed = 0

        for subcloud, success in thread_pool.imap(op_function, subclouds):
            processed += 1

            if not success:
                failed_subclouds.append(subcloud)

            completion = float(processed) / float(len(subclouds)) * 100
            remaining = len(subclouds) - processed
            LOG.info("Processed subcloud %s for %s (operation %.0f%% "
                     "complete, %d subcloud(s) remaining)" %
                     (subcloud.name, op_type, completion, remaining))

        return failed_subclouds

    def _backup_subcloud(self, context, payload, subcloud):
        try:
            # Health check validation
            if not utils.is_subcloud_healthy(subcloud.name):
                db_api.subcloud_update(
                    context,
                    subcloud.id,
                    backup_status=consts.BACKUP_STATE_VALIDATE_FAILED,
                )
                LOG.info(
                    ("Subcloud %s is not in good health for subcloud-backup create")
                    % subcloud.name
                )
                return subcloud, False

            db_api.subcloud_update(
                context,
                subcloud.id,
                backup_status=consts.BACKUP_STATE_PRE_BACKUP,
            )

            subcloud_inventory_file = self._create_subcloud_inventory_file(subcloud)

            # Prepare for backup
            overrides_file = self._create_overrides_for_backup_or_restore(
                'create', payload, subcloud.name
            )
            backup_command = self.compose_backup_command(
                subcloud.name, subcloud_inventory_file)

            self._clear_subcloud_backup_failure_alarm_if_exists(subcloud)
        except Exception:
            self._fail_subcloud_backup_prep(context, subcloud)
            return subcloud, False

        local_only = payload.get('local_only') or False
        success = self._run_subcloud_backup_create_playbook(
            subcloud, backup_command, context, local_only)

        if success:
            utils.delete_subcloud_inventory(overrides_file)

        return subcloud, success

    def _filter_subclouds_for_backup_delete(self, context, payload, local_delete):
        subcloud_id = payload.get('subcloud')
        group_id = payload.get('group')

        # Retrieve either a single subcloud or all subclouds in a group
        subclouds = [db_api.subcloud_get(context, subcloud_id)] if subcloud_id \
            else db_api.subcloud_get_for_group(context, group_id)
        invalid_subclouds = []

        # Subcloud state validation only required for local delete
        if local_delete:
            # Use same criteria defined for subcloud backup create
            subclouds_to_delete_backup, invalid_subclouds = \
                self._validate_subclouds_for_backup(subclouds, 'delete')
        else:
            # Otherwise, validation is unnecessary, since connection is not required
            subclouds_to_delete_backup = subclouds

        return subclouds_to_delete_backup, invalid_subclouds

    def _delete_subcloud_backup(self, context, payload, release_version, subcloud):
        try:
            overrides_file = self._create_overrides_for_backup_delete(
                payload, subcloud.name, release_version
            )
            inventory_file = self._create_subcloud_inventory_file(subcloud)
            delete_command = self.compose_backup_delete_command(
                subcloud.name, inventory_file)

        except Exception:
            LOG.exception("Failed to prepare subcloud %s for backup delete"
                          % subcloud.name)
            return subcloud, False

        success = self._run_subcloud_backup_delete_playbook(context, subcloud,
                                                            delete_command)

        if success:
            utils.delete_subcloud_inventory(overrides_file)

        return subcloud, success

    def _restore_subcloud_backup(self, context, payload, subcloud):
        log_file = (os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) +
                    '_playbook_output.log')
        data_install = json.loads(subcloud.data_install)
        try:
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_PRE_RESTORE
            )
            subcloud_inventory_file = self._create_subcloud_inventory_file(
                subcloud, data_install=data_install)
            # Prepare for restore
            overrides_file = self._create_overrides_for_backup_or_restore(
                'restore', payload, subcloud.name
            )
            restore_command = self.compose_backup_restore_command(
                subcloud.name, subcloud_inventory_file)
        except Exception:
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_RESTORE_PREP_FAILED
            )
            LOG.exception("Failed to prepare subcloud %s for backup restore"
                          % subcloud.name)
            return subcloud, False

        if payload.get('with_install'):
            software_version = payload.get('software_version')
            install_command = self.compose_install_command(
                subcloud.name, subcloud_inventory_file, software_version)
            # Update data_install with missing data
            matching_iso, _ = utils.get_vault_load_files(software_version)
            data_install['software_version'] = software_version
            data_install['image'] = matching_iso
            data_install['ansible_ssh_pass'] = payload['sysadmin_password']
            data_install['ansible_become_pass'] = payload['sysadmin_password']
            install_success = self._run_subcloud_install(
                context, subcloud, install_command, log_file, data_install)
            if not install_success:
                return subcloud, False

        success = self._run_subcloud_backup_restore_playbook(
            subcloud, restore_command, context, log_file)

        if success:
            utils.delete_subcloud_inventory(overrides_file)

        return subcloud, success

    @staticmethod
    def _build_subcloud_operation_notice(operation, failed_subclouds, invalid_subclouds):
        invalid_subcloud_names = [subcloud.name for subcloud in invalid_subclouds]
        failed_subcloud_names = [subcloud.name for subcloud in failed_subclouds]

        notice = "Subcloud backup %s operation completed with warnings:\n" % operation
        if invalid_subclouds:
            notice += ("The following subclouds were skipped for local backup "
                       "%s operation: %s."
                       % (operation, ' ,'.join(invalid_subcloud_names)))
        if failed_subclouds:
            notice += ("The following subclouds failed during backup "
                       "%s operation: %s."
                       % (operation, ' ,'.join(failed_subcloud_names)))
        return notice

    def _create_subcloud_inventory_file(self, subcloud, data_install=None):
        # Ansible inventory filename for the specified subcloud
        ansible_subcloud_inventory_file = self._get_ansible_filename(
            subcloud.name, INVENTORY_FILE_POSTFIX)

        oam_fip = None
        # Restore is restricted to Redfish enabled subclouds
        if data_install:
            oam_fip = data_install.get('bootstrap_address')
        else:
            # Use subcloud floating IP for host reachability
            keystone_client = OpenStackDriver(
                region_name=subcloud.name,
                region_clients=None).keystone_client
            oam_fip = utils.get_oam_addresses(subcloud.name, keystone_client)\
                .oam_floating_ip

        # Add parameters used to generate inventory
        subcloud_params = {'name': subcloud.name,
                           'bootstrap-address': oam_fip}

        utils.create_subcloud_inventory(subcloud_params,
                                        ansible_subcloud_inventory_file)
        return ansible_subcloud_inventory_file

def _create_overrides_for_backup_or_restore(self, op, payload, subcloud_name):
|
|
# Set override names as expected by the playbook
|
|
if not payload.get('override_values'):
|
|
payload['override_values'] = {}
|
|
|
|
payload['override_values']['local'] = \
|
|
payload['local_only'] or False
|
|
|
|
if op == 'create':
|
|
payload['override_values']['backup_registry_images'] = \
|
|
payload['registry_images'] or False
|
|
suffix = 'backup_create_values'
|
|
else:
|
|
payload['override_values']['restore_registry_images'] = \
|
|
payload['registry_images'] or False
|
|
suffix = 'backup_restore_values'
|
|
|
|
if not payload['local_only']:
|
|
payload['override_values']['central_backup_dir'] = CENTRAL_BACKUP_DIR
|
|
|
|
payload['override_values']['ansible_ssh_pass'] = \
|
|
payload['sysadmin_password']
|
|
payload['override_values']['ansible_become_pass'] = \
|
|
payload['sysadmin_password']
|
|
payload['override_values']['admin_password'] = \
|
|
str(keyring.get_password('CGCS', 'admin'))
|
|
|
|
if payload.get('backup_values'):
|
|
LOG.info('Backup create: Received backup_values for subcloud %s'
|
|
% subcloud_name)
|
|
for key, value in payload.get('backup_values').items():
|
|
payload['override_values'][key] = value
|
|
elif payload.get('restore_values'):
|
|
LOG.info('Backup restore: Received restore_values for subcloud %s'
|
|
% subcloud_name)
|
|
for key, value in payload.get('restore_values').items():
|
|
payload['override_values'][key] = value
|
|
|
|
return self._create_backup_overrides_file(payload, subcloud_name, suffix)
|
|
|
|
    def _create_overrides_for_backup_delete(self, payload, subcloud_name,
                                            release_version):
        # Set override names as expected by the playbook
        if not payload.get('override_values'):
            payload['override_values'] = {}

        payload['override_values']['software_version'] = release_version

        payload['override_values']['local'] = \
            payload['local_only'] or False

        if not payload['local_only']:
            payload['override_values']['central_backup_dir'] = CENTRAL_BACKUP_DIR
        else:
            payload['override_values']['ansible_ssh_pass'] = \
                payload['sysadmin_password']
            payload['override_values']['ansible_become_pass'] = \
                payload['sysadmin_password']

        return self._create_backup_overrides_file(
            payload, subcloud_name, 'backup_delete_values'
        )

    def _create_backup_overrides_file(self, payload, subcloud_name, filename_suffix):
        backup_overrides_file = os.path.join(
            dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + '_' +
            filename_suffix + '.yml')

        with open(backup_overrides_file, 'w') as f_out:
            f_out.write(
                '---\n'
            )
            for k, v in payload['override_values'].items():
                f_out.write("%s: %s\n" % (k, v))

        return backup_overrides_file

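    # The overrides file written by _create_backup_overrides_file() is plain
    # "key: value" YAML built from override_values. For a central backup
    # create request it would look roughly like the following (values are
    # illustrative only):
    #
    #   ---
    #   local: False
    #   backup_registry_images: False
    #   central_backup_dir: <central backup directory>
    #   ansible_ssh_pass: <sysadmin password>
    #   ansible_become_pass: <sysadmin password>
    #   admin_password: <keyring admin password>
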
    def _run_subcloud_backup_create_playbook(self, subcloud, backup_command,
                                             context, local_only):
        log_file = os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + \
            '_playbook_output.log'

        db_api.subcloud_update(
            context, subcloud.id,
            backup_status=consts.BACKUP_STATE_IN_PROGRESS,
            error_description=consts.ERROR_DESC_EMPTY)

        # Run the subcloud backup playbook
        try:
            run_playbook(log_file, backup_command)

            # Decide between complete-local or complete-central
            if local_only:
                backup_status = consts.BACKUP_STATE_COMPLETE_LOCAL
            else:
                backup_status = consts.BACKUP_STATE_COMPLETE_CENTRAL

            db_api.subcloud_update(
                context, subcloud.id,
                backup_status=backup_status,
                backup_datetime=datetime.datetime.utcnow())

            LOG.info("Successfully backed up subcloud %s" % subcloud.name)
            return True
        except PlaybookExecutionFailed:
            self._fail_subcloud_backup_operation(context, log_file, subcloud)
            return False

    @staticmethod
    def _run_subcloud_backup_delete_playbook(context, subcloud, delete_command):
        log_file = os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + \
            '_playbook_output.log'

        try:
            # Run the subcloud backup delete playbook
            run_playbook(log_file, delete_command)

            # Set backup status to unknown after delete, since most recent backup may
            # have been deleted
            db_api.subcloud_bulk_update_by_ids(
                context, [subcloud.id],
                {Subcloud.backup_status.name: consts.BACKUP_STATE_UNKNOWN,
                 Subcloud.backup_datetime.name: None})

            LOG.info("Successfully deleted backup for subcloud %s" % subcloud.name)
            return True

        except PlaybookExecutionFailed:
            LOG.error("Failed to delete backup for subcloud %s, check individual "
                      "log at %s for detailed output." % (subcloud.name, log_file))

            msg = utils.find_ansible_error_msg(
                subcloud.name, log_file, consts.BACKUP_STATE_FAILED)
            LOG.error(msg)

            db_api.subcloud_update(
                context, subcloud.id,
                error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH])

            return False

    def _run_subcloud_backup_restore_playbook(
            self, subcloud, restore_command, context, log_file):
        db_api.subcloud_update(
            context, subcloud.id,
            deploy_status=consts.DEPLOY_STATE_RESTORING,
            error_description=consts.ERROR_DESC_EMPTY
        )
        # Run the subcloud backup restore playbook
        try:
            run_playbook(log_file, restore_command)
            LOG.info("Successfully restored subcloud %s" % subcloud.name)
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_DONE
            )
            return True
        except PlaybookExecutionFailed:
            msg = utils.find_ansible_error_msg(
                subcloud.name, log_file, consts.DEPLOY_STATE_RESTORING)
            LOG.error(msg)
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_RESTORE_FAILED,
                error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]
            )
            return False

    @staticmethod
    def _fail_subcloud_backup_prep(context, subcloud):
        LOG.exception("Failed to prepare subcloud %s for backup" % subcloud.name)

        db_api.subcloud_update(
            context, subcloud.id,
            backup_status=consts.BACKUP_STATE_PREP_FAILED)

    def _fail_subcloud_backup_operation(self, context, log_file, subcloud):
        msg = utils.find_ansible_error_msg(
            subcloud.name, log_file, consts.BACKUP_STATE_IN_PROGRESS)
        LOG.error(msg)

        db_api.subcloud_update(
            context, subcloud.id,
            backup_status=consts.BACKUP_STATE_FAILED,
            error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH])

        self._set_subcloud_backup_failure_alarm(subcloud)

    def _clear_subcloud_backup_failure_alarm_if_exists(self, subcloud):
        entity_instance_id = "subcloud=%s" % subcloud.name

        try:
            fault = self.fm_api.get_fault(
                fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED,
                entity_instance_id)
            if fault:
                self.fm_api.clear_fault(
                    fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED,  # noqa
                    entity_instance_id)
        except Exception as e:
            LOG.exception(e)

    def _set_subcloud_backup_failure_alarm(self, subcloud):
        entity_instance_id = "subcloud=%s" % subcloud.name

        try:
            fault = fm_api.Fault(
                alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED,  # noqa
                alarm_state=fm_const.FM_ALARM_STATE_SET,
                entity_type_id=fm_const.FM_ENTITY_TYPE_SUBCLOUD,
                entity_instance_id=entity_instance_id,
                severity=fm_const.FM_ALARM_SEVERITY_MINOR,
                reason_text=("Subcloud Backup Failure (subcloud=%s)"
                             % subcloud.name),
                alarm_type=fm_const.FM_ALARM_TYPE_3,
                probable_cause=fm_const.ALARM_PROBABLE_CAUSE_UNKNOWN,
                proposed_repair_action="Retry subcloud backup after checking input "
                                       "file. If problem persists, please contact "
                                       "next level of support.",
                service_affecting=False)
            self.fm_api.set_fault(fault)
        except Exception as e:
            LOG.exception(e)

    def run_deploy_thread(self, subcloud, payload, context,
                          install_command=None, bootstrap_command=None,
                          config_command=None, rehome_command=None,
                          network_reconfig=None):
        try:
            self._run_deploy(subcloud, payload, context,
                             install_command, bootstrap_command,
                             config_command, rehome_command,
                             network_reconfig)
        except Exception as ex:
            LOG.exception("run_deploy failed")
            raise ex

    def _run_deploy(self, subcloud, payload, context,
                    install_command, bootstrap_command,
                    config_command, rehome_command,
                    network_reconfig):
        log_file = (
            os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name)
            + "_playbook_output.log"
        )
        if install_command:
            install_success = self._run_subcloud_install(
                context, subcloud, install_command,
                log_file, payload['install_values']
            )
            if not install_success:
                return
        if bootstrap_command:
            try:
                # Update the subcloud to bootstrapping
                db_api.subcloud_update(
                    context, subcloud.id,
                    deploy_status=consts.DEPLOY_STATE_BOOTSTRAPPING,
                    error_description=consts.ERROR_DESC_EMPTY)
            except Exception:
                LOG.error("DB subcloud_update failed")
                # exception is logged above
                raise

            # Run the ansible bootstrap-subcloud playbook
            LOG.info("Starting bootstrap of %s" % subcloud.name)
            try:
                run_playbook(log_file, bootstrap_command)
            except PlaybookExecutionFailed:
                msg = utils.find_ansible_error_msg(
                    subcloud.name, log_file, consts.DEPLOY_STATE_BOOTSTRAPPING)
                LOG.error(msg)
                db_api.subcloud_update(
                    context, subcloud.id,
                    deploy_status=consts.DEPLOY_STATE_BOOTSTRAP_FAILED,
                    error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH])
                return
            LOG.info("Successfully bootstrapped %s" % subcloud.name)
        if config_command:
            # Run the custom deploy playbook
            LOG.info("Starting deploy of %s" % subcloud.name)
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_DEPLOYING,
                error_description=consts.ERROR_DESC_EMPTY)

            try:
                run_playbook(log_file, config_command)
            except PlaybookExecutionFailed:
                msg = utils.find_ansible_error_msg(
                    subcloud.name, log_file, consts.DEPLOY_STATE_DEPLOYING)
                LOG.error(msg)
                db_api.subcloud_update(
                    context, subcloud.id,
                    deploy_status=consts.DEPLOY_STATE_DEPLOY_FAILED,
                    error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH])
                return
            LOG.info("Successfully deployed %s" % subcloud.name)
        if rehome_command:
            # Update the deploy status to rehoming
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_REHOMING)

            # Run the rehome-subcloud playbook
            try:
                run_playbook(log_file, rehome_command)
            except PlaybookExecutionFailed:
                msg = "Failed to run the subcloud rehome playbook" \
                      " for subcloud %s, check individual log at " \
                      "%s for detailed output." % (
                          subcloud.name,
                          log_file)
                LOG.error(msg)
                db_api.subcloud_update(
                    context, subcloud.id,
                    deploy_status=consts.DEPLOY_STATE_REHOME_FAILED)
                return
            LOG.info("Successfully rehomed subcloud %s" %
                     subcloud.name)
        if network_reconfig:
            self._configure_system_controller_network(context, payload, subcloud)
            subcloud = db_api.subcloud_update(
                context,
                subcloud.id,
                description=payload.get('description', subcloud.description),
                management_subnet=utils.get_management_subnet(payload),
                management_gateway_ip=utils.get_management_gateway_address(payload),
                management_start_ip=utils.get_management_start_address(payload),
                management_end_ip=utils.get_management_end_address(payload),
                location=payload.get('location', subcloud.location),
                group_id=payload.get('group_id', subcloud.group_id),
                data_install=payload.get('data_install', subcloud.data_install)
            )

            # Regenerate the addn_hosts_dc file
            self._create_addn_hosts_dc(context)

        db_api.subcloud_update(
            context, subcloud.id,
            deploy_status=consts.DEPLOY_STATE_DONE,
            error_description=consts.ERROR_DESC_EMPTY)

    def run_deploy_phases(self, context, subcloud_id, payload,
                          deploy_phases_to_run):
        """Run one or more deployment phases, ensuring correct order

        :param context: request context object
        :param subcloud_id: subcloud id from db
        :param payload: deploy phases payload
        :param deploy_phases_to_run: deploy phases that should run
        """
        try:
            succeeded = True
            if consts.DEPLOY_PHASE_INSTALL in deploy_phases_to_run:
                succeeded = self.subcloud_deploy_install(
                    context, subcloud_id, payload)
            if succeeded and consts.DEPLOY_PHASE_BOOTSTRAP in deploy_phases_to_run:
                succeeded = self.subcloud_deploy_bootstrap(
                    context, subcloud_id, payload)
            if succeeded and consts.DEPLOY_PHASE_CONFIG in deploy_phases_to_run:
                succeeded = self.subcloud_deploy_config(
                    context, subcloud_id, payload)
            return succeeded

        except Exception as ex:
            LOG.exception("run_deploy_phases failed")
            raise ex

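    # Note: run_deploy_phases() preserves the install -> bootstrap -> config
    # ordering and short-circuits as soon as a phase reports failure, so a
    # failed install never triggers bootstrap, and a failed bootstrap never
    # triggers config.
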
    def _run_subcloud_config(self, subcloud, context,
                             config_command, log_file):
        # Run the custom deploy playbook
        LOG.info("Starting deploy of %s" % subcloud.name)
        db_api.subcloud_update(
            context, subcloud.id,
            deploy_status=consts.DEPLOY_STATE_CONFIGURING,
            error_description=consts.ERROR_DESC_EMPTY)

        try:
            run_ansible = RunAnsible()
            aborted = run_ansible.exec_playbook(
                log_file, config_command, subcloud.name)
        except PlaybookExecutionFailed:
            msg = utils.find_ansible_error_msg(
                subcloud.name, log_file, consts.DEPLOY_STATE_CONFIGURING)
            LOG.error(msg)
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_CONFIG_FAILED,
                error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH])
            return False
        if aborted:
            return False
        LOG.info("Successfully deployed %s" % subcloud.name)
        db_api.subcloud_update(
            context, subcloud.id,
            deploy_status=consts.DEPLOY_STATE_DONE,
            error_description=consts.ERROR_DESC_EMPTY)
        return True

    @staticmethod
    def _run_subcloud_install(context, subcloud, install_command,
                              log_file, payload, abortable=False):
        software_version = str(payload['software_version'])
        LOG.info("Preparing remote install of %s, version: %s",
                 subcloud.name, software_version)
        if (subcloud.deploy_status != consts.DEPLOY_STATE_PRE_INSTALL or
                subcloud.software_version != software_version):
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_PRE_INSTALL,
                software_version=software_version)
        try:
            install = SubcloudInstall(context, subcloud.name)
            install.prep(dccommon_consts.ANSIBLE_OVERRIDES_PATH, payload)
        except Exception as e:
            LOG.exception(e)
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED)
            LOG.error(str(e))
            install.cleanup(software_version)
            return False

        # Run the remote install playbook
        LOG.info("Starting remote install of %s" % subcloud.name)
        db_api.subcloud_update(
            context, subcloud.id,
            deploy_status=consts.DEPLOY_STATE_INSTALLING,
            error_description=consts.ERROR_DESC_EMPTY)
        try:
            aborted = install.install(
                consts.DC_ANSIBLE_LOG_DIR, install_command, abortable=abortable)
        except Exception as e:
            msg = utils.find_ansible_error_msg(
                subcloud.name, log_file, consts.DEPLOY_STATE_INSTALLING)
            LOG.error(str(e))
            LOG.error(msg)
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_INSTALL_FAILED,
                error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH])
            install.cleanup(software_version)
            return False
        install.cleanup(software_version)
        if aborted:
            return False
        LOG.info("Successfully installed %s" % subcloud.name)
        return True

    def _run_subcloud_bootstrap(self, context, subcloud,
                                bootstrap_command, log_file):
        # Update the subcloud deploy_status to bootstrapping
        db_api.subcloud_update(
            context, subcloud.id,
            deploy_status=consts.DEPLOY_STATE_BOOTSTRAPPING,
            error_description=consts.ERROR_DESC_EMPTY)

        # Run the ansible subcloud bootstrap playbook
        LOG.info("Starting bootstrap of %s" % subcloud.name)
        try:
            run_ansible = RunAnsible()
            aborted = run_ansible.exec_playbook(
                log_file, bootstrap_command, subcloud.name)
        except PlaybookExecutionFailed:
            msg = utils.find_ansible_error_msg(
                subcloud.name, log_file, consts.DEPLOY_STATE_BOOTSTRAPPING)
            LOG.error(msg)
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_BOOTSTRAP_FAILED,
                error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH])
            return False

        if aborted:
            return False

        db_api.subcloud_update(
            context, subcloud.id,
            deploy_status=consts.DEPLOY_STATE_BOOTSTRAPPED,
            error_description=consts.ERROR_DESC_EMPTY)

        LOG.info("Successfully bootstrapped %s" % subcloud.name)
        return True

    def _create_addn_hosts_dc(self, context):
        """Generate the addn_hosts_dc file for hostname/ip translation"""

        addn_hosts_dc = os.path.join(CONFIG_PATH, ADDN_HOSTS_DC)
        addn_hosts_dc_temp = addn_hosts_dc + '.temp'

        subclouds = db_api.subcloud_get_all(context)
        with open(addn_hosts_dc_temp, 'w') as f_out_addn_dc_temp:
            for subcloud in subclouds:
                addn_dc_line = subcloud.management_start_ip + ' ' + \
                    subcloud.name + '\n'
                f_out_addn_dc_temp.write(addn_dc_line)

            # if no more subclouds, create empty file so dnsmasq does not
            # emit an error log.
            if not subclouds:
                f_out_addn_dc_temp.write(' ')

        if not filecmp.cmp(addn_hosts_dc_temp, addn_hosts_dc):
            os.rename(addn_hosts_dc_temp, addn_hosts_dc)
            # restart dnsmasq so it can re-read our addn_hosts file.
            os.system("pkill -HUP dnsmasq")

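    # Each addn_hosts_dc entry written above is a dnsmasq additional-hosts
    # line mapping the subcloud management start IP to the subcloud name,
    # e.g. (illustrative addresses):
    #
    #   192.168.101.2 subcloud1
    #   192.168.102.2 subcloud2
    #
    # dnsmasq is only sent SIGHUP when the regenerated file actually differs
    # from the existing one.
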
    def _write_subcloud_ansible_config(self, cached_regionone_data, payload):
        """Create the override file for usage with the specified subcloud"""

        overrides_file = os.path.join(dccommon_consts.ANSIBLE_OVERRIDES_PATH,
                                      payload['name'] + '.yml')

        mgmt_pool = cached_regionone_data['mgmt_pool']
        mgmt_floating_ip = mgmt_pool.floating_address
        mgmt_subnet = "%s/%d" % (mgmt_pool.network, mgmt_pool.prefix)

        oam_addresses = cached_regionone_data['oam_addresses']
        oam_floating_ip = oam_addresses.oam_floating_ip
        oam_subnet = oam_addresses.oam_subnet

        with open(overrides_file, 'w') as f_out_overrides_file:
            f_out_overrides_file.write(
                '---'
                '\nregion_config: yes'
                '\ndistributed_cloud_role: subcloud'
                '\nsystem_controller_subnet: ' + mgmt_subnet +
                '\nsystem_controller_floating_address: ' + mgmt_floating_ip +
                '\nsystem_controller_oam_subnet: ' + oam_subnet +
                '\nsystem_controller_oam_floating_address: ' + oam_floating_ip
                + '\n'
            )

            for k, v in payload.items():
                if k not in ['deploy_playbook', 'deploy_values',
                             'deploy_config', 'deploy_chart',
                             'deploy_overrides', 'install_values']:
                    f_out_overrides_file.write("%s: %s\n" % (k, json.dumps(v)))

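    # The overrides file produced by _write_subcloud_ansible_config() starts
    # with a fixed region_config header followed by the remaining payload
    # keys dumped as JSON values, e.g. (addresses illustrative):
    #
    #   ---
    #   region_config: yes
    #   distributed_cloud_role: subcloud
    #   system_controller_subnet: 192.168.204.0/24
    #   system_controller_floating_address: 192.168.204.2
    #   system_controller_oam_subnet: 10.10.10.0/24
    #   system_controller_oam_floating_address: 10.10.10.2
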
    def _write_deploy_files(self, payload, subcloud_name):
        """Create the deploy value files for the subcloud"""

        deploy_values_file = os.path.join(
            dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name +
            '_deploy_values.yml')

        with open(deploy_values_file, 'w') as f_out_deploy_values_file:
            json.dump(payload['deploy_values'], f_out_deploy_values_file)

    def _prepare_for_deployment(self, payload, subcloud_name,
                                populate_passwords=True):
        payload['deploy_values'] = dict()
        if populate_passwords:
            payload['deploy_values']['ansible_become_pass'] = \
                payload['sysadmin_password']
            payload['deploy_values']['ansible_ssh_pass'] = \
                payload['sysadmin_password']
            payload['deploy_values']['admin_password'] = \
                str(keyring.get_password('CGCS', 'admin'))
        payload['deploy_values']['deployment_config'] = \
            payload[consts.DEPLOY_CONFIG]
        payload['deploy_values']['deployment_manager_chart'] = \
            payload[consts.DEPLOY_CHART]
        payload['deploy_values']['deployment_manager_overrides'] = \
            payload[consts.DEPLOY_OVERRIDES]
        payload['deploy_values']['user_uploaded_artifacts'] = \
            payload["user_uploaded_artifacts"]
        self._write_deploy_files(payload, subcloud_name)

    def _delete_subcloud_routes(self, keystone_client, subcloud):
        """Delete the routes to this subcloud"""

        # Delete the route to this subcloud on the management interface on
        # both controllers.
        management_subnet = netaddr.IPNetwork(subcloud.management_subnet)
        endpoint = keystone_client.endpoint_cache.get_endpoint('sysinv')
        sysinv_client = SysinvClient(dccommon_consts.DEFAULT_REGION_NAME, keystone_client.session,
                                     endpoint=endpoint)
        cached_regionone_data = self._get_cached_regionone_data(keystone_client, sysinv_client)
        for mgmt_if_uuid in cached_regionone_data['mgmt_interface_uuids']:
            sysinv_client.delete_route(mgmt_if_uuid,
                                       str(management_subnet.ip),
                                       management_subnet.prefixlen,
                                       str(netaddr.IPAddress(subcloud.systemcontroller_gateway_ip)),
                                       1)

    @staticmethod
    def _delete_subcloud_cert(subcloud_name):
        cert_name = SubcloudManager._get_subcloud_cert_name(subcloud_name)
        secret_name = SubcloudManager._get_subcloud_cert_secret_name(
            subcloud_name)

        kube = kubeoperator.KubeOperator()
        kube.delete_cert_manager_certificate(CERT_NAMESPACE, cert_name)

        kube.kube_delete_secret(secret_name, CERT_NAMESPACE)
        LOG.info("cert %s and secret %s are deleted" % (cert_name, secret_name))

    def _remove_subcloud_details(self, context,
                                 subcloud,
                                 ansible_subcloud_inventory_file):
        """Remove subcloud details from database and inform orchestrators"""
        # Inform orchestrators that subcloud has been deleted
        try:
            self.dcorch_rpc_client.del_subcloud(context, subcloud.name)
        except RemoteError as e:
            # TODO(kmacleod): this should be caught as explicit remote exception
            # Fix when centos/python2 is no longer supported
            if "SubcloudNotFound" in str(e):
                pass

        # delete the associated alarm entry
        try:
            db_api.subcloud_alarms_delete(context, subcloud.name)
        except RemoteError as e:
            # TODO(kmacleod): fix same with above
            if "SubcloudNotFound" in str(e):
                pass

        # We only delete subcloud endpoints, region and user information
        # in the Central Region. The subcloud is already unmanaged and powered
        # down so is not accessible. Therefore set up a session with the
        # Central Region Keystone ONLY.
        keystone_client = OpenStackDriver(
            region_name=dccommon_consts.DEFAULT_REGION_NAME,
            region_clients=None).keystone_client

        # Delete keystone endpoints for subcloud
        keystone_client.delete_endpoints(subcloud.name)
        keystone_client.delete_region(subcloud.name)

        # Delete the routes to this subcloud
        self._delete_subcloud_routes(keystone_client, subcloud)

        # Remove the subcloud from the database
        try:
            db_api.subcloud_destroy(context, subcloud.id)
        except Exception as e:
            LOG.exception(e)
            raise e

        # Delete the ansible inventory for the new subcloud
        utils.delete_subcloud_inventory(ansible_subcloud_inventory_file)

        # Delete the subcloud intermediate certificate
        SubcloudManager._delete_subcloud_cert(subcloud.name)

        # Delete the subcloud backup path
        self._delete_subcloud_backup_data(subcloud.name)

        # Regenerate the addn_hosts_dc file
        self._create_addn_hosts_dc(context)

    @staticmethod
    def _delete_subcloud_backup_data(subcloud_name):
        try:
            backup_path = os.path.join(CENTRAL_BACKUP_DIR, subcloud_name)
            if os.path.exists(backup_path):
                shutil.rmtree(backup_path)
        except Exception as e:
            LOG.exception(e)

    def delete_subcloud(self, context, subcloud_id):
        """Delete subcloud and notify orchestrators.

        :param context: request context object.
        :param subcloud_id: id of subcloud to delete
        """
        LOG.info("Deleting subcloud %s." % subcloud_id)

        # Retrieve the subcloud details from the database
        subcloud = db_api.subcloud_get(context, subcloud_id)

        # Semantic checking
        if subcloud.management_state != dccommon_consts.MANAGEMENT_UNMANAGED:
            raise exceptions.SubcloudNotUnmanaged()

        if subcloud.availability_status == \
                dccommon_consts.AVAILABILITY_ONLINE:
            raise exceptions.SubcloudNotOffline()

        # Ansible inventory filename for the specified subcloud
        ansible_subcloud_inventory_file = self._get_ansible_filename(
            subcloud.name, INVENTORY_FILE_POSTFIX)

        self._remove_subcloud_details(context,
                                      subcloud,
                                      ansible_subcloud_inventory_file)

        # Clear any subcloud alarms.
        # Note that endpoint out-of-sync alarms should have been cleared when
        # the subcloud was unmanaged and the endpoint sync statuses were set to
        # unknown.
        #
        # TODO(kmacleod): Until an API is available to clear all alarms
        # for a subcloud, we manually clear the following:
        # - subcloud offline
        # - subcloud resource out of sync
        # - Subcloud Backup Failure
        for alarm_id, entity_instance_id in (
                (fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
                 "subcloud=%s" % subcloud.name),
                (fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,
                 "subcloud=%s.resource=%s" %
                 (subcloud.name, dccommon_consts.ENDPOINT_TYPE_DC_CERT)),
                (fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED,
                 "subcloud=%s" % subcloud.name)):
            try:
                fault = self.fm_api.get_fault(alarm_id,
                                              entity_instance_id)
                if fault:
                    self.fm_api.clear_fault(alarm_id,
                                            entity_instance_id)
            except Exception as e:
                LOG.info(
                    "Problem clearing fault for subcloud %s, alarm_id=%s" %
                    (subcloud.name, alarm_id))
                LOG.exception(e)

    def update_subcloud(self,
                        context,
                        subcloud_id,
                        management_state=None,
                        description=None,
                        location=None,
                        group_id=None,
                        data_install=None,
                        force=None):
        """Update subcloud and notify orchestrators.

        :param context: request context object
        :param subcloud_id: id of subcloud to update
        :param management_state: new management state
        :param description: new description
        :param location: new location
        :param group_id: new subcloud group id
        :param data_install: subcloud install values
        :param force: force flag
        """

        LOG.info("Updating subcloud %s." % subcloud_id)

        # Get the subcloud details from the database
        subcloud = db_api.subcloud_get(context, subcloud_id)
        original_management_state = subcloud.management_state

        # Semantic checking
        if management_state:
            if management_state == dccommon_consts.MANAGEMENT_UNMANAGED:
                if subcloud.management_state == dccommon_consts.MANAGEMENT_UNMANAGED:
                    LOG.warning("Subcloud %s already unmanaged" % subcloud_id)
                    raise exceptions.BadRequest(
                        resource='subcloud',
                        msg='Subcloud is already unmanaged')
            elif management_state == dccommon_consts.MANAGEMENT_MANAGED:
                if subcloud.management_state == dccommon_consts.MANAGEMENT_MANAGED:
                    LOG.warning("Subcloud %s already managed" % subcloud_id)
                    raise exceptions.BadRequest(
                        resource='subcloud',
                        msg='Subcloud is already managed')
                elif not force:
                    if (subcloud.deploy_status != consts.DEPLOY_STATE_DONE and
                            not prestage.is_deploy_status_prestage(
                                subcloud.deploy_status)):
                        LOG.warning("Subcloud %s can be managed only when "
                                    "deploy_status is complete" % subcloud_id)
                        raise exceptions.BadRequest(
                            resource='subcloud',
                            msg='Subcloud can be managed only if deploy status is complete')
                    if subcloud.availability_status != \
                            dccommon_consts.AVAILABILITY_ONLINE:
                        LOG.warning("Subcloud %s is not online" % subcloud_id)
                        raise exceptions.SubcloudNotOnline()
            else:
                LOG.error("Invalid management_state %s" % management_state)
                raise exceptions.InternalError()

        subcloud = db_api.subcloud_update(
            context,
            subcloud_id,
            management_state=management_state,
            description=description,
            location=location,
            group_id=group_id,
            data_install=data_install
        )

        # Inform orchestrators that subcloud has been updated
        if management_state:
            try:
                # Inform orchestrator of state change
                self.dcorch_rpc_client.update_subcloud_states(
                    context,
                    subcloud.name,
                    management_state,
                    subcloud.availability_status)

                LOG.info('Notifying dcorch, subcloud:%s management: %s, '
                         'availability:%s' % (subcloud.name,
                                              management_state,
                                              subcloud.availability_status))

            except Exception as e:
                LOG.exception(e)
                LOG.warn('Problem informing dcorch of subcloud '
                         'state change, resume to original state, subcloud: %s'
                         % subcloud.name)
                management_state = original_management_state
                subcloud = \
                    db_api.subcloud_update(context, subcloud_id,
                                           management_state=management_state,
                                           description=description,
                                           location=location)

            if management_state == dccommon_consts.MANAGEMENT_UNMANAGED:
                # set all endpoint statuses to unknown, except the dc-cert
                # endpoint which continues to be audited for unmanaged
                # subclouds
                self.state_rpc_client.update_subcloud_endpoint_status_sync(
                    context,
                    subcloud_name=subcloud.name,
                    endpoint_type=None,
                    sync_status=dccommon_consts.SYNC_STATUS_UNKNOWN,
                    ignore_endpoints=[dccommon_consts.ENDPOINT_TYPE_DC_CERT])
            elif management_state == dccommon_consts.MANAGEMENT_MANAGED:
                # Subcloud is managed
                # Tell cert-mon to audit endpoint certificate
                LOG.info('Request certmon audit for %s' % subcloud.name)
                dc_notification = dcmanager_rpc_client.DCManagerNotifications()
                dc_notification.subcloud_managed(context, subcloud.name)

        return db_api.subcloud_db_model_to_dict(subcloud)

    def update_subcloud_with_network_reconfig(self, context, subcloud_id, payload):
        subcloud = db_api.subcloud_get(context, subcloud_id)
        subcloud = db_api.subcloud_update(
            context, subcloud.id,
            deploy_status=consts.DEPLOY_STATE_RECONFIGURING_NETWORK
        )
        subcloud_name = payload['name']
        try:
            self._create_intermediate_ca_cert(payload)
            subcloud_inventory_file = self._get_ansible_filename(
                subcloud_name, INVENTORY_FILE_POSTFIX)
            subcloud_params = {'name': subcloud_name,
                               'bootstrap-address': payload.get('bootstrap_address')}
            utils.create_subcloud_inventory(subcloud_params, subcloud_inventory_file)
            overrides_file = self._create_subcloud_update_overrides_file(
                payload, subcloud_name, 'update_values')
            update_command = self.compose_update_command(
                subcloud_name, subcloud_inventory_file)
        except Exception:
            LOG.exception(
                "Failed to prepare subcloud %s for update." % subcloud_name)
            return
        try:
            apply_thread = threading.Thread(
                target=self._run_network_reconfiguration,
                args=(subcloud_name, update_command, overrides_file,
                      payload, context, subcloud))
            apply_thread.start()
        except Exception:
            LOG.exception("Failed to update subcloud %s" % subcloud_name)

    def _run_network_reconfiguration(
            self, subcloud_name, update_command, overrides_file,
            payload, context, subcloud
    ):
        log_file = (os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud_name) +
                    '_playbook_output.log')
        subcloud_id = subcloud.id
        try:
            run_playbook(log_file, update_command)
            utils.delete_subcloud_inventory(overrides_file)
        except PlaybookExecutionFailed:
            msg = utils.find_ansible_error_msg(
                subcloud_name, log_file, consts.DEPLOY_STATE_RECONFIGURING_NETWORK)
            LOG.error(msg)
            db_api.subcloud_update(
                context, subcloud_id,
                deploy_status=consts.DEPLOY_STATE_RECONFIGURING_NETWORK_FAILED,
                error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]
            )
            return

        self._configure_system_controller_network(context, payload, subcloud)

        db_api.subcloud_update(
            context, subcloud_id, deploy_status=consts.DEPLOY_STATE_DONE
        )

        subcloud = db_api.subcloud_update(
            context,
            subcloud_id,
            description=payload.get('description', subcloud.description),
            management_subnet=payload.get('management_subnet'),
            management_gateway_ip=payload.get('management_gateway_ip'),
            management_start_ip=payload.get('management_start_ip'),
            management_end_ip=payload.get('management_end_ip'),
            location=payload.get('location', subcloud.location),
            group_id=payload.get('group_id', subcloud.group_id),
            data_install=payload.get('data_install', subcloud.data_install)
        )

        # Regenerate the addn_hosts_dc file
        self._create_addn_hosts_dc(context)

    def _configure_system_controller_network(self, context, payload, subcloud,
                                             update_db=True):
        """Configure system controller network

        :param context: request context object
        :param payload: subcloud bootstrap configuration
        :param subcloud: subcloud model object
        :param update_db: whether it should update the db on success/failure
        """
        subcloud_name = subcloud.name
        subcloud_id = subcloud.id
        sys_controller_gw_ip = payload.get("systemcontroller_gateway_address",
                                           subcloud.systemcontroller_gateway_ip)

        try:
            m_ks_client = OpenStackDriver(
                region_name=dccommon_consts.DEFAULT_REGION_NAME,
                region_clients=None).keystone_client
            self._create_subcloud_route(payload, m_ks_client,
                                        sys_controller_gw_ip)
        except Exception:
            LOG.exception(
                "Failed to create route to subcloud %s." % subcloud_name)
            if update_db:
                db_api.subcloud_update(
                    context, subcloud_id,
                    deploy_status=consts.DEPLOY_STATE_RECONFIGURING_NETWORK_FAILED,
                    error_description=consts.ERROR_DESC_EMPTY
                )
            return
        try:
            self._update_services_endpoint(
                context, payload, subcloud_name, m_ks_client)
        except Exception:
            LOG.exception("Failed to update subcloud %s endpoints" % subcloud_name)
            if update_db:
                db_api.subcloud_update(
                    context, subcloud_id,
                    deploy_status=consts.DEPLOY_STATE_RECONFIGURING_NETWORK_FAILED,
                    error_description=consts.ERROR_DESC_EMPTY
                )
            return

        # Delete old routes
        if utils.get_management_subnet(payload) != subcloud.management_subnet:
            self._delete_subcloud_routes(m_ks_client, subcloud)

    def _create_subcloud_route(self, payload, keystone_client,
                               systemcontroller_gateway_ip):
        subcloud_subnet = netaddr.IPNetwork(utils.get_management_subnet(payload))
        endpoint = keystone_client.endpoint_cache.get_endpoint('sysinv')
        sysinv_client = SysinvClient(dccommon_consts.DEFAULT_REGION_NAME,
                                     keystone_client.session,
                                     endpoint=endpoint)
        cached_regionone_data = self._get_cached_regionone_data(
            keystone_client, sysinv_client)
        for mgmt_if_uuid in cached_regionone_data['mgmt_interface_uuids']:
            sysinv_client.create_route(mgmt_if_uuid,
                                       str(subcloud_subnet.ip),
                                       subcloud_subnet.prefixlen,
                                       systemcontroller_gateway_ip,
                                       1)

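    # _create_subcloud_route() adds one static route per controller management
    # interface, pointing the subcloud management subnet at the system
    # controller gateway with a metric of 1, e.g. (illustrative):
    #   192.168.101.0/24 via 192.168.204.1, metric 1
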
    def _update_services_endpoint(
            self, context, payload, subcloud_name, m_ks_client):
        endpoint_ip = utils.get_management_start_address(payload)
        if netaddr.IPAddress(endpoint_ip).version == 6:
            endpoint_ip = f"[{endpoint_ip}]"

        services_endpoints = {
            "keystone": "https://{}:5001/v3".format(endpoint_ip),
            "sysinv": "https://{}:6386/v1".format(endpoint_ip),
            "fm": "https://{}:18003".format(endpoint_ip),
            "patching": "https://{}:5492".format(endpoint_ip),
            "vim": "https://{}:4546".format(endpoint_ip),
            "usm": "https://{}:5498".format(endpoint_ip),
        }

        for endpoint in m_ks_client.keystone_client.endpoints.list(
                region=subcloud_name):
            service_type = m_ks_client.keystone_client.services.get(
                endpoint.service_id).type
            if service_type == dccommon_consts.ENDPOINT_TYPE_PLATFORM:
                admin_endpoint_url = services_endpoints.get('sysinv')
            elif service_type == dccommon_consts.ENDPOINT_TYPE_IDENTITY:
                admin_endpoint_url = services_endpoints.get('keystone')
            elif service_type == dccommon_consts.ENDPOINT_TYPE_PATCHING:
                admin_endpoint_url = services_endpoints.get('patching')
            elif service_type == dccommon_consts.ENDPOINT_TYPE_FM:
                admin_endpoint_url = services_endpoints.get('fm')
            elif service_type == dccommon_consts.ENDPOINT_TYPE_NFV:
                admin_endpoint_url = services_endpoints.get('vim')
            elif service_type == dccommon_consts.ENDPOINT_TYPE_SOFTWARE:
                admin_endpoint_url = services_endpoints.get('usm')
            else:
                LOG.exception("Endpoint Type Error: %s" % service_type)
            m_ks_client.keystone_client.endpoints.update(
                endpoint, url=admin_endpoint_url)

        LOG.info("Update services endpoint to %s in subcloud %s" % (
            endpoint_ip, subcloud_name))
        # Update service URLs in subcloud endpoint cache
        self.audit_rpc_client.trigger_subcloud_endpoints_update(
            context, subcloud_name, services_endpoints)
        self.dcorch_rpc_client.update_subcloud_endpoints(
            context, subcloud_name, services_endpoints)
        # Update sysinv URL in cert-mon cache
        dc_notification = dcmanager_rpc_client.DCManagerNotifications()
        dc_notification.subcloud_sysinv_endpoint_update(
            context, subcloud_name, services_endpoints.get("sysinv"))

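    # After a management network change, the subcloud admin endpoints kept in
    # the system controller Keystone are rewritten to the new management start
    # address, e.g. (illustrative):
    #   sysinv:   https://192.168.101.2:6386/v1
    #   keystone: https://192.168.101.2:5001/v3
    # IPv6 addresses are bracketed first, e.g. https://[fd01::2]:6386/v1.
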
    def _create_subcloud_update_overrides_file(
            self, payload, subcloud_name, filename_suffix):
        update_overrides_file = os.path.join(
            dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + '_' +
            filename_suffix + '.yml')

        self._update_override_values(payload)

        with open(update_overrides_file, 'w', encoding='UTF-8') as f_out:
            f_out.write('---\n')
            for key, value in payload['override_values'].items():
                if key in ['ansible_ssh_pass', 'ansible_become_pass']:
                    f_out.write(f"{key}: {value}\n")
                else:
                    f_out.write(f"{key}: {json.dumps(value)}\n")

        return update_overrides_file

    def _update_override_values(self, payload):
        if not payload.get('override_values'):
            payload['override_values'] = {}

        payload['override_values']['ansible_ssh_pass'] = (
            payload['sysadmin_password'])
        payload['override_values']['ansible_become_pass'] = (
            payload['sysadmin_password'])

        payload['override_values']['sc_gateway_address'] = (
            payload['management_gateway_ip'])
        payload['override_values']['sc_floating_address'] = (
            payload['management_start_ip'])
        payload['override_values']['system_controller_network'] = (
            payload['system_controller_network'])
        payload['override_values']['system_controller_network_prefix'] = (
            payload['system_controller_network_prefix'])
        payload['override_values']['sc_subnet'] = payload['management_subnet']

        payload['override_values']['dc_root_ca_cert'] = payload['dc_root_ca_cert']
        payload['override_values']['sc_ca_cert'] = payload['sc_ca_cert']
        payload['override_values']['sc_ca_key'] = payload['sc_ca_key']

    def update_subcloud_sync_endpoint_type(self, context,
                                           subcloud_name,
                                           endpoint_type_list,
                                           openstack_installed):
        operation = 'add' if openstack_installed else 'remove'
        func_switcher = {
            'add': (
                self.dcorch_rpc_client.add_subcloud_sync_endpoint_type,
                db_api.subcloud_status_create
            ),
            'remove': (
                self.dcorch_rpc_client.remove_subcloud_sync_endpoint_type,
                db_api.subcloud_status_delete
            )
        }

        try:
            subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
        except Exception:
            LOG.exception("Failed to get subcloud by name: %s" % subcloud_name)
            raise

        try:
            # Notify dcorch to add/remove sync endpoint type list
            func_switcher[operation][0](self.context, subcloud_name,
                                        endpoint_type_list)
            LOG.info('Notifying dcorch, subcloud: %s new sync endpoint: %s' %
                     (subcloud_name, endpoint_type_list))

            # Update subcloud status table by adding/removing openstack sync
            # endpoint types
            for endpoint_type in endpoint_type_list:
                func_switcher[operation][1](self.context, subcloud.id,
                                            endpoint_type)
            # Update openstack_installed of subcloud table
            db_api.subcloud_update(self.context, subcloud.id,
                                   openstack_installed=openstack_installed)
        except Exception:
            LOG.exception('Problem informing dcorch of subcloud sync endpoint'
                          ' type change, subcloud: %s' % subcloud_name)

    def handle_subcloud_operations_in_progress(self):
        """Identify subclouds in transitory stages and update subcloud

        state to failure.
        """

        LOG.info('Identifying subclouds in transitory stages.')

        subclouds = db_api.subcloud_get_all(self.context)

        for subcloud in subclouds:
            # Identify subclouds in transitory states
            new_deploy_status = TRANSITORY_STATES.get(subcloud.deploy_status)
            new_backup_status = TRANSITORY_BACKUP_STATES.get(subcloud.backup_status)

            # update deploy and backup states to the corresponding failure states
            if new_deploy_status or new_backup_status:
                if new_deploy_status:
                    LOG.info("Changing subcloud %s deploy status from %s to %s."
                             % (subcloud.name, subcloud.deploy_status,
                                new_deploy_status))
                if new_backup_status:
                    LOG.info("Changing subcloud %s backup status from %s to %s."
                             % (subcloud.name, subcloud.backup_status,
                                new_backup_status))

                db_api.subcloud_update(
                    self.context,
                    subcloud.id,
                    deploy_status=new_deploy_status or subcloud.deploy_status,
                    backup_status=new_backup_status or subcloud.backup_status
                )

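    # TRANSITORY_STATES and TRANSITORY_BACKUP_STATES (defined earlier in this
    # module) map each in-progress deploy/backup state to the corresponding
    # failed state, so any operation that was interrupted (for example by a
    # dcmanager service restart) is surfaced as a failure rather than left
    # "in progress" indefinitely.
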
    @staticmethod
    def prestage_subcloud(context, payload):
        """Subcloud prestaging"""
        return prestage.prestage_subcloud(context, payload)

    @utils.synchronized("regionone-data-cache", external=False)
    def _get_cached_regionone_data(self, regionone_keystone_client, regionone_sysinv_client=None):
        if (not SubcloudManager.regionone_data or
                SubcloudManager.regionone_data['expiry'] <= datetime.datetime.utcnow()):
            user_list = regionone_keystone_client.get_enabled_users(id_only=False)
            for user in user_list:
                if user.name == dccommon_consts.ADMIN_USER_NAME:
                    SubcloudManager.regionone_data['admin_user_id'] = user.id
                elif user.name == dccommon_consts.SYSINV_USER_NAME:
                    SubcloudManager.regionone_data['sysinv_user_id'] = user.id
                elif user.name == dccommon_consts.DCMANAGER_USER_NAME:
                    SubcloudManager.regionone_data['dcmanager_user_id'] = user.id

            project_list = regionone_keystone_client.get_enabled_projects(id_only=False)
            for project in project_list:
                if project.name == dccommon_consts.ADMIN_PROJECT_NAME:
                    SubcloudManager.regionone_data['admin_project_id'] = project.id
                elif project.name == dccommon_consts.SERVICES_USER_NAME:
                    SubcloudManager.regionone_data['services_project_id'] = project.id

            if regionone_sysinv_client is None:
                endpoint = regionone_keystone_client.endpoint_cache.get_endpoint('sysinv')
                regionone_sysinv_client = SysinvClient(
                    dccommon_consts.DEFAULT_REGION_NAME,
                    regionone_keystone_client.session,
                    endpoint=endpoint)

            controllers = regionone_sysinv_client.get_controller_hosts()
            mgmt_interface_uuids = []
            for controller in controllers:
                mgmt_interface = regionone_sysinv_client.get_management_interface(
                    controller.hostname)
                if mgmt_interface is not None:
                    mgmt_interface_uuids.append(mgmt_interface.uuid)
            SubcloudManager.regionone_data['mgmt_interface_uuids'] = mgmt_interface_uuids

            SubcloudManager.regionone_data['mgmt_pool'] = \
                regionone_sysinv_client.get_management_address_pool()
            SubcloudManager.regionone_data['oam_addresses'] = \
                regionone_sysinv_client.get_oam_addresses()

            SubcloudManager.regionone_data['expiry'] = \
                datetime.datetime.utcnow() + datetime.timedelta(hours=1)
            LOG.info("RegionOne cached data updated %s" % SubcloudManager.regionone_data)

        cached_regionone_data = SubcloudManager.regionone_data
        return cached_regionone_data

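    # The RegionOne lookups above (keystone user/project IDs, management
    # interface UUIDs, management pool and OAM addresses) are cached at class
    # level for one hour; the @utils.synchronized decorator keeps concurrent
    # subcloud operations from refreshing the cache at the same time.
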
    def _populate_payload_with_cached_keystone_data(self, cached_data, payload,
                                                    populate_passwords=True):
        payload['system_controller_keystone_admin_user_id'] = \
            cached_data['admin_user_id']
        payload['system_controller_keystone_admin_project_id'] = \
            cached_data['admin_project_id']
        payload['system_controller_keystone_services_project_id'] = \
            cached_data['services_project_id']
        payload['system_controller_keystone_sysinv_user_id'] = \
            cached_data['sysinv_user_id']
        payload['system_controller_keystone_dcmanager_user_id'] = \
            cached_data['dcmanager_user_id']

        if populate_passwords:
            # While at it, add the admin and service user passwords to the
            # payload so they get copied to the overrides file
            payload['ansible_become_pass'] = payload['sysadmin_password']
            payload['ansible_ssh_pass'] = payload['sysadmin_password']
            payload['admin_password'] = str(keyring.get_password('CGCS',
                                                                 'admin'))