# Copyright 2017 Ericsson AB. # Copyright (c) 2017-2022 Wind River Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. # See the License for the specific language governing permissions and # limitations under the License. # from __future__ import division import collections import datetime import filecmp import functools import json import keyring import netaddr import os import threading import time from eventlet import greenpool from oslo_log import log as logging from oslo_messaging import RemoteError from tsconfig.tsconfig import CONFIG_PATH from tsconfig.tsconfig import SW_VERSION from dccommon import consts as dccommon_consts from dccommon.drivers.openstack.sdk_platform import OpenStackDriver from dccommon.drivers.openstack.sysinv_v1 import SysinvClient from dccommon.exceptions import PlaybookExecutionFailed from dccommon import kubeoperator from dccommon.subcloud_install import SubcloudInstall from dccommon.utils import run_playbook from dcmanager.common.exceptions import DCManagerException from dcmanager.common.exceptions import SubcloudBackupDeleteOperationFailed from dcmanager.db.sqlalchemy.models import Subcloud from dcorch.rpc import client as dcorch_rpc_client from dcmanager.audit import rpcapi as dcmanager_audit_rpc_client from dcmanager.common import consts from dcmanager.common.consts import INVENTORY_FILE_POSTFIX from dcmanager.common import context as dcmanager_context from dcmanager.common import exceptions from dcmanager.common.i18n import _ from dcmanager.common import manager from dcmanager.common import prestage from dcmanager.common import utils from dcmanager.db import api as db_api from dcmanager.rpc import client as dcmanager_rpc_client from fm_api import constants as fm_const from fm_api import fm_api LOG = logging.getLogger(__name__) # Name of our distributed cloud addn_hosts file for dnsmasq # to read. This file is referenced in dnsmasq.conf ADDN_HOSTS_DC = 'dnsmasq.addn_hosts_dc' # Subcloud configuration paths ANSIBLE_SUBCLOUD_BACKUP_CREATE_PLAYBOOK = \ '/usr/share/ansible/stx-ansible/playbooks/create_subcloud_backup.yml' ANSIBLE_SUBCLOUD_BACKUP_DELETE_PLAYBOOK = \ '/usr/share/ansible/stx-ansible/playbooks/delete_subcloud_backup.yml' ANSIBLE_HOST_VALIDATION_PLAYBOOK = \ '/usr/share/ansible/stx-ansible/playbooks/validate_host.yml' ANSIBLE_SUBCLOUD_PLAYBOOK = \ '/usr/share/ansible/stx-ansible/playbooks/bootstrap.yml' ANSIBLE_SUBCLOUD_INSTALL_PLAYBOOK = \ '/usr/share/ansible/stx-ansible/playbooks/install.yml' ANSIBLE_SUBCLOUD_REHOME_PLAYBOOK = \ '/usr/share/ansible/stx-ansible/playbooks/rehome_subcloud.yml' ANSIBLE_SUBCLOUD_RESTORE_PLAYBOOK = \ '/usr/share/ansible/stx-ansible/playbooks/restore_platform.yml' USERS_TO_REPLICATE = [ 'sysinv', 'patching', 'vim', 'mtce', 'fm', 'barbican', 'dcmanager'] # The timeout of the rehome playbook is set to 180 seconds as it takes a # long time for privilege escalation before resetting the host route and # LDAP server address in a subcloud. 
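# Note: the value below is kept as a string because it is passed straight to
# ansible-playbook via its --timeout option (see compose_rehome_command),
# which overrides the SSH connection timeout, in seconds.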
REHOME_PLAYBOOK_TIMEOUT = "180" # 180 seconds SC_INTERMEDIATE_CERT_DURATION = "8760h" # 1 year = 24 hours x 365 SC_INTERMEDIATE_CERT_RENEW_BEFORE = "720h" # 30 days CERT_NAMESPACE = "dc-cert" TRANSITORY_STATES = { consts.DEPLOY_STATE_NONE: consts.DEPLOY_STATE_DEPLOY_PREP_FAILED, consts.DEPLOY_STATE_PRE_DEPLOY: consts.DEPLOY_STATE_DEPLOY_PREP_FAILED, consts.DEPLOY_STATE_PRE_INSTALL: consts.DEPLOY_STATE_PRE_INSTALL_FAILED, consts.DEPLOY_STATE_INSTALLING: consts.DEPLOY_STATE_INSTALL_FAILED, consts.DEPLOY_STATE_BOOTSTRAPPING: consts.DEPLOY_STATE_BOOTSTRAP_FAILED, consts.DEPLOY_STATE_DEPLOYING: consts.DEPLOY_STATE_DEPLOY_FAILED, consts.DEPLOY_STATE_MIGRATING_DATA: consts.DEPLOY_STATE_DATA_MIGRATION_FAILED, consts.DEPLOY_STATE_PRE_RESTORE: consts.DEPLOY_STATE_RESTORE_PREP_FAILED, consts.DEPLOY_STATE_RESTORING: consts.DEPLOY_STATE_RESTORE_FAILED, consts.PRESTAGE_STATE_PREPARE: consts.PRESTAGE_STATE_FAILED, consts.PRESTAGE_STATE_PACKAGES: consts.PRESTAGE_STATE_FAILED, consts.PRESTAGE_STATE_IMAGES: consts.PRESTAGE_STATE_FAILED, } TRANSITORY_BACKUP_STATES = { consts.BACKUP_STATE_VALIDATING: consts.BACKUP_STATE_VALIDATE_FAILED, consts.BACKUP_STATE_PRE_BACKUP: consts.BACKUP_STATE_PREP_FAILED, consts.BACKUP_STATE_IN_PROGRESS: consts.BACKUP_STATE_FAILED } MAX_PARALLEL_SUBCLOUD_BACKUP_CREATE = 50 MAX_PARALLEL_SUBCLOUD_BACKUP_DELETE = 250 CENTRAL_BACKUP_DIR = '/opt/dc-vault/backups' class SubcloudManager(manager.Manager): """Manages tasks related to subclouds.""" regionone_data = collections.defaultdict(dict) def __init__(self, *args, **kwargs): LOG.debug(_('SubcloudManager initialization...')) super(SubcloudManager, self).__init__(service_name="subcloud_manager", *args, **kwargs) self.context = dcmanager_context.get_admin_context() self.dcorch_rpc_client = dcorch_rpc_client.EngineClient() self.fm_api = fm_api.FaultAPIs() self.audit_rpc_client = dcmanager_audit_rpc_client.ManagerAuditClient() self.state_rpc_client = dcmanager_rpc_client.SubcloudStateClient() @staticmethod def _get_subcloud_cert_name(subcloud_name): cert_name = "%s-adminep-ca-certificate" % subcloud_name return cert_name @staticmethod def _get_subcloud_cert_secret_name(subcloud_name): secret_name = "%s-adminep-ca-certificate" % subcloud_name return secret_name @staticmethod def _create_intermediate_ca_cert(payload): subcloud_name = payload["name"] cert_name = SubcloudManager._get_subcloud_cert_name(subcloud_name) secret_name = SubcloudManager._get_subcloud_cert_secret_name( subcloud_name) cert = { "apiVersion": "%s/%s" % (kubeoperator.CERT_MANAGER_GROUP, kubeoperator.CERT_MANAGER_VERSION), "kind": "Certificate", "metadata": { "namespace": CERT_NAMESPACE, "name": cert_name }, "spec": { "secretName": secret_name, "duration": SC_INTERMEDIATE_CERT_DURATION, "renewBefore": SC_INTERMEDIATE_CERT_RENEW_BEFORE, "issuerRef": { "kind": "Issuer", "name": "dc-adminep-root-ca-issuer" }, "commonName": cert_name, "isCA": True, }, } kube = kubeoperator.KubeOperator() kube.apply_cert_manager_certificate(CERT_NAMESPACE, cert_name, cert) for count in range(1, 20): secret = kube.kube_get_secret(secret_name, CERT_NAMESPACE) if not hasattr(secret, 'data'): time.sleep(1) LOG.debug('Wait for %s ... %s' % (secret_name, count)) continue data = secret.data if ('ca.crt' not in data or 'tls.crt' not in data or 'tls.key' not in data) or \ not (data['ca.crt'] and data['tls.crt'] and data['tls.key']): # ca cert, certificate and key pair are needed and must exist # for creating an intermediate ca. If not, certificate is not # ready yet. 
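                # Poll again; the surrounding loop allows up to 19 one-second
                # retries before giving up and raising below.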
time.sleep(1) LOG.debug('Wait for %s ... %s' % (secret_name, count)) continue payload['dc_root_ca_cert'] = data['ca.crt'] payload['sc_ca_cert'] = data['tls.crt'] payload['sc_ca_key'] = data['tls.key'] return raise Exception("Secret for certificate %s is not ready." % cert_name) # TODO(kmacleod) switch to using utils.get_ansible_filename @staticmethod def _get_ansible_filename(subcloud_name, postfix='.yml'): ansible_filename = os.path.join( consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + postfix) return ansible_filename def compose_install_command(self, subcloud_name, ansible_subcloud_inventory_file): install_command = [ "ansible-playbook", ANSIBLE_SUBCLOUD_INSTALL_PLAYBOOK, "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name, "-e", "@%s" % consts.ANSIBLE_OVERRIDES_PATH + "/" + subcloud_name + '/' + "install_values.yml"] return install_command def compose_apply_command(self, subcloud_name, ansible_subcloud_inventory_file): apply_command = [ "ansible-playbook", ANSIBLE_SUBCLOUD_PLAYBOOK, "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name ] # Add the overrides dir and region_name so the playbook knows # which overrides to load apply_command += [ "-e", str("override_files_dir='%s' region_name=%s") % ( consts.ANSIBLE_OVERRIDES_PATH, subcloud_name)] return apply_command def compose_deploy_command(self, subcloud_name, ansible_subcloud_inventory_file, payload): deploy_command = [ "ansible-playbook", payload[consts.DEPLOY_PLAYBOOK], "-e", "@%s" % consts.ANSIBLE_OVERRIDES_PATH + "/" + subcloud_name + '_deploy_values.yml', "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name ] return deploy_command def compose_check_target_command(self, subcloud_name, ansible_subcloud_inventory_file, payload): check_target_command = [ "ansible-playbook", ANSIBLE_HOST_VALIDATION_PLAYBOOK, "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name, "-e", "@%s" % consts.ANSIBLE_OVERRIDES_PATH + "/" + subcloud_name + "_check_target_values.yml"] return check_target_command def compose_restore_command(self, subcloud_name, ansible_subcloud_inventory_file, payload): restore_command = [ "ansible-playbook", ANSIBLE_SUBCLOUD_RESTORE_PLAYBOOK, "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name, "-e", "@%s" % consts.ANSIBLE_OVERRIDES_PATH + "/" + subcloud_name + "_restore_values.yml"] return restore_command def compose_backup_command(self, subcloud_name, ansible_subcloud_inventory_file): backup_command = [ "ansible-playbook", ANSIBLE_SUBCLOUD_BACKUP_CREATE_PLAYBOOK, "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name, "-e", "subcloud_bnr_overrides=%s" % consts.ANSIBLE_OVERRIDES_PATH + "/" + subcloud_name + "_backup_create_values.yml"] return backup_command def compose_backup_delete_command(self, subcloud_name, ansible_subcloud_inventory_file): backup_command = [ "ansible-playbook", ANSIBLE_SUBCLOUD_BACKUP_DELETE_PLAYBOOK, "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name, "-e", "subcloud_bnr_overrides=%s" % consts.ANSIBLE_OVERRIDES_PATH + "/" + subcloud_name + "_backup_delete_values.yml"] return backup_command def compose_rehome_command(self, subcloud_name, ansible_subcloud_inventory_file): rehome_command = [ "ansible-playbook", ANSIBLE_SUBCLOUD_REHOME_PLAYBOOK, "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name, "--timeout", REHOME_PLAYBOOK_TIMEOUT, "-e", str("override_files_dir='%s' region_name=%s") % ( consts.ANSIBLE_OVERRIDES_PATH, subcloud_name)] return rehome_command def add_subcloud(self, context, payload): """Add subcloud and 
notify orchestrators.

        :param context: request context object
        :param payload: subcloud configuration
        """
        LOG.info("Adding subcloud %s." % payload['name'])
        subcloud_id = db_api.subcloud_get_by_name(context, payload['name']).id

        # Check the migrate option from payload
        migrate_str = payload.get('migrate', '')
        migrate_flag = (migrate_str.lower() == 'true')
        if migrate_flag:
            subcloud = db_api.subcloud_update(
                context, subcloud_id,
                deploy_status=consts.DEPLOY_STATE_PRE_REHOME)
        else:
            subcloud = db_api.subcloud_update(
                context, subcloud_id,
                deploy_status=consts.DEPLOY_STATE_PRE_DEPLOY)

        try:
            # Ansible inventory filename for the specified subcloud
            ansible_subcloud_inventory_file = self._get_ansible_filename(
                subcloud.name, INVENTORY_FILE_POSTFIX)

            # Create a new route to this subcloud on the management interface
            # on both controllers.
            m_ks_client = OpenStackDriver(
                region_name=dccommon_consts.DEFAULT_REGION_NAME,
                region_clients=None).keystone_client
            subcloud_subnet = netaddr.IPNetwork(payload['management_subnet'])
            endpoint = m_ks_client.endpoint_cache.get_endpoint('sysinv')
            sysinv_client = SysinvClient(dccommon_consts.DEFAULT_REGION_NAME,
                                         m_ks_client.session,
                                         endpoint=endpoint)
            LOG.debug("Getting cached regionone data for %s" % subcloud.name)
            cached_regionone_data = self._get_cached_regionone_data(m_ks_client, sysinv_client)
            for mgmt_if_uuid in cached_regionone_data['mgmt_interface_uuids']:
                sysinv_client.create_route(mgmt_if_uuid,
                                           str(subcloud_subnet.ip),
                                           subcloud_subnet.prefixlen,
                                           payload['systemcontroller_gateway_address'],
                                           1)

            # Create endpoints to this subcloud on the
            # management-start-ip of the subcloud which will be allocated
            # as the floating Management IP of the Subcloud if the
            # Address Pool is not shared. In case the endpoint entries
            # are incorrect, or the management IP of the subcloud is changed
            # in the future, it will not go managed or will show up as
            # out of sync. To fix this, use OpenStack endpoint commands
            # on the SystemController to change the subcloud endpoints.
            # The non-identity endpoints are added to facilitate horizon access
            # from the System Controller to the subcloud.
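            # Admin endpoints created below (all HTTPS, on the subcloud
            # management-start address): sysinv/platform on 6386, identity
            # on 5001, patching on 5492, fm on 18003 and vim/nfv on 4546.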
endpoint_config = [] endpoint_ip = payload['management_start_address'] if netaddr.IPAddress(endpoint_ip).version == 6: endpoint_ip = '[' + endpoint_ip + ']' for service in m_ks_client.services_list: if service.type == dccommon_consts.ENDPOINT_TYPE_PLATFORM: admin_endpoint_url = "https://{}:6386/v1".format(endpoint_ip) endpoint_config.append({"id": service.id, "admin_endpoint_url": admin_endpoint_url}) elif service.type == dccommon_consts.ENDPOINT_TYPE_IDENTITY: admin_endpoint_url = "https://{}:5001/v3".format(endpoint_ip) endpoint_config.append({"id": service.id, "admin_endpoint_url": admin_endpoint_url}) elif service.type == dccommon_consts.ENDPOINT_TYPE_PATCHING: admin_endpoint_url = "https://{}:5492".format(endpoint_ip) endpoint_config.append({"id": service.id, "admin_endpoint_url": admin_endpoint_url}) elif service.type == dccommon_consts.ENDPOINT_TYPE_FM: admin_endpoint_url = "https://{}:18003".format(endpoint_ip) endpoint_config.append({"id": service.id, "admin_endpoint_url": admin_endpoint_url}) elif service.type == dccommon_consts.ENDPOINT_TYPE_NFV: admin_endpoint_url = "https://{}:4546".format(endpoint_ip) endpoint_config.append({"id": service.id, "admin_endpoint_url": admin_endpoint_url}) if len(endpoint_config) < 5: raise exceptions.BadRequest( resource='subcloud', msg='Missing service in SystemController') for endpoint in endpoint_config: try: m_ks_client.keystone_client.endpoints.create( endpoint["id"], endpoint['admin_endpoint_url'], interface=dccommon_consts.KS_ENDPOINT_ADMIN, region=subcloud.name) except Exception as e: # Keystone service must be temporarily busy, retry LOG.error(str(e)) m_ks_client.keystone_client.endpoints.create( endpoint["id"], endpoint['admin_endpoint_url'], interface=dccommon_consts.KS_ENDPOINT_ADMIN, region=subcloud.name) # Inform orchestrator that subcloud has been added self.dcorch_rpc_client.add_subcloud( context, subcloud.name, subcloud.software_version) # create entry into alarm summary table, will get real values later alarm_updates = {'critical_alarms': -1, 'major_alarms': -1, 'minor_alarms': -1, 'warnings': -1, 'cloud_status': consts.ALARMS_DISABLED} db_api.subcloud_alarms_create(context, subcloud.name, alarm_updates) # Regenerate the addn_hosts_dc file self._create_addn_hosts_dc(context) self._populate_payload_with_cached_keystone_data( cached_regionone_data, payload) if "install_values" in payload: payload['install_values']['ansible_ssh_pass'] = \ payload['sysadmin_password'] if 'image' not in payload['install_values']: matching_iso, matching_sig = utils.get_vault_load_files( SW_VERSION) payload['install_values'].update({'image': matching_iso}) deploy_command = None if "deploy_playbook" in payload: self._prepare_for_deployment(payload, subcloud.name) deploy_command = self.compose_deploy_command( subcloud.name, ansible_subcloud_inventory_file, payload) del payload['sysadmin_password'] payload['users'] = dict() for user in USERS_TO_REPLICATE: payload['users'][user] = \ str(keyring.get_password( user, dccommon_consts.SERVICES_USER_NAME)) # Create the ansible inventory for the new subcloud utils.create_subcloud_inventory(payload, ansible_subcloud_inventory_file) # create subcloud intermediate certificate and pass in keys self._create_intermediate_ca_cert(payload) # Write this subclouds overrides to file # NOTE: This file should not be deleted if subcloud add fails # as it is used for debugging self._write_subcloud_ansible_config(cached_regionone_data, payload) if migrate_flag: rehome_command = self.compose_rehome_command( subcloud.name, 
                    ansible_subcloud_inventory_file)
                apply_thread = threading.Thread(
                    target=self.run_deploy,
                    args=(subcloud, payload, context,
                          None, None, None, None, None, rehome_command))
            else:
                install_command = None
                if "install_values" in payload:
                    install_command = self.compose_install_command(
                        subcloud.name, ansible_subcloud_inventory_file)
                apply_command = self.compose_apply_command(
                    subcloud.name, ansible_subcloud_inventory_file)
                apply_thread = threading.Thread(
                    target=self.run_deploy,
                    args=(subcloud, payload, context,
                          install_command, apply_command, deploy_command))

            apply_thread.start()

            return db_api.subcloud_db_model_to_dict(subcloud)

        except Exception:
            LOG.exception("Failed to create subcloud %s" % payload['name'])
            # If we failed to create the subcloud, update the
            # deployment status
            if migrate_flag:
                db_api.subcloud_update(
                    context, subcloud.id,
                    deploy_status=consts.DEPLOY_STATE_REHOME_PREP_FAILED)
            else:
                db_api.subcloud_update(
                    context, subcloud.id,
                    deploy_status=consts.DEPLOY_STATE_DEPLOY_PREP_FAILED)

    def reconfigure_subcloud(self, context, subcloud_id, payload):
        """Reconfigure subcloud

        :param context: request context object
        :param payload: subcloud configuration
        """
        LOG.info("Reconfiguring subcloud %s." % subcloud_id)

        subcloud = db_api.subcloud_update(
            context, subcloud_id,
            deploy_status=consts.DEPLOY_STATE_PRE_DEPLOY)
        try:
            # Ansible inventory filename for the specified subcloud
            ansible_subcloud_inventory_file = self._get_ansible_filename(
                subcloud.name, INVENTORY_FILE_POSTFIX)

            deploy_command = None
            if "deploy_playbook" in payload:
                self._prepare_for_deployment(payload, subcloud.name)
                deploy_command = self.compose_deploy_command(
                    subcloud.name,
                    ansible_subcloud_inventory_file,
                    payload)

            del payload['sysadmin_password']
            apply_thread = threading.Thread(
                target=self.run_deploy,
                args=(subcloud, payload, context, None, None, deploy_command))
            apply_thread.start()
            return db_api.subcloud_db_model_to_dict(subcloud)
        except Exception:
            LOG.exception("Failed to reconfigure subcloud %s" % subcloud.name)
            # If we failed to reconfigure the subcloud, update the
            # deployment status
            db_api.subcloud_update(
                context, subcloud_id,
                deploy_status=consts.DEPLOY_STATE_DEPLOY_PREP_FAILED)

    def reinstall_subcloud(self, context, subcloud_id, payload):
        """Reinstall subcloud

        :param context: request context object
        :param subcloud_id: subcloud id from db
        :param payload: subcloud reinstall
        """
        # Retrieve the subcloud details from the database
        subcloud = db_api.subcloud_get(context, subcloud_id)

        LOG.info("Reinstalling subcloud %s."
% subcloud_id) try: ansible_subcloud_inventory_file = self._get_ansible_filename( subcloud.name, INVENTORY_FILE_POSTFIX) m_ks_client = OpenStackDriver( region_name=dccommon_consts.DEFAULT_REGION_NAME, region_clients=None).keystone_client cached_regionone_data = self._get_cached_regionone_data(m_ks_client) self._populate_payload_with_cached_keystone_data( cached_regionone_data, payload) payload['install_values']['ansible_ssh_pass'] = \ payload['sysadmin_password'] payload['install_values']['ansible_become_pass'] = \ payload['sysadmin_password'] payload['bootstrap-address'] = \ payload['install_values']['bootstrap_address'] deploy_command = None if "deploy_playbook" in payload: self._prepare_for_deployment(payload, subcloud.name) deploy_command = self.compose_deploy_command( subcloud.name, ansible_subcloud_inventory_file, payload) del payload['sysadmin_password'] payload['users'] = dict() for user in USERS_TO_REPLICATE: payload['users'][user] = \ str(keyring.get_password( user, dccommon_consts.SERVICES_USER_NAME)) utils.create_subcloud_inventory(payload, ansible_subcloud_inventory_file) self._create_intermediate_ca_cert(payload) self._write_subcloud_ansible_config(cached_regionone_data, payload) install_command = self.compose_install_command( subcloud.name, ansible_subcloud_inventory_file) apply_command = self.compose_apply_command( subcloud.name, ansible_subcloud_inventory_file) apply_thread = threading.Thread( target=self.run_deploy, args=(subcloud, payload, context, install_command, apply_command, deploy_command)) apply_thread.start() return db_api.subcloud_db_model_to_dict(subcloud) except Exception: LOG.exception("Failed to reinstall subcloud %s" % subcloud.name) # If we failed to reinstall the subcloud, update the # deployment status db_api.subcloud_update( context, subcloud_id, deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED) def _create_check_target_override_file(self, payload, subcloud_name): check_target_override_file = os.path.join( consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + '_check_target_values.yml') with open(check_target_override_file, 'w') as f_out: f_out.write( '---\n' ) for k, v in payload['check_target_values'].items(): f_out.write("%s: %s\n" % (k, json.dumps(v))) def _create_restore_override_file(self, payload, subcloud_name): restore_override_file = os.path.join( consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + '_restore_values.yml') with open(restore_override_file, 'w') as f_out: f_out.write( '---\n' ) for k, v in payload['restore_values'].items(): f_out.write("%s: %s\n" % (k, json.dumps(v))) def _prepare_for_restore(self, payload, subcloud_name): payload['check_target_values'] = dict() payload['check_target_values']['ansible_ssh_pass'] = \ payload['sysadmin_password'] payload['check_target_values']['software_version'] = SW_VERSION payload['check_target_values']['bootstrap_address'] = \ payload['bootstrap-address'] payload['check_target_values']['check_bootstrap_address'] = 'true' payload['check_target_values']['check_patches'] = 'false' self._create_check_target_override_file(payload, subcloud_name) payload['restore_values']['ansible_ssh_pass'] = \ payload['sysadmin_password'] payload['restore_values']['ansible_become_pass'] = \ payload['sysadmin_password'] payload['restore_values']['admin_password'] = \ str(keyring.get_password('CGCS', 'admin')) payload['restore_values']['skip_patches_restore'] = 'true' self._create_restore_override_file(payload, subcloud_name) def create_subcloud_backups(self, context, payload): """Backup subcloud or group of subclouds :param 
context: request context object :param payload: subcloud backup create detail """ subcloud_id = payload.get('subcloud') group_id = payload.get('group') # Retrieve either a single subcloud or all subclouds in a group subclouds = [db_api.subcloud_get(context, subcloud_id)] if subcloud_id\ else db_api.subcloud_get_for_group(context, group_id) # Validate the subclouds and filter the ones applicable for backup self._update_backup_status(context, subclouds, consts.BACKUP_STATE_VALIDATING) subclouds_to_backup, invalid_subclouds = \ self._validate_subclouds_for_backup(subclouds) self._mark_invalid_subclouds_for_backup(context, invalid_subclouds) self._update_backup_status(context, subclouds_to_backup, consts.BACKUP_STATE_PRE_BACKUP) # Use thread pool to limit number of operations in parallel backup_pool = greenpool.GreenPool(size=MAX_PARALLEL_SUBCLOUD_BACKUP_CREATE) # Spawn threads to back up each applicable subcloud backup_function = functools.partial(self._backup_subcloud, context, payload) self._run_parallel_group_operation('backup create', backup_function, backup_pool, subclouds_to_backup) LOG.info("Subcloud backup operation finished") def delete_subcloud_backups(self, context, release_version, payload): """Delete backups for subcloud or group of subclouds for a given release :param context: request context object :param release_version Backup release version to be deleted :param payload: subcloud backup delete detail """ local_delete = payload.get('local_only') subclouds_to_delete_backup, invalid_subclouds = \ self._filter_subclouds_for_backup_delete(context, payload, local_delete) # Spawn threads to back up each applicable subcloud backup_delete_function = functools.partial( self._delete_subcloud_backup, context, payload, release_version) # Use thread pool to limit number of operations in parallel max_parallel_operations = MAX_PARALLEL_SUBCLOUD_BACKUP_DELETE backup_delete_pool = greenpool.GreenPool(size=max_parallel_operations) failed_subclouds = self._run_parallel_group_operation( 'backup delete', backup_delete_function, backup_delete_pool, subclouds_to_delete_backup) all_failed = not set(subclouds_to_delete_backup) - set(failed_subclouds) if subclouds_to_delete_backup and all_failed: LOG.error("Backup delete operation failed for all applied subclouds") raise SubcloudBackupDeleteOperationFailed() if invalid_subclouds: self._warn_for_invalid_subclouds_on_backup_delete(invalid_subclouds) if failed_subclouds: self._warn_for_failed_subclouds_on_backup_delete(failed_subclouds) LOG.info("Subcloud backup delete operation finished") if invalid_subclouds or failed_subclouds: return self._build_subcloud_delete_notice(failed_subclouds, invalid_subclouds) @staticmethod def _validate_subclouds_for_backup(subclouds): valid_subclouds = [] invalid_subclouds = [] for subcloud in subclouds: if SubcloudManager.__is_valid_for_backup(subcloud): valid_subclouds.append(subcloud) else: invalid_subclouds.append(subcloud) return valid_subclouds, invalid_subclouds @staticmethod def _mark_invalid_subclouds_for_backup(context, invalid_subclouds): try: invalid_ids = {subcloud.id for subcloud in invalid_subclouds} invalid_names = {subcloud.name for subcloud in invalid_subclouds} if invalid_ids: # Set state on subclouds that failed validation LOG.warn('The following subclouds are not online and/or managed ' 'and/or in a valid deploy state, and will not be backed ' 'up: %s', ', '.join(list(invalid_names))) SubcloudManager._update_backup_status_by_ids( context, invalid_ids, consts.BACKUP_STATE_VALIDATE_FAILED) except 
DCManagerException as ex: LOG.exception("Subcloud backup validation failed") raise ex @staticmethod def _warn_for_invalid_subclouds_on_backup_delete(invalid_subclouds): invalid_names = {subcloud.name for subcloud in invalid_subclouds} LOG.warn('The following subclouds were not online and/or managed ' 'and/or in a valid deploy state, and thus were not be reached ' 'for backup delete: %s', ', '.join(list(invalid_names))) @staticmethod def _warn_for_failed_subclouds_on_backup_delete(failed_subclouds): failed_subcloud_names = {subcloud.name for subcloud in failed_subclouds} LOG.warn('Backup delete operation failed for some subclouds, ' 'check previous logs for details. Failed subclouds: %s', ', '.join(list(failed_subcloud_names))) @staticmethod def __is_valid_for_backup(subcloud): return (subcloud.availability_status == dccommon_consts.AVAILABILITY_ONLINE and subcloud.management_state == dccommon_consts.MANAGEMENT_MANAGED and subcloud.deploy_status not in consts.INVALID_DEPLOY_STATES_FOR_BACKUP) @staticmethod def _update_backup_status(context, subclouds, backup_status): subcloud_ids = [subcloud.id for subcloud in subclouds] return SubcloudManager.\ _update_backup_status_by_ids(context, subcloud_ids, backup_status) @staticmethod def _update_backup_status_by_ids(context, subcloud_ids, backup_status): validate_state_form = { Subcloud.backup_status.name: backup_status } db_api.subcloud_bulk_update_by_ids(context, subcloud_ids, validate_state_form) @staticmethod def _run_parallel_group_operation(op_type, op_function, thread_pool, subclouds): failed_subclouds = [] processed = 0 for subcloud, success in thread_pool.imap(op_function, subclouds): processed += 1 if not success: failed_subclouds.append(subcloud) completion = float(processed) / float(len(subclouds)) * 100 remaining = len(subclouds) - processed LOG.info("Processed subcloud %s for %s (operation %.0f%% " "complete, %d subcloud(s) remaining)" % (subcloud.name, op_type, completion, remaining)) return failed_subclouds def _backup_subcloud(self, context, payload, subcloud): try: subcloud_inventory_file = self._create_subcloud_inventory_file(subcloud) # Prepare for backup self._create_overrides_for_backup(payload, subcloud.name) backup_command = self.compose_backup_command( subcloud.name, subcloud_inventory_file) self._clear_subcloud_backup_failure_alarm_if_exists(subcloud) except Exception: self._fail_subcloud_backup_prep(context, subcloud) return subcloud, False success = self._run_subcloud_backup_create_playbook(subcloud, backup_command, context) return subcloud, success def _filter_subclouds_for_backup_delete(self, context, payload, local_delete): subcloud_id = payload.get('subcloud') group_id = payload.get('group') # Retrieve either a single subcloud or all subclouds in a group subclouds = [db_api.subcloud_get(context, subcloud_id)] if subcloud_id \ else db_api.subcloud_get_for_group(context, group_id) invalid_subclouds = [] # Subcloud state validation only required for local delete if local_delete: # Use same criteria defined for subcloud backup create subclouds_to_delete_backup, invalid_subclouds = \ self._validate_subclouds_for_backup(subclouds) else: # Otherwise, validation is unnecessary, since connection is not required subclouds_to_delete_backup = subclouds return subclouds_to_delete_backup, invalid_subclouds def _delete_subcloud_backup(self, context, payload, release_version, subcloud): try: self._create_overrides_for_backup_delete(payload, subcloud.name, release_version) inventory_file = 
self._create_subcloud_inventory_file(subcloud) delete_command = self.compose_backup_delete_command( subcloud.name, inventory_file) except Exception: LOG.exception("Failed to prepare subcloud %s for backup delete" % subcloud.name) return subcloud, False success = self._run_subcloud_backup_delete_playbook(context, subcloud, delete_command) return subcloud, success @staticmethod def _build_subcloud_delete_notice(failed_subclouds, invalid_subclouds): invalid_subcloud_names = [subcloud.name for subcloud in invalid_subclouds] failed_subcloud_names = [subcloud.name for subcloud in failed_subclouds] notice = "Subcloud backup delete operation completed with warnings:\n" if invalid_subclouds: notice += ("The following subclouds were skipped for local backup " "delete: %s." % ' ,'.join(invalid_subcloud_names)) if failed_subclouds: notice += ("The following subclouds failed during backup delete " "operation: %s." % ' ,'.join(failed_subcloud_names)) return notice def _create_subcloud_inventory_file(self, subcloud): # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = self._get_ansible_filename( subcloud.name, INVENTORY_FILE_POSTFIX) # Use subcloud floating IP for host reachability keystone_client = OpenStackDriver( region_name=subcloud.name, region_clients=None).keystone_client oam_fip = utils.get_oam_addresses(subcloud.name, keystone_client)\ .oam_floating_ip # Add parameters used to generate inventory subcloud_params = {'name': subcloud.name, 'bootstrap-address': oam_fip} utils.create_subcloud_inventory(subcloud_params, ansible_subcloud_inventory_file) return ansible_subcloud_inventory_file def _create_overrides_for_backup(self, payload, subcloud_name): # Set override names as expected by the playbook if not payload.get('override_values'): payload['override_values'] = {} payload['override_values']['local'] = \ payload['local_only'] or False payload['override_values']['backup_user_local_registry'] = \ payload['registry_images'] or False if not payload['local_only']: payload['override_values']['central_backup_dir'] = CENTRAL_BACKUP_DIR payload['override_values']['ansible_ssh_pass'] = \ payload['sysadmin_password'] payload['override_values']['ansible_become_pass'] = \ payload['sysadmin_password'] payload['override_values']['admin_password'] = \ str(keyring.get_password('CGCS', 'admin')) if payload.get('backup_values'): for key, value in payload.get('backup_values').items(): payload['override_values'][key] = value self._create_backup_overrides_file(payload, subcloud_name, 'backup_create_values') def _create_overrides_for_backup_delete(self, payload, subcloud_name, release_version): # Set override names as expected by the playbook if not payload.get('override_values'): payload['override_values'] = {} payload['override_values']['software_version'] = release_version payload['override_values']['local'] = \ payload['local_only'] or False if not payload['local_only']: payload['override_values']['central_backup_dir'] = CENTRAL_BACKUP_DIR # payload['override_values']['backup_dir'] = \ # '/opt/platform-backup/backups' if payload['local_only'] else None payload['override_values']['ansible_ssh_pass'] = \ payload['sysadmin_password'] payload['override_values']['ansible_become_pass'] = \ payload['sysadmin_password'] self._create_backup_overrides_file(payload, subcloud_name, 'backup_delete_values') def _create_backup_overrides_file(self, payload, subcloud_name, filename_suffix): backup_overrides_file = os.path.join( consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + '_' + 
filename_suffix + '.yml') with open(backup_overrides_file, 'w') as f_out: f_out.write( '---\n' ) for k, v in payload['override_values'].items(): f_out.write("%s: %s\n" % (k, json.dumps(v))) def _run_subcloud_backup_create_playbook(self, subcloud, backup_command, context): log_file = os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + \ '_playbook_output.log' db_api.subcloud_update( context, subcloud.id, backup_status=consts.BACKUP_STATE_IN_PROGRESS) # Run the subcloud backup playbook try: run_playbook(log_file, backup_command) db_api.subcloud_update( context, subcloud.id, backup_status=consts.BACKUP_STATE_COMPLETE, backup_datetime=datetime.datetime.utcnow()) LOG.info("Successfully backed up subcloud %s" % subcloud.name) return True except PlaybookExecutionFailed: self._fail_subcloud_backup_operation(context, log_file, subcloud) return False @staticmethod def _run_subcloud_backup_delete_playbook(context, subcloud, delete_command): log_file = os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + \ '_playbook_output.log' try: # Run the subcloud backup delete playbook run_playbook(log_file, delete_command) # Set backup status to unknown after delete, since most recent backup may # have been deleted db_api.subcloud_bulk_update_by_ids( context, [subcloud.id], {Subcloud.backup_status.name: consts.BACKUP_STATE_UNKNOWN, Subcloud.backup_datetime.name: None}) LOG.info("Successfully deleted backup for subcloud %s" % subcloud.name) return True except PlaybookExecutionFailed: LOG.error("Failed to delete backup for subcloud %s, check individual " "log at %s for detailed output." % (subcloud.name, log_file)) return False @staticmethod def _fail_subcloud_backup_prep(context, subcloud): LOG.exception("Failed to prepare subcloud %s for backup" % subcloud.name) db_api.subcloud_update( context, subcloud.id, backup_status=consts.BACKUP_STATE_PREP_FAILED) def _fail_subcloud_backup_operation(self, context, log_file, subcloud): msg = "Failed to backup subcloud %s, check individual log at %s for " \ "detailed output." % (subcloud.name, log_file) LOG.error(msg) db_api.subcloud_update( context, subcloud.id, backup_status=consts.BACKUP_STATE_FAILED) self._set_subcloud_backup_failure_alarm(subcloud) def _clear_subcloud_backup_failure_alarm_if_exists(self, subcloud): entity_instance_id = "subcloud=%s" % subcloud.name try: fault = self.fm_api.get_fault( fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED, entity_instance_id) if fault: self.fm_api.clear_fault( fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED, # noqa entity_instance_id) except Exception as e: LOG.exception(e) def _set_subcloud_backup_failure_alarm(self, subcloud): entity_instance_id = "subcloud=%s" % subcloud.name try: fault = fm_api.Fault( alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED, # noqa alarm_state=fm_const.FM_ALARM_STATE_SET, entity_type_id=fm_const.FM_ENTITY_TYPE_SUBCLOUD, entity_instance_id=entity_instance_id, severity=fm_const.FM_ALARM_SEVERITY_MINOR, reason_text=("Subcloud Backup Failure (subcloud=%s)" % subcloud.name), alarm_type=fm_const.FM_ALARM_TYPE_3, probable_cause=fm_const.ALARM_PROBABLE_CAUSE_UNKNOWN, proposed_repair_action="Retry subcloud backup after checking input " "file. 
If problem persists, please contact " "next level of support.", service_affecting=False) self.fm_api.set_fault(fault) except Exception as e: LOG.exception(e) def restore_subcloud(self, context, subcloud_id, payload): """Restore subcloud :param context: request context object :param subcloud_id: subcloud id from db :param payload: subcloud restore detail """ # Retrieve the subcloud details from the database subcloud = db_api.subcloud_get(context, subcloud_id) if subcloud.management_state != dccommon_consts.MANAGEMENT_UNMANAGED: raise exceptions.SubcloudNotUnmanaged() db_api.subcloud_update(context, subcloud_id, deploy_status=consts.DEPLOY_STATE_PRE_RESTORE) try: # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = self._get_ansible_filename( subcloud.name, INVENTORY_FILE_POSTFIX) # Add parameters used to generate inventory payload['name'] = subcloud.name payload['bootstrap-address'] = \ payload['install_values']['bootstrap_address'] payload['software_version'] = SW_VERSION install_command = None if payload['with_install']: # Redfish capable subclouds LOG.info("Reinstalling subcloud %s." % subcloud.name) # Disegard the current 'image' config. Always reinstall with # the system controller active image in dc-vault. matching_iso, matching_sig = utils.get_vault_load_files(SW_VERSION) payload['install_values'].update({'image': matching_iso}) payload['install_values']['ansible_ssh_pass'] = \ payload['sysadmin_password'] utils.create_subcloud_inventory(payload, ansible_subcloud_inventory_file) install_command = self.compose_install_command( subcloud.name, ansible_subcloud_inventory_file) else: # Non Redfish capable subcloud # Shouldn't get here as the API has already rejected the request. return # Prepare for restore self._prepare_for_restore(payload, subcloud.name) check_target_command = self.compose_check_target_command( subcloud.name, ansible_subcloud_inventory_file, payload) restore_command = self.compose_restore_command( subcloud.name, ansible_subcloud_inventory_file, payload) apply_thread = threading.Thread( target=self.run_deploy, args=(subcloud, payload, context, install_command, None, None, check_target_command, restore_command)) apply_thread.start() return db_api.subcloud_db_model_to_dict(subcloud) except Exception: LOG.exception("Failed to restore subcloud %s" % subcloud.name) db_api.subcloud_update( context, subcloud_id, deploy_status=consts.DEPLOY_STATE_RESTORE_PREP_FAILED) # TODO(kmacleod) add outer try/except here to catch and log unexpected # exception. 
As this stands, any uncaught exception is a silent (unlogged) # failure @staticmethod def run_deploy(subcloud, payload, context, install_command=None, apply_command=None, deploy_command=None, check_target_command=None, restore_command=None, rehome_command=None): log_file = os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + \ '_playbook_output.log' if install_command: LOG.info("Preparing remote install of %s" % subcloud.name) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_PRE_INSTALL) try: install = SubcloudInstall(context, subcloud.name) install.prep(consts.ANSIBLE_OVERRIDES_PATH, payload['install_values']) except Exception as e: LOG.exception(e) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED) LOG.error(str(e)) install.cleanup() return # Run the remote install playbook LOG.info("Starting remote install of %s" % subcloud.name) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_INSTALLING, error_description=consts.ERROR_DESC_EMPTY) try: install.install(consts.DC_ANSIBLE_LOG_DIR, install_command) except Exception as e: msg = utils.find_ansible_error_msg( subcloud.name, log_file, consts.DEPLOY_STATE_INSTALLING) LOG.error(str(e)) LOG.error(msg) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_INSTALL_FAILED, error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]) install.cleanup() return install.cleanup() LOG.info("Successfully installed %s" % subcloud.name) # Leave the following block here in case there is another use # case besides subcloud restore where validating host post # fresh install is necessary. if check_target_command: try: run_playbook(log_file, check_target_command) except PlaybookExecutionFailed: msg = "Failed to run the validate host playbook" \ " for subcloud %s, check individual log at " \ "%s for detailed output." 
% ( subcloud.name, log_file) LOG.error(msg) if restore_command: db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_RESTORE_PREP_FAILED) return LOG.info("Successfully checked subcloud %s" % subcloud.name) if apply_command: try: # Update the subcloud to bootstrapping db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_BOOTSTRAPPING, error_description=consts.ERROR_DESC_EMPTY) except Exception as e: LOG.exception(e) raise e # Run the ansible boostrap-subcloud playbook LOG.info("Starting bootstrap of %s" % subcloud.name) try: run_playbook(log_file, apply_command) except PlaybookExecutionFailed: msg = utils.find_ansible_error_msg( subcloud.name, log_file, consts.DEPLOY_STATE_BOOTSTRAPPING) LOG.error(msg) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_BOOTSTRAP_FAILED, error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]) return LOG.info("Successfully bootstrapped %s" % subcloud.name) if deploy_command: # Run the custom deploy playbook LOG.info("Starting deploy of %s" % subcloud.name) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_DEPLOYING, error_description=consts.ERROR_DESC_EMPTY) try: run_playbook(log_file, deploy_command) except PlaybookExecutionFailed: msg = utils.find_ansible_error_msg( subcloud.name, log_file, consts.DEPLOY_STATE_DEPLOYING) LOG.error(msg) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_DEPLOY_FAILED, error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]) return LOG.info("Successfully deployed %s" % subcloud.name) elif restore_command: db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_RESTORING) # Run the restore platform playbook try: run_playbook(log_file, restore_command) except PlaybookExecutionFailed: msg = "Failed to run the subcloud restore playbook" \ " for subcloud %s, check individual log at " \ "%s for detailed output." % ( subcloud.name, log_file) LOG.error(msg) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_RESTORE_FAILED) return LOG.info("Successfully restored controller-0 of subcloud %s" % subcloud.name) if rehome_command: # Update the deploy status to rehoming db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_REHOMING) # Run the rehome-subcloud playbook try: run_playbook(log_file, rehome_command) except PlaybookExecutionFailed: msg = "Failed to run the subcloud rehome playbook" \ " for subcloud %s, check individual log at " \ "%s for detailed output." % ( subcloud.name, log_file) LOG.error(msg) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_REHOME_FAILED) return LOG.info("Successfully rehomed subcloud %s" % subcloud.name) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_DONE, error_description=consts.ERROR_DESC_EMPTY) def _create_addn_hosts_dc(self, context): """Generate the addn_hosts_dc file for hostname/ip translation""" addn_hosts_dc = os.path.join(CONFIG_PATH, ADDN_HOSTS_DC) addn_hosts_dc_temp = addn_hosts_dc + '.temp' subclouds = db_api.subcloud_get_all(context) with open(addn_hosts_dc_temp, 'w') as f_out_addn_dc_temp: for subcloud in subclouds: addn_dc_line = subcloud.management_start_ip + ' ' + \ subcloud.name + '\n' f_out_addn_dc_temp.write(addn_dc_line) # if no more subclouds, create empty file so dnsmasq does not # emit an error log. 
if not subclouds: f_out_addn_dc_temp.write(' ') if not filecmp.cmp(addn_hosts_dc_temp, addn_hosts_dc): os.rename(addn_hosts_dc_temp, addn_hosts_dc) # restart dnsmasq so it can re-read our addn_hosts file. os.system("pkill -HUP dnsmasq") def _write_subcloud_ansible_config(self, cached_regionone_data, payload): """Create the override file for usage with the specified subcloud""" overrides_file = os.path.join(consts.ANSIBLE_OVERRIDES_PATH, payload['name'] + '.yml') mgmt_pool = cached_regionone_data['mgmt_pool'] mgmt_floating_ip = mgmt_pool.floating_address mgmt_subnet = "%s/%d" % (mgmt_pool.network, mgmt_pool.prefix) oam_addresses = cached_regionone_data['oam_addresses'] oam_floating_ip = oam_addresses.oam_floating_ip oam_subnet = oam_addresses.oam_subnet with open(overrides_file, 'w') as f_out_overrides_file: f_out_overrides_file.write( '---' '\nregion_config: yes' '\ndistributed_cloud_role: subcloud' '\nsystem_controller_subnet: ' + mgmt_subnet + '\nsystem_controller_floating_address: ' + mgmt_floating_ip + '\nsystem_controller_oam_subnet: ' + oam_subnet + '\nsystem_controller_oam_floating_address: ' + oam_floating_ip + '\n' ) for k, v in payload.items(): if k not in ['deploy_playbook', 'deploy_values', 'deploy_config', 'deploy_chart', 'deploy_overrides', 'install_values']: f_out_overrides_file.write("%s: %s\n" % (k, json.dumps(v))) def _write_deploy_files(self, payload, subcloud_name): """Create the deploy value files for the subcloud""" deploy_values_file = os.path.join( consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + '_deploy_values.yml') with open(deploy_values_file, 'w') as f_out_deploy_values_file: json.dump(payload['deploy_values'], f_out_deploy_values_file) def _prepare_for_deployment(self, payload, subcloud_name): payload['deploy_values'] = dict() payload['deploy_values']['ansible_become_pass'] = \ payload['sysadmin_password'] payload['deploy_values']['ansible_ssh_pass'] = \ payload['sysadmin_password'] payload['deploy_values']['admin_password'] = \ str(keyring.get_password('CGCS', 'admin')) payload['deploy_values']['deployment_config'] = \ payload[consts.DEPLOY_CONFIG] payload['deploy_values']['deployment_manager_chart'] = \ payload[consts.DEPLOY_CHART] payload['deploy_values']['deployment_manager_overrides'] = \ payload[consts.DEPLOY_OVERRIDES] self._write_deploy_files(payload, subcloud_name) def _delete_subcloud_routes(self, context, subcloud): """Delete the routes to this subcloud""" keystone_client = OpenStackDriver( region_name=dccommon_consts.DEFAULT_REGION_NAME, region_clients=None).keystone_client # Delete the route to this subcloud on the management interface on # both controllers. 
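        # The network, prefix length, gateway and metric passed to
        # delete_route() below mirror the values used by add_subcloud()
        # when the route was created.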
management_subnet = netaddr.IPNetwork(subcloud.management_subnet) endpoint = keystone_client.endpoint_cache.get_endpoint('sysinv') sysinv_client = SysinvClient(dccommon_consts.DEFAULT_REGION_NAME, keystone_client.session, endpoint=endpoint) cached_regionone_data = self._get_cached_regionone_data(keystone_client, sysinv_client) for mgmt_if_uuid in cached_regionone_data['mgmt_interface_uuids']: sysinv_client.delete_route(mgmt_if_uuid, str(management_subnet.ip), management_subnet.prefixlen, str(netaddr.IPAddress(subcloud.systemcontroller_gateway_ip)), 1) @staticmethod def _delete_subcloud_cert(subcloud_name): cert_name = SubcloudManager._get_subcloud_cert_name(subcloud_name) secret_name = SubcloudManager._get_subcloud_cert_secret_name( subcloud_name) kube = kubeoperator.KubeOperator() kube.delete_cert_manager_certificate(CERT_NAMESPACE, cert_name) kube.kube_delete_secret(secret_name, CERT_NAMESPACE) LOG.info("cert %s and secret %s are deleted" % (cert_name, secret_name)) def _remove_subcloud_details(self, context, subcloud, ansible_subcloud_inventory_file): """Remove subcloud details from database and inform orchestrators""" # Inform orchestrators that subcloud has been deleted try: self.dcorch_rpc_client.del_subcloud(context, subcloud.name) except RemoteError as e: # TODO(kmacleod): this should be caught as explicit remote exception # Fix when centos/python2 is no longer supported if "SubcloudNotFound" in str(e): pass # delete the associated alarm entry try: db_api.subcloud_alarms_delete(context, subcloud.name) except RemoteError as e: # TODO(kmacleod): fix same with above if "SubcloudNotFound" in str(e): pass # We only delete subcloud endpoints, region and user information # in the Central Region. The subcloud is already unmanaged and powered # down so is not accessible. Therefore set up a session with the # Central Region Keystone ONLY. keystone_client = OpenStackDriver( region_name=dccommon_consts.DEFAULT_REGION_NAME, region_clients=None).keystone_client # Delete keystone endpoints for subcloud keystone_client.delete_endpoints(subcloud.name) keystone_client.delete_region(subcloud.name) # Delete the routes to this subcloud self._delete_subcloud_routes(context, subcloud) # Remove the subcloud from the database try: db_api.subcloud_destroy(context, subcloud.id) except Exception as e: LOG.exception(e) raise e # Delete the ansible inventory for the new subcloud utils.delete_subcloud_inventory(ansible_subcloud_inventory_file) # Delete the subcloud intermediate certificate SubcloudManager._delete_subcloud_cert(subcloud.name) # Regenerate the addn_hosts_dc file self._create_addn_hosts_dc(context) def delete_subcloud(self, context, subcloud_id): """Delete subcloud and notify orchestrators. :param context: request context object. :param subcloud_id: id of subcloud to delete """ LOG.info("Deleting subcloud %s." % subcloud_id) # Retrieve the subcloud details from the database subcloud = db_api.subcloud_get(context, subcloud_id) # Semantic checking if subcloud.management_state != dccommon_consts.MANAGEMENT_UNMANAGED: raise exceptions.SubcloudNotUnmanaged() if subcloud.availability_status == \ dccommon_consts.AVAILABILITY_ONLINE: raise exceptions.SubcloudNotOffline() # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = self._get_ansible_filename( subcloud.name, INVENTORY_FILE_POSTFIX) self._remove_subcloud_details(context, subcloud, ansible_subcloud_inventory_file) # Clear any subcloud alarms. 
        # Note that endpoint out-of-sync alarms should have been cleared when
        # the subcloud was unmanaged and the endpoint sync statuses were set to
        # unknown.
        #
        # TODO(kmacleod): Until an API is available to clear all alarms
        # for a subcloud, we manually clear the following:
        # - subcloud offline
        # - subcloud resource out of sync
        for alarm_id, entity_instance_id in (
                (fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
                 "subcloud=%s" % subcloud.name),
                (fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,
                 "subcloud=%s.resource=%s" %
                 (subcloud.name, dccommon_consts.ENDPOINT_TYPE_DC_CERT))):
            try:
                fault = self.fm_api.get_fault(alarm_id, entity_instance_id)
                if fault:
                    self.fm_api.clear_fault(alarm_id, entity_instance_id)
            except Exception as e:
                LOG.info(
                    "Problem clearing fault for subcloud %s, alarm_id=%s" %
                    (subcloud.name, alarm_id))
                LOG.exception(e)

    def update_subcloud(self, context, subcloud_id, management_state=None,
                        description=None, location=None, group_id=None,
                        data_install=None, force=None):
        """Update subcloud and notify orchestrators.

        :param context: request context object
        :param subcloud_id: id of subcloud to update
        :param management_state: new management state
        :param description: new description
        :param location: new location
        :param group_id: new subcloud group id
        :param data_install: subcloud install values
        :param force: force flag
        """
        LOG.info("Updating subcloud %s." % subcloud_id)

        # Get the subcloud details from the database
        subcloud = db_api.subcloud_get(context, subcloud_id)
        original_management_state = subcloud.management_state

        # Semantic checking
        if management_state:
            if management_state == dccommon_consts.MANAGEMENT_UNMANAGED:
                if subcloud.management_state == dccommon_consts.MANAGEMENT_UNMANAGED:
                    LOG.warning("Subcloud %s already unmanaged" % subcloud_id)
                    raise exceptions.BadRequest(
                        resource='subcloud',
                        msg='Subcloud is already unmanaged')
            elif management_state == dccommon_consts.MANAGEMENT_MANAGED:
                if subcloud.management_state == dccommon_consts.MANAGEMENT_MANAGED:
                    LOG.warning("Subcloud %s already managed" % subcloud_id)
                    raise exceptions.BadRequest(
                        resource='subcloud',
                        msg='Subcloud is already managed')
                elif not force:
                    if (subcloud.deploy_status != consts.DEPLOY_STATE_DONE and
                            not prestage.is_deploy_status_prestage(
                                subcloud.deploy_status)):
                        LOG.warning("Subcloud %s can be managed only when "
                                    "deploy_status is complete" % subcloud_id)
                        raise exceptions.BadRequest(
                            resource='subcloud',
                            msg='Subcloud can be managed only if deploy status is complete')

                    if subcloud.availability_status != \
                            dccommon_consts.AVAILABILITY_ONLINE:
                        LOG.warning("Subcloud %s is not online" % subcloud_id)
                        raise exceptions.SubcloudNotOnline()
            else:
                LOG.error("Invalid management_state %s" % management_state)
                raise exceptions.InternalError()

        subcloud = db_api.subcloud_update(context,
                                          subcloud_id,
                                          management_state=management_state,
                                          description=description,
                                          location=location,
                                          group_id=group_id,
                                          data_install=data_install)

        # Inform orchestrators that subcloud has been updated
        if management_state:
            try:
                # Inform orchestrator of state change
                self.dcorch_rpc_client.update_subcloud_states(
                    context,
                    subcloud.name,
                    management_state,
                    subcloud.availability_status)

                LOG.info('Notifying dcorch, subcloud:%s management: %s, '
                         'availability:%s' % (subcloud.name,
                                              management_state,
                                              subcloud.availability_status))
            except Exception as e:
                LOG.exception(e)
                LOG.warn('Problem informing dcorch of subcloud '
                         'state change, reverting to original state, '
                         'subcloud: %s' % subcloud.name)
                management_state = original_management_state
                subcloud = \
db_api.subcloud_update(context, subcloud_id, management_state=management_state, description=description, location=location) if management_state == dccommon_consts.MANAGEMENT_UNMANAGED: # set all endpoint statuses to unknown, except the dc-cert # endpoint which continues to be audited for unmanaged # subclouds self.state_rpc_client.update_subcloud_endpoint_status_sync( context, subcloud_name=subcloud.name, endpoint_type=None, sync_status=dccommon_consts.SYNC_STATUS_UNKNOWN, ignore_endpoints=[dccommon_consts.ENDPOINT_TYPE_DC_CERT]) elif management_state == dccommon_consts.MANAGEMENT_MANAGED: # Subcloud is managed # Tell cert-mon to audit endpoint certificate LOG.info('Request for managed audit for %s' % subcloud.name) dc_notification = dcmanager_rpc_client.DCManagerNotifications() dc_notification.subcloud_managed(context, subcloud.name) # Since sysinv user is sync'ed during bootstrap, trigger the # related audits. Patch and load audits are delayed until the # identity resource synchronized by dcdbsync is complete. exclude_endpoints = [dccommon_consts.ENDPOINT_TYPE_PATCHING, dccommon_consts.ENDPOINT_TYPE_LOAD] self.audit_rpc_client.trigger_subcloud_audits( context, subcloud_id, exclude_endpoints) return db_api.subcloud_db_model_to_dict(subcloud) def update_subcloud_sync_endpoint_type(self, context, subcloud_name, endpoint_type_list, openstack_installed): operation = 'add' if openstack_installed else 'remove' func_switcher = { 'add': ( self.dcorch_rpc_client.add_subcloud_sync_endpoint_type, db_api.subcloud_status_create ), 'remove': ( self.dcorch_rpc_client.remove_subcloud_sync_endpoint_type, db_api.subcloud_status_delete ) } try: subcloud = db_api.subcloud_get_by_name(context, subcloud_name) except Exception: LOG.exception("Failed to get subcloud by name: %s" % subcloud_name) raise try: # Notify dcorch to add/remove sync endpoint type list func_switcher[operation][0](self.context, subcloud_name, endpoint_type_list) LOG.info('Notifying dcorch, subcloud: %s new sync endpoint: %s' % (subcloud_name, endpoint_type_list)) # Update subcloud status table by adding/removing openstack sync # endpoint types for endpoint_type in endpoint_type_list: func_switcher[operation][1](self.context, subcloud.id, endpoint_type) # Update openstack_installed of subcloud table db_api.subcloud_update(self.context, subcloud.id, openstack_installed=openstack_installed) except Exception: LOG.exception('Problem informing dcorch of subcloud sync endpoint' ' type change, subcloud: %s' % subcloud_name) def handle_subcloud_operations_in_progress(self): """Identify subclouds in transitory stages and update subcloud state to failure. """ LOG.info('Identifying subclouds in transitory stages.') subclouds = db_api.subcloud_get_all(self.context) for subcloud in subclouds: # Identify subclouds in transitory states new_deploy_status = TRANSITORY_STATES.get(subcloud.deploy_status) new_backup_status = TRANSITORY_BACKUP_STATES.get(subcloud.backup_status) # update deploy and backup states to the corresponding failure states if new_deploy_status or new_backup_status: if new_deploy_status: LOG.info("Changing subcloud %s deploy status from %s to %s." % (subcloud.name, subcloud.deploy_status, new_deploy_status)) if new_backup_status: LOG.info("Changing subcloud %s backup status from %s to %s." 
% (subcloud.name, subcloud.backup_status, new_backup_status)) db_api.subcloud_update( self.context, subcloud.id, deploy_status=new_deploy_status or subcloud.deploy_status, backup_status=new_backup_status or subcloud.backup_status ) @staticmethod def prestage_subcloud(context, payload): """Subcloud prestaging""" return prestage.prestage_subcloud(context, payload) @utils.synchronized("regionone-data-cache", external=False) def _get_cached_regionone_data(self, regionone_keystone_client, regionone_sysinv_client=None): if (not SubcloudManager.regionone_data or SubcloudManager.regionone_data['expiry'] <= datetime.datetime.utcnow()): user_list = regionone_keystone_client.get_enabled_users(id_only=False) for user in user_list: if user.name == dccommon_consts.ADMIN_USER_NAME: SubcloudManager.regionone_data['admin_user_id'] = user.id elif user.name == dccommon_consts.SYSINV_USER_NAME: SubcloudManager.regionone_data['sysinv_user_id'] = user.id elif user.name == dccommon_consts.DCMANAGER_USER_NAME: SubcloudManager.regionone_data['dcmanager_user_id'] = user.id project_list = regionone_keystone_client.get_enabled_projects(id_only=False) for project in project_list: if project.name == dccommon_consts.ADMIN_PROJECT_NAME: SubcloudManager.regionone_data['admin_project_id'] = project.id elif project.name == dccommon_consts.SERVICES_USER_NAME: SubcloudManager.regionone_data['services_project_id'] = project.id if regionone_sysinv_client is None: endpoint = regionone_keystone_client.endpoint_cache.get_endpoint('sysinv') regionone_sysinv_client = SysinvClient( dccommon_consts.DEFAULT_REGION_NAME, regionone_keystone_client.session, endpoint=endpoint) controllers = regionone_sysinv_client.get_controller_hosts() mgmt_interface_uuids = [] for controller in controllers: mgmt_interface = regionone_sysinv_client.get_management_interface( controller.hostname) if mgmt_interface is not None: mgmt_interface_uuids.append(mgmt_interface.uuid) SubcloudManager.regionone_data['mgmt_interface_uuids'] = mgmt_interface_uuids SubcloudManager.regionone_data['mgmt_pool'] = \ regionone_sysinv_client.get_management_address_pool() SubcloudManager.regionone_data['oam_addresses'] = \ regionone_sysinv_client.get_oam_addresses() SubcloudManager.regionone_data['expiry'] = \ datetime.datetime.utcnow() + datetime.timedelta(hours=1) LOG.info("RegionOne cached data updated %s" % SubcloudManager.regionone_data) cached_regionone_data = SubcloudManager.regionone_data return cached_regionone_data def _populate_payload_with_cached_keystone_data(self, cached_data, payload): payload['system_controller_keystone_admin_user_id'] = \ cached_data['admin_user_id'] payload['system_controller_keystone_admin_project_id'] = \ cached_data['admin_project_id'] payload['system_controller_keystone_services_project_id'] = \ cached_data['services_project_id'] payload['system_controller_keystone_sysinv_user_id'] = \ cached_data['sysinv_user_id'] payload['system_controller_keystone_dcmanager_user_id'] = \ cached_data['dcmanager_user_id'] # While at it, add the admin and service user passwords to the payload so # they get copied to the overrides file payload['ansible_become_pass'] = payload['sysadmin_password'] payload['ansible_ssh_pass'] = payload['sysadmin_password'] payload['admin_password'] = str(keyring.get_password('CGCS', 'admin'))