Distributed Cloud Kubernetes Upgrade Orchestration

This commit introduces DC Kubernetes upgrade orchestration.

- Added a new kubernetes endpoint for DC audit
  The audit verifies that the active target kubernetes version in the
  subcloud matches that of the system controller.
- Wiring the kubernetes strategy to the kube orch thread
- Add protections to orchestrator shutdown for when 'start' encounters
 a problem.
- Added 'kubernetes' as a valid strategy type for:
   create / abort / delete / show
- Update the endpoint flag file name to trigger updating the new
 endpoint
- Added a kubernetes orch thread similar to sw upgrade orch thread
 with these states:
  - updating kube patches
  - creating vim patch strategy
  - applying vim patch strategy
  - deleting vim patch strategy
  - creating vim kube upgrade strategy
  - applying vim kube upgrade strategy

Patch vim orchestration steps will be skipped if there are no patches
that need to be applied in the subcloud.

Change-Id: Ic508ed1a504dba177c100a9caf783e87bc10193a
Story: 2008137
Task: 41177
Depends-On: https://review.opendev.org/c/starlingx/nfv/+/767421
Signed-off-by: albailey <al.bailey@windriver.com>
This commit is contained in:
albailey
2020-11-17 15:47:58 -06:00
parent 0523b53884
commit b6638ce9a1
49 changed files with 2161 additions and 622 deletions

View File

@@ -12,7 +12,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
@@ -52,6 +52,30 @@ SSL_PEM_FILE = os.path.join(SSL_CERT_DIR, SSL_CERT_FILE)
DOCKER_REGISTRY_CERT_FILE = os.path.join(SSL_CERT_DIR, "registry-cert.crt")
DOCKER_REGISTRY_KEY_FILE = os.path.join(SSL_CERT_DIR, "registry-cert.key")
# The following constants are declared in sysinv/common/kubernetes.py
# Kubernetes upgrade states
KUBE_UPGRADE_STARTED = 'upgrade-started'
KUBE_UPGRADE_DOWNLOADING_IMAGES = 'downloading-images'
KUBE_UPGRADE_DOWNLOADING_IMAGES_FAILED = 'downloading-images-failed'
KUBE_UPGRADE_DOWNLOADED_IMAGES = 'downloaded-images'
KUBE_UPGRADING_FIRST_MASTER = 'upgrading-first-master'
KUBE_UPGRADING_FIRST_MASTER_FAILED = 'upgrading-first-master-failed'
KUBE_UPGRADED_FIRST_MASTER = 'upgraded-first-master'
KUBE_UPGRADING_NETWORKING = 'upgrading-networking'
KUBE_UPGRADING_NETWORKING_FAILED = 'upgrading-networking-failed'
KUBE_UPGRADED_NETWORKING = 'upgraded-networking'
KUBE_UPGRADING_SECOND_MASTER = 'upgrading-second-master'
KUBE_UPGRADING_SECOND_MASTER_FAILED = 'upgrading-second-master-failed'
KUBE_UPGRADED_SECOND_MASTER = 'upgraded-second-master'
KUBE_UPGRADING_KUBELETS = 'upgrading-kubelets'
KUBE_UPGRADE_COMPLETE = 'upgrade-complete'
# Kubernetes host upgrade statuses
KUBE_HOST_UPGRADING_CONTROL_PLANE = 'upgrading-control-plane'
KUBE_HOST_UPGRADING_CONTROL_PLANE_FAILED = 'upgrading-control-plane-failed'
KUBE_HOST_UPGRADING_KUBELET = 'upgrading-kubelet'
KUBE_HOST_UPGRADING_KUBELET_FAILED = 'upgrading-kubelet-failed'
# The following is the name of the host filesystem 'scratch' which is used
# by dcmanager upgrade orchestration for the load import operations.
HOST_FS_NAME_SCRATCH = 'scratch'
@@ -652,6 +676,30 @@ class SysinvClient(base.DriverBase):
"""Get a list of device image states."""
return self.sysinv_client.device_image_state.list()
def get_kube_upgrade(self, kube_upgrade_uuid):
    """Retrieve the details of a given kubernetes upgrade.

    :param kube_upgrade_uuid: kube upgrade uuid
    :return: the kube upgrade object, or None if the upgrade
             is not found
    """
    return self.sysinv_client.kube_upgrade.get(kube_upgrade_uuid)
def get_kube_upgrades(self):
    """Retrieve the kubernetes upgrade if one is present.

    :return: a list of kube upgrade objects; empty when no
             kubernetes upgrade is in progress
    """
    return self.sysinv_client.kube_upgrade.list()
def get_kube_version(self, version):
    """Retrieve the details of a given kubernetes version.

    :param version: kubernetes version
    :return: the kube version object, or None if the version
             is not found
    """
    return self.sysinv_client.kube_version.get(version)
def get_kube_versions(self):
    """Retrieve the list of kubernetes versions known to the system.

    :return: a list of kube version objects (each carries target,
             state and version attributes)
    """
    return self.sysinv_client.kube_version.list()
def apply_device_image(self, device_image_id, labels=None):
"""Apply a device image.

View File

@@ -12,7 +12,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
@@ -31,6 +31,7 @@ from dccommon import exceptions
LOG = log.getLogger(__name__)
STRATEGY_NAME_FW_UPDATE = 'fw-update'
STRATEGY_NAME_KUBE_UPGRADE = 'kube-upgrade'
STRATEGY_NAME_SW_PATCH = 'sw-patch'
STRATEGY_NAME_SW_UPGRADE = 'sw-upgrade'
@@ -88,9 +89,14 @@ class VimClient(base.DriverBase):
except exceptions.ServiceUnavailable:
raise
def create_strategy(self, strategy_name, storage_apply_type,
worker_apply_type, max_parallel_worker_hosts,
default_instance_action, alarm_restrictions):
def create_strategy(self,
strategy_name,
storage_apply_type,
worker_apply_type,
max_parallel_worker_hosts,
default_instance_action,
alarm_restrictions,
**kwargs):
"""Create orchestration strategy"""
url = self.endpoint
@@ -103,9 +109,10 @@ class VimClient(base.DriverBase):
worker_apply_type=worker_apply_type,
max_parallel_worker_hosts=max_parallel_worker_hosts,
default_instance_action=default_instance_action,
alarm_restrictions=alarm_restrictions)
alarm_restrictions=alarm_restrictions,
**kwargs)
if not strategy:
raise Exception("Strategy creation failed")
raise Exception("Strategy:(%s) creation failed" % strategy_name)
LOG.debug("Strategy created: %s" % strategy)
return strategy

View File

@@ -13,7 +13,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2017 Wind River Systems, Inc.
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
@@ -41,8 +41,9 @@ LOG = logging.getLogger(__name__)
SUPPORTED_STRATEGY_TYPES = [
consts.SW_UPDATE_TYPE_FIRMWARE,
consts.SW_UPDATE_TYPE_UPGRADE,
consts.SW_UPDATE_TYPE_PATCH
consts.SW_UPDATE_TYPE_KUBERNETES,
consts.SW_UPDATE_TYPE_PATCH,
consts.SW_UPDATE_TYPE_UPGRADE
]

View File

@@ -0,0 +1,157 @@
# Copyright 2017 Ericsson AB.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#
from keystoneauth1 import exceptions as keystone_exceptions
from oslo_log import log as logging
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dcorch.common import consts as dcorch_consts
from dcmanager.common import consts
LOG = logging.getLogger(__name__)
class KubernetesAuditData(object):
    """Serializable holder for one kubernetes version entry.

    Carries the (target, version, state) triple for a kubernetes
    version so it can cross the RPC boundary as a plain dict and be
    rebuilt on the other side.
    """

    def __init__(self, target, version, state):
        self.target = target
        self.version = version
        self.state = state

    def to_dict(self):
        """Return the entry as a plain dict (RPC-friendly form)."""
        return {'target': self.target,
                'version': self.version,
                'state': self.state}

    @classmethod
    def from_dict(cls, values):
        """Rebuild an instance from to_dict() output; None passes through."""
        return None if values is None else cls(**values)
class KubernetesAudit(object):
    """Manages tasks related to kubernetes audits."""

    def __init__(self, context, dcmanager_rpc_client):
        # context: request context used for all dcmanager RPC calls
        # dcmanager_rpc_client: client used to push endpoint sync-status
        # updates back to dcmanager
        LOG.debug('KubernetesAudit initialization...')
        self.context = context
        self.dcmanager_rpc_client = dcmanager_rpc_client
        # Count of audits performed; not read anywhere in this class
        self.audit_count = 0

    def _update_subcloud_sync_status(self, sc_name, sc_endpoint_type,
                                     sc_status):
        """Report a subcloud endpoint sync status to dcmanager via RPC."""
        self.dcmanager_rpc_client.update_subcloud_endpoint_status(
            self.context,
            subcloud_name=sc_name,
            endpoint_type=sc_endpoint_type,
            sync_status=sc_status)

    def get_regionone_audit_data(self):
        """Query RegionOne to determine kubernetes information

        :return: A list of kubernetes versions on the system controller,
                 or None if the RegionOne clients could not be set up
                 (callers treat None as "skip this audit cycle").
        """
        try:
            m_os_ks_client = OpenStackDriver(
                region_name=consts.DEFAULT_REGION_NAME,
                region_clients=None).keystone_client
            sysinv_client = SysinvClient(
                consts.DEFAULT_REGION_NAME, m_os_ks_client.session)
        except Exception:
            # Returning None (not an empty list) distinguishes "could not
            # collect" from "collected nothing".
            LOG.exception('Failed init OS Client, skip kubernetes audit.')
            return None
        region_one_data = []
        # Convert each sysinv version object into a serializable holder so
        # the data can be passed to audit workers over RPC.
        results_list = sysinv_client.get_kube_versions()
        for result in results_list:
            region_one_data.append(KubernetesAuditData(result.target,
                                                       result.version,
                                                       result.state))
        LOG.debug("RegionOne kubernetes versions: %s" % region_one_data)
        return region_one_data

    def subcloud_kubernetes_audit(self, subcloud_name, audit_data):
        """Audit one subcloud's kubernetes version against RegionOne.

        Marks the subcloud's kubernetes endpoint in-sync when its
        target=True/state='active' version matches the RegionOne one,
        out-of-sync otherwise.

        :param subcloud_name: name of the subcloud to audit
        :param audit_data: list of dicts (KubernetesAuditData.to_dict()
                           form) gathered from RegionOne
        """
        LOG.info('Triggered kubernetes audit for subcloud:%s' % subcloud_name)
        if not audit_data:
            # No RegionOne data to compare against; treat as in-sync rather
            # than leaving the endpoint status stale.
            self._update_subcloud_sync_status(
                subcloud_name, dcorch_consts.ENDPOINT_TYPE_KUBERNETES,
                consts.SYNC_STATUS_IN_SYNC)
            LOG.debug('No region one audit data, exiting kubernetes audit')
            return
        try:
            sc_os_client = OpenStackDriver(region_name=subcloud_name,
                                           region_clients=None)
            session = sc_os_client.keystone_client.session
            sysinv_client = SysinvClient(subcloud_name, session)
        except (keystone_exceptions.EndpointNotFound,
                keystone_exceptions.ConnectFailure,
                keystone_exceptions.ConnectTimeout,
                IndexError):
            # Subcloud unreachable; leave its sync status unchanged and
            # retry on a later audit cycle.
            LOG.exception("Endpoint for online subcloud:(%s) not found, skip "
                          "kubernetes audit." % subcloud_name)
            return
        # Retrieve kubernetes info for this subcloud
        # state - active, partial, available
        # active - true / false
        # version - any value ex: v1.18.1
        # Find the target=true state=active version on system controller
        # The audit_data for region one is a dictionary
        region_one_version = None
        for result in audit_data:
            # audit_data will be a dict from passing through RPC, so objectify
            result = KubernetesAuditData.from_dict(result)
            if result.target and result.state == 'active':
                region_one_version = result.version
                break
        if region_one_version is None:
            # Nothing to compare against; sync status is left unchanged.
            LOG.info("No active target version found in region one audit data")
            return
        out_of_sync = True
        # We will consider it out of sync even for 'partial' state
        # The audit data for subcloud_results is an object not a dictionary
        subcloud_results = sysinv_client.get_kube_versions()
        for result in subcloud_results:
            if result.target and result.state == 'active':
                subcloud_version = result.version
                if subcloud_version == region_one_version:
                    out_of_sync = False
                # Only one version can be target+active; stop at it.
                break
        if out_of_sync:
            self._update_subcloud_sync_status(
                subcloud_name, dcorch_consts.ENDPOINT_TYPE_KUBERNETES,
                consts.SYNC_STATUS_OUT_OF_SYNC)
        else:
            self._update_subcloud_sync_status(
                subcloud_name, dcorch_consts.ENDPOINT_TYPE_KUBERNETES,
                consts.SYNC_STATUS_IN_SYNC)
        LOG.info('Kubernetes audit completed for:(%s)' % subcloud_name)

View File

@@ -10,7 +10,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
@@ -59,12 +59,15 @@ class ManagerAuditClient(object):
client = self._client
return client.cast(ctxt, method, **kwargs)
def trigger_patch_audit(self, ctxt):
return self.cast(ctxt, self.make_msg('trigger_patch_audit'))
def trigger_firmware_audit(self, ctxt):
return self.cast(ctxt, self.make_msg('trigger_firmware_audit'))
def trigger_kubernetes_audit(self, ctxt):
return self.cast(ctxt, self.make_msg('trigger_kubernetes_audit'))
def trigger_patch_audit(self, ctxt):
return self.cast(ctxt, self.make_msg('trigger_patch_audit'))
def trigger_subcloud_audits(self, ctxt, subcloud_id):
return self.cast(ctxt, self.make_msg('trigger_subcloud_audits',
subcloud_id=subcloud_id))
@@ -104,12 +107,21 @@ class ManagerAuditWorkerClient(object):
client = self._client
return client.cast(ctxt, method, **kwargs)
# Tell audit-worker to perform audit on the subclouds with these
# subcloud IDs.
def audit_subclouds(self, ctxt, subcloud_ids, patch_audit_data=None,
firmware_audit_data=None, do_openstack_audit=False):
return self.cast(ctxt, self.make_msg('audit_subclouds',
subcloud_ids=subcloud_ids,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
do_openstack_audit=do_openstack_audit))
def audit_subclouds(self,
ctxt,
subcloud_ids,
patch_audit_data=None,
firmware_audit_data=None,
kubernetes_audit_data=None,
do_openstack_audit=False):
"""Tell audit-worker to perform audit on the subclouds with these
subcloud IDs.
"""
return self.cast(
ctxt, self.make_msg('audit_subclouds',
subcloud_ids=subcloud_ids,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
kubernetes_audit_data=kubernetes_audit_data,
do_openstack_audit=do_openstack_audit))

View File

@@ -11,7 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
@@ -109,13 +109,6 @@ class DCManagerAuditService(service.Service):
LOG.info("All threads were gone, terminating engine")
super(DCManagerAuditService, self).stop()
@request_context
def trigger_patch_audit(self, context):
"""Used to force a patch audit on the next interval"""
LOG.info("Trigger patch audit.")
return self.subcloud_audit_manager.trigger_patch_audit(context)
@request_context
def trigger_firmware_audit(self, context):
"""Used to force a firmware audit on the next interval"""
@@ -123,6 +116,20 @@ class DCManagerAuditService(service.Service):
LOG.info("Trigger firmware audit.")
return self.subcloud_audit_manager.trigger_firmware_audit(context)
@request_context
def trigger_kubernetes_audit(self, context):
"""Used to force a kubernetes audit on the next interval"""
LOG.info("Trigger kubernetes audit.")
return self.subcloud_audit_manager.trigger_kubernetes_audit(context)
@request_context
def trigger_patch_audit(self, context):
"""Used to force a patch audit on the next interval"""
LOG.info("Trigger patch audit.")
return self.subcloud_audit_manager.trigger_patch_audit(context)
@request_context
def trigger_subcloud_audits(self, context, subcloud_id):
"""Trigger all subcloud audits for one subcloud."""
@@ -185,10 +192,18 @@ class DCManagerAuditWorkerService(service.Service):
super(DCManagerAuditWorkerService, self).stop()
@request_context
def audit_subclouds(self, context, subcloud_ids,
patch_audit_data, firmware_audit_data,
def audit_subclouds(self,
context,
subcloud_ids,
patch_audit_data,
firmware_audit_data,
kubernetes_audit_data,
do_openstack_audit):
"""Used to trigger audits of the specified subcloud(s)"""
self.subcloud_audit_worker_manager.audit_subclouds(
context, subcloud_ids, patch_audit_data,
firmware_audit_data, do_openstack_audit)
context,
subcloud_ids,
patch_audit_data,
firmware_audit_data,
kubernetes_audit_data,
do_openstack_audit)

View File

@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
@@ -33,6 +33,7 @@ from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack import sysinv_v1
from dcmanager.audit import firmware_audit
from dcmanager.audit import kubernetes_audit
from dcmanager.audit import patch_audit
from dcmanager.audit import rpcapi as dcmanager_audit_rpc_client
from dcmanager.common import context
@@ -56,6 +57,9 @@ SUBCLOUD_STATE_UPDATE_ITERATIONS = \
# Name of starlingx openstack helm application
HELM_APP_OPENSTACK = 'stx-openstack'
# Every 4 audits triggers a kubernetes audit
KUBERNETES_AUDIT_RATE = 4
class SubcloudAuditManager(manager.Manager):
"""Manages tasks related to audits."""
@@ -66,6 +70,9 @@ class SubcloudAuditManager(manager.Manager):
# Used to force firmware audit on the next interval
force_firmware_audit = False
# Used to force kubernetes audit on the next interval
force_kubernetes_audit = False
def __init__(self, *args, **kwargs):
LOG.debug(_('SubcloudAuditManager initialization...'))
@@ -83,11 +90,14 @@ class SubcloudAuditManager(manager.Manager):
self.patch_audit_time = 0
self.firmware_audit = firmware_audit.FirmwareAudit(
self.context, None)
self.kubernetes_audit = kubernetes_audit.KubernetesAudit(
self.context, None)
def _add_missing_endpoints(self):
file_path = os.path.join(CONFIG_PATH, '.fpga_endpoint_added')
# Update this flag file based on the most recent new endpoint
file_path = os.path.join(CONFIG_PATH, '.kube_endpoint_added')
# If file exists on the controller, all the endpoints have been
# added to DB
# added to DB since last time an endpoint was added
if not os.path.isfile(file_path):
# Ensures all endpoints exist for all subclouds
# If the endpoint doesn't exist, an entry will be made
@@ -112,18 +122,6 @@ class SubcloudAuditManager(manager.Manager):
# the DB checks for missing subcloud endpoints
open(file_path, 'w').close()
@classmethod
def trigger_patch_audit(cls, context):
"""Trigger patch audit at next interval.
This can be called from outside the dcmanager audit
"""
cls.force_patch_audit = True
@classmethod
def reset_force_patch_audit(cls):
cls.force_patch_audit = False
@classmethod
def trigger_firmware_audit(cls, context):
"""Trigger firmware audit at next interval.
@@ -136,6 +134,30 @@ class SubcloudAuditManager(manager.Manager):
def reset_force_firmware_audit(cls):
cls.force_firmware_audit = False
@classmethod
def trigger_kubernetes_audit(cls, context):
"""Trigger kubernetes audit at next interval.
This can be called from outside the dcmanager audit
"""
cls.force_kubernetes_audit = True
@classmethod
def reset_force_kubernetes_audit(cls):
cls.force_kubernetes_audit = False
@classmethod
def trigger_patch_audit(cls, context):
"""Trigger patch audit at next interval.
This can be called from outside the dcmanager audit
"""
cls.force_patch_audit = True
@classmethod
def reset_force_patch_audit(cls):
cls.force_patch_audit = False
def trigger_subcloud_audits(self, context, subcloud_id):
"""Trigger all subcloud audits for one subcloud."""
values = {
@@ -175,6 +197,7 @@ class SubcloudAuditManager(manager.Manager):
audit_patch = False
audit_load = False
audit_firmware = False
audit_kubernetes = False
current_time = time.time()
# Determine whether to trigger a patch audit of each subcloud
if (SubcloudAuditManager.force_patch_audit or
@@ -194,26 +217,42 @@ class SubcloudAuditManager(manager.Manager):
audit_firmware = True
# Reset force_firmware_audit only when firmware audit has been fired
SubcloudAuditManager.reset_force_firmware_audit()
if (self.patch_audit_count % KUBERNETES_AUDIT_RATE == 1):
LOG.info("Trigger kubernetes audit")
audit_kubernetes = True
# Reset force_kubernetes_audit only when kubernetes audit has been fired
SubcloudAuditManager.reset_force_kubernetes_audit()
# Trigger a firmware audit as it is changed through proxy
if (SubcloudAuditManager.force_firmware_audit):
if SubcloudAuditManager.force_firmware_audit:
LOG.info("Trigger firmware audit")
audit_firmware = True
SubcloudAuditManager.reset_force_firmware_audit()
return audit_patch, audit_load, audit_firmware
# Trigger a kubernetes audit as it is changed through proxy
if SubcloudAuditManager.force_kubernetes_audit:
LOG.info("Trigger kubernetes audit")
audit_kubernetes = True
SubcloudAuditManager.reset_force_kubernetes_audit()
def _get_audit_data(self, audit_patch, audit_firmware):
"""Return the patch audit and firmware audit data as needed."""
return audit_patch, audit_load, audit_firmware, audit_kubernetes
def _get_audit_data(self, audit_patch, audit_firmware, audit_kubernetes):
"""Return the patch / firmware / kubernetes audit data as needed."""
patch_audit_data = None
firmware_audit_data = None
kubernetes_audit_data = None
if audit_patch:
# Query RegionOne patches and software version
patch_audit_data = self.patch_audit.get_regionone_audit_data()
if audit_firmware:
# Query RegionOne firmware
firmware_audit_data = self.firmware_audit.get_regionone_audit_data()
if audit_kubernetes:
# Query RegionOne kubernetes version info
kubernetes_audit_data = self.kubernetes_audit.get_regionone_audit_data()
return patch_audit_data, firmware_audit_data
return patch_audit_data, firmware_audit_data, kubernetes_audit_data
def _periodic_subcloud_audit_loop(self):
"""Audit availability of subclouds loop."""
@@ -230,7 +269,8 @@ class SubcloudAuditManager(manager.Manager):
update_subcloud_state = False
# Determine whether we want to trigger specialty audits.
audit_patch, audit_load, audit_firmware = self._get_audits_needed()
audit_patch, audit_load, audit_firmware, audit_kubernetes = \
self._get_audits_needed()
# Set desired audit flags for all subclouds.
values = {}
@@ -242,6 +282,8 @@ class SubcloudAuditManager(manager.Manager):
values['load_audit_requested'] = True
if audit_firmware:
values['firmware_audit_requested'] = True
if audit_kubernetes:
values['kubernetes_audit_requested'] = True
db_api.subcloud_audits_update_all(self.context, values)
do_openstack_audit = False
@@ -300,10 +342,19 @@ class SubcloudAuditManager(manager.Manager):
LOG.info("DB says firmware audit needed")
audit_firmware = True
break
patch_audit_data, firmware_audit_data = self._get_audit_data(
audit_patch, audit_firmware)
LOG.info("patch_audit_data: %s, firmware_audit_data: %s" %
(patch_audit_data, firmware_audit_data))
if not audit_kubernetes:
for audit in subcloud_audits:
if audit.kubernetes_audit_requested:
LOG.info("DB says kubernetes audit needed")
audit_kubernetes = True
break
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
self._get_audit_data(audit_patch, audit_firmware, audit_kubernetes)
LOG.info("patch_audit_data: %s, "
"firmware_audit_data: %s, "
"kubernetes_audit_data: %s, " % (patch_audit_data,
firmware_audit_data,
kubernetes_audit_data))
# We want a chunksize of at least 1 so add the number of workers.
chunksize = (len(subcloud_audits) + CONF.audit_worker_workers) / CONF.audit_worker_workers
@@ -313,14 +364,16 @@ class SubcloudAuditManager(manager.Manager):
# We've gathered a batch of subclouds, send it for processing.
self.audit_worker_rpc_client.audit_subclouds(
self.context, subcloud_ids, patch_audit_data,
firmware_audit_data, do_openstack_audit)
firmware_audit_data, kubernetes_audit_data,
do_openstack_audit)
LOG.debug('Sent subcloud audit request message for subclouds: %s' % subcloud_ids)
subcloud_ids = []
if len(subcloud_ids) > 0:
# We've got a partial batch...send it off for processing.
self.audit_worker_rpc_client.audit_subclouds(
self.context, subcloud_ids, patch_audit_data,
firmware_audit_data, do_openstack_audit)
firmware_audit_data, kubernetes_audit_data,
do_openstack_audit)
LOG.debug('Sent final subcloud audit request message for subclouds: %s' % subcloud_ids)
else:
LOG.debug('Done sending audit request messages.')

View File

@@ -31,6 +31,7 @@ from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dcmanager.audit import alarm_aggregation
from dcmanager.audit import firmware_audit
from dcmanager.audit import kubernetes_audit
from dcmanager.audit import patch_audit
from dcmanager.audit.subcloud_audit_manager import HELM_APP_OPENSTACK
from dcmanager.common import consts
@@ -71,10 +72,13 @@ class SubcloudAuditWorkerManager(manager.Manager):
self.context, self.dcmanager_rpc_client)
self.firmware_audit = firmware_audit.FirmwareAudit(
self.context, self.dcmanager_rpc_client)
self.kubernetes_audit = kubernetes_audit.KubernetesAudit(
self.context, self.dcmanager_rpc_client)
self.pid = os.getpid()
def audit_subclouds(self, context, subcloud_ids, patch_audit_data,
firmware_audit_data, do_openstack_audit):
firmware_audit_data, kubernetes_audit_data,
do_openstack_audit):
"""Run audits of the specified subcloud(s)"""
LOG.debug('PID: %s, subclouds to audit: %s, do_openstack_audit: %s' %
@@ -119,6 +123,7 @@ class SubcloudAuditWorkerManager(manager.Manager):
do_patch_audit = subcloud_audits.patch_audit_requested
do_load_audit = subcloud_audits.load_audit_requested
do_firmware_audit = subcloud_audits.firmware_audit_requested
do_kubernetes_audit = subcloud_audits.kubernetes_audit_requested
update_subcloud_state = subcloud_audits.state_update_requested
# Create a new greenthread for each subcloud to allow the audits
@@ -131,9 +136,11 @@ class SubcloudAuditWorkerManager(manager.Manager):
do_openstack_audit,
patch_audit_data,
firmware_audit_data,
kubernetes_audit_data,
do_patch_audit,
do_load_audit,
do_firmware_audit)
do_firmware_audit,
do_kubernetes_audit)
def _update_subcloud_availability(self, subcloud_name,
availability_status=None,
@@ -227,17 +234,25 @@ class SubcloudAuditWorkerManager(manager.Manager):
endpoint_type_list,
openstack_installed_current)
def _do_audit_subcloud(self, subcloud, update_subcloud_state,
do_audit_openstack, patch_audit_data,
firmware_audit_data, do_patch_audit,
do_load_audit, do_firmware_audit):
def _do_audit_subcloud(self,
subcloud,
update_subcloud_state,
do_audit_openstack,
patch_audit_data,
firmware_audit_data,
kubernetes_audit_data,
do_patch_audit,
do_load_audit,
do_firmware_audit,
do_kubernetes_audit):
audits_done = []
# Do the actual subcloud audit.
try:
audits_done = self._audit_subcloud(
subcloud, update_subcloud_state, do_audit_openstack,
patch_audit_data, firmware_audit_data, do_patch_audit,
do_load_audit, do_firmware_audit)
patch_audit_data, firmware_audit_data, kubernetes_audit_data,
do_patch_audit,
do_load_audit, do_firmware_audit, do_kubernetes_audit)
except Exception:
LOG.exception("Got exception auditing subcloud: %s" % subcloud.name)
@@ -251,8 +266,10 @@ class SubcloudAuditWorkerManager(manager.Manager):
(self.pid, subcloud.name))
def _audit_subcloud(self, subcloud, update_subcloud_state,
do_audit_openstack, patch_audit_data, firmware_audit_data,
do_patch_audit, do_load_audit, do_firmware_audit):
do_audit_openstack, patch_audit_data,
firmware_audit_data, kubernetes_audit_data,
do_patch_audit, do_load_audit, do_firmware_audit,
do_kubernetes_audit):
"""Audit a single subcloud."""
avail_status_current = subcloud.availability_status
@@ -357,6 +374,12 @@ class SubcloudAuditWorkerManager(manager.Manager):
self.firmware_audit.subcloud_firmware_audit(subcloud_name,
firmware_audit_data)
audits_done.append('firmware')
# Perform kubernetes audit
if do_kubernetes_audit:
self.kubernetes_audit.subcloud_kubernetes_audit(
subcloud_name,
kubernetes_audit_data)
audits_done.append('kubernetes')
# Audit openstack application in the subcloud
if do_audit_openstack and sysinv_client:
self._audit_subcloud_openstack_app(

View File

@@ -80,6 +80,7 @@ AVAIL_FAIL_COUNT_MAX = 9999
# Software update strategy types
SW_UPDATE_TYPE_FIRMWARE = "firmware"
SW_UPDATE_TYPE_KUBERNETES = "kubernetes"
SW_UPDATE_TYPE_PATCH = "patch"
SW_UPDATE_TYPE_UPGRADE = "upgrade"
@@ -146,6 +147,20 @@ STRATEGY_STATE_CREATING_FW_UPDATE_STRATEGY = "creating fw update strategy"
STRATEGY_STATE_APPLYING_FW_UPDATE_STRATEGY = "applying fw update strategy"
STRATEGY_STATE_FINISHING_FW_UPDATE = "finishing fw update"
# Kubernetes update orchestration states
STRATEGY_STATE_KUBE_UPDATING_PATCHES = \
"kube updating patches"
STRATEGY_STATE_KUBE_CREATING_VIM_PATCH_STRATEGY = \
"kube creating vim patch strategy"
STRATEGY_STATE_KUBE_APPLYING_VIM_PATCH_STRATEGY = \
"kube applying vim patch strategy"
STRATEGY_STATE_KUBE_DELETING_VIM_PATCH_STRATEGY = \
"kube deleting vim patch strategy"
STRATEGY_STATE_KUBE_CREATING_VIM_KUBE_UPGRADE_STRATEGY = \
"kube creating vim kube upgrade strategy"
STRATEGY_STATE_KUBE_APPLYING_VIM_KUBE_UPGRADE_STRATEGY = \
"kube applying vim kube upgrade strategy"
# Subcloud deploy status states
DEPLOY_STATE_NONE = 'not-deployed'
DEPLOY_STATE_PRE_DEPLOY = 'pre-deploy'

View File

@@ -14,7 +14,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2017 Wind River Systems, Inc.
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
@@ -182,6 +182,10 @@ class LicenseMissingError(DCManagerException):
message = _("License does not exist on subcloud: %(subcloud_id)s")
class KubeUpgradeFailedException(DCManagerException):
    """Raised when a kubernetes upgrade fails on a subcloud."""
    message = _("Subcloud: %(subcloud)s kube upgrade failed: %(details)s")
class ManualRecoveryRequiredException(DCManagerException):
    """Raised when an offline subcloud's deploy state needs operator action."""
    message = _("Offline Subcloud: %(subcloud)s needs manual recovery from "
                "deploy state: %(deploy_status)s")

View File

@@ -269,6 +269,18 @@ def get_vault_load_files(target_version):
return (matching_iso, matching_sig)
def get_active_kube_version(kube_versions):
    """Returns the active version name for kubernetes from a list of versions.

    :param kube_versions: iterable of version objects exposing to_dict()
    :return: the version string of the entry with target=True and
             state == 'active', or None when no such entry exists
    """
    for entry in kube_versions:
        info = entry.to_dict()
        # The active target is the single version the system is running.
        if info.get('target') and info.get('state') == 'active':
            return info.get('version')
    return None
def get_loads_for_patching(loads):
"""Filter the loads that can be patched. Return their software versions"""
valid_states = [

View File

@@ -129,7 +129,6 @@ class DCManagerService(service.Service):
group_id,
data_install,
force)
return subcloud
@request_context
@@ -185,6 +184,12 @@ class DCManagerService(service.Service):
sync_status == consts.SYNC_STATUS_UNKNOWN:
self.audit_rpc_client.trigger_firmware_audit(context)
# If the kubernetes sync status is being set to unknown, trigger the
# kubernetes audit so it can update the sync status ASAP.
if endpoint_type == dcorch_consts.ENDPOINT_TYPE_KUBERNETES and \
sync_status == consts.SYNC_STATUS_UNKNOWN:
self.audit_rpc_client.trigger_kubernetes_audit(context)
return
@request_context

View File

@@ -13,26 +13,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#
import datetime
import threading
import time
from keystoneauth1 import exceptions as keystone_exceptions
from oslo_log import log as logging
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common import scheduler
from dcmanager.db import api as db_api
from dcmanager.orchestrator.orch_thread import OrchThread
from dcmanager.orchestrator.states.firmware.applying_vim_strategy \
import ApplyingVIMStrategyState
from dcmanager.orchestrator.states.firmware.creating_vim_strategy \
@@ -42,22 +31,8 @@ from dcmanager.orchestrator.states.firmware.finishing_fw_update \
from dcmanager.orchestrator.states.firmware.importing_firmware \
import ImportingFirmwareState
LOG = logging.getLogger(__name__)
# every state in fw orchestration must have an operator
STATE_OPERATORS = {
consts.STRATEGY_STATE_IMPORTING_FIRMWARE:
ImportingFirmwareState,
consts.STRATEGY_STATE_CREATING_FW_UPDATE_STRATEGY:
CreatingVIMStrategyState,
consts.STRATEGY_STATE_APPLYING_FW_UPDATE_STRATEGY:
ApplyingVIMStrategyState,
consts.STRATEGY_STATE_FINISHING_FW_UPDATE:
FinishingFwUpdateState,
}
class FwUpdateOrchThread(threading.Thread):
class FwUpdateOrchThread(OrchThread):
"""FwUpdate Orchestration Thread
This thread is responsible for the firmware orchestration strategy.
@@ -73,406 +48,26 @@ class FwUpdateOrchThread(threading.Thread):
so, it executes the strategy, updating the strategy and steps in the
database as it goes, with state and progress information.
"""
# every state in fw orchestration must have an operator
STATE_OPERATORS = {
consts.STRATEGY_STATE_IMPORTING_FIRMWARE:
ImportingFirmwareState,
consts.STRATEGY_STATE_CREATING_FW_UPDATE_STRATEGY:
CreatingVIMStrategyState,
consts.STRATEGY_STATE_APPLYING_FW_UPDATE_STRATEGY:
ApplyingVIMStrategyState,
consts.STRATEGY_STATE_FINISHING_FW_UPDATE:
FinishingFwUpdateState,
}
def __init__(self, strategy_lock, audit_rpc_client):
super(FwUpdateOrchThread, self).__init__()
self.context = context.get_admin_context()
self._stop = threading.Event()
# Used to protect strategy when an atomic read/update is required.
self.strategy_lock = strategy_lock
# Used to notify dcmanager-audit to trigger an audit
self.audit_rpc_client = audit_rpc_client
# Keeps track of greenthreads we create to do work.
self.thread_group_manager = scheduler.ThreadGroupManager(
thread_pool_size=100)
# Track worker created for each subcloud.
self.subcloud_workers = dict()
super(FwUpdateOrchThread, self).__init__(
strategy_lock,
audit_rpc_client,
consts.SW_UPDATE_TYPE_FIRMWARE,
vim.STRATEGY_NAME_FW_UPDATE,
consts.STRATEGY_STATE_IMPORTING_FIRMWARE)
# When an apply is initiated, this is the first state
self.starting_state = consts.STRATEGY_STATE_IMPORTING_FIRMWARE
def stopped(self):
    """Return True once stop() has been called (the thread should exit)."""
    return self._stop.isSet()
def stop(self):
    """Signal the orchestration thread to exit its main loop."""
    LOG.info("FwUpdateOrchThread Stopping")
    self._stop.set()
def run(self):
    """Thread entry point: run the orchestration loop until stopped."""
    self.run_orch()
    # Stop any greenthreads that are still running
    self.thread_group_manager.stop()
    LOG.info("FwUpdateOrchThread Stopped")
@staticmethod
def get_ks_client(region_name=consts.DEFAULT_REGION_NAME):
    """This will get a cached keystone client (and token).

    Re-raises on failure so the caller can fail the current step.
    """
    try:
        os_client = OpenStackDriver(
            region_name=region_name,
            region_clients=None)
        return os_client.keystone_client
    except Exception:
        LOG.warn('Failure initializing KeystoneClient')
        raise
def get_vim_client(self, region_name=consts.DEFAULT_REGION_NAME):
    """Return a VIM client bound to the cached keystone session."""
    ks_client = self.get_ks_client(region_name)
    return vim.VimClient(region_name, ks_client.session)
@staticmethod
def get_region_name(strategy_step):
    """Get the region name for a strategy step"""
    if strategy_step.subcloud_id is None:
        # This is the SystemController.
        return consts.DEFAULT_REGION_NAME
    # Subcloud steps use the subcloud name as the region name.
    return strategy_step.subcloud.name
@staticmethod
def format_update_details(last_state, info):
    """Format a details string for the strategy step DB column."""
    # include the last state, since the current state is likely 'failed'
    details = "%s: %s" % (last_state, info)
    # details cannot exceed 255 chars. truncate and add '..'
    if len(details) > 255:
        details = details[:253] + '..'
    return details
@staticmethod
def determine_state_operator(strategy_step):
    """Return the state operator for the current state"""
    # NOTE(review): an unknown state yields None from .get(), which
    # raises TypeError when called below — confirm states are validated
    # upstream.
    state_operator = STATE_OPERATORS.get(strategy_step.state)
    # instantiate and return the state_operator class
    return state_operator(region_name=FwUpdateOrchThread.get_region_name(strategy_step))
def strategy_step_update(self, subcloud_id, state=None, details=None):
    """Update the strategy step in the DB.

    Sets the start and finished timestamp if necessary, based on state.

    :param subcloud_id: id of the subcloud step (None for SystemController)
    :param state: new step state, or None to leave unchanged
    :param details: new details string, or None to leave unchanged
    :return: the updated strategy step object
    """
    started_at = None
    finished_at = None
    if state == self.starting_state:
        started_at = datetime.datetime.now()
    elif state in [consts.STRATEGY_STATE_COMPLETE,
                   consts.STRATEGY_STATE_ABORTED,
                   consts.STRATEGY_STATE_FAILED]:
        finished_at = datetime.datetime.now()
    # Return the updated object, in case we need to use its updated values
    return db_api.strategy_step_update(self.context,
                                       subcloud_id,
                                       state=state,
                                       details=details,
                                       started_at=started_at,
                                       finished_at=finished_at)
def run_orch(self):
    """Poll the DB every 10 seconds and dispatch strategy work."""
    while not self.stopped():
        try:
            LOG.debug('Running fw update orchestration')
            sw_update_strategy = db_api.sw_update_strategy_get(
                self.context,
                update_type=consts.SW_UPDATE_TYPE_FIRMWARE)
            if sw_update_strategy.type == consts.SW_UPDATE_TYPE_FIRMWARE:
                if sw_update_strategy.state in [
                        consts.SW_UPDATE_STATE_APPLYING,
                        consts.SW_UPDATE_STATE_ABORTING]:
                    self.apply(sw_update_strategy)
                elif sw_update_strategy.state == \
                        consts.SW_UPDATE_STATE_ABORT_REQUESTED:
                    self.abort(sw_update_strategy)
                elif sw_update_strategy.state == \
                        consts.SW_UPDATE_STATE_DELETING:
                    self.delete(sw_update_strategy)
        except exceptions.NotFound:
            # Nothing to do if a strategy doesn't exist
            pass
        except Exception as e:
            # We catch all exceptions to avoid terminating the thread.
            LOG.exception(e)
        # Wake up every 10 seconds to see if there is work to do.
        time.sleep(10)
    LOG.info("FwUpdateOrchThread ended main loop")
def apply(self, sw_update_strategy):
    """Apply a fw update strategy.

    Scans all strategy steps to find the current stage, finalizes the
    overall strategy state when all steps are done, and starts a worker
    greenthread for each step in the current stage.
    """
    LOG.debug("Applying fw update strategy")
    strategy_steps = db_api.strategy_step_get_all(self.context)
    # Figure out which stage we are working on
    current_stage = None
    stop_after_stage = None
    failure_detected = False
    abort_detected = False
    for strategy_step in strategy_steps:
        if strategy_step.state == consts.STRATEGY_STATE_COMPLETE:
            # This step is complete
            continue
        elif strategy_step.state == consts.STRATEGY_STATE_ABORTED:
            # This step was aborted
            abort_detected = True
            continue
        elif strategy_step.state == consts.STRATEGY_STATE_FAILED:
            failure_detected = True
            # This step has failed and needs no further action
            if strategy_step.subcloud_id is None:
                # Strategy on SystemController failed. We are done.
                LOG.info("Stopping strategy due to failure while "
                         "processing update step on SystemController")
                with self.strategy_lock:
                    db_api.sw_update_strategy_update(
                        self.context,
                        state=consts.SW_UPDATE_STATE_FAILED,
                        update_type=consts.SW_UPDATE_TYPE_FIRMWARE)
                # Trigger audit to update the sync status for
                # each subcloud.
                self.audit_rpc_client.trigger_firmware_audit(self.context)
                return
            elif sw_update_strategy.stop_on_failure:
                # We have been told to stop on failures
                stop_after_stage = strategy_step.stage
                current_stage = strategy_step.stage
                break
            continue
        # We have found the first step that isn't complete or failed.
        # This is the stage we are working on now.
        current_stage = strategy_step.stage
        break
    else:
        # The strategy application is complete
        if failure_detected:
            LOG.info("Strategy application has failed.")
            with self.strategy_lock:
                db_api.sw_update_strategy_update(
                    self.context,
                    state=consts.SW_UPDATE_STATE_FAILED,
                    update_type=consts.SW_UPDATE_TYPE_FIRMWARE)
        elif abort_detected:
            LOG.info("Strategy application was aborted.")
            with self.strategy_lock:
                db_api.sw_update_strategy_update(
                    self.context,
                    state=consts.SW_UPDATE_STATE_ABORTED,
                    update_type=consts.SW_UPDATE_TYPE_FIRMWARE)
        else:
            LOG.info("Strategy application is complete.")
            with self.strategy_lock:
                db_api.sw_update_strategy_update(
                    self.context,
                    state=consts.SW_UPDATE_STATE_COMPLETE,
                    update_type=consts.SW_UPDATE_TYPE_FIRMWARE)
        # Trigger audit to update the sync status for each subcloud.
        self.audit_rpc_client.trigger_firmware_audit(self.context)
        return
    if stop_after_stage is not None:
        work_remaining = False
        # We are going to stop after the steps in this stage have finished.
        for strategy_step in strategy_steps:
            if strategy_step.stage == stop_after_stage:
                if strategy_step.state != consts.STRATEGY_STATE_COMPLETE \
                        and strategy_step.state != \
                        consts.STRATEGY_STATE_FAILED:
                    # There is more work to do in this stage
                    work_remaining = True
                    break
        if not work_remaining:
            # We have completed the stage that failed
            LOG.info("Stopping strategy due to failure in stage %d" %
                     stop_after_stage)
            with self.strategy_lock:
                db_api.sw_update_strategy_update(
                    self.context,
                    state=consts.SW_UPDATE_STATE_FAILED,
                    update_type=consts.SW_UPDATE_TYPE_FIRMWARE)
            # Trigger audit to update the sync status for each subcloud.
            self.audit_rpc_client.trigger_firmware_audit(self.context)
            return
    LOG.debug("Working on stage %d" % current_stage)
    for strategy_step in strategy_steps:
        if strategy_step.stage == current_stage:
            region = self.get_region_name(strategy_step)
            if self.stopped():
                LOG.info("Exiting because task is stopped")
                return
            if strategy_step.state == \
                    consts.STRATEGY_STATE_FAILED:
                LOG.debug("Intermediate step is failed")
                continue
            elif strategy_step.state == \
                    consts.STRATEGY_STATE_COMPLETE:
                LOG.debug("Intermediate step is complete")
                continue
            elif strategy_step.state == \
                    consts.STRATEGY_STATE_INITIAL:
                # Don't start upgrading this subcloud if it has been
                # unmanaged by the user. If orchestration was already
                # started, it will be allowed to complete.
                if strategy_step.subcloud_id is not None and \
                        strategy_step.subcloud.management_state == \
                        consts.MANAGEMENT_UNMANAGED:
                    message = ("Subcloud %s is unmanaged." %
                               strategy_step.subcloud.name)
                    LOG.warn(message)
                    self.strategy_step_update(
                        strategy_step.subcloud_id,
                        state=consts.STRATEGY_STATE_FAILED,
                        details=message)
                    continue
                # We are just getting started, enter the first state
                # Use the updated value for calling process_update_step
                strategy_step = self.strategy_step_update(
                    strategy_step.subcloud_id,
                    state=self.starting_state)
                # Starting state should log an error if greenthread exists
                self.process_update_step(region,
                                         strategy_step,
                                         log_error=True)
            else:
                self.process_update_step(region,
                                         strategy_step,
                                         log_error=False)
def abort(self, sw_update_strategy):
    """Abort an update strategy.

    Steps already in progress are allowed to finish; only steps still
    in the initial state are marked aborted.
    """
    LOG.info("Aborting fw update strategy")
    # Mark any steps that have not yet started as aborted,
    # so we will not run them later.
    strategy_steps = db_api.strategy_step_get_all(self.context)
    for strategy_step in strategy_steps:
        if strategy_step.state == consts.STRATEGY_STATE_INITIAL:
            LOG.info("Aborting step for subcloud %s" %
                     self.get_region_name(strategy_step))
            self.strategy_step_update(
                strategy_step.subcloud_id,
                state=consts.STRATEGY_STATE_ABORTED,
                details="")
    with self.strategy_lock:
        db_api.sw_update_strategy_update(
            self.context,
            state=consts.SW_UPDATE_STATE_ABORTING,
            update_type=consts.SW_UPDATE_TYPE_FIRMWARE)
def delete(self, sw_update_strategy):
    """Delete an update strategy.

    Deletes the per-subcloud vim strategies first, then removes the
    strategy and its steps from the database.
    """
    LOG.info("Deleting fw update strategy")
    strategy_steps = db_api.strategy_step_get_all(self.context)
    for strategy_step in strategy_steps:
        self.delete_subcloud_strategy(strategy_step)
        if self.stopped():
            LOG.info("Exiting because task is stopped")
            return
    # Remove the strategy from the database
    try:
        db_api.strategy_step_destroy_all(self.context)
        db_api.sw_update_strategy_destroy(self.context)
    except Exception as e:
        LOG.exception(e)
        raise e
# todo(abailey): refactor delete to reuse patch orch code
def delete_subcloud_strategy(self, strategy_step):
    """Delete the vim strategy in this subcloud.

    Raises if the vim strategy exists but is in a state where it cannot
    be deleted, or if the delete call itself fails.
    """
    strategy_name = vim.STRATEGY_NAME_FW_UPDATE
    region = self.get_region_name(strategy_step)
    LOG.info("Deleting vim strategy %s for %s" % (strategy_name, region))
    # First check if the strategy has been created.
    try:
        subcloud_strategy = self.get_vim_client(region).get_strategy(
            strategy_name=strategy_name)
    except (keystone_exceptions.EndpointNotFound, IndexError):
        message = ("Endpoint for subcloud: %s not found." %
                   region)
        LOG.error(message)
        self.strategy_step_update(
            strategy_step.subcloud_id,
            state=consts.STRATEGY_STATE_FAILED,
            details=message)
        return
    except Exception:
        # Strategy doesn't exist so there is nothing to do
        return
    if subcloud_strategy.state in [vim.STATE_BUILDING,
                                   vim.STATE_APPLYING,
                                   vim.STATE_ABORTING]:
        # Can't delete a strategy in these states
        message = ("Strategy for %s in wrong state (%s)for delete" %
                   (region, subcloud_strategy.state))
        LOG.warn(message)
        raise Exception(message)
    # If we are here, we need to delete the strategy
    try:
        self.get_vim_client(region).delete_strategy(
            strategy_name=strategy_name)
    except Exception:
        message = "Strategy delete failed for %s" % region
        LOG.warn(message)
        raise
def process_update_step(self, region, strategy_step, log_error=False):
    """manage the green thread for calling perform_state_action"""
    if region in self.subcloud_workers:
        # A worker already exists. Let it finish whatever it was doing.
        if log_error:
            LOG.error("Worker should not exist for %s." % region)
        else:
            LOG.debug("Update worker exists for %s." % region)
    else:
        # Create a greenthread to start processing the update for the
        # subcloud and invoke the perform_state_action method
        self.subcloud_workers[region] = \
            self.thread_group_manager.start(self.perform_state_action,
                                            strategy_step)
def perform_state_action(self, strategy_step):
    """Extensible state handler for processing and transitioning states.

    Runs inside a worker greenthread. On success, advances the step to
    the state returned by the operator; on any exception, marks the
    step failed with a truncated details message.
    """
    try:
        LOG.info("Stage: %s, State: %s, Subcloud: %s"
                 % (strategy_step.stage,
                    strategy_step.state,
                    self.get_region_name(strategy_step)))
        # Instantiate the state operator and perform the state actions
        state_operator = self.determine_state_operator(strategy_step)
        state_operator.registerStopEvent(self._stop)
        next_state = state_operator.perform_state_action(strategy_step)
        self.strategy_step_update(strategy_step.subcloud_id,
                                  state=next_state,
                                  details="")
    except Exception as e:
        # Catch ALL exceptions and set the strategy to failed
        LOG.exception("Failed! Stage: %s, State: %s, Subcloud: %s"
                      % (strategy_step.stage,
                         strategy_step.state,
                         self.get_region_name(strategy_step)))
        details = self.format_update_details(strategy_step.state, str(e))
        self.strategy_step_update(strategy_step.subcloud_id,
                                  state=consts.STRATEGY_STATE_FAILED,
                                  details=details)
    finally:
        # The worker is done.
        region = self.get_region_name(strategy_step)
        if region in self.subcloud_workers:
            del self.subcloud_workers[region]
def trigger_audit(self):
    """Trigger an audit for firmware"""
    self.audit_rpc_client.trigger_firmware_audit(self.context)

View File

@@ -0,0 +1,67 @@
# Copyright 2017 Ericsson AB.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.orchestrator.orch_thread import OrchThread
from dcmanager.orchestrator.states.kube.applying_vim_kube_upgrade_strategy \
import ApplyingVIMKubeUpgradeStrategyState
from dcmanager.orchestrator.states.kube.applying_vim_patch_strategy \
import ApplyingVIMPatchStrategyState
from dcmanager.orchestrator.states.kube.creating_vim_kube_upgrade_strategy \
import CreatingVIMKubeUpgradeStrategyState
from dcmanager.orchestrator.states.kube.creating_vim_patch_strategy \
import CreatingVIMPatchStrategyState
from dcmanager.orchestrator.states.kube.deleting_vim_patch_strategy \
import DeletingVIMPatchStrategyState
from dcmanager.orchestrator.states.kube.updating_kube_patches \
import UpdatingKubePatchesState
class KubeUpgradeOrchThread(OrchThread):
    """Kube Upgrade Orchestration Thread

    Drives the distributed-cloud kubernetes upgrade strategy by wiring
    the kube-specific states and vim strategy name into the generic
    OrchThread machinery.
    """

    # every state in kube orchestration must have an operator
    STATE_OPERATORS = {
        consts.STRATEGY_STATE_KUBE_UPDATING_PATCHES:
            UpdatingKubePatchesState,
        consts.STRATEGY_STATE_KUBE_CREATING_VIM_PATCH_STRATEGY:
            CreatingVIMPatchStrategyState,
        consts.STRATEGY_STATE_KUBE_APPLYING_VIM_PATCH_STRATEGY:
            ApplyingVIMPatchStrategyState,
        consts.STRATEGY_STATE_KUBE_DELETING_VIM_PATCH_STRATEGY:
            DeletingVIMPatchStrategyState,
        consts.STRATEGY_STATE_KUBE_CREATING_VIM_KUBE_UPGRADE_STRATEGY:
            CreatingVIMKubeUpgradeStrategyState,
        consts.STRATEGY_STATE_KUBE_APPLYING_VIM_KUBE_UPGRADE_STRATEGY:
            ApplyingVIMKubeUpgradeStrategyState,
    }

    def __init__(self, strategy_lock, audit_rpc_client):
        # Bind the kubernetes update type, its vim strategy name and the
        # first state entered when an apply begins.
        super(KubeUpgradeOrchThread, self).__init__(
            strategy_lock,
            audit_rpc_client,
            consts.SW_UPDATE_TYPE_KUBERNETES,
            vim.STRATEGY_NAME_KUBE_UPGRADE,
            consts.STRATEGY_STATE_KUBE_UPDATING_PATCHES)

    def trigger_audit(self):
        """Trigger an audit for kubernetes"""
        self.audit_rpc_client.trigger_kubernetes_audit(self.context)

View File

@@ -0,0 +1,492 @@
# Copyright 2017 Ericsson AB.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#
import abc
import datetime
import threading
import time
from keystoneauth1 import exceptions as keystone_exceptions
from oslo_log import log as logging
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common import scheduler
from dcmanager.db import api as db_api
LOG = logging.getLogger(__name__)
class OrchThread(threading.Thread):
    """Abstract Orchestration Thread

    This thread is responsible for the orchestration strategy.
    Here is how it works:
    - The user creates an update strategy from CLI (or REST API)
    - This is being handled by the SwUpdateManager class, which
      runs under the main dcmanager thread. The strategy is created and
      stored in the database.
    - The user then applies the strategy from the CLI (or REST API). The
      SwUpdateManager code updates the state of the strategy in the
      database.
    - The OrchThread wakes up periodically and checks the database for
      a strategy of its expected type that is in an active state. If
      so, it executes the strategy, updating the strategy and steps in the
      database as it goes, with state and progress information.

    Subclasses must provide STATE_OPERATORS and override trigger_audit.
    """

    # each subclass must provide the STATE_OPERATORS
    STATE_OPERATORS = {}

    def __init__(self,
                 strategy_lock,
                 audit_rpc_client,
                 update_type,
                 vim_strategy_name,
                 starting_state):
        """Initialize the orchestration thread.

        :param strategy_lock: lock protecting atomic strategy read/update
        :param audit_rpc_client: RPC client used to trigger audits
        :param update_type: the sw-update type this thread orchestrates
        :param vim_strategy_name: name of the vim strategy in subclouds
        :param starting_state: first strategy-step state when apply begins
        """
        super(OrchThread, self).__init__()
        # Used to protect strategy when an atomic read/update is required.
        self.strategy_lock = strategy_lock
        # Used to notify dcmanager-audit to trigger an audit
        self.audit_rpc_client = audit_rpc_client
        # The update type for the orch thread
        self.update_type = update_type
        # The vim strategy name for the orch thread
        self.vim_strategy_name = vim_strategy_name
        # When an apply is initiated, this is the first state
        self.starting_state = starting_state
        self.context = context.get_admin_context()
        self._stop = threading.Event()
        # Keeps track of greenthreads we create to do work.
        self.thread_group_manager = scheduler.ThreadGroupManager(
            thread_pool_size=100)
        # Track worker created for each subcloud.
        self.subcloud_workers = dict()

    @abc.abstractmethod
    def trigger_audit(self):
        """Subclass MUST override this method"""
        # warning() is the supported spelling; warn() is a deprecated alias
        LOG.warning("(%s) OrchThread subclass must override trigger_audit"
                    % self.update_type)

    def stopped(self):
        """Return True once stop() has been called (thread should exit)."""
        # is_set() is the non-deprecated spelling of Event.isSet()
        return self._stop.is_set()

    def stop(self):
        """Signal the orchestration thread to exit its main loop."""
        LOG.info("(%s) OrchThread Stopping" % self.update_type)
        self._stop.set()

    def run(self):
        """Thread entry point: run the orchestration loop until stopped."""
        self.run_orch()
        # Stop any greenthreads that are still running
        self.thread_group_manager.stop()
        LOG.info("(%s) OrchThread Stopped" % self.update_type)

    @staticmethod
    def get_ks_client(region_name=consts.DEFAULT_REGION_NAME):
        """This will get a cached keystone client (and token)

        throws an exception if keystone client cannot be initialized
        """
        os_client = OpenStackDriver(region_name=region_name,
                                    region_clients=None)
        return os_client.keystone_client

    @staticmethod
    def get_vim_client(region_name=consts.DEFAULT_REGION_NAME):
        """Return a VIM client bound to the cached keystone session."""
        ks_client = OrchThread.get_ks_client(region_name)
        return vim.VimClient(region_name, ks_client.session)

    @staticmethod
    def get_region_name(strategy_step):
        """Get the region name for a strategy step"""
        if strategy_step.subcloud_id is None:
            # This is the SystemController.
            return consts.DEFAULT_REGION_NAME
        return strategy_step.subcloud.name

    @staticmethod
    def format_update_details(last_state, info):
        """Format a details string for the strategy step DB column."""
        # include the last state, since the current state is likely 'failed'
        details = "%s: %s" % (last_state, info)
        # details cannot exceed 255 chars. truncate and add '..'
        if len(details) > 255:
            details = details[:253] + '..'
        return details

    def determine_state_operator(self, strategy_step):
        """Return the state operator for the current state"""
        state_operator = self.STATE_OPERATORS.get(strategy_step.state)
        # instantiate and return the state_operator class
        return state_operator(
            region_name=OrchThread.get_region_name(strategy_step))

    def strategy_step_update(self, subcloud_id, state=None, details=None):
        """Update the strategy step in the DB

        Sets the start and finished timestamp if necessary, based on state.

        :param subcloud_id: id of the step (None for SystemController)
        :param state: new step state, or None to leave unchanged
        :param details: new details string, or None to leave unchanged
        :return: the updated strategy step object
        """
        started_at = None
        finished_at = None
        if state == self.starting_state:
            started_at = datetime.datetime.now()
        elif state in [consts.STRATEGY_STATE_COMPLETE,
                       consts.STRATEGY_STATE_ABORTED,
                       consts.STRATEGY_STATE_FAILED]:
            finished_at = datetime.datetime.now()
        # Return the updated object, in case we need to use its updated values
        return db_api.strategy_step_update(self.context,
                                           subcloud_id,
                                           state=state,
                                           details=details,
                                           started_at=started_at,
                                           finished_at=finished_at)

    def run_orch(self):
        """Poll the DB every 10 seconds and dispatch strategy work."""
        while not self.stopped():
            try:
                LOG.debug('(%s) OrchThread Running' % self.update_type)
                sw_update_strategy = db_api.sw_update_strategy_get(
                    self.context,
                    update_type=self.update_type)
                if sw_update_strategy.type == self.update_type:
                    if sw_update_strategy.state in [
                            consts.SW_UPDATE_STATE_APPLYING,
                            consts.SW_UPDATE_STATE_ABORTING]:
                        self.apply(sw_update_strategy)
                    elif sw_update_strategy.state == \
                            consts.SW_UPDATE_STATE_ABORT_REQUESTED:
                        self.abort(sw_update_strategy)
                    elif sw_update_strategy.state == \
                            consts.SW_UPDATE_STATE_DELETING:
                        self.delete(sw_update_strategy)
            except exceptions.NotFound:
                # Nothing to do if a strategy doesn't exist
                pass
            except Exception:
                # We catch all exceptions to avoid terminating the thread.
                LOG.exception("(%s) OrchThread unexpected exception"
                              % self.update_type)
            # Wake up every 10 seconds to see if there is work to do.
            time.sleep(10)
        LOG.info("(%s) OrchThread ended main loop" % self.update_type)

    def apply(self, sw_update_strategy):
        """Apply a sw update strategy.

        Scans all strategy steps to find the current stage, finalizes
        the overall strategy state when all steps are done, and starts a
        worker greenthread for each step in the current stage.
        """
        LOG.debug("(%s) Applying update strategy" % self.update_type)
        strategy_steps = db_api.strategy_step_get_all(self.context)
        # Figure out which stage we are working on
        current_stage = None
        stop_after_stage = None
        failure_detected = False
        abort_detected = False
        for strategy_step in strategy_steps:
            if strategy_step.state == consts.STRATEGY_STATE_COMPLETE:
                # This step is complete
                continue
            elif strategy_step.state == consts.STRATEGY_STATE_ABORTED:
                # This step was aborted
                abort_detected = True
                continue
            elif strategy_step.state == consts.STRATEGY_STATE_FAILED:
                failure_detected = True
                # This step has failed and needs no further action
                if strategy_step.subcloud_id is None:
                    # Strategy on SystemController failed. We are done.
                    LOG.info("(%s) Stopping strategy due to failure while "
                             "processing update step on SystemController"
                             % self.update_type)
                    with self.strategy_lock:
                        db_api.sw_update_strategy_update(
                            self.context,
                            state=consts.SW_UPDATE_STATE_FAILED,
                            update_type=self.update_type)
                    # Trigger audit to update the sync status for
                    # each subcloud.
                    self.trigger_audit()
                    return
                elif sw_update_strategy.stop_on_failure:
                    # We have been told to stop on failures
                    stop_after_stage = strategy_step.stage
                    current_stage = strategy_step.stage
                    break
                continue
            # We have found the first step that isn't complete or failed.
            # This is the stage we are working on now.
            current_stage = strategy_step.stage
            break
        else:
            # The strategy application is complete
            if failure_detected:
                LOG.info("(%s) Strategy application has failed."
                         % self.update_type)
                with self.strategy_lock:
                    db_api.sw_update_strategy_update(
                        self.context,
                        state=consts.SW_UPDATE_STATE_FAILED,
                        update_type=self.update_type)
            elif abort_detected:
                LOG.info("(%s) Strategy application was aborted."
                         % self.update_type)
                with self.strategy_lock:
                    db_api.sw_update_strategy_update(
                        self.context,
                        state=consts.SW_UPDATE_STATE_ABORTED,
                        update_type=self.update_type)
            else:
                LOG.info("(%s) Strategy application is complete."
                         % self.update_type)
                with self.strategy_lock:
                    db_api.sw_update_strategy_update(
                        self.context,
                        state=consts.SW_UPDATE_STATE_COMPLETE,
                        update_type=self.update_type)
            # Trigger audit to update the sync status for each subcloud.
            self.trigger_audit()
            return
        if stop_after_stage is not None:
            work_remaining = False
            # We are going to stop after the steps in this stage have finished.
            for strategy_step in strategy_steps:
                if strategy_step.stage == stop_after_stage:
                    if strategy_step.state != consts.STRATEGY_STATE_COMPLETE \
                            and strategy_step.state != \
                            consts.STRATEGY_STATE_FAILED:
                        # There is more work to do in this stage
                        work_remaining = True
                        break
            if not work_remaining:
                # We have completed the stage that failed
                LOG.info("(%s) Stopping strategy due to failure in stage %d"
                         % (self.update_type, stop_after_stage))
                with self.strategy_lock:
                    db_api.sw_update_strategy_update(
                        self.context,
                        state=consts.SW_UPDATE_STATE_FAILED,
                        update_type=self.update_type)
                # Trigger audit to update the sync status for each subcloud.
                self.trigger_audit()
                return
        LOG.debug("(%s) Working on stage %d"
                  % (self.update_type, current_stage))
        for strategy_step in strategy_steps:
            if strategy_step.stage == current_stage:
                region = self.get_region_name(strategy_step)
                if self.stopped():
                    LOG.info("(%s) Exiting because task is stopped"
                             % self.update_type)
                    return
                if strategy_step.state == \
                        consts.STRATEGY_STATE_FAILED:
                    LOG.debug("(%s) Intermediate step is failed"
                              % self.update_type)
                    continue
                elif strategy_step.state == \
                        consts.STRATEGY_STATE_COMPLETE:
                    LOG.debug("(%s) Intermediate step is complete"
                              % self.update_type)
                    continue
                elif strategy_step.state == \
                        consts.STRATEGY_STATE_INITIAL:
                    # Don't start upgrading this subcloud if it has been
                    # unmanaged by the user. If orchestration was already
                    # started, it will be allowed to complete.
                    if strategy_step.subcloud_id is not None and \
                            strategy_step.subcloud.management_state == \
                            consts.MANAGEMENT_UNMANAGED:
                        message = ("Subcloud %s is unmanaged." %
                                   strategy_step.subcloud.name)
                        LOG.warning(message)
                        self.strategy_step_update(
                            strategy_step.subcloud_id,
                            state=consts.STRATEGY_STATE_FAILED,
                            details=message)
                        continue
                    # We are just getting started, enter the first state
                    # Use the updated value for calling process_update_step
                    strategy_step = self.strategy_step_update(
                        strategy_step.subcloud_id,
                        state=self.starting_state)
                    # Starting state should log an error if greenthread exists
                    self.process_update_step(region,
                                             strategy_step,
                                             log_error=True)
                else:
                    self.process_update_step(region,
                                             strategy_step,
                                             log_error=False)

    def abort(self, sw_update_strategy):
        """Abort an update strategy.

        Steps already in progress are allowed to finish; only steps
        still in the initial state are marked aborted.
        """
        LOG.info("(%s) Aborting update strategy" % self.update_type)
        # Mark any steps that have not yet started as aborted,
        # so we will not run them later.
        strategy_steps = db_api.strategy_step_get_all(self.context)
        for strategy_step in strategy_steps:
            if strategy_step.state == consts.STRATEGY_STATE_INITIAL:
                LOG.info("(%s) Aborting step for subcloud %s"
                         % (self.update_type,
                            self.get_region_name(strategy_step)))
                self.strategy_step_update(
                    strategy_step.subcloud_id,
                    state=consts.STRATEGY_STATE_ABORTED,
                    details="")
        with self.strategy_lock:
            db_api.sw_update_strategy_update(
                self.context,
                state=consts.SW_UPDATE_STATE_ABORTING,
                update_type=self.update_type)

    def delete(self, sw_update_strategy):
        """Delete an update strategy.

        Deletes the per-subcloud vim strategies first, then removes the
        strategy and its steps from the database.
        """
        LOG.info("(%s) Deleting update strategy" % self.update_type)
        strategy_steps = db_api.strategy_step_get_all(self.context)
        for strategy_step in strategy_steps:
            self.delete_subcloud_strategy(strategy_step)
            if self.stopped():
                LOG.info("(%s) Exiting because task is stopped"
                         % self.update_type)
                return
        # Remove the strategy from the database
        try:
            db_api.strategy_step_destroy_all(self.context)
            db_api.sw_update_strategy_destroy(self.context)
        except Exception:
            LOG.exception("(%s) exception during delete"
                          % self.update_type)
            # bare raise preserves the original traceback
            raise

    def delete_subcloud_strategy(self, strategy_step):
        """Delete the vim strategy in this subcloud.

        Raises if the vim strategy exists but is in a state where it
        cannot be deleted, or if the delete call itself fails.
        """
        region = self.get_region_name(strategy_step)
        LOG.info("(%s) Deleting vim strategy:(%s) for region:(%s)"
                 % (self.update_type, self.vim_strategy_name, region))
        # First check if the strategy has been created.
        try:
            subcloud_strategy = OrchThread.get_vim_client(region).get_strategy(
                strategy_name=self.vim_strategy_name)
        except (keystone_exceptions.EndpointNotFound, IndexError):
            message = ("(%s) Endpoint for subcloud: %s not found." %
                       (self.update_type, region))
            LOG.error(message)
            self.strategy_step_update(
                strategy_step.subcloud_id,
                state=consts.STRATEGY_STATE_FAILED,
                details=message)
            return
        except Exception:
            # Strategy doesn't exist so there is nothing to do
            return
        if subcloud_strategy.state in [vim.STATE_BUILDING,
                                       vim.STATE_APPLYING,
                                       vim.STATE_ABORTING]:
            # Can't delete a vim strategy in these states
            message = ("(%s) Vim strategy:(%s) for region:(%s)"
                       " in wrong state:(%s) for delete."
                       % (self.update_type, self.vim_strategy_name, region,
                          subcloud_strategy.state))
            LOG.warning(message)
            raise Exception(message)
        # If we are here, we need to delete the strategy
        try:
            OrchThread.get_vim_client(region).delete_strategy(
                strategy_name=self.vim_strategy_name)
        except Exception:
            message = ("(%s) Vim strategy:(%s) delete failed for region:(%s)"
                       % (self.update_type, self.vim_strategy_name, region))
            LOG.warning(message)
            raise

    def process_update_step(self, region, strategy_step, log_error=False):
        """manage the green thread for calling perform_state_action"""
        if region in self.subcloud_workers:
            # A worker already exists. Let it finish whatever it was doing.
            if log_error:
                LOG.error("(%s) Worker should not exist for %s."
                          % (self.update_type, region))
            else:
                LOG.debug("(%s) Update worker exists for %s."
                          % (self.update_type, region))
        else:
            # Create a greenthread to start processing the update for the
            # subcloud and invoke the perform_state_action method
            self.subcloud_workers[region] = \
                self.thread_group_manager.start(self.perform_state_action,
                                                strategy_step)

    def perform_state_action(self, strategy_step):
        """Extensible state handler for processing and transitioning states.

        Runs inside a worker greenthread. On success, advances the step
        to the state returned by the operator; on any exception, marks
        the step failed with a truncated details message.
        """
        try:
            LOG.info("(%s) Stage: %s, State: %s, Subcloud: %s"
                     % (self.update_type,
                        strategy_step.stage,
                        strategy_step.state,
                        self.get_region_name(strategy_step)))
            # Instantiate the state operator and perform the state actions
            state_operator = self.determine_state_operator(strategy_step)
            state_operator.registerStopEvent(self._stop)
            next_state = state_operator.perform_state_action(strategy_step)
            self.strategy_step_update(strategy_step.subcloud_id,
                                      state=next_state,
                                      details="")
        except Exception as e:
            # Catch ALL exceptions and set the strategy to failed
            LOG.exception("(%s) Failed! Stage: %s, State: %s, Subcloud: %s"
                          % (self.update_type,
                             strategy_step.stage,
                             strategy_step.state,
                             self.get_region_name(strategy_step)))
            details = self.format_update_details(strategy_step.state, str(e))
            self.strategy_step_update(strategy_step.subcloud_id,
                                      state=consts.STRATEGY_STATE_FAILED,
                                      details=details)
        finally:
            # The worker is done.
            region = self.get_region_name(strategy_step)
            if region in self.subcloud_workers:
                del self.subcloud_workers[region]

View File

@@ -11,7 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
@@ -86,18 +86,25 @@ class DCManagerOrchestratorService(service.Service):
def _stop_rpc_server(self):
# Stop RPC connection to prevent new requests
LOG.debug("Attempting to stop engine service...")
try:
self._rpc_server.stop()
self._rpc_server.wait()
LOG.info('Engine service stopped successfully')
except Exception as ex:
LOG.error('Failed to stop engine service: %s',
six.text_type(ex))
if self._rpc_server is not None:
try:
self._rpc_server.stop()
self._rpc_server.wait()
self._rpc_server = None
LOG.info('Engine service stopped successfully')
except Exception as ex:
LOG.error('Failed to stop engine service: %s',
six.text_type(ex))
def stop(self):
"""Stop anything initiated by start"""
self._stop_rpc_server()
self.TG.stop()
self.sw_update_manager.stop()
if self.TG is not None:
self.TG.stop()
self.TG = None
if self.sw_update_manager is not None:
self.sw_update_manager.stop()
self.sw_update_manager = None
# Terminate the engine process
LOG.info("All threads were gone, terminating engine")
super(DCManagerOrchestratorService, self).stop()

View File

@@ -61,22 +61,27 @@ class ApplyingVIMStrategyState(BaseState):
subcloud_strategy = self.get_vim_client(region).apply_strategy(
strategy_name=self.strategy_name)
if subcloud_strategy.state == vim.STATE_APPLYING:
self.info_log(strategy_step, "VIM Strategy apply in progress")
self.info_log(strategy_step,
"(%s) VIM Strategy apply in progress"
% self.strategy_name)
elif subcloud_strategy.state == vim.STATE_APPLIED:
# Success.
self.info_log(strategy_step,
"VIM strategy has been applied")
"(%s) VIM strategy has been applied"
% self.strategy_name)
elif subcloud_strategy.state in [vim.STATE_APPLY_FAILED,
vim.STATE_APPLY_TIMEOUT]:
# Explicit known failure states
raise Exception("VIM strategy apply failed. %s. %s"
% (subcloud_strategy.state,
raise Exception("(%s) VIM strategy apply failed. %s. %s"
% (self.strategy_name,
subcloud_strategy.state,
subcloud_strategy.apply_phase.reason))
else:
# Other states are bad
raise Exception("VIM strategy apply failed. "
raise Exception("(%s) VIM strategy apply failed. "
"Unexpected State: %s."
% subcloud_strategy.state)
% (self.strategy_name,
subcloud_strategy.state))
# wait for the new strategy to apply or an existing strategy.
# Loop until the strategy applies. Repeatedly query the API
@@ -98,7 +103,8 @@ class ApplyingVIMStrategyState(BaseState):
# break out of the loop if the max number of attempts is reached
wait_count += 1
if wait_count >= self.wait_attempts:
raise Exception("Timeout applying VIM strategy.")
raise Exception("Timeout applying (%s) vim strategy."
% self.strategy_name)
# every loop we wait, even the first one
time.sleep(self.wait_interval)
@@ -117,15 +123,17 @@ class ApplyingVIMStrategyState(BaseState):
if get_fail_count >= self.max_failed_queries:
# We have waited too long.
raise Exception("Timeout during recovery of apply "
"VIM strategy.")
"(%s) Vim strategy."
% self.strategy_name)
self.debug_log(strategy_step,
"Unable to get VIM strategy - "
"attempt %d" % get_fail_count)
"Unable to get (%s) vim strategy - attempt %d"
% (self.strategy_name, get_fail_count))
continue
# The loop gets here if the API is able to respond
# Check if the strategy no longer exists. This should not happen.
if subcloud_strategy is None:
raise Exception("VIM strategy not found.")
raise Exception("(%s) vim strategy not found."
% self.strategy_name)
elif subcloud_strategy.state == vim.STATE_APPLYING:
# Still applying. Update details if it has changed
new_details = ("%s phase is %s%% complete" % (
@@ -143,19 +151,22 @@ class ApplyingVIMStrategyState(BaseState):
elif subcloud_strategy.state == vim.STATE_APPLIED:
# Success.
self.info_log(strategy_step,
"VIM strategy has been applied")
"(%s) Vim strategy has been applied"
% self.strategy_name)
break
elif subcloud_strategy.state in [vim.STATE_APPLY_FAILED,
vim.STATE_APPLY_TIMEOUT]:
# Explicit known failure states
raise Exception("VIM strategy apply failed. %s. %s"
% (subcloud_strategy.state,
raise Exception("(%s) Vim strategy apply failed. %s. %s"
% (self.strategy_name,
subcloud_strategy.state,
subcloud_strategy.apply_phase.reason))
else:
# Other states are bad
raise Exception("VIM strategy apply failed. "
raise Exception("(%s) Vim strategy apply failed. "
"Unexpected State: %s."
% subcloud_strategy.state)
% (self.strategy_name,
subcloud_strategy.state))
# end of loop
# Success, state machine can proceed to the next state

View File

@@ -28,7 +28,8 @@ class CreatingVIMStrategyState(BaseState):
self.max_queries = DEFAULT_MAX_QUERIES
def _create_vim_strategy(self, strategy_step, region):
self.info_log(strategy_step, "Creating VIM strategy")
self.info_log(strategy_step,
"Creating (%s) VIM strategy" % self.strategy_name)
# Get the update options
opts_dict = dcmanager_utils.get_sw_update_opts(
@@ -51,6 +52,10 @@ class CreatingVIMStrategyState(BaseState):
% subcloud_strategy.state)
return subcloud_strategy
def skip_check(self, strategy_step, subcloud_strategy):
"""Subclasses can override this to allow this state to skip ahead"""
return None
def perform_state_action(self, strategy_step):
"""Create a VIM strategy using VIM REST API
@@ -113,6 +118,17 @@ class CreatingVIMStrategyState(BaseState):
subcloud_strategy = self.get_vim_client(region).get_strategy(
strategy_name=self.strategy_name,
raise_error_if_missing=True)
# Check for skip criteria where a failed 'build' might be expected
skip_state = self.skip_check(strategy_step, # pylint: disable=assignment-from-none
subcloud_strategy)
if skip_state is not None:
self.info_log(strategy_step,
"Skip forward to state:(%s)" % skip_state)
self.override_next_state(skip_state)
# break out of loop. Let overridden 'next_state' take over
break
if subcloud_strategy.state == vim.STATE_READY_TO_APPLY:
self.info_log(strategy_step, "VIM strategy has been built")
break

View File

@@ -0,0 +1,19 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.orchestrator.states.applying_vim_strategy \
import ApplyingVIMStrategyState
class ApplyingVIMKubeUpgradeStrategyState(ApplyingVIMStrategyState):
    """State for applying the VIM kube upgrade strategy.

    Thin specialization of ApplyingVIMStrategyState that applies the VIM
    'kube-upgrade' strategy; when the apply completes, orchestration for
    this subcloud is finished (next state is COMPLETE).
    """

    def __init__(self, region_name):
        super(ApplyingVIMKubeUpgradeStrategyState, self).__init__(
            strategy_name=vim.STRATEGY_NAME_KUBE_UPGRADE,
            region_name=region_name,
            next_state=consts.STRATEGY_STATE_COMPLETE)

View File

@@ -0,0 +1,19 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.orchestrator.states.applying_vim_strategy \
import ApplyingVIMStrategyState
class ApplyingVIMPatchStrategyState(ApplyingVIMStrategyState):
    """State for applying the VIM patch strategy during kube upgrade.

    Thin specialization of ApplyingVIMStrategyState that applies the VIM
    'sw-patch' strategy; on success the state machine proceeds to delete
    that patch strategy before the kube-upgrade strategy is created.
    """

    def __init__(self, region_name):
        super(ApplyingVIMPatchStrategyState, self).__init__(
            strategy_name=vim.STRATEGY_NAME_SW_PATCH,
            region_name=region_name,
            next_state=consts.STRATEGY_STATE_KUBE_DELETING_VIM_PATCH_STRATEGY)

View File

@@ -0,0 +1,62 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.common import utils as dcmanager_utils
from dcmanager.orchestrator.states.creating_vim_strategy \
import CreatingVIMStrategyState
class CreatingVIMKubeUpgradeStrategyState(CreatingVIMStrategyState):
    """State for creating the VIM kube upgrade strategy in a subcloud."""

    def __init__(self, region_name):
        super(CreatingVIMKubeUpgradeStrategyState, self).__init__(
            next_state=(
                consts.STRATEGY_STATE_KUBE_APPLYING_VIM_KUBE_UPGRADE_STRATEGY),
            region_name=region_name,
            strategy_name=vim.STRATEGY_NAME_KUBE_UPGRADE)

    def get_target_kube_version(self, strategy_step):
        """Return the active kube version on the system controller.

        Raises an exception when RegionOne reports no active kube version,
        since the subcloud upgrade target cannot be determined then.
        """
        regionone_versions = self.get_sysinv_client(
            consts.DEFAULT_REGION_NAME).get_kube_versions()
        target_version = dcmanager_utils.get_active_kube_version(
            regionone_versions)
        if target_version is None:
            message = "Active kube version in RegionOne not found"
            self.warn_log(strategy_step, message)
            raise Exception(message)
        return target_version

    def _create_vim_strategy(self, strategy_step, region):
        """Ask the subcloud VIM to build a kube-upgrade strategy.

        The strategy targets the RegionOne active kube version and is built
        with this subcloud's stored sw-update options.
        """
        self.info_log(strategy_step,
                      "Creating (%s) VIM strategy" % self.strategy_name)
        # determine the target for the vim kube strategy
        target_version = self.get_target_kube_version(strategy_step)
        # Get the update options
        opts_dict = dcmanager_utils.get_sw_update_opts(
            self.context,
            for_sw_update=True,
            subcloud_id=strategy_step.subcloud_id)
        # Call the API to build the VIM strategy
        subcloud_strategy = self.get_vim_client(region).create_strategy(
            self.strategy_name,
            opts_dict['storage-apply-type'],
            opts_dict['worker-apply-type'],
            opts_dict['max-parallel-workers'],
            opts_dict['default-instance-action'],
            opts_dict['alarm-restriction-type'],
            to_version=target_version)
        # a successful API call to create MUST set the state be 'building'
        if subcloud_strategy.state != vim.STATE_BUILDING:
            raise Exception("Unexpected VIM strategy build state: %s"
                            % subcloud_strategy.state)
        return subcloud_strategy

View File

@@ -0,0 +1,46 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.orchestrator.states.creating_vim_strategy \
import CreatingVIMStrategyState
class CreatingVIMPatchStrategyState(CreatingVIMStrategyState):
    """State for creating the VIM patch strategy prior to kube upgrade."""

    def __init__(self, region_name):
        next_state = consts.STRATEGY_STATE_KUBE_APPLYING_VIM_PATCH_STRATEGY
        super(CreatingVIMPatchStrategyState, self).__init__(
            next_state=next_state,
            region_name=region_name,
            strategy_name=vim.STRATEGY_NAME_SW_PATCH)
        # The VIM reports exactly this build-failure reason when the
        # subcloud is already patch-current; it is treated as a skip
        # rather than a failure (must match the VIM's message verbatim).
        self.SKIP_REASON = "no software patches need to be applied"
        self.SKIP_STATE = \
            consts.STRATEGY_STATE_KUBE_DELETING_VIM_PATCH_STRATEGY

    def skip_check(self, strategy_step, subcloud_strategy):
        """Check if the vim strategy does not need to be built.

        If the vim strategy build reports a failure, and the reason for
        the failure is expected, the state machine can skip past this vim
        strategy create/apply and simply delete and move on.  That happens
        when the subcloud is already considered up-to-date for its patches,
        based on what the vim calculates from the applied patches.

        Returns the skip-ahead state when the build failed with the reason
        "no software patches need to be applied"; otherwise returns None.
        """
        if subcloud_strategy is not None:
            if subcloud_strategy.state == vim.STATE_BUILD_FAILED:
                if subcloud_strategy.build_phase.reason == self.SKIP_REASON:
                    self.info_log(strategy_step,
                                  "Skip forward in state machine due to:(%s)"
                                  % subcloud_strategy.build_phase.reason)
                    return self.SKIP_STATE
        # If we get here, there is not a reason to skip
        return None

View File

@@ -0,0 +1,57 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.common.exceptions import KubeUpgradeFailedException
from dcmanager.orchestrator.states.base import BaseState
class DeletingVIMPatchStrategyState(BaseState):
    """State to delete vim patch strategy before creating vim kube strategy"""

    def __init__(self, region_name):
        next_state = \
            consts.STRATEGY_STATE_KUBE_CREATING_VIM_KUBE_UPGRADE_STRATEGY
        super(DeletingVIMPatchStrategyState, self).__init__(
            next_state=next_state,
            region_name=region_name)

    def perform_state_action(self, strategy_step):
        """Delete the VIM patch strategy if it exists.

        Returns the next state in the state machine on success.
        Any exceptions raised by this method set the strategy to FAILED.
        """
        self.info_log(strategy_step, "Delete vim patch strategy if it exists")

        region = self.get_region_name(strategy_step)
        strategy_name = vim.STRATEGY_NAME_SW_PATCH

        # Query rather than delete blindly: delete on a missing strategy
        # would raise, and a missing strategy is a normal case here.
        vim_strategy = self.get_vim_client(region).get_strategy(
            strategy_name=strategy_name,
            raise_error_if_missing=False)

        # If the vim patch strategy does not exist, there is nothing to delete
        if vim_strategy is None:
            self.info_log(strategy_step, "Skip. No vim patch strategy exists")
        else:
            self.info_log(strategy_step, "Deleting vim patch strategy")
            # The vim patch strategy cannot be deleted in certain states
            if vim_strategy.state in [vim.STATE_BUILDING,
                                      vim.STATE_APPLYING,
                                      vim.STATE_ABORTING]:
                # Can't delete a strategy in these states
                message = ("VIM patch strategy in wrong state:(%s) to delete"
                           % vim_strategy.state)
                # NOTE(review): 'region' above comes from
                # get_region_name(strategy_step) while this exception uses
                # self.region_name -- confirm these always refer to the
                # same subcloud.
                raise KubeUpgradeFailedException(
                    subcloud=self.region_name,
                    details=message)
            # delete the vim patch strategy
            self.get_vim_client(region).delete_strategy(
                strategy_name=strategy_name)
        # Success
        return self.next_state

View File

@@ -0,0 +1,186 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import os
import time
from dccommon.drivers.openstack import patching_v1
from dcmanager.common import consts
from dcmanager.common.exceptions import StrategyStoppedException
from dcmanager.common import utils
from dcmanager.orchestrator.states.base import BaseState
# Max time: 30 minutes = 180 queries x 10 seconds between
DEFAULT_MAX_QUERIES = 180
DEFAULT_SLEEP_DURATION = 10


class UpdatingKubePatchesState(BaseState):
    """Kube upgrade state for updating patches

    Brings the subcloud's patch repository in line with the system
    controller (RegionOne) before the VIM patch strategy is created:
    missing patches are uploaded, and patches required for the active
    kube version are applied.  No patches are removed.
    """

    def __init__(self, region_name):
        super(UpdatingKubePatchesState, self).__init__(
            next_state=consts.STRATEGY_STATE_KUBE_CREATING_VIM_PATCH_STRATEGY,
            region_name=region_name)
        # max time to wait (in seconds) is: sleep_duration * max_queries
        self.sleep_duration = DEFAULT_SLEEP_DURATION
        self.max_queries = DEFAULT_MAX_QUERIES

    def perform_state_action(self, strategy_step):
        """Update patches in this subcloud required for kubernetes upgrade.

        Returns the next state in the state machine on success.
        Any exceptions raised by this method set the strategy to FAILED.
        Raises StrategyStoppedException if the orchestration is stopped
        while this state is in progress.
        """

        self.info_log(strategy_step, "Updating kube patches")
        region = self.get_region_name(strategy_step)

        # query RegionOne patches
        regionone_patches = self.get_patching_client(
            consts.DEFAULT_REGION_NAME).query()

        # Query RegionOne loads to filter the patches
        loads = self.get_sysinv_client(consts.DEFAULT_REGION_NAME).get_loads()
        # this filters by active and imported loads
        installed_loads = utils.get_loads_for_patching(loads)

        # Query RegionOne active kube version to examine the patches
        kube_versions = self.get_sysinv_client(
            consts.DEFAULT_REGION_NAME).get_kube_versions()
        active_kube_version = utils.get_active_kube_version(kube_versions)
        if active_kube_version is None:
            # Without an active kube version on the system controller we
            # cannot decide which patches the kube upgrade requires.
            message = "Active kube version in RegionOne not found"
            self.warn_log(strategy_step, message)
            raise Exception(message)

        kube_ver = self.get_sysinv_client(
            consts.DEFAULT_REGION_NAME).get_kube_version(active_kube_version)
        kube_details = kube_ver.to_dict()

        # filter the active patches
        # - filtered_region_one_patches: applied/committed in RegionOne for
        #   an installed load (candidates to upload to the subcloud)
        # - applyable_region_one_patches: the subset that may be applied
        #   directly (the kube version's "available_patches" are instead
        #   applied later by the vim strategy)
        filtered_region_one_patches = list()
        applyable_region_one_patches = list()
        for patch_id in regionone_patches.keys():
            # Only the patches for the installed loads will be examined
            if regionone_patches[patch_id]['sw_version'] in installed_loads:
                # Only care about applied/committed patches
                if regionone_patches[patch_id]['repostate'] in [
                        patching_v1.PATCH_STATE_APPLIED,
                        patching_v1.PATCH_STATE_COMMITTED]:
                    filtered_region_one_patches.append(patch_id)
                    # "available_patches" should not be applied
                    # NOTE(review): assumes the kube version dict always
                    # carries 'available_patches'; .get() returning None
                    # here would make the 'in' test raise -- confirm.
                    if patch_id not in kube_details.get("available_patches"):
                        applyable_region_one_patches.append(patch_id)

        # Retrieve all the patches that are present in this subcloud.
        subcloud_patches = self.get_patching_client(region).query()

        # Not all applied patches can be applied in the subcloud
        # kube patch orchestration requires the vim strategy to apply some
        # No patches are being removed at this time.

        patches_to_upload = list()
        patches_to_apply = list()

        # Walk the subcloud's patches: applied/committed ones are left
        # alone, available ones may need to be applied, anything else is
        # an unexpected repo state and fails the step.
        subcloud_patch_ids = list(subcloud_patches.keys())
        for patch_id in subcloud_patch_ids:
            if subcloud_patches[patch_id]['repostate'] == \
                    patching_v1.PATCH_STATE_APPLIED:
                # todo(abailey): determine if we want to support remove
                pass
            elif subcloud_patches[patch_id]['repostate'] == \
                    patching_v1.PATCH_STATE_COMMITTED:
                # todo(abailey): determine if mismatch committed subcloud
                # patches should cause failure
                pass
            elif subcloud_patches[patch_id]['repostate'] == \
                    patching_v1.PATCH_STATE_AVAILABLE:
                # No need to upload. May need to apply
                if patch_id in applyable_region_one_patches:
                    self.info_log(strategy_step,
                                  "Patch %s will be applied" % patch_id)
                    patches_to_apply.append(patch_id)
            else:
                # This patch is in an invalid state
                message = ('Patch %s in subcloud in unexpected state %s' %
                           (patch_id,
                            subcloud_patches[patch_id]['repostate']))
                self.warn_log(strategy_step, message)
                raise Exception(message)

        # Check that all uploaded patches in RegionOne are in subcloud
        for patch_id in filtered_region_one_patches:
            if patch_id not in subcloud_patch_ids:
                patches_to_upload.append(patch_id)

        # Check that all applyable patches in RegionOne are in subcloud
        for patch_id in applyable_region_one_patches:
            if patch_id not in subcloud_patch_ids:
                patches_to_apply.append(patch_id)

        if patches_to_upload:
            self.info_log(strategy_step,
                          "Uploading patches %s to subcloud"
                          % patches_to_upload)
            for patch in patches_to_upload:
                # Patch files are served from the vault on the system
                # controller: <vault>/<sw_version>/<patch_id>.patch
                patch_sw_version = regionone_patches[patch]['sw_version']
                patch_file = "%s/%s/%s.patch" % (consts.PATCH_VAULT_DIR,
                                                 patch_sw_version,
                                                 patch)
                if not os.path.isfile(patch_file):
                    message = ('Patch file %s is missing' % patch_file)
                    self.error_log(strategy_step, message)
                    raise Exception(message)

                self.get_patching_client(region).upload([patch_file])

                # Check for an orchestration stop between each upload so a
                # long upload sequence can be aborted promptly.
                if self.stopped():
                    self.info_log(strategy_step,
                                  "Exiting because task is stopped")
                    raise StrategyStoppedException()

        if patches_to_apply:
            self.info_log(strategy_step,
                          "Applying patches %s to subcloud"
                          % patches_to_apply)
            self.get_patching_client(region).apply(patches_to_apply)

        # Now that we have applied/uploaded patches, we need to give
        # the patch controller on this subcloud time to determine whether
        # each host on that subcloud is patch current.
        wait_count = 0
        while True:
            subcloud_hosts = self.get_patching_client(region).query_hosts()

            self.debug_log(strategy_step,
                           "query_hosts for subcloud returned %s"
                           % subcloud_hosts)
            for host in subcloud_hosts:
                if host['interim_state']:
                    # This host is not yet ready.
                    self.debug_log(strategy_step,
                                   "Host %s in subcloud in interim state"
                                   % host["hostname"])
                    break
            else:
                # All hosts in the subcloud are updated
                # (for/else: no host was in interim state)
                break
            wait_count += 1
            if wait_count >= 6:
                # We have waited at least 60 seconds. This is too long. We
                # will just log it and move on without failing the step.
                message = ("Too much time expired after applying patches to "
                           "subcloud - continuing.")
                self.warn_log(strategy_step, message)
                break

            if self.stopped():
                self.info_log(strategy_step, "Exiting because task is stopped")
                raise StrategyStoppedException()

            # Wait 10 seconds before doing another query.
            time.sleep(10)

        return self.next_state

View File

@@ -15,8 +15,6 @@ from dcmanager.orchestrator.states.base import BaseState
# Max time: 30 minutes = 180 queries x 10 seconds between
DEFAULT_MAX_QUERIES = 180
DEFAULT_SLEEP_DURATION = 10
LOAD_IMPORT_REQUEST_TYPE = 'import'
LOAD_DELETE_REQUEST_TYPE = 'delete'
class UpdatingPatchesState(BaseState):

View File

@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
@@ -30,6 +30,8 @@ from dcmanager.common import manager
from dcmanager.common import utils
from dcmanager.db import api as db_api
from dcmanager.orchestrator.fw_update_orch_thread import FwUpdateOrchThread
from dcmanager.orchestrator.kube_upgrade_orch_thread \
import KubeUpgradeOrchThread
from dcmanager.orchestrator.patch_orch_thread import PatchOrchThread
from dcmanager.orchestrator.sw_upgrade_orch_thread import SwUpgradeOrchThread
from dcorch.common import consts as dcorch_consts
@@ -64,6 +66,10 @@ class SwUpdateManager(manager.Manager):
self.fw_update_orch_thread = FwUpdateOrchThread(self.strategy_lock,
self.audit_rpc_client)
self.fw_update_orch_thread.start()
# - kube upgrade orchestration thread
self.kube_upgrade_orch_thread = \
KubeUpgradeOrchThread(self.strategy_lock, self.audit_rpc_client)
self.kube_upgrade_orch_thread.start()
def stop(self):
# Stop (and join) the worker threads
@@ -76,6 +82,9 @@ class SwUpdateManager(manager.Manager):
# - fw update orchestration thread
self.fw_update_orch_thread.stop()
self.fw_update_orch_thread.join()
# - kube upgrade orchestration thread
self.kube_upgrade_orch_thread.stop()
self.kube_upgrade_orch_thread.join()
def _validate_subcloud_status_sync(self, strategy_type,
subcloud_status, force):
@@ -104,6 +113,11 @@ class SwUpdateManager(manager.Manager):
dcorch_consts.ENDPOINT_TYPE_FIRMWARE and
subcloud_status.sync_status ==
consts.SYNC_STATUS_OUT_OF_SYNC)
elif strategy_type == consts.SW_UPDATE_TYPE_KUBERNETES:
return (subcloud_status.endpoint_type ==
dcorch_consts.ENDPOINT_TYPE_KUBERNETES and
subcloud_status.sync_status ==
consts.SYNC_STATUS_OUT_OF_SYNC)
# Unimplemented strategy_type status check. Log an error
LOG.error("_validate_subcloud_status_sync for %s not implemented" %
strategy_type)
@@ -207,6 +221,14 @@ class SwUpdateManager(manager.Manager):
resource='strategy',
msg='Subcloud %s does not require firmware update'
% cloud_name)
elif strategy_type == consts.SW_UPDATE_TYPE_KUBERNETES:
subcloud_status = db_api.subcloud_status_get(
context, subcloud.id, dcorch_consts.ENDPOINT_TYPE_KUBERNETES)
if subcloud_status.sync_status == consts.SYNC_STATUS_IN_SYNC:
raise exceptions.BadRequest(
resource='strategy',
msg='Subcloud %s does not require kubernetes update'
% cloud_name)
elif strategy_type == consts.SW_UPDATE_TYPE_PATCH:
# Make sure subcloud requires patching
subcloud_status = db_api.subcloud_status_get(
@@ -268,6 +290,17 @@ class SwUpdateManager(manager.Manager):
resource='strategy',
msg='Firmware sync status is unknown for one or more '
'subclouds')
elif strategy_type == consts.SW_UPDATE_TYPE_KUBERNETES:
if subcloud.availability_status != consts.AVAILABILITY_ONLINE:
continue
elif (subcloud_status.endpoint_type ==
dcorch_consts.ENDPOINT_TYPE_KUBERNETES and
subcloud_status.sync_status ==
consts.SYNC_STATUS_UNKNOWN):
raise exceptions.BadRequest(
resource='strategy',
msg='Kubernetes sync status is unknown for one or more '
'subclouds')
# Create the strategy
strategy = db_api.sw_update_strategy_create(

View File

@@ -10,7 +10,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
@@ -403,7 +403,8 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(True, True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)
@@ -435,7 +436,8 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(True, True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)
@@ -467,7 +469,8 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(True, True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)
@@ -499,7 +502,8 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(True, True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)
@@ -531,7 +535,8 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(True, True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)
@@ -563,7 +568,8 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(True, True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)
@@ -595,7 +601,8 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(True, True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)
@@ -627,7 +634,8 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(True, True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)

View File

@@ -0,0 +1,232 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#
import mock
import uuid
from dcmanager.audit import firmware_audit
from dcmanager.audit import kubernetes_audit
from dcmanager.audit import patch_audit
from dcmanager.audit import subcloud_audit_manager
from dcmanager.common import consts
from dcorch.common import consts as dcorch_consts
from dcmanager.tests import base
from dcmanager.tests import utils
PREVIOUS_KUBE_VERSION = 'v1.2.3'
UPGRADED_KUBE_VERSION = 'v1.2.3-a'


class FakeDCManagerAPI(object):
    """Stand-in for the dcmanager RPC client; records calls via MagicMock."""

    def __init__(self):
        self.update_subcloud_availability = mock.MagicMock()
        self.update_subcloud_endpoint_status = mock.MagicMock()


class FakeKubeVersion(object):
    """Minimal stand-in for a sysinv kube version object."""

    def __init__(self,
                 obj_id=1,
                 version=UPGRADED_KUBE_VERSION,
                 target=True,
                 state='active'):
        # Identity fields; each fake gets a unique uuid.
        self.id = obj_id
        self.uuid = str(uuid.uuid4())
        # Version/state fields driven by the test scenario.
        self.version = version
        self.target = target
        self.state = state
        # Patch-related lists start empty; tests may populate them.
        self.upgrade_from = []
        self.applied_patches = []
        self.available_patches = []

    def to_dict(self):
        # Return a shallow copy so callers cannot mutate the fake's state.
        return {key: value for key, value in vars(self).items()}
class FakeAuditWorkerAPI(object):
    """Stand-in for the audit worker RPC client used by the audit manager."""

    def __init__(self):
        # MagicMock records calls so tests can assert audits were requested.
        self.audit_subclouds = mock.MagicMock()
class FakeSysinvClient(object):
    """Stand-in for SysinvClient; only get_kube_versions is exercised."""

    def __init__(self):
        # The real client is constructed with a region and session; the
        # fake just carries placeholders for them.
        self.region = None
        self.session = None
        # Tests set .return_value to control the versions reported.
        self.get_kube_versions = mock.MagicMock()
class TestKubernetesAudit(base.DCManagerTestCase):
def setUp(self):
super(TestKubernetesAudit, self).setUp()
self.ctxt = utils.dummy_context()
# Mock the DCManager API
self.fake_dcmanager_api = FakeDCManagerAPI()
p = mock.patch('dcmanager.rpc.client.ManagerClient')
self.mock_dcmanager_api = p.start()
self.mock_dcmanager_api.return_value = self.fake_dcmanager_api
self.addCleanup(p.stop)
# Mock the Audit Worker API
self.fake_audit_worker_api = FakeAuditWorkerAPI()
p = mock.patch('dcmanager.audit.rpcapi.ManagerAuditWorkerClient')
self.mock_audit_worker_api = p.start()
self.mock_audit_worker_api.return_value = self.fake_audit_worker_api
self.addCleanup(p.stop)
# Note: mock where an item is used, not where it comes from
p = mock.patch.object(patch_audit, 'OpenStackDriver')
self.mock_patch_audit_driver = p.start()
self.mock_patch_audit_driver.return_value = mock.MagicMock()
self.addCleanup(p.stop)
p = mock.patch.object(patch_audit, 'SysinvClient')
self.mock_patch_audit_sys = p.start()
self.mock_patch_audit_sys.return_value = mock.MagicMock()
self.addCleanup(p.stop)
p = mock.patch.object(patch_audit, 'PatchingClient')
self.mock_patch_audit_pc = p.start()
self.mock_patch_audit_pc.return_value = mock.MagicMock()
self.addCleanup(p.stop)
p = mock.patch.object(firmware_audit, 'OpenStackDriver')
self.mock_firmware_audit_driver = p.start()
self.mock_firmware_audit_driver.return_value = mock.MagicMock()
self.addCleanup(p.stop)
p = mock.patch.object(firmware_audit, 'SysinvClient')
self.mock_firmware_audit_sys = p.start()
self.mock_firmware_audit_sys .return_value = mock.MagicMock()
self.addCleanup(p.stop)
p = mock.patch.object(kubernetes_audit, 'OpenStackDriver')
self.kube_openstack_driver = mock.MagicMock()
self.mock_kube_audit_driver = p.start()
self.mock_kube_audit_driver.return_value = self.kube_openstack_driver
self.addCleanup(p.stop)
p = mock.patch.object(kubernetes_audit, 'SysinvClient')
self.kube_sysinv_client = FakeSysinvClient()
self.mock_kube_audit_sys = p.start()
self.mock_kube_audit_sys.return_value = self.kube_sysinv_client
self.addCleanup(p.stop)
def _rpc_convert(self, object_list):
# Convert to dict like what would happen calling via RPC
dict_results = []
for result in object_list:
dict_results.append(result.to_dict())
return dict_results
def test_init(self):
audit = kubernetes_audit.KubernetesAudit(self.ctxt,
self.fake_dcmanager_api)
self.assertIsNotNone(audit)
self.assertEqual(self.ctxt, audit.context)
self.assertEqual(self.fake_dcmanager_api, audit.dcmanager_rpc_client)
@mock.patch.object(subcloud_audit_manager, 'context')
def test_no_kubernetes_audit_data_to_sync(self, mock_context):
mock_context.get_admin_context.return_value = self.ctxt
audit = kubernetes_audit.KubernetesAudit(self.ctxt,
self.fake_dcmanager_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.kubernetes_audit = audit
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(True, True, True)
# Convert to dict like what would happen calling via RPC
kubernetes_audit_data = self._rpc_convert(kubernetes_audit_data)
for name in ['subcloud1', 'subcloud2']:
audit.subcloud_kubernetes_audit(name, kubernetes_audit_data)
expected_calls = [
mock.call(mock.ANY,
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_KUBERNETES,
sync_status=consts.SYNC_STATUS_IN_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status. \
assert_has_calls(expected_calls)
@mock.patch.object(subcloud_audit_manager, 'context')
def test_kubernetes_audit_data_out_of_sync_older(self, mock_context):
    """Subclouds on an older kube version are reported out-of-sync."""
    mock_context.get_admin_context.return_value = self.ctxt
    kube_audit = kubernetes_audit.KubernetesAudit(self.ctxt,
                                                  self.fake_dcmanager_api)
    audit_manager = subcloud_audit_manager.SubcloudAuditManager()
    audit_manager.kubernetes_audit = kube_audit

    # Region one (the system controller) reports the upgraded version.
    self.kube_sysinv_client.get_kube_versions.return_value = [
        FakeKubeVersion(version=UPGRADED_KUBE_VERSION),
    ]
    _, _, kubernetes_audit_data = \
        audit_manager._get_audit_data(True, True, True)
    # Mimic the dict conversion that occurs when calling via RPC.
    kubernetes_audit_data = self._rpc_convert(kubernetes_audit_data)

    for subcloud_name in ('subcloud1', 'subcloud2'):
        # Each subcloud reports the older (pre-upgrade) kube version.
        self.kube_sysinv_client.get_kube_versions.return_value = [
            FakeKubeVersion(version=PREVIOUS_KUBE_VERSION),
        ]
        kube_audit.subcloud_kubernetes_audit(subcloud_name,
                                             kubernetes_audit_data)
        self.fake_dcmanager_api.update_subcloud_endpoint_status.\
            assert_has_calls([
                mock.call(
                    mock.ANY,
                    subcloud_name=subcloud_name,
                    endpoint_type=dcorch_consts.ENDPOINT_TYPE_KUBERNETES,
                    sync_status=consts.SYNC_STATUS_OUT_OF_SYNC)])
@mock.patch.object(subcloud_audit_manager, 'context')
def test_kubernetes_audit_data_out_of_sync_newer(self, mock_context):
    """Subclouds on a newer kube version are also reported out-of-sync."""
    mock_context.get_admin_context.return_value = self.ctxt
    kube_audit = kubernetes_audit.KubernetesAudit(self.ctxt,
                                                  self.fake_dcmanager_api)
    audit_manager = subcloud_audit_manager.SubcloudAuditManager()
    audit_manager.kubernetes_audit = kube_audit

    # Region one (the system controller) reports the older version.
    self.kube_sysinv_client.get_kube_versions.return_value = [
        FakeKubeVersion(version=PREVIOUS_KUBE_VERSION),
    ]
    _, _, kubernetes_audit_data = \
        audit_manager._get_audit_data(True, True, True)
    # Mimic the dict conversion that occurs when calling via RPC.
    kubernetes_audit_data = self._rpc_convert(kubernetes_audit_data)

    for subcloud_name in ('subcloud1', 'subcloud2'):
        # Each subcloud reports the newer (upgraded) kube version.
        self.kube_sysinv_client.get_kube_versions.return_value = [
            FakeKubeVersion(version=UPGRADED_KUBE_VERSION),
        ]
        kube_audit.subcloud_kubernetes_audit(subcloud_name,
                                             kubernetes_audit_data)
        self.fake_dcmanager_api.update_subcloud_endpoint_status.\
            assert_has_calls([
                mock.call(
                    mock.ANY,
                    subcloud_name=subcloud_name,
                    endpoint_type=dcorch_consts.ENDPOINT_TYPE_KUBERNETES,
                    sync_status=consts.SYNC_STATUS_OUT_OF_SYNC)])

View File

@@ -10,7 +10,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
@@ -281,7 +281,8 @@ class TestPatchAudit(base.DCManagerTestCase):
am.patch_audit = pm
do_load_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(True, True, True)
# Convert to dict like what would happen calling via RPC
patch_audit_data = patch_audit_data.to_dict()
@@ -317,7 +318,8 @@ class TestPatchAudit(base.DCManagerTestCase):
mock_sysinv_client.side_effect = FakeSysinvClientOneLoad
do_load_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(True, True, True)
# Convert to dict like what would happen calling via RPC
patch_audit_data = patch_audit_data.to_dict()
@@ -380,7 +382,8 @@ class TestPatchAudit(base.DCManagerTestCase):
mock_sysinv_client.side_effect = FakeSysinvClientOneLoad
do_load_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(True, True, True)
# Convert to dict like what would happen calling via RPC
patch_audit_data = patch_audit_data.to_dict()
@@ -416,7 +419,8 @@ class TestPatchAudit(base.DCManagerTestCase):
mock_sysinv_client.side_effect = FakeSysinvClientOneLoadUnmatchedSoftwareVersion
do_load_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(True, True, True)
# Convert to dict like what would happen calling via RPC
patch_audit_data = patch_audit_data.to_dict()
@@ -462,7 +466,8 @@ class TestPatchAudit(base.DCManagerTestCase):
mock_sysinv_client.side_effect = FakeSysinvClientOneLoadUpgradeInProgress
do_load_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(True, True, True)
# Convert to dict like what would happen calling via RPC
patch_audit_data = patch_audit_data.to_dict()

View File

@@ -46,6 +46,12 @@ class FakeFirmwareAudit(object):
self.get_regionone_audit_data = mock.MagicMock()
class FakeKubernetesAudit(object):
def __init__(self):
self.get_regionone_audit_data = mock.MagicMock()
class FakeServiceGroup(object):
def __init__(self, status, desired_state, service_group_name, uuid,
node_name, state, condition, name):
@@ -233,6 +239,15 @@ class TestAuditManager(base.DCManagerTestCase):
self.fake_firmware_audit
self.addCleanup(p.stop)
# Mock kubernetes audit
self.fake_kubernetes_audit = FakeKubernetesAudit()
p = mock.patch.object(subcloud_audit_manager,
'kubernetes_audit')
self.mock_kubernetes_audit = p.start()
self.mock_kubernetes_audit.KubernetesAudit.return_value = \
self.fake_kubernetes_audit
self.addCleanup(p.stop)
@staticmethod
def create_subcloud_static(ctxt, **kwargs):
values = {

View File

@@ -66,6 +66,13 @@ class FakeFirmwareAudit(object):
self.get_regionone_audit_data = mock.MagicMock()
class FakeKubernetesAudit(object):
def __init__(self):
self.subcloud_kubernetes_audit = mock.MagicMock()
self.get_regionone_audit_data = mock.MagicMock()
class FakeServiceGroup(object):
def __init__(self, status, desired_state, service_group_name, uuid,
node_name, state, condition, name):
@@ -291,7 +298,6 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
self.fake_firmware_audit
self.addCleanup(p.stop)
# Mock firmware audit
self.fake_firmware_audit2 = FakeFirmwareAudit()
p = mock.patch.object(subcloud_audit_manager,
'firmware_audit')
@@ -300,6 +306,23 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
self.fake_firmware_audit2
self.addCleanup(p.stop)
# Mock kubernetes audit in Audit Worker and Audit Manager
self.fake_kubernetes_audit = FakeKubernetesAudit()
p = mock.patch.object(subcloud_audit_worker_manager,
'kubernetes_audit')
self.mock_kubernetes_audit = p.start()
self.mock_kubernetes_audit.KubernetesAudit.return_value = \
self.fake_kubernetes_audit
self.addCleanup(p.stop)
self.fake_kubernetes_audit2 = FakeKubernetesAudit()
p = mock.patch.object(subcloud_audit_manager,
'kubernetes_audit')
self.mock_kubernetes_audit2 = p.start()
self.mock_kubernetes_audit2.KubernetesAudit.return_value = \
self.fake_kubernetes_audit2
self.addCleanup(p.stop)
@staticmethod
def create_subcloud_static(ctxt, **kwargs):
values = {
@@ -343,17 +366,22 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
do_patch_audit = True
do_load_audit = True
do_firmware_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(
do_patch_audit, do_firmware_audit)
do_kubernetes_audit = True
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(do_patch_audit,
do_firmware_audit,
do_kubernetes_audit)
# Convert to dict like what would happen calling via RPC
patch_audit_data = patch_audit_data.to_dict()
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=False,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
kubernetes_audit_data=kubernetes_audit_data,
do_patch_audit=do_patch_audit,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
do_firmware_audit=do_firmware_audit,
do_kubernetes_audit=do_kubernetes_audit)
# Verify the subcloud was set to online
self.fake_dcmanager_api.update_subcloud_availability.assert_called_with(
@@ -376,6 +404,10 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
self.fake_firmware_audit.subcloud_firmware_audit.assert_called_with(
subcloud.name, firmware_audit_data)
# Verify kubernetes audit is called
self.fake_kubernetes_audit.subcloud_kubernetes_audit.assert_called_with(
subcloud.name, kubernetes_audit_data)
def test_audit_subcloud_online_unmanaged(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
@@ -388,17 +420,22 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
do_patch_audit = True
do_load_audit = True
do_firmware_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(
do_patch_audit, do_firmware_audit)
do_kubernetes_audit = True
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(do_patch_audit,
do_firmware_audit,
do_kubernetes_audit)
# Convert to dict like what would happen calling via RPC
patch_audit_data = patch_audit_data.to_dict()
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=False,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
kubernetes_audit_data=kubernetes_audit_data,
do_patch_audit=do_patch_audit,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
do_firmware_audit=do_firmware_audit,
do_kubernetes_audit=do_kubernetes_audit)
# Verify the subcloud was set to online
self.fake_dcmanager_api.update_subcloud_availability.assert_called_with(
@@ -418,6 +455,9 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
# Verify kubernetes audit is not called
self.fake_kubernetes_audit.subcloud_kubernetes_audit.assert_not_called()
def test_audit_subcloud_online_no_change(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
@@ -435,9 +475,11 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=False, patch_audit_data=None,
firmware_audit_data=None,
kubernetes_audit_data=None,
do_patch_audit=False,
do_load_audit=False,
do_firmware_audit=False)
do_firmware_audit=False,
do_kubernetes_audit=False)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
@@ -469,9 +511,11 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
wm._audit_subcloud(subcloud, update_subcloud_state=True,
do_audit_openstack=False, patch_audit_data=None,
firmware_audit_data=None,
kubernetes_audit_data=None,
do_patch_audit=False,
do_load_audit=False,
do_firmware_audit=False)
do_firmware_audit=False,
do_kubernetes_audit=False)
# Verify the subcloud state was updated even though no change
self.fake_dcmanager_api.update_subcloud_availability.assert_called_with(
@@ -491,6 +535,9 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
# Verify kubernetes audit is not called
self.fake_kubernetes_audit.subcloud_kubernetes_audit.assert_not_called()
def test_audit_subcloud_go_offline(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
@@ -515,17 +562,22 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
do_patch_audit = True
do_load_audit = True
do_firmware_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(
do_patch_audit, do_firmware_audit)
do_kubernetes_audit = True
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(do_patch_audit,
do_firmware_audit,
do_kubernetes_audit)
# Convert to dict like what would happen calling via RPC
patch_audit_data = patch_audit_data.to_dict()
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=False,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
kubernetes_audit_data=kubernetes_audit_data,
do_patch_audit=do_patch_audit,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
do_firmware_audit=do_firmware_audit,
do_kubernetes_audit=do_kubernetes_audit)
# Verify the audit fail count was updated
audit_fail_count = 1
@@ -545,9 +597,11 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
do_audit_openstack=False,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
kubernetes_audit_data=kubernetes_audit_data,
do_patch_audit=do_patch_audit,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
do_firmware_audit=do_firmware_audit,
do_kubernetes_audit=do_kubernetes_audit)
audit_fail_count = audit_fail_count + 1
@@ -566,6 +620,9 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
# Verify kubernetes audit is not called
self.fake_kubernetes_audit.subcloud_kubernetes_audit.assert_not_called()
def test_audit_subcloud_offline_no_change(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
@@ -586,17 +643,22 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
do_patch_audit = True
do_load_audit = True
do_firmware_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(
do_patch_audit, do_firmware_audit)
do_kubernetes_audit = True
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(do_patch_audit,
do_firmware_audit,
do_kubernetes_audit)
# Convert to dict like what would happen calling via RPC
patch_audit_data = patch_audit_data.to_dict()
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=True,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
kubernetes_audit_data=kubernetes_audit_data,
do_patch_audit=do_patch_audit,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
do_firmware_audit=do_firmware_audit,
do_kubernetes_audit=do_kubernetes_audit)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
@@ -615,6 +677,9 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
# Verify kubernetes audit is not called
self.fake_kubernetes_audit.subcloud_kubernetes_audit.assert_not_called()
def test_audit_subcloud_online_with_openstack_installed(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
@@ -631,8 +696,11 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Audit the subcloud
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=True, patch_audit_data=None,
firmware_audit_data=None, do_patch_audit=False,
do_load_audit=False, do_firmware_audit=False)
firmware_audit_data=None,
kubernetes_audit_data=None,
do_patch_audit=False,
do_load_audit=False, do_firmware_audit=False,
do_kubernetes_audit=False)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
@@ -654,6 +722,9 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
# Verify kubernetes audit is not called
self.fake_kubernetes_audit.subcloud_kubernetes_audit.assert_not_called()
def test_audit_subcloud_online_with_openstack_removed(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
@@ -674,8 +745,11 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Audit the subcloud
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=True, patch_audit_data=None,
firmware_audit_data=None, do_patch_audit=False,
do_load_audit=False, do_firmware_audit=False)
firmware_audit_data=None,
kubernetes_audit_data=None,
do_patch_audit=False,
do_load_audit=False, do_firmware_audit=False,
do_kubernetes_audit=False)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
@@ -696,6 +770,9 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
# Verify kubernetes audit is not called
self.fake_kubernetes_audit.subcloud_kubernetes_audit.assert_not_called()
def test_audit_subcloud_online_with_openstack_inactive(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
@@ -716,8 +793,10 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Audit the subcloud
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=True, patch_audit_data=None,
firmware_audit_data=None, do_patch_audit=False,
do_load_audit=False, do_firmware_audit=False)
firmware_audit_data=None,
kubernetes_audit_data=None, do_patch_audit=False,
do_load_audit=False, do_firmware_audit=False,
do_kubernetes_audit=False)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
@@ -738,6 +817,9 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
# Verify kubernetes audit is not called
self.fake_kubernetes_audit.subcloud_kubernetes_audit.assert_not_called()
def test_audit_subcloud_partial_subaudits(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
@@ -754,8 +836,11 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
do_patch_audit = True
do_load_audit = False
do_firmware_audit = False
patch_audit_data, firmware_audit_data = am._get_audit_data(
do_patch_audit, do_firmware_audit)
do_kubernetes_audit = False
patch_audit_data, firmware_audit_data, kubernetes_audit_data = \
am._get_audit_data(do_patch_audit,
do_firmware_audit,
do_kubernetes_audit)
# Convert to dict like what would happen calling via RPC
patch_audit_data = patch_audit_data.to_dict()
@@ -774,9 +859,11 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
do_audit_openstack=False,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
kubernetes_audit_data=kubernetes_audit_data,
do_patch_audit=do_patch_audit,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
do_firmware_audit=do_firmware_audit,
do_kubernetes_audit=do_kubernetes_audit)
# Verify patch audit is called
self.fake_patch_audit.subcloud_patch_audit.assert_called_with(
@@ -785,8 +872,12 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
# Verify kubernetes audit is not called
self.fake_kubernetes_audit.subcloud_kubernetes_audit.assert_not_called()
# Ensure the subaudits that didn't run are still requested
audits = db_api.subcloud_audits_get(self.ctx, subcloud.id)
self.assertEqual(audits.patch_audit_requested, False)
self.assertEqual(audits.load_audit_requested, True)
self.assertEqual(audits.firmware_audit_requested, True)
self.assertEqual(audits.kubernetes_audit_requested, True)

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -9,13 +9,17 @@ import uuid
from dcmanager.common import consts
from oslo_utils import timeutils
from dcmanager.tests.unit.common.fake_subcloud import FAKE_SUBCLOUD_INSTALL_VALUES
from dcmanager.tests.unit.common.fake_subcloud \
import FAKE_SUBCLOUD_INSTALL_VALUES
PREVIOUS_PREVIOUS_VERSION = '01.23'
PREVIOUS_VERSION = '12.34'
UPGRADED_VERSION = '56.78'
PREVIOUS_KUBE_VERSION = 'v1.2.3'
UPGRADED_KUBE_VERSION = 'v1.2.3-a'
FAKE_VENDOR = '8086'
FAKE_DEVICE = '0b30'
@@ -98,6 +102,38 @@ class FakeKeystoneClient(object):
self.session = mock.MagicMock()
class FakeKubeUpgrade(object):
def __init__(self,
obj_id=1,
from_version=PREVIOUS_KUBE_VERSION,
to_version=UPGRADED_KUBE_VERSION,
state='upgrade-complete'):
self.id = obj_id
self.uuid = str(uuid.uuid4())
self.from_version = from_version
self.to_version = to_version
self.state = state
class FakeKubeVersion(object):
def __init__(self,
obj_id=1,
version=UPGRADED_KUBE_VERSION,
target=True,
state='active'):
self.id = obj_id
self.uuid = str(uuid.uuid4())
self.version = version
self.target = target
self.state = state
self.upgrade_from = []
self.applied_patches = []
self.available_patches = []
def to_dict(self):
return dict(self.__dict__)
class FakeLoad(object):
def __init__(self,
obj_id,

View File

@@ -0,0 +1,21 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.tests.unit.orchestrator.states.kube.test_base \
import TestKubeUpgradeState
from dcmanager.tests.unit.orchestrator.states.test_applying_vim_strategy \
import ApplyingVIMStrategyMixin
class TestApplyingVIMKubeUpgradeStrategyStage(ApplyingVIMStrategyMixin,
TestKubeUpgradeState):
"""This test applies the 'kube' vim strategy during kube upgrade"""
def setUp(self):
super(TestApplyingVIMKubeUpgradeStrategyStage, self).setUp()
self.set_state(
consts.STRATEGY_STATE_KUBE_APPLYING_VIM_KUBE_UPGRADE_STRATEGY,
consts.STRATEGY_STATE_COMPLETE)

View File

@@ -0,0 +1,21 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.tests.unit.orchestrator.states.kube.test_base \
import TestKubeUpgradeState
from dcmanager.tests.unit.orchestrator.states.test_applying_vim_strategy \
import ApplyingVIMStrategyMixin
class TestApplyingVIMPatchStrategyStage(ApplyingVIMStrategyMixin,
TestKubeUpgradeState):
"""This test applies the patch vim strategy during kube upgrade"""
def setUp(self):
super(TestApplyingVIMPatchStrategyStage, self).setUp()
self.set_state(
consts.STRATEGY_STATE_KUBE_APPLYING_VIM_PATCH_STRATEGY,
consts.STRATEGY_STATE_KUBE_DELETING_VIM_PATCH_STRATEGY)

View File

@@ -0,0 +1,16 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.tests.unit.orchestrator.test_base import TestSwUpdate
class TestKubeUpgradeState(TestSwUpdate):
# Setting DEFAULT_STRATEGY_TYPE to upgrade will setup the kube upgrade
# orchestration worker, and will mock away the other orch threads
DEFAULT_STRATEGY_TYPE = consts.SW_UPDATE_TYPE_KUBERNETES
def setUp(self):
super(TestKubeUpgradeState, self).setUp()

View File

@@ -0,0 +1,29 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import mock
from dcmanager.common import consts
from dcmanager.tests.unit.orchestrator.states.fakes import FakeKubeVersion
from dcmanager.tests.unit.orchestrator.states.kube.test_base \
import TestKubeUpgradeState
from dcmanager.tests.unit.orchestrator.states.test_creating_vim_strategy \
import CreatingVIMStrategyStageMixin
class TestCreatingVIMKubeUpgradeStrategyStage(CreatingVIMStrategyStageMixin,
TestKubeUpgradeState):
"""Test a vim kube upgrade strategy during kube orchestration"""
def setUp(self):
super(TestCreatingVIMKubeUpgradeStrategyStage, self).setUp()
self.set_state(
consts.STRATEGY_STATE_KUBE_CREATING_VIM_KUBE_UPGRADE_STRATEGY,
consts.STRATEGY_STATE_KUBE_APPLYING_VIM_KUBE_UPGRADE_STRATEGY)
self.sysinv_client.get_kube_versions = mock.MagicMock()
self.sysinv_client.get_kube_versions.return_value = [
FakeKubeVersion(),
]

View File

@@ -0,0 +1,21 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.tests.unit.orchestrator.states.kube.test_base \
import TestKubeUpgradeState
from dcmanager.tests.unit.orchestrator.states.test_creating_vim_strategy \
import CreatingVIMStrategyStageMixin
class TestCreatingVIMPatchStrategyStage(CreatingVIMStrategyStageMixin,
TestKubeUpgradeState):
"""Test a VIM Patch Strategy during Kube upgrade orchestration"""
def setUp(self):
super(TestCreatingVIMPatchStrategyStage, self).setUp()
self.set_state(
consts.STRATEGY_STATE_KUBE_CREATING_VIM_PATCH_STRATEGY,
consts.STRATEGY_STATE_KUBE_APPLYING_VIM_PATCH_STRATEGY)

View File

@@ -0,0 +1,22 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.tests.unit.orchestrator.states.kube.test_base \
import TestKubeUpgradeState
class TestKubeDeletingVimPatchStrategyStage(TestKubeUpgradeState):
"""Test deleting the vim patch strategy during kube orch."""
def setUp(self):
super(TestKubeDeletingVimPatchStrategyStage, self).setUp()
self.strategy_step = self.setup_strategy_step(
consts.STRATEGY_STATE_KUBE_DELETING_VIM_PATCH_STRATEGY)
self.on_success_state = \
consts.STRATEGY_STATE_KUBE_CREATING_VIM_KUBE_UPGRADE_STRATEGY
self.subcloud = self.setup_subcloud()

View File

@@ -0,0 +1,22 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from dcmanager.common import consts
from dcmanager.tests.unit.orchestrator.states.kube.test_base \
import TestKubeUpgradeState
class TestKubeUpdatingPatchesStage(TestKubeUpgradeState):
"""Test uploading and applying the patches required for kube orch."""
def setUp(self):
super(TestKubeUpdatingPatchesStage, self).setUp()
self.strategy_step = self.setup_strategy_step(
consts.STRATEGY_STATE_KUBE_UPDATING_PATCHES)
self.on_success_state = \
consts.STRATEGY_STATE_KUBE_CREATING_VIM_PATCH_STRATEGY
self.subcloud = self.setup_subcloud()

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -26,21 +26,17 @@ STRATEGY_APPLY_FAILED = FakeVimStrategy(vim.STATE_APPLY_FAILED)
"DEFAULT_MAX_WAIT_ATTEMPTS", 3)
@mock.patch("dcmanager.orchestrator.states.applying_vim_strategy."
"WAIT_INTERVAL", 1)
class TestSwUpgradeApplyingVIMStrategyStage(TestSwUpgradeState):
class ApplyingVIMStrategyMixin(object):
def setUp(self):
super(TestSwUpgradeApplyingVIMStrategyStage, self).setUp()
# set the next state in the chain (when this state is successful)
self.on_success_state = \
consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0
def set_state(self, state, success_state):
self.state = state
self.on_success_state = success_state
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
consts.STRATEGY_STATE_APPLYING_VIM_UPGRADE_STRATEGY)
self.strategy_step = self.setup_strategy_step(self.state)
# Add mock API endpoints for client calls invcked by this state
self.vim_client.get_strategy = mock.MagicMock()
@@ -180,3 +176,12 @@ class TestSwUpgradeApplyingVIMStrategyStage(TestSwUpgradeState):
# Failure case
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
class TestSwUpgradeApplyingVIMStrategyStage(ApplyingVIMStrategyMixin,
TestSwUpgradeState):
def setUp(self):
super(TestSwUpgradeApplyingVIMStrategyStage, self).setUp()
self.set_state(consts.STRATEGY_STATE_APPLYING_VIM_UPGRADE_STRATEGY,
consts.STRATEGY_STATE_SWACTING_TO_CONTROLLER_0)

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -24,21 +24,17 @@ STRATEGY_FAILED_BUILDING = FakeVimStrategy(vim.STATE_BUILD_FAILED)
"DEFAULT_MAX_QUERIES", 3)
@mock.patch("dcmanager.orchestrator.states.creating_vim_strategy."
"DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeCreatingVIMStrategyStage(TestSwUpgradeState):
class CreatingVIMStrategyStageMixin(object):
def setUp(self):
super(TestSwUpgradeCreatingVIMStrategyStage, self).setUp()
# set the next state in the chain (when this state is successful)
self.on_success_state =\
consts.STRATEGY_STATE_APPLYING_VIM_UPGRADE_STRATEGY
def set_state(self, state, success_state):
self.state = state
self.on_success_state = success_state
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
# Add the strategy_step state being processed by this unit test
self.strategy_step = self.setup_strategy_step(
consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY)
self.strategy_step = self.setup_strategy_step(self.state)
# Add mock API endpoints for sysinv client calls invcked by this state
self.vim_client.create_strategy = mock.MagicMock()
@@ -185,3 +181,13 @@ class TestSwUpgradeCreatingVIMStrategyStage(TestSwUpgradeState):
# Failure case
self.assert_step_updated(self.strategy_step.subcloud_id,
consts.STRATEGY_STATE_FAILED)
class TestSwUpgradeCreatingVIMStrategyStage(CreatingVIMStrategyStageMixin,
TestSwUpgradeState):
"""Test Creating Vim Strategy for a SW upgrade"""
def setUp(self):
super(TestSwUpgradeCreatingVIMStrategyStage, self).setUp()
self.set_state(consts.STRATEGY_STATE_CREATING_VIM_UPGRADE_STRATEGY,
consts.STRATEGY_STATE_APPLYING_VIM_UPGRADE_STRATEGY)

View File

@@ -10,7 +10,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
@@ -131,6 +131,20 @@ class TestSwUpdate(base.DCManagerTestCase):
self.fake_fw_update_orch_thread
self.addCleanup(p.stop)
if strategy_type == consts.SW_UPDATE_TYPE_KUBERNETES:
sw_update_manager.KubeUpgradeOrchThread.stopped = lambda x: False
worker = sw_update_manager.KubeUpgradeOrchThread(
mock_strategy_lock,
mock_dcmanager_audit_api)
else:
# mock the patch orch thread
self.fake_kube_upgrade_orch_thread = FakeOrchThread()
p = mock.patch.object(sw_update_manager, 'KubeUpgradeOrchThread')
self.mock_kube_upgrade_orch_thread = p.start()
self.mock_kube_upgrade_orch_thread.return_value = \
self.fake_kube_upgrade_orch_thread
self.addCleanup(p.stop)
return worker
def setup_subcloud(self):

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2020-2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -9,7 +9,7 @@ from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.common import exceptions as exception
from dcmanager.db.sqlalchemy import api as db_api
from dcmanager.orchestrator.fw_update_orch_thread import FwUpdateOrchThread
from dcmanager.orchestrator.orch_thread import OrchThread
from dcmanager.tests.unit.common import fake_strategy
from dcmanager.tests.unit.fakes import FakeVimClient
@@ -26,16 +26,16 @@ class TestFwOrchThread(TestSwUpdate):
def setUp(self):
super(TestFwOrchThread, self).setUp()
# Mock the vim client defined in the orch thread
self.worker_vim_client = FakeVimClient()
p = mock.patch.object(FwUpdateOrchThread, 'get_vim_client')
self.mock_worker_vim_client = p.start()
self.mock_worker_vim_client.return_value = self.worker_vim_client
# Mock the vim client defined in the base state class
self.vim_client = FakeVimClient()
p = mock.patch.object(OrchThread, 'get_vim_client')
self.mock_vim_client = p.start()
self.mock_vim_client.return_value = self.vim_client
self.addCleanup(p.stop)
self.worker_vim_client.create_strategy = mock.MagicMock()
self.worker_vim_client.delete_strategy = mock.MagicMock()
self.worker_vim_client.get_strategy = mock.MagicMock()
self.vim_client.create_strategy = mock.MagicMock()
self.vim_client.delete_strategy = mock.MagicMock()
self.vim_client.get_strategy = mock.MagicMock()
def setup_strategy(self, state):
return fake_strategy.create_fake_strategy(
@@ -52,7 +52,7 @@ class TestFwOrchThread(TestSwUpdate):
self.worker.delete(self.strategy)
# There are no strategy steps, so no vim api calls should be invoked
self.worker_vim_client.get_strategy.assert_not_called()
self.vim_client.get_strategy.assert_not_called()
# Verify the strategy was deleted
self.assertRaises(exception.NotFound,
@@ -70,13 +70,13 @@ class TestFwOrchThread(TestSwUpdate):
consts.STRATEGY_STATE_CREATING_FW_UPDATE_STRATEGY)
# If the subcloud does not have a vim strategy, it raises an exception
self.worker_vim_client.get_strategy.side_effect = Exception
self.vim_client.get_strategy.side_effect = Exception
# invoke the strategy (not strategy step) operation on the orch thread
self.worker.delete(self.strategy)
# There is a step, so the vim strategy should be queried
self.worker_vim_client.get_strategy.assert_called()
self.vim_client.get_strategy.assert_called()
# Verify the strategy was deleted
self.assertRaises(exception.NotFound,
@@ -99,14 +99,14 @@ class TestFwOrchThread(TestSwUpdate):
# the subcloud returns a vim strategy
vim_strategy = FakeVimStrategy(state=vim.STATE_APPLIED)
self.worker_vim_client.get_strategy.return_value = vim_strategy
self.vim_client.get_strategy.return_value = vim_strategy
# invoke the strategy (not strategy step) operation on the orch thread
self.worker.delete(self.strategy)
# There is a step, so the vim strategy should be queried and deleted
self.worker_vim_client.get_strategy.assert_called()
self.worker_vim_client.delete_strategy.assert_called()
self.vim_client.get_strategy.assert_called()
self.vim_client.delete_strategy.assert_called()
# Verify the strategy was deleted
self.assertRaises(exception.NotFound,

View File

@@ -10,7 +10,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2017 Wind River Systems, Inc.
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
@@ -439,6 +439,13 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.fake_fw_update_orch_thread
self.addCleanup(p.stop)
self.fake_kube_upgrade_orch_thread = FakeOrchThread()
p = mock.patch.object(sw_update_manager, 'KubeUpgradeOrchThread')
self.mock_kube_upgrade_orch_thread = p.start()
self.mock_kube_upgrade_orch_thread.return_value = \
self.fake_kube_upgrade_orch_thread
self.addCleanup(p.stop)
# Mock the dcmanager audit API
self.fake_dcmanager_audit_api = FakeDCManagerAuditAPI()
p = mock.patch('dcmanager.audit.rpcapi.ManagerAuditClient')

View File

@@ -1,4 +1,4 @@
# Copyright 2017-2020 Wind River
# Copyright 2017-2021 Wind River
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -403,16 +403,22 @@ class SysinvAPIController(APIController):
response = req.get_response(application)
return self.process_response(environ, request, response)
def _notify_dcmanager(self, request, response):
def _notify_dcmanager(self, request, response, endpoint_type, sync_status):
# Send a RPC to dcmanager
LOG.info("Send RPC to dcmanager to set firmware sync status to "
"unknown")
LOG.info("Send RPC to dcmanager to set: %s sync status to: %s"
% (endpoint_type, sync_status))
self.dcmanager_rpc_client.update_subcloud_endpoint_status(
self.ctxt,
endpoint_type=consts.ENDPOINT_TYPE_FIRMWARE,
sync_status=dcmanager_consts.SYNC_STATUS_UNKNOWN)
endpoint_type=endpoint_type,
sync_status=sync_status)
return response
def _notify_dcmanager_firmware(self, request, response):
return self._notify_dcmanager(request,
response,
consts.ENDPOINT_TYPE_FIRMWARE,
dcmanager_consts.SYNC_STATUS_UNKNOWN)
def _process_response(self, environ, request, response):
try:
resource_type = self._get_resource_type_from_environ(environ)
@@ -440,7 +446,7 @@ class SysinvAPIController(APIController):
# PATCH operation for apply/remove commands fall through
# as they only require to notify dcmanager
if notify:
self._notify_dcmanager(request, response)
self._notify_dcmanager_firmware(request, response)
else:
self._enqueue_work(environ, request, response)
self.notify(environ, self.ENDPOINT_TYPE)

View File

@@ -12,7 +12,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
@@ -129,6 +129,7 @@ ENDPOINT_TYPE_NFV = "nfv"
ENDPOINT_TYPE_LOAD = "load"
ENDPOINT_TYPE_DC_CERT = 'dc-cert'
ENDPOINT_TYPE_FIRMWARE = 'firmware'
ENDPOINT_TYPE_KUBERNETES = 'kubernetes'
# All endpoint types
ENDPOINT_TYPES_LIST = [ENDPOINT_TYPE_PLATFORM,
@@ -136,7 +137,8 @@ ENDPOINT_TYPES_LIST = [ENDPOINT_TYPE_PLATFORM,
ENDPOINT_TYPE_IDENTITY,
ENDPOINT_TYPE_LOAD,
ENDPOINT_TYPE_DC_CERT,
ENDPOINT_TYPE_FIRMWARE]
ENDPOINT_TYPE_FIRMWARE,
ENDPOINT_TYPE_KUBERNETES]
# Dcorch sync endpoint types
SYNC_ENDPOINT_TYPES_LIST = [ENDPOINT_TYPE_PLATFORM,

View File

@@ -229,7 +229,7 @@ class GenericSyncManager(object):
sc = sc.create()
for endpoint_type in endpoint_type_list:
db_api.subcloud_sync_create(context, name, endpoint_type,
values={'subcloud_id': sc.id})
values={'subcloud_id': sc.id}) # pylint: disable=E1101
# Create the sync object for this engine
self.create_sync_objects(name, capabilities)