Add CA certificate transfer state to DC upgrade orchestration

Add a step to the DC upgrade orchestration that transfers the CA
certificate to duplex subclouds.

Test Plan:
PASSED: DX subcloud upgrade
PASSED: All unit tests. The newly introduced step is only relevant
        to duplex subcloud and skipped for simplex subcloud.

Story: 2009834
Task: 46913

Depends-On: https://review.opendev.org/c/starlingx/config/+/865281

Signed-off-by: Li Zhu <li.zhu@windriver.com>
Change-Id: I7ec1619e6daa3c19bed49f52b1570da0f8516afd
This commit is contained in:
Li Zhu
2022-11-22 00:38:40 -05:00
parent 28d0c8ecfb
commit 52d96db89c
7 changed files with 272 additions and 13 deletions

View File

@@ -108,6 +108,7 @@ STRATEGY_STATE_IMPORTING_LOAD = "importing load"
STRATEGY_STATE_UPDATING_PATCHES = "updating patches"
STRATEGY_STATE_FINISHING_PATCH_STRATEGY = "finishing patch strategy"
STRATEGY_STATE_STARTING_UPGRADE = "starting upgrade"
STRATEGY_STATE_TRANSFERRING_CA_CERTIFICATE = "transferring CA certificate"
STRATEGY_STATE_LOCKING_CONTROLLER_0 = "locking controller-0"
STRATEGY_STATE_LOCKING_CONTROLLER_1 = "locking controller-1"
STRATEGY_STATE_UPGRADING_SIMPLEX = "upgrading simplex"
@@ -298,3 +299,8 @@ INVALID_DEPLOY_STATES_FOR_RESTORE = [DEPLOY_STATE_INSTALLING,
DEPLOY_STATE_BOOTSTRAPPING,
DEPLOY_STATE_DEPLOYING,
DEPLOY_STATE_REHOMING]
# The k8s secret that holds openldap CA certificate
OPENLDAP_CA_CERT_SECRET_NAME = "system-local-ca"
# The k8s namespace in which the secret above is looked up
CERT_NAMESPACE_PLATFORM_CA_CERTS = 'cert-manager'

View File

@@ -30,12 +30,14 @@ from keystoneauth1 import exceptions as keystone_exceptions
from oslo_concurrency import lockutils
from oslo_config import cfg
from oslo_log import log as logging
from oslo_serialization import base64
from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon.drivers.openstack import vim
from dccommon import exceptions as dccommon_exceptions
from dccommon import kubeoperator
from dcmanager.common import consts
from dcmanager.common import exceptions
from dcmanager.db import api as db_api
@@ -774,3 +776,35 @@ def is_subcloud_healthy(subcloud_name):
return True
return False
def get_certificate_from_secret(secret_name, secret_ns):
    """Get certificate from k8s secret

    The secret is read through KubeOperator and its 'tls.crt' and
    'tls.key' entries are base64-decoded to text.

    :param secret_name: the name of the secret
    :param secret_ns: the namespace of the secret

    :return: tls_crt: the certificate.
             tls_key: the corresponding private key of the certificate.
    :raises Exception: if the secret carries no data, if either the
        'tls.crt' or the 'tls.key' entry is missing, or if an entry
        cannot be decoded.
    """
    kube = kubeoperator.KubeOperator()
    secret = kube.kube_get_secret(secret_name, secret_ns)

    # Reject a secret whose data is None as well as one without the
    # attribute; otherwise the membership test below would fail with a
    # raw TypeError instead of a descriptive error.
    data = getattr(secret, 'data', None)
    if data is None:
        raise Exception('Invalid secret %s\\%s' % (secret_ns, secret_name))

    if 'tls.crt' not in data or 'tls.key' not in data:
        raise Exception('Invalid certificate data from secret %s\\%s' %
                        (secret_ns, secret_name))

    try:
        tls_crt = base64.decode_as_text(data['tls.crt'])
        tls_key = base64.decode_as_text(data['tls.key'])
    except (TypeError, ValueError):
        # ValueError also covers UnicodeDecodeError, which may be raised
        # when the decoded bytes are not valid text.
        raise Exception('Certificate secret data is invalid %s\\%s' %
                        (secret_ns, secret_name))

    return tls_crt, tls_key

View File

@@ -1,5 +1,5 @@
#
# Copyright (c) 2020-2021 Wind River Systems, Inc.
# Copyright (c) 2020-2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -25,16 +25,9 @@ class StartingUpgradeState(BaseState):
"""Upgrade state for starting an upgrade on a subcloud"""
def __init__(self, region_name):
subcloud_type = self.get_sysinv_client(
region_name).get_system().system_mode
if subcloud_type == consts.SYSTEM_MODE_SIMPLEX:
super(StartingUpgradeState, self).__init__(
next_state=consts.STRATEGY_STATE_LOCKING_CONTROLLER_0,
region_name=region_name)
else:
super(StartingUpgradeState, self).__init__(
next_state=consts.STRATEGY_STATE_LOCKING_CONTROLLER_1,
region_name=region_name)
super(StartingUpgradeState, self).__init__(
next_state=consts.STRATEGY_STATE_TRANSFERRING_CA_CERTIFICATE,
region_name=region_name)
self.sleep_duration = DEFAULT_SLEEP_DURATION
self.max_queries = DEFAULT_MAX_QUERIES
self.max_failed_retries = MAX_FAILED_RETRIES

View File

@@ -0,0 +1,76 @@
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from dcmanager.common import consts
from dcmanager.common import utils
from dcmanager.orchestrator.states.base import BaseState
# Max time: 1.5 minutes = 3 retries x 30 seconds between each
DEFAULT_MAX_RETRIES = 3
DEFAULT_SLEEP_DURATION = 30
class TransferCACertificateState(BaseState):
    """Upgrade step for transferring CA certificate"""

    def __init__(self, region_name):
        # The system mode is queried first because it selects the state
        # that follows this one.
        self.subcloud_type = self.get_sysinv_client(
            region_name).get_system().system_mode

        if self.subcloud_type == consts.SYSTEM_MODE_SIMPLEX:
            following_state = consts.STRATEGY_STATE_LOCKING_CONTROLLER_0
        else:
            following_state = consts.STRATEGY_STATE_LOCKING_CONTROLLER_1

        super(TransferCACertificateState, self).__init__(
            next_state=following_state,
            region_name=region_name)

        self.max_retries = DEFAULT_MAX_RETRIES
        self.sleep_duration = DEFAULT_SLEEP_DURATION

    def _send_openldap_ca_cert(self, strategy_step):
        """Make one attempt at pushing the openldap CA cert to the subcloud

        The certificate is read from the k8s secret named by
        consts.OPENLDAP_CA_CERT_SECRET_NAME and handed to the subcloud
        sysinv client. Exceptions are left to the caller.
        """
        client = self.get_sysinv_client(strategy_step.subcloud.name)
        request_data = {'mode': 'openldap_ca'}
        # Only the certificate is transferred; the private key is unused.
        ca_cert, _ = utils.get_certificate_from_secret(
            consts.OPENLDAP_CA_CERT_SECRET_NAME,
            consts.CERT_NAMESPACE_PLATFORM_CA_CERTS)
        client.update_certificate('', ca_cert, request_data)

    def perform_state_action(self, strategy_step):
        """Transfer CA certificate for an upgrade to a subcloud

        Simplex subclouds skip the transfer. For any other system mode a
        failed attempt is retried up to max_retries times, sleeping
        sleep_duration seconds before each retry.

        Returns the next state in the state machine on success.
        Any exceptions raised by this method set the strategy to FAILED.
        """
        if self.subcloud_type == consts.SYSTEM_MODE_SIMPLEX:
            return self.next_state

        self.info_log(strategy_step, "Start transferring CA certificate...")

        failed_attempts = 0
        while True:
            try:
                self._send_openldap_ca_cert(strategy_step)
            except Exception as err:
                self.warn_log(strategy_step,
                              "Encountered exception: %s" % str(err))
                failed_attempts += 1
                # Give up once the initial attempt and every retry failed.
                if failed_attempts > self.max_retries:
                    raise Exception(
                        "Failed to transfer CA certificate for subcloud %s."
                        % strategy_step.subcloud.name)
                self.warn_log(strategy_step,
                              "Retry (%i/%i) in %i secs."
                              % (failed_attempts,
                                 self.max_retries,
                                 self.sleep_duration))
                time.sleep(self.sleep_duration)
            else:
                break

        self.info_log(strategy_step, "CA certificate transfer completed.")
        return self.next_state

View File

@@ -49,6 +49,8 @@ from dcmanager.orchestrator.states.upgrade.swact_to_controller_0 \
import SwactToController0State
from dcmanager.orchestrator.states.upgrade.swact_to_controller_1 \
import SwactToController1State
from dcmanager.orchestrator.states.upgrade.transfer_ca_certificate \
import TransferCACertificateState
from dcmanager.orchestrator.states.upgrade.unlock_duplex \
import UnlockDuplexState
from dcmanager.orchestrator.states.upgrade.unlock_simplex \
@@ -86,6 +88,7 @@ class SwUpgradeOrchThread(OrchThread):
consts.STRATEGY_STATE_FINISHING_PATCH_STRATEGY:
FinishingPatchStrategyState,
consts.STRATEGY_STATE_STARTING_UPGRADE: StartingUpgradeState,
consts.STRATEGY_STATE_TRANSFERRING_CA_CERTIFICATE: TransferCACertificateState,
consts.STRATEGY_STATE_LOCKING_CONTROLLER_0: LockSimplexState,
consts.STRATEGY_STATE_LOCKING_CONTROLLER_1: LockDuplexState,
consts.STRATEGY_STATE_UPGRADING_SIMPLEX: UpgradingSimplexState,

View File

@@ -31,7 +31,7 @@ class TestSwUpgradeSimplexStartingUpgradeStage(TestSwUpgradeState):
super(TestSwUpgradeSimplexStartingUpgradeStage, self).setUp()
# next state after 'starting upgrade' is 'transferring CA certificate'
self.on_success_state = consts.STRATEGY_STATE_LOCKING_CONTROLLER_0
self.on_success_state = consts.STRATEGY_STATE_TRANSFERRING_CA_CERTIFICATE
# Add the subcloud being processed by this unit test
self.subcloud = self.setup_subcloud()
@@ -215,7 +215,7 @@ class TestSwUpgradeDuplexStartingUpgradeStage(TestSwUpgradeSimplexStartingUpgrad
super(TestSwUpgradeDuplexStartingUpgradeStage, self).setUp()
# next state after 'starting upgrade' is 'transferring CA certificate'
self.on_success_state = consts.STRATEGY_STATE_LOCKING_CONTROLLER_1
self.on_success_state = consts.STRATEGY_STATE_TRANSFERRING_CA_CERTIFICATE
# Add mock API endpoints for sysinv client calls invoked by this state
system_values = FakeSystem()

View File

@@ -0,0 +1,147 @@
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import mock
from dcmanager.common import consts
from dcmanager.orchestrator.states.upgrade import transfer_ca_certificate
from dcmanager.tests.unit.orchestrator.states.fakes import FakeSystem
from dcmanager.tests.unit.orchestrator.states.upgrade.test_base \
import TestSwUpgradeState
FAKE_CERT = "-----BEGIN CERTIFICATE-----\nMIIDAO\n-----END CERTIFICATE-----\n"
class TestSwUpgradeSimplexTransferringCACertificateStage(TestSwUpgradeState):
    """Tests for the 'transferring CA certificate' state on simplex"""

    def setUp(self):
        super(TestSwUpgradeSimplexTransferringCACertificateStage, self).setUp()

        # 'locking controller-0' follows 'transferring CA certificate'
        self.on_success_state = consts.STRATEGY_STATE_LOCKING_CONTROLLER_0

        # Subcloud handled by this unit test
        self.subcloud = self.setup_subcloud()

        # Strategy step state handled by this unit test
        self.strategy_step = self.setup_strategy_step(
            self.subcloud.id, consts.STRATEGY_STATE_TRANSFERRING_CA_CERTIFICATE)

        # Mock the sysinv client calls invoked by this state; the fake
        # system reports simplex mode
        self.sysinv_client.update_certificate = mock.MagicMock()
        self.sysinv_client.get_system = mock.MagicMock()
        simplex_system = FakeSystem()
        simplex_system.system_mode = consts.SYSTEM_MODE_SIMPLEX
        self.sysinv_client.get_system.return_value = simplex_system

    def test_upgrade_subcloud_upgrade_transferring_ca_certificate_skipped(self):
        """Test transferring CA certificate is skipped for the Simplex."""

        # Run the strategy state operation on the orch thread
        self.worker.perform_state_action(self.strategy_step)

        # The certificate API must not have been called
        self.sysinv_client.update_certificate.assert_not_called()

        # The step is skipped on simplex, so the state moves straight to
        # the next state
        self.assert_step_updated(self.strategy_step.subcloud_id,
                                 self.on_success_state)
@mock.patch("dcmanager.orchestrator.states.upgrade.transfer_ca_certificate"
            ".DEFAULT_SLEEP_DURATION", 1)
class TestSwUpgradeDuplexTransferringCACertificateStage(TestSwUpgradeState):
    """Tests for the 'transferring CA certificate' state on duplex"""

    def setUp(self):
        super(TestSwUpgradeDuplexTransferringCACertificateStage, self).setUp()

        # 'locking controller-1' follows 'transferring CA certificate'
        self.on_success_state = consts.STRATEGY_STATE_LOCKING_CONTROLLER_1

        # Subcloud handled by this unit test
        self.subcloud = self.setup_subcloud()

        # Strategy step state handled by this unit test
        self.strategy_step = self.setup_strategy_step(
            self.subcloud.id, consts.STRATEGY_STATE_TRANSFERRING_CA_CERTIFICATE)

        # Mock the sysinv client calls invoked by this state; the fake
        # system reports duplex mode
        self.sysinv_client.update_certificate = mock.MagicMock()
        self.sysinv_client.get_system = mock.MagicMock()
        duplex_system = FakeSystem()
        duplex_system.system_mode = consts.SYSTEM_MODE_DUPLEX
        self.sysinv_client.get_system.return_value = duplex_system

    def _patch_get_certificate(self, result=None, error=None):
        """Patch utils.get_certificate_from_secret for the current test

        The mock raises `error` when one is given and returns `result`
        otherwise. The patch is undone on test cleanup.
        """
        patcher = mock.patch(
            'dcmanager.common.utils.get_certificate_from_secret')
        self.mock_cert_file = patcher.start()
        if error is not None:
            self.mock_cert_file.side_effect = error
        else:
            self.mock_cert_file.return_value = result
        self.addCleanup(patcher.stop)

    def test_upgrade_subcloud_upgrade_transferring_ca_certificate_success(self):
        """Test transferring_ca_certificate where the API call succeeds."""

        # get_certificate_from_secret finds the openldap ca certificate
        self._patch_get_certificate(result=(FAKE_CERT, None))

        # Run the strategy state operation on the orch thread
        self.worker.perform_state_action(self.strategy_step)

        # update_certificate must have been called with the certificate
        self.sysinv_client.update_certificate.assert_called_with(
            '', FAKE_CERT, {'mode': 'openldap_ca'})

        # On success the state moves to the next state
        self.assert_step_updated(self.strategy_step.subcloud_id,
                                 self.on_success_state)

    def test_upgrade_subcloud_upgrade_transferring_ca_certificate_fails_get_cert(
            self):
        """Test API call fails due to failing to get certificate from secret."""

        # get_certificate_from_secret fails to get the openldap ca
        # certificate
        self._patch_get_certificate(error=Exception("Invalid certificated"))

        # Run the strategy state operation on the orch thread
        self.worker.perform_state_action(self.strategy_step)

        # update_certificate must not have been called
        self.sysinv_client.update_certificate.assert_not_called()

        # The failure leads to a state failure
        self.assert_step_updated(self.strategy_step.subcloud_id,
                                 consts.STRATEGY_STATE_FAILED)

    def test_upgrade_subcloud_upgrade_transferring_ca_certificate_fails_update_cert(
            self):
        """Test API call fails due to failing to update certificate."""

        # get_certificate_from_secret finds the openldap ca certificate
        self._patch_get_certificate(result=(FAKE_CERT, None))

        # update_certificate fails on every call
        self.sysinv_client.update_certificate.side_effect = Exception(
            "Faile to update certificated")

        # Run the strategy state operation on the orch thread
        self.worker.perform_state_action(self.strategy_step)

        # update_certificate must have been called with the certificate
        self.sysinv_client.update_certificate.assert_called_with(
            '', FAKE_CERT, {'mode': 'openldap_ca'})

        # It must have been called 1 + max_retries times
        self.assertEqual(transfer_ca_certificate.DEFAULT_MAX_RETRIES + 1,
                         self.sysinv_client.update_certificate.call_count)

        # The failure leads to a state failure
        self.assert_step_updated(self.strategy_step.subcloud_id,
                                 consts.STRATEGY_STATE_FAILED)