diff --git a/api-ref/source/api-ref-dcmanager-v1.rst b/api-ref/source/api-ref-dcmanager-v1.rst index dbc4c9167..faeb78752 100644 --- a/api-ref/source/api-ref-dcmanager-v1.rst +++ b/api-ref/source/api-ref-dcmanager-v1.rst @@ -1257,6 +1257,7 @@ serviceUnavailable (503) - subcloud-apply-type: subcloud_apply_type - type: sw_update_strategy_type - upload-only: patch_strategy_upload_only + - force: force_sync_status Request Example ---------------- diff --git a/api-ref/source/parameters.yaml b/api-ref/source/parameters.yaml index 71b88ad0e..98725e149 100644 --- a/api-ref/source/parameters.yaml +++ b/api-ref/source/parameters.yaml @@ -308,6 +308,13 @@ force: in: body required: false type: boolean +force_sync_status: + description: | + Indicates whether to disregard subcloud endpoint sync status + or management alarm condition depending on strategy type. + in: body + required: false + type: boolean group_id: description: | The ID of a subcloud group. Default is 1. diff --git a/api-ref/source/samples/sw-update-strategy/sw-update-strategy-post-request.json b/api-ref/source/samples/sw-update-strategy/sw-update-strategy-post-request.json index 32572e1ed..a48a1eb6f 100644 --- a/api-ref/source/samples/sw-update-strategy/sw-update-strategy-post-request.json +++ b/api-ref/source/samples/sw-update-strategy/sw-update-strategy-post-request.json @@ -3,5 +3,6 @@ "type": "patch", "stop-on-failure": "true", "max-parallel-subclouds": 2, - "upload-only": "true" + "upload-only": "true", + "force": "true" } diff --git a/distributedcloud/dcagent/api/controllers/v1/audit.py b/distributedcloud/dcagent/api/controllers/v1/audit.py index b11b44bd9..9194c9cff 100644 --- a/distributedcloud/dcagent/api/controllers/v1/audit.py +++ b/distributedcloud/dcagent/api/controllers/v1/audit.py @@ -43,7 +43,21 @@ class AuditController(object): if not payload: pecan.abort(http.client.BAD_REQUEST, _("Body required")) - LOG.debug(f"Payload sent by system controller: {payload}") + # TODO(vgluzrom): Remove extra_args from header and keep it only in payload + # once all supported dcagent versions have this possibility. If system + # controller sends extra_args in payload to a dcagent that doesn't support it, + # it will raise an UnsupportedAudit exception. + try: + headers = json.loads(request.headers.get("X-DCAGENT-HEADERS", "{}")) + except ValueError: + pecan.abort(http.client.BAD_REQUEST, _("Request headers decoding error")) + + extra_args = payload.pop("extra_args", {}) + extra_args = {**extra_args, **headers} + + LOG.debug( + f"Payload sent by system controller: {payload}. Extra args: {extra_args}" + ) try: # Delete "use_cache" from payload so it doesn't get passed as an audit @@ -52,7 +66,7 @@ class AuditController(object): requested_audit = RequestedAudit( request_token=context.auth_token, use_cache=use_cache ) - return requested_audit.get_sync_status(payload) + return requested_audit.get_sync_status(payload, extra_args) except UnsupportedAudit as ex: LOG.exception(ex) diff --git a/distributedcloud/dcagent/common/audit_manager.py b/distributedcloud/dcagent/common/audit_manager.py index f0715ea81..dfa1c13a1 100644 --- a/distributedcloud/dcagent/common/audit_manager.py +++ b/distributedcloud/dcagent/common/audit_manager.py @@ -102,9 +102,16 @@ class PeriodicAudit(utils.BaseAuditManager): (get_subcloud_base_audit, lambda: [self.sysinv_client, self.fm_client]), (FirmwareAudit.get_subcloud_audit_data, lambda: [self.sysinv_client]), (KubernetesAudit.get_subcloud_audit_data, lambda: [self.sysinv_client]), + # Need to call kube rootca function two times as it has a different + # response if the subcloud was rehomed or not and we want to cache both + # results ( KubeRootcaUpdateAudit.get_subcloud_audit_data, - lambda: [self.sysinv_client, self.fm_client], + lambda: [self.sysinv_client, self.fm_client, False], + ), + ( + KubeRootcaUpdateAudit.get_subcloud_audit_data, + lambda: [self.sysinv_client, self.fm_client, True], ), (SoftwareAudit.get_subcloud_audit_data, lambda: [self.software_client]), ] @@ -119,7 +126,7 @@ class RequestedAudit(utils.BaseAuditManager): self.request_token = request_token self.use_cache = use_cache - def get_single_audit_status(self, audit_type, regionone_audit_data): + def get_single_audit_status(self, audit_type, regionone_audit_data, extra_args): # Since this run in parallel, we need to initialize the clients # here to not use the same socket in every call sysinv_client, fm_client, software_client = self.initialize_clients( @@ -139,8 +146,9 @@ class RequestedAudit(utils.BaseAuditManager): sysinv_client, regionone_audit_data ) elif audit_type == dccommon_consts.KUBE_ROOTCA_AUDIT: + rehomed = extra_args.get("rehomed", False) resp = KubeRootcaUpdateAudit.get_subcloud_sync_status( - sysinv_client, fm_client, regionone_audit_data + sysinv_client, fm_client, regionone_audit_data, rehomed ) elif audit_type == dccommon_consts.KUBERNETES_AUDIT: resp = KubernetesAudit.get_subcloud_sync_status( @@ -161,11 +169,16 @@ class RequestedAudit(utils.BaseAuditManager): raise exceptions.AuditStatusFailure(audit=audit_type) return audit_type, resp - def get_sync_status(self, payload): + def get_sync_status(self, payload, extra_args): sync_resp = {} pool = GreenPool(size=10) jobs = [ - pool.spawn(self.get_single_audit_status, audit_type, regionone_audit_data) + pool.spawn( + self.get_single_audit_status, + audit_type, + regionone_audit_data, + extra_args, + ) for audit_type, regionone_audit_data in payload.items() ] diff --git a/distributedcloud/dccommon/drivers/openstack/dcagent_v1.py b/distributedcloud/dccommon/drivers/openstack/dcagent_v1.py index 41bb670be..5662cdaf6 100644 --- a/distributedcloud/dccommon/drivers/openstack/dcagent_v1.py +++ b/distributedcloud/dccommon/drivers/openstack/dcagent_v1.py @@ -13,7 +13,7 @@ from dccommon.drivers import base LOG = log.getLogger(__name__) -DCAGENT_REST_DEFAULT_TIMEOUT = 900 +DCAGENT_REST_DEFAULT_TIMEOUT = 30 class DcagentClient(base.DriverBase): @@ -37,11 +37,17 @@ class DcagentClient(base.DriverBase): else: self.endpoint = endpoint - def audit(self, audit_data, timeout=DCAGENT_REST_DEFAULT_TIMEOUT): + def audit( + self, + audit_data: dict, + headers: dict = None, + timeout: int = DCAGENT_REST_DEFAULT_TIMEOUT, + ): """Audit subcloud""" url = self.endpoint + "/v1/dcaudit" + headers = headers or {} response = self.session.patch( - url, json=audit_data, timeout=timeout, raise_exc=False + url, json=audit_data, headers=headers, timeout=timeout, raise_exc=False ) if response.status_code == 200: diff --git a/distributedcloud/dcmanager/audit/kube_rootca_update_audit.py b/distributedcloud/dcmanager/audit/kube_rootca_update_audit.py index 15be88853..d9d1557d5 100644 --- a/distributedcloud/dcmanager/audit/kube_rootca_update_audit.py +++ b/distributedcloud/dcmanager/audit/kube_rootca_update_audit.py @@ -87,18 +87,22 @@ class KubeRootcaUpdateAudit(object): cls, sysinv_client: SysinvClient, fm_client: FmClient, + rehomed: bool = False, subcloud_name: str = None, ) -> tuple: skip_audit = 2 * [dccommon_consts.SKIP_AUDIT] - try: - success, subcloud_cert_data = sysinv_client.get_kube_rootca_cert_id() - except Exception: - msg = f"Failed to get Kubernetes root CA status, skip {AUDIT_TYPE} audit." - log_subcloud_msg(LOG.exception, msg, subcloud_name) - return skip_audit + if rehomed: + try: + success, subcloud_cert_data = sysinv_client.get_kube_rootca_cert_id() + except Exception: + msg = ( + f"Failed to get Kubernetes root CA status, skip {AUDIT_TYPE} audit." + ) + log_subcloud_msg(LOG.exception, msg, subcloud_name) + return skip_audit - if success: - return CERT_BASED, subcloud_cert_data + if success: + return CERT_BASED, subcloud_cert_data try: detected_alarms = fm_client.get_alarms_by_ids(KUBE_ROOTCA_ALARM_LIST) @@ -114,12 +118,13 @@ class KubeRootcaUpdateAudit(object): sysinv_client: SysinvClient, fm_client: FmClient, regionone_rootca_certid: str, + rehomed: bool = False, subcloud_name: str = None, ): """Get the sync status of the subcloud's kube root CA cert.""" audit_method, subcloud_audit_data = cls.get_subcloud_audit_data( - sysinv_client, fm_client, subcloud_name + sysinv_client, fm_client, rehomed, subcloud_name ) sync_status = None @@ -147,8 +152,10 @@ class KubeRootcaUpdateAudit(object): The audit logic is as follow: No region one cert ID -> skip audit - Subcloud doesn't have the API to get cert ID -> alarm based - Subcloud has the API to get cert ID -> cert based + Failure to get alarms or subcloud cert ID -> skip audit + Subcloud was not rehomed -> alarm based + Subcloud was rehomed and doesn't have the API to get cert ID -> alarm based + Subcloud was rehomed and has the API to get cert ID -> cert based :param sysinv_client: the sysinv client object :param fm_client: the fm client object @@ -165,7 +172,11 @@ class KubeRootcaUpdateAudit(object): return dccommon_consts.SYNC_STATUS_IN_SYNC sync_status = self.get_subcloud_sync_status( - sysinv_client, fm_client, regionone_rootca_certid, subcloud.name + sysinv_client, + fm_client, + regionone_rootca_certid, + subcloud.rehomed, + subcloud.name, ) if sync_status: diff --git a/distributedcloud/dcmanager/audit/subcloud_audit_worker_manager.py b/distributedcloud/dcmanager/audit/subcloud_audit_worker_manager.py index 4cf04eedf..e1ce22c30 100644 --- a/distributedcloud/dcmanager/audit/subcloud_audit_worker_manager.py +++ b/distributedcloud/dcmanager/audit/subcloud_audit_worker_manager.py @@ -15,6 +15,7 @@ # import copy +import json import os import threading import time @@ -357,6 +358,13 @@ class SubcloudAuditWorkerManager(manager.Manager): audit_payload["use_cache"] = use_cache return audit_payload + def _build_dcagent_request_headers(self, subcloud: models.Subcloud): + dc_agent_headers = {} + if subcloud.rehomed: + dc_agent_headers["rehomed"] = subcloud.rehomed + header = {"X-DCAGENT-HEADERS": json.dumps(dc_agent_headers)} + return header + def _update_sw_sync_status_from_deploy_status(self, subcloud, audit_results): # If the subcloud deploy_status is in any of the following states, # the sync_status should be set to out-of-sync for software audit. @@ -541,9 +549,10 @@ class SubcloudAuditWorkerManager(manager.Manager): do_software_audit, use_cache, ) + headers = self._build_dcagent_request_headers(subcloud) audit_results = {} try: - audit_results = dcagent_client.audit(audit_payload) + audit_results = dcagent_client.audit(audit_payload, headers) except Exception: LOG.exception(failmsg % (subcloud.name, "dcagent")) failures.append("dcagent") diff --git a/distributedcloud/dcmanager/orchestrator/sw_update_manager.py b/distributedcloud/dcmanager/orchestrator/sw_update_manager.py index 40cbd1ffb..23a011c73 100644 --- a/distributedcloud/dcmanager/orchestrator/sw_update_manager.py +++ b/distributedcloud/dcmanager/orchestrator/sw_update_manager.py @@ -248,6 +248,7 @@ class SwUpdateManager(manager.Manager): max_parallel_subclouds = int(max_parallel_subclouds_str) stop_on_failure = payload.get("stop-on-failure") in ["true"] + force = payload.get(consts.EXTRA_ARGS_FORCE) in ["true"] # Has the user specified a specific subcloud? cloud_name = payload.get("cloud_name") @@ -257,7 +258,6 @@ class SwUpdateManager(manager.Manager): # Has the user specified for_sw_deploy flag for prestage strategy? if strategy_type == consts.SW_UPDATE_TYPE_PRESTAGE: for_sw_deploy = payload.get(consts.PRESTAGE_FOR_SW_DEPLOY) in ["true"] - force = payload.get(consts.EXTRA_ARGS_FORCE) in ["true"] if cloud_name: # Make sure subcloud exists @@ -282,7 +282,7 @@ class SwUpdateManager(manager.Manager): raise exceptions.BadRequest(resource="strategy", msg=str(ex)) else: self.strategy_validators[strategy_type].validate_strategy_requirements( - context, subcloud.id, subcloud.name + context, subcloud.id, subcloud.name, force ) extra_args = None @@ -328,7 +328,7 @@ class SwUpdateManager(manager.Manager): single_group.id if subcloud_group else None, cloud_name, self.strategy_validators[strategy_type].build_availability_status_filter(), - self.strategy_validators[strategy_type].build_sync_status_filter(), + self.strategy_validators[strategy_type].build_sync_status_filter(force), ) # TODO(rlima): move this step to validators diff --git a/distributedcloud/dcmanager/orchestrator/validators/base.py b/distributedcloud/dcmanager/orchestrator/validators/base.py index df184509f..77d9ed8fc 100644 --- a/distributedcloud/dcmanager/orchestrator/validators/base.py +++ b/distributedcloud/dcmanager/orchestrator/validators/base.py @@ -20,17 +20,24 @@ class StrategyValidationBase(object): """Base class for strategy validation""" def __init__(self): + self.accepts_force = False self.endpoint_type = None - def validate_strategy_requirements(self, context, subcloud_id, subcloud_name): + def validate_strategy_requirements( + self, context, subcloud_id, subcloud_name, force=False + ): """Validates the requirements for a strategy :param context: request context object :param subcloud_id: subcloud's id :param subcloud_name: subcloud's name + :param force: if the strategy should be forced to execute :raises BadRequest: if the requirements for the strategy are not met """ + if self.accepts_force and force: + return + subcloud_status = db_api.subcloud_status_get( context, subcloud_id, self.endpoint_type ) @@ -64,9 +71,10 @@ class StrategyValidationBase(object): return dccommon_consts.AVAILABILITY_ONLINE - def build_sync_status_filter(self): + def build_sync_status_filter(self, force): """Builds the sync status filter for valid subclouds + :param force: if the strategy should be forced to execute :return: sync status to filter :rtype: list """ diff --git a/distributedcloud/dcmanager/orchestrator/validators/kube_root_ca_validator.py b/distributedcloud/dcmanager/orchestrator/validators/kube_root_ca_validator.py index 536c39453..cf7f82c55 100644 --- a/distributedcloud/dcmanager/orchestrator/validators/kube_root_ca_validator.py +++ b/distributedcloud/dcmanager/orchestrator/validators/kube_root_ca_validator.py @@ -26,6 +26,7 @@ class KubeRootCaStrategyValidator(StrategyValidationBase): super().__init__() self.endpoint_type = dccommon_consts.ENDPOINT_TYPE_KUBE_ROOTCA + self.accepts_force = True def build_extra_args(self, payload): """Builds the extra args for a strategy @@ -40,3 +41,18 @@ class KubeRootCaStrategyValidator(StrategyValidationBase): consts.EXTRA_ARGS_SUBJECT: payload.get(consts.EXTRA_ARGS_SUBJECT), consts.EXTRA_ARGS_CERT_FILE: payload.get(consts.EXTRA_ARGS_CERT_FILE), } + + def build_sync_status_filter(self, force): + """Builds the sync status filter for valid subclouds + + :param force: if the strategy should be forced to execute + :return: sync status to filter + :rtype: list + """ + + if force: + return [ + dccommon_consts.SYNC_STATUS_IN_SYNC, + dccommon_consts.SYNC_STATUS_OUT_OF_SYNC, + ] + return [dccommon_consts.SYNC_STATUS_OUT_OF_SYNC] diff --git a/distributedcloud/dcmanager/orchestrator/validators/patch_validator.py b/distributedcloud/dcmanager/orchestrator/validators/patch_validator.py index 7b794c98a..d948080d4 100644 --- a/distributedcloud/dcmanager/orchestrator/validators/patch_validator.py +++ b/distributedcloud/dcmanager/orchestrator/validators/patch_validator.py @@ -44,9 +44,10 @@ class PatchStrategyValidator(StrategyValidationBase): consts.EXTRA_ARGS_REMOVE: remove_bool, } - def build_sync_status_filter(self): + def build_sync_status_filter(self, force): """Builds the sync status filter for valid subclouds + :param force: if the strategy should be forced to execute :return: sync status to filter :rtype: list """ diff --git a/distributedcloud/dcmanager/orchestrator/validators/prestage_validator.py b/distributedcloud/dcmanager/orchestrator/validators/prestage_validator.py index ffcc61762..e8577a501 100644 --- a/distributedcloud/dcmanager/orchestrator/validators/prestage_validator.py +++ b/distributedcloud/dcmanager/orchestrator/validators/prestage_validator.py @@ -30,7 +30,7 @@ class PrestageStrategyValidator(StrategyValidationBase): self.endpoint_type = dccommon_consts.AUDIT_TYPE_SOFTWARE # TODO(rlima): move prestage validations here - def build_sync_status_filter(self): + def build_sync_status_filter(self, force): """Builds the sync status filter for valid subclouds :param force: if the strategy should be forced to execute diff --git a/distributedcloud/dcmanager/tests/base.py b/distributedcloud/dcmanager/tests/base.py index c6bac2e63..88a0c6290 100644 --- a/distributedcloud/dcmanager/tests/base.py +++ b/distributedcloud/dcmanager/tests/base.py @@ -53,37 +53,37 @@ SUBCLOUD_1 = { "name": "subcloud1", "region_name": "2ec93dfb654846909efe61d1b39dd2ce", "rehomed": True, - "software_version": "22.12", + "software_version": "24.09", } SUBCLOUD_2 = { "name": "subcloud2", "region_name": "ca2761ee7aa34cbe8415ec9a3c86854f", "rehomed": True, - "software_version": "22.12", + "software_version": "24.09", } SUBCLOUD_3 = { "name": "subcloud3", "region_name": "659e12e5f7ad411abfcd83f5cedca0bf", "rehomed": True, - "software_version": "21.12", + "software_version": "22.12", } SUBCLOUD_4 = { "name": "subcloud4", "region_name": "c25f3b0553384104b664789bd93a2ba8", "rehomed": False, - "software_version": "21.12", + "software_version": "22.12", } SUBCLOUD_5 = { "name": "subcloud5", "region_name": "809581dc2d154e008480bac1f43b7aff", "rehomed": False, - "software_version": "21.12", + "software_version": "22.12", } SUBCLOUD_6 = { "name": "subcloud6", "region_name": "8c60b99f3e1245b7bc5a049802ade8d2", "rehomed": False, - "software_version": "22.12", + "software_version": "24.09", } SUBCLOUD_7 = {"name": "subcloud7", "region_name": "9fde6dca22fa422bb1e8cf03bedc18e4"} SUBCLOUD_8 = {"name": "subcloud8", "region_name": "f3cb0b109c4543fda3ed50ed5783279d"} diff --git a/distributedcloud/dcmanager/tests/unit/audit/test_kube_rootca_update_audit_manager.py b/distributedcloud/dcmanager/tests/unit/audit/test_kube_rootca_update_audit_manager.py index 9897da490..fa9642025 100644 --- a/distributedcloud/dcmanager/tests/unit/audit/test_kube_rootca_update_audit_manager.py +++ b/distributedcloud/dcmanager/tests/unit/audit/test_kube_rootca_update_audit_manager.py @@ -26,6 +26,7 @@ class FakeSubcloudObj(object): def __init__(self, subcloud_dict): self.name = subcloud_dict["name"] self.region_name = subcloud_dict["region_name"] + self.rehomed = subcloud_dict["rehomed"] self.software_version = subcloud_dict["software_version"] @@ -225,6 +226,9 @@ class TestKubeRootcaUpdateAudit(base.DCManagerTestCase): self.mock_sysinv_client().get_kube_rootca_cert_id.return_value = ( base.FakeException("API cert ID request failed") ) + self.mock_fm_client().get_alarms_by_ids.side_effect = base.FakeException( + "get_alarms_by_ids failed" + ) response = self.audit.subcloud_kube_rootca_audit( self.mock_sysinv_client(), @@ -234,3 +238,36 @@ class TestKubeRootcaUpdateAudit(base.DCManagerTestCase): ) self.assertEqual(response, None) + + def test_kube_rootca_update_audit_method(self): + """Test if kube-rootca is auditing correctly based using alarm or cert_id""" + # Set the region one data + self.kube_rootca_cert_id.return_value = ( + True, + FakeKubeRootcaData("cert1", ""), + ) + kube_rootca_update_audit_data = self.get_rootca_audit_data() + + subclouds = [base.SUBCLOUD_1, base.SUBCLOUD_2] + for subcloud_dict in subclouds: + subcloud = FakeSubcloudObj(subcloud_dict) + + self.kube_rootca_cert_id.return_value = True, FakeKubeRootcaData( + "cert1", "" + ) + self.mock_sysinv_client().get_kube_rootca_cert_id.return_value = ( + True, + FakeKubeRootcaData("cert1", ""), + ) + self.mock_fm_client().get_alarms_by_ids.return_value = None + + self.audit.subcloud_kube_rootca_audit( + self.mock_sysinv_client(), + self.mock_fm_client(), + subcloud, + kube_rootca_update_audit_data, + ) + if subcloud.rehomed: + self.mock_sysinv_client().get_kube_rootca_cert_id.assert_called() + else: + self.mock_fm_client().get_alarms_by_ids.assert_called()