Move subcloud audit to separate process

Remove the subcloud audit from the dcmanager-manager process. Create a
dcmanager-audit process and its associated files. Add new RPC calls so
that dcmanager-audit can notify dcmanager of subcloud availability and
sync endpoint type changes, and update dcmanager to handle availability
and sync endpoint type updates from dcmanager-audit. The subcloud audit
interval is reduced to 20 seconds. Create/update unit tests to verify
the implementation changes.

Story: 2007267
Task: 39637
Change-Id: Iff408166753f22ce3616d34e267ca1155ac43042
Signed-off-by: Tao Liu <tao.liu@windriver.com>
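In outline, the audit process now runs the periodic subcloud audit itself and reports results back to dcmanager over the new RPCs. A minimal sketch of that flow, using only names introduced in this change (the subcloud name and status values are illustrative):

    from dcmanager.common import consts
    from dcmanager.common import context
    from dcmanager.rpc import client as dcmanager_rpc_client

    # Running inside dcmanager-audit: report the availability result of
    # one audit pass to dcmanager (a blocking call; see the ManagerClient
    # additions in dcmanager/rpc/client.py later in this diff).
    ctxt = context.get_admin_context()
    rpc_client = dcmanager_rpc_client.ManagerClient()
    rpc_client.update_subcloud_availability(
        ctxt, 'subcloud1',              # 'subcloud1' is illustrative
        consts.AVAILABILITY_ONLINE,
        update_state_only=False,
        audit_fail_count=0)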
@@ -28,6 +28,7 @@ Source9: dcmanager.conf
 Source10: dcorch.conf
 Source11: dcdbsync.conf
 Source12: clean-dcorch
+Source13: dcmanager-audit.service

 BuildArch: noarch

@@ -130,6 +131,7 @@ install -d -m 755 %{buildroot}%{_sysconfdir}/dcmanager/
 # install systemd unit files
 install -p -D -m 644 %{SOURCE1} %{buildroot}%{_unitdir}/dcmanager-api.service
 install -p -D -m 644 %{SOURCE2} %{buildroot}%{_unitdir}/dcmanager-manager.service
+install -p -D -m 644 %{SOURCE13} %{buildroot}%{_unitdir}/dcmanager-audit.service
 install -p -D -m 644 %{SOURCE9} %{buildroot}%{_tmpfilesdir}
 # install default config files
 cd %{_builddir}/%{pypi_name}-%{version} && oslo-config-generator --config-file ./dcmanager/config-generator.conf --output-file %{_builddir}/%{pypi_name}-%{version}%{_sysconfdir}/dcmanager/dcmanager.conf.sample
@@ -185,6 +187,8 @@ install -m 755 -D -p %{SOURCE12} %{buildroot}/%{_bindir}/clean-dcorch
 %exclude %{python2_sitelib}/dcmanager/tests
 %{_bindir}/dcmanager-api
 %{_unitdir}/dcmanager-api.service
+%{_bindir}/dcmanager-audit
+%{_unitdir}/dcmanager-audit.service
 %{_bindir}/dcmanager-manager
 %{_unitdir}/dcmanager-manager.service
 %{_bindir}/dcmanager-manage
distributedcloud/centos/files/dcmanager-audit.service (new file)
@@ -0,0 +1,12 @@
[Unit]
Description=DC Manager Audit Service
After=syslog-ng.service network-online.target dcmanager-manager.service

[Service]
Type=simple
User=root
ExecStart=/usr/bin/dcmanager-audit --config-file /etc/dcmanager/dcmanager.conf
Restart=on-failure

[Install]
WantedBy=multi-user.target
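The spec file change above installs this unit alongside the existing dcmanager services; enabling it at boot (e.g. systemctl enable dcmanager-audit) is presumably handled by the platform's service management outside this diff.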

distributedcloud/dcmanager/audit/__init__.py (new, empty file)

distributedcloud/dcmanager/audit/service.py (new file)
@@ -0,0 +1,93 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (c) 2020 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#

import six

from oslo_config import cfg
from oslo_log import log as logging
import oslo_messaging
from oslo_service import service

from dcmanager.audit.subcloud_audit_manager import SubcloudAuditManager
from dcmanager.common import consts
from dcmanager.common.i18n import _
from dcmanager.common import messaging as rpc_messaging
from dcmanager.common import scheduler

CONF = cfg.CONF
LOG = logging.getLogger(__name__)


class DCManagerAuditService(service.Service):
    """Lifecycle manager for a running audit service."""

    def __init__(self):

        super(DCManagerAuditService, self).__init__()
        self.host = cfg.CONF.host
        # To be used by the sw update manager to trigger the patch audit
        self.rpc_api_version = consts.RPC_API_VERSION
        self.topic = consts.TOPIC_DC_MANAGER_AUDIT
        # The following are initialized here, but assigned in start() which
        # happens after the fork when spawning multiple worker processes
        self.TG = None
        self.target = None
        self._rpc_server = None
        self.subcloud_audit_manager = None

    def start(self):
        self.init_tgm()
        self.init_audit_managers()
        target = oslo_messaging.Target(version=self.rpc_api_version,
                                       server=self.host,
                                       topic=self.topic)
        self.target = target
        self._rpc_server = rpc_messaging.get_rpc_server(self.target, self)
        self._rpc_server.start()
        super(DCManagerAuditService, self).start()

    def init_tgm(self):
        self.TG = scheduler.ThreadGroupManager()

    def init_audit_managers(self):
        self.subcloud_audit_manager = SubcloudAuditManager()
        # Audit availability of all subclouds.
        # Note this will run in a separate green thread
        self.TG.start(self.subcloud_audit_manager.periodic_subcloud_audit)

    def _stop_rpc_server(self):
        # Stop RPC connection to prevent new requests
        LOG.debug(_("Attempting to stop engine service..."))
        try:
            self._rpc_server.stop()
            self._rpc_server.wait()
            LOG.info('Engine service stopped successfully')
        except Exception as ex:
            LOG.error('Failed to stop engine service: %s',
                      six.text_type(ex))

    def stop(self):
        self._stop_rpc_server()

        self.TG.stop()

        # Terminate the engine process
        LOG.info("All threads were gone, terminating engine")
        super(DCManagerAuditService, self).stop()

distributedcloud/dcmanager/audit/subcloud_audit_manager.py (new file)
@@ -0,0 +1,339 @@
# Copyright 2017 Ericsson AB.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#

import eventlet

from keystoneauth1 import exceptions as keystone_exceptions
from oslo_config import cfg
from oslo_log import log as logging

from sysinv.common import constants as sysinv_constants

from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver

from dcmanager.audit import alarm_aggregation
from dcmanager.common import consts
from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common.i18n import _
from dcmanager.common import manager
from dcmanager.common import scheduler
from dcmanager.db import api as db_api
from dcmanager.rpc import client as dcmanager_rpc_client

CONF = cfg.CONF
LOG = logging.getLogger(__name__)

# We will update the state of each subcloud in the dcorch about once per hour.
# Calculate how many iterations that will be.
SUBCLOUD_STATE_UPDATE_ITERATIONS = \
    dccommon_consts.SECONDS_IN_HOUR / CONF.scheduler.subcloud_audit_interval
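# With the new 20 second default for subcloud_audit_interval, and assuming
# SECONDS_IN_HOUR is 3600, this works out to 3600 / 20 = 180 audit
# iterations between the (roughly hourly) dcorch state updates.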


class SubcloudAuditManager(manager.Manager):
    """Manages tasks related to audits."""

    def __init__(self, *args, **kwargs):
        LOG.debug(_('SubcloudAuditManager initialization...'))

        super(SubcloudAuditManager, self).__init__(
            service_name="subcloud_audit_manager")
        self.context = context.get_admin_context()
        self.dcmanager_rpc_client = dcmanager_rpc_client.ManagerClient()
        # Keeps track of greenthreads we create to do work.
        self.thread_group_manager = scheduler.ThreadGroupManager(
            thread_pool_size=100)
        # Track workers created for each subcloud.
        self.subcloud_workers = dict()
        # Number of audits since last subcloud state update
        self.audit_count = 0
        self.alarm_aggr = alarm_aggregation.AlarmAggregation(self.context)

    def periodic_subcloud_audit(self):
        """Audit availability of subclouds."""

        # Blanket catch all exceptions in the audit so that the audit
        # does not die.
        while True:
            try:
                eventlet.greenthread.sleep(
                    CONF.scheduler.subcloud_audit_interval)
                self._periodic_subcloud_audit_loop()
            except eventlet.greenlet.GreenletExit:
                # We have been told to exit
                return
            except Exception:
                LOG.exception("Error in periodic subcloud audit loop")

    def _periodic_subcloud_audit_loop(self):
        """Audit availability of subclouds loop."""

        # We will be running in our own green thread here.
        LOG.info('Triggered subcloud audit.')
        self.audit_count += 1

        # Determine whether to trigger a state update to each subcloud
        if self.audit_count >= SUBCLOUD_STATE_UPDATE_ITERATIONS:
            update_subcloud_state = True
        else:
            update_subcloud_state = False

        openstack_installed = False
        # The feature of syncing openstack resources to the subclouds was
        # not completed, so auditing of the openstack application is
        # disabled.
        # Determine whether OpenStack is installed in central cloud
        # os_client = OpenStackDriver(region_name=consts.DEFAULT_REGION_NAME,
        #                             thread_name='dcmanager-audit')
        # sysinv_client = os_client.sysinv_client
        # This could be optimized in the future by attempting to get just the
        # one application. However, sysinv currently treats this as a failure
        # if the application is not installed and generates warning logs, so
        # it would require changes to handle this gracefully.
        # apps = sysinv_client.get_applications()
        # for app in apps:
        #     if app.name == sysinv_constants.HELM_APP_OPENSTACK and app.active:
        #         openstack_installed = True
        #         break

        for subcloud in db_api.subcloud_get_all(self.context):
            if (subcloud.deploy_status not in
                    [consts.DEPLOY_STATE_DONE,
                     consts.DEPLOY_STATE_DEPLOYING,
                     consts.DEPLOY_STATE_DEPLOY_FAILED]):
                LOG.debug("Skip subcloud %s audit, deploy_status: %s" %
                          (subcloud.name, subcloud.deploy_status))
                continue

            # Create a new greenthread for each subcloud to allow the audits
            # to be done in parallel. If there are not enough greenthreads
            # in the pool, this will block until one becomes available.
            self.subcloud_workers[subcloud.name] = \
                self.thread_group_manager.start(self._audit_subcloud,
                                                subcloud.name,
                                                update_subcloud_state,
                                                openstack_installed)

        # Wait for all greenthreads to complete
        LOG.info('Waiting for subcloud audits to complete.')
        for thread in self.subcloud_workers.values():
            thread.wait()

        # Clear the list of workers before next audit
        self.subcloud_workers = dict()
        LOG.info('All subcloud audits have completed.')

    def _update_subcloud_availability(self, subcloud_name,
                                      availability_status=None,
                                      update_state_only=False,
                                      audit_fail_count=None):
        try:
            self.dcmanager_rpc_client.update_subcloud_availability(
                self.context, subcloud_name, availability_status,
                update_state_only, audit_fail_count)
            LOG.info('Notifying dcmanager, subcloud:%s, availability:%s' %
                     (subcloud_name,
                      availability_status))
        except Exception:
            LOG.exception('Problem informing dcmanager of subcloud '
                          'availability state change, subcloud: %s'
                          % subcloud_name)

    @staticmethod
    def _get_subcloud_availability_status(subcloud_name, sysinv_client):
        """For each subcloud, if at least one service is active in each

        service of servicegroup-list then declare the subcloud online.
        """
        avail_to_set = consts.AVAILABILITY_OFFLINE
        svc_groups = None

        # get a list of service groups in the subcloud
        try:
            svc_groups = sysinv_client.get_service_groups()
        except Exception as e:
            LOG.warn('Cannot retrieve service groups for '
                     'subcloud: %s, %s' % (subcloud_name, e))

        if svc_groups:
            active_sgs = []
            inactive_sgs = []

            # Build 2 lists, 1 of active service groups,
            # one with non-active.
            for sg in svc_groups:
                if sg.state != consts.SERVICE_GROUP_STATUS_ACTIVE:
                    inactive_sgs.append(sg.service_group_name)
                else:
                    active_sgs.append(sg.service_group_name)

            # Create a list of service groups that are only present
            # in the non-active list
            inactive_only = [sg for sg in inactive_sgs if
                             sg not in active_sgs]

            # An empty inactive-only list and a non-empty active list
            # means we're good to go.
            if not inactive_only and active_sgs:
                avail_to_set = \
                    consts.AVAILABILITY_ONLINE
            else:
                LOG.info("Subcloud:%s has non-active "
                         "service groups: %s" %
                         (subcloud_name, inactive_only))
        return avail_to_set

    def _audit_subcloud_openstack_app(self, subcloud_name, sysinv_client,
                                      openstack_installed):
        openstack_installed_current = False
        # get a list of installed apps in the subcloud
        try:
            apps = sysinv_client.get_applications()
        except Exception:
            LOG.exception('Cannot retrieve installed apps for subcloud:%s'
                          % subcloud_name)
            return

        for app in apps:
            if app.name == sysinv_constants.HELM_APP_OPENSTACK \
                    and app.active:
                # The audit found that the openstack app is installed and
                # active in the subcloud.
                openstack_installed_current = True
                break

        endpoint_type_list = dccommon_consts.ENDPOINT_TYPES_LIST_OS
        if openstack_installed_current and not openstack_installed:
            self.dcmanager_rpc_client.update_subcloud_sync_endpoint_type(
                self.context,
                subcloud_name,
                endpoint_type_list,
                openstack_installed_current)
        elif not openstack_installed_current and openstack_installed:
            self.dcmanager_rpc_client.update_subcloud_sync_endpoint_type(
                self.context,
                subcloud_name,
                endpoint_type_list,
                openstack_installed_current)

    def _audit_subcloud(self, subcloud_name, update_subcloud_state,
                        audit_openstack):
        """Audit a single subcloud."""

        # Retrieve the subcloud
        try:
            subcloud = db_api.subcloud_get_by_name(self.context, subcloud_name)
        except exceptions.SubcloudNotFound:
            # Possibility subcloud could have been deleted since the list of
            # subclouds to audit was created.
            LOG.info('Ignoring SubcloudNotFound when auditing subcloud %s' %
                     subcloud_name)
            return

        avail_status_current = subcloud.availability_status
        audit_fail_count = subcloud.audit_fail_count

        # Set defaults to None and disabled so we will still set disabled
        # status if we encounter an error.

        sysinv_client = None
        fm_client = None
        avail_to_set = consts.AVAILABILITY_OFFLINE

        try:
            os_client = OpenStackDriver(region_name=subcloud_name,
                                        thread_name='subcloud-audit')
            sysinv_client = os_client.sysinv_client
            fm_client = os_client.fm_client
        except (keystone_exceptions.EndpointNotFound,
                keystone_exceptions.ConnectFailure,
                keystone_exceptions.ConnectTimeout,
                IndexError):
            if avail_status_current == consts.AVAILABILITY_OFFLINE:
                LOG.info("Identity or Platform endpoint for %s not "
                         "found, ignoring for offline "
                         "subcloud." % subcloud_name)
                return
            else:
                # The subcloud will be marked as offline below.
                LOG.error("Identity or Platform endpoint for online "
                          "subcloud: %s not found." % subcloud_name)

        except Exception:
            LOG.exception("Failed to get OS Client for subcloud: %s"
                          % subcloud_name)

        # Check availability of the subcloud
        if sysinv_client:
            avail_to_set = self._get_subcloud_availability_status(
                subcloud_name, sysinv_client)

        if avail_to_set == consts.AVAILABILITY_OFFLINE:
            if audit_fail_count < consts.AVAIL_FAIL_COUNT_MAX:
                audit_fail_count = audit_fail_count + 1
            if (avail_status_current == consts.AVAILABILITY_ONLINE) and \
                    (audit_fail_count < consts.AVAIL_FAIL_COUNT_TO_ALARM):
                # Do not set offline until we have failed the audit
                # the requisite number of times
                avail_to_set = consts.AVAILABILITY_ONLINE
        else:
            # In the case of a one-off blip, we may need to set the
            # fail count back to 0
            audit_fail_count = 0

        if avail_to_set != avail_status_current:

            if avail_to_set == consts.AVAILABILITY_ONLINE:
                audit_fail_count = 0

            LOG.info('Setting new availability status: %s '
                     'on subcloud: %s' %
                     (avail_to_set, subcloud_name))
            self._update_subcloud_availability(
                subcloud_name,
                availability_status=avail_to_set,
                audit_fail_count=audit_fail_count)

        elif audit_fail_count != subcloud.audit_fail_count:
            self._update_subcloud_availability(
                subcloud_name,
                availability_status=None,
                audit_fail_count=audit_fail_count)

        elif update_subcloud_state:
            # Nothing has changed, but we want to send a state update for
            # this subcloud as an audit.
            self._update_subcloud_availability(
                subcloud_name,
                availability_status=avail_status_current,
                update_state_only=True)
            self.audit_count = 0

        if avail_to_set == consts.AVAILABILITY_ONLINE:
            # If the subcloud is online, get the alarm summary and store it
            # in the db.
            if fm_client:
                self.alarm_aggr.update_alarm_summary(subcloud_name, fm_client)

        # Audit the openstack application in the subcloud
        if audit_openstack and sysinv_client:
            self._audit_subcloud_openstack_app(
                subcloud_name, sysinv_client, subcloud.openstack_installed)

distributedcloud/dcmanager/cmd/audit.py (new file)
@@ -0,0 +1,63 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2020 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#

"""
DC Manager Audit Service.
"""

import eventlet
eventlet.monkey_patch()

from oslo_config import cfg
from oslo_i18n import _lazy
from oslo_log import log as logging
from oslo_service import service

from dcmanager.common import config
from dcmanager.common import messaging


_lazy.enable_lazy()
config.register_options()
config.register_keystone_options()
LOG = logging.getLogger('dcmanager.audit')

CONF = cfg.CONF


def main():
    logging.register_options(CONF)
    CONF(project='dcmanager', prog='dcmanager-audit')
    logging.setup(cfg.CONF, 'dcmanager-audit')
    logging.set_defaults()
    messaging.setup()

    from dcmanager.audit import service as audit

    srv = audit.DCManagerAuditService()
    launcher = service.launch(cfg.CONF,
                              srv, workers=CONF.audit_workers)

    LOG.info("Configuration:")
    cfg.CONF.log_opt_values(LOG, logging.INFO)

    launcher.wait()


if __name__ == '__main__':
    main()
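This entry point is what the new systemd unit runs (ExecStart=/usr/bin/dcmanager-audit --config-file /etc/dcmanager/dcmanager.conf). The dcmanager-audit console script that maps to this main() is presumably declared in the project's setup.cfg, which is not part of this diff.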

distributedcloud/dcmanager/common/config.py
@@ -11,7 +11,7 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 #
-# Copyright (c) 2017 Wind River Systems, Inc.
+# Copyright (c) 2017-2020 Wind River Systems, Inc.
 #
 # The right to copy, distribute, modify, or otherwise make use
 # of this software may be licensed only pursuant to the terms
@@ -112,7 +112,7 @@ scheduler_opts = [
                default=True,
                help='boolean value for enable/disable periodic tasks'),
     cfg.IntOpt('subcloud_audit_interval',
-               default=180,
+               default=20,
                help='periodic time interval for subcloud audit'),
     cfg.IntOpt('patch_audit_interval',
                default=10,
@@ -122,6 +122,8 @@ scheduler_opts = [
 common_opts = [
     cfg.IntOpt('workers', default=1,
                help='number of workers'),
+    cfg.IntOpt('audit_workers', default=1,
+               help='number of audit workers'),
     cfg.StrOpt('host',
                default='localhost',
                help='hostname of the machine')

distributedcloud/dcmanager/common/consts.py
@@ -23,6 +23,8 @@ RPC_API_VERSION = "1.0"

 TOPIC_DC_MANAGER = "dcmanager"

+TOPIC_DC_MANAGER_AUDIT = "dcmanager-audit"
+
 PATCH_VAULT_DIR = "/opt/dc-vault/patches"

 # Well known region names
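The new topic gives the audit its own RPC server: DCManagerAuditService (added above) binds to TOPIC_DC_MANAGER_AUDIT, while ManagerClient presumably continues to target the existing TOPIC_DC_MANAGER when the audit calls back into dcmanager.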

@@ -10,7 +10,7 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 #
-# Copyright (c) 2017 Wind River Systems, Inc.
+# Copyright (c) 2017-2020 Wind River Systems, Inc.
 #
 # The right to copy, distribute, modify, or otherwise make use
 # of this software may be licensed only pursuant to the terms
@@ -36,10 +36,10 @@ from dccommon.drivers.openstack import vim
 from dcmanager.common import consts
 from dcmanager.common import context
 from dcmanager.common import exceptions
+from dcmanager.common import scheduler
 from dcmanager.common import utils
 from dcmanager.db import api as db_api
 from dcmanager.manager.patch_audit_manager import PatchAuditManager
-from dcmanager.manager import scheduler

 LOG = logging.getLogger(__name__)

distributedcloud/dcmanager/manager/service.py
@@ -34,9 +34,8 @@ from dcmanager.common import context
 from dcmanager.common import exceptions
 from dcmanager.common.i18n import _
 from dcmanager.common import messaging as rpc_messaging
+from dcmanager.common import scheduler
 from dcmanager.manager.patch_audit_manager import PatchAuditManager
-from dcmanager.manager import scheduler
-from dcmanager.manager.subcloud_audit_manager import SubcloudAuditManager
 from dcmanager.manager.subcloud_manager import SubcloudManager
 from dcmanager.manager.sw_update_manager import SwUpdateManager

@@ -81,7 +80,6 @@ class DCManagerService(service.Service):
         self.target = None
         self._rpc_server = None
         self.subcloud_manager = None
-        self.subcloud_audit_manager = None
         self.sw_update_manager = None
         self.patch_audit_manager = None

@@ -89,8 +87,6 @@ class DCManagerService(service.Service):
         self.TG = scheduler.ThreadGroupManager()

     def init_audit_managers(self):
-        self.subcloud_audit_manager = SubcloudAuditManager(
-            subcloud_manager=self.subcloud_manager)
         self.patch_audit_manager = PatchAuditManager(
             subcloud_manager=self.subcloud_manager)

@@ -116,18 +112,9 @@ class DCManagerService(service.Service):
         super(DCManagerService, self).start()
         if self.periodic_enable:
             LOG.info("Adding periodic tasks for the manager to perform")
-            self.TG.add_timer(cfg.CONF.scheduler.subcloud_audit_interval,
-                              self.subcloud_audit, initial_delay=10)
             self.TG.add_timer(cfg.CONF.scheduler.patch_audit_interval,
                               self.patch_audit, initial_delay=60)

-    def subcloud_audit(self):
-        # Audit availability of all subclouds.
-        # Note this will run in a separate green thread
-        LOG.debug("Subcloud audit job started at: %s",
-                  time.strftime("%c"))
-        self.subcloud_audit_manager.periodic_subcloud_audit()
-
     def patch_audit(self):
         # Audit patch status of all subclouds.
         # Note this will run in a separate green thread
@@ -189,6 +176,32 @@ class DCManagerService(service.Service):

         return

+    @request_context
+    def update_subcloud_availability(self, context,
+                                     subcloud_name,
+                                     availability_status,
+                                     update_state_only=False,
+                                     audit_fail_count=None):
+        # Updates subcloud availability
+        LOG.info("Handling update_subcloud_availability request for: %s" %
+                 subcloud_name)
+        self.subcloud_manager.update_subcloud_availability(
+            context,
+            subcloud_name,
+            availability_status,
+            update_state_only,
+            audit_fail_count)
+
+    @request_context
+    def update_subcloud_sync_endpoint_type(self, context, subcloud_name,
+                                           endpoint_type_list,
+                                           openstack_installed):
+        # Updates subcloud sync endpoint type
+        LOG.info("Handling update_subcloud_sync_endpoint_type request for: %s"
+                 % subcloud_name)
+        self.subcloud_manager.update_subcloud_sync_endpoint_type(
+            context, subcloud_name, endpoint_type_list, openstack_installed)
+
     @request_context
     def create_sw_update_strategy(self, context, payload):
         # Creates a software update strategy

distributedcloud/dcmanager/manager/subcloud_audit_manager.py (deleted file, 422 lines)
@@ -1,422 +0,0 @@
# Copyright 2017 Ericsson AB.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#

from keystoneauth1 import exceptions as keystone_exceptions
from oslo_config import cfg
from oslo_log import log as logging

from fm_api import constants as fm_const
from fm_api import fm_api
from sysinv.common import constants as sysinv_constants

from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dcorch.rpc import client as dcorch_rpc_client

from dcmanager.common import consts
from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common.i18n import _
from dcmanager.common import manager
from dcmanager.db import api as db_api
from dcmanager.manager import alarm_aggregation
from dcmanager.manager import scheduler

CONF = cfg.CONF
LOG = logging.getLogger(__name__)

# We will update the state of each subcloud in the dcorch about once per hour.
# Calculate how many iterations that will be.
SUBCLOUD_STATE_UPDATE_ITERATIONS = \
    dccommon_consts.SECONDS_IN_HOUR / CONF.scheduler.subcloud_audit_interval


class SubcloudAuditManager(manager.Manager):
    """Manages tasks related to audits."""

    def __init__(self, *args, **kwargs):
        LOG.debug(_('SubcloudAuditManager initialization...'))

        super(SubcloudAuditManager, self).__init__(
            service_name="subcloud_audit_manager")
        self.context = context.get_admin_context()
        self.dcorch_rpc_client = dcorch_rpc_client.EngineClient()
        self.fm_api = fm_api.FaultAPIs()
        self.subcloud_manager = kwargs['subcloud_manager']
        # Keeps track of greenthreads we create to do work.
        self.thread_group_manager = scheduler.ThreadGroupManager(
            thread_pool_size=100)
        # Track workers created for each subcloud.
        self.subcloud_workers = dict()
        # Number of audits since last subcloud state update
        self.audit_count = 0
        self.alarm_aggr = alarm_aggregation.AlarmAggregation(self.context)

    def periodic_subcloud_audit(self):
        """Audit availability of subclouds."""

        # Blanket catch all exceptions in the audit so that the audit
        # does not die.
        try:
            self._periodic_subcloud_audit_loop()
        except Exception as e:
            LOG.exception(e)

    def _periodic_subcloud_audit_loop(self):
        """Audit availability of subclouds loop."""

        # We will be running in our own green thread here.
        LOG.info('Triggered subcloud audit.')
        self.audit_count += 1

        # Determine whether to trigger a state update to each subcloud
        if self.audit_count >= SUBCLOUD_STATE_UPDATE_ITERATIONS:
            update_subcloud_state = True
        else:
            update_subcloud_state = False

        # Determine whether OpenStack is installed in central cloud
        os_client = OpenStackDriver(region_name=consts.DEFAULT_REGION_NAME,
                                    thread_name='dcmanager')
        sysinv_client = os_client.sysinv_client
        # This could be optimized in the future by attempting to get just the
        # one application. However, sysinv currently treats this as a failure
        # if the application is not installed and generates warning logs, so
        # it would require changes to handle this gracefully.
        apps = sysinv_client.get_applications()
        openstack_installed = False
        for app in apps:
            if app.name == sysinv_constants.HELM_APP_OPENSTACK and app.active:
                openstack_installed = True
                break

        for subcloud in db_api.subcloud_get_all(self.context):
            if (subcloud.deploy_status not in
                    [consts.DEPLOY_STATE_DONE,
                     consts.DEPLOY_STATE_DEPLOYING,
                     consts.DEPLOY_STATE_DEPLOY_FAILED]):
                LOG.debug("Skip subcloud %s audit, deploy_status: %s" %
                          (subcloud.name, subcloud.deploy_status))
                continue
            # Create a new greenthread for each subcloud to allow the audits
            # to be done in parallel. If there are not enough greenthreads
            # in the pool, this will block until one becomes available.
            self.subcloud_workers[subcloud.name] = \
                self.thread_group_manager.start(self._audit_subcloud,
                                                subcloud.name,
                                                update_subcloud_state,
                                                openstack_installed)

        # Wait for all greenthreads to complete
        LOG.info('Waiting for subcloud audits to complete.')
        for thread in self.subcloud_workers.values():
            thread.wait()

        # Clear the list of workers before next audit
        self.subcloud_workers = dict()
        LOG.info('All subcloud audits have completed.')

    def _audit_subcloud(self, subcloud_name, update_subcloud_state,
                        audit_openstack):
        """Audit a single subcloud."""

        # Retrieve the subcloud
        try:
            subcloud = db_api.subcloud_get_by_name(self.context, subcloud_name)
        except exceptions.SubcloudNotFound:
            # Possibility subcloud could have been deleted since the list of
            # subclouds to audit was created.
            LOG.info('Ignoring SubcloudNotFound when auditing subcloud %s' %
                     subcloud_name)
            return

        # For each subcloud, if at least one service is active in
        # each service of servicegroup-list then declare the subcloud online.

        subcloud_id = subcloud.id
        avail_status_current = subcloud.availability_status
        audit_fail_count = subcloud.audit_fail_count

        # Set defaults to None and disabled so we will still set disabled
        # status if we encounter an error.

        sysinv_client = None
        fm_client = None
        svc_groups = None
        avail_to_set = consts.AVAILABILITY_OFFLINE

        try:
            os_client = OpenStackDriver(region_name=subcloud_name,
                                        thread_name='dcmanager')
            sysinv_client = os_client.sysinv_client
            fm_client = os_client.fm_client
        except (keystone_exceptions.EndpointNotFound,
                keystone_exceptions.ConnectFailure,
                keystone_exceptions.ConnectTimeout,
                IndexError):
            if avail_status_current == consts.AVAILABILITY_OFFLINE:
                LOG.info("Identity or Platform endpoint for %s not "
                         "found, ignoring for offline "
                         "subcloud." % subcloud_name)
                return
            else:
                # The subcloud will be marked as offline below.
                LOG.error("Identity or Platform endpoint for online "
                          "subcloud: %s not found." % subcloud_name)

        except Exception as e:
            LOG.exception(e)

        if sysinv_client:
            # get a list of service groups in the subcloud
            try:
                svc_groups = sysinv_client.get_service_groups()
            except Exception as e:
                svc_groups = None
                LOG.warn('Cannot retrieve service groups for '
                         'subcloud: %s, %s' % (subcloud_name, e))

        if svc_groups:
            active_sgs = []
            inactive_sgs = []

            # Build 2 lists, 1 of active service groups,
            # one with non-active.
            for sg in svc_groups:
                if sg.state != consts.SERVICE_GROUP_STATUS_ACTIVE:
                    inactive_sgs.append(sg.service_group_name)
                else:
                    active_sgs.append(sg.service_group_name)

            # Create a list of service groups that are only present
            # in non-active list
            inactive_only = [sg for sg in inactive_sgs if
                             sg not in active_sgs]

            # An empty inactive only list and a non-empty active list
            # means we're good to go.
            if not inactive_only and active_sgs:
                avail_to_set = \
                    consts.AVAILABILITY_ONLINE
            else:
                LOG.info("Subcloud:%s has non-active "
                         "service groups: %s" %
                         (subcloud_name, inactive_only))

        if avail_to_set == consts.AVAILABILITY_OFFLINE:
            if audit_fail_count < consts.AVAIL_FAIL_COUNT_MAX:
                audit_fail_count = audit_fail_count + 1

            if (avail_status_current == consts.AVAILABILITY_ONLINE) and \
                    (audit_fail_count < consts.AVAIL_FAIL_COUNT_TO_ALARM):
                # Do not set offline until we have failed audit
                # the requisite number of times
                avail_to_set = consts.AVAILABILITY_ONLINE
        else:
            # In the case of a one off blip, we may need to set the
            # fail count back to 0
            audit_fail_count = 0

        if avail_to_set != avail_status_current:

            if avail_to_set == consts.AVAILABILITY_ONLINE:
                audit_fail_count = 0

            LOG.info('Setting new availability status: %s '
                     'on subcloud: %s' %
                     (avail_to_set, subcloud_name))

            entity_instance_id = "subcloud=%s" % subcloud_name
            fault = self.fm_api.get_fault(
                fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
                entity_instance_id)

            if fault and (avail_to_set == consts.AVAILABILITY_ONLINE):
                try:
                    self.fm_api.clear_fault(
                        fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
                        entity_instance_id)
                except Exception as e:
                    LOG.exception(e)

            elif not fault and \
                    (avail_to_set == consts.AVAILABILITY_OFFLINE):
                try:
                    fault = fm_api.Fault(
                        alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
                        alarm_state=fm_const.FM_ALARM_STATE_SET,
                        entity_type_id=fm_const.FM_ENTITY_TYPE_SUBCLOUD,
                        entity_instance_id=entity_instance_id,
                        severity=fm_const.FM_ALARM_SEVERITY_CRITICAL,
                        reason_text=('%s is offline' % subcloud_name),
                        alarm_type=fm_const.FM_ALARM_TYPE_0,
                        probable_cause=fm_const.ALARM_PROBABLE_CAUSE_29,
                        proposed_repair_action="Wait for subcloud to "
                                               "become online; if "
                                               "problem persists contact "
                                               "next level of support.",
                        service_affecting=True)

                    self.fm_api.set_fault(fault)
                except Exception as e:
                    LOG.exception(e)

            try:
                updated_subcloud = db_api.subcloud_update(
                    self.context,
                    subcloud_id,
                    management_state=None,
                    availability_status=avail_to_set,
                    software_version=None,
                    description=None, location=None,
                    audit_fail_count=audit_fail_count)
            except exceptions.SubcloudNotFound:
                # slim possibility subcloud could have been deleted since
                # we found it in db, ignore this benign error.
                LOG.info('Ignoring SubcloudNotFound when attempting state'
                         ' update: %s' % subcloud_name)
                return

            try:
                self.dcorch_rpc_client.\
                    update_subcloud_states(self.context,
                                           subcloud_name,
                                           updated_subcloud.management_state,
                                           avail_to_set)

                LOG.info('Notifying dcorch, subcloud:%s management: %s, '
                         'availability:%s' %
                         (subcloud_name,
                          updated_subcloud.management_state,
                          avail_to_set))
            except Exception as e:
                LOG.exception(e)
                LOG.warn('Problem informing dcorch of subcloud '
                         'state change, subcloud: %s' % subcloud_name)

            if avail_to_set == consts.AVAILABILITY_OFFLINE:
                # Subcloud is going offline, set all endpoint statuses to
                # unknown.
                try:
                    self.subcloud_manager.update_subcloud_endpoint_status(
                        self.context,
                        subcloud_name=subcloud_name,
                        endpoint_type=None,
                        sync_status=consts.SYNC_STATUS_UNKNOWN)
                except exceptions.SubcloudNotFound:
                    LOG.info('Ignoring SubcloudNotFound when attempting '
                             'sync_status update: %s' % subcloud_name)
                    return

        elif audit_fail_count != subcloud.audit_fail_count:

            try:
                db_api.subcloud_update(self.context, subcloud_id,
                                       management_state=None,
                                       availability_status=None,
                                       software_version=None,
                                       description=None, location=None,
                                       audit_fail_count=audit_fail_count)
            except exceptions.SubcloudNotFound:
                # slim possibility subcloud could have been deleted since
                # we found it in db, ignore this benign error.
                LOG.info('Ignoring SubcloudNotFound when attempting '
                         'audit_fail_count update: %s' % subcloud_name)
                return

        elif update_subcloud_state:
            # Nothing has changed, but we want to send a state update for this
            # subcloud as an audit. Get the most up-to-date data.
            subcloud = db_api.subcloud_get_by_name(self.context, subcloud_name)
            self.dcorch_rpc_client. \
                update_subcloud_states(self.context,
                                       subcloud_name,
                                       subcloud.management_state,
                                       subcloud.availability_status)
            self.audit_count = 0

        # If subcloud is online, get alarm summary and store in db.
        subcloud = db_api.subcloud_get_by_name(self.context, subcloud_name)
        if (subcloud.availability_status == consts.AVAILABILITY_ONLINE) and \
                fm_client:
            self.alarm_aggr.update_alarm_summary(subcloud_name, fm_client)

        # Audit openstack application in the subcloud
        if audit_openstack and sysinv_client:
            # get a list of installed apps in the subcloud
            try:
                apps = sysinv_client.get_applications()
            except Exception as e:
                LOG.warn('Cannot retrieve installed apps for '
                         'subcloud:%s, %s' % (subcloud_name, e))
                return

            openstack_installed = subcloud.openstack_installed
            openstack_installed_current = False
            for app in apps:
                if app.name == sysinv_constants.HELM_APP_OPENSTACK\
                        and app.active:
                    # audit find openstack app is installed and active in
                    # the subcloud
                    openstack_installed_current = True
                    break

            dcm_update_func = None
            dco_update_func = None
            if openstack_installed_current and not openstack_installed:
                dcm_update_func = db_api.subcloud_status_create
                # TODO(andy.ning): This RPC will block for the duration of the
                # initial sync. It needs to be made non-blocking.
                dco_update_func = self.dcorch_rpc_client.\
                    add_subcloud_sync_endpoint_type
            elif not openstack_installed_current and openstack_installed:
                dcm_update_func = db_api.subcloud_status_delete
                dco_update_func = self.dcorch_rpc_client.\
                    remove_subcloud_sync_endpoint_type

            if dcm_update_func and dco_update_func:
                endpoint_type_list = dccommon_consts.ENDPOINT_TYPES_LIST_OS
                try:
                    # Notify dcorch to add/remove sync endpoint type list
                    dco_update_func(self.context, subcloud_name,
                                    endpoint_type_list)
                    LOG.info('Notifying dcorch, subcloud: %s new sync'
                             ' endpoint: %s' % (subcloud_name,
                                                endpoint_type_list))
                    # Update subcloud status table by adding/removing
                    # openstack sync endpoint types.
                    for endpoint_type in endpoint_type_list:
                        dcm_update_func(self.context, subcloud_id,
                                        endpoint_type)
                    # Update openstack_installed of subcloud table
                    db_api.subcloud_update(
                        self.context, subcloud_id,
                        openstack_installed=openstack_installed_current)
                except exceptions.SubcloudNotFound:
                    LOG.info('Ignoring SubcloudNotFound when attempting'
                             ' openstack_installed update: %s'
                             % subcloud_name)
                except Exception as e:
                    LOG.exception(e)
                    LOG.warn('Problem informing dcorch of subcloud '
                             'sync endpoint type change, subcloud: %s'
                             % subcloud_name)

distributedcloud/dcmanager/manager/subcloud_manager.py
@@ -1041,3 +1041,153 @@ class SubcloudManager(manager.Manager):
         self._update_subcloud_endpoint_status(
             context, subcloud.name, endpoint_type, sync_status,
             alarmable)
+
+    def _update_subcloud_state(self, context, subcloud_name,
+                               management_state, availability_status):
+        try:
+            self.dcorch_rpc_client.update_subcloud_states(
+                context, subcloud_name, management_state, availability_status)
+
+            LOG.info('Notifying dcorch, subcloud:%s management: %s, '
+                     'availability:%s' %
+                     (subcloud_name,
+                      management_state,
+                      availability_status))
+        except Exception:
+            LOG.exception('Problem informing dcorch of subcloud state change,'
+                          ' subcloud: %s' % subcloud_name)
+
+    def _raise_or_clear_subcloud_status_alarm(self, subcloud_name,
+                                              availability_status):
+        entity_instance_id = "subcloud=%s" % subcloud_name
+        fault = self.fm_api.get_fault(
+            fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
+            entity_instance_id)
+
+        if fault and (availability_status == consts.AVAILABILITY_ONLINE):
+            try:
+                self.fm_api.clear_fault(
+                    fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
+                    entity_instance_id)
+            except Exception:
+                LOG.exception("Failed to clear offline alarm for subcloud: %s",
+                              subcloud_name)
+
+        elif not fault and \
+                (availability_status == consts.AVAILABILITY_OFFLINE):
+            try:
+                fault = fm_api.Fault(
+                    alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
+                    alarm_state=fm_const.FM_ALARM_STATE_SET,
+                    entity_type_id=fm_const.FM_ENTITY_TYPE_SUBCLOUD,
+                    entity_instance_id=entity_instance_id,
+                    severity=fm_const.FM_ALARM_SEVERITY_CRITICAL,
+                    reason_text=('%s is offline' % subcloud_name),
+                    alarm_type=fm_const.FM_ALARM_TYPE_0,
+                    probable_cause=fm_const.ALARM_PROBABLE_CAUSE_29,
+                    proposed_repair_action="Wait for subcloud to "
+                                           "become online; if "
+                                           "problem persists contact "
+                                           "next level of support.",
+                    service_affecting=True)
+
+                self.fm_api.set_fault(fault)
+            except Exception:
+                LOG.exception("Failed to raise offline alarm for subcloud: %s",
+                              subcloud_name)
+
+    def update_subcloud_availability(self, context, subcloud_name,
+                                     availability_status,
+                                     update_state_only=False,
+                                     audit_fail_count=None):
+        try:
+            subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
+        except Exception:
+            LOG.exception("Failed to get subcloud by name: %s" % subcloud_name)
+
+        if update_state_only:
+            # Nothing has changed, but we want to send a state update for this
+            # subcloud as an audit. Get the most up-to-date data.
+            self._update_subcloud_state(context, subcloud_name,
+                                        subcloud.management_state,
+                                        availability_status)
+        elif availability_status is None:
+            # only update the audit fail count
+            try:
+                db_api.subcloud_update(self.context, subcloud.id,
+                                       audit_fail_count=audit_fail_count)
+            except exceptions.SubcloudNotFound:
+                # slim possibility subcloud could have been deleted since
+                # we found it in db, ignore this benign error.
+                LOG.info('Ignoring SubcloudNotFound when attempting '
+                         'audit_fail_count update: %s' % subcloud_name)
+                return
+        else:
+            self._raise_or_clear_subcloud_status_alarm(subcloud_name,
+                                                       availability_status)
+
+            if availability_status == consts.AVAILABILITY_OFFLINE:
+                # Subcloud is going offline, set all endpoint statuses to
+                # unknown.
+                self._update_subcloud_endpoint_status(
+                    context, subcloud_name, endpoint_type=None,
+                    sync_status=consts.SYNC_STATUS_UNKNOWN)
+
+            try:
+                updated_subcloud = db_api.subcloud_update(
+                    context,
+                    subcloud.id,
+                    availability_status=availability_status,
+                    audit_fail_count=audit_fail_count)
+            except exceptions.SubcloudNotFound:
+                # slim possibility subcloud could have been deleted since
+                # we found it in db, ignore this benign error.
+                LOG.info('Ignoring SubcloudNotFound when attempting state'
+                         ' update: %s' % subcloud_name)
+                return
+
+            # Send dcorch a state update
+            self._update_subcloud_state(context, subcloud_name,
+                                        updated_subcloud.management_state,
+                                        availability_status)
+
+    def update_subcloud_sync_endpoint_type(self, context,
+                                           subcloud_name,
+                                           endpoint_type_list,
+                                           openstack_installed):
+        operation = 'add' if openstack_installed else 'remove'
+        func_switcher = {
+            'add': (
+                self.dcorch_rpc_client.add_subcloud_sync_endpoint_type,
+                db_api.subcloud_status_create
+            ),
+            'remove': (
+                self.dcorch_rpc_client.remove_subcloud_sync_endpoint_type,
+                db_api.subcloud_status_delete
+            )
+        }
+
+        try:
+            subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
+        except Exception:
+            LOG.exception("Failed to get subcloud by name: %s" % subcloud_name)
+
+        try:
+            # Notify dcorch to add/remove sync endpoint type list
+            func_switcher[operation][0](self.context, subcloud_name,
+                                        endpoint_type_list)
+            LOG.info('Notifying dcorch, subcloud: %s new sync endpoint: %s' %
+                     (subcloud_name, endpoint_type_list))
+
+            # Update subcloud status table by adding/removing openstack sync
+            # endpoint types
+            for endpoint_type in endpoint_type_list:
+                func_switcher[operation][1](self.context, subcloud.id,
+                                            endpoint_type)
+            # Update openstack_installed of subcloud table
+            db_api.subcloud_update(self.context, subcloud.id,
+                                   openstack_installed=openstack_installed)
+        except Exception:
+            LOG.exception('Problem informing dcorch of subcloud sync endpoint'
+                          ' type change, subcloud: %s' % subcloud_name)
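The func_switcher table pairs each dcorch RPC with its matching dcmanager DB operation so that the add and remove cases share one code path; this replaces the dcm_update_func/dco_update_func branching in the deleted manager-side audit code above.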

@@ -30,9 +30,9 @@ from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
 from dcmanager.common import consts
 from dcmanager.common import context
 from dcmanager.common import exceptions
+from dcmanager.common import scheduler
 from dcmanager.db import api as db_api
 from dcmanager.manager.patch_audit_manager import PatchAuditManager
-from dcmanager.manager import scheduler

 LOG = logging.getLogger(__name__)

distributedcloud/dcmanager/rpc/client.py
@@ -89,6 +89,31 @@ class ManagerClient(object):
                           endpoint_type=endpoint_type,
                           sync_status=sync_status))

+    def update_subcloud_availability(self, ctxt,
+                                     subcloud_name,
+                                     availability_status,
+                                     update_state_only=False,
+                                     audit_fail_count=None):
+        return self.call(
+            ctxt,
+            self.make_msg('update_subcloud_availability',
+                          subcloud_name=subcloud_name,
+                          availability_status=availability_status,
+                          update_state_only=update_state_only,
+                          audit_fail_count=audit_fail_count))
+
+    def update_subcloud_sync_endpoint_type(self, ctxt, subcloud_id,
+                                           subcloud_name,
+                                           endpoint_type_list,
+                                           openstack_installed):
+        return self.cast(
+            ctxt,
+            self.make_msg('update_subcloud_sync_endpoint_type',
+                          subcloud_id=subcloud_id,
+                          subcloud_name=subcloud_name,
+                          endpoint_type_list=endpoint_type_list,
+                          openstack_installed=openstack_installed))
+
     def create_sw_update_strategy(self, ctxt, payload):
         return self.call(ctxt, self.make_msg('create_sw_update_strategy',
                                              payload=payload))
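Note the asymmetry in the two new client methods: update_subcloud_availability uses self.call() (the audit blocks until dcmanager has processed the state change), while update_subcloud_sync_endpoint_type uses self.cast() (fire-and-forget), presumably because the endpoint-type update can trigger a long-running dcorch sync that the audit should not wait on (see the TODO about blocking in the deleted audit code above).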

@@ -20,8 +20,8 @@
 import mock

 from dccommon.drivers.openstack import sdk_platform as sdk
+from dcmanager.audit import alarm_aggregation
 from dcmanager.common import exceptions
-from dcmanager.manager import alarm_aggregation
 from dcmanager.tests import base
 from dcmanager.tests import utils

distributedcloud/dcmanager/tests/unit/audit/test_service.py (new file)
@@ -0,0 +1,45 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2020 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#

from dcmanager.audit import service
from dcmanager.common import scheduler
from dcmanager.tests import base
from dcmanager.tests import utils
from oslo_config import cfg

CONF = cfg.CONF


class TestDCManagerAuditService(base.DCManagerTestCase):

    def setUp(self):
        super(TestDCManagerAuditService, self).setUp()
        self.tenant_id = 'fake_admin'
        self.thm = scheduler.ThreadGroupManager()
        self.context = utils.dummy_context(user='test_user',
                                           tenant=self.tenant_id)
        self.service_obj = service.DCManagerAuditService()

    def test_init(self):
        self.assertEqual(self.service_obj.host, 'localhost')
        self.assertEqual(self.service_obj.topic, 'dcmanager-audit')

    def test_init_tgm(self):
        self.service_obj.init_tgm()
        self.assertIsNotNone(self.service_obj.TG)
@@ -24,19 +24,19 @@ import sys
|
||||
sys.modules['fm_core'] = mock.Mock()
|
||||
|
||||
from dccommon import consts as dccommon_consts
|
||||
from dcmanager.audit import subcloud_audit_manager
|
||||
from dcmanager.common import consts
|
||||
from dcmanager.db.sqlalchemy import api as db_api
|
||||
from dcmanager.manager import subcloud_audit_manager
|
||||
from dcmanager.manager import subcloud_manager
|
||||
# from dcmanager.manager import subcloud_manager
|
||||
|
||||
from dcmanager.tests import base
|
||||
|
||||
|
||||
class FakeDCOrchAPI(object):
|
||||
class FakeDCManagerAPI(object):
|
||||
|
||||
def __init__(self):
|
||||
self.update_subcloud_states = mock.MagicMock()
|
||||
self.add_subcloud_sync_endpoint_type = mock.MagicMock()
|
||||
self.update_subcloud_availability = mock.MagicMock()
|
||||
self.update_subcloud_sync_endpoint_type = mock.MagicMock()
|
||||
|
||||
|
||||
class FakeAlarmAggregation(object):
|
||||
@@ -201,11 +201,11 @@ class TestAuditManager(base.DCManagerTestCase):
    def setUp(self):
        super(TestAuditManager, self).setUp()

        # Mock the DCOrch API
        self.fake_dcorch_api = FakeDCOrchAPI()
        p = mock.patch('dcorch.rpc.client.EngineClient')
        self.mock_dcorch_api = p.start()
        self.mock_dcorch_api.return_value = self.fake_dcorch_api
        # Mock the DCManager API
        self.fake_dcmanager_api = FakeDCManagerAPI()
        p = mock.patch('dcmanager.rpc.client.ManagerClient')
        self.mock_dcmanager_api = p.start()
        self.mock_dcmanager_api.return_value = self.fake_dcmanager_api
        self.addCleanup(p.stop)

        # Mock the OpenStackDriver
@@ -250,63 +250,46 @@ class TestAuditManager(base.DCManagerTestCase):
        return db_api.subcloud_create(ctxt, **values)

    def test_init(self):
        sm = subcloud_manager.SubcloudManager()
        am = subcloud_audit_manager.SubcloudAuditManager(subcloud_manager=sm)
        am = subcloud_audit_manager.SubcloudAuditManager()
        self.assertIsNotNone(am)
        self.assertEqual('subcloud_audit_manager', am.service_name)
        self.assertEqual('localhost', am.host)
        self.assertEqual(self.ctx, am.context)

    def test_periodic_subcloud_audit(self):
        mock_sm = mock.Mock()
        am = subcloud_audit_manager.SubcloudAuditManager(
            subcloud_manager=mock_sm)
        am.periodic_subcloud_audit()
        am = subcloud_audit_manager.SubcloudAuditManager()
        am._periodic_subcloud_audit_loop()

    def test_audit_subcloud_online(self):

        subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
        self.assertIsNotNone(subcloud)

        mock_sm = mock.Mock()
        am = subcloud_audit_manager.SubcloudAuditManager(
            subcloud_manager=mock_sm)

        # No stx-openstack application
        self.fake_openstack_client.sysinv_client.get_application_results = []
        am = subcloud_audit_manager.SubcloudAuditManager()

        # Audit the subcloud
        am._audit_subcloud(subcloud.name, update_subcloud_state=False,
                           audit_openstack=False)

        # Verify the subcloud was set to online
        self.fake_dcorch_api.update_subcloud_states.assert_called_with(
            mock.ANY, subcloud.name, consts.MANAGEMENT_UNMANAGED,
            consts.AVAILABILITY_ONLINE)
        self.fake_dcmanager_api.update_subcloud_availability.assert_called_with(
            mock.ANY, subcloud.name, consts.AVAILABILITY_ONLINE,
            False, 0)

        # Verify the openstack endpoints were not added
        self.fake_dcorch_api.add_subcloud_sync_endpoint_type.\
        # Verify the openstack endpoints were not updated
        self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
            assert_not_called()

        # Verify the subcloud openstack_installed was not updated
        updated_subcloud = db_api.subcloud_get_by_name(self.ctx, 'subcloud1')
        self.assertEqual(updated_subcloud.openstack_installed, False)

        # Verify alarm update is called
        self.fake_alarm_aggr.update_alarm_summary.assert_called_with(
            'subcloud1', self.fake_openstack_client.fm_client)
            subcloud.name, self.fake_openstack_client.fm_client)

    def test_audit_subcloud_online_no_change(self):

        subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
        self.assertIsNotNone(subcloud)

        mock_sm = mock.Mock()
        am = subcloud_audit_manager.SubcloudAuditManager(
            subcloud_manager=mock_sm)

        # No stx-openstack application
        self.fake_openstack_client.sysinv_client.get_application_results = []
        am = subcloud_audit_manager.SubcloudAuditManager()

        # Set the subcloud to online
        db_api.subcloud_update(
@@ -318,15 +301,12 @@ class TestAuditManager(base.DCManagerTestCase):
                           audit_openstack=False)

        # Verify the subcloud state was not updated
        self.fake_dcorch_api.update_subcloud_states.assert_not_called()

        # Verify the openstack endpoints were not added
        self.fake_dcorch_api.add_subcloud_sync_endpoint_type.\
        self.fake_dcmanager_api.update_subcloud_availability.\
            assert_not_called()

        # Verify the subcloud openstack_installed was not updated
        updated_subcloud = db_api.subcloud_get_by_name(self.ctx, 'subcloud1')
        self.assertEqual(updated_subcloud.openstack_installed, False)
        # Verify the openstack endpoints were not added
        self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
            assert_not_called()

        # Verify alarm update is called
        self.fake_alarm_aggr.update_alarm_summary.assert_called_with(
@@ -337,12 +317,7 @@ class TestAuditManager(base.DCManagerTestCase):
        subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
        self.assertIsNotNone(subcloud)

        mock_sm = mock.Mock()
        am = subcloud_audit_manager.SubcloudAuditManager(
            subcloud_manager=mock_sm)

        # No stx-openstack application
        self.fake_openstack_client.sysinv_client.get_application_results = []
        am = subcloud_audit_manager.SubcloudAuditManager()

        # Set the subcloud to online
        db_api.subcloud_update(
@@ -354,30 +329,24 @@ class TestAuditManager(base.DCManagerTestCase):
                           audit_openstack=False)

        # Verify the subcloud state was updated even though no change
        self.fake_dcorch_api.update_subcloud_states.assert_called_with(
            mock.ANY, 'subcloud1', consts.MANAGEMENT_UNMANAGED,
            consts.AVAILABILITY_ONLINE)
        self.fake_dcmanager_api.update_subcloud_availability.assert_called_with(
            mock.ANY, subcloud.name, consts.AVAILABILITY_ONLINE,
            True, None)

        # Verify the openstack endpoints were not added
        self.fake_dcorch_api.add_subcloud_sync_endpoint_type.\
        # Verify the openstack endpoints were not updated
        self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
            assert_not_called()

        # Verify the subcloud openstack_installed was not updated
        updated_subcloud = db_api.subcloud_get_by_name(self.ctx, 'subcloud1')
        self.assertEqual(updated_subcloud.openstack_installed, False)

        # Verify alarm update is called
        self.fake_alarm_aggr.update_alarm_summary.assert_called_with(
            'subcloud1', self.fake_openstack_client.fm_client)

    def test_audit_subcloud_offline(self):
    def test_audit_subcloud_go_offline(self):

        subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
        self.assertIsNotNone(subcloud)

        mock_sm = mock.Mock()
        am = subcloud_audit_manager.SubcloudAuditManager(
            subcloud_manager=mock_sm)
        am = subcloud_audit_manager.SubcloudAuditManager()

        # Set the subcloud to online
        db_api.subcloud_update(
@@ -394,54 +363,152 @@ class TestAuditManager(base.DCManagerTestCase):
        am._audit_subcloud(subcloud.name, update_subcloud_state=False,
                           audit_openstack=False)

        # Verify the subcloud was not set to offline
        self.fake_dcorch_api.update_subcloud_states.assert_not_called()
        # Verify the audit fail count was updated
        audit_fail_count = 1
        self.fake_dcmanager_api.update_subcloud_availability.\
            assert_called_with(mock.ANY, subcloud.name,
                               None, False, audit_fail_count)

        # Verify the audit_fail_count was updated
        updated_subcloud = db_api.subcloud_get_by_name(self.ctx, 'subcloud1')
        self.assertEqual(updated_subcloud.audit_fail_count, 1)
        db_api.subcloud_update(self.ctx, subcloud.id,
                               audit_fail_count=audit_fail_count)

        # Audit the subcloud again
        am._audit_subcloud(subcloud.name, update_subcloud_state=False,
                           audit_openstack=False)
        audit_fail_count = audit_fail_count + 1

        # Verify the subcloud was set to offline
        self.fake_dcorch_api.update_subcloud_states.assert_called_with(
            mock.ANY, 'subcloud1', consts.MANAGEMENT_UNMANAGED,
            consts.AVAILABILITY_OFFLINE)
        self.fake_dcmanager_api.update_subcloud_availability.\
            assert_called_with(mock.ANY, subcloud.name,
                               consts.AVAILABILITY_OFFLINE, False,
                               audit_fail_count)

        # Verify the subcloud availability was updated
        updated_subcloud = db_api.subcloud_get_by_name(self.ctx, 'subcloud1')
        self.assertEqual(updated_subcloud.availability_status,
                         consts.AVAILABILITY_OFFLINE)

        # Verify alarm update is called
        # Verify alarm update is called only once
        self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
            'subcloud1', self.fake_openstack_client.fm_client)

    def test_audit_subcloud_online_with_openstack(self):
            subcloud.name, self.fake_openstack_client.fm_client)

    def test_audit_subcloud_offline_no_change(self):
        subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
        self.assertIsNotNone(subcloud)

        mock_sm = mock.Mock()
        am = subcloud_audit_manager.SubcloudAuditManager(
            subcloud_manager=mock_sm)
        am = subcloud_audit_manager.SubcloudAuditManager()

        db_api.subcloud_update(self.ctx, subcloud.id,
                               audit_fail_count=consts.AVAIL_FAIL_COUNT_MAX)

        # Mark a service group as inactive
        self.fake_openstack_client.sysinv_client.get_service_groups_result = \
            copy.deepcopy(FAKE_SERVICE_GROUPS)
        self.fake_openstack_client.sysinv_client. \
            get_service_groups_result[3].state = 'inactive'

        # Audit the subcloud
        am._audit_subcloud(subcloud.name, update_subcloud_state=False,
                           audit_openstack=True)

        # Verify the subcloud was set to online
        self.fake_dcorch_api.update_subcloud_states.assert_called_with(
            mock.ANY, 'subcloud1', consts.MANAGEMENT_UNMANAGED,
            consts.AVAILABILITY_ONLINE)
        # Verify the subcloud state was not updated
        self.fake_dcmanager_api.update_subcloud_availability.\
            assert_not_called()

        # Verify the openstack endpoints were not updated
        self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
            assert_not_called()

        # Verify alarm update is not called
        self.fake_alarm_aggr.update_alarm_summary.assert_not_called()

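Taken together, test_audit_subcloud_go_offline and test_audit_subcloud_offline_no_change pin the fail-count contract: each failed audit reports an incremented audit_fail_count with availability None until consts.AVAIL_FAIL_COUNT_MAX is reached, at which point AVAILABILITY_OFFLINE is reported, and once the count is already at the maximum nothing further is sent. A hedged sketch of that decision; the method name and attributes are illustrative, inferred only from the assertions above:

def _report_availability(self, subcloud, audit_ok):
    if audit_ok:
        avail_to_set = consts.AVAILABILITY_ONLINE
        audit_fail_count = 0
    elif subcloud.audit_fail_count >= consts.AVAIL_FAIL_COUNT_MAX:
        # Threshold already crossed and reported earlier; stay quiet.
        return
    else:
        audit_fail_count = subcloud.audit_fail_count + 1
        avail_to_set = (consts.AVAILABILITY_OFFLINE
                        if audit_fail_count >= consts.AVAIL_FAIL_COUNT_MAX
                        else None)
    if avail_to_set != subcloud.availability_status:
        # The audit no longer writes the database itself; it notifies
        # dcmanager-manager over RPC and lets it persist the change.
        self.dcmanager_rpc_client.update_subcloud_availability(
            self.context, subcloud.name, avail_to_set,
            False, audit_fail_count)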
    def test_audit_subcloud_online_with_openstack_installed(self):

        subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
        self.assertIsNotNone(subcloud)

        am = subcloud_audit_manager.SubcloudAuditManager()

        # Set the subcloud to online
        db_api.subcloud_update(
            self.ctx, subcloud.id,
            availability_status=consts.AVAILABILITY_ONLINE)

        # Audit the subcloud
        am._audit_subcloud(subcloud.name, update_subcloud_state=False,
                           audit_openstack=True)

        # Verify the subcloud state was not updated
        self.fake_dcmanager_api.update_subcloud_availability.\
            assert_not_called()

        # Verify the openstack endpoints were added
        self.fake_dcorch_api.add_subcloud_sync_endpoint_type.\
            assert_called_with(mock.ANY, 'subcloud1',
                               dccommon_consts.ENDPOINT_TYPES_LIST_OS)
        # self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
        #     assert_called_with(mock.ANY, 'subcloud1',
        #                        dccommon_consts.ENDPOINT_TYPES_LIST_OS,
        #                        True)

        # Verify the subcloud openstack_installed was updated
        updated_subcloud = db_api.subcloud_get_by_name(self.ctx, 'subcloud1')
        self.assertEqual(updated_subcloud.openstack_installed, True)
        # Verify alarm update is called
        self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
            'subcloud1', self.fake_openstack_client.fm_client)

    def test_audit_subcloud_online_with_openstack_removed(self):

        subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
        self.assertIsNotNone(subcloud)

        am = subcloud_audit_manager.SubcloudAuditManager()

        # Set the subcloud to online and openstack installed
        db_api.subcloud_update(
            self.ctx, subcloud.id,
            availability_status=consts.AVAILABILITY_ONLINE,
            openstack_installed=True)

        # Remove stx-openstack application
        FAKE_APPLICATIONS.pop(1)

        # Audit the subcloud
        am._audit_subcloud(subcloud.name, update_subcloud_state=False,
                           audit_openstack=True)

        # Verify the subcloud state was not updated
        self.fake_dcmanager_api.update_subcloud_availability.\
            assert_not_called()

        # Verify the openstack endpoints were removed
        self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
            assert_called_with(mock.ANY, 'subcloud1',
                               dccommon_consts.ENDPOINT_TYPES_LIST_OS, False)

        # Verify alarm update is called
        self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
            'subcloud1', self.fake_openstack_client.fm_client)

    def test_audit_subcloud_online_with_openstack_inactive(self):

        subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
        self.assertIsNotNone(subcloud)

        am = subcloud_audit_manager.SubcloudAuditManager()

        # Set the subcloud to online and openstack installed
        db_api.subcloud_update(
            self.ctx, subcloud.id,
            availability_status=consts.AVAILABILITY_ONLINE,
            openstack_installed=True)

        # stx-openstack application is not active
        FAKE_APPLICATIONS[1].active = False

        # Audit the subcloud
        am._audit_subcloud(subcloud.name, update_subcloud_state=False,
                           audit_openstack=True)

        # Verify the subcloud state was not updated
        self.fake_dcmanager_api.update_subcloud_availability.\
            assert_not_called()

        # Verify the openstack endpoints were removed
        self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
            assert_called_with(mock.ANY, 'subcloud1',
                               dccommon_consts.ENDPOINT_TYPES_LIST_OS, False)

        # Verify alarm update is called
        self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
            'subcloud1', self.fake_openstack_client.fm_client)
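The three openstack variants above fix the audit's other responsibility: openstack_installed only changes when the stx-openstack application's presence or active state disagrees with what the database records, and the change is routed through update_subcloud_sync_endpoint_type. A hedged sketch of that branch; the helper name and the application query are assumptions:

def _audit_openstack_installed(self, subcloud, applications):
    openstack_active = any(
        app.name == 'stx-openstack' and app.active
        for app in applications)
    if openstack_active == subcloud.openstack_installed:
        return  # no transition, no RPC
    # Ask dcmanager-manager to add or remove the OS endpoint types.
    self.dcmanager_rpc_client.update_subcloud_sync_endpoint_type(
        self.context, subcloud.name,
        dccommon_consts.ENDPOINT_TYPES_LIST_OS,
        openstack_active)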
@@ -22,7 +22,7 @@ import mock
import sys
sys.modules['fm_core'] = mock.Mock()

from dcmanager.manager import scheduler
from dcmanager.common import scheduler
from dcmanager.manager import service
from dcmanager.tests import base
from dcmanager.tests import utils
@@ -56,10 +56,8 @@ class TestDCManagerService(base.DCManagerTestCase):
        self.service_obj.init_tgm()
        self.assertIsNotNone(self.service_obj.TG)

    @mock.patch.object(service, 'SubcloudAuditManager')
    def test_init_audit_managers(self, mock_audit_manager):
    def test_init_audit_managers(self):
        self.service_obj.init_audit_managers()
        self.assertIsNotNone(self.service_obj.subcloud_audit_manager)
        self.assertIsNotNone(self.service_obj.patch_audit_manager)

    @mock.patch.object(service, 'SwUpdateManager')
@@ -72,29 +70,16 @@ class TestDCManagerService(base.DCManagerTestCase):

    @mock.patch.object(service, 'SwUpdateManager')
    @mock.patch.object(service, 'SubcloudManager')
    @mock.patch.object(service, 'SubcloudAuditManager')
    @mock.patch.object(service, 'rpc_messaging')
    def test_start(self, mock_rpc, mock_audit_manager, mock_subcloud_manager,
    def test_start(self, mock_rpc, mock_subcloud_manager,
                   mock_sw_update_manager):
        self.service_obj.start()
        mock_rpc.get_rpc_server.assert_called_once_with(
            self.service_obj.target, self.service_obj)
        mock_rpc.get_rpc_server().start.assert_called_once_with()

    @mock.patch.object(service, 'SubcloudAuditManager')
    @mock.patch.object(service, 'PatchAuditManager')
    def test_periodic_audit_subclouds(self, mock_patch_audit_manager,
                                      mock_subcloud_audit_manager):
        self.service_obj.init_tgm()
        self.service_obj.init_audit_managers()
        self.service_obj.subcloud_audit()
        mock_subcloud_audit_manager().periodic_subcloud_audit.\
            assert_called_once_with()

    @mock.patch.object(service, 'SubcloudAuditManager')
    @mock.patch.object(service, 'PatchAuditManager')
    def test_periodic_audit_patches(self, mock_patch_audit_manager,
                                    mock_subcloud_audit_manager):
    def test_periodic_audit_patches(self, mock_patch_audit_manager):
        self.service_obj.init_tgm()
        self.service_obj.init_audit_managers()
        self.service_obj.patch_audit()
@@ -137,20 +122,18 @@ class TestDCManagerService(base.DCManagerTestCase):

    @mock.patch.object(service, 'SwUpdateManager')
    @mock.patch.object(service, 'SubcloudManager')
    @mock.patch.object(service, 'SubcloudAuditManager')
    @mock.patch.object(service, 'rpc_messaging')
    def test_stop_rpc_server(self, mock_rpc, mock_audit_manager,
                             mock_subcloud_manager, mock_sw_update_manager):
    def test_stop_rpc_server(self, mock_rpc, mock_subcloud_manager,
                             mock_sw_update_manager):
        self.service_obj.start()
        self.service_obj._stop_rpc_server()
        mock_rpc.get_rpc_server().stop.assert_called_once_with()

    @mock.patch.object(service, 'SwUpdateManager')
    @mock.patch.object(service, 'SubcloudManager')
    @mock.patch.object(service, 'SubcloudAuditManager')
    @mock.patch.object(service, 'rpc_messaging')
    def test_stop(self, mock_rpc, mock_audit_manager,
                  mock_subcloud_manager, mock_sw_update_manager):
    def test_stop(self, mock_rpc, mock_subcloud_manager,
                  mock_sw_update_manager):
        self.service_obj.start()
        self.service_obj.stop()
        mock_rpc.get_rpc_server().stop.assert_called_once_with()
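With test_periodic_audit_subclouds deleted, DCManagerService no longer drives the subcloud audit; the new dcmanager-audit process runs its own loop, and the commit description reduces the interval to 20 seconds. A hedged, method-shaped sketch of that loop; everything except the interval is illustrative:

import eventlet

from oslo_log import log as logging

LOG = logging.getLogger(__name__)

SUBCLOUD_AUDIT_INTERVAL = 20  # seconds, per the commit description


def _periodic_subcloud_audit_loop(self):
    while True:
        try:
            # One pass over every subcloud: availability, alarm summary,
            # and (when requested) the openstack application state.
            self._audit_subclouds()
        except Exception:
            LOG.exception('Subcloud audit loop failed')
        eventlet.greenthread.sleep(SUBCLOUD_AUDIT_INTERVAL)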
@@ -27,7 +27,9 @@ sys.modules['fm_core'] = mock.Mock()

import threading

from dccommon import consts as dccommon_consts
from dcmanager.common import consts
from dcmanager.common import exceptions
from dcmanager.db.sqlalchemy import api as db_api
from dcmanager.manager import subcloud_manager
from dcmanager.tests import base
@@ -39,6 +41,7 @@ class FakeDCOrchAPI(object):
    def __init__(self):
        self.update_subcloud_states = mock.MagicMock()
        self.add_subcloud_sync_endpoint_type = mock.MagicMock()
        self.remove_subcloud_sync_endpoint_type = mock.MagicMock()
        self.del_subcloud = mock.MagicMock()
        self.add_subcloud = mock.MagicMock()

@@ -443,3 +446,136 @@ class TestSubcloudManager(base.DCManagerTestCase):
        self.assertIsNotNone(updated_subcloud_status)
        self.assertEqual(updated_subcloud_status.sync_status,
                         consts.SYNC_STATUS_OUT_OF_SYNC)

    def test_update_subcloud_availability_go_online(self):
        # create a subcloud
        subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
        self.assertIsNotNone(subcloud)
        self.assertEqual(subcloud.availability_status,
                         consts.AVAILABILITY_OFFLINE)

        sm = subcloud_manager.SubcloudManager()
        sm.update_subcloud_availability(self.ctx, subcloud.name,
                                        consts.AVAILABILITY_ONLINE)

        updated_subcloud = db_api.subcloud_get_by_name(self.ctx, 'subcloud1')
        # Verify the subcloud was set to online
        self.assertEqual(updated_subcloud.availability_status,
                         consts.AVAILABILITY_ONLINE)
        # Verify notifying dcorch
        self.fake_dcorch_api.update_subcloud_states.assert_called_once_with(
            self.ctx, subcloud.name, updated_subcloud.management_state,
            consts.AVAILABILITY_ONLINE)

    def test_update_subcloud_availability_go_offline(self):
        subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
        self.assertIsNotNone(subcloud)

        # Set the subcloud to online/managed
        db_api.subcloud_update(self.ctx, subcloud.id,
                               management_state=consts.MANAGEMENT_MANAGED,
                               availability_status=consts.AVAILABILITY_ONLINE)

        sm = subcloud_manager.SubcloudManager()

        # create sync statuses for endpoints and set them to in-sync
        for endpoint in [dcorch_consts.ENDPOINT_TYPE_PLATFORM,
                         dcorch_consts.ENDPOINT_TYPE_IDENTITY,
                         dcorch_consts.ENDPOINT_TYPE_PATCHING,
                         dcorch_consts.ENDPOINT_TYPE_FM,
                         dcorch_consts.ENDPOINT_TYPE_NFV]:
            db_api.subcloud_status_create(
                self.ctx, subcloud.id, endpoint)
            sm.update_subcloud_endpoint_status(
                self.ctx, subcloud_name=subcloud.name,
                endpoint_type=endpoint,
                sync_status=consts.SYNC_STATUS_IN_SYNC)

        # Audit fails once
        audit_fail_count = 1
        sm.update_subcloud_availability(self.ctx, subcloud.name,
                                        availability_status=None,
                                        audit_fail_count=audit_fail_count)
        # Verify the subcloud availability was not updated
        updated_subcloud = db_api.subcloud_get_by_name(self.ctx, 'subcloud1')
        self.assertEqual(updated_subcloud.availability_status,
                         consts.AVAILABILITY_ONLINE)
        # Verify dcorch was not notified
        self.fake_dcorch_api.update_subcloud_states.assert_not_called()
        # Verify the audit_fail_count was updated
        updated_subcloud = db_api.subcloud_get_by_name(self.ctx, 'subcloud1')
        self.assertEqual(updated_subcloud.audit_fail_count, audit_fail_count)

        # Audit fails again
        audit_fail_count = audit_fail_count + 1
        sm.update_subcloud_availability(self.ctx, subcloud.name,
                                        consts.AVAILABILITY_OFFLINE,
                                        audit_fail_count=audit_fail_count)

        # Verify the subcloud availability was updated
        updated_subcloud = db_api.subcloud_get_by_name(self.ctx, 'subcloud1')
        self.assertEqual(updated_subcloud.availability_status,
                         consts.AVAILABILITY_OFFLINE)

        # Verify notifying dcorch
        self.fake_dcorch_api.update_subcloud_states.assert_called_once_with(
            self.ctx, subcloud.name, updated_subcloud.management_state,
            consts.AVAILABILITY_OFFLINE)

        # Verify all endpoint statuses set to unknown
        for subcloud, subcloud_status in db_api. \
                subcloud_get_with_status(self.ctx, subcloud.id):
            self.assertIsNotNone(subcloud_status)
            self.assertEqual(subcloud_status.sync_status,
                             consts.SYNC_STATUS_UNKNOWN)

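The two availability tests define the manager-side handler the audit RPC lands in: a None status only persists the fail count, a real status is written, an offline transition flips every endpoint sync status to unknown, and dcorch is told about any real change. A hedged sketch, with names inferred from the assertions (the db_api status accessors used here are illustrative):

def update_subcloud_availability(self, context, subcloud_name,
                                 availability_status,
                                 update_state_only=False,
                                 audit_fail_count=None):
    subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
    if availability_status is None:
        # Audit failed but the offline threshold isn't reached yet:
        # persist the fail count only, keep availability as-is.
        db_api.subcloud_update(context, subcloud.id,
                               audit_fail_count=audit_fail_count)
        return
    db_api.subcloud_update(context, subcloud.id,
                           availability_status=availability_status,
                           audit_fail_count=audit_fail_count)
    if availability_status == consts.AVAILABILITY_OFFLINE:
        # Going offline invalidates what we knew about the subcloud:
        # every endpoint sync status becomes unknown.
        for status in db_api.subcloud_status_get_all(context, subcloud.id):
            db_api.subcloud_status_update(
                context, subcloud.id, status.endpoint_type,
                consts.SYNC_STATUS_UNKNOWN)
    # Keep dcorch's view of the subcloud current.
    self.dcorch_rpc_client.update_subcloud_states(
        context, subcloud_name, subcloud.management_state,
        availability_status)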
    def test_update_subcloud_sync_endpoint_type(self):
        subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
        self.assertIsNotNone(subcloud)

        sm = subcloud_manager.SubcloudManager()

        endpoint_type_list = dccommon_consts.ENDPOINT_TYPES_LIST_OS

        # Test openstack app installed
        openstack_installed = True
        sm.update_subcloud_sync_endpoint_type(self.ctx, subcloud.name,
                                              endpoint_type_list,
                                              openstack_installed)

        # Verify notifying dcorch to add subcloud sync endpoint type
        self.fake_dcorch_api.add_subcloud_sync_endpoint_type.\
            assert_called_once_with(self.ctx, subcloud.name,
                                    endpoint_type_list)

        # Verify the subcloud status created for os endpoints
        for endpoint in endpoint_type_list:
            subcloud_status = db_api.subcloud_status_get(
                self.ctx, subcloud.id, endpoint)
            self.assertIsNotNone(subcloud_status)
            self.assertEqual(subcloud_status.sync_status,
                             consts.SYNC_STATUS_UNKNOWN)

        # Verify the subcloud openstack_installed was updated
        updated_subcloud = db_api.subcloud_get_by_name(self.ctx, subcloud.name)
        self.assertEqual(updated_subcloud.openstack_installed, True)

        # Test openstack app removed
        openstack_installed = False
        sm.update_subcloud_sync_endpoint_type(self.ctx, subcloud.name,
                                              endpoint_type_list,
                                              openstack_installed)
        # Verify notifying dcorch to remove subcloud sync endpoint type
        self.fake_dcorch_api.remove_subcloud_sync_endpoint_type.\
            assert_called_once_with(self.ctx, subcloud.name,
                                    endpoint_type_list)

        # Verify the subcloud status is deleted for os endpoints
        for endpoint in endpoint_type_list:
            self.assertRaises(exceptions.SubcloudStatusNotFound,
                              db_api.subcloud_status_get, self.ctx,
                              subcloud.id, endpoint)

        # Verify the subcloud openstack_installed was updated
        updated_subcloud = db_api.subcloud_get_by_name(self.ctx, subcloud.name)
        self.assertEqual(updated_subcloud.openstack_installed, False)
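test_update_subcloud_sync_endpoint_type pins both directions of the openstack transition: installed creates the OS endpoint status rows and registers them with dcorch, removed deletes them and deregisters. A hedged sketch of the handler, with illustrative db_api accessor names:

def update_subcloud_sync_endpoint_type(self, context, subcloud_name,
                                       endpoint_type_list,
                                       openstack_installed):
    subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
    if openstack_installed:
        # App installed: dcorch starts syncing the OS endpoints and
        # their status rows begin life as "unknown".
        self.dcorch_rpc_client.add_subcloud_sync_endpoint_type(
            context, subcloud_name, endpoint_type_list)
        for endpoint in endpoint_type_list:
            db_api.subcloud_status_create(context, subcloud.id, endpoint)
    else:
        # App removed: stop syncing and drop the status rows.
        self.dcorch_rpc_client.remove_subcloud_sync_endpoint_type(
            context, subcloud_name, endpoint_type_list)
        for endpoint in endpoint_type_list:
            db_api.subcloud_status_delete(context, subcloud.id, endpoint)
    db_api.subcloud_update(context, subcloud.id,
                           openstack_installed=openstack_installed)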
@@ -100,7 +100,7 @@ class EngineClient(object):

    def add_subcloud_sync_endpoint_type(self, ctxt, subcloud_name,
                                        endpoint_type_list):
        return self.call(
        return self.cast(
            ctxt,
            self.make_msg('add_subcloud_sync_endpoint_type',
                          subcloud_name=subcloud_name,
@@ -108,7 +108,7 @@ class EngineClient(object):

    def remove_subcloud_sync_endpoint_type(self, ctxt, subcloud_name,
                                           endpoint_type_list):
        return self.call(
        return self.cast(
            ctxt,
            self.make_msg('remove_subcloud_sync_endpoint_type',
                          subcloud_name=subcloud_name,
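Switching these notifications from call to cast removes a synchronous round trip: the caller no longer blocks waiting for the dcorch engine to reply, which matters now that they are issued on behalf of a frequent audit cycle. An illustrative, self-contained contrast in oslo.messaging terms (the transport, topic, and argument values here are placeholders, not project configuration):

import oslo_messaging
from oslo_config import cfg

transport = oslo_messaging.get_rpc_transport(cfg.CONF)
target = oslo_messaging.Target(topic='dcorch-engine', version='1.0')
client = oslo_messaging.RPCClient(transport, target)

ctxt = {}
# call() blocks until the server returns a result (old behaviour):
#     result = client.call(ctxt, 'add_subcloud_sync_endpoint_type', ...)
# cast() returns immediately; the server processes it asynchronously:
client.cast(ctxt, 'add_subcloud_sync_endpoint_type',
            subcloud_name='subcloud1',
            endpoint_type_list=['volume', 'compute', 'network'])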
322
distributedcloud/ocf/dcmanager-audit
Normal file
@@ -0,0 +1,322 @@
#!/bin/sh
# OpenStack DC Manager Audit Service (dcmanager-audit)
#
# Description:
#   Manages an OpenStack DC Manager Audit Service (dcmanager-audit)
#   process as an HA resource
#
# Copyright (c) 2020 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# See usage() function below for more details ...
#
# OCF instance parameters:
#   OCF_RESKEY_binary
#   OCF_RESKEY_config
#   OCF_RESKEY_user
#   OCF_RESKEY_pid
#   OCF_RESKEY_additional_parameters
#######################################################################
# Initialization:

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

#######################################################################

# Fill in some defaults if no values are specified

OCF_RESKEY_binary_default="/usr/bin/dcmanager-audit"
OCF_RESKEY_config_default="/etc/dcmanager/dcmanager.conf"
OCF_RESKEY_user_default="root"
OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"

: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}

#######################################################################

usage() {
    cat <<UEND
usage: $0 (start|stop|validate-all|meta-data|status|monitor)

$0 manages an OpenStack DC Manager Audit service (dcmanager-audit) process as an HA resource

The 'start' operation starts the dcmanager-audit service.
The 'stop' operation stops the dcmanager-audit service.
The 'validate-all' operation reports whether the parameters are valid
The 'meta-data' operation reports this RA's meta-data information
The 'status' operation reports whether the dcmanager-audit service is running
The 'monitor' operation reports whether the dcmanager-audit service seems to be working

UEND
}

meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="dcmanager-audit">
<version>1.0</version>

<longdesc lang="en">
Resource agent for the DC Manager Audit Service (dcmanager-audit)
</longdesc>
<shortdesc lang="en">Manages the OpenStack DC Manager Audit Service (dcmanager-audit)</shortdesc>
<parameters>

<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the DC Manager Audit Service binary (dcmanager-audit)
</longdesc>
<shortdesc lang="en">DC Manager Audit Service binary (dcmanager-audit)</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>

<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Location of the DC Manager Audit Service (dcmanager-audit) configuration file
</longdesc>
<shortdesc lang="en">DC Manager Audit Service (dcmanager-audit) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>

<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running DC Manager Audit Service (dcmanager-audit)
</longdesc>
<shortdesc lang="en">DC Manager Audit Service (dcmanager-audit) user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>

<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pid file to use for this DC Manager Audit Service (dcmanager-audit) instance
</longdesc>
<shortdesc lang="en">DC Manager Audit Service (dcmanager-audit) pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>

<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters to pass on to the dcmanager-audit
</longdesc>
<shortdesc lang="en">Additional parameters for dcmanager-audit</shortdesc>
<content type="string" />
</parameter>

</parameters>

<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" timeout="10" interval="5" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}

#######################################################################
# Functions invoked by resource manager actions

dcmanager_audit_validate() {
    local rc

    check_binary $OCF_RESKEY_binary
    check_binary curl
    check_binary tr
    check_binary grep
    check_binary cut
    check_binary head

    # A config file on shared storage that is not available
    # during probes is OK.
    if [ ! -f $OCF_RESKEY_config ]; then
        if ! ocf_is_probe; then
            ocf_log err "Config $OCF_RESKEY_config doesn't exist"
            return $OCF_ERR_INSTALLED
        fi
        ocf_log_warn "Config $OCF_RESKEY_config not available during a probe"
    fi

    getent passwd $OCF_RESKEY_user >/dev/null 2>&1
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    true
}

dcmanager_audit_status() {
    local pid
    local rc

    if [ ! -f $OCF_RESKEY_pid ]; then
        ocf_log info "DC Manager Audit Service (dcmanager-audit) is not running"
        return $OCF_NOT_RUNNING
    else
        pid=`cat $OCF_RESKEY_pid`
    fi

    ocf_run -warn kill -s 0 $pid
    rc=$?
    if [ $rc -eq 0 ]; then
        return $OCF_SUCCESS
    else
        ocf_log info "Old PID file found, but DC Manager Audit Service (dcmanager-audit) is not running"
        rm -f $OCF_RESKEY_pid
        return $OCF_NOT_RUNNING
    fi
}

dcmanager_audit_monitor() {
    local rc

    dcmanager_audit_status
    rc=$?

    # If status returned anything but success, return that immediately
    if [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    ocf_log debug "DC Manager Audit Service (dcmanager-audit) monitor succeeded"
    return $OCF_SUCCESS
}

dcmanager_audit_start() {
    local rc

    dcmanager_audit_status
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
        ocf_log info "DC Manager Audit Service (dcmanager-audit) already running"
        return $OCF_SUCCESS
    fi

    # Change the working dir to /, to be sure it's accessible
    cd /

    # Run the actual dcmanager-audit daemon. Don't use ocf_run as we're sending the tool's output
    # straight to /dev/null anyway and using ocf_run would break stdout-redirection here.
    su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \
        $OCF_RESKEY_additional_parameters"' >> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid

    # Spin waiting for the server to come up.
    # Let the CRM/LRM time us out if required
    while true; do
        dcmanager_audit_monitor
        rc=$?
        [ $rc -eq $OCF_SUCCESS ] && break
        if [ $rc -ne $OCF_NOT_RUNNING ]; then
            ocf_log err "DC Manager Audit Service (dcmanager-audit) start failed"
            exit $OCF_ERR_GENERIC
        fi
        sleep 1
    done

    ocf_log info "DC Manager Audit Service (dcmanager-audit) started"
    return $OCF_SUCCESS
}

dcmanager_audit_confirm_stop() {
    local my_binary
    local my_processes

    my_binary=`which ${OCF_RESKEY_binary}`
    my_processes=`pgrep -l -f "^(python|/usr/bin/python|/usr/bin/python2) ${my_binary}([^\w-]|$)"`

    if [ -n "${my_processes}" ]
    then
        ocf_log info "About to SIGKILL the following: ${my_processes}"
        pkill -KILL -f "^(python|/usr/bin/python|/usr/bin/python2) ${my_binary}([^\w-]|$)"
    fi
}

dcmanager_audit_stop() {
    local rc
    local pid

    dcmanager_audit_status
    rc=$?
    if [ $rc -eq $OCF_NOT_RUNNING ]; then
        ocf_log info "DC Manager Audit Service (dcmanager-audit) already stopped"
        dcmanager_audit_confirm_stop
        return $OCF_SUCCESS
    fi

    # Try SIGTERM
    pid=`cat $OCF_RESKEY_pid`
    ocf_run kill -s TERM $pid
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "DC Manager Audit Service (dcmanager-audit) couldn't be stopped"
        dcmanager_audit_confirm_stop
        exit $OCF_ERR_GENERIC
    fi

    # stop waiting
    shutdown_timeout=15
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
    fi
    count=0
    while [ $count -lt $shutdown_timeout ]; do
        dcmanager_audit_status
        rc=$?
        if [ $rc -eq $OCF_NOT_RUNNING ]; then
            break
        fi
        count=`expr $count + 1`
        sleep 1
        ocf_log debug "DC Manager Audit Service (dcmanager-audit) still hasn't stopped yet. Waiting ..."
    done

    dcmanager_audit_status
    rc=$?
    if [ $rc -ne $OCF_NOT_RUNNING ]; then
        # SIGTERM didn't help either, try SIGKILL
        ocf_log info "DC Manager Audit Service (dcmanager-audit) failed to stop after ${shutdown_timeout}s \
          using SIGTERM. Trying SIGKILL ..."
        ocf_run kill -s KILL $pid
    fi
    dcmanager_audit_confirm_stop

    ocf_log info "DC Manager Audit Service (dcmanager-audit) stopped"

    rm -f $OCF_RESKEY_pid

    return $OCF_SUCCESS
}

#######################################################################

case "$1" in
  meta-data)    meta_data
                exit $OCF_SUCCESS;;
  usage|help)   usage
                exit $OCF_SUCCESS;;
esac

# Anything except meta-data and help must pass validation
dcmanager_audit_validate || exit $?

# What kind of method was invoked?
case "$1" in
  start)        dcmanager_audit_start;;
  stop)         dcmanager_audit_stop;;
  status)       dcmanager_audit_status;;
  monitor)      dcmanager_audit_monitor;;
  validate-all) ;;
  *)            usage
                exit $OCF_ERR_UNIMPLEMENTED;;
esac
@@ -29,6 +29,7 @@ packages =
[entry_points]
console_scripts =
    dcmanager-api = dcmanager.cmd.api:main
    dcmanager-audit = dcmanager.cmd.audit:main
    dcmanager-manager = dcmanager.cmd.manager:main
    dcmanager-manage = dcmanager.cmd.manage:main
    dcorch-api = dcorch.cmd.api:main
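The new console script needs a dcmanager/cmd/audit.py behind it, which this diff does not show. A plausible sketch following the oslo pattern the sibling entry points use; every detail here is an assumption:

import sys

from oslo_config import cfg
from oslo_log import log as logging
from oslo_service import service

from dcmanager.audit import service as audit_service

CONF = cfg.CONF


def main():
    logging.register_options(CONF)
    CONF(sys.argv[1:], project='dcmanager')
    logging.setup(CONF, 'dcmanager-audit')
    # Launch the audit service; systemd/OCF supply --config-file.
    srv = audit_service.DCManagerAuditService()
    launcher = service.launch(CONF, srv)
    launcher.wait()


if __name__ == '__main__':
    main()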