Add metrics support to IPA
This utilizes the new metrics support in ironic-lib to allow the agent to report timing metrics for agent API methods as configured in ironic-lib. Additionally, this adds developer docs on how to use metrics in IPA, including some caveats specific to ironic-lib.metrics use in IPA. Co-Authored-By: Jay Faulkner <jay@jvf.cc> Co-Authored-By: Alex Weeks <alex.weeks@gmail.com> Change-Id: Ic08d4ff78b6fb614b474b956a32eac352a14262a Partial-bug: #1526219
This commit is contained in:
parent
ad60806f93
commit
fd874652e3
@ -17,7 +17,8 @@ Index
|
||||
|
||||
.. toctree::
|
||||
|
||||
troubleshooting
|
||||
troubleshooting
|
||||
metrics
|
||||
|
||||
How it works
|
||||
============
|
||||
|
53
doc/source/metrics.rst
Normal file
53
doc/source/metrics.rst
Normal file
@ -0,0 +1,53 @@
|
||||
.. _metrics:
|
||||
|
||||
===============================================
|
||||
Emitting metrics from Ironic-Python-Agent (IPA)
|
||||
===============================================
|
||||
|
||||
This document describes how to emit metrics from IPA, ranging from timers and
|
||||
counters in code to directly emitting hardware metrics from a custom
|
||||
HardwareManager.
|
||||
|
||||
Overview
|
||||
========
|
||||
IPA uses the metrics implementation from ironic-lib, with a few caveats due
|
||||
to the dynamic configuration done at lookup time. You cannot cache the metrics
|
||||
instance as the MetricsLogger returned will change after lookup if configs
|
||||
different than the default setting have been used. This also means that the
|
||||
method decorator supported by ironic-lib cannot be used in IPA.
|
||||
|
||||
Using a context manager
|
||||
=======================
|
||||
Using the context manager is the recommended way for sending metrics that time
|
||||
or count sections of code. However, given that you cannot cache the
|
||||
MetricsLogger, you have to explicitly call get_metrics_logger() from
|
||||
ironic-lib every time. For example:
|
||||
|
||||
from ironic_lib import metrics_utils
|
||||
|
||||
def my_method():
|
||||
with metrics_utils.get_metrics_logger(__name__).timer('my_method'):
|
||||
return _do_work()
|
||||
|
||||
As a note, these metric collectors do work for custom HardwareManagers as
|
||||
well. However, you may want to measure the portions of a method that determine
|
||||
compatibility separate from portions of a method that actually do work, in
|
||||
order to ensure the metrics are relevant and useful on all hardware.
|
||||
|
||||
Explicitly sending metrics
|
||||
==========================
|
||||
A feature that may be particularly helpful for deployers writing custom
|
||||
HardwareManagers is the ability to explicitly send metrics. As an example,
|
||||
you could add a cleaning step which would retrieve metrics about a device and
|
||||
ship them using the provided metrics library. For example:
|
||||
|
||||
from ironic_lib import metrics_utils
|
||||
|
||||
def my_cleaning_step():
|
||||
for name, value in _get_smart_data():
|
||||
metrics_utils.get_metrics_logger(__name__).send_gauge(name, value)
|
||||
|
||||
References
|
||||
==========
|
||||
For more information, please read the source of the metrics module in
|
||||
`ironic-lib <http://git.openstack.org/cgit/openstack/ironic-lib/tree/ironic_lib>`_.
|
@ -20,6 +20,7 @@ import threading
|
||||
import time
|
||||
|
||||
from oslo_concurrency import processutils
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log
|
||||
import pkg_resources
|
||||
from six.moves.urllib import parse as urlparse
|
||||
@ -35,7 +36,6 @@ from ironic_python_agent import inspector
|
||||
from ironic_python_agent import ironic_api_client
|
||||
from ironic_python_agent import utils
|
||||
|
||||
|
||||
LOG = log.getLogger(__name__)
|
||||
|
||||
# Time(in seconds) to wait for any of the interfaces to be up
|
||||
@ -45,6 +45,9 @@ NETWORK_WAIT_TIMEOUT = 60
|
||||
# Time(in seconds) to wait before reattempt
|
||||
NETWORK_WAIT_RETRY = 5
|
||||
|
||||
cfg.CONF.import_group('metrics', 'ironic_lib.metrics_utils')
|
||||
cfg.CONF.import_group('metrics_statsd', 'ironic_lib.metrics_statsd')
|
||||
|
||||
|
||||
def _time():
|
||||
"""Wraps time.time() for simpler testing."""
|
||||
@ -340,6 +343,15 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
|
||||
hardware.cache_node(self.node)
|
||||
self.heartbeat_timeout = content['heartbeat_timeout']
|
||||
|
||||
# Update config with values from Ironic
|
||||
config = content.get('config', {})
|
||||
if config.get('metrics'):
|
||||
for opt, val in config.items():
|
||||
setattr(cfg.CONF.metrics, opt, val)
|
||||
if config.get('metrics_statsd'):
|
||||
for opt, val in config.items():
|
||||
setattr(cfg.CONF.metrics_statsd, opt, val)
|
||||
|
||||
wsgi = simple_server.make_server(
|
||||
self.listen_address[0],
|
||||
self.listen_address[1],
|
||||
|
@ -12,9 +12,9 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from ironic_lib import metrics_utils
|
||||
import pecan
|
||||
from pecan import rest
|
||||
|
||||
from wsme import types as wtypes
|
||||
import wsmeext.pecan as wsme_pecan
|
||||
|
||||
@ -81,7 +81,8 @@ class RootController(rest.RestController):
|
||||
# NOTE: The reason why convert() is being called for every
|
||||
# request is because we need to get the host url from
|
||||
# the request object to make the links.
|
||||
return Root.convert()
|
||||
with metrics_utils.get_metrics_logger(__name__).timer('get'):
|
||||
return Root.convert()
|
||||
|
||||
@pecan.expose()
|
||||
def _route(self, args):
|
||||
|
@ -13,6 +13,7 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from ironic_lib import metrics_utils
|
||||
import pecan
|
||||
from pecan import rest
|
||||
from wsme import types
|
||||
@ -78,9 +79,10 @@ class CommandController(rest.RestController):
|
||||
@wsme_pecan.wsexpose(CommandResultList)
|
||||
def get_all(self):
|
||||
"""Get all command results."""
|
||||
agent = pecan.request.agent
|
||||
results = agent.list_command_results()
|
||||
return CommandResultList.from_results(results)
|
||||
with metrics_utils.get_metrics_logger(__name__).timer('get_all'):
|
||||
agent = pecan.request.agent
|
||||
results = agent.list_command_results()
|
||||
return CommandResultList.from_results(results)
|
||||
|
||||
@wsme_pecan.wsexpose(CommandResult, types.text, types.text)
|
||||
def get_one(self, result_id, wait=None):
|
||||
@ -91,13 +93,14 @@ class CommandController(rest.RestController):
|
||||
:returns: a :class:`ironic_python_agent.api.controller.v1.command.
|
||||
CommandResult` object.
|
||||
"""
|
||||
agent = pecan.request.agent
|
||||
result = agent.get_command_result(result_id)
|
||||
with metrics_utils.get_metrics_logger(__name__).timer('get_one'):
|
||||
agent = pecan.request.agent
|
||||
result = agent.get_command_result(result_id)
|
||||
|
||||
if wait and wait.lower() == 'true':
|
||||
result.join()
|
||||
if wait and wait.lower() == 'true':
|
||||
result.join()
|
||||
|
||||
return CommandResult.from_result(result)
|
||||
return CommandResult.from_result(result)
|
||||
|
||||
@wsme_pecan.wsexpose(CommandResult, types.text, body=Command)
|
||||
def post(self, wait=None, command=None):
|
||||
@ -109,14 +112,15 @@ class CommandController(rest.RestController):
|
||||
:returns: a :class:`ironic_python_agent.api.controller.v1.command.
|
||||
CommandResult` object.
|
||||
"""
|
||||
# the POST body is always the last arg,
|
||||
# so command must be a kwarg here
|
||||
if command is None:
|
||||
command = Command()
|
||||
agent = pecan.request.agent
|
||||
result = agent.execute_command(command.name, **command.params)
|
||||
with metrics_utils.get_metrics_logger(__name__).timer('post'):
|
||||
# the POST body is always the last arg,
|
||||
# so command must be a kwarg here
|
||||
if command is None:
|
||||
command = Command()
|
||||
agent = pecan.request.agent
|
||||
result = agent.execute_command(command.name, **command.params)
|
||||
|
||||
if wait and wait.lower() == 'true':
|
||||
result.join()
|
||||
if wait and wait.lower() == 'true':
|
||||
result.join()
|
||||
|
||||
return result
|
||||
return result
|
||||
|
@ -13,6 +13,7 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from ironic_lib import metrics_utils
|
||||
import pecan
|
||||
from pecan import rest
|
||||
from wsme import types
|
||||
@ -48,6 +49,7 @@ class StatusController(rest.RestController):
|
||||
@wsme_pecan.wsexpose(AgentStatus)
|
||||
def get_all(self):
|
||||
"""Get current status of the running agent."""
|
||||
agent = pecan.request.agent
|
||||
status = agent.get_status()
|
||||
return AgentStatus.from_agent_status(status)
|
||||
with metrics_utils.get_metrics_logger(__name__).timer('get_all'):
|
||||
agent = pecan.request.agent
|
||||
status = agent.get_status()
|
||||
return AgentStatus.from_agent_status(status)
|
||||
|
@ -533,7 +533,6 @@ class StandbyExtension(base.BaseAgentExtension):
|
||||
stream_raw_images = image_info.get('stream_raw_images', False)
|
||||
# don't write image again if already cached
|
||||
if self.cached_image_id != image_info['id']:
|
||||
|
||||
if self.cached_image_id is not None:
|
||||
LOG.debug('Already had %s cached, overwriting',
|
||||
self.cached_image_id)
|
||||
|
Loading…
Reference in New Issue
Block a user