diff --git a/doc/source/index.rst b/doc/source/index.rst index 14b174838..241806f37 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -17,7 +17,8 @@ Index .. toctree:: - troubleshooting + troubleshooting + metrics How it works ============ diff --git a/doc/source/metrics.rst b/doc/source/metrics.rst new file mode 100644 index 000000000..8d9cd3ae2 --- /dev/null +++ b/doc/source/metrics.rst @@ -0,0 +1,53 @@ +.. _metrics: + +=============================================== +Emitting metrics from Ironic-Python-Agent (IPA) +=============================================== + +This document describes how to emit metrics from IPA, from timers and +counters in code to directly emitting hardware metrics from a custom +HardwareManager. + +Overview +======== +IPA uses the metrics implementation from ironic-lib, with a few caveats due +to the dynamic configuration done at lookup time. You cannot cache the metrics +instance as the MetricsLogger returned will change after lookup if configs +different than the default setting have been used. This also means that the +method decorator supported by ironic-lib cannot be used in IPA. + +Using a context manager +======================= +Using the context manager is the recommended way for sending metrics that time +or count sections of code. However, given that you cannot cache the +MetricsLogger, you have to explicitly call get_metrics_logger() from +ironic-lib every time. For example:: + + from ironic_lib import metrics_utils + + def my_method(): + with metrics_utils.get_metrics_logger(__name__).timer(): + return _do_work() + +As a note, these metric collectors do work for custom HardwareManagers as +well; however, you may want to measure the portions of a method that determine +compatibility separately from portions of a method that actually do work, in +order to ensure the metrics are relevant and useful on all hardware.
+ +Explicitly sending metrics +========================== +A feature that may be particularly helpful for deployers writing custom +HardwareManagers is the ability to explicitly send metrics. As an example, +you could add a cleaning step which would retrieve metrics about a device and +ship them using the provided metrics library. For example:: + + from ironic_lib import metrics_utils + + def my_cleaning_step(): + for name, value in _get_smart_data(): + metrics_utils.get_metrics_logger(__name__).send_gauge(name, value) + +References +========== +For more information, please read the source of the metrics module in +`ironic-lib <http://git.openstack.org/cgit/openstack/ironic-lib/tree/ironic_lib>`_. diff --git a/ironic_python_agent/agent.py b/ironic_python_agent/agent.py index 0ff3aa67e..569b2d412 100644 --- a/ironic_python_agent/agent.py +++ b/ironic_python_agent/agent.py @@ -20,6 +20,7 @@ import threading import time from oslo_concurrency import processutils +from oslo_config import cfg from oslo_log import log import pkg_resources from six.moves.urllib import parse as urlparse @@ -35,7 +36,6 @@ from ironic_python_agent import inspector from ironic_python_agent import ironic_api_client from ironic_python_agent import utils - LOG = log.getLogger(__name__) # Time(in seconds) to wait for any of the interfaces to be up @@ -45,6 +45,9 @@ NETWORK_WAIT_TIMEOUT = 60 # Time(in seconds) to wait before reattempt NETWORK_WAIT_RETRY = 5 +cfg.CONF.import_group('metrics', 'ironic_lib.metrics_utils') +cfg.CONF.import_group('metrics_statsd', 'ironic_lib.metrics_statsd') + def _time(): """Wraps time.time() for simpler testing.""" @@ -342,6 +345,15 @@ class IronicPythonAgent(base.ExecuteCommandMixin): hardware.cache_node(self.node) self.heartbeat_timeout = content['config']['heartbeat_timeout'] + # Update each metrics config group with its own options from Ironic + config = content.get('config', {}) + if config.get('metrics'): + for opt, val in config['metrics'].items(): + setattr(cfg.CONF.metrics, opt, val) + if
config.get('metrics_statsd'): + for opt, val in config['metrics_statsd'].items(): + setattr(cfg.CONF.metrics_statsd, opt, val) + wsgi = simple_server.make_server( self.listen_address[0], self.listen_address[1], diff --git a/ironic_python_agent/api/controllers/root.py b/ironic_python_agent/api/controllers/root.py index 46caaad01..c95e2186d 100644 --- a/ironic_python_agent/api/controllers/root.py +++ b/ironic_python_agent/api/controllers/root.py @@ -12,9 +12,9 @@ # License for the specific language governing permissions and limitations # under the License. +from ironic_lib import metrics_utils import pecan from pecan import rest - from wsme import types as wtypes import wsmeext.pecan as wsme_pecan @@ -81,7 +81,8 @@ class RootController(rest.RestController): # NOTE: The reason why convert() it's being called for every # request is because we need to get the host url from # the request object to make the links. - return Root.convert() + with metrics_utils.get_metrics_logger(__name__).timer('get'): + return Root.convert() @pecan.expose() def _route(self, args): diff --git a/ironic_python_agent/api/controllers/v1/command.py b/ironic_python_agent/api/controllers/v1/command.py index e4483e292..b6971fa76 100644 --- a/ironic_python_agent/api/controllers/v1/command.py +++ b/ironic_python_agent/api/controllers/v1/command.py @@ -13,6 +13,7 @@ # License for the specific language governing permissions and limitations # under the License.
+from ironic_lib import metrics_utils import pecan from pecan import rest from wsme import types @@ -78,9 +79,10 @@ class CommandController(rest.RestController): @wsme_pecan.wsexpose(CommandResultList) def get_all(self): """Get all command results.""" - agent = pecan.request.agent - results = agent.list_command_results() - return CommandResultList.from_results(results) + with metrics_utils.get_metrics_logger(__name__).timer('get_all'): + agent = pecan.request.agent + results = agent.list_command_results() + return CommandResultList.from_results(results) @wsme_pecan.wsexpose(CommandResult, types.text, types.text) def get_one(self, result_id, wait=None): @@ -91,13 +93,14 @@ class CommandController(rest.RestController): :returns: a :class:`ironic_python_agent.api.controller.v1.command. CommandResult` object. """ - agent = pecan.request.agent - result = agent.get_command_result(result_id) + with metrics_utils.get_metrics_logger(__name__).timer('get_one'): + agent = pecan.request.agent + result = agent.get_command_result(result_id) - if wait and wait.lower() == 'true': - result.join() + if wait and wait.lower() == 'true': + result.join() - return CommandResult.from_result(result) + return CommandResult.from_result(result) @wsme_pecan.wsexpose(CommandResult, types.text, body=Command) def post(self, wait=None, command=None): @@ -109,14 +112,15 @@ class CommandController(rest.RestController): :returns: a :class:`ironic_python_agent.api.controller.v1.command. CommandResult` object. 
""" - # the POST body is always the last arg, - # so command must be a kwarg here - if command is None: - command = Command() - agent = pecan.request.agent - result = agent.execute_command(command.name, **command.params) + with metrics_utils.get_metrics_logger(__name__).timer('post'): + # the POST body is always the last arg, + # so command must be a kwarg here + if command is None: + command = Command() + agent = pecan.request.agent + result = agent.execute_command(command.name, **command.params) - if wait and wait.lower() == 'true': - result.join() + if wait and wait.lower() == 'true': + result.join() - return result + return result diff --git a/ironic_python_agent/api/controllers/v1/status.py b/ironic_python_agent/api/controllers/v1/status.py index 84efc6a49..b1ed83b8a 100644 --- a/ironic_python_agent/api/controllers/v1/status.py +++ b/ironic_python_agent/api/controllers/v1/status.py @@ -13,6 +13,7 @@ # License for the specific language governing permissions and limitations # under the License. 
+from ironic_lib import metrics_utils import pecan from pecan import rest from wsme import types @@ -48,6 +49,7 @@ class StatusController(rest.RestController): @wsme_pecan.wsexpose(AgentStatus) def get_all(self): """Get current status of the running agent.""" - agent = pecan.request.agent - status = agent.get_status() - return AgentStatus.from_agent_status(status) + with metrics_utils.get_metrics_logger(__name__).timer('get_all'): + agent = pecan.request.agent + status = agent.get_status() + return AgentStatus.from_agent_status(status) diff --git a/ironic_python_agent/extensions/standby.py b/ironic_python_agent/extensions/standby.py index 1c77d6675..a3d1ccac7 100644 --- a/ironic_python_agent/extensions/standby.py +++ b/ironic_python_agent/extensions/standby.py @@ -533,7 +533,6 @@ class StandbyExtension(base.BaseAgentExtension): stream_raw_images = image_info.get('stream_raw_images', False) # don't write image again if already cached if self.cached_image_id != image_info['id']: - if self.cached_image_id is not None: LOG.debug('Already had %s cached, overwriting', self.cached_image_id)