Timing metrics: pxe boot and iscsi deploy driver

Adds timing metrics for methods in the iscsi_deploy, pxe, and
agent_base_vendor modules.

Change-Id: Iae4a6652febcbf1ecbab948b2c4d9fa60af1e3a9
Related-bug: #1526219
This commit is contained in:
Jay Faulkner 2016-08-02 16:57:05 -07:00 committed by Jim Rollenhagen
parent 49b5027158
commit 8af7429d6e
4 changed files with 51 additions and 6 deletions

View File

@ -20,6 +20,7 @@ import ast
import collections import collections
import time import time
from ironic_lib import metrics_utils
from oslo_log import log from oslo_log import log
from oslo_utils import excutils from oslo_utils import excutils
from oslo_utils import strutils from oslo_utils import strutils
@ -45,6 +46,8 @@ from ironic import objects
LOG = log.getLogger(__name__) LOG = log.getLogger(__name__)
METRICS = metrics_utils.get_metrics_logger(__name__)
# This contains a nested dictionary containing the post clean step # This contains a nested dictionary containing the post clean step
# hooks registered for each clean step of every interface. # hooks registered for each clean step of every interface.
# Every key of POST_CLEAN_STEP_HOOKS is an interface and its value # Every key of POST_CLEAN_STEP_HOOKS is an interface and its value
@ -79,6 +82,7 @@ def _get_client():
return client return client
@METRICS.timer('post_clean_step_hook')
def post_clean_step_hook(interface, step): def post_clean_step_hook(interface, step):
"""Decorator method for adding a post clean step hook. """Decorator method for adding a post clean step hook.
@ -203,6 +207,7 @@ def _get_completed_cleaning_command(task, commands):
return last_command return last_command
@METRICS.timer('log_and_raise_deployment_error')
def log_and_raise_deployment_error(task, msg): def log_and_raise_deployment_error(task, msg):
"""Helper method to log the error and raise exception.""" """Helper method to log the error and raise exception."""
LOG.error(msg) LOG.error(msg)
@ -244,6 +249,7 @@ class AgentDeployMixin(object):
""" """
@METRICS.timer('AgentDeployMixin._refresh_clean_steps')
def _refresh_clean_steps(self, task): def _refresh_clean_steps(self, task):
"""Refresh the node's cached clean steps from the booted agent. """Refresh the node's cached clean steps from the booted agent.
@ -300,6 +306,7 @@ class AgentDeployMixin(object):
LOG.debug('Refreshed agent clean step cache for node %(node)s: ' LOG.debug('Refreshed agent clean step cache for node %(node)s: '
'%(steps)s', {'node': node.uuid, 'steps': steps}) '%(steps)s', {'node': node.uuid, 'steps': steps})
@METRICS.timer('AgentDeployMixin.continue_cleaning')
def continue_cleaning(self, task, **kwargs): def continue_cleaning(self, task, **kwargs):
"""Start the next cleaning step if the previous one is complete. """Start the next cleaning step if the previous one is complete.
@ -433,6 +440,7 @@ class AgentDeployMixin(object):
LOG.error(msg) LOG.error(msg)
return manager_utils.cleaning_error_handler(task, msg) return manager_utils.cleaning_error_handler(task, msg)
@METRICS.timer('AgentDeployMixin.heartbeat')
def heartbeat(self, task, callback_url): def heartbeat(self, task, callback_url):
"""Process a heartbeat. """Process a heartbeat.
@ -511,6 +519,7 @@ class AgentDeployMixin(object):
elif node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT): elif node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT):
deploy_utils.set_failed_state(task, last_error) deploy_utils.set_failed_state(task, last_error)
@METRICS.timer('AgentDeployMixin.reboot_and_finish_deploy')
def reboot_and_finish_deploy(self, task): def reboot_and_finish_deploy(self, task):
"""Helper method to trigger reboot on the node and finish deploy. """Helper method to trigger reboot on the node and finish deploy.
@ -581,6 +590,7 @@ class AgentDeployMixin(object):
task.process_event('done') task.process_event('done')
LOG.info(_LI('Deployment to node %s done'), task.node.uuid) LOG.info(_LI('Deployment to node %s done'), task.node.uuid)
@METRICS.timer('AgentDeployMixin.prepare_instance_to_boot')
def prepare_instance_to_boot(self, task, root_uuid, efi_sys_uuid): def prepare_instance_to_boot(self, task, root_uuid, efi_sys_uuid):
"""Prepares instance to boot. """Prepares instance to boot.
@ -605,6 +615,7 @@ class AgentDeployMixin(object):
msg = _('Failed to continue agent deployment.') msg = _('Failed to continue agent deployment.')
log_and_raise_deployment_error(task, msg) log_and_raise_deployment_error(task, msg)
@METRICS.timer('AgentDeployMixin.configure_local_boot')
def configure_local_boot(self, task, root_uuid=None, def configure_local_boot(self, task, root_uuid=None,
efi_system_part_uuid=None): efi_system_part_uuid=None):
"""Helper method to configure local boot on the node. """Helper method to configure local boot on the node.
@ -692,6 +703,7 @@ class BaseAgentVendor(AgentDeployMixin, base.VendorInterface):
'payload version: %s') 'payload version: %s')
% version) % version)
@METRICS.timer('BaseAgentVendor.heartbeat')
@base.passthru(['POST']) @base.passthru(['POST'])
@task_manager.require_exclusive_lock @task_manager.require_exclusive_lock
def heartbeat(self, task, **kwargs): def heartbeat(self, task, **kwargs):
@ -715,6 +727,7 @@ class BaseAgentVendor(AgentDeployMixin, base.VendorInterface):
super(BaseAgentVendor, self).heartbeat(task, callback_url) super(BaseAgentVendor, self).heartbeat(task, callback_url)
@METRICS.timer('BaseAgentVendor.lookup')
@base.driver_passthru(['POST'], async=False) @base.driver_passthru(['POST'], async=False)
def lookup(self, context, **kwargs): def lookup(self, context, **kwargs):
"""Find a matching node for the agent. """Find a matching node for the agent.

View File

@ -16,6 +16,7 @@
import os import os
from ironic_lib import disk_utils from ironic_lib import disk_utils
from ironic_lib import metrics_utils
from ironic_lib import utils as ironic_utils from ironic_lib import utils as ironic_utils
from oslo_config import cfg from oslo_config import cfg
from oslo_log import log as logging from oslo_log import log as logging
@ -37,6 +38,8 @@ from ironic.drivers.modules import image_cache
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
METRICS = metrics_utils.get_metrics_logger(__name__)
# NOTE(rameshg87): This file now registers some of opts in pxe group. # NOTE(rameshg87): This file now registers some of opts in pxe group.
# This is acceptable for now as a future refactoring into # This is acceptable for now as a future refactoring into
# separate boot and deploy interfaces is planned, and moving config # separate boot and deploy interfaces is planned, and moving config
@ -129,6 +132,7 @@ def _save_disk_layout(node, i_info):
node.save() node.save()
@METRICS.timer('check_image_size')
def check_image_size(task): def check_image_size(task):
"""Check if the requested image is larger than the root partition size. """Check if the requested image is larger than the root partition size.
@ -147,6 +151,7 @@ def check_image_size(task):
raise exception.InstanceDeployFailure(msg) raise exception.InstanceDeployFailure(msg)
@METRICS.timer('cache_instance_image')
def cache_instance_image(ctx, node): def cache_instance_image(ctx, node):
"""Fetch the instance's image from Glance """Fetch the instance's image from Glance
@ -172,6 +177,7 @@ def cache_instance_image(ctx, node):
return (uuid, image_path) return (uuid, image_path)
@METRICS.timer('destroy_images')
def destroy_images(node_uuid): def destroy_images(node_uuid):
"""Delete instance's image file. """Delete instance's image file.
@ -182,6 +188,7 @@ def destroy_images(node_uuid):
InstanceImageCache().clean_up() InstanceImageCache().clean_up()
@METRICS.timer('get_deploy_info')
def get_deploy_info(node, address, iqn, port=None, lun='1'): def get_deploy_info(node, address, iqn, port=None, lun='1'):
"""Returns the information required for doing iSCSI deploy in a dictionary. """Returns the information required for doing iSCSI deploy in a dictionary.
@ -235,6 +242,7 @@ def get_deploy_info(node, address, iqn, port=None, lun='1'):
return params return params
@METRICS.timer('continue_deploy')
def continue_deploy(task, **kwargs): def continue_deploy(task, **kwargs):
"""Resume a deployment upon getting POST data from deploy ramdisk. """Resume a deployment upon getting POST data from deploy ramdisk.
@ -306,6 +314,7 @@ def continue_deploy(task, **kwargs):
return uuid_dict_returned return uuid_dict_returned
@METRICS.timer('do_agent_iscsi_deploy')
def do_agent_iscsi_deploy(task, agent_client): def do_agent_iscsi_deploy(task, agent_client):
"""Method invoked when deployed with the agent ramdisk. """Method invoked when deployed with the agent ramdisk.
@ -375,6 +384,7 @@ def _get_boot_mode(node):
return "bios" return "bios"
@METRICS.timer('validate')
def validate(task): def validate(task):
"""Validates the pre-requisites for iSCSI deploy. """Validates the pre-requisites for iSCSI deploy.
@ -405,6 +415,7 @@ def validate(task):
class AgentDeployMixin(agent_base_vendor.AgentDeployMixin): class AgentDeployMixin(agent_base_vendor.AgentDeployMixin):
@METRICS.timer('AgentDeployMixin.continue_deploy')
@task_manager.require_exclusive_lock @task_manager.require_exclusive_lock
def continue_deploy(self, task): def continue_deploy(self, task):
"""Method invoked when deployed using iSCSI. """Method invoked when deployed using iSCSI.
@ -436,6 +447,7 @@ class ISCSIDeploy(AgentDeployMixin, base.DeployInterface):
def get_properties(self): def get_properties(self):
return {} return {}
@METRICS.timer('ISCSIDeploy.validate')
def validate(self, task): def validate(self, task):
"""Validate the deployment information for the task's node. """Validate the deployment information for the task's node.
@ -454,6 +466,7 @@ class ISCSIDeploy(AgentDeployMixin, base.DeployInterface):
# broken down into separate boot and deploy implementations. # broken down into separate boot and deploy implementations.
validate(task) validate(task)
@METRICS.timer('ISCSIDeploy.deploy')
@task_manager.require_exclusive_lock @task_manager.require_exclusive_lock
def deploy(self, task): def deploy(self, task):
"""Start deployment of the task's node. """Start deployment of the task's node.
@ -474,6 +487,7 @@ class ISCSIDeploy(AgentDeployMixin, base.DeployInterface):
return states.DEPLOYWAIT return states.DEPLOYWAIT
@METRICS.timer('ISCSIDeploy.tear_down')
@task_manager.require_exclusive_lock @task_manager.require_exclusive_lock
def tear_down(self, task): def tear_down(self, task):
"""Tear down a previous deployment on the task's node. """Tear down a previous deployment on the task's node.
@ -493,6 +507,7 @@ class ISCSIDeploy(AgentDeployMixin, base.DeployInterface):
task.driver.network.unconfigure_tenant_networks(task) task.driver.network.unconfigure_tenant_networks(task)
return states.DELETED return states.DELETED
@METRICS.timer('ISCSIDeploy.prepare')
@task_manager.require_exclusive_lock @task_manager.require_exclusive_lock
def prepare(self, task): def prepare(self, task):
"""Prepare the deployment environment for this task's node. """Prepare the deployment environment for this task's node.
@ -523,6 +538,7 @@ class ISCSIDeploy(AgentDeployMixin, base.DeployInterface):
deploy_opts = deploy_utils.build_agent_options(node) deploy_opts = deploy_utils.build_agent_options(node)
task.driver.boot.prepare_ramdisk(task, deploy_opts) task.driver.boot.prepare_ramdisk(task, deploy_opts)
@METRICS.timer('ISCSIDeploy.clean_up')
def clean_up(self, task): def clean_up(self, task):
"""Clean up the deployment environment for the task's node. """Clean up the deployment environment for the task's node.
@ -540,6 +556,7 @@ class ISCSIDeploy(AgentDeployMixin, base.DeployInterface):
def take_over(self, task): def take_over(self, task):
pass pass
@METRICS.timer('ISCSIDeploy.get_clean_steps')
def get_clean_steps(self, task): def get_clean_steps(self, task):
"""Get the list of clean steps from the agent. """Get the list of clean steps from the agent.
@ -555,6 +572,7 @@ class ISCSIDeploy(AgentDeployMixin, base.DeployInterface):
'erase_devices': CONF.deploy.erase_devices_priority}) 'erase_devices': CONF.deploy.erase_devices_priority})
return steps return steps
@METRICS.timer('ISCSIDeploy.execute_clean_step')
def execute_clean_step(self, task, step): def execute_clean_step(self, task, step):
"""Execute a clean step asynchronously on the agent. """Execute a clean step asynchronously on the agent.
@ -567,6 +585,7 @@ class ISCSIDeploy(AgentDeployMixin, base.DeployInterface):
""" """
return deploy_utils.agent_execute_clean_step(task, step) return deploy_utils.agent_execute_clean_step(task, step)
@METRICS.timer('ISCSIDeploy.prepare_cleaning')
def prepare_cleaning(self, task): def prepare_cleaning(self, task):
"""Boot into the agent to prepare for cleaning. """Boot into the agent to prepare for cleaning.
@ -578,6 +597,7 @@ class ISCSIDeploy(AgentDeployMixin, base.DeployInterface):
return deploy_utils.prepare_inband_cleaning( return deploy_utils.prepare_inband_cleaning(
task, manage_boot=True) task, manage_boot=True)
@METRICS.timer('ISCSIDeploy.tear_down_cleaning')
def tear_down_cleaning(self, task): def tear_down_cleaning(self, task):
"""Clean up the PXE and DHCP files after cleaning. """Clean up the PXE and DHCP files after cleaning.

View File

@ -18,6 +18,7 @@ PXE Boot Interface
import os import os
import shutil import shutil
from ironic_lib import metrics_utils
from ironic_lib import utils as ironic_utils from ironic_lib import utils as ironic_utils
from oslo_config import cfg from oslo_config import cfg
from oslo_log import log as logging from oslo_log import log as logging
@ -93,6 +94,8 @@ pxe_opts = [
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
METRICS = metrics_utils.get_metrics_logger(__name__)
CONF = cfg.CONF CONF = cfg.CONF
CONF.register_opts(pxe_opts, group='pxe') CONF.register_opts(pxe_opts, group='pxe')
CONF.import_opt('deploy_callback_timeout', 'ironic.conductor.manager', CONF.import_opt('deploy_callback_timeout', 'ironic.conductor.manager',
@ -240,6 +243,7 @@ def _build_pxe_config_options(task, pxe_info):
return pxe_options return pxe_options
@METRICS.timer('validate_boot_option_for_uefi')
def validate_boot_option_for_uefi(node): def validate_boot_option_for_uefi(node):
"""In uefi boot mode, validate if the boot option is compatible. """In uefi boot mode, validate if the boot option is compatible.
@ -263,6 +267,7 @@ def validate_boot_option_for_uefi(node):
{'node_uuid': node.uuid}) {'node_uuid': node.uuid})
@METRICS.timer('validate_boot_option_for_trusted_boot')
def validate_boot_parameters_for_trusted_boot(node): def validate_boot_parameters_for_trusted_boot(node):
"""Check if boot parameters are valid for trusted boot.""" """Check if boot parameters are valid for trusted boot."""
boot_mode = deploy_utils.get_boot_mode_for_deploy(node) boot_mode = deploy_utils.get_boot_mode_for_deploy(node)
@ -335,6 +340,7 @@ class PXEBoot(base.BootInterface):
""" """
return COMMON_PROPERTIES return COMMON_PROPERTIES
@METRICS.timer('PXEBoot.validate')
def validate(self, task): def validate(self, task):
"""Validate the PXE-specific info for booting deploy/instance images. """Validate the PXE-specific info for booting deploy/instance images.
@ -385,6 +391,7 @@ class PXEBoot(base.BootInterface):
props = ['kernel', 'ramdisk'] props = ['kernel', 'ramdisk']
deploy_utils.validate_image_properties(task.context, d_info, props) deploy_utils.validate_image_properties(task.context, d_info, props)
@METRICS.timer('PXEBoot.prepare_ramdisk')
def prepare_ramdisk(self, task, ramdisk_params): def prepare_ramdisk(self, task, ramdisk_params):
"""Prepares the boot of Ironic ramdisk using PXE. """Prepares the boot of Ironic ramdisk using PXE.
@ -441,6 +448,7 @@ class PXEBoot(base.BootInterface):
# the image kernel and ramdisk (Or even require it). # the image kernel and ramdisk (Or even require it).
_cache_ramdisk_kernel(task.context, node, pxe_info) _cache_ramdisk_kernel(task.context, node, pxe_info)
@METRICS.timer('PXEBoot.clean_up_ramdisk')
def clean_up_ramdisk(self, task): def clean_up_ramdisk(self, task):
"""Cleans up the boot of ironic ramdisk. """Cleans up the boot of ironic ramdisk.
@ -461,6 +469,7 @@ class PXEBoot(base.BootInterface):
else: else:
_clean_up_pxe_env(task, images_info) _clean_up_pxe_env(task, images_info)
@METRICS.timer('PXEBoot.prepare_instance')
def prepare_instance(self, task): def prepare_instance(self, task):
"""Prepares the boot of instance. """Prepares the boot of instance.
@ -525,6 +534,7 @@ class PXEBoot(base.BootInterface):
pxe_utils.clean_up_pxe_config(task) pxe_utils.clean_up_pxe_config(task)
deploy_utils.try_set_boot_device(task, boot_devices.DISK) deploy_utils.try_set_boot_device(task, boot_devices.DISK)
@METRICS.timer('PXEBoot.clean_up_instance')
def clean_up_instance(self, task): def clean_up_instance(self, task):
"""Cleans up the boot of instance. """Cleans up the boot of instance.

View File

@ -911,9 +911,10 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
shared=False) as task: shared=False) as task:
self.deploy.prepare_instance_to_boot(task, root_uuid, self.deploy.prepare_instance_to_boot(task, root_uuid,
efi_system_part_uuid) efi_system_part_uuid)
configure_mock.assert_called_once_with(self.deploy, task, configure_mock.assert_called_once_with(
root_uuid, self.deploy, task,
efi_system_part_uuid) root_uuid=root_uuid,
efi_system_part_uuid=efi_system_part_uuid)
boot_option_mock.assert_called_once_with(task.node) boot_option_mock.assert_called_once_with(task.node)
prepare_instance_mock.assert_called_once_with(task.driver.boot, prepare_instance_mock.assert_called_once_with(task.driver.boot,
task) task)
@ -945,9 +946,10 @@ class TestHeartbeat(AgentDeployMixinBaseTest):
self.assertRaises(exception.InstanceDeployFailure, self.assertRaises(exception.InstanceDeployFailure,
self.deploy.prepare_instance_to_boot, task, self.deploy.prepare_instance_to_boot, task,
root_uuid, efi_system_part_uuid) root_uuid, efi_system_part_uuid)
configure_mock.assert_called_once_with(self.deploy, task, configure_mock.assert_called_once_with(
root_uuid, self.deploy, task,
efi_system_part_uuid) root_uuid=root_uuid,
efi_system_part_uuid=efi_system_part_uuid)
boot_option_mock.assert_called_once_with(task.node) boot_option_mock.assert_called_once_with(task.node)
self.assertFalse(prepare_mock.called) self.assertFalse(prepare_mock.called)
self.assertFalse(failed_state_mock.called) self.assertFalse(failed_state_mock.called)