From 5b93b9a2c2157f8ffe2c149e52ccf597625eb9ab Mon Sep 17 00:00:00 2001
From: Kevin Carter <>
Date: Wed, 7 Dec 2016 15:27:14 -0600
Subject: [PATCH] Added nova quota plugin

This change adds a second plugin to the telegraf setup. A change is
being made to the telegraf config file to allow for more than one
external plugin to be executed and to allow for full plugin execution
between telegraf reporting intervals.

Each plugin may account for up to 8 seconds of runtime, so the telegraf
agent now uses a dynamic reporting interval based on the number of
plugins a given agent needs to execute.

Change-Id: I652e8e2f13bd4fb9135280b76f2344177a14eaf7
Signed-off-by: Kevin Carter <>
 cluster_metrics/playbook-influx-telegraf.yml  |  18 +-
 .../telegraf-plugins/          | 205 ++++++++++++++++++
 cluster_metrics/templates/telegraf.conf.j2    |  30 ++-
 3 files changed, 237 insertions(+), 16 deletions(-)
 create mode 100644 cluster_metrics/templates/telegraf-plugins/

diff --git a/cluster_metrics/playbook-influx-telegraf.yml b/cluster_metrics/playbook-influx-telegraf.yml
index e5e616f6..07d46a61 100644
--- a/cluster_metrics/playbook-influx-telegraf.yml
+++ b/cluster_metrics/playbook-influx-telegraf.yml
@@ -44,18 +44,8 @@
       with_dict: "{{ command_plugins }}"
         - item.value.when_group | bool
-        - == inventory_hostname or 
+        - == inventory_hostname or
           inventory_hostname in | default([])
-    - name: Add to command plugins
-      set_fact:
-        commands: "{{ commands | union(item.value.command) }}"
-      with_dict: "{{ command_plugins }}"
-      when:
-        - item.value.when_group | bool
-        - == inventory_hostname or 
-          inventory_hostname in | default([])
-      tags:
-        - always
     - name: Store my_cnf
         src: "/root/.my.cnf"
@@ -96,5 +86,11 @@
           - "python /opt/telegraf/"
         group: "{{ groups['utility_all'][0] }}"
         when_group: "{{ (groups['ironic_api'] | length) > 0 }}"
+      vm_quota:
+        plugin_name: ""
+        command:
+          - "python /opt/telegraf/"
+        group: "{{ groups['utility_all'][0] }}"
+        when_group: "{{ (groups['nova_compute'] | length) > 0 }}"
       - "{{ influxdb_host|default(internal_lb_vip_address) }}:{{ influxdb_port }}"
diff --git a/cluster_metrics/templates/telegraf-plugins/ b/cluster_metrics/templates/telegraf-plugins/
new file mode 100644
index 00000000..f9b4fc7a
--- /dev/null
+++ b/cluster_metrics/templates/telegraf-plugins/
@@ -0,0 +1,205 @@
+# Copyright 2016, Rackspace US, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import collections
+from openstack import connection as os_conn
+    'auth_url': '{{ keystone_service_internalurl }}',
+    'project_name': '{{ keystone_admin_tenant_name }}',
+    'user_domain_name': '{{ openrc_os_domain_name }}',
+    'project_domain_name': '{{ openrc_os_domain_name }}',
+    'username': '{{ keystone_admin_user_name }}',
+    'password': '{{ keystone_auth_admin_password }}',
+OS_CONNECTION = {'conn': None}
+def line_return(collection, metric_name):
+    """Format a mapping as one InfluxDB line-protocol record.
+
+    :param collection: mapping of field name to field value.
+    :param metric_name: measurement name written at the start of the line.
+    :returns: ``'<metric_name> <key>=<value>,<key>=<value>'``. Spaces in
+              keys become underscores; ``,`` and ``=`` in keys are
+              backslash-escaped so they cannot split a field.
+    """
+    fields = list()
+    for key, value in collection.items():
+        # Keys are free-form names; "," and "=" are separators in line
+        #  protocol and have to be escaped inside a field key.
+        key = key.replace(' ', '_').replace(',', '\\,').replace('=', '\\=')
+        fields.append('%s=%s' % (key, value))
+    return '%s %s' % (metric_name, ','.join(fields))
+def _connect():
+    """Return the shared OpenStack connection, creating it on first use.
+
+    The connection object is cached in ``OS_CONNECTION['conn']`` and handed
+    back on every later call.
+    """
+    cached = OS_CONNECTION['conn']
+    if not cached:
+        cached = OS_CONNECTION['conn'] = os_conn.Connection(**OS_AUTH_ARGS)
+    return cached
+def get_consumers():
+    """Return the projects whose quota and usage should be reported.
+
+    Projects whose description is "heat stack user project" (compared
+    case-insensitively) are left out.
+
+    :returns: list of project objects from ``conn.identity.projects()``.
+    """
+    conn = _connect()
+    _consumers = list()
+    for project in conn.identity.projects():
+        # A project's description may be unset (None); treat it as empty
+        #  rather than failing on ``None.lower()``.
+        description = project['description'] or ''
+        if description.lower() != 'heat stack user project':
+            _consumers.append(project)
+    return _consumers
+def get_consumer_limits(consumer_id):
+    """Fetch the compute quota set of one project.
+
+    :param consumer_id: project ID appended to the ``/os-quota-sets/`` path.
+    :returns: the ``quota_set`` member of the decoded JSON response.
+    """
+    compute = _connect().compute
+    endpoint = compute.session.get_endpoint(
+        interface='internal',
+        service_type='compute'
+    )
+    response = compute.session.get(endpoint + '/os-quota-sets/' + consumer_id)
+    return response.json()['quota_set']
+def get_consumer_usage():
+    """List servers across all tenants with full details.
+
+    :returns: the result of ``conn.compute.servers`` queried with
+              ``details=True``, ``all_tenants=True`` and ``limit=5000``.
+    """
+    return _connect().compute.servers(
+        details=True,
+        all_tenants=True,
+        limit=5000
+    )
+def get_flavors():
+    """Build a lookup of flavor resources keyed by flavor ID.
+
+    :returns: dict mapping each flavor ID to a dict with ``ram``, ``cores``
+              (the flavor's ``vcpus``) and ``disk``.
+    """
+    return {
+        flavor['id']: {
+            'ram': flavor['ram'],
+            'cores': flavor['vcpus'],
+            'disk': flavor['disk'],
+        }
+        for flavor in _connect().compute.flavors()
+    }
+def main():
+    """Print per-project nova quota and usage as InfluxDB line protocol.
+
+    One line is printed per measurement, in this order: the instance, core
+    and RAM quotas, then the used instances, cores, RAM and disk, and last
+    the totals summed across all projects. The fields of each per-project
+    line are keyed by project name.
+    """
+    return_data = list()
+    consumer_quota_instance = dict()
+    consumer_quota_cores = dict()
+    consumer_quota_ram = dict()
+    consumer_used_instances = collections.Counter()
+    consumer_used_cores = collections.Counter()
+    consumer_used_ram = collections.Counter()
+    consumer_used_disk = collections.Counter()
+    consumer_quota_totals = dict()
+    flavor_cache = get_flavors()
+    consumer_id_cache = dict()
+    for consumer in get_consumers():
+        consumer_name = consumer['name']
+        consumer_id = consumer['id']
+        _quota = get_consumer_limits(consumer_id)
+        consumer_id_cache[consumer_id] = consumer_name
+        consumer_quota_instance[consumer_name] = int(_quota['instances'])
+        consumer_quota_cores[consumer_name] = int(_quota['cores'])
+        consumer_quota_ram[consumer_name] = int(_quota['ram'])
+    for used_instance in get_consumer_usage():
+        tenant_id = used_instance['tenant_id']
+        # Servers are listed for all tenants, including projects that
+        #  get_consumers() left out; fall back to the raw tenant ID so the
+        #  usage is still reported instead of raising KeyError.
+        consumer_name = consumer_id_cache.get(tenant_id, tenant_id)
+        consumer_used_instances[consumer_name] += 1
+        # A server may reference a flavor that is absent from the flavor
+        #  listing (presumably one removed after the server was built --
+        #  TODO confirm); count the instance but skip its resources.
+        flavor = flavor_cache.get(used_instance['flavor']['id'])
+        if flavor is None:
+            continue
+        consumer_used_cores[consumer_name] += int(flavor['cores'])
+        consumer_used_ram[consumer_name] += int(flavor['ram'])
+        consumer_used_disk[consumer_name] += int(flavor['disk'])
+    # (key in the totals line, measurement name, per-project values), in
+    #  output order.
+    measurements = (
+        ('total_quota_instance', 'consumer_quota_instance',
+         consumer_quota_instance),
+        ('total_quota_cores', 'consumer_quota_cores',
+         consumer_quota_cores),
+        ('total_quota_ram', 'consumer_quota_ram',
+         consumer_quota_ram),
+        ('total_used_instances', 'consumer_used_instances',
+         consumer_used_instances),
+        ('total_used_cores', 'consumer_used_cores',
+         consumer_used_cores),
+        ('total_used_ram', 'consumer_used_ram',
+         consumer_used_ram),
+        ('total_used_disk', 'consumer_used_disk',
+         consumer_used_disk),
+    )
+    for total_key, metric_name, collection in measurements:
+        consumer_quota_totals[total_key] = sum(collection.values())
+        return_data.append(
+            line_return(collection=collection, metric_name=metric_name)
+        )
+    return_data.append(
+        line_return(
+            collection=consumer_quota_totals,
+            metric_name='consumer_quota_totals'
+        )
+    )
+    # Format every line before printing any, so a failure while formatting
+    #  emits no partial output.
+    for item in return_data:
+        print(item)
+# Collect and print the metrics only when this file is run as a script.
+if __name__ == '__main__':
+    main()
diff --git a/cluster_metrics/templates/telegraf.conf.j2 b/cluster_metrics/templates/telegraf.conf.j2
index 0341f04b..580a956e 100644
--- a/cluster_metrics/templates/telegraf.conf.j2
+++ b/cluster_metrics/templates/telegraf.conf.j2
@@ -5,13 +5,26 @@
   node_type = "physical_host"
 {% endif %}
+{%   set run_commands = [] %}
+{%   for key, value in command_plugins.items() %}
+{%     if value.when_group | bool and ( == inventory_hostname or inventory_hostname in | default([])) %}
+{%       set _ = run_commands.extend(value.command) %}
+{%     endif %}
+{%   endfor %}
+{# The run_int adds padding to the interval so that plugins being added to the system have #}
+{#  enough time to execute. Every added plugin adds 8 seconds to the interval on top of a default #}
+{#  of 24. This value is later used to derive the flush interval, which needs to be 2x the agent interval. #}
+{% set run_int = run_commands | length %}
+{% set interval = (run_int * 8) + 24 %}
-  interval = "24s"
+  interval = "{{ interval }}s"
   round_interval = false
   metric_batch_size = 1024
   metric_buffer_limit = 10240
   collection_jitter = "8s"
-  flush_interval = "48s"
+  flush_interval = "{{ interval * 2 }}s"
   flush_jitter = "8s"
   debug = false
   quiet = true
@@ -33,10 +46,17 @@
-{%   if commands %}
+{%   set run_commands = [] %}
+{%   for key, value in command_plugins.items() %}
+{%     if value.when_group | bool and ( == inventory_hostname or inventory_hostname in | default([])) %}
+{%       set _ = run_commands.extend(value.command) %}
+{%     endif %}
+{%   endfor %}
+{%   if run_commands %}
-  commands = [{{ commands | map('quote') | join(',') }}]
-  timeout = "15s"
+  commands = [{{ run_commands | map('quote') | join(',') }}]
+  timeout = "{{ (run_commands | length) * 8 }}s"
   data_format = "influx"
 {%   endif %}