implement minimal metric collection
This change implements a metric collection system using influxdata (influxdb and telegraf) with visualization using grafana. No dashboard automation is provided at this time; however, a template dashboard can be used by importing the JSON files from the grafana-dashboards directory. Change-Id: I5445b01170054393a31afc2a20ffb3ea4eda1209 Signed-off-by: Kevin Carter <kevin.carter@rackspace.com>
This commit is contained in:
parent
3d3a8c0d5d
commit
19255fd1a8
22
cluster_metrics/ansible.cfg
Normal file
22
cluster_metrics/ansible.cfg
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
[defaults]
|
||||||
|
# Set the role path
|
||||||
|
roles_path = /etc/ansible/roles:roles
|
||||||
|
|
||||||
|
inventory = /opt/openstack-ansible/playbooks/inventory/dynamic_inventory.py
|
||||||
|
|
||||||
|
# Fact caching
|
||||||
|
gathering = smart
|
||||||
|
fact_caching = jsonfile
|
||||||
|
fact_caching_connection = /etc/openstack_deploy/ansible_facts
|
||||||
|
fact_caching_timeout = 86400
|
||||||
|
|
||||||
|
# Additional plugins
|
||||||
|
action_plugins = /etc/ansible/roles/plugins/action
|
||||||
|
callback_plugins = /etc/ansible/roles/plugins/callback
|
||||||
|
filter_plugins = /etc/ansible/roles/plugins/filter
|
||||||
|
lookup_plugins = /etc/ansible/roles/plugins/lookup
|
||||||
|
library = /etc/ansible/roles/plugins/library
|
||||||
|
|
||||||
|
# Set color options
|
||||||
|
nocolor = 0
|
||||||
|
host_key_checking = False
|
12
cluster_metrics/etc/env.d/cluster_metrics.yml
Normal file
12
cluster_metrics/etc/env.d/cluster_metrics.yml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
---
|
||||||
|
component_skel:
|
||||||
|
cluster-metrics:
|
||||||
|
belongs_to:
|
||||||
|
- cluster-metrics_all
|
||||||
|
|
||||||
|
container_skel:
|
||||||
|
cluster-metrics_container:
|
||||||
|
belongs_to:
|
||||||
|
- log_containers
|
||||||
|
contains:
|
||||||
|
- cluster-metrics
|
0
cluster_metrics/etc/user_metrics.yml
Normal file
0
cluster_metrics/etc/user_metrics.yml
Normal file
23
cluster_metrics/files/kvm_virsh.py
Normal file
23
cluster_metrics/files/kvm_virsh.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
#!/usr/bin/env python
"""Telegraf ``exec`` plugin that reports KVM guest and vCPU counts as JSON.

The script opens a read-only libvirt connection and prints a single JSON
object with these keys:

* ``kvm_vms`` - number of domain IDs returned by ``listDomainsID()``.
* ``kvm_total_vcpus`` - first element of ``getCPUMap()``.
* ``kvm_scheduled_vcpus`` - sum of ``maxVcpus()`` over those domains.
* ``kvm_host_id`` - deterministic integer derived from the host FQDN.

On any failure the script exits with a ``Plugin failure`` message on stderr
and a non-zero status, and nothing is printed to stdout.
"""
import hashlib
import json
import socket

import libvirt


def _stable_host_id(fqdn):
    """Return a deterministic, non-negative integer derived from *fqdn*.

    The built-in ``hash()`` is salted per process for strings on Python 3,
    so it would produce a different id on every run of this script. A
    truncated SHA-256 digest is constant across runs and interpreter
    versions. Only 48 bits are kept so the value stays exactly representable
    as a double-precision float.
    """
    # Python 2 ``str`` is already bytes; Python 3 ``str`` must be encoded.
    if not isinstance(fqdn, bytes):
        fqdn = fqdn.encode('utf-8')
    return int(hashlib.sha256(fqdn).hexdigest()[:12], 16)


def _collect_metrics(conn):
    """Build the metrics dictionary from the libvirt connection *conn*."""
    domain_ids = conn.listDomainsID()
    return {
        'kvm_vms': len(domain_ids),
        'kvm_total_vcpus': conn.getCPUMap()[0],
        'kvm_scheduled_vcpus': sum(
            conn.lookupByID(domain_id).maxVcpus()
            for domain_id in domain_ids
        ),
        'kvm_host_id': _stable_host_id(socket.getfqdn()),
    }


def main():
    """Collect the metrics and print them as one JSON document.

    Raises:
        SystemExit: if connecting to libvirt or querying it fails. The
            message starts with ``Plugin failure`` and carries the cause.
    """
    conn = None
    try:
        # Opening the connection is inside the ``try`` so that a missing or
        # unreachable libvirt daemon is reported like any other failure.
        conn = libvirt.openReadOnly()
        metrics = _collect_metrics(conn)
    except Exception as exc:
        raise SystemExit('Plugin failure: %s' % exc)
    else:
        print(json.dumps(metrics))
    finally:
        # ``conn`` is still None when ``openReadOnly()`` itself failed.
        if conn is not None:
            conn.close()


if __name__ == '__main__':
    main()
|
446
cluster_metrics/grafana-dashboards/openstack-aggregates.json
Normal file
446
cluster_metrics/grafana-dashboards/openstack-aggregates.json
Normal file
@ -0,0 +1,446 @@
|
|||||||
|
{
|
||||||
|
"__inputs": [
|
||||||
|
{
|
||||||
|
"name": "DS_OSIC_INFLUXDB",
|
||||||
|
"label": "OSIC InfluxDB",
|
||||||
|
"description": "",
|
||||||
|
"type": "datasource",
|
||||||
|
"pluginId": "influxdb",
|
||||||
|
"pluginName": "InfluxDB"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"__requires": [
|
||||||
|
{
|
||||||
|
"type": "panel",
|
||||||
|
"id": "singlestat",
|
||||||
|
"name": "Singlestat",
|
||||||
|
"version": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "panel",
|
||||||
|
"id": "graph",
|
||||||
|
"name": "Graph",
|
||||||
|
"version": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "grafana",
|
||||||
|
"id": "grafana",
|
||||||
|
"name": "Grafana",
|
||||||
|
"version": "3.1.1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "datasource",
|
||||||
|
"id": "influxdb",
|
||||||
|
"name": "InfluxDB",
|
||||||
|
"version": "1.0.0"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"id": null,
|
||||||
|
"title": "OpenStack Compute Aggregates",
|
||||||
|
"tags": [],
|
||||||
|
"style": "dark",
|
||||||
|
"timezone": "browser",
|
||||||
|
"editable": true,
|
||||||
|
"hideControls": false,
|
||||||
|
"sharedCrosshair": false,
|
||||||
|
"rows": [
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": true,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"cacheTimeout": null,
|
||||||
|
"colorBackground": true,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(204, 85, 16, 0.97)",
|
||||||
|
"rgba(4, 133, 3, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "${DS_OSIC_INFLUXDB}",
|
||||||
|
"editable": true,
|
||||||
|
"error": false,
|
||||||
|
"format": "bytes",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"height": "10px",
|
||||||
|
"id": 3,
|
||||||
|
"interval": null,
|
||||||
|
"isNew": true,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"minSpan": 6,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"nullText": null,
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "RAM:",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 12,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(189, 188, 31, 0.18)",
|
||||||
|
"full": true,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"dsType": "influxdb",
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"null"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"hide": false,
|
||||||
|
"policy": "default",
|
||||||
|
"query": "SELECT sum(total) as total FROM \"mem\" WHERE host =~ /comp/ AND $timeFilter GROUP BY time($interval)",
|
||||||
|
"rawQuery": true,
|
||||||
|
"refId": "A",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"value"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "",
|
||||||
|
"title": "Compute node total Memory",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "70%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"datasource": "${DS_OSIC_INFLUXDB}",
|
||||||
|
"editable": true,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1": null,
|
||||||
|
"threshold1Color": "rgba(27, 42, 216, 0.27)",
|
||||||
|
"threshold2": null,
|
||||||
|
"threshold2Color": "rgba(167, 0, 0, 0.22)",
|
||||||
|
"thresholdLine": false
|
||||||
|
},
|
||||||
|
"height": "250px",
|
||||||
|
"id": 2,
|
||||||
|
"isNew": true,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": true,
|
||||||
|
"avg": true,
|
||||||
|
"current": true,
|
||||||
|
"max": true,
|
||||||
|
"min": true,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"sideWidth": 15,
|
||||||
|
"total": false,
|
||||||
|
"values": true
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 1,
|
||||||
|
"links": [],
|
||||||
|
"minSpan": 6,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 1,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"span": 6,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"dsType": "influxdb",
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"null"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"hide": false,
|
||||||
|
"policy": "default",
|
||||||
|
"query": "SELECT sum(used) as used FROM \"mem\" WHERE host =~ /$compute_node$/ AND $timeFilter GROUP BY time($interval)",
|
||||||
|
"rawQuery": true,
|
||||||
|
"refId": "A",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"value"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "Compute Node Used Memory",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": true,
|
||||||
|
"shared": false,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"transparent": true,
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"datasource": "${DS_OSIC_INFLUXDB}",
|
||||||
|
"editable": true,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1": null,
|
||||||
|
"threshold1Color": "rgba(27, 42, 216, 0.27)",
|
||||||
|
"threshold2": null,
|
||||||
|
"threshold2Color": "rgba(167, 0, 0, 0.22)",
|
||||||
|
"thresholdLine": false
|
||||||
|
},
|
||||||
|
"height": "250px",
|
||||||
|
"id": 1,
|
||||||
|
"isNew": true,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": true,
|
||||||
|
"avg": true,
|
||||||
|
"current": true,
|
||||||
|
"max": true,
|
||||||
|
"min": true,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"sideWidth": 15,
|
||||||
|
"total": false,
|
||||||
|
"values": true
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 1,
|
||||||
|
"links": [],
|
||||||
|
"minSpan": 6,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 1,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"span": 6,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"dsType": "influxdb",
|
||||||
|
"groupBy": [
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"$interval"
|
||||||
|
],
|
||||||
|
"type": "time"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"null"
|
||||||
|
],
|
||||||
|
"type": "fill"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"hide": false,
|
||||||
|
"policy": "default",
|
||||||
|
"query": "SELECT sum(available) as available FROM \"mem\" WHERE host =~ /$compute_node$/ AND $timeFilter GROUP BY time($interval)",
|
||||||
|
"rawQuery": true,
|
||||||
|
"refId": "A",
|
||||||
|
"resultFormat": "time_series",
|
||||||
|
"select": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"params": [
|
||||||
|
"value"
|
||||||
|
],
|
||||||
|
"type": "field"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"params": [],
|
||||||
|
"type": "mean"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "Compute Node Available Memory",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": true,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"transparent": true,
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": true,
|
||||||
|
"title": "Memory"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"time": {
|
||||||
|
"from": "now/d",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {
|
||||||
|
"refresh_intervals": [
|
||||||
|
"15s",
|
||||||
|
"1m",
|
||||||
|
"15m",
|
||||||
|
"1h"
|
||||||
|
],
|
||||||
|
"time_options": [
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"1h",
|
||||||
|
"6h",
|
||||||
|
"12h",
|
||||||
|
"24h",
|
||||||
|
"2d",
|
||||||
|
"7d",
|
||||||
|
"30d"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"current": {},
|
||||||
|
"datasource": "${DS_OSIC_INFLUXDB}",
|
||||||
|
"hide": 0,
|
||||||
|
"includeAll": true,
|
||||||
|
"label": "compute node",
|
||||||
|
"multi": false,
|
||||||
|
"name": "compute_node",
|
||||||
|
"options": [],
|
||||||
|
"query": "SHOW TAG VALUES FROM system WITH KEY=host",
|
||||||
|
"refresh": 1,
|
||||||
|
"regex": "/comp/",
|
||||||
|
"type": "query"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"schemaVersion": 12,
|
||||||
|
"version": 43,
|
||||||
|
"links": [],
|
||||||
|
"gnetId": null
|
||||||
|
}
|
2734
cluster_metrics/grafana-dashboards/openstack-metrics.json
Normal file
2734
cluster_metrics/grafana-dashboards/openstack-metrics.json
Normal file
File diff suppressed because it is too large
Load Diff
14
cluster_metrics/handlers/main.yml
Normal file
14
cluster_metrics/handlers/main.yml
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
---
|
||||||
|
# Copyright 2016, Rackspace US, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
70
cluster_metrics/playbook-grafana.yml
Normal file
70
cluster_metrics/playbook-grafana.yml
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
---
|
||||||
|
# Copyright 2016, Rackspace US, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
- name: Deploy grafana
|
||||||
|
hosts: "cluster-metrics"
|
||||||
|
gather_facts: true
|
||||||
|
user: root
|
||||||
|
pre_tasks:
|
||||||
|
- name: Create DB for service
|
||||||
|
mysql_db:
|
||||||
|
login_user: "{{ galera_root_user }}"
|
||||||
|
login_password: "{{ galera_root_password }}"
|
||||||
|
login_host: "127.0.0.1"
|
||||||
|
name: "{{ grafana_db_name }}"
|
||||||
|
state: "present"
|
||||||
|
delegate_to: "{{ groups['galera_all'][0] }}"
|
||||||
|
- name: Grant access to the DB for the service
|
||||||
|
mysql_user:
|
||||||
|
login_user: "{{ galera_root_user }}"
|
||||||
|
login_password: "{{ galera_root_password }}"
|
||||||
|
login_host: "127.0.0.1"
|
||||||
|
name: "{{ grafana_db_user }}"
|
||||||
|
password: "{{ grafana_db_password }}"
|
||||||
|
host: "{{ item }}"
|
||||||
|
state: "present"
|
||||||
|
priv: "{{ grafana_db_name }}.*:ALL"
|
||||||
|
delegate_to: "{{ groups['galera_all'][0] }}"
|
||||||
|
with_items:
|
||||||
|
- "localhost"
|
||||||
|
- "%"
|
||||||
|
tasks:
|
||||||
|
- name: Ensure https repos function
|
||||||
|
apt:
|
||||||
|
pkg: "apt-transport-https"
|
||||||
|
state: "latest"
|
||||||
|
- name: Add grafana apt-keys
|
||||||
|
apt_key:
|
||||||
|
url: "https://packagecloud.io/gpg.key"
|
||||||
|
state: "present"
|
||||||
|
- name: Add grafana repo
|
||||||
|
apt_repository:
|
||||||
|
repo: "deb https://packagecloud.io/grafana/stable/debian/ wheezy main"
|
||||||
|
state: "present"
|
||||||
|
- name: Install grafana
|
||||||
|
apt:
|
||||||
|
pkg: "grafana"
|
||||||
|
state: "latest"
|
||||||
|
- name: Drop grafana config file
|
||||||
|
template:
|
||||||
|
src: templates/grafana.ini.j2
|
||||||
|
dest: /etc/grafana/grafana.ini
|
||||||
|
- name: Enable and start grafana
|
||||||
|
service:
|
||||||
|
name: "grafana-server"
|
||||||
|
enabled: true
|
||||||
|
state: restarted
|
||||||
|
vars_files:
|
||||||
|
- vars.yml
|
67
cluster_metrics/playbook-influx-db.yml
Normal file
67
cluster_metrics/playbook-influx-db.yml
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
---
|
||||||
|
# Copyright 2016, Rackspace US, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
- name: Deploy influxdb
|
||||||
|
hosts: "cluster-metrics"
|
||||||
|
gather_facts: true
|
||||||
|
user: root
|
||||||
|
tasks:
|
||||||
|
- name: InfluxDB datapath bind mount
|
||||||
|
lxc_container:
|
||||||
|
name: "{{ inventory_hostname }}"
|
||||||
|
container_command: |
|
||||||
|
[[ ! -d "/var/lib/influxdb" ]] && mkdir -p "/var/lib/influxdb"
|
||||||
|
container_config:
|
||||||
|
- "lxc.mount.entry=/openstack/{{ inventory_hostname }} var/lib/influxdb none bind 0 0"
|
||||||
|
delegate_to: "{{ physical_host }}"
|
||||||
|
- name: Add influxdata apt-keys
|
||||||
|
apt_key:
|
||||||
|
url: "https://repos.influxdata.com/influxdb.key"
|
||||||
|
state: "present"
|
||||||
|
- name: Add influxdata repo
|
||||||
|
apt_repository:
|
||||||
|
repo: "deb https://repos.influxdata.com/{{ ansible_distribution | lower }} {{ ansible_distribution_release }} stable"
|
||||||
|
state: "present"
|
||||||
|
- name: Install influxdb
|
||||||
|
apt:
|
||||||
|
pkg: "influxdb"
|
||||||
|
state: "latest"
|
||||||
|
- name: Drop influxdb config file
|
||||||
|
template:
|
||||||
|
src: templates/influxdb.conf.j2
|
||||||
|
dest: /etc/influxdb/influxdb.conf
|
||||||
|
- name: Enable and restart influxdb
|
||||||
|
service:
|
||||||
|
name: "influxdb"
|
||||||
|
enabled: true
|
||||||
|
state: restarted
|
||||||
|
- name: Wait for influxdb to be ready
|
||||||
|
wait_for:
|
||||||
|
host: "{{ hostvars[groups['cluster-metrics'][0]]['ansible_ssh_host'] }}"
|
||||||
|
port: "{{ influxdb_port }}"
|
||||||
|
delay: 1
|
||||||
|
- name: Create metrics DB
|
||||||
|
shell: >
|
||||||
|
influx -username {{ influxdb_db_root_name }}
|
||||||
|
-password {{ influxdb_db_root_password }}
|
||||||
|
-execute "{{ item }}"
|
||||||
|
with_items:
|
||||||
|
- "CREATE DATABASE {{ influxdb_db_name }}"
|
||||||
|
- "CREATE RETENTION POLICY {{ influxdb_db_retention_policy }} ON {{ influxdb_db_name }} DURATION {{ influxdb_db_retention }} REPLICATION {{ influxdb_db_replication }}"
|
||||||
|
- "CREATE USER {{ influxdb_db_metric_user }} WITH PASSWORD '{{ influxdb_db_metric_password }}'"
|
||||||
|
- "GRANT ALL ON {{ influxdb_db_name }} TO {{ influxdb_db_metric_user }}"
|
||||||
|
vars_files:
|
||||||
|
- vars.yml
|
||||||
|
|
64
cluster_metrics/playbook-influx-telegraf.yml
Normal file
64
cluster_metrics/playbook-influx-telegraf.yml
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
---
|
||||||
|
# Copyright 2016, Rackspace US, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
- name: Deploy telegraf
|
||||||
|
hosts: "all"
|
||||||
|
gather_facts: true
|
||||||
|
user: root
|
||||||
|
tasks:
|
||||||
|
- name: Add influxdata apt-keys
|
||||||
|
apt_key:
|
||||||
|
url: "https://repos.influxdata.com/influxdb.key"
|
||||||
|
state: "present"
|
||||||
|
- name: Add influxdata repo
|
||||||
|
apt_repository:
|
||||||
|
repo: "deb https://repos.influxdata.com/{{ ansible_distribution | lower }} {{ ansible_distribution_release }} stable"
|
||||||
|
state: "present"
|
||||||
|
- name: Install telegraf
|
||||||
|
apt:
|
||||||
|
pkg: "telegraf"
|
||||||
|
state: "latest"
|
||||||
|
- name: Create telegraf plugin dir
|
||||||
|
file:
|
||||||
|
path: "/opt/telegraf"
|
||||||
|
state: directory
|
||||||
|
mode: "0755"
|
||||||
|
- name: Drop telegraf plugin file(s)
|
||||||
|
copy:
|
||||||
|
src: "files/{{ item }}"
|
||||||
|
dest: "/opt/telegraf/{{ item }}"
|
||||||
|
mode: '0755'
|
||||||
|
with_items:
|
||||||
|
- kvm_virsh.py
|
||||||
|
- name: Drop telegraf config file
|
||||||
|
template:
|
||||||
|
src: templates/telegraf.conf.j2
|
||||||
|
dest: /etc/telegraf/telegraf.conf
|
||||||
|
register: telegraf_config
|
||||||
|
- name: Enable and restart telegraf
|
||||||
|
service:
|
||||||
|
name: "telegraf"
|
||||||
|
enabled: true
|
||||||
|
state: restarted
|
||||||
|
when: telegraf_config | changed
|
||||||
|
- name: Enable and start telegraf
|
||||||
|
service:
|
||||||
|
name: "telegraf"
|
||||||
|
enabled: true
|
||||||
|
state: started
|
||||||
|
when: not telegraf_config | changed
|
||||||
|
vars_files:
|
||||||
|
- vars.yml
|
||||||
|
|
55
cluster_metrics/playbook-metrics-lb.yml
Normal file
55
cluster_metrics/playbook-metrics-lb.yml
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
---
|
||||||
|
# Copyright 2016, Rackspace US, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
- name: Add haproxy config
|
||||||
|
hosts: haproxy
|
||||||
|
gather_facts: true
|
||||||
|
user: root
|
||||||
|
roles:
|
||||||
|
- role: "haproxy_server"
|
||||||
|
haproxy_service_configs:
|
||||||
|
- service:
|
||||||
|
haproxy_service_name: influxdb_admin
|
||||||
|
haproxy_backend_nodes: "{{ groups['cluster-metrics'] | default([]) }}"
|
||||||
|
haproxy_ssl: "{{ haproxy_ssl }}"
|
||||||
|
haproxy_port: 8083
|
||||||
|
haproxy_balance_type: tcp
|
||||||
|
haproxy_backend_options:
|
||||||
|
- tcp-check
|
||||||
|
haproxy_whitelist_networks:
|
||||||
|
- 192.168.0.0/16
|
||||||
|
- 172.16.0.0/12
|
||||||
|
- 10.0.0.0/8
|
||||||
|
- service:
|
||||||
|
haproxy_service_name: influxdb
|
||||||
|
haproxy_backend_nodes: "{{ groups['cluster-metrics'] | default([]) }}"
|
||||||
|
haproxy_ssl: "{{ haproxy_ssl }}"
|
||||||
|
haproxy_port: 8086
|
||||||
|
haproxy_balance_type: tcp
|
||||||
|
haproxy_backend_options:
|
||||||
|
- tcp-check
|
||||||
|
haproxy_whitelist_networks:
|
||||||
|
- 192.168.0.0/16
|
||||||
|
- 172.16.0.0/12
|
||||||
|
- 10.0.0.0/8
|
||||||
|
- service:
|
||||||
|
haproxy_service_name: grafana
|
||||||
|
haproxy_backend_nodes: "{{ groups['cluster-metrics'] | default([]) }}"
|
||||||
|
haproxy_ssl: "{{ haproxy_ssl }}"
|
||||||
|
haproxy_port: 8089
|
||||||
|
haproxy_balance_type: tcp
|
||||||
|
haproxy_backend_options:
|
||||||
|
- tcp-check
|
||||||
|
|
56
cluster_metrics/readme.rst
Normal file
56
cluster_metrics/readme.rst
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
Gather and visualize cluster wide metrics
|
||||||
|
#########################################
|
||||||
|
:date: 2016-09-01
|
||||||
|
:tags: openstack, ansible
|
||||||
|
:category: \*openstack, \*nix
|
||||||
|
|
||||||
|
|
||||||
|
About this repository
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
This set of playbooks will deploy InfluxDB, Telegraf, and Grafana for the purpose of collecting metrics on an OpenStack cluster.
|
||||||
|
|
||||||
|
Process
|
||||||
|
-------
|
||||||
|
|
||||||
|
Clone the OPS repo
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd /opt
|
||||||
|
git clone https://github.com/openstack/openstack-ansible-ops
|
||||||
|
|
||||||
|
Copy the env.d files into place
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd openstack-ansible-ops/cluster_metrics
|
||||||
|
cp etc/env.d/cluster_metrics.yml /etc/openstack_deploy/env.d/
|
||||||
|
|
||||||
|
Create the containers
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
openstack-ansible /opt/openstack-ansible/playbooks/lxc-containers-create.yml -e container_group=cluster-metrics
|
||||||
|
|
||||||
|
Install InfluxDB
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
openstack-ansible playbook-influx-db.yml
|
||||||
|
|
||||||
|
Install Influx Telegraf
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
openstack-ansible playbook-influx-telegraf.yml --forks 100
|
||||||
|
|
||||||
|
Install grafana
|
||||||
|
|
||||||
|
If you are proxying grafana you will need to provide the full ``root_url``. When you run the playbook, add the following: ``-e grafana_root_url='https://cloud.something:8443/grafana/'``
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
openstack-ansible playbook-grafana.yml -e galera_root_user=root -e galera_address='127.0.0.1'
|
||||||
|
|
||||||
|
Once that last playbook is completed you will have a functioning InfluxDB, Telegraf, and Grafana metric collection system active and collecting metrics. Grafana will need some setup; however, functional dashboards have been provided in the ``grafana-dashboards`` directory.
|
66
cluster_metrics/templates/grafana.ini.j2
Normal file
66
cluster_metrics/templates/grafana.ini.j2
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
# {{ ansible_managed }}
|
||||||
|
[paths]
|
||||||
|
|
||||||
|
[server]
|
||||||
|
http_port = {{ grafana_port }}
|
||||||
|
{% if grafana_root_url is defined %}
|
||||||
|
root_url = {{ grafana_root_url }}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
[database]
|
||||||
|
type = mysql
|
||||||
|
host = {{ galera_address }}:3306
|
||||||
|
name = {{ grafana_db_name }}
|
||||||
|
user = {{ grafana_db_user }}
|
||||||
|
password = {{ grafana_db_password }}
|
||||||
|
|
||||||
|
[session]
|
||||||
|
|
||||||
|
[analytics]
|
||||||
|
check_for_updates = true
|
||||||
|
|
||||||
|
[security]
|
||||||
|
admin_user = admin
|
||||||
|
admin_password = {{ grafana_admin_password }}
|
||||||
|
|
||||||
|
[snapshots]
|
||||||
|
|
||||||
|
[users]
|
||||||
|
allow_sign_up = false
|
||||||
|
allow_org_create = false
|
||||||
|
|
||||||
|
[auth.anonymous]
|
||||||
|
enabled = true
|
||||||
|
org_name = OpenStack
|
||||||
|
org_role = Viewer
|
||||||
|
|
||||||
|
[auth.github]
|
||||||
|
|
||||||
|
[auth.google]
|
||||||
|
|
||||||
|
[auth.proxy]
|
||||||
|
|
||||||
|
[auth.basic]
|
||||||
|
|
||||||
|
[auth.ldap]
|
||||||
|
|
||||||
|
[smtp]
|
||||||
|
|
||||||
|
[emails]
|
||||||
|
|
||||||
|
[log]
|
||||||
|
|
||||||
|
[log.console]
|
||||||
|
|
||||||
|
[log.file]
|
||||||
|
|
||||||
|
[log.syslog]
|
||||||
|
|
||||||
|
[event_publisher]
|
||||||
|
|
||||||
|
[dashboards.json]
|
||||||
|
|
||||||
|
[metrics]
|
||||||
|
|
||||||
|
[grafana_net]
|
||||||
|
url = https://grafana.net
|
81
cluster_metrics/templates/influxdb.conf.j2
Normal file
81
cluster_metrics/templates/influxdb.conf.j2
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
# {{ ansible_managed }}
|
||||||
|
reporting-disabled = false
|
||||||
|
|
||||||
|
[logging]
|
||||||
|
level = "info"
|
||||||
|
|
||||||
|
[meta]
|
||||||
|
dir = "/var/lib/influxdb/meta"
|
||||||
|
retention-autocreate = true
|
||||||
|
logging-enabled = true
|
||||||
|
pprof-enabled = false
|
||||||
|
lease-duration = "1m0s"
|
||||||
|
|
||||||
|
[data]
|
||||||
|
enabled = true
|
||||||
|
dir = "/var/lib/influxdb/data"
|
||||||
|
wal-dir = "/var/lib/influxdb/wal"
|
||||||
|
wal-logging-enabled = true
|
||||||
|
query-log-enabled = false
|
||||||
|
cache-max-memory-size = 679477248
|
||||||
|
cache-snapshot-memory-size = 28311552
|
||||||
|
cache-snapshot-write-cold-duration = "1h0m0s"
|
||||||
|
compact-full-write-cold-duration = "24h0m0s"
|
||||||
|
max-points-per-block = 0
|
||||||
|
data-logging-enabled = false
|
||||||
|
|
||||||
|
[cluster]
|
||||||
|
shard-writer-timeout = "8s" # The time within which a remote shard must respond to a write request.
|
||||||
|
write-timeout = "16s" # The time within which a write request must complete on the cluster.
|
||||||
|
max-concurrent-queries = 0 # The maximum number of concurrent queries that can run. 0 to disable.
|
||||||
|
query-timeout = "0s" # The time within which a query must complete before being killed automatically. 0s to disable.
|
||||||
|
max-select-point = 0 # The maximum number of points to scan in a query. 0 to disable.
|
||||||
|
max-select-series = 0 # The maximum number of series to select in a query. 0 to disable.
|
||||||
|
max-select-buckets = 0 # The maximum number of buckets to select in an aggregate query. 0 to disable.
|
||||||
|
|
||||||
|
[retention]
|
||||||
|
enabled = true
|
||||||
|
check-interval = "32m"
|
||||||
|
|
||||||
|
[shard-precreation]
|
||||||
|
enabled = true
|
||||||
|
check-interval = "16m"
|
||||||
|
advance-period = "32m"
|
||||||
|
|
||||||
|
[monitor]
|
||||||
|
store-enabled = true # Whether to record statistics internally.
|
||||||
|
store-database = "_internal" # The destination database for recorded statistics
|
||||||
|
store-interval = "16s" # The interval at which to record statistics
|
||||||
|
|
||||||
|
[admin]
|
||||||
|
enabled = true
|
||||||
|
bind-address = ":{{ influxdb_admin_port }}"
|
||||||
|
https-enabled = false
|
||||||
|
https-certificate = "/etc/ssl/influxdb.pem"
|
||||||
|
|
||||||
|
[http]
|
||||||
|
enabled = true
|
||||||
|
bind-address = ":{{ influxdb_port }}"
|
||||||
|
auth-enabled = false
|
||||||
|
log-enabled = false
|
||||||
|
write-tracing = false
|
||||||
|
pprof-enabled = false
|
||||||
|
https-enabled = false
|
||||||
|
https-certificate = "/etc/ssl/influxdb.pem"
|
||||||
|
max-row-limit = 10240
|
||||||
|
|
||||||
|
[[graphite]]
|
||||||
|
enabled = false
|
||||||
|
|
||||||
|
[[collectd]]
|
||||||
|
enabled = false
|
||||||
|
|
||||||
|
[[opentsdb]]
|
||||||
|
enabled = false
|
||||||
|
|
||||||
|
[[udp]]
|
||||||
|
enabled = false
|
||||||
|
|
||||||
|
[continuous_queries]
|
||||||
|
log-enabled = false
|
||||||
|
enabled = true
|
67
cluster_metrics/templates/telegraf.conf.j2
Normal file
67
cluster_metrics/templates/telegraf.conf.j2
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
[global_tags]
|
||||||
|
{% if inventory_hostname in groups['all_containers'] %}
|
||||||
|
node_type = "container"
|
||||||
|
{% elif inventory_hostname in groups['hosts'] %}
|
||||||
|
node_type = "physical_host"
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
[agent]
|
||||||
|
interval = "24s"
|
||||||
|
round_interval = false
|
||||||
|
metric_batch_size = 1024
|
||||||
|
metric_buffer_limit = 10240
|
||||||
|
collection_jitter = "8s"
|
||||||
|
flush_interval = "48s"
|
||||||
|
flush_jitter = "8s"
|
||||||
|
debug = false
|
||||||
|
quiet = true
|
||||||
|
{% if inventory_hostname in groups['all_containers'] %}
|
||||||
|
hostname = "{{ ansible_hostname }}"
|
||||||
|
{% else %}
|
||||||
|
hostname = "{{ inventory_hostname }}"
|
||||||
|
{% endif %}
|
||||||
|
omit_hostname = false
|
||||||
|
|
||||||
|
[[outputs.influxdb]]
|
||||||
|
urls = ["http://{{ hostvars[groups['cluster-metrics'][0]]['ansible_ssh_host'] }}:{{ influxdb_port }}"]
|
||||||
|
database = "{{ influxdb_db_name }}"
|
||||||
|
precision = "s"
|
||||||
|
write_consistency = "any"
|
||||||
|
timeout = "5s"
|
||||||
|
|
||||||
|
[[inputs.processes]]
|
||||||
|
|
||||||
|
[[inputs.system]]
|
||||||
|
|
||||||
|
{% if inventory_hostname in groups['all_containers'] %}
|
||||||
|
[[inputs.net]]
|
||||||
|
|
||||||
|
{% elif inventory_hostname in groups['hosts'] %}
|
||||||
|
[[inputs.cpu]]
|
||||||
|
percpu = true
|
||||||
|
totalcpu = true
|
||||||
|
fielddrop = ["time_*"]
|
||||||
|
|
||||||
|
[[inputs.net]]
|
||||||
|
|
||||||
|
[[inputs.netstat]]
|
||||||
|
|
||||||
|
[[inputs.disk]]
|
||||||
|
ignore_fs = ["tmpfs", "devtmpfs"]
|
||||||
|
|
||||||
|
[[inputs.diskio]]
|
||||||
|
|
||||||
|
[[inputs.kernel]]
|
||||||
|
|
||||||
|
[[inputs.mem]]
|
||||||
|
|
||||||
|
[[inputs.swap]]
|
||||||
|
|
||||||
|
{% if inventory_hostname in groups['nova_compute'] %}
|
||||||
|
[[inputs.exec]]
|
||||||
|
commands = ["/opt/telegraf/kvm_virsh.py"]
|
||||||
|
timeout = "15s"
|
||||||
|
data_format = "json"
|
||||||
|
name_prefix = "custom_"
|
||||||
|
{% endif %}
|
||||||
|
{% endif %}
|
34
cluster_metrics/vars.yml
Normal file
34
cluster_metrics/vars.yml
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
---
|
||||||
|
# Copyright 2016, Rackspace US, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
|
||||||
|
# Grafana vars
|
||||||
|
grafana_port: 8089
|
||||||
|
grafana_db_name: grafana
|
||||||
|
grafana_db_user: grafana
|
||||||
|
grafana_db_password: secrete
|
||||||
|
grafana_admin_password: SuperSecrete
|
||||||
|
|
||||||
|
# InfluxDB vars
|
||||||
|
influxdb_admin_port: 8083
|
||||||
|
influxdb_port: 8086
|
||||||
|
influxdb_db_name: telegraf
|
||||||
|
influxdb_db_retention: 90d
|
||||||
|
influxdb_db_retention_policy: openstack
|
||||||
|
influxdb_db_replication: 1
|
||||||
|
influxdb_db_root_name: root
|
||||||
|
influxdb_db_root_password: SuperSecrete
|
||||||
|
influxdb_db_metric_user: openstack
|
||||||
|
influxdb_db_metric_password: SuperDuperSecrete
|
Loading…
Reference in New Issue
Block a user