Update curator for better metric storage
Now that the roll-up has been implemented, the original shrink method is no longer required or useful. This change cleans things up.

Change-Id: I24fd5b4daafc2f48ee5a3421f6b58b157a7aff6c
Signed-off-by: Kevin Carter <kevin.carter@rackspace.com>
parent 5f2fb9b022
commit 6da0fca375
@@ -32,5 +32,5 @@
     state: "started"
     options:
       OnBootSec: 30min
-      OnUnitActiveSec: 24h
+      OnUnitActiveSec: 6h
       Persistent: true
@@ -17,7 +17,7 @@
   cron:
     name: "Run curator"
     minute: 0
-    hour: 2
+    hour: "*/6"
     user: "curator"
     job: "/opt/elasticsearch-curator/bin/curator --config /var/lib/curator/curator.yml /var/lib/curator/actions.yml"
     cron_file: "elasticsearch-curator"
@@ -17,11 +17,16 @@
 {# Delete index loop #}
 {% for key in elastic_beat_retention_policy_keys -%}
 {% set delete_indices = {} -%}
-{% set index_retention = hostvars[inventory_hostname]['elastic_' + key + '_retention'] -%}
+{# Total retention size in days #}
+{% set _index_retention = hostvars[inventory_hostname]['elastic_' + key + '_retention'] -%}
+{% set index_retention = ((_index_retention | int) > 0) | ternary(_index_retention, 1) | int %}
+{# Total retention size in gigabytes #}
+{% set _index_size = ((hostvars[inventory_hostname]['elastic_' + key + '_size'] | int) // 1024) -%}
+{% set index_size = ((_index_size | int) > 0) | ternary(_index_size, 1) | int %}
 {% set _ = delete_indices.update(
     {
       'action': 'delete_indices',
-      'description': 'Prune indices for ' + key + ' after ' ~ (index_retention | int) ~ ' days.',
+      'description': 'Prune indices for ' + key + ' after ' ~ index_retention ~ ' days or index is > ' ~ index_size ~ 'gb',
       'options': {
         'ignore_empty_list': true,
         'disable_action': false
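The two ternary guards introduced above clamp the computed facts to a minimum of 1, so a zero or unset retention/size fact can never produce a no-op curator filter. A minimal Python sketch of the same clamping (the `raw_*` names and values are illustrative, not from the role):

```python
# Equivalent of the Jinja ternary guards: coerce to int, floor at 1.
def clamp_min_one(value: int) -> int:
    return value if value > 0 else 1

raw_retention = 0        # e.g. an unset/zero retention fact
raw_size_mb = 524288     # e.g. a 512GiB size fact, expressed in MB

index_retention = clamp_min_one(int(raw_retention))    # -> 1 day minimum
index_size = clamp_min_one(int(raw_size_mb) // 1024)   # MB -> GB, -> 512
```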
@@ -29,7 +34,6 @@
       }
     )
 -%}
-
 {# add the filter loop #}
 {% set filters = [] -%}
 {% set _ = filters.append(
     {
@@ -39,6 +43,15 @@
       }
     )
 -%}
+{% set _ = filters.append(
+    {
+      'filtertype': 'space',
+      'disk_space': index_size,
+      'use_age': true,
+      'source': 'creation_date'
+    }
+  )
+-%}
 {% set _ = filters.append(
     {
       'filtertype': 'age',
@@ -46,72 +59,12 @@
       'direction': 'older',
       'timestring': '%Y.%m.%d',
       'unit': 'days',
-      'unit_count': (index_retention | int)
+      'unit_count': index_retention
     }
   )
 -%}
 {% set _ = delete_indices.update({'filters': filters}) -%}
 {% set _ = action_items.append(delete_indices) -%}
-
-{# Set shrink curator options #}
-{% set shrink_indices = {} -%}
-{% set _ = shrink_indices.update(
-    {
-      'action': 'shrink',
-      'description': 'Shrink ' + key + ' indices older than ' ~ (index_retention | int) // 4 ~ ' days',
-      'options': {
-        "disable_action": false,
-        "ignore_empty_list": true,
-        "shrink_node": "DETERMINISTIC",
-        "node_filters": {
-          "permit_masters": ((master_nodes | length) < (data_nodes | length)) | ternary(true, false),
-          "exclude_nodes": (groups['kibana'] | map('extract', hostvars, 'ansible_host') | list)
-        },
-        "number_of_shards": 1,
-        "number_of_replicas": (elasticsearch_number_of_replicas | int),
-        "shrink_suffix": '-shrink',
-        "copy_aliases": true,
-        "delete_after": true,
-        "post_allocation": {
-          "allocation_type": "include",
-          "key": "node_tag",
-          "value": "cold"
-        },
-        "wait_for_active_shards": 1,
-        "extra_settings": {
-          "settings": {
-            "index.codec": "best_compression"
-          }
-        },
-        "wait_for_completion": true,
-        "wait_for_rebalance": true,
-        "wait_interval": 9,
-        "max_wait": -1
-      }
-    }
-  )
--%}
-{% set filters = [] -%}
-{% set _ = filters.append(
-    {
-      'filtertype': 'pattern',
-      'kind': 'prefix',
-      'value': key + '-'
-    }
-  )
--%}
-{% set _ = filters.append(
-    {
-      'filtertype': 'age',
-      'source': 'creation_date',
-      'direction': 'older',
-      'unit': 'days',
-      'unit_count': (index_retention | int) // 4
-    }
-  )
--%}
-{% set _ = shrink_indices.update({'filters': filters}) -%}
-{% set _ = action_items.append(shrink_indices) -%}
 {% endfor -%}

 {% set actions = {} -%}
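With the shrink action gone, the loop now emits a single delete_indices action per key, pruning on age or total space, whichever trips first. Rendered out, the structure looks roughly like the following Python dict; the key name, retention, and size values are hypothetical, and only the filters visible in this diff are shown (the first filters.append earlier in the template is elided here):

```python
# Hypothetical rendering for key='filebeat' with index_retention=15 and
# index_size=368; illustrative only, not the literal template output.
delete_indices = {
    "action": "delete_indices",
    "description": "Prune indices for filebeat after 15 days or index is > 368gb",
    "options": {"ignore_empty_list": True, "disable_action": False},
    "filters": [
        # ...first filter from earlier in the template elided...
        {"filtertype": "space", "disk_space": 368, "use_age": True,
         "source": "creation_date"},
        {"filtertype": "age", "direction": "older", "timestring": "%Y.%m.%d",
         "unit": "days", "unit_count": 15},
    ],
}
```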
@@ -16,7 +16,9 @@
 elastic_index_retention_algorithm: default

 ### Elastic curator variables
-## Default retention policy options. All retention options are in days.
+## If any of these retention policy options are undefined a dynamic fact will
+## be generated.
+## These options are all in days.
 # elastic_logstash_retention: 1
 # elastic_apm_retention: 1
 # elastic_auditbeat_retention: 1
@@ -26,25 +28,69 @@ elastic_index_retention_algorithm: default
 # elastic_metricbeat_retention: 1
 # elastic_packetbeat_retention: 1

-# This is used to calculate the storage a beat could generate per node, per day.
-# This constant is used as a multiplier. If the expected storage is larger than
-# the actual available storage after the buffer is calculated the multiplier
-# will be doubled thereby cutting the potential storage days in half.
-elastic_beat_storage_constant: 512
-
-## If any retention policy option is undefined a dynamic fact will be generated.
-## Fact will be generated for the general retention using the storage constant
-## per node, per index, where a given collector is expected to be deployed. The
-## equation used will take the total available storage from the ES data nodes
-## subtract 25% divided by the total number of data nodes. That is then divided
-## by the number of hosts assumed to be a beat target which is multiplied by the
-## storage constant.
+## These options are all in megabytes.
+# elastic_logstash_size: 1024
+# elastic_apm_size: 1024
+# elastic_auditbeat_size: 1024
+# elastic_filebeat_size: 1024
+# elastic_heartbeat_size: 1024
+# elastic_journalbeat_size: 1024
+# elastic_metricbeat_size: 1024
+# elastic_packetbeat_size: 1024
+
+## When a static retention policy option is not defined these options will be
+## used for dynamic fact generation.
+##
+## Facts will be generated for the general retention using the total available
+## storage from the ES data nodes, subtracting 25%. Using the weights, each
+## index will be given a percentage of the total available storage. Indexes with
+## higher weights are expected to use more storage. The list of hosts in a given
+## index will be used to determine the number of days data can exist within an
+## index before it's pruned.
+
+## Example:
+# es cluster has 4TiB of storage
+# filebeat is deployed to 100 hosts
+# filebeat has a weight of 10
+# metricbeat is deployed to 125 hosts
+# metricbeat has a weight of 2
+#
+# es storage in MiB: 4194304
+# hosts and weighting total: (100 + 125) x (10 + 2) = 2700
+# filebeat pct: (100 x 10) / 2700 = 0.37
+# filebeat storage allowed: 0.37 * 4194304 = 1551892.48 MiB
+# filebeat days allowed: 1551892.48 / (100 * 1024) = 15.1552 Days
+# filebeat result: 15 days of retention or 1.5TiB of storage, whichever comes first
+# metricbeat pct: (125 x 2) / 2700 = 0.09
+# metricbeat storage allowed: 0.09 * 4194304 = 377487.36 MiB
+# metricbeat days allowed: 377487.36 / (125 * 1024) = 2.94912 Days
+# metricbeat result: 2 days of retention or 368GiB of storage, whichever comes first
 elastic_beat_retention_policy_hosts:
-  logstash: "{{ groups['elastic-logstash'] | default([null]) | length }}"
-  apm: "{{ groups['apm-server'] | default([null]) | length }}"
-  auditbeat: "{{ (groups['hosts'] | default([null]) | length) * 2 }}"
-  filebeat: "{{ (groups['hosts'] | default([null]) | length) * 2 }}"
-  heartbeat: "{{ groups['kibana'][:3] | default([null]) | length }}"
-  journalbeat: "{{ (groups['all'] | default([null]) | length) * 1.5 }}"
-  metricbeat: "{{ (groups['all'] | default([null]) | length) * 1.5 }}"
-  packetbeat: "{{ (groups['hosts'] | default([null]) | length) * 5 }}"
+  logstash:
+    weight: 1
+    hosts: "{{ groups['elastic-logstash'] | default([]) }}"
+  apm:
+    weight: 1
+    hosts: "{{ groups['apm-server'] | default([]) }}"
+  auditbeat:
+    weight: 10
+    hosts: "{{ groups['hosts'] | default([]) }}"
+  filebeat:
+    weight: 10
+    hosts: "{{ groups['hosts'] | default([]) }}"
+  syslog:
+    weight: 1
+    hosts: "{{ groups['hosts'] | default([]) }}"
+  heartbeat:
+    weight: 1
+    hosts: "{{ groups['kibana'][:3] | default([]) }}"
+  journalbeat:
+    weight: 3
+    hosts: "{{ groups['all'] | default([]) }}"
+  metricbeat:
+    weight: 2
+    hosts: "{{ groups['all'] | default([]) }}"
+  packetbeat:
+    weight: 1
+    hosts: "{{ groups['hosts'] | default([]) }}"
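The arithmetic in the example block above can be sanity-checked directly. This is a minimal sketch that reproduces the commented numbers; the cluster size, weights, and host counts are the example's hypothetical values, and the percentages are rounded to two decimals exactly as the example does:

```python
# Reproduce the worked example from the comments above.
es_storage_mib = 4 * 1024 * 1024  # 4TiB of cluster storage, in MiB

beats = {
    "filebeat": {"weight": 10, "hosts": 100},
    "metricbeat": {"weight": 2, "hosts": 125},
}

# hosts and weighting total: (100 + 125) x (10 + 2) = 2700
total = (sum(b["hosts"] for b in beats.values())
         * sum(b["weight"] for b in beats.values()))

for name, b in beats.items():
    pct = round((b["hosts"] * b["weight"]) / total, 2)  # share of storage
    storage_mib = pct * es_storage_mib                  # MiB for this index
    days = storage_mib / (b["hosts"] * 1024)            # 1024 MiB/host/day
    print(name, pct, storage_mib, days)

# filebeat:   0.37 -> 1551892.48 MiB -> 15.1552 days
# metricbeat: 0.09 ->  377487.36 MiB ->  2.94912 days
```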
@@ -18,21 +18,32 @@
     url: "http://{{ coordination_nodes[0] }}/_nodes/{{ (data_nodes | map('extract', hostvars, 'ansible_host') | list) | join(',') }}/stats/fs"
     method: GET
   register: elk_data
-  until: elk_data is success
+  until:
+    - elk_data is success and elk_data['json'] is defined
   retries: 5
-  delay: 5
+  delay: 10
   run_once: true

-- name: Load data node variables
+- name: Set retention keys fact
+  set_fact:
+    es_storage_json: "{{ elk_data['json'] }}"
+
+- name: Load retention algo variables
   include_vars: "calculate_index_retention_{{ elastic_index_retention_algorithm }}.yml"
   tags:
     - always

-- name: Set retention facts
-  set_fact: "elastic_{{ item.key }}_retention={{ (es_assumed_usable_storage_per_node | int) // ((item.value | int) * (es_storage_multiplier | int)) }}"
+- name: Set retention facts (mb size)
+  set_fact: "elastic_{{ item.key }}_size={{ item.value }}"
   when:
-    - hostvars[inventory_hostname]["elastic_" + item.key + "_retention"] is undefined
-  with_dict: "{{ elastic_beat_retention_policy_hosts }}"
+    - hostvars[inventory_hostname]["elastic_" ~ item.key ~ "_size"] is undefined
+  with_dict: "{{ es_storage_per_index }}"
+
+- name: Set retention facts (days)
+  set_fact: "elastic_{{ item.key }}_retention={{ item.value }}"
+  when:
+    - hostvars[inventory_hostname]["elastic_" ~ item.key ~ "_retention"] is undefined
+  with_dict: "{{ es_days_per_index }}"

 - name: Set retention keys fact
   set_fact:
@@ -13,18 +13,46 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-# Set available storage fact
-es_total_available_storage: "{{ ((elk_data['json']['nodes'].values() | list) | map(attribute='fs.total.total_in_bytes') | list | sum) // 1024 // 1024 }}"
+# Set available storage fact. This takes the total amount of storage found
+# within the data nodes of the elasticsearch cluster and converts bytes to
+# megabytes.
+es_total_available_storage: "{{ ((es_storage_json['nodes'].values() | list) | map(attribute='fs.total.total_in_bytes') | list | sum) // 1024 // 1024 }}"

-# Set assumed buffer storage fact
+# Set assumed buffer storage fact. This will result in 25% of the total
+# available storage.
 es_assumed_buffer_storage: "{{ ((es_total_available_storage | int) * 0.25) | round | int }}"

-# Set usable buffer storage fact(s)
+# Set usable buffer storage fact(s). This is the total storage minus the buffer.
 es_usable_buffer_storage: "{{ (es_total_available_storage | int) - (es_assumed_buffer_storage | int) }}"
-es_expected_storage: "{{ ((elastic_beat_retention_policy_hosts.values() | map('int') | list) | sum) * (elastic_beat_storage_constant | int) }}"

-# Set buffer storage fact
-es_assumed_usable_storage_per_node: "{{ (es_usable_buffer_storage | int) // (data_nodes | length | int) }}"
+# This function will take the sum total of all hosts in the retention policy
+# after weighting. Once the policy is set the sum total will be carved up into
+# individual percentages of the total amount of usable storage after the buffer
+# is calculated.
+es_storage_per_index: |-
+  {%- set es_hash = {} %}
+  {%- set total_weight = (elastic_beat_retention_policy_hosts.values() | list | map(attribute='weight') | list | sum) %}
+  {%- set host_count = (elastic_beat_retention_policy_hosts.values() | list | map(attribute='hosts') | list | map('flatten') | list | length) %}
+  {%- set total_values = (total_weight | int) * (host_count | int) %}
+  {%- for key, value in elastic_beat_retention_policy_hosts.items() %}
+  {%- set value_pct = (((value.weight | int) * (value.hosts | length)) / (total_values | int)) %}
+  {%- set value_total = ((value_pct | float) * (es_usable_buffer_storage | int)) %}
+  {%- set _ = es_hash.__setitem__(key, value_total | int) %}
+  {%- endfor %}
+  {{ es_hash }}

-# Set the storage multiplier
-es_storage_multiplier: "{{ ((es_usable_buffer_storage | int) < (es_expected_storage | int)) | ternary(((elastic_beat_storage_constant | int) * 2), elastic_beat_storage_constant | int) }}"
+# The assumed number of days an index will be retained is based on the size of
+# the given index. With the sizes all figured out in the function above this
+# function will divide each retention size by a constant of 1024 and the number
+# of hosts within a given collector segment.
+es_days_per_index: |-
+  {%- set es_hash = {} %}
+  {%- for key, value in elastic_beat_retention_policy_hosts.items() %}
+  {%- if (es_storage_per_index[key] | int) > 0 %}
+  {%- set value_days = ((es_storage_per_index[key] | int) // ((value.hosts | length) * 1024)) %}
+  {%- set _ = es_hash.__setitem__(key, ((value_days | int) > 0) | ternary(value_days, 1)) %}
+  {%- else %}
+  {%- set _ = es_hash.__setitem__(key, 1) %}
+  {%- endif %}
+  {%- endfor %}
+  {{ es_hash }}
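For clarity, here is the same computation the two new templated facts perform, written as plain Python over a hypothetical policy dict. The host lists and buffer value are made up for illustration; the role derives them from inventory groups and the _nodes/stats/fs response, and this sketch follows the documented intent that host_count is the total number of hosts across all collectors:

```python
# Hypothetical inputs; the role derives these from inventory and the
# _nodes/stats/fs API response.
policy = {
    "filebeat": {"weight": 10, "hosts": [f"host{i}" for i in range(100)]},
    "metricbeat": {"weight": 2, "hosts": [f"host{i}" for i in range(125)]},
}
es_usable_buffer_storage = 3145728  # MiB remaining after the 25% buffer

# es_storage_per_index: weighted share of the usable storage per index.
total_weight = sum(v["weight"] for v in policy.values())
host_count = sum(len(v["hosts"]) for v in policy.values())
total_values = total_weight * host_count

es_storage_per_index = {
    key: int(((v["weight"] * len(v["hosts"])) / total_values)
             * es_usable_buffer_storage)
    for key, v in policy.items()
}

# es_days_per_index: storage share divided by 1024 MiB per host, floored at 1.
es_days_per_index = {
    key: max(es_storage_per_index[key] // (len(v["hosts"]) * 1024), 1)
    for key, v in policy.items()
}
```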
@@ -37,16 +37,21 @@

 - name: Create rollup block
   block:
-    - name: Set retention days fact
+    - name: Set min retention days fact
       set_fact:
-        days_until_rollup: |-
+        min_days_until_rollup: |-
          {% set index_retention = [] %}
          {% for item in ansible_play_hosts %}
-          {% set _ = index_retention.append((hostvars[item]['elastic_' + index_name + '_retention'] | int) // 3) %}
+          {% set _ = index_retention.append(hostvars[item]['elastic_' + index_name + '_retention'] | int) %}
          {% endfor %}
          {{ index_retention | min }}
      run_once: true

+    - name: Set retention days fact
+      set_fact:
+        days_until_rollup: "{{ ((min_days_until_rollup | int) > 1) | ternary(((min_days_until_rollup | int) - 1), min_days_until_rollup) }}"
+      run_once: true
+
     - name: Create rollup job
       uri:
         url: "{{ item.url }}"
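The rollup window is now derived in two steps: first the smallest retention value across all play hosts, then a one-day margin so the rollup job runs before indices are pruned, unless that would collapse the window to zero. A minimal sketch with hypothetical per-host facts:

```python
# Hypothetical per-host retention facts gathered across the play.
per_host_retention = {"es01": 15, "es02": 12, "es03": 20}

min_days_until_rollup = min(per_host_retention.values())  # -> 12

# Keep a one-day margin unless the minimum is already 1 (or less).
days_until_rollup = (min_days_until_rollup - 1
                     if min_days_until_rollup > 1
                     else min_days_until_rollup)          # -> 11
```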
@@ -59,7 +64,7 @@
       retries: 5
       delay: 5
       when:
-        - hostvars[inventory_hostname]['elastic_' + index_name + '_retention'] > days_until_rollup
+        - (days_until_rollup | int) > 0
       with_items:
         - url: "http://{{ coordination_nodes[0] }}/_xpack/rollup/job/rollup_{{ index_name }}/_stop"
           method: POST