From e65886681170b7106fa58fc65816a954d89331af Mon Sep 17 00:00:00 2001 From: Mohammed Naser Date: Mon, 22 Aug 2022 23:18:13 -0400 Subject: [PATCH] rabbitmq: monitoring and reliability improvements Sem-Ver: bugfix Change-Id: I655a6e5237ee0dc98547b5e8b4fa146a020f5606 --- ...abbitmq-improvements-875277bea9dfc9bb.yaml | 7 ++++++ roles/rabbitmq/tasks/main.yml | 3 +++ roles/rabbitmq_operator/tasks/main.yml | 22 +++++++++++++++++-- 3 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/rabbitmq-improvements-875277bea9dfc9bb.yaml diff --git a/releasenotes/notes/rabbitmq-improvements-875277bea9dfc9bb.yaml b/releasenotes/notes/rabbitmq-improvements-875277bea9dfc9bb.yaml new file mode 100644 index 0000000..b38d4f2 --- /dev/null +++ b/releasenotes/notes/rabbitmq-improvements-875277bea9dfc9bb.yaml @@ -0,0 +1,7 @@ +--- +features: + - Added additional monitoring to RabbitMQ in order to detect and alert on + alarms raised by it such as memory, etc. +fixes: + - Switch RabbitmqConnections to a more reliable solution that can avoid + alerting on larger scale clouds. 
diff --git a/roles/rabbitmq/tasks/main.yml b/roles/rabbitmq/tasks/main.yml index 8e51bfe..b650123 100644 --- a/roles/rabbitmq/tasks/main.yml +++ b/roles/rabbitmq/tasks/main.yml @@ -31,6 +31,9 @@ operator: In values: - enabled + rabbitmq: + additionalConfig: | + vm_memory_high_watermark.relative = 0.9 resources: requests: cpu: 500m diff --git a/roles/rabbitmq_operator/tasks/main.yml b/roles/rabbitmq_operator/tasks/main.yml index 0a8ce52..5124b24 100644 --- a/roles/rabbitmq_operator/tasks/main.yml +++ b/roles/rabbitmq_operator/tasks/main.yml @@ -129,6 +129,20 @@ "(.*)" ) ) + - name: alarms + rules: + - alert: RabbitmqAlarmFreeDiskSpace + expr: rabbitmq_alarms_free_disk_space_watermark == 1 + labels: + severity: critical + - alert: RabbitmqAlarmMemoryUsedWatermark + expr: rabbitmq_alarms_memory_used_watermark == 1 + labels: + severity: critical + - alert: RabbitmqAlarmFileDescriptorLimit + expr: rabbitmq_alarms_file_descriptor_limit == 1 + labels: + severity: critical - name: limits rules: - alert: RabbitmqMemoryHigh @@ -147,10 +161,14 @@ expr: rabbitmq_process_open_fds / rabbitmq_process_max_fds > 0.95 labels: severity: critical - - alert: RabbitmqConnections - expr: rabbitmq_connections > 1000 + - alert: RabbitmqTcpSocketsUsage + expr: rabbitmq_process_open_tcp_sockets / rabbitmq_process_max_tcp_sockets > 0.80 labels: severity: warning + - alert: RabbitmqTcpSocketsUsage + expr: rabbitmq_process_open_tcp_sockets / rabbitmq_process_max_tcp_sockets > 0.95 + labels: + severity: critical - name: msgs rules: - alert: RabbitmqUnackedMessages