From 70f6f8e4c02ed6a8687d2bd714d3fe0b9d04d84a Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Mon, 27 Apr 2020 10:59:06 +0100 Subject: [PATCH] Reduce RabbitMQ busy waiting, lowering CPU load On machines with many cores, we were seeing excessive CPU load on systems that were not very busy. With the following Erlang VM arguments we saw RabbitMQ CPU usage drop from about 150% to around 20%, on a system with 40 hyperthreads. +S 2:2 By default RabbitMQ starts N schedulers where N is the number of CPU cores, including hyper-threaded cores. This is fine when you assume all your CPUs are dedicated to RabbitMQ. It's not a good idea in a typical Kolla Ansible setup. Here we go for two scheduler threads. More details can be found here: https://www.rabbitmq.com/runtime.html#scheduling and here: https://erlang.org/doc/man/erl.html#emulator-flags +sbwt none This stops busy waiting of the scheduler, for more details see: https://www.rabbitmq.com/runtime.html#busy-waiting Newer versions of rabbit may need additional flags: "+sbwt none +sbwtdcpu none +sbwtdio none" But this patch should be backportable to older versions of RabbitMQ used in Train and Stein. 
Note that information on this tuning was found by looking at data from: rabbitmq-diagnostics runtime_thread_stats More details on that can be found here: https://www.rabbitmq.com/runtime.html#thread-stats Related-Bug: #1846467 Change-Id: Iced014acee7e590c10848e73feca166f48b622dc --- ansible/roles/rabbitmq/defaults/main.yml | 2 +- .../reference/message-queues/rabbitmq.rst | 27 ++++++++++++++----- etc/kolla/globals.yml | 7 ++++- ...-rabbit-busy-waiting-085433c822165eab.yaml | 13 +++++++++ 4 files changed, 40 insertions(+), 9 deletions(-) create mode 100644 releasenotes/notes/reduce-rabbit-busy-waiting-085433c822165eab.yaml diff --git a/ansible/roles/rabbitmq/defaults/main.yml b/ansible/roles/rabbitmq/defaults/main.yml index 6f42e12a21..6b7cf89ae4 100644 --- a/ansible/roles/rabbitmq/defaults/main.yml +++ b/ansible/roles/rabbitmq/defaults/main.yml @@ -71,7 +71,7 @@ rabbitmq_user: "openstack" rabbitmq_cluster_name: "openstack" rabbitmq_hostname: "{{ ansible_hostname }}" rabbitmq_pid_file: "/var/lib/rabbitmq/mnesia/rabbitmq.pid" -rabbitmq_server_additional_erl_args: "" +rabbitmq_server_additional_erl_args: "+S 2:2 +sbwt none" # Dict of TLS options for RabbitMQ. Keys will be prefixed with 'ssl_options.'. rabbitmq_tls_options: {} # To avoid split-brain diff --git a/doc/source/reference/message-queues/rabbitmq.rst b/doc/source/reference/message-queues/rabbitmq.rst index 916df309c8..8259fcc43a 100644 --- a/doc/source/reference/message-queues/rabbitmq.rst +++ b/doc/source/reference/message-queues/rabbitmq.rst @@ -86,12 +86,25 @@ internal VIP. As such, traffic to this endpoint is encrypted when Passing arguments to RabbitMQ server's Erlang VM ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Erlang programs run in Erlang VM (virtual machine) and use Erlang runtime. -Erlang VM can be configured. +Erlang programs run in an Erlang VM (virtual machine) and use the Erlang +runtime. The Erlang VM can be configured. 
Kolla Ansible makes it possible to pass arguments to the Erlang VM via the -usage of ``rabbitmq_server_additional_erl_args`` variable. The contents of it -are appended to ``RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS`` environment variable -passed to RabbitMQ server startup script. Kolla Ansible already configures -RabbitMQ server for IPv6 (if necessary). Any argument can be passed there as -documented in https://www.rabbitmq.com/runtime.html +usage of the ``rabbitmq_server_additional_erl_args`` variable. The contents of +it are appended to the ``RABBITMQ_SERVER_ADDITIONAL_ERL_ARGS`` environment +variable which is passed to the RabbitMQ server startup script. Kolla Ansible +already configures RabbitMQ server for IPv6 (if necessary). Any argument can be +passed there as documented in https://www.rabbitmq.com/runtime.html + +The default value for ``rabbitmq_server_additional_erl_args`` is ``+S 2:2 +sbwt +none``. + +By default RabbitMQ starts N schedulers where N is the number of CPU cores, +including hyper-threaded cores. This is fine when you assume all CPUs are +dedicated to RabbitMQ. It's not a good idea in a typical Kolla Ansible setup. +Here we go for two scheduler threads (``+S 2:2``). More details can be found +here: https://www.rabbitmq.com/runtime.html#scheduling and here: +https://erlang.org/doc/man/erl.html#emulator-flags + +The ``+sbwt`` argument prevents busy waiting of the scheduler, for more details +see: https://www.rabbitmq.com/runtime.html#busy-waiting. diff --git a/etc/kolla/globals.yml b/etc/kolla/globals.yml index 00abf88819..dde0429c9d 100644 --- a/etc/kolla/globals.yml +++ b/etc/kolla/globals.yml @@ -396,7 +396,12 @@ # See Kolla Ansible docs RabbitMQ section for details. # These are appended to args already provided by Kolla Ansible # to configure IPv6 in RabbitMQ server. 
-#rabbitmq_server_additional_erl_args: "" +# More details can be found in the RabbitMQ docs: +# https://www.rabbitmq.com/runtime.html#scheduling +# https://www.rabbitmq.com/runtime.html#busy-waiting +# The default tells RabbitMQ to always use two scheduler threads (+S 2:2), +# and not to busy wait (+sbwt none): +#rabbitmq_server_additional_erl_args: "+S 2:2 +sbwt none" # Whether to enable TLS encryption for RabbitMQ client-server communication. #rabbitmq_enable_tls: "no" # CA certificate bundle in RabbitMQ container. diff --git a/releasenotes/notes/reduce-rabbit-busy-waiting-085433c822165eab.yaml b/releasenotes/notes/reduce-rabbit-busy-waiting-085433c822165eab.yaml new file mode 100644 index 0000000000..4f83fd1055 --- /dev/null +++ b/releasenotes/notes/reduce-rabbit-busy-waiting-085433c822165eab.yaml @@ -0,0 +1,13 @@ +--- +fixes: + - | + Fixes an issue where RabbitMQ consumes a large amount of CPU, particularly + on multi-core systems. The default RabbitMQ tuning assumes that RabbitMQ + is running on a dedicated host, which is the opposite of a typical Kolla + Ansible container setup. For more details on tuning RabbitMQ in your + environment, please see: https://www.rabbitmq.com/runtime.html#busy-waiting + https://www.rabbitmq.com/runtime.html#scheduling +upgrade: + - | + Modifies the default value of ``rabbitmq_server_additional_erl_args`` from + an empty string to ``+S 2:2 +sbwt none``.