From 15f2fdcd5dd9e63b477c549c2af990df8235ca90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Weing=C3=A4rtner?= <rafael@apache.org>
Date: Fri, 26 Mar 2021 14:19:39 -0300
Subject: [PATCH] Make setup module arguments configurable

Ansible facts can have a large impact on the performance of the Ansible
control host. This patch introduces some control over which facts are
gathered (kolla_ansible_setup_gather_subset) and which facts are stored
(kolla_ansible_setup_filter). By default we do not change the default
values of these arguments to the setup module. The flexibility of these
arguments is limited, but they do provide enough for a large performance
improvement in a typical moderate to large OpenStack cloud.

In particular, the large complex dict fact for each interface has a
large effect, and on an OpenStack controller or hypervisor there may be
many virtual interfaces. We can use the kolla_ansible_setup_filter
variable to help:

    kolla_ansible_setup_filter: 'ansible_[!qt]*'

This causes Ansible to collect all facts but store only those matching
that pattern. Facts named ansible_q* or ansible_t*, which include the
virtual interface facts, are discarded. Currently we are not referencing
any other discarded facts within Kolla Ansible.
Note that including the 'ansible_' prefix causes meta facts module_setup
and gather_subset to be filtered, but this seems to be the only way to
get a good match on the interface facts. To work around this, we use
ansible_facts rather than module_setup to detect whether facts exist in
the cache.

The exact improvement will vary, but has been reported to be as large as
18x on systems with many virtual interfaces.

For reference, here are some other tunings tried:

* Increased the number of forks (great speedup depending on the size of
  the deployment)
* Use `strategy = mitogen_linear` (cut processing time in half)
* Ansible caching (little speed up)
* SSH tuning (little speed up)

Co-Authored-By: Mark Goddard <mark@stackhpc.com>
Closes-Bug: #1921538
Change-Id: Iae8ca4aae945892f1dc65e1b10381d2e26e88805
---
 ansible/gather-facts.yml                      | 13 ++++++-
 ansible/group_vars/all.yml                    | 15 ++++++++
 doc/source/user/ansible-tuning.rst            | 37 +++++++++++++++++++
 etc/kolla/globals.yml                         | 16 ++++++++
 .../setup-module-args-c29e1815bbbe8aca.yaml   |  8 ++++
 5 files changed, 87 insertions(+), 2 deletions(-)
 create mode 100644 releasenotes/notes/setup-module-args-c29e1815bbbe8aca.yaml

diff --git a/ansible/gather-facts.yml b/ansible/gather-facts.yml
index 7cb6de6db8..0c7c792982 100644
--- a/ansible/gather-facts.yml
+++ b/ansible/gather-facts.yml
@@ -5,8 +5,15 @@
 - name: Gather facts for all hosts
   hosts: all
   serial: '{{ kolla_serial|default("0") }}'
-  gather_facts: true
+  gather_facts: false
   tasks:
+    - name: Gather facts
+      setup:
+        filter: "{{ kolla_ansible_setup_filter }}"
+        gather_subset: "{{ kolla_ansible_setup_gather_subset }}"
+      when:
+        - not ansible_facts
+
     - name: Group hosts to determine when using --limit
       group_by:
         key: "all_using_limit_{{ (ansible_play_batch | length) != (groups['all'] | length) }}"
@@ -32,10 +39,12 @@
   tasks:
     - name: Gather facts
       setup:
+        filter: "{{ kolla_ansible_setup_filter }}"
+        gather_subset: "{{ kolla_ansible_setup_gather_subset }}"
       delegate_facts: True
       delegate_to: "{{ item }}"
       with_items: "{{ delegate_hosts }}"
       # We gathered facts for all hosts in the batch during the first play.
       when:
-        - not hostvars[item].ansible_facts.module_setup | default(false)
+        - not hostvars[item].ansible_facts
   tags: always
diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml
index 2b9d8a8650..dd8cb88ccf 100644
--- a/ansible/group_vars/all.yml
+++ b/ansible/group_vars/all.yml
@@ -20,6 +20,21 @@ node_config_directory: "/etc/kolla"
 config_owner_user: "root"
 config_owner_group: "root"
 
+###################
+# Ansible options
+###################
+
+# This variable is used as the "filter" argument for the setup module.  For
+# instance, if one wants to remove/ignore all Neutron interface facts:
+# kolla_ansible_setup_filter: "ansible_[!qt]*"
+# By default, we do not provide a filter.
+kolla_ansible_setup_filter: "{{ omit }}"
+
+# This variable is used as the "gather_subset" argument for the setup module.
+# For instance, if one wants to avoid collecting facts via facter:
+# kolla_ansible_setup_gather_subset: "all,!facter"
+# By default, we do not provide a gather subset.
+kolla_ansible_setup_gather_subset: "{{ omit }}"
 
 ###################
 # Kolla options
diff --git a/doc/source/user/ansible-tuning.rst b/doc/source/user/ansible-tuning.rst
index a9f50e6210..e8f39ac0c1 100644
--- a/doc/source/user/ansible-tuning.rst
+++ b/doc/source/user/ansible-tuning.rst
@@ -83,3 +83,40 @@ disable fact variable injection.
 
    [defaults]
    inject_facts_as_vars = False
+
+Fact filtering
+--------------
+
+Ansible facts filtering can be used to speed up Ansible.  Environments with
+many network interfaces on the network and compute nodes can experience very
+slow processing with Kolla Ansible. This happens due to the processing of the
+large per-interface facts with each task.  To avoid storing certain facts, we
+can use the ``kolla_ansible_setup_filter`` variable, which is used as the
+``filter`` argument to the ``setup`` module. For example, to avoid storing
+facts for virtual interfaces beginning with q or t:
+
+.. code-block:: yaml
+
+   kolla_ansible_setup_filter: "ansible_[!qt]*"
+
+This causes Ansible to collect all facts but store only those matching the
+pattern, discarding the ``ansible_q*`` and ``ansible_t*`` virtual interface
+facts. Kolla Ansible currently references no other discarded facts. Note that
+the ``ansible_`` prefix also filters out the meta facts ``module_setup`` and
+``gather_subset``, but this seems to be the only way to get a good match on the
+interface facts.
+
+The exact improvement will vary, but has been reported to be as large as 18x on
+systems with many virtual interfaces.
+
+Fact gathering subsets
+----------------------
+
+It is also possible to configure which subsets of facts are gathered, via
+``kolla_ansible_setup_gather_subset``, which is used as the ``gather_subset``
+argument to the ``setup`` module. For example, if one wants to avoid collecting
+facts via facter:
+
+.. code-block:: yaml
+
+   kolla_ansible_setup_gather_subset: "all,!facter"
diff --git a/etc/kolla/globals.yml b/etc/kolla/globals.yml
index ed8fb12a4a..98cc3eb55b 100644
--- a/etc/kolla/globals.yml
+++ b/etc/kolla/globals.yml
@@ -5,6 +5,22 @@
 # commented parameters are shown here, To override the default value uncomment
 # the parameter and change its value.
 
+###################
+# Ansible options
+###################
+
+# This variable is used as the "filter" argument for the setup module.  For
+# instance, if one wants to remove/ignore all Neutron interface facts:
+# kolla_ansible_setup_filter: "ansible_[!qt]*"
+# By default, we do not provide a filter.
+#kolla_ansible_setup_filter: "{{ omit }}"
+
+# This variable is used as the "gather_subset" argument for the setup module.
+# For instance, if one wants to avoid collecting facts via facter:
+# kolla_ansible_setup_gather_subset: "all,!facter"
+# By default, we do not provide a gather subset.
+#kolla_ansible_setup_gather_subset: "{{ omit }}"
+
 ###############
 # Kolla options
 ###############
diff --git a/releasenotes/notes/setup-module-args-c29e1815bbbe8aca.yaml b/releasenotes/notes/setup-module-args-c29e1815bbbe8aca.yaml
new file mode 100644
index 0000000000..ee37028934
--- /dev/null
+++ b/releasenotes/notes/setup-module-args-c29e1815bbbe8aca.yaml
@@ -0,0 +1,8 @@
+---
+features:
+  - |
+    Adds support for configuring the ``filter`` and ``gather_subset`` arguments
+    for the ``setup`` module via ``kolla_ansible_setup_filter`` and
+    ``kolla_ansible_setup_gather_subset`` respectively. These can be used to
+    reduce the number of facts, which can have a significant effect on
+    performance of Ansible.