From 6b51d75b0cb4cbff64ad717bfdf0fbc9486e26bc Mon Sep 17 00:00:00 2001
From: Wesley Hershberger <wesley.hershberger@canonical.com>
Date: Fri, 8 Aug 2025 10:11:27 -0500
Subject: [PATCH] Reduce tune.ssl.cachesize for HTTPS terminating listeners

454cff5 introduced haproxy's `tune.ssl.cachesize` for TERMINATED_HTTPS
listeners. During a reload of haproxy the old worker process stays
running until the new worker process is ready. This means that two TLS
session caches are allocated/held simultaneously during a reload of the
service.

For small Amphorae, this works fine. The default connection limit is
50000, which takes enough of a chunk out of the 50% allocation that
there is enough wiggle room for the new haproxy worker to allocate its
cache and coexist with the old worker for some time.

However, for larger amphorae, the memory calculated for the session
cache approaches 50% of the available memory.

haproxy allocates an additional 48 bytes for each 200 byte chunk, so
the total memory allocated exceeds 50% of the available memory,
triggering the OOM-killer on haproxy reload. Account for this overhead
by sizing each cache block at about 250 bytes instead of 200.

Out of an abundance of caution this also reduces the proportion of
memory Octavia considers "available" for the TLS session cache from 1/2
to 2/5.

Closes-Bug: #2119987
Change-Id: I91b6907c3e3e456860f7274153e0ecf030e0519e
Signed-off-by: Wesley Hershberger <wesley.hershberger@canonical.com>
---
 .../haproxy/combined_listeners/jinja_cfg.py   | 12 +++----
 .../combined_listeners/test_jinja_cfg.py      | 36 +++++++++++++++++++
 ...proxy-ssl-cache-size-5d5842100a87de54.yaml |  5 +++
 3 files changed, 47 insertions(+), 6 deletions(-)
 create mode 100644 releasenotes/notes/fix-haproxy-ssl-cache-size-5d5842100a87de54.yaml

diff --git a/octavia/common/jinja/haproxy/combined_listeners/jinja_cfg.py b/octavia/common/jinja/haproxy/combined_listeners/jinja_cfg.py
index 99cc75459b..4aadb1d132 100644
--- a/octavia/common/jinja/haproxy/combined_listeners/jinja_cfg.py
+++ b/octavia/common/jinja/haproxy/combined_listeners/jinja_cfg.py
@@ -221,14 +221,14 @@ class JinjaTemplater:
                 # because that is what ulimit -n typically returns.
                 max_conn_mem_kb = 32 * loadbalancer.get(
                     "global_connection_limit", 1024)
-                # Use half of the remaining memory for SSL caches
+                # LP #2119987: Use 2/5 of the remaining memory for SSL caches
                 ssl_cache_mem_kb = (mem["free"] + mem["buffers"] +
-                                    mem["cached"] - max_conn_mem_kb) // 2
-                # A cache block uses about 200 bytes of data.
+                                    mem["cached"] - max_conn_mem_kb) * 2 // 5
+                # A cache block uses about 250 bytes of data.
                 # The HAProxy default of ssl_cache (20000) would take up
-                # 4000 KB. We don't want to go below that.
-                if ssl_cache_mem_kb > 4000:
-                    jinja_dict["ssl_cache"] = ssl_cache_mem_kb * 5
+                # 5000 KB. We don't want to go below that.
+                if ssl_cache_mem_kb > 5000:
+                    jinja_dict["ssl_cache"] = ssl_cache_mem_kb * 4
             except (KeyError, TypeError):
                 pass
 
diff --git a/octavia/tests/unit/common/jinja/haproxy/combined_listeners/test_jinja_cfg.py b/octavia/tests/unit/common/jinja/haproxy/combined_listeners/test_jinja_cfg.py
index 8d8970c6a1..48e75f10aa 100644
--- a/octavia/tests/unit/common/jinja/haproxy/combined_listeners/test_jinja_cfg.py
+++ b/octavia/tests/unit/common/jinja/haproxy/combined_listeners/test_jinja_cfg.py
@@ -1140,6 +1140,42 @@ class TestHaproxyCfg(base.TestCase):
             frontend=fe, logging=lg, backend=be, global_opts=g_opts),
             rendered_obj)
 
+    def test_render_template_tls_cachesize(self):
+        g_opts = (f"    maxconn {constants.HAPROXY_DEFAULT_MAXCONN}\n"
+                  f"    tune.ssl.cachesize 101722232\n\n")
+        fe = ("frontend sample_listener_id_1\n"
+              f"    maxconn {constants.HAPROXY_DEFAULT_MAXCONN}\n"
+              "    redirect scheme https if !{ ssl_fc }\n"
+              "    http-response set-header Strict-Transport-Security "
+              "\"max-age=10000000; includeSubDomains; preload;\"\n"
+              "    bind 10.0.0.2:443 "
+              f"ciphers {constants.CIPHERS_OWASP_SUITE_B} "
+              "no-sslv3 no-tlsv10 no-tlsv11 alpn "
+              f"{','.join(constants.AMPHORA_SUPPORTED_ALPN_PROTOCOLS)}\n"
+              "    mode http\n"
+              "    default_backend sample_pool_id_1:sample_listener_id_1\n"
+              "    timeout client 50000\n")
+        tls_tupe = {'cont_id_1':
+                    sample_configs_combined.sample_tls_container_tuple(
+                        id='tls_container_id',
+                        certificate='imaCert1', private_key='imaPrivateKey1',
+                        primary_cn='FakeCN'),
+                    'cont_id_ca': 'client_ca.pem',
+                    'cont_id_crl': 'SHA_ID.pem'}
+        rendered_obj = self.jinja_cfg.render_loadbalancer_obj(
+            sample_configs_combined.sample_amphora_tuple(),
+            [sample_configs_combined.sample_listener_tuple(
+                proto='TERMINATED_HTTPS')],
+            tls_tupe,
+            # free + buffers + cached = 65176396 KB (roughly 62 GiB)
+            amp_details={"memory": {
+                "free": 32864004,
+                "buffers": 32312392 // 2,
+                "cached": 32312392 // 2,
+            }})
+        self.assertEqual(sample_configs_combined.sample_base_expected_config(
+            frontend=fe, global_opts=g_opts), rendered_obj)
+
     def test_render_template_l7policies(self):
         fe = ("frontend sample_listener_id_1\n"
               "    maxconn {maxconn}\n"
diff --git a/releasenotes/notes/fix-haproxy-ssl-cache-size-5d5842100a87de54.yaml b/releasenotes/notes/fix-haproxy-ssl-cache-size-5d5842100a87de54.yaml
new file mode 100644
index 0000000000..3a1c917482
--- /dev/null
+++ b/releasenotes/notes/fix-haproxy-ssl-cache-size-5d5842100a87de54.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    Reduce the value of tune.ssl.cachesize for HTTPS termination listeners to
+    prevent OOM during haproxy reload (LP: #2119987).