Improve health probe logging for nova and neutron

1. Log specific compute services failing RabbitMQ socket tests in the nova
   health probe
2. Log specific compute services failing database socket tests in the nova
   health probe
3. Make log level configurable for nova and neutron health probes

Change-Id: I5e5d909d598af734596eb1732ae42808c1f6cd12
This commit is contained in:
Anderson, Craig (ca846m)
2022-02-17 22:23:52 -08:00
committed by Craig Anderson
parent 7c3a9de5aa
commit d514395d81
8 changed files with 19 additions and 6 deletions

View File

@@ -14,7 +14,7 @@ apiVersion: v1
appVersion: v1.0.0 appVersion: v1.0.0
description: OpenStack-Helm Neutron description: OpenStack-Helm Neutron
name: neutron name: neutron
version: 0.2.10 version: 0.2.11
home: https://docs.openstack.org/neutron/latest/ home: https://docs.openstack.org/neutron/latest/
icon: https://www.openstack.org/themes/openstack/images/project-mascots/Neutron/OpenStack_Project_Neutron_vertical.png icon: https://www.openstack.org/themes/openstack/images/project-mascots/Neutron/OpenStack_Project_Neutron_vertical.png
sources: sources:

View File

@@ -53,7 +53,7 @@ rpc_timeout = int(os.getenv('RPC_PROBE_TIMEOUT', '60'))
rpc_retries = int(os.getenv('RPC_PROBE_RETRIES', '2')) rpc_retries = int(os.getenv('RPC_PROBE_RETRIES', '2'))
rabbit_port = 5672 rabbit_port = 5672
tcp_established = "ESTABLISHED" tcp_established = "ESTABLISHED"
log.logging.basicConfig(level=log.ERROR) log.logging.basicConfig(level=log.{{ .Values.health_probe.logging.level }})
def _get_hostname(use_fqdn): def _get_hostname(use_fqdn):

View File

@@ -2516,6 +2516,10 @@ network_policy:
helm3_hook: true helm3_hook: true
health_probe:
logging:
level: ERROR
manifests: manifests:
certificates: false certificates: false
configmap_bin: true configmap_bin: true

View File

@@ -14,7 +14,7 @@ apiVersion: v1
appVersion: v1.0.0 appVersion: v1.0.0
description: OpenStack-Helm Nova description: OpenStack-Helm Nova
name: nova name: nova
version: 0.2.29 version: 0.2.30
home: https://docs.openstack.org/nova/latest/ home: https://docs.openstack.org/nova/latest/
icon: https://www.openstack.org/themes/openstack/images/project-mascots/Nova/OpenStack_Project_Nova_vertical.png icon: https://www.openstack.org/themes/openstack/images/project-mascots/Nova/OpenStack_Project_Nova_vertical.png
sources: sources:

View File

@@ -161,8 +161,10 @@ def test_tcp_socket(service):
if service in dict_services: if service in dict_services:
proc = dict_services[service] proc = dict_services[service]
transport = oslo_messaging.TransportURL.parse(cfg.CONF)
if r_ports and tcp_socket_status(proc, r_ports) == 0: if r_ports and tcp_socket_status(proc, r_ports) == 0:
sys.stderr.write("RabbitMQ socket not established") sys.stderr.write("RabbitMQ socket not established for service "
"%s with transport %s" % (proc, transport))
# Do not kill the pod if RabbitMQ is not reachable/down # Do not kill the pod if RabbitMQ is not reachable/down
if not cfg.CONF.liveness_probe: if not cfg.CONF.liveness_probe:
sys.exit(1) sys.exit(1)
@@ -170,7 +172,8 @@ def test_tcp_socket(service):
# let's do the db check # let's do the db check
if service != "compute": if service != "compute":
if d_ports and tcp_socket_status(proc, d_ports) == 0: if d_ports and tcp_socket_status(proc, d_ports) == 0:
sys.stderr.write("Database socket not established") sys.stderr.write("Database socket not established for service "
"%s with transport %s" % (proc, transport))
# Do not kill the pod if database is not reachable/down # Do not kill the pod if database is not reachable/down
# there could be no socket as well as typically connections # there could be no socket as well as typically connections
# get closed after an idle timeout # get closed after an idle timeout
@@ -194,7 +197,7 @@ def test_rpc_liveness():
cfg.CONF(sys.argv[1:]) cfg.CONF(sys.argv[1:])
log.logging.basicConfig(level=log.ERROR) log.logging.basicConfig(level=log.{{ .Values.health_probe.logging.level }})
try: try:
transport = oslo_messaging.get_transport(cfg.CONF) transport = oslo_messaging.get_transport(cfg.CONF)

View File

@@ -2556,6 +2556,10 @@ network_policy:
# set helm3_hook: false when using the helm2 binary. # set helm3_hook: false when using the helm2 binary.
helm3_hook: true helm3_hook: true
health_probe:
logging:
level: ERROR
manifests: manifests:
certificates: false certificates: false
configmap_bin: true configmap_bin: true

View File

@@ -24,4 +24,5 @@ neutron:
- 0.2.8 Add Victoria and Wallaby releases support - 0.2.8 Add Victoria and Wallaby releases support
- 0.2.9 Add option to disable helm.sh/hook annotations - 0.2.9 Add option to disable helm.sh/hook annotations
- 0.2.10 Update htk requirements repo - 0.2.10 Update htk requirements repo
- 0.2.11 Improve health probe logging
... ...

View File

@@ -50,4 +50,5 @@ nova:
- 0.2.27 Add tls1.2 minimum version to tls overrides - 0.2.27 Add tls1.2 minimum version to tls overrides
- 0.2.28 Move ssl_minimum_version to console section - 0.2.28 Move ssl_minimum_version to console section
- 0.2.29 Remove ssh-config - 0.2.29 Remove ssh-config
- 0.2.30 Improve health probe logging
... ...