[mariadb] Refactor liveness/readiness probes
* Move all probes into a single script to reduce code duplication * Check the used disk percentage and fail when 99% is consumed, to avoid data corruption * Do not restart the container while an SST is in progress Change-Id: I6efc7596753dc988aa9edd7ade4d57107db98bdd
This commit is contained in:
parent
842f0f11dc
commit
174f6f5bd5
@ -15,7 +15,7 @@ apiVersion: v1
|
||||
appVersion: v10.6.7
|
||||
description: OpenStack-Helm MariaDB
|
||||
name: mariadb
|
||||
version: 0.2.59
|
||||
version: 0.2.60
|
||||
home: https://mariadb.com/kb/en/
|
||||
icon: http://badges.mariadb.org/mariadb-badge-180x60.png
|
||||
sources:
|
||||
|
139
mariadb/templates/bin/_health.sh.tpl
Normal file
139
mariadb/templates/bin/_health.sh.tpl
Normal file
@ -0,0 +1,139 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
###########################################################################
|
||||
# Copyright 2017 The Openstack-Helm Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#########################################################################
|
||||
|
||||
# Abort the probe as soon as a command fails outside of a condition.
set -e
|
||||
|
||||
# Base mysql client command line shared by every check in this script.
# It is expanded unquoted ($MYSQL) by its users so the options word-split.
# The TLS options are only rendered when .Values.manifests.certificates is set.
MYSQL="mysql \
|
||||
--defaults-file=/etc/mysql/admin_user.cnf \
|
||||
--host=localhost \
|
||||
{{- if .Values.manifests.certificates }}
|
||||
--ssl-verify-server-cert=false \
|
||||
--ssl-ca=/etc/mysql/certs/ca.crt \
|
||||
--ssl-key=/etc/mysql/certs/tls.key \
|
||||
--ssl-cert=/etc/mysql/certs/tls.crt \
|
||||
{{- end }}
|
||||
--connect-timeout 2"
|
||||
|
||||
#######################################
# Print the value of one server variable or status counter.
# Runs "show <table> like <key>" through the mysql client and prints the
# last whitespace-separated field of the row whose name equals <key>.
# Globals:   MYSQL (read) - client command line, word-split on purpose
# Arguments: $1 - what to show, e.g. "variables" or "status"
#            $2 - exact name to look up
# Outputs:   the value on stdout; nothing when no row matches
#######################################
mysql_query () {
  local table=$1
  local key=$2
  # MYSQL is intentionally unquoted so that its options are word-split.
  # The key is handed to awk with -v and compared to the first column, so it
  # is never interpreted as a regular expression or as awk program text, and
  # a name that merely contains the key cannot be picked up by mistake.
  $MYSQL -e "show ${table} like \"${key}\"" | \
    awk -v key="${key}" '$1 == key { print $NF; exit }'
}
|
||||
|
||||
# Print the command-line synopsis on stderr and terminate with status 1.
usage () {
  printf '%s\n' "Usage: $0 [-t <liveness|readiness>] [-d <percent>]" >&2
  exit 1
}
|
||||
|
||||
# Probe selected with -t; stays empty when the flag is not given.
PROBE_TYPE=''

# The leading ':' puts getopts in silent mode: an unknown flag and a flag
# with a missing argument both end up in the catch-all arm and print usage.
while getopts ":t:d:" opt; do
  case "${opt}" in
    t) PROBE_TYPE=${OPTARG} ;;
    d) DISK_ALARM_LIMIT=${OPTARG} ;;
    *) usage ;;
  esac
done
# Drop the parsed options so that only positional arguments remain.
shift "$((OPTIND - 1))"
|
||||
|
||||
#######################################
# Readiness probe: the node must answer queries, have disk space left and
# be a synced member of the primary Galera component.
# Globals:   MYSQL (read) - client command line, word-split on purpose
#            DISK_ALARM_LIMIT (read) - used-space percentage that trips the
#              alarm; defaults to 100 when unset, empty or not a number
# Outputs:   the reason for a failed check on stdout; warnings about a disk
#            check that had to be skipped on stderr
# Returns:   exits 1 on the first failed check, returns 0 otherwise
#######################################
check_readiness () {
  local limit=${DISK_ALARM_LIMIT:-100}
  local data_dir tmp_dir partition used

  # MYSQL is intentionally unquoted so that its options are word-split.
  if ! $MYSQL -e 'select 1' > /dev/null 2>&1 ; then
    echo "Select from mysql failed"
    exit 1
  fi

  # A non-numeric limit would make the integer comparison below error out
  # and silently disable the alarm, so fall back to the default instead.
  if ! [[ "${limit}" =~ ^[0-9]+$ ]]; then
    echo "Invalid disk usage limit '${limit}', falling back to 100" 1>&2
    limit=100
  fi

  # Local names are used so the TMPDIR environment variable is not clobbered.
  data_dir=$(mysql_query variables datadir)
  tmp_dir=$(mysql_query variables tmpdir)
  for partition in "${data_dir}" "${tmp_dir}"; do
    if [ -z "${partition}" ]; then
      echo "Could not determine a mysql directory, skip its disk usage check" 1>&2
      continue
    fi
    # df prints a header line followed by one "NN%" line for the path.
    used=$(df --output=pcent -- "${partition}" \
      | awk 'NR == 2 { gsub(/[^0-9]/, ""); print; exit }') || used=''
    if ! [[ "${used}" =~ ^[0-9]+$ ]]; then
      echo "Could not read disk usage of ${partition}, skip its disk usage check" 1>&2
      continue
    fi
    if [ "${used}" -ge "${limit}" ]; then
      echo "[ALARM] Critical high disk space utilization of ${partition}"
      exit 1
    fi
  done

  if [ "$(mysql_query status wsrep_ready)" != "ON" ]; then
    echo "WSREP says the node can not receive queries"
    exit 1
  fi
  if [ "$(mysql_query status wsrep_connected)" != "ON" ]; then
    echo "WSREP not connected"
    exit 1
  fi
  if [ "$(mysql_query status wsrep_cluster_status)" != "Primary" ]; then
    echo "Not in primary cluster"
    exit 1
  fi
  if [ "$(mysql_query status wsrep_local_state_comment)" != "Synced" ]; then
    echo "WSREP not synced"
    exit 1
  fi
}
|
||||
|
||||
#######################################
# Liveness probe: decide whether the container has to be restarted.
# Globals:   MYSQL (read) - client command line, word-split on purpose
# Outputs:   the reason for the verdict on stdout
# Returns:   exits 0 while mysql_upgrade runs, exits 1 on a failed check,
#            returns 0 when an SST is in progress or every check passed
#######################################
check_liveness () {
  local sst_marker="/var/lib/mysql/sst_in_progress"

  if pidof mysql_upgrade > /dev/null 2>&1 ; then
    echo "The process mysql_upgrade is active. Skip rest checks"
    exit 0
  fi

  pidof mysqld > /dev/null 2>&1 || {
    echo "The mysqld pid not found"
    exit 1
  }

  # NOTE(mkarpin): SST process may take significant time in case of large
  # databases, killing mysqld during SST may destroy all data on the node.
  if [ -f "${sst_marker}" ]; then
    echo "SST is still in progress, skip further checks as mysql won't respond"
    return
  fi

  # NOTE(vsaienko): in some cases maria might get stuck during IST, or when
  # neighbours' IPs are changed. Connecting to the mysql socket ensures the
  # process is alive. MYSQL is unquoted on purpose so its options word-split.
  if ! $MYSQL -e "show status like 'wsrep_cluster_status'" > /dev/null 2>&1 ; then
    echo "Can't connect to mysql socket"
    exit 1
  fi

  # Detect a node that is not connected to the wsrep provider.
  if [ "$(mysql_query status wsrep_ready)" != "ON" ]; then
    echo "WSREP says the node can not receive queries"
    exit 1
  fi
  if [ "$(mysql_query status wsrep_connected)" != "ON" ]; then
    echo "WSREP not connected"
    exit 1
  fi
}
|
||||
|
||||
# Run the probe selected with -t; any other value is reported and treated
# as a usage error.
if [ "${PROBE_TYPE}" = "liveness" ]; then
  check_liveness
elif [ "${PROBE_TYPE}" = "readiness" ]; then
  check_readiness
else
  echo "Unknown probe type: ${PROBE_TYPE}"
  usage
fi
|
@ -1,68 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
{{/*
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/}}
|
||||
|
||||
# Abort the probe as soon as a command fails outside of a condition.
set -e
|
||||
|
||||
# Base mysql client command line shared by the checks in this script.
# It is expanded unquoted ($MYSQL) by its users so the options word-split.
# The TLS options are only rendered when .Values.manifests.certificates is set.
MYSQL="mysql \
|
||||
--defaults-file=/etc/mysql/admin_user.cnf \
|
||||
--host=localhost \
|
||||
{{- if .Values.manifests.certificates }}
|
||||
--ssl-verify-server-cert=false \
|
||||
--ssl-ca=/etc/mysql/certs/ca.crt \
|
||||
--ssl-key=/etc/mysql/certs/tls.key \
|
||||
--ssl-cert=/etc/mysql/certs/tls.crt \
|
||||
{{- end }}
|
||||
--connect-timeout 2"
|
||||
|
||||
#######################################
# Print the value of one server status counter.
# Runs "show status like <name>" through the mysql client and prints the
# last whitespace-separated field of the row whose name equals <name>.
# Globals:   MYSQL (read) - client command line, word-split on purpose
# Arguments: $1 - exact status name to look up
# Outputs:   the value on stdout; nothing when no row matches
#######################################
mysql_status_query () {
  local status_name=$1
  # MYSQL is intentionally unquoted so that its options are word-split.
  # The name is handed to awk with -v and compared to the first column, so
  # it is never interpreted as a regular expression or as awk program text.
  $MYSQL -e "show status like \"${status_name}\"" | \
    awk -v key="${status_name}" '$1 == key { print $NF; exit }'
}
|
||||
|
||||
{{- if eq (int .Values.pod.replicas.server) 1 }}
|
||||
# Single replica: the only check is that the server answers a trivial query.
if ! $MYSQL -e 'select 1' > /dev/null 2>&1 ; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
{{- else }}
|
||||
# Any other replica count: check the Galera (wsrep) state instead.
if [ -f /var/lib/mysql/sst_in_progress ]; then
|
||||
# SST in progress, with this node receiving a snapshot.
|
||||
# MariaDB won't be up yet; report success to avoid killing it.
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ "x$(mysql_status_query wsrep_ready)" != "xON" ]; then
|
||||
# WSREP says the node can NOT receive queries
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "x$(mysql_status_query wsrep_connected)" != "xON" ]; then
|
||||
# WSREP is not connected
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "x$(mysql_status_query wsrep_cluster_status)" != "xPrimary" ]; then
|
||||
# Not in primary cluster
|
||||
exit 1
|
||||
fi
|
||||
|
||||
wsrep_local_state_comment=$(mysql_status_query wsrep_local_state_comment)
|
||||
if [ "x${wsrep_local_state_comment}" != "xSynced" ] && [ "x${wsrep_local_state_comment}" != "xDonor/Desynced" ]; then
|
||||
# Neither synced nor acting as a donor (Donor/Desynced)
|
||||
exit 1
|
||||
fi
|
||||
{{- end }}
|
@ -1,60 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
{{/*
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/}}
|
||||
|
||||
# Abort the probe as soon as a command fails outside of a condition.
set -e
|
||||
|
||||
# Base mysql client command line shared by the checks in this script.
# It is expanded unquoted ($MYSQL) by its users so the options word-split.
# The TLS options are only rendered when .Values.manifests.certificates is set.
MYSQL="mysql \
|
||||
--defaults-file=/etc/mysql/admin_user.cnf \
|
||||
--host=localhost \
|
||||
{{- if .Values.manifests.certificates }}
|
||||
--ssl-verify-server-cert=false \
|
||||
--ssl-ca=/etc/mysql/certs/ca.crt \
|
||||
--ssl-key=/etc/mysql/certs/tls.key \
|
||||
--ssl-cert=/etc/mysql/certs/tls.crt \
|
||||
{{- end }}
|
||||
--connect-timeout 2"
|
||||
|
||||
#######################################
# Print the value of one server status counter.
# Runs "show status like <name>" through the mysql client and prints the
# last whitespace-separated field of the row whose name equals <name>.
# Globals:   MYSQL (read) - client command line, word-split on purpose
# Arguments: $1 - exact status name to look up
# Outputs:   the value on stdout; nothing when no row matches
#######################################
mysql_status_query () {
  local status_name=$1
  # MYSQL is intentionally unquoted so that its options are word-split.
  # The name is handed to awk with -v and compared to the first column, so
  # it is never interpreted as a regular expression or as awk program text.
  $MYSQL -e "show status like \"${status_name}\"" | \
    awk -v key="${status_name}" '$1 == key { print $NF; exit }'
}
|
||||
|
||||
# The server must answer a trivial query regardless of the replica count.
if ! $MYSQL -e 'select 1' > /dev/null 2>&1 ; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
{{- if gt (int .Values.pod.replicas.server) 1 }}
|
||||
# More than one replica: additionally check the Galera (wsrep) state.
if [ "x$(mysql_status_query wsrep_ready)" != "xON" ]; then
|
||||
# WSREP says the node can NOT receive queries
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "x$(mysql_status_query wsrep_connected)" != "xON" ]; then
|
||||
# WSREP is not connected
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "x$(mysql_status_query wsrep_cluster_status)" != "xPrimary" ]; then
|
||||
# Not in primary cluster
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "x$(mysql_status_query wsrep_local_state_comment)" != "xSynced" ]; then
|
||||
# WSREP not synced
|
||||
exit 1
|
||||
fi
|
||||
{{- end }}
|
@ -27,10 +27,8 @@ data:
|
||||
image-repo-sync.sh: |
|
||||
{{- include "helm-toolkit.scripts.image_repo_sync" . | indent 4 }}
|
||||
{{- end }}
|
||||
readiness.sh: |
|
||||
{{ tuple "bin/_readiness.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
liveness.sh: |
|
||||
{{ tuple "bin/_liveness.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
health.sh: |
|
||||
{{ tuple "bin/_health.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
start.py: |
|
||||
{{ tuple "bin/_start.py.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
test.sh: |
|
||||
|
@ -1,7 +1,7 @@
|
||||
{{/*
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
@ -15,12 +15,18 @@ limitations under the License.
|
||||
{{- define "mariadbReadinessProbe" }}
|
||||
exec:
|
||||
command:
|
||||
- /tmp/readiness.sh
|
||||
- /tmp/health.sh
|
||||
- -t
|
||||
- readiness
|
||||
- -d
|
||||
- {{ .Values.pod.probes.server.mariadb.readiness.disk_usage_percent | quote }}
|
||||
{{- end }}
|
||||
{{- define "mariadbLivenessProbe" }}
|
||||
exec:
|
||||
command:
|
||||
- /tmp/liveness.sh
|
||||
- /tmp/health.sh
|
||||
- -t
|
||||
- liveness
|
||||
{{- end }}
|
||||
|
||||
{{- if (.Values.global).subchart_release_name }}
|
||||
@ -226,12 +232,8 @@ spec:
|
||||
subPath: stop.sh
|
||||
readOnly: true
|
||||
- name: mariadb-bin
|
||||
mountPath: /tmp/readiness.sh
|
||||
subPath: readiness.sh
|
||||
readOnly: true
|
||||
- name: mariadb-bin
|
||||
mountPath: /tmp/liveness.sh
|
||||
subPath: liveness.sh
|
||||
mountPath: /tmp/health.sh
|
||||
subPath: health.sh
|
||||
readOnly: true
|
||||
- name: mariadb-etc
|
||||
mountPath: /etc/mysql/my.cnf
|
||||
|
@ -65,6 +65,7 @@ pod:
|
||||
mariadb:
|
||||
readiness:
|
||||
enabled: true
|
||||
disk_usage_percent: 99
|
||||
params:
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
|
@ -75,4 +75,5 @@ mariadb:
|
||||
- 0.2.57 Remove useless retries on conflicts during cm update
|
||||
- 0.2.58 Prevent TypeError in get_active_endpoint function
|
||||
- 0.2.59 Give more time on resolving configmap update conflicts
|
||||
- 0.2.60 Refactor liveness/readiness probes
|
||||
...
|
||||
|
Loading…
x
Reference in New Issue
Block a user