[grafana] Migrator job is mariadb-fail-proof
The main goal of this PS is to make sure the migrator can complete the migrations even if the MariaDB Galera cluster drops the migrator connection, leaving the database in an inconsistent state. It may happen that migration_log has a record of a successfully performed migration while the database schema is missing an entity, so any further attempt to re-run the migrator fails because an entity the migrator expects to be present is missing.

Also, the migrator now runs the mariadb image as its main image, and the grafana binaries are mounted at /usr/share/grafana. The migrator job container runs under the nobody user uid.

This PS runs the migrator in a safe way:
- prepares a database backup
- runs a single instance of grafana as the migrator, with a log file, as a background process in a loop
- constantly checks the log file in the main process
- if the migrations have completed, it stops the grafana-server process and completes the job
- if a migration error occurs, it restores the previously prepared backup so that the grafana-server running in the background loop can retry the migration
- the database operations are prefixed with code that makes sure the database is reachable.

Change-Id: I4e1542b62777f25c08ddd2cb74f0a0e7bfea5145
This commit is contained in:
parent
05ba56e0a9
commit
00846e2e02
@ -15,7 +15,7 @@ apiVersion: v1
|
||||
appVersion: v8.5.10
|
||||
description: OpenStack-Helm Grafana
|
||||
name: grafana
|
||||
version: 0.1.17
|
||||
version: 0.1.18
|
||||
home: https://grafana.com/
|
||||
sources:
|
||||
- https://github.com/grafana/grafana
|
||||
|
@ -17,15 +17,101 @@ set -exo pipefail
|
||||
COMMAND="${@:-start}"
|
||||
PORT={{ tuple "grafana" "internal" "grafana" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
|
||||
PIDFILE=/tmp/pid
|
||||
DB_HOST={{ tuple "oslo_db" "direct" . | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }}
|
||||
DB_PORT={{ tuple "oslo_db" "direct" "mysql" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
|
||||
MYSQL_PARAMS=" \
|
||||
--defaults-file=/tmp/my.cnf \
|
||||
--host=${DB_HOST} \
|
||||
--port=${DB_PORT}
|
||||
{{- if .Values.manifests.certificates }}
|
||||
--ssl-verify-server-cert=false \
|
||||
--ssl-ca=/etc/mysql/certs/ca.crt \
|
||||
--ssl-key=/etc/mysql/certs/tls.key \
|
||||
--ssl-cert=/etc/mysql/certs/tls.crt \
|
||||
{{- end }}
|
||||
"
|
||||
|
||||
# start: replace the current shell process (via exec) with grafana-server,
# using /usr/share/grafana as the home path, /etc/grafana/grafana.ini as the
# config file, and writing the server pid to the file named by $PIDFILE.
function start () {
|
||||
exec /usr/share/grafana/bin/grafana-server -homepath=/usr/share/grafana -config=/etc/grafana/grafana.ini --pidfile="$PIDFILE"
|
||||
}
|
||||
|
||||
# run_migrator: run the grafana database migration with backup/restore retry.
#
# Steps, as visible in this listing:
#   1. Launch `start` in the background and wait (up to 60 seconds) until
#      127.0.0.1:${PORT} accepts a TCP connection.
#   2. Create three temp files with mktemp: BACKUP_FILE, LOG_FILE, STOP_FLAG.
#   3. Loop until `select 1;` succeeds against the grafana database, then
#      dump the database into BACKUP_FILE with mysqldump.
#   4. Spawn a background loop that runs `start` with its output tee'd into
#      LOG_FILE; after it returns it sleeps, calls `stop`, empties LOG_FILE
#      and waits while the STOP_FLAG file exists before starting again.
#   5. In the foreground, poll LOG_FILE every 10 seconds until it contains
#      "migrations completed". If "migration failed" is seen instead: touch
#      STOP_FLAG, call `stop`, wait for the database to be reachable, drop
#      every table listed by `show tables`, restore BACKUP_FILE, and remove
#      STOP_FLAG.
#   6. Once the migrations are completed, call `stop` and delete BACKUP_FILE.
#
# Globals read: MYSQL_PARAMS, DB_HOST, PORT. `stop` is defined elsewhere in
# the file; its body is not visible in this listing.
#
# NOTE(review): this listing comes from a rendered diff; the `start &` and
# `timeout 60 ...` lines may belong to the pre-change body - confirm against
# the actual file.
# NOTE(review): mktemp creates the STOP_FLAG file immediately, so the flag
# already exists when the background loop first tests `[ -f ${STOP_FLAG} ]` -
# confirm this is intended.
function run_migrator () {
|
||||
start &
|
||||
timeout 60 bash -c "until timeout 5 bash -c '</dev/tcp/127.0.0.1/${PORT}'; do sleep 1; done"
|
||||
BACKUP_FILE=$(mktemp)
|
||||
LOG_FILE=$(mktemp)
|
||||
STOP_FLAG=$(mktemp)
|
||||
echo "Making sure the database is reachable...."
|
||||
set +e
|
||||
until mysql ${MYSQL_PARAMS} grafana -e "select 1;"
|
||||
do
|
||||
echo \"Database ${DB_HOST} is not reachable. Sleeping for 10 seconds...\"
|
||||
sleep 10
|
||||
done
|
||||
set -e
|
||||
echo "Preparing initial database backup..."
|
||||
mysqldump ${MYSQL_PARAMS} --add-drop-table --quote-names grafana > "${BACKUP_FILE}"
|
||||
echo "Backup SQL file ${BACKUP_FILE}"
|
||||
ls -lh "${BACKUP_FILE}"
|
||||
{
|
||||
# this is the background process that re-starts grafana-server
|
||||
# in order to process grafana database migration
|
||||
set +e
|
||||
while true
|
||||
do
|
||||
start 2>&1 | tee "$LOG_FILE"
|
||||
sleep 10
|
||||
echo "Restarting the grafana-server..."
|
||||
stop
|
||||
echo "Emptying log file..."
|
||||
echo > "$LOG_FILE"
|
||||
while [ -f ${STOP_FLAG} ]
|
||||
do
|
||||
echo "Lock file still exists - ${STOP_FLAG}..."
|
||||
ls -la ${STOP_FLAG}
|
||||
echo "Waiting for lock file to get removed..."
|
||||
sleep 5
|
||||
done
|
||||
echo "Lock file is removed, proceeding with grafana re-start.."
|
||||
done
|
||||
set -e
|
||||
} &
|
||||
until cat "${LOG_FILE}" | grep -E "migrations completed"
|
||||
do
|
||||
echo "The migrations are not completed yet..."
|
||||
if cat "${LOG_FILE}" | grep -E "migration failed"
|
||||
then
|
||||
echo "Locking server restart by placing a flag file ${STOP_FLAG} .."
|
||||
touch "${STOP_FLAG}"
|
||||
echo "Migration failure has been detected. Stopping the grafana-server..."
|
||||
set +e
|
||||
stop
|
||||
set -e
|
||||
echo "Making sure the database is reachable...."
|
||||
set +e
|
||||
until mysql ${MYSQL_PARAMS} grafana -e "select 1;"
|
||||
do
|
||||
echo \"Database ${DB_HOST} is not reachable. Sleeping for 10 seconds...\"
|
||||
sleep 10
|
||||
done
|
||||
set -e
|
||||
echo "Cleaning the database..."
|
||||
TABLES=$(
|
||||
mysql ${MYSQL_PARAMS} grafana -e "show tables\G;" | grep Tables | cut -d " " -f 2
|
||||
)
|
||||
for TABLE in ${TABLES}
|
||||
do
|
||||
echo ${TABLE}
|
||||
mysql ${MYSQL_PARAMS} grafana -e "drop table ${TABLE};"
|
||||
done
|
||||
echo "Restoring the database backup..."
|
||||
mysql ${MYSQL_PARAMS} grafana < "${BACKUP_FILE}"
|
||||
echo "Removing lock file ${STOP_FLAG} ..."
|
||||
rm -f "${STOP_FLAG}"
|
||||
echo "${STOP_FLAG} has been removed"
|
||||
fi
|
||||
sleep 10
|
||||
done
|
||||
stop
|
||||
rm -f "${BACKUP_FILE}"
|
||||
}
|
||||
|
||||
function stop () {
|
||||
|
@ -20,6 +20,19 @@ limitations under the License.
|
||||
{{- $serviceAccountName := "grafana-run-migrator" }}
|
||||
{{ tuple $envAll "run_migrator" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: prepare-grafana-migrator
|
||||
annotations:
|
||||
{{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" }}
|
||||
data:
|
||||
prepare-grafana-migrator.sh: |
|
||||
#!/bin/bash
|
||||
set -xe
|
||||
cp -av /usr/share/grafana/* /usr/share/grafana-prepare/
|
||||
exit 0
|
||||
---
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
@ -36,7 +49,7 @@ spec:
|
||||
annotations:
|
||||
configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }}
|
||||
configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }}
|
||||
{{ dict "envAll" $envAll "podName" "grafana-run-migrator" "containerNames" (list "grafana-run-migrator" "init") | include "helm-toolkit.snippets.kubernetes_mandatory_access_control_annotation" | indent 8 }}
|
||||
{{ dict "envAll" $envAll "podName" "grafana-run-migrator" "containerNames" (list "prepare-grafana-migrator" "grafana-run-migrator" "init") | include "helm-toolkit.snippets.kubernetes_mandatory_access_control_annotation" | indent 8 }}
|
||||
spec:
|
||||
{{ dict "envAll" $envAll "application" "run_migrator" | include "helm-toolkit.snippets.kubernetes_pod_security_context" | indent 6 }}
|
||||
serviceAccountName: {{ $serviceAccountName }}
|
||||
@ -45,9 +58,24 @@ spec:
|
||||
{{ .Values.labels.job.node_selector_key }}: {{ .Values.labels.job.node_selector_value | quote }}
|
||||
initContainers:
|
||||
{{ tuple $envAll "run_migrator" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
|
||||
- name: prepare-grafana-migrator
|
||||
{{ tuple $envAll "grafana" | include "helm-toolkit.snippets.image" | indent 10 }}
|
||||
{{ dict "envAll" $envAll "application" "run_migrator" "container" "prepare_grafana_migrator" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
|
||||
command:
|
||||
- /tmp/prepare-grafana-migrator.sh
|
||||
resources: {}
|
||||
volumeMounts:
|
||||
- name: pod-tmp
|
||||
mountPath: /tmp
|
||||
- name: grafana-binary-image
|
||||
mountPath: /usr/share/grafana-prepare
|
||||
- name: prepare-grafana-migrator
|
||||
mountPath: /tmp/prepare-grafana-migrator.sh
|
||||
readOnly: true
|
||||
subPath: prepare-grafana-migrator.sh
|
||||
containers:
|
||||
- name: grafana-run-migrator
|
||||
{{ tuple $envAll "grafana" | include "helm-toolkit.snippets.image" | indent 10 }}
|
||||
{{ tuple $envAll "mariadb" | include "helm-toolkit.snippets.image" | indent 10 }}
|
||||
{{ tuple $envAll $envAll.Values.pod.resources.run_migrator | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
|
||||
{{ dict "envAll" $envAll "application" "run_migrator" "container" "grafana_run_migrator" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
|
||||
command:
|
||||
@ -56,12 +84,12 @@ spec:
|
||||
ports:
|
||||
- name: dashboard
|
||||
containerPort: {{ tuple "grafana" "internal" "grafana" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /login
|
||||
port: {{ tuple "grafana" "internal" "grafana" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
|
||||
initialDelaySeconds: 30
|
||||
timeoutSeconds: 30
|
||||
# readinessProbe:
|
||||
# httpGet:
|
||||
# path: /login
|
||||
# port: {{ tuple "grafana" "internal" "grafana" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
|
||||
# initialDelaySeconds: 30
|
||||
# timeoutSeconds: 30
|
||||
env:
|
||||
- name: GF_SECURITY_ADMIN_USER
|
||||
valueFrom:
|
||||
@ -103,6 +131,8 @@ spec:
|
||||
mountPath: {{ .Values.conf.grafana.paths.alerting }}
|
||||
- name: pod-csv-grafana
|
||||
mountPath: {{ .Values.conf.grafana.paths.csv }}
|
||||
- name: grafana-binary-image
|
||||
mountPath: /usr/share/grafana
|
||||
- name: grafana-bin
|
||||
mountPath: /tmp/grafana.sh
|
||||
subPath: grafana.sh
|
||||
@ -119,6 +149,9 @@ spec:
|
||||
- name: grafana-etc
|
||||
mountPath: /etc/grafana/ldap.toml
|
||||
subPath: ldap.toml
|
||||
- name: grafana-db
|
||||
mountPath: /tmp/my.cnf
|
||||
subPath: my.cnf
|
||||
- name: data
|
||||
mountPath: /var/lib/grafana/data
|
||||
{{- range $group, $dashboards := .Values.conf.dashboards }}
|
||||
@ -146,6 +179,8 @@ spec:
|
||||
emptyDir: {}
|
||||
- name: pod-csv-grafana
|
||||
emptyDir: {}
|
||||
- name: grafana-binary-image
|
||||
emptyDir: {}
|
||||
- name: grafana-bin
|
||||
configMap:
|
||||
name: grafana-bin
|
||||
@ -154,6 +189,10 @@ spec:
|
||||
secret:
|
||||
secretName: grafana-etc
|
||||
defaultMode: 0444
|
||||
- name: grafana-db
|
||||
secret:
|
||||
secretName: grafana-db
|
||||
defaultMode: 0444
|
||||
{{- range $group, $dashboards := .Values.conf.dashboards }}
|
||||
- name: grafana-dashboards-{{$group}}
|
||||
configMap:
|
||||
@ -162,6 +201,10 @@ spec:
|
||||
{{- end }}
|
||||
- name: data
|
||||
emptyDir: {}
|
||||
- name: prepare-grafana-migrator
|
||||
configMap:
|
||||
defaultMode: 0555
|
||||
name: prepare-grafana-migrator
|
||||
{{- dict "enabled" $envAll.Values.manifests.certificates "name" $envAll.Values.endpoints.oslo_db.auth.admin.secret.tls.internal | include "helm-toolkit.snippets.tls_volume" | indent 8 }}
|
||||
{{ if $mounts_grafana.volumes }}{{ toYaml $mounts_grafana.volumes | indent 8 }}{{ end }}
|
||||
{{- end }}
|
||||
|
@ -30,4 +30,12 @@ data:
|
||||
DB_CONNECTION: {{ $connection | b64enc -}}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: grafana-db
|
||||
type: Opaque
|
||||
data:
|
||||
my.cnf: {{ tuple "secrets/_my.cnf.tpl" . | include "helm-toolkit.utils.template" | b64enc }}
|
||||
{{- end }}
|
||||
|
17
grafana/templates/secrets/_my.cnf.tpl
Normal file
17
grafana/templates/secrets/_my.cnf.tpl
Normal file
@ -0,0 +1,17 @@
|
||||
{{/*
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/}}
|
||||
|
||||
[client]
|
||||
user = {{ .Values.endpoints.oslo_db.auth.admin.username }}
|
||||
password = {{ .Values.endpoints.oslo_db.auth.admin.password }}
|
@ -18,6 +18,7 @@
|
||||
images:
|
||||
tags:
|
||||
grafana: docker.io/grafana/grafana:8.5.10
|
||||
mariadb: docker.io/openstackhelm/mariadb:latest-ubuntu_focal
|
||||
dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0
|
||||
db_init: docker.io/openstackhelm/heat:stein-ubuntu_bionic
|
||||
grafana_db_session_sync: docker.io/openstackhelm/heat:stein-ubuntu_bionic
|
||||
@ -49,7 +50,6 @@ pod:
|
||||
security_context:
|
||||
dashboard:
|
||||
pod:
|
||||
# The correct grafana uid = 472
|
||||
runAsUser: 472
|
||||
container:
|
||||
grafana:
|
||||
@ -57,7 +57,6 @@ pod:
|
||||
readOnlyRootFilesystem: true
|
||||
db_init:
|
||||
pod:
|
||||
# The correct grafana uid = 472
|
||||
runAsUser: 472
|
||||
container:
|
||||
grafana_db_init_session:
|
||||
@ -68,7 +67,6 @@ pod:
|
||||
readOnlyRootFilesystem: true
|
||||
db_session_sync:
|
||||
pod:
|
||||
# The correct grafana uid = 472
|
||||
runAsUser: 472
|
||||
container:
|
||||
grafana_db_session_sync:
|
||||
@ -76,7 +74,6 @@ pod:
|
||||
readOnlyRootFilesystem: true
|
||||
set_admin_user:
|
||||
pod:
|
||||
# The correct grafana uid = 472
|
||||
runAsUser: 472
|
||||
container:
|
||||
grafana_set_admin_password:
|
||||
@ -84,15 +81,21 @@ pod:
|
||||
readOnlyRootFilesystem: true
|
||||
run_migrator:
|
||||
pod:
|
||||
# The correct grafana uid = 472
|
||||
runAsUser: 472
|
||||
container:
|
||||
prepare_grafana_migrator:
|
||||
runAsUser: 0
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
grafana_run_migrator:
|
||||
runAsUser: 65534
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
grafana_set_admin_password:
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
test:
|
||||
pod:
|
||||
# The correct grafana uid = 472
|
||||
runAsUser: 472
|
||||
container:
|
||||
helm_tests:
|
||||
|
@ -19,6 +19,7 @@ pod:
|
||||
init: runtime/default
|
||||
grafana-run-migrator:
|
||||
grafana-run-migrator: runtime/default
|
||||
prepare-grafana-migrator: runtime/default
|
||||
init: runtime/default
|
||||
grafana-test:
|
||||
init: runtime/default
|
||||
|
@ -18,4 +18,5 @@ grafana:
|
||||
- 0.1.15 Added OCI registry authentication
|
||||
- 0.1.16 Grafana 8.5.10 with unified alerting
|
||||
- 0.1.17 Fix uid for the user grafana
|
||||
- 0.1.18 Migrator job is now mariadb-fail-proof
|
||||
...
|
||||
|
Loading…
Reference in New Issue
Block a user