From 13c1d8cd3866bb026cce6e5d80555edfd0afa845 Mon Sep 17 00:00:00 2001 From: Sergiy Markin Date: Mon, 11 Dec 2023 23:42:20 +0000 Subject: [PATCH] [backups] Add throttling of remote backups This PS adds a possibility to limit (to throttle) the number of simultaneously uploaded backups while keeping the logic on the client side using flag files on the remote side. The main idea is to have an ability to limit the number of simultaneous remote backup upload sessions. Change-Id: I5464004d4febfbe20df9cd41ca62ceb9fd6f0c0d --- helm-toolkit/Chart.yaml | 2 +- .../db-backup-restore/_backup_main.sh.tpl | 136 +++++++++++++++++- mariadb-backup/Chart.yaml | 2 +- .../templates/cron-job-backup-mariadb.yaml | 10 ++ .../templates/secret-backup-restore.yaml | 5 + mariadb-backup/values.yaml | 6 + mariadb/Chart.yaml | 2 +- .../templates/cron-job-backup-mariadb.yaml | 10 ++ mariadb/templates/secret-backup-restore.yaml | 5 + mariadb/values.yaml | 6 + postgresql/Chart.yaml | 2 +- .../templates/cron-job-backup-postgres.yaml | 10 ++ .../templates/secret-backup-restore.yaml | 5 + postgresql/values.yaml | 6 + releasenotes/notes/helm-toolkit.yaml | 1 + releasenotes/notes/mariadb-backup.yaml | 1 + releasenotes/notes/mariadb.yaml | 1 + releasenotes/notes/postgresql.yaml | 1 + 18 files changed, 206 insertions(+), 5 deletions(-) diff --git a/helm-toolkit/Chart.yaml b/helm-toolkit/Chart.yaml index 2b1a60a04..77fb563aa 100644 --- a/helm-toolkit/Chart.yaml +++ b/helm-toolkit/Chart.yaml @@ -15,7 +15,7 @@ apiVersion: v1 appVersion: v1.0.0 description: OpenStack-Helm Helm-Toolkit name: helm-toolkit -version: 0.2.58 +version: 0.2.59 home: https://docs.openstack.org/openstack-helm icon: https://www.openstack.org/themes/openstack/images/project-mascots/OpenStack-Helm/OpenStack_Project_OpenStackHelm_vertical.png sources: diff --git a/helm-toolkit/templates/scripts/db-backup-restore/_backup_main.sh.tpl b/helm-toolkit/templates/scripts/db-backup-restore/_backup_main.sh.tpl index a46924da1..695cb2e47 100755 --- 
a/helm-toolkit/templates/scripts/db-backup-restore/_backup_main.sh.tpl +++ b/helm-toolkit/templates/scripts/db-backup-restore/_backup_main.sh.tpl @@ -49,6 +49,13 @@ # A random number between min and max delay is generated # to set the delay. # +# RGW backup throttle limits variables: +# export THROTTLE_BACKUPS_ENABLED Boolean variable to control backup throttling +# export THROTTLE_LIMIT Number of simultaneous RGW upload sessions +# export THROTTLE_LOCK_EXPIRE_AFTER Time in seconds after which an orphaned flag file expires +# export THROTTLE_RETRY_AFTER Time in seconds to wait before retry +# export THROTTLE_CONTAINER_NAME Name of RGW container to place flag files into +# # The database-specific functions that need to be implemented are: # dump_databases_to_directory [scope] # where: @@ -84,8 +91,10 @@ # specified by the "LOCAL_DAYS_TO_KEEP" variable. # 4) Removing remote backup tarballs (from the remote gateway) which are older # than the number of days specified by the "REMOTE_DAYS_TO_KEEP" variable. +# 5) Controlling remote storage gateway load from the client side and throttling it +# by using a dedicated RGW container to store flag files that mark upload sessions +# in progress # - # Note: not using set -e in this script because more elaborate error handling # is needed. @@ -218,6 +227,113 @@ send_to_remote_server() { echo "Sleeping for ${DELAY} seconds to spread the load in time..." sleep ${DELAY} + #--------------------------------------------------------------------------- + # Remote backup throttling + export THROTTLE_BACKUPS_ENABLED=$(echo $THROTTLE_BACKUPS_ENABLED | sed 's/"//g') + if $THROTTLE_BACKUPS_ENABLED; then + # Remove Quotes from the constants which were added due to reading + # from secret. 
+ export THROTTLE_LIMIT=$(echo $THROTTLE_LIMIT | sed 's/"//g') + export THROTTLE_LOCK_EXPIRE_AFTER=$(echo $THROTTLE_LOCK_EXPIRE_AFTER | sed 's/"//g') + export THROTTLE_RETRY_AFTER=$(echo $THROTTLE_RETRY_AFTER | sed 's/"//g') + export THROTTLE_CONTAINER_NAME=$(echo $THROTTLE_CONTAINER_NAME | sed 's/"//g') + + # load balance delay + RESULT=$(openstack container list 2>&1) + + if [[ $? -eq 0 ]]; then + echo $RESULT | grep $THROTTLE_CONTAINER_NAME + if [[ $? -ne 0 ]]; then + # Find the swift URL from the keystone endpoint list + SWIFT_URL=$(openstack catalog show object-store -c endpoints | grep public | awk '{print $4}') + if [[ $? -ne 0 ]]; then + log WARN "${DB_NAME}_backup" "Unable to get object-store enpoints from keystone catalog." + return 2 + fi + + # Get a token from keystone + TOKEN=$(openstack token issue -f value -c id) + if [[ $? -ne 0 ]]; then + log WARN "${DB_NAME}_backup" "Unable to get keystone token." + return 2 + fi + + # Create the container + RES_FILE=$(mktemp -p /tmp) + curl -g -i -X PUT ${SWIFT_URL}/${THROTTLE_CONTAINER_NAME} \ + -H "X-Auth-Token: ${TOKEN}" \ + -H "X-Storage-Policy: ${STORAGE_POLICY}" 2>&1 > $RES_FILE + + if [[ $? -ne 0 || $(grep "HTTP" $RES_FILE | awk '{print $2}') -ge 400 ]]; then + log WARN "${DB_NAME}_backup" "Unable to create container ${THROTTLE_CONTAINER_NAME}" + cat $RES_FILE + rm -f $RES_FILE + return 2 + fi + rm -f $RES_FILE + + swift stat $THROTTLE_CONTAINER_NAME + if [[ $? -ne 0 ]]; then + log WARN "${DB_NAME}_backup" "Unable to retrieve container ${THROTTLE_CONTAINER_NAME} details after creation." + return 2 + fi + fi + else + echo $RESULT | grep -E "HTTP 401|HTTP 403" + if [[ $? -eq 0 ]]; then + log ERROR "${DB_NAME}_backup" "Access denied by keystone: ${RESULT}" + return 1 + else + echo $RESULT | grep -E "ConnectionError|Failed to discover available identity versions|Service Unavailable|HTTP 50" + if [[ $? 
-eq 0 ]]; then + log WARN "${DB_NAME}_backup" "Could not reach the RGW: ${RESULT}" + # In this case, keystone or the site/node may be temporarily down. + # Return slightly different error code so the calling code can retry + return 2 + else + log ERROR "${DB_NAME}_backup" "Could not get container list: ${RESULT}" + return 1 + fi + fi + fi + + NUMBER_OF_SESSIONS=$(openstack object list $THROTTLE_CONTAINER_NAME -f value | wc -l) + log INFO "${DB_NAME}_backup" "There are ${NUMBER_OF_SESSIONS} remote sessions right now." + while [[ ${NUMBER_OF_SESSIONS} -ge ${THROTTLE_LIMIT} ]] + do + log INFO "${DB_NAME}_backup" "Current number of active uploads is ${NUMBER_OF_SESSIONS}>=${THROTTLE_LIMIT}!" + log INFO "${DB_NAME}_backup" "Retrying in ${THROTTLE_RETRY_AFTER} seconds...." + sleep ${THROTTLE_RETRY_AFTER} + NUMBER_OF_SESSIONS=$(openstack object list $THROTTLE_CONTAINER_NAME -f value | wc -l) + log INFO "${DB_NAME}_backup" "There are ${NUMBER_OF_SESSIONS} remote sessions right now." + done + + # Create a lock file in THROTTLE_CONTAINER + THROTTLE_FILEPATH=$(mktemp -d) + THROTTLE_FILE=${CONTAINER_NAME}.lock + date +%s > $THROTTLE_FILEPATH/$THROTTLE_FILE + + # Create an object to store the file + openstack object create --name $THROTTLE_FILE $THROTTLE_CONTAINER_NAME $THROTTLE_FILEPATH/$THROTTLE_FILE + if [[ $? -ne 0 ]]; then + log WARN "${DB_NAME}_backup" "Cannot create throttle container object ${THROTTLE_FILE}!" + return 2 + fi + + swift post $THROTTLE_CONTAINER_NAME $THROTTLE_FILE -H "X-Delete-After:${THROTTLE_LOCK_EXPIRE_AFTER}" + if [[ $? -ne 0 ]]; then + log WARN "${DB_NAME}_backup" "Cannot set throttle container object ${THROTTLE_FILE} expiration header!" + return 2 + fi + openstack object show $THROTTLE_CONTAINER_NAME $THROTTLE_FILE + if [[ $? -ne 0 ]]; then + log WARN "${DB_NAME}_backup" "Unable to retrieve throttle container object $THROTTLE_FILE after creation." 
+ return 2 + fi + fi + + #--------------------------------------------------------------------------- + # Create an object to store the file openstack object create --name $FILE $CONTAINER_NAME $FILEPATH/$FILE if [[ $? -ne 0 ]]; then @@ -245,6 +361,24 @@ send_to_remote_server() { fi rm -f ${REMOTE_FILE} + #--------------------------------------------------------------------------- + # Remote backup throttling + export THROTTLE_BACKUPS_ENABLED=$(echo $THROTTLE_BACKUPS_ENABLED | sed 's/"//g') + if $THROTTLE_BACKUPS_ENABLED; then + # Remove flag file + # Delete an object to remove the flag file + openstack object delete $THROTTLE_CONTAINER_NAME $THROTTLE_FILE + if [[ $? -ne 0 ]]; then + log WARN "${DB_NAME}_backup" "Cannot delete throttle container object ${THROTTLE_FILE}" + return 0 + else + log INFO "${DB_NAME}_backup" "The throttle container object ${THROTTLE_FILE} has been successfully removed." + fi + rm -f ${THROTTLE_FILEPATH}/${THROTTLE_FILE} + fi + + #--------------------------------------------------------------------------- + log INFO "${DB_NAME}_backup" "Created file $FILE in container $CONTAINER_NAME successfully." 
return 0 } diff --git a/mariadb-backup/Chart.yaml b/mariadb-backup/Chart.yaml index 689383c49..b70f096cf 100644 --- a/mariadb-backup/Chart.yaml +++ b/mariadb-backup/Chart.yaml @@ -15,7 +15,7 @@ apiVersion: v1 appVersion: v10.6.14 description: OpenStack-Helm MariaDB backups name: mariadb-backup -version: 0.0.3 +version: 0.0.4 home: https://mariadb.com/kb/en/ icon: http://badges.mariadb.org/mariadb-badge-180x60.png sources: diff --git a/mariadb-backup/templates/cron-job-backup-mariadb.yaml b/mariadb-backup/templates/cron-job-backup-mariadb.yaml index 381e23018..d84ec1694 100644 --- a/mariadb-backup/templates/cron-job-backup-mariadb.yaml +++ b/mariadb-backup/templates/cron-job-backup-mariadb.yaml @@ -132,6 +132,16 @@ spec: value: {{ .Values.conf.backup.remote_backup.delay_range.min | quote }} - name: MAX_DELAY_SEND_BACKUP_TO_REMOTE value: {{ .Values.conf.backup.remote_backup.delay_range.max | quote }} + - name: THROTTLE_BACKUPS_ENABLED + value: "{{ .Values.conf.backup.remote_backup.throttle_backups.enabled }}" + - name: THROTTLE_LIMIT + value: {{ .Values.conf.backup.remote_backup.throttle_backups.sessions_limit | quote }} + - name: THROTTLE_LOCK_EXPIRE_AFTER + value: {{ .Values.conf.backup.remote_backup.throttle_backups.lock_expire_after | quote }} + - name: THROTTLE_RETRY_AFTER + value: {{ .Values.conf.backup.remote_backup.throttle_backups.retry_after | quote }} + - name: THROTTLE_CONTAINER_NAME + value: {{ .Values.conf.backup.remote_backup.throttle_backups.container_name | quote }} {{- with $env := dict "ksUserSecret" $envAll.Values.secrets.identity.mariadb }} {{- include "helm-toolkit.snippets.keystone_openrc_env_vars" $env | indent 16 }} {{- end }} diff --git a/mariadb-backup/templates/secret-backup-restore.yaml b/mariadb-backup/templates/secret-backup-restore.yaml index c3ed882f3..1a37290b7 100644 --- a/mariadb-backup/templates/secret-backup-restore.yaml +++ b/mariadb-backup/templates/secret-backup-restore.yaml @@ -26,5 +26,10 @@ data: REMOTE_BACKUP_RETRIES: {{ 
$envAll.Values.conf.backup.remote_backup.number_of_retries | quote | b64enc }} REMOTE_BACKUP_SEND_DELAY_MIN: {{ $envAll.Values.conf.backup.remote_backup.delay_range.min | quote | b64enc }} REMOTE_BACKUP_SEND_DELAY_MAX: {{ $envAll.Values.conf.backup.remote_backup.delay_range.max | quote | b64enc }} + THROTTLE_BACKUPS_ENABLED: {{ $envAll.Values.conf.backup.remote_backup.throttle_backups.enabled | quote | b64enc }} + THROTTLE_LIMIT: {{ $envAll.Values.conf.backup.remote_backup.throttle_backups.sessions_limit | quote | b64enc }} + THROTTLE_LOCK_EXPIRE_AFTER: {{ $envAll.Values.conf.backup.remote_backup.throttle_backups.lock_expire_after | quote | b64enc }} + THROTTLE_RETRY_AFTER: {{ $envAll.Values.conf.backup.remote_backup.throttle_backups.retry_after | quote | b64enc }} + THROTTLE_CONTAINER_NAME: {{ $envAll.Values.conf.backup.remote_backup.throttle_backups.container_name | quote | b64enc }} ... {{- end }} diff --git a/mariadb-backup/values.yaml b/mariadb-backup/values.yaml index ed487169a..6fe828607 100644 --- a/mariadb-backup/values.yaml +++ b/mariadb-backup/values.yaml @@ -235,6 +235,12 @@ conf: delay_range: min: 30 max: 60 + throttle_backups: + enabled: false + sessions_limit: 480 + lock_expire_after: 7200 + retry_after: 3600 + container_name: throttle-backups-manager secrets: identity: diff --git a/mariadb/Chart.yaml b/mariadb/Chart.yaml index e00e1ac5d..095d326f0 100644 --- a/mariadb/Chart.yaml +++ b/mariadb/Chart.yaml @@ -15,7 +15,7 @@ apiVersion: v1 appVersion: v10.6.7 description: OpenStack-Helm MariaDB name: mariadb -version: 0.2.37 +version: 0.2.38 home: https://mariadb.com/kb/en/ icon: http://badges.mariadb.org/mariadb-badge-180x60.png sources: diff --git a/mariadb/templates/cron-job-backup-mariadb.yaml b/mariadb/templates/cron-job-backup-mariadb.yaml index 619accba4..cb8381254 100644 --- a/mariadb/templates/cron-job-backup-mariadb.yaml +++ b/mariadb/templates/cron-job-backup-mariadb.yaml @@ -132,6 +132,16 @@ spec: value: {{ 
.Values.conf.backup.remote_backup.delay_range.min | quote }} - name: MAX_DELAY_SEND_BACKUP_TO_REMOTE value: {{ .Values.conf.backup.remote_backup.delay_range.max | quote }} + - name: THROTTLE_BACKUPS_ENABLED + value: "{{ .Values.conf.backup.remote_backup.throttle_backups.enabled }}" + - name: THROTTLE_LIMIT + value: {{ .Values.conf.backup.remote_backup.throttle_backups.sessions_limit | quote }} + - name: THROTTLE_LOCK_EXPIRE_AFTER + value: {{ .Values.conf.backup.remote_backup.throttle_backups.lock_expire_after | quote }} + - name: THROTTLE_RETRY_AFTER + value: {{ .Values.conf.backup.remote_backup.throttle_backups.retry_after | quote }} + - name: THROTTLE_CONTAINER_NAME + value: {{ .Values.conf.backup.remote_backup.throttle_backups.container_name | quote }} {{- with $env := dict "ksUserSecret" $envAll.Values.secrets.identity.mariadb }} {{- include "helm-toolkit.snippets.keystone_openrc_env_vars" $env | indent 16 }} {{- end }} diff --git a/mariadb/templates/secret-backup-restore.yaml b/mariadb/templates/secret-backup-restore.yaml index c3ed882f3..1a37290b7 100644 --- a/mariadb/templates/secret-backup-restore.yaml +++ b/mariadb/templates/secret-backup-restore.yaml @@ -26,5 +26,10 @@ data: REMOTE_BACKUP_RETRIES: {{ $envAll.Values.conf.backup.remote_backup.number_of_retries | quote | b64enc }} REMOTE_BACKUP_SEND_DELAY_MIN: {{ $envAll.Values.conf.backup.remote_backup.delay_range.min | quote | b64enc }} REMOTE_BACKUP_SEND_DELAY_MAX: {{ $envAll.Values.conf.backup.remote_backup.delay_range.max | quote | b64enc }} + THROTTLE_BACKUPS_ENABLED: {{ $envAll.Values.conf.backup.remote_backup.throttle_backups.enabled | quote | b64enc }} + THROTTLE_LIMIT: {{ $envAll.Values.conf.backup.remote_backup.throttle_backups.sessions_limit | quote | b64enc }} + THROTTLE_LOCK_EXPIRE_AFTER: {{ $envAll.Values.conf.backup.remote_backup.throttle_backups.lock_expire_after | quote | b64enc }} + THROTTLE_RETRY_AFTER: {{ $envAll.Values.conf.backup.remote_backup.throttle_backups.retry_after | quote | 
b64enc }} + THROTTLE_CONTAINER_NAME: {{ $envAll.Values.conf.backup.remote_backup.throttle_backups.container_name | quote | b64enc }} ... {{- end }} diff --git a/mariadb/values.yaml b/mariadb/values.yaml index 340b5d1ac..e592c5620 100644 --- a/mariadb/values.yaml +++ b/mariadb/values.yaml @@ -356,6 +356,12 @@ conf: delay_range: min: 30 max: 60 + throttle_backups: + enabled: false + sessions_limit: 480 + lock_expire_after: 7200 + retry_after: 3600 + container_name: throttle-backups-manager database: mysql_histfile: "/dev/null" my: | diff --git a/postgresql/Chart.yaml b/postgresql/Chart.yaml index a5443202b..f4aeae665 100644 --- a/postgresql/Chart.yaml +++ b/postgresql/Chart.yaml @@ -15,7 +15,7 @@ apiVersion: v1 appVersion: v14.5 description: OpenStack-Helm PostgreSQL name: postgresql -version: 0.1.19 +version: 0.1.20 home: https://www.postgresql.org sources: - https://github.com/postgres/postgres diff --git a/postgresql/templates/cron-job-backup-postgres.yaml b/postgresql/templates/cron-job-backup-postgres.yaml index c2e2e8d26..8331049ac 100644 --- a/postgresql/templates/cron-job-backup-postgres.yaml +++ b/postgresql/templates/cron-job-backup-postgres.yaml @@ -122,6 +122,16 @@ spec: value: {{ .Values.conf.backup.remote_backup.delay_range.min | quote }} - name: MAX_DELAY_SEND_BACKUP_TO_REMOTE value: {{ .Values.conf.backup.remote_backup.delay_range.max | quote }} + - name: THROTTLE_BACKUPS_ENABLED + value: "{{ .Values.conf.backup.remote_backup.throttle_backups.enabled }}" + - name: THROTTLE_LIMIT + value: {{ .Values.conf.backup.remote_backup.throttle_backups.sessions_limit | quote }} + - name: THROTTLE_LOCK_EXPIRE_AFTER + value: {{ .Values.conf.backup.remote_backup.throttle_backups.lock_expire_after | quote }} + - name: THROTTLE_RETRY_AFTER + value: {{ .Values.conf.backup.remote_backup.throttle_backups.retry_after | quote }} + - name: THROTTLE_CONTAINER_NAME + value: {{ .Values.conf.backup.remote_backup.throttle_backups.container_name | quote }} {{- with $env := dict 
"ksUserSecret" $envAll.Values.secrets.identity.postgresql }} {{- include "helm-toolkit.snippets.keystone_openrc_env_vars" $env | indent 16 }} {{- end }} diff --git a/postgresql/templates/secret-backup-restore.yaml b/postgresql/templates/secret-backup-restore.yaml index b9e2f298e..497a8270b 100644 --- a/postgresql/templates/secret-backup-restore.yaml +++ b/postgresql/templates/secret-backup-restore.yaml @@ -26,5 +26,10 @@ data: REMOTE_BACKUP_RETRIES: {{ $envAll.Values.conf.backup.remote_backup.number_of_retries | quote | b64enc }} REMOTE_BACKUP_SEND_DELAY_MIN: {{ $envAll.Values.conf.backup.remote_backup.delay_range.min | quote | b64enc }} REMOTE_BACKUP_SEND_DELAY_MAX: {{ $envAll.Values.conf.backup.remote_backup.delay_range.max | quote | b64enc }} + THROTTLE_BACKUPS_ENABLED: {{ $envAll.Values.conf.backup.remote_backup.throttle_backups.enabled | quote | b64enc }} + THROTTLE_LIMIT: {{ $envAll.Values.conf.backup.remote_backup.throttle_backups.sessions_limit | quote | b64enc }} + THROTTLE_LOCK_EXPIRE_AFTER: {{ $envAll.Values.conf.backup.remote_backup.throttle_backups.lock_expire_after | quote | b64enc }} + THROTTLE_RETRY_AFTER: {{ $envAll.Values.conf.backup.remote_backup.throttle_backups.retry_after | quote | b64enc }} + THROTTLE_CONTAINER_NAME: {{ $envAll.Values.conf.backup.remote_backup.throttle_backups.container_name | quote | b64enc }} ... 
{{- end }} diff --git a/postgresql/values.yaml b/postgresql/values.yaml index 3a077dbb4..425dd1734 100644 --- a/postgresql/values.yaml +++ b/postgresql/values.yaml @@ -328,6 +328,12 @@ conf: delay_range: min: 30 max: 60 + throttle_backups: + enabled: false + sessions_limit: 480 + lock_expire_after: 7200 + retry_after: 3600 + container_name: throttle-backups-manager exporter: queries: diff --git a/releasenotes/notes/helm-toolkit.yaml b/releasenotes/notes/helm-toolkit.yaml index 3ce80f978..0e74f12c2 100644 --- a/releasenotes/notes/helm-toolkit.yaml +++ b/releasenotes/notes/helm-toolkit.yaml @@ -65,4 +65,5 @@ helm-toolkit: - 0.2.56 Expose S3 credentials from Rook bucket CRD secret - 0.2.57 Safer file removal - 0.2.58 Backups verification improvements + - 0.2.59 Added throttling remote backups ... diff --git a/releasenotes/notes/mariadb-backup.yaml b/releasenotes/notes/mariadb-backup.yaml index 6b6939f94..8d5cdf043 100644 --- a/releasenotes/notes/mariadb-backup.yaml +++ b/releasenotes/notes/mariadb-backup.yaml @@ -3,4 +3,5 @@ mariadb-backup: - 0.0.1 Initial Chart - 0.0.2 Added staggered backups support - 0.0.3 Backups verification improvements + - 0.0.4 Added throttling remote backups ... diff --git a/releasenotes/notes/mariadb.yaml b/releasenotes/notes/mariadb.yaml index 24818891f..33a091826 100644 --- a/releasenotes/notes/mariadb.yaml +++ b/releasenotes/notes/mariadb.yaml @@ -53,4 +53,5 @@ mariadb: - 0.2.35 Update apparmor override - 0.2.36 Added staggered backups support - 0.2.37 Backups verification improvements + - 0.2.38 Added throttling remote backups ... 
diff --git a/releasenotes/notes/postgresql.yaml b/releasenotes/notes/postgresql.yaml index 9a8368448..563e94049 100644 --- a/releasenotes/notes/postgresql.yaml +++ b/releasenotes/notes/postgresql.yaml @@ -20,4 +20,5 @@ postgresql: - 0.1.17 Added empty verify_databases_backup_archives() function implementation to match updated backup_databases() function in helm-toolkit - 0.1.18 Updated postgres to 14.5 and replaced deprecated config item wal_keep_segments with wal_keep_size - 0.1.19 Added staggered backups support + - 0.1.20 Added throttling remote backups ...