diff --git a/postgresql/templates/bin/_backup_postgresql.sh.tpl b/postgresql/templates/bin/_backup_postgresql.sh.tpl index 6fff8543a..163244e29 100755 --- a/postgresql/templates/bin/_backup_postgresql.sh.tpl +++ b/postgresql/templates/bin/_backup_postgresql.sh.tpl @@ -17,9 +17,12 @@ export PGPASSWORD=$(cat /etc/postgresql/admin_user.conf \ | grep postgres | awk -F: '{print $5}') +# Note: not using set -e in this script because more elaborate error handling +# is needed. set -x PG_DUMPALL_OPTIONS=$POSTGRESQL_BACKUP_PG_DUMPALL_OPTIONS +TMP_DIR=/tmp/pg_backup BACKUPS_DIR=${POSTGRESQL_BACKUP_BASE_DIR}/db/${POSTGRESQL_POD_NAMESPACE}/postgres/current ARCHIVE_DIR=${POSTGRESQL_BACKUP_BASE_DIR}/db/${POSTGRESQL_POD_NAMESPACE}/postgres/archive LOG_FILE=/tmp/dberror.log @@ -28,64 +31,101 @@ PG_DUMPALL="pg_dumpall \ -U $POSTGRESQL_BACKUP_USER \ -h $POSTGRESQL_SERVICE_HOST" -#Get the day delta since the archive file backup -seconds_difference() { - archive_date=$( date --date="$1" +%s ) - if [ "$?" -ne 0 ] - then - second_delta=0 - fi - current_date=$( date +%s ) - second_delta=$(($current_date-$archive_date)) - if [ "$second_delta" -lt 0 ] - then - second_delta=0 - fi - echo $second_delta -} +source /tmp/common_backup_restore.sh -#Create backups directory if it does not exists. -mkdir -p $BACKUPS_DIR $ARCHIVE_DIR +# Create necessary directories if they do not exist. +mkdir -p $BACKUPS_DIR || log_backup_error_exit "Cannot create directory ${BACKUPS_DIR}!" +mkdir -p $ARCHIVE_DIR || log_backup_error_exit "Cannot create directory ${ARCHIVE_DIR}!" +mkdir -p $TMP_DIR || log_backup_error_exit "Cannot create directory ${TMP_DIR}!" + +# Remove temporary directory contents. +rm -rf $BACKUPS_DIR/* || log_backup_error_exit "Cannot clear ${BACKUPS_DIR} directory contents!" +rm -rf $TMP_DIR/* || log_backup_error_exit "Cannot clear ${TMP_DIR} directory contents!" 
+ +NOW=$(date +"%Y-%m-%dT%H:%M:%SZ") +SQL_FILE=postgres.$POSTGRESQL_POD_NAMESPACE.all +TARBALL_FILE=${SQL_FILE}.${NOW}.tar.gz + +cd $TMP_DIR || log_backup_error_exit "Cannot change to directory $TMP_DIR" + +rm -f $LOG_FILE #Dump all databases -DATE=$(date +"%Y-%m-%dT%H:%M:%SZ") -$PG_DUMPALL --file=$BACKUPS_DIR/postgres.all.sql 2>>$LOG_FILE -if [[ $? -eq 0 && -s "$BACKUPS_DIR/postgres.all.sql" ]] +$PG_DUMPALL --file=${TMP_DIR}/${SQL_FILE}.sql 2>>$LOG_FILE +if [[ $? -eq 0 && -s "${TMP_DIR}/${SQL_FILE}.sql" ]] then - #Archive the current databases files - pushd $BACKUPS_DIR 1>/dev/null - tar zcvf $ARCHIVE_DIR/postgres.all.${DATE}.tar.gz * - ARCHIVE_RET=$? - popd 1>/dev/null - #Remove the current backup - if [ -d $BACKUPS_DIR ] + log INFO postgresql_backup "Databases dumped successfully. Creating tarball..." + + #Archive the current database files + tar zcvf $ARCHIVE_DIR/$TARBALL_FILE * + if [[ $? -ne 0 ]] then - rm -rf $BACKUPS_DIR/*.sql + log_backup_error_exit "Backup tarball could not be created." + fi + + log INFO postgresql_backup "Tarball $TARBALL_FILE created successfully." + + # Remove the sql files as they are no longer needed. + rm -rf $TMP_DIR/* + + if {{ .Values.conf.backup.remote_backup.enabled }} + then + # Copy the tarball back to the BACKUPS_DIR so that the other container + # can access it for sending it to remote storage. + cp $ARCHIVE_DIR/$TARBALL_FILE $BACKUPS_DIR/$TARBALL_FILE + + if [[ $? -ne 0 ]] + then + log_backup_error_exit "Backup tarball could not be copied to backup directory ${BACKUPS_DIR}." + fi + + # Sleep for a few seconds to allow the file system to get caught up...also to + # help prevent race condition where the other container grabs the backup_completed + # token and the backup file hasn't completed writing to disk. + sleep 30 + + # Note: this next line is the trigger that tells the other container to + # start sending to remote storage. 
After this backup is sent to remote
+      # storage, the other container will delete the "current" backup.
+      touch $BACKUPS_DIR/backup_completed
+    else
+      # Remote backup is not enabled. This is ok; at least we have a local backup.
+      log INFO postgresql_backup "Skipping remote backup, as it is not enabled."
   fi
 else
-  #TODO: This can be convert into mail alert of alert send to a monitoring system
-  echo "Backup of postgresql failed and need attention."
   cat $LOG_FILE
-  exit 1
+  rm $LOG_FILE
+  log_backup_error_exit "Backup of the postgresql database failed and needs attention."
 fi
 #Only delete the old archive after a successful archive
-if [ $ARCHIVE_RET -eq 0 ]
+if [ "$POSTGRESQL_BACKUP_DAYS_TO_KEEP" -gt 0 ]
 then
-  if [ "$POSTGRESQL_BACKUP_DAYS_TO_KEEP" -gt 0 ]
+  log INFO postgresql_backup "Deleting backups older than ${POSTGRESQL_BACKUP_DAYS_TO_KEEP} days"
+  if [ -d $ARCHIVE_DIR ]
   then
-    echo "Deleting backups older than $POSTGRESQL_BACKUP_DAYS_TO_KEEP days"
-    if [ -d $ARCHIVE_DIR ]
-    then
-      for archive_file in $(ls -1 $ARCHIVE_DIR/*.gz)
-      do
-        archive_date=$( echo $archive_file | awk -F/ '{print $NF}' | cut -d'.' -f 3)
-        if [ "$(seconds_difference $archive_date)" -gt "$(($POSTGRESQL_BACKUP_DAYS_TO_KEEP*86400))" ]
-        then
-          rm -rf $archive_file
+    for ARCHIVE_FILE in $(ls -1 $ARCHIVE_DIR/*.gz)
+    do
+      ARCHIVE_DATE=$( echo $ARCHIVE_FILE | awk -F/ '{print $NF}' | cut -d'.' -f 4)
+      if [ "$(seconds_difference $ARCHIVE_DATE)" -gt "$(($POSTGRESQL_BACKUP_DAYS_TO_KEEP*86400))" ]
+      then
+        log INFO postgresql_backup "Deleting file $ARCHIVE_FILE."
+        rm -rf $ARCHIVE_FILE
+        if [[ $? -ne 0 ]]
+        then
+          rm -rf $BACKUPS_DIR/*
+          log_backup_error_exit "Cannot remove ${ARCHIVE_FILE}"
         fi
-      done
-    fi
+      else
+        log INFO postgresql_backup "Keeping file ${ARCHIVE_FILE}."
+      fi
+    done
   fi
 fi
+# Turn off trace just for a clearer printout of backup status - for manual backups, mainly.
+set +x
+echo "=================================================================="
+echo "Backup successful!"
+echo "Backup archive name: $TARBALL_FILE" +echo "==================================================================" diff --git a/postgresql/templates/bin/_common_backup_restore.sh.tpl b/postgresql/templates/bin/_common_backup_restore.sh.tpl new file mode 100644 index 000000000..39e725ba8 --- /dev/null +++ b/postgresql/templates/bin/_common_backup_restore.sh.tpl @@ -0,0 +1,94 @@ +#!/bin/bash + +# Copyright 2018 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# Do not use set -x here because the manual backup or restore pods may be using +# these functions, and it will distort the command output to have tracing on. + +log_backup_error_exit() { + MSG=$1 + ERRCODE=$2 + log ERROR postgresql_backup "${MSG}" + exit $ERRCODE +} + +log() { + #Log message to a file or stdout + #TODO: This can be convert into mail alert of alert send to a monitoring system + #Params: $1 log level + #Params: $2 service + #Params: $3 message + #Params: $4 Destination + LEVEL=$1 + SERVICE=$2 + MSG=$3 + DEST=$4 + DATE=$(date +"%m-%d-%y %H:%M:%S") + if [ -z "$DEST" ] + then + echo "${DATE} ${LEVEL}: $(hostname) ${SERVICE}: ${MSG}" + else + echo "${DATE} ${LEVEL}: $(hostname) ${SERVICE}: ${MSG}" >>$DEST + fi +} + +#Get the day delta since the archive file backup +seconds_difference() { + archive_date=$( date --date="$1" +%s ) + if [ "$?" 
-ne 0 ] + then + second_delta=0 + fi + current_date=$( date +%s ) + second_delta=$(($current_date-$archive_date)) + if [ "$second_delta" -lt 0 ] + then + second_delta=0 + fi + echo $second_delta +} + +# Wait for a file to be available on the file system (written by the other +# container). +wait_for_file() { + WAIT_FILE=$1 + NO_TIMEOUT=${2:-false} + TIMEOUT=300 + if [[ $NO_TIMEOUT == "true" ]] + then + # Such a large value to virtually never timeout + TIMEOUT=999999999 + fi + TIMEOUT_EXP=$(( $(date +%s) + $TIMEOUT )) + DONE=false + while [[ $DONE == "false" ]] + do + DELTA=$(( TIMEOUT_EXP - $(date +%s) )) + if [[ "$(ls -l ${WAIT_FILE} 2>/dev/null | wc -l)" -gt 0 ]]; + then + DONE=true + elif [[ $DELTA -lt 0 ]] + then + DONE=true + echo "Timed out waiting for file ${WAIT_FILE}." + return 1 + else + echo "Still waiting ...will time out in ${DELTA} seconds..." + sleep 5 + fi + done + return 0 +} + diff --git a/postgresql/templates/bin/_remote_retrieve_postgresql.sh.tpl b/postgresql/templates/bin/_remote_retrieve_postgresql.sh.tpl new file mode 100755 index 000000000..fc685b612 --- /dev/null +++ b/postgresql/templates/bin/_remote_retrieve_postgresql.sh.tpl @@ -0,0 +1,81 @@ +#!/bin/bash + +# Copyright 2018 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +set -x + +RESTORE_DIR=${POSTGRESQL_BACKUP_BASE_DIR}/db/${POSTGRESQL_POD_NAMESPACE}/postgres/restore +ARCHIVE_DIR=${POSTGRESQL_BACKUP_BASE_DIR}/db/${POSTGRESQL_POD_NAMESPACE}/postgres/archive + +source /tmp/common_backup_restore.sh + +# Keep processing requests for the life of the pod. +while true +do + # Wait until a restore request file is present on the disk + echo "Waiting for a restore request..." + NO_TIMEOUT=true + wait_for_file $RESTORE_DIR/*_request $NO_TIMEOUT + + echo "Done waiting. Request received" + + CONTAINER_NAME={{ .Values.conf.backup.remote_backup.container_name }} + + if [[ -e $RESTORE_DIR/archive_listing_request ]] + then + # We've finished consuming the request, so delete the request file. + rm -rf $RESTORE_DIR/*_request + + openstack container show $CONTAINER_NAME + if [[ $? -eq 0 ]] + then + # Get the list, ensureing that we only pick up postgres backups from the + # requested namespace + openstack object list $CONTAINER_NAME | grep postgres | grep $POSTGRESQL_POD_NAMESPACE | awk '{print $2}' > $RESTORE_DIR/archive_list_response + if [[ $? != 0 ]] + then + echo "Container object listing could not be obtained." >> $RESTORE_DIR/archive_list_error + else + echo "Archive listing successfully retrieved." + fi + else + echo "Container $CONTAINER_NAME does not exist." >> $RESTORE_DIR/archive_list_error + fi + elif [[ -e $RESTORE_DIR/get_archive_request ]] + then + ARCHIVE=`cat $RESTORE_DIR/get_archive_request` + + echo "Request for archive $ARCHIVE received." + + # We've finished consuming the request, so delete the request file. + rm -rf $RESTORE_DIR/*_request + + openstack object save --file $RESTORE_DIR/$ARCHIVE $CONTAINER_NAME $ARCHIVE + if [[ $? != 0 ]] + then + echo "Archive $ARCHIVE could not be retrieved." >> $RESTORE_DIR/archive_error + else + echo "Archive $ARCHIVE successfully retrieved." + fi + + # Signal to the other container that the archive is available. 
+ touch $RESTORE_DIR/archive_response + else + rm -rf $RESTORE_DIR/*_request + echo "Invalid request received." + fi + + sleep 5 +done diff --git a/postgresql/templates/bin/_remote_store_postgresql.sh.tpl b/postgresql/templates/bin/_remote_store_postgresql.sh.tpl new file mode 100755 index 000000000..6eb2b3a13 --- /dev/null +++ b/postgresql/templates/bin/_remote_store_postgresql.sh.tpl @@ -0,0 +1,208 @@ +#!/bin/bash + +# Copyright 2018 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# Note: not using set -e because more elaborate error handling is required. +set -x + +BACKUPS_DIR=${POSTGRESQL_BACKUP_BASE_DIR}/db/${POSTGRESQL_POD_NAMESPACE}/postgres/current + +# Create the working backups directory if the other container didn't already, +# and if this container creates it first, ensure that permissions are writable +# for the other container (running as "postgres" user) in the same "postgres" +# group. +mkdir -p $BACKUPS_DIR || log_backup_error_exit "Cannot create directory ${BACKUPS_DIR}!" 1 +chmod 775 $BACKUPS_DIR + +source /tmp/common_backup_restore.sh + +#Send backup file to storage +send_to_storage() { + FILEPATH=$1 + FILE=$2 + + CONTAINER_NAME={{ .Values.conf.backup.remote_backup.container_name }} + + # Grab the list of containers on the remote site + RESULT=$(openstack container list 2>&1) + + if [[ $? == 0 ]] + then + echo $RESULT | grep $CONTAINER_NAME + if [[ $? 
!= 0 ]] + then + # Create the container + openstack container create $CONTAINER_NAME || log ERROR postgresql_backup "Cannot create container ${CONTAINER_NAME}!" + openstack container show $CONTAINER_NAME + if [[ $? != 0 ]] + then + log ERROR postgresql_backup "Error retrieving container $CONTAINER_NAME after creation." + return 1 + fi + fi + else + echo $RESULT | grep "HTTP 401" + if [[ $? == 0 ]] + then + log ERROR postgresql_backup "Could not access keystone: HTTP 401" + return 1 + else + echo $RESULT | grep "ConnectionError" + if [[ $? == 0 ]] + then + log ERROR postgresql_backup "Could not access keystone: ConnectionError" + # In this case, keystone or the site/node may be temporarily down. + # Return slightly different error code so the calling code can retry + return 2 + else + log ERROR postgresql_backup "Could not get container list: ${RESULT}" + return 1 + fi + fi + fi + + # Create an object to store the file + openstack object create --name $FILE $CONTAINER_NAME $FILEPATH/$FILE || log ERROR postgresql_backup "Cannot create container object ${FILE}!" + openstack object show $CONTAINER_NAME $FILE + if [[ $? != 0 ]] + then + log ERROR postgresql_backup "Error retrieving container object $FILE after creation." + return 1 + fi + + log INFO postgresql_backup "Created file $FILE in container $CONTAINER_NAME successfully." + return 0 +} + +if {{ .Values.conf.backup.remote_backup.enabled }} +then + WAIT_FOR_BACKUP_TIMEOUT=1800 + WAIT_FOR_RGW_AVAIL_TIMEOUT=1800 + + # Wait until a backup file is ready to ship to RGW, or until we time out. + DONE=false + TIMEOUT_EXP=$(( $(date +%s) + $WAIT_FOR_BACKUP_TIMEOUT )) + while [[ $DONE == "false" ]] + do + log INFO postgresql_backup "Waiting for a backup file to be written to the disk." + sleep 5 + DELTA=$(( TIMEOUT_EXP - $(date +%s) )) + ls -l ${BACKUPS_DIR}/backup_completed + if [[ $? -eq 0 ]] + then + DONE=true + elif [[ $DELTA -lt 0 ]] + then + DONE=true + fi + done + + log INFO postgresql_backup "Done waiting." 
+  FILE_TO_SEND=$(ls $BACKUPS_DIR/*.tar.gz)
+
+  ERROR_SEEN=false
+
+  if [[ $FILE_TO_SEND != "" ]]
+  then
+    if [[ $(echo $FILE_TO_SEND | wc -w) -gt 1 ]]
+    then
+      # There should only be one backup file to send - this is an error
+      log_backup_error_exit "More than one backup file found (${FILE_TO_SEND}) - can only handle 1!" 1
+    fi
+
+    # Get just the filename from the file (strip the path)
+    FILE=$(basename $FILE_TO_SEND)
+
+    log INFO postgresql_backup "Backup file ${BACKUPS_DIR}/${FILE} found."
+
+    DONE=false
+    TIMEOUT_EXP=$(( $(date +%s) + $WAIT_FOR_RGW_AVAIL_TIMEOUT ))
+    while [[ $DONE == "false" ]]
+    do
+      # Store the new archive to the remote backup storage facility.
+      send_to_storage $BACKUPS_DIR $FILE
+      # Capture the result immediately; $? would otherwise be clobbered by the
+      # first [[ ]] test below, making the retry (status 2) branch unreachable.
+      SEND_RESULT=$?
+
+      # Check if successful
+      if [[ $SEND_RESULT -eq 0 ]]
+      then
+        log INFO postgresql_backup "Backup file ${BACKUPS_DIR}/${FILE} successfully sent to RGW. Deleting from current backup directory."
+        DONE=true
+      elif [[ $SEND_RESULT -eq 2 ]]
+      then
+        # Temporary failure occurred. We need to retry if we haven't timed out
+        log WARN postgresql_backup "Backup file ${BACKUPS_DIR}/${FILE} could not be sent to RGW due to connection issue."
+        DELTA=$(( TIMEOUT_EXP - $(date +%s) ))
+        if [[ $DELTA -lt 0 ]]
+        then
+          DONE=true
+          log ERROR postgresql_backup "Timed out waiting for RGW to become available."
+          ERROR_SEEN=true
+        else
+          log INFO postgresql_backup "Sleeping 30 seconds waiting for RGW to become available..."
+          sleep 30
+          log INFO postgresql_backup "Retrying..."
+        fi
+      else
+        log ERROR postgresql_backup "Backup file ${BACKUPS_DIR}/${FILE} could not be sent to the RGW."
+        ERROR_SEEN=true
+        DONE=true
+      fi
+    done
+  else
+    log ERROR postgresql_backup "No backup file found in $BACKUPS_DIR."
+    ERROR_SEEN=true
+  fi
+
+  if [[ $ERROR_SEEN == "true" ]]
+  then
+    log ERROR postgresql_backup "Errors encountered. Exiting."
+    exit 1
+  fi
+
+  # At this point, we should remove the files in current dir.
+ # If an error occurred, then we need the file to remain there for future + # container restarts, and maybe it will eventually succeed. + rm -rf $BACKUPS_DIR/* + + #Only delete an old archive after a successful archive + if [ "${POSTGRESQL_BACKUP_DAYS_TO_KEEP}" -gt 0 ] + then + log INFO postgresql_backup "Deleting backups older than ${POSTGRESQL_BACKUP_DAYS_TO_KEEP} days" + BACKUP_FILES=/tmp/backup_files + PG_BACKUP_FILES=/tmp/pg_backup_files + + openstack object list $CONTAINER_NAME > $BACKUP_FILES + if [[ $? != 0 ]] + then + log_backup_error_exit "Could not obtain a list of current backup files in the RGW" 1 + fi + + # Filter out other types of files like mariadb, etcd backupes etc.. + cat $BACKUP_FILES | grep postgres | grep $POSTGRESQL_POD_NAMESPACE | awk '{print $2}' > $PG_BACKUP_FILES + + for ARCHIVE_FILE in $(cat $PG_BACKUP_FILES) + do + ARCHIVE_DATE=$( echo $ARCHIVE_FILE | awk -F/ '{print $NF}' | cut -d'.' -f 4) + if [ "$(seconds_difference ${ARCHIVE_DATE})" -gt "$((${POSTGRESQL_BACKUP_DAYS_TO_KEEP}*86400))" ] + then + log INFO postgresql_backup "Deleting file ${ARCHIVE_FILE} from the RGW" + openstack object delete $CONTAINER_NAME $ARCHIVE_FILE || log_backup_error_exit "Cannot delete container object ${ARCHIVE_FILE}!" 
1 + fi + done + fi +else + log INFO postgresql_backup "Remote backup is not enabled" + exit 0 +fi diff --git a/postgresql/templates/bin/_restore_postgresql.sh.tpl b/postgresql/templates/bin/_restore_postgresql.sh.tpl index 43ba52af4..c26eca563 100755 --- a/postgresql/templates/bin/_restore_postgresql.sh.tpl +++ b/postgresql/templates/bin/_restore_postgresql.sh.tpl @@ -17,11 +17,6 @@ export PGPASSWORD=$(cat /etc/postgresql/admin_user.conf \ | grep postgres | awk -F: '{print $5}') -log_error() { - echo $1 - exit 1 -} - ARCHIVE_DIR=${POSTGRESQL_BACKUP_BASE_DIR}/db/${POSTGRESQL_POD_NAMESPACE}/postgres/archive RESTORE_DIR=${POSTGRESQL_BACKUP_BASE_DIR}/db/${POSTGRESQL_POD_NAMESPACE}/postgres/restore POSTGRESQL_HOST=$(cat /etc/postgresql/admin_user.conf | cut -d: -f 1) @@ -29,29 +24,19 @@ LOG_FILE=/tmp/dbrestore.log ARGS=("$@") PSQL="psql -U $POSTGRESQL_BACKUP_USER -h $POSTGRESQL_HOST" +source /tmp/common_backup_restore.sh + usage() { ret_val=$1 echo "Usage:" echo "Restore command options" echo "=============================" echo "help" - echo "list_archives" - echo "list_databases " - echo "restore [ | ALL]" - exit $ret_val -} - -#Delete file -delete_files() { - files_to_delete=("$@") - for f in "${files_to_delete[@]}" - do - if [ -f $f ] - then - echo "Deleting file $f." - rm -rf $f - fi - done + echo "list_archives [remote]" + echo "list_databases [remote]" + echo "restore [remote]" + echo " where = | ALL" + clean_and_exit $ret_val "" } #Extract Single Database SQL Dump from pg_dumpall dump file @@ -60,36 +45,153 @@ extract_single_db_dump() { ${RESTORE_DIR}/$2.sql } +#Exit cleanly with some message and return code +clean_and_exit() { + RETCODE=$1 + MSG=$2 + + #Cleanup Restore Directory + rm -rf $RESTORE_DIR/* + + if [[ "x${MSG}" != "x" ]]; + then + echo $MSG + fi + exit $RETCODE +} + +# Signal the other container that it should retrieve a list of archives +# from the RGW. 
+retrieve_remote_listing() { + # Remove the last response, if there was any + rm -rf $RESTORE_DIR/archive_list_* + + # Signal by creating a file in the restore directory + touch $RESTORE_DIR/archive_listing_request + + # Wait until the archive listing has been retrieved from the other container. + echo "Waiting for archive listing..." + wait_for_file $RESTORE_DIR/archive_list_* + + if [[ $? -eq 1 ]] + then + clean_and_exit 1 "Request failed - container did not respond. Archive listing is NOT available." + fi + + ERR=$(cat $RESTORE_DIR/archive_list_error 2>/dev/null) + if [[ $? -eq 0 ]] + then + clean_and_exit 1 "Request failed - ${ERR}" + fi + + echo "Done waiting. Archive list is available." +} + +# Signal the other container that it should retrieve a single archive +# from the RGW. +retrieve_remote_archive() { + ARCHIVE=$1 + + # Remove the last response, if there was any + rm -rf $RESTORE_DIR/archive_* + + # Signal by creating a file in the restore directory containing the archive + # name. + echo "$ARCHIVE" > $RESTORE_DIR/get_archive_request + + # Wait until the archive has been retrieved from the other container. + echo "Waiting for requested archive ${ARCHIVE}..." + wait_for_file $RESTORE_DIR/archive_* + + if [[ $? -eq 1 ]] + then + clean_and_exit 1 "Request failed - container did not respond. Archive ${ARCHIVE} is NOT available." + fi + + ERR=$(cat $RESTORE_DIR/archive_error 2>/dev/null) + if [[ $? -eq 0 ]] + then + clean_and_exit 1 "Request failed - ${ERR}" + fi + + rm -rf $RESTORE_DIR/archive_response + if [[ -e $RESTORE_DIR/$ARCHIVE ]] + then + echo "Done waiting. Archive $ARCHIVE is available." + else + clean_and_exit 1 "Request failed - Archive $ARCHIVE is NOT available." 
+ fi +} + #Display all archives list_archives() { - if [ -d ${ARCHIVE_DIR} ] + REMOTE=$1 + + if [[ "x${REMOTE^^}" == "xREMOTE" ]] then - archives=$(find ${ARCHIVE_DIR}/ -iname "*.gz" -print) - echo "All Archives" - echo "==================================" - for archive in $archives - do - echo $archive | cut -d '/' -f 8 - done + retrieve_remote_listing + if [[ -e $RESTORE_DIR/archive_list_response ]] + then + echo + echo "All Archives from RGW Data Store" + echo "==============================================" + cat $RESTORE_DIR/archive_list_response + clean_and_exit 0 "" + else + clean_and_exit 1 "Archives could not be retrieved from the RGW." + fi + elif [[ "x${REMOTE}" == "x" ]] + then + if [ -d $ARCHIVE_DIR ] + then + archives=$(find $ARCHIVE_DIR/ -iname "*.gz" -print) + echo + echo "All Local Archives" + echo "==============================================" + for archive in $archives + do + echo $archive | cut -d '/' -f 8 + done + clean_and_exit 0 "" + else + clean_and_exit 1 "Local archive directory is not available." + fi else - log_error "Archive directory is not available." + usage 1 fi } #Return all databases from an archive get_databases() { - archive_file=$1 - if [ -e ${ARCHIVE_DIR}/${archive_file} ] + ARCHIVE_FILE=$1 + REMOTE=$2 + + if [[ "x$REMOTE" == "xremote" ]] then - files_to_purge=$(find $RESTORE_DIR/ -iname "*.sql" -print) - delete_files $files_to_purge - tar zxvf ${ARCHIVE_DIR}/${archive_file} -C ${RESTORE_DIR} 1>/dev/null - if [ -e ${RESTORE_DIR}/postgres.all.sql ] + retrieve_remote_archive $ARCHIVE_FILE + elif [[ "x$REMOTE" == "x" ]] + then + if [ -e $ARCHIVE_DIR/$ARCHIVE_FILE ] then - DBS=$( grep 'CREATE DATABASE' ${RESTORE_DIR}/postgres.all.sql | awk '{ print $3 }' ) + cp $ARCHIVE_DIR/$ARCHIVE_FILE $RESTORE_DIR/$ARCHIVE_FILE + if [[ $? != 0 ]] + then + clean_and_exit 1 "Could not copy local archive to restore directory." + fi else - DBS=" " + clean_and_exit 1 "Local archive file could not be found." 
fi + else + usage 1 + fi + + echo "Decompressing archive $ARCHIVE_FILE..." + cd $RESTORE_DIR + tar zxvf - < $RESTORE_DIR/$ARCHIVE_FILE 1>/dev/null + SQL_FILE=postgres.$POSTGRESQL_POD_NAMESPACE.all.sql + if [ -e $RESTORE_DIR/$SQL_FILE ] + then + DBS=$( grep 'CREATE DATABASE' $RESTORE_DIR/$SQL_FILE | awk '{ print $3 }' ) else DBS=" " fi @@ -97,14 +199,21 @@ get_databases() { #Display all databases from an archive list_databases() { - archive_file=$1 - get_databases $archive_file - #echo $DBS + ARCHIVE_FILE=$1 + REMOTE=$2 + WHERE="local" + + if [[ "x${REMOTE}" != "x" ]] + then + WHERE="remote" + fi + + get_databases $ARCHIVE_FILE $REMOTE if [ -n "$DBS" ] then echo " " - echo "Databases in the archive $archive_file" - echo "=================================================================" + echo "Databases in the $WHERE archive $ARCHIVE_FILE" + echo "================================================================================" for db in $DBS do echo $db @@ -112,7 +221,6 @@ list_databases() { else echo "There is no database in the archive." fi - } create_db_if_not_exist() { @@ -125,62 +233,49 @@ create_db_if_not_exist() { #Restore a single database dump from pg_dumpall dump. 
 restore_single_db() {
-  single_db_name=$1
-  if [ -z "$single_db_name" ]
+  SINGLE_DB_NAME=$1
+  if [ -z "$SINGLE_DB_NAME" ]
   then
     usage 1
   fi
-  if [ -f ${ARCHIVE_DIR}/${archive_file} ]
+
+  SQL_FILE=postgres.$POSTGRESQL_POD_NAMESPACE.all.sql
+  if [ -f $RESTORE_DIR/$SQL_FILE ]
   then
-    files_to_purge=$(find $RESTORE_DIR/ -iname "*.sql" -print)
-    delete_files $files_to_purge
-    tar zxvf ${ARCHIVE_DIR}/${archive_file} -C ${RESTORE_DIR} 1>/dev/null
-    if [ -f ${RESTORE_DIR}/postgres.all.sql ]
+    extract_single_db_dump $RESTORE_DIR/$SQL_FILE $SINGLE_DB_NAME
+    if [[ -f $RESTORE_DIR/$SINGLE_DB_NAME.sql && -s $RESTORE_DIR/$SINGLE_DB_NAME.sql ]]
     then
-      extract_single_db_dump ${RESTORE_DIR}/postgres.all.sql $single_db_name
-      if [[ -f ${RESTORE_DIR}/${single_db_name}.sql && -s ${RESTORE_DIR}/${single_db_name}.sql ]]
+      create_db_if_not_exist $SINGLE_DB_NAME
+      $PSQL -d $SINGLE_DB_NAME -f ${RESTORE_DIR}/${SINGLE_DB_NAME}.sql 2>>$LOG_FILE >> $LOG_FILE
+      if [ "$?" -eq 0 ]
       then
-        create_db_if_not_exist $single_db_name
-        $PSQL -d $single_db_name -f ${RESTORE_DIR}/${single_db_name}.sql 2>>$LOG_FILE
-        if [ "$?" -eq 0 ]
-        then
-          echo "Database Restore Successful."
-        else
-          log_error "Database Restore Failed."
-        fi
+        echo "Database Restore Successful."
       else
-        log_error "Database Dump For $single_db_name is empty or not available."
+        clean_and_exit 1 "Database Restore Failed."
       fi
     else
-      log_error "Database file for dump_all not available to restore from"
+      clean_and_exit 1 "Database Dump For $SINGLE_DB_NAME is empty or not available."
fi else - log_error "Archive does not exist" + clean_and_exit 1 "Database file for dump_all not available to restore from" fi } #Restore all the databases restore_all_dbs() { - if [ -f ${ARCHIVE_DIR}/${archive_file} ] + SQL_FILE=postgres.$POSTGRESQL_POD_NAMESPACE.all.sql + if [ -f $RESTORE_DIR/$SQL_FILE ] then - files_to_purge=$(find $RESTORE_DIR/ -iname "*.sql" -print) - delete_files $files_to_purge - tar zxvf ${ARCHIVE_DIR}/${archive_file} -C ${RESTORE_DIR} 1>/dev/null - if [ -f ${RESTORE_DIR}/postgres.all.sql ] + $PSQL postgres -f $RESTORE_DIR/$SQL_FILE 2>>$LOG_FILE >> $LOG_FILE + if [ "$?" -eq 0 ] then - $PSQL postgres -f ${RESTORE_DIR}/postgres.all.sql 2>>$LOG_FILE - if [ "$?" -eq 0 ] - then - echo "Database Restore successful." - else - log_error "Database Restore failed." - fi + echo "Database Restore successful." else - log_error "There is no database file available to restore from" + clean_and_exit 1 "Database Restore failed." fi else - log_error "Archive does not exist" - fi + clean_and_exit 1 "There is no database file available to restore from" + fi } @@ -199,16 +294,21 @@ is_Option() { } #Main -#Create Restore Directory +#Create Restore Directory if it's not created already mkdir -p $RESTORE_DIR -if [ ${#ARGS[@]} -gt 3 ] + +#Cleanup Restore Directory +rm -rf $RESTORE_DIR/* + +if [ ${#ARGS[@]} -gt 4 ] then - usage 0 + usage 1 elif [ ${#ARGS[@]} -eq 1 ] then if [ "${ARGS[0]}" == "list_archives" ] then list_archives + clean_and_exit 0 "" elif [ "${ARGS[0]}" == "help" ] then usage 0 @@ -220,40 +320,53 @@ then if [ "${ARGS[0]}" == "list_databases" ] then list_databases ${ARGS[1]} + clean_and_exit 0 "" + elif [ "${ARGS[0]}" == "list_archives" ] + then + list_archives ${ARGS[1]} + clean_and_exit 0 "" else usage 1 fi -elif [ ${#ARGS[@]} -eq 3 ] +elif [[ ${#ARGS[@]} -eq 3 ]] || [[ ${#ARGS[@]} -eq 4 ]] then - if [ "${ARGS[0]}" != "restore" ] + if [ "${ARGS[0]}" == "list_databases" ] + then + list_databases ${ARGS[1]} ${ARGS[2]} + clean_and_exit 0 "" + elif [ 
"${ARGS[0]}" != "restore" ] then usage 1 else - if [ -f ${ARCHIVE_DIR}/${ARGS[1]} ] + ARCHIVE=${ARGS[1]} + DB_SPEC=${ARGS[2]} + REMOTE="" + if [ ${#ARGS[@]} -eq 4 ] then - #Get all the databases in that archive - get_databases ${ARGS[1]} + REMOTE=${ARGS[3]} + fi - #check if the requested database is available in the archive - if [ $(is_Option "$DBS" ${ARGS[2]}) -eq 1 ] - then - echo "Restoring Database ${ARGS[2]} And Grants" - restore_single_db ${ARGS[2]} - echo "Tail ${LOG_FILE} for restore log." - elif [ "$( echo ${ARGS[2]} | tr '[a-z]' '[A-Z]')" == "ALL" ] - then - echo "Restoring All The Database." - restore_all_dbs - echo "Tail ${LOG_FILE} for restore log." - else - echo "There is no database with that name" - fi + #Get all the databases in that archive + get_databases $ARCHIVE $REMOTE + + #check if the requested database is available in the archive + if [ $(is_Option "$DBS" $DB_SPEC) -eq 1 ] + then + echo "Restoring Database $DB_SPEC And Grants" + restore_single_db $DB_SPEC + echo "Tail ${LOG_FILE} for restore log." + clean_and_exit 0 "" + elif [ "$( echo $DB_SPEC | tr '[a-z]' '[A-Z]')" == "ALL" ] + then + echo "Restoring All The Databases. This could take a few minutes..." + restore_all_dbs + clean_and_exit 0 "Tail ${LOG_FILE} for restore log." else - echo "Archive file not found" + clean_and_exit 1 "There is no database with that name" fi fi else usage 1 fi -exit 0 +clean_and_exit 0 "Done" diff --git a/postgresql/templates/configmap-bin.yaml b/postgresql/templates/configmap-bin.yaml index f5c931ea5..42472f519 100644 --- a/postgresql/templates/configmap-bin.yaml +++ b/postgresql/templates/configmap-bin.yaml @@ -34,6 +34,12 @@ data: {{- if .Values.conf.backup.enabled }} backup_postgresql.sh: {{ tuple "bin/_backup_postgresql.sh.tpl" . | include "helm-toolkit.utils.template" | b64enc }} restore_postgresql.sh: {{ tuple "bin/_restore_postgresql.sh.tpl" . 
| include "helm-toolkit.utils.template" | b64enc }} + remote_store_postgresql.sh: {{ tuple "bin/_remote_store_postgresql.sh.tpl" . | include "helm-toolkit.utils.template" | b64enc }} + remote_retrieve_postgresql.sh: {{ tuple "bin/_remote_retrieve_postgresql.sh.tpl" . | include "helm-toolkit.utils.template" | b64enc }} + common_backup_restore.sh: {{ tuple "bin/_common_backup_restore.sh.tpl" . | include "helm-toolkit.utils.template" | b64enc }} +{{- end }} +{{- if .Values.manifests.job_ks_user }} + ks-user.sh: {{ include "helm-toolkit.scripts.keystone_user" . | b64enc }} {{- end }} set_password.sh: {{ tuple "bin/_set_password.sh.tpl" . | include "helm-toolkit.utils.template" | b64enc }} patroni_conversion.sh: {{ tuple "bin/_patroni_conversion.sh.tpl" . | include "helm-toolkit.utils.template" | b64enc }} diff --git a/postgresql/templates/cron-job-backup-postgres.yaml b/postgresql/templates/cron-job-backup-postgres.yaml index e69afd9c8..d5a4e77b5 100644 --- a/postgresql/templates/cron-job-backup-postgres.yaml +++ b/postgresql/templates/cron-job-backup-postgres.yaml @@ -29,6 +29,12 @@ metadata: labels: {{ tuple $envAll "postgresql-backup" "backup" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }} spec: +{{- if .Values.jobs.backup_postgresql.backoffLimit }} + backoffLimit: {{ .Values.jobs.backup_postgresql.backoffLimit }} +{{- end }} +{{- if .Values.jobs.backup_postgresql.activeDeadlineSeconds }} + activeDeadlineSeconds: {{ .Values.jobs.backup_postgresql.activeDeadlineSeconds }} +{{- end }} schedule: {{ .Values.jobs.backup_postgresql.cron | quote }} successfulJobsHistoryLimit: {{ .Values.jobs.backup_postgresql.history.success }} failedJobsHistoryLimit: {{ .Values.jobs.backup_postgresql.history.failed }} @@ -79,6 +85,10 @@ spec: volumeMounts: - name: pod-tmp mountPath: /tmp + - mountPath: /tmp/common_backup_restore.sh + name: postgresql-bin + readOnly: true + subPath: common_backup_restore.sh - mountPath: /tmp/backup_postgresql.sh name: 
postgresql-bin readOnly: true @@ -89,6 +99,33 @@ spec: mountPath: /etc/postgresql/admin_user.conf subPath: admin_user.conf readOnly: true + - name: postgresql-remote-store +{{ tuple $envAll "postgresql_remote_store" | include "helm-toolkit.snippets.image" | indent 14 }} + command: + - /tmp/remote_store_postgresql.sh + env: +{{- with $env := dict "ksUserSecret" $envAll.Values.secrets.identity.postgresql }} +{{- include "helm-toolkit.snippets.keystone_openrc_env_vars" $env | indent 16 }} +{{- end }} + - name: POSTGRESQL_BACKUP_BASE_DIR + value: {{ .Values.conf.backup.base_path }} + - name: POSTGRESQL_BACKUP_DAYS_TO_KEEP + value: "{{ .Values.conf.backup.days_of_backup_to_keep }}" + - name: POSTGRESQL_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + volumeMounts: + - mountPath: /tmp/common_backup_restore.sh + name: postgresql-bin + readOnly: true + subPath: common_backup_restore.sh + - mountPath: /tmp/remote_store_postgresql.sh + name: postgresql-bin + readOnly: true + subPath: remote_store_postgresql.sh + - mountPath: {{ .Values.conf.backup.base_path }} + name: postgresql-backup-dir restartPolicy: OnFailure serviceAccount: {{ $serviceAccountName }} serviceAccountName: {{ $serviceAccountName }} diff --git a/postgresql/templates/job-ks-user.yaml b/postgresql/templates/job-ks-user.yaml new file mode 100644 index 000000000..8a3a03368 --- /dev/null +++ b/postgresql/templates/job-ks-user.yaml @@ -0,0 +1,22 @@ +{{/* +Copyright 2019 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.job_ks_user }} +{{- $backoffLimit := .Values.jobs.ks_user.backoffLimit }} +{{- $activeDeadlineSeconds := .Values.jobs.ks_user.activeDeadlineSeconds }} +{{- $ksUserJob := dict "envAll" . "serviceName" "postgresql" "secretBin" "postgresql-bin" "backoffLimit" $backoffLimit "activeDeadlineSeconds" $activeDeadlineSeconds -}} +{{ $ksUserJob | include "helm-toolkit.manifests.job_ks_user" }} +{{- end }} diff --git a/postgresql/templates/secret-rgw.yaml b/postgresql/templates/secret-rgw.yaml new file mode 100644 index 000000000..e98825baf --- /dev/null +++ b/postgresql/templates/secret-rgw.yaml @@ -0,0 +1,64 @@ +{{/* +This manifest results in two secrets being created: + 1) Keystone "postgresql" secret, which is needed to access the cluster + (remote or same cluster) for storing postgresql backups. If the + cluster is remote, the auth_url would be non-null. + 2) Keystone "admin" secret, which is needed to create the "postgresql" + keystone account mentioned above. This may not be needed if the + account is in a remote cluster (auth_url is non-null in that case). +*/}} + +{{- if .Values.conf.backup.remote_backup.enabled }} + +{{- $envAll := . 
}} +{{- $userClass := "postgresql" }} +{{- $secretName := index $envAll.Values.secrets.identity $userClass }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {{ $secretName }} +type: Opaque +data: +{{- $identityClass := .Values.endpoints.identity.auth.postgresql }} +{{- if $identityClass.auth_url }} + OS_AUTH_URL: {{ $identityClass.auth_url | b64enc }} +{{- else }} + OS_AUTH_URL: {{ tuple "identity" "internal" "api" $envAll | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | b64enc }} +{{- end }} + OS_REGION_NAME: {{ $identityClass.region_name | b64enc }} + OS_INTERFACE: {{ $identityClass.interface | default "internal" | b64enc }} + OS_PROJECT_DOMAIN_NAME: {{ $identityClass.project_domain_name | b64enc }} + OS_PROJECT_NAME: {{ $identityClass.project_name | b64enc }} + OS_USER_DOMAIN_NAME: {{ $identityClass.user_domain_name | b64enc }} + OS_USERNAME: {{ $identityClass.username | b64enc }} + OS_PASSWORD: {{ $identityClass.password | b64enc }} + OS_DEFAULT_DOMAIN: {{ $identityClass.default_domain_id | default "default" | b64enc }} +... 
+{{- if .Values.manifests.job_ks_user }} +{{- $userClass := "admin" }} +{{- $secretName := index $envAll.Values.secrets.identity $userClass }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {{ $secretName }} +type: Opaque +data: +{{- $identityClass := index .Values.endpoints.identity.auth $userClass }} +{{- if $identityClass.auth_url }} + OS_AUTH_URL: {{ $identityClass.auth_url | b64enc }} +{{- else }} + OS_AUTH_URL: {{ tuple "identity" "internal" "api" $envAll | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | b64enc }} +{{- end }} + OS_REGION_NAME: {{ $identityClass.region_name | b64enc }} + OS_INTERFACE: {{ $identityClass.interface | default "internal" | b64enc }} + OS_PROJECT_DOMAIN_NAME: {{ $identityClass.project_domain_name | b64enc }} + OS_PROJECT_NAME: {{ $identityClass.project_name | b64enc }} + OS_USER_DOMAIN_NAME: {{ $identityClass.user_domain_name | b64enc }} + OS_USERNAME: {{ $identityClass.username | b64enc }} + OS_PASSWORD: {{ $identityClass.password | b64enc }} + OS_DEFAULT_DOMAIN: {{ $identityClass.default_domain_id | default "default" | b64enc }} +... 
+{{- end }} +{{- end }} diff --git a/postgresql/values.yaml b/postgresql/values.yaml index 681676afa..3f1155045 100644 --- a/postgresql/values.yaml +++ b/postgresql/values.yaml @@ -117,6 +117,13 @@ pod: limits: memory: "1024Mi" cpu: "2000m" + ks_user: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" # using dockerhub patroni: https://hub.docker.com/r/openstackhelm/patroni/tags/ images: @@ -124,8 +131,10 @@ images: postgresql: "docker.io/openstackhelm/patroni:latest-ubuntu_xenial" dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0 image_repo_sync: docker.io/docker:17.07.0 + ks_user: docker.io/openstackhelm/heat:stein-ubuntu_bionic prometheus_postgresql_exporter: docker.io/wrouesnel/postgres_exporter:v0.4.6 prometheus_postgresql_exporter_create_user: "docker.io/postgres:9.5" + postgresql_remote_store: docker.io/openstackhelm/heat:stein-ubuntu_bionic pull_policy: "IfNotPresent" local_registry: active: false @@ -169,8 +178,9 @@ dependencies: - endpoint: node service: local_image_registry static: - postgresql: - jobs: null + backup_postgresql: + jobs: + - postgresql-ks-user tests: services: - endpoint: internal @@ -208,10 +218,17 @@ volume: jobs: backup_postgresql: + # activeDeadlineSeconds == 0 means no deadline + activeDeadlineSeconds: 0 + backoffLimit: 6 cron: "0 0 * * *" history: success: 3 failed: 1 + ks_user: + # activeDeadlineSeconds == 0 means no deadline + activeDeadlineSeconds: 0 + backoffLimit: 6 network_policy: postgresql: @@ -350,10 +367,13 @@ conf: watchdog: mode: off # Allowed values: off, automatic, required backup: - enabled: true + enabled: false base_path: /var/backup days_of_backup_to_keep: 3 pg_dumpall_options: null + remote_backup: + enabled: false + container_name: postgresql exporter: queries: pg_replication: @@ -397,6 +417,9 @@ secrets: server: postgresql-server-pki exporter: postgresql-exporter audit: postgresql-audit + identity: + admin: keystone-admin-user + postgresql: postgresql-backup-user 
endpoints: cluster_domain_suffix: cluster.local @@ -457,12 +480,51 @@ endpoints: port: metrics: default: 9187 + identity: + name: backup-storage-auth + namespace: openstack + auth: + admin: + # Auth URL of null indicates local authentication + # HTK will form the URL unless specified here + auth_url: null + region_name: RegionOne + username: admin + password: password + project_name: admin + user_domain_name: default + project_domain_name: default + postgresql: + # Auth URL of null indicates local authentication + # HTK will form the URL unless specified here + auth_url: null + role: admin + region_name: RegionOne + username: postgresql-backup-user + password: password + project_name: service + user_domain_name: service + project_domain_name: service + hosts: + default: keystone + internal: keystone-api + host_fqdn_override: + default: null + path: + default: /v3 + scheme: + default: 'http' + port: + api: + default: 80 + internal: 5000 manifests: configmap_bin: true configmap_etc: true job_image_repo_sync: true network_policy: false + job_ks_user: false secret_admin: true secret_replica: true secret_server: true