Update Airflow logrotate logic

The current logrotate logic deletes logs that are
more than X days old in the Airflow log path; however,
the Airflow log archive may still reach 100%
usage and cause the airflow-worker to crashloop.

This PS adds logic to logrotate.sh to delete the oldest
logs and empty dirs when the Airflow log archive
reaches the max usage specified in values.yaml.

Change-Id: I3dcb80901d7dd36da6812850a1f54e7ebf3b1cf2
anthony.bellino 2019-04-26 22:04:26 +00:00
parent b5469c39ec
commit 5f92be2f07
3 changed files with 15 additions and 0 deletions

View File

@@ -149,6 +149,8 @@ spec:
           imagePullPolicy: {{ .Values.images.pull_policy }}
 {{ tuple $envAll $envAll.Values.pod.resources.airflow.logrotate | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
           env:
+            - name: PERCENT_MAX_LOG_FS_USAGE
+              value: {{ .Values.logrotate.percent_max_log_fs_usage | quote }}
             - name: DAYS_BEFORE_LOG_DELETION
               value: {{ .Values.logrotate.days_before_deletion | quote }}
             - name: LOGROTATE_PATH
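
Once the chart is redeployed, one quick way to confirm the new variable
actually reaches the logrotate container is to inspect its environment;
the namespace, pod, and container names below are illustrative only and
will differ per deployment:

    kubectl -n airflow exec airflow-worker-0 -c logrotate -- \
      env | grep -E 'PERCENT_MAX_LOG_FS_USAGE|DAYS_BEFORE_LOG_DELETION'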

View File

@@ -146,6 +146,7 @@ volume_worker:
 logrotate:
   days_before_deletion: 30
+  percent_max_log_fs_usage: 80
 # typically overriden by environmental
 # values, but should include all endpoints
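
The 80 above is only the chart default and can be overridden like any
other value. As a rough example with plain Helm (release name and chart
path are placeholders; real deployments would normally carry this in
their usual values overrides):

    helm upgrade --install airflow ./airflow \
      --set logrotate.percent_max_log_fs_usage=85 \
      --set logrotate.days_before_deletion=14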

View File

@@ -16,6 +16,10 @@
 set -ex

+get_usage() {
+  df /usr/local/airflow/logs/ --output='pcent' | grep -o '[0-9]*'
+}
+
 while true; do
   # Delete logs that are more than 30 days old in the directories
@@ -23,6 +27,14 @@ while true; do
   # Delete empty directories under the Airflow log path
   find ${LOGROTATE_PATH} \( -type f -name '*.log' -mtime +${DAYS_BEFORE_LOG_DELETION} -o -type d -empty \) -print -delete
+
+  # Delete oldest logs and empty directories when
+  # the Airflow log path filesystem reaches max usage
+  CURR_USAGE=$(get_usage)
+  while [ $CURR_USAGE -gt ${PERCENT_MAX_LOG_FS_USAGE} ]; do
+    find ${LOGROTATE_PATH} \( -type f -name '*.log' -o -type d -empty \) -printf '%T+ %p\n' | sort | head -n 1 | xargs -r -l1 sh -c 'rm -rf $1'
+    CURR_USAGE=$(get_usage)
+  done

   # Sleep for 1 hr between each wait loop
   sleep 3600
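
For a manual sanity check of the new logic, the two building blocks can
be run by hand outside the pod. The path and threshold below are
illustrative only (the script reads them from its container
environment), and the find pipeline is shown without the rm step so it
only previews what the cleanup loop would delete first:

    # Illustrative values; the real script gets these from its environment
    LOGROTATE_PATH=/usr/local/airflow/logs
    PERCENT_MAX_LOG_FS_USAGE=80

    # Same check as get_usage(): filesystem usage as a bare integer
    df ${LOGROTATE_PATH} --output='pcent' | grep -o '[0-9]*'

    # Preview the entry the cleanup loop would remove first
    # (oldest modification time sorts to the top because of the %T+ prefix)
    find ${LOGROTATE_PATH} \( -type f -name '*.log' -o -type d -empty \) \
      -printf '%T+ %p\n' | sort | head -n 1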