From d53ad0b07d3e7bdd2668c2d3f1815d95d4b8f532 Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Thu, 20 Feb 2014 13:55:13 +1100 Subject: [PATCH] Add GIT_TIMEOUT variable to watch git operations During my CI testing of each devstack change I can often see git get itself stuck and hang indefinitely. I'm not sure if it's transient network issues, or issues at the remote end (seen with both github.com and git.openstack.org) but it hits fairly frequently. Retrying the command usually gets it going again. Searching for "git hanging" and similar shows it's not entirely uncommon... This adds a watchdog timeout for remote git operations based on a new environment variable GIT_TIMEOUT. It will make up to 3 attempts before giving up. The wrapper is applied to the main remote git calls. Change-Id: I5b0114ca26b7ac2f25993264f761cba9ec8c09e1 --- functions-common | 41 ++++++++++++++++++++++++++++++++++++----- stackrc | 11 +++++++++++ 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/functions-common b/functions-common index d92e39cd91..9cd5acd47b 100644 --- a/functions-common +++ b/functions-common @@ -498,16 +498,16 @@ function git_clone { if [[ ! -d $GIT_DEST ]]; then [[ "$ERROR_ON_CLONE" = "True" ]] && \ die $LINENO "Cloning not allowed in this configuration" - git clone $GIT_REMOTE $GIT_DEST + git_timed clone $GIT_REMOTE $GIT_DEST fi cd $GIT_DEST - git fetch $GIT_REMOTE $GIT_REF && git checkout FETCH_HEAD + git_timed fetch $GIT_REMOTE $GIT_REF && git checkout FETCH_HEAD else # do a full clone only if the directory doesn't exist if [[ ! 
-d $GIT_DEST ]]; then [[ "$ERROR_ON_CLONE" = "True" ]] && \ die $LINENO "Cloning not allowed in this configuration" - git clone $GIT_REMOTE $GIT_DEST + git_timed clone $GIT_REMOTE $GIT_DEST cd $GIT_DEST # This checkout syntax works for both branches and tags git checkout $GIT_REF @@ -516,7 +516,7 @@ function git_clone { cd $GIT_DEST # set the url to pull from and fetch git remote set-url origin $GIT_REMOTE - git fetch origin + git_timed fetch origin # remove the existing ignored files (like pyc) as they cause breakage # (due to the py files having older timestamps than our pyc, so python # thinks the pyc files are correct using them) @@ -541,6 +541,37 @@ function git_clone { git show --oneline | head -1 } +# git can sometimes get itself infinitely stuck with transient network +# errors or other issues with the remote end. This wraps git in a +# timeout/retry loop and is intended to watch over non-local git +# processes that might hang. GIT_TIMEOUT, if set, is passed directly +# to timeout(1); otherwise the default value of 0 maintains the status +# quo of waiting forever. +# usage: git_timed git-subcommand [args...] +function git_timed() { + local count=0 + local timeout=0 + + if [[ -n "${GIT_TIMEOUT}" ]]; then + timeout=${GIT_TIMEOUT} + fi + + until timeout -s SIGINT ${timeout} git "$@"; do + # 124 is timeout(1)'s special return code when it reached the + # timeout; otherwise assume fatal failure + if [[ $? -ne 124 ]]; then + die $LINENO "git call failed: [git $@]" + fi + + count=$(($count + 1)) + warn "timeout ${count} for git call: [git $@]" + if [ $count -eq 3 ]; then + die $LINENO "Maximum of 3 git retries reached" + fi + sleep 5 + done +} + # git update using reference as a branch. 
# git_update_branch ref function git_update_branch() { @@ -571,7 +602,7 @@ function git_update_tag() { git tag -d $GIT_TAG # fetching given tag only - git fetch origin tag $GIT_TAG + git_timed fetch origin tag $GIT_TAG git checkout -f $GIT_TAG } diff --git a/stackrc b/stackrc index 56fa40269c..8cec09eb28 100644 --- a/stackrc +++ b/stackrc @@ -69,6 +69,17 @@ fi # (currently only implemented for MySQL backend) DATABASE_QUERY_LOGGING=$(trueorfalse True $DATABASE_QUERY_LOGGING) +# Set a timeout for git operations. If git is still running when the +# timeout expires, the command is retried (3 attempts in total). This +# is in the format for timeout(1); +# +# DURATION is a floating point number with an optional suffix: 's' +# for seconds (the default), 'm' for minutes, 'h' for hours or 'd' +# for days. +# +# Zero disables timeouts +GIT_TIMEOUT=${GIT_TIMEOUT:-0} + # Repositories # ------------