Add --timeout option to collect tool
This update adds a new --timeout command line option to the collect tool so that users can extend collect's global timeout. Prior to this update the collect tool had a fixed timeout of 1000 seconds (roughly 16.7 minutes). Collecting from hosts in large, busy systems can take an unpredictably long time, sometimes longer than 1000 seconds. This can be particularly true when collecting from an active controller that is deploying and managing lots of pods across many hosts. This new timeout option allows the user to specify a timeout in minutes, between 10 and 120, while defaulting to 20 minutes. The default or user-specified global timeout is passed to subclouds for subcloud collect as well. Test Plan: PASS: Verify new --timeout or -t options at command line arg level PASS: Verify --timeout <minutes> parse; error, in and out of bounds PASS: Verify timeout option is described in collect help PASS: Verify 110 minute collect with --timeout 120 PASS: Verify 45 minute collect times out with --timeout 40 PASS: Verify 2 minute collect with --timeout 10 PASS: Verify default timeout is 20 minutes PASS: Verify default or specified timeout is displayed PASS: Verify default or specified timeout is shared with the subcloud PASS: Verify timeout error handling. PASS: Verify collect error handling behavior if --timeout or -t is specified but the number of minutes is missing. Regression: PASS: Verify collect system and subcloud handling PASS: Verify system and subcloud dated collects; verified content PASS: Verify collect with a variety of options Closes-Bug: 2004666 Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com> Change-Id: Ib68b78f7c810f43fc8d13cbf291ac00f08c3c4f4
This commit is contained in:
parent
db7144f1cf
commit
0b079b4804
@ -342,6 +342,11 @@ function print_help()
|
||||
echo " collect -a -sc [--inline | -in] ... collect logs for all subclouds one after the other"
|
||||
echo " collect --subcloud --continue ... continue a suspended subcloud collect"
|
||||
echo ""
|
||||
echo "Collect Timeout"
|
||||
echo ""
|
||||
echo "collect [--timeout | -t] <minutes> ... collect with user specified timeout"
|
||||
echo " valid change range is 10-120 minutes"
|
||||
echo " default: 20 mins"
|
||||
echo "Dated Collect:"
|
||||
echo ""
|
||||
echo "collect [--start-date | -s] YYYYMMDD ... collection of logs on and after this date"
|
||||
@ -415,10 +420,16 @@ COLLECT_CONTINUE_MSG_NEEDED=false
|
||||
SUBCLOUD_COLLECT_CONTINUE=false
|
||||
SUBCLOUD_COLLECT_CONTINUE_LIST_FILE="/tmp/collect_continue.lst"
|
||||
|
||||
# Bounds and default for the overall collect timeout, in minutes.
# These are the units accepted by the --timeout | -t option.
declare -i TIMEOUT_MIN_MINS=10
declare -i TIMEOUT_MAX_MINS=120
declare -i TIMEOUT_DEF_MINS=20

# The same limits converted to seconds. Each value is derived from its
# own *_MINS counterpart so the two sets cannot drift apart.
declare -i TIMEOUT_MIN_SECS=$((TIMEOUT_MIN_MINS*60)) # 10 minutes
declare -i TIMEOUT_MAX_SECS=$((TIMEOUT_MAX_MINS*60)) # 120 minutes
declare -i TIMEOUT_DEF_SECS=$((TIMEOUT_DEF_MINS*60)) # 20 minutes
|
||||
|
||||
# overall collect timeout
|
||||
TIMEOUT=1000
|
||||
declare -i TIMEOUT=${TIMEOUT_DEF_SECS}
|
||||
SECONDS=0
|
||||
let UNTIL=${SECONDS}+${TIMEOUT}
|
||||
|
||||
COLLECT_NAME=""
|
||||
|
||||
@ -707,6 +718,22 @@ while [[ ${#} -gt 0 ]] ; do
|
||||
clear_variable_args
|
||||
;;
|
||||
|
||||
-t|--timeout)
    # ${2} is the requested overall collect timeout in minutes.
    # Accept only an unsigned decimal integer; a missing or non-numeric
    # value is rejected with FAIL_TIMEOUT_ARG.
    if [[ ${2} =~ ^[0-9]+$ ]] ; then
        if [ ${2} -lt ${TIMEOUT_MIN_MINS} -o \
             ${2} -gt ${TIMEOUT_MAX_MINS} ] ; then
            elog "timeout must be between ${TIMEOUT_MIN_MINS} and ${TIMEOUT_MAX_MINS} minutes"
            collect_exit ${FAIL_TIMEOUT_ARG}
        else
            # Force base 10. The range test above reads the value as
            # decimal, but shell arithmetic treats a leading zero as octal:
            # "0100" would become 64 minutes and "018" would raise an
            # arithmetic error.
            TIMEOUT="$((10#${2}*60))"
        fi
    else
        elog "timeout value must be an integer"
        collect_exit ${FAIL_TIMEOUT_ARG}
    fi
    # consume the option's value
    shift
    ;;
|
||||
|
||||
--skip-mask)
|
||||
SKIP_MASK=true
|
||||
shift
|
||||
@ -758,6 +785,9 @@ while [[ ${#} -gt 0 ]] ; do
|
||||
shift # past argument or value
|
||||
done
|
||||
|
||||
# The default TIMEOUT may have been revised with the --timeout option.
|
||||
# Update UNTIL with updated global timeout time in secs.
|
||||
let UNTIL=${SECONDS}+${TIMEOUT}
|
||||
|
||||
date -d $STARTDATE > /dev/null 2>/dev/null
|
||||
rc_start_date=${?}
|
||||
@ -1093,6 +1123,8 @@ pw=${pw/\[/\\\[} # replace '[' with '\['
|
||||
pw=${pw/$/\\$} # replace '$' with '\$'
|
||||
pw=${pw/\"/\\\"} # replace '"' with '\"'
|
||||
|
||||
ilog "collect bundle timeout set to $((${TIMEOUT}/60)) minutes"
|
||||
|
||||
###########################################################################
|
||||
#
|
||||
# Name : passwordless_sudo_test
|
||||
@ -1908,6 +1940,10 @@ function collect_subcloud_run()
|
||||
collect_cmd+=("-v")
|
||||
fi
|
||||
|
||||
# pass the timeout to the subcloud
|
||||
collect_cmd+=("-t $((${TIMEOUT}/60))")
|
||||
|
||||
# pass the date range to the subcloud
|
||||
collect_cmd+=("--start-date ${STARTDATE}")
|
||||
collect_cmd+=("--end-date $ENDDATE")
|
||||
|
||||
@ -3068,7 +3104,7 @@ if [ "${SUBCLOUD_COLLECT}" = true ] ; then
|
||||
if [ ${SUBCLOUDS} -gt ${TIMEOUT_THRESHOLD_FACTOR} -a "${PARALLEL_COLLECT_MODE}" = true ] ; then
|
||||
# adjust overall timeout to account for the large number of subclouds
|
||||
let UNTIL=$(((SUBCLOUDS*SUBCLOUDS_TIMEOUT_BOOST)+TIMEOUT))
|
||||
ilog "adjusted subcloud collect timout from ${TIMEOUT} to ${UNTIL} secs to account for ${SUBCLOUDS} subclouds"
|
||||
ilog "adjusted subcloud collect timeout from ${TIMEOUT} to ${UNTIL} secs to account for ${SUBCLOUDS} subclouds"
|
||||
fi
|
||||
if [ "${ALLHOSTS}" = true ] ; then
|
||||
if [ ${SUBCLOUDS} -gt ${MAX_LIST_PRINT} ] ; then
|
||||
|
@ -58,6 +58,7 @@ FAIL_NAME_TOO_LONG=55
|
||||
FAIL_INVALID_START_DATE=56
|
||||
FAIL_INVALID_END_DATE=57
|
||||
FAIL_INVALID_DATE_RANGE=58
|
||||
FAIL_TIMEOUT_ARG=59
|
||||
|
||||
# Warnings are above 200
|
||||
WARN_WARNING=200
|
||||
|
Loading…
Reference in New Issue
Block a user