#!/bin/bash

# Copyright (c) 2016-2017 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

# This script is used to parse all stats data. It is designed to be called by either
# parse-controllers.sh or parse-computes.sh and not used as a standalone script.
# If the input node is a controller, it will parse the controller specific postgres
# and rabbitmq stats first. If the input node is a compute, it will parse the compute
# specific vswitch stats first.
#
# The following parsing steps are common to all hosts and are executed in the specified order:
#   - Parse occtop
#   - Parse memtop
#   - Parse memstats (summary)
#   - Parse netstats
#   - Parse schedtop (summary)
#   - Parse iostats
#   - Parse diskstats
#   - Parse filestats (summary)
#   - Parse process level schedtop (optional step, configured in lab.conf)
#   - Generate tarball
if [[ $# != 1 ]]; then
    echo "ERROR: This script is meant to be called by either parse-controllers.sh or parse-computes.sh script."
    echo "To run it separately, copy the script to the host directory that contains *.bz2 files."
    echo "It takes a single argument - the name of the host directory (e.g. ./parse-all.sh controller-0)."
    exit 1
fi

source ../lab.conf
source ./host.conf

PARSERDIR=$(dirname $0)
. ${PARSERDIR}/parse-util.sh

NODE=$1

CURDATE=$(date)
DATESTAMP=$(date +%b-%d)
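# DATESTAMP resolves to an abbreviated month-day stamp (e.g. "Nov-22") and is
# embedded in the names of all generated output files.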

function sedit {
    local FILETOSED=$1
    sed -i -e "s/  */ /g" ${FILETOSED}
    sed -i -e "s/ /,/g" ${FILETOSED}
    # Remove any trailing comma
    sed -i "s/,$//" ${FILETOSED}
}
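
# Illustration of sedit on a hypothetical input line:
#   "cpu0   12.3   4.5"  ->  "cpu0 12.3 4.5"  ->  "cpu0,12.3,4.5"
# i.e. runs of spaces are squeezed to one space, then spaces become commas.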

function get_filename_from_mountname {
    local name=$1
    local fname
    if test "${name#*"scratch"}" != "${name}"; then
        fname="scratch"
    elif test "${name#*"log"}" != "${name}"; then
        fname="log"
    elif test "${name#*"backup"}" != "${name}"; then
        fname="backup"
    elif test "${name#*"ceph/mon"}" != "${name}"; then
        fname="cephmon"
    elif test "${name#*"conversion"}" != "${name}"; then
        fname="img-conversion"
    elif test "${name#*"platform"}" != "${name}"; then
        fname="platform"
    elif test "${name#*"postgres"}" != "${name}"; then
        fname="postgres"
    elif test "${name#*"cgcs"}" != "${name}"; then
        fname="cgcs"
    elif test "${name#*"rabbitmq"}" != "${name}"; then
        fname="rabbitmq"
    elif test "${name#*"instances"}" != "${name}"; then
        fname="pv"
    elif test "${name#*"ceph/osd"}" != "${name}"; then
        # The ceph disk partition has the following mount name convention:
        # /var/lib/ceph/osd/ceph-0
        fname=`basename ${name}`
    fi
    echo $fname
}
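
# Note on the test idiom above: ${name#*"pattern"} strips the shortest prefix
# ending with "pattern", so the result differs from ${name} only when the
# pattern occurs in the string. Hypothetical examples:
#   get_filename_from_mountname /var/lib/ceph/osd/ceph-0   -> "ceph-0"
#   get_filename_from_mountname /var/lib/postgres          -> "postgres"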

function parse_process_schedtop_data {
    # Logic has been moved to a separate script so that parsing process level schedtop
    # can be run either as part of the parse-all.sh script or independently.
    LOG "Process level schedtop parsing is turned on in lab.conf. Parsing schedtop detail..."
    cd ..
    ./parse-schedtop.sh ${NODE}
    cd ${NODE}
}
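
# Equivalent standalone invocation (run from the parent directory that holds
# parse-schedtop.sh): ./parse-schedtop.sh <node-name>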

function parse_controller_specific {
    # Parse postgres data, removing data from any previous run first. Generate summary
    # data for each database and detail data for the specified tables.
    LOG "Parsing postgres data for ${NODE}"
    if [ -z "${DATABASE_LIST}" ]; then
        WARNLOG "DATABASE_LIST is not set in the lab.conf file. Using the default setting."
        DATABASE_LIST="cinder glance keystone nova neutron ceilometer heat sysinv aodh barbican postgres nova_api"
    fi

    for DB in ${DATABASE_LIST}; do
        # -f suppresses the error when no files are left over from a previous run
        rm -f /tmp/${DB}*.csv
    done
    ../parse_postgres *postgres.bz2 > postgres-summary-${NODE}-${DATESTAMP}.txt
    for DB in ${DATABASE_LIST}; do
        cp /tmp/${DB}_size.csv postgres_${DB}_size.csv
    done
    for TABLE in ${TABLE_LIST}; do
        cp /tmp/${TABLE}.csv postgres_${TABLE}.csv
    done

    # Parse RabbitMQ data
    LOG "Parsing rabbitmq data for ${NODE}"
    ../parse-rabbitmq.sh rabbitmq-${NODE}.csv

    for QUEUE in ${RABBITMQ_QUEUE_LIST}; do
        # If the node is not a controller node, parse-rabbitmq-queue.sh should skip
        ../parse-rabbitmq-queue.sh rabbitmq-${QUEUE}-${NODE}.csv ${QUEUE}
    done
}
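
# Hypothetical lab.conf entries consumed by parse_controller_specific:
#   DATABASE_LIST="nova neutron sysinv"
#   TABLE_LIST="services compute_nodes"
#   RABBITMQ_QUEUE_LIST="notifications.info metering.sample"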

function parse_compute_specific {
    LOG "Parsing vswitch data for ${NODE}"
    ../parse-vswitch.sh ${NODE}
}

function parse_occtop_data {
    LOG "Parsing occtop data for ${NODE}"
    bzcat *occtop.bz2 > occtop-${NODE}-${DATESTAMP}.txt
    cp occtop-${NODE}-${DATESTAMP}.txt tmp.txt
    sedit tmp.txt
    # Get the highest column count
    column_count=$(awk -F "," '{print NF}' tmp.txt | sort -nu | tail -n 1)
    grep '^[0-9]' tmp.txt | cut -d, -f1,2 | awk -F "," '{print $1" "$2}' > tmpdate.txt
    grep '^[0-9]' tmp.txt | cut -d, -f3-$column_count > tmpcore.txt
    paste -d, tmpdate.txt tmpcore.txt > tmp2.txt
    # Generate the header based on the number of columns. The Date/Time column consists of the date and time fields.
    header="Date/Time,Total"
    count=$(($column_count-3))
    for i in $(seq 0 $(($count-1))); do
        header="$header,$i"
    done

    # Generate detailed CSV with Date/Time, Total CPU occupancy and individual core occupancies e.g.
    # Date/Time,Total,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35
    # 2016-11-22 00:29:16.523,759.5,21.4,18.9,43.8,24.5,23.1,25.3,28.1,25.5,20.5,27.8,26.8,32.7,27.3,25.1,21.1,23.2,21.7,36.4,23.3,16.6,15.3,13.9,14.4,15.0,14.7,14.4,16.4,13.8,17.0,17.8,19.0,15.1,14.0,13.2,14.5,17.8
    echo "${header}" > occtop-${NODE}-detailed.csv
    cat tmp2.txt >> occtop-${NODE}-detailed.csv

    # Generate a simple CSV file which is used to generate the host CPU occupancy chart. Platform cores are
    # defined in host.conf. The simple CSV contains only the Date/Time and Total platform CPU occupancy e.g.
    # Date/Time,Total
    # 2016-11-22 00:29:16.523,94.9
    # 2016-11-22 00:30:16.526,71.3

    if [ -z "${PLATFORM_CPU_LIST}" ]; then
        # A controller node in a standard system. In this case, all cores are dedicated to platform use.
        # Simply extract the Date/Time and Total CPU occupancy.
        cut -d, -f1,2 occtop-${NODE}-detailed.csv > occtop-${NODE}.csv
    else
        # A CPE, compute or storage node. The cores dedicated to platform use are specified in the config.
        echo "Date/Time,Total" > occtop-${NODE}.csv
        while read -r line || [[ -n "$line" ]]; do
            IFS="," read -r -a arr <<< "${line}"
            total=0
            for CORE in ${PLATFORM_CPU_LIST}; do
                # Add 2 to the index as the occupancy of each individual core starts after Date/Time and Total
                idx=$(($CORE+2))
                total=`echo $total + ${arr[$idx]} | bc`
            done
            echo "${arr[0]},${total}" >> occtop-${NODE}.csv
        done < tmp2.txt
    fi
    # Remove temporary files
    rm tmp.txt tmp2.txt tmpdate.txt tmpcore.txt
}
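
# Worked example for parse_occtop_data, assuming a hypothetical 4-core sample.
# After sedit, a line reads "2016-11-22,00:29:16.523,94.9,21.4,18.9,43.8,24.5"
# (7 fields), so column_count=7 and count=7-3=4, giving the detailed header
# "Date/Time,Total,0,1,2,3". With PLATFORM_CPU_LIST="0 1", the simple CSV row
# becomes "2016-11-22 00:29:16.523,40.3" (21.4 + 18.9, summed via bc).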

function parse_memtop_data {
    LOG "Parsing memtop data for ${NODE}"
    bzcat *memtop.bz2 > memtop-${NODE}-${DATESTAMP}.txt
    cp memtop-${NODE}-${DATESTAMP}.txt tmp.txt
    sedit tmp.txt

    # After dumping all memtop bz2 output into one text file and in-place sed, grab only the relevant data
    # for CSV output. Generate both detailed and simple CSV files. The simple output will be used to
    # generate the chart.
    grep '^[0-9]' tmp.txt | awk -F "," '{print $1" "$2","$3","$4","$5","$6","$7","$8","$9","$10","$11","$12","$13","$14","$15","$16","$17","$18}' > tmp2.txt
    echo "Date/Time,Total,Used,Free,Cached,Buf,Slab,CAS,CLim,Dirty,WBack,Anon,Avail,0:Avail,0:HFree,1:Avail,1:HFree" > memtop-${NODE}-detailed.csv
    cat tmp2.txt >> memtop-${NODE}-detailed.csv
    echo "Date/Time,Total,Anon" > memtop-${NODE}.csv
    cut -d, -f1-2,12 tmp2.txt >> memtop-${NODE}.csv
    # Remove temporary files
    rm tmp.txt tmp2.txt
}
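
# Field 12 of tmp2.txt lines up with the "Anon" column of the detailed header,
# so the simple CSV tracks total and anonymous memory over time.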

function parse_netstats_data {
    LOG "Parsing netstats data for ${NODE}"
    # First generate the summary data, then detail data for the specified interfaces
    ../parse_netstats *netstats.bz2 > netstats-summary-${NODE}-${DATESTAMP}.txt
    if [ -z "${NETSTATS_INTERFACE_LIST}" ]; then
        ERRLOG "NETSTATS_INTERFACE_LIST is not set in host.conf. Skipping detail netstats..."
    else
        for INTERFACE in ${NETSTATS_INTERFACE_LIST}; do
            echo "Date/Time,Interface,Rx PPS,Rx Mbps,Rx Packet Size,Tx PPS,Tx Mbps,Tx Packet Size" > netstats-${NODE}-${INTERFACE}.csv
            ../parse_netstats *netstats.bz2 | grep " ${INTERFACE} " > tmp.txt
            sed -i -e "s/|/ /g" tmp.txt
            sed -i -e "s/  */ /g" tmp.txt
            sed -i -e "s/ /,/g" tmp.txt
            # Remove the leading comma
            sed -i 's/,//' tmp.txt
            while read -r line || [[ -n "$line" ]]; do
                IFS="," read -r -a arr <<< "${line}"
                echo "${arr[8]} ${arr[9]},${arr[0]},${arr[2]},${arr[3]},${arr[4]},${arr[5]},${arr[6]},${arr[7]}" >> netstats-${NODE}-${INTERFACE}.csv
            done < tmp.txt
        done
        rm tmp.txt
    fi
}
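
# The loop above assumes parse_netstats emits pipe-delimited rows in which
# fields 8 and 9 (0-based, after the sed rewrites) hold the date and time;
# those two fields are moved to the front of each detail CSV row.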

function parse_iostats_data {
    LOG "Parsing iostat data for ${NODE}"
    if [ -z "${IOSTATS_DEVICE_LIST}" ]; then
        ERRLOG "IOSTATS_DEVICE_LIST is not set in host.conf. Skipping iostats..."
    else
        for DEVICE in ${IOSTATS_DEVICE_LIST}; do
            # Add a header to the output csv file
            echo "Date/Time,${DEVICE},rqm/s,wrqm/s,r/s,w/s,rkB/s,wkB/s,avgrq-sz,avgqu-sz,await,r_await,w_await,svctm,%util" > iostat-${NODE}-${DEVICE}.csv
            # Dump the iostat content into a tmp file
            bzcat *iostat.bz2 | grep -E "/2015|/2016|/2017|${DEVICE}" | awk '{print $1","$2","$3","$4","$5","$6","$7","$8","$9","$10","$11","$12","$13","$14}' > tmp.txt
            while IFS= read -r current; do
                if test "${current#*Linux}" != "$current"; then
                    # Skip the line that contains the word "Linux"
                    continue
                else
                    if test "${current#*$DEVICE}" == "$current"; then
                        # It's a date entry, look ahead
                        read -r next
                        if test "${next#*$DEVICE}" != "${next}"; then
                            # The next line contains the device stats.
                            # Combine the date and time fields ("2015" added to cover all years matched by the grep above)
                            current="${current//2015,/2015 }"
                            current="${current//2016,/2016 }"
                            current="${current//2017,/2017 }"
                            # Combine the time and AM/PM fields
                            current="${current//,AM/ AM}"
                            current="${current//,PM/ PM}"
                            # Write both lines to the intermediate file
                            echo "${current}" >> tmp2.txt
                            echo "${next}" >> tmp2.txt
                        fi
                    fi
                fi
            done < tmp.txt
            mv tmp2.txt tmp.txt
            # Combine the time and stats data into one line e.g.
            # 11/22/2016 06:34:00 AM,,,,,,,,,,,
            # dm-0,0.00,0.00,0.00,1.07,0.00,38.00,71.25,0.00,0.19,0.00,0.19,0.03,0.00
            paste -d "" - - < tmp.txt > tmp2.txt
            # Remove the empty fields, using the "complement" option for the contiguous field range
            cut -d, -f2-11 --complement tmp2.txt > tmp.txt
            # Write the final content to the output csv
            cat tmp.txt >> iostat-${NODE}-${DEVICE}.csv
            rm tmp.txt tmp2.txt
        done
    fi
}
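
# For the example above, the `paste -d "" - -` call concatenates each date line
# with the stats line that follows it; fields 2-11 of the merged record are the
# empty placeholders from the date row (the trailing empty field merges with
# "dm-0"), so `cut -f2-11 --complement` leaves
# "11/22/2016 06:34:00 AM,dm-0,0.00,...".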

function parse_diskstats_data {
    LOG "Parsing diskstats data for ${NODE}"

    if [ -z "${DISKSTATS_FILESYSTEM_LIST}" ]; then
        ERRLOG "DISKSTATS_FILESYSTEM_LIST is not set in host.conf. Skipping diskstats..."
    else
        for FS in ${DISKSTATS_FILESYSTEM_LIST}; do
            fspair=(${FS//|/ })
            fsname=${fspair[0]}
            mountname=${fspair[1]}
            if [ ${mountname} == "/" ]; then
                # Quote the pattern so the leading space survives and anchors the root mount
                mountname=" /"
                echo "File system,Type,Size,Used,Avail,Used(%)" > diskstats-${NODE}-root.csv
                bzcat *diskstats.bz2 | grep $fsname | grep "$mountname" | grep G | awk '{print $1","$2","$3","$4","$5","$6}' >> diskstats-${NODE}-root.csv
            else
                fname=$(get_filename_from_mountname $mountname)
                echo "File system,Type,Size,Used,Avail,Used(%)" > diskstats-${NODE}-$fname.csv
                bzcat *diskstats.bz2 | grep $fsname | grep $mountname | grep G | awk '{print $1","$2","$3","$4","$5","$6}' >> diskstats-${NODE}-$fname.csv
            fi
        done
    fi
}
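
# DISKSTATS_FILESYSTEM_LIST entries pair a filesystem with its mount point
# using a "|" separator; a hypothetical entry "/dev/sda3|/" expands via
# ${FS//|/ } to fsname=/dev/sda3 and mountname=/.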

# Parsing starts here ...
LOG "Parsing ${NODE} files - ${CURDATE}"

# Let's get the host specific parsing out of the way
if test "${NODE#*"controller"}" != "${NODE}"; then
    parse_controller_specific
elif test "${NODE#*"compute"}" != "${NODE}"; then
    parse_compute_specific
fi

# Parsing CPU occtop data
parse_occtop_data

# Parsing memtop data
parse_memtop_data

# Parsing memstats data to generate the high level report. The most important piece of info is the list of
# hi-runners at the end of the file. If there is a leak, run the parse-daily.sh script to generate the time
# series data for the offending processes only. Use the process name, not the PID, as most Titanium Cloud
# processes have workers.
LOG "Parsing memstats summary for ${NODE}"
../parse_memstats --report *memstats.bz2 > memstats-summary-${NODE}-${DATESTAMP}.txt
#tar czf pidstats.tgz pid-*.csv
rm pid-*.csv

# Parsing netstats data
parse_netstats_data

# Parsing schedtop data to generate the high level report. Leave the process level schedtop parsing till
# the end as it is a long running task.
LOG "Parsing schedtop summary for ${NODE}"
FILES=$(ls *schedtop.bz2)
../parse_schedtop ${FILES} > schedtop-summary-${NODE}-${DATESTAMP}.txt

# Parsing iostats data
parse_iostats_data

# Parsing diskstats data
parse_diskstats_data

# Parsing filestats data to generate the high level report. If there is a file descriptor leak, run the
# parse-daily.sh script to generate the time series data for the offending processes only. Use the process
# name, not the PID, as most Titanium Cloud processes have workers.
LOG "Parsing filestats summary for ${NODE}"
../parse_filestats --all *filestats.bz2 > filestats-summary-${NODE}-${DATESTAMP}.txt

# Parsing process level schedtop data. This is a long running task. To skip this step or to generate data
# for only specific processes, update the lab.conf and host.conf files.
[[ ${GENERATE_PROCESS_SCHEDTOP} == Y ]] && parse_process_schedtop_data || WARNLOG "Parsing process level schedtop is skipped."
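
# Note: with the "cmd && a || b" form above, WARNLOG also fires if
# parse_process_schedtop_data itself returns a nonzero status, not only when
# GENERATE_PROCESS_SCHEDTOP is not "Y".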

# Done parsing for this host. If it's a controller host, check that the parsing of postgres connection
# stats, which runs in parallel, is done before creating the tar file.
if test "${NODE#*"controller"}" != "${NODE}"; then
    # If the postgres-conns.csv file has not been created, which is highly unlikely, wait a couple of minutes
    [ ! -e postgres-conns.csv ] && sleep 120

    # Now check the size of this file every 5 seconds to see if it's still being updated. Another option
    # is to use inotify, which requires the extra inotify-tools package.
    oldsize=0
    newsize=0
    while true; do
        newsize=$(stat -c %s postgres-conns.csv)
        if [ "$oldsize" == "$newsize" ]; then
            break
        fi
        oldsize=$newsize
        sleep 5
    done
fi
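
# The polling loop treats two identical size readings taken 5 seconds apart as
# an indication that the postgres connection stats writer has finished.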

tar czf syseng-data-${NODE}-${DATESTAMP}.tgz *.csv *.txt
LOG "Parsing stats data for ${NODE} completed!"