Add a peak memory tracker to dstat
We can see at-a-glance memory usage during the run with dstat but we have no way to break that down into an overview of where memory is going. This adds a peer-service to dstat that records snapshots of the system during peak memory usage. It checks periodically if there is less memory available than before and, if so, records the running processes and vm overview. The intent is to add logic into the verify-pipeline jobs to use this report and send statistics on peak memory usage to statsd [1]. We can then build a picture of memory-usage growth over time. This type of report would have allowed better insight into issues such as those introduced by Idf3a3a914b54779172776822710b3e52e751b1d1, where memory usage jumped dramatically after switching to pip versions of libraries. Tracking details of memory usage is going to be an important part of future development. [1] http://graphite.openstack.org/ Change-Id: I4b0a8f382dcaa09331987ab84a68546ec29cbc18
This commit is contained in:
parent
c00d2a5313
commit
72a8be60cd
@ -21,11 +21,17 @@ function start_dstat {
|
||||
# A better kind of sysstat, with the top process per time slice
|
||||
DSTAT_OPTS="-tcmndrylpg --top-cpu-adv --top-io-adv"
|
||||
run_process dstat "dstat $DSTAT_OPTS"
|
||||
|
||||
# To enable peakmem_tracker add:
|
||||
# enable_service peakmem_tracker
|
||||
# to your localrc
|
||||
run_process peakmem_tracker "$TOP_DIR/tools/peakmem_tracker.sh"
|
||||
}
|
||||
|
||||
# stop_dstat() - Stop the dstat process and its peer peakmem_tracker
# service.  Each service is stopped through stop_process, in the same
# order they are listed; the return status is that of the last
# stop_process call.
function stop_dstat {
    local service

    for service in dstat peakmem_tracker; do
        stop_process "$service"
    done
}
|
||||
|
||||
# Restore xtrace
|
||||
|
96
tools/peakmem_tracker.sh
Executable file
96
tools/peakmem_tracker.sh
Executable file
@ -0,0 +1,96 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# Abort on the first command that fails outside of a conditional.
set -o errexit

# Default time (in seconds) to sleep between checks; the -s option
# overrides it.
SLEEP_TIME=20

# MemAvailable is the best estimation and has built-in heuristics
# around reclaimable memory.  However, it is not available until 3.14
# kernel (i.e. Ubuntu LTS Trusty misses it).  In that case, we fall
# back to free+buffers+cache as the available memory.
#
# USE_MEM_AVAILABLE is 1 when /proc/meminfo has a MemAvailable line and
# 0 otherwise.  It must be initialised under the same name that is
# tested later, otherwise the flag is left unset on older kernels.
USE_MEM_AVAILABLE=0
if grep -q '^MemAvailable:' /proc/meminfo; then
    USE_MEM_AVAILABLE=1
fi
|
||||
|
||||
# get_mem_available [meminfo_file]
#
# Print the amount of available memory found in a meminfo-format file,
# using whatever unit the file reports in field 2.
#
# Arguments:
#   $1 - optional path of the file to parse (default: /proc/meminfo)
# Globals:
#   USE_MEM_AVAILABLE (read) - when 1, prefer the MemAvailable line;
#       an unset or empty value is treated as 0
# Outputs:
#   the MemAvailable value when requested and present, otherwise the
#   sum MemFree + Buffers + Cached
function get_mem_available {
    local meminfo=${1:-/proc/meminfo}

    # A single pass collects every field so that the fallback sum is
    # still printed if MemAvailable was requested but the line is
    # missing, rather than printing nothing at all.
    awk -v use_avail="${USE_MEM_AVAILABLE:-0}" '
        /^MemAvailable:/ {avail = $2; have_avail = 1}
        /^MemFree:/      {free = $2}
        /^Buffers:/      {buffers = $2}
        /^Cached:/       {cached = $2}
        END {
            if (use_avail == 1 && have_avail) {
                print avail
            } else {
                print free + buffers + cached
            }
        }' "$meminfo"
}
|
||||
|
||||
# _peakmem_snapshot <mem_available>
#
# Write one snapshot record to stdout.  A record is delimited by "[[["
# and "]]]" lines and holds, separated by "---" lines: the current
# date, the low-point value passed as $1, the contents of
# /proc/meminfo and a process listing sorted by memory usage.
function _peakmem_snapshot {
    local mem_available=$1

    echo "[[["
    date
    echo "---"
    # always available greppable output; given difference in
    # meminfo output as described above...
    echo "peakmem_tracker low_point: $mem_available"
    echo "---"
    cat /proc/meminfo
    echo "---"
    # would hierarchical view be more useful (-H)?  output is
    # not sorted by usage then, however, and the first
    # question is "what's using up the memory"
    #
    # there are a lot of kernel threads, especially on a 8-cpu
    # system.  do a best-effort removal to improve
    # signal/noise ratio of output.
    ps --sort=-pmem -eo pid:10,pmem:6,rss:15,ppid:10,cputime:10,nlwp:8,wchan:25,args:100 |
        grep -v ']$'
    echo "]]]"
}

# whenever we see less memory available than last time, dump the
# snapshot of current usage; i.e. checking the latest entry in the
# file will give the peak-memory usage
#
# Loops forever, sampling get_mem_available and then sleeping
# SLEEP_TIME between samples.  A sample that is empty or not a plain
# non-negative integer is ignored: comparing it numerically would
# treat it as 0, record a bogus low point and suppress every later
# snapshot.
function tracker {
    local numeric='^[0-9]+$'
    local low_point mem_available

    low_point=$(get_mem_available) || low_point=""
    while true; do
        mem_available=$(get_mem_available) || mem_available=""

        if [[ ! $mem_available =~ $numeric ]]; then
            # unusable sample; keep the current low point
            :
        elif [[ ! $low_point =~ $numeric ]]; then
            # the initial reading was unusable; adopt this one as the
            # baseline without dumping a snapshot
            low_point=$mem_available
        elif [[ $mem_available -lt $low_point ]]; then
            low_point=$mem_available
            _peakmem_snapshot "$mem_available"
        fi

        sleep "$SLEEP_TIME"
    done
}
|
||||
|
||||
# usage - Print the accepted command-line options to stderr and
# terminate the script with exit status 1.
function usage {
    printf 'Usage: %s [-x] [-s N]\n' "$0" >&2
    exit 1
}
|
||||
|
||||
# Parse command-line options:
#   -s N   sleep N between checks (stored in SLEEP_TIME)
#   -x     turn on xtrace
# The leading ':' in the option string puts getopts in silent mode, so
# the diagnostics for bad options are printed here before usage exits.
while getopts ":s:x" opt; do
    case "$opt" in
        s)
            SLEEP_TIME=$OPTARG
            ;;
        x)
            set -o xtrace
            ;;
        :)
            # option given without its required argument
            echo "$0: option -$OPTARG requires an argument" 1>&2
            usage
            ;;
        *)
            # getopts reports an unknown option as '?'
            echo "$0: unknown option -$OPTARG" 1>&2
            usage
            ;;
    esac
done
# Drop the options that were consumed above.
shift $((OPTIND - 1))
|
||||
|
||||
# Enter the sampling loop.  tracker has no exit condition of its own,
# so this call does not return.
tracker
|
Loading…
Reference in New Issue
Block a user