swift/bin/swift-recon
Christian Schwede 33706cb974 Add option to return disk usage report with byte suffix.
Disk usage is currently returned as bytes. This patch adds the option
to return the value with a more human-friendly suffix, eg. MB/GB/TB/PB.

Change-Id: I3d55181f7e6085e711bd6e09ea2ce0a12b290cc5
2013-12-03 16:29:00 +00:00

870 lines
35 KiB
Python
Executable File

#! /usr/bin/env python
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
cmdline utility to perform cluster reconnaissance
"""
from eventlet.green import urllib2
from swift.common.ring import Ring
from urlparse import urlparse
try:
import simplejson as json
except ImportError:
import json
from hashlib import md5
import eventlet
import optparse
import time
import sys
import os
def seconds2timeunit(seconds):
elapsed = seconds
unit = 'seconds'
if elapsed >= 60:
elapsed = elapsed / 60.0
unit = 'minutes'
if elapsed >= 60:
elapsed = elapsed / 60.0
unit = 'hours'
if elapsed >= 24:
elapsed = elapsed / 24.0
unit = 'days'
return elapsed, unit
def size_suffix(size):
suffixes = ['bytes', 'kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']
for suffix in suffixes:
if size < 1000:
return "%s %s" % (size, suffix)
size = size / 1000
return "%s %s" % (size, suffix)
class Scout(object):
"""
Obtain swift recon information
"""
def __init__(self, recon_type, verbose=False, suppress_errors=False,
timeout=5):
self.recon_type = recon_type
self.verbose = verbose
self.suppress_errors = suppress_errors
self.timeout = timeout
def scout_host(self, base_url, recon_type):
"""
Perform the actual HTTP request to obtain swift recon telemtry.
:param base_url: the base url of the host you wish to check. str of the
format 'http://127.0.0.1:6000/recon/'
:param recon_type: the swift recon check to request.
:returns: tuple of (recon url used, response body, and status)
"""
url = base_url + recon_type
try:
body = urllib2.urlopen(url, timeout=self.timeout).read()
content = json.loads(body)
if self.verbose:
print "-> %s: %s" % (url, content)
status = 200
except urllib2.HTTPError as err:
if not self.suppress_errors or self.verbose:
print "-> %s: %s" % (url, err)
content = err
status = err.code
except urllib2.URLError as err:
if not self.suppress_errors or self.verbose:
print "-> %s: %s" % (url, err)
content = err
status = -1
return url, content, status
def scout(self, host):
"""
Obtain telemetry from a host running the swift recon middleware.
:param host: host to check
:returns: tuple of (recon url used, response body, and status)
"""
base_url = "http://%s:%s/recon/" % (host[0], host[1])
url, content, status = self.scout_host(base_url, self.recon_type)
return url, content, status
class SwiftRecon(object):
"""
Retrieve and report cluster info from hosts running recon middleware.
"""
def __init__(self):
self.verbose = False
self.suppress_errors = False
self.timeout = 5
self.pool_size = 30
self.pool = eventlet.GreenPool(self.pool_size)
self.check_types = ['account', 'container', 'object']
self.server_type = 'object'
def _gen_stats(self, stats, name=None):
"""Compute various stats from a list of values."""
cstats = [x for x in stats if x is not None]
if len(cstats) > 0:
ret_dict = {'low': min(cstats), 'high': max(cstats),
'total': sum(cstats), 'reported': len(cstats),
'number_none': len(stats) - len(cstats), 'name': name}
ret_dict['average'] = \
ret_dict['total'] / float(len(cstats))
ret_dict['perc_none'] = \
ret_dict['number_none'] * 100.0 / len(stats)
else:
ret_dict = {'reported': 0}
return ret_dict
def _print_stats(self, stats):
"""
print out formatted stats to console
:param stats: dict of stats generated by _gen_stats
"""
print '[%(name)s] low: %(low)d, high: %(high)d, avg: ' \
'%(average).1f, total: %(total)d, ' \
'Failed: %(perc_none).1f%%, no_result: %(number_none)d, ' \
'reported: %(reported)d' % stats
def _ptime(self, timev=None):
"""
:param timev: a unix timestamp or None
:returns: a pretty string of the current time or provided time
"""
if timev:
return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timev))
else:
return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
def get_devices(self, zone_filter, swift_dir, ring_name):
"""
Get a list of hosts in the ring
:param zone_filter: Only list zones matching given filter
:param swift_dir: Directory of swift config, usually /etc/swift
:param ring_name: Name of the ring, such as 'object'
:returns: a set of tuples containing the ip and port of hosts
"""
ring_data = Ring(swift_dir, ring_name=ring_name)
if zone_filter:
ips = set((n['ip'], n['port']) for n in ring_data.devs
if n and n['zone'] == zone_filter)
else:
ips = set((n['ip'], n['port']) for n in ring_data.devs if n)
return ips
def get_ringmd5(self, hosts, ringfile):
"""
Compare ring md5sum's with those on remote host
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
:param ringfile: The local ring file to compare the md5sum with.
"""
stats = {}
matches = 0
errors = 0
md5sum = md5()
with open(ringfile, 'rb') as f:
block = f.read(4096)
while block:
md5sum.update(block)
block = f.read(4096)
ring_sum = md5sum.hexdigest()
recon = Scout("ringmd5", self.verbose, self.suppress_errors,
self.timeout)
print "[%s] Checking ring md5sums" % self._ptime()
if self.verbose:
print "-> On disk %s md5sum: %s" % (ringfile, ring_sum)
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
stats[url] = response[ringfile]
if response[ringfile] != ring_sum:
print "!! %s (%s) doesn't match on disk md5sum" % \
(url, response[ringfile])
else:
matches = matches + 1
if self.verbose:
print "-> %s matches." % url
else:
errors = errors + 1
print "%s/%s hosts matched, %s error[s] while checking hosts." \
% (matches, len(hosts), errors)
print "=" * 79
def async_check(self, hosts):
"""
Obtain and print async pending statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
scan = {}
recon = Scout("async", self.verbose, self.suppress_errors,
self.timeout)
print "[%s] Checking async pendings" % self._ptime()
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
scan[url] = response['async_pending']
stats = self._gen_stats(scan.values(), 'async_pending')
if stats['reported'] > 0:
self._print_stats(stats)
else:
print "[async_pending] - No hosts returned valid data."
print "=" * 79
def umount_check(self, hosts):
"""
Check for and print unmounted drives
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
stats = {}
recon = Scout("unmounted", self.verbose, self.suppress_errors,
self.timeout)
print "[%s] Getting unmounted drives from %s hosts..." % \
(self._ptime(), len(hosts))
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
stats[url] = []
for i in response:
stats[url].append(i['device'])
for host in stats:
node = urlparse(host).netloc
for entry in stats[host]:
print "Not mounted: %s on %s" % (entry, node)
print "=" * 79
def expirer_check(self, hosts):
"""
Obtain and print expirer statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
stats = {'object_expiration_pass': [], 'expired_last_pass': []}
recon = Scout("expirer/%s" % self.server_type, self.verbose,
self.suppress_errors, self.timeout)
print "[%s] Checking on expirers" % self._ptime()
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
stats['object_expiration_pass'].append(
response.get('object_expiration_pass'))
stats['expired_last_pass'].append(
response.get('expired_last_pass'))
for k in stats:
if stats[k]:
computed = self._gen_stats(stats[k], name=k)
if computed['reported'] > 0:
self._print_stats(computed)
else:
print "[%s] - No hosts returned valid data." % k
else:
print "[%s] - No hosts returned valid data." % k
print "=" * 79
def replication_check(self, hosts):
"""
Obtain and print replication statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
stats = {'replication_time': [], 'failure': [], 'success': [],
'attempted': []}
recon = Scout("replication/%s" % self.server_type, self.verbose,
self.suppress_errors, self.timeout)
print "[%s] Checking on replication" % self._ptime()
least_recent_time = 9999999999
least_recent_url = None
most_recent_time = 0
most_recent_url = None
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
stats['replication_time'].append(
response.get('replication_time'))
repl_stats = response['replication_stats']
if repl_stats:
for stat_key in ['attempted', 'failure', 'success']:
stats[stat_key].append(repl_stats.get(stat_key))
last = response.get('replication_last', 0)
if last < least_recent_time:
least_recent_time = last
least_recent_url = url
if last > most_recent_time:
most_recent_time = last
most_recent_url = url
for k in stats:
if stats[k]:
if k != 'replication_time':
computed = self._gen_stats(stats[k],
name='replication_%s' % k)
else:
computed = self._gen_stats(stats[k], name=k)
if computed['reported'] > 0:
self._print_stats(computed)
else:
print "[%s] - No hosts returned valid data." % k
else:
print "[%s] - No hosts returned valid data." % k
if least_recent_url is not None:
host = urlparse(least_recent_url).netloc
if not least_recent_time:
print 'Oldest completion was NEVER by %s.' % host
else:
elapsed = time.time() - least_recent_time
elapsed, elapsed_unit = seconds2timeunit(elapsed)
print 'Oldest completion was %s (%d %s ago) by %s.' % (
time.strftime('%Y-%m-%d %H:%M:%S',
time.gmtime(least_recent_time)),
elapsed, elapsed_unit, host)
if most_recent_url is not None:
host = urlparse(most_recent_url).netloc
elapsed = time.time() - most_recent_time
elapsed, elapsed_unit = seconds2timeunit(elapsed)
print 'Most recent completion was %s (%d %s ago) by %s.' % (
time.strftime('%Y-%m-%d %H:%M:%S',
time.gmtime(most_recent_time)),
elapsed, elapsed_unit, host)
print "=" * 79
def object_replication_check(self, hosts):
"""
Obtain and print replication statistics from object servers
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
stats = {}
recon = Scout("replication", self.verbose, self.suppress_errors,
self.timeout)
print "[%s] Checking on replication" % self._ptime()
least_recent_time = 9999999999
least_recent_url = None
most_recent_time = 0
most_recent_url = None
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
stats[url] = response['object_replication_time']
last = response.get('object_replication_last', 0)
if last < least_recent_time:
least_recent_time = last
least_recent_url = url
if last > most_recent_time:
most_recent_time = last
most_recent_url = url
times = [x for x in stats.values() if x is not None]
if len(stats) > 0 and len(times) > 0:
computed = self._gen_stats(times, 'replication_time')
if computed['reported'] > 0:
self._print_stats(computed)
else:
print "[replication_time] - No hosts returned valid data."
else:
print "[replication_time] - No hosts returned valid data."
if least_recent_url is not None:
host = urlparse(least_recent_url).netloc
if not least_recent_time:
print 'Oldest completion was NEVER by %s.' % host
else:
elapsed = time.time() - least_recent_time
elapsed, elapsed_unit = seconds2timeunit(elapsed)
print 'Oldest completion was %s (%d %s ago) by %s.' % (
time.strftime('%Y-%m-%d %H:%M:%S',
time.gmtime(least_recent_time)),
elapsed, elapsed_unit, host)
if most_recent_url is not None:
host = urlparse(most_recent_url).netloc
elapsed = time.time() - most_recent_time
elapsed, elapsed_unit = seconds2timeunit(elapsed)
print 'Most recent completion was %s (%d %s ago) by %s.' % (
time.strftime('%Y-%m-%d %H:%M:%S',
time.gmtime(most_recent_time)),
elapsed, elapsed_unit, host)
print "=" * 79
def updater_check(self, hosts):
"""
Obtain and print updater statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
stats = []
recon = Scout("updater/%s" % self.server_type, self.verbose,
self.suppress_errors, self.timeout)
print "[%s] Checking updater times" % self._ptime()
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
if response['%s_updater_sweep' % self.server_type]:
stats.append(response['%s_updater_sweep' %
self.server_type])
if len(stats) > 0:
computed = self._gen_stats(stats, name='updater_last_sweep')
if computed['reported'] > 0:
self._print_stats(computed)
else:
print "[updater_last_sweep] - No hosts returned valid data."
else:
print "[updater_last_sweep] - No hosts returned valid data."
print "=" * 79
def auditor_check(self, hosts):
"""
Obtain and print obj auditor statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
scan = {}
adone = '%s_auditor_pass_completed' % self.server_type
afail = '%s_audits_failed' % self.server_type
apass = '%s_audits_passed' % self.server_type
asince = '%s_audits_since' % self.server_type
recon = Scout("auditor/%s" % self.server_type, self.verbose,
self.suppress_errors, self.timeout)
print "[%s] Checking auditor stats" % self._ptime()
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
scan[url] = response
if len(scan) < 1:
print "Error: No hosts available"
return
stats = {}
stats[adone] = [scan[i][adone] for i in scan
if scan[i][adone] is not None]
stats[afail] = [scan[i][afail] for i in scan
if scan[i][afail] is not None]
stats[apass] = [scan[i][apass] for i in scan
if scan[i][apass] is not None]
stats[asince] = [scan[i][asince] for i in scan
if scan[i][asince] is not None]
for k in stats:
if len(stats[k]) < 1:
print "[%s] - No hosts returned valid data." % k
else:
if k != asince:
computed = self._gen_stats(stats[k], k)
if computed['reported'] > 0:
self._print_stats(computed)
if len(stats[asince]) >= 1:
low = min(stats[asince])
high = max(stats[asince])
total = sum(stats[asince])
average = total / len(stats[asince])
print '[last_pass] oldest: %s, newest: %s, avg: %s' % \
(self._ptime(low), self._ptime(high), self._ptime(average))
print "=" * 79
def object_auditor_check(self, hosts):
"""
Obtain and print obj auditor statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
all_scan = {}
zbf_scan = {}
atime = 'audit_time'
bprocessed = 'bytes_processed'
passes = 'passes'
errors = 'errors'
quarantined = 'quarantined'
recon = Scout("auditor/object", self.verbose, self.suppress_errors,
self.timeout)
print "[%s] Checking auditor stats " % self._ptime()
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
if response['object_auditor_stats_ALL']:
all_scan[url] = response['object_auditor_stats_ALL']
if response['object_auditor_stats_ZBF']:
zbf_scan[url] = response['object_auditor_stats_ZBF']
if len(all_scan) > 0:
stats = {}
stats[atime] = [all_scan[i][atime] for i in all_scan]
stats[bprocessed] = [all_scan[i][bprocessed] for i in all_scan]
stats[passes] = [all_scan[i][passes] for i in all_scan]
stats[errors] = [all_scan[i][errors] for i in all_scan]
stats[quarantined] = [all_scan[i][quarantined] for i in all_scan]
for k in stats:
if None in stats[k]:
stats[k] = [x for x in stats[k] if x is not None]
if len(stats[k]) < 1:
print "[Auditor %s] - No hosts returned valid data." % k
else:
computed = self._gen_stats(stats[k],
name='ALL_%s_last_path' % k)
if computed['reported'] > 0:
self._print_stats(computed)
else:
print "[ALL_auditor] - No hosts returned valid data."
else:
print "[ALL_auditor] - No hosts returned valid data."
if len(zbf_scan) > 0:
stats = {}
stats[atime] = [zbf_scan[i][atime] for i in zbf_scan]
stats[bprocessed] = [zbf_scan[i][bprocessed] for i in zbf_scan]
stats[errors] = [zbf_scan[i][errors] for i in zbf_scan]
stats[quarantined] = [zbf_scan[i][quarantined] for i in zbf_scan]
for k in stats:
if None in stats[k]:
stats[k] = [x for x in stats[k] if x is not None]
if len(stats[k]) < 1:
print "[Auditor %s] - No hosts returned valid data." % k
else:
computed = self._gen_stats(stats[k],
name='ZBF_%s_last_path' % k)
if computed['reported'] > 0:
self._print_stats(computed)
else:
print "[ZBF_auditor] - No hosts returned valid data."
else:
print "[ZBF_auditor] - No hosts returned valid data."
print "=" * 79
def load_check(self, hosts):
"""
Obtain and print load average statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
load1 = {}
load5 = {}
load15 = {}
recon = Scout("load", self.verbose, self.suppress_errors,
self.timeout)
print "[%s] Checking load averages" % self._ptime()
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
load1[url] = response['1m']
load5[url] = response['5m']
load15[url] = response['15m']
stats = {"1m": load1, "5m": load5, "15m": load15}
for item in stats:
if len(stats[item]) > 0:
computed = self._gen_stats(stats[item].values(),
name='%s_load_avg' % item)
self._print_stats(computed)
else:
print "[%s_load_avg] - No hosts returned valid data." % item
print "=" * 79
def quarantine_check(self, hosts):
"""
Obtain and print quarantine statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
objq = {}
conq = {}
acctq = {}
recon = Scout("quarantined", self.verbose, self.suppress_errors,
self.timeout)
print "[%s] Checking quarantine" % self._ptime()
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
objq[url] = response['objects']
conq[url] = response['containers']
acctq[url] = response['accounts']
stats = {"objects": objq, "containers": conq, "accounts": acctq}
for item in stats:
if len(stats[item]) > 0:
computed = self._gen_stats(stats[item].values(),
name='quarantined_%s' % item)
self._print_stats(computed)
else:
print "No hosts returned valid data."
print "=" * 79
def socket_usage(self, hosts):
"""
Obtain and print /proc/net/sockstat statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
inuse4 = {}
mem = {}
inuse6 = {}
timewait = {}
orphan = {}
recon = Scout("sockstat", self.verbose, self.suppress_errors,
self.timeout)
print "[%s] Checking socket usage" % self._ptime()
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
inuse4[url] = response['tcp_in_use']
mem[url] = response['tcp_mem_allocated_bytes']
inuse6[url] = response['tcp6_in_use']
timewait[url] = response['time_wait']
orphan[url] = response['orphan']
stats = {"tcp_in_use": inuse4, "tcp_mem_allocated_bytes": mem,
"tcp6_in_use": inuse6, "time_wait": timewait,
"orphan": orphan}
for item in stats:
if len(stats[item]) > 0:
computed = self._gen_stats(stats[item].values(), item)
self._print_stats(computed)
else:
print "No hosts returned valid data."
print "=" * 79
def disk_usage(self, hosts, top=0, human_readable=False):
"""
Obtain and print disk usage statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
stats = {}
highs = []
lows = []
raw_total_used = []
raw_total_avail = []
percents = {}
top_percents = [(None, 0)] * top
recon = Scout("diskusage", self.verbose, self.suppress_errors,
self.timeout)
print "[%s] Checking disk usage now" % self._ptime()
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
hostusage = []
for entry in response:
if entry['mounted']:
used = float(entry['used']) / float(entry['size']) \
* 100.0
raw_total_used.append(entry['used'])
raw_total_avail.append(entry['avail'])
hostusage.append(round(used, 2))
for ident, oused in top_percents:
if oused < used:
top_percents.append(
(url + ' ' + entry['device'], used))
top_percents.sort(key=lambda x: -x[1])
top_percents.pop()
break
stats[url] = hostusage
for url in stats:
if len(stats[url]) > 0:
#get per host hi/los for another day
low = min(stats[url])
high = max(stats[url])
highs.append(high)
lows.append(low)
for percent in stats[url]:
percents[int(percent)] = percents.get(int(percent), 0) + 1
else:
print "-> %s: Error. No drive info available." % url
if len(lows) > 0:
low = min(lows)
high = max(highs)
#dist graph shamelessly stolen from https://github.com/gholt/tcod
print "Distribution Graph:"
mul = 69.0 / max(percents.values())
for percent in sorted(percents):
print '% 3d%%%5d %s' % (percent, percents[percent],
'*' * int(percents[percent] * mul))
raw_used = sum(raw_total_used)
raw_avail = sum(raw_total_avail)
raw_total = raw_used + raw_avail
avg_used = 100.0 * raw_used / raw_total
if human_readable:
raw_used = size_suffix(raw_used)
raw_avail = size_suffix(raw_avail)
raw_total = size_suffix(raw_total)
print "Disk usage: space used: %s of %s" % (raw_used, raw_total)
print "Disk usage: space free: %s of %s" % (raw_avail, raw_total)
print "Disk usage: lowest: %s%%, highest: %s%%, avg: %s%%" % \
(low, high, avg_used)
else:
print "No hosts returned valid data."
print "=" * 79
if top_percents:
print 'TOP %s' % top
for ident, used in top_percents:
if ident:
url, device = ident.split()
host = urlparse(url).netloc.split(':')[0]
print '%.02f%% %s' % (used, '%-15s %s' % (host, device))
def main(self):
"""
Retrieve and report cluster info from hosts running recon middleware.
"""
print "=" * 79
usage = '''
usage: %prog <server_type> [-v] [--suppress] [-a] [-r] [-u] [-d]
[-l] [--md5] [--auditor] [--updater] [--expirer] [--sockstat]
[--human-readable]
<server_type>\taccount|container|object
Defaults to object server.
ex: %prog container -l --auditor
'''
args = optparse.OptionParser(usage)
args.add_option('--verbose', '-v', action="store_true",
help="Print verbose info")
args.add_option('--suppress', action="store_true",
help="Suppress most connection related errors")
args.add_option('--async', '-a', action="store_true",
help="Get async stats")
args.add_option('--replication', '-r', action="store_true",
help="Get replication stats")
args.add_option('--auditor', action="store_true",
help="Get auditor stats")
args.add_option('--updater', action="store_true",
help="Get updater stats")
args.add_option('--expirer', action="store_true",
help="Get expirer stats")
args.add_option('--unmounted', '-u', action="store_true",
help="Check cluster for unmounted devices")
args.add_option('--diskusage', '-d', action="store_true",
help="Get disk usage stats")
args.add_option('--human-readable', action="store_true",
help="Use human readable suffix for disk usage stats")
args.add_option('--loadstats', '-l', action="store_true",
help="Get cluster load average stats")
args.add_option('--quarantined', '-q', action="store_true",
help="Get cluster quarantine stats")
args.add_option('--md5', action="store_true",
help="Get md5sum of servers ring and compare to "
"local copy")
args.add_option('--sockstat', action="store_true",
help="Get cluster socket usage stats")
args.add_option('--top', type='int', metavar='COUNT', default=0,
help='Also show the top COUNT entries in rank order.')
args.add_option('--all', action="store_true",
help="Perform all checks. Equal to -arudlq --md5 "
"--sockstat")
args.add_option('--zone', '-z', type="int",
help="Only query servers in specified zone")
args.add_option('--timeout', '-t', type="int", metavar="SECONDS",
help="Time to wait for a response from a server",
default=5)
args.add_option('--swiftdir', default="/etc/swift",
help="Default = /etc/swift")
options, arguments = args.parse_args()
if len(sys.argv) <= 1 or len(arguments) > 1:
args.print_help()
sys.exit(0)
if arguments:
if arguments[0] in self.check_types:
self.server_type = arguments[0]
else:
print "Invalid Server Type"
args.print_help()
sys.exit(1)
else:
self.server_type = 'object'
swift_dir = options.swiftdir
ring_file = os.path.join(swift_dir, '%s.ring.gz' % self.server_type)
self.verbose = options.verbose
self.suppress_errors = options.suppress
self.timeout = options.timeout
if options.zone:
hosts = self.get_devices(options.zone, swift_dir, self.server_type)
else:
hosts = self.get_devices(None, swift_dir, self.server_type)
print "--> Starting reconnaissance on %s hosts" % len(hosts)
print "=" * 79
if options.all:
if self.server_type == 'object':
self.async_check(hosts)
self.object_replication_check(hosts)
self.object_auditor_check(hosts)
self.updater_check(hosts)
self.expirer_check(hosts)
elif self.server_type == 'container':
self.replication_check(hosts)
self.auditor_check(hosts)
self.updater_check(hosts)
elif self.server_type == 'account':
self.replication_check(hosts)
self.auditor_check(hosts)
self.umount_check(hosts)
self.load_check(hosts)
self.disk_usage(hosts)
self.get_ringmd5(hosts, ring_file)
self.quarantine_check(hosts)
self.socket_usage(hosts)
else:
if options.async:
if self.server_type == 'object':
self.async_check(hosts)
else:
print "Error: Can't check async's on non object servers."
if options.unmounted:
self.umount_check(hosts)
if options.replication:
if self.server_type == 'object':
self.object_replication_check(hosts)
else:
self.replication_check(hosts)
if options.auditor:
if self.server_type == 'object':
self.object_auditor_check(hosts)
else:
self.auditor_check(hosts)
if options.updater:
if self.server_type == 'account':
print "Error: Can't check updaters on account servers."
else:
self.updater_check(hosts)
if options.expirer:
if self.server_type == 'object':
self.expirer_check(hosts)
else:
print "Error: Can't check expired on non object servers."
if options.loadstats:
self.load_check(hosts)
if options.diskusage:
self.disk_usage(hosts, options.top, options.human_readable)
if options.md5:
self.get_ringmd5(hosts, ring_file)
if options.quarantined:
self.quarantine_check(hosts)
if options.sockstat:
self.socket_usage(hosts)
if __name__ == '__main__':
try:
reconnoiter = SwiftRecon()
reconnoiter.main()
except KeyboardInterrupt:
print '\n'