
385 lines
16 KiB
Raw Normal View History

#!/usr/bin/env python
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
2010-07-12 17:03:45 -05:00
import os
import sys
from hashlib import md5
import getopt
from itertools import chain
import json
2010-07-12 17:03:45 -05:00
from eventlet.greenpool import GreenPool
from eventlet.event import Event
from six.moves.urllib.parse import quote
2010-07-12 17:03:45 -05:00
from swift.common.ring import Ring
from swift.common.utils import split_path
from swift.common.bufferedhttp import http_connect
# Command-line help text; %(cmd)s is replaced with the invoked script name.
usage = """
%(cmd)s [options] [url 1] [url 2] ...
-c [concurrency] Set the concurrency, default 50
-r [ring dir] Ring locations, default /etc/swift
-e [filename] File for writing a list of inconsistent urls
-d Also download files and verify md5
You can also feed a list of urls to the script through stdin.
%(cmd)s AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076
%(cmd)s AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076/container/object
%(cmd)s -e errors.txt AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076/container
%(cmd)s < errors.txt
%(cmd)s -c 25 -d < errors.txt
""" % {'cmd': sys.argv[0]}
class Auditor(object):
    """Cross-check Swift account, container and object replicas.

    Each ``audit_*`` method queries every node the relevant ring returns,
    prints any disagreement it finds, bumps the matching counter and, when
    an error file is configured, appends the inconsistent path to it.
    Account and container listings are cached in ``list_cache`` so that
    concurrent audits of the same entity only fetch it once.
    """

    def __init__(self, swift_dir='/etc/swift', concurrency=50, deep=False,
                 error_file=None):
        """
        :param swift_dir: directory handed to ``Ring`` to load the
                          object, container and account rings
        :param concurrency: size of the green thread pool
        :param deep: when true, objects are fully downloaded and their
                     MD5 compared with the ETag; otherwise only HEAD
                     requests are made
        :param error_file: optional path of a file to which every
                           inconsistent path is appended
        """
        self.pool = GreenPool(concurrency)
        self.object_ring = Ring(swift_dir, ring_name='object')
        self.container_ring = Ring(swift_dir, ring_name='container')
        self.account_ring = Ring(swift_dir, ring_name='account')
        self.deep = deep
        self.error_file = error_file
        # zero out stats
        self.accounts_checked = self.account_exceptions = \
            self.account_not_found = self.account_container_mismatch = \
            self.account_object_mismatch = self.objects_checked = \
            self.object_exceptions = self.object_not_found = \
            self.object_checksum_mismatch = self.containers_checked = \
            self.container_exceptions = self.container_count_mismatch = \
            self.container_not_found = self.container_obj_mismatch = 0
        # Finished listings keyed by account, or (account, container).
        self.list_cache = {}
        # Events for audits currently running, keyed like list_cache.
        self.in_progress = {}

    def _record_inconsistent(self, path):
        """Append ``path`` to the error file, if one was configured."""
        if self.error_file:
            with open(self.error_file, 'a') as err_file:
                print(path, file=err_file)

    def audit_object(self, account, container, name):
        """Compare every replica of one object with the container listing.

        :param account: account name
        :param container: container name
        :param name: object name
        """
        path = '/%s/%s/%s' % (account, container, name)
        part, nodes = self.object_ring.get_nodes(
            account, container.encode('utf-8'), name.encode('utf-8'))
        container_listing = self.audit_container(account, container)
        consistent = True
        if name not in container_listing:
            print(" Object %s missing in container listing!" % path)
            consistent = False
            listing_hash = None
        else:
            listing_hash = container_listing[name]['hash']
        etags = []
        for node in nodes:
            try:
                if self.deep:
                    conn = http_connect(node['ip'], node['port'],
                                        node['device'], part, 'GET', path, {})
                    resp = conn.getresponse()
                    digest = md5()
                    while True:
                        chunk = resp.read(8192)
                        if not chunk:
                            break
                        digest.update(chunk)
                    calc_hash = digest.hexdigest()
                    if resp.status // 100 != 2:
                        self.object_not_found += 1
                        consistent = False
                        print(' Bad status GETting object "%s" on %s/%s'
                              % (path, node['ip'], node['device']))
                        # An error response carries no usable ETag.
                        continue
                    if resp.getheader('ETag').strip('"') != calc_hash:
                        self.object_checksum_mismatch += 1
                        consistent = False
                        print(' MD5 does not match etag for "%s" on %s/%s'
                              % (path, node['ip'], node['device']))
                else:
                    conn = http_connect(node['ip'], node['port'],
                                        node['device'], part, 'HEAD',
                                        path.encode('utf-8'), {})
                    resp = conn.getresponse()
                    if resp.status // 100 != 2:
                        self.object_not_found += 1
                        consistent = False
                        print(' Bad status HEADing object "%s" on %s/%s'
                              % (path, node['ip'], node['device']))
                        continue

                # NOTE(review): the header name was cut off in the copy of
                # this file under review; this is Swift's container-update
                # override header -- confirm against the upstream source.
                override_etag = resp.getheader(
                    'X-Object-Sysmeta-Container-Update-Override-Etag')
                if override_etag:
                    etags.append((override_etag, node))
                else:
                    etags.append((resp.getheader('ETag'), node))
            except Exception:
                self.object_exceptions += 1
                consistent = False
                print(' Exception fetching object "%s" on %s/%s'
                      % (path, node['ip'], node['device']))

        if not etags:
            consistent = False
            print(" Failed fo fetch object %s at all!" % path)
        elif listing_hash:
            for etag, node in etags:
                # A replica that returned no ETag cannot match the listing.
                if etag is None or etag.strip('"') != listing_hash:
                    consistent = False
                    self.object_checksum_mismatch += 1
                    print(' ETag mismatch for "%s" on %s/%s'
                          % (path, node['ip'], node['device']))
        if not consistent:
            self._record_inconsistent(path)
        self.objects_checked += 1

    def audit_container(self, account, name, recurse=False):
        """Fetch and compare a container's listing from every replica.

        :param account: account name
        :param name: container name
        :param recurse: when true, spawn an object audit for every object
                        found in the merged listing
        :returns: dict mapping object name to its newest listing record
        """
        key = (account, name)
        if key in self.in_progress:
            # Another green thread is already auditing this container.
            self.in_progress[key].wait()
        if key in self.list_cache:
            return self.list_cache[key]
        self.in_progress[key] = Event()
        print('Auditing container "%s"' % name)
        path = '/%s/%s' % (account, name)
        account_listing = self.audit_account(account)
        consistent = True
        if name not in account_listing:
            consistent = False
            print(" Container %s not in account listing!" % path)
        part, nodes = \
            self.container_ring.get_nodes(account, name.encode('utf-8'))
        rec_d = {}
        responses = {}
        for node in nodes:
            marker = ''
            results = True
            # Page through the listing until an empty page comes back.
            while results:
                try:
                    conn = http_connect(node['ip'], node['port'],
                                        node['device'], part, 'GET',
                                        path.encode('utf-8'), {},
                                        'format=json&marker=%s' %
                                        quote(marker.encode('utf-8')))
                    resp = conn.getresponse()
                    if resp.status // 100 != 2:
                        self.container_not_found += 1
                        consistent = False
                        print(' Bad status GETting container "%s" on %s/%s' %
                              (path, node['ip'], node['device']))
                        break
                    if node['id'] not in responses:
                        responses[node['id']] = {
                            h.lower(): v for h, v in resp.getheaders()}
                    results = json.loads(resp.read())
                except Exception:
                    self.container_exceptions += 1
                    consistent = False
                    print(' Exception GETting container "%s" on %s/%s' %
                          (path, node['ip'], node['device']))
                    break
                if results:
                    marker = results[-1]['name']
                    for obj in results:
                        obj_name = obj['name']
                        if obj_name not in rec_d:
                            rec_d[obj_name] = obj
                        if (obj['last_modified'] !=
                                rec_d[obj_name]['last_modified']):
                            self.container_obj_mismatch += 1
                            consistent = False
                            print(" Different versions of %s/%s "
                                  "in container dbs." % (name, obj['name']))
                            # Keep the most recently modified record.
                            if (obj['last_modified'] >
                                    rec_d[obj_name]['last_modified']):
                                rec_d[obj_name] = obj
        obj_counts = [int(header['x-container-object-count'])
                      for header in responses.values()]
        if not obj_counts:
            consistent = False
            print(" Failed to fetch container %s at all!" % path)
        elif len(set(obj_counts)) != 1:
            self.container_count_mismatch += 1
            consistent = False
            print(
                " Container databases don't agree on number of objects.")
            print(
                " Max: %s, Min: %s" % (max(obj_counts), min(obj_counts)))
        self.containers_checked += 1
        self.list_cache[key] = rec_d
        self.in_progress[key].send(True)
        del self.in_progress[key]
        if recurse:
            for obj in rec_d.keys():
                self.pool.spawn_n(self.audit_object, account, name, obj)
        if not consistent:
            self._record_inconsistent(path)
        return rec_d

    def audit_account(self, account, recurse=False):
        """Fetch and compare an account's listing from every replica.

        :param account: account name
        :param recurse: when true, spawn a recursive container audit for
                        every container found
        :returns: set of container names seen on any replica
        """
        if account in self.in_progress:
            # Another green thread is already auditing this account.
            self.in_progress[account].wait()
        if account in self.list_cache:
            return self.list_cache[account]
        self.in_progress[account] = Event()
        print('Auditing account "%s"' % account)
        consistent = True
        path = '/%s' % account
        part, nodes = self.account_ring.get_nodes(account)
        # node id -> [lower-cased response headers, listing entries]
        responses = {}
        for node in nodes:
            node_id = node['id']
            marker = ''
            results = True
            # Page through the listing until an empty page comes back.
            while results:
                try:
                    conn = http_connect(node['ip'], node['port'],
                                        node['device'], part, 'GET', path, {},
                                        'format=json&marker=%s' %
                                        quote(marker.encode('utf-8')))
                    resp = conn.getresponse()
                    if resp.status // 100 != 2:
                        self.account_not_found += 1
                        consistent = False
                        print(" Bad status GETting account '%s' "
                              " from %s:%s" %
                              (account, node['ip'], node['device']))
                        break
                    results = json.loads(resp.read())
                except Exception:
                    self.account_exceptions += 1
                    consistent = False
                    print(" Exception GETting account '%s' on %s:%s" %
                          (account, node['ip'], node['device']))
                    break
                if node_id not in responses:
                    responses[node_id] = [
                        {h.lower(): v for h, v in resp.getheaders()}, []]
                responses[node_id][1].extend(results)
                if results:
                    marker = results[-1]['name']
        headers = [r[0] for r in responses.values()]
        cont_counts = [int(header['x-account-container-count'])
                       for header in headers]
        if len(set(cont_counts)) != 1:
            self.account_container_mismatch += 1
            consistent = False
            print(" Account databases for '%s' don't agree on"
                  " number of containers." % account)
            if cont_counts:
                print(" Max: %s, Min: %s" % (max(cont_counts),
                                             min(cont_counts)))
        obj_counts = [int(header['x-account-object-count'])
                      for header in headers]
        if len(set(obj_counts)) != 1:
            self.account_object_mismatch += 1
            consistent = False
            print(" Account databases for '%s' don't agree on"
                  " number of objects." % account)
            if obj_counts:
                print(" Max: %s, Min: %s" % (max(obj_counts),
                                             min(obj_counts)))
        containers = set()
        for resp in responses.values():
            containers.update(container['name'] for container in resp[1])
        self.list_cache[account] = containers
        self.in_progress[account].send(True)
        del self.in_progress[account]
        self.accounts_checked += 1
        if recurse:
            for container in containers:
                self.pool.spawn_n(self.audit_container, account,
                                  container, True)
        if not consistent:
            self._record_inconsistent(path)
        return containers

    def audit(self, account, container=None, obj=None):
        """Schedule the audit matching the most specific path part given.

        :param account: account name
        :param container: optional container name
        :param obj: optional object name (only used with a container)
        """
        if obj and container:
            self.pool.spawn_n(self.audit_object, account, container, obj)
        elif container:
            self.pool.spawn_n(self.audit_container, account, container, True)
        else:
            self.pool.spawn_n(self.audit_account, account, True)

    def wait(self):
        """Block until every scheduled audit has finished."""
        self.pool.waitall()

    def print_stats(self):
        """Print the counters; problem counters only when non-zero."""

        def _print_stat(name, stat):
            # Right align stat name in a field of 18 characters
            print("{0:>18}: {1}".format(name, stat))

        _print_stat("Accounts checked", self.accounts_checked)
        if self.account_not_found:
            _print_stat("Missing Replicas", self.account_not_found)
        if self.account_exceptions:
            _print_stat("Exceptions", self.account_exceptions)
        if self.account_container_mismatch:
            _print_stat("Container mismatch", self.account_container_mismatch)
        if self.account_object_mismatch:
            _print_stat("Object mismatch", self.account_object_mismatch)
        _print_stat("Containers checked", self.containers_checked)
        if self.container_not_found:
            _print_stat("Missing Replicas", self.container_not_found)
        if self.container_exceptions:
            _print_stat("Exceptions", self.container_exceptions)
        if self.container_count_mismatch:
            _print_stat("Count mismatch", self.container_count_mismatch)
        if self.container_obj_mismatch:
            _print_stat("Object mismatch", self.container_obj_mismatch)
        _print_stat("Objects checked", self.objects_checked)
        if self.object_not_found:
            _print_stat("Missing Replicas", self.object_not_found)
        if self.object_exceptions:
            _print_stat("Exceptions", self.object_exceptions)
        if self.object_checksum_mismatch:
            _print_stat("MD5 Mismatch", self.object_checksum_mismatch)
if __name__ == '__main__':
    # Options: -c concurrency, -r ring dir, -e error file, -d deep audit.
    try:
        optlist, args = getopt.getopt(sys.argv[1:], 'c:r:e:d')
    except getopt.GetoptError as err:
        print(str(err))
        print(usage)
        sys.exit(2)
    # No URLs on the command line and nothing piped in: show help.
    if not args and os.isatty(sys.stdin.fileno()):
        print(usage)
        sys.exit()
    opts = dict(optlist)
    options = {
        'concurrency': int(opts.get('-c', 50)),
        'error_file': opts.get('-e', None),
        'swift_dir': opts.get('-r', '/etc/swift'),
        'deep': '-d' in opts,
    }
    auditor = Auditor(**options)
    # URLs may also arrive one per line on stdin.
    if not os.isatty(sys.stdin.fileno()):
        args = chain(args, sys.stdin)
    for path in args:
        path = '/' + path.rstrip('\r\n').lstrip('/')
        auditor.audit(*split_path(path, 1, 3, True))
    # audit() only schedules work on the pool; wait for it, then report.
    # NOTE(review): confirm these calls are not already present beyond the
    # lines visible in this copy of the file.
    auditor.wait()
    auditor.print_stats()