Move remaining bin scripts to cli modules

- swift-account-audit
- swift-config
- swift-drive-audit
- swift-get-nodes
- swift-reconciler-enqueue
- swift-oldies
- swift-orphans

Related-Change: Ifcc8138e7b55d5b82bea0d411ec6bfcca2c77c83
Change-Id: Ie4a252ec51d23caa9e90e6c2f772847d0d71b1c8
This commit is contained in:
Tim Burke 2024-07-23 11:47:48 -07:00
parent 2c5dc64d25
commit c3e1d91d1f
15 changed files with 1157 additions and 1010 deletions

View File

@ -14,373 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import sys
from hashlib import md5
import getopt
from itertools import chain
from swift.cli import account_audit
import json
from eventlet.greenpool import GreenPool
from eventlet.event import Event
from six.moves.urllib.parse import quote
from swift.common.ring import Ring
from swift.common.utils import split_path
from swift.common.bufferedhttp import http_connect
usage = """
Usage!
%(cmd)s [options] [url 1] [url 2] ...
-c [concurrency] Set the concurrency, default 50
-r [ring dir] Ring locations, default /etc/swift
-e [filename] File for writing a list of inconsistent urls
-d Also download files and verify md5
You can also feed a list of urls to the script through stdin.
Examples!
%(cmd)s AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076
%(cmd)s AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076/container/object
%(cmd)s -e errors.txt AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076/container
%(cmd)s < errors.txt
%(cmd)s -c 25 -d < errors.txt
""" % {'cmd': sys.argv[0]}
class Auditor(object):
    """Audit Swift accounts, containers and objects for consistency.

    For each item, every primary node in the relevant ring is queried and
    the replies are cross-checked: account listings against container DBs,
    container listings against object servers, and (with ``deep=True``)
    object bodies are downloaded and re-hashed against their ETags.
    Inconsistent paths are optionally appended to ``error_file``.  Work is
    fanned out over an eventlet :class:`GreenPool`.
    """

    def __init__(self, swift_dir='/etc/swift', concurrency=50, deep=False,
                 error_file=None):
        self.pool = GreenPool(concurrency)
        self.object_ring = Ring(swift_dir, ring_name='object')
        self.container_ring = Ring(swift_dir, ring_name='container')
        self.account_ring = Ring(swift_dir, ring_name='account')
        self.deep = deep
        self.error_file = error_file
        # zero out stats
        self.accounts_checked = self.account_exceptions = \
            self.account_not_found = self.account_container_mismatch = \
            self.account_object_mismatch = self.objects_checked = \
            self.object_exceptions = self.object_not_found = \
            self.object_checksum_mismatch = self.containers_checked = \
            self.container_exceptions = self.container_count_mismatch = \
            self.container_not_found = self.container_obj_mismatch = 0
        # list_cache holds already-fetched listings; in_progress holds an
        # Event per listing currently being fetched so concurrent
        # greenthreads wait instead of re-auditing the same item.
        self.list_cache = {}
        self.in_progress = {}

    def audit_object(self, account, container, name):
        """Audit one object against its container listing and replicas.

        HEADs every primary object node (or GETs and re-hashes the body
        when ``deep``) and compares ETags with the hash recorded in the
        container listing.
        """
        path = '/%s/%s/%s' % (account, container, name)
        # NOTE(review): container/name are utf-8 encoded but account is
        # not -- confirm callers only pass ascii-safe accounts.
        part, nodes = self.object_ring.get_nodes(
            account, container.encode('utf-8'), name.encode('utf-8'))
        container_listing = self.audit_container(account, container)
        consistent = True
        if name not in container_listing:
            print(" Object %s missing in container listing!" % path)
            consistent = False
            obj_hash = None  # renamed from ``hash`` (shadowed the builtin)
        else:
            obj_hash = container_listing[name]['hash']
        etags = []
        for node in nodes:
            try:
                if self.deep:
                    conn = http_connect(node['ip'], node['port'],
                                        node['device'], part, 'GET', path, {})
                    resp = conn.getresponse()
                    calc_hash = md5()
                    chunk = True
                    while chunk:
                        chunk = resp.read(8192)
                        calc_hash.update(chunk)
                    calc_hash = calc_hash.hexdigest()
                    if resp.status // 100 != 2:
                        self.object_not_found += 1
                        consistent = False
                        print(' Bad status %s GETting object "%s" on %s/%s'
                              % (resp.status, path,
                                 node['ip'], node['device']))
                        continue
                    if resp.getheader('ETag').strip('"') != calc_hash:
                        self.object_checksum_mismatch += 1
                        consistent = False
                        print(' MD5 does not match etag for "%s" on %s/%s'
                              % (path, node['ip'], node['device']))
                else:
                    conn = http_connect(node['ip'], node['port'],
                                        node['device'], part, 'HEAD',
                                        path.encode('utf-8'), {})
                    resp = conn.getresponse()
                    if resp.status // 100 != 2:
                        self.object_not_found += 1
                        consistent = False
                        print(' Bad status %s HEADing object "%s" on %s/%s'
                              % (resp.status, path,
                                 node['ip'], node['device']))
                        continue
                    # Prefer the container-update override etag (used e.g.
                    # by EC / SLO) over the raw per-replica ETag.
                    override_etag = resp.getheader(
                        'X-Object-Sysmeta-Container-Update-Override-Etag')
                    if override_etag:
                        etags.append((override_etag, node))
                    else:
                        etags.append((resp.getheader('ETag'), node))
            except Exception:
                self.object_exceptions += 1
                consistent = False
                print(' Exception fetching object "%s" on %s/%s'
                      % (path, node['ip'], node['device']))
                continue
        if not etags:
            consistent = False
            # FIX: typo in message ("fo" -> "to")
            print(" Failed to fetch object %s at all!" % path)
        elif obj_hash:
            for etag, node in etags:
                if etag.strip('"') != obj_hash:
                    consistent = False
                    self.object_checksum_mismatch += 1
                    print(' ETag mismatch for "%s" on %s/%s'
                          % (path, node['ip'], node['device']))
        if not consistent and self.error_file:
            with open(self.error_file, 'a') as err_file:
                print(path, file=err_file)
        self.objects_checked += 1

    def audit_container(self, account, name, recurse=False):
        """Audit one container; return its merged object listing dict.

        Pages through every container replica with marker-based listings,
        merges them (keeping the newest ``last_modified`` per object) and
        cross-checks object counts.  With ``recurse=True`` each listed
        object is queued for :meth:`audit_object`.
        """
        if (account, name) in self.in_progress:
            self.in_progress[(account, name)].wait()
        if (account, name) in self.list_cache:
            return self.list_cache[(account, name)]
        self.in_progress[(account, name)] = Event()
        print('Auditing container "%s"' % name)
        path = '/%s/%s' % (account, name)
        account_listing = self.audit_account(account)
        consistent = True
        if name not in account_listing:
            consistent = False
            print(" Container %s not in account listing!" % path)
        part, nodes = \
            self.container_ring.get_nodes(account, name.encode('utf-8'))
        rec_d = {}
        responses = {}
        for node in nodes:
            marker = ''
            results = True
            while results:
                try:
                    conn = http_connect(node['ip'], node['port'],
                                        node['device'], part, 'GET',
                                        path.encode('utf-8'), {},
                                        'format=json&marker=%s' %
                                        quote(marker.encode('utf-8')))
                    resp = conn.getresponse()
                    if resp.status // 100 != 2:
                        self.container_not_found += 1
                        consistent = False
                        print(' Bad status GETting container "%s" on %s/%s' %
                              (path, node['ip'], node['device']))
                        break
                    if node['id'] not in responses:
                        responses[node['id']] = {
                            h.lower(): v for h, v in resp.getheaders()}
                    results = json.loads(resp.read())
                except Exception:
                    self.container_exceptions += 1
                    consistent = False
                    print(' Exception GETting container "%s" on %s/%s' %
                          (path, node['ip'], node['device']))
                    break
                if results:
                    marker = results[-1]['name']
                    for obj in results:
                        obj_name = obj['name']
                        if obj_name not in rec_d:
                            rec_d[obj_name] = obj
                        if (obj['last_modified'] !=
                                rec_d[obj_name]['last_modified']):
                            self.container_obj_mismatch += 1
                            consistent = False
                            print(" Different versions of %s/%s "
                                  "in container dbs." % (name, obj['name']))
                        # keep the most recent record
                        if (obj['last_modified'] >
                                rec_d[obj_name]['last_modified']):
                            rec_d[obj_name] = obj
        obj_counts = [int(header['x-container-object-count'])
                      for header in responses.values()]
        if not obj_counts:
            consistent = False
            print(" Failed to fetch container %s at all!" % path)
        else:
            if len(set(obj_counts)) != 1:
                self.container_count_mismatch += 1
                consistent = False
                print(
                    " Container databases don't agree on number of objects.")
                print(
                    " Max: %s, Min: %s" % (max(obj_counts), min(obj_counts)))
        self.containers_checked += 1
        self.list_cache[(account, name)] = rec_d
        self.in_progress[(account, name)].send(True)
        del self.in_progress[(account, name)]
        if recurse:
            for obj in rec_d.keys():
                self.pool.spawn_n(self.audit_object, account, name, obj)
        if not consistent and self.error_file:
            with open(self.error_file, 'a') as error_file:
                print(path, file=error_file)
        return rec_d

    def audit_account(self, account, recurse=False):
        """Audit one account; return the set of its container names.

        Pages through every account replica, cross-checks container and
        object counts across the replicas and, with ``recurse=True``,
        queues every container for :meth:`audit_container`.
        """
        if account in self.in_progress:
            self.in_progress[account].wait()
        if account in self.list_cache:
            return self.list_cache[account]
        self.in_progress[account] = Event()
        print('Auditing account "%s"' % account)
        consistent = True
        path = '/%s' % account
        part, nodes = self.account_ring.get_nodes(account)
        responses = {}
        for node in nodes:
            marker = ''
            results = True
            while results:
                node_id = node['id']
                try:
                    conn = http_connect(node['ip'], node['port'],
                                        node['device'], part, 'GET', path, {},
                                        'format=json&marker=%s' %
                                        quote(marker.encode('utf-8')))
                    resp = conn.getresponse()
                    if resp.status // 100 != 2:
                        self.account_not_found += 1
                        consistent = False
                        print(" Bad status GETting account '%s' "
                              " from %s:%s" %
                              (account, node['ip'], node['device']))
                        break
                    results = json.loads(resp.read())
                except Exception:
                    self.account_exceptions += 1
                    consistent = False
                    print(" Exception GETting account '%s' on %s:%s" %
                          (account, node['ip'], node['device']))
                    break
                if node_id not in responses:
                    responses[node_id] = [
                        {h.lower(): v for h, v in resp.getheaders()}, []]
                responses[node_id][1].extend(results)
                if results:
                    marker = results[-1]['name']
        headers = [r[0] for r in responses.values()]
        cont_counts = [int(header['x-account-container-count'])
                       for header in headers]
        if len(set(cont_counts)) != 1:
            self.account_container_mismatch += 1
            consistent = False
            print(" Account databases for '%s' don't agree on"
                  " number of containers." % account)
            if cont_counts:
                print(" Max: %s, Min: %s" % (max(cont_counts),
                                             min(cont_counts)))
        obj_counts = [int(header['x-account-object-count'])
                      for header in headers]
        if len(set(obj_counts)) != 1:
            self.account_object_mismatch += 1
            consistent = False
            print(" Account databases for '%s' don't agree on"
                  " number of objects." % account)
            if obj_counts:
                print(" Max: %s, Min: %s" % (max(obj_counts),
                                             min(obj_counts)))
        containers = set()
        for resp in responses.values():
            containers.update(container['name'] for container in resp[1])
        self.list_cache[account] = containers
        self.in_progress[account].send(True)
        del self.in_progress[account]
        self.accounts_checked += 1
        if recurse:
            for container in containers:
                self.pool.spawn_n(self.audit_container, account,
                                  container, True)
        if not consistent and self.error_file:
            with open(self.error_file, 'a') as error_file:
                # FIX: was ``print(path, error_file)``, which printed the
                # file object to stdout instead of writing the path into
                # the error file (cf. the identical blocks in
                # audit_object/audit_container which use file=).
                print(path, file=error_file)
        return containers

    def audit(self, account, container=None, obj=None):
        """Queue an audit of an account, container or object path."""
        if obj and container:
            self.pool.spawn_n(self.audit_object, account, container, obj)
        elif container:
            self.pool.spawn_n(self.audit_container, account, container, True)
        else:
            self.pool.spawn_n(self.audit_account, account, True)

    def wait(self):
        """Block until all queued audit greenthreads have finished."""
        self.pool.waitall()

    def print_stats(self):
        """Print a summary of everything checked and every mismatch found."""
        def _print_stat(name, stat):
            # Right align stat name in a field of 18 characters
            print("{0:>18}: {1}".format(name, stat))

        print()
        _print_stat("Accounts checked", self.accounts_checked)
        if self.account_not_found:
            _print_stat("Missing Replicas", self.account_not_found)
        if self.account_exceptions:
            _print_stat("Exceptions", self.account_exceptions)
        if self.account_container_mismatch:
            _print_stat("Container mismatch", self.account_container_mismatch)
        if self.account_object_mismatch:
            _print_stat("Object mismatch", self.account_object_mismatch)
        print()
        _print_stat("Containers checked", self.containers_checked)
        if self.container_not_found:
            _print_stat("Missing Replicas", self.container_not_found)
        if self.container_exceptions:
            _print_stat("Exceptions", self.container_exceptions)
        if self.container_count_mismatch:
            _print_stat("Count mismatch", self.container_count_mismatch)
        if self.container_obj_mismatch:
            _print_stat("Object mismatch", self.container_obj_mismatch)
        print()
        _print_stat("Objects checked", self.objects_checked)
        if self.object_not_found:
            _print_stat("Missing Replicas", self.object_not_found)
        if self.object_exceptions:
            _print_stat("Exceptions", self.object_exceptions)
        if self.object_checksum_mismatch:
            _print_stat("MD5 Mismatch", self.object_checksum_mismatch)
if __name__ == '__main__':
    # Parse short options: -c concurrency, -r ring dir, -e error file,
    # -d deep (download + md5) mode.
    try:
        optlist, args = getopt.getopt(sys.argv[1:], 'c:r:e:d')
    except getopt.GetoptError as err:
        print(str(err))
        print(usage)
        sys.exit(2)
    # No positional args and stdin is a TTY: nothing to audit, show usage.
    if not args and os.isatty(sys.stdin.fileno()):
        print(usage)
        sys.exit()
    opts = dict(optlist)
    options = {
        'concurrency': int(opts.get('-c', 50)),
        'error_file': opts.get('-e', None),
        'swift_dir': opts.get('-r', '/etc/swift'),
        'deep': '-d' in opts,
    }
    auditor = Auditor(**options)
    # When stdin is piped, also read paths from it (one per line).
    if not os.isatty(sys.stdin.fileno()):
        args = chain(args, sys.stdin)
    for path in args:
        # Normalize to a single leading slash, strip the trailing newline.
        path = '/' + path.rstrip('\r\n').lstrip('/')
        # split_path with minsegs=1, maxsegs=3 yields (account[, container
        # [, object]]) which maps onto Auditor.audit's signature.
        auditor.audit(*split_path(path, 1, 3, True))
    auditor.wait()
    auditor.print_stats()
# New entry point: the script body now lives in swift.cli.account_audit.
if __name__ == "__main__":
    account_audit.main()

View File

@ -12,80 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import optparse
import os
import sys
from swift.common.manager import Server
from swift.common.utils import readconf
from swift.common.wsgi import appconfig
parser = optparse.OptionParser('%prog [options] SERVER')
parser.add_option('-c', '--config-num', metavar="N", type="int",
dest="number", default=0,
help="parse config for the Nth server only")
parser.add_option('-s', '--section', help="only display matching sections")
parser.add_option('-w', '--wsgi', action='store_true',
help="use wsgi/paste parser instead of readconf")
def _context_name(context):
return ':'.join((context.object_type.name, context.name))
def inspect_app_config(app_config):
    """Flatten a loaded paste-deploy app config into a dict of sections.

    Returns a mapping of section label -> option dict.  When the root
    context is a pipeline, every filter and the terminal app get their own
    section and the root section gains a space-joined ``pipeline`` entry.
    """
    context = app_config.context
    root_section = _context_name(context)
    conf = {root_section: context.config()}
    if context.object_type.name == 'pipeline':
        entry_points = []
        for filter_context in context.filter_contexts:
            conf[_context_name(filter_context)] = filter_context.config()
            entry_points.append(filter_context.entry_point_name)
        app_context = context.app_context
        conf[_context_name(app_context)] = app_context.config()
        entry_points.append(app_context.entry_point_name)
        conf[root_section]['pipeline'] = ' '.join(entry_points)
    return conf
def main():
    """Entry point for swift-config: dump parsed Swift config files.

    Positional args are either explicit conf paths or server names whose
    conf files are discovered via swift.common.manager.Server.  Returns an
    error string (used as exit status) when no args are given.
    """
    options, args = parser.parse_args()
    options = dict(vars(options))
    if not args:
        return 'ERROR: specify type of server or conf_path'
    conf_files = []
    for arg in args:
        if os.path.exists(arg):
            conf_files.append(arg)
        else:
            # Not a path: treat it as a server name and look up its confs.
            conf_files += Server(arg).conf_files(**options)
    for conf_file in conf_files:
        print('# %s' % conf_file)
        if options['wsgi']:
            # Parse through the paste/wsgi loader instead of readconf.
            app_config = appconfig(conf_file)
            conf = inspect_app_config(app_config)
        else:
            conf = readconf(conf_file)
        # Top-level (non-section) values are collected and printed last
        # as comments; dict values are printed as INI sections.
        flat_vars = {}
        for k, v in conf.items():
            if options['section'] and k != options['section']:
                continue
            if not isinstance(v, dict):
                flat_vars[k] = v
                continue
            print('[%s]' % k)
            for opt, value in v.items():
                print('%s = %s' % (opt, value))
            print()
        for k, v in flat_vars.items():
            print('# %s = %s' % (k, v))
        print()
from swift.cli import config
if __name__ == "__main__":
sys.exit(main())
config.main()

View File

@ -14,251 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import glob
import locale
import os
import os.path
import re
import subprocess
import sys
from swift.cli import drive_audit
import six
from six.moves.configparser import ConfigParser
from swift.common.utils import backward, get_logger, dump_recon_cache, \
config_true_value
def get_devices(device_dir, logger):
    """Return a list of swift device dicts mounted under *device_dir*.

    Each dict has ``mount_point``, ``block_device``, ``major``/``minor``
    strings and, when a matching row exists in /proc/partitions, the
    ``kernel_device`` name.  Major/minor numbers come from the
    /dev/block symlinks when available, else from stat()ing the device.
    """
    devices = []
    majmin_devices = {}
    # List /dev/block
    # Using os.scandir on recent versions of python, else os.listdir
    if 'scandir' in dir(os):
        with os.scandir("/dev/block") as it:
            for ent in it:
                if ent.is_symlink():
                    dev_name = os.path.basename(os.readlink(ent.path))
                    majmin = os.path.basename(ent.path).split(':')
                    majmin_devices[dev_name] = {'major': majmin[0],
                                                'minor': majmin[1]}
    else:
        for ent in os.listdir("/dev/block"):
            ent_path = os.path.join("/dev/block", ent)
            # FIX: was ``os.path.is_symlink`` which does not exist and
            # raised AttributeError on this fallback path; the correct
            # predicate is os.path.islink.
            if os.path.islink(ent_path):
                dev_name = os.path.basename(os.readlink(ent_path))
                majmin = os.path.basename(ent_path).split(':')
                majmin_devices[dev_name] = {'major': majmin[0],
                                            'minor': majmin[1]}
    # Use context managers so the /proc file handles are closed promptly.
    with open('/proc/mounts') as mounts:
        for line in mounts:
            data = line.strip().split()
            block_device = data[0]
            mount_point = data[1]
            if mount_point.startswith(device_dir):
                device = {}
                device['mount_point'] = mount_point
                device['block_device'] = block_device
                dev_name = os.path.basename(block_device)
                if dev_name in majmin_devices:
                    # If symlink is in /dev/block
                    device['major'] = majmin_devices[dev_name]['major']
                    device['minor'] = majmin_devices[dev_name]['minor']
                else:
                    # Else we try to stat block_device
                    try:
                        device_num = os.stat(block_device).st_rdev
                    except OSError:
                        # If we can't stat the device,
                        # then something weird is going on
                        logger.error(
                            'Could not determine major:minor numbers for %s '
                            '(mounted at %s)! Skipping...',
                            block_device, mount_point)
                        continue
                    device['major'] = str(os.major(device_num))
                    device['minor'] = str(os.minor(device_num))
                devices.append(device)
    # Attach the kernel device name by matching major:minor pairs; the
    # first two lines of /proc/partitions are headers.
    with open('/proc/partitions') as partitions:
        for line in partitions.readlines()[2:]:
            major, minor, blocks, kernel_device = line.strip().split()
            device = [d for d in devices
                      if d['major'] == major and d['minor'] == minor]
            if device:
                device[0]['kernel_device'] = kernel_device
    return devices
def get_errors(error_re, log_file_pattern, minutes, logger,
               log_file_encoding):
    """Scan kernel logs for drive errors within the last *minutes*.

    Walks the matching log files newest-first, reading each backwards,
    and returns a dict mapping device name (regex capture group) to the
    number of matching error lines.  Stops at the previous boot marker
    or once entries older than the cutoff are reached.
    """
    # Assuming log rotation is being used, we need to examine
    # recently rotated files in case the rotation occurred
    # just before the script is being run - the data we are
    # looking for may have rotated.
    #
    # The globbing used before would not work with all out-of-box
    # distro setup for logrotate and syslog therefore moving this
    # to the config where one can set it with the desired
    # globbing pattern.
    log_files = [f for f in glob.glob(log_file_pattern)]
    try:
        # Newest file first so we can stop as soon as we hit old entries.
        log_files.sort(key=lambda f: os.stat(f).st_mtime, reverse=True)
    except (IOError, OSError) as exc:
        logger.error(exc)
        print(exc)
        sys.exit(1)
    now_time = datetime.datetime.now()
    end_time = now_time - datetime.timedelta(minutes=minutes)
    # kern.log does not contain the year so we need to keep
    # track of the year and month in case the year recently
    # ticked over
    year = now_time.year
    prev_ent_month = now_time.strftime('%b')
    errors = {}
    reached_old_logs = False
    for path in log_files:
        try:
            f = open(path, 'rb')
        except IOError:
            logger.error("Error: Unable to open " + path)
            print("Unable to open " + path)
            sys.exit(1)
        # backward() yields lines from the end of the file toward the start.
        for line in backward(f):
            if not six.PY2:
                line = line.decode(log_file_encoding, 'surrogateescape')
            if '[    0.000000]' in line \
                    or 'KERNEL supported cpus:' in line \
                    or 'BIOS-provided physical RAM map:' in line:
                # Ignore anything before the last boot
                reached_old_logs = True
                break
            # Solves the problem with year change - kern.log does not
            # keep track of the year.
            log_time_ent = line.split()[:3]
            # Reading backwards: a Dec entry right after a Jan one means
            # we crossed a year boundary.
            if log_time_ent[0] == 'Dec' and prev_ent_month == 'Jan':
                year -= 1
            prev_ent_month = log_time_ent[0]
            log_time_string = '%d %s' % (year, ' '.join(log_time_ent))
            try:
                log_time = datetime.datetime.strptime(
                    log_time_string, '%Y %b %d %H:%M:%S')
            except ValueError:
                # Some versions use ISO timestamps instead
                try:
                    log_time = datetime.datetime.strptime(
                        line[0:19], '%Y-%m-%dT%H:%M:%S')
                except ValueError:
                    continue
            if log_time > end_time:
                # Within the window: count every device each pattern finds.
                for err in error_re:
                    for device in err.findall(line):
                        errors[device] = errors.get(device, 0) + 1
            else:
                reached_old_logs = True
                break
        if reached_old_logs:
            break
    return errors
def comment_fstab(mount_point):
    """Comment out the /etc/fstab entry whose mount point matches.

    Rewrites /etc/fstab via a temporary file and an atomic rename,
    prefixing matching, not-already-commented lines with ``#``.
    """
    with open('/etc/fstab', 'r') as current:
        with open('/etc/fstab.new', 'w') as rewritten:
            for entry in current:
                fields = entry.split()
                matches = (len(fields) > 2
                           and fields[1] == mount_point
                           and not entry.startswith('#'))
                rewritten.write('#' + entry if matches else entry)
    os.rename('/etc/fstab.new', '/etc/fstab')
if __name__ == '__main__':
    # swift-drive-audit: read config, scan kernel logs for drive errors,
    # unmount failing drives, and dump results into the recon cache.
    c = ConfigParser()
    try:
        conf_path = sys.argv[1]
    except Exception:
        print("Usage: %s CONF_FILE" % sys.argv[0].split('/')[-1])
        sys.exit(1)
    if not c.read(conf_path):
        print("Unable to read config file %s" % conf_path)
        sys.exit(1)
    conf = dict(c.items('drive-audit'))
    device_dir = conf.get('device_dir', '/srv/node')
    minutes = int(conf.get('minutes', 60))
    error_limit = int(conf.get('error_limit', 1))
    recon_cache_path = conf.get('recon_cache_path', "/var/cache/swift")
    log_file_pattern = conf.get('log_file_pattern',
                                '/var/log/kern.*[!.][!g][!z]')
    log_file_encoding = conf.get('log_file_encoding', 'auto')
    if log_file_encoding == 'auto':
        log_file_encoding = locale.getpreferredencoding()
    log_to_console = config_true_value(conf.get('log_to_console', False))
    # Operators may supply their own error regexes via regex_pattern_*
    # config keys; otherwise fall back to the default sdX patterns.
    error_re = []
    for conf_key in conf:
        if conf_key.startswith('regex_pattern_'):
            error_pattern = conf[conf_key]
            try:
                r = re.compile(error_pattern)
            except re.error:
                sys.exit('Error: unable to compile regex pattern "%s"' %
                         error_pattern)
            error_re.append(r)
    if not error_re:
        error_re = [
            re.compile(r'\berror\b.*\b(sd[a-z]{1,2}\d?)\b'),
            re.compile(r'\b(sd[a-z]{1,2}\d?)\b.*\berror\b'),
        ]
    conf['log_name'] = conf.get('log_name', 'drive-audit')
    logger = get_logger(conf, log_to_console=log_to_console,
                        log_route='drive-audit')
    devices = get_devices(device_dir, logger)
    logger.debug("Devices found: %s" % str(devices))
    if not devices:
        logger.error("Error: No devices found!")
    recon_errors = {}
    total_errors = 0
    for device in devices:
        recon_errors[device['mount_point']] = 0
    errors = get_errors(error_re, log_file_pattern, minutes, logger,
                        log_file_encoding)
    logger.debug("Errors found: %s" % str(errors))
    unmounts = 0
    for kernel_device, count in errors.items():
        if count >= error_limit:
            # NOTE(review): devices lacking a 'kernel_device' key (no
            # /proc/partitions match) would raise KeyError here -- confirm
            # get_devices always sets it for mounted drives.
            device = \
                [d for d in devices if d['kernel_device'] == kernel_device]
            if device:
                mount_point = device[0]['mount_point']
                if mount_point.startswith(device_dir):
                    if config_true_value(conf.get('unmount_failed_device',
                                                  True)):
                        logger.info("Unmounting %s with %d errors" %
                                    (mount_point, count))
                        subprocess.call(['umount', '-fl', mount_point])
                        logger.info("Commenting out %s from /etc/fstab" %
                                    (mount_point))
                        comment_fstab(mount_point)
                        unmounts += 1
                    else:
                        logger.info("Detected %s with %d errors "
                                    "(Device not unmounted)" %
                                    (mount_point, count))
                recon_errors[mount_point] = count
                total_errors += count
    recon_file = recon_cache_path + "/drive.recon"
    dump_recon_cache(recon_errors, recon_file, logger)
    dump_recon_cache({'drive_audit_errors': total_errors}, recon_file, logger,
                     set_owner=conf.get("user", "swift"))
    if unmounts == 0:
        logger.info("No drives were unmounted")
    elif os.path.isdir("/run/systemd/system"):
        # An fstab entry changed under systemd; reload so the change sticks.
        logger.debug("fstab updated, calling systemctl daemon-reload")
        subprocess.call(["/usr/bin/systemctl", "daemon-reload"])
if __name__ == "__main__":
drive_audit.main()

View File

@ -14,63 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from optparse import OptionParser
from os.path import basename
from swift.cli import get_nodes
from swift.common.ring import Ring
from swift.common.storage_policy import reload_storage_policies
from swift.common.utils import set_swift_dir
from swift.cli.info import (parse_get_node_args, print_item_locations,
InfoSystemExit)
if __name__ == '__main__':
    # swift-get-nodes: resolve an account/container/object (or partition)
    # to the ring nodes responsible for it and print their locations.
    usage = '''
Shows the nodes responsible for the item specified.
Usage: %prog [-a] <ring.gz> <account> [<container> [<object>]]
Or: %prog [-a] <ring.gz> -p partition
Or: %prog [-a] -P policy_name <account> [<container> [<object>]]
Or: %prog [-a] -P policy_name -p partition
Note: account, container, object can also be a single arg separated by /
Example:
$ %prog -a /etc/swift/account.ring.gz MyAccount
Partition 5743883
Hash 96ae332a60b58910784e4417a03e1ad0
10.1.1.7:8000 sdd1
10.1.9.2:8000 sdb1
10.1.5.5:8000 sdf1
10.1.5.9:8000 sdt1 # [Handoff]
'''
    parser = OptionParser(usage)
    parser.add_option('-a', '--all', action='store_true',
                      help='Show all handoff nodes')
    parser.add_option('-p', '--partition', metavar='PARTITION',
                      help='Show nodes for a given partition')
    parser.add_option('-P', '--policy-name', dest='policy_name',
                      help='Specify which policy to use')
    parser.add_option('-d', '--swift-dir', default='/etc/swift',
                      dest='swift_dir', help='Path to swift directory')
    parser.add_option('-Q', '--quoted', action='store_true',
                      help='Assume swift paths are quoted')
    options, args = parser.parse_args()
    # Point the storage-policy machinery at the chosen swift dir before
    # interpreting any -P policy names.
    if set_swift_dir(options.swift_dir):
        reload_storage_policies()
    try:
        ring_path, args = parse_get_node_args(options, args)
    except InfoSystemExit as e:
        parser.print_help()
        sys.exit('ERROR: %s' % e)
    ring = ring_name = None
    if ring_path:
        # Derive the ring name from the file name, e.g. account.ring.gz.
        ring_name = basename(ring_path)[:-len('.ring.gz')]
        ring = Ring(ring_path)
    try:
        # Forward all parsed options (all/partition/policy_name/...) along.
        print_item_locations(ring, ring_name, *args, **vars(options))
    except InfoSystemExit:
        sys.exit(1)
if __name__ == "__main__":
get_nodes.main()

View File

@ -12,79 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import optparse
import subprocess
import sys
from swift.cli import oldies
if __name__ == '__main__':
    # swift-oldies: list long-running Swift processes, optionally pids-only.
    parser = optparse.OptionParser(usage='''%prog [options]
Lists old Swift processes.
'''.strip())
    parser.add_option('-a', '--age', dest='hours', type='int', default=720,
                      help='look for processes at least HOURS old; '
                           'default: 720 (30 days)')
    parser.add_option('-p', '--pids', action='store_true',
                      help='only print the pids found; for example, to pipe '
                           'to xargs kill')
    (options, args) = parser.parse_args()
    listing = []
    # One ps pass: elapsed time, pid and full command line per process.
    for line in subprocess.Popen(
            ['ps', '-eo', 'etime,pid,args', '--no-headers'],
            stdout=subprocess.PIPE).communicate()[0].split(b'\n'):
        if not line:
            continue
        hours = 0
        try:
            etime, pid, args = line.decode('ascii').split(None, 2)
        except ValueError:
            # This covers both decoding and not-enough-values-to-unpack errors
            sys.exit('Could not process ps line %r' % line)
        # Only count processes launched as swift-* via known interpreters.
        if not args.startswith((
                '/usr/bin/python /usr/bin/swift-',
                '/usr/bin/python /usr/local/bin/swift-',
                '/bin/python /usr/bin/swift-',
                '/usr/bin/python3 /usr/bin/swift-',
                '/usr/bin/python3 /usr/local/bin/swift-',
                '/bin/python3 /usr/bin/swift-')):
            continue
        args = args.split('-', 1)[1]
        # ps etime is [[dd-]hh:]mm:ss; a '-' separates days from the rest.
        etime = etime.split('-')
        if len(etime) == 2:
            hours = int(etime[0]) * 24
            etime = etime[1]
        elif len(etime) == 1:
            etime = etime[0]
        else:
            sys.exit('Could not process etime value from %r' % line)
        etime = etime.split(':')
        if len(etime) == 3:
            hours += int(etime[0])
        elif len(etime) != 2:
            sys.exit('Could not process etime value from %r' % line)
        if hours >= options.hours:
            listing.append((str(hours), pid, args))
    if not listing:
        sys.exit()
    if options.pids:
        for hours, pid, args in listing:
            print(pid)
    else:
        # Compute column widths, clamping the command column to keep
        # output within roughly 80 characters.
        hours_len = len('Hours')
        pid_len = len('PID')
        args_len = len('Command')
        for hours, pid, args in listing:
            hours_len = max(hours_len, len(hours))
            pid_len = max(pid_len, len(pid))
            args_len = max(args_len, len(args))
        args_len = min(args_len, 78 - hours_len - pid_len)
        print('%*s %*s %s' % (hours_len, 'Hours', pid_len, 'PID', 'Command'))
        for hours, pid, args in listing:
            print('%*s %*s %s' % (hours_len, hours, pid_len,
                                  pid, args[:args_len]))
oldies.main()

View File

@ -12,121 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import optparse
import os
import re
import signal
import subprocess
import sys
from swift.common.manager import RUN_DIR
from swift.cli import orphans
if __name__ == '__main__':
    # swift-orphans: list (and optionally signal) Swift processes whose
    # pids are not tracked by any .pid file under the run directory.
    parser = optparse.OptionParser(usage='''%prog [options]
Lists and optionally kills orphaned Swift processes. This is done by scanning
/var/run/swift for .pid files and listing any processes that look like Swift
processes but aren't associated with the pids in those .pid files. Any Swift
processes running with the 'once' parameter are ignored, as those are usually
for full-speed audit scans and such.

Example (sends SIGTERM to all orphaned Swift processes older than two hours):
%prog -a 2 -k TERM
'''.strip())
    parser.add_option('-a', '--age', dest='hours', type='int', default=24,
                      help="look for processes at least HOURS old; "
                           "default: 24")
    parser.add_option('-k', '--kill', dest='signal',
                      help='send SIGNAL to matched processes; default: just '
                           'list process information')
    parser.add_option('-w', '--wide', dest='wide', default=False,
                      action='store_true',
                      help="don't clip the listing at 80 characters")
    parser.add_option('-r', '--run-dir', type="str",
                      dest="run_dir", default=RUN_DIR,
                      help="alternative directory to store running pid files "
                           "default: %s" % RUN_DIR)
    (options, args) = parser.parse_args()
    # Collect every tracked pid plus its direct children (via ps --ppid).
    pids = []
    for root, directories, files in os.walk(options.run_dir):
        for name in files:
            if name.endswith(('.pid', '.pid.d')):
                pids.append(open(os.path.join(root, name)).read().strip())
                pids.extend(subprocess.Popen(
                    ['ps', '--ppid', pids[-1], '-o', 'pid', '--no-headers'],
                    stdout=subprocess.PIPE).communicate()[0].decode().split())
    listing = []
    swift_cmd_re = re.compile(
        '^/usr/bin/python[23]? /usr(?:/local)?/bin/swift-')
    for line in subprocess.Popen(
            ['ps', '-eo', 'etime,pid,args', '--no-headers'],
            stdout=subprocess.PIPE).communicate()[0].split(b'\n'):
        if not line:
            continue
        hours = 0
        try:
            etime, pid, args = line.decode('ascii').split(None, 2)
        except ValueError:
            sys.exit('Could not process ps line %r' % line)
        # Skip tracked pids, non-swift commands, ourselves, and one-shot
        # ('once') invocations.
        if pid in pids:
            continue
        if any([
                not swift_cmd_re.match(args),
                'swift-orphans' in args,
                'once' in args.split(),
        ]):
            continue
        args = args.split('swift-', 1)[1]
        # ps etime is [[dd-]hh:]mm:ss; a '-' separates days from the rest.
        etime = etime.split('-')
        if len(etime) == 2:
            hours = int(etime[0]) * 24
            etime = etime[1]
        elif len(etime) == 1:
            etime = etime[0]
        else:
            sys.exit('Could not process etime value from %r' % line)
        etime = etime.split(':')
        if len(etime) == 3:
            hours += int(etime[0])
        elif len(etime) != 2:
            sys.exit('Could not process etime value from %r' % line)
        if hours >= options.hours:
            listing.append((str(hours), pid, args))
    if not listing:
        sys.exit()
    # Compute column widths; the command column is clamped to roughly
    # 80 columns total (the -w/--wide option is parsed but the clamp
    # below is applied unconditionally here).
    hours_len = len('Hours')
    pid_len = len('PID')
    args_len = len('Command')
    for hours, pid, args in listing:
        hours_len = max(hours_len, len(hours))
        pid_len = max(pid_len, len(pid))
        args_len = max(args_len, len(args))
    args_len = min(args_len, 78 - hours_len - pid_len)
    print('%*s %*s %s' %
          (hours_len, 'Hours', pid_len, 'PID', 'Command'))
    for hours, pid, args in listing:
        print('%*s %*s %s' %
              (hours_len, hours, pid_len, pid, args[:args_len]))
    if options.signal:
        # Accept either a numeric signal or a name (TERM or SIGTERM).
        try:
            signum = int(options.signal)
        except ValueError:
            signum = getattr(signal, options.signal.upper(),
                             getattr(signal, 'SIG' + options.signal.upper(),
                                     None))
        if not signum:
            sys.exit('Could not translate %r to a signal number.' %
                     options.signal)
        print('Sending processes %s (%d) signal...' % (options.signal, signum),
              end='')
        for hours, pid, args in listing:
            os.kill(int(pid), signum)
        print('Done.')
orphans.main()

View File

@ -11,65 +11,7 @@
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
from optparse import OptionParser
import eventlet.debug
eventlet.debug.hub_exceptions(True)
from swift.common.ring import Ring
from swift.common.utils import split_path
from swift.common.storage_policy import POLICIES
from swift.container.reconciler import add_to_reconciler_queue
"""
This tool is primarily for debugging and development but can be used an example
of how an operator could enqueue objects manually if a problem is discovered -
might be particularly useful if you need to hack a fix into the reconciler
and re-run it.
"""
USAGE = """
%prog <policy_index> </a/c/o> <timestamp> [options]
This script enqueues an object to be evaluated by the reconciler.
Arguments:
policy_index: the policy the object is currently stored in.
/a/c/o: the full path of the object - utf-8
timestamp: the timestamp of the datafile/tombstone.
""".strip()
parser = OptionParser(USAGE)
parser.add_option('-X', '--op', default='PUT', choices=('PUT', 'DELETE'),
help='the method of the misplaced operation')
parser.add_option('-f', '--force', action='store_true',
help='force an object to be re-enqueued')
def main():
    """Entry point for swift-reconciler-enqueue.

    Parses ``<policy_index> </a/c/o> <timestamp>`` from the module-level
    parser and enqueues the object into the reconciler queue.  Returns an
    error string (used as the exit status) on failure; prints the queue
    container name on success.
    """
    options, args = parser.parse_args()
    try:
        policy_index, path, timestamp = args
    except ValueError:
        # NOTE(review): print_help() returns None, so this exits with
        # status 0 even on bad usage -- confirm whether that's intended.
        sys.exit(parser.print_help())
    container_ring = Ring('/etc/swift/container.ring.gz')
    policy = POLICIES.get_by_index(policy_index)
    if not policy:
        # FIX: the message previously interpolated ``policy`` (always
        # None on this path) instead of the index the user supplied.
        return 'ERROR: invalid storage policy index: %s' % policy_index
    try:
        account, container, obj = split_path(path, 3, 3, True)
    except ValueError as e:
        return 'ERROR: %s' % e
    container_name = add_to_reconciler_queue(
        container_ring, account, container, obj,
        policy.idx, timestamp, options.op, force=options.force)
    if not container_name:
        return 'ERROR: unable to enqueue!'
    print(container_name)
from swift.cli import reconciler_enqueue
if __name__ == "__main__":
sys.exit(main())
reconciler_enqueue.main()

View File

@ -40,16 +40,6 @@ skip_reno = True
packages =
swift
[files]
scripts =
bin/swift-account-audit
bin/swift-config
bin/swift-reconciler-enqueue
bin/swift-drive-audit
bin/swift-get-nodes
bin/swift-oldies
bin/swift-orphans
[extras]
kms_keymaster =
oslo.config>=4.0.0,!=4.3.0,!=4.4.0 # Apache-2.0
@ -63,11 +53,13 @@ keystone =
[entry_points]
console_scripts =
swift-account-audit = swift.cli.account_audit:main
swift-account-auditor = swift.account.auditor:main
swift-account-info = swift.cli.info:account_main
swift-account-reaper = swift.account.reaper:main
swift-account-replicator = swift.account.replicator:main
swift-account-server = swift.account.server:main
swift-config = swift.cli.config:main
swift-container-auditor = swift.container.auditor:main
swift-container-deleter = swift.cli.container_deleter:main
swift-container-info = swift.cli.info:container_main
@ -79,7 +71,9 @@ console_scripts =
swift-container-updater = swift.container.updater:main
swift-dispersion-populate = swift.cli.dispersion_populate:main
swift-dispersion-report = swift.cli.dispersion_report:main
swift-drive-audit = swift.cli.drive_audit:main
swift-form-signature = swift.cli.form_signature:main
swift-get-nodes = swift.cli.get_nodes:main
swift-init = swift.common.manager:main
swift-manage-shard-ranges = swift.cli.manage_shard_ranges:main
swift-object-auditor = swift.obj.auditor:main
@ -90,9 +84,12 @@ console_scripts =
swift-object-replicator = swift.obj.replicator:main
swift-object-server = swift.obj.server:main
swift-object-updater = swift.obj.updater:main
swift-oldies = swift.cli.oldies:main
swift-orphans = swift.cli.orphans:main
swift-proxy-server = swift.proxy.server:main
swift-recon = swift.cli.recon:main
swift-recon-cron = swift.cli.recon_cron:main
swift-reconciler-enqueue = swift.cli.reconciler_enqueue:main
swift-reload = swift.cli.reload:main
swift-ring-builder = swift.cli.ringbuilder:error_handling_main
swift-ring-builder-analyzer = swift.cli.ring_builder_analyzer:main

390
swift/cli/account_audit.py Executable file
View File

@ -0,0 +1,390 @@
#!/usr/bin/env python
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import sys
from hashlib import md5
import getopt
from itertools import chain
import json
from eventlet.greenpool import GreenPool
from eventlet.event import Event
from six.moves.urllib.parse import quote
from swift.common.ring import Ring
from swift.common.utils import split_path
from swift.common.bufferedhttp import http_connect
# Help text printed for bad/absent arguments; %(cmd)s is the script name.
usage = """
Usage!
%(cmd)s [options] [url 1] [url 2] ...
-c [concurrency] Set the concurrency, default 50
-r [ring dir] Ring locations, default /etc/swift
-e [filename] File for writing a list of inconsistent urls
-d Also download files and verify md5
You can also feed a list of urls to the script through stdin.
Examples!
%(cmd)s AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076
%(cmd)s AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076/container/object
%(cmd)s -e errors.txt AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076/container
%(cmd)s < errors.txt
%(cmd)s -c 25 -d < errors.txt
""" % {'cmd': sys.argv[0]}
class Auditor(object):
    """Audit consistency of Swift accounts, containers and objects.

    Fetches listings from every primary node and cross-checks replica
    agreement (object counts, container counts, etags).  Work fans out
    on an eventlet GreenPool; completed listings are memoized in
    ``self.list_cache`` and in-flight fetches are tracked in
    ``self.in_progress`` (as Events) so concurrent greenthreads don't
    duplicate requests for the same account/container.
    """

    def __init__(self, swift_dir='/etc/swift', concurrency=50, deep=False,
                 error_file=None):
        """
        :param swift_dir: directory containing the ring files
        :param concurrency: size of the greenthread pool
        :param deep: if true, GET whole objects and verify MD5 against
                     the ETag instead of just HEADing them
        :param error_file: optional path; inconsistent paths are
                           appended to this file
        """
        self.pool = GreenPool(concurrency)
        self.object_ring = Ring(swift_dir, ring_name='object')
        self.container_ring = Ring(swift_dir, ring_name='container')
        self.account_ring = Ring(swift_dir, ring_name='account')
        self.deep = deep
        self.error_file = error_file
        # zero out stats
        self.accounts_checked = self.account_exceptions = \
            self.account_not_found = self.account_container_mismatch = \
            self.account_object_mismatch = self.objects_checked = \
            self.object_exceptions = self.object_not_found = \
            self.object_checksum_mismatch = self.containers_checked = \
            self.container_exceptions = self.container_count_mismatch = \
            self.container_not_found = self.container_obj_mismatch = 0
        self.list_cache = {}
        self.in_progress = {}

    def audit_object(self, account, container, name):
        """Check one object: presence in container listing and, across
        all primary nodes, HEAD status (or, with --deep, a full GET with
        MD5 verification) and etag agreement."""
        path = '/%s/%s/%s' % (account, container, name)
        part, nodes = self.object_ring.get_nodes(
            account, container.encode('utf-8'), name.encode('utf-8'))
        container_listing = self.audit_container(account, container)
        consistent = True
        if name not in container_listing:
            print(" Object %s missing in container listing!" % path)
            consistent = False
            # renamed from `hash`, which shadowed the builtin
            listed_hash = None
        else:
            listed_hash = container_listing[name]['hash']
        etags = []
        for node in nodes:
            try:
                if self.deep:
                    conn = http_connect(node['ip'], node['port'],
                                        node['device'], part, 'GET', path, {})
                    resp = conn.getresponse()
                    calc_hash = md5()
                    chunk = True
                    while chunk:
                        chunk = resp.read(8192)
                        calc_hash.update(chunk)
                    calc_hash = calc_hash.hexdigest()
                    if resp.status // 100 != 2:
                        self.object_not_found += 1
                        consistent = False
                        print(' Bad status %s GETting object "%s" on %s/%s'
                              % (resp.status, path,
                                 node['ip'], node['device']))
                        continue
                    if resp.getheader('ETag').strip('"') != calc_hash:
                        self.object_checksum_mismatch += 1
                        consistent = False
                        print(' MD5 does not match etag for "%s" on %s/%s'
                              % (path, node['ip'], node['device']))
                else:
                    conn = http_connect(node['ip'], node['port'],
                                        node['device'], part, 'HEAD',
                                        path.encode('utf-8'), {})
                    resp = conn.getresponse()
                    if resp.status // 100 != 2:
                        self.object_not_found += 1
                        consistent = False
                        print(' Bad status %s HEADing object "%s" on %s/%s'
                              % (resp.status, path,
                                 node['ip'], node['device']))
                        continue
                    # EC/SLO objects report their client-visible etag via
                    # this override header; prefer it when present.
                    override_etag = resp.getheader(
                        'X-Object-Sysmeta-Container-Update-Override-Etag')
                    if override_etag:
                        etags.append((override_etag, node))
                    else:
                        etags.append((resp.getheader('ETag'), node))
            except Exception:
                self.object_exceptions += 1
                consistent = False
                print(' Exception fetching object "%s" on %s/%s'
                      % (path, node['ip'], node['device']))
                continue
        if not etags:
            consistent = False
            # typo fix: was "Failed fo fetch"
            print(" Failed to fetch object %s at all!" % path)
        elif listed_hash:
            for etag, node in etags:
                if etag.strip('"') != listed_hash:
                    consistent = False
                    self.object_checksum_mismatch += 1
                    print(' ETag mismatch for "%s" on %s/%s'
                          % (path, node['ip'], node['device']))
        if not consistent and self.error_file:
            with open(self.error_file, 'a') as err_file:
                print(path, file=err_file)
        self.objects_checked += 1

    def audit_container(self, account, name, recurse=False):
        """Fetch (and cache) a merged object listing for a container,
        checking replica agreement along the way.

        :returns: dict mapping object name -> most recent listing record
        """
        if (account, name) in self.in_progress:
            self.in_progress[(account, name)].wait()
        if (account, name) in self.list_cache:
            return self.list_cache[(account, name)]
        self.in_progress[(account, name)] = Event()
        print('Auditing container "%s"' % name)
        path = '/%s/%s' % (account, name)
        account_listing = self.audit_account(account)
        consistent = True
        if name not in account_listing:
            consistent = False
            print(" Container %s not in account listing!" % path)
        part, nodes = \
            self.container_ring.get_nodes(account, name.encode('utf-8'))
        rec_d = {}
        responses = {}
        for node in nodes:
            marker = ''
            results = True
            while results:
                try:
                    conn = http_connect(node['ip'], node['port'],
                                        node['device'], part, 'GET',
                                        path.encode('utf-8'), {},
                                        'format=json&marker=%s' %
                                        quote(marker.encode('utf-8')))
                    resp = conn.getresponse()
                    if resp.status // 100 != 2:
                        self.container_not_found += 1
                        consistent = False
                        print(' Bad status GETting container "%s" on %s/%s' %
                              (path, node['ip'], node['device']))
                        break
                    if node['id'] not in responses:
                        responses[node['id']] = {
                            h.lower(): v for h, v in resp.getheaders()}
                    results = json.loads(resp.read())
                except Exception:
                    self.container_exceptions += 1
                    consistent = False
                    print(' Exception GETting container "%s" on %s/%s' %
                          (path, node['ip'], node['device']))
                    break
                if results:
                    marker = results[-1]['name']
                    for obj in results:
                        obj_name = obj['name']
                        if obj_name not in rec_d:
                            rec_d[obj_name] = obj
                        # keep the newest record, but flag any version skew
                        if (obj['last_modified'] !=
                                rec_d[obj_name]['last_modified']):
                            self.container_obj_mismatch += 1
                            consistent = False
                            print(" Different versions of %s/%s "
                                  "in container dbs." % (name, obj['name']))
                        if (obj['last_modified'] >
                                rec_d[obj_name]['last_modified']):
                            rec_d[obj_name] = obj
        obj_counts = [int(header['x-container-object-count'])
                      for header in responses.values()]
        if not obj_counts:
            consistent = False
            print(" Failed to fetch container %s at all!" % path)
        else:
            if len(set(obj_counts)) != 1:
                self.container_count_mismatch += 1
                consistent = False
                print(
                    " Container databases don't agree on number of objects.")
                print(
                    " Max: %s, Min: %s" % (max(obj_counts), min(obj_counts)))
        self.containers_checked += 1
        self.list_cache[(account, name)] = rec_d
        self.in_progress[(account, name)].send(True)
        del self.in_progress[(account, name)]
        if recurse:
            for obj in rec_d.keys():
                self.pool.spawn_n(self.audit_object, account, name, obj)
        if not consistent and self.error_file:
            with open(self.error_file, 'a') as error_file:
                print(path, file=error_file)
        return rec_d

    def audit_account(self, account, recurse=False):
        """Fetch (and cache) the set of container names for an account,
        checking that replicas agree on container and object counts.

        :returns: set of container names
        """
        if account in self.in_progress:
            self.in_progress[account].wait()
        if account in self.list_cache:
            return self.list_cache[account]
        self.in_progress[account] = Event()
        print('Auditing account "%s"' % account)
        consistent = True
        path = '/%s' % account
        part, nodes = self.account_ring.get_nodes(account)
        responses = {}
        for node in nodes:
            marker = ''
            results = True
            while results:
                node_id = node['id']
                try:
                    conn = http_connect(node['ip'], node['port'],
                                        node['device'], part, 'GET', path, {},
                                        'format=json&marker=%s' %
                                        quote(marker.encode('utf-8')))
                    resp = conn.getresponse()
                    if resp.status // 100 != 2:
                        self.account_not_found += 1
                        consistent = False
                        print(" Bad status GETting account '%s' "
                              " from %s:%s" %
                              (account, node['ip'], node['device']))
                        break
                    results = json.loads(resp.read())
                except Exception:
                    self.account_exceptions += 1
                    consistent = False
                    print(" Exception GETting account '%s' on %s:%s" %
                          (account, node['ip'], node['device']))
                    break
                if node_id not in responses:
                    responses[node_id] = [
                        {h.lower(): v for h, v in resp.getheaders()}, []]
                responses[node_id][1].extend(results)
                if results:
                    marker = results[-1]['name']
        headers = [r[0] for r in responses.values()]
        cont_counts = [int(header['x-account-container-count'])
                       for header in headers]
        if len(set(cont_counts)) != 1:
            self.account_container_mismatch += 1
            consistent = False
            print(" Account databases for '%s' don't agree on"
                  " number of containers." % account)
            if cont_counts:
                print(" Max: %s, Min: %s" % (max(cont_counts),
                                             min(cont_counts)))
        obj_counts = [int(header['x-account-object-count'])
                      for header in headers]
        if len(set(obj_counts)) != 1:
            self.account_object_mismatch += 1
            consistent = False
            print(" Account databases for '%s' don't agree on"
                  " number of objects." % account)
            if obj_counts:
                print(" Max: %s, Min: %s" % (max(obj_counts),
                                             min(obj_counts)))
        containers = set()
        for resp in responses.values():
            containers.update(container['name'] for container in resp[1])
        self.list_cache[account] = containers
        self.in_progress[account].send(True)
        del self.in_progress[account]
        self.accounts_checked += 1
        if recurse:
            for container in containers:
                self.pool.spawn_n(self.audit_container, account,
                                  container, True)
        if not consistent and self.error_file:
            with open(self.error_file, 'a') as error_file:
                # bug fix: actually write the path into the error file;
                # the old code printed "path <file object>" to stdout
                # because the file= keyword was missing
                print(path, file=error_file)
        return containers

    def audit(self, account, container=None, obj=None):
        """Spawn an audit at the narrowest scope the args describe."""
        if obj and container:
            self.pool.spawn_n(self.audit_object, account, container, obj)
        elif container:
            self.pool.spawn_n(self.audit_container, account, container, True)
        else:
            self.pool.spawn_n(self.audit_account, account, True)

    def wait(self):
        """Block until all spawned audit greenthreads have finished."""
        self.pool.waitall()

    def print_stats(self):
        """Print a summary of everything checked and every mismatch."""

        def _print_stat(name, stat):
            # Right align stat name in a field of 18 characters
            print("{0:>18}: {1}".format(name, stat))

        print()
        _print_stat("Accounts checked", self.accounts_checked)
        if self.account_not_found:
            _print_stat("Missing Replicas", self.account_not_found)
        if self.account_exceptions:
            _print_stat("Exceptions", self.account_exceptions)
        if self.account_container_mismatch:
            _print_stat("Container mismatch", self.account_container_mismatch)
        if self.account_object_mismatch:
            _print_stat("Object mismatch", self.account_object_mismatch)
        print()
        _print_stat("Containers checked", self.containers_checked)
        if self.container_not_found:
            _print_stat("Missing Replicas", self.container_not_found)
        if self.container_exceptions:
            _print_stat("Exceptions", self.container_exceptions)
        if self.container_count_mismatch:
            _print_stat("Count mismatch", self.container_count_mismatch)
        if self.container_obj_mismatch:
            _print_stat("Object mismatch", self.container_obj_mismatch)
        print()
        _print_stat("Objects checked", self.objects_checked)
        if self.object_not_found:
            _print_stat("Missing Replicas", self.object_not_found)
        if self.object_exceptions:
            _print_stat("Exceptions", self.object_exceptions)
        if self.object_checksum_mismatch:
            _print_stat("MD5 Mismatch", self.object_checksum_mismatch)
def main():
    """Parse command-line flags, audit each requested path and report."""
    try:
        optlist, args = getopt.getopt(sys.argv[1:], 'c:r:e:d')
    except getopt.GetoptError as err:
        print(str(err))
        print(usage)
        sys.exit(2)
    stdin_is_tty = os.isatty(sys.stdin.fileno())
    if not args and stdin_is_tty:
        # nothing on the command line and nothing piped in
        print(usage)
        sys.exit()
    opts = dict(optlist)
    auditor = Auditor(
        concurrency=int(opts.get('-c', 50)),
        error_file=opts.get('-e', None),
        swift_dir=opts.get('-r', '/etc/swift'),
        deep='-d' in opts,
    )
    if not stdin_is_tty:
        # also accept a list of urls on stdin
        args = chain(args, sys.stdin)
    for raw_path in args:
        normalized = '/' + raw_path.rstrip('\r\n').lstrip('/')
        auditor.audit(*split_path(normalized, 1, 3, True))
    auditor.wait()
    auditor.print_stats()
if __name__ == '__main__':
main()

91
swift/cli/config.py Executable file
View File

@ -0,0 +1,91 @@
#!/usr/bin/env python
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import optparse
import os
import sys
from swift.common.manager import Server
from swift.common.utils import readconf
from swift.common.wsgi import appconfig
# Module-level option parser used by main(): -c picks which of a
# server's numbered config files to parse, -s restricts output to one
# section, -w parses via wsgi/paste-deploy instead of readconf.
parser = optparse.OptionParser('%prog [options] SERVER')
parser.add_option('-c', '--config-num', metavar="N", type="int",
                  dest="number", default=0,
                  help="parse config for the Nth server only")
parser.add_option('-s', '--section', help="only display matching sections")
parser.add_option('-w', '--wsgi', action='store_true',
                  help="use wsgi/paste parser instead of readconf")
def _context_name(context):
return ':'.join((context.object_type.name, context.name))
def inspect_app_config(app_config):
    """Flatten a paste-deploy app config into {section_name: conf_dict}.

    When the top-level context is a pipeline, each filter and the final
    app get their own sections, and the pipeline's own section gains a
    'pipeline' entry listing the entry-point names in order.
    """
    context = app_config.context
    section_name = _context_name(context)
    conf = {section_name: context.config()}
    if context.object_type.name != 'pipeline':
        return conf
    pipeline = []
    for filter_context in context.filter_contexts:
        conf[_context_name(filter_context)] = filter_context.config()
        pipeline.append(filter_context.entry_point_name)
    app_context = context.app_context
    conf[_context_name(app_context)] = app_context.config()
    pipeline.append(app_context.entry_point_name)
    conf[section_name]['pipeline'] = ' '.join(pipeline)
    return conf
def main():
    """Print the parsed Swift configuration for servers or conf paths.

    Returns an error string (handed to sys.exit by the console script)
    when no argument is given; otherwise prints each config file's
    sections and returns None.
    """
    options, args = parser.parse_args()
    options = dict(vars(options))
    if not args:
        return 'ERROR: specify type of server or conf_path'
    conf_files = []
    for arg in args:
        # each arg is either a literal conf path or a server name to
        # look up
        if os.path.exists(arg):
            conf_files.append(arg)
        else:
            conf_files.extend(Server(arg).conf_files(**options))
    for conf_file in conf_files:
        print('# %s' % conf_file)
        if options['wsgi']:
            conf = inspect_app_config(appconfig(conf_file))
        else:
            conf = readconf(conf_file)
        flat_vars = {}
        for key, value in conf.items():
            if options['section'] and key != options['section']:
                continue
            if not isinstance(value, dict):
                # non-section values get echoed as comments at the end
                flat_vars[key] = value
                continue
            print('[%s]' % key)
            for opt_name, opt_value in value.items():
                print('%s = %s' % (opt_name, opt_value))
            print()
        for key, value in flat_vars.items():
            print('# %s = %s' % (key, value))
        print()
if __name__ == "__main__":
sys.exit(main())

268
swift/cli/drive_audit.py Executable file
View File

@ -0,0 +1,268 @@
#!/usr/bin/env python
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import glob
import locale
import os
import os.path
import re
import subprocess
import sys
import six
from six.moves.configparser import ConfigParser
from swift.common.utils import backward, get_logger, dump_recon_cache, \
config_true_value
def get_devices(device_dir, logger):
    """Return a list of dicts describing swift devices mounted under
    *device_dir*.

    Each dict carries 'mount_point', 'block_device', 'major', 'minor'
    and, when the device appears in /proc/partitions, 'kernel_device'.

    :param device_dir: only mount points under this directory count
    :param logger: logger used to report devices we cannot stat
    """
    devices = []
    majmin_devices = {}
    # Map device names to major:minor via the /dev/block symlinks.
    # Using os.scandir on recent versions of python, else os.listdir
    if 'scandir' in dir(os):
        with os.scandir("/dev/block") as it:
            for ent in it:
                if ent.is_symlink():
                    dev_name = os.path.basename(os.readlink(ent.path))
                    majmin = os.path.basename(ent.path).split(':')
                    majmin_devices[dev_name] = {'major': majmin[0],
                                                'minor': majmin[1]}
    else:
        for ent in os.listdir("/dev/block"):
            ent_path = os.path.join("/dev/block", ent)
            # bug fix: os.path.islink is the real predicate; the former
            # os.path.is_symlink attribute does not exist and raised
            # AttributeError whenever this fallback path ran
            if os.path.islink(ent_path):
                dev_name = os.path.basename(os.readlink(ent_path))
                majmin = os.path.basename(ent_path).split(':')
                majmin_devices[dev_name] = {'major': majmin[0],
                                            'minor': majmin[1]}
    with open('/proc/mounts') as mounts:
        mount_lines = mounts.readlines()
    for line in mount_lines:
        data = line.strip().split()
        block_device = data[0]
        mount_point = data[1]
        if mount_point.startswith(device_dir):
            device = {}
            device['mount_point'] = mount_point
            device['block_device'] = block_device
            dev_name = os.path.basename(block_device)
            if dev_name in majmin_devices:
                # If symlink is in /dev/block
                device['major'] = majmin_devices[dev_name]['major']
                device['minor'] = majmin_devices[dev_name]['minor']
            else:
                # Else we try to stat block_device
                try:
                    device_num = os.stat(block_device).st_rdev
                except OSError:
                    # If we can't stat the device,
                    # then something weird is going on
                    logger.error(
                        'Could not determine major:minor numbers for %s '
                        '(mounted at %s)! Skipping...',
                        block_device, mount_point)
                    continue
                device['major'] = str(os.major(device_num))
                device['minor'] = str(os.minor(device_num))
            devices.append(device)
    # Attach kernel device names from /proc/partitions (skip its header).
    with open('/proc/partitions') as partitions:
        partition_lines = partitions.readlines()[2:]
    for line in partition_lines:
        major, minor, blocks, kernel_device = line.strip().split()
        device = [d for d in devices
                  if d['major'] == major and d['minor'] == minor]
        if device:
            device[0]['kernel_device'] = kernel_device
    return devices
def get_errors(error_re, log_file_pattern, minutes, logger,
               log_file_encoding):
    """Count drive errors in kernel logs newer than *minutes* minutes.

    Walks the (possibly rotated) log files newest-first, reading each
    file backwards, and stops at the first line older than the cutoff
    or at the previous boot banner.

    :param error_re: list of compiled regexes whose capture group is a
                     kernel device name (e.g. sda1)
    :param log_file_pattern: glob pattern matching the kernel log files
    :param minutes: age cutoff for counting errors
    :param logger: logger for reporting unreadable files
    :param log_file_encoding: codec used to decode log bytes on py3
    :returns: dict mapping kernel device name -> error count
    """
    # Assuming log rotation is being used, we need to examine
    # recently rotated files in case the rotation occurred
    # just before the script is being run - the data we are
    # looking for may have rotated.
    #
    # The globbing used before would not work with all out-of-box
    # distro setup for logrotate and syslog therefore moving this
    # to the config where one can set it with the desired
    # globbing pattern.
    log_files = [f for f in glob.glob(log_file_pattern)]
    try:
        # newest file first so we can stop as soon as we hit old entries
        log_files.sort(key=lambda f: os.stat(f).st_mtime, reverse=True)
    except (IOError, OSError) as exc:
        logger.error(exc)
        print(exc)
        sys.exit(1)
    now_time = datetime.datetime.now()
    end_time = now_time - datetime.timedelta(minutes=minutes)
    # kern.log does not contain the year so we need to keep
    # track of the year and month in case the year recently
    # ticked over
    year = now_time.year
    prev_ent_month = now_time.strftime('%b')
    errors = {}
    reached_old_logs = False
    for path in log_files:
        try:
            f = open(path, 'rb')
        except IOError:
            logger.error("Error: Unable to open " + path)
            print("Unable to open " + path)
            sys.exit(1)
        # scan the file from its end backwards toward older entries
        for line in backward(f):
            if not six.PY2:
                line = line.decode(log_file_encoding, 'surrogateescape')
            if '[ 0.000000]' in line \
                    or 'KERNEL supported cpus:' in line \
                    or 'BIOS-provided physical RAM map:' in line:
                # Ignore anything before the last boot
                reached_old_logs = True
                break
            # Solves the problem with year change - kern.log does not
            # keep track of the year.
            log_time_ent = line.split()[:3]
            if log_time_ent[0] == 'Dec' and prev_ent_month == 'Jan':
                year -= 1
            prev_ent_month = log_time_ent[0]
            log_time_string = '%d %s' % (year, ' '.join(log_time_ent))
            try:
                log_time = datetime.datetime.strptime(
                    log_time_string, '%Y %b %d %H:%M:%S')
            except ValueError:
                # Some versions use ISO timestamps instead
                try:
                    log_time = datetime.datetime.strptime(
                        line[0:19], '%Y-%m-%dT%H:%M:%S')
                except ValueError:
                    continue
            if log_time > end_time:
                for err in error_re:
                    for device in err.findall(line):
                        errors[device] = errors.get(device, 0) + 1
            else:
                # first entry older than the cutoff; everything before
                # it (in reading order) is older still
                reached_old_logs = True
                break
        if reached_old_logs:
            break
    return errors
def comment_fstab(mount_point):
    """Disable the /etc/fstab entry for *mount_point*.

    Writes a rewritten copy to /etc/fstab.new with the matching,
    not-yet-commented entry prefixed by '#', then renames it over
    /etc/fstab.
    """
    with open('/etc/fstab', 'r') as fstab:
        with open('/etc/fstab.new', 'w') as new_fstab:
            for line in fstab:
                fields = line.split()
                is_target = (len(fields) > 2
                             and fields[1] == mount_point
                             and not line.startswith('#'))
                new_fstab.write('#' + line if is_target else line)
    os.rename('/etc/fstab.new', '/etc/fstab')
def main():
    """Entry point for swift-drive-audit.

    Reads the [drive-audit] config, scans kernel logs for drive errors
    and, for each device whose error count reaches the configured
    limit, optionally unmounts it and comments it out of /etc/fstab;
    results are dumped to the recon cache.
    """
    c = ConfigParser()
    try:
        conf_path = sys.argv[1]
    except Exception:
        print("Usage: %s CONF_FILE" % sys.argv[0].split('/')[-1])
        sys.exit(1)
    if not c.read(conf_path):
        print("Unable to read config file %s" % conf_path)
        sys.exit(1)
    conf = dict(c.items('drive-audit'))
    device_dir = conf.get('device_dir', '/srv/node')
    minutes = int(conf.get('minutes', 60))
    error_limit = int(conf.get('error_limit', 1))
    recon_cache_path = conf.get('recon_cache_path', "/var/cache/swift")
    log_file_pattern = conf.get('log_file_pattern',
                                '/var/log/kern.*[!.][!g][!z]')
    log_file_encoding = conf.get('log_file_encoding', 'auto')
    if log_file_encoding == 'auto':
        log_file_encoding = locale.getpreferredencoding()
    log_to_console = config_true_value(conf.get('log_to_console', False))
    # regex_pattern_* config options replace the default error patterns
    error_re = []
    for conf_key in conf:
        if conf_key.startswith('regex_pattern_'):
            error_pattern = conf[conf_key]
            try:
                r = re.compile(error_pattern)
            except re.error:
                sys.exit('Error: unable to compile regex pattern "%s"' %
                         error_pattern)
            error_re.append(r)
    if not error_re:
        error_re = [
            re.compile(r'\berror\b.*\b(sd[a-z]{1,2}\d?)\b'),
            re.compile(r'\b(sd[a-z]{1,2}\d?)\b.*\berror\b'),
        ]
    conf['log_name'] = conf.get('log_name', 'drive-audit')
    logger = get_logger(conf, log_to_console=log_to_console,
                        log_route='drive-audit')
    devices = get_devices(device_dir, logger)
    logger.debug("Devices found: %s" % str(devices))
    if not devices:
        logger.error("Error: No devices found!")
    recon_errors = {}
    total_errors = 0
    for device in devices:
        recon_errors[device['mount_point']] = 0
    errors = get_errors(error_re, log_file_pattern, minutes, logger,
                        log_file_encoding)
    logger.debug("Errors found: %s" % str(errors))
    unmounts = 0
    for kernel_device, count in errors.items():
        if count >= error_limit:
            # match the erroring kernel device back to a mounted device
            device = \
                [d for d in devices if d['kernel_device'] == kernel_device]
            if device:
                mount_point = device[0]['mount_point']
                if mount_point.startswith(device_dir):
                    if config_true_value(conf.get('unmount_failed_device',
                                                  True)):
                        logger.info("Unmounting %s with %d errors" %
                                    (mount_point, count))
                        subprocess.call(['umount', '-fl', mount_point])
                        logger.info("Commenting out %s from /etc/fstab" %
                                    (mount_point))
                        comment_fstab(mount_point)
                        unmounts += 1
                    else:
                        logger.info("Detected %s with %d errors "
                                    "(Device not unmounted)" %
                                    (mount_point, count))
                    recon_errors[mount_point] = count
                    total_errors += count
    recon_file = recon_cache_path + "/drive.recon"
    dump_recon_cache(recon_errors, recon_file, logger)
    dump_recon_cache({'drive_audit_errors': total_errors}, recon_file, logger,
                     set_owner=conf.get("user", "swift"))
    if unmounts == 0:
        logger.info("No drives were unmounted")
    elif os.path.isdir("/run/systemd/system"):
        # /etc/fstab changed; tell systemd so its mount units stay in sync
        logger.debug("fstab updated, calling systemctl daemon-reload")
        subprocess.call(["/usr/bin/systemctl", "daemon-reload"])
if __name__ == '__main__':
main()

80
swift/cli/get_nodes.py Executable file
View File

@ -0,0 +1,80 @@
#!/usr/bin/env python
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from optparse import OptionParser
from os.path import basename
from swift.common.ring import Ring
from swift.common.storage_policy import reload_storage_policies
from swift.common.utils import set_swift_dir
from swift.cli.info import (parse_get_node_args, print_item_locations,
InfoSystemExit)
def main():
    """Command-line entry point for swift-get-nodes."""
    usage = '''
Shows the nodes responsible for the item specified.
Usage: %prog [-a] <ring.gz> <account> [<container> [<object>]]
Or: %prog [-a] <ring.gz> -p partition
Or: %prog [-a] -P policy_name <account> [<container> [<object>]]
Or: %prog [-a] -P policy_name -p partition
Note: account, container, object can also be a single arg separated by /
Example:
$ %prog -a /etc/swift/account.ring.gz MyAccount
Partition 5743883
Hash 96ae332a60b58910784e4417a03e1ad0
10.1.1.7:8000 sdd1
10.1.9.2:8000 sdb1
10.1.5.5:8000 sdf1
10.1.5.9:8000 sdt1 # [Handoff]
'''
    parser = OptionParser(usage)
    parser.add_option('-a', '--all', action='store_true',
                      help='Show all handoff nodes')
    parser.add_option('-p', '--partition', metavar='PARTITION',
                      help='Show nodes for a given partition')
    parser.add_option('-P', '--policy-name', dest='policy_name',
                      help='Specify which policy to use')
    parser.add_option('-d', '--swift-dir', default='/etc/swift',
                      dest='swift_dir', help='Path to swift directory')
    parser.add_option('-Q', '--quoted', action='store_true',
                      help='Assume swift paths are quoted')
    options, args = parser.parse_args()

    # honor --swift-dir before any ring/policy lookups happen
    if set_swift_dir(options.swift_dir):
        reload_storage_policies()

    try:
        ring_path, args = parser_args = parse_get_node_args(options, args)
    except InfoSystemExit as e:
        parser.print_help()
        sys.exit('ERROR: %s' % e)

    ring = None
    ring_name = None
    if ring_path:
        # strip the trailing '.ring.gz' to recover the ring's name
        ring_name = basename(ring_path)[:-len('.ring.gz')]
        ring = Ring(ring_path)

    try:
        print_item_locations(ring, ring_name, *args, **vars(options))
    except InfoSystemExit:
        sys.exit(1)
if __name__ == '__main__':
main()

93
swift/cli/oldies.py Executable file
View File

@ -0,0 +1,93 @@
#!/usr/bin/env python
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import optparse
import subprocess
import sys
def main():
    """List Swift processes older than a given age from `ps` output."""
    parser = optparse.OptionParser(usage='''%prog [options]
Lists old Swift processes.
'''.strip())
    parser.add_option('-a', '--age', dest='hours', type='int', default=720,
                      help='look for processes at least HOURS old; '
                           'default: 720 (30 days)')
    parser.add_option('-p', '--pids', action='store_true',
                      help='only print the pids found; for example, to pipe '
                           'to xargs kill')
    (options, args) = parser.parse_args()

    listing = []
    ps_output = subprocess.Popen(
        ['ps', '-eo', 'etime,pid,args', '--no-headers'],
        stdout=subprocess.PIPE).communicate()[0]
    for line in ps_output.split(b'\n'):
        if not line:
            continue
        hours = 0
        try:
            etime, pid, args = line.decode('ascii').split(None, 2)
        except ValueError:
            # This covers both decoding and not-enough-values-to-unpack errors
            sys.exit('Could not process ps line %r' % line)
        if not args.startswith((
                '/usr/bin/python /usr/bin/swift-',
                '/usr/bin/python /usr/local/bin/swift-',
                '/bin/python /usr/bin/swift-',
                '/usr/bin/python3 /usr/bin/swift-',
                '/usr/bin/python3 /usr/local/bin/swift-',
                '/bin/python3 /usr/bin/swift-')):
            continue
        args = args.split('-', 1)[1]
        # etime looks like [[DD-]HH:]MM:SS; split off the day part first
        day_and_clock = etime.split('-')
        if len(day_and_clock) == 2:
            hours = int(day_and_clock[0]) * 24
            clock = day_and_clock[1]
        elif len(day_and_clock) == 1:
            clock = day_and_clock[0]
        else:
            sys.exit('Could not process etime value from %r' % line)
        clock_parts = clock.split(':')
        if len(clock_parts) == 3:
            hours += int(clock_parts[0])
        elif len(clock_parts) != 2:
            sys.exit('Could not process etime value from %r' % line)
        if hours >= options.hours:
            listing.append((str(hours), pid, args))

    if not listing:
        sys.exit()

    if options.pids:
        for _hours, pid, _args in listing:
            print(pid)
    else:
        # size each column to its widest value, clipping the command
        hours_len = max([len('Hours')] + [len(h) for h, _p, _a in listing])
        pid_len = max([len('PID')] + [len(p) for _h, p, _a in listing])
        args_len = max([len('Command')] + [len(a) for _h, _p, a in listing])
        args_len = min(args_len, 78 - hours_len - pid_len)
        print('%*s %*s %s' % (hours_len, 'Hours', pid_len, 'PID', 'Command'))
        for hours, pid, args in listing:
            print('%*s %*s %s' % (hours_len, hours, pid_len,
                                  pid, args[:args_len]))
if __name__ == '__main__':
main()

137
swift/cli/orphans.py Executable file
View File

@ -0,0 +1,137 @@
#!/usr/bin/env python
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import optparse
import os
import re
import signal
import subprocess
import sys
from swift.common.manager import RUN_DIR
def main():
    """Find (and optionally signal) orphaned Swift processes.

    An "orphan" is a process whose command line looks like a Swift daemon
    (matched by ``swift_cmd_re``) but whose pid is neither recorded in a
    .pid file under the run directory nor a direct child of such a pid.
    Processes run with the 'once' argument and swift-orphans itself are
    ignored.  Matching processes at least ``--age`` hours old are listed;
    with ``--kill`` they are also sent the given signal.
    """
    parser = optparse.OptionParser(usage='''%prog [options]
Lists and optionally kills orphaned Swift processes. This is done by scanning
/var/run/swift for .pid files and listing any processes that look like Swift
processes but aren't associated with the pids in those .pid files. Any Swift
processes running with the 'once' parameter are ignored, as those are usually
for full-speed audit scans and such.
Example (sends SIGTERM to all orphaned Swift processes older than two hours):
%prog -a 2 -k TERM
'''.strip())
    parser.add_option('-a', '--age', dest='hours', type='int', default=24,
                      help="look for processes at least HOURS old; "
                           "default: 24")
    parser.add_option('-k', '--kill', dest='signal',
                      help='send SIGNAL to matched processes; default: just '
                           'list process information')
    parser.add_option('-w', '--wide', dest='wide', default=False,
                      action='store_true',
                      help="don't clip the listing at 80 characters")
    parser.add_option('-r', '--run-dir', type="str",
                      dest="run_dir", default=RUN_DIR,
                      help="alternative directory to store running pid files "
                           "default: %s" % RUN_DIR)
    (options, args) = parser.parse_args()

    # Collect every pid Swift knows about (from .pid files) plus their
    # direct children; anything else that looks like Swift is an orphan.
    pids = []
    for root, directories, files in os.walk(options.run_dir):
        for name in files:
            if name.endswith(('.pid', '.pid.d')):
                # Use a context manager so the pid file is closed promptly
                # instead of leaking the file handle.
                with open(os.path.join(root, name)) as pid_file:
                    pids.append(pid_file.read().strip())
                pids.extend(subprocess.Popen(
                    ['ps', '--ppid', pids[-1], '-o', 'pid', '--no-headers'],
                    stdout=subprocess.PIPE).communicate()[0].decode().split())

    listing = []
    swift_cmd_re = re.compile(
        '^/usr/bin/python[23]? /usr(?:/local)?/bin/swift-')
    for line in subprocess.Popen(
            ['ps', '-eo', 'etime,pid,args', '--no-headers'],
            stdout=subprocess.PIPE).communicate()[0].split(b'\n'):
        if not line:
            continue
        hours = 0
        try:
            etime, pid, args = line.decode('ascii').split(None, 2)
        except ValueError:
            sys.exit('Could not process ps line %r' % line)
        if pid in pids:
            continue
        # Skip anything that isn't a Swift daemon, skip this tool itself,
        # and skip daemons deliberately run in 'once' mode.
        if (not swift_cmd_re.match(args) or
                'swift-orphans' in args or
                'once' in args.split()):
            continue
        args = args.split('swift-', 1)[1]
        # ps etime is [[DD-]HH:]MM:SS; reduce the elapsed time to whole
        # hours (minutes/seconds are discarded).
        etime = etime.split('-')
        if len(etime) == 2:
            hours = int(etime[0]) * 24
            etime = etime[1]
        elif len(etime) == 1:
            etime = etime[0]
        else:
            sys.exit('Could not process etime value from %r' % line)
        etime = etime.split(':')
        if len(etime) == 3:
            hours += int(etime[0])
        elif len(etime) != 2:
            sys.exit('Could not process etime value from %r' % line)
        if hours >= options.hours:
            listing.append((str(hours), pid, args))

    if not listing:
        sys.exit()

    # Size each column to its widest entry.
    hours_len = len('Hours')
    pid_len = len('PID')
    args_len = len('Command')
    for hours, pid, args in listing:
        hours_len = max(hours_len, len(hours))
        pid_len = max(pid_len, len(pid))
        args_len = max(args_len, len(args))
    if not options.wide:
        # BUG FIX: honor -w/--wide -- previously the command column was
        # unconditionally clipped to keep the listing within 80 chars,
        # making the advertised --wide option a no-op.
        args_len = min(args_len, 78 - hours_len - pid_len)
    print('%*s %*s %s' %
          (hours_len, 'Hours', pid_len, 'PID', 'Command'))
    for hours, pid, args in listing:
        print('%*s %*s %s' %
              (hours_len, hours, pid_len, pid, args[:args_len]))

    if options.signal:
        try:
            signum = int(options.signal)
        except ValueError:
            # Accept either 'TERM' or 'SIGTERM' style signal names.
            signum = getattr(signal, options.signal.upper(),
                             getattr(signal, 'SIG' + options.signal.upper(),
                                     None))
        if not signum:
            sys.exit('Could not translate %r to a signal number.' %
                     options.signal)
        print('Sending processes %s (%d) signal...' % (options.signal, signum),
              end='')
        for hours, pid, args in listing:
            os.kill(int(pid), signum)
        print('Done.')


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,74 @@
#!/usr/bin/env python
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from optparse import OptionParser
import eventlet.debug
from swift.common.ring import Ring
from swift.common.utils import split_path
from swift.common.storage_policy import POLICIES
from swift.container.reconciler import add_to_reconciler_queue
"""
This tool is primarily for debugging and development but can be used an example
of how an operator could enqueue objects manually if a problem is discovered -
might be particularly useful if you need to hack a fix into the reconciler
and re-run it.
"""
USAGE = """
%prog <policy_index> </a/c/o> <timestamp> [options]
This script enqueues an object to be evaluated by the reconciler.
Arguments:
policy_index: the policy the object is currently stored in.
/a/c/o: the full path of the object - utf-8
timestamp: the timestamp of the datafile/tombstone.
""".strip()
parser = OptionParser(USAGE)
parser.add_option('-X', '--op', default='PUT', choices=('PUT', 'DELETE'),
help='the method of the misplaced operation')
parser.add_option('-f', '--force', action='store_true',
help='force an object to be re-enqueued')
def main():
    """Enqueue a misplaced object for the container reconciler.

    Reads ``<policy_index> </a/c/o> <timestamp>`` from the command line
    and inserts a corresponding work item into the reconciler queue via
    ``add_to_reconciler_queue``.

    :returns: None on success (the queue container name is printed), or
              an error-message string; the ``sys.exit(main())`` wrapper
              below turns a returned string into a non-zero exit status.
    """
    eventlet.debug.hub_exceptions(True)
    options, args = parser.parse_args()
    try:
        policy_index, path, timestamp = args
    except ValueError:
        # Wrong number of positional arguments: show usage and bail out.
        sys.exit(parser.print_help())
    container_ring = Ring('/etc/swift/container.ring.gz')
    policy = POLICIES.get_by_index(policy_index)
    if not policy:
        # BUG FIX: report the index the user actually supplied; the
        # original formatted `policy`, which is always None/falsey here,
        # producing "invalid storage policy index: None".
        return 'ERROR: invalid storage policy index: %s' % policy_index
    try:
        account, container, obj = split_path(path, 3, 3, True)
    except ValueError as e:
        return 'ERROR: %s' % e
    container_name = add_to_reconciler_queue(
        container_ring, account, container, obj,
        policy.idx, timestamp, options.op, force=options.force)
    if not container_name:
        return 'ERROR: unable to enqueue!'
    print(container_name)


if __name__ == "__main__":
    sys.exit(main())