update with trunk
This commit is contained in:
commit
bc894f9efb
10
CHANGELOG
Normal file
10
CHANGELOG
Normal file
@ -0,0 +1,10 @@
|
||||
swift (x.x.x)
|
||||
|
||||
* Renamed swift-stats-populate to swift-dispersion-populate and
|
||||
swift-stats-report to swift-dispersion-report with extraneous unused
|
||||
options removed. The new tools use dispersion.conf instead of stats.conf.
|
||||
|
||||
* Transaction id for each request is returned in a response header
|
||||
(X-Trans-ID). Setting the transaction id has moved from the proxy server to
|
||||
the catch_errors middleware. Additionally, the internal header has changed
|
||||
from X-CF-Trans-ID to X-Trans-ID.
|
155
bin/swift-dispersion-populate
Executable file
155
bin/swift-dispersion-populate
Executable file
@ -0,0 +1,155 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (c) 2010-2011 OpenStack, LLC.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import traceback
|
||||
from ConfigParser import ConfigParser
|
||||
from cStringIO import StringIO
|
||||
from sys import exit, argv, stdout
|
||||
from time import time
|
||||
from uuid import uuid4
|
||||
|
||||
from eventlet import GreenPool, patcher, sleep
|
||||
from eventlet.pools import Pool
|
||||
|
||||
from swift.common.client import Connection, get_auth
|
||||
from swift.common.ring import Ring
|
||||
from swift.common.utils import compute_eta, get_time_units
|
||||
|
||||
|
||||
def put_container(connpool, container, report):
    """
    Create one container through a pooled connection.

    :param connpool: pool whose ``item()`` context manager yields a
                     connection exposing ``put_container`` and ``attempts``
    :param container: name of the container to create
    :param report: optional callback; called with True after a successful
                   PUT and with False when anything in the attempt raised
    :raises Exception: whatever the attempt raised, after ``report(False)``
    """
    global retries_done
    # A falsy report means "no progress reporting"; substitute a no-op so the
    # success and failure paths read the same.
    notify = report if report else (lambda success: None)
    try:
        with connpool.item() as conn:
            conn.put_container(container)
            # Every attempt beyond the first counts as a retry.
            retries_done = retries_done + (conn.attempts - 1)
        notify(True)
    except Exception:
        notify(False)
        raise
|
||||
|
||||
|
||||
def put_object(connpool, container, obj, report):
    """
    Create one object through a pooled connection.

    The object's body is its own name, and the name is also stored in the
    ``x-object-meta-dispersion`` header.

    :param connpool: pool whose ``item()`` context manager yields a
                     connection exposing ``put_object`` and ``attempts``
    :param container: name of the container receiving the object
    :param obj: name of the object to create
    :param report: optional callback; called with True after a successful
                   PUT and with False when anything in the attempt raised
    :raises Exception: whatever the attempt raised, after ``report(False)``
    """
    global retries_done
    # A falsy report means "no progress reporting"; substitute a no-op so the
    # success and failure paths read the same.
    notify = report if report else (lambda success: None)
    try:
        with connpool.item() as conn:
            metadata = {'x-object-meta-dispersion': obj}
            conn.put_object(container, obj, StringIO(obj), headers=metadata)
            # Every attempt beyond the first counts as a retry.
            retries_done = retries_done + (conn.attempts - 1)
        notify(True)
    except Exception:
        notify(False)
        raise
|
||||
|
||||
|
||||
def report(success):
|
||||
global begun, created, item_type, next_report, need_to_create, retries_done
|
||||
if not success:
|
||||
traceback.print_exc()
|
||||
exit('Gave up due to error(s).')
|
||||
created += 1
|
||||
if time() < next_report:
|
||||
return
|
||||
next_report = time() + 5
|
||||
eta, eta_unit = compute_eta(begun, created, need_to_create)
|
||||
print '\r\x1B[KCreating %s: %d of %d, %d%s left, %d retries' % (item_type,
|
||||
created, need_to_create, round(eta), eta_unit, retries_done),
|
||||
stdout.flush()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: read the dispersion config, then create containers
    # and objects with random names until the requested share of ring
    # partitions has one item each.
    #
    # These module-level names are the shared state that report() and the
    # put_* helpers read and update.
    global begun, created, item_type, next_report, need_to_create, retries_done
    patcher.monkey_patch()

    # An optional single argument overrides the default config path.
    conffile = '/etc/swift/dispersion.conf'
    if len(argv) == 2:
        conffile = argv[1]
    elif len(argv) > 2:
        exit('Syntax: %s [conffile]' % argv[0])
    c = ConfigParser()
    if not c.read(conffile):
        exit('Unable to read config file: %s' % conffile)
    # All settings come from the [dispersion] section.
    conf = dict(c.items('dispersion'))
    swift_dir = conf.get('swift_dir', '/etc/swift')
    # Percentage of each ring's partitions that should receive an item.
    dispersion_coverage = int(conf.get('dispersion_coverage', 1))
    retries = int(conf.get('retries', 5))
    concurrency = int(conf.get('concurrency', 25))

    coropool = GreenPool(size=concurrency)
    retries_done = 0

    # auth_url, auth_user and auth_key are required; a missing key raises
    # KeyError here.
    url, token = get_auth(conf['auth_url'], conf['auth_user'],
                          conf['auth_key'])
    # The account name is taken to be the last path segment of the URL
    # returned by get_auth.
    account = url.rsplit('/', 1)[1]
    connpool = Pool(max_size=concurrency)
    # Pooled connections are handed the url/token obtained above
    # (presumably so they need not authenticate again -- confirm against
    # Connection).
    connpool.create = lambda: Connection(conf['auth_url'],
                                conf['auth_user'], conf['auth_key'],
                                retries=retries,
                                preauthurl=url, preauthtoken=token)

    # ---- Containers ----
    container_ring = Ring(os.path.join(swift_dir, 'container.ring.gz'))
    # Partitions that have not yet been given a container (dict used as a
    # set: membership test and deletion only).
    parts_left = dict((x, x) for x in xrange(container_ring.partition_count))
    item_type = 'containers'
    created = 0
    retries_done = 0
    # May be fractional; the loop below stops once it drops under 1.
    need_to_create = need_to_queue = \
        dispersion_coverage / 100.0 * container_ring.partition_count
    begun = next_report = time()
    # First progress line is due two seconds after the start.
    next_report += 2
    # NOTE(review): if dispersion_coverage is above 100, need_to_queue exceeds
    # the number of partitions; parts_left empties first and this loop can
    # never finish.
    while need_to_queue >= 1:
        # Keep generating random names; only names that land on a partition
        # not yet covered are actually created.
        container = 'dispersion_%s' % uuid4().hex
        part, _junk = container_ring.get_nodes(account, container)
        if part in parts_left:
            coropool.spawn(put_container, connpool, container, report)
            # Give the spawned green threads a chance to run.
            sleep()
            del parts_left[part]
            need_to_queue -= 1
    coropool.waitall()
    elapsed, elapsed_unit = get_time_units(time() - begun)
    print '\r\x1B[KCreated %d containers for dispersion reporting, %d%s, %d ' \
        'retries' % \
        (need_to_create, round(elapsed), elapsed_unit, retries_done)
    stdout.flush()

    # ---- Objects ----
    # All objects go into this one container, created synchronously and
    # without progress reporting.
    container = 'dispersion_objects'
    put_container(connpool, container, None)
    object_ring = Ring(os.path.join(swift_dir, 'object.ring.gz'))
    # Partitions that have not yet been given an object.
    parts_left = dict((x, x) for x in xrange(object_ring.partition_count))
    item_type = 'objects'
    created = 0
    retries_done = 0
    need_to_create = need_to_queue = \
        dispersion_coverage / 100.0 * object_ring.partition_count
    begun = next_report = time()
    next_report += 2
    # Same approach (and same termination caveat) as the container loop.
    while need_to_queue >= 1:
        obj = 'dispersion_%s' % uuid4().hex
        part, _junk = object_ring.get_nodes(account, container, obj)
        if part in parts_left:
            coropool.spawn(put_object, connpool, container, obj, report)
            sleep()
            del parts_left[part]
            need_to_queue -= 1
    coropool.waitall()
    elapsed, elapsed_unit = get_time_units(time() - begun)
    print '\r\x1B[KCreated %d objects for dispersion reporting, %d%s, %d ' \
        'retries' % \
        (need_to_create, round(elapsed), elapsed_unit, retries_done)
    stdout.flush()
|
261
bin/swift-dispersion-report
Executable file
261
bin/swift-dispersion-report
Executable file
@ -0,0 +1,261 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (c) 2010-2011 OpenStack, LLC.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import csv
|
||||
import os
|
||||
import socket
|
||||
from ConfigParser import ConfigParser
|
||||
from httplib import HTTPException
|
||||
from optparse import OptionParser
|
||||
from sys import argv, exit, stdout, stderr
|
||||
from time import time
|
||||
from uuid import uuid4
|
||||
|
||||
from eventlet import GreenPool, hubs, patcher, sleep, Timeout
|
||||
from eventlet.pools import Pool
|
||||
|
||||
from swift.common import direct_client
|
||||
from swift.common.client import ClientException, Connection, get_auth
|
||||
from swift.common.ring import Ring
|
||||
from swift.common.utils import compute_eta, get_time_units
|
||||
|
||||
|
||||
# Identifiers ("host:port/device") of devices already reported as unmounted,
# so that each device is announced only once per run.
unmounted = []


def get_error_log(prefix):
    """
    Build an error-reporting callback for one storage node.

    The returned function accepts either a plain message or an exception.
    An exception carrying ``http_status == 507`` is treated as an unmounted
    device and is reported once per distinct host:port/device. Anything
    whose status is not 404 or 507 (including objects with no ``http_status``
    at all) is reported as a generic error line starting with ``prefix``.

    :param prefix: text identifying the node, prepended to generic error lines
    :returns: function taking one argument (message or exception) that writes
              to stderr and returns None
    """

    def error_log(msg_or_exc):
        status = getattr(msg_or_exc, 'http_status', None)
        if status == 507:
            # Build the identifier from the failing device so that each
            # unmounted device is tracked (and reported) separately.
            identifier = '%s:%s/%s' % (msg_or_exc.http_host,
                                       msg_or_exc.http_port,
                                       msg_or_exc.http_device)
            if identifier not in unmounted:
                unmounted.append(identifier)
                stderr.write(
                    'ERROR: %s is unmounted -- This will '
                    'cause replicas designated for that device to be '
                    'considered missing until resolved or the ring is '
                    'updated.\n' % identifier)
                stderr.flush()
        # 404 and 507 are expected outcomes while probing for copies and are
        # not logged as generic errors.
        if status not in (404, 507):
            stderr.write('ERROR: %s: %s\n' % (prefix, msg_or_exc))
            stderr.flush()

    return error_log
|
||||
|
||||
|
||||
def container_dispersion_report(coropool, connpool, account, container_ring,
|
||||
retries):
|
||||
with connpool.item() as conn:
|
||||
containers = [c['name'] for c in conn.get_account(prefix='dispersion_',
|
||||
full_listing=True)[1]]
|
||||
containers_listed = len(containers)
|
||||
if not containers_listed:
|
||||
print >>stderr, 'No containers to query. Has ' \
|
||||
'swift-dispersion-populate been run?'
|
||||
stderr.flush()
|
||||
return
|
||||
retries_done = [0]
|
||||
containers_queried = [0]
|
||||
container_copies_found = [0, 0, 0, 0]
|
||||
begun = time()
|
||||
next_report = [time() + 2]
|
||||
|
||||
def direct(container, part, nodes):
|
||||
found_count = 0
|
||||
for node in nodes:
|
||||
error_log = get_error_log('%(ip)s:%(port)s/%(device)s' % node)
|
||||
try:
|
||||
attempts, _junk = direct_client.retry(
|
||||
direct_client.direct_head_container, node,
|
||||
part, account, container, error_log=error_log,
|
||||
retries=retries)
|
||||
retries_done[0] += attempts - 1
|
||||
found_count += 1
|
||||
except ClientException, err:
|
||||
if err.http_status not in (404, 507):
|
||||
error_log('Giving up on /%s/%s/%s: %s' % (part, account,
|
||||
container, err))
|
||||
except (Exception, Timeout), err:
|
||||
error_log('Giving up on /%s/%s/%s: %s' % (part, account,
|
||||
container, err))
|
||||
container_copies_found[found_count] += 1
|
||||
containers_queried[0] += 1
|
||||
if time() >= next_report[0]:
|
||||
next_report[0] = time() + 5
|
||||
eta, eta_unit = compute_eta(begun, containers_queried[0],
|
||||
containers_listed)
|
||||
print '\r\x1B[KQuerying containers: %d of %d, %d%s left, %d ' \
|
||||
'retries' % (containers_queried[0], containers_listed,
|
||||
round(eta), eta_unit, retries_done[0]),
|
||||
stdout.flush()
|
||||
container_parts = {}
|
||||
for container in containers:
|
||||
part, nodes = container_ring.get_nodes(account, container)
|
||||
if part not in container_parts:
|
||||
container_parts[part] = part
|
||||
coropool.spawn(direct, container, part, nodes)
|
||||
coropool.waitall()
|
||||
distinct_partitions = len(container_parts)
|
||||
copies_expected = distinct_partitions * container_ring.replica_count
|
||||
copies_found = sum(a * b for a, b in enumerate(container_copies_found))
|
||||
value = 100.0 * copies_found / copies_expected
|
||||
elapsed, elapsed_unit = get_time_units(time() - begun)
|
||||
print '\r\x1B[KQueried %d containers for dispersion reporting, ' \
|
||||
'%d%s, %d retries' % (containers_listed, round(elapsed),
|
||||
elapsed_unit, retries_done[0])
|
||||
if containers_listed - distinct_partitions:
|
||||
print 'There were %d overlapping partitions' % (
|
||||
containers_listed - distinct_partitions)
|
||||
if container_copies_found[2]:
|
||||
print 'There were %d partitions missing one copy.' % \
|
||||
container_copies_found[2]
|
||||
if container_copies_found[1]:
|
||||
print '! There were %d partitions missing two copies.' % \
|
||||
container_copies_found[1]
|
||||
if container_copies_found[0]:
|
||||
print '!!! There were %d partitions missing all copies.' % \
|
||||
container_copies_found[0]
|
||||
print '%.02f%% of container copies found (%d of %d)' % (
|
||||
value, copies_found, copies_expected)
|
||||
print 'Sample represents %.02f%% of the container partition space' % (
|
||||
100.0 * distinct_partitions / container_ring.partition_count)
|
||||
stdout.flush()
|
||||
|
||||
|
||||
def object_dispersion_report(coropool, connpool, account, object_ring,
|
||||
retries):
|
||||
container = 'dispersion_objects'
|
||||
with connpool.item() as conn:
|
||||
try:
|
||||
objects = [o['name'] for o in conn.get_container(container,
|
||||
prefix='dispersion_', full_listing=True)[1]]
|
||||
except ClientException, err:
|
||||
if err.http_status != 404:
|
||||
raise
|
||||
print >>stderr, 'No objects to query. Has ' \
|
||||
'swift-dispersion-populate been run?'
|
||||
stderr.flush()
|
||||
return
|
||||
objects_listed = len(objects)
|
||||
if not objects_listed:
|
||||
print >>stderr, 'No objects to query. Has swift-dispersion-populate ' \
|
||||
'been run?'
|
||||
stderr.flush()
|
||||
return
|
||||
retries_done = [0]
|
||||
objects_queried = [0]
|
||||
object_copies_found = [0, 0, 0, 0]
|
||||
begun = time()
|
||||
next_report = [time() + 2]
|
||||
|
||||
def direct(obj, part, nodes):
|
||||
found_count = 0
|
||||
for node in nodes:
|
||||
error_log = get_error_log('%(ip)s:%(port)s/%(device)s' % node)
|
||||
try:
|
||||
attempts, _junk = direct_client.retry(
|
||||
direct_client.direct_head_object, node, part,
|
||||
account, container, obj, error_log=error_log,
|
||||
retries=retries)
|
||||
retries_done[0] += attempts - 1
|
||||
found_count += 1
|
||||
except ClientException, err:
|
||||
if err.http_status not in (404, 507):
|
||||
error_log('Giving up on /%s/%s/%s/%s: %s' % (part, account,
|
||||
container, obj, err))
|
||||
except (Exception, Timeout), err:
|
||||
error_log('Giving up on /%s/%s/%s/%s: %s' % (part, account,
|
||||
container, obj, err))
|
||||
object_copies_found[found_count] += 1
|
||||
objects_queried[0] += 1
|
||||
if time() >= next_report[0]:
|
||||
next_report[0] = time() + 5
|
||||
eta, eta_unit = compute_eta(begun, objects_queried[0],
|
||||
objects_listed)
|
||||
print '\r\x1B[KQuerying objects: %d of %d, %d%s left, %d ' \
|
||||
'retries' % (objects_queried[0], objects_listed, round(eta),
|
||||
eta_unit, retries_done[0]),
|
||||
stdout.flush()
|
||||
object_parts = {}
|
||||
for obj in objects:
|
||||
part, nodes = object_ring.get_nodes(account, container, obj)
|
||||
if part not in object_parts:
|
||||
object_parts[part] = part
|
||||
coropool.spawn(direct, obj, part, nodes)
|
||||
coropool.waitall()
|
||||
distinct_partitions = len(object_parts)
|
||||
copies_expected = distinct_partitions * object_ring.replica_count
|
||||
copies_found = sum(a * b for a, b in enumerate(object_copies_found))
|
||||
value = 100.0 * copies_found / copies_expected
|
||||
elapsed, elapsed_unit = get_time_units(time() - begun)
|
||||
print '\r\x1B[KQueried %d objects for dispersion reporting, ' \
|
||||
'%d%s, %d retries' % (objects_listed, round(elapsed),
|
||||
elapsed_unit, retries_done[0])
|
||||
if objects_listed - distinct_partitions:
|
||||
print 'There were %d overlapping partitions' % (
|
||||
objects_listed - distinct_partitions)
|
||||
if object_copies_found[2]:
|
||||
print 'There were %d partitions missing one copy.' % \
|
||||
object_copies_found[2]
|
||||
if object_copies_found[1]:
|
||||
print '! There were %d partitions missing two copies.' % \
|
||||
object_copies_found[1]
|
||||
if object_copies_found[0]:
|
||||
print '!!! There were %d partitions missing all copies.' % \
|
||||
object_copies_found[0]
|
||||
print '%.02f%% of object copies found (%d of %d)' % \
|
||||
(value, copies_found, copies_expected)
|
||||
print 'Sample represents %.02f%% of the object partition space' % (
|
||||
100.0 * distinct_partitions / object_ring.partition_count)
|
||||
stdout.flush()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: read the dispersion config, authenticate, load the
    # container and object rings, then print the container report followed
    # by the object report.
    patcher.monkey_patch()
    # NOTE(review): presumably keeps the eventlet hub from printing
    # tracebacks for exceptions raised in green threads -- confirm against
    # the eventlet documentation.
    hubs.get_hub().debug_exceptions = False

    # An optional single argument overrides the default config path.
    conffile = '/etc/swift/dispersion.conf'
    if len(argv) == 2:
        conffile = argv[1]
    elif len(argv) > 2:
        exit('Syntax: %s [conffile]' % argv[0])
    c = ConfigParser()
    if not c.read(conffile):
        exit('Unable to read config file: %s' % conffile)
    # All settings come from the [dispersion] section.
    conf = dict(c.items('dispersion'))
    swift_dir = conf.get('swift_dir', '/etc/swift')
    # Parsed for parity with the populate tool; not referenced again in the
    # rest of this block.
    dispersion_coverage = int(conf.get('dispersion_coverage', 1))
    retries = int(conf.get('retries', 5))
    concurrency = int(conf.get('concurrency', 25))

    coropool = GreenPool(size=concurrency)

    # auth_url, auth_user and auth_key are required; a missing key raises
    # KeyError here.
    url, token = get_auth(conf['auth_url'], conf['auth_user'],
                          conf['auth_key'])
    # The account name is taken to be the last path segment of the URL
    # returned by get_auth.
    account = url.rsplit('/', 1)[1]
    connpool = Pool(max_size=concurrency)
    # Pooled connections are handed the url/token obtained above.
    connpool.create = lambda: Connection(conf['auth_url'],
                                conf['auth_user'], conf['auth_key'],
                                retries=retries,
                                preauthurl=url, preauthtoken=token)

    container_ring = Ring(os.path.join(swift_dir, 'container.ring.gz'))
    object_ring = Ring(os.path.join(swift_dir, 'object.ring.gz'))

    container_dispersion_report(coropool, connpool, account, container_ring,
                                retries)
    object_dispersion_report(coropool, connpool, account, object_ring, retries)
|
@ -18,7 +18,7 @@ import os
|
||||
import traceback
|
||||
from ConfigParser import ConfigParser
|
||||
from optparse import OptionParser
|
||||
from sys import exit, argv
|
||||
from sys import exit, argv, stderr
|
||||
from time import time
|
||||
from uuid import uuid4
|
||||
|
||||
@ -77,6 +77,11 @@ if __name__ == '__main__':
|
||||
global begun, created, item_type, next_report, need_to_create, retries_done
|
||||
patcher.monkey_patch()
|
||||
|
||||
print >>stderr, '''
|
||||
WARNING: This command is being replaced with swift-dispersion-populate; you
|
||||
should switch to that before the next Swift release.
|
||||
'''
|
||||
|
||||
parser = OptionParser()
|
||||
parser.add_option('-d', '--dispersion', action='store_true',
|
||||
dest='dispersion', default=False,
|
||||
|
@ -749,6 +749,11 @@ if __name__ == '__main__':
|
||||
patcher.monkey_patch()
|
||||
hubs.get_hub().debug_exceptions = False
|
||||
|
||||
print >>stderr, '''
|
||||
WARNING: This command is being replaced with swift-dispersion-report; you
|
||||
should switch to that before the next Swift release.
|
||||
'''
|
||||
|
||||
parser = OptionParser(usage='''
|
||||
Usage: %prog [options] [conf_file]
|
||||
|
||||
|
@ -134,9 +134,9 @@ different distro or OS, some care should be taken before using in production.
|
||||
Cluster Health
|
||||
--------------
|
||||
|
||||
There is a swift-stats-report tool for measuring overall cluster health. This
|
||||
is accomplished by checking if a set of deliberately distributed containers and
|
||||
objects are currently in their proper places within the cluster.
|
||||
There is a swift-dispersion-report tool for measuring overall cluster health.
|
||||
This is accomplished by checking if a set of deliberately distributed
|
||||
containers and objects are currently in their proper places within the cluster.
|
||||
|
||||
For instance, a common deployment has three replicas of each object. The health
|
||||
of that object can be measured by checking if each replica is in its proper
|
||||
@ -153,15 +153,15 @@ to gather results.
|
||||
The first thing that needs to be done to provide this health value is create a
|
||||
new account solely for this usage. Next, we need to place the containers and
|
||||
objects throughout the system so that they are on distinct partitions. The
|
||||
swift-stats-populate tool does this by making up random container and object
|
||||
names until they fall on distinct partitions. Last, and repeatedly for the life
|
||||
of the cluster, we need to run the swift-stats-report tool to check the health
|
||||
of each of these containers and objects.
|
||||
swift-dispersion-populate tool does this by making up random container and
|
||||
object names until they fall on distinct partitions. Last, and repeatedly for
|
||||
the life of the cluster, we need to run the swift-dispersion-report tool to
|
||||
check the health of each of these containers and objects.
|
||||
|
||||
These tools need direct access to the entire cluster and to the ring files
|
||||
(installing them on a proxy server will probably do). Both
|
||||
swift-stats-populate and swift-stats-report use the same configuration file,
|
||||
/etc/swift/stats.conf. Example conf file::
|
||||
swift-dispersion-populate and swift-dispersion-report use the same
|
||||
configuration file, /etc/swift/dispersion.conf. Example conf file::
|
||||
|
||||
[dispersion]
|
||||
auth_url = http://saio:11000/auth/v1.0
|
||||
@ -169,17 +169,17 @@ swift-stats-populate and swift-stats-report use the same configuration file,
|
||||
auth_key = testing
|
||||
|
||||
There are also options for the conf file for specifying the dispersion coverage
|
||||
(defaults to 1%), retries, concurrency, CSV output file, etc. though usually
|
||||
the defaults are fine.
|
||||
(defaults to 1%), retries, concurrency, etc. though usually the defaults are
|
||||
fine.
|
||||
|
||||
Once the configuration is in place, run `swift-stats-populate -d` to populate
|
||||
Once the configuration is in place, run `swift-dispersion-populate` to populate
|
||||
the containers and objects throughout the cluster.
|
||||
|
||||
Now that those containers and objects are in place, you can run
|
||||
`swift-stats-report -d` to get a dispersion report, or the overall health of
|
||||
`swift-dispersion-report` to get a dispersion report, or the overall health of
|
||||
the cluster. Here is an example of a cluster in perfect health::
|
||||
|
||||
$ swift-stats-report -d
|
||||
$ swift-dispersion-report
|
||||
Queried 2621 containers for dispersion reporting, 19s, 0 retries
|
||||
100.00% of container copies found (7863 of 7863)
|
||||
Sample represents 1.00% of the container partition space
|
||||
@ -195,7 +195,7 @@ that has::
|
||||
$ swift-ring-builder object.builder set_weight d0 200
|
||||
$ swift-ring-builder object.builder rebalance
|
||||
...
|
||||
$ swift-stats-report -d
|
||||
$ swift-dispersion-report
|
||||
Queried 2621 containers for dispersion reporting, 8s, 0 retries
|
||||
100.00% of container copies found (7863 of 7863)
|
||||
Sample represents 1.00% of the container partition space
|
||||
@ -212,7 +212,7 @@ is much less. Next, I'll run the replicators to get everything put back into
|
||||
place and then rerun the dispersion report::
|
||||
|
||||
... start object replicators and monitor logs until they're caught up ...
|
||||
$ swift-stats-report -d
|
||||
$ swift-dispersion-report
|
||||
Queried 2621 containers for dispersion reporting, 17s, 0 retries
|
||||
100.00% of container copies found (7863 of 7863)
|
||||
Sample represents 1.00% of the container partition space
|
||||
@ -221,13 +221,6 @@ place and then rerun the dispersion report::
|
||||
100.00% of object copies found (7857 of 7857)
|
||||
Sample represents 1.00% of the object partition space
|
||||
|
||||
So that's a summation of how to use swift-stats-report to monitor the health of
|
||||
a cluster. There are a few other things it can do, such as performance
|
||||
monitoring, but those are currently in their infancy and little used. For
|
||||
instance, you can run `swift-stats-populate -p` and `swift-stats-report -p` to
|
||||
get performance timings (warning: the initial populate takes a while). These
|
||||
timings are dumped into a CSV file (/etc/swift/stats.csv by default) and can
|
||||
then be graphed to see how cluster performance is trending.
|
||||
|
||||
------------------------------------
|
||||
Additional Cleanup Script for Swauth
|
||||
|
8
etc/dispersion.conf-sample
Normal file
8
etc/dispersion.conf-sample
Normal file
@ -0,0 +1,8 @@
|
||||
[dispersion]
|
||||
auth_url = http://saio:8080/auth/v1.0
|
||||
auth_user = test:tester
|
||||
auth_key = testing
|
||||
# swift_dir = /etc/swift
|
||||
# dispersion_coverage = 1
|
||||
# retries = 5
|
||||
# concurrency = 25
|
@ -1,3 +1,6 @@
|
||||
# WARNING: The swift-stats-populate and swift-stats-report commands are being
|
||||
# replaced with swift-dispersion-populate and swift-dispersion-report; you
|
||||
# should switch to those before the next Swift release.
|
||||
[stats]
|
||||
auth_url = http://saio:8080/auth/v1.0
|
||||
auth_user = test:tester
|
||||
|
1
setup.py
1
setup.py
@ -90,6 +90,7 @@ setup(
|
||||
'bin/swift-object-updater', 'bin/swift-proxy-server',
|
||||
'bin/swift-ring-builder', 'bin/swift-stats-populate',
|
||||
'bin/swift-stats-report',
|
||||
'bin/swift-dispersion-populate', 'bin/swift-dispersion-report',
|
||||
'bin/swift-bench',
|
||||
'bin/swift-log-uploader',
|
||||
'bin/swift-log-stats-collector',
|
||||
|
@ -86,7 +86,7 @@ class AccountController(object):
|
||||
return Response(status='507 %s is not mounted' % drive)
|
||||
broker = self._get_account_broker(drive, part, account)
|
||||
if container: # put account container
|
||||
if 'x-cf-trans-id' in req.headers:
|
||||
if 'x-trans-id' in req.headers:
|
||||
broker.pending_timeout = 3
|
||||
if req.headers.get('x-account-override-deleted', 'no').lower() != \
|
||||
'yes' and broker.is_deleted():
|
||||
@ -296,7 +296,7 @@ class AccountController(object):
|
||||
def __call__(self, env, start_response):
|
||||
start_time = time.time()
|
||||
req = Request(env)
|
||||
self.logger.txn_id = req.headers.get('x-cf-trans-id', None)
|
||||
self.logger.txn_id = req.headers.get('x-trans-id', None)
|
||||
if not check_utf8(req.path_info):
|
||||
res = HTTPPreconditionFailed(body='Invalid UTF8')
|
||||
else:
|
||||
@ -319,7 +319,7 @@ class AccountController(object):
|
||||
time.strftime('%d/%b/%Y:%H:%M:%S +0000', time.gmtime()),
|
||||
req.method, req.path,
|
||||
res.status.split()[0], res.content_length or '-',
|
||||
req.headers.get('x-cf-trans-id', '-'),
|
||||
req.headers.get('x-trans-id', '-'),
|
||||
req.referer or '-', req.user_agent or '-',
|
||||
trans_time,
|
||||
additional_info)
|
||||
|
@ -15,6 +15,7 @@
|
||||
|
||||
from webob import Request
|
||||
from webob.exc import HTTPServerError
|
||||
import uuid
|
||||
|
||||
from swift.common.utils import get_logger
|
||||
|
||||
@ -29,13 +30,23 @@ class CatchErrorMiddleware(object):
|
||||
self.logger = get_logger(conf, log_route='catch-errors')
|
||||
|
||||
def __call__(self, env, start_response):
|
||||
trans_id = env.get('HTTP_X_TRANS_ID')
|
||||
if not trans_id:
|
||||
trans_id = 'tx' + uuid.uuid4().hex
|
||||
env['HTTP_X_TRANS_ID'] = trans_id
|
||||
try:
|
||||
return self.app(env, start_response)
|
||||
|
||||
def my_start_response(status, response_headers, exc_info=None):
|
||||
trans_header = ('x-trans-id', trans_id)
|
||||
response_headers.append(trans_header)
|
||||
return start_response(status, response_headers, exc_info)
|
||||
return self.app(env, my_start_response)
|
||||
except Exception, err:
|
||||
self.logger.exception(_('Error: %s'), err)
|
||||
resp = HTTPServerError(request=Request(env),
|
||||
body='An error occurred',
|
||||
content_type='text/plain')
|
||||
resp.headers['x-trans-id'] = trans_id
|
||||
return resp(env, start_response)
|
||||
|
||||
|
||||
|
@ -1360,7 +1360,7 @@ class Swauth(object):
|
||||
getattr(req, 'bytes_transferred', 0) or '-',
|
||||
getattr(response, 'bytes_transferred', 0) or '-',
|
||||
req.headers.get('etag', '-'),
|
||||
req.headers.get('x-cf-trans-id', '-'), logged_headers or '-',
|
||||
req.headers.get('x-trans-id', '-'), logged_headers or '-',
|
||||
trans_time)))
|
||||
|
||||
|
||||
|
@ -96,7 +96,7 @@ class ContainerController(object):
|
||||
'x-delete-timestamp': info['delete_timestamp'],
|
||||
'x-object-count': info['object_count'],
|
||||
'x-bytes-used': info['bytes_used'],
|
||||
'x-cf-trans-id': req.headers.get('X-Cf-Trans-Id', '-')}
|
||||
'x-trans-id': req.headers.get('x-trans-id', '-')}
|
||||
if req.headers.get('x-account-override-deleted', 'no').lower() == \
|
||||
'yes':
|
||||
account_headers['x-account-override-deleted'] = 'yes'
|
||||
@ -385,7 +385,7 @@ class ContainerController(object):
|
||||
def __call__(self, env, start_response):
|
||||
start_time = time.time()
|
||||
req = Request(env)
|
||||
self.logger.txn_id = req.headers.get('x-cf-trans-id', None)
|
||||
self.logger.txn_id = req.headers.get('x-trans-id', None)
|
||||
if not check_utf8(req.path_info):
|
||||
res = HTTPPreconditionFailed(body='Invalid UTF8')
|
||||
else:
|
||||
@ -405,7 +405,7 @@ class ContainerController(object):
|
||||
time.gmtime()),
|
||||
req.method, req.path,
|
||||
res.status.split()[0], res.content_length or '-',
|
||||
req.headers.get('x-cf-trans-id', '-'),
|
||||
req.headers.get('x-trans-id', '-'),
|
||||
req.referer or '-', req.user_agent or '-',
|
||||
trans_time)
|
||||
if req.method.upper() == 'REPLICATE':
|
||||
|
@ -549,7 +549,7 @@ class ObjectController(object):
|
||||
'x-content-type': file.metadata['Content-Type'],
|
||||
'x-timestamp': file.metadata['X-Timestamp'],
|
||||
'x-etag': file.metadata['ETag'],
|
||||
'x-cf-trans-id': request.headers.get('x-cf-trans-id', '-')},
|
||||
'x-trans-id': request.headers.get('x-trans-id', '-')},
|
||||
device)
|
||||
resp = HTTPCreated(request=request, etag=etag)
|
||||
return resp
|
||||
@ -686,7 +686,7 @@ class ObjectController(object):
|
||||
file.unlinkold(metadata['X-Timestamp'])
|
||||
self.container_update('DELETE', account, container, obj,
|
||||
request.headers, {'x-timestamp': metadata['X-Timestamp'],
|
||||
'x-cf-trans-id': request.headers.get('x-cf-trans-id', '-')},
|
||||
'x-trans-id': request.headers.get('x-trans-id', '-')},
|
||||
device)
|
||||
resp = response_class(request=request)
|
||||
return resp
|
||||
@ -719,7 +719,7 @@ class ObjectController(object):
|
||||
"""WSGI Application entry point for the Swift Object Server."""
|
||||
start_time = time.time()
|
||||
req = Request(env)
|
||||
self.logger.txn_id = req.headers.get('x-cf-trans-id', None)
|
||||
self.logger.txn_id = req.headers.get('x-trans-id', None)
|
||||
if not check_utf8(req.path_info):
|
||||
res = HTTPPreconditionFailed(body='Invalid UTF8')
|
||||
else:
|
||||
@ -740,7 +740,7 @@ class ObjectController(object):
|
||||
time.gmtime()),
|
||||
req.method, req.path, res.status.split()[0],
|
||||
res.content_length or '-', req.referer or '-',
|
||||
req.headers.get('x-cf-trans-id', '-'),
|
||||
req.headers.get('x-trans-id', '-'),
|
||||
req.user_agent or '-',
|
||||
trans_time)
|
||||
if req.method == 'REPLICATE':
|
||||
|
@ -358,7 +358,7 @@ class Controller(object):
|
||||
result_code = 0
|
||||
attempts_left = self.app.account_ring.replica_count
|
||||
path = '/%s' % account
|
||||
headers = {'x-cf-trans-id': self.trans_id}
|
||||
headers = {'x-trans-id': self.trans_id}
|
||||
for node in self.iter_nodes(partition, nodes, self.app.account_ring):
|
||||
try:
|
||||
with ConnectionTimeout(self.app.conn_timeout):
|
||||
@ -430,7 +430,7 @@ class Controller(object):
|
||||
write_acl = None
|
||||
container_size = None
|
||||
attempts_left = self.app.container_ring.replica_count
|
||||
headers = {'x-cf-trans-id': self.trans_id}
|
||||
headers = {'x-trans-id': self.trans_id}
|
||||
for node in self.iter_nodes(partition, nodes, self.app.container_ring):
|
||||
try:
|
||||
with ConnectionTimeout(self.app.conn_timeout):
|
||||
@ -1227,7 +1227,7 @@ class ContainerController(Controller):
|
||||
headers = []
|
||||
for account in accounts:
|
||||
nheaders = {'X-Timestamp': normalize_timestamp(time.time()),
|
||||
'x-cf-trans-id': self.trans_id,
|
||||
'x-trans-id': self.trans_id,
|
||||
'X-Account-Host': '%(ip)s:%(port)s' % account,
|
||||
'X-Account-Partition': account_partition,
|
||||
'X-Account-Device': account['device']}
|
||||
@ -1255,7 +1255,7 @@ class ContainerController(Controller):
|
||||
container_partition, containers = self.app.container_ring.get_nodes(
|
||||
self.account_name, self.container_name)
|
||||
headers = {'X-Timestamp': normalize_timestamp(time.time()),
|
||||
'x-cf-trans-id': self.trans_id}
|
||||
'x-trans-id': self.trans_id}
|
||||
headers.update(value for value in req.headers.iteritems()
|
||||
if value[0].lower() in self.pass_through_headers or
|
||||
value[0].lower().startswith('x-container-meta-'))
|
||||
@ -1278,7 +1278,7 @@ class ContainerController(Controller):
|
||||
headers = []
|
||||
for account in accounts:
|
||||
headers.append({'X-Timestamp': normalize_timestamp(time.time()),
|
||||
'X-Cf-Trans-Id': self.trans_id,
|
||||
'X-Trans-Id': self.trans_id,
|
||||
'X-Account-Host': '%(ip)s:%(port)s' % account,
|
||||
'X-Account-Partition': account_partition,
|
||||
'X-Account-Device': account['device']})
|
||||
@ -1323,7 +1323,7 @@ class AccountController(Controller):
|
||||
account_partition, accounts = \
|
||||
self.app.account_ring.get_nodes(self.account_name)
|
||||
headers = {'X-Timestamp': normalize_timestamp(time.time()),
|
||||
'x-cf-trans-id': self.trans_id}
|
||||
'x-trans-id': self.trans_id}
|
||||
headers.update(value for value in req.headers.iteritems()
|
||||
if value[0].lower().startswith('x-account-meta-'))
|
||||
if self.app.memcache:
|
||||
@ -1340,7 +1340,7 @@ class AccountController(Controller):
|
||||
account_partition, accounts = \
|
||||
self.app.account_ring.get_nodes(self.account_name)
|
||||
headers = {'X-Timestamp': normalize_timestamp(time.time()),
|
||||
'X-CF-Trans-Id': self.trans_id}
|
||||
'X-Trans-Id': self.trans_id}
|
||||
headers.update(value for value in req.headers.iteritems()
|
||||
if value[0].lower().startswith('x-account-meta-'))
|
||||
if self.app.memcache:
|
||||
@ -1357,7 +1357,7 @@ class AccountController(Controller):
|
||||
account_partition, accounts = \
|
||||
self.app.account_ring.get_nodes(self.account_name)
|
||||
headers = {'X-Timestamp': normalize_timestamp(time.time()),
|
||||
'X-CF-Trans-Id': self.trans_id}
|
||||
'X-Trans-Id': self.trans_id}
|
||||
if self.app.memcache:
|
||||
self.app.memcache.delete('account%s' % req.path_info.rstrip('/'))
|
||||
return self.make_requests(req, self.app.account_ring,
|
||||
@ -1473,8 +1473,6 @@ class BaseApplication(object):
|
||||
def update_request(self, req):
|
||||
req.bytes_transferred = '-'
|
||||
req.client_disconnect = False
|
||||
if 'x-cf-trans-id' not in req.headers:
|
||||
req.headers['x-cf-trans-id'] = 'tx' + str(uuid.uuid4())
|
||||
if 'x-storage-token' in req.headers and \
|
||||
'x-auth-token' not in req.headers:
|
||||
req.headers['x-auth-token'] = req.headers['x-storage-token']
|
||||
@ -1498,8 +1496,8 @@ class BaseApplication(object):
|
||||
return HTTPPreconditionFailed(request=req, body='Bad URL')
|
||||
|
||||
controller = controller(self, **path_parts)
|
||||
controller.trans_id = req.headers.get('x-cf-trans-id', '-')
|
||||
self.logger.txn_id = req.headers.get('x-cf-trans-id', None)
|
||||
controller.trans_id = req.headers.get('x-trans-id', '-')
|
||||
self.logger.txn_id = req.headers.get('x-trans-id', None)
|
||||
try:
|
||||
handler = getattr(controller, req.method)
|
||||
if not getattr(handler, 'publicly_accessible'):
|
||||
@ -1579,7 +1577,7 @@ class Application(BaseApplication):
|
||||
getattr(req, 'bytes_transferred', 0) or '-',
|
||||
getattr(response, 'bytes_transferred', 0) or '-',
|
||||
req.headers.get('etag', '-'),
|
||||
req.headers.get('x-cf-trans-id', '-'),
|
||||
req.headers.get('x-trans-id', '-'),
|
||||
logged_headers or '-',
|
||||
trans_time,
|
||||
)))
|
||||
|
@ -45,5 +45,28 @@ class TestCatchErrors(unittest.TestCase):
|
||||
resp = app(req.environ, start_response)
|
||||
self.assertEquals(resp, ['An error occurred'])
|
||||
|
||||
def test_trans_id_header(self):
|
||||
|
||||
def start_response(status, headers):
|
||||
self.assert_('x-trans-id' in (x[0] for x in headers))
|
||||
app = catch_errors.CatchErrorMiddleware(FakeApp(), {})
|
||||
req = Request.blank('/v1/a')
|
||||
app(req.environ, start_response)
|
||||
app = catch_errors.CatchErrorMiddleware(FakeApp(), {})
|
||||
req = Request.blank('/v1/a/c')
|
||||
app(req.environ, start_response)
|
||||
app = catch_errors.CatchErrorMiddleware(FakeApp(), {})
|
||||
req = Request.blank('/v1/a/c/o')
|
||||
app(req.environ, start_response)
|
||||
app = catch_errors.CatchErrorMiddleware(FakeApp(True), {})
|
||||
req = Request.blank('/v1/a')
|
||||
app(req.environ, start_response)
|
||||
app = catch_errors.CatchErrorMiddleware(FakeApp(True), {})
|
||||
req = Request.blank('/v1/a/c')
|
||||
app(req.environ, start_response)
|
||||
app = catch_errors.CatchErrorMiddleware(FakeApp(True), {})
|
||||
req = Request.blank('/v1/a/c/o')
|
||||
app(req.environ, start_response)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -124,7 +124,7 @@ def setup():
|
||||
for node in nodes:
|
||||
conn = proxy_server.http_connect(node['ip'], node['port'],
|
||||
node['device'], partition, 'PUT', '/a',
|
||||
{'X-Timestamp': ts, 'X-CF-Trans-Id': 'test'})
|
||||
{'X-Timestamp': ts, 'x-trans-id': 'test'})
|
||||
resp = conn.getresponse()
|
||||
assert(resp.status == 201)
|
||||
# Create container
|
||||
|
Loading…
Reference in New Issue
Block a user