update with trunk

This commit is contained in:
David Goetz 2011-05-12 14:24:51 -07:00
commit bc894f9efb
17 changed files with 523 additions and 50 deletions

10
CHANGELOG Normal file
View File

@ -0,0 +1,10 @@
swift (x.x.x)
* Renamed swift-stats-populate to swift-dispersion-populate and
swift-stats-report to swift-dispersion-report with extraneous unused
options removed. The new tools use dispersion.conf instead of stats.conf.
* Transaction id for each request is returned in a response header
(X-Trans-ID). Setting the transaction id has moved from the proxy server to
the catch_errors middleware. Additionally, the internal header has changed
from X-CF-Trans-ID to X-Trans-ID.

155
bin/swift-dispersion-populate Executable file
View File

@ -0,0 +1,155 @@
#!/usr/bin/env python
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import traceback
from ConfigParser import ConfigParser
from cStringIO import StringIO
from sys import exit, argv, stdout
from time import time
from uuid import uuid4
from eventlet import GreenPool, patcher, sleep
from eventlet.pools import Pool
from swift.common.client import Connection, get_auth
from swift.common.ring import Ring
from swift.common.utils import compute_eta, get_time_units
def put_container(connpool, container, report):
    """Create ``container`` through a connection borrowed from ``connpool``.

    The connection's extra attempts (``attempts - 1``) are added to the
    module-level ``retries_done`` counter.  When ``report`` is truthy it is
    called with ``True`` after a successful PUT, or with ``False`` when any
    exception is raised; the exception is then re-raised to the caller.
    """
    global retries_done
    # Resolve the optional callback once; a falsy ``report`` means "stay
    # silent".
    notify = report if report else (lambda succeeded: None)
    try:
        with connpool.item() as client:
            client.put_container(container)
            retries_done = retries_done + (client.attempts - 1)
        notify(True)
    except Exception:
        notify(False)
        raise
def put_object(connpool, container, obj, report):
    """Upload object ``obj`` into ``container`` via a pooled connection.

    The object's body is its own name, and the name is also stored in the
    ``x-object-meta-dispersion`` header.  The connection's extra attempts
    (``attempts - 1``) are added to the module-level ``retries_done``
    counter.  When ``report`` is truthy it is called with ``True`` on
    success, or with ``False`` when any exception is raised; the exception
    is then re-raised to the caller.
    """
    global retries_done
    # Resolve the optional callback once; a falsy ``report`` means "stay
    # silent".
    notify = report if report else (lambda succeeded: None)
    try:
        with connpool.item() as client:
            payload = StringIO(obj)
            metadata = {'x-object-meta-dispersion': obj}
            client.put_object(container, obj, payload, headers=metadata)
            retries_done = retries_done + (client.attempts - 1)
        notify(True)
    except Exception:
        notify(False)
        raise
def report(success):
global begun, created, item_type, next_report, need_to_create, retries_done
if not success:
traceback.print_exc()
exit('Gave up due to error(s).')
created += 1
if time() < next_report:
return
next_report = time() + 5
eta, eta_unit = compute_eta(begun, created, need_to_create)
print '\r\x1B[KCreating %s: %d of %d, %d%s left, %d retries' % (item_type,
created, need_to_create, round(eta), eta_unit, retries_done),
stdout.flush()
if __name__ == '__main__':
    # Script entry point: read the [dispersion] config section, then create
    # containers and objects until the configured percentage of ring
    # partitions each hold one.
    #
    # These names are the module globals that put_container, put_object and
    # report read and update through their own ``global`` statements.
    global begun, created, item_type, next_report, need_to_create, retries_done
    patcher.monkey_patch()
    # An optional single argument overrides the default config path; more
    # than one argument is a usage error.
    conffile = '/etc/swift/dispersion.conf'
    if len(argv) == 2:
        conffile = argv[1]
    elif len(argv) > 2:
        exit('Syntax: %s [conffile]' % argv[0])
    c = ConfigParser()
    if not c.read(conffile):
        exit('Unable to read config file: %s' % conffile)
    conf = dict(c.items('dispersion'))
    swift_dir = conf.get('swift_dir', '/etc/swift')
    # dispersion_coverage is used below as a percentage of the ring's
    # partition count.
    dispersion_coverage = int(conf.get('dispersion_coverage', 1))
    retries = int(conf.get('retries', 5))
    concurrency = int(conf.get('concurrency', 25))
    coropool = GreenPool(size=concurrency)
    retries_done = 0
    # Authenticate once; every pooled Connection is created with the
    # resulting url/token as preauthurl/preauthtoken.
    url, token = get_auth(conf['auth_url'], conf['auth_user'],
                          conf['auth_key'])
    # The account name is taken to be the last path segment of the storage
    # URL.
    account = url.rsplit('/', 1)[1]
    connpool = Pool(max_size=concurrency)
    connpool.create = lambda: Connection(conf['auth_url'],
                               conf['auth_user'], conf['auth_key'],
                               retries=retries,
                               preauthurl=url, preauthtoken=token)

    # ---- Phase 1: containers -------------------------------------------
    container_ring = Ring(os.path.join(swift_dir, 'container.ring.gz'))
    # Partitions that do not yet have a dispersion container queued.
    parts_left = dict((x, x) for x in xrange(container_ring.partition_count))
    item_type = 'containers'
    created = 0
    retries_done = 0
    # Float target: coverage percent of the partition count.
    need_to_create = need_to_queue = \
        dispersion_coverage / 100.0 * container_ring.partition_count
    begun = next_report = time()
    # First progress line is due two seconds after the start.
    next_report += 2
    while need_to_queue >= 1:
        # Keep generating random names; only names that land on a partition
        # not used so far are actually created.
        container = 'dispersion_%s' % uuid4().hex
        part, _junk = container_ring.get_nodes(account, container)
        if part in parts_left:
            coropool.spawn(put_container, connpool, container, report)
            # presumably yields so the spawned green thread can make
            # progress -- TODO confirm against eventlet.sleep semantics
            sleep()
            del parts_left[part]
            need_to_queue -= 1
    coropool.waitall()
    elapsed, elapsed_unit = get_time_units(time() - begun)
    print '\r\x1B[KCreated %d containers for dispersion reporting, %d%s, %d ' \
          'retries' % \
          (need_to_create, round(elapsed), elapsed_unit, retries_done)
    stdout.flush()

    # ---- Phase 2: objects ----------------------------------------------
    # All dispersion objects live in this single container; it is created
    # synchronously and without progress reporting (report=None).
    container = 'dispersion_objects'
    put_container(connpool, container, None)
    object_ring = Ring(os.path.join(swift_dir, 'object.ring.gz'))
    parts_left = dict((x, x) for x in xrange(object_ring.partition_count))
    item_type = 'objects'
    created = 0
    retries_done = 0
    need_to_create = need_to_queue = \
        dispersion_coverage / 100.0 * object_ring.partition_count
    begun = next_report = time()
    next_report += 2
    while need_to_queue >= 1:
        # Same scheme as for containers, keyed on the object ring.
        obj = 'dispersion_%s' % uuid4().hex
        part, _junk = object_ring.get_nodes(account, container, obj)
        if part in parts_left:
            coropool.spawn(put_object, connpool, container, obj, report)
            sleep()
            del parts_left[part]
            need_to_queue -= 1
    coropool.waitall()
    elapsed, elapsed_unit = get_time_units(time() - begun)
    print '\r\x1B[KCreated %d objects for dispersion reporting, %d%s, %d ' \
          'retries' % \
          (need_to_create, round(elapsed), elapsed_unit, retries_done)
    stdout.flush()

261
bin/swift-dispersion-report Executable file
View File

@ -0,0 +1,261 @@
#!/usr/bin/env python
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import csv
import os
import socket
from ConfigParser import ConfigParser
from httplib import HTTPException
from optparse import OptionParser
from sys import argv, exit, stdout, stderr
from time import time
from uuid import uuid4
from eventlet import GreenPool, hubs, patcher, sleep, Timeout
from eventlet.pools import Pool
from swift.common import direct_client
from swift.common.client import ClientException, Connection, get_auth
from swift.common.ring import Ring
from swift.common.utils import compute_eta, get_time_units
unmounted = []
def get_error_log(prefix):
    """Return an error-logging callable whose messages are tagged ``prefix``.

    The returned ``error_log(msg_or_exc)`` accepts a plain message or an
    exception object:

    * An object whose ``http_status`` is 507 is treated as an unmounted
      device.  Its ``host:port/device`` identifier is added to the
      module-level ``unmounted`` list and a warning is written to stderr,
      but only the first time that particular device is seen.
    * Anything without an ``http_status``, or with a status other than 404
      or 507, is written to stderr as ``ERROR: <prefix>: <msg_or_exc>``.

    :param prefix: text placed in front of every generic error message
    :returns: a one-argument function suitable as an ``error_log`` callback
    """

    def error_log(msg_or_exc):
        # Plain messages have no http_status; None never matches 404/507.
        status = getattr(msg_or_exc, 'http_status', None)
        if status == 507:
            # Build the real device identifier.  Previously the unformatted
            # template string itself was used as the key, so every device
            # after the first one was silently treated as already reported.
            identifier = '%s:%s/%s' % (msg_or_exc.http_host,
                                       msg_or_exc.http_port,
                                       msg_or_exc.http_device)
            if identifier not in unmounted:
                unmounted.append(identifier)
                stderr.write(
                    'ERROR: %s is unmounted -- This will cause replicas '
                    'designated for that device to be considered missing '
                    'until resolved or the ring is updated.\n' % identifier)
                stderr.flush()
        if status not in (404, 507):
            stderr.write('ERROR: %s: %s\n' % (prefix, msg_or_exc))
            stderr.flush()
    return error_log
def container_dispersion_report(coropool, connpool, account, container_ring,
                                retries):
    """Query every dispersion container directly and print a replica report.

    Lists the account's containers whose names start with ``dispersion_``,
    sends a direct HEAD for one container per distinct ring partition to
    every node the ring returns for it, and prints the percentage of
    container copies found.  Returns early, after a message on stderr, when
    the listing is empty.

    :param coropool: pool on which one ``direct`` check per distinct
                     partition is spawned; ``waitall()`` is called on it
    :param connpool: pool whose ``item()`` yields the client connection used
                     for the account listing
    :param account: account name handed to the ring and the direct requests
    :param container_ring: ring supplying ``get_nodes``, ``replica_count``
                           and ``partition_count``
    :param retries: retry count passed through to ``direct_client.retry``
    """
    with connpool.item() as conn:
        containers = [c['name'] for c in conn.get_account(prefix='dispersion_',
                      full_listing=True)[1]]
    containers_listed = len(containers)
    if not containers_listed:
        print >>stderr, 'No containers to query. Has ' \
                        'swift-dispersion-populate been run?'
        stderr.flush()
        return
    # Single-element lists so the nested direct() closure can update these
    # counters in place.
    retries_done = [0]
    containers_queried = [0]
    # Histogram: index is the number of copies found for a partition.
    # NOTE(review): only indexes 0..3 exist, so this looks like it assumes
    # get_nodes() never returns more than three nodes -- confirm.
    container_copies_found = [0, 0, 0, 0]
    begun = time()
    # First progress line is due two seconds after the start.
    next_report = [time() + 2]

    def direct(container, part, nodes):
        # Check one container on each of its nodes and record how many
        # copies answered.
        found_count = 0
        for node in nodes:
            error_log = get_error_log('%(ip)s:%(port)s/%(device)s' % node)
            try:
                attempts, _junk = direct_client.retry(
                                direct_client.direct_head_container, node,
                                part, account, container, error_log=error_log,
                                retries=retries)
                retries_done[0] += attempts - 1
                found_count += 1
            except ClientException, err:
                # 404 and 507 simply count as a missing copy; other
                # statuses are logged as well.
                if err.http_status not in (404, 507):
                    error_log('Giving up on /%s/%s/%s: %s' % (part, account,
                        container, err))
            except (Exception, Timeout), err:
                error_log('Giving up on /%s/%s/%s: %s' % (part, account,
                          container, err))
        container_copies_found[found_count] += 1
        containers_queried[0] += 1
        if time() >= next_report[0]:
            # Subsequent progress lines appear at most every five seconds.
            next_report[0] = time() + 5
            eta, eta_unit = compute_eta(begun, containers_queried[0],
                                        containers_listed)
            print '\r\x1B[KQuerying containers: %d of %d, %d%s left, %d ' \
                  'retries' % (containers_queried[0], containers_listed,
                  round(eta), eta_unit, retries_done[0]),
            stdout.flush()
    # Only the first container seen for each partition is queried; later
    # containers on the same partition are counted as overlaps below.
    container_parts = {}
    for container in containers:
        part, nodes = container_ring.get_nodes(account, container)
        if part not in container_parts:
            container_parts[part] = part
            coropool.spawn(direct, container, part, nodes)
    coropool.waitall()
    distinct_partitions = len(container_parts)
    copies_expected = distinct_partitions * container_ring.replica_count
    # Weighted sum over the histogram: copies-per-partition * partitions.
    copies_found = sum(a * b for a, b in enumerate(container_copies_found))
    value = 100.0 * copies_found / copies_expected
    elapsed, elapsed_unit = get_time_units(time() - begun)
    print '\r\x1B[KQueried %d containers for dispersion reporting, ' \
          '%d%s, %d retries' % (containers_listed, round(elapsed),
          elapsed_unit, retries_done[0])
    if containers_listed - distinct_partitions:
        print 'There were %d overlapping partitions' % (
              containers_listed - distinct_partitions)
    # Indexes 2, 1 and 0 are reported as one, two and all copies missing.
    if container_copies_found[2]:
        print 'There were %d partitions missing one copy.' % \
              container_copies_found[2]
    if container_copies_found[1]:
        print '! There were %d partitions missing two copies.' % \
              container_copies_found[1]
    if container_copies_found[0]:
        print '!!! There were %d partitions missing all copies.' % \
              container_copies_found[0]
    print '%.02f%% of container copies found (%d of %d)' % (
          value, copies_found, copies_expected)
    print 'Sample represents %.02f%% of the container partition space' % (
          100.0 * distinct_partitions / container_ring.partition_count)
    stdout.flush()
def object_dispersion_report(coropool, connpool, account, object_ring,
                             retries):
    """Query every dispersion object directly and print a replica report.

    Lists the objects named ``dispersion_*`` in the ``dispersion_objects``
    container, sends a direct HEAD for one object per distinct ring
    partition to every node the ring returns for it, and prints the
    percentage of object copies found.  Returns early, after a message on
    stderr, when the container does not exist (404) or the listing is empty;
    any other ``ClientException`` from the listing is re-raised.

    :param coropool: pool on which one ``direct`` check per distinct
                     partition is spawned; ``waitall()`` is called on it
    :param connpool: pool whose ``item()`` yields the client connection used
                     for the container listing
    :param account: account name handed to the ring and the direct requests
    :param object_ring: ring supplying ``get_nodes``, ``replica_count`` and
                        ``partition_count``
    :param retries: retry count passed through to ``direct_client.retry``
    """
    container = 'dispersion_objects'
    with connpool.item() as conn:
        try:
            objects = [o['name'] for o in conn.get_container(container,
                       prefix='dispersion_', full_listing=True)[1]]
        except ClientException, err:
            # A 404 for the container is reported as "nothing to query";
            # every other status propagates.
            if err.http_status != 404:
                raise
            print >>stderr, 'No objects to query. Has ' \
                            'swift-dispersion-populate been run?'
            stderr.flush()
            return
    objects_listed = len(objects)
    if not objects_listed:
        print >>stderr, 'No objects to query. Has swift-dispersion-populate ' \
                        'been run?'
        stderr.flush()
        return
    # Single-element lists so the nested direct() closure can update these
    # counters in place.
    retries_done = [0]
    objects_queried = [0]
    # Histogram: index is the number of copies found for a partition.
    # NOTE(review): only indexes 0..3 exist, so this looks like it assumes
    # get_nodes() never returns more than three nodes -- confirm.
    object_copies_found = [0, 0, 0, 0]
    begun = time()
    # First progress line is due two seconds after the start.
    next_report = [time() + 2]

    def direct(obj, part, nodes):
        # Check one object on each of its nodes and record how many copies
        # answered.  ``container`` comes from the enclosing scope.
        found_count = 0
        for node in nodes:
            error_log = get_error_log('%(ip)s:%(port)s/%(device)s' % node)
            try:
                attempts, _junk = direct_client.retry(
                                direct_client.direct_head_object, node, part,
                                account, container, obj, error_log=error_log,
                                retries=retries)
                retries_done[0] += attempts - 1
                found_count += 1
            except ClientException, err:
                # 404 and 507 simply count as a missing copy; other
                # statuses are logged as well.
                if err.http_status not in (404, 507):
                    error_log('Giving up on /%s/%s/%s/%s: %s' % (part, account,
                              container, obj, err))
            except (Exception, Timeout), err:
                error_log('Giving up on /%s/%s/%s/%s: %s' % (part, account,
                          container, obj, err))
        object_copies_found[found_count] += 1
        objects_queried[0] += 1
        if time() >= next_report[0]:
            # Subsequent progress lines appear at most every five seconds.
            next_report[0] = time() + 5
            eta, eta_unit = compute_eta(begun, objects_queried[0],
                                        objects_listed)
            print '\r\x1B[KQuerying objects: %d of %d, %d%s left, %d ' \
                  'retries' % (objects_queried[0], objects_listed, round(eta),
                  eta_unit, retries_done[0]),
            stdout.flush()
    # Only the first object seen for each partition is queried; later
    # objects on the same partition are counted as overlaps below.
    object_parts = {}
    for obj in objects:
        part, nodes = object_ring.get_nodes(account, container, obj)
        if part not in object_parts:
            object_parts[part] = part
            coropool.spawn(direct, obj, part, nodes)
    coropool.waitall()
    distinct_partitions = len(object_parts)
    copies_expected = distinct_partitions * object_ring.replica_count
    # Weighted sum over the histogram: copies-per-partition * partitions.
    copies_found = sum(a * b for a, b in enumerate(object_copies_found))
    value = 100.0 * copies_found / copies_expected
    elapsed, elapsed_unit = get_time_units(time() - begun)
    print '\r\x1B[KQueried %d objects for dispersion reporting, ' \
          '%d%s, %d retries' % (objects_listed, round(elapsed),
          elapsed_unit, retries_done[0])
    if objects_listed - distinct_partitions:
        print 'There were %d overlapping partitions' % (
              objects_listed - distinct_partitions)
    # Indexes 2, 1 and 0 are reported as one, two and all copies missing.
    if object_copies_found[2]:
        print 'There were %d partitions missing one copy.' % \
              object_copies_found[2]
    if object_copies_found[1]:
        print '! There were %d partitions missing two copies.' % \
              object_copies_found[1]
    if object_copies_found[0]:
        print '!!! There were %d partitions missing all copies.' % \
              object_copies_found[0]
    print '%.02f%% of object copies found (%d of %d)' % \
          (value, copies_found, copies_expected)
    print 'Sample represents %.02f%% of the object partition space' % (
          100.0 * distinct_partitions / object_ring.partition_count)
    stdout.flush()
if __name__ == '__main__':
    # Script entry point: read the [dispersion] config section, authenticate
    # once, then print the container report followed by the object report.
    patcher.monkey_patch()
    hubs.get_hub().debug_exceptions = False
    # An optional single argument overrides the default config path; more
    # than one argument is a usage error.
    conffile = '/etc/swift/dispersion.conf'
    if len(argv) == 2:
        conffile = argv[1]
    elif len(argv) > 2:
        exit('Syntax: %s [conffile]' % argv[0])
    c = ConfigParser()
    if not c.read(conffile):
        exit('Unable to read config file: %s' % conffile)
    conf = dict(c.items('dispersion'))
    swift_dir = conf.get('swift_dir', '/etc/swift')
    # Parsed here but not referenced again within this script section.
    dispersion_coverage = int(conf.get('dispersion_coverage', 1))
    retries = int(conf.get('retries', 5))
    concurrency = int(conf.get('concurrency', 25))
    coropool = GreenPool(size=concurrency)
    # Authenticate once; every pooled Connection is created with the
    # resulting url/token as preauthurl/preauthtoken.
    url, token = get_auth(conf['auth_url'], conf['auth_user'],
                          conf['auth_key'])
    # The account name is taken to be the last path segment of the storage
    # URL.
    account = url.rsplit('/', 1)[1]
    connpool = Pool(max_size=concurrency)
    connpool.create = lambda: Connection(conf['auth_url'],
                               conf['auth_user'], conf['auth_key'],
                               retries=retries,
                               preauthurl=url, preauthtoken=token)
    container_ring = Ring(os.path.join(swift_dir, 'container.ring.gz'))
    object_ring = Ring(os.path.join(swift_dir, 'object.ring.gz'))
    container_dispersion_report(coropool, connpool, account, container_ring,
                                retries)
    object_dispersion_report(coropool, connpool, account, object_ring, retries)

View File

@ -18,7 +18,7 @@ import os
import traceback import traceback
from ConfigParser import ConfigParser from ConfigParser import ConfigParser
from optparse import OptionParser from optparse import OptionParser
from sys import exit, argv from sys import exit, argv, stderr
from time import time from time import time
from uuid import uuid4 from uuid import uuid4
@ -77,6 +77,11 @@ if __name__ == '__main__':
global begun, created, item_type, next_report, need_to_create, retries_done global begun, created, item_type, next_report, need_to_create, retries_done
patcher.monkey_patch() patcher.monkey_patch()
print >>stderr, '''
WARNING: This command is being replaced with swift-dispersion-populate; you
should switch to that before the next Swift release.
'''
parser = OptionParser() parser = OptionParser()
parser.add_option('-d', '--dispersion', action='store_true', parser.add_option('-d', '--dispersion', action='store_true',
dest='dispersion', default=False, dest='dispersion', default=False,

View File

@ -749,6 +749,11 @@ if __name__ == '__main__':
patcher.monkey_patch() patcher.monkey_patch()
hubs.get_hub().debug_exceptions = False hubs.get_hub().debug_exceptions = False
print >>stderr, '''
WARNING: This command is being replaced with swift-dispersion-report; you
should switch to that before the next Swift release.
'''
parser = OptionParser(usage=''' parser = OptionParser(usage='''
Usage: %prog [options] [conf_file] Usage: %prog [options] [conf_file]

View File

@ -134,9 +134,9 @@ different distro or OS, some care should be taken before using in production.
Cluster Health Cluster Health
-------------- --------------
There is a swift-stats-report tool for measuring overall cluster health. This There is a swift-dispersion-report tool for measuring overall cluster health.
is accomplished by checking if a set of deliberately distributed containers and This is accomplished by checking if a set of deliberately distributed
objects are currently in their proper places within the cluster. containers and objects are currently in their proper places within the cluster.
For instance, a common deployment has three replicas of each object. The health For instance, a common deployment has three replicas of each object. The health
of that object can be measured by checking if each replica is in its proper of that object can be measured by checking if each replica is in its proper
@ -153,15 +153,15 @@ to gather results.
The first thing that needs to be done to provide this health value is create a The first thing that needs to be done to provide this health value is create a
new account solely for this usage. Next, we need to place the containers and new account solely for this usage. Next, we need to place the containers and
objects throughout the system so that they are on distinct partitions. The objects throughout the system so that they are on distinct partitions. The
swift-stats-populate tool does this by making up random container and object swift-dispersion-populate tool does this by making up random container and
names until they fall on distinct partitions. Last, and repeatedly for the life object names until they fall on distinct partitions. Last, and repeatedly for
of the cluster, we need to run the swift-stats-report tool to check the health the life of the cluster, we need to run the swift-dispersion-report tool to
of each of these containers and objects. check the health of each of these containers and objects.
These tools need direct access to the entire cluster and to the ring files These tools need direct access to the entire cluster and to the ring files
(installing them on a proxy server will probably do). Both (installing them on a proxy server will probably do). Both
swift-stats-populate and swift-stats-report use the same configuration file, swift-dispersion-populate and swift-dispersion-report use the same
/etc/swift/stats.conf. Example conf file:: configuration file, /etc/swift/dispersion.conf. Example conf file::
[stats] [stats]
auth_url = http://saio:11000/auth/v1.0 auth_url = http://saio:11000/auth/v1.0
@ -169,17 +169,17 @@ swift-stats-populate and swift-stats-report use the same configuration file,
auth_key = testing auth_key = testing
There are also options for the conf file for specifying the dispersion coverage There are also options for the conf file for specifying the dispersion coverage
(defaults to 1%), retries, concurrency, CSV output file, etc. though usually (defaults to 1%), retries, concurrency, etc. though usually the defaults are
the defaults are fine. fine.
Once the configuration is in place, run `swift-stats-populate -d` to populate Once the configuration is in place, run `swift-dispersion-populate` to populate
the containers and objects throughout the cluster. the containers and objects throughout the cluster.
Now that those containers and objects are in place, you can run Now that those containers and objects are in place, you can run
`swift-stats-report -d` to get a dispersion report, or the overall health of `swift-dispersion-report` to get a dispersion report, or the overall health of
the cluster. Here is an example of a cluster in perfect health:: the cluster. Here is an example of a cluster in perfect health::
$ swift-stats-report -d $ swift-dispersion-report
Queried 2621 containers for dispersion reporting, 19s, 0 retries Queried 2621 containers for dispersion reporting, 19s, 0 retries
100.00% of container copies found (7863 of 7863) 100.00% of container copies found (7863 of 7863)
Sample represents 1.00% of the container partition space Sample represents 1.00% of the container partition space
@ -195,7 +195,7 @@ that has::
$ swift-ring-builder object.builder set_weight d0 200 $ swift-ring-builder object.builder set_weight d0 200
$ swift-ring-builder object.builder rebalance $ swift-ring-builder object.builder rebalance
... ...
$ swift-stats-report -d $ swift-dispersion-report
Queried 2621 containers for dispersion reporting, 8s, 0 retries Queried 2621 containers for dispersion reporting, 8s, 0 retries
100.00% of container copies found (7863 of 7863) 100.00% of container copies found (7863 of 7863)
Sample represents 1.00% of the container partition space Sample represents 1.00% of the container partition space
@ -212,7 +212,7 @@ is much less. Next, I'll run the replicators to get everything put back into
place and then rerun the dispersion report:: place and then rerun the dispersion report::
... start object replicators and monitor logs until they're caught up ... ... start object replicators and monitor logs until they're caught up ...
$ swift-stats-report -d $ swift-dispersion-report
Queried 2621 containers for dispersion reporting, 17s, 0 retries Queried 2621 containers for dispersion reporting, 17s, 0 retries
100.00% of container copies found (7863 of 7863) 100.00% of container copies found (7863 of 7863)
Sample represents 1.00% of the container partition space Sample represents 1.00% of the container partition space
@ -221,13 +221,6 @@ place and then rerun the dispersion report::
100.00% of object copies found (7857 of 7857) 100.00% of object copies found (7857 of 7857)
Sample represents 1.00% of the object partition space Sample represents 1.00% of the object partition space
So that's a summation of how to use swift-stats-report to monitor the health of
a cluster. There are a few other things it can do, such as performance
monitoring, but those are currently in their infancy and little used. For
instance, you can run `swift-stats-populate -p` and `swift-stats-report -p` to
get performance timings (warning: the initial populate takes a while). These
timings are dumped into a CSV file (/etc/swift/stats.csv by default) and can
then be graphed to see how cluster performance is trending.
------------------------------------ ------------------------------------
Additional Cleanup Script for Swauth Additional Cleanup Script for Swauth

View File

@ -0,0 +1,8 @@
[dispersion]
auth_url = http://saio:8080/auth/v1.0
auth_user = test:tester
auth_key = testing
# swift_dir = /etc/swift
# dispersion_coverage = 1
# retries = 5
# concurrency = 25

View File

@ -1,3 +1,6 @@
# WARNING: The swift-stats-populate and swift-stats-report commands are being
# replaced with swift-dispersion-populate and swift-dispersion-report; you
# should switch to those before the next Swift release.
[stats] [stats]
auth_url = http://saio:8080/auth/v1.0 auth_url = http://saio:8080/auth/v1.0
auth_user = test:tester auth_user = test:tester

View File

@ -90,6 +90,7 @@ setup(
'bin/swift-object-updater', 'bin/swift-proxy-server', 'bin/swift-object-updater', 'bin/swift-proxy-server',
'bin/swift-ring-builder', 'bin/swift-stats-populate', 'bin/swift-ring-builder', 'bin/swift-stats-populate',
'bin/swift-stats-report', 'bin/swift-stats-report',
'bin/swift-dispersion-populate', 'bin/swift-dispersion-report',
'bin/swift-bench', 'bin/swift-bench',
'bin/swift-log-uploader', 'bin/swift-log-uploader',
'bin/swift-log-stats-collector', 'bin/swift-log-stats-collector',

View File

@ -86,7 +86,7 @@ class AccountController(object):
return Response(status='507 %s is not mounted' % drive) return Response(status='507 %s is not mounted' % drive)
broker = self._get_account_broker(drive, part, account) broker = self._get_account_broker(drive, part, account)
if container: # put account container if container: # put account container
if 'x-cf-trans-id' in req.headers: if 'x-trans-id' in req.headers:
broker.pending_timeout = 3 broker.pending_timeout = 3
if req.headers.get('x-account-override-deleted', 'no').lower() != \ if req.headers.get('x-account-override-deleted', 'no').lower() != \
'yes' and broker.is_deleted(): 'yes' and broker.is_deleted():
@ -296,7 +296,7 @@ class AccountController(object):
def __call__(self, env, start_response): def __call__(self, env, start_response):
start_time = time.time() start_time = time.time()
req = Request(env) req = Request(env)
self.logger.txn_id = req.headers.get('x-cf-trans-id', None) self.logger.txn_id = req.headers.get('x-trans-id', None)
if not check_utf8(req.path_info): if not check_utf8(req.path_info):
res = HTTPPreconditionFailed(body='Invalid UTF8') res = HTTPPreconditionFailed(body='Invalid UTF8')
else: else:
@ -319,7 +319,7 @@ class AccountController(object):
time.strftime('%d/%b/%Y:%H:%M:%S +0000', time.gmtime()), time.strftime('%d/%b/%Y:%H:%M:%S +0000', time.gmtime()),
req.method, req.path, req.method, req.path,
res.status.split()[0], res.content_length or '-', res.status.split()[0], res.content_length or '-',
req.headers.get('x-cf-trans-id', '-'), req.headers.get('x-trans-id', '-'),
req.referer or '-', req.user_agent or '-', req.referer or '-', req.user_agent or '-',
trans_time, trans_time,
additional_info) additional_info)

View File

@ -15,6 +15,7 @@
from webob import Request from webob import Request
from webob.exc import HTTPServerError from webob.exc import HTTPServerError
import uuid
from swift.common.utils import get_logger from swift.common.utils import get_logger
@ -29,13 +30,23 @@ class CatchErrorMiddleware(object):
self.logger = get_logger(conf, log_route='catch-errors') self.logger = get_logger(conf, log_route='catch-errors')
def __call__(self, env, start_response): def __call__(self, env, start_response):
trans_id = env.get('HTTP_X_TRANS_ID')
if not trans_id:
trans_id = 'tx' + uuid.uuid4().hex
env['HTTP_X_TRANS_ID'] = trans_id
try: try:
return self.app(env, start_response)
def my_start_response(status, response_headers, exc_info=None):
trans_header = ('x-trans-id', trans_id)
response_headers.append(trans_header)
return start_response(status, response_headers, exc_info)
return self.app(env, my_start_response)
except Exception, err: except Exception, err:
self.logger.exception(_('Error: %s'), err) self.logger.exception(_('Error: %s'), err)
resp = HTTPServerError(request=Request(env), resp = HTTPServerError(request=Request(env),
body='An error occurred', body='An error occurred',
content_type='text/plain') content_type='text/plain')
resp.headers['x-trans-id'] = trans_id
return resp(env, start_response) return resp(env, start_response)

View File

@ -1360,7 +1360,7 @@ class Swauth(object):
getattr(req, 'bytes_transferred', 0) or '-', getattr(req, 'bytes_transferred', 0) or '-',
getattr(response, 'bytes_transferred', 0) or '-', getattr(response, 'bytes_transferred', 0) or '-',
req.headers.get('etag', '-'), req.headers.get('etag', '-'),
req.headers.get('x-cf-trans-id', '-'), logged_headers or '-', req.headers.get('x-trans-id', '-'), logged_headers or '-',
trans_time))) trans_time)))

View File

@ -96,7 +96,7 @@ class ContainerController(object):
'x-delete-timestamp': info['delete_timestamp'], 'x-delete-timestamp': info['delete_timestamp'],
'x-object-count': info['object_count'], 'x-object-count': info['object_count'],
'x-bytes-used': info['bytes_used'], 'x-bytes-used': info['bytes_used'],
'x-cf-trans-id': req.headers.get('X-Cf-Trans-Id', '-')} 'x-trans-id': req.headers.get('x-trans-id', '-')}
if req.headers.get('x-account-override-deleted', 'no').lower() == \ if req.headers.get('x-account-override-deleted', 'no').lower() == \
'yes': 'yes':
account_headers['x-account-override-deleted'] = 'yes' account_headers['x-account-override-deleted'] = 'yes'
@ -385,7 +385,7 @@ class ContainerController(object):
def __call__(self, env, start_response): def __call__(self, env, start_response):
start_time = time.time() start_time = time.time()
req = Request(env) req = Request(env)
self.logger.txn_id = req.headers.get('x-cf-trans-id', None) self.logger.txn_id = req.headers.get('x-trans-id', None)
if not check_utf8(req.path_info): if not check_utf8(req.path_info):
res = HTTPPreconditionFailed(body='Invalid UTF8') res = HTTPPreconditionFailed(body='Invalid UTF8')
else: else:
@ -405,7 +405,7 @@ class ContainerController(object):
time.gmtime()), time.gmtime()),
req.method, req.path, req.method, req.path,
res.status.split()[0], res.content_length or '-', res.status.split()[0], res.content_length or '-',
req.headers.get('x-cf-trans-id', '-'), req.headers.get('x-trans-id', '-'),
req.referer or '-', req.user_agent or '-', req.referer or '-', req.user_agent or '-',
trans_time) trans_time)
if req.method.upper() == 'REPLICATE': if req.method.upper() == 'REPLICATE':

View File

@ -549,7 +549,7 @@ class ObjectController(object):
'x-content-type': file.metadata['Content-Type'], 'x-content-type': file.metadata['Content-Type'],
'x-timestamp': file.metadata['X-Timestamp'], 'x-timestamp': file.metadata['X-Timestamp'],
'x-etag': file.metadata['ETag'], 'x-etag': file.metadata['ETag'],
'x-cf-trans-id': request.headers.get('x-cf-trans-id', '-')}, 'x-trans-id': request.headers.get('x-trans-id', '-')},
device) device)
resp = HTTPCreated(request=request, etag=etag) resp = HTTPCreated(request=request, etag=etag)
return resp return resp
@ -686,7 +686,7 @@ class ObjectController(object):
file.unlinkold(metadata['X-Timestamp']) file.unlinkold(metadata['X-Timestamp'])
self.container_update('DELETE', account, container, obj, self.container_update('DELETE', account, container, obj,
request.headers, {'x-timestamp': metadata['X-Timestamp'], request.headers, {'x-timestamp': metadata['X-Timestamp'],
'x-cf-trans-id': request.headers.get('x-cf-trans-id', '-')}, 'x-trans-id': request.headers.get('x-trans-id', '-')},
device) device)
resp = response_class(request=request) resp = response_class(request=request)
return resp return resp
@ -719,7 +719,7 @@ class ObjectController(object):
"""WSGI Application entry point for the Swift Object Server.""" """WSGI Application entry point for the Swift Object Server."""
start_time = time.time() start_time = time.time()
req = Request(env) req = Request(env)
self.logger.txn_id = req.headers.get('x-cf-trans-id', None) self.logger.txn_id = req.headers.get('x-trans-id', None)
if not check_utf8(req.path_info): if not check_utf8(req.path_info):
res = HTTPPreconditionFailed(body='Invalid UTF8') res = HTTPPreconditionFailed(body='Invalid UTF8')
else: else:
@ -740,7 +740,7 @@ class ObjectController(object):
time.gmtime()), time.gmtime()),
req.method, req.path, res.status.split()[0], req.method, req.path, res.status.split()[0],
res.content_length or '-', req.referer or '-', res.content_length or '-', req.referer or '-',
req.headers.get('x-cf-trans-id', '-'), req.headers.get('x-trans-id', '-'),
req.user_agent or '-', req.user_agent or '-',
trans_time) trans_time)
if req.method == 'REPLICATE': if req.method == 'REPLICATE':

View File

@ -358,7 +358,7 @@ class Controller(object):
result_code = 0 result_code = 0
attempts_left = self.app.account_ring.replica_count attempts_left = self.app.account_ring.replica_count
path = '/%s' % account path = '/%s' % account
headers = {'x-cf-trans-id': self.trans_id} headers = {'x-trans-id': self.trans_id}
for node in self.iter_nodes(partition, nodes, self.app.account_ring): for node in self.iter_nodes(partition, nodes, self.app.account_ring):
try: try:
with ConnectionTimeout(self.app.conn_timeout): with ConnectionTimeout(self.app.conn_timeout):
@ -430,7 +430,7 @@ class Controller(object):
write_acl = None write_acl = None
container_size = None container_size = None
attempts_left = self.app.container_ring.replica_count attempts_left = self.app.container_ring.replica_count
headers = {'x-cf-trans-id': self.trans_id} headers = {'x-trans-id': self.trans_id}
for node in self.iter_nodes(partition, nodes, self.app.container_ring): for node in self.iter_nodes(partition, nodes, self.app.container_ring):
try: try:
with ConnectionTimeout(self.app.conn_timeout): with ConnectionTimeout(self.app.conn_timeout):
@ -1227,7 +1227,7 @@ class ContainerController(Controller):
headers = [] headers = []
for account in accounts: for account in accounts:
nheaders = {'X-Timestamp': normalize_timestamp(time.time()), nheaders = {'X-Timestamp': normalize_timestamp(time.time()),
'x-cf-trans-id': self.trans_id, 'x-trans-id': self.trans_id,
'X-Account-Host': '%(ip)s:%(port)s' % account, 'X-Account-Host': '%(ip)s:%(port)s' % account,
'X-Account-Partition': account_partition, 'X-Account-Partition': account_partition,
'X-Account-Device': account['device']} 'X-Account-Device': account['device']}
@ -1255,7 +1255,7 @@ class ContainerController(Controller):
container_partition, containers = self.app.container_ring.get_nodes( container_partition, containers = self.app.container_ring.get_nodes(
self.account_name, self.container_name) self.account_name, self.container_name)
headers = {'X-Timestamp': normalize_timestamp(time.time()), headers = {'X-Timestamp': normalize_timestamp(time.time()),
'x-cf-trans-id': self.trans_id} 'x-trans-id': self.trans_id}
headers.update(value for value in req.headers.iteritems() headers.update(value for value in req.headers.iteritems()
if value[0].lower() in self.pass_through_headers or if value[0].lower() in self.pass_through_headers or
value[0].lower().startswith('x-container-meta-')) value[0].lower().startswith('x-container-meta-'))
@ -1278,7 +1278,7 @@ class ContainerController(Controller):
headers = [] headers = []
for account in accounts: for account in accounts:
headers.append({'X-Timestamp': normalize_timestamp(time.time()), headers.append({'X-Timestamp': normalize_timestamp(time.time()),
'X-Cf-Trans-Id': self.trans_id, 'X-Trans-Id': self.trans_id,
'X-Account-Host': '%(ip)s:%(port)s' % account, 'X-Account-Host': '%(ip)s:%(port)s' % account,
'X-Account-Partition': account_partition, 'X-Account-Partition': account_partition,
'X-Account-Device': account['device']}) 'X-Account-Device': account['device']})
@ -1323,7 +1323,7 @@ class AccountController(Controller):
account_partition, accounts = \ account_partition, accounts = \
self.app.account_ring.get_nodes(self.account_name) self.app.account_ring.get_nodes(self.account_name)
headers = {'X-Timestamp': normalize_timestamp(time.time()), headers = {'X-Timestamp': normalize_timestamp(time.time()),
'x-cf-trans-id': self.trans_id} 'x-trans-id': self.trans_id}
headers.update(value for value in req.headers.iteritems() headers.update(value for value in req.headers.iteritems()
if value[0].lower().startswith('x-account-meta-')) if value[0].lower().startswith('x-account-meta-'))
if self.app.memcache: if self.app.memcache:
@ -1340,7 +1340,7 @@ class AccountController(Controller):
account_partition, accounts = \ account_partition, accounts = \
self.app.account_ring.get_nodes(self.account_name) self.app.account_ring.get_nodes(self.account_name)
headers = {'X-Timestamp': normalize_timestamp(time.time()), headers = {'X-Timestamp': normalize_timestamp(time.time()),
'X-CF-Trans-Id': self.trans_id} 'X-Trans-Id': self.trans_id}
headers.update(value for value in req.headers.iteritems() headers.update(value for value in req.headers.iteritems()
if value[0].lower().startswith('x-account-meta-')) if value[0].lower().startswith('x-account-meta-'))
if self.app.memcache: if self.app.memcache:
@ -1357,7 +1357,7 @@ class AccountController(Controller):
account_partition, accounts = \ account_partition, accounts = \
self.app.account_ring.get_nodes(self.account_name) self.app.account_ring.get_nodes(self.account_name)
headers = {'X-Timestamp': normalize_timestamp(time.time()), headers = {'X-Timestamp': normalize_timestamp(time.time()),
'X-CF-Trans-Id': self.trans_id} 'X-Trans-Id': self.trans_id}
if self.app.memcache: if self.app.memcache:
self.app.memcache.delete('account%s' % req.path_info.rstrip('/')) self.app.memcache.delete('account%s' % req.path_info.rstrip('/'))
return self.make_requests(req, self.app.account_ring, return self.make_requests(req, self.app.account_ring,
@ -1473,8 +1473,6 @@ class BaseApplication(object):
def update_request(self, req): def update_request(self, req):
req.bytes_transferred = '-' req.bytes_transferred = '-'
req.client_disconnect = False req.client_disconnect = False
if 'x-cf-trans-id' not in req.headers:
req.headers['x-cf-trans-id'] = 'tx' + str(uuid.uuid4())
if 'x-storage-token' in req.headers and \ if 'x-storage-token' in req.headers and \
'x-auth-token' not in req.headers: 'x-auth-token' not in req.headers:
req.headers['x-auth-token'] = req.headers['x-storage-token'] req.headers['x-auth-token'] = req.headers['x-storage-token']
@ -1498,8 +1496,8 @@ class BaseApplication(object):
return HTTPPreconditionFailed(request=req, body='Bad URL') return HTTPPreconditionFailed(request=req, body='Bad URL')
controller = controller(self, **path_parts) controller = controller(self, **path_parts)
controller.trans_id = req.headers.get('x-cf-trans-id', '-') controller.trans_id = req.headers.get('x-trans-id', '-')
self.logger.txn_id = req.headers.get('x-cf-trans-id', None) self.logger.txn_id = req.headers.get('x-trans-id', None)
try: try:
handler = getattr(controller, req.method) handler = getattr(controller, req.method)
if not getattr(handler, 'publicly_accessible'): if not getattr(handler, 'publicly_accessible'):
@ -1579,7 +1577,7 @@ class Application(BaseApplication):
getattr(req, 'bytes_transferred', 0) or '-', getattr(req, 'bytes_transferred', 0) or '-',
getattr(response, 'bytes_transferred', 0) or '-', getattr(response, 'bytes_transferred', 0) or '-',
req.headers.get('etag', '-'), req.headers.get('etag', '-'),
req.headers.get('x-cf-trans-id', '-'), req.headers.get('x-trans-id', '-'),
logged_headers or '-', logged_headers or '-',
trans_time, trans_time,
))) )))

View File

@ -45,5 +45,28 @@ class TestCatchErrors(unittest.TestCase):
resp = app(req.environ, start_response) resp = app(req.environ, start_response)
self.assertEquals(resp, ['An error occurred']) self.assertEquals(resp, ['An error occurred'])
def test_trans_id_header(self):
    """Verify the catch_errors middleware reports an x-trans-id header.

    A fresh CatchErrorMiddleware is built around FakeApp for each of the
    account, container and object style paths, first with FakeApp() and
    then with FakeApp(True).  The check itself lives in the
    start_response callback, so it runs whenever the middleware starts
    a response.
    """

    def start_response(status, headers):
        # headers is a sequence whose items have the header name at
        # index 0; the name is matched exactly as 'x-trans-id'.
        header_names = (header[0] for header in headers)
        self.assert_('x-trans-id' in header_names)

    # NOTE(review): FakeApp(True) presumably makes the wrapped app fail so
    # the middleware's error path is covered -- confirm against FakeApp.
    for fake_app_args in ((), (True,)):
        for path in ('/v1/a', '/v1/a/c', '/v1/a/c/o'):
            middleware = catch_errors.CatchErrorMiddleware(
                FakeApp(*fake_app_args), {})
            request = Request.blank(path)
            middleware(request.environ, start_response)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -124,7 +124,7 @@ def setup():
for node in nodes: for node in nodes:
conn = proxy_server.http_connect(node['ip'], node['port'], conn = proxy_server.http_connect(node['ip'], node['port'],
node['device'], partition, 'PUT', '/a', node['device'], partition, 'PUT', '/a',
{'X-Timestamp': ts, 'X-CF-Trans-Id': 'test'}) {'X-Timestamp': ts, 'x-trans-id': 'test'})
resp = conn.getresponse() resp = conn.getresponse()
assert(resp.status == 201) assert(resp.status == 201)
# Create container # Create container