From a622349eda31fb20340497d881d2df058c7a5cc6 Mon Sep 17 00:00:00 2001 From: David Goetz Date: Wed, 6 Feb 2013 10:57:17 -0800 Subject: [PATCH] Use a doubled proxy-logging instead of each middleware handling it differently (if at all) Adding a swift.source to wsgi pre_auth funcs and all middleware that makes subrequests to proxy server. NOTE: This change will result in a change in the number of proxy logs made for staticweb, formpost, tempurl, and any other middleware that performs sub requests (including swauth and SOS). Please see docs for details. DocImpact Change-Id: I80cf2806add1c3d34054147e2515944be340455b --- etc/proxy-server.conf-sample | 16 +-- swift/common/middleware/bulk.py | 44 +++++-- swift/common/middleware/container_quotas.py | 3 +- swift/common/middleware/formpost.py | 56 ++------- swift/common/middleware/proxy_logging.py | 25 +++- swift/common/middleware/staticweb.py | 111 +++--------------- swift/common/middleware/tempauth.py | 49 +------- swift/common/middleware/tempurl.py | 48 +------- swift/common/wsgi.py | 13 +- swift/proxy/controllers/base.py | 5 +- test/unit/common/middleware/test_staticweb.py | 27 ----- test/unit/common/test_wsgi.py | 14 +++ 12 files changed, 119 insertions(+), 292 deletions(-) diff --git a/etc/proxy-server.conf-sample b/etc/proxy-server.conf-sample index f7a7a32aff..5b656dcc10 100644 --- a/etc/proxy-server.conf-sample +++ b/etc/proxy-server.conf-sample @@ -33,7 +33,7 @@ # eventlet_debug = false [pipeline:main] -pipeline = catch_errors healthcheck cache ratelimit tempauth container-quotas proxy-logging proxy-server +pipeline = catch_errors healthcheck proxy-logging cache ratelimit tempauth container-quotas proxy-logging proxy-server [app:proxy-server] use = egg:swift#proxy @@ -265,15 +265,6 @@ use = egg:swift#cname_lookup use = egg:swift#staticweb # Seconds to cache container x-container-meta-web-* header values. # cache_timeout = 300 -# You can override the default log routing for this filter here: -# set log_name = staticweb -# set log_facility = LOG_LOCAL0 -# set log_level = INFO -# set log_address = /dev/log -# set access_log_name = staticweb -# set access_log_facility = LOG_LOCAL0 -# set access_log_level = INFO -# set log_headers = False # Note: Put tempurl just before your auth filter(s) in the pipeline [filter:tempurl] @@ -331,6 +322,11 @@ use = egg:swift#proxy_logging # What HTTP methods are allowed for StatsD logging (comma-sep); request methods # not in this list will have "BAD_METHOD" for the portion of the metric. # log_statsd_valid_http_methods = GET,HEAD,POST,PUT,DELETE,COPY,OPTIONS +# Note: The double proxy-logging in the pipeline is not a mistake. The +# left-most proxy-logging is there to log requests that were handled in +# middleware and never made it through to the right-most middleware (and +# proxy server). Double logging is prevented for normal requests. See +# proxy-logging docs. # Note: Put before both ratelimit and auth in the pipeline. [filter:bulk] diff --git a/swift/common/middleware/bulk.py b/swift/common/middleware/bulk.py index d5a9a2ab8a..bccaca389e 100644 --- a/swift/common/middleware/bulk.py +++ b/swift/common/middleware/bulk.py @@ -79,24 +79,43 @@ class Bulk(object): Acceptable formats are text/plain, application/json, application/xml, and text/xml. + There are proxy logs created for each file (which becomes a subrequest) in + the tar. The subrequest's proxy log will have a swift.source set to "EA" + the log's content length will reflect the unzipped size of the file. If + double proxy-logging is used the leftmost logger will not have a + swift.source set and the content length will reflect the size of the + payload sent to the proxy (the unexpanded size of the tar.gz). + Bulk Delete: - Will delete multiple objects from their account with a single request. - Responds to DELETE requests with query parameter ?bulk-delete set. - The Content-Type should be set to text/plain. The body of the DELETE - request will be a newline separated list of url encoded objects to delete. - You can only delete 1000 (configurable) objects per request. The objects - specified in the DELETE request body must be URL encoded and in the form: + Will delete multiple objects or containers from their account with a + single request. Responds to DELETE requests with query parameter + ?bulk-delete set. The Content-Type should be set to text/plain. + The body of the DELETE request will be a newline separated list of url + encoded objects to delete. You can only delete 1000 (configurable) objects + per request. The objects specified in the DELETE request body must be URL + encoded and in the form: /container_name/obj_name - If all objects were successfully deleted (or did not exist), will return an - HTTPOk. If any objects failed to delete, will return an HTTPBadGateway. In - both cases the response body will specify the number of objects - successfully deleted, not found, and a list of the objects that failed. + or for a container (which must be empty at time of delete) + + /container_name + + If all items were successfully deleted (or did not exist), will return an + HTTPOk. If any failed to delete, will return an HTTPBadGateway. In + both cases the response body will specify the number of items + successfully deleted, not found, and a list of those that failed. The return body will be formatted in the way specified in the request's Accept header. Acceptable formats are text/plain, application/json, - apllication/xml, and text/xml. + application/xml, and text/xml. + + There are proxy logs created for each object or container (which becomes a + subrequest) that is deleted. The subrequest's proxy log will have a + swift.source set to "BD" the log's content length of 0. If double + proxy-logging is used the leftmost logger will not have a + swift.source set and the content length will reflect the size of the + payload sent to the proxy (the list of objects/containers to be deleted). """ def __init__(self, app, conf): @@ -117,6 +136,7 @@ class Bulk(object): """ new_env = req.environ.copy() new_env['PATH_INFO'] = container_path + new_env['swift.source'] = 'EA' create_cont_req = Request.blank(container_path, environ=new_env) resp = create_cont_req.get_response(self.app) if resp.status_int // 100 != 2: @@ -231,6 +251,7 @@ class Bulk(object): new_env['CONTENT_LENGTH'] = 0 new_env['HTTP_USER_AGENT'] = \ '%s BulkDelete' % req.environ.get('HTTP_USER_AGENT') + new_env['swift.source'] = 'BD' delete_obj_req = Request.blank(delete_path, new_env) resp = delete_obj_req.get_response(self.app) if resp.status_int // 100 == 2: @@ -337,6 +358,7 @@ class Bulk(object): new_env['wsgi.input'] = tar_file new_env['PATH_INFO'] = destination new_env['CONTENT_LENGTH'] = tar_info.size + new_env['swift.source'] = 'EA' new_env['HTTP_USER_AGENT'] = \ '%s BulkExpand' % req.environ.get('HTTP_USER_AGENT') create_obj_req = Request.blank(destination, new_env) diff --git a/swift/common/middleware/container_quotas.py b/swift/common/middleware/container_quotas.py index a9504f9516..0c0d539d00 100644 --- a/swift/common/middleware/container_quotas.py +++ b/swift/common/middleware/container_quotas.py @@ -80,7 +80,8 @@ class ContainerQuotaMiddleware(object): # check user uploads against quotas elif obj and req.method == 'PUT': - container_info = get_container_info(req.environ, self.app) + container_info = get_container_info( + req.environ, self.app, swift_source='CQ') if not container_info or not is_success(container_info['status']): # this will hopefully 404 later return self.app diff --git a/swift/common/middleware/formpost.py b/swift/common/middleware/formpost.py index 9dee49c902..92042052ec 100644 --- a/swift/common/middleware/formpost.py +++ b/swift/common/middleware/formpost.py @@ -109,7 +109,7 @@ from StringIO import StringIO from time import gmtime, strftime, time from urllib import quote, unquote -from swift.common.utils import get_logger, streq_const_time +from swift.common.utils import streq_const_time from swift.common.wsgi import make_pre_authed_env from swift.common.http import HTTP_BAD_REQUEST @@ -285,6 +285,9 @@ class FormPost(object): See above for a full description. + The proxy logs created for any subrequests made will have swift.source set + to "FP". + :param app: The next WSGI filter or app in the paste.deploy chain. :param conf: The configuration dict for the middleware. @@ -295,10 +298,6 @@ class FormPost(object): self.app = app #: The filter configuration dict. self.conf = conf - #: The logger to use with this middleware. - self.logger = get_logger(conf, log_route='formpost') - #: The HTTP user agent to use with subrequests. - self.agent = '%(orig)s FormPost' def __call__(self, env, start_response): """ @@ -314,13 +313,12 @@ class FormPost(object): _parse_attrs(env.get('CONTENT_TYPE') or '') if content_type == 'multipart/form-data' and \ 'boundary' in attrs: + env['HTTP_USER_AGENT'] += ' FormPost' status, headers, body = self._translate_form( env, attrs['boundary']) - self._log_request(env, int(status.split(' ', 1)[0])) start_response(status, headers) return body except (FormInvalid, EOFError), err: - self._log_request(env, HTTP_BAD_REQUEST) body = 'FormPost: %s' % err start_response( '400 Bad Request', @@ -413,7 +411,8 @@ class FormPost(object): max_file_size = int(attributes.get('max_file_size') or 0) except ValueError: raise FormInvalid('max_file_size not an integer') - subenv = make_pre_authed_env(orig_env, 'PUT', agent=self.agent) + subenv = make_pre_authed_env(orig_env, 'PUT', agent=None, + swift_source='FP') subenv['HTTP_TRANSFER_ENCODING'] = 'chunked' subenv['wsgi.input'] = _CappedFileLikeObject(fp, max_file_size) if subenv['PATH_INFO'][-1] != '/' and \ @@ -471,7 +470,7 @@ class FormPost(object): key = memcache.get('temp-url-key/%s' % account) if not key: newenv = make_pre_authed_env(env, 'HEAD', '/v1/' + account, - self.agent) + agent=None, swift_source='FP') newenv['CONTENT_LENGTH'] = '0' newenv['wsgi.input'] = StringIO('') key = [None] @@ -491,45 +490,6 @@ class FormPost(object): memcache.set('temp-url-key/%s' % account, key, timeout=60) return key - def _log_request(self, env, response_status_int): - """ - Used when a request might not be logged by the underlying - WSGI application, but we'd still like to record what - happened. An early 401 Unauthorized is a good example of - this. - - :param env: The WSGI environment for the request. - :param response_status_int: The HTTP status we'll be replying - to the request with. - """ - the_request = quote(unquote(env.get('PATH_INFO') or '/')) - if env.get('QUERY_STRING'): - the_request = the_request + '?' + env['QUERY_STRING'] - client = env.get('HTTP_X_CLUSTER_CLIENT_IP') - if not client and 'HTTP_X_FORWARDED_FOR' in env: - # remote host for other lbs - client = env['HTTP_X_FORWARDED_FOR'].split(',')[0].strip() - if not client: - client = env.get('REMOTE_ADDR') - self.logger.info(' '.join(quote(str(x)) for x in ( - client or '-', - env.get('REMOTE_ADDR') or '-', - strftime('%d/%b/%Y/%H/%M/%S', gmtime()), - env.get('REQUEST_METHOD') or 'GET', - the_request, - env.get('SERVER_PROTOCOL') or '1.0', - response_status_int, - env.get('HTTP_REFERER') or '-', - (env.get('HTTP_USER_AGENT') or '-') + ' FormPOST', - env.get('HTTP_X_AUTH_TOKEN') or '-', - '-', - '-', - '-', - env.get('swift.trans_id') or '-', - '-', - '-', - ))) - def filter_factory(global_conf, **local_conf): """ Returns the WSGI filter for use with paste.deploy. """ diff --git a/swift/common/middleware/proxy_logging.py b/swift/common/middleware/proxy_logging.py index 15e6bde16c..0407bb1dc0 100644 --- a/swift/common/middleware/proxy_logging.py +++ b/swift/common/middleware/proxy_logging.py @@ -35,6 +35,27 @@ be separated with a simple .split() * Values that are missing (e.g. due to a header not being present) or zero are generally represented by a single hyphen ('-'). + +The proxy-logging can be used twice in the proxy server's pipeline when there +is middleware installed that can return custom responses that don't follow the +standard pipeline to the proxy server. + +For example, with staticweb, the middleware might intercept a request to +/v1/AUTH_acc/cont/, make a subrequest to the proxy to retrieve +/v1/AUTH_acc/cont/index.html and, in effect, respond to the client's original +request using the 2nd request's body. In this instance the subrequest will be +logged by the rightmost middleware (with a swift.source set) and the outgoing +request (with body overridden) will be logged by leftmost middleware. + +Requests that follow the normal pipeline (use the same wsgi environment +throughout) will not be double logged because an environment variable +(swift.proxy_access_log_made) is checked/set when a log is made. + +All middleware making subrequests should take care to set swift.source when +needed. With the doubled proxy logs, any consumer/processor of swift's proxy +logs should look at the swift.source field, the rightmost log value, to decide +if this is a middleware subrequest or not. A log processor calculating +bandwidth usage will want to only sum up logs with no swift.source. """ import time @@ -56,7 +77,6 @@ class ProxyLoggingMiddleware(object): self.log_hdrs = config_true_value(conf.get( 'access_log_headers', conf.get('log_headers', 'no'))) - # The leading access_* check is in case someone assumes that # log_statsd_valid_http_methods behaves like the other log_statsd_* # settings. @@ -89,6 +109,8 @@ class ProxyLoggingMiddleware(object): :param bytes_sent: bytes yielded to the WSGI server :param request_time: time taken to satisfy the request, in seconds """ + if env.get('swift.proxy_access_log_made'): + return req = Request(env) if client_disconnect: # log disconnected clients as '499' status code status_int = 499 @@ -122,6 +144,7 @@ class ProxyLoggingMiddleware(object): '%.4f' % request_time, req.environ.get('swift.source'), ))) + env['swift.proxy_access_log_made'] = True # Log timing and bytes-transfered data to StatsD if req.path.startswith('/v1/'): try: diff --git a/swift/common/middleware/staticweb.py b/swift/common/middleware/staticweb.py index 5944af2961..c62b5ad96b 100644 --- a/swift/common/middleware/staticweb.py +++ b/swift/common/middleware/staticweb.py @@ -28,8 +28,8 @@ added. For example:: ... [pipeline:main] - pipeline = catch_errors healthcheck cache ratelimit tempauth staticweb - proxy-logging proxy-server + pipeline = catch_errors healthcheck proxy-logging cache ratelimit tempauth + staticweb proxy-logging proxy-server ... @@ -37,14 +37,6 @@ added. For example:: use = egg:swift#staticweb # Seconds to cache container x-container-meta-web-* header values. # cache_timeout = 300 - # You can override the default log routing for this filter here: - # set log_name = staticweb - # set log_facility = LOG_LOCAL0 - # set log_level = INFO - # set access_log_name = staticweb - # set access_log_facility = LOG_LOCAL0 - # set access_log_level = INFO - # set log_headers = False Any publicly readable containers (for example, ``X-Container-Read: .r:*``, see `acls`_ for more information on this) will be checked for @@ -115,8 +107,8 @@ import time from urllib import unquote, quote as urllib_quote -from swift.common.utils import cache_from_env, get_logger, human_readable, \ - split_path, config_true_value, json +from swift.common.utils import cache_from_env, human_readable, split_path, \ + config_true_value, json from swift.common.wsgi import make_pre_authed_env, make_pre_authed_request, \ WSGIContext from swift.common.http import is_success, is_redirection, HTTP_NOT_FOUND @@ -151,9 +143,6 @@ class _StaticWebContext(WSGIContext): self.obj = obj self.app = staticweb.app self.cache_timeout = staticweb.cache_timeout - self.logger = staticweb.logger - self.access_logger = staticweb.access_logger - self.log_headers = staticweb.log_headers self.agent = '%(orig)s StaticWeb' # Results from the last call to self._get_container_info. self._index = self._error = self._listings = self._listings_css = None @@ -167,7 +156,6 @@ class _StaticWebContext(WSGIContext): :param env: The original request WSGI environment. :param start_response: The WSGI start_response hook. """ - self._log_response(env, self._get_status_int()) if not self._error: start_response(self._response_status, self._response_headers, self._response_exc_info) @@ -179,7 +167,7 @@ class _StaticWebContext(WSGIContext): env, 'GET', '/%s/%s/%s/%s%s' % ( self.version, self.account, self.container, self._get_status_int(), self._error), - self.agent)) + self.agent, swift_source='SW')) if is_success(self._get_status_int()): start_response(save_response_status, self._response_headers, self._response_exc_info) @@ -210,7 +198,7 @@ class _StaticWebContext(WSGIContext): resp = make_pre_authed_request( env, 'HEAD', '/%s/%s/%s' % ( self.version, self.account, self.container), - agent=self.agent).get_response(self.app) + agent=self.agent, swift_source='SW').get_response(self.app) if is_success(resp.status_int): self._index = \ resp.headers.get('x-container-meta-web-index', '').strip() @@ -241,7 +229,7 @@ class _StaticWebContext(WSGIContext): tmp_env = make_pre_authed_env( env, 'GET', '/%s/%s/%s' % ( self.version, self.account, self.container), - self.agent) + self.agent, swift_source='SW') tmp_env['QUERY_STRING'] = 'delimiter=/&format=json' if prefix: tmp_env['QUERY_STRING'] += '&prefix=%s' % quote(prefix) @@ -321,7 +309,6 @@ class _StaticWebContext(WSGIContext): ' \n' \ '\n' resp = Response(headers=headers, body=body) - self._log_response(env, resp.status_int) return resp(env, start_response) def _build_css_path(self, prefix=''): @@ -352,13 +339,13 @@ class _StaticWebContext(WSGIContext): if env['PATH_INFO'][-1] != '/': resp = HTTPMovedPermanently( location=(env['PATH_INFO'] + '/')) - self._log_response(env, resp.status_int) return resp(env, start_response) if not self._index: return self._listing(env, start_response) tmp_env = dict(env) tmp_env['HTTP_USER_AGENT'] = \ '%s StaticWeb' % env.get('HTTP_USER_AGENT') + tmp_env['swift.source'] = 'SW' tmp_env['PATH_INFO'] += self._index resp = self._app_call(tmp_env) status_int = self._get_status_int() @@ -382,6 +369,7 @@ class _StaticWebContext(WSGIContext): tmp_env = dict(env) tmp_env['HTTP_USER_AGENT'] = \ '%s StaticWeb' % env.get('HTTP_USER_AGENT') + tmp_env['swift.source'] = 'SW' resp = self._app_call(tmp_env) status_int = self._get_status_int() if is_success(status_int) or is_redirection(status_int): @@ -398,6 +386,7 @@ class _StaticWebContext(WSGIContext): tmp_env = dict(env) tmp_env['HTTP_USER_AGENT'] = \ '%s StaticWeb' % env.get('HTTP_USER_AGENT') + tmp_env['swift.source'] = 'SW' if tmp_env['PATH_INFO'][-1] != '/': tmp_env['PATH_INFO'] += '/' tmp_env['PATH_INFO'] += self._index @@ -407,7 +396,6 @@ class _StaticWebContext(WSGIContext): if env['PATH_INFO'][-1] != '/': resp = HTTPMovedPermanently( location=env['PATH_INFO'] + '/') - self._log_response(env, resp.status_int) return resp(env, start_response) start_response(self._response_status, self._response_headers, self._response_exc_info) @@ -417,7 +405,7 @@ class _StaticWebContext(WSGIContext): tmp_env = make_pre_authed_env( env, 'GET', '/%s/%s/%s' % ( self.version, self.account, self.container), - self.agent) + self.agent, swift_source='SW') tmp_env['QUERY_STRING'] = 'limit=1&format=json&delimiter' \ '=/&limit=1&prefix=%s' % quote(self.obj + '/') resp = self._app_call(tmp_env) @@ -427,77 +415,18 @@ class _StaticWebContext(WSGIContext): resp = HTTPNotFound()(env, self._start_response) return self._error_response(resp, env, start_response) resp = HTTPMovedPermanently(location=env['PATH_INFO'] + '/') - self._log_response(env, resp.status_int) return resp(env, start_response) return self._listing(env, start_response, self.obj) - def _log_response(self, env, status_int): - """ - Logs an access line for StaticWeb responses; use when the next app in - the pipeline will not be handling the final response to the remote - user. - - Assumes that the request and response bodies are 0 bytes or very near 0 - so no bytes transferred are tracked or logged. - - This does mean that the listings responses that actually do transfer - content will not be logged with any bytes transferred, but in counter - to that the full bytes for the underlying listing will be logged by the - proxy even if the remote client disconnects early for the StaticWeb - listing. - - I didn't think the extra complexity of getting the bytes transferred - exactly correct for these requests was worth it, but perhaps someone - else will think it is. - - To get things exact, this filter would need to use an - eventlet.posthooks logger like the proxy does and any log processing - systems would need to ignore some (but not all) proxy requests made by - StaticWeb if they were just interested in the bytes transferred to the - remote client. - """ - trans_time = '%.4f' % (time.time() - - env.get('staticweb.start_time', time.time())) - the_request = quote(unquote(env['PATH_INFO'])) - if env.get('QUERY_STRING'): - the_request = the_request + '?' + env['QUERY_STRING'] - # remote user for zeus - client = env.get('HTTP_X_CLUSTER_CLIENT_IP') - if not client and 'HTTP_X_FORWARDED_FOR' in env: - # remote user for other lbs - client = env['HTTP_X_FORWARDED_FOR'].split(',')[0].strip() - logged_headers = None - if self.log_headers: - headers = [] - for k, v in env.iteritems(): - if k.startswith('HTTP_'): - k = k[len('HTTP_'):].replace('_', '-').title() - headers.append((k, v)) - logged_headers = '\n'.join('%s: %s' % (k, v) for k, v in headers) - self.access_logger.info(' '.join(quote(str(x)) for x in ( - client or '-', - env.get('REMOTE_ADDR', '-'), - time.strftime('%d/%b/%Y/%H/%M/%S', time.gmtime()), - env['REQUEST_METHOD'], - the_request, - env['SERVER_PROTOCOL'], - status_int, - env.get('HTTP_REFERER', '-'), - env.get('HTTP_USER_AGENT', '-'), - env.get('HTTP_X_AUTH_TOKEN', '-'), - '-', - '-', - env.get('HTTP_ETAG', '-'), - env.get('swift.trans_id', '-'), - logged_headers or '-', - trans_time))) - class StaticWeb(object): """ The Static Web WSGI middleware filter; serves container data as a static web site. See `staticweb`_ for an overview. + The proxy logs created for any subrequests made will have swift.source set + to "SW". + :param app: The next WSGI application/filter in the paste.deploy pipeline. :param conf: The filter configuration dict. """ @@ -509,18 +438,6 @@ class StaticWeb(object): self.conf = conf #: The seconds to cache the x-container-meta-web-* headers., self.cache_timeout = int(conf.get('cache_timeout', 300)) - #: Logger for this filter. - self.logger = get_logger(conf, log_route='staticweb') - access_log_conf = {} - for key in ('log_facility', 'log_name', 'log_level'): - value = conf.get('access_' + key, conf.get(key, None)) - if value: - access_log_conf[key] = value - #: Web access logger for this filter. - self.access_logger = get_logger(access_log_conf, - log_route='staticweb-access') - #: Indicates whether full HTTP headers should be logged or not. - self.log_headers = config_true_value(conf.get('log_headers', 'no')) def __call__(self, env, start_response): """ diff --git a/swift/common/middleware/tempauth.py b/swift/common/middleware/tempauth.py index 6e945861d2..12987564be 100644 --- a/swift/common/middleware/tempauth.py +++ b/swift/common/middleware/tempauth.py @@ -309,18 +309,7 @@ class TempAuth(object): if 'x-storage-token' in req.headers and \ 'x-auth-token' not in req.headers: req.headers['x-auth-token'] = req.headers['x-storage-token'] - if 'eventlet.posthooks' in env: - env['eventlet.posthooks'].append( - (self.posthooklogger, (req,), {})) - return self.handle_request(req)(env, start_response) - else: - # Lack of posthook support means that we have to log on the - # start of the response, rather than after all the data has - # been sent. This prevents logging client disconnects - # differently than full transmissions. - response = self.handle_request(req)(env, start_response) - self.posthooklogger(env, req) - return response + return self.handle_request(req)(env, start_response) except (Exception, Timeout): print "EXCEPTION IN handle: %s: %s" % (format_exc(), env) self.logger.increment('errors') @@ -466,42 +455,6 @@ class TempAuth(object): resp.headers['x-storage-url'] = url return resp - def posthooklogger(self, env, req): - if not req.path.startswith(self.auth_prefix): - return - response = getattr(req, 'response', None) - if not response: - return - trans_time = '%.4f' % (time() - req.start_time) - the_request = quote(unquote(req.path)) - if req.query_string: - the_request = the_request + '?' + req.query_string - # remote user for zeus - client = req.headers.get('x-cluster-client-ip') - if not client and 'x-forwarded-for' in req.headers: - # remote user for other lbs - client = req.headers['x-forwarded-for'].split(',')[0].strip() - logged_headers = None - if self.log_headers: - logged_headers = '\n'.join('%s: %s' % (k, v) - for k, v in req.headers.items()) - status_int = response.status_int - if getattr(req, 'client_disconnect', False) or \ - getattr(response, 'client_disconnect', False): - status_int = HTTP_CLIENT_CLOSED_REQUEST - self.logger.info( - ' '.join(quote(str(x)) for x in (client or '-', - req.remote_addr or '-', strftime('%d/%b/%Y/%H/%M/%S', gmtime()), - req.method, the_request, req.environ['SERVER_PROTOCOL'], - status_int, req.referer or '-', req.user_agent or '-', - req.headers.get('x-auth-token', - req.headers.get('x-auth-admin-user', '-')), - getattr(req, 'bytes_transferred', 0) or '-', - getattr(response, 'bytes_transferred', 0) or '-', - req.headers.get('etag', '-'), - req.environ.get('swift.trans_id', '-'), logged_headers or '-', - trans_time))) - def filter_factory(global_conf, **local_conf): """Returns a WSGI filter app for use with paste.deploy.""" diff --git a/swift/common/middleware/tempurl.py b/swift/common/middleware/tempurl.py index 0c87536117..b5988b0d5d 100644 --- a/swift/common/middleware/tempurl.py +++ b/swift/common/middleware/tempurl.py @@ -86,7 +86,6 @@ from time import gmtime, strftime, time from urllib import quote, unquote from urlparse import parse_qs -from swift.common.utils import get_logger from swift.common.wsgi import make_pre_authed_env from swift.common.http import HTTP_UNAUTHORIZED @@ -151,6 +150,9 @@ class TempURL(object): '*' to indicate a prefix match. Default: x-object-meta-public-* + The proxy logs created for any subrequests made will have swift.source set + to "FP". + :param app: The next WSGI filter or app in the paste.deploy chain. :param conf: The configuration dict for the middleware. @@ -161,8 +163,6 @@ class TempURL(object): self.app = app #: The filter configuration dict. self.conf = conf - #: The logger to use with this middleware. - self.logger = get_logger(conf, log_route='tempurl') headers = DEFAULT_INCOMING_REMOVE_HEADERS if 'incoming_remove_headers' in conf: @@ -326,7 +326,7 @@ class TempURL(object): key = memcache.get('temp-url-key/%s' % account) if not key: newenv = make_pre_authed_env(env, 'HEAD', '/v1/' + account, - self.agent) + self.agent, swift_source='TU') newenv['CONTENT_LENGTH'] = '0' newenv['wsgi.input'] = StringIO('') key = [None] @@ -378,7 +378,6 @@ class TempURL(object): :param start_response: The WSGI start_response hook. :returns: 401 response as per WSGI. """ - self._log_request(env, HTTP_UNAUTHORIZED) body = '401 Unauthorized: Temp URL invalid\n' start_response('401 Unauthorized', [('Content-Type', 'text/plain'), @@ -444,45 +443,6 @@ class TempURL(object): del headers[h] return headers.items() - def _log_request(self, env, response_status_int): - """ - Used when a request might not be logged by the underlying - WSGI application, but we'd still like to record what - happened. An early 401 Unauthorized is a good example of - this. - - :param env: The WSGI environment for the request. - :param response_status_int: The HTTP status we'll be replying - to the request with. - """ - the_request = quote(unquote(env.get('PATH_INFO') or '/')) - if env.get('QUERY_STRING'): - the_request = the_request + '?' + env['QUERY_STRING'] - client = env.get('HTTP_X_CLUSTER_CLIENT_IP') - if not client and 'HTTP_X_FORWARDED_FOR' in env: - # remote host for other lbs - client = env['HTTP_X_FORWARDED_FOR'].split(',')[0].strip() - if not client: - client = env.get('REMOTE_ADDR') - self.logger.info(' '.join(quote(str(x)) for x in ( - client or '-', - env.get('REMOTE_ADDR') or '-', - strftime('%d/%b/%Y/%H/%M/%S', gmtime()), - env.get('REQUEST_METHOD') or 'GET', - the_request, - env.get('SERVER_PROTOCOL') or '1.0', - response_status_int, - env.get('HTTP_REFERER') or '-', - (env.get('HTTP_USER_AGENT') or '-') + ' TempURL', - env.get('HTTP_X_AUTH_TOKEN') or '-', - '-', - '-', - '-', - env.get('swift.trans_id') or '-', - '-', - '-', - ))) - def filter_factory(global_conf, **local_conf): """ Returns the WSGI filter for use with paste.deploy. """ diff --git a/swift/common/wsgi.py b/swift/common/wsgi.py index 29e3f1b0b6..f8a4ad3e24 100644 --- a/swift/common/wsgi.py +++ b/swift/common/wsgi.py @@ -300,7 +300,7 @@ class WSGIContext(object): def make_pre_authed_request(env, method=None, path=None, body=None, - headers=None, agent='Swift'): + headers=None, agent='Swift', swift_source=None): """ Makes a new swob.Request based on the current env but with the parameters specified. Note that this request will be preauthorized. @@ -322,13 +322,16 @@ def make_pre_authed_request(env, method=None, path=None, body=None, '%(orig)s StaticWeb'. You also set agent to None to use the original env's HTTP_USER_AGENT or '' to have no HTTP_USER_AGENT. + :param swift_source: Used to mark the request as originating out of + middleware. Will be logged in proxy logs. :returns: Fresh swob.Request object. """ query_string = None if path and '?' in path: path, query_string = path.split('?', 1) newenv = make_pre_authed_env(env, method, path=unquote(path), agent=agent, - query_string=query_string) + query_string=query_string, + swift_source=swift_source) if not headers: headers = {} if body: @@ -338,7 +341,7 @@ def make_pre_authed_request(env, method=None, path=None, body=None, def make_pre_authed_env(env, method=None, path=None, agent='Swift', - query_string=None): + query_string=None, swift_source=None): """ Returns a new fresh WSGI environment with escalated privileges to do backend checks, listings, etc. that the remote user wouldn't @@ -361,6 +364,8 @@ def make_pre_authed_env(env, method=None, path=None, agent='Swift', '%(orig)s StaticWeb'. You also set agent to None to use the original env's HTTP_USER_AGENT or '' to have no HTTP_USER_AGENT. + :param swift_source: Used to mark the request as originating out of + middleware. Will be logged in proxy logs. :returns: Fresh WSGI environment. """ newenv = {} @@ -383,6 +388,8 @@ def make_pre_authed_env(env, method=None, path=None, agent='Swift', agent % {'orig': env.get('HTTP_USER_AGENT', '')}).strip() elif agent == '' and 'HTTP_USER_AGENT' in newenv: del newenv['HTTP_USER_AGENT'] + if swift_source: + newenv['swift.source'] = swift_source newenv['swift.authorize'] = lambda req: None newenv['swift.authorize_override'] = True newenv['REMOTE_USER'] = '.wsgi.pre_authed' diff --git a/swift/proxy/controllers/base.py b/swift/proxy/controllers/base.py index f440bd2c6c..88571f3beb 100644 --- a/swift/proxy/controllers/base.py +++ b/swift/proxy/controllers/base.py @@ -190,7 +190,7 @@ def cors_validation(func): return wrapped -def get_container_info(env, app): +def get_container_info(env, app, swift_source=None): """ Get the info structure for a container, based on env and app. This is useful to middlewares. @@ -208,7 +208,8 @@ def get_container_info(env, app): container_info = cache.get(cache_key) if not container_info: resp = make_pre_authed_request( - env, 'HEAD', '/%s/%s/%s' % (version, account, container) + env, 'HEAD', '/%s/%s/%s' % (version, account, container), + swift_source=swift_source, ).get_response(app) container_info = headers_to_container_info( resp.headers, resp.status_int) diff --git a/test/unit/common/middleware/test_staticweb.py b/test/unit/common/middleware/test_staticweb.py index 9b006bdea7..00ad6773db 100644 --- a/test/unit/common/middleware/test_staticweb.py +++ b/test/unit/common/middleware/test_staticweb.py @@ -624,33 +624,6 @@ class TestStaticWeb(unittest.TestCase): self.assertEquals(resp.body, '1') self.assertEquals(self.app.calls, 1) - def test_log_headers(self): - # Using a listing request since we know that calls StaticWeb's logging - # routines - self.test_staticweb.access_logger = FakeLogger() - self.test_staticweb.log_headers = True - req = Request.blank('/v1/a/c3/subdir/', - headers={'test-header': 'test-value'}) - resp = req.get_response(self.test_staticweb) - self.assertEquals(resp.status_int, 200) - self.assert_('Listing of /v1/a/c3/subdir/' in resp.body) - infos = self.test_staticweb.access_logger.log_dict['info'] - self.assertEquals(len(infos), 1) - info = infos[0][0][0] - self.assertTrue('Test-Header%3A%20test-value' in info, repr(info)) - - self.test_staticweb.access_logger = FakeLogger() - self.test_staticweb.log_headers = False - req = Request.blank('/v1/a/c3/subdir/', - headers={'test-header': 'test-value'}) - resp = req.get_response(self.test_staticweb) - self.assertEquals(resp.status_int, 200) - self.assert_('Listing of /v1/a/c3/subdir/' in resp.body) - infos = self.test_staticweb.access_logger.log_dict['info'] - self.assertEquals(len(infos), 1) - info = infos[0][0][0] - self.assertTrue('Test-Header%3A%20test-value' not in info, repr(info)) - if __name__ == '__main__': unittest.main() diff --git a/test/unit/common/test_wsgi.py b/test/unit/common/test_wsgi.py index aee62556a7..d020212f2d 100644 --- a/test/unit/common/test_wsgi.py +++ b/test/unit/common/test_wsgi.py @@ -187,6 +187,14 @@ class TestWSGI(unittest.TestCase): self.assertTrue('wsgi.input' in newenv) self.assertEquals(newenv['wsgi.input'].read(), '') + oldenv = {'swift.source': 'UT'} + newenv = wsgi.make_pre_authed_env(oldenv) + self.assertEquals(newenv['swift.source'], 'UT') + + oldenv = {'swift.source': 'UT'} + newenv = wsgi.make_pre_authed_env(oldenv, swift_source='SA') + self.assertEquals(newenv['swift.source'], 'SA') + def test_pre_auth_req(self): class FakeReq(object): @classmethod @@ -237,6 +245,12 @@ class TestWSGI(unittest.TestCase): self.assertEquals(e['SCRIPT_NAME'], '') self.assertEquals(e['PATH_INFO'], '/override') + def test_pre_auth_req_swift_source(self): + r = wsgi.make_pre_authed_request( + {'QUERY_STRING': 'original'}, 'GET', 'path', 'the body', + swift_source='UT') + self.assertEquals(r.body, 'the body') + self.assertEquals(r.environ['swift.source'], 'UT') class TestWSGIContext(unittest.TestCase):