diff --git a/swift/common/http_protocol.py b/swift/common/http_protocol.py new file mode 100644 index 0000000000..59d7767dea --- /dev/null +++ b/swift/common/http_protocol.py @@ -0,0 +1,246 @@ +# Copyright (c) 2010-2022 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from eventlet import wsgi, websocket +import six + +from swift.common.swob import wsgi_quote, wsgi_unquote, \ + wsgi_quote_plus, wsgi_unquote_plus, wsgi_to_bytes, bytes_to_wsgi + + +class SwiftHttpProtocol(wsgi.HttpProtocol): + default_request_version = "HTTP/1.0" + + def __init__(self, *args, **kwargs): + # See https://github.com/eventlet/eventlet/pull/590 + self.pre_shutdown_bugfix_eventlet = not getattr( + websocket.WebSocketWSGI, '_WSGI_APP_ALWAYS_IDLE', None) + # Note this is not a new-style class, so super() won't work + wsgi.HttpProtocol.__init__(self, *args, **kwargs) + + def log_request(self, *a): + """ + Turn off logging requests by the underlying WSGI software. + """ + pass + + def log_message(self, f, *a): + """ + Redirect logging other messages by the underlying WSGI software. 
+ """ + logger = getattr(self.server.app, 'logger', None) + if logger: + logger.error('ERROR WSGI: ' + f, *a) + else: + # eventlet<=0.17.4 doesn't have an error method, and in newer + # versions the output from error is same as info anyway + self.server.log.info('ERROR WSGI: ' + f, *a) + + class MessageClass(wsgi.HttpProtocol.MessageClass): + '''Subclass to see when the client didn't provide a Content-Type''' + # for py2: + def parsetype(self): + if self.typeheader is None: + self.typeheader = '' + wsgi.HttpProtocol.MessageClass.parsetype(self) + + # for py3: + def get_default_type(self): + '''If the client didn't provide a content type, leave it blank.''' + return '' + + def parse_request(self): + # Need to track the bytes-on-the-wire for S3 signatures -- eventlet + # would do it for us, but since we rewrite the path on py3, we need to + # fix it ourselves later. + self.__raw_path_info = None + + if not six.PY2: + # request lines *should* be ascii per the RFC, but historically + # we've allowed (and even have func tests that use) arbitrary + # bytes. This breaks on py3 (see https://bugs.python.org/issue33973 + # ) but the work-around is simple: munge the request line to be + # properly quoted. 
+ if self.raw_requestline.count(b' ') >= 2: + parts = self.raw_requestline.split(b' ', 2) + path, q, query = parts[1].partition(b'?') + self.__raw_path_info = path + # unquote first, so we don't over-quote something + # that was *correctly* quoted + path = wsgi_to_bytes(wsgi_quote(wsgi_unquote( + bytes_to_wsgi(path)))) + query = b'&'.join( + sep.join([ + wsgi_to_bytes(wsgi_quote_plus(wsgi_unquote_plus( + bytes_to_wsgi(key)))), + wsgi_to_bytes(wsgi_quote_plus(wsgi_unquote_plus( + bytes_to_wsgi(val)))) + ]) + for part in query.split(b'&') + for key, sep, val in (part.partition(b'='), )) + parts[1] = path + q + query + self.raw_requestline = b' '.join(parts) + # else, mangled protocol, most likely; let base class deal with it + return wsgi.HttpProtocol.parse_request(self) + + if not six.PY2: + def get_environ(self, *args, **kwargs): + environ = wsgi.HttpProtocol.get_environ(self, *args, **kwargs) + environ['RAW_PATH_INFO'] = bytes_to_wsgi( + self.__raw_path_info) + header_payload = self.headers.get_payload() + if isinstance(header_payload, list) and len(header_payload) == 1: + header_payload = header_payload[0].get_payload() + if header_payload: + # This shouldn't be here. We must've bumped up against + # https://bugs.python.org/issue37093 + headers_raw = list(environ['headers_raw']) + for line in header_payload.rstrip('\r\n').split('\n'): + if ':' not in line or line[:1] in ' \t': + # Well, we're no more broken than we were before... + # Should we support line folding? + # Should we 400 a bad header line? 
+ break + header, value = line.split(':', 1) + value = value.strip(' \t\n\r') + # NB: Eventlet looks at the headers obj to figure out + # whether the client said the connection should close; + # see https://github.com/eventlet/eventlet/blob/v0.25.0/ + # eventlet/wsgi.py#L504 + self.headers.add_header(header, value) + headers_raw.append((header, value)) + wsgi_key = 'HTTP_' + header.replace('-', '_').encode( + 'latin1').upper().decode('latin1') + if wsgi_key in ('HTTP_CONTENT_LENGTH', + 'HTTP_CONTENT_TYPE'): + wsgi_key = wsgi_key[5:] + environ[wsgi_key] = value + environ['headers_raw'] = tuple(headers_raw) + # Since we parsed some more headers, check to see if they + # change how our wsgi.input should behave + te = environ.get('HTTP_TRANSFER_ENCODING', '').lower() + if te.rsplit(',', 1)[-1].strip() == 'chunked': + environ['wsgi.input'].chunked_input = True + else: + length = environ.get('CONTENT_LENGTH') + if length: + length = int(length) + environ['wsgi.input'].content_length = length + if environ.get('HTTP_EXPECT', '').lower() == '100-continue': + environ['wsgi.input'].wfile = self.wfile + environ['wsgi.input'].wfile_line = \ + b'HTTP/1.1 100 Continue\r\n' + return environ + + def _read_request_line(self): + # Note this is not a new-style class, so super() won't work + got = wsgi.HttpProtocol._read_request_line(self) + # See https://github.com/eventlet/eventlet/pull/590 + if self.pre_shutdown_bugfix_eventlet: + self.conn_state[2] = wsgi.STATE_REQUEST + return got + + def handle_one_request(self): + # Note this is not a new-style class, so super() won't work + got = wsgi.HttpProtocol.handle_one_request(self) + # See https://github.com/eventlet/eventlet/pull/590 + if self.pre_shutdown_bugfix_eventlet: + if self.conn_state[2] != wsgi.STATE_CLOSE: + self.conn_state[2] = wsgi.STATE_IDLE + return got + + +class SwiftHttpProxiedProtocol(SwiftHttpProtocol): + """ + Protocol object that speaks HTTP, including multiple requests, but with + a single PROXY line as the very 
first thing coming in over the socket. + This is so we can learn what the client's IP address is when Swift is + behind a TLS terminator, like hitch, that does not understand HTTP and + so cannot add X-Forwarded-For or other similar headers. + + See http://www.haproxy.org/download/1.7/doc/proxy-protocol.txt for + protocol details. + """ + def __init__(self, *a, **kw): + self.proxy_address = None + SwiftHttpProtocol.__init__(self, *a, **kw) + + def handle_error(self, connection_line): + if not six.PY2: + connection_line = connection_line.decode('latin-1') + + # No further processing will proceed on this connection under any + # circumstances. We always send the request into the superclass to + # handle any cleanup - this ensures that the request will not be + # processed. + self.rfile.close() + # We don't really have any confidence that an HTTP Error will be + # processable by the client as our transmission broken down between + # ourselves and our gateway proxy before processing the client + # protocol request. Hopefully the operator will know what to do! + msg = 'Invalid PROXY line %r' % connection_line + self.log_message(msg) + # Even assuming HTTP we don't even know what version of HTTP the + # client is sending? This entire endeavor seems questionable. + self.request_version = self.default_request_version + # appease http.server + self.command = 'PROXY' + self.send_error(400, msg) + + def handle(self): + """Handle multiple requests if necessary.""" + # ensure the opening line for the connection is a valid PROXY protocol + # line; this is the only IO we do on this connection before any + # additional wrapping further pollutes the raw socket. 
+ connection_line = self.rfile.readline(self.server.url_length_limit) + + if not connection_line.startswith(b'PROXY '): + return self.handle_error(connection_line) + + proxy_parts = connection_line.strip(b'\r\n').split(b' ') + if proxy_parts[1].startswith(b'UNKNOWN'): + # "UNKNOWN", in PROXY protocol version 1, means "not + # TCP4 or TCP6". This includes completely legitimate + # things like QUIC or Unix domain sockets. The PROXY + # protocol (section 2.1) states that the receiver + # (that's us) MUST ignore anything after "UNKNOWN" and + # before the CRLF, essentially discarding the first + # line. + pass + elif proxy_parts[1] in (b'TCP4', b'TCP6') and len(proxy_parts) == 6: + if six.PY2: + self.client_address = (proxy_parts[2], proxy_parts[4]) + self.proxy_address = (proxy_parts[3], proxy_parts[5]) + else: + self.client_address = ( + proxy_parts[2].decode('latin-1'), + proxy_parts[4].decode('latin-1')) + self.proxy_address = ( + proxy_parts[3].decode('latin-1'), + proxy_parts[5].decode('latin-1')) + else: + self.handle_error(connection_line) + + return SwiftHttpProtocol.handle(self) + + def get_environ(self, *args, **kwargs): + environ = SwiftHttpProtocol.get_environ(self, *args, **kwargs) + if self.proxy_address: + environ['SERVER_ADDR'] = self.proxy_address[0] + environ['SERVER_PORT'] = self.proxy_address[1] + if self.proxy_address[1] == '443': + environ['wsgi.url_scheme'] = 'https' + environ['HTTPS'] = 'on' + return environ diff --git a/swift/common/wsgi.py b/swift/common/wsgi.py index facad25e16..41d09f6502 100644 --- a/swift/common/wsgi.py +++ b/swift/common/wsgi.py @@ -28,8 +28,7 @@ import time import eventlet import eventlet.debug -from eventlet import greenio, GreenPool, sleep, wsgi, listen, Timeout, \ - websocket +from eventlet import greenio, GreenPool, sleep, wsgi, listen, Timeout from paste.deploy import loadwsgi from eventlet.green import socket, ssl, os as green_os from io import BytesIO @@ -38,9 +37,10 @@ import six from six import StringIO from 
swift.common import utils, constraints +from swift.common.http_protocol import SwiftHttpProtocol, \ + SwiftHttpProxiedProtocol from swift.common.storage_policy import BindPortsCache -from swift.common.swob import Request, wsgi_quote, wsgi_unquote, \ - wsgi_quote_plus, wsgi_unquote_plus, wsgi_to_bytes, bytes_to_wsgi +from swift.common.swob import Request, wsgi_unquote from swift.common.utils import capture_stdio, disable_fallocate, \ drop_privileges, get_logger, NullLogger, config_true_value, \ validate_configuration, get_hub, config_auto_int_value, \ @@ -384,232 +384,6 @@ def load_app_config(conf_file): return app_conf -class SwiftHttpProtocol(wsgi.HttpProtocol): - default_request_version = "HTTP/1.0" - - def __init__(self, *args, **kwargs): - # See https://github.com/eventlet/eventlet/pull/590 - self.pre_shutdown_bugfix_eventlet = not getattr( - websocket.WebSocketWSGI, '_WSGI_APP_ALWAYS_IDLE', None) - # Note this is not a new-style class, so super() won't work - wsgi.HttpProtocol.__init__(self, *args, **kwargs) - - def log_request(self, *a): - """ - Turn off logging requests by the underlying WSGI software. - """ - pass - - def log_message(self, f, *a): - """ - Redirect logging other messages by the underlying WSGI software. 
- """ - logger = getattr(self.server.app, 'logger', None) - if logger: - logger.error('ERROR WSGI: ' + f, *a) - else: - # eventlet<=0.17.4 doesn't have an error method, and in newer - # versions the output from error is same as info anyway - self.server.log.info('ERROR WSGI: ' + f, *a) - - class MessageClass(wsgi.HttpProtocol.MessageClass): - '''Subclass to see when the client didn't provide a Content-Type''' - # for py2: - def parsetype(self): - if self.typeheader is None: - self.typeheader = '' - wsgi.HttpProtocol.MessageClass.parsetype(self) - - # for py3: - def get_default_type(self): - '''If the client didn't provide a content type, leave it blank.''' - return '' - - def parse_request(self): - # Need to track the bytes-on-the-wire for S3 signatures -- eventlet - # would do it for us, but since we rewrite the path on py3, we need to - # fix it ourselves later. - self.__raw_path_info = None - - if not six.PY2: - # request lines *should* be ascii per the RFC, but historically - # we've allowed (and even have func tests that use) arbitrary - # bytes. This breaks on py3 (see https://bugs.python.org/issue33973 - # ) but the work-around is simple: munge the request line to be - # properly quoted. 
- if self.raw_requestline.count(b' ') >= 2: - parts = self.raw_requestline.split(b' ', 2) - path, q, query = parts[1].partition(b'?') - self.__raw_path_info = path - # unquote first, so we don't over-quote something - # that was *correctly* quoted - path = wsgi_to_bytes(wsgi_quote(wsgi_unquote( - bytes_to_wsgi(path)))) - query = b'&'.join( - sep.join([ - wsgi_to_bytes(wsgi_quote_plus(wsgi_unquote_plus( - bytes_to_wsgi(key)))), - wsgi_to_bytes(wsgi_quote_plus(wsgi_unquote_plus( - bytes_to_wsgi(val)))) - ]) - for part in query.split(b'&') - for key, sep, val in (part.partition(b'='), )) - parts[1] = path + q + query - self.raw_requestline = b' '.join(parts) - # else, mangled protocol, most likely; let base class deal with it - return wsgi.HttpProtocol.parse_request(self) - - if not six.PY2: - def get_environ(self, *args, **kwargs): - environ = wsgi.HttpProtocol.get_environ(self, *args, **kwargs) - environ['RAW_PATH_INFO'] = bytes_to_wsgi( - self.__raw_path_info) - header_payload = self.headers.get_payload() - if isinstance(header_payload, list) and len(header_payload) == 1: - header_payload = header_payload[0].get_payload() - if header_payload: - # This shouldn't be here. We must've bumped up against - # https://bugs.python.org/issue37093 - headers_raw = list(environ['headers_raw']) - for line in header_payload.rstrip('\r\n').split('\n'): - if ':' not in line or line[:1] in ' \t': - # Well, we're no more broken than we were before... - # Should we support line folding? - # Should we 400 a bad header line? 
- break - header, value = line.split(':', 1) - value = value.strip(' \t\n\r') - # NB: Eventlet looks at the headers obj to figure out - # whether the client said the connection should close; - # see https://github.com/eventlet/eventlet/blob/v0.25.0/ - # eventlet/wsgi.py#L504 - self.headers.add_header(header, value) - headers_raw.append((header, value)) - wsgi_key = 'HTTP_' + header.replace('-', '_').encode( - 'latin1').upper().decode('latin1') - if wsgi_key in ('HTTP_CONTENT_LENGTH', - 'HTTP_CONTENT_TYPE'): - wsgi_key = wsgi_key[5:] - environ[wsgi_key] = value - environ['headers_raw'] = tuple(headers_raw) - # Since we parsed some more headers, check to see if they - # change how our wsgi.input should behave - te = environ.get('HTTP_TRANSFER_ENCODING', '').lower() - if te.rsplit(',', 1)[-1].strip() == 'chunked': - environ['wsgi.input'].chunked_input = True - else: - length = environ.get('CONTENT_LENGTH') - if length: - length = int(length) - environ['wsgi.input'].content_length = length - if environ.get('HTTP_EXPECT', '').lower() == '100-continue': - environ['wsgi.input'].wfile = self.wfile - environ['wsgi.input'].wfile_line = \ - b'HTTP/1.1 100 Continue\r\n' - return environ - - def _read_request_line(self): - # Note this is not a new-style class, so super() won't work - got = wsgi.HttpProtocol._read_request_line(self) - # See https://github.com/eventlet/eventlet/pull/590 - if self.pre_shutdown_bugfix_eventlet: - self.conn_state[2] = wsgi.STATE_REQUEST - return got - - def handle_one_request(self): - # Note this is not a new-style class, so super() won't work - got = wsgi.HttpProtocol.handle_one_request(self) - # See https://github.com/eventlet/eventlet/pull/590 - if self.pre_shutdown_bugfix_eventlet: - if self.conn_state[2] != wsgi.STATE_CLOSE: - self.conn_state[2] = wsgi.STATE_IDLE - return got - - -class SwiftHttpProxiedProtocol(SwiftHttpProtocol): - """ - Protocol object that speaks HTTP, including multiple requests, but with - a single PROXY line as the very 
first thing coming in over the socket. - This is so we can learn what the client's IP address is when Swift is - behind a TLS terminator, like hitch, that does not understand HTTP and - so cannot add X-Forwarded-For or other similar headers. - - See http://www.haproxy.org/download/1.7/doc/proxy-protocol.txt for - protocol details. - """ - def __init__(self, *a, **kw): - self.proxy_address = None - SwiftHttpProtocol.__init__(self, *a, **kw) - - def handle_error(self, connection_line): - if not six.PY2: - connection_line = connection_line.decode('latin-1') - - # No further processing will proceed on this connection under any - # circumstances. We always send the request into the superclass to - # handle any cleanup - this ensures that the request will not be - # processed. - self.rfile.close() - # We don't really have any confidence that an HTTP Error will be - # processable by the client as our transmission broken down between - # ourselves and our gateway proxy before processing the client - # protocol request. Hopefully the operator will know what to do! - msg = 'Invalid PROXY line %r' % connection_line - self.log_message(msg) - # Even assuming HTTP we don't even known what version of HTTP the - # client is sending? This entire endeavor seems questionable. - self.request_version = self.default_request_version - # appease http.server - self.command = 'PROXY' - self.send_error(400, msg) - - def handle(self): - """Handle multiple requests if necessary.""" - # ensure the opening line for the connection is a valid PROXY protcol - # line; this is the only IO we do on this connection before any - # additional wrapping further pollutes the raw socket. 
- connection_line = self.rfile.readline(self.server.url_length_limit) - - if not connection_line.startswith(b'PROXY '): - return self.handle_error(connection_line) - - proxy_parts = connection_line.strip(b'\r\n').split(b' ') - if proxy_parts[1].startswith(b'UNKNOWN'): - # "UNKNOWN", in PROXY protocol version 1, means "not - # TCP4 or TCP6". This includes completely legitimate - # things like QUIC or Unix domain sockets. The PROXY - # protocol (section 2.1) states that the receiver - # (that's us) MUST ignore anything after "UNKNOWN" and - # before the CRLF, essentially discarding the first - # line. - pass - elif proxy_parts[1] in (b'TCP4', b'TCP6') and len(proxy_parts) == 6: - if six.PY2: - self.client_address = (proxy_parts[2], proxy_parts[4]) - self.proxy_address = (proxy_parts[3], proxy_parts[5]) - else: - self.client_address = ( - proxy_parts[2].decode('latin-1'), - proxy_parts[4].decode('latin-1')) - self.proxy_address = ( - proxy_parts[3].decode('latin-1'), - proxy_parts[5].decode('latin-1')) - else: - self.handle_error(connection_line) - - return SwiftHttpProtocol.handle(self) - - def get_environ(self, *args, **kwargs): - environ = SwiftHttpProtocol.get_environ(self, *args, **kwargs) - if self.proxy_address: - environ['SERVER_ADDR'] = self.proxy_address[0] - environ['SERVER_PORT'] = self.proxy_address[1] - if self.proxy_address[1] == '443': - environ['wsgi.url_scheme'] = 'https' - environ['HTTPS'] = 'on' - return environ - - def run_server(conf, logger, sock, global_conf=None, ready_callback=None, allow_modify_pipeline=True): # Ensure TZ environment variable exists to avoid stat('/etc/localtime') on diff --git a/test/functional/__init__.py b/test/functional/__init__.py index 4b5f42e55d..a17054cfeb 100644 --- a/test/functional/__init__.py +++ b/test/functional/__init__.py @@ -54,7 +54,8 @@ from test.unit import SkipTest from swift.common import constraints, utils, ring, storage_policy from swift.common.ring import Ring -from swift.common.wsgi import loadapp, 
SwiftHttpProtocol +from swift.common.http_protocol import SwiftHttpProtocol +from swift.common.wsgi import loadapp from swift.common.utils import config_true_value, split_path from swift.account import server as account_server from swift.container import server as container_server diff --git a/test/unit/common/test_http_protocol.py b/test/unit/common/test_http_protocol.py new file mode 100644 index 0000000000..24e5225b23 --- /dev/null +++ b/test/unit/common/test_http_protocol.py @@ -0,0 +1,352 @@ +# Copyright (c) 2010-2022 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from argparse import Namespace +from io import BytesIO +import mock +import types +import unittest +import eventlet.wsgi +import six +from swift.common import http_protocol, swob + + +class TestSwiftHttpProtocol(unittest.TestCase): + def _proto_obj(self): + # Make an object we can exercise... note the base class's __init__() + # does a bunch of work, so we just new up an object like eventlet.wsgi + # does. 
+ proto_class = http_protocol.SwiftHttpProtocol + try: + the_obj = types.InstanceType(proto_class) + except AttributeError: + the_obj = proto_class.__new__(proto_class) + # Install some convenience mocks + the_obj.server = Namespace(app=Namespace(logger=mock.Mock()), + url_length_limit=777, + log=mock.Mock()) + the_obj.send_error = mock.Mock() + + return the_obj + + def test_swift_http_protocol_log_request(self): + proto_obj = self._proto_obj() + self.assertEqual(None, proto_obj.log_request('ignored')) + + def test_swift_http_protocol_log_message(self): + proto_obj = self._proto_obj() + + proto_obj.log_message('a%sc', 'b') + self.assertEqual([mock.call.error('ERROR WSGI: a%sc', 'b')], + proto_obj.server.app.logger.mock_calls) + + def test_swift_http_protocol_log_message_no_logger(self): + # If the app somehow had no logger attribute or it was None, don't blow + # up + proto_obj = self._proto_obj() + delattr(proto_obj.server.app, 'logger') + + proto_obj.log_message('a%sc', 'b') + self.assertEqual([mock.call.info('ERROR WSGI: a%sc', 'b')], + proto_obj.server.log.mock_calls) + + proto_obj.server.log.reset_mock() + proto_obj.server.app.logger = None + + proto_obj.log_message('a%sc', 'b') + self.assertEqual([mock.call.info('ERROR WSGI: a%sc', 'b')], + proto_obj.server.log.mock_calls) + + def test_swift_http_protocol_parse_request_no_proxy(self): + proto_obj = self._proto_obj() + proto_obj.raw_requestline = b'jimmy jam' + proto_obj.client_address = ('a', '123') + + self.assertEqual(False, proto_obj.parse_request()) + + self.assertEqual([ + mock.call(400, "Bad HTTP/0.9 request type ('jimmy')"), + ], proto_obj.send_error.mock_calls) + self.assertEqual(('a', '123'), proto_obj.client_address) + + def test_request_line_cleanup(self): + def do_test(line_from_socket, expected_line=None): + if expected_line is None: + expected_line = line_from_socket + + proto_obj = self._proto_obj() + proto_obj.raw_requestline = line_from_socket + with 
mock.patch('swift.common.http_protocol.wsgi.HttpProtocol') \ + as mock_super: + proto_obj.parse_request() + + self.assertEqual([mock.call.parse_request(proto_obj)], + mock_super.mock_calls) + self.assertEqual(proto_obj.raw_requestline, expected_line) + + do_test(b'GET / HTTP/1.1') + do_test(b'GET /%FF HTTP/1.1') + + if not six.PY2: + do_test(b'GET /\xff HTTP/1.1', b'GET /%FF HTTP/1.1') + do_test(b'PUT /Here%20Is%20A%20SnowMan:\xe2\x98\x83 HTTP/1.0', + b'PUT /Here%20Is%20A%20SnowMan%3A%E2%98%83 HTTP/1.0') + do_test( + b'POST /?and%20it=fixes+params&' + b'PALMTREE=\xf0%9f\x8c%b4 HTTP/1.1', + b'POST /?and+it=fixes+params&PALMTREE=%F0%9F%8C%B4 HTTP/1.1') + + +class ProtocolTest(unittest.TestCase): + def _run_bytes_through_protocol(self, bytes_from_client): + rfile = BytesIO(bytes_from_client) + wfile = BytesIO() + + # All this fakery is needed to make the WSGI server process one + # connection, possibly with multiple requests, in the main + # greenthread. It doesn't hurt correctness if the function is called + # in a separate greenthread, but it makes using the debugger harder. + class FakeGreenthread(object): + def link(self, a_callable, *args): + a_callable(self, *args) + + class FakePool(object): + def spawn(self, a_callable, *args, **kwargs): + a_callable(*args, **kwargs) + return FakeGreenthread() + + def spawn_n(self, a_callable, *args, **kwargs): + a_callable(*args, **kwargs) + + def waitall(self): + pass + + addr = ('127.0.0.1', 8359) + fake_tcp_socket = mock.Mock( + setsockopt=lambda *a: None, + makefile=lambda mode, bufsize: rfile if 'r' in mode else wfile, + getsockname=lambda *a: addr + ) + fake_listen_socket = mock.Mock( + accept=mock.MagicMock( + side_effect=[[fake_tcp_socket, addr], + # KeyboardInterrupt breaks the WSGI server out of + # its infinite accept-process-close loop. 
+ KeyboardInterrupt]), + getsockname=lambda *a: addr) + del fake_listen_socket.do_handshake + + # If we let the WSGI server close rfile/wfile then we can't access + # their contents any more. + with mock.patch.object(wfile, 'close', lambda: None), \ + mock.patch.object(rfile, 'close', lambda: None): + eventlet.wsgi.server( + fake_listen_socket, self.app, + protocol=self.protocol_class, + custom_pool=FakePool(), + log_output=False, # quiet the test run + ) + return wfile.getvalue() + + +class TestSwiftHttpProtocolSomeMore(ProtocolTest): + protocol_class = http_protocol.SwiftHttpProtocol + + @staticmethod + def app(env, start_response): + start_response("200 OK", []) + return [swob.wsgi_to_bytes(env['RAW_PATH_INFO'])] + + def test_simple(self): + bytes_out = self._run_bytes_through_protocol(( + b"GET /someurl HTTP/1.0\r\n" + b"User-Agent: something or other\r\n" + b"\r\n" + )) + + lines = [l for l in bytes_out.split(b"\r\n") if l] + self.assertEqual(lines[0], b"HTTP/1.1 200 OK") # sanity check + self.assertEqual(lines[-1], b'/someurl') + + def test_quoted(self): + bytes_out = self._run_bytes_through_protocol(( + b"GET /some%fFpath%D8%AA HTTP/1.0\r\n" + b"User-Agent: something or other\r\n" + b"\r\n" + )) + + lines = [l for l in bytes_out.split(b"\r\n") if l] + self.assertEqual(lines[0], b"HTTP/1.1 200 OK") # sanity check + self.assertEqual(lines[-1], b'/some%fFpath%D8%AA') + + def test_messy(self): + bytes_out = self._run_bytes_through_protocol(( + b"GET /oh\xffboy%what$now%E2%80%bd HTTP/1.0\r\n" + b"User-Agent: something or other\r\n" + b"\r\n" + )) + + lines = [l for l in bytes_out.split(b"\r\n") if l] + self.assertEqual(lines[-1], b'/oh\xffboy%what$now%E2%80%bd') + + +class TestProxyProtocol(ProtocolTest): + protocol_class = http_protocol.SwiftHttpProxiedProtocol + + @staticmethod + def app(env, start_response): + start_response("200 OK", []) + body = '\r\n'.join([ + 'got addr: %s %s' % ( + env.get("REMOTE_ADDR", ""), + env.get("REMOTE_PORT", "")), + 'on addr: %s 
%s' % ( + env.get("SERVER_ADDR", ""), + env.get("SERVER_PORT", "")), + 'https is %s (scheme %s)' % ( + env.get("HTTPS", ""), + env.get("wsgi.url_scheme", "")), + ]) + '\r\n' + return [body.encode("utf-8")] + + def test_request_with_proxy(self): + bytes_out = self._run_bytes_through_protocol(( + b"PROXY TCP4 192.168.0.1 192.168.0.11 56423 4433\r\n" + b"GET /someurl HTTP/1.0\r\n" + b"User-Agent: something or other\r\n" + b"\r\n" + )) + + lines = [l for l in bytes_out.split(b"\r\n") if l] + self.assertEqual(lines[0], b"HTTP/1.1 200 OK") # sanity check + self.assertEqual(lines[-3:], [ + b"got addr: 192.168.0.1 56423", + b"on addr: 192.168.0.11 4433", + b"https is (scheme http)", + ]) + + def test_request_with_proxy_https(self): + bytes_out = self._run_bytes_through_protocol(( + b"PROXY TCP4 192.168.0.1 192.168.0.11 56423 443\r\n" + b"GET /someurl HTTP/1.0\r\n" + b"User-Agent: something or other\r\n" + b"\r\n" + )) + + lines = [l for l in bytes_out.split(b"\r\n") if l] + self.assertEqual(lines[0], b"HTTP/1.1 200 OK") # sanity check + self.assertEqual(lines[-3:], [ + b"got addr: 192.168.0.1 56423", + b"on addr: 192.168.0.11 443", + b"https is on (scheme https)", + ]) + + def test_multiple_requests_with_proxy(self): + bytes_out = self._run_bytes_through_protocol(( + b"PROXY TCP4 192.168.0.1 192.168.0.11 56423 443\r\n" + b"GET /someurl HTTP/1.1\r\n" + b"User-Agent: something or other\r\n" + b"\r\n" + b"GET /otherurl HTTP/1.1\r\n" + b"User-Agent: something or other\r\n" + b"Connection: close\r\n" + b"\r\n" + )) + + lines = bytes_out.split(b"\r\n") + self.assertEqual(lines[0], b"HTTP/1.1 200 OK") # sanity check + + # the address in the PROXY line is applied to every request + addr_lines = [l for l in lines if l.startswith(b"got addr")] + self.assertEqual(addr_lines, [b"got addr: 192.168.0.1 56423"] * 2) + addr_lines = [l for l in lines if l.startswith(b"on addr")] + self.assertEqual(addr_lines, [b"on addr: 192.168.0.11 443"] * 2) + addr_lines = [l for l in lines if 
l.startswith(b"https is")] + self.assertEqual(addr_lines, [b"https is on (scheme https)"] * 2) + + def test_missing_proxy_line(self): + bytes_out = self._run_bytes_through_protocol(( + # whoops, no PROXY line here + b"GET /someurl HTTP/1.0\r\n" + b"User-Agent: something or other\r\n" + b"\r\n" + )) + + lines = [l for l in bytes_out.split(b"\r\n") if l] + self.assertIn(b"400 Invalid PROXY line", lines[0]) + + def test_malformed_proxy_lines(self): + for bad_line in [b'PROXY jojo', + b'PROXYjojo a b c d e', + b'PROXY a b c d e', # bad INET protocol and family + ]: + bytes_out = self._run_bytes_through_protocol(bad_line) + lines = [l for l in bytes_out.split(b"\r\n") if l] + self.assertIn(b"400 Invalid PROXY line", lines[0]) + + def test_unknown_client_addr(self): + # For "UNKNOWN", the rest of the line before the CRLF may be omitted by + # the sender, and the receiver must ignore anything presented before + # the CRLF is found. + for unknown_line in [b'PROXY UNKNOWN', # minimal valid unknown + b'PROXY UNKNOWNblahblah', # also valid + b'PROXY UNKNOWN a b c d']: + bytes_out = self._run_bytes_through_protocol(( + unknown_line + (b"\r\n" + b"GET /someurl HTTP/1.0\r\n" + b"User-Agent: something or other\r\n" + b"\r\n") + )) + lines = [l for l in bytes_out.split(b"\r\n") if l] + self.assertIn(b"200 OK", lines[0]) + + def test_address_and_environ(self): + # Make an object we can exercise... note the base class's __init__() + # does a bunch of work, so we just new up an object like eventlet.wsgi + # does. 
+ dummy_env = {'OTHER_ENV_KEY': 'OTHER_ENV_VALUE'} + mock_protocol = mock.Mock(get_environ=lambda s: dummy_env) + patcher = mock.patch( + 'swift.common.http_protocol.SwiftHttpProtocol', mock_protocol + ) + self.mock_super = patcher.start() + self.addCleanup(patcher.stop) + + proto_class = http_protocol.SwiftHttpProxiedProtocol + try: + proxy_obj = types.InstanceType(proto_class) + except AttributeError: + proxy_obj = proto_class.__new__(proto_class) + + # Install some convenience mocks + proxy_obj.server = Namespace(app=Namespace(logger=mock.Mock()), + url_length_limit=777, + log=mock.Mock()) + proxy_obj.send_error = mock.Mock() + + proxy_obj.rfile = BytesIO( + b'PROXY TCP4 111.111.111.111 222.222.222.222 111 222' + ) + + assert proxy_obj.handle() + + self.assertEqual(proxy_obj.client_address, ('111.111.111.111', '111')) + self.assertEqual(proxy_obj.proxy_address, ('222.222.222.222', '222')) + expected_env = { + 'SERVER_PORT': '222', + 'SERVER_ADDR': '222.222.222.222', + 'OTHER_ENV_KEY': 'OTHER_ENV_VALUE' + } + self.assertEqual(proxy_obj.get_environ(), expected_env) diff --git a/test/unit/common/test_wsgi.py b/test/unit/common/test_wsgi.py index 731e703882..d43f6730b0 100644 --- a/test/unit/common/test_wsgi.py +++ b/test/unit/common/test_wsgi.py @@ -15,20 +15,16 @@ """Tests for swift.common.wsgi""" -from argparse import Namespace import errno import logging import socket import unittest import os -import types -import eventlet.wsgi from collections import defaultdict from io import BytesIO from textwrap import dedent -import six from six.moves.urllib.parse import quote import mock @@ -40,7 +36,7 @@ import swift.proxy.server import swift.obj.server as obj_server import swift.container.server as container_server import swift.account.server as account_server -from swift.common.swob import Request, wsgi_to_bytes +from swift.common.swob import Request from swift.common import wsgi, utils from swift.common.storage_policy import POLICIES @@ -1064,335 +1060,6 @@ class 
TestWSGI(unittest.TestCase): self.assertIs(newenv.get('swift.infocache'), oldenv['swift.infocache']) -class TestSwiftHttpProtocol(unittest.TestCase): - def _proto_obj(self): - # Make an object we can exercise... note the base class's __init__() - # does a bunch of work, so we just new up an object like eventlet.wsgi - # does. - proto_class = wsgi.SwiftHttpProtocol - try: - the_obj = types.InstanceType(proto_class) - except AttributeError: - the_obj = proto_class.__new__(proto_class) - # Install some convenience mocks - the_obj.server = Namespace(app=Namespace(logger=mock.Mock()), - url_length_limit=777, - log=mock.Mock()) - the_obj.send_error = mock.Mock() - - return the_obj - - def test_swift_http_protocol_log_request(self): - proto_obj = self._proto_obj() - self.assertEqual(None, proto_obj.log_request('ignored')) - - def test_swift_http_protocol_log_message(self): - proto_obj = self._proto_obj() - - proto_obj.log_message('a%sc', 'b') - self.assertEqual([mock.call.error('ERROR WSGI: a%sc', 'b')], - proto_obj.server.app.logger.mock_calls) - - def test_swift_http_protocol_log_message_no_logger(self): - # If the app somehow had no logger attribute or it was None, don't blow - # up - proto_obj = self._proto_obj() - delattr(proto_obj.server.app, 'logger') - - proto_obj.log_message('a%sc', 'b') - self.assertEqual([mock.call.info('ERROR WSGI: a%sc', 'b')], - proto_obj.server.log.mock_calls) - - proto_obj.server.log.reset_mock() - proto_obj.server.app.logger = None - - proto_obj.log_message('a%sc', 'b') - self.assertEqual([mock.call.info('ERROR WSGI: a%sc', 'b')], - proto_obj.server.log.mock_calls) - - def test_swift_http_protocol_parse_request_no_proxy(self): - proto_obj = self._proto_obj() - proto_obj.raw_requestline = b'jimmy jam' - proto_obj.client_address = ('a', '123') - - self.assertEqual(False, proto_obj.parse_request()) - - self.assertEqual([ - mock.call(400, "Bad HTTP/0.9 request type ('jimmy')"), - ], proto_obj.send_error.mock_calls) - self.assertEqual(('a', 
'123'), proto_obj.client_address) - - def test_request_line_cleanup(self): - def do_test(line_from_socket, expected_line=None): - if expected_line is None: - expected_line = line_from_socket - - proto_obj = self._proto_obj() - proto_obj.raw_requestline = line_from_socket - with mock.patch('swift.common.wsgi.wsgi.HttpProtocol') \ - as mock_super: - proto_obj.parse_request() - - self.assertEqual([mock.call.parse_request(proto_obj)], - mock_super.mock_calls) - self.assertEqual(proto_obj.raw_requestline, expected_line) - - do_test(b'GET / HTTP/1.1') - do_test(b'GET /%FF HTTP/1.1') - - if not six.PY2: - do_test(b'GET /\xff HTTP/1.1', b'GET /%FF HTTP/1.1') - do_test(b'PUT /Here%20Is%20A%20SnowMan:\xe2\x98\x83 HTTP/1.0', - b'PUT /Here%20Is%20A%20SnowMan%3A%E2%98%83 HTTP/1.0') - do_test( - b'POST /?and%20it=fixes+params&' - b'PALMTREE=\xf0%9f\x8c%b4 HTTP/1.1', - b'POST /?and+it=fixes+params&PALMTREE=%F0%9F%8C%B4 HTTP/1.1') - - -class ProtocolTest(unittest.TestCase): - def _run_bytes_through_protocol(self, bytes_from_client): - rfile = BytesIO(bytes_from_client) - wfile = BytesIO() - - # All this fakery is needed to make the WSGI server process one - # connection, possibly with multiple requests, in the main - # greenthread. It doesn't hurt correctness if the function is called - # in a separate greenthread, but it makes using the debugger harder. 
- class FakeGreenthread(object): - def link(self, a_callable, *args): - a_callable(self, *args) - - class FakePool(object): - def spawn(self, a_callable, *args, **kwargs): - a_callable(*args, **kwargs) - return FakeGreenthread() - - def spawn_n(self, a_callable, *args, **kwargs): - a_callable(*args, **kwargs) - - def waitall(self): - pass - - addr = ('127.0.0.1', 8359) - fake_tcp_socket = mock.Mock( - setsockopt=lambda *a: None, - makefile=lambda mode, bufsize: rfile if 'r' in mode else wfile, - getsockname=lambda *a: addr - ) - fake_listen_socket = mock.Mock( - accept=mock.MagicMock( - side_effect=[[fake_tcp_socket, addr], - # KeyboardInterrupt breaks the WSGI server out of - # its infinite accept-process-close loop. - KeyboardInterrupt]), - getsockname=lambda *a: addr) - del fake_listen_socket.do_handshake - - # If we let the WSGI server close rfile/wfile then we can't access - # their contents any more. - with mock.patch.object(wfile, 'close', lambda: None), \ - mock.patch.object(rfile, 'close', lambda: None): - eventlet.wsgi.server( - fake_listen_socket, self.app, - protocol=self.protocol_class, - custom_pool=FakePool(), - log_output=False, # quiet the test run - ) - return wfile.getvalue() - - -class TestSwiftHttpProtocolSomeMore(ProtocolTest): - protocol_class = wsgi.SwiftHttpProtocol - - @staticmethod - def app(env, start_response): - start_response("200 OK", []) - return [wsgi_to_bytes(env['RAW_PATH_INFO'])] - - def test_simple(self): - bytes_out = self._run_bytes_through_protocol(( - b"GET /someurl HTTP/1.0\r\n" - b"User-Agent: something or other\r\n" - b"\r\n" - )) - - lines = [l for l in bytes_out.split(b"\r\n") if l] - self.assertEqual(lines[0], b"HTTP/1.1 200 OK") # sanity check - self.assertEqual(lines[-1], b'/someurl') - - def test_quoted(self): - bytes_out = self._run_bytes_through_protocol(( - b"GET /some%fFpath%D8%AA HTTP/1.0\r\n" - b"User-Agent: something or other\r\n" - b"\r\n" - )) - - lines = [l for l in bytes_out.split(b"\r\n") if l] - 
self.assertEqual(lines[0], b"HTTP/1.1 200 OK") # sanity check - self.assertEqual(lines[-1], b'/some%fFpath%D8%AA') - - def test_messy(self): - bytes_out = self._run_bytes_through_protocol(( - b"GET /oh\xffboy%what$now%E2%80%bd HTTP/1.0\r\n" - b"User-Agent: something or other\r\n" - b"\r\n" - )) - - lines = [l for l in bytes_out.split(b"\r\n") if l] - self.assertEqual(lines[-1], b'/oh\xffboy%what$now%E2%80%bd') - - -class TestProxyProtocol(ProtocolTest): - protocol_class = wsgi.SwiftHttpProxiedProtocol - - @staticmethod - def app(env, start_response): - start_response("200 OK", []) - body = '\r\n'.join([ - 'got addr: %s %s' % ( - env.get("REMOTE_ADDR", ""), - env.get("REMOTE_PORT", "")), - 'on addr: %s %s' % ( - env.get("SERVER_ADDR", ""), - env.get("SERVER_PORT", "")), - 'https is %s (scheme %s)' % ( - env.get("HTTPS", ""), - env.get("wsgi.url_scheme", "")), - ]) + '\r\n' - return [body.encode("utf-8")] - - def test_request_with_proxy(self): - bytes_out = self._run_bytes_through_protocol(( - b"PROXY TCP4 192.168.0.1 192.168.0.11 56423 4433\r\n" - b"GET /someurl HTTP/1.0\r\n" - b"User-Agent: something or other\r\n" - b"\r\n" - )) - - lines = [l for l in bytes_out.split(b"\r\n") if l] - self.assertEqual(lines[0], b"HTTP/1.1 200 OK") # sanity check - self.assertEqual(lines[-3:], [ - b"got addr: 192.168.0.1 56423", - b"on addr: 192.168.0.11 4433", - b"https is (scheme http)", - ]) - - def test_request_with_proxy_https(self): - bytes_out = self._run_bytes_through_protocol(( - b"PROXY TCP4 192.168.0.1 192.168.0.11 56423 443\r\n" - b"GET /someurl HTTP/1.0\r\n" - b"User-Agent: something or other\r\n" - b"\r\n" - )) - - lines = [l for l in bytes_out.split(b"\r\n") if l] - self.assertEqual(lines[0], b"HTTP/1.1 200 OK") # sanity check - self.assertEqual(lines[-3:], [ - b"got addr: 192.168.0.1 56423", - b"on addr: 192.168.0.11 443", - b"https is on (scheme https)", - ]) - - def test_multiple_requests_with_proxy(self): - bytes_out = self._run_bytes_through_protocol(( - b"PROXY 
TCP4 192.168.0.1 192.168.0.11 56423 443\r\n" - b"GET /someurl HTTP/1.1\r\n" - b"User-Agent: something or other\r\n" - b"\r\n" - b"GET /otherurl HTTP/1.1\r\n" - b"User-Agent: something or other\r\n" - b"Connection: close\r\n" - b"\r\n" - )) - - lines = bytes_out.split(b"\r\n") - self.assertEqual(lines[0], b"HTTP/1.1 200 OK") # sanity check - - # the address in the PROXY line is applied to every request - addr_lines = [l for l in lines if l.startswith(b"got addr")] - self.assertEqual(addr_lines, [b"got addr: 192.168.0.1 56423"] * 2) - addr_lines = [l for l in lines if l.startswith(b"on addr")] - self.assertEqual(addr_lines, [b"on addr: 192.168.0.11 443"] * 2) - addr_lines = [l for l in lines if l.startswith(b"https is")] - self.assertEqual(addr_lines, [b"https is on (scheme https)"] * 2) - - def test_missing_proxy_line(self): - bytes_out = self._run_bytes_through_protocol(( - # whoops, no PROXY line here - b"GET /someurl HTTP/1.0\r\n" - b"User-Agent: something or other\r\n" - b"\r\n" - )) - - lines = [l for l in bytes_out.split(b"\r\n") if l] - self.assertIn(b"400 Invalid PROXY line", lines[0]) - - def test_malformed_proxy_lines(self): - for bad_line in [b'PROXY jojo', - b'PROXYjojo a b c d e', - b'PROXY a b c d e', # bad INET protocol and family - ]: - bytes_out = self._run_bytes_through_protocol(bad_line) - lines = [l for l in bytes_out.split(b"\r\n") if l] - self.assertIn(b"400 Invalid PROXY line", lines[0]) - - def test_unknown_client_addr(self): - # For "UNKNOWN", the rest of the line before the CRLF may be omitted by - # the sender, and the receiver must ignore anything presented before - # the CRLF is found. 
- for unknown_line in [b'PROXY UNKNOWN', # mimimal valid unknown - b'PROXY UNKNOWNblahblah', # also valid - b'PROXY UNKNOWN a b c d']: - bytes_out = self._run_bytes_through_protocol(( - unknown_line + (b"\r\n" - b"GET /someurl HTTP/1.0\r\n" - b"User-Agent: something or other\r\n" - b"\r\n") - )) - lines = [l for l in bytes_out.split(b"\r\n") if l] - self.assertIn(b"200 OK", lines[0]) - - def test_address_and_environ(self): - # Make an object we can exercise... note the base class's __init__() - # does a bunch of work, so we just new up an object like eventlet.wsgi - # does. - dummy_env = {'OTHER_ENV_KEY': 'OTHER_ENV_VALUE'} - mock_protocol = mock.Mock(get_environ=lambda s: dummy_env) - patcher = mock.patch( - 'swift.common.wsgi.SwiftHttpProtocol', mock_protocol - ) - self.mock_super = patcher.start() - self.addCleanup(patcher.stop) - - proto_class = wsgi.SwiftHttpProxiedProtocol - try: - proxy_obj = types.InstanceType(proto_class) - except AttributeError: - proxy_obj = proto_class.__new__(proto_class) - - # Install some convenience mocks - proxy_obj.server = Namespace(app=Namespace(logger=mock.Mock()), - url_length_limit=777, - log=mock.Mock()) - proxy_obj.send_error = mock.Mock() - - proxy_obj.rfile = BytesIO( - b'PROXY TCP4 111.111.111.111 222.222.222.222 111 222' - ) - - assert proxy_obj.handle() - - self.assertEqual(proxy_obj.client_address, ('111.111.111.111', '111')) - self.assertEqual(proxy_obj.proxy_address, ('222.222.222.222', '222')) - expected_env = { - 'SERVER_PORT': '222', - 'SERVER_ADDR': '222.222.222.222', - 'OTHER_ENV_KEY': 'OTHER_ENV_VALUE' - } - self.assertEqual(proxy_obj.get_environ(), expected_env) - - class CommonTestMixin(object): @mock.patch('swift.common.wsgi.capture_stdio') diff --git a/test/unit/helpers.py b/test/unit/helpers.py index fa616bc05c..3c0059cac4 100644 --- a/test/unit/helpers.py +++ b/test/unit/helpers.py @@ -40,7 +40,7 @@ from swift.common.storage_policy import StoragePolicy, ECStoragePolicy from swift.common.middleware import 
listing_formats, proxy_logging from swift.common import utils from swift.common.utils import mkdirs, normalize_timestamp, NullLogger -from swift.common.wsgi import SwiftHttpProtocol +from swift.common.http_protocol import SwiftHttpProtocol from swift.container import server as container_server from swift.obj import server as object_server from swift.proxy import server as proxy_server diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index 2f3ac906d1..53c17a812e 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -72,7 +72,8 @@ from swift.common.utils import hash_path, storage_directory, \ parse_content_type, parse_mime_headers, StatsdClient, \ iter_multipart_mime_documents, public, mkdirs, NullLogger, md5, \ node_to_string -from swift.common.wsgi import loadapp, ConfigString, SwiftHttpProtocol +from swift.common.wsgi import loadapp, ConfigString +from swift.common.http_protocol import SwiftHttpProtocol from swift.proxy.controllers import base as proxy_base from swift.proxy.controllers.base import get_cache_key, cors_validation, \ get_account_info, get_container_info