From 0f505ad968571687181834da3559db6e595a52db Mon Sep 17 00:00:00 2001 From: Pete Zaitcev Date: Tue, 23 Oct 2018 00:57:50 -0500 Subject: [PATCH] py3: adapt the account server completely This version scatters the cancer of WSGI strings around, but reduces the size of the patch. In particular, we can continue to iterate strings. Change-Id: Ia5815602d05925c5de110accc4dfb1368203bd8d --- swift/account/server.py | 23 +++++++++-- swift/account/utils.py | 8 ++-- swift/common/db_replicator.py | 5 ++- swift/common/request_helpers.py | 35 ++++++++++++---- swift/common/swob.py | 25 ++++++++---- test/unit/account/test_server.py | 70 ++++++++++++++++---------------- tox.ini | 6 +-- 7 files changed, 108 insertions(+), 64 deletions(-) diff --git a/swift/account/server.py b/swift/account/server.py index 586e7dfab2..3770a5d494 100644 --- a/swift/account/server.py +++ b/swift/account/server.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json import os import time import traceback @@ -20,6 +21,8 @@ from swift import gettext_ as _ from eventlet import Timeout +import six + import swift.common.db from swift.account.backend import AccountBroker, DATADIR from swift.account.utils import account_listing_response, get_response_headers @@ -28,7 +31,7 @@ from swift.common.request_helpers import get_param, \ split_and_validate_path from swift.common.utils import get_logger, hash_path, public, \ Timestamp, storage_directory, config_true_value, \ - json, timing_stats, replication, get_log_line, \ + timing_stats, replication, get_log_line, \ config_fallocate_value, fs_has_free_space from swift.common.constraints import valid_timestamp, check_utf8, check_drive from swift.common import constraints @@ -167,9 +170,21 @@ class AccountController(BaseStorageServer): if broker.is_deleted(): return HTTPConflict(request=req) metadata = {} - metadata.update((key, (value, timestamp.internal)) - for key, value in req.headers.items() - if is_sys_or_user_meta('account', key)) + if six.PY2: + metadata.update((key, (value, timestamp.internal)) + for key, value in req.headers.items() + if is_sys_or_user_meta('account', key)) + else: + for key, value in req.headers.items(): + if is_sys_or_user_meta('account', key): + # Cast to native strings, so that json inside + # updata_metadata eats the data. + try: + value = value.encode('latin-1').decode('utf-8') + except UnicodeDecodeError: + raise HTTPBadRequest( + 'Metadata must be valid UTF-8') + metadata[key] = (value, timestamp.internal) if metadata: broker.update_metadata(metadata, validate_metadata=True) if created: diff --git a/swift/account/utils.py b/swift/account/utils.py index 195990674d..c6f6e2b877 100644 --- a/swift/account/utils.py +++ b/swift/account/utils.py @@ -15,6 +15,8 @@ import json +import six + from swift.common.middleware import listing_formats from swift.common.swob import HTTPOk, HTTPNoContent from swift.common.utils import Timestamp @@ -81,12 +83,12 @@ def account_listing_response(account, req, response_content_type, broker=None, data = [] for (name, object_count, bytes_used, put_timestamp, is_subdir) \ in account_list: + name_ = name.decode('utf8') if six.PY2 else name if is_subdir: - data.append({'subdir': name.decode('utf8')}) + data.append({'subdir': name_}) else: data.append( - {'name': name.decode('utf8'), 'count': object_count, - 'bytes': bytes_used, + {'name': name_, 'count': object_count, 'bytes': bytes_used, 'last_modified': Timestamp(put_timestamp).isoformat}) if response_content_type.endswith('/xml'): account_list = listing_formats.account_to_xml(data, account) diff --git a/swift/common/db_replicator.py b/swift/common/db_replicator.py index 3c81985436..639f570892 100644 --- a/swift/common/db_replicator.py +++ b/swift/common/db_replicator.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json import os import random import math @@ -32,7 +33,7 @@ from swift.common.constraints import check_drive from swift.common.utils import get_logger, whataremyips, storage_directory, \ renamer, mkdirs, lock_parent_directory, config_true_value, \ unlink_older_than, dump_recon_cache, rsync_module_interpolation, \ - json, parse_override_options, round_robin_iter, Everything, get_db_files, \ + parse_override_options, round_robin_iter, Everything, get_db_files, \ parse_db_filename, quote, RateLimitedIterator from swift.common import ring from swift.common.ring.utils import is_local_device @@ -476,7 +477,7 @@ class Replicator(Daemon): elif response.status == HTTP_INSUFFICIENT_STORAGE: raise DriveNotMounted() elif 200 <= response.status < 300: - rinfo = json.loads(response.data.decode('ascii')) + rinfo = json.loads(response.data) local_sync = broker.get_sync(rinfo['id'], incoming=False) if rinfo.get('metadata', ''): broker.update_metadata(json.loads(rinfo['metadata'])) diff --git a/swift/common/request_helpers.py b/swift/common/request_helpers.py index 3d0174d85a..a09ee26c15 100644 --- a/swift/common/request_helpers.py +++ b/swift/common/request_helpers.py @@ -56,17 +56,36 @@ def get_param(req, name, default=None): :param name: parameter name :param default: result to return if the parameter is not found :returns: HTTP request parameter value - (as UTF-8 encoded str, not unicode object) + (in py2, as UTF-8 encoded str, not unicode object) :raises HTTPBadRequest: if param not valid UTF-8 byte sequence """ value = req.params.get(name, default) - if value and not isinstance(value, six.text_type): - try: - value.decode('utf8') # Ensure UTF8ness - except UnicodeDecodeError: - raise HTTPBadRequest( - request=req, content_type='text/plain', - body='"%s" parameter not valid UTF-8' % name) + if six.PY2: + if value and not isinstance(value, six.text_type): + try: + value.decode('utf8') # Ensure UTF8ness + except UnicodeDecodeError: + raise HTTPBadRequest( + request=req, content_type='text/plain', + body='"%s" parameter not valid UTF-8' % name) + else: + if value: + try: + if isinstance(value, six.text_type): + value = value.encode('latin1') + except UnicodeEncodeError: + # This happens, remarkably enough, when WSGI string is not + # a valid latin-1, and passed through parse_qsl(). + raise HTTPBadRequest( + request=req, content_type='text/plain', + body='"%s" parameter not valid UTF-8' % name) + try: + # Ensure UTF8ness since we're at it + value = value.decode('utf8') + except UnicodeDecodeError: + raise HTTPBadRequest( + request=req, content_type='text/plain', + body='"%s" parameter not valid UTF-8' % name) return value diff --git a/swift/common/swob.py b/swift/common/swob.py index 95c8f6417f..ffd0cf1fcf 100644 --- a/swift/common/swob.py +++ b/swift/common/swob.py @@ -757,14 +757,18 @@ def _req_environ_property(environ_field, is_wsgi_string_field=True): return self.environ.get(environ_field, None) def setter(self, value): - if six.PY2 and isinstance(value, six.text_type): - self.environ[environ_field] = value.encode('utf-8') - elif not six.PY2 and isinstance(value, six.binary_type): - self.environ[environ_field] = value.decode('latin1') + if six.PY2: + if isinstance(value, six.text_type): + self.environ[environ_field] = value.encode('utf-8') + else: + self.environ[environ_field] = value else: - if not six.PY2 and is_wsgi_string_field: + if is_wsgi_string_field: # Check that input is valid before setting - value.encode('latin1') + if isinstance(value, str): + value.encode('latin1').decode('utf-8') + if isinstance(value, bytes): + value = value.decode('latin1') self.environ[environ_field] = value return property(getter, setter, doc=("Get and set the %s property " @@ -948,8 +952,13 @@ class Request(object): "Provides QUERY_STRING parameters as a dictionary" if self._params_cache is None: if 'QUERY_STRING' in self.environ: - self._params_cache = dict( - urllib.parse.parse_qsl(self.environ['QUERY_STRING'], True)) + if six.PY2: + self._params_cache = dict(urllib.parse.parse_qsl( + self.environ['QUERY_STRING'], True)) + else: + self._params_cache = dict(urllib.parse.parse_qsl( + self.environ['QUERY_STRING'], + keep_blank_values=True, encoding='latin-1')) else: self._params_cache = {} return self._params_cache diff --git a/test/unit/account/test_server.py b/test/unit/account/test_server.py index 61cef2ebe9..4c780d40e2 100644 --- a/test/unit/account/test_server.py +++ b/test/unit/account/test_server.py @@ -229,7 +229,7 @@ class TestAccountController(unittest.TestCase): req = Request.blank('/sda1/p/a/', environ={'REQUEST_METHOD': 'REPLICATE'}, headers={}) - json_string = '["rsync_then_merge", "a.db"]' + json_string = b'["rsync_then_merge", "a.db"]' inbuf = WsgiBytesIO(json_string) req.environ['wsgi.input'] = inbuf resp = req.get_response(self.controller) @@ -244,7 +244,7 @@ class TestAccountController(unittest.TestCase): req = Request.blank('/sda1/p/a/', environ={'REQUEST_METHOD': 'REPLICATE'}, headers={}) - json_string = '["complete_rsync", "a.db"]' + json_string = b'["complete_rsync", "a.db"]' inbuf = WsgiBytesIO(json_string) req.environ['wsgi.input'] = inbuf resp = req.get_response(self.controller) @@ -256,7 +256,7 @@ class TestAccountController(unittest.TestCase): headers={}) # check valuerror - wsgi_input_valuerror = '["sync" : sync, "-1"]' + wsgi_input_valuerror = b'["sync" : sync, "-1"]' inbuf1 = WsgiBytesIO(wsgi_input_valuerror) req.environ['wsgi.input'] = inbuf1 resp = req.get_response(self.controller) @@ -267,7 +267,7 @@ class TestAccountController(unittest.TestCase): req = Request.blank('/sda1/p/a/', environ={'REQUEST_METHOD': 'REPLICATE'}, headers={}) - json_string = '["unknown_sync", "a.db"]' + json_string = b'["unknown_sync", "a.db"]' inbuf = WsgiBytesIO(json_string) req.environ['wsgi.input'] = inbuf resp = req.get_response(self.controller) @@ -281,7 +281,7 @@ class TestAccountController(unittest.TestCase): req = Request.blank('/sda1/p/a/', environ={'REQUEST_METHOD': 'REPLICATE'}, headers={}) - json_string = '["unknown_sync", "a.db"]' + json_string = b'["unknown_sync", "a.db"]' inbuf = WsgiBytesIO(json_string) req.environ['wsgi.input'] = inbuf resp = req.get_response(self.controller) @@ -386,7 +386,7 @@ class TestAccountController(unittest.TestCase): headers={'Accept': 'application/plain;q=1;q=0.5'}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 400) - self.assertEqual(resp.body, '') + self.assertEqual(resp.body, b'') def test_HEAD_invalid_format(self): format = '%D1%BD%8A9' # invalid UTF-8; should be %E1%BD%8A9 (E -> D) @@ -497,7 +497,7 @@ class TestAccountController(unittest.TestCase): headers={'X-Timestamp': normalize_timestamp(2)}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 403) - self.assertEqual(resp.body, 'Recently deleted') + self.assertEqual(resp.body, b'Recently deleted') self.assertEqual(resp.headers['X-Account-Status'], 'Deleted') def test_PUT_non_utf8_metadata(self): @@ -885,7 +885,7 @@ class TestAccountController(unittest.TestCase): headers={'Accept': 'application/plain;q=foo'}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 400) - self.assertEqual(resp.body, 'Invalid Accept header') + self.assertEqual(resp.body, b'Invalid Accept header') def test_GET_over_limit(self): req = Request.blank( @@ -915,7 +915,8 @@ class TestAccountController(unittest.TestCase): req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'GET'}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 200) - self.assertEqual(resp.body.strip().split('\n'), ['c1', 'c2']) + self.assertEqual(resp.body.decode('utf-8').strip().split('\n'), + ['c1', 'c2']) req = Request.blank('/sda1/p/a/c1', environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Put-Timestamp': '1', 'X-Delete-Timestamp': '0', @@ -933,7 +934,8 @@ class TestAccountController(unittest.TestCase): req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'GET'}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 200) - self.assertEqual(resp.body.strip().split('\n'), ['c1', 'c2']) + self.assertEqual(resp.body.decode('utf-8').strip().split('\n'), + ['c1', 'c2']) self.assertEqual(resp.content_type, 'text/plain') self.assertEqual(resp.charset, 'utf-8') @@ -942,7 +944,8 @@ class TestAccountController(unittest.TestCase): environ={'REQUEST_METHOD': 'GET'}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 200) - self.assertEqual(resp.body.strip().split('\n'), ['c1', 'c2']) + self.assertEqual(resp.body.decode('utf-8').strip().split('\n'), + ['c1', 'c2']) self.assertEqual(resp.content_type, 'text/plain') self.assertEqual(resp.charset, 'utf-8') @@ -1195,12 +1198,14 @@ class TestAccountController(unittest.TestCase): environ={'REQUEST_METHOD': 'GET'}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 200) - self.assertEqual(resp.body.strip().split('\n'), ['c0', 'c1', 'c2']) + self.assertEqual(resp.body.decode('utf-8').strip().split('\n'), + ['c0', 'c1', 'c2']) req = Request.blank('/sda1/p/a?limit=3&marker=c2', environ={'REQUEST_METHOD': 'GET'}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 200) - self.assertEqual(resp.body.strip().split('\n'), ['c3', 'c4']) + self.assertEqual(resp.body.decode('utf-8').strip().split('\n'), + ['c3', 'c4']) def test_GET_limit_marker_json(self): req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'PUT', @@ -1341,7 +1346,7 @@ class TestAccountController(unittest.TestCase): req.accept = '*/*' resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 200) - self.assertEqual(resp.body, 'c1\n') + self.assertEqual(resp.body, b'c1\n') def test_GET_accept_application_wildcard(self): req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'PUT', @@ -1416,7 +1421,7 @@ class TestAccountController(unittest.TestCase): req.accept = 'application/json' resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 200) - self.assertEqual(resp.body, 'c1\n') + self.assertEqual(resp.body, b'c1\n') def test_GET_accept_not_valid(self): req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'PUT', @@ -1469,19 +1474,20 @@ class TestAccountController(unittest.TestCase): environ={'REQUEST_METHOD': 'GET'}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 200) - self.assertEqual(resp.body.strip().split('\n'), ['sub.']) + self.assertEqual(resp.body.decode('utf-8').strip().split('\n'), + ['sub.']) req = Request.blank('/sda1/p/a?prefix=sub.&delimiter=.', environ={'REQUEST_METHOD': 'GET'}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 200) self.assertEqual( - resp.body.strip().split('\n'), + resp.body.decode('utf-8').strip().split('\n'), ['sub.0', 'sub.0.', 'sub.1', 'sub.1.', 'sub.2', 'sub.2.']) req = Request.blank('/sda1/p/a?prefix=sub.1.&delimiter=.', environ={'REQUEST_METHOD': 'GET'}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 200) - self.assertEqual(resp.body.strip().split('\n'), + self.assertEqual(resp.body.decode('utf-8').strip().split('\n'), ['sub.1.0', 'sub.1.1', 'sub.1.2']) def test_GET_prefix_delimiter_json(self): @@ -1609,7 +1615,7 @@ class TestAccountController(unittest.TestCase): outbuf = StringIO() def start_response(*args): - outbuf.writelines(args) + outbuf.write(args[0]) self.controller.__call__({'REQUEST_METHOD': 'GET', 'SCRIPT_NAME': '', @@ -1635,7 +1641,7 @@ class TestAccountController(unittest.TestCase): outbuf = StringIO() def start_response(*args): - outbuf.writelines(args) + outbuf.write(args[0]) self.controller.__call__({'REQUEST_METHOD': 'GET', 'SCRIPT_NAME': '', @@ -1661,7 +1667,7 @@ class TestAccountController(unittest.TestCase): outbuf = StringIO() def start_response(*args): - outbuf.writelines(args) + outbuf.write(args[0]) self.controller.__call__({'REQUEST_METHOD': 'GET', 'SCRIPT_NAME': '', @@ -1686,7 +1692,7 @@ class TestAccountController(unittest.TestCase): outbuf = StringIO() def start_response(*args): - outbuf.writelines(args) + outbuf.write(args[0]) self.controller.__call__({'REQUEST_METHOD': 'method_doesnt_exist', 'PATH_INFO': '/sda1/p/a'}, @@ -1699,7 +1705,7 @@ class TestAccountController(unittest.TestCase): outbuf = StringIO() def start_response(*args): - outbuf.writelines(args) + outbuf.write(args[0]) self.controller.__call__({'REQUEST_METHOD': '__init__', 'PATH_INFO': '/sda1/p/a'}, @@ -1837,8 +1843,7 @@ class TestAccountController(unittest.TestCase): 'replication_server': 'false'}) def start_response(*args): - """Sends args to outbuf""" - outbuf.writelines(args) + outbuf.write(args[0]) method = 'PUT' env = {'REQUEST_METHOD': method, @@ -1875,8 +1880,7 @@ class TestAccountController(unittest.TestCase): 'replication_server': 'false'}) def start_response(*args): - """Sends args to outbuf""" - outbuf.writelines(args) + outbuf.write(args[0]) method = 'PUT' env = {'REQUEST_METHOD': method, @@ -1894,8 +1898,8 @@ class TestAccountController(unittest.TestCase): 'wsgi.multiprocess': False, 'wsgi.run_once': False} - answer = ['

Method Not Allowed

The method is not ' - 'allowed for this resource.

'] + answer = [b'

Method Not Allowed

The method is not ' + b'allowed for this resource.

'] mock_method = replication(public(lambda x: mock.MagicMock())) with mock.patch.object(self.controller, method, new=mock_method): @@ -1912,8 +1916,7 @@ class TestAccountController(unittest.TestCase): 'replication_server': 'true'}) def start_response(*args): - """Sends args to outbuf""" - outbuf.writelines(args) + outbuf.write(args[0]) obj_methods = ['DELETE', 'PUT', 'HEAD', 'GET', 'POST', 'OPTIONS'] for method in obj_methods: @@ -1946,8 +1949,7 @@ class TestAccountController(unittest.TestCase): logger=self.logger) def start_response(*args): - # Sends args to outbuf - outbuf.writelines(args) + outbuf.write(args[0]) method = 'PUT' @@ -1973,7 +1975,7 @@ class TestAccountController(unittest.TestCase): with mock.patch.object(self.account_controller, method, new=mock_put_method): response = self.account_controller.__call__(env, start_response) - self.assertTrue(response[0].startswith( + self.assertTrue(response[0].decode('ascii').startswith( 'Traceback (most recent call last):')) self.assertEqual(self.logger.get_lines_for_level('error'), [ 'ERROR __call__ error with %(method)s %(path)s : ' % { diff --git a/tox.ini b/tox.ini index eeec59ba2f..4f695b420b 100644 --- a/tox.ini +++ b/tox.ini @@ -29,11 +29,7 @@ setenv = VIRTUAL_ENV={envdir} [testenv:py35] commands = nosetests {posargs:\ - test/unit/account/test_auditor.py \ - test/unit/account/test_backend.py \ - test/unit/account/test_reaper.py \ - test/unit/account/test_replicator.py \ - test/unit/account/test_utils.py \ + test/unit/account \ test/unit/cli/test_dispersion_report.py \ test/unit/cli/test_form_signature.py \ test/unit/cli/test_info.py \