py3: adapt the account server completely

This version scatters the cancer of WSGI strings around, but reduces the size of the patch. In particular, we can continue to iterate strings. Change-Id: Ia5815602d05925c5de110accc4dfb1368203bd8d
2018-10-23 00:57:50 -05:00 · 2018-10-23 00:57:50 -05:00 · 0f505ad968
commit 0f505ad968
parent d645e216c0
7 changed files with 108 additions and 64 deletions
--- a/swift/account/server.py
+++ b/swift/account/server.py
@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import json
 import os
 import time
 import traceback
@ -20,6 +21,8 @@ from swift import gettext_ as _

 from eventlet import Timeout

+import six
+
 import swift.common.db
 from swift.account.backend import AccountBroker, DATADIR
 from swift.account.utils import account_listing_response, get_response_headers
@ -28,7 +31,7 @@ from swift.common.request_helpers import get_param, \
    split_and_validate_path
 from swift.common.utils import get_logger, hash_path, public, \
    Timestamp, storage_directory, config_true_value, \
-    json, timing_stats, replication, get_log_line, \
+    timing_stats, replication, get_log_line, \
    config_fallocate_value, fs_has_free_space
 from swift.common.constraints import valid_timestamp, check_utf8, check_drive
 from swift.common import constraints
@ -167,9 +170,21 @@ class AccountController(BaseStorageServer):
                if broker.is_deleted():
                    return HTTPConflict(request=req)
            metadata = {}
+            if six.PY2:
                metadata.update((key, (value, timestamp.internal))
                                for key, value in req.headers.items()
                                if is_sys_or_user_meta('account', key))
+            else:
+                for key, value in req.headers.items():
+                    if is_sys_or_user_meta('account', key):
+                        # Cast to native strings, so that json inside
+                        # updata_metadata eats the data.
+                        try:
+                            value = value.encode('latin-1').decode('utf-8')
+                        except UnicodeDecodeError:
+                            raise HTTPBadRequest(
+                                'Metadata must be valid UTF-8')
+                        metadata[key] = (value, timestamp.internal)
            if metadata:
                broker.update_metadata(metadata, validate_metadata=True)
            if created:
--- a/swift/account/utils.py
+++ b/swift/account/utils.py
@ -15,6 +15,8 @@

 import json

+import six
+
 from swift.common.middleware import listing_formats
 from swift.common.swob import HTTPOk, HTTPNoContent
 from swift.common.utils import Timestamp
@ -81,12 +83,12 @@ def account_listing_response(account, req, response_content_type, broker=None,
    data = []
    for (name, object_count, bytes_used, put_timestamp, is_subdir) \
            in account_list:
+        name_ = name.decode('utf8') if six.PY2 else name
        if is_subdir:
-            data.append({'subdir': name.decode('utf8')})
+            data.append({'subdir': name_})
        else:
            data.append(
-                {'name': name.decode('utf8'), 'count': object_count,
-                 'bytes': bytes_used,
+                {'name': name_, 'count': object_count, 'bytes': bytes_used,
                 'last_modified': Timestamp(put_timestamp).isoformat})
    if response_content_type.endswith('/xml'):
        account_list = listing_formats.account_to_xml(data, account)
--- a/swift/common/db_replicator.py
+++ b/swift/common/db_replicator.py
@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import json
 import os
 import random
 import math
@ -32,7 +33,7 @@ from swift.common.constraints import check_drive
 from swift.common.utils import get_logger, whataremyips, storage_directory, \
    renamer, mkdirs, lock_parent_directory, config_true_value, \
    unlink_older_than, dump_recon_cache, rsync_module_interpolation, \
-    json, parse_override_options, round_robin_iter, Everything, get_db_files, \
+    parse_override_options, round_robin_iter, Everything, get_db_files, \
    parse_db_filename, quote, RateLimitedIterator
 from swift.common import ring
 from swift.common.ring.utils import is_local_device
@ -476,7 +477,7 @@ class Replicator(Daemon):
        elif response.status == HTTP_INSUFFICIENT_STORAGE:
            raise DriveNotMounted()
        elif 200 <= response.status < 300:
-            rinfo = json.loads(response.data.decode('ascii'))
+            rinfo = json.loads(response.data)
            local_sync = broker.get_sync(rinfo['id'], incoming=False)
            if rinfo.get('metadata', ''):
                broker.update_metadata(json.loads(rinfo['metadata']))
--- a/swift/common/request_helpers.py
+++ b/swift/common/request_helpers.py
@ -56,10 +56,11 @@ def get_param(req, name, default=None):
    :param name: parameter name
    :param default: result to return if the parameter is not found
    :returns: HTTP request parameter value
-              (as UTF-8 encoded str, not unicode object)
+              (in py2, as UTF-8 encoded str, not unicode object)
    :raises HTTPBadRequest: if param not valid UTF-8 byte sequence
    """
    value = req.params.get(name, default)
+    if six.PY2:
        if value and not isinstance(value, six.text_type):
            try:
                value.decode('utf8')    # Ensure UTF8ness
@ -67,6 +68,24 @@ def get_param(req, name, default=None):
                raise HTTPBadRequest(
                    request=req, content_type='text/plain',
                    body='"%s" parameter not valid UTF-8' % name)
+    else:
+        if value:
+            try:
+                if isinstance(value, six.text_type):
+                    value = value.encode('latin1')
+            except UnicodeEncodeError:
+                # This happens, remarkably enough, when WSGI string is not
+                # a valid latin-1, and passed through parse_qsl().
+                raise HTTPBadRequest(
+                    request=req, content_type='text/plain',
+                    body='"%s" parameter not valid UTF-8' % name)
+            try:
+                # Ensure UTF8ness since we're at it
+                value = value.decode('utf8')
+            except UnicodeDecodeError:
+                raise HTTPBadRequest(
+                    request=req, content_type='text/plain',
+                    body='"%s" parameter not valid UTF-8' % name)
    return value


--- a/swift/common/swob.py
+++ b/swift/common/swob.py
@ -757,14 +757,18 @@ def _req_environ_property(environ_field, is_wsgi_string_field=True):
        return self.environ.get(environ_field, None)

    def setter(self, value):
-        if six.PY2 and isinstance(value, six.text_type):
+        if six.PY2:
+            if isinstance(value, six.text_type):
                self.environ[environ_field] = value.encode('utf-8')
-        elif not six.PY2 and isinstance(value, six.binary_type):
-            self.environ[environ_field] = value.decode('latin1')
            else:
-            if not six.PY2 and is_wsgi_string_field:
+                self.environ[environ_field] = value
+        else:
+            if is_wsgi_string_field:
                # Check that input is valid before setting
-                value.encode('latin1')
+                if isinstance(value, str):
+                    value.encode('latin1').decode('utf-8')
+                if isinstance(value, bytes):
+                    value = value.decode('latin1')
            self.environ[environ_field] = value

    return property(getter, setter, doc=("Get and set the %s property "
@ -948,8 +952,13 @@ class Request(object):
        "Provides QUERY_STRING parameters as a dictionary"
        if self._params_cache is None:
            if 'QUERY_STRING' in self.environ:
-                self._params_cache = dict(
-                    urllib.parse.parse_qsl(self.environ['QUERY_STRING'], True))
+                if six.PY2:
+                    self._params_cache = dict(urllib.parse.parse_qsl(
+                        self.environ['QUERY_STRING'], True))
+                else:
+                    self._params_cache = dict(urllib.parse.parse_qsl(
+                        self.environ['QUERY_STRING'],
+                        keep_blank_values=True, encoding='latin-1'))
            else:
                self._params_cache = {}
        return self._params_cache
--- a/test/unit/account/test_server.py
+++ b/test/unit/account/test_server.py
@ -229,7 +229,7 @@ class TestAccountController(unittest.TestCase):
            req = Request.blank('/sda1/p/a/',
                                environ={'REQUEST_METHOD': 'REPLICATE'},
                                headers={})
-            json_string = '["rsync_then_merge", "a.db"]'
+            json_string = b'["rsync_then_merge", "a.db"]'
            inbuf = WsgiBytesIO(json_string)
            req.environ['wsgi.input'] = inbuf
            resp = req.get_response(self.controller)
@ -244,7 +244,7 @@ class TestAccountController(unittest.TestCase):
            req = Request.blank('/sda1/p/a/',
                                environ={'REQUEST_METHOD': 'REPLICATE'},
                                headers={})
-            json_string = '["complete_rsync", "a.db"]'
+            json_string = b'["complete_rsync", "a.db"]'
            inbuf = WsgiBytesIO(json_string)
            req.environ['wsgi.input'] = inbuf
            resp = req.get_response(self.controller)
@ -256,7 +256,7 @@ class TestAccountController(unittest.TestCase):
                            headers={})

        # check valuerror
-        wsgi_input_valuerror = '["sync" : sync, "-1"]'
+        wsgi_input_valuerror = b'["sync" : sync, "-1"]'
        inbuf1 = WsgiBytesIO(wsgi_input_valuerror)
        req.environ['wsgi.input'] = inbuf1
        resp = req.get_response(self.controller)
@ -267,7 +267,7 @@ class TestAccountController(unittest.TestCase):
        req = Request.blank('/sda1/p/a/',
                            environ={'REQUEST_METHOD': 'REPLICATE'},
                            headers={})
-        json_string = '["unknown_sync", "a.db"]'
+        json_string = b'["unknown_sync", "a.db"]'
        inbuf = WsgiBytesIO(json_string)
        req.environ['wsgi.input'] = inbuf
        resp = req.get_response(self.controller)
@ -281,7 +281,7 @@ class TestAccountController(unittest.TestCase):
        req = Request.blank('/sda1/p/a/',
                            environ={'REQUEST_METHOD': 'REPLICATE'},
                            headers={})
-        json_string = '["unknown_sync", "a.db"]'
+        json_string = b'["unknown_sync", "a.db"]'
        inbuf = WsgiBytesIO(json_string)
        req.environ['wsgi.input'] = inbuf
        resp = req.get_response(self.controller)
@ -386,7 +386,7 @@ class TestAccountController(unittest.TestCase):
                            headers={'Accept': 'application/plain;q=1;q=0.5'})
        resp = req.get_response(self.controller)
        self.assertEqual(resp.status_int, 400)
-        self.assertEqual(resp.body, '')
+        self.assertEqual(resp.body, b'')

    def test_HEAD_invalid_format(self):
        format = '%D1%BD%8A9'  # invalid UTF-8; should be %E1%BD%8A9 (E -> D)
@ -497,7 +497,7 @@ class TestAccountController(unittest.TestCase):
                            headers={'X-Timestamp': normalize_timestamp(2)})
        resp = req.get_response(self.controller)
        self.assertEqual(resp.status_int, 403)
-        self.assertEqual(resp.body, 'Recently deleted')
+        self.assertEqual(resp.body, b'Recently deleted')
        self.assertEqual(resp.headers['X-Account-Status'], 'Deleted')

    def test_PUT_non_utf8_metadata(self):
@ -885,7 +885,7 @@ class TestAccountController(unittest.TestCase):
                            headers={'Accept': 'application/plain;q=foo'})
        resp = req.get_response(self.controller)
        self.assertEqual(resp.status_int, 400)
-        self.assertEqual(resp.body, 'Invalid Accept header')
+        self.assertEqual(resp.body, b'Invalid Accept header')

    def test_GET_over_limit(self):
        req = Request.blank(
@ -915,7 +915,8 @@ class TestAccountController(unittest.TestCase):
        req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'GET'})
        resp = req.get_response(self.controller)
        self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body.strip().split('\n'), ['c1', 'c2'])
+        self.assertEqual(resp.body.decode('utf-8').strip().split('\n'),
+                         ['c1', 'c2'])
        req = Request.blank('/sda1/p/a/c1', environ={'REQUEST_METHOD': 'PUT'},
                            headers={'X-Put-Timestamp': '1',
                                     'X-Delete-Timestamp': '0',
@ -933,7 +934,8 @@ class TestAccountController(unittest.TestCase):
        req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'GET'})
        resp = req.get_response(self.controller)
        self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body.strip().split('\n'), ['c1', 'c2'])
+        self.assertEqual(resp.body.decode('utf-8').strip().split('\n'),
+                         ['c1', 'c2'])
        self.assertEqual(resp.content_type, 'text/plain')
        self.assertEqual(resp.charset, 'utf-8')

@ -942,7 +944,8 @@ class TestAccountController(unittest.TestCase):
                            environ={'REQUEST_METHOD': 'GET'})
        resp = req.get_response(self.controller)
        self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body.strip().split('\n'), ['c1', 'c2'])
+        self.assertEqual(resp.body.decode('utf-8').strip().split('\n'),
+                         ['c1', 'c2'])
        self.assertEqual(resp.content_type, 'text/plain')
        self.assertEqual(resp.charset, 'utf-8')

@ -1195,12 +1198,14 @@ class TestAccountController(unittest.TestCase):
                            environ={'REQUEST_METHOD': 'GET'})
        resp = req.get_response(self.controller)
        self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body.strip().split('\n'), ['c0', 'c1', 'c2'])
+        self.assertEqual(resp.body.decode('utf-8').strip().split('\n'),
+                         ['c0', 'c1', 'c2'])
        req = Request.blank('/sda1/p/a?limit=3&marker=c2',
                            environ={'REQUEST_METHOD': 'GET'})
        resp = req.get_response(self.controller)
        self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body.strip().split('\n'), ['c3', 'c4'])
+        self.assertEqual(resp.body.decode('utf-8').strip().split('\n'),
+                         ['c3', 'c4'])

    def test_GET_limit_marker_json(self):
        req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'PUT',
@ -1341,7 +1346,7 @@ class TestAccountController(unittest.TestCase):
        req.accept = '*/*'
        resp = req.get_response(self.controller)
        self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body, 'c1\n')
+        self.assertEqual(resp.body, b'c1\n')

    def test_GET_accept_application_wildcard(self):
        req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'PUT',
@ -1416,7 +1421,7 @@ class TestAccountController(unittest.TestCase):
        req.accept = 'application/json'
        resp = req.get_response(self.controller)
        self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body, 'c1\n')
+        self.assertEqual(resp.body, b'c1\n')

    def test_GET_accept_not_valid(self):
        req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'PUT',
@ -1469,19 +1474,20 @@ class TestAccountController(unittest.TestCase):
                            environ={'REQUEST_METHOD': 'GET'})
        resp = req.get_response(self.controller)
        self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body.strip().split('\n'), ['sub.'])
+        self.assertEqual(resp.body.decode('utf-8').strip().split('\n'),
+                         ['sub.'])
        req = Request.blank('/sda1/p/a?prefix=sub.&delimiter=.',
                            environ={'REQUEST_METHOD': 'GET'})
        resp = req.get_response(self.controller)
        self.assertEqual(resp.status_int, 200)
        self.assertEqual(
-            resp.body.strip().split('\n'),
+            resp.body.decode('utf-8').strip().split('\n'),
            ['sub.0', 'sub.0.', 'sub.1', 'sub.1.', 'sub.2', 'sub.2.'])
        req = Request.blank('/sda1/p/a?prefix=sub.1.&delimiter=.',
                            environ={'REQUEST_METHOD': 'GET'})
        resp = req.get_response(self.controller)
        self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body.strip().split('\n'),
+        self.assertEqual(resp.body.decode('utf-8').strip().split('\n'),
                         ['sub.1.0', 'sub.1.1', 'sub.1.2'])

    def test_GET_prefix_delimiter_json(self):
@ -1609,7 +1615,7 @@ class TestAccountController(unittest.TestCase):
        outbuf = StringIO()

        def start_response(*args):
-            outbuf.writelines(args)
+            outbuf.write(args[0])

        self.controller.__call__({'REQUEST_METHOD': 'GET',
                                  'SCRIPT_NAME': '',
@ -1635,7 +1641,7 @@ class TestAccountController(unittest.TestCase):
        outbuf = StringIO()

        def start_response(*args):
-            outbuf.writelines(args)
+            outbuf.write(args[0])

        self.controller.__call__({'REQUEST_METHOD': 'GET',
                                  'SCRIPT_NAME': '',
@ -1661,7 +1667,7 @@ class TestAccountController(unittest.TestCase):
        outbuf = StringIO()

        def start_response(*args):
-            outbuf.writelines(args)
+            outbuf.write(args[0])

        self.controller.__call__({'REQUEST_METHOD': 'GET',
                                  'SCRIPT_NAME': '',
@ -1686,7 +1692,7 @@ class TestAccountController(unittest.TestCase):
        outbuf = StringIO()

        def start_response(*args):
-            outbuf.writelines(args)
+            outbuf.write(args[0])

        self.controller.__call__({'REQUEST_METHOD': 'method_doesnt_exist',
                                  'PATH_INFO': '/sda1/p/a'},
@ -1699,7 +1705,7 @@ class TestAccountController(unittest.TestCase):
        outbuf = StringIO()

        def start_response(*args):
-            outbuf.writelines(args)
+            outbuf.write(args[0])

        self.controller.__call__({'REQUEST_METHOD': '__init__',
                                  'PATH_INFO': '/sda1/p/a'},
@ -1837,8 +1843,7 @@ class TestAccountController(unittest.TestCase):
             'replication_server': 'false'})

        def start_response(*args):
-            """Sends args to outbuf"""
-            outbuf.writelines(args)
+            outbuf.write(args[0])

        method = 'PUT'
        env = {'REQUEST_METHOD': method,
@ -1875,8 +1880,7 @@ class TestAccountController(unittest.TestCase):
             'replication_server': 'false'})

        def start_response(*args):
-            """Sends args to outbuf"""
-            outbuf.writelines(args)
+            outbuf.write(args[0])

        method = 'PUT'
        env = {'REQUEST_METHOD': method,
@ -1894,8 +1898,8 @@ class TestAccountController(unittest.TestCase):
               'wsgi.multiprocess': False,
               'wsgi.run_once': False}

-        answer = ['<html><h1>Method Not Allowed</h1><p>The method is not '
-                  'allowed for this resource.</p></html>']
+        answer = [b'<html><h1>Method Not Allowed</h1><p>The method is not '
+                  b'allowed for this resource.</p></html>']
        mock_method = replication(public(lambda x: mock.MagicMock()))
        with mock.patch.object(self.controller, method,
                               new=mock_method):
@ -1912,8 +1916,7 @@ class TestAccountController(unittest.TestCase):
             'replication_server': 'true'})

        def start_response(*args):
-            """Sends args to outbuf"""
-            outbuf.writelines(args)
+            outbuf.write(args[0])

        obj_methods = ['DELETE', 'PUT', 'HEAD', 'GET', 'POST', 'OPTIONS']
        for method in obj_methods:
@ -1946,8 +1949,7 @@ class TestAccountController(unittest.TestCase):
            logger=self.logger)

        def start_response(*args):
-            # Sends args to outbuf
-            outbuf.writelines(args)
+            outbuf.write(args[0])

        method = 'PUT'

@ -1973,7 +1975,7 @@ class TestAccountController(unittest.TestCase):
        with mock.patch.object(self.account_controller, method,
                               new=mock_put_method):
            response = self.account_controller.__call__(env, start_response)
-            self.assertTrue(response[0].startswith(
+            self.assertTrue(response[0].decode('ascii').startswith(
                'Traceback (most recent call last):'))
            self.assertEqual(self.logger.get_lines_for_level('error'), [
                'ERROR __call__ error with %(method)s %(path)s : ' % {
--- a/tox.ini
+++ b/tox.ini
@ -29,11 +29,7 @@ setenv = VIRTUAL_ENV={envdir}
 [testenv:py35]
 commands =
  nosetests {posargs:\
-      test/unit/account/test_auditor.py \
-      test/unit/account/test_backend.py \
-      test/unit/account/test_reaper.py \
-      test/unit/account/test_replicator.py \
-      test/unit/account/test_utils.py \
+      test/unit/account \
      test/unit/cli/test_dispersion_report.py \
      test/unit/cli/test_form_signature.py \
      test/unit/cli/test_info.py \