From 0f505ad968571687181834da3559db6e595a52db Mon Sep 17 00:00:00 2001
From: Pete Zaitcev <zaitcev@kotori.zaitcev.us>
Date: Tue, 23 Oct 2018 00:57:50 -0500
Subject: [PATCH] py3: adapt the account server completely

This version scatters the cancer of WSGI strings around, but
reduces the size of the patch. In particular, we can continue
to iterate strings.

Change-Id: Ia5815602d05925c5de110accc4dfb1368203bd8d
---
 swift/account/server.py          | 23 +++++++++--
 swift/account/utils.py           |  8 ++--
 swift/common/db_replicator.py    |  5 ++-
 swift/common/request_helpers.py  | 35 ++++++++++++----
 swift/common/swob.py             | 25 ++++++++----
 test/unit/account/test_server.py | 70 ++++++++++++++++----------------
 tox.ini                          |  6 +--
 7 files changed, 108 insertions(+), 64 deletions(-)

diff --git a/swift/account/server.py b/swift/account/server.py
index 586e7dfab2..3770a5d494 100644
--- a/swift/account/server.py
+++ b/swift/account/server.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
 import os
 import time
 import traceback
@@ -20,6 +21,8 @@ from swift import gettext_ as _
 
 from eventlet import Timeout
 
+import six
+
 import swift.common.db
 from swift.account.backend import AccountBroker, DATADIR
 from swift.account.utils import account_listing_response, get_response_headers
@@ -28,7 +31,7 @@ from swift.common.request_helpers import get_param, \
     split_and_validate_path
 from swift.common.utils import get_logger, hash_path, public, \
     Timestamp, storage_directory, config_true_value, \
-    json, timing_stats, replication, get_log_line, \
+    timing_stats, replication, get_log_line, \
     config_fallocate_value, fs_has_free_space
 from swift.common.constraints import valid_timestamp, check_utf8, check_drive
 from swift.common import constraints
@@ -167,9 +170,21 @@ class AccountController(BaseStorageServer):
                 if broker.is_deleted():
                     return HTTPConflict(request=req)
             metadata = {}
-            metadata.update((key, (value, timestamp.internal))
-                            for key, value in req.headers.items()
-                            if is_sys_or_user_meta('account', key))
+            if six.PY2:
+                metadata.update((key, (value, timestamp.internal))
+                                for key, value in req.headers.items()
+                                if is_sys_or_user_meta('account', key))
+            else:
+                for key, value in req.headers.items():
+                    if is_sys_or_user_meta('account', key):
+                        # Cast to native strings, so that json inside
+                        # updata_metadata eats the data.
+                        try:
+                            value = value.encode('latin-1').decode('utf-8')
+                        except UnicodeDecodeError:
+                            raise HTTPBadRequest(
+                                'Metadata must be valid UTF-8')
+                        metadata[key] = (value, timestamp.internal)
             if metadata:
                 broker.update_metadata(metadata, validate_metadata=True)
             if created:
diff --git a/swift/account/utils.py b/swift/account/utils.py
index 195990674d..c6f6e2b877 100644
--- a/swift/account/utils.py
+++ b/swift/account/utils.py
@@ -15,6 +15,8 @@
 
 import json
 
+import six
+
 from swift.common.middleware import listing_formats
 from swift.common.swob import HTTPOk, HTTPNoContent
 from swift.common.utils import Timestamp
@@ -81,12 +83,12 @@ def account_listing_response(account, req, response_content_type, broker=None,
     data = []
     for (name, object_count, bytes_used, put_timestamp, is_subdir) \
             in account_list:
+        name_ = name.decode('utf8') if six.PY2 else name
         if is_subdir:
-            data.append({'subdir': name.decode('utf8')})
+            data.append({'subdir': name_})
         else:
             data.append(
-                {'name': name.decode('utf8'), 'count': object_count,
-                 'bytes': bytes_used,
+                {'name': name_, 'count': object_count, 'bytes': bytes_used,
                  'last_modified': Timestamp(put_timestamp).isoformat})
     if response_content_type.endswith('/xml'):
         account_list = listing_formats.account_to_xml(data, account)
diff --git a/swift/common/db_replicator.py b/swift/common/db_replicator.py
index 3c81985436..639f570892 100644
--- a/swift/common/db_replicator.py
+++ b/swift/common/db_replicator.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
 import os
 import random
 import math
@@ -32,7 +33,7 @@ from swift.common.constraints import check_drive
 from swift.common.utils import get_logger, whataremyips, storage_directory, \
     renamer, mkdirs, lock_parent_directory, config_true_value, \
     unlink_older_than, dump_recon_cache, rsync_module_interpolation, \
-    json, parse_override_options, round_robin_iter, Everything, get_db_files, \
+    parse_override_options, round_robin_iter, Everything, get_db_files, \
     parse_db_filename, quote, RateLimitedIterator
 from swift.common import ring
 from swift.common.ring.utils import is_local_device
@@ -476,7 +477,7 @@ class Replicator(Daemon):
         elif response.status == HTTP_INSUFFICIENT_STORAGE:
             raise DriveNotMounted()
         elif 200 <= response.status < 300:
-            rinfo = json.loads(response.data.decode('ascii'))
+            rinfo = json.loads(response.data)
             local_sync = broker.get_sync(rinfo['id'], incoming=False)
             if rinfo.get('metadata', ''):
                 broker.update_metadata(json.loads(rinfo['metadata']))
diff --git a/swift/common/request_helpers.py b/swift/common/request_helpers.py
index 3d0174d85a..a09ee26c15 100644
--- a/swift/common/request_helpers.py
+++ b/swift/common/request_helpers.py
@@ -56,17 +56,36 @@ def get_param(req, name, default=None):
     :param name: parameter name
     :param default: result to return if the parameter is not found
     :returns: HTTP request parameter value
-              (as UTF-8 encoded str, not unicode object)
+              (in py2, as UTF-8 encoded str, not unicode object)
     :raises HTTPBadRequest: if param not valid UTF-8 byte sequence
     """
     value = req.params.get(name, default)
-    if value and not isinstance(value, six.text_type):
-        try:
-            value.decode('utf8')    # Ensure UTF8ness
-        except UnicodeDecodeError:
-            raise HTTPBadRequest(
-                request=req, content_type='text/plain',
-                body='"%s" parameter not valid UTF-8' % name)
+    if six.PY2:
+        if value and not isinstance(value, six.text_type):
+            try:
+                value.decode('utf8')    # Ensure UTF8ness
+            except UnicodeDecodeError:
+                raise HTTPBadRequest(
+                    request=req, content_type='text/plain',
+                    body='"%s" parameter not valid UTF-8' % name)
+    else:
+        if value:
+            try:
+                if isinstance(value, six.text_type):
+                    value = value.encode('latin1')
+            except UnicodeEncodeError:
+                # This happens, remarkably enough, when WSGI string is not
+                # a valid latin-1, and passed through parse_qsl().
+                raise HTTPBadRequest(
+                    request=req, content_type='text/plain',
+                    body='"%s" parameter not valid UTF-8' % name)
+            try:
+                # Ensure UTF8ness since we're at it
+                value = value.decode('utf8')
+            except UnicodeDecodeError:
+                raise HTTPBadRequest(
+                    request=req, content_type='text/plain',
+                    body='"%s" parameter not valid UTF-8' % name)
     return value
 
 
diff --git a/swift/common/swob.py b/swift/common/swob.py
index 95c8f6417f..ffd0cf1fcf 100644
--- a/swift/common/swob.py
+++ b/swift/common/swob.py
@@ -757,14 +757,18 @@ def _req_environ_property(environ_field, is_wsgi_string_field=True):
         return self.environ.get(environ_field, None)
 
     def setter(self, value):
-        if six.PY2 and isinstance(value, six.text_type):
-            self.environ[environ_field] = value.encode('utf-8')
-        elif not six.PY2 and isinstance(value, six.binary_type):
-            self.environ[environ_field] = value.decode('latin1')
+        if six.PY2:
+            if isinstance(value, six.text_type):
+                self.environ[environ_field] = value.encode('utf-8')
+            else:
+                self.environ[environ_field] = value
         else:
-            if not six.PY2 and is_wsgi_string_field:
+            if is_wsgi_string_field:
                 # Check that input is valid before setting
-                value.encode('latin1')
+                if isinstance(value, str):
+                    value.encode('latin1').decode('utf-8')
+                if isinstance(value, bytes):
+                    value = value.decode('latin1')
             self.environ[environ_field] = value
 
     return property(getter, setter, doc=("Get and set the %s property "
@@ -948,8 +952,13 @@ class Request(object):
         "Provides QUERY_STRING parameters as a dictionary"
         if self._params_cache is None:
             if 'QUERY_STRING' in self.environ:
-                self._params_cache = dict(
-                    urllib.parse.parse_qsl(self.environ['QUERY_STRING'], True))
+                if six.PY2:
+                    self._params_cache = dict(urllib.parse.parse_qsl(
+                        self.environ['QUERY_STRING'], True))
+                else:
+                    self._params_cache = dict(urllib.parse.parse_qsl(
+                        self.environ['QUERY_STRING'],
+                        keep_blank_values=True, encoding='latin-1'))
             else:
                 self._params_cache = {}
         return self._params_cache
diff --git a/test/unit/account/test_server.py b/test/unit/account/test_server.py
index 61cef2ebe9..4c780d40e2 100644
--- a/test/unit/account/test_server.py
+++ b/test/unit/account/test_server.py
@@ -229,7 +229,7 @@ class TestAccountController(unittest.TestCase):
             req = Request.blank('/sda1/p/a/',
                                 environ={'REQUEST_METHOD': 'REPLICATE'},
                                 headers={})
-            json_string = '["rsync_then_merge", "a.db"]'
+            json_string = b'["rsync_then_merge", "a.db"]'
             inbuf = WsgiBytesIO(json_string)
             req.environ['wsgi.input'] = inbuf
             resp = req.get_response(self.controller)
@@ -244,7 +244,7 @@ class TestAccountController(unittest.TestCase):
             req = Request.blank('/sda1/p/a/',
                                 environ={'REQUEST_METHOD': 'REPLICATE'},
                                 headers={})
-            json_string = '["complete_rsync", "a.db"]'
+            json_string = b'["complete_rsync", "a.db"]'
             inbuf = WsgiBytesIO(json_string)
             req.environ['wsgi.input'] = inbuf
             resp = req.get_response(self.controller)
@@ -256,7 +256,7 @@ class TestAccountController(unittest.TestCase):
                             headers={})
 
         # check valuerror
-        wsgi_input_valuerror = '["sync" : sync, "-1"]'
+        wsgi_input_valuerror = b'["sync" : sync, "-1"]'
         inbuf1 = WsgiBytesIO(wsgi_input_valuerror)
         req.environ['wsgi.input'] = inbuf1
         resp = req.get_response(self.controller)
@@ -267,7 +267,7 @@ class TestAccountController(unittest.TestCase):
         req = Request.blank('/sda1/p/a/',
                             environ={'REQUEST_METHOD': 'REPLICATE'},
                             headers={})
-        json_string = '["unknown_sync", "a.db"]'
+        json_string = b'["unknown_sync", "a.db"]'
         inbuf = WsgiBytesIO(json_string)
         req.environ['wsgi.input'] = inbuf
         resp = req.get_response(self.controller)
@@ -281,7 +281,7 @@ class TestAccountController(unittest.TestCase):
         req = Request.blank('/sda1/p/a/',
                             environ={'REQUEST_METHOD': 'REPLICATE'},
                             headers={})
-        json_string = '["unknown_sync", "a.db"]'
+        json_string = b'["unknown_sync", "a.db"]'
         inbuf = WsgiBytesIO(json_string)
         req.environ['wsgi.input'] = inbuf
         resp = req.get_response(self.controller)
@@ -386,7 +386,7 @@ class TestAccountController(unittest.TestCase):
                             headers={'Accept': 'application/plain;q=1;q=0.5'})
         resp = req.get_response(self.controller)
         self.assertEqual(resp.status_int, 400)
-        self.assertEqual(resp.body, '')
+        self.assertEqual(resp.body, b'')
 
     def test_HEAD_invalid_format(self):
         format = '%D1%BD%8A9'  # invalid UTF-8; should be %E1%BD%8A9 (E -> D)
@@ -497,7 +497,7 @@ class TestAccountController(unittest.TestCase):
                             headers={'X-Timestamp': normalize_timestamp(2)})
         resp = req.get_response(self.controller)
         self.assertEqual(resp.status_int, 403)
-        self.assertEqual(resp.body, 'Recently deleted')
+        self.assertEqual(resp.body, b'Recently deleted')
         self.assertEqual(resp.headers['X-Account-Status'], 'Deleted')
 
     def test_PUT_non_utf8_metadata(self):
@@ -885,7 +885,7 @@ class TestAccountController(unittest.TestCase):
                             headers={'Accept': 'application/plain;q=foo'})
         resp = req.get_response(self.controller)
         self.assertEqual(resp.status_int, 400)
-        self.assertEqual(resp.body, 'Invalid Accept header')
+        self.assertEqual(resp.body, b'Invalid Accept header')
 
     def test_GET_over_limit(self):
         req = Request.blank(
@@ -915,7 +915,8 @@ class TestAccountController(unittest.TestCase):
         req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'GET'})
         resp = req.get_response(self.controller)
         self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body.strip().split('\n'), ['c1', 'c2'])
+        self.assertEqual(resp.body.decode('utf-8').strip().split('\n'),
+                         ['c1', 'c2'])
         req = Request.blank('/sda1/p/a/c1', environ={'REQUEST_METHOD': 'PUT'},
                             headers={'X-Put-Timestamp': '1',
                                      'X-Delete-Timestamp': '0',
@@ -933,7 +934,8 @@ class TestAccountController(unittest.TestCase):
         req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'GET'})
         resp = req.get_response(self.controller)
         self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body.strip().split('\n'), ['c1', 'c2'])
+        self.assertEqual(resp.body.decode('utf-8').strip().split('\n'),
+                         ['c1', 'c2'])
         self.assertEqual(resp.content_type, 'text/plain')
         self.assertEqual(resp.charset, 'utf-8')
 
@@ -942,7 +944,8 @@ class TestAccountController(unittest.TestCase):
                             environ={'REQUEST_METHOD': 'GET'})
         resp = req.get_response(self.controller)
         self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body.strip().split('\n'), ['c1', 'c2'])
+        self.assertEqual(resp.body.decode('utf-8').strip().split('\n'),
+                         ['c1', 'c2'])
         self.assertEqual(resp.content_type, 'text/plain')
         self.assertEqual(resp.charset, 'utf-8')
 
@@ -1195,12 +1198,14 @@ class TestAccountController(unittest.TestCase):
                             environ={'REQUEST_METHOD': 'GET'})
         resp = req.get_response(self.controller)
         self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body.strip().split('\n'), ['c0', 'c1', 'c2'])
+        self.assertEqual(resp.body.decode('utf-8').strip().split('\n'),
+                         ['c0', 'c1', 'c2'])
         req = Request.blank('/sda1/p/a?limit=3&marker=c2',
                             environ={'REQUEST_METHOD': 'GET'})
         resp = req.get_response(self.controller)
         self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body.strip().split('\n'), ['c3', 'c4'])
+        self.assertEqual(resp.body.decode('utf-8').strip().split('\n'),
+                         ['c3', 'c4'])
 
     def test_GET_limit_marker_json(self):
         req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'PUT',
@@ -1341,7 +1346,7 @@ class TestAccountController(unittest.TestCase):
         req.accept = '*/*'
         resp = req.get_response(self.controller)
         self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body, 'c1\n')
+        self.assertEqual(resp.body, b'c1\n')
 
     def test_GET_accept_application_wildcard(self):
         req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'PUT',
@@ -1416,7 +1421,7 @@ class TestAccountController(unittest.TestCase):
         req.accept = 'application/json'
         resp = req.get_response(self.controller)
         self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body, 'c1\n')
+        self.assertEqual(resp.body, b'c1\n')
 
     def test_GET_accept_not_valid(self):
         req = Request.blank('/sda1/p/a', environ={'REQUEST_METHOD': 'PUT',
@@ -1469,19 +1474,20 @@ class TestAccountController(unittest.TestCase):
                             environ={'REQUEST_METHOD': 'GET'})
         resp = req.get_response(self.controller)
         self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body.strip().split('\n'), ['sub.'])
+        self.assertEqual(resp.body.decode('utf-8').strip().split('\n'),
+                         ['sub.'])
         req = Request.blank('/sda1/p/a?prefix=sub.&delimiter=.',
                             environ={'REQUEST_METHOD': 'GET'})
         resp = req.get_response(self.controller)
         self.assertEqual(resp.status_int, 200)
         self.assertEqual(
-            resp.body.strip().split('\n'),
+            resp.body.decode('utf-8').strip().split('\n'),
             ['sub.0', 'sub.0.', 'sub.1', 'sub.1.', 'sub.2', 'sub.2.'])
         req = Request.blank('/sda1/p/a?prefix=sub.1.&delimiter=.',
                             environ={'REQUEST_METHOD': 'GET'})
         resp = req.get_response(self.controller)
         self.assertEqual(resp.status_int, 200)
-        self.assertEqual(resp.body.strip().split('\n'),
+        self.assertEqual(resp.body.decode('utf-8').strip().split('\n'),
                          ['sub.1.0', 'sub.1.1', 'sub.1.2'])
 
     def test_GET_prefix_delimiter_json(self):
@@ -1609,7 +1615,7 @@ class TestAccountController(unittest.TestCase):
         outbuf = StringIO()
 
         def start_response(*args):
-            outbuf.writelines(args)
+            outbuf.write(args[0])
 
         self.controller.__call__({'REQUEST_METHOD': 'GET',
                                   'SCRIPT_NAME': '',
@@ -1635,7 +1641,7 @@ class TestAccountController(unittest.TestCase):
         outbuf = StringIO()
 
         def start_response(*args):
-            outbuf.writelines(args)
+            outbuf.write(args[0])
 
         self.controller.__call__({'REQUEST_METHOD': 'GET',
                                   'SCRIPT_NAME': '',
@@ -1661,7 +1667,7 @@ class TestAccountController(unittest.TestCase):
         outbuf = StringIO()
 
         def start_response(*args):
-            outbuf.writelines(args)
+            outbuf.write(args[0])
 
         self.controller.__call__({'REQUEST_METHOD': 'GET',
                                   'SCRIPT_NAME': '',
@@ -1686,7 +1692,7 @@ class TestAccountController(unittest.TestCase):
         outbuf = StringIO()
 
         def start_response(*args):
-            outbuf.writelines(args)
+            outbuf.write(args[0])
 
         self.controller.__call__({'REQUEST_METHOD': 'method_doesnt_exist',
                                   'PATH_INFO': '/sda1/p/a'},
@@ -1699,7 +1705,7 @@ class TestAccountController(unittest.TestCase):
         outbuf = StringIO()
 
         def start_response(*args):
-            outbuf.writelines(args)
+            outbuf.write(args[0])
 
         self.controller.__call__({'REQUEST_METHOD': '__init__',
                                   'PATH_INFO': '/sda1/p/a'},
@@ -1837,8 +1843,7 @@ class TestAccountController(unittest.TestCase):
              'replication_server': 'false'})
 
         def start_response(*args):
-            """Sends args to outbuf"""
-            outbuf.writelines(args)
+            outbuf.write(args[0])
 
         method = 'PUT'
         env = {'REQUEST_METHOD': method,
@@ -1875,8 +1880,7 @@ class TestAccountController(unittest.TestCase):
              'replication_server': 'false'})
 
         def start_response(*args):
-            """Sends args to outbuf"""
-            outbuf.writelines(args)
+            outbuf.write(args[0])
 
         method = 'PUT'
         env = {'REQUEST_METHOD': method,
@@ -1894,8 +1898,8 @@ class TestAccountController(unittest.TestCase):
                'wsgi.multiprocess': False,
                'wsgi.run_once': False}
 
-        answer = ['<html><h1>Method Not Allowed</h1><p>The method is not '
-                  'allowed for this resource.</p></html>']
+        answer = [b'<html><h1>Method Not Allowed</h1><p>The method is not '
+                  b'allowed for this resource.</p></html>']
         mock_method = replication(public(lambda x: mock.MagicMock()))
         with mock.patch.object(self.controller, method,
                                new=mock_method):
@@ -1912,8 +1916,7 @@ class TestAccountController(unittest.TestCase):
              'replication_server': 'true'})
 
         def start_response(*args):
-            """Sends args to outbuf"""
-            outbuf.writelines(args)
+            outbuf.write(args[0])
 
         obj_methods = ['DELETE', 'PUT', 'HEAD', 'GET', 'POST', 'OPTIONS']
         for method in obj_methods:
@@ -1946,8 +1949,7 @@ class TestAccountController(unittest.TestCase):
             logger=self.logger)
 
         def start_response(*args):
-            # Sends args to outbuf
-            outbuf.writelines(args)
+            outbuf.write(args[0])
 
         method = 'PUT'
 
@@ -1973,7 +1975,7 @@ class TestAccountController(unittest.TestCase):
         with mock.patch.object(self.account_controller, method,
                                new=mock_put_method):
             response = self.account_controller.__call__(env, start_response)
-            self.assertTrue(response[0].startswith(
+            self.assertTrue(response[0].decode('ascii').startswith(
                 'Traceback (most recent call last):'))
             self.assertEqual(self.logger.get_lines_for_level('error'), [
                 'ERROR __call__ error with %(method)s %(path)s : ' % {
diff --git a/tox.ini b/tox.ini
index eeec59ba2f..4f695b420b 100644
--- a/tox.ini
+++ b/tox.ini
@@ -29,11 +29,7 @@ setenv = VIRTUAL_ENV={envdir}
 [testenv:py35]
 commands =
   nosetests {posargs:\
-      test/unit/account/test_auditor.py \
-      test/unit/account/test_backend.py \
-      test/unit/account/test_reaper.py \
-      test/unit/account/test_replicator.py \
-      test/unit/account/test_utils.py \
+      test/unit/account \
       test/unit/cli/test_dispersion_report.py \
       test/unit/cli/test_form_signature.py \
       test/unit/cli/test_info.py \