Handle non-ASCII characters in S3 v2 listings.

When computing the base64-encoded continuation token, s3api should
UTF-8 encode the object names before base64-encoding them.

Change-Id: I3f3edc17e05e7c1e7c6afec66973179e51c7d9d8
This commit is contained in:
Timur Alperovich 2018-09-26 19:02:34 -07:00 committed by Tim Burke
parent c1c65a7e9f
commit 370f7d7a37
4 changed files with 68 additions and 65 deletions

View File

@ -67,7 +67,7 @@ class BucketController(Controller):
for seg in segments:
try:
req.get_response(self.app, 'DELETE', container,
seg['name'])
seg['name'].encode('utf8'))
except NoSuchKey:
pass
except InternalError:
@ -183,10 +183,10 @@ class BucketController(Controller):
if is_truncated:
if 'name' in objects[-1]:
SubElement(elem, 'NextContinuationToken').text = \
b64encode(objects[-1]['name'])
b64encode(objects[-1]['name'].encode('utf8'))
if 'subdir' in objects[-1]:
SubElement(elem, 'NextContinuationToken').text = \
b64encode(objects[-1]['subdir'])
b64encode(objects[-1]['subdir'].encode('utf8'))
if 'continuation-token' in req.params:
SubElement(elem, 'ContinuationToken').text = \
req.params['continuation-token']
@ -210,7 +210,7 @@ class BucketController(Controller):
if 'subdir' not in o:
name = o['name']
if encoding_type == 'url':
name = quote(name)
name = quote(name.encode('utf-8'))
if listing_type == 'object-versions':
contents = SubElement(elem, 'Version')
@ -240,7 +240,7 @@ class BucketController(Controller):
common_prefixes = SubElement(elem, 'CommonPrefixes')
name = o['subdir']
if encoding_type == 'url':
name = quote(name)
name = quote(name.encode('utf-8'))
SubElement(common_prefixes, 'Prefix').text = name
body = tostring(elem)

View File

@ -17,6 +17,8 @@ import os
import test.functional as tf
from boto.s3.connection import S3Connection, OrdinaryCallingFormat, \
BotoClientError, S3ResponseError
import six
RETRY_COUNT = 3
@ -75,6 +77,9 @@ class Connection(object):
break
for bucket in buckets:
if not isinstance(bucket.name, six.binary_type):
bucket.name = bucket.name.encode('utf-8')
try:
for upload in bucket.list_multipart_uploads():
upload.cancel_upload()

View File

@ -406,63 +406,60 @@ class TestS3ApiBucket(S3ApiBase):
self.assertTrue(o.find('Owner/DisplayName').text,
self.conn.user_id)
def test_get_bucket_v2_with_continuation_token(self):
def test_get_bucket_v2_with_continuation_token_and_delimiter(self):
bucket = 'bucket'
put_objects = ('object', 'object2', 'subdir/object', 'subdir2/object',
'dir/subdir/object')
put_objects = ('object', u'object2-\u062a', 'subdir/object',
u'subdir2-\u062a/object', 'dir/subdir/object',
'x', 'y', 'z')
self._prepare_test_get_bucket(bucket, put_objects)
query = 'list-type=2&max-keys=3'
expect_objects = ('dir/subdir/object', 'object', 'object2')
status, headers, body = \
self.conn.make_request('GET', bucket, query=query)
self.assertEqual(status, 200)
elem = fromstring(body, 'ListBucketResult')
self.assertEqual(elem.find('MaxKeys').text, '3')
self.assertEqual(elem.find('KeyCount').text, '3')
self.assertEqual(elem.find('IsTruncated').text, 'true')
next_cont_token_elem = elem.find('NextContinuationToken')
self.assertIsNotNone(next_cont_token_elem)
resp_objects = elem.findall('./Contents')
self.assertEqual(len(list(resp_objects)), len(expect_objects))
for i, o in enumerate(resp_objects):
self.assertEqual(o.find('Key').text, expect_objects[i])
self.assertTrue(o.find('LastModified').text is not None)
self.assertRegexpMatches(
o.find('LastModified').text,
r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$')
self.assertTrue(o.find('ETag').text is not None)
self.assertTrue(o.find('Size').text is not None)
self.assertEqual(o.find('StorageClass').text, 'STANDARD')
self.assertIsNone(o.find('Owner/ID'))
self.assertIsNone(o.find('Owner/DisplayName'))
expected = [{'objects': ['object', u'object2-\u062a'],
'subdirs': ['dir/']},
{'objects': ['x'],
'subdirs': ['subdir/', u'subdir2-\u062a/']},
{'objects': ['y', 'z'],
'subdirs': []}]
query = 'list-type=2&max-keys=3&continuation-token=%s' % \
next_cont_token_elem.text
expect_objects = ('subdir/object', 'subdir2/object')
status, headers, body = \
self.conn.make_request('GET', bucket, query=query)
self.assertEqual(status, 200)
elem = fromstring(body, 'ListBucketResult')
self.assertEqual(elem.find('MaxKeys').text, '3')
self.assertEqual(elem.find('KeyCount').text, '2')
self.assertEqual(elem.find('IsTruncated').text, 'false')
self.assertIsNone(elem.find('NextContinuationToken'))
cont_token_elem = elem.find('ContinuationToken')
self.assertEqual(cont_token_elem.text, next_cont_token_elem.text)
resp_objects = elem.findall('./Contents')
self.assertEqual(len(list(resp_objects)), len(expect_objects))
for i, o in enumerate(resp_objects):
self.assertEqual(o.find('Key').text, expect_objects[i])
self.assertTrue(o.find('LastModified').text is not None)
self.assertRegexpMatches(
o.find('LastModified').text,
r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$')
self.assertTrue(o.find('ETag').text is not None)
self.assertTrue(o.find('Size').text is not None)
self.assertEqual(o.find('StorageClass').text, 'STANDARD')
self.assertIsNone(o.find('Owner/ID'))
self.assertIsNone(o.find('Owner/DisplayName'))
continuation_token = ''
query = 'list-type=2&max-keys=3&delimiter=/&continuation-token=%s'
for i in range(len(expected)):
status, headers, body = self.conn.make_request(
'GET', bucket, query=query % continuation_token)
self.assertEqual(status, 200)
elem = fromstring(body, 'ListBucketResult')
self.assertEqual(elem.find('MaxKeys').text, '3')
self.assertEqual(
elem.find('KeyCount').text,
str(len(expected[i]['objects']) + len(expected[i]['subdirs'])))
expect_truncated = 'true' if i < len(expected) - 1 else 'false'
self.assertEqual(elem.find('IsTruncated').text, expect_truncated)
next_cont_token_elem = elem.find('NextContinuationToken')
if expect_truncated == 'true':
self.assertIsNotNone(next_cont_token_elem)
continuation_token = next_cont_token_elem.text
resp_objects = elem.findall('./Contents')
self.assertEqual(
len(list(resp_objects)), len(expected[i]['objects']))
for j, o in enumerate(resp_objects):
self.assertEqual(o.find('Key').text,
expected[i]['objects'][j].encode('utf-8'))
self.assertTrue(o.find('LastModified').text is not None)
self.assertRegexpMatches(
o.find('LastModified').text,
r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$')
self.assertTrue(o.find('ETag').text is not None)
self.assertTrue(o.find('Size').text is not None)
self.assertEqual(o.find('StorageClass').text, 'STANDARD')
self.assertIsNone(o.find('Owner/ID'))
self.assertIsNone(o.find('Owner/DisplayName'))
resp_subdirs = elem.findall('./CommonPrefixes')
self.assertEqual(
len(list(resp_subdirs)), len(expected[i]['subdirs']))
for j, o in enumerate(resp_subdirs):
self.assertEqual(
o.find('Prefix').text,
expected[i]['subdirs'][j].encode('utf-8'))
def test_head_bucket_error(self):
self.conn.make_request('PUT', 'bucket')

View File

@ -39,13 +39,14 @@ class TestS3ApiBucket(S3ApiTestCase):
self.objects = (('lily', '2011-01-05T02:19:14.275290', '0', '3909'),
('rose', '2011-01-05T02:19:14.275290', 0, 303),
('viola', '2011-01-05T02:19:14.275290', '0', 3909),
(u'lily-\u062a', '2011-01-05T02:19:14.275290', 0, 390),
('mu', '2011-01-05T02:19:14.275290',
'md5-of-the-manifest; s3_etag=0', '3909'),
('with space', '2011-01-05T02:19:14.275290', 0, 390),
('with%20space', '2011-01-05T02:19:14.275290', 0, 390))
objects = [
{'name': str(item[0]), 'last_modified': str(item[1]),
{'name': item[0], 'last_modified': str(item[1]),
'hash': str(item[2]), 'bytes': str(item[3])}
for item in self.objects]
object_list = json.dumps(objects)
@ -57,7 +58,8 @@ class TestS3ApiBucket(S3ApiTestCase):
swob.HTTPNoContent, {}, json.dumps([]))
for name, _, _, _ in self.objects:
self.swift.register(
'DELETE', '/v1/AUTH_test/bucket+segments/' + name,
'DELETE',
'/v1/AUTH_test/bucket+segments/' + name.encode('utf-8'),
swob.HTTPNoContent, {}, json.dumps([]))
self.swift.register(
'GET',
@ -164,9 +166,8 @@ class TestS3ApiBucket(S3ApiTestCase):
o.find('./LastModified').text)
self.assertEqual('"0"', o.find('./ETag').text)
self.assertEqual(len(names), len(self.objects))
for i in self.objects:
self.assertIn(i[0], names)
self.assertEqual(
names, [obj[0].encode('utf-8') for obj in self.objects])
def test_bucket_GET_url_encoded(self):
bucket_name = 'junk'
@ -192,7 +193,7 @@ class TestS3ApiBucket(S3ApiTestCase):
self.assertEqual(len(names), len(self.objects))
for i in self.objects:
self.assertIn(quote(i[0]), names)
self.assertIn(quote(i[0].encode('utf-8')), names)
def test_bucket_GET_subdir(self):
bucket_name = 'junk-subdir'
@ -518,7 +519,7 @@ class TestS3ApiBucket(S3ApiTestCase):
versions = elem.findall('./Version')
objects = list(self.objects)
self.assertEqual([v.find('./Key').text for v in versions],
[v[0] for v in objects])
[v[0].encode('utf-8') for v in objects])
self.assertEqual([v.find('./IsLatest').text for v in versions],
['true' for v in objects])
self.assertEqual([v.find('./VersionId').text for v in versions],