From fcccb52ff492f59bc26cc650761a3597216dd143 Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Mon, 15 Oct 2018 15:44:03 -0700 Subject: [PATCH] s3api: Stop relying on container listings during multipart uploads Previously, we would list the segments container before completing a multipart upload so that we could verify ETags and sizes before attempting to create the SLO. However, container listings are only eventually-consistent, which meant that clients could receive a 400 response complaining that parts could not be found, even though all parts were uploaded successfully. Now, use the new SLO validator callback to validate segment sizes, and use the existing SLO checks to validate ETags. Change-Id: I57ae6756bd5f06b80cf03a6b40bf58c845f710fe Closes-Bug: #1636663 --- .../s3api/controllers/multi_upload.py | 70 +++---- test/functional/s3api/test_multi_upload.py | 8 +- .../middleware/s3api/test_multi_upload.py | 171 ++++++++++-------- 3 files changed, 133 insertions(+), 116 deletions(-) diff --git a/swift/common/middleware/s3api/controllers/multi_upload.py b/swift/common/middleware/s3api/controllers/multi_upload.py index 5066526f22..54b1eabb88 100644 --- a/swift/common/middleware/s3api/controllers/multi_upload.py +++ b/swift/common/middleware/s3api/controllers/multi_upload.py @@ -62,6 +62,7 @@ Static Large Object when the multipart upload is completed. from hashlib import md5 import os import re +import time from swift.common.swob import Range from swift.common.utils import json, public, reiterate @@ -562,21 +563,7 @@ class UploadController(Controller): if content_type: headers['Content-Type'] = content_type - # Query for the objects in the segments area to make sure it completed - query = { - 'format': 'json', - 'prefix': '%s/%s/' % (req.object_name, upload_id), - 'delimiter': '/' - } - container = req.container_name + MULTIUPLOAD_SUFFIX - resp = req.get_response(self.app, 'GET', container, '', query=query) - objinfo = json.loads(resp.body) - objtable = dict((o['name'], - {'path': '/'.join(['', container, o['name']]), - 'etag': o['hash'], - 'size_bytes': o['bytes']}) for o in objinfo) - s3_etag_hasher = md5() manifest = [] previous_number = 0 @@ -598,16 +585,16 @@ class UploadController(Controller): if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"': # strip double quotes etag = etag[1:-1] - - info = objtable.get("%s/%s/%s" % (req.object_name, upload_id, - part_number)) - if info is None or info['etag'] != etag: + if len(etag) != 32 or any(c not in '0123456789abcdef' + for c in etag): raise InvalidPart(upload_id=upload_id, part_number=part_number) + manifest.append({ + 'path': '/%s/%s/%s/%d' % ( + container, req.object_name, upload_id, part_number), + 'etag': etag}) s3_etag_hasher.update(etag.decode('hex')) - info['size_bytes'] = int(info['size_bytes']) - manifest.append(info) except (XMLSyntaxError, DocumentInvalid): # NB: our schema definitions catch uploads with no parts here raise MalformedXML() @@ -623,11 +610,21 @@ class UploadController(Controller): c_etag = '; s3_etag=%s' % s3_etag headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag - # Check the size of each segment except the last and make sure they are - # all more than the minimum upload chunk size - for info in manifest[:-1]: - if info['size_bytes'] < self.conf.min_segment_size: - raise EntityTooSmall() + too_small_message = ('s3api requires that each segment be at least ' + '%d bytes' % self.conf.min_segment_size) + + def size_checker(manifest): + # Check the size of each segment except the last and make sure 
+ # they are all more than the minimum upload chunk size. + # Note that we need to use the *internal* keys, since we're + # looking at the manifest that's about to be written. + return [ + (item['name'], too_small_message) + for item in manifest[:-1] + if item and item['bytes'] < self.conf.min_segment_size] + + req.environ['swift.callback.slo_manifest_hook'] = size_checker + start_time = time.time() def response_iter(): # NB: XML requires that the XML declaration, if present, be at the @@ -650,27 +647,36 @@ class UploadController(Controller): put_resp.fix_conditional_response() for chunk in put_resp.response_iter: if not chunk.strip(): + if time.time() - start_time < 10: + # Include some grace period to keep + # ceph-s3tests happy + continue if not yielded_anything: yield ('\n') yielded_anything = True yield chunk + continue body.append(chunk) - body = json.loads(''.join(body)) + body = json.loads(b''.join(body)) if body['Response Status'] != '201 Created': + for seg, err in body['Errors']: + if err == too_small_message: + raise EntityTooSmall() + elif err in ('Etag Mismatch', '404 Not Found'): + raise InvalidPart(upload_id=upload_id) raise InvalidRequest( status=body['Response Status'], msg='\n'.join(': '.join(err) for err in body['Errors'])) except BadSwiftRequest as e: msg = str(e) - expected_msg = ('too small; each segment must be ' - 'at least 1 byte') - if expected_msg in msg: - # FIXME: AWS S3 allows a smaller object than 5 MB if - # there is only one part. Use a COPY request to copy - # the part object from the segments container instead. + if too_small_message in msg: raise EntityTooSmall(msg) + elif ', Etag Mismatch' in msg: + raise InvalidPart(upload_id=upload_id) + elif ', 404 Not Found' in msg: + raise InvalidPart(upload_id=upload_id) else: raise diff --git a/test/functional/s3api/test_multi_upload.py b/test/functional/s3api/test_multi_upload.py index f638181b2a..15f4b84401 100644 --- a/test/functional/s3api/test_multi_upload.py +++ b/test/functional/s3api/test_multi_upload.py @@ -52,11 +52,11 @@ class TestS3ApiMultiUpload(S3ApiBase): self.min_segment_size = int(tf.cluster_info['s3api'].get( 'min_segment_size', 5242880)) - def _gen_comp_xml(self, etags): + def _gen_comp_xml(self, etags, step=1): elem = Element('CompleteMultipartUpload') for i, etag in enumerate(etags): elem_part = SubElement(elem, 'Part') - SubElement(elem_part, 'PartNumber').text = str(i + 1) + SubElement(elem_part, 'PartNumber').text = str(i * step + 1) SubElement(elem_part, 'ETag').text = etag return tostring(elem) @@ -731,13 +731,13 @@ class TestS3ApiMultiUpload(S3ApiBase): etags = [] for i in range(1, 4): - query = 'partNumber=%s&uploadId=%s' % (i, upload_id) + query = 'partNumber=%s&uploadId=%s' % (2 * i - 1, upload_id) status, headers, body = \ self.conn.make_request('PUT', bucket, key, body='A' * 1024 * 1024 * 5, query=query) etags.append(headers['etag']) query = 'uploadId=%s' % upload_id - xml = self._gen_comp_xml(etags[:-1]) + xml = self._gen_comp_xml(etags[:-1], step=2) status, headers, body = \ self.conn.make_request('POST', bucket, key, body=xml, query=query) diff --git a/test/unit/common/middleware/s3api/test_multi_upload.py b/test/unit/common/middleware/s3api/test_multi_upload.py index 819de41801..89b7669e05 100644 --- a/test/unit/common/middleware/s3api/test_multi_upload.py +++ b/test/unit/common/middleware/s3api/test_multi_upload.py @@ -38,11 +38,11 @@ from swift.common.middleware.s3api.s3request import MAX_32BIT_INT xml = '' \ '' \ '1' \ - '0123456789abcdef' \ + 
'0123456789abcdef0123456789abcdef' \ '' \ '' \ '2' \ - '"fedcba9876543210"' \ + '"fedcba9876543210fedcba9876543210"' \ '' \ '' @@ -66,8 +66,9 @@ multiparts_template = \ ('subdir/object/Z/2', '2014-05-07T19:47:58.592270', 'fedcba9876543210', 41)) -s3_etag = '"%s-2"' % hashlib.md5( - '0123456789abcdeffedcba9876543210'.decode('hex')).hexdigest() +s3_etag = '"%s-2"' % hashlib.md5(( + '0123456789abcdef0123456789abcdef' + 'fedcba9876543210fedcba9876543210').decode('hex')).hexdigest() class TestS3ApiMultiUpload(S3ApiTestCase): @@ -682,9 +683,6 @@ class TestS3ApiMultiUpload(S3ApiTestCase): ('HEAD', '/v1/AUTH_test/bucket'), # Segment container exists ('HEAD', '/v1/AUTH_test/bucket+segments/object/X'), - # Get the currently-uploaded segments - ('GET', '/v1/AUTH_test/bucket+segments?delimiter=/' - '&format=json&prefix=object/X/'), # Create the SLO ('PUT', '/v1/AUTH_test/bucket/object' '?heartbeat=on&multipart-manifest=put'), @@ -700,7 +698,8 @@ class TestS3ApiMultiUpload(S3ApiTestCase): h = 'X-Object-Sysmeta-Container-Update-Override-Etag' self.assertEqual(headers.get(h), override_etag) - def test_object_multipart_upload_complete_with_heartbeat(self): + @patch('swift.common.middleware.s3api.controllers.multi_upload.time') + def test_object_multipart_upload_complete_with_heartbeat(self, mock_time): self.swift.register( 'HEAD', '/v1/AUTH_test/bucket+segments/heartbeat-ok/X', swob.HTTPOk, {}, None) @@ -718,6 +717,13 @@ class TestS3ApiMultiUpload(S3ApiTestCase): 'Response Status': '201 Created', 'Errors': [], })]) + mock_time.return_value.time.side_effect = ( + 1, # start_time + 12, # first whitespace + 13, # second... + 14, # third... + 15, # JSON body + ) self.swift.register( 'DELETE', '/v1/AUTH_test/bucket+segments/heartbeat-ok/X', swob.HTTPNoContent, {}, None) @@ -739,14 +745,63 @@ class TestS3ApiMultiUpload(S3ApiTestCase): self.assertEqual(self.swift.calls, [ ('HEAD', '/v1/AUTH_test/bucket'), ('HEAD', '/v1/AUTH_test/bucket+segments/heartbeat-ok/X'), - ('GET', '/v1/AUTH_test/bucket+segments?' - 'delimiter=/&format=json&prefix=heartbeat-ok/X/'), ('PUT', '/v1/AUTH_test/bucket/heartbeat-ok?' 'heartbeat=on&multipart-manifest=put'), ('DELETE', '/v1/AUTH_test/bucket+segments/heartbeat-ok/X'), ]) - def test_object_multipart_upload_complete_failure_with_heartbeat(self): + @patch('swift.common.middleware.s3api.controllers.multi_upload.time') + def test_object_multipart_upload_complete_failure_with_heartbeat( + self, mock_time): + self.swift.register( + 'HEAD', '/v1/AUTH_test/bucket+segments/heartbeat-fail/X', + swob.HTTPOk, {}, None) + self.swift.register( + 'GET', '/v1/AUTH_test/bucket+segments', swob.HTTPOk, {}, + json.dumps([ + {'name': item[0].replace('object', 'heartbeat-fail'), + 'last_modified': item[1], 'hash': item[2], 'bytes': item[3]} + for item in objects_template + ])) + self.swift.register( + 'PUT', '/v1/AUTH_test/bucket/heartbeat-fail', + swob.HTTPAccepted, {}, [' ', ' ', ' ', json.dumps({ + 'Response Status': '400 Bad Request', + 'Errors': [['some/object', '403 Forbidden']], + })]) + mock_time.return_value.time.side_effect = ( + 1, # start_time + 12, # first whitespace + 13, # second... + 14, # third... 
+ 15, # JSON body + ) + + req = Request.blank('/bucket/heartbeat-fail?uploadId=X', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), }, + body=xml) + status, headers, body = self.call_s3api(req) + lines = body.split('\n') + self.assertTrue(lines[0].startswith('' \ - '' \ - '1' \ - 'd41d8cd98f00b204e9800998ecf8427e' \ - '' \ - '' \ - '2' \ - 'd41d8cd98f00b204e9800998ecf8427e' \ - '' \ - '' - - req = Request.blank('/empty-bucket/object?uploadId=X', - environ={'REQUEST_METHOD': 'POST'}, - headers={'Authorization': 'AWS test:tester:hmac', - 'Date': self.get_date_header(), }, - body=xml) - status, headers, body = self.call_s3api(req) - self.assertEqual(self._get_error_code(body), 'EntityTooSmall') - self.assertEqual(status.split()[0], '400') - - self.assertEqual(self.swift.calls, [ - ('HEAD', '/v1/AUTH_test/empty-bucket'), - ('HEAD', '/v1/AUTH_test/empty-bucket+segments/object/X'), - ('GET', '/v1/AUTH_test/empty-bucket+segments?delimiter=/&' - 'format=json&prefix=object/X/'), - ]) - def test_object_multipart_upload_complete_zero_length_final_segment(self): segment_bucket = '/v1/AUTH_test/bucket+segments' @@ -1096,8 +1109,6 @@ class TestS3ApiMultiUpload(S3ApiTestCase): self.assertEqual(self.swift.calls, [ ('HEAD', '/v1/AUTH_test/bucket'), ('HEAD', '/v1/AUTH_test/bucket+segments/object/X'), - ('GET', '/v1/AUTH_test/bucket+segments?delimiter=/&' - 'format=json&prefix=object/X/'), ('PUT', '/v1/AUTH_test/bucket/object?' 'heartbeat=on&multipart-manifest=put'), ('DELETE', '/v1/AUTH_test/bucket+segments/object/X'),
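
For reference, a minimal standalone sketch (not part of the patch) of the slo_manifest_hook contract the change above relies on: SLO invokes the callback with the manifest it is about to write, using the internal keys 'name' and 'bytes', and surfaces any (name, message) pairs the callback returns as per-segment errors in the heartbeat response body. The manifest values and the 5 MB minimum below are illustrative assumptions, not taken from the patch.

    # Illustrative sketch of the slo_manifest_hook callback contract.
    # Assumed: a 5 MB minimum part size and made-up segment paths.

    MIN_SEGMENT_SIZE = 5 * 1024 * 1024  # assumed s3api min_segment_size

    too_small_message = ('s3api requires that each segment be at least '
                         '%d bytes' % MIN_SEGMENT_SIZE)


    def size_checker(manifest):
        # Every segment except the last must meet the minimum size;
        # return (name, message) pairs for the ones that do not.
        return [(item['name'], too_small_message)
                for item in manifest[:-1]
                if item and item['bytes'] < MIN_SEGMENT_SIZE]


    if __name__ == '__main__':
        # Hypothetical manifest: the second part is too small and gets
        # reported; the final part may legitimately be under the minimum.
        manifest = [
            {'name': '/bucket+segments/object/X/1', 'bytes': 5 * 1024 * 1024},
            {'name': '/bucket+segments/object/X/2', 'bytes': 1024},
            {'name': '/bucket+segments/object/X/3', 'bytes': 42},
        ]
        print(size_checker(manifest))
        # -> [('/bucket+segments/object/X/2', 's3api requires that each '
        #      'segment be at least 5242880 bytes')]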