From 52ecbf95399a7e4c0d062af8d885ef7ce684eda4 Mon Sep 17 00:00:00 2001 From: Clay Gerrard Date: Thu, 13 Sep 2018 11:10:52 -0500 Subject: [PATCH] Add a chunks_finished to BaseDiskFileWriter BaseDiskFileWriter will track md5 and expose upload_size, etag via the chunks_finished method. The BaseDiskFileReader already tracks the md5/etag via _iter_etag, for parity we add a _chunks_etag to the BaseDiskFileReader. Instead of returning the upload_size and hexdigest every call to write, we return the tuple from chunks_finished. Change-Id: I26c58719cff5fde941d0248c250a0204e0379ae5 --- swift/obj/diskfile.py | 26 +++++++++++++++----------- swift/obj/mem_diskfile.py | 11 ++++++++++- swift/obj/server.py | 8 ++------ test/unit/obj/test_diskfile.py | 6 ++---- test/unit/proxy/test_server.py | 3 ++- 5 files changed, 31 insertions(+), 23 deletions(-) diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py index 59ac10bf63..e7fc2fed02 100644 --- a/swift/obj/diskfile.py +++ b/swift/obj/diskfile.py @@ -39,7 +39,7 @@ import os import re import time import uuid -import hashlib +from hashlib import md5 import logging import traceback import xattr @@ -194,14 +194,14 @@ def read_metadata(fd, add_missing_checksum=False): # exist. This is fine; it just means that this object predates the # introduction of metadata checksums. if add_missing_checksum: - new_checksum = hashlib.md5(metadata).hexdigest() + new_checksum = md5(metadata).hexdigest() try: xattr.setxattr(fd, METADATA_CHECKSUM_KEY, new_checksum) except (IOError, OSError) as e: logging.error("Error adding metadata: %s" % e) if metadata_checksum: - computed_checksum = hashlib.md5(metadata).hexdigest().encode('ascii') + computed_checksum = md5(metadata).hexdigest().encode('ascii') if metadata_checksum != computed_checksum: raise DiskFileBadMetadataChecksum( "Metadata checksum mismatch for %s: " @@ -226,7 +226,7 @@ def write_metadata(fd, metadata, xattr_size=65536): :param metadata: metadata to write """ metastr = pickle.dumps(_encode_metadata(metadata), PICKLE_PROTOCOL) - metastr_md5 = hashlib.md5(metastr).hexdigest().encode('ascii') + metastr_md5 = md5(metastr).hexdigest().encode('ascii') key = 0 try: while metastr: @@ -1084,7 +1084,7 @@ class BaseDiskFileManager(object): :param path: full path to directory """ - hashes = defaultdict(hashlib.md5) + hashes = defaultdict(md5) try: path_contents = sorted(os.listdir(path)) except OSError as err: @@ -1626,6 +1626,7 @@ class BaseDiskFileWriter(object): self._fd = None self._tmppath = None self._size = size + self._chunks_etag = md5() self._bytes_per_sync = bytes_per_sync self._diskfile = diskfile self.next_part_power = next_part_power @@ -1716,13 +1717,10 @@ class BaseDiskFileWriter(object): For this implementation, the data is written into a temporary file. :param chunk: the chunk of data to write as a string object - - :returns: the total number of bytes written to an object """ - if not self._fd: raise ValueError('Writer is not open') - + self._chunks_etag.update(chunk) while chunk: written = os.write(self._fd, chunk) self._upload_size += written @@ -1735,7 +1733,13 @@ class BaseDiskFileWriter(object): drop_buffer_cache(self._fd, self._last_sync, diff) self._last_sync = self._upload_size - return self._upload_size + def chunks_finished(self): + """ + Expose internal stats about written chunks. + + :returns: a tuple, (upload_size, etag) + """ + return self._upload_size, self._chunks_etag.hexdigest() def _finalize_put(self, metadata, target_path, cleanup): # Write the metadata before calling fsync() so that both data and @@ -1940,7 +1944,7 @@ class BaseDiskFileReader(object): def _init_checks(self): if self._fp.tell() == 0: self._started_at_0 = True - self._iter_etag = hashlib.md5() + self._iter_etag = md5() def _update_checks(self, chunk): if self._iter_etag: diff --git a/swift/obj/mem_diskfile.py b/swift/obj/mem_diskfile.py index 3de73790f2..db80fac5b1 100644 --- a/swift/obj/mem_diskfile.py +++ b/swift/obj/mem_diskfile.py @@ -103,6 +103,7 @@ class DiskFileWriter(object): self._name = name self._fp = None self._upload_size = 0 + self._chunks_etag = hashlib.md5() def open(self): """ @@ -130,7 +131,15 @@ class DiskFileWriter(object): """ self._fp.write(chunk) self._upload_size += len(chunk) - return self._upload_size + self._chunks_etag.update(chunk) + + def chunks_finished(self): + """ + Expose internal stats about written chunks. + + :returns: a tuple, (upload_size, etag) + """ + return self._upload_size, self._chunks_etag.hexdigest() def put(self, metadata): """ diff --git a/swift/obj/server.py b/swift/obj/server.py index 7bc001c9c4..1d9f7390b5 100644 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -796,12 +796,9 @@ class ObjectController(BaseStorageServer): headers={'X-Backend-Timestamp': orig_timestamp.internal}) orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0) upload_expiration = time.time() + self.max_upload_time - etag = md5() elapsed_time = 0 try: with disk_file.create(size=fsize) as writer: - upload_size = 0 - # If the proxy wants to send us object metadata after the # object body, it sets some headers. We have to tell the # proxy, in the 100 Continue response, that we're able to @@ -853,13 +850,13 @@ class ObjectController(BaseStorageServer): if start_time > upload_expiration: self.logger.increment('PUT.timeouts') return HTTPRequestTimeout(request=request) - etag.update(chunk) - upload_size = writer.write(chunk) + writer.write(chunk) elapsed_time += time.time() - start_time except ChunkReadError: return HTTPClientDisconnect(request=request) except ChunkReadTimeout: return HTTPRequestTimeout(request=request) + upload_size, etag = writer.chunks_finished() if upload_size: self.logger.transfer_rate( 'PUT.' + device + '.timing', elapsed_time, @@ -874,7 +871,6 @@ class ObjectController(BaseStorageServer): request_etag = (footer_meta.get('etag') or request.headers.get('etag', '')).lower() - etag = etag.hexdigest() if request_etag and request_etag != etag: return HTTPUnprocessableEntity(request=request) metadata = { diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py index efad2b9eba..7d029946a4 100644 --- a/test/unit/obj/test_diskfile.py +++ b/test/unit/obj/test_diskfile.py @@ -3571,7 +3571,6 @@ class DiskFileMixin(BaseDiskFileTestMixin): except IndexError: data = archives[0] - etag = md5() if ts: timestamp = Timestamp(ts) else: @@ -3582,9 +3581,8 @@ class DiskFileMixin(BaseDiskFileTestMixin): prealloc_size = None with df.create(size=prealloc_size) as writer: - upload_size = writer.write(data) - etag.update(data) - etag = etag.hexdigest() + writer.write(data) + upload_size, etag = writer.chunks_finished() metadata = { 'ETag': etag, 'X-Timestamp': timestamp.internal, diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index f53f87c858..2a423e2187 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -5863,7 +5863,8 @@ class BaseTestECObjectController(BaseTestObjectController): prolis = _test_sockets[0] prosrv = _test_servers[0] sock = connect_tcp(('localhost', prolis.getsockname()[1])) - with mock.patch('swift.obj.server.md5', busted_md5_constructor): + with mock.patch('swift.obj.diskfile.md5', + busted_md5_constructor): fd = sock.makefile() fd.write('PUT /v1/a/%s/pimento HTTP/1.1\r\n' 'Host: localhost\r\n'