Improve ChunkedBackupDriver hashlib calls
Currently we have 2 hashlib calls within the ChunkedBackupDriver, one to calculate the MD5 of the chunk and another to calculate the SHA256 of the blocks within each chunk. This patch improves the interaction between cinder and the hashlib library by making sure the MD5 and SHA256 related calls are executed in a native thread, which improves context switching responsiveness within eventlet. The MD5 of a 1GB chunk could take around 4 seconds, so the overhead of creating a native thread is acceptable, and for the SHA256, instead of creating a thread for each call, we create a single thread to do the calculations for all the blocks, thus making it cost effective. The current code slices the data into blocks, which means that the data is being copied; this has now been switched to a memoryview object to take advantage of the buffer protocol, so copying of data is no longer necessary. Change-Id: Ifb65b8008f30bc9cc4b6cd9b867a726ec4ed4707
This commit is contained in:
parent
015b105399
commit
671b02b504
@ -372,7 +372,7 @@ class ChunkedBackupDriver(driver.BackupDriver):
|
||||
container, object_name, extra_metadata=extra_metadata
|
||||
) as writer:
|
||||
writer.write(output_data)
|
||||
md5 = hashlib.md5(data).hexdigest()
|
||||
md5 = eventlet.tpool.execute(hashlib.md5, data).hexdigest()
|
||||
obj[object_name]['md5'] = md5
|
||||
LOG.debug('backup MD5 for %(object_name)s: %(md5)s',
|
||||
{'object_name': object_name, 'md5': md5})
|
||||
@ -470,6 +470,25 @@ class ChunkedBackupDriver(driver.BackupDriver):
|
||||
disk_path)
|
||||
return win32_diskutils.get_disk_size(disk_number)
|
||||
|
||||
def _calculate_sha(self, data):
    """Calculate the SHA256 of every block within a data chunk.

    The chunk is split into consecutive blocks of
    ``self.sha_block_size_bytes`` bytes (the final block may be shorter)
    and each block is hashed independently.

    This method cannot log anything as it is called on a native thread.

    :param data: bytes-like object holding the chunk contents.
    :returns: list of hexadecimal SHA256 digests, one per block, in
              order; an empty list when ``data`` is empty.
    :raises ValueError: if ``self.sha_block_size_bytes`` is not positive.
    """
    block_size = self.sha_block_size_bytes
    if block_size <= 0:
        # A non-positive block size would never advance the offset, so
        # the native thread running this method would spin forever.
        raise ValueError('sha_block_size_bytes must be a positive integer, '
                         'got %r' % (block_size,))

    # NOTE(geguileo): Using memoryview to avoid data copying when slicing
    # for the sha256 call.
    chunk = memoryview(data)

    # Slicing clamps at the end of the buffer, so the last block is simply
    # shorter when the chunk length is not a multiple of the block size.
    return [hashlib.sha256(chunk[off:off + block_size]).hexdigest()
            for off in range(0, len(chunk), block_size)]
|
||||
|
||||
def backup(self, backup, volume_file, backup_metadata=True):
|
||||
"""Backup the given volume.
|
||||
|
||||
@ -562,18 +581,7 @@ class ChunkedBackupDriver(driver.BackupDriver):
|
||||
break
|
||||
|
||||
# Calculate new shas with the datablock.
|
||||
shalist = []
|
||||
off = 0
|
||||
datalen = len(data)
|
||||
while off < datalen:
|
||||
chunk_start = off
|
||||
chunk_end = chunk_start + self.sha_block_size_bytes
|
||||
if chunk_end > datalen:
|
||||
chunk_end = datalen
|
||||
chunk = data[chunk_start:chunk_end]
|
||||
sha = hashlib.sha256(chunk).hexdigest()
|
||||
shalist.append(sha)
|
||||
off += self.sha_block_size_bytes
|
||||
shalist = eventlet.tpool.execute(self._calculate_sha, data)
|
||||
sha256_list.extend(shalist)
|
||||
|
||||
# If parent_backup is not None, that means an incremental
|
||||
@ -600,7 +608,7 @@ class ChunkedBackupDriver(driver.BackupDriver):
|
||||
|
||||
# The last extent extends to the end of data buffer.
|
||||
if extent_off != -1:
|
||||
extent_end = datalen
|
||||
extent_end = len(data)
|
||||
segment = data[extent_off:extent_end]
|
||||
self._backup_chunk(backup, container, segment,
|
||||
data_offset + extent_off,
|
||||
|
Loading…
Reference in New Issue
Block a user