Make swift-bench use less memory with large object sizes.
Before, swift-bench would create a string of the right size and pass that to [direct_]put_object. Uploading 5 GB objects with a concurrency of 4? Hope you've got a spare 20 GB of RAM in that machine. Now it lazily generates chunks (of size 64 KiB) to send. In my limited testing, this keeps RAM usage low without hurting speed.

There's backwards-compatibility code in direct_client.direct_put_object for any callers still passing in strings, though there are none left in Swift's codebase: its one such caller now passes an iterable.

Fixes bug 911925.

Change-Id: I40669fc00c7fb6699d8fb514e1be9b69436eca42
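To make the before/after concrete, here is a small illustrative sketch (not code from this commit; the function names are made up) contrasting the eager body the benchmark used to build with a lazy, chunked one:

    # Illustrative only: old eager body vs. new lazy body.
    CHUNK_SIZE = 1024 * 64  # 64 KiB, the chunk size used by the commit

    def eager_body(size):
        # Old behaviour: the whole object is materialized in RAM at once,
        # so four concurrent 5 GB PUTs need roughly 20 GB.
        return '0' * size

    def lazy_body(size, chunk_size=CHUNK_SIZE):
        # New behaviour: only one chunk is resident at a time.
        sent = 0
        while sent < size:
            n = min(chunk_size, size - sent)
            yield '0' * n
            sent += n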
parent a69bc08990
commit 24f9fe919e
@@ -70,6 +70,40 @@ def create_containers(logger, conf):
     _func_on_containers(logger, conf, 'put_concurrency', client.put_container)


+class SourceFile(object):
+    """
+    Iterable, file-like object to lazily emit a bunch of zeros in
+    reasonable-size chunks.
+
+    swift.common.direct_client wants iterables, but swiftclient wants
+    file-like objects where hasattr(thing, 'read') is true. Therefore,
+    this class can do both.
+    """
+
+    def __init__(self, size, chunk_size=1024 * 64):
+        self.pos = 0
+        self.size = size
+        self.chunk_size = chunk_size
+
+    def __iter__(self):
+        return self
+
+    def __len__(self):
+        return self.size
+
+    def next(self):
+        if self.pos >= self.size:
+            raise StopIteration
+        chunk_size = min(self.size - self.pos, self.chunk_size)
+        self.pos += chunk_size
+        return '0' * chunk_size
+
+    def read(self, desired_size):
+        chunk_size = min(self.size - self.pos, desired_size)
+        self.pos += chunk_size
+        return '0' * chunk_size
+
+
 class ConnectionPool(eventlet.pools.Pool):

     def __init__(self, url, size):
@@ -423,10 +457,10 @@ class BenchPUT(Bench):
         if self.object_sources:
             source = random.choice(self.files)
         elif self.upper_object_size > self.lower_object_size:
-            source = '0' * random.randint(self.lower_object_size,
-                                          self.upper_object_size)
+            source = SourceFile(random.randint(self.lower_object_size,
+                                               self.upper_object_size))
         else:
-            source = '0' * self.object_size
+            source = SourceFile(self.object_size)
         device = random.choice(self.devices)
         partition = str(random.randint(1, 3000))
         container_name = random.choice(self.containers)
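As a quick check of the dual interface described in SourceFile's docstring, here is a small usage sketch. It is not part of the commit: the consume_* helpers are made up for illustration and assume the SourceFile class above is in scope.

    def consume_as_iterable(body):
        # Drive the body the way swift.common.direct_client does: iterate it.
        return sum(len(chunk) for chunk in body)

    def consume_as_file(body, read_size=1024 * 64):
        # Drive the body the way swiftclient does: call read() until empty.
        total = 0
        while True:
            chunk = body.read(read_size)
            if not chunk:
                break
            total += len(chunk)
        return total

    # Either way, only about 64 KiB of zeros is resident at any moment.
    print(consume_as_iterable(SourceFile(10 * 1024 * 1024)))  # 10485760
    print(consume_as_file(SourceFile(10 * 1024 * 1024)))      # 10485760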
@@ -296,7 +296,7 @@ def direct_put_object(node, part, account, container, name, contents,
     :param account: account name
     :param container: container name
     :param name: object name
-    :param contents: a string to read object data from
+    :param contents: an iterable or string to read object data from
     :param content_length: value to send as content-length header
     :param etag: etag of contents
     :param content_type: value to send as content-type header
@@ -320,11 +320,14 @@ def direct_put_object(node, part, account, container, name, contents,
         headers['Content-Type'] = 'application/octet-stream'
     if not contents:
         headers['Content-Length'] = '0'
+    if isinstance(contents, basestring):
+        contents = [contents]
     headers['X-Timestamp'] = normalize_timestamp(time())
     with Timeout(conn_timeout):
         conn = http_connect(node['ip'], node['port'], node['device'], part,
                             'PUT', path, headers=headers)
-        conn.send(contents)
+        for chunk in contents:
+            conn.send(chunk)
    with Timeout(response_timeout):
         resp = conn.getresponse()
         resp.read()
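One subtlety worth calling out: the isinstance(contents, basestring) shim wraps a string in a one-element list before the send loop. A bare string is itself iterable, one character at a time, so iterating it directly would turn a legacy string caller into one tiny conn.send() per byte. A small illustrative sketch (not from the commit; it counts chunks instead of sending them, and assumes SourceFile from bench.py is in scope):

    def chunks_that_would_be_sent(contents):
        # Mirrors the compatibility shim above (basestring is Python 2).
        if isinstance(contents, basestring):
            contents = [contents]
        return [chunk for chunk in contents]

    print(len(chunks_that_would_be_sent('0' * 1024)))              # 1: whole string in one send
    print(len(chunks_that_would_be_sent(SourceFile(1024 * 256))))  # 4: one 64 KiB chunk per send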