Make swift-bench use less memory with large object sizes.

Before, swift-bench would create a string of the right size and pass
that to [direct_]put_object. Uploading 5 GB objects w/concurrency of
4? Hope you've got a spare 20 GB of RAM in that machine.

Now it lazily generates chunks (of size 64 KiB) to send. In my limited
testing, this keeps RAM usage low while not impacting speed.

There's backwards-compatibility code in
direct_client.direct_put_object for any callers who are still passing
in strings. The one such caller remaining in Swift's codebase now
passes an iterable instead.

Fixes bug 911925.

Change-Id: I40669fc00c7fb6699d8fb514e1be9b69436eca42
This commit is contained in:
Samuel Merritt 2012-11-08 11:13:21 -08:00
parent a69bc08990
commit 24f9fe919e
2 changed files with 42 additions and 5 deletions

View File

@ -70,6 +70,40 @@ def create_containers(logger, conf):
_func_on_containers(logger, conf, 'put_concurrency', client.put_container)
class SourceFile(object):
    """
    Iterable, file-like object to lazily emit a bunch of zeros in
    reasonable-size chunks.

    swift.common.direct_client wants iterables, but swiftclient wants
    file-like objects where hasattr(thing, 'read') is true. Therefore,
    this class can do both.
    """

    def __init__(self, size, chunk_size=1024 * 64):
        # :param size: total number of bytes this source will produce
        # :param chunk_size: maximum number of bytes emitted per chunk
        self.pos = 0
        self.size = size
        self.chunk_size = chunk_size

    def __iter__(self):
        return self

    def __len__(self):
        return self.size

    def next(self):
        """Return the next chunk of zeros, or raise StopIteration at EOF.

        Fix: the previous version used ``yield`` here, which turned
        next() into a generator function -- calling it returned a
        generator object instead of a string, self.pos never advanced,
        and StopIteration was never raised to the caller. Use a plain
        ``return`` and advance the position before returning.
        """
        if self.pos >= self.size:
            raise StopIteration
        chunk_size = min(self.size - self.pos, self.chunk_size)
        self.pos += chunk_size
        return '0' * chunk_size

    # Python 3 spells the iterator-protocol method __next__; on
    # Python 2 this is just a harmless extra alias.
    __next__ = next

    def read(self, desired_size):
        """File-like read: return up to desired_size bytes of zeros."""
        chunk_size = min(self.size - self.pos, desired_size)
        self.pos += chunk_size
        return '0' * chunk_size
class ConnectionPool(eventlet.pools.Pool):
def __init__(self, url, size):
@ -423,10 +457,10 @@ class BenchPUT(Bench):
if self.object_sources:
source = random.choice(self.files)
elif self.upper_object_size > self.lower_object_size:
source = '0' * random.randint(self.lower_object_size,
self.upper_object_size)
source = SourceFile(random.randint(self.lower_object_size,
self.upper_object_size))
else:
source = '0' * self.object_size
source = SourceFile(self.object_size)
device = random.choice(self.devices)
partition = str(random.randint(1, 3000))
container_name = random.choice(self.containers)

View File

@ -296,7 +296,7 @@ def direct_put_object(node, part, account, container, name, contents,
:param account: account name
:param container: container name
:param name: object name
:param contents: a string to read object data from
:param contents: an iterable or string to read object data from
:param content_length: value to send as content-length header
:param etag: etag of contents
:param content_type: value to send as content-type header
@ -320,11 +320,14 @@ def direct_put_object(node, part, account, container, name, contents,
headers['Content-Type'] = 'application/octet-stream'
if not contents:
headers['Content-Length'] = '0'
if isinstance(contents, basestring):
contents = [contents]
headers['X-Timestamp'] = normalize_timestamp(time())
with Timeout(conn_timeout):
conn = http_connect(node['ip'], node['port'], node['device'], part,
'PUT', path, headers=headers)
conn.send(contents)
for chunk in contents:
conn.send(chunk)
with Timeout(response_timeout):
resp = conn.getresponse()
resp.read()