class SourceFile(object):
    """
    Iterable, file-like object to lazily emit a bunch of zeros in
    reasonable-size chunks.

    swift.common.direct_client wants iterables, but swiftclient wants
    file-like objects where hasattr(thing, 'read') is true. Therefore,
    this class can do both.

    Iteration and read() share a single position, so every byte is
    emitted exactly once no matter which interface consumes it.

    :param size: total number of '0' characters to emit
    :param chunk_size: maximum length of each chunk produced by iteration
    :raises ValueError: if chunk_size is not positive
    """

    def __init__(self, size, chunk_size=1024 * 64):
        # A non-positive chunk size would make iteration emit empty
        # chunks forever without ever reaching the end.
        if chunk_size < 1:
            raise ValueError('chunk_size must be positive')
        self.pos = 0
        self.size = size
        self.chunk_size = chunk_size

    def __iter__(self):
        return self

    def __len__(self):
        return self.size

    def next(self):
        """
        Return the next chunk of zeros.

        :returns: a string of at most chunk_size '0' characters
        :raises StopIteration: once size characters have been emitted
        """
        if self.pos >= self.size:
            raise StopIteration
        chunk_size = min(self.size - self.pos, self.chunk_size)
        self.pos += chunk_size
        # Must be a plain return: a ``yield`` here would turn next() into
        # a generator function, so callers would receive generator
        # objects instead of data and the position would never advance.
        return '0' * chunk_size

    # Python 3 spells the iterator protocol method __next__.
    __next__ = next

    def read(self, desired_size=-1):
        """
        Read up to desired_size characters, file-style.

        :param desired_size: maximum number of characters to return; None
                             or a negative value means "everything left"
        :returns: a string of '0' characters, empty once exhausted
        """
        remaining = max(self.size - self.pos, 0)
        if desired_size is None or desired_size < 0:
            chunk_size = remaining
        else:
            chunk_size = min(remaining, desired_size)
        self.pos += chunk_size
        return '0' * chunk_size
contents = [contents] headers['X-Timestamp'] = normalize_timestamp(time()) with Timeout(conn_timeout): conn = http_connect(node['ip'], node['port'], node['device'], part, 'PUT', path, headers=headers) - conn.send(contents) + for chunk in contents: + conn.send(chunk) with Timeout(response_timeout): resp = conn.getresponse() resp.read()