From 42f4b150e3fcd8c49ecac2e42839449d00190053 Mon Sep 17 00:00:00 2001 From: Florian Hines Date: Thu, 5 Sep 2013 18:12:15 -0500 Subject: [PATCH] Faster swift-dispersion-populate - Makes swift-dispersion-populate a bit faster when using a larger dispersion_coverage with a larger part_power. - Adds option to only run population for containers OR objects - Adds option to let you resume population at a given point (useful if you need to resume population after a previous run errored out or the like) by specifying which suffix to start at. The original populate just randomly used uuid4().hex as a suffix on the container/object names until all the required partitions were covered. This isn't a big deal if you're only doing 1% coverage on a ring with a small part power but takes ages if you're doing 100% on a larger ring. Change-Id: I52f890a774412c1d6179f12db9081aedc58b6bc2 --- bin/swift-dispersion-populate | 126 ++++++++++++++--------- doc/manpages/swift-dispersion-populate.1 | 12 ++- etc/dispersion.conf-sample | 2 + 3 files changed, 90 insertions(+), 50 deletions(-) diff --git a/bin/swift-dispersion-populate b/bin/swift-dispersion-populate index 1c48526f52..189c427050 100755 --- a/bin/swift-dispersion-populate +++ b/bin/swift-dispersion-populate @@ -20,7 +20,6 @@ from cStringIO import StringIO from optparse import OptionParser from sys import exit, stdout from time import time -from uuid import uuid4 from eventlet import GreenPool, patcher, sleep from eventlet.pools import Pool @@ -87,6 +86,14 @@ if __name__ == '__main__': Usage: %%prog [options] [conf_file] [conf_file] defaults to %s'''.strip() % conffile) + parser.add_option('--container-only', action='store_true', default=False, + help='Only run container population') + parser.add_option('--object-only', action='store_true', default=False, + help='Only run object population') + parser.add_option('--container-suffix-start', type=int, default=0, + help='container suffix start value, defaults to 0') + 
parser.add_option('--object-suffix-start', type=int, default=0, + help='object suffix start value, defaults to 0') parser.add_option('--insecure', action='store_true', default=False, help='Allow accessing insecure keystone server. ' 'The keystone\'s certificate will not be verified.') @@ -106,6 +113,13 @@ Usage: %%prog [options] [conf_file] endpoint_type = str(conf.get('endpoint_type', 'publicURL')) insecure = options.insecure \ or config_true_value(conf.get('keystone_api_insecure', 'no')) + container_populate = config_true_value( + conf.get('container_populate', 'yes')) and not options.object_only + object_populate = config_true_value( + conf.get('object_populate', 'yes')) and not options.container_only + + if not (object_populate or container_populate): + exit("Neither container or object populate is set to run") coropool = GreenPool(size=concurrency) retries_done = 0 @@ -126,52 +140,66 @@ Usage: %%prog [options] [conf_file] os_options=os_options, insecure=insecure) - container_ring = Ring(swift_dir, ring_name='container') - parts_left = dict((x, x) for x in xrange(container_ring.partition_count)) - item_type = 'containers' - created = 0 - retries_done = 0 - need_to_create = need_to_queue = \ - dispersion_coverage / 100.0 * container_ring.partition_count - begun = next_report = time() - next_report += 2 - while need_to_queue >= 1: - container = 'dispersion_%s' % uuid4().hex - part, _junk = container_ring.get_nodes(account, container) - if part in parts_left: - coropool.spawn(put_container, connpool, container, report) - sleep() - del parts_left[part] - need_to_queue -= 1 - coropool.waitall() - elapsed, elapsed_unit = get_time_units(time() - begun) - print '\r\x1B[KCreated %d containers for dispersion reporting, %d%s, %d ' \ - 'retries' % \ - (need_to_create, round(elapsed), elapsed_unit, retries_done) - stdout.flush() + if container_populate: + container_ring = Ring(swift_dir, ring_name='container') + parts_left = dict((x, x) + for x in 
xrange(container_ring.partition_count)) + item_type = 'containers' + created = 0 + retries_done = 0 + need_to_create = need_to_queue = \ + dispersion_coverage / 100.0 * container_ring.partition_count + begun = next_report = time() + next_report += 2 + suffix = 0 + while need_to_queue >= 1: + container = 'dispersion_%d' % suffix + part, _junk = container_ring.get_nodes(account, container) + if part in parts_left: + if suffix >= options.container_suffix_start: + coropool.spawn(put_container, connpool, container, report) + sleep() + else: + report(True) + del parts_left[part] + need_to_queue -= 1 + suffix += 1 + coropool.waitall() + elapsed, elapsed_unit = get_time_units(time() - begun) + print '\r\x1B[KCreated %d containers for dispersion reporting, ' \ + '%d%s, %d retries' % \ + (need_to_create, round(elapsed), elapsed_unit, retries_done) + stdout.flush() - container = 'dispersion_objects' - put_container(connpool, container, None) - object_ring = Ring(swift_dir, ring_name='object') - parts_left = dict((x, x) for x in xrange(object_ring.partition_count)) - item_type = 'objects' - created = 0 - retries_done = 0 - need_to_create = need_to_queue = \ - dispersion_coverage / 100.0 * object_ring.partition_count - begun = next_report = time() - next_report += 2 - while need_to_queue >= 1: - obj = 'dispersion_%s' % uuid4().hex - part, _junk = object_ring.get_nodes(account, container, obj) - if part in parts_left: - coropool.spawn(put_object, connpool, container, obj, report) - sleep() - del parts_left[part] - need_to_queue -= 1 - coropool.waitall() - elapsed, elapsed_unit = get_time_units(time() - begun) - print '\r\x1B[KCreated %d objects for dispersion reporting, %d%s, %d ' \ - 'retries' % \ - (need_to_create, round(elapsed), elapsed_unit, retries_done) - stdout.flush() + if object_populate: + container = 'dispersion_objects' + put_container(connpool, container, None) + object_ring = Ring(swift_dir, ring_name='object') + parts_left = dict((x, x) for x in 
xrange(object_ring.partition_count)) + item_type = 'objects' + created = 0 + retries_done = 0 + need_to_create = need_to_queue = \ + dispersion_coverage / 100.0 * object_ring.partition_count + begun = next_report = time() + next_report += 2 + suffix = 0 + while need_to_queue >= 1: + obj = 'dispersion_%d' % suffix + part, _junk = object_ring.get_nodes(account, container, obj) + if part in parts_left: + if suffix >= options.object_suffix_start: + coropool.spawn( + put_object, connpool, container, obj, report) + sleep() + else: + report(True) + del parts_left[part] + need_to_queue -= 1 + suffix += 1 + coropool.waitall() + elapsed, elapsed_unit = get_time_units(time() - begun) + print '\r\x1B[KCreated %d objects for dispersion reporting, ' \ + '%d%s, %d retries' % \ + (need_to_create, round(elapsed), elapsed_unit, retries_done) + stdout.flush() diff --git a/doc/manpages/swift-dispersion-populate.1 b/doc/manpages/swift-dispersion-populate.1 index b4bed1c07b..01c5fe3295 100644 --- a/doc/manpages/swift-dispersion-populate.1 +++ b/doc/manpages/swift-dispersion-populate.1 @@ -24,7 +24,7 @@ .SH SYNOPSIS .LP -.B swift-dispersion-populate [--insecure] [conf_file] +.B swift-dispersion-populate [--container-suffix-start] [--object-suffix-start] [--container-only|--object-only] [--insecure] [conf_file] .SH DESCRIPTION .PP @@ -62,6 +62,16 @@ privileges. .IP "\fB--insecure\fR" Allow accessing insecure keystone server. The keystone's certificate will not be verified. 
+.IP "\fB--container-suffix-start=NUMBER\fR" +Start container suffix at NUMBER and resume population at this point; default: 0 +.IP "\fB--object-suffix-start=NUMBER\fR" +Start object suffix at NUMBER and resume population at this point; default: 0 +.IP "\fB--object-only\fR" +Only run object population +.IP "\fB--container-only\fR" +Only run container population +.IP "\fB--object-only\fR" +Only run object population .SH CONFIGURATION .PD 0 diff --git a/etc/dispersion.conf-sample b/etc/dispersion.conf-sample index 47a7f305b9..102a05ab3d 100644 --- a/etc/dispersion.conf-sample +++ b/etc/dispersion.conf-sample @@ -13,6 +13,8 @@ auth_key = testing # dispersion_coverage = 1.0 # retries = 5 # concurrency = 25 +# container_populate = yes +# object_populate = yes # container_report = yes # object_report = yes # dump_json = no