Faster swift-dispersion-populate

- Makes swift-dispersion-populate a bit faster when using a larger
  dispersion_coverage with a larger part_power.
- Adds option to only run population for container OR objects
- Adds options to let you resume population at a given point (useful if
  you need to resume population after a previous run errored out or the
  like) by specifying which suffix to start at.

The original populate just randomly used uuid4().hex as a suffix on the
container/object names until all the required partitions were covered.
This isn't a big deal if you're only doing 1% coverage on a ring with a
small part power but takes ages if you're doing 100% on a larger ring.

Change-Id: I52f890a774412c1d6179f12db9081aedc58b6bc2
This commit is contained in:
Florian Hines 2013-09-05 18:12:15 -05:00
parent 14037b46e6
commit 42f4b150e3
3 changed files with 90 additions and 50 deletions

View File

@ -20,7 +20,6 @@ from cStringIO import StringIO
from optparse import OptionParser from optparse import OptionParser
from sys import exit, stdout from sys import exit, stdout
from time import time from time import time
from uuid import uuid4
from eventlet import GreenPool, patcher, sleep from eventlet import GreenPool, patcher, sleep
from eventlet.pools import Pool from eventlet.pools import Pool
@ -87,6 +86,14 @@ if __name__ == '__main__':
Usage: %%prog [options] [conf_file] Usage: %%prog [options] [conf_file]
[conf_file] defaults to %s'''.strip() % conffile) [conf_file] defaults to %s'''.strip() % conffile)
parser.add_option('--container-only', action='store_true', default=False,
help='Only run container population')
parser.add_option('--object-only', action='store_true', default=False,
help='Only run object population')
parser.add_option('--container-suffix-start', type=int, default=0,
help='container suffix start value, defaults to 0')
parser.add_option('--object-suffix-start', type=int, default=0,
help='object suffix start value, defaults to 0')
parser.add_option('--insecure', action='store_true', default=False, parser.add_option('--insecure', action='store_true', default=False,
help='Allow accessing insecure keystone server. ' help='Allow accessing insecure keystone server. '
'The keystone\'s certificate will not be verified.') 'The keystone\'s certificate will not be verified.')
@ -106,6 +113,13 @@ Usage: %%prog [options] [conf_file]
endpoint_type = str(conf.get('endpoint_type', 'publicURL')) endpoint_type = str(conf.get('endpoint_type', 'publicURL'))
insecure = options.insecure \ insecure = options.insecure \
or config_true_value(conf.get('keystone_api_insecure', 'no')) or config_true_value(conf.get('keystone_api_insecure', 'no'))
container_populate = config_true_value(
conf.get('container_populate', 'yes')) and not options.object_only
object_populate = config_true_value(
conf.get('object_populate', 'yes')) and not options.container_only
if not (object_populate or container_populate):
exit("Neither container or object populate is set to run")
coropool = GreenPool(size=concurrency) coropool = GreenPool(size=concurrency)
retries_done = 0 retries_done = 0
@ -126,52 +140,66 @@ Usage: %%prog [options] [conf_file]
os_options=os_options, os_options=os_options,
insecure=insecure) insecure=insecure)
container_ring = Ring(swift_dir, ring_name='container') if container_populate:
parts_left = dict((x, x) for x in xrange(container_ring.partition_count)) container_ring = Ring(swift_dir, ring_name='container')
item_type = 'containers' parts_left = dict((x, x)
created = 0 for x in xrange(container_ring.partition_count))
retries_done = 0 item_type = 'containers'
need_to_create = need_to_queue = \ created = 0
dispersion_coverage / 100.0 * container_ring.partition_count retries_done = 0
begun = next_report = time() need_to_create = need_to_queue = \
next_report += 2 dispersion_coverage / 100.0 * container_ring.partition_count
while need_to_queue >= 1: begun = next_report = time()
container = 'dispersion_%s' % uuid4().hex next_report += 2
part, _junk = container_ring.get_nodes(account, container) suffix = 0
if part in parts_left: while need_to_queue >= 1:
coropool.spawn(put_container, connpool, container, report) container = 'dispersion_%d' % suffix
sleep() part, _junk = container_ring.get_nodes(account, container)
del parts_left[part] if part in parts_left:
need_to_queue -= 1 if suffix >= options.container_suffix_start:
coropool.waitall() coropool.spawn(put_container, connpool, container, report)
elapsed, elapsed_unit = get_time_units(time() - begun) sleep()
print '\r\x1B[KCreated %d containers for dispersion reporting, %d%s, %d ' \ else:
'retries' % \ report(True)
(need_to_create, round(elapsed), elapsed_unit, retries_done) del parts_left[part]
stdout.flush() need_to_queue -= 1
suffix += 1
coropool.waitall()
elapsed, elapsed_unit = get_time_units(time() - begun)
print '\r\x1B[KCreated %d containers for dispersion reporting, ' \
'%d%s, %d retries' % \
(need_to_create, round(elapsed), elapsed_unit, retries_done)
stdout.flush()
container = 'dispersion_objects' if object_populate:
put_container(connpool, container, None) container = 'dispersion_objects'
object_ring = Ring(swift_dir, ring_name='object') put_container(connpool, container, None)
parts_left = dict((x, x) for x in xrange(object_ring.partition_count)) object_ring = Ring(swift_dir, ring_name='object')
item_type = 'objects' parts_left = dict((x, x) for x in xrange(object_ring.partition_count))
created = 0 item_type = 'objects'
retries_done = 0 created = 0
need_to_create = need_to_queue = \ retries_done = 0
dispersion_coverage / 100.0 * object_ring.partition_count need_to_create = need_to_queue = \
begun = next_report = time() dispersion_coverage / 100.0 * object_ring.partition_count
next_report += 2 begun = next_report = time()
while need_to_queue >= 1: next_report += 2
obj = 'dispersion_%s' % uuid4().hex suffix = 0
part, _junk = object_ring.get_nodes(account, container, obj) while need_to_queue >= 1:
if part in parts_left: obj = 'dispersion_%d' % suffix
coropool.spawn(put_object, connpool, container, obj, report) part, _junk = object_ring.get_nodes(account, container, obj)
sleep() if part in parts_left:
del parts_left[part] if suffix >= options.object_suffix_start:
need_to_queue -= 1 coropool.spawn(
coropool.waitall() put_object, connpool, container, obj, report)
elapsed, elapsed_unit = get_time_units(time() - begun) sleep()
print '\r\x1B[KCreated %d objects for dispersion reporting, %d%s, %d ' \ else:
'retries' % \ report(True)
(need_to_create, round(elapsed), elapsed_unit, retries_done) del parts_left[part]
stdout.flush() need_to_queue -= 1
suffix += 1
coropool.waitall()
elapsed, elapsed_unit = get_time_units(time() - begun)
print '\r\x1B[KCreated %d objects for dispersion reporting, ' \
'%d%s, %d retries' % \
(need_to_create, round(elapsed), elapsed_unit, retries_done)
stdout.flush()

View File

@ -24,7 +24,7 @@
.SH SYNOPSIS .SH SYNOPSIS
.LP .LP
.B swift-dispersion-populate [--insecure] [conf_file] .B swift-dispersion-populate [--container-suffix-start] [--object-suffix-start] [--container-only|--object-only] [--insecure] [conf_file]
.SH DESCRIPTION .SH DESCRIPTION
.PP .PP
@ -62,6 +62,16 @@ privileges.
.IP "\fB--insecure\fR" .IP "\fB--insecure\fR"
Allow accessing insecure keystone server. The keystone's certificate will not Allow accessing insecure keystone server. The keystone's certificate will not
be verified. be verified.
.IP "\fB--container-suffix-start=NUMBER\fR"
Start container suffix at NUMBER and resume population at this point; default: 0
.IP "\fB--object-suffix-start=NUMBER\fR"
Start object suffix at NUMBER and resume population at this point; default: 0
.IP "\fB--container-only\fR"
Only run container population
.IP "\fB--object-only\fR"
Only run object population
.SH CONFIGURATION .SH CONFIGURATION
.PD 0 .PD 0

View File

@ -13,6 +13,8 @@ auth_key = testing
# dispersion_coverage = 1.0 # dispersion_coverage = 1.0
# retries = 5 # retries = 5
# concurrency = 25 # concurrency = 25
# container_populate = yes
# object_populate = yes
# container_report = yes # container_report = yes
# object_report = yes # object_report = yes
# dump_json = no # dump_json = no