swift/bin/swift-dispersion-populate
Florian Hines 42f4b150e3 Faster swift-dispersion-populate
- Makes swift-dispersion-populate a bit faster when using a larger
  dispersion_coverage with a larger part_power.
- Adds option to only run population for container OR objects
- Adds option to let you resume population at given point (useful if you
  need to resume population after a previous run error'd out or the
  like) by specifying which suffix to start at.

The original populate just randomly used uuid4().hex as a suffix on the
container/object names until all the partition's required where covered.
This isn't a big deal if you're only doing 1% coverage on a ring with a
small part power but takes ages if you're doing 100% on a larger ring.

Change-Id: I52f890a774412c1d6179f12db9081aedc58b6bc2
2013-09-05 18:12:15 -05:00

206 lines
7.7 KiB
Python
Executable File

#!/usr/bin/env python
# Copyright (c) 2010-2012 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import traceback
from ConfigParser import ConfigParser
from cStringIO import StringIO
from optparse import OptionParser
from sys import exit, stdout
from time import time
from eventlet import GreenPool, patcher, sleep
from eventlet.pools import Pool
from swiftclient import Connection, get_auth
from swift.common.ring import Ring
from swift.common.utils import compute_eta, get_time_units, config_true_value
insecure = False
def put_container(connpool, container, report):
global retries_done
try:
with connpool.item() as conn:
conn.put_container(container)
retries_done += conn.attempts - 1
if report:
report(True)
except Exception:
if report:
report(False)
raise
def put_object(connpool, container, obj, report):
global retries_done
try:
with connpool.item() as conn:
conn.put_object(container, obj, StringIO(obj),
headers={'x-object-meta-dispersion': obj})
retries_done += conn.attempts - 1
if report:
report(True)
except Exception:
if report:
report(False)
raise
def report(success):
global begun, created, item_type, next_report, need_to_create, retries_done
if not success:
traceback.print_exc()
exit('Gave up due to error(s).')
created += 1
if time() < next_report:
return
next_report = time() + 5
eta, eta_unit = compute_eta(begun, created, need_to_create)
print '\r\x1B[KCreating %s: %d of %d, %d%s left, %d retries' % (item_type,
created, need_to_create, round(eta), eta_unit, retries_done),
stdout.flush()
if __name__ == '__main__':
global begun, created, item_type, next_report, need_to_create, retries_done
patcher.monkey_patch()
conffile = '/etc/swift/dispersion.conf'
parser = OptionParser(usage='''
Usage: %%prog [options] [conf_file]
[conf_file] defaults to %s'''.strip() % conffile)
parser.add_option('--container-only', action='store_true', default=False,
help='Only run container population')
parser.add_option('--object-only', action='store_true', default=False,
help='Only run object population')
parser.add_option('--container-suffix-start', type=int, default=0,
help='container suffix start value, defaults to 0')
parser.add_option('--object-suffix-start', type=int, default=0,
help='object suffix start value, defaults to 0')
parser.add_option('--insecure', action='store_true', default=False,
help='Allow accessing insecure keystone server. '
'The keystone\'s certificate will not be verified.')
options, args = parser.parse_args()
if args:
conffile = args.pop(0)
c = ConfigParser()
if not c.read(conffile):
exit('Unable to read config file: %s' % conffile)
conf = dict(c.items('dispersion'))
swift_dir = conf.get('swift_dir', '/etc/swift')
dispersion_coverage = float(conf.get('dispersion_coverage', 1))
retries = int(conf.get('retries', 5))
concurrency = int(conf.get('concurrency', 25))
endpoint_type = str(conf.get('endpoint_type', 'publicURL'))
insecure = options.insecure \
or config_true_value(conf.get('keystone_api_insecure', 'no'))
container_populate = config_true_value(
conf.get('container_populate', 'yes')) and not options.object_only
object_populate = config_true_value(
conf.get('object_populate', 'yes')) and not options.container_only
if not (object_populate or container_populate):
exit("Neither container or object populate is set to run")
coropool = GreenPool(size=concurrency)
retries_done = 0
os_options = {'endpoint_type': endpoint_type}
url, token = get_auth(conf['auth_url'], conf['auth_user'],
conf['auth_key'],
auth_version=conf.get('auth_version', '1.0'),
os_options=os_options,
insecure=insecure)
account = url.rsplit('/', 1)[1]
connpool = Pool(max_size=concurrency)
connpool.create = lambda: Connection(conf['auth_url'],
conf['auth_user'], conf['auth_key'],
retries=retries,
preauthurl=url, preauthtoken=token,
os_options=os_options,
insecure=insecure)
if container_populate:
container_ring = Ring(swift_dir, ring_name='container')
parts_left = dict((x, x)
for x in xrange(container_ring.partition_count))
item_type = 'containers'
created = 0
retries_done = 0
need_to_create = need_to_queue = \
dispersion_coverage / 100.0 * container_ring.partition_count
begun = next_report = time()
next_report += 2
suffix = 0
while need_to_queue >= 1:
container = 'dispersion_%d' % suffix
part, _junk = container_ring.get_nodes(account, container)
if part in parts_left:
if suffix >= options.container_suffix_start:
coropool.spawn(put_container, connpool, container, report)
sleep()
else:
report(True)
del parts_left[part]
need_to_queue -= 1
suffix += 1
coropool.waitall()
elapsed, elapsed_unit = get_time_units(time() - begun)
print '\r\x1B[KCreated %d containers for dispersion reporting, ' \
'%d%s, %d retries' % \
(need_to_create, round(elapsed), elapsed_unit, retries_done)
stdout.flush()
if object_populate:
container = 'dispersion_objects'
put_container(connpool, container, None)
object_ring = Ring(swift_dir, ring_name='object')
parts_left = dict((x, x) for x in xrange(object_ring.partition_count))
item_type = 'objects'
created = 0
retries_done = 0
need_to_create = need_to_queue = \
dispersion_coverage / 100.0 * object_ring.partition_count
begun = next_report = time()
next_report += 2
suffix = 0
while need_to_queue >= 1:
obj = 'dispersion_%d' % suffix
part, _junk = object_ring.get_nodes(account, container, obj)
if part in parts_left:
if suffix >= options.object_suffix_start:
coropool.spawn(
put_object, connpool, container, obj, report)
sleep()
else:
report(True)
del parts_left[part]
need_to_queue -= 1
suffix += 1
coropool.waitall()
elapsed, elapsed_unit = get_time_units(time() - begun)
print '\r\x1B[KCreated %d objects for dispersion reporting, ' \
'%d%s, %d retries' % \
(need_to_create, round(elapsed), elapsed_unit, retries_done)
stdout.flush()