Some optimizations for ring rebalancing.
For a part-power 18 ring with a small number of devices (12), this drops "swift-ring-builder ... rebalance" from ~7.2 seconds to ~3.6 seconds on OS X with Python 2.7 (2.7 GHz i7 processor). Under the profiler, that part-power 18 rebalance now spends only slightly more time rebalancing the ring than it does writing out the results. I haven't done a more extensive before/after comparison with larger numbers of devices and larger part-power values. Change-Id: I25735bc71da2f11617cc436d4f8e0c4f3f82bfec
This commit is contained in:
parent
d904fd4c0f
commit
0bba07111f
@ -335,6 +335,8 @@ class RingBuilder(object):
|
||||
:raises RingValidationError: problem was found with the ring.
|
||||
"""
|
||||
|
||||
# "len" showed up in profiling, so it's just computed once.
|
||||
dev_len = len(self.devs)
|
||||
if sum(d['parts'] for d in self._iter_devs()) != \
|
||||
self.parts * self.replicas:
|
||||
raise exceptions.RingValidationError(
|
||||
@ -344,7 +346,7 @@ class RingBuilder(object):
|
||||
if stats:
|
||||
# dev_usage[dev_id] will equal the number of partitions assigned to
|
||||
# that device.
|
||||
dev_usage = array('I', (0 for _junk in xrange(len(self.devs))))
|
||||
dev_usage = array('I', (0 for _junk in xrange(dev_len)))
|
||||
for part2dev in self._replica2part2dev:
|
||||
for dev_id in part2dev:
|
||||
dev_usage[dev_id] += 1
|
||||
@ -352,7 +354,7 @@ class RingBuilder(object):
|
||||
for part in xrange(self.parts):
|
||||
for replica in xrange(self.replicas):
|
||||
dev_id = self._replica2part2dev[replica][part]
|
||||
if dev_id >= len(self.devs) or not self.devs[dev_id]:
|
||||
if dev_id >= dev_len or not self.devs[dev_id]:
|
||||
raise exceptions.RingValidationError(
|
||||
"Partition %d, replica %d was not allocated "
|
||||
"to a device." %
|
||||
@ -477,8 +479,13 @@ class RingBuilder(object):
|
||||
"""
|
||||
elapsed_hours = int(time() - self._last_part_moves_epoch) / 3600
|
||||
for part in xrange(self.parts):
|
||||
self._last_part_moves[part] = \
|
||||
min(self._last_part_moves[part] + elapsed_hours, 0xff)
|
||||
# The "min(self._last_part_moves[part] + elapsed_hours, 0xff)"
|
||||
# which was here showed up in profiling, so it got inlined.
|
||||
last_plus_elapsed = self._last_part_moves[part] + elapsed_hours
|
||||
if last_plus_elapsed < 0xff:
|
||||
self._last_part_moves[part] = last_plus_elapsed
|
||||
else:
|
||||
self._last_part_moves[part] = 0xff
|
||||
self._last_part_moves_epoch = int(time())
|
||||
|
||||
def _gather_reassign_parts(self):
|
||||
@ -487,6 +494,10 @@ class RingBuilder(object):
|
||||
gathering from removed devices, insufficiently-far-apart replicas, and
|
||||
overweight drives.
|
||||
"""
|
||||
# inline memoization of tiers_for_dev() results (profiling reveals it
|
||||
# as a hot-spot).
|
||||
tfd = {}
|
||||
|
||||
# First we gather partitions from removed devices. Since removed
|
||||
# devices usually indicate device failures, we have no choice but to
|
||||
# reassign these partitions. However, we mark them as moved so later
|
||||
@ -513,19 +524,31 @@ class RingBuilder(object):
|
||||
|
||||
# First, add up the count of replicas at each tier for each
|
||||
# partition.
|
||||
replicas_at_tier = defaultdict(lambda: 0)
|
||||
# replicas_at_tier was a "lambda: 0" defaultdict, but profiling
|
||||
# revealed the lambda invocation as a significant cost.
|
||||
replicas_at_tier = {}
|
||||
for replica in xrange(self.replicas):
|
||||
dev = self.devs[self._replica2part2dev[replica][part]]
|
||||
for tier in tiers_for_dev(dev):
|
||||
replicas_at_tier[tier] += 1
|
||||
if dev['id'] not in tfd:
|
||||
tfd[dev['id']] = tiers_for_dev(dev)
|
||||
for tier in tfd[dev['id']]:
|
||||
if tier not in replicas_at_tier:
|
||||
replicas_at_tier[tier] = 1
|
||||
else:
|
||||
replicas_at_tier[tier] += 1
|
||||
|
||||
# Now, look for partitions not yet spread out enough and not
|
||||
# recently moved.
|
||||
for replica in xrange(self.replicas):
|
||||
dev = self.devs[self._replica2part2dev[replica][part]]
|
||||
removed_replica = False
|
||||
for tier in tiers_for_dev(dev):
|
||||
if (replicas_at_tier[tier] > max_allowed_replicas[tier] and
|
||||
if dev['id'] not in tfd:
|
||||
tfd[dev['id']] = tiers_for_dev(dev)
|
||||
for tier in tfd[dev['id']]:
|
||||
rep_at_tier = 0
|
||||
if tier in replicas_at_tier:
|
||||
rep_at_tier = replicas_at_tier[tier]
|
||||
if (rep_at_tier > max_allowed_replicas[tier] and
|
||||
self._last_part_moves[part] >=
|
||||
self.min_part_hours):
|
||||
self._last_part_moves[part] = 0
|
||||
@ -535,7 +558,9 @@ class RingBuilder(object):
|
||||
removed_replica = True
|
||||
break
|
||||
if removed_replica:
|
||||
for tier in tiers_for_dev(dev):
|
||||
if dev['id'] not in tfd:
|
||||
tfd[dev['id']] = tiers_for_dev(dev)
|
||||
for tier in tfd[dev['id']]:
|
||||
replicas_at_tier[tier] -= 1
|
||||
|
||||
# Last, we gather partitions from devices that are "overweight" because
|
||||
|
Loading…
Reference in New Issue
Block a user