Some optimizations for ring rebalancing.

For a part-power 18 ring with a small number of devices (12), this drops
"swift-ring-builder ... rebalance" from ~7.2 seconds to ~3.6 seconds on
OS X with Python 2.7 (2.7 GHz i7 processor).

Under the profiler, that part-power 18 rebalance now spends only
slightly more time rebalancing the ring than it does writing out the
results.

I haven't done more extensive before/after comparison with larger
numbers of devices and larger part-power values...
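
For anyone who wants to reproduce the measurement, a minimal sketch along
these lines works under cProfile; the RingBuilder constructor arguments,
the replica count of 3, and the add_dev() device fields are assumptions
based on the Swift ring API of this era, not part of this commit:

# Sketch for reproducing the measurement above; not part of this commit.
import cProfile

from swift.common.ring import RingBuilder

# part power 18, 12 devices as described above; 3 replicas and
# min_part_hours=1 are assumed values.
builder = RingBuilder(18, 3, 1)
for dev_id in range(12):
    builder.add_dev({'id': dev_id, 'zone': dev_id % 4, 'weight': 100.0,
                     'ip': '127.0.0.1', 'port': 6000 + dev_id,
                     'device': 'sdb%d' % dev_id, 'meta': ''})

cProfile.run('builder.rebalance()', sort='cumulative')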

Change-Id: I25735bc71da2f11617cc436d4f8e0c4f3f82bfec
Darrell Bishop 2013-01-13 23:26:33 -08:00
parent d904fd4c0f
commit 0bba07111f


@@ -335,6 +335,8 @@ class RingBuilder(object):
         :raises RingValidationError: problem was found with the ring.
         """
+        # "len" showed up in profiling, so it's just computed once.
+        dev_len = len(self.devs)
         if sum(d['parts'] for d in self._iter_devs()) != \
                 self.parts * self.replicas:
             raise exceptions.RingValidationError(
@@ -344,7 +346,7 @@ class RingBuilder(object):
         if stats:
             # dev_usage[dev_id] will equal the number of partitions assigned to
             # that device.
-            dev_usage = array('I', (0 for _junk in xrange(len(self.devs))))
+            dev_usage = array('I', (0 for _junk in xrange(dev_len)))
             for part2dev in self._replica2part2dev:
                 for dev_id in part2dev:
                     dev_usage[dev_id] += 1
@@ -352,7 +354,7 @@ class RingBuilder(object):
         for part in xrange(self.parts):
             for replica in xrange(self.replicas):
                 dev_id = self._replica2part2dev[replica][part]
-                if dev_id >= len(self.devs) or not self.devs[dev_id]:
+                if dev_id >= dev_len or not self.devs[dev_id]:
                     raise exceptions.RingValidationError(
                         "Partition %d, replica %d was not allocated "
                         "to a device." %
@@ -477,8 +479,13 @@ class RingBuilder(object):
         """
         elapsed_hours = int(time() - self._last_part_moves_epoch) / 3600
         for part in xrange(self.parts):
-            self._last_part_moves[part] = \
-                min(self._last_part_moves[part] + elapsed_hours, 0xff)
+            # The "min(self._last_part_moves[part] + elapsed_hours, 0xff)"
+            # which was here showed up in profiling, so it got inlined.
+            last_plus_elapsed = self._last_part_moves[part] + elapsed_hours
+            if last_plus_elapsed < 0xff:
+                self._last_part_moves[part] = last_plus_elapsed
+            else:
+                self._last_part_moves[part] = 0xff
         self._last_part_moves_epoch = int(time())
 
     def _gather_reassign_parts(self):
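
Inlining min() removes a global-name lookup and a function call from a loop
that runs once per partition. An illustrative standalone comparison of the
same trade (not from this commit):

# Illustrative sketch of the min() inlining above; not from this commit.
import timeit

setup = "vals = list(range(2 ** 18)); elapsed = 3"
with_min = "for v in vals:\n    _ = min(v + elapsed, 0xff)"
inlined = ("for v in vals:\n"
           "    t = v + elapsed\n"
           "    _ = t if t < 0xff else 0xff")

print("min() call:", timeit.timeit(with_min, setup=setup, number=10))
print("inlined:   ", timeit.timeit(inlined, setup=setup, number=10))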
@@ -487,6 +494,10 @@ class RingBuilder(object):
         gathering from removed devices, insufficiently-far-apart replicas, and
         overweight drives.
         """
+        # inline memoization of tiers_for_dev() results (profiling reveals it
+        # as a hot-spot).
+        tfd = {}
+
         # First we gather partitions from removed devices. Since removed
         # devices usually indicate device failures, we have no choice but to
         # reassign these partitions. However, we mark them as moved so later
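
tfd caches tiers_for_dev() by device id for the duration of one gather
pass; the result depends only on the device dict, so recomputing it per
replica per partition is wasted work. The pattern in isolation, with
hypothetical names and a stubbed-out tier function (not from this commit):

# The memoization pattern tfd implements, in isolation; the names are
# hypothetical and the tier structure is a stand-in, not Swift's.
def stub_tiers_for_dev(dev):
    # Stand-in for Swift's tiers_for_dev(): the tiers a device belongs to.
    return [(dev['zone'],), (dev['zone'], dev['id'])]

def cached_tiers(tfd, dev, tiers_for_dev=stub_tiers_for_dev):
    if dev['id'] not in tfd:
        tfd[dev['id']] = tiers_for_dev(dev)  # computed once per device
    return tfd[dev['id']]

tfd = {}
dev = {'id': 0, 'zone': 1}
assert cached_tiers(tfd, dev) is cached_tiers(tfd, dev)  # second call hits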
@@ -513,19 +524,31 @@ class RingBuilder(object):
                 # First, add up the count of replicas at each tier for each
                 # partition.
-                replicas_at_tier = defaultdict(lambda: 0)
+                # replicas_at_tier was a "lambda: 0" defaultdict, but profiling
+                # revealed the lambda invocation as a significant cost.
+                replicas_at_tier = {}
                 for replica in xrange(self.replicas):
                     dev = self.devs[self._replica2part2dev[replica][part]]
-                    for tier in tiers_for_dev(dev):
-                        replicas_at_tier[tier] += 1
+                    if dev['id'] not in tfd:
+                        tfd[dev['id']] = tiers_for_dev(dev)
+                    for tier in tfd[dev['id']]:
+                        if tier not in replicas_at_tier:
+                            replicas_at_tier[tier] = 1
+                        else:
+                            replicas_at_tier[tier] += 1
 
                 # Now, look for partitions not yet spread out enough and not
                 # recently moved.
                 for replica in xrange(self.replicas):
                     dev = self.devs[self._replica2part2dev[replica][part]]
                     removed_replica = False
-                    for tier in tiers_for_dev(dev):
-                        if (replicas_at_tier[tier] > max_allowed_replicas[tier] and
+                    if dev['id'] not in tfd:
+                        tfd[dev['id']] = tiers_for_dev(dev)
+                    for tier in tfd[dev['id']]:
+                        rep_at_tier = 0
+                        if tier in replicas_at_tier:
+                            rep_at_tier = replicas_at_tier[tier]
+                        if (rep_at_tier > max_allowed_replicas[tier] and
                                 self._last_part_moves[part] >=
                                 self.min_part_hours):
                             self._last_part_moves[part] = 0
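
replicas_at_tier is rebuilt for every partition and most keys are touched
only a few times, so with defaultdict(lambda: 0) the default-factory lambda
fires on nearly every first access. An illustrative comparison of the two
shapes (not from this commit):

# Illustrative sketch of the defaultdict(lambda: 0) replacement above; not
# from this commit. defaultdict(int) would also avoid the lambda, but the
# commit opts for a plain dict with explicit membership tests.
import timeit

setup = "from collections import defaultdict\nkeys = list(range(12)) * 3"
with_dd = ("for _ in range(4096):\n"
           "    d = defaultdict(lambda: 0)\n"
           "    for k in keys:\n"
           "        d[k] += 1")
plain = ("for _ in range(4096):\n"
         "    d = {}\n"
         "    for k in keys:\n"
         "        if k not in d:\n"
         "            d[k] = 1\n"
         "        else:\n"
         "            d[k] += 1")

print("defaultdict(lambda):", timeit.timeit(with_dd, setup=setup, number=10))
print("plain dict:         ", timeit.timeit(plain, setup=setup, number=10))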
@@ -535,7 +558,9 @@ class RingBuilder(object):
                             removed_replica = True
                             break
                     if removed_replica:
-                        for tier in tiers_for_dev(dev):
+                        if dev['id'] not in tfd:
+                            tfd[dev['id']] = tiers_for_dev(dev)
+                        for tier in tfd[dev['id']]:
                             replicas_at_tier[tier] -= 1
 
         # Last, we gather partitions from devices that are "overweight" because