diff --git a/bin/swift-ring-builder b/bin/swift-ring-builder index 7522ea6011..3288e06c36 100755 --- a/bin/swift-ring-builder +++ b/bin/swift-ring-builder @@ -24,6 +24,7 @@ from sys import argv, exit, modules from textwrap import wrap from time import time +from swift.common import exceptions from swift.common.ring import RingBuilder @@ -494,7 +495,21 @@ swift-ring-builder remove print 'Aborting device removals' exit(EXIT_ERROR) for dev in devs: - builder.remove_dev(dev['id']) + try: + builder.remove_dev(dev['id']) + except exceptions.RingBuilderError, e: + print '-' * 79 + print ("An error occurred while removing device with id %d\n" + "This usually means that you attempted to remove the\n" + "last device in a ring. If this is the case, consider\n" + "creating a new ring instead.\n" + "The on-disk ring builder is unchanged\n" + "Original exception message: %s" % + (dev['id'], e.message) + ) + print '-' * 79 + exit(EXIT_ERROR) + print 'd%(id)sz%(zone)s-%(ip)s:%(port)s/%(device)s_"%(meta)s" ' \ 'marked for removal and will be removed next rebalance.' \ % dev @@ -508,8 +523,18 @@ swift-ring-builder rebalance recently reassigned. """ devs_changed = builder.devs_changed - last_balance = builder.get_balance() - parts, balance = builder.rebalance() + try: + last_balance = builder.get_balance() + parts, balance = builder.rebalance() + except exceptions.RingBuilderError, e: + print '-' * 79 + print ("An error has occurred during ring validation. Common\n" + "causes of failure are rings that are empty or do not\n" + "have enough devices to accommodate the replica count.\n" + "Original exception message:\n %s" % e.message + ) + print '-' * 79 + exit(EXIT_ERROR) if not parts: print 'No partitions could be reassigned.' print 'Either none need to be or none can be due to ' \ @@ -519,7 +544,17 @@ swift-ring-builder rebalance print 'Cowardly refusing to save rebalance as it did not change ' \ 'at least 1%.' exit(EXIT_WARNING) - builder.validate() + try: + builder.validate() + except exceptions.RingValidationError, e: + print '-' * 79 + print ("An error has occurred during ring validation. Common\n" + "causes of failure are rings that are empty or do not\n" + "have enough devices to accommodate the replica count.\n" + "Original exception message:\n %s" % e.message + ) + print '-' * 79 + exit(EXIT_ERROR) print 'Reassigned %d (%.02f%%) partitions. Balance is now %.02f.' % \ (parts, 100.0 * parts / builder.parts, balance) status = EXIT_SUCCESS diff --git a/swift/common/exceptions.py b/swift/common/exceptions.py index 30af79b722..d35bb3e92f 100644 --- a/swift/common/exceptions.py +++ b/swift/common/exceptions.py @@ -60,3 +60,19 @@ class DriveNotMounted(Exception): class LockTimeout(MessageTimeout): pass + + +class RingBuilderError(Exception): + pass + + +class RingValidationError(RingBuilderError): + pass + + +class EmptyRingError(RingBuilderError): + pass + + +class DuplicateDeviceError(RingBuilderError): + pass diff --git a/swift/common/ring/builder.py b/swift/common/ring/builder.py index d07c3cecf1..1b5b775aa5 100644 --- a/swift/common/ring/builder.py +++ b/swift/common/ring/builder.py @@ -17,6 +17,7 @@ from array import array from random import randint, shuffle from time import time +from swift.common import exceptions from swift.common.ring import RingData @@ -69,6 +70,15 @@ class RingBuilder(object): self._remove_devs = [] self._ring = None + def weighted_parts(self): + try: + return self.parts * self.replicas / \ + sum(d['weight'] for d in self.devs if d is not None) + except ZeroDivisionError: + raise exceptions.EmptyRingError('There are no devices in this ' + 'ring, or all devices have been ' + 'deleted') + def copy_from(self, builder): if hasattr(builder, 'devs'): self.part_power = builder.part_power @@ -177,7 +187,8 @@ class RingBuilder(object): :param dev: device dict """ if dev['id'] < len(self.devs) and self.devs[dev['id']] is not None: - raise Exception('Duplicate device id: %d' % dev['id']) + raise exceptions.DuplicateDeviceError( + 'Duplicate device id: %d' % dev['id']) while dev['id'] >= len(self.devs): self.devs.append(None) dev['weight'] = float(dev['weight']) @@ -273,11 +284,11 @@ class RingBuilder(object): :param stats: if True, check distribution of partitions across devices :returns: if stats is True, a tuple of (device usage, worst stat), else (None, None) - :raises Exception: problem was found with the ring. + :raises RingValidationError: problem was found with the ring. """ if sum(d['parts'] for d in self.devs if d is not None) != \ self.parts * self.replicas: - raise Exception( + raise exceptions.RingValidationError( 'All partitions are not double accounted for: %d != %d' % (sum(d['parts'] for d in self.devs if d is not None), self.parts * self.replicas)) @@ -291,7 +302,7 @@ class RingBuilder(object): dev_usage[dev_id] += 1 zone = self.devs[dev_id]['zone'] if zone in zones: - raise Exception( + raise exceptions.RingValidationError( 'Partition %d not in %d distinct zones. ' \ 'Zones were: %s' % (part, self.replicas, @@ -299,8 +310,7 @@ class RingBuilder(object): for r in xrange(self.replicas)])) zones[zone] = True if stats: - weighted_parts = self.parts * self.replicas / \ - sum(d['weight'] for d in self.devs if d is not None) + weighted_parts = self.weighted_parts() worst = 0 for dev in self.devs: if dev is None: @@ -328,9 +338,8 @@ class RingBuilder(object): :returns: balance of the ring """ - weighted_parts = self.parts * self.replicas / \ - sum(d['weight'] for d in self.devs if d is not None) balance = 0 + weighted_parts = self.weighted_parts() for dev in self.devs: if dev is None: continue @@ -370,8 +379,8 @@ class RingBuilder(object): used to sort the devices according to "most wanted" during rebalancing to best distribute partitions. """ - weighted_parts = self.parts * self.replicas / \ - sum(d['weight'] for d in self.devs if d is not None) + weighted_parts = self.weighted_parts() + for dev in self.devs: if dev is not None: if not dev['weight']: diff --git a/test/unit/common/ring/test_builder.py b/test/unit/common/ring/test_builder.py index 77be1df634..4a2dcfed76 100644 --- a/test/unit/common/ring/test_builder.py +++ b/test/unit/common/ring/test_builder.py @@ -17,8 +17,9 @@ import os import unittest from shutil import rmtree -from swift.common.ring import RingBuilder, RingData +from swift.common import exceptions from swift.common import ring +from swift.common.ring import RingBuilder, RingData class TestRingBuilder(unittest.TestCase): @@ -68,7 +69,7 @@ class TestRingBuilder(unittest.TestCase): dev = \ {'id': 0, 'zone': 0, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000} rb.add_dev(dev) - self.assertRaises(Exception, rb.add_dev, dev) + self.assertRaises(exceptions.DuplicateDeviceError, rb.add_dev, dev) def test_set_dev_weight(self): rb = ring.RingBuilder(8, 3, 1) @@ -226,13 +227,13 @@ class TestRingBuilder(unittest.TestCase): # Test not all partitions doubly accounted for rb.devs[1]['parts'] -= 1 - self.assertRaises(Exception, rb.validate) + self.assertRaises(exceptions.RingValidationError, rb.validate) rb.devs[1]['parts'] += 1 # Test duplicate device for partition orig_dev_id = rb._replica2part2dev[0][0] rb._replica2part2dev[0][0] = rb._replica2part2dev[1][0] - self.assertRaises(Exception, rb.validate) + self.assertRaises(exceptions.RingValidationError, rb.validate) rb._replica2part2dev[0][0] = orig_dev_id # Test duplicate zone for partition @@ -256,7 +257,7 @@ class TestRingBuilder(unittest.TestCase): break if orig_replica is not None: break - self.assertRaises(Exception, rb.validate) + self.assertRaises(exceptions.RingValidationError, rb.validate) rb._replica2part2dev[orig_replica][orig_partition] = orig_device # Tests that validate can handle 'holes' in .devs