Fix rebalancing when 2+ of a partition's replicas are on deleted devices.
RingBuilder._reassign_parts assumed that only one replica for a given partition would move. This isn't necessarily true in the case where a bunch of devices have been removed. The old assumption would leave invalid entries in _replica2part2dev and also cause validation to fail. One easy way to reproduce this is to create a 3-replica, 3-zone, 6-device ring with 2 drives per zone (all of equal weight), rebalance, and then remove one drive from each zone and rebalance again. Bug: 943493 Change-Id: I0d399bed5d733448ad877fa2823b542777d385a4
This commit is contained in:
parent
9316a8f876
commit
7fe0c6c695
1
AUTHORS
1
AUTHORS
@ -38,6 +38,7 @@ Dragos Manolescu (dragosm@hp.com)
|
||||
Juan J. Martinez (juan@memset.com)
|
||||
Donagh McCabe (donagh.mccabe@hp.com)
|
||||
Ewan Mellor (ewan.mellor@citrix.com)
|
||||
Samuel Merritt (spam@andcheese.org)
|
||||
Stephen Milton (milton@isomedia.com)
|
||||
Russ Nelson (russ@crynwr.com)
|
||||
Maru Newby (mnewby@internap.com)
|
||||
|
@ -502,31 +502,35 @@ class RingBuilder(object):
|
||||
key=lambda x: x['sort_key'])
|
||||
for part in reassign_parts:
|
||||
other_zones = array('H')
|
||||
replace = None
|
||||
replace = []
|
||||
for replica in xrange(self.replicas):
|
||||
if self._replica2part2dev[replica][part] == 0xffff:
|
||||
replace = replica
|
||||
replace.append(replica)
|
||||
else:
|
||||
other_zones.append(self.devs[
|
||||
self._replica2part2dev[replica][part]]['zone'])
|
||||
index = len(available_devs) - 1
|
||||
while available_devs[index]['zone'] in other_zones:
|
||||
index -= 1
|
||||
dev = available_devs.pop(index)
|
||||
self._replica2part2dev[replace][part] = dev['id']
|
||||
dev['parts_wanted'] -= 1
|
||||
dev['parts'] += 1
|
||||
dev['sort_key'] = '%08x.%04x' % (self.parts + dev['parts_wanted'],
|
||||
randint(0, 0xffff))
|
||||
index = 0
|
||||
end = len(available_devs)
|
||||
while index < end:
|
||||
mid = (index + end) // 2
|
||||
if dev['sort_key'] < available_devs[mid]['sort_key']:
|
||||
end = mid
|
||||
else:
|
||||
index = mid + 1
|
||||
available_devs.insert(index, dev)
|
||||
|
||||
for replica in replace:
|
||||
index = len(available_devs) - 1
|
||||
while available_devs[index]['zone'] in other_zones:
|
||||
index -= 1
|
||||
dev = available_devs.pop(index)
|
||||
other_zones.append(dev['zone'])
|
||||
self._replica2part2dev[replica][part] = dev['id']
|
||||
dev['parts_wanted'] -= 1
|
||||
dev['parts'] += 1
|
||||
dev['sort_key'] = \
|
||||
'%08x.%04x' % (self.parts + dev['parts_wanted'],
|
||||
randint(0, 0xffff))
|
||||
index = 0
|
||||
end = len(available_devs)
|
||||
while index < end:
|
||||
mid = (index + end) // 2
|
||||
if dev['sort_key'] < available_devs[mid]['sort_key']:
|
||||
end = mid
|
||||
else:
|
||||
index = mid + 1
|
||||
available_devs.insert(index, dev)
|
||||
for dev in self.devs:
|
||||
if dev is not None:
|
||||
del dev['sort_key']
|
||||
|
@ -126,6 +126,36 @@ class TestRingBuilder(unittest.TestCase):
|
||||
counts[dev_id] = counts.get(dev_id, 0) + 1
|
||||
self.assertEquals(counts, {0: 256, 2: 256, 3: 256})
|
||||
|
||||
def test_remove_a_lot(self):
|
||||
rb = ring.RingBuilder(3, 3, 1)
|
||||
rb.add_dev({'id': 0, 'device': 'd0', 'ip': '10.0.0.1',
|
||||
'port': 6002, 'weight': 1000.0, 'zone': 1})
|
||||
rb.add_dev({'id': 1, 'device': 'd1', 'ip': '10.0.0.2',
|
||||
'port': 6002, 'weight': 1000.0, 'zone': 2})
|
||||
rb.add_dev({'id': 2, 'device': 'd2', 'ip': '10.0.0.3',
|
||||
'port': 6002, 'weight': 1000.0, 'zone': 3})
|
||||
rb.add_dev({'id': 3, 'device': 'd3', 'ip': '10.0.0.1',
|
||||
'port': 6002, 'weight': 1000.0, 'zone': 1})
|
||||
rb.add_dev({'id': 4, 'device': 'd4', 'ip': '10.0.0.2',
|
||||
'port': 6002, 'weight': 1000.0, 'zone': 2})
|
||||
rb.add_dev({'id': 5, 'device': 'd5', 'ip': '10.0.0.3',
|
||||
'port': 6002, 'weight': 1000.0, 'zone': 3})
|
||||
rb.rebalance()
|
||||
rb.validate()
|
||||
|
||||
# this has to put more than 1/3 of the partitions in the
|
||||
# cluster on removed devices in order to ensure that at least
|
||||
# one partition has multiple replicas that need to move.
|
||||
#
|
||||
# (for an N-replica ring, it's more than 1/N of the
|
||||
# partitions, of course)
|
||||
rb.remove_dev(3)
|
||||
rb.remove_dev(4)
|
||||
rb.remove_dev(5)
|
||||
|
||||
rb.rebalance()
|
||||
rb.validate()
|
||||
|
||||
def test_shuffled_gather(self):
|
||||
if self._shuffled_gather_helper() and \
|
||||
self._shuffled_gather_helper():
|
||||
|
Loading…
Reference in New Issue
Block a user