From 7fa9d6e5b4fb74ad95f059742d8844e7c12fa235 Mon Sep 17 00:00:00 2001
From: Alistair Coles
Date: Fri, 16 Sep 2016 14:31:44 +0100
Subject: [PATCH] Unset random seed after rebalancing ring

Unit tests use the random module in places to randomise test inputs,
but once the tests in test_builder.py or test_ring_builder_analyzer.py
have been run, the random module is left in a repeatable state because
calls are made to RingBuilder.rebalance with a seed value.
Consequently, subsequent calls to random in other tests get repeatable
results from one test run to another.

This patch resets the state of the random module before returning from
RingBuilder.rebalance.

Closes-Bug: #1639755
Change-Id: I4b74030afc654e60452e65b3e0f1b45a189c16e3
---
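
Note for reviewers (this area between the "---" separator and the
diffstat is not applied by git am): the fix wraps the seeded portion of
rebalance in a context manager that saves and restores the global random
state. A minimal standalone sketch of that pattern follows; seeded_random
is an illustrative name, not Swift code, and unlike _set_random_seed it
always saves state rather than only when a seed is given:

    import contextlib
    import random

    @contextlib.contextmanager
    def seeded_random(seed):
        # Save the global random state, seed it so results inside the
        # block are repeatable, then restore the saved state on exit,
        # even if the block raises.
        state = random.getstate()
        random.seed(seed)
        try:
            yield
        finally:
            random.setstate(state)

    with seeded_random(10):
        repeatable = random.random()   # same value on every run
    unaffected = random.random()       # global sequence is undisturbed
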
 swift/common/ring/builder.py            | 94 +++++++++++++++----------
 test/unit/common/ring/test_builder.py   | 33 ++++++---
 test/unit/common/ring/test_ring.py      |  2 +-
 test/unit/proxy/controllers/test_obj.py | 21 +++---
 4 files changed, 95 insertions(+), 55 deletions(-)

diff --git a/swift/common/ring/builder.py b/swift/common/ring/builder.py
index 293d522e08..e1b4ccd02b 100644
--- a/swift/common/ring/builder.py
+++ b/swift/common/ring/builder.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import contextlib
 import copy
 import errno
 import itertools
@@ -58,6 +59,24 @@
 except ImportError:
     pass
 
+@contextlib.contextmanager
+def _set_random_seed(seed):
+    # If random seed is set when entering this context then reset original
+    # random state when exiting the context. This avoids a test calling this
+    # method with a fixed seed value causing all subsequent tests to use a
+    # repeatable random sequence.
+    random_state = None
+    if seed is not None:
+        random_state = random.getstate()
+        random.seed(seed)
+    try:
+        yield
+    finally:
+        if random_state:
+            # resetting state rather than calling seed() eases unit testing
+            random.setstate(random_state)
+
+
 class RingBuilder(object):
     """
     Used to build swift.common.ring.RingData instances to be written to disk
@@ -482,9 +501,6 @@ class RingBuilder(object):
                     'num_devices': num_devices,
                 })
 
-        if seed is not None:
-            random.seed(seed)
-
         self._ring = None
 
         old_replica2part2dev = copy.deepcopy(self._replica2part2dev)
@@ -494,45 +510,47 @@
         self._last_part_moves = array('B', itertools.repeat(0, self.parts))
         self._update_last_part_moves()
 
-        replica_plan = self._build_replica_plan()
-        self._set_parts_wanted(replica_plan)
+        with _set_random_seed(seed):
+            replica_plan = self._build_replica_plan()
+            self._set_parts_wanted(replica_plan)
 
-        assign_parts = defaultdict(list)
-        # gather parts from replica count adjustment
-        self._adjust_replica2part2dev_size(assign_parts)
-        # gather parts from failed devices
-        removed_devs = self._gather_parts_from_failed_devices(assign_parts)
-        # gather parts for dispersion (N.B. this only picks up parts that
-        # *must* disperse according to the replica plan)
-        self._gather_parts_for_dispersion(assign_parts, replica_plan)
-
-        # we'll gather a few times, or until we archive the plan
-        for gather_count in range(MAX_BALANCE_GATHER_COUNT):
-            self._gather_parts_for_balance(assign_parts, replica_plan)
-            if not assign_parts:
-                # most likely min part hours
-                finish_status = 'Unable to finish'
-                break
-            assign_parts_list = list(assign_parts.items())
-            # shuffle the parts to be reassigned, we have no preference on the
-            # order in which the replica plan is fulfilled.
-            random.shuffle(assign_parts_list)
-            # reset assign_parts map for next iteration
             assign_parts = defaultdict(list)
+            # gather parts from replica count adjustment
+            self._adjust_replica2part2dev_size(assign_parts)
+            # gather parts from failed devices
+            removed_devs = self._gather_parts_from_failed_devices(assign_parts)
+            # gather parts for dispersion (N.B. this only picks up parts that
+            # *must* disperse according to the replica plan)
+            self._gather_parts_for_dispersion(assign_parts, replica_plan)
 
-            num_part_replicas = sum(len(r) for p, r in assign_parts_list)
-            self.logger.debug("Gathered %d parts", num_part_replicas)
-            self._reassign_parts(assign_parts_list, replica_plan)
-            self.logger.debug("Assigned %d parts", num_part_replicas)
+            # we'll gather a few times, or until we archive the plan
+            for gather_count in range(MAX_BALANCE_GATHER_COUNT):
+                self._gather_parts_for_balance(assign_parts, replica_plan)
+                if not assign_parts:
+                    # most likely min part hours
+                    finish_status = 'Unable to finish'
+                    break
+                assign_parts_list = list(assign_parts.items())
+                # shuffle the parts to be reassigned, we have no preference on
+                # the order in which the replica plan is fulfilled.
+                random.shuffle(assign_parts_list)
+                # reset assign_parts map for next iteration
+                assign_parts = defaultdict(list)
 
-            if not sum(d['parts_wanted'] < 0 for d in
-                       self._iter_devs()):
-                finish_status = 'Finished'
-                break
-        else:
-            finish_status = 'Unable to finish'
-        self.logger.debug('%(status)s rebalance plan after %(count)s attempts',
-                          {'status': finish_status, 'count': gather_count + 1})
+                num_part_replicas = sum(len(r) for p, r in assign_parts_list)
+                self.logger.debug("Gathered %d parts", num_part_replicas)
+                self._reassign_parts(assign_parts_list, replica_plan)
+                self.logger.debug("Assigned %d parts", num_part_replicas)
+
+                if not sum(d['parts_wanted'] < 0 for d in
+                           self._iter_devs()):
+                    finish_status = 'Finished'
+                    break
+            else:
+                finish_status = 'Unable to finish'
+        self.logger.debug(
+            '%(status)s rebalance plan after %(count)s attempts',
+            {'status': finish_status, 'count': gather_count + 1})
 
         self.devs_changed = False
         self.version += 1
diff --git a/test/unit/common/ring/test_builder.py b/test/unit/common/ring/test_builder.py
index 8f244df84d..54e612cf07 100644
--- a/test/unit/common/ring/test_builder.py
+++ b/test/unit/common/ring/test_builder.py
@@ -174,6 +174,19 @@ class TestRingBuilder(unittest.TestCase):
         self.assertNotEqual(r0.to_dict(), r1.to_dict())
         self.assertEqual(r1.to_dict(), r2.to_dict())
 
+        # check that random state is reset
+        pre_state = random.getstate()
+        rb2.rebalance(seed=10)
+        self.assertEqual(pre_state, random.getstate(),
+                         "Random state was not reset")
+
+        pre_state = random.getstate()
+        with mock.patch.object(rb2, "_build_replica_plan",
+                               side_effect=Exception()):
+            self.assertRaises(Exception, rb2.rebalance, seed=10)
+        self.assertEqual(pre_state, random.getstate(),
+                         "Random state was not reset")
+
     def test_rebalance_part_on_deleted_other_part_on_drained(self):
         rb = ring.RingBuilder(8, 3, 1)
         rb.add_dev({'id': 0, 'region': 1, 'zone': 1, 'weight': 1,
@@ -1057,18 +1070,18 @@ class TestRingBuilder(unittest.TestCase):
         rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 100,
                     'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'})
         rb.add_dev({'id': 2, 'region': 0, 'zone': 0, 'weight': 900,
-                    'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'})
+                    'ip': '127.0.0.1', 'port': 10000, 'device': 'sdb'})
         rb.add_dev({'id': 4, 'region': 0, 'zone': 0, 'weight': 900,
-                    'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'})
+                    'ip': '127.0.0.1', 'port': 10000, 'device': 'sdc'})
         rb.add_dev({'id': 6, 'region': 0, 'zone': 0, 'weight': 900,
-                    'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'})
+                    'ip': '127.0.0.1', 'port': 10000, 'device': 'sdd'})
 
         # 127.0.0.2 (odd devices)
         rb.add_dev({'id': 1, 'region': 0, 'zone': 0, 'weight': 500,
-                    'ip': '127.0.0.2', 'port': 10000, 'device': 'sdb'})
+                    'ip': '127.0.0.2', 'port': 10000, 'device': 'sda'})
         rb.add_dev({'id': 3, 'region': 0, 'zone': 0, 'weight': 500,
-                    'ip': '127.0.0.2', 'port': 10000, 'device': 'sdc'})
+                    'ip': '127.0.0.2', 'port': 10000, 'device': 'sdb'})
         rb.add_dev({'id': 5, 'region': 0, 'zone': 0, 'weight': 500,
-                    'ip': '127.0.0.2', 'port': 10000, 'device': 'sdd'})
+                    'ip': '127.0.0.2', 'port': 10000, 'device': 'sdc'})
         rb.add_dev({'id': 7, 'region': 0, 'zone': 0, 'weight': 500,
                     'ip': '127.0.0.2', 'port': 10000, 'device': 'sdd'})
@@ -1175,9 +1188,13 @@ class TestRingBuilder(unittest.TestCase):
         }
         self.assertEqual(expected,
                          {d['id']: d['parts_wanted'] for d in rb._iter_devs()})
+        self.assertEqual(rb.get_balance(), 100)
 
         rb.pretend_min_part_hours_passed()
-        rb.rebalance()
+        # There's something like an 11% chance that we won't be able to get to
+        # a balance of 0 (and a 6% chance that we won't change anything at all)
+        # Pick a seed to make this pass.
+        rb.rebalance(seed=123)
         self.assertEqual(rb.get_balance(), 0)
 
     def test_multiple_duplicate_device_assignment(self):
@@ -1601,7 +1618,7 @@ class TestRingBuilder(unittest.TestCase):
         # overload is 10% (0.1).
         rb.set_overload(0.1)
         rb.pretend_min_part_hours_passed()
-        rb.rebalance()
+        rb.rebalance(seed=12345)
 
         part_counts = self._partition_counts(rb, key='zone')
         self.assertEqual(part_counts[0], 212)
diff --git a/test/unit/common/ring/test_ring.py b/test/unit/common/ring/test_ring.py
index 91b1e5a138..2e7cd261a7 100644
--- a/test/unit/common/ring/test_ring.py
+++ b/test/unit/common/ring/test_ring.py
@@ -904,7 +904,7 @@ class TestRing(TestRingBase):
         part = random.randint(0, r.partition_count)
         node_iter = r.get_more_nodes(part)
         next(node_iter)
-        self.assertEqual(5, counting_table.count)
+        self.assertLess(counting_table.count, 14)  # sanity
 
         self.assertEqual(1, r._num_regions)
         self.assertEqual(2, r._num_zones)
diff --git a/test/unit/proxy/controllers/test_obj.py b/test/unit/proxy/controllers/test_obj.py
index 58e85933a2..e0f73eb25b 100644
--- a/test/unit/proxy/controllers/test_obj.py
+++ b/test/unit/proxy/controllers/test_obj.py
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import collections
 import email.parser
 import itertools
 import math
@@ -4310,13 +4311,17 @@ class TestECDuplicationObjController(
         # all nodes have a frag but there is no one set that reaches quorum,
         # which means there is no backend 404 response, but proxy should still
         # return 404 rather than 503
-        obj1 = self._make_ec_object_stub()
-        obj2 = self._make_ec_object_stub()
-        obj3 = self._make_ec_object_stub()
-        obj4 = self._make_ec_object_stub()
-        obj5 = self._make_ec_object_stub()
-        obj6 = self._make_ec_object_stub()
-        obj7 = self._make_ec_object_stub()
+        stub_objects = [
+            self._make_ec_object_stub('obj1' * self.policy.ec_segment_size),
+            self._make_ec_object_stub('obj2' * self.policy.ec_segment_size),
+            self._make_ec_object_stub('obj3' * self.policy.ec_segment_size),
+            self._make_ec_object_stub('obj4' * self.policy.ec_segment_size),
+            self._make_ec_object_stub('obj5' * self.policy.ec_segment_size),
+            self._make_ec_object_stub('obj6' * self.policy.ec_segment_size),
+            self._make_ec_object_stub('obj7' * self.policy.ec_segment_size),
+        ]
+        etags = collections.Counter(stub['etag'] for stub in stub_objects)
+        self.assertEqual(len(etags), 7, etags)  # sanity
 
         # primaries and handoffs for required nodes
         # this is 10-4 * 2 case so that 56 requests (2 * replicas) required
@@ -4326,7 +4331,7 @@ class TestECDuplicationObjController(
         # fill them out to the primary and handoff nodes
         node_frags = []
         for frag in range(8):
-            for stub_obj in (obj1, obj2, obj3, obj4, obj5, obj6, obj7):
+            for stub_obj in stub_objects:
                 if len(node_frags) >= required_nodes:
                     # we already have enough responses
                     break
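
Reviewer note (trailing text below the final hunk; git skips non-diff
lines here when applying): the new assertions in test_builder.py compare
random.getstate() snapshots taken before and after rebalance, including
the error path, which is what exercises the try/finally in
_set_random_seed. A self-contained sketch of that assertion pattern;
seeded and TestSeedIsolation are illustrative names, not Swift code:

    import contextlib
    import random
    import unittest

    @contextlib.contextmanager
    def seeded(seed):
        # same save/seed/restore pattern as the patch, simplified
        state = random.getstate()
        random.seed(seed)
        try:
            yield
        finally:
            random.setstate(state)

    class TestSeedIsolation(unittest.TestCase):
        def test_state_restored_even_on_error(self):
            pre_state = random.getstate()
            with self.assertRaises(ValueError):
                with seeded(10):
                    raise ValueError()  # simulate a failing rebalance step
            # the finally clause restored the global state
            self.assertEqual(pre_state, random.getstate())

    if __name__ == '__main__':
        unittest.main()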