From 744e9a94afea55ddc67cf8a1e8f580f8bf5927a4 Mon Sep 17 00:00:00 2001 From: Jianjian Huo Date: Tue, 2 May 2023 20:21:08 -0700 Subject: [PATCH] Sharder: add timing metrics for individual steps and total time spent. Change-Id: Ie2a8e4eced6688e5a98aa37c3c7b0c13fd2ddeee --- swift/container/sharder.py | 19 +++++++ test/unit/container/test_sharder.py | 84 +++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) diff --git a/swift/container/sharder.py b/swift/container/sharder.py index ee97880cd9..adff1df977 100644 --- a/swift/container/sharder.py +++ b/swift/container/sharder.py @@ -2331,9 +2331,13 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator): return # now look and deal with misplaced objects. + move_start_ts = time.time() self._move_misplaced_objects(broker) + self.logger.timing_since( + 'sharder.sharding.move_misplaced', move_start_ts) is_leader = node['index'] == 0 and self.auto_shard and not is_deleted + if state in (UNSHARDED, COLLAPSED): if is_leader and broker.is_root_container(): # bootstrap sharding of root container @@ -2348,11 +2352,14 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator): # container has been given shard ranges rather than # found them e.g. via replication or a shrink event, # or manually triggered cleaving. + db_start_ts = time.time() if broker.set_sharding_state(): state = SHARDING self.info(broker, 'Kick off container cleaving, ' 'own shard range in state %r', own_shard_range.state_text) + self.logger.timing_since( + 'sharder.sharding.set_state', db_start_ts) elif is_leader: if broker.set_sharding_state(): state = SHARDING @@ -2363,6 +2370,7 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator): own_shard_range.state_text) if state == SHARDING: + cleave_start_ts = time.time() if is_leader: num_found = self._find_shard_ranges(broker) else: @@ -2377,6 +2385,8 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator): # always try to cleave any pending shard ranges cleave_complete = self._cleave(broker) + self.logger.timing_since( + 'sharder.sharding.cleave', cleave_start_ts) if cleave_complete: if self._complete_sharding(broker): @@ -2384,6 +2394,9 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator): self._increment_stat('visited', 'completed', statsd=True) self.info(broker, 'Completed cleaving, DB set to sharded ' 'state') + self.logger.timing_since( + 'sharder.sharding.completed', + broker.get_own_shard_range().epoch) else: self.info(broker, 'Completed cleaving, DB remaining in ' 'sharding state') @@ -2391,6 +2404,7 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator): if not broker.is_deleted(): if state == SHARDED and broker.is_root_container(): # look for shrink stats + send_start_ts = time.time() self._identify_shrinking_candidate(broker, node) if is_leader: self._find_and_enable_shrinking_candidates(broker) @@ -2400,6 +2414,8 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator): self._send_shard_ranges(broker, shard_range.account, shard_range.container, [shard_range]) + self.logger.timing_since( + 'sharder.sharding.send_sr', send_start_ts) if not broker.is_root_container(): # Update the root container with this container's shard range @@ -2408,7 +2424,10 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator): # sharding a shard, this is when the root will see the new # shards move to ACTIVE state and the sharded shard # simultaneously become deleted. + update_start_ts = time.time() self._update_root_container(broker) + self.logger.timing_since( + 'sharder.sharding.update_root', update_start_ts) self.debug(broker, 'Finished processing, state %s%s', diff --git a/test/unit/container/test_sharder.py b/test/unit/container/test_sharder.py index 463ca14613..76387d1379 100644 --- a/test/unit/container/test_sharder.py +++ b/test/unit/container/test_sharder.py @@ -2466,6 +2466,16 @@ class TestSharder(BaseTestSharder): self.assertEqual('', context.cursor) self.assertEqual(10, context.cleave_to_row) self.assertEqual(12, context.max_row) # note that max row increased + self.assertTrue(self.logger.log_dict['timing_since']) + self.assertEqual('sharder.sharding.move_misplaced', + self.logger.log_dict['timing_since'][-3][0][0]) + self.assertGreater(self.logger.log_dict['timing_since'][-3][0][1], 0) + self.assertEqual('sharder.sharding.set_state', + self.logger.log_dict['timing_since'][-2][0][0]) + self.assertGreater(self.logger.log_dict['timing_since'][-2][0][1], 0) + self.assertEqual('sharder.sharding.cleave', + self.logger.log_dict['timing_since'][-1][0][0]) + self.assertGreater(self.logger.log_dict['timing_since'][-1][0][1], 0) lines = sharder.logger.get_lines_for_level('info') self.assertEqual( ["Kick off container cleaving, own shard range in state " @@ -2511,6 +2521,80 @@ class TestSharder(BaseTestSharder): 'Completed cleaving, DB set to sharded state, path: a/c, db: %s' % broker.db_file, lines[1:]) self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertTrue(self.logger.log_dict['timing_since']) + self.assertEqual('sharder.sharding.move_misplaced', + self.logger.log_dict['timing_since'][-4][0][0]) + self.assertGreater(self.logger.log_dict['timing_since'][-4][0][1], 0) + self.assertEqual('sharder.sharding.cleave', + self.logger.log_dict['timing_since'][-3][0][0]) + self.assertGreater(self.logger.log_dict['timing_since'][-3][0][1], 0) + self.assertEqual('sharder.sharding.completed', + self.logger.log_dict['timing_since'][-2][0][0]) + self.assertGreater(self.logger.log_dict['timing_since'][-2][0][1], 0) + self.assertEqual('sharder.sharding.send_sr', + self.logger.log_dict['timing_since'][-1][0][0]) + self.assertGreater(self.logger.log_dict['timing_since'][-1][0][1], 0) + + def test_cleave_timing_metrics(self): + broker = self._make_broker() + objects = [{'name': 'obj_%03d' % i, + 'created_at': Timestamp.now().normal, + 'content_type': 'text/plain', + 'etag': 'etag_%d' % i, + 'size': 1024 * i, + 'deleted': i % 2, + 'storage_policy_index': 0, + } for i in range(1, 8)] + broker.merge_items([dict(obj) for obj in objects]) + broker.enable_sharding(Timestamp.now()) + shard_ranges = self._make_shard_ranges( + (('', 'obj_004'), ('obj_004', '')), state=ShardRange.CREATED) + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2', + 'index': 0} + + with self._mock_sharder() as sharder: + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + + lines = sharder.logger.get_lines_for_level('info') + self.assertEqual( + 'Starting to cleave (2 todo), path: a/c, db: %s' + % broker.db_file, lines[0]) + self.assertIn( + 'Completed cleaving, DB set to sharded state, path: a/c, db: %s' + % broker.db_file, lines[1:]) + + self.assertTrue(self.logger.log_dict['timing_since']) + self.assertEqual('sharder.sharding.move_misplaced', + self.logger.log_dict['timing_since'][-4][0][0]) + self.assertGreater(self.logger.log_dict['timing_since'][-4][0][1], 0) + self.assertEqual('sharder.sharding.cleave', + self.logger.log_dict['timing_since'][-3][0][0]) + self.assertGreater(self.logger.log_dict['timing_since'][-3][0][1], 0) + self.assertEqual('sharder.sharding.completed', + self.logger.log_dict['timing_since'][-2][0][0]) + self.assertGreater(self.logger.log_dict['timing_since'][-2][0][1], 0) + self.assertEqual('sharder.sharding.send_sr', + self.logger.log_dict['timing_since'][-1][0][0]) + self.assertGreater(self.logger.log_dict['timing_since'][-1][0][1], 0) + + # check shard ranges were updated to ACTIVE + self.assertEqual([ShardRange.ACTIVE] * 2, + [sr.state for sr in broker.get_shard_ranges()]) + shard_broker = ContainerBroker(expected_shard_dbs[0]) + actual_objects = shard_broker.get_objects() + self.assertEqual(objects[:4], actual_objects) + shard_broker = ContainerBroker(expected_shard_dbs[1]) + actual_objects = shard_broker.get_objects() + self.assertEqual(objects[4:], actual_objects) def test_cleave_multiple_storage_policies(self): # verify that objects in all storage policies are cleaved