From 18f20daf389e3db42c7bfb56a520913e416caaf7 Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Fri, 5 Mar 2021 15:59:22 +0000 Subject: [PATCH] Add absolute values for shard shrinking config options Add two new sharder config options for configuring shrinking behaviour: - shrink_threshold: the size below which a shard may shrink - expansion_limit: the maximum size to which an acceptor shard may grow The new options match the 'swift-manage-shard-ranges' command line options and take absolute values. The new options provide alternatives to the current equivalent options 'shard_shrink_point' and 'shard_shrink_merge_point', which are expressed as percentages of 'shard_container_threshold'. 'shard_shrink_point' and 'shard_shrink_merge_point' are deprecated and will be overridden by the new options if the new options are explicitly set in a config file. The default values of the new options are the same as the values that would result from the default 'shard_container_threshold', 'shard_shrink_point' and 'shard_shrink_merge_point' i.e.: - shrink_threshold: 100000 - expansion_limit: 750000 Change-Id: I087eac961c1eab53540fe56be4881e01ded1f60e --- doc/source/config/container_server_config.rst | 43 ++++- etc/container-server.conf-sample | 17 ++ swift/container/sharder.py | 6 + test/unit/cli/test_manage_shard_ranges.py | 169 ++++++++++++++---- test/unit/container/test_sharder.py | 163 +++++++++++++---- 5 files changed, 323 insertions(+), 75 deletions(-) diff --git a/doc/source/config/container_server_config.rst b/doc/source/config/container_server_config.rst index 85fb9b377c..8b64214fb2 100644 --- a/doc/source/config/container_server_config.rst +++ b/doc/source/config/container_server_config.rst @@ -287,6 +287,8 @@ the following configuration options defined for the `[container-replicator]`_: * rsync_module * recon_cache_path +Some config options in this section may also be used by the +:ref:`swift-manage-shard-ranges CLI tool `. ================================= ================= ======================================= Option Default Description @@ -310,8 +312,7 @@ auto_shard false If the auto_shard option this option to true in a production cluster. -shard_container_threshold 1000000 When auto-sharding is - enabled this defines the +shard_container_threshold 1000000 This defines the object count at which a container with container-sharding @@ -326,8 +327,24 @@ shard_container_threshold 1000000 When auto-sharding is shrinking and merging shard containers. -shard_shrink_point 10 When auto-sharding is - enabled this defines the +shrink_threshold This defines the + object count below which + a 'donor' shard container + will be considered for + shrinking into another + 'acceptor' shard + container. The default is + determined by + shard_shrink_point. If + set, shrink_threshold + will take precedence over + shard_shrink_point. + +shard_shrink_point 10 Deprecated: shrink_threshold + is recommended and if set + will take precedence over + shard_shrink_point. + This defines the object count below which a 'donor' shard container will be considered for @@ -341,8 +358,22 @@ shard_shrink_point 10 When auto-sharding is 10 means 10% of the shard_container_threshold. -shard_shrink_merge_point 75 When auto-sharding is - enabled this defines the +expansion_limit This defines the + maximum allowed size of + an acceptor shard + container after having a + donor merged into it. The + default is determined by + shard_shrink_merge_point. + If set, expansion_limit + will take precedence over + shard_shrink_merge_point. + +shard_shrink_merge_point 75 Deprecated: expansion_limit + is recommended and if set + will take precedence over + shard_shrink_merge_point. + This defines the maximum allowed size of an acceptor shard container after having a diff --git a/etc/container-server.conf-sample b/etc/container-server.conf-sample index 0bcda5af1d..9b76fca1ce 100644 --- a/etc/container-server.conf-sample +++ b/etc/container-server.conf-sample @@ -372,13 +372,28 @@ use = egg:swift#xprofile # containers. # shard_container_threshold = 1000000 # +# When auto-sharding is enabled shrink_threshold defines the object count +# below which a 'donor' shard container will be considered for shrinking into +# another 'acceptor' shard container. The default is determined by +# shard_shrink_point. If set, shrink_threshold will take precedence over +# shard_shrink_point. +# shrink_threshold = +# # When auto-sharding is enabled shard_shrink_point defines the object count # below which a 'donor' shard container will be considered for shrinking into # another 'acceptor' shard container. shard_shrink_point is a percentage of # shard_container_threshold e.g. the default value of 10 means 10% of the # shard_container_threshold. +# Deprecated: shrink_threshold is recommended and if set will take precedence +# over shard_shrink_point. # shard_shrink_point = 10 # +# When auto-sharding is enabled expansion_limit defines the maximum +# allowed size of an acceptor shard container after having a donor merged into +# it. The default is determined by shard_shrink_merge_point. +# If set, expansion_limit will take precedence over shard_shrink_merge_point. +# expansion_limit = +# # When auto-sharding is enabled shard_shrink_merge_point defines the maximum # allowed size of an acceptor shard container after having a donor merged into # it. Shard_shrink_merge_point is a percentage of shard_container_threshold. @@ -391,6 +406,8 @@ use = egg:swift#xprofile # be considered for shrinking if it has less than or equal to 100 thousand # objects but will only merge into an acceptor if the combined object count # would be less than or equal to 750 thousand objects. +# Deprecated: expansion_limit is recommended and if set will take precedence +# over shard_shrink_merge_point. # shard_shrink_merge_point = 75 # # When auto-sharding is enabled shard_scanner_batch_size defines the maximum diff --git a/swift/container/sharder.py b/swift/container/sharder.py index 8153be3b3a..17baaf529a 100644 --- a/swift/container/sharder.py +++ b/swift/container/sharder.py @@ -616,10 +616,16 @@ class ContainerSharderConf(object): 'conn_timeout', float, 5) self.auto_shard = get_val( 'auto_shard', config_true_value, False) + # deprecated percent options still loaded... self.shrink_threshold = get_val( 'shard_shrink_point', self.percent_of_threshold, 10) self.expansion_limit = get_val( 'shard_shrink_merge_point', self.percent_of_threshold, 75) + # ...but superseded by absolute options if present in conf + self.shrink_threshold = get_val( + 'shrink_threshold', int, self.shrink_threshold) + self.expansion_limit = get_val( + 'expansion_limit', int, self.expansion_limit) self.rows_per_shard = self.shard_container_threshold // 2 def percent_of_threshold(self, val): diff --git a/test/unit/cli/test_manage_shard_ranges.py b/test/unit/cli/test_manage_shard_ranges.py index dbc81fa3dc..91ee18505b 100644 --- a/test/unit/cli/test_manage_shard_ranges.py +++ b/test/unit/cli/test_manage_shard_ranges.py @@ -136,8 +136,8 @@ class TestManageShardRanges(unittest.TestCase): conf = """ [container-sharder] - shard_shrink_point = 15 - shard_shrink_merge_point = 65 + shrink_threshold = 150 + expansion_limit = 650 shard_container_threshold = 1000 max_shrinking = 33 max_expanding = 31 @@ -227,39 +227,6 @@ class TestManageShardRanges(unittest.TestCase): dry_run=False) mocked.assert_called_once_with(mock.ANY, expected) - # conf file - small percentages resulting in zero absolute values - # should be respected rather than falling back to defaults, to avoid - # nasty surprises - conf = """ - [container-sharder] - shard_shrink_point = 1 - shard_shrink_merge_point = 2 - shard_container_threshold = 10 - max_shrinking = 33 - max_expanding = 31 - """ - conf_file = os.path.join(self.testdir, 'sharder.conf') - with open(conf_file, 'w') as fd: - fd.write(dedent(conf)) - - with mock.patch('swift.cli.manage_shard_ranges.compact_shard_ranges', - return_value=0) as mocked: - ret = main([db_file, '--config', conf_file, 'compact']) - self.assertEqual(0, ret) - expected = Namespace(conf_file=conf_file, - path_to_file=mock.ANY, - func=mock.ANY, - subcommand='compact', - force_commits=False, - verbose=0, - max_expanding=31, - max_shrinking=33, - shrink_threshold=0, - expansion_limit=0, - yes=False, - dry_run=False) - mocked.assert_called_once_with(mock.ANY, expected) - # cli options with mock.patch('swift.cli.manage_shard_ranges.compact_shard_ranges', return_value=0) as mocked: @@ -283,6 +250,108 @@ class TestManageShardRanges(unittest.TestCase): dry_run=False) mocked.assert_called_once_with(mock.ANY, expected) + def test_conf_file_deprecated_options(self): + # verify that deprecated percent-based do get applied + db_file = os.path.join(self.testdir, 'hash.db') + broker = ContainerBroker(db_file, account='a', container='c') + broker.initialize() + + conf = """ + [container-sharder] + shard_shrink_point = 15 + shard_shrink_merge_point = 65 + shard_container_threshold = 1000 + max_shrinking = 33 + max_expanding = 31 + """ + + conf_file = os.path.join(self.testdir, 'sharder.conf') + with open(conf_file, 'w') as fd: + fd.write(dedent(conf)) + + with mock.patch('swift.cli.manage_shard_ranges.compact_shard_ranges', + return_value=0) as mocked: + ret = main([db_file, '--config', conf_file, 'compact']) + self.assertEqual(0, ret) + expected = Namespace(conf_file=conf_file, + path_to_file=mock.ANY, + func=mock.ANY, + subcommand='compact', + force_commits=False, + verbose=0, + max_expanding=31, + max_shrinking=33, + shrink_threshold=150, + expansion_limit=650, + yes=False, + dry_run=False) + mocked.assert_called_once_with(mock.ANY, expected) + + # absolute value options take precedence if specified in the conf file + conf = """ + [container-sharder] + shard_shrink_point = 15 + shrink_threshold = 123 + shard_shrink_merge_point = 65 + expansion_limit = 456 + shard_container_threshold = 1000 + max_shrinking = 33 + max_expanding = 31 + """ + + conf_file = os.path.join(self.testdir, 'sharder.conf') + with open(conf_file, 'w') as fd: + fd.write(dedent(conf)) + + with mock.patch('swift.cli.manage_shard_ranges.compact_shard_ranges') \ + as mocked: + main([db_file, '--config', conf_file, 'compact']) + expected = Namespace(conf_file=conf_file, + path_to_file=mock.ANY, + func=mock.ANY, + subcommand='compact', + force_commits=False, + verbose=0, + max_expanding=31, + max_shrinking=33, + shrink_threshold=123, + expansion_limit=456, + yes=False, + dry_run=False) + mocked.assert_called_once_with(mock.ANY, expected) + + # conf file - small percentages resulting in zero absolute values + # should be respected rather than falling back to defaults, to avoid + # nasty surprises + conf = """ + [container-sharder] + shard_shrink_point = 1 + shard_shrink_merge_point = 2 + shard_container_threshold = 10 + max_shrinking = 33 + max_expanding = 31 + """ + conf_file = os.path.join(self.testdir, 'sharder.conf') + with open(conf_file, 'w') as fd: + fd.write(dedent(conf)) + + with mock.patch('swift.cli.manage_shard_ranges.compact_shard_ranges') \ + as mocked: + main([db_file, '--config', conf_file, 'compact']) + expected = Namespace(conf_file=conf_file, + path_to_file=mock.ANY, + func=mock.ANY, + subcommand='compact', + force_commits=False, + verbose=0, + max_expanding=31, + max_shrinking=33, + shrink_threshold=0, + expansion_limit=0, + yes=False, + dry_run=False) + mocked.assert_called_once_with(mock.ANY, expected) + def test_conf_file_invalid(self): db_file = os.path.join(self.testdir, 'hash.db') broker = ContainerBroker(db_file, account='a', container='c') @@ -291,8 +360,8 @@ class TestManageShardRanges(unittest.TestCase): # conf file - invalid value for shard_container_threshold conf = """ [container-sharder] - shard_shrink_point = 1 - shard_shrink_merge_point = 2 + shrink_threshold = 1 + expansion_limit = 2 shard_container_threshold = 0 max_shrinking = 33 max_expanding = 31 @@ -310,6 +379,32 @@ class TestManageShardRanges(unittest.TestCase): self.assert_starts_with(err_lines[0], 'Error loading config file') self.assertIn('shard_container_threshold', err_lines[0]) + def test_conf_file_invalid_deprecated_options(self): + db_file = os.path.join(self.testdir, 'hash.db') + broker = ContainerBroker(db_file, account='a', container='c') + broker.initialize() + + # conf file - invalid value for shard_container_threshold + conf = """ + [container-sharder] + shard_shrink_point = -1 + shard_shrink_merge_point = 2 + shard_container_threshold = 1000 + max_shrinking = 33 + max_expanding = 31 + """ + conf_file = os.path.join(self.testdir, 'sharder.conf') + with open(conf_file, 'w') as fd: + fd.write(dedent(conf)) + + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([db_file, '--config', conf_file, 'compact']) + err_lines = err.getvalue().split('\n') + self.assert_starts_with(err_lines[0], 'Error loading config file') + self.assertIn('shard_shrink_point', err_lines[0]) + def test_conf_file_does_not_exist(self): db_file = os.path.join(self.testdir, 'hash.db') broker = ContainerBroker(db_file, account='a', container='c') diff --git a/test/unit/container/test_sharder.py b/test/unit/container/test_sharder.py index 9b5b99e919..9da268804a 100644 --- a/test/unit/container/test_sharder.py +++ b/test/unit/container/test_sharder.py @@ -130,28 +130,29 @@ class BaseTestSharder(unittest.TestCase): class TestSharder(BaseTestSharder): - def test_init(self): - def do_test(conf, expected, logger=self.logger): - if logger: - logger.clear() - with mock.patch( - 'swift.container.sharder.internal_client.InternalClient') \ - as mock_ic: - with mock.patch('swift.common.db_replicator.ring.Ring') \ - as mock_ring: - mock_ring.return_value = mock.MagicMock() - mock_ring.return_value.replica_count = 3 - sharder = ContainerSharder(conf, logger=logger) - mock_ring.assert_called_once_with( - '/etc/swift', ring_name='container') - for k, v in expected.items(): - self.assertTrue(hasattr(sharder, k), 'Missing attr %s' % k) - self.assertEqual(v, getattr(sharder, k), - 'Incorrect value: expected %s=%s but got %s' % - (k, v, getattr(sharder, k))) - return sharder, mock_ic + def _do_test_init(self, conf, expected, use_logger=True): + logger = self.logger if use_logger else None + if logger: + logger.clear() + with mock.patch( + 'swift.container.sharder.internal_client.InternalClient') \ + as mock_ic: + with mock.patch('swift.common.db_replicator.ring.Ring') \ + as mock_ring: + mock_ring.return_value = mock.MagicMock() + mock_ring.return_value.replica_count = 3 + sharder = ContainerSharder(conf, logger=logger) + mock_ring.assert_called_once_with( + '/etc/swift', ring_name='container') + for k, v in expected.items(): + self.assertTrue(hasattr(sharder, k), 'Missing attr %s' % k) + self.assertEqual(v, getattr(sharder, k), + 'Incorrect value: expected %s=%s but got %s' % + (k, v, getattr(sharder, k))) + return sharder, mock_ic - # defaults + def test_init(self): + # default values expected = { 'mount_check': True, 'bind_ip': '0.0.0.0', 'port': 6201, 'per_diff': 1000, 'max_diffs': 100, 'interval': 30, @@ -177,7 +178,7 @@ class TestSharder(BaseTestSharder): 'max_shrinking': 1, 'max_expanding': -1 } - sharder, mock_ic = do_test({}, expected, logger=None) + sharder, mock_ic = self._do_test_init({}, expected, use_logger=False) self.assertEqual( 'container-sharder', sharder.logger.logger.name) mock_ic.assert_called_once_with( @@ -185,7 +186,7 @@ class TestSharder(BaseTestSharder): allow_modify_pipeline=False, use_replication_network=True) - # non-default + # non-default values conf = { 'mount_check': False, 'bind_ip': '10.11.12.13', 'bind_port': 62010, 'per_diff': 2000, 'max_diffs': 200, 'interval': 60, @@ -195,8 +196,8 @@ class TestSharder(BaseTestSharder): 'rsync_compress': True, 'rsync_module': '{replication_ip}::container_sda/', 'reclaim_age': 86400 * 14, - 'shard_shrink_point': 35, - 'shard_shrink_merge_point': 85, + 'shrink_threshold': 7000000, + 'expansion_limit': 17000000, 'shard_container_threshold': 20000000, 'cleave_batch_size': 4, 'shard_scanner_batch_size': 8, @@ -238,7 +239,7 @@ class TestSharder(BaseTestSharder): 'max_shrinking': 5, 'max_expanding': 4 } - sharder, mock_ic = do_test(conf, expected) + sharder, mock_ic = self._do_test_init(conf, expected) mock_ic.assert_called_once_with( '/etc/swift/my-sharder-ic.conf', 'Swift Container Sharder', 2, allow_modify_pipeline=False, @@ -253,7 +254,7 @@ class TestSharder(BaseTestSharder): 'existing_shard_replication_quorum': 3}) conf.update({'shard_replication_quorum': 4, 'existing_shard_replication_quorum': 4}) - do_test(conf, expected) + self._do_test_init(conf, expected) warnings = self.logger.get_lines_for_level('warning') self.assertEqual(warnings[:1], [ 'Option auto_create_account_prefix is deprecated. ' @@ -268,15 +269,81 @@ class TestSharder(BaseTestSharder): ]) with self.assertRaises(ValueError) as cm: - do_test({'shard_shrink_point': 101}, {}) + self._do_test_init({'shard_shrink_point': 101}, {}) self.assertIn( 'greater than 0, less than 100, not "101"', str(cm.exception)) with self.assertRaises(ValueError) as cm: - do_test({'shard_shrink_merge_point': 101}, {}) + self._do_test_init({'shard_shrink_merge_point': 101}, {}) self.assertIn( 'greater than 0, less than 100, not "101"', str(cm.exception)) + def test_init_deprecated_options(self): + # percent values applied if absolute values not given + conf = { + 'shard_shrink_point': 15, # trumps shrink_threshold + 'shard_shrink_merge_point': 95, # trumps expansion_limit + 'shard_container_threshold': 20000000, + } + expected = { + 'mount_check': True, 'bind_ip': '0.0.0.0', 'port': 6201, + 'per_diff': 1000, 'max_diffs': 100, 'interval': 30, + 'databases_per_second': 50, + 'cleave_row_batch_size': 10000, + 'node_timeout': 10, 'conn_timeout': 5, + 'rsync_compress': False, + 'rsync_module': '{replication_ip}::container', + 'reclaim_age': 86400 * 7, + 'shard_container_threshold': 20000000, + 'rows_per_shard': 10000000, + 'shrink_threshold': 3000000, + 'expansion_limit': 19000000, + 'cleave_batch_size': 2, + 'shard_scanner_batch_size': 10, + 'rcache': '/var/cache/swift/container.recon', + 'shards_account_prefix': '.shards_', + 'auto_shard': False, + 'recon_candidates_limit': 5, + 'shard_replication_quorum': 2, + 'existing_shard_replication_quorum': 2, + 'max_shrinking': 1, + 'max_expanding': -1 + } + self._do_test_init(conf, expected) + # absolute values override percent values + conf = { + 'shard_shrink_point': 15, # trumps shrink_threshold + 'shrink_threshold': 7000000, + 'shard_shrink_merge_point': 95, # trumps expansion_limit + 'expansion_limit': 17000000, + 'shard_container_threshold': 20000000, + } + expected = { + 'mount_check': True, 'bind_ip': '0.0.0.0', 'port': 6201, + 'per_diff': 1000, 'max_diffs': 100, 'interval': 30, + 'databases_per_second': 50, + 'cleave_row_batch_size': 10000, + 'node_timeout': 10, 'conn_timeout': 5, + 'rsync_compress': False, + 'rsync_module': '{replication_ip}::container', + 'reclaim_age': 86400 * 7, + 'shard_container_threshold': 20000000, + 'rows_per_shard': 10000000, + 'shrink_threshold': 7000000, + 'expansion_limit': 17000000, + 'cleave_batch_size': 2, + 'shard_scanner_batch_size': 10, + 'rcache': '/var/cache/swift/container.recon', + 'shards_account_prefix': '.shards_', + 'auto_shard': False, + 'recon_candidates_limit': 5, + 'shard_replication_quorum': 2, + 'existing_shard_replication_quorum': 2, + 'max_shrinking': 1, + 'max_expanding': -1 + } + self._do_test_init(conf, expected) + def test_init_internal_client_conf_loading_error(self): with mock.patch('swift.common.db_replicator.ring.Ring') \ as mock_ring: @@ -7206,6 +7273,26 @@ class TestContainerSharderConf(unittest.TestCase): self.assertEqual(expected, DEFAULT_SHARDER_CONF) def test_conf(self): + conf = {'shard_container_threshold': 2000000, + 'max_shrinking': 2, + 'max_expanding': 3, + 'shard_scanner_batch_size': 11, + 'cleave_batch_size': 4, + 'cleave_row_batch_size': 50000, + 'broker_timeout': 61, + 'recon_candidates_limit': 6, + 'recon_sharded_timeout': 43201, + 'conn_timeout': 5.1, + 'auto_shard': True, + 'shrink_threshold': 100001, + 'expansion_limit': 750001, + 'rows_per_shard': 500001} + # rows_per_shard is not directly configurable + expected = dict(conf, rows_per_shard=1000000) + conf.update({'unexpected': 'option'}) + self.assertEqual(expected, vars(ContainerSharderConf(conf))) + + def test_deprecated_percent_conf(self): base_conf = {'shard_container_threshold': 2000000, 'max_shrinking': 2, 'max_expanding': 3, @@ -7218,12 +7305,22 @@ class TestContainerSharderConf(unittest.TestCase): 'conn_timeout': 5.1, 'auto_shard': True} - percent_conf = {'shard_shrink_point': 9, - 'shard_shrink_merge_point': 71} + # percent options work + deprecated_conf = {'shard_shrink_point': 9, + 'shard_shrink_merge_point': 71} expected = dict(base_conf, rows_per_shard=1000000, shrink_threshold=180000, expansion_limit=1420000) conf = dict(base_conf) - conf.update(percent_conf) + conf.update(deprecated_conf) + self.assertEqual(expected, vars(ContainerSharderConf(conf))) + + # check absolute options override percent options + conf.update({'shrink_threshold': 100001, + 'expansion_limit': 750001}) + + expected = dict(base_conf, rows_per_shard=1000000, + shrink_threshold=100001, expansion_limit=750001) + conf.update(deprecated_conf) self.assertEqual(expected, vars(ContainerSharderConf(conf))) def test_bad_values(self): @@ -7241,6 +7338,8 @@ class TestContainerSharderConf(unittest.TestCase): 'recon_sharded_timeout': not_int, 'conn_timeout': not_float, # 'auto_shard': anything can be passed to config_true_value + 'shrink_threshold': not_int, + 'expansion_limit': not_int, 'shard_shrink_point': not_percent, 'shard_shrink_merge_point': not_percent}