From bf4edefce4990a9c8c3f6a92696af37d840d2008 Mon Sep 17 00:00:00 2001 From: Matthew Oliver Date: Thu, 10 Feb 2022 13:42:48 +1100 Subject: [PATCH] DB Replicator: Add handoff_delete option Currently the object-replicator has an option called `handoff_delete` which allows us to define the the number of replicas which are ensured in swift. Once a handoff node ensures that many successful responses it can go ahead and delete the handoff partition. By default it's 'auto' or rather the number of primary nodes. But this can be reduced. It's useful in draining full disks, but has to be used carefully. This patch adds the same option to the DB replicator and works the same way. But instead of deleting a partition it's done at the per DB level. Because it's done in the DB Replicator level it means the option is now available to both the Account and Container replicators. Change-Id: Ide739a6d805bda20071c7977f5083574a5345a33 --- doc/source/config/account_server_config.rst | 25 +++++++- doc/source/config/container_server_config.rst | 25 +++++++- etc/account-server.conf-sample | 13 ++++- etc/container-server.conf-sample | 13 ++++- swift/common/db_replicator.py | 28 ++++++--- test/unit/common/test_db_replicator.py | 57 +++++++++++++++++-- 6 files changed, 137 insertions(+), 24 deletions(-) diff --git a/doc/source/config/account_server_config.rst b/doc/source/config/account_server_config.rst index 241c3e9d60..f305c021b0 100644 --- a/doc/source/config/account_server_config.rst +++ b/doc/source/config/account_server_config.rst @@ -170,9 +170,9 @@ ionice_priority None I/O scheduling priority of server [account-replicator] ******************** -==================== ========================= =============================== +==================== ========================= ===================================== Option Default Description --------------------- ------------------------- ------------------------------- +-------------------- ------------------------- ------------------------------------- log_name account-replicator Label used when logging log_facility LOG_LOCAL0 Syslog log facility log_level INFO Logging level @@ -256,7 +256,26 @@ ionice_priority None I/O scheduling priority of serve Work only with ionice_class. Ignored if IOPRIO_CLASS_IDLE is set. -==================== ========================= =============================== +handoffs_only no When handoffs_only mode is enabled + the replicator will *only* replicate + from handoff nodes to primary nodes + and will not sync primary nodes + with other primary nodes. +handoff_delete auto the number of replicas which are + ensured in swift. If the number + less than the number of replicas + is set, account-replicator + could delete local handoffs even + if all replicas are not ensured in + the cluster. The replicator would + remove local handoff account database + after syncing when the number of + successful responses is greater than + or equal to this number. By default + handoff partitions will be removed + when it has successfully replicated + to all the canonical nodes. +==================== ========================= ===================================== ***************** [account-auditor] diff --git a/doc/source/config/container_server_config.rst b/doc/source/config/container_server_config.rst index 7961f50e8f..6f7d6031a3 100644 --- a/doc/source/config/container_server_config.rst +++ b/doc/source/config/container_server_config.rst @@ -175,9 +175,9 @@ ionice_priority None I/O scheduling priority of ser [container-replicator] ********************** -==================== =========================== ============================= +==================== =========================== ======================================= Option Default Description --------------------- --------------------------- ----------------------------- +-------------------- --------------------------- --------------------------------------- log_name container-replicator Label used when logging log_facility LOG_LOCAL0 Syslog log facility log_level INFO Logging level @@ -266,7 +266,26 @@ ionice_priority None I/O scheduling priority of Work only with ionice_class. Ignored if IOPRIO_CLASS_IDLE is set. -==================== =========================== ============================= +handoffs_only no When handoffs_only mode is enabled + the replicator will *only* replicate + from handoff nodes to primary nodes + and will not sync primary nodes + with other primary nodes. +handoff_delete auto the number of replicas which are + ensured in swift. If the number + less than the number of replicas + is set, container-replicator + could delete local handoffs even + if all replicas are not ensured in + the cluster. The replicator would + remove local handoff container database + after syncing when the number of + successful responses is greater than + or equal to this number. By default + handoff partitions will be removed + when it has successfully replicated + to all the canonical nodes. +==================== =========================== ======================================= ******************* [container-sharder] diff --git a/etc/account-server.conf-sample b/etc/account-server.conf-sample index 89c1ea3506..ef274cbfd3 100644 --- a/etc/account-server.conf-sample +++ b/etc/account-server.conf-sample @@ -187,8 +187,8 @@ use = egg:swift#recon # ionice_class = # ionice_priority = # -# The handoffs_only mode option is for special-case emergency -# situations such as full disks in the cluster. This option SHOULD NOT +# The handoffs_only and handoff_delete options are for special-case emergency +# situations such as full disks in the cluster. These options SHOULD NOT # BE ENABLED except in emergencies. When handoffs_only mode is enabled # the replicator will *only* replicate from handoff nodes to primary # nodes and will not sync primary nodes with other primary nodes. @@ -205,6 +205,15 @@ use = egg:swift#recon # long-term use. # # handoffs_only = no +# +# handoff_delete is the number of replicas which are ensured in swift. +# If the number less than the number of replicas is set, account-replicator +# could delete local handoffs even if all replicas are not ensured in the +# cluster. The replicator would remove local handoff account database after +# syncing when the number of successful responses is greater than or equal to +# this number. By default(auto), handoff partitions will be +# removed when it has successfully replicated to all the canonical nodes. +# handoff_delete = auto [account-auditor] # You can override the default log routing for this app here (don't use set!): diff --git a/etc/container-server.conf-sample b/etc/container-server.conf-sample index e1f8482c6e..93887f60a5 100644 --- a/etc/container-server.conf-sample +++ b/etc/container-server.conf-sample @@ -197,8 +197,8 @@ use = egg:swift#recon # ionice_class = # ionice_priority = # -# The handoffs_only mode option is for special-case emergency -# situations such as full disks in the cluster. This option SHOULD NOT +# The handoffs_only and handoff_delete options are for special-case emergency +# situations such as full disks in the cluster. These options SHOULD NOT # BE ENABLED except in emergencies. When handoffs_only mode is enabled # the replicator will *only* replicate from handoff nodes to primary # nodes and will not sync primary nodes with other primary nodes. @@ -215,6 +215,15 @@ use = egg:swift#recon # long-term use. # # handoffs_only = no +# +# handoff_delete is the number of replicas which are ensured in swift. +# If the number less than the number of replicas is set, container-replicator +# could delete local handoffs even if all replicas are not ensured in the +# cluster. The replicator would remove local handoff container database after +# syncing when the number of successful responses is greater than or equal to +# this number. By default(auto), handoff partitions will be +# removed when it has successfully replicated to all the canonical nodes. +# handoff_delete = auto [container-updater] # You can override the default log routing for this app here (don't use set!): diff --git a/swift/common/db_replicator.py b/swift/common/db_replicator.py index 9447a802d4..9c77d809bb 100644 --- a/swift/common/db_replicator.py +++ b/swift/common/db_replicator.py @@ -34,7 +34,7 @@ from swift.common.utils import get_logger, whataremyips, storage_directory, \ renamer, mkdirs, lock_parent_directory, config_true_value, \ unlink_older_than, dump_recon_cache, rsync_module_interpolation, \ parse_override_options, round_robin_iter, Everything, get_db_files, \ - parse_db_filename, quote, RateLimitedIterator + parse_db_filename, quote, RateLimitedIterator, config_auto_int_value from swift.common import ring from swift.common.ring.utils import is_local_device from swift.common.http import HTTP_NOT_FOUND, HTTP_INSUFFICIENT_STORAGE, \ @@ -238,6 +238,8 @@ class Replicator(Daemon): self.extract_device_re = re.compile('%s%s([^%s]+)' % ( self.root, os.path.sep, os.path.sep)) self.handoffs_only = config_true_value(conf.get('handoffs_only', 'no')) + self.handoff_delete = config_auto_int_value( + conf.get('handoff_delete', 'auto'), 0) def _zero_stats(self): """Zero out the stats.""" @@ -554,7 +556,13 @@ class Replicator(Daemon): reason = '%s new rows' % max_row_delta self.logger.debug(log_template, reason) return True - if not (responses and all(responses)): + if self.handoff_delete: + # delete handoff if we have had handoff_delete successes + successes_count = len([resp for resp in responses if resp]) + delete_handoff = successes_count >= self.handoff_delete + else: + delete_handoff = responses and all(responses) + if not delete_handoff: reason = '%s/%s success' % (responses.count(True), len(responses)) self.logger.debug(log_template, reason) return True @@ -773,11 +781,12 @@ class Replicator(Daemon): self.logger.error(_('ERROR Failed to get my own IPs?')) return - if self.handoffs_only: + if self.handoffs_only or self.handoff_delete: self.logger.warning( - 'Starting replication pass with handoffs_only enabled. ' - 'This mode is not intended for normal ' - 'operation; use handoffs_only with care.') + 'Starting replication pass with handoffs_only ' + 'and/or handoffs_delete enabled. ' + 'These modes are not intended for normal ' + 'operation; use these options with care.') self._local_device_ids = set() found_local = False @@ -820,10 +829,11 @@ class Replicator(Daemon): self._replicate_object, part, object_file, node_id) self.cpool.waitall() self.logger.info(_('Replication run OVER')) - if self.handoffs_only: + if self.handoffs_only or self.handoff_delete: self.logger.warning( - 'Finished replication pass with handoffs_only enabled. ' - 'If handoffs_only is no longer required, disable it.') + 'Finished replication pass with handoffs_only and/or ' + 'handoffs_delete enabled. If these are no longer required, ' + 'disable them.') self._report_stats() def run_forever(self, *args, **kwargs): diff --git a/test/unit/common/test_db_replicator.py b/test/unit/common/test_db_replicator.py index c5951da21b..3c70faa375 100644 --- a/test/unit/common/test_db_replicator.py +++ b/test/unit/common/test_db_replicator.py @@ -837,6 +837,51 @@ class TestDBReplicator(unittest.TestCase): self.assertEqual(['/path/to/file'], self.delete_db_calls) self.assertEqual(0, replicator.stats['failure']) + def test_handoff_delete(self): + def do_test(config, repl_to_node_results, expect_delete): + self.delete_db_calls = [] + replicator = TestReplicator(config) + replicator.ring = FakeRingWithNodes().Ring('path') + replicator.brokerclass = FakeAccountBroker + mock_repl_to_node = mock.Mock() + mock_repl_to_node.side_effect = repl_to_node_results + replicator._repl_to_node = mock_repl_to_node + replicator.delete_db = self.stub_delete_db + orig_cleanup = replicator.cleanup_post_replicate + with mock.patch.object(replicator, 'cleanup_post_replicate', + side_effect=orig_cleanup) as mock_cleanup: + replicator._replicate_object('0', '/path/to/file', 'node_id') + mock_cleanup.assert_called_once_with(mock.ANY, mock.ANY, + repl_to_node_results) + self.assertIsInstance(mock_cleanup.call_args[0][0], + replicator.brokerclass) + if expect_delete: + self.assertEqual(['/path/to/file'], self.delete_db_calls) + else: + self.assertNotEqual(['/path/to/file'], self.delete_db_calls) + + self.assertEqual(repl_to_node_results.count(True), + replicator.stats['success']) + self.assertEqual(repl_to_node_results.count(False), + replicator.stats['failure']) + + for cfg, repl_results, expected_delete in ( + # Start with the sanilty check + ({}, [True] * 3, True), + ({}, [True, True, False], False), + ({'handoff_delete': 'auto'}, [True] * 3, True), + ({'handoff_delete': 'auto'}, [True, True, False], False), + ({'handoff_delete': 0}, [True] * 3, True), + ({'handoff_delete': 0}, [True, True, False], False), + # Now test a lower handoff delete + ({'handoff_delete': 2}, [True] * 3, True), + ({'handoff_delete': 2}, [True, True, False], True), + ({'handoff_delete': 2}, [True, False, False], False), + ({'handoff_delete': 1}, [True] * 3, True), + ({'handoff_delete': 1}, [True, True, False], True), + ({'handoff_delete': 1}, [True, False, False], True)): + do_test(cfg, repl_results, expected_delete) + def test_replicate_object_delete_delegated_to_cleanup_post_replicate(self): replicator = TestReplicator({}) replicator.ring = FakeRingWithNodes().Ring('path') @@ -1845,11 +1890,13 @@ class TestHandoffsOnly(unittest.TestCase): self.assertEqual( self.logger.get_lines_for_level('warning'), - [('Starting replication pass with handoffs_only enabled. This ' - 'mode is not intended for normal operation; use ' - 'handoffs_only with care.'), - ('Finished replication pass with handoffs_only enabled. ' - 'If handoffs_only is no longer required, disable it.')]) + [('Starting replication pass with handoffs_only and/or ' + 'handoffs_delete enabled. These ' + 'modes are not intended for normal operation; use ' + 'these options with care.'), + ('Finished replication pass with handoffs_only and/or ' + 'handoffs_delete enabled. If these are no longer required, ' + 'disable them.')]) def test_skips_primary_partitions(self): replicator = TestReplicator({