sharding: Make replicator logging less scary

When we abort the replication process because we've got shard ranges and
the sharder is now responsible for ensuring object-row durability, we
log a warning like "refusing to replicate objects", which sounds scary.

That's because it *is*, of course -- if the sharder isn't running,
whatever rows that DB has may only exist in that DB, meaning we're one
drive failure away from losing track of them entirely.

However, when the sharder *is* running and everything's happy, we reach
a steady-state where the root containers are all sharded and none of
them have any object rows to lose. At that point, the warning does more
harm than good.

Only print the scary "refusing to replicate" warning if we're still
responsible for some object rows, whether deleted or not.

Change-Id: I35de08d6c1617b2e446e969a54b79b42e8cfafef

Author: Tim Burke
Date:   2019-01-31 14:55:05 -08:00
Commit: c0dbf5b885 (parent 0c316a134f)
2 changed files with 17 additions and 8 deletions


@@ -20,7 +20,7 @@ from collections import defaultdict
 from eventlet import Timeout
 from swift.container.sync_store import ContainerSyncStore
-from swift.container.backend import ContainerBroker, DATADIR
+from swift.container.backend import ContainerBroker, DATADIR, SHARDED
 from swift.container.reconciler import (
     MISPLACED_OBJECTS_ACCOUNT, incorrect_policy_index,
     get_reconciler_container_name, get_row_to_q_entry_translator)
@@ -113,11 +113,20 @@ class ContainerReplicator(db_replicator.Replicator):
                 'peer may need upgrading', broker.db_file,
                 '%(ip)s:%(port)s/%(device)s' % node)
         if broker.sharding_initiated():
-            self.logger.warning(
-                '%s is able to shard -- refusing to replicate objects to peer '
-                '%s; have shard ranges and will wait for cleaving',
-                broker.db_file,
-                '%(ip)s:%(port)s/%(device)s' % node)
+            if info['db_state'] == SHARDED and len(
+                    broker.get_objects(limit=1)) == 0:
+                self.logger.debug('%s is sharded and has nothing more to '
+                                  'replicate to peer %s',
+                                  broker.db_file,
+                                  '%(ip)s:%(port)s/%(device)s' % node)
+            else:
+                # Only print the scary warning if there was something that
+                # didn't get replicated
+                self.logger.warning(
+                    '%s is able to shard -- refusing to replicate objects to '
+                    'peer %s; have shard ranges and will wait for cleaving',
+                    broker.db_file,
+                    '%(ip)s:%(port)s/%(device)s' % node)
             self.stats['deferred'] += 1
             return shard_range_success
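
Distilled from the hunk above, the new check can be read as a tiny predicate (a sketch for illustration only, not code from the commit; the helper name is made up, while SHARDED, info['db_state'] and broker.get_objects() are the names the diff itself uses, and get_objects() is assumed to return object rows whether or not they are deleted, which is why a lone tombstone still triggers the warning):

    from swift.container.backend import SHARDED

    def _still_owns_object_rows(broker, info):
        # Hypothetical helper: a root container that has finished sharding
        # and holds no object rows at all (deleted or not) has nothing left
        # to lose, so only a debug message is needed.
        if info['db_state'] == SHARDED and not broker.get_objects(limit=1):
            return False
        # Otherwise this DB may be the only copy of some rows; the loud
        # "refusing to replicate" warning is still justified.
        return True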


@@ -1902,7 +1902,7 @@ class TestContainerSharding(BaseTestContainerSharding):
         old_primary_dir, container_hash = self.get_storage_dir(
             self.brain.part, handoff_node)
         utils.mkdirs(os.path.dirname(old_primary_dir))
-        os.rename(new_primary_dir, old_primary_dir)
+        shutil.move(new_primary_dir, old_primary_dir)
         # make the cluster more or less "healthy" again
         self.brain.servers.start(number=new_primary_node_number)
@@ -2009,7 +2009,7 @@ class TestContainerSharding(BaseTestContainerSharding):
         old_primary_dir, container_hash = self.get_storage_dir(
             self.brain.part, handoff_node)
         utils.mkdirs(os.path.dirname(old_primary_dir))
-        os.rename(new_primary_dir, old_primary_dir)
+        shutil.move(new_primary_dir, old_primary_dir)
         self.assert_container_state(handoff_node, 'sharding', 3)
         # run replicator on handoff node to create a fresh db on new primary
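
As a side note on the probe-test tweak: os.rename() only works when source and destination sit on the same filesystem (otherwise it raises OSError with errno EXDEV), whereas shutil.move() falls back to copying the tree and then deleting the source, which is presumably why the test now uses it to shuffle container DB directories between devices. Roughly, that fallback looks like this (a sketch with made-up paths, not Swift code):

    import errno
    import os
    import shutil

    def move_dir(src, dst):
        # Try the cheap same-filesystem rename first; if src and dst live on
        # different mounts, fall back to shutil.move(), which copies the
        # directory and then removes the original.
        try:
            os.rename(src, dst)
        except OSError as err:
            if err.errno != errno.EXDEV:
                raise
            shutil.move(src, dst)

    # Hypothetical handoff and primary container dirs, for illustration only:
    move_dir('/srv/1/node/sdb1/containers/123/abc',
             '/srv/2/node/sdb2/containers/123/abc')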