sharding: Make replicator logging less scary
When we abort the replication process because we've got shard ranges and the sharder is now responsible for ensuring object-row durability, we log a warning like "refusing to replicate objects", which sounds scary. That's because it *is*, of course -- if the sharder isn't running, whatever rows that DB has may only exist in that DB, meaning we're one drive failure away from losing track of them entirely.

However, when the sharder *is* running and everything's happy, we reach a steady state where the root containers are all sharded and none of them have any object rows to lose. At that point, the warning does more harm than good.

Only print the scary "refusing to replicate" warning if we're still responsible for some object rows, whether deleted or not.

Change-Id: I35de08d6c1617b2e446e969a54b79b42e8cfafef
parent 0c316a134f
commit c0dbf5b885
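Before the diff, a condensed sketch of the rule the commit message describes -- this is not the actual replicator method (the real check, in the hunks below, works on a ContainerBroker and its info dict); the function and parameter names here are illustrative only:

```python
# Hypothetical helper distilling the commit's logging rule; names are
# illustrative, not Swift's API.
def replication_log_level(db_state, has_object_rows):
    if db_state == 'sharded' and not has_object_rows:
        # Steady state: the root is fully sharded and holds no object rows,
        # so deferring replication is routine -- a debug line is enough.
        return 'debug'
    # Rows may still exist only in this DB; keep the scary warning.
    return 'warning'
```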
@@ -20,7 +20,7 @@ from collections import defaultdict
 from eventlet import Timeout
 
 from swift.container.sync_store import ContainerSyncStore
-from swift.container.backend import ContainerBroker, DATADIR
+from swift.container.backend import ContainerBroker, DATADIR, SHARDED
 from swift.container.reconciler import (
     MISPLACED_OBJECTS_ACCOUNT, incorrect_policy_index,
     get_reconciler_container_name, get_row_to_q_entry_translator)
@@ -113,11 +113,20 @@ class ContainerReplicator(db_replicator.Replicator):
                 'peer may need upgrading', broker.db_file,
                 '%(ip)s:%(port)s/%(device)s' % node)
         if broker.sharding_initiated():
-            self.logger.warning(
-                '%s is able to shard -- refusing to replicate objects to peer '
-                '%s; have shard ranges and will wait for cleaving',
-                broker.db_file,
-                '%(ip)s:%(port)s/%(device)s' % node)
+            if info['db_state'] == SHARDED and len(
+                    broker.get_objects(limit=1)) == 0:
+                self.logger.debug('%s is sharded and has nothing more to '
+                                  'replicate to peer %s',
+                                  broker.db_file,
+                                  '%(ip)s:%(port)s/%(device)s' % node)
+            else:
+                # Only print the scary warning if there was something that
+                # didn't get replicated
+                self.logger.warning(
+                    '%s is able to shard -- refusing to replicate objects to '
+                    'peer %s; have shard ranges and will wait for cleaving',
+                    broker.db_file,
+                    '%(ip)s:%(port)s/%(device)s' % node)
             self.stats['deferred'] += 1
             return shard_range_success
 
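The new branch decides emptiness with broker.get_objects(limit=1) rather than counting every remaining row, so the check stays cheap even for a DB that once held millions of objects. A minimal sketch of that pattern against a bare SQLite database (the table name is illustrative, not necessarily Swift's container schema):

```python
import sqlite3
from contextlib import closing

def has_object_rows(db_path):
    # Ask for at most one row; existence is all we need to know.
    # 'object' is a placeholder table name for this sketch.
    with closing(sqlite3.connect(db_path)) as conn:
        row = conn.execute('SELECT 1 FROM object LIMIT 1').fetchone()
    return row is not None
```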
@@ -1902,7 +1902,7 @@ class TestContainerSharding(BaseTestContainerSharding):
         old_primary_dir, container_hash = self.get_storage_dir(
             self.brain.part, handoff_node)
         utils.mkdirs(os.path.dirname(old_primary_dir))
-        os.rename(new_primary_dir, old_primary_dir)
+        shutil.move(new_primary_dir, old_primary_dir)
 
         # make the cluster more or less "healthy" again
         self.brain.servers.start(number=new_primary_node_number)
@@ -2009,7 +2009,7 @@ class TestContainerSharding(BaseTestContainerSharding):
         old_primary_dir, container_hash = self.get_storage_dir(
             self.brain.part, handoff_node)
         utils.mkdirs(os.path.dirname(old_primary_dir))
-        os.rename(new_primary_dir, old_primary_dir)
+        shutil.move(new_primary_dir, old_primary_dir)
         self.assert_container_state(handoff_node, 'sharding', 3)
 
         # run replicator on handoff node to create a fresh db on new primary
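The test-side switch from os.rename to shutil.move is most likely about crossing filesystem boundaries (the commit message does not say): os.rename only works when source and destination sit on the same filesystem and raises OSError with errno EXDEV otherwise, while shutil.move falls back to copy-and-delete in that case. A minimal illustration with placeholder paths:

```python
import errno
import os
import shutil

def relocate(src, dst):
    """Move src to dst, tolerating a destination on a different filesystem."""
    try:
        os.rename(src, dst)          # fast path: same filesystem
    except OSError as err:
        if err.errno != errno.EXDEV:
            raise
        shutil.move(src, dst)        # cross-device: copy, then remove src
```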