Add some debug output to the ring builder
Sometimes, I get handed a builder file in a support ticket and a question of the form "why is the balance [not] doing $thing?". When that happens, I add a bunch of print statements to my local swift/common/ring/builder.py, figure things out, and then delete the print statements. This time, instead of deleting the print statements, I turned them into debug() calls and added a "--debug" flag to the rebalance command in hopes that someone else will find it useful. Change-Id: I697af90984fa5b314ddf570280b4585ba0ba363c
This commit is contained in:
parent
cf21db4bc6
commit
8d3b3b2ee0
@ -88,6 +88,16 @@ attempting to write to or read the builder/ring files while operations are in
|
||||
progress. This can be useful in environments where ring management has been
|
||||
automated but the operator still needs to interact with the rings manually.
|
||||
|
||||
If the ring builder is not producing the balances that you are
|
||||
expecting, you can gain visibility into what it's doing with the
|
||||
``--debug`` flag.::
|
||||
|
||||
swift-ring-builder <builder-file> rebalance --debug
|
||||
|
||||
This produces a great deal of output that is mostly useful if you are
|
||||
either (a) attempting to fix the ring builder, or (b) filing a bug
|
||||
against the ring builder.
|
||||
|
||||
-----------------------
|
||||
Scripting Ring Creation
|
||||
-----------------------
|
||||
|
@ -14,12 +14,14 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import logging
|
||||
|
||||
from errno import EEXIST
|
||||
from itertools import islice, izip
|
||||
from operator import itemgetter
|
||||
from os import mkdir
|
||||
from os.path import basename, abspath, dirname, exists, join as pathjoin
|
||||
from sys import argv as sys_argv, exit, stderr
|
||||
from sys import argv as sys_argv, exit, stderr, stdout
|
||||
from textwrap import wrap
|
||||
from time import time
|
||||
import optparse
|
||||
@ -831,6 +833,8 @@ swift-ring-builder <builder_file> rebalance [options]
|
||||
help='Force a rebalanced ring to save even '
|
||||
'if < 1% of parts changed')
|
||||
parser.add_option('-s', '--seed', help="seed to use for rebalance")
|
||||
parser.add_option('-d', '--debug', action='store_true',
|
||||
help="print debug information")
|
||||
options, args = parser.parse_args(argv)
|
||||
|
||||
def get_seed(index):
|
||||
@ -841,6 +845,14 @@ swift-ring-builder <builder_file> rebalance [options]
|
||||
except IndexError:
|
||||
pass
|
||||
|
||||
if options.debug:
|
||||
logger = logging.getLogger("swift.ring.builder")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
handler = logging.StreamHandler(stdout)
|
||||
formatter = logging.Formatter("%(levelname)s: %(message)s")
|
||||
handler.setFormatter(formatter)
|
||||
logger.addHandler(handler)
|
||||
|
||||
devs_changed = builder.devs_changed
|
||||
try:
|
||||
last_balance = builder.get_balance()
|
||||
@ -889,11 +901,12 @@ swift-ring-builder <builder_file> rebalance [options]
|
||||
status = EXIT_SUCCESS
|
||||
if builder.dispersion > 0:
|
||||
print '-' * 79
|
||||
print('NOTE: Dispersion of %.06f indicates some parts are not\n'
|
||||
' optimally dispersed.\n\n'
|
||||
' You may want adjust some device weights, increase\n'
|
||||
' the overload or review the dispersion report.' %
|
||||
builder.dispersion)
|
||||
print(
|
||||
'NOTE: Dispersion of %.06f indicates some parts are not\n'
|
||||
' optimally dispersed.\n\n'
|
||||
' You may want to adjust some device weights, increase\n'
|
||||
' the overload or review the dispersion report.' %
|
||||
builder.dispersion)
|
||||
status = EXIT_WARNING
|
||||
print '-' * 79
|
||||
elif balance > 5 and balance / 100.0 > builder.overload:
|
||||
|
@ -17,6 +17,7 @@ import bisect
|
||||
import copy
|
||||
import errno
|
||||
import itertools
|
||||
import logging
|
||||
import math
|
||||
import random
|
||||
import cPickle as pickle
|
||||
@ -33,6 +34,16 @@ from swift.common.ring.utils import tiers_for_dev, build_tier_tree, \
|
||||
MAX_BALANCE = 999.99
|
||||
|
||||
|
||||
try:
|
||||
# python 2.7+
|
||||
from logging import NullHandler
|
||||
except ImportError:
|
||||
# python 2.6
|
||||
class NullHandler(logging.Handler):
|
||||
def emit(self, *a, **kw):
|
||||
pass
|
||||
|
||||
|
||||
class RingBuilder(object):
|
||||
"""
|
||||
Used to build swift.common.ring.RingData instances to be written to disk
|
||||
@ -96,6 +107,11 @@ class RingBuilder(object):
|
||||
self._remove_devs = []
|
||||
self._ring = None
|
||||
|
||||
self.logger = logging.getLogger("swift.ring.builder")
|
||||
if not self.logger.handlers:
|
||||
# silence "no handler for X" error messages
|
||||
self.logger.addHandler(NullHandler())
|
||||
|
||||
def weight_of_one_part(self):
|
||||
"""
|
||||
Returns the weight of each partition as calculated from the
|
||||
@ -355,6 +371,7 @@ class RingBuilder(object):
|
||||
|
||||
self._ring = None
|
||||
if self._last_part_moves_epoch is None:
|
||||
self.logger.debug("New builder; performing initial balance")
|
||||
self._initial_balance()
|
||||
self.devs_changed = False
|
||||
self._build_dispersion_graph()
|
||||
@ -363,16 +380,23 @@ class RingBuilder(object):
|
||||
self._update_last_part_moves()
|
||||
last_balance = 0
|
||||
new_parts, removed_part_count = self._adjust_replica2part2dev_size()
|
||||
self.logger.debug(
|
||||
"%d new parts and %d removed parts from replica-count change",
|
||||
len(new_parts), removed_part_count)
|
||||
changed_parts += removed_part_count
|
||||
self._set_parts_wanted()
|
||||
self._reassign_parts(new_parts)
|
||||
changed_parts += len(new_parts)
|
||||
while True:
|
||||
reassign_parts = self._gather_reassign_parts()
|
||||
self._reassign_parts(reassign_parts)
|
||||
changed_parts += len(reassign_parts)
|
||||
self.logger.debug("Gathered %d parts", changed_parts)
|
||||
self._reassign_parts(reassign_parts)
|
||||
self.logger.debug("Assigned %d parts", changed_parts)
|
||||
while self._remove_devs:
|
||||
self.devs[self._remove_devs.pop()['id']] = None
|
||||
remove_dev_id = self._remove_devs.pop()['id']
|
||||
self.logger.debug("Removing dev %d", remove_dev_id)
|
||||
self.devs[remove_dev_id] = None
|
||||
balance = self.get_balance()
|
||||
if balance < 1 or abs(last_balance - balance) < 1 or \
|
||||
changed_parts == self.parts:
|
||||
@ -786,6 +810,9 @@ class RingBuilder(object):
|
||||
if dev_id in dev_ids:
|
||||
self._last_part_moves[part] = 0
|
||||
removed_dev_parts[part].append(replica)
|
||||
self.logger.debug(
|
||||
"Gathered %d/%d from dev %d [dev removed]",
|
||||
part, replica, dev_id)
|
||||
|
||||
# Now we gather partitions that are "at risk" because they aren't
|
||||
# currently sufficient spread out across the cluster.
|
||||
@ -859,6 +886,9 @@ class RingBuilder(object):
|
||||
dev['parts'] -= 1
|
||||
removed_replica = True
|
||||
moved_parts += 1
|
||||
self.logger.debug(
|
||||
"Gathered %d/%d from dev %d [dispersion]",
|
||||
part, replica, dev['id'])
|
||||
break
|
||||
if removed_replica:
|
||||
for tier in tfd[dev['id']]:
|
||||
@ -894,6 +924,9 @@ class RingBuilder(object):
|
||||
dev['parts_wanted'] += 1
|
||||
dev['parts'] -= 1
|
||||
reassign_parts[part].append(replica)
|
||||
self.logger.debug(
|
||||
"Gathered %d/%d from dev %d [weight]",
|
||||
part, replica, dev['id'])
|
||||
|
||||
reassign_parts.update(spread_out_parts)
|
||||
reassign_parts.update(removed_dev_parts)
|
||||
@ -1121,6 +1154,8 @@ class RingBuilder(object):
|
||||
new_index, new_last_sort_key)
|
||||
|
||||
self._replica2part2dev[replica][part] = dev['id']
|
||||
self.logger.debug(
|
||||
"Placed %d/%d onto dev %d", part, replica, dev['id'])
|
||||
|
||||
# Just to save memory and keep from accidental reuse.
|
||||
for dev in self._iter_devs():
|
||||
|
@ -13,6 +13,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import logging
|
||||
import mock
|
||||
import os
|
||||
import StringIO
|
||||
@ -1744,6 +1745,32 @@ class TestRebalanceCommand(unittest.TestCase, RunSwiftRingBuilderMixin):
|
||||
raise
|
||||
return (mock_stdout.getvalue(), mock_stderr.getvalue())
|
||||
|
||||
def test_debug(self):
|
||||
# NB: getLogger(name) always returns the same object
|
||||
rb_logger = logging.getLogger("swift.ring.builder")
|
||||
try:
|
||||
self.assertNotEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
|
||||
|
||||
self.run_srb("create", 8, 3, 1)
|
||||
self.run_srb("add",
|
||||
"r1z1-10.1.1.1:2345/sda", 100.0,
|
||||
"r1z1-10.1.1.1:2345/sdb", 100.0,
|
||||
"r1z1-10.1.1.1:2345/sdc", 100.0,
|
||||
"r1z1-10.1.1.1:2345/sdd", 100.0)
|
||||
self.run_srb("rebalance", "--debug")
|
||||
self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
|
||||
|
||||
rb_logger.setLevel(logging.INFO)
|
||||
self.run_srb("rebalance", "--debug", "123")
|
||||
self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
|
||||
|
||||
rb_logger.setLevel(logging.INFO)
|
||||
self.run_srb("rebalance", "123", "--debug")
|
||||
self.assertEqual(rb_logger.getEffectiveLevel(), logging.DEBUG)
|
||||
|
||||
finally:
|
||||
rb_logger.setLevel(logging.INFO) # silence other test cases
|
||||
|
||||
def test_rebalance_warning_appears(self):
|
||||
self.run_srb("create", 8, 3, 24)
|
||||
# all in one machine: totally balanceable
|
||||
|
Loading…
Reference in New Issue
Block a user