Merge "Fix reconstructor stats mssage."

This commit is contained in:
Jenkins 2015-07-28 00:22:32 +00:00 committed by Gerrit Code Review
commit f05eddeece
2 changed files with 112 additions and 17 deletions

View File

@ -337,22 +337,34 @@ class ObjectReconstructor(Daemon):
""" """
Logs various stats for the currently running reconstruction pass. Logs various stats for the currently running reconstruction pass.
""" """
if self.reconstruction_count: if (self.device_count and self.part_count and
self.reconstruction_device_count):
elapsed = (time.time() - self.start) or 0.000001 elapsed = (time.time() - self.start) or 0.000001
rate = self.reconstruction_count / elapsed rate = self.reconstruction_part_count / elapsed
total_part_count = (self.part_count *
self.device_count /
self.reconstruction_device_count)
self.logger.info( self.logger.info(
_("%(reconstructed)d/%(total)d (%(percentage).2f%%)" _("%(reconstructed)d/%(total)d (%(percentage).2f%%)"
" partitions reconstructed in %(time).2fs (%(rate).2f/sec, " " partitions of %(device)d/%(dtotal)d "
"%(remaining)s remaining)"), "(%(dpercentage).2f%%) devices"
{'reconstructed': self.reconstruction_count, " reconstructed in %(time).2fs "
'total': self.job_count, "(%(rate).2f/sec, %(remaining)s remaining)"),
{'reconstructed': self.reconstruction_part_count,
'total': self.part_count,
'percentage': 'percentage':
self.reconstruction_count * 100.0 / self.job_count, self.reconstruction_part_count * 100.0 / self.part_count,
'device': self.reconstruction_device_count,
'dtotal': self.device_count,
'dpercentage':
self.reconstruction_device_count * 100.0 / self.device_count,
'time': time.time() - self.start, 'rate': rate, 'time': time.time() - self.start, 'rate': rate,
'remaining': '%d%s' % compute_eta(self.start, 'remaining': '%d%s' %
self.reconstruction_count, compute_eta(self.start,
self.job_count)}) self.reconstruction_part_count,
if self.suffix_count: total_part_count)})
if self.suffix_count and self.partition_times:
self.logger.info( self.logger.info(
_("%(checked)d suffixes checked - " _("%(checked)d suffixes checked - "
"%(hashed).2f%% hashed, %(synced).2f%% synced"), "%(hashed).2f%% hashed, %(synced).2f%% synced"),
@ -781,16 +793,22 @@ class ObjectReconstructor(Daemon):
self._diskfile_mgr = self._df_router[policy] self._diskfile_mgr = self._df_router[policy]
self.load_object_ring(policy) self.load_object_ring(policy)
data_dir = get_data_dir(policy) data_dir = get_data_dir(policy)
local_devices = itertools.ifilter( local_devices = list(itertools.ifilter(
lambda dev: dev and is_local_device( lambda dev: dev and is_local_device(
ips, self.port, ips, self.port,
dev['replication_ip'], dev['replication_port']), dev['replication_ip'], dev['replication_port']),
policy.object_ring.devs) policy.object_ring.devs))
if override_devices:
self.device_count = len(override_devices)
else:
self.device_count = len(local_devices)
for local_dev in local_devices: for local_dev in local_devices:
if override_devices and (local_dev['device'] not in if override_devices and (local_dev['device'] not in
override_devices): override_devices):
continue continue
self.reconstruction_device_count += 1
dev_path = self._df_router[policy].get_dev_path( dev_path = self._df_router[policy].get_dev_path(
local_dev['device']) local_dev['device'])
if not dev_path: if not dev_path:
@ -814,6 +832,8 @@ class ObjectReconstructor(Daemon):
self.logger.exception( self.logger.exception(
'Unable to list partitions in %r' % obj_path) 'Unable to list partitions in %r' % obj_path)
continue continue
self.part_count += len(partitions)
for partition in partitions: for partition in partitions:
part_path = join(obj_path, partition) part_path = join(obj_path, partition)
if not (partition.isdigit() and if not (partition.isdigit() and
@ -821,6 +841,7 @@ class ObjectReconstructor(Daemon):
self.logger.warning( self.logger.warning(
'Unexpected entity in data dir: %r' % part_path) 'Unexpected entity in data dir: %r' % part_path)
remove_file(part_path) remove_file(part_path)
self.reconstruction_part_count += 1
continue continue
partition = int(partition) partition = int(partition)
if override_partitions and (partition not in if override_partitions and (partition not in
@ -833,6 +854,7 @@ class ObjectReconstructor(Daemon):
'part_path': part_path, 'part_path': part_path,
} }
yield part_info yield part_info
self.reconstruction_part_count += 1
def build_reconstruction_jobs(self, part_info): def build_reconstruction_jobs(self, part_info):
""" """
@ -850,10 +872,14 @@ class ObjectReconstructor(Daemon):
def _reset_stats(self): def _reset_stats(self):
self.start = time.time() self.start = time.time()
self.job_count = 0 self.job_count = 0
self.part_count = 0
self.device_count = 0
self.suffix_count = 0 self.suffix_count = 0
self.suffix_sync = 0 self.suffix_sync = 0
self.suffix_hash = 0 self.suffix_hash = 0
self.reconstruction_count = 0 self.reconstruction_count = 0
self.reconstruction_part_count = 0
self.reconstruction_device_count = 0
self.last_reconstruction_count = -1 self.last_reconstruction_count = -1
def delete_partition(self, path): def delete_partition(self, path):

View File

@ -24,7 +24,7 @@ import shutil
import re import re
import random import random
import struct import struct
from eventlet import Timeout from eventlet import Timeout, sleep
from contextlib import closing, nested, contextmanager from contextlib import closing, nested, contextmanager
from gzip import GzipFile from gzip import GzipFile
@ -599,10 +599,74 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
self.assertFalse(jobs) # that should be all of them self.assertFalse(jobs) # that should be all of them
check_jobs(part_num) check_jobs(part_num)
def test_run_once(self): def _run_once(self, http_count, extra_devices, override_devices=None):
with mocked_http_conn(*[200] * 12, body=pickle.dumps({})): ring_devs = list(self.policy.object_ring.devs)
for device, parts in extra_devices.items():
device_path = os.path.join(self.devices, device)
os.mkdir(device_path)
for part in range(parts):
os.makedirs(os.path.join(device_path, 'objects-1', str(part)))
# we update the ring to make is_local happy
devs = [dict(d) for d in ring_devs]
for d in devs:
d['device'] = device
self.policy.object_ring.devs.extend(devs)
self.reconstructor.stats_interval = 0
self.process_job = lambda j: sleep(0)
with mocked_http_conn(*[200] * http_count, body=pickle.dumps({})):
with mock_ssync_sender(): with mock_ssync_sender():
self.reconstructor.run_once() self.reconstructor.run_once(devices=override_devices)
def test_run_once(self):
# sda1: 3 is done in setup
extra_devices = {
'sdb1': 4,
'sdc1': 1,
'sdd1': 0,
}
self._run_once(18, extra_devices)
stats_lines = set()
for line in self.logger.get_lines_for_level('info'):
if 'devices reconstructed in' not in line:
continue
stat_line = line.split('of', 1)[0].strip()
stats_lines.add(stat_line)
acceptable = set([
'0/3 (0.00%) partitions',
'8/8 (100.00%) partitions',
])
matched = stats_lines & acceptable
self.assertEqual(matched, acceptable,
'missing some expected acceptable:\n%s' % (
'\n'.join(sorted(acceptable - matched))))
self.assertEqual(self.reconstructor.reconstruction_device_count, 4)
self.assertEqual(self.reconstructor.reconstruction_part_count, 8)
self.assertEqual(self.reconstructor.part_count, 8)
def test_run_once_override_devices(self):
# sda1: 3 is done in setup
extra_devices = {
'sdb1': 4,
'sdc1': 1,
'sdd1': 0,
}
self._run_once(2, extra_devices, 'sdc1')
stats_lines = set()
for line in self.logger.get_lines_for_level('info'):
if 'devices reconstructed in' not in line:
continue
stat_line = line.split('of', 1)[0].strip()
stats_lines.add(stat_line)
acceptable = set([
'1/1 (100.00%) partitions',
])
matched = stats_lines & acceptable
self.assertEqual(matched, acceptable,
'missing some expected acceptable:\n%s' % (
'\n'.join(sorted(acceptable - matched))))
self.assertEqual(self.reconstructor.reconstruction_device_count, 1)
self.assertEqual(self.reconstructor.reconstruction_part_count, 1)
self.assertEqual(self.reconstructor.part_count, 1)
def test_get_response(self): def test_get_response(self):
part = self.part_nums[0] part = self.part_nums[0]
@ -621,6 +685,7 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
def test_reconstructor_skips_bogus_partition_dirs(self): def test_reconstructor_skips_bogus_partition_dirs(self):
# A directory in the wrong place shouldn't crash the reconstructor # A directory in the wrong place shouldn't crash the reconstructor
self.reconstructor._reset_stats()
rmtree(self.objects_1) rmtree(self.objects_1)
os.mkdir(self.objects_1) os.mkdir(self.objects_1)
@ -699,6 +764,7 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
self.assertEqual(expected_partners, sorted(got_partners)) self.assertEqual(expected_partners, sorted(got_partners))
def test_collect_parts(self): def test_collect_parts(self):
self.reconstructor._reset_stats()
parts = [] parts = []
for part_info in self.reconstructor.collect_parts(): for part_info in self.reconstructor.collect_parts():
parts.append(part_info['partition']) parts.append(part_info['partition'])
@ -709,6 +775,7 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
def blowup_mkdirs(path): def blowup_mkdirs(path):
raise OSError('Ow!') raise OSError('Ow!')
self.reconstructor._reset_stats()
with mock.patch.object(object_reconstructor, 'mkdirs', blowup_mkdirs): with mock.patch.object(object_reconstructor, 'mkdirs', blowup_mkdirs):
rmtree(self.objects_1, ignore_errors=1) rmtree(self.objects_1, ignore_errors=1)
parts = [] parts = []
@ -734,6 +801,7 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
# since our collect_parts job is a generator, that yields directly # since our collect_parts job is a generator, that yields directly
# into build_jobs and then spawns it's safe to do the remove_files # into build_jobs and then spawns it's safe to do the remove_files
# without making reconstructor startup slow # without making reconstructor startup slow
self.reconstructor._reset_stats()
for part_info in self.reconstructor.collect_parts(): for part_info in self.reconstructor.collect_parts():
self.assertNotEqual(pol_1_part_1_path, part_info['part_path']) self.assertNotEqual(pol_1_part_1_path, part_info['part_path'])
self.assertFalse(os.path.exists(pol_1_part_1_path)) self.assertFalse(os.path.exists(pol_1_part_1_path))
@ -1033,6 +1101,7 @@ class TestObjectReconstructor(unittest.TestCase):
self.reconstructor.job_count = 1 self.reconstructor.job_count = 1
def tearDown(self): def tearDown(self):
self.reconstructor._reset_stats()
self.reconstructor.stats_line() self.reconstructor.stats_line()
shutil.rmtree(self.testdir) shutil.rmtree(self.testdir)