00bfc425ce
Currently we simply mock statsd calls in the FakeLogger, and there are also some helper methods for counting and collating the metrics that were called. This FakeLogger is overloaded and doesn't simulate the real world. In real life we use a StatsdClient that is attached to the logger. We've been in the situation where unit tests pass but the statsd client stacktraces, because we don't actually base the fake statsd client on the real one and let it use its internal logic. This patch creates a new FakeStatsdClient that is based on the real one; it can then be used (like the real statsd client) and attached to the FakeLogger. There is quite a bit of churn in the tests to make this work, because we now have to look into the fake statsd client to check the faked calls made. The FakeStatsdClient does everything the real one does, except that it overrides the _send method and socket creation so no actual statsd metrics are emitted. Change-Id: I9cdf395e85ab559c2b67b0617f898ad2d6a870d4
6904 lines
299 KiB
Python
6904 lines
299 KiB
Python
# Copyright (c) 2010-2012 OpenStack Foundation
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
# implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
import itertools
|
|
import json
|
|
import unittest
|
|
import os
|
|
import mock
|
|
import six
|
|
import six.moves.cPickle as pickle
|
|
import tempfile
|
|
import time
|
|
import shutil
|
|
import re
|
|
import random
|
|
import struct
|
|
import collections
|
|
from eventlet import Timeout, sleep, spawn
|
|
from eventlet.green import threading
|
|
|
|
from contextlib import closing, contextmanager
|
|
from gzip import GzipFile
|
|
from shutil import rmtree
|
|
from six.moves.urllib.parse import unquote
|
|
from swift.common import utils
|
|
from swift.common.exceptions import DiskFileError, DiskFileQuarantined
|
|
from swift.common.header_key_dict import HeaderKeyDict
|
|
from swift.common.utils import dump_recon_cache, md5, Timestamp, mkdirs
|
|
from swift.obj import diskfile, reconstructor as object_reconstructor
|
|
from swift.common import ring
|
|
from swift.common.storage_policy import (StoragePolicy, ECStoragePolicy,
|
|
POLICIES, EC_POLICY)
|
|
from swift.obj.reconstructor import SYNC, REVERT
|
|
from test import annotate_failure
|
|
|
|
from test.debug_logger import debug_logger
|
|
from test.unit import (patch_policies, mocked_http_conn, FabricatedRing,
|
|
make_timestamp_iter, DEFAULT_TEST_EC_TYPE,
|
|
encode_frag_archive_bodies, quiet_eventlet_exceptions,
|
|
skip_if_no_xattrs)
|
|
from test.unit.obj.common import write_diskfile
|
|
|
|
|
|
class FakeSsyncSender(object):
    """Callable stand-in for an ssync sender used by the tests below.

    Each instance optionally records the arguments it was built with in
    ``ssync_calls`` and, when called, returns either the result of
    ``response_callback(node, job, suffixes)`` or ``(True, {})``.
    """

    def __init__(self, daemon, node, job, suffixes, ssync_calls=None,
                 response_callback=None, **kwargs):
        # ``daemon`` is accepted for signature compatibility only; it is
        # neither stored nor recorded.
        if ssync_calls is not None:
            # extra keyword arguments are recorded too, and win on key clash
            recorded = dict(node=node, job=job, suffixes=suffixes)
            recorded.update(kwargs)
            ssync_calls.append(recorded)
        self.node = node
        self.job = job
        self.suffixes = suffixes
        self.response_callback = response_callback
        self.limited_by_max_objects = False

    def __call__(self):
        if not self.response_callback:
            # no callback configured: report success with no details
            return True, {}
        return self.response_callback(self.node, self.job, self.suffixes)
|
|
|
|
|
|
@contextmanager
def mock_ssync_sender(ssync_calls=None, response_callback=None, **kwargs):
    """Patch ``swift.obj.reconstructor.ssync_sender`` with a fake factory.

    While the context is active, the patched name is a function that builds
    a :class:`FakeSsyncSender`, forwarding ``ssync_calls`` and
    ``response_callback``. The factory itself is yielded.

    Note that the ``**kwargs`` accepted here are not used; only the keyword
    arguments given to the factory call reach the fake sender.
    """
    def fake_ssync(daemon, node, job, suffixes, **kwargs):
        sender = FakeSsyncSender(
            daemon, node, job, suffixes, ssync_calls, response_callback,
            **kwargs)
        return sender

    patcher = mock.patch('swift.obj.reconstructor.ssync_sender', fake_ssync)
    with patcher:
        yield fake_ssync
|
|
|
|
|
|
def make_ec_archive_bodies(policy, test_body):
    """Encode ``test_body`` into one fragment-archive body per node.

    The body is cut into ``policy.ec_segment_size`` sized segments, each
    segment is encoded with ``policy.pyeclib_driver.encode`` (repeated
    ``policy.ec_duplication_factor`` times), and the fragments sharing a
    position are concatenated across segments.

    :param policy: object providing ``ec_segment_size``,
                   ``ec_duplication_factor`` and ``pyeclib_driver``
    :param test_body: the object body as bytes
    :returns: list of bytes, one fragment archive per fragment position;
              an empty list if the body yields no fragments
    """
    segment_size = policy.ec_segment_size
    # split up the body into buffers
    segments = [test_body[start:start + segment_size]
                for start in range(0, len(test_body), segment_size)]
    # encode the buffers into fragment payloads
    fragment_payloads = []
    for segment in segments:
        fragments = (policy.pyeclib_driver.encode(segment) *
                     policy.ec_duplication_factor)
        if not fragments:
            break
        fragment_payloads.append(fragments)

    # join up the fragment payloads per node; fragments are bytes, so they
    # must be joined with a bytes separator (a text '' separator raises
    # TypeError on Python 3)
    return [b''.join(frags) for frags in zip(*fragment_payloads)]
|
|
|
|
|
|
def _create_test_rings(path, next_part_power=None):
    """Write identical object rings for policies 0 and 1 under ``path``.

    Two gzipped pickles of a ``ring.RingData`` are written:
    ``object.ring.gz`` and ``object-1.ring.gz``. Both describe the same
    three-replica layout over four ``sda1`` devices.

    :param path: directory in which the ring files are created
    :param next_part_power: passed through to ``ring.RingData``
    """
    replica2part2dev_id = [
        [0, 1, 2],
        [1, 2, 3],
        [2, 3, 0],
    ]
    # device i listens on 127.0.0.<i>; note that zone 3 is skipped
    dev_zones = (0, 1, 2, 4)
    devs = [
        {'id': dev_id, 'device': 'sda1', 'zone': zone,
         'ip': '127.0.0.%d' % dev_id, 'port': 6200}
        for dev_id, zone in enumerate(dev_zones)
    ]
    part_shift = 30

    for ring_filename in ('object.ring.gz', 'object-1.ring.gz'):
        ring_path = os.path.join(path, ring_filename)
        with closing(GzipFile(ring_path, 'wb')) as gz_file:
            ring_data = ring.RingData(
                replica2part2dev_id, devs, part_shift, next_part_power)
            pickle.dump(ring_data, gz_file)
|
|
|
|
|
|
def count_stats(logger, key, metric):
    """Count recorded statsd calls whose metric name matches a pattern.

    :param logger: object whose ``statsd_client.calls`` maps a call name to
                   a list of ``(args, kwargs)`` records
    :param key: the call name to look up in ``statsd_client.calls``
    :param metric: regular expression matched (``re.match``) against the
                   first positional argument of each record
    :returns: the number of matching records
    """
    recorded = logger.statsd_client.calls[key]
    return sum(
        1 for call_args, _call_kwargs in recorded
        if re.match(metric, call_args[0]))
|
|
|
|
|
|
def get_header_frag_index(self, body):
    """Return a frag-index header dict derived from a fragment body.

    The metadata for ``body`` is obtained from
    ``self.policy.pyeclib_driver.get_metadata`` and its first two bytes are
    unpacked as a native short.

    :param body: fragment archive body
    :returns: dict with the single key ``X-Object-Sysmeta-Ec-Frag-Index``
    """
    frag_metadata = self.policy.pyeclib_driver.get_metadata(body)
    (index,) = struct.unpack('h', frag_metadata[:2])
    headers = {'X-Object-Sysmeta-Ec-Frag-Index': index}
    return headers
|
|
|
|
|
|
@patch_policies([StoragePolicy(0, name='zero', is_default=True),
|
|
ECStoragePolicy(1, name='one',
|
|
ec_type=DEFAULT_TEST_EC_TYPE,
|
|
ec_ndata=3, ec_nparity=2),
|
|
ECStoragePolicy(2, name='two',
|
|
ec_type=DEFAULT_TEST_EC_TYPE,
|
|
ec_ndata=3, ec_nparity=2)])
|
|
class TestGlobalSetupObjectReconstructor(unittest.TestCase):
|
|
# Tests for reconstructor using real objects in test partition directories.
|
|
legacy_durable = False
|
|
|
|
def setUp(self):
    """Create the device tree, the reconstructor and test frag archives.

    A temporary ``node/sda1`` device is created with partition dirs 0, 1
    and 2 for policies 0 and 1, a reconstructor is built over it, and for
    the first EC policy / first local ring device two sets of fragment
    archives are written into every partition.
    """
    skip_if_no_xattrs()
    self.testdir = tempfile.mkdtemp()
    for policy_index, replicas in ((0, 3), (1, 5), (2, 5)):
        POLICIES[policy_index].object_ring = FabricatedRing(replicas)
    utils.HASH_PATH_SUFFIX = b'endcap'
    utils.HASH_PATH_PREFIX = b''

    self.devices = os.path.join(self.testdir, 'node')
    os.makedirs(self.devices)
    sda1_path = os.path.join(self.devices, 'sda1')
    os.mkdir(sda1_path)
    self.objects = os.path.join(
        sda1_path, diskfile.get_data_dir(POLICIES[0]))
    self.objects_1 = os.path.join(
        sda1_path, diskfile.get_data_dir(POLICIES[1]))
    os.mkdir(self.objects)
    os.mkdir(self.objects_1)

    self.parts = {}
    self.parts_1 = {}
    self.part_nums = ['0', '1', '2']
    for part in self.part_nums:
        for data_root, part_dirs in ((self.objects, self.parts),
                                     (self.objects_1, self.parts_1)):
            part_dirs[part] = os.path.join(data_root, part)
            os.mkdir(part_dirs[part])

    self.conf = {
        'swift_dir': self.testdir,
        'devices': self.devices,
        'mount_check': 'false',
        'timeout': '300',
        'stats_interval': '1',
        'bind_ip': '10.0.0.1',
        'bind_port': 6200,
    }
    self.logger = debug_logger('test-reconstructor')
    self.reconstructor = object_reconstructor.ObjectReconstructor(
        self.conf, logger=self.logger)

    self.policy = POLICIES[1]

    # most of the reconstructor test methods require that there be
    # real objects in place, not just part dirs, so we'll create them
    # all here....
    # part 0: 3C1/hash/xxx#1#d.data  <-- job: sync_only - partners (FI 1)
    #         061/hash/xxx#1#d.data  <-- included in earlier job (FI 1)
    #                 /xxx#2#d.data  <-- job: sync_revert to index 2
    # part_nodes: ['sda0', 'sda1', 'sda2', 'sda3', 'sda4']

    # part 1: 3C1/hash/xxx#0#d.data  <-- job: sync_revert to index 0
    #                 /xxx#1#d.data  <-- job: sync_revert to index 1
    #         061/hash/xxx#1#d.data  <-- included in earlier job (FI 1)
    # part_nodes: ['sda5', 'sda6', 'sda7', 'sda0', 'sda1']

    # part 2: 3C1/hash/xxx#2#d.data  <-- job: sync_revert to index 2
    #         061/hash/xxx#0#d.data  <-- job: sync_revert to index 0
    # part_nodes: ['sda2', 'sda3', 'sda4', 'sda5', 'sda6']

    # fixed creation time for every frag archive; the hardcoded hashes
    # expected elsewhere in this class correspond to it
    frag_time = 1421181937.70054  # time.time()

    def _create_frag_archives(policy, obj_path, local_id, obj_set):
        # we'll create 2 sets of objects in different suffix dirs
        # so we cover all the scenarios we want (3 of them)
        # 1) part dir with all FI's matching the local node index
        # 2) part dir with one local and mix of others
        # 3) part dir with no local FI and one or more others
        def part_0(set_num, obj_num):
            # set 0: just the local; otherwise one local and all of another
            if set_num == 0 or obj_num == 0:
                return local_id
            return (local_id + 1) % 3

        def part_1(set_num, obj_num):
            # set 0: one local and all of another; otherwise just the local
            if set_num != 0 or obj_num == 0:
                return local_id
            return (local_id + 2) % 3

        def part_2(set_num, obj_num):
            # this part is a handoff in our config (always)
            # so lets do a set with indices from different nodes
            offset = 1 if set_num == 0 else 2
            return (local_id + offset) % 3

        # function dictionary for defining test scenarios base on set #
        frag_index_for = {'0': part_0, '1': part_1, '2': part_2}

        for part_num in self.part_nums:
            # create 3 unique objects per part, each part
            # will then have a unique mix of FIs for the
            # possible scenarios
            for obj_num in range(3):
                self._create_diskfile(
                    part=part_num, object_name='o' + str(obj_set),
                    policy=policy,
                    frag_index=frag_index_for[part_num](obj_set, obj_num),
                    timestamp=utils.Timestamp(frag_time))

    ips = utils.whataremyips(self.reconstructor.ring_ip)
    ec_policies = [p for p in POLICIES if p.policy_type == EC_POLICY]
    if ec_policies:
        # only the first EC policy is populated
        self.ec_policy = ec_policies[0]
        self.ec_obj_ring = self.reconstructor.load_object_ring(
            self.ec_policy)
        data_dir = diskfile.get_data_dir(self.ec_policy)
        local_devs = [
            dev for dev in self.ec_obj_ring.devs
            if dev and dev['replication_ip'] in ips and
            dev['replication_port'] == self.reconstructor.port]
        if local_devs:
            # ... and only for the first local device in that ring
            self.ec_local_dev = local_devs[0]
            dev_path = os.path.join(self.reconstructor.devices_dir,
                                    self.ec_local_dev['device'])
            self.ec_obj_path = os.path.join(dev_path, data_dir)
            # create a bunch of FA's to test
            with mock.patch('swift.obj.diskfile.time') as mock_time:
                # since (a) we are using a fixed time here to create
                # frags which corresponds to all the hardcoded hashes and
                # (b) the EC diskfile will delete its .data file right
                # after creating if it has expired, use this horrible hack
                # to prevent the reclaim happening
                mock_time.time.return_value = 0.0
                for obj_set in (0, 1):
                    _create_frag_archives(
                        self.ec_policy, self.ec_obj_path,
                        self.ec_local_dev['id'], obj_set)
|
|
|
|
def tearDown(self):
    """Remove the temporary test directory, ignoring removal errors."""
    shutil.rmtree(self.testdir, ignore_errors=True)
|
|
|
|
def _create_diskfile(self, policy=None, part=0, object_name='o',
                     frag_index=0, timestamp=None, test_data=None,
                     commit=True):
    """Write a diskfile for account 'a', container 'c' on device sda1.

    :param policy: storage policy; falls back to ``self.policy``
    :param part: partition passed to ``get_diskfile``
    :param object_name: object name passed to ``get_diskfile``
    :param frag_index: fragment index passed to ``write_diskfile``
    :param timestamp: falls back to ``utils.Timestamp.now()`` when falsy
    :param test_data: falls back to ``b'test data'`` when falsy
    :param commit: passed through to ``write_diskfile``
    :returns: the diskfile that was written
    """
    if not policy:
        policy = self.policy
    manager = self.reconstructor._df_router[policy]
    df = manager.get_diskfile(
        'sda1', part, 'a', 'c', object_name, policy=policy)
    if not timestamp:
        timestamp = utils.Timestamp.now()
    if not test_data:
        test_data = b'test data'
    write_diskfile(
        df, timestamp, data=test_data, frag_index=frag_index,
        commit=commit, legacy_durable=self.legacy_durable)
    return df
|
|
|
|
def assert_expected_jobs(self, part_num, jobs):
    """Assert that ``jobs`` are exactly the jobs expected for a partition.

    The ``path``, ``policy`` and (if present) ``local_index`` keys are
    removed from every job and its suffixes are sorted in place before the
    comparison. ``jobs`` is emptied of every matched job as a side effect.

    :param part_num: partition number (int or numeric string) in 0..2
    :param jobs: list of job dicts built by the reconstructor
    """
    # the dict diffs can be pretty big
    self.maxDiff = 2048

    for built in jobs:
        for dropped_key in ('path', 'policy'):
            del built[dropped_key]
        built.pop('local_index', None)
        built['suffixes'].sort()

    whole_suffix_hash = '85b02a5283704292a511078a5c483da5'
    frag_hash = '0e6e8d48d801dc89fd31904ae3b31229'

    def ring_dev(dev_id, ip, device, index=None):
        # a ring device entry; sync_to nodes additionally carry an index
        entry = {
            'replication_port': 6200,
            'zone': 1,
            'ip': ip,
            'region': 1,
            'port': 6200,
            'replication_ip': ip,
            'device': device,
            'id': dev_id,
            'weight': 1.0,
        }
        if index is not None:
            entry['index'] = index
        return entry

    def suffix_hashes(frags_061, frags_3c1):
        # per-suffix hashes: one entry under None plus one per frag index
        hashes = {}
        for suffix, frag_indexes in (('061', frags_061),
                                     ('3c1', frags_3c1)):
            per_suffix = {None: whole_suffix_hash}
            for frag_index in frag_indexes:
                per_suffix[frag_index] = frag_hash
            hashes[suffix] = per_suffix
        return hashes

    def make_job(job_type, partition, frag_index, primary_frag_index,
                 suffixes, sync_to, hashes):
        # every expected job is local to sda1 (device id 1)
        spec = {
            'sync_to': sync_to,
            'job_type': job_type,
            'suffixes': suffixes,
            'partition': partition,
            'frag_index': frag_index,
            'primary_frag_index': primary_frag_index,
            'device': 'sda1',
            'local_dev': ring_dev(1, '10.0.0.1', 'sda1'),
            'hashes': hashes,
        }
        if job_type == object_reconstructor.SYNC:
            spec['sync_diskfile_builder'] = \
                self.reconstructor.reconstruct_fa
        return spec

    revert = object_reconstructor.REVERT
    sync = object_reconstructor.SYNC

    expected = [
        # part num 0
        [
            make_job(revert, 0, 2, 1, ['061'],
                     [ring_dev(2, '10.0.0.2', 'sda2', index=2)],
                     suffix_hashes((2, 1), (1,))),
            make_job(sync, 0, 1, 1, ['061', '3c1'],
                     [ring_dev(0, '10.0.0.0', 'sda0', index=0),
                      ring_dev(2, '10.0.0.2', 'sda2', index=2),
                      ring_dev(3, '10.0.0.3', 'sda3', index=3)],
                     suffix_hashes((2, 1), (1,))),
        ],
        # part num 1
        [
            make_job(revert, 1, 1, 4, ['061', '3c1'],
                     [ring_dev(6, '10.0.0.2', 'sda6', index=1)],
                     suffix_hashes((1,), (0, 1))),
            make_job(revert, 1, 0, 4, ['3c1'],
                     [ring_dev(5, '10.0.0.1', 'sda5', index=0)],
                     suffix_hashes((1,), (0, 1))),
            make_job(sync, 1, 4, 4, [],
                     [ring_dev(0, '10.0.0.0', 'sda0', index=3),
                      ring_dev(5, '10.0.0.1', 'sda5', index=0),
                      ring_dev(6, '10.0.0.2', 'sda6', index=1)],
                     suffix_hashes((1,), (0, 1))),
        ],
        # part num 2
        [
            make_job(revert, 2, 0, None, ['061'],
                     [ring_dev(2, '10.0.0.2', 'sda2', index=0)],
                     suffix_hashes((0,), (2,))),
            make_job(revert, 2, 2, None, ['3c1'],
                     [ring_dev(4, '10.0.0.0', 'sda4', index=2)],
                     suffix_hashes((0,), (2,))),
        ],
    ]

    try:
        expected_jobs = expected[int(part_num)]
    except (IndexError, ValueError):
        self.fail('Unknown part number %r' % part_num)

    expected_by_key = {
        (wanted['partition'], wanted['frag_index']): wanted
        for wanted in expected_jobs}
    unexpected = []
    for built in jobs:
        key = (built['partition'], built['frag_index'])
        if key not in expected_by_key:
            unexpected.append(built)
            continue
        self.assertEqual(built, expected_by_key[key])
    if unexpected:
        self.fail(
            'Unexpected jobs for frags %r in part num %s - '
            'expected jobs for frags %r' % (
                [j['frag_index'] for j in unexpected], part_num,
                [k[1] for k in expected_by_key]))
    for wanted in expected_jobs:
        if wanted in jobs:
            jobs.remove(wanted)
    self.assertFalse(jobs)  # that should be all of them
|
|
|
|
def _run_once(self, http_count, extra_devices, override_devices=None):
    """Run one reconstructor pass with job processing and stats interleaved.

    For every entry of ``extra_devices`` a device dir is created with the
    given number of partitions (each holding one tombstone) and a matching
    device, copied from ring device 1, is appended to the policy's ring.
    ``process_job`` and the reconstructor's ``sleep`` are replaced so that
    each processed job alternates with one stats-interval sleep.

    :param http_count: not used by this helper
    :param extra_devices: dict mapping device name to partition count
    :param override_devices: passed to ``run_once`` as ``devices``
    """
    ring_devs = self.policy.object_ring.devs
    first_free_id = max(dev['id'] for dev in ring_devs) + 1
    for offset, (device, part_count) in enumerate(extra_devices.items()):
        device_path = os.path.join(self.devices, device)
        os.mkdir(device_path)
        for part in range(part_count):
            hash_path = os.path.join(
                device_path, 'objects-1', str(part), 'abc', 'hash')
            os.makedirs(hash_path)
            tombstone_name = utils.Timestamp(time.time()).internal + '.ts'
            # an empty tombstone file is all that is needed
            open(os.path.join(hash_path, tombstone_name), 'w').close()
        # use sda1 as a base to make is_local happy
        ring_devs.append(
            dict(ring_devs[1], device=device, id=first_free_id + offset))

    # a unique sentinel lets fake_sleep recognise the stats-interval sleep
    self.reconstructor.stats_interval = object()

    may_process = threading.Event()
    may_report = threading.Event()
    may_report.set()

    def fake_sleep(secs=0):
        if secs is self.reconstructor.stats_interval:
            # stats sleep: take our turn, then hand over to the worker
            may_report.wait()
            may_report.clear()
            may_process.set()
            return None
        return sleep(secs)

    def fake_process(job):
        # wait for our turn, then hand back to the stats reporter
        may_process.wait()
        may_process.clear()
        may_report.set()

    self.reconstructor.process_job = fake_process
    sleep_patch = mock.patch('swift.obj.reconstructor.sleep', fake_sleep)
    with mock_ssync_sender(), sleep_patch:
        self.reconstructor.run_once(devices=override_devices)
|
|
|
|
def test_run_once(self):
    # sda1: 3 is done in setup; with the extra devices below the pass is
    # expected to cover 8 partitions in total
    extra_devices = {
        'sdb1': 4,
        'sdc1': 1,
        'sdd1': 0,
    }
    with Timeout(60):
        self._run_once(32, extra_devices)
    # keep only the "N/M (P%) partitions" prefix of each progress line
    progress_seen = {
        line.split('reconstructed', 1)[0].strip()
        for line in self.logger.get_lines_for_level('info')
        if 'reconstructed in' in line}
    acceptable = {
        '2/8 (25.00%) partitions',
        '3/8 (37.50%) partitions',
        '4/8 (50.00%) partitions',
        '5/8 (62.50%) partitions',
        '6/8 (75.00%) partitions',
        '7/8 (87.50%) partitions',
        '8/8 (100.00%) partitions',
    }
    matched = progress_seen & acceptable
    missing = sorted(acceptable - matched)
    self.assertEqual(
        matched, acceptable,
        'missing some expected acceptable:\n%s' % ('\n'.join(missing)))
    self.assertEqual(self.reconstructor.reconstruction_part_count, 8)
    self.assertEqual(self.reconstructor.part_count, 8)
|
|
|
|
def test_run_once_override_devices(self):
    # sda1: 3 is done in setup; the pass is restricted to sdc1, which has
    # a single partition
    extra_devices = {
        'sdb1': 4,
        'sdc1': 1,
        'sdd1': 0,
    }
    with Timeout(60):
        self._run_once(3, extra_devices, 'sdc1')
    # keep only the "N/M (P%) partitions" prefix of each progress line
    progress_seen = {
        line.split('reconstructed', 1)[0].strip()
        for line in self.logger.get_lines_for_level('info')
        if 'reconstructed in' in line}
    acceptable = {
        '1/1 (100.00%) partitions',
    }
    matched = progress_seen & acceptable
    missing = sorted(acceptable - matched)
    self.assertEqual(
        matched, acceptable,
        'missing some expected acceptable:\n%s' % ('\n'.join(missing)))
    self.assertEqual(self.reconstructor.reconstruction_part_count, 1)
    self.assertEqual(self.reconstructor.part_count, 1)
|
|
|
|
def test_get_response(self):
    # _get_response should GET from the node's replication ip/port,
    # return the response for any status, and return None (logging an
    # error) when the connection raises
    part = self.part_nums[0]
    node = self.policy.object_ring.get_part_nodes(int(part))[0]
    # separate replication network
    node['replication_port'] = node['port'] + 1000

    def do_test(stat_code):
        expected_request = {
            'ssl': False,
            'ip': node['replication_ip'],
            'port': node['replication_port'],
            'method': 'GET',
            'path': '/sda0/%s/nada' % part,
            'qs': None,
            'headers': {},
        }
        with mocked_http_conn(stat_code) as mock_conn:
            response = self.reconstructor._get_response(
                node, self.policy, part, path='/nada', headers={})
        self.assertEqual(mock_conn.requests, [expected_request])
        return response

    for status in (200, 400, 404, 503):
        self.assertEqual(status, do_test(status).status)

    self.assertIsNone(do_test(Exception()))
    # exception should result in error logs
    for error_line in self.logger.get_lines_for_level('error'):
        self.assertIn('Trying to GET', error_line)
    self.logger._clear()

    # Timeout also should result in error logs
    self.assertIsNone(do_test(Timeout()))
    for error_line in self.logger.get_lines_for_level('error'):
        self.assertIn('Trying to GET', error_line)
        # sanity Timeout has extra message in the error log
        self.assertIn('Timeout', error_line)
    self.logger.clear()
|
|
|
|
def test_reconstructor_skips_bogus_partition_dirs(self):
    # A directory in the wrong place shouldn't crash the reconstructor
    self.reconstructor._reset_stats()
    # replace the policy 1 data dir with one holding only a bogus entry
    rmtree(self.objects_1)
    os.mkdir(self.objects_1)
    bogus_dir = os.path.join(self.objects_1, "burrito")
    os.mkdir(bogus_dir)

    built_jobs = []
    for part_info in self.reconstructor.collect_parts():
        built_jobs.extend(
            self.reconstructor.build_reconstruction_jobs(part_info))
    self.assertFalse(built_jobs)
|
|
|
|
def test_check_ring(self):
    # check_ring is expected to return False only when both the next
    # check time and the ring's recorded mtime have been moved 30s back
    testring = tempfile.mkdtemp()
    # register removal up front so the temp dir is not leaked when one
    # of the assertions below fails
    self.addCleanup(rmtree, testring, ignore_errors=True)
    _create_test_rings(testring)
    obj_ring = ring.Ring(testring, ring_name='object')  # noqa
    self.assertTrue(self.reconstructor.check_ring(obj_ring))

    # only the next check time moved back: still True
    orig_check = self.reconstructor.next_check
    self.reconstructor.next_check = orig_check - 30
    self.assertTrue(self.reconstructor.check_ring(obj_ring))

    # only the ring mtime moved back: still True
    self.reconstructor.next_check = orig_check
    orig_ring_time = obj_ring._mtime
    obj_ring._mtime = orig_ring_time - 30
    self.assertTrue(self.reconstructor.check_ring(obj_ring))

    # both moved back: False
    self.reconstructor.next_check = orig_check - 30
    self.assertFalse(self.reconstructor.check_ring(obj_ring))
|
|
|
|
def test_reconstruct_check_ring(self):
    # test reconstruct logs info when check_ring is false and that
    # there are no jobs built
    objects_2 = os.path.join(self.devices, 'sda1',
                             diskfile.get_data_dir(POLICIES[2]))
    os.mkdir(objects_2)
    for part in ('0', '1', '2'):
        os.mkdir(os.path.join(objects_2, part))

    def ring_unchanged(checked_ring):
        # report a ring change for policy 1 only
        return checked_ring is not POLICIES[1].object_ring

    check_ring_patch = mock.patch(
        'swift.obj.reconstructor.ObjectReconstructor.check_ring',
        side_effect=ring_unchanged)
    with mock.patch.object(self.reconstructor, 'process_job') as mock_pj, \
            check_ring_patch:
        self.reconstructor.reconstruct()

    msgs = self.logger.get_lines_for_level('info')
    self.assertEqual(1, msgs.count(
        'Ring change detected for policy 1 (one). Aborting '
        'current reconstruction pass for this policy.'), msgs)
    # first positional argument of every process_job call
    processed = [call[1][0] for call in mock_pj.mock_calls]
    self.assertEqual(
        [job['job_type'] for job in processed],
        ['sync_only'] * 2)
    self.assertEqual(
        [job['policy'] for job in processed],
        [POLICIES[2]] * 2)
    # partition 2 doesn't belong here and doesn't have data,
    # so it just gets cleaned up
    self.assertEqual(
        {job['partition'] for job in processed},
        {0, 1})
|
|
|
|
def test_build_reconstruction_jobs(self):
    # every collected partition should yield exactly the expected jobs,
    # the first of which is either a sync or a revert job
    self.reconstructor._reset_stats()
    known_job_types = (object_reconstructor.SYNC,
                       object_reconstructor.REVERT)
    for part_info in self.reconstructor.collect_parts():
        built = self.reconstructor.build_reconstruction_jobs(part_info)
        first_type = built[0]['job_type']
        self.assertTrue(first_type in known_job_types)
        self.assert_expected_jobs(part_info['partition'], built)
|
|
|
|
def test_handoffs_only(self):
    # in handoffs_only mode only revert jobs are built; the log message
    # depends on whether any handoffs are left after the pass
    self.reconstructor.handoffs_only = True

    first_pass_types = set()

    def failing_process_job(job):
        # increment failure counter
        self.reconstructor.handoffs_remaining += 1
        first_pass_types.add(job['job_type'])

    self.reconstructor.process_job = failing_process_job

    real_build_jobs = self.reconstructor.build_reconstruction_jobs
    captured = []

    def capture_jobs(part_info):
        built = real_build_jobs(part_info)
        captured.append((part_info, built))
        return built

    with mock.patch.object(self.reconstructor, 'build_reconstruction_jobs',
                           capture_jobs):
        self.reconstructor.reconstruct()
    # only revert jobs
    found = [
        (part_info['partition'], {j['job_type'] for j in built})
        for part_info, built in captured]
    self.assertEqual([
        # partition, job_types
        (2, {'sync_revert'}),
    ], found)
    self.assertEqual(first_pass_types, {object_reconstructor.REVERT})
    # but failures keep handoffs remaining
    info_lines = self.logger.get_lines_for_level('info')
    self.assertIn('Next pass will continue to revert handoffs',
                  info_lines[-1])
    self.logger._clear()

    second_pass_types = set()

    def succeeding_process_job(job):
        # success does not increment failure counter
        second_pass_types.add(job['job_type'])

    self.reconstructor.process_job = succeeding_process_job

    # only revert jobs ... but all handoffs cleared out successfully
    self.reconstructor.reconstruct()
    self.assertEqual(second_pass_types, {object_reconstructor.REVERT})
    # it's time to turn off handoffs_only
    warning_lines = self.logger.get_lines_for_level('warning')
    self.assertIn('You should disable handoffs_only', warning_lines[-1])
|
|
|
|
def test_get_partners(self):
    # every scenario is evaluated before failing so that a single run
    # reports all mismatches at once
    scenarios = (
        # node_index, part_nodes => partners
        (0, [0, 1, 2, 3], [3, 1, 2]),
        (0, [2, 3, 1, 0], [0, 3, 1]),
        (0, [0, 1, 2, 3, 4], [4, 1, 2]),
        (0, [0, 1, 2, 3, 4, 5], [5, 1, 3]),
        (1, [0, 1, 2, 3, 4, 5], [0, 2, 4]),
        (2, [0, 1, 2, 3, 4, 5], [1, 3, 5]),
        (3, [0, 1, 2, 3, 4, 5], [2, 4, 0]),
        (4, [0, 1, 2, 3, 4, 5], [3, 5, 1]),
        (5, [0, 1, 2, 3, 4, 5], [4, 0, 2]),
        (5, [1, 4, 0, 2, 3, 5], [3, 1, 0]),
    )
    outcomes = [
        (frag_index, part_nodes, partners,
         object_reconstructor._get_partners(frag_index, part_nodes))
        for frag_index, part_nodes, partners in scenarios]
    failures = [
        'Given nodes %r for index %s we expected '
        '%r but got %r' % (part_nodes, frag_index, partners, sync_to)
        for frag_index, part_nodes, partners, sync_to in outcomes
        if partners != sync_to]
    if failures:
        self.fail('\n'.join(['Some test scenarios failed:'] + failures))
|
|
|
|
def test_iter_nodes_for_frag(self):
    # for each handoff limit and primary node, check the index (or
    # handoff_index) of every yielded node and that each one carries the
    # primary's index as its backend_index
    policy = ECStoragePolicy(1, name='test', ec_type=DEFAULT_TEST_EC_TYPE,
                             ec_ndata=4, ec_nparity=3)
    policy.object_ring = FabricatedRing(replicas=7, devices=28)
    primaries = policy.object_ring.get_part_nodes(0)

    scenarios = (
        # rebuild_handoff_node_count, primary position, expected indexes
        # no limit
        (-1, 0, [0, 0, 7, 14]),
        (-1, 3, [3, 3, 10, 17]),
        (-1, -1, [6, 6, 13, 20]),
        # default limit is 2
        (2, 0, [0, 0, 7]),
        # zero means only primaries
        (0, 0, [0]),
    )
    for handoff_limit, position, expected in scenarios:
        self.reconstructor.rebuild_handoff_node_count = handoff_limit
        primary = primaries[position]
        candidates = list(self.reconstructor._iter_nodes_for_frag(
            policy, 0, primary))
        self.assertEqual(
            expected,
            [c.get('index', c.get('handoff_index')) for c in candidates])
        # the first expected index is the primary's own index
        for candidate in candidates:
            self.assertEqual(expected[0], candidate['backend_index'])
|
|
|
|
def test_collect_parts(self):
    # collect_parts is expected to yield an entry for each of the
    # partitions 0, 1 and 2; the order they arrive in is not significant
    self.reconstructor._reset_stats()
    found = sorted(info['partition']
                   for info in self.reconstructor.collect_parts())
    self.assertEqual(found, [0, 1, 2])
|
|
|
|
def test_collect_parts_mkdirs_error(self):
    # With the reconstructor module's mkdirs patched to always raise,
    # collecting parts is expected to log exactly two errors, each one
    # carrying the original OSError as its exc_info.

    def failing_mkdirs(path):
        raise OSError('Ow!')

    self.reconstructor._reset_stats()
    with mock.patch.object(object_reconstructor, 'mkdirs',
                           failing_mkdirs):
        rmtree(self.objects_1, ignore_errors=True)
        # exhaust the generator; the yielded partitions are not checked
        partitions = []
        for info in self.reconstructor.collect_parts():
            partitions.append(info['partition'])
        logged = self.logger.get_lines_for_level('error')
        self.assertEqual(
            len(logged), 2,
            'Expected exactly two errors, got %r' % logged)
        for idx in (0, 1):
            _args, kwargs = self.logger.log_dict['error'][idx]
            self.assertEqual(str(kwargs['exc_info'][1]), 'Ow!')
|
|
|
|
def test_removes_zbf(self):
    # After running xfs_repair, a partition directory could become a
    # zero-byte file. If this happens, the reconstructor should clean it
    # up, log something, and move on to the next partition.

    # suppress unmount warning
    os.mkdir(os.path.join(self.devices, 'sda5'))

    # Surprise! Partition dir 1 is actually a zero-byte file.
    zbf_path = os.path.join(self.objects_1, '1')
    rmtree(zbf_path)
    with open(zbf_path, 'w'):
        pass
    self.assertTrue(os.path.isfile(zbf_path))  # sanity check

    # job processing is stubbed out so only partition handling runs
    self.reconstructor.process_job = lambda j: None
    self.reconstructor.reconstruct()

    self.assertFalse(os.path.exists(zbf_path))
    warnings = self.logger.get_lines_for_level('warning')
    self.assertEqual(2, len(warnings))
    hashes_warning, not_dir_warning = warnings
    # first warning is due to get_hashes failing to take lock on non-dir
    self.assertIn(zbf_path + '/hashes.pkl', hashes_warning)
    self.assertIn('unable to read', hashes_warning.lower())
    self.assertIn(zbf_path, not_dir_warning)
    self.assertIn('not a directory', not_dir_warning.lower())
|
|
|
|
def test_ignores_status_file(self):
    # Following fd86d5a, the auditor will leave status files on each device
    # until an audit can complete. The reconstructor should ignore these
    import errno

    @contextmanager
    def status_files(*auditor_types):
        # Create an empty auditor_status_<type>.json file in the policy 1
        # objects dir for each requested type, yield the list of paths,
        # and remove the files again on exit.
        status_paths = [os.path.join(self.objects_1,
                                     'auditor_status_%s.json' % typ)
                        for typ in auditor_types]
        for status_path in status_paths:
            self.assertFalse(os.path.exists(status_path))  # sanity check
            with open(status_path, 'w'):
                pass
            self.assertTrue(os.path.isfile(status_path))  # sanity check
        try:
            yield status_paths
        finally:
            for status_path in status_paths:
                try:
                    os.unlink(status_path)
                except OSError as e:
                    # a file that is already gone is fine; anything else
                    # is a real cleanup failure and must surface
                    if e.errno != errno.ENOENT:
                        raise

    # suppress unmount warning
    os.mkdir(os.path.join(self.devices, 'sda5'))

    # since our collect_parts job is a generator, that yields directly
    # into build_jobs and then spawns it's safe to do the remove_files
    # without making reconstructor startup slow
    with status_files('ALL', 'ZBF') as status_paths:
        self.reconstructor._reset_stats()
        for part_info in self.reconstructor.collect_parts():
            self.assertNotIn(part_info['part_path'], status_paths)
        warnings = self.logger.get_lines_for_level('warning')
        self.assertEqual(0, len(warnings))
        # the status files must be left in place, not cleaned up
        for status_path in status_paths:
            self.assertTrue(os.path.exists(status_path))
|
|
|
|
def _make_fake_ssync(self, ssync_calls, fail_jobs=None):
    """
    Build a lightweight stand-in class for SsyncSender.

    :param ssync_calls: a list owned by the caller; a context dict
                        describing each instantiation of the fake is
                        appended to it so the caller can make assertions.
    :param fail_jobs: optional iter of dicts; a job whose items match
                      every key/value of any one of these dicts makes the
                      fake report success == False.
    :returns: the fake class; instances are callable and return a
              (success, available_map) tuple, with an empty map on
              failure.
    """
    class _fake_ssync(object):
        def __init__(self, daemon, node, job, suffixes,
                     include_non_durable=False, max_objects=0,
                     **kwargs):
            self.suffixes = suffixes
            self.daemon = daemon
            self.job = job
            # everything the caller may want to assert on for this call
            context = {
                'node': node,
                'job': job,
                'suffixes': suffixes,
                'max_objects': max_objects,
            }
            df_mgr = self.daemon._df_router[job['policy']]
            hash_gen = df_mgr.yield_hashes(
                self.job['device'], self.job['partition'],
                self.job['policy'], self.suffixes,
                frag_index=self.job.get('frag_index'),
                frag_prefs=[] if include_non_durable else None)
            self.available_map = {}
            self.limited_by_max_objects = False
            # collect hashes, stopping early once a positive max_objects
            # limit has been reached
            for nlines, (hash_, timestamps) in enumerate(hash_gen, 1):
                self.available_map[hash_] = timestamps
                if 0 < max_objects <= nlines:
                    break
            # anything still left in the generator means we were limited
            for _ in hash_gen:
                self.limited_by_max_objects = True
                break
            context['available_map'] = self.available_map
            ssync_calls.append(context)
            self.success = not any(
                all(job.get(key) == value
                    for key, value in failure.items())
                for failure in (fail_jobs or []))
            context['success'] = self.success
            context['include_non_durable'] = include_non_durable

        def __call__(self, *args, **kwargs):
            if self.success:
                return self.success, self.available_map
            return self.success, {}

    return _fake_ssync
|
|
|
|
def test_delete_reverted(self):
    # verify reconstructor deletes reverted frag indexes after ssync'ing

    def visit_obj_dirs(context):
        # yield (dirpath, listing) for every directory found below the
        # suffix dirs that were handed to ssync for this job
        for suffix in context['suffixes']:
            suffix_dir = os.path.join(context['job']['path'], suffix)
            for root, dirs, _unused in os.walk(suffix_dir):
                for name in dirs:
                    dirpath = os.path.join(root, name)
                    yield dirpath, os.listdir(dirpath)

    files_before = files_after = 0

    # run reconstructor with delete function mocked out to check calls
    ssync_calls = []
    fake_ssync = self._make_fake_ssync(ssync_calls)
    with mock.patch('swift.obj.reconstructor.ssync_sender', fake_ssync), \
            mocked_http_conn(*[200] * 6, body=pickle.dumps({})), \
            mock.patch.object(self.reconstructor,
                              'delete_reverted_objs') as mock_delete:
        self.reconstructor.reconstruct()
    expected_calls = []
    for context in ssync_calls:
        if context['job']['job_type'] != REVERT:
            self.assertFalse(context.get('include_non_durable'))
            self.assertEqual(0, context.get('max_objects'))
            continue
        self.assertTrue(context.get('include_non_durable'))
        for _dirpath, listing in visit_obj_dirs(context):
            # sanity check - expect some files to be in dir,
            # may not be for the reverted frag index
            self.assertTrue(listing)
            files_before += len(listing)
        self.assertEqual(context['job']['frag_index'],
                         context['node']['index'])
        expected_calls.append(
            mock.call(context['job'], context['available_map']))

    mock_delete.assert_has_calls(expected_calls, any_order=True)

    # N.B. in this next test sequence we actually delete files after
    # revert, so the on-disk hashes can change. In partition 1, if the
    # revert jobs (for frag_index 0 or 1) run before the sync job
    # (frag_index 4) all suffixes will get removed and the sync job won't
    # have anything to ship the remote (meaning there's no post-sync
    # REPLICATE call). To keep the number of mocked_http_conn responses
    # predictable we force a stable job order by mocking random's shuffle.
    ssync_calls = []
    fake_ssync = self._make_fake_ssync(ssync_calls)
    with mock.patch('swift.obj.reconstructor.ssync_sender', fake_ssync), \
            mocked_http_conn(*[200] * 6, body=pickle.dumps({})), \
            mock.patch('swift.obj.reconstructor.random.shuffle'):
        self.reconstructor.reconstruct()
    for context in ssync_calls:
        if context['job']['job_type'] != REVERT:
            self.assertFalse(context.get('include_non_durable'))
            self.assertEqual(0, context.get('max_objects'))
            continue
        self.assertTrue(context.get('include_non_durable'))
        reverted_tail = '#%s.data' % context['node']['index']
        for _dirpath, listing in visit_obj_dirs(context):
            files_after += len(listing)
            for filename in listing:
                self.assertFalse(
                    filename.endswith(reverted_tail), filename)

    # sanity check that some files were deleted
    self.assertGreater(files_before, files_after)
|
|
|
|
def test_max_objects_per_revert_only_for_revert_jobs(self):
    # verify max_objects_per_revert option is only passed to revert jobs
    ssync_calls = []
    conf = dict(self.conf, max_objects_per_revert=2)
    fake_ssync = self._make_fake_ssync(ssync_calls)
    with mock.patch('swift.obj.reconstructor.ssync_sender', fake_ssync), \
            mocked_http_conn(*[200] * 6, body=pickle.dumps({})):
        reconstructor = object_reconstructor.ObjectReconstructor(
            conf, logger=self.logger)
        reconstructor.reconstruct()
    n_reverts = n_syncs = 0
    for context in ssync_calls:
        if context['job']['job_type'] == REVERT:
            n_reverts += 1
            expected_limit = 2
        else:
            n_syncs += 1
            expected_limit = 0
        self.assertEqual(expected_limit, context.get('max_objects'))
    # make sure both kinds of job were actually exercised
    self.assertGreater(n_reverts, 0)
    self.assertGreater(n_syncs, 0)
|
|
|
|
def test_delete_reverted_nondurable(self):
    # verify reconstructor only deletes reverted nondurable fragments that
    # are older than commit_window
    shutil.rmtree(self.ec_obj_path)
    my_ips = utils.whataremyips(self.reconstructor.ring_ip)
    local_devs = [
        dev for dev in self.ec_obj_ring.devs
        if dev and dev['replication_ip'] in my_ips and
        dev['replication_port'] == self.reconstructor.port]
    partition = (local_devs[0]['id'] + 1) % 3

    def data_file_of(df):
        # path of the data file currently on disk for the given diskfile
        return df.manager.cleanup_ondisk_files(
            df._datadir, frag_prefs=[])['data_file']

    # recent non-durable
    df_recent = self._create_diskfile(
        object_name='recent', part=partition, commit=False)
    datafile_recent = data_file_of(df_recent)
    # older non-durable but with recent mtime
    df_older = self._create_diskfile(
        object_name='older', part=partition, commit=False,
        timestamp=Timestamp(time.time() - 61))
    datafile_older = data_file_of(df_older)
    # durable
    df_durable = self._create_diskfile(
        object_name='durable', part=partition, commit=True)
    datafile_durable = data_file_of(df_durable)
    for path in (datafile_recent, datafile_older, datafile_durable):
        self.assertTrue(os.path.exists(path))

    self.reconstructor.handoffs_only = True
    ssync_calls = []
    with mock.patch('swift.obj.reconstructor.ssync_sender',
                    self._make_fake_ssync(ssync_calls)):
        self.reconstructor.reconstruct()
    for context in ssync_calls:
        self.assertEqual(REVERT, context['job']['job_type'])
        self.assertTrue(context.get('include_non_durable'))
    # neither nondurable should be removed yet with default commit_window
    # because their mtimes are too recent
    self.assertTrue(os.path.exists(datafile_recent))
    self.assertTrue(os.path.exists(datafile_older))
    # but durable is purged
    self.assertFalse(os.path.exists(datafile_durable), datafile_durable)

    # let the files get a little bit older and turn down the
    # commit_window...
    self.reconstructor.handoffs_only = True
    sleep(0.01)
    df_older.manager.commit_window = 0.005
    ssync_calls = []
    with mock.patch('swift.obj.reconstructor.ssync_sender',
                    self._make_fake_ssync(ssync_calls)):
        self.reconstructor.reconstruct()
    for context in ssync_calls:
        self.assertEqual(REVERT, context['job']['job_type'])
        self.assertTrue(context.get('include_non_durable'))

    # ...now the nondurables get purged
    self.assertFalse(os.path.exists(datafile_recent))
    self.assertFalse(os.path.exists(datafile_older))
|
|
|
|
def test_sync_old_nondurable_before_committed_non_zero_commit_window(self):
    # verify that a *recently written* nondurable fragment survives being
    # visited by the reconstructor, despite having timestamp older than
    # reclaim_age
    shutil.rmtree(self.ec_obj_path)
    my_ips = utils.whataremyips(self.reconstructor.ring_ip)
    local_devs = [
        dev for dev in self.ec_obj_ring.devs
        if dev and dev['replication_ip'] in my_ips and
        dev['replication_port'] == self.reconstructor.port]
    partition = local_devs[0]['id']
    df_mgr = self.reconstructor._df_router[self.policy]

    def make_nondurable(name, timestamp):
        # write an uncommitted frag index 4 and return the diskfile
        # together with the path of its data file
        df = self._create_diskfile(
            object_name=name, part=partition, commit=False,
            timestamp=timestamp, frag_index=4)
        data_file = df.manager.cleanup_ondisk_files(
            df._datadir, frag_prefs=[])['data_file']
        return df, data_file

    # recently written, recent timestamp non-durable
    ts_recent = Timestamp(time.time())
    df_recent, datafile_recent = make_nondurable('recent', ts_recent)

    # recently written but old timestamp non-durable
    ts_old = Timestamp(time.time() - df_mgr.reclaim_age - 1)
    df_older, datafile_older = make_nondurable('older', ts_old)
    self.assertTrue(os.path.exists(datafile_recent))
    self.assertTrue(os.path.exists(datafile_older))

    # for this test we don't actually need to ssync anything, so pretend
    # all suffixes are in sync
    self.reconstructor._get_suffixes_to_sync = (
        lambda job, node: ([], node))
    df_mgr.commit_window = 1000  # avoid non-durables being reclaimed
    self.reconstructor.reconstruct()
    # neither nondurable should be removed with this large commit_window
    # because their mtimes are too recent
    self.assertTrue(os.path.exists(datafile_recent))
    self.assertTrue(os.path.exists(datafile_older))
    # and we can still make the nondurables durable
    df_recent.writer().commit(ts_recent)
    self.assertTrue(os.path.exists(datafile_recent.replace('#4', '#4#d')))
    df_older.writer().commit(ts_old)
    self.assertTrue(os.path.exists(datafile_older.replace('#4', '#4#d')))
|
|
|
|
def test_sync_old_nondurable_before_committed_zero_commit_window(self):
    # verify that a *recently written* nondurable fragment won't survive
    # being visited by the reconstructor if its timestamp is older than
    # reclaim_age and commit_window is zero; this test illustrates the
    # potential data loss bug that commit_window addresses
    shutil.rmtree(self.ec_obj_path)
    my_ips = utils.whataremyips(self.reconstructor.ring_ip)
    local_devs = [
        dev for dev in self.ec_obj_ring.devs
        if dev and dev['replication_ip'] in my_ips and
        dev['replication_port'] == self.reconstructor.port]
    partition = local_devs[0]['id']
    df_mgr = self.reconstructor._df_router[self.policy]

    def make_nondurable(name, timestamp):
        # write an uncommitted frag index 4 and return the diskfile
        # together with the path of its data file
        df = self._create_diskfile(
            object_name=name, part=partition, commit=False,
            timestamp=timestamp, frag_index=4)
        data_file = df.manager.cleanup_ondisk_files(
            df._datadir, frag_prefs=[])['data_file']
        return df, data_file

    # recently written, recent timestamp non-durable
    ts_recent = Timestamp(time.time())
    df_recent, datafile_recent = make_nondurable('recent', ts_recent)

    # recently written but old timestamp non-durable
    ts_old = Timestamp(time.time() - df_mgr.reclaim_age - 1)
    df_older, datafile_older = make_nondurable('older', ts_old)
    self.assertTrue(os.path.exists(datafile_recent))
    self.assertTrue(os.path.exists(datafile_older))

    # for this test we don't actually need to ssync anything, so pretend
    # all suffixes are in sync
    self.reconstructor._get_suffixes_to_sync = (
        lambda job, node: ([], node))
    df_mgr.commit_window = 0
    with mock.patch(
            'swift.obj.diskfile.is_file_older') as mock_is_file_older:
        self.reconstructor.reconstruct()
    # older nondurable will be removed with commit_window = 0
    self.assertTrue(os.path.exists(datafile_recent))
    self.assertFalse(os.path.exists(datafile_older))
    df_recent.writer().commit(ts_recent)
    self.assertTrue(os.path.exists(datafile_recent.replace('#4', '#4#d')))
    # ...and attempt to commit will fail :(
    with self.assertRaises(DiskFileError):
        df_older.writer().commit(ts_old)
    # with zero commit_window the call to stat the file is not made
    mock_is_file_older.assert_not_called()
|
|
|
|
def test_sync_old_nondurable_before_committed_past_commit_window(self):
    # verify that a *not so recently written* nondurable fragment won't
    # survive being visited by the reconstructor if its timestamp is older
    # than reclaim_age
    shutil.rmtree(self.ec_obj_path)
    my_ips = utils.whataremyips(self.reconstructor.ring_ip)
    local_devs = [
        dev for dev in self.ec_obj_ring.devs
        if dev and dev['replication_ip'] in my_ips and
        dev['replication_port'] == self.reconstructor.port]
    partition = local_devs[0]['id']
    df_mgr = self.reconstructor._df_router[self.policy]

    def make_nondurable(name, timestamp):
        # write an uncommitted frag index 4 and return the diskfile
        # together with the path of its data file
        df = self._create_diskfile(
            object_name=name, part=partition, commit=False,
            timestamp=timestamp, frag_index=4)
        data_file = df.manager.cleanup_ondisk_files(
            df._datadir, frag_prefs=[])['data_file']
        return df, data_file

    # recently written, recent timestamp non-durable
    ts_recent = Timestamp(time.time())
    df_recent, datafile_recent = make_nondurable('recent', ts_recent)

    # recently written but old timestamp non-durable
    ts_old = Timestamp(time.time() - df_mgr.reclaim_age - 1)
    df_older, datafile_older = make_nondurable('older', ts_old)
    # pretend file was written more than commit_window seconds ago
    aged_mtime = time.time() - 60.1
    os.utime(datafile_older, (aged_mtime, aged_mtime))
    self.assertTrue(os.path.exists(datafile_recent))
    self.assertTrue(os.path.exists(datafile_older))

    # for this test we don't actually need to ssync anything, so pretend
    # all suffixes are in sync
    self.reconstructor._get_suffixes_to_sync = (
        lambda job, node: ([], node))
    # leave commit_window at default of 60 seconds
    self.reconstructor.reconstruct()
    # older nondurable will be removed
    self.assertTrue(os.path.exists(datafile_recent))
    self.assertFalse(os.path.exists(datafile_older))
    df_recent.writer().commit(ts_recent)
    self.assertTrue(os.path.exists(datafile_recent.replace('#4', '#4#d')))
    # ...and attempt to commit will fail :(
    with self.assertRaises(DiskFileError):
        df_older.writer().commit(ts_old)
|
|
|
|
def test_delete_reverted_max_objects_per_revert(self):
    # verify reconstructor only deletes objects that were actually reverted
    # when ssync is limited by max_objects_per_revert
    shutil.rmtree(self.ec_obj_path)
    my_ips = utils.whataremyips(self.reconstructor.ring_ip)
    local_devs = [
        dev for dev in self.ec_obj_ring.devs
        if dev and dev['replication_ip'] in my_ips and
        dev['replication_port'] == self.reconstructor.port]
    partition = (local_devs[0]['id'] + 1) % 3

    # three durable objects
    datafiles = []
    for object_name in ('zero', 'one', 'two'):
        df = self._create_diskfile(
            object_name=object_name, part=partition)
        datafile = df.manager.cleanup_ondisk_files(
            df._datadir, frag_prefs=[])['data_file']
        self.assertTrue(os.path.exists(datafile))
        datafiles.append(datafile)

    def remaining_datafiles():
        return [path for path in datafiles if os.path.exists(path)]

    self.assertEqual(datafiles, remaining_datafiles())

    # only two objects will be sync'd and purged...
    ssync_calls = []
    conf = dict(self.conf, max_objects_per_revert=2, handoffs_only=True)
    self.reconstructor = object_reconstructor.ObjectReconstructor(
        conf, logger=self.logger)
    with mock.patch('swift.obj.reconstructor.ssync_sender',
                    self._make_fake_ssync(ssync_calls)):
        self.reconstructor.reconstruct()
    for context in ssync_calls:
        self.assertEqual(REVERT, context['job']['job_type'])
        self.assertEqual(2, context.get('max_objects'))
    actual_datafiles = remaining_datafiles()
    self.assertEqual(1, len(actual_datafiles), actual_datafiles)
    # handoff still reported as remaining
    self.assertEqual(1, self.reconstructor.handoffs_remaining)

    # ...until next reconstructor run which will sync and purge the last
    # object; max_objects_per_revert == actual number of objects
    ssync_calls = []
    conf = dict(self.conf, max_objects_per_revert=1, handoffs_only=True)
    self.reconstructor = object_reconstructor.ObjectReconstructor(
        conf, logger=self.logger)
    with mock.patch('swift.obj.reconstructor.ssync_sender',
                    self._make_fake_ssync(ssync_calls)):
        self.reconstructor.reconstruct()
    for context in ssync_calls:
        self.assertEqual(REVERT, context['job']['job_type'])
        self.assertEqual(1, context.get('max_objects'))
    self.assertEqual([], remaining_datafiles())
    # handoff is no longer remaining
    self.assertEqual(0, self.reconstructor.handoffs_remaining)
|
|
|
|
def test_no_delete_failed_revert(self):
    # test will only process revert jobs
    self.reconstructor.handoffs_only = True
    # suppress unmount warning
    os.mkdir(os.path.join(self.devices, 'sda5'))

    captured_ssync = []
    # fail all jobs on part 2 on sda1
    fail_jobs = [
        {'device': 'sda1', 'partition': 2},
    ]
    failing_ssync = self._make_fake_ssync(
        captured_ssync, fail_jobs=fail_jobs)
    with mock.patch('swift.obj.reconstructor.ssync_sender',
                    failing_ssync), \
            mocked_http_conn() as request_log:
        self.reconstructor.reconstruct()
    self.assertFalse(request_log.unexpected_requests)

    # two revert jobs, both for part 2 on sda1, are attempted
    self.assertEqual(len(captured_ssync), 2)
    expected_ssync_calls = {
        # (device, part, frag_index, include_non_durable): occurrences
        ('sda1', 2, 2, True): 1,
        ('sda1', 2, 0, True): 1,
    }
    actual_ssync_calls = collections.Counter(
        (context['job']['device'],
         context['job']['partition'],
         context['job']['frag_index'],
         context['include_non_durable'])
        for context in captured_ssync)
    self.assertEqual(expected_ssync_calls, dict(actual_ssync_calls))

    # failed jobs don't sync suffixes
    self.assertFalse(
        self.logger.get_lines_for_level('warning'))
    self.assertFalse(
        self.logger.get_lines_for_level('error'))
    # handoffs remaining and part exists
    self.assertEqual(2, self.reconstructor.handoffs_remaining)
    self.assertTrue(os.path.exists(self.parts_1['2']))

    # again with no failures
    captured_ssync = []
    with mock.patch('swift.obj.reconstructor.ssync_sender',
                    self._make_fake_ssync(captured_ssync)):
        self.reconstructor.reconstruct()
    # same jobs
    self.assertEqual(len(captured_ssync), 2)
    self.assertFalse(
        self.logger.get_lines_for_level('error'))
    # handoffs are cleaned up
    self.assertEqual(0, self.reconstructor.handoffs_remaining)
    warning_msgs = self.logger.get_lines_for_level('warning')
    self.assertEqual(1, len(warning_msgs))
    self.assertIn('no handoffs remaining', warning_msgs[0])

    # need one more pass to cleanup the part dir
    self.assertTrue(os.path.exists(self.parts_1['2']))
    with mock.patch('swift.obj.reconstructor.ssync_sender',
                    self._make_fake_ssync([])), \
            mocked_http_conn():
        self.reconstructor.reconstruct()
    self.assertFalse(os.path.exists(self.parts_1['2']))
|
|
|
|
def test_get_part_jobs(self):
    # yeah, this test code expects a specific setup
    self.assertEqual(len(self.part_nums), 3)

    # build the jobs for every partition dir on disk and compare them
    # with what the fixture says that partition should produce
    for partition in os.listdir(self.ec_obj_path):
        part_jobs = self.reconstructor._get_part_jobs(
            self.ec_local_dev,
            os.path.join(self.ec_obj_path, partition),
            int(partition),
            self.ec_policy)
        self.assert_expected_jobs(partition, part_jobs)
|
|
|
|
def assertStatCount(self, stat_method, stat_prefix, expected_count):
    """
    Assert on the value count_stats reports for this test's logger.

    :param stat_method: name of the stats method, e.g. 'update_stats'
    :param stat_prefix: metric prefix handed to count_stats
    :param expected_count: the count that count_stats must return
    """
    actual_count = count_stats(self.logger, stat_method, stat_prefix)
    self.assertEqual(
        expected_count, actual_count,
        'expected %s != %s for %s %s' % (
            expected_count, actual_count, stat_method, stat_prefix))
|
|
|
|
def test_delete_partition(self):
    # part 2 is predefined to have all revert jobs
    part_path = os.path.join(self.objects_1, '2')
    self.assertTrue(os.access(part_path, os.F_OK))

    ssync_calls = []
    with mock.patch('swift.obj.reconstructor.ssync_sender',
                    self._make_fake_ssync(ssync_calls)):
        self.reconstructor.reconstruct(override_partitions=[2])

    expected_ssync_calls = sorted([
        (u'10.0.0.0', REVERT, 2, [u'3c1'], True),
        (u'10.0.0.2', REVERT, 2, [u'061'], True),
    ])
    actual_ssync_calls = sorted(
        (call['node']['ip'],
         call['job']['job_type'],
         call['job']['partition'],
         call['suffixes'],
         call.get('include_non_durable'))
        for call in ssync_calls)
    self.assertEqual(expected_ssync_calls, actual_ssync_calls)

    expected_stats = {
        ('increment', 'partition.delete.count.'): 2,
        ('timing_since', 'partition.delete.timing'): 2,
    }
    for (stat_method, stat_prefix), expected in expected_stats.items():
        self.assertStatCount(stat_method, stat_prefix, expected)

    # plant a hashes.pkl that claims an extra suffix exists
    stub_data = self.reconstructor._get_hashes(
        'sda1', 2, self.policy, do_listdir=True)
    stub_data['7ca'] = {None: '8f19c38e1cf8e2390d4ca29051407ae3'}
    with open(os.path.join(part_path, 'hashes.pkl'), 'wb') as f:
        pickle.dump(stub_data, f)

    # part 2 should be totally empty
    df_mgr = self.reconstructor._df_router[self.policy]
    hash_gen = df_mgr.yield_hashes(
        'sda1', '2', self.policy, suffixes=stub_data.keys())
    for hash_, ts in hash_gen:
        self.fail('found %s : %s' % (hash_, ts))

    new_hashes = self.reconstructor._get_hashes(
        'sda1', 2, self.policy, do_listdir=True)
    self.assertFalse(new_hashes)

    # N.B. the partition directory is removed next pass
    ssync_calls = []
    with mocked_http_conn() as request_log, \
            mock.patch('swift.obj.reconstructor.ssync_sender',
                       self._make_fake_ssync(ssync_calls)):
        self.reconstructor.reconstruct(override_partitions=[2])
    self.assertEqual([], ssync_calls)
    self.assertEqual([], request_log.requests)
    self.assertFalse(os.access(part_path, os.F_OK))
|
|
|
|
def test_process_job_all_success(self):
    # process every job the global setup produces with all remote
    # requests succeeding, checking the stats emitted per job
    # number of mocked responses supplied per sync_to node, by job type
    rehash_per_job_type = {SYNC: 1, REVERT: 0}
    self.reconstructor._reset_stats()
    found_jobs = []
    with mock_ssync_sender():
        for part_info in self.reconstructor.collect_parts():
            jobs = self.reconstructor.build_reconstruction_jobs(
                part_info)
            found_jobs.extend(jobs)
            for job in jobs:
                self.logger.clear()
                node_count = len(job['sync_to'])
                n_responses = (
                    node_count * rehash_per_job_type[job['job_type']])
                with mocked_http_conn(*[200] * n_responses,
                                      body=pickle.dumps({})):
                    self.reconstructor.process_job(job)
                if job['job_type'] == object_reconstructor.REVERT:
                    self.assertStatCount('update_stats',
                                         'suffix.hashes', 0)
                else:
                    self.assertStatCount('update_stats',
                                         'suffix.hashes', node_count)
                    self.assertStatCount('update_stats',
                                         'suffix.syncs', node_count)
                self.assertNotIn('error', self.logger.all_log_lines())
    job_counts = collections.Counter(
        (job['device'], job['partition'],
         job['frag_index'], job['job_type'])
        for job in found_jobs)
    expected_job_counts = {
        ('sda1', 0, 1, SYNC): 1,
        ('sda1', 0, 2, REVERT): 1,
        ('sda1', 1, 0, REVERT): 1,
        ('sda1', 1, 1, REVERT): 1,
        ('sda1', 1, 4, SYNC): 1,
        ('sda1', 2, 0, REVERT): 1,
        ('sda1', 2, 2, REVERT): 1,
    }
    self.assertEqual(dict(job_counts), expected_job_counts)
    self.assertEqual(self.reconstructor.suffix_sync, 12)
    self.assertEqual(self.reconstructor.suffix_count, 12)
    self.assertEqual(self.reconstructor.reconstruction_count, 7)
|
|
|
|
def test_process_job_all_insufficient_storage(self):
    # every remote request is answered with 507; all jobs are still
    # counted but no suffixes are hashed or synced
    self.reconstructor._reset_stats()
    found_jobs = []
    with mock_ssync_sender(), mocked_http_conn(*[507] * 10):
        for part_info in self.reconstructor.collect_parts():
            jobs = self.reconstructor.build_reconstruction_jobs(
                part_info)
            found_jobs.extend(jobs)
            for job in jobs:
                self.logger.clear()
                self.reconstructor.process_job(job)
                for line in self.logger.get_lines_for_level('error'):
                    self.assertIn('responded as unmounted', line)
                for stat_prefix in ('suffix.hashes', 'suffix.syncs'):
                    self.assertEqual(0, count_stats(
                        self.logger, 'update_stats', stat_prefix))
    job_counts = collections.Counter(
        (job['device'], job['partition'],
         job['frag_index'], job['job_type'])
        for job in found_jobs)
    expected_job_counts = {
        ('sda1', 0, 1, SYNC): 1,
        ('sda1', 0, 2, REVERT): 1,
        ('sda1', 1, 0, REVERT): 1,
        ('sda1', 1, 1, REVERT): 1,
        ('sda1', 1, 4, SYNC): 1,
        ('sda1', 2, 0, REVERT): 1,
        ('sda1', 2, 2, REVERT): 1,
    }
    self.assertEqual(dict(job_counts), expected_job_counts)
    self.assertEqual(self.reconstructor.suffix_sync, 0)
    self.assertEqual(self.reconstructor.suffix_count, 0)
    self.assertEqual(self.reconstructor.reconstruction_count, 7)
|
|
|
|
def test_process_job_all_client_error(self):
    # every remote request is answered with 400; all jobs are still
    # counted but no suffixes are hashed or synced
    self.reconstructor._reset_stats()
    found_jobs = []
    with mock_ssync_sender(), mocked_http_conn(*[400] * 6):
        for part_info in self.reconstructor.collect_parts():
            jobs = self.reconstructor.build_reconstruction_jobs(
                part_info)
            found_jobs.extend(jobs)
            for job in jobs:
                self.logger.clear()
                self.reconstructor.process_job(job)
                for line in self.logger.get_lines_for_level('error'):
                    self.assertIn('Invalid response 400', line)
                for stat_prefix in ('suffix.hashes', 'suffix.syncs'):
                    self.assertEqual(0, count_stats(
                        self.logger, 'update_stats', stat_prefix))
    job_counts = collections.Counter(
        (job['device'], job['partition'], job['frag_index'])
        for job in found_jobs)
    expected_job_counts = {
        ('sda1', 0, 1): 1,
        ('sda1', 0, 2): 1,
        ('sda1', 1, 0): 1,
        ('sda1', 1, 1): 1,
        ('sda1', 1, 4): 1,
        ('sda1', 2, 0): 1,
        ('sda1', 2, 2): 1,
    }
    self.assertEqual(dict(job_counts), expected_job_counts)
    self.assertEqual(self.reconstructor.suffix_sync, 0)
    self.assertEqual(self.reconstructor.suffix_count, 0)
    self.assertEqual(self.reconstructor.reconstruction_count, 7)
|
|
|
|
def test_process_job_all_timeout(self):
    # every remote request raises Timeout; all jobs are still counted
    # but no suffixes are hashed or synced
    self.reconstructor._reset_stats()
    found_jobs = []
    with mock_ssync_sender():
        with mocked_http_conn(*[Timeout()] * 6):
            for part_info in self.reconstructor.collect_parts():
                jobs = self.reconstructor.build_reconstruction_jobs(
                    part_info)
                found_jobs.extend(jobs)
                for job in jobs:
                    self.logger.clear()
                    self.reconstructor.process_job(job)
                    for line in self.logger.get_lines_for_level('error'):
                        self.assertIn('Timeout (Nones)', line)
                    for stat_prefix in ('suffix.hashes', 'suffix.syncs'):
                        self.assertStatCount(
                            'update_stats', stat_prefix, 0)
    job_counts = collections.Counter(
        (job['device'], job['partition'], job['frag_index'])
        for job in found_jobs)
    expected_job_counts = {
        ('sda1', 0, 1): 1,
        ('sda1', 0, 2): 1,
        ('sda1', 1, 0): 1,
        ('sda1', 1, 1): 1,
        ('sda1', 1, 4): 1,
        ('sda1', 2, 0): 1,
        ('sda1', 2, 2): 1,
    }
    self.assertEqual(dict(job_counts), expected_job_counts)
    self.assertEqual(self.reconstructor.suffix_sync, 0)
    self.assertEqual(self.reconstructor.suffix_count, 0)
    self.assertEqual(self.reconstructor.reconstruction_count, 7)
|
|
|
|
def test_reconstructor_skipped_partpower_increase(self):
    # After the test rings are re-created with the extra argument 10,
    # reconstruct() must do no reconstruction and must log the
    # "next_part_power set" warning for policy 'one'.
    self.reconstructor._reset_stats()
    _create_test_rings(self.testdir, 10)
    # Enforce re-reading the EC ring
    reloaded_ring = ring.Ring(self.testdir, ring_name='object-1')
    POLICIES[1].object_ring = reloaded_ring

    self.reconstructor.reconstruct()

    self.assertEqual(0, self.reconstructor.reconstruction_count)
    warning_lines = self.logger.get_lines_for_level('warning')
    expected_warning = "next_part_power set in policy 'one'. Skipping"
    self.assertIn(expected_warning, warning_lines)
|
|
|
|
|
|
class TestGlobalSetupObjectReconstructorLegacyDurable(
        TestGlobalSetupObjectReconstructor):
    # Tests for reconstructor using real objects in test partition directories.
    #
    # This subclass adds no tests of its own: it inherits everything from
    # TestGlobalSetupObjectReconstructor and only overrides the
    # legacy_durable class attribute.
    # NOTE(review): presumably the base class fixtures consult
    # legacy_durable to decide how the on-disk test objects are written;
    # confirm against the base class setUp.
    legacy_durable = True
|
|
|
|
|
|
@patch_policies(with_ec_default=True)
|
|
class TestWorkerReconstructor(unittest.TestCase):
|
|
|
|
maxDiff = None
|
|
|
|
def setUp(self):
    # Give every test its own logger and a scratch directory that
    # already contains the recon cache directory.
    super(TestWorkerReconstructor, self).setUp()
    self.logger = debug_logger()
    self.testdir = scratch_dir = tempfile.mkdtemp()
    self.recon_cache_path = cache_dir = os.path.join(scratch_dir, 'recon')
    self.rcache = os.path.join(cache_dir, 'object.recon')
    # dump_recon_cache expects recon_cache_path to exist
    os.mkdir(cache_dir)
|
|
|
|
def tearDown(self):
    # Run the base class teardown, then delete the scratch directory
    # that setUp created.
    super(TestWorkerReconstructor, self).tearDown()
    scratch_dir = self.testdir
    shutil.rmtree(scratch_dir)
|
|
|
|
def test_no_workers_by_default(self):
    # An empty conf must leave reconstructor_workers at 0 and produce
    # no worker args at all.
    daemon = object_reconstructor.ObjectReconstructor(
        {}, logger=self.logger)
    self.assertEqual(0, daemon.reconstructor_workers)
    worker_args = list(daemon.get_worker_args())
    self.assertEqual(0, len(worker_args))
|
|
|
|
def test_bad_value_workers(self):
    # A negative worker count is kept as configured (-1) but must not
    # produce any worker args.
    conf = {'reconstructor_workers': '-1'}
    daemon = object_reconstructor.ObjectReconstructor(
        conf, logger=self.logger)
    self.assertEqual(-1, daemon.reconstructor_workers)
    worker_args = list(daemon.get_worker_args())
    self.assertEqual(0, len(worker_args))
|
|
|
|
def test_workers_with_no_devices(self):
    # get_local_devices is not stubbed here; whatever the configured
    # worker count, exactly one set of worker args with empty overrides
    # is expected.
    expected_args = [{
        'override_partitions': [],
        'override_devices': [],
        'multiprocess_worker_index': 0,
    }]
    for num_workers in (1, 10):
        daemon = object_reconstructor.ObjectReconstructor(
            {'reconstructor_workers': num_workers}, logger=self.logger)
        self.assertEqual(num_workers, daemon.reconstructor_workers)
        self.assertEqual(1, len(list(daemon.get_worker_args())))
        self.assertEqual(expected_args, list(daemon.get_worker_args()))
|
|
|
|
def test_workers_with_devices_and_no_valid_overrides(self):
    # An override device that is not local still yields one worker in
    # once mode; in forever mode the override is dropped and each local
    # device gets its own worker.
    daemon = object_reconstructor.ObjectReconstructor(
        {'reconstructor_workers': '2'}, logger=self.logger)
    daemon.get_local_devices = lambda: ['sdb', 'sdc']
    self.assertEqual(2, daemon.reconstructor_workers)

    def args_for(device, index):
        # expected worker args for a worker handling a single device
        return {'override_partitions': [],
                'override_devices': [device],
                'multiprocess_worker_index': index}

    # N.B. sdz is not in local_devices so there are no devices to process
    # but still expect a single worker process
    once_args = list(daemon.get_worker_args(once=True, devices='sdz'))
    self.assertEqual(1, len(once_args))
    self.assertEqual([args_for('sdz', 0)], once_args)
    # overrides are ignored in forever mode
    forever_args = list(
        daemon.get_worker_args(once=False, devices='sdz'))
    self.assertEqual(2, len(forever_args))
    self.assertEqual(
        [args_for('sdb', 0), args_for('sdc', 1)], forever_args)
|
|
|
|
def test_workers_with_devices(self):
    # Two workers and two local devices: one device per worker, with
    # identical worker args in forever mode and in once mode.
    daemon = object_reconstructor.ObjectReconstructor(
        {'reconstructor_workers': '2'}, logger=self.logger)
    daemon.get_local_devices = lambda: ['sdb', 'sdc']
    self.assertEqual(2, daemon.reconstructor_workers)
    self.assertEqual(2, len(list(daemon.get_worker_args())))
    expected = [
        {'override_partitions': [],
         'override_devices': [device],
         'multiprocess_worker_index': index}
        for index, device in enumerate(['sdb', 'sdc'])]
    for once in (False, True):
        worker_args = list(daemon.get_worker_args(once=once))
        self.assertEqual(2, len(worker_args))
        self.assertEqual(expected, worker_args)
|
|
|
|
def test_workers_with_devices_and_overrides(self):
    # The same device/partition overrides are honoured in once mode and
    # dropped in forever mode.
    def make_daemon():
        # fresh two-worker reconstructor with local devices sdb and sdc
        daemon = object_reconstructor.ObjectReconstructor(
            {'reconstructor_workers': '2'}, logger=self.logger)
        daemon.get_local_devices = lambda: ['sdb', 'sdc']
        return daemon

    overrides = {'devices': 'sdb,sdz', 'partitions': '99,333'}

    daemon = make_daemon()
    self.assertEqual(2, daemon.reconstructor_workers)
    # check we don't get more workers than override devices...
    # N.B. sdz is not in local_devices so should be ignored for the
    # purposes of generating workers
    once_args = list(daemon.get_worker_args(once=True, **overrides))
    self.assertEqual(1, len(once_args))
    self.assertEqual([{
        'override_partitions': [99, 333],
        'override_devices': ['sdb'],
        'multiprocess_worker_index': 0,
    }], once_args)

    # overrides are ignored in forever mode
    daemon = make_daemon()
    forever_args = list(daemon.get_worker_args(once=False, **overrides))
    expected_forever = [
        {'override_partitions': [],
         'override_devices': [device],
         'multiprocess_worker_index': index}
        for index, device in enumerate(['sdb', 'sdc'])]
    self.assertEqual(expected_forever, forever_args)
|
|
|
|
def test_workers_with_lots_of_devices(self):
    # Five local devices over two workers: the expected split is
    # sdb/sdd/sdf for worker 0 and sdc/sde for worker 1.
    daemon = object_reconstructor.ObjectReconstructor(
        {'reconstructor_workers': '2'}, logger=self.logger)
    daemon.get_local_devices = lambda: [
        'sdb', 'sdc', 'sdd', 'sde', 'sdf']
    self.assertEqual(2, daemon.reconstructor_workers)
    self.assertEqual(2, len(list(daemon.get_worker_args())))
    device_groups = [['sdb', 'sdd', 'sdf'], ['sdc', 'sde']]
    expected = [
        {'override_partitions': [],
         'override_devices': devices,
         'multiprocess_worker_index': index}
        for index, devices in enumerate(device_groups)]
    self.assertEqual(expected, list(daemon.get_worker_args()))
|
|
|
|
def test_workers_with_lots_of_devices_and_overrides(self):
    # check that override devices get distributed across workers
    # in similar fashion to all devices
    daemon = object_reconstructor.ObjectReconstructor(
        {'reconstructor_workers': '2'}, logger=self.logger)
    daemon.get_local_devices = lambda: [
        'sdb', 'sdc', 'sdd', 'sde', 'sdf']
    self.assertEqual(2, daemon.reconstructor_workers)

    def expected_args(*device_groups):
        # one set of worker args per device group, all carrying the
        # same partition overrides
        return [{'override_partitions': [99, 333],
                 'override_devices': list(devices),
                 'multiprocess_worker_index': index}
                for index, devices in enumerate(device_groups)]

    worker_args = list(daemon.get_worker_args(
        once=True, devices='sdb,sdd,sdf', partitions='99,333'))
    # 3 devices to operate on, 2 workers -> one worker gets two devices
    # and the other worker just gets one
    self.assertEqual(
        expected_args(['sdb', 'sdf'], ['sdd']), worker_args)

    # with 4 override devices, expect 2 per worker
    worker_args = list(daemon.get_worker_args(
        once=True, devices='sdb,sdc,sdd,sdf', partitions='99,333'))
    self.assertEqual(2, len(worker_args))
    self.assertEqual(
        expected_args(['sdb', 'sdd'], ['sdc', 'sdf']), worker_args)
|
|
|
|
def test_workers_with_lots_of_workers(self):
    # Ten configured workers but only two local devices: only two sets
    # of worker args are expected, one per device.
    daemon = object_reconstructor.ObjectReconstructor(
        {'reconstructor_workers': '10'}, logger=self.logger)
    daemon.get_local_devices = lambda: ['sdb', 'sdc']
    self.assertEqual(10, daemon.reconstructor_workers)
    self.assertEqual(2, len(list(daemon.get_worker_args())))
    expected = [
        {'override_partitions': [],
         'override_devices': [device],
         'multiprocess_worker_index': index}
        for index, device in enumerate(['sdb', 'sdc'])]
    self.assertEqual(expected, list(daemon.get_worker_args()))
|
|
|
|
def test_workers_with_lots_of_workers_and_devices(self):
    # Ten configured workers and five local devices: five sets of
    # worker args are expected, one device each, indexed in order.
    daemon = object_reconstructor.ObjectReconstructor(
        {'reconstructor_workers': '10'}, logger=self.logger)
    daemon.get_local_devices = lambda: [
        'sdb', 'sdc', 'sdd', 'sde', 'sdf']
    self.assertEqual(10, daemon.reconstructor_workers)
    self.assertEqual(5, len(list(daemon.get_worker_args())))
    expected = [
        {'override_partitions': [],
         'override_devices': [device],
         'multiprocess_worker_index': index}
        for index, device in enumerate(
            ['sdb', 'sdc', 'sdd', 'sde', 'sdf'])]
    self.assertEqual(expected, list(daemon.get_worker_args()))
|
|
|
|
def test_workers_with_some_workers_and_devices(self):
    # Check how 21 local devices (d1..d21) are spread over various
    # worker counts.
    daemon = object_reconstructor.ObjectReconstructor(
        {}, logger=self.logger)
    daemon.get_local_devices = lambda: [
        'd%s' % (i + 1) for i in range(21)]

    def devices_per_worker(num_workers):
        # number of override devices handed to each worker, in order
        daemon.reconstructor_workers = num_workers
        return [len(args['override_devices'])
                for args in list(daemon.get_worker_args())]

    # With more devices than workers, the work is spread out as evenly
    # as we can manage. When number-of-devices is a multiple of
    # number-of-workers, every worker has the same number of devices to
    # operate on.
    self.assertEqual(devices_per_worker(7), [3] * 7)

    # When number-of-devices is not a multiple of number-of-workers,
    # device counts differ by at most 1.
    self.assertEqual(sorted(devices_per_worker(5)), [4, 4, 4, 4, 5])

    # With more workers than devices, we don't create useless workers.
    # We'll only make one per device.
    self.assertEqual(devices_per_worker(22), [1] * 21)

    # This is true even if we have far more workers than devices.
    self.assertEqual(devices_per_worker(2 ** 16), [1] * 21)

    # Spot check one full result for sanity's sake
    daemon.reconstructor_workers = 11
    # workers 0..9 each get two devices (d1+d12 ... d10+d21) ...
    expected = [
        {'override_partitions': [],
         'override_devices': ['d%d' % (i + 1), 'd%d' % (i + 12)],
         'multiprocess_worker_index': i}
        for i in range(10)]
    # ... and worker 10 gets the one device left over
    expected.append({'override_partitions': [],
                     'override_devices': ['d11'],
                     'multiprocess_worker_index': 10})
    self.assertEqual(expected, list(daemon.get_worker_args()))
|
|
|
|
def test_next_rcache_update_configured_with_stats_interval(self):
    # _next_rcache_update must be the (mocked) construction time plus
    # 300 for an empty conf, or plus the configured stats_interval.
    frozen_now = time.time()
    cases = [
        ({}, 300),
        ({'stats_interval': '30'}, 30),
    ]
    with mock.patch('swift.obj.reconstructor.time.time',
                    return_value=frozen_now):
        for conf, interval in cases:
            daemon = object_reconstructor.ObjectReconstructor(
                conf, logger=self.logger)
            self.assertEqual(frozen_now + interval,
                             daemon._next_rcache_update)
|
|
|
|
def test_is_healthy_rcache_update_waits_for_next_update(self):
    # is_healthy() must not write the recon cache file until the clock
    # has passed _next_rcache_update.
    time_target = 'swift.obj.reconstructor.time.time'
    getmtime_target = 'swift.obj.reconstructor.os.path.getmtime'
    created_at = time.time()
    with mock.patch(time_target, return_value=created_at):
        daemon = object_reconstructor.ObjectReconstructor(
            {'recon_cache_path': self.recon_cache_path},
            logger=self.logger)
    # file does not exist to start
    self.assertFalse(os.path.exists(self.rcache))
    with mock.patch(getmtime_target, return_value=10):
        self.assertTrue(daemon.is_healthy())
    # ... and isn't created until _next_rcache_update
    self.assertFalse(os.path.exists(self.rcache))
    # ... but if we wait 5 mins (by default)
    previous_deadline = daemon._next_rcache_update
    with mock.patch(time_target, return_value=created_at + 301), \
            mock.patch(getmtime_target, return_value=11):
        self.assertTrue(daemon.is_healthy())
    self.assertGreater(daemon._next_rcache_update, previous_deadline)
    # ... it will be created
    self.assertTrue(os.path.exists(self.rcache))
    with open(self.rcache) as fp:
        cached = json.load(fp)
    # and empty
    self.assertEqual({}, cached)
|
|
|
|
def test_is_healthy(self):
    # is_healthy() must turn False once get_local_devices (with a new
    # mocked mtime) no longer matches all_local_devices, and True again
    # after all_local_devices has been brought in line.
    daemon = object_reconstructor.ObjectReconstructor(
        {'recon_cache_path': self.recon_cache_path},
        logger=self.logger)

    def mtime_is(value):
        # patch getmtime, as seen from the reconstructor module
        return mock.patch('swift.obj.reconstructor.os.path.getmtime',
                          return_value=value)

    with mtime_is(10):
        self.assertTrue(daemon.is_healthy())
    daemon.get_local_devices = lambda: {
        'sdb%d' % policy for policy in daemon.policies}
    with mtime_is(11):
        self.assertFalse(daemon.is_healthy())
    daemon.all_local_devices = {
        'sdb%d' % policy for policy in daemon.policies}
    with mtime_is(12):
        self.assertTrue(daemon.is_healthy())
|
|
|
|
def test_is_healthy_detects_ring_change(self):
    # Growing a policy's ring must only be noticed by is_healthy() once
    # the mocked ring mtime changes as well.
    conf = {
        'recon_cache_path': self.recon_cache_path,
        'reconstructor_workers': 1,
        # bind ip and port will not match any dev in first version of ring
        'bind_ip': '10.0.0.20',
        'bind_port': '1020',
    }
    daemon = object_reconstructor.ObjectReconstructor(
        conf, logger=self.logger)

    def mtime_is(value):
        # patch getmtime, as seen from the reconstructor module
        return mock.patch('swift.obj.reconstructor.os.path.getmtime',
                          return_value=value)

    policy = random.choice(daemon.policies)
    self.assertEqual(14, len(policy.object_ring.devs))  # sanity check
    initial_args = list(daemon.get_worker_args())
    self.assertFalse(initial_args[0]['override_devices'])  # no local devs
    with mtime_is(10):
        self.assertTrue(daemon.is_healthy())
    # expand ring - now there are local devices
    policy.object_ring.set_replicas(28)
    self.assertEqual(28, len(policy.object_ring.devs))  # sanity check

    # If ring.gz mtime did not change, there is no change to detect
    with mtime_is(10):
        self.assertTrue(daemon.is_healthy())
    # Now, ring.gz mtime changed, so the change will be detected
    with mtime_is(11):
        self.assertFalse(daemon.is_healthy())

    current_args = list(daemon.get_worker_args())
    self.assertNotEqual(initial_args, current_args)
    with mtime_is(12):
        self.assertTrue(daemon.is_healthy())
|
|
|
|
def test_final_recon_dump(self):
    # Walk final_recon_dump() through several phases and check the
    # contents of the recon cache file after each one: aggregate-only
    # dumps, per-disk dumps once reconstructor_workers is set, and the
    # removal of per-disk stats when workers or local devices go away.
    reconstructor = object_reconstructor.ObjectReconstructor(
        {'recon_cache_path': self.recon_cache_path},
        logger=self.logger)
    reconstructor.all_local_devices = ['sda', 'sdc']
    total = 12.0
    now = time.time()
    with mock.patch('swift.obj.reconstructor.time.time', return_value=now):
        reconstructor.final_recon_dump(total)
    with open(self.rcache) as f:
        data = json.load(f)
    # no override devices were passed: only the aggregate keys appear
    self.assertEqual({
        'object_reconstruction_last': now,
        'object_reconstruction_time': total,
    }, data)
    total = 14.0
    now += total * 60
    with mock.patch('swift.obj.reconstructor.time.time', return_value=now):
        reconstructor.final_recon_dump(total)
    with open(self.rcache) as f:
        data = json.load(f)
    # a second dump replaces the aggregate values
    self.assertEqual({
        'object_reconstruction_last': now,
        'object_reconstruction_time': total,
    }, data)

    def check_per_disk_stats(before, now, old_total, total,
                             override_devices):
        # Dump with the given override_devices at mocked time `now` and
        # mocked pid 'pid-1', then expect the aggregate keys to still
        # hold the previous values (before / old_total) while both 'sda'
        # and 'sdc' carry the new ones.
        # N.B. the now and total parameters shadow the enclosing
        # function's variables of the same name.
        with mock.patch('swift.obj.reconstructor.time.time',
                        return_value=now), \
                mock.patch('swift.obj.reconstructor.os.getpid',
                           return_value='pid-1'):
            reconstructor.final_recon_dump(
                total, override_devices=override_devices)
        with open(self.rcache) as f:
            data = json.load(f)
        self.assertEqual({
            'object_reconstruction_last': before,
            'object_reconstruction_time': old_total,
            'object_reconstruction_per_disk': {
                'sda': {
                    'object_reconstruction_last': now,
                    'object_reconstruction_time': total,
                    'pid': 'pid-1',
                },
                'sdc': {
                    'object_reconstruction_last': now,
                    'object_reconstruction_time': total,
                    'pid': 'pid-1',
                },

            },
        }, data)

    # per_disk_stats with workers and local_devices
    reconstructor.reconstructor_workers = 1
    old_total = total
    total = 16.0
    before = now
    now += total * 60
    check_per_disk_stats(before, now, old_total, total, ['sda', 'sdc'])

    # per_disk_stats with workers and local_devices but no overrides
    # (before and old_total are deliberately left unchanged: the
    # aggregate keys are still expected to hold the earlier values)
    reconstructor.reconstructor_workers = 1
    total = 17.0
    now += total * 60
    check_per_disk_stats(before, now, old_total, total, [])

    # and without workers we clear it out
    reconstructor.reconstructor_workers = 0
    total = 18.0
    now += total * 60
    with mock.patch('swift.obj.reconstructor.time.time', return_value=now):
        reconstructor.final_recon_dump(total)
    with open(self.rcache) as f:
        data = json.load(f)
    self.assertEqual({
        'object_reconstruction_last': now,
        'object_reconstruction_time': total,
    }, data)

    # set per disk stats again...
    reconstructor.reconstructor_workers = 1
    old_total = total
    total = 18.0
    before = now
    now += total * 60
    check_per_disk_stats(before, now, old_total, total, ['sda', 'sdc'])

    # ...then remove all devices and check we clear out per-disk stats
    reconstructor.all_local_devices = []
    total = 20.0
    now += total * 60
    with mock.patch('swift.obj.reconstructor.time.time', return_value=now):
        reconstructor.final_recon_dump(total)
    with open(self.rcache) as f:
        data = json.load(f)
    self.assertEqual({
        'object_reconstruction_last': now,
        'object_reconstruction_time': total,
    }, data)
|
|
|
|
def test_dump_recon_run_once_inline(self):
    # run_once() without override args must call reconstruct() with
    # empty overrides and write aggregate stats to the recon cache; a
    # later final_recon_dump() must replace those stats.
    daemon = object_reconstructor.ObjectReconstructor(
        {'recon_cache_path': self.recon_cache_path},
        logger=self.logger)
    daemon.reconstruct = mock.MagicMock()

    def read_rcache():
        # current contents of the recon cache file
        with open(self.rcache) as fp:
            return json.load(fp)

    started = time.time()
    finished = started + 300  # 5 mins
    with mock.patch('swift.obj.reconstructor.time.time',
                    side_effect=[started, finished, finished]):
        daemon.run_once()
    # no override args passed to reconstruct
    self.assertEqual(
        [mock.call(override_devices=[], override_partitions=[])],
        daemon.reconstruct.call_args_list)
    # script mode with no override args, we expect recon dumps
    self.assertTrue(os.path.exists(self.rcache))
    self.assertEqual({
        'object_reconstruction_last': finished,
        'object_reconstruction_time': 5.0,
    }, read_rcache())
    total = 10.0
    finished += total * 60
    with mock.patch('swift.obj.reconstructor.time.time',
                    return_value=finished):
        daemon.final_recon_dump(total)
    self.assertEqual({
        'object_reconstruction_last': finished,
        'object_reconstruction_time': 10.0,
    }, read_rcache())
|
|
|
|
def test_dump_recon_run_once_in_worker(self):
    # With one worker configured, run_once() called with the kwargs
    # produced by get_worker_args() must write per-disk recon stats for
    # the expected device; with workers disabled again, an inline
    # run_once() must leave only the aggregate stats.
    reconstructor = object_reconstructor.ObjectReconstructor(
        {'recon_cache_path': self.recon_cache_path,
         'reconstructor_workers': 1},
        logger=self.logger)
    reconstructor.get_local_devices = lambda: {'sda'}
    now = time.time()
    later = now + 300  # 5 mins

    def do_test(run_kwargs, expected_device):
        # N.B. now and later are read from the enclosing scope when
        # this helper is called.
        # get the actual kwargs that would be passed to run_once in a
        # worker
        run_once_kwargs = list(
            reconstructor.get_worker_args(once=True, **run_kwargs))[0]
        # a fresh mock per call keeps call_args_list limited to this run
        reconstructor.reconstruct = mock.MagicMock()
        with mock.patch('swift.obj.reconstructor.time.time',
                        side_effect=[now, later, later]):
            reconstructor.run_once(**run_once_kwargs)
        self.assertEqual([mock.call(
            override_devices=[expected_device],
            override_partitions=[]
        )], reconstructor.reconstruct.call_args_list)
        self.assertTrue(os.path.exists(self.rcache))
        with open(self.rcache) as f:
            data = json.load(f)
        self.assertEqual({
            # no aggregate is written but perhaps it should be, in which
            # case this assertion will need to change
            'object_reconstruction_per_disk': {
                expected_device: {
                    'object_reconstruction_last': later,
                    'object_reconstruction_time': 5.0,
                    'pid': mock.ANY
                }
            }
        }, data)

    # script mode with no CLI override args, we expect recon dumps
    do_test({}, 'sda')
    # script mode *with* CLI override devices, we expect recon dumps
    # (the cache file is removed first so each run starts without one)
    os.unlink(self.rcache)
    do_test(dict(devices='sda'), 'sda')
    # if the override device is not in local devices we still get
    # a recon dump, but it'll get cleaned up in the next aggregation
    os.unlink(self.rcache)
    do_test(dict(devices='sdz'), 'sdz')
    # repeat with no local devices
    reconstructor.get_local_devices = lambda: set()
    os.unlink(self.rcache)
    do_test(dict(devices='sdz'), 'sdz')

    # now disable workers and check that inline run_once updates rcache
    # and clears out per disk stats
    reconstructor.get_local_devices = lambda: {'sda'}
    now = time.time()
    later = now + 600  # 10 mins
    reconstructor.reconstructor_workers = 0
    # reconstructor.reconstruct is still the MagicMock installed by the
    # last do_test() call
    with mock.patch('swift.obj.reconstructor.time.time',
                    side_effect=[now, later, later]):
        reconstructor.run_once()
    with open(self.rcache) as f:
        data = json.load(f)
    self.assertEqual({
        'object_reconstruction_last': later,
        'object_reconstruction_time': 10.0,
    }, data)
|
|
|
|
def test_no_dump_recon_run_once(self):
    # In every combination exercised here run_once() must pass the
    # expected overrides to reconstruct() exactly once and must not
    # create the recon cache file.
    daemon = object_reconstructor.ObjectReconstructor(
        {'recon_cache_path': self.recon_cache_path},
        logger=self.logger)
    daemon.get_local_devices = lambda: {'sda', 'sdb', 'sdc'}

    def assert_no_dump(run_once_kwargs, expected_devices,
                       expected_partitions):
        daemon.reconstruct = mock.MagicMock()
        started = time.time()
        finished = started + 300  # 5 mins
        with mock.patch('swift.obj.reconstructor.time.time',
                        side_effect=[started, finished, finished]):
            daemon.run_once(**run_once_kwargs)
        # override args passed to reconstruct
        calls = daemon.reconstruct.call_args_list
        passed_kwargs = calls[0][1]
        self.assertEqual({'override_devices', 'override_partitions'},
                         set(passed_kwargs))
        self.assertEqual(sorted(expected_devices),
                         sorted(passed_kwargs['override_devices']))
        self.assertEqual(sorted(expected_partitions),
                         sorted(passed_kwargs['override_partitions']))
        self.assertFalse(calls[1:])
        rcache_exists = os.path.exists(self.rcache)
        self.assertEqual(False, rcache_exists)

    def first_worker_kwargs(cli_kwargs):
        # kwargs the first worker would be given for these CLI overrides
        return list(daemon.get_worker_args(once=True, **cli_kwargs))[0]

    # inline mode with overrides never does recon dump
    daemon.reconstructor_workers = 0
    assert_no_dump({'devices': 'sda,sdb'}, ['sda', 'sdb'], [])

    # Have partition override, so no recon dump
    cli_kwargs = {'partitions': '1,2,3'}
    assert_no_dump(cli_kwargs, [], [1, 2, 3])
    daemon.reconstructor_workers = 1
    assert_no_dump(first_worker_kwargs(cli_kwargs),
                   ['sda', 'sdb', 'sdc'], [1, 2, 3])

    daemon.reconstructor_workers = 0
    cli_kwargs = {'devices': 'sda,sdb', 'partitions': '1,2,3'}
    assert_no_dump(cli_kwargs, ['sda', 'sdb'], [1, 2, 3])
    daemon.reconstructor_workers = 1
    assert_no_dump(first_worker_kwargs(cli_kwargs),
                   ['sda', 'sdb'], [1, 2, 3])

    # 'sdz' is not in local devices
    daemon.reconstructor_workers = 0
    assert_no_dump({'devices': 'sdz'}, ['sdz'], [])
|
|
|
|
def test_run_forever_recon_aggregation(self):
    # Run run_forever() once for each of two workers' kwargs (stopping
    # it by making the mocked sleep raise), check that each run records
    # per-disk stats for its own devices, then check that
    # aggregate_recon_update() adds the aggregate keys.

    class StopForever(Exception):
        # raised by the mocked sleep to break out of run_forever
        pass

    reconstructor = object_reconstructor.ObjectReconstructor({
        'reconstructor_workers': 2,
        'recon_cache_path': self.recon_cache_path
    }, logger=self.logger)
    reconstructor.get_local_devices = lambda: ['sda', 'sdb', 'sdc', 'sdd']
    reconstructor.reconstruct = mock.MagicMock()
    now = time.time()
    later = now + 300  # 5 mins
    worker_args = list(
        # include 'devices' kwarg as a sanity check - it should be ignored
        # in run_forever mode
        reconstructor.get_worker_args(once=False, devices='sda'))
    # first worker: the mocked clock yields now, later, later and the
    # mocked getpid yields 'pid-1'; Timeout(.3) bounds the run in case
    # StopForever is never raised
    with mock.patch('swift.obj.reconstructor.time.time',
                    side_effect=[now, later, later]), \
            mock.patch('swift.obj.reconstructor.os.getpid',
                       return_value='pid-1'), \
            mock.patch('swift.obj.reconstructor.sleep',
                       side_effect=[StopForever]), \
            Timeout(.3), quiet_eventlet_exceptions(), \
            self.assertRaises(StopForever):
        gt = spawn(reconstructor.run_forever, **worker_args[0])
        gt.wait()
    # override args are passed to reconstruct
    self.assertEqual([mock.call(
        override_devices=['sda', 'sdc'],
        override_partitions=[]
    )], reconstructor.reconstruct.call_args_list)
    # forever mode with override args, we expect per-disk recon dumps
    self.assertTrue(os.path.exists(self.rcache))
    with open(self.rcache) as f:
        data = json.load(f)
    self.assertEqual({
        'object_reconstruction_per_disk': {
            'sda': {
                'object_reconstruction_last': later,
                'object_reconstruction_time': 5.0,
                'pid': 'pid-1',
            },
            'sdc': {
                'object_reconstruction_last': later,
                'object_reconstruction_time': 5.0,
                'pid': 'pid-1',
            },
        }
    }, data)
    reconstructor.reconstruct.reset_mock()
    # another worker would get *different* disks
    # (before keeps the first worker's finish time for later asserts)
    before = now = later
    later = now + 300  # 5 more minutes
    with mock.patch('swift.obj.reconstructor.time.time',
                    side_effect=[now, later, later]), \
            mock.patch('swift.obj.reconstructor.os.getpid',
                       return_value='pid-2'), \
            mock.patch('swift.obj.reconstructor.sleep',
                       side_effect=[StopForever]), \
            Timeout(.3), quiet_eventlet_exceptions(), \
            self.assertRaises(StopForever):
        gt = spawn(reconstructor.run_forever, **worker_args[1])
        gt.wait()
    # override args are parsed
    self.assertEqual([mock.call(
        override_devices=['sdb', 'sdd'],
        override_partitions=[]
    )], reconstructor.reconstruct.call_args_list)
    # forever mode with override args, we expect per-disk recon dumps
    # (the first worker's entries are expected to be unchanged)
    self.assertTrue(os.path.exists(self.rcache))
    with open(self.rcache) as f:
        data = json.load(f)
    self.assertEqual({
        'object_reconstruction_per_disk': {
            'sda': {
                'object_reconstruction_last': before,
                'object_reconstruction_time': 5.0,
                'pid': 'pid-1',
            },
            'sdb': {
                'object_reconstruction_last': later,
                'object_reconstruction_time': 5.0,
                'pid': 'pid-2',
            },
            'sdc': {
                'object_reconstruction_last': before,
                'object_reconstruction_time': 5.0,
                'pid': 'pid-1',
            },
            'sdd': {
                'object_reconstruction_last': later,
                'object_reconstruction_time': 5.0,
                'pid': 'pid-2',
            },
        }
    }, data)

    # aggregation is done in the parent thread even later
    reconstructor.aggregate_recon_update()
    with open(self.rcache) as f:
        data = json.load(f)
    # the aggregate keys are now expected alongside the unchanged
    # per-disk entries: the later finish time and a total of 10.0
    self.assertEqual({
        'object_reconstruction_last': later,
        'object_reconstruction_time': 10.0,
        'object_reconstruction_per_disk': {
            'sda': {
                'object_reconstruction_last': before,
                'object_reconstruction_time': 5.0,
                'pid': 'pid-1',
            },
            'sdb': {
                'object_reconstruction_last': later,
                'object_reconstruction_time': 5.0,
                'pid': 'pid-2',
            },
            'sdc': {
                'object_reconstruction_last': before,
                'object_reconstruction_time': 5.0,
                'pid': 'pid-1',
            },
            'sdd': {
                'object_reconstruction_last': later,
                'object_reconstruction_time': 5.0,
                'pid': 'pid-2',
            },
        }
    }, data)
|
|
|
|
def test_run_forever_recon_no_devices(self):
    # With no local devices, one pass of run_forever() must call
    # reconstruct() with empty overrides and write only aggregate
    # stats; a later aggregate_recon_update() must leave them unchanged.

    class StopForever(Exception):
        # raised by the mocked sleep to break out of run_forever
        pass

    reconstructor = object_reconstructor.ObjectReconstructor({
        'reconstructor_workers': 2,
        'recon_cache_path': self.recon_cache_path
    }, logger=self.logger)

    def run_forever_but_stop(pid, mock_times, worker_kwargs):
        # Spawn run_forever(**worker_kwargs) with the clock, getpid and
        # sleep mocked; the mocked sleep raises StopForever, and
        # Timeout(.3) bounds the run in case it is never raised.
        with mock.patch('swift.obj.reconstructor.time.time',
                        side_effect=mock_times), \
                mock.patch('swift.obj.reconstructor.os.getpid',
                           return_value=pid), \
                mock.patch('swift.obj.reconstructor.sleep',
                           side_effect=[StopForever]), \
                Timeout(.3), quiet_eventlet_exceptions(), \
                self.assertRaises(StopForever):
            gt = spawn(reconstructor.run_forever, **worker_kwargs)
            gt.wait()

    reconstructor.reconstruct = mock.MagicMock()
    now = time.time()
    # first run_forever with no devices
    reconstructor.get_local_devices = lambda: []
    later = now + 6  # 6 sec
    worker_args = list(
        # include 'devices' kwarg as a sanity check - it should be ignored
        # in run_forever mode
        reconstructor.get_worker_args(once=False, devices='sda'))
    run_forever_but_stop('pid-1', [now, later, later], worker_args[0])
    # override args are passed to reconstruct
    self.assertEqual([mock.call(
        override_devices=[],
        override_partitions=[]
    )], reconstructor.reconstruct.call_args_list)
    # forever mode with no args, we expect total recon dumps
    self.assertTrue(os.path.exists(self.rcache))
    with open(self.rcache) as f:
        data = json.load(f)
    # the 6 mocked seconds are expected to be reported as 0.1
    expected = {
        'object_reconstruction_last': later,
        'object_reconstruction_time': 0.1,
    }
    self.assertEqual(expected, data)
    reconstructor.reconstruct.reset_mock()

    # aggregation is done in the parent thread even later
    now = later + 300
    with mock.patch('swift.obj.reconstructor.time.time',
                    side_effect=[now]):
        reconstructor.aggregate_recon_update()
    with open(self.rcache) as f:
        data = json.load(f)
    # the cache contents must be unchanged by the aggregation
    self.assertEqual(expected, data)
|
|
|
|
def test_recon_aggregation_waits_for_all_devices(self):
    # Top-level stats are only aggregated once every device listed in
    # all_local_devices has per-disk stats in the recon cache.
    daemon = object_reconstructor.ObjectReconstructor({
        'reconstructor_workers': 2,
        'recon_cache_path': self.recon_cache_path
    }, logger=self.logger)
    reported = ['d0', 'd1', 'd2', 'd3']
    # unreported device definitely matters
    daemon.all_local_devices = set(reported + ['d4'])
    start = time.time() - 1000

    def read_recon_cache():
        with open(self.rcache) as fp:
            return json.load(fp)

    # each device is dumped by its own (fake) worker pid, 300s apart
    per_disk = {}
    for i, dev in enumerate(reported):
        dump_time = start + (300 * i)
        worker_pid = 'pid-%s' % i
        with mock.patch('swift.obj.reconstructor.time.time',
                        return_value=dump_time), \
                mock.patch('swift.obj.reconstructor.os.getpid',
                           return_value=worker_pid):
            daemon.final_recon_dump(i, override_devices=[dev])
        per_disk[dev] = {
            'object_reconstruction_last': dump_time,
            'object_reconstruction_time': i,
            'pid': worker_pid,
        }

    # sanity
    self.assertEqual({'object_reconstruction_per_disk': per_disk},
                     read_recon_cache())

    # unreported device d4 prevents aggregation; the second pass shows
    # it's idempotent
    for _ in range(2):
        daemon.aggregate_recon_update()
        cached = read_recon_cache()
        self.assertNotIn('object_reconstruction_last', cached)
        self.assertNotIn('object_reconstruction_time', cached)
        self.assertEqual(
            set(reported),
            set(cached['object_reconstruction_per_disk'].keys()))

    # remove d4, we no longer wait on it for aggregation
    daemon.all_local_devices = set(reported)
    daemon.aggregate_recon_update()
    cached = read_recon_cache()
    self.assertEqual(start + 900, cached['object_reconstruction_last'])
    self.assertEqual(15, cached['object_reconstruction_time'])
    self.assertEqual(
        set(reported),
        set(cached['object_reconstruction_per_disk'].keys()))
|
|
|
|
def test_recon_aggregation_removes_devices(self):
    # Per-disk stats for devices that drop out of all_local_devices are
    # pruned on aggregation, and the aggregates are recomputed from what
    # remains.
    daemon = object_reconstructor.ObjectReconstructor({
        'reconstructor_workers': 2,
        'recon_cache_path': self.recon_cache_path
    }, logger=self.logger)
    daemon.all_local_devices = set(['d0', 'd1', 'd2', 'd3'])
    start = time.time() - 1000

    def read_recon_cache():
        with open(self.rcache) as fp:
            return json.load(fp)

    def expected_per_disk(indexes):
        # stats that worker i dumped for device d<i>
        return dict(
            ('d%s' % i, {
                'object_reconstruction_last': start + (300 * i),
                'object_reconstruction_time': i,
                'pid': 'pid-%s' % i,
            }) for i in indexes)

    for i in range(4):
        with mock.patch('swift.obj.reconstructor.time.time',
                        return_value=start + (300 * i)), \
                mock.patch('swift.obj.reconstructor.os.getpid',
                           return_value='pid-%s' % i):
            daemon.final_recon_dump(i, override_devices=['d%s' % i])

    # sanity
    self.assertEqual({
        'object_reconstruction_per_disk': expected_per_disk(range(4)),
    }, read_recon_cache())

    daemon.all_local_devices = set(['d0', 'd1', 'd2', 'd3'])
    daemon.aggregate_recon_update()
    cached = read_recon_cache()
    self.assertEqual(start + 900, cached['object_reconstruction_last'])
    self.assertEqual(15, cached['object_reconstruction_time'])
    self.assertEqual(
        set(['d0', 'd1', 'd2', 'd3']),
        set(cached['object_reconstruction_per_disk'].keys()))

    # it's idempotent
    daemon.aggregate_recon_update()
    self.assertEqual({
        'object_reconstruction_last': start + 900,
        'object_reconstruction_time': 15,
        'object_reconstruction_per_disk': expected_per_disk(range(4)),
    }, read_recon_cache())

    # if a device is removed from the ring
    daemon.all_local_devices = set(['d1', 'd2', 'd3'])
    daemon.aggregate_recon_update()
    # ... its per-disk stats are removed (d0)
    self.assertEqual({
        'object_reconstruction_last': start + 900,
        'object_reconstruction_time': 11,
        'object_reconstruction_per_disk': expected_per_disk([1, 2, 3]),
    }, read_recon_cache())

    # which can affect the aggregates!
    daemon.all_local_devices = set(['d1', 'd2'])
    daemon.aggregate_recon_update()
    self.assertEqual({
        'object_reconstruction_last': start + 600,
        'object_reconstruction_time': 6,
        'object_reconstruction_per_disk': expected_per_disk([1, 2]),
    }, read_recon_cache())
|
|
|
|
def test_recon_aggregation_at_end_of_run_once(self):
    # post_multiprocess_run() rolls the per-disk stats dumped by the
    # workers up into the top-level recon keys.
    daemon = object_reconstructor.ObjectReconstructor({
        'reconstructor_workers': 2,
        'recon_cache_path': self.recon_cache_path
    }, logger=self.logger)
    daemon.all_local_devices = set(['d0', 'd1', 'd2', 'd3'])
    start = time.time() - 1000

    def read_recon_cache():
        with open(self.rcache) as fp:
            return json.load(fp)

    per_disk = {}
    for i in range(4):
        dev = 'd%s' % i
        worker_pid = 'pid-%s' % i
        dump_time = start + (300 * i)
        with mock.patch('swift.obj.reconstructor.time.time',
                        return_value=dump_time), \
                mock.patch('swift.obj.reconstructor.os.getpid',
                           return_value=worker_pid):
            daemon.final_recon_dump(i, override_devices=[dev])
        per_disk[dev] = {
            'object_reconstruction_last': dump_time,
            'object_reconstruction_time': i,
            'pid': worker_pid,
        }

    # sanity
    self.assertEqual({'object_reconstruction_per_disk': per_disk},
                     read_recon_cache())

    daemon.post_multiprocess_run()
    cached = read_recon_cache()
    self.assertEqual(start + 900, cached['object_reconstruction_last'])
    self.assertEqual(15, cached['object_reconstruction_time'])
|
|
|
|
def test_recon_aggregation_races_with_final_recon_dump(self):
    # A worker dumping its stats while the parent is in the middle of
    # aggregate_recon_update() must not have its per-disk stats lost.
    daemon = object_reconstructor.ObjectReconstructor({
        'reconstructor_workers': 2,
        'recon_cache_path': self.recon_cache_path
    }, logger=self.logger)
    daemon.all_local_devices = set(['d0', 'd1'])
    start = time.time() - 1000

    d0_stats = {
        'object_reconstruction_last': start,
        'object_reconstruction_time': 1,
        'pid': 'pid-0',
    }
    d1_stats = {
        'object_reconstruction_last': start + 999,
        'object_reconstruction_time': 1000,
        'pid': 'pid-1',
    }

    def read_recon_cache():
        with open(self.rcache) as fp:
            return json.load(fp)

    # first worker dumps to recon cache
    with mock.patch('swift.obj.reconstructor.time.time',
                    return_value=start), \
            mock.patch('swift.obj.reconstructor.os.getpid',
                       return_value='pid-0'):
        daemon.final_recon_dump(1, override_devices=['d0'])
    # sanity
    self.assertEqual({
        'object_reconstruction_per_disk': {'d0': d0_stats},
    }, read_recon_cache())

    # simulate a second worker concurrently dumping to recon cache while
    # parent is aggregating existing results; mock dump_recon_cache as a
    # convenient way to interrupt parent aggregate_recon_update and 'pass
    # control' to second worker
    updated_data = []  # state of recon cache just after second worker dump

    def simulate_other_process_final_recon_dump():
        with mock.patch('swift.obj.reconstructor.time.time',
                        return_value=start + 999), \
                mock.patch('swift.obj.reconstructor.os.getpid',
                           return_value='pid-1'):
            daemon.final_recon_dump(1000, override_devices=['d1'])
        updated_data.append(read_recon_cache())

    def fake_dump_recon_cache(*args, **kwargs):
        # temporarily put back real dump_recon_cache
        with mock.patch('swift.obj.reconstructor.dump_recon_cache',
                        dump_recon_cache):
            simulate_other_process_final_recon_dump()
        # and now proceed with parent dump_recon_cache
        dump_recon_cache(*args, **kwargs)

    daemon.dump_recon_cache = fake_dump_recon_cache
    with mock.patch('swift.obj.reconstructor.dump_recon_cache',
                    fake_dump_recon_cache):
        daemon.aggregate_recon_update()

    both_disks = {
        'object_reconstruction_per_disk': {
            'd0': d0_stats,
            'd1': d1_stats,
        }
    }
    # sanity check - second process did dump its data
    self.assertEqual([both_disks], updated_data)

    # the interrupted aggregation kept both sets of per-disk stats
    self.assertEqual(both_disks, read_recon_cache())

    # next aggregation will find d1 stats
    daemon.aggregate_recon_update()

    self.assertEqual({
        'object_reconstruction_last': start + 999,
        'object_reconstruction_time': 1000,
        'object_reconstruction_per_disk': {
            'd0': d0_stats,
            'd1': d1_stats,
        }
    }, read_recon_cache())
|
|
|
|
def test_worker_logging(self):
    # Every line logged from inside a multiprocess worker run carries the
    # "[worker N/M pid=P] " prefix, whatever the log level.
    daemon = object_reconstructor.ObjectReconstructor({
        'reconstructor_workers': 4,
        'recon_cache_path': self.recon_cache_path
    }, logger=self.logger)

    def log_some_stuff(*a, **kw):
        log = daemon.logger
        log.debug("debug message")
        log.info("info message")
        log.warning("warning message")
        log.error("error message")

    with mock.patch.object(daemon, 'reconstruct', log_some_stuff), \
            mock.patch("os.getpid", lambda: 20641):
        daemon.get_worker_args()
        daemon.run_once(multiprocess_worker_index=1,
                        override_devices=['sda', 'sdb'])

    prefix = "[worker 2/4 pid=20641] "
    lines_by_level = self.logger.logger.all_log_lines()
    for level, lines in lines_by_level.items():
        for line in lines:
            failure_msg = "%r doesn't start with %r (level %s)" % (
                line, prefix, level)
            self.assertTrue(line.startswith(prefix), failure_msg)
|
|
|
|
|
|
@patch_policies(with_ec_default=True)
class BaseTestObjectReconstructor(unittest.TestCase):
    """Shared fixture for object reconstructor tests.

    Builds an ``ObjectReconstructor`` whose conf points at a ``devices``
    directory under a fresh temporary test dir and whose bind ip/port are
    taken from the first device of the default policy's object ring.
    """

    def setUp(self):
        """Create the temp dir, conf, logger and reconstructor under test."""
        skip_if_no_xattrs()
        self.policy = POLICIES.default
        self.policy.object_ring._rtime = time.time() + 3600
        self.testdir = tempfile.mkdtemp()
        self.devices = os.path.join(self.testdir, 'devices')
        self.local_dev = self.policy.object_ring.devs[0]
        self.ip = self.local_dev['replication_ip']
        self.port = self.local_dev['replication_port']
        self.conf = {
            'devices': self.devices,
            'mount_check': False,
            'bind_ip': self.ip,
            'bind_port': self.port,
        }
        self.logger = debug_logger('object-reconstructor')
        self._configure_reconstructor()
        self.policy.object_ring.max_more_nodes = \
            self.policy.object_ring.replicas
        self.ts_iter = make_timestamp_iter()
        self.fabricated_ring = FabricatedRing(replicas=14, devices=28)

    def _configure_reconstructor(self, **kwargs):
        """Rebuild ``self.reconstructor`` after merging kwargs into conf.

        :param kwargs: conf overrides; they are applied to ``self.conf`` in
                       place, so they persist for later calls
        """
        self.conf.update(kwargs)
        self.reconstructor = object_reconstructor.ObjectReconstructor(
            self.conf, logger=self.logger)
        self.reconstructor._reset_stats()
        # some tests bypass build_reconstruction_jobs and go to process_job
        # directly, so you end up with a /0 when you try to show the
        # percentage of complete jobs as ratio of the total job count
        self.reconstructor.job_count = 1
        # if we ever let a test through without properly patching the
        # REPLICATE and SSYNC calls - let's fail sort fast-ish
        self.reconstructor.lockup_timeout = 3

    def tearDown(self):
        """Reset stats, emit a stats line and remove the temp test dir."""
        try:
            self.reconstructor._reset_stats()
            self.reconstructor.stats_line()
        finally:
            # always clean up the temp tree, even if the calls above raise,
            # so a failing tearDown does not leak directories
            shutil.rmtree(self.testdir)

    def ts(self):
        """Return the next timestamp from this test's timestamp iterator."""
        return next(self.ts_iter)
|
|
|
|
|
|
class TestObjectReconstructor(BaseTestObjectReconstructor):
|
|
def test_ring_ip_and_bind_ip(self):
    # ring_ip defaults to 0.0.0.0, falls back to bind_ip when only that
    # is configured, and wins when both are set.

    # make clean base_conf
    base_conf = dict(
        (key, value) for key, value in self.conf.items()
        if key not in ('bind_ip', 'ring_ip'))

    # default ring_ip is always 0.0.0.0
    self.conf = base_conf
    self._configure_reconstructor()
    self.assertEqual('0.0.0.0', self.reconstructor.ring_ip)

    scenarios = [
        # bind_ip works fine for legacy configs
        ({'bind_ip': '192.168.1.42'}, '192.168.1.42'),
        # ring_ip works fine by-itself
        ({'ring_ip': '192.168.1.43'}, '192.168.1.43'),
        # if you have both ring_ip wins
        ({'bind_ip': '192.168.1.44', 'ring_ip': '192.168.1.45'},
         '192.168.1.45'),
    ]
    for overrides, expected_ring_ip in scenarios:
        self.conf = dict(base_conf, **overrides)
        self._configure_reconstructor()
        self.assertEqual(expected_ring_ip, self.reconstructor.ring_ip)
|
|
|
|
def test_handoffs_only_default(self):
    # With neither handoffs option configured, handoffs_only is off.

    # sanity neither option added to default conf
    for option in ('handoffs_first', 'handoffs_only'):
        self.conf.pop(option, None)
    daemon = object_reconstructor.ObjectReconstructor(
        self.conf, logger=self.logger)
    self.reconstructor = daemon
    self.assertFalse(daemon.handoffs_only)
|
|
|
|
def test_handoffs_first_enables_handoffs_only(self):
    # handoffs_first on its own switches handoffs_only on, and logs both
    # the deprecation warning and the handoffs-only warning.
    self.conf.pop('handoffs_only', None)  # sanity
    self.conf['handoffs_first'] = "True"
    daemon = object_reconstructor.ObjectReconstructor(
        self.conf, logger=self.logger)
    self.reconstructor = daemon
    self.assertTrue(daemon.handoffs_only)
    warning_lines = self.logger.get_lines_for_level('warning')
    self.assertEqual([
        'The handoffs_first option is deprecated in favor '
        'of handoffs_only. This option may be ignored in a '
        'future release.',
        'Handoff only mode is not intended for normal operation, '
        'use handoffs_only with care.',
    ], warning_lines)
|
|
|
|
def test_handoffs_only_ignores_handoffs_first(self):
    # An explicit handoffs_only=False beats handoffs_first=True; both the
    # deprecation and the "ignored" warnings are logged.
    self.conf.update(handoffs_first="True", handoffs_only="False")
    daemon = object_reconstructor.ObjectReconstructor(
        self.conf, logger=self.logger)
    self.reconstructor = daemon
    self.assertFalse(daemon.handoffs_only)
    warning_lines = self.logger.get_lines_for_level('warning')
    self.assertEqual([
        'The handoffs_first option is deprecated in favor of '
        'handoffs_only. This option may be ignored in a future release.',
        'Ignored handoffs_first option in favor of handoffs_only.',
    ], warning_lines)
|
|
|
|
def test_handoffs_only_enabled(self):
    # handoffs_only=True without handoffs_first only logs the
    # handoffs-only warning.
    self.conf.pop('handoffs_first', None)  # sanity
    self.conf['handoffs_only'] = "True"
    daemon = object_reconstructor.ObjectReconstructor(
        self.conf, logger=self.logger)
    self.reconstructor = daemon
    self.assertTrue(daemon.handoffs_only)
    warning_lines = self.logger.get_lines_for_level('warning')
    self.assertEqual([
        'Handoff only mode is not intended for normal operation, '
        'use handoffs_only with care.',
    ], warning_lines)
|
|
|
|
def test_handoffs_only_true_and_first_true(self):
    # Both options on: handoffs_only is enabled, and we get the
    # deprecation warning followed by the handoffs-only warning.
    self.conf.update(handoffs_first="True", handoffs_only="True")
    daemon = object_reconstructor.ObjectReconstructor(
        self.conf, logger=self.logger)
    self.reconstructor = daemon
    self.assertTrue(daemon.handoffs_only)
    warning_lines = self.logger.get_lines_for_level('warning')
    self.assertEqual([
        'The handoffs_first option is deprecated in favor of '
        'handoffs_only. This option may be ignored in a future release.',
        'Handoff only mode is not intended for normal operation, '
        'use handoffs_only with care.',
    ], warning_lines)
|
|
|
|
def test_handoffs_only_false_and_first_false(self):
    # Both options off: handoffs_only stays disabled, but merely setting
    # handoffs_first still logs the deprecation warning.
    self.conf.update(handoffs_only="False", handoffs_first="False")
    daemon = object_reconstructor.ObjectReconstructor(
        self.conf, logger=self.logger)
    self.reconstructor = daemon
    self.assertFalse(daemon.handoffs_only)
    warning_lines = self.logger.get_lines_for_level('warning')
    self.assertEqual([
        'The handoffs_first option is deprecated in favor of '
        'handoffs_only. This option may be ignored in a future release.',
    ], warning_lines)
|
|
|
|
def test_handoffs_only_none_and_first_false(self):
    # handoffs_first=False with no handoffs_only: disabled, with just the
    # deprecation warning logged.
    self.conf.pop('handoffs_only', None)  # sanity
    self.conf['handoffs_first'] = "False"
    daemon = object_reconstructor.ObjectReconstructor(
        self.conf, logger=self.logger)
    self.reconstructor = daemon
    self.assertFalse(daemon.handoffs_only)
    warning_lines = self.logger.get_lines_for_level('warning')
    self.assertEqual([
        'The handoffs_first option is deprecated in favor of '
        'handoffs_only. This option may be ignored in a future release.',
    ], warning_lines)
|
|
|
|
def test_handoffs_only_false_and_first_none(self):
    # handoffs_only=False without handoffs_first: disabled and no
    # warnings at all.
    self.conf.pop('handoffs_first', None)  # sanity
    self.conf['handoffs_only'] = "False"
    daemon = object_reconstructor.ObjectReconstructor(
        self.conf, logger=self.logger)
    self.reconstructor = daemon
    self.assertFalse(daemon.handoffs_only)
    self.assertFalse(self.logger.get_lines_for_level('warning'))
|
|
|
|
def test_handoffs_only_true_and_first_false(self):
    # handoffs_only=True wins over handoffs_first=False; the deprecation
    # and handoffs-only warnings are both logged.
    self.conf.update(handoffs_first="False", handoffs_only="True")
    daemon = object_reconstructor.ObjectReconstructor(
        self.conf, logger=self.logger)
    self.reconstructor = daemon
    self.assertTrue(daemon.handoffs_only)
    warning_lines = self.logger.get_lines_for_level('warning')
    self.assertEqual([
        'The handoffs_first option is deprecated in favor of '
        'handoffs_only. This option may be ignored in a future release.',
        'Handoff only mode is not intended for normal operation, '
        'use handoffs_only with care.',
    ], warning_lines)
|
|
|
|
def test_two_ec_policies(self):
    # With two EC policies configured and a single partition dir on disk
    # (under objects-1), run_once processes exactly one job.
    policies = [
        StoragePolicy(0, name='zero', is_deprecated=True),
        ECStoragePolicy(1, name='one', is_default=True,
                        ec_type=DEFAULT_TEST_EC_TYPE,
                        ec_ndata=4, ec_nparity=3),
        ECStoragePolicy(2, name='two',
                        ec_type=DEFAULT_TEST_EC_TYPE,
                        ec_ndata=8, ec_nparity=2),
    ]
    ring_args = [{}, {'replicas': 7}, {'replicas': 10}]
    with patch_policies(policies, fake_ring_args=ring_args):
        self._configure_reconstructor()
        seen_jobs = []

        def capture_job(job):
            seen_jobs.append(job)

        self.reconstructor.process_job = capture_job

        part_dir = os.path.join(self.devices, 'sda', 'objects-1', '0')
        os.makedirs(part_dir)
        self.reconstructor.run_once()
        self.assertEqual(1, len(seen_jobs))
|
|
|
|
def test_collect_parts_skips_non_ec_policy_and_device(self):
    # Partition dirs are created on the local device for every policy,
    # but collect_parts only reports them once, for self.policy.
    stub_parts = (371, 78, 419, 834)
    device = self.local_dev['device']
    for policy in POLICIES:
        datadir = diskfile.get_data_dir(policy)
        for part in stub_parts:
            part_dir = os.path.join(
                self.devices, device, datadir, str(part))
            utils.mkdirs(part_dir)

    part_infos = list(self.reconstructor.collect_parts())

    found_parts = sorted(int(info['partition']) for info in part_infos)
    self.assertEqual(found_parts, sorted(stub_parts))
    for info in part_infos:
        self.assertEqual(info['local_dev'], self.local_dev)
        self.assertEqual(info['policy'], self.policy)
        expected_path = os.path.join(
            self.devices,
            self.local_dev['device'],
            diskfile.get_data_dir(self.policy),
            str(info['partition']))
        self.assertEqual(info['part_path'], expected_path)
|
|
|
|
def test_collect_parts_skips_non_local_devs_servers_per_port(self):
    # With servers_per_port set, a ring device on our IP counts as local
    # even when its port differs; a device on another IP does not.
    self._configure_reconstructor(devices=self.devices, mount_check=False,
                                  bind_ip=self.ip, bind_port=self.port,
                                  servers_per_port=2)

    device_parts = {
        'sda': (374,),
        'sdb': (179, 807),  # w/one-serv-per-port, same IP alone is local
        'sdc': (363, 468, 843),
        'sdd': (912,),  # "not local" via different IP
    }
    for policy in POLICIES:
        datadir = diskfile.get_data_dir(policy)
        for dev, parts in device_parts.items():
            for part in parts:
                utils.mkdirs(os.path.join(
                    self.devices, dev, datadir, str(part)))

    # sda, sdb and sdc go into the ring on our IP (sdb on another port)
    local_devs = ('sda', 'sdb', 'sdc')
    stub_ring_devs = []
    for dev_id, dev in enumerate(local_devs):
        port = self.port + 1 if dev == 'sdb' else self.port
        stub_ring_devs.append({
            'id': dev_id,
            'device': dev,
            'replication_ip': self.ip,
            'replication_port': port,
        })
    stub_ring_devs.append({
        'id': len(local_devs),
        'device': 'sdd',
        'replication_ip': '127.0.0.88',  # not local via IP
        'replication_port': self.port,
    })

    self.reconstructor.ring_ip = '0.0.0.0'  # use whataremyips
    with mock.patch('swift.obj.reconstructor.whataremyips',
                    return_value=[self.ip]), \
            mock.patch.object(self.policy.object_ring, '_devs',
                              new=stub_ring_devs):
        part_infos = list(self.reconstructor.collect_parts())

    found_parts = sorted(int(info['partition']) for info in part_infos)
    expected_parts = sorted(
        part for dev in local_devs for part in device_parts[dev])
    self.assertEqual(found_parts, expected_parts)
    for info in part_infos:
        self.assertEqual(info['policy'], self.policy)
        self.assertIn(info['local_dev'], stub_ring_devs)
        expected_path = os.path.join(
            self.devices,
            info['local_dev']['device'],
            diskfile.get_data_dir(self.policy),
            str(info['partition']))
        self.assertEqual(info['part_path'], expected_path)
|
|
|
|
def test_collect_parts_multi_device_skips_non_non_local_devs(self):
    # Ring devices on a different port or a different IP are not local,
    # so their partitions are not collected even though they exist on
    # disk.
    device_parts = {
        'sda': (374,),
        'sdb': (179, 807),  # "not local" via different port
        'sdc': (363, 468, 843),
        'sdd': (912,),  # "not local" via different IP
    }
    for policy in POLICIES:
        datadir = diskfile.get_data_dir(policy)
        for dev, parts in device_parts.items():
            for part in parts:
                utils.mkdirs(os.path.join(
                    self.devices, dev, datadir, str(part)))

    # sda and sdc are the only ring devices on our ip *and* port
    local_devs = ('sda', 'sdc')
    stub_ring_devs = []
    for dev_id, dev in enumerate(local_devs):
        stub_ring_devs.append({
            'id': dev_id,
            'device': dev,
            'replication_ip': self.ip,
            'replication_port': self.port,
        })
    stub_ring_devs.extend([{
        'id': len(local_devs),
        'device': 'sdb',
        'replication_ip': self.ip,
        'replication_port': self.port + 1,  # not local via port
    }, {
        'id': len(local_devs) + 1,
        'device': 'sdd',
        'replication_ip': '127.0.0.88',  # not local via IP
        'replication_port': self.port,
    }])

    self.reconstructor.ring_ip = '0.0.0.0'  # use whataremyips
    with mock.patch('swift.obj.reconstructor.whataremyips',
                    return_value=[self.ip]), \
            mock.patch.object(self.policy.object_ring, '_devs',
                              new=stub_ring_devs):
        part_infos = list(self.reconstructor.collect_parts())

    found_parts = sorted(int(info['partition']) for info in part_infos)
    expected_parts = sorted(
        part for dev in local_devs for part in device_parts[dev])
    self.assertEqual(found_parts, expected_parts)
    for info in part_infos:
        self.assertEqual(info['policy'], self.policy)
        self.assertIn(info['local_dev'], stub_ring_devs)
        expected_path = os.path.join(
            self.devices,
            info['local_dev']['device'],
            diskfile.get_data_dir(self.policy),
            str(info['partition']))
        self.assertEqual(info['part_path'], expected_path)
|
|
|
|
def test_collect_parts_multi_device_skips_non_ring_devices(self):
    # With a ring holding just sda and sdc, collect_parts reports the
    # partitions of exactly those two devices.
    device_parts = {
        'sda': (374,),
        'sdc': (363, 468, 843),
    }
    for policy in POLICIES:
        datadir = diskfile.get_data_dir(policy)
        for dev, parts in device_parts.items():
            for part in parts:
                utils.mkdirs(os.path.join(
                    self.devices, dev, datadir, str(part)))

    # we're only going to add sda and sdc into the ring
    local_devs = ('sda', 'sdc')
    stub_ring_devs = []
    for dev_id, dev in enumerate(local_devs):
        stub_ring_devs.append({
            'id': dev_id,
            'device': dev,
            'replication_ip': self.ip,
            'replication_port': self.port,
        })

    self.reconstructor.ring_ip = '0.0.0.0'  # use whataremyips
    with mock.patch('swift.obj.reconstructor.whataremyips',
                    return_value=[self.ip]), \
            mock.patch.object(self.policy.object_ring, '_devs',
                              new=stub_ring_devs):
        part_infos = list(self.reconstructor.collect_parts())

    found_parts = sorted(int(info['partition']) for info in part_infos)
    expected_parts = sorted(
        part for dev in local_devs for part in device_parts[dev])
    self.assertEqual(found_parts, expected_parts)
    for info in part_infos:
        self.assertEqual(info['policy'], self.policy)
        self.assertIn(info['local_dev'], stub_ring_devs)
        expected_path = os.path.join(
            self.devices,
            info['local_dev']['device'],
            diskfile.get_data_dir(self.policy),
            str(info['partition']))
        self.assertEqual(info['part_path'], expected_path)
|
|
|
|
def test_collect_parts_mount_check(self):
    # collect_parts only yields partitions for devices that check_drive
    # returns a path for, and it passes the configured mount_check along.

    # each device has one part in it
    local_devs = ('sda', 'sdb')
    for part, dev in enumerate(local_devs):
        datadir = diskfile.get_data_dir(self.policy)
        utils.mkdirs(os.path.join(
            self.devices, dev, datadir, str(part)))
    stub_ring_devs = []
    for dev_id, dev in enumerate(local_devs):
        stub_ring_devs.append({
            'id': dev_id,
            'device': dev,
            'replication_ip': self.ip,
            'replication_port': self.port
        })

    def collect_with_check_drive(check_drive):
        # collect parts with the stub ring and a replaced check_drive
        with mock.patch('swift.obj.reconstructor.whataremyips',
                        return_value=[self.ip]), \
                mock.patch.object(self.policy.object_ring, '_devs',
                                  new=stub_ring_devs), \
                mock.patch('swift.obj.diskfile.check_drive',
                           check_drive):
            return list(self.reconstructor.collect_parts())

    with mock.patch('swift.obj.reconstructor.whataremyips',
                    return_value=[self.ip]), \
            mock.patch.object(self.policy.object_ring, '_devs',
                              new=stub_ring_devs):
        part_infos = list(self.reconstructor.collect_parts())
    self.assertEqual(2, len(part_infos))  # sanity
    self.assertEqual(set(int(p['partition']) for p in part_infos),
                     set([0, 1]))

    seen_paths = []

    def unmounted_only_check_drive(devices, device, mount_check):
        path = os.path.join(devices, device)
        if mount_check or not os.path.isdir(path):
            return None
        # while mount_check is false, the test still creates the dirs
        seen_paths.append(path)
        return path

    part_infos = collect_with_check_drive(unmounted_only_check_drive)
    self.assertEqual(2, len(part_infos))  # sanity, same jobs
    self.assertEqual(set(int(p['partition']) for p in part_infos),
                     set([0, 1]))

    # ... because the stub check_drive returned paths for both dirs
    self.assertEqual(set(seen_paths), set([
        os.path.join(self.devices, dev) for dev in local_devs]))

    # ... now with mount check
    self._configure_reconstructor(mount_check=True)
    self.assertTrue(self.reconstructor.mount_check)
    seen_paths = []
    for policy in POLICIES:
        self.assertTrue(self.reconstructor._df_router[policy].mount_check)
    part_infos = collect_with_check_drive(unmounted_only_check_drive)
    self.assertEqual([], part_infos)  # sanity, no jobs

    # ... because the stub check_drive returned no path for either device
    self.assertFalse(seen_paths)

    def sda_only_check_drive(devices, device, mount_check):
        self.assertTrue(mount_check)
        if device != 'sda':
            return False
        return os.path.join(devices, device)

    part_infos = collect_with_check_drive(sda_only_check_drive)
    self.assertEqual(1, len(part_infos))  # only sda picked up (part 0)
    self.assertEqual(part_infos[0]['partition'], 0)
|
|
|
|
def test_collect_parts_cleans_tmp(self):
    # collect_parts calls unlink_older_than on each local device's tmp
    # dir, with a cutoff of now - reclaim_age.
    local_devs = ('sda', 'sdc')
    stub_ring_devs = []
    for dev_id, dev in enumerate(local_devs):
        stub_ring_devs.append({
            'id': dev_id,
            'device': dev,
            'replication_ip': self.ip,
            'replication_port': self.port
        })
        utils.mkdirs(os.path.join(self.devices, dev))
    fake_unlink = mock.MagicMock()
    self._configure_reconstructor(reclaim_age=1000)
    now = time.time()
    with mock.patch('swift.obj.reconstructor.whataremyips',
                    return_value=[self.ip]), \
            mock.patch('swift.obj.reconstructor.time.time',
                       return_value=now), \
            mock.patch.object(self.policy.object_ring, '_devs',
                              new=stub_ring_devs), \
            mock.patch('swift.obj.reconstructor.unlink_older_than',
                       fake_unlink):
        self.assertEqual([], list(self.reconstructor.collect_parts()))
    # each local device has unlink_older_than called on it,
    # with now - self.reclaim_age
    tmpdir = diskfile.get_tmp_dir(self.policy)
    cutoff = now - 1000
    expected_calls = []
    for dev in local_devs:
        expected_calls.append(
            mock.call(os.path.join(self.devices, dev, tmpdir), cutoff))
    self.assertEqual(fake_unlink.mock_calls, expected_calls)
|
|
|
|
def test_collect_parts_creates_datadir(self):
    # When only the device dir exists, collect_parts yields nothing but
    # leaves the policy's data dir created underneath it.

    # create just the device path
    dev_path = os.path.join(self.devices, self.local_dev['device'])
    utils.mkdirs(dev_path)
    with mock.patch('swift.obj.reconstructor.whataremyips',
                    return_value=[self.ip]):
        collected = list(self.reconstructor.collect_parts())
        self.assertEqual([], collected)
    datadir = diskfile.get_data_dir(self.policy)
    self.assertTrue(os.path.exists(os.path.join(dev_path, datadir)))
|
|
|
|
def test_collect_parts_creates_datadir_error(self):
    # If mkdirs blows up while creating the data dir, collect_parts
    # yields nothing and logs exactly one error naming the path.

    # create just the device path
    datadir_path = os.path.join(self.devices, self.local_dev['device'],
                                diskfile.get_data_dir(self.policy))
    utils.mkdirs(os.path.dirname(datadir_path))
    with mock.patch('swift.obj.reconstructor.whataremyips',
                    return_value=[self.ip]), \
            mock.patch('swift.obj.reconstructor.mkdirs',
                       side_effect=OSError('kaboom!')):
        collected = list(self.reconstructor.collect_parts())
        self.assertEqual([], collected)
    error_lines = self.logger.get_lines_for_level('error')
    self.assertEqual(len(error_lines), 1,
                     'Expected only one error, got %r' % error_lines)
    for fragment in ('Unable to create', datadir_path):
        self.assertIn(fragment, error_lines[0])
|
|
|
|
def test_collect_parts_skips_invalid_paths(self):
    # Put a regular file where the data dir should be; the reconstructor
    # is expected to leave it alone, collect nothing and log one error.
    data_dir = os.path.join(self.devices, self.local_dev['device'],
                            diskfile.get_data_dir(self.policy))
    utils.mkdirs(os.path.dirname(data_dir))
    with open(data_dir, 'w') as junk:
        junk.write('junk')
    ips_patch = mock.patch('swift.obj.reconstructor.whataremyips',
                           return_value=[self.ip])
    with ips_patch:
        found = list(self.reconstructor.collect_parts())
    self.assertEqual([], found)
    self.assertTrue(os.path.exists(data_dir))
    logged = self.logger.get_lines_for_level('error')
    self.assertEqual(len(logged), 1,
                     'Expected only one error, got %r' % logged)
    message = logged[0]
    self.assertIn('Unable to list partitions', message)
    self.assertIn(data_dir, message)
|
|
|
|
def test_reconstruct_removes_non_partition_files(self):
    # Mix real partition dirs with junk entries in the data dir, run a
    # reconstruct pass (process_job stubbed out), and check the junk is
    # removed and warned about.
    data_dir = os.path.join(self.devices, self.local_dev['device'],
                            diskfile.get_data_dir(self.policy))
    for part_name in map(str, range(3)):
        utils.mkdirs(os.path.join(data_dir, part_name))

    # entries that are clearly not partitions: a non-numeric directory
    # tree, a non-numeric file, and a numeric name that is a file
    utils.mkdirs(os.path.join(data_dir, 'not/a/partition'))
    for name in ('junk', '1234'):
        with open(os.path.join(data_dir, name), 'w') as junk:
            junk.write('junk')

    ips_patch = mock.patch('swift.obj.reconstructor.whataremyips',
                           return_value=[self.ip])
    job_patch = mock.patch('swift.obj.reconstructor.'
                           'ObjectReconstructor.process_job')
    with ips_patch, job_patch:
        self.reconstructor.reconstruct()

    # all the bad gets cleaned up
    leftovers = [
        '%s still exists!' % path
        for path in (os.path.join(data_dir, name)
                     for name in ('junk', '1234', 'not'))
        if os.path.exists(path)]
    self.assertFalse(leftovers)

    warnings = self.logger.get_lines_for_level('warning')
    expected_warnings = (
        'Unexpected entity in data dir: %r'
        % os.path.join(data_dir, 'not'),
        'Unexpected entity in data dir: %r'
        % os.path.join(data_dir, 'junk'),
        'Unexpected entity %r is not a directory'
        % os.path.join(data_dir, '1234'),
    )
    for expected in expected_warnings:
        self.assertIn(expected, warnings)
    self.assertEqual(self.reconstructor.reconstruction_part_count, 6)
|
|
|
|
def test_collect_parts_overrides(self):
    # Partitions exist on disk for three devices, but only sda and sdc
    # are placed in the ring. Each scenario pairs collect_parts() kwargs
    # with the (device, partition) pairs expected to be collected.
    on_disk = {
        'sda': (374, 843),
        'sdb': (179, 807),
        'sdc': (363, 468, 843),
    }
    datadir = diskfile.get_data_dir(self.policy)
    for dev, parts in on_disk.items():
        for part in parts:
            utils.mkdirs(
                os.path.join(self.devices, dev, datadir, str(part)))

    # we're only going to add sda and sdc into the ring
    ring_devs = []
    for dev_id, dev in enumerate(('sda', 'sdc')):
        ring_devs.append({
            'id': dev_id,
            'device': dev,
            'replication_ip': self.ip,
            'replication_port': self.port
        })

    sda_parts = [('sda', 374), ('sda', 843)]
    sdc_parts = [('sdc', 363), ('sdc', 468), ('sdc', 843)]
    scenarios = (
        ({}, sda_parts + sdc_parts),
        ({'override_devices': ['sda', 'sdc']}, sda_parts + sdc_parts),
        ({'override_devices': ['sdc']}, sdc_parts),
        ({'override_devices': ['sda']}, sda_parts),
        ({'override_devices': ['sdx']}, []),
        ({'override_partitions': [374]}, [('sda', 374)]),
        ({'override_partitions': [843]}, [('sda', 843), ('sdc', 843)]),
        ({'override_partitions': [843], 'override_devices': ['sda']},
         [('sda', 843)]),
    )
    ips_patch = mock.patch('swift.obj.reconstructor.whataremyips',
                           return_value=[self.ip])
    devs_patch = mock.patch.object(self.policy.object_ring, '_devs',
                                   new=ring_devs)
    with ips_patch, devs_patch:
        for kwargs, expected_parts in scenarios:
            part_infos = list(self.reconstructor.collect_parts(**kwargs))
            expected_paths = set()
            for dev, part in expected_parts:
                expected_paths.add(
                    os.path.join(self.devices, dev, datadir, str(part)))
            found_paths = set()
            for info in part_infos:
                found_paths.add(info['part_path'])
            msg = 'expected %r != %r for %r' % (
                expected_paths, found_paths, kwargs)
            self.assertEqual(expected_paths, found_paths, msg)
|
|
|
|
def test_build_jobs_creates_empty_hashes(self):
    # Building jobs for an empty partition dir yields one SYNC job with
    # no suffixes, and leaves a hashes file behind in the partition.
    part_path = os.path.join(self.devices, self.local_dev['device'],
                             diskfile.get_data_dir(self.policy), '0')
    utils.mkdirs(part_path)
    part_info = dict(
        local_dev=self.local_dev,
        policy=self.policy,
        partition=0,
        part_path=part_path,
    )
    jobs = self.reconstructor.build_reconstruction_jobs(part_info)
    self.assertEqual(1, len(jobs))
    job = jobs[0]
    expected_fields = (
        ('job_type', object_reconstructor.SYNC),
        ('frag_index', 0),
        ('suffixes', []),
        ('partition', 0),
        ('path', part_path),
        ('hashes', {}),
        ('policy', self.policy),
        ('local_dev', self.local_dev),
        ('device', self.local_dev['device']),
    )
    for key, expected in expected_fields:
        self.assertEqual(job[key], expected)
    self.assertEqual(len(job['sync_to']), 3)
    self.assertTrue(
        os.path.exists(os.path.join(part_path, diskfile.HASH_FILE)))
    rehashed = self.reconstructor._get_hashes(
        self.local_dev['device'], 0, self.policy, do_listdir=True)
    self.assertEqual(rehashed, {})
|
|
|
|
def test_build_jobs_no_hashes(self):
    # With the diskfile manager stubbed to report no suffix hashes, a
    # single empty SYNC job is still built for the partition.
    part_path = os.path.join(self.devices, self.local_dev['device'],
                             diskfile.get_data_dir(self.policy), '0')
    part_info = dict(
        local_dev=self.local_dev,
        policy=self.policy,
        partition=0,
        part_path=part_path,
    )
    no_hashes = {}
    hashes_patch = mock.patch(
        'swift.obj.diskfile.ECDiskFileManager._get_hashes',
        return_value=(None, no_hashes))
    with hashes_patch:
        jobs = self.reconstructor.build_reconstruction_jobs(part_info)
    self.assertEqual(1, len(jobs))
    job = jobs[0]
    expected_fields = (
        ('job_type', object_reconstructor.SYNC),
        ('frag_index', 0),
        ('suffixes', []),
        ('partition', 0),
        ('path', part_path),
        ('hashes', {}),
        ('policy', self.policy),
        ('local_dev', self.local_dev),
        ('device', self.local_dev['device']),
    )
    for key, expected in expected_fields:
        self.assertEqual(job[key], expected)
    self.assertEqual(len(job['sync_to']), 3)
|
|
|
|
def test_build_jobs_primary(self):
    # For a partition where the local device is a primary, exactly one
    # SYNC job is built covering every suffix, targeting the left, right
    # and far partners of the local node.
    ring = self.policy.object_ring = self.fabricated_ring
    # find a partition for which we're a primary
    for partition in range(2 ** ring.part_power):
        part_nodes = ring.get_part_nodes(partition)
        try:
            node_index = [n['id'] for n in part_nodes].index(
                self.local_dev['id'])
        except ValueError:
            pass
        else:
            break
    else:
        self.fail("the ring doesn't work: %r" % ring._replica2part2dev_id)
    # The node's position in the partition's node list selects its
    # partners, while the fragment index is the policy's backend index
    # for that position (the same derivation test_build_jobs_mixed uses).
    frag_index = self.policy.get_backend_index(node_index)
    part_path = os.path.join(self.devices, self.local_dev['device'],
                             diskfile.get_data_dir(self.policy),
                             str(partition))
    part_info = {
        'local_dev': self.local_dev,
        'policy': self.policy,
        'partition': partition,
        'part_path': part_path,
    }
    stub_hashes = {
        '123': {frag_index: 'hash', None: 'hash'},
        'abc': {frag_index: 'hash', None: 'hash'},
    }
    with mock.patch('swift.obj.diskfile.ECDiskFileManager._get_hashes',
                    return_value=(None, stub_hashes)):
        jobs = self.reconstructor.build_reconstruction_jobs(part_info)
    self.assertEqual(1, len(jobs))
    job = jobs[0]
    self.assertEqual(job['job_type'], object_reconstructor.SYNC)
    self.assertEqual(job['frag_index'], frag_index)
    self.assertEqual(job['suffixes'], list(stub_hashes.keys()))
    # every partner index wraps around the ring, including the far one
    self.assertEqual(
        set([n['index'] for n in job['sync_to']]),
        set([(node_index + 1) % ring.replicas,
             (node_index - 1) % ring.replicas,
             (node_index + int(0.5 * ring.replicas)) % ring.replicas,
             ]))
    self.assertEqual(job['partition'], partition)
    self.assertEqual(job['path'], part_path)
    self.assertEqual(job['hashes'], stub_hashes)
    self.assertEqual(job['policy'], self.policy)
    self.assertEqual(job['local_dev'], self.local_dev)
    self.assertEqual(job['device'], self.local_dev['device'])
|
|
|
|
def test_build_jobs_handoff(self):
    # For a partition the local device is not assigned to, a single
    # REVERT job is built that sends everything to the node(s) holding
    # the fragment index found on disk.
    ring = self.policy.object_ring = self.fabricated_ring
    # find a partition for which we're a handoff
    for partition in range(2 ** ring.part_power):
        part_nodes = ring.get_part_nodes(partition)
        primary_ids = [n['id'] for n in part_nodes]
        if self.local_dev['id'] not in primary_ids:
            break
    else:
        self.fail("the ring doesn't work: %r" % ring._replica2part2dev_id)
    part_path = os.path.join(self.devices, self.local_dev['device'],
                             diskfile.get_data_dir(self.policy),
                             str(partition))
    part_info = dict(
        local_dev=self.local_dev,
        policy=self.policy,
        partition=partition,
        part_path=part_path,
    )
    # since this part doesn't belong on us it doesn't matter what
    # frag_index we have
    frag_index = random.randint(0, self.policy.ec_n_unique_fragments - 1)
    stub_hashes = {
        '123': {frag_index: 'hash', None: 'hash'},
        'abc': {None: 'hash'},
    }
    hashes_patch = mock.patch(
        'swift.obj.diskfile.ECDiskFileManager._get_hashes',
        return_value=(None, stub_hashes))
    with hashes_patch:
        jobs = self.reconstructor.build_reconstruction_jobs(part_info)
    self.assertEqual(1, len(jobs), 'Expected only one job, got %r' % jobs)
    job = jobs[0]
    targets = job['sync_to']
    self.assertEqual(job['job_type'], object_reconstructor.REVERT)
    self.assertEqual(job['frag_index'], frag_index)
    self.assertEqual(sorted(job['suffixes']), sorted(stub_hashes.keys()))
    self.assertEqual(self.policy.ec_duplication_factor, len(targets))
    # the sync_to nodes should all be different from each other
    target_ids = {node['id'] for node in targets}
    self.assertEqual(len(target_ids), self.policy.ec_duplication_factor)
    # but all the nodes have same backend index to sync
    backend_indexes = {
        self.policy.get_backend_index(node['index']) for node in targets}
    self.assertEqual(1, len(backend_indexes))
    self.assertEqual(targets[0]['index'], frag_index)
    self.assertEqual(job['path'], part_path)
    self.assertEqual(job['partition'], partition)
    self.assertEqual(sorted(job['hashes']), sorted(stub_hashes))
    self.assertEqual(job['local_dev'], self.local_dev)
|
|
|
|
def test_build_jobs_mixed(self):
    # A primary partition holding its own fragment index, a stray
    # fragment index and a tombstone-only suffix produces two jobs:
    # a SYNC job for what belongs here and a REVERT job for the stray.
    ring = self.policy.object_ring = self.fabricated_ring
    # find a partition for which we're a primary
    for partition in range(2 ** ring.part_power):
        part_nodes = ring.get_part_nodes(partition)
        try:
            node_index = [n['id'] for n in part_nodes].index(
                self.local_dev['id'])
        except ValueError:
            pass
        else:
            break
    else:
        self.fail("the ring doesn't work: %r" % ring._replica2part2dev_id)
    part_path = os.path.join(self.devices, self.local_dev['device'],
                             diskfile.get_data_dir(self.policy),
                             str(partition))
    part_info = {
        'local_dev': self.local_dev,
        'policy': self.policy,
        'partition': partition,
        'part_path': part_path,
    }
    frag_index = self.policy.get_backend_index(node_index)
    # the stray fragment must differ from the fragment index that
    # belongs here (not merely from our position in the node list),
    # otherwise it would not need reverting
    other_frag_index = random.choice(
        [f for f in range(self.policy.ec_n_unique_fragments)
         if f != frag_index])
    stub_hashes = {
        '123': {frag_index: 'hash', None: 'hash'},
        '456': {other_frag_index: 'hash', None: 'hash'},
        'abc': {None: 'hash'},
    }
    with mock.patch('swift.obj.diskfile.ECDiskFileManager._get_hashes',
                    return_value=(None, stub_hashes)):
        jobs = self.reconstructor.build_reconstruction_jobs(part_info)
    self.assertEqual(2, len(jobs))
    sync_jobs, revert_jobs = [], []
    for job in jobs:
        self.assertEqual(job['partition'], partition)
        self.assertEqual(job['path'], part_path)
        self.assertEqual(sorted(job['hashes']), sorted(stub_hashes))
        self.assertEqual(job['policy'], self.policy)
        self.assertEqual(job['local_dev'], self.local_dev)
        self.assertEqual(job['device'], self.local_dev['device'])
        {
            object_reconstructor.SYNC: sync_jobs,
            object_reconstructor.REVERT: revert_jobs,
        }[job['job_type']].append(job)
    self.assertEqual(1, len(sync_jobs))
    job = sync_jobs[0]
    self.assertEqual(job['frag_index'], frag_index)
    self.assertEqual(sorted(job['suffixes']), sorted(['123', 'abc']))
    self.assertEqual(len(job['sync_to']), 3)
    # partners are picked by position in the partition's node list and
    # every partner index wraps around the ring, including the far one
    self.assertEqual(
        set([n['index'] for n in job['sync_to']]),
        set([(node_index + 1) % ring.replicas,
             (node_index - 1) % ring.replicas,
             (node_index + int(0.5 * ring.replicas)) % ring.replicas,
             ]))
    self.assertEqual(1, len(revert_jobs))
    job = revert_jobs[0]
    self.assertEqual(job['frag_index'], other_frag_index)
    self.assertEqual(job['suffixes'], ['456'])
    self.assertEqual(len(job['sync_to']),
                     self.policy.ec_duplication_factor)
    self.assertEqual(job['sync_to'][0]['index'], other_frag_index)
|
|
|
|
def test_build_jobs_revert_only_tombstones(self):
    # A handoff partition holding only tombstones (no fragment index in
    # any suffix) yields one REVERT job with frag_index None.
    ring = self.policy.object_ring = self.fabricated_ring
    # find a partition for which we're a handoff
    for partition in range(2 ** ring.part_power):
        part_nodes = ring.get_part_nodes(partition)
        primary_ids = [n['id'] for n in part_nodes]
        if self.local_dev['id'] not in primary_ids:
            break
    else:
        self.fail("the ring doesn't work: %r" % ring._replica2part2dev_id)
    part_path = os.path.join(self.devices, self.local_dev['device'],
                             diskfile.get_data_dir(self.policy),
                             str(partition))
    part_info = dict(
        local_dev=self.local_dev,
        policy=self.policy,
        partition=partition,
        part_path=part_path,
    )
    # we have no fragment index to hint the jobs where they belong
    stub_hashes = {
        '123': {None: 'hash'},
        'abc': {None: 'hash'},
    }
    hashes_patch = mock.patch(
        'swift.obj.diskfile.ECDiskFileManager._get_hashes',
        return_value=(None, stub_hashes))
    with hashes_patch:
        jobs = self.reconstructor.build_reconstruction_jobs(part_info)
    self.assertEqual(len(jobs), 1, 'Expected only one job, got %r' % jobs)
    job = jobs[0]
    expected = {
        'job_type': object_reconstructor.REVERT,
        'frag_index': None,
        'suffixes': list(stub_hashes.keys()),
        'partition': partition,
        'path': part_path,
        'hashes': stub_hashes,
        'policy': self.policy,
        'local_dev': self.local_dev,
        'device': self.local_dev['device'],
    }
    self.assertEqual(ring.replica_count, len(part_nodes))
    total_frags = (self.policy.ec_n_unique_fragments *
                   self.policy.ec_duplication_factor)
    expected_samples = total_frags - self.policy.ec_ndata + 1
    self.assertEqual(len(job['sync_to']), expected_samples)
    for key in expected:
        value = expected[key]
        msg = 'expected %s != %s for %s' % (value, job[key], key)
        self.assertEqual(value, job[key], msg)
|
|
|
|
def test_get_suffixes_to_sync(self):
    # The suffix-hash request must carry the job's policy index even
    # when the reconstructor's shared base headers name another policy.
    part_path = os.path.join(self.devices, self.local_dev['device'],
                             diskfile.get_data_dir(self.policy), '1')
    utils.mkdirs(part_path)
    part_info = dict(
        local_dev=self.local_dev,
        policy=self.policy,
        partition=1,
        part_path=part_path,
    )
    jobs = self.reconstructor.build_reconstruction_jobs(part_info)
    self.assertEqual(1, len(jobs))
    job = jobs[0]
    node = job['sync_to'][0]
    policy_header = 'X-Backend-Storage-Policy-Index'
    # process_job used to try and modify the instance base headers
    self.reconstructor.headers[policy_header] = int(POLICIES[1])
    # ... which doesn't work out under concurrency with multiple policies
    self.assertNotEqual(
        self.reconstructor.headers[policy_header], int(job['policy']))
    with mocked_http_conn(200, body=pickle.dumps({})) as request_log:
        suffixes, new_node = self.reconstructor._get_suffixes_to_sync(
            job, node)
    sent_policy_indexes = [
        req['headers'][policy_header] for req in request_log.requests]
    self.assertEqual([int(job['policy'])], sent_policy_indexes)
    self.assertEqual(suffixes, [])
    self.assertEqual(new_node, node)
|
|
|
|
def test_get_suffixes_in_sync(self):
    # When the remote node's suffix hashes match the local ones (each
    # keyed by its own fragment index) there is nothing to sync.
    part_path = os.path.join(self.devices, self.local_dev['device'],
                             diskfile.get_data_dir(self.policy), '1')
    utils.mkdirs(part_path)
    part_info = dict(
        local_dev=self.local_dev,
        policy=self.policy,
        partition=1,
        part_path=part_path,
    )
    jobs = self.reconstructor.build_reconstruction_jobs(part_info)
    self.assertEqual(1, len(jobs))
    job = jobs[0]
    node = job['sync_to'][0]

    def hashes_for(index):
        # identical hash values, keyed by the given fragment index
        return {
            '123': {index: 'hash', None: 'hash'},
            'abc': {index: 'hash', None: 'hash'},
        }

    local_hashes = hashes_for(job['frag_index'])
    self.assertEqual(node['index'], self.policy.object_ring.replicas - 1)
    remote_index = self.policy.get_backend_index(node['index'])
    remote_response = pickle.dumps(hashes_for(remote_index))
    hashes_patch = mock.patch(
        'swift.obj.diskfile.ECDiskFileManager._get_hashes',
        return_value=(None, local_hashes))
    with hashes_patch, \
            mocked_http_conn(200, body=remote_response) as request_log:
        suffixes, new_node = self.reconstructor._get_suffixes_to_sync(
            job, node)
    requested_ips = [req['ip'] for req in request_log.requests]
    self.assertEqual([node['replication_ip']], requested_ips)
    self.assertEqual(suffixes, [])
    self.assertEqual(new_node, node)
|
|
|
|
def test_get_suffix_delta(self):
    # Table of (local suffixes, remote suffixes, expected delta); the
    # local and remote fragment indexes are 0 throughout.
    local_index = remote_index = 0
    local_suff = {'123': {None: 'abc', 0: 'def'}}
    cases = (
        # different
        (local_suff, {'456': {None: 'ghi', 0: 'jkl'}}, ['123']),
        # now the same
        (local_suff, {'123': {None: 'abc', 0: 'def'}}, []),
        # now with a mis-matched None key (missing durable)
        (local_suff, {'123': {None: 'ghi', 0: 'def'}}, ['123']),
        # now with bogus local index
        ({'123': {None: 'abc', 99: 'def'}},
         {'456': {None: 'ghi', 0: 'jkl'}}, ['123']),
    )
    for local, remote, expected in cases:
        suffs = self.reconstructor.get_suffix_delta(
            local, local_index, remote, remote_index)
        self.assertEqual(suffs, expected)
|
|
|
|
def test_process_job_primary_in_sync(self):
    # A SYNC job whose left, right and far partners all report matching
    # suffix hashes: each partner gets one suffix-hash request and
    # nothing is handed to ssync.
    partition = 0
    part_nodes = self.policy.object_ring.get_part_nodes(partition)
    local_dev = random.choice(part_nodes)
    frag_index = self.policy.get_backend_index(local_dev['index'])
    sync_to = object_reconstructor._get_partners(
        local_dev['index'], part_nodes)

    def in_sync_hashes(index):
        # both suffixes present, same hash values for any frag index
        return {
            '123': {index: 'hash', None: 'hash'},
            'abc': {index: 'hash', None: 'hash'},
        }

    stub_hashes = in_sync_hashes(frag_index)
    # one pickled hashes response per partner: left, right, then far
    body_iter = tuple(
        pickle.dumps(in_sync_hashes(
            self.policy.get_backend_index(node['index'])))
        for node in sync_to[:3])
    codes = (200,) * len(body_iter)

    part_path = os.path.join(self.devices, self.local_dev['device'],
                             diskfile.get_data_dir(self.policy),
                             str(partition))
    job = dict(
        job_type=object_reconstructor.SYNC,
        frag_index=frag_index,
        suffixes=stub_hashes.keys(),
        sync_to=sync_to,
        partition=partition,
        path=part_path,
        hashes=stub_hashes,
        policy=self.policy,
        local_dev=self.local_dev,
    )

    ssync_calls = []
    hashes_patch = mock.patch(
        'swift.obj.diskfile.ECDiskFileManager._get_hashes',
        return_value=(None, stub_hashes))
    with mock_ssync_sender(ssync_calls), hashes_patch, \
            mocked_http_conn(*codes, body_iter=body_iter) as request_log:
        self.reconstructor.process_job(job)

    expected_suffix_calls = [
        (node['ip'], '/%s/0' % node['device']) for node in sync_to[:3]]
    found_suffix_calls = [
        (req['ip'], req['path']) for req in request_log.requests]
    self.assertEqual(expected_suffix_calls, found_suffix_calls)

    self.assertFalse(ssync_calls)
|
|
|
|
def test_process_job_primary_not_in_sync(self):
    # All three partners report empty hashes, so after the suffix-hash
    # requests every partner is ssync'd both local suffixes.
    partition = 0
    part_nodes = self.policy.object_ring.get_part_nodes(partition)
    local_dev = random.choice(part_nodes)
    frag_index = self.policy.get_backend_index(local_dev['index'])
    sync_to = object_reconstructor._get_partners(
        local_dev['index'], part_nodes)
    stub_hashes = {
        '123': {frag_index: 'hash', None: 'hash'},
        'abc': {frag_index: 'hash', None: 'hash'},
    }
    # left, right and far partners each answer 200 with no hashes at all
    codes = (200,) * 3
    body_iter = tuple(pickle.dumps({}) for _ in codes)

    part_path = os.path.join(self.devices, self.local_dev['device'],
                             diskfile.get_data_dir(self.policy),
                             str(partition))
    job = dict(
        job_type=object_reconstructor.SYNC,
        frag_index=frag_index,
        suffixes=stub_hashes.keys(),
        sync_to=sync_to,
        partition=partition,
        path=part_path,
        hashes=stub_hashes,
        policy=self.policy,
        local_dev=self.local_dev,
    )

    ssync_calls = []
    hashes_patch = mock.patch(
        'swift.obj.diskfile.ECDiskFileManager._get_hashes',
        return_value=(None, stub_hashes))
    with mock_ssync_sender(ssync_calls), hashes_patch, \
            mocked_http_conn(*codes, body_iter=body_iter) as request_log:
        self.reconstructor.process_job(job)

    expected_suffix_calls = [
        (node['ip'], '/%s/0' % node['device']) for node in sync_to[:3]]
    found_suffix_calls = [
        (req['ip'], req['path']) for req in request_log.requests]
    self.assertEqual(expected_suffix_calls, found_suffix_calls)

    expected_ssync_calls = sorted(
        (node['ip'], 0, {'123', 'abc'}, False) for node in sync_to[:3])
    found_ssync_calls = sorted(
        (call['node']['ip'],
         call['job']['partition'],
         set(call['suffixes']),
         call.get('include_non_durable'))
        for call in ssync_calls)
    self.assertEqual(expected_ssync_calls, found_ssync_calls)
|
|
|
|
def test_sync_duplicates_to_remote_region(self):
    # Only the right-hand partner is out of sync; the ssync connection
    # is mocked so the headers put on the request to it can be checked.
    partition = 0
    part_nodes = self.policy.object_ring.get_part_nodes(partition)
    # in the non-duplicate case we just pick a random node
    local_dev = random.choice(part_nodes[-14:])
    frag_index = self.policy.get_backend_index(local_dev['index'])
    sync_to = object_reconstructor._get_partners(
        local_dev['index'], part_nodes)
    part_path = os.path.join(self.devices, self.local_dev['device'],
                             diskfile.get_data_dir(self.policy),
                             str(partition))

    def hashes_for(index, abc_hash='hash'):
        # suffix hashes keyed by frag index; abc_hash lets one suffix
        # disagree with the local value
        return {
            '123': {index: 'hash', None: 'hash'},
            'abc': {index: abc_hash, None: 'hash'},
        }

    stub_hashes = hashes_for(frag_index)
    # left hand side is in sync
    left_frag_index = self.policy.get_backend_index(sync_to[0]['index'])
    # right hand side needs sync
    right_frag_index = self.policy.get_backend_index(sync_to[1]['index'])
    far_index = self.policy.get_backend_index(sync_to[2]['index'])
    body_iter = (
        pickle.dumps(hashes_for(left_frag_index)),
        pickle.dumps(hashes_for(right_frag_index, abc_hash='hashX')),
        pickle.dumps(hashes_for(far_index)),
    )
    codes = (200,) * len(body_iter)

    job = dict(
        job_type=object_reconstructor.SYNC,
        frag_index=frag_index,
        suffixes=stub_hashes.keys(),
        sync_to=sync_to,
        partition=partition,
        path=part_path,
        hashes=stub_hashes,
        policy=self.policy,
        local_dev=self.local_dev,
        device=self.local_dev['device'],
    )

    # we're going to dip our mocks into the ssync layer a bit
    ssync_resp = mock.MagicMock()
    ssync_resp.status = 200
    ssync_resp.readline.side_effect = [
        b':MISSING_CHECK: START',
        b':MISSING_CHECK: END',
        b':UPDATES: START',
        b':UPDATES: END',
    ]

    ssync_headers = []
    ssync_conn = mock.MagicMock()
    ssync_conn.getresponse.return_value = ssync_resp
    # record every header the sender puts on the connection, in order
    ssync_conn.putheader = (
        lambda name, value: ssync_headers.append((name, value)))

    conn_patch = mock.patch(
        'swift.obj.ssync_sender.SsyncBufferedHTTPConnection',
        return_value=ssync_conn)
    hashes_patch = mock.patch(
        'swift.obj.diskfile.ECDiskFileManager._get_hashes',
        return_value=(None, stub_hashes))
    yield_patch = mock.patch(
        'swift.obj.diskfile.ECDiskFileManager.yield_hashes',
        return_value=iter([]))
    with conn_patch, hashes_patch, yield_patch, \
            mocked_http_conn(*codes, body_iter=body_iter):
        self.reconstructor.process_job(job)

    # ... to make sure it sets up our headers correctly
    expected_headers = [
        ('Transfer-Encoding', 'chunked'),
        ('X-Backend-Storage-Policy-Index', 0),
        ('X-Backend-Ssync-Frag-Index', right_frag_index),
        # we include this for backwards compat
        ('X-Backend-Ssync-Node-Index', right_frag_index),
    ]
    self.assertEqual(ssync_headers, expected_headers)
|
|
|
|
def test_process_job_sync_missing_durable(self):
    # The right-hand partner has the fragment but its None-key hash for
    # suffix 'abc' differs, so only that suffix is ssync'd to it.
    partition = 0
    part_nodes = self.policy.object_ring.get_part_nodes(partition)
    local_dev = random.choice(part_nodes)
    frag_index = self.policy.get_backend_index(local_dev['index'])
    sync_to = object_reconstructor._get_partners(
        local_dev['index'], part_nodes)

    def hashes_for(index, abc_durable='hash'):
        # suffix hashes keyed by frag index; abc_durable sets the
        # None-key hash of suffix 'abc'
        return {
            '123': {index: 'hash', None: 'hash'},
            'abc': {index: 'hash', None: abc_durable},
        }

    stub_hashes = hashes_for(frag_index)
    # left hand side is in sync
    left_frag_index = self.policy.get_backend_index(sync_to[0]['index'])
    # right hand side has fragment, but no durable (None key is whack)
    right_frag_index = self.policy.get_backend_index(sync_to[1]['index'])
    # far side is in sync
    far_index = self.policy.get_backend_index(sync_to[2]['index'])
    body_iter = (
        pickle.dumps(hashes_for(left_frag_index)),
        pickle.dumps(hashes_for(
            right_frag_index, abc_durable='different-because-durable')),
        pickle.dumps(hashes_for(far_index)),
    )
    codes = (200,) * len(body_iter)

    part_path = os.path.join(self.devices, self.local_dev['device'],
                             diskfile.get_data_dir(self.policy),
                             str(partition))
    job = dict(
        job_type=object_reconstructor.SYNC,
        frag_index=frag_index,
        suffixes=stub_hashes.keys(),
        sync_to=sync_to,
        partition=partition,
        path=part_path,
        hashes=stub_hashes,
        policy=self.policy,
        local_dev=self.local_dev,
    )

    ssync_calls = []
    hashes_patch = mock.patch(
        'swift.obj.diskfile.ECDiskFileManager._get_hashes',
        return_value=(None, stub_hashes))
    with mock_ssync_sender(ssync_calls), hashes_patch, \
            mocked_http_conn(*codes, body_iter=body_iter) as request_log:
        self.reconstructor.process_job(job)

    expected_suffix_calls = {
        (node['ip'], '/%s/0' % node['device']) for node in sync_to[:3]}
    found_suffix_calls = {
        (req['ip'], req['path']) for req in request_log.requests}
    self.assertEqual(expected_suffix_calls, found_suffix_calls)

    expected_ssync_calls = sorted([
        (sync_to[1]['ip'], 0, ['abc'], False),
    ])
    found_ssync_calls = sorted(
        (call['node']['ip'],
         call['job']['partition'],
         call['suffixes'],
         call.get('include_non_durable'))
        for call in ssync_calls)
    self.assertEqual(expected_ssync_calls, found_ssync_calls)
|
|
|
|
def test_process_job_primary_some_in_sync(self):
    # Each partner is out of sync in a different way: left disagrees on
    # '123', right lacks 'abc', far lacks '123' and disagrees on 'abc'.
    # Each should be ssync'd exactly the suffixes it needs.
    partition = 0
    part_nodes = self.policy.object_ring.get_part_nodes(partition)
    local_dev = random.choice(part_nodes)
    frag_index = self.policy.get_backend_index(local_dev['index'])
    sync_to = object_reconstructor._get_partners(
        local_dev['index'], part_nodes)
    stub_hashes = {
        '123': {frag_index: 'hash', None: 'hash'},
        'abc': {frag_index: 'hash', None: 'hash'},
    }
    left_frag_index = self.policy.get_backend_index(sync_to[0]['index'])
    right_frag_index = self.policy.get_backend_index(sync_to[1]['index'])
    far_index = self.policy.get_backend_index(sync_to[2]['index'])
    remote_hashes = (
        # left
        {'123': {left_frag_index: 'hashX', None: 'hash'},
         'abc': {left_frag_index: 'hash', None: 'hash'}},
        # right
        {'123': {right_frag_index: 'hash', None: 'hash'}},
        # far
        {'abc': {far_index: 'hashX', None: 'hash'}},
    )
    body_iter = tuple(pickle.dumps(hashes) for hashes in remote_hashes)
    codes = (200,) * len(body_iter)

    part_path = os.path.join(self.devices, self.local_dev['device'],
                             diskfile.get_data_dir(self.policy),
                             str(partition))
    job = dict(
        job_type=object_reconstructor.SYNC,
        frag_index=frag_index,
        suffixes=stub_hashes.keys(),
        sync_to=sync_to,
        partition=partition,
        path=part_path,
        hashes=stub_hashes,
        policy=self.policy,
        local_dev=self.local_dev,
    )

    ssync_calls = []
    hashes_patch = mock.patch(
        'swift.obj.diskfile.ECDiskFileManager._get_hashes',
        return_value=(None, stub_hashes))
    with mock_ssync_sender(ssync_calls), hashes_patch, \
            mocked_http_conn(*codes, body_iter=body_iter) as request_log:
        self.reconstructor.process_job(job)

    expected_suffix_calls = {
        (node['ip'], '/%s/0' % node['device']) for node in sync_to[:3]}
    found_suffix_calls = {
        (req['ip'], req['path']) for req in request_log.requests}
    self.assertEqual(expected_suffix_calls, found_suffix_calls)

    # tally ssync calls by (target index, suffixes, include_non_durable)
    found_ssync_counts = dict(collections.Counter(
        (call['node']['index'],
         tuple(sorted(call['suffixes'])),
         call.get('include_non_durable'))
        for call in ssync_calls))
    expected_ssync_counts = {
        (sync_to[0]['index'], ('123',), False): 1,
        (sync_to[1]['index'], ('abc',), False): 1,
        (sync_to[2]['index'], ('123', 'abc'), False): 1,
    }
    self.assertEqual(found_ssync_counts, expected_ssync_counts)
|
|
|
|
def test_process_job_primary_down(self):
    # The first ssync attempt fails and later ones succeed; every
    # sync_to node should still get its suffix-hash request and an
    # ssync call for both suffixes.
    partition = 0
    frag_index = random.randint(
        0, self.policy.ec_n_unique_fragments - 1)
    stub_hashes = {
        '123': {frag_index: 'hash', None: 'hash'},
        'abc': {frag_index: 'hash', None: 'hash'},
    }

    part_nodes = self.policy.object_ring.get_part_nodes(partition)
    sync_to = part_nodes[:3]

    part_path = os.path.join(self.devices, self.local_dev['device'],
                             diskfile.get_data_dir(self.policy),
                             str(partition))
    job = dict(
        job_type=object_reconstructor.SYNC,
        frag_index=frag_index,
        suffixes=stub_hashes.keys(),
        sync_to=sync_to,
        partition=partition,
        path=part_path,
        hashes=stub_hashes,
        policy=self.policy,
        device=self.local_dev['device'],
        local_dev=self.local_dev,
    )

    callback_calls = []

    def ssync_response_callback(*args):
        # in this test, ssync fails on the first (primary sync_to) node
        callback_calls.append(args)
        return len(callback_calls) > 1, {}

    expected_suffix_calls = {
        (node['replication_ip'], '/%s/0' % node['device'])
        for node in part_nodes[:3]}

    ssync_calls = []
    hashes_patch = mock.patch(
        'swift.obj.diskfile.ECDiskFileManager._get_hashes',
        return_value=(None, stub_hashes))
    with mock_ssync_sender(ssync_calls,
                           response_callback=ssync_response_callback), \
            hashes_patch, \
            mocked_http_conn(*[200] * len(expected_suffix_calls),
                             body=pickle.dumps({})) as request_log:
        self.reconstructor.process_job(job)

    found_suffix_calls = {
        (req['ip'], req['path']) for req in request_log.requests}
    self.assertEqual(expected_suffix_calls, found_suffix_calls)

    expected_ssync_calls = sorted(
        (ip, 0, {'123', 'abc'}, False)
        for ip in ('10.0.0.0', '10.0.0.1', '10.0.0.2'))
    found_ssync_calls = sorted(
        (call['node']['ip'],
         call['job']['partition'],
         set(call['suffixes']),
         call.get('include_non_durable'))
        for call in ssync_calls)
    self.assertEqual(expected_ssync_calls, found_ssync_calls)
|
|
|
|
def test_process_job_suffix_call_errors(self):
    # SYNC job where every suffix hash request to a sync_to node errors:
    # the requests are still made, but no ssync is attempted.
    partition = 0
    frag_index = random.randint(
        0, self.policy.ec_n_unique_fragments - 1)
    stub_hashes = {
        suffix: {frag_index: 'hash', None: 'hash'}
        for suffix in ('123', 'abc')
    }

    sync_to = self.policy.object_ring.get_part_nodes(partition)[:2]

    part_path = os.path.join(
        self.devices, self.local_dev['device'],
        diskfile.get_data_dir(self.policy), str(partition))
    job = {
        'job_type': object_reconstructor.SYNC,
        'frag_index': frag_index,
        'suffixes': stub_hashes.keys(),
        'sync_to': sync_to,
        'partition': partition,
        'path': part_path,
        'hashes': stub_hashes,
        'policy': self.policy,
        'device': self.local_dev['device'],
        'local_dev': self.local_dev,
    }

    expected_suffix_calls = {
        (node['replication_ip'], '/%s/0' % node['device'])
        for node in sync_to
    }

    # one randomly chosen failure per expected suffix request
    possible_errors = [404, Timeout(), Exception('kaboom!')]
    codes = [random.choice(possible_errors)
             for _ in expected_suffix_calls]

    ssync_calls = []
    with mock_ssync_sender(ssync_calls), \
            mock.patch(
                'swift.obj.diskfile.ECDiskFileManager._get_hashes',
                return_value=(None, stub_hashes)), \
            mocked_http_conn(*codes) as request_log:
        self.reconstructor.process_job(job)

    found_suffix_calls = {
        (req['ip'], req['path']) for req in request_log.requests}
    self.assertEqual(expected_suffix_calls, found_suffix_calls)

    self.assertFalse(ssync_calls)
|
|
|
|
def test_process_job_sync_partner_unmounted(self):
    # SYNC job where the right partner answers 507: suffix hash requests
    # are expected to the left partner, the right partner, one handoff
    # and the far partner, while ssync is only expected to the
    # out-of-sync left partner and the handoff.
    partition = 0
    part_nodes = self.policy.object_ring.get_part_nodes(partition)
    local_id = self.local_dev['id']
    frag_index = [n['id'] for n in part_nodes].index(local_id)
    sync_to = object_reconstructor._get_partners(frag_index, part_nodes)
    self.assertEqual(3, len(sync_to))
    left_partner, right_partner, far_partner = sync_to
    stub_hashes = {
        '123': {frag_index: 'hash', None: 'hash'},
        'abc': {frag_index: 'hash', None: 'hash'},
    }
    # left partner out of sync
    left_frag_index = self.policy.get_backend_index(left_partner['index'])
    left_hashes = {
        '123': {left_frag_index: 'not-in-sync-hash', None: 'hash'},
        'abc': {left_frag_index: 'hash', None: 'hash'},
    }
    # we don't need right partner hashes
    # far partner in sync
    far_index = self.policy.get_backend_index(far_partner['index'])
    far_hashes = {
        '123': {far_index: 'hash', None: 'hash'},
        'abc': {far_index: 'hash', None: 'hash'},
    }
    part_path = os.path.join(
        self.devices, self.local_dev['device'],
        diskfile.get_data_dir(self.policy), str(partition))
    job = {
        'job_type': object_reconstructor.SYNC,
        'frag_index': frag_index,
        'suffixes': stub_hashes.keys(),
        'sync_to': sync_to,
        'partition': partition,
        'path': part_path,
        'hashes': stub_hashes,
        'policy': self.policy,
        'device': self.local_dev['device'],
        'local_dev': self.local_dev,
    }

    codes, body_iter = zip(
        (200, pickle.dumps(left_hashes)),  # hashes left partner
        (507, ''),  # unmounted right partner
        (200, pickle.dumps({})),  # hashes handoff
        (200, pickle.dumps(far_hashes)),  # hashes far partner
    )

    ssync_calls = []
    with mock_ssync_sender(ssync_calls), \
            mock.patch(
                'swift.obj.diskfile.ECDiskFileManager._get_hashes',
                return_value=(None, stub_hashes)), \
            mocked_http_conn(*codes, body_iter=body_iter) as request_log:
        self.reconstructor.process_job(job)
    # increment frag_index since we're rebuilding to our right
    frag_index = (frag_index + 1) % self.policy.ec_n_unique_fragments
    handoffs = self.policy.object_ring.get_more_nodes(partition)
    # pick the handoff at position frag_index in the handoff sequence
    handoff = next(itertools.islice(handoffs, frag_index, None), None)
    if handoff is None:
        self.fail('Unable to find handoff?!')
    expected_requests = collections.Counter([
        (200, left_partner['ip']),
        (507, right_partner['ip']),
        (200, handoff['ip']),
        (200, far_partner['ip']),
    ])
    found_requests = collections.Counter(
        [(code, req['ip'])
         for code, req in zip(codes, request_log.requests)])
    self.assertEqual(expected_requests, found_requests)
    expected_ssync = collections.Counter([
        left_partner['ip'],
        handoff['ip'],
    ])
    found_ssync = collections.Counter(
        [call['node']['ip'] for call in ssync_calls])
    self.assertEqual(expected_ssync, found_ssync)
|
|
|
|
def test_process_job_handoff(self):
    # REVERT job: both suffixes are expected to be ssync'd exactly once
    # to the single sync_to node, with include_non_durable set.
    frag_index = random.randint(
        0, self.policy.ec_n_unique_fragments - 1)
    remote_devs = [dev for dev in self.policy.object_ring.devs
                   if dev != self.local_dev]
    target = random.choice(remote_devs)
    target['index'] = frag_index
    sync_to = [target]

    stub_hashes = {
        suffix: {frag_index: 'hash', None: 'hash'}
        for suffix in ('123', 'abc')
    }
    partition = 0
    part_path = os.path.join(
        self.devices, self.local_dev['device'],
        diskfile.get_data_dir(self.policy), str(partition))
    os.makedirs(part_path)
    job = {
        'job_type': object_reconstructor.REVERT,
        'frag_index': frag_index,
        'suffixes': stub_hashes.keys(),
        'sync_to': sync_to,
        'partition': partition,
        'path': part_path,
        'hashes': stub_hashes,
        'policy': self.policy,
        'local_dev': self.local_dev,
        'device': self.local_dev['device'],
    }

    ssync_calls = []
    with mock_ssync_sender(ssync_calls), \
            mock.patch(
                'swift.obj.diskfile.ECDiskFileManager._get_hashes',
                return_value=(None, stub_hashes)):
        self.reconstructor.process_job(job)

    found = collections.Counter(
        (call['node']['ip'], call['node']['port'],
         call['node']['device'],
         tuple(sorted(call['suffixes'])),
         call.get('include_non_durable'))
        for call in ssync_calls)
    expected_call = (target['ip'], target['port'], target['device'],
                     ('123', 'abc'), True)
    self.assertEqual(sorted(found.items()), [(expected_call, 1)])
|
|
|
|
def test_process_job_will_not_revert_to_handoff(self):
    # REVERT job whose first ssync attempt fails: only the one ssync call
    # to the sync_to node is expected, and no suffix hash request.
    frag_index = random.randint(
        0, self.policy.ec_n_unique_fragments - 1)
    remote_devs = [dev for dev in self.policy.object_ring.devs
                   if dev != self.local_dev]
    target = random.choice(remote_devs)
    target['index'] = frag_index
    sync_to = [target]
    partition = 0

    stub_hashes = {
        suffix: {frag_index: 'hash', None: 'hash'}
        for suffix in ('123', 'abc')
    }
    part_path = os.path.join(
        self.devices, self.local_dev['device'],
        diskfile.get_data_dir(self.policy), str(partition))
    os.makedirs(part_path)
    job = {
        'job_type': object_reconstructor.REVERT,
        'frag_index': frag_index,
        'suffixes': stub_hashes.keys(),
        'sync_to': sync_to,
        'partition': partition,
        'path': part_path,
        'hashes': stub_hashes,
        'policy': self.policy,
        'local_dev': self.local_dev,
        'device': self.local_dev['device'],
    }

    ssync_attempts = {'count': 0}

    def ssync_response_callback(*args):
        # in this test, ssync fails on the first (primary sync_to) node;
        # any later attempt would succeed
        if ssync_attempts['count'] < 1:
            ssync_attempts['count'] += 1
            return False, {}
        return True, {}

    ssync_calls = []
    with mock_ssync_sender(
            ssync_calls, response_callback=ssync_response_callback), \
            mocked_http_conn() as request_log:
        self.reconstructor.process_job(job)

    # failed ssync job should not generate a suffix rehash
    self.assertEqual([], request_log.requests)

    found = collections.Counter(
        (call['node']['ip'], call['node']['port'],
         call['node']['device'],
         tuple(sorted(call['suffixes'])),
         call.get('include_non_durable'))
        for call in ssync_calls)
    expected_call = (target['ip'], target['port'], target['device'],
                     ('123', 'abc'), True)
    self.assertEqual(sorted(found.items()), [(expected_call, 1)])
|
|
|
|
def test_process_job_revert_is_handoff_fails(self):
    # REVERT job whose local_dev is the last handoff node and whose ssync
    # always fails: one ssync call to the sync_to node is expected, no
    # suffix hash request, and handoffs_remaining ends up at 1.
    frag_index = random.randint(
        0, self.policy.ec_n_unique_fragments - 1)
    remote_devs = [dev for dev in self.policy.object_ring.devs
                   if dev != self.local_dev]
    target = random.choice(remote_devs)
    target['index'] = frag_index
    sync_to = [target]
    partition = 0
    handoff_nodes = list(self.policy.object_ring.get_more_nodes(partition))

    stub_hashes = {
        suffix: {frag_index: 'hash', None: 'hash'}
        for suffix in ('123', 'abc')
    }
    part_path = os.path.join(
        self.devices, self.local_dev['device'],
        diskfile.get_data_dir(self.policy), str(partition))
    os.makedirs(part_path)
    job = {
        'job_type': object_reconstructor.REVERT,
        'frag_index': frag_index,
        'suffixes': stub_hashes.keys(),
        'sync_to': sync_to,
        'partition': partition,
        'path': part_path,
        'hashes': stub_hashes,
        'policy': self.policy,
        'local_dev': handoff_nodes[-1],
        'device': self.local_dev['device'],
    }

    def ssync_response_callback(*args):
        # in this test ssync always fails, until we encounter ourselves in
        # the list of possible handoff's to sync to, so handoffs_remaining
        # should increment
        return False, {}

    ssync_calls = []
    with mock_ssync_sender(
            ssync_calls, response_callback=ssync_response_callback), \
            mocked_http_conn() as request_log:
        self.reconstructor.process_job(job)

    # failed ssync job should not generate a suffix rehash
    self.assertEqual([], request_log.requests)

    # this is ssync call to primary (which fails) and nothing else!
    found = collections.Counter(
        (call['node']['ip'], call['node']['port'],
         call['node']['device'],
         tuple(sorted(call['suffixes'])),
         call.get('include_non_durable'))
        for call in ssync_calls)
    expected_call = (target['ip'], target['port'], target['device'],
                     ('123', 'abc'), True)
    self.assertEqual(sorted(found.items()), [(expected_call, 1)])
    self.assertEqual(self.reconstructor.handoffs_remaining, 1)
|
|
|
|
def test_process_job_revert_cleanup(self):
    # REVERT job for a single data fragment where ssync reports success:
    # the object's hash dir is removed and handoffs_remaining stays 0.
    frag_index = random.randint(
        0, self.policy.ec_n_unique_fragments - 1)
    remote_devs = [dev for dev in self.policy.object_ring.devs
                   if dev != self.local_dev]
    target = random.choice(remote_devs)
    target['index'] = frag_index
    sync_to = [target]
    partition = 0

    part_path = os.path.join(
        self.devices, self.local_dev['device'],
        diskfile.get_data_dir(self.policy), str(partition))
    os.makedirs(part_path)
    df_mgr = self.reconstructor._df_router[self.policy]
    df = df_mgr.get_diskfile(
        self.local_dev['device'], partition, 'a', 'c', 'data-obj',
        policy=self.policy)
    ts = self.ts()
    test_data = b'test data'
    with df.create() as writer:
        writer.write(test_data)
        writer.put({
            'X-Timestamp': ts.internal,
            'Content-Length': len(test_data),
            'Etag': md5(test_data, usedforsecurity=False).hexdigest(),
            'X-Object-Sysmeta-Ec-Frag-Index': frag_index,
        })
        writer.commit(ts)

    hash_dir = df._datadir
    ohash = os.path.basename(hash_dir)
    suffix = os.path.basename(os.path.dirname(hash_dir))

    job = {
        'job_type': object_reconstructor.REVERT,
        'frag_index': frag_index,
        'primary_frag_index': None,
        'suffixes': [suffix],
        'sync_to': sync_to,
        'partition': partition,
        'path': part_path,
        'hashes': {},
        'policy': self.policy,
        'local_dev': self.local_dev,
        'device': self.local_dev['device'],
    }

    def ssync_response_callback(*args):
        # success should not increment handoffs_remaining
        return True, {ohash: {'ts_data': ts}}

    ssync_calls = []
    with mock_ssync_sender(
            ssync_calls, response_callback=ssync_response_callback):
        self.reconstructor.process_job(job)

    # hashpath has been removed
    self.assertFalse(os.path.exists(df._datadir))

    self.assertEqual(self.reconstructor.handoffs_remaining, 0)
|
|
|
|
def test_process_job_revert_cleanup_but_already_reclaimed(self):
    # REVERT job for a tombstone that passes its reclaim age while ssync
    # is in progress: cleanup must not log any error.
    frag_index = random.randint(
        0, self.policy.ec_n_unique_fragments - 1)
    remote_devs = [dev for dev in self.policy.object_ring.devs
                   if dev != self.local_dev]
    target = random.choice(remote_devs)
    target['index'] = frag_index
    sync_to = [target]
    partition = 0

    part_path = os.path.join(
        self.devices, self.local_dev['device'],
        diskfile.get_data_dir(self.policy), str(partition))
    os.makedirs(part_path)
    df_mgr = self.reconstructor._df_router[self.policy]
    df = df_mgr.get_diskfile(
        self.local_dev['device'], partition, 'a', 'c', 'data-obj',
        policy=self.policy)
    ts_delete = self.ts()
    df.delete(ts_delete)
    hash_dir = df._datadir
    ohash = os.path.basename(hash_dir)
    suffix = os.path.basename(os.path.dirname(hash_dir))

    job = {
        'job_type': object_reconstructor.REVERT,
        'frag_index': frag_index,
        'suffixes': [suffix],
        'sync_to': sync_to,
        'partition': partition,
        'path': part_path,
        'hashes': {},
        'policy': self.policy,
        'local_dev': self.local_dev,
        'device': self.local_dev['device'],
    }

    # the fake clock starts 100 seconds short of the reclaim age
    clock = {'now': float(ts_delete) + df_mgr.reclaim_age - 100}

    def mock_time():
        return clock['now']

    def ssync_response_callback(*args):
        # pretend ssync completed and time has moved just beyond the
        # reclaim age for the tombstone
        clock['now'] = float(ts_delete) + df_mgr.reclaim_age + 1
        return True, {ohash: {'ts_data': ts_delete}}

    ssync_calls = []
    with mock.patch('swift.obj.diskfile.time.time', mock_time), \
            mock_ssync_sender(
                ssync_calls, response_callback=ssync_response_callback):
        self.reconstructor.process_job(job)

    self.assertFalse(os.path.exists(df._datadir))
    self.assertEqual(self.reconstructor.handoffs_remaining, 0)
    # check there's no tracebacks for opening the reclaimed tombstone
    error_lines = self.reconstructor.logger.logger.get_lines_for_level(
        'error')
    self.assertEqual([], error_lines)
|
|
|
|
def _make_frag(self, df, fi, ts_data):
    # Write and commit a small fragment with frag index ``fi`` at
    # timestamp ``ts_data`` using the given diskfile ``df``.
    test_data = b'test data'
    with df.create() as writer:
        writer.write(test_data)
        writer.put({
            'X-Timestamp': ts_data.internal,
            'Content-Length': len(test_data),
            'Etag': md5(test_data, usedforsecurity=False).hexdigest(),
            'X-Object-Sysmeta-Ec-Frag-Index': fi,
        })
        writer.commit(ts_data)
|
|
|
|
def _do_test_process_job_revert_cleanup_with_meta(self, frag_indexes,
                                                  primary_frag_index):
    # Write one fragment per entry in ``frag_indexes`` (plus one for
    # ``primary_frag_index`` when it is not None) and a newer .meta file,
    # then run one successful REVERT job per entry in ``frag_indexes``.
    # Returns the diskfile so callers can inspect what is left on disk.
    remote_devs = [dev for dev in self.policy.object_ring.devs
                   if dev != self.local_dev]
    # each job gets its own copy of a randomly chosen remote device
    sync_to = [[dict(random.choice(remote_devs), index=frag_index)]
               for frag_index in frag_indexes]
    partition = 0

    part_path = os.path.join(
        self.devices, self.local_dev['device'],
        diskfile.get_data_dir(self.policy), str(partition))
    mkdirs(part_path)
    df_mgr = self.reconstructor._df_router[self.policy]
    df = df_mgr.get_diskfile(
        self.local_dev['device'], partition, 'a', 'c', 'data-obj',
        policy=self.policy)

    ts_data = self.ts()
    for frag_index in frag_indexes:
        self._make_frag(df, frag_index, ts_data)
    if primary_frag_index is not None:
        self._make_frag(df, primary_frag_index, ts_data)
    ts_meta = self.ts()
    df.write_metadata({'X-Timestamp': ts_meta.internal,
                       'X-Object-Meta-Test': 'testing'})

    hash_dir = df._datadir
    ohash = os.path.basename(hash_dir)
    suffix = os.path.basename(os.path.dirname(hash_dir))

    jobs = [{
        'job_type': object_reconstructor.REVERT,
        'frag_index': frag_index,
        'primary_frag_index': primary_frag_index,
        'suffixes': [suffix],
        'sync_to': job_sync_to,
        'partition': partition,
        'path': part_path,
        'hashes': {},
        'policy': self.policy,
        'local_dev': self.local_dev,
        'device': self.local_dev['device'],
    } for frag_index, job_sync_to in zip(frag_indexes, sync_to)]

    ondisk_files_during_sync = []

    def ssync_response_callback(*args):
        # snapshot the hash dir contents at the time of each ssync call
        ondisk_files_during_sync.append(os.listdir(df._datadir))
        # success should not increment handoffs_remaining
        return True, {ohash: {'ts_data': ts_data, 'ts_meta': ts_meta}}

    ssync_calls = []
    with mock_ssync_sender(
            ssync_calls, response_callback=ssync_response_callback):
        for job in jobs:
            self.reconstructor.process_job(job)

    self.assertEqual(self.reconstructor.handoffs_remaining, 0)
    self.assertEqual(len(jobs), len(ssync_calls))
    self.assertEqual(len(jobs), len(ondisk_files_during_sync))
    # verify that the meta file is intact at start of every job/ssync
    # call: if it is removed at all, it should be removed in the *last*
    # call
    meta_filename = ts_meta.internal + '.meta'
    for fileset in ondisk_files_during_sync:
        self.assertIn(meta_filename, fileset)
    return df
|
|
|
|
def test_process_job_revert_does_cleanup_meta_pure_handoff(self):
    # verify that dangling meta files are cleaned up if the revert job is
    # for a pure handoff partition
    max_index = self.policy.ec_n_unique_fragments - 1
    frag_index = random.randint(0, max_index)
    df = self._do_test_process_job_revert_cleanup_with_meta(
        frag_indexes=[frag_index], primary_frag_index=None)
    # hashpath has been removed
    self.assertFalse(os.path.exists(df._datadir))

    # repeat with a second, different frag index on the same handoff
    while True:
        extra_index = random.randint(0, max_index)
        if extra_index != frag_index:
            break
    df = self._do_test_process_job_revert_cleanup_with_meta(
        frag_indexes=[frag_index, extra_index], primary_frag_index=None)
    # hashpath has been removed
    self.assertFalse(os.path.exists(df._datadir))
|
|
|
|
def test_process_job_revert_does_not_cleanup_meta_also_primary(self):
    # verify that dangling meta files are not cleaned up if the revert job
    # is for a handoff partition that is also a primary for another frag
    # index
    frag_index = random.randint(
        0, self.policy.ec_n_unique_fragments - 1)
    # pick a primary frag index that differs from the reverted one
    primary_frag_index = frag_index
    while primary_frag_index == frag_index:
        primary_frag_index = random.randint(
            0, self.policy.ec_n_unique_fragments - 1)
    df = self._do_test_process_job_revert_cleanup_with_meta(
        frag_indexes=[frag_index], primary_frag_index=primary_frag_index)
    # hashpath has not been removed
    self.assertTrue(os.path.exists(df._datadir))
    file_info = df._manager.cleanup_ondisk_files(df._datadir)
    self.maxDiff = None
    # assertIn reports the missing key and the dict on failure, unlike
    # assertTrue(key in dict)
    self.assertIn('meta_file', file_info)
    self.assertTrue(os.path.exists(file_info['meta_file']))
    self.assertIn('data_info', file_info)
    self.assertEqual(primary_frag_index,
                     file_info['data_info']['frag_index'])
    self.assertTrue(os.path.exists(file_info['data_file']))
    # only the primary frag and meta file remain
    self.assertEqual(2, len(os.listdir(df._datadir)))
|
|
|
|
def test_process_job_revert_does_not_cleanup_meta_new_data(self):
    # verify that dangling meta files are not cleaned up if the revert job
    # is for a pure handoff partition that has a newer data frag in
    # addition to the frag that was sync'd
    frag_index = 0
    extra_frag_index = 1
    sync_to = [dict(random.choice([n for n in self.policy.object_ring.devs
                                   if n != self.local_dev]),
                    index=frag_index)]
    partition = 0

    part_path = os.path.join(self.devices, self.local_dev['device'],
                             diskfile.get_data_dir(self.policy),
                             str(partition))
    mkdirs(part_path)
    df_mgr = self.reconstructor._df_router[self.policy]
    df = df_mgr.get_diskfile(self.local_dev['device'], partition, 'a',
                             'c', 'data-obj', policy=self.policy)

    ts_data0 = self.ts()  # original frag
    ts_data1 = self.ts()  # new one written during ssync
    self._make_frag(df, frag_index, ts_data0)
    ts_meta = self.ts()
    df.write_metadata({'X-Timestamp': ts_meta.internal,
                       'X-Object-Meta-Test': 'testing'})

    ohash = os.path.basename(df._datadir)
    suffix = os.path.basename(os.path.dirname(df._datadir))

    job = {
        'job_type': object_reconstructor.REVERT,
        'frag_index': frag_index,
        'primary_frag_index': None,
        'suffixes': [suffix],
        'sync_to': sync_to,
        'partition': partition,
        'path': part_path,
        'hashes': {},
        'policy': self.policy,
        'local_dev': self.local_dev,
        'device': self.local_dev['device'],
    }

    def ssync_response_callback(*args):
        # pretend that during the ssync call the original frag is replaced
        # by a newer one
        self._make_frag(df, extra_frag_index, ts_data1)
        return True, {ohash: {'ts_data': ts_data0, 'ts_meta': ts_meta}}

    ssync_calls = []
    with mock_ssync_sender(ssync_calls,
                           response_callback=ssync_response_callback):
        self.reconstructor.process_job(job)

    self.assertEqual(1, len(ssync_calls))
    # hashpath has not been removed
    self.assertTrue(os.path.exists(df._datadir))
    file_info = df._manager.cleanup_ondisk_files(df._datadir)
    self.maxDiff = None
    self.assertIsNotNone(file_info['meta_file'])
    self.assertTrue(os.path.exists(file_info['meta_file']))
    # assertIn reports the missing key and the dict on failure, unlike
    # assertTrue(key in dict)
    self.assertIn('data_info', file_info)
    self.assertTrue(os.path.exists(file_info['data_file']))
    # only the newer frag and meta file remain
    self.assertEqual(2, len(os.listdir(df._datadir)))
    self.assertEqual(ts_data1, file_info['data_info']['timestamp'])
|
|
|
|
def test_process_job_revert_cleanup_tombstone(self):
    # REVERT job for a tombstone (frag_index None) where ssync reports
    # success: the hash dir is left in place but emptied.
    partition = 0
    other_primaries = [
        node
        for node in self.policy.object_ring.get_part_nodes(partition)
        if node['id'] != self.local_dev['id']
    ]
    sync_to = [random.choice(other_primaries)]

    part_path = os.path.join(
        self.devices, self.local_dev['device'],
        diskfile.get_data_dir(self.policy), str(partition))
    os.makedirs(part_path)
    df_mgr = self.reconstructor._df_router[self.policy]
    df = df_mgr.get_diskfile(
        self.local_dev['device'], partition, 'a', 'c', 'data-obj',
        policy=self.policy)
    ts = self.ts()
    df.delete(ts)

    hash_dir = df._datadir
    ohash = os.path.basename(hash_dir)
    suffix = os.path.basename(os.path.dirname(hash_dir))

    job = {
        'job_type': object_reconstructor.REVERT,
        'frag_index': None,
        'primary_frag_index': None,
        'suffixes': [suffix],
        'sync_to': sync_to,
        'partition': partition,
        'path': part_path,
        'hashes': {},
        'policy': self.policy,
        'local_dev': self.local_dev,
        'device': self.local_dev['device'],
    }

    def ssync_response_callback(*args):
        return True, {ohash: {'ts_data': ts}}

    ssync_calls = []
    with mock_ssync_sender(
            ssync_calls, response_callback=ssync_response_callback):
        self.reconstructor.process_job(job)

    # hashpath is still there, but it's empty
    self.assertEqual([], os.listdir(df._datadir))
|
|
|
|
def test_get_local_devices(self):
    # the reconstructor under test is expected to report only 'sda'
    self.assertEqual({'sda'}, self.reconstructor.get_local_devices())
|
|
|
|
@patch_policies(legacy_only=True)
def test_get_local_devices_with_no_ec_policy_env(self):
    # even when no EC policy is found on the server, it behaves just as
    # if no EC device was found
    self._configure_reconstructor()
    self.assertEqual([], self.reconstructor.policies)
    found_devices = self.reconstructor.get_local_devices()
    self.assertEqual(set(), found_devices)
|
|
|
|
@patch_policies(legacy_only=True)
def test_reconstruct_with_no_ec_policy_env(self):
    # With no EC policy configured, reconstruct() should call
    # collect_parts once, get nothing back, and log a single info line.
    self._configure_reconstructor()
    self.assertEqual([], self.reconstructor.policies)
    collected = []
    real_collect_parts = self.reconstructor.collect_parts

    def capture_collect_parts(**kwargs):
        # delegate to the real method, recording what it returned
        result = real_collect_parts(**kwargs)
        collected.append(result)
        return result

    with mock.patch.object(
            self.reconstructor, 'collect_parts', capture_collect_parts):
        self.reconstructor.reconstruct()

    # There is one call, and it returns an empty list
    self.assertEqual([[]], collected)
    log_lines = self.logger.all_log_lines()
    self.assertEqual(log_lines, {'info': [mock.ANY]})
    first_info = log_lines['info'][0]
    self.assertTrue(
        first_info.startswith('Nothing reconstructed '), first_info)
|
|
|
|
|
|
class TestReconstructFragmentArchive(BaseTestObjectReconstructor):
|
|
obj_name = b'o' # subclass overrides this
|
|
|
|
def setUp(self):
    # Run the base class setup, then record the object path (derived
    # from obj_name) and a fresh timestamp for the test object.
    super(TestReconstructFragmentArchive, self).setUp()
    self.obj_timestamp = self.ts()
    self.obj_path = b'/a/c/' + self.obj_name
|
|
|
|
def _create_fragment(self, frag_index, body=b'test data'):
    # Write a fragment with the given frag index and body for the test
    # object on 'sda1' partition 9, open it, clear the logger and return
    # the diskfile (also kept as self.df).
    utils.mkdirs(os.path.join(self.devices, 'sda1'))
    df_mgr = self.reconstructor._df_router[self.policy]
    # obj_name is bytes; it is only decoded when not running on py2
    obj_name = (self.obj_name if six.PY2
                else self.obj_name.decode('utf8'))
    self.df = df_mgr.get_diskfile(
        'sda1', 9, 'a', 'c', obj_name, policy=self.policy)
    write_diskfile(
        self.df, self.obj_timestamp, data=body, frag_index=frag_index)
    self.df.open()
    self.logger.clear()
    return self.df
|
|
|
|
def test_reconstruct_fa_no_errors(self):
    # Rebuild the fragment for part_nodes[1] from 200 responses of all
    # other fragment archives, then check the rebuilt body and the
    # headers sent on every backend request.
    job = {
        'partition': 0,
        'policy': self.policy,
    }
    part_nodes = self.policy.object_ring.get_part_nodes(0)
    node = part_nodes[1]
    node['backend_index'] = self.policy.get_backend_index(node['index'])

    test_data = (b'rebuild' * self.policy.ec_segment_size)[:-777]
    etag = md5(test_data, usedforsecurity=False).hexdigest()
    ec_archive_bodies = encode_frag_archive_bodies(self.policy, test_data)
    broken_body = ec_archive_bodies.pop(1)

    responses = []
    for frag_body in ec_archive_bodies:
        resp_headers = get_header_frag_index(self, frag_body)
        resp_headers.update({'X-Object-Sysmeta-Ec-Etag': etag})
        responses.append((200, frag_body, resp_headers))

    # make a hook point at
    # swift.obj.reconstructor.ObjectReconstructor._get_response
    called_headers = []
    orig_func = object_reconstructor.ObjectReconstructor._get_response

    def _get_response_hook(self, node, policy, part, path, headers):
        called_headers.append(headers)
        return orig_func(self, node, policy, part, path, headers)

    codes, body_iter, headers = zip(*responses)
    get_response_path = \
        'swift.obj.reconstructor.ObjectReconstructor._get_response'
    with mock.patch(get_response_path, _get_response_hook), \
            mocked_http_conn(
                *codes, body_iter=body_iter, headers=headers):
        df = self.reconstructor.reconstruct_fa(
            job, node, self._create_fragment(2, body=b''))
        self.assertEqual(0, df.content_length)
        fixed_body = b''.join(df.reader())
    self.assertEqual(len(fixed_body), len(broken_body))
    self.assertEqual(md5(fixed_body, usedforsecurity=False).hexdigest(),
                     md5(broken_body, usedforsecurity=False).hexdigest())
    expected_calls = len(part_nodes) - 1
    self.assertEqual(expected_calls, len(called_headers),
                     'Expected %d calls, got %r' % (expected_calls,
                                                    called_headers))
    expected_prefs = [
        {'timestamp': self.obj_timestamp.normal, 'exclude': []}]
    for raw_headers in called_headers:
        sent = HeaderKeyDict(raw_headers)
        self.assertIn('Content-Length', sent)
        self.assertEqual(sent['Content-Length'], '0')
        self.assertIn('User-Agent', sent)
        user_agent = sent['User-Agent']
        self.assertTrue(user_agent.startswith('obj-reconstructor'))
        self.assertIn('X-Backend-Storage-Policy-Index', sent)
        self.assertEqual(sent['X-Backend-Storage-Policy-Index'],
                         self.policy)
        self.assertIn('X-Backend-Fragment-Preferences', sent)
        self.assertEqual(
            expected_prefs,
            json.loads(sent['X-Backend-Fragment-Preferences']))
        self.assertIn('X-Backend-Replication', sent)
    # no error and warning
    self.assertFalse(self.logger.get_lines_for_level('error'))
    self.assertFalse(self.logger.get_lines_for_level('warning'))
|
|
|
|
def test_reconstruct_fa_errors_works(self):
    # Rebuild the fragment for part_nodes[4] while exactly one of the
    # other fragment archive requests fails, once for each kind of
    # failure (timeout, 404, generic exception).
    job = {
        'partition': 0,
        'policy': self.policy,
    }
    part_nodes = self.policy.object_ring.get_part_nodes(0)
    node = part_nodes[4]
    node['backend_index'] = self.policy.get_backend_index(node['index'])

    test_data = (b'rebuild' * self.policy.ec_segment_size)[:-777]
    etag = md5(test_data, usedforsecurity=False).hexdigest()
    ec_archive_bodies = encode_frag_archive_bodies(self.policy, test_data)

    broken_body = ec_archive_bodies.pop(4)

    base_responses = list()
    for body in ec_archive_bodies:
        headers = get_header_frag_index(self, body)
        headers.update({'X-Object-Sysmeta-Ec-Etag': etag})
        base_responses.append((200, body, headers))

    # since we're already missing a fragment a +2 scheme can only support
    # one additional failure at a time
    for error in (Timeout(), 404, Exception('kaboom!')):
        # work on a copy: assigning base_responses directly would alias
        # the list, so the error injected in one iteration would still be
        # present (and accumulate) in the following iterations
        responses = list(base_responses)
        error_index = random.randint(0, len(responses) - 1)
        responses[error_index] = (error, '', '')
        codes, body_iter, headers_iter = zip(*responses)
        with mocked_http_conn(*codes, body_iter=body_iter,
                              headers=headers_iter):
            df = self.reconstructor.reconstruct_fa(
                job, node, self._create_fragment(2))
            fixed_body = b''.join(df.reader())
            self.assertEqual(len(fixed_body), len(broken_body))
            self.assertEqual(
                md5(fixed_body, usedforsecurity=False).hexdigest(),
                md5(broken_body, usedforsecurity=False).hexdigest())
|
|
|
|
def test_reconstruct_fa_mixed_meta_timestamps_works(self):
    # verify scenario where all fragments have same data timestamp but some
    # have different meta timestamp
    job = {
        'partition': 0,
        'policy': self.policy,
    }
    part_nodes = self.policy.object_ring.get_part_nodes(0)
    node = part_nodes[4]
    node['backend_index'] = self.policy.get_backend_index(node['index'])

    test_data = (b'rebuild' * self.policy.ec_segment_size)[:-777]
    etag = md5(test_data, usedforsecurity=False).hexdigest()
    ec_archive_bodies = encode_frag_archive_bodies(self.policy, test_data)

    broken_body = ec_archive_bodies.pop(4)
    ts_data = next(self.ts_iter)  # all frags .data timestamp
    ts_meta = next(self.ts_iter)  # some frags .meta timestamp
    # alternate each response's timestamp between data and meta
    alternating_ts = itertools.cycle((ts_data, ts_meta))
    responses = []
    for frag_body in ec_archive_bodies:
        resp_ts = next(alternating_ts)
        resp_headers = get_header_frag_index(self, frag_body)
        resp_headers.update({
            'X-Object-Sysmeta-Ec-Etag': etag,
            'X-Timestamp': resp_ts.normal,
            'X-Backend-Timestamp': resp_ts.internal,
            'X-Backend-Data-Timestamp': ts_data.internal,
            'X-Backend-Durable-Timestamp': ts_data.internal,
        })
        responses.append((200, frag_body, resp_headers))

    codes, body_iter, headers_iter = zip(*responses)
    with mocked_http_conn(
            *codes, body_iter=body_iter, headers=headers_iter):
        df = self.reconstructor.reconstruct_fa(
            job, node, self._create_fragment(2))
        fixed_body = b''.join(df.reader())
        self.assertEqual(len(fixed_body), len(broken_body))
        fixed_md5 = md5(fixed_body, usedforsecurity=False).hexdigest()
        broken_md5 = md5(broken_body, usedforsecurity=False).hexdigest()
        self.assertEqual(fixed_md5, broken_md5)
|
|
|
|
def test_reconstruct_fa_error_with_invalid_header(self):
    # One of the first ec_ndata responses carries the literal string
    # 'None' as its frag index header; the rebuild must still produce a
    # frag matching the missing one.
    policy = self.policy
    job = {'partition': 0, 'policy': policy}
    target = policy.object_ring.get_part_nodes(0)[4]
    target['backend_index'] = policy.get_backend_index(target['index'])

    payload = (b'rebuild' * policy.ec_segment_size)[:-777]
    etag = md5(payload, usedforsecurity=False).hexdigest()
    frag_bodies = encode_frag_archive_bodies(policy, payload)
    missing_frag = frag_bodies.pop(4)

    def make_response(frag):
        hdrs = get_header_frag_index(self, frag)
        hdrs.update({'X-Object-Sysmeta-Ec-Etag': etag})
        return 200, frag, hdrs

    responses = [make_response(frag) for frag in frag_bodies]

    # pick the victim from the leading responses so that the test is
    # forced to exercise the handling of the bad response
    bad_pos = random.randint(0, policy.ec_ndata - 1)
    status, frag, hdrs = responses[bad_pos]
    # one esoteric failure: str() of a missing dict key ends up as the
    # literal string 'None' in place of the frag index
    stub_node_job = {'some_keys': 'foo', 'but_not': 'frag_index'}
    bogus_index = str(stub_node_job.get('frag_index'))
    hdrs['X-Object-Sysmeta-Ec-Frag-Index'] = bogus_index
    # oops!
    self.assertEqual('None',
                     hdrs.get('X-Object-Sysmeta-Ec-Frag-Index'))
    responses[bad_pos] = status, frag, hdrs

    statuses, bodies, header_dicts = zip(*responses)
    with mocked_http_conn(*statuses, body_iter=bodies,
                          headers=header_dicts):
        rebuilt_df = self.reconstructor.reconstruct_fa(
            job, target, self._create_fragment(2))
        rebuilt = b''.join(rebuilt_df.reader())
        # ... the bad response should be ignored like any other failure
        self.assertEqual(len(rebuilt), len(missing_frag))
        self.assertEqual(
            md5(rebuilt, usedforsecurity=False).hexdigest(),
            md5(missing_frag, usedforsecurity=False).hexdigest())
|
|
|
|
def test_reconstruct_parity_fa_with_data_node_failure(self):
    # Rebuild the frag at position -4 while responses from randomly
    # chosen data node positions are replaced, one error kind at a time,
    # by a failure. Replacements are not undone between iterations.
    policy = self.policy
    job = {'partition': 0, 'policy': policy}
    target = policy.object_ring.get_part_nodes(0)[-4]
    target['backend_index'] = policy.get_backend_index(target['index'])

    # trim the payload so that it is not aligned with the segment size
    payload = (b'rebuild' * policy.ec_segment_size)[:-454]
    etag = md5(payload, usedforsecurity=False).hexdigest()
    frag_bodies = encode_frag_archive_bodies(policy, payload)
    # the scheme is 10+4, so this gets a parity node
    missing_frag = frag_bodies.pop(-4)

    def make_response(frag):
        hdrs = get_header_frag_index(self, frag)
        hdrs.update({'X-Object-Sysmeta-Ec-Etag': etag})
        return 200, frag, hdrs

    responses = [make_response(frag) for frag in frag_bodies]

    failures = (Timeout(), 404, Exception('kaboom!'))
    for failure in failures:
        # grab a data node index
        bad_pos = random.randint(0, policy.ec_ndata - 1)
        responses[bad_pos] = (failure, '', '')
        statuses, bodies, header_dicts = zip(*responses)
        with mocked_http_conn(*statuses, body_iter=bodies,
                              headers=header_dicts):
            rebuilt_df = self.reconstructor.reconstruct_fa(
                job, target, self._create_fragment(2))
            rebuilt = b''.join(rebuilt_df.reader())
            self.assertEqual(len(rebuilt), len(missing_frag))
            self.assertEqual(
                md5(rebuilt, usedforsecurity=False).hexdigest(),
                md5(missing_frag, usedforsecurity=False).hexdigest())
|
|
|
|
def test_reconstruct_fa_exceptions_fails(self):
    # Every backend request fails with a Timeout or a generic exception,
    # so reconstruct_fa must raise DiskFileError.
    policy = self.policy
    ring = policy.object_ring
    job = {'partition': 0, 'policy': policy}
    target = ring.get_part_nodes(0)[1]
    target['backend_index'] = policy.get_backend_index(target['index'])

    num_requests = ring.replicas - 1
    failure_kinds = [Timeout(), Exception('kaboom!')]
    codes = [random.choice(failure_kinds) for _ in range(num_requests)]
    with mocked_http_conn(*codes):
        local_frag = self._create_fragment(2)
        with self.assertRaises(DiskFileError):
            self.reconstructor.reconstruct_fa(job, target, local_frag)

    error_lines = self.logger.get_lines_for_level('error')
    # one error line per failed request, plus a final line reporting
    # that there were not enough responses to reconstruct
    self.assertEqual(ring.replicas, len(error_lines))
    summary_line = error_lines[-1]
    for request_line in error_lines[:-1]:
        self.assertIn("Trying to GET", request_line)
    self.assertIn(
        'Unable to get enough responses (%s x unknown error responses)'
        % num_requests,
        summary_line,
        "Unexpected error line found: %s" % summary_line)
    # no warning
    self.assertFalse(self.logger.get_lines_for_level('warning'))
|
|
|
|
def test_reconstruct_fa_all_404s_fails(self):
    # Every backend request returns 404, so reconstruct_fa must raise
    # DiskFileError and log a single summary error line.
    self._create_fragment(2)
    policy = self.policy
    ring = policy.object_ring
    job = {'partition': 0, 'policy': policy}
    target = ring.get_part_nodes(0)[1]
    target['backend_index'] = policy.get_backend_index(target['index'])

    num_requests = ring.replicas - 1
    codes = [404] * num_requests
    with mocked_http_conn(*codes):
        with self.assertRaises(DiskFileError):
            self.reconstructor.reconstruct_fa(job, target, self.df)

    error_lines = self.logger.get_lines_for_level('error')
    # only 1 log to report not enough responses
    self.assertEqual(1, len(error_lines))
    summary_line = error_lines[0]
    self.assertIn(
        'Unable to get enough responses (%s x 404 error responses)'
        % num_requests,
        summary_line,
        "Unexpected error line found: %s" % summary_line)
    # no warning
    self.assertFalse(self.logger.get_lines_for_level('warning'))
|
|
|
|
def test_reconstruct_fa_all_404s_fails_custom_request_node_count(self):
    # With quarantine_threshold left unset, the number of requests is
    # expected to stay at replicas - 1 whatever request_node_count says.
    self._create_fragment(2)
    policy = self.policy
    ring = policy.object_ring
    job = {'partition': 0, 'policy': policy}
    target = ring.get_part_nodes(0)[1]
    target['backend_index'] = policy.get_backend_index(target['index'])

    # sanity check: number of handoffs available == replicas
    self.assertEqual(ring.max_more_nodes, ring.replicas)

    candidate_counts = (
        0,
        policy.ec_ndata - 1,
        ring.replicas + 1,
        2 * ring.replicas - 1,
        2 * ring.replicas,
        3 * ring.replicas,
        99 * ring.replicas,
    )
    for request_node_count in candidate_counts:
        with annotate_failure(request_node_count):
            self.logger.clear()
            self.reconstructor.request_node_count = \
                lambda replicas: request_node_count
            # request count capped at num primaries - 1
            exp_requests = ring.replicas - 1
            codes = [404] * exp_requests
            with mocked_http_conn(*codes):
                with self.assertRaises(DiskFileError):
                    self.reconstructor.reconstruct_fa(
                        job, target, self.df)

            error_lines = self.logger.get_lines_for_level('error')
            # only 1 log to report not enough responses
            self.assertEqual(1, len(error_lines))
            summary_line = error_lines[0]
            self.assertIn(
                'Unable to get enough responses (%s x 404 error responses)'
                % exp_requests,
                summary_line,
                "Unexpected error line found: %s" % summary_line)
            # no warning
            self.assertFalse(self.logger.get_lines_for_level('warning'))
|
|
|
|
def test_reconstruct_fa_mixture_of_errors_fails(self):
    # Backend requests fail with a mix of Timeout, 404 and 507; the
    # reconstruction must raise DiskFileError and the summary error line
    # must report the count of each kind of failure.
    self._create_fragment(2)
    policy = self.policy
    ring = policy.object_ring
    job = {'partition': 0, 'policy': policy}
    target = ring.get_part_nodes(0)[1]
    target['backend_index'] = policy.get_backend_index(target['index'])

    # ensure at least one of each error type, then pad with random picks
    failure_kinds = [Timeout(), 404, 507]
    padding = [random.choice(failure_kinds)
               for _ in range(ring.replicas - 4)]
    codes = failure_kinds + padding
    with mocked_http_conn(*codes):
        with self.assertRaises(DiskFileError):
            self.reconstructor.reconstruct_fa(job, target, self.df)

    exp_timeouts = sum(1 for code in codes if isinstance(code, Timeout))
    exp_404s = sum(1 for code in codes if code == 404)
    exp_507s = sum(1 for code in codes if code == 507)

    error_lines = self.logger.get_lines_for_level('error')
    # 1 error log to report not enough responses and one per Timeout
    self.assertEqual(len(error_lines), exp_timeouts + 1, error_lines)
    summary_line = error_lines[-1]
    for request_line in error_lines[:-1]:
        self.assertIn("Trying to GET", request_line)
    self.assertIn(
        'Unable to get enough responses '
        '(%s x unknown, %s x 404, %s x 507 error responses)'
        % (exp_timeouts, exp_404s, exp_507s), summary_line,
        "Unexpected error line found: %s" % summary_line)

    # one warning is expected for each 507 response
    warning_lines = self.logger.get_lines_for_level('warning')
    self.assertEqual(exp_507s, len(warning_lines), warning_lines)
    for warning_line in warning_lines:
        self.assertIn('Invalid response 507', warning_line)
|
|
|
|
def test_reconstruct_fa_with_mixed_old_etag(self):
    # One of the first ec_ndata responses has a different etag and an
    # earlier timestamp than the rest; the rebuild must still succeed
    # without logging any error or warning.
    policy = self.policy
    job = {'partition': 0, 'policy': policy}
    target = policy.object_ring.get_part_nodes(0)[1]
    target['backend_index'] = policy.get_backend_index(target['index'])

    payload = (b'rebuild' * policy.ec_segment_size)[:-777]
    etag = md5(payload, usedforsecurity=False).hexdigest()
    frag_bodies = encode_frag_archive_bodies(policy, payload)

    # headers for the bad response; it takes the first (oldest) timestamp
    missing_frag = frag_bodies.pop(1)
    ts_iter = make_timestamp_iter()
    bad_headers = get_header_frag_index(self, missing_frag)
    bad_headers.update({
        'X-Object-Sysmeta-Ec-Etag': 'some garbage',
        'X-Backend-Timestamp': next(ts_iter).internal,
    })

    # good responses all share the next timestamp
    good_timestamp = next(ts_iter).internal

    def make_response(frag):
        hdrs = get_header_frag_index(self, frag)
        hdrs.update({'X-Object-Sysmeta-Ec-Etag': etag,
                     'X-Backend-Timestamp': good_timestamp})
        return 200, frag, hdrs

    responses = [make_response(frag) for frag in frag_bodies]

    # include the one older frag with different etag in first responses
    bad_pos = random.randint(0, policy.ec_ndata - 1)
    # NOTE(review): error_headers is built here but the response below is
    # assembled from bad_headers; preserved as found - confirm which of
    # the two was intended.
    error_headers = get_header_frag_index(self, responses[bad_pos][1])
    error_headers.update(bad_headers)
    responses[bad_pos] = (200, '', bad_headers)

    statuses, bodies, header_dicts = zip(*responses)
    with mocked_http_conn(*statuses, body_iter=bodies,
                          headers=header_dicts):
        rebuilt_df = self.reconstructor.reconstruct_fa(
            job, target, self._create_fragment(2))
        rebuilt = b''.join(rebuilt_df.reader())
        self.assertEqual(len(rebuilt), len(missing_frag))
        self.assertEqual(
            md5(rebuilt, usedforsecurity=False).hexdigest(),
            md5(missing_frag, usedforsecurity=False).hexdigest())

    # no error and warning
    for level in ('error', 'warning'):
        self.assertFalse(self.logger.get_lines_for_level(level))
|
|
|
|
def test_reconstruct_fa_with_mixed_new_etag(self):
    # After a clean sanity pass, one of the first ec_ndata responses is
    # swapped for one with a different etag and a newer timestamp; the
    # rebuild must still succeed without logging any error or warning.
    policy = self.policy
    job = {'partition': 0, 'policy': policy}
    target = policy.object_ring.get_part_nodes(0)[1]
    target['backend_index'] = policy.get_backend_index(target['index'])

    payload = (b'rebuild' * policy.ec_segment_size)[:-777]
    etag = md5(payload, usedforsecurity=False).hexdigest()
    frag_bodies = encode_frag_archive_bodies(policy, payload)
    missing_frag = frag_bodies.pop(1)
    ts_iter = make_timestamp_iter()

    # good responses all share one timestamp
    good_timestamp = next(ts_iter).internal

    def make_response(frag):
        hdrs = get_header_frag_index(self, frag)
        hdrs.update({'X-Object-Sysmeta-Ec-Etag': etag,
                     'X-Backend-Timestamp': good_timestamp})
        return 200, frag, hdrs

    responses = [make_response(frag) for frag in frag_bodies]

    def rebuild_and_verify():
        # run one reconstruction against the current response list and
        # compare the result with the missing frag
        statuses, bodies, header_dicts = zip(*responses)
        with mocked_http_conn(*statuses, body_iter=bodies,
                              headers=header_dicts):
            rebuilt_df = self.reconstructor.reconstruct_fa(
                job, target, self._create_fragment(2))
            rebuilt = b''.join(rebuilt_df.reader())
            self.assertEqual(len(rebuilt), len(missing_frag))
            self.assertEqual(
                md5(rebuilt, usedforsecurity=False).hexdigest(),
                md5(missing_frag, usedforsecurity=False).hexdigest())

    # sanity check before negative test
    rebuild_and_verify()

    # one newer etag won't spoil the bunch
    new_pos = random.randint(0, policy.ec_ndata - 1)
    new_headers = get_header_frag_index(self, responses[new_pos][1])
    new_headers.update({'X-Object-Sysmeta-Ec-Etag': 'some garbage',
                        'X-Backend-Timestamp': next(ts_iter).internal})
    responses[new_pos] = (200, '', new_headers)
    rebuild_and_verify()

    # no error and warning
    for level in ('error', 'warning'):
        self.assertFalse(self.logger.get_lines_for_level(level))
|
|
|
|
def test_reconstruct_fa_with_mixed_etag_with_same_timestamp(self):
    # After a clean sanity pass, one response (never the first) is
    # swapped for one with a different etag and no distinguishing
    # timestamp; the rebuild must still succeed and exactly one
    # 'Mixed Etag' error must be logged.
    policy = self.policy
    job = {'partition': 0, 'policy': policy}
    target = policy.object_ring.get_part_nodes(0)[1]
    target['backend_index'] = policy.get_backend_index(target['index'])

    payload = (b'rebuild' * policy.ec_segment_size)[:-777]
    etag = md5(payload, usedforsecurity=False).hexdigest()
    frag_bodies = encode_frag_archive_bodies(policy, payload)
    missing_frag = frag_bodies.pop(1)

    def make_response(frag):
        hdrs = get_header_frag_index(self, frag)
        hdrs.update({'X-Object-Sysmeta-Ec-Etag': etag})
        return 200, frag, hdrs

    # good responses
    responses = [make_response(frag) for frag in frag_bodies]

    def rebuild_and_verify():
        # run one reconstruction against the current response list and
        # compare the result with the missing frag
        statuses, bodies, header_dicts = zip(*responses)
        with mocked_http_conn(*statuses, body_iter=bodies,
                              headers=header_dicts):
            rebuilt_df = self.reconstructor.reconstruct_fa(
                job, target, self._create_fragment(2))
            rebuilt = b''.join(rebuilt_df.reader())
            self.assertEqual(len(rebuilt), len(missing_frag))
            self.assertEqual(
                md5(rebuilt, usedforsecurity=False).hexdigest(),
                md5(missing_frag, usedforsecurity=False).hexdigest())

    # sanity check before negative test
    rebuild_and_verify()

    # a response at same timestamp but different etag won't spoil the
    # bunch.
    # N.B. (FIXME). if the first response were the garbage one, the
    # reconstruction would fail because all other *correct* frags would
    # be assumed to be garbage. The random position therefore starts at
    # 1 so that the first response always has the correct etag.
    new_pos = random.randint(1, policy.ec_ndata - 1)
    new_headers = get_header_frag_index(self, responses[new_pos][1])
    new_headers.update({'X-Object-Sysmeta-Ec-Etag': 'some garbage'})
    responses[new_pos] = (200, '', new_headers)
    rebuild_and_verify()

    # expect an error log but no warnings
    error_log_lines = self.logger.get_lines_for_level('error')
    self.assertEqual(1, len(error_log_lines))
    expected_fragment = (
        'Mixed Etag (some garbage, %s) for 10.0.0.1:1001/sdb/0%s '
        'policy#%s frag#1' %
        (etag, self.obj_path.decode('utf8'), int(policy)))
    self.assertIn(expected_fragment, error_log_lines[0])
    self.assertFalse(self.logger.get_lines_for_level('warning'))
|
|
|
|
def test_reconstruct_fa_with_mixed_timestamps_etags_fail(self):
    # Responses are spread over three different objects, each with its
    # own etag and timestamp, so that no etag gathers ec_ndata frags.
    # Reconstruction must fail and log one error line per etag.
    policy = self.policy
    job = {'partition': 0, 'policy': policy}
    target = policy.object_ring.get_part_nodes(0)[1]
    target['backend_index'] = policy.get_backend_index(target['index'])

    payload = (b'rebuild' * policy.ec_segment_size)[:-777]
    ts_iter = make_timestamp_iter()
    # create 3 different ec bodies, keyed by (etag, timestamp, durable)
    frags_by_key = {}
    for offset in range(3):
        body = payload[offset:]
        frags = encode_frag_archive_bodies(policy, body)
        # pop the index to the destination node
        frags.pop(1)
        key = (md5(body, usedforsecurity=False).hexdigest(),
               next(ts_iter).internal, bool(offset % 2))
        frags_by_key[key] = frags

    # fill out response list by taking frags from the 3 bodies in turn
    wanted = policy.object_ring.replicas - 1
    responses = []
    for key in itertools.cycle(frags_by_key):
        etag, timestamp, durable = key
        frag = frags_by_key[key].pop(0)
        hdrs = get_header_frag_index(self, frag)
        hdrs.update({'X-Object-Sysmeta-Ec-Etag': etag,
                     'X-Backend-Timestamp': timestamp})
        if durable:
            hdrs['X-Backend-Durable-Timestamp'] = timestamp
        responses.append((200, frag, hdrs))
        if len(responses) >= wanted:
            break

    # sanity, there is 3 different etag and each etag
    # doesn't have > ec_k bodies
    etag_count = collections.Counter(
        hdrs['X-Object-Sysmeta-Ec-Etag'] for _, _, hdrs in responses)
    self.assertEqual(3, len(etag_count))
    for count in etag_count.values():
        self.assertLess(count, policy.ec_ndata)

    statuses, bodies, header_dicts = zip(*responses)
    with mocked_http_conn(*statuses, body_iter=bodies,
                          headers=header_dicts):
        local_frag = self._create_fragment(2)
        with self.assertRaises(DiskFileError):
            self.reconstructor.reconstruct_fa(job, target, local_frag)

    error_lines = self.logger.get_lines_for_level('error')
    # 1 error log per etag to report not enough responses
    self.assertEqual(3, len(error_lines))
    for error_line in error_lines:
        found = next(
            (key for key in frags_by_key if key[0] in error_line), None)
        if found is None:
            self.fail(
                "no expected etag %s found: %s" %
                (list(frags_by_key), error_line))
        expected_etag, timestamp, durable = found
        # remove the found etag which should not be found in the
        # following error lines
        del frags_by_key[found]

        expected = 'Unable to get enough responses (%s/10 from %s ok ' \
                   'responses) to reconstruct %s 10.0.0.1:1001/sdb/0%s ' \
                   'policy#0 frag#1 with ETag %s and timestamp %s' %\
                   (etag_count[expected_etag], etag_count[expected_etag],
                    'durable' if durable else 'non-durable',
                    self.obj_path.decode('utf8'), expected_etag,
                    timestamp)
        self.assertIn(
            expected, error_line,
            "Unexpected error line found: Expected: %s Got: %s"
            % (expected, error_line))
    # no warning
    self.assertFalse(self.logger.get_lines_for_level('warning'))
|
|
|
|
def test_reconstruct_fa_with_mixed_etags_same_timestamp_fail(self):
    # Responses are spread over three different objects that all share
    # one timestamp, so that no etag gathers ec_ndata frags.
    # Reconstruction must fail; the last error line reports the shortfall
    # for one etag and every earlier line reports a 'Mixed Etag'.
    self._create_fragment(2)
    policy = self.policy
    job = {'partition': 0, 'policy': policy}
    target = policy.object_ring.get_part_nodes(0)[1]
    target['backend_index'] = policy.get_backend_index(target['index'])

    payload = (b'rebuild' * policy.ec_segment_size)[:-777]
    shared_ts = next(make_timestamp_iter())
    # create 3 different ec bodies, keyed by (etag, timestamp, durable)
    frags_by_key = {}
    for offset in range(3):
        body = payload[offset:]
        frags = encode_frag_archive_bodies(policy, body)
        # pop the index to the destination node
        frags.pop(1)
        key = (md5(body, usedforsecurity=False).hexdigest(),
               shared_ts.internal, bool(offset % 2))
        frags_by_key[key] = frags

    # fill out response list by 3 different etag bodies, same timestamp
    wanted = policy.object_ring.replicas - 1
    responses = []
    for key in itertools.cycle(frags_by_key):
        etag, timestamp, durable = key
        frag = frags_by_key[key].pop(0)
        hdrs = get_header_frag_index(self, frag)
        hdrs.update({'X-Object-Sysmeta-Ec-Etag': etag,
                     'X-Backend-Timestamp': timestamp})
        if durable:
            hdrs['X-Backend-Durable-Timestamp'] = timestamp
        responses.append((200, frag, hdrs))
        if len(responses) >= wanted:
            break

    # sanity, there is 3 different etag and each etag
    # doesn't have > ec_k bodies
    etag_count = collections.Counter(
        hdrs['X-Object-Sysmeta-Ec-Etag'] for _, _, hdrs in responses)
    self.assertEqual(3, len(etag_count))
    for count in etag_count.values():
        self.assertLess(count, policy.ec_ndata)

    statuses, bodies, header_dicts = zip(*responses)
    with mocked_http_conn(*statuses, body_iter=bodies,
                          headers=header_dicts):
        with self.assertRaises(DiskFileError):
            self.reconstructor.reconstruct_fa(job, target, self.df)

    error_lines = self.logger.get_lines_for_level('error')
    self.assertGreater(len(error_lines), 1)
    summary_line = error_lines[-1]
    found = next(
        (key for key in frags_by_key if key[0] in summary_line), None)
    if found is None:
        # report the line that was actually searched (the last one)
        self.fail(
            "no expected etag %s found: %s" %
            (list(frags_by_key), summary_line))
    expected_etag, timestamp, durable = found

    # every response with a different etag yields one 'Mixed Etag' line
    other_etags_count = sum(count for etag, count in etag_count.items()
                            if etag != expected_etag)
    self.assertEqual(other_etags_count + 1, len(error_lines))
    for mixed_line in error_lines[:-1]:
        self.assertIn('Mixed Etag', mixed_line)
    expected = 'Unable to get enough responses (%s/10 from %s ok ' \
               'responses) to reconstruct %s 10.0.0.1:1001/sdb/0%s ' \
               'policy#0 frag#1 with ETag %s and timestamp %s' % \
               (etag_count[expected_etag], len(responses),
                'durable' if durable else 'non-durable',
                self.obj_path.decode('utf8'), expected_etag, timestamp)
    # the failure message quotes the same line that is being checked
    self.assertIn(
        expected, summary_line,
        "Unexpected error line found: Expected: %s Got: %s"
        % (expected, summary_line))
    # no warning
    self.assertFalse(self.logger.get_lines_for_level('warning'))
|
|
|
|
def test_reconstruct_fa_finds_missing_frag_does_not_fail(self):
    # verify that reconstruction of a missing frag can cope with finding
    # that very frag among the responses it gets from other nodes while
    # attempting the rebuild
    policy = self.policy
    job = {'partition': 0, 'policy': policy}
    part_nodes = policy.object_ring.get_part_nodes(0)
    broken_index = random.randint(0, policy.ec_ndata - 1)
    target = part_nodes[broken_index]
    target['backend_index'] = policy.get_backend_index(target['index'])

    payload = (b'rebuild' * policy.ec_segment_size)[:-777]
    etag = md5(payload, usedforsecurity=False).hexdigest()
    all_frags = encode_frag_archive_bodies(policy, payload)

    # instead of removing the broken frag, leave it in the list of
    # responses and take away the last frag
    missing_frag = all_frags[broken_index]
    served_frags = all_frags[:-1]

    responses = []
    for frag in served_frags:
        hdrs = get_header_frag_index(self, frag)
        hdrs.update({'X-Object-Sysmeta-Ec-Etag': etag})
        responses.append((200, frag, hdrs))

    statuses, bodies, header_dicts = zip(*responses)
    with mocked_http_conn(*statuses, body_iter=bodies,
                          headers=header_dicts):
        rebuilt_df = self.reconstructor.reconstruct_fa(
            job, target, self._create_fragment(2))
        rebuilt = b''.join(rebuilt_df.reader())
        self.assertEqual(len(rebuilt), len(missing_frag))
        self.assertEqual(
            md5(rebuilt, usedforsecurity=False).hexdigest(),
            md5(missing_frag, usedforsecurity=False).hexdigest())

    # no error, no warning
    for level in ('error', 'warning'):
        self.assertFalse(self.logger.get_lines_for_level(level))
    # the found own frag will be reported in the debug message
    debug_log_lines = self.logger.get_lines_for_level('debug')
    # redundant frag found once in first ec_ndata responses
    self.assertIn(
        'Found existing frag #%s at' % broken_index,
        debug_log_lines[0], debug_log_lines)

    # N.B. in the future these checks could be avoided, because sending
    # the found copy rather than reconstructing would save resources.
    # One more reason for not passing the destination index frag to the
    # reconstruct function is that it would cause a bunch of warning
    # logs from liberasurecode[1].
    # 1: https://github.com/openstack/liberasurecode/blob/
    #    master/src/erasurecode.c#L870
    log_prefix = 'Reconstruct frag #%s with frag indexes' % broken_index
    rebuild_line = debug_log_lines[1]
    self.assertIn(log_prefix, rebuild_line)
    self.assertFalse(debug_log_lines[2:])
    # whatever follows the prefix is parsed as a JSON list of indexes
    got_frag_index_list = json.loads(rebuild_line[len(log_prefix):])
    self.assertNotIn(broken_index, got_frag_index_list)
|
|
|
|
def test_quarantine_threshold_conf(self):
    # (conf, expected quarantine_threshold) pairs; an empty conf yields 0
    accepted = (
        ({}, 0),
        ({'quarantine_threshold': '0'}, 0),
        ({'quarantine_threshold': '1'}, 1),
        ({'quarantine_threshold': 2.0}, 2),
    )
    for conf, expected in accepted:
        reconstructor = object_reconstructor.ObjectReconstructor(conf)
        self.assertEqual(expected, reconstructor.quarantine_threshold)

    # these values must be rejected with ValueError
    rejected = ('1.1', 1.1, '-1', -1, 'auto', 'bad')
    for bad in rejected:
        with annotate_failure(bad):
            with self.assertRaises(ValueError):
                object_reconstructor.ObjectReconstructor(
                    {'quarantine_threshold': bad})
|
|
|
|
def test_quarantine_age_conf(self):
    # (conf, expected quarantine_age) pairs
    accepted = (
        # defaults to DEFAULT_RECLAIM_AGE
        ({}, 604800),
        ({'quarantine_age': '0'}, 0),
        ({'quarantine_age': '1'}, 1),
        # trumps reclaim_age
        ({'quarantine_age': '1', 'reclaim_age': 0}, 1),
        ({'quarantine_age': '1', 'reclaim_age': 2}, 1),
        # a float value ends up as an int
        ({'quarantine_age': 2.2}, 2),
    )
    for conf, expected in accepted:
        reconstructor = object_reconstructor.ObjectReconstructor(conf)
        self.assertEqual(expected, reconstructor.quarantine_age)

    # these values must be rejected with ValueError
    rejected = ('1.1', 'auto', 'bad')
    for bad in rejected:
        with annotate_failure(bad):
            with self.assertRaises(ValueError):
                object_reconstructor.ObjectReconstructor(
                    {'quarantine_age': bad})
|
|
|
|
def test_request_node_count_conf(self):
    # with an empty conf the count comes out as twice the replica count
    reconstructor = object_reconstructor.ObjectReconstructor({})
    for replicas, expected in ((3, 6), (11, 22)):
        self.assertEqual(expected,
                         reconstructor.request_node_count(replicas))

    # (configured value, replicas, expected request node count)
    accepted = (
        ('0', 10, 0),
        ('1 * replicas', 3, 3),
        ('1 * replicas', 11, 11),
        ('2 * replicas', 3, 6),
        ('2 * replicas', 11, 22),
        ('11', 11, 11),
        ('10', 11, 10),
        ('12', 11, 12),
    )
    for value, replicas, expected in accepted:
        reconstructor = object_reconstructor.ObjectReconstructor(
            {'request_node_count': value})
        self.assertEqual(expected,
                         reconstructor.request_node_count(replicas))

    # these values must be rejected with ValueError
    rejected = ('1.1', 1.1, 'auto', 'bad',
                '2.5 * replicas', 'two * replicas')
    for bad in rejected:
        with annotate_failure(bad):
            with self.assertRaises(ValueError):
                object_reconstructor.ObjectReconstructor(
                    {'request_node_count': bad})
|
|
|
|
def _do_test_reconstruct_insufficient_frags(
        self, extra_conf, num_frags, other_responses,
        local_frag_index=2, frag_index_to_rebuild=1,
        resp_timestamps=None, resp_etags=None):
    # Helper: reconfigure the reconstructor with extra_conf, serve
    # num_frags ok responses followed by other_responses (a list of
    # (status, body, headers) tuples for the bad responses), and return
    # the DiskFileError that reconstruct_fa is expected to raise.
    #
    # By default frag_index_to_rebuild is less than local_frag_index and
    # the ok responses use the frags starting at local_frag_index.
    # resp_timestamps, when given, supplies one timestamp per response;
    # otherwise self.obj_timestamp is used for all of them.
    # resp_etags, when given, supplies one etag per ok response; falsy
    # entries fall back to the etag of the test data.
    self.assertGreater(num_frags, 0)
    self.logger.clear()
    self._configure_reconstructor(**extra_conf)
    self._create_fragment(local_frag_index)
    policy = self.policy
    job = {'partition': 0, 'policy': policy}
    target = policy.object_ring.get_part_nodes(0)[frag_index_to_rebuild]
    target['backend_index'] = policy.get_backend_index(target['index'])

    payload = (b'rebuild' * policy.ec_segment_size)[:-777]
    default_etag = md5(payload, usedforsecurity=False).hexdigest()
    all_frags = encode_frag_archive_bodies(policy, payload)
    frags = all_frags[local_frag_index:local_frag_index + num_frags]

    if resp_etags:
        self.assertEqual(len(frags), len(resp_etags))
        # use default etag where the supplied etag is falsy
        etags = [other or default_etag for other in resp_etags]
    else:
        etags = [default_etag] * len(frags)

    responses = []
    for frag, frag_etag in zip(frags, etags):
        hdrs = get_header_frag_index(self, frag)
        hdrs.update({'X-Object-Sysmeta-Ec-Etag': frag_etag})
        responses.append((200, frag, hdrs))

    codes, body_iter, headers = zip(*(responses + other_responses))
    if not resp_timestamps:
        resp_timestamps = [self.obj_timestamp] * len(codes)
    internal_timestamps = [ts.internal for ts in resp_timestamps]
    with mocked_http_conn(*codes, body_iter=body_iter,
                          headers=headers,
                          timestamps=internal_timestamps):
        with self.assertRaises(DiskFileError) as cm:
            # NOTE(review): the local frag is always created with index
            # 2 here rather than local_frag_index - confirm intended.
            self.reconstructor.reconstruct_fa(
                job, target, self._create_fragment(2))
    return cm.exception
|
|
|
|
def _verify_error_lines(self, num_frags, other_responses,
                        exp_useful_responses):
    # Helper: assert that exactly two error lines were logged. The first
    # must report exp_useful_responses out of ec_ndata from num_frags ok
    # responses; the second must list the status codes found in
    # other_responses as '<count> x <code>' items, ordered by code.
    error_lines = self.logger.get_lines_for_level('error')
    self.assertEqual(2, len(error_lines), error_lines)
    frags_line, errors_line = error_lines

    expected_frags = (
        'Unable to get enough responses (%d/%d from %d ok responses)'
        % (exp_useful_responses, self.policy.ec_ndata, num_frags))
    self.assertIn(expected_frags, frags_line)

    # tally the bad responses by their status
    bad_codes = collections.Counter(
        response[0] for response in other_responses)
    summary = ', '.join(
        '%s x %s' % (count, status)
        for status, count in sorted(bad_codes.items()))
    self.assertIn('Unable to get enough responses (%s error responses)'
                  % summary, errors_line)
|
|
|
|
def _assert_diskfile_quarantined(self):
    # Helper: assert that a single 'Quarantined object' warning was
    # logged and that the data file of self.df now lives under the
    # device's 'quarantined' directory with its content intact.
    warning_lines = self.logger.get_lines_for_level('warning')
    self.assertEqual(1, len(warning_lines), warning_lines)
    self.assertIn('Quarantined object', warning_lines[0])

    # Check the diskfile has moved to quarantine dir
    df = self.df
    quarantine_dir = os.path.join(
        df._device_path, 'quarantined',
        diskfile.get_data_dir(self.policy),
        os.path.basename(df._datadir))
    self.assertTrue(os.path.isdir(quarantine_dir))
    quarantine_file = os.path.join(
        quarantine_dir, os.path.basename(df._data_file))
    self.assertTrue(os.path.isfile(quarantine_file))
    with open(quarantine_file, 'r') as fd:
        self.assertEqual('test data', fd.read())
    # ... and is gone from its original location
    self.assertFalse(os.path.exists(df._data_file))
|
|
|
|
def _assert_diskfile_not_quarantined(self):
    # Helper: assert that the device has no 'quarantined' directory and
    # that the data file of self.df is still in place with its content
    # intact.
    df = self.df
    self.assertFalse(
        os.path.isdir(os.path.join(df._device_path, 'quarantined')))
    data_file = df._data_file
    self.assertTrue(os.path.exists(data_file))
    with open(data_file, 'r') as fd:
        self.assertEqual('test data', fd.read())
|
|
|
|
def test_reconstruct_fa_quarantine_threshold_one_rnc_two_replicas(self):
    # request_node_count is not configured, so the default of
    # 2 * replicas applies; every response other than the lonely frag's
    # own is a 404
    ring = self.policy.object_ring
    not_found = (404, None, None)
    other_responses = [not_found] * (2 * ring.replicas - 2)

    exc = self._do_test_reconstruct_insufficient_frags(
        {'quarantine_threshold': 1, 'reclaim_age': 0},
        1, other_responses)

    self.assertIsInstance(exc, DiskFileQuarantined)
    self._assert_diskfile_quarantined()
    self._verify_error_lines(1, other_responses, 1)
|
|
|
|
def test_reconstruct_fa_quarantine_threshold_one_rnc_three_replicas(self):
    # request_node_count is configured as 3 * replicas; every response
    # other than the lonely frag's own is a 404
    ring = self.policy.object_ring
    not_found = (404, None, None)
    other_responses = [not_found] * (3 * ring.replicas - 2)
    conf = {
        'quarantine_threshold': 1,
        'reclaim_age': 0,
        'request_node_count': '3 * replicas',
    }
    # set ring get_more_nodes to yield enough handoffs
    ring.max_more_nodes = 2 * ring.replicas

    exc = self._do_test_reconstruct_insufficient_frags(
        conf, 1, other_responses)

    self.assertIsInstance(exc, DiskFileQuarantined)
    self._assert_diskfile_quarantined()
    self._verify_error_lines(1, other_responses, 1)
|
|
|
|
def test_reconstruct_fa_quarantine_threshold_one_rnc_four_replicas(self):
    # verify handoff search exhausting handoff node iter: the configured
    # request_node_count asks for more nodes than the ring will yield
    ring = self.policy.object_ring
    not_found = (404, None, None)
    other_responses = [not_found] * (3 * ring.replicas - 2)
    conf = {
        'quarantine_threshold': 1,
        'reclaim_age': 0,
        'request_node_count': '4 * replicas',
    }
    # limit ring get_more_nodes to yield less than
    # (request_node_count - 1 * replicas) nodes
    ring.max_more_nodes = 2 * ring.replicas

    exc = self._do_test_reconstruct_insufficient_frags(
        conf, 1, other_responses)

    self.assertIsInstance(exc, DiskFileQuarantined)
    self._assert_diskfile_quarantined()
    self._verify_error_lines(1, other_responses, 1)
|
|
|
|
def test_reconstruct_fa_quarantine_threshold_one_rnc_absolute_number(self):
    # request_node_count may be configured as an absolute number of nodes;
    # verify quarantine of the lonely frag for a range of such values
    ring = self.policy.object_ring
    not_found = (404, None, None)

    def do_test(rnc_num):
        # the expected number of 404s never drops below a full round of
        # primaries, even when rnc_num is smaller than the replica count
        num_other_resps = max(rnc_num, ring.replicas) - 2
        other_responses = [not_found] * num_other_resps
        conf = {'quarantine_threshold': 1, 'reclaim_age': 0,
                'request_node_count': str(rnc_num)}
        # set ring get_more_nodes to yield enough handoffs
        ring.max_more_nodes = 2 * ring.replicas
        exc = self._do_test_reconstruct_insufficient_frags(
            conf, 1, other_responses)
        self.assertIsInstance(exc, DiskFileQuarantined)
        self._assert_diskfile_quarantined()
        self._verify_error_lines(1, other_responses, 1)

    for rnc_num in range(0, 3 * ring.replicas):
        # many values are exercised by one test method, so label any
        # assertion failure with the value that triggered it
        with annotate_failure('request_node_count=%d' % rnc_num):
            do_test(rnc_num)
|
|
|
|
def test_reconstruct_fa_quarantine_threshold_two(self):
    # two frags are found and quarantine_threshold is 2; every other
    # response is a 404
    ring = self.policy.object_ring
    not_found = (404, None, None)
    other_responses = [not_found] * (2 * ring.replicas - 3)

    exc = self._do_test_reconstruct_insufficient_frags(
        {'quarantine_threshold': 2, 'reclaim_age': 0},
        2, other_responses)

    self.assertIsInstance(exc, DiskFileQuarantined)
    self._assert_diskfile_quarantined()
    self._verify_error_lines(2, other_responses, 2)
|
|
|
|
def test_reconstruct_fa_quarantine_threshold_two_with_quarantine_age(self):
    # as for the quarantine_threshold == 2 case, but the frag's age is
    # judged against quarantine_age rather than reclaim_age
    ring = self.policy.object_ring
    not_found = (404, None, None)
    other_responses = [not_found] * (2 * ring.replicas - 3)
    conf = {
        'quarantine_threshold': 2,
        # quarantine age trumps reclaim age
        'quarantine_age': 0,
        'reclaim_age': 1000,
    }

    exc = self._do_test_reconstruct_insufficient_frags(
        conf, 2, other_responses)

    self.assertIsInstance(exc, DiskFileQuarantined)
    self._assert_diskfile_quarantined()
    self._verify_error_lines(2, other_responses, 2)
|
|
|
|
def test_reconstruct_fa_no_quarantine_more_than_threshold_frags(self):
    # verify there is no quarantine when more frags are found than the
    # quarantine_threshold allows
    replicas = self.policy.object_ring.replicas
    ec_ndata = self.policy.ec_ndata
    not_found = (404, None, None)

    # default config
    other_responses = [not_found] * (replicas - 2)
    exc = self._do_test_reconstruct_insufficient_frags(
        {'reclaim_age': 0}, 1, other_responses)
    self.assertIsInstance(exc, DiskFileError)
    self._assert_diskfile_not_quarantined()

    # configured quarantine_threshold: try every frag count that exceeds
    # the threshold but is still too few to rebuild
    for threshold in range(ec_ndata):
        for num_frags in range(threshold + 1, ec_ndata):
            other_responses = [not_found] * (replicas - num_frags - 1)
            conf = {'quarantine_threshold': threshold, 'reclaim_age': 0}
            exc = self._do_test_reconstruct_insufficient_frags(
                conf, num_frags, other_responses)
            self.assertIsInstance(exc, DiskFileError)
            self._assert_diskfile_not_quarantined()
            self._verify_error_lines(
                num_frags, other_responses, num_frags)
            self.assertEqual(
                [], self.logger.get_lines_for_level('warning'))

    # responses include the frag_index_to_rebuild - verify that response is
    # counted against the threshold
    other_responses = [not_found] * (replicas - 3)
    exc = self._do_test_reconstruct_insufficient_frags(
        {'quarantine_threshold': 1, 'reclaim_age': 0},
        2, other_responses,
        local_frag_index=2, frag_index_to_rebuild=3)
    self.assertIsInstance(exc, DiskFileError)
    self._assert_diskfile_not_quarantined()
    self._verify_error_lines(2, other_responses, 1)
|
|
|
|
def test_reconstruct_fa_no_quarantine_non_404_response(self):
    # a response that is neither ok nor 404 must prevent quarantine of
    # the lonely frag, whether it comes from a primary or from a handoff
    num_frags = 1
    ring = self.policy.object_ring
    not_found = (404, None, None)

    def check_outcome(exc, responses, status):
        # common post-conditions: no quarantine, the usual two error
        # lines, and one warning about the invalid response
        self.assertIsInstance(exc, DiskFileError)
        self._assert_diskfile_not_quarantined()
        self._verify_error_lines(num_frags, responses, num_frags)
        warnings = self.logger.get_lines_for_level('warning')
        self.assertEqual(1, len(warnings), warnings)
        self.assertIn('Invalid response %s' % status, warnings[0])

    for bad_status in (400, 503, 507):
        bad_response = (bad_status, None, None)

        # a non-404 in primary responses will prevent quarantine
        num_other_resps = ring.replicas - num_frags - 1
        other_responses = [not_found] * (num_other_resps - 1)
        other_responses.append(bad_response)
        exc = self._do_test_reconstruct_insufficient_frags(
            {'quarantine_threshold': 1, 'reclaim_age': 0},
            num_frags, other_responses)
        check_outcome(exc, other_responses, bad_status)

        # a non-404 in handoff responses will prevent quarantine; non-404
        # is the *final* handoff response...
        ring.max_more_nodes = 13 * ring.replicas
        for request_node_count in (2, 3, 13):
            num_other_resps = (
                request_node_count * ring.replicas - num_frags - 1)
            other_responses = [not_found] * (num_other_resps - 1)
            other_responses.append(bad_response)
            with annotate_failure(
                    'request_node_count=%d' % request_node_count):
                conf = {
                    'quarantine_threshold': 1,
                    'reclaim_age': 0,
                    'request_node_count':
                        '%s * replicas' % request_node_count,
                }
                exc = self._do_test_reconstruct_insufficient_frags(
                    conf, num_frags, other_responses)
            check_outcome(exc, other_responses, bad_status)

        # a non-404 in handoff responses will prevent quarantine; non-404
        # is part way through all handoffs so not all handoffs are used
        # regardless of how big request_node_count is
        non_404_handoff = 3
        for request_node_count in (2, 3, 13):
            # replicas - 1 - num_frags other_responses from primaries,
            # plus a batch of replicas - 1 during which non-404 shows up,
            # plus some that trickle out before the non-404 shows up, but
            # limited to (request_node_count * replicas - num_frags - 1)
            # e.g. for 10+4 policy with request_node_count > 2
            # - batch of 13 requests go to primaries,
            # - 12 other_responses are consumed,
            # - then a batch of 13 handoff requests is sent,
            # - the non-404 is the 4th response in that batch,
            # - so 3 more requests will have been trickled out
            batch_size = ring.replicas - 1
            num_other_resps = min(
                2 * batch_size - num_frags + non_404_handoff,
                request_node_count * ring.replicas - 1 - num_frags)
            other_responses = [not_found] * (num_other_resps - 1)
            other_responses.insert(
                batch_size - num_frags + non_404_handoff, bad_response)
            conf = {
                'quarantine_threshold': 1,
                'reclaim_age': 0,
                'request_node_count': '%s * replicas' % request_node_count,
            }
            exc = self._do_test_reconstruct_insufficient_frags(
                conf, num_frags, other_responses)
            check_outcome(exc, other_responses, bad_status)
|
|
|
|
def test_reconstruct_fa_no_quarantine_frag_not_old_enough(self):
    # verify that solitary fragment is not quarantined if it has not
    # reached reclaim_age
    not_found = (404, None, None)
    other_responses = [not_found] * (self.policy.object_ring.replicas - 2)

    confs = (
        {'quarantine_threshold': 1, 'reclaim_age': 10000},
        # quarantine_age trumps reclaim_age
        {'quarantine_threshold': 1,
         'quarantine_age': 10000,
         'reclaim_age': 0},
        # default reclaim_age
        {'quarantine_threshold': 1},
    )
    for conf in confs:
        exc = self._do_test_reconstruct_insufficient_frags(
            conf, 1, other_responses)
        self.assertIsInstance(exc, DiskFileError)
        self._assert_diskfile_not_quarantined()
        self._verify_error_lines(1, other_responses, 1)
|
|
|
|
def test_reconstruct_fa_no_quarantine_frag_resp_different_timestamp(self):
    # verify that solitary fragment is not quarantined if the only frag
    # response is for a different timestamp than the local frag
    not_found = (404, None, None)
    num_other_resps = self.policy.object_ring.replicas - 2
    other_responses = [not_found] * num_other_resps

    # every response carries a timestamp one second after the local frag's
    later = utils.Timestamp(float(self.obj_timestamp) + 1)
    resp_timestamps = [later] * (num_other_resps + 1)

    exc = self._do_test_reconstruct_insufficient_frags(
        {'quarantine_threshold': 1, 'reclaim_age': 0},
        1, other_responses, resp_timestamps=resp_timestamps)

    self.assertIsInstance(exc, DiskFileError)
    self._assert_diskfile_not_quarantined()
    self._verify_error_lines(1, other_responses, 1)
|
|
|
|
def test_reconstruct_fa_no_quarantine_frag_resp_mixed_timestamps(self):
    # verify that solitary fragment is not quarantined if there is a
    # response for a frag at different timestamp in addition to the
    # response for the solitary local frag
    not_found = (404, None, None)
    num_other_resps = self.policy.object_ring.replicas - 3
    other_responses = [not_found] * num_other_resps

    # first response matches the local frag's timestamp, all the rest are
    # one second later
    later = utils.Timestamp(float(self.obj_timestamp) + 1)
    resp_timestamps = [self.obj_timestamp]
    resp_timestamps.extend([later] * (num_other_resps + 1))

    exc = self._do_test_reconstruct_insufficient_frags(
        {'quarantine_threshold': 1, 'reclaim_age': 0},
        2, other_responses, resp_timestamps=resp_timestamps)

    self.assertIsInstance(exc, DiskFileError)
    self._assert_diskfile_not_quarantined()

    logged = self.logger.get_lines_for_level('error')
    self.assertEqual(3, len(logged), logged)
    # the first two lines each report a single ok response
    for line in logged[:2]:
        self.assertIn(
            'Unable to get enough responses (1/%d from 1 ok responses)'
            % (self.policy.ec_ndata,), line)
    self.assertIn(
        'Unable to get enough responses (%d x 404 error responses)'
        % num_other_resps, logged[2])
|
|
|
|
def test_reconstruct_fa_no_quarantine_frag_resp_mixed_etags(self):
    # verify that solitary fragment is not quarantined if there is a
    # response for a frag with different etag in addition to the
    # response for the solitary local frag
    not_found = (404, None, None)
    num_other_resps = self.policy.object_ring.replicas - 3
    other_responses = [not_found] * num_other_resps

    exc = self._do_test_reconstruct_insufficient_frags(
        {'quarantine_threshold': 1, 'reclaim_age': 0},
        2, other_responses, resp_etags=[None, 'unexpected_etag'])

    self.assertIsInstance(exc, DiskFileError)
    self._assert_diskfile_not_quarantined()

    logged = self.logger.get_lines_for_level('error')
    self.assertEqual(3, len(logged), logged)
    etag_line, ok_line, bad_line = logged
    self.assertIn('Mixed Etag', etag_line)
    self.assertIn(
        'Unable to get enough responses (1/%d from 2 ok responses)'
        % (self.policy.ec_ndata,), ok_line)
    self.assertIn(
        'Unable to get enough responses (%d x 404 error responses)'
        % num_other_resps, bad_line)
|
|
|
|
def _do_test_reconstruct_fa_no_quarantine_bad_headers(self, bad_headers):
    """
    Verify that responses with invalid headers count against the
    quarantine_threshold.

    Two 200 responses are served, for frag indexes 2 and 3, followed by
    404s; ``bad_headers`` is merged into the headers of the second 200.

    :param bad_headers: dict of header names to values that overrides the
        otherwise valid headers of the second 200 response
    """
    self._configure_reconstructor(reclaim_age=0, quarantine_threshold=1)
    local_frag_index = 2
    self._create_fragment(local_frag_index)
    job = {'partition': 0, 'policy': self.policy}
    node = self.policy.object_ring.get_part_nodes(0)[0]
    node['backend_index'] = self.policy.get_backend_index(node['index'])

    test_data = (b'rebuild' * self.policy.ec_segment_size)[:-777]
    etag = md5(test_data, usedforsecurity=False).hexdigest()
    ec_archive_bodies = encode_frag_archive_bodies(self.policy, test_data)

    def make_header(body):
        hdrs = get_header_frag_index(self, body)
        hdrs.update({'X-Object-Sysmeta-Ec-Etag': etag})
        return hdrs

    good_body = ec_archive_bodies[local_frag_index]
    good_headers = make_header(good_body)
    spoiled_body = ec_archive_bodies[3]
    spoiled_headers = make_header(spoiled_body)
    spoiled_headers.update(bad_headers)
    responses = [
        (200, good_body, good_headers),
        (200, spoiled_body, spoiled_headers),
    ]
    other_responses = (
        [(404, None, None)] * (self.policy.object_ring.replicas - 3))

    codes, body_iter, headers = zip(*(responses + other_responses))
    resp_timestamps = [self.obj_timestamp.internal] * len(codes)
    with mocked_http_conn(*codes, body_iter=body_iter, headers=headers,
                          timestamps=resp_timestamps):
        with self.assertRaises(DiskFileError) as cm:
            self.reconstructor.reconstruct_fa(
                job, node, self._create_fragment(local_frag_index))
    self.assertIsInstance(cm.exception, DiskFileError)
    self._assert_diskfile_not_quarantined()

    logged = self.logger.get_lines_for_level('error')
    self.assertEqual(2, len(logged), logged)
    ok_line, bad_line = logged
    self.assertIn(
        'Unable to get enough responses (1/%d from 1 ok responses)'
        % (self.policy.ec_ndata,), ok_line)
    # the response with bad headers is tallied as 'unknown'
    self.assertIn(
        'Unable to get enough responses '
        '(1 x unknown, %d x 404 error responses)'
        % len(other_responses), bad_line)
|
|
|
|
def test_reconstruct_fa_no_quarantine_invalid_frag_index_header(self):
    # a frag index header whose value is not a number
    bad_headers = {'X-Object-Sysmeta-Ec-Frag-Index': 'two'}
    self._do_test_reconstruct_fa_no_quarantine_bad_headers(bad_headers)
|
|
|
|
def test_reconstruct_fa_no_quarantine_missing_frag_index_header(self):
    # a frag index header with an empty value
    bad_headers = {'X-Object-Sysmeta-Ec-Frag-Index': ''}
    self._do_test_reconstruct_fa_no_quarantine_bad_headers(bad_headers)
|
|
|
|
def test_reconstruct_fa_no_quarantine_missing_timestamp_header(self):
    # a data timestamp header with an empty value
    bad_headers = {'X-Backend-Data-Timestamp': ''}
    self._do_test_reconstruct_fa_no_quarantine_bad_headers(bad_headers)
|
|
|
|
def test_reconstruct_fa_no_quarantine_missing_etag_header(self):
    # an EC etag header with an empty value
    bad_headers = {'X-Object-Sysmeta-Ec-Etag': ''}
    self._do_test_reconstruct_fa_no_quarantine_bad_headers(bad_headers)
|
|
|
|
def test_reconstruct_fa_frags_on_handoffs(self):
    # just a lonely old frag on primaries: this appears to be a quarantine
    # candidate, but unexpectedly the other frags are found on handoffs so
    # expect rebuild
    # set reclaim_age to 0 to make lonely frag old enough for quarantine
    self._configure_reconstructor(quarantine_threshold=1, reclaim_age=0)
    ring = self.policy.object_ring
    job = {'partition': 0, 'policy': self.policy}
    node = ring.get_part_nodes(0)[1]
    node['backend_index'] = self.policy.get_backend_index(node['index'])

    test_data = (b'rebuild' * self.policy.ec_segment_size)[:-777]
    etag = md5(test_data, usedforsecurity=False).hexdigest()
    ec_archive_bodies = encode_frag_archive_bodies(self.policy, test_data)
    broken_body = ec_archive_bodies.pop(1)

    # arrange for just one 200 to come from a primary, then 404s, then 200s
    # from handoffs
    not_found_batch = ((404, None, None),) * ring.replicas
    responses = []
    for position, body in enumerate(ec_archive_bodies):
        if position == 1:
            # skip: this is the frag index we're rebuilding; insert 404s
            responses.extend(not_found_batch)
        frag_headers = get_header_frag_index(self, body)
        frag_headers.update({'X-Object-Sysmeta-Ec-Etag': etag})
        responses.append((200, body, frag_headers))

    codes, body_iter, headers = zip(*responses)
    resp_timestamps = [self.obj_timestamp.internal] * len(codes)
    with mocked_http_conn(*codes, body_iter=body_iter, headers=headers,
                          timestamps=resp_timestamps):
        df = self.reconstructor.reconstruct_fa(
            job, node, self._create_fragment(0, body=b''))
        self.assertEqual(0, df.content_length)
        # consume the reader while the mocked connections are in place
        fixed_body = b''.join(df.reader())
    self.assertEqual(len(fixed_body), len(broken_body))
    self.assertEqual(md5(fixed_body, usedforsecurity=False).hexdigest(),
                     md5(broken_body, usedforsecurity=False).hexdigest())

    # no error and warning
    self.assertFalse(self.logger.get_lines_for_level('error'))
    self.assertFalse(self.logger.get_lines_for_level('warning'))
    expected_debug = ('Reconstructing frag from handoffs, node_count=%d'
                      % (ring.replicas * 2))
    self.assertIn(expected_debug,
                  self.logger.get_lines_for_level('debug'))
|
|
|
|
def test_reconstruct_fa_finds_duplicate_does_not_fail(self):
    # some responses repeat a frag archive that was already served;
    # the rebuild is still expected to succeed
    job = {'partition': 0, 'policy': self.policy}
    node = self.policy.object_ring.get_part_nodes(0)[1]
    node['backend_index'] = self.policy.get_backend_index(node['index'])

    test_data = (b'rebuild' * self.policy.ec_segment_size)[:-777]
    etag = md5(test_data, usedforsecurity=False).hexdigest()
    ec_archive_bodies = encode_frag_archive_bodies(self.policy, test_data)

    broken_body = ec_archive_bodies.pop(1)
    # add some duplicates: repeat the leading bodies at the front and trim
    # the same number from the end so the response count is unchanged
    num_duplicates = self.policy.ec_nparity - 1
    ec_archive_bodies = (ec_archive_bodies[:num_duplicates] +
                         ec_archive_bodies)[:-num_duplicates]

    def make_header(body):
        hdrs = get_header_frag_index(self, body)
        hdrs.update({'X-Object-Sysmeta-Ec-Etag': etag})
        return hdrs

    responses = [(200, body, make_header(body))
                 for body in ec_archive_bodies]
    codes, body_iter, headers = zip(*responses)
    with mocked_http_conn(*codes, body_iter=body_iter, headers=headers):
        df = self.reconstructor.reconstruct_fa(
            job, node, self._create_fragment(2))
        fixed_body = b''.join(df.reader())
        self.assertEqual(len(fixed_body), len(broken_body))
        self.assertEqual(
            md5(fixed_body, usedforsecurity=False).hexdigest(),
            md5(broken_body, usedforsecurity=False).hexdigest())

    # no error and warning
    self.assertFalse(self.logger.get_lines_for_level('error'))
    self.assertFalse(self.logger.get_lines_for_level('warning'))

    # the single debug line ends with a JSON list of the frag indexes
    # used, which must not include the index being rebuilt
    debug_lines = self.logger.get_lines_for_level('debug')
    self.assertEqual(1, len(debug_lines))
    debug_line = debug_lines[0]
    expected_prefix = 'Reconstruct frag #1 with frag indexes'
    self.assertIn(expected_prefix, debug_line)
    used_frag_indexes = json.loads(debug_line[len(expected_prefix):])
    self.assertNotIn(1, used_frag_indexes)
|
|
|
|
def test_reconstruct_fa_missing_headers(self):
    # Negative tests: an expected header is missing from one of the
    # responses gathered for the rebuild; the rebuild is still expected to
    # succeed, with a warning logged for the offending response
    job = {'partition': 0, 'policy': self.policy}
    node = self.policy.object_ring.get_part_nodes(0)[1]
    node['backend_index'] = self.policy.get_backend_index(node['index'])

    test_data = (b'rebuild' * self.policy.ec_segment_size)[:-777]
    etag = md5(test_data, usedforsecurity=False).hexdigest()
    ec_archive_bodies = encode_frag_archive_bodies(self.policy, test_data)

    broken_body = ec_archive_bodies.pop(1)

    def make_header(body):
        hdrs = get_header_frag_index(self, body)
        hdrs.update({
            'X-Object-Sysmeta-Ec-Etag': etag,
            'X-Backend-Timestamp': self.obj_timestamp.internal,
        })
        return hdrs

    def check_missing_header(missing_header, warning_extra):
        self.logger._clear()
        responses = [(200, body, make_header(body))
                     for body in ec_archive_bodies]

        # To drop the header from the response[0], set None as the value
        # explicitly instead of deleting the key because if no key exists
        # in the dict, fake_http_connect will insert some key/value pairs
        # automatically (e.g. X-Backend-Timestamp)
        first_headers = responses[0][2]
        first_headers.update({missing_header: None})

        codes, body_iter, headers = zip(*responses)
        with mocked_http_conn(*codes, body_iter=body_iter,
                              headers=headers) as mock_conn:
            df = self.reconstructor.reconstruct_fa(
                job, node, self._create_fragment(2))
            fixed_body = b''.join(df.reader())
            self.assertEqual(len(fixed_body), len(broken_body))
            self.assertEqual(
                md5(fixed_body, usedforsecurity=False).hexdigest(),
                md5(broken_body, usedforsecurity=False).hexdigest())

        # no errors
        self.assertFalse(self.logger.get_lines_for_level('error'))
        # ...but warning for the missing header
        warning_log_lines = self.logger.get_lines_for_level('warning')
        self.assertEqual(1, len(warning_log_lines))

        # the warning names the node and path of the first request
        path = unquote('%(ip)s:%(port)d%(path)s' % mock_conn.requests[0])
        expected_warning = 'Invalid resp from %s policy#0%s' % (
            path, warning_extra)
        if six.PY2:
            expected_warning = expected_warning.decode('utf8')
        self.assertIn(expected_warning, warning_log_lines)

    scenarios = (
        ('X-Object-Sysmeta-Ec-Frag-Index',
         ' (invalid X-Object-Sysmeta-Ec-Frag-Index: None)'),
        ('X-Object-Sysmeta-Ec-Etag',
         ', frag index 0 (missing Etag)'),
        ('X-Backend-Timestamp',
         ', frag index 0 (missing X-Backend-Data-Timestamp and '
         'X-Backend-Timestamp)'),
    )
    for header_name, expected_extra in scenarios:
        check_missing_header(header_name, expected_extra)
|
|
|
|
def test_reconstruct_fa_invalid_frag_index_headers(self):
    # Negative tests: the EC frag index header of one of the responses
    # gathered for the rebuild has an invalid value; the rebuild is still
    # expected to succeed, with a warning logged for that response
    job = {'partition': 0, 'policy': self.policy}
    node = self.policy.object_ring.get_part_nodes(0)[1]
    node['backend_index'] = self.policy.get_backend_index(node['index'])

    test_data = (b'rebuild' * self.policy.ec_segment_size)[:-777]
    etag = md5(test_data, usedforsecurity=False).hexdigest()
    ec_archive_bodies = encode_frag_archive_bodies(self.policy, test_data)

    broken_body = ec_archive_bodies.pop(1)

    def make_header(body):
        hdrs = get_header_frag_index(self, body)
        hdrs.update({'X-Object-Sysmeta-Ec-Etag': etag})
        return hdrs

    def check_invalid_frag_index(invalid_frag_index):
        self.logger._clear()
        responses = [(200, body, make_header(body))
                     for body in ec_archive_bodies]

        # spoil the frag index header of the first response only
        first_headers = responses[0][2]
        first_headers.update({
            'X-Object-Sysmeta-Ec-Frag-Index': invalid_frag_index})

        codes, body_iter, headers = zip(*responses)
        with mocked_http_conn(*codes, body_iter=body_iter,
                              headers=headers) as mock_conn:
            df = self.reconstructor.reconstruct_fa(
                job, node, self._create_fragment(2))
            fixed_body = b''.join(df.reader())
            self.assertEqual(len(fixed_body), len(broken_body))
            self.assertEqual(
                md5(fixed_body, usedforsecurity=False).hexdigest(),
                md5(broken_body, usedforsecurity=False).hexdigest())

        # no errors
        self.assertFalse(self.logger.get_lines_for_level('error'))
        # ...but warning for the invalid header
        warning_log_lines = self.logger.get_lines_for_level('warning')
        self.assertEqual(1, len(warning_log_lines))

        # the warning names the node and path of the first request
        path = unquote('%(ip)s:%(port)d%(path)s' % mock_conn.requests[0])
        expected_warning = (
            'Invalid resp from %s policy#0 '
            '(invalid X-Object-Sysmeta-Ec-Frag-Index: %r)'
            % (path, invalid_frag_index))
        if six.PY2:
            expected_warning = expected_warning.decode('utf8')
        self.assertIn(expected_warning, warning_log_lines)

    for bad_value in ('None', 'invalid'):
        check_invalid_frag_index(bad_value)
|
|
|
|
|
|
@patch_policies(with_ec_default=True)
class TestReconstructFragmentArchiveUTF8(TestReconstructFragmentArchive):
    """
    Repeat the superclass tests with an object path that contains
    non-ascii chars.
    """

    # UTF-8 encoded bytes of 'o' followed by a non-ascii character
    obj_name = b'o\xc3\xa8'
|
|
|
|
|
|
@patch_policies([ECStoragePolicy(0, name='ec', is_default=True,
                                 ec_type=DEFAULT_TEST_EC_TYPE,
                                 ec_ndata=10, ec_nparity=4,
                                 ec_segment_size=4096,
                                 ec_duplication_factor=2),
                 StoragePolicy(1, name='other')],
                fake_ring_args=[{'replicas': 28}, {'replicas': 3}])
class TestReconstructFragmentArchiveECDuplicationFactor(
        TestReconstructFragmentArchive):
    """
    Repeat the superclass tests with an EC policy that has a duplication
    factor of 2, and add a test specific to duplicated fragments.
    """

    def test_reconstruct_fa_no_quarantine_duplicate_frags(self):
        # verify that quarantine does not happen if the only other response
        # in addition to the lonely frag's own response is for the same
        # (duplicate) frag index
        self._configure_reconstructor(quarantine_threshold=1, reclaim_age=0)
        local_frag_index = 2
        self._create_fragment(local_frag_index)
        job = {'partition': 0, 'policy': self.policy}
        node = self.policy.object_ring.get_part_nodes(0)[0]
        node['backend_index'] = self.policy.get_backend_index(node['index'])

        test_data = (b'rebuild' * self.policy.ec_segment_size)[:-777]
        etag = md5(test_data, usedforsecurity=False).hexdigest()
        ec_archive_bodies = encode_frag_archive_bodies(
            self.policy, test_data)
        # the local frag's archive and the archive offset from it by the
        # number of unique fragments
        n_unique = self.policy.ec_n_unique_fragments
        frags = [
            ec_archive_bodies[local_frag_index],
            ec_archive_bodies[local_frag_index + n_unique],
        ]

        def make_header(body):
            hdrs = get_header_frag_index(self, body)
            hdrs.update({'X-Object-Sysmeta-Ec-Etag': etag})
            return hdrs

        responses = [(200, frag, make_header(frag)) for frag in frags]
        other_responses = [(404, None, None)] * (n_unique * 2 - 3)
        codes, body_iter, headers = zip(*(responses + other_responses))
        resp_timestamps = [self.obj_timestamp.internal] * len(codes)
        with mocked_http_conn(*codes, body_iter=body_iter,
                              headers=headers,
                              timestamps=resp_timestamps):
            with self.assertRaises(DiskFileError) as cm:
                self.reconstructor.reconstruct_fa(
                    job, node, self._create_fragment(2))
        self.assertIsInstance(cm.exception, DiskFileError)
        self._assert_diskfile_not_quarantined()
        self._verify_error_lines(2, other_responses, 1)
|
|
|
|
|
|
@patch_policies([ECStoragePolicy(0, name='ec', is_default=True,
                                 ec_type=DEFAULT_TEST_EC_TYPE,
                                 ec_ndata=10, ec_nparity=4,
                                 ec_segment_size=4096,
                                 ec_duplication_factor=2),
                 StoragePolicy(1, name='other')],
                fake_ring_args=[{'replicas': 28}, {'replicas': 3}])
class TestObjectReconstructorECDuplicationFactor(TestObjectReconstructor):
    """
    Repeat the superclass tests with an EC policy that has a duplication
    factor of 2, and add tests specific to duplicated fragments.
    """

    def setUp(self):
        super(TestObjectReconstructorECDuplicationFactor, self).setUp()
        self.fabricated_ring = FabricatedRing(replicas=28, devices=56)

    def _test_reconstruct_with_duplicate_frags_no_errors(self, index):
        """
        Rebuild the frag archive at ``index`` while a run of other nodes
        time out, and check the rebuilt body and the request headers sent.

        :param index: index into the partition's primary nodes, and into
            the encoded frag archive bodies, of the frag to rebuild
        """
        utils.mkdirs(os.path.join(self.devices, 'sda1'))
        df_mgr = self.reconstructor._df_router[self.policy]
        df = df_mgr.get_diskfile('sda1', 9, 'a', 'c', 'o',
                                 policy=self.policy)
        write_diskfile(df, self.ts(), data=b'', frag_index=2)
        df.open()

        job = {'partition': 0, 'policy': self.policy}
        node = self.policy.object_ring.get_part_nodes(0)[index]
        node['backend_index'] = self.policy.get_backend_index(node['index'])

        test_data = (b'rebuild' * self.policy.ec_segment_size)[:-777]
        etag = md5(test_data, usedforsecurity=False).hexdigest()
        ec_archive_bodies = encode_frag_archive_bodies(
            self.policy, test_data)

        broken_body = ec_archive_bodies.pop(index)

        responses = []
        for body in ec_archive_bodies:
            frag_headers = get_header_frag_index(self, body)
            frag_headers.update({'X-Object-Sysmeta-Ec-Etag': etag})
            responses.append((200, body, frag_headers))

        # make a hook point at
        # swift.obj.reconstructor.ObjectReconstructor._get_response
        called_headers = []
        orig_func = object_reconstructor.ObjectReconstructor._get_response

        def _get_response_hook(self, node, policy, part, path, headers):
            # record the request headers, then defer to the real method
            called_headers.append(headers)
            return orig_func(self, node, policy, part, path, headers)

        # need parity + 1 node failures to reach duplicated fragments
        num_failures = self.policy.ec_nparity + 1
        failed_start_at = (
            self.policy.ec_n_unique_fragments - num_failures)

        # set Timeout for node #9, #10, #11, #12, #13
        for failed in range(failed_start_at, failed_start_at + num_failures):
            responses[failed] = (Timeout(), '', '')

        codes, body_iter, headers = zip(*responses)
        get_response_path = \
            'swift.obj.reconstructor.ObjectReconstructor._get_response'
        with mock.patch(get_response_path, _get_response_hook), \
                mocked_http_conn(
                    *codes, body_iter=body_iter, headers=headers):
            df = self.reconstructor.reconstruct_fa(job, node, df)
            fixed_body = b''.join(df.reader())
            self.assertEqual(len(fixed_body), len(broken_body))
            self.assertEqual(
                md5(fixed_body, usedforsecurity=False).hexdigest(),
                md5(broken_body, usedforsecurity=False).hexdigest())
            for sent in called_headers:
                sent = HeaderKeyDict(sent)
                self.assertIn('Content-Length', sent)
                self.assertEqual(sent['Content-Length'], '0')
                self.assertIn('User-Agent', sent)
                self.assertTrue(
                    sent['User-Agent'].startswith('obj-reconstructor'))

    def test_reconstruct_with_duplicate_frags_no_errors(self):
        # any fragments can be broken
        for index in range(28):
            self._test_reconstruct_with_duplicate_frags_no_errors(index)

    def test_iter_nodes_for_frag(self):
        # check the nodes yielded for a given node's frag, using a 4+3
        # policy with duplication factor 2 on a 14 replica ring
        self.reconstructor.rebuild_handoff_node_count = -1
        policy = ECStoragePolicy(1, name='test', ec_type=DEFAULT_TEST_EC_TYPE,
                                 ec_ndata=4, ec_nparity=3,
                                 ec_duplication_factor=2)
        policy.object_ring = FabricatedRing(replicas=14, devices=42)
        primaries = policy.object_ring.get_part_nodes(0)

        # (primary to start from, expected index or handoff_index of each
        # yielded node, expected backend_index of every yielded node)
        scenarios = (
            (primaries[0], [0, 0, 7, 14, 21], 0),
            (primaries[3], [3, 3, 10, 17, 24], 3),
            (primaries[7], [7, 0, 7, 14, 21], 0),
            (primaries[-1], [13, 6, 13, 20, 27], 6),
        )
        for start_node, expected, backend_index in scenarios:
            nodes_for_frag = list(self.reconstructor._iter_nodes_for_frag(
                policy, 0, start_node))
            self.assertEqual(expected,
                             [n.get('index', n.get('handoff_index'))
                              for n in nodes_for_frag])
            for yielded in nodes_for_frag:
                self.assertEqual(backend_index, yielded['backend_index'])
|
|
|
|
|
|
# Run this module's tests with the unittest runner when the file is
# executed directly rather than imported.
if __name__ == '__main__':
    unittest.main()
|