Merge "Limit number of revert tombstone SSYNC requests"

commit d46b0f29f9
Jenkins, 2017-06-08 18:10:07 +00:00; committed by Gerrit Code Review
3 changed files with 36 additions and 58 deletions

swift/obj/reconstructor.py

@@ -872,14 +872,18 @@ class ObjectReconstructor(Daemon):
             # push partitions off this node, but none of the suffixes
             # have any data fragments to hint at which node would be a
             # good candidate to receive the tombstones.
+            #
+            # we'll check a sample of other primaries before we delete our
+            # local tombstones; the exact number doesn't matter as long as
+            # it's enough to ensure the tombstones are not lost and less
+            # than *all the replicas*
+            nsample = (policy.ec_n_unique_fragments *
+                       policy.ec_duplication_factor) - policy.ec_ndata + 1
             jobs.append(build_job(
                 job_type=REVERT,
                 frag_index=None,
                 suffixes=non_data_fragment_suffixes,
-                # this is super safe
-                sync_to=part_nodes,
-                # something like this would probably be better
-                # sync_to=random.sample(part_nodes, 3),
+                sync_to=random.sample(part_nodes, nsample)
             ))
         # return a list of jobs for this part
         return jobs
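
The new nsample expression is easy to sanity-check with concrete numbers. A minimal sketch, assuming a hypothetical 10+4 EC scheme (the numbers and the helper name are illustrative, not the probe environment's actual policy):

    # Illustrative arithmetic for nsample (hypothetical policy parameters).
    def revert_tombstone_sample_size(n_unique_fragments, duplication_factor,
                                     ndata):
        # mirrors: nsample = (policy.ec_n_unique_fragments *
        #                     policy.ec_duplication_factor) - policy.ec_ndata + 1
        return n_unique_fragments * duplication_factor - ndata + 1

    # a 10+4 scheme: 14 unique fragments, so sample 5 of the 14 primaries
    assert revert_tombstone_sample_size(14, 1, 10) == 5
    # the same scheme with ec_duplication_factor=2: 28 replicas, sample 19
    assert revert_tombstone_sample_size(14, 2, 10) == 19

With no duplication the expression reduces to ec_nparity + 1, which matches the comment's intent: enough primaries that the tombstones cannot all be lost, but fewer SSYNC requests than one per replica.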

test/probe/test_reconstructor_revert.py

@@ -173,7 +173,7 @@ class TestReconstructorRevert(ECProbeTest):
         client.put_object(self.url, self.token, self.container_name,
                           self.object_name, contents=contents)
-        # now lets shut down a couple primaries
+        # now lets shut down a couple of primaries
         failed_nodes = random.sample(onodes, 2)
         for node in failed_nodes:
             self.kill_drive(self.device_dir('object', node))
@@ -197,61 +197,26 @@ class TestReconstructorRevert(ECProbeTest):
                 self.fail('Node data on %r was not fully destroyed!' %
                           (node,))
-        # repair the first primary
-        self.revive_drive(self.device_dir('object', failed_nodes[0]))
-        # run the reconstructor on the handoffs nodes, if there are no data
-        # frags to hint at the node index - each hnode syncs to all primaries
-        for hnode in hnodes:
-            self.reconstructor.once(number=self.config_number(hnode))
-        # because not all primaries are online, the tombstones remain
-        for hnode in hnodes:
+        # run the reconstructor on the handoff node multiple times until
+        # tombstone is pushed out - each handoff node syncs to a few
+        # primaries each time
+        iterations = 0
+        while iterations < 52:
+            self.reconstructor.once(number=self.config_number(hnodes[0]))
+            iterations += 1
+            # see if the tombstone is reverted
             try:
-                self.direct_get(hnode, opart)
+                self.direct_get(hnodes[0], opart)
             except direct_client.DirectClientException as err:
                 self.assertEqual(err.http_status, 404)
-                self.assertEqual(err.http_headers['X-Backend-Timestamp'],
-                                 delete_timestamp)
-            else:
-                self.fail('Found obj data on %r' % hnode)
-        # ... but it's on the first failed (now repaired) primary
-        try:
-            self.direct_get(failed_nodes[0], opart)
-        except direct_client.DirectClientException as err:
-            self.assertEqual(err.http_status, 404)
-            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
-                             delete_timestamp)
+                if 'X-Backend-Timestamp' not in err.http_headers:
+                    # this means the tombstone is *gone* so it's reverted
+                    break
         else:
-            self.fail('Found obj data on %r' % failed_nodes[0])
+            self.fail('Still found tombstone on %r after %s iterations' % (
+                hnodes[0], iterations))
-        # repair the second primary
-        self.revive_drive(self.device_dir('object', failed_nodes[1]))
-        # run the reconstructor on the *first* handoff node
-        self.reconstructor.once(number=self.config_number(hnodes[0]))
-        # make sure it's tombstone was pushed out
-        try:
-            self.direct_get(hnodes[0], opart)
-        except direct_client.DirectClientException as err:
-            self.assertEqual(err.http_status, 404)
-            self.assertNotIn('X-Backend-Timestamp', err.http_headers)
-        else:
-            self.fail('Found obj data on %r' % hnodes[0])
-        # ... and now it's on the second failed primary too!
-        try:
-            self.direct_get(failed_nodes[1], opart)
-        except direct_client.DirectClientException as err:
-            self.assertEqual(err.http_status, 404)
-            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
-                             delete_timestamp)
-        else:
-            self.fail('Found obj data on %r' % failed_nodes[1])
-        # ... but still on the second handoff node
+        # tombstone is still on the *second* handoff
         try:
             self.direct_get(hnodes[1], opart)
         except direct_client.DirectClientException as err:
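
Why the test allows up to 52 passes: each reconstructor pass picks a fresh random sample of primaries, and (assuming a pass can only purge the handoff's tombstone when every sampled primary is reachable) a pass whose sample includes a failed node leaves the tombstone in place. A rough probability sketch with hypothetical counts — 6 primaries, 2 still failed, samples of 3 — rather than the probe environment's actual ring (math.comb needs Python 3.8+):

    import math

    replicas, failed, nsample = 6, 2, 3

    # P(one pass samples only alive primaries)
    p_pass = math.comb(replicas - failed, nsample) / math.comb(replicas, nsample)
    # P(at least one all-alive sample within 52 passes)
    p_52 = 1 - (1 - p_pass) ** 52

    print(p_pass)  # 0.2
    print(p_52)    # ~0.99999, so the retry cap makes a flaky failure vanishingly rare

Under the same assumption, once every primary is repaired any sample succeeds, which is why the hunk below expects the second handoff's tombstone to revert on the first pass.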
@@ -261,10 +226,14 @@ class TestReconstructorRevert(ECProbeTest):
         else:
             self.fail('Found obj data on %r' % hnodes[1])
-        # ... until it's next sync
+        # repair the primaries
+        self.revive_drive(self.device_dir('object', failed_nodes[0]))
+        self.revive_drive(self.device_dir('object', failed_nodes[1]))
+        # run reconstructor on second handoff
         self.reconstructor.once(number=self.config_number(hnodes[1]))
-        # ... then it's tombstone is pushed off too!
+        # verify tombstone is reverted on the first pass
         try:
             self.direct_get(hnodes[1], opart)
         except direct_client.DirectClientException as err:

test/unit/obj/test_reconstructor.py

@@ -2086,7 +2086,12 @@ class TestObjectReconstructor(BaseTestObjectReconstructor):
             'local_dev': self.local_dev,
             'device': self.local_dev['device'],
         }
-        self.assertEqual(ring.replica_count, len(job['sync_to']))
+        self.assertEqual(ring.replica_count, len(part_nodes))
+        expected_samples = (
+            (self.policy.ec_n_unique_fragments *
+             self.policy.ec_duplication_factor) -
+            self.policy.ec_ndata + 1)
+        self.assertEqual(len(job['sync_to']), expected_samples)
         for k, v in expected.items():
             msg = 'expected %s != %s for %s' % (
                 v, job[k], k)
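
The new assertions pin down only the sample's size; the subset and no-duplicates properties come from random.sample itself, which draws without replacement. A small self-contained illustration (the node dicts and counts are hypothetical stand-ins, not ring data):

    import random

    part_nodes = [{'id': i} for i in range(6)]
    nsample = 3
    sync_to = random.sample(part_nodes, nsample)

    assert len(sync_to) == nsample
    assert all(node in part_nodes for node in sync_to)
    # no node is picked twice: sampling is without replacement
    assert len({node['id'] for node in sync_to}) == nsample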