Merge "Limit number of revert tombstone SSYNC requests"
commit d46b0f29f9
@@ -872,14 +872,18 @@ class ObjectReconstructor(Daemon):
             # push partitions off this node, but none of the suffixes
             # have any data fragments to hint at which node would be a
             # good candidate to receive the tombstones.
+            #
+            # we'll check a sample of other primaries before we delete our
+            # local tombstones, the exact number doesn't matter as long as
+            # it's enough to ensure the tombstones are not lost and less
+            # than *all the replicas*
+            nsample = (policy.ec_n_unique_fragments *
+                       policy.ec_duplication_factor) - policy.ec_ndata + 1
             jobs.append(build_job(
                 job_type=REVERT,
                 frag_index=None,
                 suffixes=non_data_fragment_suffixes,
-                # this is super safe
-                sync_to=part_nodes,
-                # something like this would be probably be better
-                # sync_to=random.sample(part_nodes, 3),
+                sync_to=random.sample(part_nodes, nsample)
             ))
         # return a list of jobs for this part
         return jobs
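For a rough sense of the sample size above (hypothetical policy numbers, not taken from this change): a 10+4 policy with no fragment duplication has 14 primaries per partition, so nsample works out to 5 - more than a single copy of the tombstone, but far fewer SSYNC requests than syncing to all 14 primaries. One way to read the "not lost" comment: only 14 - 5 = 9 primaries can be left without the tombstone, so any ec_ndata-sized (10-node) subset of primaries still contains at least one tombstone.

    import random

    # hypothetical policy, for illustration only: 10 data + 4 parity, no duplication
    ec_ndata = 10
    ec_n_unique_fragments = 14              # ec_ndata + ec_nparity
    ec_duplication_factor = 1

    replica_count = ec_n_unique_fragments * ec_duplication_factor
    part_nodes = ['node%02d' % i for i in range(replica_count)]

    # same formula as the hunk above
    nsample = (ec_n_unique_fragments * ec_duplication_factor) - ec_ndata + 1
    print(nsample)                                      # 5
    print(sorted(random.sample(part_nodes, nsample)))   # e.g. 5 of the 14 primaries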
@@ -173,7 +173,7 @@ class TestReconstructorRevert(ECProbeTest):
         client.put_object(self.url, self.token, self.container_name,
                           self.object_name, contents=contents)
 
-        # now lets shut down a couple primaries
+        # now lets shut down a couple of primaries
         failed_nodes = random.sample(onodes, 2)
         for node in failed_nodes:
             self.kill_drive(self.device_dir('object', node))
@@ -197,61 +197,26 @@ class TestReconstructorRevert(ECProbeTest):
                 self.fail('Node data on %r was not fully destroyed!' %
                           (node,))
 
-        # repair the first primary
-        self.revive_drive(self.device_dir('object', failed_nodes[0]))
-
-        # run the reconstructor on the handoffs nodes, if there are no data
-        # frags to hint at the node index - each hnode syncs to all primaries
-        for hnode in hnodes:
-            self.reconstructor.once(number=self.config_number(hnode))
-
-        # because not all primaries are online, the tombstones remain
-        for hnode in hnodes:
-            try:
-                self.direct_get(hnode, opart)
-            except direct_client.DirectClientException as err:
-                self.assertEqual(err.http_status, 404)
-                self.assertEqual(err.http_headers['X-Backend-Timestamp'],
-                                 delete_timestamp)
-            else:
-                self.fail('Found obj data on %r' % hnode)
-
-        # ... but it's on the first failed (now repaired) primary
-        try:
-            self.direct_get(failed_nodes[0], opart)
-        except direct_client.DirectClientException as err:
-            self.assertEqual(err.http_status, 404)
-            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
-                             delete_timestamp)
-        else:
-            self.fail('Found obj data on %r' % failed_nodes[0])
-
-        # repair the second primary
-        self.revive_drive(self.device_dir('object', failed_nodes[1]))
-
-        # run the reconstructor on the *first* handoff node
-        self.reconstructor.once(number=self.config_number(hnodes[0]))
-
-        # make sure it's tombstone was pushed out
-        try:
-            self.direct_get(hnodes[0], opart)
-        except direct_client.DirectClientException as err:
-            self.assertEqual(err.http_status, 404)
-            self.assertNotIn('X-Backend-Timestamp', err.http_headers)
-        else:
-            self.fail('Found obj data on %r' % hnodes[0])
-
-        # ... and now it's on the second failed primary too!
-        try:
-            self.direct_get(failed_nodes[1], opart)
-        except direct_client.DirectClientException as err:
-            self.assertEqual(err.http_status, 404)
-            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
-                             delete_timestamp)
-        else:
-            self.fail('Found obj data on %r' % failed_nodes[1])
+        # run the reconstructor on the handoff node multiple times until
+        # tombstone is pushed out - each handoff node syncs to a few
+        # primaries each time
+        iterations = 0
+        while iterations < 52:
+            self.reconstructor.once(number=self.config_number(hnodes[0]))
+            iterations += 1
+            # see if the tombstone is reverted
+            try:
+                self.direct_get(hnodes[0], opart)
+            except direct_client.DirectClientException as err:
+                self.assertEqual(err.http_status, 404)
+                if 'X-Backend-Timestamp' not in err.http_headers:
+                    # this means the tombstone is *gone* so it's reverted
+                    break
+        else:
+            self.fail('Still found tombstone on %r after %s iterations' % (
+                hnodes[0], iterations))
 
-        # ... but still on the second handoff node
+        # tombstone is still on the *second* handoff
         try:
             self.direct_get(hnodes[1], opart)
         except direct_client.DirectClientException as err:
@@ -261,10 +226,14 @@ class TestReconstructorRevert(ECProbeTest):
         else:
             self.fail('Found obj data on %r' % hnodes[1])
 
-        # ... until it's next sync
+        # repair the primaries
+        self.revive_drive(self.device_dir('object', failed_nodes[0]))
+        self.revive_drive(self.device_dir('object', failed_nodes[1]))
+
+        # run reconstructor on second handoff
         self.reconstructor.once(number=self.config_number(hnodes[1]))
 
-        # ... then it's tombstone is pushed off too!
+        # verify tombstone is reverted on the first pass
         try:
             self.direct_get(hnodes[1], opart)
         except direct_client.DirectClientException as err:
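The 52-pass cap in the loop above is deliberately generous: per the new test comment, each reconstructor pass on the handoff only syncs to a few primaries, so several passes may be needed before the tombstone can be dropped locally. A toy estimate, independent of reconstructor internals and using made-up sizes, of how many passes it takes for repeated samples of 5 out of 14 primaries to have touched every primary at least once:

    import random

    # made-up sizes for illustration: 14 primaries, samples of 5 per pass
    replica_count, nsample, trials = 14, 5, 10000
    rng = random.Random(1234)

    def passes_to_cover_all():
        seen, passes = set(), 0
        while len(seen) < replica_count:
            seen.update(rng.sample(range(replica_count), nsample))
            passes += 1
        return passes

    results = [passes_to_cover_all() for _ in range(trials)]
    print(sum(results) / trials, max(results))  # typically well under a dozen passes on average

Even under the pessimistic assumption that every primary must be sampled before the local tombstone can go, a few dozen passes is ample headroom, so hitting the 52-pass bound indicates a real failure rather than bad luck.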
@@ -2086,7 +2086,12 @@ class TestObjectReconstructor(BaseTestObjectReconstructor):
             'local_dev': self.local_dev,
             'device': self.local_dev['device'],
         }
-        self.assertEqual(ring.replica_count, len(job['sync_to']))
+        self.assertEqual(ring.replica_count, len(part_nodes))
+        expected_samples = (
+            (self.policy.ec_n_unique_fragments *
+             self.policy.ec_duplication_factor) -
+            self.policy.ec_ndata + 1)
+        self.assertEqual(len(job['sync_to']), expected_samples)
         for k, v in expected.items():
             msg = 'expected %s != %s for %s' % (
                 v, job[k], k)
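The new assertions separate two quantities that the old single assertion conflated: the ring's replica count (the length of part_nodes) and the number of nodes the revert job actually syncs to. With fragment duplication the two clearly diverge; hypothetical numbers below, not the policy this test actually configures:

    # hypothetical duplicated policy, for illustration only:
    # 4 data + 2 parity, with each unique fragment duplicated twice
    ec_ndata = 4
    ec_n_unique_fragments = 6                # ec_ndata + ec_nparity
    ec_duplication_factor = 2

    replica_count = ec_n_unique_fragments * ec_duplication_factor        # 12
    expected_samples = (
        (ec_n_unique_fragments * ec_duplication_factor) - ec_ndata + 1)  # 9

    assert expected_samples < replica_count  # the revert job contacts 9 of 12 primaries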