Expose bulk-operation limits in /info.

Exposing these limits allows clients to perform the minimal number of
requests required to accomplish some bulk tasks. For example, a client
with many objects to delete can learn that the cluster's limit on
deletes-per-request is, say, 128, and then batch up its deletes in
groups of 128. Without this, the client has to either discover the
limit out-of-band somehow (and get notified if it changes), or do some
sort of binary search to figure out the limit.

Similar reasoning applies to the containers-per-request value.

The errors-per-request values are included so that clients may size
their requests to stay within the failure limit, such that every item
is attempted regardless of how many of them fail.

I split the 'bulk' entry into 'bulk_delete' and 'bulk_upload' because,
from a client's standpoint, they're separate operations. It so happens
that Swift implements both in one piece of middleware, but clients
don't care.

Bonus fix: documented the previously missing max_failed_deletes config
setting for the bulk middleware.

Change-Id: Ic3549aef79682fd5b798145c3545c1609aa1592b
This commit is contained in:
Samuel Merritt 2013-11-27 18:24:17 -08:00
parent 438596fc0f
commit 0d45e99ff0
3 changed files with 58 additions and 12 deletions

View File

@ -492,6 +492,7 @@ use = egg:swift#bulk
# max_containers_per_extraction = 10000 # max_containers_per_extraction = 10000
# max_failed_extractions = 1000 # max_failed_extractions = 1000
# max_deletes_per_request = 10000 # max_deletes_per_request = 10000
# max_failed_deletes = 1000
# yield_frequency = 60 # yield_frequency = 60
# Note: Put after auth in the pipeline. # Note: Put after auth in the pipeline.

View File

@ -184,18 +184,16 @@ class Bulk(object):
payload sent to the proxy (the list of objects/containers to be deleted). payload sent to the proxy (the list of objects/containers to be deleted).
""" """
def __init__(self, app, conf): def __init__(self, app, conf, max_containers_per_extraction=10000,
max_failed_extractions=1000, max_deletes_per_request=10000,
max_failed_deletes=1000, yield_frequency=60):
self.app = app self.app = app
self.logger = get_logger(conf, log_route='bulk') self.logger = get_logger(conf, log_route='bulk')
self.max_containers = int( self.max_containers = max_containers_per_extraction
conf.get('max_containers_per_extraction', 10000)) self.max_failed_extractions = max_failed_extractions
self.max_failed_extractions = int( self.max_failed_deletes = max_failed_deletes
conf.get('max_failed_extractions', 1000)) self.max_deletes_per_request = max_deletes_per_request
self.max_failed_deletes = int( self.yield_frequency = yield_frequency
conf.get('max_failed_deletes', 1000))
self.max_deletes_per_request = int(
conf.get('max_deletes_per_request', 10000))
self.yield_frequency = int(conf.get('yield_frequency', 60))
def create_container(self, req, container_path): def create_container(self, req, container_path):
""" """
@ -542,8 +540,29 @@ class Bulk(object):
def filter_factory(global_conf, **local_conf): def filter_factory(global_conf, **local_conf):
conf = global_conf.copy() conf = global_conf.copy()
conf.update(local_conf) conf.update(local_conf)
register_swift_info('bulk')
max_containers_per_extraction = \
int(conf.get('max_containers_per_extraction', 10000))
max_failed_extractions = int(conf.get('max_failed_extractions', 1000))
max_deletes_per_request = int(conf.get('max_deletes_per_request', 10000))
max_failed_deletes = int(conf.get('max_failed_deletes', 1000))
yield_frequency = int(conf.get('yield_frequency', 60))
register_swift_info(
'bulk_upload',
max_containers_per_extraction=max_containers_per_extraction,
max_failed_extractions=max_failed_extractions)
register_swift_info(
'bulk_delete',
max_deletes_per_request=max_deletes_per_request,
max_failed_deletes=max_failed_deletes)
def bulk_filter(app): def bulk_filter(app):
return Bulk(app, conf) return Bulk(
app, conf,
max_containers_per_extraction=max_containers_per_extraction,
max_failed_extractions=max_failed_extractions,
max_deletes_per_request=max_deletes_per_request,
max_failed_deletes=max_failed_deletes,
yield_frequency=yield_frequency)
return bulk_filter return bulk_filter

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numbers
import unittest import unittest
import os import os
import tarfile import tarfile
@ -22,6 +23,7 @@ from shutil import rmtree
from tempfile import mkdtemp from tempfile import mkdtemp
from StringIO import StringIO from StringIO import StringIO
from mock import patch from mock import patch
from swift.common import utils
from swift.common.middleware import bulk from swift.common.middleware import bulk
from swift.common.swob import Request, Response, HTTPException from swift.common.swob import Request, Response, HTTPException
from swift.common.http import HTTP_NOT_FOUND, HTTP_UNAUTHORIZED from swift.common.http import HTTP_NOT_FOUND, HTTP_UNAUTHORIZED
@ -736,5 +738,29 @@ class TestDelete(unittest.TestCase):
['/c/f2', '401 Unauthorized']]) ['/c/f2', '401 Unauthorized']])
class TestSwiftInfo(unittest.TestCase):
    """Check the entries the bulk middleware registers in swift info."""

    def setUp(self):
        # Start from empty registries so that only the entries registered
        # by the filter_factory call in the test are present.
        utils._swift_info = {}
        utils._swift_admin_info = {}

    def test_registered_defaults(self):
        """An empty config registers integer limits for upload and delete."""
        bulk.filter_factory({})
        swift_info = utils.get_swift_info()

        # Limits registered under the 'bulk_upload' entry.
        self.assertTrue('bulk_upload' in swift_info)
        self.assertTrue(isinstance(
            swift_info['bulk_upload'].get('max_containers_per_extraction'),
            numbers.Integral))
        self.assertTrue(isinstance(
            swift_info['bulk_upload'].get('max_failed_extractions'),
            numbers.Integral))

        # Limits registered under the 'bulk_delete' entry.
        self.assertTrue('bulk_delete' in swift_info)
        self.assertTrue(isinstance(
            swift_info['bulk_delete'].get('max_deletes_per_request'),
            numbers.Integral))
        self.assertTrue(isinstance(
            swift_info['bulk_delete'].get('max_failed_deletes'),
            numbers.Integral))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()