Merge "Fix lazy-listing of object segments."

This commit is contained in:
Jenkins 2012-11-19 15:28:20 +00:00 committed by Gerrit Code Review
commit cb55f0c3a8
2 changed files with 171 additions and 14 deletions

View File

@ -24,10 +24,7 @@
# These shenanigans are to ensure all related objects can be garbage # These shenanigans are to ensure all related objects can be garbage
# collected. We've seen objects hang around forever otherwise. # collected. We've seen objects hang around forever otherwise.
try: import itertools
import simplejson as json
except ImportError:
import json
import mimetypes import mimetypes
import re import re
import time import time
@ -41,7 +38,7 @@ from eventlet.queue import Queue
from eventlet.timeout import Timeout from eventlet.timeout import Timeout
from swift.common.utils import ContextPool, normalize_timestamp, \ from swift.common.utils import ContextPool, normalize_timestamp, \
config_true_value, public config_true_value, public, json
from swift.common.bufferedhttp import http_connect from swift.common.bufferedhttp import http_connect
from swift.common.constraints import check_metadata, check_object_creation, \ from swift.common.constraints import check_metadata, check_object_creation, \
CONTAINER_LISTING_LIMIT, MAX_FILE_SIZE CONTAINER_LISTING_LIMIT, MAX_FILE_SIZE
@ -247,6 +244,11 @@ class ObjectController(Controller):
self.object_name = unquote(object_name) self.object_name = unquote(object_name)
def _listing_iter(self, lcontainer, lprefix, env): def _listing_iter(self, lcontainer, lprefix, env):
for page in self._listing_pages_iter(lcontainer, lprefix, env):
for item in page:
yield item
def _listing_pages_iter(self, lcontainer, lprefix, env):
lpartition, lnodes = self.app.container_ring.get_nodes( lpartition, lnodes = self.app.container_ring.get_nodes(
self.account_name, lcontainer) self.account_name, lcontainer)
marker = '' marker = ''
@ -278,8 +280,29 @@ class ObjectController(Controller):
if not sublisting: if not sublisting:
break break
marker = sublisting[-1]['name'] marker = sublisting[-1]['name']
for obj in sublisting: yield sublisting
yield obj
def _remaining_items(self, listing_iter):
"""
Returns an item-by-item iterator for a page-by-page iterator
of item listings.
Swallows listing-related errors; this iterator is only used
after we've already started streaming a response to the
client, and so if we start getting errors from the container
servers now, it's too late to send an error to the client, so
we just quit looking for segments.
"""
try:
for page in listing_iter:
for item in page:
yield item
except ListingIterNotFound:
pass
except ListingIterError:
pass
except ListingIterNotAuthorized:
pass
def is_good_source(self, src): def is_good_source(self, src):
""" """
@ -316,16 +339,21 @@ class ObjectController(Controller):
lcontainer = unquote(lcontainer) lcontainer = unquote(lcontainer)
lprefix = unquote(lprefix) lprefix = unquote(lprefix)
try: try:
listing = list(self._listing_iter(lcontainer, lprefix, pages_iter = iter(self._listing_pages_iter(lcontainer, lprefix,
req.environ)) req.environ))
listing_page1 = pages_iter.next()
listing = itertools.chain(listing_page1,
self._remaining_items(pages_iter))
except ListingIterNotFound: except ListingIterNotFound:
return HTTPNotFound(request=req) return HTTPNotFound(request=req)
except ListingIterNotAuthorized, err: except ListingIterNotAuthorized, err:
return err.aresp return err.aresp
except ListingIterError: except ListingIterError:
return HTTPServerError(request=req) return HTTPServerError(request=req)
except StopIteration:
listing_page1 = listing = ()
if len(listing) > CONTAINER_LISTING_LIMIT: if len(listing_page1) >= CONTAINER_LISTING_LIMIT:
resp = Response(headers=resp.headers, request=req, resp = Response(headers=resp.headers, request=req,
conditional_response=True) conditional_response=True)
if req.method == 'HEAD': if req.method == 'HEAD':
@ -344,14 +372,13 @@ class ObjectController(Controller):
return head_response return head_response
else: else:
resp.app_iter = SegmentedIterable( resp.app_iter = SegmentedIterable(
self, lcontainer, self, lcontainer, listing, resp)
self._listing_iter(lcontainer, lprefix, req.environ),
resp)
else: else:
# For objects with a reasonable number of segments, we'll serve # For objects with a reasonable number of segments, we'll serve
# them with a set content-length and computed etag. # them with a set content-length and computed etag.
if listing: if listing:
listing = list(listing)
content_length = sum(o['bytes'] for o in listing) content_length = sum(o['bytes'] for o in listing)
last_modified = max(o['last_modified'] for o in listing) last_modified = max(o['last_modified'] for o in listing)
last_modified = datetime(*map(int, re.split('[^\d]', last_modified = datetime(*map(int, re.split('[^\d]',

View File

@ -20,6 +20,7 @@ from logging.handlers import SysLogHandler
import os import os
import sys import sys
import unittest import unittest
import urlparse
import signal import signal
from ConfigParser import ConfigParser from ConfigParser import ConfigParser
from contextlib import contextmanager from contextlib import contextmanager
@ -270,6 +271,8 @@ def fake_http_connect(*code_iter, **kwargs):
code_iter = iter(code_iter) code_iter = iter(code_iter)
static_body = kwargs.get('body', None) static_body = kwargs.get('body', None)
body_iter = kwargs.get('body_iter', None) body_iter = kwargs.get('body_iter', None)
if body_iter:
body_iter = iter(body_iter)
def connect(*args, **ckwargs): def connect(*args, **ckwargs):
if 'give_content_type' in kwargs: if 'give_content_type' in kwargs:
@ -875,7 +878,7 @@ class TestObjectController(unittest.TestCase):
set_http_connect(200, 200, 200, 200, 200, 200, 200, 200, 201, 201, set_http_connect(200, 200, 200, 200, 200, 200, 200, 200, 201, 201,
201, 200, 200, 200, 201, 200, 200, 200,
give_connect=test_connect, give_connect=test_connect,
body_iter=iter(body_iter), body_iter=body_iter,
headers={'x-versions-location': 'foo'}) headers={'x-versions-location': 'foo'})
self.app.memcache.store = {} self.app.memcache.store = {}
req = Request.blank('/a/c/o', req = Request.blank('/a/c/o',
@ -885,6 +888,133 @@ class TestObjectController(unittest.TestCase):
res = controller.DELETE(req) res = controller.DELETE(req)
self.assertEquals(test_errors, []) self.assertEquals(test_errors, [])
def test_GET_manifest_no_segments(self):
response_bodies = (
'', # HEAD /a
'', # HEAD /a/c
'', # GET manifest
simplejson.dumps([])) # GET empty listing
with save_globals():
controller = proxy_server.ObjectController(
self.app, 'a', 'c', 'manifest')
set_http_connect(
200, # HEAD /a
200, # HEAD /a/c
200, # GET manifest
200, # GET empty listing
headers={"X-Object-Manifest": "segments/seg"},
body_iter=response_bodies)
req = Request.blank('/a/c/manifest')
resp = controller.GET(req)
self.assertEqual(resp.status_int, 200)
self.assertEqual(resp.body, '')
def test_GET_manifest_limited_listing(self):
listing1 = [{"hash": "454dfc73af632012ce3e6217dc464241",
"last_modified": "2012-11-08T04:05:37.866820",
"bytes": 2,
"name": "seg01",
"content_type": "application/octet-stream"},
{"hash": "474bab96c67528d42d5c0c52b35228eb",
"last_modified": "2012-11-08T04:05:37.846710",
"bytes": 2,
"name": "seg02",
"content_type": "application/octet-stream"}]
listing2 = [{"hash": "116baa5508693d1d1ca36abdd9f9478b",
"last_modified": "2012-11-08T04:05:37.849510",
"bytes": 2,
"name": "seg03",
"content_type": "application/octet-stream"},
{"hash": "7bd6aaa1ef6013353f0420459574ac9d",
"last_modified": "2012-11-08T04:05:37.855180",
"bytes": 2,
"name": "seg04",
"content_type": "application/octet-stream"
}]
listing3 = [{"hash": "6605f80e3cefaa24e9823544df4edbd6",
"last_modified": "2012-11-08T04:05:37.853710",
"bytes": 2,
"name": "seg05",
"content_type": "application/octet-stream"}]
response_bodies = (
'', # HEAD /a
'', # HEAD /a/c
'', # GET manifest
simplejson.dumps(listing1), # GET listing1
'Aa', # GET seg01
'Bb', # GET seg02
simplejson.dumps(listing2), # GET listing2
'Cc', # GET seg03
'Dd', # GET seg04
simplejson.dumps(listing3), # GET listing3
'Ee', # GET seg05
simplejson.dumps([])) # GET final empty listing
with save_globals():
try:
swift.proxy.controllers.obj.CONTAINER_LISTING_LIMIT = 2
controller = proxy_server.ObjectController(
self.app, 'a', 'c', 'manifest')
requested = []
def capture_requested_paths(ipaddr, port, device, partition,
method, path, headers=None,
query_string=None):
qs_dict = dict(urlparse.parse_qsl(query_string or ''))
requested.append([method, path, qs_dict])
set_http_connect(
200, # HEAD /a
200, # HEAD /a/c
200, # GET manifest
200, # GET listing1
200, # GET seg01
200, # GET seg02
200, # GET listing2
200, # GET seg03
200, # GET seg04
200, # GET listing3
200, # GET seg05
200, # GET final empty listing
headers={"X-Object-Manifest": "segments/seg"},
body_iter=response_bodies,
give_connect=capture_requested_paths)
req = Request.blank('/a/c/manifest')
resp = controller.GET(req)
self.assertEqual(resp.status_int, 200)
self.assertEqual(resp.body, 'AaBbCcDdEe')
self.assertEqual(
requested,
[['HEAD', '/a', {}],
['HEAD', '/a/c', {}],
['GET', '/a/c/manifest', {}],
['GET', '/a/segments',
{'format': 'json', 'prefix': 'seg'}],
['GET', '/a/segments/seg01', {}],
['GET', '/a/segments/seg02', {}],
['GET', '/a/segments',
{'format': 'json', 'prefix': 'seg', 'marker': 'seg02'}],
['GET', '/a/segments/seg03', {}],
['GET', '/a/segments/seg04', {}],
['GET', '/a/segments',
{'format': 'json', 'prefix': 'seg', 'marker': 'seg04'}],
['GET', '/a/segments/seg05', {}],
['GET', '/a/segments',
{'format': 'json', 'prefix': 'seg', 'marker': 'seg05'}]])
finally:
# other tests in this file get very unhappy if this
# isn't set back, which leads to time-wasting
# debugging of other tests.
swift.proxy.controllers.obj.CONTAINER_LISTING_LIMIT = \
_orig_container_listing_limit
def test_PUT_auto_content_type(self): def test_PUT_auto_content_type(self):
with save_globals(): with save_globals():
controller = proxy_server.ObjectController(self.app, 'account', controller = proxy_server.ObjectController(self.app, 'account',