Merge "Add support for data segments to SLO and SegmentedIterable"

This commit is contained in:
Zuul 2018-02-01 12:52:55 +00:00 committed by Gerrit Code Review
commit 82844a3211
6 changed files with 706 additions and 148 deletions

View File

@ -742,7 +742,7 @@ use = egg:swift#bulk
[filter:slo] [filter:slo]
use = egg:swift#slo use = egg:swift#slo
# max_manifest_segments = 1000 # max_manifest_segments = 1000
# max_manifest_size = 2097152 # max_manifest_size = 8388608
# #
# Rate limiting applies only to segments smaller than this size (bytes). # Rate limiting applies only to segments smaller than this size (bytes).
# rate_limit_under_size = 1048576 # rate_limit_under_size = 1048576

View File

@ -188,16 +188,20 @@ class GetContext(WSGIContext):
if isinstance(seg_name, six.text_type): if isinstance(seg_name, six.text_type):
seg_name = seg_name.encode("utf-8") seg_name = seg_name.encode("utf-8")
# (obj path, etag, size, first byte, last byte) # We deliberately omit the etag and size here;
yield ("/" + "/".join((version, account, container, # SegmentedIterable will check size and etag if
seg_name)), # specified, but we don't want it to. DLOs only care
# We deliberately omit the etag and size here; # that the objects' names match the specified prefix.
# SegmentedIterable will check size and etag if # SegmentedIterable will instead check that the data read
# specified, but we don't want it to. DLOs only care # from each segment matches the response headers.
# that the objects' names match the specified prefix. _path = "/".join(["", version, account, container, seg_name])
None, None, _first = None if first_byte <= 0 else first_byte
(None if first_byte <= 0 else first_byte), _last = None if last_byte >= seg_length - 1 else last_byte
(None if last_byte >= seg_length - 1 else last_byte)) yield {
'path': _path,
'first_byte': _first,
'last_byte': _last
}
first_byte = max(first_byte - seg_length, -1) first_byte = max(first_byte - seg_length, -1)
last_byte = max(last_byte - seg_length, -1) last_byte = max(last_byte - seg_length, -1)

View File

@ -1,4 +1,4 @@
# Copyright (c) 2013 OpenStack Foundation # Copyright (c) 2018 OpenStack Foundation
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -32,7 +32,7 @@ uploaded. The request must be a ``PUT`` with the query parameter::
?multipart-manifest=put ?multipart-manifest=put
The body of this request will be an ordered list of segment descriptions in The body of this request will be an ordered list of segment descriptions in
JSON format. The data to be supplied for each segment is: JSON format. The data to be supplied for each segment is either:
=========== ======================================================== =========== ========================================================
Key Description Key Description
@ -44,27 +44,47 @@ etag (optional) the ETag given back when the segment object
size_bytes (optional) the size of the complete segment object in size_bytes (optional) the size of the complete segment object in
bytes bytes
range (optional) the (inclusive) range within the object to range (optional) the (inclusive) range within the object to
use as a segment. If omitted, the entire object is used. use as a segment. If omitted, the entire object is used
=========== ======================================================== =========== ========================================================
Or:
=========== ========================================================
Key Description
=========== ========================================================
data base64-encoded data to be returned
=========== ========================================================
.. note::
At least one object-backed segment must be included. If you'd like
to create a manifest consisting purely of data segments, consider
uploading a normal object instead.
The format of the list will be:: The format of the list will be::
[{"path": "/cont/object", [{"path": "/cont/object",
"etag": "etagoftheobjectsegment", "etag": "etagoftheobjectsegment",
"size_bytes": 10485760, "size_bytes": 10485760,
"range": "1048576-2097151"}, "range": "1048576-2097151"},
{"data": base64.b64encode("interstitial data")},
{"path": "/cont/another-object", ...},
...] ...]
The number of object segments is limited to a configurable amount, default The number of object-backed segments is limited to ``max_manifest_segments``
1000. Each segment must be at least 1 byte. On upload, the middleware will (configurable in proxy-server.conf, default 1000). Each segment must be at
head every segment passed in to verify: least 1 byte. On upload, the middleware will head every object-backed segment
passed in to verify:
1. the segment exists (i.e. the ``HEAD`` was successful); 1. the segment exists (i.e. the ``HEAD`` was successful);
2. the segment meets minimum size requirements; 2. the segment meets minimum size requirements;
3. if the user provided a non-null ``etag``, the etag matches; 3. if the user provided a non-null ``etag``, the etag matches;
4. if the user provided a non-null ``size_bytes``, the size_bytes matches; and 4. if the user provided a non-null ``size_bytes``, the size_bytes matches; and
5. if the user provided a ``range``, it is a singular, syntactically correct 5. if the user provided a ``range``, it is a singular, syntactically correct
range that is satisfiable given the size of the object. range that is satisfiable given the size of the object referenced.
For inlined data segments, the middleware verifies each is valid, non-empty
base64-encoded binary data. Note that data segments *do not* count against
``max_manifest_segments``.
Note that the ``etag`` and ``size_bytes`` keys are optional; if omitted, the Note that the ``etag`` and ``size_bytes`` keys are optional; if omitted, the
verification is not performed. If any of the objects fail to verify (not verification is not performed. If any of the objects fail to verify (not
@ -148,13 +168,16 @@ above manifest would be::
echo -n 'etagoftheobjectsegmentone:1-2;etagoftheobjectsegmenttwo:3-4;' \ echo -n 'etagoftheobjectsegmentone:1-2;etagoftheobjectsegmenttwo:3-4;' \
| md5sum | md5sum
For the purposes of Etag computations, inlined data segments are considered to
have an etag of the md5 of the raw data (i.e., *not* base64-encoded).
------------------- -------------------
Range Specification Range Specification
------------------- -------------------
Users now have the ability to specify ranges for SLO segments. Users now have the ability to specify ranges for SLO segments.
Users can now include an optional ``range`` field in segment descriptions Users can include an optional ``range`` field in segment descriptions
to specify which bytes from the underlying object should be used for the to specify which bytes from the underlying object should be used for the
segment data. Only one range may be specified per segment. segment data. Only one range may be specified per segment.
@ -177,11 +200,28 @@ finally bytes 2095104 through 2097152 (i.e., the last 2048 bytes) of
.. note:: .. note::
The minimum sized range is 1 byte. This is the same as the minimum The minimum sized range is 1 byte. This is the same as the minimum
segment size. segment size.
-------------------------
Inline Data Specification
-------------------------
When uploading a manifest, users can include 'data' segments whose content
is stored inline in the manifest and returned alongside the object-backed
segments. The data in these segments must be base64-encoded binary data and
will be included in the etag of the resulting large object exactly as if
that data had been uploaded and referenced as separate objects.
.. note::
This feature is primarily aimed at reducing the need for storing
many tiny objects, and as such any supplied data must fit within
the maximum manifest size (default is 8MiB). This maximum size
can be configured via ``max_manifest_size`` in proxy-server.conf.
------------------------- -------------------------
Retrieving a Large Object Retrieving a Large Object
------------------------- -------------------------
@ -272,6 +312,7 @@ the manifest and the segments it's referring to) in the container and account
metadata which can be used for stats and billing purposes. metadata which can be used for stats and billing purposes.
""" """
import base64
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
import json import json
@ -289,7 +330,7 @@ from swift.common.swob import Request, HTTPBadRequest, HTTPServerError, \
from swift.common.utils import get_logger, config_true_value, \ from swift.common.utils import get_logger, config_true_value, \
get_valid_utf8_str, override_bytes_from_content_type, split_path, \ get_valid_utf8_str, override_bytes_from_content_type, split_path, \
register_swift_info, RateLimitedIterator, quote, close_if_possible, \ register_swift_info, RateLimitedIterator, quote, close_if_possible, \
closing_if_possible, LRUCache, StreamingPile closing_if_possible, LRUCache, StreamingPile, strict_b64decode
from swift.common.request_helpers import SegmentedIterable, \ from swift.common.request_helpers import SegmentedIterable, \
get_sys_meta_prefix, update_etag_is_at_header get_sys_meta_prefix, update_etag_is_at_header
from swift.common.constraints import check_utf8, MAX_BUFFERED_SLO_SEGMENTS from swift.common.constraints import check_utf8, MAX_BUFFERED_SLO_SEGMENTS
@ -299,15 +340,17 @@ from swift.common.middleware.bulk import get_response_body, \
ACCEPTABLE_FORMATS, Bulk ACCEPTABLE_FORMATS, Bulk
DEFAULT_RATE_LIMIT_UNDER_SIZE = 1024 * 1024 # 1 MiB DEFAULT_RATE_LIMIT_UNDER_SIZE = 1024 ** 2 # 1 MiB
DEFAULT_MAX_MANIFEST_SEGMENTS = 1000 DEFAULT_MAX_MANIFEST_SEGMENTS = 1000
DEFAULT_MAX_MANIFEST_SIZE = 1024 * 1024 * 2 # 2 MiB DEFAULT_MAX_MANIFEST_SIZE = 8 * (1024 ** 2) # 8 MiB
DEFAULT_YIELD_FREQUENCY = 10 DEFAULT_YIELD_FREQUENCY = 10
REQUIRED_SLO_KEYS = set(['path']) SLO_KEYS = {
OPTIONAL_SLO_KEYS = set(['range', 'etag', 'size_bytes']) # required: optional
ALLOWED_SLO_KEYS = REQUIRED_SLO_KEYS | OPTIONAL_SLO_KEYS 'data': set(),
'path': {'range', 'etag', 'size_bytes'},
}
SYSMETA_SLO_ETAG = get_sys_meta_prefix('object') + 'slo-etag' SYSMETA_SLO_ETAG = get_sys_meta_prefix('object') + 'slo-etag'
SYSMETA_SLO_SIZE = get_sys_meta_prefix('object') + 'slo-size' SYSMETA_SLO_SIZE = get_sys_meta_prefix('object') + 'slo-size'
@ -318,8 +361,8 @@ def parse_and_validate_input(req_body, req_path):
Given a request body, parses it and returns a list of dictionaries. Given a request body, parses it and returns a list of dictionaries.
The output structure is nearly the same as the input structure, but it The output structure is nearly the same as the input structure, but it
is not an exact copy. Given a valid input dictionary ``d_in``, its is not an exact copy. Given a valid object-backed input dictionary
corresponding output dictionary ``d_out`` will be as follows: ``d_in``, its corresponding output dictionary ``d_out`` will be as follows:
* d_out['etag'] == d_in['etag'] * d_out['etag'] == d_in['etag']
@ -333,8 +376,10 @@ def parse_and_validate_input(req_body, req_path):
corresponding swob.Range object. If d_in does not have a key corresponding swob.Range object. If d_in does not have a key
'range', neither will d_out. 'range', neither will d_out.
:raises HTTPException: on parse errors or semantic errors (e.g. bogus Inlined data dictionaries will have any extraneous padding stripped.
JSON structure, syntactically invalid ranges)
:raises HTTPException: on parse errors or semantic errors (e.g. bogus
JSON structure, syntactically invalid ranges)
:returns: a list of dictionaries on success :returns: a list of dictionaries on success
""" """
@ -356,15 +401,19 @@ def parse_and_validate_input(req_body, req_path):
errors.append("Index %d: not a JSON object" % seg_index) errors.append("Index %d: not a JSON object" % seg_index)
continue continue
missing_keys = [k for k in REQUIRED_SLO_KEYS if k not in seg_dict] for required in SLO_KEYS:
if missing_keys: if required in seg_dict:
segment_type = required
break
else:
errors.append( errors.append(
"Index %d: missing keys %s" "Index %d: expected keys to include one of %s"
% (seg_index, % (seg_index,
", ".join('"%s"' % (mk,) for mk in sorted(missing_keys)))) " or ".join(repr(required) for required in SLO_KEYS)))
continue continue
extraneous_keys = [k for k in seg_dict if k not in ALLOWED_SLO_KEYS] allowed_keys = SLO_KEYS[segment_type].union([segment_type])
extraneous_keys = [k for k in seg_dict if k not in allowed_keys]
if extraneous_keys: if extraneous_keys:
errors.append( errors.append(
"Index %d: extraneous keys %s" "Index %d: extraneous keys %s"
@ -373,61 +422,84 @@ def parse_and_validate_input(req_body, req_path):
for ek in sorted(extraneous_keys)))) for ek in sorted(extraneous_keys))))
continue continue
if not isinstance(seg_dict['path'], six.string_types): if segment_type == 'path':
errors.append("Index %d: \"path\" must be a string" % seg_index) if not isinstance(seg_dict['path'], six.string_types):
continue errors.append("Index %d: \"path\" must be a string" %
if not (seg_dict.get('etag') is None or seg_index)
isinstance(seg_dict['etag'], six.string_types)):
errors.append('Index %d: "etag" must be a string or null '
'(if provided)' % seg_index)
continue
if '/' not in seg_dict['path'].strip('/'):
errors.append(
"Index %d: path does not refer to an object. Path must be of "
"the form /container/object." % seg_index)
continue
seg_size = seg_dict.get('size_bytes')
if seg_size is not None:
try:
seg_size = int(seg_size)
seg_dict['size_bytes'] = seg_size
except (TypeError, ValueError):
errors.append("Index %d: invalid size_bytes" % seg_index)
continue continue
if seg_size < 1 and seg_index != (len(parsed_data) - 1): if not (seg_dict.get('etag') is None or
isinstance(seg_dict['etag'], six.string_types)):
errors.append('Index %d: "etag" must be a string or null '
'(if provided)' % seg_index)
continue
if '/' not in seg_dict['path'].strip('/'):
errors.append(
"Index %d: path does not refer to an object. Path must "
"be of the form /container/object." % seg_index)
continue
seg_size = seg_dict.get('size_bytes')
if seg_size is not None:
try:
seg_size = int(seg_size)
seg_dict['size_bytes'] = seg_size
except (TypeError, ValueError):
errors.append("Index %d: invalid size_bytes" % seg_index)
continue
if seg_size < 1 and seg_index != (len(parsed_data) - 1):
errors.append("Index %d: too small; each segment must be "
"at least 1 byte."
% (seg_index,))
continue
obj_path = '/'.join(['', vrs, account,
seg_dict['path'].lstrip('/')])
if req_path == quote(obj_path):
errors.append(
"Index %d: manifest must not include itself as a segment"
% seg_index)
continue
if seg_dict.get('range'):
try:
seg_dict['range'] = Range('bytes=%s' % seg_dict['range'])
except ValueError:
errors.append("Index %d: invalid range" % seg_index)
continue
if len(seg_dict['range'].ranges) > 1:
errors.append("Index %d: multiple ranges "
"(only one allowed)" % seg_index)
continue
# If the user *told* us the object's size, we can check range
# satisfiability right now. If they lied about the size, we'll
# fail that validation later.
if (seg_size is not None and 1 != len(
seg_dict['range'].ranges_for_length(seg_size))):
errors.append("Index %d: unsatisfiable range" % seg_index)
continue
elif segment_type == 'data':
# Validate that the supplied data is non-empty and base64-encoded
try:
data = strict_b64decode(seg_dict['data'])
except ValueError:
errors.append(
"Index %d: data must be valid base64" % seg_index)
continue
if len(data) < 1:
errors.append("Index %d: too small; each segment must be " errors.append("Index %d: too small; each segment must be "
"at least 1 byte." "at least 1 byte."
% (seg_index,)) % (seg_index,))
continue continue
# re-encode to normalize padding
seg_dict['data'] = base64.b64encode(data)
obj_path = '/'.join(['', vrs, account, seg_dict['path'].lstrip('/')]) if parsed_data and all('data' in d for d in parsed_data):
if req_path == quote(obj_path): errors.append("Inline data segments require at least one "
errors.append( "object-backed segment.")
"Index %d: manifest must not include itself as a segment"
% seg_index)
continue
if seg_dict.get('range'):
try:
seg_dict['range'] = Range('bytes=%s' % seg_dict['range'])
except ValueError:
errors.append("Index %d: invalid range" % seg_index)
continue
if len(seg_dict['range'].ranges) > 1:
errors.append("Index %d: multiple ranges (only one allowed)"
% seg_index)
continue
# If the user *told* us the object's size, we can check range
# satisfiability right now. If they lied about the size, we'll
# fail that validation later.
if (seg_size is not None and
len(seg_dict['range'].ranges_for_length(seg_size)) != 1):
errors.append("Index %d: unsatisfiable range" % seg_index)
continue
if errors: if errors:
error_message = "".join(e + "\n" for e in errors) error_message = "".join(e + "\n" for e in errors)
@ -472,11 +544,20 @@ class SloGetContext(WSGIContext):
'while fetching %s, JSON-decoding of submanifest %s ' 'while fetching %s, JSON-decoding of submanifest %s '
'failed with %s' % (req.path, sub_req.path, err)) 'failed with %s' % (req.path, sub_req.path, err))
def _segment_path(self, version, account, seg_dict):
return "/{ver}/{acc}/{conobj}".format(
ver=version, acc=account,
conobj=seg_dict['name'].lstrip('/')
)
def _segment_length(self, seg_dict): def _segment_length(self, seg_dict):
""" """
Returns the number of bytes that will be fetched from the specified Returns the number of bytes that will be fetched from the specified
segment on a plain GET request for this SLO manifest. segment on a plain GET request for this SLO manifest.
""" """
if 'raw_data' in seg_dict:
return len(seg_dict['raw_data'])
seg_range = seg_dict.get('range') seg_range = seg_dict.get('range')
if seg_range is not None: if seg_range is not None:
# The range is of the form N-M, where N and M are both positive # The range is of the form N-M, where N and M are both positive
@ -484,7 +565,7 @@ class SloGetContext(WSGIContext):
# only thing that creates the SLO manifests stored in the # only thing that creates the SLO manifests stored in the
# cluster. # cluster.
range_start, range_end = [int(x) for x in seg_range.split('-')] range_start, range_end = [int(x) for x in seg_range.split('-')]
return range_end - range_start + 1 return (range_end - range_start) + 1
else: else:
return int(seg_dict['bytes']) return int(seg_dict['bytes'])
@ -533,6 +614,9 @@ class SloGetContext(WSGIContext):
recursion_depth=1): recursion_depth=1):
last_sub_path = None last_sub_path = None
for seg_dict in segments: for seg_dict in segments:
if 'data' in seg_dict:
seg_dict['raw_data'] = strict_b64decode(seg_dict.pop('data'))
seg_length = self._segment_length(seg_dict) seg_length = self._segment_length(seg_dict)
if first_byte >= seg_length: if first_byte >= seg_length:
# don't need any bytes from this segment # don't need any bytes from this segment
@ -544,16 +628,25 @@ class SloGetContext(WSGIContext):
# no bytes are needed from this or any future segment # no bytes are needed from this or any future segment
return return
if 'raw_data' in seg_dict:
yield dict(seg_dict,
first_byte=max(0, first_byte),
last_byte=min(seg_length - 1, last_byte))
first_byte -= seg_length
last_byte -= seg_length
continue
seg_range = seg_dict.get('range') seg_range = seg_dict.get('range')
if seg_range is None: if seg_range is None:
range_start, range_end = 0, seg_length - 1 range_start, range_end = 0, seg_length - 1
else: else:
# We already validated and supplied concrete values # This simple parsing of the range is valid because we already
# for the range on upload # validated and supplied concrete values for the range
# during SLO manifest creation
range_start, range_end = map(int, seg_range.split('-')) range_start, range_end = map(int, seg_range.split('-'))
if config_true_value(seg_dict.get('sub_slo')): if config_true_value(seg_dict.get('sub_slo')):
# do this check here so that we can avoid fetching this last # Do this check here so that we can avoid fetching this last
# manifest before raising the exception # manifest before raising the exception
if recursion_depth >= self.max_slo_recursion_depth: if recursion_depth >= self.max_slo_recursion_depth:
raise ListingIterError( raise ListingIterError(
@ -568,7 +661,7 @@ class SloGetContext(WSGIContext):
last_sub_path = sub_path last_sub_path = sub_path
# Use the existing machinery to slice into the sub-SLO. # Use the existing machinery to slice into the sub-SLO.
for sub_seg_dict, sb, eb in self._byterange_listing_iterator( for sub_seg_dict in self._byterange_listing_iterator(
req, version, account, sub_segments, req, version, account, sub_segments,
# This adjusts first_byte and last_byte to be # This adjusts first_byte and last_byte to be
# relative to the sub-SLO. # relative to the sub-SLO.
@ -577,13 +670,13 @@ class SloGetContext(WSGIContext):
cached_fetch_sub_slo_segments, cached_fetch_sub_slo_segments,
recursion_depth=recursion_depth + 1): recursion_depth=recursion_depth + 1):
yield sub_seg_dict, sb, eb yield sub_seg_dict
else: else:
if isinstance(seg_dict['name'], six.text_type): if isinstance(seg_dict['name'], six.text_type):
seg_dict['name'] = seg_dict['name'].encode("utf-8") seg_dict['name'] = seg_dict['name'].encode("utf-8")
yield (seg_dict, yield dict(seg_dict,
max(0, first_byte) + range_start, first_byte=max(0, first_byte) + range_start,
min(range_end, range_start + last_byte)) last_byte=min(range_end, range_start + last_byte))
first_byte -= seg_length first_byte -= seg_length
last_byte -= seg_length last_byte -= seg_length
@ -741,6 +834,8 @@ class SloGetContext(WSGIContext):
segments = self._get_manifest_read(resp_iter) segments = self._get_manifest_read(resp_iter)
for seg_dict in segments: for seg_dict in segments:
if 'data' in seg_dict:
continue
seg_dict.pop('content_type', None) seg_dict.pop('content_type', None)
seg_dict.pop('last_modified', None) seg_dict.pop('last_modified', None)
seg_dict.pop('sub_slo', None) seg_dict.pop('sub_slo', None)
@ -774,7 +869,6 @@ class SloGetContext(WSGIContext):
def get_or_head_response(self, req, resp_headers, resp_iter): def get_or_head_response(self, req, resp_headers, resp_iter):
segments = self._get_manifest_read(resp_iter) segments = self._get_manifest_read(resp_iter)
slo_etag = None slo_etag = None
content_length = None content_length = None
response_headers = [] response_headers = []
@ -789,21 +883,38 @@ class SloGetContext(WSGIContext):
elif lheader not in ('etag', 'content-length'): elif lheader not in ('etag', 'content-length'):
response_headers.append((header, value)) response_headers.append((header, value))
if slo_etag is None or content_length is None: # Prep to calculate content_length & etag if necessary
etag = md5() if slo_etag is None:
content_length = 0 calculated_etag = md5()
for seg_dict in segments: if content_length is None:
if seg_dict.get('range'): calculated_content_length = 0
etag.update('%s:%s;' % (seg_dict['hash'],
seg_dict['range']))
else:
etag.update(seg_dict['hash'])
for seg_dict in segments:
# Decode any inlined data; it's important that we do this *before*
# calculating the segment length and etag
if 'data' in seg_dict:
seg_dict['raw_data'] = base64.b64decode(seg_dict.pop('data'))
if slo_etag is None:
if 'raw_data' in seg_dict:
calculated_etag.update(
md5(seg_dict['raw_data']).hexdigest())
elif seg_dict.get('range'):
calculated_etag.update(
'%s:%s;' % (seg_dict['hash'], seg_dict['range']))
else:
calculated_etag.update(seg_dict['hash'])
if content_length is None:
if config_true_value(seg_dict.get('sub_slo')): if config_true_value(seg_dict.get('sub_slo')):
override_bytes_from_content_type( override_bytes_from_content_type(
seg_dict, logger=self.slo.logger) seg_dict, logger=self.slo.logger)
content_length += self._segment_length(seg_dict) calculated_content_length += self._segment_length(seg_dict)
slo_etag = etag.hexdigest()
if slo_etag is None:
slo_etag = calculated_etag.hexdigest()
if content_length is None:
content_length = calculated_content_length
response_headers.append(('Content-Length', str(content_length))) response_headers.append(('Content-Length', str(content_length)))
response_headers.append(('Etag', '"%s"' % slo_etag)) response_headers.append(('Etag', '"%s"' % slo_etag))
@ -833,9 +944,13 @@ class SloGetContext(WSGIContext):
plain_listing_iter = self._segment_listing_iterator( plain_listing_iter = self._segment_listing_iterator(
req, ver, account, segments, byteranges) req, ver, account, segments, byteranges)
def ratelimit_predicate(seg_dict):
    """
    Tell whether fetching this segment should be subject to rate limiting.

    Passed as the ``ratelimit_if`` callback for the segment listing.

    :param seg_dict: a segment dictionary from the listing iterator;
                     object-backed segments carry ``'bytes'`` and are
                     annotated with ``'first_byte'`` / ``'last_byte'``
    :returns: False for inline data segments; otherwise True if the number
              of bytes to be read from the segment is smaller than
              ``self.slo.rate_limit_under_size``
    """
    if 'raw_data' in seg_dict:
        return False  # it's already in memory anyway
    # The listing iterator stores the requested sub-range under
    # 'first_byte'/'last_byte'; reading any other key would silently fall
    # back to the full segment size and ignore the range.
    start = seg_dict.get('first_byte') or 0
    end = seg_dict.get('last_byte')
    if end is None:
        # No explicit end: the read runs to the end of the segment.
        end = int(seg_dict['bytes']) - 1
    return (end - start + 1) < self.slo.rate_limit_under_size
@ -843,17 +958,14 @@ class SloGetContext(WSGIContext):
plain_listing_iter, plain_listing_iter,
self.slo.rate_limit_segments_per_sec, self.slo.rate_limit_segments_per_sec,
limit_after=self.slo.rate_limit_after_segment, limit_after=self.slo.rate_limit_after_segment,
ratelimit_if=is_small_segment) ratelimit_if=ratelimit_predicate)
# self._segment_listing_iterator gives us 3-tuples of (segment dict, # data segments are already in the correct format, but object-backed
# start byte, end byte), but SegmentedIterable wants (obj path, etag, # segments need a path key added
# size, start byte, end byte), so we clean that up here
segment_listing_iter = ( segment_listing_iter = (
("/{ver}/{acc}/{conobj}".format( seg_dict if 'raw_data' in seg_dict else
ver=ver, acc=account, conobj=seg_dict['name'].lstrip('/')), dict(seg_dict, path=self._segment_path(ver, account, seg_dict))
seg_dict['hash'], int(seg_dict['bytes']), for seg_dict in ratelimited_listing_iter)
start_byte, end_byte)
for seg_dict, start_byte, end_byte in ratelimited_listing_iter)
segmented_iter = SegmentedIterable( segmented_iter = SegmentedIterable(
req, self.slo.app, segment_listing_iter, req, self.slo.app, segment_listing_iter,
@ -964,9 +1076,10 @@ class StaticLargeObject(object):
req.path) req.path)
problem_segments = [] problem_segments = []
if len(parsed_data) > self.max_manifest_segments: object_segments = [seg for seg in parsed_data if 'path' in seg]
if len(object_segments) > self.max_manifest_segments:
raise HTTPRequestEntityTooLarge( raise HTTPRequestEntityTooLarge(
'Number of segments must be <= %d' % 'Number of object-backed segments must be <= %d' %
self.max_manifest_segments) self.max_manifest_segments)
try: try:
out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS) out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
@ -974,10 +1087,15 @@ class StaticLargeObject(object):
out_content_type = 'text/plain' # Ignore invalid header out_content_type = 'text/plain' # Ignore invalid header
if not out_content_type: if not out_content_type:
out_content_type = 'text/plain' out_content_type = 'text/plain'
data_for_storage = [] data_for_storage = [None] * len(parsed_data)
total_size = 0
path2indices = defaultdict(list) path2indices = defaultdict(list)
for index, seg_dict in enumerate(parsed_data): for index, seg_dict in enumerate(parsed_data):
path2indices[seg_dict['path']].append(index) if 'data' in seg_dict:
data_for_storage[index] = seg_dict
total_size += len(base64.b64decode(seg_dict['data']))
else:
path2indices[seg_dict['path']].append(index)
def do_head(obj_name): def do_head(obj_name):
obj_path = '/'.join(['', vrs, account, obj_path = '/'.join(['', vrs, account,
@ -1023,30 +1141,45 @@ class StaticLargeObject(object):
problem_segments.append( problem_segments.append(
[quote(obj_name), [quote(obj_name),
'Too small; each segment must be at least 1 byte.']) 'Too small; each segment must be at least 1 byte.'])
if seg_dict.get('size_bytes') is not None and \
seg_dict['size_bytes'] != head_seg_resp.content_length: _size_bytes = seg_dict.get('size_bytes')
size_mismatch = (
_size_bytes is not None and
_size_bytes != head_seg_resp.content_length
)
if size_mismatch:
problem_segments.append([quote(obj_name), 'Size Mismatch']) problem_segments.append([quote(obj_name), 'Size Mismatch'])
if seg_dict.get('etag') is not None and \
seg_dict['etag'] != head_seg_resp.etag: _etag = seg_dict.get('etag')
etag_mismatch = (
_etag is not None and
_etag != head_seg_resp.etag
)
if etag_mismatch:
problem_segments.append([quote(obj_name), 'Etag Mismatch']) problem_segments.append([quote(obj_name), 'Etag Mismatch'])
if head_seg_resp.last_modified: if head_seg_resp.last_modified:
last_modified = head_seg_resp.last_modified last_modified = head_seg_resp.last_modified
else: else:
# shouldn't happen # shouldn't happen
last_modified = datetime.now() last_modified = datetime.now()
last_modified_formatted = \ last_modified_formatted = last_modified.strftime(
last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f') '%Y-%m-%dT%H:%M:%S.%f'
seg_data = {'name': '/' + seg_dict['path'].lstrip('/'), )
'bytes': head_seg_resp.content_length, seg_data = {
'hash': head_seg_resp.etag, 'name': '/' + seg_dict['path'].lstrip('/'),
'content_type': head_seg_resp.content_type, 'bytes': head_seg_resp.content_length,
'last_modified': last_modified_formatted} 'hash': head_seg_resp.etag,
'content_type': head_seg_resp.content_type,
'last_modified': last_modified_formatted
}
if seg_dict.get('range'): if seg_dict.get('range'):
seg_data['range'] = seg_dict['range'] seg_data['range'] = seg_dict['range']
if config_true_value( if config_true_value(
head_seg_resp.headers.get('X-Static-Large-Object')): head_seg_resp.headers.get('X-Static-Large-Object')):
seg_data['sub_slo'] = True seg_data['sub_slo'] = True
return segment_length, seg_data return segment_length, seg_data
heartbeat = config_true_value(req.params.get('heartbeat')) heartbeat = config_true_value(req.params.get('heartbeat'))
@ -1059,10 +1192,8 @@ class StaticLargeObject(object):
('Content-Type', out_content_type), ('Content-Type', out_content_type),
]) ])
separator = '\r\n\r\n' separator = '\r\n\r\n'
data_for_storage = [None] * len(parsed_data)
def resp_iter(): def resp_iter(total_size=total_size):
total_size = 0
# wsgi won't propagate start_response calls until some data has # wsgi won't propagate start_response calls until some data has
# been yielded so make sure first heartbeat is sent immediately # been yielded so make sure first heartbeat is sent immediately
if heartbeat: if heartbeat:
@ -1102,7 +1233,10 @@ class StaticLargeObject(object):
slo_etag = md5() slo_etag = md5()
for seg_data in data_for_storage: for seg_data in data_for_storage:
if seg_data.get('range'): if 'data' in seg_data:
raw_data = base64.b64decode(seg_data['data'])
slo_etag.update(md5(raw_data).hexdigest())
elif seg_data.get('range'):
slo_etag.update('%s:%s;' % (seg_data['hash'], slo_etag.update('%s:%s;' % (seg_data['hash'],
seg_data['range'])) seg_data['range']))
else: else:
@ -1183,6 +1317,8 @@ class StaticLargeObject(object):
raise HTTPBadRequest( raise HTTPBadRequest(
'Too many buffered slo segments to delete.') 'Too many buffered slo segments to delete.')
seg_data = segments.pop(0) seg_data = segments.pop(0)
if 'data' in seg_data:
continue
if seg_data.get('sub_slo'): if seg_data.get('sub_slo'):
try: try:
segments.extend( segments.extend(

View File

@ -354,12 +354,25 @@ class SegmentedIterable(object):
def _coalesce_requests(self): def _coalesce_requests(self):
start_time = time.time() start_time = time.time()
pending_req = None pending_req = pending_etag = pending_size = None
pending_etag = None
pending_size = None
try: try:
for seg_path, seg_etag, seg_size, first_byte, last_byte \ for seg_dict in self.listing_iter:
in self.listing_iter: if 'raw_data' in seg_dict:
if pending_req:
yield pending_req, pending_etag, pending_size
to_yield = seg_dict['raw_data'][
seg_dict['first_byte']:seg_dict['last_byte'] + 1]
yield to_yield, None, len(seg_dict['raw_data'])
pending_req = pending_etag = pending_size = None
continue
seg_path, seg_etag, seg_size, first_byte, last_byte = (
seg_dict['path'], seg_dict.get('hash'),
seg_dict.get('bytes'),
seg_dict['first_byte'], seg_dict['last_byte'])
if seg_size is not None:
seg_size = int(seg_size)
first_byte = first_byte or 0 first_byte = first_byte or 0
go_to_end = last_byte is None or ( go_to_end = last_byte is None or (
seg_size is not None and last_byte == seg_size - 1) seg_size is not None and last_byte == seg_size - 1)
@ -441,7 +454,18 @@ class SegmentedIterable(object):
bytes_left = self.response_body_length bytes_left = self.response_body_length
try: try:
for seg_req, seg_etag, seg_size in self._coalesce_requests(): for data_or_req, seg_etag, seg_size in self._coalesce_requests():
if isinstance(data_or_req, bytes):
chunk = data_or_req # ugly, awful overloading
if bytes_left is None:
yield chunk
elif bytes_left >= len(chunk):
yield chunk
bytes_left -= len(chunk)
else:
yield chunk[:bytes_left]
continue
seg_req = data_or_req
seg_resp = seg_req.get_response(self.app) seg_resp = seg_req.get_response(self.app)
if not is_success(seg_resp.status_int): if not is_success(seg_resp.status_int):
close_if_possible(seg_resp.app_iter) close_if_possible(seg_resp.app_iter)

View File

@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import base64
import email.parser import email.parser
import hashlib import hashlib
import itertools import itertools
@ -205,6 +206,28 @@ class TestSloEnv(BaseEnv):
'size_bytes': None, 'range': '-1048578'}, 'size_bytes': None, 'range': '-1048578'},
]), parms={'multipart-manifest': 'put'}) ]), parms={'multipart-manifest': 'put'})
file_item = cls.container.file("mixed-object-data-manifest")
file_item.write(
json.dumps([
{'data': base64.b64encode('APRE' * 8)},
{'path': seg_info['seg_a']['path']},
{'data': base64.b64encode('APOS' * 16)},
{'path': seg_info['seg_b']['path']},
{'data': base64.b64encode('BPOS' * 32)},
{'data': base64.b64encode('CPRE' * 64)},
{'path': seg_info['seg_c']['path']},
{'data': base64.b64encode('CPOS' * 8)},
]), parms={'multipart-manifest': 'put'}
)
file_item = cls.container.file("nested-data-manifest")
file_item.write(
json.dumps([
{'path': '%s/%s' % (cls.container.name,
"mixed-object-data-manifest")}
]), parms={'multipart-manifest': 'put'}
)
class TestSlo(Base): class TestSlo(Base):
env = TestSloEnv env = TestSloEnv
@ -681,6 +704,25 @@ class TestSlo(Base):
self.assertEqual('application/octet-stream', actual['content_type']) self.assertEqual('application/octet-stream', actual['content_type'])
self.assertEqual(copied.etag, actual['hash']) self.assertEqual(copied.etag, actual['hash'])
# Test copy manifest including data segments
source = self.env.container.file("mixed-object-data-manifest")
source_contents = source.read(parms={'multipart-manifest': 'get'})
source_json = json.loads(source_contents)
source.copy(
self.env.container.name,
"copied-mixed-object-data-manifest",
parms={'multipart-manifest': 'get'})
copied = self.env.container.file("copied-mixed-object-data-manifest")
copied_contents = copied.read(parms={'multipart-manifest': 'get'})
try:
copied_json = json.loads(copied_contents)
except ValueError:
self.fail("COPY didn't copy the manifest (invalid json on GET)")
self.assertEqual(source_contents, copied_contents)
self.assertEqual(copied_json[0],
{'data': base64.b64encode('APRE' * 8)})
def test_slo_copy_the_manifest_updating_metadata(self): def test_slo_copy_the_manifest_updating_metadata(self):
source = self.env.container.file("manifest-abcde") source = self.env.container.file("manifest-abcde")
source.content_type = 'application/octet-stream' source.content_type = 'application/octet-stream'
@ -1115,6 +1157,56 @@ class TestSlo(Base):
self.assertEqual('d', contents[-2]) self.assertEqual('d', contents[-2])
self.assertEqual('e', contents[-1]) self.assertEqual('e', contents[-1])
def test_slo_data_segments(self):
    """Ranged read across a manifest mixing inline data and objects.

    Layout of "mixed-object-data-manifest" (byte counts), as written by
    the test environment:

        'APRE' * 8      32
        seg_a           1024 ** 2   (all 'a')
        'APOS' * 16     64
        seg_b           1024 ** 2   (all 'b')
        'BPOS' * 32     128
        'CPRE' * 64     256
        seg_c           1024 ** 2   (all 'c')
        'CPOS' * 8      32

    That is 3146240 bytes in total; the trailing 'CPOS' data starts at
    byte 3146208.  Reading 3 * 1024 ** 2 + 456 bytes from offset 28
    therefore begins on the last 'APRE' of the first data segment and
    stops after the first 'CPOS' of the final one.

    "nested-data-manifest" has the mixed manifest as its only segment,
    so the same bytes are expected from it.
    """
    segment_size = 1024 * 1024

    def letter_runs(marker, repeats):
        # No letter repeats back-to-back inside a marker, so groupby
        # reports every character as a separate run of length one.
        return [(letter, 1) for letter in marker] * repeats

    expected_runs = (
        letter_runs('APRE', 1) +
        [('a', segment_size)] +
        letter_runs('APOS', 16) +
        [('b', segment_size)] +
        letter_runs('BPOS', 32) +
        letter_runs('CPRE', 64) +
        [('c', segment_size)] +
        letter_runs('CPOS', 1))

    for file_name in ("mixed-object-data-manifest",
                      "nested-data-manifest"):
        file_item = self.env.container.file(file_name)
        file_contents = file_item.read(size=3 * 1024 ** 2 + 456,
                                       offset=28)
        # Collapse the ~3 MiB body into (character, run length) pairs so
        # a mismatch is reported compactly.
        actual_runs = []
        for char, grp in itertools.groupby(file_contents):
            actual_runs.append((char, sum(1 for _char in grp)))
        self.assertEqual(expected_runs, actual_runs)
class TestSloUTF8(Base2, TestSlo): class TestSloUTF8(Base2, TestSlo):
pass pass

View File

@ -16,12 +16,15 @@
from six.moves import range from six.moves import range
import base64
import hashlib import hashlib
import json import json
import time import time
import unittest import unittest
from mock import patch from mock import patch
from StringIO import StringIO from StringIO import StringIO
from swift.common import swob, utils from swift.common import swob, utils
from swift.common.header_key_dict import HeaderKeyDict from swift.common.header_key_dict import HeaderKeyDict
from swift.common.middleware import slo from swift.common.middleware import slo
@ -707,6 +710,29 @@ class TestSloPutManifest(SloTestCase):
status, headers, body = self.call_slo(req) status, headers, body = self.call_slo(req)
self.assertEqual(status, '201 Created') self.assertEqual(status, '201 Created')
def test_handle_multipart_put_invalid_data(self):
    """A manifest PUT with an unusable 'data' value must fail with 400.

    Each candidate value is placed in a 'data' segment that follows one
    ordinary path/etag/size segment, and the resulting manifest is handed
    to handle_multipart_put().
    """
    object_segment = {'path': '/cont/object',
                      'etag': 'etagoftheobjectsegment',
                      'size_bytes': 100}
    rejected_values = (
        'invalid',  # insufficient padding
        12345,
        0,
        True,
        False,
        None,
        {},
        [],
        # Empties are no good, either
        '',
        '====',
    )
    for bad_data in rejected_values:
        test_json_data = json.dumps([object_segment, {'data': bad_data}])
        req = Request.blank('/v1/a/c/o', body=test_json_data)
        with self.assertRaises(HTTPException) as catcher:
            self.slo.handle_multipart_put(req, fake_start_response)
        self.assertEqual(catcher.exception.status_int, 400)
def test_handle_multipart_put_success_unicode(self): def test_handle_multipart_put_success_unicode(self):
test_json_data = json.dumps([{'path': u'/cont/object\u2661', test_json_data = json.dumps([{'path': u'/cont/object\u2661',
'etag': 'etagoftheobjectsegment', 'etag': 'etagoftheobjectsegment',
@ -2367,8 +2393,10 @@ class TestSloGetManifest(SloTestCase):
'bytes=0-3,8-11']) 'bytes=0-3,8-11'])
# we set swift.source for everything but the first request # we set swift.source for everything but the first request
self.assertIsNone(self.app.swift_sources[0]) self.assertIsNone(self.app.swift_sources[0])
self.assertEqual(self.app.swift_sources[1:], self.assertEqual(
['SLO'] * (len(self.app.swift_sources) - 1)) self.app.swift_sources[1:],
['SLO'] * (len(self.app.swift_sources) - 1)
)
self.assertEqual(md5hex(''.join([ self.assertEqual(md5hex(''.join([
md5hex('a' * 5), ':0-3;', md5hex('a' * 5), ':0-3;',
md5hex('a' * 5), ':1-4;', md5hex('a' * 5), ':1-4;',
@ -2654,18 +2682,21 @@ class TestSloGetManifest(SloTestCase):
'Etag': 'man%d' % i}, 'Etag': 'man%d' % i},
manifest_json) manifest_json)
submanifest_bytes = 6
for i in range(19, 0, -1): for i in range(19, 0, -1):
manifest_data = [ manifest_data = [
{'name': '/gettest/obj%d' % i, {'name': '/gettest/obj%d' % i,
'hash': md5hex('body%02d' % i), 'hash': md5hex('body%02d' % i),
'bytes': '6', 'bytes': '6',
'content_type': 'text/plain'}, 'content_type': 'text/plain'},
{'data': base64.b64encode('-' * 3)},
{'name': '/gettest/man%d' % (i + 1), {'name': '/gettest/man%d' % (i + 1),
'hash': 'man%d' % (i + 1), 'hash': 'man%d' % (i + 1),
'sub_slo': True, 'sub_slo': True,
'bytes': len(manifest_json), 'bytes': submanifest_bytes,
'content_type': 'application/json'}] 'content_type': 'application/json'}]
submanifest_bytes += 9
manifest_json = json.dumps(manifest_data) manifest_json = json.dumps(manifest_data)
self.app.register( self.app.register(
'GET', '/v1/AUTH_test/gettest/man%d' % i, 'GET', '/v1/AUTH_test/gettest/man%d' % i,
@ -2683,8 +2714,10 @@ class TestSloGetManifest(SloTestCase):
# we don't know at header-sending time that things are going to go # we don't know at header-sending time that things are going to go
# wrong, so we end up with a 200 and a truncated body # wrong, so we end up with a 200 and a truncated body
self.assertEqual(status, '200 OK') self.assertEqual(status, '200 OK')
self.assertEqual(body, ('body01body02body03body04body05' + self.assertEqual(headers['Content-Length'], str(9 * 19 + 6))
'body06body07body08body09body10')) self.assertEqual(body, (
'body01---body02---body03---body04---body05---' +
'body06---body07---body08---body09---body10---'))
# but the error shows up in logs # but the error shows up in logs
self.assertEqual(self.slo.logger.get_lines_for_level('error'), [ self.assertEqual(self.slo.logger.get_lines_for_level('error'), [
"While processing manifest '/v1/AUTH_test/gettest/man1', " "While processing manifest '/v1/AUTH_test/gettest/man1', "
@ -3057,6 +3090,275 @@ class TestSloGetManifest(SloTestCase):
'gettest/not_exists_obj' 'gettest/not_exists_obj'
]) ])
def test_leading_data_segment(self):
    """GET an SLO whose first segment is inline data.

    The body must be the decoded inline bytes followed by the object
    segment, and the Etag must be the MD5 of the concatenated
    per-segment MD5 hex digests.
    """
    manifest_path = '/v1/AUTH_test/gettest/manifest-single-preamble'
    object_etag = md5hex('a' * 5)
    segments = [
        {'data': base64.b64encode('preamble')},
        {'name': '/gettest/a_5',
         'hash': object_etag,
         'content_type': 'text/plain',
         'bytes': '5'},
    ]
    manifest_headers = {'Content-Type': 'application/json',
                        'X-Static-Large-Object': 'true'}
    self.app.register('GET', manifest_path, swob.HTTPOk,
                      manifest_headers, json.dumps(segments))

    req = Request.blank(manifest_path,
                        environ={'REQUEST_METHOD': 'GET'})
    status, headers, body = self.call_slo(req)

    expected_etag = md5hex(md5hex('preamble') + object_etag)
    self.assertEqual('200 OK', status)
    self.assertEqual(body, 'preambleaaaaa')
    self.assertIn(('Etag', '"%s"' % expected_etag), headers)
    self.assertIn(('Content-Length', '13'), headers)
def test_trailing_data_segment(self):
    """GET an SLO whose last segment is inline data.

    The body must be the object segment followed by the decoded inline
    bytes, and the Etag must be the MD5 of the concatenated per-segment
    MD5 hex digests.
    """
    manifest_path = '/v1/AUTH_test/gettest/manifest-single-postamble'
    object_etag = md5hex('a' * 5)
    segments = [
        {'name': '/gettest/a_5',
         'hash': object_etag,
         'content_type': 'text/plain',
         'bytes': '5'},
        {'data': base64.b64encode('postamble')},
    ]
    manifest_headers = {'Content-Type': 'application/json',
                        'X-Static-Large-Object': 'true'}
    self.app.register('GET', manifest_path, swob.HTTPOk,
                      manifest_headers, json.dumps(segments))

    req = Request.blank(manifest_path,
                        environ={'REQUEST_METHOD': 'GET'})
    status, headers, body = self.call_slo(req)

    expected_etag = md5hex(object_etag + md5hex('postamble'))
    self.assertEqual('200 OK', status)
    self.assertEqual(body, 'aaaaapostamble')
    self.assertIn(('Etag', '"%s"' % expected_etag), headers)
    self.assertIn(('Content-Length', '14'), headers)
def test_data_segment_sandwich(self):
    """GET an SLO with inline data on both sides of one object segment.

    The assembled body is 'preamble' + 'aaaaa' + 'postamble' (22 bytes),
    occupying bytes 0-7, 8-12 and 13-21 respectively.  The whole object
    is fetched first, followed by byte ranges that start, end, or lie
    entirely inside the inline data.
    """
    manifest_path = '/v1/AUTH_test/gettest/manifest-single-prepostamble'
    object_etag = md5hex('a' * 5)
    segments = [
        {'data': base64.b64encode('preamble')},
        {'name': '/gettest/a_5',
         'hash': object_etag,
         'content_type': 'text/plain',
         'bytes': '5'},
        {'data': base64.b64encode('postamble')},
    ]
    manifest_headers = {'Content-Type': 'application/json',
                        'X-Static-Large-Object': 'true'}
    self.app.register('GET', manifest_path, swob.HTTPOk,
                      manifest_headers, json.dumps(segments))

    # Test the whole SLO
    req = Request.blank(manifest_path,
                        environ={'REQUEST_METHOD': 'GET'})
    status, headers, body = self.call_slo(req)
    expected_etag = md5hex(
        md5hex('preamble') + object_etag + md5hex('postamble'))
    self.assertEqual('200 OK', status)
    self.assertEqual(body, 'preambleaaaaapostamble')
    self.assertIn(('Etag', '"%s"' % expected_etag), headers)
    self.assertIn(('Content-Length', '22'), headers)

    ranged_cases = (
        # complete preamble only
        ('bytes=0-7', 'preamble'),
        # range within preamble only
        ('bytes=1-5', 'reamb'),
        # complete postamble only
        ('bytes=13-21', 'postamble'),
        # partial pre and postamble
        ('bytes=4-16', 'mbleaaaaapost'),
        # partial preamble and first byte of data
        ('bytes=1-8', 'reamblea'),
        # last byte of segment data and partial postamble
        ('bytes=12-16', 'apost'),
    )
    for range_header, expected_body in ranged_cases:
        req = Request.blank(manifest_path,
                            environ={'REQUEST_METHOD': 'GET'},
                            headers={'Range': range_header})
        status, headers, body = self.call_slo(req)
        self.assertEqual('206 Partial Content', status)
        self.assertEqual(body, expected_body)
def test_bunches_of_data_segments(self):
    """GET an SLO that interleaves several inline-data and object segments.

    Assembled body and byte offsets:

        'ABCDEF'      0-5    inline data
        'a' * 5       6-10   object /gettest/a_5
        '123456'     11-16   inline data
        'GHIJKL'     17-22   inline data
        'b' * 10     23-32   object /gettest/b_10
        '7890@#'     33-38   inline data

    The whole object is fetched first, followed by byte ranges that
    cross or sit inside the adjacent inline-data segments.
    """
    manifest_path = '/v1/AUTH_test/gettest/manifest-multi-prepostamble'
    etag_a = md5hex('a' * 5)
    etag_b = md5hex('b' * 10)
    segments = [
        {'data': base64.b64encode('ABCDEF')},
        {'name': '/gettest/a_5',
         'hash': etag_a,
         'content_type': 'text/plain',
         'bytes': '5'},
        {'data': base64.b64encode('123456')},
        {'data': base64.b64encode('GHIJKL')},
        {'name': '/gettest/b_10',
         'hash': etag_b,
         'content_type': 'text/plain',
         'bytes': '10'},
        {'data': base64.b64encode('7890@#')},
    ]
    manifest_headers = {'Content-Type': 'application/json',
                        'X-Static-Large-Object': 'true'}
    self.app.register('GET', manifest_path, swob.HTTPOk,
                      manifest_headers, json.dumps(segments))

    # Test the whole SLO
    req = Request.blank(manifest_path,
                        environ={'REQUEST_METHOD': 'GET'})
    status, headers, body = self.call_slo(req)
    expected_etag = md5hex(
        md5hex('ABCDEF') +
        etag_a +
        md5hex('123456') +
        md5hex('GHIJKL') +
        etag_b +
        md5hex('7890@#'))
    self.assertEqual('200 OK', status)
    self.assertEqual(body, 'ABCDEFaaaaa123456GHIJKLbbbbbbbbbb7890@#')
    self.assertIn(('Etag', '"%s"' % expected_etag), headers)
    self.assertIn(('Content-Length', '39'), headers)

    ranged_cases = (
        # last byte of first preamble to first byte of second postamble
        ('bytes=5-33', 'Faaaaa123456GHIJKLbbbbbbbbbb7'),
        # only second complete preamble
        ('bytes=17-22', 'GHIJKL'),
        # only first complete postamble
        ('bytes=11-16', '123456'),
        # only range within first postamble
        ('bytes=12-15', '2345'),
        # only range within first postamble and second preamble
        ('bytes=12-18', '23456GH'),
    )
    for range_header, expected_body in ranged_cases:
        req = Request.blank(manifest_path,
                            environ={'REQUEST_METHOD': 'GET'},
                            headers={'Range': range_header})
        status, headers, body = self.call_slo(req)
        self.assertEqual('206 Partial Content', status)
        self.assertEqual(body, expected_body)
class TestSloConditionalGetOldManifest(SloTestCase): class TestSloConditionalGetOldManifest(SloTestCase):
slo_data = [ slo_data = [
@ -3324,7 +3626,7 @@ class TestSwiftInfo(unittest.TestCase):
self.assertEqual(swift_info['slo'].get('max_manifest_size'), self.assertEqual(swift_info['slo'].get('max_manifest_size'),
mware.max_manifest_size) mware.max_manifest_size)
self.assertEqual(1000, mware.max_manifest_segments) self.assertEqual(1000, mware.max_manifest_segments)
self.assertEqual(2097152, mware.max_manifest_size) self.assertEqual(8388608, mware.max_manifest_size)
self.assertEqual(1048576, mware.rate_limit_under_size) self.assertEqual(1048576, mware.rate_limit_under_size)
self.assertEqual(10, mware.rate_limit_after_segment) self.assertEqual(10, mware.rate_limit_after_segment)
self.assertEqual(1, mware.rate_limit_segments_per_sec) self.assertEqual(1, mware.rate_limit_segments_per_sec)