diff --git a/swift/common/utils.py b/swift/common/utils.py
index cb66c19628..66f6ad777b 100644
--- a/swift/common/utils.py
+++ b/swift/common/utils.py
@@ -700,6 +700,7 @@ def drop_buffer_cache(fd, offset, length):
 NORMAL_FORMAT = "%016.05f"
 INTERNAL_FORMAT = NORMAL_FORMAT + '_%016x'
+SHORT_FORMAT = NORMAL_FORMAT + '_%x'
 MAX_OFFSET = (16 ** 16) - 1
 PRECISION = 1e-5
 # Setting this to True will cause the internal format to always display
@@ -820,6 +821,13 @@ class Timestamp(object):
         else:
             return self.normal
 
+    @property
+    def short(self):
+        if self.offset or FORCE_INTERNAL:
+            return SHORT_FORMAT % (self.timestamp, self.offset)
+        else:
+            return self.normal
+
     @property
     def isoformat(self):
         t = float(self.normal)
@@ -849,16 +857,22 @@ class Timestamp(object):
         return isoformat
 
     def __eq__(self, other):
+        if other is None:
+            return False
         if not isinstance(other, Timestamp):
             other = Timestamp(other)
         return self.internal == other.internal
 
     def __ne__(self, other):
+        if other is None:
+            return True
         if not isinstance(other, Timestamp):
             other = Timestamp(other)
         return self.internal != other.internal
 
     def __lt__(self, other):
+        if other is None:
+            return False
         if not isinstance(other, Timestamp):
             other = Timestamp(other)
         return self.internal < other.internal
@@ -867,6 +881,94 @@ class Timestamp(object):
         return hash(self.internal)
 
 
+def encode_timestamps(t1, t2=None, t3=None, explicit=False):
+    """
+    Encode up to three timestamps into a string. Unlike a Timestamp object,
+    the encoded string does NOT use fixed width fields and consequently no
+    relative chronology of the timestamps can be inferred from lexicographic
+    sorting of encoded timestamp strings.
+
+    The format of the encoded string is:
+        <t1>[<+/-><t2 - t1>[<+/-><t3 - t2>]]
+
+    i.e. if t1 = t2 = t3 then just the string representation of t1 is
+    returned, otherwise the time offsets for t2 and t3 are appended. If
+    explicit is True then the offsets for t2 and t3 are always appended even
+    if zero.
+
+    Note: any offset value in t1 will be preserved, but offsets on t2 and t3
+    are not preserved. In the anticipated use cases for this method (and the
+    inverse decode_timestamps method) the timestamps passed as t2 and t3 are
+    not expected to have offsets as they will be timestamps associated with a
+    POST request. In the case where the encoding is used in a container
+    objects table row, t1 could be the PUT or DELETE time but t2 and t3
+    represent the content type and metadata times (if different from the
+    data file) i.e. correspond to POST timestamps. In the case where the
+    encoded form is used in a .meta file name, t1 and t2 both correspond to
+    POST timestamps.
+    """
+    form = '{0}'
+    values = [t1.short]
+    if t2 is not None:
+        t2_t1_delta = t2.raw - t1.raw
+        explicit = explicit or (t2_t1_delta != 0)
+        values.append(t2_t1_delta)
+        if t3 is not None:
+            t3_t2_delta = t3.raw - t2.raw
+            explicit = explicit or (t3_t2_delta != 0)
+            values.append(t3_t2_delta)
+    if explicit:
+        form += '{1:+x}'
+        if t3 is not None:
+            form += '{2:+x}'
+    return form.format(*values)
+
+
+def decode_timestamps(encoded, explicit=False):
+    """
+    Parses a string of the form generated by encode_timestamps and returns
+    a tuple of the three component timestamps. If explicit is False,
+    component timestamps that are not explicitly encoded will be assumed to
+    have zero delta from the previous component and therefore take the value
+    of the previous component. If explicit is True, component timestamps
+    that are not explicitly encoded will be returned with value None.
+    """
+    # TODO: some tests, e.g.
in test_replicator, put float timestamps values + # into container db's, hence this defensive check, but in real world + # this may never happen. + if not isinstance(encoded, basestring): + ts = Timestamp(encoded) + return ts, ts, ts + + parts = [] + signs = [] + pos_parts = encoded.split('+') + for part in pos_parts: + # parse time components and their signs + # e.g. x-y+z --> parts = [x, y, z] and signs = [+1, -1, +1] + neg_parts = part.split('-') + parts = parts + neg_parts + signs = signs + [1] + [-1] * (len(neg_parts) - 1) + t1 = Timestamp(parts[0]) + t2 = t3 = None + if len(parts) > 1: + t2 = t1 + delta = signs[1] * int(parts[1], 16) + # if delta = 0 we want t2 = t3 = t1 in order to + # preserve any offset in t1 - only construct a distinct + # timestamp if there is a non-zero delta. + if delta: + t2 = Timestamp((t1.raw + delta) * PRECISION) + elif not explicit: + t2 = t1 + if len(parts) > 2: + t3 = t2 + delta = signs[2] * int(parts[2], 16) + if delta: + t3 = Timestamp((t2.raw + delta) * PRECISION) + elif not explicit: + t3 = t2 + return t1, t2, t3 + + def normalize_timestamp(timestamp): """ Format a timestamp (string or numeric) into a standardized @@ -3357,6 +3459,25 @@ def parse_content_type(content_type): return content_type, parm_list +def extract_swift_bytes(content_type): + """ + Parse a content-type and return a tuple containing: + - the content_type string minus any swift_bytes param, + - the swift_bytes value or None if the param was not found + + :param content_type: a content-type string + :return: a tuple of (content-type, swift_bytes or None) + """ + content_type, params = parse_content_type(content_type) + swift_bytes = None + for k, v in params: + if k == 'swift_bytes': + swift_bytes = v + else: + content_type += ';%s=%s' % (k, v) + return content_type, swift_bytes + + def override_bytes_from_content_type(listing_dict, logger=None): """ Takes a dict from a container listing and overrides the content_type, diff --git a/swift/container/backend.py b/swift/container/backend.py index 15155e252b..d39e93bfb2 100644 --- a/swift/container/backend.py +++ b/swift/container/backend.py @@ -25,7 +25,8 @@ import six.moves.cPickle as pickle from six.moves import range import sqlite3 -from swift.common.utils import Timestamp +from swift.common.utils import Timestamp, encode_timestamps, decode_timestamps, \ + extract_swift_bytes from swift.common.db import DatabaseBroker, utf8encode @@ -137,6 +138,90 @@ CONTAINER_STAT_VIEW_SCRIPT = ''' ''' +def update_new_item_from_existing(new_item, existing): + """ + Compare the data and meta related timestamps of a new object item with + the timestamps of an existing object record, and update the new item + with data and/or meta related attributes from the existing record if + their timestamps are newer. + + The multiple timestamps are encoded into a single string for storing + in the 'created_at' column of the the objects db table. + + :param new_item: A dict of object update attributes + :param existing: A dict of existing object attributes + :return: True if any attributes of the new item dict were found to be + newer than the existing and therefore not updated, otherwise + False implying that the updated item is equal to the existing. + """ + + # item[created_at] may be updated so keep a copy of the original + # value in case we process this item again + new_item.setdefault('data_timestamp', new_item['created_at']) + + # content-type and metadata timestamps may be encoded in + # item[created_at], or may be set explicitly. 
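# Illustrative sketch, not part of this patch: what an encoded 'created_at'
# value looks like and how it round-trips, assuming the utils.py changes
# above are applied. The timestamp values are hypothetical.
from swift.common.utils import (
    Timestamp, encode_timestamps, decode_timestamps)

t_put = Timestamp('1456789012.12345')    # data (PUT) time
t_post = Timestamp('1456789013.12345')   # content-type/metadata (POST) time
encoded = encode_timestamps(t_put, t_post, t_post)
# roughly '1456789012.12345+186a0+0': t1 verbatim, then the t2 and t3 deltas
# as signed hex (the zero t3 delta appears because any non-zero delta makes
# the encoding explicit)
t_data, t_ctype, t_meta = decode_timestamps(encoded)
# round trip: t_data == t_put, and t_ctype == t_meta == t_post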
+ item_ts_data, item_ts_ctype, item_ts_meta = decode_timestamps( + new_item['data_timestamp']) + + if new_item.get('ctype_timestamp'): + item_ts_ctype = Timestamp(new_item.get('ctype_timestamp')) + item_ts_meta = item_ts_ctype + if new_item.get('meta_timestamp'): + item_ts_meta = Timestamp(new_item.get('meta_timestamp')) + + if not existing: + # encode new_item timestamps into one string for db record + new_item['created_at'] = encode_timestamps( + item_ts_data, item_ts_ctype, item_ts_meta) + return True + + # decode existing timestamp into separate data, content-type and + # metadata timestamps + rec_ts_data, rec_ts_ctype, rec_ts_meta = decode_timestamps( + existing['created_at']) + + # Extract any swift_bytes values from the content_type values. This is + # necessary because the swift_bytes value to persist should be that at the + # most recent data timestamp whereas the content-type value to persist is + # that at the most recent content-type timestamp. The two values happen to + # be stored in the same database column for historical reasons. + for item in (new_item, existing): + content_type, swift_bytes = extract_swift_bytes(item['content_type']) + item['content_type'] = content_type + item['swift_bytes'] = swift_bytes + + newer_than_existing = [True, True, True] + if rec_ts_data >= item_ts_data: + # apply data attributes from existing record + new_item.update([(k, existing[k]) + for k in ('size', 'etag', 'deleted', 'swift_bytes')]) + item_ts_data = rec_ts_data + newer_than_existing[0] = False + if rec_ts_ctype >= item_ts_ctype: + # apply content-type attribute from existing record + new_item['content_type'] = existing['content_type'] + item_ts_ctype = rec_ts_ctype + newer_than_existing[1] = False + if rec_ts_meta >= item_ts_meta: + # apply metadata timestamp from existing record + item_ts_meta = rec_ts_meta + newer_than_existing[2] = False + + # encode updated timestamps into one string for db record + new_item['created_at'] = encode_timestamps( + item_ts_data, item_ts_ctype, item_ts_meta) + + # append the most recent swift_bytes onto the most recent content_type in + # new_item and restore existing to its original state + for item in (new_item, existing): + if item['swift_bytes']: + item['content_type'] += ';swift_bytes=%s' % item['swift_bytes'] + del item['swift_bytes'] + + return any(newer_than_existing) + + class ContainerBroker(DatabaseBroker): """Encapsulates working with a container database.""" db_type = 'container' @@ -284,13 +369,20 @@ class ContainerBroker(DatabaseBroker): storage_policy_index = data[6] else: storage_policy_index = 0 + content_type_timestamp = meta_timestamp = None + if len(data) > 7: + content_type_timestamp = data[7] + if len(data) > 8: + meta_timestamp = data[8] item_list.append({'name': name, 'created_at': timestamp, 'size': size, 'content_type': content_type, 'etag': etag, 'deleted': deleted, - 'storage_policy_index': storage_policy_index}) + 'storage_policy_index': storage_policy_index, + 'ctype_timestamp': content_type_timestamp, + 'meta_timestamp': meta_timestamp}) def empty(self): """ @@ -325,10 +417,13 @@ class ContainerBroker(DatabaseBroker): def make_tuple_for_pickle(self, record): return (record['name'], record['created_at'], record['size'], record['content_type'], record['etag'], record['deleted'], - record['storage_policy_index']) + record['storage_policy_index'], + record['ctype_timestamp'], + record['meta_timestamp']) def put_object(self, name, timestamp, size, content_type, etag, deleted=0, - storage_policy_index=0): + 
storage_policy_index=0, ctype_timestamp=None, + meta_timestamp=None): """ Creates an object in the DB with its metadata. @@ -340,11 +435,16 @@ class ContainerBroker(DatabaseBroker): :param deleted: if True, marks the object as deleted and sets the deleted_at timestamp to timestamp :param storage_policy_index: the storage policy index for the object + :param ctype_timestamp: timestamp of when content_type was last + updated + :param meta_timestamp: timestamp of when metadata was last updated """ record = {'name': name, 'created_at': timestamp, 'size': size, 'content_type': content_type, 'etag': etag, 'deleted': deleted, - 'storage_policy_index': storage_policy_index} + 'storage_policy_index': storage_policy_index, + 'ctype_timestamp': ctype_timestamp, + 'meta_timestamp': meta_timestamp} self.put_record(record) def _is_deleted_info(self, object_count, put_timestamp, delete_timestamp, @@ -647,7 +747,7 @@ class ContainerBroker(DatabaseBroker): # is no delimiter then we can simply return the result as # prefixes are now handled in the SQL statement. if prefix is None or not delimiter: - return [r for r in curs] + return [self._transform_record(r) for r in curs] # We have a delimiter and a prefix (possibly empty string) to # handle @@ -686,18 +786,35 @@ class ContainerBroker(DatabaseBroker): results.append([dir_name, '0', 0, None, '']) curs.close() break - results.append(row) + results.append(self._transform_record(row)) if not rowcount: break return results + def _transform_record(self, record): + """ + Decode the created_at timestamp into separate data, content-type and + meta timestamps and replace the created_at timestamp with the + metadata timestamp i.e. the last-modified time. + """ + t_data, t_ctype, t_meta = decode_timestamps(record[1]) + return (record[0], t_meta.internal) + record[2:] + + def _record_to_dict(self, rec): + if rec: + keys = ('name', 'created_at', 'size', 'content_type', 'etag', + 'deleted', 'storage_policy_index') + return dict(zip(keys, rec)) + return None + def merge_items(self, item_list, source=None): """ Merge items into the object table. :param item_list: list of dictionaries of {'name', 'created_at', 'size', 'content_type', 'etag', 'deleted', - 'storage_policy_index'} + 'storage_policy_index', 'ctype_timestamp', + 'meta_timestamp'} :param source: if defined, update incoming_sync with the source """ for item in item_list: @@ -711,15 +828,16 @@ class ContainerBroker(DatabaseBroker): else: query_mod = '' curs.execute('BEGIN IMMEDIATE') - # Get created_at times for objects in item_list that already exist. + # Get sqlite records for objects in item_list that already exist. # We must chunk it up to avoid sqlite's limit of 999 args. - created_at = {} + records = {} for offset in range(0, len(item_list), SQLITE_ARG_LIMIT): chunk = [rec['name'] for rec in item_list[offset:offset + SQLITE_ARG_LIMIT]] - created_at.update( - ((rec[0], rec[1]), rec[2]) for rec in curs.execute( - 'SELECT name, storage_policy_index, created_at ' + records.update( + ((rec[0], rec[6]), rec) for rec in curs.execute( + 'SELECT name, created_at, size, content_type,' + 'etag, deleted, storage_policy_index ' 'FROM object WHERE ' + query_mod + ' name IN (%s)' % ','.join('?' 
* len(chunk)), chunk)) # Sort item_list into things that need adding and deleting, based @@ -729,14 +847,13 @@ class ContainerBroker(DatabaseBroker): for item in item_list: item.setdefault('storage_policy_index', 0) # legacy item_ident = (item['name'], item['storage_policy_index']) - if created_at.get(item_ident) < item['created_at']: - if item_ident in created_at: # exists with older timestamp + existing = self._record_to_dict(records.get(item_ident)) + if update_new_item_from_existing(item, existing): + if item_ident in records: # exists with older timestamp to_delete[item_ident] = item if item_ident in to_add: # duplicate entries in item_list - to_add[item_ident] = max(item, to_add[item_ident], - key=lambda i: i['created_at']) - else: - to_add[item_ident] = item + update_new_item_from_existing(item, to_add[item_ident]) + to_add[item_ident] = item if to_delete: curs.executemany( 'DELETE FROM object WHERE ' + query_mod + diff --git a/swift/container/reconciler.py b/swift/container/reconciler.py index ba896ae527..efacfd2248 100644 --- a/swift/container/reconciler.py +++ b/swift/container/reconciler.py @@ -27,8 +27,7 @@ from swift.common.direct_client import ( from swift.common.internal_client import InternalClient, UnexpectedResponse from swift.common.utils import get_logger, split_path, quorum_size, \ FileLikeIter, Timestamp, last_modified_date_to_timestamp, \ - LRUCache - + LRUCache, decode_timestamps MISPLACED_OBJECTS_ACCOUNT = '.misplaced_objects' MISPLACED_OBJECTS_CONTAINER_DIVISOR = 3600 # 1 hour @@ -116,7 +115,18 @@ def best_policy_index(headers): def get_reconciler_container_name(obj_timestamp): - return str(int(Timestamp(obj_timestamp)) // + """ + Get the name of a container into which a misplaced object should be + enqueued. The name is the object's last modified time rounded down to the + nearest hour. + + :param obj_timestamp: a string representation of the object's 'created_at' + time from it's container db row. 
+ :return: a container name + """ + # Use last modified time of object to determine reconciler container name + _junk, _junk, ts_meta = decode_timestamps(obj_timestamp) + return str(int(ts_meta) // MISPLACED_OBJECTS_CONTAINER_DIVISOR * MISPLACED_OBJECTS_CONTAINER_DIVISOR) @@ -262,7 +272,7 @@ def parse_raw_obj(obj_info): 'container': container, 'obj': obj, 'q_op': q_op, - 'q_ts': Timestamp(obj_info['hash']), + 'q_ts': decode_timestamps((obj_info['hash']))[0], 'q_record': last_modified_date_to_timestamp( obj_info['last_modified']), 'path': '/%s/%s/%s' % (account, container, obj) diff --git a/swift/container/server.py b/swift/container/server.py index 0a09f57615..88fdc60160 100644 --- a/swift/container/server.py +++ b/swift/container/server.py @@ -368,7 +368,9 @@ class ContainerController(BaseStorageServer): int(req.headers['x-size']), req.headers['x-content-type'], req.headers['x-etag'], 0, - obj_policy_index) + obj_policy_index, + req.headers.get('x-content-type-timestamp'), + req.headers.get('x-meta-timestamp')) return HTTPCreated(request=req) else: # put container if requested_policy_index is None: diff --git a/swift/container/sync.py b/swift/container/sync.py index 3bb0a88ad3..7bb37f9225 100644 --- a/swift/container/sync.py +++ b/swift/container/sync.py @@ -36,7 +36,7 @@ from swift.common.ring.utils import is_local_device from swift.common.utils import ( clean_content_type, config_true_value, FileLikeIter, get_logger, hash_path, quote, urlparse, validate_sync_to, - whataremyips, Timestamp) + whataremyips, Timestamp, decode_timestamps) from swift.common.daemon import Daemon from swift.common.http import HTTP_UNAUTHORIZED, HTTP_NOT_FOUND from swift.common.storage_policy import POLICIES @@ -431,9 +431,14 @@ class ContainerSync(Daemon): """ try: start_time = time() + # extract last modified time from the created_at value + ts_data, ts_ctype, ts_meta = decode_timestamps( + row['created_at']) if row['deleted']: + # when sync'ing a deleted object, use ts_data - this is the + # timestamp of the source tombstone try: - headers = {'x-timestamp': row['created_at']} + headers = {'x-timestamp': ts_data.internal} if realm and realm_key: nonce = uuid.uuid4().hex path = urlparse(sync_to).path + '/' + quote( @@ -456,13 +461,14 @@ class ContainerSync(Daemon): self.logger.increment('deletes') self.logger.timing_since('deletes.timing', start_time) else: + # when sync'ing a live object, use ts_meta - this is the time + # at which the source object was last modified by a PUT or POST part, nodes = \ self.get_object_ring(info['storage_policy_index']). 
\ get_nodes(info['account'], info['container'], row['name']) shuffle(nodes) exc = None - looking_for_timestamp = Timestamp(row['created_at']) # look up for the newest one headers_out = {'X-Newest': True, 'X-Backend-Storage-Policy-Index': @@ -479,7 +485,7 @@ class ContainerSync(Daemon): body = None exc = err timestamp = Timestamp(headers.get('x-timestamp', 0)) - if timestamp < looking_for_timestamp: + if timestamp < ts_meta: if exc: raise exc raise Exception( diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py index 6328a79387..3a8b41bb54 100644 --- a/swift/obj/diskfile.py +++ b/swift/obj/diskfile.py @@ -56,7 +56,7 @@ from swift.common.utils import mkdirs, Timestamp, \ storage_directory, hash_path, renamer, fallocate, fsync, fdatasync, \ fsync_dir, drop_buffer_cache, ThreadPool, lock_path, write_pickle, \ config_true_value, listdir, split_path, ismount, remove_file, \ - get_md5_socket, F_SETPIPE_SZ + get_md5_socket, F_SETPIPE_SZ, decode_timestamps, encode_timestamps from swift.common.splice import splice, tee from swift.common.exceptions import DiskFileQuarantined, DiskFileNotExist, \ DiskFileCollision, DiskFileNoSpace, DiskFileDeviceUnavailable, \ @@ -76,7 +76,7 @@ METADATA_KEY = 'user.swift.metadata' DROP_CACHE_WINDOW = 1024 * 1024 # These are system-set metadata keys that cannot be changed with a POST. # They should be lowercase. -DATAFILE_SYSTEM_META = set('content-length content-type deleted etag'.split()) +DATAFILE_SYSTEM_META = set('content-length deleted etag'.split()) DATADIR_BASE = 'objects' ASYNCDIR_BASE = 'async_pending' TMP_BASE = 'tmp' @@ -442,23 +442,78 @@ class BaseDiskFileManager(object): max_pipe_size = int(f.read()) self.pipe_size = min(max_pipe_size, self.disk_chunk_size) + def make_on_disk_filename(self, timestamp, ext=None, + ctype_timestamp=None, *a, **kw): + """ + Returns filename for given timestamp. + + :param timestamp: the object timestamp, an instance of + :class:`~swift.common.utils.Timestamp` + :param ext: an optional string representing a file extension to be + appended to the returned file name + :param ctype_timestamp: an optional content-type timestamp, an instance + of :class:`~swift.common.utils.Timestamp` + :returns: a file name + """ + rv = timestamp.internal + if ext == '.meta' and ctype_timestamp: + # If ctype_timestamp is None then the filename is simply the + # internal form of the timestamp. If ctype_timestamp is not None + # then the difference between the raw values of the two timestamps + # is appended as a hex number, with its sign. + # + # There are two reasons for encoding the content-type timestamp + # in the filename in this way. First, it means that two .meta files + # having the same timestamp but different content-type timestamps + # (and potentially different content-type values) will be distinct + # and therefore will be independently replicated when rsync + # replication is used. That ensures that all nodes end up having + # all content-type values after replication (with the most recent + # value being selected when the diskfile is opened). Second, having + # the content-type encoded in timestamp in the filename makes it + # possible for the on disk file search code to determine that + # timestamp by inspecting only the filename, and not needing to + # open the file and read its xattrs. + rv = encode_timestamps(timestamp, ctype_timestamp, explicit=True) + if ext: + rv = '%s%s' % (rv, ext) + return rv + def parse_on_disk_filename(self, filename): """ Parse an on disk file name. 
- :param filename: the data file name including extension - :returns: a dict, with keys for timestamp, and ext: + :param filename: the file name including extension + :returns: a dict, with keys for timestamp, ext and ctype_timestamp: * timestamp is a :class:`~swift.common.utils.Timestamp` + * ctype_timestamp is a :class:`~swift.common.utils.Timestamp` or + None for .meta files, otherwise None * ext is a string, the file extension including the leading dot or the empty string if the filename has no extension. - Subclases may add further keys to the returned dict. + Subclasses may override this method to add further keys to the + returned dict. :raises DiskFileError: if any part of the filename is not able to be validated. """ - raise NotImplementedError + ts_ctype = None + fname, ext = splitext(filename) + try: + if ext == '.meta': + timestamp, ts_ctype = decode_timestamps( + fname, explicit=True)[:2] + else: + timestamp = Timestamp(fname) + except ValueError: + raise DiskFileError('Invalid Timestamp value in filename %r' + % filename) + return { + 'timestamp': timestamp, + 'ext': ext, + 'ctype_timestamp': ts_ctype + } def _process_ondisk_files(self, exts, results, **kwargs): """ @@ -592,18 +647,45 @@ class BaseDiskFileManager(object): # the results dict is used to collect results of file filtering results = {} - # non-tombstones older than or equal to latest tombstone are obsolete if exts.get('.ts'): + # non-tombstones older than or equal to latest tombstone are + # obsolete for ext in filter(lambda ext: ext != '.ts', exts.keys()): exts[ext], older = self._split_gt_timestamp( exts[ext], exts['.ts'][0]['timestamp']) results.setdefault('obsolete', []).extend(older) + # all but most recent .ts are obsolete + results.setdefault('obsolete', []).extend(exts['.ts'][1:]) + exts['.ts'] = exts['.ts'][:1] - # all but most recent .meta and .ts are obsolete - for ext in ('.meta', '.ts'): - if ext in exts: - results.setdefault('obsolete', []).extend(exts[ext][1:]) - exts[ext] = exts[ext][:1] + if exts.get('.meta'): + # retain the newest meta file + retain = 1 + if exts['.meta'][1:]: + # there are other meta files so find the one with newest + # ctype_timestamp... + exts['.meta'][1:] = sorted( + exts['.meta'][1:], + key=lambda info: info['ctype_timestamp'], + reverse=True) + # ...and retain this IFF its ctype_timestamp is greater than + # newest meta file + if (exts['.meta'][1]['ctype_timestamp'] > + exts['.meta'][0]['ctype_timestamp']): + if (exts['.meta'][1]['timestamp'] == + exts['.meta'][0]['timestamp']): + # both at same timestamp so retain only the one with + # newest ctype + exts['.meta'][:2] = [exts['.meta'][1], + exts['.meta'][0]] + retain = 1 + else: + # retain both - first has newest metadata, second has + # newest ctype + retain = 2 + # discard all meta files not being retained... 
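# Illustrative walk-through, not part of this patch: given three
# hypothetical .meta files for one object, listed newest first --
#   1456789030.00000.meta          newest POST time, no content-type delta
#   1456789025.00000-7a120.meta    older POST, but newest explicit
#                                  content-type timestamp
#   1456789020.00000.meta          oldest
# -- the rule above retains the first (newest metadata) and the second
# (newest content-type value), and marks the third obsolete.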
+ results.setdefault('obsolete', []).extend(exts['.meta'][retain:]) + exts['.meta'] = exts['.meta'][:retain] # delegate to subclass handler self._process_ondisk_files(exts, results, **kwargs) @@ -612,11 +694,16 @@ class BaseDiskFileManager(object): if exts.get('.ts'): results['ts_info'] = exts['.ts'][0] if 'data_info' in results and exts.get('.meta'): - # only report a meta file if there is a data file + # only report meta files if there is a data file results['meta_info'] = exts['.meta'][0] + ctype_info = exts['.meta'].pop() + if (ctype_info['ctype_timestamp'] + > results['data_info']['timestamp']): + results['ctype_info'] = ctype_info - # set ts_file, data_file and meta_file with path to chosen file or None - for info_key in ('data_info', 'meta_info', 'ts_info'): + # set ts_file, data_file, meta_file and ctype_file with path to + # chosen file or None + for info_key in ('data_info', 'meta_info', 'ts_info', 'ctype_info'): info = results.get(info_key) key = info_key[:-5] + '_file' results[key] = join(datadir, info['filename']) if info else None @@ -742,6 +829,16 @@ class BaseDiskFileManager(object): # delegate to subclass for data file related updates... self._update_suffix_hashes(hashes, ondisk_info) + if 'ctype_info' in ondisk_info: + # We have a distinct content-type timestamp so update the + # hash. As a precaution, append '_ctype' to differentiate this + # value from any other timestamp value that might included in + # the hash in future. There is no .ctype file so use _ctype to + # avoid any confusion. + info = ondisk_info['ctype_info'] + hashes[None].update(info['ctype_timestamp'].internal + + '_ctype') + try: os.rmdir(path) except OSError as e: @@ -1066,6 +1163,8 @@ class BaseDiskFileManager(object): ts_data -> timestamp of data or tombstone file, ts_meta -> timestamp of meta file, if one exists + ts_ctype -> timestamp of meta file containing most recent + content-type value, if one exists where timestamps are instances of :class:`~swift.common.utils.Timestamp` @@ -1088,9 +1187,10 @@ class BaseDiskFileManager(object): (os.path.join(partition_path, suffix), suffix) for suffix in suffixes) key_preference = ( - ('ts_meta', 'meta_info'), - ('ts_data', 'data_info'), - ('ts_data', 'ts_info'), + ('ts_meta', 'meta_info', 'timestamp'), + ('ts_data', 'data_info', 'timestamp'), + ('ts_data', 'ts_info', 'timestamp'), + ('ts_ctype', 'ctype_info', 'ctype_timestamp'), ) for suffix_path, suffix in suffixes: for object_hash in self._listdir(suffix_path): @@ -1099,10 +1199,10 @@ class BaseDiskFileManager(object): results = self.cleanup_ondisk_files( object_path, self.reclaim_age, **kwargs) timestamps = {} - for ts_key, info_key in key_preference: + for ts_key, info_key, info_ts_key in key_preference: if info_key not in results: continue - timestamps[ts_key] = results[info_key]['timestamp'] + timestamps[ts_key] = results[info_key][info_ts_key] if 'ts_data' not in timestamps: # file sets that do not include a .data or .ts # file cannot be opened and therefore cannot @@ -1226,6 +1326,34 @@ class BaseDiskFileWriter(object): except OSError: logging.exception(_('Problem cleaning up %s'), self._datadir) + def _put(self, metadata, cleanup=True, *a, **kw): + """ + Helper method for subclasses. + + For this implementation, this method is responsible for renaming the + temporary file to the final name and directory location. This method + should be called after the final call to + :func:`swift.obj.diskfile.DiskFileWriter.write`. 
+ + :param metadata: dictionary of metadata to be associated with the + object + :param cleanup: a Boolean. If True then obsolete files will be removed + from the object dir after the put completes, otherwise + obsolete files are left in place. + """ + timestamp = Timestamp(metadata['X-Timestamp']) + ctype_timestamp = metadata.get('Content-Type-Timestamp') + if ctype_timestamp: + ctype_timestamp = Timestamp(ctype_timestamp) + filename = self.manager.make_on_disk_filename( + timestamp, self._extension, ctype_timestamp=ctype_timestamp, + *a, **kw) + metadata['name'] = self._name + target_path = join(self._datadir, filename) + + self._threadpool.force_run_in_thread( + self._finalize_put, metadata, target_path, cleanup) + def put(self, metadata): """ Finalize writing the file on disk. @@ -1682,6 +1810,20 @@ class BaseDiskFile(object): def fragments(self): return None + @property + def content_type(self): + if self._metadata is None: + raise DiskFileNotOpen() + return self._metadata.get('Content-Type') + + @property + def content_type_timestamp(self): + if self._metadata is None: + raise DiskFileNotOpen() + t = self._metadata.get('Content-Type-Timestamp', + self._datafile_metadata.get('X-Timestamp')) + return Timestamp(t) + @classmethod def from_hash_dir(cls, mgr, hash_dir_path, device_path, partition, policy): return cls(mgr, device_path, None, partition, _datadir=hash_dir_path, @@ -1920,14 +2062,36 @@ class BaseDiskFile(object): quarantine_filename, "Exception reading metadata: %s" % err) - def _construct_from_data_file(self, data_file, meta_file, **kwargs): + def _merge_content_type_metadata(self, ctype_file): + """ + When a second .meta file is providing the most recent Content-Type + metadata then merge it into the metafile_metadata. + + :param ctype_file: An on-disk .meta file + """ + ctypefile_metadata = self._failsafe_read_metadata( + ctype_file, ctype_file) + if ('Content-Type' in ctypefile_metadata + and (ctypefile_metadata.get('Content-Type-Timestamp') > + self._metafile_metadata.get('Content-Type-Timestamp')) + and (ctypefile_metadata.get('Content-Type-Timestamp') > + self.data_timestamp)): + self._metafile_metadata['Content-Type'] = \ + ctypefile_metadata['Content-Type'] + self._metafile_metadata['Content-Type-Timestamp'] = \ + ctypefile_metadata.get('Content-Type-Timestamp') + + def _construct_from_data_file(self, data_file, meta_file, ctype_file, + **kwargs): """ Open the `.data` file to fetch its metadata, and fetch the metadata - from the fast-POST `.meta` file as well if it exists, merging them + from fast-POST `.meta` files as well if any exist, merging them properly. 
:param data_file: on-disk `.data` file being considered :param meta_file: on-disk fast-POST `.meta` file being considered + :param ctype_file: on-disk fast-POST `.meta` file being considered that + contains content-type and content-type timestamp :returns: an opened data file pointer :raises DiskFileError: various exceptions from :func:`swift.obj.diskfile.DiskFile._verify_data_file` @@ -1938,6 +2102,8 @@ class BaseDiskFile(object): if meta_file: self._metafile_metadata = self._failsafe_read_metadata( meta_file, meta_file) + if ctype_file and ctype_file != meta_file: + self._merge_content_type_metadata(ctype_file) sys_metadata = dict( [(key, val) for key, val in self._datafile_metadata.items() if key.lower() in DATAFILE_SYSTEM_META @@ -1946,6 +2112,14 @@ class BaseDiskFile(object): self._metadata.update(sys_metadata) # diskfile writer added 'name' to metafile, so remove it here self._metafile_metadata.pop('name', None) + # TODO: the check for Content-Type is only here for tests that + # create .data files without Content-Type + if ('Content-Type' in self._datafile_metadata and + (self.data_timestamp > + self._metafile_metadata.get('Content-Type-Timestamp'))): + self._metadata['Content-Type'] = \ + self._datafile_metadata['Content-Type'] + self._metadata.pop('Content-Type-Timestamp', None) else: self._metadata.update(self._datafile_metadata) if self._name is None: @@ -2144,21 +2318,10 @@ class DiskFileWriter(BaseDiskFileWriter): """ Finalize writing the file on disk. - For this implementation, this method is responsible for renaming the - temporary file to the final name and directory location. This method - should be called after the final call to - :func:`swift.obj.diskfile.DiskFileWriter.write`. - :param metadata: dictionary of metadata to be associated with the object """ - timestamp = Timestamp(metadata['X-Timestamp']).internal - metadata['name'] = self._name - target_path = join(self._datadir, timestamp + self._extension) - cleanup = True - - self._threadpool.force_run_in_thread( - self._finalize_put, metadata, target_path, cleanup) + super(DiskFileWriter, self)._put(metadata, True) class DiskFile(BaseDiskFile): @@ -2174,31 +2337,6 @@ class DiskFile(BaseDiskFile): class DiskFileManager(BaseDiskFileManager): diskfile_cls = DiskFile - def parse_on_disk_filename(self, filename): - """ - Returns the timestamp extracted .data file name. - - :param filename: the data file name including extension - :returns: a dict, with keys for timestamp, and ext: - - * timestamp is a :class:`~swift.common.utils.Timestamp` - * ext is a string, the file extension including the leading dot or - the empty string if the filename has no extension. - - :raises DiskFileError: if any part of the filename is not able to be - validated. - """ - float_part, ext = splitext(filename) - try: - timestamp = Timestamp(float_part) - except ValueError: - raise DiskFileError('Invalid Timestamp value in filename %r' - % filename) - return { - 'timestamp': timestamp, - 'ext': ext, - } - def _process_ondisk_files(self, exts, results, **kwargs): """ Implement replication policy specific handling of .data files. @@ -2303,12 +2441,10 @@ class ECDiskFileWriter(BaseDiskFileWriter): def put(self, metadata): """ The only difference between this method and the replication policy - DiskFileWriter method is the call into manager.make_on_disk_filename - to construct the data file name. + DiskFileWriter method is adding the frag index to the metadata. 
:param metadata: dictionary of metadata to be associated with object """ - timestamp = Timestamp(metadata['X-Timestamp']) fi = None cleanup = True if self._extension == '.data': @@ -2320,13 +2456,7 @@ class ECDiskFileWriter(BaseDiskFileWriter): self._diskfile._frag_index) # defer cleanup until commit() writes .durable cleanup = False - filename = self.manager.make_on_disk_filename( - timestamp, self._extension, frag_index=fi) - metadata['name'] = self._name - target_path = join(self._datadir, filename) - - self._threadpool.force_run_in_thread( - self._finalize_put, metadata, target_path, cleanup) + super(ECDiskFileWriter, self)._put(metadata, cleanup, frag_index=fi) class ECDiskFile(BaseDiskFile): @@ -2436,7 +2566,7 @@ class ECDiskFileManager(BaseDiskFileManager): return frag_index def make_on_disk_filename(self, timestamp, ext=None, frag_index=None, - *a, **kw): + ctype_timestamp=None, *a, **kw): """ Returns the EC specific filename for given timestamp. @@ -2446,32 +2576,36 @@ class ECDiskFileManager(BaseDiskFileManager): appended to the returned file name :param frag_index: a fragment archive index, used with .data extension only, must be a whole number. + :param ctype_timestamp: an optional content-type timestamp, an instance + of :class:`~swift.common.utils.Timestamp` :returns: a file name :raises DiskFileError: if ext=='.data' and the kwarg frag_index is not a whole number """ - rv = timestamp.internal if ext == '.data': # for datafiles only we encode the fragment index in the filename # to allow archives of different indexes to temporarily be stored # on the same node in certain situations frag_index = self.validate_fragment_index(frag_index) - rv += '#' + str(frag_index) - if ext: - rv = '%s%s' % (rv, ext) - return rv + rv = timestamp.internal + '#' + str(frag_index) + return '%s%s' % (rv, ext or '') + return super(ECDiskFileManager, self).make_on_disk_filename( + timestamp, ext, ctype_timestamp, *a, **kw) def parse_on_disk_filename(self, filename): """ - Returns the timestamp extracted from a policy specific .data file name. - For EC policy the data file name includes a fragment index which must - be stripped off to retrieve the timestamp. + Returns timestamp(s) and other info extracted from a policy specific + file name. For EC policy the data file name includes a fragment index + which must be stripped off to retrieve the timestamp. - :param filename: the data file name including extension - :returns: a dict, with keys for timestamp, frag_index, and ext: + :param filename: the file name including extension + :returns: a dict, with keys for timestamp, frag_index, ext and + ctype_timestamp: * timestamp is a :class:`~swift.common.utils.Timestamp` * frag_index is an int or None + * ctype_timestamp is a :class:`~swift.common.utils.Timestamp` or + None for .meta files, otherwise None * ext is a string, the file extension including the leading dot or the empty string if the filename has no extension. 
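# Illustrative sketch, not part of this patch: how a .meta file name carries
# a distinct content-type timestamp and how it parses back. Assumes the
# patch is applied; conf, logger and the timestamp values are hypothetical.
from swift.common.utils import Timestamp
from swift.obj.diskfile import DiskFileManager

mgr = DiskFileManager(conf, logger)
t_meta = Timestamp('1456789020.00000')   # time of the latest POST
t_ctype = Timestamp('1456789015.00000')  # time content-type was last set
name = mgr.make_on_disk_filename(t_meta, '.meta', ctype_timestamp=t_ctype)
# e.g. '1456789020.00000-7a120.meta' -- the content-type delta as signed hex
info = mgr.parse_on_disk_filename(name)
# info['timestamp'] == t_meta, info['ctype_timestamp'] == t_ctype,
# info['ext'] == '.meta'; the EC manager behaves the same for .meta files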
@@ -2480,13 +2614,13 @@ class ECDiskFileManager(BaseDiskFileManager): """ frag_index = None float_frag, ext = splitext(filename) - parts = float_frag.split('#', 1) - try: - timestamp = Timestamp(parts[0]) - except ValueError: - raise DiskFileError('Invalid Timestamp value in filename %r' - % filename) if ext == '.data': + parts = float_frag.split('#', 1) + try: + timestamp = Timestamp(parts[0]) + except ValueError: + raise DiskFileError('Invalid Timestamp value in filename %r' + % filename) # it is an error for an EC data file to not have a valid # fragment index try: @@ -2495,11 +2629,15 @@ class ECDiskFileManager(BaseDiskFileManager): # expect validate_fragment_index raise DiskFileError pass frag_index = self.validate_fragment_index(frag_index) - return { - 'timestamp': timestamp, - 'frag_index': frag_index, - 'ext': ext, - } + return { + 'timestamp': timestamp, + 'frag_index': frag_index, + 'ext': ext, + 'ctype_timestamp': None + } + rv = super(ECDiskFileManager, self).parse_on_disk_filename(filename) + rv['frag_index'] = None + return rv def _process_ondisk_files(self, exts, results, frag_index=None, **kwargs): """ diff --git a/swift/obj/mem_diskfile.py b/swift/obj/mem_diskfile.py index c233f57b09..e86c321e2f 100644 --- a/swift/obj/mem_diskfile.py +++ b/swift/obj/mem_diskfile.py @@ -443,3 +443,11 @@ class DiskFile(object): data_timestamp = timestamp durable_timestamp = timestamp + + content_type_timestamp = timestamp + + @property + def content_type(self): + if self._metadata is None: + raise DiskFileNotOpen() + return self._metadata.get('Content-Type') diff --git a/swift/obj/server.py b/swift/obj/server.py index cce1996503..ac3c7f39e5 100644 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -33,7 +33,7 @@ from swift.common.utils import public, get_logger, \ config_true_value, timing_stats, replication, \ normalize_delete_at_timestamp, get_log_line, Timestamp, \ get_expirer_container, parse_mime_headers, \ - iter_multipart_mime_documents + iter_multipart_mime_documents, extract_swift_bytes from swift.common.bufferedhttp import http_connect from swift.common.constraints import check_object_creation, \ valid_timestamp, check_utf8 @@ -479,35 +479,103 @@ class ObjectController(BaseStorageServer): except (DiskFileNotExist, DiskFileQuarantined): return HTTPNotFound(request=request) orig_timestamp = Timestamp(orig_metadata.get('X-Timestamp', 0)) - if orig_timestamp >= req_timestamp: + orig_ctype_timestamp = disk_file.content_type_timestamp + req_ctype_time = '0' + req_ctype = request.headers.get('Content-Type') + if req_ctype: + req_ctype_time = request.headers.get('Content-Type-Timestamp', + req_timestamp.internal) + req_ctype_timestamp = Timestamp(req_ctype_time) + if orig_timestamp >= req_timestamp \ + and orig_ctype_timestamp >= req_ctype_timestamp: return HTTPConflict( request=request, headers={'X-Backend-Timestamp': orig_timestamp.internal}) - metadata = {'X-Timestamp': req_timestamp.internal} - self._preserve_slo_manifest(metadata, orig_metadata) - metadata.update(val for val in request.headers.items() - if is_user_meta('object', val[0])) - headers_to_copy = ( - request.headers.get( - 'X-Backend-Replication-Headers', '').split() + - list(self.allowed_headers)) - for header_key in headers_to_copy: - if header_key in request.headers: - header_caps = header_key.title() - metadata[header_caps] = request.headers[header_key] - orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0) - if orig_delete_at != new_delete_at: - if new_delete_at: - self.delete_at_update('PUT', 
new_delete_at, account, container, - obj, request, device, policy) - if orig_delete_at: - self.delete_at_update('DELETE', orig_delete_at, account, - container, obj, request, device, - policy) + + if req_timestamp > orig_timestamp: + metadata = {'X-Timestamp': req_timestamp.internal} + self._preserve_slo_manifest(metadata, orig_metadata) + metadata.update(val for val in request.headers.items() + if is_user_meta('object', val[0])) + headers_to_copy = ( + request.headers.get( + 'X-Backend-Replication-Headers', '').split() + + list(self.allowed_headers)) + for header_key in headers_to_copy: + if header_key in request.headers: + header_caps = header_key.title() + metadata[header_caps] = request.headers[header_key] + orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0) + if orig_delete_at != new_delete_at: + if new_delete_at: + self.delete_at_update( + 'PUT', new_delete_at, account, container, obj, request, + device, policy) + if orig_delete_at: + self.delete_at_update('DELETE', orig_delete_at, account, + container, obj, request, device, + policy) + else: + # preserve existing metadata, only content-type may be updated + metadata = dict(disk_file.get_metafile_metadata()) + + if req_ctype_timestamp > orig_ctype_timestamp: + # we have a new content-type, add to metadata and container update + content_type_headers = { + 'Content-Type': request.headers['Content-Type'], + 'Content-Type-Timestamp': req_ctype_timestamp.internal + } + metadata.update(content_type_headers) + else: + # send existing content-type with container update + content_type_headers = { + 'Content-Type': disk_file.content_type, + 'Content-Type-Timestamp': orig_ctype_timestamp.internal + } + if orig_ctype_timestamp != disk_file.data_timestamp: + # only add to metadata if it's not the datafile content-type + metadata.update(content_type_headers) + try: disk_file.write_metadata(metadata) except (DiskFileXattrNotSupported, DiskFileNoSpace): return HTTPInsufficientStorage(drive=device, request=request) + + update_etag = orig_metadata['ETag'] + if 'X-Object-Sysmeta-Ec-Etag' in orig_metadata: + # For EC policy, send X-Object-Sysmeta-Ec-Etag which is same as the + # X-Backend-Container-Update-Override-Etag value sent with the + # original PUT. We have to send Etag (and size etc) with a POST + # container update because the original PUT container update may + # have failed or be in async_pending. + update_etag = orig_metadata['X-Object-Sysmeta-Ec-Etag'] + + if (content_type_headers['Content-Type-Timestamp'] + != disk_file.data_timestamp): + # Current content-type is not from the datafile, but the datafile + # content-type may have a swift_bytes param that was appended by + # SLO and we must continue to send that with the container update. + # Do this (rather than use a separate header) for backwards + # compatibility because there may be 'legacy' container updates in + # async pending that have content-types with swift_bytes params, so + # we have to be able to handle those in container server anyway. 
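# Illustrative sketch, not part of this patch: extract_swift_bytes splits a
# trailing swift_bytes param (such as one appended by SLO) off a stored
# content-type value; other params are left in place. Literals are
# hypothetical.
from swift.common.utils import extract_swift_bytes

ctype, swift_bytes = extract_swift_bytes(
    'application/octet-stream;swift_bytes=1048576')
# ctype == 'application/octet-stream', swift_bytes == '1048576'
ctype, swift_bytes = extract_swift_bytes('text/plain;charset=utf-8')
# ctype == 'text/plain;charset=utf-8', swift_bytes is None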
+ _, swift_bytes = extract_swift_bytes( + disk_file.get_datafile_metadata()['Content-Type']) + if swift_bytes: + content_type_headers['Content-Type'] += (';swift_bytes=%s' + % swift_bytes) + + self.container_update( + 'PUT', account, container, obj, request, + HeaderKeyDict({ + 'x-size': orig_metadata['Content-Length'], + 'x-content-type': content_type_headers['Content-Type'], + 'x-timestamp': disk_file.data_timestamp.internal, + 'x-content-type-timestamp': + content_type_headers['Content-Type-Timestamp'], + 'x-meta-timestamp': metadata['X-Timestamp'], + 'x-etag': update_etag}), + device, policy) return HTTPAccepted(request=request) @public diff --git a/swift/obj/ssync_receiver.py b/swift/obj/ssync_receiver.py index 5f6b7d3b0f..79a23da4ac 100644 --- a/swift/obj/ssync_receiver.py +++ b/swift/obj/ssync_receiver.py @@ -31,7 +31,7 @@ def decode_missing(line): """ Parse a string of the form generated by :py:func:`~swift.obj.ssync_sender.encode_missing` and return a dict - with keys ``object_hash``, ``ts_data``, ``ts_meta``. + with keys ``object_hash``, ``ts_data``, ``ts_meta``, ``ts_ctype``. The encoder for this line is :py:func:`~swift.obj.ssync_sender.encode_missing` @@ -40,7 +40,8 @@ def decode_missing(line): parts = line.split() result['object_hash'] = urllib.parse.unquote(parts[0]) t_data = urllib.parse.unquote(parts[1]) - result['ts_data'] = result['ts_meta'] = Timestamp(t_data) + result['ts_data'] = Timestamp(t_data) + result['ts_meta'] = result['ts_ctype'] = result['ts_data'] if len(parts) > 2: # allow for a comma separated list of k:v pairs to future-proof subparts = urllib.parse.unquote(parts[2]).split(',') @@ -48,6 +49,8 @@ def decode_missing(line): k, v = item.split(':') if k == 'm': result['ts_meta'] = Timestamp(t_data, delta=int(v, 16)) + elif k == 't': + result['ts_ctype'] = Timestamp(t_data, delta=int(v, 16)) return result @@ -71,6 +74,9 @@ def encode_wanted(remote, local): want['data'] = True if 'ts_meta' in local and remote['ts_meta'] > local['ts_meta']: want['meta'] = True + if ('ts_ctype' in local and remote['ts_ctype'] > local['ts_ctype'] + and remote['ts_ctype'] > remote['ts_data']): + want['meta'] = True else: # we got nothing, so we'll take whatever the remote has want['data'] = True @@ -264,6 +270,7 @@ class Receiver(object): return { 'ts_data': df.data_timestamp, 'ts_meta': df.timestamp, + 'ts_ctype': df.content_type_timestamp, } def _check_missing(self, line): diff --git a/swift/obj/ssync_sender.py b/swift/obj/ssync_sender.py index 51dbed32a3..a509166b46 100644 --- a/swift/obj/ssync_sender.py +++ b/swift/obj/ssync_sender.py @@ -21,11 +21,12 @@ from swift.common import exceptions from swift.common import http -def encode_missing(object_hash, ts_data, ts_meta=None): +def encode_missing(object_hash, ts_data, ts_meta=None, ts_ctype=None): """ Returns a string representing the object hash, its data file timestamp - and the delta forwards to its metafile timestamp, if non-zero, in the form: - `` m:`` + and the delta forwards to its metafile and content-type timestamps, if + non-zero, in the form: + `` [m:[,t:]]`` The decoder for this line is :py:func:`~swift.obj.ssync_receiver.decode_missing` @@ -36,6 +37,9 @@ def encode_missing(object_hash, ts_data, ts_meta=None): if ts_meta and ts_meta != ts_data: delta = ts_meta.raw - ts_data.raw msg = '%s m:%x' % (msg, delta) + if ts_ctype and ts_ctype != ts_data: + delta = ts_ctype.raw - ts_data.raw + msg = '%s,t:%x' % (msg, delta) return msg diff --git a/test/functional/tests.py b/test/functional/tests.py index 0615bf924a..4831c28b45 
100644 --- a/test/functional/tests.py +++ b/test/functional/tests.py @@ -2185,6 +2185,56 @@ class TestFile(Base): info = file_item.info() self.assertEqual(etag, info['etag']) + def test_POST(self): + # verify consistency between object and container listing metadata + file_name = Utils.create_name() + file_item = self.env.container.file(file_name) + file_item.content_type = 'text/foobar' + file_item.write_random(1024) + + # sanity check + file_item = self.env.container.file(file_name) + file_item.initialize() + self.assertEqual('text/foobar', file_item.content_type) + self.assertEqual(1024, file_item.size) + etag = file_item.etag + + # check container listing is consistent + listing = self.env.container.files(parms={'format': 'json'}) + for f_dict in listing: + if f_dict['name'] == file_name: + break + else: + self.fail('Failed to find file %r in listing' % file_name) + self.assertEqual(1024, f_dict['bytes']) + self.assertEqual('text/foobar', f_dict['content_type']) + self.assertEqual(etag, f_dict['hash']) + + # now POST updated content-type to each file + file_item = self.env.container.file(file_name) + file_item.content_type = 'image/foobarbaz' + file_item.sync_metadata({'Test': 'blah'}) + + # sanity check object metadata + file_item = self.env.container.file(file_name) + file_item.initialize() + + self.assertEqual(1024, file_item.size) + self.assertEqual('image/foobarbaz', file_item.content_type) + self.assertEqual(etag, file_item.etag) + self.assertIn('test', file_item.metadata) + + # check for consistency between object and container listing + listing = self.env.container.files(parms={'format': 'json'}) + for f_dict in listing: + if f_dict['name'] == file_name: + break + else: + self.fail('Failed to find file %r in listing' % file_name) + self.assertEqual(1024, f_dict['bytes']) + self.assertEqual('image/foobarbaz', f_dict['content_type']) + self.assertEqual(etag, f_dict['hash']) + class TestFileUTF8(Base2, TestFile): set_up = False @@ -2777,6 +2827,42 @@ class TestSlo(Base): self.assertEqual('d', file_contents[-2]) self.assertEqual('e', file_contents[-1]) + def test_slo_container_listing(self): + # the listing object size should equal the sum of the size of the + # segments, not the size of the manifest body + raise SkipTest('Only passes with object_post_as_copy=False') + file_item = self.env.container.file(Utils.create_name) + file_item.write( + json.dumps([self.env.seg_info['seg_a']]), + parms={'multipart-manifest': 'put'}) + + files = self.env.container.files(parms={'format': 'json'}) + for f_dict in files: + if f_dict['name'] == file_item.name: + self.assertEqual(1024 * 1024, f_dict['bytes']) + self.assertEqual('application/octet-stream', + f_dict['content_type']) + break + else: + self.fail('Failed to find manifest file in container listing') + + # now POST updated content-type file + file_item.content_type = 'image/jpeg' + file_item.sync_metadata({'X-Object-Meta-Test': 'blah'}) + file_item.initialize() + self.assertEqual('image/jpeg', file_item.content_type) # sanity + + # verify that the container listing is consistent with the file + files = self.env.container.files(parms={'format': 'json'}) + for f_dict in files: + if f_dict['name'] == file_item.name: + self.assertEqual(1024 * 1024, f_dict['bytes']) + self.assertEqual(file_item.content_type, + f_dict['content_type']) + break + else: + self.fail('Failed to find manifest file in container listing') + def test_slo_get_nested_manifest(self): file_item = self.env.container.file('manifest-abcde-submanifest') file_contents = 
file_item.read() diff --git a/test/probe/common.py b/test/probe/common.py index f6091e66bf..aa094a1b59 100644 --- a/test/probe/common.py +++ b/test/probe/common.py @@ -17,6 +17,8 @@ from __future__ import print_function import os from subprocess import Popen, PIPE import sys +from tempfile import mkdtemp +from textwrap import dedent from time import sleep, time from collections import defaultdict import unittest @@ -25,8 +27,10 @@ from uuid import uuid4 from nose import SkipTest from six.moves.http_client import HTTPConnection +import shutil from swiftclient import get_auth, head_account +from swift.common import internal_client from swift.obj.diskfile import get_data_dir from swift.common.ring import Ring from swift.common.utils import readconf, renamer, \ @@ -430,6 +434,33 @@ class ProbeTest(unittest.TestCase): else: os.system('sudo mount %s' % device) + def make_internal_client(self, object_post_as_copy=True): + tempdir = mkdtemp() + try: + conf_path = os.path.join(tempdir, 'internal_client.conf') + conf_body = """ + [DEFAULT] + swift_dir = /etc/swift + + [pipeline:main] + pipeline = catch_errors cache proxy-server + + [app:proxy-server] + use = egg:swift#proxy + object_post_as_copy = %s + + [filter:cache] + use = egg:swift#memcache + + [filter:catch_errors] + use = egg:swift#catch_errors + """ % object_post_as_copy + with open(conf_path, 'w') as f: + f.write(dedent(conf_body)) + return internal_client.InternalClient(conf_path, 'test', 1) + finally: + shutil.rmtree(tempdir) + class ReplProbeTest(ProbeTest): diff --git a/test/probe/test_container_sync.py b/test/probe/test_container_sync.py index b6200ff071..7282cfd50a 100644 --- a/test/probe/test_container_sync.py +++ b/test/probe/test_container_sync.py @@ -81,20 +81,72 @@ class TestContainerSync(ReplProbeTest): return source_container, dest_container - def test_sync(self): + def _test_sync(self, object_post_as_copy): source_container, dest_container = self._setup_synced_containers() # upload to source object_name = 'object-%s' % uuid.uuid4() + put_headers = {'X-Object-Meta-Test': 'put_value'} client.put_object(self.url, self.token, source_container, object_name, - 'test-body') + 'test-body', headers=put_headers) # cycle container-sync Manager(['container-sync']).once() - _junk, body = client.get_object(self.url, self.token, - dest_container, object_name) + resp_headers, body = client.get_object(self.url, self.token, + dest_container, object_name) self.assertEqual(body, 'test-body') + self.assertIn('x-object-meta-test', resp_headers) + self.assertEqual('put_value', resp_headers['x-object-meta-test']) + + # update metadata with a POST, using an internal client so we can + # vary the object_post_as_copy setting - first use post-as-copy + post_headers = {'Content-Type': 'image/jpeg', + 'X-Object-Meta-Test': 'post_value'} + int_client = self.make_internal_client( + object_post_as_copy=object_post_as_copy) + int_client.set_object_metadata(self.account, source_container, + object_name, post_headers) + # sanity checks... 
+ resp_headers = client.head_object( + self.url, self.token, source_container, object_name) + self.assertIn('x-object-meta-test', resp_headers) + self.assertEqual('post_value', resp_headers['x-object-meta-test']) + self.assertEqual('image/jpeg', resp_headers['content-type']) + + # cycle container-sync + Manager(['container-sync']).once() + + # verify that metadata changes were sync'd + resp_headers, body = client.get_object(self.url, self.token, + dest_container, object_name) + self.assertEqual(body, 'test-body') + self.assertIn('x-object-meta-test', resp_headers) + self.assertEqual('post_value', resp_headers['x-object-meta-test']) + self.assertEqual('image/jpeg', resp_headers['content-type']) + + # delete the object + client.delete_object( + self.url, self.token, source_container, object_name) + with self.assertRaises(ClientException) as cm: + client.get_object( + self.url, self.token, source_container, object_name) + self.assertEqual(404, cm.exception.http_status) # sanity check + + # cycle container-sync + Manager(['container-sync']).once() + + # verify delete has been sync'd + with self.assertRaises(ClientException) as cm: + client.get_object( + self.url, self.token, dest_container, object_name) + self.assertEqual(404, cm.exception.http_status) # sanity check + + def test_sync_with_post_as_copy(self): + self._test_sync(True) + + def test_sync_with_fast_post(self): + self._test_sync(False) def test_sync_lazy_skey(self): # Create synced containers, but with no key at source diff --git a/test/probe/test_object_metadata_replication.py b/test/probe/test_object_metadata_replication.py index 809a493d83..d93715bc74 100644 --- a/test/probe/test_object_metadata_replication.py +++ b/test/probe/test_object_metadata_replication.py @@ -14,19 +14,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from io import StringIO -from tempfile import mkdtemp -from textwrap import dedent import unittest import os -import shutil import uuid from swift.common.direct_client import direct_get_suffix_hashes from swift.common.exceptions import DiskFileDeleted from swift.common.internal_client import UnexpectedResponse from swift.container.backend import ContainerBroker -from swift.common import internal_client, utils +from swift.common import utils from swiftclient import client from swift.common.ring import Ring from swift.common.utils import Timestamp, get_logger, hash_path @@ -48,40 +45,17 @@ class Test(ReplProbeTest): self.brain = BrainSplitter(self.url, self.token, self.container_name, self.object_name, 'object', policy=self.policy) - self.tempdir = mkdtemp() - conf_path = os.path.join(self.tempdir, 'internal_client.conf') - conf_body = """ - [DEFAULT] - swift_dir = /etc/swift - - [pipeline:main] - pipeline = catch_errors cache proxy-server - - [app:proxy-server] - use = egg:swift#proxy - object_post_as_copy = false - - [filter:cache] - use = egg:swift#memcache - - [filter:catch_errors] - use = egg:swift#catch_errors - """ - with open(conf_path, 'w') as f: - f.write(dedent(conf_body)) - self.int_client = internal_client.InternalClient(conf_path, 'test', 1) + self.int_client = self.make_internal_client(object_post_as_copy=False) def tearDown(self): super(Test, self).tearDown() - shutil.rmtree(self.tempdir) - def _get_object_info(self, account, container, obj, number, - policy=None): + def _get_object_info(self, account, container, obj, number): obj_conf = self.configs['object-server'] config_path = obj_conf[number] options = utils.readconf(config_path, 'app:object-server') swift_dir = options.get('swift_dir', '/etc/swift') - ring = POLICIES.get_object_ring(policy, swift_dir) + ring = POLICIES.get_object_ring(int(self.policy), swift_dir) part, nodes = ring.get_nodes(account, container, obj) for node in nodes: # assumes one to one mapping @@ -92,7 +66,7 @@ class Test(ReplProbeTest): return None mgr = DiskFileManager(options, get_logger(options)) disk_file = mgr.get_diskfile(device, part, account, container, obj, - policy) + self.policy) info = disk_file.read_metadata() return info @@ -105,9 +79,7 @@ class Test(ReplProbeTest): obj_info.append(info_i) self.assertTrue(len(obj_info) > 1) for other in obj_info[1:]: - self.assertEqual(obj_info[0], other, - 'Object metadata mismatch: %s != %s' - % (obj_info[0], other)) + self.assertDictEqual(obj_info[0], other) def _assert_consistent_deleted_object(self): for i in range(1, 5): @@ -275,6 +247,7 @@ class Test(ReplProbeTest): self._assert_consistent_object_metadata() self._assert_consistent_container_dbs() + self._assert_consistent_suffix_hashes() def test_sysmeta_after_replication_with_subsequent_put(self): sysmeta = {'x-object-sysmeta-foo': 'older'} @@ -332,9 +305,11 @@ class Test(ReplProbeTest): for key in sysmeta2.keys(): self.assertTrue(key in metadata, key) self.assertEqual(metadata[key], sysmeta2[key]) + self.brain.start_handoff_half() self._assert_consistent_object_metadata() self._assert_consistent_container_dbs() + self._assert_consistent_suffix_hashes() def test_sysmeta_after_replication_with_subsequent_post(self): sysmeta = {'x-object-sysmeta-foo': 'sysmeta-foo'} @@ -382,8 +357,11 @@ class Test(ReplProbeTest): for key in expected.keys(): self.assertTrue(key in metadata, key) self.assertEqual(metadata[key], expected[key]) + self.brain.start_handoff_half() + self._assert_consistent_object_metadata() self._assert_consistent_container_dbs() + 
self._assert_consistent_suffix_hashes() def test_sysmeta_after_replication_with_prior_post(self): sysmeta = {'x-object-sysmeta-foo': 'sysmeta-foo'} @@ -433,8 +411,294 @@ class Test(ReplProbeTest): self.assertEqual(metadata[key], sysmeta[key]) for key in usermeta: self.assertFalse(key in metadata) + self.brain.start_handoff_half() + self._assert_consistent_object_metadata() self._assert_consistent_container_dbs() + self._assert_consistent_suffix_hashes() + + def test_post_ctype_replicated_when_previous_incomplete_puts(self): + # primary half handoff half + # ------------ ------------ + # t0.data: ctype = foo + # t1.data: ctype = bar + # t2.meta: ctype = baz + # + # ...run replicator and expect... + # + # t1.data: + # t2.meta: ctype = baz + self.brain.put_container(policy_index=0) + + # incomplete write to primary half + self.brain.stop_handoff_half() + self._put_object(headers={'Content-Type': 'foo'}) + self.brain.start_handoff_half() + + # handoff write + self.brain.stop_primary_half() + self._put_object(headers={'Content-Type': 'bar'}) + self.brain.start_primary_half() + + # content-type update to primary half + self.brain.stop_handoff_half() + self._post_object(headers={'Content-Type': 'baz'}) + self.brain.start_handoff_half() + + self.get_to_final_state() + + # check object metadata + metadata = client.head_object(self.url, self.token, + self.container_name, + self.object_name) + + # check container listing metadata + container_metadata, objs = client.get_container(self.url, self.token, + self.container_name) + + for obj in objs: + if obj['name'] == self.object_name: + break + expected = 'baz' + self.assertEqual(obj['content_type'], expected) + self._assert_object_metadata_matches_listing(obj, metadata) + self._assert_consistent_container_dbs() + self._assert_consistent_object_metadata() + self._assert_consistent_suffix_hashes() + + def test_put_ctype_replicated_when_subsequent_post(self): + # primary half handoff half + # ------------ ------------ + # t0.data: ctype = foo + # t1.data: ctype = bar + # t2.meta: + # + # ...run replicator and expect... 
+ # + # t1.data: ctype = bar + # t2.meta: + self.brain.put_container(policy_index=0) + + # incomplete write + self.brain.stop_handoff_half() + self._put_object(headers={'Content-Type': 'foo'}) + self.brain.start_handoff_half() + + # handoff write + self.brain.stop_primary_half() + self._put_object(headers={'Content-Type': 'bar'}) + self.brain.start_primary_half() + + # metadata update with newest data unavailable + self.brain.stop_handoff_half() + self._post_object(headers={'X-Object-Meta-Color': 'Blue'}) + self.brain.start_handoff_half() + + self.get_to_final_state() + + # check object metadata + metadata = client.head_object(self.url, self.token, + self.container_name, + self.object_name) + + # check container listing metadata + container_metadata, objs = client.get_container(self.url, self.token, + self.container_name) + + for obj in objs: + if obj['name'] == self.object_name: + break + else: + self.fail('obj not found in container listing') + expected = 'bar' + self.assertEqual(obj['content_type'], expected) + self.assertEqual(metadata['x-object-meta-color'], 'Blue') + self._assert_object_metadata_matches_listing(obj, metadata) + self._assert_consistent_container_dbs() + self._assert_consistent_object_metadata() + self._assert_consistent_suffix_hashes() + + def test_post_ctype_replicated_when_subsequent_post_without_ctype(self): + # primary half handoff half + # ------------ ------------ + # t0.data: ctype = foo + # t1.data: ctype = bar + # t2.meta: ctype = bif + # t3.data: ctype = baz, color = 'Red' + # t4.meta: color = Blue + # + # ...run replicator and expect... + # + # t1.data: + # t4-delta.meta: ctype = baz, color = Blue + self.brain.put_container(policy_index=0) + + # incomplete write + self.brain.stop_handoff_half() + self._put_object(headers={'Content-Type': 'foo', + 'X-Object-Sysmeta-Test': 'older'}) + self.brain.start_handoff_half() + + # handoff write + self.brain.stop_primary_half() + self._put_object(headers={'Content-Type': 'bar', + 'X-Object-Sysmeta-Test': 'newer'}) + self.brain.start_primary_half() + + # incomplete post with content type + self.brain.stop_handoff_half() + self._post_object(headers={'Content-Type': 'bif'}) + self.brain.start_handoff_half() + + # incomplete post to handoff with content type + self.brain.stop_primary_half() + self._post_object(headers={'Content-Type': 'baz', + 'X-Object-Meta-Color': 'Red'}) + self.brain.start_primary_half() + + # complete post with no content type + self._post_object(headers={'X-Object-Meta-Color': 'Blue', + 'X-Object-Sysmeta-Test': 'ignored'}) + + # 'baz' wins over 'bar' but 'Blue' wins over 'Red' + self.get_to_final_state() + + # check object metadata + metadata = self._get_object_metadata() + + # check container listing metadata + container_metadata, objs = client.get_container(self.url, self.token, + self.container_name) + + for obj in objs: + if obj['name'] == self.object_name: + break + expected = 'baz' + self.assertEqual(obj['content_type'], expected) + self.assertEqual(metadata['x-object-meta-color'], 'Blue') + self.assertEqual(metadata['x-object-sysmeta-test'], 'newer') + self._assert_object_metadata_matches_listing(obj, metadata) + self._assert_consistent_container_dbs() + self._assert_consistent_object_metadata() + self._assert_consistent_suffix_hashes() + + def test_put_ctype_replicated_when_subsequent_posts_without_ctype(self): + # primary half handoff half + # ------------ ------------ + # t0.data: ctype = foo + # t1.data: ctype = bar + # t2.meta: + # t3.meta + # + # ...run replicator and expect... 
+ # + # t1.data: ctype = bar + # t3.meta + self.brain.put_container(policy_index=0) + + self._put_object(headers={'Content-Type': 'foo', + 'X-Object-Sysmeta-Test': 'older'}) + + # incomplete write to handoff half + self.brain.stop_primary_half() + self._put_object(headers={'Content-Type': 'bar', + 'X-Object-Sysmeta-Test': 'newer'}) + self.brain.start_primary_half() + + # incomplete post with no content type to primary half + self.brain.stop_handoff_half() + self._post_object(headers={'X-Object-Meta-Color': 'Red', + 'X-Object-Sysmeta-Test': 'ignored'}) + self.brain.start_handoff_half() + + # incomplete post with no content type to handoff half + self.brain.stop_primary_half() + self._post_object(headers={'X-Object-Meta-Color': 'Blue'}) + self.brain.start_primary_half() + + self.get_to_final_state() + + # check object metadata + metadata = self._get_object_metadata() + + # check container listing metadata + container_metadata, objs = client.get_container(self.url, self.token, + self.container_name) + + for obj in objs: + if obj['name'] == self.object_name: + break + expected = 'bar' + self.assertEqual(obj['content_type'], expected) + self._assert_object_metadata_matches_listing(obj, metadata) + self.assertEqual(metadata['x-object-meta-color'], 'Blue') + self.assertEqual(metadata['x-object-sysmeta-test'], 'newer') + self._assert_object_metadata_matches_listing(obj, metadata) + self._assert_consistent_container_dbs() + self._assert_consistent_object_metadata() + self._assert_consistent_suffix_hashes() + + def test_posted_metadata_only_persists_after_prior_put(self): + # newer metadata posted to subset of nodes should persist after an + # earlier put on other nodes, but older content-type on that subset + # should not persist + self.brain.put_container(policy_index=0) + # incomplete put to handoff + self.brain.stop_primary_half() + self._put_object(headers={'Content-Type': 'oldest', + 'X-Object-Sysmeta-Test': 'oldest', + 'X-Object-Meta-Test': 'oldest'}) + self.brain.start_primary_half() + # incomplete put to primary + self.brain.stop_handoff_half() + self._put_object(headers={'Content-Type': 'oldest', + 'X-Object-Sysmeta-Test': 'oldest', + 'X-Object-Meta-Test': 'oldest'}) + self.brain.start_handoff_half() + + # incomplete post with content-type to handoff + self.brain.stop_primary_half() + self._post_object(headers={'Content-Type': 'newer', + 'X-Object-Meta-Test': 'newer'}) + self.brain.start_primary_half() + + # incomplete put to primary + self.brain.stop_handoff_half() + self._put_object(headers={'Content-Type': 'newest', + 'X-Object-Sysmeta-Test': 'newest', + 'X-Object-Meta-Test': 'newer'}) + self.brain.start_handoff_half() + + # incomplete post with no content-type to handoff which still has + # out of date content-type + self.brain.stop_primary_half() + self._post_object(headers={'X-Object-Meta-Test': 'newest'}) + metadata = self._get_object_metadata() + self.assertEqual(metadata['x-object-meta-test'], 'newest') + self.assertEqual(metadata['content-type'], 'newer') + self.brain.start_primary_half() + + self.get_to_final_state() + + # check object metadata + metadata = self._get_object_metadata() + self.assertEqual(metadata['x-object-meta-test'], 'newest') + self.assertEqual(metadata['x-object-sysmeta-test'], 'newest') + self.assertEqual(metadata['content-type'], 'newest') + + # check container listing metadata + container_metadata, objs = client.get_container(self.url, self.token, + self.container_name) + + for obj in objs: + if obj['name'] == self.object_name: + break + 
self.assertEqual(obj['content_type'], 'newest') + self._assert_object_metadata_matches_listing(obj, metadata) + self._assert_object_metadata_matches_listing(obj, metadata) + self._assert_consistent_container_dbs() + self._assert_consistent_object_metadata() + self._assert_consistent_suffix_hashes() def test_post_trumped_by_prior_delete(self): # new metadata and content-type posted to subset of nodes should not @@ -466,7 +730,7 @@ class Test(ReplProbeTest): metadata = self._get_object_metadata() self.assertEqual(metadata['x-object-sysmeta-test'], 'oldest') self.assertEqual(metadata['x-object-meta-test'], 'newest') - self.assertEqual(metadata['content-type'], 'oldest') + self.assertEqual(metadata['content-type'], 'newest') self.brain.start_primary_half() @@ -482,5 +746,6 @@ class Test(ReplProbeTest): self._assert_consistent_deleted_object() self._assert_consistent_suffix_hashes() + if __name__ == "__main__": unittest.main() diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index 02dae8132a..63c746de51 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -289,6 +289,8 @@ class TestTimestamp(unittest.TestCase): self.assertIs(True, utils.Timestamp(ts) == ts) # sanity self.assertIs(False, utils.Timestamp(ts) != utils.Timestamp(ts)) self.assertIs(False, utils.Timestamp(ts) != ts) + self.assertIs(False, utils.Timestamp(ts) is None) + self.assertIs(True, utils.Timestamp(ts) is not None) def test_no_force_internal_no_offset(self): """Test that internal is the same as normal with no offset""" @@ -406,6 +408,15 @@ class TestTimestamp(unittest.TestCase): '%r is not bigger than %f given %r' % ( timestamp, float(normal), value)) + def test_short_format_with_offset(self): + expected = '1402436408.91203_f0' + timestamp = utils.Timestamp(1402436408.91203, 0xf0) + self.assertEqual(expected, timestamp.short) + + expected = '1402436408.91203' + timestamp = utils.Timestamp(1402436408.91203) + self.assertEqual(expected, timestamp.short) + def test_raw(self): expected = 140243640891203 timestamp = utils.Timestamp(1402436408.91203) @@ -694,6 +705,11 @@ class TestTimestamp(unittest.TestCase): '%r is not smaller than %r given %r' % ( timestamp, int(other), value)) + def test_cmp_with_none(self): + self.assertGreater(utils.Timestamp(0), None) + self.assertGreater(utils.Timestamp(1.0), None) + self.assertGreater(utils.Timestamp(1.0, 42), None) + def test_ordering(self): given = [ '1402444820.62590_000000000000000a', @@ -789,6 +805,107 @@ class TestTimestamp(unittest.TestCase): self.assertIn(ts_0_also, d) +class TestTimestampEncoding(unittest.TestCase): + + def setUp(self): + t0 = utils.Timestamp(0.0) + t1 = utils.Timestamp(997.9996) + t2 = utils.Timestamp(999) + t3 = utils.Timestamp(1000, 24) + t4 = utils.Timestamp(1001) + t5 = utils.Timestamp(1002.00040) + + # encodings that are expected when explicit = False + self.non_explicit_encodings = ( + ('0000001000.00000_18', (t3, t3, t3)), + ('0000001000.00000_18', (t3, t3, None)), + ) + + # mappings that are expected when explicit = True + self.explicit_encodings = ( + ('0000001000.00000_18+0+0', (t3, t3, t3)), + ('0000001000.00000_18+0', (t3, t3, None)), + ) + + # mappings that are expected when explicit = True or False + self.encodings = ( + ('0000001000.00000_18+0+186a0', (t3, t3, t4)), + ('0000001000.00000_18+186a0+186c8', (t3, t4, t5)), + ('0000001000.00000_18-186a0+0', (t3, t2, t2)), + ('0000001000.00000_18+0-186a0', (t3, t3, t2)), + ('0000001000.00000_18-186a0-186c8', (t3, t2, t1)), + ('0000001000.00000_18', 
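The replication probe tests above all assert the same per-attribute rule: after replication every node converges on the body, etag and sysmeta of the newest PUT, the content-type from whichever PUT or POST most recently supplied one, and the user metadata from the newest PUT or POST overall (deletes aside). A plain-Python illustration of that rule, not Swift code:

def expected_times(requests):
    """
    requests: iterable of (ts, is_put, has_ctype) tuples describing the
    PUTs and POSTs that landed on some subset of nodes (every PUT carries
    a content-type, a POST may or may not).  Returns the data,
    content-type and metadata timestamps the nodes should converge on.
    """
    ts_data = max(ts for ts, is_put, _ in requests if is_put)
    ts_ctype = max(ts for ts, _, has_ctype in requests if has_ctype)
    ts_meta = max(ts for ts, _, _ in requests)
    return ts_data, ts_ctype, ts_meta

# e.g. the first scenario above: PUT@t0, PUT@t1, POST(ctype)@t2
assert expected_times([(0, True, True), (1, True, True), (2, False, True)]) \
    == (1, 2, 2)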
(t3, None, None)), + ('0000001000.00000_18+186a0', (t3, t4, None)), + ('0000001000.00000_18-186a0', (t3, t2, None)), + ('0000001000.00000_18', (t3, None, t1)), + ('0000001000.00000_18-5f5e100', (t3, t0, None)), + ('0000001000.00000_18+0-5f5e100', (t3, t3, t0)), + ('0000001000.00000_18-5f5e100+5f45a60', (t3, t0, t2)), + ) + + # decodings that are expected when explicit = False + self.non_explicit_decodings = ( + ('0000001000.00000_18', (t3, t3, t3)), + ('0000001000.00000_18+186a0', (t3, t4, t4)), + ('0000001000.00000_18-186a0', (t3, t2, t2)), + ('0000001000.00000_18+186a0', (t3, t4, t4)), + ('0000001000.00000_18-186a0', (t3, t2, t2)), + ('0000001000.00000_18-5f5e100', (t3, t0, t0)), + ) + + # decodings that are expected when explicit = True + self.explicit_decodings = ( + ('0000001000.00000_18+0+0', (t3, t3, t3)), + ('0000001000.00000_18+0', (t3, t3, None)), + ('0000001000.00000_18', (t3, None, None)), + ('0000001000.00000_18+186a0', (t3, t4, None)), + ('0000001000.00000_18-186a0', (t3, t2, None)), + ('0000001000.00000_18-5f5e100', (t3, t0, None)), + ) + + # decodings that are expected when explicit = True or False + self.decodings = ( + ('0000001000.00000_18+0+186a0', (t3, t3, t4)), + ('0000001000.00000_18+186a0+186c8', (t3, t4, t5)), + ('0000001000.00000_18-186a0+0', (t3, t2, t2)), + ('0000001000.00000_18+0-186a0', (t3, t3, t2)), + ('0000001000.00000_18-186a0-186c8', (t3, t2, t1)), + ('0000001000.00000_18-5f5e100+5f45a60', (t3, t0, t2)), + ) + + def _assertEqual(self, expected, actual, test): + self.assertEqual(expected, actual, + 'Got %s but expected %s for parameters %s' + % (actual, expected, test)) + + def test_encoding(self): + for test in self.explicit_encodings: + actual = utils.encode_timestamps(test[1][0], test[1][1], + test[1][2], True) + self._assertEqual(test[0], actual, test[1]) + for test in self.non_explicit_encodings: + actual = utils.encode_timestamps(test[1][0], test[1][1], + test[1][2], False) + self._assertEqual(test[0], actual, test[1]) + for explicit in (True, False): + for test in self.encodings: + actual = utils.encode_timestamps(test[1][0], test[1][1], + test[1][2], explicit) + self._assertEqual(test[0], actual, test[1]) + + def test_decoding(self): + for test in self.explicit_decodings: + actual = utils.decode_timestamps(test[0], True) + self._assertEqual(test[1], actual, test[0]) + for test in self.non_explicit_decodings: + actual = utils.decode_timestamps(test[0], False) + self._assertEqual(test[1], actual, test[0]) + for explicit in (True, False): + for test in self.decodings: + actual = utils.decode_timestamps(test[0], explicit) + self._assertEqual(test[1], actual, test[0]) + + class TestUtils(unittest.TestCase): """Tests for swift.common.utils """ diff --git a/test/unit/container/test_backend.py b/test/unit/container/test_backend.py index 721f0f9094..8e521f0078 100644 --- a/test/unit/container/test_backend.py +++ b/test/unit/container/test_backend.py @@ -28,8 +28,9 @@ import sqlite3 import pickle import json -from swift.container.backend import ContainerBroker -from swift.common.utils import Timestamp +from swift.container.backend import ContainerBroker, \ + update_new_item_from_existing +from swift.common.utils import Timestamp, encode_timestamps from swift.common.storage_policy import POLICIES import mock @@ -431,6 +432,357 @@ class TestContainerBroker(unittest.TestCase): self.assertEqual(conn.execute( "SELECT deleted FROM object").fetchone()[0], 0) + def test_make_tuple_for_pickle(self): + record = {'name': 'obj', + 'created_at': '1234567890.12345', + 
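Taken together, those fixtures pin down the on-the-wire format: the first timestamp appears in its short form, and the second and third (when needed) are appended as signed hex deltas in 10-microsecond units from their predecessor. A worked example consistent with the expectations above:

from swift.common.utils import Timestamp, encode_timestamps, decode_timestamps

t3 = Timestamp(1000, offset=24)   # short form '0000001000.00000_18'
t4 = Timestamp(1001)              # one second later -> delta 100000 == 0x186a0

encode_timestamps(t3, t3, t4)     # '0000001000.00000_18+0+186a0'
encode_timestamps(t3, t4)         # '0000001000.00000_18+186a0'
encode_timestamps(t3, t3, t3)     # '0000001000.00000_18' (all deltas zero)

# non-explicit decoding fills a missing component from its predecessor...
decode_timestamps('0000001000.00000_18+186a0')        # (t3, t4, t4)
# ...explicit decoding leaves it as None
decode_timestamps('0000001000.00000_18+186a0', True)  # (t3, t4, None)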
'size': 42, + 'content_type': 'text/plain', + 'etag': 'hash_test', + 'deleted': '1', + 'storage_policy_index': '2', + 'ctype_timestamp': None, + 'meta_timestamp': None} + broker = ContainerBroker(':memory:', account='a', container='c') + + expect = ('obj', '1234567890.12345', 42, 'text/plain', 'hash_test', + '1', '2', None, None) + result = broker.make_tuple_for_pickle(record) + self.assertEqual(expect, result) + + record['ctype_timestamp'] = '2233445566.00000' + expect = ('obj', '1234567890.12345', 42, 'text/plain', 'hash_test', + '1', '2', '2233445566.00000', None) + result = broker.make_tuple_for_pickle(record) + self.assertEqual(expect, result) + + record['meta_timestamp'] = '5566778899.00000' + expect = ('obj', '1234567890.12345', 42, 'text/plain', 'hash_test', + '1', '2', '2233445566.00000', '5566778899.00000') + result = broker.make_tuple_for_pickle(record) + self.assertEqual(expect, result) + + @with_tempdir + def test_load_old_record_from_pending_file(self, tempdir): + # Test reading old update record from pending file + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(time(), 0) + + record = {'name': 'obj', + 'created_at': '1234567890.12345', + 'size': 42, + 'content_type': 'text/plain', + 'etag': 'hash_test', + 'deleted': '1', + 'storage_policy_index': '2', + 'ctype_timestamp': None, + 'meta_timestamp': None} + + # sanity check + self.assertFalse(os.path.isfile(broker.pending_file)) + + # simulate existing pending items written with old code, + # i.e. without content_type and meta timestamps + def old_make_tuple_for_pickle(_, record): + return (record['name'], record['created_at'], record['size'], + record['content_type'], record['etag'], record['deleted'], + record['storage_policy_index']) + + _new = 'swift.container.backend.ContainerBroker.make_tuple_for_pickle' + with mock.patch(_new, old_make_tuple_for_pickle): + broker.put_record(dict(record)) + + self.assertTrue(os.path.getsize(broker.pending_file) > 0) + read_items = [] + + def mock_merge_items(_, item_list, *args): + # capture the items read from the pending file + read_items.extend(item_list) + + with mock.patch('swift.container.backend.ContainerBroker.merge_items', + mock_merge_items): + broker._commit_puts() + + self.assertEqual(1, len(read_items)) + self.assertEqual(record, read_items[0]) + self.assertTrue(os.path.getsize(broker.pending_file) == 0) + + @with_tempdir + def test_save_and_load_record_from_pending_file(self, tempdir): + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(time(), 0) + + record = {'name': 'obj', + 'created_at': '1234567890.12345', + 'size': 42, + 'content_type': 'text/plain', + 'etag': 'hash_test', + 'deleted': '1', + 'storage_policy_index': '2', + 'ctype_timestamp': '1234567890.44444', + 'meta_timestamp': '1234567890.99999'} + + # sanity check + self.assertFalse(os.path.isfile(broker.pending_file)) + broker.put_record(dict(record)) + self.assertTrue(os.path.getsize(broker.pending_file) > 0) + read_items = [] + + def mock_merge_items(_, item_list, *args): + # capture the items read from the pending file + read_items.extend(item_list) + + with mock.patch('swift.container.backend.ContainerBroker.merge_items', + mock_merge_items): + broker._commit_puts() + + self.assertEqual(1, len(read_items)) + self.assertEqual(record, read_items[0]) + self.assertTrue(os.path.getsize(broker.pending_file) == 0) + + def _assert_db_row(self, broker, 
name, timestamp, size, content_type, hash, + deleted=0): + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM object").fetchone()[0], name) + self.assertEqual(conn.execute( + "SELECT created_at FROM object").fetchone()[0], timestamp) + self.assertEqual(conn.execute( + "SELECT size FROM object").fetchone()[0], size) + self.assertEqual(conn.execute( + "SELECT content_type FROM object").fetchone()[0], + content_type) + self.assertEqual(conn.execute( + "SELECT etag FROM object").fetchone()[0], hash) + self.assertEqual(conn.execute( + "SELECT deleted FROM object").fetchone()[0], deleted) + + def _test_put_object_multiple_encoded_timestamps(self, broker): + ts = (Timestamp(t) for t in itertools.count(int(time()))) + broker.initialize(ts.next().internal, 0) + t = [ts.next() for _ in range(9)] + + # Create initial object + broker.put_object('obj_name', t[0].internal, 123, + 'application/x-test', + '5af83e3196bf99f440f31f2e1a6c9afe') + self.assertEqual(1, len(broker.get_items_since(0, 100))) + self._assert_db_row(broker, 'obj_name', t[0].internal, 123, + 'application/x-test', + '5af83e3196bf99f440f31f2e1a6c9afe') + + # hash and size change with same data timestamp are ignored + t_encoded = encode_timestamps(t[0], t[1], t[1]) + broker.put_object('obj_name', t_encoded, 456, + 'application/x-test-2', + '1234567890abcdeffedcba0987654321') + self.assertEqual(1, len(broker.get_items_since(0, 100))) + self._assert_db_row(broker, 'obj_name', t_encoded, 123, + 'application/x-test-2', + '5af83e3196bf99f440f31f2e1a6c9afe') + + # content-type change with same timestamp is ignored + t_encoded = encode_timestamps(t[0], t[1], t[2]) + broker.put_object('obj_name', t_encoded, 456, + 'application/x-test-3', + '1234567890abcdeffedcba0987654321') + self.assertEqual(1, len(broker.get_items_since(0, 100))) + self._assert_db_row(broker, 'obj_name', t_encoded, 123, + 'application/x-test-2', + '5af83e3196bf99f440f31f2e1a6c9afe') + + # update with differing newer timestamps + t_encoded = encode_timestamps(t[4], t[6], t[8]) + broker.put_object('obj_name', t_encoded, 789, + 'application/x-test-3', + 'abcdef1234567890abcdef1234567890') + self.assertEqual(1, len(broker.get_items_since(0, 100))) + self._assert_db_row(broker, 'obj_name', t_encoded, 789, + 'application/x-test-3', + 'abcdef1234567890abcdef1234567890') + + # update with differing older timestamps should be ignored + t_encoded_older = encode_timestamps(t[3], t[5], t[7]) + self.assertEqual(1, len(broker.get_items_since(0, 100))) + broker.put_object('obj_name', t_encoded_older, 9999, + 'application/x-test-ignored', + 'ignored_hash') + self.assertEqual(1, len(broker.get_items_since(0, 100))) + self._assert_db_row(broker, 'obj_name', t_encoded, 789, + 'application/x-test-3', + 'abcdef1234567890abcdef1234567890') + + def test_put_object_multiple_encoded_timestamps_using_memory(self): + # Test ContainerBroker.put_object with differing data, content-type + # and metadata timestamps + broker = ContainerBroker(':memory:', account='a', container='c') + self._test_put_object_multiple_encoded_timestamps(broker) + + @with_tempdir + def test_put_object_multiple_encoded_timestamps_using_file(self, tempdir): + # Test ContainerBroker.put_object with differing data, content-type + # and metadata timestamps, using file db to ensure that the code paths + # to write/read pending file are exercised. 
+ db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + self._test_put_object_multiple_encoded_timestamps(broker) + + def _test_put_object_multiple_explicit_timestamps(self, broker): + ts = (Timestamp(t) for t in itertools.count(int(time()))) + broker.initialize(ts.next().internal, 0) + t = [ts.next() for _ in range(11)] + + # Create initial object + broker.put_object('obj_name', t[0].internal, 123, + 'application/x-test', + '5af83e3196bf99f440f31f2e1a6c9afe', + ctype_timestamp=None, + meta_timestamp=None) + self.assertEqual(1, len(broker.get_items_since(0, 100))) + self._assert_db_row(broker, 'obj_name', t[0].internal, 123, + 'application/x-test', + '5af83e3196bf99f440f31f2e1a6c9afe') + + # hash and size change with same data timestamp are ignored + t_encoded = encode_timestamps(t[0], t[1], t[1]) + broker.put_object('obj_name', t[0].internal, 456, + 'application/x-test-2', + '1234567890abcdeffedcba0987654321', + ctype_timestamp=t[1].internal, + meta_timestamp=t[1].internal) + self.assertEqual(1, len(broker.get_items_since(0, 100))) + self._assert_db_row(broker, 'obj_name', t_encoded, 123, + 'application/x-test-2', + '5af83e3196bf99f440f31f2e1a6c9afe') + + # content-type change with same timestamp is ignored + t_encoded = encode_timestamps(t[0], t[1], t[2]) + broker.put_object('obj_name', t[0].internal, 456, + 'application/x-test-3', + '1234567890abcdeffedcba0987654321', + ctype_timestamp=t[1].internal, + meta_timestamp=t[2].internal) + self.assertEqual(1, len(broker.get_items_since(0, 100))) + self._assert_db_row(broker, 'obj_name', t_encoded, 123, + 'application/x-test-2', + '5af83e3196bf99f440f31f2e1a6c9afe') + + # update with differing newer timestamps + t_encoded = encode_timestamps(t[4], t[6], t[8]) + broker.put_object('obj_name', t[4].internal, 789, + 'application/x-test-3', + 'abcdef1234567890abcdef1234567890', + ctype_timestamp=t[6].internal, + meta_timestamp=t[8].internal) + self.assertEqual(1, len(broker.get_items_since(0, 100))) + self._assert_db_row(broker, 'obj_name', t_encoded, 789, + 'application/x-test-3', + 'abcdef1234567890abcdef1234567890') + + # update with differing older timestamps should be ignored + broker.put_object('obj_name', t[3].internal, 9999, + 'application/x-test-ignored', + 'ignored_hash', + ctype_timestamp=t[5].internal, + meta_timestamp=t[7].internal) + self.assertEqual(1, len(broker.get_items_since(0, 100))) + self._assert_db_row(broker, 'obj_name', t_encoded, 789, + 'application/x-test-3', + 'abcdef1234567890abcdef1234567890') + + # content_type_timestamp == None defaults to data timestamp + t_encoded = encode_timestamps(t[9], t[9], t[8]) + broker.put_object('obj_name', t[9].internal, 9999, + 'application/x-test-new', + 'new_hash', + ctype_timestamp=None, + meta_timestamp=t[7].internal) + self.assertEqual(1, len(broker.get_items_since(0, 100))) + self._assert_db_row(broker, 'obj_name', t_encoded, 9999, + 'application/x-test-new', + 'new_hash') + + # meta_timestamp == None defaults to data timestamp + t_encoded = encode_timestamps(t[9], t[10], t[10]) + broker.put_object('obj_name', t[8].internal, 1111, + 'application/x-test-newer', + 'older_hash', + ctype_timestamp=t[10].internal, + meta_timestamp=None) + self.assertEqual(1, len(broker.get_items_since(0, 100))) + self._assert_db_row(broker, 'obj_name', t_encoded, 9999, + 'application/x-test-newer', + 'new_hash') + + def test_put_object_multiple_explicit_timestamps_using_memory(self): + # Test ContainerBroker.put_object with differing data, 
content-type + # and metadata timestamps passed as explicit args + broker = ContainerBroker(':memory:', account='a', container='c') + self._test_put_object_multiple_explicit_timestamps(broker) + + @with_tempdir + def test_put_object_multiple_explicit_timestamps_using_file(self, tempdir): + # Test ContainerBroker.put_object with differing data, content-type + # and metadata timestamps passed as explicit args, using file db to + # ensure that the code paths to write/read pending file are exercised. + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + self._test_put_object_multiple_explicit_timestamps(broker) + + def test_last_modified_time(self): + # Test container listing reports the most recent of data or metadata + # timestamp as last-modified time + ts = (Timestamp(t) for t in itertools.count(int(time()))) + broker = ContainerBroker(':memory:', account='a', container='c') + broker.initialize(ts.next().internal, 0) + + # simple 'single' timestamp case + t0 = ts.next() + broker.put_object('obj1', t0.internal, 0, 'text/plain', 'hash1') + listing = broker.list_objects_iter(100, '', None, None, '') + self.assertEqual(len(listing), 1) + self.assertEqual(listing[0][0], 'obj1') + self.assertEqual(listing[0][1], t0.internal) + + # content-type and metadata are updated at t1 + t1 = ts.next() + t_encoded = encode_timestamps(t0, t1, t1) + broker.put_object('obj1', t_encoded, 0, 'text/plain', 'hash1') + listing = broker.list_objects_iter(100, '', None, None, '') + self.assertEqual(len(listing), 1) + self.assertEqual(listing[0][0], 'obj1') + self.assertEqual(listing[0][1], t1.internal) + + # used later + t2 = ts.next() + + # metadata is updated at t3 + t3 = ts.next() + t_encoded = encode_timestamps(t0, t1, t3) + broker.put_object('obj1', t_encoded, 0, 'text/plain', 'hash1') + listing = broker.list_objects_iter(100, '', None, None, '') + self.assertEqual(len(listing), 1) + self.assertEqual(listing[0][0], 'obj1') + self.assertEqual(listing[0][1], t3.internal) + + # all parts updated at t2, last-modified should remain at t3 + t_encoded = encode_timestamps(t2, t2, t2) + broker.put_object('obj1', t_encoded, 0, 'text/plain', 'hash1') + listing = broker.list_objects_iter(100, '', None, None, '') + self.assertEqual(len(listing), 1) + self.assertEqual(listing[0][0], 'obj1') + self.assertEqual(listing[0][1], t3.internal) + + # all parts updated at t4, last-modified should be t4 + t4 = ts.next() + t_encoded = encode_timestamps(t4, t4, t4) + broker.put_object('obj1', t_encoded, 0, 'text/plain', 'hash1') + listing = broker.list_objects_iter(100, '', None, None, '') + self.assertEqual(len(listing), 1) + self.assertEqual(listing[0][0], 'obj1') + self.assertEqual(listing[0][1], t4.internal) + @patch_policies def test_put_misplaced_object_does_not_effect_container_stats(self): policy = random.choice(list(POLICIES)) @@ -2172,3 +2524,298 @@ class TestContainerBrokerBeforeSPI(ContainerBrokerMigrationMixin, info = broker.get_info() self.assertEqual(info['object_count'], 1) self.assertEqual(info['bytes_used'], 456) + + +class TestUpdateNewItemFromExisting(unittest.TestCase): + # TODO: add test scenarios that have swift_bytes in content_type + t0 = '1234567890.00000' + t1 = '1234567890.00001' + t2 = '1234567890.00002' + t3 = '1234567890.00003' + t4 = '1234567890.00004' + t5 = '1234567890.00005' + t6 = '1234567890.00006' + t7 = '1234567890.00007' + t8 = '1234567890.00008' + t20 = '1234567890.00020' + t30 = '1234567890.00030' + + base_new_item = {'etag': 
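The explicit-timestamp tests above reduce to a simple call pattern: put_object() now accepts separate ctype_timestamp and meta_timestamp arguments and the broker folds all three times into the row's created_at column. A minimal sketch (in-memory broker, illustrative values):

from time import time
from swift.common.utils import Timestamp, encode_timestamps
from swift.container.backend import ContainerBroker

broker = ContainerBroker(':memory:', account='a', container='c')
broker.initialize(Timestamp(time()).internal, 0)

t_data, t_type, t_meta = Timestamp(1), Timestamp(2), Timestamp(3)
broker.put_object('obj', t_data.internal, 0, 'text/plain', 'etag',
                  ctype_timestamp=t_type.internal,
                  meta_timestamp=t_meta.internal)

# the row's created_at column now carries all three times, encoded
with broker.get() as conn:
    created_at = conn.execute("SELECT created_at FROM object").fetchone()[0]
assert created_at == encode_timestamps(t_data, t_type, t_meta)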
'New_item', + 'size': 'nEw_item', + 'content_type': 'neW_item', + 'deleted': '0'} + base_existing = {'etag': 'Existing', + 'size': 'eXisting', + 'content_type': 'exIsting', + 'deleted': '0'} + # + # each scenario is a tuple of: + # (existing time, new item times, expected updated item) + # + # e.g.: + # existing -> ({'created_at': t5}, + # new_item -> {'created_at': t, 'ctype_timestamp': t, 'meta_timestamp': t}, + # expected -> {'created_at': t, + # 'etag': , 'size': , 'content_type': }) + # + scenarios_when_all_existing_wins = ( + # + # all new_item times <= all existing times -> existing values win + # + # existing has attrs at single time + # + ({'created_at': t3}, + {'created_at': t0, 'ctype_timestamp': t0, 'meta_timestamp': t0}, + {'created_at': t3, + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3}, + {'created_at': t0, 'ctype_timestamp': t0, 'meta_timestamp': t1}, + {'created_at': t3, + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3}, + {'created_at': t0, 'ctype_timestamp': t1, 'meta_timestamp': t1}, + {'created_at': t3, + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3}, + {'created_at': t0, 'ctype_timestamp': t1, 'meta_timestamp': t2}, + {'created_at': t3, + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3}, + {'created_at': t0, 'ctype_timestamp': t1, 'meta_timestamp': t3}, + {'created_at': t3, + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3}, + {'created_at': t0, 'ctype_timestamp': t3, 'meta_timestamp': t3}, + {'created_at': t3, + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3}, + {'created_at': t3, 'ctype_timestamp': t3, 'meta_timestamp': t3}, + {'created_at': t3, + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + # + # existing has attrs at multiple times: + # data @ t3, ctype @ t5, meta @t7 -> existing created_at = t3+2+2 + # + ({'created_at': t3 + '+2+2'}, + {'created_at': t0, 'ctype_timestamp': t0, 'meta_timestamp': t0}, + {'created_at': t3 + '+2+2', + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t3, 'ctype_timestamp': t3, 'meta_timestamp': t3}, + {'created_at': t3 + '+2+2', + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t3, 'ctype_timestamp': t4, 'meta_timestamp': t4}, + {'created_at': t3 + '+2+2', + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t3, 'ctype_timestamp': t4, 'meta_timestamp': t5}, + {'created_at': t3 + '+2+2', + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t3, 'ctype_timestamp': t4, 'meta_timestamp': t7}, + {'created_at': t3 + '+2+2', + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t3, 'ctype_timestamp': t4, 'meta_timestamp': t7}, + {'created_at': t3 + '+2+2', + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t3, 'ctype_timestamp': t5, 'meta_timestamp': t5}, + {'created_at': t3 + '+2+2', + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t3, 
'ctype_timestamp': t5, 'meta_timestamp': t6}, + {'created_at': t3 + '+2+2', + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t3, 'ctype_timestamp': t5, 'meta_timestamp': t7}, + {'created_at': t3 + '+2+2', + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + ) + + scenarios_when_all_new_item_wins = ( + # no existing record + (None, + {'created_at': t4, 'ctype_timestamp': t4, 'meta_timestamp': t4}, + {'created_at': t4, + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}), + + (None, + {'created_at': t4, 'ctype_timestamp': t4, 'meta_timestamp': t5}, + {'created_at': t4 + '+0+1', + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}), + + (None, + {'created_at': t4, 'ctype_timestamp': t5, 'meta_timestamp': t5}, + {'created_at': t4 + '+1+0', + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}), + + (None, + {'created_at': t4, 'ctype_timestamp': t5, 'meta_timestamp': t6}, + {'created_at': t4 + '+1+1', + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}), + + # + # all new_item times > all existing times -> new item values win + # + # existing has attrs at single time + # + ({'created_at': t3}, + {'created_at': t4, 'ctype_timestamp': t4, 'meta_timestamp': t4}, + {'created_at': t4, + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}), + + ({'created_at': t3}, + {'created_at': t4, 'ctype_timestamp': t4, 'meta_timestamp': t5}, + {'created_at': t4 + '+0+1', + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}), + + ({'created_at': t3}, + {'created_at': t4, 'ctype_timestamp': t5, 'meta_timestamp': t5}, + {'created_at': t4 + '+1+0', + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}), + + ({'created_at': t3}, + {'created_at': t4, 'ctype_timestamp': t5, 'meta_timestamp': t6}, + {'created_at': t4 + '+1+1', + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}), + + # + # existing has attrs at multiple times: + # data @ t3, ctype @ t5, meta @t7 -> existing created_at = t3+2+2 + # + ({'created_at': t3 + '+2+2'}, + {'created_at': t4, 'ctype_timestamp': t6, 'meta_timestamp': t8}, + {'created_at': t4 + '+2+2', + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t6, 'ctype_timestamp': t6, 'meta_timestamp': t8}, + {'created_at': t6 + '+0+2', + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t4, 'ctype_timestamp': t8, 'meta_timestamp': t8}, + {'created_at': t4 + '+4+0', + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t6, 'ctype_timestamp': t8, 'meta_timestamp': t8}, + {'created_at': t6 + '+2+0', + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t8, 'ctype_timestamp': t8, 'meta_timestamp': t8}, + {'created_at': t8, + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}), + ) + + scenarios_when_some_new_item_wins = ( + # + # some but not all new_item times > existing times -> mixed updates + # + # existing has attrs at single time + # + ({'created_at': t3}, + {'created_at': t3, 'ctype_timestamp': t3, 'meta_timestamp': t4}, + {'created_at': t3 + '+0+1', + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3}, + {'created_at': t3, 
'ctype_timestamp': t4, 'meta_timestamp': t4}, + {'created_at': t3 + '+1+0', + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'neW_item'}), + + ({'created_at': t3}, + {'created_at': t3, 'ctype_timestamp': t4, 'meta_timestamp': t5}, + {'created_at': t3 + '+1+1', + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'neW_item'}), + + # + # existing has attrs at multiple times: + # data @ t3, ctype @ t5, meta @t7 -> existing created_at = t3+2+2 + # + ({'created_at': t3 + '+2+2'}, + {'created_at': t3, 'ctype_timestamp': t3, 'meta_timestamp': t8}, + {'created_at': t3 + '+2+3', + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'exIsting'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t3, 'ctype_timestamp': t6, 'meta_timestamp': t8}, + {'created_at': t3 + '+3+2', + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'neW_item'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t4, 'ctype_timestamp': t4, 'meta_timestamp': t6}, + {'created_at': t4 + '+1+2', + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'exIsting'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t4, 'ctype_timestamp': t6, 'meta_timestamp': t6}, + {'created_at': t4 + '+2+1', + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'neW_item'}), + + ({'created_at': t3 + '+2+2'}, + {'created_at': t4, 'ctype_timestamp': t4, 'meta_timestamp': t8}, + {'created_at': t4 + '+1+3', + 'etag': 'New_item', 'size': 'nEw_item', 'content_type': 'exIsting'}), + + # this scenario is to check that the deltas are in hex + ({'created_at': t3 + '+2+2'}, + {'created_at': t2, 'ctype_timestamp': t20, 'meta_timestamp': t30}, + {'created_at': t3 + '+11+a', + 'etag': 'Existing', 'size': 'eXisting', 'content_type': 'neW_item'}), + ) + + def _test_scenario(self, scenario, newer): + existing_time, new_item_times, expected_attrs = scenario + # this is the existing record... + existing = None + if existing_time: + existing = dict(self.base_existing) + existing.update(existing_time) + + # this is the new item to update + new_item = dict(self.base_new_item) + new_item.update(new_item_times) + + # this is the expected result of the update + expected = dict(new_item) + expected.update(expected_attrs) + expected['data_timestamp'] = new_item['created_at'] + + try: + self.assertIs(newer, + update_new_item_from_existing(new_item, existing)) + self.assertDictEqual(expected, new_item) + except AssertionError as e: + msg = ('Scenario: existing %s, new_item %s, expected %s.' + % scenario) + msg = '%s Failed with: %s' % (msg, e.message) + raise AssertionError(msg) + + def test_update_new_item_from_existing(self): + for scenario in self.scenarios_when_all_existing_wins: + self._test_scenario(scenario, False) + + for scenario in self.scenarios_when_all_new_item_wins: + self._test_scenario(scenario, True) + + for scenario in self.scenarios_when_some_new_item_wins: + self._test_scenario(scenario, True) diff --git a/test/unit/container/test_reconciler.py b/test/unit/container/test_reconciler.py index 771e9f83e3..4a00e72f2e 100644 --- a/test/unit/container/test_reconciler.py +++ b/test/unit/container/test_reconciler.py @@ -12,6 +12,8 @@ # limitations under the License. 
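The last scenario in the table above ("deltas are in hex") can be checked by hand; the expected created_at is exactly what encode_timestamps produces for the three winning times:

from swift.common.utils import Timestamp, encode_timestamps

t3, t20, t30 = (Timestamp(t) for t in
                ('1234567890.00003', '1234567890.00020', '1234567890.00030'))
# 17 ten-microsecond ticks from t3 to t20 (0x11), 10 from t20 to t30 (0xa)
assert encode_timestamps(t3, t20, t30) == '1234567890.00003+11+a'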
import json +import numbers + import mock import operator import time @@ -29,7 +31,7 @@ from swift.container import reconciler from swift.container.server import gen_resp_headers from swift.common.direct_client import ClientException from swift.common import swob -from swift.common.utils import split_path, Timestamp +from swift.common.utils import split_path, Timestamp, encode_timestamps from test.unit import debug_logger, FakeRing, fake_http_connect from test.unit.common.middleware.helpers import FakeSwift @@ -132,12 +134,16 @@ class FakeInternalClient(reconciler.InternalClient): 'DELETE', obj_path, swob.HTTPNoContent, {}) # container listing entry last_modified = timestamp_to_last_modified(timestamp) + # some tests setup mock listings using floats, some use + # strings, so normalize here + if isinstance(timestamp, numbers.Number): + timestamp = '%f' % timestamp obj_data = { 'bytes': 0, # listing data is unicode 'name': obj_name.decode('utf-8'), 'last_modified': last_modified, - 'hash': timestamp, + 'hash': timestamp.decode('utf-8'), 'content_type': content_type, } container_listing_data.append(obj_data) @@ -210,6 +216,26 @@ class TestReconcilerUtils(unittest.TestCase): self.assertEqual(got['q_record'], 1234.20192) self.assertEqual(got['q_op'], 'PUT') + # the 'hash' field in object listing has the raw 'created_at' value + # which could be a composite of timestamps + timestamp_str = encode_timestamps(Timestamp(1234.20190), + Timestamp(1245.20190), + Timestamp(1256.20190), + explicit=True) + got = reconciler.parse_raw_obj({ + 'name': "1:/AUTH_bob/con/obj", + 'hash': timestamp_str, + 'last_modified': timestamp_to_last_modified(1234.20192), + 'content_type': 'application/x-put', + }) + self.assertEqual(got['q_policy_index'], 1) + self.assertEqual(got['account'], 'AUTH_bob') + self.assertEqual(got['container'], 'con') + self.assertEqual(got['obj'], 'obj') + self.assertEqual(got['q_ts'], 1234.20190) + self.assertEqual(got['q_record'], 1234.20192) + self.assertEqual(got['q_op'], 'PUT') + # negative test obj_info = { 'name': "1:/AUTH_bob/con/obj", diff --git a/test/unit/container/test_replicator.py b/test/unit/container/test_replicator.py index bdfe481d15..3f97821891 100644 --- a/test/unit/container/test_replicator.py +++ b/test/unit/container/test_replicator.py @@ -26,7 +26,7 @@ from swift.common import db_replicator from swift.container import replicator, backend, server, sync_store from swift.container.reconciler import ( MISPLACED_OBJECTS_ACCOUNT, get_reconciler_container_name) -from swift.common.utils import Timestamp +from swift.common.utils import Timestamp, encode_timestamps from swift.common.storage_policy import POLICIES from test.unit.common import test_db_replicator @@ -827,38 +827,52 @@ class TestReplicatorSync(test_db_replicator.TestReplicatorSync): self.assertEqual(info[key], value) def test_misplaced_rows_replicate_and_enqueue(self): - ts = (Timestamp(t).internal for t in - itertools.count(int(time.time()))) + # force all timestamps to fall in same hour + ts = (Timestamp(t) for t in + itertools.count(int(time.time()) // 3600 * 3600)) policy = random.choice(list(POLICIES)) broker = self._get_broker('a', 'c', node_index=0) - broker.initialize(next(ts), policy.idx) + broker.initialize(next(ts).internal, policy.idx) remote_policy = random.choice([p for p in POLICIES if p is not policy]) remote_broker = self._get_broker('a', 'c', node_index=1) - remote_broker.initialize(next(ts), remote_policy.idx) + remote_broker.initialize(next(ts).internal, remote_policy.idx) # add a misplaced row 
to *local* broker - obj_put_timestamp = next(ts) + obj_put_timestamp = next(ts).internal broker.put_object( 'o', obj_put_timestamp, 0, 'content-type', 'etag', storage_policy_index=remote_policy.idx) - misplaced = broker.get_misplaced_since(-1, 1) + misplaced = broker.get_misplaced_since(-1, 10) self.assertEqual(len(misplaced), 1) # since this row is misplaced it doesn't show up in count self.assertEqual(broker.get_info()['object_count'], 0) + # add another misplaced row to *local* broker with composite timestamp + ts_data = next(ts) + ts_ctype = next(ts) + ts_meta = next(ts) + broker.put_object( + 'o2', ts_data.internal, 0, 'content-type', + 'etag', storage_policy_index=remote_policy.idx, + ctype_timestamp=ts_ctype.internal, meta_timestamp=ts_meta.internal) + misplaced = broker.get_misplaced_since(-1, 10) + self.assertEqual(len(misplaced), 2) + # since this row is misplaced it doesn't show up in count + self.assertEqual(broker.get_info()['object_count'], 0) + # replicate part, node = self._get_broker_part_node(broker) daemon = self._run_once(node) # push to remote, and third node was missing (also maybe reconciler) - self.assertTrue(2 < daemon.stats['rsync'] <= 3) + self.assertTrue(2 < daemon.stats['rsync'] <= 3, daemon.stats['rsync']) # grab the rsynced instance of remote_broker remote_broker = self._get_broker('a', 'c', node_index=1) # remote has misplaced rows too now - misplaced = remote_broker.get_misplaced_since(-1, 1) - self.assertEqual(len(misplaced), 1) + misplaced = remote_broker.get_misplaced_since(-1, 10) + self.assertEqual(len(misplaced), 2) # and the correct policy_index and object_count info = remote_broker.get_info() @@ -869,22 +883,29 @@ class TestReplicatorSync(test_db_replicator.TestReplicatorSync): for key, value in expectations.items(): self.assertEqual(info[key], value) - # and we should have also enqeued these rows in the reconciler + # and we should have also enqueued these rows in a single reconciler, + # since we forced the object timestamps to be in the same hour. reconciler = daemon.get_reconciler_broker(misplaced[0]['created_at']) # but it may not be on the same node as us anymore though... reconciler = self._get_broker(reconciler.account, reconciler.container, node_index=0) - self.assertEqual(reconciler.get_info()['object_count'], 1) + self.assertEqual(reconciler.get_info()['object_count'], 2) objects = reconciler.list_objects_iter( - 1, '', None, None, None, None, storage_policy_index=0) - self.assertEqual(len(objects), 1) + 10, '', None, None, None, None, storage_policy_index=0) + self.assertEqual(len(objects), 2) expected = ('%s:/a/c/o' % remote_policy.idx, obj_put_timestamp, 0, 'application/x-put', obj_put_timestamp) self.assertEqual(objects[0], expected) + # the second object's listing has ts_meta as its last modified time + # but its full composite timestamp is in the hash field. 
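The composite value in the hash field round-trips: decoding it recovers the data timestamp that the reconciler queues the object under, which is what the q_ts assertions in test_reconciler above check. A small illustration:

from swift.common.utils import Timestamp, decode_timestamps, encode_timestamps

q_hash = encode_timestamps(Timestamp(1234.20190), Timestamp(1245.20190),
                           Timestamp(1256.20190), explicit=True)
ts_data, ts_ctype, ts_meta = decode_timestamps(q_hash)
assert ts_data == Timestamp(1234.20190)   # the queue entry's q_ts
assert ts_meta == Timestamp(1256.20190)   # reported as last_modified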
+ expected = ('%s:/a/c/o2' % remote_policy.idx, ts_meta.internal, 0, + 'application/x-put', + encode_timestamps(ts_data, ts_ctype, ts_meta)) + self.assertEqual(objects[1], expected) # having safely enqueued to the reconciler we can advance # our sync pointer - self.assertEqual(broker.get_reconciler_sync(), 1) + self.assertEqual(broker.get_reconciler_sync(), 2) def test_multiple_out_sync_reconciler_enqueue_normalize(self): ts = (Timestamp(t).internal for t in diff --git a/test/unit/container/test_server.py b/test/unit/container/test_server.py index 22e0f00c41..0205bca3bf 100644 --- a/test/unit/container/test_server.py +++ b/test/unit/container/test_server.py @@ -1619,6 +1619,203 @@ class TestContainerController(unittest.TestCase): listing_data = json.loads(resp.body) self.assertEqual(0, len(listing_data)) + def test_object_update_with_multiple_timestamps(self): + + def do_update(t_data, etag, size, content_type, + t_type=None, t_meta=None): + """ + Make a PUT request to container controller to update an object + """ + headers = {'X-Timestamp': t_data.internal, + 'X-Size': size, + 'X-Content-Type': content_type, + 'X-Etag': etag} + if t_type: + headers['X-Content-Type-Timestamp'] = t_type.internal + if t_meta: + headers['X-Meta-Timestamp'] = t_meta.internal + req = Request.blank( + '/sda1/p/a/c/o', method='PUT', headers=headers) + self._update_object_put_headers(req) + return req.get_response(self.controller) + + ts = (Timestamp(t) for t in itertools.count(int(time.time()))) + t0 = ts.next() + + # create container + req = Request.blank('/sda1/p/a/c', method='PUT', headers={ + 'X-Timestamp': t0.internal}) + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 201) + + # check status + req = Request.blank('/sda1/p/a/c', method='HEAD') + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 204) + + # create object at t1 + t1 = ts.next() + resp = do_update(t1, 'etag_at_t1', 1, 'ctype_at_t1') + self.assertEqual(resp.status_int, 201) + + # check listing, expect last_modified = t1 + req = Request.blank('/sda1/p/a/c', method='GET', + query_string='format=json') + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(int(resp.headers['X-Container-Object-Count']), 1) + self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 1) + listing_data = json.loads(resp.body) + self.assertEqual(1, len(listing_data)) + for obj in listing_data: + self.assertEqual(obj['name'], 'o') + self.assertEqual(obj['bytes'], 1) + self.assertEqual(obj['hash'], 'etag_at_t1') + self.assertEqual(obj['content_type'], 'ctype_at_t1') + self.assertEqual(obj['last_modified'], t1.isoformat) + + # send an update with a content type timestamp at t4 + t2 = ts.next() + t3 = ts.next() + t4 = ts.next() + resp = do_update(t1, 'etag_at_t1', 1, 'ctype_at_t4', t_type=t4) + self.assertEqual(resp.status_int, 201) + + # check updated listing, expect last_modified = t4 + req = Request.blank('/sda1/p/a/c', method='GET', + query_string='format=json') + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(int(resp.headers['X-Container-Object-Count']), 1) + self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 1) + listing_data = json.loads(resp.body) + self.assertEqual(1, len(listing_data)) + for obj in listing_data: + self.assertEqual(obj['name'], 'o') + self.assertEqual(obj['bytes'], 1) + self.assertEqual(obj['hash'], 'etag_at_t1') + self.assertEqual(obj['content_type'], 'ctype_at_t4') + 
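The do_update helper above shows the full set of headers the container server expects for one of these object updates; spelled out as a standalone request (values illustrative):

from swift.common.swob import Request
from swift.common.utils import Timestamp

t_data, t_type, t_meta = Timestamp(1), Timestamp(2), Timestamp(3)
req = Request.blank('/sda1/p/a/c/o', method='PUT', headers={
    'X-Timestamp': t_data.internal,               # data file time
    'X-Content-Type-Timestamp': t_type.internal,  # when content-type last changed
    'X-Meta-Timestamp': t_meta.internal,          # when metadata last changed
    'X-Size': 0,
    'X-Content-Type': 'text/plain',
    'X-Etag': 'etag'})
# resp = req.get_response(container_controller)  # expect 201 on success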
self.assertEqual(obj['last_modified'], t4.isoformat) + + # now overwrite with an in-between data timestamp at t2 + resp = do_update(t2, 'etag_at_t2', 2, 'ctype_at_t2', t_type=t2) + self.assertEqual(resp.status_int, 201) + + # check updated listing + req = Request.blank('/sda1/p/a/c', method='GET', + query_string='format=json') + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(int(resp.headers['X-Container-Object-Count']), 1) + self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 2) + listing_data = json.loads(resp.body) + self.assertEqual(1, len(listing_data)) + for obj in listing_data: + self.assertEqual(obj['name'], 'o') + self.assertEqual(obj['bytes'], 2) + self.assertEqual(obj['hash'], 'etag_at_t2') + self.assertEqual(obj['content_type'], 'ctype_at_t4') + self.assertEqual(obj['last_modified'], t4.isoformat) + + # now overwrite with an in-between content-type timestamp at t3 + resp = do_update(t2, 'etag_at_t2', 2, 'ctype_at_t3', t_type=t3) + self.assertEqual(resp.status_int, 201) + + # check updated listing + req = Request.blank('/sda1/p/a/c', method='GET', + query_string='format=json') + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(int(resp.headers['X-Container-Object-Count']), 1) + self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 2) + listing_data = json.loads(resp.body) + self.assertEqual(1, len(listing_data)) + for obj in listing_data: + self.assertEqual(obj['name'], 'o') + self.assertEqual(obj['bytes'], 2) + self.assertEqual(obj['hash'], 'etag_at_t2') + self.assertEqual(obj['content_type'], 'ctype_at_t4') + self.assertEqual(obj['last_modified'], t4.isoformat) + + # now update with an in-between meta timestamp at t5 + t5 = ts.next() + resp = do_update(t2, 'etag_at_t2', 2, 'ctype_at_t3', t_type=t3, + t_meta=t5) + self.assertEqual(resp.status_int, 201) + + # check updated listing + req = Request.blank('/sda1/p/a/c', method='GET', + query_string='format=json') + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(int(resp.headers['X-Container-Object-Count']), 1) + self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 2) + listing_data = json.loads(resp.body) + self.assertEqual(1, len(listing_data)) + for obj in listing_data: + self.assertEqual(obj['name'], 'o') + self.assertEqual(obj['bytes'], 2) + self.assertEqual(obj['hash'], 'etag_at_t2') + self.assertEqual(obj['content_type'], 'ctype_at_t4') + self.assertEqual(obj['last_modified'], t5.isoformat) + + # delete object at t6 + t6 = ts.next() + req = Request.blank( + '/sda1/p/a/c/o', method='DELETE', headers={ + 'X-Timestamp': t6.internal}) + self._update_object_put_headers(req) + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 204) + + # check empty listing + req = Request.blank('/sda1/p/a/c', method='GET', + query_string='format=json') + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(int(resp.headers['X-Container-Object-Count']), 0) + self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 0) + listing_data = json.loads(resp.body) + self.assertEqual(0, len(listing_data)) + + # subsequent content type timestamp at t8 should leave object deleted + t7 = ts.next() + t8 = ts.next() + t9 = ts.next() + resp = do_update(t2, 'etag_at_t2', 2, 'ctype_at_t8', t_type=t8, + t_meta=t9) + self.assertEqual(resp.status_int, 201) + + # check empty listing + req = 
Request.blank('/sda1/p/a/c', method='GET', + query_string='format=json') + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(int(resp.headers['X-Container-Object-Count']), 0) + self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 0) + listing_data = json.loads(resp.body) + self.assertEqual(0, len(listing_data)) + + # object recreated at t7 should pick up existing, later content-type + resp = do_update(t7, 'etag_at_t7', 7, 'ctype_at_t7') + self.assertEqual(resp.status_int, 201) + + # check listing + req = Request.blank('/sda1/p/a/c', method='GET', + query_string='format=json') + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(int(resp.headers['X-Container-Object-Count']), 1) + self.assertEqual(int(resp.headers['X-Container-Bytes-Used']), 7) + listing_data = json.loads(resp.body) + self.assertEqual(1, len(listing_data)) + for obj in listing_data: + self.assertEqual(obj['name'], 'o') + self.assertEqual(obj['bytes'], 7) + self.assertEqual(obj['hash'], 'etag_at_t7') + self.assertEqual(obj['content_type'], 'ctype_at_t8') + self.assertEqual(obj['last_modified'], t9.isoformat) + def test_DELETE_account_update(self): bindsock = listen(('127.0.0.1', 0)) diff --git a/test/unit/container/test_sync.py b/test/unit/container/test_sync.py index 4b779f8791..42833ed161 100644 --- a/test/unit/container/test_sync.py +++ b/test/unit/container/test_sync.py @@ -20,6 +20,7 @@ from textwrap import dedent import mock import errno +from swift.common.utils import Timestamp from test.unit import debug_logger from swift.container import sync from swift.common.db import DatabaseConnectionError @@ -750,6 +751,7 @@ class TestContainerSync(unittest.TestCase): hex = 'abcdef' sync.uuid = FakeUUID + ts_data = Timestamp(1.1) def fake_delete_object(path, name=None, headers=None, proxy=None, logger=None, timeout=None): @@ -758,12 +760,13 @@ class TestContainerSync(unittest.TestCase): if realm: self.assertEqual(headers, { 'x-container-sync-auth': - 'US abcdef 90e95aabb45a6cdc0892a3db5535e7f918428c90', - 'x-timestamp': '1.2'}) + 'US abcdef a2401ecb1256f469494a0abcb0eb62ffa73eca63', + 'x-timestamp': ts_data.internal}) else: self.assertEqual( headers, - {'x-container-sync-key': 'key', 'x-timestamp': '1.2'}) + {'x-container-sync-key': 'key', + 'x-timestamp': ts_data.internal}) self.assertEqual(proxy, 'http://proxy') self.assertEqual(timeout, 5.0) self.assertEqual(logger, self.logger) @@ -774,11 +777,13 @@ class TestContainerSync(unittest.TestCase): cs = sync.ContainerSync({}, container_ring=FakeRing(), logger=self.logger) cs.http_proxies = ['http://proxy'] - # Success + # Success. 
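The composite created_at rows used in these sync tests decode back to the separate times, and the x-timestamp assertions above show that it is the data time, not the last-modified (meta) time, that is sent to the remote cluster. For the value used here:

from swift.common.utils import Timestamp, decode_timestamps

ts_data = Timestamp(1.1)
created_at = ts_data.internal + '+1388+1388'   # 0x1388 == 5000 ticks == 0.05s
t_data, t_ctype, t_meta = decode_timestamps(created_at)
assert t_data == ts_data                # sent as x-timestamp
assert t_meta == Timestamp(1.2)         # the row's last-modified time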
+ # simulate a row with tombstone at 1.1 and later ctype, meta times + created_at = ts_data.internal + '+1388+1388' # last modified = 1.2 self.assertTrue(cs.container_sync_row( {'deleted': True, 'name': 'object', - 'created_at': '1.2'}, 'http://sync/to/path', + 'created_at': created_at}, 'http://sync/to/path', 'key', FakeContainerBroker('broker'), {'account': 'a', 'container': 'c', 'storage_policy_index': 0}, realm, realm_key)) @@ -858,6 +863,8 @@ class TestContainerSync(unittest.TestCase): sync.uuid = FakeUUID sync.shuffle = lambda x: x + ts_data = Timestamp(1.1) + timestamp = Timestamp(1.2) def fake_put_object(sync_to, name=None, headers=None, contents=None, proxy=None, logger=None, @@ -867,15 +874,15 @@ class TestContainerSync(unittest.TestCase): if realm: self.assertEqual(headers, { 'x-container-sync-auth': - 'US abcdef ef62c64bb88a33fa00722daa23d5d43253164962', - 'x-timestamp': '1.2', + 'US abcdef a5fb3cf950738e6e3b364190e246bd7dd21dad3c', + 'x-timestamp': timestamp.internal, 'etag': 'etagvalue', 'other-header': 'other header value', 'content-type': 'text/plain'}) else: self.assertEqual(headers, { 'x-container-sync-key': 'key', - 'x-timestamp': '1.2', + 'x-timestamp': timestamp.internal, 'other-header': 'other header value', 'etag': 'etagvalue', 'content-type': 'text/plain'}) @@ -897,16 +904,19 @@ class TestContainerSync(unittest.TestCase): '0') return (200, {'other-header': 'other header value', - 'etag': '"etagvalue"', 'x-timestamp': '1.2', + 'etag': '"etagvalue"', + 'x-timestamp': timestamp.internal, 'content-type': 'text/plain; swift_bytes=123'}, iter('contents')) cs.swift.get_object = fake_get_object - # Success as everything says it worked + # Success as everything says it worked. + # simulate a row with data at 1.1 and later ctype, meta times + created_at = ts_data.internal + '+1388+1388' # last modified = 1.2 self.assertTrue(cs.container_sync_row( {'deleted': False, 'name': 'object', - 'created_at': '1.2'}, 'http://sync/to/path', + 'created_at': created_at}, 'http://sync/to/path', 'key', FakeContainerBroker('broker'), {'account': 'a', 'container': 'c', 'storage_policy_index': 0}, realm, realm_key)) @@ -920,7 +930,7 @@ class TestContainerSync(unittest.TestCase): return (200, {'date': 'date value', 'last-modified': 'last modified value', - 'x-timestamp': '1.2', + 'x-timestamp': timestamp.internal, 'other-header': 'other header value', 'etag': '"etagvalue"', 'content-type': 'text/plain; swift_bytes=123'}, @@ -933,7 +943,7 @@ class TestContainerSync(unittest.TestCase): self.assertTrue(cs.container_sync_row( {'deleted': False, 'name': 'object', - 'created_at': '1.2'}, 'http://sync/to/path', + 'created_at': timestamp.internal}, 'http://sync/to/path', 'key', FakeContainerBroker('broker'), {'account': 'a', 'container': 'c', 'storage_policy_index': 0}, realm, realm_key)) @@ -967,7 +977,7 @@ class TestContainerSync(unittest.TestCase): self.assertFalse(cs.container_sync_row( {'deleted': False, 'name': 'object', - 'created_at': '1.2'}, 'http://sync/to/path', + 'created_at': timestamp.internal}, 'http://sync/to/path', 'key', FakeContainerBroker('broker'), {'account': 'a', 'container': 'c', 'storage_policy_index': 0}, realm, realm_key)) @@ -990,7 +1000,7 @@ class TestContainerSync(unittest.TestCase): self.assertFalse(cs.container_sync_row( {'deleted': False, 'name': 'object', - 'created_at': '1.2'}, 'http://sync/to/path', + 'created_at': timestamp.internal}, 'http://sync/to/path', 'key', FakeContainerBroker('broker'), {'account': 'a', 'container': 'c', 'storage_policy_index': 0}, realm, 
realm_key)) @@ -1003,7 +1013,8 @@ class TestContainerSync(unittest.TestCase): self.assertEqual(headers['X-Backend-Storage-Policy-Index'], '0') return (200, {'other-header': 'other header value', - 'x-timestamp': '1.2', 'etag': '"etagvalue"'}, + 'x-timestamp': timestamp.internal, + 'etag': '"etagvalue"'}, iter('contents')) def fake_put_object(*args, **kwargs): @@ -1015,7 +1026,7 @@ class TestContainerSync(unittest.TestCase): self.assertFalse(cs.container_sync_row( {'deleted': False, 'name': 'object', - 'created_at': '1.2'}, 'http://sync/to/path', + 'created_at': timestamp.internal}, 'http://sync/to/path', 'key', FakeContainerBroker('broker'), {'account': 'a', 'container': 'c', 'storage_policy_index': 0}, realm, realm_key)) @@ -1030,7 +1041,7 @@ class TestContainerSync(unittest.TestCase): self.assertFalse(cs.container_sync_row( {'deleted': False, 'name': 'object', - 'created_at': '1.2'}, 'http://sync/to/path', + 'created_at': timestamp.internal}, 'http://sync/to/path', 'key', FakeContainerBroker('broker'), {'account': 'a', 'container': 'c', 'storage_policy_index': 0}, realm, realm_key)) @@ -1045,7 +1056,7 @@ class TestContainerSync(unittest.TestCase): self.assertFalse(cs.container_sync_row( {'deleted': False, 'name': 'object', - 'created_at': '1.2'}, 'http://sync/to/path', + 'created_at': timestamp.internal}, 'http://sync/to/path', 'key', FakeContainerBroker('broker'), {'account': 'a', 'container': 'c', 'storage_policy_index': 0}, realm, realm_key)) diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py index de0cf4b1f9..657a29ed53 100644 --- a/test/unit/obj/test_diskfile.py +++ b/test/unit/obj/test_diskfile.py @@ -45,7 +45,7 @@ from test.unit import (FakeLogger, mock as unit_mock, temptree, from nose import SkipTest from swift.obj import diskfile from swift.common import utils -from swift.common.utils import hash_path, mkdirs, Timestamp +from swift.common.utils import hash_path, mkdirs, Timestamp, encode_timestamps from swift.common import ring from swift.common.splice import splice from swift.common.exceptions import DiskFileNotExist, DiskFileQuarantined, \ @@ -616,7 +616,8 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin): def test_get_ondisk_files_with_empty_dir(self): files = [] - expected = dict(data_file=None, meta_file=None, ts_file=None) + expected = dict( + data_file=None, meta_file=None, ctype_file=None, ts_file=None) for policy in POLICIES: for frag_index in (0, None, '14'): # check manager @@ -1214,6 +1215,64 @@ class TestDiskFileManager(DiskFileManagerMixin, unittest.TestCase): } self._check_yield_hashes(POLICIES.default, suffix_map, expected) + def test_yield_hashes_yields_content_type_timestamp(self): + hash_ = '9373a92d072897b136b3fc06595b4abc' + ts_iter = make_timestamp_iter() + ts0, ts1, ts2, ts3, ts4 = (next(ts_iter) for _ in range(5)) + data_file = ts1.internal + '.data' + + # no content-type delta + meta_file = ts2.internal + '.meta' + suffix_map = {'abc': {hash_: [data_file, meta_file]}} + expected = {hash_: {'ts_data': ts1, + 'ts_meta': ts2}} + self._check_yield_hashes(POLICIES.default, suffix_map, expected) + + # non-zero content-type delta + delta = ts3.raw - ts2.raw + meta_file = '%s-%x.meta' % (ts3.internal, delta) + suffix_map = {'abc': {hash_: [data_file, meta_file]}} + expected = {hash_: {'ts_data': ts1, + 'ts_meta': ts3, + 'ts_ctype': ts2}} + self._check_yield_hashes(POLICIES.default, suffix_map, expected) + + # zero content-type delta + meta_file = '%s+0.meta' % ts3.internal + suffix_map = {'abc': {hash_: [data_file, meta_file]}} + 
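# The .meta names above and below follow the encode_timestamps() convention
# also used by make_on_disk_filename(): the meta timestamp plus a signed hex
# delta (in 1e-5 s units) back to the content-type time, with '+0' meaning the
# content-type is as recent as the rest of the metadata.  Illustrative sketch
# (assumes swift.common.utils is importable):
#     from swift.common.utils import Timestamp, encode_timestamps
#     t_meta = Timestamp('0000000001.20000')
#     t_ctype = Timestamp('0000000001.15000')
#     encode_timestamps(t_meta, t_ctype, explicit=True) + '.meta'
#     # -> '0000000001.20000-1388.meta'
#     encode_timestamps(t_meta, t_meta, explicit=True) + '.meta'
#     # -> '0000000001.20000+0.meta'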
expected = {hash_: {'ts_data': ts1, + 'ts_meta': ts3, + 'ts_ctype': ts3}} + self._check_yield_hashes(POLICIES.default, suffix_map, expected) + + # content-type in second meta file + delta = ts3.raw - ts2.raw + meta_file1 = '%s-%x.meta' % (ts3.internal, delta) + meta_file2 = '%s.meta' % ts4.internal + suffix_map = {'abc': {hash_: [data_file, meta_file1, meta_file2]}} + expected = {hash_: {'ts_data': ts1, + 'ts_meta': ts4, + 'ts_ctype': ts2}} + self._check_yield_hashes(POLICIES.default, suffix_map, expected) + + # obsolete content-type in second meta file, older than data file + delta = ts3.raw - ts0.raw + meta_file1 = '%s-%x.meta' % (ts3.internal, delta) + meta_file2 = '%s.meta' % ts4.internal + suffix_map = {'abc': {hash_: [data_file, meta_file1, meta_file2]}} + expected = {hash_: {'ts_data': ts1, + 'ts_meta': ts4}} + self._check_yield_hashes(POLICIES.default, suffix_map, expected) + + # obsolete content-type in second meta file, same time as data file + delta = ts3.raw - ts1.raw + meta_file1 = '%s-%x.meta' % (ts3.internal, delta) + meta_file2 = '%s.meta' % ts4.internal + suffix_map = {'abc': {hash_: [data_file, meta_file1, meta_file2]}} + expected = {hash_: {'ts_data': ts1, + 'ts_meta': ts4}} + self._check_yield_hashes(POLICIES.default, suffix_map, expected) + def test_yield_hashes_suffix_filter(self): # test again with limited suffixes old_ts = '1383180000.12345' @@ -1611,6 +1670,7 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): 'timestamp': ts, 'frag_index': int(frag), 'ext': '.data', + 'ctype_timestamp': None }) # these functions are inverse self.assertEqual( @@ -1631,6 +1691,7 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): 'timestamp': ts, 'frag_index': None, 'ext': ext, + 'ctype_timestamp': None }) # these functions are inverse self.assertEqual( @@ -1662,6 +1723,30 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): actual = mgr.make_on_disk_filename(ts, ext, frag_index=frag) self.assertEqual(expected, actual) + def test_make_on_disk_filename_for_meta_with_content_type(self): + # verify .meta filename encodes content-type timestamp + mgr = self.df_router[POLICIES.default] + time_ = 1234567890.00001 + for delta in (0.0, .00001, 1.11111): + t_meta = Timestamp(time_) + t_type = Timestamp(time_ - delta) + sign = '-' if delta else '+' + expected = '%s%s%x.meta' % (t_meta.short, sign, 100000 * delta) + actual = mgr.make_on_disk_filename( + t_meta, '.meta', ctype_timestamp=t_type) + self.assertEqual(expected, actual) + parsed = mgr.parse_on_disk_filename(actual) + self.assertEqual(parsed, { + 'timestamp': t_meta, + 'frag_index': None, + 'ext': '.meta', + 'ctype_timestamp': t_type + }) + # these functions are inverse + self.assertEqual( + mgr.make_on_disk_filename(**parsed), + expected) + def test_yield_hashes(self): old_ts = '1383180000.12345' fresh_ts = Timestamp(time() - 10).internal @@ -1979,6 +2064,7 @@ class DiskFileMixin(BaseDiskFileTestMixin): tpool.execute = self._orig_tpool_exc def _create_ondisk_file(self, df, data, timestamp, metadata=None, + ctype_timestamp=None, ext='.data'): mkdirs(df._datadir) if timestamp is None: @@ -1996,10 +2082,17 @@ class DiskFileMixin(BaseDiskFileTestMixin): metadata['name'] = '/a/c/o' if 'Content-Length' not in metadata: metadata['Content-Length'] = str(len(data)) - filename = timestamp.internal + ext + filename = timestamp.internal if ext == '.data' and df.policy.policy_type == EC_POLICY: - filename = '%s#%s.data' % (timestamp.internal, df._frag_index) - data_file = 
os.path.join(df._datadir, filename) + filename = '%s#%s' % (timestamp.internal, df._frag_index) + if ctype_timestamp: + metadata.update( + {'Content-Type-Timestamp': + Timestamp(ctype_timestamp).internal}) + filename = encode_timestamps(timestamp, + Timestamp(ctype_timestamp), + explicit=True) + data_file = os.path.join(df._datadir, filename + ext) with open(data_file, 'wb') as f: f.write(data) xattr.setxattr(f.fileno(), diskfile.METADATA_KEY, @@ -2779,6 +2872,99 @@ class DiskFileMixin(BaseDiskFileTestMixin): exp_name = '%s.meta' % timestamp self.assertTrue(exp_name in set(dl)) + def test_write_metadata_with_content_type(self): + # if metadata has content-type then its time should be in file name + df = self._create_test_file('1234567890') + file_count = len(os.listdir(df._datadir)) + timestamp = Timestamp(time()) + metadata = {'X-Timestamp': timestamp.internal, + 'X-Object-Meta-test': 'data', + 'Content-Type': 'foo', + 'Content-Type-Timestamp': timestamp.internal} + df.write_metadata(metadata) + dl = os.listdir(df._datadir) + self.assertEqual(len(dl), file_count + 1) + exp_name = '%s+0.meta' % timestamp.internal + self.assertTrue(exp_name in set(dl), + 'Expected file %s not found in %s' % (exp_name, dl)) + + def test_write_metadata_with_older_content_type(self): + # if metadata has content-type then its time should be in file name + ts_iter = make_timestamp_iter() + df = self._create_test_file('1234567890', timestamp=ts_iter.next()) + file_count = len(os.listdir(df._datadir)) + timestamp = ts_iter.next() + timestamp2 = ts_iter.next() + metadata = {'X-Timestamp': timestamp2.internal, + 'X-Object-Meta-test': 'data', + 'Content-Type': 'foo', + 'Content-Type-Timestamp': timestamp.internal} + df.write_metadata(metadata) + dl = os.listdir(df._datadir) + self.assertEqual(len(dl), file_count + 1, dl) + exp_name = '%s-%x.meta' % (timestamp2.internal, + timestamp2.raw - timestamp.raw) + self.assertTrue(exp_name in set(dl), + 'Expected file %s not found in %s' % (exp_name, dl)) + + def test_write_metadata_with_content_type_removes_same_time_meta(self): + # a meta file without content-type should be cleaned up in favour of + # a meta file at same time with content-type + ts_iter = make_timestamp_iter() + df = self._create_test_file('1234567890', timestamp=ts_iter.next()) + file_count = len(os.listdir(df._datadir)) + timestamp = ts_iter.next() + timestamp2 = ts_iter.next() + metadata = {'X-Timestamp': timestamp2.internal, + 'X-Object-Meta-test': 'data'} + df.write_metadata(metadata) + metadata = {'X-Timestamp': timestamp2.internal, + 'X-Object-Meta-test': 'data', + 'Content-Type': 'foo', + 'Content-Type-Timestamp': timestamp.internal} + df.write_metadata(metadata) + + dl = os.listdir(df._datadir) + self.assertEqual(len(dl), file_count + 1, dl) + exp_name = '%s-%x.meta' % (timestamp2.internal, + timestamp2.raw - timestamp.raw) + self.assertTrue(exp_name in set(dl), + 'Expected file %s not found in %s' % (exp_name, dl)) + + def test_write_metadata_with_content_type_removes_multiple_metas(self): + # a combination of a meta file without content-type and an older meta + # file with content-type should be cleaned up in favour of a meta file + # at newer time with content-type + ts_iter = make_timestamp_iter() + df = self._create_test_file('1234567890', timestamp=ts_iter.next()) + file_count = len(os.listdir(df._datadir)) + timestamp = ts_iter.next() + timestamp2 = ts_iter.next() + metadata = {'X-Timestamp': timestamp2.internal, + 'X-Object-Meta-test': 'data'} + df.write_metadata(metadata) + metadata = 
{'X-Timestamp': timestamp.internal, + 'X-Object-Meta-test': 'data', + 'Content-Type': 'foo', + 'Content-Type-Timestamp': timestamp.internal} + df.write_metadata(metadata) + + dl = os.listdir(df._datadir) + self.assertEqual(len(dl), file_count + 2, dl) + + metadata = {'X-Timestamp': timestamp2.internal, + 'X-Object-Meta-test': 'data', + 'Content-Type': 'foo', + 'Content-Type-Timestamp': timestamp.internal} + df.write_metadata(metadata) + + dl = os.listdir(df._datadir) + self.assertEqual(len(dl), file_count + 1, dl) + exp_name = '%s-%x.meta' % (timestamp2.internal, + timestamp2.raw - timestamp.raw) + self.assertTrue(exp_name in set(dl), + 'Expected file %s not found in %s' % (exp_name, dl)) + def test_write_metadata_no_xattr(self): timestamp = Timestamp(time()).internal metadata = {'X-Timestamp': timestamp, 'X-Object-Meta-test': 'data'} @@ -3133,7 +3319,54 @@ class DiskFileMixin(BaseDiskFileTestMixin): Timestamp(10).internal) self.assertTrue('deleted' not in df._metadata) - def test_ondisk_search_loop_data_meta_ts(self): + def test_ondisk_search_loop_multiple_meta_data(self): + df = self._simple_get_diskfile() + self._create_ondisk_file(df, '', ext='.meta', timestamp=10, + metadata={'X-Object-Meta-User': 'user-meta'}) + self._create_ondisk_file(df, '', ext='.meta', timestamp=9, + ctype_timestamp=9, + metadata={'Content-Type': 'newest', + 'X-Object-Meta-User': 'blah'}) + self._create_ondisk_file(df, 'B', ext='.data', timestamp=8, + metadata={'Content-Type': 'newer'}) + self._create_ondisk_file(df, 'A', ext='.data', timestamp=7, + metadata={'Content-Type': 'oldest'}) + if df.policy.policy_type == EC_POLICY: + self._create_ondisk_file(df, '', ext='.durable', timestamp=8) + self._create_ondisk_file(df, '', ext='.durable', timestamp=7) + df = self._simple_get_diskfile() + with df.open(): + self.assertTrue('X-Timestamp' in df._metadata) + self.assertEqual(df._metadata['X-Timestamp'], + Timestamp(10).internal) + self.assertTrue('Content-Type' in df._metadata) + self.assertEqual(df._metadata['Content-Type'], 'newest') + self.assertTrue('X-Object-Meta-User' in df._metadata) + self.assertEqual(df._metadata['X-Object-Meta-User'], 'user-meta') + + def test_ondisk_search_loop_stale_meta_data(self): + df = self._simple_get_diskfile() + self._create_ondisk_file(df, '', ext='.meta', timestamp=10, + metadata={'X-Object-Meta-User': 'user-meta'}) + self._create_ondisk_file(df, '', ext='.meta', timestamp=9, + ctype_timestamp=7, + metadata={'Content-Type': 'older', + 'X-Object-Meta-User': 'blah'}) + self._create_ondisk_file(df, 'B', ext='.data', timestamp=8, + metadata={'Content-Type': 'newer'}) + if df.policy.policy_type == EC_POLICY: + self._create_ondisk_file(df, '', ext='.durable', timestamp=8) + df = self._simple_get_diskfile() + with df.open(): + self.assertTrue('X-Timestamp' in df._metadata) + self.assertEqual(df._metadata['X-Timestamp'], + Timestamp(10).internal) + self.assertTrue('Content-Type' in df._metadata) + self.assertEqual(df._metadata['Content-Type'], 'newer') + self.assertTrue('X-Object-Meta-User' in df._metadata) + self.assertEqual(df._metadata['X-Object-Meta-User'], 'user-meta') + + def test_ondisk_search_loop_data_ts_meta(self): df = self._simple_get_diskfile() self._create_ondisk_file(df, 'B', ext='.data', timestamp=10) self._create_ondisk_file(df, 'A', ext='.data', timestamp=9) @@ -3295,6 +3528,37 @@ class DiskFileMixin(BaseDiskFileTestMixin): with self.assertRaises(DiskFileNotOpen): df.data_timestamp + def test_content_type_and_timestamp(self): + ts_1 = self.ts() + 
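# The assertions below exercise the three timestamps DiskFile exposes after
# open(): data_timestamp (the .data file time), timestamp (the newest
# metadata time) and content_type_timestamp.  Until a POST supplies a
# Content-Type-Timestamp the content-type is the one written with the data
# file, so content_type_timestamp reports the data timestamp; after the
# write_metadata() call below it reports the time carried in the .meta file.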
self._get_open_disk_file(ts=ts_1.internal, + extra_metadata={'Content-Type': 'image/jpeg'}) + df = self._simple_get_diskfile() + with df.open(): + self.assertEqual(ts_1.internal, df.data_timestamp) + self.assertEqual(ts_1.internal, df.timestamp) + self.assertEqual(ts_1.internal, df.content_type_timestamp) + self.assertEqual('image/jpeg', df.content_type) + ts_2 = self.ts() + ts_3 = self.ts() + df.write_metadata({'X-Timestamp': ts_3.internal, + 'Content-Type': 'image/gif', + 'Content-Type-Timestamp': ts_2.internal}) + with df.open(): + self.assertEqual(ts_1.internal, df.data_timestamp) + self.assertEqual(ts_3.internal, df.timestamp) + self.assertEqual(ts_2.internal, df.content_type_timestamp) + self.assertEqual('image/gif', df.content_type) + + def test_content_type_timestamp_not_open(self): + df = self._simple_get_diskfile() + with self.assertRaises(DiskFileNotOpen): + df.content_type_timestamp + + def test_content_type_not_open(self): + df = self._simple_get_diskfile() + with self.assertRaises(DiskFileNotOpen): + df.content_type + def test_durable_timestamp(self): ts_1 = self.ts() df = self._get_open_disk_file(ts=ts_1.internal) @@ -4211,6 +4475,14 @@ class TestSuffixHashes(unittest.TestCase): filename += '.data' return filename + def _metafilename(self, meta_timestamp, ctype_timestamp=None): + filename = meta_timestamp.internal + if ctype_timestamp is not None: + delta = meta_timestamp.raw - ctype_timestamp.raw + filename = '%s-%x' % (filename, delta) + filename += '.meta' + return filename + def check_hash_cleanup_listdir(self, policy, input_files, output_files): orig_unlink = os.unlink file_list = list(input_files) @@ -4771,6 +5043,175 @@ class TestSuffixHashes(unittest.TestCase): self.assertEqual(sorted(os.listdir(df._datadir)), sorted(expected_files)) + def _verify_get_hashes(self, filenames, ts_data, ts_meta, ts_ctype, + policy): + """ + Helper method to create a set of ondisk files and verify suffix_hashes. 
+ + :param filenames: list of filenames to create in an object hash dir + :param ts_data: newest data timestamp, used for expected result + :param ts_meta: newest meta timestamp, used for expected result + :param ts_ctype: newest content-type timestamp, used for expected + result + :param policy: storage policy to use for test + """ + df_mgr = self.df_router[policy] + df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o', + policy=policy, frag_index=4) + suffix = os.path.basename(os.path.dirname(df._datadir)) + mkdirs(df._datadir) + + # calculate expected result + hasher = md5() + if policy.policy_type == EC_POLICY: + hasher.update(ts_meta.internal + '.meta') + hasher.update(ts_data.internal + '.durable') + if ts_ctype: + hasher.update(ts_ctype.internal + '_ctype') + expected = { + suffix: { + None: hasher.hexdigest(), + 4: md5(ts_data.internal).hexdigest(), + } + } + elif policy.policy_type == REPL_POLICY: + hasher.update(ts_meta.internal + '.meta') + hasher.update(ts_data.internal + '.data') + if ts_ctype: + hasher.update(ts_ctype.internal + '_ctype') + expected = {suffix: hasher.hexdigest()} + else: + self.fail('unknown policy type %r' % policy.policy_type) + + for fname in filenames: + open(os.path.join(df._datadir, fname), 'w').close() + + hashes = df_mgr.get_hashes('sda1', '0', [], policy) + + msg = 'expected %r != %r for policy %r' % ( + expected, hashes, policy) + self.assertEqual(hashes, expected, msg) + + def test_hash_suffix_with_older_content_type_in_meta(self): + # single meta file having older content-type + for policy in self.iter_policies(): + ts_data, ts_ctype, ts_meta = ( + self.ts(), self.ts(), self.ts()) + + filenames = [self._datafilename(ts_data, policy, frag_index=4), + self._metafilename(ts_meta, ts_ctype)] + if policy.policy_type == EC_POLICY: + filenames.append(ts_data.internal + '.durable') + + self._verify_get_hashes( + filenames, ts_data, ts_meta, ts_ctype, policy) + + def test_hash_suffix_with_same_age_content_type_in_meta(self): + # single meta file having same age content-type + for policy in self.iter_policies(): + ts_data, ts_meta = (self.ts(), self.ts()) + + filenames = [self._datafilename(ts_data, policy, frag_index=4), + self._metafilename(ts_meta, ts_meta)] + if policy.policy_type == EC_POLICY: + filenames.append(ts_data.internal + '.durable') + + self._verify_get_hashes( + filenames, ts_data, ts_meta, ts_meta, policy) + + def test_hash_suffix_with_obsolete_content_type_in_meta(self): + # After rsync replication we could have a single meta file having + # content-type older than a replicated data file + for policy in self.iter_policies(): + ts_ctype, ts_data, ts_meta = (self.ts(), self.ts(), self.ts()) + + filenames = [self._datafilename(ts_data, policy, frag_index=4), + self._metafilename(ts_meta, ts_ctype)] + if policy.policy_type == EC_POLICY: + filenames.append(ts_data.internal + '.durable') + + self._verify_get_hashes( + filenames, ts_data, ts_meta, None, policy) + + def test_hash_suffix_with_older_content_type_in_newer_meta(self): + # After rsync replication we could have two meta files: newest + # content-type is in newer meta file, older than newer meta file + for policy in self.iter_policies(): + ts_data, ts_older_meta, ts_ctype, ts_newer_meta = ( + self.ts() for _ in range(4)) + + filenames = [self._datafilename(ts_data, policy, frag_index=4), + self._metafilename(ts_older_meta), + self._metafilename(ts_newer_meta, ts_ctype)] + if policy.policy_type == EC_POLICY: + filenames.append(ts_data.internal + '.durable') + + self._verify_get_hashes( + 
filenames, ts_data, ts_newer_meta, ts_ctype, policy) + + def test_hash_suffix_with_same_age_content_type_in_newer_meta(self): + # After rsync replication we could have two meta files: newest + # content-type is in newer meta file, at same age as newer meta file + for policy in self.iter_policies(): + ts_data, ts_older_meta, ts_newer_meta = ( + self.ts() for _ in range(3)) + + filenames = [self._datafilename(ts_data, policy, frag_index=4), + self._metafilename(ts_newer_meta, ts_newer_meta)] + if policy.policy_type == EC_POLICY: + filenames.append(ts_data.internal + '.durable') + + self._verify_get_hashes( + filenames, ts_data, ts_newer_meta, ts_newer_meta, policy) + + def test_hash_suffix_with_older_content_type_in_older_meta(self): + # After rsync replication we could have two meta files: newest + # content-type is in older meta file, older than older meta file + for policy in self.iter_policies(): + ts_data, ts_ctype, ts_older_meta, ts_newer_meta = ( + self.ts() for _ in range(4)) + + filenames = [self._datafilename(ts_data, policy, frag_index=4), + self._metafilename(ts_newer_meta), + self._metafilename(ts_older_meta, ts_ctype)] + if policy.policy_type == EC_POLICY: + filenames.append(ts_data.internal + '.durable') + + self._verify_get_hashes( + filenames, ts_data, ts_newer_meta, ts_ctype, policy) + + def test_hash_suffix_with_same_age_content_type_in_older_meta(self): + # After rsync replication we could have two meta files: newest + # content-type is in older meta file, at same age as older meta file + for policy in self.iter_policies(): + ts_data, ts_older_meta, ts_newer_meta = ( + self.ts() for _ in range(3)) + + filenames = [self._datafilename(ts_data, policy, frag_index=4), + self._metafilename(ts_newer_meta), + self._metafilename(ts_older_meta, ts_older_meta)] + if policy.policy_type == EC_POLICY: + filenames.append(ts_data.internal + '.durable') + + self._verify_get_hashes( + filenames, ts_data, ts_newer_meta, ts_older_meta, policy) + + def test_hash_suffix_with_obsolete_content_type_in_older_meta(self): + # After rsync replication we could have two meta files: newest + # content-type is in older meta file, but older than data file + for policy in self.iter_policies(): + ts_ctype, ts_data, ts_older_meta, ts_newer_meta = ( + self.ts() for _ in range(4)) + + filenames = [self._datafilename(ts_data, policy, frag_index=4), + self._metafilename(ts_newer_meta), + self._metafilename(ts_older_meta, ts_ctype)] + if policy.policy_type == EC_POLICY: + filenames.append(ts_data.internal + '.durable') + + self._verify_get_hashes( + filenames, ts_data, ts_newer_meta, None, policy) + def test_hash_suffix_removes_empty_hashdir_and_suffix(self): for policy in self.iter_policies(): df_mgr = self.df_router[policy] diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py index 81fd79e42f..40c37ee39c 100755 --- a/test/unit/obj/test_server.py +++ b/test/unit/obj/test_server.py @@ -51,7 +51,8 @@ from swift.obj import server as object_server from swift.obj import diskfile from swift.common import utils, bufferedhttp from swift.common.utils import hash_path, mkdirs, normalize_timestamp, \ - NullLogger, storage_directory, public, replication + NullLogger, storage_directory, public, replication, encode_timestamps, \ + Timestamp from swift.common import constraints from swift.common.swob import Request, HeaderKeyDict, WsgiBytesIO from swift.common.splice import splice @@ -168,7 +169,7 @@ class TestObjectController(unittest.TestCase): dah = ['content-disposition', 'content-encoding', 
'x-delete-at', 'x-object-manifest', 'x-static-large-object'] conf = {'devices': self.testdir, 'mount_check': 'false', - 'allowed_headers': ','.join(['content-type'] + dah)} + 'allowed_headers': ','.join(['content-length'] + dah)} self.object_controller = object_server.ObjectController( conf, logger=debug_logger()) self.assertEqual(self.object_controller.allowed_headers, set(dah)) @@ -416,12 +417,14 @@ class TestObjectController(unittest.TestCase): self.assertEqual(resp.status_int, 400) def test_POST_container_connection(self): - - def mock_http_connect(response, with_exc=False): + # Test that POST does call container_update and returns success + # whether update to container server succeeds or fails + def mock_http_connect(calls, response, with_exc=False): class FakeConn(object): - def __init__(self, status, with_exc): + def __init__(self, calls, status, with_exc): + self.calls = calls self.status = status self.reason = 'Fake' self.host = '1.2.3.4' @@ -429,6 +432,7 @@ class TestObjectController(unittest.TestCase): self.with_exc = with_exc def getresponse(self): + calls[0] += 1 if self.with_exc: raise Exception('test') return self @@ -436,7 +440,7 @@ class TestObjectController(unittest.TestCase): def read(self, amt=None): return '' - return lambda *args, **kwargs: FakeConn(response, with_exc) + return lambda *args, **kwargs: FakeConn(calls, response, with_exc) ts = time() timestamp = normalize_timestamp(ts) @@ -456,8 +460,9 @@ class TestObjectController(unittest.TestCase): 'X-Container-Device': 'sda1', 'X-Container-Timestamp': '1', 'Content-Type': 'application/new1'}) + calls = [0] with mock.patch.object(object_server, 'http_connect', - mock_http_connect(202)): + mock_http_connect(calls, 202)): resp = req.get_response(self.object_controller) self.assertEqual(resp.status_int, 202) req = Request.blank( @@ -469,8 +474,9 @@ class TestObjectController(unittest.TestCase): 'X-Container-Device': 'sda1', 'X-Container-Timestamp': '1', 'Content-Type': 'application/new1'}) + calls = [0] with mock.patch.object(object_server, 'http_connect', - mock_http_connect(202, with_exc=True)): + mock_http_connect(calls, 202, with_exc=True)): resp = req.get_response(self.object_controller) self.assertEqual(resp.status_int, 202) req = Request.blank( @@ -482,11 +488,215 @@ class TestObjectController(unittest.TestCase): 'X-Container-Device': 'sda1', 'X-Container-Timestamp': '1', 'Content-Type': 'application/new2'}) + calls = [0] with mock.patch.object(object_server, 'http_connect', - mock_http_connect(500)): + mock_http_connect(calls, 500)): resp = req.get_response(self.object_controller) self.assertEqual(resp.status_int, 202) + def _test_POST_container_updates(self, policy, update_etag=None): + # Test that POST requests result in correct calls to container_update + ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) + t = [ts_iter.next() for _ in range(0, 5)] + calls_made = [] + update_etag = update_etag or '098f6bcd4621d373cade4e832627b4f6' + + def mock_container_update(ctlr, op, account, container, obj, request, + headers_out, objdevice, policy_idx): + calls_made.append(headers_out) + + headers = { + 'X-Timestamp': t[1].internal, + 'Content-Type': 'application/octet-stream;swift_bytes=123456789', + 'Content-Length': '4', + 'X-Backend-Storage-Policy': int(policy)} + if policy.policy_type == EC_POLICY: + headers['X-Backend-Container-Update-Override-Etag'] = update_etag + headers['X-Object-Sysmeta-Ec-Etag'] = update_etag + + req = Request.blank('/sda1/p/a/c/o', + environ={'REQUEST_METHOD': 'PUT'}, + 
headers=headers) + req.body = 'test' + + with mock.patch('swift.obj.server.ObjectController.container_update', + mock_container_update): + resp = req.get_response(self.object_controller) + + self.assertEqual(resp.status_int, 201) + self.assertEqual(1, len(calls_made)) + expected_headers = HeaderKeyDict({ + 'x-size': '4', + 'x-content-type': 'application/octet-stream;swift_bytes=123456789', + 'x-timestamp': t[1].internal, + 'x-etag': update_etag}) + self.assertDictEqual(expected_headers, calls_made[0]) + + # POST with no metadata newer than the data should return 409, + # container update not expected + calls_made = [] + req = Request.blank('/sda1/p/a/c/o', + environ={'REQUEST_METHOD': 'POST'}, + headers={'X-Timestamp': t[0].internal, + 'X-Backend-Storage-Policy': int(policy)}) + + with mock.patch('swift.obj.server.ObjectController.container_update', + mock_container_update): + resp = req.get_response(self.object_controller) + + self.assertEqual(resp.status_int, 409) + self.assertEqual(resp.headers['x-backend-timestamp'], + t[1].internal) + self.assertEqual(0, len(calls_made)) + + # POST with newer metadata returns success and container update + # is expected + calls_made = [] + req = Request.blank('/sda1/p/a/c/o', + environ={'REQUEST_METHOD': 'POST'}, + headers={'X-Timestamp': t[3].internal, + 'X-Backend-Storage-Policy': int(policy)}) + + with mock.patch('swift.obj.server.ObjectController.container_update', + mock_container_update): + resp = req.get_response(self.object_controller) + + self.assertEqual(resp.status_int, 202) + self.assertEqual(1, len(calls_made)) + expected_headers = HeaderKeyDict({ + 'x-size': '4', + 'x-content-type': 'application/octet-stream;swift_bytes=123456789', + 'x-timestamp': t[1].internal, + 'x-content-type-timestamp': t[1].internal, + 'x-meta-timestamp': t[3].internal, + 'x-etag': update_etag}) + self.assertDictEqual(expected_headers, calls_made[0]) + + # POST with no metadata newer than existing metadata should return + # 409, container update not expected + calls_made = [] + req = Request.blank('/sda1/p/a/c/o', + environ={'REQUEST_METHOD': 'POST'}, + headers={'X-Timestamp': t[2].internal, + 'X-Backend-Storage-Policy': int(policy)}) + + with mock.patch('swift.obj.server.ObjectController.container_update', + mock_container_update): + resp = req.get_response(self.object_controller) + + self.assertEqual(resp.status_int, 409) + self.assertEqual(resp.headers['x-backend-timestamp'], + t[3].internal) + self.assertEqual(0, len(calls_made)) + + # POST with newer content-type but older metadata returns success + # and container update is expected newer content-type should have + # existing swift_bytes appended + calls_made = [] + req = Request.blank('/sda1/p/a/c/o', + environ={'REQUEST_METHOD': 'POST'}, + headers={ + 'X-Timestamp': t[2].internal, + 'Content-Type': 'text/plain', + 'Content-Type-Timestamp': t[2].internal, + 'X-Backend-Storage-Policy': int(policy) + }) + + with mock.patch('swift.obj.server.ObjectController.container_update', + mock_container_update): + resp = req.get_response(self.object_controller) + + self.assertEqual(resp.status_int, 202) + self.assertEqual(1, len(calls_made)) + expected_headers = HeaderKeyDict({ + 'x-size': '4', + 'x-content-type': 'text/plain;swift_bytes=123456789', + 'x-timestamp': t[1].internal, + 'x-content-type-timestamp': t[2].internal, + 'x-meta-timestamp': t[3].internal, + 'x-etag': update_etag}) + self.assertDictEqual(expected_headers, calls_made[0]) + + # POST with older content-type but newer metadata returns success + # and 
container update is expected + calls_made = [] + req = Request.blank('/sda1/p/a/c/o', + environ={'REQUEST_METHOD': 'POST'}, + headers={ + 'X-Timestamp': t[4].internal, + 'Content-Type': 'older', + 'Content-Type-Timestamp': t[1].internal, + 'X-Backend-Storage-Policy': int(policy) + }) + + with mock.patch('swift.obj.server.ObjectController.container_update', + mock_container_update): + resp = req.get_response(self.object_controller) + + self.assertEqual(resp.status_int, 202) + self.assertEqual(1, len(calls_made)) + expected_headers = HeaderKeyDict({ + 'x-size': '4', + 'x-content-type': 'text/plain;swift_bytes=123456789', + 'x-timestamp': t[1].internal, + 'x-content-type-timestamp': t[2].internal, + 'x-meta-timestamp': t[4].internal, + 'x-etag': update_etag}) + self.assertDictEqual(expected_headers, calls_made[0]) + + # POST with same-time content-type and metadata returns 409 + # and no container update is expected + calls_made = [] + req = Request.blank('/sda1/p/a/c/o', + environ={'REQUEST_METHOD': 'POST'}, + headers={ + 'X-Timestamp': t[4].internal, + 'Content-Type': 'ignored', + 'Content-Type-Timestamp': t[2].internal, + 'X-Backend-Storage-Policy': int(policy) + }) + + with mock.patch('swift.obj.server.ObjectController.container_update', + mock_container_update): + resp = req.get_response(self.object_controller) + + self.assertEqual(resp.status_int, 409) + self.assertEqual(0, len(calls_made)) + + # POST with implicit newer content-type but older metadata + # returns success and container update is expected, + # update reports existing metadata timestamp + calls_made = [] + req = Request.blank('/sda1/p/a/c/o', + environ={'REQUEST_METHOD': 'POST'}, + headers={ + 'X-Timestamp': t[3].internal, + 'Content-Type': 'text/newer', + 'X-Backend-Storage-Policy': int(policy) + }) + + with mock.patch('swift.obj.server.ObjectController.container_update', + mock_container_update): + resp = req.get_response(self.object_controller) + + self.assertEqual(resp.status_int, 202) + self.assertEqual(1, len(calls_made)) + expected_headers = HeaderKeyDict({ + 'x-size': '4', + 'x-content-type': 'text/newer;swift_bytes=123456789', + 'x-timestamp': t[1].internal, + 'x-content-type-timestamp': t[3].internal, + 'x-meta-timestamp': t[4].internal, + 'x-etag': update_etag}) + self.assertDictEqual(expected_headers, calls_made[0]) + + def test_POST_container_updates_with_replication_policy(self): + self._test_POST_container_updates(POLICIES[0]) + + def test_POST_container_updates_with_EC_policy(self): + self._test_POST_container_updates( + POLICIES[1], update_etag='override_etag') + def test_POST_quarantine_zbyte(self): timestamp = normalize_timestamp(time()) req = Request.blank('/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'}, @@ -1332,6 +1542,84 @@ class TestObjectController(unittest.TestCase): 'name': '/a/c/o', 'X-Object-Meta-1': 'Not One'}) + def test_POST_then_fetch_content_type(self): + # check that content_type is updated by a POST + timestamp1 = normalize_timestamp(time()) + req = Request.blank( + '/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'}, + headers={'X-Timestamp': timestamp1, + 'Content-Type': 'text/plain', + 'ETag': '1000d172764c9dbc3a5798a67ec5bb76', + 'X-Object-Meta-1': 'One'}) + req.body = 'VERIFY SYSMETA' + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, 201) + + timestamp2 = normalize_timestamp(time()) + req = Request.blank( + '/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'POST'}, + headers={'X-Timestamp': timestamp2, + 'X-Object-Meta-1': 'Not One', + 
'Content-Type': 'text/html'}) + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, 202) + + # original .data file metadata should be unchanged + objfile = os.path.join( + self.testdir, 'sda1', + storage_directory(diskfile.get_data_dir(0), 'p', + hash_path('a', 'c', 'o')), + timestamp1 + '.data') + self.assertTrue(os.path.isfile(objfile)) + self.assertEqual(open(objfile).read(), 'VERIFY SYSMETA') + self.assertEqual(diskfile.read_metadata(objfile), + {'X-Timestamp': timestamp1, + 'Content-Length': '14', + 'Content-Type': 'text/plain', + 'ETag': '1000d172764c9dbc3a5798a67ec5bb76', + 'name': '/a/c/o', + 'X-Object-Meta-1': 'One'}) + + # .meta file metadata should have updated content-type + metafile_name = encode_timestamps(Timestamp(timestamp2), + Timestamp(timestamp2), + explicit=True) + metafile = os.path.join( + self.testdir, 'sda1', + storage_directory(diskfile.get_data_dir(0), 'p', + hash_path('a', 'c', 'o')), + metafile_name + '.meta') + self.assertTrue(os.path.isfile(metafile)) + self.assertEqual(diskfile.read_metadata(metafile), + {'X-Timestamp': timestamp2, + 'name': '/a/c/o', + 'Content-Type': 'text/html', + 'Content-Type-Timestamp': timestamp2, + 'X-Object-Meta-1': 'Not One'}) + + def check_response(resp): + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.content_length, 14) + self.assertEqual(resp.content_type, 'text/html') + self.assertEqual(resp.headers['content-type'], 'text/html') + self.assertEqual( + resp.headers['last-modified'], + strftime('%a, %d %b %Y %H:%M:%S GMT', + gmtime(math.ceil(float(timestamp2))))) + self.assertEqual(resp.headers['etag'], + '"1000d172764c9dbc3a5798a67ec5bb76"') + self.assertEqual(resp.headers['x-object-meta-1'], 'Not One') + + req = Request.blank('/sda1/p/a/c/o', + environ={'REQUEST_METHOD': 'HEAD'}) + resp = req.get_response(self.object_controller) + check_response(resp) + + req = Request.blank('/sda1/p/a/c/o', + environ={'REQUEST_METHOD': 'GET'}) + resp = req.get_response(self.object_controller) + check_response(resp) + def test_PUT_then_fetch_system_metadata(self): timestamp = normalize_timestamp(time()) req = Request.blank( diff --git a/test/unit/obj/test_ssync.py b/test/unit/obj/test_ssync.py index 94c463606c..1349e16440 100644 --- a/test/unit/obj/test_ssync.py +++ b/test/unit/obj/test_ssync.py @@ -904,6 +904,165 @@ class TestSsyncReplication(TestBaseSsync): # tx meta file should not have been sync'd to rx data file self.assertNotIn('X-Object-Meta-Test', rx_obj.get_metadata()) + def test_content_type_sync(self): + policy = POLICIES.default + rx_node_index = 0 + + # create diskfiles... 
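# Five objects are set up below to cover the interesting combinations:
#   o1: tx only, content-type and user meta in separate .meta files
#       -> expect a PUT and a POST subrequest
#   o2: rx has the .data and the newest .meta but not the latest content-type
#       -> expect a POST subrequest
#   o3: rx already has the latest content-type -> nothing to sync
#   o4: the reverse of o3 (latest content-type held in a single tx .meta,
#       rx has it spread over two .meta files) -> nothing to sync
#   o5: rx has only the .data; the tx .meta carries the latest content-type
#       -> expect a POST subrequest
# During MISSING_CHECK the sender advertises the content-type time as a hex
# delta subpart (e.g. 'm:186a0,t:4', as asserted in test_ssync_sender.py
# below), which lets the receiver work out that only a POST is needed.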
+ tx_objs = {} + rx_objs = {} + tx_df_mgr = self.daemon._diskfile_router[policy] + rx_df_mgr = self.rx_controller._diskfile_router[policy] + + expected_subreqs = defaultdict(list) + + # o1 on tx only with two meta files + name = 'o1' + t1 = self.ts_iter.next() + tx_objs[name] = self._create_ondisk_files(tx_df_mgr, name, policy, t1) + t1_type = self.ts_iter.next() + metadata_1 = {'X-Timestamp': t1_type.internal, + 'Content-Type': 'text/test', + 'Content-Type-Timestamp': t1_type.internal} + tx_objs[name][0].write_metadata(metadata_1) + t1_meta = self.ts_iter.next() + metadata_2 = {'X-Timestamp': t1_meta.internal, + 'X-Object-Meta-Test': name} + tx_objs[name][0].write_metadata(metadata_2) + expected_subreqs['PUT'].append(name) + expected_subreqs['POST'].append(name) + + # o2 on tx with two meta files, rx has .data and newest .meta but is + # missing latest content-type + name = 'o2' + t2 = self.ts_iter.next() + tx_objs[name] = self._create_ondisk_files(tx_df_mgr, name, policy, t2) + t2_type = self.ts_iter.next() + metadata_1 = {'X-Timestamp': t2_type.internal, + 'Content-Type': 'text/test', + 'Content-Type-Timestamp': t2_type.internal} + tx_objs[name][0].write_metadata(metadata_1) + t2_meta = self.ts_iter.next() + metadata_2 = {'X-Timestamp': t2_meta.internal, + 'X-Object-Meta-Test': name} + tx_objs[name][0].write_metadata(metadata_2) + rx_objs[name] = self._create_ondisk_files(rx_df_mgr, name, policy, t2) + rx_objs[name][0].write_metadata(metadata_2) + expected_subreqs['POST'].append(name) + + # o3 on tx with two meta files, rx has .data and one .meta but does + # have latest content-type so nothing to sync + name = 'o3' + t3 = self.ts_iter.next() + tx_objs[name] = self._create_ondisk_files(tx_df_mgr, name, policy, t3) + t3_type = self.ts_iter.next() + metadata_1 = {'X-Timestamp': t3_type.internal, + 'Content-Type': 'text/test', + 'Content-Type-Timestamp': t3_type.internal} + tx_objs[name][0].write_metadata(metadata_1) + t3_meta = self.ts_iter.next() + metadata_2 = {'X-Timestamp': t3_meta.internal, + 'X-Object-Meta-Test': name} + tx_objs[name][0].write_metadata(metadata_2) + rx_objs[name] = self._create_ondisk_files(rx_df_mgr, name, policy, t3) + metadata_2b = {'X-Timestamp': t3_meta.internal, + 'X-Object-Meta-Test': name, + 'Content-Type': 'text/test', + 'Content-Type-Timestamp': t3_type.internal} + rx_objs[name][0].write_metadata(metadata_2b) + + # o4 on tx with one meta file having latest content-type, rx has + # .data and two .meta having latest content-type so nothing to sync + # i.e. 
o4 is the reverse of o3 scenario + name = 'o4' + t4 = self.ts_iter.next() + tx_objs[name] = self._create_ondisk_files(tx_df_mgr, name, policy, t4) + t4_type = self.ts_iter.next() + t4_meta = self.ts_iter.next() + metadata_2b = {'X-Timestamp': t4_meta.internal, + 'X-Object-Meta-Test': name, + 'Content-Type': 'text/test', + 'Content-Type-Timestamp': t4_type.internal} + tx_objs[name][0].write_metadata(metadata_2b) + rx_objs[name] = self._create_ondisk_files(rx_df_mgr, name, policy, t4) + metadata_1 = {'X-Timestamp': t4_type.internal, + 'Content-Type': 'text/test', + 'Content-Type-Timestamp': t4_type.internal} + rx_objs[name][0].write_metadata(metadata_1) + metadata_2 = {'X-Timestamp': t4_meta.internal, + 'X-Object-Meta-Test': name} + rx_objs[name][0].write_metadata(metadata_2) + + # o5 on tx with one meta file having latest content-type, rx has + # .data and no .meta + name = 'o5' + t5 = self.ts_iter.next() + tx_objs[name] = self._create_ondisk_files(tx_df_mgr, name, policy, t5) + t5_type = self.ts_iter.next() + t5_meta = self.ts_iter.next() + metadata = {'X-Timestamp': t5_meta.internal, + 'X-Object-Meta-Test': name, + 'Content-Type': 'text/test', + 'Content-Type-Timestamp': t5_type.internal} + tx_objs[name][0].write_metadata(metadata) + rx_objs[name] = self._create_ondisk_files(rx_df_mgr, name, policy, t5) + expected_subreqs['POST'].append(name) + + suffixes = set() + for diskfiles in tx_objs.values(): + for df in diskfiles: + suffixes.add(os.path.basename(os.path.dirname(df._datadir))) + + # create ssync sender instance... + job = {'device': self.device, + 'partition': self.partition, + 'policy': policy} + node = dict(self.rx_node) + node.update({'index': rx_node_index}) + sender = ssync_sender.Sender(self.daemon, node, job, suffixes) + # wrap connection from tx to rx to capture ssync messages... + sender.connect, trace = self.make_connect_wrapper(sender) + + # run the sync protocol... + success, in_sync_objs = sender() + + self.assertEqual(5, len(in_sync_objs), trace['messages']) + self.assertTrue(success) + + # verify protocol + results = self._analyze_trace(trace) + self.assertEqual(5, len(results['tx_missing'])) + self.assertEqual(3, len(results['rx_missing'])) + for subreq in results.get('tx_updates'): + obj = subreq['path'].split('/')[3] + method = subreq['method'] + self.assertTrue(obj in expected_subreqs[method], + 'Unexpected %s subreq for object %s, expected %s' + % (method, obj, expected_subreqs[method])) + expected_subreqs[method].remove(obj) + if method == 'PUT': + expected_body = '%s___None' % subreq['path'] + self.assertEqual(expected_body, subreq['body']) + # verify all expected subreqs consumed + for _method, expected in expected_subreqs.items(): + self.assertFalse(expected, + 'Expected subreqs not seen for %s for objects %s' + % (_method, expected)) + self.assertFalse(results['rx_updates']) + + # verify on disk files... + self._verify_ondisk_files(tx_objs, policy) + for oname, rx_obj in rx_objs.items(): + df = rx_obj[0].open() + metadata = df.get_metadata() + self.assertEqual(metadata['X-Object-Meta-Test'], oname) + self.assertEqual(metadata['Content-Type'], 'text/test') + # verify that tx and rx both generate the same suffix hashes... 
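# (the content-type timestamp now contributes its own '<t_ctype>_ctype' term
# to the suffix hash, as the expected-hash calculation in test_diskfile.py's
# _verify_get_hashes() above shows, so a receiver still missing the latest
# content-type would report a differing hash and keep being re-replicated)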
+ tx_hashes = tx_df_mgr.get_hashes( + self.device, self.partition, suffixes, policy) + rx_hashes = rx_df_mgr.get_hashes( + self.device, self.partition, suffixes, policy) + self.assertEqual(tx_hashes, rx_hashes) if __name__ == '__main__': unittest.main() diff --git a/test/unit/obj/test_ssync_receiver.py b/test/unit/obj/test_ssync_receiver.py index cb92608a85..037828741a 100644 --- a/test/unit/obj/test_ssync_receiver.py +++ b/test/unit/obj/test_ssync_receiver.py @@ -1927,27 +1927,57 @@ class TestModuleMethods(unittest.TestCase): ts_iter = make_timestamp_iter() t_data = next(ts_iter) t_meta = next(ts_iter) + t_ctype = next(ts_iter) d_meta_data = t_meta.raw - t_data.raw + d_ctype_data = t_ctype.raw - t_data.raw # legacy single timestamp string msg = '%s %s' % (object_hash, t_data.internal) expected = dict(object_hash=object_hash, ts_meta=t_data, - ts_data=t_data) + ts_data=t_data, + ts_ctype=t_data) self.assertEqual(expected, ssync_receiver.decode_missing(msg)) # hex meta delta encoded as extra message part msg = '%s %s m:%x' % (object_hash, t_data.internal, d_meta_data) expected = dict(object_hash=object_hash, ts_data=t_data, - ts_meta=t_meta) + ts_meta=t_meta, + ts_ctype=t_data) self.assertEqual(expected, ssync_receiver.decode_missing(msg)) + # hex content type delta encoded in extra message part + msg = '%s %s t:%x,m:%x' % (object_hash, t_data.internal, + d_ctype_data, d_meta_data) + expected = dict(object_hash=object_hash, + ts_data=t_data, + ts_meta=t_meta, + ts_ctype=t_ctype) + self.assertEqual( + expected, ssync_receiver.decode_missing(msg)) + + # order of subparts does not matter + msg = '%s %s m:%x,t:%x' % (object_hash, t_data.internal, + d_meta_data, d_ctype_data) + self.assertEqual( + expected, ssync_receiver.decode_missing(msg)) + + # hex content type delta may be zero + msg = '%s %s t:0,m:%x' % (object_hash, t_data.internal, d_meta_data) + expected = dict(object_hash=object_hash, + ts_data=t_data, + ts_meta=t_meta, + ts_ctype=t_data) + self.assertEqual( + expected, ssync_receiver.decode_missing(msg)) + # unexpected zero delta is tolerated msg = '%s %s m:0' % (object_hash, t_data.internal) expected = dict(object_hash=object_hash, ts_meta=t_data, - ts_data=t_data) + ts_data=t_data, + ts_ctype=t_data) self.assertEqual(expected, ssync_receiver.decode_missing(msg)) # unexpected subparts in timestamp delta part are tolerated @@ -1956,7 +1986,8 @@ class TestModuleMethods(unittest.TestCase): d_meta_data) expected = dict(object_hash=object_hash, ts_meta=t_meta, - ts_data=t_data) + ts_data=t_data, + ts_ctype=t_data) self.assertEqual( expected, ssync_receiver.decode_missing(msg)) @@ -1966,7 +1997,8 @@ class TestModuleMethods(unittest.TestCase): d_meta_data) expected = dict(object_hash=object_hash, ts_meta=t_meta, - ts_data=t_data) + ts_data=t_data, + ts_ctype=t_data) self.assertEqual(expected, ssync_receiver.decode_missing(msg)) def test_encode_wanted(self): diff --git a/test/unit/obj/test_ssync_sender.py b/test/unit/obj/test_ssync_sender.py index b7286527fd..40f7e25527 100644 --- a/test/unit/obj/test_ssync_sender.py +++ b/test/unit/obj/test_ssync_sender.py @@ -763,12 +763,14 @@ class TestSender(BaseTest): '/srv/node/dev/objects/9/def/' '9d41d8cd98f00b204e9800998ecf0def', '9d41d8cd98f00b204e9800998ecf0def', - {'ts_data': Timestamp(1380144472.22222)}) + {'ts_data': Timestamp(1380144472.22222), + 'ts_meta': Timestamp(1380144473.22222)}) yield ( '/srv/node/dev/objects/9/def/' '9d41d8cd98f00b204e9800998ecf1def', '9d41d8cd98f00b204e9800998ecf1def', {'ts_data': Timestamp(1380144474.44444), + 
'ts_ctype': Timestamp(1380144474.44448), 'ts_meta': Timestamp(1380144475.44444)}) else: raise Exception( @@ -792,18 +794,21 @@ class TestSender(BaseTest): ''.join(self.sender.connection.sent), '17\r\n:MISSING_CHECK: START\r\n\r\n' '33\r\n9d41d8cd98f00b204e9800998ecf0abc 1380144470.00000\r\n\r\n' - '33\r\n9d41d8cd98f00b204e9800998ecf0def 1380144472.22222\r\n\r\n' - '3b\r\n9d41d8cd98f00b204e9800998ecf1def 1380144474.44444 ' + '3b\r\n9d41d8cd98f00b204e9800998ecf0def 1380144472.22222 ' 'm:186a0\r\n\r\n' + '3f\r\n9d41d8cd98f00b204e9800998ecf1def 1380144474.44444 ' + 'm:186a0,t:4\r\n\r\n' '15\r\n:MISSING_CHECK: END\r\n\r\n') self.assertEqual(self.sender.send_map, {}) candidates = [('9d41d8cd98f00b204e9800998ecf0abc', dict(ts_data=Timestamp(1380144470.00000))), ('9d41d8cd98f00b204e9800998ecf0def', - dict(ts_data=Timestamp(1380144472.22222))), + dict(ts_data=Timestamp(1380144472.22222), + ts_meta=Timestamp(1380144473.22222))), ('9d41d8cd98f00b204e9800998ecf1def', dict(ts_data=Timestamp(1380144474.44444), - ts_meta=Timestamp(1380144475.44444)))] + ts_meta=Timestamp(1380144475.44444), + ts_ctype=Timestamp(1380144474.44448)))] self.assertEqual(self.sender.available_map, dict(candidates)) def test_missing_check_far_end_disconnect(self): @@ -1545,8 +1550,10 @@ class TestModuleMethods(unittest.TestCase): object_hash = '9d41d8cd98f00b204e9800998ecf0abc' ts_iter = make_timestamp_iter() t_data = next(ts_iter) + t_type = next(ts_iter) t_meta = next(ts_iter) d_meta_data = t_meta.raw - t_data.raw + d_type_data = t_type.raw - t_data.raw # equal data and meta timestamps -> legacy single timestamp string expected = '%s %s' % (object_hash, t_data.internal) @@ -1560,9 +1567,36 @@ class TestModuleMethods(unittest.TestCase): expected, ssync_sender.encode_missing(object_hash, t_data, ts_meta=t_meta)) + # newer meta timestamp -> hex data delta encoded as extra message part + # content type timestamp equals data timestamp -> no delta + expected = '%s %s m:%x' % (object_hash, t_data.internal, d_meta_data) + self.assertEqual( + expected, + ssync_sender.encode_missing(object_hash, t_data, t_meta, t_data)) + + # content type timestamp newer data timestamp -> delta encoded + expected = ('%s %s m:%x,t:%x' + % (object_hash, t_data.internal, d_meta_data, d_type_data)) + self.assertEqual( + expected, + ssync_sender.encode_missing(object_hash, t_data, t_meta, t_type)) + + # content type timestamp equal to meta timestamp -> delta encoded + expected = ('%s %s m:%x,t:%x' + % (object_hash, t_data.internal, d_meta_data, d_type_data)) + self.assertEqual( + expected, + ssync_sender.encode_missing(object_hash, t_data, t_meta, t_type)) + # test encode and decode functions invert expected = {'object_hash': object_hash, 'ts_meta': t_meta, - 'ts_data': t_data} + 'ts_data': t_data, 'ts_ctype': t_type} + msg = ssync_sender.encode_missing(**expected) + actual = ssync_receiver.decode_missing(msg) + self.assertEqual(expected, actual) + + expected = {'object_hash': object_hash, 'ts_meta': t_meta, + 'ts_data': t_meta, 'ts_ctype': t_meta} msg = ssync_sender.encode_missing(**expected) actual = ssync_receiver.decode_missing(msg) self.assertEqual(expected, actual) diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index 961c7fd3b2..551b51af22 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -3210,7 +3210,8 @@ class TestObjectController(unittest.TestCase): backend_requests.append((method, path, headers)) req = Request.blank('/v1/a/c/o', {}, method='POST', - headers={'X-Object-Meta-Color': 
'Blue'}) + headers={'X-Object-Meta-Color': 'Blue', + 'Content-Type': 'text/plain'}) # we want the container_info response to says a policy index of 1 resp_headers = {'X-Backend-Storage-Policy-Index': 1} @@ -3271,6 +3272,7 @@ class TestObjectController(unittest.TestCase): backend_requests = [] req = Request.blank('/v1/a/c/o', {}, method='POST', headers={'X-Object-Meta-Color': 'Blue', + 'Content-Type': 'text/plain', 'X-Backend-Storage-Policy-Index': 0}) with mocked_http_conn( 200, 200, 202, 202, 202,