From fa89064933fefa33702520b40734c11f08b2c569 Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Tue, 17 Mar 2015 08:32:57 +0000 Subject: [PATCH] Per-policy DiskFile classes Adds specific disk file classes for EC policy types. The new ECDiskFile and ECDiskFileWriter classes are used by the ECDiskFileManager. ECDiskFileManager is registered with the DiskFileRouter for use with EC_POLICY type policies. Refactors diskfile tests into BaseDiskFileMixin and BaseDiskFileManagerMixin classes which are then extended in subclasses for the legacy replication-type DiskFile* and ECDiskFile* classes. Refactor to prefer use of a policy instance reference over a policy_index int to refer to a policy. Add additional verification to DiskFileManager.get_dev_path to validate the device root with common.constraints.check_dir, even when mount_check is disabled for use in on a virtual swift-all-in-one. Co-Authored-By: Thiago da Silva Co-Authored-By: John Dickinson Co-Authored-By: Clay Gerrard Co-Authored-By: Tushar Gohad Co-Authored-By: Paul Luse Co-Authored-By: Samuel Merritt Co-Authored-By: Christian Schwede Co-Authored-By: Yuan Zhou Change-Id: I22f915160dc67a9e18f4738c1ddf068344e8ad5d --- swift/cli/info.py | 7 +- swift/common/constraints.py | 13 + swift/obj/diskfile.py | 877 +++++- swift/obj/mem_diskfile.py | 16 + swift/obj/mem_server.py | 51 - swift/obj/replicator.py | 55 +- swift/obj/server.py | 11 +- swift/obj/ssync_sender.py | 2 +- swift/obj/updater.py | 40 +- test/unit/common/test_constraints.py | 5 + test/unit/obj/test_auditor.py | 61 +- test/unit/obj/test_diskfile.py | 4174 ++++++++++++++++++++------ test/unit/obj/test_replicator.py | 90 +- test/unit/obj/test_server.py | 117 +- test/unit/obj/test_ssync_sender.py | 232 +- test/unit/obj/test_updater.py | 24 +- test/unit/proxy/test_mem_server.py | 17 +- test/unit/proxy/test_server.py | 35 +- test/unit/proxy/test_sysmeta.py | 2 +- 19 files changed, 4480 insertions(+), 1349 deletions(-) diff --git a/swift/cli/info.py b/swift/cli/info.py index 142b103f47..a8cfabd17d 100644 --- a/swift/cli/info.py +++ b/swift/cli/info.py @@ -24,7 +24,7 @@ from swift.common.request_helpers import is_sys_meta, is_user_meta, \ from swift.account.backend import AccountBroker, DATADIR as ABDATADIR from swift.container.backend import ContainerBroker, DATADIR as CBDATADIR from swift.obj.diskfile import get_data_dir, read_metadata, DATADIR_BASE, \ - extract_policy_index + extract_policy from swift.common.storage_policy import POLICIES @@ -341,10 +341,7 @@ def print_obj(datafile, check_etag=True, swift_dir='/etc/swift', datadir = DATADIR_BASE # try to extract policy index from datafile disk path - try: - policy_index = extract_policy_index(datafile) - except ValueError: - pass + policy_index = int(extract_policy(datafile) or POLICIES.legacy) try: if policy_index: diff --git a/swift/common/constraints.py b/swift/common/constraints.py index d4458ddf84..8e3ba53b00 100644 --- a/swift/common/constraints.py +++ b/swift/common/constraints.py @@ -204,6 +204,19 @@ def check_object_creation(req, object_name): return check_metadata(req, 'object') +def check_dir(root, drive): + """ + Verify that the path to the device is a directory and is a lesser + constraint that is enforced when a full mount_check isn't possible + with, for instance, a VM using loopback or partitions. 
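+
+    For example (an illustrative call, assuming a device root of
+    ``/srv/node``)::
+
+        check_dir('/srv/node', 'sdb1')  # True only if /srv/node/sdb1
+                                        # is a directory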
+ + :param root: base path where the dir is + :param drive: drive name to be checked + :returns: True if it is a valid directoy, False otherwise + """ + return os.path.isdir(os.path.join(root, drive)) + + def check_mount(root, drive): """ Verify that the path to the device is a mount point and mounted. This diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py index 06073ef91d..654465ee75 100644 --- a/swift/obj/diskfile.py +++ b/swift/obj/diskfile.py @@ -40,7 +40,7 @@ import hashlib import logging import traceback import xattr -from os.path import basename, dirname, exists, getmtime, join +from os.path import basename, dirname, exists, getmtime, join, splitext from random import shuffle from tempfile import mkstemp from contextlib import contextmanager @@ -50,7 +50,7 @@ from eventlet import Timeout from eventlet.hubs import trampoline from swift import gettext_ as _ -from swift.common.constraints import check_mount +from swift.common.constraints import check_mount, check_dir from swift.common.request_helpers import is_sys_meta from swift.common.utils import mkdirs, Timestamp, \ storage_directory, hash_path, renamer, fallocate, fsync, \ @@ -63,7 +63,9 @@ from swift.common.exceptions import DiskFileQuarantined, DiskFileNotExist, \ DiskFileDeleted, DiskFileError, DiskFileNotOpen, PathNotDir, \ ReplicationLockTimeout, DiskFileExpired, DiskFileXattrNotSupported from swift.common.swob import multi_range_iterator -from swift.common.storage_policy import get_policy_string, split_policy_string +from swift.common.storage_policy import ( + get_policy_string, split_policy_string, PolicyError, POLICIES, + REPL_POLICY, EC_POLICY) from functools import partial @@ -154,10 +156,10 @@ def write_metadata(fd, metadata, xattr_size=65536): raise -def extract_policy_index(obj_path): +def extract_policy(obj_path): """ - Extracts the policy index for an object (based on the name of the objects - directory) given the device-relative path to the object. Returns 0 in + Extracts the policy for an object (based on the name of the objects + directory) given the device-relative path to the object. Returns None in the event that the path is malformed in some way. 
The device-relative path is everything after the mount point; for example: @@ -170,15 +172,18 @@ def extract_policy_index(obj_path): objects-5/179/485dc017205a81df3af616d917c90179/1401811134.873649.data :param obj_path: device-relative path of an object - :returns: storage policy index + :returns: a :class:`~swift.common.storage_policy.BaseStoragePolicy` or None """ - policy_idx = 0 try: obj_portion = obj_path[obj_path.index(DATADIR_BASE):] obj_dirname = obj_portion[:obj_portion.index('/')] except Exception: - return policy_idx - return int(split_policy_string(obj_dirname)[1]) + return None + try: + base, policy = split_policy_string(obj_dirname) + except PolicyError: + return None + return policy def quarantine_renamer(device_path, corrupted_file_path): @@ -193,9 +198,13 @@ def quarantine_renamer(device_path, corrupted_file_path): :raises OSError: re-raises non errno.EEXIST / errno.ENOTEMPTY exceptions from rename """ + policy = extract_policy(corrupted_file_path) + if policy is None: + # TODO: support a quarantine-unknown location + policy = POLICIES.legacy from_dir = dirname(corrupted_file_path) to_dir = join(device_path, 'quarantined', - get_data_dir(extract_policy_index(corrupted_file_path)), + get_data_dir(policy), basename(from_dir)) invalidate_hash(dirname(from_dir)) try: @@ -425,8 +434,9 @@ class AuditLocation(object): stringify to a filesystem path so the auditor's logs look okay. """ - def __init__(self, path, device, partition): - self.path, self.device, self.partition = path, device, partition + def __init__(self, path, device, partition, policy): + self.path, self.device, self.partition, self.policy = ( + path, device, partition, policy) def __str__(self): return str(self.path) @@ -466,16 +476,17 @@ def object_audit_location_generator(devices, mount_check=True, logger=None, _('Skipping %s as it is not mounted'), device) continue # loop through object dirs for all policies - for dir in [dir for dir in os.listdir(os.path.join(devices, device)) - if dir.startswith(DATADIR_BASE)]: - datadir_path = os.path.join(devices, device, dir) - # warn if the object dir doesn't match with a policy + for dir_ in os.listdir(os.path.join(devices, device)): + if not dir_.startswith(DATADIR_BASE): + continue try: - base, policy = split_policy_string(dir) - except ValueError: + base, policy = split_policy_string(dir_) + except PolicyError as e: if logger: - logger.warn(_('Directory %s does not map to a ' - 'valid policy') % dir) + logger.warn(_('Directory %r does not map ' + 'to a valid policy (%s)') % (dir_, e)) + continue + datadir_path = os.path.join(devices, device, dir_) partitions = listdir(datadir_path) for partition in partitions: part_path = os.path.join(datadir_path, partition) @@ -495,9 +506,45 @@ def object_audit_location_generator(devices, mount_check=True, logger=None, continue for hsh in hashes: hsh_path = os.path.join(suff_path, hsh) - yield AuditLocation(hsh_path, device, partition) + yield AuditLocation(hsh_path, device, partition, + policy) +def strip_self(f): + """ + Wrapper to attach module level functions to base class. + """ + def wrapper(self, *args, **kwargs): + return f(*args, **kwargs) + return wrapper + + +class DiskFileRouter(object): + + policy_type_to_manager_cls = {} + + @classmethod + def register(cls, policy_type): + """ + Decorator for Storage Policy implementations to register + their DiskFile implementation. 
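+
+        For example, the two managers defined in this module register
+        themselves like this (illustrative excerpt)::
+
+            @DiskFileRouter.register(REPL_POLICY)
+            class DiskFileManager(object):
+                ...
+
+            @DiskFileRouter.register(EC_POLICY)
+            class ECDiskFileManager(DiskFileManager):
+                ...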
+ """ + def register_wrapper(diskfile_cls): + cls.policy_type_to_manager_cls[policy_type] = diskfile_cls + return diskfile_cls + return register_wrapper + + def __init__(self, *args, **kwargs): + self.policy_to_manager = {} + for policy in POLICIES: + manager_cls = self.policy_type_to_manager_cls[policy.policy_type] + self.policy_to_manager[policy] = manager_cls(*args, **kwargs) + + def __getitem__(self, policy): + return self.policy_to_manager[policy] + + +@DiskFileRouter.register(REPL_POLICY) class DiskFileManager(object): """ Management class for devices, providing common place for shared parameters @@ -520,6 +567,16 @@ class DiskFileManager(object): :param conf: caller provided configuration object :param logger: caller provided logger """ + + diskfile_cls = None # DiskFile will be set after that class is defined + + # module level functions dropped to implementation specific + hash_cleanup_listdir = strip_self(hash_cleanup_listdir) + _get_hashes = strip_self(get_hashes) + invalidate_hash = strip_self(invalidate_hash) + get_ondisk_files = strip_self(get_ondisk_files) + quarantine_renamer = strip_self(quarantine_renamer) + def __init__(self, conf, logger): self.logger = logger self.devices = conf.get('devices', '/srv/node') @@ -576,21 +633,25 @@ class DiskFileManager(object): def get_dev_path(self, device, mount_check=None): """ - Return the path to a device, checking to see that it is a proper mount - point based on a configuration parameter. + Return the path to a device, first checking to see if either it + is a proper mount point, or at least a directory depending on + the mount_check configuration option. :param device: name of target device :param mount_check: whether or not to check mountedness of device. Defaults to bool(self.mount_check). :returns: full path to the device, None if the path to the device is - not a proper mount point. + not a proper mount point or directory. 
""" - should_check = self.mount_check if mount_check is None else mount_check - if should_check and not check_mount(self.devices, device): - dev_path = None - else: - dev_path = os.path.join(self.devices, device) - return dev_path + # we'll do some kind of check unless explicitly forbidden + if mount_check is not False: + if mount_check or self.mount_check: + check = check_mount + else: + check = check_dir + if not check(self.devices, device): + return None + return os.path.join(self.devices, device) @contextmanager def replication_lock(self, device): @@ -612,28 +673,27 @@ class DiskFileManager(object): yield True def pickle_async_update(self, device, account, container, obj, data, - timestamp, policy_idx): + timestamp, policy): device_path = self.construct_dev_path(device) - async_dir = os.path.join(device_path, get_async_dir(policy_idx)) + async_dir = os.path.join(device_path, get_async_dir(policy)) ohash = hash_path(account, container, obj) self.threadpools[device].run_in_thread( write_pickle, data, os.path.join(async_dir, ohash[-3:], ohash + '-' + Timestamp(timestamp).internal), - os.path.join(device_path, get_tmp_dir(policy_idx))) + os.path.join(device_path, get_tmp_dir(policy))) self.logger.increment('async_pendings') def get_diskfile(self, device, partition, account, container, obj, - policy_idx=0, **kwargs): + policy, **kwargs): dev_path = self.get_dev_path(device) if not dev_path: raise DiskFileDeviceUnavailable() - return DiskFile(self, dev_path, self.threadpools[device], - partition, account, container, obj, - policy_idx=policy_idx, - use_splice=self.use_splice, pipe_size=self.pipe_size, - **kwargs) + return self.diskfile_cls(self, dev_path, self.threadpools[device], + partition, account, container, obj, + policy=policy, use_splice=self.use_splice, + pipe_size=self.pipe_size, **kwargs) def object_audit_location_generator(self, device_dirs=None): return object_audit_location_generator(self.devices, self.mount_check, @@ -641,12 +701,12 @@ class DiskFileManager(object): def get_diskfile_from_audit_location(self, audit_location): dev_path = self.get_dev_path(audit_location.device, mount_check=False) - return DiskFile.from_hash_dir( + return self.diskfile_cls.from_hash_dir( self, audit_location.path, dev_path, - audit_location.partition) + audit_location.partition, policy=audit_location.policy) def get_diskfile_from_hash(self, device, partition, object_hash, - policy_idx, **kwargs): + policy, **kwargs): """ Returns a DiskFile instance for an object at the given object_hash. 
Just in case someone thinks of refactoring, be @@ -660,13 +720,14 @@ class DiskFileManager(object): if not dev_path: raise DiskFileDeviceUnavailable() object_path = os.path.join( - dev_path, get_data_dir(policy_idx), partition, object_hash[-3:], + dev_path, get_data_dir(policy), str(partition), object_hash[-3:], object_hash) try: - filenames = hash_cleanup_listdir(object_path, self.reclaim_age) + filenames = self.hash_cleanup_listdir(object_path, + self.reclaim_age) except OSError as err: if err.errno == errno.ENOTDIR: - quar_path = quarantine_renamer(dev_path, object_path) + quar_path = self.quarantine_renamer(dev_path, object_path) logging.exception( _('Quarantined %(object_path)s to %(quar_path)s because ' 'it is not a directory'), {'object_path': object_path, @@ -686,21 +747,20 @@ class DiskFileManager(object): metadata.get('name', ''), 3, 3, True) except ValueError: raise DiskFileNotExist() - return DiskFile(self, dev_path, self.threadpools[device], - partition, account, container, obj, - policy_idx=policy_idx, **kwargs) + return self.diskfile_cls(self, dev_path, self.threadpools[device], + partition, account, container, obj, + policy=policy, **kwargs) - def get_hashes(self, device, partition, suffix, policy_idx): + def get_hashes(self, device, partition, suffixes, policy): dev_path = self.get_dev_path(device) if not dev_path: raise DiskFileDeviceUnavailable() - partition_path = os.path.join(dev_path, get_data_dir(policy_idx), + partition_path = os.path.join(dev_path, get_data_dir(policy), partition) if not os.path.exists(partition_path): mkdirs(partition_path) - suffixes = suffix.split('-') if suffix else [] _junk, hashes = self.threadpools[device].force_run_in_thread( - get_hashes, partition_path, recalculate=suffixes) + self._get_hashes, partition_path, recalculate=suffixes) return hashes def _listdir(self, path): @@ -713,7 +773,7 @@ class DiskFileManager(object): path, err) return [] - def yield_suffixes(self, device, partition, policy_idx): + def yield_suffixes(self, device, partition, policy): """ Yields tuples of (full_path, suffix_only) for suffixes stored on the given device and partition. 
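+
+        For example (illustrative), a yielded tuple for a policy-0 object
+        might be::
+
+            ('/srv/node/sdb1/objects/1234/abc', 'abc')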
@@ -721,7 +781,7 @@ class DiskFileManager(object): dev_path = self.get_dev_path(device) if not dev_path: raise DiskFileDeviceUnavailable() - partition_path = os.path.join(dev_path, get_data_dir(policy_idx), + partition_path = os.path.join(dev_path, get_data_dir(policy), partition) for suffix in self._listdir(partition_path): if len(suffix) != 3: @@ -732,7 +792,7 @@ class DiskFileManager(object): continue yield (os.path.join(partition_path, suffix), suffix) - def yield_hashes(self, device, partition, policy_idx, suffixes=None): + def yield_hashes(self, device, partition, policy, suffixes=None, **kwargs): """ Yields tuples of (full_path, hash_only, timestamp) for object information stored for the given device, partition, and @@ -745,17 +805,18 @@ class DiskFileManager(object): if not dev_path: raise DiskFileDeviceUnavailable() if suffixes is None: - suffixes = self.yield_suffixes(device, partition, policy_idx) + suffixes = self.yield_suffixes(device, partition, policy) else: - partition_path = os.path.join(dev_path, get_data_dir(policy_idx), - partition) + partition_path = os.path.join(dev_path, + get_data_dir(policy), + str(partition)) suffixes = ( (os.path.join(partition_path, suffix), suffix) for suffix in suffixes) for suffix_path, suffix in suffixes: for object_hash in self._listdir(suffix_path): object_path = os.path.join(suffix_path, object_hash) - for name in hash_cleanup_listdir( + for name in self.hash_cleanup_listdir( object_path, self.reclaim_age): ts, ext = name.rsplit('.', 1) yield (object_path, object_hash, ts) @@ -787,8 +848,11 @@ class DiskFileWriter(object): :param tmppath: full path name of the opened file descriptor :param bytes_per_sync: number bytes written between sync calls :param threadpool: internal thread pool to use for disk operations + :param diskfile: the diskfile creating this DiskFileWriter instance """ - def __init__(self, name, datadir, fd, tmppath, bytes_per_sync, threadpool): + + def __init__(self, name, datadir, fd, tmppath, bytes_per_sync, threadpool, + diskfile): # Parameter tracking self._name = name self._datadir = datadir @@ -796,6 +860,7 @@ class DiskFileWriter(object): self._tmppath = tmppath self._bytes_per_sync = bytes_per_sync self._threadpool = threadpool + self._diskfile = diskfile # Internal attributes self._upload_size = 0 @@ -803,6 +868,10 @@ class DiskFileWriter(object): self._extension = '.data' self._put_succeeded = False + @property + def manager(self): + return self._diskfile.manager + @property def put_succeeded(self): return self._put_succeeded @@ -848,7 +917,7 @@ class DiskFileWriter(object): # drop_cache() after fsync() to avoid redundant work (pages all # clean). drop_buffer_cache(self._fd, 0, self._upload_size) - invalidate_hash(dirname(self._datadir)) + self.manager.invalidate_hash(dirname(self._datadir)) # After the rename completes, this object will be available for other # requests to reference. renamer(self._tmppath, target_path) @@ -857,7 +926,7 @@ class DiskFileWriter(object): # succeeded, the tempfile would no longer exist at its original path. self._put_succeeded = True try: - hash_cleanup_listdir(self._datadir) + self.manager.hash_cleanup_listdir(self._datadir) except OSError: logging.exception(_('Problem cleaning up %s'), self._datadir) @@ -880,6 +949,16 @@ class DiskFileWriter(object): self._threadpool.force_run_in_thread( self._finalize_put, metadata, target_path) + def commit(self, timestamp): + """ + Perform any operations necessary to mark the object as durable. For + replication policy type this is a no-op. 
+ + :param timestamp: object put timestamp, an instance of + :class:`~swift.common.utils.Timestamp` + """ + pass + class DiskFileReader(object): """ @@ -910,17 +989,20 @@ class DiskFileReader(object): :param quarantine_hook: 1-arg callable called w/reason when quarantined :param use_splice: if true, use zero-copy splice() to send data :param pipe_size: size of pipe buffer used in zero-copy operations + :param diskfile: the diskfile creating this DiskFileReader instance :param keep_cache: should resulting reads be kept in the buffer cache """ def __init__(self, fp, data_file, obj_size, etag, threadpool, disk_chunk_size, keep_cache_size, device_path, logger, - quarantine_hook, use_splice, pipe_size, keep_cache=False): + quarantine_hook, use_splice, pipe_size, diskfile, + keep_cache=False): # Parameter tracking self._fp = fp self._data_file = data_file self._obj_size = obj_size self._etag = etag self._threadpool = threadpool + self._diskfile = diskfile self._disk_chunk_size = disk_chunk_size self._device_path = device_path self._logger = logger @@ -943,6 +1025,10 @@ class DiskFileReader(object): self._suppress_file_closing = False self._quarantined_dir = None + @property + def manager(self): + return self._diskfile.manager + def __iter__(self): """Returns an iterator over the data file.""" try: @@ -1123,7 +1209,8 @@ class DiskFileReader(object): def _quarantine(self, msg): self._quarantined_dir = self._threadpool.run_in_thread( - quarantine_renamer, self._device_path, self._data_file) + self.manager.quarantine_renamer, self._device_path, + self._data_file) self._logger.warn("Quarantined object %s: %s" % ( self._data_file, msg)) self._logger.increment('quarantines') @@ -1189,15 +1276,18 @@ class DiskFile(object): :param container: container name for the object :param obj: object name for the object :param _datadir: override the full datadir otherwise constructed here - :param policy_idx: used to get the data dir when constructing it here + :param policy: the StoragePolicy instance :param use_splice: if true, use zero-copy splice() to send data :param pipe_size: size of pipe buffer used in zero-copy operations """ + reader_cls = DiskFileReader + writer_cls = DiskFileWriter + def __init__(self, mgr, device_path, threadpool, partition, account=None, container=None, obj=None, _datadir=None, - policy_idx=0, use_splice=False, pipe_size=None): - self._mgr = mgr + policy=None, use_splice=False, pipe_size=None, **kwargs): + self._manager = mgr self._device_path = device_path self._threadpool = threadpool or ThreadPool(nthreads=0) self._logger = mgr.logger @@ -1205,6 +1295,7 @@ class DiskFile(object): self._bytes_per_sync = mgr.bytes_per_sync self._use_splice = use_splice self._pipe_size = pipe_size + self.policy = policy if account and container and obj: self._name = '/' + '/'.join((account, container, obj)) self._account = account @@ -1212,7 +1303,7 @@ class DiskFile(object): self._obj = obj name_hash = hash_path(account, container, obj) self._datadir = join( - device_path, storage_directory(get_data_dir(policy_idx), + device_path, storage_directory(get_data_dir(policy), partition, name_hash)) else: # gets populated when we read the metadata @@ -1221,7 +1312,7 @@ class DiskFile(object): self._container = None self._obj = None self._datadir = None - self._tmpdir = join(device_path, get_tmp_dir(policy_idx)) + self._tmpdir = join(device_path, get_tmp_dir(policy)) self._metadata = None self._data_file = None self._fp = None @@ -1232,9 +1323,13 @@ class DiskFile(object): else: name_hash = 
hash_path(account, container, obj) self._datadir = join( - device_path, storage_directory(get_data_dir(policy_idx), + device_path, storage_directory(get_data_dir(policy), partition, name_hash)) + @property + def manager(self): + return self._manager + @property def account(self): return self._account @@ -1260,8 +1355,9 @@ class DiskFile(object): return Timestamp(self._metadata.get('X-Timestamp')) @classmethod - def from_hash_dir(cls, mgr, hash_dir_path, device_path, partition): - return cls(mgr, device_path, None, partition, _datadir=hash_dir_path) + def from_hash_dir(cls, mgr, hash_dir_path, device_path, partition, policy): + return cls(mgr, device_path, None, partition, _datadir=hash_dir_path, + policy=policy) def open(self): """ @@ -1300,7 +1396,7 @@ class DiskFile(object): .. note:: - An implemenation shall raise `DiskFileNotOpen` when has not + An implementation shall raise `DiskFileNotOpen` when has not previously invoked the :func:`swift.obj.diskfile.DiskFile.open` method. """ @@ -1332,7 +1428,7 @@ class DiskFile(object): :returns: DiskFileQuarantined exception object """ self._quarantined_dir = self._threadpool.run_in_thread( - quarantine_renamer, self._device_path, data_file) + self.manager.quarantine_renamer, self._device_path, data_file) self._logger.warn("Quarantined object %s: %s" % ( data_file, msg)) self._logger.increment('quarantines') @@ -1377,7 +1473,7 @@ class DiskFile(object): # The data directory does not exist, so the object cannot exist. fileset = (None, None, None) else: - fileset = get_ondisk_files(files, self._datadir) + fileset = self.manager.get_ondisk_files(files, self._datadir) return fileset def _construct_exception_from_ts_file(self, ts_file): @@ -1569,12 +1665,12 @@ class DiskFile(object): Not needed by the REST layer. :returns: a :class:`swift.obj.diskfile.DiskFileReader` object """ - dr = DiskFileReader( + dr = self.reader_cls( self._fp, self._data_file, int(self._metadata['Content-Length']), self._metadata['ETag'], self._threadpool, self._disk_chunk_size, - self._mgr.keep_cache_size, self._device_path, self._logger, + self._manager.keep_cache_size, self._device_path, self._logger, use_splice=self._use_splice, quarantine_hook=_quarantine_hook, - pipe_size=self._pipe_size, keep_cache=keep_cache) + pipe_size=self._pipe_size, diskfile=self, keep_cache=keep_cache) # At this point the reader object is now responsible for closing # the file pointer. self._fp = None @@ -1614,8 +1710,10 @@ class DiskFile(object): if err.errno in (errno.ENOSPC, errno.EDQUOT): raise DiskFileNoSpace() raise - dfw = DiskFileWriter(self._name, self._datadir, fd, tmppath, - self._bytes_per_sync, self._threadpool) + dfw = self.writer_cls(self._name, self._datadir, fd, tmppath, + bytes_per_sync=self._bytes_per_sync, + threadpool=self._threadpool, + diskfile=self) yield dfw finally: try: @@ -1664,8 +1762,619 @@ class DiskFile(object): :raises DiskFileError: this implementation will raise the same errors as the `create()` method. 
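+
+        For example (illustrative)::
+
+            df.delete(Timestamp(time.time()))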
""" - timestamp = Timestamp(timestamp).internal - + # this is dumb, only tests send in strings + timestamp = Timestamp(timestamp) with self.create() as deleter: deleter._extension = '.ts' - deleter.put({'X-Timestamp': timestamp}) + deleter.put({'X-Timestamp': timestamp.internal}) + +# TODO: move DiskFileManager definition down here +DiskFileManager.diskfile_cls = DiskFile + + +class ECDiskFileReader(DiskFileReader): + pass + + +class ECDiskFileWriter(DiskFileWriter): + + def _finalize_durable(self, durable_file_path): + exc = msg = None + try: + with open(durable_file_path, 'w') as _fd: + fsync(_fd) + try: + self.manager.hash_cleanup_listdir(self._datadir) + except OSError: + self.manager.logger.exception( + _('Problem cleaning up %s'), self._datadir) + except OSError: + msg = (_('Problem fsyncing durable state file: %s'), + durable_file_path) + exc = DiskFileError(msg) + except IOError as io_err: + if io_err.errno in (errno.ENOSPC, errno.EDQUOT): + msg = (_("No space left on device for %s"), + durable_file_path) + exc = DiskFileNoSpace() + else: + msg = (_('Problem writing durable state file: %s'), + durable_file_path) + exc = DiskFileError(msg) + if exc: + self.manager.logger.exception(msg) + raise exc + + def commit(self, timestamp): + """ + Finalize put by writing a timestamp.durable file for the object. We + do this for EC policy because it requires a 2-phase put commit + confirmation. + + :param timestamp: object put timestamp, an instance of + :class:`~swift.common.utils.Timestamp` + """ + durable_file_path = os.path.join( + self._datadir, timestamp.internal + '.durable') + self._threadpool.force_run_in_thread( + self._finalize_durable, durable_file_path) + + def put(self, metadata): + """ + The only difference between this method and the replication policy + DiskFileWriter method is the call into manager.make_on_disk_filename + to construct the data file name. + """ + timestamp = Timestamp(metadata['X-Timestamp']) + fi = None + if self._extension == '.data': + # generally we treat the fragment index provided in metadata as + # canon, but if it's unavailable (e.g. tests) it's reasonable to + # use the frag_index provided at instantiation. Either way make + # sure that the fragment index is included in object sysmeta. + fi = metadata.setdefault('X-Object-Sysmeta-Ec-Frag-Index', + self._diskfile._frag_index) + filename = self.manager.make_on_disk_filename( + timestamp, self._extension, frag_index=fi) + metadata['name'] = self._name + target_path = join(self._datadir, filename) + + self._threadpool.force_run_in_thread( + self._finalize_put, metadata, target_path) + + +class ECDiskFile(DiskFile): + + reader_cls = ECDiskFileReader + writer_cls = ECDiskFileWriter + + def __init__(self, *args, **kwargs): + super(ECDiskFile, self).__init__(*args, **kwargs) + frag_index = kwargs.get('frag_index') + self._frag_index = None + if frag_index is not None: + self._frag_index = self.manager.validate_fragment_index(frag_index) + + def _get_ondisk_file(self): + """ + The only difference between this method and the replication policy + DiskFile method is passing in the frag_index kwarg to our manager's + get_ondisk_files method. + """ + try: + files = os.listdir(self._datadir) + except OSError as err: + if err.errno == errno.ENOTDIR: + # If there's a file here instead of a directory, quarantine + # it; something's gone wrong somewhere. 
+ raise self._quarantine( + # hack: quarantine_renamer actually renames the directory + # enclosing the filename you give it, but here we just + # want this one file and not its parent. + os.path.join(self._datadir, "made-up-filename"), + "Expected directory, found file at %s" % self._datadir) + elif err.errno != errno.ENOENT: + raise DiskFileError( + "Error listing directory %s: %s" % (self._datadir, err)) + # The data directory does not exist, so the object cannot exist. + fileset = (None, None, None) + else: + fileset = self.manager.get_ondisk_files( + files, self._datadir, frag_index=self._frag_index) + return fileset + + def purge(self, timestamp, frag_index): + """ + Remove a tombstone file matching the specified timestamp or + datafile matching the specified timestamp and fragment index + from the object directory. + + This provides the EC reconstructor/ssync process with a way to + remove a tombstone or fragment from a handoff node after + reverting it to its primary node. + + The hash will be invalidated, and if empty or invalid the + hsh_path will be removed on next hash_cleanup_listdir. + + :param timestamp: the object timestamp, an instance of + :class:`~swift.common.utils.Timestamp` + :param frag_index: a fragment archive index, must be a whole number. + """ + for ext in ('.data', '.ts'): + purge_file = self.manager.make_on_disk_filename( + timestamp, ext=ext, frag_index=frag_index) + remove_file(os.path.join(self._datadir, purge_file)) + self.manager.invalidate_hash(dirname(self._datadir)) + + +@DiskFileRouter.register(EC_POLICY) +class ECDiskFileManager(DiskFileManager): + diskfile_cls = ECDiskFile + + def validate_fragment_index(self, frag_index): + """ + Return int representation of frag_index, or raise a DiskFileError if + frag_index is not a whole number. + """ + try: + frag_index = int(str(frag_index)) + except (ValueError, TypeError) as e: + raise DiskFileError( + 'Bad fragment index: %s: %s' % (frag_index, e)) + if frag_index < 0: + raise DiskFileError( + 'Fragment index must not be negative: %s' % frag_index) + return frag_index + + def make_on_disk_filename(self, timestamp, ext=None, frag_index=None, + *a, **kw): + """ + Returns the EC specific filename for given timestamp. + + :param timestamp: the object timestamp, an instance of + :class:`~swift.common.utils.Timestamp` + :param ext: an optional string representing a file extension to be + appended to the returned file name + :param frag_index: a fragment archive index, used with .data extension + only, must be a whole number. + :returns: a file name + :raises DiskFileError: if ext=='.data' and the kwarg frag_index is not + a whole number + """ + rv = timestamp.internal + if ext == '.data': + # for datafiles only we encode the fragment index in the filename + # to allow archives of different indexes to temporarily be stored + # on the same node in certain situations + frag_index = self.validate_fragment_index(frag_index) + rv += '#' + str(frag_index) + if ext: + rv = '%s%s' % (rv, ext) + return rv + + def parse_on_disk_filename(self, filename): + """ + Returns the timestamp extracted from a policy specific .data file name. + For EC policy the data file name includes a fragment index which must + be stripped off to retrieve the timestamp. 
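+
+        For example (illustrative, assuming ``mgr`` is an ECDiskFileManager
+        instance)::
+
+            info = mgr.parse_on_disk_filename('1401811134.87365#2.data')
+            # info['frag_index'] == 2 and info['ext'] == '.data';
+            # info['timestamp'] is a Timestamp for 1401811134.87365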
+ + :param filename: the data file name including extension + :returns: a dict, with keys for timestamp, frag_index, and ext:: + + * timestamp is a :class:`~swift.common.utils.Timestamp` + * frag_index is an int or None + * ext is a string, the file extension including the leading dot or + the empty string if the filename has no extenstion. + + :raises DiskFileError: if any part of the filename is not able to be + validated. + """ + frag_index = None + filename, ext = splitext(filename) + parts = filename.split('#', 1) + timestamp = parts[0] + if ext == '.data': + # it is an error for an EC data file to not have a valid + # fragment index + try: + frag_index = parts[1] + except IndexError: + frag_index = None + frag_index = self.validate_fragment_index(frag_index) + return { + 'timestamp': Timestamp(timestamp), + 'frag_index': frag_index, + 'ext': ext, + } + + def is_obsolete(self, filename, other_filename): + """ + Test if a given file is considered to be obsolete with respect to + another file in an object storage dir. + + Implements EC policy specific behavior when comparing files against a + .durable file. + + A simple string comparison would consider t2#1.data to be older than + t2.durable (since t2#1.data < t2.durable). By stripping off the file + extensions we get the desired behavior: t2#1 > t2 without compromising + the detection of t1#1 < t2. + + :param filename: a string representing an absolute filename + :param other_filename: a string representing an absolute filename + :returns: True if filename is considered obsolete, False otherwise. + """ + if other_filename.endswith('.durable'): + return splitext(filename)[0] < splitext(other_filename)[0] + return filename < other_filename + + def _gather_on_disk_file(self, filename, ext, context, frag_index=None, + **kwargs): + """ + Called by gather_ondisk_files() for each file in an object + datadir in reverse sorted order. If a file is considered part of a + valid on-disk file set it will be added to the context dict, keyed by + its extension. If a file is considered to be obsolete it will be added + to a list stored under the key 'obsolete' in the context dict. + + :param filename: name of file to be accepted or not + :param ext: extension part of filename + :param context: a context dict that may have been populated by previous + calls to this method + :param frag_index: if set, search for a specific fragment index .data + file, otherwise accept the first valid .data file. 
+ :returns: True if a valid file set has been found, False otherwise + """ + + # if first file with given extension then add filename to context + # dict and return True + accept_first = lambda: context.setdefault(ext, filename) == filename + # add the filename to the list of obsolete files in context dict + discard = lambda: context.setdefault('obsolete', []).append(filename) + # set a flag in the context dict indicating that a valid fileset has + # been found + set_valid_fileset = lambda: context.setdefault('found_valid', True) + # return True if the valid fileset flag is set in the context dict + have_valid_fileset = lambda: context.get('found_valid') + + if context.get('.durable'): + # a .durable file has been found + if ext == '.data': + if self.is_obsolete(filename, context.get('.durable')): + # this and remaining data files are older than durable + discard() + set_valid_fileset() + else: + # accept the first .data file if it matches requested + # frag_index, or if no specific frag_index is requested + fi = self.parse_on_disk_filename(filename)['frag_index'] + if frag_index is None or frag_index == int(fi): + accept_first() + set_valid_fileset() + # else: keep searching for a .data file to match frag_index + context.setdefault('fragments', []).append(filename) + else: + # there can no longer be a matching .data file so mark what has + # been found so far as the valid fileset + discard() + set_valid_fileset() + elif ext == '.data': + # not yet found a .durable + if have_valid_fileset(): + # valid fileset means we must have a newer + # .ts, so discard the older .data file + discard() + else: + # .data newer than a .durable or .ts, don't discard yet + context.setdefault('fragments_without_durable', []).append( + filename) + elif ext == '.ts': + if have_valid_fileset() or not accept_first(): + # newer .data, .durable or .ts already found so discard this + discard() + if not have_valid_fileset(): + # remove any .meta that may have been previously found + context['.meta'] = None + set_valid_fileset() + elif ext in ('.meta', '.durable'): + if have_valid_fileset() or not accept_first(): + # newer .data, .durable or .ts already found so discard this + discard() + else: + # ignore unexpected files + pass + return have_valid_fileset() + + def _verify_on_disk_files(self, accepted_files, frag_index=None, **kwargs): + """ + Verify that the final combination of on disk files complies with the + diskfile contract. + + :param accepted_files: files that have been found and accepted + :param frag_index: specifies a specific fragment index .data file + :returns: True if the file combination is compliant, False otherwise + """ + if not accepted_files.get('.data'): + # We may find only a .meta, which doesn't mean the on disk + # contract is broken. So we clear it to comply with + # superclass assertions. 
+ accepted_files['.meta'] = None + + data_file, meta_file, ts_file, durable_file = tuple( + [accepted_files.get(ext) + for ext in ('.data', '.meta', '.ts', '.durable')]) + + return ((data_file is None or durable_file is not None) + and (data_file is None and meta_file is None + and ts_file is None and durable_file is None) + or (ts_file is not None and data_file is None + and meta_file is None and durable_file is None) + or (data_file is not None and durable_file is not None + and ts_file is None) + or (durable_file is not None and meta_file is None + and ts_file is None)) + + def gather_ondisk_files(self, files, include_obsolete=False, + frag_index=None, verify=False, **kwargs): + """ + Given a simple list of files names, iterate over them to determine the + files that constitute a valid object, and optionally determine the + files that are obsolete and could be deleted. Note that some files may + fall into neither category. + + :param files: a list of file names. + :param include_obsolete: By default the iteration will stop when a + valid file set has been found. Setting this + argument to True will cause the iteration to + continue in order to find all obsolete files. + :param frag_index: if set, search for a specific fragment index .data + file, otherwise accept the first valid .data file. + :returns: a dict that may contain: valid on disk files keyed by their + filename extension; a list of obsolete files stored under the + key 'obsolete'. + """ + # This visitor pattern enables future refactoring of other disk + # manager implementations to re-use this method and override + # _gather_ondisk_file and _verify_ondisk_files to apply implementation + # specific selection and verification of on-disk files. + files.sort(reverse=True) + results = {} + for afile in files: + ts_file = results.get('.ts') + data_file = results.get('.data') + if not include_obsolete: + assert ts_file is None, "On-disk file search loop" \ + " continuing after tombstone, %s, encountered" % ts_file + assert data_file is None, "On-disk file search loop" \ + " continuing after data file, %s, encountered" % data_file + + ext = splitext(afile)[1] + if self._gather_on_disk_file( + afile, ext, results, frag_index=frag_index, **kwargs): + if not include_obsolete: + break + + if verify: + assert self._verify_on_disk_files( + results, frag_index=frag_index, **kwargs), \ + "On-disk file search algorithm contract is broken: %s" \ + % results.values() + return results + + def get_ondisk_files(self, files, datadir, **kwargs): + """ + Given a simple list of files names, determine the files to use. + + :param files: simple set of files as a python list + :param datadir: directory name files are from for convenience + :returns: a tuple of data, meta, and tombstone + """ + # maintain compatibility with 'legacy' get_ondisk_files return value + accepted_files = self.gather_ondisk_files(files, verify=True, **kwargs) + result = [(join(datadir, accepted_files.get(ext)) + if accepted_files.get(ext) else None) + for ext in ('.data', '.meta', '.ts')] + return tuple(result) + + def cleanup_ondisk_files(self, hsh_path, reclaim_age=ONE_WEEK, + frag_index=None): + """ + Clean up on-disk files that are obsolete and gather the set of valid + on-disk files for an object. 
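+
+        For example (illustrative), for a hash dir holding ``t2#1.data``,
+        ``t2.durable`` and an older ``t1#1.data``::
+
+            results = mgr.cleanup_ondisk_files(hsh_path)
+            # the obsolete t1#1.data has been removed from disk
+            # results['files'] == ['t2.durable', 't2#1.data']
+            # results['obsolete'] == ['t1#1.data']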
+ + :param hsh_path: object hash path + :param reclaim_age: age in seconds at which to remove tombstones + :param frag_index: if set, search for a specific fragment index .data + file, otherwise accept the first valid .data file + :returns: a dict that may contain: valid on disk files keyed by their + filename extension; a list of obsolete files stored under the + key 'obsolete'; a list of files remaining in the directory, + reverse sorted, stored under the key 'files'. + """ + def is_reclaimable(filename): + timestamp = self.parse_on_disk_filename(filename)['timestamp'] + return (time.time() - float(timestamp)) > reclaim_age + + files = listdir(hsh_path) + files.sort(reverse=True) + results = self.gather_ondisk_files(files, include_obsolete=True, + frag_index=frag_index) + if '.durable' in results and not results.get('fragments'): + # a .durable with no .data is deleted as soon as it is found + results.setdefault('obsolete', []).append(results.pop('.durable')) + if '.ts' in results and is_reclaimable(results['.ts']): + results.setdefault('obsolete', []).append(results.pop('.ts')) + for filename in results.get('fragments_without_durable', []): + # stray fragments are not deleted until reclaim-age + if is_reclaimable(filename): + results.setdefault('obsolete', []).append(filename) + for filename in results.get('obsolete', []): + remove_file(join(hsh_path, filename)) + files.remove(filename) + results['files'] = files + return results + + def hash_cleanup_listdir(self, hsh_path, reclaim_age=ONE_WEEK): + """ + List contents of a hash directory and clean up any old files. + For EC policy, delete files older than a .durable or .ts file. + + :param hsh_path: object hash path + :param reclaim_age: age in seconds at which to remove tombstones + :returns: list of files remaining in the directory, reverse sorted + """ + # maintain compatibility with 'legacy' hash_cleanup_listdir + # return value + return self.cleanup_ondisk_files( + hsh_path, reclaim_age=reclaim_age)['files'] + + def yield_hashes(self, device, partition, policy, + suffixes=None, frag_index=None): + """ + This is the same as the replicated yield_hashes except when frag_index + is provided data files for fragment indexes not matching the given + frag_index are skipped. 
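+
+        For example (illustrative), to iterate objects whose data files
+        match fragment index 2 on a device::
+
+            for object_path, object_hash, ts in mgr.yield_hashes(
+                    'sda1', '1234', policy, frag_index=2):
+                ...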
+ """ + dev_path = self.get_dev_path(device) + if not dev_path: + raise DiskFileDeviceUnavailable() + if suffixes is None: + suffixes = self.yield_suffixes(device, partition, policy) + else: + partition_path = os.path.join(dev_path, + get_data_dir(policy), + str(partition)) + suffixes = ( + (os.path.join(partition_path, suffix), suffix) + for suffix in suffixes) + for suffix_path, suffix in suffixes: + for object_hash in self._listdir(suffix_path): + object_path = os.path.join(suffix_path, object_hash) + newest_valid_file = None + try: + results = self.cleanup_ondisk_files( + object_path, self.reclaim_age, frag_index=frag_index) + newest_valid_file = (results.get('.meta') + or results.get('.data') + or results.get('.ts')) + if newest_valid_file: + timestamp = self.parse_on_disk_filename( + newest_valid_file)['timestamp'] + yield (object_path, object_hash, timestamp.internal) + except AssertionError as err: + self.logger.debug('Invalid file set in %s (%s)' % ( + object_path, err)) + except DiskFileError as err: + self.logger.debug( + 'Invalid diskfile filename %r in %r (%s)' % ( + newest_valid_file, object_path, err)) + + def _hash_suffix(self, path, reclaim_age): + """ + The only difference between this method and the module level function + hash_suffix is the way that files are updated on the returned hash. + + Instead of all filenames hashed into a single hasher, each file name + will fall into a bucket either by fragment index for datafiles, or + None (indicating a durable, metadata or tombstone). + """ + # hash_per_fi instead of single hash for whole suffix + hash_per_fi = defaultdict(hashlib.md5) + try: + path_contents = sorted(os.listdir(path)) + except OSError as err: + if err.errno in (errno.ENOTDIR, errno.ENOENT): + raise PathNotDir() + raise + for hsh in path_contents: + hsh_path = join(path, hsh) + try: + files = self.hash_cleanup_listdir(hsh_path, reclaim_age) + except OSError as err: + if err.errno == errno.ENOTDIR: + partition_path = dirname(path) + objects_path = dirname(partition_path) + device_path = dirname(objects_path) + quar_path = quarantine_renamer(device_path, hsh_path) + logging.exception( + _('Quarantined %(hsh_path)s to %(quar_path)s because ' + 'it is not a directory'), {'hsh_path': hsh_path, + 'quar_path': quar_path}) + continue + raise + if not files: + try: + os.rmdir(hsh_path) + except OSError: + pass + # we just deleted this hsh_path, why are we waiting + # until the next suffix hash to raise PathNotDir so that + # this suffix will get del'd from the suffix hashes? + for filename in files: + info = self.parse_on_disk_filename(filename) + fi = info['frag_index'] + if fi is None: + hash_per_fi[fi].update(filename) + else: + hash_per_fi[fi].update(info['timestamp'].internal) + try: + os.rmdir(path) + except OSError: + pass + # here we flatten out the hashers hexdigest into a dictionary instead + # of just returning the one hexdigest for the whole suffix + return dict((fi, md5.hexdigest()) for fi, md5 in hash_per_fi.items()) + + def _get_hashes(self, partition_path, recalculate=None, do_listdir=False, + reclaim_age=None): + """ + The only difference with this method and the module level function + get_hashes is the call to hash_suffix routes to a method _hash_suffix + on this instance. 
+ """ + reclaim_age = reclaim_age or self.reclaim_age + hashed = 0 + hashes_file = join(partition_path, HASH_FILE) + modified = False + force_rewrite = False + hashes = {} + mtime = -1 + + if recalculate is None: + recalculate = [] + + try: + with open(hashes_file, 'rb') as fp: + hashes = pickle.load(fp) + mtime = getmtime(hashes_file) + except Exception: + do_listdir = True + force_rewrite = True + if do_listdir: + for suff in os.listdir(partition_path): + if len(suff) == 3: + hashes.setdefault(suff, None) + modified = True + hashes.update((suffix, None) for suffix in recalculate) + for suffix, hash_ in hashes.items(): + if not hash_: + suffix_dir = join(partition_path, suffix) + try: + hashes[suffix] = self._hash_suffix(suffix_dir, reclaim_age) + hashed += 1 + except PathNotDir: + del hashes[suffix] + except OSError: + logging.exception(_('Error hashing suffix')) + modified = True + if modified: + with lock_path(partition_path): + if force_rewrite or not exists(hashes_file) or \ + getmtime(hashes_file) == mtime: + write_pickle( + hashes, hashes_file, partition_path, PICKLE_PROTOCOL) + return hashed, hashes + return self._get_hashes(partition_path, recalculate, do_listdir, + reclaim_age) + else: + return hashed, hashes diff --git a/swift/obj/mem_diskfile.py b/swift/obj/mem_diskfile.py index efb8c6c8c0..be5fbf1349 100644 --- a/swift/obj/mem_diskfile.py +++ b/swift/obj/mem_diskfile.py @@ -57,6 +57,12 @@ class InMemoryFileSystem(object): def get_diskfile(self, account, container, obj, **kwargs): return DiskFile(self, account, container, obj) + def pickle_async_update(self, *args, **kwargs): + """ + For now don't handle async updates. + """ + pass + class DiskFileWriter(object): """ @@ -98,6 +104,16 @@ class DiskFileWriter(object): metadata['name'] = self._name self._filesystem.put_object(self._name, self._fp, metadata) + def commit(self, timestamp): + """ + Perform any operations necessary to mark the object as durable. For + mem_diskfile type this is a no-op. + + :param timestamp: object put timestamp, an instance of + :class:`~swift.common.utils.Timestamp` + """ + pass + class DiskFileReader(object): """ diff --git a/swift/obj/mem_server.py b/swift/obj/mem_server.py index 83647661aa..764a92a92d 100644 --- a/swift/obj/mem_server.py +++ b/swift/obj/mem_server.py @@ -15,15 +15,7 @@ """ In-Memory Object Server for Swift """ -import os -from swift import gettext_ as _ -from eventlet import Timeout - -from swift.common.bufferedhttp import http_connect -from swift.common.exceptions import ConnectionTimeout - -from swift.common.http import is_success from swift.obj.mem_diskfile import InMemoryFileSystem from swift.obj import server @@ -53,49 +45,6 @@ class ObjectController(server.ObjectController): """ return self._filesystem.get_diskfile(account, container, obj, **kwargs) - def async_update(self, op, account, container, obj, host, partition, - contdevice, headers_out, objdevice, policy_idx): - """ - Sends or saves an async update. 
- - :param op: operation performed (ex: 'PUT', or 'DELETE') - :param account: account name for the object - :param container: container name for the object - :param obj: object name - :param host: host that the container is on - :param partition: partition that the container is on - :param contdevice: device name that the container is on - :param headers_out: dictionary of headers to send in the container - request - :param objdevice: device name that the object is in - :param policy_idx: the associated storage policy index - """ - headers_out['user-agent'] = 'object-server %s' % os.getpid() - full_path = '/%s/%s/%s' % (account, container, obj) - if all([host, partition, contdevice]): - try: - with ConnectionTimeout(self.conn_timeout): - ip, port = host.rsplit(':', 1) - conn = http_connect(ip, port, contdevice, partition, op, - full_path, headers_out) - with Timeout(self.node_timeout): - response = conn.getresponse() - response.read() - if is_success(response.status): - return - else: - self.logger.error(_( - 'ERROR Container update failed: %(status)d ' - 'response from %(ip)s:%(port)s/%(dev)s'), - {'status': response.status, 'ip': ip, 'port': port, - 'dev': contdevice}) - except (Exception, Timeout): - self.logger.exception(_( - 'ERROR container update failed with ' - '%(ip)s:%(port)s/%(dev)s'), - {'ip': ip, 'port': port, 'dev': contdevice}) - # FIXME: For now don't handle async updates - def REPLICATE(self, request): """ Handle REPLICATE requests for the Swift Object Server. This is used diff --git a/swift/obj/replicator.py b/swift/obj/replicator.py index 5ee32884ca..ddf431ec73 100644 --- a/swift/obj/replicator.py +++ b/swift/obj/replicator.py @@ -39,7 +39,7 @@ from swift.common.http import HTTP_OK, HTTP_INSUFFICIENT_STORAGE from swift.obj import ssync_sender from swift.obj.diskfile import (DiskFileManager, get_hashes, get_data_dir, get_tmp_dir) -from swift.common.storage_policy import POLICIES +from swift.common.storage_policy import POLICIES, REPL_POLICY hubs.use_hub(get_hub()) @@ -110,14 +110,15 @@ class ObjectReplicator(Daemon): """ return self.sync_method(node, job, suffixes, *args, **kwargs) - def get_object_ring(self, policy_idx): + def load_object_ring(self, policy): """ - Get the ring object to use to handle a request based on its policy. + Make sure the policy's rings are loaded. 
- :policy_idx: policy index as defined in swift.conf + :param policy: the StoragePolicy instance :returns: appropriate ring object """ - return POLICIES.get_object_ring(policy_idx, self.swift_dir) + policy.load_ring(self.swift_dir) + return policy.object_ring def _rsync(self, args): """ @@ -196,7 +197,7 @@ class ObjectReplicator(Daemon): had_any = True if not had_any: return False, set() - data_dir = get_data_dir(job['policy_idx']) + data_dir = get_data_dir(job['policy']) args.append(join(rsync_module, node['device'], data_dir, job['partition'])) return self._rsync(args) == 0, set() @@ -231,7 +232,7 @@ class ObjectReplicator(Daemon): if len(suff) == 3 and isdir(join(path, suff))] self.replication_count += 1 self.logger.increment('partition.delete.count.%s' % (job['device'],)) - self.headers['X-Backend-Storage-Policy-Index'] = job['policy_idx'] + self.headers['X-Backend-Storage-Policy-Index'] = int(job['policy']) begin = time.time() try: responses = [] @@ -314,7 +315,7 @@ class ObjectReplicator(Daemon): """ self.replication_count += 1 self.logger.increment('partition.update.count.%s' % (job['device'],)) - self.headers['X-Backend-Storage-Policy-Index'] = job['policy_idx'] + self.headers['X-Backend-Storage-Policy-Index'] = int(job['policy']) begin = time.time() try: hashed, local_hash = tpool_reraise( @@ -328,7 +329,8 @@ class ObjectReplicator(Daemon): random.shuffle(job['nodes']) nodes = itertools.chain( job['nodes'], - job['object_ring'].get_more_nodes(int(job['partition']))) + job['policy'].object_ring.get_more_nodes( + int(job['partition']))) while attempts_left > 0: # If this throws StopIteration it will be caught way below node = next(nodes) @@ -460,16 +462,15 @@ class ObjectReplicator(Daemon): self.kill_coros() self.last_replication_count = self.replication_count - def process_repl(self, policy, ips, override_devices=None, - override_partitions=None): + def build_replication_jobs(self, policy, ips, override_devices=None, + override_partitions=None): """ Helper function for collect_jobs to build jobs for replication using replication style storage policy """ jobs = [] - obj_ring = self.get_object_ring(policy.idx) - data_dir = get_data_dir(policy.idx) - for local_dev in [dev for dev in obj_ring.devs + data_dir = get_data_dir(policy) + for local_dev in [dev for dev in policy.object_ring.devs if (dev and is_local_device(ips, self.port, @@ -479,7 +480,7 @@ class ObjectReplicator(Daemon): or dev['device'] in override_devices))]: dev_path = join(self.devices_dir, local_dev['device']) obj_path = join(dev_path, data_dir) - tmp_path = join(dev_path, get_tmp_dir(int(policy))) + tmp_path = join(dev_path, get_tmp_dir(policy)) if self.mount_check and not ismount(dev_path): self.logger.warn(_('%s is not mounted'), local_dev['device']) continue @@ -497,7 +498,8 @@ class ObjectReplicator(Daemon): try: job_path = join(obj_path, partition) - part_nodes = obj_ring.get_part_nodes(int(partition)) + part_nodes = policy.object_ring.get_part_nodes( + int(partition)) nodes = [node for node in part_nodes if node['id'] != local_dev['id']] jobs.append( @@ -506,9 +508,8 @@ class ObjectReplicator(Daemon): obj_path=obj_path, nodes=nodes, delete=len(nodes) > len(part_nodes) - 1, - policy_idx=policy.idx, + policy=policy, partition=partition, - object_ring=obj_ring, region=local_dev['region'])) except ValueError: continue @@ -530,13 +531,15 @@ class ObjectReplicator(Daemon): jobs = [] ips = whataremyips() for policy in POLICIES: - if (override_policies is not None - and str(policy.idx) not in override_policies): - 
continue - # may need to branch here for future policy types - jobs += self.process_repl(policy, ips, - override_devices=override_devices, - override_partitions=override_partitions) + if policy.policy_type == REPL_POLICY: + if (override_policies is not None and + str(policy.idx) not in override_policies): + continue + # ensure rings are loaded for policy + self.load_object_ring(policy) + jobs += self.build_replication_jobs( + policy, ips, override_devices=override_devices, + override_partitions=override_partitions) random.shuffle(jobs) if self.handoffs_first: # Move the handoff parts to the front of the list @@ -569,7 +572,7 @@ class ObjectReplicator(Daemon): if self.mount_check and not ismount(dev_path): self.logger.warn(_('%s is not mounted'), job['device']) continue - if not self.check_ring(job['object_ring']): + if not self.check_ring(job['policy'].object_ring): self.logger.info(_("Ring change detected. Aborting " "current replication pass.")) return diff --git a/swift/obj/server.py b/swift/obj/server.py index ad0f9faeb3..f4dbb4264b 100644 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -685,12 +685,17 @@ class ObjectController(BaseStorageServer): """ Handle REPLICATE requests for the Swift Object Server. This is used by the object replicator to get hashes for directories. + + Note that the name REPLICATE is preserved for historical reasons as + this verb really just returns the hashes information for the specified + parameters and is used, for example, by both replication and EC. """ - device, partition, suffix, policy_idx = \ + device, partition, suffix_parts, policy = \ get_name_and_placement(request, 2, 3, True) + suffixes = suffix_parts.split('-') if suffix_parts else [] try: - hashes = self._diskfile_mgr.get_hashes(device, partition, suffix, - policy_idx) + hashes = self._diskfile_mgr.get_hashes( + device, partition, suffixes, policy) except DiskFileDeviceUnavailable: resp = HTTPInsufficientStorage(drive=device, request=request) else: diff --git a/swift/obj/ssync_sender.py b/swift/obj/ssync_sender.py index 1058ab262d..02745d21e4 100644 --- a/swift/obj/ssync_sender.py +++ b/swift/obj/ssync_sender.py @@ -47,7 +47,7 @@ class Sender(object): @property def policy_idx(self): - return int(self.job.get('policy_idx', 0)) + return int(self.job.get('policy', 0)) def __call__(self): """ diff --git a/swift/obj/updater.py b/swift/obj/updater.py index 6c40c456ac..f5d1f37fa4 100644 --- a/swift/obj/updater.py +++ b/swift/obj/updater.py @@ -29,7 +29,8 @@ from swift.common.ring import Ring from swift.common.utils import get_logger, renamer, write_pickle, \ dump_recon_cache, config_true_value, ismount from swift.common.daemon import Daemon -from swift.obj.diskfile import get_tmp_dir, get_async_dir, ASYNCDIR_BASE +from swift.common.storage_policy import split_policy_string, PolicyError +from swift.obj.diskfile import get_tmp_dir, ASYNCDIR_BASE from swift.common.http import is_success, HTTP_NOT_FOUND, \ HTTP_INTERNAL_SERVER_ERROR @@ -148,28 +149,19 @@ class ObjectUpdater(Daemon): start_time = time.time() # loop through async pending dirs for all policies for asyncdir in self._listdir(device): - # skip stuff like "accounts", "containers", etc. 
- if not (asyncdir == ASYNCDIR_BASE or - asyncdir.startswith(ASYNCDIR_BASE + '-')): - continue - # we only care about directories async_pending = os.path.join(device, asyncdir) if not os.path.isdir(async_pending): continue - - if asyncdir == ASYNCDIR_BASE: - policy_idx = 0 - else: - _junk, policy_idx = asyncdir.split('-', 1) - try: - policy_idx = int(policy_idx) - get_async_dir(policy_idx) - except ValueError: - self.logger.warn(_('Directory %s does not map to a ' - 'valid policy') % asyncdir) - continue - + if not asyncdir.startswith(ASYNCDIR_BASE): + # skip stuff like "accounts", "containers", etc. + continue + try: + base, policy = split_policy_string(asyncdir) + except PolicyError as e: + self.logger.warn(_('Directory %r does not map ' + 'to a valid policy (%s)') % (asyncdir, e)) + continue for prefix in self._listdir(async_pending): prefix_path = os.path.join(async_pending, prefix) if not os.path.isdir(prefix_path): @@ -193,7 +185,7 @@ class ObjectUpdater(Daemon): os.unlink(update_path) else: self.process_object_update(update_path, device, - policy_idx) + policy) last_obj_hash = obj_hash time.sleep(self.slowdown) try: @@ -202,13 +194,13 @@ class ObjectUpdater(Daemon): pass self.logger.timing_since('timing', start_time) - def process_object_update(self, update_path, device, policy_idx): + def process_object_update(self, update_path, device, policy): """ Process the object information to be updated and update. :param update_path: path to pickled object update file :param device: path to device - :param policy_idx: storage policy index of object update + :param policy: storage policy of object update """ try: update = pickle.load(open(update_path, 'rb')) @@ -228,7 +220,7 @@ class ObjectUpdater(Daemon): headers_out = update['headers'].copy() headers_out['user-agent'] = 'object-updater %s' % os.getpid() headers_out.setdefault('X-Backend-Storage-Policy-Index', - str(policy_idx)) + str(int(policy))) events = [spawn(self.object_update, node, part, update['op'], obj, headers_out) for node in nodes if node['id'] not in successes] @@ -256,7 +248,7 @@ class ObjectUpdater(Daemon): if new_successes: update['successes'] = successes write_pickle(update, update_path, os.path.join( - device, get_tmp_dir(policy_idx))) + device, get_tmp_dir(policy))) def object_update(self, node, part, op, obj, headers_out): """ diff --git a/test/unit/common/test_constraints.py b/test/unit/common/test_constraints.py index 7ae9fb44a4..61231d3f02 100644 --- a/test/unit/common/test_constraints.py +++ b/test/unit/common/test_constraints.py @@ -368,6 +368,11 @@ class TestConstraints(unittest.TestCase): self.assertTrue('X-Delete-At' in req.headers) self.assertEqual(req.headers['X-Delete-At'], expected) + def test_check_dir(self): + self.assertFalse(constraints.check_dir('', '')) + with mock.patch("os.path.isdir", MockTrue()): + self.assertTrue(constraints.check_dir('/srv', 'foo/bar')) + def test_check_mount(self): self.assertFalse(constraints.check_mount('', '')) with mock.patch("swift.common.utils.ismount", MockTrue()): diff --git a/test/unit/obj/test_auditor.py b/test/unit/obj/test_auditor.py index e8f8a2b16a..3cfcb47573 100644 --- a/test/unit/obj/test_auditor.py +++ b/test/unit/obj/test_auditor.py @@ -28,7 +28,7 @@ from swift.obj.diskfile import DiskFile, write_metadata, invalidate_hash, \ get_data_dir, DiskFileManager, AuditLocation from swift.common.utils import hash_path, mkdirs, normalize_timestamp, \ storage_directory -from swift.common.storage_policy import StoragePolicy +from swift.common.storage_policy import 
StoragePolicy, POLICIES _mocked_policies = [StoragePolicy(0, 'zero', False), @@ -48,12 +48,16 @@ class TestAuditor(unittest.TestCase): os.mkdir(os.path.join(self.devices, 'sdb')) # policy 0 - self.objects = os.path.join(self.devices, 'sda', get_data_dir(0)) - self.objects_2 = os.path.join(self.devices, 'sdb', get_data_dir(0)) + self.objects = os.path.join(self.devices, 'sda', + get_data_dir(POLICIES[0])) + self.objects_2 = os.path.join(self.devices, 'sdb', + get_data_dir(POLICIES[0])) os.mkdir(self.objects) # policy 1 - self.objects_p1 = os.path.join(self.devices, 'sda', get_data_dir(1)) - self.objects_2_p1 = os.path.join(self.devices, 'sdb', get_data_dir(1)) + self.objects_p1 = os.path.join(self.devices, 'sda', + get_data_dir(POLICIES[1])) + self.objects_2_p1 = os.path.join(self.devices, 'sdb', + get_data_dir(POLICIES[1])) os.mkdir(self.objects_p1) self.parts = self.parts_p1 = {} @@ -70,9 +74,10 @@ class TestAuditor(unittest.TestCase): self.df_mgr = DiskFileManager(self.conf, self.logger) # diskfiles for policy 0, 1 - self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o', 0) + self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o', + policy=POLICIES[0]) self.disk_file_p1 = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', - 'o', 1) + 'o', policy=POLICIES[1]) def tearDown(self): rmtree(os.path.dirname(self.testdir), ignore_errors=1) @@ -125,13 +130,15 @@ class TestAuditor(unittest.TestCase): pre_quarantines = auditor_worker.quarantines auditor_worker.object_audit( - AuditLocation(disk_file._datadir, 'sda', '0')) + AuditLocation(disk_file._datadir, 'sda', '0', + policy=POLICIES.legacy)) self.assertEquals(auditor_worker.quarantines, pre_quarantines) os.write(writer._fd, 'extra_data') auditor_worker.object_audit( - AuditLocation(disk_file._datadir, 'sda', '0')) + AuditLocation(disk_file._datadir, 'sda', '0', + policy=POLICIES.legacy)) self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) run_tests(self.disk_file) @@ -156,10 +163,12 @@ class TestAuditor(unittest.TestCase): pre_quarantines = auditor_worker.quarantines # remake so it will have metadata - self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o') + self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o', + policy=POLICIES.legacy) auditor_worker.object_audit( - AuditLocation(self.disk_file._datadir, 'sda', '0')) + AuditLocation(self.disk_file._datadir, 'sda', '0', + policy=POLICIES.legacy)) self.assertEquals(auditor_worker.quarantines, pre_quarantines) etag = md5() etag.update('1' + '0' * 1023) @@ -171,7 +180,8 @@ class TestAuditor(unittest.TestCase): writer.put(metadata) auditor_worker.object_audit( - AuditLocation(self.disk_file._datadir, 'sda', '0')) + AuditLocation(self.disk_file._datadir, 'sda', '0', + policy=POLICIES.legacy)) self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_audit_no_meta(self): @@ -186,7 +196,8 @@ class TestAuditor(unittest.TestCase): self.rcache, self.devices) pre_quarantines = auditor_worker.quarantines auditor_worker.object_audit( - AuditLocation(self.disk_file._datadir, 'sda', '0')) + AuditLocation(self.disk_file._datadir, 'sda', '0', + policy=POLICIES.legacy)) self.assertEquals(auditor_worker.quarantines, pre_quarantines + 1) def test_object_audit_will_not_swallow_errors_in_tests(self): @@ -203,7 +214,8 @@ class TestAuditor(unittest.TestCase): with mock.patch.object(DiskFileManager, 'get_diskfile_from_audit_location', blowup): self.assertRaises(NameError, auditor_worker.object_audit, - 
AuditLocation(os.path.dirname(path), 'sda', '0')) + AuditLocation(os.path.dirname(path), 'sda', '0', + policy=POLICIES.legacy)) def test_failsafe_object_audit_will_swallow_errors_in_tests(self): timestamp = str(normalize_timestamp(time.time())) @@ -216,9 +228,11 @@ class TestAuditor(unittest.TestCase): def blowup(*args): raise NameError('tpyo') - with mock.patch('swift.obj.diskfile.DiskFile', blowup): + with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls', + blowup): auditor_worker.failsafe_object_audit( - AuditLocation(os.path.dirname(path), 'sda', '0')) + AuditLocation(os.path.dirname(path), 'sda', '0', + policy=POLICIES.legacy)) self.assertEquals(auditor_worker.errors, 1) def test_generic_exception_handling(self): @@ -240,7 +254,8 @@ class TestAuditor(unittest.TestCase): 'Content-Length': str(os.fstat(writer._fd).st_size), } writer.put(metadata) - with mock.patch('swift.obj.diskfile.DiskFile', lambda *_: 1 / 0): + with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls', + lambda *_: 1 / 0): auditor_worker.audit_all_objects() self.assertEquals(auditor_worker.errors, pre_errors + 1) @@ -368,7 +383,8 @@ class TestAuditor(unittest.TestCase): } writer.put(metadata) auditor_worker.audit_all_objects() - self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob') + self.disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'ob', + policy=POLICIES.legacy) data = '1' * 10 etag = md5() with self.disk_file.create() as writer: @@ -424,7 +440,7 @@ class TestAuditor(unittest.TestCase): name_hash = hash_path('a', 'c', 'o') dir_path = os.path.join( self.devices, 'sda', - storage_directory(get_data_dir(0), '0', name_hash)) + storage_directory(get_data_dir(POLICIES[0]), '0', name_hash)) ts_file_path = os.path.join(dir_path, '99999.ts') if not os.path.exists(dir_path): mkdirs(dir_path) @@ -474,9 +490,8 @@ class TestAuditor(unittest.TestCase): DiskFile._quarantine(self, data_file, msg) self.setup_bad_zero_byte() - was_df = auditor.diskfile.DiskFile - try: - auditor.diskfile.DiskFile = FakeFile + with mock.patch('swift.obj.diskfile.DiskFileManager.diskfile_cls', + FakeFile): kwargs = {'mode': 'once'} kwargs['zero_byte_fps'] = 50 self.auditor.run_audit(**kwargs) @@ -484,8 +499,6 @@ class TestAuditor(unittest.TestCase): 'sda', 'quarantined', 'objects') self.assertTrue(os.path.isdir(quarantine_path)) self.assertTrue(rat[0]) - finally: - auditor.diskfile.DiskFile = was_df @mock.patch.object(auditor.ObjectAuditor, 'run_audit') @mock.patch('os.fork', return_value=0) diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py index 20d47fbcad..2ccf3b1364 100644 --- a/test/unit/obj/test_diskfile.py +++ b/test/unit/obj/test_diskfile.py @@ -19,6 +19,7 @@ import cPickle as pickle import os import errno +import itertools import mock import unittest import email @@ -26,6 +27,8 @@ import tempfile import uuid import xattr import re +from collections import defaultdict +from random import shuffle, randint from shutil import rmtree from time import time from tempfile import mkdtemp @@ -35,7 +38,7 @@ from gzip import GzipFile from eventlet import hubs, timeout, tpool from test.unit import (FakeLogger, mock as unit_mock, temptree, - patch_policies, debug_logger) + patch_policies, debug_logger, EMPTY_ETAG) from nose import SkipTest from swift.obj import diskfile @@ -45,32 +48,61 @@ from swift.common import ring from swift.common.splice import splice from swift.common.exceptions import DiskFileNotExist, DiskFileQuarantined, \ DiskFileDeviceUnavailable, DiskFileDeleted, 
DiskFileNotOpen, \
-    DiskFileError, ReplicationLockTimeout, PathNotDir, DiskFileCollision, \
+    DiskFileError, ReplicationLockTimeout, DiskFileCollision, \
     DiskFileExpired, SwiftException, DiskFileNoSpace, DiskFileXattrNotSupported
-from swift.common.storage_policy import POLICIES, get_policy_string
-from functools import partial
+from swift.common.storage_policy import (
+    POLICIES, get_policy_string, StoragePolicy, ECStoragePolicy,
+    BaseStoragePolicy, REPL_POLICY, EC_POLICY)
 
-get_data_dir = partial(get_policy_string, diskfile.DATADIR_BASE)
-get_tmp_dir = partial(get_policy_string, diskfile.TMP_BASE)
+test_policies = [
+    StoragePolicy(0, name='zero', is_default=True),
+    ECStoragePolicy(1, name='one', is_default=False,
+                    ec_type='jerasure_rs_vand',
+                    ec_ndata=10, ec_nparity=4),
+]
 
-def _create_test_ring(path):
-    testgz = os.path.join(path, 'object.ring.gz')
+def find_paths_with_matching_suffixes(needed_matches=2, needed_suffixes=3):
+    paths = defaultdict(list)
+    while True:
+        path = ('a', 'c', uuid.uuid4().hex)
+        hash_ = hash_path(*path)
+        suffix = hash_[-3:]
+        paths[suffix].append(path)
+        if len(paths) < needed_suffixes:
+            # in the extremely unlikely situation where you land the matches
+            # you need before you get the total suffixes you need - it's
+            # simpler to just ignore this suffix for now
+            continue
+        if len(paths[suffix]) >= needed_matches:
+            break
+    return paths, suffix
+
+
+def _create_test_ring(path, policy):
+    ring_name = get_policy_string('object', policy)
+    testgz = os.path.join(path, ring_name + '.ring.gz')
     intended_replica2part2dev_id = [
         [0, 1, 2, 3, 4, 5, 6],
         [1, 2, 3, 0, 5, 6, 4],
         [2, 3, 0, 1, 6, 4, 5]]
     intended_devs = [
-        {'id': 0, 'device': 'sda', 'zone': 0, 'ip': '127.0.0.0', 'port': 6000},
-        {'id': 1, 'device': 'sda', 'zone': 1, 'ip': '127.0.0.1', 'port': 6000},
-        {'id': 2, 'device': 'sda', 'zone': 2, 'ip': '127.0.0.2', 'port': 6000},
-        {'id': 3, 'device': 'sda', 'zone': 4, 'ip': '127.0.0.3', 'port': 6000},
-        {'id': 4, 'device': 'sda', 'zone': 5, 'ip': '127.0.0.4', 'port': 6000},
-        {'id': 5, 'device': 'sda', 'zone': 6,
+        {'id': 0, 'device': 'sda1', 'zone': 0, 'ip': '127.0.0.0',
+         'port': 6000},
+        {'id': 1, 'device': 'sda1', 'zone': 1, 'ip': '127.0.0.1',
+         'port': 6000},
+        {'id': 2, 'device': 'sda1', 'zone': 2, 'ip': '127.0.0.2',
+         'port': 6000},
+        {'id': 3, 'device': 'sda1', 'zone': 4, 'ip': '127.0.0.3',
+         'port': 6000},
+        {'id': 4, 'device': 'sda1', 'zone': 5, 'ip': '127.0.0.4',
+         'port': 6000},
+        {'id': 5, 'device': 'sda1', 'zone': 6,
          'ip': 'fe80::202:b3ff:fe1e:8329', 'port': 6000},
-        {'id': 6, 'device': 'sda', 'zone': 7,
-         'ip': '2001:0db8:85a3:0000:0000:8a2e:0370:7334', 'port': 6000}]
+        {'id': 6, 'device': 'sda1', 'zone': 7,
+         'ip': '2001:0db8:85a3:0000:0000:8a2e:0370:7334',
+         'port': 6000}]
     intended_part_shift = 30
     intended_reload_time = 15
     with closing(GzipFile(testgz, 'wb')) as f:
@@ -78,7 +110,7 @@ def _create_test_ring(path):
             ring.RingData(intended_replica2part2dev_id,
                           intended_devs, intended_part_shift),
             f)
-    return ring.Ring(path, ring_name='object',
+    return ring.Ring(path, ring_name=ring_name,
                      reload_time=intended_reload_time)
 
 
@@ -88,13 +120,13 @@ class TestDiskFileModuleMethods(unittest.TestCase):
     def setUp(self):
         utils.HASH_PATH_SUFFIX = 'endcap'
         utils.HASH_PATH_PREFIX = ''
-        # Setup a test ring (stolen from common/test_ring.py)
+        # Setup a test ring per policy (stolen from common/test_ring.py)
         self.testdir = tempfile.mkdtemp()
         self.devices = os.path.join(self.testdir, 'node')
         rmtree(self.testdir, ignore_errors=1)
         os.mkdir(self.testdir)
os.mkdir(self.devices) - self.existing_device = 'sda' + self.existing_device = 'sda1' os.mkdir(os.path.join(self.devices, self.existing_device)) self.objects = os.path.join(self.devices, self.existing_device, 'objects') @@ -103,7 +135,7 @@ class TestDiskFileModuleMethods(unittest.TestCase): for part in ['0', '1', '2', '3']: self.parts[part] = os.path.join(self.objects, part) os.mkdir(os.path.join(self.objects, part)) - self.ring = _create_test_ring(self.testdir) + self.ring = _create_test_ring(self.testdir, POLICIES.legacy) self.conf = dict( swift_dir=self.testdir, devices=self.devices, mount_check='false', timeout='300', stats_interval='1') @@ -112,60 +144,58 @@ class TestDiskFileModuleMethods(unittest.TestCase): def tearDown(self): rmtree(self.testdir, ignore_errors=1) - def _create_diskfile(self, policy_idx=0): + def _create_diskfile(self, policy): return self.df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', 'o', - policy_idx) + policy=policy) - def test_extract_policy_index(self): + def test_extract_policy(self): # good path names pn = 'objects/0/606/1984527ed7ef6247c78606/1401379842.14643.data' - self.assertEqual(diskfile.extract_policy_index(pn), 0) + self.assertEqual(diskfile.extract_policy(pn), POLICIES[0]) pn = 'objects-1/0/606/198452b6ef6247c78606/1401379842.14643.data' - self.assertEqual(diskfile.extract_policy_index(pn), 1) - good_path = '/srv/node/sda1/objects-1/1/abc/def/1234.data' - self.assertEquals(1, diskfile.extract_policy_index(good_path)) - good_path = '/srv/node/sda1/objects/1/abc/def/1234.data' - self.assertEquals(0, diskfile.extract_policy_index(good_path)) + self.assertEqual(diskfile.extract_policy(pn), POLICIES[1]) - # short paths still ok - path = '/srv/node/sda1/objects/1/1234.data' - self.assertEqual(diskfile.extract_policy_index(path), 0) - path = '/srv/node/sda1/objects-1/1/1234.data' - self.assertEqual(diskfile.extract_policy_index(path), 1) - - # leading slash, just in case + # leading slash pn = '/objects/0/606/1984527ed7ef6247c78606/1401379842.14643.data' - self.assertEqual(diskfile.extract_policy_index(pn), 0) + self.assertEqual(diskfile.extract_policy(pn), POLICIES[0]) pn = '/objects-1/0/606/198452b6ef6247c78606/1401379842.14643.data' - self.assertEqual(diskfile.extract_policy_index(pn), 1) + self.assertEqual(diskfile.extract_policy(pn), POLICIES[1]) + + # full paths + good_path = '/srv/node/sda1/objects-1/1/abc/def/1234.data' + self.assertEqual(diskfile.extract_policy(good_path), POLICIES[1]) + good_path = '/srv/node/sda1/objects/1/abc/def/1234.data' + self.assertEqual(diskfile.extract_policy(good_path), POLICIES[0]) + + # short paths + path = '/srv/node/sda1/objects/1/1234.data' + self.assertEqual(diskfile.extract_policy(path), POLICIES[0]) + path = '/srv/node/sda1/objects-1/1/1234.data' + self.assertEqual(diskfile.extract_policy(path), POLICIES[1]) # well formatted but, unknown policy index pn = 'objects-2/0/606/198427efcff042c78606/1401379842.14643.data' - self.assertRaises(ValueError, - diskfile.extract_policy_index, pn) + self.assertEqual(diskfile.extract_policy(pn), None) + + # malformed path + self.assertEqual(diskfile.extract_policy(''), None) bad_path = '/srv/node/sda1/objects-t/1/abc/def/1234.data' - self.assertRaises(ValueError, - diskfile.extract_policy_index, bad_path) - - # malformed path (no objects dir or nothing at all) + self.assertEqual(diskfile.extract_policy(bad_path), None) pn = 'XXXX/0/606/1984527ed42b6ef6247c78606/1401379842.14643.data' - self.assertEqual(diskfile.extract_policy_index(pn), 0) - 
self.assertEqual(diskfile.extract_policy_index(''), 0) - - # no datadir base in path + self.assertEqual(diskfile.extract_policy(pn), None) bad_path = '/srv/node/sda1/foo-1/1/abc/def/1234.data' - self.assertEqual(diskfile.extract_policy_index(bad_path), 0) + self.assertEqual(diskfile.extract_policy(bad_path), None) bad_path = '/srv/node/sda1/obj1/1/abc/def/1234.data' - self.assertEqual(diskfile.extract_policy_index(bad_path), 0) + self.assertEqual(diskfile.extract_policy(bad_path), None) def test_quarantine_renamer(self): for policy in POLICIES: # we use this for convenience, not really about a diskfile layout - df = self._create_diskfile(policy_idx=policy.idx) + df = self._create_diskfile(policy=policy) mkdirs(df._datadir) exp_dir = os.path.join(self.devices, 'quarantined', - get_data_dir(policy.idx), + diskfile.get_data_dir(policy), os.path.basename(df._datadir)) qbit = os.path.join(df._datadir, 'qbit') with open(qbit, 'w') as f: @@ -175,38 +205,28 @@ class TestDiskFileModuleMethods(unittest.TestCase): self.assertRaises(OSError, diskfile.quarantine_renamer, self.devices, qbit) - def test_hash_suffix_enoent(self): - self.assertRaises(PathNotDir, diskfile.hash_suffix, - os.path.join(self.testdir, "doesnotexist"), 101) - - def test_hash_suffix_oserror(self): - mocked_os_listdir = mock.Mock( - side_effect=OSError(errno.EACCES, os.strerror(errno.EACCES))) - with mock.patch("os.listdir", mocked_os_listdir): - self.assertRaises(OSError, diskfile.hash_suffix, - os.path.join(self.testdir, "doesnotexist"), 101) - def test_get_data_dir(self): - self.assertEquals(diskfile.get_data_dir(0), diskfile.DATADIR_BASE) - self.assertEquals(diskfile.get_data_dir(1), + self.assertEquals(diskfile.get_data_dir(POLICIES[0]), + diskfile.DATADIR_BASE) + self.assertEquals(diskfile.get_data_dir(POLICIES[1]), diskfile.DATADIR_BASE + "-1") self.assertRaises(ValueError, diskfile.get_data_dir, 'junk') self.assertRaises(ValueError, diskfile.get_data_dir, 99) def test_get_async_dir(self): - self.assertEquals(diskfile.get_async_dir(0), + self.assertEquals(diskfile.get_async_dir(POLICIES[0]), diskfile.ASYNCDIR_BASE) - self.assertEquals(diskfile.get_async_dir(1), + self.assertEquals(diskfile.get_async_dir(POLICIES[1]), diskfile.ASYNCDIR_BASE + "-1") self.assertRaises(ValueError, diskfile.get_async_dir, 'junk') self.assertRaises(ValueError, diskfile.get_async_dir, 99) def test_get_tmp_dir(self): - self.assertEquals(diskfile.get_tmp_dir(0), + self.assertEquals(diskfile.get_tmp_dir(POLICIES[0]), diskfile.TMP_BASE) - self.assertEquals(diskfile.get_tmp_dir(1), + self.assertEquals(diskfile.get_tmp_dir(POLICIES[1]), diskfile.TMP_BASE + "-1") self.assertRaises(ValueError, diskfile.get_tmp_dir, 'junk') @@ -222,7 +242,7 @@ class TestDiskFileModuleMethods(unittest.TestCase): self.devices, self.existing_device, tmp_part) self.assertFalse(os.path.isdir(tmp_path)) pickle_args = (self.existing_device, 'a', 'c', 'o', - 'data', 0.0, int(policy)) + 'data', 0.0, policy) # async updates don't create their tmpdir on their own self.assertRaises(OSError, self.df_mgr.pickle_async_update, *pickle_args) @@ -232,438 +252,6 @@ class TestDiskFileModuleMethods(unittest.TestCase): # check tempdir self.assertTrue(os.path.isdir(tmp_path)) - def test_hash_suffix_hash_dir_is_file_quarantine(self): - df = self._create_diskfile() - mkdirs(os.path.dirname(df._datadir)) - open(df._datadir, 'wb').close() - ohash = hash_path('a', 'c', 'o') - data_dir = ohash[-3:] - whole_path_from = os.path.join(self.objects, '0', data_dir) - orig_quarantine_renamer = 
diskfile.quarantine_renamer - called = [False] - - def wrapped(*args, **kwargs): - called[0] = True - return orig_quarantine_renamer(*args, **kwargs) - - try: - diskfile.quarantine_renamer = wrapped - diskfile.hash_suffix(whole_path_from, 101) - finally: - diskfile.quarantine_renamer = orig_quarantine_renamer - self.assertTrue(called[0]) - - def test_hash_suffix_one_file(self): - df = self._create_diskfile() - mkdirs(df._datadir) - f = open( - os.path.join(df._datadir, - Timestamp(time() - 100).internal + '.ts'), - 'wb') - f.write('1234567890') - f.close() - ohash = hash_path('a', 'c', 'o') - data_dir = ohash[-3:] - whole_path_from = os.path.join(self.objects, '0', data_dir) - diskfile.hash_suffix(whole_path_from, 101) - self.assertEquals(len(os.listdir(self.parts['0'])), 1) - - diskfile.hash_suffix(whole_path_from, 99) - self.assertEquals(len(os.listdir(self.parts['0'])), 0) - - def test_hash_suffix_oserror_on_hcl(self): - df = self._create_diskfile() - mkdirs(df._datadir) - f = open( - os.path.join(df._datadir, - Timestamp(time() - 100).internal + '.ts'), - 'wb') - f.write('1234567890') - f.close() - ohash = hash_path('a', 'c', 'o') - data_dir = ohash[-3:] - whole_path_from = os.path.join(self.objects, '0', data_dir) - state = [0] - orig_os_listdir = os.listdir - - def mock_os_listdir(*args, **kwargs): - # We want the first call to os.listdir() to succeed, which is the - # one directly from hash_suffix() itself, but then we want to fail - # the next call to os.listdir() which is from - # hash_cleanup_listdir() - if state[0] == 1: - raise OSError(errno.EACCES, os.strerror(errno.EACCES)) - state[0] = 1 - return orig_os_listdir(*args, **kwargs) - - with mock.patch('os.listdir', mock_os_listdir): - self.assertRaises(OSError, diskfile.hash_suffix, whole_path_from, - 101) - - def test_hash_suffix_multi_file_one(self): - df = self._create_diskfile() - mkdirs(df._datadir) - for tdiff in [1, 50, 100, 500]: - for suff in ['.meta', '.data', '.ts']: - f = open( - os.path.join( - df._datadir, - Timestamp(int(time()) - tdiff).internal + suff), - 'wb') - f.write('1234567890') - f.close() - - ohash = hash_path('a', 'c', 'o') - data_dir = ohash[-3:] - whole_path_from = os.path.join(self.objects, '0', data_dir) - hsh_path = os.listdir(whole_path_from)[0] - whole_hsh_path = os.path.join(whole_path_from, hsh_path) - - diskfile.hash_suffix(whole_path_from, 99) - # only the tombstone should be left - self.assertEquals(len(os.listdir(whole_hsh_path)), 1) - - def test_hash_suffix_multi_file_two(self): - df = self._create_diskfile() - mkdirs(df._datadir) - for tdiff in [1, 50, 100, 500]: - suffs = ['.meta', '.data'] - if tdiff > 50: - suffs.append('.ts') - for suff in suffs: - f = open( - os.path.join( - df._datadir, - Timestamp(int(time()) - tdiff).internal + suff), - 'wb') - f.write('1234567890') - f.close() - - ohash = hash_path('a', 'c', 'o') - data_dir = ohash[-3:] - whole_path_from = os.path.join(self.objects, '0', data_dir) - hsh_path = os.listdir(whole_path_from)[0] - whole_hsh_path = os.path.join(whole_path_from, hsh_path) - - diskfile.hash_suffix(whole_path_from, 99) - # only the meta and data should be left - self.assertEquals(len(os.listdir(whole_hsh_path)), 2) - - def test_hash_suffix_hsh_path_disappearance(self): - orig_rmdir = os.rmdir - - def _rmdir(path): - # Done twice to recreate what happens when it doesn't exist. 
- orig_rmdir(path) - orig_rmdir(path) - - df = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o') - mkdirs(df._datadir) - ohash = hash_path('a', 'c', 'o') - suffix = ohash[-3:] - suffix_path = os.path.join(self.objects, '0', suffix) - with mock.patch('os.rmdir', _rmdir): - # If hash_suffix doesn't handle the exception _rmdir will raise, - # this test will fail. - diskfile.hash_suffix(suffix_path, 123) - - def test_invalidate_hash(self): - - def assertFileData(file_path, data): - with open(file_path, 'r') as fp: - fdata = fp.read() - self.assertEquals(pickle.loads(fdata), pickle.loads(data)) - - df = self._create_diskfile() - mkdirs(df._datadir) - ohash = hash_path('a', 'c', 'o') - data_dir = ohash[-3:] - whole_path_from = os.path.join(self.objects, '0', data_dir) - hashes_file = os.path.join(self.objects, '0', - diskfile.HASH_FILE) - # test that non existent file except caught - self.assertEquals(diskfile.invalidate_hash(whole_path_from), - None) - # test that hashes get cleared - check_pickle_data = pickle.dumps({data_dir: None}, - diskfile.PICKLE_PROTOCOL) - for data_hash in [{data_dir: None}, {data_dir: 'abcdefg'}]: - with open(hashes_file, 'wb') as fp: - pickle.dump(data_hash, fp, diskfile.PICKLE_PROTOCOL) - diskfile.invalidate_hash(whole_path_from) - assertFileData(hashes_file, check_pickle_data) - - def test_invalidate_hash_bad_pickle(self): - df = self._create_diskfile() - mkdirs(df._datadir) - ohash = hash_path('a', 'c', 'o') - data_dir = ohash[-3:] - whole_path_from = os.path.join(self.objects, '0', data_dir) - hashes_file = os.path.join(self.objects, '0', - diskfile.HASH_FILE) - for data_hash in [{data_dir: None}, {data_dir: 'abcdefg'}]: - with open(hashes_file, 'wb') as fp: - fp.write('bad hash data') - try: - diskfile.invalidate_hash(whole_path_from) - except Exception as err: - self.fail("Unexpected exception raised: %s" % err) - else: - pass - - def test_get_hashes(self): - df = self._create_diskfile() - mkdirs(df._datadir) - with open( - os.path.join(df._datadir, - Timestamp(time()).internal + '.ts'), - 'wb') as f: - f.write('1234567890') - part = os.path.join(self.objects, '0') - hashed, hashes = diskfile.get_hashes(part) - self.assertEquals(hashed, 1) - self.assert_('a83' in hashes) - hashed, hashes = diskfile.get_hashes(part, do_listdir=True) - self.assertEquals(hashed, 0) - self.assert_('a83' in hashes) - hashed, hashes = diskfile.get_hashes(part, recalculate=['a83']) - self.assertEquals(hashed, 1) - self.assert_('a83' in hashes) - - def test_get_hashes_bad_dir(self): - df = self._create_diskfile() - mkdirs(df._datadir) - with open(os.path.join(self.objects, '0', 'bad'), 'wb') as f: - f.write('1234567890') - part = os.path.join(self.objects, '0') - hashed, hashes = diskfile.get_hashes(part) - self.assertEquals(hashed, 1) - self.assert_('a83' in hashes) - self.assert_('bad' not in hashes) - - def test_get_hashes_unmodified(self): - df = self._create_diskfile() - mkdirs(df._datadir) - with open( - os.path.join(df._datadir, - Timestamp(time()).internal + '.ts'), - 'wb') as f: - f.write('1234567890') - part = os.path.join(self.objects, '0') - hashed, hashes = diskfile.get_hashes(part) - i = [0] - - def _getmtime(filename): - i[0] += 1 - return 1 - with unit_mock({'swift.obj.diskfile.getmtime': _getmtime}): - hashed, hashes = diskfile.get_hashes( - part, recalculate=['a83']) - self.assertEquals(i[0], 2) - - def test_get_hashes_unmodified_norecalc(self): - df = self._create_diskfile() - mkdirs(df._datadir) - with open( - os.path.join(df._datadir, - Timestamp(time()).internal 
+ '.ts'), - 'wb') as f: - f.write('1234567890') - part = os.path.join(self.objects, '0') - hashed, hashes_0 = diskfile.get_hashes(part) - self.assertEqual(hashed, 1) - self.assertTrue('a83' in hashes_0) - hashed, hashes_1 = diskfile.get_hashes(part) - self.assertEqual(hashed, 0) - self.assertTrue('a83' in hashes_0) - self.assertEqual(hashes_1, hashes_0) - - def test_get_hashes_hash_suffix_error(self): - df = self._create_diskfile() - mkdirs(df._datadir) - with open( - os.path.join(df._datadir, - Timestamp(time()).internal + '.ts'), - 'wb') as f: - f.write('1234567890') - part = os.path.join(self.objects, '0') - mocked_hash_suffix = mock.MagicMock( - side_effect=OSError(errno.EACCES, os.strerror(errno.EACCES))) - with mock.patch('swift.obj.diskfile.hash_suffix', mocked_hash_suffix): - hashed, hashes = diskfile.get_hashes(part) - self.assertEqual(hashed, 0) - self.assertEqual(hashes, {'a83': None}) - - def test_get_hashes_unmodified_and_zero_bytes(self): - df = self._create_diskfile() - mkdirs(df._datadir) - part = os.path.join(self.objects, '0') - open(os.path.join(part, diskfile.HASH_FILE), 'w') - # Now the hash file is zero bytes. - i = [0] - - def _getmtime(filename): - i[0] += 1 - return 1 - with unit_mock({'swift.obj.diskfile.getmtime': _getmtime}): - hashed, hashes = diskfile.get_hashes( - part, recalculate=[]) - # getmtime will actually not get called. Initially, the pickle.load - # will raise an exception first and later, force_rewrite will - # short-circuit the if clause to determine whether to write out a - # fresh hashes_file. - self.assertEquals(i[0], 0) - self.assertTrue('a83' in hashes) - - def test_get_hashes_modified(self): - df = self._create_diskfile() - mkdirs(df._datadir) - with open( - os.path.join(df._datadir, - Timestamp(time()).internal + '.ts'), - 'wb') as f: - f.write('1234567890') - part = os.path.join(self.objects, '0') - hashed, hashes = diskfile.get_hashes(part) - i = [0] - - def _getmtime(filename): - if i[0] < 3: - i[0] += 1 - return i[0] - with unit_mock({'swift.obj.diskfile.getmtime': _getmtime}): - hashed, hashes = diskfile.get_hashes( - part, recalculate=['a83']) - self.assertEquals(i[0], 3) - - def check_hash_cleanup_listdir(self, input_files, output_files): - orig_unlink = os.unlink - file_list = list(input_files) - - def mock_listdir(path): - return list(file_list) - - def mock_unlink(path): - # timestamp 1 is a special tag to pretend a file disappeared while - # working. - if '/0000000001.00000.' in path: - # Using actual os.unlink to reproduce exactly what OSError it - # raises. 
- orig_unlink(uuid.uuid4().hex) - file_list.remove(os.path.basename(path)) - - with unit_mock({'os.listdir': mock_listdir, 'os.unlink': mock_unlink}): - self.assertEquals(diskfile.hash_cleanup_listdir('/whatever'), - output_files) - - def test_hash_cleanup_listdir_purge_data_newer_ts(self): - # purge .data if there's a newer .ts - file1 = Timestamp(time()).internal + '.data' - file2 = Timestamp(time() + 1).internal + '.ts' - file_list = [file1, file2] - self.check_hash_cleanup_listdir(file_list, [file2]) - - def test_hash_cleanup_listdir_purge_ts_newer_data(self): - # purge .ts if there's a newer .data - file1 = Timestamp(time()).internal + '.ts' - file2 = Timestamp(time() + 1).internal + '.data' - file_list = [file1, file2] - self.check_hash_cleanup_listdir(file_list, [file2]) - - def test_hash_cleanup_listdir_keep_meta_data_purge_ts(self): - # keep .meta and .data if meta newer than data and purge .ts - file1 = Timestamp(time()).internal + '.ts' - file2 = Timestamp(time() + 1).internal + '.data' - file3 = Timestamp(time() + 2).internal + '.meta' - file_list = [file1, file2, file3] - self.check_hash_cleanup_listdir(file_list, [file3, file2]) - - def test_hash_cleanup_listdir_keep_one_ts(self): - # keep only latest of multiple .ts files - file1 = Timestamp(time()).internal + '.ts' - file2 = Timestamp(time() + 1).internal + '.ts' - file3 = Timestamp(time() + 2).internal + '.ts' - file_list = [file1, file2, file3] - self.check_hash_cleanup_listdir(file_list, [file3]) - - def test_hash_cleanup_listdir_keep_one_data(self): - # keep only latest of multiple .data files - file1 = Timestamp(time()).internal + '.data' - file2 = Timestamp(time() + 1).internal + '.data' - file3 = Timestamp(time() + 2).internal + '.data' - file_list = [file1, file2, file3] - self.check_hash_cleanup_listdir(file_list, [file3]) - - def test_hash_cleanup_listdir_keep_one_meta(self): - # keep only latest of multiple .meta files - file1 = Timestamp(time()).internal + '.data' - file2 = Timestamp(time() + 1).internal + '.meta' - file3 = Timestamp(time() + 2).internal + '.meta' - file_list = [file1, file2, file3] - self.check_hash_cleanup_listdir(file_list, [file3, file1]) - - def test_hash_cleanup_listdir_ignore_orphaned_ts(self): - # A more recent orphaned .meta file will prevent old .ts files - # from being cleaned up otherwise - file1 = Timestamp(time()).internal + '.ts' - file2 = Timestamp(time() + 1).internal + '.ts' - file3 = Timestamp(time() + 2).internal + '.meta' - file_list = [file1, file2, file3] - self.check_hash_cleanup_listdir(file_list, [file3, file2]) - - def test_hash_cleanup_listdir_purge_old_data_only(self): - # Oldest .data will be purge, .meta and .ts won't be touched - file1 = Timestamp(time()).internal + '.data' - file2 = Timestamp(time() + 1).internal + '.ts' - file3 = Timestamp(time() + 2).internal + '.meta' - file_list = [file1, file2, file3] - self.check_hash_cleanup_listdir(file_list, [file3, file2]) - - def test_hash_cleanup_listdir_purge_old_ts(self): - # A single old .ts file will be removed - file1 = Timestamp(time() - (diskfile.ONE_WEEK + 1)).internal + '.ts' - file_list = [file1] - self.check_hash_cleanup_listdir(file_list, []) - - def test_hash_cleanup_listdir_meta_keeps_old_ts(self): - # An orphaned .meta will not clean up a very old .ts - file1 = Timestamp(time() - (diskfile.ONE_WEEK + 1)).internal + '.ts' - file2 = Timestamp(time() + 2).internal + '.meta' - file_list = [file1, file2] - self.check_hash_cleanup_listdir(file_list, [file2, file1]) - - def 
test_hash_cleanup_listdir_keep_single_old_data(self): - # A single old .data file will not be removed - file1 = Timestamp(time() - (diskfile.ONE_WEEK + 1)).internal + '.data' - file_list = [file1] - self.check_hash_cleanup_listdir(file_list, [file1]) - - def test_hash_cleanup_listdir_keep_single_old_meta(self): - # A single old .meta file will not be removed - file1 = Timestamp(time() - (diskfile.ONE_WEEK + 1)).internal + '.meta' - file_list = [file1] - self.check_hash_cleanup_listdir(file_list, [file1]) - - def test_hash_cleanup_listdir_disappeared_path(self): - # Next line listing a non-existent dir used to propagate the OSError; - # now should mute that. - self.assertEqual(diskfile.hash_cleanup_listdir(uuid.uuid4().hex), []) - - def test_hash_cleanup_listdir_disappeared_before_unlink_1(self): - # Timestamp 1 makes other test routines pretend the file disappeared - # while working. - file1 = '0000000001.00000.ts' - file_list = [file1] - self.check_hash_cleanup_listdir(file_list, []) - - def test_hash_cleanup_listdir_disappeared_before_unlink_2(self): - # Timestamp 1 makes other test routines pretend the file disappeared - # while working. - file1 = '0000000001.00000.data' - file2 = '0000000002.00000.ts' - file_list = [file1, file2] - self.check_hash_cleanup_listdir(file_list, [file2]) - @patch_policies class TestObjectAuditLocationGenerator(unittest.TestCase): @@ -678,7 +266,8 @@ class TestObjectAuditLocationGenerator(unittest.TestCase): pass def test_audit_location_class(self): - al = diskfile.AuditLocation('abc', '123', '_-_') + al = diskfile.AuditLocation('abc', '123', '_-_', + policy=POLICIES.legacy) self.assertEqual(str(al), 'abc') def test_finding_of_hashdirs(self): @@ -706,6 +295,7 @@ class TestObjectAuditLocationGenerator(unittest.TestCase): "6c3", "fcd938702024c25fef6c32fef05298eb")) os.makedirs(os.path.join(tmpdir, "sdq", "objects-fud", "foo")) + os.makedirs(os.path.join(tmpdir, "sdq", "objects-+1", "foo")) self._make_file(os.path.join(tmpdir, "sdp", "objects", "1519", "fed")) @@ -724,7 +314,7 @@ class TestObjectAuditLocationGenerator(unittest.TestCase): "4f9eee668b66c6f0250bfa3c7ab9e51e")) logger = debug_logger() - locations = [(loc.path, loc.device, loc.partition) + locations = [(loc.path, loc.device, loc.partition, loc.policy) for loc in diskfile.object_audit_location_generator( devices=tmpdir, mount_check=False, logger=logger)] @@ -733,44 +323,42 @@ class TestObjectAuditLocationGenerator(unittest.TestCase): # expect some warnings about those bad dirs warnings = logger.get_lines_for_level('warning') self.assertEqual(set(warnings), set([ - 'Directory objects- does not map to a valid policy', - 'Directory objects-2 does not map to a valid policy', - 'Directory objects-99 does not map to a valid policy', - 'Directory objects-fud does not map to a valid policy'])) + ("Directory 'objects-' does not map to a valid policy " + "(Unknown policy, for index '')"), + ("Directory 'objects-2' does not map to a valid policy " + "(Unknown policy, for index '2')"), + ("Directory 'objects-99' does not map to a valid policy " + "(Unknown policy, for index '99')"), + ("Directory 'objects-fud' does not map to a valid policy " + "(Unknown policy, for index 'fud')"), + ("Directory 'objects-+1' does not map to a valid policy " + "(Unknown policy, for index '+1')"), + ])) expected = \ [(os.path.join(tmpdir, "sdp", "objects-1", "9970", "ca5", "4a943bc72c2e647c4675923d58cf4ca5"), - "sdp", "9970"), + "sdp", "9970", POLICIES[1]), (os.path.join(tmpdir, "sdp", "objects", "1519", "aca", 
"5c1fdc1ffb12e5eaf84edc30d8b67aca"), - "sdp", "1519"), + "sdp", "1519", POLICIES[0]), (os.path.join(tmpdir, "sdp", "objects", "1519", "aca", "fdfd184d39080020bc8b487f8a7beaca"), - "sdp", "1519"), + "sdp", "1519", POLICIES[0]), (os.path.join(tmpdir, "sdp", "objects", "1519", "df2", "b0fe7af831cc7b1af5bf486b1c841df2"), - "sdp", "1519"), + "sdp", "1519", POLICIES[0]), (os.path.join(tmpdir, "sdp", "objects", "9720", "ca5", "4a943bc72c2e647c4675923d58cf4ca5"), - "sdp", "9720"), - (os.path.join(tmpdir, "sdq", "objects-", "1135", "6c3", - "fcd938702024c25fef6c32fef05298eb"), - "sdq", "1135"), - (os.path.join(tmpdir, "sdq", "objects-2", "9971", "8eb", - "fcd938702024c25fef6c32fef05298eb"), - "sdq", "9971"), - (os.path.join(tmpdir, "sdq", "objects-99", "9972", "8eb", - "fcd938702024c25fef6c32fef05298eb"), - "sdq", "9972"), + "sdp", "9720", POLICIES[0]), (os.path.join(tmpdir, "sdq", "objects", "3071", "8eb", "fcd938702024c25fef6c32fef05298eb"), - "sdq", "3071"), + "sdq", "3071", POLICIES[0]), ] self.assertEqual(locations, expected) # now without a logger - locations = [(loc.path, loc.device, loc.partition) + locations = [(loc.path, loc.device, loc.partition, loc.policy) for loc in diskfile.object_audit_location_generator( devices=tmpdir, mount_check=False)] locations.sort() @@ -790,7 +378,7 @@ class TestObjectAuditLocationGenerator(unittest.TestCase): "4993d582f41be9771505a8d4cb237a10")) locations = [ - (loc.path, loc.device, loc.partition) + (loc.path, loc.device, loc.partition, loc.policy) for loc in diskfile.object_audit_location_generator( devices=tmpdir, mount_check=True)] locations.sort() @@ -800,12 +388,12 @@ class TestObjectAuditLocationGenerator(unittest.TestCase): [(os.path.join(tmpdir, "sdp", "objects", "2607", "df3", "ec2871fe724411f91787462f97d30df3"), - "sdp", "2607")]) + "sdp", "2607", POLICIES[0])]) # Do it again, this time with a logger. ml = mock.MagicMock() locations = [ - (loc.path, loc.device, loc.partition) + (loc.path, loc.device, loc.partition, loc.policy) for loc in diskfile.object_audit_location_generator( devices=tmpdir, mount_check=True, logger=ml)] ml.debug.assert_called_once_with( @@ -818,7 +406,7 @@ class TestObjectAuditLocationGenerator(unittest.TestCase): # only normal FS corruption should be skipped over silently. def list_locations(dirname): - return [(loc.path, loc.device, loc.partition) + return [(loc.path, loc.device, loc.partition, loc.policy) for loc in diskfile.object_audit_location_generator( devices=dirname, mount_check=False)] @@ -844,7 +432,45 @@ class TestObjectAuditLocationGenerator(unittest.TestCase): self.assertRaises(OSError, list_locations, tmpdir) -class TestDiskFileManager(unittest.TestCase): +class TestDiskFileRouter(unittest.TestCase): + + def test_register(self): + with mock.patch.dict( + diskfile.DiskFileRouter.policy_type_to_manager_cls, {}): + @diskfile.DiskFileRouter.register('test-policy') + class TestDiskFileManager(diskfile.DiskFileManager): + pass + + @BaseStoragePolicy.register('test-policy') + class TestStoragePolicy(BaseStoragePolicy): + pass + + with patch_policies([TestStoragePolicy(0, 'test')]): + router = diskfile.DiskFileRouter({}, debug_logger('test')) + manager = router[POLICIES.default] + self.assertTrue(isinstance(manager, TestDiskFileManager)) + + +class BaseDiskFileTestMixin(object): + """ + Bag of helpers that are useful in the per-policy DiskFile test classes. 
+ """ + + def _manager_mock(self, manager_attribute_name, df=None): + mgr_cls = df._manager.__class__ if df else self.mgr_cls + return '.'.join([ + mgr_cls.__module__, mgr_cls.__name__, manager_attribute_name]) + + +class DiskFileManagerMixin(BaseDiskFileTestMixin): + """ + Abstract test method mixin for concrete test cases - this class + won't get picked up by test runners because it doesn't subclass + unittest.TestCase and doesn't have [Tt]est in the name. + """ + + # set mgr_cls on subclasses + mgr_cls = None def setUp(self): self.tmpdir = mkdtemp() @@ -852,17 +478,111 @@ class TestDiskFileManager(unittest.TestCase): self.tmpdir, 'tmp_test_obj_server_DiskFile') self.existing_device1 = 'sda1' self.existing_device2 = 'sda2' - mkdirs(os.path.join(self.testdir, self.existing_device1, 'tmp')) - mkdirs(os.path.join(self.testdir, self.existing_device2, 'tmp')) + for policy in POLICIES: + mkdirs(os.path.join(self.testdir, self.existing_device1, + diskfile.get_tmp_dir(policy))) + mkdirs(os.path.join(self.testdir, self.existing_device2, + diskfile.get_tmp_dir(policy))) self._orig_tpool_exc = tpool.execute tpool.execute = lambda f, *args, **kwargs: f(*args, **kwargs) self.conf = dict(devices=self.testdir, mount_check='false', keep_cache_size=2 * 1024) - self.df_mgr = diskfile.DiskFileManager(self.conf, FakeLogger()) + self.logger = debug_logger('test-' + self.__class__.__name__) + self.df_mgr = self.mgr_cls(self.conf, self.logger) + self.df_router = diskfile.DiskFileRouter(self.conf, self.logger) def tearDown(self): rmtree(self.tmpdir, ignore_errors=1) + def _get_diskfile(self, policy, frag_index=None): + df_mgr = self.df_router[policy] + return df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o', + policy=policy, frag_index=frag_index) + + def _test_get_ondisk_files(self, scenarios, policy, + frag_index=None): + class_under_test = self._get_diskfile(policy, frag_index=frag_index) + with mock.patch('swift.obj.diskfile.os.listdir', + lambda _: []): + self.assertEqual((None, None, None), + class_under_test._get_ondisk_file()) + + returned_ext_order = ('.data', '.meta', '.ts') + for test in scenarios: + chosen = dict((f[1], os.path.join(class_under_test._datadir, f[0])) + for f in test if f[1]) + expected = tuple(chosen.get(ext) for ext in returned_ext_order) + files = list(zip(*test)[0]) + for _order in ('ordered', 'shuffled', 'shuffled'): + class_under_test = self._get_diskfile(policy, frag_index) + try: + with mock.patch('swift.obj.diskfile.os.listdir', + lambda _: files): + actual = class_under_test._get_ondisk_file() + self.assertEqual(expected, actual, + 'Expected %s from %s but got %s' + % (expected, files, actual)) + except AssertionError as e: + self.fail('%s with files %s' % (str(e), files)) + shuffle(files) + + def _test_hash_cleanup_listdir_files(self, scenarios, policy, + reclaim_age=None): + # check that expected files are left in hashdir after cleanup + for test in scenarios: + class_under_test = self.df_router[policy] + files = list(zip(*test)[0]) + hashdir = os.path.join(self.testdir, str(uuid.uuid4())) + os.mkdir(hashdir) + for fname in files: + open(os.path.join(hashdir, fname), 'w') + expected_after_cleanup = set([f[0] for f in test + if (f[2] if len(f) > 2 else f[1])]) + if reclaim_age: + class_under_test.hash_cleanup_listdir( + hashdir, reclaim_age=reclaim_age) + else: + with mock.patch('swift.obj.diskfile.time') as mock_time: + # don't reclaim anything + mock_time.time.return_value = 0.0 + class_under_test.hash_cleanup_listdir(hashdir) + after_cleanup = set(os.listdir(hashdir)) + 
errmsg = "expected %r, got %r for test %r" % ( + sorted(expected_after_cleanup), sorted(after_cleanup), test + ) + self.assertEqual(expected_after_cleanup, after_cleanup, errmsg) + + def _test_yield_hashes_cleanup(self, scenarios, policy): + # opportunistic test to check that yield_hashes cleans up dir using + # same scenarios as passed to _test_hash_cleanup_listdir_files + for test in scenarios: + class_under_test = self.df_router[policy] + files = list(zip(*test)[0]) + dev_path = os.path.join(self.testdir, str(uuid.uuid4())) + hashdir = os.path.join( + dev_path, diskfile.get_data_dir(policy), + '0', 'abc', '9373a92d072897b136b3fc06595b4abc') + os.makedirs(hashdir) + for fname in files: + open(os.path.join(hashdir, fname), 'w') + expected_after_cleanup = set([f[0] for f in test + if f[1] or len(f) > 2 and f[2]]) + with mock.patch('swift.obj.diskfile.time') as mock_time: + # don't reclaim anything + mock_time.time.return_value = 0.0 + mock_func = 'swift.obj.diskfile.DiskFileManager.get_dev_path' + with mock.patch(mock_func) as mock_path: + mock_path.return_value = dev_path + for _ in class_under_test.yield_hashes( + 'ignored', '0', policy, suffixes=['abc']): + # return values are tested in test_yield_hashes_* + pass + after_cleanup = set(os.listdir(hashdir)) + errmsg = "expected %r, got %r for test %r" % ( + sorted(expected_after_cleanup), sorted(after_cleanup), test + ) + self.assertEqual(expected_after_cleanup, after_cleanup, errmsg) + def test_construct_dev_path(self): res_path = self.df_mgr.construct_dev_path('abc') self.assertEqual(os.path.join(self.df_mgr.devices, 'abc'), res_path) @@ -873,12 +593,13 @@ class TestDiskFileManager(unittest.TestCase): with mock.patch('swift.obj.diskfile.write_pickle') as wp: self.df_mgr.pickle_async_update(self.existing_device1, 'a', 'c', 'o', - dict(a=1, b=2), ts, 0) + dict(a=1, b=2), ts, POLICIES[0]) dp = self.df_mgr.construct_dev_path(self.existing_device1) ohash = diskfile.hash_path('a', 'c', 'o') wp.assert_called_with({'a': 1, 'b': 2}, - os.path.join(dp, diskfile.get_async_dir(0), - ohash[-3:], ohash + '-' + ts), + os.path.join( + dp, diskfile.get_async_dir(POLICIES[0]), + ohash[-3:], ohash + '-' + ts), os.path.join(dp, 'tmp')) self.df_mgr.logger.increment.assert_called_with('async_pendings') @@ -886,32 +607,16 @@ class TestDiskFileManager(unittest.TestCase): locations = list(self.df_mgr.object_audit_location_generator()) self.assertEqual(locations, []) - def test_get_hashes_bad_dev(self): - self.df_mgr.mount_check = True - with mock.patch('swift.obj.diskfile.check_mount', - mock.MagicMock(side_effect=[False])): - self.assertRaises(DiskFileDeviceUnavailable, - self.df_mgr.get_hashes, 'sdb1', '0', '123', - 'objects') - - def test_get_hashes_w_nothing(self): - hashes = self.df_mgr.get_hashes(self.existing_device1, '0', '123', '0') - self.assertEqual(hashes, {}) - # get_hashes creates the partition path, so call again for code - # path coverage, ensuring the result is unchanged - hashes = self.df_mgr.get_hashes(self.existing_device1, '0', '123', '0') - self.assertEqual(hashes, {}) - def test_replication_lock_on(self): # Double check settings self.df_mgr.replication_one_per_device = True self.df_mgr.replication_lock_timeout = 0.1 dev_path = os.path.join(self.testdir, self.existing_device1) - with self.df_mgr.replication_lock(dev_path): + with self.df_mgr.replication_lock(self.existing_device1): lock_exc = None exc = None try: - with self.df_mgr.replication_lock(dev_path): + with self.df_mgr.replication_lock(self.existing_device1): raise Exception( '%r 
was not replication locked!' % dev_path) except ReplicationLockTimeout as err: @@ -944,12 +649,10 @@ class TestDiskFileManager(unittest.TestCase): # Double check settings self.df_mgr.replication_one_per_device = True self.df_mgr.replication_lock_timeout = 0.1 - dev_path = os.path.join(self.testdir, self.existing_device1) - dev_path2 = os.path.join(self.testdir, self.existing_device2) - with self.df_mgr.replication_lock(dev_path): + with self.df_mgr.replication_lock(self.existing_device1): lock_exc = None try: - with self.df_mgr.replication_lock(dev_path2): + with self.df_mgr.replication_lock(self.existing_device2): pass except ReplicationLockTimeout as err: lock_exc = err @@ -966,10 +669,1094 @@ class TestDiskFileManager(unittest.TestCase): self.assertTrue('splice()' in warnings[-1]) self.assertFalse(mgr.use_splice) + def test_get_diskfile_from_hash_dev_path_fail(self): + self.df_mgr.get_dev_path = mock.MagicMock(return_value=None) + with nested( + mock.patch(self._manager_mock('diskfile_cls')), + mock.patch(self._manager_mock('hash_cleanup_listdir')), + mock.patch('swift.obj.diskfile.read_metadata')) as \ + (dfclass, hclistdir, readmeta): + hclistdir.return_value = ['1381679759.90941.data'] + readmeta.return_value = {'name': '/a/c/o'} + self.assertRaises( + DiskFileDeviceUnavailable, + self.df_mgr.get_diskfile_from_hash, + 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', POLICIES[0]) + + def test_get_diskfile_from_hash_not_dir(self): + self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') + with nested( + mock.patch(self._manager_mock('diskfile_cls')), + mock.patch(self._manager_mock('hash_cleanup_listdir')), + mock.patch('swift.obj.diskfile.read_metadata'), + mock.patch(self._manager_mock('quarantine_renamer'))) as \ + (dfclass, hclistdir, readmeta, quarantine_renamer): + osexc = OSError() + osexc.errno = errno.ENOTDIR + hclistdir.side_effect = osexc + readmeta.return_value = {'name': '/a/c/o'} + self.assertRaises( + DiskFileNotExist, + self.df_mgr.get_diskfile_from_hash, + 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', POLICIES[0]) + quarantine_renamer.assert_called_once_with( + '/srv/dev/', + '/srv/dev/objects/9/900/9a7175077c01a23ade5956b8a2bba900') + + def test_get_diskfile_from_hash_no_dir(self): + self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') + with nested( + mock.patch(self._manager_mock('diskfile_cls')), + mock.patch(self._manager_mock('hash_cleanup_listdir')), + mock.patch('swift.obj.diskfile.read_metadata')) as \ + (dfclass, hclistdir, readmeta): + osexc = OSError() + osexc.errno = errno.ENOENT + hclistdir.side_effect = osexc + readmeta.return_value = {'name': '/a/c/o'} + self.assertRaises( + DiskFileNotExist, + self.df_mgr.get_diskfile_from_hash, + 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', POLICIES[0]) + + def test_get_diskfile_from_hash_other_oserror(self): + self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') + with nested( + mock.patch(self._manager_mock('diskfile_cls')), + mock.patch(self._manager_mock('hash_cleanup_listdir')), + mock.patch('swift.obj.diskfile.read_metadata')) as \ + (dfclass, hclistdir, readmeta): + osexc = OSError() + hclistdir.side_effect = osexc + readmeta.return_value = {'name': '/a/c/o'} + self.assertRaises( + OSError, + self.df_mgr.get_diskfile_from_hash, + 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', POLICIES[0]) + + def test_get_diskfile_from_hash_no_actual_files(self): + self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') + with nested( + 
mock.patch(self._manager_mock('diskfile_cls')), + mock.patch(self._manager_mock('hash_cleanup_listdir')), + mock.patch('swift.obj.diskfile.read_metadata')) as \ + (dfclass, hclistdir, readmeta): + hclistdir.return_value = [] + readmeta.return_value = {'name': '/a/c/o'} + self.assertRaises( + DiskFileNotExist, + self.df_mgr.get_diskfile_from_hash, + 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', POLICIES[0]) + + def test_get_diskfile_from_hash_read_metadata_problem(self): + self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') + with nested( + mock.patch(self._manager_mock('diskfile_cls')), + mock.patch(self._manager_mock('hash_cleanup_listdir')), + mock.patch('swift.obj.diskfile.read_metadata')) as \ + (dfclass, hclistdir, readmeta): + hclistdir.return_value = ['1381679759.90941.data'] + readmeta.side_effect = EOFError() + self.assertRaises( + DiskFileNotExist, + self.df_mgr.get_diskfile_from_hash, + 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', POLICIES[0]) + + def test_get_diskfile_from_hash_no_meta_name(self): + self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') + with nested( + mock.patch(self._manager_mock('diskfile_cls')), + mock.patch(self._manager_mock('hash_cleanup_listdir')), + mock.patch('swift.obj.diskfile.read_metadata')) as \ + (dfclass, hclistdir, readmeta): + hclistdir.return_value = ['1381679759.90941.data'] + readmeta.return_value = {} + try: + self.df_mgr.get_diskfile_from_hash( + 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', + POLICIES[0]) + except DiskFileNotExist as err: + exc = err + self.assertEqual(str(exc), '') + + def test_get_diskfile_from_hash_bad_meta_name(self): + self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') + with nested( + mock.patch(self._manager_mock('diskfile_cls')), + mock.patch(self._manager_mock('hash_cleanup_listdir')), + mock.patch('swift.obj.diskfile.read_metadata')) as \ + (dfclass, hclistdir, readmeta): + hclistdir.return_value = ['1381679759.90941.data'] + readmeta.return_value = {'name': 'bad'} + try: + self.df_mgr.get_diskfile_from_hash( + 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', + POLICIES[0]) + except DiskFileNotExist as err: + exc = err + self.assertEqual(str(exc), '') + + def test_get_diskfile_from_hash(self): + self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') + with nested( + mock.patch(self._manager_mock('diskfile_cls')), + mock.patch(self._manager_mock('hash_cleanup_listdir')), + mock.patch('swift.obj.diskfile.read_metadata')) as \ + (dfclass, hclistdir, readmeta): + hclistdir.return_value = ['1381679759.90941.data'] + readmeta.return_value = {'name': '/a/c/o'} + self.df_mgr.get_diskfile_from_hash( + 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', POLICIES[0]) + dfclass.assert_called_once_with( + self.df_mgr, '/srv/dev/', self.df_mgr.threadpools['dev'], '9', + 'a', 'c', 'o', policy=POLICIES[0]) + hclistdir.assert_called_once_with( + '/srv/dev/objects/9/900/9a7175077c01a23ade5956b8a2bba900', + 604800) + readmeta.assert_called_once_with( + '/srv/dev/objects/9/900/9a7175077c01a23ade5956b8a2bba900/' + '1381679759.90941.data') + + def test_listdir_enoent(self): + oserror = OSError() + oserror.errno = errno.ENOENT + self.df_mgr.logger.error = mock.MagicMock() + with mock.patch('os.listdir', side_effect=oserror): + self.assertEqual(self.df_mgr._listdir('path'), []) + self.assertEqual(self.df_mgr.logger.error.mock_calls, []) + + def test_listdir_other_oserror(self): + oserror = OSError() + self.df_mgr.logger.error = mock.MagicMock() + with 
mock.patch('os.listdir', side_effect=oserror): + self.assertEqual(self.df_mgr._listdir('path'), []) + self.df_mgr.logger.error.assert_called_once_with( + 'ERROR: Skipping %r due to error with listdir attempt: %s', + 'path', oserror) + + def test_listdir(self): + self.df_mgr.logger.error = mock.MagicMock() + with mock.patch('os.listdir', return_value=['abc', 'def']): + self.assertEqual(self.df_mgr._listdir('path'), ['abc', 'def']) + self.assertEqual(self.df_mgr.logger.error.mock_calls, []) + + def test_yield_suffixes_dev_path_fail(self): + self.df_mgr.get_dev_path = mock.MagicMock(return_value=None) + exc = None + try: + list(self.df_mgr.yield_suffixes(self.existing_device1, '9', 0)) + except DiskFileDeviceUnavailable as err: + exc = err + self.assertEqual(str(exc), '') + + def test_yield_suffixes(self): + self.df_mgr._listdir = mock.MagicMock(return_value=[ + 'abc', 'def', 'ghi', 'abcd', '012']) + dev = self.existing_device1 + self.assertEqual( + list(self.df_mgr.yield_suffixes(dev, '9', POLICIES[0])), + [(self.testdir + '/' + dev + '/objects/9/abc', 'abc'), + (self.testdir + '/' + dev + '/objects/9/def', 'def'), + (self.testdir + '/' + dev + '/objects/9/012', '012')]) + + def test_yield_hashes_dev_path_fail(self): + self.df_mgr.get_dev_path = mock.MagicMock(return_value=None) + exc = None + try: + list(self.df_mgr.yield_hashes(self.existing_device1, '9', + POLICIES[0])) + except DiskFileDeviceUnavailable as err: + exc = err + self.assertEqual(str(exc), '') + + def test_yield_hashes_empty(self): + def _listdir(path): + return [] + + with mock.patch('os.listdir', _listdir): + self.assertEqual(list(self.df_mgr.yield_hashes( + self.existing_device1, '9', POLICIES[0])), []) + + def test_yield_hashes_empty_suffixes(self): + def _listdir(path): + return [] + + with mock.patch('os.listdir', _listdir): + self.assertEqual( + list(self.df_mgr.yield_hashes(self.existing_device1, '9', + POLICIES[0], + suffixes=['456'])), []) + + def _check_yield_hashes(self, policy, suffix_map, expected, **kwargs): + device = self.existing_device1 + part = '9' + part_path = os.path.join( + self.testdir, device, diskfile.get_data_dir(policy), part) + + def _listdir(path): + if path == part_path: + return suffix_map.keys() + for suff, hash_map in suffix_map.items(): + if path == os.path.join(part_path, suff): + return hash_map.keys() + for hash_, files in hash_map.items(): + if path == os.path.join(part_path, suff, hash_): + return files + self.fail('Unexpected listdir of %r' % path) + expected_items = [ + (os.path.join(part_path, hash_[-3:], hash_), hash_, + Timestamp(ts).internal) + for hash_, ts in expected.items()] + with nested( + mock.patch('os.listdir', _listdir), + mock.patch('os.unlink')): + df_mgr = self.df_router[policy] + hash_items = list(df_mgr.yield_hashes( + device, part, policy, **kwargs)) + expected = sorted(expected_items) + actual = sorted(hash_items) + self.assertEqual(actual, expected, + 'Expected %s but got %s' % (expected, actual)) + + def test_yield_hashes_tombstones(self): + ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) + ts1 = next(ts_iter) + ts2 = next(ts_iter) + ts3 = next(ts_iter) + suffix_map = { + '27e': { + '1111111111111111111111111111127e': [ + ts1.internal + '.ts'], + '2222222222222222222222222222227e': [ + ts2.internal + '.ts'], + }, + 'd41': { + 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaad41': [] + }, + 'd98': {}, + '00b': { + '3333333333333333333333333333300b': [ + ts1.internal + '.ts', + ts2.internal + '.ts', + ts3.internal + '.ts', + ] + }, + '204': { + 
'bbbbbbbbbbbbbbbbbbbbbbbbbbbbb204': [ + ts3.internal + '.ts', + ] + } + } + expected = { + '1111111111111111111111111111127e': ts1.internal, + '2222222222222222222222222222227e': ts2.internal, + '3333333333333333333333333333300b': ts3.internal, + } + for policy in POLICIES: + self._check_yield_hashes(policy, suffix_map, expected, + suffixes=['27e', '00b']) + @patch_policies -class TestDiskFile(unittest.TestCase): - """Test swift.obj.diskfile.DiskFile""" +class TestDiskFileManager(DiskFileManagerMixin, unittest.TestCase): + + mgr_cls = diskfile.DiskFileManager + + def test_get_ondisk_files_with_repl_policy(self): + # Each scenario specifies a list of (filename, extension) tuples. If + # extension is set then that filename should be returned by the method + # under test for that extension type. + scenarios = [[('0000000007.00000.data', '.data')], + + [('0000000007.00000.ts', '.ts')], + + # older tombstone is ignored + [('0000000007.00000.ts', '.ts'), + ('0000000006.00000.ts', False)], + + # older data is ignored + [('0000000007.00000.data', '.data'), + ('0000000006.00000.data', False), + ('0000000004.00000.ts', False)], + + # newest meta trumps older meta + [('0000000009.00000.meta', '.meta'), + ('0000000008.00000.meta', False), + ('0000000007.00000.data', '.data'), + ('0000000004.00000.ts', False)], + + # meta older than data is ignored + [('0000000007.00000.data', '.data'), + ('0000000006.00000.meta', False), + ('0000000004.00000.ts', False)], + + # meta without data is ignored + [('0000000007.00000.meta', False, True), + ('0000000006.00000.ts', '.ts'), + ('0000000004.00000.data', False)], + + # tombstone trumps meta and data at same timestamp + [('0000000006.00000.meta', False), + ('0000000006.00000.ts', '.ts'), + ('0000000006.00000.data', False)], + ] + + self._test_get_ondisk_files(scenarios, POLICIES[0], None) + self._test_hash_cleanup_listdir_files(scenarios, POLICIES[0]) + self._test_yield_hashes_cleanup(scenarios, POLICIES[0]) + + def test_get_ondisk_files_with_stray_meta(self): + # get_ondisk_files does not tolerate a stray .meta file + + class_under_test = self._get_diskfile(POLICIES[0]) + files = ['0000000007.00000.meta'] + + self.assertRaises(AssertionError, + class_under_test.manager.get_ondisk_files, files, + self.testdir) + + def test_yield_hashes(self): + old_ts = '1383180000.12345' + fresh_ts = Timestamp(time() - 10).internal + fresher_ts = Timestamp(time() - 1).internal + suffix_map = { + 'abc': { + '9373a92d072897b136b3fc06595b4abc': [ + fresh_ts + '.ts'], + }, + '456': { + '9373a92d072897b136b3fc06595b0456': [ + old_ts + '.data'], + '9373a92d072897b136b3fc06595b7456': [ + fresh_ts + '.ts', + fresher_ts + '.data'], + }, + 'def': {}, + } + expected = { + '9373a92d072897b136b3fc06595b4abc': fresh_ts, + '9373a92d072897b136b3fc06595b0456': old_ts, + '9373a92d072897b136b3fc06595b7456': fresher_ts, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected) + + def test_yield_hashes_yields_meta_timestamp(self): + ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) + ts1 = next(ts_iter) + ts2 = next(ts_iter) + ts3 = next(ts_iter) + suffix_map = { + 'abc': { + '9373a92d072897b136b3fc06595b4abc': [ + ts1.internal + '.ts', + ts2.internal + '.meta'], + }, + '456': { + '9373a92d072897b136b3fc06595b0456': [ + ts1.internal + '.data', + ts2.internal + '.meta', + ts3.internal + '.meta'], + '9373a92d072897b136b3fc06595b7456': [ + ts1.internal + '.data', + ts2.internal + '.meta'], + }, + } + expected = { + '9373a92d072897b136b3fc06595b4abc': ts2, + 
'9373a92d072897b136b3fc06595b0456': ts3, + '9373a92d072897b136b3fc06595b7456': ts2, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected) + + def test_yield_hashes_suffix_filter(self): + # test again with limited suffixes + old_ts = '1383180000.12345' + fresh_ts = Timestamp(time() - 10).internal + fresher_ts = Timestamp(time() - 1).internal + suffix_map = { + 'abc': { + '9373a92d072897b136b3fc06595b4abc': [ + fresh_ts + '.ts'], + }, + '456': { + '9373a92d072897b136b3fc06595b0456': [ + old_ts + '.data'], + '9373a92d072897b136b3fc06595b7456': [ + fresh_ts + '.ts', + fresher_ts + '.data'], + }, + 'def': {}, + } + expected = { + '9373a92d072897b136b3fc06595b0456': old_ts, + '9373a92d072897b136b3fc06595b7456': fresher_ts, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + suffixes=['456']) + + def test_yield_hashes_fails_with_bad_ondisk_filesets(self): + ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) + ts1 = next(ts_iter) + suffix_map = { + '456': { + '9373a92d072897b136b3fc06595b0456': [ + ts1.internal + '.data'], + '9373a92d072897b136b3fc06595ba456': [ + ts1.internal + '.meta'], + }, + } + expected = { + '9373a92d072897b136b3fc06595b0456': ts1, + } + try: + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=2) + self.fail('Expected AssertionError') + except AssertionError: + pass + + +@patch_policies(with_ec_default=True) +class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): + + mgr_cls = diskfile.ECDiskFileManager + + def test_get_ondisk_files_with_ec_policy(self): + # Each scenario specifies a list of (filename, extension, [survives]) + # tuples. If extension is set then that filename should be returned by + # the method under test for that extension type. If the optional + # 'survives' is True, the filename should still be in the dir after + # cleanup. 
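For orientation, the EC scenarios that follow are built from the on-disk filename shapes this patch introduces: a fragment archive embeds its fragment index in the .data filename, and a separate .durable marker at the same timestamp records that the write completed. A minimal, illustrative sketch of those shapes (the literal values are examples, not taken from any one scenario below):

    # Swift's normalized timestamp form, e.g. Timestamp(7).internal
    ts = '0000000007.00000'
    frag_index = 1

    data_file = '%s#%s.data' % (ts, frag_index)   # '0000000007.00000#1.data'
    durable_marker = '%s.durable' % ts            # '0000000007.00000.durable'
    tombstone = '%s.ts' % ts                      # '0000000007.00000.ts'
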
+ scenarios = [[('0000000007.00000.ts', '.ts')], + + [('0000000007.00000.ts', '.ts'), + ('0000000006.00000.ts', False)], + + # highest frag index is chosen by default + [('0000000007.00000.durable', '.durable'), + ('0000000007.00000#1.data', '.data'), + ('0000000007.00000#0.data', False, True)], + + # data with no durable is ignored + [('0000000007.00000#0.data', False, True)], + + # data newer than durable is ignored + [('0000000008.00000#1.data', False, True), + ('0000000007.00000.durable', '.durable'), + ('0000000007.00000#1.data', '.data'), + ('0000000007.00000#0.data', False, True)], + + # data newer than durable ignored, even if its only data + [('0000000008.00000#1.data', False, True), + ('0000000007.00000.durable', False, False)], + + # data older than durable is ignored + [('0000000007.00000.durable', '.durable'), + ('0000000007.00000#1.data', '.data'), + ('0000000006.00000#1.data', False), + ('0000000004.00000.ts', False)], + + # data older than durable ignored, even if its only data + [('0000000007.00000.durable', False, False), + ('0000000006.00000#1.data', False), + ('0000000004.00000.ts', False)], + + # newer meta trumps older meta + [('0000000009.00000.meta', '.meta'), + ('0000000008.00000.meta', False), + ('0000000007.00000.durable', '.durable'), + ('0000000007.00000#14.data', '.data'), + ('0000000004.00000.ts', False)], + + # older meta is ignored + [('0000000007.00000.durable', '.durable'), + ('0000000007.00000#14.data', '.data'), + ('0000000006.00000.meta', False), + ('0000000004.00000.ts', False)], + + # tombstone trumps meta, data, durable at older timestamp + [('0000000006.00000.ts', '.ts'), + ('0000000005.00000.meta', False), + ('0000000004.00000.durable', False), + ('0000000004.00000#0.data', False)], + + # tombstone trumps meta, data, durable at same timestamp + [('0000000006.00000.meta', False), + ('0000000006.00000.ts', '.ts'), + ('0000000006.00000.durable', False), + ('0000000006.00000#0.data', False)], + + # missing durable invalidates data + [('0000000006.00000.meta', False, True), + ('0000000006.00000#0.data', False, True)] + ] + + self._test_get_ondisk_files(scenarios, POLICIES.default, None) + self._test_hash_cleanup_listdir_files(scenarios, POLICIES.default) + self._test_yield_hashes_cleanup(scenarios, POLICIES.default) + + def test_get_ondisk_files_with_ec_policy_and_frag_index(self): + # Each scenario specifies a list of (filename, extension) tuples. If + # extension is set then that filename should be returned by the method + # under test for that extension type. 
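The frag_index scenarios below encode a simple selection rule: only .data files carrying the requested fragment index are eligible, and if no eligible fragment exists then no .data file is returned at all. A rough standalone illustration of that rule, ignoring the .durable requirement for brevity (pick_data_file is a hypothetical helper written for this sketch, not the DiskFileManager API):

    import re

    DATA_RE = re.compile(r'^(?P<ts>\d{10}\.\d{5})#(?P<frag>\d+)\.data$')

    def pick_data_file(files, frag_index=None):
        # Hypothetical sketch of the selection described by the scenario
        # comments; not the real implementation.
        candidates = []
        for fname in files:
            match = DATA_RE.match(fname)
            if not match:
                continue
            if frag_index is not None and \
                    int(match.group('frag')) != int(frag_index):
                continue
            candidates.append(
                (match.group('ts'), int(match.group('frag')), fname))
        if not candidates:
            return None  # e.g. the specific frag index is missing
        # newest timestamp wins; highest frag index breaks ties
        return max(candidates)[2]

    print(pick_data_file(['0000000007.00000#2.data',
                          '0000000007.00000#0.data']))
    # -> '0000000007.00000#2.data'
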
+ scenarios = [[('0000000007.00000#2.data', False, True), + ('0000000007.00000#1.data', '.data'), + ('0000000007.00000#0.data', False, True), + ('0000000007.00000.durable', '.durable')], + + # specific frag newer than durable is ignored + [('0000000007.00000#2.data', False, True), + ('0000000007.00000#1.data', False, True), + ('0000000007.00000#0.data', False, True), + ('0000000006.00000.durable', '.durable')], + + # specific frag older than durable is ignored + [('0000000007.00000#2.data', False), + ('0000000007.00000#1.data', False), + ('0000000007.00000#0.data', False), + ('0000000008.00000.durable', '.durable')], + + # specific frag older than newest durable is ignored + # even if is also has a durable + [('0000000007.00000#2.data', False), + ('0000000007.00000#1.data', False), + ('0000000007.00000.durable', False), + ('0000000008.00000#0.data', False), + ('0000000008.00000.durable', '.durable')], + + # meta included when frag index is specified + [('0000000009.00000.meta', '.meta'), + ('0000000007.00000#2.data', False, True), + ('0000000007.00000#1.data', '.data'), + ('0000000007.00000#0.data', False, True), + ('0000000007.00000.durable', '.durable')], + + # specific frag older than tombstone is ignored + [('0000000009.00000.ts', '.ts'), + ('0000000007.00000#2.data', False), + ('0000000007.00000#1.data', False), + ('0000000007.00000#0.data', False), + ('0000000007.00000.durable', False)], + + # no data file returned if specific frag index missing + [('0000000007.00000#2.data', False, True), + ('0000000007.00000#14.data', False, True), + ('0000000007.00000#0.data', False, True), + ('0000000007.00000.durable', '.durable')], + + # meta ignored if specific frag index missing + [('0000000008.00000.meta', False, True), + ('0000000007.00000#14.data', False, True), + ('0000000007.00000#0.data', False, True), + ('0000000007.00000.durable', '.durable')], + + # meta ignored if no data files + # Note: this is anomalous, because we are specifying a + # frag_index, get_ondisk_files will tolerate .meta with + # no .data + [('0000000088.00000.meta', False, True), + ('0000000077.00000.durable', '.durable')] + ] + + self._test_get_ondisk_files(scenarios, POLICIES.default, frag_index=1) + # note: not calling self._test_hash_cleanup_listdir_files(scenarios, 0) + # here due to the anomalous scenario as commented above + + def test_hash_cleanup_listdir_reclaim(self): + # Each scenario specifies a list of (filename, extension, [survives]) + # tuples. If extension is set or 'survives' is True, the filename + # should still be in the dir after cleanup. + much_older = Timestamp(time() - 2000).internal + older = Timestamp(time() - 1001).internal + newer = Timestamp(time() - 900).internal + scenarios = [[('%s.ts' % older, False, False)], + + # fresh tombstone is preserved + [('%s.ts' % newer, '.ts', True)], + + # isolated .durable is cleaned up immediately + [('%s.durable' % newer, False, False)], + + # ...even when other older files are in dir + [('%s.durable' % older, False, False), + ('%s.ts' % much_older, False, False)], + + # isolated .data files are cleaned up when stale + [('%s#2.data' % older, False, False), + ('%s#4.data' % older, False, False)], + + # ...even when there is an older durable fileset + [('%s#2.data' % older, False, False), + ('%s#4.data' % older, False, False), + ('%s#2.data' % much_older, '.data', True), + ('%s#4.data' % much_older, False, True), + ('%s.durable' % much_older, '.durable', True)], + + # ... 
but preserved if still fresh + [('%s#2.data' % newer, False, True), + ('%s#4.data' % newer, False, True)], + + # ... and we could have a mixture of fresh and stale .data + [('%s#2.data' % newer, False, True), + ('%s#4.data' % older, False, False)], + + # TODO these remaining scenarios exhibit different + # behavior than the legacy replication DiskFileManager + # behavior... + + # tombstone reclaimed despite newer non-durable data + [('%s#2.data' % newer, False, True), + ('%s#4.data' % older, False, False), + ('%s.ts' % much_older, '.ts', False)], + + # tombstone reclaimed despite newer non-durable data + [('%s.ts' % older, '.ts', False), + ('%s.durable' % much_older, False, False)], + + # tombstone reclaimed despite junk file + [('junk', False, True), + ('%s.ts' % much_older, '.ts', False)], + ] + + self._test_hash_cleanup_listdir_files(scenarios, POLICIES.default, + reclaim_age=1000) + + def test_get_ondisk_files_with_stray_meta(self): + # get_ondisk_files does not tolerate a stray .meta file + scenarios = [['0000000007.00000.meta'], + + ['0000000007.00000.meta', + '0000000006.00000.durable'], + + ['0000000007.00000.meta', + '0000000006.00000#1.data'], + + ['0000000007.00000.meta', + '0000000006.00000.durable', + '0000000005.00000#1.data'] + ] + for files in scenarios: + class_under_test = self._get_diskfile(POLICIES.default) + self.assertRaises(DiskFileNotExist, class_under_test.open) + + def test_parse_on_disk_filename(self): + mgr = self.df_router[POLICIES.default] + for ts in (Timestamp('1234567890.00001'), + Timestamp('1234567890.00001', offset=17)): + for frag in (0, 2, 14): + fname = '%s#%s.data' % (ts.internal, frag) + info = mgr.parse_on_disk_filename(fname) + self.assertEqual(ts, info['timestamp']) + self.assertEqual(frag, info['frag_index']) + self.assertEqual(mgr.make_on_disk_filename(**info), fname) + + for ext in ('.meta', '.durable', '.ts'): + fname = '%s%s' % (ts.internal, ext) + info = mgr.parse_on_disk_filename(fname) + self.assertEqual(ts, info['timestamp']) + self.assertEqual(None, info['frag_index']) + self.assertEqual(mgr.make_on_disk_filename(**info), fname) + + def test_parse_on_disk_filename_errors(self): + mgr = self.df_router[POLICIES.default] + for ts in (Timestamp('1234567890.00001'), + Timestamp('1234567890.00001', offset=17)): + fname = '%s.data' % ts.internal + try: + mgr.parse_on_disk_filename(fname) + msg = 'Expected DiskFileError for filename %s' % fname + self.fail(msg) + except DiskFileError: + pass + + expected = { + '': 'bad', + 'foo': 'bad', + '1.314': 'bad', + 1.314: 'bad', + -2: 'negative', + '-2': 'negative', + None: 'bad', + 'None': 'bad', + } + + for frag, msg in expected.items(): + fname = '%s#%s.data' % (ts.internal, frag) + try: + mgr.parse_on_disk_filename(fname) + except DiskFileError as e: + self.assertTrue(msg in str(e).lower()) + else: + msg = 'Expected DiskFileError for filename %s' % fname + self.fail(msg) + + def test_make_on_disk_filename(self): + mgr = self.df_router[POLICIES.default] + for ts in (Timestamp('1234567890.00001'), + Timestamp('1234567890.00001', offset=17)): + for frag in (0, '0', 2, '2', 14, '14'): + expected = '%s#%s.data' % (ts.internal, frag) + actual = mgr.make_on_disk_filename( + ts, '.data', frag_index=frag) + self.assertEqual(expected, actual) + parsed = mgr.parse_on_disk_filename(actual) + self.assertEqual(parsed, { + 'timestamp': ts, + 'frag_index': int(frag), + 'ext': '.data', + }) + # these functions are inverse + self.assertEqual( + mgr.make_on_disk_filename(**parsed), + expected) + + for ext in ('.meta', 
'.durable', '.ts'): + expected = '%s%s' % (ts.internal, ext) + # frag index should not be required + actual = mgr.make_on_disk_filename(ts, ext) + self.assertEqual(expected, actual) + # frag index should be ignored + actual = mgr.make_on_disk_filename( + ts, ext, frag_index=frag) + self.assertEqual(expected, actual) + parsed = mgr.parse_on_disk_filename(actual) + self.assertEqual(parsed, { + 'timestamp': ts, + 'frag_index': None, + 'ext': ext, + }) + # these functions are inverse + self.assertEqual( + mgr.make_on_disk_filename(**parsed), + expected) + + actual = mgr.make_on_disk_filename(ts) + self.assertEqual(ts, actual) + + def test_make_on_disk_filename_with_bad_frag_index(self): + mgr = self.df_router[POLICIES.default] + ts = Timestamp('1234567890.00001') + try: + # .data requires a frag_index kwarg + mgr.make_on_disk_filename(ts, '.data') + self.fail('Expected DiskFileError for missing frag_index') + except DiskFileError: + pass + + for frag in (None, 'foo', '1.314', 1.314, -2, '-2'): + try: + mgr.make_on_disk_filename(ts, '.data', frag_index=frag) + self.fail('Expected DiskFileError for frag_index %s' % frag) + except DiskFileError: + pass + for ext in ('.meta', '.durable', '.ts'): + expected = '%s%s' % (ts.internal, ext) + # bad frag index should be ignored + actual = mgr.make_on_disk_filename(ts, ext, frag_index=frag) + self.assertEqual(expected, actual) + + def test_is_obsolete(self): + mgr = self.df_router[POLICIES.default] + for ts in (Timestamp('1234567890.00001'), + Timestamp('1234567890.00001', offset=17)): + for ts2 in (Timestamp('1234567890.99999'), + Timestamp('1234567890.99999', offset=17), + ts): + f_2 = mgr.make_on_disk_filename(ts, '.durable') + for fi in (0, 2): + for ext in ('.data', '.meta', '.durable', '.ts'): + f_1 = mgr.make_on_disk_filename( + ts2, ext, frag_index=fi) + self.assertFalse(mgr.is_obsolete(f_1, f_2), + '%s should not be obsolete w.r.t. %s' + % (f_1, f_2)) + + for ts2 in (Timestamp('1234567890.00000'), + Timestamp('1234500000.00000', offset=0), + Timestamp('1234500000.00000', offset=17)): + f_2 = mgr.make_on_disk_filename(ts, '.durable') + for fi in (0, 2): + for ext in ('.data', '.meta', '.durable', '.ts'): + f_1 = mgr.make_on_disk_filename( + ts2, ext, frag_index=fi) + self.assertTrue(mgr.is_obsolete(f_1, f_2), + '%s should not be w.r.t. 
%s' + % (f_1, f_2)) + + def test_yield_hashes(self): + old_ts = '1383180000.12345' + fresh_ts = Timestamp(time() - 10).internal + fresher_ts = Timestamp(time() - 1).internal + suffix_map = { + 'abc': { + '9373a92d072897b136b3fc06595b4abc': [ + fresh_ts + '.ts'], + }, + '456': { + '9373a92d072897b136b3fc06595b0456': [ + old_ts + '#2.data', + old_ts + '.durable'], + '9373a92d072897b136b3fc06595b7456': [ + fresh_ts + '.ts', + fresher_ts + '#2.data', + fresher_ts + '.durable'], + }, + 'def': {}, + } + expected = { + '9373a92d072897b136b3fc06595b4abc': fresh_ts, + '9373a92d072897b136b3fc06595b0456': old_ts, + '9373a92d072897b136b3fc06595b7456': fresher_ts, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=2) + + def test_yield_hashes_yields_meta_timestamp(self): + ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) + ts1 = next(ts_iter) + ts2 = next(ts_iter) + ts3 = next(ts_iter) + suffix_map = { + 'abc': { + '9373a92d072897b136b3fc06595b4abc': [ + ts1.internal + '.ts', + ts2.internal + '.meta'], + }, + '456': { + '9373a92d072897b136b3fc06595b0456': [ + ts1.internal + '#2.data', + ts1.internal + '.durable', + ts2.internal + '.meta', + ts3.internal + '.meta'], + '9373a92d072897b136b3fc06595b7456': [ + ts1.internal + '#2.data', + ts1.internal + '.durable', + ts2.internal + '.meta'], + }, + } + expected = { + # TODO: differs from repl DiskFileManager which *will* + # return meta timestamp when only meta and ts on disk + '9373a92d072897b136b3fc06595b4abc': ts1, + '9373a92d072897b136b3fc06595b0456': ts3, + '9373a92d072897b136b3fc06595b7456': ts2, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected) + + # but meta timestamp is not returned if specified frag index + # is not found + expected = { + # TODO: differs from repl DiskFileManager which *will* + # return meta timestamp when only meta and ts on disk + '9373a92d072897b136b3fc06595b4abc': ts1, + '9373a92d072897b136b3fc06595b0456': ts3, + '9373a92d072897b136b3fc06595b7456': ts2, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=3) + + def test_yield_hashes_suffix_filter(self): + # test again with limited suffixes + old_ts = '1383180000.12345' + fresh_ts = Timestamp(time() - 10).internal + fresher_ts = Timestamp(time() - 1).internal + suffix_map = { + 'abc': { + '9373a92d072897b136b3fc06595b4abc': [ + fresh_ts + '.ts'], + }, + '456': { + '9373a92d072897b136b3fc06595b0456': [ + old_ts + '#2.data', + old_ts + '.durable'], + '9373a92d072897b136b3fc06595b7456': [ + fresh_ts + '.ts', + fresher_ts + '#2.data', + fresher_ts + '.durable'], + }, + 'def': {}, + } + expected = { + '9373a92d072897b136b3fc06595b0456': old_ts, + '9373a92d072897b136b3fc06595b7456': fresher_ts, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + suffixes=['456'], frag_index=2) + + def test_yield_hashes_skips_missing_durable(self): + ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) + ts1 = next(ts_iter) + suffix_map = { + '456': { + '9373a92d072897b136b3fc06595b0456': [ + ts1.internal + '#2.data', + ts1.internal + '.durable'], + '9373a92d072897b136b3fc06595b7456': [ + ts1.internal + '#2.data'], + }, + } + expected = { + '9373a92d072897b136b3fc06595b0456': ts1, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=2) + + # if we add a durable it shows up + suffix_map['456']['9373a92d072897b136b3fc06595b7456'].append( + ts1.internal + '.durable') + expected = { + '9373a92d072897b136b3fc06595b0456': ts1, + 
'9373a92d072897b136b3fc06595b7456': ts1, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=2) + + def test_yield_hashes_skips_data_without_durable(self): + ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) + ts1 = next(ts_iter) + ts2 = next(ts_iter) + ts3 = next(ts_iter) + suffix_map = { + '456': { + '9373a92d072897b136b3fc06595b0456': [ + ts1.internal + '#2.data', + ts1.internal + '.durable', + ts2.internal + '#2.data', + ts3.internal + '#2.data'], + }, + } + expected = { + '9373a92d072897b136b3fc06595b0456': ts1, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=None) + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=2) + + # if we add a durable then newer data shows up + suffix_map['456']['9373a92d072897b136b3fc06595b0456'].append( + ts2.internal + '.durable') + expected = { + '9373a92d072897b136b3fc06595b0456': ts2, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=None) + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=2) + + def test_yield_hashes_ignores_bad_ondisk_filesets(self): + # this differs from DiskFileManager.yield_hashes which will fail + # when encountering a bad on-disk file set + ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) + ts1 = next(ts_iter) + ts2 = next(ts_iter) + suffix_map = { + '456': { + '9373a92d072897b136b3fc06595b0456': [ + ts1.internal + '#2.data', + ts1.internal + '.durable'], + '9373a92d072897b136b3fc06595b7456': [ + ts1.internal + '.data'], + '9373a92d072897b136b3fc06595b8456': [ + 'junk_file'], + '9373a92d072897b136b3fc06595b9456': [ + ts1.internal + '.data', + ts2.internal + '.meta'], + '9373a92d072897b136b3fc06595ba456': [ + ts1.internal + '.meta'], + '9373a92d072897b136b3fc06595bb456': [ + ts1.internal + '.meta', + ts2.internal + '.meta'], + }, + } + expected = { + '9373a92d072897b136b3fc06595b0456': ts1, + '9373a92d072897b136b3fc06595ba456': ts1, + '9373a92d072897b136b3fc06595bb456': ts2, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=2) + + def test_yield_hashes_filters_frag_index(self): + ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) + ts1 = next(ts_iter) + ts2 = next(ts_iter) + ts3 = next(ts_iter) + suffix_map = { + '27e': { + '1111111111111111111111111111127e': [ + ts1.internal + '#2.data', + ts1.internal + '#3.data', + ts1.internal + '.durable', + ], + '2222222222222222222222222222227e': [ + ts1.internal + '#2.data', + ts1.internal + '.durable', + ts2.internal + '#2.data', + ts2.internal + '.durable', + ], + }, + 'd41': { + 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaad41': [ + ts1.internal + '#3.data', + ts1.internal + '.durable', + ], + }, + '00b': { + '3333333333333333333333333333300b': [ + ts1.internal + '#2.data', + ts2.internal + '#2.data', + ts3.internal + '#2.data', + ts3.internal + '.durable', + ], + }, + } + expected = { + '1111111111111111111111111111127e': ts1, + '2222222222222222222222222222227e': ts2, + '3333333333333333333333333333300b': ts3, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=2) + + def test_get_diskfile_from_hash_frag_index_filter(self): + df = self._get_diskfile(POLICIES.default) + hash_ = os.path.basename(df._datadir) + self.assertRaises(DiskFileNotExist, + self.df_mgr.get_diskfile_from_hash, + self.existing_device1, '0', hash_, + POLICIES.default) # sanity + frag_index = 7 + timestamp = Timestamp(time()) + for frag_index in (4, 7): + with df.create() 
as writer: + data = 'test_data' + writer.write(data) + metadata = { + 'ETag': md5(data).hexdigest(), + 'X-Timestamp': timestamp.internal, + 'Content-Length': len(data), + 'X-Object-Sysmeta-Ec-Frag-Index': str(frag_index), + } + writer.put(metadata) + writer.commit(timestamp) + + df4 = self.df_mgr.get_diskfile_from_hash( + self.existing_device1, '0', hash_, POLICIES.default, frag_index=4) + self.assertEqual(df4._frag_index, 4) + self.assertEqual( + df4.read_metadata()['X-Object-Sysmeta-Ec-Frag-Index'], '4') + df7 = self.df_mgr.get_diskfile_from_hash( + self.existing_device1, '0', hash_, POLICIES.default, frag_index=7) + self.assertEqual(df7._frag_index, 7) + self.assertEqual( + df7.read_metadata()['X-Object-Sysmeta-Ec-Frag-Index'], '7') + + +class DiskFileMixin(BaseDiskFileTestMixin): + + # set mgr_cls on subclasses + mgr_cls = None def setUp(self): """Set up for testing swift.obj.diskfile""" @@ -979,12 +1766,22 @@ class TestDiskFile(unittest.TestCase): self.existing_device = 'sda1' for policy in POLICIES: mkdirs(os.path.join(self.testdir, self.existing_device, - get_tmp_dir(policy.idx))) + diskfile.get_tmp_dir(policy))) self._orig_tpool_exc = tpool.execute tpool.execute = lambda f, *args, **kwargs: f(*args, **kwargs) self.conf = dict(devices=self.testdir, mount_check='false', keep_cache_size=2 * 1024, mb_per_sync=1) - self.df_mgr = diskfile.DiskFileManager(self.conf, FakeLogger()) + self.logger = debug_logger('test-' + self.__class__.__name__) + self.df_mgr = self.mgr_cls(self.conf, self.logger) + self.df_router = diskfile.DiskFileRouter(self.conf, self.logger) + self._ts_iter = (Timestamp(t) for t in + itertools.count(int(time()))) + + def ts(self): + """ + Timestamps - forever. + """ + return next(self._ts_iter) def tearDown(self): """Tear down for testing swift.obj.diskfile""" @@ -996,11 +1793,11 @@ class TestDiskFile(unittest.TestCase): mkdirs(df._datadir) if timestamp is None: timestamp = time() - timestamp = Timestamp(timestamp).internal + timestamp = Timestamp(timestamp) if not metadata: metadata = {} if 'X-Timestamp' not in metadata: - metadata['X-Timestamp'] = Timestamp(timestamp).internal + metadata['X-Timestamp'] = timestamp.internal if 'ETag' not in metadata: etag = md5() etag.update(data) @@ -1009,17 +1806,24 @@ class TestDiskFile(unittest.TestCase): metadata['name'] = '/a/c/o' if 'Content-Length' not in metadata: metadata['Content-Length'] = str(len(data)) - data_file = os.path.join(df._datadir, timestamp + ext) + filename = timestamp.internal + ext + if ext == '.data' and df.policy.policy_type == EC_POLICY: + filename = '%s#%s.data' % (timestamp.internal, df._frag_index) + data_file = os.path.join(df._datadir, filename) with open(data_file, 'wb') as f: f.write(data) xattr.setxattr(f.fileno(), diskfile.METADATA_KEY, pickle.dumps(metadata, diskfile.PICKLE_PROTOCOL)) def _simple_get_diskfile(self, partition='0', account='a', container='c', - obj='o', policy_idx=0): - return self.df_mgr.get_diskfile(self.existing_device, - partition, account, container, obj, - policy_idx) + obj='o', policy=None, frag_index=None): + policy = policy or POLICIES.default + df_mgr = self.df_router[policy] + if policy.policy_type == EC_POLICY and frag_index is None: + frag_index = 2 + return df_mgr.get_diskfile(self.existing_device, partition, + account, container, obj, + policy=policy, frag_index=frag_index) def _create_test_file(self, data, timestamp=None, metadata=None, account='a', container='c', obj='o'): @@ -1028,12 +1832,62 @@ class TestDiskFile(unittest.TestCase): metadata.setdefault('name', 
'/%s/%s/%s' % (account, container, obj)) df = self._simple_get_diskfile(account=account, container=container, obj=obj) - self._create_ondisk_file(df, data, timestamp, metadata) - df = self._simple_get_diskfile(account=account, container=container, - obj=obj) + if timestamp is None: + timestamp = time() + timestamp = Timestamp(timestamp) + with df.create() as writer: + new_metadata = { + 'ETag': md5(data).hexdigest(), + 'X-Timestamp': timestamp.internal, + 'Content-Length': len(data), + } + new_metadata.update(metadata) + writer.write(data) + writer.put(new_metadata) + writer.commit(timestamp) df.open() return df + def test_get_dev_path(self): + self.df_mgr.devices = '/srv' + device = 'sda1' + dev_path = os.path.join(self.df_mgr.devices, device) + + mount_check = None + self.df_mgr.mount_check = True + with mock.patch('swift.obj.diskfile.check_mount', + mock.MagicMock(return_value=False)): + self.assertEqual(self.df_mgr.get_dev_path(device, mount_check), + None) + with mock.patch('swift.obj.diskfile.check_mount', + mock.MagicMock(return_value=True)): + self.assertEqual(self.df_mgr.get_dev_path(device, mount_check), + dev_path) + + self.df_mgr.mount_check = False + with mock.patch('swift.obj.diskfile.check_dir', + mock.MagicMock(return_value=False)): + self.assertEqual(self.df_mgr.get_dev_path(device, mount_check), + None) + with mock.patch('swift.obj.diskfile.check_dir', + mock.MagicMock(return_value=True)): + self.assertEqual(self.df_mgr.get_dev_path(device, mount_check), + dev_path) + + mount_check = True + with mock.patch('swift.obj.diskfile.check_mount', + mock.MagicMock(return_value=False)): + self.assertEqual(self.df_mgr.get_dev_path(device, mount_check), + None) + with mock.patch('swift.obj.diskfile.check_mount', + mock.MagicMock(return_value=True)): + self.assertEqual(self.df_mgr.get_dev_path(device, mount_check), + dev_path) + + mount_check = False + self.assertEqual(self.df_mgr.get_dev_path(device, mount_check), + dev_path) + def test_open_not_exist(self): df = self._simple_get_diskfile() self.assertRaises(DiskFileNotExist, df.open) @@ -1051,15 +1905,17 @@ class TestDiskFile(unittest.TestCase): self.fail("Unexpected swift exception raised: %r" % err) def test_get_metadata(self): - df = self._create_test_file('1234567890', timestamp=42) + timestamp = self.ts().internal + df = self._create_test_file('1234567890', timestamp=timestamp) md = df.get_metadata() - self.assertEqual(md['X-Timestamp'], Timestamp(42).internal) + self.assertEqual(md['X-Timestamp'], timestamp) def test_read_metadata(self): - self._create_test_file('1234567890', timestamp=42) + timestamp = self.ts().internal + self._create_test_file('1234567890', timestamp=timestamp) df = self._simple_get_diskfile() md = df.read_metadata() - self.assertEqual(md['X-Timestamp'], Timestamp(42).internal) + self.assertEqual(md['X-Timestamp'], timestamp) def test_read_metadata_no_xattr(self): def mock_getxattr(*args, **kargs): @@ -1087,15 +1943,16 @@ class TestDiskFile(unittest.TestCase): self.fail("Expected DiskFileNotOpen exception") def test_disk_file_default_disallowed_metadata(self): - # build an object with some meta (ts 41) + # build an object with some meta (at t0+1s) orig_metadata = {'X-Object-Meta-Key1': 'Value1', 'Content-Type': 'text/garbage'} - df = self._get_open_disk_file(ts=41, extra_metadata=orig_metadata) + df = self._get_open_disk_file(ts=self.ts().internal, + extra_metadata=orig_metadata) with df.open(): self.assertEquals('1024', df._metadata['Content-Length']) - # write some new metadata (fast POST, don't send 
orig meta, ts 42) + # write some new metadata (fast POST, don't send orig meta, at t0+1) df = self._simple_get_diskfile() - df.write_metadata({'X-Timestamp': Timestamp(42).internal, + df.write_metadata({'X-Timestamp': self.ts().internal, 'X-Object-Meta-Key2': 'Value2'}) df = self._simple_get_diskfile() with df.open(): @@ -1107,15 +1964,16 @@ class TestDiskFile(unittest.TestCase): self.assertEquals('Value2', df._metadata['X-Object-Meta-Key2']) def test_disk_file_preserves_sysmeta(self): - # build an object with some meta (ts 41) + # build an object with some meta (at t0) orig_metadata = {'X-Object-Sysmeta-Key1': 'Value1', 'Content-Type': 'text/garbage'} - df = self._get_open_disk_file(ts=41, extra_metadata=orig_metadata) + df = self._get_open_disk_file(ts=self.ts().internal, + extra_metadata=orig_metadata) with df.open(): self.assertEquals('1024', df._metadata['Content-Length']) - # write some new metadata (fast POST, don't send orig meta, ts 42) + # write some new metadata (fast POST, don't send orig meta, at t0+1s) df = self._simple_get_diskfile() - df.write_metadata({'X-Timestamp': Timestamp(42).internal, + df.write_metadata({'X-Timestamp': self.ts().internal, 'X-Object-Sysmeta-Key1': 'Value2', 'X-Object-Meta-Key3': 'Value3'}) df = self._simple_get_diskfile() @@ -1269,34 +2127,38 @@ class TestDiskFile(unittest.TestCase): def test_disk_file_mkstemp_creates_dir(self): for policy in POLICIES: tmpdir = os.path.join(self.testdir, self.existing_device, - get_tmp_dir(policy.idx)) + diskfile.get_tmp_dir(policy)) os.rmdir(tmpdir) - df = self._simple_get_diskfile(policy_idx=policy.idx) + df = self._simple_get_diskfile(policy=policy) with df.create(): self.assert_(os.path.exists(tmpdir)) def _get_open_disk_file(self, invalid_type=None, obj_name='o', fsize=1024, csize=8, mark_deleted=False, prealloc=False, - ts=None, mount_check=False, extra_metadata=None): + ts=None, mount_check=False, extra_metadata=None, + policy=None, frag_index=None): '''returns a DiskFile''' - df = self._simple_get_diskfile(obj=obj_name) + policy = policy or POLICIES.legacy + df = self._simple_get_diskfile(obj=obj_name, policy=policy, + frag_index=frag_index) data = '0' * fsize etag = md5() if ts: - timestamp = ts + timestamp = Timestamp(ts) else: - timestamp = Timestamp(time()).internal + timestamp = Timestamp(time()) if prealloc: prealloc_size = fsize else: prealloc_size = None + with df.create(size=prealloc_size) as writer: upload_size = writer.write(data) etag.update(data) etag = etag.hexdigest() metadata = { 'ETag': etag, - 'X-Timestamp': timestamp, + 'X-Timestamp': timestamp.internal, 'Content-Length': str(upload_size), } metadata.update(extra_metadata or {}) @@ -1319,6 +2181,7 @@ class TestDiskFile(unittest.TestCase): elif invalid_type == 'Bad-X-Delete-At': metadata['X-Delete-At'] = 'bad integer' diskfile.write_metadata(writer._fd, metadata) + writer.commit(timestamp) if mark_deleted: df.delete(timestamp) @@ -1349,9 +2212,16 @@ class TestDiskFile(unittest.TestCase): self.conf['disk_chunk_size'] = csize self.conf['mount_check'] = mount_check - self.df_mgr = diskfile.DiskFileManager(self.conf, FakeLogger()) - df = self._simple_get_diskfile(obj=obj_name) + self.df_mgr = self.mgr_cls(self.conf, self.logger) + self.df_router = diskfile.DiskFileRouter(self.conf, self.logger) + + # actual on disk frag_index may have been set by metadata + frag_index = metadata.get('X-Object-Sysmeta-Ec-Frag-Index', + frag_index) + df = self._simple_get_diskfile(obj=obj_name, policy=policy, + frag_index=frag_index) df.open() + if invalid_type == 
'Zero-Byte': fp = open(df._data_file, 'w') fp.close() @@ -1577,7 +2447,7 @@ class TestDiskFile(unittest.TestCase): pass df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123', - 'xyz') + 'xyz', policy=POLICIES.legacy) self.assertRaises(DiskFileQuarantined, df.open) # make sure the right thing got quarantined; the suffix dir should not @@ -1587,7 +2457,7 @@ class TestDiskFile(unittest.TestCase): def test_create_prealloc(self): df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123', - 'xyz') + 'xyz', policy=POLICIES.legacy) with mock.patch("swift.obj.diskfile.fallocate") as fa: with df.create(size=200) as writer: used_fd = writer._fd @@ -1595,7 +2465,7 @@ class TestDiskFile(unittest.TestCase): def test_create_prealloc_oserror(self): df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123', - 'xyz') + 'xyz', policy=POLICIES.legacy) for e in (errno.ENOSPC, errno.EDQUOT): with mock.patch("swift.obj.diskfile.fallocate", mock.MagicMock(side_effect=OSError( @@ -1622,7 +2492,7 @@ class TestDiskFile(unittest.TestCase): def test_create_mkstemp_no_space(self): df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123', - 'xyz') + 'xyz', policy=POLICIES.legacy) for e in (errno.ENOSPC, errno.EDQUOT): with mock.patch("swift.obj.diskfile.mkstemp", mock.MagicMock(side_effect=OSError( @@ -1649,7 +2519,7 @@ class TestDiskFile(unittest.TestCase): def test_create_close_oserror(self): df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123', - 'xyz') + 'xyz', policy=POLICIES.legacy) with mock.patch("swift.obj.diskfile.os.close", mock.MagicMock(side_effect=OSError( errno.EACCES, os.strerror(errno.EACCES)))): @@ -1663,11 +2533,12 @@ class TestDiskFile(unittest.TestCase): def test_write_metadata(self): df = self._create_test_file('1234567890') + file_count = len(os.listdir(df._datadir)) timestamp = Timestamp(time()).internal metadata = {'X-Timestamp': timestamp, 'X-Object-Meta-test': 'data'} df.write_metadata(metadata) dl = os.listdir(df._datadir) - self.assertEquals(len(dl), 2) + self.assertEquals(len(dl), file_count + 1) exp_name = '%s.meta' % timestamp self.assertTrue(exp_name in set(dl)) @@ -1705,14 +2576,135 @@ class TestDiskFile(unittest.TestCase): DiskFileNoSpace, diskfile.write_metadata, 'n/a', metadata) + def _create_diskfile_dir(self, timestamp, policy): + timestamp = Timestamp(timestamp) + df = self._simple_get_diskfile(account='a', container='c', + obj='o_%s' % policy, + policy=policy) + + with df.create() as writer: + metadata = { + 'ETag': 'bogus_etag', + 'X-Timestamp': timestamp.internal, + 'Content-Length': '0', + } + if policy.policy_type == EC_POLICY: + metadata['X-Object-Sysmeta-Ec-Frag-Index'] = \ + df._frag_index or 7 + writer.put(metadata) + writer.commit(timestamp) + return writer._datadir + + def test_commit(self): + for policy in POLICIES: + # create first fileset as starting state + timestamp = Timestamp(time()).internal + datadir = self._create_diskfile_dir(timestamp, policy) + dl = os.listdir(datadir) + expected = ['%s.data' % timestamp] + if policy.policy_type == EC_POLICY: + expected = ['%s#2.data' % timestamp, + '%s.durable' % timestamp] + self.assertEquals(len(dl), len(expected), + 'Unexpected dir listing %s' % dl) + self.assertEqual(sorted(expected), sorted(dl)) + + def test_write_cleanup(self): + for policy in POLICIES: + # create first fileset as starting state + timestamp_1 = Timestamp(time()).internal + datadir_1 = self._create_diskfile_dir(timestamp_1, policy) + # second write should clean up first fileset + 
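To restate what test_commit above asserts: a single put()/commit() cycle leaves one .data file for a replication policy, but a fragment-indexed .data file plus a .durable marker for an EC policy. A small illustrative summary of those expectations (values are examples; the policy-type constants are the ones used elsewhere in these tests):

    from swift.common.storage_policy import EC_POLICY, REPL_POLICY

    ts = '0000000007.00000'   # a Timestamp(...).internal string
    frag_index = 2

    expected_after_commit = {
        REPL_POLICY: ['%s.data' % ts],
        EC_POLICY: ['%s#%s.data' % (ts, frag_index),
                    '%s.durable' % ts],
    }
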
timestamp_2 = Timestamp(time() + 1).internal + datadir_2 = self._create_diskfile_dir(timestamp_2, policy) + # sanity check + self.assertEqual(datadir_1, datadir_2) + dl = os.listdir(datadir_2) + expected = ['%s.data' % timestamp_2] + if policy.policy_type == EC_POLICY: + expected = ['%s#2.data' % timestamp_2, + '%s.durable' % timestamp_2] + self.assertEquals(len(dl), len(expected), + 'Unexpected dir listing %s' % dl) + self.assertEqual(sorted(expected), sorted(dl)) + + def test_commit_fsync(self): + for policy in POLICIES: + mock_fsync = mock.MagicMock() + df = self._simple_get_diskfile(account='a', container='c', + obj='o', policy=policy) + + timestamp = Timestamp(time()) + with df.create() as writer: + metadata = { + 'ETag': 'bogus_etag', + 'X-Timestamp': timestamp.internal, + 'Content-Length': '0', + } + writer.put(metadata) + with mock.patch('swift.obj.diskfile.fsync', mock_fsync): + writer.commit(timestamp) + expected = { + EC_POLICY: 1, + REPL_POLICY: 0, + }[policy.policy_type] + self.assertEqual(expected, mock_fsync.call_count) + if policy.policy_type == EC_POLICY: + durable_file = '%s.durable' % timestamp.internal + self.assertTrue(durable_file in str(mock_fsync.call_args[0])) + + def test_commit_ignores_hash_cleanup_listdir_error(self): + for policy in POLICIES: + # Check OSError from hash_cleanup_listdir is caught and ignored + mock_hcl = mock.MagicMock(side_effect=OSError) + df = self._simple_get_diskfile(account='a', container='c', + obj='o_hcl_error', policy=policy) + + timestamp = Timestamp(time()) + with df.create() as writer: + metadata = { + 'ETag': 'bogus_etag', + 'X-Timestamp': timestamp.internal, + 'Content-Length': '0', + } + writer.put(metadata) + with mock.patch(self._manager_mock( + 'hash_cleanup_listdir', df), mock_hcl): + writer.commit(timestamp) + expected = { + EC_POLICY: 1, + REPL_POLICY: 0, + }[policy.policy_type] + self.assertEqual(expected, mock_hcl.call_count) + expected = ['%s.data' % timestamp.internal] + if policy.policy_type == EC_POLICY: + expected = ['%s#2.data' % timestamp.internal, + '%s.durable' % timestamp.internal] + dl = os.listdir(df._datadir) + self.assertEquals(len(dl), len(expected), + 'Unexpected dir listing %s' % dl) + self.assertEqual(sorted(expected), sorted(dl)) + def test_delete(self): - df = self._get_open_disk_file() - ts = time() - df.delete(ts) - exp_name = '%s.ts' % Timestamp(ts).internal - dl = os.listdir(df._datadir) - self.assertEquals(len(dl), 1) - self.assertTrue(exp_name in set(dl)) + for policy in POLICIES: + if policy.policy_type == EC_POLICY: + metadata = {'X-Object-Sysmeta-Ec-Frag-Index': '1'} + fi = 1 + else: + metadata = {} + fi = None + df = self._get_open_disk_file(policy=policy, frag_index=fi, + extra_metadata=metadata) + + ts = Timestamp(time()) + df.delete(ts) + exp_name = '%s.ts' % ts.internal + dl = os.listdir(df._datadir) + self.assertEquals(len(dl), 1) + self.assertTrue(exp_name in set(dl), + 'Expected file %s missing in %s' % (exp_name, dl)) + # cleanup before next policy + os.unlink(os.path.join(df._datadir, exp_name)) def test_open_deleted(self): df = self._get_open_disk_file() @@ -1749,7 +2741,8 @@ class TestDiskFile(unittest.TestCase): 'blah blah', account='three', container='blind', obj='mice')._datadir df = self.df_mgr.get_diskfile_from_audit_location( - diskfile.AuditLocation(hashdir, self.existing_device, '0')) + diskfile.AuditLocation(hashdir, self.existing_device, '0', + policy=POLICIES.default)) df.open() self.assertEqual(df._name, '/three/blind/mice') @@ -1757,14 +2750,16 @@ class 
TestDiskFile(unittest.TestCase): hashdir = self._create_test_file( 'blah blah', account='this', container='is', obj='right')._datadir - - datafile = os.path.join(hashdir, os.listdir(hashdir)[0]) + datafilename = [f for f in os.listdir(hashdir) + if f.endswith('.data')][0] + datafile = os.path.join(hashdir, datafilename) meta = diskfile.read_metadata(datafile) meta['name'] = '/this/is/wrong' diskfile.write_metadata(datafile, meta) df = self.df_mgr.get_diskfile_from_audit_location( - diskfile.AuditLocation(hashdir, self.existing_device, '0')) + diskfile.AuditLocation(hashdir, self.existing_device, '0', + policy=POLICIES.default)) self.assertRaises(DiskFileQuarantined, df.open) def test_close_error(self): @@ -1779,7 +2774,10 @@ class TestDiskFile(unittest.TestCase): pass # close is called at the end of the iterator self.assertEquals(reader._fp, None) - self.assertEquals(len(df._logger.log_dict['error']), 1) + error_lines = df._logger.get_lines_for_level('error') + self.assertEqual(len(error_lines), 1) + self.assertTrue('close failure' in error_lines[0]) + self.assertTrue('Bad' in error_lines[0]) def test_mount_checking(self): @@ -1830,6 +2828,9 @@ class TestDiskFile(unittest.TestCase): self._create_ondisk_file(df, '', ext='.meta', timestamp=9) self._create_ondisk_file(df, 'B', ext='.data', timestamp=8) self._create_ondisk_file(df, 'A', ext='.data', timestamp=7) + if df.policy.policy_type == EC_POLICY: + self._create_ondisk_file(df, '', ext='.durable', timestamp=8) + self._create_ondisk_file(df, '', ext='.durable', timestamp=7) self._create_ondisk_file(df, '', ext='.ts', timestamp=6) self._create_ondisk_file(df, '', ext='.ts', timestamp=5) df = self._simple_get_diskfile() @@ -1843,6 +2844,9 @@ class TestDiskFile(unittest.TestCase): df = self._simple_get_diskfile() self._create_ondisk_file(df, 'B', ext='.data', timestamp=10) self._create_ondisk_file(df, 'A', ext='.data', timestamp=9) + if df.policy.policy_type == EC_POLICY: + self._create_ondisk_file(df, '', ext='.durable', timestamp=10) + self._create_ondisk_file(df, '', ext='.durable', timestamp=9) self._create_ondisk_file(df, '', ext='.ts', timestamp=8) self._create_ondisk_file(df, '', ext='.ts', timestamp=7) self._create_ondisk_file(df, '', ext='.meta', timestamp=6) @@ -1859,6 +2863,9 @@ class TestDiskFile(unittest.TestCase): self._create_ondisk_file(df, 'X', ext='.bar', timestamp=11) self._create_ondisk_file(df, 'B', ext='.data', timestamp=10) self._create_ondisk_file(df, 'A', ext='.data', timestamp=9) + if df.policy.policy_type == EC_POLICY: + self._create_ondisk_file(df, '', ext='.durable', timestamp=10) + self._create_ondisk_file(df, '', ext='.durable', timestamp=9) self._create_ondisk_file(df, '', ext='.ts', timestamp=8) self._create_ondisk_file(df, '', ext='.ts', timestamp=7) self._create_ondisk_file(df, '', ext='.meta', timestamp=6) @@ -1880,6 +2887,9 @@ class TestDiskFile(unittest.TestCase): self._create_ondisk_file(df, 'X', ext='.bar', timestamp=11) self._create_ondisk_file(df, 'B', ext='.data', timestamp=10) self._create_ondisk_file(df, 'A', ext='.data', timestamp=9) + if df.policy.policy_type == EC_POLICY: + self._create_ondisk_file(df, '', ext='.durable', timestamp=10) + self._create_ondisk_file(df, '', ext='.durable', timestamp=9) self._create_ondisk_file(df, '', ext='.ts', timestamp=8) self._create_ondisk_file(df, '', ext='.ts', timestamp=7) self._create_ondisk_file(df, '', ext='.meta', timestamp=6) @@ -1901,300 +2911,6 @@ class TestDiskFile(unittest.TestCase): log_lines = df._logger.get_lines_for_level('error') 
self.assert_('a very special error' in log_lines[-1]) - def test_get_diskfile_from_hash_dev_path_fail(self): - self.df_mgr.get_dev_path = mock.MagicMock(return_value=None) - with nested( - mock.patch('swift.obj.diskfile.DiskFile'), - mock.patch('swift.obj.diskfile.hash_cleanup_listdir'), - mock.patch('swift.obj.diskfile.read_metadata')) as \ - (dfclass, hclistdir, readmeta): - hclistdir.return_value = ['1381679759.90941.data'] - readmeta.return_value = {'name': '/a/c/o'} - self.assertRaises( - DiskFileDeviceUnavailable, - self.df_mgr.get_diskfile_from_hash, - 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', 0) - - def test_get_diskfile_from_hash_not_dir(self): - self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') - with nested( - mock.patch('swift.obj.diskfile.DiskFile'), - mock.patch('swift.obj.diskfile.hash_cleanup_listdir'), - mock.patch('swift.obj.diskfile.read_metadata'), - mock.patch('swift.obj.diskfile.quarantine_renamer')) as \ - (dfclass, hclistdir, readmeta, quarantine_renamer): - osexc = OSError() - osexc.errno = errno.ENOTDIR - hclistdir.side_effect = osexc - readmeta.return_value = {'name': '/a/c/o'} - self.assertRaises( - DiskFileNotExist, - self.df_mgr.get_diskfile_from_hash, - 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', 0) - quarantine_renamer.assert_called_once_with( - '/srv/dev/', - '/srv/dev/objects/9/900/9a7175077c01a23ade5956b8a2bba900') - - def test_get_diskfile_from_hash_no_dir(self): - self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') - with nested( - mock.patch('swift.obj.diskfile.DiskFile'), - mock.patch('swift.obj.diskfile.hash_cleanup_listdir'), - mock.patch('swift.obj.diskfile.read_metadata')) as \ - (dfclass, hclistdir, readmeta): - osexc = OSError() - osexc.errno = errno.ENOENT - hclistdir.side_effect = osexc - readmeta.return_value = {'name': '/a/c/o'} - self.assertRaises( - DiskFileNotExist, - self.df_mgr.get_diskfile_from_hash, - 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', 0) - - def test_get_diskfile_from_hash_other_oserror(self): - self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') - with nested( - mock.patch('swift.obj.diskfile.DiskFile'), - mock.patch('swift.obj.diskfile.hash_cleanup_listdir'), - mock.patch('swift.obj.diskfile.read_metadata')) as \ - (dfclass, hclistdir, readmeta): - osexc = OSError() - hclistdir.side_effect = osexc - readmeta.return_value = {'name': '/a/c/o'} - self.assertRaises( - OSError, - self.df_mgr.get_diskfile_from_hash, - 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', 0) - - def test_get_diskfile_from_hash_no_actual_files(self): - self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') - with nested( - mock.patch('swift.obj.diskfile.DiskFile'), - mock.patch('swift.obj.diskfile.hash_cleanup_listdir'), - mock.patch('swift.obj.diskfile.read_metadata')) as \ - (dfclass, hclistdir, readmeta): - hclistdir.return_value = [] - readmeta.return_value = {'name': '/a/c/o'} - self.assertRaises( - DiskFileNotExist, - self.df_mgr.get_diskfile_from_hash, - 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', 0) - - def test_get_diskfile_from_hash_read_metadata_problem(self): - self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') - with nested( - mock.patch('swift.obj.diskfile.DiskFile'), - mock.patch('swift.obj.diskfile.hash_cleanup_listdir'), - mock.patch('swift.obj.diskfile.read_metadata')) as \ - (dfclass, hclistdir, readmeta): - hclistdir.return_value = ['1381679759.90941.data'] - readmeta.side_effect = EOFError() - self.assertRaises( - DiskFileNotExist, - 
self.df_mgr.get_diskfile_from_hash, - 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', 0) - - def test_get_diskfile_from_hash_no_meta_name(self): - self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') - with nested( - mock.patch('swift.obj.diskfile.DiskFile'), - mock.patch('swift.obj.diskfile.hash_cleanup_listdir'), - mock.patch('swift.obj.diskfile.read_metadata')) as \ - (dfclass, hclistdir, readmeta): - hclistdir.return_value = ['1381679759.90941.data'] - readmeta.return_value = {} - try: - self.df_mgr.get_diskfile_from_hash( - 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', 0) - except DiskFileNotExist as err: - exc = err - self.assertEqual(str(exc), '') - - def test_get_diskfile_from_hash_bad_meta_name(self): - self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') - with nested( - mock.patch('swift.obj.diskfile.DiskFile'), - mock.patch('swift.obj.diskfile.hash_cleanup_listdir'), - mock.patch('swift.obj.diskfile.read_metadata')) as \ - (dfclass, hclistdir, readmeta): - hclistdir.return_value = ['1381679759.90941.data'] - readmeta.return_value = {'name': 'bad'} - try: - self.df_mgr.get_diskfile_from_hash( - 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', 0) - except DiskFileNotExist as err: - exc = err - self.assertEqual(str(exc), '') - - def test_get_diskfile_from_hash(self): - self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/') - with nested( - mock.patch('swift.obj.diskfile.DiskFile'), - mock.patch('swift.obj.diskfile.hash_cleanup_listdir'), - mock.patch('swift.obj.diskfile.read_metadata')) as \ - (dfclass, hclistdir, readmeta): - hclistdir.return_value = ['1381679759.90941.data'] - readmeta.return_value = {'name': '/a/c/o'} - self.df_mgr.get_diskfile_from_hash( - 'dev', '9', '9a7175077c01a23ade5956b8a2bba900', 0) - dfclass.assert_called_once_with( - self.df_mgr, '/srv/dev/', self.df_mgr.threadpools['dev'], '9', - 'a', 'c', 'o', policy_idx=0) - hclistdir.assert_called_once_with( - '/srv/dev/objects/9/900/9a7175077c01a23ade5956b8a2bba900', - 604800) - readmeta.assert_called_once_with( - '/srv/dev/objects/9/900/9a7175077c01a23ade5956b8a2bba900/' - '1381679759.90941.data') - - def test_listdir_enoent(self): - oserror = OSError() - oserror.errno = errno.ENOENT - self.df_mgr.logger.error = mock.MagicMock() - with mock.patch('os.listdir', side_effect=oserror): - self.assertEqual(self.df_mgr._listdir('path'), []) - self.assertEqual(self.df_mgr.logger.error.mock_calls, []) - - def test_listdir_other_oserror(self): - oserror = OSError() - self.df_mgr.logger.error = mock.MagicMock() - with mock.patch('os.listdir', side_effect=oserror): - self.assertEqual(self.df_mgr._listdir('path'), []) - self.df_mgr.logger.error.assert_called_once_with( - 'ERROR: Skipping %r due to error with listdir attempt: %s', - 'path', oserror) - - def test_listdir(self): - self.df_mgr.logger.error = mock.MagicMock() - with mock.patch('os.listdir', return_value=['abc', 'def']): - self.assertEqual(self.df_mgr._listdir('path'), ['abc', 'def']) - self.assertEqual(self.df_mgr.logger.error.mock_calls, []) - - def test_yield_suffixes_dev_path_fail(self): - self.df_mgr.get_dev_path = mock.MagicMock(return_value=None) - exc = None - try: - list(self.df_mgr.yield_suffixes('dev', '9', 0)) - except DiskFileDeviceUnavailable as err: - exc = err - self.assertEqual(str(exc), '') - - def test_yield_suffixes(self): - self.df_mgr._listdir = mock.MagicMock(return_value=[ - 'abc', 'def', 'ghi', 'abcd', '012']) - self.assertEqual( - list(self.df_mgr.yield_suffixes('dev', '9', 0)), - [(self.testdir 
+ '/dev/objects/9/abc', 'abc'), - (self.testdir + '/dev/objects/9/def', 'def'), - (self.testdir + '/dev/objects/9/012', '012')]) - - def test_yield_hashes_dev_path_fail(self): - self.df_mgr.get_dev_path = mock.MagicMock(return_value=None) - exc = None - try: - list(self.df_mgr.yield_hashes('dev', '9', 0)) - except DiskFileDeviceUnavailable as err: - exc = err - self.assertEqual(str(exc), '') - - def test_yield_hashes_empty(self): - def _listdir(path): - return [] - - with mock.patch('os.listdir', _listdir): - self.assertEqual(list(self.df_mgr.yield_hashes('dev', '9', 0)), []) - - def test_yield_hashes_empty_suffixes(self): - def _listdir(path): - return [] - - with mock.patch('os.listdir', _listdir): - self.assertEqual( - list(self.df_mgr.yield_hashes('dev', '9', 0, - suffixes=['456'])), []) - - def test_yield_hashes(self): - fresh_ts = Timestamp(time() - 10).internal - fresher_ts = Timestamp(time() - 1).internal - - def _listdir(path): - if path.endswith('/dev/objects/9'): - return ['abc', '456', 'def'] - elif path.endswith('/dev/objects/9/abc'): - return ['9373a92d072897b136b3fc06595b4abc'] - elif path.endswith( - '/dev/objects/9/abc/9373a92d072897b136b3fc06595b4abc'): - return [fresh_ts + '.ts'] - elif path.endswith('/dev/objects/9/456'): - return ['9373a92d072897b136b3fc06595b0456', - '9373a92d072897b136b3fc06595b7456'] - elif path.endswith( - '/dev/objects/9/456/9373a92d072897b136b3fc06595b0456'): - return ['1383180000.12345.data'] - elif path.endswith( - '/dev/objects/9/456/9373a92d072897b136b3fc06595b7456'): - return [fresh_ts + '.ts', - fresher_ts + '.data'] - elif path.endswith('/dev/objects/9/def'): - return [] - else: - raise Exception('Unexpected listdir of %r' % path) - - with nested( - mock.patch('os.listdir', _listdir), - mock.patch('os.unlink')): - self.assertEqual( - list(self.df_mgr.yield_hashes('dev', '9', 0)), - [(self.testdir + - '/dev/objects/9/abc/9373a92d072897b136b3fc06595b4abc', - '9373a92d072897b136b3fc06595b4abc', fresh_ts), - (self.testdir + - '/dev/objects/9/456/9373a92d072897b136b3fc06595b0456', - '9373a92d072897b136b3fc06595b0456', '1383180000.12345'), - (self.testdir + - '/dev/objects/9/456/9373a92d072897b136b3fc06595b7456', - '9373a92d072897b136b3fc06595b7456', fresher_ts)]) - - def test_yield_hashes_suffixes(self): - fresh_ts = Timestamp(time() - 10).internal - fresher_ts = Timestamp(time() - 1).internal - - def _listdir(path): - if path.endswith('/dev/objects/9'): - return ['abc', '456', 'def'] - elif path.endswith('/dev/objects/9/abc'): - return ['9373a92d072897b136b3fc06595b4abc'] - elif path.endswith( - '/dev/objects/9/abc/9373a92d072897b136b3fc06595b4abc'): - return [fresh_ts + '.ts'] - elif path.endswith('/dev/objects/9/456'): - return ['9373a92d072897b136b3fc06595b0456', - '9373a92d072897b136b3fc06595b7456'] - elif path.endswith( - '/dev/objects/9/456/9373a92d072897b136b3fc06595b0456'): - return ['1383180000.12345.data'] - elif path.endswith( - '/dev/objects/9/456/9373a92d072897b136b3fc06595b7456'): - return [fresh_ts + '.ts', - fresher_ts + '.data'] - elif path.endswith('/dev/objects/9/def'): - return [] - else: - raise Exception('Unexpected listdir of %r' % path) - - with nested( - mock.patch('os.listdir', _listdir), - mock.patch('os.unlink')): - self.assertEqual( - list(self.df_mgr.yield_hashes( - 'dev', '9', 0, suffixes=['456'])), - [(self.testdir + - '/dev/objects/9/456/9373a92d072897b136b3fc06595b0456', - '9373a92d072897b136b3fc06595b0456', '1383180000.12345'), - (self.testdir + - '/dev/objects/9/456/9373a92d072897b136b3fc06595b7456', - 
'9373a92d072897b136b3fc06595b7456', fresher_ts)]) - def test_diskfile_names(self): df = self._simple_get_diskfile() self.assertEqual(df.account, 'a') @@ -2260,10 +2976,11 @@ class TestDiskFile(unittest.TestCase): self.assertEqual(str(exc), '') def test_diskfile_timestamp(self): - self._get_open_disk_file(ts='1383181759.12345') + ts = Timestamp(time()) + self._get_open_disk_file(ts=ts.internal) df = self._simple_get_diskfile() with df.open(): - self.assertEqual(df.timestamp, '1383181759.12345') + self.assertEqual(df.timestamp, ts.internal) def test_error_in_hash_cleanup_listdir(self): @@ -2271,16 +2988,16 @@ class TestDiskFile(unittest.TestCase): raise OSError() df = self._get_open_disk_file() + file_count = len(os.listdir(df._datadir)) ts = time() - with mock.patch("swift.obj.diskfile.hash_cleanup_listdir", - mock_hcl): + with mock.patch(self._manager_mock('hash_cleanup_listdir'), mock_hcl): try: df.delete(ts) except OSError: self.fail("OSError raised when it should have been swallowed") exp_name = '%s.ts' % str(Timestamp(ts).internal) dl = os.listdir(df._datadir) - self.assertEquals(len(dl), 2) + self.assertEquals(len(dl), file_count + 1) self.assertTrue(exp_name in set(dl)) def _system_can_zero_copy(self): @@ -2301,7 +3018,6 @@ class TestDiskFile(unittest.TestCase): self.conf['splice'] = 'on' self.conf['keep_cache_size'] = 16384 self.conf['disk_chunk_size'] = 4096 - self.df_mgr = diskfile.DiskFileManager(self.conf, FakeLogger()) df = self._get_open_disk_file(fsize=16385) reader = df.reader() @@ -2315,7 +3031,7 @@ class TestDiskFile(unittest.TestCase): def test_zero_copy_turns_off_when_md5_sockets_not_supported(self): if not self._system_can_zero_copy(): raise SkipTest("zero-copy support is missing") - + df_mgr = self.df_router[POLICIES.default] self.conf['splice'] = 'on' with mock.patch('swift.obj.diskfile.get_md5_socket') as mock_md5sock: mock_md5sock.side_effect = IOError( @@ -2324,7 +3040,7 @@ class TestDiskFile(unittest.TestCase): reader = df.reader() self.assertFalse(reader.can_zero_copy_send()) - log_lines = self.df_mgr.logger.get_lines_for_level('warning') + log_lines = df_mgr.logger.get_lines_for_level('warning') self.assert_('MD5 sockets' in log_lines[-1]) def test_tee_to_md5_pipe_length_mismatch(self): @@ -2421,7 +3137,7 @@ class TestDiskFile(unittest.TestCase): def test_create_unlink_cleanup_DiskFileNoSpace(self): # Test cleanup when DiskFileNoSpace() is raised. df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123', - 'xyz') + 'xyz', policy=POLICIES.legacy) _m_fallocate = mock.MagicMock(side_effect=OSError(errno.ENOSPC, os.strerror(errno.ENOSPC))) _m_unlink = mock.Mock() @@ -2436,7 +3152,7 @@ class TestDiskFile(unittest.TestCase): self.fail("Expected exception DiskFileNoSpace") self.assertTrue(_m_fallocate.called) self.assertTrue(_m_unlink.called) - self.assert_(len(self.df_mgr.logger.log_dict['exception']) == 0) + self.assertTrue('error' not in self.logger.all_log_lines()) def test_create_unlink_cleanup_renamer_fails(self): # Test cleanup when renamer fails @@ -2463,12 +3179,12 @@ class TestDiskFile(unittest.TestCase): self.assertFalse(writer.put_succeeded) self.assertTrue(_m_renamer.called) self.assertTrue(_m_unlink.called) - self.assert_(len(self.df_mgr.logger.log_dict['exception']) == 0) + self.assertTrue('error' not in self.logger.all_log_lines()) def test_create_unlink_cleanup_logging(self): # Test logging of os.unlink() failures. 
df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123', - 'xyz') + 'xyz', policy=POLICIES.legacy) _m_fallocate = mock.MagicMock(side_effect=OSError(errno.ENOSPC, os.strerror(errno.ENOSPC))) _m_unlink = mock.MagicMock(side_effect=OSError(errno.ENOENT, @@ -2484,9 +3200,1633 @@ class TestDiskFile(unittest.TestCase): self.fail("Expected exception DiskFileNoSpace") self.assertTrue(_m_fallocate.called) self.assertTrue(_m_unlink.called) - error_lines = self.df_mgr.logger.get_lines_for_level('error') + error_lines = self.logger.get_lines_for_level('error') for line in error_lines: self.assertTrue(line.startswith("Error removing tempfile:")) + +@patch_policies(test_policies) +class TestDiskFile(DiskFileMixin, unittest.TestCase): + + mgr_cls = diskfile.DiskFileManager + + +@patch_policies(with_ec_default=True) +class TestECDiskFile(DiskFileMixin, unittest.TestCase): + + mgr_cls = diskfile.ECDiskFileManager + + def test_commit_raises_DiskFileErrors(self): + scenarios = ((errno.ENOSPC, DiskFileNoSpace), + (errno.EDQUOT, DiskFileNoSpace), + (errno.ENOTDIR, DiskFileError), + (errno.EPERM, DiskFileError)) + + # Check IOErrors from open() is handled + for err_number, expected_exception in scenarios: + io_error = IOError() + io_error.errno = err_number + mock_open = mock.MagicMock(side_effect=io_error) + df = self._simple_get_diskfile(account='a', container='c', + obj='o_%s' % err_number, + policy=POLICIES.default) + timestamp = Timestamp(time()) + with df.create() as writer: + metadata = { + 'ETag': 'bogus_etag', + 'X-Timestamp': timestamp.internal, + 'Content-Length': '0', + } + writer.put(metadata) + with mock.patch('__builtin__.open', mock_open): + self.assertRaises(expected_exception, + writer.commit, + timestamp) + dl = os.listdir(df._datadir) + self.assertEqual(1, len(dl), dl) + rmtree(df._datadir) + + # Check OSError from fsync() is handled + mock_fsync = mock.MagicMock(side_effect=OSError) + df = self._simple_get_diskfile(account='a', container='c', + obj='o_fsync_error') + + timestamp = Timestamp(time()) + with df.create() as writer: + metadata = { + 'ETag': 'bogus_etag', + 'X-Timestamp': timestamp.internal, + 'Content-Length': '0', + } + writer.put(metadata) + with mock.patch('swift.obj.diskfile.fsync', mock_fsync): + self.assertRaises(DiskFileError, + writer.commit, timestamp) + + def test_data_file_has_frag_index(self): + policy = POLICIES.default + for good_value in (0, '0', 2, '2', 14, '14'): + # frag_index set by constructor arg + ts = self.ts().internal + expected = ['%s#%s.data' % (ts, good_value), '%s.durable' % ts] + df = self._get_open_disk_file(ts=ts, policy=policy, + frag_index=good_value) + self.assertEqual(expected, sorted(os.listdir(df._datadir))) + # frag index should be added to object sysmeta + actual = df.get_metadata().get('X-Object-Sysmeta-Ec-Frag-Index') + self.assertEqual(int(good_value), int(actual)) + + # metadata value overrides the constructor arg + ts = self.ts().internal + expected = ['%s#%s.data' % (ts, good_value), '%s.durable' % ts] + meta = {'X-Object-Sysmeta-Ec-Frag-Index': good_value} + df = self._get_open_disk_file(ts=ts, policy=policy, + frag_index='99', + extra_metadata=meta) + self.assertEqual(expected, sorted(os.listdir(df._datadir))) + actual = df.get_metadata().get('X-Object-Sysmeta-Ec-Frag-Index') + self.assertEqual(int(good_value), int(actual)) + + # metadata value alone is sufficient + ts = self.ts().internal + expected = ['%s#%s.data' % (ts, good_value), '%s.durable' % ts] + meta = {'X-Object-Sysmeta-Ec-Frag-Index': good_value} + df = 
self._get_open_disk_file(ts=ts, policy=policy, + frag_index=None, + extra_metadata=meta) + self.assertEqual(expected, sorted(os.listdir(df._datadir))) + actual = df.get_metadata().get('X-Object-Sysmeta-Ec-Frag-Index') + self.assertEqual(int(good_value), int(actual)) + + def test_sysmeta_frag_index_is_immutable(self): + # the X-Object-Sysmeta-Ec-Frag-Index should *only* be set when + # the .data file is written. + policy = POLICIES.default + orig_frag_index = 14 + # frag_index set by constructor arg + ts = self.ts().internal + expected = ['%s#%s.data' % (ts, orig_frag_index), '%s.durable' % ts] + df = self._get_open_disk_file(ts=ts, policy=policy, obj_name='my_obj', + frag_index=orig_frag_index) + self.assertEqual(expected, sorted(os.listdir(df._datadir))) + # frag index should be added to object sysmeta + actual = df.get_metadata().get('X-Object-Sysmeta-Ec-Frag-Index') + self.assertEqual(int(orig_frag_index), int(actual)) + + # open the same diskfile with no frag_index passed to constructor + df = self.df_router[policy].get_diskfile( + self.existing_device, 0, 'a', 'c', 'my_obj', policy=policy, + frag_index=None) + df.open() + actual = df.get_metadata().get('X-Object-Sysmeta-Ec-Frag-Index') + self.assertEqual(int(orig_frag_index), int(actual)) + + # write metadata to a meta file + ts = self.ts().internal + metadata = {'X-Timestamp': ts, + 'X-Object-Meta-Fruit': 'kiwi'} + df.write_metadata(metadata) + # sanity check we did write a meta file + expected.append('%s.meta' % ts) + actual_files = sorted(os.listdir(df._datadir)) + self.assertEqual(expected, actual_files) + + # open the same diskfile, check frag index is unchanged + df = self.df_router[policy].get_diskfile( + self.existing_device, 0, 'a', 'c', 'my_obj', policy=policy, + frag_index=None) + df.open() + # sanity check we have read the meta file + self.assertEqual(ts, df.get_metadata().get('X-Timestamp')) + self.assertEqual('kiwi', df.get_metadata().get('X-Object-Meta-Fruit')) + # check frag index sysmeta is unchanged + actual = df.get_metadata().get('X-Object-Sysmeta-Ec-Frag-Index') + self.assertEqual(int(orig_frag_index), int(actual)) + + # attempt to overwrite frag index sysmeta + ts = self.ts().internal + metadata = {'X-Timestamp': ts, + 'X-Object-Sysmeta-Ec-Frag-Index': 99, + 'X-Object-Meta-Fruit': 'apple'} + df.write_metadata(metadata) + + # open the same diskfile, check frag index is unchanged + df = self.df_router[policy].get_diskfile( + self.existing_device, 0, 'a', 'c', 'my_obj', policy=policy, + frag_index=None) + df.open() + # sanity check we have read the meta file + self.assertEqual(ts, df.get_metadata().get('X-Timestamp')) + self.assertEqual('apple', df.get_metadata().get('X-Object-Meta-Fruit')) + actual = df.get_metadata().get('X-Object-Sysmeta-Ec-Frag-Index') + self.assertEqual(int(orig_frag_index), int(actual)) + + def test_data_file_errors_bad_frag_index(self): + policy = POLICIES.default + df_mgr = self.df_router[policy] + for bad_value in ('foo', '-2', -2, '3.14', 3.14): + # check that bad frag_index set by constructor arg raises error + # as soon as diskfile is constructed, before data is written + self.assertRaises(DiskFileError, self._simple_get_diskfile, + policy=policy, frag_index=bad_value) + + # bad frag_index set by metadata value + # (drive-by check that it is ok for constructor arg to be None) + df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', 'o', + policy=policy, frag_index=None) + ts = self.ts() + meta = {'X-Object-Sysmeta-Ec-Frag-Index': bad_value, + 'X-Timestamp': ts.internal, + 
'Content-Length': 0, + 'Etag': EMPTY_ETAG, + 'Content-Type': 'plain/text'} + with df.create() as writer: + try: + writer.put(meta) + self.fail('Expected DiskFileError for frag_index %s' + % bad_value) + except DiskFileError: + pass + + # bad frag_index set by metadata value overrides ok constructor arg + df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', 'o', + policy=policy, frag_index=2) + ts = self.ts() + meta = {'X-Object-Sysmeta-Ec-Frag-Index': bad_value, + 'X-Timestamp': ts.internal, + 'Content-Length': 0, + 'Etag': EMPTY_ETAG, + 'Content-Type': 'plain/text'} + with df.create() as writer: + try: + writer.put(meta) + self.fail('Expected DiskFileError for frag_index %s' + % bad_value) + except DiskFileError: + pass + + def test_purge_one_fragment_index(self): + ts = self.ts() + for frag_index in (1, 2): + df = self._simple_get_diskfile(frag_index=frag_index) + with df.create() as writer: + data = 'test data' + writer.write(data) + metadata = { + 'ETag': md5(data).hexdigest(), + 'X-Timestamp': ts.internal, + 'Content-Length': len(data), + } + writer.put(metadata) + writer.commit(ts) + + # sanity + self.assertEqual(sorted(os.listdir(df._datadir)), [ + ts.internal + '#1.data', + ts.internal + '#2.data', + ts.internal + '.durable', + ]) + df.purge(ts, 2) + self.assertEqual(sorted(os.listdir(df._datadir)), [ + ts.internal + '#1.data', + ts.internal + '.durable', + ]) + + def test_purge_last_fragment_index(self): + ts = self.ts() + frag_index = 0 + df = self._simple_get_diskfile(frag_index=frag_index) + with df.create() as writer: + data = 'test data' + writer.write(data) + metadata = { + 'ETag': md5(data).hexdigest(), + 'X-Timestamp': ts.internal, + 'Content-Length': len(data), + } + writer.put(metadata) + writer.commit(ts) + + # sanity + self.assertEqual(sorted(os.listdir(df._datadir)), [ + ts.internal + '#0.data', + ts.internal + '.durable', + ]) + df.purge(ts, 0) + self.assertEqual(sorted(os.listdir(df._datadir)), [ + ts.internal + '.durable', + ]) + + def test_purge_non_existant_fragment_index(self): + ts = self.ts() + frag_index = 7 + df = self._simple_get_diskfile(frag_index=frag_index) + with df.create() as writer: + data = 'test data' + writer.write(data) + metadata = { + 'ETag': md5(data).hexdigest(), + 'X-Timestamp': ts.internal, + 'Content-Length': len(data), + } + writer.put(metadata) + writer.commit(ts) + + # sanity + self.assertEqual(sorted(os.listdir(df._datadir)), [ + ts.internal + '#7.data', + ts.internal + '.durable', + ]) + df.purge(ts, 3) + # no effect + self.assertEqual(sorted(os.listdir(df._datadir)), [ + ts.internal + '#7.data', + ts.internal + '.durable', + ]) + + def test_purge_old_timestamp_frag_index(self): + old_ts = self.ts() + ts = self.ts() + frag_index = 1 + df = self._simple_get_diskfile(frag_index=frag_index) + with df.create() as writer: + data = 'test data' + writer.write(data) + metadata = { + 'ETag': md5(data).hexdigest(), + 'X-Timestamp': ts.internal, + 'Content-Length': len(data), + } + writer.put(metadata) + writer.commit(ts) + + # sanity + self.assertEqual(sorted(os.listdir(df._datadir)), [ + ts.internal + '#1.data', + ts.internal + '.durable', + ]) + df.purge(old_ts, 1) + # no effect + self.assertEqual(sorted(os.listdir(df._datadir)), [ + ts.internal + '#1.data', + ts.internal + '.durable', + ]) + + def test_purge_tombstone(self): + ts = self.ts() + df = self._simple_get_diskfile(frag_index=3) + df.delete(ts) + + # sanity + self.assertEqual(sorted(os.listdir(df._datadir)), [ + ts.internal + '.ts', + ]) + df.purge(ts, 3) + 
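# --- Editor's sketch (not part of the patch): the purge tests above and the
# frag-index tests before them exercise the EC on-disk naming convention this
# change introduces -- fragments land as <timestamp>#<frag_index>.data, a
# successful commit drops a <timestamp>.durable marker, and deletes leave a
# <timestamp>.ts tombstone.  A minimal, hypothetical illustration of that
# convention follows; the helper names below are invented for this sketch and
# do not appear in the patch.
from swift.common.utils import Timestamp

def ec_data_filename(timestamp, frag_index):
    # e.g. Timestamp('1383180000.12345') and frag_index 7
    # -> '1383180000.12345#7.data'
    return '%s#%d.data' % (Timestamp(timestamp).internal, frag_index)

def parse_ondisk_filename(filename):
    # Split '<ts>[#<frag>].<ext>' into (Timestamp, frag_index or None, '.ext').
    base, ext = filename.rsplit('.', 1)
    frag_index = None
    if '#' in base:
        base, frag = base.split('#', 1)
        frag_index = int(frag)
    return Timestamp(base), frag_index, '.' + ext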
self.assertEqual(sorted(os.listdir(df._datadir)), []) + + def test_purge_old_tombstone(self): + old_ts = self.ts() + ts = self.ts() + df = self._simple_get_diskfile(frag_index=5) + df.delete(ts) + + # sanity + self.assertEqual(sorted(os.listdir(df._datadir)), [ + ts.internal + '.ts', + ]) + df.purge(old_ts, 5) + # no effect + self.assertEqual(sorted(os.listdir(df._datadir)), [ + ts.internal + '.ts', + ]) + + def test_purge_already_removed(self): + df = self._simple_get_diskfile(frag_index=6) + + df.purge(self.ts(), 6) # no errors + + # sanity + os.makedirs(df._datadir) + self.assertEqual(sorted(os.listdir(df._datadir)), []) + df.purge(self.ts(), 6) + # no effect + self.assertEqual(sorted(os.listdir(df._datadir)), []) + + def test_open_most_recent_durable(self): + policy = POLICIES.default + df_mgr = self.df_router[policy] + + df = df_mgr.get_diskfile(self.existing_device, '0', + 'a', 'c', 'o', policy=policy) + + ts = self.ts() + with df.create() as writer: + data = 'test data' + writer.write(data) + metadata = { + 'ETag': md5(data).hexdigest(), + 'X-Timestamp': ts.internal, + 'Content-Length': len(data), + 'X-Object-Sysmeta-Ec-Frag-Index': 3, + } + writer.put(metadata) + writer.commit(ts) + + # add some .meta stuff + extra_meta = { + 'X-Object-Meta-Foo': 'Bar', + 'X-Timestamp': self.ts().internal, + } + df = df_mgr.get_diskfile(self.existing_device, '0', + 'a', 'c', 'o', policy=policy) + df.write_metadata(extra_meta) + + # sanity + df = df_mgr.get_diskfile(self.existing_device, '0', + 'a', 'c', 'o', policy=policy) + metadata.update(extra_meta) + self.assertEqual(metadata, df.read_metadata()) + + # add a newer datafile + df = df_mgr.get_diskfile(self.existing_device, '0', + 'a', 'c', 'o', policy=policy) + ts = self.ts() + with df.create() as writer: + data = 'test data' + writer.write(data) + new_metadata = { + 'ETag': md5(data).hexdigest(), + 'X-Timestamp': ts.internal, + 'Content-Length': len(data), + 'X-Object-Sysmeta-Ec-Frag-Index': 3, + } + writer.put(new_metadata) + # N.B. don't make it durable + + # and we still get the old metadata (same as if no .data!) + df = df_mgr.get_diskfile(self.existing_device, '0', + 'a', 'c', 'o', policy=policy) + self.assertEqual(metadata, df.read_metadata()) + + def test_open_most_recent_missing_durable(self): + policy = POLICIES.default + df_mgr = self.df_router[policy] + + df = df_mgr.get_diskfile(self.existing_device, '0', + 'a', 'c', 'o', policy=policy) + + self.assertRaises(DiskFileNotExist, df.read_metadata) + + # now create a datafile missing durable + ts = self.ts() + with df.create() as writer: + data = 'test data' + writer.write(data) + new_metadata = { + 'ETag': md5(data).hexdigest(), + 'X-Timestamp': ts.internal, + 'Content-Length': len(data), + 'X-Object-Sysmeta-Ec-Frag-Index': 3, + } + writer.put(new_metadata) + # N.B. don't make it durable + + # add some .meta stuff + extra_meta = { + 'X-Object-Meta-Foo': 'Bar', + 'X-Timestamp': self.ts().internal, + } + df = df_mgr.get_diskfile(self.existing_device, '0', + 'a', 'c', 'o', policy=policy) + df.write_metadata(extra_meta) + + # we still get the DiskFileNotExist (same as if no .data!) 
+ df = df_mgr.get_diskfile(self.existing_device, '0', + 'a', 'c', 'o', policy=policy, + frag_index=3) + self.assertRaises(DiskFileNotExist, df.read_metadata) + + # sanity, withtout the frag_index kwarg + df = df_mgr.get_diskfile(self.existing_device, '0', + 'a', 'c', 'o', policy=policy) + self.assertRaises(DiskFileNotExist, df.read_metadata) + + +@patch_policies(with_ec_default=True) +class TestSuffixHashes(unittest.TestCase): + """ + This tests all things related to hashing suffixes and therefore + there's also few test methods for hash_cleanup_listdir as well + (because it's used by hash_suffix). + + The public interface to suffix hashing is on the Manager:: + + * hash_cleanup_listdir(hsh_path) + * get_hashes(device, partition, suffixes, policy) + * invalidate_hash(suffix_dir) + + The Manager.get_hashes method (used by the REPLICATION verb) + calls Manager._get_hashes (which may be an alias to the module + method get_hashes), which calls hash_suffix, which calls + hash_cleanup_listdir. + + Outside of that, hash_cleanup_listdir and invalidate_hash are + used mostly after writing new files via PUT or DELETE. + + Test methods are organized by:: + + * hash_cleanup_listdir tests - behaviors + * hash_cleanup_listdir tests - error handling + * invalidate_hash tests - behavior + * invalidate_hash tests - error handling + * get_hashes tests - hash_suffix behaviors + * get_hashes tests - hash_suffix error handling + * get_hashes tests - behaviors + * get_hashes tests - error handling + + """ + + def setUp(self): + self.testdir = tempfile.mkdtemp() + self.logger = debug_logger('suffix-hash-test') + self.devices = os.path.join(self.testdir, 'node') + os.mkdir(self.devices) + self.existing_device = 'sda1' + os.mkdir(os.path.join(self.devices, self.existing_device)) + self.conf = { + 'swift_dir': self.testdir, + 'devices': self.devices, + 'mount_check': False, + } + self.df_router = diskfile.DiskFileRouter(self.conf, self.logger) + self._ts_iter = (Timestamp(t) for t in + itertools.count(int(time()))) + self.policy = None + + def ts(self): + """ + Timestamps - forever. + """ + return next(self._ts_iter) + + def fname_to_ts_hash(self, fname): + """ + EC datafiles are only hashed by their timestamp + """ + return md5(fname.split('#', 1)[0]).hexdigest() + + def tearDown(self): + rmtree(self.testdir, ignore_errors=1) + + def iter_policies(self): + for policy in POLICIES: + self.policy = policy + yield policy + + def assertEqual(self, *args): + try: + unittest.TestCase.assertEqual(self, *args) + except AssertionError as err: + if not self.policy: + raise + policy_trailer = '\n\n... for policy %r' % self.policy + raise AssertionError(str(err) + policy_trailer) + + def _datafilename(self, timestamp, policy, frag_index=None): + if frag_index is None: + frag_index = randint(0, 9) + filename = timestamp.internal + if policy.policy_type == EC_POLICY: + filename += '#%d' % frag_index + filename += '.data' + return filename + + def check_hash_cleanup_listdir(self, policy, input_files, output_files): + orig_unlink = os.unlink + file_list = list(input_files) + + def mock_listdir(path): + return list(file_list) + + def mock_unlink(path): + # timestamp 1 is a special tag to pretend a file disappeared + # between the listdir and unlink. + if '/0000000001.00000.' in path: + # Using actual os.unlink for a non-existent name to reproduce + # exactly what OSError it raises in order to prove that + # common.utils.remove_file is squelching the error - but any + # OSError would do. 
+ orig_unlink(uuid.uuid4().hex) + file_list.remove(os.path.basename(path)) + + df_mgr = self.df_router[policy] + with unit_mock({'os.listdir': mock_listdir, 'os.unlink': mock_unlink}): + if isinstance(output_files, Exception): + path = os.path.join(self.testdir, 'does-not-matter') + self.assertRaises(output_files.__class__, + df_mgr.hash_cleanup_listdir, path) + return + files = df_mgr.hash_cleanup_listdir('/whatever') + self.assertEquals(files, output_files) + + # hash_cleanup_listdir tests - behaviors + + def test_hash_cleanup_listdir_purge_data_newer_ts(self): + for policy in self.iter_policies(): + # purge .data if there's a newer .ts + file1 = self._datafilename(self.ts(), policy) + file2 = self.ts().internal + '.ts' + file_list = [file1, file2] + self.check_hash_cleanup_listdir(policy, file_list, [file2]) + + def test_hash_cleanup_listdir_purge_expired_ts(self): + for policy in self.iter_policies(): + # purge older .ts files if there's a newer .data + file1 = self.ts().internal + '.ts' + file2 = self.ts().internal + '.ts' + timestamp = self.ts() + file3 = self._datafilename(timestamp, policy) + file_list = [file1, file2, file3] + expected = { + # no durable datafile means you can't get rid of the + # latest tombstone even if datafile is newer + EC_POLICY: [file3, file2], + REPL_POLICY: [file3], + }[policy.policy_type] + self.check_hash_cleanup_listdir(policy, file_list, expected) + + def test_hash_cleanup_listdir_purge_ts_newer_data(self): + for policy in self.iter_policies(): + # purge .ts if there's a newer .data + file1 = self.ts().internal + '.ts' + timestamp = self.ts() + file2 = self._datafilename(timestamp, policy) + file_list = [file1, file2] + if policy.policy_type == EC_POLICY: + durable_file = timestamp.internal + '.durable' + file_list.append(durable_file) + expected = { + EC_POLICY: [durable_file, file2], + REPL_POLICY: [file2], + }[policy.policy_type] + self.check_hash_cleanup_listdir(policy, file_list, expected) + + def test_hash_cleanup_listdir_purge_older_ts(self): + for policy in self.iter_policies(): + file1 = self.ts().internal + '.ts' + file2 = self.ts().internal + '.ts' + file3 = self._datafilename(self.ts(), policy) + file4 = self.ts().internal + '.meta' + expected = { + # no durable means we can only throw out things before + # the latest tombstone + EC_POLICY: [file4, file3, file2], + # keep .meta and .data and purge all .ts files + REPL_POLICY: [file4, file3], + }[policy.policy_type] + file_list = [file1, file2, file3, file4] + self.check_hash_cleanup_listdir(policy, file_list, expected) + + def test_hash_cleanup_listdir_keep_meta_data_purge_ts(self): + for policy in self.iter_policies(): + file1 = self.ts().internal + '.ts' + file2 = self.ts().internal + '.ts' + timestamp = self.ts() + file3 = self._datafilename(timestamp, policy) + file_list = [file1, file2, file3] + if policy.policy_type == EC_POLICY: + durable_filename = timestamp.internal + '.durable' + file_list.append(durable_filename) + file4 = self.ts().internal + '.meta' + file_list.append(file4) + # keep .meta and .data if meta newer than data and purge .ts + expected = { + EC_POLICY: [file4, durable_filename, file3], + REPL_POLICY: [file4, file3], + }[policy.policy_type] + self.check_hash_cleanup_listdir(policy, file_list, expected) + + def test_hash_cleanup_listdir_keep_one_ts(self): + for policy in self.iter_policies(): + file1, file2, file3 = [self.ts().internal + '.ts' + for i in range(3)] + file_list = [file1, file2, file3] + # keep only latest of multiple .ts files + 
self.check_hash_cleanup_listdir(policy, file_list, [file3]) + + def test_hash_cleanup_listdir_multi_data_file(self): + for policy in self.iter_policies(): + file1 = self._datafilename(self.ts(), policy, 1) + file2 = self._datafilename(self.ts(), policy, 2) + file3 = self._datafilename(self.ts(), policy, 3) + expected = { + # keep all non-durable datafiles + EC_POLICY: [file3, file2, file1], + # keep only latest of multiple .data files + REPL_POLICY: [file3] + }[policy.policy_type] + file_list = [file1, file2, file3] + self.check_hash_cleanup_listdir(policy, file_list, expected) + + def test_hash_cleanup_listdir_keeps_one_datafile(self): + for policy in self.iter_policies(): + timestamps = [self.ts() for i in range(3)] + file1 = self._datafilename(timestamps[0], policy, 1) + file2 = self._datafilename(timestamps[1], policy, 2) + file3 = self._datafilename(timestamps[2], policy, 3) + file_list = [file1, file2, file3] + if policy.policy_type == EC_POLICY: + for t in timestamps: + file_list.append(t.internal + '.durable') + latest_durable = file_list[-1] + expected = { + # keep latest durable and datafile + EC_POLICY: [latest_durable, file3], + # keep only latest of multiple .data files + REPL_POLICY: [file3] + }[policy.policy_type] + self.check_hash_cleanup_listdir(policy, file_list, expected) + + def test_hash_cleanup_listdir_keep_one_meta(self): + for policy in self.iter_policies(): + # keep only latest of multiple .meta files + t_data = self.ts() + file1 = self._datafilename(t_data, policy) + file2, file3 = [self.ts().internal + '.meta' for i in range(2)] + file_list = [file1, file2, file3] + if policy.policy_type == EC_POLICY: + durable_file = t_data.internal + '.durable' + file_list.append(durable_file) + expected = { + EC_POLICY: [file3, durable_file, file1], + REPL_POLICY: [file3, file1] + }[policy.policy_type] + self.check_hash_cleanup_listdir(policy, file_list, expected) + + def test_hash_cleanup_listdir_only_meta(self): + for policy in self.iter_policies(): + file1, file2 = [self.ts().internal + '.meta' for i in range(2)] + file_list = [file1, file2] + if policy.policy_type == EC_POLICY: + # EC policy does tolerate only .meta's in dir when cleaning up + expected = [file2] + else: + # the get_ondisk_files contract validation doesn't allow a + # directory with only .meta files + expected = AssertionError() + self.check_hash_cleanup_listdir(policy, file_list, expected) + + def test_hash_cleanup_listdir_ignore_orphaned_ts(self): + for policy in self.iter_policies(): + # A more recent orphaned .meta file will prevent old .ts files + # from being cleaned up otherwise + file1, file2 = [self.ts().internal + '.ts' for i in range(2)] + file3 = self.ts().internal + '.meta' + file_list = [file1, file2, file3] + self.check_hash_cleanup_listdir(policy, file_list, [file3, file2]) + + def test_hash_cleanup_listdir_purge_old_data_only(self): + for policy in self.iter_policies(): + # Oldest .data will be purge, .meta and .ts won't be touched + file1 = self._datafilename(self.ts(), policy) + file2 = self.ts().internal + '.ts' + file3 = self.ts().internal + '.meta' + file_list = [file1, file2, file3] + self.check_hash_cleanup_listdir(policy, file_list, [file3, file2]) + + def test_hash_cleanup_listdir_purge_old_ts(self): + for policy in self.iter_policies(): + # A single old .ts file will be removed + old_float = time() - (diskfile.ONE_WEEK + 1) + file1 = Timestamp(old_float).internal + '.ts' + file_list = [file1] + self.check_hash_cleanup_listdir(policy, file_list, []) + + def 
test_hash_cleanup_listdir_meta_keeps_old_ts(self): + for policy in self.iter_policies(): + old_float = time() - (diskfile.ONE_WEEK + 1) + file1 = Timestamp(old_float).internal + '.ts' + file2 = Timestamp(time() + 2).internal + '.meta' + file_list = [file1, file2] + if policy.policy_type == EC_POLICY: + # EC will clean up old .ts despite a .meta + expected = [file2] + else: + # An orphaned .meta will not clean up a very old .ts + expected = [file2, file1] + self.check_hash_cleanup_listdir(policy, file_list, expected) + + def test_hash_cleanup_listdir_keep_single_old_data(self): + for policy in self.iter_policies(): + old_float = time() - (diskfile.ONE_WEEK + 1) + file1 = self._datafilename(Timestamp(old_float), policy) + file_list = [file1] + if policy.policy_type == EC_POLICY: + # for EC an isolated old .data file is removed, its useless + # without a .durable + expected = [] + else: + # A single old .data file will not be removed + expected = file_list + self.check_hash_cleanup_listdir(policy, file_list, expected) + + def test_hash_cleanup_listdir_drops_isolated_durable(self): + for policy in self.iter_policies(): + if policy.policy_type == EC_POLICY: + file1 = Timestamp(time()).internal + '.durable' + file_list = [file1] + self.check_hash_cleanup_listdir(policy, file_list, []) + + def test_hash_cleanup_listdir_keep_single_old_meta(self): + for policy in self.iter_policies(): + # A single old .meta file will not be removed + old_float = time() - (diskfile.ONE_WEEK + 1) + file1 = Timestamp(old_float).internal + '.meta' + file_list = [file1] + self.check_hash_cleanup_listdir(policy, file_list, [file1]) + + # hash_cleanup_listdir tests - error handling + + def test_hash_cleanup_listdir_hsh_path_enoent(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + # common.utils.listdir *completely* mutes ENOENT + path = os.path.join(self.testdir, 'does-not-exist') + self.assertEqual(df_mgr.hash_cleanup_listdir(path), []) + + def test_hash_cleanup_listdir_hsh_path_other_oserror(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + with mock.patch('os.listdir') as mock_listdir: + mock_listdir.side_effect = OSError('kaboom!') + # but it will raise other OSErrors + path = os.path.join(self.testdir, 'does-not-matter') + self.assertRaises(OSError, df_mgr.hash_cleanup_listdir, + path) + + def test_hash_cleanup_listdir_reclaim_tombstone_remove_file_error(self): + for policy in self.iter_policies(): + # Timestamp 1 makes the check routine pretend the file + # disappeared after listdir before unlink. + file1 = '0000000001.00000.ts' + file_list = [file1] + self.check_hash_cleanup_listdir(policy, file_list, []) + + def test_hash_cleanup_listdir_older_remove_file_error(self): + for policy in self.iter_policies(): + # Timestamp 1 makes the check routine pretend the file + # disappeared after listdir before unlink. 
+ file1 = self._datafilename(Timestamp(1), policy) + file2 = '0000000002.00000.ts' + file_list = [file1, file2] + if policy.policy_type == EC_POLICY: + # the .ts gets reclaimed up despite failed .data delete + expected = [] + else: + # the .ts isn't reclaimed because there were two files in dir + expected = [file2] + self.check_hash_cleanup_listdir(policy, file_list, expected) + + # invalidate_hash tests - behavior + + def test_invalidate_hash_file_does_not_exist(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o', + policy=policy) + suffix_dir = os.path.dirname(df._datadir) + part_path = os.path.join(self.devices, 'sda1', + diskfile.get_data_dir(policy), '0') + hashes_file = os.path.join(part_path, diskfile.HASH_FILE) + self.assertFalse(os.path.exists(hashes_file)) # sanity + with mock.patch('swift.obj.diskfile.lock_path') as mock_lock: + df_mgr.invalidate_hash(suffix_dir) + self.assertFalse(mock_lock.called) + # does not create file + self.assertFalse(os.path.exists(hashes_file)) + + def test_invalidate_hash_file_exists(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + # create something to hash + df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o', + policy=policy) + df.delete(self.ts()) + suffix_dir = os.path.dirname(df._datadir) + suffix = os.path.basename(suffix_dir) + hashes = df_mgr.get_hashes('sda1', '0', [], policy) + self.assertTrue(suffix in hashes) # sanity + # sanity check hashes file + part_path = os.path.join(self.devices, 'sda1', + diskfile.get_data_dir(policy), '0') + hashes_file = os.path.join(part_path, diskfile.HASH_FILE) + with open(hashes_file, 'rb') as f: + self.assertEqual(hashes, pickle.load(f)) + # invalidate the hash + with mock.patch('swift.obj.diskfile.lock_path') as mock_lock: + df_mgr.invalidate_hash(suffix_dir) + self.assertTrue(mock_lock.called) + with open(hashes_file, 'rb') as f: + self.assertEqual({suffix: None}, pickle.load(f)) + + # invalidate_hash tests - error handling + + def test_invalidate_hash_bad_pickle(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + # make some valid data + df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o', + policy=policy) + suffix_dir = os.path.dirname(df._datadir) + suffix = os.path.basename(suffix_dir) + df.delete(self.ts()) + # sanity check hashes file + part_path = os.path.join(self.devices, 'sda1', + diskfile.get_data_dir(policy), '0') + hashes_file = os.path.join(part_path, diskfile.HASH_FILE) + self.assertFalse(os.path.exists(hashes_file)) + # write some garbage in hashes file + with open(hashes_file, 'w') as f: + f.write('asdf') + # invalidate_hash silently *NOT* repair invalid data + df_mgr.invalidate_hash(suffix_dir) + with open(hashes_file) as f: + self.assertEqual(f.read(), 'asdf') + # ... 
but get_hashes will + hashes = df_mgr.get_hashes('sda1', '0', [], policy) + self.assertTrue(suffix in hashes) + + # get_hashes tests - hash_suffix behaviors + + def test_hash_suffix_one_tombstone(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + df = df_mgr.get_diskfile( + 'sda1', '0', 'a', 'c', 'o', policy=policy) + suffix = os.path.basename(os.path.dirname(df._datadir)) + # write a tombstone + timestamp = self.ts() + df.delete(timestamp) + tombstone_hash = md5(timestamp.internal + '.ts').hexdigest() + hashes = df_mgr.get_hashes('sda1', '0', [], policy) + expected = { + REPL_POLICY: {suffix: tombstone_hash}, + EC_POLICY: {suffix: { + # fi is None here because we have a tombstone + None: tombstone_hash}}, + }[policy.policy_type] + self.assertEqual(hashes, expected) + + def test_hash_suffix_one_reclaim_tombstone(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + df = df_mgr.get_diskfile( + 'sda1', '0', 'a', 'c', 'o', policy=policy) + suffix = os.path.basename(os.path.dirname(df._datadir)) + # scale back this tests manager's reclaim age a bit + df_mgr.reclaim_age = 1000 + # write a tombstone that's just a *little* older + old_time = time() - 1001 + timestamp = Timestamp(old_time) + df.delete(timestamp.internal) + tombstone_hash = md5(timestamp.internal + '.ts').hexdigest() + hashes = df_mgr.get_hashes('sda1', '0', [], policy) + expected = { + # repl is broken, it doesn't use self.reclaim_age + REPL_POLICY: tombstone_hash, + EC_POLICY: {}, + }[policy.policy_type] + self.assertEqual(hashes, {suffix: expected}) + + def test_hash_suffix_one_datafile(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + df = df_mgr.get_diskfile( + 'sda1', '0', 'a', 'c', 'o', policy=policy, frag_index=7) + suffix = os.path.basename(os.path.dirname(df._datadir)) + # write a datafile + timestamp = self.ts() + with df.create() as writer: + test_data = 'test file' + writer.write(test_data) + metadata = { + 'X-Timestamp': timestamp.internal, + 'ETag': md5(test_data).hexdigest(), + 'Content-Length': len(test_data), + } + writer.put(metadata) + hashes = df_mgr.get_hashes('sda1', '0', [], policy) + datafile_hash = md5({ + EC_POLICY: timestamp.internal, + REPL_POLICY: timestamp.internal + '.data', + }[policy.policy_type]).hexdigest() + expected = { + REPL_POLICY: {suffix: datafile_hash}, + EC_POLICY: {suffix: { + # because there's no .durable file, we have no hash for + # the None key - only the frag index for the data file + 7: datafile_hash}}, + }[policy.policy_type] + msg = 'expected %r != %r for policy %r' % ( + expected, hashes, policy) + self.assertEqual(hashes, expected, msg) + + def test_hash_suffix_multi_file_ends_in_tombstone(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o', policy=policy, + frag_index=4) + suffix = os.path.basename(os.path.dirname(df._datadir)) + mkdirs(df._datadir) + now = time() + # go behind the scenes and setup a bunch of weird file names + for tdiff in [500, 100, 10, 1]: + for suff in ['.meta', '.data', '.ts']: + timestamp = Timestamp(now - tdiff) + filename = timestamp.internal + if policy.policy_type == EC_POLICY and suff == '.data': + filename += '#%s' % df._frag_index + filename += suff + open(os.path.join(df._datadir, filename), 'w').close() + tombstone_hash = md5(filename).hexdigest() + # call get_hashes and it should clean things up + hashes = df_mgr.get_hashes('sda1', '0', [], policy) + expected = { + REPL_POLICY: 
{suffix: tombstone_hash}, + EC_POLICY: {suffix: { + # fi is None here because we have a tombstone + None: tombstone_hash}}, + }[policy.policy_type] + self.assertEqual(hashes, expected) + # only the tombstone should be left + found_files = os.listdir(df._datadir) + self.assertEqual(found_files, [filename]) + + def test_hash_suffix_multi_file_ends_in_datafile(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o', policy=policy, + frag_index=4) + suffix = os.path.basename(os.path.dirname(df._datadir)) + mkdirs(df._datadir) + now = time() + timestamp = None + # go behind the scenes and setup a bunch of weird file names + for tdiff in [500, 100, 10, 1]: + suffs = ['.meta', '.data'] + if tdiff > 50: + suffs.append('.ts') + if policy.policy_type == EC_POLICY: + suffs.append('.durable') + for suff in suffs: + timestamp = Timestamp(now - tdiff) + filename = timestamp.internal + if policy.policy_type == EC_POLICY and suff == '.data': + filename += '#%s' % df._frag_index + filename += suff + open(os.path.join(df._datadir, filename), 'w').close() + # call get_hashes and it should clean things up + hashes = df_mgr.get_hashes('sda1', '0', [], policy) + data_filename = timestamp.internal + if policy.policy_type == EC_POLICY: + data_filename += '#%s' % df._frag_index + data_filename += '.data' + metadata_filename = timestamp.internal + '.meta' + durable_filename = timestamp.internal + '.durable' + if policy.policy_type == EC_POLICY: + hasher = md5() + hasher.update(metadata_filename) + hasher.update(durable_filename) + expected = { + suffix: { + # metadata & durable updates are hashed separately + None: hasher.hexdigest(), + 4: self.fname_to_ts_hash(data_filename), + } + } + expected_files = [data_filename, durable_filename, + metadata_filename] + elif policy.policy_type == REPL_POLICY: + hasher = md5() + hasher.update(metadata_filename) + hasher.update(data_filename) + expected = {suffix: hasher.hexdigest()} + expected_files = [data_filename, metadata_filename] + else: + self.fail('unknown policy type %r' % policy.policy_type) + msg = 'expected %r != %r for policy %r' % ( + expected, hashes, policy) + self.assertEqual(hashes, expected, msg) + # only the meta and data should be left + self.assertEqual(sorted(os.listdir(df._datadir)), + sorted(expected_files)) + + def test_hash_suffix_removes_empty_hashdir_and_suffix(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o', + policy=policy, frag_index=2) + os.makedirs(df._datadir) + self.assertTrue(os.path.exists(df._datadir)) # sanity + df_mgr.get_hashes('sda1', '0', [], policy) + suffix_dir = os.path.dirname(df._datadir) + self.assertFalse(os.path.exists(suffix_dir)) + + def test_hash_suffix_removes_empty_hashdirs_in_valid_suffix(self): + paths, suffix = find_paths_with_matching_suffixes(needed_matches=3, + needed_suffixes=0) + matching_paths = paths.pop(suffix) + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + df = df_mgr.get_diskfile('sda1', '0', *matching_paths[0], + policy=policy, frag_index=2) + # create a real, valid hsh_path + df.delete(Timestamp(time())) + # and a couple of empty hsh_paths + empty_hsh_paths = [] + for path in matching_paths[1:]: + fake_df = df_mgr.get_diskfile('sda1', '0', *path, + policy=policy) + os.makedirs(fake_df._datadir) + empty_hsh_paths.append(fake_df._datadir) + for hsh_path in empty_hsh_paths: + self.assertTrue(os.path.exists(hsh_path)) # 
sanity + # get_hashes will cleanup empty hsh_path and leave valid one + hashes = df_mgr.get_hashes('sda1', '0', [], policy) + self.assertTrue(suffix in hashes) + self.assertTrue(os.path.exists(df._datadir)) + for hsh_path in empty_hsh_paths: + self.assertFalse(os.path.exists(hsh_path)) + + # get_hashes tests - hash_suffix error handling + + def test_hash_suffix_listdir_enotdir(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + suffix = '123' + suffix_path = os.path.join(self.devices, 'sda1', + diskfile.get_data_dir(policy), '0', + suffix) + os.makedirs(suffix_path) + self.assertTrue(os.path.exists(suffix_path)) # sanity + hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy) + # suffix dir cleaned up by get_hashes + self.assertFalse(os.path.exists(suffix_path)) + expected = { + EC_POLICY: {'123': {}}, + REPL_POLICY: {'123': EMPTY_ETAG}, + }[policy.policy_type] + msg = 'expected %r != %r for policy %r' % (expected, hashes, + policy) + self.assertEqual(hashes, expected, msg) + + # now make the suffix path a file + open(suffix_path, 'w').close() + hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy) + expected = {} + msg = 'expected %r != %r for policy %r' % (expected, hashes, + policy) + self.assertEqual(hashes, expected, msg) + + def test_hash_suffix_listdir_enoent(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + orig_listdir = os.listdir + listdir_calls = [] + + def mock_listdir(path): + success = False + try: + rv = orig_listdir(path) + success = True + return rv + finally: + listdir_calls.append((path, success)) + + with mock.patch('swift.obj.diskfile.os.listdir', + mock_listdir): + # recalc always forces hash_suffix even if the suffix + # does not exist! + df_mgr.get_hashes('sda1', '0', ['123'], policy) + + part_path = os.path.join(self.devices, 'sda1', + diskfile.get_data_dir(policy), '0') + + self.assertEqual(listdir_calls, [ + # part path gets created automatically + (part_path, True), + # this one blows up + (os.path.join(part_path, '123'), False), + ]) + + def test_hash_suffix_hash_cleanup_listdir_enotdir_quarantined(self): + for policy in self.iter_policies(): + df = self.df_router[policy].get_diskfile( + self.existing_device, '0', 'a', 'c', 'o', policy=policy) + # make the suffix directory + suffix_path = os.path.dirname(df._datadir) + os.makedirs(suffix_path) + suffix = os.path.basename(suffix_path) + + # make the df hash path a file + open(df._datadir, 'wb').close() + df_mgr = self.df_router[policy] + hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix], + policy) + expected = { + REPL_POLICY: {suffix: EMPTY_ETAG}, + EC_POLICY: {suffix: {}}, + }[policy.policy_type] + self.assertEqual(hashes, expected) + # and hash path is quarantined + self.assertFalse(os.path.exists(df._datadir)) + # each device a quarantined directory + quarantine_base = os.path.join(self.devices, + self.existing_device, 'quarantined') + # the quarantine path is... 
+ quarantine_path = os.path.join( + quarantine_base, # quarantine root + diskfile.get_data_dir(policy), # per-policy data dir + suffix, # first dir from which quarantined file was removed + os.path.basename(df._datadir) # name of quarantined file + ) + self.assertTrue(os.path.exists(quarantine_path)) + + def test_hash_suffix_hash_cleanup_listdir_other_oserror(self): + for policy in self.iter_policies(): + timestamp = self.ts() + df_mgr = self.df_router[policy] + df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', + 'o', policy=policy, + frag_index=7) + suffix = os.path.basename(os.path.dirname(df._datadir)) + with df.create() as writer: + test_data = 'test_data' + writer.write(test_data) + metadata = { + 'X-Timestamp': timestamp.internal, + 'ETag': md5(test_data).hexdigest(), + 'Content-Length': len(test_data), + } + writer.put(metadata) + + orig_os_listdir = os.listdir + listdir_calls = [] + + part_path = os.path.join(self.devices, self.existing_device, + diskfile.get_data_dir(policy), '0') + suffix_path = os.path.join(part_path, suffix) + datadir_path = os.path.join(suffix_path, hash_path('a', 'c', 'o')) + + def mock_os_listdir(path): + listdir_calls.append(path) + if path == datadir_path: + # we want the part and suffix listdir calls to pass and + # make the hash_cleanup_listdir raise an exception + raise OSError(errno.EACCES, os.strerror(errno.EACCES)) + return orig_os_listdir(path) + + with mock.patch('os.listdir', mock_os_listdir): + hashes = df_mgr.get_hashes(self.existing_device, '0', [], + policy) + + self.assertEqual(listdir_calls, [ + part_path, + suffix_path, + datadir_path, + ]) + expected = {suffix: None} + msg = 'expected %r != %r for policy %r' % ( + expected, hashes, policy) + self.assertEqual(hashes, expected, msg) + + def test_hash_suffix_rmdir_hsh_path_oserror(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + # make an empty hsh_path to be removed + df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', + 'o', policy=policy) + os.makedirs(df._datadir) + suffix = os.path.basename(os.path.dirname(df._datadir)) + with mock.patch('os.rmdir', side_effect=OSError()): + hashes = df_mgr.get_hashes(self.existing_device, '0', [], + policy) + expected = { + EC_POLICY: {}, + REPL_POLICY: md5().hexdigest(), + }[policy.policy_type] + self.assertEqual(hashes, {suffix: expected}) + self.assertTrue(os.path.exists(df._datadir)) + + def test_hash_suffix_rmdir_suffix_oserror(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + # make an empty hsh_path to be removed + df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', + 'o', policy=policy) + os.makedirs(df._datadir) + suffix_path = os.path.dirname(df._datadir) + suffix = os.path.basename(suffix_path) + + captured_paths = [] + + def mock_rmdir(path): + captured_paths.append(path) + if path == suffix_path: + raise OSError('kaboom!') + + with mock.patch('os.rmdir', mock_rmdir): + hashes = df_mgr.get_hashes(self.existing_device, '0', [], + policy) + expected = { + EC_POLICY: {}, + REPL_POLICY: md5().hexdigest(), + }[policy.policy_type] + self.assertEqual(hashes, {suffix: expected}) + self.assertTrue(os.path.exists(suffix_path)) + self.assertEqual([ + df._datadir, + suffix_path, + ], captured_paths) + + # get_hashes tests - behaviors + + def test_get_hashes_creates_partition_and_pkl(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + hashes = df_mgr.get_hashes(self.existing_device, '0', [], + policy) + self.assertEqual(hashes, {}) 
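# --- Editor's sketch (not part of the patch): the get_hashes() assertions in
# this class depend on the shape of its return value, which differs by policy
# type.  For a replication policy each suffix maps to a single md5 hexdigest
# accumulated over the file names in that suffix; for an EC policy each suffix
# maps to a dict keyed by fragment index, where the None key covers
# .durable/.meta/.ts names and each numeric key hashes only the timestamp of
# that fragment's .data file (see fname_to_ts_hash above).  The literal values
# below are invented purely to show the two shapes:
repl_hashes = {
    '456': 'd41d8cd98f00b204e9800998ecf8427e',     # md5 over concatenated names
}
ec_hashes = {
    '456': {
        None: 'd41d8cd98f00b204e9800998ecf8427e',  # .durable/.meta/.ts names
        7: '5b42c2ea658b7a569dddbf0cb0e9ae59',     # ts of the #7 .data file
    },
}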
+ part_path = os.path.join( + self.devices, 'sda1', diskfile.get_data_dir(policy), '0') + self.assertTrue(os.path.exists(part_path)) + hashes_file = os.path.join(part_path, + diskfile.HASH_FILE) + self.assertTrue(os.path.exists(hashes_file)) + + # and double check the hashes + new_hashes = df_mgr.get_hashes(self.existing_device, '0', [], + policy) + self.assertEqual(hashes, new_hashes) + + def test_get_hashes_new_pkl_finds_new_suffix_dirs(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + part_path = os.path.join( + self.devices, self.existing_device, + diskfile.get_data_dir(policy), '0') + hashes_file = os.path.join(part_path, + diskfile.HASH_FILE) + # add something to find + df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', + 'o', policy=policy, frag_index=4) + timestamp = self.ts() + df.delete(timestamp) + suffix = os.path.basename(os.path.dirname(df._datadir)) + # get_hashes will find the untracked suffix dir + self.assertFalse(os.path.exists(hashes_file)) # sanity + hashes = df_mgr.get_hashes(self.existing_device, '0', [], policy) + self.assertTrue(suffix in hashes) + # ... and create a hashes pickle for it + self.assertTrue(os.path.exists(hashes_file)) + + def test_get_hashes_old_pickle_does_not_find_new_suffix_dirs(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + # create a empty stale pickle + part_path = os.path.join( + self.devices, 'sda1', diskfile.get_data_dir(policy), '0') + hashes_file = os.path.join(part_path, + diskfile.HASH_FILE) + hashes = df_mgr.get_hashes(self.existing_device, '0', [], policy) + self.assertEqual(hashes, {}) + self.assertTrue(os.path.exists(hashes_file)) # sanity + # add something to find + df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', 'o', + policy=policy, frag_index=4) + os.makedirs(df._datadir) + filename = Timestamp(time()).internal + '.ts' + open(os.path.join(df._datadir, filename), 'w').close() + suffix = os.path.basename(os.path.dirname(df._datadir)) + # but get_hashes has no reason to find it (because we didn't + # call invalidate_hash) + new_hashes = df_mgr.get_hashes(self.existing_device, '0', [], + policy) + self.assertEqual(new_hashes, hashes) + # ... unless remote end asks for a recalc + hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix], + policy) + self.assertTrue(suffix in hashes) + + def test_get_hashes_does_not_rehash_known_suffix_dirs(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', + 'o', policy=policy, frag_index=4) + suffix = os.path.basename(os.path.dirname(df._datadir)) + timestamp = self.ts() + df.delete(timestamp) + # create the baseline hashes file + hashes = df_mgr.get_hashes(self.existing_device, '0', [], policy) + self.assertTrue(suffix in hashes) + # now change the contents of the suffix w/o calling + # invalidate_hash + rmtree(df._datadir) + suffix_path = os.path.dirname(df._datadir) + self.assertTrue(os.path.exists(suffix_path)) # sanity + new_hashes = df_mgr.get_hashes(self.existing_device, '0', [], + policy) + # ... and get_hashes is none the wiser + self.assertEqual(new_hashes, hashes) + + # ... unless remote end asks for a recalc + hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix], + policy) + self.assertNotEqual(new_hashes, hashes) + # and the empty suffix path is removed + self.assertFalse(os.path.exists(suffix_path)) + # ... 
but is hashed as "empty" + expected = { + EC_POLICY: {}, + REPL_POLICY: md5().hexdigest(), + }[policy.policy_type] + self.assertEqual({suffix: expected}, hashes) + + def test_get_hashes_multi_file_multi_suffix(self): + paths, suffix = find_paths_with_matching_suffixes(needed_matches=2, + needed_suffixes=3) + matching_paths = paths.pop(suffix) + matching_paths.sort(key=lambda path: hash_path(*path)) + other_paths = [] + for suffix, paths in paths.items(): + other_paths.append(paths[0]) + if len(other_paths) >= 2: + break + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + # first we'll make a tombstone + df = df_mgr.get_diskfile(self.existing_device, '0', + *other_paths[0], policy=policy, + frag_index=4) + timestamp = self.ts() + df.delete(timestamp) + tombstone_hash = md5(timestamp.internal + '.ts').hexdigest() + tombstone_suffix = os.path.basename(os.path.dirname(df._datadir)) + # second file in another suffix has a .datafile + df = df_mgr.get_diskfile(self.existing_device, '0', + *other_paths[1], policy=policy, + frag_index=5) + timestamp = self.ts() + with df.create() as writer: + test_data = 'test_file' + writer.write(test_data) + metadata = { + 'X-Timestamp': timestamp.internal, + 'ETag': md5(test_data).hexdigest(), + 'Content-Length': len(test_data), + } + writer.put(metadata) + writer.commit(timestamp) + datafile_name = timestamp.internal + if policy.policy_type == EC_POLICY: + datafile_name += '#%d' % df._frag_index + datafile_name += '.data' + durable_hash = md5(timestamp.internal + '.durable').hexdigest() + datafile_suffix = os.path.basename(os.path.dirname(df._datadir)) + # in the *third* suffix - two datafiles for different hashes + df = df_mgr.get_diskfile(self.existing_device, '0', + *matching_paths[0], policy=policy, + frag_index=6) + matching_suffix = os.path.basename(os.path.dirname(df._datadir)) + timestamp = self.ts() + with df.create() as writer: + test_data = 'test_file' + writer.write(test_data) + metadata = { + 'X-Timestamp': timestamp.internal, + 'ETag': md5(test_data).hexdigest(), + 'Content-Length': len(test_data), + } + writer.put(metadata) + writer.commit(timestamp) + # we'll keep track of file names for hash calculations + filename = timestamp.internal + if policy.policy_type == EC_POLICY: + filename += '#%d' % df._frag_index + filename += '.data' + filenames = { + 'data': { + 6: filename + }, + 'durable': [timestamp.internal + '.durable'], + } + df = df_mgr.get_diskfile(self.existing_device, '0', + *matching_paths[1], policy=policy, + frag_index=7) + self.assertEqual(os.path.basename(os.path.dirname(df._datadir)), + matching_suffix) # sanity + timestamp = self.ts() + with df.create() as writer: + test_data = 'test_file' + writer.write(test_data) + metadata = { + 'X-Timestamp': timestamp.internal, + 'ETag': md5(test_data).hexdigest(), + 'Content-Length': len(test_data), + } + writer.put(metadata) + writer.commit(timestamp) + filename = timestamp.internal + if policy.policy_type == EC_POLICY: + filename += '#%d' % df._frag_index + filename += '.data' + filenames['data'][7] = filename + filenames['durable'].append(timestamp.internal + '.durable') + # now make up the expected suffixes! 
+ if policy.policy_type == EC_POLICY: + hasher = md5() + for filename in filenames['durable']: + hasher.update(filename) + expected = { + tombstone_suffix: { + None: tombstone_hash, + }, + datafile_suffix: { + None: durable_hash, + 5: self.fname_to_ts_hash(datafile_name), + }, + matching_suffix: { + None: hasher.hexdigest(), + 6: self.fname_to_ts_hash(filenames['data'][6]), + 7: self.fname_to_ts_hash(filenames['data'][7]), + }, + } + elif policy.policy_type == REPL_POLICY: + hasher = md5() + for filename in filenames['data'].values(): + hasher.update(filename) + expected = { + tombstone_suffix: tombstone_hash, + datafile_suffix: md5(datafile_name).hexdigest(), + matching_suffix: hasher.hexdigest(), + } + else: + self.fail('unknown policy type %r' % policy.policy_type) + hashes = df_mgr.get_hashes('sda1', '0', [], policy) + self.assertEqual(hashes, expected) + + # get_hashes tests - error handling + + def test_get_hashes_bad_dev(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + df_mgr.mount_check = True + with mock.patch('swift.obj.diskfile.check_mount', + mock.MagicMock(side_effect=[False])): + self.assertRaises( + DiskFileDeviceUnavailable, + df_mgr.get_hashes, self.existing_device, '0', ['123'], + policy) + + def test_get_hashes_zero_bytes_pickle(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + part_path = os.path.join(self.devices, self.existing_device, + diskfile.get_data_dir(policy), '0') + os.makedirs(part_path) + # create a pre-existing zero-byte file + open(os.path.join(part_path, diskfile.HASH_FILE), 'w').close() + hashes = df_mgr.get_hashes(self.existing_device, '0', [], + policy) + self.assertEqual(hashes, {}) + + def test_get_hashes_hash_suffix_enotdir(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + # create a real suffix dir + df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', + 'o', policy=policy, frag_index=3) + df.delete(Timestamp(time())) + suffix = os.path.basename(os.path.dirname(df._datadir)) + # touch a bad suffix dir + part_dir = os.path.join(self.devices, self.existing_device, + diskfile.get_data_dir(policy), '0') + open(os.path.join(part_dir, 'bad'), 'w').close() + hashes = df_mgr.get_hashes(self.existing_device, '0', [], policy) + self.assertTrue(suffix in hashes) + self.assertFalse('bad' in hashes) + + def test_get_hashes_hash_suffix_other_oserror(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + suffix = '123' + suffix_path = os.path.join(self.devices, self.existing_device, + diskfile.get_data_dir(policy), '0', + suffix) + os.makedirs(suffix_path) + self.assertTrue(os.path.exists(suffix_path)) # sanity + hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix], + policy) + expected = { + EC_POLICY: {'123': {}}, + REPL_POLICY: {'123': EMPTY_ETAG}, + }[policy.policy_type] + msg = 'expected %r != %r for policy %r' % (expected, hashes, + policy) + self.assertEqual(hashes, expected, msg) + + # this OSError does *not* raise PathNotDir, and is allowed to leak + # from hash_suffix into get_hashes + mocked_os_listdir = mock.Mock( + side_effect=OSError(errno.EACCES, os.strerror(errno.EACCES))) + with mock.patch("os.listdir", mocked_os_listdir): + with mock.patch('swift.obj.diskfile.logging') as mock_logging: + hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy) + self.assertEqual(mock_logging.method_calls, + [mock.call.exception('Error hashing suffix')]) + # recalc always causes a suffix to get reset to None; the listdir + # 
error prevents the suffix from being rehashed + expected = {'123': None} + msg = 'expected %r != %r for policy %r' % (expected, hashes, + policy) + self.assertEqual(hashes, expected, msg) + + def test_get_hashes_modified_recursive_retry(self): + for policy in self.iter_policies(): + df_mgr = self.df_router[policy] + # first create an empty pickle + df_mgr.get_hashes(self.existing_device, '0', [], policy) + hashes_file = os.path.join( + self.devices, self.existing_device, + diskfile.get_data_dir(policy), '0', diskfile.HASH_FILE) + mtime = os.path.getmtime(hashes_file) + non_local = {'mtime': mtime} + + calls = [] + + def mock_getmtime(filename): + t = non_local['mtime'] + if len(calls) <= 3: + # this will make the *next* call get a slightly + # newer mtime than the last + non_local['mtime'] += 1 + # track exactly the value for every return + calls.append(t) + return t + with mock.patch('swift.obj.diskfile.getmtime', + mock_getmtime): + df_mgr.get_hashes(self.existing_device, '0', ['123'], + policy) + + self.assertEqual(calls, [ + mtime + 0, # read + mtime + 1, # modified + mtime + 2, # read + mtime + 3, # modifed + mtime + 4, # read + mtime + 4, # not modifed + ]) + + if __name__ == '__main__': unittest.main() diff --git a/test/unit/obj/test_replicator.py b/test/unit/obj/test_replicator.py index e1fb3062d1..d662f01eb5 100644 --- a/test/unit/obj/test_replicator.py +++ b/test/unit/obj/test_replicator.py @@ -173,9 +173,9 @@ class TestObjectReplicator(unittest.TestCase): os.mkdir(self.devices) os.mkdir(os.path.join(self.devices, 'sda')) self.objects = os.path.join(self.devices, 'sda', - diskfile.get_data_dir(0)) + diskfile.get_data_dir(POLICIES[0])) self.objects_1 = os.path.join(self.devices, 'sda', - diskfile.get_data_dir(1)) + diskfile.get_data_dir(POLICIES[1])) os.mkdir(self.objects) os.mkdir(self.objects_1) self.parts = {} @@ -205,7 +205,7 @@ class TestObjectReplicator(unittest.TestCase): object_replicator.http_connect = mock_http_connect(200) cur_part = '0' df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o', - policy_idx=0) + policy=POLICIES[0]) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -216,7 +216,7 @@ class TestObjectReplicator(unittest.TestCase): data_dir = ohash[-3:] whole_path_from = os.path.join(self.objects, cur_part, data_dir) process_arg_checker = [] - ring = replicator.get_object_ring(0) + ring = replicator.load_object_ring(POLICIES[0]) nodes = [node for node in ring.get_part_nodes(int(cur_part)) if node['ip'] not in _ips()] @@ -239,7 +239,7 @@ class TestObjectReplicator(unittest.TestCase): object_replicator.http_connect = mock_http_connect(200) cur_part = '0' df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o', - policy_idx=1) + policy=POLICIES[1]) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -250,7 +250,7 @@ class TestObjectReplicator(unittest.TestCase): data_dir = ohash[-3:] whole_path_from = os.path.join(self.objects_1, cur_part, data_dir) process_arg_checker = [] - ring = replicator.get_object_ring(1) + ring = replicator.load_object_ring(POLICIES[1]) nodes = [node for node in ring.get_part_nodes(int(cur_part)) if node['ip'] not in _ips()] @@ -266,7 +266,7 @@ class TestObjectReplicator(unittest.TestCase): def test_check_ring(self): for pol in POLICIES: - obj_ring = self.replicator.get_object_ring(pol.idx) + obj_ring = self.replicator.load_object_ring(pol) self.assertTrue(self.replicator.check_ring(obj_ring)) orig_check = 
self.replicator.next_check self.replicator.next_check = orig_check - 30 @@ -300,7 +300,7 @@ class TestObjectReplicator(unittest.TestCase): jobs_to_delete = [j for j in jobs if j['delete']] jobs_by_pol_part = {} for job in jobs: - jobs_by_pol_part[str(job['policy_idx']) + job['partition']] = job + jobs_by_pol_part[str(int(job['policy'])) + job['partition']] = job self.assertEquals(len(jobs_to_delete), 2) self.assertTrue('1', jobs_to_delete[0]['partition']) self.assertEquals( @@ -392,7 +392,8 @@ class TestObjectReplicator(unittest.TestCase): def test_delete_partition(self): with mock.patch('swift.obj.replicator.http_connect', mock_http_connect(200)): - df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o') + df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', + policy=POLICIES.legacy) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -404,7 +405,7 @@ class TestObjectReplicator(unittest.TestCase): whole_path_from = os.path.join(self.objects, '1', data_dir) part_path = os.path.join(self.objects, '1') self.assertTrue(os.access(part_path, os.F_OK)) - ring = self.replicator.get_object_ring(0) + ring = self.replicator.load_object_ring(POLICIES[0]) nodes = [node for node in ring.get_part_nodes(1) if node['ip'] not in _ips()] @@ -421,7 +422,8 @@ class TestObjectReplicator(unittest.TestCase): self.replicator.conf.pop('sync_method') with mock.patch('swift.obj.replicator.http_connect', mock_http_connect(200)): - df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o') + df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', + policy=POLICIES.legacy) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -433,7 +435,7 @@ class TestObjectReplicator(unittest.TestCase): whole_path_from = os.path.join(self.objects, '1', data_dir) part_path = os.path.join(self.objects, '1') self.assertTrue(os.access(part_path, os.F_OK)) - ring = self.replicator.get_object_ring(0) + ring = self.replicator.load_object_ring(POLICIES[0]) nodes = [node for node in ring.get_part_nodes(1) if node['ip'] not in _ips()] @@ -470,7 +472,8 @@ class TestObjectReplicator(unittest.TestCase): with mock.patch('swift.obj.replicator.http_connect', mock_http_connect(200)): - df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o') + df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', + policy=POLICIES.legacy) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -496,7 +499,7 @@ class TestObjectReplicator(unittest.TestCase): with mock.patch('swift.obj.replicator.http_connect', mock_http_connect(200)): df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', - policy_idx=1) + policy=POLICIES[1]) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -508,7 +511,7 @@ class TestObjectReplicator(unittest.TestCase): whole_path_from = os.path.join(self.objects_1, '1', data_dir) part_path = os.path.join(self.objects_1, '1') self.assertTrue(os.access(part_path, os.F_OK)) - ring = self.replicator.get_object_ring(1) + ring = self.replicator.load_object_ring(POLICIES[1]) nodes = [node for node in ring.get_part_nodes(1) if node['ip'] not in _ips()] @@ -524,7 +527,8 @@ class TestObjectReplicator(unittest.TestCase): def test_delete_partition_with_failures(self): with mock.patch('swift.obj.replicator.http_connect', mock_http_connect(200)): - df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o') + df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 
'o', + policy=POLICIES.legacy) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -536,7 +540,7 @@ class TestObjectReplicator(unittest.TestCase): whole_path_from = os.path.join(self.objects, '1', data_dir) part_path = os.path.join(self.objects, '1') self.assertTrue(os.access(part_path, os.F_OK)) - ring = self.replicator.get_object_ring(0) + ring = self.replicator.load_object_ring(POLICIES[0]) nodes = [node for node in ring.get_part_nodes(1) if node['ip'] not in _ips()] @@ -559,7 +563,8 @@ class TestObjectReplicator(unittest.TestCase): with mock.patch('swift.obj.replicator.http_connect', mock_http_connect(200)): self.replicator.handoff_delete = 2 - df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o') + df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', + policy=POLICIES.legacy) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -571,7 +576,7 @@ class TestObjectReplicator(unittest.TestCase): whole_path_from = os.path.join(self.objects, '1', data_dir) part_path = os.path.join(self.objects, '1') self.assertTrue(os.access(part_path, os.F_OK)) - ring = self.replicator.get_object_ring(0) + ring = self.replicator.load_object_ring(POLICIES[0]) nodes = [node for node in ring.get_part_nodes(1) if node['ip'] not in _ips()] @@ -593,7 +598,8 @@ class TestObjectReplicator(unittest.TestCase): with mock.patch('swift.obj.replicator.http_connect', mock_http_connect(200)): self.replicator.handoff_delete = 2 - df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o') + df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', + policy=POLICIES.legacy) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -605,7 +611,7 @@ class TestObjectReplicator(unittest.TestCase): whole_path_from = os.path.join(self.objects, '1', data_dir) part_path = os.path.join(self.objects, '1') self.assertTrue(os.access(part_path, os.F_OK)) - ring = self.replicator.get_object_ring(0) + ring = self.replicator.load_object_ring(POLICIES[0]) nodes = [node for node in ring.get_part_nodes(1) if node['ip'] not in _ips()] @@ -627,7 +633,8 @@ class TestObjectReplicator(unittest.TestCase): def test_delete_partition_with_handoff_delete_fail_in_other_region(self): with mock.patch('swift.obj.replicator.http_connect', mock_http_connect(200)): - df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o') + df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', + policy=POLICIES.legacy) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -639,7 +646,7 @@ class TestObjectReplicator(unittest.TestCase): whole_path_from = os.path.join(self.objects, '1', data_dir) part_path = os.path.join(self.objects, '1') self.assertTrue(os.access(part_path, os.F_OK)) - ring = self.replicator.get_object_ring(0) + ring = self.replicator.load_object_ring(POLICIES[0]) nodes = [node for node in ring.get_part_nodes(1) if node['ip'] not in _ips()] @@ -659,7 +666,8 @@ class TestObjectReplicator(unittest.TestCase): self.assertTrue(os.access(part_path, os.F_OK)) def test_delete_partition_override_params(self): - df = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o') + df = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o', + policy=POLICIES.legacy) mkdirs(df._datadir) part_path = os.path.join(self.objects, '1') self.assertTrue(os.access(part_path, os.F_OK)) @@ -672,9 +680,10 @@ class TestObjectReplicator(unittest.TestCase): self.assertFalse(os.access(part_path, 
os.F_OK)) def test_delete_policy_override_params(self): - df0 = self.df_mgr.get_diskfile('sda', '99', 'a', 'c', 'o') + df0 = self.df_mgr.get_diskfile('sda', '99', 'a', 'c', 'o', + policy=POLICIES.legacy) df1 = self.df_mgr.get_diskfile('sda', '99', 'a', 'c', 'o', - policy_idx=1) + policy=POLICIES[1]) mkdirs(df0._datadir) mkdirs(df1._datadir) @@ -695,7 +704,8 @@ class TestObjectReplicator(unittest.TestCase): def test_delete_partition_ssync(self): with mock.patch('swift.obj.replicator.http_connect', mock_http_connect(200)): - df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o') + df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', + policy=POLICIES.legacy) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -743,7 +753,8 @@ class TestObjectReplicator(unittest.TestCase): def test_delete_partition_ssync_with_sync_failure(self): with mock.patch('swift.obj.replicator.http_connect', mock_http_connect(200)): - df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o') + df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', + policy=POLICIES.legacy) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -791,7 +802,8 @@ class TestObjectReplicator(unittest.TestCase): self.replicator.logger = debug_logger('test-replicator') with mock.patch('swift.obj.replicator.http_connect', mock_http_connect(200)): - df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o') + df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', + policy=POLICIES.legacy) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -832,7 +844,8 @@ class TestObjectReplicator(unittest.TestCase): mock_http_connect(200)): self.replicator.logger = mock_logger = \ debug_logger('test-replicator') - df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o') + df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', + policy=POLICIES.legacy) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -927,7 +940,8 @@ class TestObjectReplicator(unittest.TestCase): # Write some files into '1' and run replicate- they should be moved # to the other partitions and then node should get deleted. cur_part = '1' - df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o') + df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o', + policy=POLICIES.legacy) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -937,7 +951,7 @@ class TestObjectReplicator(unittest.TestCase): ohash = hash_path('a', 'c', 'o') data_dir = ohash[-3:] whole_path_from = os.path.join(self.objects, cur_part, data_dir) - ring = replicator.get_object_ring(0) + ring = replicator.load_object_ring(POLICIES[0]) process_arg_checker = [] nodes = [node for node in ring.get_part_nodes(int(cur_part)) @@ -991,7 +1005,8 @@ class TestObjectReplicator(unittest.TestCase): # Write some files into '1' and run replicate- they should be moved # to the other partitions and then node should get deleted. 
cur_part = '1' - df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o') + df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o', + policy=POLICIES.legacy) mkdirs(df._datadir) f = open(os.path.join(df._datadir, normalize_timestamp(time.time()) + '.data'), @@ -1002,10 +1017,11 @@ class TestObjectReplicator(unittest.TestCase): data_dir = ohash[-3:] whole_path_from = os.path.join(self.objects, cur_part, data_dir) process_arg_checker = [] - ring = replicator.get_object_ring(0) + ring = replicator.load_object_ring(POLICIES[0]) nodes = [node for node in ring.get_part_nodes(int(cur_part)) if node['ip'] not in _ips()] + for node in nodes: rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], cur_part) @@ -1069,8 +1085,8 @@ class TestObjectReplicator(unittest.TestCase): expect = 'Error syncing partition' for job in jobs: set_default(self) - ring = self.replicator.get_object_ring(job['policy_idx']) - self.headers['X-Backend-Storage-Policy-Index'] = job['policy_idx'] + ring = job['policy'].object_ring + self.headers['X-Backend-Storage-Policy-Index'] = int(job['policy']) self.replicator.update(job) self.assertTrue(error in mock_logger.error.call_args[0][0]) self.assertTrue(expect in mock_logger.exception.call_args[0][0]) @@ -1116,7 +1132,7 @@ class TestObjectReplicator(unittest.TestCase): for job in jobs: set_default(self) # limit local job to policy 0 for simplicity - if job['partition'] == '0' and job['policy_idx'] == 0: + if job['partition'] == '0' and int(job['policy']) == 0: local_job = job.copy() continue self.replicator.update(job) diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py index 7cfb07d150..ee0d364c92 100755 --- a/test/unit/obj/test_server.py +++ b/test/unit/obj/test_server.py @@ -68,13 +68,14 @@ class TestObjectController(unittest.TestCase): self.tmpdir = mkdtemp() self.testdir = os.path.join(self.tmpdir, 'tmp_test_object_server_ObjectController') - conf = {'devices': self.testdir, 'mount_check': 'false'} + mkdirs(os.path.join(self.testdir, 'sda1')) + self.conf = {'devices': self.testdir, 'mount_check': 'false'} self.object_controller = object_server.ObjectController( - conf, logger=debug_logger()) + self.conf, logger=debug_logger()) self.object_controller.bytes_per_sync = 1 self._orig_tpool_exc = tpool.execute tpool.execute = lambda f, *args, **kwargs: f(*args, **kwargs) - self.df_mgr = diskfile.DiskFileManager(conf, + self.df_mgr = diskfile.DiskFileManager(self.conf, self.object_controller.logger) self.logger = debug_logger('test-object-controller') @@ -86,7 +87,7 @@ class TestObjectController(unittest.TestCase): def _stage_tmp_dir(self, policy): mkdirs(os.path.join(self.testdir, 'sda1', - diskfile.get_tmp_dir(int(policy)))) + diskfile.get_tmp_dir(policy))) def check_all_api_methods(self, obj_name='o', alt_res=None): path = '/sda1/p/a/c/%s' % obj_name @@ -419,7 +420,8 @@ class TestObjectController(unittest.TestCase): resp = req.get_response(self.object_controller) self.assertEquals(resp.status_int, 201) - objfile = self.df_mgr.get_diskfile('sda1', 'p', 'a', 'c', 'o') + objfile = self.df_mgr.get_diskfile('sda1', 'p', 'a', 'c', 'o', + policy=POLICIES.legacy) objfile.open() file_name = os.path.basename(objfile._data_file) with open(objfile._data_file) as fp: @@ -570,7 +572,7 @@ class TestObjectController(unittest.TestCase): self.assertEquals(resp.status_int, 201) objfile = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), 
utils.Timestamp(timestamp).internal + '.data') self.assert_(os.path.isfile(objfile)) @@ -605,7 +607,7 @@ class TestObjectController(unittest.TestCase): self.assertEquals(resp.status_int, 201) objfile = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), utils.Timestamp(timestamp).internal + '.data') self.assert_(os.path.isfile(objfile)) @@ -640,7 +642,7 @@ class TestObjectController(unittest.TestCase): self.assertEqual(resp.status_int, 201) objfile = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), utils.Timestamp(timestamp).internal + '.data') self.assertTrue(os.path.isfile(objfile)) @@ -717,7 +719,7 @@ class TestObjectController(unittest.TestCase): self.assertEquals(resp.status_int, 201) objfile = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), utils.Timestamp(timestamp).internal + '.data') self.assert_(os.path.isfile(objfile)) @@ -790,7 +792,7 @@ class TestObjectController(unittest.TestCase): self.assertEquals(resp.status_int, 201) objfile = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), timestamp + '.data') self.assert_(os.path.isfile(objfile)) @@ -833,7 +835,7 @@ class TestObjectController(unittest.TestCase): # original .data file metadata should be unchanged objfile = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), timestamp1 + '.data') self.assert_(os.path.isfile(objfile)) @@ -851,7 +853,7 @@ class TestObjectController(unittest.TestCase): # .meta file metadata should have only user meta items metafile = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), timestamp2 + '.meta') self.assert_(os.path.isfile(metafile)) @@ -1060,7 +1062,7 @@ class TestObjectController(unittest.TestCase): objfile = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), utils.Timestamp(timestamp).internal + '.data') os.unlink(objfile) @@ -1104,7 +1106,8 @@ class TestObjectController(unittest.TestCase): req.body = 'VERIFY' resp = req.get_response(self.object_controller) self.assertEquals(resp.status_int, 201) - disk_file = self.df_mgr.get_diskfile('sda1', 'p', 'a', 'c', 'o') + disk_file = self.df_mgr.get_diskfile('sda1', 'p', 'a', 'c', 'o', + policy=POLICIES.legacy) disk_file.open() file_name = os.path.basename(disk_file._data_file) @@ -1203,7 +1206,7 @@ class TestObjectController(unittest.TestCase): objfile = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), utils.Timestamp(timestamp).internal + '.data') os.unlink(objfile) @@ -1694,7 +1697,8 @@ class TestObjectController(unittest.TestCase): req.body = 'VERIFY' resp = req.get_response(self.object_controller) self.assertEquals(resp.status_int, 201) - disk_file = 
self.df_mgr.get_diskfile('sda1', 'p', 'a', 'c', 'o') + disk_file = self.df_mgr.get_diskfile('sda1', 'p', 'a', 'c', 'o', + policy=POLICIES.legacy) disk_file.open() file_name = os.path.basename(disk_file._data_file) etag = md5() @@ -1726,7 +1730,8 @@ class TestObjectController(unittest.TestCase): req.body = 'VERIFY' resp = req.get_response(self.object_controller) self.assertEquals(resp.status_int, 201) - disk_file = self.df_mgr.get_diskfile('sda1', 'p', 'a', 'c', 'o') + disk_file = self.df_mgr.get_diskfile('sda1', 'p', 'a', 'c', 'o', + policy=POLICIES.legacy) disk_file.open() file_name = os.path.basename(disk_file._data_file) with open(disk_file._data_file) as fp: @@ -1754,7 +1759,8 @@ class TestObjectController(unittest.TestCase): req.body = 'VERIFY' resp = req.get_response(self.object_controller) self.assertEquals(resp.status_int, 201) - disk_file = self.df_mgr.get_diskfile('sda1', 'p', 'a', 'c', 'o') + disk_file = self.df_mgr.get_diskfile('sda1', 'p', 'a', 'c', 'o', + policy=POLICIES.legacy) disk_file.open() file_name = os.path.basename(disk_file._data_file) etag = md5() @@ -1812,7 +1818,6 @@ class TestObjectController(unittest.TestCase): environ={'REQUEST_METHOD': 'DELETE'}) resp = req.get_response(self.object_controller) self.assertEquals(resp.status_int, 400) - # self.assertRaises(KeyError, self.object_controller.DELETE, req) # The following should have created a tombstone file timestamp = normalize_timestamp(1000) @@ -1823,7 +1828,7 @@ class TestObjectController(unittest.TestCase): self.assertEquals(resp.status_int, 404) ts_1000_file = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), utils.Timestamp(timestamp).internal + '.ts') self.assertTrue(os.path.isfile(ts_1000_file)) @@ -1839,7 +1844,7 @@ class TestObjectController(unittest.TestCase): self.assertEquals(resp.status_int, 404) ts_999_file = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), utils.Timestamp(timestamp).internal + '.ts') self.assertFalse(os.path.isfile(ts_999_file)) @@ -1859,7 +1864,7 @@ class TestObjectController(unittest.TestCase): # There should now be 1000 ts and a 1001 data file. 
data_1002_file = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), orig_timestamp + '.data') self.assertTrue(os.path.isfile(data_1002_file)) @@ -1875,7 +1880,7 @@ class TestObjectController(unittest.TestCase): self.assertEqual(resp.headers['X-Backend-Timestamp'], orig_timestamp) ts_1001_file = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), utils.Timestamp(timestamp).internal + '.ts') self.assertFalse(os.path.isfile(ts_1001_file)) @@ -1890,7 +1895,7 @@ class TestObjectController(unittest.TestCase): self.assertEquals(resp.status_int, 204) ts_1003_file = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), utils.Timestamp(timestamp).internal + '.ts') self.assertTrue(os.path.isfile(ts_1003_file)) @@ -1932,7 +1937,7 @@ class TestObjectController(unittest.TestCase): orig_timestamp.internal) objfile = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), utils.Timestamp(timestamp).internal + '.ts') self.assertFalse(os.path.isfile(objfile)) @@ -1951,7 +1956,7 @@ class TestObjectController(unittest.TestCase): self.assertEquals(resp.status_int, 204) objfile = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), utils.Timestamp(timestamp).internal + '.ts') self.assert_(os.path.isfile(objfile)) @@ -1970,7 +1975,7 @@ class TestObjectController(unittest.TestCase): self.assertEquals(resp.status_int, 404) objfile = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), utils.Timestamp(timestamp).internal + '.ts') self.assert_(os.path.isfile(objfile)) @@ -1989,7 +1994,7 @@ class TestObjectController(unittest.TestCase): self.assertEquals(resp.status_int, 404) objfile = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), utils.Timestamp(timestamp).internal + '.ts') self.assertFalse(os.path.isfile(objfile)) @@ -2556,8 +2561,8 @@ class TestObjectController(unittest.TestCase): self.object_controller.async_update( 'PUT', 'a', 'c', 'o', '127.0.0.1:1234', 1, 'sdc1', {'x-timestamp': '1', 'x-out': 'set', - 'X-Backend-Storage-Policy-Index': policy.idx}, 'sda1', - policy.idx) + 'X-Backend-Storage-Policy-Index': int(policy)}, 'sda1', + policy) finally: object_server.http_connect = orig_http_connect self.assertEquals( @@ -2565,7 +2570,7 @@ class TestObjectController(unittest.TestCase): ['127.0.0.1', '1234', 'sdc1', 1, 'PUT', '/a/c/o', { 'x-timestamp': '1', 'x-out': 'set', 'user-agent': 'object-server %s' % os.getpid(), - 'X-Backend-Storage-Policy-Index': policy.idx}]) + 'X-Backend-Storage-Policy-Index': int(policy)}]) @patch_policies([storage_policy.StoragePolicy(0, 'zero', True), storage_policy.StoragePolicy(1, 'one'), @@ -2609,7 +2614,7 @@ class TestObjectController(unittest.TestCase): headers={'X-Timestamp': '12345', 'Content-Type': 'application/burrito', 'Content-Length': 
'0', - 'X-Backend-Storage-Policy-Index': policy.idx, + 'X-Backend-Storage-Policy-Index': int(policy), 'X-Container-Partition': '20', 'X-Container-Host': '1.2.3.4:5', 'X-Container-Device': 'sdb1', @@ -2645,7 +2650,7 @@ class TestObjectController(unittest.TestCase): 'X-Backend-Storage-Policy-Index': '37', 'referer': 'PUT http://localhost/sda1/p/a/c/o', 'user-agent': 'object-server %d' % os.getpid(), - 'X-Backend-Storage-Policy-Index': policy.idx, + 'X-Backend-Storage-Policy-Index': int(policy), 'x-trans-id': '-'})}) self.assertEquals( http_connect_args[1], @@ -2790,7 +2795,7 @@ class TestObjectController(unittest.TestCase): int(delete_at_timestamp) / self.object_controller.expiring_objects_container_divisor * self.object_controller.expiring_objects_container_divisor) - req = Request.blank('/sda1/p/a/c/o', method='PUT', body='', headers={ + headers = { 'Content-Type': 'text/plain', 'X-Timestamp': put_timestamp, 'X-Container-Host': '10.0.0.1:6001', @@ -2801,8 +2806,9 @@ class TestObjectController(unittest.TestCase): 'X-Delete-At-Partition': 'p', 'X-Delete-At-Host': '10.0.0.2:6002', 'X-Delete-At-Device': 'sda1', - 'X-Backend-Storage-Policy-Index': int(policy), - }) + 'X-Backend-Storage-Policy-Index': int(policy)} + req = Request.blank( + '/sda1/p/a/c/o', method='PUT', body='', headers=headers) with mocked_http_conn( 500, 500, give_connect=capture_updates) as fake_conn: resp = req.get_response(self.object_controller) @@ -2838,7 +2844,7 @@ class TestObjectController(unittest.TestCase): self.assertEqual(headers[key], str(value)) # check async pendings async_dir = os.path.join(self.testdir, 'sda1', - diskfile.get_async_dir(policy.idx)) + diskfile.get_async_dir(policy)) found_files = [] for root, dirs, files in os.walk(async_dir): for f in files: @@ -2848,7 +2854,7 @@ class TestObjectController(unittest.TestCase): if data['account'] == 'a': self.assertEquals( int(data['headers'] - ['X-Backend-Storage-Policy-Index']), policy.idx) + ['X-Backend-Storage-Policy-Index']), int(policy)) elif data['account'] == '.expiring_objects': self.assertEquals( int(data['headers'] @@ -2872,12 +2878,12 @@ class TestObjectController(unittest.TestCase): self.object_controller.async_update( 'PUT', 'a', 'c', 'o', '127.0.0.1:1234', 1, 'sdc1', {'x-timestamp': '1', 'x-out': 'set', - 'X-Backend-Storage-Policy-Index': policy.idx}, 'sda1', - policy.idx) + 'X-Backend-Storage-Policy-Index': int(policy)}, 'sda1', + policy) finally: object_server.http_connect = orig_http_connect utils.HASH_PATH_PREFIX = _prefix - async_dir = diskfile.get_async_dir(policy.idx) + async_dir = diskfile.get_async_dir(policy) self.assertEquals( pickle.load(open(os.path.join( self.testdir, 'sda1', async_dir, 'a83', @@ -2885,7 +2891,7 @@ class TestObjectController(unittest.TestCase): utils.Timestamp(1).internal))), {'headers': {'x-timestamp': '1', 'x-out': 'set', 'user-agent': 'object-server %s' % os.getpid(), - 'X-Backend-Storage-Policy-Index': policy.idx}, + 'X-Backend-Storage-Policy-Index': int(policy)}, 'account': 'a', 'container': 'c', 'obj': 'o', 'op': 'PUT'}) def test_async_update_saves_on_non_2xx(self): @@ -2916,9 +2922,9 @@ class TestObjectController(unittest.TestCase): self.object_controller.async_update( 'PUT', 'a', 'c', 'o', '127.0.0.1:1234', 1, 'sdc1', {'x-timestamp': '1', 'x-out': str(status), - 'X-Backend-Storage-Policy-Index': policy.idx}, 'sda1', - policy.idx) - async_dir = diskfile.get_async_dir(policy.idx) + 'X-Backend-Storage-Policy-Index': int(policy)}, 'sda1', + policy) + async_dir = diskfile.get_async_dir(policy) self.assertEquals( 
pickle.load(open(os.path.join( self.testdir, 'sda1', async_dir, 'a83', @@ -2928,7 +2934,7 @@ class TestObjectController(unittest.TestCase): 'user-agent': 'object-server %s' % os.getpid(), 'X-Backend-Storage-Policy-Index': - policy.idx}, + int(policy)}, 'account': 'a', 'container': 'c', 'obj': 'o', 'op': 'PUT'}) finally: @@ -2992,8 +2998,8 @@ class TestObjectController(unittest.TestCase): self.object_controller.async_update( 'PUT', 'a', 'c', 'o', '127.0.0.1:1234', 1, 'sdc1', {'x-timestamp': '1', 'x-out': str(status)}, 'sda1', - policy.idx) - async_dir = diskfile.get_async_dir(int(policy)) + policy) + async_dir = diskfile.get_async_dir(policy) self.assertTrue( os.path.exists(os.path.join( self.testdir, 'sda1', async_dir, 'a83', @@ -3744,7 +3750,7 @@ class TestObjectController(unittest.TestCase): self.assertEquals(resp.body, 'TEST') objfile = os.path.join( self.testdir, 'sda1', - storage_directory(diskfile.get_data_dir(0), 'p', + storage_directory(diskfile.get_data_dir(POLICIES[0]), 'p', hash_path('a', 'c', 'o')), utils.Timestamp(test_timestamp).internal + '.data') self.assert_(os.path.isfile(objfile)) @@ -3969,10 +3975,10 @@ class TestObjectController(unittest.TestCase): def my_tpool_execute(func, *args, **kwargs): return func(*args, **kwargs) - was_get_hashes = diskfile.get_hashes + was_get_hashes = diskfile.DiskFileManager._get_hashes was_tpool_exe = tpool.execute try: - diskfile.get_hashes = fake_get_hashes + diskfile.DiskFileManager._get_hashes = fake_get_hashes tpool.execute = my_tpool_execute req = Request.blank('/sda1/p/suff', environ={'REQUEST_METHOD': 'REPLICATE'}, @@ -3983,7 +3989,7 @@ class TestObjectController(unittest.TestCase): self.assertEquals(p_data, {1: 2}) finally: tpool.execute = was_tpool_exe - diskfile.get_hashes = was_get_hashes + diskfile.DiskFileManager._get_hashes = was_get_hashes def test_REPLICATE_timeout(self): @@ -3993,10 +3999,10 @@ class TestObjectController(unittest.TestCase): def my_tpool_execute(func, *args, **kwargs): return func(*args, **kwargs) - was_get_hashes = diskfile.get_hashes + was_get_hashes = diskfile.DiskFileManager._get_hashes was_tpool_exe = tpool.execute try: - diskfile.get_hashes = fake_get_hashes + diskfile.DiskFileManager._get_hashes = fake_get_hashes tpool.execute = my_tpool_execute req = Request.blank('/sda1/p/suff', environ={'REQUEST_METHOD': 'REPLICATE'}, @@ -4004,7 +4010,7 @@ class TestObjectController(unittest.TestCase): self.assertRaises(Timeout, self.object_controller.REPLICATE, req) finally: tpool.execute = was_tpool_exe - diskfile.get_hashes = was_get_hashes + diskfile.DiskFileManager._get_hashes = was_get_hashes def test_REPLICATE_insufficient_storage(self): conf = {'devices': self.testdir, 'mount_check': 'true'} @@ -4429,6 +4435,7 @@ class TestObjectController(unittest.TestCase): self.assertTrue(os.path.isdir(object_dir)) +@patch_policies class TestObjectServer(unittest.TestCase): def setUp(self): diff --git a/test/unit/obj/test_ssync_sender.py b/test/unit/obj/test_ssync_sender.py index 87efd64cc7..70590ce334 100644 --- a/test/unit/obj/test_ssync_sender.py +++ b/test/unit/obj/test_ssync_sender.py @@ -25,15 +25,16 @@ import eventlet import mock from swift.common import exceptions, utils +from swift.common.storage_policy import POLICIES from swift.obj import ssync_sender, diskfile -from test.unit import DebugLogger, patch_policies +from test.unit import debug_logger, patch_policies class FakeReplicator(object): def __init__(self, testdir): - self.logger = mock.MagicMock() + self.logger = debug_logger('test-ssync-sender') 
self.conn_timeout = 1 self.node_timeout = 2 self.http_timeout = 3 @@ -43,7 +44,7 @@ class FakeReplicator(object): 'devices': testdir, 'mount_check': 'false', } - self._diskfile_mgr = diskfile.DiskFileManager(conf, DebugLogger()) + self._diskfile_mgr = diskfile.DiskFileManager(conf, self.logger) class NullBufferedHTTPConnection(object): @@ -90,24 +91,27 @@ class FakeConnection(object): self.closed = True +@patch_policies() class TestSender(unittest.TestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() self.testdir = os.path.join(self.tmpdir, 'tmp_test_ssync_sender') + utils.mkdirs(os.path.join(self.testdir, 'dev')) self.replicator = FakeReplicator(self.testdir) self.sender = ssync_sender.Sender(self.replicator, None, None, None) def tearDown(self): - shutil.rmtree(self.tmpdir, ignore_errors=1) + shutil.rmtree(self.tmpdir, ignore_errors=True) def _make_open_diskfile(self, device='dev', partition='9', account='a', container='c', obj='o', body='test', - extra_metadata=None, policy_idx=0): + extra_metadata=None, policy=None): + policy = policy or POLICIES.legacy object_parts = account, container, obj req_timestamp = utils.normalize_timestamp(time.time()) df = self.sender.daemon._diskfile_mgr.get_diskfile( - device, partition, *object_parts, policy_idx=policy_idx) + device, partition, *object_parts, policy=policy) content_length = len(body) etag = hashlib.md5(body).hexdigest() with df.create() as writer: @@ -134,16 +138,16 @@ class TestSender(unittest.TestCase): with mock.patch.object(ssync_sender.Sender, 'connect', connect): node = dict(replication_ip='1.2.3.4', replication_port=5678, device='sda1') - job = dict(partition='9') + job = dict(partition='9', policy=POLICIES.legacy) self.sender = ssync_sender.Sender(self.replicator, node, job, None) self.sender.suffixes = ['abc'] success, candidates = self.sender() self.assertFalse(success) self.assertEquals(candidates, set()) - call = self.replicator.logger.error.mock_calls[0] - self.assertEqual( - call[1][:-1], ('%s:%s/%s/%s %s', '1.2.3.4', 5678, 'sda1', '9')) - self.assertEqual(str(call[1][-1]), '1 second: test connect') + error_lines = self.replicator.logger.get_lines_for_level('error') + self.assertEqual(1, len(error_lines)) + self.assertEqual('1.2.3.4:5678/sda1/9 1 second: test connect', + error_lines[0]) def test_call_catches_ReplicationException(self): @@ -153,45 +157,44 @@ class TestSender(unittest.TestCase): with mock.patch.object(ssync_sender.Sender, 'connect', connect): node = dict(replication_ip='1.2.3.4', replication_port=5678, device='sda1') - job = dict(partition='9') + job = dict(partition='9', policy=POLICIES.legacy) self.sender = ssync_sender.Sender(self.replicator, node, job, None) self.sender.suffixes = ['abc'] success, candidates = self.sender() self.assertFalse(success) self.assertEquals(candidates, set()) - call = self.replicator.logger.error.mock_calls[0] - self.assertEqual( - call[1][:-1], ('%s:%s/%s/%s %s', '1.2.3.4', 5678, 'sda1', '9')) - self.assertEqual(str(call[1][-1]), 'test connect') + error_lines = self.replicator.logger.get_lines_for_level('error') + self.assertEqual(1, len(error_lines)) + self.assertEqual('1.2.3.4:5678/sda1/9 test connect', + error_lines[0]) def test_call_catches_other_exceptions(self): node = dict(replication_ip='1.2.3.4', replication_port=5678, device='sda1') - job = dict(partition='9') + job = dict(partition='9', policy=POLICIES.legacy) self.sender = ssync_sender.Sender(self.replicator, node, job, None) self.sender.suffixes = ['abc'] self.sender.connect = 'cause exception' success, 
candidates = self.sender() self.assertFalse(success) self.assertEquals(candidates, set()) - call = self.replicator.logger.exception.mock_calls[0] - self.assertEqual( - call[1], - ('%s:%s/%s/%s EXCEPTION in replication.Sender', '1.2.3.4', 5678, - 'sda1', '9')) + error_lines = self.replicator.logger.get_lines_for_level('error') + for line in error_lines: + self.assertTrue(line.startswith( + '1.2.3.4:5678/sda1/9 EXCEPTION in replication.Sender:')) def test_call_catches_exception_handling_exception(self): - node = dict(replication_ip='1.2.3.4', replication_port=5678, - device='sda1') - job = None # Will cause inside exception handler to fail + job = node = None # Will cause inside exception handler to fail self.sender = ssync_sender.Sender(self.replicator, node, job, None) self.sender.suffixes = ['abc'] self.sender.connect = 'cause exception' success, candidates = self.sender() self.assertFalse(success) self.assertEquals(candidates, set()) - self.replicator.logger.exception.assert_called_once_with( - 'EXCEPTION in replication.Sender') + error_lines = self.replicator.logger.get_lines_for_level('error') + for line in error_lines: + self.assertTrue(line.startswith( + 'EXCEPTION in replication.Sender')) def test_call_calls_others(self): self.sender.suffixes = ['abc'] @@ -222,11 +225,10 @@ class TestSender(unittest.TestCase): self.sender.updates.assert_called_once_with() self.sender.disconnect.assert_called_once_with() - @patch_policies def test_connect(self): node = dict(replication_ip='1.2.3.4', replication_port=5678, - device='sda1') - job = dict(partition='9', policy_idx=1) + device='sda1', index=0) + job = dict(partition='9', policy=POLICIES[1]) self.sender = ssync_sender.Sender(self.replicator, node, job, None) self.sender.suffixes = ['abc'] with mock.patch( @@ -256,9 +258,9 @@ class TestSender(unittest.TestCase): expected_calls)) def test_call_and_missing_check(self): - def yield_hashes(device, partition, policy_index, suffixes=None): + def yield_hashes(device, partition, policy, suffixes=None, **kwargs): if device == 'dev' and partition == '9' and suffixes == ['abc'] \ - and policy_index == 0: + and policy == POLICIES.legacy: yield ( '/srv/node/dev/objects/9/abc/' '9d41d8cd98f00b204e9800998ecf0abc', @@ -269,7 +271,11 @@ class TestSender(unittest.TestCase): 'No match for %r %r %r' % (device, partition, suffixes)) self.sender.connection = FakeConnection() - self.sender.job = {'device': 'dev', 'partition': '9'} + self.sender.job = { + 'device': 'dev', + 'partition': '9', + 'policy': POLICIES.legacy, + } self.sender.suffixes = ['abc'] self.sender.response = FakeResponse( chunk_body=( @@ -286,9 +292,9 @@ class TestSender(unittest.TestCase): self.assertEqual(self.sender.failures, 0) def test_call_and_missing_check_with_obj_list(self): - def yield_hashes(device, partition, policy_index, suffixes=None): + def yield_hashes(device, partition, policy, suffixes=None, **kwargs): if device == 'dev' and partition == '9' and suffixes == ['abc'] \ - and policy_index == 0: + and policy == POLICIES.legacy: yield ( '/srv/node/dev/objects/9/abc/' '9d41d8cd98f00b204e9800998ecf0abc', @@ -297,7 +303,11 @@ class TestSender(unittest.TestCase): else: raise Exception( 'No match for %r %r %r' % (device, partition, suffixes)) - job = {'device': 'dev', 'partition': '9'} + job = { + 'device': 'dev', + 'partition': '9', + 'policy': POLICIES.legacy, + } self.sender = ssync_sender.Sender(self.replicator, None, job, ['abc'], ['9d41d8cd98f00b204e9800998ecf0abc']) self.sender.connection = FakeConnection() @@ -315,9 +325,9 @@ 
class TestSender(unittest.TestCase): self.assertEqual(self.sender.failures, 0) def test_call_and_missing_check_with_obj_list_but_required(self): - def yield_hashes(device, partition, policy_index, suffixes=None): + def yield_hashes(device, partition, policy, suffixes=None, **kwargs): if device == 'dev' and partition == '9' and suffixes == ['abc'] \ - and policy_index == 0: + and policy == POLICIES.legacy: yield ( '/srv/node/dev/objects/9/abc/' '9d41d8cd98f00b204e9800998ecf0abc', @@ -326,7 +336,11 @@ class TestSender(unittest.TestCase): else: raise Exception( 'No match for %r %r %r' % (device, partition, suffixes)) - job = {'device': 'dev', 'partition': '9'} + job = { + 'device': 'dev', + 'partition': '9', + 'policy': POLICIES.legacy, + } self.sender = ssync_sender.Sender(self.replicator, None, job, ['abc'], ['9d41d8cd98f00b204e9800998ecf0abc']) self.sender.connection = FakeConnection() @@ -347,7 +361,7 @@ class TestSender(unittest.TestCase): self.replicator.conn_timeout = 0.01 node = dict(replication_ip='1.2.3.4', replication_port=5678, device='sda1') - job = dict(partition='9') + job = dict(partition='9', policy=POLICIES.legacy) self.sender = ssync_sender.Sender(self.replicator, node, job, None) self.sender.suffixes = ['abc'] @@ -360,16 +374,16 @@ class TestSender(unittest.TestCase): success, candidates = self.sender() self.assertFalse(success) self.assertEquals(candidates, set()) - call = self.replicator.logger.error.mock_calls[0] - self.assertEqual( - call[1][:-1], ('%s:%s/%s/%s %s', '1.2.3.4', 5678, 'sda1', '9')) - self.assertEqual(str(call[1][-1]), '0.01 seconds: connect send') + error_lines = self.replicator.logger.get_lines_for_level('error') + for line in error_lines: + self.assertTrue(line.startswith( + '1.2.3.4:5678/sda1/9 0.01 seconds: connect send')) def test_connect_receive_timeout(self): self.replicator.node_timeout = 0.02 node = dict(replication_ip='1.2.3.4', replication_port=5678, - device='sda1') - job = dict(partition='9') + device='sda1', index=0) + job = dict(partition='9', policy=POLICIES.legacy) self.sender = ssync_sender.Sender(self.replicator, node, job, None) self.sender.suffixes = ['abc'] @@ -384,16 +398,16 @@ class TestSender(unittest.TestCase): success, candidates = self.sender() self.assertFalse(success) self.assertEquals(candidates, set()) - call = self.replicator.logger.error.mock_calls[0] - self.assertEqual( - call[1][:-1], ('%s:%s/%s/%s %s', '1.2.3.4', 5678, 'sda1', '9')) - self.assertEqual(str(call[1][-1]), '0.02 seconds: connect receive') + error_lines = self.replicator.logger.get_lines_for_level('error') + for line in error_lines: + self.assertTrue(line.startswith( + '1.2.3.4:5678/sda1/9 0.02 seconds: connect receive')) def test_connect_bad_status(self): self.replicator.node_timeout = 0.02 node = dict(replication_ip='1.2.3.4', replication_port=5678, device='sda1') - job = dict(partition='9') + job = dict(partition='9', policy=POLICIES.legacy) self.sender = ssync_sender.Sender(self.replicator, node, job, None) self.sender.suffixes = ['abc'] @@ -409,10 +423,10 @@ class TestSender(unittest.TestCase): success, candidates = self.sender() self.assertFalse(success) self.assertEquals(candidates, set()) - call = self.replicator.logger.error.mock_calls[0] - self.assertEqual( - call[1][:-1], ('%s:%s/%s/%s %s', '1.2.3.4', 5678, 'sda1', '9')) - self.assertEqual(str(call[1][-1]), 'Expected status 200; got 503') + error_lines = self.replicator.logger.get_lines_for_level('error') + for line in error_lines: + self.assertTrue(line.startswith( + '1.2.3.4:5678/sda1/9 
Expected status 200; got 503')) def test_readline_newline_in_buffer(self): self.sender.response_buffer = 'Has a newline already.\r\nOkay.' @@ -473,16 +487,21 @@ class TestSender(unittest.TestCase): self.assertRaises(exceptions.MessageTimeout, self.sender.missing_check) def test_missing_check_has_empty_suffixes(self): - def yield_hashes(device, partition, policy_idx, suffixes=None): - if (device != 'dev' or partition != '9' or policy_idx != 0 or + def yield_hashes(device, partition, policy, suffixes=None, **kwargs): + if (device != 'dev' or partition != '9' or + policy != POLICIES.legacy or suffixes != ['abc', 'def']): yield # Just here to make this a generator raise Exception( 'No match for %r %r %r %r' % (device, partition, - policy_idx, suffixes)) + policy, suffixes)) self.sender.connection = FakeConnection() - self.sender.job = {'device': 'dev', 'partition': '9'} + self.sender.job = { + 'device': 'dev', + 'partition': '9', + 'policy': POLICIES.legacy, + } self.sender.suffixes = ['abc', 'def'] self.sender.response = FakeResponse( chunk_body=( @@ -498,8 +517,9 @@ class TestSender(unittest.TestCase): self.assertEqual(self.sender.available_set, set()) def test_missing_check_has_suffixes(self): - def yield_hashes(device, partition, policy_idx, suffixes=None): - if (device == 'dev' and partition == '9' and policy_idx == 0 and + def yield_hashes(device, partition, policy, suffixes=None, **kwargs): + if (device == 'dev' and partition == '9' and + policy == POLICIES.legacy and suffixes == ['abc', 'def']): yield ( '/srv/node/dev/objects/9/abc/' @@ -519,10 +539,14 @@ class TestSender(unittest.TestCase): else: raise Exception( 'No match for %r %r %r %r' % (device, partition, - policy_idx, suffixes)) + policy, suffixes)) self.sender.connection = FakeConnection() - self.sender.job = {'device': 'dev', 'partition': '9'} + self.sender.job = { + 'device': 'dev', + 'partition': '9', + 'policy': POLICIES.legacy, + } self.sender.suffixes = ['abc', 'def'] self.sender.response = FakeResponse( chunk_body=( @@ -544,8 +568,9 @@ class TestSender(unittest.TestCase): self.assertEqual(self.sender.available_set, set(candidates)) def test_missing_check_far_end_disconnect(self): - def yield_hashes(device, partition, policy_idx, suffixes=None): - if (device == 'dev' and partition == '9' and policy_idx == 0 and + def yield_hashes(device, partition, policy, suffixes=None, **kwargs): + if (device == 'dev' and partition == '9' and + policy == POLICIES.legacy and suffixes == ['abc']): yield ( '/srv/node/dev/objects/9/abc/' @@ -555,10 +580,14 @@ class TestSender(unittest.TestCase): else: raise Exception( 'No match for %r %r %r %r' % (device, partition, - policy_idx, suffixes)) + policy, suffixes)) self.sender.connection = FakeConnection() - self.sender.job = {'device': 'dev', 'partition': '9'} + self.sender.job = { + 'device': 'dev', + 'partition': '9', + 'policy': POLICIES.legacy, + } self.sender.suffixes = ['abc'] self.sender.daemon._diskfile_mgr.yield_hashes = yield_hashes self.sender.response = FakeResponse(chunk_body='\r\n') @@ -577,8 +606,9 @@ class TestSender(unittest.TestCase): set(['9d41d8cd98f00b204e9800998ecf0abc'])) def test_missing_check_far_end_disconnect2(self): - def yield_hashes(device, partition, policy_idx, suffixes=None): - if (device == 'dev' and partition == '9' and policy_idx == 0 and + def yield_hashes(device, partition, policy, suffixes=None, **kwargs): + if (device == 'dev' and partition == '9' and + policy == POLICIES.legacy and suffixes == ['abc']): yield ( '/srv/node/dev/objects/9/abc/' @@ -588,10 
+618,14 @@ class TestSender(unittest.TestCase): else: raise Exception( 'No match for %r %r %r %r' % (device, partition, - policy_idx, suffixes)) + policy, suffixes)) self.sender.connection = FakeConnection() - self.sender.job = {'device': 'dev', 'partition': '9'} + self.sender.job = { + 'device': 'dev', + 'partition': '9', + 'policy': POLICIES.legacy, + } self.sender.suffixes = ['abc'] self.sender.daemon._diskfile_mgr.yield_hashes = yield_hashes self.sender.response = FakeResponse( @@ -611,8 +645,9 @@ class TestSender(unittest.TestCase): set(['9d41d8cd98f00b204e9800998ecf0abc'])) def test_missing_check_far_end_unexpected(self): - def yield_hashes(device, partition, policy_idx, suffixes=None): - if (device == 'dev' and partition == '9' and policy_idx == 0 and + def yield_hashes(device, partition, policy, suffixes=None, **kwargs): + if (device == 'dev' and partition == '9' and + policy == POLICIES.legacy and suffixes == ['abc']): yield ( '/srv/node/dev/objects/9/abc/' @@ -622,10 +657,14 @@ class TestSender(unittest.TestCase): else: raise Exception( 'No match for %r %r %r %r' % (device, partition, - policy_idx, suffixes)) + policy, suffixes)) self.sender.connection = FakeConnection() - self.sender.job = {'device': 'dev', 'partition': '9'} + self.sender.job = { + 'device': 'dev', + 'partition': '9', + 'policy': POLICIES.legacy, + } self.sender.suffixes = ['abc'] self.sender.daemon._diskfile_mgr.yield_hashes = yield_hashes self.sender.response = FakeResponse(chunk_body='OH HAI\r\n') @@ -644,8 +683,9 @@ class TestSender(unittest.TestCase): set(['9d41d8cd98f00b204e9800998ecf0abc'])) def test_missing_check_send_list(self): - def yield_hashes(device, partition, policy_idx, suffixes=None): - if (device == 'dev' and partition == '9' and policy_idx == 0 and + def yield_hashes(device, partition, policy, suffixes=None, **kwargs): + if (device == 'dev' and partition == '9' and + policy == POLICIES.legacy and suffixes == ['abc']): yield ( '/srv/node/dev/objects/9/abc/' @@ -655,10 +695,14 @@ class TestSender(unittest.TestCase): else: raise Exception( 'No match for %r %r %r %r' % (device, partition, - policy_idx, suffixes)) + policy, suffixes)) self.sender.connection = FakeConnection() - self.sender.job = {'device': 'dev', 'partition': '9'} + self.sender.job = { + 'device': 'dev', + 'partition': '9', + 'policy': POLICIES.legacy, + } self.sender.suffixes = ['abc'] self.sender.response = FakeResponse( chunk_body=( @@ -742,7 +786,11 @@ class TestSender(unittest.TestCase): delete_timestamp = utils.normalize_timestamp(time.time()) df.delete(delete_timestamp) self.sender.connection = FakeConnection() - self.sender.job = {'device': device, 'partition': part} + self.sender.job = { + 'device': device, + 'partition': part, + 'policy': POLICIES.legacy, + } self.sender.node = {} self.sender.send_list = [object_hash] self.sender.send_delete = mock.MagicMock() @@ -771,7 +819,11 @@ class TestSender(unittest.TestCase): delete_timestamp = utils.normalize_timestamp(time.time()) df.delete(delete_timestamp) self.sender.connection = FakeConnection() - self.sender.job = {'device': device, 'partition': part} + self.sender.job = { + 'device': device, + 'partition': part, + 'policy': POLICIES.legacy, + } self.sender.node = {} self.sender.send_list = [object_hash] self.sender.response = FakeResponse( @@ -797,7 +849,11 @@ class TestSender(unittest.TestCase): object_hash = utils.hash_path(*object_parts) expected = df.get_metadata() self.sender.connection = FakeConnection() - self.sender.job = {'device': device, 'partition': part} + 
self.sender.job = { + 'device': device, + 'partition': part, + 'policy': POLICIES.legacy, + } self.sender.node = {} self.sender.send_list = [object_hash] self.sender.send_delete = mock.MagicMock() @@ -821,18 +877,20 @@ class TestSender(unittest.TestCase): '11\r\n:UPDATES: START\r\n\r\n' 'f\r\n:UPDATES: END\r\n\r\n') - @patch_policies def test_updates_storage_policy_index(self): device = 'dev' part = '9' object_parts = ('a', 'c', 'o') df = self._make_open_diskfile(device, part, *object_parts, - policy_idx=1) + policy=POLICIES[0]) object_hash = utils.hash_path(*object_parts) expected = df.get_metadata() self.sender.connection = FakeConnection() - self.sender.job = {'device': device, 'partition': part, - 'policy_idx': 1} + self.sender.job = { + 'device': device, + 'partition': part, + 'policy': POLICIES[0], + 'frag_index': 0} self.sender.node = {} self.sender.send_list = [object_hash] self.sender.send_delete = mock.MagicMock() @@ -847,7 +905,7 @@ class TestSender(unittest.TestCase): self.assertEqual(path, '/a/c/o') self.assert_(isinstance(df, diskfile.DiskFile)) self.assertEqual(expected, df.get_metadata()) - self.assertEqual(os.path.join(self.testdir, 'dev/objects-1/9/', + self.assertEqual(os.path.join(self.testdir, 'dev/objects/9/', object_hash[-3:], object_hash), df._datadir) diff --git a/test/unit/obj/test_updater.py b/test/unit/obj/test_updater.py index 1915a55d1d..2ca3965453 100644 --- a/test/unit/obj/test_updater.py +++ b/test/unit/obj/test_updater.py @@ -70,7 +70,7 @@ class TestObjectUpdater(unittest.TestCase): self.sda1 = os.path.join(self.devices_dir, 'sda1') os.mkdir(self.sda1) for policy in POLICIES: - os.mkdir(os.path.join(self.sda1, get_tmp_dir(int(policy)))) + os.mkdir(os.path.join(self.sda1, get_tmp_dir(policy))) self.logger = debug_logger() def tearDown(self): @@ -169,8 +169,8 @@ class TestObjectUpdater(unittest.TestCase): seen = set() class MockObjectUpdater(object_updater.ObjectUpdater): - def process_object_update(self, update_path, device, idx): - seen.add((update_path, idx)) + def process_object_update(self, update_path, device, policy): + seen.add((update_path, int(policy))) os.unlink(update_path) cu = MockObjectUpdater({ @@ -216,7 +216,7 @@ class TestObjectUpdater(unittest.TestCase): 'concurrency': '1', 'node_timeout': '15'}) cu.run_once() - async_dir = os.path.join(self.sda1, get_async_dir(0)) + async_dir = os.path.join(self.sda1, get_async_dir(POLICIES[0])) os.mkdir(async_dir) cu.run_once() self.assert_(os.path.exists(async_dir)) @@ -253,7 +253,7 @@ class TestObjectUpdater(unittest.TestCase): 'concurrency': '1', 'node_timeout': '15'}, logger=self.logger) cu.run_once() - async_dir = os.path.join(self.sda1, get_async_dir(0)) + async_dir = os.path.join(self.sda1, get_async_dir(POLICIES[0])) os.mkdir(async_dir) cu.run_once() self.assert_(os.path.exists(async_dir)) @@ -393,7 +393,7 @@ class TestObjectUpdater(unittest.TestCase): 'mount_check': 'false', 'swift_dir': self.testdir, } - async_dir = os.path.join(self.sda1, get_async_dir(policy.idx)) + async_dir = os.path.join(self.sda1, get_async_dir(policy)) os.mkdir(async_dir) account, container, obj = 'a', 'c', 'o' @@ -412,7 +412,7 @@ class TestObjectUpdater(unittest.TestCase): data = {'op': op, 'account': account, 'container': container, 'obj': obj, 'headers': headers_out} dfmanager.pickle_async_update(self.sda1, account, container, obj, - data, ts.next(), policy.idx) + data, ts.next(), policy) request_log = [] @@ -428,7 +428,7 @@ class TestObjectUpdater(unittest.TestCase): ip, part, method, path, headers, qs, ssl = 
request_args self.assertEqual(method, op) self.assertEqual(headers['X-Backend-Storage-Policy-Index'], - str(policy.idx)) + str(int(policy))) self.assertEqual(daemon.logger.get_increment_counts(), {'successes': 1, 'unlinks': 1, 'async_pendings': 1}) @@ -444,7 +444,7 @@ class TestObjectUpdater(unittest.TestCase): 'swift_dir': self.testdir, } daemon = object_updater.ObjectUpdater(conf, logger=self.logger) - async_dir = os.path.join(self.sda1, get_async_dir(policy.idx)) + async_dir = os.path.join(self.sda1, get_async_dir(policy)) os.mkdir(async_dir) # write an async @@ -456,12 +456,12 @@ class TestObjectUpdater(unittest.TestCase): 'x-content-type': 'text/plain', 'x-etag': 'd41d8cd98f00b204e9800998ecf8427e', 'x-timestamp': ts.next(), - 'X-Backend-Storage-Policy-Index': policy.idx, + 'X-Backend-Storage-Policy-Index': int(policy), }) data = {'op': op, 'account': account, 'container': container, 'obj': obj, 'headers': headers_out} dfmanager.pickle_async_update(self.sda1, account, container, obj, - data, ts.next(), policy.idx) + data, ts.next(), policy) request_log = [] @@ -481,7 +481,7 @@ class TestObjectUpdater(unittest.TestCase): ip, part, method, path, headers, qs, ssl = request_args self.assertEqual(method, 'PUT') self.assertEqual(headers['X-Backend-Storage-Policy-Index'], - str(policy.idx)) + str(int(policy))) self.assertEqual(daemon.logger.get_increment_counts(), {'successes': 1, 'unlinks': 1, 'async_pendings': 1}) diff --git a/test/unit/proxy/test_mem_server.py b/test/unit/proxy/test_mem_server.py index bc5b8794fc..f8bc2e3215 100644 --- a/test/unit/proxy/test_mem_server.py +++ b/test/unit/proxy/test_mem_server.py @@ -34,7 +34,22 @@ class TestProxyServer(test_server.TestProxyServer): class TestObjectController(test_server.TestObjectController): - pass + def test_PUT_no_etag_fallocate(self): + # mem server doesn't call fallocate(), believe it or not + pass + + # these tests all go looking in the filesystem + def test_policy_IO(self): + pass + + def test_PUT_ec(self): + pass + + def test_PUT_ec_multiple_segments(self): + pass + + def test_PUT_ec_fragment_archive_etag_mismatch(self): + pass class TestContainerController(test_server.TestContainerController): diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index 39d637d8c5..5bee370fcc 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -87,10 +87,9 @@ def do_setup(the_object_server): os.path.join(mkdtemp(), 'tmp_test_proxy_server_chunked') mkdirs(_testdir) rmtree(_testdir) - mkdirs(os.path.join(_testdir, 'sda1')) - mkdirs(os.path.join(_testdir, 'sda1', 'tmp')) - mkdirs(os.path.join(_testdir, 'sdb1')) - mkdirs(os.path.join(_testdir, 'sdb1', 'tmp')) + for drive in ('sda1', 'sdb1', 'sdc1', 'sdd1', 'sde1', + 'sdf1', 'sdg1', 'sdh1', 'sdi1'): + mkdirs(os.path.join(_testdir, drive, 'tmp')) conf = {'devices': _testdir, 'swift_dir': _testdir, 'mount_check': 'false', 'allowed_headers': 'content-encoding, x-object-manifest, content-disposition, foo', @@ -1014,20 +1013,14 @@ class TestObjectController(unittest.TestCase): @unpatch_policies def test_policy_IO(self): - if hasattr(_test_servers[-1], '_filesystem'): - # ironically, the _filesystem attribute on the object server means - # the in-memory diskfile is in use, so this test does not apply - return - - def check_file(policy_idx, cont, devs, check_val): - partition, nodes = prosrv.get_object_ring(policy_idx).get_nodes( - 'a', cont, 'o') + def check_file(policy, cont, devs, check_val): + partition, nodes = policy.object_ring.get_nodes('a', cont, 
'o') conf = {'devices': _testdir, 'mount_check': 'false'} df_mgr = diskfile.DiskFileManager(conf, FakeLogger()) for dev in devs: file = df_mgr.get_diskfile(dev, partition, 'a', cont, 'o', - policy_idx=policy_idx) + policy=policy) if check_val is True: file.open() @@ -1058,8 +1051,8 @@ class TestObjectController(unittest.TestCase): self.assertEqual(res.status_int, 200) self.assertEqual(res.body, obj) - check_file(0, 'c', ['sda1', 'sdb1'], True) - check_file(0, 'c', ['sdc1', 'sdd1', 'sde1', 'sdf1'], False) + check_file(POLICIES[0], 'c', ['sda1', 'sdb1'], True) + check_file(POLICIES[0], 'c', ['sdc1', 'sdd1', 'sde1', 'sdf1'], False) # check policy 1: put file on c1, read it back, check loc on disk sock = connect_tcp(('localhost', prolis.getsockname()[1])) @@ -1084,8 +1077,8 @@ class TestObjectController(unittest.TestCase): self.assertEqual(res.status_int, 200) self.assertEqual(res.body, obj) - check_file(1, 'c1', ['sdc1', 'sdd1'], True) - check_file(1, 'c1', ['sda1', 'sdb1', 'sde1', 'sdf1'], False) + check_file(POLICIES[1], 'c1', ['sdc1', 'sdd1'], True) + check_file(POLICIES[1], 'c1', ['sda1', 'sdb1', 'sde1', 'sdf1'], False) # check policy 2: put file on c2, read it back, check loc on disk sock = connect_tcp(('localhost', prolis.getsockname()[1])) @@ -1110,8 +1103,8 @@ class TestObjectController(unittest.TestCase): self.assertEqual(res.status_int, 200) self.assertEqual(res.body, obj) - check_file(2, 'c2', ['sde1', 'sdf1'], True) - check_file(2, 'c2', ['sda1', 'sdb1', 'sdc1', 'sdd1'], False) + check_file(POLICIES[2], 'c2', ['sde1', 'sdf1'], True) + check_file(POLICIES[2], 'c2', ['sda1', 'sdb1', 'sdc1', 'sdd1'], False) @unpatch_policies def test_policy_IO_override(self): @@ -1146,7 +1139,7 @@ class TestObjectController(unittest.TestCase): conf = {'devices': _testdir, 'mount_check': 'false'} df_mgr = diskfile.DiskFileManager(conf, FakeLogger()) df = df_mgr.get_diskfile(node['device'], partition, 'a', - 'c1', 'wrong-o', policy_idx=2) + 'c1', 'wrong-o', policy=POLICIES[2]) with df.open(): contents = ''.join(df.reader()) self.assertEqual(contents, "hello") @@ -1178,7 +1171,7 @@ class TestObjectController(unittest.TestCase): self.assertEqual(res.status_int, 204) df = df_mgr.get_diskfile(node['device'], partition, 'a', - 'c1', 'wrong-o', policy_idx=2) + 'c1', 'wrong-o', policy=POLICIES[2]) try: df.open() except DiskFileNotExist as e: diff --git a/test/unit/proxy/test_sysmeta.py b/test/unit/proxy/test_sysmeta.py index d80f2855e4..a45c689abd 100644 --- a/test/unit/proxy/test_sysmeta.py +++ b/test/unit/proxy/test_sysmeta.py @@ -135,7 +135,7 @@ class TestObjectSysmeta(unittest.TestCase): self.tmpdir = mkdtemp() self.testdir = os.path.join(self.tmpdir, 'tmp_test_object_server_ObjectController') - mkdirs(os.path.join(self.testdir, 'sda1', 'tmp')) + mkdirs(os.path.join(self.testdir, 'sda', 'tmp')) conf = {'devices': self.testdir, 'mount_check': 'false'} self.obj_ctlr = object_server.ObjectController( conf, logger=debug_logger('obj-ut'))