Erasure Code Reconstructor

This patch adds the erasure code reconstructor. It follows the
design of the replicator but:
  - There is no notion of update() or update_deleted().
  - There is a single job processor.
  - Jobs are processed partition by partition.
  - At the end of processing a rebalanced or handoff partition, the
    reconstructor removes successfully reverted objects, if any.

Also included are various ssync changes, such as the addition of a
reconstruct_fa() function, called from ssync_sender, which performs
the actual reconstruction while sending the object to the receiver.
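
For orientation, a minimal sketch of that callback hand-off; the
send_chunk() helper is hypothetical, the rest mirrors the ssync_sender
change in this patch:

    # SYNC jobs carry the reconstructor's bound method under the
    # 'sync_diskfile_builder' key; ssync_sender itself holds no EC logic.
    def send_one_object(sender, df):
        # plain replication jobs hit the default lambda, which returns
        # the locally opened DiskFile unchanged
        builder = sender.job.get('sync_diskfile_builder',
                                 lambda *args: df)
        # for EC SYNC jobs, builder is ObjectReconstructor.reconstruct_fa:
        # it GETs ec_ndata fragments from peers and returns a
        # RebuildingECDiskFileStream whose reader() yields rebuilt chunks
        df = builder(sender.job, sender.node, df.get_metadata())
        for chunk in df.reader():
            sender.send_chunk(chunk)  # hypothetical send helper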

Co-Authored-By: Alistair Coles <alistair.coles@hp.com>
Co-Authored-By: Thiago da Silva <thiago@redhat.com>
Co-Authored-By: John Dickinson <me@not.mn>
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Co-Authored-By: Tushar Gohad <tushar.gohad@intel.com>
Co-Authored-By: Samuel Merritt <sam@swiftstack.com>
Co-Authored-By: Christian Schwede <christian.schwede@enovance.com>
Co-Authored-By: Yuan Zhou <yuan.zhou@intel.com>
blueprint ec-reconstructor
Change-Id: I7d15620dc66ee646b223bb9fff700796cd6bef51
paul luse authored 2014-10-28 09:51:06 -07:00; committed by Clay Gerrard
parent b2189ef47a
commit 647b66a2ce
27 changed files with 5038 additions and 234 deletions

bin/swift-object-reconstructor (new executable file, 31 lines)

@@ -0,0 +1,31 @@
#!/usr/bin/env python
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from swift.obj.reconstructor import ObjectReconstructor
from swift.common.utils import parse_options
from swift.common.daemon import run_daemon
from optparse import OptionParser
if __name__ == '__main__':
parser = OptionParser("%prog CONFIG [options]")
parser.add_option('-d', '--devices',
help='Reconstruct only given devices. '
'Comma-separated list')
parser.add_option('-p', '--partitions',
help='Reconstruct only given partitions. '
'Comma-separated list')
conf_file, options = parse_options(parser=parser, once=True)
run_daemon(ObjectReconstructor, conf_file, **options)
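
The two options flow into run_once() as keyword arguments; a small
sketch (values illustrative) of how they are consumed there:

    from swift.common.utils import list_from_csv

    kwargs = {'devices': 'sda1,sdb1', 'partitions': '0,3'}  # from the CLI
    override_devices = list_from_csv(kwargs.get('devices'))
    override_partitions = [int(p) for p in
                           list_from_csv(kwargs.get('partitions'))]
    print(override_devices, override_partitions)
    # (['sda1', 'sdb1'], [0, 3]) - passed on to reconstruct()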

etc/object-server.conf-sample

@@ -211,6 +211,29 @@ use = egg:swift#recon
# removed when it has successfully replicated to all the canonical nodes.
# handoff_delete = auto
+[object-reconstructor]
+# You can override the default log routing for this app here (don't use set!):
+# Unless otherwise noted, each setting below has the same meaning as described
+# in the [object-replicator] section, however these settings apply to the EC
+# reconstructor
+#
+# log_name = object-reconstructor
+# log_facility = LOG_LOCAL0
+# log_level = INFO
+# log_address = /dev/log
+#
+# daemonize = on
+# run_pause = 30
+# concurrency = 1
+# stats_interval = 300
+# node_timeout = 10
+# http_timeout = 60
+# lockup_timeout = 1800
+# reclaim_age = 604800
+# ring_check_interval = 15
+# recon_cache_path = /var/cache/swift
+# handoffs_first = False
+
[object-updater]
# You can override the default log routing for this app here (don't use set!):
# log_name = object-updater

setup.cfg

@@ -51,6 +51,7 @@ scripts =
    bin/swift-object-expirer
    bin/swift-object-info
    bin/swift-object-replicator
+    bin/swift-object-reconstructor
    bin/swift-object-server
    bin/swift-object-updater
    bin/swift-oldies

swift/common/exceptions.py

@@ -53,6 +53,10 @@ class MultiphasePUTNotSupported(SwiftException):
    pass

+
+class SuffixSyncError(SwiftException):
+    pass
+
class DiskFileError(SwiftException):
    pass

swift/common/manager.py

@@ -33,7 +33,8 @@ ALL_SERVERS = ['account-auditor', 'account-server', 'container-auditor',
               'container-replicator', 'container-reconciler',
               'container-server', 'container-sync',
               'container-updater', 'object-auditor', 'object-server',
-               'object-expirer', 'object-replicator', 'object-updater',
+               'object-expirer', 'object-replicator',
+               'object-reconstructor', 'object-updater',
               'proxy-server', 'account-replicator', 'account-reaper']
MAIN_SERVERS = ['proxy-server', 'account-server', 'container-server',
                'object-server']

swift/obj/reconstructor.py (new file, 925 lines)

@@ -0,0 +1,925 @@
# Copyright (c) 2010-2015 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from os.path import join
import random
import time
import itertools
from collections import defaultdict
import cPickle as pickle
import shutil
from eventlet import (GreenPile, GreenPool, Timeout, sleep, hubs, tpool,
spawn)
from eventlet.support.greenlets import GreenletExit
from swift import gettext_ as _
from swift.common.utils import (
whataremyips, unlink_older_than, compute_eta, get_logger,
dump_recon_cache, ismount, mkdirs, config_true_value, list_from_csv,
get_hub, tpool_reraise, GreenAsyncPile, Timestamp, remove_file)
from swift.common.swob import HeaderKeyDict
from swift.common.bufferedhttp import http_connect
from swift.common.daemon import Daemon
from swift.common.ring.utils import is_local_device
from swift.obj.ssync_sender import Sender as ssync_sender
from swift.common.http import HTTP_OK, HTTP_INSUFFICIENT_STORAGE
from swift.obj.diskfile import DiskFileRouter, get_data_dir, \
get_tmp_dir
from swift.common.storage_policy import POLICIES, EC_POLICY
from swift.common.exceptions import ConnectionTimeout, DiskFileError, \
SuffixSyncError
SYNC, REVERT = ('sync_only', 'sync_revert')
hubs.use_hub(get_hub())
class RebuildingECDiskFileStream(object):
"""
This class wraps the reconstructed fragment archive data and
metadata in the DiskFile interface for ssync.
"""
def __init__(self, metadata, frag_index, rebuilt_fragment_iter):
# start with metadata from a participating FA
self.metadata = metadata
# the new FA is going to have the same length as others in the set
self._content_length = self.metadata['Content-Length']
# update the FI and delete the ETag, the obj server will
# recalc on the other side...
self.metadata['X-Object-Sysmeta-Ec-Frag-Index'] = frag_index
del self.metadata['ETag']
self.frag_index = frag_index
self.rebuilt_fragment_iter = rebuilt_fragment_iter
def get_metadata(self):
return self.metadata
@property
def content_length(self):
return self._content_length
def reader(self):
for chunk in self.rebuilt_fragment_iter:
yield chunk
class ObjectReconstructor(Daemon):
"""
Reconstruct objects using erasure code, and rebalance EC Fragment
Archive objects off handoff nodes.
Encapsulates most logic and data needed by the object reconstruction
process. Each call to .reconstruct() performs one pass. It's up to the
caller to do this in a loop.
"""
def __init__(self, conf, logger=None):
"""
:param conf: configuration object obtained from ConfigParser
:param logger: logging object
"""
self.conf = conf
self.logger = logger or get_logger(
conf, log_route='object-reconstructor')
self.devices_dir = conf.get('devices', '/srv/node')
self.mount_check = config_true_value(conf.get('mount_check', 'true'))
self.swift_dir = conf.get('swift_dir', '/etc/swift')
self.port = int(conf.get('bind_port', 6000))
self.concurrency = int(conf.get('concurrency', 1))
self.stats_interval = int(conf.get('stats_interval', '300'))
self.ring_check_interval = int(conf.get('ring_check_interval', 15))
self.next_check = time.time() + self.ring_check_interval
self.reclaim_age = int(conf.get('reclaim_age', 86400 * 7))
self.partition_times = []
self.run_pause = int(conf.get('run_pause', 30))
self.http_timeout = int(conf.get('http_timeout', 60))
self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
self.recon_cache_path = conf.get('recon_cache_path',
'/var/cache/swift')
self.rcache = os.path.join(self.recon_cache_path, "object.recon")
# defaults subject to change after beta
self.conn_timeout = float(conf.get('conn_timeout', 0.5))
self.node_timeout = float(conf.get('node_timeout', 10))
self.network_chunk_size = int(conf.get('network_chunk_size', 65536))
self.disk_chunk_size = int(conf.get('disk_chunk_size', 65536))
self.headers = {
'Content-Length': '0',
'user-agent': 'obj-reconstructor %s' % os.getpid()}
self.handoffs_first = config_true_value(conf.get('handoffs_first',
False))
self._df_router = DiskFileRouter(conf, self.logger)
def load_object_ring(self, policy):
"""
Make sure the policy's rings are loaded.
:param policy: the StoragePolicy instance
:returns: appropriate ring object
"""
policy.load_ring(self.swift_dir)
return policy.object_ring
def check_ring(self, object_ring):
"""
Check to see if the ring has been updated
:param object_ring: the ring to check
:returns: False if the ring has changed, True otherwise
"""
if time.time() > self.next_check:
self.next_check = time.time() + self.ring_check_interval
if object_ring.has_changed():
return False
return True
def _full_path(self, node, part, path, policy):
return '%(replication_ip)s:%(replication_port)s' \
'/%(device)s/%(part)s%(path)s ' \
'policy#%(policy)d frag#%(frag_index)s' % {
'replication_ip': node['replication_ip'],
'replication_port': node['replication_port'],
'device': node['device'],
'part': part, 'path': path,
'policy': policy,
'frag_index': node.get('index', 'handoff'),
}
def _get_response(self, node, part, path, headers, policy):
"""
Helper method for reconstruction that GETs a single EC fragment
archive
:param node: the node to GET from
:param part: the partition
:param path: full path of the desired EC archive
:param headers: the headers to send
:param policy: an instance of
:class:`~swift.common.storage_policy.BaseStoragePolicy`
:returns: response
"""
resp = None
headers['X-Backend-Node-Index'] = node['index']
try:
with ConnectionTimeout(self.conn_timeout):
conn = http_connect(node['ip'], node['port'], node['device'],
part, 'GET', path, headers=headers)
with Timeout(self.node_timeout):
resp = conn.getresponse()
if resp.status != HTTP_OK:
self.logger.warning(
_("Invalid response %(resp)s from %(full_path)s"),
{'resp': resp.status,
'full_path': self._full_path(node, part, path, policy)})
resp = None
except (Exception, Timeout):
self.logger.exception(
_("Trying to GET %(full_path)s"), {
'full_path': self._full_path(node, part, path, policy)})
return resp
def reconstruct_fa(self, job, node, metadata):
"""
Reconstructs a fragment archive - this method is called from ssync
after a remote node responds that is missing this object - the local
diskfile is opened to provide metadata - but to reconstruct the
missing fragment archive we must connect to multiple object servers.
:param job: job from ssync_sender
:param node: node that we're rebuilding to
:param metadata: the metadata to attach to the rebuilt archive
:returns: a DiskFile like class for use by ssync
:raises DiskFileError: if the fragment archive cannot be reconstructed
"""
part_nodes = job['policy'].object_ring.get_part_nodes(
job['partition'])
part_nodes.remove(node)
# the fragment index we need to reconstruct is the position index
# of the node we're rebuilding to within the primary part list
fi_to_rebuild = node['index']
# KISS send out connection requests to all nodes, see what sticks
headers = {
'X-Backend-Storage-Policy-Index': int(job['policy']),
}
pile = GreenAsyncPile(len(part_nodes))
path = metadata['name']
for node in part_nodes:
pile.spawn(self._get_response, node, job['partition'],
path, headers, job['policy'])
responses = []
etag = None
for resp in pile:
if not resp:
continue
resp.headers = HeaderKeyDict(resp.getheaders())
responses.append(resp)
etag = sorted(responses, reverse=True,
key=lambda r: Timestamp(
r.headers.get('X-Backend-Timestamp')
))[0].headers.get('X-Object-Sysmeta-Ec-Etag')
responses = [r for r in responses if
r.headers.get('X-Object-Sysmeta-Ec-Etag') == etag]
if len(responses) >= job['policy'].ec_ndata:
break
else:
self.logger.error(
'Unable to get enough responses (%s/%s) '
'to reconstruct %s with ETag %s' % (
len(responses), job['policy'].ec_ndata,
self._full_path(node, job['partition'],
metadata['name'], job['policy']),
etag))
raise DiskFileError('Unable to reconstruct EC archive')
rebuilt_fragment_iter = self.make_rebuilt_fragment_iter(
responses[:job['policy'].ec_ndata], path, job['policy'],
fi_to_rebuild)
return RebuildingECDiskFileStream(metadata, fi_to_rebuild,
rebuilt_fragment_iter)
def _reconstruct(self, policy, fragment_payload, frag_index):
# XXX with jerasure this doesn't work if we need to rebuild a
# parity fragment, and not all data fragments are available
# segment = policy.pyeclib_driver.reconstruct(
# fragment_payload, [frag_index])[0]
# for safety until pyeclib 1.0.7 we'll just use decode and encode
segment = policy.pyeclib_driver.decode(fragment_payload)
return policy.pyeclib_driver.encode(segment)[frag_index]
def make_rebuilt_fragment_iter(self, responses, path, policy, frag_index):
"""
Turn a set of connections from backend object servers into a generator
that yields up the rebuilt fragment archive for frag_index.
"""
def _get_one_fragment(resp):
buff = ''
remaining_bytes = policy.fragment_size
while remaining_bytes:
chunk = resp.read(remaining_bytes)
if not chunk:
break
remaining_bytes -= len(chunk)
buff += chunk
return buff
def fragment_payload_iter():
# We need a fragment from each connection, so it's best to
# use a GreenPile to keep them ordered and in sync
pile = GreenPile(len(responses))
while True:
for resp in responses:
pile.spawn(_get_one_fragment, resp)
try:
with Timeout(self.node_timeout):
fragment_payload = [fragment for fragment in pile]
except (Exception, Timeout):
self.logger.exception(
_("Error trying to rebuild %(path)s "
"policy#%(policy)d frag#%(frag_index)s"), {
'path': path,
'policy': policy,
'frag_index': frag_index,
})
break
if not all(fragment_payload):
break
rebuilt_fragment = self._reconstruct(
policy, fragment_payload, frag_index)
yield rebuilt_fragment
return fragment_payload_iter()
def stats_line(self):
"""
Logs various stats for the currently running reconstruction pass.
"""
if self.reconstruction_count:
elapsed = (time.time() - self.start) or 0.000001
rate = self.reconstruction_count / elapsed
self.logger.info(
_("%(reconstructed)d/%(total)d (%(percentage).2f%%)"
" partitions reconstructed in %(time).2fs (%(rate).2f/sec, "
"%(remaining)s remaining)"),
{'reconstructed': self.reconstruction_count,
'total': self.job_count,
'percentage':
self.reconstruction_count * 100.0 / self.job_count,
'time': time.time() - self.start, 'rate': rate,
'remaining': '%d%s' % compute_eta(self.start,
self.reconstruction_count,
self.job_count)})
if self.suffix_count:
self.logger.info(
_("%(checked)d suffixes checked - "
"%(hashed).2f%% hashed, %(synced).2f%% synced"),
{'checked': self.suffix_count,
'hashed': (self.suffix_hash * 100.0) / self.suffix_count,
'synced': (self.suffix_sync * 100.0) / self.suffix_count})
self.partition_times.sort()
self.logger.info(
_("Partition times: max %(max).4fs, "
"min %(min).4fs, med %(med).4fs"),
{'max': self.partition_times[-1],
'min': self.partition_times[0],
'med': self.partition_times[
len(self.partition_times) // 2]})
else:
self.logger.info(
_("Nothing reconstructed for %s seconds."),
(time.time() - self.start))
def kill_coros(self):
"""Utility function that kills all coroutines currently running."""
for coro in list(self.run_pool.coroutines_running):
try:
coro.kill(GreenletExit)
except GreenletExit:
pass
def heartbeat(self):
"""
Loop that runs in the background during reconstruction. It
periodically logs progress.
"""
while True:
sleep(self.stats_interval)
self.stats_line()
def detect_lockups(self):
"""
In testing, the pool.waitall() call very occasionally failed to return.
This is an attempt to make sure the reconstructor finishes its
reconstruction pass in some eventuality.
"""
while True:
sleep(self.lockup_timeout)
if self.reconstruction_count == self.last_reconstruction_count:
self.logger.error(_("Lockup detected.. killing live coros."))
self.kill_coros()
self.last_reconstruction_count = self.reconstruction_count
def _get_partners(self, frag_index, part_nodes):
"""
Returns the left and right partners of the node whose index is
equal to the given frag_index.
:param frag_index: a fragment index
:param part_nodes: a list of primary nodes
:returns: [<node-to-left>, <node-to-right>]
"""
return [
part_nodes[(frag_index - 1) % len(part_nodes)],
part_nodes[(frag_index + 1) % len(part_nodes)],
]
def _get_hashes(self, policy, path, recalculate=None, do_listdir=False):
df_mgr = self._df_router[policy]
hashed, suffix_hashes = tpool_reraise(
df_mgr._get_hashes, path, recalculate=recalculate,
do_listdir=do_listdir, reclaim_age=self.reclaim_age)
self.logger.update_stats('suffix.hashes', hashed)
return suffix_hashes
def get_suffix_delta(self, local_suff, local_index,
remote_suff, remote_index):
"""
Compare the local suffix hashes with the remote suffix hashes
for the given local and remote fragment indexes. Return those
suffixes which should be synced.
:param local_suff: the local suffix hashes (from _get_hashes)
:param local_index: the local fragment index for the job
:param remote_suff: the remote suffix hashes (from remote
REPLICATE request)
:param remote_index: the remote fragment index for the job
:returns: a list of strings, the suffix dirs to sync
"""
suffixes = []
for suffix, sub_dict_local in local_suff.iteritems():
sub_dict_remote = remote_suff.get(suffix, {})
if (sub_dict_local.get(None) != sub_dict_remote.get(None) or
sub_dict_local.get(local_index) !=
sub_dict_remote.get(remote_index)):
suffixes.append(suffix)
return suffixes
def rehash_remote(self, node, job, suffixes):
try:
with Timeout(self.http_timeout):
conn = http_connect(
node['replication_ip'], node['replication_port'],
node['device'], job['partition'], 'REPLICATE',
'/' + '-'.join(sorted(suffixes)),
headers=self.headers)
conn.getresponse().read()
except (Exception, Timeout):
self.logger.exception(
_("Trying to sync suffixes with %s") % self._full_path(
node, job['partition'], '', job['policy']))
def _get_suffixes_to_sync(self, job, node):
"""
For SYNC jobs we need to make a remote REPLICATE request to get
the remote node's current suffix's hashes and then compare to our
local suffix's hashes to decide which suffixes (if any) are out
of sync.
:param job: the job dict, with the keys defined in ``_get_part_jobs``
:param node: the remote node dict
:returns: a (possibly empty) list of strings, the suffixes to be
synced with the remote node.
"""
# get hashes from the remote node
remote_suffixes = None
try:
with Timeout(self.http_timeout):
resp = http_connect(
node['replication_ip'], node['replication_port'],
node['device'], job['partition'], 'REPLICATE',
'', headers=self.headers).getresponse()
if resp.status == HTTP_INSUFFICIENT_STORAGE:
self.logger.error(
_('%s responded as unmounted'),
self._full_path(node, job['partition'], '',
job['policy']))
elif resp.status != HTTP_OK:
self.logger.error(
_("Invalid response %(resp)s "
"from %(full_path)s"), {
'resp': resp.status,
'full_path': self._full_path(
node, job['partition'], '',
job['policy'])
})
else:
remote_suffixes = pickle.loads(resp.read())
except (Exception, Timeout):
# all exceptions are logged here so that our caller can
# safely catch our exception and continue to the next node
# without logging
self.logger.exception('Unable to get remote suffix hashes '
'from %r' % self._full_path(
node, job['partition'], '',
job['policy']))
if remote_suffixes is None:
raise SuffixSyncError('Unable to get remote suffix hashes')
suffixes = self.get_suffix_delta(job['hashes'],
job['frag_index'],
remote_suffixes,
node['index'])
# now recalculate local hashes for suffixes that don't
# match so we're comparing the latest
local_suff = self._get_hashes(job['policy'], job['path'],
recalculate=suffixes)
suffixes = self.get_suffix_delta(local_suff,
job['frag_index'],
remote_suffixes,
node['index'])
self.suffix_count += len(suffixes)
return suffixes
def delete_reverted_objs(self, job, objects, frag_index):
"""
For EC we can potentially revert only some of a partition
so we'll delete reverted objects here. Note that we delete
the fragment index of the file we sent to the remote node.
:param job: the job being processed
:param objects: a dict of objects to be deleted, each entry maps
hash=>timestamp
:param frag_index: (int) the fragment index of data files to be deleted
"""
df_mgr = self._df_router[job['policy']]
for object_hash, timestamp in objects.items():
try:
df = df_mgr.get_diskfile_from_hash(
job['local_dev']['device'], job['partition'],
object_hash, job['policy'],
frag_index=frag_index)
df.purge(Timestamp(timestamp), frag_index)
except DiskFileError:
continue
def process_job(self, job):
"""
Sync the local partition with the remote node(s) according to
the parameters of the job. For primary nodes, the SYNC job type
will define both left and right hand sync_to nodes to ssync with
as defined by this primary nodes index in the node list based on
the fragment index found in the partition. For non-primary
nodes (either handoff revert, or rebalance) the REVERT job will
define a single node in sync_to which is the proper/new home for
the fragment index.
N.B. ring rebalancing can be time consuming and handoff nodes'
fragment indexes do not have a stable order, so it's possible to
have more than one REVERT job for a partition; in some rare
failure conditions there may even also be a SYNC job for the
same partition - but each one will be processed separately
because each job will define a separate list of node(s) to
'sync_to'.
:param job: the job dict, with the keys defined in ``_get_part_jobs``
"""
self.headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
begin = time.time()
if job['job_type'] == REVERT:
self._revert(job, begin)
else:
self._sync(job, begin)
self.partition_times.append(time.time() - begin)
self.reconstruction_count += 1
def _sync(self, job, begin):
"""
Process a SYNC job.
"""
self.logger.increment(
'partition.update.count.%s' % (job['local_dev']['device'],))
# after our left and right partners, if there's some sort of
# failure we'll continue onto the remaining primary nodes and
# make sure they're in sync - or potentially rebuild missing
# fragments we find
dest_nodes = itertools.chain(
job['sync_to'],
# I think we could order these based on our index to better
# protect against a broken chain
itertools.ifilter(
lambda n: n['id'] not in (n['id'] for n in job['sync_to']),
job['policy'].object_ring.get_part_nodes(job['partition'])),
)
syncd_with = 0
for node in dest_nodes:
if syncd_with >= len(job['sync_to']):
# success!
break
try:
suffixes = self._get_suffixes_to_sync(job, node)
except SuffixSyncError:
continue
if not suffixes:
syncd_with += 1
continue
# ssync any out-of-sync suffixes with the remote node
success, _ = ssync_sender(
self, node, job, suffixes)()
# let the remote end know to rehash its suffixes
self.rehash_remote(node, job, suffixes)
# update stats for this attempt
self.suffix_sync += len(suffixes)
self.logger.update_stats('suffix.syncs', len(suffixes))
if success:
syncd_with += 1
self.logger.timing_since('partition.update.timing', begin)
def _revert(self, job, begin):
"""
Process a REVERT job.
"""
self.logger.increment(
'partition.delete.count.%s' % (job['local_dev']['device'],))
# we'd desperately like to push this partition back to its
# primary location, but if that node is down, the next best thing
# is one of the handoff locations - which *might* be us already!
dest_nodes = itertools.chain(
job['sync_to'],
job['policy'].object_ring.get_more_nodes(job['partition']),
)
syncd_with = 0
reverted_objs = {}
for node in dest_nodes:
if syncd_with >= len(job['sync_to']):
break
if node['id'] == job['local_dev']['id']:
# this is as good a place as any for this data for now
break
success, in_sync_objs = ssync_sender(
self, node, job, job['suffixes'])()
self.rehash_remote(node, job, job['suffixes'])
if success:
syncd_with += 1
reverted_objs.update(in_sync_objs)
if syncd_with >= len(job['sync_to']):
self.delete_reverted_objs(
job, reverted_objs, job['frag_index'])
self.logger.timing_since('partition.delete.timing', begin)
def _get_part_jobs(self, local_dev, part_path, partition, policy):
"""
Helper function to build jobs for a partition, this method will
read the suffix hashes and create job dictionaries to describe
the needed work. There will be one job for each fragment index
discovered in the partition.
For a fragment index which corresponds to this node's ring
index, a job with job_type SYNC will be created to ensure that
the left and right hand primary ring nodes for the part have the
corresponding left and right hand fragment archives.
A fragment index (or entire partition) for which this node is
not the primary corresponding node, will create job(s) with
job_type REVERT to ensure that fragment archives are pushed to
the correct node and removed from this one.
A partition may result in multiple jobs. Potentially many
REVERT jobs, and zero or one SYNC job.
:param local_dev: the local device
:param part_path: full path to partition
:param partition: partition number
:param policy: the policy
:returns: a list of dicts of job info
"""
# find all the fi's in the part, and which suffixes have them
hashes = self._get_hashes(policy, part_path, do_listdir=True)
non_data_fragment_suffixes = []
data_fi_to_suffixes = defaultdict(list)
for suffix, fi_hash in hashes.items():
if not fi_hash:
# this is for sanity and clarity, normally an empty
# suffix would get del'd from the hashes dict, but an
# OSError trying to re-hash the suffix could leave the
# value empty - it will log the exception; but there's
# no way to properly address this suffix at this time.
continue
data_frag_indexes = [f for f in fi_hash if f is not None]
if not data_frag_indexes:
non_data_fragment_suffixes.append(suffix)
else:
for fi in data_frag_indexes:
data_fi_to_suffixes[fi].append(suffix)
# helper to ensure consistent structure of jobs
def build_job(job_type, frag_index, suffixes, sync_to):
return {
'job_type': job_type,
'frag_index': frag_index,
'suffixes': suffixes,
'sync_to': sync_to,
'partition': partition,
'path': part_path,
'hashes': hashes,
'policy': policy,
'local_dev': local_dev,
# ssync likes to have it handy
'device': local_dev['device'],
}
# aggregate jobs for all the fragment indexes in this part
jobs = []
# check the primary nodes - to see if the part belongs here
part_nodes = policy.object_ring.get_part_nodes(partition)
for node in part_nodes:
if node['id'] == local_dev['id']:
# this partition belongs here, we'll need a sync job
frag_index = node['index']
try:
suffixes = data_fi_to_suffixes.pop(frag_index)
except KeyError:
suffixes = []
sync_job = build_job(
job_type=SYNC,
frag_index=frag_index,
suffixes=suffixes,
sync_to=self._get_partners(frag_index, part_nodes),
)
# ssync callback to rebuild missing fragment_archives
sync_job['sync_diskfile_builder'] = self.reconstruct_fa
jobs.append(sync_job)
break
# assign remaining data fragment suffixes to revert jobs
ordered_fis = sorted((len(suffixes), fi) for fi, suffixes
in data_fi_to_suffixes.items())
for count, fi in ordered_fis:
revert_job = build_job(
job_type=REVERT,
frag_index=fi,
suffixes=data_fi_to_suffixes[fi],
sync_to=[part_nodes[fi]],
)
jobs.append(revert_job)
# now we need to assign suffixes that have no data fragments
if non_data_fragment_suffixes:
if jobs:
# the first job will be either the sync_job, or the
# revert_job for the fragment index that is most common
# among the suffixes
jobs[0]['suffixes'].extend(non_data_fragment_suffixes)
else:
# this is an unfortunate situation, we need a revert job to
# push partitions off this node, but none of the suffixes
# have any data fragments to hint at which node would be a
# good candidate to receive the tombstones.
jobs.append(build_job(
job_type=REVERT,
frag_index=None,
suffixes=non_data_fragment_suffixes,
# this is super safe
sync_to=part_nodes,
# something like this would probably be better
# sync_to=random.sample(part_nodes, 3),
))
# return a list of jobs for this part
return jobs
def collect_parts(self, override_devices=None,
override_partitions=None):
"""
Helper for yielding partitions in the top level reconstructor
"""
override_devices = override_devices or []
override_partitions = override_partitions or []
ips = whataremyips()
for policy in POLICIES:
if policy.policy_type != EC_POLICY:
continue
self._diskfile_mgr = self._df_router[policy]
self.load_object_ring(policy)
data_dir = get_data_dir(policy)
local_devices = itertools.ifilter(
lambda dev: dev and is_local_device(
ips, self.port,
dev['replication_ip'], dev['replication_port']),
policy.object_ring.devs)
for local_dev in local_devices:
if override_devices and (local_dev['device'] not in
override_devices):
continue
dev_path = join(self.devices_dir, local_dev['device'])
obj_path = join(dev_path, data_dir)
tmp_path = join(dev_path, get_tmp_dir(int(policy)))
if self.mount_check and not ismount(dev_path):
self.logger.warn(_('%s is not mounted'),
local_dev['device'])
continue
unlink_older_than(tmp_path, time.time() -
self.reclaim_age)
if not os.path.exists(obj_path):
try:
mkdirs(obj_path)
except Exception:
self.logger.exception(
'Unable to create %s' % obj_path)
continue
try:
partitions = os.listdir(obj_path)
except OSError:
self.logger.exception(
'Unable to list partitions in %r' % obj_path)
continue
for partition in partitions:
part_path = join(obj_path, partition)
if not (partition.isdigit() and
os.path.isdir(part_path)):
self.logger.warning(
'Unexpected entity in data dir: %r' % part_path)
remove_file(part_path)
continue
partition = int(partition)
if override_partitions and (partition not in
override_partitions):
continue
part_info = {
'local_dev': local_dev,
'policy': policy,
'partition': partition,
'part_path': part_path,
}
yield part_info
def build_reconstruction_jobs(self, part_info):
"""
Helper function for collect_jobs to build jobs for reconstruction
using EC style storage policy
"""
jobs = self._get_part_jobs(**part_info)
random.shuffle(jobs)
if self.handoffs_first:
# Move the handoff revert jobs to the front of the list
jobs.sort(key=lambda job: job['job_type'], reverse=True)
self.job_count += len(jobs)
return jobs
def _reset_stats(self):
self.start = time.time()
self.job_count = 0
self.suffix_count = 0
self.suffix_sync = 0
self.suffix_hash = 0
self.reconstruction_count = 0
self.last_reconstruction_count = -1
def delete_partition(self, path):
self.logger.info(_("Removing partition: %s"), path)
tpool.execute(shutil.rmtree, path, ignore_errors=True)
def reconstruct(self, **kwargs):
"""Run a reconstruction pass"""
self._reset_stats()
self.partition_times = []
stats = spawn(self.heartbeat)
lockup_detector = spawn(self.detect_lockups)
sleep() # Give spawns a cycle
try:
self.run_pool = GreenPool(size=self.concurrency)
for part_info in self.collect_parts(**kwargs):
if not self.check_ring(part_info['policy'].object_ring):
self.logger.info(_("Ring change detected. Aborting "
"current reconstruction pass."))
return
jobs = self.build_reconstruction_jobs(part_info)
if not jobs:
# If this part belongs on this node, _get_part_jobs
# will *always* build a sync_job - even if there are
# no suffixes in the partition that need to sync.
# If there are any suffixes in the partition then our
# job list would have *at least* one revert job.
# Therefore we know this part a) doesn't belong on
# this node and b) doesn't have any suffixes in it.
self.run_pool.spawn(self.delete_partition,
part_info['part_path'])
for job in jobs:
self.run_pool.spawn(self.process_job, job)
with Timeout(self.lockup_timeout):
self.run_pool.waitall()
except (Exception, Timeout):
self.logger.exception(_("Exception in top-level"
"reconstruction loop"))
self.kill_coros()
finally:
stats.kill()
lockup_detector.kill()
self.stats_line()
def run_once(self, *args, **kwargs):
start = time.time()
self.logger.info(_("Running object reconstructor in script mode."))
override_devices = list_from_csv(kwargs.get('devices'))
override_partitions = [int(p) for p in
list_from_csv(kwargs.get('partitions'))]
self.reconstruct(
override_devices=override_devices,
override_partitions=override_partitions)
total = (time.time() - start) / 60
self.logger.info(
_("Object reconstruction complete (once). (%.02f minutes)"), total)
if not (override_partitions or override_devices):
dump_recon_cache({'object_reconstruction_time': total,
'object_reconstruction_last': time.time()},
self.rcache, self.logger)
def run_forever(self, *args, **kwargs):
self.logger.info(_("Starting object reconstructor in daemon mode."))
# Run the reconstructor continually
while True:
start = time.time()
self.logger.info(_("Starting object reconstruction pass."))
# Run the reconstructor
self.reconstruct()
total = (time.time() - start) / 60
self.logger.info(
_("Object reconstruction complete. (%.02f minutes)"), total)
dump_recon_cache({'object_reconstruction_time': total,
'object_reconstruction_last': time.time()},
self.rcache, self.logger)
self.logger.debug('reconstruction sleeping for %s seconds.',
self.run_pause)
sleep(self.run_pause)
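
The decode/encode fallback in _reconstruct() can be exercised on its
own; a sketch assuming pyeclib is installed (the k, m and ec_type
values are illustrative):

    from pyeclib.ec_iface import ECDriver

    driver = ECDriver(k=4, m=2, ec_type='liberasurecode_rs_vand')
    frags = driver.encode('object segment data' * 100)  # k+m fragments

    missing_index = 3
    available = [f for i, f in enumerate(frags) if i != missing_index]
    # rather than driver.reconstruct(available, [missing_index]) - which
    # the comment in _reconstruct() flags as unsafe for parity rebuilds
    # before pyeclib 1.0.7 - decode back to the segment and re-encode:
    segment = driver.decode(available)
    rebuilt = driver.encode(segment)[missing_index]
    assert rebuilt == frags[missing_index]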

swift/obj/replicator.py

@@ -171,7 +171,7 @@ class ObjectReplicator(Daemon):
        sync method in Swift.
        """
        if not os.path.exists(job['path']):
-            return False, set()
+            return False, {}
        args = [
            'rsync',
            '--recursive',
@@ -196,11 +196,11 @@ class ObjectReplicator(Daemon):
                args.append(spath)
                had_any = True
        if not had_any:
-            return False, set()
+            return False, {}
        data_dir = get_data_dir(job['policy'])
        args.append(join(rsync_module, node['device'],
                         data_dir, job['partition']))
-        return self._rsync(args) == 0, set()
+        return self._rsync(args) == 0, {}

    def ssync(self, node, job, suffixes, remote_check_objs=None):
        return ssync_sender.Sender(
@@ -246,8 +246,9 @@ class ObjectReplicator(Daemon):
                    self.conf.get('sync_method', 'rsync') == 'ssync':
                kwargs['remote_check_objs'] = \
                    synced_remote_regions[node['region']]
-            # cand_objs is a list of objects for deletion
-            success, cand_objs = self.sync(
+            # candidates is a dict(hash=>timestamp) of objects
+            # for deletion
+            success, candidates = self.sync(
                node, job, suffixes, **kwargs)
            if success:
                with Timeout(self.http_timeout):
@@ -258,7 +259,8 @@ class ObjectReplicator(Daemon):
                        '/' + '-'.join(suffixes), headers=self.headers)
                    conn.getresponse().read()
                if node['region'] != job['region']:
-                    synced_remote_regions[node['region']] = cand_objs
+                    synced_remote_regions[node['region']] = \
+                        candidates.keys()
            responses.append(success)
        for region, cand_objs in synced_remote_regions.iteritems():
            if delete_objs is None:

swift/obj/server.py

@@ -880,7 +880,7 @@ class ObjectController(BaseStorageServer):
    @public
    @replication
    @timing_stats(sample_rate=0.1)
-    def REPLICATION(self, request):
+    def SSYNC(self, request):
        return Response(app_iter=ssync_receiver.Receiver(self, request)())

    def __call__(self, env, start_response):
@@ -914,7 +914,7 @@ class ObjectController(BaseStorageServer):
            trans_time = time.time() - start_time
        if self.log_requests:
            log_line = get_log_line(req, res, trans_time, '')
-            if req.method in ('REPLICATE', 'REPLICATION') or \
+            if req.method in ('REPLICATE', 'SSYNC') or \
                    'X-Backend-Replication' in req.headers:
                self.logger.debug(log_line)
            else:

swift/obj/ssync_receiver.py

@@ -29,23 +29,23 @@ from swift.common import request_helpers

class Receiver(object):
    """
-    Handles incoming REPLICATION requests to the object server.
+    Handles incoming SSYNC requests to the object server.

    These requests come from the object-replicator daemon that uses
    :py:mod:`.ssync_sender`.

-    The number of concurrent REPLICATION requests is restricted by
+    The number of concurrent SSYNC requests is restricted by
    use of a replication_semaphore and can be configured with the
    object-server.conf [object-server] replication_concurrency
    setting.

-    A REPLICATION request is really just an HTTP conduit for
+    An SSYNC request is really just an HTTP conduit for
    sender/receiver replication communication. The overall
-    REPLICATION request should always succeed, but it will contain
+    SSYNC request should always succeed, but it will contain
    multiple requests within its request and response bodies. This
    "hack" is done so that replication concurrency can be managed.

-    The general process inside a REPLICATION request is:
+    The general process inside an SSYNC request is:

    1. Initialize the request: Basic request validation, mount check,
       acquire semaphore lock, etc..
@@ -73,10 +73,10 @@ class Receiver(object):
    def __call__(self):
        """
-        Processes a REPLICATION request.
+        Processes an SSYNC request.

        Acquires a semaphore lock and then proceeds through the steps
-        of the REPLICATION process.
+        of the SSYNC process.
        """
        # The general theme for functions __call__ calls is that they should
        # raise exceptions.MessageTimeout for client timeouts (logged locally),
@@ -89,7 +89,7 @@
        try:
            # Double try blocks in case our main error handlers fail.
            try:
-                # intialize_request is for preamble items that can be done
+                # initialize_request is for preamble items that can be done
                # outside a replication semaphore lock.
                for data in self.initialize_request():
                    yield data
@@ -112,7 +112,7 @@
                    self.app.replication_semaphore.release()
        except exceptions.ReplicationLockTimeout as err:
            self.app.logger.debug(
-                '%s/%s/%s REPLICATION LOCK TIMEOUT: %s' % (
+                '%s/%s/%s SSYNC LOCK TIMEOUT: %s' % (
                    self.request.remote_addr, self.device, self.partition,
                    err))
            yield ':ERROR: %d %r\n' % (0, str(err))
@@ -169,8 +169,11 @@
        self.request.environ['eventlet.minimum_write_chunk_size'] = 0
        self.device, self.partition, self.policy = \
            request_helpers.get_name_and_placement(self.request, 2, 2, False)
-        self.policy_idx = \
-            int(self.request.headers.get('X-Backend-Storage-Policy-Index', 0))
+        if 'X-Backend-Ssync-Frag-Index' in self.request.headers:
+            self.frag_index = int(
+                self.request.headers['X-Backend-Ssync-Frag-Index'])
+        else:
+            self.frag_index = None
        utils.validate_device_partition(self.device, self.partition)
        self.diskfile_mgr = self.app._diskfile_router[self.policy]
        if self.diskfile_mgr.mount_check and not constraints.check_mount(
@@ -183,7 +186,7 @@
    def missing_check(self):
        """
        Handles the receiver-side of the MISSING_CHECK step of a
-        REPLICATION request.
+        SSYNC request.

        Receives a list of hashes and timestamps of object
        information the sender can provide and responds with a list
@@ -227,11 +230,13 @@
            line = self.fp.readline(self.app.network_chunk_size)
            if not line or line.strip() == ':MISSING_CHECK: END':
                break
-            object_hash, timestamp = [urllib.unquote(v) for v in line.split()]
+            parts = line.split()
+            object_hash, timestamp = [urllib.unquote(v) for v in parts[:2]]
            want = False
            try:
                df = self.diskfile_mgr.get_diskfile_from_hash(
-                    self.device, self.partition, object_hash, self.policy)
+                    self.device, self.partition, object_hash, self.policy,
+                    frag_index=self.frag_index)
            except exceptions.DiskFileNotExist:
                want = True
            else:
@@ -254,7 +259,7 @@
    def updates(self):
        """
-        Handles the UPDATES step of a REPLICATION request.
+        Handles the UPDATES step of an SSYNC request.

        Receives a set of PUT and DELETE subrequests that will be
        routed to the object server itself for processing. These
@@ -354,7 +359,7 @@
                    subreq_iter())
            else:
                raise Exception('Invalid subrequest method %s' % method)
-            subreq.headers['X-Backend-Storage-Policy-Index'] = self.policy_idx
+            subreq.headers['X-Backend-Storage-Policy-Index'] = int(self.policy)
            subreq.headers['X-Backend-Replication'] = 'True'
            if replication_headers:
                subreq.headers['X-Backend-Replication-Headers'] = \
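
The missing_check changes above make the wire format extensible: each
line may now carry extra fields after the hash and timestamp. A sketch
of the exchange with fake values:

    # sender -> receiver, one line per candidate object:
    #   :MISSING_CHECK: START
    #   <hash> <timestamp>
    #   :MISSING_CHECK: END
    # receiver -> sender, the hashes it still wants back:
    line = 'fffb0ffd63c693911f0ca9cf54add976 1414512000.12345 future-field'
    parts = line.split()
    object_hash, timestamp = parts[:2]  # receiver side, above
    wanted = parts[0]                   # sender side, next file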

swift/obj/ssync_sender.py

@@ -22,7 +22,7 @@ from swift.common import http

class Sender(object):
    """
-    Sends REPLICATION requests to the object server.
+    Sends SSYNC requests to the object server.

    These requests are eventually handled by
    :py:mod:`.ssync_receiver` and full documentation about the
@@ -31,6 +31,7 @@ class Sender(object):
    def __init__(self, daemon, node, job, suffixes, remote_check_objs=None):
        self.daemon = daemon
+        self.df_mgr = self.daemon._diskfile_mgr
        self.node = node
        self.job = job
        self.suffixes = suffixes
@@ -38,28 +39,28 @@
        self.response = None
        self.response_buffer = ''
        self.response_chunk_left = 0
-        self.available_set = set()
+        # available_map has an entry for each object in given suffixes that
+        # is available to be sync'd; each entry is a hash => timestamp
+        self.available_map = {}
        # When remote_check_objs is given in job, ssync_sender trys only to
        # make sure those objects exist or not in remote.
        self.remote_check_objs = remote_check_objs
+        # send_list has an entry for each object that the receiver wants to
+        # be sync'ed; each entry is an object hash
        self.send_list = []
        self.failures = 0

-    @property
-    def policy_idx(self):
-        return int(self.job.get('policy', 0))

    def __call__(self):
        """
        Perform ssync with remote node.

-        :returns: a 2-tuple, in the form (success, can_delete_objs).
-
-        Success is a boolean, and can_delete_objs is an iterable of strings
-        representing the hashes which are in sync with the remote node.
+        :returns: a 2-tuple, in the form (success, can_delete_objs) where
+                  success is a boolean and can_delete_objs is the map of
+                  objects that are in sync with the receiver. Each entry in
+                  can_delete_objs maps a hash => timestamp
        """
        if not self.suffixes:
-            return True, set()
+            return True, {}
        try:
            # Double try blocks in case our main error handler fails.
            try:
@@ -72,18 +73,20 @@
                self.missing_check()
                if self.remote_check_objs is None:
                    self.updates()
-                    can_delete_obj = self.available_set
+                    can_delete_obj = self.available_map
                else:
                    # when we are initialized with remote_check_objs we don't
                    # *send* any requested updates; instead we only collect
                    # what's already in sync and safe for deletion
-                    can_delete_obj = self.available_set.difference(
-                        self.send_list)
+                    in_sync_hashes = (set(self.available_map.keys()) -
+                                      set(self.send_list))
+                    can_delete_obj = dict((hash_, self.available_map[hash_])
+                                          for hash_ in in_sync_hashes)
            self.disconnect()
            if not self.failures:
                return True, can_delete_obj
            else:
-                return False, set()
+                return False, {}
        except (exceptions.MessageTimeout,
                exceptions.ReplicationException) as err:
            self.daemon.logger.error(
@@ -109,11 +112,11 @@
            # would only get called if the above except Exception handler
            # failed (bad node or job data).
            self.daemon.logger.exception('EXCEPTION in replication.Sender')
-            return False, set()
+            return False, {}

    def connect(self):
        """
-        Establishes a connection and starts a REPLICATION request
+        Establishes a connection and starts an SSYNC request
        with the object server.
        """
        with exceptions.MessageTimeout(
@@ -121,11 +124,13 @@
            self.connection = bufferedhttp.BufferedHTTPConnection(
                '%s:%s' % (self.node['replication_ip'],
                           self.node['replication_port']))
-            self.connection.putrequest('REPLICATION', '/%s/%s' % (
+            self.connection.putrequest('SSYNC', '/%s/%s' % (
                self.node['device'], self.job['partition']))
            self.connection.putheader('Transfer-Encoding', 'chunked')
            self.connection.putheader('X-Backend-Storage-Policy-Index',
-                                      self.policy_idx)
+                                      int(self.job['policy']))
+            self.connection.putheader('X-Backend-Ssync-Frag-Index',
+                                      self.node['index'])
            self.connection.endheaders()
        with exceptions.MessageTimeout(
                self.daemon.node_timeout, 'connect receive'):
@@ -137,7 +142,7 @@
    def readline(self):
        """
-        Reads a line from the REPLICATION response body.
+        Reads a line from the SSYNC response body.

        httplib has no readline and will block on read(x) until x is
        read, so we have to do the work ourselves. A bit of this is
@@ -183,7 +188,7 @@
    def missing_check(self):
        """
        Handles the sender-side of the MISSING_CHECK step of a
-        REPLICATION request.
+        SSYNC request.

        Full documentation of this can be found at
        :py:meth:`.Receiver.missing_check`.
@@ -193,14 +198,15 @@
                self.daemon.node_timeout, 'missing_check start'):
            msg = ':MISSING_CHECK: START\r\n'
            self.connection.send('%x\r\n%s\r\n' % (len(msg), msg))
-        hash_gen = self.daemon._diskfile_mgr.yield_hashes(
+        hash_gen = self.df_mgr.yield_hashes(
            self.job['device'], self.job['partition'],
-            self.policy_idx, self.suffixes)
+            self.job['policy'], self.suffixes,
+            frag_index=self.job.get('frag_index'))
        if self.remote_check_objs is not None:
            hash_gen = ifilter(lambda (path, object_hash, timestamp):
                               object_hash in self.remote_check_objs, hash_gen)
        for path, object_hash, timestamp in hash_gen:
-            self.available_set.add(object_hash)
+            self.available_map[object_hash] = timestamp
        with exceptions.MessageTimeout(
                self.daemon.node_timeout,
                'missing_check send line'):
@@ -234,12 +240,13 @@
            line = line.strip()
            if line == ':MISSING_CHECK: END':
                break
-            if line:
-                self.send_list.append(line)
+            parts = line.split()
+            if parts:
+                self.send_list.append(parts[0])

    def updates(self):
        """
-        Handles the sender-side of the UPDATES step of a REPLICATION
+        Handles the sender-side of the UPDATES step of an SSYNC
        request.

        Full documentation of this can be found at
@@ -252,15 +259,19 @@
            self.connection.send('%x\r\n%s\r\n' % (len(msg), msg))
        for object_hash in self.send_list:
            try:
-                df = self.daemon._diskfile_mgr.get_diskfile_from_hash(
+                df = self.df_mgr.get_diskfile_from_hash(
                    self.job['device'], self.job['partition'], object_hash,
-                    self.policy_idx)
+                    self.job['policy'], frag_index=self.job.get('frag_index'))
            except exceptions.DiskFileNotExist:
                continue
            url_path = urllib.quote(
                '/%s/%s/%s' % (df.account, df.container, df.obj))
            try:
                df.open()
+                # EC reconstructor may have passed a callback to build
+                # an alternative diskfile...
+                df = self.job.get('sync_diskfile_builder', lambda *args: df)(
+                    self.job, self.node, df.get_metadata())
            except exceptions.DiskFileDeleted as err:
                self.send_delete(url_path, err.timestamp)
            except exceptions.DiskFileError:
@@ -328,7 +339,7 @@
    def disconnect(self):
        """
        Closes down the connection to the object server once done
-        with the REPLICATION request.
+        with the SSYNC request.
        """
        try:
            with exceptions.MessageTimeout(
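
The switch from a set to a hash => timestamp map is what lets the
reconstructor purge exactly what was reverted; a sketch with fake data:

    available_map = {'hash_a': '1414512000.11111',
                     'hash_b': '1414512000.22222'}
    send_list = ['hash_b']  # the receiver still wants hash_b
    in_sync_hashes = set(available_map.keys()) - set(send_list)
    can_delete_obj = dict((h, available_map[h]) for h in in_sync_hashes)
    print(can_delete_obj)  # {'hash_a': '1414512000.11111'}
    # the reconstructor's REVERT path hands this map to
    # delete_reverted_objs(), which purges each local fragment via
    # df.purge(Timestamp(timestamp), frag_index)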

test/probe/brain.py

@@ -67,7 +67,7 @@ class BrainSplitter(object):
    __metaclass__ = meta_command

    def __init__(self, url, token, container_name='test', object_name='test',
-                 server_type='container'):
+                 server_type='container', policy=None):
        self.url = url
        self.token = token
        self.account = utils.split_path(urlparse(url).path, 2, 2)[1]
@@ -81,9 +81,26 @@
        o = object_name if server_type == 'object' else None
        c = container_name if server_type in ('object', 'container') else None
-        part, nodes = ring.Ring(
-            '/etc/swift/%s.ring.gz' % server_type).get_nodes(
-                self.account, c, o)
+        if server_type in ('container', 'account'):
+            if policy:
+                raise TypeError('Metadata server brains do not '
+                                'support specific storage policies')
+            self.policy = None
+            self.ring = ring.Ring(
+                '/etc/swift/%s.ring.gz' % server_type)
+        elif server_type == 'object':
+            if not policy:
+                raise TypeError('Object BrainSplitters need to '
+                                'specify the storage policy')
+            self.policy = policy
+            policy.load_ring('/etc/swift')
+            self.ring = policy.object_ring
+        else:
+            raise ValueError('Unknown server_type: %r' % server_type)
+        self.server_type = server_type
+
+        part, nodes = self.ring.get_nodes(self.account, c, o)
        node_ids = [n['id'] for n in nodes]
        if all(n_id in node_ids for n_id in (0, 1)):
            self.primary_numbers = (1, 2)
@@ -172,6 +189,8 @@ parser.add_option('-o', '--object', default='object-%s' % uuid.uuid4(),
                  help='set object name')
parser.add_option('-s', '--server_type', default='container',
                  help='set server type')
+parser.add_option('-P', '--policy_name', default=None,
+                  help='set policy')

def main():
@@ -186,8 +205,17 @@ def main():
        return 'ERROR: unknown command %s' % cmd
    url, token = get_auth('http://127.0.0.1:8080/auth/v1.0',
                          'test:tester', 'testing')
+    if options.server_type == 'object' and not options.policy_name:
+        options.policy_name = POLICIES.default.name
+    if options.policy_name:
+        options.server_type = 'object'
+        policy = POLICIES.get_by_name(options.policy_name)
+        if not policy:
+            return 'ERROR: unknown policy %r' % options.policy_name
+    else:
+        policy = None
    brain = BrainSplitter(url, token, options.container, options.object,
-                          options.server_type)
+                          options.server_type, policy=policy)
    for cmd_args in commands:
        parts = cmd_args.split(':', 1)
        command = parts[0]
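
A sketch of the new policy-aware construction (auth endpoint and
credentials match the defaults in main() above; the in-tree import
path is assumed):

    from swiftclient import get_auth
    from swift.common.storage_policy import POLICIES
    from test.probe.brain import BrainSplitter  # assumed in-tree path

    url, token = get_auth('http://127.0.0.1:8080/auth/v1.0',
                          'test:tester', 'testing')
    policy = POLICIES.default  # or POLICIES.get_by_name('some-ec-policy')
    brain = BrainSplitter(url, token, 'mycontainer', 'myobject',
                          server_type='object', policy=policy)
    # container/account brains must not pass a policy (TypeError);
    # object brains must pass one.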

test/probe/common.py

@ -24,15 +24,19 @@ from nose import SkipTest
from swiftclient import get_auth, head_account from swiftclient import get_auth, head_account
from swift.obj.diskfile import get_data_dir
from swift.common.ring import Ring from swift.common.ring import Ring
from swift.common.utils import readconf from swift.common.utils import readconf
from swift.common.manager import Manager from swift.common.manager import Manager
-from swift.common.storage_policy import POLICIES
+from swift.common.storage_policy import POLICIES, EC_POLICY, REPL_POLICY
from test.probe import CHECK_SERVER_TIMEOUT, VALIDATE_RSYNC from test.probe import CHECK_SERVER_TIMEOUT, VALIDATE_RSYNC
ENABLED_POLICIES = [p for p in POLICIES if not p.is_deprecated] ENABLED_POLICIES = [p for p in POLICIES if not p.is_deprecated]
POLICIES_BY_TYPE = defaultdict(list)
for p in POLICIES:
POLICIES_BY_TYPE[p.policy_type].append(p)
def get_server_number(port, port2server): def get_server_number(port, port2server):
@ -138,6 +142,17 @@ def kill_nonprimary_server(primary_nodes, port2server, pids):
return port return port
def build_port_to_conf(server):
# map server to config by port
port_to_config = {}
for server_ in Manager([server]):
for config_path in server_.conf_files():
conf = readconf(config_path,
section_name='%s-replicator' % server_.type)
port_to_config[int(conf['bind_port'])] = conf
return port_to_config
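A sketch of how the new helper gets used (port 6010 is where a stock SAIO runs its first object server; treat the exact port and paths as assumptions):

    port_to_config = build_port_to_conf('object')
    conf = port_to_config[6010]      # parsed config, keyed by bind_port
    devices_root = conf['devices']   # e.g. /srv/1/node on a SAIO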
def get_ring(ring_name, required_replicas, required_devices, def get_ring(ring_name, required_replicas, required_devices,
server=None, force_validate=None): server=None, force_validate=None):
if not server: if not server:
@ -152,13 +167,7 @@ def get_ring(ring_name, required_replicas, required_devices,
if len(ring.devs) != required_devices: if len(ring.devs) != required_devices:
raise SkipTest('%s has %s devices instead of %s' % ( raise SkipTest('%s has %s devices instead of %s' % (
ring.serialized_path, len(ring.devs), required_devices)) ring.serialized_path, len(ring.devs), required_devices))
-# map server to config by port
-port_to_config = {}
-for server_ in Manager([server]):
-for config_path in server_.conf_files():
-conf = readconf(config_path,
-section_name='%s-replicator' % server_.type)
-port_to_config[int(conf['bind_port'])] = conf
+port_to_config = build_port_to_conf(server)
for dev in ring.devs: for dev in ring.devs:
# verify server is exposing mounted device # verify server is exposing mounted device
conf = port_to_config[dev['port']] conf = port_to_config[dev['port']]
@ -262,6 +271,10 @@ class ProbeTest(unittest.TestCase):
['account-replicator', 'container-replicator', ['account-replicator', 'container-replicator',
'object-replicator']) 'object-replicator'])
self.updaters = Manager(['container-updater', 'object-updater']) self.updaters = Manager(['container-updater', 'object-updater'])
self.server_port_to_conf = {}
# get some backend daemon configs loaded up
for server in ('account', 'container', 'object'):
self.server_port_to_conf[server] = build_port_to_conf(server)
except BaseException: except BaseException:
try: try:
raise raise
@ -274,6 +287,18 @@ class ProbeTest(unittest.TestCase):
def tearDown(self): def tearDown(self):
Manager(['all']).kill() Manager(['all']).kill()
def device_dir(self, server, node):
conf = self.server_port_to_conf[server][node['port']]
return os.path.join(conf['devices'], node['device'])
def storage_dir(self, server, node, part=None, policy=None):
policy = policy or self.policy
device_path = self.device_dir(server, node)
path_parts = [device_path, get_data_dir(policy)]
if part is not None:
path_parts.append(str(part))
return os.path.join(*path_parts)
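What the two helpers resolve to, sketched for a hypothetical ring entry (paths assume a SAIO layout; the 'objects-1' segment is what get_data_dir() would return for a policy with index 1):

    node = {'device': 'sdb1', 'port': 6010}      # hypothetical ring entry
    self.device_dir('object', node)              # -> /srv/1/node/sdb1
    self.storage_dir('object', node, part=313)   # -> /srv/1/node/sdb1/objects-1/313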
def get_to_final_state(self): def get_to_final_state(self):
# these .stop()s are probably not strictly necessary, # these .stop()s are probably not strictly necessary,
# but may prevent race conditions # but may prevent race conditions
@ -291,7 +316,16 @@ class ReplProbeTest(ProbeTest):
acct_cont_required_devices = 4 acct_cont_required_devices = 4
obj_required_replicas = 3 obj_required_replicas = 3
obj_required_devices = 4 obj_required_devices = 4
-policy_requirements = {'is_default': True}
+policy_requirements = {'policy_type': REPL_POLICY}
class ECProbeTest(ProbeTest):
acct_cont_required_replicas = 3
acct_cont_required_devices = 4
obj_required_replicas = 6
obj_required_devices = 8
policy_requirements = {'policy_type': EC_POLICY}
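ReplProbeTest and ECProbeTest now differ mainly in their policy_requirements. A hedged sketch of how a ProbeTest might match those requirements against configured policies (select_policy here is illustrative, not the actual helper):

    def select_policy(requirements):
        # pick the first enabled policy whose attributes satisfy the mapping
        for policy in ENABLED_POLICIES:
            if all(getattr(policy, attr, None) == value
                   for attr, value in requirements.items()):
                return policy
        raise SkipTest('No policy matching %r found' % (requirements,))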
if __name__ == "__main__": if __name__ == "__main__":


@ -26,7 +26,8 @@ from swift.common import utils, direct_client
from swift.common.storage_policy import POLICIES from swift.common.storage_policy import POLICIES
from swift.common.http import HTTP_NOT_FOUND from swift.common.http import HTTP_NOT_FOUND
from test.probe.brain import BrainSplitter from test.probe.brain import BrainSplitter
-from test.probe.common import ReplProbeTest, ENABLED_POLICIES
+from test.probe.common import (ReplProbeTest, ENABLED_POLICIES,
+POLICIES_BY_TYPE, REPL_POLICY)
from swiftclient import client, ClientException from swiftclient import client, ClientException
@ -234,6 +235,18 @@ class TestContainerMergePolicyIndex(ReplProbeTest):
orig_policy_index, node)) orig_policy_index, node))
def test_reconcile_manifest(self): def test_reconcile_manifest(self):
# this test is not only testing a split brain scenario on
# multiple policies with mis-placed objects - it even writes out
# a static large object directly to the storage nodes while the
# objects are unavailably mis-placed from *behind* the proxy and
# doesn't know how to do that for EC_POLICY (clayg: why did you
# guys let me write a test that does this!?) - so we force
# wrong_policy (where the manifest gets written) to be one of
# any of your configured REPL_POLICY (we know you have one
# because this is a ReplProbeTest)
wrong_policy = random.choice(POLICIES_BY_TYPE[REPL_POLICY])
policy = random.choice([p for p in ENABLED_POLICIES
if p is not wrong_policy])
manifest_data = [] manifest_data = []
def write_part(i): def write_part(i):
@ -250,17 +263,14 @@ class TestContainerMergePolicyIndex(ReplProbeTest):
# get an old container stashed
self.brain.stop_primary_half()
-policy = random.choice(ENABLED_POLICIES)
-self.brain.put_container(policy.idx)
+self.brain.put_container(int(policy))
self.brain.start_primary_half()
# write some parts
for i in range(10):
write_part(i)
self.brain.stop_handoff_half()
-wrong_policy = random.choice([p for p in ENABLED_POLICIES
-if p is not policy])
-self.brain.put_container(wrong_policy.idx)
+self.brain.put_container(int(wrong_policy))
# write some more parts # write some more parts
for i in range(10, 20): for i in range(10, 20):
write_part(i) write_part(i)


@ -44,7 +44,9 @@ class TestEmptyDevice(ReplProbeTest):
def test_main(self): def test_main(self):
# Create container # Create container
container = 'container-%s' % uuid4() container = 'container-%s' % uuid4()
-client.put_container(self.url, self.token, container)
+client.put_container(self.url, self.token, container,
+headers={'X-Storage-Policy':
+self.policy.name})
cpart, cnodes = self.container_ring.get_nodes(self.account, container) cpart, cnodes = self.container_ring.get_nodes(self.account, container)
cnode = cnodes[0] cnode = cnodes[0]
@ -58,7 +60,7 @@ class TestEmptyDevice(ReplProbeTest):
# Delete the default data directory for objects on the primary server # Delete the default data directory for objects on the primary server
obj_dir = '%s/%s' % (self._get_objects_dir(onode), obj_dir = '%s/%s' % (self._get_objects_dir(onode),
-get_data_dir(self.policy.idx))
+get_data_dir(self.policy))
shutil.rmtree(obj_dir, True) shutil.rmtree(obj_dir, True)
self.assertFalse(os.path.exists(obj_dir)) self.assertFalse(os.path.exists(obj_dir))


@ -108,7 +108,9 @@ class TestUpdateOverrides(ReplProbeTest):
'X-Backend-Container-Update-Override-Etag': 'override-etag', 'X-Backend-Container-Update-Override-Etag': 'override-etag',
'X-Backend-Container-Update-Override-Content-Type': 'override-type' 'X-Backend-Container-Update-Override-Content-Type': 'override-type'
} }
-client.put_container(self.url, self.token, 'c1')
+client.put_container(self.url, self.token, 'c1',
+headers={'X-Storage-Policy':
+self.policy.name})
self.int_client.upload_object(StringIO(u'stuff'), self.account, self.int_client.upload_object(StringIO(u'stuff'), self.account,
'c1', 'o1', headers) 'c1', 'o1', headers)


@ -52,7 +52,9 @@ def get_data_file_path(obj_dir):
class TestObjectFailures(ReplProbeTest): class TestObjectFailures(ReplProbeTest):
def _setup_data_file(self, container, obj, data): def _setup_data_file(self, container, obj, data):
-client.put_container(self.url, self.token, container)
+client.put_container(self.url, self.token, container,
+headers={'X-Storage-Policy':
+self.policy.name})
client.put_object(self.url, self.token, container, obj, data) client.put_object(self.url, self.token, container, obj, data)
odata = client.get_object(self.url, self.token, container, obj)[-1] odata = client.get_object(self.url, self.token, container, obj)[-1]
self.assertEquals(odata, data) self.assertEquals(odata, data)
@ -65,7 +67,7 @@ class TestObjectFailures(ReplProbeTest):
obj_server_conf = readconf(self.configs['object-server'][node_id]) obj_server_conf = readconf(self.configs['object-server'][node_id])
devices = obj_server_conf['app:object-server']['devices'] devices = obj_server_conf['app:object-server']['devices']
obj_dir = '%s/%s/%s/%s/%s/%s/' % (devices, device, obj_dir = '%s/%s/%s/%s/%s/%s/' % (devices, device,
-get_data_dir(self.policy.idx),
+get_data_dir(self.policy),
opart, hash_str[-3:], hash_str) opart, hash_str[-3:], hash_str)
data_file = get_data_file_path(obj_dir) data_file = get_data_file_path(obj_dir)
return onode, opart, data_file return onode, opart, data_file


@ -30,7 +30,9 @@ class TestObjectHandoff(ReplProbeTest):
def test_main(self): def test_main(self):
# Create container # Create container
container = 'container-%s' % uuid4() container = 'container-%s' % uuid4()
-client.put_container(self.url, self.token, container)
+client.put_container(self.url, self.token, container,
+headers={'X-Storage-Policy':
+self.policy.name})
# Kill one container/obj primary server # Kill one container/obj primary server
cpart, cnodes = self.container_ring.get_nodes(self.account, container) cpart, cnodes = self.container_ring.get_nodes(self.account, container)


@ -73,7 +73,8 @@ class Test(ReplProbeTest):
self.container_name = 'container-%s' % uuid.uuid4() self.container_name = 'container-%s' % uuid.uuid4()
self.object_name = 'object-%s' % uuid.uuid4() self.object_name = 'object-%s' % uuid.uuid4()
self.brain = BrainSplitter(self.url, self.token, self.container_name, self.brain = BrainSplitter(self.url, self.token, self.container_name,
-self.object_name, 'object')
+self.object_name, 'object',
+policy=self.policy)
self.tempdir = mkdtemp() self.tempdir = mkdtemp()
conf_path = os.path.join(self.tempdir, 'internal_client.conf') conf_path = os.path.join(self.tempdir, 'internal_client.conf')
conf_body = """ conf_body = """
@ -128,7 +129,7 @@ class Test(ReplProbeTest):
self.object_name) self.object_name)
def test_object_delete_is_replicated(self): def test_object_delete_is_replicated(self):
-self.brain.put_container(policy_index=0)
+self.brain.put_container(policy_index=int(self.policy))
# put object # put object
self._put_object() self._put_object()
@ -174,7 +175,7 @@ class Test(ReplProbeTest):
def test_sysmeta_after_replication_with_subsequent_post(self): def test_sysmeta_after_replication_with_subsequent_post(self):
sysmeta = {'x-object-sysmeta-foo': 'sysmeta-foo'} sysmeta = {'x-object-sysmeta-foo': 'sysmeta-foo'}
usermeta = {'x-object-meta-bar': 'meta-bar'} usermeta = {'x-object-meta-bar': 'meta-bar'}
-self.brain.put_container(policy_index=0)
+self.brain.put_container(policy_index=int(self.policy))
# put object # put object
self._put_object() self._put_object()
# put newer object with sysmeta to first server subset # put newer object with sysmeta to first server subset
@ -221,7 +222,7 @@ class Test(ReplProbeTest):
def test_sysmeta_after_replication_with_prior_post(self): def test_sysmeta_after_replication_with_prior_post(self):
sysmeta = {'x-object-sysmeta-foo': 'sysmeta-foo'} sysmeta = {'x-object-sysmeta-foo': 'sysmeta-foo'}
usermeta = {'x-object-meta-bar': 'meta-bar'} usermeta = {'x-object-meta-bar': 'meta-bar'}
-self.brain.put_container(policy_index=0)
+self.brain.put_container(policy_index=int(self.policy))
# put object # put object
self._put_object() self._put_object()


@ -0,0 +1,157 @@
#!/usr/bin/python -u
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hashlib import md5
import unittest
import uuid
import random
import os
import errno
from test.probe.common import ECProbeTest
from swift.common import direct_client
from swift.common.storage_policy import EC_POLICY
from swift.common.manager import Manager
from swiftclient import client
class Body(object):
def __init__(self, total=3.5 * 2 ** 20):
self.total = total
self.hasher = md5()
self.size = 0
self.chunk = 'test' * 16 * 2 ** 10
@property
def etag(self):
return self.hasher.hexdigest()
def __iter__(self):
return self
def next(self):
if self.size > self.total:
raise StopIteration()
self.size += len(self.chunk)
self.hasher.update(self.chunk)
return self.chunk
def __next__(self):
return self.next()
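Body is a self-hashing iterator: it streams fixed 64 KiB chunks until roughly `total` bytes have gone out, updating an md5 as it goes, so a test can compare the client-side etag with what landed on disk. A quick sanity sketch, using the md5 already imported in this module:

    body = Body(total=1024)      # small total for illustration
    data = ''.join(body)         # drains the iterator (one 64 KiB chunk here)
    assert body.etag == md5(data).hexdigest()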
class TestReconstructorPropDurable(ECProbeTest):
def setUp(self):
super(TestReconstructorPropDurable, self).setUp()
self.container_name = 'container-%s' % uuid.uuid4()
self.object_name = 'object-%s' % uuid.uuid4()
# sanity
self.assertEqual(self.policy.policy_type, EC_POLICY)
self.reconstructor = Manager(["object-reconstructor"])
def direct_get(self, node, part):
req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
headers, data = direct_client.direct_get_object(
node, part, self.account, self.container_name,
self.object_name, headers=req_headers,
resp_chunk_size=64 * 2 ** 20)
hasher = md5()
for chunk in data:
hasher.update(chunk)
return hasher.hexdigest()
def _check_node(self, node, part, etag, headers_post):
# get fragment archive etag
fragment_archive_etag = self.direct_get(node, part)
# remove the .durable from the selected node
part_dir = self.storage_dir('object', node, part=part)
for dirs, subdirs, files in os.walk(part_dir):
for fname in files:
if fname.endswith('.durable'):
durable = os.path.join(dirs, fname)
os.remove(durable)
break
try:
os.remove(os.path.join(part_dir, 'hashes.pkl'))
except OSError as e:
if e.errno != errno.ENOENT:
raise
# fire up reconstructor to propagate the .durable
self.reconstructor.once()
# fragment is still exactly as it was before!
self.assertEqual(fragment_archive_etag,
self.direct_get(node, part))
# check meta
meta = client.head_object(self.url, self.token,
self.container_name,
self.object_name)
for key in headers_post:
self.assertTrue(key in meta)
self.assertEqual(meta[key], headers_post[key])
def _format_node(self, node):
return '%s#%s' % (node['device'], node['index'])
def test_main(self):
# create EC container
headers = {'X-Storage-Policy': self.policy.name}
client.put_container(self.url, self.token, self.container_name,
headers=headers)
# PUT object
contents = Body()
headers = {'x-object-meta-foo': 'meta-foo'}
headers_post = {'x-object-meta-bar': 'meta-bar'}
etag = client.put_object(self.url, self.token,
self.container_name,
self.object_name,
contents=contents, headers=headers)
client.post_object(self.url, self.token, self.container_name,
self.object_name, headers=headers_post)
del headers_post['X-Auth-Token'] # WTF, where did this come from?
# build up a list of node lists to kill a .durable from,
# first try a single node
# then adjacent nodes and then nodes >1 node apart
opart, onodes = self.object_ring.get_nodes(
self.account, self.container_name, self.object_name)
single_node = [random.choice(onodes)]
adj_nodes = [onodes[0], onodes[-1]]
far_nodes = [onodes[0], onodes[-2]]
test_list = [single_node, adj_nodes, far_nodes]
for node_list in test_list:
for onode in node_list:
try:
self._check_node(onode, opart, etag, headers_post)
except AssertionError as e:
self.fail(
str(e) + '\n... for node %r of scenario %r' % (
self._format_node(onode),
[self._format_node(n) for n in node_list]))
if __name__ == "__main__":
unittest.main()


@ -0,0 +1,170 @@
#!/usr/bin/python -u
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hashlib import md5
import unittest
import uuid
import shutil
import random
from test.probe.common import ECProbeTest
from swift.common import direct_client
from swift.common.storage_policy import EC_POLICY
from swift.common.manager import Manager
from swiftclient import client
class Body(object):
def __init__(self, total=3.5 * 2 ** 20):
self.total = total
self.hasher = md5()
self.size = 0
self.chunk = 'test' * 16 * 2 ** 10
@property
def etag(self):
return self.hasher.hexdigest()
def __iter__(self):
return self
def next(self):
if self.size > self.total:
raise StopIteration()
self.size += len(self.chunk)
self.hasher.update(self.chunk)
return self.chunk
def __next__(self):
return self.next()
class TestReconstructorRebuild(ECProbeTest):
def setUp(self):
super(TestReconstructorRebuild, self).setUp()
self.container_name = 'container-%s' % uuid.uuid4()
self.object_name = 'object-%s' % uuid.uuid4()
# sanity
self.assertEqual(self.policy.policy_type, EC_POLICY)
self.reconstructor = Manager(["object-reconstructor"])
def proxy_get(self):
# GET object
headers, body = client.get_object(self.url, self.token,
self.container_name,
self.object_name,
resp_chunk_size=64 * 2 ** 10)
resp_checksum = md5()
for chunk in body:
resp_checksum.update(chunk)
return resp_checksum.hexdigest()
def direct_get(self, node, part):
req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
headers, data = direct_client.direct_get_object(
node, part, self.account, self.container_name,
self.object_name, headers=req_headers,
resp_chunk_size=64 * 2 ** 20)
hasher = md5()
for chunk in data:
hasher.update(chunk)
return hasher.hexdigest()
def _check_node(self, node, part, etag, headers_post):
# get fragment archive etag
fragment_archive_etag = self.direct_get(node, part)
# remove data from the selected node
part_dir = self.storage_dir('object', node, part=part)
shutil.rmtree(part_dir, True)
# this node can't serve the data any more
try:
self.direct_get(node, part)
except direct_client.DirectClientException as err:
self.assertEqual(err.http_status, 404)
else:
self.fail('Node data on %r was not fully destroyed!' %
(node,))
# make sure we can still GET the object and it's correct; the
# proxy decodes the remaining fragments to serve the object
self.assertEqual(etag, self.proxy_get())
# fire up reconstructor
self.reconstructor.once()
# fragment is rebuilt exactly as it was before!
self.assertEqual(fragment_archive_etag,
self.direct_get(node, part))
# check meta
meta = client.head_object(self.url, self.token,
self.container_name,
self.object_name)
for key in headers_post:
self.assertTrue(key in meta)
self.assertEqual(meta[key], headers_post[key])
def _format_node(self, node):
return '%s#%s' % (node['device'], node['index'])
def test_main(self):
# create EC container
headers = {'X-Storage-Policy': self.policy.name}
client.put_container(self.url, self.token, self.container_name,
headers=headers)
# PUT object
contents = Body()
headers = {'x-object-meta-foo': 'meta-foo'}
headers_post = {'x-object-meta-bar': 'meta-bar'}
etag = client.put_object(self.url, self.token,
self.container_name,
self.object_name,
contents=contents, headers=headers)
client.post_object(self.url, self.token, self.container_name,
self.object_name, headers=headers_post)
del headers_post['X-Auth-Token'] # WTF, where did this come from?
# build up a list of node lists to kill data from,
# first try a single node
# then adjacent nodes and then nodes >1 node apart
opart, onodes = self.object_ring.get_nodes(
self.account, self.container_name, self.object_name)
single_node = [random.choice(onodes)]
adj_nodes = [onodes[0], onodes[-1]]
far_nodes = [onodes[0], onodes[-2]]
test_list = [single_node, adj_nodes, far_nodes]
for node_list in test_list:
for onode in node_list:
try:
self._check_node(onode, opart, etag, headers_post)
except AssertionError as e:
self.fail(
str(e) + '\n... for node %r of scenario %r' % (
self._format_node(onode),
[self._format_node(n) for n in node_list]))
if __name__ == "__main__":
unittest.main()


@ -0,0 +1,258 @@
#!/usr/bin/python -u
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hashlib import md5
import unittest
import uuid
import os
from test.probe.common import ECProbeTest
from swift.common import direct_client
from swift.common.storage_policy import EC_POLICY
from swift.common.manager import Manager
from swift.common.utils import renamer
from swiftclient import client
class Body(object):
def __init__(self, total=3.5 * 2 ** 20):
self.total = total
self.hasher = md5()
self.size = 0
self.chunk = 'test' * 16 * 2 ** 10
@property
def etag(self):
return self.hasher.hexdigest()
def __iter__(self):
return self
def next(self):
if self.size > self.total:
raise StopIteration()
self.size += len(self.chunk)
self.hasher.update(self.chunk)
return self.chunk
def __next__(self):
return self.next()
class TestReconstructorRevert(ECProbeTest):
def setUp(self):
super(TestReconstructorRevert, self).setUp()
self.container_name = 'container-%s' % uuid.uuid4()
self.object_name = 'object-%s' % uuid.uuid4()
# sanity
self.assertEqual(self.policy.policy_type, EC_POLICY)
self.reconstructor = Manager(["object-reconstructor"])
def kill_drive(self, device):
if os.path.ismount(device):
os.system('sudo umount %s' % device)
else:
renamer(device, device + "X")
def revive_drive(self, device):
disabled_name = device + "X"
if os.path.isdir(disabled_name):
renamer(device + "X", device)
else:
os.system('sudo mount %s' % device)
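kill_drive/revive_drive toggle a device's availability: unmount it if it is a real mount, otherwise rename the directory aside so mount_check makes the object server answer 507. A sketch of the intended round trip inside one of these tests (the device path shown is an assumption):

    dev_path = self.device_dir('object', onodes[0])  # e.g. /srv/1/node/sdb1
    self.kill_drive(dev_path)    # unmounts, or renames to .../sdb1X
    # requests against this node now fail with 507 Insufficient Storage
    self.revive_drive(dev_path)  # remounts, or renames back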
def proxy_get(self):
# GET object
headers, body = client.get_object(self.url, self.token,
self.container_name,
self.object_name,
resp_chunk_size=64 * 2 ** 10)
resp_checksum = md5()
for chunk in body:
resp_checksum.update(chunk)
return resp_checksum.hexdigest()
def direct_get(self, node, part):
req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)}
headers, data = direct_client.direct_get_object(
node, part, self.account, self.container_name,
self.object_name, headers=req_headers,
resp_chunk_size=64 * 2 ** 20)
hasher = md5()
for chunk in data:
hasher.update(chunk)
return hasher.hexdigest()
def test_revert_object(self):
# create EC container
headers = {'X-Storage-Policy': self.policy.name}
client.put_container(self.url, self.token, self.container_name,
headers=headers)
# get our node lists
opart, onodes = self.object_ring.get_nodes(
self.account, self.container_name, self.object_name)
hnodes = self.object_ring.get_more_nodes(opart)
# kill 2 primary nodes (the parity count) so we can force data
# onto handoffs; we do that by renaming dev dirs to induce 507s
p_dev1 = self.device_dir('object', onodes[0])
p_dev2 = self.device_dir('object', onodes[1])
self.kill_drive(p_dev1)
self.kill_drive(p_dev2)
# PUT object
contents = Body()
headers = {'x-object-meta-foo': 'meta-foo'}
headers_post = {'x-object-meta-bar': 'meta-bar'}
client.put_object(self.url, self.token, self.container_name,
self.object_name, contents=contents,
headers=headers)
client.post_object(self.url, self.token, self.container_name,
self.object_name, headers=headers_post)
del headers_post['X-Auth-Token'] # WTF, where did this come from?
# these primaries can't serve the data any more; we expect 507
# here and not 404 because we're using mount_check to kill nodes
for onode in (onodes[0], onodes[1]):
try:
self.direct_get(onode, opart)
except direct_client.DirectClientException as err:
self.assertEqual(err.http_status, 507)
else:
self.fail('Node data on %r was not fully destroyed!' %
(onode,))
# now take out another primary
p_dev3 = self.device_dir('object', onodes[2])
self.kill_drive(p_dev3)
# this node can't serve the data any more
try:
self.direct_get(onodes[2], opart)
except direct_client.DirectClientException as err:
self.assertEqual(err.http_status, 507)
else:
self.fail('Node data on %r was not fully destroyed!' %
(onodes[2],))
# make sure we can still GET the object and it's correct;
# we're now pulling from handoffs and reconstructing
etag = self.proxy_get()
self.assertEqual(etag, contents.etag)
# rename the dev dirs so they don't 507 anymore
self.revive_drive(p_dev1)
self.revive_drive(p_dev2)
self.revive_drive(p_dev3)
# fire up reconstructor on handoff nodes only
for hnode in hnodes:
hnode_id = (hnode['port'] - 6000) / 10
self.reconstructor.once(number=hnode_id)
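The number passed to once() leans on the SAIO convention that object server N listens on port 6000 + 10 * N, hence the arithmetic above. A sketch:

    def node_number(node):
        # SAIO convention: object server N binds port 6000 + 10 * N
        return (node['port'] - 6000) / 10   # Python 2 integer division

    node_number({'port': 6020})   # -> 2 (run only the second object server)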
# first three primaries have data again
for onode in (onodes[0], onodes[2]):
self.direct_get(onode, opart)
# check meta
meta = client.head_object(self.url, self.token,
self.container_name,
self.object_name)
for key in headers_post:
self.assertTrue(key in meta)
self.assertEqual(meta[key], headers_post[key])
# handoffs are empty
for hnode in hnodes:
try:
self.direct_get(hnode, opart)
except direct_client.DirectClientException as err:
self.assertEqual(err.http_status, 404)
else:
self.fail('Node data on %r was not fully destroyed!' %
(hnode,))
def test_delete_propogate(self):
# create EC container
headers = {'X-Storage-Policy': self.policy.name}
client.put_container(self.url, self.token, self.container_name,
headers=headers)
# get our node lists
opart, onodes = self.object_ring.get_nodes(
self.account, self.container_name, self.object_name)
hnodes = self.object_ring.get_more_nodes(opart)
p_dev2 = self.device_dir('object', onodes[1])
# PUT object
contents = Body()
client.put_object(self.url, self.token, self.container_name,
self.object_name, contents=contents)
# now lets shut one down
self.kill_drive(p_dev2)
# delete on the ones that are left
client.delete_object(self.url, self.token,
self.container_name,
self.object_name)
# spot check a node
try:
self.direct_get(onodes[0], opart)
except direct_client.DirectClientException as err:
self.assertEqual(err.http_status, 404)
else:
self.fail('Node data on %r was not fully destroyed!' %
(onodes[0],))
# enable the first node again
self.revive_drive(p_dev2)
# propagate the delete...
# fire up reconstructor on handoff nodes only
for hnode in hnodes:
hnode_id = (hnode['port'] - 6000) / 10
self.reconstructor.once(number=hnode_id, override_devices=['sdb8'])
# check the node that was down to make sure the delete propagated
try:
self.direct_get(onodes[1], opart)
except direct_client.DirectClientException as err:
self.assertEqual(err.http_status, 404)
else:
self.fail('Node data on %r was not fully destroyed!' %
(onodes[1],))
# make sure proxy get can't find it
try:
self.proxy_get()
except Exception as err:
self.assertEqual(err.http_status, 404)
else:
self.fail('Proxy GET of %r unexpectedly succeeded after '
'delete!' % (self.object_name,))
if __name__ == "__main__":
unittest.main()


@ -21,7 +21,6 @@ import time
import shutil import shutil
from swiftclient import client from swiftclient import client
-from swift.common.storage_policy import POLICIES
from swift.obj.diskfile import get_data_dir from swift.obj.diskfile import get_data_dir
from test.probe.common import ReplProbeTest from test.probe.common import ReplProbeTest
@ -88,7 +87,7 @@ class TestReplicatorFunctions(ReplProbeTest):
# Delete file "hashes.pkl". # Delete file "hashes.pkl".
# Check, that all files were replicated. # Check, that all files were replicated.
path_list = [] path_list = []
-data_dir = get_data_dir(POLICIES.default.idx)
+data_dir = get_data_dir(self.policy)
# Figure out where the devices are # Figure out where the devices are
for node_id in range(1, 5): for node_id in range(1, 5):
conf = readconf(self.configs['object-server'][node_id]) conf = readconf(self.configs['object-server'][node_id])
@ -100,7 +99,9 @@ class TestReplicatorFunctions(ReplProbeTest):
# Put data to storage nodes # Put data to storage nodes
container = 'container-%s' % uuid4() container = 'container-%s' % uuid4()
-client.put_container(self.url, self.token, container)
+client.put_container(self.url, self.token, container,
+headers={'X-Storage-Policy':
+self.policy.name})
obj = 'object-%s' % uuid4() obj = 'object-%s' % uuid4()
client.put_object(self.url, self.token, container, obj, 'VERIFY') client.put_object(self.url, self.token, container, obj, 'VERIFY')

File diff suppressed because it is too large


@ -475,8 +475,8 @@ class TestObjectReplicator(unittest.TestCase):
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
policy=POLICIES.legacy) policy=POLICIES.legacy)
mkdirs(df._datadir) mkdirs(df._datadir)
-f = open(os.path.join(df._datadir,
-normalize_timestamp(time.time()) + '.data'),
-'wb')
+ts = normalize_timestamp(time.time())
+f = open(os.path.join(df._datadir, ts + '.data'),
+'wb')
f.write('1234567890') f.write('1234567890')
f.close() f.close()
@ -487,7 +487,7 @@ class TestObjectReplicator(unittest.TestCase):
self.assertTrue(os.access(part_path, os.F_OK)) self.assertTrue(os.access(part_path, os.F_OK))
def _fake_ssync(node, job, suffixes, **kwargs): def _fake_ssync(node, job, suffixes, **kwargs):
-return True, set([ohash])
+return True, {ohash: ts}
self.replicator.sync_method = _fake_ssync self.replicator.sync_method = _fake_ssync
self.replicator.replicate() self.replicator.replicate()
@ -707,8 +707,8 @@ class TestObjectReplicator(unittest.TestCase):
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
policy=POLICIES.legacy) policy=POLICIES.legacy)
mkdirs(df._datadir) mkdirs(df._datadir)
-f = open(os.path.join(df._datadir,
-normalize_timestamp(time.time()) + '.data'),
-'wb')
+ts = normalize_timestamp(time.time())
+f = open(os.path.join(df._datadir, ts + '.data'),
+'wb')
f.write('0') f.write('0')
f.close() f.close()
@ -723,14 +723,14 @@ class TestObjectReplicator(unittest.TestCase):
def _fake_ssync(node, job, suffixes, **kwargs):
success = True
-ret_val = [whole_path_from]
+ret_val = {ohash: ts}
if self.call_nums == 2:
# ssync should return (True, {}) only when the second
# candidate node has not got the replica yet
success = False
-ret_val = []
+ret_val = {}
self.call_nums += 1
-return success, set(ret_val)
+return success, ret_val
self.replicator.sync_method = _fake_ssync self.replicator.sync_method = _fake_ssync
self.replicator.replicate() self.replicator.replicate()
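These hunks track an interface change: ssync_sender now reports per-object timestamps, so the replicator's sync_method returns (success, {object_hash: timestamp}) rather than (success, set-of-synced-paths). A sketch of the contract the replicator consumes (names here are illustrative):

    success, in_sync_objs = self.replicator.sync_method(node, job, suffixes)
    if success:
        for object_hash, timestamp in in_sync_objs.items():
            # the remote now holds this object at (at least) this timestamp,
            # so a handoff holding the same version is safe to remove
            pass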
@ -755,10 +755,9 @@ class TestObjectReplicator(unittest.TestCase):
mock_http_connect(200)): mock_http_connect(200)):
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
policy=POLICIES.legacy) policy=POLICIES.legacy)
+ts = normalize_timestamp(time.time())
mkdirs(df._datadir)
-f = open(os.path.join(df._datadir,
-normalize_timestamp(time.time()) + '.data'),
-'wb')
+f = open(os.path.join(df._datadir, ts + '.data'), 'wb')
f.write('0') f.write('0')
f.close() f.close()
ohash = hash_path('a', 'c', 'o') ohash = hash_path('a', 'c', 'o')
@ -771,14 +770,14 @@ class TestObjectReplicator(unittest.TestCase):
def _fake_ssync(node, job, suffixes, **kwargs):
success = False
-ret_val = []
+ret_val = {}
if self.call_nums == 2:
# ssync should return (True, {}) only when the second
# candidate node has not got the replica yet
success = True
-ret_val = [whole_path_from]
+ret_val = {ohash: ts}
self.call_nums += 1
-return success, set(ret_val)
+return success, ret_val
self.replicator.sync_method = _fake_ssync self.replicator.sync_method = _fake_ssync
self.replicator.replicate() self.replicator.replicate()
@ -805,9 +804,8 @@ class TestObjectReplicator(unittest.TestCase):
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
policy=POLICIES.legacy) policy=POLICIES.legacy)
mkdirs(df._datadir) mkdirs(df._datadir)
-f = open(os.path.join(df._datadir,
-normalize_timestamp(time.time()) + '.data'),
-'wb')
+ts = normalize_timestamp(time.time())
+f = open(os.path.join(df._datadir, ts + '.data'), 'wb')
f.write('0') f.write('0')
f.close() f.close()
ohash = hash_path('a', 'c', 'o') ohash = hash_path('a', 'c', 'o')
@ -818,16 +816,16 @@ class TestObjectReplicator(unittest.TestCase):
self.call_nums = 0 self.call_nums = 0
self.conf['sync_method'] = 'ssync' self.conf['sync_method'] = 'ssync'
-in_sync_objs = []
+in_sync_objs = {}
def _fake_ssync(node, job, suffixes, remote_check_objs=None): def _fake_ssync(node, job, suffixes, remote_check_objs=None):
self.call_nums += 1 self.call_nums += 1
if remote_check_objs is None: if remote_check_objs is None:
# sync job # sync job
-ret_val = [whole_path_from]
+ret_val = {ohash: ts}
else: else:
ret_val = in_sync_objs ret_val = in_sync_objs
-return True, set(ret_val)
+return True, ret_val
self.replicator.sync_method = _fake_ssync self.replicator.sync_method = _fake_ssync
self.replicator.replicate() self.replicator.replicate()
@ -847,9 +845,8 @@ class TestObjectReplicator(unittest.TestCase):
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
policy=POLICIES.legacy) policy=POLICIES.legacy)
mkdirs(df._datadir) mkdirs(df._datadir)
-f = open(os.path.join(df._datadir,
-normalize_timestamp(time.time()) + '.data'),
-'wb')
+ts = normalize_timestamp(time.time())
+f = open(os.path.join(df._datadir, ts + '.data'), 'wb')
f.write('0') f.write('0')
f.close() f.close()
ohash = hash_path('a', 'c', 'o') ohash = hash_path('a', 'c', 'o')
@ -863,14 +860,14 @@ class TestObjectReplicator(unittest.TestCase):
def _fake_ssync(node, job, suffixes, **kwargs):
success = True
-ret_val = [whole_path_from]
+ret_val = {ohash: ts}
if self.call_nums == 2:
# ssync should return (True, {}) only when the second
# candidate node has not got the replica yet
success = False
-ret_val = []
+ret_val = {}
self.call_nums += 1
-return success, set(ret_val)
+return success, ret_val
rmdir_func = os.rmdir rmdir_func = os.rmdir


@ -1417,7 +1417,7 @@ class TestObjectController(unittest.TestCase):
resp = server_handler.OPTIONS(req) resp = server_handler.OPTIONS(req)
self.assertEquals(200, resp.status_int) self.assertEquals(200, resp.status_int)
for verb in 'OPTIONS GET POST PUT DELETE HEAD REPLICATE \
-REPLICATION'.split():
+SSYNC'.split():
self.assertTrue( self.assertTrue(
verb in resp.headers['Allow'].split(', ')) verb in resp.headers['Allow'].split(', '))
self.assertEquals(len(resp.headers['Allow'].split(', ')), 8) self.assertEquals(len(resp.headers['Allow'].split(', ')), 8)
@ -4409,9 +4409,9 @@ class TestObjectController(unittest.TestCase):
resp = req.get_response(self.object_controller) resp = req.get_response(self.object_controller)
self.assertEqual(resp.status_int, 507) self.assertEqual(resp.status_int, 507)
-def test_REPLICATION_can_be_called(self):
+def test_SSYNC_can_be_called(self):
req = Request.blank('/sda1/p/other/suff',
-environ={'REQUEST_METHOD': 'REPLICATION'},
+environ={'REQUEST_METHOD': 'SSYNC'},
headers={}) headers={})
resp = req.get_response(self.object_controller) resp = req.get_response(self.object_controller)
self.assertEqual(resp.status_int, 200) self.assertEqual(resp.status_int, 200)
@ -4502,7 +4502,7 @@ class TestObjectController(unittest.TestCase):
def test_list_allowed_methods(self): def test_list_allowed_methods(self):
# Test list of allowed_methods # Test list of allowed_methods
obj_methods = ['DELETE', 'PUT', 'HEAD', 'GET', 'POST'] obj_methods = ['DELETE', 'PUT', 'HEAD', 'GET', 'POST']
-repl_methods = ['REPLICATE', 'REPLICATION']
+repl_methods = ['REPLICATE', 'SSYNC']
for method_name in obj_methods: for method_name in obj_methods:
method = getattr(self.object_controller, method_name) method = getattr(self.object_controller, method_name)
self.assertFalse(hasattr(method, 'replication')) self.assertFalse(hasattr(method, 'replication'))


@ -93,14 +93,14 @@ class TestReceiver(unittest.TestCase):
lines.append(line) lines.append(line)
return lines return lines
-def test_REPLICATION_semaphore_locked(self):
+def test_SSYNC_semaphore_locked(self):
with mock.patch.object( with mock.patch.object(
self.controller, 'replication_semaphore') as \ self.controller, 'replication_semaphore') as \
mocked_replication_semaphore: mocked_replication_semaphore:
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
mocked_replication_semaphore.acquire.return_value = False mocked_replication_semaphore.acquire.return_value = False
req = swob.Request.blank( req = swob.Request.blank(
-'/device/partition', environ={'REQUEST_METHOD': 'REPLICATION'})
+'/device/partition', environ={'REQUEST_METHOD': 'SSYNC'})
resp = req.get_response(self.controller) resp = req.get_response(self.controller)
self.assertEqual( self.assertEqual(
self.body_lines(resp.body), self.body_lines(resp.body),
@ -111,13 +111,13 @@ class TestReceiver(unittest.TestCase):
self.assertFalse(self.controller.logger.error.called) self.assertFalse(self.controller.logger.error.called)
self.assertFalse(self.controller.logger.exception.called) self.assertFalse(self.controller.logger.exception.called)
-def test_REPLICATION_calls_replication_lock(self):
+def test_SSYNC_calls_replication_lock(self):
with mock.patch.object( with mock.patch.object(
self.controller._diskfile_router[POLICIES.legacy], self.controller._diskfile_router[POLICIES.legacy],
'replication_lock') as mocked_replication_lock: 'replication_lock') as mocked_replication_lock:
req = swob.Request.blank( req = swob.Request.blank(
'/sda1/1', '/sda1/1',
-environ={'REQUEST_METHOD': 'REPLICATION'},
+environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n' body=':MISSING_CHECK: START\r\n'
':MISSING_CHECK: END\r\n' ':MISSING_CHECK: END\r\n'
':UPDATES: START\r\n:UPDATES: END\r\n') ':UPDATES: START\r\n:UPDATES: END\r\n')
@ -132,7 +132,7 @@ class TestReceiver(unittest.TestCase):
def test_Receiver_with_default_storage_policy(self): def test_Receiver_with_default_storage_policy(self):
req = swob.Request.blank( req = swob.Request.blank(
'/sda1/1', '/sda1/1',
-environ={'REQUEST_METHOD': 'REPLICATION'},
+environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n' body=':MISSING_CHECK: START\r\n'
':MISSING_CHECK: END\r\n' ':MISSING_CHECK: END\r\n'
':UPDATES: START\r\n:UPDATES: END\r\n') ':UPDATES: START\r\n:UPDATES: END\r\n')
@ -145,9 +145,12 @@ class TestReceiver(unittest.TestCase):
self.assertEqual(rcvr.policy, POLICIES[0]) self.assertEqual(rcvr.policy, POLICIES[0])
def test_Receiver_with_storage_policy_index_header(self): def test_Receiver_with_storage_policy_index_header(self):
# update router post policy patch
self.controller._diskfile_router = diskfile.DiskFileRouter(
self.conf, self.controller.logger)
req = swob.Request.blank( req = swob.Request.blank(
'/sda1/1', '/sda1/1',
-environ={'REQUEST_METHOD': 'REPLICATION',
+environ={'REQUEST_METHOD': 'SSYNC',
'HTTP_X_BACKEND_STORAGE_POLICY_INDEX': '1'}, 'HTTP_X_BACKEND_STORAGE_POLICY_INDEX': '1'},
body=':MISSING_CHECK: START\r\n' body=':MISSING_CHECK: START\r\n'
':MISSING_CHECK: END\r\n' ':MISSING_CHECK: END\r\n'
@ -159,6 +162,7 @@ class TestReceiver(unittest.TestCase):
[':MISSING_CHECK: START', ':MISSING_CHECK: END', [':MISSING_CHECK: START', ':MISSING_CHECK: END',
':UPDATES: START', ':UPDATES: END']) ':UPDATES: START', ':UPDATES: END'])
self.assertEqual(rcvr.policy, POLICIES[1]) self.assertEqual(rcvr.policy, POLICIES[1])
self.assertEqual(rcvr.frag_index, None)
def test_Receiver_with_bad_storage_policy_index_header(self): def test_Receiver_with_bad_storage_policy_index_header(self):
valid_indices = sorted([int(policy) for policy in POLICIES]) valid_indices = sorted([int(policy) for policy in POLICIES])
@ -166,6 +170,7 @@ class TestReceiver(unittest.TestCase):
req = swob.Request.blank( req = swob.Request.blank(
'/sda1/1', '/sda1/1',
environ={'REQUEST_METHOD': 'SSYNC', environ={'REQUEST_METHOD': 'SSYNC',
'HTTP_X_BACKEND_SSYNC_FRAG_INDEX': '0',
'HTTP_X_BACKEND_STORAGE_POLICY_INDEX': bad_index}, 'HTTP_X_BACKEND_STORAGE_POLICY_INDEX': bad_index},
body=':MISSING_CHECK: START\r\n' body=':MISSING_CHECK: START\r\n'
':MISSING_CHECK: END\r\n' ':MISSING_CHECK: END\r\n'
@ -175,7 +180,29 @@ class TestReceiver(unittest.TestCase):
body_lines = [chunk.strip() for chunk in receiver() if chunk.strip()] body_lines = [chunk.strip() for chunk in receiver() if chunk.strip()]
self.assertEqual(body_lines, [":ERROR: 503 'No policy with index 2'"]) self.assertEqual(body_lines, [":ERROR: 503 'No policy with index 2'"])
-def test_REPLICATION_replication_lock_fail(self):
+@unit.patch_policies()
def test_Receiver_with_frag_index_header(self):
# update router post policy patch
self.controller._diskfile_router = diskfile.DiskFileRouter(
self.conf, self.controller.logger)
req = swob.Request.blank(
'/sda1/1',
environ={'REQUEST_METHOD': 'SSYNC',
'HTTP_X_BACKEND_SSYNC_FRAG_INDEX': '7',
'HTTP_X_BACKEND_STORAGE_POLICY_INDEX': '1'},
body=':MISSING_CHECK: START\r\n'
':MISSING_CHECK: END\r\n'
':UPDATES: START\r\n:UPDATES: END\r\n')
rcvr = ssync_receiver.Receiver(self.controller, req)
body_lines = [chunk.strip() for chunk in rcvr() if chunk.strip()]
self.assertEqual(
body_lines,
[':MISSING_CHECK: START', ':MISSING_CHECK: END',
':UPDATES: START', ':UPDATES: END'])
self.assertEqual(rcvr.policy, POLICIES[1])
self.assertEqual(rcvr.frag_index, 7)
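The receiver derives its policy and, for EC, its fragment index from two backend headers. A sketch of the request an EC ssync sender would open, with the values from this test:

    req = swob.Request.blank(
        '/sda1/1',
        environ={'REQUEST_METHOD': 'SSYNC',
                 'HTTP_X_BACKEND_SSYNC_FRAG_INDEX': '7',       # EC fragment index
                 'HTTP_X_BACKEND_STORAGE_POLICY_INDEX': '1'})  # storage policy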
def test_SSYNC_replication_lock_fail(self):
def _mock(path): def _mock(path):
with exceptions.ReplicationLockTimeout(0.01, '/somewhere/' + path): with exceptions.ReplicationLockTimeout(0.01, '/somewhere/' + path):
eventlet.sleep(0.05) eventlet.sleep(0.05)
@ -185,7 +212,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/sda1/1', '/sda1/1',
-environ={'REQUEST_METHOD': 'REPLICATION'},
+environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n' body=':MISSING_CHECK: START\r\n'
':MISSING_CHECK: END\r\n' ':MISSING_CHECK: END\r\n'
':UPDATES: START\r\n:UPDATES: END\r\n') ':UPDATES: START\r\n:UPDATES: END\r\n')
@ -194,15 +221,15 @@ class TestReceiver(unittest.TestCase):
self.body_lines(resp.body), self.body_lines(resp.body),
[":ERROR: 0 '0.01 seconds: /somewhere/sda1'"]) [":ERROR: 0 '0.01 seconds: /somewhere/sda1'"])
self.controller.logger.debug.assert_called_once_with( self.controller.logger.debug.assert_called_once_with(
'None/sda1/1 REPLICATION LOCK TIMEOUT: 0.01 seconds: ' 'None/sda1/1 SSYNC LOCK TIMEOUT: 0.01 seconds: '
'/somewhere/sda1') '/somewhere/sda1')
-def test_REPLICATION_initial_path(self):
+def test_SSYNC_initial_path(self):
with mock.patch.object( with mock.patch.object(
self.controller, 'replication_semaphore') as \ self.controller, 'replication_semaphore') as \
mocked_replication_semaphore: mocked_replication_semaphore:
req = swob.Request.blank( req = swob.Request.blank(
-'/device', environ={'REQUEST_METHOD': 'REPLICATION'})
+'/device', environ={'REQUEST_METHOD': 'SSYNC'})
resp = req.get_response(self.controller) resp = req.get_response(self.controller)
self.assertEqual( self.assertEqual(
self.body_lines(resp.body), self.body_lines(resp.body),
@ -215,7 +242,7 @@ class TestReceiver(unittest.TestCase):
self.controller, 'replication_semaphore') as \ self.controller, 'replication_semaphore') as \
mocked_replication_semaphore: mocked_replication_semaphore:
req = swob.Request.blank( req = swob.Request.blank(
-'/device/', environ={'REQUEST_METHOD': 'REPLICATION'})
+'/device/', environ={'REQUEST_METHOD': 'SSYNC'})
resp = req.get_response(self.controller) resp = req.get_response(self.controller)
self.assertEqual( self.assertEqual(
self.body_lines(resp.body), self.body_lines(resp.body),
@ -228,7 +255,7 @@ class TestReceiver(unittest.TestCase):
self.controller, 'replication_semaphore') as \ self.controller, 'replication_semaphore') as \
mocked_replication_semaphore: mocked_replication_semaphore:
req = swob.Request.blank( req = swob.Request.blank(
-'/device/partition', environ={'REQUEST_METHOD': 'REPLICATION'})
+'/device/partition', environ={'REQUEST_METHOD': 'SSYNC'})
resp = req.get_response(self.controller) resp = req.get_response(self.controller)
self.assertEqual( self.assertEqual(
self.body_lines(resp.body), self.body_lines(resp.body),
@ -242,7 +269,7 @@ class TestReceiver(unittest.TestCase):
mocked_replication_semaphore: mocked_replication_semaphore:
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition/junk', '/device/partition/junk',
-environ={'REQUEST_METHOD': 'REPLICATION'})
+environ={'REQUEST_METHOD': 'SSYNC'})
resp = req.get_response(self.controller) resp = req.get_response(self.controller)
self.assertEqual( self.assertEqual(
self.body_lines(resp.body), self.body_lines(resp.body),
@ -251,7 +278,7 @@ class TestReceiver(unittest.TestCase):
self.assertFalse(mocked_replication_semaphore.acquire.called) self.assertFalse(mocked_replication_semaphore.acquire.called)
self.assertFalse(mocked_replication_semaphore.release.called) self.assertFalse(mocked_replication_semaphore.release.called)
-def test_REPLICATION_mount_check(self):
+def test_SSYNC_mount_check(self):
with contextlib.nested( with contextlib.nested(
mock.patch.object( mock.patch.object(
self.controller, 'replication_semaphore'), self.controller, 'replication_semaphore'),
@ -264,7 +291,7 @@ class TestReceiver(unittest.TestCase):
mocked_mount_check, mocked_mount_check,
mocked_check_mount): mocked_check_mount):
req = swob.Request.blank( req = swob.Request.blank(
-'/device/partition', environ={'REQUEST_METHOD': 'REPLICATION'})
+'/device/partition', environ={'REQUEST_METHOD': 'SSYNC'})
resp = req.get_response(self.controller) resp = req.get_response(self.controller)
self.assertEqual( self.assertEqual(
self.body_lines(resp.body), self.body_lines(resp.body),
@ -284,7 +311,7 @@ class TestReceiver(unittest.TestCase):
mocked_mount_check, mocked_mount_check,
mocked_check_mount): mocked_check_mount):
req = swob.Request.blank( req = swob.Request.blank(
-'/device/partition', environ={'REQUEST_METHOD': 'REPLICATION'})
+'/device/partition', environ={'REQUEST_METHOD': 'SSYNC'})
resp = req.get_response(self.controller) resp = req.get_response(self.controller)
self.assertEqual( self.assertEqual(
self.body_lines(resp.body), self.body_lines(resp.body),
@ -299,7 +326,7 @@ class TestReceiver(unittest.TestCase):
mocked_check_mount.reset_mock() mocked_check_mount.reset_mock()
mocked_check_mount.return_value = True mocked_check_mount.return_value = True
req = swob.Request.blank( req = swob.Request.blank(
-'/device/partition', environ={'REQUEST_METHOD': 'REPLICATION'})
+'/device/partition', environ={'REQUEST_METHOD': 'SSYNC'})
resp = req.get_response(self.controller) resp = req.get_response(self.controller)
self.assertEqual( self.assertEqual(
self.body_lines(resp.body), self.body_lines(resp.body),
@ -309,7 +336,7 @@ class TestReceiver(unittest.TestCase):
self.controller._diskfile_router[POLICIES.legacy].devices, self.controller._diskfile_router[POLICIES.legacy].devices,
'device') 'device')
-def test_REPLICATION_Exception(self):
+def test_SSYNC_Exception(self):
class _Wrapper(StringIO.StringIO): class _Wrapper(StringIO.StringIO):
@ -326,7 +353,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
-environ={'REQUEST_METHOD': 'REPLICATION'},
+environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\nBad content is here') ':UPDATES: START\r\nBad content is here')
req.remote_addr = '1.2.3.4' req.remote_addr = '1.2.3.4'
@ -344,7 +371,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger.exception.assert_called_once_with( self.controller.logger.exception.assert_called_once_with(
'1.2.3.4/device/partition EXCEPTION in replication.Receiver') '1.2.3.4/device/partition EXCEPTION in replication.Receiver')
-def test_REPLICATION_Exception_Exception(self):
+def test_SSYNC_Exception_Exception(self):
class _Wrapper(StringIO.StringIO): class _Wrapper(StringIO.StringIO):
@ -361,7 +388,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
-environ={'REQUEST_METHOD': 'REPLICATION'},
+environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\nBad content is here') ':UPDATES: START\r\nBad content is here')
req.remote_addr = mock.MagicMock() req.remote_addr = mock.MagicMock()
@ -404,7 +431,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/sda1/1', '/sda1/1',
-environ={'REQUEST_METHOD': 'REPLICATION'},
+environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n' body=':MISSING_CHECK: START\r\n'
'hash ts\r\n' 'hash ts\r\n'
':MISSING_CHECK: END\r\n' ':MISSING_CHECK: END\r\n'
@ -446,7 +473,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/sda1/1', '/sda1/1',
-environ={'REQUEST_METHOD': 'REPLICATION'},
+environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n' body=':MISSING_CHECK: START\r\n'
'hash ts\r\n' 'hash ts\r\n'
':MISSING_CHECK: END\r\n' ':MISSING_CHECK: END\r\n'
@ -468,7 +495,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/sda1/1', '/sda1/1',
-environ={'REQUEST_METHOD': 'REPLICATION'},
+environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n' body=':MISSING_CHECK: START\r\n'
':MISSING_CHECK: END\r\n' ':MISSING_CHECK: END\r\n'
':UPDATES: START\r\n:UPDATES: END\r\n') ':UPDATES: START\r\n:UPDATES: END\r\n')
@ -486,7 +513,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/sda1/1', '/sda1/1',
-environ={'REQUEST_METHOD': 'REPLICATION'},
+environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n' + body=':MISSING_CHECK: START\r\n' +
self.hash1 + ' ' + self.ts1 + '\r\n' + self.hash1 + ' ' + self.ts1 + '\r\n' +
self.hash2 + ' ' + self.ts2 + '\r\n' self.hash2 + ' ' + self.ts2 + '\r\n'
@ -504,6 +531,32 @@ class TestReceiver(unittest.TestCase):
self.assertFalse(self.controller.logger.error.called) self.assertFalse(self.controller.logger.error.called)
self.assertFalse(self.controller.logger.exception.called) self.assertFalse(self.controller.logger.exception.called)
def test_MISSING_CHECK_extra_line_parts(self):
# check that rx tolerates extra parts in missing check lines to
# allow for protocol upgrades
extra_1 = 'extra'
extra_2 = 'multiple extra parts'
self.controller.logger = mock.MagicMock()
req = swob.Request.blank(
'/sda1/1',
environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n' +
self.hash1 + ' ' + self.ts1 + ' ' + extra_1 + '\r\n' +
self.hash2 + ' ' + self.ts2 + ' ' + extra_2 + '\r\n'
':MISSING_CHECK: END\r\n'
':UPDATES: START\r\n:UPDATES: END\r\n')
resp = req.get_response(self.controller)
self.assertEqual(
self.body_lines(resp.body),
[':MISSING_CHECK: START',
self.hash1,
self.hash2,
':MISSING_CHECK: END',
':UPDATES: START', ':UPDATES: END'])
self.assertEqual(resp.status_int, 200)
self.assertFalse(self.controller.logger.error.called)
self.assertFalse(self.controller.logger.exception.called)
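The missing_check wire format is one '<hash> <timestamp>' pair per line, and this test pins down that trailing fields are ignored so the protocol can grow. A tolerant parse consistent with that behavior (a sketch, not the receiver's actual code):

    def parse_missing_check_line(line):
        # only the first two fields matter; extras are tolerated for upgrades
        parts = line.split()
        object_hash, timestamp = parts[0], parts[1]
        return object_hash, timestamp

    parse_missing_check_line('d41d8cd98f00b204e9800998ecf8427e 1414531200.00000 extra')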
def test_MISSING_CHECK_have_one_exact(self): def test_MISSING_CHECK_have_one_exact(self):
object_dir = utils.storage_directory( object_dir = utils.storage_directory(
os.path.join(self.testdir, 'sda1', os.path.join(self.testdir, 'sda1',
@ -519,7 +572,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/sda1/1', '/sda1/1',
-environ={'REQUEST_METHOD': 'REPLICATION'},
+environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n' + body=':MISSING_CHECK: START\r\n' +
self.hash1 + ' ' + self.ts1 + '\r\n' + self.hash1 + ' ' + self.ts1 + '\r\n' +
self.hash2 + ' ' + self.ts2 + '\r\n' self.hash2 + ' ' + self.ts2 + '\r\n'
@ -537,6 +590,9 @@ class TestReceiver(unittest.TestCase):
self.assertFalse(self.controller.logger.exception.called) self.assertFalse(self.controller.logger.exception.called)
def test_MISSING_CHECK_storage_policy(self): def test_MISSING_CHECK_storage_policy(self):
# update router post policy patch
self.controller._diskfile_router = diskfile.DiskFileRouter(
self.conf, self.controller.logger)
object_dir = utils.storage_directory( object_dir = utils.storage_directory(
os.path.join(self.testdir, 'sda1', os.path.join(self.testdir, 'sda1',
diskfile.get_data_dir(POLICIES[1])), diskfile.get_data_dir(POLICIES[1])),
@ -551,7 +607,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/sda1/1', '/sda1/1',
-environ={'REQUEST_METHOD': 'REPLICATION',
+environ={'REQUEST_METHOD': 'SSYNC',
'HTTP_X_BACKEND_STORAGE_POLICY_INDEX': '1'}, 'HTTP_X_BACKEND_STORAGE_POLICY_INDEX': '1'},
body=':MISSING_CHECK: START\r\n' + body=':MISSING_CHECK: START\r\n' +
self.hash1 + ' ' + self.ts1 + '\r\n' + self.hash1 + ' ' + self.ts1 + '\r\n' +
@ -586,7 +642,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/sda1/1', '/sda1/1',
-environ={'REQUEST_METHOD': 'REPLICATION'},
+environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n' + body=':MISSING_CHECK: START\r\n' +
self.hash1 + ' ' + self.ts1 + '\r\n' + self.hash1 + ' ' + self.ts1 + '\r\n' +
self.hash2 + ' ' + self.ts2 + '\r\n' self.hash2 + ' ' + self.ts2 + '\r\n'
@ -620,7 +676,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/sda1/1', '/sda1/1',
-environ={'REQUEST_METHOD': 'REPLICATION'},
+environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n' + body=':MISSING_CHECK: START\r\n' +
self.hash1 + ' ' + self.ts1 + '\r\n' + self.hash1 + ' ' + self.ts1 + '\r\n' +
self.hash2 + ' ' + self.ts2 + '\r\n' self.hash2 + ' ' + self.ts2 + '\r\n'
@ -662,7 +718,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'DELETE /a/c/o\r\n' 'DELETE /a/c/o\r\n'
@ -709,7 +765,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'DELETE /a/c/o\r\n' 'DELETE /a/c/o\r\n'
@ -752,7 +808,7 @@ class TestReceiver(unittest.TestCase):
mock_shutdown_safe, mock_delete): mock_shutdown_safe, mock_delete):
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'DELETE /a/c/o\r\n' 'DELETE /a/c/o\r\n'
@ -774,7 +830,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'bad_subrequest_line\r\n') 'bad_subrequest_line\r\n')
@ -793,7 +849,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'DELETE /a/c/o\r\n' 'DELETE /a/c/o\r\n'
@ -813,7 +869,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'DELETE /a/c/o\r\n') 'DELETE /a/c/o\r\n')
@ -830,7 +886,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'DELETE /a/c/o\r\n' 'DELETE /a/c/o\r\n'
@ -847,7 +903,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'DELETE /a/c/o\r\n' 'DELETE /a/c/o\r\n'
@ -866,7 +922,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'PUT /a/c/o\r\n' 'PUT /a/c/o\r\n'
@ -884,7 +940,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'DELETE /a/c/o\r\n' 'DELETE /a/c/o\r\n'
@ -902,7 +958,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'PUT /a/c/o\r\n\r\n') 'PUT /a/c/o\r\n\r\n')
@ -919,7 +975,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'PUT /a/c/o\r\n' 'PUT /a/c/o\r\n'
@ -949,7 +1005,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'DELETE /a/c/o\r\n\r\n' 'DELETE /a/c/o\r\n\r\n'
@ -972,7 +1028,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'DELETE /a/c/o\r\n\r\n' 'DELETE /a/c/o\r\n\r\n'
@ -998,7 +1054,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'DELETE /a/c/o\r\n\r\n' 'DELETE /a/c/o\r\n\r\n'
@ -1026,7 +1082,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'DELETE /a/c/o\r\n\r\n' 'DELETE /a/c/o\r\n\r\n'
@ -1059,7 +1115,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'PUT /a/c/o\r\n' 'PUT /a/c/o\r\n'
@ -1096,6 +1152,9 @@ class TestReceiver(unittest.TestCase):
self.assertEqual(req.read_body, '1') self.assertEqual(req.read_body, '1')
def test_UPDATES_with_storage_policy(self): def test_UPDATES_with_storage_policy(self):
# update router post policy patch
self.controller._diskfile_router = diskfile.DiskFileRouter(
self.conf, self.controller.logger)
_PUT_request = [None] _PUT_request = [None]
@server.public @server.public
@ -1108,7 +1167,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION', environ={'REQUEST_METHOD': 'SSYNC',
'HTTP_X_BACKEND_STORAGE_POLICY_INDEX': '1'}, 'HTTP_X_BACKEND_STORAGE_POLICY_INDEX': '1'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
@ -1157,7 +1216,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'DELETE /a/c/o\r\n' 'DELETE /a/c/o\r\n'
@ -1192,7 +1251,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'BONK /a/c/o\r\n' 'BONK /a/c/o\r\n'
@ -1228,7 +1287,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'PUT /a/c/o1\r\n' 'PUT /a/c/o1\r\n'
@ -1339,7 +1398,7 @@ class TestReceiver(unittest.TestCase):
self.assertEqual(_requests, []) self.assertEqual(_requests, [])
def test_UPDATES_subreq_does_not_read_all(self): def test_UPDATES_subreq_does_not_read_all(self):
# This tests that if a REPLICATION subrequest fails and doesn't read # This tests that if a SSYNC subrequest fails and doesn't read
# all the subrequest body that it will read and throw away the rest of # all the subrequest body that it will read and throw away the rest of
# the body before moving on to the next subrequest. # the body before moving on to the next subrequest.
# If you comment out the part in ssync_receiver where it does: # If you comment out the part in ssync_receiver where it does:
@ -1368,7 +1427,7 @@ class TestReceiver(unittest.TestCase):
self.controller.logger = mock.MagicMock() self.controller.logger = mock.MagicMock()
req = swob.Request.blank( req = swob.Request.blank(
'/device/partition', '/device/partition',
environ={'REQUEST_METHOD': 'REPLICATION'}, environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n' ':UPDATES: START\r\n'
'PUT /a/c/o1\r\n' 'PUT /a/c/o1\r\n'
View File
@@ -22,18 +22,23 @@ import time
import unittest
import eventlet
import itertools
import mock
from swift.common import exceptions, utils
from swift.common.storage_policy import POLICIES
- from swift.obj import ssync_sender, diskfile
+ from swift.common.exceptions import DiskFileNotExist, DiskFileError, \
+ DiskFileDeleted
+ from swift.common.swob import Request
+ from swift.common.utils import Timestamp, FileLikeIter
+ from swift.obj import ssync_sender, diskfile, server, ssync_receiver
+ from swift.obj.reconstructor import RebuildingECDiskFileStream
from test.unit import debug_logger, patch_policies
class FakeReplicator(object):
- def __init__(self, testdir):
+ def __init__(self, testdir, policy=None):
self.logger = debug_logger('test-ssync-sender')
self.conn_timeout = 1
self.node_timeout = 2
@@ -44,7 +49,9 @@ class FakeReplicator(object):
'devices': testdir,
'mount_check': 'false',
}
- self._diskfile_mgr = diskfile.DiskFileManager(conf, self.logger)
+ policy = POLICIES.default if policy is None else policy
+ self._diskfile_router = diskfile.DiskFileRouter(conf, self.logger)
+ self._diskfile_mgr = self._diskfile_router[policy]
class NullBufferedHTTPConnection(object):
@@ -91,42 +98,49 @@ class FakeConnection(object):
self.closed = True
- @patch_policies()
- class TestSender(unittest.TestCase):
+ class BaseTestSender(unittest.TestCase):
def setUp(self):
self.tmpdir = tempfile.mkdtemp()
self.testdir = os.path.join(self.tmpdir, 'tmp_test_ssync_sender')
utils.mkdirs(os.path.join(self.testdir, 'dev'))
- self.replicator = FakeReplicator(self.testdir)
+ self.daemon = FakeReplicator(self.testdir)
- self.sender = ssync_sender.Sender(self.replicator, None, None, None)
+ self.sender = ssync_sender.Sender(self.daemon, None, None, None)
def tearDown(self):
shutil.rmtree(self.tmpdir, ignore_errors=True)
def _make_open_diskfile(self, device='dev', partition='9',
account='a', container='c', obj='o', body='test',
- extra_metadata=None, policy=None):
+ extra_metadata=None, policy=None,
+ frag_index=None, timestamp=None, df_mgr=None):
policy = policy or POLICIES.legacy
object_parts = account, container, obj
- req_timestamp = utils.normalize_timestamp(time.time())
+ timestamp = Timestamp(time.time()) if timestamp is None else timestamp
- df = self.sender.daemon._diskfile_mgr.get_diskfile(
- device, partition, *object_parts, policy=policy)
+ if df_mgr is None:
+ df_mgr = self.daemon._diskfile_router[policy]
+ df = df_mgr.get_diskfile(
+ device, partition, *object_parts, policy=policy,
+ frag_index=frag_index)
content_length = len(body)
etag = hashlib.md5(body).hexdigest()
with df.create() as writer:
writer.write(body)
metadata = {
- 'X-Timestamp': req_timestamp,
+ 'X-Timestamp': timestamp.internal,
- 'Content-Length': content_length,
+ 'Content-Length': str(content_length),
'ETag': etag,
}
if extra_metadata:
metadata.update(extra_metadata)
writer.put(metadata)
writer.commit(timestamp)
df.open()
return df
@patch_policies()
class TestSender(BaseTestSender):
def test_call_catches_MessageTimeout(self):
def connect(self):
@@ -139,12 +153,12 @@ class TestSender(unittest.TestCase):
node = dict(replication_ip='1.2.3.4', replication_port=5678,
device='sda1')
job = dict(partition='9', policy=POLICIES.legacy)
- self.sender = ssync_sender.Sender(self.replicator, node, job, None)
+ self.sender = ssync_sender.Sender(self.daemon, node, job, None)
self.sender.suffixes = ['abc']
success, candidates = self.sender()
self.assertFalse(success)
- self.assertEquals(candidates, set())
+ self.assertEquals(candidates, {})
- error_lines = self.replicator.logger.get_lines_for_level('error')
+ error_lines = self.daemon.logger.get_lines_for_level('error')
self.assertEqual(1, len(error_lines))
self.assertEqual('1.2.3.4:5678/sda1/9 1 second: test connect',
error_lines[0])
@@ -158,12 +172,12 @@ class TestSender(unittest.TestCase):
node = dict(replication_ip='1.2.3.4', replication_port=5678,
device='sda1')
job = dict(partition='9', policy=POLICIES.legacy)
- self.sender = ssync_sender.Sender(self.replicator, node, job, None)
+ self.sender = ssync_sender.Sender(self.daemon, node, job, None)
self.sender.suffixes = ['abc']
success, candidates = self.sender()
self.assertFalse(success)
- self.assertEquals(candidates, set())
+ self.assertEquals(candidates, {})
- error_lines = self.replicator.logger.get_lines_for_level('error')
+ error_lines = self.daemon.logger.get_lines_for_level('error')
self.assertEqual(1, len(error_lines))
self.assertEqual('1.2.3.4:5678/sda1/9 test connect',
error_lines[0])
@@ -172,26 +186,26 @@ class TestSender(unittest.TestCase):
node = dict(replication_ip='1.2.3.4', replication_port=5678,
device='sda1')
job = dict(partition='9', policy=POLICIES.legacy)
- self.sender = ssync_sender.Sender(self.replicator, node, job, None)
+ self.sender = ssync_sender.Sender(self.daemon, node, job, None)
self.sender.suffixes = ['abc']
self.sender.connect = 'cause exception'
success, candidates = self.sender()
self.assertFalse(success)
- self.assertEquals(candidates, set())
+ self.assertEquals(candidates, {})
- error_lines = self.replicator.logger.get_lines_for_level('error')
+ error_lines = self.daemon.logger.get_lines_for_level('error')
for line in error_lines:
self.assertTrue(line.startswith(
'1.2.3.4:5678/sda1/9 EXCEPTION in replication.Sender:'))
def test_call_catches_exception_handling_exception(self):
job = node = None # Will cause inside exception handler to fail
- self.sender = ssync_sender.Sender(self.replicator, node, job, None)
+ self.sender = ssync_sender.Sender(self.daemon, node, job, None)
self.sender.suffixes = ['abc']
self.sender.connect = 'cause exception'
success, candidates = self.sender()
self.assertFalse(success)
- self.assertEquals(candidates, set())
+ self.assertEquals(candidates, {})
- error_lines = self.replicator.logger.get_lines_for_level('error')
+ error_lines = self.daemon.logger.get_lines_for_level('error')
for line in error_lines:
self.assertTrue(line.startswith(
'EXCEPTION in replication.Sender'))
@@ -204,7 +218,7 @@ class TestSender(unittest.TestCase):
self.sender.disconnect = mock.MagicMock()
success, candidates = self.sender()
self.assertTrue(success)
- self.assertEquals(candidates, set())
+ self.assertEquals(candidates, {})
self.sender.connect.assert_called_once_with()
self.sender.missing_check.assert_called_once_with()
self.sender.updates.assert_called_once_with()
@@ -219,7 +233,7 @@ class TestSender(unittest.TestCase):
self.sender.failures = 1
success, candidates = self.sender()
self.assertFalse(success)
- self.assertEquals(candidates, set())
+ self.assertEquals(candidates, {})
self.sender.connect.assert_called_once_with()
self.sender.missing_check.assert_called_once_with()
self.sender.updates.assert_called_once_with()
@@ -229,7 +243,7 @@ class TestSender(unittest.TestCase):
node = dict(replication_ip='1.2.3.4', replication_port=5678,
device='sda1', index=0)
job = dict(partition='9', policy=POLICIES[1])
- self.sender = ssync_sender.Sender(self.replicator, node, job, None)
+ self.sender = ssync_sender.Sender(self.daemon, node, job, None)
self.sender.suffixes = ['abc']
with mock.patch(
'swift.obj.ssync_sender.bufferedhttp.BufferedHTTPConnection'
@@ -242,11 +256,12 @@ class TestSender(unittest.TestCase):
mock_conn_class.assert_called_once_with('1.2.3.4:5678')
expectations = {
'putrequest': [
- mock.call('REPLICATION', '/sda1/9'),
+ mock.call('SSYNC', '/sda1/9'),
],
'putheader': [
mock.call('Transfer-Encoding', 'chunked'),
mock.call('X-Backend-Storage-Policy-Index', 1),
mock.call('X-Backend-Ssync-Frag-Index', 0),
],
'endheaders': [mock.call()],
}
@@ -257,6 +272,76 @@ class TestSender(unittest.TestCase):
method_name, mock_method.mock_calls,
expected_calls))
def test_call(self):
def patch_sender(sender):
sender.connect = mock.MagicMock()
sender.missing_check = mock.MagicMock()
sender.updates = mock.MagicMock()
sender.disconnect = mock.MagicMock()
node = dict(replication_ip='1.2.3.4', replication_port=5678,
device='sda1')
job = {
'device': 'dev',
'partition': '9',
'policy': POLICIES.legacy,
'frag_index': 0,
}
available_map = dict([('9d41d8cd98f00b204e9800998ecf0abc',
'1380144470.00000'),
('9d41d8cd98f00b204e9800998ecf0def',
'1380144472.22222'),
('9d41d8cd98f00b204e9800998ecf1def',
'1380144474.44444')])
# no suffixes -> no work done
sender = ssync_sender.Sender(
self.daemon, node, job, [], remote_check_objs=None)
patch_sender(sender)
sender.available_map = available_map
success, candidates = sender()
self.assertTrue(success)
self.assertEqual({}, candidates)
# all objs in sync
sender = ssync_sender.Sender(
self.daemon, node, job, ['ignored'], remote_check_objs=None)
patch_sender(sender)
sender.available_map = available_map
success, candidates = sender()
self.assertTrue(success)
self.assertEqual(available_map, candidates)
# one obj not in sync, sync'ing faked, all objs should be in return set
wanted = '9d41d8cd98f00b204e9800998ecf0def'
sender = ssync_sender.Sender(
self.daemon, node, job, ['ignored'],
remote_check_objs=None)
patch_sender(sender)
sender.send_list = [wanted]
sender.available_map = available_map
success, candidates = sender()
self.assertTrue(success)
self.assertEqual(available_map, candidates)
# one obj not in sync, remote check only so that obj is not sync'd
# and should not be in the return set
wanted = '9d41d8cd98f00b204e9800998ecf0def'
remote_check_objs = set(available_map.keys())
sender = ssync_sender.Sender(
self.daemon, node, job, ['ignored'],
remote_check_objs=remote_check_objs)
patch_sender(sender)
sender.send_list = [wanted]
sender.available_map = available_map
success, candidates = sender()
self.assertTrue(success)
expected_map = dict([('9d41d8cd98f00b204e9800998ecf0abc',
'1380144470.00000'),
('9d41d8cd98f00b204e9800998ecf1def',
'1380144474.44444')])
self.assertEqual(expected_map, candidates)
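# Illustrative sketch (hypothetical helper, not ssync_sender code) of the
# candidate rule the assertions above encode: every object the sender had
# locally is a removal candidate, except that in remote-check-only mode an
# object found to be out of sync is excluded because it was never sent.
# (A sender given no suffixes short-circuits and returns {} instead.)
def select_candidates(available_map, send_list, remote_check_objs):
    if remote_check_objs is None:
        # normal sync: sent and already-in-sync objects are all candidates
        return dict(available_map)
    return dict((h, ts) for h, ts in available_map.items()
                if h not in send_list)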
def test_call_and_missing_check(self):
def yield_hashes(device, partition, policy, suffixes=None, **kwargs):
if device == 'dev' and partition == '9' and suffixes == ['abc'] \
@@ -275,6 +360,7 @@ class TestSender(unittest.TestCase):
'device': 'dev',
'partition': '9',
'policy': POLICIES.legacy,
'frag_index': 0,
}
self.sender.suffixes = ['abc']
self.sender.response = FakeResponse(
@@ -288,7 +374,8 @@ class TestSender(unittest.TestCase):
self.sender.disconnect = mock.MagicMock()
success, candidates = self.sender()
self.assertTrue(success)
- self.assertEqual(candidates, set(['9d41d8cd98f00b204e9800998ecf0abc']))
+ self.assertEqual(candidates, dict([('9d41d8cd98f00b204e9800998ecf0abc',
+ '1380144470.00000')]))
self.assertEqual(self.sender.failures, 0)
def test_call_and_missing_check_with_obj_list(self):
@@ -307,8 +394,9 @@ class TestSender(unittest.TestCase):
'device': 'dev',
'partition': '9',
'policy': POLICIES.legacy,
'frag_index': 0,
}
- self.sender = ssync_sender.Sender(self.replicator, None, job, ['abc'],
+ self.sender = ssync_sender.Sender(self.daemon, None, job, ['abc'],
['9d41d8cd98f00b204e9800998ecf0abc'])
self.sender.connection = FakeConnection()
self.sender.response = FakeResponse(
@@ -321,7 +409,8 @@ class TestSender(unittest.TestCase):
self.sender.disconnect = mock.MagicMock()
success, candidates = self.sender()
self.assertTrue(success)
- self.assertEqual(candidates, set(['9d41d8cd98f00b204e9800998ecf0abc']))
+ self.assertEqual(candidates, dict([('9d41d8cd98f00b204e9800998ecf0abc',
+ '1380144470.00000')]))
self.assertEqual(self.sender.failures, 0)
def test_call_and_missing_check_with_obj_list_but_required(self):
@@ -340,8 +429,9 @@ class TestSender(unittest.TestCase):
'device': 'dev',
'partition': '9',
'policy': POLICIES.legacy,
'frag_index': 0,
}
- self.sender = ssync_sender.Sender(self.replicator, None, job, ['abc'],
+ self.sender = ssync_sender.Sender(self.daemon, None, job, ['abc'],
['9d41d8cd98f00b204e9800998ecf0abc'])
self.sender.connection = FakeConnection()
self.sender.response = FakeResponse(
@@ -355,14 +445,14 @@ class TestSender(unittest.TestCase):
self.sender.disconnect = mock.MagicMock()
success, candidates = self.sender()
self.assertTrue(success)
- self.assertEqual(candidates, set())
+ self.assertEqual(candidates, {})
def test_connect_send_timeout(self):
- self.replicator.conn_timeout = 0.01
+ self.daemon.conn_timeout = 0.01
node = dict(replication_ip='1.2.3.4', replication_port=5678,
device='sda1')
job = dict(partition='9', policy=POLICIES.legacy)
- self.sender = ssync_sender.Sender(self.replicator, node, job, None)
+ self.sender = ssync_sender.Sender(self.daemon, node, job, None)
self.sender.suffixes = ['abc']
def putrequest(*args, **kwargs):
@@ -373,18 +463,18 @@ class TestSender(unittest.TestCase):
'putrequest', putrequest):
success, candidates = self.sender()
self.assertFalse(success)
- self.assertEquals(candidates, set())
+ self.assertEquals(candidates, {})
- error_lines = self.replicator.logger.get_lines_for_level('error')
+ error_lines = self.daemon.logger.get_lines_for_level('error')
for line in error_lines:
self.assertTrue(line.startswith(
'1.2.3.4:5678/sda1/9 0.01 seconds: connect send'))
def test_connect_receive_timeout(self):
- self.replicator.node_timeout = 0.02
+ self.daemon.node_timeout = 0.02
node = dict(replication_ip='1.2.3.4', replication_port=5678,
device='sda1', index=0)
job = dict(partition='9', policy=POLICIES.legacy)
- self.sender = ssync_sender.Sender(self.replicator, node, job, None)
+ self.sender = ssync_sender.Sender(self.daemon, node, job, None)
self.sender.suffixes = ['abc']
class FakeBufferedHTTPConnection(NullBufferedHTTPConnection):
@@ -397,18 +487,18 @@ class TestSender(unittest.TestCase):
FakeBufferedHTTPConnection):
success, candidates = self.sender()
self.assertFalse(success)
- self.assertEquals(candidates, set())
+ self.assertEquals(candidates, {})
- error_lines = self.replicator.logger.get_lines_for_level('error')
+ error_lines = self.daemon.logger.get_lines_for_level('error')
for line in error_lines:
self.assertTrue(line.startswith(
'1.2.3.4:5678/sda1/9 0.02 seconds: connect receive'))
def test_connect_bad_status(self):
- self.replicator.node_timeout = 0.02
+ self.daemon.node_timeout = 0.02
node = dict(replication_ip='1.2.3.4', replication_port=5678,
- device='sda1')
+ device='sda1', index=0)
job = dict(partition='9', policy=POLICIES.legacy)
- self.sender = ssync_sender.Sender(self.replicator, node, job, None)
+ self.sender = ssync_sender.Sender(self.daemon, node, job, None)
self.sender.suffixes = ['abc']
class FakeBufferedHTTPConnection(NullBufferedHTTPConnection):
@@ -422,8 +512,8 @@ class TestSender(unittest.TestCase):
FakeBufferedHTTPConnection):
success, candidates = self.sender()
self.assertFalse(success)
- self.assertEquals(candidates, set())
+ self.assertEquals(candidates, {})
- error_lines = self.replicator.logger.get_lines_for_level('error')
+ error_lines = self.daemon.logger.get_lines_for_level('error')
for line in error_lines:
self.assertTrue(line.startswith(
'1.2.3.4:5678/sda1/9 Expected status 200; got 503'))
@@ -434,7 +524,7 @@ class TestSender(unittest.TestCase):
self.assertEqual(self.sender.response_buffer, 'Okay.')
def test_readline_buffer_exceeds_network_chunk_size_somehow(self):
- self.replicator.network_chunk_size = 2
+ self.daemon.network_chunk_size = 2
self.sender.response_buffer = '1234567890'
self.assertEqual(self.sender.readline(), '1234567890')
self.assertEqual(self.sender.response_buffer, '')
@@ -514,7 +604,7 @@ class TestSender(unittest.TestCase):
'17\r\n:MISSING_CHECK: START\r\n\r\n'
'15\r\n:MISSING_CHECK: END\r\n\r\n')
self.assertEqual(self.sender.send_list, [])
- self.assertEqual(self.sender.available_set, set())
+ self.assertEqual(self.sender.available_map, {})
def test_missing_check_has_suffixes(self):
def yield_hashes(device, partition, policy, suffixes=None, **kwargs):
@@ -562,10 +652,10 @@ class TestSender(unittest.TestCase):
'33\r\n9d41d8cd98f00b204e9800998ecf1def 1380144474.44444\r\n\r\n'
'15\r\n:MISSING_CHECK: END\r\n\r\n')
self.assertEqual(self.sender.send_list, [])
- candidates = ['9d41d8cd98f00b204e9800998ecf0abc',
- '9d41d8cd98f00b204e9800998ecf0def',
- '9d41d8cd98f00b204e9800998ecf1def']
- self.assertEqual(self.sender.available_set, set(candidates))
+ candidates = [('9d41d8cd98f00b204e9800998ecf0abc', '1380144470.00000'),
+ ('9d41d8cd98f00b204e9800998ecf0def', '1380144472.22222'),
+ ('9d41d8cd98f00b204e9800998ecf1def', '1380144474.44444')]
+ self.assertEqual(self.sender.available_map, dict(candidates))
def test_missing_check_far_end_disconnect(self):
def yield_hashes(device, partition, policy, suffixes=None, **kwargs):
@@ -602,8 +692,9 @@ class TestSender(unittest.TestCase):
'17\r\n:MISSING_CHECK: START\r\n\r\n'
'33\r\n9d41d8cd98f00b204e9800998ecf0abc 1380144470.00000\r\n\r\n'
'15\r\n:MISSING_CHECK: END\r\n\r\n')
- self.assertEqual(self.sender.available_set,
- set(['9d41d8cd98f00b204e9800998ecf0abc']))
+ self.assertEqual(self.sender.available_map,
+ dict([('9d41d8cd98f00b204e9800998ecf0abc',
+ '1380144470.00000')]))
def test_missing_check_far_end_disconnect2(self):
def yield_hashes(device, partition, policy, suffixes=None, **kwargs):
@@ -641,8 +732,9 @@ class TestSender(unittest.TestCase):
'17\r\n:MISSING_CHECK: START\r\n\r\n'
'33\r\n9d41d8cd98f00b204e9800998ecf0abc 1380144470.00000\r\n\r\n'
'15\r\n:MISSING_CHECK: END\r\n\r\n')
- self.assertEqual(self.sender.available_set,
- set(['9d41d8cd98f00b204e9800998ecf0abc']))
+ self.assertEqual(self.sender.available_map,
+ dict([('9d41d8cd98f00b204e9800998ecf0abc',
+ '1380144470.00000')]))
def test_missing_check_far_end_unexpected(self):
def yield_hashes(device, partition, policy, suffixes=None, **kwargs):
@@ -679,8 +771,9 @@ class TestSender(unittest.TestCase):
'17\r\n:MISSING_CHECK: START\r\n\r\n'
'33\r\n9d41d8cd98f00b204e9800998ecf0abc 1380144470.00000\r\n\r\n'
'15\r\n:MISSING_CHECK: END\r\n\r\n')
- self.assertEqual(self.sender.available_set,
- set(['9d41d8cd98f00b204e9800998ecf0abc']))
+ self.assertEqual(self.sender.available_map,
+ dict([('9d41d8cd98f00b204e9800998ecf0abc',
+ '1380144470.00000')]))
def test_missing_check_send_list(self):
def yield_hashes(device, partition, policy, suffixes=None, **kwargs):
@@ -717,8 +810,45 @@ class TestSender(unittest.TestCase):
'33\r\n9d41d8cd98f00b204e9800998ecf0abc 1380144470.00000\r\n\r\n'
'15\r\n:MISSING_CHECK: END\r\n\r\n')
self.assertEqual(self.sender.send_list, ['0123abc'])
- self.assertEqual(self.sender.available_set,
- set(['9d41d8cd98f00b204e9800998ecf0abc']))
+ self.assertEqual(self.sender.available_map,
+ dict([('9d41d8cd98f00b204e9800998ecf0abc',
+ '1380144470.00000')]))
def test_missing_check_extra_line_parts(self):
# check that sender tolerates extra parts in missing check
# line responses to allow for protocol upgrades
def yield_hashes(device, partition, policy, suffixes=None, **kwargs):
if (device == 'dev' and partition == '9' and
policy == POLICIES.legacy and
suffixes == ['abc']):
yield (
'/srv/node/dev/objects/9/abc/'
'9d41d8cd98f00b204e9800998ecf0abc',
'9d41d8cd98f00b204e9800998ecf0abc',
'1380144470.00000')
else:
raise Exception(
'No match for %r %r %r %r' % (device, partition,
policy, suffixes))
self.sender.connection = FakeConnection()
self.sender.job = {
'device': 'dev',
'partition': '9',
'policy': POLICIES.legacy,
}
self.sender.suffixes = ['abc']
self.sender.response = FakeResponse(
chunk_body=(
':MISSING_CHECK: START\r\n'
'0123abc extra response parts\r\n'
':MISSING_CHECK: END\r\n'))
self.sender.daemon._diskfile_mgr.yield_hashes = yield_hashes
self.sender.missing_check()
self.assertEqual(self.sender.send_list, ['0123abc'])
self.assertEqual(self.sender.available_map,
dict([('9d41d8cd98f00b204e9800998ecf0abc',
'1380144470.00000')]))
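# A minimal sketch of the tolerant parsing this test pins down (hypothetical
# function, not the actual ssync_sender implementation): only the first
# whitespace-separated token of each missing_check response line is taken as
# the wanted hash, so future protocol versions may append extra parts.
def parse_wanted_hash(response_line):
    parts = response_line.split()
    return parts[0] if parts else None

assert parse_wanted_hash('0123abc extra response parts') == '0123abc'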
def test_updates_timeout(self):
self.sender.connection = FakeConnection()
@@ -790,6 +920,7 @@ class TestSender(unittest.TestCase):
'device': device,
'partition': part,
'policy': POLICIES.legacy,
'frag_index': 0,
}
self.sender.node = {}
self.sender.send_list = [object_hash]
@@ -823,6 +954,7 @@ class TestSender(unittest.TestCase):
'device': device,
'partition': part,
'policy': POLICIES.legacy,
'frag_index': 0,
}
self.sender.node = {}
self.sender.send_list = [object_hash]
@@ -853,6 +985,7 @@ class TestSender(unittest.TestCase):
'device': device,
'partition': part,
'policy': POLICIES.legacy,
'frag_index': 0,
}
self.sender.node = {}
self.sender.send_list = [object_hash]
@@ -1112,5 +1245,466 @@ class TestSender(unittest.TestCase):
self.assertTrue(self.sender.connection.closed)
@patch_policies(with_ec_default=True)
class TestSsync(BaseTestSender):
"""
Test interactions between sender and receiver. The basis for each test is
actual diskfile state on either side - the connection between sender and
receiver is faked. Assertions are made about the final state of the sender
and receiver diskfiles.
"""
def make_fake_ssync_connect(self, sender, rx_obj_controller, device,
partition, policy):
trace = []
def add_trace(type, msg):
# record a protocol event for later analysis
if msg.strip():
trace.append((type, msg.strip()))
def start_response(status, headers, exc_info=None):
assert(status == '200 OK')
class FakeConnection:
def __init__(self, trace):
self.trace = trace
self.queue = []
self.src = FileLikeIter(self.queue)
def send(self, msg):
msg = msg.split('\r\n', 1)[1]
msg = msg.rsplit('\r\n', 1)[0]
add_trace('tx', msg)
self.queue.append(msg)
def close(self):
pass
def wrap_gen(gen):
# re-wrap each response message in chunked transfer-encoding framing
while True:
try:
msg = gen.next()
if msg:
add_trace('rx', msg)
msg = '%x\r\n%s\r\n' % (len(msg), msg)
yield msg
except StopIteration:
break
def fake_connect():
sender.connection = FakeConnection(trace)
headers = {'Transfer-Encoding': 'chunked',
'X-Backend-Storage-Policy-Index': str(int(policy))}
env = {'REQUEST_METHOD': 'SSYNC'}
path = '/%s/%s' % (device, partition)
req = Request.blank(path, environ=env, headers=headers)
req.environ['wsgi.input'] = sender.connection.src
resp = rx_obj_controller(req.environ, start_response)
wrapped_gen = wrap_gen(resp)
sender.response = FileLikeIter(wrapped_gen)
sender.response.fp = sender.response
return fake_connect
def setUp(self):
self.device = 'dev'
self.partition = '9'
self.tmpdir = tempfile.mkdtemp()
# sender side setup
self.tx_testdir = os.path.join(self.tmpdir, 'tmp_test_ssync_sender')
utils.mkdirs(os.path.join(self.tx_testdir, self.device))
self.daemon = FakeReplicator(self.tx_testdir)
# rx side setup
self.rx_testdir = os.path.join(self.tmpdir, 'tmp_test_ssync_receiver')
utils.mkdirs(os.path.join(self.rx_testdir, self.device))
conf = {
'devices': self.rx_testdir,
'mount_check': 'false',
'replication_one_per_device': 'false',
'log_requests': 'false'}
self.rx_controller = server.ObjectController(conf)
self.orig_ensure_flush = ssync_receiver.Receiver._ensure_flush
ssync_receiver.Receiver._ensure_flush = lambda *args: ''
self.ts_iter = (Timestamp(t)
for t in itertools.count(int(time.time())))
def tearDown(self):
if self.orig_ensure_flush:
ssync_receiver.Receiver._ensure_flush = self.orig_ensure_flush
shutil.rmtree(self.tmpdir, ignore_errors=True)
def _create_ondisk_files(self, df_mgr, obj_name, policy, timestamp,
frag_indexes=None):
frag_indexes = [] if frag_indexes is None else frag_indexes
metadata = {'Content-Type': 'plain/text'}
diskfiles = []
for frag_index in frag_indexes:
object_data = '/a/c/%s___%s' % (obj_name, frag_index)
if frag_index is not None:
metadata['X-Object-Sysmeta-Ec-Frag-Index'] = str(frag_index)
df = self._make_open_diskfile(
device=self.device, partition=self.partition, account='a',
container='c', obj=obj_name, body=object_data,
extra_metadata=metadata, timestamp=timestamp, policy=policy,
frag_index=frag_index, df_mgr=df_mgr)
# sanity checks
listing = os.listdir(df._datadir)
self.assertTrue(listing)
for filename in listing:
self.assertTrue(filename.startswith(timestamp.internal))
diskfiles.append(df)
return diskfiles
def _open_tx_diskfile(self, obj_name, policy, frag_index=None):
df_mgr = self.daemon._diskfile_router[policy]
df = df_mgr.get_diskfile(
self.device, self.partition, account='a', container='c',
obj=obj_name, policy=policy, frag_index=frag_index)
df.open()
return df
def _open_rx_diskfile(self, obj_name, policy, frag_index=None):
df = self.rx_controller.get_diskfile(
self.device, self.partition, 'a', 'c', obj_name, policy=policy,
frag_index=frag_index)
df.open()
return df
def _verify_diskfile_sync(self, tx_df, rx_df, frag_index):
# verify that diskfiles' metadata match
# sanity check, they are not the same ondisk files!
self.assertNotEqual(tx_df._datadir, rx_df._datadir)
rx_metadata = dict(rx_df.get_metadata())
for k, v in tx_df.get_metadata().iteritems():
self.assertEqual(v, rx_metadata.pop(k))
# ugh, ssync duplicates ETag with Etag so have to clear it out here
if 'Etag' in rx_metadata:
rx_metadata.pop('Etag')
self.assertFalse(rx_metadata)
if frag_index:
rx_metadata = rx_df.get_metadata()
fi_key = 'X-Object-Sysmeta-Ec-Frag-Index'
self.assertTrue(fi_key in rx_metadata)
self.assertEqual(frag_index, int(rx_metadata[fi_key]))
def _analyze_trace(self, trace):
"""
Parse protocol trace captured by fake connection, making some
assertions along the way, and return results as a dict of form:
results = {'tx_missing': <list of messages>,
'rx_missing': <list of messages>,
'tx_updates': <list of subreqs>,
'rx_updates': <list of messages>}
Each subreq is a dict with keys: 'method', 'path', 'headers', 'body'
"""
def tx_missing(results, line):
self.assertEqual('tx', line[0])
results['tx_missing'].append(line[1])
def rx_missing(results, line):
self.assertEqual('rx', line[0])
parts = line[1].split('\r\n')
for part in parts:
results['rx_missing'].append(part)
def tx_updates(results, line):
self.assertEqual('tx', line[0])
subrequests = results['tx_updates']
if line[1].startswith(('PUT', 'DELETE')):
parts = line[1].split('\r\n')
method, path = parts[0].split()
subreq = {'method': method, 'path': path, 'req': line[1],
'headers': parts[1:]}
subrequests.append(subreq)
else:
self.assertTrue(subrequests)
body = (subrequests[-1]).setdefault('body', '')
body += line[1]
subrequests[-1]['body'] = body
def rx_updates(results, line):
self.assertEqual('rx', line[0])
results['rx_updates'].append(line[1])
def unexpected(results, line):
results.setdefault('unexpected', []).append(line)
# each trace line is a tuple of ([tx|rx], msg)
handshakes = iter([(('tx', ':MISSING_CHECK: START'), tx_missing),
(('tx', ':MISSING_CHECK: END'), unexpected),
(('rx', ':MISSING_CHECK: START'), rx_missing),
(('rx', ':MISSING_CHECK: END'), unexpected),
(('tx', ':UPDATES: START'), tx_updates),
(('tx', ':UPDATES: END'), unexpected),
(('rx', ':UPDATES: START'), rx_updates),
(('rx', ':UPDATES: END'), unexpected)])
expect_handshake = handshakes.next()
phases = ('tx_missing', 'rx_missing', 'tx_updates', 'rx_updates')
results = dict((k, []) for k in phases)
handler = unexpected
lines = list(trace)
lines.reverse()
while lines:
line = lines.pop()
if line == expect_handshake[0]:
handler = expect_handshake[1]
try:
expect_handshake = handshakes.next()
except StopIteration:
# should be the last line
self.assertFalse(
lines, 'Unexpected trailing lines %s' % lines)
continue
handler(results, line)
try:
# check all handshakes occurred
missed = handshakes.next()
self.fail('Handshake %s not found' % str(missed[0]))
except StopIteration:
pass
# check no message outside of a phase
self.assertFalse(results.get('unexpected'),
'Message outside of a phase: %s' % results.get('unexpected'))
return results
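# For reference, a trace covering a single PUT sync would analyze to a dict
# shaped roughly like this (illustrative values, not from a real run):
EXAMPLE_RESULTS = {
    'tx_missing': ['9d41d8cd98f00b204e9800998ecf0abc 1380144470.00000'],
    'rx_missing': ['9d41d8cd98f00b204e9800998ecf0abc'],
    'tx_updates': [{'method': 'PUT', 'path': '/a/c/o1',
                    'headers': ['Content-Length: 11'],
                    'body': '/a/c/o1___0'}],
    'rx_updates': [],
}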
def _verify_ondisk_files(self, tx_objs, policy, rx_node_index):
# verify tx and rx files that should be in sync
for o_name, diskfiles in tx_objs.iteritems():
for tx_df in diskfiles:
frag_index = tx_df._frag_index
if frag_index == rx_node_index:
# this frag_index should have been sync'd,
# check rx file is ok
rx_df = self._open_rx_diskfile(o_name, policy, frag_index)
self._verify_diskfile_sync(tx_df, rx_df, frag_index)
expected_body = '/a/c/%s___%s' % (o_name, rx_node_index)
actual_body = ''.join([chunk for chunk in rx_df.reader()])
self.assertEqual(expected_body, actual_body)
else:
# this frag_index should not have been sync'd,
# check no rx file,
self.assertRaises(DiskFileNotExist,
self._open_rx_diskfile,
o_name, policy, frag_index=frag_index)
# check tx file still intact - ssync does not do any cleanup!
self._open_tx_diskfile(o_name, policy, frag_index)
def _verify_tombstones(self, tx_objs, policy):
# verify tx and rx tombstones that should be in sync
for o_name, diskfiles in tx_objs.iteritems():
for tx_df_ in diskfiles:
try:
self._open_tx_diskfile(o_name, policy)
self.fail('DiskFileDeleted expected')
except DiskFileDeleted as exc:
tx_delete_time = exc.timestamp
try:
self._open_rx_diskfile(o_name, policy)
self.fail('DiskFileDeleted expected')
except DiskFileDeleted as exc:
rx_delete_time = exc.timestamp
self.assertEqual(tx_delete_time, rx_delete_time)
def test_handoff_fragment_revert(self):
# test that a sync_revert type job does send the correct frag archives
# to the receiver, and that those frag archives are then removed from
# local node.
policy = POLICIES.default
rx_node_index = 0
tx_node_index = 1
frag_index = rx_node_index
# create sender side diskfiles...
tx_objs = {}
rx_objs = {}
tx_tombstones = {}
tx_df_mgr = self.daemon._diskfile_router[policy]
rx_df_mgr = self.rx_controller._diskfile_router[policy]
# o1 has primary and handoff fragment archives
t1 = self.ts_iter.next()
tx_objs['o1'] = self._create_ondisk_files(
tx_df_mgr, 'o1', policy, t1, (rx_node_index, tx_node_index))
# o2 only has primary
t2 = self.ts_iter.next()
tx_objs['o2'] = self._create_ondisk_files(
tx_df_mgr, 'o2', policy, t2, (tx_node_index,))
# o3 only has handoff
t3 = self.ts_iter.next()
tx_objs['o3'] = self._create_ondisk_files(
tx_df_mgr, 'o3', policy, t3, (rx_node_index,))
# o4 primary and handoff fragment archives on tx, handoff in sync on rx
t4 = self.ts_iter.next()
tx_objs['o4'] = self._create_ondisk_files(
tx_df_mgr, 'o4', policy, t4, (tx_node_index, rx_node_index,))
rx_objs['o4'] = self._create_ondisk_files(
rx_df_mgr, 'o4', policy, t4, (rx_node_index,))
# o5 is a tombstone, missing on receiver
t5 = self.ts_iter.next()
tx_tombstones['o5'] = self._create_ondisk_files(
tx_df_mgr, 'o5', policy, t5, (tx_node_index,))
tx_tombstones['o5'][0].delete(t5)
suffixes = set()
for diskfiles in (tx_objs.values() + tx_tombstones.values()):
for df in diskfiles:
suffixes.add(os.path.basename(os.path.dirname(df._datadir)))
# create ssync sender instance...
job = {'device': self.device,
'partition': self.partition,
'policy': policy,
'frag_index': frag_index,
'purge': True}
node = {'index': rx_node_index}
self.sender = ssync_sender.Sender(self.daemon, node, job, suffixes)
# fake connection from tx to rx...
self.sender.connect = self.make_fake_ssync_connect(
self.sender, self.rx_controller, self.device, self.partition,
policy)
# run the sync protocol...
self.sender()
# verify protocol
results = self._analyze_trace(self.sender.connection.trace)
# sender has handoff frags for o1, o3 and o4 and ts for o5
self.assertEqual(4, len(results['tx_missing']))
# receiver is missing frags for o1, o3 and ts for o5
self.assertEqual(3, len(results['rx_missing']))
self.assertEqual(3, len(results['tx_updates']))
self.assertFalse(results['rx_updates'])
sync_paths = []
for subreq in results.get('tx_updates'):
if subreq.get('method') == 'PUT':
self.assertTrue(
'X-Object-Sysmeta-Ec-Frag-Index: %s' % rx_node_index
in subreq.get('headers'))
expected_body = '%s___%s' % (subreq['path'], rx_node_index)
self.assertEqual(expected_body, subreq['body'])
elif subreq.get('method') == 'DELETE':
self.assertEqual('/a/c/o5', subreq['path'])
sync_paths.append(subreq.get('path'))
self.assertEqual(['/a/c/o1', '/a/c/o3', '/a/c/o5'], sorted(sync_paths))
# verify on disk files...
self._verify_ondisk_files(tx_objs, policy, rx_node_index)
self._verify_tombstones(tx_tombstones, policy)
def test_fragment_sync(self):
# check that a sync_only type job does call reconstructor to build a
# diskfile to send, and continues making progress despite an error
# when building one diskfile
policy = POLICIES.default
rx_node_index = 0
tx_node_index = 1
# for a sync job we iterate over frag index that belongs on local node
frag_index = tx_node_index
# create sender side diskfiles...
tx_objs = {}
tx_tombstones = {}
rx_objs = {}
tx_df_mgr = self.daemon._diskfile_router[policy]
rx_df_mgr = self.rx_controller._diskfile_router[policy]
# o1 only has primary
t1 = self.ts_iter.next()
tx_objs['o1'] = self._create_ondisk_files(
tx_df_mgr, 'o1', policy, t1, (tx_node_index,))
# o2 only has primary
t2 = self.ts_iter.next()
tx_objs['o2'] = self._create_ondisk_files(
tx_df_mgr, 'o2', policy, t2, (tx_node_index,))
# o3 only has primary
t3 = self.ts_iter.next()
tx_objs['o3'] = self._create_ondisk_files(
tx_df_mgr, 'o3', policy, t3, (tx_node_index,))
# o4 primary fragment archives on tx, handoff in sync on rx
t4 = self.ts_iter.next()
tx_objs['o4'] = self._create_ondisk_files(
tx_df_mgr, 'o4', policy, t4, (tx_node_index,))
rx_objs['o4'] = self._create_ondisk_files(
rx_df_mgr, 'o4', policy, t4, (rx_node_index,))
# o5 is a tombstone, missing on receiver
t5 = self.ts_iter.next()
tx_tombstones['o5'] = self._create_ondisk_files(
tx_df_mgr, 'o5', policy, t5, (tx_node_index,))
tx_tombstones['o5'][0].delete(t5)
suffixes = set()
for diskfiles in (tx_objs.values() + tx_tombstones.values()):
for df in diskfiles:
suffixes.add(os.path.basename(os.path.dirname(df._datadir)))
reconstruct_fa_calls = []
def fake_reconstruct_fa(job, node, metadata):
reconstruct_fa_calls.append((job, node, policy, metadata))
if len(reconstruct_fa_calls) == 2:
# simulate second reconstruct failing
raise DiskFileError
content = '%s___%s' % (metadata['name'], rx_node_index)
return RebuildingECDiskFileStream(
metadata, rx_node_index, iter([content]))
# create ssync sender instance...
job = {'device': self.device,
'partition': self.partition,
'policy': policy,
'frag_index': frag_index,
'sync_diskfile_builder': fake_reconstruct_fa}
node = {'index': rx_node_index}
self.sender = ssync_sender.Sender(self.daemon, node, job, suffixes)
# fake connection from tx to rx...
self.sender.connect = self.make_fake_ssync_connect(
self.sender, self.rx_controller, self.device, self.partition,
policy)
# run the sync protocol...
self.sender()
# verify protocol
results = self._analyze_trace(self.sender.connection.trace)
# sender has primary for o1, o2 and o3, o4 and ts for o5
self.assertEqual(5, len(results['tx_missing']))
# receiver is missing o1, o2 and o3 and ts for o5
self.assertEqual(4, len(results['rx_missing']))
# sender can only construct 2 out of 3 missing frags
self.assertEqual(3, len(results['tx_updates']))
self.assertEqual(3, len(reconstruct_fa_calls))
self.assertFalse(results['rx_updates'])
actual_sync_paths = []
for subreq in results.get('tx_updates'):
if subreq.get('method') == 'PUT':
self.assertTrue(
'X-Object-Sysmeta-Ec-Frag-Index: %s' % rx_node_index
in subreq.get('headers'))
expected_body = '%s___%s' % (subreq['path'], rx_node_index)
self.assertEqual(expected_body, subreq['body'])
elif subreq.get('method') == 'DELETE':
self.assertEqual('/a/c/o5', subreq['path'])
actual_sync_paths.append(subreq.get('path'))
# remove the failed df from expected synced df's
expect_sync_paths = ['/a/c/o1', '/a/c/o2', '/a/c/o3', '/a/c/o5']
failed_path = reconstruct_fa_calls[1][3]['name']
expect_sync_paths.remove(failed_path)
failed_obj = None
for obj, diskfiles in tx_objs.iteritems():
if diskfiles[0]._name == failed_path:
failed_obj = obj
# sanity check
self.assertTrue(tx_objs.pop(failed_obj))
# verify on disk files...
self.assertEqual(sorted(expect_sync_paths), sorted(actual_sync_paths))
self._verify_ondisk_files(tx_objs, policy, rx_node_index)
self._verify_tombstones(tx_tombstones, policy)
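# The seam this test fakes: for a sync-type job, ssync_sender invokes
# job['sync_diskfile_builder'](job, node, metadata) to rebuild the fragment
# archive it must send (the reconstructor passes reconstruct_fa here), and
# the builder returns a RebuildingECDiskFileStream wrapping the rebuilt
# fragment's metadata, the destination frag index, and a chunk iterator.
# A standalone sketch mirroring fake_reconstruct_fa above:
def example_diskfile_builder(job, node, metadata):
    # illustrative payload only; real rebuilding decodes and re-encodes EC
    # fragments from the other primaries
    content = '%s___%s' % (metadata['name'], node['index'])
    return RebuildingECDiskFileStream(metadata, node['index'],
                                      iter([content]))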
if __name__ == '__main__':
unittest.main()