diff --git a/doc/source/deployment_guide.rst b/doc/source/deployment_guide.rst index fa404bb300..9843ab5cdb 100644 --- a/doc/source/deployment_guide.rst +++ b/doc/source/deployment_guide.rst @@ -1676,187 +1676,207 @@ ionice_priority None I/O scheduling p [proxy-server] -============================ =============== ===================================== -Option Default Description ----------------------------- --------------- ------------------------------------- -use Entry point for paste.deploy for - the proxy server. For most - cases, this should be - `egg:swift#proxy`. -set log_name proxy-server Label used when logging -set log_facility LOG_LOCAL0 Syslog log facility -set log_level INFO Log level -set log_headers True If True, log headers in each - request -set log_handoffs True If True, the proxy will log - whenever it has to failover to a - handoff node -recheck_account_existence 60 Cache timeout in seconds to - send memcached for account - existence -recheck_container_existence 60 Cache timeout in seconds to - send memcached for container - existence -object_chunk_size 65536 Chunk size to read from - object servers -client_chunk_size 65536 Chunk size to read from - clients -memcache_servers 127.0.0.1:11211 Comma separated list of - memcached servers - ip:port or [ipv6addr]:port -memcache_max_connections 2 Max number of connections to - each memcached server per - worker -node_timeout 10 Request timeout to external - services -recoverable_node_timeout node_timeout Request timeout to external - services for requests that, on - failure, can be recovered - from. For example, object GET. 
-client_timeout 60 Timeout to read one chunk - from a client -conn_timeout 0.5 Connection timeout to - external services -error_suppression_interval 60 Time in seconds that must - elapse since the last error - for a node to be considered - no longer error limited -error_suppression_limit 10 Error count to consider a - node error limited -allow_account_management false Whether account PUTs and DELETEs - are even callable -object_post_as_copy false Deprecated. -account_autocreate false If set to 'true' authorized - accounts that do not yet exist - within the Swift cluster will - be automatically created. -max_containers_per_account 0 If set to a positive value, - trying to create a container - when the account already has at - least this maximum containers - will result in a 403 Forbidden. - Note: This is a soft limit, - meaning a user might exceed the - cap for - recheck_account_existence before - the 403s kick in. -max_containers_whitelist This is a comma separated list - of account names that ignore - the max_containers_per_account - cap. -rate_limit_after_segment 10 Rate limit the download of - large object segments after - this segment is downloaded. -rate_limit_segments_per_sec 1 Rate limit large object - downloads at this rate. -request_node_count 2 * replicas Set to the number of nodes to - contact for a normal request. - You can use '* replicas' at the - end to have it use the number - given times the number of - replicas for the ring being used - for the request. -swift_owner_headers up to the auth system in use, - but usually indicates - administrative responsibilities. -sorting_method shuffle Storage nodes can be chosen at - random (shuffle), by using timing - measurements (timing), or by using - an explicit match (affinity). - Using timing measurements may allow - for lower overall latency, while - using affinity allows for finer - control. In both the timing and - affinity cases, equally-sorting nodes - are still randomly chosen to spread - load. 
This option may be overridden - in a per-policy configuration - section. -timing_expiry 300 If the "timing" sorting_method is - used, the timings will only be valid - for the number of seconds configured - by timing_expiry. -concurrent_gets off Use replica count number of - threads concurrently during a - GET/HEAD and return with the - first successful response. In - the EC case, this parameter only - effects an EC HEAD as an EC GET - behaves differently. -concurrency_timeout conn_timeout This parameter controls how long - to wait before firing off the - next concurrent_get thread. A - value of 0 would we fully concurrent - any other number will stagger the - firing of the threads. This number - should be between 0 and node_timeout. - The default is conn_timeout (0.5). -nice_priority None Scheduling priority of server - processes. - Niceness values range from -20 (most - favorable to the process) to 19 (least - favorable to the process). The default - does not modify priority. -ionice_class None I/O scheduling class of server - processes. I/O niceness class values - are IOPRIO_CLASS_RT (realtime), - IOPRIO_CLASS_BE (best-effort), - and IOPRIO_CLASS_IDLE (idle). - The default does not modify class and - priority. Linux supports io scheduling - priorities and classes since 2.6.13 - with the CFQ io scheduler. - Work only with ionice_priority. -ionice_priority None I/O scheduling priority of server - processes. I/O niceness priority is - a number which goes from 0 to 7. - The higher the value, the lower the - I/O priority of the process. Work - only with ionice_class. - Ignored if IOPRIO_CLASS_IDLE is set. -read_affinity None Specifies which backend servers to - prefer on reads; used in conjunction - with the sorting_method option being - set to 'affinity'. Format is a comma - separated list of affinity descriptors - of the form =. - The may be r for - selecting nodes in region N or - rz for selecting nodes in - region N, zone M. 
The - value should be a whole number - that represents the priority to - be given to the selection; lower - numbers are higher priority. - Default is empty, meaning no - preference. This option may be - overridden in a per-policy - configuration section. -write_affinity None Specifies which backend servers to - prefer on writes. Format is a comma - separated list of affinity - descriptors of the form r for - region N or rz for region N, - zone M. Default is empty, meaning no - preference. This option may be - overridden in a per-policy - configuration section. -write_affinity_node_count 2 * replicas The number of local (as governed by - the write_affinity setting) nodes to - attempt to contact first on writes, - before any non-local ones. The value - should be an integer number, or use - '* replicas' at the end to have it - use the number given times the number - of replicas for the ring being used - for the request. This option may be - overridden in a per-policy - configuration section. -============================ =============== ===================================== +====================================== =============== ===================================== +Option Default Description +-------------------------------------- --------------- ------------------------------------- +use Entry point for paste.deploy for + the proxy server. For most + cases, this should be + `egg:swift#proxy`. 
+set log_name proxy-server Label used when logging +set log_facility LOG_LOCAL0 Syslog log facility +set log_level INFO Log level +set log_headers True If True, log headers in each + request +set log_handoffs True If True, the proxy will log + whenever it has to failover to a + handoff node +recheck_account_existence 60 Cache timeout in seconds to + send memcached for account + existence +recheck_container_existence 60 Cache timeout in seconds to + send memcached for container + existence +object_chunk_size 65536 Chunk size to read from + object servers +client_chunk_size 65536 Chunk size to read from + clients +memcache_servers 127.0.0.1:11211 Comma separated list of + memcached servers + ip:port or [ipv6addr]:port +memcache_max_connections 2 Max number of connections to + each memcached server per + worker +node_timeout 10 Request timeout to external + services +recoverable_node_timeout node_timeout Request timeout to external + services for requests that, on + failure, can be recovered + from. For example, object GET. +client_timeout 60 Timeout to read one chunk + from a client +conn_timeout 0.5 Connection timeout to + external services +error_suppression_interval 60 Time in seconds that must + elapse since the last error + for a node to be considered + no longer error limited +error_suppression_limit 10 Error count to consider a + node error limited +allow_account_management false Whether account PUTs and DELETEs + are even callable +object_post_as_copy false Deprecated. +account_autocreate false If set to 'true' authorized + accounts that do not yet exist + within the Swift cluster will + be automatically created. +max_containers_per_account 0 If set to a positive value, + trying to create a container + when the account already has at + least this maximum containers + will result in a 403 Forbidden. + Note: This is a soft limit, + meaning a user might exceed the + cap for + recheck_account_existence before + the 403s kick in. 
+max_containers_whitelist This is a comma separated list + of account names that ignore + the max_containers_per_account + cap. +rate_limit_after_segment 10 Rate limit the download of + large object segments after + this segment is downloaded. +rate_limit_segments_per_sec 1 Rate limit large object + downloads at this rate. +request_node_count 2 * replicas Set to the number of nodes to + contact for a normal request. + You can use '* replicas' at the + end to have it use the number + given times the number of + replicas for the ring being used + for the request. +swift_owner_headers up to the auth system in use, + but usually indicates + administrative responsibilities. +sorting_method shuffle Storage nodes can be chosen at + random (shuffle), by using timing + measurements (timing), or by using + an explicit match (affinity). + Using timing measurements may allow + for lower overall latency, while + using affinity allows for finer + control. In both the timing and + affinity cases, equally-sorting nodes + are still randomly chosen to spread + load. This option may be overridden + in a per-policy configuration + section. +timing_expiry 300 If the "timing" sorting_method is + used, the timings will only be valid + for the number of seconds configured + by timing_expiry. +concurrent_gets off Use replica count number of + threads concurrently during a + GET/HEAD and return with the + first successful response. In + the EC case, this parameter only + affects an EC HEAD as an EC GET + behaves differently. +concurrency_timeout conn_timeout This parameter controls how long + to wait before firing off the + next concurrent_get thread. A + value of 0 would be fully concurrent; + any other number will stagger the + firing of the threads. This number + should be between 0 and node_timeout. + The default is conn_timeout (0.5). +nice_priority None Scheduling priority of server + processes. 
+ Niceness values range from -20 (most + favorable to the process) to 19 (least + favorable to the process). The default + does not modify priority. +ionice_class None I/O scheduling class of server + processes. I/O niceness class values + are IOPRIO_CLASS_RT (realtime), + IOPRIO_CLASS_BE (best-effort), + and IOPRIO_CLASS_IDLE (idle). + The default does not modify class and + priority. Linux supports io scheduling + priorities and classes since 2.6.13 + with the CFQ io scheduler. + Work only with ionice_priority. +ionice_priority None I/O scheduling priority of server + processes. I/O niceness priority is + a number which goes from 0 to 7. + The higher the value, the lower the + I/O priority of the process. Work + only with ionice_class. + Ignored if IOPRIO_CLASS_IDLE is set. +read_affinity None Specifies which backend servers to + prefer on reads; used in conjunction + with the sorting_method option being + set to 'affinity'. Format is a comma + separated list of affinity descriptors + of the form =. + The may be r for + selecting nodes in region N or + rz for selecting nodes in + region N, zone M. The + value should be a whole number + that represents the priority to + be given to the selection; lower + numbers are higher priority. + Default is empty, meaning no + preference. This option may be + overridden in a per-policy + configuration section. +write_affinity None Specifies which backend servers to + prefer on writes. Format is a comma + separated list of affinity + descriptors of the form r for + region N or rz for region N, + zone M. Default is empty, meaning no + preference. This option may be + overridden in a per-policy + configuration section. +write_affinity_node_count 2 * replicas The number of local (as governed by + the write_affinity setting) nodes to + attempt to contact first on writes, + before any non-local ones. 
The value + should be an integer number, or use + '* replicas' at the end to have it + use the number given times the number + of replicas for the ring being used + for the request. This option may be + overridden in a per-policy + configuration section. +write_affinity_handoff_delete_count auto The number of local (as governed by + the write_affinity setting) handoff + nodes to attempt to contact on + deletion, in addition to primary + nodes. Example: in a geographically + distributed deployment with replicas=3, + there may be 1 primary node and 2 + local handoff nodes in one region + holding the object after upload but + before it has been replicated to the + appropriate locations in other regions. + In this case, also sending the DELETE + request to these handoff nodes helps + the proxy make the correct decision + for the response. The default value 'auto' + means Swift will calculate the number + automatically as + (replicas - len(local_primary_nodes)). + This option may be overridden in a + per-policy configuration section. +====================================== =============== ===================================== .. _proxy_server_per_policy_config: @@ -1871,6 +1891,7 @@ options are: - ``read_affinity`` - ``write_affinity`` - ``write_affinity_node_count`` +- ``write_affinity_handoff_delete_count`` The per-policy config section name must be of the form:: @@ -1900,6 +1921,7 @@ policy with index ``3``:: read_affinity = r2=1 write_affinity = r2 write_affinity_node_count = 1 * replicas + write_affinity_handoff_delete_count = 2 .. note:: diff --git a/doc/source/overview_global_cluster.rst b/doc/source/overview_global_cluster.rst index 5b757b24f2..2f1c40bf88 100644 --- a/doc/source/overview_global_cluster.rst +++ b/doc/source/overview_global_cluster.rst @@ -82,9 +82,9 @@ Note that read_affinity only affects the ordering of primary nodes (see ring docs for definition of primary node), not the ordering of handoff nodes. 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -write_affinity and write_affinity_node_count -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~ +write_affinity +~~~~~~~~~~~~~~ This setting makes the proxy server prefer local backend servers for object PUT requests over non-local ones. For example, it may be @@ -97,9 +97,15 @@ the object won't immediately have any replicas in NY. However, replication will move the object's replicas to their proper homes in both SF and NY. -Note that only object PUT requests are affected by the write_affinity -setting; POST, GET, HEAD, DELETE, OPTIONS, and account/container PUT -requests are not affected. +One potential issue with write_affinity is that an end user may get a 404 +error when deleting an object before it has been replicated. The +write_affinity_handoff_delete_count setting is used together with +write_affinity to solve that issue. With its default configuration, Swift +will calculate the proper number of handoff nodes to send requests to. + +Note that only object PUT/DELETE requests are affected by the write_affinity +setting; POST, GET, HEAD, OPTIONS, and account/container PUT requests are +not affected. This setting lets you trade data distribution for throughput. If write_affinity is enabled, then object replicas will initially be diff --git a/etc/proxy-server.conf-sample b/etc/proxy-server.conf-sample index 73fce00ae0..ea1620a54c 100644 --- a/etc/proxy-server.conf-sample +++ b/etc/proxy-server.conf-sample @@ -236,6 +236,20 @@ use = egg:swift#proxy # This option may be overridden in a per-policy configuration section. # write_affinity_node_count = 2 * replicas # +# The number of local (as governed by the write_affinity setting) handoff nodes +# to attempt to contact on deletion, in addition to primary nodes. 
+# +# Example: in a geographically distributed deployment of 2 regions with +# replicas=3, there may be 1 primary node and 2 local handoff nodes in one +# region holding the object after upload but before it has been replicated +# to the appropriate locations in other regions. In this case, also sending +# the DELETE request to these handoff nodes helps the proxy make the correct +# decision for the response. The default value 'auto' means Swift will +# calculate the number automatically as +# (replicas - len(local_primary_nodes)). This option may be overridden in a +# per-policy configuration section. +# write_affinity_handoff_delete_count = auto +# # These are the headers whose values will only be shown to swift_owners. The # exact definition of a swift_owner is up to the auth system in use, but # usually indicates administrative responsibilities. @@ -264,6 +278,7 @@ use = egg:swift#proxy # read_affinity = # write_affinity = # write_affinity_node_count = +# write_affinity_handoff_delete_count = [filter:tempauth] use = egg:swift#tempauth diff --git a/swift/proxy/controllers/base.py b/swift/proxy/controllers/base.py index 25a43f8b51..d625dc34e6 100644 --- a/swift/proxy/controllers/base.py +++ b/swift/proxy/controllers/base.py @@ -1596,7 +1596,8 @@ class Controller(object): {'method': method, 'path': path}) def make_requests(self, req, ring, part, method, path, headers, - query_string='', overrides=None): + query_string='', overrides=None, node_count=None, + node_iterator=None): """ Sends an HTTP request to multiple nodes and aggregates the results. It attempts the primary nodes concurrently, then iterates over the @@ -1613,11 +1614,16 @@ class Controller(object): :param query_string: optional query string to send to the backend :param overrides: optional return status override map used to override the returned status of a request. + :param node_count: optional number of nodes to send request to. 
+ :param node_iterator: optional node iterator. :returns: a swob.Response object """ - start_nodes = ring.get_part_nodes(part) - nodes = GreenthreadSafeIterator(self.app.iter_nodes(ring, part)) - pile = GreenAsyncPile(len(start_nodes)) + nodes = GreenthreadSafeIterator( + node_iterator or self.app.iter_nodes(ring, part) + ) + node_number = node_count or len(ring.get_part_nodes(part)) + pile = GreenAsyncPile(node_number) + for head in headers: pile.spawn(self._make_request, nodes, part, method, path, head, query_string, self.app.logger.thread_locals) @@ -1628,7 +1634,7 @@ class Controller(object): continue response.append(resp) statuses.append(resp[0]) - if self.have_quorum(statuses, len(start_nodes)): + if self.have_quorum(statuses, node_number): break # give any pending requests *some* chance to finish finished_quickly = pile.waitall(self.app.post_quorum_timeout) @@ -1637,7 +1643,7 @@ class Controller(object): continue response.append(resp) statuses.append(resp[0]) - while len(response) < len(start_nodes): + while len(response) < node_number: response.append((HTTP_SERVICE_UNAVAILABLE, '', '', '')) statuses, reasons, resp_headers, bodies = zip(*response) return self.best_response(req, statuses, reasons, bodies, diff --git a/swift/proxy/controllers/obj.py b/swift/proxy/controllers/obj.py index 125fe62c13..9f6cf37ecd 100644 --- a/swift/proxy/controllers/obj.py +++ b/swift/proxy/controllers/obj.py @@ -128,7 +128,8 @@ class BaseObjectController(Controller): self.container_name = unquote(container_name) self.object_name = unquote(object_name) - def iter_nodes_local_first(self, ring, partition, policy=None): + def iter_nodes_local_first(self, ring, partition, policy=None, + local_handoffs_first=False): """ Yields nodes for a ring partition. 
@@ -141,6 +142,9 @@ class BaseObjectController(Controller): :param ring: ring to get nodes from :param partition: ring partition to yield nodes for + :param policy: optional, an instance of :class:`BaseStoragePolicy + :param local_handoffs_first: optional, if True prefer primaries and + local handoff nodes first before looking elsewhere. """ policy_options = self.app.get_policy_options(policy) is_local = policy_options.write_affinity_is_local_fn @@ -148,23 +152,38 @@ class BaseObjectController(Controller): return self.app.iter_nodes(ring, partition, policy=policy) primary_nodes = ring.get_part_nodes(partition) - num_locals = policy_options.write_affinity_node_count_fn( - len(primary_nodes)) + handoff_nodes = ring.get_more_nodes(partition) + all_nodes = itertools.chain(primary_nodes, handoff_nodes) - all_nodes = itertools.chain(primary_nodes, - ring.get_more_nodes(partition)) - first_n_local_nodes = list(itertools.islice( - (node for node in all_nodes if is_local(node)), num_locals)) + if local_handoffs_first: + num_locals = policy_options.write_affinity_handoff_delete_count + if num_locals is None: + local_primaries = [node for node in primary_nodes + if is_local(node)] + num_locals = len(primary_nodes) - len(local_primaries) - # refresh it; it moved when we computed first_n_local_nodes - all_nodes = itertools.chain(primary_nodes, - ring.get_more_nodes(partition)) - local_first_node_iter = itertools.chain( - first_n_local_nodes, - (node for node in all_nodes if node not in first_n_local_nodes)) + first_local_handoffs = list(itertools.islice( + (node for node in handoff_nodes if is_local(node)), num_locals) + ) + preferred_nodes = primary_nodes + first_local_handoffs + else: + num_locals = policy_options.write_affinity_node_count_fn( + len(primary_nodes) + ) + preferred_nodes = list(itertools.islice( + (node for node in all_nodes if is_local(node)), num_locals) + ) + # refresh it; it moved when we computed preferred_nodes + handoff_nodes = 
ring.get_more_nodes(partition) + all_nodes = itertools.chain(primary_nodes, handoff_nodes) - return self.app.iter_nodes( - ring, partition, node_iter=local_first_node_iter, policy=policy) + node_iter = itertools.chain( + preferred_nodes, + (node for node in all_nodes if node not in preferred_nodes) + ) + + return self.app.iter_nodes(ring, partition, node_iter=node_iter, + policy=policy) def GETorHEAD(self, req): """Handle HTTP GET or HEAD requests.""" @@ -592,10 +611,12 @@ class BaseObjectController(Controller): raise NotImplementedError() def _delete_object(self, req, obj_ring, partition, headers): - """ - send object DELETE request to storage nodes. Subclasses of - the BaseObjectController can provide their own implementation - of this method. + """Delete object considering write-affinity. + + When deleting object in write affinity deployment, also take configured + handoff nodes number into consideration, instead of just sending + requests to primary nodes. Otherwise (write-affinity is disabled), + go with the same way as before. :param req: the DELETE Request :param obj_ring: the object ring @@ -603,11 +624,37 @@ class BaseObjectController(Controller): :param headers: system headers to storage nodes :return: Response object """ - # When deleting objects treat a 404 status as 204. 
+ policy_index = req.headers.get('X-Backend-Storage-Policy-Index') + policy = POLICIES.get_by_index(policy_index) + + node_count = None + node_iterator = None + + policy_options = self.app.get_policy_options(policy) + is_local = policy_options.write_affinity_is_local_fn + if is_local is not None: + primaries = obj_ring.get_part_nodes(partition) + node_count = len(primaries) + + local_handoffs = policy_options.write_affinity_handoff_delete_count + if local_handoffs is None: + local_primaries = [node for node in primaries + if is_local(node)] + local_handoffs = len(primaries) - len(local_primaries) + + node_count += local_handoffs + + node_iterator = self.iter_nodes_local_first( + obj_ring, partition, policy=policy, local_handoffs_first=True + ) + status_overrides = {404: 204} resp = self.make_requests(req, obj_ring, partition, 'DELETE', req.swift_entity_path, - headers, overrides=status_overrides) + headers, overrides=status_overrides, + node_count=node_count, + node_iterator=node_iterator) + return resp def _post_object(self, req, obj_ring, partition, headers): @@ -734,8 +781,20 @@ class BaseObjectController(Controller): else: req.headers['X-Timestamp'] = Timestamp(time.time()).internal + # Include local handoff nodes if write-affinity is enabled. 
+ node_count = len(nodes) + policy = POLICIES.get_by_index(policy_index) + policy_options = self.app.get_policy_options(policy) + is_local = policy_options.write_affinity_is_local_fn + if is_local is not None: + local_handoffs = policy_options.write_affinity_handoff_delete_count + if local_handoffs is None: + local_primaries = [node for node in nodes if is_local(node)] + local_handoffs = len(nodes) - len(local_primaries) + node_count += local_handoffs + headers = self._backend_requests( - req, len(nodes), container_partition, container_nodes) + req, node_count, container_partition, container_nodes) return self._delete_object(req, obj_ring, partition, headers) diff --git a/swift/proxy/server.py b/swift/proxy/server.py index 3e1d3e1ea9..ec728d3875 100644 --- a/swift/proxy/server.py +++ b/swift/proxy/server.py @@ -35,7 +35,7 @@ from swift.common.ring import Ring from swift.common.utils import cache_from_env, get_logger, \ get_remote_client, split_path, config_true_value, generate_trans_id, \ affinity_key_function, affinity_locality_predicate, list_from_csv, \ - register_swift_info, readconf + register_swift_info, readconf, config_auto_int_value from swift.common.constraints import check_utf8, valid_api_version from swift.proxy.controllers import AccountController, ContainerController, \ ObjectControllerRouter, InfoController @@ -130,13 +130,18 @@ class ProxyOverrideOptions(object): 'Invalid write_affinity_node_count value: %r' % (' '.join(value))) + self.write_affinity_handoff_delete_count = config_auto_int_value( + get('write_affinity_handoff_delete_count', 'auto'), None + ) + def __repr__(self): return '%s({}, {%s})' % (self.__class__.__name__, ', '.join( '%r: %r' % (k, getattr(self, k)) for k in ( 'sorting_method', 'read_affinity', 'write_affinity', - 'write_affinity_node_count'))) + 'write_affinity_node_count', + 'write_affinity_handoff_delete_count'))) def __eq__(self, other): if not isinstance(other, ProxyOverrideOptions): @@ -145,7 +150,8 @@ class 
ProxyOverrideOptions(object): 'sorting_method', 'read_affinity', 'write_affinity', - 'write_affinity_node_count')) + 'write_affinity_node_count', + 'write_affinity_handoff_delete_count')) class Application(object): diff --git a/test/unit/proxy/controllers/test_obj.py b/test/unit/proxy/controllers/test_obj.py index e91c104cd3..29b348ba4d 100644 --- a/test/unit/proxy/controllers/test_obj.py +++ b/test/unit/proxy/controllers/test_obj.py @@ -279,6 +279,86 @@ class BaseObjectControllerMixin(object): self.assertEqual(len(all_nodes), len(local_first_nodes)) self.assertEqual(sorted(all_nodes), sorted(local_first_nodes)) + def test_iter_nodes_local_handoff_first_noops_when_no_affinity(self): + # this test needs a stable node order - most don't + self.app.sort_nodes = lambda l, *args, **kwargs: l + controller = self.controller_cls( + self.app, 'a', 'c', 'o') + policy = self.policy + self.app.get_policy_options(policy).write_affinity_is_local_fn = None + object_ring = policy.object_ring + all_nodes = object_ring.get_part_nodes(1) + all_nodes.extend(object_ring.get_more_nodes(1)) + + local_first_nodes = list(controller.iter_nodes_local_first( + object_ring, 1, local_handoffs_first=True)) + + self.maxDiff = None + + self.assertEqual(all_nodes, local_first_nodes) + + def test_iter_nodes_handoff_local_first_default(self): + controller = self.controller_cls( + self.app, 'a', 'c', 'o') + policy_conf = self.app.get_policy_options(self.policy) + policy_conf.write_affinity_is_local_fn = ( + lambda node: node['region'] == 1) + + object_ring = self.policy.object_ring + primary_nodes = object_ring.get_part_nodes(1) + handoff_nodes_iter = object_ring.get_more_nodes(1) + all_nodes = primary_nodes + list(handoff_nodes_iter) + handoff_nodes_iter = object_ring.get_more_nodes(1) + local_handoffs = [n for n in handoff_nodes_iter if + policy_conf.write_affinity_is_local_fn(n)] + + prefered_nodes = list(controller.iter_nodes_local_first( + object_ring, 1, local_handoffs_first=True)) + + 
self.assertEqual(len(all_nodes), self.replicas() + + POLICIES.default.object_ring.max_more_nodes) + + first_primary_nodes = prefered_nodes[:len(primary_nodes)] + self.assertEqual(sorted(primary_nodes), sorted(first_primary_nodes)) + + handoff_count = self.replicas() - len(primary_nodes) + first_handoffs = prefered_nodes[len(primary_nodes):][:handoff_count] + self.assertEqual(first_handoffs, local_handoffs[:handoff_count]) + + def test_iter_nodes_handoff_local_first_non_default(self): + # Obviously this test doesn't work if we're testing 1 replica. + # In that case, we don't have any failovers to check. + if self.replicas() == 1: + return + + controller = self.controller_cls( + self.app, 'a', 'c', 'o') + policy_conf = self.app.get_policy_options(self.policy) + policy_conf.write_affinity_is_local_fn = ( + lambda node: node['region'] == 1) + policy_conf.write_affinity_handoff_delete_count = 1 + + object_ring = self.policy.object_ring + primary_nodes = object_ring.get_part_nodes(1) + handoff_nodes_iter = object_ring.get_more_nodes(1) + all_nodes = primary_nodes + list(handoff_nodes_iter) + handoff_nodes_iter = object_ring.get_more_nodes(1) + local_handoffs = [n for n in handoff_nodes_iter if + policy_conf.write_affinity_is_local_fn(n)] + + prefered_nodes = list(controller.iter_nodes_local_first( + object_ring, 1, local_handoffs_first=True)) + + self.assertEqual(len(all_nodes), self.replicas() + + POLICIES.default.object_ring.max_more_nodes) + + first_primary_nodes = prefered_nodes[:len(primary_nodes)] + self.assertEqual(sorted(primary_nodes), sorted(first_primary_nodes)) + + handoff_count = policy_conf.write_affinity_handoff_delete_count + first_handoffs = prefered_nodes[len(primary_nodes):][:handoff_count] + self.assertEqual(first_handoffs, local_handoffs[:handoff_count]) + def test_connect_put_node_timeout(self): controller = self.controller_cls( self.app, 'a', 'c', 'o') @@ -369,6 +449,36 @@ class BaseObjectControllerMixin(object): resp = req.get_response(self.app) 
self.assertEqual(resp.status_int, 204) + def test_DELETE_write_affinity_before_replication(self): + policy_conf = self.app.get_policy_options(self.policy) + policy_conf.write_affinity_handoff_delete_count = self.replicas() / 2 + policy_conf.write_affinity_is_local_fn = ( + lambda node: node['region'] == 1) + handoff_count = policy_conf.write_affinity_handoff_delete_count + + req = swift.common.swob.Request.blank('/v1/a/c/o', method='DELETE') + codes = [204] * self.replicas() + [404] * handoff_count + with set_http_connect(*codes): + resp = req.get_response(self.app) + + self.assertEqual(resp.status_int, 204) + + def test_DELETE_write_affinity_after_replication(self): + policy_conf = self.app.get_policy_options(self.policy) + policy_conf.write_affinity_handoff_delete_count = self.replicas() / 2 + policy_conf.write_affinity_is_local_fn = ( + lambda node: node['region'] == 1) + handoff_count = policy_conf.write_affinity_handoff_delete_count + + req = swift.common.swob.Request.blank('/v1/a/c/o', method='DELETE') + codes = ([204] * (self.replicas() - handoff_count) + + [404] * handoff_count + + [204] * handoff_count) + with set_http_connect(*codes): + resp = req.get_response(self.app) + + self.assertEqual(resp.status_int, 204) + def test_POST_non_int_delete_after(self): t = str(int(time.time() + 100)) + '.1' req = swob.Request.blank('/v1/a/c/o', method='POST', diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index d5da66a6ba..0b38658130 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -1366,16 +1366,19 @@ class TestProxyServerConfigLoading(unittest.TestCase): read_affinity = r1=100 write_affinity = r1 write_affinity_node_count = 1 * replicas + write_affinity_handoff_delete_count = 4 """ expected_default = {"read_affinity": "", "sorting_method": "shuffle", "write_affinity": "", - "write_affinity_node_count_fn": 6} + "write_affinity_node_count_fn": 6, + "write_affinity_handoff_delete_count": None} exp_options 
= {None: expected_default, POLICIES[0]: {"read_affinity": "r1=100", "sorting_method": "affinity", "write_affinity": "r1", - "write_affinity_node_count_fn": 3}, + "write_affinity_node_count_fn": 3, + "write_affinity_handoff_delete_count": 4}, POLICIES[1]: expected_default} exp_is_local = {POLICIES[0]: [({'region': 1, 'zone': 2}, True), ({'region': 2, 'zone': 1}, False)], @@ -1387,7 +1390,8 @@ class TestProxyServerConfigLoading(unittest.TestCase): self.assertEqual( "ProxyOverrideOptions({}, {'sorting_method': 'shuffle', " "'read_affinity': '', 'write_affinity': '', " - "'write_affinity_node_count': '2 * replicas'})", + "'write_affinity_node_count': '2 * replicas', " + "'write_affinity_handoff_delete_count': None})", repr(default_options)) self.assertEqual(default_options, eval(repr(default_options), { 'ProxyOverrideOptions': default_options.__class__})) @@ -1396,7 +1400,8 @@ class TestProxyServerConfigLoading(unittest.TestCase): self.assertEqual( "ProxyOverrideOptions({}, {'sorting_method': 'affinity', " "'read_affinity': 'r1=100', 'write_affinity': 'r1', " - "'write_affinity_node_count': '1 * replicas'})", + "'write_affinity_node_count': '1 * replicas', " + "'write_affinity_handoff_delete_count': 4})", repr(policy_0_options)) self.assertEqual(policy_0_options, eval(repr(policy_0_options), { 'ProxyOverrideOptions': policy_0_options.__class__})) @@ -1411,6 +1416,7 @@ class TestProxyServerConfigLoading(unittest.TestCase): use = egg:swift#proxy sorting_method = affinity write_affinity_node_count = 1 * replicas + write_affinity_handoff_delete_count = 3 [proxy-server:policy:0] read_affinity = r1=100 @@ -1419,12 +1425,14 @@ class TestProxyServerConfigLoading(unittest.TestCase): expected_default = {"read_affinity": "", "sorting_method": "affinity", "write_affinity": "", - "write_affinity_node_count_fn": 3} + "write_affinity_node_count_fn": 3, + "write_affinity_handoff_delete_count": 3} exp_options = {None: expected_default, POLICIES[0]: {"read_affinity": "r1=100", 
"sorting_method": "affinity", "write_affinity": "r1", - "write_affinity_node_count_fn": 3}, + "write_affinity_node_count_fn": 3, + "write_affinity_handoff_delete_count": 3}, POLICIES[1]: expected_default} exp_is_local = {POLICIES[0]: [({'region': 1, 'zone': 2}, True), ({'region': 2, 'zone': 1}, False)], @@ -1440,29 +1448,35 @@ class TestProxyServerConfigLoading(unittest.TestCase): read_affinity = r2=10 write_affinity_node_count = 1 * replicas write_affinity = r2 + write_affinity_handoff_delete_count = 2 [proxy-server:policy:0] read_affinity = r1=100 write_affinity = r1 write_affinity_node_count = 5 + write_affinity_handoff_delete_count = 3 [proxy-server:policy:1] read_affinity = r1=1 write_affinity = r3 write_affinity_node_count = 4 + write_affinity_handoff_delete_count = 4 """ exp_options = {None: {"read_affinity": "r2=10", "sorting_method": "affinity", "write_affinity": "r2", - "write_affinity_node_count_fn": 3}, + "write_affinity_node_count_fn": 3, + "write_affinity_handoff_delete_count": 2}, POLICIES[0]: {"read_affinity": "r1=100", "sorting_method": "affinity", "write_affinity": "r1", - "write_affinity_node_count_fn": 5}, + "write_affinity_node_count_fn": 5, + "write_affinity_handoff_delete_count": 3}, POLICIES[1]: {"read_affinity": "r1=1", "sorting_method": "affinity", "write_affinity": "r3", - "write_affinity_node_count_fn": 4}} + "write_affinity_node_count_fn": 4, + "write_affinity_handoff_delete_count": 4}} exp_is_local = {POLICIES[0]: [({'region': 1, 'zone': 2}, True), ({'region': 2, 'zone': 1}, False)], POLICIES[1]: [({'region': 3, 'zone': 2}, True), @@ -1533,18 +1547,21 @@ class TestProxyServerConfigLoading(unittest.TestCase): None: {"read_affinity": "r1=100", "sorting_method": "shuffle", "write_affinity": "r0", - "write_affinity_node_count_fn": 6}, + "write_affinity_node_count_fn": 6, + "write_affinity_handoff_delete_count": None}, # policy 0 read affinity is r2, dictated by policy 0 section POLICIES[0]: {"read_affinity": "r2=100", "sorting_method": 
"affinity", "write_affinity": "r2", - "write_affinity_node_count_fn": 6}, + "write_affinity_node_count_fn": 6, + "write_affinity_handoff_delete_count": None}, # policy 1 read_affinity is r0, dictated by DEFAULT section, # overrides proxy server section POLICIES[1]: {"read_affinity": "r0=100", "sorting_method": "affinity", "write_affinity": "r0", - "write_affinity_node_count_fn": 6}} + "write_affinity_node_count_fn": 6, + "write_affinity_handoff_delete_count": None}} exp_is_local = { # default write_affinity is r0, dictated by DEFAULT section None: [({'region': 0, 'zone': 2}, True),