Merge "Add the max_clients parameter to bound clients"

2013-04-30 05:57:10 +00:00 · 2013-04-30 05:57:10 +00:00 · 52a6595033
commit 52a6595033
parent 407e08fa30 2d42b37303
10 changed files with 122 additions and 26 deletions
--- a/doc/manpages/account-server.conf.5
+++ b/doc/manpages/account-server.conf.5
@ -114,6 +114,12 @@ Logging level. The default is INFO.
 Enables request logging. The default is True.
 .IP "\fB set log_address\fR
 Logging address. The default is /dev/log.
+.IP \fBmax_clients\fR
+Maximum number of clients one worker can process simultaneously (it will
+actually accept(2) N + 1). Setting this to one (1) will only handle one request
+at a time, without accepting another request concurrently. By increasing the
+number of workers to a much higher value, one can reduce the impact of slow
+file system operations in one request on other requests. The default is 1024.
 .RE
 .PD

@ -253,5 +259,3 @@ and

 .SH "SEE ALSO"
 .BR swift-account-server(1),
-
-
--- a/doc/manpages/container-server.conf.5
+++ b/doc/manpages/container-server.conf.5
@ -120,6 +120,12 @@ Logging address. The default is /dev/log.
 Request timeout to external services. The default is 3 seconds.
 .IP \fBconn_timeout\fR
 Connection timeout to external services. The default is 0.5 seconds.
+.IP \fBmax_clients\fR
+Maximum number of clients one worker can process simultaneously (it will
+actually accept(2) N + 1). Setting this to one (1) will only handle one request
+at a time, without accepting another request concurrently. By increasing the
+number of workers to a much higher value, one can reduce the impact of slow
+file system operations in one request on other requests. The default is 1024.
 .RE
 .PD

@ -278,5 +284,3 @@ and

 .SH "SEE ALSO"
 .BR swift-container-server(1),
-
-
--- a/doc/manpages/object-server.conf.5
+++ b/doc/manpages/object-server.conf.5
@ -120,6 +120,12 @@ Logging address. The default is /dev/log.
 Request timeout to external services. The default is 3 seconds.
 .IP \fBconn_timeout\fR
 Connection timeout to external services. The default is 0.5 seconds.
+.IP \fBmax_clients\fR
+Maximum number of clients one worker can process simultaneously (it will
+actually accept(2) N + 1). Setting this to one (1) will only handle one request
+at a time, without accepting another request concurrently. By increasing the
+number of workers to a much higher value, one can reduce the impact of slow
+file system operations in one request on other requests. The default is 1024.
 .RE
 .PD

@ -270,5 +276,3 @@ and

 .SH "SEE ALSO"
 .BR swift-object-server(1),
-
-
--- a/doc/manpages/proxy-server.conf.5
+++ b/doc/manpages/proxy-server.conf.5
@ -537,6 +537,12 @@ object.  The default is 10 segments.
 .IP \fBrate_limit_segments_per_sec\fR
 Once segment rate-limiting kicks in for an object, limit segments served to N
 per second.  The default is 1.
+.IP \fBmax_clients\fR
+Maximum number of clients one worker can process simultaneously (it will
+actually accept(2) N + 1). Setting this to one (1) will only handle one request
+at a time, without accepting another request concurrently. By increasing the
+number of workers to a much higher value, one can reduce the impact of slow
+file system operations in one request on other requests. The default is 1024.
 .RE
 .PD

--- a/doc/source/deployment_guide.rst
+++ b/doc/source/deployment_guide.rst
@ -57,7 +57,7 @@ Web Front End Options
 ---------------------

 Swift comes with an integral web front end. However, it can also be deployed
-as a request processor of an Apache2 using mod_wsgi as described in 
+as a request processor of an Apache2 using mod_wsgi as described in
 :doc:`Apache Deployment Guide <apache_deployment_guide>`.

 .. _ring-preparing:
@ -241,6 +241,16 @@ bind_ip              0.0.0.0     IP Address for server to bind to
 bind_port            6000        Port for server to bind to
 bind_timeout         30          Seconds to attempt bind before giving up
 workers              1           Number of workers to fork
+max_clients          1024        Maximum number of clients one worker can
+                                 process simultaneously (it will actually
+                                 accept(2) N + 1). Setting this to one (1)
+                                 will only handle one request at a time,
+                                 without accepting another request
+                                 concurrently. By increasing the number of
+                                 workers to a much higher value, one can
+                                 reduce the impact of slow file system
+                                 operations in one request on other
+                                 requests.
 disable_fallocate    false       Disable "fast fail" fallocate checks if the
                                 underlying filesystem does not support it.
 log_custom_handlers  None        Comma-separated list of functions to call
@ -358,6 +368,16 @@ bind_ip              0.0.0.0     IP Address for server to bind to
 bind_port            6001        Port for server to bind to
 bind_timeout         30          Seconds to attempt bind before giving up
 workers              1           Number of workers to fork
+max_clients          1024        Maximum number of clients one worker can
+                                 process simultaneously (it will actually
+                                 accept(2) N + 1). Setting this to one (1)
+                                 will only handle one request at a time,
+                                 without accepting another request
+                                 concurrently. By increasing the number of
+                                 workers to a much higher value, one can
+                                 reduce the impact of slow file system
+                                 operations in one request on other
+                                 requests.
 user                 swift       User to run as
 disable_fallocate    false       Disable "fast fail" fallocate checks if the
                                 underlying filesystem does not support it.
@ -440,7 +460,7 @@ log_name               container-auditor  Label used when logging
 log_facility           LOG_LOCAL0         Syslog log facility
 log_level              INFO               Logging level
 interval               1800               Minimum time for a pass to take
-containers_per_second  200                Maximum containers audited per second. 
+containers_per_second  200                Maximum containers audited per second.
                                          Should be tuned according to individual
                                          system specs. 0 is unlimited.
 =====================  =================  =======================================
@ -468,6 +488,16 @@ bind_ip              0.0.0.0     IP Address for server to bind to
 bind_port            6002        Port for server to bind to
 bind_timeout         30          Seconds to attempt bind before giving up
 workers              1           Number of workers to fork
+max_clients          1024        Maximum number of clients one worker can
+                                 process simultaneously (it will actually
+                                 accept(2) N + 1). Setting this to one (1)
+                                 will only handle one request at a time,
+                                 without accepting another request
+                                 concurrently. By increasing the number of
+                                 workers to a much higher value, one can
+                                 reduce the impact of slow file system
+                                 operations in one request on other
+                                 requests.
 user                 swift       User to run as
 db_preallocation     off         If you don't mind the extra disk space usage in
                                 overhead, you can turn this on to preallocate
@ -527,9 +557,9 @@ log_name              account-auditor  Label used when logging
 log_facility          LOG_LOCAL0       Syslog log facility
 log_level             INFO             Logging level
 interval              1800             Minimum time for a pass to take
-accounts_per_second   200              Maximum accounts audited per second. 
+accounts_per_second   200              Maximum accounts audited per second.
                                       Should be tuned according to individual
-                                       system specs. 0 is unlimited. 
+                                       system specs. 0 is unlimited.
 ====================  ===============  =======================================

 [account-reaper]
@ -572,6 +602,21 @@ bind_timeout                  30               Seconds to attempt bind before
                                               giving up
 swift_dir                     /etc/swift       Swift configuration directory
 workers                       1                Number of workers to fork
+max_clients                   1024             Maximum number of clients one
+                                               worker can process
+                                               simultaneously (it will
+                                               actually accept(2) N +
+                                               1). Setting this to one (1)
+                                               will only handle one request at
+                                               a time, without accepting
+                                               another request
+                                               concurrently. By increasing the
+                                               number of workers to a much
+                                               higher value, one can reduce
+                                               the impact of slow file system
+                                               operations in one request on
+                                               other requests.
 user                          swift            User to run as
 cert_file                                      Path to the ssl .crt. This
                                               should be enabled for testing
@ -580,9 +625,9 @@ key_file                                       Path to the ssl .key. This
                                               should be enabled for testing
                                               purposes only.
 cors_allow_origin                              This is a list of hosts that
-                                               are included with any CORS 
-                                               request by default and 
-                                               returned with the 
+                                               are included with any CORS
+                                               request by default and
+                                               returned with the
                                               Access-Control-Allow-Origin
                                               header in addition to what
                                               the container has set.
@ -785,12 +830,12 @@ should also be monitored to ensure that the times do not vary too much.
 General Service Tuning
 ----------------------

-Most services support either a worker or concurrency value in the settings.
-This allows the services to make effective use of the cores available. A good
-starting point to set the concurrency level for the proxy and storage services
-to 2 times the number of cores available. If more than one service is
-sharing a server, then some experimentation may be needed to find the best
-balance.
+Most services support either a `workers` or `concurrency` value in the
+settings.  This allows the services to make effective use of the cores
+available. A good starting point is to set the concurrency level for the proxy
+and storage services to 2 times the number of cores available. If more than
+one service is sharing a server, then some experimentation may be needed to
+find the best balance.

 At Rackspace, our Proxy servers have dual quad core processors, giving us 8
 cores. Our testing has shown 16 workers to be a pretty good balance when
@ -798,9 +843,21 @@ saturating a 10g network and gives good CPU utilization.

 Our Storage servers all run together on the same servers. These servers have
 dual quad core processors, for 8 cores total. We run the Account, Container,
-and Object servers with 8 workers each. Most of the background jobs are run
-at a concurrency of 1, with the exception of the replicators which are run at
-a concurrency of 2.
+and Object servers with 8 workers each. Most of the background jobs are run at
+a concurrency of 1, with the exception of the replicators which are run at a
+concurrency of 2.
+
+The `max_clients` parameter can be used to adjust the number of client
+requests an individual worker accepts for processing. The fewer requests being
+processed at one time, the less likely a request that consumes the worker's
+CPU time, or blocks in the OS, will negatively impact other requests. The more
+requests being processed at one time, the more likely one worker can utilize
+network and disk capacity.
+
+On systems that have more cores, and more memory, where one can afford to run
+more workers, raising the number of workers and lowering the maximum number of
+clients serviced per worker can lessen the impact of CPU intensive or stalled
+requests.

 The above configuration setting should be taken as suggestions and testing
 of configuration settings should be done to ensure best utilization of CPU,
--- a/etc/account-server.conf-sample
+++ b/etc/account-server.conf-sample
@ -47,6 +47,7 @@ use = egg:swift#account
 # set log_requests = True
 # set log_address = /dev/log
 # auto_create_account_prefix = .
+# max_clients = 1024

 [filter:healthcheck]
 use = egg:swift#healthcheck
--- a/etc/container-server.conf-sample
+++ b/etc/container-server.conf-sample
@ -53,6 +53,7 @@ use = egg:swift#container
 # conn_timeout = 0.5
 # allow_versions = False
 # auto_create_account_prefix = .
+# max_clients = 1024

 [filter:healthcheck]
 use = egg:swift#healthcheck
--- a/etc/object-server.conf-sample
+++ b/etc/object-server.conf-sample
@ -62,6 +62,7 @@ use = egg:swift#object
 # Content-Type, etag, Content-Length, or deleted
 # allowed_headers = Content-Disposition, Content-Encoding, X-Delete-At, X-Object-Manifest, X-Static-Large-Object
 # auto_create_account_prefix = .
+# max_clients = 1024

 [filter:healthcheck]
 use = egg:swift#healthcheck
--- a/etc/proxy-server.conf-sample
+++ b/etc/proxy-server.conf-sample
@ -37,6 +37,7 @@
 # Use a comma separated list of full url (http://foo.bar:1234,https://foo.bar)
 # cors_allow_origin =
 # eventlet_debug = false
+# max_clients = 1024

 [pipeline:main]
 pipeline = catch_errors healthcheck proxy-logging cache slo ratelimit tempauth container-quotas account-quotas proxy-logging proxy-server
--- a/swift/common/wsgi.py
+++ b/swift/common/wsgi.py
@ -105,6 +105,22 @@ def get_socket(conf, default_port=8080):
    return sock


+class RestrictedGreenPool(GreenPool):
+    """
+    Works the same as GreenPool, but if the size is specified as one, then the
+    spawn_n() method will invoke waitall() before returning to prevent the
+    caller from doing any other work (like calling accept()).
+    """
+    def __init__(self, size=1024):
+        super(RestrictedGreenPool, self).__init__(size=size)
+        self._rgp_do_wait = (size == 1)
+
+    def spawn_n(self, *args, **kwargs):
+        super(RestrictedGreenPool, self).spawn_n(*args, **kwargs)
+        if self._rgp_do_wait:
+            self.waitall()
+
+
 # TODO: pull pieces of this out to test
 def run_wsgi(conf_file, app_section, *args, **kwargs):
    """
@ -132,7 +148,7 @@ def run_wsgi(conf_file, app_section, *args, **kwargs):
    # redirect errors to logger and close stdio
    capture_stdio(logger)

-    def run_server():
+    def run_server(max_clients):
        wsgi.HttpProtocol.default_request_version = "HTTP/1.0"
        # Turn off logging requests by the underlying WSGI software.
        wsgi.HttpProtocol.log_request = lambda *a: None
@ -147,7 +163,7 @@ def run_wsgi(conf_file, app_section, *args, **kwargs):
        eventlet.debug.hub_exceptions(eventlet_debug)
        app = loadapp('config:%s' % conf_file,
                      global_conf={'log_name': log_name})
-        pool = GreenPool(size=1024)
+        pool = RestrictedGreenPool(size=max_clients)
        try:
            wsgi.server(sock, app, NullLogger(), custom_pool=pool)
        except socket.error, err:
@ -155,10 +171,11 @@ def run_wsgi(conf_file, app_section, *args, **kwargs):
                raise
        pool.waitall()

+    max_clients = int(conf.get('max_clients', '1024'))
    worker_count = int(conf.get('workers', '1'))
    # Useful for profiling [no forks].
    if worker_count == 0:
-        run_server()
+        run_server(max_clients)
        return

    def kill_children(*args):
@ -184,7 +201,7 @@ def run_wsgi(conf_file, app_section, *args, **kwargs):
            if pid == 0:
                signal.signal(signal.SIGHUP, signal.SIG_DFL)
                signal.signal(signal.SIGTERM, signal.SIG_DFL)
-                run_server()
+                run_server(max_clients)
                logger.notice('Child %d exiting normally' % os.getpid())
                return
            else: