Merge "Add databases_per_second to db daemons"
This commit is contained in:
commit
35c5f666de
@ -1173,94 +1173,98 @@ ionice_priority None I/O scheduling priority of ser
|
||||
[container-replicator]
|
||||
**********************
|
||||
|
||||
================== =========================== =============================
|
||||
Option Default Description
|
||||
------------------ --------------------------- -----------------------------
|
||||
log_name container-replicator Label used when logging
|
||||
log_facility LOG_LOCAL0 Syslog log facility
|
||||
log_level INFO Logging level
|
||||
log_address /dev/log Logging directory
|
||||
per_diff 1000 Maximum number of database
|
||||
rows that will be sync'd in a
|
||||
single HTTP replication
|
||||
request. Databases with less
|
||||
than or equal to this number
|
||||
of differing rows will always
|
||||
be sync'd using an HTTP
|
||||
replication request rather
|
||||
than using rsync.
|
||||
max_diffs 100 Maximum number of HTTP
|
||||
replication requests attempted
|
||||
on each replication pass for
|
||||
any one container. This caps
|
||||
how long the replicator will
|
||||
spend trying to sync a given
|
||||
database per pass so the other
|
||||
databases don't get starved.
|
||||
concurrency 8 Number of replication workers
|
||||
to spawn
|
||||
interval 30 Time in seconds to wait
|
||||
between replication passes
|
||||
node_timeout 10 Request timeout to external
|
||||
services
|
||||
conn_timeout 0.5 Connection timeout to external
|
||||
services
|
||||
reclaim_age 604800 Time elapsed in seconds before
|
||||
a container can be reclaimed
|
||||
rsync_module {replication_ip}::container Format of the rsync module
|
||||
where the replicator will send
|
||||
data. The configuration value
|
||||
can include some variables
|
||||
that will be extracted from
|
||||
the ring. Variables must
|
||||
follow the format {NAME} where
|
||||
NAME is one of: ip, port,
|
||||
replication_ip,
|
||||
replication_port, region,
|
||||
zone, device, meta. See
|
||||
etc/rsyncd.conf-sample for
|
||||
some examples.
|
||||
rsync_compress no Allow rsync to compress data
|
||||
which is transmitted to
|
||||
destination node during sync.
|
||||
However, this is applicable
|
||||
only when destination node is
|
||||
in a different region than the
|
||||
local one. NOTE: Objects that
|
||||
are already compressed (for
|
||||
example: .tar.gz, mp3) might
|
||||
slow down the syncing process.
|
||||
recon_cache_path /var/cache/swift Path to recon cache
|
||||
nice_priority None Scheduling priority of server
|
||||
processes. Niceness values
|
||||
range from -20 (most favorable
|
||||
to the process) to 19 (least
|
||||
favorable to the process).
|
||||
The default does not modify
|
||||
priority.
|
||||
ionice_class None I/O scheduling class of server
|
||||
processes. I/O niceness class
|
||||
values are
|
||||
IOPRIO_CLASS_RT (realtime),
|
||||
IOPRIO_CLASS_BE (best-effort),
|
||||
and IOPRIO_CLASS_IDLE (idle).
|
||||
The default does not modify
|
||||
class and priority. Linux
|
||||
supports io scheduling
|
||||
priorities and classes since
|
||||
2.6.13 with the CFQ io
|
||||
scheduler.
|
||||
Work only with ionice_priority.
|
||||
ionice_priority None I/O scheduling priority of
|
||||
server processes. I/O niceness
|
||||
priority is a number which goes
|
||||
from 0 to 7.
|
||||
The higher the value, the lower
|
||||
the I/O priority of the process.
|
||||
Work only with ionice_class.
|
||||
Ignored if IOPRIO_CLASS_IDLE
|
||||
is set.
|
||||
================== =========================== =============================
|
||||
==================== =========================== =============================
|
||||
Option Default Description
|
||||
-------------------- --------------------------- -----------------------------
|
||||
log_name container-replicator Label used when logging
|
||||
log_facility LOG_LOCAL0 Syslog log facility
|
||||
log_level INFO Logging level
|
||||
log_address /dev/log Logging address
|
||||
per_diff 1000 Maximum number of database
|
||||
rows that will be sync'd in a
|
||||
single HTTP replication
|
||||
request. Databases with fewer
|
||||
than or equal to this number
|
||||
of differing rows will always
|
||||
be sync'd using an HTTP
|
||||
replication request rather
|
||||
than using rsync.
|
||||
max_diffs 100 Maximum number of HTTP
|
||||
replication requests attempted
|
||||
on each replication pass for
|
||||
any one container. This caps
|
||||
how long the replicator will
|
||||
spend trying to sync a given
|
||||
database per pass so the other
|
||||
databases don't get starved.
|
||||
concurrency 8 Number of replication workers
|
||||
to spawn
|
||||
interval 30 Time in seconds to wait
|
||||
between replication passes
|
||||
databases_per_second 50 Maximum databases to process
|
||||
per second. Should be tuned
|
||||
according to individual
|
||||
system specs. 0 is unlimited.
|
||||
node_timeout 10 Request timeout to external
|
||||
services
|
||||
conn_timeout 0.5 Connection timeout to external
|
||||
services
|
||||
reclaim_age 604800 Time elapsed in seconds before
|
||||
a container can be reclaimed
|
||||
rsync_module {replication_ip}::container Format of the rsync module
|
||||
where the replicator will send
|
||||
data. The configuration value
|
||||
can include some variables
|
||||
that will be extracted from
|
||||
the ring. Variables must
|
||||
follow the format {NAME} where
|
||||
NAME is one of: ip, port,
|
||||
replication_ip,
|
||||
replication_port, region,
|
||||
zone, device, meta. See
|
||||
etc/rsyncd.conf-sample for
|
||||
some examples.
|
||||
rsync_compress no Allow rsync to compress data
|
||||
which is transmitted to
|
||||
destination node during sync.
|
||||
However, this is applicable
|
||||
only when destination node is
|
||||
in a different region than the
|
||||
local one. NOTE: Objects that
|
||||
are already compressed (for
|
||||
example: .tar.gz, mp3) might
|
||||
slow down the syncing process.
|
||||
recon_cache_path /var/cache/swift Path to recon cache
|
||||
nice_priority None Scheduling priority of server
|
||||
processes. Niceness values
|
||||
range from -20 (most favorable
|
||||
to the process) to 19 (least
|
||||
favorable to the process).
|
||||
The default does not modify
|
||||
priority.
|
||||
ionice_class None I/O scheduling class of server
|
||||
processes. I/O niceness class
|
||||
values are
|
||||
IOPRIO_CLASS_RT (realtime),
|
||||
IOPRIO_CLASS_BE (best-effort),
|
||||
and IOPRIO_CLASS_IDLE (idle).
|
||||
The default does not modify
|
||||
class and priority. Linux
|
||||
supports io scheduling
|
||||
priorities and classes since
|
||||
2.6.13 with the CFQ io
|
||||
scheduler.
|
||||
Works only with ionice_priority.
|
||||
ionice_priority None I/O scheduling priority of
|
||||
server processes. I/O niceness
|
||||
priority is a number which goes
|
||||
from 0 to 7.
|
||||
The higher the value, the lower
|
||||
the I/O priority of the process.
|
||||
Works only with ionice_class.
|
||||
Ignored if IOPRIO_CLASS_IDLE
|
||||
is set.
|
||||
==================== =========================== =============================
|
||||
|
||||
*******************
|
||||
[container-updater]
|
||||
@ -1524,89 +1528,93 @@ ionice_priority None I/O scheduling priority of server
|
||||
[account-replicator]
|
||||
********************
|
||||
|
||||
================== ========================= ===============================
|
||||
Option Default Description
|
||||
------------------ ------------------------- -------------------------------
|
||||
log_name account-replicator Label used when logging
|
||||
log_facility LOG_LOCAL0 Syslog log facility
|
||||
log_level INFO Logging level
|
||||
log_address /dev/log Logging directory
|
||||
per_diff 1000 Maximum number of database rows
|
||||
that will be sync'd in a single
|
||||
HTTP replication request.
|
||||
Databases with less than or
|
||||
equal to this number of
|
||||
differing rows will always be
|
||||
sync'd using an HTTP replication
|
||||
request rather than using rsync.
|
||||
max_diffs 100 Maximum number of HTTP
|
||||
replication requests attempted
|
||||
on each replication pass for any
|
||||
one container. This caps how
|
||||
long the replicator will spend
|
||||
trying to sync a given database
|
||||
per pass so the other databases
|
||||
don't get starved.
|
||||
concurrency 8 Number of replication workers
|
||||
to spawn
|
||||
interval 30 Time in seconds to wait between
|
||||
replication passes
|
||||
node_timeout 10 Request timeout to external
|
||||
services
|
||||
conn_timeout 0.5 Connection timeout to external
|
||||
services
|
||||
reclaim_age 604800 Time elapsed in seconds before
|
||||
an account can be reclaimed
|
||||
rsync_module {replication_ip}::account Format of the rsync module where
|
||||
the replicator will send data.
|
||||
The configuration value can
|
||||
include some variables that will
|
||||
be extracted from the ring.
|
||||
Variables must follow the format
|
||||
{NAME} where NAME is one of: ip,
|
||||
port, replication_ip,
|
||||
replication_port, region, zone,
|
||||
device, meta. See
|
||||
etc/rsyncd.conf-sample for some
|
||||
examples.
|
||||
rsync_compress no Allow rsync to compress data
|
||||
which is transmitted to
|
||||
destination node during sync.
|
||||
However, this is applicable only
|
||||
when destination node is in a
|
||||
different region than the local
|
||||
one. NOTE: Objects that are
|
||||
already compressed (for example:
|
||||
.tar.gz, mp3) might slow down
|
||||
the syncing process.
|
||||
recon_cache_path /var/cache/swift Path to recon cache
|
||||
nice_priority None Scheduling priority of server
|
||||
processes. Niceness values
|
||||
range from -20 (most favorable
|
||||
to the process) to 19 (least
|
||||
favorable to the process).
|
||||
The default does not modify
|
||||
priority.
|
||||
ionice_class None I/O scheduling class of server
|
||||
processes. I/O niceness class
|
||||
values are IOPRIO_CLASS_RT
|
||||
(realtime), IOPRIO_CLASS_BE
|
||||
(best-effort), and IOPRIO_CLASS_IDLE
|
||||
(idle).
|
||||
The default does not modify
|
||||
class and priority. Linux supports
|
||||
io scheduling priorities and classes
|
||||
since 2.6.13 with the CFQ io scheduler.
|
||||
Work only with ionice_priority.
|
||||
ionice_priority None I/O scheduling priority of server
|
||||
processes. I/O niceness priority
|
||||
is a number which goes from 0 to 7.
|
||||
The higher the value, the lower
|
||||
the I/O priority of the process.
|
||||
Work only with ionice_class.
|
||||
Ignored if IOPRIO_CLASS_IDLE
|
||||
is set.
|
||||
================== ========================= ===============================
|
||||
==================== ========================= ===============================
|
||||
Option Default Description
|
||||
-------------------- ------------------------- -------------------------------
|
||||
log_name account-replicator Label used when logging
|
||||
log_facility LOG_LOCAL0 Syslog log facility
|
||||
log_level INFO Logging level
|
||||
log_address /dev/log Logging address
|
||||
per_diff 1000 Maximum number of database rows
|
||||
that will be sync'd in a single
|
||||
HTTP replication request.
|
||||
Databases with fewer than or
|
||||
equal to this number of
|
||||
differing rows will always be
|
||||
sync'd using an HTTP replication
|
||||
request rather than using rsync.
|
||||
max_diffs 100 Maximum number of HTTP
|
||||
replication requests attempted
|
||||
on each replication pass for any
|
||||
one account. This caps how
|
||||
long the replicator will spend
|
||||
trying to sync a given database
|
||||
per pass so the other databases
|
||||
don't get starved.
|
||||
concurrency 8 Number of replication workers
|
||||
to spawn
|
||||
interval 30 Time in seconds to wait between
|
||||
replication passes
|
||||
databases_per_second 50 Maximum databases to process
|
||||
per second. Should be tuned
|
||||
according to individual
|
||||
system specs. 0 is unlimited.
|
||||
node_timeout 10 Request timeout to external
|
||||
services
|
||||
conn_timeout 0.5 Connection timeout to external
|
||||
services
|
||||
reclaim_age 604800 Time elapsed in seconds before
|
||||
an account can be reclaimed
|
||||
rsync_module {replication_ip}::account Format of the rsync module where
|
||||
the replicator will send data.
|
||||
The configuration value can
|
||||
include some variables that will
|
||||
be extracted from the ring.
|
||||
Variables must follow the format
|
||||
{NAME} where NAME is one of: ip,
|
||||
port, replication_ip,
|
||||
replication_port, region, zone,
|
||||
device, meta. See
|
||||
etc/rsyncd.conf-sample for some
|
||||
examples.
|
||||
rsync_compress no Allow rsync to compress data
|
||||
which is transmitted to
|
||||
destination node during sync.
|
||||
However, this is applicable only
|
||||
when destination node is in a
|
||||
different region than the local
|
||||
one. NOTE: Objects that are
|
||||
already compressed (for example:
|
||||
.tar.gz, mp3) might slow down
|
||||
the syncing process.
|
||||
recon_cache_path /var/cache/swift Path to recon cache
|
||||
nice_priority None Scheduling priority of server
|
||||
processes. Niceness values
|
||||
range from -20 (most favorable
|
||||
to the process) to 19 (least
|
||||
favorable to the process).
|
||||
The default does not modify
|
||||
priority.
|
||||
ionice_class None I/O scheduling class of server
|
||||
processes. I/O niceness class
|
||||
values are IOPRIO_CLASS_RT
|
||||
(realtime), IOPRIO_CLASS_BE
|
||||
(best-effort), and IOPRIO_CLASS_IDLE
|
||||
(idle).
|
||||
The default does not modify
|
||||
class and priority. Linux supports
|
||||
io scheduling priorities and classes
|
||||
since 2.6.13 with the CFQ io scheduler.
|
||||
Works only with ionice_priority.
|
||||
ionice_priority None I/O scheduling priority of server
|
||||
processes. I/O niceness priority
|
||||
is a number which goes from 0 to 7.
|
||||
The higher the value, the lower
|
||||
the I/O priority of the process.
|
||||
Works only with ionice_class.
|
||||
Ignored if IOPRIO_CLASS_IDLE
|
||||
is set.
|
||||
==================== ========================= ===============================
|
||||
|
||||
*****************
|
||||
[account-auditor]
|
||||
|
@ -143,6 +143,9 @@ use = egg:swift#recon
|
||||
# run_pause is deprecated, use interval instead
|
||||
# run_pause = 30
|
||||
#
|
||||
# Process at most this many databases per second
|
||||
# databases_per_second = 50
|
||||
#
|
||||
# node_timeout = 10
|
||||
# conn_timeout = 0.5
|
||||
#
|
||||
|
@ -156,6 +156,9 @@ use = egg:swift#recon
|
||||
# run_pause is deprecated, use interval instead
|
||||
# run_pause = 30
|
||||
#
|
||||
# Process at most this many databases per second
|
||||
# databases_per_second = 50
|
||||
#
|
||||
# node_timeout = 10
|
||||
# conn_timeout = 0.5
|
||||
#
|
||||
@ -436,6 +439,9 @@ use = egg:swift#xprofile
|
||||
# Time in seconds to wait between sharder cycles
|
||||
# interval = 30
|
||||
#
|
||||
# Process at most this many databases per second
|
||||
# databases_per_second = 50
|
||||
#
|
||||
# The container-sharder accepts the following configuration options as defined
|
||||
# in the container-replicator section:
|
||||
#
|
||||
|
@ -33,7 +33,7 @@ from swift.common.utils import get_logger, whataremyips, storage_directory, \
|
||||
renamer, mkdirs, lock_parent_directory, config_true_value, \
|
||||
unlink_older_than, dump_recon_cache, rsync_module_interpolation, \
|
||||
json, parse_override_options, round_robin_iter, Everything, get_db_files, \
|
||||
parse_db_filename, quote
|
||||
parse_db_filename, quote, RateLimitedIterator
|
||||
from swift.common import ring
|
||||
from swift.common.ring.utils import is_local_device
|
||||
from swift.common.http import HTTP_NOT_FOUND, HTTP_INSUFFICIENT_STORAGE, \
|
||||
@ -204,6 +204,8 @@ class Replicator(Daemon):
|
||||
' to use option %(type)s-replicator/'
|
||||
'interval.'
|
||||
% {'type': self.server_type})
|
||||
self.databases_per_second = int(
|
||||
conf.get('databases_per_second', 50))
|
||||
self.node_timeout = float(conf.get('node_timeout', 10))
|
||||
self.conn_timeout = float(conf.get('conn_timeout', 0.5))
|
||||
self.rsync_compress = config_true_value(
|
||||
@ -733,6 +735,11 @@ class Replicator(Daemon):
|
||||
def report_up_to_date(self, full_info):
|
||||
return True
|
||||
|
||||
def roundrobin_datadirs(self, dirs):
|
||||
return RateLimitedIterator(
|
||||
roundrobin_datadirs(dirs),
|
||||
elements_per_second=self.databases_per_second)
|
||||
|
||||
def run_once(self, *args, **kwargs):
|
||||
"""Run a replication pass once."""
|
||||
override_options = parse_override_options(once=True, **kwargs)
|
||||
@ -789,7 +796,7 @@ class Replicator(Daemon):
|
||||
"file, not replicating",
|
||||
", ".join(ips), self.port)
|
||||
self.logger.info(_('Beginning replication run'))
|
||||
for part, object_file, node_id in roundrobin_datadirs(dirs):
|
||||
for part, object_file, node_id in self.roundrobin_datadirs(dirs):
|
||||
self.cpool.spawn_n(
|
||||
self._replicate_object, part, object_file, node_id)
|
||||
self.cpool.waitall()
|
||||
|
@ -23,7 +23,7 @@ import os
|
||||
import six
|
||||
from eventlet import Timeout
|
||||
|
||||
from swift.common import internal_client, db_replicator
|
||||
from swift.common import internal_client
|
||||
from swift.common.constraints import check_drive
|
||||
from swift.common.direct_client import (direct_put_container,
|
||||
DirectClientException)
|
||||
@ -1500,7 +1500,7 @@ class ContainerSharder(ContainerReplicator):
|
||||
dirs.append((datadir, node, part_filt))
|
||||
if not dirs:
|
||||
self.logger.warning('Found no data dirs!')
|
||||
for part, path, node in db_replicator.roundrobin_datadirs(dirs):
|
||||
for part, path, node in self.roundrobin_datadirs(dirs):
|
||||
# NB: get_part_nodes always provides an 'index' key;
|
||||
# this will be used in leader selection
|
||||
for primary in self.ring.get_part_nodes(int(part)):
|
||||
|
@ -321,6 +321,7 @@ class TestDBReplicator(unittest.TestCase):
|
||||
# later config should be extended to assert more config options
|
||||
replicator = TestReplicator({'node_timeout': '3.5'})
|
||||
self.assertEqual(replicator.node_timeout, 3.5)
|
||||
self.assertEqual(replicator.databases_per_second, 50)
|
||||
|
||||
def test_repl_connection(self):
|
||||
node = {'replication_ip': '127.0.0.1', 'replication_port': 80,
|
||||
|
@ -128,6 +128,7 @@ class TestSharder(BaseTestSharder):
|
||||
expected = {
|
||||
'mount_check': True, 'bind_ip': '0.0.0.0', 'port': 6201,
|
||||
'per_diff': 1000, 'max_diffs': 100, 'interval': 30,
|
||||
'databases_per_second': 50,
|
||||
'cleave_row_batch_size': 10000,
|
||||
'node_timeout': 10, 'conn_timeout': 5,
|
||||
'rsync_compress': False,
|
||||
@ -154,6 +155,7 @@ class TestSharder(BaseTestSharder):
|
||||
conf = {
|
||||
'mount_check': False, 'bind_ip': '10.11.12.13', 'bind_port': 62010,
|
||||
'per_diff': 2000, 'max_diffs': 200, 'interval': 60,
|
||||
'databases_per_second': 5,
|
||||
'cleave_row_batch_size': 3000,
|
||||
'node_timeout': 20, 'conn_timeout': 1,
|
||||
'rsync_compress': True,
|
||||
@ -176,6 +178,7 @@ class TestSharder(BaseTestSharder):
|
||||
expected = {
|
||||
'mount_check': False, 'bind_ip': '10.11.12.13', 'port': 62010,
|
||||
'per_diff': 2000, 'max_diffs': 200, 'interval': 60,
|
||||
'databases_per_second': 5,
|
||||
'cleave_row_batch_size': 3000,
|
||||
'node_timeout': 20, 'conn_timeout': 1,
|
||||
'rsync_compress': True,
|
||||
@ -485,7 +488,7 @@ class TestSharder(BaseTestSharder):
|
||||
0, 'text/plain', 'etag', 0)
|
||||
|
||||
# check only sharding enabled containers are processed
|
||||
with mock.patch.object(
|
||||
with mock.patch('eventlet.sleep'), mock.patch.object(
|
||||
sharder, '_process_broker'
|
||||
) as mock_process_broker:
|
||||
sharder._local_device_ids = {'stale_node_id'}
|
||||
@ -539,7 +542,7 @@ class TestSharder(BaseTestSharder):
|
||||
"for %s" % broker.path)
|
||||
|
||||
# check exceptions are handled
|
||||
with mock.patch.object(
|
||||
with mock.patch('eventlet.sleep'), mock.patch.object(
|
||||
sharder, '_process_broker', side_effect=mock_processing
|
||||
) as mock_process_broker:
|
||||
sharder._local_device_ids = {'stale_node_id'}
|
||||
@ -593,7 +596,7 @@ class TestSharder(BaseTestSharder):
|
||||
for i in range(10):
|
||||
brokers[1].delete_object(
|
||||
'o%s' % i, next(self.ts_iter).internal)
|
||||
with mock.patch.object(
|
||||
with mock.patch('eventlet.sleep'), mock.patch.object(
|
||||
sharder, '_process_broker'
|
||||
) as mock_process_broker:
|
||||
sharder._local_device_ids = {999}
|
||||
@ -612,6 +615,53 @@ class TestSharder(BaseTestSharder):
|
||||
expected_candidate_stats, sharder, 'sharding_candidates')
|
||||
self._assert_recon_stats(None, sharder, 'sharding_progress')
|
||||
|
||||
def test_ratelimited_roundrobin(self):
|
||||
n_databases = 100
|
||||
|
||||
def stub_iter(dirs):
|
||||
for i in range(n_databases):
|
||||
yield i, '/srv/node/sda/path/to/container.db', {}
|
||||
|
||||
now = time.time()
|
||||
clock = {
|
||||
'sleeps': [],
|
||||
'now': now,
|
||||
}
|
||||
|
||||
def fake_sleep(t):
|
||||
clock['sleeps'].append(t)
|
||||
clock['now'] += t
|
||||
|
||||
def fake_time():
|
||||
return clock['now']
|
||||
|
||||
with self._mock_sharder({'databases_per_second': 1}) as sharder, \
|
||||
mock.patch('swift.common.db_replicator.roundrobin_datadirs',
|
||||
stub_iter), \
|
||||
mock.patch('time.time', fake_time), \
|
||||
mock.patch('eventlet.sleep', fake_sleep):
|
||||
list(sharder.roundrobin_datadirs(None))
|
||||
# 100 db at 1/s should take ~100s
|
||||
run_time = sum(clock['sleeps'])
|
||||
self.assertTrue(97 <= run_time < 100, 'took %s' % run_time)
|
||||
|
||||
n_databases = 1000
|
||||
now = time.time()
|
||||
clock = {
|
||||
'sleeps': [],
|
||||
'now': now,
|
||||
}
|
||||
|
||||
with self._mock_sharder({'databases_per_second': 50}) as sharder, \
|
||||
mock.patch('swift.common.db_replicator.roundrobin_datadirs',
|
||||
stub_iter), \
|
||||
mock.patch('time.time', fake_time), \
|
||||
mock.patch('eventlet.sleep', fake_sleep):
|
||||
list(sharder.roundrobin_datadirs(None))
|
||||
# 1000 db at 50/s
|
||||
run_time = sum(clock['sleeps'])
|
||||
self.assertTrue(18 <= run_time < 20, 'took %s' % run_time)
|
||||
|
||||
@contextmanager
|
||||
def _mock_sharder(self, conf=None, replicas=3):
|
||||
conf = conf or {}
|
||||
|
Loading…
Reference in New Issue
Block a user