Added fallocate_reserve option

Some systems behave badly when they completely run out of space. To
alleviate this problem, you can set the fallocate_reserve conf value
to a number of bytes to "reserve" on each disk. When the disk free
space falls at or below this amount, fallocate calls will fail, even
if the underlying OS fallocate call would succeed. For example, a
fallocate_reserve of 5368709120 (5G) would make all fallocate calls
fail, even for zero-byte files, when the disk free space falls under
5G.

The default fallocate_reserve is 0, meaning "no reserve", and so the
software behaves exactly as it always has unless you set this conf
value to something non-zero.

Also fixed ring builder's search_devs doc bugs.

Related: To get rsync to do the same, see
https://github.com/rackspace/cloudfiles-rsync
Specifically, see this patch:
https://github.com/rackspace/cloudfiles-rsync/blob/master/debian/patches/limit-fs-fullness.diff

DocImpact

Change-Id: I8db176ae0ca5b41c9bcfeb7cb8abb31c2e614527
This commit is contained in:
gholt 2013-01-25 02:11:19 +00:00
parent 2f663ff9a0
commit 87a42ab9ca
10 changed files with 218 additions and 20 deletions

View File

@ -237,6 +237,13 @@ disable_fallocate false Disable "fast fail" fallocate checks if the
log_custom_handlers None Comma-separated list of functions to call
to setup custom log handlers.
eventlet_debug false If true, turn on debug logging for eventlet
fallocate_reserve 0 You can set fallocate_reserve to the number of
bytes you'd like fallocate to reserve, whether
there is space for the given file size or not.
This is useful for systems that behave badly
when they completely run out of space; you can
make the services pretend they're out of space
early.
=================== ========== =============================================
[object-server]
@ -348,6 +355,13 @@ disable_fallocate false Disable "fast fail" fallocate checks if the
log_custom_handlers None Comma-separated list of functions to call
to setup custom log handlers.
eventlet_debug false If true, turn on debug logging for eventlet
fallocate_reserve 0 You can set fallocate_reserve to the number of
bytes you'd like fallocate to reserve, whether
there is space for the given file size or not.
This is useful for systems that behave badly
when they completely run out of space; you can
make the services pretend they're out of space
early.
=================== ========== ============================================
[container-server]
@ -452,6 +466,13 @@ disable_fallocate false Disable "fast fail" fallocate checks if the
log_custom_handlers None Comma-separated list of functions to call
to setup custom log handlers.
eventlet_debug false If true, turn on debug logging for eventlet
fallocate_reserve 0 You can set fallocate_reserve to the number of
bytes you'd like fallocate to reserve, whether
there is space for the given file size or not.
This is useful for systems that behave badly
when they completely run out of space; you can
make the services pretend they're out of space
early.
=================== ========== =============================================
[account-server]

View File

@ -30,6 +30,9 @@
# on to preallocate disk space with SQLite databases to decrease fragmentation.
# db_preallocation = off
# eventlet_debug = false
# You can set fallocate_reserve to the number of bytes you'd like fallocate to
# reserve, whether there is space for the given file size or not.
# fallocate_reserve = 0
[pipeline:main]
pipeline = healthcheck recon account-server

View File

@ -33,6 +33,9 @@
# on to preallocate disk space with SQLite databases to decrease fragmentation.
# db_preallocation = off
# eventlet_debug = false
# You can set fallocate_reserve to the number of bytes you'd like fallocate to
# reserve, whether there is space for the given file size or not.
# fallocate_reserve = 0
[pipeline:main]
pipeline = healthcheck recon container-server

View File

@ -28,6 +28,9 @@
# log_statsd_default_sample_rate = 1
# log_statsd_metric_prefix =
# eventlet_debug = false
# You can set fallocate_reserve to the number of bytes you'd like fallocate to
# reserve, whether there is space for the given file size or not.
# fallocate_reserve = 0
[pipeline:main]
pipeline = healthcheck recon object-server

View File

@ -90,6 +90,10 @@ def run_daemon(klass, conf_file, section_name='', once=False, **kwargs):
# disable fallocate if desired
if utils.config_true_value(conf.get('disable_fallocate', 'no')):
utils.disable_fallocate()
# set utils.FALLOCATE_RESERVE if desired
reserve = int(conf.get('fallocate_reserve', 0))
if reserve > 0:
utils.FALLOCATE_RESERVE = reserve
# By default, disable eventlet printing stacktraces
eventlet_debug = utils.config_true_value(conf.get('eventlet_debug', 'no'))

View File

@ -799,10 +799,14 @@ class RingBuilder(object):
def search_devs(self, search_value):
"""
The <search-value> can be of the form:
d<device_id>z<zone>-<ip>:<port>/<device_name>_<meta>
The <search-value> can be of the form::
d<device_id>z<zone>-<ip>:<port>/<device_name>_<meta>
Any part is optional, but you must include at least one part.
Examples:
Examples::
d74 Matches the device id 74
z1 Matches devices in zone 1
z1-1.2.3.4 Matches devices in zone 1 with the ip 1.2.3.4
@ -814,9 +818,13 @@ The <search-value> can be of the form:
_"snet: 5.6.7.8" Matches devices with snet: 5.6.7.8 in the meta data
[::1] Matches devices in any zone with the ip ::1
z1-[::1]:5678 Matches devices in zone 1 with ip ::1 and port 5678
Most specific example:
Most specific example::
d74z1-1.2.3.4:5678/sdb1_"snet: 5.6.7.8"
Nerd explanation:
All items require their single character prefix except the ip, in which
case the - is optional unless the device id or zone is also included.
"""

View File

@ -70,6 +70,10 @@ _sys_fsync = None
_sys_fallocate = None
_posix_fadvise = None
# If set to non-zero, fallocate routines will fail based on free space
# available being at or below this amount, in bytes.
FALLOCATE_RESERVE = 0
# Used by hash_path to offer a bit more security when generating hashes for
# paths. It simply appends this value to all paths; guessing the hash a path
# will end up with would also require knowing this suffix.
@ -156,10 +160,17 @@ class FallocateWrapper(object):
logging.warn(_("Unable to locate fallocate, posix_fallocate in "
"libc. Leaving as a no-op."))
def __call__(self, fd, mode, offset, len):
def __call__(self, fd, mode, offset, length):
""" The length parameter must be a ctypes.c_uint64 """
if FALLOCATE_RESERVE > 0:
st = os.fstatvfs(fd)
free = st.f_frsize * st.f_bavail - length.value
if free <= FALLOCATE_RESERVE:
raise OSError('FALLOCATE_RESERVE fail %s <= %s' % (
free, FALLOCATE_RESERVE))
args = {
'fallocate': (fd, mode, offset, len),
'posix_fallocate': (fd, offset, len)
'fallocate': (fd, mode, offset, length),
'posix_fallocate': (fd, offset, length)
}
return self.fallocate(*args[self.func_name])
@ -179,13 +190,14 @@ def fallocate(fd, size):
global _sys_fallocate
if _sys_fallocate is None:
_sys_fallocate = FallocateWrapper()
if size > 0:
# 1 means "FALLOC_FL_KEEP_SIZE", which means it pre-allocates invisibly
ret = _sys_fallocate(fd, 1, 0, ctypes.c_uint64(size))
err = ctypes.get_errno()
if ret and err not in (0, errno.ENOSYS, errno.EOPNOTSUPP,
errno.EINVAL):
raise OSError(err, 'Unable to fallocate(%s)' % size)
if size < 0:
size = 0
# 1 means "FALLOC_FL_KEEP_SIZE", which means it pre-allocates invisibly
ret = _sys_fallocate(fd, 1, 0, ctypes.c_uint64(size))
err = ctypes.get_errno()
if ret and err not in (0, errno.ENOSYS, errno.EOPNOTSUPP,
errno.EINVAL):
raise OSError(err, 'Unable to fallocate(%s)' % size)
class FsyncWrapper(object):

View File

@ -30,6 +30,7 @@ from paste.deploy import loadapp, appconfig
from eventlet.green import socket, ssl
from urllib import unquote
from swift.common import utils
from swift.common.swob import Request
from swift.common.utils import capture_stdio, disable_fallocate, \
drop_privileges, get_logger, NullLogger, config_true_value, \
@ -124,6 +125,10 @@ def run_wsgi(conf_file, app_section, *args, **kwargs):
sock = get_socket(conf, default_port=kwargs.get('default_port', 8080))
# remaining tasks should not require elevated privileges
drop_privileges(conf.get('user', 'swift'))
# set utils.FALLOCATE_RESERVE if desired
reserve = int(conf.get('fallocate_reserve', 0))
if reserve > 0:
utils.FALLOCATE_RESERVE = reserve
# redirect errors to logger and close stdio
capture_stdio(logger)

View File

@ -644,12 +644,10 @@ class ObjectController(object):
upload_size = 0
last_sync = 0
with file.mkstemp() as fd:
if 'content-length' in request.headers:
try:
fallocate(fd, int(request.headers['content-length']))
except OSError:
return HTTPInsufficientStorage(drive=device,
request=request)
try:
fallocate(fd, int(request.headers.get('content-length', 0)))
except OSError:
return HTTPInsufficientStorage(drive=device, request=request)
reader = request.environ['wsgi.input'].read
for chunk in iter(lambda: reader(self.network_chunk_size), ''):
upload_size += len(chunk)

View File

@ -17,6 +17,7 @@
from __future__ import with_statement
from test.unit import temptree
import ctypes
import errno
import logging
import mimetools
@ -933,6 +934,146 @@ log_name = %(yarr)s'''
self.assertEqual(
utils.rsync_ip('::ffff:192.0.2.128'), '[::ffff:192.0.2.128]')
def test_fallocate_reserve(self):
class StatVFS(object):
f_frsize = 1024
f_bavail = 1
def fstatvfs(fd):
return StatVFS()
orig_FALLOCATE_RESERVE = utils.FALLOCATE_RESERVE
orig_fstatvfs = utils.os.fstatvfs
try:
fallocate = utils.FallocateWrapper(noop=True)
utils.os.fstatvfs = fstatvfs
# Want 1023 reserved, have 1024 * 1 free, so succeeds
utils.FALLOCATE_RESERVE = 1023
StatVFS.f_frsize = 1024
StatVFS.f_bavail = 1
self.assertEquals(fallocate(0, 1, 0, ctypes.c_uint64(0)), 0)
# Want 1023 reserved, have 512 * 2 free, so succeeds
utils.FALLOCATE_RESERVE = 1023
StatVFS.f_frsize = 512
StatVFS.f_bavail = 2
self.assertEquals(fallocate(0, 1, 0, ctypes.c_uint64(0)), 0)
# Want 1024 reserved, have 1024 * 1 free, so fails
utils.FALLOCATE_RESERVE = 1024
StatVFS.f_frsize = 1024
StatVFS.f_bavail = 1
exc = None
try:
fallocate(0, 1, 0, ctypes.c_uint64(0))
except OSError, err:
exc = err
self.assertEquals(str(exc), 'FALLOCATE_RESERVE fail 1024 <= 1024')
# Want 1024 reserved, have 512 * 2 free, so fails
utils.FALLOCATE_RESERVE = 1024
StatVFS.f_frsize = 512
StatVFS.f_bavail = 2
exc = None
try:
fallocate(0, 1, 0, ctypes.c_uint64(0))
except OSError, err:
exc = err
self.assertEquals(str(exc), 'FALLOCATE_RESERVE fail 1024 <= 1024')
# Want 2048 reserved, have 1024 * 1 free, so fails
utils.FALLOCATE_RESERVE = 2048
StatVFS.f_frsize = 1024
StatVFS.f_bavail = 1
exc = None
try:
fallocate(0, 1, 0, ctypes.c_uint64(0))
except OSError, err:
exc = err
self.assertEquals(str(exc), 'FALLOCATE_RESERVE fail 1024 <= 2048')
# Want 2048 reserved, have 512 * 2 free, so fails
utils.FALLOCATE_RESERVE = 2048
StatVFS.f_frsize = 512
StatVFS.f_bavail = 2
exc = None
try:
fallocate(0, 1, 0, ctypes.c_uint64(0))
except OSError, err:
exc = err
self.assertEquals(str(exc), 'FALLOCATE_RESERVE fail 1024 <= 2048')
# Want 1023 reserved, have 1024 * 1 free, but file size is 1, so
# fails
utils.FALLOCATE_RESERVE = 1023
StatVFS.f_frsize = 1024
StatVFS.f_bavail = 1
exc = None
try:
fallocate(0, 1, 0, ctypes.c_uint64(1))
except OSError, err:
exc = err
self.assertEquals(str(exc), 'FALLOCATE_RESERVE fail 1023 <= 1023')
# Want 1022 reserved, have 1024 * 1 free, and file size is 1, so
# succeeds
utils.FALLOCATE_RESERVE = 1022
StatVFS.f_frsize = 1024
StatVFS.f_bavail = 1
self.assertEquals(fallocate(0, 1, 0, ctypes.c_uint64(1)), 0)
# Want 1023 reserved, have 1024 * 1 free, and file size is 0, so
# succeeds
utils.FALLOCATE_RESERVE = 1023
StatVFS.f_frsize = 1024
StatVFS.f_bavail = 1
self.assertEquals(fallocate(0, 1, 0, ctypes.c_uint64(0)), 0)
# Want 1024 reserved, have 1024 * 1 free, and even though
# file size is 0, since we're under the reserve, fails
utils.FALLOCATE_RESERVE = 1024
StatVFS.f_frsize = 1024
StatVFS.f_bavail = 1
exc = None
try:
fallocate(0, 1, 0, ctypes.c_uint64(0))
except OSError, err:
exc = err
self.assertEquals(str(exc), 'FALLOCATE_RESERVE fail 1024 <= 1024')
finally:
utils.FALLOCATE_RESERVE = orig_FALLOCATE_RESERVE
utils.os.fstatvfs = orig_fstatvfs
def test_fallocate_func(self):
class FallocateWrapper(object):
def __init__(self):
self.last_call = None
def __call__(self, *args):
self.last_call = list(args)
self.last_call[-1] = self.last_call[-1].value
return 0
orig__sys_fallocate = utils._sys_fallocate
try:
utils._sys_fallocate = FallocateWrapper()
# Ensure fallocate calls _sys_fallocate even with 0 bytes
utils._sys_fallocate.last_call = None
utils.fallocate(1234, 0)
self.assertEquals(utils._sys_fallocate.last_call,
[1234, 1, 0, 0])
# Ensure fallocate calls _sys_fallocate even with negative bytes
utils._sys_fallocate.last_call = None
utils.fallocate(1234, -5678)
self.assertEquals(utils._sys_fallocate.last_call,
[1234, 1, 0, 0])
# Ensure fallocate calls _sys_fallocate properly with positive
# bytes
utils._sys_fallocate.last_call = None
utils.fallocate(1234, 1)
self.assertEquals(utils._sys_fallocate.last_call,
[1234, 1, 0, 1])
utils._sys_fallocate.last_call = None
utils.fallocate(1234, 10 * 1024 * 1024 * 1024)
self.assertEquals(utils._sys_fallocate.last_call,
[1234, 1, 0, 10 * 1024 * 1024 * 1024])
finally:
utils._sys_fallocate = orig__sys_fallocate
class TestStatsdLogging(unittest.TestCase):
def test_get_logger_statsd_client_not_specified(self):