Make the error regexes and log file pattern configurable in the config
Make it possible to override the default set of regexes used to search for device block errors in the log file. Also make the log file naming pattern configurable by setting it in the drive-audit.conf file, and update the "Detecting Failed Drives" section of the admin guide accordingly. Change-Id: I7bd3acffed196da3e09db4c9dcbb48a20bdd1cf0
This commit is contained in:
parent
c6e53721e0
commit
d2dd3e5488
@ -25,13 +25,6 @@ from ConfigParser import ConfigParser
|
|||||||
from swift.common.utils import backward, get_logger
|
from swift.common.utils import backward, get_logger
|
||||||
|
|
||||||
|
|
||||||
# To search for more types of errors, add the regex to the list below
|
|
||||||
error_re = [
|
|
||||||
re.compile(r'\berror\b.*\b(sd[a-z]{1,2}\d?)\b'),
|
|
||||||
re.compile(r'\b(sd[a-z]{1,2}\d?)\b.*\berror\b'),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def get_devices(device_dir, logger):
|
def get_devices(device_dir, logger):
|
||||||
devices = []
|
devices = []
|
||||||
for line in open('/proc/mounts').readlines():
|
for line in open('/proc/mounts').readlines():
|
||||||
@ -61,12 +54,17 @@ def get_devices(device_dir, logger):
|
|||||||
return devices
|
return devices
|
||||||
|
|
||||||
|
|
||||||
def get_errors(minutes):
|
def get_errors(error_re, log_file_pattern, minutes):
|
||||||
# Assuming log rotation is being used, we need to examine
|
# Assuming log rotation is being used, we need to examine
|
||||||
# recently rotated files in case the rotation occurred
|
# recently rotated files in case the rotation occurred
|
||||||
# just before the script is being run - the data we are
|
# just before the script is being run - the data we are
|
||||||
# looking for may have rotated.
|
# looking for may have rotated.
|
||||||
log_files = [f for f in glob.glob('/var/log/kern.*[!.][!g][!z]')]
|
#
|
||||||
|
# The globbing used before would not work with all out-of-box
|
||||||
|
# distro setups for logrotate and syslog, so it has been moved
|
||||||
|
# to the config where one can set it with the desired
|
||||||
|
# globbing pattern.
|
||||||
|
log_files = [f for f in glob.glob(log_file_pattern)]
|
||||||
log_files.sort()
|
log_files.sort()
|
||||||
|
|
||||||
now_time = datetime.datetime.now()
|
now_time = datetime.datetime.now()
|
||||||
@ -143,13 +141,30 @@ if __name__ == '__main__':
|
|||||||
device_dir = conf.get('device_dir', '/srv/node')
|
device_dir = conf.get('device_dir', '/srv/node')
|
||||||
minutes = int(conf.get('minutes', 60))
|
minutes = int(conf.get('minutes', 60))
|
||||||
error_limit = int(conf.get('error_limit', 1))
|
error_limit = int(conf.get('error_limit', 1))
|
||||||
|
log_file_pattern = conf.get('log_file_pattern',
|
||||||
|
'/var/log/kern.*[!.][!g][!z]')
|
||||||
|
error_re = []
|
||||||
|
for conf_key in conf:
|
||||||
|
if conf_key.startswith('regex_pattern_'):
|
||||||
|
error_pattern = conf[conf_key]
|
||||||
|
try:
|
||||||
|
r = re.compile(error_pattern)
|
||||||
|
except re.error:
|
||||||
|
sys.exit('Error: unable to compile regex pattern "%s"' %
|
||||||
|
error_pattern)
|
||||||
|
error_re.append(r)
|
||||||
|
if not error_re:
|
||||||
|
error_re = [
|
||||||
|
re.compile(r'\berror\b.*\b(sd[a-z]{1,2}\d?)\b'),
|
||||||
|
re.compile(r'\b(sd[a-z]{1,2}\d?)\b.*\berror\b'),
|
||||||
|
]
|
||||||
conf['log_name'] = conf.get('log_name', 'drive-audit')
|
conf['log_name'] = conf.get('log_name', 'drive-audit')
|
||||||
logger = get_logger(conf, log_route='drive-audit')
|
logger = get_logger(conf, log_route='drive-audit')
|
||||||
devices = get_devices(device_dir, logger)
|
devices = get_devices(device_dir, logger)
|
||||||
logger.debug("Devices found: %s" % str(devices))
|
logger.debug("Devices found: %s" % str(devices))
|
||||||
if not devices:
|
if not devices:
|
||||||
logger.error("Error: No devices found!")
|
logger.error("Error: No devices found!")
|
||||||
errors = get_errors(minutes)
|
errors = get_errors(error_re, log_file_pattern, minutes)
|
||||||
logger.debug("Errors found: %s" % str(errors))
|
logger.debug("Errors found: %s" % str(errors))
|
||||||
unmounts = 0
|
unmounts = 0
|
||||||
for kernel_device, count in errors.items():
|
for kernel_device, count in errors.items():
|
||||||
|
@ -156,20 +156,30 @@ settings:
|
|||||||
|
|
||||||
[drive-audit]
|
[drive-audit]
|
||||||
|
|
||||||
================== ========== ===========================================
|
================== ============== ===========================================
|
||||||
Option Default Description
|
Option Default Description
|
||||||
------------------ ---------- -------------------------------------------
|
------------------ -------------- -------------------------------------------
|
||||||
log_facility LOG_LOCAL0 Syslog log facility
|
log_facility LOG_LOCAL0 Syslog log facility
|
||||||
log_level INFO Log level
|
log_level INFO Log level
|
||||||
device_dir /srv/node Directory devices are mounted under
|
device_dir /srv/node Directory devices are mounted under
|
||||||
minutes 60 Number of minutes to look back in
|
minutes 60 Number of minutes to look back in
|
||||||
`/var/log/kern.log`
|
`/var/log/kern.log`
|
||||||
error_limit 1 Number of errors to find before a device
|
error_limit 1 Number of errors to find before a device
|
||||||
is unmounted
|
is unmounted
|
||||||
================== ========== ===========================================
|
log_file_pattern /var/log/kern* Location of the log file with globbing
|
||||||
|
pattern to check against device errors
|
||||||
|
regex_pattern_X (see below) Regular expression patterns to be used to
|
||||||
|
locate device blocks with errors in the
|
||||||
|
log file
|
||||||
|
================== ============== ===========================================
|
||||||
|
|
||||||
This script has only been tested on Ubuntu 10.04, so if you are using a
|
The default regex patterns used to locate device blocks with errors are
|
||||||
different distro or OS, some care should be taken before using in production.
|
`\berror\b.*\b(sd[a-z]{1,2}\d?)\b` and `\b(sd[a-z]{1,2}\d?)\b.*\berror\b`.
|
||||||
|
One can override the defaults above by providing new expressions
|
||||||
|
using the format `regex_pattern_X = regex_expression`, where `X` is a number.
|
||||||
|
|
||||||
|
This script has been tested on Ubuntu 10.04 and Ubuntu 12.04, so if you are
|
||||||
|
using a different distro or OS, some care should be taken before using it in production.
|
||||||
|
|
||||||
--------------
|
--------------
|
||||||
Cluster Health
|
Cluster Health
|
||||||
|
@ -5,3 +5,18 @@
|
|||||||
# log_address = /dev/log
|
# log_address = /dev/log
|
||||||
# minutes = 60
|
# minutes = 60
|
||||||
# error_limit = 1
|
# error_limit = 1
|
||||||
|
#
|
||||||
|
# Location of the log file with globbing
|
||||||
|
# pattern to check against device errors.
|
||||||
|
# log_file_pattern = /var/log/kern.*[!.][!g][!z]
|
||||||
|
#
|
||||||
|
# Regular expression patterns to be used to locate
|
||||||
|
# device blocks with errors in the log file. Currently
|
||||||
|
# the default ones are as follows:
|
||||||
|
# \berror\b.*\b(sd[a-z]{1,2}\d?)\b
|
||||||
|
# \b(sd[a-z]{1,2}\d?)\b.*\berror\b
|
||||||
|
# One can override the default ones by providing
|
||||||
|
# new expressions using the format below:
|
||||||
|
# Format: regex_pattern_X = regex_expression
|
||||||
|
# Example:
|
||||||
|
# regex_pattern_1 = \berror\b.*\b(dm-[0-9]{1,2}\d?)\b
|
||||||
|
Loading…
Reference in New Issue
Block a user