Added exception catchalls for auditors.
Also changed some os.listdir calls to use a new swift.common.utils.listdir helper, which returns an empty list for non-existent paths.

Change-Id: I89964636899d39bc07b1ecf2688786ffca67bf17
This commit is contained in:
parent
dfb9a9f0a3
commit
2eb0eb510b
@ -44,9 +44,9 @@ class AccountAuditor(Daemon):
|
|||||||
time.sleep(random() * self.interval)
|
time.sleep(random() * self.interval)
|
||||||
while True:
|
while True:
|
||||||
begin = time.time()
|
begin = time.time()
|
||||||
|
try:
|
||||||
all_locs = audit_location_generator(self.devices,
|
all_locs = audit_location_generator(self.devices,
|
||||||
account_server.DATADIR,
|
account_server.DATADIR, mount_check=self.mount_check,
|
||||||
mount_check=self.mount_check,
|
|
||||||
logger=self.logger)
|
logger=self.logger)
|
||||||
for path, device, partition in all_locs:
|
for path, device, partition in all_locs:
|
||||||
self.account_audit(path)
|
self.account_audit(path)
|
||||||
@ -59,6 +59,8 @@ class AccountAuditor(Daemon):
|
|||||||
reported = time.time()
|
reported = time.time()
|
||||||
self.account_passes = 0
|
self.account_passes = 0
|
||||||
self.account_failures = 0
|
self.account_failures = 0
|
||||||
|
except (Exception, Timeout):
|
||||||
|
self.logger.exception(_('ERROR auditing'))
|
||||||
elapsed = time.time() - begin
|
elapsed = time.time() - begin
|
||||||
if elapsed < self.interval:
|
if elapsed < self.interval:
|
||||||
time.sleep(self.interval - elapsed)
|
time.sleep(self.interval - elapsed)
|
||||||
|
@ -731,11 +731,11 @@ def iter_devices_partitions(devices_dir, item_type):
|
|||||||
:param item_type: One of 'accounts', 'containers', or 'objects'
|
:param item_type: One of 'accounts', 'containers', or 'objects'
|
||||||
:returns: Each iteration returns a tuple of (device, partition)
|
:returns: Each iteration returns a tuple of (device, partition)
|
||||||
"""
|
"""
|
||||||
devices = os.listdir(devices_dir)
|
devices = listdir(devices_dir)
|
||||||
shuffle(devices)
|
shuffle(devices)
|
||||||
devices_partitions = []
|
devices_partitions = []
|
||||||
for device in devices:
|
for device in devices:
|
||||||
partitions = os.listdir(os.path.join(devices_dir, device, item_type))
|
partitions = listdir(os.path.join(devices_dir, device, item_type))
|
||||||
shuffle(partitions)
|
shuffle(partitions)
|
||||||
devices_partitions.append((device, iter(partitions)))
|
devices_partitions.append((device, iter(partitions)))
|
||||||
yielded = True
|
yielded = True
|
||||||
@ -757,7 +757,7 @@ def unlink_older_than(path, mtime):
|
|||||||
:mtime: timestamp of oldest file to keep
|
:mtime: timestamp of oldest file to keep
|
||||||
"""
|
"""
|
||||||
if os.path.exists(path):
|
if os.path.exists(path):
|
||||||
for fname in os.listdir(path):
|
for fname in listdir(path):
|
||||||
fpath = os.path.join(path, fname)
|
fpath = os.path.join(path, fname)
|
||||||
try:
|
try:
|
||||||
if os.path.getmtime(fpath) < mtime:
|
if os.path.getmtime(fpath) < mtime:
|
||||||
@ -929,7 +929,7 @@ def audit_location_generator(devices, datadir, mount_check=True, logger=None):
|
|||||||
on devices
|
on devices
|
||||||
:param logger: a logger object
|
:param logger: a logger object
|
||||||
'''
|
'''
|
||||||
device_dir = os.listdir(devices)
|
device_dir = listdir(devices)
|
||||||
# randomize devices in case of process restart before sweep completed
|
# randomize devices in case of process restart before sweep completed
|
||||||
shuffle(device_dir)
|
shuffle(device_dir)
|
||||||
for device in device_dir:
|
for device in device_dir:
|
||||||
@ -942,22 +942,22 @@ def audit_location_generator(devices, datadir, mount_check=True, logger=None):
|
|||||||
datadir_path = os.path.join(devices, device, datadir)
|
datadir_path = os.path.join(devices, device, datadir)
|
||||||
if not os.path.exists(datadir_path):
|
if not os.path.exists(datadir_path):
|
||||||
continue
|
continue
|
||||||
partitions = os.listdir(datadir_path)
|
partitions = listdir(datadir_path)
|
||||||
for partition in partitions:
|
for partition in partitions:
|
||||||
part_path = os.path.join(datadir_path, partition)
|
part_path = os.path.join(datadir_path, partition)
|
||||||
if not os.path.isdir(part_path):
|
if not os.path.isdir(part_path):
|
||||||
continue
|
continue
|
||||||
suffixes = os.listdir(part_path)
|
suffixes = listdir(part_path)
|
||||||
for suffix in suffixes:
|
for suffix in suffixes:
|
||||||
suff_path = os.path.join(part_path, suffix)
|
suff_path = os.path.join(part_path, suffix)
|
||||||
if not os.path.isdir(suff_path):
|
if not os.path.isdir(suff_path):
|
||||||
continue
|
continue
|
||||||
hashes = os.listdir(suff_path)
|
hashes = listdir(suff_path)
|
||||||
for hsh in hashes:
|
for hsh in hashes:
|
||||||
hash_path = os.path.join(suff_path, hsh)
|
hash_path = os.path.join(suff_path, hsh)
|
||||||
if not os.path.isdir(hash_path):
|
if not os.path.isdir(hash_path):
|
||||||
continue
|
continue
|
||||||
for fname in sorted(os.listdir(hash_path),
|
for fname in sorted(listdir(hash_path),
|
||||||
reverse=True):
|
reverse=True):
|
||||||
path = os.path.join(hash_path, fname)
|
path = os.path.join(hash_path, fname)
|
||||||
yield path, device, partition
|
yield path, device, partition
|
||||||
@ -1106,3 +1106,12 @@ def dump_recon_cache(cache_key, cache_value, cache_file, lock_timeout=2):
|
|||||||
except OSError, err:
|
except OSError, err:
|
||||||
if err.errno != errno.ENOENT:
|
if err.errno != errno.ENOENT:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def listdir(path):
|
||||||
|
try:
|
||||||
|
return os.listdir(path)
|
||||||
|
except OSError, err:
|
||||||
|
if err.errno != errno.ENOENT:
|
||||||
|
raise
|
||||||
|
return []
|
||||||
|
@ -45,22 +45,24 @@ class ContainerAuditor(Daemon):
|
|||||||
time.sleep(random() * self.interval)
|
time.sleep(random() * self.interval)
|
||||||
while True:
|
while True:
|
||||||
begin = time.time()
|
begin = time.time()
|
||||||
|
try:
|
||||||
all_locs = audit_location_generator(self.devices,
|
all_locs = audit_location_generator(self.devices,
|
||||||
container_server.DATADIR,
|
container_server.DATADIR, mount_check=self.mount_check,
|
||||||
mount_check=self.mount_check,
|
|
||||||
logger=self.logger)
|
logger=self.logger)
|
||||||
for path, device, partition in all_locs:
|
for path, device, partition in all_locs:
|
||||||
self.container_audit(path)
|
self.container_audit(path)
|
||||||
if time.time() - reported >= 3600: # once an hour
|
if time.time() - reported >= 3600: # once an hour
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
_('Since %(time)s: Container audits: %(pass)s passed '
|
_('Since %(time)s: Container audits: %(pass)s '
|
||||||
'audit, %(fail)s failed audit'),
|
'passed audit, %(fail)s failed audit'),
|
||||||
{'time': time.ctime(reported),
|
{'time': time.ctime(reported),
|
||||||
'pass': self.container_passes,
|
'pass': self.container_passes,
|
||||||
'fail': self.container_failures})
|
'fail': self.container_failures})
|
||||||
reported = time.time()
|
reported = time.time()
|
||||||
self.container_passes = 0
|
self.container_passes = 0
|
||||||
self.container_failures = 0
|
self.container_failures = 0
|
||||||
|
except (Exception, Timeout):
|
||||||
|
self.logger.exception(_('ERROR auditing'))
|
||||||
elapsed = time.time() - begin
|
elapsed = time.time() - begin
|
||||||
if elapsed < self.interval:
|
if elapsed < self.interval:
|
||||||
time.sleep(self.interval - elapsed)
|
time.sleep(self.interval - elapsed)
|
||||||
|
@ -204,7 +204,10 @@ class ObjectAuditor(Daemon):
|
|||||||
if parent:
|
if parent:
|
||||||
kwargs['zero_byte_fps'] = zbo_fps or self.conf_zero_byte_fps
|
kwargs['zero_byte_fps'] = zbo_fps or self.conf_zero_byte_fps
|
||||||
while True:
|
while True:
|
||||||
|
try:
|
||||||
self.run_once(**kwargs)
|
self.run_once(**kwargs)
|
||||||
|
except (Exception, Timeout):
|
||||||
|
self.logger.exception(_('ERROR auditing'))
|
||||||
self._sleep()
|
self._sleep()
|
||||||
|
|
||||||
def run_once(self, *args, **kwargs):
|
def run_once(self, *args, **kwargs):
|
||||||
|
Loading…
Reference in New Issue
Block a user