Fix logging in eventlet native threads

There is a bug in eventlet where logging within a native thread can lead
to a deadlock situation: https://github.com/eventlet/eventlet/issues/432

When encountered with this issue some projects in OpenStack using
oslo.log, eg. Cinder, resolve them by removing any logging withing
native threads.

There is actually a better approach. The Swift team came up with a
solution a long time ago [1], and in this patch that fix is included as
part of the setup method, but will only be run if the eventlet library
has already been loaded.

This patch adds the eventlet library as a testing dependency for the
PipeMutext unit tests.

[1]: 69c715c505

Closes-Bug: #1983863
Change-Id: Iac1b0891ae584ce4b95964e6cdc0ff2483a4e57d
This commit is contained in:
Gorka Eguileor 2022-08-08 16:44:20 +02:00
parent 6401da71b3
commit 94b9dc32ec
5 changed files with 376 additions and 0 deletions

View File

@ -265,8 +265,25 @@ def register_options(conf):
conf.register_mutate_hook(_mutate_hook)
def _fix_eventlet_logging():
"""Properly setup logging with eventlet on native threads.
Workaround for: https://github.com/eventlet/eventlet/issues/432
"""
# If eventlet was not loaded before call to setup assume it's not used.
eventlet = sys.modules.get('eventlet')
if eventlet:
import eventlet.green.threading
from oslo_log import pipe_mutex
logging.threading = eventlet.green.threading
logging._lock = logging.threading.RLock()
logging.Handler.createLock = pipe_mutex.pipe_createLock
def setup(conf, product_name, version='unknown'):
"""Setup logging for the current application."""
_fix_eventlet_logging()
if conf.log_config_append:
_load_log_config(conf.log_config_append)
else:

142
oslo_log/pipe_mutex.py Normal file
View File

@ -0,0 +1,142 @@
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import errno
import fcntl
import os
import eventlet
import eventlet.debug
import eventlet.greenthread
import eventlet.hubs
class PipeMutex(object):
"""Mutex using a pipe.
Works across both greenlets and real threads, even at the same time.
Class code copied from Swift's swift/common/utils.py
Related eventlet bug: https://github.com/eventlet/eventlet/issues/432
"""
def __init__(self):
self.rfd, self.wfd = os.pipe()
# You can't create a pipe in non-blocking mode; you must set it
# later.
rflags = fcntl.fcntl(self.rfd, fcntl.F_GETFL)
fcntl.fcntl(self.rfd, fcntl.F_SETFL, rflags | os.O_NONBLOCK)
os.write(self.wfd, b'-') # start unlocked
self.owner = None
self.recursion_depth = 0
# Usually, it's an error to have multiple greenthreads all waiting
# to read the same file descriptor. It's often a sign of inadequate
# concurrency control; for example, if you have two greenthreads
# trying to use the same memcache connection, they'll end up writing
# interleaved garbage to the socket or stealing part of each others'
# responses.
#
# In this case, we have multiple greenthreads waiting on the same
# file descriptor by design. This lets greenthreads in real thread A
# wait with greenthreads in real thread B for the same mutex.
# Therefore, we must turn off eventlet's multiple-reader detection.
#
# It would be better to turn off multiple-reader detection for only
# our calls to trampoline(), but eventlet does not support that.
eventlet.debug.hub_prevent_multiple_readers(False)
def acquire(self, blocking=True):
"""Acquire the mutex.
If called with blocking=False, returns True if the mutex was
acquired and False if it wasn't. Otherwise, blocks until the mutex
is acquired and returns True.
This lock is recursive; the same greenthread may acquire it as many
times as it wants to, though it must then release it that many times
too.
"""
current_greenthread_id = id(eventlet.greenthread.getcurrent())
if self.owner == current_greenthread_id:
self.recursion_depth += 1
return True
while True:
try:
# If there is a byte available, this will read it and remove
# it from the pipe. If not, this will raise OSError with
# errno=EAGAIN.
os.read(self.rfd, 1)
self.owner = current_greenthread_id
return True
except OSError as err:
if err.errno != errno.EAGAIN:
raise
if not blocking:
return False
# Tell eventlet to suspend the current greenthread until
# self.rfd becomes readable. This will happen when someone
# else writes to self.wfd.
eventlet.hubs.trampoline(self.rfd, read=True)
def release(self):
"""Release the mutex."""
current_greenthread_id = id(eventlet.greenthread.getcurrent())
if self.owner != current_greenthread_id:
raise RuntimeError("cannot release un-acquired lock")
if self.recursion_depth > 0:
self.recursion_depth -= 1
return
self.owner = None
os.write(self.wfd, b'X')
def close(self):
"""Close the mutex.
This releases its file descriptors.
You can't use a mutex after it's been closed.
"""
if self.wfd is not None:
os.close(self.wfd)
self.wfd = None
if self.rfd is not None:
os.close(self.rfd)
self.rfd = None
self.owner = None
self.recursion_depth = 0
def __del__(self):
# We need this so we don't leak file descriptors. Otherwise, if you
# call get_logger() and don't explicitly dispose of it by calling
# logger.logger.handlers[0].lock.close() [1], the pipe file
# descriptors are leaked.
#
# This only really comes up in tests. Service processes tend to call
# get_logger() once and then hang on to it until they exit, but the
# test suite calls get_logger() a lot.
#
# [1] and that's a completely ridiculous thing to expect callers to
# do, so nobody does it and that's okay.
self.close()
def pipe_createLock(self):
"""Replacement for logging.Handler.createLock method."""
self.lock = PipeMutex()

View File

@ -0,0 +1,209 @@
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import unittest
from unittest import mock
import eventlet
from eventlet import debug as eventlet_debug
from eventlet import greenpool
from oslo_log import pipe_mutex
@contextlib.contextmanager
def quiet_eventlet_exceptions():
orig_state = greenpool.DEBUG
eventlet_debug.hub_exceptions(False)
try:
yield
finally:
eventlet_debug.hub_exceptions(orig_state)
class TestPipeMutex(unittest.TestCase):
"""From Swift's test/unit/common/test_utils.py"""
def setUp(self):
self.mutex = pipe_mutex.PipeMutex()
def tearDown(self):
self.mutex.close()
def test_nonblocking(self):
evt_lock1 = eventlet.event.Event()
evt_lock2 = eventlet.event.Event()
evt_unlock = eventlet.event.Event()
def get_the_lock():
self.mutex.acquire()
evt_lock1.send('got the lock')
evt_lock2.wait()
self.mutex.release()
evt_unlock.send('released the lock')
eventlet.spawn(get_the_lock)
evt_lock1.wait() # Now, the other greenthread has the lock.
self.assertFalse(self.mutex.acquire(blocking=False))
evt_lock2.send('please release the lock')
evt_unlock.wait() # The other greenthread has released the lock.
self.assertTrue(self.mutex.acquire(blocking=False))
def test_recursive(self):
self.assertTrue(self.mutex.acquire(blocking=False))
self.assertTrue(self.mutex.acquire(blocking=False))
def try_acquire_lock():
return self.mutex.acquire(blocking=False)
self.assertFalse(eventlet.spawn(try_acquire_lock).wait())
self.mutex.release()
self.assertFalse(eventlet.spawn(try_acquire_lock).wait())
self.mutex.release()
self.assertTrue(eventlet.spawn(try_acquire_lock).wait())
def test_release_without_acquire(self):
self.assertRaises(RuntimeError, self.mutex.release)
def test_too_many_releases(self):
self.mutex.acquire()
self.mutex.release()
self.assertRaises(RuntimeError, self.mutex.release)
def test_wrong_releaser(self):
self.mutex.acquire()
with quiet_eventlet_exceptions():
self.assertRaises(RuntimeError,
eventlet.spawn(self.mutex.release).wait)
def test_blocking(self):
evt = eventlet.event.Event()
sequence = []
def coro1():
eventlet.sleep(0) # let coro2 go
self.mutex.acquire()
sequence.append('coro1 acquire')
evt.send('go')
self.mutex.release()
sequence.append('coro1 release')
def coro2():
evt.wait() # wait for coro1 to start us
self.mutex.acquire()
sequence.append('coro2 acquire')
self.mutex.release()
sequence.append('coro2 release')
c1 = eventlet.spawn(coro1)
c2 = eventlet.spawn(coro2)
c1.wait()
c2.wait()
self.assertEqual(sequence, [
'coro1 acquire',
'coro1 release',
'coro2 acquire',
'coro2 release'])
def test_blocking_tpool(self):
# Note: this test's success isn't a guarantee that the mutex is
# working. However, this test's failure means that the mutex is
# definitely broken.
sequence = []
def do_stuff():
n = 10
while n > 0:
self.mutex.acquire()
sequence.append("<")
eventlet.sleep(0.0001)
sequence.append(">")
self.mutex.release()
n -= 1
greenthread1 = eventlet.spawn(do_stuff)
greenthread2 = eventlet.spawn(do_stuff)
real_thread1 = eventlet.patcher.original('threading').Thread(
target=do_stuff)
real_thread1.start()
real_thread2 = eventlet.patcher.original('threading').Thread(
target=do_stuff)
real_thread2.start()
greenthread1.wait()
greenthread2.wait()
real_thread1.join()
real_thread2.join()
self.assertEqual(''.join(sequence), "<>" * 40)
def test_blocking_preserves_ownership(self):
pthread1_event = eventlet.patcher.original('threading').Event()
pthread2_event1 = eventlet.patcher.original('threading').Event()
pthread2_event2 = eventlet.patcher.original('threading').Event()
thread_id = []
owner = []
def pthread1():
thread_id.append(id(eventlet.greenthread.getcurrent()))
self.mutex.acquire()
owner.append(self.mutex.owner)
pthread2_event1.set()
orig_os_write = pipe_mutex.os.write
def patched_os_write(*a, **kw):
try:
return orig_os_write(*a, **kw)
finally:
pthread1_event.wait()
with mock.patch.object(pipe_mutex.os, 'write', patched_os_write):
self.mutex.release()
pthread2_event2.set()
def pthread2():
pthread2_event1.wait() # ensure pthread1 acquires lock first
thread_id.append(id(eventlet.greenthread.getcurrent()))
self.mutex.acquire()
pthread1_event.set()
pthread2_event2.wait()
owner.append(self.mutex.owner)
self.mutex.release()
real_thread1 = eventlet.patcher.original('threading').Thread(
target=pthread1)
real_thread1.start()
real_thread2 = eventlet.patcher.original('threading').Thread(
target=pthread2)
real_thread2.start()
real_thread1.join()
real_thread2.join()
self.assertEqual(thread_id, owner)
self.assertIsNone(self.mutex.owner)
@classmethod
def tearDownClass(cls):
# PipeMutex turns this off when you instantiate one
eventlet.debug.hub_prevent_multiple_readers(True)

View File

@ -0,0 +1,6 @@
---
fixes:
- |
`Bug #1983863
<https://bugs.launchpad.net/oslo.log/+bug/1983863>`_: Fixed logging in
eventlet native threads.

View File

@ -16,3 +16,5 @@ bandit>=1.6.0,<1.7.0 # Apache-2.0
fixtures>=3.0.0 # Apache-2.0/BSD
pre-commit>=2.6.0 # MIT
eventlet>=0.30.1,!=0.32.0 # MIT