Merge "[OVN] Improve Hash Ring logs"
This commit is contained in:
@@ -33,6 +33,7 @@ class StandardAttributeIDNotFound(n_exc.NeutronException):
|
|||||||
|
|
||||||
|
|
||||||
class HashRingIsEmpty(n_exc.NeutronException):
|
class HashRingIsEmpty(n_exc.NeutronException):
|
||||||
message = _('Hash Ring returned empty when hashing "%(key)s". '
|
message = _('Hash Ring returned empty when hashing "%(key)s". All '
|
||||||
'This should never happen in a normal situation, please '
|
'%(node_count)d nodes were found offline. This should never '
|
||||||
'check the status of your cluster')
|
'happen in a normal situation, please check the status '
|
||||||
|
'of your cluster')
|
||||||
|
@@ -38,6 +38,7 @@ class HashRingManager(object):
|
|||||||
# Flag to rate limit the caching log
|
# Flag to rate limit the caching log
|
||||||
self._prev_num_nodes = -1
|
self._prev_num_nodes = -1
|
||||||
self.admin_ctx = context.get_admin_context()
|
self.admin_ctx = context.get_admin_context()
|
||||||
|
self._offline_node_count = 0
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _wait_startup_before_caching(self):
|
def _wait_startup_before_caching(self):
|
||||||
@@ -92,6 +93,11 @@ class HashRingManager(object):
|
|||||||
self._hash_ring = hashring.HashRing({node.node_uuid
|
self._hash_ring = hashring.HashRing({node.node_uuid
|
||||||
for node in nodes})
|
for node in nodes})
|
||||||
self._last_time_loaded = timeutils.utcnow()
|
self._last_time_loaded = timeutils.utcnow()
|
||||||
|
self._offline_node_count = db_hash_ring.count_offline_nodes(
|
||||||
|
self.admin_ctx, constants.HASH_RING_NODES_TIMEOUT,
|
||||||
|
self._group)
|
||||||
|
LOG.debug("Hash Ring loaded. %d active nodes. %d offline nodes",
|
||||||
|
len(nodes), self._offline_node_count)
|
||||||
|
|
||||||
def refresh(self):
|
def refresh(self):
|
||||||
self._load_hash_ring(refresh=True)
|
self._load_hash_ring(refresh=True)
|
||||||
@@ -108,4 +114,5 @@ class HashRingManager(object):
|
|||||||
# KeyError is raised
|
# KeyError is raised
|
||||||
return self._hash_ring[key].pop()
|
return self._hash_ring[key].pop()
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise exceptions.HashRingIsEmpty(key=key)
|
raise exceptions.HashRingIsEmpty(
|
||||||
|
key=key, node_count=self._offline_node_count)
|
||||||
|
@@ -17,12 +17,14 @@ import datetime
|
|||||||
|
|
||||||
from neutron_lib.db import api as db_api
|
from neutron_lib.db import api as db_api
|
||||||
from oslo_config import cfg
|
from oslo_config import cfg
|
||||||
|
from oslo_log import log
|
||||||
from oslo_utils import timeutils
|
from oslo_utils import timeutils
|
||||||
from oslo_utils import uuidutils
|
from oslo_utils import uuidutils
|
||||||
|
|
||||||
from neutron.db.models import ovn as ovn_models
|
from neutron.db.models import ovn as ovn_models
|
||||||
|
|
||||||
CONF = cfg.CONF
|
CONF = cfg.CONF
|
||||||
|
LOG = log.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# NOTE(ralonsoh): this was migrated from networking-ovn to neutron and should
|
# NOTE(ralonsoh): this was migrated from networking-ovn to neutron and should
|
||||||
@@ -34,6 +36,8 @@ def add_node(context, group_name, node_uuid=None):
|
|||||||
with db_api.CONTEXT_WRITER.using(context):
|
with db_api.CONTEXT_WRITER.using(context):
|
||||||
context.session.add(ovn_models.OVNHashRing(
|
context.session.add(ovn_models.OVNHashRing(
|
||||||
node_uuid=node_uuid, hostname=CONF.host, group_name=group_name))
|
node_uuid=node_uuid, hostname=CONF.host, group_name=group_name))
|
||||||
|
LOG.info('Node %s from host "%s" and group "%s" added to the Hash Ring',
|
||||||
|
node_uuid, CONF.host, group_name)
|
||||||
return node_uuid
|
return node_uuid
|
||||||
|
|
||||||
|
|
||||||
@@ -42,6 +46,8 @@ def remove_nodes_from_host(context, group_name):
|
|||||||
context.session.query(ovn_models.OVNHashRing).filter(
|
context.session.query(ovn_models.OVNHashRing).filter(
|
||||||
ovn_models.OVNHashRing.hostname == CONF.host,
|
ovn_models.OVNHashRing.hostname == CONF.host,
|
||||||
ovn_models.OVNHashRing.group_name == group_name).delete()
|
ovn_models.OVNHashRing.group_name == group_name).delete()
|
||||||
|
LOG.info('Nodes from host "%s" and group "%s" removed from the Hash Ring',
|
||||||
|
CONF.host, group_name)
|
||||||
|
|
||||||
|
|
||||||
def _touch(context, **filter_args):
|
def _touch(context, **filter_args):
|
||||||
@@ -58,12 +64,30 @@ def touch_node(context, node_uuid):
|
|||||||
_touch(context, node_uuid=node_uuid)
|
_touch(context, node_uuid=node_uuid)
|
||||||
|
|
||||||
|
|
||||||
def get_active_nodes(context, interval, group_name, from_host=False):
|
def _get_nodes_query(context, interval, group_name, offline=False,
|
||||||
|
from_host=False):
|
||||||
limit = timeutils.utcnow() - datetime.timedelta(seconds=interval)
|
limit = timeutils.utcnow() - datetime.timedelta(seconds=interval)
|
||||||
with db_api.CONTEXT_READER.using(context):
|
with db_api.CONTEXT_READER.using(context):
|
||||||
query = context.session.query(ovn_models.OVNHashRing).filter(
|
query = context.session.query(ovn_models.OVNHashRing).filter(
|
||||||
ovn_models.OVNHashRing.updated_at >= limit,
|
|
||||||
ovn_models.OVNHashRing.group_name == group_name)
|
ovn_models.OVNHashRing.group_name == group_name)
|
||||||
|
|
||||||
|
if offline:
|
||||||
|
query = query.filter(ovn_models.OVNHashRing.updated_at < limit)
|
||||||
|
else:
|
||||||
|
query = query.filter(ovn_models.OVNHashRing.updated_at >= limit)
|
||||||
|
|
||||||
if from_host:
|
if from_host:
|
||||||
query = query.filter_by(hostname=CONF.host)
|
query = query.filter_by(hostname=CONF.host)
|
||||||
return query.all()
|
|
||||||
|
return query
|
||||||
|
|
||||||
|
|
||||||
|
def get_active_nodes(context, interval, group_name, from_host=False):
|
||||||
|
query = _get_nodes_query(context, interval, group_name,
|
||||||
|
from_host=from_host)
|
||||||
|
return query.all()
|
||||||
|
|
||||||
|
|
||||||
|
def count_offline_nodes(context, interval, group_name):
|
||||||
|
query = _get_nodes_query(context, interval, group_name, offline=True)
|
||||||
|
return query.count()
|
||||||
|
@@ -242,3 +242,30 @@ class TestHashRing(testlib_api.SqlTestCaseLight):
|
|||||||
for node in group2:
|
for node in group2:
|
||||||
node_db = self._get_node_row(node)
|
node_db = self._get_node_row(node)
|
||||||
self.assertEqual(node_db.created_at, node_db.updated_at)
|
self.assertEqual(node_db.created_at, node_db.updated_at)
|
||||||
|
|
||||||
|
def test_count_offline_nodes(self):
|
||||||
|
self._add_nodes_and_assert_exists(count=3)
|
||||||
|
|
||||||
|
# Assert no nodes are considered offline
|
||||||
|
self.assertEqual(0, ovn_hash_ring_db.count_offline_nodes(
|
||||||
|
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP))
|
||||||
|
|
||||||
|
# Subtract 60 seconds from utcnow() and touch the nodes to make
|
||||||
|
# them appear offline
|
||||||
|
fake_utcnow = timeutils.utcnow() - datetime.timedelta(seconds=60)
|
||||||
|
with mock.patch.object(timeutils, 'utcnow') as mock_utcnow:
|
||||||
|
mock_utcnow.return_value = fake_utcnow
|
||||||
|
ovn_hash_ring_db.touch_nodes_from_host(self.admin_ctx,
|
||||||
|
HASH_RING_TEST_GROUP)
|
||||||
|
|
||||||
|
# Now assert that all nodes from our host are seen as offline
|
||||||
|
self.assertEqual(3, ovn_hash_ring_db.count_offline_nodes(
|
||||||
|
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP))
|
||||||
|
|
||||||
|
# Touch the nodes again without faking utcnow()
|
||||||
|
ovn_hash_ring_db.touch_nodes_from_host(self.admin_ctx,
|
||||||
|
HASH_RING_TEST_GROUP)
|
||||||
|
|
||||||
|
# Assert no nodes are considered offline
|
||||||
|
self.assertEqual(0, ovn_hash_ring_db.count_offline_nodes(
|
||||||
|
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP))
|
||||||
|
Reference in New Issue
Block a user