Route conductor notification RPC to the same conductor

The RPC calls continue_node_{deploy,clean} are made by the conductor that
is already handling the node, so they don't have to go through the hash
ring. This avoids a situation where a take over happens in the middle of
processing a deploy/clean step, breaking it.

Eventually, we should stop using RPC for that altogether, but that will
be a much more invasive change.

Story: #2008200
Task: #40984
Change-Id: I76293f8ec30d5957b99bdbce5b70e87e8378d135
This commit is contained in:
Dmitry Tantsur 2020-09-25 17:34:01 +02:00
parent 484dcd5b60
commit bc628ac6ef
4 changed files with 15 additions and 6 deletions

View File

@ -202,6 +202,10 @@ class ConductorAPI(object):
host = random.choice(list(ring.nodes)) host = random.choice(list(ring.nodes))
return self.topic + "." + host return self.topic + "." + host
def get_current_topic(self):
    """Return the RPC topic name that targets the current conductor.

    The name is built from this API object's base ``topic`` and the
    ``host`` option of the loaded configuration, separated by a dot.

    :returns: the topic name as a string.
    """
    return ".".join((self.topic, CONF.host))
def can_send_create_port(self): def can_send_create_port(self):
"""Return whether the RPCAPI supports the create_port method.""" """Return whether the RPCAPI supports the create_port method."""
return self.client.can_send_version("1.41") return self.client.can_send_version("1.41")

View File

@ -828,7 +828,7 @@ def notify_conductor_resume_operation(task, operation):
from ironic.conductor import rpcapi from ironic.conductor import rpcapi
uuid = task.node.uuid uuid = task.node.uuid
rpc = rpcapi.ConductorAPI() rpc = rpcapi.ConductorAPI()
topic = rpc.get_topic_for(task.node) topic = rpc.get_current_topic()
# Need to release the lock to let the conductor take it # Need to release the lock to let the conductor take it
task.release_resources() task.release_resources()
getattr(rpc, method)(task.context, uuid, topic=topic) getattr(rpc, method)(task.context, uuid, topic=topic)

View File

@ -1728,15 +1728,14 @@ class MiscTestCase(db_base.DbTestCase):
@mock.patch.object(rpcapi.ConductorAPI, 'continue_node_deploy', @mock.patch.object(rpcapi.ConductorAPI, 'continue_node_deploy',
autospec=True) autospec=True)
@mock.patch.object(rpcapi.ConductorAPI, 'get_topic_for', autospec=True) def test_notify_conductor_resume_operation(self, mock_rpc_call):
def test_notify_conductor_resume_operation(self, mock_topic, self.config(host='fake-host')
mock_rpc_call):
mock_topic.return_value = 'topic'
with task_manager.acquire( with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task: self.context, self.node.uuid, shared=False) as task:
conductor_utils.notify_conductor_resume_operation(task, 'deploy') conductor_utils.notify_conductor_resume_operation(task, 'deploy')
mock_rpc_call.assert_called_once_with( mock_rpc_call.assert_called_once_with(
mock.ANY, task.context, self.node.uuid, topic='topic') mock.ANY, task.context, self.node.uuid,
topic='ironic.conductor_manager.fake-host')
@mock.patch.object(conductor_utils, 'notify_conductor_resume_operation', @mock.patch.object(conductor_utils, 'notify_conductor_resume_operation',
autospec=True) autospec=True)

View File

@ -0,0 +1,6 @@
---
fixes:
- |
Prevents a take over from happening in the middle of processing a deploy
step. This could happen if the RPC call ``continue_node_deploy`` was
routed to a different conductor.