Rework joining mechanism

The current joining mechanism can be expensive in some cases because
it performs a multi-step recursive search, which leads to a huge
number of DB requests.

This patch changes that behavior by precaching the required task
executions so that the lookup no longer hammers the database.

Change-Id: I2d1b7e72c728a14c85b015dfdb0f8800b95f3749
Mike Fedosin 2019-04-17 16:00:20 +02:00
parent e51730b511
commit ff00c9c778
3 changed files with 91 additions and 16 deletions
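
In short, the join-state evaluation now collects the names of all parent
tasks up front, fetches the matching task executions in a single query
(only the 'id', 'name' and 'state' fields), and then consults that
in-memory cache during the recursive route search instead of issuing a
separate DB request per task. A minimal sketch of the pattern, where
load_executions is a hypothetical stand-in for the batched query and not
a real Mistral API:

def build_task_exec_cache(load_executions, parent_names):
    """Fetch all potentially relevant task executions in one batched query.

    'load_executions' is a hypothetical callable standing in for a
    db_api.get_task_executions(...)-style call; it is not part of Mistral.
    """
    if not parent_names:
        # No parents to inspect, so skip the DB round trip entirely.
        return {}

    return {t_ex.name: t_ex for t_ex in load_executions(parent_names)}


def lookup_task_exec(cache, task_name):
    """In-memory lookup that replaces a per-task DB query."""
    return cache.get(task_name)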

View File

@@ -350,6 +350,18 @@ class DirectWorkflowTaskSpec(TaskSpec):
     def get_on_error(self):
         return self._on_error
 
+    def is_conditional_transition(self, state):
+        data = self._data.get(state) or self._data.get('on-complete')
+
+        if not data:
+            return False
+
+        for item in data:
+            if type(item) is dict:
+                return True
+
+        return False
+
 
 class ReverseWorkflowTaskSpec(TaskSpec):
     _polymorphic_value = 'reverse'

View File

@@ -236,7 +236,6 @@ def evaluate_task_outbound_context(task_ex):
     :param task_ex: DB task.
     :return: Outbound task Data Flow context.
     """
-
     # NOTE(rakhmerov): 'task_ex.in_context' has the SQLAlchemy specific
     # type MutableDict. So we need to create a shallow copy using dict(...)
     # initializer and use it. It's enough to be safe in order to manipulate
@@ -248,10 +247,10 @@ def evaluate_task_outbound_context(task_ex):
     # footprint and reduces performance.
     in_context = (
         dict(task_ex.in_context)
-        if task_ex.in_context is not None else {}
+        if getattr(task_ex, 'in_context', None) is not None else {}
     )
 
-    return utils.update_dict(in_context, task_ex.published)
+    return utils.update_dict(in_context, getattr(task_ex, 'published', {}))
 
 
 def evaluate_workflow_output(wf_ex, wf_output, ctx):

View File

@@ -15,6 +15,7 @@
 from oslo_log import log as logging
 from osprofiler import profiler
 
+from mistral.db.v2 import api as db_api
 from mistral import exceptions as exc
 from mistral import expressions as expr
 from mistral import utils
@@ -64,7 +65,8 @@ class DirectWorkflowController(base.WorkflowController):
         induced_state, _, _ = self._get_induced_join_state(
             self.wf_spec.get_tasks()[t_ex_candidate.name],
             self._find_task_execution_by_name(t_ex_candidate.name),
-            t_spec
+            t_spec,
+            {}
         )
 
         return induced_state == states.RUNNING
@@ -353,9 +355,6 @@ class DirectWorkflowController(base.WorkflowController):
         # TODO(rakhmerov): We need to use task_ex instead of task_spec
         # in order to cover a use case when there's more than one instance
         # of the same 'join' task in a workflow.
-        # TODO(rakhmerov): In some cases this method will be expensive because
-        # it uses a multi-step recursive search. We need to optimize it moving
-        # forward (e.g. with Workflow Execution Graph).
 
         join_expr = task_spec.get_join()
 
@@ -364,13 +363,29 @@ class DirectWorkflowController(base.WorkflowController):
         if not in_task_specs:
             return base.TaskLogicalState(states.RUNNING)
 
+        names = self._find_all_parent_task_names(task_spec)
+
+        t_execs_cache = {
+            t_ex.name: t_ex for t_ex in db_api.get_task_executions(
+                fields=('id', 'name', 'state'),
+                sort_keys=[],
+                workflow_execution_id=self.wf_ex.id,
+                name={'in': names}
+            )
+        } if names else {}  # don't perform a DB request if 'names' is empty
+
         # List of tuples (task_name, task_ex, state, depth, event_name).
         induced_states = []
 
         for t_s in in_task_specs:
-            t_ex = self._find_task_execution_by_name(t_s.get_name())
+            t_ex = t_execs_cache.get(t_s.get_name())
 
-            tup = self._get_induced_join_state(t_s, t_ex, task_spec)
+            tup = self._get_induced_join_state(
+                t_s,
+                t_ex,
+                task_spec,
+                t_execs_cache
+            )
 
             induced_states.append(
                 (
@@ -470,11 +485,14 @@ class DirectWorkflowController(base.WorkflowController):
     # we may have multiple task executions for a task. It should
     # accept inbound task execution rather than a spec.
     def _get_induced_join_state(self, in_task_spec, in_task_ex,
-                                join_task_spec):
+                                join_task_spec, t_execs_cache):
        join_task_name = join_task_spec.get_name()
 
        if not in_task_ex:
-            possible, depth = self._possible_route(in_task_spec)
+            possible, depth = self._possible_route(
+                in_task_spec,
+                t_execs_cache
+            )
 
            if possible:
                return states.WAITING, depth, None
@@ -484,6 +502,11 @@ class DirectWorkflowController(base.WorkflowController):
         if not states.is_completed(in_task_ex.state):
             return states.WAITING, 1, None
 
+        if self._is_conditional_transition(in_task_ex, in_task_spec) and \
+                not hasattr(in_task_ex, "in_context"):
+            in_task_ex = db_api.get_task_execution(in_task_ex.id)
+            t_execs_cache[in_task_ex.name] = in_task_ex
+
         # [(task name, params, event name), ...]
         next_tasks_tuples = self._find_next_tasks(in_task_ex)
@@ -506,25 +529,66 @@ class DirectWorkflowController(base.WorkflowController):
         # TODO(rakhmerov): Temporary hack. See the previous comment.
         return t_execs[-1] if t_execs else None
 
-    def _possible_route(self, task_spec, depth=1):
+    def _possible_route(self, task_spec, t_execs_cache, depth=1):
         in_task_specs = self.wf_spec.find_inbound_task_specs(task_spec)
 
         if not in_task_specs:
             return True, depth
 
         for t_s in in_task_specs:
-            t_ex = self._find_task_execution_by_name(t_s.get_name())
+            t_ex = t_execs_cache.get(t_s.get_name())
 
             if not t_ex:
-                possible, depth = self._possible_route(t_s, depth + 1)
+                possible, depth = self._possible_route(
+                    t_s,
+                    t_execs_cache,
+                    depth + 1
+                )
 
                 if possible:
                     return True, depth
             else:
                 t_name = task_spec.get_name()
 
-                if (not states.is_completed(t_ex.state) or
-                        t_name in self._find_next_task_names(t_ex)):
+                if not states.is_completed(t_ex.state):
+                    return True, depth
+
+                # By default we don't download the task context from the
+                # database, only the basic fields: 'id', 'name' and 'state'.
+                # It's a good optimization, because contexts can be too heavy
+                # and we don't need them most of the time.
+                # But sometimes we do need it for conditional transitions
+                # (when the decision where to go is based on the current
+                # context), and if that is the case, we download the full
+                # task execution and then evaluate its context to find the
+                # route.
+                # TODO(mfedosin): Think of a way to avoid this.
+                if self._is_conditional_transition(t_ex, task_spec) and \
+                        not hasattr(t_ex, "in_context"):
+                    t_ex = db_api.get_task_execution(t_ex.id)
+                    t_execs_cache[t_ex.name] = t_ex
+
+                if t_name in self._find_next_task_names(t_ex):
                     return True, depth
 
         return False, depth
 
+    def _find_all_parent_task_names(self, task_spec):
+        all_parent_names = set()
+
+        inbound_specs = self.wf_spec.find_inbound_task_specs(task_spec)[:]
+
+        while inbound_specs:
+            spec = inbound_specs.pop()
+            all_parent_names.add(spec.get_name())
+            inbound_specs += self.wf_spec.find_inbound_task_specs(spec)
+
+        return all_parent_names
+
+    @staticmethod
+    def _is_conditional_transition(t_ex, t_spec):
+        if t_ex.state == states.SUCCESS:
+            return t_spec.is_conditional_transition('on-success')
+        elif t_ex.state == states.ERROR:
+            return t_spec.is_conditional_transition('on-error')
+
+        return False
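
The conditional-transition handling above boils down to a lazy upgrade of
the cached record: an execution fetched with only 'id', 'name' and 'state'
is re-read in full only when the outgoing transition is conditional and the
route decision actually needs the task context. A rough sketch of that
step, where fetch_full_execution is a hypothetical stand-in for a
db_api.get_task_execution(...)-style call:

def ensure_context(cache, t_ex, is_conditional, fetch_full_execution):
    """Re-fetch a fields-only execution only when its context matters.

    'fetch_full_execution' is a hypothetical stand-in for the real
    per-id DB lookup; it is not part of Mistral.
    """
    if is_conditional and not hasattr(t_ex, 'in_context'):
        t_ex = fetch_full_execution(t_ex.id)  # one extra query, only here
        cache[t_ex.name] = t_ex  # keep the full record for later lookups

    return t_ex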