spiff-arena/SpiffWorkflow/bpmn/specs/MultiInstanceTask.py

# -*- coding: utf-8 -*-
# Copyright (C) 2020 Sartography
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA
import copy
from builtins import range
from uuid import uuid4
import re
from SpiffWorkflow.bpmn.exceptions import WorkflowTaskExecException
from .SubWorkflowTask import SubWorkflowTask, CallActivity
from .ParallelGateway import ParallelGateway
from .ScriptTask import ScriptTask
from .ExclusiveGateway import ExclusiveGateway
from ...dmn.specs.BusinessRuleTask import BusinessRuleTask
from ...operators import valueof, is_number
from ...specs.SubWorkflow import SubWorkflow
from ...specs.base import TaskSpec
from ...util.impl import get_class
from ...task import Task, TaskState
from ...util.deep_merge import DeepMerge
def gendict(path, d):
if len(path) == 0:
return d
else:
return gendict(path[:-1], {path[-1]: d})
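
# For reference, gendict nests a dict under a path of keys, e.g.
#     gendict(['a', 'b', 'c'], {'x': 1})  ->  {'a': {'b': {'c': {'x': 1}}}}
# It is used below to write a multi-instance collection back into the task
# data under a '/'-separated variable name.
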
class MultiInstanceTask(TaskSpec):
"""
When executed, this task performs a split on the current task.
The number of outgoing tasks depends on the runtime value of a
specified data field.
If more than one input is connected, the task performs an implicit
multi merge.
This task has one or more inputs and may have any number of outputs.
"""
def __init__(self, wf_spec, name, times, **kwargs):
"""
Constructor.
:type wf_spec: WorkflowSpec
:param wf_spec: A reference to the workflow specification.
:type name: str
:param name: The name of the task spec.
:type times: int or :class:`SpiffWorkflow.operators.Term`
:param times: The number of tasks to create.
:type kwargs: dict
:param kwargs: See :class:`SpiffWorkflow.specs.TaskSpec`.
"""
if times is None:
raise ValueError('times argument is required')
self.times = times
# We don't really pass these things in (we should), but putting them here to document that they exist
self.loopTask = kwargs.get('loopTask', False)
self.isSequential = kwargs.get('isSequential', False)
self.expanded = kwargs.get('expanded', 1)
self.elementVar = kwargs.get('element_var')
self.collection = kwargs.get('collection')
self.multiInstance = True
TaskSpec.__init__(self, wf_spec, name, **kwargs)
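        # Illustrative only (hypothetical names): a parallel multi-instance
        # spec driven by a data collection might be constructed roughly like
        #     MultiInstanceTask(wf_spec, 'process_item', times=Attrib('items'),
        #                       element_var='item', collection=Attrib('results'),
        #                       isSequential=False)
        # where Attrib comes from SpiffWorkflow.operators.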
# DO NOT OVERRIDE THE SPEC TYPE.
# @property
# def spec_type(self):
# return 'MultiInstance Task'
def _find_my_task(self, task):
for thetask in task.workflow.task_tree:
if thetask.thread_id != task.thread_id:
continue
if thetask.task_spec == self:
return thetask
return None
def _on_trigger(self, task_spec):
"""
May be called after execute() was already completed to create an
additional outbound task.
"""
        # Find a Task for this TaskSpec.
my_task = self._find_my_task(task_spec)
if my_task._has_state(TaskState.COMPLETED):
state = TaskState.READY
else:
state = TaskState.FUTURE
for output in self.outputs:
new_task = my_task._add_child(output, state)
new_task.triggered = True
output._predict(new_task)
def _check_inputs(self, my_task):
if self.collection is None:
return
# look for variable in context, if we don't find it, default to 1
variable = valueof(my_task, self.times, 1)
        if self.times.name == self.collection.name and isinstance(variable, list):
raise WorkflowTaskExecException(my_task,
'If we are updating a collection,'
' then the collection must be a '
'dictionary.')
def _get_loop_completion(self,my_task):
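        # The completion condition is evaluated by the workflow's script engine
        # against the task data on every loop pass; e.g. an expression such as
        # "counter >= 5" (illustrative) ends the loop once it becomes true.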
        if self.completioncondition is not None:
terminate = my_task.workflow.script_engine.evaluate(my_task,self.completioncondition)
if terminate:
my_task.terminate_current_loop = True
return terminate
return False
def _get_count(self, my_task):
"""
        self.times holds the text entered in the BPMN model.
        It could be just a number - in this case return that number.
        It could be a variable name - in this case look up the variable's value
        in my_task. That value could be a number (possibly as text) - return its
        integer value; a list of records - return the length of the list; or a
        dict - return the number of keys.
"""
if is_number(self.times.name):
return int(self.times.name)
variable = valueof(my_task, self.times, 1) # look for variable in context, if we don't find it, default to 1
if is_number(variable):
return int(variable)
if isinstance(variable,list):
return len(variable)
if isinstance(variable,dict):
return len(variable.keys())
return 1 # we shouldn't ever get here, but just in case return a sane value.
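    # Illustrative examples for _get_count above (Attrib-style operators assumed):
    #     times.name == '3'                                    -> 3
    #     times = Attrib('n'),     data = {'n': '4'}           -> 4
    #     times = Attrib('items'), data = {'items': [1, 2]}    -> 2
    #     times = Attrib('items'), data = {'items': {'a': 1}}  -> 1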
def _get_current_var(self, my_task, pos):
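        # pos is 1-based. Illustratively, for data {'items': ['a', 'b', 'c']}
        # and pos=2 this returns 'b'; for a dict it returns the value under the
        # pos-th key; for a plain count it simply returns pos itself.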
variable = valueof(my_task, self.times, 1)
if is_number(variable):
return pos
if isinstance(variable,list) and len(variable) >= pos:
return variable[pos - 1]
elif isinstance(variable,dict) and len(list(variable.keys())) >= pos:
return variable[list(variable.keys())[pos - 1]]
else:
return pos
def _get_predicted_outputs(self, my_task):
split_n = self._get_count(my_task)
# Predict the outputs.
outputs = []
for i in range(split_n):
outputs += self.outputs
return outputs
def _build_gateway_name(self,position):
"""
        Build a unique name for each generated gateway - the name needs to be
        stable across save/restore of the workflow spec.
"""
return 'Gateway_for_' + str(self.name) + "_" + position
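    # _build_gateway_name example: a spec named 'MyTask' (hypothetical) with
    # position 'start' yields 'Gateway_for_MyTask_start'.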
def _make_new_gateway(self,my_task,suffix,descr):
gw_spec = ParallelGateway(self._wf_spec,
self._build_gateway_name(suffix),
triggered=False,
description=descr)
gw = Task(my_task.workflow, task_spec=gw_spec)
return gw_spec,gw
def _add_gateway(self, my_task):
""" Generate parallel gateway tasks on either side of the current task.
This emulates a standard BPMN pattern of having parallel tasks between
two parallel gateways.
Once we have set up the gateways, we write a note into our internal data so that
we don't do it again.
"""
# Expand this
# A-> ME -> C
# into this
# A -> GW_start -> ME -> GW_end -> C
# where GW is a parallel gateway
        # Check whether we have already done this; this code gets called
        # multiple times as we build the tree.
        if my_task.parent.task_spec.name.startswith('Gateway_for'):
return
# build the gateway specs and the tasks.
# Spiff wants a distinct spec for each task
# that it has in the workflow or it will throw an error
start_gw_spec, start_gw = self._make_new_gateway(my_task,'start','Begin Gateway')
end_gw_spec, end_gw = self._make_new_gateway(my_task,'end','End Gateway')
# Set up the parent task and insert it into the workflow
# remove the current task spec from the parent, it will be replaced with the new construct.
my_task.parent.task_spec.outputs = [x for x in my_task.parent.task_spec.outputs if x != my_task.task_spec]
# in the case that our parent is a gateway with a default route,
# we need to ensure that the default route is empty
# so that connect can set it up properly
if hasattr(my_task.parent.task_spec,'default_task_spec') and \
my_task.parent.task_spec.default_task_spec == my_task.task_spec.name:
my_task.parent.task_spec.default_task_spec = None
my_task.parent.task_spec.connect(start_gw_spec)
elif isinstance(my_task.parent.task_spec, ExclusiveGateway):
for cond, name in [ (cond, name) for cond, name in my_task.parent.task_spec.cond_task_specs\
if name == my_task.task_spec.name]:
my_task.parent.task_spec.cond_task_specs.remove((cond, name))
my_task.parent.task_spec.cond_task_specs.append((cond, start_gw_spec.name))
start_gw_spec.inputs.append(my_task.parent.task_spec)
else:
my_task.parent.task_spec.outputs.append(start_gw_spec)
start_gw_spec.inputs.append(my_task.parent.task_spec)
# get a list of all siblings and replace myself with the new gateway task
# in the parent task
newchildren = []
for child in my_task.parent.children:
if child == my_task:
newchildren.append(start_gw)
else:
newchildren.append(child)
my_task.parent.children = newchildren
        # update the gateway's parent to be my parent
start_gw.parent = my_task.parent
# update my parent to be the gateway
my_task.parent = start_gw
start_gw_spec.connect(self)
start_gw.children = [my_task]
# transfer my outputs to the ending gateway and set up the
# child parent links
end_gw_spec.outputs = self.outputs.copy()
self.connect(end_gw_spec)
self.outputs = [end_gw_spec]
end_gw.parent = my_task
my_task.children = [end_gw]
def multiinstance_info(self, my_task):
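        # Returns a summary dict, e.g. for a parallel multi-instance with three
        # instances on its first pass (illustrative):
        #     {'is_looping': False, 'is_sequential_mi': False,
        #      'is_parallel_mi': True, 'mi_count': 3, 'mi_index': 1}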
split_n = self._get_count(my_task)
runtimes = int(my_task._get_internal_data('runtimes', 1)) # set a default if not already run
loop = False
parallel = False
sequential = False
if my_task.task_spec.loopTask:
loop = True
elif my_task.task_spec.isSequential:
sequential = True
else:
parallel = True
return {'is_looping': loop,
'is_sequential_mi': sequential,
'is_parallel_mi': parallel,
'mi_count': split_n,
'mi_index': runtimes}
def _make_new_child_task(self,my_task,x):
        # Here we generate a distinct copy of our original task for each
        # parallel instance and hook it up into the task tree.
new_child = copy.copy(my_task)
new_child.id = uuid4()
        # We need to update both the task's internal data and its copy of the
        # public data so that the per-instance variables are correct.
new_child.internal_data = copy.deepcopy(my_task.internal_data)
        new_child.internal_data[
            'runtimes'] = x + 2  # runtimes is 1-based and the first instance is already done
new_child.data = copy.deepcopy(my_task.data)
new_child.data[self.elementVar] = self._get_current_var(my_task,
x + 2)
new_child.children = [] # these will be updated later
# in the case of parallel, the children list will get updated during the predict loop
return new_child
def _expand_sequential(self,my_task,split_n):
        # This applies only to sequential multi-instance (SMI) tasks, not to
        # looping tasks. We need to patch up the children and make sure they
        # chain correctly; this differs from parallel multi-instance (PMI)
        # because the children all link to each other rather than to the
        # gateways on both ends.
        # We have to jump through some hoops to determine whether we have
        # already expanded this properly, because the cardinality may change
        # and this code gets run a number of times.
expanded = getattr(self, 'expanded', 1)
if split_n >= expanded:
setattr(self, 'expanded', split_n)
        if expanded != split_n:
# Initialize based on current task
my_task_copy = copy.copy(my_task)
current_task = my_task
current_task_spec = self
proto_task_spec = copy.copy(self)
            # Essentially we are expanding
            #     A -> B0 -> C
            # into
            #     A -> B0 -> B1 -> B2 -> C
            # Each new child has the last child we created as its parent and
            # takes over the outputs that B0 had previously. This has to be
            # done for both the task and the task spec.
for x in range(split_n - expanded):
# create Bx from Bx-1
new_child = self._make_new_child_task(my_task,x)
# set children of Bx = children of B0
new_child.children = copy.copy(my_task_copy.children)
# all of C's parents should be Bx
for child in new_child.children:
child.parent = new_child
# create a new task spec for this new task and update it
new_task_spec = self._make_new_task_spec(proto_task_spec, my_task, x)
new_child.task_spec = new_task_spec
new_child._set_state(TaskState.MAYBE)
# update task spec inputs and outputs like we did for the task
current_task_spec.outputs = [new_task_spec]
new_task_spec.inputs = [current_task_spec]
current_task.children = [new_child]
# update the parent of the new task
new_child.parent = current_task
# set up variables for next pass.
current_task = new_child
current_task_spec = new_task_spec
def _expand_parallel(self,my_task,split_n):
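        # Essentially we are expanding
        #     A -> B0 -> C
        # into
        #     A -> GW_start -> (B0 | B1 | B2) -> GW_end -> C
        # where GW_start and GW_end are the parallel gateways added below, B0
        # is this task, and B1, B2, ... are copies of it with their own task
        # specs.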
# add a parallel gateway on either side of this task
self._add_gateway(my_task)
        # We use the child count of the parallel gateway to determine whether
        # we have already expanded this task; the number of children of the
        # gateway we just created should match the split count provided by the
        # multi-instance.
for x in range(split_n - len(my_task.parent.children)):
new_child = self._make_new_child_task(my_task,x)
new_task_spec = self._make_new_task_spec(my_task.task_spec, my_task, x)
new_child.task_spec = new_task_spec
# patch up the right hand side gateway
self.outputs[0].inputs.append(new_task_spec)
# patch up the left hand side gateway task and task_spec
my_task.parent.children.append(new_child)
my_task.parent.task_spec.outputs.append(new_task_spec)
def _make_new_task_spec(self,proto_task_spec,my_task,suffix):
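        # Copy the prototype spec and give it a distinct, stable name and id;
        # e.g. a spec named 'Task_A' (hypothetical) with suffix 0 becomes
        # 'Task_A_0'.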
new_task_spec = copy.copy(proto_task_spec)
new_task_spec.name = new_task_spec.name + "_%d" % suffix
new_task_spec.id = str(new_task_spec.id) + "_%d" % suffix
my_task.workflow.spec.task_specs[new_task_spec.name] = new_task_spec # add to registry
return new_task_spec
def _predict_hook(self, my_task):
split_n = self._get_count(my_task)
runtimes = int(my_task._get_internal_data('runtimes', 1)) # set a default if not already run
my_task._set_internal_data(splits=split_n, runtimes=runtimes)
if not self.elementVar:
self.elementVar = my_task.task_spec.name + "_CurrentVar"
my_task.data[self.elementVar] = copy.copy(self._get_current_var(my_task, runtimes))
# Create the outgoing tasks.
outputs = []
# In the special case that this is a Parallel multiInstance, we need
# to expand the children in the middle. This method gets called
# during every pass through the tree, so we need to wait until our
# real cardinality gets updated to expand the tree.
if (not self.isSequential):
self._expand_parallel(my_task,split_n)
elif not self.loopTask:
self._expand_sequential(my_task,split_n)
outputs += self.outputs
if my_task._is_definite():
my_task._sync_children(outputs, TaskState.FUTURE)
else:
my_task._sync_children(outputs, TaskState.LIKELY)
def _handle_special_cases(self, my_task):
classes = [BusinessRuleTask, ScriptTask, SubWorkflowTask, SubWorkflow, CallActivity]
classes = {x.__module__ + "." + x.__name__: x for x in classes}
terminate = self._get_loop_completion(my_task)
if my_task.task_spec.prevtaskclass in classes.keys() and not terminate:
super()._on_complete_hook(my_task)
def _merge_element_variable(self,my_task,collect,runtimes,colvarname):
        # If we are updating the same collection that was used for the loop
        # cardinality, then all the keys should be there and we can use the
        # key list; if not, we use an integer index - in that case we should
        # be guaranteed that the collection is a dictionary.
if self.collection is not None and self.times.name == self.collection.name:
keys = list(collect.keys())
if len(keys) < runtimes:
                msg = f"There is a mismatch between runtimes and the number of " \
                      f"items in the collection; please check for an empty " \
                      f"collection {self.collection.name}."
raise WorkflowTaskExecException(my_task, msg)
runtimesvar = keys[runtimes - 1]
else:
runtimesvar = runtimes
if self.elementVar in my_task.data and isinstance(my_task.data[self.elementVar], dict):
collect[str(runtimesvar)] = DeepMerge.merge(collect.get(runtimesvar, {}),
copy.copy(my_task.data[self.elementVar]))
my_task.data = DeepMerge.merge(my_task.data,
gendict(colvarname.split('/'), collect))
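        # Illustrative: with colvarname 'order/lines' and collect {'1': {...}},
        # the merge above folds {'order': {'lines': {'1': {...}}}} into
        # my_task.data.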
def _update_sibling_data(self,my_task,runtimes,runcount,colvarname,collect):
if (runtimes < runcount) and not my_task.terminate_current_loop and self.loopTask:
my_task._set_state(TaskState.READY)
my_task._set_internal_data(runtimes=runtimes + 1)
my_task.data[self.elementVar] = self._get_current_var(my_task, runtimes + 1)
element_var_data = None
else:
            # The element var data should not be passed on to children,
            # but we will add it back onto this task later.
element_var_data = my_task.data.pop(self.elementVar, None)
# if this is a parallel mi - then update all siblings with the
# current data
if not self.isSequential:
for task in my_task.parent.children:
task.data = DeepMerge.merge(
task.data,
gendict(colvarname.split('/'),
collect)
)
return element_var_data
def _on_complete_hook(self, my_task):
# do special stuff for non-user tasks
self._handle_special_cases(my_task)
self.__iteration_complete(my_task)
def __iteration_complete(self, my_task):
# this is all about updating the collection for a MI
self._check_inputs(my_task)
# initialize
runcount = self._get_count(my_task)
runtimes = int(my_task._get_internal_data('runtimes', 1))
if self.collection is not None:
colvarname = self.collection.name
else:
colvarname = my_task.task_spec.name
collect = valueof(my_task, self.collection, {})
self._merge_element_variable(my_task,collect,runtimes,colvarname)
element_var_data = self._update_sibling_data(my_task,runtimes,runcount,colvarname,collect)
# please see MultiInstance code for previous version
outputs = []
outputs += self.outputs
if not isinstance(my_task.task_spec,SubWorkflowTask):
my_task._sync_children(outputs, TaskState.FUTURE)
for child in my_task.children:
child.task_spec._update(child)
# If removed, add the element_var_data back onto this task, after
# updating the children.
        if element_var_data:
my_task.data[self.elementVar] = element_var_data
def serialize(self, serializer):
return serializer.serialize_multi_instance(self)
    @classmethod
    def deserialize(cls, serializer, wf_spec, s_state):
prevclass = get_class(s_state['prevtaskclass'])
spec = getDynamicMIClass(s_state['name'], prevclass)(wf_spec,s_state['name'],s_state['times'])
spec.prevtaskclass = s_state['prevtaskclass']
return serializer.deserialize_multi_instance(wf_spec, s_state, spec)
def getDynamicMIClass(id,prevclass):
id = re.sub('(.+)_[0-9]$','\\1',id)
return type(id + '_class', (
MultiInstanceTask, prevclass), {})
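
# Illustrative: getDynamicMIClass('Task_A_0', SomeSpecClass) strips the numeric
# suffix and returns a new class named 'Task_A_class' deriving from both
# MultiInstanceTask and SomeSpecClass (names here are hypothetical).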