diff --git a/SpiffWorkflow/SpiffWorkflow/serializer/prettyxml.py b/SpiffWorkflow/SpiffWorkflow/serializer/prettyxml.py index 9bd189c5..d861b8ec 100644 --- a/SpiffWorkflow/SpiffWorkflow/serializer/prettyxml.py +++ b/SpiffWorkflow/SpiffWorkflow/serializer/prettyxml.py @@ -16,26 +16,29 @@ # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301 USA -import re -import xml.dom.minidom as minidom from .. import operators from ..specs.Simple import Simple from ..specs.WorkflowSpec import WorkflowSpec -from ..exceptions import StorageException +from ..exceptions import SpiffWorkflowException from .base import Serializer, spec_map, op_map # Create a list of tag names out of the spec names. _spec_map = spec_map() _op_map = op_map() -_exc = StorageException + +class XMLParserExcetion(SpiffWorkflowException): + pass class XmlSerializer(Serializer): + """Parses XML into a WorkflowSpec object.""" - """ - Parses XML into a WorkflowSpec object. - """ + # Note: This is not a serializer. It is a parser for Spiff's XML format + # However, it is too disruptive to rename everything that uses it. + + def raise_parser_exception(self, message): + raise XMLParserExcetion(message) def deserialize_assign(self, workflow, start_node): """ @@ -43,17 +46,18 @@ class XmlSerializer(Serializer): start_node -- the xml node (xml.dom.minidom.Node) """ - name = start_node.getAttribute('name') - attrib = start_node.getAttribute('field') - value = start_node.getAttribute('value') + name = start_node.attrib.get('name') + attrib = start_node.attrib.get('field') + value = start_node.attrib.get('value') + kwargs = {} if name == '': - _exc('name attribute required') - if attrib != '' and value != '': - _exc('Both, field and right-value attributes found') - elif attrib == '' and value == '': - _exc('field or value attribute required') - elif value != '': + self.raise_parser_exception('name attribute required') + if attrib is not None and value is not None: + self.raise_parser_exception('Both, field and right-value attributes found') + elif attrib is None and value is None: + self.raise_parser_exception('field or value attribute required') + elif value is not None: kwargs['right'] = value else: kwargs['right_attribute'] = attrib @@ -65,8 +69,8 @@ class XmlSerializer(Serializer): start_node -- the xml node (xml.dom.minidom.Node) """ - name = start_node.getAttribute('name') - value = start_node.getAttribute('value') + name = start_node.attrib.get('name') + value = start_node.attrib.get('value') return name, value def deserialize_assign_list(self, workflow, start_node): @@ -78,13 +82,13 @@ class XmlSerializer(Serializer): """ # Collect all information. assignments = [] - for node in start_node.childNodes: - if node.nodeType != minidom.Node.ELEMENT_NODE: - continue - if node.nodeName.lower() == 'assign': + for node in start_node.getchildren(): + if not isinstance(start_node.tag, str): + pass + elif node.tag.lower() == 'assign': assignments.append(self.deserialize_assign(workflow, node)) else: - _exc('Unknown node: %s' % node.nodeName) + self.raise_parser_exception('Unknown node: %s' % node.tag) return assignments def deserialize_logical(self, node): @@ -93,26 +97,26 @@ class XmlSerializer(Serializer): node -- the xml node (xml.dom.minidom.Node) """ - term1_attrib = node.getAttribute('left-field') - term1_value = node.getAttribute('left-value') - op = node.nodeName.lower() - term2_attrib = node.getAttribute('right-field') - term2_value = node.getAttribute('right-value') + term1_attrib = node.attrib.get('left-field') + term1_value = node.attrib.get('left-value') + op = node.tag.lower() + term2_attrib = node.attrib.get('right-field') + term2_value = node.attrib.get('right-value') if op not in _op_map: - _exc('Invalid operator') - if term1_attrib != '' and term1_value != '': - _exc('Both, left-field and left-value attributes found') - elif term1_attrib == '' and term1_value == '': - _exc('left-field or left-value attribute required') - elif term1_value != '': + self.raise_parser_exception('Invalid operator') + if term1_attrib is not None and term1_value is not None: + self.raise_parser_exception('Both, left-field and left-value attributes found') + elif term1_attrib is None and term1_value is None: + self.raise_parser_exception('left-field or left-value attribute required') + elif term1_value is not None: left = term1_value else: left = operators.Attrib(term1_attrib) - if term2_attrib != '' and term2_value != '': - _exc('Both, right-field and right-value attributes found') - elif term2_attrib == '' and term2_value == '': - _exc('right-field or right-value attribute required') - elif term2_value != '': + if term2_attrib is not None and term2_value is not None: + self.raise_parser_exception('Both, right-field and right-value attributes found') + elif term2_attrib is None and term2_value is None: + self.raise_parser_exception('right-field or right-value attribute required') + elif term2_value is not None: right = term2_value else: right = operators.Attrib(term2_attrib) @@ -128,26 +132,26 @@ class XmlSerializer(Serializer): # Collect all information. condition = None spec_name = None - for node in start_node.childNodes: - if node.nodeType != minidom.Node.ELEMENT_NODE: - continue - if node.nodeName.lower() == 'successor': + for node in start_node.getchildren(): + if not isinstance(node.tag, str): + pass + elif node.tag.lower() == 'successor': if spec_name is not None: - _exc('Duplicate task name %s' % spec_name) - if node.firstChild is None: - _exc('Successor tag without a task name') - spec_name = node.firstChild.nodeValue - elif node.nodeName.lower() in _op_map: + self.raise_parser_exception('Duplicate task name %s' % spec_name) + if node.text is None: + self.raise_parser_exception('Successor tag without a task name') + spec_name = node.text + elif node.tag.lower() in _op_map: if condition is not None: - _exc('Multiple conditions are not yet supported') + self.raise_parser_exception('Multiple conditions are not yet supported') condition = self.deserialize_logical(node) else: - _exc('Unknown node: %s' % node.nodeName) + self.raise_parser_exception('Unknown node: %s' % node.tag) if condition is None: - _exc('Missing condition in conditional statement') + self.raise_parser_exception('Missing condition in conditional statement') if spec_name is None: - _exc('A %s has no task specified' % start_node.nodeName) + self.raise_parser_exception('A %s has no task specified' % start_node.tag) return condition, spec_name def deserialize_task_spec(self, workflow, start_node, read_specs): @@ -160,31 +164,31 @@ class XmlSerializer(Serializer): start_node -- the xml structure (xml.dom.minidom.Node) """ # Extract attributes from the node. - nodetype = start_node.nodeName.lower() - name = start_node.getAttribute('name').lower() - context = start_node.getAttribute('context').lower() - mutex = start_node.getAttribute('mutex').lower() - cancel = start_node.getAttribute('cancel').lower() - success = start_node.getAttribute('success').lower() - times = start_node.getAttribute('times').lower() - times_field = start_node.getAttribute('times-field').lower() - threshold = start_node.getAttribute('threshold').lower() - threshold_field = start_node.getAttribute('threshold-field').lower() - file_name = start_node.getAttribute('file').lower() - file_field = start_node.getAttribute('file-field').lower() + nodetype = start_node.tag.lower() + name = start_node.attrib.get('name', '').lower() + context = start_node.attrib.get('context', '').lower() + mutex = start_node.attrib.get('mutex', '').lower() + cancel = start_node.attrib.get('cancel', '').lower() + success = start_node.attrib.get('success', '').lower() + times = start_node.attrib.get('times', '').lower() + times_field = start_node.attrib.get('times-field', '').lower() + threshold = start_node.attrib.get('threshold', '').lower() + threshold_field = start_node.attrib.get('threshold-field', '').lower() + file_name = start_node.attrib.get('file', '').lower() + file_field = start_node.attrib.get('file-field', '').lower() kwargs = {'lock': [], 'data': {}, 'defines': {}, 'pre_assign': [], 'post_assign': []} if nodetype not in _spec_map: - _exc('Invalid task type "%s"' % nodetype) + self.raise_parser_exception('Invalid task type "%s"' % nodetype) if nodetype == 'start-task': name = 'start' if name == '': - _exc('Invalid task name "%s"' % name) + self.raise_parser_exception('Invalid task name "%s"' % name) if name in read_specs: - _exc('Duplicate task name "%s"' % name) + self.raise_parser_exception('Duplicate task name "%s"' % name) if cancel != '' and cancel != '0': kwargs['cancel'] = True if success != '' and success != '0': @@ -210,55 +214,55 @@ class XmlSerializer(Serializer): # Walk through the children of the node. successors = [] - for node in start_node.childNodes: - if node.nodeType != minidom.Node.ELEMENT_NODE: - continue - if node.nodeName == 'description': - kwargs['description'] = node.firstChild.nodeValue - elif node.nodeName == 'successor' \ - or node.nodeName == 'default-successor': - if node.firstChild is None: - _exc('Empty %s tag' % node.nodeName) - successors.append((None, node.firstChild.nodeValue)) - elif node.nodeName == 'conditional-successor': + for node in start_node.getchildren(): + if not isinstance(node.tag, str): + pass + elif node.tag == 'description': + kwargs['description'] = node.text + elif node.tag == 'successor' \ + or node.tag == 'default-successor': + if not node.text: + self.raise_parser_exception('Empty %s tag' % node.tag) + successors.append((None, node.text)) + elif node.tag == 'conditional-successor': successors.append(self.deserialize_condition(workflow, node)) - elif node.nodeName == 'define': + elif node.tag == 'define': key, value = self.deserialize_data(workflow, node) kwargs['defines'][key] = value # "property" tag exists for backward compatibility. - elif node.nodeName == 'data' or node.nodeName == 'property': + elif node.tag == 'data' or node.tag == 'property': key, value = self.deserialize_data(workflow, node) kwargs['data'][key] = value - elif node.nodeName == 'pre-assign': + elif node.tag == 'pre-assign': kwargs['pre_assign'].append( self.deserialize_assign(workflow, node)) - elif node.nodeName == 'post-assign': + elif node.tag == 'post-assign': kwargs['post_assign'].append( self.deserialize_assign(workflow, node)) - elif node.nodeName == 'in': + elif node.tag == 'in': kwargs['in_assign'] = self.deserialize_assign_list( workflow, node) - elif node.nodeName == 'out': + elif node.tag == 'out': kwargs['out_assign'] = self.deserialize_assign_list( workflow, node) - elif node.nodeName == 'cancel': - if node.firstChild is None: - _exc('Empty %s tag' % node.nodeName) + elif node.tag == 'cancel': + if not node.text: + self.raise_parser_exception('Empty %s tag' % node.tag) if context == '': context = [] elif not isinstance(context, list): context = [context] - context.append(node.firstChild.nodeValue) - elif node.nodeName == 'lock': - if node.firstChild is None: - _exc('Empty %s tag' % node.nodeName) - kwargs['lock'].append(node.firstChild.nodeValue) - elif node.nodeName == 'pick': - if node.firstChild is None: - _exc('Empty %s tag' % node.nodeName) - kwargs['choice'].append(node.firstChild.nodeValue) + context.append(node.text) + elif node.tag == 'lock': + if not node.text: + self.raise_parser_exception('Empty %s tag' % node.tag) + kwargs['lock'].append(node.text) + elif node.tag == 'pick': + if not node.text: + self.raise_parser_exception('Empty %s tag' % node.tag) + kwargs['choice'].append(node.text) else: - _exc('Unknown node: %s' % node.nodeName) + self.raise_parser_exception('Unknown node: %s' % node.tag) # Create a new instance of the task spec. module = _spec_map[nodetype] @@ -266,9 +270,9 @@ class XmlSerializer(Serializer): spec = module(workflow, **kwargs) elif nodetype == 'multi-instance' or nodetype == 'thread-split': if times == '' and times_field == '': - _exc('Missing "times" or "times-field" in "%s"' % name) + self.raise_parser_exception('Missing "times" or "times-field" in "%s"' % name) elif times != '' and times_field != '': - _exc('Both, "times" and "times-field" in "%s"' % name) + self.raise_parser_exception('Both, "times" and "times-field" in "%s"' % name) spec = module(workflow, name, **kwargs) elif context == '': spec = module(workflow, name, **kwargs) @@ -277,34 +281,31 @@ class XmlSerializer(Serializer): read_specs[name] = spec, successors - def deserialize_workflow_spec(self, s_state, filename=None): + def deserialize_workflow_spec(self, root_node, filename=None): """ Reads the workflow from the given XML structure and returns a WorkflowSpec instance. """ - dom = minidom.parseString(s_state) - node = dom.getElementsByTagName('process-definition')[0] - name = node.getAttribute('name') + name = root_node.attrib.get('name') if name == '': - _exc('%s without a name attribute' % node.nodeName) + self.raise_parser_exception('%s without a name attribute' % root_node.tag) # Read all task specs and create a list of successors. workflow_spec = WorkflowSpec(name, filename) del workflow_spec.task_specs['Start'] end = Simple(workflow_spec, 'End'), [] read_specs = dict(end=end) - for child_node in node.childNodes: - if child_node.nodeType != minidom.Node.ELEMENT_NODE: - continue - if child_node.nodeName == 'name': - workflow_spec.name = child_node.firstChild.nodeValue - elif child_node.nodeName == 'description': - workflow_spec.description = child_node.firstChild.nodeValue - elif child_node.nodeName.lower() in _spec_map: - self.deserialize_task_spec( - workflow_spec, child_node, read_specs) + for child_node in root_node.getchildren(): + if not isinstance(child_node.tag, str): + pass + elif child_node.tag == 'name': + workflow_spec.name = child_node.text + elif child_node.tag == 'description': + workflow_spec.description = child_node.text + elif child_node.tag.lower() in _spec_map: + self.deserialize_task_spec(workflow_spec, child_node, read_specs) else: - _exc('Unknown node: %s' % child_node.nodeName) + self.raise_parser_exception('Unknown node: %s' % child_node.tag) # Remove the default start-task from the workflow. workflow_spec.start = read_specs['start'][0] @@ -314,7 +315,7 @@ class XmlSerializer(Serializer): spec, successors = read_specs[name] for condition, successor_name in successors: if successor_name not in read_specs: - _exc('Unknown successor: "%s"' % successor_name) + self.raise_parser_exception('Unknown successor: "%s"' % successor_name) successor, foo = read_specs[successor_name] if condition is None: spec.connect(successor) diff --git a/SpiffWorkflow/SpiffWorkflow/specs/SubWorkflow.py b/SpiffWorkflow/SpiffWorkflow/specs/SubWorkflow.py index 12887ac0..d0fc7449 100644 --- a/SpiffWorkflow/SpiffWorkflow/specs/SubWorkflow.py +++ b/SpiffWorkflow/SpiffWorkflow/specs/SubWorkflow.py @@ -18,6 +18,8 @@ # 02110-1301 USA import os +from lxml import etree + from .StartTask import StartTask from .base import TaskSpec from ..task import TaskState @@ -93,9 +95,8 @@ class SubWorkflow(TaskSpec): file_name = valueof(my_task, self.file) serializer = XmlSerializer() with open(file_name) as fp: - xml = fp.read() - wf_spec = WorkflowSpec.deserialize( - serializer, xml, filename=file_name) + xml = etree.parse(fp).getroot() + wf_spec = WorkflowSpec.deserialize(serializer, xml, filename=file_name) outer_workflow = my_task.workflow.outer_workflow return Workflow(wf_spec, parent=outer_workflow) diff --git a/SpiffWorkflow/tests/SpiffWorkflow/PatternTest.py b/SpiffWorkflow/tests/SpiffWorkflow/PatternTest.py index f0d013ce..85a90d27 100644 --- a/SpiffWorkflow/tests/SpiffWorkflow/PatternTest.py +++ b/SpiffWorkflow/tests/SpiffWorkflow/PatternTest.py @@ -6,6 +6,8 @@ import unittest import os sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..')) +from lxml import etree + from SpiffWorkflow.specs.WorkflowSpec import WorkflowSpec from SpiffWorkflow.task import Task from SpiffWorkflow.serializer.prettyxml import XmlSerializer @@ -64,7 +66,7 @@ class PatternTest(unittest.TestCase): # Test patterns that are defined in XML format. if filename.endswith('.xml'): with open(filename) as fp: - xml = fp.read() + xml = etree.parse(fp).getroot() serializer = XmlSerializer() wf_spec = WorkflowSpec.deserialize( serializer, xml, filename=filename) diff --git a/SpiffWorkflow/tests/SpiffWorkflow/WorkflowTest.py b/SpiffWorkflow/tests/SpiffWorkflow/WorkflowTest.py index 0d2b0b3d..273ced0d 100644 --- a/SpiffWorkflow/tests/SpiffWorkflow/WorkflowTest.py +++ b/SpiffWorkflow/tests/SpiffWorkflow/WorkflowTest.py @@ -6,6 +6,8 @@ import os data_dir = os.path.join(os.path.dirname(__file__), 'data') sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..')) +from lxml import etree + from SpiffWorkflow.workflow import Workflow from SpiffWorkflow.specs.Cancel import Cancel from SpiffWorkflow.specs.Simple import Simple @@ -27,7 +29,7 @@ class WorkflowTest(unittest.TestCase): """ xml_file = os.path.join(data_dir, 'spiff', 'workflow1.xml') with open(xml_file) as fp: - xml = fp.read() + xml = etree.parse(fp).getroot() wf_spec = WorkflowSpec.deserialize(XmlSerializer(), xml) workflow = Workflow(wf_spec) diff --git a/SpiffWorkflow/tests/SpiffWorkflow/specs/SubWorkflowTest.py b/SpiffWorkflow/tests/SpiffWorkflow/specs/SubWorkflowTest.py index 0590192f..3e99f993 100644 --- a/SpiffWorkflow/tests/SpiffWorkflow/specs/SubWorkflowTest.py +++ b/SpiffWorkflow/tests/SpiffWorkflow/specs/SubWorkflowTest.py @@ -4,6 +4,8 @@ import sys import unittest import os +from lxml import etree + sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..', '..')) from SpiffWorkflow.specs.WorkflowSpec import WorkflowSpec @@ -30,7 +32,7 @@ class TaskSpecTest(unittest.TestCase): os.path.dirname(__file__), '..', 'data', 'spiff', folder, f) serializer = XmlSerializer() with open(file) as fp: - xml = fp.read() + xml = etree.parse(fp).getroot() self.wf_spec = WorkflowSpec.deserialize( serializer, xml, filename=file) self.workflow = Workflow(self.wf_spec) diff --git a/SpiffWorkflow/tests/SpiffWorkflow/specs/WorkflowSpecTest.py b/SpiffWorkflow/tests/SpiffWorkflow/specs/WorkflowSpecTest.py index c2a5638e..1ad5b125 100644 --- a/SpiffWorkflow/tests/SpiffWorkflow/specs/WorkflowSpecTest.py +++ b/SpiffWorkflow/tests/SpiffWorkflow/specs/WorkflowSpecTest.py @@ -8,6 +8,8 @@ import unittest data_dir = os.path.join(os.path.dirname(__file__), '..', 'data') sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..', '..')) +from lxml import etree + import pickle from random import randint try: @@ -82,7 +84,7 @@ class WorkflowSpecTest(unittest.TestCase): # Read a complete workflow spec. xml_file = os.path.join(data_dir, 'spiff', 'workflow1.xml') with open(xml_file) as fp: - xml = fp.read() + xml = etree.parse(fp).getroot() path_file = os.path.splitext(xml_file)[0] + '.path' with open(path_file) as fp: expected_path = fp.read().strip().split('\n')