From fe19a172cf8b0e349e829533d63a84797f1b8cfc Mon Sep 17 00:00:00 2001 From: jasquat Date: Tue, 17 Jan 2023 14:58:54 -0500 Subject: [PATCH] do not resolve entities when parsing xml w/ burnettk --- .../routes/script_unit_tests_controller.py | 2 +- .../services/process_instance_processor.py | 4 ++-- .../services/process_model_service.py | 4 +++- .../services/spec_file_service.py | 14 ++++++++--- .../tests/data/xml_with_entity/file_to_inject | 1 + .../tests/data/xml_with_entity/invoice.bpmn | 6 +++++ .../helpers/base_test.py | 14 ++++++++--- .../unit/test_spec_file_service.py | 23 +++++++++++++++++++ 8 files changed, 58 insertions(+), 10 deletions(-) create mode 100644 spiffworkflow-backend/tests/data/xml_with_entity/file_to_inject create mode 100644 spiffworkflow-backend/tests/data/xml_with_entity/invoice.bpmn diff --git a/spiffworkflow-backend/src/spiffworkflow_backend/routes/script_unit_tests_controller.py b/spiffworkflow-backend/src/spiffworkflow_backend/routes/script_unit_tests_controller.py index e97b26ae6..d51641ad4 100644 --- a/spiffworkflow-backend/src/spiffworkflow_backend/routes/script_unit_tests_controller.py +++ b/spiffworkflow-backend/src/spiffworkflow_backend/routes/script_unit_tests_controller.py @@ -49,7 +49,7 @@ def script_unit_test_create( # TODO: move this to an xml service or something file_contents = SpecFileService.get_data(process_model, file.name) - bpmn_etree_element = etree.fromstring(file_contents) + bpmn_etree_element = SpecFileService.get_etree_from_xml_bytes(file_contents) nsmap = bpmn_etree_element.nsmap spiff_element_maker = ElementMaker( diff --git a/spiffworkflow-backend/src/spiffworkflow_backend/services/process_instance_processor.py b/spiffworkflow-backend/src/spiffworkflow_backend/services/process_instance_processor.py index 510c66fda..5a299ef5d 100644 --- a/spiffworkflow-backend/src/spiffworkflow_backend/services/process_instance_processor.py +++ b/spiffworkflow-backend/src/spiffworkflow_backend/services/process_instance_processor.py @@ -1021,10 +1021,10 @@ class ProcessInstanceProcessor: for file in files: data = SpecFileService.get_data(process_model_info, file.name) if file.type == FileType.bpmn.value: - bpmn: etree.Element = etree.fromstring(data) + bpmn: etree.Element = SpecFileService.get_etree_from_xml_bytes(data) parser.add_bpmn_xml(bpmn, filename=file.name) elif file.type == FileType.dmn.value: - dmn: etree.Element = etree.fromstring(data) + dmn: etree.Element = SpecFileService.get_etree_from_xml_bytes(data) parser.add_dmn_xml(dmn, filename=file.name) if ( process_model_info.primary_process_id is None diff --git a/spiffworkflow-backend/src/spiffworkflow_backend/services/process_model_service.py b/spiffworkflow-backend/src/spiffworkflow_backend/services/process_model_service.py index 893baaeae..60574508a 100644 --- a/spiffworkflow-backend/src/spiffworkflow_backend/services/process_model_service.py +++ b/spiffworkflow-backend/src/spiffworkflow_backend/services/process_model_service.py @@ -130,7 +130,9 @@ class ProcessModelService(FileSystemService): def save_process_model(cls, process_model: ProcessModelInfo) -> None: """Save_process_model.""" process_model_path = os.path.abspath( - os.path.join(FileSystemService.root_path(), process_model.id) + os.path.join( + FileSystemService.root_path(), process_model.id_for_file_path() + ) ) os.makedirs(process_model_path, exist_ok=True) json_path = os.path.abspath( diff --git a/spiffworkflow-backend/src/spiffworkflow_backend/services/spec_file_service.py b/spiffworkflow-backend/src/spiffworkflow_backend/services/spec_file_service.py index 868d0fcdf..55f8df478 100644 --- a/spiffworkflow-backend/src/spiffworkflow_backend/services/spec_file_service.py +++ b/spiffworkflow-backend/src/spiffworkflow_backend/services/spec_file_service.py @@ -93,6 +93,12 @@ class SpecFileService(FileSystemService): process_model_info, file.name, file_contents ) + @classmethod + def get_etree_from_xml_bytes(cls, binary_data: bytes) -> etree.Element: + """Get_etree_from_xml_bytes.""" + etree_xml_parser = etree.XMLParser(resolve_entities=False) + return etree.fromstring(binary_data, parser=etree_xml_parser) + @classmethod def get_references_for_file_contents( cls, process_model_info: ProcessModelInfo, file_name: str, binary_data: bytes @@ -118,13 +124,13 @@ class SpecFileService(FileSystemService): correlations = {} start_messages = [] if file_type.value == FileType.bpmn.value: - parser.add_bpmn_xml(etree.fromstring(binary_data)) + parser.add_bpmn_xml(cls.get_etree_from_xml_bytes(binary_data)) parser_type = "process" sub_parsers = list(parser.process_parsers.values()) messages = parser.messages correlations = parser.correlations elif file_type.value == FileType.dmn.value: - parser.add_dmn_xml(etree.fromstring(binary_data)) + parser.add_dmn_xml(cls.get_etree_from_xml_bytes(binary_data)) sub_parsers = list(parser.dmn_parsers.values()) parser_type = "decision" else: @@ -172,7 +178,9 @@ class SpecFileService(FileSystemService): validator = BpmnValidator() parser = MyCustomParser(validator=validator) try: - parser.add_bpmn_xml(etree.fromstring(binary_data), filename=file_name) + parser.add_bpmn_xml( + cls.get_etree_from_xml_bytes(binary_data), filename=file_name + ) except etree.XMLSyntaxError as exception: raise ProcessModelFileInvalidError( f"Received error trying to parse bpmn xml: {str(exception)}" diff --git a/spiffworkflow-backend/tests/data/xml_with_entity/file_to_inject b/spiffworkflow-backend/tests/data/xml_with_entity/file_to_inject new file mode 100644 index 000000000..81ddf4e64 --- /dev/null +++ b/spiffworkflow-backend/tests/data/xml_with_entity/file_to_inject @@ -0,0 +1 @@ +THIS_STRING_SHOULD_NOT_EXIST_ITS_SECRET diff --git a/spiffworkflow-backend/tests/data/xml_with_entity/invoice.bpmn b/spiffworkflow-backend/tests/data/xml_with_entity/invoice.bpmn new file mode 100644 index 000000000..44216f1e3 --- /dev/null +++ b/spiffworkflow-backend/tests/data/xml_with_entity/invoice.bpmn @@ -0,0 +1,6 @@ + + ]> + + John + &ent; + diff --git a/spiffworkflow-backend/tests/spiffworkflow_backend/helpers/base_test.py b/spiffworkflow-backend/tests/spiffworkflow_backend/helpers/base_test.py index df62f5be1..7c8515db5 100644 --- a/spiffworkflow-backend/tests/spiffworkflow_backend/helpers/base_test.py +++ b/spiffworkflow-backend/tests/spiffworkflow_backend/helpers/base_test.py @@ -173,11 +173,11 @@ class BaseTest: " model" ) - def get_test_data_file_contents( + def get_test_data_file_full_path( self, file_name: str, process_model_test_data_dir: str - ) -> bytes: + ) -> str: """Get_test_data_file_contents.""" - file_full_path = os.path.join( + return os.path.join( current_app.instance_path, "..", "..", @@ -186,6 +186,14 @@ class BaseTest: process_model_test_data_dir, file_name, ) + + def get_test_data_file_contents( + self, file_name: str, process_model_test_data_dir: str + ) -> bytes: + """Get_test_data_file_contents.""" + file_full_path = self.get_test_data_file_full_path( + file_name, process_model_test_data_dir + ) with open(file_full_path, "rb") as file: return file.read() diff --git a/spiffworkflow-backend/tests/spiffworkflow_backend/unit/test_spec_file_service.py b/spiffworkflow-backend/tests/spiffworkflow_backend/unit/test_spec_file_service.py index 2ce011b23..506713cae 100644 --- a/spiffworkflow-backend/tests/spiffworkflow_backend/unit/test_spec_file_service.py +++ b/spiffworkflow-backend/tests/spiffworkflow_backend/unit/test_spec_file_service.py @@ -5,6 +5,7 @@ import pytest from flask import Flask from flask.testing import FlaskClient from flask_bpmn.models.db import db +from lxml import etree # type: ignore from tests.spiffworkflow_backend.helpers.base_test import BaseTest from tests.spiffworkflow_backend.helpers.test_data import load_test_spec @@ -236,3 +237,25 @@ class TestSpecFileService(BaseTest): full_file_path = SpecFileService.full_file_path(process_model, "bad_xml.bpmn") assert not os.path.isfile(full_file_path) + + def test_does_not_evaluate_entities( + self, + app: Flask, + client: FlaskClient, + with_db_and_bpmn_file_cleanup: None, + ) -> None: + """Test_does_not_evaluate_entities.""" + string_replacement = b"THIS_STRING_SHOULD_NOT_EXIST_ITS_SECRET" + tmp_file = os.path.normpath( + self.get_test_data_file_full_path("file_to_inject", "xml_with_entity") + ) + file_contents = self.get_test_data_file_contents( + "invoice.bpmn", "xml_with_entity" + ) + file_contents = ( + file_contents.decode("utf-8") + .replace("{{FULL_PATH_TO_FILE}}", tmp_file) + .encode() + ) + etree_element = SpecFileService.get_etree_from_xml_bytes(file_contents) + assert string_replacement not in etree.tostring(etree_element)