do not resolve entities when parsing xml w/ burnettk

This commit is contained in:
jasquat 2023-01-17 14:58:54 -05:00
parent 994620bc5f
commit fe19a172cf
8 changed files with 58 additions and 10 deletions

View File

@ -49,7 +49,7 @@ def script_unit_test_create(
# TODO: move this to an xml service or something # TODO: move this to an xml service or something
file_contents = SpecFileService.get_data(process_model, file.name) file_contents = SpecFileService.get_data(process_model, file.name)
bpmn_etree_element = etree.fromstring(file_contents) bpmn_etree_element = SpecFileService.get_etree_from_xml_bytes(file_contents)
nsmap = bpmn_etree_element.nsmap nsmap = bpmn_etree_element.nsmap
spiff_element_maker = ElementMaker( spiff_element_maker = ElementMaker(

View File

@ -1021,10 +1021,10 @@ class ProcessInstanceProcessor:
for file in files: for file in files:
data = SpecFileService.get_data(process_model_info, file.name) data = SpecFileService.get_data(process_model_info, file.name)
if file.type == FileType.bpmn.value: if file.type == FileType.bpmn.value:
bpmn: etree.Element = etree.fromstring(data) bpmn: etree.Element = SpecFileService.get_etree_from_xml_bytes(data)
parser.add_bpmn_xml(bpmn, filename=file.name) parser.add_bpmn_xml(bpmn, filename=file.name)
elif file.type == FileType.dmn.value: elif file.type == FileType.dmn.value:
dmn: etree.Element = etree.fromstring(data) dmn: etree.Element = SpecFileService.get_etree_from_xml_bytes(data)
parser.add_dmn_xml(dmn, filename=file.name) parser.add_dmn_xml(dmn, filename=file.name)
if ( if (
process_model_info.primary_process_id is None process_model_info.primary_process_id is None

View File

@ -130,7 +130,9 @@ class ProcessModelService(FileSystemService):
def save_process_model(cls, process_model: ProcessModelInfo) -> None: def save_process_model(cls, process_model: ProcessModelInfo) -> None:
"""Save_process_model.""" """Save_process_model."""
process_model_path = os.path.abspath( process_model_path = os.path.abspath(
os.path.join(FileSystemService.root_path(), process_model.id) os.path.join(
FileSystemService.root_path(), process_model.id_for_file_path()
)
) )
os.makedirs(process_model_path, exist_ok=True) os.makedirs(process_model_path, exist_ok=True)
json_path = os.path.abspath( json_path = os.path.abspath(

View File

@ -93,6 +93,12 @@ class SpecFileService(FileSystemService):
process_model_info, file.name, file_contents process_model_info, file.name, file_contents
) )
@classmethod
def get_etree_from_xml_bytes(cls, binary_data: bytes) -> etree.Element:
    """Parse raw XML bytes into an lxml element without resolving entities.

    The parser is built with ``resolve_entities=False`` so that entity
    references declared in a DOCTYPE (for example a ``SYSTEM "file://..."``
    entity) are not replaced by their text value while parsing.

    Args:
        binary_data: the XML document as bytes.

    Returns:
        The root element produced by ``etree.fromstring``.
    """
    # Entity expansion is the only parser option changed from lxml's defaults.
    safe_parser = etree.XMLParser(resolve_entities=False)
    root_element = etree.fromstring(binary_data, parser=safe_parser)
    return root_element
@classmethod @classmethod
def get_references_for_file_contents( def get_references_for_file_contents(
cls, process_model_info: ProcessModelInfo, file_name: str, binary_data: bytes cls, process_model_info: ProcessModelInfo, file_name: str, binary_data: bytes
@ -118,13 +124,13 @@ class SpecFileService(FileSystemService):
correlations = {} correlations = {}
start_messages = [] start_messages = []
if file_type.value == FileType.bpmn.value: if file_type.value == FileType.bpmn.value:
parser.add_bpmn_xml(etree.fromstring(binary_data)) parser.add_bpmn_xml(cls.get_etree_from_xml_bytes(binary_data))
parser_type = "process" parser_type = "process"
sub_parsers = list(parser.process_parsers.values()) sub_parsers = list(parser.process_parsers.values())
messages = parser.messages messages = parser.messages
correlations = parser.correlations correlations = parser.correlations
elif file_type.value == FileType.dmn.value: elif file_type.value == FileType.dmn.value:
parser.add_dmn_xml(etree.fromstring(binary_data)) parser.add_dmn_xml(cls.get_etree_from_xml_bytes(binary_data))
sub_parsers = list(parser.dmn_parsers.values()) sub_parsers = list(parser.dmn_parsers.values())
parser_type = "decision" parser_type = "decision"
else: else:
@ -172,7 +178,9 @@ class SpecFileService(FileSystemService):
validator = BpmnValidator() validator = BpmnValidator()
parser = MyCustomParser(validator=validator) parser = MyCustomParser(validator=validator)
try: try:
parser.add_bpmn_xml(etree.fromstring(binary_data), filename=file_name) parser.add_bpmn_xml(
cls.get_etree_from_xml_bytes(binary_data), filename=file_name
)
except etree.XMLSyntaxError as exception: except etree.XMLSyntaxError as exception:
raise ProcessModelFileInvalidError( raise ProcessModelFileInvalidError(
f"Received error trying to parse bpmn xml: {str(exception)}" f"Received error trying to parse bpmn xml: {str(exception)}"

View File

@ -0,0 +1 @@
THIS_STRING_SHOULD_NOT_EXIST_ITS_SECRET

View File

@ -0,0 +1,6 @@
<!--?xml version="1.0" ?-->
<!DOCTYPE replace [<!ENTITY ent SYSTEM "file://{{FULL_PATH_TO_FILE}}"> ]>
<userInfo>
<firstName>John</firstName>
<lastName>&ent;</lastName>
</userInfo>

View File

@ -173,11 +173,11 @@ class BaseTest:
" model" " model"
) )
def get_test_data_file_contents( def get_test_data_file_full_path(
self, file_name: str, process_model_test_data_dir: str self, file_name: str, process_model_test_data_dir: str
) -> bytes: ) -> str:
"""Get_test_data_file_contents.""" """Get_test_data_file_contents."""
file_full_path = os.path.join( return os.path.join(
current_app.instance_path, current_app.instance_path,
"..", "..",
"..", "..",
@ -186,6 +186,14 @@ class BaseTest:
process_model_test_data_dir, process_model_test_data_dir,
file_name, file_name,
) )
def get_test_data_file_contents(
    self, file_name: str, process_model_test_data_dir: str
) -> bytes:
    """Return the raw bytes of a test data file.

    The location is resolved with ``get_test_data_file_full_path`` and the
    file is then read in binary mode.
    """
    full_path = self.get_test_data_file_full_path(
        file_name, process_model_test_data_dir
    )
    with open(full_path, "rb") as handle:
        contents = handle.read()
    return contents

View File

@ -5,6 +5,7 @@ import pytest
from flask import Flask from flask import Flask
from flask.testing import FlaskClient from flask.testing import FlaskClient
from flask_bpmn.models.db import db from flask_bpmn.models.db import db
from lxml import etree # type: ignore
from tests.spiffworkflow_backend.helpers.base_test import BaseTest from tests.spiffworkflow_backend.helpers.base_test import BaseTest
from tests.spiffworkflow_backend.helpers.test_data import load_test_spec from tests.spiffworkflow_backend.helpers.test_data import load_test_spec
@ -236,3 +237,25 @@ class TestSpecFileService(BaseTest):
full_file_path = SpecFileService.full_file_path(process_model, "bad_xml.bpmn") full_file_path = SpecFileService.full_file_path(process_model, "bad_xml.bpmn")
assert not os.path.isfile(full_file_path) assert not os.path.isfile(full_file_path)
def test_does_not_evaluate_entities(
    self,
    app: Flask,
    client: FlaskClient,
    with_db_and_bpmn_file_cleanup: None,
) -> None:
    """Check that parsed XML does not contain the contents of an entity file.

    The fixture XML declares a SYSTEM entity whose path placeholder is
    replaced with the real location of a file holding a marker string.
    After parsing through SpecFileService, the serialized tree must not
    contain that marker.
    """
    secret_marker = b"THIS_STRING_SHOULD_NOT_EXIST_ITS_SECRET"
    test_data_dir = "xml_with_entity"

    # Absolute, normalized path of the file the entity points at.
    injected_file_path = os.path.normpath(
        self.get_test_data_file_full_path("file_to_inject", test_data_dir)
    )

    # Fill the path placeholder in the fixture, then go back to bytes.
    xml_template = self.get_test_data_file_contents(
        "invoice.bpmn", test_data_dir
    ).decode("utf-8")
    xml_bytes = xml_template.replace(
        "{{FULL_PATH_TO_FILE}}", injected_file_path
    ).encode()

    parsed_root = SpecFileService.get_etree_from_xml_bytes(xml_bytes)
    serialized = etree.tostring(parsed_root)
    assert secret_marker not in serialized