Ensure better error messages when parsing Word documents with Jinja syntax.

I have also opened a pull request with the maintainers of the python docx library to improve the error messages that we depend on here.
This commit is contained in:
Dan 2021-11-23 14:38:48 -05:00
parent 4a1285b8e0
commit 8ec58fd6d7
7 changed files with 72 additions and 13 deletions

2
Pipfile.lock generated
View File

@ -1238,7 +1238,7 @@
},
"spiffworkflow": {
"git": "https://github.com/sartography/SpiffWorkflow",
"ref": "e79a3da7a489d12ed8b4666d1b01d9a4a328ba6d"
"ref": "9b5b99fd7b1dac19586fe78026d1604f9f8a7e26"
},
"sqlalchemy": {
"hashes": [

View File

@ -13,7 +13,8 @@ import sentry_sdk
class ApiError(Exception):
def __init__(self, code, message, status_code=400,
file_name="", task_id="", task_name="", tag="", task_data=None, error_type="", line_number=0, offset=0):
file_name="", task_id="", task_name="", tag="",
task_data=None, error_type="", error_line="", line_number=0, offset=0):
if task_data is None:
task_data = {}
self.status_code = status_code
@ -27,6 +28,7 @@ class ApiError(Exception):
self.line_number = line_number
self.offset = offset
self.error_type = error_type
self.error_line = error_line
if hasattr(g, 'user'):
user = g.user.uid
else:

View File

@ -1,10 +1,7 @@
import copy
import re
from io import BytesIO
import jinja2
from docx.shared import Inches
from docxtpl import DocxTemplate, Listing, InlineImage
from SpiffWorkflow.exceptions import WorkflowTaskExecException
from crc import session
from crc.api.common import ApiError
@ -78,7 +75,14 @@ Takes two arguments:
else:
image_file_data = None
return JinjaService().make_template(BytesIO(file_data_model.data), task.data, image_file_data)
try:
return JinjaService().make_template(BytesIO(file_data_model.data), task.data, image_file_data)
except ApiError as ae:
# In some cases we want to provide a very specific error, that does not get obscured when going
# through the python expression engine. We can do that by throwing a WorkflowTaskExecException,
# which the expression engine should just pass through.
raise WorkflowTaskExecException(task, ae.message, exception=ae, line_number=ae.line_number,
error_line=ae.error_line)
def get_image_file_data(self, fields_str, task):
image_file_data = []

View File

@ -1,10 +1,16 @@
import re
import sys
import traceback
from docx.shared import Inches
from docxtpl import DocxTemplate, Listing, InlineImage
from io import BytesIO
from jinja2 import Environment, DictLoader
from jinja2 import Environment, DictLoader, TemplateSyntaxError
import copy
from crc.api.common import ApiError
class JinjaService:
"""Service for Jinja2 templates.
@ -49,8 +55,30 @@ Cool Right?
try:
doc.render(doc_context, jinja_env)
except Exception as e:
print(e)
except TemplateSyntaxError as tse:
line_number = tse.lineno
# the doc renderer code is trying to give context, returning 3 lines
# before and after the error if possible. This can result in a lot of
# garbage, too much to send back. Sometimes the line itself is too large.
# Just trying to get something sensible if possible.
context = list(tse.docx_context)
if len(context) == 1: # It just sent us the whole damn thing back. Useless.
error_line = "Unable to determine location of error in the word document. Opening and Saving the " \
"template in LibreOffice may fix this problem."
elif len(context) == 7:
error_line = context[3]
else:
error_line = ", ".join(context)
# If the line is still excessively long, try grabbing the middle 200
# characters.
if len(error_line) > 500:
offset = int(len(error_line)/2 - 100)
error_line = "Error occurred near: " + error_line[offset: -offset]
raise ApiError(code="template_error", message="Word Document creation error : %s" % str(tse),
line_number=line_number, error_line=error_line)
target_stream = BytesIO()
doc.save(target_stream)
target_stream.seek(0) # move to the beginning of the stream.

BIN
tests/data/template.docx Normal file

Binary file not shown.

Binary file not shown.

View File

@ -1,8 +1,15 @@
import os
from io import BytesIO
from lxml import etree
from tests.base_test import BaseTest
from crc.services.workflow_processor import WorkflowProcessor
from crc.services.workflow_service import WorkflowService
from crc.services.jinja_service import JinjaService
from crc.api.common import ApiError
from crc import mail
from crc import mail, app
import json
@ -50,8 +57,26 @@ class TestJinjaService(BaseTest):
self.assert_success(rv)
self.assertIn("Hello World", rv.get_data(as_text=True))
def test_jinja_service_documents(self):
pass
def test_jinja_service_word_documents(self):
filepath = os.path.join(app.root_path, '..', 'tests', 'data', 'template.docx')
with open(filepath, 'rb') as myfile:
file_data = BytesIO(myfile.read())
context = {'title': 'My Title', 'my_list': ["a", "b", "c"], 'show_table': True}
result = JinjaService().make_template(file_data, context)
self.assertIsNotNone(result) # Not a lot we can do here, just assure there is not an error.
def test_jinja_service_word_document_errors_are_sensible(self):
filepath = os.path.join(app.root_path, '..', 'tests', 'data', 'template_error.docx')
with open(filepath, 'rb') as myfile:
file_data = BytesIO(myfile.read())
context = {'title': 'My Title', 'my_list': ["a", "b", "c"], 'show_table': True}
with self.assertRaises(ApiError) as ae:
result = JinjaService().make_template(file_data, context)
self.assertIn('{{% no_such_variable_error ! @ __ %}}', ae.exception.error_line)
self.assertEquals("Word Document creation error : unexpected '%'", ae.exception.message)
self.assertEquals(14, ae.exception.line_number)
def test_jinja_service_properties(self):
pass