Make file upload detection more flexible, add integration test (#854)

Author: jbirddog, 2024-01-04 11:32:13 -05:00 (committed by GitHub)
parent c96b4b751a
commit ad3e63367f
5 changed files with 248 additions and 247 deletions


@@ -0,0 +1,42 @@
+"""empty message
+
+Revision ID: bc2b84d013e0
+Revises: 3191627ae224
+Create Date: 2024-01-03 13:06:57.981736
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import mysql
+
+# revision identifiers, used by Alembic.
+revision = 'bc2b84d013e0'
+down_revision = '3191627ae224'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('process_instance_file_data', schema=None) as batch_op:
+        batch_op.drop_column('identifier')
+        batch_op.drop_column('list_index')
+
+    with op.batch_alter_table('task', schema=None) as batch_op:
+        batch_op.drop_index('ix_task_guid')
+        batch_op.create_index(batch_op.f('ix_task_guid'), ['guid'], unique=True)
+
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('task', schema=None) as batch_op:
+        batch_op.drop_index(batch_op.f('ix_task_guid'))
+        batch_op.create_index('ix_task_guid', ['guid'], unique=False)
+
+    with op.batch_alter_table('process_instance_file_data', schema=None) as batch_op:
+        batch_op.add_column(sa.Column('list_index', mysql.INTEGER(), autoincrement=False, nullable=True))
+        batch_op.add_column(sa.Column('identifier', mysql.VARCHAR(length=255), nullable=False))
+
+    # ### end Alembic commands ###
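
If this revision ever needs to be applied or rolled back by hand, a minimal sketch using Alembic's Python command API (the alembic.ini path is an assumption; the backend's normal migration tooling usually handles this):

# Minimal sketch: drive the revision above programmatically.
# The alembic.ini path is an assumption about local setup.
from alembic import command
from alembic.config import Config

config = Config("alembic.ini")
command.upgrade(config, "bc2b84d013e0")      # apply this revision
# command.downgrade(config, "3191627ae224")  # return to the parent revision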


@@ -14,13 +14,10 @@ class ProcessInstanceFileDataModel(SpiffworkflowBaseDBModel):
     id: int = db.Column(db.Integer, primary_key=True)
     process_instance_id: int = db.Column(ForeignKey(ProcessInstanceModel.id), nullable=False, index=True)  # type: ignore
-    identifier: str = db.Column(db.String(255), nullable=False)
-    list_index: int | None = db.Column(db.Integer, nullable=True)
     mimetype: str = db.Column(db.String(255), nullable=False)
     filename: str = db.Column(db.String(255), nullable=False)
     # this is not deferred because there is no reason to query this model if you do not want the contents
     contents: str = db.Column(db.LargeBinary().with_variant(LONGBLOB, "mysql"), nullable=False)
     digest: str = db.Column(db.String(64), nullable=False, index=True)
     updated_at_in_seconds: int = db.Column(db.Integer, nullable=False)
     created_at_in_seconds: int = db.Column(db.Integer, nullable=False)
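
With identifier and list_index gone, a stored file is addressed only by its process instance and content digest. A minimal sketch of building the slimmed-down model (field values are illustrative; the digest is the sha256 hex of the raw contents, matching the 64-character column and what the tests assert):

# Illustrative construction of the slimmed-down model; values are made up.
import hashlib
import time

from spiffworkflow_backend.models.process_instance_file_data import ProcessInstanceFileDataModel

contents = b"hello\n"
now = int(time.time())
model = ProcessInstanceFileDataModel(
    process_instance_id=111,
    mimetype="text/plain",
    filename="hello.txt",
    contents=contents,  # type: ignore
    digest=hashlib.sha256(contents).hexdigest(),
    updated_at_in_seconds=now,
    created_at_in_seconds=now,
)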


@@ -2,6 +2,7 @@ import base64
 import hashlib
 import time
 from collections.abc import Generator
+from contextlib import suppress
 from datetime import datetime
 from datetime import timezone
 from typing import Any
@@ -46,6 +47,8 @@ from spiffworkflow_backend.services.workflow_execution_service import TaskRunnab
 from spiffworkflow_backend.services.workflow_service import WorkflowService
 from spiffworkflow_backend.specs.start_event import StartConfiguration
 
+FileDataGenerator = Generator[tuple[dict | list, str | int, str], None, None]
+
 
 class ProcessInstanceService:
     FILE_DATA_DIGEST_PREFIX = "spifffiledatadigest+"
@@ -359,12 +362,10 @@
     @classmethod
     def file_data_model_for_value(
         cls,
-        identifier: str,
         value: str,
         process_instance_id: int,
     ) -> ProcessInstanceFileDataModel | None:
-        if value.startswith("data:"):
-            try:
+        with suppress(Exception):
             parts = value.split(";")
             mimetype = parts[0][5:]
             filename = unquote(parts[1].split("=")[1])
@@ -376,7 +377,6 @@
             return ProcessInstanceFileDataModel(
                 process_instance_id=process_instance_id,
-                identifier=identifier,
                 mimetype=mimetype,
                 filename=filename,
                 contents=contents,  # type: ignore
@@ -384,8 +384,6 @@
                 updated_at_in_seconds=now_in_seconds,
                 created_at_in_seconds=now_in_seconds,
             )
-        except Exception as e:
-            current_app.logger.warning(e)
 
         return None
@@ -393,66 +391,52 @@
     def possible_file_data_values(
         cls,
         data: dict[str, Any],
-    ) -> Generator[tuple[str, str, int | None], None, None]:
-        for identifier, value in data.items():
-            if isinstance(value, str):
-                yield (identifier, value, None)
-            if isinstance(value, list):
-                for list_index, list_value in enumerate(value):
-                    if isinstance(list_value, str):
-                        yield (identifier, list_value, list_index)
-                    if isinstance(list_value, dict) and len(list_value) == 1:
-                        for v in list_value.values():
-                            if isinstance(v, str):
-                                yield (identifier, v, list_index)
+    ) -> FileDataGenerator:
+        def values(collection: dict | list, elem: str | int | None, value: Any) -> FileDataGenerator:
+            match (collection, elem, value):
+                case (dict(), None, None):
+                    for k, v in collection.items():  # type: ignore
+                        yield from values(collection, k, v)
+                case (list(), None, None):
+                    for i, v in enumerate(collection):
+                        yield from values(collection, i, v)
+                case (_, _, dict() | list()):
+                    yield from values(value, None, None)
+                case (_, _, str()) if elem is not None and value.startswith("data:"):
+                    yield (collection, elem, value)
+
+        yield from values(data, None, None)
 
     @classmethod
-    def file_data_models_for_data(
+    def replace_file_data_with_digest_references(
         cls,
         data: dict[str, Any],
         process_instance_id: int,
     ) -> list[ProcessInstanceFileDataModel]:
         models = []
-        for identifier, value, list_index in cls.possible_file_data_values(data):
-            model = cls.file_data_model_for_value(identifier, value, process_instance_id)
-            if model is not None:
-                model.list_index = list_index
-                models.append(model)
+        for collection, elem, value in cls.possible_file_data_values(data):
+            model = cls.file_data_model_for_value(value, process_instance_id)
+            if model is None:
+                continue
+            models.append(model)
+            digest_reference = f"data:{model.mimetype};name={model.filename};base64,{cls.FILE_DATA_DIGEST_PREFIX}{model.digest}"
+            collection[elem] = digest_reference  # type: ignore
         return models
 
-    @classmethod
-    def replace_file_data_with_digest_references(
-        cls,
-        data: dict[str, Any],
-        models: list[ProcessInstanceFileDataModel],
-    ) -> None:
-        for model in models:
-            digest_reference = f"data:{model.mimetype};name={model.filename};base64,{cls.FILE_DATA_DIGEST_PREFIX}{model.digest}"
-            if model.list_index is None:
-                data[model.identifier] = digest_reference
-            else:
-                old_value = data[model.identifier][model.list_index]
-                new_value: Any = digest_reference
-                if isinstance(old_value, dict) and len(old_value) == 1:
-                    new_value = {k: digest_reference for k in old_value.keys()}
-                data[model.identifier][model.list_index] = new_value
-
     @classmethod
     def save_file_data_and_replace_with_digest_references(
         cls,
         data: dict[str, Any],
         process_instance_id: int,
     ) -> None:
-        models = cls.file_data_models_for_data(data, process_instance_id)
+        models = cls.replace_file_data_with_digest_references(data, process_instance_id)
         for model in models:
             db.session.add(model)
         db.session.commit()
-
-        cls.replace_file_data_with_digest_references(data, models)
 
     @classmethod
     def update_form_task_data(
         cls,
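
The rewritten possible_file_data_values walks dicts and lists to any depth and yields (collection, element, value) triples, which is what lets replace_file_data_with_digest_references assign the digest reference back through collection[elem] no matter how deeply the upload is nested. A standalone sketch of the same match-based traversal, runnable outside the service (the helper names are mine):

# Standalone sketch of the traversal above: yield (collection, key or index,
# value) for every string that looks like an embedded data: URL, at any depth.
from collections.abc import Generator
from typing import Any

Triples = Generator[tuple[dict | list, str | int, str], None, None]


def walk(collection: dict | list, elem: str | int | None = None, value: Any = None) -> Triples:
    match (collection, elem, value):
        case (dict(), None, None):
            for k, v in collection.items():
                yield from walk(collection, k, v)
        case (list(), None, None):
            for i, v in enumerate(collection):
                yield from walk(collection, i, v)
        case (_, _, dict() | list()):
            yield from walk(value)
        case (_, _, str()) if elem is not None and value.startswith("data:"):
            yield (collection, elem, value)


data = {"file0": "data:text/plain;name=a.txt;base64,aGkK", "key": [{"file1": "not a file"}]}
for collection, elem, value in walk(data):
    collection[elem] = "<digest reference would go here>"
print(data)  # only the data: URL is replaced; plain strings are untouched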


@@ -1,4 +1,5 @@
 """Test Process Api Blueprint."""
+import base64
 import io
 import json
 import os
@@ -1643,6 +1644,80 @@
         assert response.json
         assert response.json["error_code"] == "message_not_accepted"
 
+    def test_can_download_uploaded_file(
+        self,
+        app: Flask,
+        client: FlaskClient,
+        with_db_and_bpmn_file_cleanup: None,
+        with_super_admin_user: UserModel,
+    ) -> None:
+        process_group_id = "test_message_send"
+        process_model_id = "message_sender"
+        bpmn_file_name = "message_sender.bpmn"
+        bpmn_file_location = "message_send_one_conversation"
+        process_model = self.create_group_and_model_with_bpmn(
+            client,
+            with_super_admin_user,
+            process_group_id=process_group_id,
+            process_model_id=process_model_id,
+            bpmn_file_name=bpmn_file_name,
+            bpmn_file_location=bpmn_file_location,
+        )
+
+        def _file_data(i: int, c: bytes) -> str:
+            b64 = base64.b64encode(c).decode()
+            return f"data:some/mimetype;name=testing{i}.txt;base64,{b64}"
+
+        def _digest_reference(i: int, sha: str) -> str:
+            b64 = f"{ProcessInstanceService.FILE_DATA_DIGEST_PREFIX}{sha}"
+            return f"data:some/mimetype;name=testing{i}.txt;base64,{b64}"
+
+        file_contents = [f"contents{i}".encode() for i in range(3)]
+        file_data = [_file_data(i, c) for i, c in enumerate(file_contents)]
+        digests = [sha256(c).hexdigest() for c in file_contents]
+        [_digest_reference(i, d) for i, d in enumerate(digests)]
+
+        payload = {
+            "customer_id": "sartography",
+            "po_number": "1001",
+            "amount": "One Billion Dollars! Mwhahahahahaha",
+            "description": "But seriously.",
+            "file0": file_data[0],
+            "key": [{"file1": file_data[1]}],
+            "key2": {"key3": [{"key4": file_data[2], "key5": "bob"}]},
+        }
+
+        response = self.create_process_instance_from_process_model_id_with_api(
+            client,
+            process_model.id,
+            self.logged_in_headers(with_super_admin_user),
+        )
+        assert response.json is not None
+        process_instance_id = response.json["id"]
+
+        process_instance = ProcessInstanceModel.query.filter_by(id=process_instance_id).first()
+        processor = ProcessInstanceProcessor(process_instance)
+        processor.do_engine_steps(save=True)
+
+        task = processor.get_all_user_tasks()[0]
+        human_task = process_instance.active_human_tasks[0]
+
+        ProcessInstanceService.complete_form_task(
+            processor,
+            task,
+            payload,
+            with_super_admin_user,
+            human_task,
+        )
+        processor.save()
+
+        for expected_content, digest in zip(file_contents, digests, strict=True):
+            response = client.get(
+                f"/v1.0/process-data-file-download/{self.modify_process_identifier_for_path_param(process_model.id)}/{process_instance_id}/{digest}",
+                headers=self.logged_in_headers(with_super_admin_user),
+            )
+
+            assert response.status_code == 200
+            assert response.data == expected_content
+
     def test_process_instance_can_be_terminated(
         self,
         app: Flask,
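
After submission, the task data holds a reference of the form data:<mimetype>;name=<filename>;base64,spifffiledatadigest+<sha256 hex> in place of the uploaded bytes, and the download endpoint exercised above resolves the trailing digest. A small sketch of taking such a reference apart (the parsing mirrors the f-string in the service; the helper itself is hypothetical):

# Sketch: recover mimetype, filename, and digest from a digest reference.
PREFIX = "spifffiledatadigest+"  # ProcessInstanceService.FILE_DATA_DIGEST_PREFIX


def parse_digest_reference(reference: str) -> tuple[str, str, str]:
    mimetype_part, name_part, b64_part = reference.split(";")
    mimetype = mimetype_part[len("data:"):]
    filename = name_part.split("=", 1)[1]
    digest = b64_part.split(",", 1)[1][len(PREFIX):]
    return mimetype, filename, digest


ref = "data:some/mimetype;name=testing0.txt;base64," + PREFIX + "a" * 64
assert parse_digest_reference(ref) == ("some/mimetype", "testing0.txt", "a" * 64)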


@@ -1,202 +1,105 @@
+import base64
+import hashlib
 from datetime import datetime
 from datetime import timezone
+from typing import Any
 
+import pytest
 from flask.app import Flask
 from SpiffWorkflow.bpmn.event import PendingBpmnEvent  # type: ignore
-from spiffworkflow_backend.models.process_instance_file_data import ProcessInstanceFileDataModel
 from spiffworkflow_backend.services.process_instance_service import ProcessInstanceService
 from tests.spiffworkflow_backend.helpers.base_test import BaseTest
 
 
+def _file_content(i: int) -> bytes:
+    return f"testing{i}\n".encode()
+
+
+def _file_data(i: int) -> str:
+    b64 = base64.b64encode(_file_content(i)).decode()
+    return f"data:some/mimetype;name=testing{i}.txt;base64,{b64}"
+
+
+def _digest(i: int) -> str:
+    return hashlib.sha256(_file_content(i)).hexdigest()
+
+
+def _digest_reference(i: int) -> str:
+    sha = _digest(i)
+    b64 = f"{ProcessInstanceService.FILE_DATA_DIGEST_PREFIX}{sha}"
+    return f"data:some/mimetype;name=testing{i}.txt;base64,{b64}"
+
+
 class TestProcessInstanceService(BaseTest):
-    SAMPLE_FILE_DATA = "data:some/mimetype;name=testing.txt;base64,dGVzdGluZwo="
-    SAMPLE_DIGEST_REFERENCE = f"data:some/mimetype;name=testing.txt;base64,{ProcessInstanceService.FILE_DATA_DIGEST_PREFIX}12a61f4e173fb3a11c05d6471f74728f76231b4a5fcd9667cef3af87a3ae4dc2"  # noqa: B950,E501
-
-    def _check_sample_file_data_model(
-        self,
-        identifier: str,
-        list_index: int | None,
-        model: ProcessInstanceFileDataModel | None,
-    ) -> None:
-        assert model is not None
-        assert model.identifier == identifier
-        assert model.process_instance_id == 111
-        assert model.list_index == list_index
-        assert model.mimetype == "some/mimetype"
-        assert model.filename == "testing.txt"
-        assert model.contents == b"testing\n"  # type: ignore
-        assert model.digest == "12a61f4e173fb3a11c05d6471f74728f76231b4a5fcd9667cef3af87a3ae4dc2"
-
-    def test_can_create_file_data_model_for_file_data_value(
-        self,
-        app: Flask,
-        with_db_and_bpmn_file_cleanup: None,
-    ) -> None:
-        model = ProcessInstanceService.file_data_model_for_value(
-            "uploaded_file",
-            self.SAMPLE_FILE_DATA,
-            111,
-        )
-        self._check_sample_file_data_model("uploaded_file", None, model)
-
-    def test_does_not_create_file_data_model_for_non_file_data_value(
-        self,
-        app: Flask,
-        with_db_and_bpmn_file_cleanup: None,
-    ) -> None:
-        model = ProcessInstanceService.file_data_model_for_value(
-            "not_a_file",
-            "just a value",
-            111,
-        )
-        assert model is None
-
-    def test_can_create_file_data_models_for_single_file_data_values(
-        self,
-        app: Flask,
-        with_db_and_bpmn_file_cleanup: None,
-    ) -> None:
-        data = {
-            "uploaded_file": self.SAMPLE_FILE_DATA,
-        }
-        models = ProcessInstanceService.file_data_models_for_data(data, 111)
-        assert len(models) == 1
-        self._check_sample_file_data_model("uploaded_file", None, models[0])
-
-    def test_can_create_file_data_models_for_multiple_file_data_values(
-        self,
-        app: Flask,
-        with_db_and_bpmn_file_cleanup: None,
-    ) -> None:
-        data = {
-            "uploaded_files": [self.SAMPLE_FILE_DATA, self.SAMPLE_FILE_DATA],
-        }
-        models = ProcessInstanceService.file_data_models_for_data(data, 111)
-        assert len(models) == 2
-        self._check_sample_file_data_model("uploaded_files", 0, models[0])
-        self._check_sample_file_data_model("uploaded_files", 1, models[1])
-
-    def test_can_create_file_data_models_for_mix_of_file_data_and_non_file_data_values(
-        self,
-        app: Flask,
-        with_db_and_bpmn_file_cleanup: None,
-    ) -> None:
-        data = {
-            "not_a_file": "just a value",
-            "uploaded_file": self.SAMPLE_FILE_DATA,
-            "not_a_file2": "just a value 2",
-            "uploaded_files": [self.SAMPLE_FILE_DATA, self.SAMPLE_FILE_DATA],
-            "not_a_file3": "just a value 3",
-            "uploaded_files2": [self.SAMPLE_FILE_DATA, self.SAMPLE_FILE_DATA],
-            "uploaded_file2": self.SAMPLE_FILE_DATA,
-        }
-        models = ProcessInstanceService.file_data_models_for_data(data, 111)
-        assert len(models) == 6
-        self._check_sample_file_data_model("uploaded_file", None, models[0])
-        self._check_sample_file_data_model("uploaded_files", 0, models[1])
-        self._check_sample_file_data_model("uploaded_files", 1, models[2])
-        self._check_sample_file_data_model("uploaded_files2", 0, models[3])
-        self._check_sample_file_data_model("uploaded_files2", 1, models[4])
-        self._check_sample_file_data_model("uploaded_file2", None, models[5])
-
-    def test_does_not_create_file_data_models_for_non_file_data_values(
-        self,
-        app: Flask,
-        with_db_and_bpmn_file_cleanup: None,
-    ) -> None:
-        data = {
-            "not_a_file": "just a value",
-            "also_no_files": ["not a file", "also not a file"],
-            "still_no_files": [{"key": "value"}],
-        }
-        models = ProcessInstanceService.file_data_models_for_data(data, 111)
-        assert len(models) == 0
-
-    def test_can_replace_file_data_values_with_digest_references(
-        self,
-        app: Flask,
-        with_db_and_bpmn_file_cleanup: None,
-    ) -> None:
-        data = {
-            "uploaded_file": self.SAMPLE_FILE_DATA,
-            "uploaded_files": [self.SAMPLE_FILE_DATA, self.SAMPLE_FILE_DATA],
-        }
-        models = ProcessInstanceService.file_data_models_for_data(data, 111)
-        ProcessInstanceService.replace_file_data_with_digest_references(data, models)
-
-        assert data == {
-            "uploaded_file": self.SAMPLE_DIGEST_REFERENCE,
-            "uploaded_files": [
-                self.SAMPLE_DIGEST_REFERENCE,
-                self.SAMPLE_DIGEST_REFERENCE,
-            ],
-        }
-
-    def test_does_not_replace_non_file_data_values_with_digest_references(
-        self,
-        app: Flask,
-        with_db_and_bpmn_file_cleanup: None,
-    ) -> None:
-        data = {
-            "not_a_file": "just a value",
-        }
-        models = ProcessInstanceService.file_data_models_for_data(data, 111)
-        ProcessInstanceService.replace_file_data_with_digest_references(data, models)
-
-        assert len(data) == 1
-        assert data["not_a_file"] == "just a value"
-
-    def test_can_replace_file_data_values_with_digest_references_when_non_file_data_values_are_present(
-        self,
-        app: Flask,
-        with_db_and_bpmn_file_cleanup: None,
-    ) -> None:
-        data = {
-            "not_a_file": "just a value",
-            "uploaded_file": self.SAMPLE_FILE_DATA,
-            "not_a_file2": "just a value2",
-            "uploaded_files": [self.SAMPLE_FILE_DATA, self.SAMPLE_FILE_DATA],
-            "not_a_file3": "just a value3",
-        }
-        models = ProcessInstanceService.file_data_models_for_data(data, 111)
-        ProcessInstanceService.replace_file_data_with_digest_references(data, models)
-
-        assert data == {
-            "not_a_file": "just a value",
-            "uploaded_file": self.SAMPLE_DIGEST_REFERENCE,
-            "not_a_file2": "just a value2",
-            "uploaded_files": [
-                self.SAMPLE_DIGEST_REFERENCE,
-                self.SAMPLE_DIGEST_REFERENCE,
-            ],
-            "not_a_file3": "just a value3",
-        }
-
-    def test_can_create_file_data_models_for_mulitple_single_file_data_values(
-        self,
-        app: Flask,
-        with_db_and_bpmn_file_cleanup: None,
-    ) -> None:
-        data = {
-            "File": [
-                {
-                    "supporting_files": self.SAMPLE_FILE_DATA,
-                },
-                {
-                    "supporting_files": self.SAMPLE_FILE_DATA,
-                },
-            ],
-        }
-        models = ProcessInstanceService.file_data_models_for_data(data, 111)
-        assert len(models) == 2
-        self._check_sample_file_data_model("File", 0, models[0])
-        self._check_sample_file_data_model("File", 1, models[1])
+    @pytest.mark.parametrize(
+        "data,expected_data,expected_models_len",
+        [
+            ({}, {}, 0),
+            ({"k": "v"}, {"k": "v"}, 0),
+            ({"k": _file_data(0)}, {"k": _digest_reference(0)}, 1),
+            ({"k": [_file_data(0)]}, {"k": [_digest_reference(0)]}, 1),
+            ({"k": [_file_data(0), _file_data(1)]}, {"k": [_digest_reference(0), _digest_reference(1)]}, 2),
+            ({"k": [{"k2": _file_data(0)}]}, {"k": [{"k2": _digest_reference(0)}]}, 1),
+            (
+                {"k": [{"k2": _file_data(0)}, {"k2": _file_data(1)}, {"k2": _file_data(2)}]},
+                {"k": [{"k2": _digest_reference(0)}, {"k2": _digest_reference(1)}, {"k2": _digest_reference(2)}]},
+                3,
+            ),
+            ({"k": [{"k2": _file_data(0), "k3": "bob"}]}, {"k": [{"k2": _digest_reference(0), "k3": "bob"}]}, 1),
+            (
+                {"k": [{"k2": _file_data(0), "k3": "bob"}, {"k2": _file_data(1), "k3": "bob"}]},
+                {"k": [{"k2": _digest_reference(0), "k3": "bob"}, {"k2": _digest_reference(1), "k3": "bob"}]},
+                2,
+            ),
+            (
+                {
+                    "k": [
+                        {
+                            "k2": [
+                                {"k3": [_file_data(0)]},
+                                {"k4": {"k5": {"k6": [_file_data(1), _file_data(2)], "k7": _file_data(3)}}},
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "k": [
+                        {
+                            "k2": [
+                                {"k3": [_digest_reference(0)]},
+                                {"k4": {"k5": {"k6": [_digest_reference(1), _digest_reference(2)], "k7": _digest_reference(3)}}},
+                            ]
+                        }
+                    ]
+                },
+                4,
+            ),
+        ],
+    )
+    def test_save_file_data_v0(
+        self,
+        app: Flask,
+        with_db_and_bpmn_file_cleanup: None,
+        data: dict[str, Any],
+        expected_data: dict[str, Any],
+        expected_models_len: int,
+    ) -> None:
+        process_instance_id = 123
+        models = ProcessInstanceService.replace_file_data_with_digest_references(
+            data,
+            process_instance_id,
+        )
+
+        assert data == expected_data
+        assert len(models) == expected_models_len
+
+        for i, model in enumerate(models):
+            assert model.process_instance_id == process_instance_id
+            assert model.mimetype == "some/mimetype"
+            assert model.filename == f"testing{i}.txt"
+            assert model.contents == _file_content(i)  # type: ignore
+            assert model.digest == _digest(i)
 
     def test_does_not_skip_events_it_does_not_know_about(self) -> None:
         name = None