Special case file data during user form submission (#164)

This commit is contained in:
jbirddog 2023-03-03 12:24:18 -05:00 committed by GitHub
parent 6e23a179b7
commit b4546e86e6
9 changed files with 449 additions and 40 deletions

View File

@ -0,0 +1,43 @@
"""empty message
Revision ID: 1e8167de6df8
Revises: 8930711a75a4
Create Date: 2023-03-02 15:18:19.515864
"""
from alembic import op
import sqlalchemy as sa
# Revision identifiers, used by Alembic.
# `revision` is this migration's id and `down_revision` is the id of the
# migration it revises (they match the "Revision ID" / "Revises" lines in the
# module docstring).
revision = '1e8167de6df8'
down_revision = '8930711a75a4'
# No branch labels or dependencies on other revisions are declared.
branch_labels = None
depends_on = None
def upgrade():
    """Create the ``process_instance_file_data`` table and its digest index."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name = 'process_instance_file_data'
    columns = [
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('process_instance_id', sa.Integer(), nullable=False),
        sa.Column('identifier', sa.String(length=255), nullable=False),
        sa.Column('list_index', sa.Integer(), nullable=True),
        sa.Column('mimetype', sa.String(length=255), nullable=False),
        sa.Column('filename', sa.String(length=255), nullable=False),
        sa.Column('contents', sa.LargeBinary(), nullable=False),
        sa.Column('digest', sa.String(length=64), nullable=False),
        sa.Column('updated_at_in_seconds', sa.Integer(), nullable=False),
        sa.Column('created_at_in_seconds', sa.Integer(), nullable=False),
    ]
    constraints = [
        # Each row belongs to exactly one process instance.
        sa.ForeignKeyConstraint(['process_instance_id'], ['process_instance.id'], ),
        sa.PrimaryKeyConstraint('id'),
    ]
    op.create_table(table_name, *columns, *constraints)

    # Non-unique index on the digest column.
    op.create_index(
        op.f('ix_process_instance_file_data_digest'),
        table_name,
        ['digest'],
        unique=False,
    )
    # ### end Alembic commands ###
def downgrade():
    """Drop the digest index and then the ``process_instance_file_data`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name = 'process_instance_file_data'
    digest_index_name = op.f('ix_process_instance_file_data_digest')

    # Reverse order of upgrade(): the index goes first, then the table.
    op.drop_index(digest_index_name, table_name=table_name)
    op.drop_table(table_name)
    # ### end Alembic commands ###

View File

@ -1643,12 +1643,6 @@ paths:
description: The identifier of the process data.
schema:
type: string
- name: index
in: query
required: false
description: The optional index of the value if key's value is an array
schema:
type: integer
get:
operationId: spiffworkflow_backend.routes.process_api_blueprint.process_data_file_download
summary: Download the file referenced in the process data value.

View File

@ -56,6 +56,9 @@ from spiffworkflow_backend.models.serialized_bpmn_definition import (
from spiffworkflow_backend.models.process_instance_data import (
ProcessInstanceDataModel,
) # noqa: F401
from spiffworkflow_backend.models.process_instance_file_data import (
ProcessInstanceFileDataModel,
) # noqa: F401
add_listeners()

View File

@ -0,0 +1,31 @@
"""Process_instance_file_data."""
from dataclasses import dataclass
from typing import Optional
from sqlalchemy import ForeignKey
from spiffworkflow_backend.models.db import db
from spiffworkflow_backend.models.db import SpiffworkflowBaseDBModel
from spiffworkflow_backend.models.process_instance import ProcessInstanceModel
@dataclass
class ProcessInstanceFileDataModel(SpiffworkflowBaseDBModel):
    """File data stored for a process instance, one row per file.

    Rows are mapped to the ``process_instance_file_data`` table and are tied
    to a process instance through ``process_instance_id``.
    """

    __tablename__ = "process_instance_file_data"

    id: int = db.Column(db.Integer, primary_key=True)
    # Foreign key to the process instance this file data belongs to.
    process_instance_id: int = db.Column(
        ForeignKey(ProcessInstanceModel.id), nullable=False  # type: ignore
    )
    # Identifier the file value is associated with (up to 255 characters).
    identifier: str = db.Column(db.String(255), nullable=False)
    # Nullable position; presumably set only when the value was an element of
    # a list -- verify against the code that builds these models.
    list_index: Optional[int] = db.Column(db.Integer, nullable=True)
    mimetype: str = db.Column(db.String(255), nullable=False)
    filename: str = db.Column(db.String(255), nullable=False)
    # This is not deferred because there is no reason to query this model if
    # you do not want the contents.
    # NOTE(review): annotated as ``str`` but the column is LargeBinary, so the
    # stored value is presumably ``bytes`` -- confirm before relying on the
    # annotation.
    contents: str = db.Column(db.LargeBinary, nullable=False)
    # Indexed, up to 64 characters.
    digest: str = db.Column(db.String(64), nullable=False, index=True)
    updated_at_in_seconds: int = db.Column(db.Integer, nullable=False)
    created_at_in_seconds: int = db.Column(db.Integer, nullable=False)

View File

@ -1,9 +1,7 @@
"""APIs for dealing with process groups, process models, and process instances."""
import base64
import json
from typing import Any
from typing import Dict
from typing import Optional
import flask.wrappers
from flask import Blueprint
@ -25,6 +23,9 @@ from spiffworkflow_backend.models.process_instance import ProcessInstanceModelSc
from spiffworkflow_backend.models.process_instance import (
ProcessInstanceTaskDataCannotBeUpdatedError,
)
from spiffworkflow_backend.models.process_instance_file_data import (
ProcessInstanceFileDataModel,
)
from spiffworkflow_backend.models.process_model import ProcessModelInfo
from spiffworkflow_backend.models.spec_reference import SpecReferenceCache
from spiffworkflow_backend.models.spec_reference import SpecReferenceSchema
@ -87,9 +88,31 @@ def _process_data_fetcher(
process_instance_id: int,
process_data_identifier: str,
download_file_data: bool,
index: Optional[int] = None,
) -> flask.wrappers.Response:
"""Process_data_show."""
if download_file_data:
file_data = ProcessInstanceFileDataModel.query.filter_by(
digest=process_data_identifier,
process_instance_id=process_instance_id,
).first()
if file_data is None:
raise ApiError(
error_code="process_instance_file_data_not_found",
message=(
"Could not find file data related to the digest:"
f" {process_data_identifier}"
),
)
mimetype = file_data.mimetype
filename = file_data.filename
file_contents = file_data.contents
return Response(
file_contents,
mimetype=mimetype,
headers={"Content-disposition": f"attachment; filename={filename}"},
)
process_instance = _find_process_instance_by_id_or_raise(process_instance_id)
processor = ProcessInstanceProcessor(process_instance)
all_process_data = processor.get_data()
@ -99,26 +122,6 @@ def _process_data_fetcher(
script_engine_last_result = processor._script_engine.environment.last_result()
process_data_value = script_engine_last_result.get(process_data_identifier)
if process_data_value is not None and index is not None:
process_data_value = process_data_value[index]
if (
download_file_data
and isinstance(process_data_value, str)
and process_data_value.startswith("data:")
):
parts = process_data_value.split(";")
mimetype = parts[0][4:]
filename = parts[1].split("=")[1]
base64_value = parts[2].split(",")[1]
file_contents = base64.b64decode(base64_value)
return Response(
file_contents,
mimetype=mimetype,
headers={"Content-disposition": f"attachment; filename={filename}"},
)
return make_response(
jsonify(
{
@ -140,7 +143,6 @@ def process_data_show(
process_instance_id,
process_data_identifier,
download_file_data=False,
index=None,
)
@ -148,14 +150,12 @@ def process_data_file_download(
process_instance_id: int,
process_data_identifier: str,
modified_process_model_identifier: str,
index: Optional[int] = None,
) -> flask.wrappers.Response:
"""Process_data_file_download."""
return _process_data_fetcher(
process_instance_id,
process_data_identifier,
download_file_data=True,
index=index,
)

View File

@ -0,0 +1,49 @@
"""Get_encoded_file_data."""
import base64
from typing import Any
from spiffworkflow_backend.models.process_instance_file_data import (
ProcessInstanceFileDataModel,
)
from spiffworkflow_backend.models.script_attributes_context import (
ScriptAttributesContext,
)
from spiffworkflow_backend.scripts.script import Script
class ProcessInstanceFileDataNotFoundError(Exception):
    """Raised when no stored file data matches a digest for a process instance."""


class GetEncodedFileData(Script):
    """Script that turns a file data digest reference back into encoded file data."""

    @staticmethod
    def requires_privileged_permissions() -> bool:
        """We have deemed this function safe to run without elevated permissions."""
        return False

    def get_description(self) -> str:
        """Return the human-readable description of this script."""
        return """Returns a string which is the encoded file data. This is a very expensive call."""

    def run(
        self,
        script_attributes_context: ScriptAttributesContext,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Look up stored file data by digest and return it as a data URL string.

        Args:
            script_attributes_context: supplies the id of the process instance
                whose file data is searched.
            *args: the first positional argument is the digest reference; its
                last 64 characters are used as the digest.
            **kwargs: unused.

        Returns:
            A string of the form ``data:<mimetype>;name=<filename>;base64,<data>``
            built from the stored row.

        Raises:
            ProcessInstanceFileDataNotFoundError: if no row matches the digest
                for this process instance.
        """
        # example input:
        # "data:some/mimetype;name=testing.txt;base64,spifffiledatadigest+7a2051ffefd1eaf475dbef9fda019cb3d4a10eb8aea4c2c2a84a50a797a541bf"
        digest_reference = args[0]
        digest = digest_reference[-64:]
        process_instance_id = script_attributes_context.process_instance_id

        file_data = ProcessInstanceFileDataModel.query.filter_by(
            digest=digest,
            process_instance_id=process_instance_id,
        ).first()

        # .first() returns None when nothing matches; fail with a clear message
        # instead of an AttributeError on the attribute access below.
        if file_data is None:
            raise ProcessInstanceFileDataNotFoundError(
                "Could not find file data related to the digest:"
                f" {digest} for process instance: {process_instance_id}"
            )

        base64_value = base64.b64encode(file_data.contents).decode("ascii")
        # NOTE(review): the filename is inserted as stored, without URL
        # quoting -- confirm consumers do not expect an encoded name.
        encoded_file_data = (
            f"data:{file_data.mimetype};name={file_data.filename};base64,{base64_value}"
        )

        return encoded_file_data

View File

@ -1,6 +1,5 @@
"""Markdown_file_download_link."""
from typing import Any
from urllib.parse import unquote
from flask import current_app
@ -26,16 +25,16 @@ class GetMarkdownFileDownloadLink(Script):
def run(
self,
script_attributes_context: ScriptAttributesContext,
*_args: Any,
*args: Any,
**kwargs: Any,
) -> Any:
"""Run."""
# example input:
# "data:application/pdf;name=Harmeet_1234.pdf;base64,JV...."
process_data_identifier = kwargs["key"]
parts = kwargs["file_data"].split(";")
file_index = kwargs["file_index"]
label = unquote(parts[1].split("=")[1])
# "data:some/mimetype;name=testing.txt;base64,spifffiledatadigest+7a2051ffefd1eaf475dbef9fda019cb3d4a10eb8aea4c2c2a84a50a797a541bf"
digest_reference = args[0]
parts = digest_reference.split(";")
digest = parts[2].split(",")[1][-64:]
label = parts[1].split("=")[1]
process_model_identifier = script_attributes_context.process_model_identifier
modified_process_model_identifier = (
ProcessModelInfo.modify_process_identifier_for_path_param(
@ -46,7 +45,7 @@ class GetMarkdownFileDownloadLink(Script):
url = current_app.config["SPIFFWORKFLOW_BACKEND_URL"]
url += (
f"/v1.0/process-data-file-download/{modified_process_model_identifier}/"
+ f"{process_instance_id}/{process_data_identifier}?index={file_index}"
+ f"{process_instance_id}/{digest}"
)
link = f"[{label}]({url})"

View File

@ -1,8 +1,13 @@
"""Process_instance_service."""
import base64
import hashlib
import logging
import time
from typing import Any
from typing import Generator
from typing import List
from typing import Optional
from typing import Tuple
from urllib.parse import unquote
import sentry_sdk
from flask import current_app
@ -14,6 +19,9 @@ from spiffworkflow_backend.models.human_task import HumanTaskModel
from spiffworkflow_backend.models.process_instance import ProcessInstanceApi
from spiffworkflow_backend.models.process_instance import ProcessInstanceModel
from spiffworkflow_backend.models.process_instance import ProcessInstanceStatus
from spiffworkflow_backend.models.process_instance_file_data import (
ProcessInstanceFileDataModel,
)
from spiffworkflow_backend.models.process_model import ProcessModelInfo
from spiffworkflow_backend.models.task import Task
from spiffworkflow_backend.models.user import UserModel
@ -32,6 +40,7 @@ from spiffworkflow_backend.services.process_model_service import ProcessModelSer
class ProcessInstanceService:
"""ProcessInstanceService."""
# Prefix placed in the base64 slot of a "data:" value to mark it as a digest
# reference to stored file data rather than inline base64 contents.
FILE_DATA_DIGEST_PREFIX = "spifffiledatadigest+"
TASK_STATE_LOCKED = "locked"
@classmethod
@ -209,6 +218,97 @@ class ProcessInstanceService:
return lane_uids
@classmethod
def file_data_model_for_value(
    cls,
    identifier: str,
    value: str,
    process_instance_id: int,
) -> Optional[ProcessInstanceFileDataModel]:
    """Build an unsaved file data model from a data URL value, if it is one.

    The expected shape is ``data:<mimetype>;name=<filename>;base64,<payload>``.

    Args:
        identifier: key the value was found under; stored on the model.
        value: candidate string to inspect.
        process_instance_id: id of the process instance the file belongs to.

    Returns:
        A new, not yet persisted ``ProcessInstanceFileDataModel`` holding the
        decoded contents and their SHA-256 hex digest, or ``None`` when the
        value does not start with ``data:``, cannot be parsed or decoded, or
        already is a digest reference.
    """
    if not value.startswith("data:"):
        return None

    # Only parsing and decoding are best-effort; errors raised while building
    # the model are no longer swallowed.
    try:
        parts = value.split(";")
        mimetype = parts[0][5:]
        # Split on the first "=" only so the rest of the name is kept intact.
        filename = unquote(parts[1].split("=", 1)[1])
        base64_value = parts[2].split(",")[1]
        if base64_value.startswith(cls.FILE_DATA_DIGEST_PREFIX):
            # Already replaced by a digest reference; nothing to store.
            return None
        # binascii.Error (invalid base64) is a ValueError subclass.
        contents = base64.b64decode(base64_value)
    except (IndexError, ValueError) as exception:
        logging.getLogger(__name__).warning(
            "Could not parse file data for identifier '%s': %s",
            identifier,
            exception,
        )
        return None

    digest = hashlib.sha256(contents).hexdigest()
    now_in_seconds = round(time.time())

    return ProcessInstanceFileDataModel(
        process_instance_id=process_instance_id,
        identifier=identifier,
        mimetype=mimetype,
        filename=filename,
        contents=contents,  # type: ignore
        digest=digest,
        updated_at_in_seconds=now_in_seconds,
        created_at_in_seconds=now_in_seconds,
    )
@classmethod
def possible_file_data_values(
cls,
data: dict[str, Any],
) -> Generator[Tuple[str, str, Optional[int]], None, None]:
for identifier, value in data.items():
if isinstance(value, str):
yield (identifier, value, None)
if isinstance(value, list):
for list_index, list_value in enumerate(value):
if isinstance(list_value, str):
yield (identifier, list_value, list_index)
@classmethod
def file_data_models_for_data(
    cls,
    data: dict[str, Any],
    process_instance_id: int,
) -> List[ProcessInstanceFileDataModel]:
    """Collect a file data model for every file value found in ``data``.

    Each candidate string from ``possible_file_data_values`` is passed to
    ``file_data_model_for_value``; candidates that yield no model are
    skipped. Each returned model has ``list_index`` set to the candidate's
    position (``None`` for top-level values).
    """
    file_data_models = []

    for key, candidate, position in cls.possible_file_data_values(data):
        file_data_model = cls.file_data_model_for_value(
            key, candidate, process_instance_id
        )
        if file_data_model is None:
            continue
        file_data_model.list_index = position
        file_data_models.append(file_data_model)

    return file_data_models
@classmethod
def replace_file_data_with_digest_references(
    cls,
    data: dict[str, Any],
    models: List[ProcessInstanceFileDataModel],
) -> None:
    """Overwrite file values in ``data`` with digest references, in place.

    For each model, the entry at ``model.identifier`` (or its element at
    ``model.list_index`` when that is set) becomes a ``data:`` string whose
    base64 slot holds ``FILE_DATA_DIGEST_PREFIX`` followed by the digest.

    NOTE(review): the filename is inserted unescaped; a name containing
    ";" or "," may confuse consumers that split the reference on those
    characters -- confirm.
    """
    for model in models:
        digest_reference = f"data:{model.mimetype};name={model.filename};base64,{cls.FILE_DATA_DIGEST_PREFIX}{model.digest}"
        key = model.identifier
        position = model.list_index
        if position is not None:
            # The value came from a list; replace only that element.
            data[key][position] = digest_reference
            continue
        data[key] = digest_reference
@classmethod
def save_file_data_and_replace_with_digest_references(
    cls,
    data: dict[str, Any],
    process_instance_id: int,
) -> None:
    """Persist file values found in ``data`` and swap them for digest references.

    Models are built from ``data``, added to the database session and
    committed, and then ``data`` is updated in place so each file value is
    replaced by its digest reference.
    """
    file_data_models = cls.file_data_models_for_data(data, process_instance_id)

    db.session.add_all(file_data_models)
    db.session.commit()

    cls.replace_file_data_with_digest_references(data, file_data_models)
@staticmethod
def complete_form_task(
processor: ProcessInstanceProcessor,
@ -226,6 +326,11 @@ class ProcessInstanceService:
processor.process_instance_model.id, spiff_task, user
)
ProcessInstanceService.save_file_data_and_replace_with_digest_references(
data,
processor.process_instance_model.id,
)
dot_dct = ProcessInstanceService.create_dot_dict(data)
spiff_task.update_data(dot_dct)
# ProcessInstanceService.post_process_form(spiff_task) # some properties may update the data store.

View File

@ -1,18 +1,203 @@
"""Test_process_instance_processor."""
from typing import Optional
from flask.app import Flask
from tests.spiffworkflow_backend.helpers.base_test import BaseTest
from tests.spiffworkflow_backend.helpers.test_data import load_test_spec
from spiffworkflow_backend.models.process_instance_file_data import (
ProcessInstanceFileDataModel,
)
from spiffworkflow_backend.models.spiff_logging import SpiffLoggingModel
from spiffworkflow_backend.models.user import UserModel
from spiffworkflow_backend.services.process_instance_processor import (
ProcessInstanceProcessor,
)
from spiffworkflow_backend.services.process_instance_service import (
ProcessInstanceService,
)
class TestProcessInstanceService(BaseTest):
"""TestProcessInstanceService."""
# Sample "data:" value with an inline base64 payload; the tests below expect
# that payload to decode to b"testing\n".
SAMPLE_FILE_DATA = "data:some/mimetype;name=testing.txt;base64,dGVzdGluZwo="
# The same value with the payload replaced by the digest prefix followed by the
# digest the tests below expect for the sample payload.
SAMPLE_DIGEST_REFERENCE = f"data:some/mimetype;name=testing.txt;base64,{ProcessInstanceService.FILE_DATA_DIGEST_PREFIX}12a61f4e173fb3a11c05d6471f74728f76231b4a5fcd9667cef3af87a3ae4dc2" # noqa: B950
def _check_sample_file_data_model(
    self,
    identifier: str,
    list_index: Optional[int],
    model: Optional[ProcessInstanceFileDataModel],
) -> None:
    """Assert that ``model`` describes the sample file for the given slot.

    ``identifier`` and ``list_index`` are the values the model is expected to
    carry; the remaining fields are compared against the sample file data.
    """
    expected_digest = (
        "12a61f4e173fb3a11c05d6471f74728f76231b4a5fcd9667cef3af87a3ae4dc2"
    )
    expected_contents = b"testing\n"

    assert model is not None
    assert model.identifier == identifier
    assert model.process_instance_id == 111
    assert model.list_index == list_index
    assert model.mimetype == "some/mimetype"
    assert model.filename == "testing.txt"
    assert model.contents == expected_contents  # type: ignore
    assert model.digest == expected_digest
def test_can_create_file_data_model_for_file_data_value(
    self,
    app: Flask,
    with_db_and_bpmn_file_cleanup: None,
    with_super_admin_user: UserModel,
) -> None:
    """A single sample file data value yields a fully populated model."""
    identifier = "uploaded_file"
    file_data_model = ProcessInstanceService.file_data_model_for_value(
        identifier, self.SAMPLE_FILE_DATA, 111
    )
    self._check_sample_file_data_model(identifier, None, file_data_model)
def test_does_not_create_file_data_model_for_non_file_data_value(
    self,
    app: Flask,
    with_db_and_bpmn_file_cleanup: None,
    with_super_admin_user: UserModel,
) -> None:
    """A plain string that is not file data yields no model."""
    plain_value = "just a value"
    file_data_model = ProcessInstanceService.file_data_model_for_value(
        "not_a_file", plain_value, 111
    )
    assert file_data_model is None
def test_can_create_file_data_models_for_single_file_data_values(
    self,
    app: Flask,
    with_db_and_bpmn_file_cleanup: None,
    with_super_admin_user: UserModel,
) -> None:
    """A dict with one top-level file value yields exactly one model."""
    form_data = {"uploaded_file": self.SAMPLE_FILE_DATA}
    file_data_models = ProcessInstanceService.file_data_models_for_data(
        form_data, 111
    )

    assert len(file_data_models) == 1
    (only_model,) = file_data_models
    self._check_sample_file_data_model("uploaded_file", None, only_model)
def test_can_create_file_data_models_for_multiple_file_data_values(
    self,
    app: Flask,
    with_db_and_bpmn_file_cleanup: None,
    with_super_admin_user: UserModel,
) -> None:
    """A list of two file values yields one model per element, in order."""
    form_data = {
        "uploaded_files": [self.SAMPLE_FILE_DATA, self.SAMPLE_FILE_DATA],
    }
    file_data_models = ProcessInstanceService.file_data_models_for_data(
        form_data, 111
    )

    assert len(file_data_models) == 2
    for position, file_data_model in enumerate(file_data_models):
        self._check_sample_file_data_model(
            "uploaded_files", position, file_data_model
        )
def test_can_create_file_data_models_for_fix_of_file_data_and_non_file_data_values(
    self,
    app: Flask,
    with_db_and_bpmn_file_cleanup: None,
    with_super_admin_user: UserModel,
) -> None:
    """Only file values yield models when file and non-file values are mixed."""
    sample = self.SAMPLE_FILE_DATA
    form_data = {
        "not_a_file": "just a value",
        "uploaded_file": sample,
        "not_a_file2": "just a value 2",
        "uploaded_files": [sample, sample],
        "not_a_file3": "just a value 3",
        "uploaded_files2": [sample, sample],
        "uploaded_file2": sample,
    }
    # Expected (identifier, list_index) per model, in dict iteration order.
    expected_slots = [
        ("uploaded_file", None),
        ("uploaded_files", 0),
        ("uploaded_files", 1),
        ("uploaded_files2", 0),
        ("uploaded_files2", 1),
        ("uploaded_file2", None),
    ]

    file_data_models = ProcessInstanceService.file_data_models_for_data(
        form_data, 111
    )

    assert len(file_data_models) == 6
    for (identifier, list_index), file_data_model in zip(
        expected_slots, file_data_models
    ):
        self._check_sample_file_data_model(identifier, list_index, file_data_model)
def test_does_not_create_file_data_models_for_non_file_data_values(
    self,
    app: Flask,
    with_db_and_bpmn_file_cleanup: None,
    with_super_admin_user: UserModel,
) -> None:
    """A dict without any file values yields no models."""
    form_data = {"not_a_file": "just a value"}
    file_data_models = ProcessInstanceService.file_data_models_for_data(
        form_data, 111
    )
    assert len(file_data_models) == 0
def test_can_replace_file_data_values_with_digest_references(
    self,
    app: Flask,
    with_db_and_bpmn_file_cleanup: None,
    with_super_admin_user: UserModel,
) -> None:
    """Top-level and list file values are both replaced by digest references."""
    form_data = {
        "uploaded_file": self.SAMPLE_FILE_DATA,
        "uploaded_files": [self.SAMPLE_FILE_DATA, self.SAMPLE_FILE_DATA],
    }
    expected_data = {
        "uploaded_file": self.SAMPLE_DIGEST_REFERENCE,
        "uploaded_files": [
            self.SAMPLE_DIGEST_REFERENCE,
            self.SAMPLE_DIGEST_REFERENCE,
        ],
    }

    file_data_models = ProcessInstanceService.file_data_models_for_data(
        form_data, 111
    )
    ProcessInstanceService.replace_file_data_with_digest_references(
        form_data, file_data_models
    )

    assert form_data == expected_data
def test_does_not_replace_non_file_data_values_with_digest_references(
    self,
    app: Flask,
    with_db_and_bpmn_file_cleanup: None,
    with_super_admin_user: UserModel,
) -> None:
    """Values that are not file data are left untouched by the replacement."""
    form_data = {"not_a_file": "just a value"}

    file_data_models = ProcessInstanceService.file_data_models_for_data(
        form_data, 111
    )
    ProcessInstanceService.replace_file_data_with_digest_references(
        form_data, file_data_models
    )

    assert len(form_data) == 1
    assert form_data["not_a_file"] == "just a value"
def test_can_replace_file_data_values_with_digest_references_when_non_file_data_values_are_present(
    self,
    app: Flask,
    with_db_and_bpmn_file_cleanup: None,
    with_super_admin_user: UserModel,
) -> None:
    """File values are replaced while surrounding non-file values are kept."""
    sample = self.SAMPLE_FILE_DATA
    reference = self.SAMPLE_DIGEST_REFERENCE
    form_data = {
        "not_a_file": "just a value",
        "uploaded_file": sample,
        "not_a_file2": "just a value2",
        "uploaded_files": [sample, sample],
        "not_a_file3": "just a value3",
    }
    expected_data = {
        "not_a_file": "just a value",
        "uploaded_file": reference,
        "not_a_file2": "just a value2",
        "uploaded_files": [reference, reference],
        "not_a_file3": "just a value3",
    }

    file_data_models = ProcessInstanceService.file_data_models_for_data(
        form_data, 111
    )
    ProcessInstanceService.replace_file_data_with_digest_references(
        form_data, file_data_models
    )

    assert form_data == expected_data
def test_does_not_log_set_data_when_calling_engine_steps_on_waiting_call_activity(
self,
app: Flask,