mirror of
https://github.com/sartography/cr-connect-workflow.git
synced 2025-02-20 11:48:16 +00:00
Improve version handling of files. Consolidate more of this logic in FileService. Place the version on the actual data model, not the file model, so the file model remains the same, and we just version the data associated with it.
This commit is contained in:
parent
94f828dfd6
commit
c5cee4761e
@ -8,6 +8,7 @@ from flask import send_file
|
||||
from crc import session
|
||||
from crc.api.common import ApiErrorSchema, ApiError
|
||||
from crc.models.file import FileModelSchema, FileModel, FileDataModel, FileType
|
||||
from crc.models.workflow import WorkflowSpecModel
|
||||
from crc.services.file_service import FileService
|
||||
|
||||
|
||||
@ -34,7 +35,8 @@ def add_file(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=Non
|
||||
|
||||
file = connexion.request.files['file']
|
||||
if workflow_spec_id:
|
||||
file_model = FileService.add_workflow_spec_file(workflow_spec_id, file.filename, file.content_type, file.stream.read())
|
||||
workflow_spec = session.query(WorkflowSpecModel).filter_by(id=workflow_spec_id).first()
|
||||
file_model = FileService.add_workflow_spec_file(workflow_spec, file.filename, file.content_type, file.stream.read())
|
||||
else:
|
||||
file_model = FileService.add_form_field_file(study_id, workflow_id, task_id, form_field_key, file.filename, file.content_type, file.stream.read())
|
||||
|
||||
|
@ -76,7 +76,7 @@ def __get_workflow_api_model(processor: WorkflowProcessor):
|
||||
user_tasks=user_tasks,
|
||||
workflow_spec_id=processor.workflow_spec_id
|
||||
)
|
||||
if(processor.next_task()):
|
||||
if processor.next_task():
|
||||
workflow_api.next_task = Task.from_spiff(processor.next_task())
|
||||
return workflow_api
|
||||
|
||||
|
@ -3,6 +3,7 @@ import enum
|
||||
from marshmallow_enum import EnumField
|
||||
from marshmallow_sqlalchemy import ModelSchema
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
|
||||
from crc import db
|
||||
|
||||
@ -30,10 +31,36 @@ class FileType(enum.Enum):
|
||||
zip = 'zip'
|
||||
|
||||
|
||||
CONTENT_TYPES = {
|
||||
"bpmn": "text/xml",
|
||||
"csv": "text/csv",
|
||||
"dmn": "text/xml",
|
||||
"doc": "application/msword",
|
||||
"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"gif": "image/gif",
|
||||
"jpg": "image/jpeg",
|
||||
"md" : "text/plain",
|
||||
"pdf": "application/pdf",
|
||||
"png": "image/png",
|
||||
"ppt": "application/vnd.ms-powerpoint",
|
||||
"pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"rtf": "application/rtf",
|
||||
"svg": "image/svg+xml",
|
||||
"svg_xml": "image/svg+xml",
|
||||
"txt": "text/plain",
|
||||
"xls": "application/vnd.ms-excel",
|
||||
"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
"xml": "application/xml",
|
||||
"zip": "application/zip"
|
||||
}
|
||||
|
||||
class FileDataModel(db.Model):
|
||||
__tablename__ = 'file_data'
|
||||
id = db.Column(db.Integer, primary_key=True)
|
||||
md5_hash = db.Column(UUID(as_uuid=True), unique=False, nullable=False)
|
||||
data = db.Column(db.LargeBinary)
|
||||
version = db.Column(db.Integer, default=0)
|
||||
last_updated = db.Column(db.DateTime(timezone=True), default=func.now())
|
||||
file_model_id = db.Column(db.Integer, db.ForeignKey('file.id'))
|
||||
file_model = db.relationship("FileModel")
|
||||
|
||||
@ -42,8 +69,6 @@ class FileModel(db.Model):
|
||||
__tablename__ = 'file'
|
||||
id = db.Column(db.Integer, primary_key=True)
|
||||
name = db.Column(db.String)
|
||||
version = db.Column(db.Integer, default=0)
|
||||
last_updated = db.Column(db.DateTime(timezone=True), default=func.now())
|
||||
type = db.Column(db.Enum(FileType))
|
||||
primary = db.Column(db.Boolean)
|
||||
content_type = db.Column(db.String)
|
||||
@ -52,6 +77,7 @@ class FileModel(db.Model):
|
||||
study_id = db.Column(db.Integer, db.ForeignKey('study.id'), nullable=True)
|
||||
task_id = db.Column(db.String, nullable=True)
|
||||
form_field_key = db.Column(db.String, nullable=True)
|
||||
latest_version = db.Column(db.Integer, default=0)
|
||||
|
||||
|
||||
class FileModelSchema(ModelSchema):
|
||||
|
@ -1,12 +1,14 @@
|
||||
import enum
|
||||
|
||||
import jinja2
|
||||
import marshmallow
|
||||
from jinja2 import Environment, BaseLoader, Undefined, Template
|
||||
from jinja2 import Template
|
||||
from marshmallow import INCLUDE
|
||||
from marshmallow_enum import EnumField
|
||||
from marshmallow_sqlalchemy import ModelSchema
|
||||
|
||||
from crc import db, ma
|
||||
from crc.api.common import ApiError
|
||||
|
||||
|
||||
class WorkflowSpecModel(db.Model):
|
||||
@ -38,6 +40,7 @@ class WorkflowModel(db.Model):
|
||||
study_id = db.Column(db.Integer, db.ForeignKey('study.id'))
|
||||
workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'))
|
||||
|
||||
|
||||
class Task(object):
|
||||
def __init__(self, id, name, title, type, state, form, documentation, data):
|
||||
self.id = id
|
||||
@ -72,8 +75,11 @@ class Task(object):
|
||||
create loops, etc...'''
|
||||
|
||||
template = Template(documentation)
|
||||
self.documentation = template.render(**self.data)
|
||||
|
||||
try:
|
||||
self.documentation = template.render(**self.data)
|
||||
except jinja2.exceptions.UndefinedError as ue:
|
||||
raise ApiError(code="template_error", message="Error processing template for task %s: %s" %
|
||||
(self.name, str(ue)), status_code=500)
|
||||
|
||||
class OptionSchema(ma.Schema):
|
||||
class Meta:
|
||||
|
@ -4,7 +4,7 @@ from jinja2 import UndefinedError
|
||||
|
||||
from crc import session
|
||||
from crc.api.common import ApiError
|
||||
from crc.models.file import FileModel, FileDataModel
|
||||
from crc.models.file import FileModel, FileDataModel, CONTENT_TYPES
|
||||
from crc.models.workflow import WorkflowSpecModel
|
||||
from docxtpl import DocxTemplate
|
||||
import jinja2
|
||||
@ -48,7 +48,7 @@ class CompleteTemplate(Script):
|
||||
workflow_id = task.workflow.data[WorkflowProcessor.WORKFLOW_ID_KEY]
|
||||
FileService.add_task_file(study_id=study_id, workflow_id=workflow_id, task_id=task.id,
|
||||
name=file_name,
|
||||
content_type=FileService.DOCX_MIME,
|
||||
content_type=CONTENT_TYPES['docx'],
|
||||
binary_data=final_document_stream.read())
|
||||
|
||||
print("Complete Task was called with %s" % str(args))
|
||||
|
@ -1,24 +1,33 @@
|
||||
import os
|
||||
from datetime import datetime
|
||||
from uuid import UUID
|
||||
from xml.etree import ElementTree
|
||||
|
||||
from crc import session
|
||||
from crc.api.common import ApiErrorSchema, ApiError
|
||||
from crc.models.file import FileType, FileDataModel, FileModelSchema, FileModel
|
||||
from crc.models.file import FileType, FileDataModel, FileModelSchema, FileModel, CONTENT_TYPES
|
||||
from crc.models.workflow import WorkflowSpecModel
|
||||
from crc.services.workflow_processor import WorkflowProcessor
|
||||
import hashlib
|
||||
|
||||
|
||||
class FileService(object):
|
||||
"""Provides consistent management and rules for storing, retrieving and processing files."""
|
||||
|
||||
DOCX_MIME = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
|
||||
@staticmethod
|
||||
def add_workflow_spec_file(workflow_spec_id, name, content_type, binary_data):
|
||||
def add_workflow_spec_file(workflow_spec: WorkflowSpecModel,
|
||||
name, content_type, binary_data, primary=False):
|
||||
"""Create a new file and associate it with a workflow spec."""
|
||||
file_model = FileModel(
|
||||
version=0,
|
||||
workflow_spec_id=workflow_spec_id,
|
||||
workflow_spec_id=workflow_spec.id,
|
||||
name=name,
|
||||
primary=primary
|
||||
)
|
||||
if primary:
|
||||
bpmn: ElementTree.Element = ElementTree.fromstring(binary_data)
|
||||
workflow_spec.primary_process_id = WorkflowProcessor.get_process_id(bpmn)
|
||||
print("Locating Process Id for " + name + " " + workflow_spec.primary_process_id)
|
||||
|
||||
return FileService.update_file(file_model, binary_data, content_type)
|
||||
|
||||
@staticmethod
|
||||
@ -38,7 +47,6 @@ class FileService(object):
|
||||
def add_task_file(study_id, workflow_id, task_id, name, content_type, binary_data):
|
||||
"""Create a new file and associate it with an executing task within a workflow."""
|
||||
file_model = FileModel(
|
||||
version=0,
|
||||
study_id=study_id,
|
||||
workflow_id=workflow_id,
|
||||
task_id=task_id,
|
||||
@ -49,9 +57,14 @@ class FileService(object):
|
||||
@staticmethod
|
||||
def update_file(file_model, binary_data, content_type):
|
||||
|
||||
file_model.version = file_model.version + 1
|
||||
file_model.last_updated = datetime.now()
|
||||
file_model.content_type = content_type
|
||||
file_data_model = session.query(FileDataModel).\
|
||||
filter_by(file_model_id=file_model.id,
|
||||
version=file_model.latest_version
|
||||
).with_for_update().first()
|
||||
md5_checksum = UUID(hashlib.md5(binary_data).hexdigest())
|
||||
if(file_data_model is not None and md5_checksum == file_data_model.md5_hash):
|
||||
# This file does not need to be updated, it's the same file.
|
||||
return file_model
|
||||
|
||||
# Verify the extension
|
||||
basename, file_extension = os.path.splitext(file_model.name)
|
||||
@ -62,12 +75,16 @@ class FileService(object):
|
||||
file_extension)), 404
|
||||
else:
|
||||
file_model.type = FileType[file_extension]
|
||||
file_model.content_type = content_type
|
||||
|
||||
file_data_model = session.query(FileDataModel).filter_by(file_model_id=file_model.id).with_for_update().first()
|
||||
if file_data_model is None:
|
||||
file_data_model = FileDataModel(data=binary_data, file_model=file_model)
|
||||
version = 1
|
||||
else:
|
||||
file_data_model.data = binary_data
|
||||
version = file_data_model.version + 1
|
||||
|
||||
file_model.latest_version = version
|
||||
file_data_model = FileDataModel(data=binary_data, file_model=file_model, version=version,
|
||||
md5_hash=md5_checksum)
|
||||
|
||||
session.add_all([file_model, file_data_model])
|
||||
session.commit()
|
||||
@ -94,4 +111,8 @@ class FileService(object):
|
||||
@staticmethod
|
||||
def get_file_data(file_id):
|
||||
"""Returns the file_data that is associated with the file model id"""
|
||||
return session.query(FileDataModel).filter(FileDataModel.file_model_id == file_id).first()
|
||||
file_model = session.query(FileModel).filter(FileModel.id == file_id).first()
|
||||
return session.query(FileDataModel)\
|
||||
.filter(FileDataModel.file_model_id == file_id)\
|
||||
.filter(FileDataModel.version == file_model.latest_version)\
|
||||
.first()
|
||||
|
100
example_data.py
100
example_data.py
@ -4,15 +4,23 @@ import os
|
||||
import xml.etree.ElementTree as ElementTree
|
||||
|
||||
from crc import app, db, session
|
||||
from crc.models.file import FileType, FileModel, FileDataModel
|
||||
from crc.models.file import FileType, FileModel, FileDataModel, CONTENT_TYPES
|
||||
from crc.models.study import StudyModel
|
||||
from crc.models.user import UserModel
|
||||
from crc.models.workflow import WorkflowSpecModel
|
||||
from crc.services.file_service import FileService
|
||||
from crc.services.workflow_processor import WorkflowProcessor
|
||||
|
||||
|
||||
class ExampleDataLoader:
|
||||
def make_data(self):
|
||||
@staticmethod
|
||||
def clean_db():
|
||||
session.flush() # Clear out any transactions before deleting it all to avoid spurious errors.
|
||||
for table in reversed(db.metadata.sorted_tables):
|
||||
session.execute(table.delete())
|
||||
session.flush()
|
||||
|
||||
def load_all(self):
|
||||
users = [
|
||||
UserModel(
|
||||
uid='dhf8r',
|
||||
@ -25,6 +33,8 @@ class ExampleDataLoader:
|
||||
title='SOFTWARE ENGINEER V'
|
||||
)
|
||||
]
|
||||
db.session.add_all(users)
|
||||
db.session.commit()
|
||||
|
||||
studies = [
|
||||
StudyModel(
|
||||
@ -48,89 +58,47 @@ class ExampleDataLoader:
|
||||
user_uid='dhf8r'
|
||||
),
|
||||
]
|
||||
db.session.add_all(studies)
|
||||
db.session.commit()
|
||||
|
||||
workflow_specifications = \
|
||||
self.create_spec(id="crc2_training_session_enter_core_info",
|
||||
name="crc2_training_session_enter_core_info",
|
||||
display_name="CR Connect2 - Training Session - Core Info",
|
||||
description='Part of Milestone 3 Deliverable')
|
||||
workflow_specifications += \
|
||||
self.create_spec(id="crc2_training_session_data_security_plan",
|
||||
name="crc2_training_session_data_security_plan",
|
||||
display_name="CR Connect2 - Training Session - Data Security Plan",
|
||||
description='Part of Milestone 3 Deliverable')
|
||||
workflow_specifications += \
|
||||
self.create_spec(id="sponsor_funding_source",
|
||||
name="sponsor_funding_source",
|
||||
display_name="Sponsor and/or Funding Source ",
|
||||
description='TBD')
|
||||
# workflow_specifications += \
|
||||
# self.create_spec(id="m2_demo",
|
||||
# name="m2_demo",
|
||||
# display_name="Milestone 2 Demo",
|
||||
# description='A simplified CR Connect workflow for demonstration purposes.')
|
||||
# workflow_specifications += \
|
||||
# self.create_spec(id="crc_study_workflow",
|
||||
# name="crc_study_workflow",
|
||||
# display_name="CR Connect Study Workflow",
|
||||
# description='Draft workflow for CR Connect studies.')
|
||||
all_data = users + studies + workflow_specifications
|
||||
return all_data
|
||||
self.create_spec(id="crc2_training_session_enter_core_info",
|
||||
name="crc2_training_session_enter_core_info",
|
||||
display_name="CR Connect2 - Training Session - Core Info",
|
||||
description='Part of Milestone 3 Deliverable')
|
||||
self.create_spec(id="crc2_training_session_data_security_plan",
|
||||
name="crc2_training_session_data_security_plan",
|
||||
display_name="CR Connect2 - Training Session - Data Security Plan",
|
||||
description='Part of Milestone 3 Deliverable')
|
||||
self.create_spec(id="sponsor_funding_source",
|
||||
name="sponsor_funding_source",
|
||||
display_name="Sponsor and/or Funding Source ",
|
||||
description='TBD')
|
||||
|
||||
def create_spec(self, id, name, display_name="", description="", filepath=None):
|
||||
"""Assumes that a directory exists in static/bpmn with the same name as the given id.
|
||||
further assumes that the [id].bpmn is the primary file for the workflow.
|
||||
returns an array of data models to be added to the database."""
|
||||
models = []
|
||||
file_service = FileService()
|
||||
|
||||
spec = WorkflowSpecModel(id=id,
|
||||
name=name,
|
||||
display_name=display_name,
|
||||
description=description)
|
||||
models.append(spec)
|
||||
db.session.add(spec)
|
||||
db.session.commit()
|
||||
if not filepath:
|
||||
filepath = os.path.join(app.root_path, 'static', 'bpmn', id, "*")
|
||||
files = glob.glob(filepath)
|
||||
for file_path in files:
|
||||
noise, file_extension = os.path.splitext(file_path)
|
||||
filename = os.path.basename(file_path)
|
||||
if file_extension.lower() == '.bpmn':
|
||||
type = FileType.bpmn
|
||||
elif file_extension.lower() == '.dmn':
|
||||
type = FileType.dmn
|
||||
elif file_extension.lower() == '.svg':
|
||||
type = FileType.svg
|
||||
elif file_extension.lower() == '.docx':
|
||||
type = FileType.docx
|
||||
else:
|
||||
raise Exception("Unsupported file type:" + file_path)
|
||||
continue
|
||||
|
||||
is_primary = filename.lower() == id + ".bpmn"
|
||||
file_model = FileModel(name=filename, type=type, content_type='text/xml', version="1",
|
||||
last_updated=datetime.datetime.now(), primary=is_primary,
|
||||
workflow_spec_id=id)
|
||||
models.append(file_model)
|
||||
try:
|
||||
file = open(file_path, "rb")
|
||||
data = file.read()
|
||||
if (is_primary):
|
||||
bpmn: ElementTree.Element = ElementTree.fromstring(data)
|
||||
spec.primary_process_id = WorkflowProcessor.get_process_id(bpmn)
|
||||
print("Locating Process Id for " + filename + " " + spec.primary_process_id)
|
||||
models.append(FileDataModel(data=data, file_model=file_model))
|
||||
content_type = CONTENT_TYPES[file_extension[1:]]
|
||||
file_service.add_workflow_spec_file(workflow_spec=spec, name=filename, content_type=content_type,
|
||||
binary_data=data, primary=is_primary)
|
||||
finally:
|
||||
file.close()
|
||||
return models
|
||||
|
||||
@staticmethod
|
||||
def clean_db():
|
||||
session.flush() # Clear out any transactions before deleting it all to avoid spurious errors.
|
||||
for table in reversed(db.metadata.sorted_tables):
|
||||
session.execute(table.delete())
|
||||
session.flush()
|
||||
|
||||
def load_all(self):
|
||||
for data in self.make_data():
|
||||
session.add(data)
|
||||
session.commit()
|
||||
session.flush()
|
||||
return spec
|
||||
|
@ -92,15 +92,7 @@ class BaseTest(unittest.TestCase):
|
||||
if session.query(WorkflowSpecModel).filter_by(id=dir_name).count() > 0:
|
||||
return
|
||||
filepath = os.path.join(app.root_path, '..', 'tests', 'data', dir_name, "*")
|
||||
models = ExampleDataLoader().create_spec(id=dir_name, name=dir_name, filepath=filepath)
|
||||
spec = None
|
||||
for model in models:
|
||||
if isinstance(model, WorkflowSpecModel):
|
||||
spec = model
|
||||
session.add(model)
|
||||
session.commit()
|
||||
session.flush()
|
||||
return spec
|
||||
return ExampleDataLoader().create_spec(id=dir_name, name=dir_name, filepath=filepath)
|
||||
|
||||
@staticmethod
|
||||
def protocol_builder_response(file_name):
|
||||
|
@ -25,7 +25,7 @@ class TestFilesApi(BaseTest):
|
||||
def test_list_multiple_files_for_workflow_spec(self):
|
||||
self.load_example_data()
|
||||
spec = session.query(WorkflowSpecModel).first()
|
||||
svgFile = FileModel(name="test.svg", type=FileType.svg, version=1, last_updated=datetime.now(),
|
||||
svgFile = FileModel(name="test.svg", type=FileType.svg,
|
||||
primary=False, workflow_spec_id=spec.id)
|
||||
session.add(svgFile)
|
||||
session.flush()
|
||||
@ -47,7 +47,6 @@ class TestFilesApi(BaseTest):
|
||||
self.assertIsNotNone(rv.get_data())
|
||||
json_data = json.loads(rv.get_data(as_text=True))
|
||||
file = FileModelSchema().load(json_data, session=session)
|
||||
self.assertEqual(1, file.version)
|
||||
self.assertEqual(FileType.svg, file.type)
|
||||
self.assertFalse(file.primary)
|
||||
self.assertEqual("image/svg+xml", file.content_type)
|
||||
@ -89,13 +88,36 @@ class TestFilesApi(BaseTest):
|
||||
self.assertIsNotNone(rv.get_data())
|
||||
json_data = json.loads(rv.get_data(as_text=True))
|
||||
file = FileModelSchema().load(json_data, session=session)
|
||||
self.assertEqual(2, file.version)
|
||||
self.assertEqual(2, file.latest_version)
|
||||
self.assertEqual(FileType.bpmn, file.type)
|
||||
self.assertEqual("application/octet-stream", file.content_type)
|
||||
self.assertEqual(spec.id, file.workflow_spec_id)
|
||||
|
||||
data_model = session.query(FileDataModel).filter_by(file_model_id=file.id).first()
|
||||
self.assertEqual(b"hijklim", data_model.data)
|
||||
rv = self.app.get('/v1.0/file/%i/data' % file.id)
|
||||
self.assert_success(rv)
|
||||
data = rv.get_data()
|
||||
self.assertIsNotNone(data)
|
||||
self.assertEqual(b"hijklim", data)
|
||||
|
||||
def test_update_with_same_exact_data_does_not_increment_version(self):
|
||||
self.load_example_data()
|
||||
spec = session.query(WorkflowSpecModel).first()
|
||||
data = {}
|
||||
data['file'] = io.BytesIO(b"abcdef"), 'my_new_file.bpmn'
|
||||
rv = self.app.post('/v1.0/file?workflow_spec_id=%s' % spec.id, data=data, follow_redirects=True,
|
||||
content_type='multipart/form-data')
|
||||
self.assertIsNotNone(rv.get_data())
|
||||
json_data = json.loads(rv.get_data(as_text=True))
|
||||
file = FileModelSchema().load(json_data, session=session)
|
||||
self.assertEqual(1, file.latest_version)
|
||||
data['file'] = io.BytesIO(b"abcdef"), 'my_new_file.bpmn'
|
||||
rv = self.app.put('/v1.0/file/%i/data' % file.id, data=data, follow_redirects=True,
|
||||
content_type='multipart/form-data')
|
||||
self.assertIsNotNone(rv.get_data())
|
||||
json_data = json.loads(rv.get_data(as_text=True))
|
||||
file = FileModelSchema().load(json_data, session=session)
|
||||
self.assertEqual(1, file.latest_version)
|
||||
|
||||
|
||||
def test_get_file(self):
|
||||
self.load_example_data()
|
||||
|
Loading…
x
Reference in New Issue
Block a user