Ludicrously stupid launch in a refactor of the way all files work in the system at a time where I crave sleep and peace above all other things.

Added a File class, that we wrap around the FileModel so the api endpoints don't change, but File no longer holds refences to versions or dates of the file_data model, we
figure this out based on a clean database structure.

The ApprovalFile is directly related to the file_data_model - so no chance that a reviewer would review the incorrect version of a file.py

Noticed that our FileType enum called "bpmn" "bpmm", hope this doesn't screw someone up.

Workflows are directly related to the data_models that create the workflow spec it needs.  So the files should always be there.  There are no more hashes, and thus no more hash errors where it can't find the files to rebuild the workflow.py

Not much to report here, other than I broke every single test in the system at one point.  So I'm super concerned about this, and will be testing it a lot before creating the pull request.
This commit is contained in:
Dan Funk 2020-05-28 20:03:50 -04:00
parent 8f41dfa95f
commit dba41f4759
19 changed files with 329 additions and 269 deletions

View File

@ -1,30 +1,39 @@
import io
from typing import List
import connexion
from flask import send_file
from crc import session
from crc.api.common import ApiError
from crc.models.file import FileModelSchema, FileModel
from crc.models.file import FileSchema, FileModel, File, FileModelSchema
from crc.models.workflow import WorkflowSpecModel
from crc.services.file_service import FileService
def to_file_api(file_model):
"""Converts a FileModel object to something we can return via the aip"""
return File.from_models(file_model, FileService.get_file_data(file_model.id))
def get_files(workflow_spec_id=None, workflow_id=None, form_field_key=None):
if all(v is None for v in [workflow_spec_id, workflow_id, form_field_key]):
raise ApiError('missing_parameter',
'Please specify either a workflow_spec_id or a '
'workflow_id with an optional form_field_key')
results = FileService.get_files(workflow_spec_id=workflow_spec_id,
workflow_id=workflow_id,
irb_doc_code=form_field_key)
return FileModelSchema(many=True).dump(results)
file_models = FileService.get_files(workflow_spec_id=workflow_spec_id,
workflow_id=workflow_id,
irb_doc_code=form_field_key)
files = (to_file_api(model) for model in file_models)
return FileSchema(many=True).dump(files)
def get_reference_files():
results = FileService.get_files(is_reference=True)
return FileModelSchema(many=True).dump(results)
files = (to_file_api(model) for model in results)
return FileSchema(many=True).dump(files)
def add_file(workflow_spec_id=None, workflow_id=None, form_field_key=None):
@ -43,7 +52,7 @@ def add_file(workflow_spec_id=None, workflow_id=None, form_field_key=None):
else:
raise ApiError("invalid_file", "You must supply either a workflow spec id or a workflow_id and form_field_key.")
return FileModelSchema().dump(file_model)
return FileSchema().dump(to_file_api(file_model))
def get_reference_file(name):
@ -78,7 +87,7 @@ def set_reference_file(name):
file_model = file_models[0]
FileService.update_file(file_models[0], file.stream.read(), file.content_type)
return FileModelSchema().dump(file_model)
return FileSchema().dump(to_file_api(file_model))
def update_file_data(file_id):
@ -87,7 +96,7 @@ def update_file_data(file_id):
if file_model is None:
raise ApiError('no_such_file', 'The file id you provided does not exist')
file_model = FileService.update_file(file_model, file.stream.read(), file.content_type)
return FileModelSchema().dump(file_model)
return FileSchema().dump(to_file_api(file_model))
def get_file_data(file_id, version=None):
@ -99,7 +108,7 @@ def get_file_data(file_id, version=None):
attachment_filename=file_data.file_model.name,
mimetype=file_data.file_model.content_type,
cache_timeout=-1, # Don't cache these files on the browser.
last_modified=file_data.last_updated
last_modified=file_data.date_created
)
@ -107,7 +116,7 @@ def get_file_info(file_id):
file_model = session.query(FileModel).filter_by(id=file_id).with_for_update().first()
if file_model is None:
raise ApiError('no_such_file', 'The file id you provided does not exist', status_code=404)
return FileModelSchema().dump(file_model)
return FileSchema().dump(to_file_api(file_model))
def update_file_info(file_id, body):
@ -122,7 +131,7 @@ def update_file_info(file_id, body):
file_model = FileModelSchema().load(body, session=session)
session.add(file_model)
session.commit()
return FileModelSchema().dump(file_model)
return FileSchema().dump(to_file_api(file_model))
def delete_file(file_id):

View File

@ -118,8 +118,8 @@ def __get_workflow_api_model(processor: WorkflowProcessor, next_task = None):
next_task=None,
navigation=navigation,
workflow_spec_id=processor.workflow_spec_id,
spec_version=processor.get_spec_version(),
is_latest_spec=processor.get_spec_version() == processor.get_latest_version_string(processor.workflow_spec_id),
spec_version=processor.get_version_string(),
is_latest_spec=processor.is_latest_spec,
total_tasks=processor.workflow_model.total_tasks,
completed_tasks=processor.workflow_model.completed_tasks,
last_updated=processor.workflow_model.last_updated

View File

@ -4,7 +4,7 @@ from marshmallow import INCLUDE
from sqlalchemy import func
from crc import db, ma
from crc.models.file import FileModel
from crc.models.file import FileModel, FileDataModel
from crc.models.study import StudyModel
from crc.models.workflow import WorkflowModel
@ -17,13 +17,11 @@ class ApprovalStatus(enum.Enum):
class ApprovalFile(db.Model):
id = db.Column(db.Integer, primary_key=True)
file_id = db.Column(db.Integer, db.ForeignKey(FileModel.id), nullable=False)
approval_id = db.Column(db.Integer, db.ForeignKey("approval.id"), nullable=False)
file_version = db.Column(db.Integer, nullable=False)
file_data_id = db.Column(db.Integer, db.ForeignKey(FileDataModel.id), primary_key=True)
approval_id = db.Column(db.Integer, db.ForeignKey("approval.id"), primary_key=True)
approval = db.relationship("ApprovalModel")
file = db.relationship(FileModel)
file_data = db.relationship(FileDataModel)
class ApprovalModel(db.Model):
@ -38,9 +36,9 @@ class ApprovalModel(db.Model):
message = db.Column(db.String)
date_created = db.Column(db.DateTime(timezone=True), default=func.now())
version = db.Column(db.Integer) # Incremented integer, so 1,2,3 as requests are made.
workflow_hash = db.Column(db.String) # A hash of the workflow at the moment the approval was created.
approval_files = db.relationship(ApprovalFile, back_populates="approval")
approval_files = db.relationship(ApprovalFile, back_populates="approval",
cascade="all, delete, delete-orphan",
order_by=ApprovalFile.file_data_id)
class Approval(object):
@ -57,7 +55,6 @@ class Approval(object):
instance.message = model.message
instance.date_created = model.date_created
instance.version = model.version
instance.workflow_hash = model.workflow_hash
instance.title = ''
if model.study:
instance.title = model.study.title

View File

@ -1,6 +1,7 @@
import enum
from typing import cast
from marshmallow import INCLUDE, EXCLUDE
from marshmallow_enum import EnumField
from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
from sqlalchemy import func, Index
@ -10,7 +11,7 @@ from crc import db, ma
class FileType(enum.Enum):
bpmn = "bpmm"
bpmn = "bpmn"
csv = 'csv'
dmn = "dmn"
doc = "doc"
@ -55,15 +56,16 @@ CONTENT_TYPES = {
"zip": "application/zip"
}
class FileDataModel(db.Model):
__tablename__ = 'file_data'
id = db.Column(db.Integer, primary_key=True)
md5_hash = db.Column(UUID(as_uuid=True), unique=False, nullable=False)
data = db.Column(db.LargeBinary)
version = db.Column(db.Integer, default=0)
last_updated = db.Column(db.DateTime(timezone=True), default=func.now())
date_created = db.Column(db.DateTime(timezone=True), default=func.now())
file_model_id = db.Column(db.Integer, db.ForeignKey('file.id'))
file_model = db.relationship("FileModel")
file_model = db.relationship("FileModel", foreign_keys=[file_model_id])
class FileModel(db.Model):
@ -79,9 +81,30 @@ class FileModel(db.Model):
workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'), nullable=True)
workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True)
irb_doc_code = db.Column(db.String, nullable=True) # Code reference to the irb_documents.xlsx reference file.
latest_version = db.Column(db.Integer, default=0)
class File(object):
@classmethod
def from_models(cls, model: FileModel, data_model: FileDataModel):
instance = cls()
instance.id = model.id
instance.name = model.name
instance.is_status = model.is_status
instance.is_reference = model.is_reference
instance.content_type = model.content_type
instance.primary = model.primary
instance.primary_process_id = model.primary_process_id
instance.workflow_spec_id = model.workflow_spec_id
instance.workflow_id = model.workflow_id
instance.irb_doc_code = model.irb_doc_code
instance.type = model.type
if data_model:
instance.last_modified = data_model.date_created
instance.latest_version = data_model.version
else:
instance.last_modified = None
instance.latest_version = None
return instance
class FileModelSchema(SQLAlchemyAutoSchema):
class Meta:
@ -89,6 +112,17 @@ class FileModelSchema(SQLAlchemyAutoSchema):
load_instance = True
include_relationships = True
include_fk = True # Includes foreign keys
unknown = EXCLUDE
type = EnumField(FileType)
class FileSchema(ma.Schema):
class Meta:
model = File
fields = ["id", "name", "is_status", "is_reference", "content_type",
"primary", "primary_process_id", "workflow_spec_id", "workflow_id",
"irb_doc_code", "last_modified", "latest_version", "type"]
unknown = INCLUDE
type = EnumField(FileType)

View File

@ -64,7 +64,7 @@ class WorkflowMetadata(object):
name=workflow.workflow_spec.name,
display_name=workflow.workflow_spec.display_name,
description=workflow.workflow_spec.description,
spec_version=workflow.spec_version,
spec_version=workflow.spec_version(),
category_id=workflow.workflow_spec.category_id,
state=WorkflowState.optional,
status=workflow.status,

View File

@ -5,6 +5,7 @@ from marshmallow import EXCLUDE
from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
from crc import db
from crc.models.file import FileModel, FileDataModel
class WorkflowSpecCategoryModel(db.Model):
@ -67,6 +68,14 @@ class WorkflowStatus(enum.Enum):
complete = "complete"
class WorkflowSpecDependencyFile(db.Model):
"""Connects a workflow to the version of the specification files it depends on to execute"""
file_data_id = db.Column(db.Integer, db.ForeignKey(FileDataModel.id), primary_key=True)
workflow_id = db.Column(db.Integer, db.ForeignKey("workflow.id"), primary_key=True)
file_data = db.relationship(FileDataModel)
class WorkflowModel(db.Model):
__tablename__ = 'workflow'
id = db.Column(db.Integer, primary_key=True)
@ -76,7 +85,13 @@ class WorkflowModel(db.Model):
study = db.relationship("StudyModel", backref='workflow')
workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'))
workflow_spec = db.relationship("WorkflowSpecModel")
spec_version = db.Column(db.String)
total_tasks = db.Column(db.Integer, default=0)
completed_tasks = db.Column(db.Integer, default=0)
last_updated = db.Column(db.DateTime)
# Order By is important or generating hashes on reviews.
dependencies = db.relationship(WorkflowSpecDependencyFile, cascade="all, delete, delete-orphan",
order_by="WorkflowSpecDependencyFile.file_data_id")
def spec_version(self):
dep_ids = list(dep.file_data_id for dep in self.dependencies)
return "-".join(str(dep_ids))

View File

@ -59,13 +59,13 @@ Takes two arguments:
file_data_model = None
if workflow is not None:
# Get the workflow's latest files
joined_file_data_models = WorkflowProcessor\
.get_file_models_for_version(workflow.workflow_spec_id, workflow.spec_version)
for joined_file_data in joined_file_data_models:
if joined_file_data.file_model.name == file_name:
file_data_model = session.query(FileDataModel).filter_by(id=joined_file_data.id).first()
# Get the workflow specification file with the given name.
file_data_models = FileService.get_spec_data_files(
workflow_spec_id=workflow.workflow_spec_id,
workflow_id=workflow.id)
for file_data in file_data_models:
if file_data.file_model.name == file_name:
file_data_model = file_data
if workflow is None or file_data_model is None:
file_data_model = FileService.get_workflow_file_data(task.workflow, file_name)

View File

@ -3,8 +3,10 @@ from datetime import datetime
from sqlalchemy import desc
from crc import db, session
from crc.api.common import ApiError
from crc.models.approval import ApprovalModel, ApprovalStatus, ApprovalFile
from crc.models.workflow import WorkflowModel
from crc.services.file_service import FileService
@ -51,15 +53,22 @@ class ApprovalService(object):
# Construct as hash of the latest files to see if things have changed since
# the last approval.
latest_files = FileService.get_workflow_files(workflow_id)
current_workflow_hash = ApprovalService._generate_workflow_hash(latest_files)
workflow = db.session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first()
workflow_data_files = FileService.get_workflow_data_files(workflow_id)
current_data_file_ids = list(data_file.id for data_file in workflow_data_files)
if len(current_data_file_ids) == 0:
raise ApiError("invalid_workflow_approval", "You can't create an approval for a workflow that has"
"no files to approve in it.")
# If an existing approval request exists and no changes were made, do nothing.
# If there is an existing approval request for a previous version of the workflow
# then add a new request, and cancel any waiting/pending requests.
if latest_approval_request:
# We could just compare the ApprovalFile lists here and do away with this hash.
if latest_approval_request.workflow_hash == current_workflow_hash:
request_file_ids = list(file.file_data_id for file in latest_approval_request.approval_files)
current_data_file_ids.sort()
request_file_ids.sort()
if current_data_file_ids == request_file_ids:
return # This approval already exists.
else:
latest_approval_request.status = ApprovalStatus.CANCELED.value
@ -71,27 +80,18 @@ class ApprovalService(object):
model = ApprovalModel(study_id=study_id, workflow_id=workflow_id,
approver_uid=approver_uid, status=ApprovalStatus.WAITING.value,
message="", date_created=datetime.now(),
version=version, workflow_hash=current_workflow_hash)
approval_files = ApprovalService._create_approval_files(latest_files, model)
version=version)
approval_files = ApprovalService._create_approval_files(workflow_data_files, model)
db.session.add(model)
db.session.add_all(approval_files)
db.session.commit()
@staticmethod
def _create_approval_files(files, approval):
def _create_approval_files(workflow_data_files, approval):
"""Currently based exclusively on the status of files associated with a workflow."""
file_approval_models = []
for file in files:
file_approval_models.append(ApprovalFile(file_id=file.id,
approval=approval,
file_version=file.latest_version))
for file_data in workflow_data_files:
file_approval_models.append(ApprovalFile(file_data_id=file_data.id,
approval=approval))
return file_approval_models
@staticmethod
def _generate_workflow_hash(files):
"""Currently based exclusively on the status of files associated with a workflow."""
version_array = []
for file in files:
version_array.append(str(file.id) + "[" + str(file.latest_version) + "]")
full_version = "-".join(version_array)
return full_version

View File

@ -5,13 +5,14 @@ from datetime import datetime
from uuid import UUID
from xml.etree import ElementTree
from SpiffWorkflow.bpmn.parser.ValidationException import ValidationException
from pandas import ExcelFile
from sqlalchemy import desc
from crc import session
from crc.api.common import ApiError
from crc.models.file import FileType, FileDataModel, FileModel, LookupFileModel, LookupDataModel
from crc.models.workflow import WorkflowSpecModel, WorkflowModel
from crc.services.workflow_processor import WorkflowProcessor
from crc.models.workflow import WorkflowSpecModel, WorkflowModel, WorkflowSpecDependencyFile
class FileService(object):
@ -111,12 +112,12 @@ class FileService(object):
def update_file(file_model, binary_data, content_type):
session.flush() # Assure the database is up-to-date before running this.
file_data_model = session.query(FileDataModel). \
filter_by(file_model_id=file_model.id,
version=file_model.latest_version
).with_for_update().first()
latest_data_model = session.query(FileDataModel). \
filter(FileDataModel.file_model_id == file_model.id).\
order_by(desc(FileDataModel.date_created)).first()
md5_checksum = UUID(hashlib.md5(binary_data).hexdigest())
if (file_data_model is not None) and (md5_checksum == file_data_model.md5_hash):
if (latest_data_model is not None) and (md5_checksum == latest_data_model.md5_hash):
# This file does not need to be updated, it's the same file.
return file_model
@ -130,28 +131,50 @@ class FileService(object):
file_model.type = FileType[file_extension]
file_model.content_type = content_type
if file_data_model is None:
if latest_data_model is None:
version = 1
else:
version = file_data_model.version + 1
version = latest_data_model.version + 1
# If this is a BPMN, extract the process id.
if file_model.type == FileType.bpmn:
bpmn: ElementTree.Element = ElementTree.fromstring(binary_data)
file_model.primary_process_id = WorkflowProcessor.get_process_id(bpmn)
file_model.primary_process_id = FileService.get_process_id(bpmn)
file_model.latest_version = version
new_file_data_model = FileDataModel(
data=binary_data, file_model_id=file_model.id, file_model=file_model,
version=version, md5_hash=md5_checksum, last_updated=datetime.now()
version=version, md5_hash=md5_checksum, date_created=datetime.now()
)
session.add_all([file_model, new_file_data_model])
session.commit()
session.flush() # Assure the id is set on the model before returning it.
return file_model
@staticmethod
def get_process_id(et_root: ElementTree.Element):
process_elements = []
for child in et_root:
if child.tag.endswith('process') and child.attrib.get('isExecutable', False):
process_elements.append(child)
if len(process_elements) == 0:
raise ValidationException('No executable process tag found')
# There are multiple root elements
if len(process_elements) > 1:
# Look for the element that has the startEvent in it
for e in process_elements:
this_element: ElementTree.Element = e
for child_element in list(this_element):
if child_element.tag.endswith('startEvent'):
return this_element.attrib['id']
raise ValidationException('No start event found in %s' % et_root.attrib['id'])
return process_elements[0].attrib['id']
@staticmethod
def get_files_for_study(study_id, irb_doc_code=None):
query = session.query(FileModel).\
@ -176,23 +199,51 @@ class FileService(object):
if name:
query = query.filter_by(name=name)
query = query.order_by(FileModel.id)
results = query.all()
return results
@staticmethod
def get_file_data(file_id, file_model=None, version=None):
def get_spec_data_files(workflow_spec_id, workflow_id=None):
"""Returns all the FileDataModels related to a workflow specification.
If a workflow is specified, returns the version of the spec relatted
to that workflow, otherwise, returns the lastes files."""
if workflow_id:
files = session.query(FileDataModel) \
.join(WorkflowSpecDependencyFile) \
.filter(WorkflowSpecDependencyFile.workflow_id == workflow_id) \
.order_by(FileDataModel.id).all()
return files
else:
"""Returns all the latest files related to a workflow specification"""
file_models = FileService.get_files(workflow_spec_id=workflow_spec_id)
latest_data_files = []
for file_model in file_models:
latest_data_files.append(FileService.get_file_data(file_model.id))
return latest_data_files
"""Returns the file_data that is associated with the file model id, if an actual file_model
is provided, uses that rather than looking it up again."""
if file_model is None:
file_model = session.query(FileModel).filter(FileModel.id == file_id).first()
if version is None:
version = file_model.latest_version
return session.query(FileDataModel) \
.filter(FileDataModel.file_model_id == file_id) \
.filter(FileDataModel.version == version) \
.first()
@staticmethod
def get_workflow_data_files(workflow_id=None):
"""Returns all the FileDataModels related to a running workflow -
So these are the latest data files that were uploaded or generated
that go along with this workflow. Not related to the spec in any way"""
file_models = FileService.get_files(workflow_id=workflow_id)
latest_data_files = []
for file_model in file_models:
latest_data_files.append(FileService.get_file_data(file_model.id))
return latest_data_files
@staticmethod
def get_file_data(file_id: int, version: int = None):
"""Returns the file data with the given version, or the lastest file, if version isn't provided."""
query = session.query(FileDataModel) \
.filter(FileDataModel.file_model_id == file_id)
if version:
query = query.filter(FileDataModel.version == version)
else:
query = query.order_by(desc(FileDataModel.date_created))
return query.first()
@staticmethod
def get_reference_file_data(file_name):
@ -201,7 +252,7 @@ class FileService(object):
filter(FileModel.name == file_name).first()
if not file_model:
raise ApiError("file_not_found", "There is no reference file with the name '%s'" % file_name)
return FileService.get_file_data(file_model.id, file_model)
return FileService.get_file_data(file_model.id)
@staticmethod
def get_workflow_file_data(workflow, file_name):

View File

@ -78,6 +78,8 @@ class StudyService(object):
def delete_workflow(workflow):
for file in session.query(FileModel).filter_by(workflow_id=workflow.id).all():
FileService.delete_file(file.id)
for deb in workflow.dependencies:
session.delete(deb)
session.query(TaskEventModel).filter_by(workflow_id=workflow.id).delete()
session.query(WorkflowModel).filter_by(id=workflow.id).delete()

View File

@ -1,8 +1,7 @@
import random
import re
import string
import xml.etree.ElementTree as ElementTree
from datetime import datetime
from typing import List
from SpiffWorkflow import Task as SpiffTask, WorkflowException
from SpiffWorkflow.bpmn.BpmnScriptEngine import BpmnScriptEngine
@ -13,14 +12,15 @@ from SpiffWorkflow.bpmn.workflow import BpmnWorkflow
from SpiffWorkflow.camunda.parser.CamundaParser import CamundaParser
from SpiffWorkflow.dmn.parser.BpmnDmnParser import BpmnDmnParser
from SpiffWorkflow.exceptions import WorkflowTaskExecException
from SpiffWorkflow.operators import Operator
from SpiffWorkflow.specs import WorkflowSpec
from sqlalchemy import desc
from crc import session
from crc.api.common import ApiError
from crc.models.file import FileDataModel, FileModel, FileType
from crc.models.workflow import WorkflowStatus, WorkflowModel
from crc.models.workflow import WorkflowStatus, WorkflowModel, WorkflowSpecDependencyFile
from crc.scripts.script import Script
from crc.services.file_service import FileService
class CustomBpmnScriptEngine(BpmnScriptEngine):
@ -48,7 +48,7 @@ class CustomBpmnScriptEngine(BpmnScriptEngine):
mod = __import__(module_name, fromlist=[class_name])
klass = getattr(mod, class_name)
study_id = task.workflow.data[WorkflowProcessor.STUDY_ID_KEY]
if(WorkflowProcessor.WORKFLOW_ID_KEY in task.workflow.data):
if WorkflowProcessor.WORKFLOW_ID_KEY in task.workflow.data:
workflow_id = task.workflow.data[WorkflowProcessor.WORKFLOW_ID_KEY]
else:
workflow_id = None
@ -75,7 +75,7 @@ class CustomBpmnScriptEngine(BpmnScriptEngine):
Evaluate the given expression, within the context of the given task and
return the result.
"""
exp,valid = self.validateExpression(expression)
exp, valid = self.validateExpression(expression)
return self._eval(exp, **task.data)
@staticmethod
@ -108,18 +108,22 @@ class WorkflowProcessor(object):
If neither flag is set, it will use the same version of the specification that was used to originally
create the workflow model. """
self.workflow_model = workflow_model
orig_version = workflow_model.spec_version
if soft_reset or workflow_model.spec_version is None:
self.workflow_model.spec_version = WorkflowProcessor.get_latest_version_string(
workflow_model.workflow_spec_id)
spec = self.get_spec(workflow_model.workflow_spec_id, workflow_model.spec_version)
if soft_reset or len(workflow_model.dependencies) == 0:
self.spec_data_files = FileService.get_spec_data_files(
workflow_spec_id=workflow_model.workflow_spec_id)
else:
self.spec_data_files = FileService.get_spec_data_files(
workflow_spec_id=workflow_model.workflow_spec_id,
workflow_id=workflow_model.id)
spec = self.get_spec(self.spec_data_files, workflow_model.workflow_spec_id)
self.workflow_spec_id = workflow_model.workflow_spec_id
try:
self.bpmn_workflow = self.__get_bpmn_workflow(workflow_model, spec)
self.bpmn_workflow.script_engine = self._script_engine
if not self.WORKFLOW_ID_KEY in self.bpmn_workflow.data:
if self.WORKFLOW_ID_KEY not in self.bpmn_workflow.data:
if not workflow_model.id:
session.add(workflow_model)
session.commit()
@ -132,22 +136,26 @@ class WorkflowProcessor(object):
self.save()
except KeyError as ke:
if soft_reset:
# Undo the soft-reset.
workflow_model.spec_version = orig_version
raise ApiError(code="unexpected_workflow_structure",
message="Failed to deserialize workflow"
" '%s' version %s, due to a mis-placed or missing task '%s'" %
(self.workflow_spec_id, workflow_model.spec_version, str(ke)) +
" This is very likely due to a soft reset where there was a structural change.")
(self.workflow_spec_id, self.get_version_string(), str(ke)) +
" This is very likely due to a soft reset where there was a structural change.")
if hard_reset:
# Now that the spec is loaded, get the data and rebuild the bpmn with the new details
workflow_model.spec_version = self.hard_reset()
self.hard_reset()
workflow_model.bpmn_workflow_json = WorkflowProcessor._serializer.serialize_workflow(self.bpmn_workflow)
self.save()
if soft_reset:
self.save()
# set whether this is the latest spec file.
if self.spec_data_files == FileService.get_spec_data_files(workflow_spec_id=workflow_model.workflow_spec_id):
self.is_latest_spec = True
else:
self.is_latest_spec = False
def __get_bpmn_workflow(self, workflow_model: WorkflowModel, spec: WorkflowSpec):
if workflow_model.bpmn_workflow_json:
bpmn_workflow = self._serializer.deserialize_workflow(workflow_model.bpmn_workflow_json, workflow_spec=spec)
else:
@ -159,44 +167,32 @@ class WorkflowProcessor(object):
def save(self):
"""Saves the current state of this processor to the database """
workflow_model = self.workflow_model
workflow_model.bpmn_workflow_json = self.serialize()
self.workflow_model.bpmn_workflow_json = self.serialize()
complete_states = [SpiffTask.CANCELLED, SpiffTask.COMPLETED]
tasks = list(self.get_all_user_tasks())
workflow_model.status = self.get_status()
workflow_model.total_tasks = len(tasks)
workflow_model.completed_tasks = sum(1 for t in tasks if t.state in complete_states)
workflow_model.last_updated = datetime.now()
session.add(workflow_model)
self.workflow_model.status = self.get_status()
self.workflow_model.total_tasks = len(tasks)
self.workflow_model.completed_tasks = sum(1 for t in tasks if t.state in complete_states)
self.workflow_model.last_updated = datetime.now()
self.update_dependencies(self.spec_data_files)
session.add(self.workflow_model)
session.commit()
@staticmethod
def run_master_spec(spec_model, study):
"""Executes a BPMN specification for the given study, without recording any information to the database
Useful for running the master specification, which should not persist. """
version = WorkflowProcessor.get_latest_version_string(spec_model.id)
spec = WorkflowProcessor.get_spec(spec_model.id, version)
try:
bpmn_workflow = BpmnWorkflow(spec, script_engine=WorkflowProcessor._script_engine)
bpmn_workflow.data[WorkflowProcessor.STUDY_ID_KEY] = study.id
bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = False
bpmn_workflow.do_engine_steps()
except WorkflowException as we:
raise ApiError.from_task_spec("error_running_master_spec", str(we), we.sender)
if not bpmn_workflow.is_completed():
raise ApiError("master_spec_not_automatic",
"The master spec should only contain fully automated tasks, it failed to complete.")
return bpmn_workflow.last_task.data
def get_version_string(self):
# this could potentially become expensive to load all the data in the data models.
# in which case we might consider using a deferred loader for the actual data, but
# trying not to pre-optimize.
file_data_models = FileService.get_spec_data_files(self.workflow_model.workflow_spec_id,
self.workflow_model.id)
return WorkflowProcessor.__get_version_string_for_data_models(file_data_models)
@staticmethod
def get_parser():
parser = MyCustomParser()
return parser
def get_latest_version_string_for_spec(spec_id):
file_data_models = FileService.get_spec_data_files(spec_id)
return WorkflowProcessor.__get_version_string_for_data_models(file_data_models)
@staticmethod
def get_latest_version_string(workflow_spec_id):
def __get_version_string_for_data_models(file_data_models):
"""Version is in the format v[VERSION] (FILE_ID_LIST)
For example, a single bpmn file with only one version would be
v1 (12) Where 12 is the id of the file data model that is used to create the
@ -205,10 +201,6 @@ class WorkflowProcessor(object):
a Spec that includes a BPMN, DMN, an a Word file all on the first
version would be v1.1.1 (12.45.21)"""
# this could potentially become expensive to load all the data in the data models.
# in which case we might consider using a deferred loader for the actual data, but
# trying not to pre-optimize.
file_data_models = WorkflowProcessor.__get_latest_file_models(workflow_spec_id)
major_version = 0 # The version of the primary file.
minor_version = [] # The versions of the minor files if any.
file_ids = []
@ -224,60 +216,72 @@ class WorkflowProcessor(object):
full_version = "v%s (%s)" % (version, files)
return full_version
@staticmethod
def get_file_models_for_version(workflow_spec_id, version):
file_id_strings = re.findall('\((.*)\)', version)[0].split(".")
file_ids = [int(i) for i in file_id_strings]
files = session.query(FileDataModel)\
.join(FileModel) \
.filter(FileModel.workflow_spec_id == workflow_spec_id)\
.filter(FileDataModel.id.in_(file_ids)).all()
if len(files) != len(file_ids):
raise ApiError("invalid_version",
"The version '%s' of workflow specification '%s' is invalid. " %
(version, workflow_spec_id) +
" Unable to locate the correct files to recreate it.")
return files
def update_dependencies(self, spec_data_files):
existing_dependencies = FileService.get_spec_data_files(
workflow_spec_id=self.workflow_model.workflow_spec_id,
workflow_id=self.workflow_model.id)
# Don't save the dependencies if they haven't changed.
if existing_dependencies == spec_data_files:
return
# Remove all existing dependencies, and replace them.
self.workflow_model.dependencies = []
for file_data in spec_data_files:
self.workflow_model.dependencies.append(WorkflowSpecDependencyFile(file_data_id=file_data.id))
@staticmethod
def __get_latest_file_models(workflow_spec_id):
"""Returns all the latest files related to a workflow specification"""
return session.query(FileDataModel) \
.join(FileModel) \
.filter(FileModel.workflow_spec_id == workflow_spec_id)\
.filter(FileDataModel.version == FileModel.latest_version)\
.order_by(FileModel.id)\
.all()
def run_master_spec(spec_model, study):
"""Executes a BPMN specification for the given study, without recording any information to the database
Useful for running the master specification, which should not persist. """
spec_data_files = FileService.get_spec_data_files(spec_model.id)
spec = WorkflowProcessor.get_spec(spec_data_files, spec_model.id)
try:
bpmn_workflow = BpmnWorkflow(spec, script_engine=WorkflowProcessor._script_engine)
bpmn_workflow.data[WorkflowProcessor.STUDY_ID_KEY] = study.id
bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = False
bpmn_workflow.do_engine_steps()
except WorkflowException as we:
raise ApiError.from_task_spec("error_running_master_spec", str(we), we.sender)
if not bpmn_workflow.is_completed():
raise ApiError("master_spec_not_automatic",
"The master spec should only contain fully automated tasks, it failed to complete.")
return bpmn_workflow.last_task.data
@staticmethod
def get_spec(workflow_spec_id, version=None):
"""Returns the requested version of the specification,
or the latest version if none is specified."""
def get_parser():
parser = MyCustomParser()
return parser
@staticmethod
def get_spec(file_data_models: List[FileDataModel], workflow_spec_id):
"""Returns a SpiffWorkflow specification for the given workflow spec,
using the files provided. The Workflow_spec_id is only used to generate
better error messages."""
parser = WorkflowProcessor.get_parser()
process_id = None
if version is None:
file_data_models = WorkflowProcessor.__get_latest_file_models(workflow_spec_id)
else:
file_data_models = WorkflowProcessor.get_file_models_for_version(workflow_spec_id, version)
for file_data in file_data_models:
if file_data.file_model.type == FileType.bpmn:
bpmn: ElementTree.Element = ElementTree.fromstring(file_data.data)
if file_data.file_model.primary:
process_id = WorkflowProcessor.get_process_id(bpmn)
process_id = FileService.get_process_id(bpmn)
parser.add_bpmn_xml(bpmn, filename=file_data.file_model.name)
elif file_data.file_model.type == FileType.dmn:
dmn: ElementTree.Element = ElementTree.fromstring(file_data.data)
parser.add_dmn_xml(dmn, filename=file_data.file_model.name)
if process_id is None:
raise(ApiError(code="no_primary_bpmn_error",
message="There is no primary BPMN model defined for workflow %s" % workflow_spec_id))
raise (ApiError(code="no_primary_bpmn_error",
message="There is no primary BPMN model defined for workflow %s" % workflow_spec_id))
try:
spec = parser.get_spec(process_id)
except ValidationException as ve:
raise ApiError(code="workflow_validation_error",
message="Failed to parse Workflow Specification '%s' %s." % (workflow_spec_id, version) +
message="Failed to parse Workflow Specification '%s'" % workflow_spec_id +
"Error is %s" % str(ve),
file_name=ve.filename,
task_id=ve.id,
@ -301,8 +305,8 @@ class WorkflowProcessor(object):
Returns the new version.
"""
version = WorkflowProcessor.get_latest_version_string(self.workflow_spec_id)
spec = WorkflowProcessor.get_spec(self.workflow_spec_id) # Force latest version by NOT specifying version
self.spec_data_files = FileService.get_spec_data_files(workflow_spec_id=self.workflow_spec_id)
spec = WorkflowProcessor.get_spec(self.spec_data_files, self.workflow_spec_id)
# spec = WorkflowProcessor.get_spec(self.workflow_spec_id, version)
bpmn_workflow = BpmnWorkflow(spec, script_engine=self._script_engine)
bpmn_workflow.data = self.bpmn_workflow.data
@ -310,14 +314,10 @@ class WorkflowProcessor(object):
task.data = self.bpmn_workflow.last_task.data
bpmn_workflow.do_engine_steps()
self.bpmn_workflow = bpmn_workflow
return version
def get_status(self):
return self.status_of(self.bpmn_workflow)
def get_spec_version(self):
return self.workflow_model.spec_version
def do_engine_steps(self):
try:
self.bpmn_workflow.do_engine_steps()
@ -398,32 +398,7 @@ class WorkflowProcessor(object):
return [t for t in all_tasks
if not self.bpmn_workflow._is_engine_task(t.task_spec) and t.state in [t.COMPLETED, t.CANCELLED]]
@staticmethod
def get_process_id(et_root: ElementTree.Element):
process_elements = []
for child in et_root:
if child.tag.endswith('process') and child.attrib.get('isExecutable', False):
process_elements.append(child)
if len(process_elements) == 0:
raise ValidationException('No executable process tag found')
# There are multiple root elements
if len(process_elements) > 1:
# Look for the element that has the startEvent in it
for e in process_elements:
this_element: ElementTree.Element = e
for child_element in list(this_element):
if child_element.tag.endswith('startEvent'):
return this_element.attrib['id']
raise ValidationException('No start event found in %s' % et_root.attrib['id'])
return process_elements[0].attrib['id']
def get_nav_item(self, task):
for nav_item in self.bpmn_workflow.get_nav_list():
if nav_item['task_id'] == task.id:
return nav_item

View File

@ -40,8 +40,10 @@ class WorkflowService(object):
def test_spec(cls, spec_id):
"""Runs a spec through it's paces to see if it results in any errors. Not fool-proof, but a good
sanity check."""
version = WorkflowProcessor.get_latest_version_string(spec_id)
spec = WorkflowProcessor.get_spec(spec_id, version)
spec = WorkflowProcessor.get_spec(
file_data_models=FileService.get_spec_data_files(workflow_spec_id=spec_id),
workflow_spec_id=spec_id)
bpmn_workflow = BpmnWorkflow(spec, script_engine=CustomBpmnScriptEngine())
bpmn_workflow.data[WorkflowProcessor.STUDY_ID_KEY] = 1
bpmn_workflow.data[WorkflowProcessor.WORKFLOW_ID_KEY] = spec_id
@ -269,7 +271,7 @@ class WorkflowService(object):
user_uid=g.user.uid,
workflow_id=workflow_model.id,
workflow_spec_id=workflow_model.workflow_spec_id,
spec_version=workflow_model.spec_version,
spec_version=processor.get_version_string(),
action=action,
task_id=task.id,
task_name=task.name,

View File

@ -9,7 +9,12 @@ from crc.services.workflow_processor import WorkflowProcessor
class TestApprovalsService(BaseTest):
def test_create_approval_record(self):
self.create_reference_document()
workflow = self.create_workflow("empty_workflow")
FileService.add_workflow_file(workflow_id=workflow.id,
name="anything.png", content_type="text",
binary_data=b'5678', irb_doc_code="UVACompl_PRCAppr" )
ApprovalService.add_approval(study_id=workflow.study_id, workflow_id=workflow.id, approver_uid="dhf8r")
self.assertEquals(1, db.session.query(ApprovalModel).count())
model = db.session.query(ApprovalModel).first()
@ -17,10 +22,14 @@ class TestApprovalsService(BaseTest):
self.assertEquals(workflow.id, model.workflow_id)
self.assertEquals("dhf8r", model.approver_uid)
self.assertEquals(1, model.version)
self.assertIsNotNone(model.workflow_hash)
def test_new_requests_dont_add_if_approval_exists_for_current_workflow(self):
self.create_reference_document()
workflow = self.create_workflow("empty_workflow")
FileService.add_workflow_file(workflow_id=workflow.id,
name="anything.png", content_type="text",
binary_data=b'5678', irb_doc_code="UVACompl_PRCAppr" )
ApprovalService.add_approval(study_id=workflow.study_id, workflow_id=workflow.id, approver_uid="dhf8r")
ApprovalService.add_approval(study_id=workflow.study_id, workflow_id=workflow.id, approver_uid="dhf8r")
self.assertEquals(1, db.session.query(ApprovalModel).count())
@ -31,15 +40,15 @@ class TestApprovalsService(BaseTest):
self.load_example_data()
self.create_reference_document()
workflow = self.create_workflow('empty_workflow')
processor = WorkflowProcessor(workflow)
task = processor.next_task()
FileService.add_workflow_file(workflow_id=workflow.id,
name="anything.png", content_type="text",
binary_data=b'5678', irb_doc_code="AD_CoCAppr")
ApprovalService.add_approval(study_id=workflow.study_id, workflow_id=workflow.id, approver_uid="dhf8r")
irb_code_1 = "UVACompl_PRCAppr" # The first file referenced in pb required docs.
FileService.add_workflow_file(workflow_id=workflow.id,
name="anything.png", content_type="text",
binary_data=b'5678', irb_doc_code=irb_code_1)
binary_data=b'5678', irb_doc_code="UVACompl_PRCAppr")
ApprovalService.add_approval(study_id=workflow.study_id, workflow_id=workflow.id, approver_uid="dhf8r")
self.assertEquals(2, db.session.query(ApprovalModel).count())
@ -48,38 +57,3 @@ class TestApprovalsService(BaseTest):
self.assertEquals(2, models[1].version)
def test_generate_workflow_hash_and_version(self):
self.load_example_data()
self.create_reference_document()
workflow = self.create_workflow('empty_workflow')
processor = WorkflowProcessor(workflow)
task = processor.next_task()
irb_code_1 = "UVACompl_PRCAppr" # The first file referenced in pb required docs.
irb_code_2 = "NonUVAIRB_AssuranceForm" # The second file in above.
# Add a task file to the workflow.
FileService.add_workflow_file(workflow_id=workflow.id,
name="anything.png", content_type="text",
binary_data=b'5678', irb_doc_code=irb_code_1)
# Add a two form field files with the same irb_code, but different names
FileService.add_workflow_file(workflow_id=workflow.id,
name="anything.png", content_type="text",
binary_data=b'1234', irb_doc_code=irb_code_2)
FileService.add_workflow_file(workflow_id=workflow.id,
name="another_anything.png", content_type="text",
binary_data=b'5678', irb_doc_code=irb_code_2)
# Workflow hash should look be id[1]-id[1]-id[1]
# Sould be three files, each with a version of 1.
# where id is the file id, which we don't know, thus the regex.
latest_files = FileService.get_workflow_files(workflow.id)
self.assertRegexpMatches(ApprovalService._generate_workflow_hash(latest_files), "\d+\[1\]-\d+\[1\]-\d+\[1\]")
# Replace last file
# should now be id[1]-id[1]-id[2]
FileService.add_workflow_file(workflow_id=workflow.id,
irb_doc_code=irb_code_2,
name="another_anything.png", content_type="text",
binary_data=b'9999')
self.assertRegexpMatches(ApprovalService._generate_workflow_hash(latest_files), "\d+\[1\]-\d+\[1\]-\d+\[2\]")

View File

@ -23,7 +23,11 @@ class TestFileService(BaseTest):
file_models = FileService.get_workflow_files(workflow_id=workflow.id)
self.assertEquals(1, len(file_models))
self.assertEquals(2, file_models[0].latest_version)
file_data = FileService.get_workflow_data_files(workflow_id=workflow.id)
self.assertEquals(1, len(file_data))
self.assertEquals(2, file_data[0].version)
def test_add_file_from_form_increments_version_and_replaces_on_subsequent_add_with_same_name(self):
self.load_example_data()
@ -44,7 +48,10 @@ class TestFileService(BaseTest):
file_models = FileService.get_workflow_files(workflow_id=workflow.id)
self.assertEquals(1, len(file_models))
self.assertEquals(2, file_models[0].latest_version)
file_data = FileService.get_workflow_data_files(workflow_id=workflow.id)
self.assertEquals(1, len(file_data))
self.assertEquals(2, file_data[0].version)
def test_add_file_from_form_allows_multiple_files_with_different_names(self):
self.load_example_data()
@ -64,5 +71,3 @@ class TestFileService(BaseTest):
binary_data=b'5678')
file_models = FileService.get_workflow_files(workflow_id=workflow.id)
self.assertEquals(2, len(file_models))
self.assertEquals(1, file_models[0].latest_version)
self.assertEquals(1, file_models[1].latest_version)

View File

@ -4,7 +4,7 @@ import json
from tests.base_test import BaseTest
from crc import session
from crc.models.file import FileModel, FileType, FileModelSchema, FileDataModel
from crc.models.file import FileModel, FileType, FileSchema, FileModelSchema
from crc.models.workflow import WorkflowSpecModel
from crc.services.file_service import FileService
from crc.services.workflow_processor import WorkflowProcessor
@ -165,16 +165,16 @@ class TestFilesApi(BaseTest):
content_type='multipart/form-data', headers=self.logged_in_headers())
self.assert_success(rv)
self.assertIsNotNone(rv.get_data())
json_data = json.loads(rv.get_data(as_text=True))
file = FileModelSchema().load(json_data, session=session)
self.assertEqual(2, file.latest_version)
self.assertEqual(FileType.bpmn, file.type)
self.assertEqual("application/octet-stream", file.content_type)
file_json = json.loads(rv.get_data(as_text=True))
self.assertEqual(2, file_json['latest_version'])
self.assertEqual(FileType.bpmn.value, file_json['type'])
self.assertEqual("application/octet-stream", file_json['content_type'])
self.assertEqual(spec.id, file.workflow_spec_id)
# Assure it is updated in the database and properly persisted.
file_model = session.query(FileModel).filter(FileModel.id == file.id).first()
self.assertEqual(2, file_model.latest_version)
file_data = FileService.get_file_data(file_model.id)
self.assertEqual(2, file_data.version)
rv = self.app.get('/v1.0/file/%i/data' % file.id, headers=self.logged_in_headers())
self.assert_success(rv)
@ -191,15 +191,13 @@ class TestFilesApi(BaseTest):
content_type='multipart/form-data', headers=self.logged_in_headers())
self.assertIsNotNone(rv.get_data())
json_data = json.loads(rv.get_data(as_text=True))
file = FileModelSchema().load(json_data, session=session)
self.assertEqual(1, file.latest_version)
self.assertEqual(1, json_data['latest_version'])
data['file'] = io.BytesIO(self.minimal_bpmn("abcdef")), 'my_new_file.bpmn'
rv = self.app.put('/v1.0/file/%i/data' % file.id, data=data, follow_redirects=True,
rv = self.app.put('/v1.0/file/%i/data' % json_data['id'], data=data, follow_redirects=True,
content_type='multipart/form-data', headers=self.logged_in_headers())
self.assertIsNotNone(rv.get_data())
json_data = json.loads(rv.get_data(as_text=True))
file = FileModelSchema().load(json_data, session=session)
self.assertEqual(1, file.latest_version)
self.assertEqual(1, json_data['latest_version'])
def test_get_file(self):
self.load_example_data()

View File

@ -1,3 +1,4 @@
from crc.services.file_service import FileService
from tests.base_test import BaseTest
from crc.scripts.request_approval import RequestApproval
@ -17,7 +18,10 @@ class TestRequestApprovalScript(BaseTest):
processor = WorkflowProcessor(workflow)
task = processor.next_task()
task.data = {"study": {"approval1": "dhf8r", 'approval2':'lb3dp'}}
FileService.add_workflow_file(workflow_id=workflow.id,
irb_doc_code="UVACompl_PRCAppr",
name="anything.png", content_type="text",
binary_data=b'1234')
script = RequestApproval()
script.do_task(task, workflow.study_id, workflow.id, "study.approval1", "study.approval2")
self.assertEquals(2, db.session.query(ApprovalModel).count())

View File

@ -74,7 +74,6 @@ class TestStudyService(BaseTest):
# workflow should not be started, and it should have 0 completed tasks, and 0 total tasks.
self.assertEqual(WorkflowStatus.not_started, workflow.status)
self.assertEqual(None, workflow.spec_version)
self.assertEqual(0, workflow.total_tasks)
self.assertEqual(0, workflow.completed_tasks)

View File

@ -182,7 +182,6 @@ class TestTasksApi(BaseTest):
self.assertEquals("Task 2b", nav[5]['title'])
self.assertEquals("Task 3", nav[6]['title'])
def test_document_added_to_workflow_shows_up_in_file_list(self):
self.load_example_data()
self.create_reference_document()

View File

@ -254,12 +254,12 @@ class TestWorkflowProcessor(BaseTest):
study = session.query(StudyModel).first()
workflow_spec_model = self.load_test_spec("decision_table")
processor = self.get_processor(study, workflow_spec_model)
self.assertTrue(processor.get_spec_version().startswith('v1.1'))
self.assertTrue(processor.get_version_string().startswith('v1.1'))
file_service = FileService()
file_service.add_workflow_spec_file(workflow_spec_model, "new_file.txt", "txt", b'blahblah')
processor = self.get_processor(study, workflow_spec_model)
self.assertTrue(processor.get_spec_version().startswith('v1.1.1'))
self.assertTrue(processor.get_version_string().startswith('v1.1.1'))
file_path = os.path.join(app.root_path, '..', 'tests', 'data', 'docx', 'docx.bpmn')
file = open(file_path, "rb")
@ -268,7 +268,7 @@ class TestWorkflowProcessor(BaseTest):
file_model = db.session.query(FileModel).filter(FileModel.name == "decision_table.bpmn").first()
file_service.update_file(file_model, data, "txt")
processor = self.get_processor(study, workflow_spec_model)
self.assertTrue(processor.get_spec_version().startswith('v2.1.1'))
self.assertTrue(processor.get_version_string().startswith('v2.1.1'))
def test_restart_workflow(self):
self.load_example_data()
@ -339,7 +339,7 @@ class TestWorkflowProcessor(BaseTest):
# Assure that creating a new processor doesn't cause any issues, and maintains the spec version.
processor.workflow_model.bpmn_workflow_json = processor.serialize()
processor2 = WorkflowProcessor(processor.workflow_model)
self.assertTrue(processor2.get_spec_version().startswith("v1 ")) # Still at version 1.
self.assertFalse(processor2.is_latest_spec) # Still at version 1.
# Do a hard reset, which should bring us back to the beginning, but retain the data.
processor3 = WorkflowProcessor(processor.workflow_model, hard_reset=True)
@ -349,10 +349,6 @@ class TestWorkflowProcessor(BaseTest):
self.assertEqual("New Step", processor3.next_task().task_spec.description)
self.assertEqual("blue", processor3.next_task().data["color"])
def test_get_latest_spec_version(self):
workflow_spec_model = self.load_test_spec("two_forms")
version = WorkflowProcessor.get_latest_version_string("two_forms")
self.assertTrue(version.startswith("v1 "))
@patch('crc.services.protocol_builder.ProtocolBuilderService.get_studies')
@patch('crc.services.protocol_builder.ProtocolBuilderService.get_investigators')