2020-04-08 17:28:43 +00:00
|
|
|
import json
|
2020-02-10 21:19:23 +00:00
|
|
|
import os
|
|
|
|
from datetime import datetime
|
2020-03-04 18:40:25 +00:00
|
|
|
from uuid import UUID
|
|
|
|
from xml.etree import ElementTree
|
2020-02-10 21:19:23 +00:00
|
|
|
|
2020-03-19 21:13:30 +00:00
|
|
|
from pandas import ExcelFile
|
|
|
|
|
2020-02-10 21:19:23 +00:00
|
|
|
from crc import session
|
2020-03-13 19:03:57 +00:00
|
|
|
from crc.api.common import ApiError
|
|
|
|
from crc.models.file import FileType, FileDataModel, FileModel
|
2020-03-04 18:40:25 +00:00
|
|
|
from crc.models.workflow import WorkflowSpecModel
|
|
|
|
from crc.services.workflow_processor import WorkflowProcessor
|
|
|
|
import hashlib
|
2020-02-10 21:19:23 +00:00
|
|
|
|
|
|
|
|
|
|
|
class FileService(object):
    """Provides consistent management and rules for storing, retrieving and processing files."""

    # Name of the reference spreadsheet that lists the known IRB document codes.
    IRB_PRO_CATEGORIES_FILE = "irb_documents.xlsx"

    @staticmethod
    def add_workflow_spec_file(workflow_spec: "WorkflowSpecModel",
                               name, content_type, binary_data, primary=False, is_status=False):
        """Create a new file and associate it with a workflow spec.

        Returns the FileModel produced by update_file, which also stores
        binary_data as the file's first version.
        """
        file_model = FileModel(
            workflow_spec_id=workflow_spec.id,
            name=name,
            primary=primary,
            is_status=is_status,
        )
        return FileService.update_file(file_model, binary_data, content_type)

    @staticmethod
    def add_form_field_file(study_id, workflow_id, task_id, form_field_key, name, content_type, binary_data):
        """Create a new file and associate it with a user task form field within a workflow.

        Please note that the form_field_key MUST be a known code in the
        irb_documents.xlsx reference document. The key is stored on the file
        both as its form_field_key and as its irb_doc_code.

        Raises:
            ApiError: 'invalid_form_field_key' if the key is not a known code.
        """
        if not FileService.irb_document_reference_exists(form_field_key):
            raise ApiError("invalid_form_field_key",
                           "When uploading files, the form field id must match a known document in the "
                           "%s reference file. This code is not found in that file '%s'"
                           % (FileService.IRB_PRO_CATEGORIES_FILE, form_field_key))

        file_model = FileModel(
            study_id=study_id,
            workflow_id=workflow_id,
            task_id=task_id,
            name=name,
            form_field_key=form_field_key,
            irb_doc_code=form_field_key,
        )
        return FileService.update_file(file_model, binary_data, content_type)

    @staticmethod
    def _load_irb_reference_frame():
        """Parse the first sheet of the IRB reference spreadsheet into a pandas DataFrame."""
        data_model = FileService.get_reference_file_data(FileService.IRB_PRO_CATEGORIES_FILE)
        xls = ExcelFile(data_model.data)
        return xls.parse(xls.sheet_names[0])

    @staticmethod
    def irb_document_reference_exists(code):
        """Return True if code appears in the 'Code' column of the IRB reference spreadsheet."""
        df = FileService._load_irb_reference_frame()
        return code in df['Code'].values

    @staticmethod
    def get_file_reference_dictionary():
        """Load the IRB reference spreadsheet and convert it to a dictionary.

        Returns a dict keyed by the 'Code' column; each value is a dict of the
        remaining columns for that row, with every cell rendered as a string.
        """
        df = FileService._load_irb_reference_frame()
        # A missing Id becomes 0 so the column can be cast to an integer type
        # (otherwise ids would be rendered as floats, e.g. "12.0").
        df['Id'] = df['Id'].fillna(0)
        df = df.astype({'Id': 'Int64'})
        df = df.fillna('')
        df = df.applymap(str)
        df = df.set_index('Code')
        # Round-trip through JSON to end up with plain python dicts and strings.
        return json.loads(df.to_json(orient='index'))

    @staticmethod
    def add_task_file(study_id, workflow_id, task_id, name, content_type, binary_data,
                      irb_doc_code=None):
        """Create a new file and associate it with an executing task within a workflow."""
        file_model = FileModel(
            study_id=study_id,
            workflow_id=workflow_id,
            task_id=task_id,
            name=name,
            irb_doc_code=irb_doc_code,
        )
        return FileService.update_file(file_model, binary_data, content_type)

    @staticmethod
    def add_reference_file(name, content_type, binary_data):
        """Create a file with the given name, but not associated with a spec or workflow.

        Only one file with the given reference name can exist: if a reference
        file with this name is already stored, it is updated instead.
        """
        file_model = session.query(FileModel). \
            filter_by(is_reference=True, name=name).first()
        if not file_model:
            file_model = FileModel(
                name=name,
                is_reference=True,
            )
        return FileService.update_file(file_model, binary_data, content_type)

    @staticmethod
    def get_extension(file_name):
        """Return the lower-cased extension of file_name without the leading dot.

        Returns an empty string when the name has no extension.
        """
        _, file_extension = os.path.splitext(file_name)
        return file_extension.lower().strip()[1:]

    @staticmethod
    def update_file(file_model, binary_data, content_type):
        """Store binary_data as the next version of file_model.

        If the md5 checksum of binary_data matches the latest stored version,
        nothing is written and file_model is returned as is. Otherwise the
        model's type, content type and latest_version are updated, a new
        FileDataModel is created, and both are committed.

        Raises:
            ApiError: 'unknown_extension' if the extension of file_model.name
                is not a member of FileType.
        """
        file_data_model = session.query(FileDataModel). \
            filter_by(file_model_id=file_model.id,
                      version=file_model.latest_version
                      ).with_for_update().first()
        md5_checksum = UUID(hashlib.md5(binary_data).hexdigest())
        if file_data_model is not None and md5_checksum == file_data_model.md5_hash:
            # This file does not need to be updated, it's the same file.
            return file_model

        # Verify the extension
        file_extension = FileService.get_extension(file_model.name)
        if file_extension not in FileType.__members__:
            raise ApiError('unknown_extension',
                           'The file you provided does not have an accepted extension:' +
                           file_extension, status_code=404)
        file_model.type = FileType[file_extension]
        file_model.content_type = content_type

        version = 1 if file_data_model is None else file_data_model.version + 1

        # If this is a BPMN, extract the process id.
        if file_model.type == FileType.bpmn:
            bpmn: ElementTree.Element = ElementTree.fromstring(binary_data)
            file_model.primary_process_id = WorkflowProcessor.get_process_id(bpmn)

        file_model.latest_version = version
        file_data_model = FileDataModel(data=binary_data, file_model=file_model, version=version,
                                        md5_hash=md5_checksum, last_updated=datetime.now())

        session.add_all([file_model, file_data_model])
        # commit() flushes pending changes first, so no separate flush is needed.
        session.commit()
        return file_model

    @staticmethod
    def get_files(workflow_spec_id=None,
                  study_id=None, workflow_id=None, task_id=None, form_field_key=None,
                  name=None, is_reference=False, irb_doc_code=None):
        """Return all FileModels that match every criterion that was provided.

        Criteria left at a falsy value are ignored; is_reference is always
        applied. task_id is compared as a string.
        """
        query = session.query(FileModel).filter_by(is_reference=is_reference)
        if workflow_spec_id:
            query = query.filter_by(workflow_spec_id=workflow_spec_id)
        if study_id:
            query = query.filter_by(study_id=study_id)
        if workflow_id:
            query = query.filter_by(workflow_id=workflow_id)
        if task_id:
            query = query.filter_by(task_id=str(task_id))
        if form_field_key:
            query = query.filter_by(form_field_key=form_field_key)
        if name:
            query = query.filter_by(name=name)
        if irb_doc_code:
            query = query.filter_by(irb_doc_code=irb_doc_code)

        return query.all()

    @staticmethod
    def get_file_data(file_id, file_model=None):
        """Return the latest FileDataModel for the file with the given id.

        If an actual file_model is provided, it is used rather than looking
        the model up again.

        Raises:
            ApiError: 'file_not_found' if no file_model was provided and none
                exists with the given id.
        """
        if file_model is None:
            file_model = session.query(FileModel).filter(FileModel.id == file_id).first()
            if file_model is None:
                raise ApiError("file_not_found", "There is no file with the id '%s'" % file_id)
        return session.query(FileDataModel) \
            .filter(FileDataModel.file_model_id == file_id) \
            .filter(FileDataModel.version == file_model.latest_version) \
            .first()

    @staticmethod
    def get_reference_file_data(file_name):
        """Return the latest FileDataModel of the reference file with the given name.

        Raises:
            ApiError: 'file_not_found' if there is no such reference file.
        """
        file_model = session.query(FileModel). \
            filter_by(is_reference=True, name=file_name).first()
        if not file_model:
            raise ApiError("file_not_found", "There is no reference file with the name '%s'" % file_name)
        return FileService.get_file_data(file_model.id, file_model)

    @staticmethod
    def get_workflow_file_data(workflow, file_name):
        """Given a SPIFF Workflow Model, track down a file with the given name in the
        database and return its latest data.

        Raises:
            ApiError: 'workflow_model_error' if no workflow spec can be found
                for the workflow, or 'file_missing' if that spec has no file
                with the given name.
        """
        workflow_spec_model = FileService.find_spec_model_in_db(workflow)

        if workflow_spec_model is None:
            raise ApiError(code="workflow_model_error",
                           message="Something is wrong. I can't find the workflow you are using.")

        # Restrict to the latest version; a file keeps one data row per version.
        file_data_model = session.query(FileDataModel) \
            .join(FileModel) \
            .filter(FileModel.name == file_name) \
            .filter(FileModel.workflow_spec_id == workflow_spec_model.id) \
            .filter(FileDataModel.version == FileModel.latest_version).first()

        if file_data_model is None:
            raise ApiError(code="file_missing",
                           message="Can not find a file called '%s' within workflow specification '%s'"
                                   % (file_name, workflow_spec_model.id))

        return file_data_model

    @staticmethod
    def find_spec_model_in_db(workflow):
        """Search for the workflow spec model that the given workflow belongs to.

        Returns the matching WorkflowSpecModel, or None if neither the workflow
        nor any of its outer workflows matches one.
        """
        # When the workflow spec model is created, we record the primary process id,
        # then we can look it up. As there is the potential for sub-workflows, we
        # may need to travel up to locate the primary process.
        spec = workflow.spec
        workflow_model = session.query(WorkflowSpecModel).join(FileModel). \
            filter(FileModel.primary_process_id == spec.name).first()
        # The recursion stops at a workflow that is its own outer_workflow.
        if workflow_model is None and workflow != workflow.outer_workflow:
            return FileService.find_spec_model_in_db(workflow.outer_workflow)

        return workflow_model
|
|
|
|
|