Refactoring Reference files to use the lookup table, rather than parsing the results directly out of the spreadsheet or attempting to cache them.

Adding a DocumentService to clean up the FileService and keep document handling well separated, as it seems likely to be pulled out into its own service in the future; there is now a documents API file as well, for the same reason.
Some other minor changes just fix whitespace so our code lints cleanly.
I removed _create_study_workflow_approvals from the base test, as we don't use approvals like this anymore.
This commit is contained in:
Dan 2021-07-06 13:10:20 -04:00
parent fafa79a07d
commit 1b1a994360
20 changed files with 243 additions and 241 deletions
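
At a high level, the refactor replaces per-request spreadsheet parsing with a query against the indexed lookup table. A minimal sketch of the new access pattern, condensed from the DocumentService and StudyService code in the diff below (the helper name is hypothetical):

from crc.services.file_service import FileService
from crc.services.lookup_service import LookupService

def get_reference_dictionary(file_name, value_column, label_column):
    # Resolve the reference file to its stored data record...
    file_data = FileService.get_reference_file_data(file_name)
    # ...then fetch (or lazily build) the indexed lookup model for it.
    lookup_model = LookupService.get_lookup_model_for_file_data(
        file_data, value_column, label_column)
    # Each dependency row carries the full spreadsheet row as JSON.
    return {row.value: row.data for row in lookup_model.dependencies}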

View File

@ -55,7 +55,7 @@ def process_waiting_tasks():
with app.app_context():
WorkflowService.do_waiting()
scheduler.add_job(process_waiting_tasks,'interval',minutes=5)
scheduler.add_job(process_waiting_tasks,'interval',minutes=1)
scheduler.start()

View File

@ -82,7 +82,7 @@ paths:
schema :
type : integer
get:
operationId: crc.api.file.get_document_directory
operationId: crc.api.document.get_document_directory
summary: Returns a directory of all files for study in a nested structure
tags:
- Document Categories

crc/api/document.py Normal file
View File

@ -0,0 +1,18 @@
from crc.models.api_models import DocumentDirectorySchema
from crc.models.file import File
from crc.services.document_service import DocumentService
from crc.services.file_service import FileService
from crc.services.lookup_service import LookupService
def get_document_directory(study_id, workflow_id=None):
"""
return a nested list of files arranged according to the category hierarchy
defined in the doc dictionary
"""
file_models = FileService.get_files_for_study(study_id=study_id)
doc_dict = DocumentService.get_dictionary()
files = (File.from_models(model, FileService.get_file_data(model.id), doc_dict) for model in file_models)
directory = DocumentService.get_directory(doc_dict, files, workflow_id)
return DocumentDirectorySchema(many=True).dump(directory)
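
For context, a hedged sketch of exercising the new operation from a test, in the style of this project's API tests (the URL path is an assumption based on the api.yml hunk above; self.app and logged_in_headers come from BaseTest):

import json

rv = self.app.get('/v1.0/document_directory/%i' % study.id,
                  content_type="application/json",
                  headers=self.logged_in_headers())
self.assert_success(rv)
directory = json.loads(rv.get_data(as_text=True))  # nested category levels, files at the leaves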

View File

@ -7,71 +7,15 @@ from flask import send_file
from crc import session
from crc.api.common import ApiError
from crc.api.user import verify_token
from crc.models.api_models import DocumentDirectory, DocumentDirectorySchema
from crc.models.file import FileSchema, FileModel, File, FileModelSchema, FileDataModel, FileType
from crc.models.workflow import WorkflowSpecModel
from crc.services.document_service import DocumentService
from crc.services.file_service import FileService
def ensure_exists(output, categories, expanded):
"""
This is a recursive function, it expects a list of
levels with a file object at the end (kinda like duck,duck,duck,goose)
for each level, it makes sure that level is already in the structure and if it is not
it will add it
function terminates upon getting an entry that is a file object ( or really anything but string)
"""
current_item = categories[0]
found = False
if isinstance(current_item, str):
for item in output:
if item.level == current_item:
found = True
item.filecount = item.filecount + 1
item.expanded = expanded | item.expanded
ensure_exists(item.children, categories[1:], expanded)
if not found:
new_level = DocumentDirectory(level=current_item)
new_level.filecount = 1
new_level.expanded = expanded
output.append(new_level)
ensure_exists(new_level.children, categories[1:], expanded)
else:
new_level = DocumentDirectory(file=current_item)
new_level.expanded = expanded
output.append(new_level)
def get_document_directory(study_id, workflow_id=None):
"""
return a nested list of files arranged according to the category hierarchy
defined in the doc dictionary
"""
output = []
doc_dict = FileService.get_doc_dictionary()
file_models = FileService.get_files_for_study(study_id=study_id)
files = (to_file_api(model) for model in file_models)
for file in files:
if file.irb_doc_code in doc_dict:
doc_code = doc_dict[file.irb_doc_code]
else:
doc_code = {'category1': "Unknown", 'category2': '', 'category3': ''}
if workflow_id:
expand = file.workflow_id == int(workflow_id)
else:
expand = False
print(expand)
categories = [x for x in [doc_code['category1'],doc_code['category2'],doc_code['category3'],file] if x != '']
ensure_exists(output, categories, expanded=expand)
return DocumentDirectorySchema(many=True).dump(output)
def to_file_api(file_model):
"""Converts a FileModel object to something we can return via the api"""
return File.from_models(file_model, FileService.get_file_data(file_model.id),
FileService.get_doc_dictionary())
DocumentService.get_dictionary())
def get_files(workflow_spec_id=None, workflow_id=None, form_field_key=None,study_id=None):

View File

@ -1,15 +1,14 @@
import enum
from typing import cast
from marshmallow import INCLUDE, EXCLUDE, fields, Schema
from marshmallow import INCLUDE, EXCLUDE, Schema
from marshmallow_enum import EnumField
from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
from sqlalchemy import func, Index
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import deferred, relationship
from crc.models.data_store import DataStoreModel # this is needed by the relationship
from crc import db, ma
from crc.models.data_store import DataStoreModel
class FileType(enum.Enum):
@ -43,7 +42,7 @@ CONTENT_TYPES = {
"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"gif": "image/gif",
"jpg": "image/jpeg",
"md" : "text/plain",
"md": "text/plain",
"pdf": "application/pdf",
"png": "image/png",
"ppt": "application/vnd.ms-powerpoint",
@ -71,7 +70,6 @@ class FileDataModel(db.Model):
file_model = db.relationship("FileModel", foreign_keys=[file_model_id])
class FileModel(db.Model):
__tablename__ = 'file'
id = db.Column(db.Integer, primary_key=True)
@ -79,18 +77,19 @@ class FileModel(db.Model):
type = db.Column(db.Enum(FileType))
is_status = db.Column(db.Boolean)
content_type = db.Column(db.String)
is_reference = db.Column(db.Boolean, nullable=False, default=False) # A global reference file.
primary = db.Column(db.Boolean, nullable=False, default=False) # Is this the primary BPMN in a workflow?
primary_process_id = db.Column(db.String, nullable=True) # An id in the xml of BPMN documents, critical for primary BPMN.
is_reference = db.Column(db.Boolean, nullable=False, default=False) # A global reference file.
primary = db.Column(db.Boolean, nullable=False, default=False) # Is this the primary BPMN in a workflow?
primary_process_id = db.Column(db.String, nullable=True) # An id in the xml of BPMN documents, for primary BPMN.
workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'), nullable=True)
workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True)
irb_doc_code = db.Column(db.String, nullable=True) # Code reference to the irb_documents.xlsx reference file.
irb_doc_code = db.Column(db.String, nullable=True) # Code reference to the irb_documents.xlsx reference file.
# A request was made to delete the file, but we can't because there are
# active approvals or running workflows that depend on it. So we archive
# it instead and hide it in the interface.
is_review = db.Column(db.Boolean, default=False, nullable=True)
archived = db.Column(db.Boolean, default=False, nullable=False)
data_stores = relationship("DataStoreModel", cascade="all,delete", backref="file")
data_stores = relationship(DataStoreModel, cascade="all,delete", backref="file")
class File(object):
@classmethod
@ -107,7 +106,7 @@ class File(object):
instance.workflow_id = model.workflow_id
instance.irb_doc_code = model.irb_doc_code
instance.type = model.type
if model.irb_doc_code and model.irb_doc_code in doc_dictionary:
if model.irb_doc_code and model.irb_doc_code in doc_dictionary:
instance.document = doc_dictionary[model.irb_doc_code]
else:
instance.document = {}
@ -147,7 +146,6 @@ class FileSchema(Schema):
type = EnumField(FileType)
class LookupFileModel(db.Model):
"""Gives us a quick way to tell what kind of lookup is set on a form field.
Connected to the file data model, so that if a new version of the same file is
@ -158,8 +156,10 @@ class LookupFileModel(db.Model):
task_spec_id = db.Column(db.String)
field_id = db.Column(db.String)
is_ldap = db.Column(db.Boolean) # Allows us to run an ldap query instead of a db lookup.
is_reference = db.Column(db.Boolean) # For lookup models that are globally referenced.
file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id'))
dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan")
dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model",
cascade="all, delete, delete-orphan")
class LookupDataModel(db.Model):
@ -169,7 +169,7 @@ class LookupDataModel(db.Model):
value = db.Column(db.String)
label = db.Column(db.String)
# In the future, we might allow adding an additional "search" column if we want to search things not in label.
data = db.Column(db.JSON) # all data for the row is stored in a json structure here, but not searched presently.
data = db.Column(db.JSON) # all data for the row is stored in a json structure here, but not searched presently.
# Assure there is a searchable index on the label column, so we can get fast results back.
# query with:
@ -192,7 +192,7 @@ class LookupDataSchema(SQLAlchemyAutoSchema):
load_instance = True
include_relationships = False
include_fk = False # Includes foreign keys
exclude = ['id'] # Do not include the id field, it should never be used via the API.
exclude = ['id'] # Do not include the id field, it should never be used via the API.
class SimpleFileSchema(ma.Schema):
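
The comment in this hunk about a searchable index on the label column refers to Postgres full-text search. A hedged sketch of the kind of query that index supports, written with SQLAlchemy in the style of LookupService (the foreign-key column name is an assumption):

from sqlalchemy import desc, func

from crc import db
from crc.models.file import LookupDataModel

def search_labels(lookup_file_model_id, query_text):
    term = func.plainto_tsquery('simple', query_text)
    vector = func.to_tsvector('simple', LookupDataModel.label)
    return (db.session.query(LookupDataModel)
            .filter(LookupDataModel.lookup_file_model_id == lookup_file_model_id)  # FK name assumed
            .filter(vector.op('@@')(term))
            .order_by(desc(func.ts_rank(vector, term)))
            .limit(10)
            .all())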

View File

@ -2,6 +2,7 @@ from crc import session
from crc.api.common import ApiError
from crc.models.file import FileModel
from crc.scripts.script import Script
from crc.services.document_service import DocumentService
from crc.services.file_service import FileService
@ -9,7 +10,7 @@ class DeleteFile(Script):
@staticmethod
def process_document_deletion(doc_code, workflow_id, task):
if FileService.is_allowed_document(doc_code):
if DocumentService.is_allowed_document(doc_code):
result = session.query(FileModel).filter(
FileModel.workflow_id == workflow_id, FileModel.irb_doc_code == doc_code).all()
if isinstance(result, list) and len(result) > 0 and isinstance(result[0], FileModel):

View File

@ -3,6 +3,7 @@ from flask import g
from crc.api.common import ApiError
from crc.services.data_store_service import DataStoreBase
from crc.scripts.script import Script
from crc.services.document_service import DocumentService
from crc.services.file_service import FileService
@ -17,17 +18,22 @@ class FileDataSet(Script, DataStoreBase):
del(kwargs['file_id'])
return True
def validate_kw_args(self,**kwargs):
if kwargs.get('key',None) is None:
def validate_kw_args(self, **kwargs):
if kwargs.get('key', None) is None:
raise ApiError(code="missing_argument",
message=f"The 'file_data_get' script requires a keyword argument of 'key'")
message=f"The 'file_data_get' script requires a keyword argument of 'key'")
if kwargs.get('file_id', None) is None:
raise ApiError(code="missing_argument",
message=f"The 'file_data_get' script requires a keyword argument of 'file_id'")
if kwargs.get('value', None) is None:
raise ApiError(code="missing_argument",
message=f"The 'file_data_get' script requires a keyword argument of 'value'")
if kwargs.get('file_id',None) is None:
raise ApiError(code="missing_argument",
message=f"The 'file_data_get' script requires a keyword argument of 'file_id'")
if kwargs.get('value',None) is None:
raise ApiError(code="missing_argument",
message=f"The 'file_data_get' script requires a keyword argument of 'value'")
if kwargs['key'] == 'irb_code' and not DocumentService.is_allowed_document(kwargs.get('value')):
raise ApiError("invalid_form_field_key",
"When setting an irb_code, the form field id must match a known document in the "
"irb_docunents.xslx reference file. This code is not found in that file '%s'" %
kwargs.get('value'))
return True
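
The validation above enforces a three-keyword contract (key, file_id, value) and, for irb_code keys, membership in the document dictionary. An illustrative call that would pass it (the file id is hypothetical; 'UVACompl_PRCAppr' is a doc code that appears in the tests below):

from crc.scripts.file_data_set import FileDataSet

FileDataSet().validate_kw_args(key='irb_code',
                               file_id=42,  # hypothetical file id
                               value='UVACompl_PRCAppr')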

View File

@ -10,6 +10,7 @@ from crc.models.protocol_builder import ProtocolBuilderInvestigatorType
from crc.models.study import StudyModel, StudySchema
from crc.api import workflow as workflow_api
from crc.scripts.script import Script
from crc.services.document_service import DocumentService
from crc.services.file_service import FileService
from crc.services.protocol_builder import ProtocolBuilderService
from crc.services.study_service import StudyService
@ -168,8 +169,8 @@ Please note this is just a few examples, ALL known document types are returned i
"""For validation only, pretend no results come back from pb"""
self.check_args(args, 2)
# Assure the reference file exists (a bit hacky, but we want to raise this error early and cleanly).
FileService.get_reference_file_data(FileService.DOCUMENT_LIST)
FileService.get_reference_file_data(FileService.INVESTIGATOR_LIST)
FileService.get_reference_file_data(DocumentService.DOCUMENT_LIST)
FileService.get_reference_file_data(StudyService.INVESTIGATOR_LIST)
# we call the real do_task so we can
# seed workflow validations with settings from studies in PB Mock
# in order to test multiple paths thru the workflow

View File

@ -0,0 +1,92 @@
from crc.api.common import ApiError
from crc.models.api_models import DocumentDirectory
from crc.services.file_service import FileService
from crc.services.lookup_service import LookupService
class DocumentService(object):
DOCUMENT_LIST = "irb_documents.xlsx"
@staticmethod
def is_allowed_document(code):
doc_dict = DocumentService.get_dictionary()
return code in doc_dict
@staticmethod
def verify_doc_dictionary(dd):
"""
We currently get structured information from an XLS file. If someone accidentally
changes a header we will have problems later, so we verify here that we have
the headers we need.
"""
required_fields = ['category1', 'category2', 'category3', 'description']
# we only need to check the first item, as all of the keys should be the same
key = list(dd.keys())[0]
for field in required_fields:
if field not in dd[key].keys():
raise ApiError(code="Invalid document list %s" % DocumentService.DOCUMENT_LIST,
message='Please check the headers in %s' % DocumentService.DOCUMENT_LIST)
@staticmethod
def get_dictionary():
"""Returns a dictionary of document details keyed on the doc_code."""
file_data = FileService.get_reference_file_data(DocumentService.DOCUMENT_LIST)
lookup_model = LookupService.get_lookup_model_for_file_data(file_data, 'code', 'description')
doc_dict = {}
for lookup_data in lookup_model.dependencies:
doc_dict[lookup_data.value] = lookup_data.data
return doc_dict
@staticmethod
def get_directory(doc_dict, files, workflow_id):
"""Returns a list of directories, hierarchically nested by category, with files at the deepest level.
Empty directories are not included."""
directory = []
if files:
for file in files:
if file.irb_doc_code in doc_dict:
doc_code = doc_dict[file.irb_doc_code]
else:
doc_code = {'category1': "Unknown", 'category2': None, 'category3': None}
if workflow_id:
expand = file.workflow_id == int(workflow_id)
else:
expand = False
categories = [x for x in [doc_code['category1'], doc_code['category2'], doc_code['category3'], file] if x]
DocumentService.ensure_exists(directory, categories, expanded=expand)
return directory
@staticmethod
def ensure_exists(output, categories, expanded):
"""
This is a recursive function; it expects a list of levels with a file object
at the end (like duck, duck, duck, goose). For each level it makes sure that
level is already in the structure, adding it if it is not. The recursion
terminates upon reaching an entry that is a file object (really, anything
that is not a string).
"""
current_item = categories[0]
found = False
if isinstance(current_item, str):
for item in output:
if item.level == current_item:
found = True
item.filecount = item.filecount + 1
item.expanded = expanded | item.expanded
DocumentService.ensure_exists(item.children, categories[1:], expanded)
if not found:
new_level = DocumentDirectory(level=current_item)
new_level.filecount = 1
new_level.expanded = expanded
output.append(new_level)
DocumentService.ensure_exists(new_level.children, categories[1:], expanded)
else:
new_level = DocumentDirectory(file=current_item)
new_level.expanded = expanded
output.append(new_level)
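
To make the recursion concrete, a hedged sketch of the structure get_directory produces (doc_dict and files would come from get_dictionary() and FileService, as in crc/api/document.py above):

from crc.services.document_service import DocumentService

doc_dict = DocumentService.get_dictionary()
files = []  # e.g. File objects built via File.from_models(...)
directory = DocumentService.get_directory(doc_dict, files, workflow_id=None)
# For two files coded 'UVACompl_PRCAppr' the result would nest roughly as:
# [DocumentDirectory(level='UVA Compliance', filecount=2, children=[
#      DocumentDirectory(level='PRC Approval', filecount=2, children=[
#          DocumentDirectory(file=...), DocumentDirectory(file=...)])])]
# Each string category becomes (or increments) a level; the non-string File
# entry ends the recursion as a leaf.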

View File

@ -10,8 +10,6 @@ from lxml import etree
from SpiffWorkflow.bpmn.parser.ValidationException import ValidationException
from lxml.etree import XMLSyntaxError
from pandas import ExcelFile
from pandas._libs.missing import NA
from sqlalchemy import desc
from sqlalchemy.exc import IntegrityError
@ -38,34 +36,6 @@ def camel_to_snake(camel):
class FileService(object):
"""Provides consistent management and rules for storing, retrieving and processing files."""
DOCUMENT_LIST = "irb_documents.xlsx"
INVESTIGATOR_LIST = "investigators.xlsx"
__doc_dictionary = None
@staticmethod
def verify_doc_dictionary(dd):
"""
We are currently getting structured information from an XLS file, if someone accidentally
changes a header we will have problems later, so we will verify we have the headers we need
here
"""
required_fields = ['category1','category2','category3','description']
# we only need to check the first item, as all of the keys should be the same
key = list(dd.keys())[0]
for field in required_fields:
if field not in dd[key].keys():
raise ApiError(code="Invalid document list %s"%FileService.DOCUMENT_LIST,
message='Please check the headers in %s'%FileService.DOCUMENT_LIST)
@staticmethod
def get_doc_dictionary():
if not FileService.__doc_dictionary:
FileService.__doc_dictionary = FileService.get_reference_data(FileService.DOCUMENT_LIST, 'code', ['id'])
FileService.verify_doc_dictionary(FileService.__doc_dictionary)
return FileService.__doc_dictionary
@staticmethod
def add_workflow_spec_file(workflow_spec: WorkflowSpecModel,
@ -88,10 +58,7 @@ class FileService(object):
return FileService.update_file(file_model, binary_data, content_type)
@staticmethod
def is_allowed_document(code):
doc_dict = FileService.get_doc_dictionary()
return code in doc_dict
@staticmethod
@cache
@ -104,12 +71,6 @@ class FileService(object):
def update_irb_code(file_id, irb_doc_code):
"""Create a new file and associate it with the workflow
Please note that the irb_doc_code MUST be a known file in the irb_documents.xlsx reference document."""
if not FileService.is_allowed_document(irb_doc_code):
raise ApiError("invalid_form_field_key",
"When uploading files, the form field id must match a known document in the "
"irb_docunents.xslx reference file. This code is not found in that file '%s'" % irb_doc_code)
""" """
file_model = session.query(FileModel)\
.filter(FileModel.id == file_id).first()
if file_model is None:
@ -137,28 +98,6 @@ class FileService(object):
)
return FileService.update_file(file_model, binary_data, content_type)
@staticmethod
def get_reference_data(reference_file_name, index_column, int_columns=[]):
""" Opens a reference file (assumes that it is xls file) and returns the data as a
dictionary, each row keyed on the given index_column name. If there are columns
that should be represented as integers, pass these as an array of int_columns, lest
you get '1.0' rather than '1'
fixme: This is stupid stupid slow. Place it in the database and just check if it is up to date."""
data_model = FileService.get_reference_file_data(reference_file_name)
xls = ExcelFile(data_model.data, engine='openpyxl')
df = xls.parse(xls.sheet_names[0])
df = df.convert_dtypes()
df = pd.DataFrame(df).dropna(how='all') # Drop null rows
df = pd.DataFrame(df).replace({NA: None}) # replace NA with None.
for c in int_columns:
df[c] = df[c].fillna(0)
df = df.astype({c: 'Int64'})
df = df.fillna('')
df = df.applymap(str)
df = df.set_index(index_column)
return json.loads(df.to_json(orient='index'))
@staticmethod
def get_workflow_files(workflow_id):
"""Returns all the file models associated with a running workflow."""

View File

@ -12,7 +12,7 @@ from sqlalchemy.sql.functions import GenericFunction
from crc import db
from crc.api.common import ApiError
from crc.models.api_models import Task
from crc.models.file import FileDataModel, LookupFileModel, LookupDataModel
from crc.models.file import FileModel, FileDataModel, LookupFileModel, LookupDataModel
from crc.models.workflow import WorkflowModel, WorkflowSpecDependencyFile
from crc.services.file_service import FileService
from crc.services.ldap_service import LdapService
@ -25,11 +25,14 @@ class TSRank(GenericFunction):
class LookupService(object):
"""Provides tools for doing lookups for auto-complete fields.
This can currently take two forms:
"""Provides tools for doing lookups for auto-complete fields, and rapid access to any
uploaded spreadsheets.
This can currently take three forms:
1) Lookup from spreadsheet data associated with a workflow specification.
in which case we store the spreadsheet data in a lookup table with full
text indexing enabled, and run searches against that table.
2) Lookup from spreadsheet data associated with a specific file. This allows us
to get a lookup model for a specific file object, such as a reference file.
3) Lookup from LDAP records. In which case we call out to an external service
to pull back detailed records and return them.
@ -44,6 +47,14 @@ class LookupService(object):
workflow = db.session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first()
return LookupService.__get_lookup_model(workflow, spiff_task.task_spec.name, field.id)
@staticmethod
def get_lookup_model_for_file_data(file_data: FileDataModel, value_column, label_column):
lookup_model = db.session.query(LookupFileModel).filter(LookupFileModel.file_data_model_id == file_data.id).first()
if not lookup_model:
logging.warning("!!!! Making a very expensive call to update the lookup model.")
lookup_model = LookupService.build_lookup_table(file_data, value_column, label_column)
return lookup_model
@staticmethod
def __get_lookup_model(workflow, task_spec_id, field_id):
lookup_model = db.session.query(LookupFileModel) \
@ -139,7 +150,8 @@ class LookupService(object):
return lookup_model
@staticmethod
def build_lookup_table(data_model: FileDataModel, value_column, label_column, workflow_spec_id, task_spec_id, field_id):
def build_lookup_table(data_model: FileDataModel, value_column, label_column,
workflow_spec_id=None, task_spec_id=None, field_id=None):
""" In some cases the lookup table can be very large. This method will add all values to the database
in a way that can be searched and returned via an api call - rather than sending the full set of
options along with the form. It will only open the file and process the options if something has
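
A hedged sketch of the caching behavior this enables: the first call after a reference file changes pays the expensive build_lookup_table() cost, while later calls find the existing LookupFileModel keyed on file_data_model_id (calls mirrored from DocumentService.get_dictionary above):

from crc.services.document_service import DocumentService
from crc.services.file_service import FileService
from crc.services.lookup_service import LookupService

file_data = FileService.get_reference_file_data(DocumentService.DOCUMENT_LIST)
lookup = LookupService.get_lookup_model_for_file_data(file_data, 'code', 'description')
rows = {d.value: d.data for d in lookup.dependencies}  # one entry per spreadsheet row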

View File

@ -22,13 +22,17 @@ from crc.models.study import StudyModel, Study, StudyStatus, Category, WorkflowM
from crc.models.task_event import TaskEventModel, TaskEvent
from crc.models.workflow import WorkflowSpecCategoryModel, WorkflowModel, WorkflowSpecModel, WorkflowState, \
WorkflowStatus, WorkflowSpecDependencyFile
from crc.services.document_service import DocumentService
from crc.services.file_service import FileService
from crc.services.ldap_service import LdapService
from crc.services.lookup_service import LookupService
from crc.services.protocol_builder import ProtocolBuilderService
from crc.services.workflow_processor import WorkflowProcessor
class StudyService(object):
"""Provides common tools for working with a Study"""
"""Provides consistent management and rules for storing, retrieving and processing files."""
INVESTIGATOR_LIST = "investigators.xlsx"
@staticmethod
def get_studies_for_user(user):
@ -77,7 +81,7 @@ class StudyService(object):
workflow_metas = StudyService._get_workflow_metas(study_id)
files = FileService.get_files_for_study(study.id)
files = (File.from_models(model, FileService.get_file_data(model.id),
FileService.get_doc_dictionary()) for model in files)
DocumentService.get_dictionary()) for model in files)
study.files = list(files)
# Calling this line repeatedly is very very slow. It creates the
# master spec and runs it. Don't execute this for Abandoned studies, as
@ -265,14 +269,14 @@ class StudyService(object):
# Loop through all known document types, get the counts for those files,
# and use pb_docs to mark those as required.
doc_dictionary = FileService.get_reference_data(FileService.DOCUMENT_LIST, 'code', ['id'])
doc_dictionary = DocumentService.get_dictionary()
documents = {}
for code, doc in doc_dictionary.items():
if ProtocolBuilderService.is_enabled():
doc['required'] = False
if ProtocolBuilderService.is_enabled() and doc['id']:
pb_data = next((item for item in pb_docs if int(item['AUXDOCID']) == int(doc['id'])), None)
doc['required'] = False
if pb_data:
doc['required'] = True
@ -282,7 +286,7 @@ class StudyService(object):
# Make a display name out of categories
name_list = []
for cat_key in ['category1', 'category2', 'category3']:
if doc[cat_key] not in ['', 'NULL']:
if doc[cat_key] not in ['', 'NULL', None]:
name_list.append(doc[cat_key])
doc['display_name'] = ' / '.join(name_list)
@ -319,12 +323,22 @@ class StudyService(object):
documents[code] = doc
return Box(documents)
@staticmethod
def get_investigator_dictionary():
"""Returns a dictionary of document details keyed on the doc_code."""
file_data = FileService.get_reference_file_data(StudyService.INVESTIGATOR_LIST)
lookup_model = LookupService.get_lookup_model_for_file_data(file_data, 'code', 'label')
doc_dict = {}
for lookup_data in lookup_model.dependencies:
doc_dict[lookup_data.value] = lookup_data.data
return doc_dict
@staticmethod
def get_investigators(study_id, all=False):
"""Convert array of investigators from protocol builder into a dictionary keyed on the type. """
# Loop through all known investigator types as set in the reference file
inv_dictionary = FileService.get_reference_data(FileService.INVESTIGATOR_LIST, 'code')
inv_dictionary = StudyService.get_investigator_dictionary()
# Get PB required docs
pb_investigators = ProtocolBuilderService.get_investigators(study_id=study_id)
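
For reference, a hedged example of the new investigator lookup; keys come from the 'code' column of investigators.xlsx and each value is the full spreadsheet row as a dict (the 'label' key follows from the lookup call above):

from crc.services.study_service import StudyService

inv_dict = StudyService.get_investigator_dictionary()
for code, row in inv_dict.items():
    print(code, row.get('label'))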

View File

@ -30,6 +30,7 @@ from crc.models.study import StudyModel
from crc.models.task_event import TaskEventModel
from crc.models.user import UserModel, UserModelSchema
from crc.models.workflow import WorkflowModel, WorkflowStatus, WorkflowSpecModel
from crc.services.document_service import DocumentService
from crc.services.file_service import FileService
from crc.services.lookup_service import LookupService
from crc.services.study_service import StudyService
@ -97,12 +98,15 @@ class WorkflowService(object):
def do_waiting():
records = db.session.query(WorkflowModel).filter(WorkflowModel.status==WorkflowStatus.waiting).all()
for workflow_model in records:
print('processing workflow %d'%workflow_model.id)
processor = WorkflowProcessor(workflow_model)
processor.bpmn_workflow.refresh_waiting_tasks()
processor.bpmn_workflow.do_engine_steps()
processor.save()
# fixme: Try catch with a very explicit error about the study, workflow and task that failed.
try:
app.logger.info('Processing workflow %s' % workflow_model.id)
processor = WorkflowProcessor(workflow_model)
processor.bpmn_workflow.refresh_waiting_tasks()
processor.bpmn_workflow.do_engine_steps()
processor.save()
except Exception:
app.logger.error('Failed to process workflow %s' % workflow_model.id, exc_info=True)
@staticmethod
@timeit
@ -424,7 +428,7 @@ class WorkflowService(object):
doc_code = WorkflowService.evaluate_property('doc_code', field, task)
file_model = FileModel(name="test.png",
irb_doc_code = field.id)
doc_dict = FileService.get_doc_dictionary()
doc_dict = DocumentService.get_dictionary()
file = File.from_models(file_model, None, doc_dict)
return FileSchema().dump(file)
elif field.type == 'files':

View File

@ -7,7 +7,9 @@ from crc.models.file import CONTENT_TYPES
from crc.models.ldap import LdapModel
from crc.models.user import UserModel
from crc.models.workflow import WorkflowSpecModel, WorkflowSpecCategoryModel
from crc.services.document_service import DocumentService
from crc.services.file_service import FileService
from crc.services.study_service import StudyService
class ExampleDataLoader:
@ -315,14 +317,14 @@ class ExampleDataLoader:
def load_reference_documents(self):
file_path = os.path.join(app.root_path, 'static', 'reference', 'irb_documents.xlsx')
file = open(file_path, "rb")
FileService.add_reference_file(FileService.DOCUMENT_LIST,
FileService.add_reference_file(DocumentService.DOCUMENT_LIST,
binary_data=file.read(),
content_type=CONTENT_TYPES['xls'])
file.close()
file_path = os.path.join(app.root_path, 'static', 'reference', 'investigators.xlsx')
file = open(file_path, "rb")
FileService.add_reference_file(FileService.INVESTIGATOR_LIST,
FileService.add_reference_file(StudyService.INVESTIGATOR_LIST,
binary_data=file.read(),
content_type=CONTENT_TYPES['xls'])
file.close()

View File

@ -2,6 +2,7 @@
# IMPORTANT - Environment must be loaded before app, models, etc....
import os
os.environ["TESTING"] = "true"
import json
@ -23,6 +24,7 @@ from crc.services.file_service import FileService
from crc.services.study_service import StudyService
from crc.services.user_service import UserService
from crc.services.workflow_service import WorkflowService
from crc.services.document_service import DocumentService
from example_data import ExampleDataLoader
# UNCOMMENT THIS FOR DEBUGGING SQL ALCHEMY QUERIES
@ -138,8 +140,7 @@ class BaseTest(unittest.TestCase):
delete everything that matters in the local database - this is used to
test ground zero copy of workflow specs.
"""
session.execute("delete from workflow; delete from file_data; delete from file; delete from workflow_spec;")
session.commit()
ExampleDataLoader.clean_db()
def load_example_data(self, use_crc_data=False, use_rrt_data=False):
"""use_crc_data will cause this to load the mammoth collection of documents
@ -282,28 +283,6 @@ class BaseTest(unittest.TestCase):
session.commit()
return study
def _create_study_workflow_approvals(self, user_uid, title, primary_investigator_id, approver_uids, statuses,
workflow_spec_name="random_fact"):
study = self.create_study(uid=user_uid, title=title, primary_investigator_id=primary_investigator_id)
workflow = self.create_workflow(workflow_name=workflow_spec_name, study=study)
approvals = []
for i in range(len(approver_uids)):
approvals.append(self.create_approval(
study=study,
workflow=workflow,
approver_uid=approver_uids[i],
status=statuses[i],
version=1
))
full_study = {
'study': study,
'workflow': workflow,
'approvals': approvals,
}
return full_study
def create_workflow(self, workflow_name, display_name=None, study=None, category_id=None, as_user="dhf8r"):
session.flush()
@ -320,30 +299,11 @@ class BaseTest(unittest.TestCase):
def create_reference_document(self):
file_path = os.path.join(app.root_path, 'static', 'reference', 'irb_documents.xlsx')
file = open(file_path, "rb")
FileService.add_reference_file(FileService.DOCUMENT_LIST,
FileService.add_reference_file(DocumentService.DOCUMENT_LIST,
binary_data=file.read(),
content_type=CONTENT_TYPES['xls'])
content_type=CONTENT_TYPES['xlsx'])
file.close()
def create_approval(
self,
study=None,
workflow=None,
approver_uid=None,
status=None,
version=None,
):
study = study or self.create_study()
workflow = workflow or self.create_workflow()
approver_uid = approver_uid or self.test_uid
status = status or ApprovalStatus.PENDING.value
version = version or 1
approval = ApprovalModel(study=study, workflow=workflow, approver_uid=approver_uid, status=status,
version=version)
session.add(approval)
session.commit()
return approval
def get_workflow_common(self, url, user):
rv = self.app.get(url,
headers=self.logged_in_headers(user),

View File

@ -16,6 +16,12 @@
OGC will upload the Non-Funded Executed Agreement after it has been negotiated by OSP contract negotiator.</bpmn:documentation>
<bpmn:extensionElements>
<camunda:formData>
<camunda:formField id="Date" label="Version Date" type="date">
<camunda:properties>
<camunda:property id="group" value="PCRApproval" />
<camunda:property id="file_data" value="Some_File" />
</camunda:properties>
</camunda:formField>
<camunda:formField id="file_type" type="enum" defaultValue="AD_CoCApp">
<camunda:value id="AD_CoCApp" name="Ancillary Documents / Case Report Form" />
<camunda:value id="AD_CoCAppr" name="Ancillary Documents / CoC Approval" />
@ -32,12 +38,6 @@ OGC will upload the Non-Funded Executed Agreement after it has been negotiated b
<camunda:property id="file_data" value="Some_File" />
</camunda:properties>
</camunda:formField>
<camunda:formField id="Date" label="Version Date" type="date">
<camunda:properties>
<camunda:property id="group" value="PCRApproval" />
<camunda:property id="file_data" value="Some_File" />
</camunda:properties>
</camunda:formField>
</camunda:formData>
</bpmn:extensionElements>
<bpmn:incoming>SequenceFlow_0ea9hvd</bpmn:incoming>
@ -67,4 +67,4 @@ OGC will upload the Non-Funded Executed Agreement after it has been negotiated b
</bpmndi:BPMNShape>
</bpmndi:BPMNPlane>
</bpmndi:BPMNDiagram>
</bpmn:definitions>
</bpmn:definitions>

View File

@ -1,14 +1,16 @@
import io
import json
import os
from tests.base_test import BaseTest
from crc import session, db
from crc import session, db, app
from crc.models.file import FileModel, FileType, FileSchema, FileModelSchema
from crc.models.workflow import WorkflowSpecModel
from crc.services.file_service import FileService
from crc.services.workflow_processor import WorkflowProcessor
from crc.models.data_store import DataStoreModel
from crc.services.document_service import DocumentService
from example_data import ExampleDataLoader
@ -110,20 +112,23 @@ class TestFilesApi(BaseTest):
self.assertEqual(0, len(json.loads(rv.get_data(as_text=True))))
def test_set_reference_file(self):
file_name = "irb_document_types.xls"
data = {'file': (io.BytesIO(b"abcdef"), "does_not_matter.xls")}
file_name = "irb_documents.xlsx"
filepath = os.path.join(app.root_path, 'static', 'reference', 'irb_documents.xlsx')
with open(filepath, 'rb') as myfile:
file_data = myfile.read()
data = {'file': (io.BytesIO(file_data), file_name)}
rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True,
content_type='multipart/form-data', headers=self.logged_in_headers())
self.assert_success(rv)
self.assertIsNotNone(rv.get_data())
json_data = json.loads(rv.get_data(as_text=True))
file = FileModelSchema().load(json_data, session=session)
self.assertEqual(FileType.xls, file.type)
self.assertEqual(FileType.xlsx, file.type)
self.assertTrue(file.is_reference)
self.assertEqual("application/vnd.ms-excel", file.content_type)
self.assertEqual("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", file.content_type)
def test_set_reference_file_bad_extension(self):
file_name = FileService.DOCUMENT_LIST
file_name = DocumentService.DOCUMENT_LIST
data = {'file': (io.BytesIO(b"abcdef"), "does_not_matter.ppt")}
rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True,
content_type='multipart/form-data', headers=self.logged_in_headers())
@ -131,22 +136,28 @@ class TestFilesApi(BaseTest):
def test_get_reference_file(self):
file_name = "irb_document_types.xls"
data = {'file': (io.BytesIO(b"abcdef"), "some crazy thing do not care.xls")}
filepath = os.path.join(app.root_path, 'static', 'reference', 'irb_documents.xlsx')
with open(filepath, 'rb') as myfile:
file_data = myfile.read()
data = {'file': (io.BytesIO(file_data), file_name)}
rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True,
content_type='multipart/form-data', headers=self.logged_in_headers())
rv = self.app.get('/v1.0/reference_file/%s' % file_name, headers=self.logged_in_headers())
self.assert_success(rv)
data_out = rv.get_data()
self.assertEqual(b"abcdef", data_out)
self.assertEqual(file_data, data_out)
def test_list_reference_files(self):
ExampleDataLoader.clean_db()
file_name = FileService.DOCUMENT_LIST
data = {'file': (io.BytesIO(b"abcdef"), file_name)}
file_name = DocumentService.DOCUMENT_LIST
filepath = os.path.join(app.root_path, 'static', 'reference', 'irb_documents.xlsx')
with open(filepath, 'rb') as myfile:
file_data = myfile.read()
data = {'file': (io.BytesIO(file_data), file_name)}
rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True,
content_type='multipart/form-data', headers=self.logged_in_headers())
self.assert_success(rv)
rv = self.app.get('/v1.0/reference_file',
follow_redirects=True,
content_type="application/json", headers=self.logged_in_headers())
@ -159,7 +170,8 @@ class TestFilesApi(BaseTest):
def test_update_file_info(self):
self.load_example_data()
file: FileModel = session.query(FileModel).first()
self.create_reference_document()
file: FileModel = session.query(FileModel).filter(FileModel.is_reference==False).first()
file.name = "silly_new_name.bpmn"
rv = self.app.put('/v1.0/file/%i' % file.id,

View File

@ -1,4 +1,3 @@
import json
from SpiffWorkflow.bpmn.PythonScriptEngine import Box
@ -15,6 +14,7 @@ from crc.services.file_service import FileService
from crc.services.study_service import StudyService
from crc.services.workflow_processor import WorkflowProcessor
from crc.scripts.file_data_set import FileDataSet
from crc.services.document_service import DocumentService
class TestStudyDetailsDocumentsScript(BaseTest):
@ -43,8 +43,8 @@ class TestStudyDetailsDocumentsScript(BaseTest):
# Remove the reference file.
file_model = db.session.query(FileModel). \
filter(FileModel.is_reference == True). \
filter(FileModel.name == FileService.DOCUMENT_LIST).first()
filter(FileModel.is_reference.is_(True)). \
filter(FileModel.name == DocumentService.DOCUMENT_LIST).first()
if file_model:
db.session.query(FileDataModel).filter(FileDataModel.file_model_id == file_model.id).delete()
db.session.query(FileModel).filter(FileModel.id == file_model.id).delete()
@ -71,7 +71,7 @@ class TestStudyDetailsDocumentsScript(BaseTest):
def test_load_lookup_data(self):
self.create_reference_document()
dict = FileService.get_reference_data(FileService.DOCUMENT_LIST, 'code', ['id'])
doc_dict = DocumentService.get_dictionary()
self.assertIsNotNone(doc_dict)
def get_required_docs(self):

View File

@ -122,11 +122,11 @@ class TestStudyService(BaseTest):
self.assertEqual("Cancer Center's PRC Approval Form", documents["UVACompl_PRCAppr"]['description'])
self.assertEqual("UVA Compliance", documents["UVACompl_PRCAppr"]['category1'])
self.assertEqual("PRC Approval", documents["UVACompl_PRCAppr"]['category2'])
self.assertEqual("", documents["UVACompl_PRCAppr"]['category3'])
self.assertEqual(None, documents["UVACompl_PRCAppr"]['category3'])
self.assertEqual("CRC", documents["UVACompl_PRCAppr"]['Who Uploads?'])
self.assertEqual(0, documents["UVACompl_PRCAppr"]['count'])
self.assertEqual(True, documents["UVACompl_PRCAppr"]['required'])
self.assertEqual('6', documents["UVACompl_PRCAppr"]['id'])
self.assertEqual(6, documents["UVACompl_PRCAppr"]['id'])
@patch('crc.services.protocol_builder.ProtocolBuilderService.get_required_docs') # mock_docs
def test_get_documents_has_file_details(self, mock_docs):

View File

@ -3,9 +3,6 @@ from tests.base_test import BaseTest
from crc.services.file_service import FileService
class TestDocumentDirectories(BaseTest):
def test_directory_list(self):