import enum
import urllib.parse

import flask
from flask import url_for
from marshmallow import INCLUDE, EXCLUDE, Schema
from marshmallow.fields import Method
from marshmallow_enum import EnumField
from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
from sqlalchemy import func, Index
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import deferred, relationship

from crc import db, ma
from crc.models.data_store import DataStoreModel
class FileType(enum.Enum):
    bpmn = "bpmn"
    csv = 'csv'
    dmn = "dmn"
    doc = "doc"
    docx = "docx"
    gif = 'gif'
    jpg = 'jpg'
    md = 'md'
    pdf = 'pdf'
    png = 'png'
    ppt = 'ppt'
    pptx = 'pptx'
    rtf = 'rtf'
    svg = "svg"
    svg_xml = "svg+xml"
    txt = 'txt'
    xls = 'xls'
    xlsx = 'xlsx'
    xml = 'xml'
    zip = 'zip'


CONTENT_TYPES = {
    "bpmn": "text/xml",
    "csv": "text/csv",
    "dmn": "text/xml",
    "doc": "application/msword",
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "gif": "image/gif",
    "jpg": "image/jpeg",
    "md": "text/plain",
    "pdf": "application/pdf",
    "png": "image/png",
    "ppt": "application/vnd.ms-powerpoint",
    "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "rtf": "application/rtf",
    "svg": "image/svg+xml",
    "svg_xml": "image/svg+xml",
    "txt": "text/plain",
    "xls": "application/vnd.ms-excel",
    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "xml": "application/xml",
    "zip": "application/zip"
}
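# Illustrative sketch, not part of the original module: CONTENT_TYPES maps a file
# extension (the FileType value) to the MIME type used when serving file data.
# A lookup with a conservative fallback might look like this; the fallback value
# is an assumption, not something this module defines.
#
#     content_type = CONTENT_TYPES.get(file_extension, "application/octet-stream")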
class DocumentModel(db.Model):
    __tablename__ = 'document'
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String, nullable=False)
    type = db.Column(db.String, nullable=False)
    content_type = db.Column(db.String, nullable=False)
    workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True)
    task_spec = db.Column(db.String, nullable=True)
    irb_doc_code = db.Column(db.String, nullable=False)  # Code reference to the documents.xlsx reference file.
    # TODO: Fix relationship with data_store table, then add this back in
    # data_stores = relationship(DataStoreModel, cascade="all,delete", backref="file")
    md5_hash = db.Column(UUID(as_uuid=True), unique=False, nullable=False)
    data = deferred(db.Column(db.LargeBinary))  # Don't load it unless you have to.
    # TODO: Determine whether size is used (in frontend/bpmn)
    # size = db.Column(db.Integer, default=0)  # Do we need this?
    date_modified = db.Column(db.DateTime(timezone=True), onupdate=func.now())
    date_created = db.Column(db.DateTime(timezone=True), server_default=func.now())
    user_uid = db.Column(db.String, db.ForeignKey('user.uid'), nullable=True)
    archived = db.Column(db.Boolean, default=False)
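# Illustrative sketch, not part of the original module: with Flask-SQLAlchemy, the
# active (non-archived) documents attached to a workflow could be fetched like this;
# the variable names are hypothetical.
#
#     active_docs = DocumentModel.query.filter_by(workflow_id=workflow_id, archived=False).all()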
class FileDataModel(db.Model):
    __tablename__ = 'file_data'
    id = db.Column(db.Integer, primary_key=True)
    md5_hash = db.Column(UUID(as_uuid=True), unique=False, nullable=False)
    data = deferred(db.Column(db.LargeBinary))  # Don't load it unless you have to.
    version = db.Column(db.Integer, default=0)
    size = db.Column(db.Integer, default=0)
    date_created = db.Column(db.DateTime(timezone=True), server_default=func.now())
    file_model_id = db.Column(db.Integer, db.ForeignKey('file.id'))
    file_model = db.relationship("FileModel", foreign_keys=[file_model_id])
    user_uid = db.Column(db.String, db.ForeignKey('user.uid'), nullable=True)
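# Illustrative sketch, not part of the original module: each FileDataModel row holds
# one version of a FileModel's content, so the most recent version could be fetched
# by ordering on the version column; the variable names are hypothetical.
#
#     latest = FileDataModel.query.filter_by(file_model_id=file_id).order_by(
#         FileDataModel.version.desc()).first()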
class FileModel(db.Model):
    __tablename__ = 'file'
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String)
    type = db.Column(db.Enum(FileType))
    content_type = db.Column(db.String)
    workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True)
    task_spec = db.Column(db.String, nullable=True)
    irb_doc_code = db.Column(db.String, nullable=True)  # Code reference to the documents.xlsx reference file.
    data_stores = relationship(DataStoreModel, cascade="all,delete", backref="file")
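# Note (added for clarity, not in the original module): the data_stores relationship
# above gives each DataStoreModel a "file" backref pointing at its FileModel, and the
# cascade="all,delete" setting means deleting a FileModel through the session should
# also delete its associated DataStoreModel rows.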
class File(object):
    def __init__(self):
        self.id = None
        self.name = None
        self.content_type = None
        self.workflow_id = None
        self.irb_doc_code = None
        self.type = None
        self.document = {}
        self.last_modified = None
        self.size = None
        self.data_store = {}

    @classmethod
    def from_document_model(cls, document_model: DocumentModel, doc_dictionary):
        if document_model.irb_doc_code and document_model.irb_doc_code in doc_dictionary:
            document = doc_dictionary[document_model.irb_doc_code]
        else:
            document = {}
        instance = cls()
        instance.id = document_model.id
        instance.name = document_model.name
        instance.content_type = document_model.content_type
        instance.workflow_id = document_model.workflow_id
        instance.irb_doc_code = document_model.irb_doc_code
        instance.type = document_model.type
        instance.document = document
        instance.last_modified = document_model.date_modified
        instance.size = None
        instance.data_store = {}
        return instance
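    # Illustrative usage, not part of the original module: doc_dictionary is expected to
    # map an irb_doc_code to its entry from the documents.xlsx reference file, so a caller
    # might build a File like this; the variable names are hypothetical.
    #
    #     file = File.from_document_model(document_model, documents_by_code)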
    @classmethod
    def from_file_system(cls, file_name, file_type, content_type,
                         last_modified, file_size):
        instance = cls()
        instance.name = file_name
        instance.content_type = content_type
        instance.type = file_type
        instance.document = {}
        instance.last_modified = last_modified
        instance.size = file_size
        # FIXME: How to track the user id?
        instance.data_store = {}
        return instance
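    # Illustrative usage, not part of the original module: a caller walking the file
    # system could build a File from os.stat() metadata roughly like this (assumes os
    # and datetime are imported; the path and file type shown are hypothetical).
    #
    #     stats = os.stat(path)
    #     f = File.from_file_system(os.path.basename(path), FileType.bpmn,
    #                               CONTENT_TYPES["bpmn"],
    #                               datetime.fromtimestamp(stats.st_mtime), stats.st_size)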
class DocumentModelSchema(SQLAlchemyAutoSchema):
    class Meta:
        model = DocumentModel
        load_instance = True
        include_relationships = True
        include_fk = True  # Includes foreign keys
        unknown = EXCLUDE


class FileModelSchema(SQLAlchemyAutoSchema):
    class Meta:
        model = FileModel
        load_instance = True
        include_relationships = True
        include_fk = True  # Includes foreign keys
        unknown = EXCLUDE

    type = EnumField(FileType)
class FileSchema(Schema):
    class Meta:
        model = File
        fields = ["id", "name", "content_type", "workflow_id",
                  "irb_doc_code", "last_modified", "type",
                  "size", "data_store", "document", "user_uid", "url"]
        unknown = INCLUDE

    url = Method("get_url")

    def get_url(self, obj):
        token = 'not_available'
        if hasattr(obj, 'id') and obj.id is not None:
            file_url = url_for("/v1_0.crc_api_file_refactor_get_file_data_link", file_id=obj.id, _external=True)
            if hasattr(flask.g, 'user'):
                token = flask.g.user.encode_auth_token()
            url = file_url + '?auth_token=' + urllib.parse.quote_plus(token)
            return url
        else:
            return ""
class LookupFileModel(db.Model):
    """Gives us a quick way to tell what kind of lookup is set on a form field."""
    __tablename__ = 'lookup_file'
    id = db.Column(db.Integer, primary_key=True)
    workflow_spec_id = db.Column(db.String)
    task_spec_id = db.Column(db.String)
    field_id = db.Column(db.String)
    file_name = db.Column(db.String)
    file_timestamp = db.Column(db.FLOAT)  # The file system's time stamp, to check for changes to the file.
    is_ldap = db.Column(db.Boolean)  # Allows us to run an LDAP query instead of a db lookup.
    dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model",
                                   cascade="all, delete, delete-orphan")
class LookupDataModel(db.Model):
    __tablename__ = 'lookup_data'
    id = db.Column(db.Integer, primary_key=True)
    lookup_file_model_id = db.Column(db.Integer, db.ForeignKey('lookup_file.id'))
    value = db.Column(db.String)
    label = db.Column(db.String)
    # In the future, we might allow adding an additional "search" column if we want to search things not in label.
    data = db.Column(db.JSON)  # All data for the row is stored in a JSON structure here, but not searched presently.

    # Assure there is a searchable index on the label column, so we can get fast results back.
    # Query with:
    #   search_results = LookupDataModel.query.filter(LookupDataModel.label.match("INTERNAL")).all()
    __ts_vector__ = func.to_tsvector('simple', label)
    __table_args__ = (
        Index(
            'ix_lookupdata_tsv',
            __ts_vector__,  # Use 'simple', not 'english', to keep stop words in place.
            postgresql_using='gin'
        ),
    )
class LookupDataSchema(ma.Schema):
    class Meta:
        model = LookupDataModel
        load_instance = True
        include_relationships = False
        include_fk = False  # Do not include foreign keys.
        exclude = ['id']  # Do not include the id field; it should never be used via the API.
class SimpleFileSchema(ma.Schema):
    class Meta:
        model = DocumentModel
        fields = ["name"]