import enum import urllib import connexion import flask from flask import url_for from marshmallow import INCLUDE, EXCLUDE, Schema from marshmallow.fields import Method from marshmallow_enum import EnumField from marshmallow_sqlalchemy import SQLAlchemyAutoSchema from sqlalchemy import func, Index from sqlalchemy.dialects.postgresql import UUID from sqlalchemy.orm import deferred, relationship from crc import db, ma, app from crc.models.data_store import DataStoreModel class FileType(enum.Enum): bpmn = "bpmn" csv = 'csv' dmn = "dmn" doc = "doc" docx = "docx" gif = 'gif' jpg = 'jpg' md = 'md' pdf = 'pdf' png = 'png' ppt = 'ppt' pptx = 'pptx' rtf = 'rtf' svg = "svg" svg_xml = "svg+xml" txt = 'txt' xls = 'xls' xlsx = 'xlsx' xml = 'xml' zip = 'zip' CONTENT_TYPES = { "bpmn": "text/xml", "csv": "text/csv", "dmn": "text/xml", "doc": "application/msword", "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "gif": "image/gif", "jpg": "image/jpeg", "md": "text/plain", "pdf": "application/pdf", "png": "image/png", "ppt": "application/vnd.ms-powerpoint", "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "rtf": "application/rtf", "svg": "image/svg+xml", "svg_xml": "image/svg+xml", "txt": "text/plain", "xls": "application/vnd.ms-excel", "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xml": "application/xml", "zip": "application/zip" } class FileDataModel(db.Model): __tablename__ = 'file_data' id = db.Column(db.Integer, primary_key=True) md5_hash = db.Column(UUID(as_uuid=True), unique=False, nullable=False) data = deferred(db.Column(db.LargeBinary)) # Don't load it unless you have to. version = db.Column(db.Integer, default=0) size = db.Column(db.Integer, default=0) date_created = db.Column(db.DateTime(timezone=True), server_default=func.now()) file_model_id = db.Column(db.Integer, db.ForeignKey('file.id')) file_model = db.relationship("FileModel", foreign_keys=[file_model_id]) user_uid = db.Column(db.String, db.ForeignKey('user.uid'), nullable=True) class FileModel(db.Model): __tablename__ = 'file' id = db.Column(db.Integer, primary_key=True) name = db.Column(db.String) type = db.Column(db.Enum(FileType)) is_status = db.Column(db.Boolean) content_type = db.Column(db.String) is_reference = db.Column(db.Boolean, nullable=False, default=False) # A global reference file. primary = db.Column(db.Boolean, nullable=False, default=False) # Is this the primary BPMN in a workflow? primary_process_id = db.Column(db.String, nullable=True) # An id in the xml of BPMN documents, for primary BPMN. workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'), nullable=True) workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True) task_spec = db.Column(db.String, nullable=True) irb_doc_code = db.Column(db.String, nullable=True) # Code reference to the irb_documents.xlsx reference file. # A request was made to delete the file, but we can't because there are # active approvals or running workflows that depend on it. So we archive # it instead, hide it in the interface. is_review = db.Column(db.Boolean, default=False, nullable=True) archived = db.Column(db.Boolean, default=False, nullable=False) data_stores = relationship(DataStoreModel, cascade="all,delete", backref="file") class File(object): @classmethod def from_models(cls, model: FileModel, data_model: FileDataModel, doc_dictionary): instance = cls() instance.id = model.id instance.name = model.name instance.is_status = model.is_status instance.is_reference = model.is_reference instance.content_type = model.content_type instance.primary = model.primary instance.primary_process_id = model.primary_process_id instance.workflow_spec_id = model.workflow_spec_id instance.workflow_id = model.workflow_id instance.irb_doc_code = model.irb_doc_code instance.type = model.type if model.irb_doc_code and model.irb_doc_code in doc_dictionary: instance.document = doc_dictionary[model.irb_doc_code] else: instance.document = {} if data_model: instance.last_modified = data_model.date_created instance.latest_version = data_model.version instance.size = data_model.size instance.user_uid = data_model.user_uid else: instance.last_modified = None instance.latest_version = None instance.data_store = {} for ds in model.data_stores: instance.data_store[ds.key] = ds.value return instance class FileModelSchema(SQLAlchemyAutoSchema): class Meta: model = FileModel load_instance = True include_relationships = True include_fk = True # Includes foreign keys unknown = EXCLUDE type = EnumField(FileType) class FileSchema(Schema): class Meta: model = File fields = ["id", "name", "is_status", "is_reference", "content_type", "primary", "primary_process_id", "workflow_spec_id", "workflow_id", "irb_doc_code", "last_modified", "latest_version", "type", "size", "data_store", "document", "user_uid", "url"] unknown = INCLUDE type = EnumField(FileType) url = Method("get_url") def get_url(self, obj): token = 'not_available' if obj.id is None: return "" # We can't return a url for a file that isn't stored yet. file_url = url_for("/v1_0.crc_api_file_get_file_data_link", file_id=obj.id, _external=True) if hasattr(flask.g, 'user'): token = flask.g.user.encode_auth_token() url = file_url + '?auth_token=' + urllib.parse.quote_plus(token) return url class LookupFileModel(db.Model): """Gives us a quick way to tell what kind of lookup is set on a form field. Connected to the file data model, so that if a new version of the same file is created, we can update the listing.""" __tablename__ = 'lookup_file' id = db.Column(db.Integer, primary_key=True) workflow_spec_id = db.Column(db.String) task_spec_id = db.Column(db.String) field_id = db.Column(db.String) is_ldap = db.Column(db.Boolean) # Allows us to run an ldap query instead of a db lookup. file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id')) dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan") class LookupDataModel(db.Model): __tablename__ = 'lookup_data' id = db.Column(db.Integer, primary_key=True) lookup_file_model_id = db.Column(db.Integer, db.ForeignKey('lookup_file.id')) value = db.Column(db.String) label = db.Column(db.String) # In the future, we might allow adding an additional "search" column if we want to search things not in label. data = db.Column(db.JSON) # all data for the row is stored in a json structure here, but not searched presently. # Assure there is a searchable index on the label column, so we can get fast results back. # query with: # search_results = LookupDataModel.query.filter(LookupDataModel.label.match("INTERNAL")).all() __ts_vector__ = func.to_tsvector('simple', label) __table_args__ = ( Index( 'ix_lookupdata_tsv', __ts_vector__, # Use simple, not english to keep stop words in place. postgresql_using='gin' ), ) class LookupDataSchema(ma.Schema): class Meta: model = LookupDataModel load_instance = True include_relationships = False include_fk = False # Includes foreign keys exclude = ['id'] # Do not include the id field, it should never be used via the API. class SimpleFileSchema(ma.Schema): class Meta: model = FileModel fields = ["name"]