cr-connect-workflow/crc/models/file.py

import enum
import urllib

import flask
from flask import url_for
from marshmallow import INCLUDE, EXCLUDE, Schema
from marshmallow.fields import Method
from marshmallow_enum import EnumField
from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
from sqlalchemy import func, Index
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import deferred, relationship

from crc import db, ma
from crc.models.data_store import DataStoreModel


class FileType(enum.Enum):
    bpmn = "bpmn"
    csv = 'csv'
    dmn = "dmn"
    doc = "doc"
    docx = "docx"
    gif = 'gif'
    jpg = 'jpg'
    md = 'md'
    pdf = 'pdf'
    png = 'png'
    ppt = 'ppt'
    pptx = 'pptx'
    rtf = 'rtf'
    svg = "svg"
    svg_xml = "svg+xml"
    txt = 'txt'
    xls = 'xls'
    xlsx = 'xlsx'
    xml = 'xml'
    zip = 'zip'


CONTENT_TYPES = {
    "bpmn":  "text/xml",
    "csv": "text/csv",
    "dmn": "text/xml",
    "doc": "application/msword",
    "docx":  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "gif": "image/gif",
    "jpg": "image/jpeg",
    "md": "text/plain",
    "pdf": "application/pdf",
    "png": "image/png",
    "ppt": "application/vnd.ms-powerpoint",
    "pptx":  "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "rtf": "application/rtf",
    "svg": "image/svg+xml",
    "svg_xml": "image/svg+xml",
    "txt": "text/plain",
    "xls": "application/vnd.ms-excel",
    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "xml": "application/xml",
    "zip": "application/zip"
}


class FileDataModel(db.Model):
    __tablename__ = 'file_data'
    id = db.Column(db.Integer, primary_key=True)
    md5_hash = db.Column(UUID(as_uuid=True), unique=False, nullable=False)
    data = deferred(db.Column(db.LargeBinary))  # Don't load it unless you have to.
    version = db.Column(db.Integer, default=0)
    size = db.Column(db.Integer, default=0)
    date_created = db.Column(db.DateTime(timezone=True), server_default=func.now())
    file_model_id = db.Column(db.Integer, db.ForeignKey('file.id'))
    file_model = db.relationship("FileModel", foreign_keys=[file_model_id])
    user_uid = db.Column(db.String, db.ForeignKey('user.uid'), nullable=True)


class FileModel(db.Model):
    __tablename__ = 'file'
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String)
    type = db.Column(db.Enum(FileType))
    is_status = db.Column(db.Boolean)
    content_type = db.Column(db.String)
    is_reference = db.Column(db.Boolean, nullable=False, default=False)  # A global reference file.
    primary = db.Column(db.Boolean, nullable=False, default=False)  # Is this the primary BPMN in a workflow?
    primary_process_id = db.Column(db.String, nullable=True)  # An id in the xml of BPMN documents, for primary BPMN.
    workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'), nullable=True)
    workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True)
    task_spec = db.Column(db.String, nullable=True)
    irb_doc_code = db.Column(db.String, nullable=True)  # Code reference to the irb_documents.xlsx reference file.
    # A request was made to delete the file, but we can't because there are
    # active approvals or running workflows that depend on it.  So we archive
    # it instead, hide it in the interface.
    is_review = db.Column(db.Boolean, default=False, nullable=True)
    archived = db.Column(db.Boolean, default=False, nullable=False)
    data_stores = relationship(DataStoreModel, cascade="all,delete", backref="file")


class File(object):
    @classmethod
    def from_models(cls, model: FileModel, data_model, doc_dictionary):
        instance = cls()
        instance.id = model.id
        instance.name = model.name
        instance.is_status = model.is_status
        instance.is_reference = model.is_reference
        instance.content_type = model.content_type
        instance.primary = model.primary
        instance.primary_process_id = model.primary_process_id
        instance.workflow_spec_id = model.workflow_spec_id
        instance.workflow_id = model.workflow_id
        instance.irb_doc_code = model.irb_doc_code
        instance.type = model.type
        if model.irb_doc_code and model.irb_doc_code in doc_dictionary:
            instance.document = doc_dictionary[model.irb_doc_code]
        else:
            instance.document = {}
        if data_model:
            instance.last_modified = data_model.date_created
            instance.latest_version = data_model.version
            instance.size = data_model.size
            instance.user_uid = data_model.user_uid
        else:
            instance.last_modified = None
            instance.latest_version = None

        instance.data_store = {}
        for ds in model.data_stores:
            instance.data_store[ds.key] = ds.value

        return instance


class FileModelSchema(SQLAlchemyAutoSchema):
    class Meta:
        model = FileModel
        load_instance = True
        include_relationships = True
        include_fk = True  # Includes foreign keys
        unknown = EXCLUDE
    type = EnumField(FileType)


class FileSchema(Schema):
    class Meta:
        model = File
        fields = ["id", "name", "is_status", "is_reference", "content_type",
                  "primary", "primary_process_id", "workflow_spec_id", "workflow_id",
                  "irb_doc_code", "last_modified", "latest_version", "type", "size", "data_store",
                  "document", "user_uid", "url"]
        unknown = INCLUDE
    type = EnumField(FileType)
    url = Method("get_url")

    def get_url(self, obj):
        token = 'not_available'
        if obj.id is None:
            return "" # We can't return a url for a file that isn't stored yet.
        file_url = url_for("/v1_0.crc_api_file_get_file_data_link", file_id=obj.id, _external=True)
        if hasattr(flask.g, 'user'):
            token = flask.g.user.encode_auth_token()
        url = file_url + '?auth_token=' + urllib.parse.quote_plus(token)
        return url


class LookupFileModel(db.Model):
    """Gives us a quick way to tell what kind of lookup is set on a form field.
    Connected to the file data model, so that if a new version of the same file is
    created, we can update the listing."""
    __tablename__ = 'lookup_file'
    id = db.Column(db.Integer, primary_key=True)
    workflow_spec_id = db.Column(db.String)
    task_spec_id = db.Column(db.String)
    field_id = db.Column(db.String)
    is_ldap = db.Column(db.Boolean)  # Allows us to run an ldap query instead of a db lookup.
    file_model_id = db.Column(db.Integer, db.ForeignKey('file.id'))
    last_updated = db.Column(db.DateTime(timezone=True))
    dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model",
                                   cascade="all, delete, delete-orphan")
    file_model = db.relationship("FileModel")


class LookupDataModel(db.Model):
    __tablename__ = 'lookup_data'
    id = db.Column(db.Integer, primary_key=True)
    lookup_file_model_id = db.Column(db.Integer, db.ForeignKey('lookup_file.id'))
    value = db.Column(db.String)
    label = db.Column(db.String)
    # In the future, we might allow adding an additional "search" column if we want to search things not in label.
    data = db.Column(db.JSON)  # all data for the row is stored in a json structure here, but not searched presently.

    # Assure there is a searchable index on the label column, so we can get fast results back.
    # query with:
    # search_results = LookupDataModel.query.filter(LookupDataModel.label.match("INTERNAL")).all()

    __ts_vector__ = func.to_tsvector('simple', label)

    __table_args__ = (
        Index(
            'ix_lookupdata_tsv',
            __ts_vector__,  # Use simple, not english to keep stop words in place.
            postgresql_using='gin'
            ),
        )


class LookupDataSchema(ma.Schema):
    class Meta:
        model = LookupDataModel
        load_instance = True
        include_relationships = False
        include_fk = False  # Includes foreign keys
        exclude = ['id']  # Do not include the id field, it should never be used via the API.


class SimpleFileSchema(ma.Schema):

    class Meta:
        model = FileModel
        fields = ["name"]