Dan f815add699 1. Add a default directory for the location of SYNC files.
2. Added a last_updated column to the lookup table
3. The Lookup service now uses the above, and compares it to the actual file date, we can then rebuild the lookup if needed.
4. That 755 migration loads up the models, so when you change the models, the migration starts to fail.  Not really sure what to do here, but modify the migration while we are in process.
2022-01-20 13:05:58 -05:00

222 lines
8.2 KiB
Python

import enum
import urllib
import flask
from flask import url_for
from marshmallow import INCLUDE, EXCLUDE, Schema
from marshmallow.fields import Method
from marshmallow_enum import EnumField
from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
from sqlalchemy import func, Index
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import deferred, relationship
from crc import db, ma
from crc.models.data_store import DataStoreModel
class FileType(enum.Enum):
bpmn = "bpmn"
csv = 'csv'
dmn = "dmn"
doc = "doc"
docx = "docx"
gif = 'gif'
jpg = 'jpg'
md = 'md'
pdf = 'pdf'
png = 'png'
ppt = 'ppt'
pptx = 'pptx'
rtf = 'rtf'
svg = "svg"
svg_xml = "svg+xml"
txt = 'txt'
xls = 'xls'
xlsx = 'xlsx'
xml = 'xml'
zip = 'zip'
CONTENT_TYPES = {
"bpmn": "text/xml",
"csv": "text/csv",
"dmn": "text/xml",
"doc": "application/msword",
"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"gif": "image/gif",
"jpg": "image/jpeg",
"md": "text/plain",
"pdf": "application/pdf",
"png": "image/png",
"ppt": "application/vnd.ms-powerpoint",
"pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
"rtf": "application/rtf",
"svg": "image/svg+xml",
"svg_xml": "image/svg+xml",
"txt": "text/plain",
"xls": "application/vnd.ms-excel",
"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"xml": "application/xml",
"zip": "application/zip"
}
class FileDataModel(db.Model):
__tablename__ = 'file_data'
id = db.Column(db.Integer, primary_key=True)
md5_hash = db.Column(UUID(as_uuid=True), unique=False, nullable=False)
data = deferred(db.Column(db.LargeBinary)) # Don't load it unless you have to.
version = db.Column(db.Integer, default=0)
size = db.Column(db.Integer, default=0)
date_created = db.Column(db.DateTime(timezone=True), server_default=func.now())
file_model_id = db.Column(db.Integer, db.ForeignKey('file.id'))
file_model = db.relationship("FileModel", foreign_keys=[file_model_id])
user_uid = db.Column(db.String, db.ForeignKey('user.uid'), nullable=True)
class FileModel(db.Model):
__tablename__ = 'file'
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String)
type = db.Column(db.Enum(FileType))
is_status = db.Column(db.Boolean)
content_type = db.Column(db.String)
is_reference = db.Column(db.Boolean, nullable=False, default=False) # A global reference file.
primary = db.Column(db.Boolean, nullable=False, default=False) # Is this the primary BPMN in a workflow?
primary_process_id = db.Column(db.String, nullable=True) # An id in the xml of BPMN documents, for primary BPMN.
workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'), nullable=True)
workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True)
task_spec = db.Column(db.String, nullable=True)
irb_doc_code = db.Column(db.String, nullable=True) # Code reference to the irb_documents.xlsx reference file.
# A request was made to delete the file, but we can't because there are
# active approvals or running workflows that depend on it. So we archive
# it instead, hide it in the interface.
is_review = db.Column(db.Boolean, default=False, nullable=True)
archived = db.Column(db.Boolean, default=False, nullable=False)
data_stores = relationship(DataStoreModel, cascade="all,delete", backref="file")
class File(object):
@classmethod
def from_models(cls, model: FileModel, data_model, doc_dictionary):
instance = cls()
instance.id = model.id
instance.name = model.name
instance.is_status = model.is_status
instance.is_reference = model.is_reference
instance.content_type = model.content_type
instance.primary = model.primary
instance.primary_process_id = model.primary_process_id
instance.workflow_spec_id = model.workflow_spec_id
instance.workflow_id = model.workflow_id
instance.irb_doc_code = model.irb_doc_code
instance.type = model.type
if model.irb_doc_code and model.irb_doc_code in doc_dictionary:
instance.document = doc_dictionary[model.irb_doc_code]
else:
instance.document = {}
if data_model:
instance.last_modified = data_model.date_created
instance.latest_version = data_model.version
instance.size = data_model.size
instance.user_uid = data_model.user_uid
else:
instance.last_modified = None
instance.latest_version = None
instance.data_store = {}
for ds in model.data_stores:
instance.data_store[ds.key] = ds.value
return instance
class FileModelSchema(SQLAlchemyAutoSchema):
class Meta:
model = FileModel
load_instance = True
include_relationships = True
include_fk = True # Includes foreign keys
unknown = EXCLUDE
type = EnumField(FileType)
class FileSchema(Schema):
class Meta:
model = File
fields = ["id", "name", "is_status", "is_reference", "content_type",
"primary", "primary_process_id", "workflow_spec_id", "workflow_id",
"irb_doc_code", "last_modified", "latest_version", "type", "size", "data_store",
"document", "user_uid", "url"]
unknown = INCLUDE
type = EnumField(FileType)
url = Method("get_url")
def get_url(self, obj):
token = 'not_available'
if obj.id is None:
return "" # We can't return a url for a file that isn't stored yet.
file_url = url_for("/v1_0.crc_api_file_get_file_data_link", file_id=obj.id, _external=True)
if hasattr(flask.g, 'user'):
token = flask.g.user.encode_auth_token()
url = file_url + '?auth_token=' + urllib.parse.quote_plus(token)
return url
class LookupFileModel(db.Model):
"""Gives us a quick way to tell what kind of lookup is set on a form field.
Connected to the file data model, so that if a new version of the same file is
created, we can update the listing."""
__tablename__ = 'lookup_file'
id = db.Column(db.Integer, primary_key=True)
workflow_spec_id = db.Column(db.String)
task_spec_id = db.Column(db.String)
field_id = db.Column(db.String)
is_ldap = db.Column(db.Boolean) # Allows us to run an ldap query instead of a db lookup.
file_model_id = db.Column(db.Integer, db.ForeignKey('file.id'))
last_updated = db.Column(db.DateTime(timezone=True))
dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model",
cascade="all, delete, delete-orphan")
file_model = db.relationship("FileModel")
class LookupDataModel(db.Model):
__tablename__ = 'lookup_data'
id = db.Column(db.Integer, primary_key=True)
lookup_file_model_id = db.Column(db.Integer, db.ForeignKey('lookup_file.id'))
value = db.Column(db.String)
label = db.Column(db.String)
# In the future, we might allow adding an additional "search" column if we want to search things not in label.
data = db.Column(db.JSON) # all data for the row is stored in a json structure here, but not searched presently.
# Assure there is a searchable index on the label column, so we can get fast results back.
# query with:
# search_results = LookupDataModel.query.filter(LookupDataModel.label.match("INTERNAL")).all()
__ts_vector__ = func.to_tsvector('simple', label)
__table_args__ = (
Index(
'ix_lookupdata_tsv',
__ts_vector__, # Use simple, not english to keep stop words in place.
postgresql_using='gin'
),
)
class LookupDataSchema(ma.Schema):
class Meta:
model = LookupDataModel
load_instance = True
include_relationships = False
include_fk = False # Includes foreign keys
exclude = ['id'] # Do not include the id field, it should never be used via the API.
class SimpleFileSchema(ma.Schema):
class Meta:
model = FileModel
fields = ["name"]