From 5da6a9aa12af7213007f23f21015a3e06dfb68a5 Mon Sep 17 00:00:00 2001 From: mike cullerton Date: Fri, 20 May 2022 10:42:04 -0400 Subject: [PATCH 1/2] database cleanup after file refactor --- .../546575fa21a8_file_refactor_cleanup.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 migrations/versions/546575fa21a8_file_refactor_cleanup.py diff --git a/migrations/versions/546575fa21a8_file_refactor_cleanup.py b/migrations/versions/546575fa21a8_file_refactor_cleanup.py new file mode 100644 index 00000000..51d5ba84 --- /dev/null +++ b/migrations/versions/546575fa21a8_file_refactor_cleanup.py @@ -0,0 +1,28 @@ +"""file refactor cleanup + +Revision ID: 546575fa21a8 +Revises: ea1cd0f3d603 +Create Date: 2022-05-20 08:11:10.540804 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '546575fa21a8' +down_revision = 'ea1cd0f3d603' +branch_labels = None +depends_on = None + + +def upgrade(): + op.drop_constraint('document_id_key', 'data_store', type_='foreignkey') + op.drop_table('document') + op.drop_table('file_data') + op.drop_table('old_file') + + +def downgrade(): + # This is cleanup from file refactor. There is no downgrade. 
+ pass From 4c24fde9523f88763e36d15a0b27a847e5215087 Mon Sep 17 00:00:00 2001 From: mike cullerton Date: Fri, 20 May 2022 10:47:14 -0400 Subject: [PATCH 2/2] Code cleanup after file refactor --- crc/models/file.py | 43 +------------------ crc/scripts/get_zipped_files.py | 1 - crc/services/email_service.py | 1 - crc/services/user_file_service.py | 16 +------ ...2c0_migrate_file_data_to_document_table.py | 2 +- .../7225d990740e_move_files_to_filesystem.py | 2 +- 6 files changed, 6 insertions(+), 59 deletions(-) diff --git a/crc/models/file.py b/crc/models/file.py index d4abb8fb..e38a3704 100644 --- a/crc/models/file.py +++ b/crc/models/file.py @@ -80,37 +80,6 @@ class FileModel(db.Model): archived = db.Column(db.Boolean, default=False) -# class DocumentModel(FileModel): -# ... - - -class FileDataModel(db.Model): - # TODO: remove when the file refactor is finished - __tablename__ = 'file_data' - id = db.Column(db.Integer, primary_key=True) - md5_hash = db.Column(UUID(as_uuid=True), unique=False, nullable=False) - data = deferred(db.Column(db.LargeBinary)) # Don't load it unless you have to. - version = db.Column(db.Integer, default=0) - size = db.Column(db.Integer, default=0) - date_created = db.Column(db.DateTime(timezone=True), server_default=func.now()) - file_model_id = db.Column(db.Integer, db.ForeignKey('file.id')) - file_model = db.relationship("FileModel", foreign_keys=[file_model_id]) - user_uid = db.Column(db.String, db.ForeignKey('user.uid'), nullable=True) - - -class OldFileModel(db.Model): - # TODO: remove when the file refactor is finished - __tablename__ = 'old_file' - id = db.Column(db.Integer, primary_key=True) - name = db.Column(db.String) - type = db.Column(db.Enum(FileType)) - content_type = db.Column(db.String) - workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True) - task_spec = db.Column(db.String, nullable=True) - irb_doc_code = db.Column(db.String, nullable=True) # Code reference to the documents.xlsx reference file. 
- # data_stores = relationship(DataStoreModel, cascade="all,delete", backref="file") - - class File(object): def __init__(self): self.content_type = None @@ -165,15 +134,6 @@ class File(object): return instance -# class DocumentModelSchema(SQLAlchemyAutoSchema): -# class Meta: -# model = DocumentModel -# load_instance = True -# include_relationships = True -# include_fk = True # Includes foreign keys -# unknown = EXCLUDE - - class FileModelSchema(SQLAlchemyAutoSchema): class Meta: model = FileModel @@ -193,7 +153,8 @@ class FileSchema(Schema): unknown = INCLUDE url = Method("get_url") - def get_url(self, obj): + @staticmethod + def get_url(obj): token = 'not_available' if hasattr(obj, 'id') and obj.id is not None: file_url = url_for("/v1_0.crc_api_file_get_file_data_link", file_id=obj.id, _external=True) diff --git a/crc/scripts/get_zipped_files.py b/crc/scripts/get_zipped_files.py index 577c5903..de595c85 100644 --- a/crc/scripts/get_zipped_files.py +++ b/crc/scripts/get_zipped_files.py @@ -44,7 +44,6 @@ class GetZippedFiles(Script): for file in files: zip_key_words = doc_info[file.irb_doc_code]['zip_key_words'] file_name = f'{study_id} {zip_key_words} {file.name}' - # file_data = session.query(FileDataModel).filter(FileDataModel.file_model_id == file.id).first() zfw.writestr(file_name, file.data) with open(temp_file.name, mode='rb') as handle: diff --git a/crc/services/email_service.py b/crc/services/email_service.py index 43e9428a..49ab69ef 100644 --- a/crc/services/email_service.py +++ b/crc/services/email_service.py @@ -8,7 +8,6 @@ from jinja2 import Template from crc import app, db, mail, session from crc.models.email import EmailModel -from crc.models.file import FileDataModel from crc.models.study import StudyModel from crc.services.jinja_service import JinjaService diff --git a/crc/services/user_file_service.py b/crc/services/user_file_service.py index d409a727..b476a362 100644 --- a/crc/services/user_file_service.py +++ b/crc/services/user_file_service.py 
@@ -5,7 +5,6 @@ import random import string import pandas as pd -from github import Github, GithubObject, UnknownObjectException from uuid import UUID from lxml import etree @@ -15,7 +14,7 @@ from sqlalchemy.exc import IntegrityError from crc import session, app from crc.api.common import ApiError from crc.models.data_store import DataStoreModel -from crc.models.file import FileType, FileDataModel, FileModel, FileModel +from crc.models.file import FileType, FileModel from crc.models.workflow import WorkflowModel from crc.services.cache_service import cache from crc.services.user_service import UserService @@ -136,23 +135,12 @@ class UserFileService(object): @staticmethod def get_workflow_data_files(workflow_id=None): - """Returns all the FileDataModels related to a running workflow - + """Returns all the FileModels related to a running workflow - So these are the latest data files that were uploaded or generated that go along with this workflow. Not related to the spec in any way""" file_models = UserFileService.get_files(workflow_id=workflow_id) return file_models - @staticmethod - def get_file_data(file_id: int, version: int = None): - """Returns the file data with the given version, or the lastest file, if version isn't provided.""" - query = session.query(FileDataModel) \ - .filter(FileDataModel.file_model_id == file_id) - if version: - query = query.filter(FileDataModel.version == version) - else: - query = query.order_by(desc(FileDataModel.date_created)) - return query.first() - @staticmethod def delete_file_data_stores(file_id): try: diff --git a/migrations/versions/3489d5a6a2c0_migrate_file_data_to_document_table.py b/migrations/versions/3489d5a6a2c0_migrate_file_data_to_document_table.py index 6b748163..fa7a94f3 100644 --- a/migrations/versions/3489d5a6a2c0_migrate_file_data_to_document_table.py +++ b/migrations/versions/3489d5a6a2c0_migrate_file_data_to_document_table.py @@ -9,7 +9,7 @@ from alembic import op import sqlalchemy as sa from 
crc.models.data_store import DataStoreModel -from crc.models.file import OldFileModel, FileModel, FileDataModel +from crc.models.file import FileModel # OldFileModel and FileDataModel were removed from crc.models.file diff --git a/migrations/versions/7225d990740e_move_files_to_filesystem.py b/migrations/versions/7225d990740e_move_files_to_filesystem.py index d3641763..3628be32 100644 --- a/migrations/versions/7225d990740e_move_files_to_filesystem.py +++ b/migrations/versions/7225d990740e_move_files_to_filesystem.py @@ -11,7 +11,7 @@ import sqlalchemy as sa # import crc from crc import app, session -from crc.models.file import FileModel, FileModelSchema, FileDataModel, LookupFileModel, CONTENT_TYPES +from crc.models.file import FileModel, FileModelSchema, LookupFileModel, CONTENT_TYPES # FileDataModel was removed from crc.models.file from crc.services.spec_file_service import SpecFileService from crc.services.reference_file_service import ReferenceFileService from crc.services.workflow_service import WorkflowService