From a719cf4bf99db1f0d616ec6f465334fda8fc5d9f Mon Sep 17 00:00:00 2001
From: Dan
Date: Fri, 30 Apr 2021 11:55:12 -0400
Subject: [PATCH] When retrieving the study, only update the status of
 underlying workflows if specifically requested.

Record the size of a file in the database for quick access (this helps with a
frontend refactor, so it isn't downloading the file just to see its size).

Cleaning up the timing/performance metric reporting to make it easier to read.

Fixing a bug that prevented non-admins from getting the document directory.

(A short, hypothetical usage sketch of the new update_status parameter is
appended after the diff.)
---
 crc/api.yml                          |  8 ++++++++
 crc/api/study.py                     |  4 ++--
 crc/models/file.py                   | 12 +++++++++---
 crc/services/cache_service.py        |  4 ++--
 crc/services/file_service.py         |  5 ++++-
 crc/services/study_service.py        |  2 +-
 crc/services/workflow_processor.py   |  2 +-
 migrations/versions/62910318009f_.py | 28 ++++++++++++++++++++++++++++
 tests/files/test_file_service.py     |  2 +-
 tests/files/test_files_api.py        | 12 ++++++------
 10 files changed, 62 insertions(+), 17 deletions(-)
 create mode 100644 migrations/versions/62910318009f_.py

diff --git a/crc/api.yml b/crc/api.yml
index 980d6d13..f8f4ee0f 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -83,6 +83,8 @@ paths:
           type : integer
     get:
       operationId: crc.api.file.get_document_directory
+      security:
+        - auth_admin: ['secret']
       summary: Returns a directory of all files for study in a nested structure
       tags:
         - Document Categories
@@ -349,6 +351,12 @@ paths:
         schema:
           type: integer
           format: int32
+      - name: update_status
+        in: query
+        required: false
+        description: If set to true, will sync the study with Protocol Builder and ensure the status of all workflows is up to date (expensive).
+        schema:
+          type: boolean
     get:
       operationId: crc.api.study.get_study
       summary: Provides a single study
diff --git a/crc/api/study.py b/crc/api/study.py
index d91827b6..ddec1e02 100644
--- a/crc/api/study.py
+++ b/crc/api/study.py
@@ -74,8 +74,8 @@ def update_study(study_id, body):
     return StudySchema().dump(study)
 
 
-def get_study(study_id):
-    study = StudyService.get_study(study_id)
+def get_study(study_id, update_status=False):
+    study = StudyService.get_study(study_id, do_status=update_status)
     if (study is None):
         raise ApiError("unknown_study", 'The study "' + study_id + '" is not recognized.', status_code=404)
     return StudySchema().dump(study)
diff --git a/crc/models/file.py b/crc/models/file.py
index 8e73d12a..cdf29faf 100644
--- a/crc/models/file.py
+++ b/crc/models/file.py
@@ -1,7 +1,7 @@
 import enum
 from typing import cast
 
-from marshmallow import INCLUDE, EXCLUDE
+from marshmallow import INCLUDE, EXCLUDE, fields, Schema
 from marshmallow_enum import EnumField
 from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
 from sqlalchemy import func, Index
@@ -65,11 +65,13 @@ class FileDataModel(db.Model):
     md5_hash = db.Column(UUID(as_uuid=True), unique=False, nullable=False)
     data = deferred(db.Column(db.LargeBinary))  # Don't load it unless you have to.
     version = db.Column(db.Integer, default=0)
+    size = db.Column(db.Integer, default=0)
     date_created = db.Column(db.DateTime(timezone=True), default=func.now())
     file_model_id = db.Column(db.Integer, db.ForeignKey('file.id'))
     file_model = db.relationship("FileModel", foreign_keys=[file_model_id])
 
 
+
 class FileModel(db.Model):
     __tablename__ = 'file'
     id = db.Column(db.Integer, primary_key=True)
@@ -117,11 +119,13 @@ class File(object):
         if data_model:
             instance.last_modified = data_model.date_created
             instance.latest_version = data_model.version
+            instance.size = data_model.size
         else:
             instance.last_modified = None
             instance.latest_version = None
         return instance
 
+
 class FileModelSchema(SQLAlchemyAutoSchema):
     class Meta:
         model = FileModel
@@ -132,17 +136,19 @@ class FileModelSchema(SQLAlchemyAutoSchema):
     type = EnumField(FileType)
 
 
-class FileSchema(ma.Schema):
+class FileSchema(Schema):
     class Meta:
         model = File
         fields = ["id", "name", "is_status", "is_reference", "content_type",
                   "primary", "primary_process_id", "workflow_spec_id", "workflow_id",
                   "irb_doc_code", "last_modified", "latest_version", "type", "categories",
-                  "description", "category", "description", "download_name"]
+                  "description", "category", "description", "download_name", "size"]
+        unknown = INCLUDE
 
     type = EnumField(FileType)
 
 
+
 class LookupFileModel(db.Model):
     """Gives us a quick way to tell what kind of lookup is set on a form field.
     Connected to the file data model, so that if a new version of the same file is
diff --git a/crc/services/cache_service.py b/crc/services/cache_service.py
index ea6b1a6b..eb662fda 100644
--- a/crc/services/cache_service.py
+++ b/crc/services/cache_service.py
@@ -10,7 +10,7 @@ def firsttime():
 
 def sincetime(txt,lasttime):
     thistime=firsttime()
-    print('%s runtime was %2f'%(txt,thistime-lasttime))
+    print('%2.4f sec | %s' % (thistime-lasttime, txt))
     return thistime
 
 def timeit(f):
@@ -20,7 +20,7 @@ def timeit(f):
         ts = time.time()
         result = f(*args, **kw)
         te = time.time()
-        print('func:%r args:[%r, %r] took: %2.4f sec' % (f.__name__, args, kw, te-ts))
+        print('%2.4f sec | func:%r args:[%r, %r] ' % (te-ts, f.__name__, args, kw))
         return result
     return timed
 
diff --git a/crc/services/file_service.py b/crc/services/file_service.py
index 14e1b20e..2ecd0362 100644
--- a/crc/services/file_service.py
+++ b/crc/services/file_service.py
@@ -175,6 +175,8 @@ class FileService(object):
             order_by(desc(FileDataModel.date_created)).first()
 
         md5_checksum = UUID(hashlib.md5(binary_data).hexdigest())
+        size = len(binary_data)
+
         if (latest_data_model is not None) and (md5_checksum == latest_data_model.md5_hash):
            # This file does not need to be updated, it's the same file.  If it is arhived,
            # then de-arvhive it.
@@ -210,7 +212,8 @@ class FileService(object):
             new_file_data_model = FileDataModel(
                 data=binary_data, file_model_id=file_model.id, file_model=file_model,
-                version=version, md5_hash=md5_checksum, date_created=datetime.now()
+                version=version, md5_hash=md5_checksum, date_created=datetime.now(),
+                size=size
             )
             session.add_all([file_model, new_file_data_model])
             session.commit()
 
diff --git a/crc/services/study_service.py b/crc/services/study_service.py
index 749a4e14..331d6a35 100644
--- a/crc/services/study_service.py
+++ b/crc/services/study_service.py
@@ -53,7 +53,7 @@ class StudyService(object):
         return studies
 
     @staticmethod
-    def get_study(study_id, study_model: StudyModel = None, do_status=True):
+    def get_study(study_id, study_model: StudyModel = None, do_status=False):
         """Returns a study model that contains all the workflows organized by category.
         IMPORTANT: This is intended to be a lightweight call, it should never involve
         loading up and executing all the workflows in a study to calculate information."""
diff --git a/crc/services/workflow_processor.py b/crc/services/workflow_processor.py
index c2ede980..d9f3556c 100644
--- a/crc/services/workflow_processor.py
+++ b/crc/services/workflow_processor.py
@@ -42,7 +42,7 @@ class CustomBpmnScriptEngine(BpmnScriptEngine):
         """
         return self.evaluate_expression(task, expression)
 
-
+    @timeit
     def execute(self, task: SpiffTask, script, data):
         study_id = task.workflow.data[WorkflowProcessor.STUDY_ID_KEY]
 
diff --git a/migrations/versions/62910318009f_.py b/migrations/versions/62910318009f_.py
new file mode 100644
index 00000000..3915480f
--- /dev/null
+++ b/migrations/versions/62910318009f_.py
@@ -0,0 +1,28 @@
+"""empty message
+
+Revision ID: 62910318009f
+Revises: 665624ac29f1
+Create Date: 2021-04-28 14:09:57.648732
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '62910318009f'
+down_revision = '665624ac29f1'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('file_data', sa.Column('size', sa.Integer(), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column('file_data', 'size')
+    # ### end Alembic commands ###
diff --git a/tests/files/test_file_service.py b/tests/files/test_file_service.py
index 339943d4..53a644a0 100644
--- a/tests/files/test_file_service.py
+++ b/tests/files/test_file_service.py
@@ -72,7 +72,7 @@ class TestFileService(BaseTest):
         file_data = FileService.get_workflow_data_files(workflow_id=workflow.id)
         self.assertEqual(1, len(file_data))
         self.assertEqual(2, file_data[0].version)
-
+        self.assertEqual(4, file_data[0].size)  # File data size is included.
 
     def test_add_file_from_form_increments_version_and_replaces_on_subsequent_add_with_same_name(self):
         self.load_example_data()
diff --git a/tests/files/test_files_api.py b/tests/files/test_files_api.py
index ac146878..cce55fb5 100644
--- a/tests/files/test_files_api.py
+++ b/tests/files/test_files_api.py
@@ -181,11 +181,11 @@ class TestFilesApi(BaseTest):
         data['file'] = io.BytesIO(self.minimal_bpmn("abcdef")), 'my_new_file.bpmn'
         rv = self.app.post('/v1.0/file?workflow_spec_id=%s' % spec.id, data=data, follow_redirects=True,
                            content_type='multipart/form-data', headers=self.logged_in_headers())
-        json_data = json.loads(rv.get_data(as_text=True))
-        file = FileModelSchema().load(json_data, session=session)
+        file_json = json.loads(rv.get_data(as_text=True))
+        self.assertEqual(80, file_json['size'])
 
         data['file'] = io.BytesIO(self.minimal_bpmn("efghijk")), 'my_new_file.bpmn'
-        rv = self.app.put('/v1.0/file/%i/data' % file.id, data=data, follow_redirects=True,
+        rv = self.app.put('/v1.0/file/%i/data' % file_json['id'], data=data, follow_redirects=True,
                           content_type='multipart/form-data', headers=self.logged_in_headers())
         self.assert_success(rv)
         self.assertIsNotNone(rv.get_data())
@@ -193,14 +193,14 @@ class TestFilesApi(BaseTest):
         self.assertEqual(2, file_json['latest_version'])
         self.assertEqual(FileType.bpmn.value, file_json['type'])
         self.assertEqual("application/octet-stream", file_json['content_type'])
-        self.assertEqual(spec.id, file.workflow_spec_id)
+        self.assertEqual(spec.id, file_json['workflow_spec_id'])
 
         # Assure it is updated in the database and properly persisted.
-        file_model = session.query(FileModel).filter(FileModel.id == file.id).first()
+        file_model = session.query(FileModel).filter(FileModel.id == file_json['id']).first()
         file_data = FileService.get_file_data(file_model.id)
         self.assertEqual(2, file_data.version)
 
-        rv = self.app.get('/v1.0/file/%i/data' % file.id, headers=self.logged_in_headers())
+        rv = self.app.get('/v1.0/file/%i/data' % file_json['id'], headers=self.logged_in_headers())
         self.assert_success(rv)
         data = rv.get_data()
         self.assertIsNotNone(data)
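
Usage sketch (reviewer note, not part of the patch): a minimal example of how a
client might exercise the new update_status query parameter and the size field
now serialized by FileSchema. The base URL, study id, file id, and auth header
below are hypothetical placeholders, not values taken from this repository, and
the file-metadata route name is assumed; check api.yml for the real routes.

    # usage_sketch.py -- hedged example; adjust URL, ids, and auth to your deployment.
    import requests

    BASE = "http://localhost:5000/v1.0"               # assumed local dev server
    HEADERS = {"Authorization": "Bearer <token>"}     # placeholder credentials

    # Fast path: fetch the study without forcing a workflow-status refresh.
    study = requests.get(f"{BASE}/study/42", headers=HEADERS).json()

    # Expensive path: ask the API to sync with Protocol Builder and bring every
    # workflow status up to date before returning the study.
    study_fresh = requests.get(f"{BASE}/study/42",
                               params={"update_status": "true"},
                               headers=HEADERS).json()

    # File metadata now includes the stored size, so a frontend can display it
    # without downloading the file body first (file id and route are assumed).
    file_info = requests.get(f"{BASE}/file/17", headers=HEADERS).json()
    print(file_info["name"], file_info["size"])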