From a719cf4bf99db1f0d616ec6f465334fda8fc5d9f Mon Sep 17 00:00:00 2001
From: Dan
Date: Fri, 30 Apr 2021 11:55:12 -0400
Subject: [PATCH] When retrieving the study, only update the status of
 underlying workflows if specifically requested.

Record the size of a file in the database for quick access (this helps with a
frontend refactor, so it isn't downloading the file just to see its size).

Cleaning up the timing/performance metric reporting to make it easier to read.

Fixing a bug that prevented non-admins from getting the document directory.

(A short, hypothetical usage sketch of the new update_status parameter is
appended after the diff.)
---
 crc/api.yml                          |  8 ++++++++
 crc/api/study.py                     |  4 ++--
 crc/models/file.py                   | 12 +++++++++---
 crc/services/cache_service.py        |  4 ++--
 crc/services/file_service.py         |  5 ++++-
 crc/services/study_service.py        |  2 +-
 crc/services/workflow_processor.py   |  2 +-
 migrations/versions/62910318009f_.py | 28 ++++++++++++++++++++++++++++
 tests/files/test_file_service.py     |  2 +-
 tests/files/test_files_api.py        | 12 ++++++------
 10 files changed, 62 insertions(+), 17 deletions(-)
 create mode 100644 migrations/versions/62910318009f_.py

diff --git a/crc/api.yml b/crc/api.yml
index 980d6d13..f8f4ee0f 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -83,6 +83,8 @@ paths:
           type : integer
     get:
       operationId: crc.api.file.get_document_directory
+      security:
+        - auth_admin: ['secret']
       summary: Returns a directory of all files for study in a nested structure
       tags:
         - Document Categories
@@ -349,6 +351,12 @@ paths:
         schema:
           type: integer
           format: int32
+      - name: update_status
+        in: query
+        required: false
+        description: If set to true, will sync the study with Protocol Builder and ensure the status of all workflows is up to date (expensive).
+        schema:
+          type: boolean
     get:
       operationId: crc.api.study.get_study
       summary: Provides a single study
diff --git a/crc/api/study.py b/crc/api/study.py
index d91827b6..ddec1e02 100644
--- a/crc/api/study.py
+++ b/crc/api/study.py
@@ -74,8 +74,8 @@ def update_study(study_id, body):
     return StudySchema().dump(study)
 
 
-def get_study(study_id):
-    study = StudyService.get_study(study_id)
+def get_study(study_id, update_status=False):
+    study = StudyService.get_study(study_id, do_status=update_status)
     if (study is None):
         raise ApiError("unknown_study", 'The study "' + study_id + '" is not recognized.', status_code=404)
     return StudySchema().dump(study)
diff --git a/crc/models/file.py b/crc/models/file.py
index 8e73d12a..cdf29faf 100644
--- a/crc/models/file.py
+++ b/crc/models/file.py
@@ -1,7 +1,7 @@
 import enum
 from typing import cast
 
-from marshmallow import INCLUDE, EXCLUDE
+from marshmallow import INCLUDE, EXCLUDE, fields, Schema
 from marshmallow_enum import EnumField
 from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
 from sqlalchemy import func, Index
@@ -65,11 +65,13 @@ class FileDataModel(db.Model):
     md5_hash = db.Column(UUID(as_uuid=True), unique=False, nullable=False)
     data = deferred(db.Column(db.LargeBinary))  # Don't load it unless you have to.
     version = db.Column(db.Integer, default=0)
+    size = db.Column(db.Integer, default=0)
     date_created = db.Column(db.DateTime(timezone=True), default=func.now())
     file_model_id = db.Column(db.Integer, db.ForeignKey('file.id'))
     file_model = db.relationship("FileModel", foreign_keys=[file_model_id])
 
 
+
 class FileModel(db.Model):
     __tablename__ = 'file'
     id = db.Column(db.Integer, primary_key=True)
@@ -117,11 +119,13 @@ class File(object):
         if data_model:
             instance.last_modified = data_model.date_created
             instance.latest_version = data_model.version
+            instance.size = data_model.size
         else:
             instance.last_modified = None
             instance.latest_version = None
         return instance
 
+
 class FileModelSchema(SQLAlchemyAutoSchema):
     class Meta:
         model = FileModel
@@ -132,17 +136,19 @@ class FileModelSchema(SQLAlchemyAutoSchema):
     type = EnumField(FileType)
 
 
-class FileSchema(ma.Schema):
+class FileSchema(Schema):
     class Meta:
         model = File
         fields = ["id", "name", "is_status", "is_reference", "content_type",
                   "primary", "primary_process_id", "workflow_spec_id", "workflow_id",
                   "irb_doc_code", "last_modified", "latest_version", "type", "categories",
-                  "description", "category", "description", "download_name"]
+                  "description", "category", "description", "download_name", "size"]
+        unknown = INCLUDE
 
     type = EnumField(FileType)
 
 
+
 class LookupFileModel(db.Model):
     """Gives us a quick way to tell what kind of lookup is set on a form field.
     Connected to the file data model, so that if a new version of the same file is
diff --git a/crc/services/cache_service.py b/crc/services/cache_service.py
index ea6b1a6b..eb662fda 100644
--- a/crc/services/cache_service.py
+++ b/crc/services/cache_service.py
@@ -10,7 +10,7 @@ def firsttime():
 
 def sincetime(txt,lasttime):
     thistime=firsttime()
-    print('%s runtime was %2f'%(txt,thistime-lasttime))
+    print('%2.4f sec | %s' % (thistime-lasttime, txt))
     return thistime
 
 def timeit(f):
@@ -20,7 +20,7 @@ def timeit(f):
         ts = time.time()
         result = f(*args, **kw)
         te = time.time()
-        print('func:%r args:[%r, %r] took: %2.4f sec' % (f.__name__, args, kw, te-ts))
+        print('%2.4f sec | func:%r args:[%r, %r] ' % (te-ts, f.__name__, args, kw))
         return result
     return timed
 
diff --git a/crc/services/file_service.py b/crc/services/file_service.py
index 14e1b20e..2ecd0362 100644
--- a/crc/services/file_service.py
+++ b/crc/services/file_service.py
@@ -175,6 +175,8 @@ class FileService(object):
             order_by(desc(FileDataModel.date_created)).first()
 
         md5_checksum = UUID(hashlib.md5(binary_data).hexdigest())
+        size = len(binary_data)
+
         if (latest_data_model is not None) and (md5_checksum == latest_data_model.md5_hash):
            # This file does not need to be updated, it's the same file.  If it is arhived,
            # then de-arvhive it.
@@ -210,7 +212,8 @@ class FileService(object):
             new_file_data_model = FileDataModel(
                 data=binary_data, file_model_id=file_model.id, file_model=file_model,
-                version=version, md5_hash=md5_checksum, date_created=datetime.now()
+                version=version, md5_hash=md5_checksum, date_created=datetime.now(),
+                size=size
             )
             session.add_all([file_model, new_file_data_model])
             session.commit()
 
diff --git a/crc/services/study_service.py b/crc/services/study_service.py
index 749a4e14..331d6a35 100644
--- a/crc/services/study_service.py
+++ b/crc/services/study_service.py
@@ -53,7 +53,7 @@ class StudyService(object):
         return studies
 
     @staticmethod
-    def get_study(study_id, study_model: StudyModel = None, do_status=True):
+    def get_study(study_id, study_model: StudyModel = None, do_status=False):
         """Returns a study model that contains all the workflows organized by category.
         IMPORTANT: This is intended to be a lightweight call, it should never involve
         loading up and executing all the workflows in a study to calculate information."""
diff --git a/crc/services/workflow_processor.py b/crc/services/workflow_processor.py
index c2ede980..d9f3556c 100644
--- a/crc/services/workflow_processor.py
+++ b/crc/services/workflow_processor.py
@@ -42,7 +42,7 @@ class CustomBpmnScriptEngine(BpmnScriptEngine):
         """
         return self.evaluate_expression(task, expression)
 
-
+    @timeit
     def execute(self, task: SpiffTask, script, data):
         study_id = task.workflow.data[WorkflowProcessor.STUDY_ID_KEY]
 
diff --git a/migrations/versions/62910318009f_.py b/migrations/versions/62910318009f_.py
new file mode 100644
index 00000000..3915480f
--- /dev/null
+++ b/migrations/versions/62910318009f_.py
@@ -0,0 +1,28 @@
+"""empty message
+
+Revision ID: 62910318009f
+Revises: 665624ac29f1
+Create Date: 2021-04-28 14:09:57.648732
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '62910318009f'
+down_revision = '665624ac29f1'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('file_data', sa.Column('size', sa.Integer(), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column('file_data', 'size')
+    # ### end Alembic commands ###
diff --git a/tests/files/test_file_service.py b/tests/files/test_file_service.py
index 339943d4..53a644a0 100644
--- a/tests/files/test_file_service.py
+++ b/tests/files/test_file_service.py
@@ -72,7 +72,7 @@ class TestFileService(BaseTest):
         file_data = FileService.get_workflow_data_files(workflow_id=workflow.id)
         self.assertEqual(1, len(file_data))
         self.assertEqual(2, file_data[0].version)
-
+        self.assertEqual(4, file_data[0].size)  # File data size is included.
 
     def test_add_file_from_form_increments_version_and_replaces_on_subsequent_add_with_same_name(self):
         self.load_example_data()
diff --git a/tests/files/test_files_api.py b/tests/files/test_files_api.py
index ac146878..cce55fb5 100644
--- a/tests/files/test_files_api.py
+++ b/tests/files/test_files_api.py
@@ -181,11 +181,11 @@ class TestFilesApi(BaseTest):
         data['file'] = io.BytesIO(self.minimal_bpmn("abcdef")), 'my_new_file.bpmn'
         rv = self.app.post('/v1.0/file?workflow_spec_id=%s' % spec.id, data=data, follow_redirects=True,
                            content_type='multipart/form-data', headers=self.logged_in_headers())
-        json_data = json.loads(rv.get_data(as_text=True))
-        file = FileModelSchema().load(json_data, session=session)
+        file_json = json.loads(rv.get_data(as_text=True))
+        self.assertEqual(80, file_json['size'])
 
         data['file'] = io.BytesIO(self.minimal_bpmn("efghijk")), 'my_new_file.bpmn'
-        rv = self.app.put('/v1.0/file/%i/data' % file.id, data=data, follow_redirects=True,
+        rv = self.app.put('/v1.0/file/%i/data' % file_json['id'], data=data, follow_redirects=True,
                           content_type='multipart/form-data', headers=self.logged_in_headers())
         self.assert_success(rv)
         self.assertIsNotNone(rv.get_data())
@@ -193,14 +193,14 @@ class TestFilesApi(BaseTest):
         self.assertEqual(2, file_json['latest_version'])
         self.assertEqual(FileType.bpmn.value, file_json['type'])
         self.assertEqual("application/octet-stream", file_json['content_type'])
-        self.assertEqual(spec.id, file.workflow_spec_id)
+        self.assertEqual(spec.id, file_json['workflow_spec_id'])
 
         # Assure it is updated in the database and properly persisted.
-        file_model = session.query(FileModel).filter(FileModel.id == file.id).first()
+        file_model = session.query(FileModel).filter(FileModel.id == file_json['id']).first()
         file_data = FileService.get_file_data(file_model.id)
         self.assertEqual(2, file_data.version)
 
-        rv = self.app.get('/v1.0/file/%i/data' % file.id, headers=self.logged_in_headers())
+        rv = self.app.get('/v1.0/file/%i/data' % file_json['id'], headers=self.logged_in_headers())
         self.assert_success(rv)
         data = rv.get_data()
         self.assertIsNotNone(data)
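
Usage sketch (reviewer note, not part of the patch): a minimal example of how a
client might exercise the new update_status query parameter and the size field
now serialized by FileSchema. The base URL, study id, file id, and auth header
below are hypothetical placeholders, not values taken from this repository, and
the file-metadata route name is assumed; check api.yml for the real routes.

    # usage_sketch.py -- hedged example; adjust URL, ids, and auth to your deployment.
    import requests

    BASE = "http://localhost:5000/v1.0"               # assumed local dev server
    HEADERS = {"Authorization": "Bearer <token>"}     # placeholder credentials

    # Fast path: fetch the study without forcing a workflow-status refresh.
    study = requests.get(f"{BASE}/study/42", headers=HEADERS).json()

    # Expensive path: ask the API to sync with Protocol Builder and bring every
    # workflow status up to date before returning the study.
    study_fresh = requests.get(f"{BASE}/study/42",
                               params={"update_status": "true"},
                               headers=HEADERS).json()

    # File metadata now includes the stored size, so a frontend can display it
    # without downloading the file body first (file id and route are assumed).
    file_info = requests.get(f"{BASE}/file/17", headers=HEADERS).json()
    print(file_info["name"], file_info["size"])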