From f26a8615a431dbd4a31e448cd29c3b4048a8a4af Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Mon, 7 Dec 2020 08:49:38 -0500
Subject: [PATCH] Get more file details so we can fill out everything locally
 and also add a method to download the file by md5_hash

---
 crc/api.yml         | 23 +++++++++++++++++++++++
 crc/api/file.py     |  5 ++++-
 crc/api/workflow.py | 28 ++++++++++++++++++++++------
 3 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/crc/api.yml b/crc/api.yml
index afeb2bc2..b35ad793 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -578,6 +578,29 @@ paths:
       responses:
         '204':
           description: The file has been removed.
+  /file/{md5_hash}/data:
+    parameters:
+      - name: md5_hash
+        in: path
+        required: true
+        description: The md5 hash of the file requested
+        schema:
+          type: string
+    get:
+      operationId: crc.api.file.get_file_data_by_hash
+      summary: Returns only the file contents
+      security: []  # Disable security for this endpoint only.
+      tags:
+        - Files
+      responses:
+        '200':
+          description: Returns the actual file
+          content:
+            application/octet-stream:
+              schema:
+                type: string
+                format: binary
+                example: ''
   /file/{file_id}/data:
     parameters:
       - name: file_id
diff --git a/crc/api/file.py b/crc/api/file.py
index 5cf54221..4f0b655f 100644
--- a/crc/api/file.py
+++ b/crc/api/file.py
@@ -6,7 +6,7 @@ from flask import send_file
 
 from crc import session
 from crc.api.common import ApiError
-from crc.models.file import FileSchema, FileModel, File, FileModelSchema
+from crc.models.file import FileSchema, FileModel, File, FileModelSchema, FileDataModel
 from crc.models.workflow import WorkflowSpecModel
 from crc.services.file_service import FileService
 
@@ -99,6 +99,9 @@ def update_file_data(file_id):
     file_model = FileService.update_file(file_model, file.stream.read(), file.content_type)
     return FileSchema().dump(to_file_api(file_model))
 
+def get_file_data_by_hash(md5_hash):
+    filedatamodel = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first()
+    return get_file_data(filedatamodel.file_model_id)
 
 def get_file_data(file_id, version=None):
     file_data = FileService.get_file_data(file_id, version)
diff --git a/crc/api/workflow.py b/crc/api/workflow.py
index 983097a6..f9c6ac58 100644
--- a/crc/api/workflow.py
+++ b/crc/api/workflow.py
@@ -345,11 +345,15 @@ def get_changed_files(remote,workflow_spec_id):
 
     # this takes the different date_created_x and date-created_y columns and
     # combines them back into one date_created column
-    index = different['date_created_x'].isnull()
-    different.loc[index,'date_created_x'] = different[index]['date_created_y']
-    different = different[['date_created_x','filename','location']].copy()
+    dualfields = ['date_created','type','primary','content_type','primary_process_id']
+    for merge in dualfields:
+        index = different[merge+'_x'].isnull()
+        different.loc[index,merge+'_x'] = different[index][merge+'_y']
 
-    different.columns=['date_created','filename','location']
+    fieldlist = [fld+'_x' for fld in dualfields]
+    different = different[ fieldlist + ['md5_hash','filename','location']].copy()
+
+    different.columns=dualfields+['md5_hash','filename','location']
     # our different list will have multiple entries for a workflow if there is a version on either side
     # we want to grab the most recent one, so we sort and grab the most recent one for each workflow
     changedfiles = different.sort_values('date_created',ascending=False).groupby('filename').first()
@@ -363,7 +367,7 @@ def get_changed_files(remote,workflow_spec_id):
     changedfiles['new'] = False
     changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True
     changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True
-
+    changedfiles = changedfiles.replace({pd.np.nan: None})
     # return the list as a dict, let swagger convert it to json
     return changedfiles.reset_index().to_dict(orient='records')
 
@@ -404,9 +408,21 @@ def get_workflow_spec_files_dataframe(workflowid):
                          'workflow_spec_id': file.file_model.workflow_spec_id,
                          'md5_hash':file.md5_hash,
                          'filename':file.file_model.name,
+                         'type':file.file_model.type.name,
+                         'primary':file.file_model.primary,
+                         'content_type':file.file_model.content_type,
+                         'primary_process_id':file.file_model.primary_process_id,
                          'date_created':file.date_created})
     if len(filelist) == 0:
-        return pd.DataFrame(columns=['file_model_id','workflow_spec_id','md5_hash','filename','date_created'])
+        return pd.DataFrame(columns=['file_model_id',
+                                     'workflow_spec_id',
+                                     'md5_hash',
+                                     'filename',
+                                     'type',
+                                     'primary',
+                                     'content_type',
+                                     'primary_process_id',
+                                     'date_created'])
     df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last()
     df['date_created'] = df['date_created'].astype('str')
     return df