Get more file details so we can fill out everything locally and also add a method to download the file by md5_hash

Kelly McDonald 2020-12-07 08:49:38 -05:00
parent cad613cf63
commit f26a8615a4
3 changed files with 49 additions and 7 deletions


@@ -578,6 +578,29 @@ paths:
       responses:
         '204':
           description: The file has been removed.
+  /file/{md5_hash}/data:
+    parameters:
+      - name: md5_hash
+        in: path
+        required: true
+        description: The md5 hash of the file requested
+        schema:
+          type: string
+    get:
+      operationId: crc.api.file.get_file_data_by_hash
+      summary: Returns only the file contents
+      security: []  # Disable security for this endpoint only.
+      tags:
+        - Files
+      responses:
+        '200':
+          description: Returns the actual file
+          content:
+            application/octet-stream:
+              schema:
+                type: string
+                format: binary
+                example: '<?xml version="1.0" encoding="UTF-8"?><bpmn:definitions></bpmn:definitions>'
   /file/{file_id}/data:
     parameters:
       - name: file_id
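Once the API is running, the new endpoint can be exercised directly. A minimal client sketch using `requests`, assuming the service listens on localhost and mounts the API under a `/v1.0` prefix (host, port, prefix, and the sample hash are assumptions, not part of this commit):

import requests

# Assumed base URL; adjust host/port and API prefix for the actual deployment.
BASE_URL = "http://localhost:5000/v1.0"
md5_hash = "d41d8cd98f00b204e9800998ecf8427e"  # illustrative hash, not from this commit

resp = requests.get(f"{BASE_URL}/file/{md5_hash}/data")
resp.raise_for_status()

# The endpoint returns the raw file bytes (application/octet-stream),
# so the body can be written straight to disk.
with open("downloaded_file", "wb") as fh:
    fh.write(resp.content)

Because the spec disables security for this path, no auth header should be needed for this particular request.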


@@ -6,7 +6,7 @@ from flask import send_file
 from crc import session
 from crc.api.common import ApiError
-from crc.models.file import FileSchema, FileModel, File, FileModelSchema
+from crc.models.file import FileSchema, FileModel, File, FileModelSchema, FileDataModel
 from crc.models.workflow import WorkflowSpecModel
 from crc.services.file_service import FileService
@@ -99,6 +99,9 @@ def update_file_data(file_id):
     file_model = FileService.update_file(file_model, file.stream.read(), file.content_type)
     return FileSchema().dump(to_file_api(file_model))

+def get_file_data_by_hash(md5_hash):
+    filedatamodel = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first()
+    return get_file_data(filedatamodel.file_model_id)

 def get_file_data(file_id, version=None):
     file_data = FileService.get_file_data(file_id, version)
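One thing worth noting about `get_file_data_by_hash`: if no `FileDataModel` row matches the hash, `first()` returns `None` and the attribute access on `filedatamodel` raises. A hedged sketch of a more defensive variant, reusing the `ApiError` class this module already imports (the error code string is an assumption, not something this commit defines):

def get_file_data_by_hash(md5_hash):
    # Look up the stored file version by its md5 hash.
    filedatamodel = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first()
    if filedatamodel is None:
        # 'unknown_file' is a hypothetical error code; use whatever convention the API follows.
        raise ApiError('unknown_file', 'No file with md5 hash %s was found.' % md5_hash)
    return get_file_data(filedatamodel.file_model_id)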


@@ -345,11 +345,15 @@ def get_changed_files(remote,workflow_spec_id):
     # this takes the different date_created_x and date-created_y columns and
     # combines them back into one date_created column
-    index = different['date_created_x'].isnull()
-    different.loc[index,'date_created_x'] = different[index]['date_created_y']
-    different = different[['date_created_x','filename','location']].copy()
-    different.columns=['date_created','filename','location']
+    dualfields = ['date_created','type','primary','content_type','primary_process_id']
+    for merge in dualfields:
+        index = different[merge+'_x'].isnull()
+        different.loc[index,merge+'_x'] = different[index][merge+'_y']
+
+    fieldlist = [fld+'_x' for fld in dualfields]
+    different = different[ fieldlist + ['md5_hash','filename','location']].copy()
+    different.columns=dualfields+['md5_hash','filename','location']

     # our different list will have multiple entries for a workflow if there is a version on either side
     # we want to grab the most recent one, so we sort and grab the most recent one for each workflow
     changedfiles = different.sort_values('date_created',ascending=False).groupby('filename').first()
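The loop above generalizes what used to be a one-off fix for `date_created`: an outer merge returns every shared column as a `<name>_x`/`<name>_y` pair, and the missing side is back-filled from the other before the `_x` columns are renamed. A self-contained sketch of that coalescing pattern on toy data (the frames and values here are made up for illustration):

import pandas as pd

local = pd.DataFrame({'filename': ['a.bpmn', 'b.bpmn'], 'date_created': ['2020-12-01', '2020-12-02']})
remote = pd.DataFrame({'filename': ['b.bpmn', 'c.bpmn'], 'date_created': ['2020-12-03', '2020-12-04']})

# An outer merge on filename yields date_created_x / date_created_y columns.
different = local.merge(remote, on='filename', how='outer')

# Coalesce each _x/_y pair: wherever the left-hand value is missing, take the right-hand one.
dualfields = ['date_created']
for merge in dualfields:
    index = different[merge + '_x'].isnull()
    different.loc[index, merge + '_x'] = different[index][merge + '_y']

different = different[[fld + '_x' for fld in dualfields] + ['filename']].copy()
different.columns = dualfields + ['filename']
print(different)  # one date_created column again, filled from whichever side had it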
@@ -363,7 +367,7 @@ def get_changed_files(remote,workflow_spec_id):
     changedfiles['new'] = False
     changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True
     changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True
+    changedfiles = changedfiles.replace({pd.np.nan: None})

     # return the list as a dict, let swagger convert it to json
     return changedfiles.reset_index().to_dict(orient='records')
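The `replace({pd.np.nan: None})` call matters because the newly carried columns (for example `primary_process_id`) can be missing, and `to_dict` would otherwise hand float NaN values to the JSON layer; replacing them with `None` lets them serialize as `null`. A minimal illustration of the effect (written against `numpy` directly, since `pd.np` is a deprecated alias in newer pandas releases):

import numpy as np
import pandas as pd

df = pd.DataFrame({'filename': ['a.bpmn'], 'primary_process_id': [np.nan]})

# Without the replace, the record carries a float NaN, which is not valid JSON.
print(df.to_dict(orient='records'))
# [{'filename': 'a.bpmn', 'primary_process_id': nan}]

# With the replace, NaN becomes None and serializes as null.
print(df.replace({np.nan: None}).to_dict(orient='records'))
# [{'filename': 'a.bpmn', 'primary_process_id': None}]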
@@ -404,9 +408,21 @@ def get_workflow_spec_files_dataframe(workflowid):
                          'workflow_spec_id': file.file_model.workflow_spec_id,
                          'md5_hash':file.md5_hash,
                          'filename':file.file_model.name,
+                         'type':file.file_model.type.name,
+                         'primary':file.file_model.primary,
+                         'content_type':file.file_model.content_type,
+                         'primary_process_id':file.file_model.primary_process_id,
                          'date_created':file.date_created})
     if len(filelist) == 0:
-        return pd.DataFrame(columns=['file_model_id','workflow_spec_id','md5_hash','filename','date_created'])
+        return pd.DataFrame(columns=['file_model_id',
+                                     'workflow_spec_id',
+                                     'md5_hash',
+                                     'filename',
+                                     'type',
+                                     'primary',
+                                     'content_type',
+                                     'primary_process_id',
+                                     'date_created'])
     df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last()
     df['date_created'] = df['date_created'].astype('str')
     return df
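Finally, the dataframe helper builds one record per file (potentially one per stored version) and keeps only the newest row per `file_model_id` through `sort_values(...)` followed by `groupby(...).last()`. A small sketch of that pattern with made-up records:

import pandas as pd

filelist = [
    {'file_model_id': 1, 'filename': 'a.bpmn', 'md5_hash': 'aaa', 'date_created': '2020-12-01'},
    {'file_model_id': 1, 'filename': 'a.bpmn', 'md5_hash': 'bbb', 'date_created': '2020-12-05'},
    {'file_model_id': 2, 'filename': 'b.bpmn', 'md5_hash': 'ccc', 'date_created': '2020-12-03'},
]

# Sorting by date and taking .last() per group keeps only the most recent version of each file.
df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last()
print(df)  # file_model_id 1 keeps md5_hash 'bbb', its latest version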