Get more file details so we can fill out everything locally and also add a method to download the file by md5_hash

This commit is contained in:
Kelly McDonald 2020-12-07 08:49:38 -05:00
parent cad613cf63
commit f26a8615a4
3 changed files with 49 additions and 7 deletions

View File

@ -578,6 +578,29 @@ paths:
responses:
'204':
description: The file has been removed.
# GET /file/{md5_hash}/data - returns the raw contents of the file whose md5 hash is given.
# Handled by crc.api.file.get_file_data_by_hash; security is disabled for this path only.
# NOTE(review): this template differs from /file/{file_id}/data only in the parameter name.
# OpenAPI treats templated paths with the same hierarchy but different parameter names as
# identical, so request routing between the two may be ambiguous - confirm, and consider
# giving this endpoint a distinct path.
/file/{md5_hash}/data:
parameters:
- name: md5_hash
in: path
required: true
description: The md5 hash of the file requested
schema:
type: string
get:
operationId: crc.api.file.get_file_data_by_hash
summary: Returns only the file contents
security: [] # Disable security for this endpoint only.
tags:
- Files
responses:
'200':
description: Returns the actual file
content:
application/octet-stream:
schema:
type: string
format: binary
example: '<?xml version="1.0" encoding="UTF-8"?><bpmn:definitions></bpmn:definitions>'
/file/{file_id}/data:
parameters:
- name: file_id

View File

@ -6,7 +6,7 @@ from flask import send_file
from crc import session
from crc.api.common import ApiError
from crc.models.file import FileSchema, FileModel, File, FileModelSchema
from crc.models.file import FileSchema, FileModel, File, FileModelSchema, FileDataModel
from crc.models.workflow import WorkflowSpecModel
from crc.services.file_service import FileService
@ -99,6 +99,9 @@ def update_file_data(file_id):
file_model = FileService.update_file(file_model, file.stream.read(), file.content_type)
return FileSchema().dump(to_file_api(file_model))
def get_file_data_by_hash(md5_hash):
    """Return the contents of the file that has stored data matching ``md5_hash``.

    Looks up the first ``FileDataModel`` row whose ``md5_hash`` equals the
    given value and delegates to ``get_file_data`` with that row's
    ``file_model_id``.

    Args:
        md5_hash: The md5 hash string taken from the request path.

    Returns:
        Whatever ``get_file_data`` returns for the owning file.

    Raises:
        ApiError: If no stored file data has the given hash.
    """
    file_data_model = (
        session.query(FileDataModel)
        .filter(FileDataModel.md5_hash == md5_hash)
        .first()
    )
    # ``first()`` yields None when nothing matches; without this guard the
    # attribute access below would fail with an AttributeError (HTTP 500).
    if file_data_model is None:
        # NOTE(review): assumes ApiError accepts (code, message, status_code)
        # - confirm against crc.api.common.
        raise ApiError('no_such_file',
                       'No file data exists with the md5 hash %s' % md5_hash,
                       status_code=404)
    # NOTE(review): get_file_data is called without a version, so this may
    # return a different version of the file than the one whose hash was
    # matched - confirm whether the matched row's version should be passed.
    return get_file_data(file_data_model.file_model_id)
def get_file_data(file_id, version=None):
file_data = FileService.get_file_data(file_id, version)

View File

@ -345,11 +345,15 @@ def get_changed_files(remote,workflow_spec_id):
# this takes the different date_created_x and date_created_y columns and
# combines them back into one date_created column
index = different['date_created_x'].isnull()
different.loc[index,'date_created_x'] = different[index]['date_created_y']
different = different[['date_created_x','filename','location']].copy()
dualfields = ['date_created','type','primary','content_type','primary_process_id']
for merge in dualfields:
index = different[merge+'_x'].isnull()
different.loc[index,merge+'_x'] = different[index][merge+'_y']
different.columns=['date_created','filename','location']
fieldlist = [fld+'_x' for fld in dualfields]
different = different[ fieldlist + ['md5_hash','filename','location']].copy()
different.columns=dualfields+['md5_hash','filename','location']
# our different list will have multiple entries for a file if there is a version on either side
# we want the most recent one, so we sort by date_created and grab the first entry for each filename
changedfiles = different.sort_values('date_created',ascending=False).groupby('filename').first()
@ -363,7 +367,7 @@ def get_changed_files(remote,workflow_spec_id):
changedfiles['new'] = False
changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True
changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True
changedfiles = changedfiles.replace({pd.np.nan: None})
# return the list as a dict, let swagger convert it to json
return changedfiles.reset_index().to_dict(orient='records')
@ -404,9 +408,21 @@ def get_workflow_spec_files_dataframe(workflowid):
'workflow_spec_id': file.file_model.workflow_spec_id,
'md5_hash':file.md5_hash,
'filename':file.file_model.name,
'type':file.file_model.type.name,
'primary':file.file_model.primary,
'content_type':file.file_model.content_type,
'primary_process_id':file.file_model.primary_process_id,
'date_created':file.date_created})
if len(filelist) == 0:
return pd.DataFrame(columns=['file_model_id','workflow_spec_id','md5_hash','filename','date_created'])
return pd.DataFrame(columns=['file_model_id',
'workflow_spec_id',
'md5_hash',
'filename',
'type',
'primary',
'content_type',
'primary_process_id',
'date_created'])
df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last()
df['date_created'] = df['date_created'].astype('str')
return df