Get more file details so we can fill out everything locally and also add a method to download the file by md5_hash

Kelly McDonald 2020-12-07 08:49:38 -05:00
parent cad613cf63
commit f26a8615a4
3 changed files with 49 additions and 7 deletions


@@ -578,6 +578,29 @@ paths:
       responses:
         '204':
           description: The file has been removed.
+  /file/{md5_hash}/data:
+    parameters:
+      - name: md5_hash
+        in: path
+        required: true
+        description: The md5 hash of the file requested
+        schema:
+          type: string
+    get:
+      operationId: crc.api.file.get_file_data_by_hash
+      summary: Returns only the file contents
+      security: []  # Disable security for this endpoint only.
+      tags:
+        - Files
+      responses:
+        '200':
+          description: Returns the actual file
+          content:
+            application/octet-stream:
+              schema:
+                type: string
+                format: binary
+                example: '<?xml version="1.0" encoding="UTF-8"?><bpmn:definitions></bpmn:definitions>'
   /file/{file_id}/data:
     parameters:
       - name: file_id
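Once the API is running, the new endpoint can be exercised directly. A minimal client sketch using `requests`, assuming the service listens on localhost and mounts the API under a `/v1.0` prefix (host, port, prefix, and the sample hash are assumptions, not part of this commit):

import requests

# Assumed base URL; adjust host/port and API prefix for the actual deployment.
BASE_URL = "http://localhost:5000/v1.0"
md5_hash = "d41d8cd98f00b204e9800998ecf8427e"  # illustrative hash, not from this commit

resp = requests.get(f"{BASE_URL}/file/{md5_hash}/data")
resp.raise_for_status()

# The endpoint returns the raw file bytes (application/octet-stream),
# so the body can be written straight to disk.
with open("downloaded_file", "wb") as fh:
    fh.write(resp.content)

Because the spec disables security for this path, no auth header should be needed for this particular request.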


@@ -6,7 +6,7 @@ from flask import send_file
 from crc import session
 from crc.api.common import ApiError
-from crc.models.file import FileSchema, FileModel, File, FileModelSchema
+from crc.models.file import FileSchema, FileModel, File, FileModelSchema, FileDataModel
 from crc.models.workflow import WorkflowSpecModel
 from crc.services.file_service import FileService
@@ -99,6 +99,9 @@ def update_file_data(file_id):
     file_model = FileService.update_file(file_model, file.stream.read(), file.content_type)
     return FileSchema().dump(to_file_api(file_model))

+def get_file_data_by_hash(md5_hash):
+    filedatamodel = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first()
+    return get_file_data(filedatamodel.file_model_id)

 def get_file_data(file_id, version=None):
     file_data = FileService.get_file_data(file_id, version)
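One thing worth noting about `get_file_data_by_hash`: if no `FileDataModel` row matches the hash, `first()` returns `None` and the attribute access on `filedatamodel` raises. A hedged sketch of a more defensive variant, reusing the `ApiError` class this module already imports (the error code string is an assumption, not something this commit defines):

def get_file_data_by_hash(md5_hash):
    # Look up the stored file version by its md5 hash.
    filedatamodel = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first()
    if filedatamodel is None:
        # 'unknown_file' is a hypothetical error code; use whatever convention the API follows.
        raise ApiError('unknown_file', 'No file with md5 hash %s was found.' % md5_hash)
    return get_file_data(filedatamodel.file_model_id)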


@@ -345,11 +345,15 @@ def get_changed_files(remote,workflow_spec_id):
     # this takes the different date_created_x and date-created_y columns and
     # combines them back into one date_created column
-    index = different['date_created_x'].isnull()
-    different.loc[index,'date_created_x'] = different[index]['date_created_y']
-    different = different[['date_created_x','filename','location']].copy()
-    different.columns=['date_created','filename','location']
+    dualfields = ['date_created','type','primary','content_type','primary_process_id']
+    for merge in dualfields:
+        index = different[merge+'_x'].isnull()
+        different.loc[index,merge+'_x'] = different[index][merge+'_y']
+
+    fieldlist = [fld+'_x' for fld in dualfields]
+    different = different[ fieldlist + ['md5_hash','filename','location']].copy()
+    different.columns=dualfields+['md5_hash','filename','location']

     # our different list will have multiple entries for a workflow if there is a version on either side
     # we want to grab the most recent one, so we sort and grab the most recent one for each workflow
     changedfiles = different.sort_values('date_created',ascending=False).groupby('filename').first()
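The loop above generalizes what used to be a one-off fix for `date_created`: an outer merge returns every shared column as a `<name>_x`/`<name>_y` pair, and the missing side is back-filled from the other before the `_x` columns are renamed. A self-contained sketch of that coalescing pattern on toy data (the frames and values here are made up for illustration):

import pandas as pd

local = pd.DataFrame({'filename': ['a.bpmn', 'b.bpmn'], 'date_created': ['2020-12-01', '2020-12-02']})
remote = pd.DataFrame({'filename': ['b.bpmn', 'c.bpmn'], 'date_created': ['2020-12-03', '2020-12-04']})

# An outer merge on filename yields date_created_x / date_created_y columns.
different = local.merge(remote, on='filename', how='outer')

# Coalesce each _x/_y pair: wherever the left-hand value is missing, take the right-hand one.
dualfields = ['date_created']
for merge in dualfields:
    index = different[merge + '_x'].isnull()
    different.loc[index, merge + '_x'] = different[index][merge + '_y']

different = different[[fld + '_x' for fld in dualfields] + ['filename']].copy()
different.columns = dualfields + ['filename']
print(different)  # one date_created column again, filled from whichever side had it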
@@ -363,7 +367,7 @@ def get_changed_files(remote,workflow_spec_id):
     changedfiles['new'] = False
     changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True
     changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True
+    changedfiles = changedfiles.replace({pd.np.nan: None})

     # return the list as a dict, let swagger convert it to json
     return changedfiles.reset_index().to_dict(orient='records')
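The `replace({pd.np.nan: None})` call matters because the newly carried columns (for example `primary_process_id`) can be missing, and `to_dict` would otherwise hand float NaN values to the JSON layer; replacing them with `None` lets them serialize as `null`. A minimal illustration of the effect (written against `numpy` directly, since `pd.np` is a deprecated alias in newer pandas releases):

import numpy as np
import pandas as pd

df = pd.DataFrame({'filename': ['a.bpmn'], 'primary_process_id': [np.nan]})

# Without the replace, the record carries a float NaN, which is not valid JSON.
print(df.to_dict(orient='records'))
# [{'filename': 'a.bpmn', 'primary_process_id': nan}]

# With the replace, NaN becomes None and serializes as null.
print(df.replace({np.nan: None}).to_dict(orient='records'))
# [{'filename': 'a.bpmn', 'primary_process_id': None}]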
@@ -404,9 +408,21 @@ def get_workflow_spec_files_dataframe(workflowid):
                          'workflow_spec_id': file.file_model.workflow_spec_id,
                          'md5_hash':file.md5_hash,
                          'filename':file.file_model.name,
+                         'type':file.file_model.type.name,
+                         'primary':file.file_model.primary,
+                         'content_type':file.file_model.content_type,
+                         'primary_process_id':file.file_model.primary_process_id,
                          'date_created':file.date_created})
     if len(filelist) == 0:
-        return pd.DataFrame(columns=['file_model_id','workflow_spec_id','md5_hash','filename','date_created'])
+        return pd.DataFrame(columns=['file_model_id',
+                                     'workflow_spec_id',
+                                     'md5_hash',
+                                     'filename',
+                                     'type',
+                                     'primary',
+                                     'content_type',
+                                     'primary_process_id',
+                                     'date_created'])
     df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last()
     df['date_created'] = df['date_created'].astype('str')
     return df
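Finally, the dataframe helper builds one record per file (potentially one per stored version) and keeps only the newest row per `file_model_id` through `sort_values(...)` followed by `groupby(...).last()`. A small sketch of that pattern with made-up records:

import pandas as pd

filelist = [
    {'file_model_id': 1, 'filename': 'a.bpmn', 'md5_hash': 'aaa', 'date_created': '2020-12-01'},
    {'file_model_id': 1, 'filename': 'a.bpmn', 'md5_hash': 'bbb', 'date_created': '2020-12-05'},
    {'file_model_id': 2, 'filename': 'b.bpmn', 'md5_hash': 'ccc', 'date_created': '2020-12-03'},
]

# Sorting by date and taking .last() per group keeps only the most recent version of each file.
df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last()
print(df)  # file_model_id 1 keeps md5_hash 'bbb', its latest version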