Get more file details so we can fill out everything locally, and add a method to download a file by its md5_hash
This commit is contained in:
parent cad613cf63
commit f26a8615a4
crc/api.yml | 23 +++++++++++++++++++++++
crc/api.yml

@@ -578,6 +578,29 @@ paths:
       responses:
         '204':
           description: The file has been removed.
+  /file/{md5_hash}/data:
+    parameters:
+      - name: md5_hash
+        in: path
+        required: true
+        description: The md5 hash of the file requested
+        schema:
+          type: string
+    get:
+      operationId: crc.api.file.get_file_data_by_hash
+      summary: Returns only the file contents
+      security: []  # Disable security for this endpoint only.
+      tags:
+        - Files
+      responses:
+        '200':
+          description: Returns the actual file
+          content:
+            application/octet-stream:
+              schema:
+                type: string
+                format: binary
+              example: '<?xml version="1.0" encoding="UTF-8"?><bpmn:definitions></bpmn:definitions>'
   /file/{file_id}/data:
     parameters:
       - name: file_id
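For context, a client could exercise this new unauthenticated endpoint as in the sketch below. The base URL, hash value, and output filename are illustrative assumptions, not part of the commit; adjust them to match the deployment.

import requests

# Hypothetical base URL; the real host and base path come from the server configuration.
BASE_URL = 'http://localhost:5000/v1.0'
md5_hash = 'd41d8cd98f00b204e9800998ecf8427e'  # hypothetical md5 hash of a stored file

response = requests.get(f'{BASE_URL}/file/{md5_hash}/data')
response.raise_for_status()

# The endpoint returns raw bytes (application/octet-stream), so write in binary mode.
with open('downloaded.bpmn', 'wb') as f:
    f.write(response.content)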
crc/api/file.py

@@ -6,7 +6,7 @@ from flask import send_file
 
 from crc import session
 from crc.api.common import ApiError
-from crc.models.file import FileSchema, FileModel, File, FileModelSchema
+from crc.models.file import FileSchema, FileModel, File, FileModelSchema, FileDataModel
 from crc.models.workflow import WorkflowSpecModel
 from crc.services.file_service import FileService
 
@@ -99,6 +99,9 @@ def update_file_data(file_id):
     file_model = FileService.update_file(file_model, file.stream.read(), file.content_type)
     return FileSchema().dump(to_file_api(file_model))
 
+def get_file_data_by_hash(md5_hash):
+    filedatamodel = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first()
+    return get_file_data(filedatamodel.file_model_id)
 
 def get_file_data(file_id, version=None):
     file_data = FileService.get_file_data(file_id, version)
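One caveat: session.query(...).first() returns None when no FileDataModel matches, so an unknown hash would surface as an AttributeError (a 500) rather than a clean API error. A defensive sketch, not what the commit does, assuming ApiError (already imported in this module) accepts an error code and a message:

def get_file_data_by_hash(md5_hash):
    filedatamodel = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first()
    if filedatamodel is None:
        # Assumed code/message constructor; adjust to ApiError's actual signature.
        raise ApiError('file_not_found', 'No file exists with md5 hash %s' % md5_hash)
    return get_file_data(filedatamodel.file_model_id)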
@@ -345,11 +345,15 @@ def get_changed_files(remote,workflow_spec_id):
 
     # this takes the different date_created_x and date-created_y columns and
     # combines them back into one date_created column
-    index = different['date_created_x'].isnull()
-    different.loc[index,'date_created_x'] = different[index]['date_created_y']
-    different = different[['date_created_x','filename','location']].copy()
+    dualfields = ['date_created','type','primary','content_type','primary_process_id']
+    for merge in dualfields:
+        index = different[merge+'_x'].isnull()
+        different.loc[index,merge+'_x'] = different[index][merge+'_y']
 
-    different.columns=['date_created','filename','location']
+    fieldlist = [fld+'_x' for fld in dualfields]
+    different = different[ fieldlist + ['md5_hash','filename','location']].copy()
+
+    different.columns=dualfields+['md5_hash','filename','location']
     # our different list will have multiple entries for a workflow if there is a version on either side
     # we want to grab the most recent one, so we sort and grab the most recent one for each workflow
     changedfiles = different.sort_values('date_created',ascending=False).groupby('filename').first()
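To make the loop's intent concrete: after an outer merge, pandas suffixes columns that exist on both sides with _x and _y, and the loop coalesces each pair by filling missing _x values from _y. A standalone toy example of the same pattern, using hypothetical data and a single field:

import pandas as pd

local = pd.DataFrame({'filename': ['a.bpmn'], 'type': ['bpmn']})
remote = pd.DataFrame({'filename': ['b.bpmn'], 'type': ['dmn']})

# The outer merge produces type_x (local) and type_y (remote) columns.
different = local.merge(remote, on='filename', how='outer')

for merge in ['type']:
    index = different[merge + '_x'].isnull()
    different.loc[index, merge + '_x'] = different[index][merge + '_y']

different = different[['type_x', 'filename']].copy()
different.columns = ['type', 'filename']
print(different)  # each filename now carries a single, filled-in type value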
@@ -363,7 +367,7 @@ def get_changed_files(remote,workflow_spec_id):
     changedfiles['new'] = False
     changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True
     changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True
-
+    changedfiles = changedfiles.replace({pd.np.nan: None})
     # return the list as a dict, let swagger convert it to json
     return changedfiles.reset_index().to_dict(orient='records')
 
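The added replace() converts pandas NaN values to None so they serialize as JSON null in the dict handed back to swagger. Worth noting: pd.np is a deprecated alias for numpy (removed in newer pandas releases), so an equivalent, future-proof form imports numpy directly. A minimal self-contained sketch with made-up data:

import numpy as np
import pandas as pd

changedfiles = pd.DataFrame({'version': [1.0, np.nan]})
changedfiles = changedfiles.replace({np.nan: None})  # NaN becomes None, which serializes as JSON null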
@@ -404,9 +408,21 @@ def get_workflow_spec_files_dataframe(workflowid):
                          'workflow_spec_id': file.file_model.workflow_spec_id,
                          'md5_hash':file.md5_hash,
                          'filename':file.file_model.name,
+                         'type':file.file_model.type.name,
+                         'primary':file.file_model.primary,
+                         'content_type':file.file_model.content_type,
+                         'primary_process_id':file.file_model.primary_process_id,
                          'date_created':file.date_created})
     if len(filelist) == 0:
-        return pd.DataFrame(columns=['file_model_id','workflow_spec_id','md5_hash','filename','date_created'])
+        return pd.DataFrame(columns=['file_model_id',
+                                     'workflow_spec_id',
+                                     'md5_hash',
+                                     'filename',
+                                     'type',
+                                     'primary',
+                                     'content_type',
+                                     'primary_process_id',
+                                     'date_created'])
     df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last()
     df['date_created'] = df['date_created'].astype('str')
     return df
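The sort/groupby at the end deduplicates the frame: sorting by date_created ascending and taking .last() per file_model_id keeps only the newest row for each file. A toy illustration with hypothetical data:

import pandas as pd

filelist = [
    {'file_model_id': 1, 'md5_hash': 'aaa', 'date_created': '2020-01-01'},
    {'file_model_id': 1, 'md5_hash': 'bbb', 'date_created': '2020-06-01'},
    {'file_model_id': 2, 'md5_hash': 'ccc', 'date_created': '2020-03-01'},
]
df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last()
print(df)  # file_model_id 1 resolves to md5_hash 'bbb', its most recent entry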