diff --git a/crc/api.yml b/crc/api.yml
index afeb2bc2..b35ad793 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -578,6 +578,29 @@ paths:
       responses:
         '204':
           description: The file has been removed.
+  /file/{md5_hash}/data:
+    parameters:
+      - name: md5_hash
+        in: path
+        required: true
+        description: The md5 hash of the file requested
+        schema:
+          type: string
+    get:
+      operationId: crc.api.file.get_file_data_by_hash
+      summary: Returns only the file contents
+      security: [] # Disable security for this endpoint only.
+      tags:
+        - Files
+      responses:
+        '200':
+          description: Returns the actual file
+          content:
+            application/octet-stream:
+              schema:
+                type: string
+                format: binary
+                example: ''
   /file/{file_id}/data:
     parameters:
       - name: file_id
diff --git a/crc/api/file.py b/crc/api/file.py
index 5cf54221..4f0b655f 100644
--- a/crc/api/file.py
+++ b/crc/api/file.py
@@ -6,7 +6,7 @@ from flask import send_file
 
 from crc import session
 from crc.api.common import ApiError
-from crc.models.file import FileSchema, FileModel, File, FileModelSchema
+from crc.models.file import FileSchema, FileModel, File, FileModelSchema, FileDataModel
 from crc.models.workflow import WorkflowSpecModel
 from crc.services.file_service import FileService
 
@@ -99,6 +99,20 @@ def update_file_data(file_id):
     file_model = FileService.update_file(file_model, file.stream.read(), file.content_type)
     return FileSchema().dump(to_file_api(file_model))
 
+
+def get_file_data_by_hash(md5_hash):
+    """Return the contents of the file whose stored data has the given md5 hash.
+
+    Raises ApiError if no stored file data carries that hash, instead of
+    failing with an AttributeError on the empty query result.
+    """
+    file_data_model = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first()
+    if file_data_model is None:
+        raise ApiError('no_such_file', 'The md5 hash you provided does not match any file')
+    # NOTE(review): no version is passed, so get_file_data uses its version=None
+    # default; confirm that this serves the data row that matches the hash.
+    return get_file_data(file_data_model.file_model_id)
+
 
 def get_file_data(file_id, version=None):
     file_data = FileService.get_file_data(file_id, version)
diff --git a/crc/api/workflow.py b/crc/api/workflow.py
index 983097a6..f9c6ac58 100644
--- a/crc/api/workflow.py
+++ b/crc/api/workflow.py
@@ -345,11 +345,15 @@ def get_changed_files(remote,workflow_spec_id):
 
     # this takes the different date_created_x and date-created_y columns and
     # combines them back into one date_created column
-    index = different['date_created_x'].isnull()
-    different.loc[index,'date_created_x'] = different[index]['date_created_y']
-    different = different[['date_created_x','filename','location']].copy()
+    dualfields = ['date_created','type','primary','content_type','primary_process_id']
+    for merge in dualfields:
+        index = different[merge+'_x'].isnull()
+        different.loc[index,merge+'_x'] = different[index][merge+'_y']
 
-    different.columns=['date_created','filename','location']
+    fieldlist = [fld+'_x' for fld in dualfields]
+    different = different[ fieldlist + ['md5_hash','filename','location']].copy()
+
+    different.columns=dualfields+['md5_hash','filename','location']
     # our different list will have multiple entries for a workflow if there is a version on either side
     # we want to grab the most recent one, so we sort and grab the most recent one for each workflow
     changedfiles = different.sort_values('date_created',ascending=False).groupby('filename').first()
@@ -363,7 +367,8 @@ def get_changed_files(remote,workflow_spec_id):
     changedfiles['new'] = False
     changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True
     changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True
-
+    # the pd.np alias was removed in pandas 2.0; float('nan') is the same NaN value
+    changedfiles = changedfiles.replace({float('nan'): None})
     # return the list as a dict, let swagger convert it to json
     return changedfiles.reset_index().to_dict(orient='records')
 
@@ -404,9 +409,21 @@ def get_workflow_spec_files_dataframe(workflowid):
                          'workflow_spec_id': file.file_model.workflow_spec_id,
                          'md5_hash':file.md5_hash,
                          'filename':file.file_model.name,
+                         'type':file.file_model.type.name,
+                         'primary':file.file_model.primary,
+                         'content_type':file.file_model.content_type,
+                         'primary_process_id':file.file_model.primary_process_id,
                          'date_created':file.date_created})
     if len(filelist) == 0:
-        return pd.DataFrame(columns=['file_model_id','workflow_spec_id','md5_hash','filename','date_created'])
+        return pd.DataFrame(columns=['file_model_id',
+                                     'workflow_spec_id',
+                                     'md5_hash',
+                                     'filename',
+                                     'type',
+                                     'primary',
+                                     'content_type',
+                                     'primary_process_id',
+                                     'date_created'])
     df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last()
     df['date_created'] = df['date_created'].astype('str')
     return df