2019-12-27 18:50:03 +00:00
|
|
|
import io
|
2020-05-29 00:03:50 +00:00
|
|
|
from typing import List
|
2019-12-27 18:50:03 +00:00
|
|
|
|
|
|
|
import connexion
|
|
|
|
from flask import send_file
|
|
|
|
|
2020-01-14 16:45:12 +00:00
|
|
|
from crc import session
|
2020-04-15 15:13:32 +00:00
|
|
|
from crc.api.common import ApiError
|
2021-05-05 15:30:08 +00:00
|
|
|
from crc.api.user import verify_token
|
2021-04-20 12:12:27 +00:00
|
|
|
from crc.models.api_models import DocumentDirectory, DocumentDirectorySchema
|
2021-04-15 16:39:12 +00:00
|
|
|
from crc.models.file import FileSchema, FileModel, File, FileModelSchema, FileDataModel, FileType
|
2020-03-04 18:40:25 +00:00
|
|
|
from crc.models.workflow import WorkflowSpecModel
|
2020-02-28 16:54:11 +00:00
|
|
|
from crc.services.file_service import FileService
|
2019-12-27 18:50:03 +00:00
|
|
|
|
2021-04-21 18:00:19 +00:00
|
|
|
|
|
|
|
def ensure_exists(output, categories, expanded):
    """Recursively merge one category path (terminated by a file) into a directory tree.

    This is a recursive function: it expects a list of category levels with a
    file object at the end (kinda like duck, duck, duck, goose).  For each
    string level it makes sure that level already exists in ``output`` (a list
    of DocumentDirectory nodes), creating it if necessary, then recurses into
    that level's children with the remaining categories.

    Recursion terminates upon reaching an entry that is not a string, which is
    treated as the file object and appended as a leaf node.

    :param output: list of DocumentDirectory nodes to merge into (mutated in place)
    :param categories: remaining path, e.g. [cat1, cat2, cat3, file]
    :param expanded: whether nodes along this path should be marked expanded
    """
    current_item = categories[0]
    if not isinstance(current_item, str):
        # Terminal case: anything that is not a string is the file itself.
        leaf = DocumentDirectory(file=current_item)
        leaf.expanded = expanded
        output.append(leaf)
        return
    for item in output:
        if item.level == current_item:
            item.filecount += 1
            # Once any file under this level wants it expanded, it stays expanded.
            item.expanded = expanded or item.expanded
            ensure_exists(item.children, categories[1:], expanded)
            # Levels are unique by construction, so stop scanning here instead of
            # continuing through the rest of the list (original kept looping).
            return
    new_level = DocumentDirectory(level=current_item)
    new_level.filecount = 1
    new_level.expanded = expanded
    output.append(new_level)
    ensure_exists(new_level.children, categories[1:], expanded)
|
2021-04-20 12:12:27 +00:00
|
|
|
|
|
|
|
|
2021-04-21 18:00:19 +00:00
|
|
|
def get_document_directory(study_id, workflow_id=None):
    """Return a nested list of files arranged according to the category hierarchy
    defined in the document dictionary.

    :param study_id: study whose files are listed
    :param workflow_id: if given, entries for files belonging to this workflow
        are marked expanded
    :return: serialized list of DocumentDirectory trees
    """
    output = []
    doc_dict = FileService.get_doc_dictionary()
    file_models = FileService.get_files_for_study(study_id=study_id)
    files = (to_file_api(model) for model in file_models)
    for file in files:
        if file.irb_doc_code in doc_dict:
            doc_code = doc_dict[file.irb_doc_code]
        else:
            doc_code = {'category1': "Unknown", 'category2': None, 'category3': None}
        if workflow_id:
            expand = file.workflow_id == int(workflow_id)
        else:
            expand = False
        # Drop empty ('' or None) category levels so the file object is always the
        # terminal entry.  The original only filtered '', so the None categories of
        # the "Unknown" fallback were treated as the file and the real file was
        # never added to the tree.
        categories = [x for x in [doc_code['category1'], doc_code['category2'], doc_code['category3'], file]
                      if x is not None and x != '']
        ensure_exists(output, categories, expanded=expand)
    return DocumentDirectorySchema(many=True).dump(output)
|
|
|
|
|
2019-12-27 18:50:03 +00:00
|
|
|
|
2020-05-29 00:03:50 +00:00
|
|
|
def to_file_api(file_model):
    """Convert a FileModel into the File object we can return via the api."""
    latest_data = FileService.get_file_data(file_model.id)
    doc_dictionary = FileService.get_doc_dictionary()
    return File.from_models(file_model, latest_data, doc_dictionary)
|
2020-05-29 00:03:50 +00:00
|
|
|
|
|
|
|
|
2021-04-12 16:23:33 +00:00
|
|
|
def get_files(workflow_spec_id=None, workflow_id=None, form_field_key=None, study_id=None):
    """List file metadata filtered by workflow spec, workflow, study, and/or form field key.

    At least one of the parameters must be supplied.  When ``study_id`` is
    given, files are looked up through the study's workflows; otherwise the
    spec/workflow filters are used directly.

    :raises ApiError: 'missing_parameter' when no filter is supplied
    """
    if all(v is None for v in [workflow_spec_id, workflow_id, form_field_key, study_id]):
        # The original message omitted study_id even though it is an accepted filter.
        raise ApiError('missing_parameter',
                       'Please specify a workflow_spec_id, a study_id, or a '
                       'workflow_id with an optional form_field_key')

    if study_id is not None:
        file_models = FileService.get_files_for_study(study_id=study_id, irb_doc_code=form_field_key)
    else:
        file_models = FileService.get_files(workflow_spec_id=workflow_spec_id,
                                            workflow_id=workflow_id,
                                            irb_doc_code=form_field_key)

    files = (to_file_api(model) for model in file_models)
    return FileSchema(many=True).dump(files)
|
2020-02-04 02:56:18 +00:00
|
|
|
|
2020-03-23 16:22:26 +00:00
|
|
|
|
2020-03-13 19:03:57 +00:00
|
|
|
def get_reference_files():
    """Return the api representation of every reference file."""
    reference_models = FileService.get_files(is_reference=True)
    api_files = [to_file_api(model) for model in reference_models]
    return FileSchema(many=True).dump(api_files)
|
2020-03-13 19:03:57 +00:00
|
|
|
|
2019-12-27 18:50:03 +00:00
|
|
|
|
A major refactor of how we search and store files, as there was a lot of confusing bits in here.
From an API point of view you can do the following (and only the following)
/files?workflow_spec_id=x
* You can find all files associated with a workflow_spec_id, and add a file with a workflow_spec_id
/files?workflow_id=x
* You can find all files associated with a workflow_id, and add a file that is directly associated with the workflow
/files?workflow_id=x&form_field_key=y
* You can find all files associated with a form element on a running workflow, and add a new file.
Note: you can add multiple files to the same form_field_key, IF they have different file names. If the same name, the original file is archived,
and the new file takes its place.
The study endpoints always return a list of the file metadata associated with the study. Removed /studies-files, but there is an
endpoint called
/studies/all - that returns all the studies in the system, and does include their files.
On a deeper level:
The File model no longer contains:
- study_id,
- task_id,
- form_field_key
Instead, if the file is associated with workflow - then that is the one way it is connected to the study, and we use this relationship to find files for a study.
A file is never associated with a task_id, as these change when the workflow is reloaded.
The form_field_key must match the irb_doc_code, so when requesting files for a form field, we just look up the irb_doc_code.
2020-05-28 12:27:26 +00:00
|
|
|
def add_file(workflow_spec_id=None, workflow_id=None, form_field_key=None):
    """Store the uploaded 'file' part against either a workflow (with a form
    field key) or a workflow spec, and return the resulting file metadata.

    :raises ApiError: 'invalid_workflow_file' when a workflow_id is given
        without a form_field_key; 'invalid_file' when neither target is given.
    """
    file = connexion.request.files['file']

    if workflow_id:
        if form_field_key is None:
            raise ApiError('invalid_workflow_file',
                           'When adding a workflow related file, you must specify a form_field_key')
        file_model = FileService.add_workflow_file(workflow_id=workflow_id,
                                                   irb_doc_code=form_field_key,
                                                   name=file.filename,
                                                   content_type=file.content_type,
                                                   binary_data=file.stream.read())
    elif workflow_spec_id:
        # Make this upload the primary bpmn file only if the spec has none yet.
        existing_primaries = FileModel.query.filter(
            FileModel.workflow_spec_id == workflow_spec_id,
            FileModel.type == FileType.bpmn,
            FileModel.primary == True).all()
        primary = not existing_primaries
        workflow_spec = session.query(WorkflowSpecModel).filter_by(id=workflow_spec_id).first()
        file_model = FileService.add_workflow_spec_file(workflow_spec, file.filename, file.content_type,
                                                        file.stream.read(), primary=primary)
    else:
        raise ApiError("invalid_file", "You must supply either a workflow spec id or a workflow_id and form_field_key.")

    return FileSchema().dump(to_file_api(file_model))
|
2019-12-27 18:50:03 +00:00
|
|
|
|
|
|
|
|
2020-03-13 19:03:57 +00:00
|
|
|
def get_reference_file(name):
    """Stream the content of the named reference file back to the caller."""
    file_data = FileService.get_reference_file_data(name)
    response = send_file(
        io.BytesIO(file_data.data),
        attachment_filename=file_data.file_model.name,
        mimetype=file_data.file_model.content_type,
        cache_timeout=-1,  # Don't cache these files on the browser.
    )
    return response
|
|
|
|
|
|
|
|
|
|
|
|
def set_reference_file(name):
    """Uses the file service to manage reference-files. They will be used in script tasks to compute values.

    Creates the reference file if it does not exist yet, otherwise updates its
    content in place.  The uploaded file's extension must match ``name``'s.

    :raises ApiError: 'invalid_file' when no 'file' part was uploaded;
        'invalid_file_type' when the extensions do not match.
    """
    if 'file' not in connexion.request.files:
        raise ApiError('invalid_file',
                       'Expected a file named "file" in the multipart form request', status_code=400)

    file = connexion.request.files['file']

    name_extension = FileService.get_extension(name)
    file_extension = FileService.get_extension(file.filename)
    if name_extension != file_extension:
        raise ApiError('invalid_file_type',
                       "The file you uploaded has an extension '%s', but it should have an extension of '%s' " %
                       (file_extension, name_extension))

    file_models = FileService.get_files(name=name, is_reference=True)
    if len(file_models) == 0:
        file_model = FileService.add_reference_file(name, file.content_type, file.stream.read())
    else:
        # Capture update_file's return value (the original discarded it) so the
        # response reflects the updated model — consistent with update_file_data.
        file_model = FileService.update_file(file_models[0], file.stream.read(), file.content_type)

    return FileSchema().dump(to_file_api(file_model))
|
2020-03-13 19:03:57 +00:00
|
|
|
|
|
|
|
|
2020-01-31 15:39:19 +00:00
|
|
|
def update_file_data(file_id):
    """Replace an existing file's content with the uploaded 'file' part.

    :raises ApiError: 'no_such_file' when the id does not exist.
    """
    file_model = session.query(FileModel).filter_by(id=file_id).with_for_update().first()
    file = connexion.request.files['file']
    if file_model is None:
        raise ApiError('no_such_file', 'The file id you provided does not exist')
    updated_model = FileService.update_file(file_model, file.stream.read(), file.content_type)
    return FileSchema().dump(to_file_api(updated_model))
|
2019-12-27 18:50:03 +00:00
|
|
|
|
2020-12-07 13:49:38 +00:00
|
|
|
def get_file_data_by_hash(md5_hash):
    """Look up a file version by its md5 hash and stream its content back.

    :raises ApiError: 'no_such_file' when no version matches the hash.
    """
    file_data_model = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first()
    if file_data_model is None:
        # Guard added: the original dereferenced None and raised AttributeError
        # (a 500) for an unknown hash.
        raise ApiError('no_such_file', 'The file hash you provided does not exist')
    return get_file_data(file_data_model.file_model_id, version=file_data_model.version)
|
2019-12-27 18:50:03 +00:00
|
|
|
|
2020-05-23 19:21:30 +00:00
|
|
|
def get_file_data(file_id, version=None):
    """Stream a file's content (a specific version, or the latest by default).

    :raises ApiError: 'no_such_file' when the id does not exist.
    """
    file_data = FileService.get_file_data(file_id, version)
    if file_data is None:
        raise ApiError('no_such_file', 'The file id you provided does not exist')
    model = file_data.file_model
    return send_file(
        io.BytesIO(file_data.data),
        attachment_filename=model.name,
        mimetype=model.content_type,
        cache_timeout=-1,  # Don't cache these files on the browser.
        last_modified=file_data.date_created
    )
|
2019-12-27 18:50:03 +00:00
|
|
|
|
|
|
|
|
2021-05-05 15:30:08 +00:00
|
|
|
def get_file_data_link(file_id, auth_token, version=None):
    """Stream a file's content as a forced download, authenticating via a URL token.

    :raises ApiError: 'not_authenticated' for a bad token; 'no_such_file'
        when the id does not exist.
    """
    if not verify_token(auth_token):
        raise ApiError('not_authenticated', 'You need to include an authorization token in the URL with this')
    file_data = FileService.get_file_data(file_id, version)
    if file_data is None:
        raise ApiError('no_such_file', 'The file id you provided does not exist')
    model = file_data.file_model
    return send_file(
        io.BytesIO(file_data.data),
        attachment_filename=model.name,
        mimetype=model.content_type,
        cache_timeout=-1,  # Don't cache these files on the browser.
        last_modified=file_data.date_created,
        as_attachment=True  # Force a download rather than inline display.
    )
|
|
|
|
|
|
|
|
|
2019-12-27 18:50:03 +00:00
|
|
|
def get_file_info(file_id):
    """Return the metadata (not the content) for a single file.

    :raises ApiError: 'no_such_file' (404) when the id does not exist.
    """
    model = session.query(FileModel).filter_by(id=file_id).with_for_update().first()
    if model is None:
        raise ApiError('no_such_file', 'The file id you provided does not exist', status_code=404)
    return FileSchema().dump(to_file_api(model))
|
2019-12-27 18:50:03 +00:00
|
|
|
|
|
|
|
|
2020-01-31 15:39:19 +00:00
|
|
|
def update_file_info(file_id, body):
    """Update a file's metadata from the request body and return the new state.

    :raises ApiError: 'no_such_file' when no id is given;
        'unknown_file_model' when the id does not exist.
    """
    if file_id is None:
        raise ApiError('no_such_file', 'Please provide a valid File ID.')

    # Existence check only — the schema load below produces the model that is
    # actually persisted.
    file_model = session.query(FileModel).filter_by(id=file_id).first()

    if file_model is None:
        # file_id is typically an int path parameter; the original concatenated it
        # with str literals, raising TypeError instead of the intended ApiError.
        raise ApiError('unknown_file_model', 'The file_model "%s" is not recognized.' % file_id)

    file_model = FileModelSchema().load(body, session=session)
    session.add(file_model)
    session.commit()
    return FileSchema().dump(to_file_api(file_model))
|
2020-01-31 15:39:19 +00:00
|
|
|
|
|
|
|
|
2019-12-27 18:50:03 +00:00
|
|
|
def delete_file(file_id):
    """Delete the file with the given id (delegates entirely to FileService)."""
    FileService.delete_file(file_id)
|