Merge remote-tracking branch 'origin/feature/reference_files' into feature/workflow_spec_categories

This commit is contained in:
Aaron Louie 2020-03-18 16:58:57 -04:00
commit 9a26fc8e80
8 changed files with 234 additions and 22 deletions

View File

@ -538,6 +538,67 @@ paths:
format: binary format: binary
example: '<?xml version="1.0" encoding="UTF-8"?><bpmn:definitions></bpmn:definitions>' example: '<?xml version="1.0" encoding="UTF-8"?><bpmn:definitions></bpmn:definitions>'
# /v1.0/workflow/0 # /v1.0/workflow/0
# Collection endpoint for reference files.
/reference_file:
# GET is handled by crc.api.file.get_reference_files.
get:
operationId: crc.api.file.get_reference_files
summary: Provides a list of existing reference files that are available in the system.
tags:
- Files
responses:
# The payload is file metadata only; file content is not included.
'200':
description: An array of file descriptions (not the file content)
content:
application/json:
schema:
type: array
items:
$ref: "#/components/schemas/File"
/reference_file/{name}:
  # Reference files are addressed by name rather than by numeric id.
  parameters:
    - name: name
      in: path
      required: true
      description: The special name of the reference file.
      schema:
        type: string
  get:
    operationId: crc.api.file.get_reference_file
    summary: Reference files are called by name rather than by id.
    tags:
      - Files
    responses:
      '200':
        description: Returns the actual file
        content:
          application/octet-stream:
            schema:
              type: string
              format: binary
              example: '<?xml version="1.0" encoding="UTF-8"?><bpmn:definitions></bpmn:definitions>'
  put:
    operationId: crc.api.file.set_reference_file
    summary: Update the contents of a named reference file.
    tags:
      - Files
    requestBody:
      content:
        multipart/form-data:
          schema:
            type: object
            properties:
              # The handler looks for the upload under the form field "file".
              file:
                type: string
                format: binary
    responses:
      '200':
        # The handler returns the serialized file model, not the binary content.
        description: Metadata describing the stored reference file (not the file content)
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/File"
# /v1.0/workflow/0
/workflow/{workflow_id}: /workflow/{workflow_id}:
parameters: parameters:
- name: workflow_id - name: workflow_id

View File

@ -1,3 +1,4 @@
import enum
import io import io
import os import os
from datetime import datetime from datetime import datetime
@ -21,6 +22,10 @@ def get_files(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=No
results = FileService.get_files(workflow_spec_id, study_id, workflow_id, task_id, form_field_key) results = FileService.get_files(workflow_spec_id, study_id, workflow_id, task_id, form_field_key)
return FileModelSchema(many=True).dump(results) return FileModelSchema(many=True).dump(results)
def get_reference_files():
    """Return the serialized metadata for every file flagged as a reference file."""
    reference_files = FileService.get_files(is_reference=True)
    schema = FileModelSchema(many=True)
    return schema.dump(reference_files)
def add_file(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=None, form_field_key=None): def add_file(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=None, form_field_key=None):
all_none = all(v is None for v in [workflow_spec_id, study_id, workflow_id, task_id, form_field_key]) all_none = all(v is None for v in [workflow_spec_id, study_id, workflow_id, task_id, form_field_key])
@ -43,6 +48,42 @@ def add_file(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=Non
return FileModelSchema().dump(file_model) return FileModelSchema().dump(file_model)
def get_reference_file(name):
    """Send the stored content of the reference file called ``name``.

    The lookup is delegated to ``FileService.get_reference_file_data``; the
    response carries the stored file name and content type.
    """
    file_data = FileService.get_reference_file_data(name)
    content = io.BytesIO(file_data.data)
    model = file_data.file_model
    return send_file(
        content,
        attachment_filename=model.name,
        mimetype=model.content_type,
        # Don't cache these files on the browser.
        cache_timeout=-1,
    )
def set_reference_file(name):
    """Create or update the reference file called ``name`` from a multipart upload.

    Reference files are managed through the file service and are meant to be
    used in script tasks to compute values.

    The upload must arrive in a form field named "file", and its extension must
    match the extension of ``name``; otherwise an ApiError is raised. Returns
    the serialized file model.
    """
    uploads = connexion.request.files
    if 'file' not in uploads:
        raise ApiError('invalid_file',
                       'Expected a file named "file" in the multipart form request', status_code=400)

    upload = uploads['file']

    # The reference name dictates the file type, so the upload has to agree with it.
    expected_extension = FileService.get_extension(name)
    uploaded_extension = FileService.get_extension(upload.filename)
    if uploaded_extension != expected_extension:
        raise ApiError('invalid_file_type',
                       "The file you uploaded has an extension '%s', but it should have an extension of '%s' " %
                       (uploaded_extension, expected_extension))

    existing = FileService.get_files(name=name, is_reference=True)
    if existing:
        # Update the first matching reference file in place.
        file_model = existing[0]
        FileService.update_file(file_model, upload.stream.read(), upload.content_type)
    else:
        file_model = FileService.add_reference_file(name, upload.content_type, upload.stream.read())
    return FileModelSchema().dump(file_model)
def update_file_data(file_id): def update_file_data(file_id):
file_model = session.query(FileModel).filter_by(id=file_id).with_for_update().first() file_model = session.query(FileModel).filter_by(id=file_id).with_for_update().first()
file = connexion.request.files['file'] file = connexion.request.files['file']

View File

@ -107,6 +107,7 @@ class WorkflowApi(object):
self.is_latest_spec = is_latest_spec self.is_latest_spec = is_latest_spec
self.is_active = is_active self.is_active = is_active
class WorkflowApiSchema(ma.Schema): class WorkflowApiSchema(ma.Schema):
class Meta: class Meta:
model = WorkflowApi model = WorkflowApi

View File

@ -66,6 +66,12 @@ class FileDataModel(db.Model):
class FileModel(db.Model): class FileModel(db.Model):
"""A file model defines one of the following increasingly specific types:
* A reference file, which just has a name and a reference flag set to true. These are global and available everywhere.
* A workflow specification file (such as a BPMN or DMN model, or a template).
* A script-generated file in a workflow, which is specific to a study, workflow, and task.
* An uploaded file in a workflow, which is specific to a study, workflow, task, AND a field value.
"""
__tablename__ = 'file' __tablename__ = 'file'
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.String) name = db.Column(db.String)
@ -73,6 +79,8 @@ class FileModel(db.Model):
primary = db.Column(db.Boolean) primary = db.Column(db.Boolean)
is_status = db.Column(db.Boolean) is_status = db.Column(db.Boolean)
content_type = db.Column(db.String) content_type = db.Column(db.String)
is_reference = db.Column(db.Boolean, nullable=False, default=False) # A global reference file.
primary = db.Column(db.Boolean) # Is this the primary BPMN in a workflow?
workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'), nullable=True) workflow_spec_id = db.Column(db.String, db.ForeignKey('workflow_spec.id'), nullable=True)
workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True) workflow_id = db.Column(db.Integer, db.ForeignKey('workflow.id'), nullable=True)
study_id = db.Column(db.Integer, db.ForeignKey('study.id'), nullable=True) study_id = db.Column(db.Integer, db.ForeignKey('study.id'), nullable=True)

View File

@ -1,14 +1,13 @@
import hashlib
import os import os
from datetime import datetime
from uuid import UUID from uuid import UUID
from xml.etree import ElementTree from xml.etree import ElementTree
from crc import session from crc import session
from crc.api.common import ApiErrorSchema, ApiError from crc.api.common import ApiError
from crc.models.file import FileType, FileDataModel, FileModelSchema, FileModel, CONTENT_TYPES from crc.models.file import FileType, FileDataModel, FileModel
from crc.models.workflow import WorkflowSpecModel from crc.models.workflow import WorkflowSpecModel
from crc.services.workflow_processor import WorkflowProcessor from crc.services.workflow_processor import WorkflowProcessor
import hashlib
class FileService(object): class FileService(object):
@ -55,25 +54,39 @@ class FileService(object):
) )
return FileService.update_file(file_model, binary_data, content_type) return FileService.update_file(file_model, binary_data, content_type)
@staticmethod
def add_reference_file(name, content_type, binary_data):
    """Create a reference file that is not tied to any spec or workflow.

    Builds a new FileModel flagged as a reference and stores ``binary_data``
    for it through ``update_file``, whose result is returned.

    Only one file should exist per reference name; nothing in this method
    enforces that, so callers are expected to check first.
    """
    reference_model = FileModel(is_reference=True, name=name)
    return FileService.update_file(reference_model, binary_data, content_type)
@staticmethod
def get_extension(file_name):
basename, file_extension = os.path.splitext(file_name)
return file_extension.lower().strip()[1:]
@staticmethod @staticmethod
def update_file(file_model, binary_data, content_type): def update_file(file_model, binary_data, content_type):
file_data_model = session.query(FileDataModel).\ file_data_model = session.query(FileDataModel). \
filter_by(file_model_id=file_model.id, filter_by(file_model_id=file_model.id,
version=file_model.latest_version version=file_model.latest_version
).with_for_update().first() ).with_for_update().first()
md5_checksum = UUID(hashlib.md5(binary_data).hexdigest()) md5_checksum = UUID(hashlib.md5(binary_data).hexdigest())
if(file_data_model is not None and md5_checksum == file_data_model.md5_hash): if (file_data_model is not None and md5_checksum == file_data_model.md5_hash):
# This file does not need to be updated, it's the same file. # This file does not need to be updated, it's the same file.
return file_model return file_model
# Verify the extension # Verify the extension
basename, file_extension = os.path.splitext(file_model.name) file_extension = FileService.get_extension(file_model.name)
file_extension = file_extension.lower().strip()[1:]
if file_extension not in FileType._member_names_: if file_extension not in FileType._member_names_:
return ApiErrorSchema().dump(ApiError('unknown_extension', raise ApiError('unknown_extension',
'The file you provided does not have an accepted extension:' + 'The file you provided does not have an accepted extension:' +
file_extension)), 404 file_extension, status_code=404)
else: else:
file_model.type = FileType[file_extension] file_model.type = FileType[file_extension]
file_model.content_type = content_type file_model.content_type = content_type
@ -93,8 +106,10 @@ class FileService(object):
return file_model return file_model
@staticmethod @staticmethod
def get_files(workflow_spec_id=None, study_id=None, workflow_id=None, task_id=None, form_field_key=None): def get_files(workflow_spec_id=None,
query = session.query(FileModel) study_id=None, workflow_id=None, task_id=None, form_field_key=None,
name=None, is_reference=False):
query = session.query(FileModel).filter_by(is_reference=is_reference)
if workflow_spec_id: if workflow_spec_id:
query = query.filter_by(workflow_spec_id=workflow_spec_id) query = query.filter_by(workflow_spec_id=workflow_spec_id)
if study_id: if study_id:
@ -105,15 +120,28 @@ class FileService(object):
query = query.filter_by(task_id=str(task_id)) query = query.filter_by(task_id=str(task_id))
if form_field_key: if form_field_key:
query = query.filter_by(form_field_key=form_field_key) query = query.filter_by(form_field_key=form_field_key)
if name:
    # Filter on the requested file name (this previously compared the name
    # column against form_field_key, so lookups by name never matched on name).
    query = query.filter_by(name=name)
results = query.all() results = query.all()
return results return results
@staticmethod @staticmethod
def get_file_data(file_id): def get_file_data(file_id, file_model=None):
"""Returns the file_data that is associated with the file model id""" """Returns the file_data that is associated with the file model id, if an actual file_model
file_model = session.query(FileModel).filter(FileModel.id == file_id).first() is provided, uses that rather than looking it up again."""
return session.query(FileDataModel)\ if file_model is None:
.filter(FileDataModel.file_model_id == file_id)\ file_model = session.query(FileModel).filter(FileModel.id == file_id).first()
.filter(FileDataModel.version == file_model.latest_version)\ return session.query(FileDataModel) \
.filter(FileDataModel.file_model_id == file_id) \
.filter(FileDataModel.version == file_model.latest_version) \
.first() .first()
@staticmethod
def get_reference_file_data(file_name):
    """Return the latest file data for the reference file called ``file_name``.

    Raises an ApiError with code "file_not_found" (HTTP 404) when no reference
    file with that name exists.
    """
    file_model = session.query(FileModel). \
        filter_by(is_reference=True, name=file_name). \
        first()
    if file_model is None:
        # Report a missing file explicitly as "not found" instead of relying on
        # whatever status ApiError uses by default.
        raise ApiError("file_not_found",
                       "There is no reference file with the name '%s'" % file_name,
                       status_code=404)
    # Hand over the model we already loaded so it is not queried a second time.
    return FileService.get_file_data(file_model.id, file_model)

View File

@ -0,0 +1,28 @@
"""empty message
Revision ID: 0c8a2f8db28c
Revises: 8856126b6658
Create Date: 2020-03-13 14:05:46.983484
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '0c8a2f8db28c'
down_revision = '8856126b6658'
branch_labels = None
depends_on = None
def upgrade():
    """Add the non-nullable boolean ``is_reference`` column to the ``file`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    # A server-side default is needed here: adding a NOT NULL column without one
    # fails when the table already contains rows. Existing files become
    # non-reference files.
    op.add_column('file', sa.Column('is_reference', sa.Boolean(), nullable=False,
                                    server_default=sa.false()))
    # ### end Alembic commands ###
def downgrade():
    """Remove the ``is_reference`` column from the ``file`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column(table_name='file', column_name='is_reference')
    # ### end Alembic commands ###

Binary file not shown.

View File

@ -1,9 +1,8 @@
import io import io
import json import json
from datetime import datetime
from crc import session from crc import session
from crc.models.file import FileModel, FileType, FileModelSchema, FileDataModel from crc.models.file import FileModel, FileType, FileModelSchema
from crc.models.workflow import WorkflowSpecModel from crc.models.workflow import WorkflowSpecModel
from tests.base_test import BaseTest from tests.base_test import BaseTest
@ -58,6 +57,52 @@ class TestFilesApi(BaseTest):
file2 = FileModelSchema().load(json_data, session=session) file2 = FileModelSchema().load(json_data, session=session)
self.assertEqual(file, file2) self.assertEqual(file, file2)
def test_set_reference_file(self):
    """Uploading a reference file returns a model flagged as a reference with the expected type."""
    reference_name = "irb_document_types.xls"
    # The uploaded file's own name is irrelevant; only its extension has to match.
    upload = {'file': (io.BytesIO(b"abcdef"), "does_not_matter.xls")}
    url = '/v1.0/reference_file/%s' % reference_name
    rv = self.app.put(url, data=upload, follow_redirects=True,
                      content_type='multipart/form-data')
    self.assert_success(rv)
    self.assertIsNotNone(rv.get_data())

    payload = json.loads(rv.get_data(as_text=True))
    stored = FileModelSchema().load(payload, session=session)
    self.assertEqual(FileType.xls, stored.type)
    self.assertTrue(stored.is_reference)
    self.assertEqual("application/vnd.ms-excel", stored.content_type)
def test_set_reference_file_bad_extension(self):
    """An upload whose extension differs from the reference name is rejected."""
    reference_name = "irb_document_types.xls"
    # .ppt deliberately disagrees with the .xls of the reference name.
    upload = {'file': (io.BytesIO(b"abcdef"), "does_not_matter.ppt")}
    url = '/v1.0/reference_file/%s' % reference_name
    rv = self.app.put(url, data=upload, follow_redirects=True,
                      content_type='multipart/form-data')
    self.assert_failure(rv, error_code="invalid_file_type")
def test_get_reference_file(self):
    """A reference file uploaded by name can be fetched again with identical content."""
    file_name = "irb_document_types.xls"
    data = {'file': (io.BytesIO(b"abcdef"), "some crazy thing do not care.xls")}
    rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True,
                      content_type='multipart/form-data')
    # Check the setup step so a failed upload is reported as such, not as a failed GET.
    self.assert_success(rv)

    rv = self.app.get('/v1.0/reference_file/%s' % file_name)
    self.assert_success(rv)
    data_out = rv.get_data()
    self.assertEqual(b"abcdef", data_out)
def test_list_reference_files(self):
    """The reference file listing contains exactly the one file that was uploaded."""
    file_name = "irb_document_types.xls"
    data = {'file': (io.BytesIO(b"abcdef"), file_name)}
    rv = self.app.put('/v1.0/reference_file/%s' % file_name, data=data, follow_redirects=True,
                      content_type='multipart/form-data')
    # Check the setup step so a failed upload is reported as such, not as an empty listing.
    self.assert_success(rv)

    rv = self.app.get('/v1.0/reference_file',
                      follow_redirects=True,
                      content_type="application/json")
    self.assert_success(rv)
    json_data = json.loads(rv.get_data(as_text=True))
    self.assertEqual(1, len(json_data))
    files = FileModelSchema(many=True).load(json_data, session=session)
    self.assertEqual(file_name, files[0].name)
    self.assertTrue(files[0].is_reference)
def test_update_file_info(self): def test_update_file_info(self):
self.load_example_data() self.load_example_data()
file: FileModel = session.query(FileModel).first() file: FileModel = session.query(FileModel).first()
@ -118,7 +163,6 @@ class TestFilesApi(BaseTest):
file = FileModelSchema().load(json_data, session=session) file = FileModelSchema().load(json_data, session=session)
self.assertEqual(1, file.latest_version) self.assertEqual(1, file.latest_version)
def test_get_file(self): def test_get_file(self):
self.load_example_data() self.load_example_data()
spec = session.query(WorkflowSpecModel).first() spec = session.query(WorkflowSpecModel).first()
@ -137,3 +181,4 @@ class TestFilesApi(BaseTest):
rv = self.app.delete('/v1.0/file/%i' % file.id) rv = self.app.delete('/v1.0/file/%i' % file.id)
rv = self.app.get('/v1.0/file/%i' % file.id) rv = self.app.get('/v1.0/file/%i' % file.id)
self.assertEqual(404, rv.status_code) self.assertEqual(404, rv.status_code)