2020-05-20 04:12:48 +00:00
|
|
|
import hashlib
|
2021-09-02 13:52:58 +00:00
|
|
|
import io
|
2020-04-08 17:28:43 +00:00
|
|
|
import json
|
2020-02-10 21:19:23 +00:00
|
|
|
import os
|
|
|
|
from datetime import datetime
|
2021-09-02 13:52:58 +00:00
|
|
|
import random
|
|
|
|
import string
|
2021-06-18 20:41:55 +00:00
|
|
|
|
|
|
|
import pandas as pd
|
2020-08-14 17:04:22 +00:00
|
|
|
from github import Github, GithubObject, UnknownObjectException
|
2020-03-04 18:40:25 +00:00
|
|
|
from uuid import UUID
|
2020-06-25 18:02:16 +00:00
|
|
|
from lxml import etree
|
2020-02-10 21:19:23 +00:00
|
|
|
|
2020-05-29 00:03:50 +00:00
|
|
|
from SpiffWorkflow.bpmn.parser.ValidationException import ValidationException
|
2020-12-28 22:33:38 +00:00
|
|
|
from lxml.etree import XMLSyntaxError
|
2020-05-29 00:03:50 +00:00
|
|
|
from sqlalchemy import desc
|
2020-05-30 19:37:04 +00:00
|
|
|
from sqlalchemy.exc import IntegrityError
|
2020-03-19 21:13:30 +00:00
|
|
|
|
2020-05-30 19:37:04 +00:00
|
|
|
from crc import session, app
|
2020-03-13 19:03:57 +00:00
|
|
|
from crc.api.common import ApiError
|
2021-05-06 00:21:33 +00:00
|
|
|
from crc.models.data_store import DataStoreModel
|
2020-04-24 10:58:24 +00:00
|
|
|
from crc.models.file import FileType, FileDataModel, FileModel, LookupFileModel, LookupDataModel
|
2021-08-10 13:15:38 +00:00
|
|
|
from crc.models.workflow import WorkflowSpecModel, WorkflowModel, WorkflowSpecDependencyFile, WorkflowLibraryModel
|
2021-03-23 14:17:48 +00:00
|
|
|
from crc.services.cache_service import cache
|
2021-07-06 21:09:00 +00:00
|
|
|
from crc.services.user_service import UserService
|
2021-04-20 12:12:27 +00:00
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
|
|
def camel_to_snake(camel):
    """Convert a CamelCase (or spaced, capitalised) label into snake_case.

    Surrounding whitespace is stripped, inner spaces and question marks are
    removed, an underscore is inserted before every capital letter that is
    not at the start of the string, and the result is lower-cased.

    This exists because spreadsheet headings are used as object keys, and
    one of the headings was "Who Uploads?" (which becomes "who_uploads").
    """
    # Plain str.replace is used for the literal characters: a bare '?' is a
    # regex quantifier, so re.sub('?', ...) raises re.error instead of
    # stripping the question mark.
    camel = camel.strip().replace(' ', '').replace('?', '')
    return re.sub(r'(?<!^)(?=[A-Z])', '_', camel).lower()
|
|
|
|
|
2020-02-10 21:19:23 +00:00
|
|
|
|
|
|
|
class FileService(object):
|
2020-06-01 01:15:40 +00:00
|
|
|
|
2020-02-10 21:19:23 +00:00
|
|
|
@staticmethod
def add_workflow_spec_file(workflow_spec: WorkflowSpecModel,
                           name, content_type, binary_data, primary=False, is_status=False):
    """Attach a brand-new file to a workflow specification.

    Raises an ApiError when the spec already has a file with this name;
    otherwise a FileModel is built and handed to update_file, whose result
    is returned.
    """
    duplicate = session.query(FileModel).filter(
        FileModel.workflow_spec_id == workflow_spec.id,
        FileModel.name == name,
    ).first()
    if duplicate:
        # Replacing an existing file is the job of the update path, not this one.
        raise ApiError(code="Duplicate File",
                       message='If you want to replace the file, use the update mechanism.')

    new_model = FileModel(workflow_spec_id=workflow_spec.id, name=name,
                          primary=primary, is_status=is_status)
    return FileService.update_file(new_model, binary_data, content_type)
|
2020-02-10 21:19:23 +00:00
|
|
|
|
2021-07-06 17:10:20 +00:00
|
|
|
|
2020-04-17 17:30:32 +00:00
|
|
|
|
2021-02-10 16:58:19 +00:00
|
|
|
@staticmethod
@cache
def is_workflow_review(workflow_spec_id):
    """Report whether any file attached to the given spec has is_review set."""
    spec_files = session.query(FileModel).filter(
        FileModel.workflow_spec_id == workflow_spec_id).all()
    return any(spec_file.is_review for spec_file in spec_files)
|
|
|
|
|
2021-05-27 16:24:30 +00:00
|
|
|
@staticmethod
def update_irb_code(file_id, irb_doc_code):
    """Set the irb_doc_code of an existing file and commit the change.

    Callers are expected to pass a code that is a known entry in the
    irb_documents.xslx reference document; that is not checked here.

    Returns True on success.  Raises ApiError("invalid_file_id") when no
    FileModel has the given id.
    """
    file_model = session.query(FileModel).filter(FileModel.id == file_id).first()
    if file_model is None:
        # %s rather than %d: a None or string id must still surface as this
        # ApiError instead of a TypeError raised by the formatting itself.
        raise ApiError("invalid_file_id",
                       "When updating the irb_doc_code for a file, that file_id must already exist. "
                       "This file_id is not found in the database '%s'" % file_id)

    file_model.irb_doc_code = irb_doc_code
    session.commit()
    return True
|
|
|
|
|
2021-02-10 16:58:19 +00:00
|
|
|
|
2020-02-11 20:03:25 +00:00
|
|
|
@staticmethod
def add_workflow_file(workflow_id, irb_doc_code, task_spec_name, name, content_type, binary_data):
    """Store data for a file on a running workflow, creating the file if needed.

    An existing FileModel is reused when workflow id, name, task spec and
    irb_doc_code all match; either way the model is passed to update_file
    and that call's result is returned.
    """
    criteria = (
        FileModel.workflow_id == workflow_id,
        FileModel.name == name,
        FileModel.task_spec == task_spec_name,
        FileModel.irb_doc_code == irb_doc_code,
    )
    target = session.query(FileModel).filter(*criteria).first()
    if not target:
        target = FileModel(workflow_id=workflow_id, name=name,
                           task_spec=task_spec_name, irb_doc_code=irb_doc_code)
    return FileService.update_file(target, binary_data, content_type)
|
|
|
|
|
2020-05-23 19:08:17 +00:00
|
|
|
@staticmethod
def get_workflow_files(workflow_id):
    """Returns the non-archived file models of a running workflow, ordered by id."""
    active_files = session.query(FileModel).filter(
        FileModel.workflow_id == workflow_id,
        FileModel.archived == False,
    )
    return active_files.order_by(FileModel.id).all()
|
|
|
|
|
2020-03-13 19:03:57 +00:00
|
|
|
@staticmethod
def add_reference_file(name, content_type, binary_data):
    """Store a reference file, i.e. one tied to neither a spec nor a workflow.

    A reference file with the same name is reused if one exists, so each
    reference name maps to a single file; the data goes through update_file.
    """
    reference = session.query(FileModel).filter(
        FileModel.is_reference == True,
        FileModel.name == name,
    ).first()
    if not reference:
        reference = FileModel(name=name, is_reference=True)
    return FileService.update_file(reference, binary_data, content_type)
|
|
|
|
|
|
|
|
@staticmethod
def get_extension(file_name):
    """Return the lower-cased extension of file_name without the leading dot."""
    suffix = os.path.splitext(file_name)[1]
    return suffix.lower().strip()[1:]
|
|
|
|
|
2020-02-10 21:19:23 +00:00
|
|
|
@staticmethod
def update_file(file_model, binary_data, content_type):
    """Record binary_data as the newest version of file_model.

    If the md5 of binary_data equals that of the most recently created
    version, nothing new is stored and the file is only un-archived.
    Otherwise the extension is validated against FileType, BPMN metadata is
    refreshed for .bpmn files, and a FileDataModel with the next version
    number is committed.  Returns file_model.

    Raises ApiError for an unknown extension or for BPMN content that is
    not parseable XML.
    """
    session.flush()  # Assure the database is up-to-date before running this.

    previous = session.query(FileDataModel) \
        .filter(FileDataModel.file_model_id == file_model.id) \
        .order_by(desc(FileDataModel.date_created)) \
        .first()

    checksum = UUID(hashlib.md5(binary_data).hexdigest())
    byte_count = len(binary_data)

    if previous is not None and checksum == previous.md5_hash:
        # Same content as the latest version: no new data row is needed,
        # but an archived file is brought back.
        file_model.archived = False
        session.add(file_model)
        session.commit()
        return file_model

    # Verify the extension
    extension = FileService.get_extension(file_model.name)
    if extension not in FileType._member_names_:
        raise ApiError('unknown_extension',
                       'The file you provided does not have an accepted extension:' +
                       extension, status_code=404)
    file_model.type = FileType[extension]
    file_model.content_type = content_type
    file_model.archived = False  # Unarchive the file if it is archived.

    next_version = 1 if previous is None else previous.version + 1

    # If this is a BPMN, extract the process id.
    if file_model.type == FileType.bpmn:
        try:
            parsed = etree.fromstring(binary_data)
            file_model.primary_process_id = FileService.get_process_id(parsed)
            file_model.is_review = FileService.has_swimlane(parsed)
        except XMLSyntaxError as xse:
            raise ApiError("invalid_xml", "Failed to parse xml: " + str(xse), file_name=file_model.name)

    try:
        uploader_uid = UserService.current_user().uid
    except ApiError:
        # The user lookup failed with an ApiError; the version is stored without a uid.
        uploader_uid = None

    data_record = FileDataModel(
        data=binary_data, file_model_id=file_model.id, file_model=file_model,
        version=next_version, md5_hash=checksum,
        size=byte_count, user_uid=uploader_uid
    )
    session.add_all([file_model, data_record])
    session.commit()
    session.flush()  # Assure the id is set on the model before returning it.

    return file_model
|
|
|
|
|
2021-02-10 16:58:19 +00:00
|
|
|
@staticmethod
def has_swimlane(et_root: etree.Element):
    """
    Tell whether the XML contains at least one bpmn:lane element whose
    name attribute is non-empty.
    """
    bpmn_ns = {'bpmn': 'http://www.omg.org/spec/BPMN/20100524/MODEL'}
    lanes = et_root.xpath('//bpmn:lane', namespaces=bpmn_ns)
    return any(lane.get('name') for lane in lanes)
|
|
|
|
|
2020-05-29 00:03:50 +00:00
|
|
|
@staticmethod
def get_process_id(et_root: etree.Element):
    """Return the id of the primary executable process in a BPMN document.

    Direct children of et_root whose tag ends in 'process' and that carry a
    non-empty isExecutable attribute are the candidates.  With a single
    candidate its id is returned; with several, the first one that directly
    contains a startEvent wins.

    Raises ValidationException when there is no candidate, or when none of
    several candidates contains a start event.
    """
    def _tag_ends_with(node, suffix):
        # Comments and processing instructions show up during iteration with
        # a non-string tag; they can never match and must not raise.
        return isinstance(node.tag, str) and node.tag.endswith(suffix)

    # NOTE(review): any non-empty isExecutable value counts here, including
    # the string "false" - confirm whether that is intended.
    process_elements = [
        child for child in et_root
        if _tag_ends_with(child, 'process') and child.attrib.get('isExecutable', False)
    ]

    if not process_elements:
        raise ValidationException('No executable process tag found')

    if len(process_elements) == 1:
        return process_elements[0].attrib['id']

    # Several executable processes: the one holding the start event is primary.
    for process in process_elements:
        if any(_tag_ends_with(node, 'startEvent') for node in process):
            return process.attrib['id']

    # .get() so a root without an id still produces the intended error
    # rather than a KeyError while building the message.
    raise ValidationException('No start event found in %s' % et_root.attrib.get('id'))
|
|
|
|
|
2020-02-10 21:19:23 +00:00
|
|
|
@staticmethod
def get_files_for_study(study_id, irb_doc_code=None):
    """Return the non-archived files of every workflow belonging to a study.

    Files are reached through their workflow (FileModel joined to
    WorkflowModel); irb_doc_code, when given, narrows the result to files
    carrying that code.
    """
    study_files = session.query(FileModel) \
        .join(WorkflowModel) \
        .filter(WorkflowModel.study_id == study_id, FileModel.archived == False)
    if irb_doc_code:
        study_files = study_files.filter(FileModel.irb_doc_code == irb_doc_code)
    return study_files.all()
|
|
|
|
|
|
|
|
@staticmethod
def get_files(workflow_spec_id=None, workflow_id=None,
              name=None, is_reference=False, irb_doc_code=None, include_libraries=False):
    """Return non-archived FileModels matching the given criteria, ordered by id.

    workflow_spec_id takes precedence over workflow_id.  With a spec id and
    include_libraries set, files of the spec's libraries are included too.
    irb_doc_code is only applied together with workflow_id.  name, when
    given, restricts the result to files with that name.
    """
    files = session.query(FileModel).filter_by(is_reference=is_reference)

    if workflow_spec_id:
        if not include_libraries:
            files = files.filter(FileModel.workflow_spec_id == workflow_spec_id)
        else:
            library_links = session.query(WorkflowLibraryModel).filter(
                WorkflowLibraryModel.workflow_spec_id == workflow_spec_id).all()
            # The spec itself plus every library spec it links to.
            spec_ids = [link.library_spec_id for link in library_links] + [workflow_spec_id]
            files = files.filter(FileModel.workflow_spec_id.in_(spec_ids))
    elif workflow_id:
        files = files.filter_by(workflow_id=workflow_id)
        if irb_doc_code:
            files = files.filter_by(irb_doc_code=irb_doc_code)
    elif is_reference:
        files = files.filter_by(is_reference=True)

    if name:
        files = files.filter_by(name=name)

    return files.filter(FileModel.archived == False).order_by(FileModel.id).all()
|
|
|
|
|
|
|
|
@staticmethod
def get_spec_data_files(workflow_spec_id, workflow_id=None, name=None, include_libraries=False):
    """Returns the FileDataModels that belong to a workflow specification.

    With a workflow_id, the data rows linked to that workflow through
    WorkflowSpecDependencyFile are returned, ordered by id.  Without one,
    the latest data of each current spec file is returned.  name, when
    given, restricts either result to files with that name.
    """
    if not workflow_id:
        spec_files = FileService.get_files(workflow_spec_id=workflow_spec_id,
                                           include_libraries=include_libraries)
        return [FileService.get_file_data(spec_file.id)
                for spec_file in spec_files
                if not name or spec_file.name == name]

    pinned = session.query(FileDataModel) \
        .join(WorkflowSpecDependencyFile) \
        .filter(WorkflowSpecDependencyFile.workflow_id == workflow_id) \
        .order_by(FileDataModel.id)
    if name:
        pinned = pinned.join(FileModel).filter(FileModel.name == name)
    return pinned.all()
|
2020-05-23 19:21:30 +00:00
|
|
|
|
2020-05-29 00:03:50 +00:00
|
|
|
@staticmethod
def get_workflow_data_files(workflow_id=None):
    """Returns the latest FileDataModel of every file on a running workflow.

    These are the files uploaded or generated along with the workflow
    itself; they are not related to the spec in any way.
    """
    return [FileService.get_file_data(workflow_file.id)
            for workflow_file in FileService.get_files(workflow_id=workflow_id)]
|
|
|
|
|
|
|
|
@staticmethod
def get_file_data(file_id: int, version: int = None):
    """Returns the data row for the requested version of a file, or the most
    recently created one when no version is given."""
    candidates = session.query(FileDataModel).filter(FileDataModel.file_model_id == file_id)
    if not version:
        return candidates.order_by(desc(FileDataModel.date_created)).first()
    return candidates.filter(FileDataModel.version == version).first()
|
2020-03-13 19:03:57 +00:00
|
|
|
|
|
|
|
@staticmethod
def get_reference_file_data(file_name):
    """Return the latest data of the reference file called file_name.

    Raises ApiError("file_not_found") when no such reference file exists.
    """
    reference = session.query(FileModel).filter(
        FileModel.is_reference == True,
        FileModel.name == file_name,
    ).first()
    if reference:
        return FileService.get_file_data(reference.id)
    raise ApiError("file_not_found", "There is no reference file with the name '%s'" % file_name)
|
2020-04-15 15:13:32 +00:00
|
|
|
|
|
|
|
@staticmethod
def get_workflow_file_data(workflow, file_name):
    """This method should be deleted, find where it is used, and remove this method.

    Given a SPIFF Workflow Model, locates its spec in the database and
    returns the data of the file with the given name within that spec.
    Raises ApiError when either the spec or the file cannot be found.
    """
    spec_model = FileService.find_spec_model_in_db(workflow)
    if spec_model is None:
        raise ApiError(code="unknown_workflow",
                       message="Something is wrong. I can't find the workflow you are using.")

    matching_data = session.query(FileDataModel) \
        .join(FileModel) \
        .filter(FileModel.name == file_name,
                FileModel.workflow_spec_id == spec_model.id) \
        .first()
    if matching_data is not None:
        return matching_data

    raise ApiError(code="file_missing",
                   message="Can not find a file called '%s' within workflow specification '%s'"
                           % (file_name, spec_model.id))
|
|
|
|
|
|
|
|
@staticmethod
def find_spec_model_in_db(workflow):
    """Find the WorkflowSpecModel behind a workflow, walking outwards if needed.

    The lookup matches the workflow's spec name against the
    primary_process_id stored on a spec's files.  When nothing matches and
    the workflow has a distinct outer_workflow (a sub-workflow), the search
    is repeated on the outer one.  Returns None if no level matches.
    """
    current = workflow
    while True:
        spec_model = session.query(WorkflowSpecModel).join(FileModel) \
            .filter(FileModel.primary_process_id == current.spec.name).first()
        if spec_model is None and current != current.outer_workflow:
            # Not found at this level; try the enclosing workflow.
            current = current.outer_workflow
            continue
        return spec_model
|
|
|
|
|
2020-04-24 10:58:24 +00:00
|
|
|
@staticmethod
def delete_file(file_id):
    """Delete a file together with its data versions, lookup tables and
    data-store rows; if the database refuses, archive the file instead."""
    try:
        versions = session.query(FileDataModel).filter_by(file_model_id=file_id).all()
        for version in versions:
            lookups = session.query(LookupFileModel).filter_by(file_data_model_id=version.id).all()
            for lookup in lookups:
                # Lookup rows go before the lookup file that owns them.
                session.query(LookupDataModel).filter_by(lookup_file_model_id=lookup.id).delete()
                session.query(LookupFileModel).filter_by(id=lookup.id).delete()
        session.query(FileDataModel).filter_by(file_model_id=file_id).delete()
        session.query(DataStoreModel).filter_by(file_id=file_id).delete()
        session.query(FileModel).filter_by(id=file_id).delete()
        session.commit()
    except IntegrityError as integrity_error:
        # We can't delete the file or file data, because it is referenced elsewhere,
        # but we can at least mark it as deleted on the table.
        session.rollback()
        blocked_file = session.query(FileModel).filter_by(id=file_id).first()
        blocked_file.archived = True
        session.commit()
        app.logger.info("Failed to delete file, so archiving it instead. %i, due to %s"
                        % (file_id, str(integrity_error)))
|
2020-07-17 22:59:25 +00:00
|
|
|
|
|
|
|
@staticmethod
def get_repo_branches():
    """Return the branch names of the repository named by the GITHUB_REPO
    setting, using the GITHUB_TOKEN setting to authenticate."""
    token, repo_name = app.config['GITHUB_TOKEN'], app.config['GITHUB_REPO']
    repository = Github(token).get_user().get_repo(repo_name)
    return [branch.name for branch in repository.get_branches()]
|
|
|
|
|
|
|
|
@staticmethod
def update_from_github(file_ids, source_target=GithubObject.NotSet):
    """Overwrite the newest stored version of each file with its GitHub content.

    For every id in file_ids the FileDataModel with the highest version is
    loaded, the file of the same name is fetched from the configured
    repository at ref source_target, and the stored bytes are replaced and
    committed.

    Returns an error dict as soon as a file is missing from the repository
    (later ids are then not processed); returns None otherwise.
    """
    gh_token = app.config['GITHUB_TOKEN']
    github_repo = app.config['GITHUB_REPO']
    repo = Github(gh_token).get_user().get_repo(github_repo)

    for file_id in file_ids:
        file_data_model = FileDataModel.query.filter_by(
            file_model_id=file_id
        ).order_by(
            desc(FileDataModel.version)
        ).first()
        try:
            repo_file = repo.get_contents(file_data_model.file_model.name, ref=source_target)
        except UnknownObjectException:
            return {'error': 'Attempted to update from repository but file was not present'}

        content = repo_file.decoded_content
        file_data_model.data = content
        # Keep checksum and size in step with the new bytes: update_file
        # compares md5_hash to decide whether an upload is a duplicate, so a
        # stale hash would make it skip or accept the wrong content.
        file_data_model.md5_hash = UUID(hashlib.md5(content).hexdigest())
        file_data_model.size = len(content)
        session.add(file_data_model)
        session.commit()
|
|
|
|
|
|
|
|
@staticmethod
def publish_to_github(file_ids):
    """Create or update each listed file in the configured GitHub repository.

    For every id in file_ids the newest stored version is published to the
    branch named by the TARGET_BRANCH setting (the repository default when
    that setting is empty).  A file missing from the repository is created,
    an existing one is updated in place.

    Returns a dict with 'created' and/or 'updated' set to True according to
    what was done, or None when file_ids is empty.
    """
    target_branch = app.config['TARGET_BRANCH'] if app.config['TARGET_BRANCH'] else GithubObject.NotSet
    gh_token = app.config['GITHUB_TOKEN']
    github_repo = app.config['GITHUB_REPO']
    repo = Github(gh_token).get_user().get_repo(github_repo)

    # Results are accumulated instead of returning from inside the loop, so
    # every requested file is published rather than only the first one.
    outcome = {}
    for file_id in file_ids:
        # Publish the highest version, the same row update_from_github
        # selects, instead of whichever row the database returns first.
        file_data_model = FileDataModel.query.filter_by(
            file_model_id=file_id
        ).order_by(
            desc(FileDataModel.version)
        ).first()
        file_name = file_data_model.file_model.name
        try:
            repo_file = repo.get_contents(file_name, ref=target_branch)
        except UnknownObjectException:
            repo.create_file(
                path=file_name,
                message=f'Creating {file_name}',
                content=file_data_model.data,
                branch=target_branch
            )
            outcome['created'] = True
        else:
            repo.update_file(
                path=repo_file.path,
                message=f'Updating {file_name}',
                # Exactly the stored bytes; nothing is appended to the content.
                content=file_data_model.data,
                sha=repo_file.sha,
                branch=target_branch
            )
            outcome['updated'] = True
    return outcome or None
|
2021-09-02 13:52:58 +00:00
|
|
|
|
|
|
|
@staticmethod
def dmn_from_spreadsheet(ss_data):
    """Build a DMN decision-table document from an uploaded spreadsheet.

    The spreadsheet is read without a header row and is addressed by
    position.  Column 0 is never read; the layout used is:

    * row 0, column 1 - decision name
    * row 1, column 1 - decision id
    * row 2, columns 1.. - column kind: ``Input``, ``Output`` or
      ``Annotation`` (the first ``Annotation`` ends the header scan)
    * row 3 - column label
    * row 4 - input expression (inputs) or output name (outputs)
    * row 5 - type reference
    * rows 6.. - one rule per row; rows with no values are skipped

    Rule cells are read positionally: all input entries first, then all
    output entries, then the annotation, so inputs are expected to precede
    outputs in the sheet.

    :param ss_data: file-like object whose ``read()`` returns the
        spreadsheet bytes.
    :return: the DMN XML as ``bytes``, prefixed with an XML declaration.
    """

    def _get_random_string(length):
        alphabet = string.ascii_letters + string.digits
        return ''.join(random.choice(alphabet) for _ in range(length))

    def _row_has_value(values):
        return any(not pd.isnull(value_item) for value_item in values)

    def _cell_text(row_index, column_index):
        # lxml only accepts strings for text and attribute values, so
        # blank cells become '' and everything else is stringified.
        value = df.iloc[row_index, column_index]
        return '' if pd.isnull(value) else str(value)

    df = pd.read_excel(io.BytesIO(ss_data.read()), header=None)

    xml_ns = "https://www.omg.org/spec/DMN/20191111/MODEL/"
    dmndi_ns = "https://www.omg.org/spec/DMN/20191111/DMNDI/"
    dc_ns = "http://www.omg.org/spec/DMN/20180521/DC/"
    dmndi = "{%s}" % dmndi_ns
    dc = "{%s}" % dc_ns
    nsmap = {None: xml_ns, 'dmndi': dmndi_ns, 'dc': dc_ns}

    root = etree.Element("definitions",
                         id="Definitions",
                         name="DRD",
                         namespace="http://camunda.org/schema/1.0/dmn",
                         nsmap=nsmap,
                         )

    decision_name = df.iat[0, 1]
    decision_id = df.iat[1, 1]
    decision = etree.SubElement(root, "decision",
                                id=decision_id,
                                name=decision_name
                                )
    decision_table = etree.SubElement(decision, 'decisionTable', id='decisionTable_1')

    # Walk the header columns; `count` is the spreadsheet column of `item`.
    input_output = df.iloc[2][1:]
    count = 1
    input_count = 1
    output_count = 1
    for item in input_output:
        if item == 'Input':
            input_ = etree.SubElement(decision_table, 'input',
                                      id=f'input_{input_count}',
                                      label=_cell_text(3, count))
            input_expression = etree.SubElement(input_, 'inputExpression',
                                                id=f'inputExpression_{input_count}',
                                                typeRef=_cell_text(5, count))
            expression_text = etree.SubElement(input_expression, 'text')
            expression_text.text = _cell_text(4, count)
            input_count += 1
        elif item == 'Output':
            decision_table.append(etree.Element('output',
                                                id=f'output_{output_count}',
                                                label=_cell_text(3, count),
                                                name=_cell_text(4, count),
                                                typeRef=_cell_text(5, count)))
            output_count += 1
        elif item == 'Annotation':
            break
        count += 1

    # Columns 1..column_count-1 decide whether a row holds a rule.
    column_count = count
    for row in range(6, df.shape[0]):
        row_values = df.iloc[row].values[1:column_count]
        if not _row_has_value(row_values):
            continue

        rando = _get_random_string(7).lower()
        rule = etree.SubElement(decision_table, 'rule', id=f'DecisionRule_{rando}')

        column = 1
        for _ in range(1, input_count):
            input_entry = etree.SubElement(rule, 'inputEntry',
                                           id=f'UnaryTests_{_get_random_string(7)}')
            text_element = etree.SubElement(input_entry, 'text')
            text_element.text = _cell_text(row, column)
            column += 1
        for _ in range(1, output_count):
            output_entry = etree.SubElement(rule, 'outputEntry',
                                            id=f'LiteralExpression_{_get_random_string(7)}')
            text_element = etree.SubElement(output_entry, 'text')
            text_element.text = _cell_text(row, column)
            column += 1

        # The annotation sits right after the last output; the sheet may
        # not have that column at all, in which case it is left empty.
        description = etree.SubElement(rule, 'description')
        description.text = _cell_text(row, column) if column < df.shape[1] else ''

    # Minimal diagram information referencing the decision.
    dmndi_root = etree.SubElement(root, dmndi + "DMNDI")
    dmndi_diagram = etree.SubElement(dmndi_root, dmndi + "DMNDiagram")
    dmndi_shape = etree.SubElement(dmndi_diagram, dmndi + "DMNShape",
                                   dmnElementRef=decision_id)
    etree.SubElement(dmndi_shape, dc + "Bounds",
                     height='80', width='180', x='100', y='100')

    prefix = b'<?xml version="1.0" encoding="UTF-8"?>'
    dmn_file = prefix + etree.tostring(root)

    return dmn_file
|
2021-09-27 21:15:53 +00:00
|
|
|
|
|
|
|
@staticmethod
def cleanup_file_data(copies_to_keep=1):
    """Delete old file-data versions for every file attached to a workflow spec.

    For each file the versions are ordered newest first by ``date_created``.
    The first ``copies_to_keep`` versions are kept; each older version is
    deleted unless a ``WorkflowSpecDependencyFile`` or ``LookupFileModel``
    row still refers to it.

    :param copies_to_keep: number of newest versions to keep per file;
        must be an ``int`` greater than zero.
    :return: a tuple ``(current, saved, deleted)`` where ``current`` holds
        one list of kept versions per file, ``saved`` holds the older
        versions that were kept because something refers to them, and
        ``deleted`` holds the versions that were removed.
    :raises ApiError: with code ``bad_keep`` if ``copies_to_keep`` is not a
        positive integer.
    """
    if not isinstance(copies_to_keep, int) or copies_to_keep <= 0:
        raise ApiError(code='bad_keep',
                       message='You must keep at least 1 version')

    removed = []
    protected = []
    kept = []

    session.flush()

    for spec in session.query(WorkflowSpecModel).all():
        spec_files = session.query(FileModel) \
            .filter(FileModel.workflow_spec_id == spec.id) \
            .all()

        for spec_file in spec_files:
            versions = session.query(FileDataModel) \
                .filter(FileDataModel.file_model_id == spec_file.id) \
                .order_by(desc(FileDataModel.date_created)) \
                .all()
            newest, stale_versions = versions[:copies_to_keep], versions[copies_to_keep:]
            kept.append(newest)

            for stale in stale_versions:
                # A version is still in use if a spec dependency points at it
                # or, failing that, a lookup file does.
                still_referenced = (
                    session.query(WorkflowSpecDependencyFile)
                    .filter(WorkflowSpecDependencyFile.file_data_id == stale.id)
                    .all()
                    or
                    session.query(LookupFileModel)
                    .filter(LookupFileModel.file_data_model_id == stale.id)
                    .all()
                )
                if still_referenced:
                    protected.append(stale)
                    continue

                removed.append(stale)
                session.delete(stale)

    session.commit()
    return kept, protected, removed
|