diff --git a/config/default.py b/config/default.py
index 13fc79ab..1570811c 100644
--- a/config/default.py
+++ b/config/default.py
@@ -6,6 +6,14 @@ basedir = os.path.abspath(os.path.dirname(__file__))
JSON_SORT_KEYS = False # CRITICAL. Do not sort the data when returning values to the front end.
+# The API_TOKEN is used to authenticate workflow sync
+# requests between endpoints, so the sync can run
+# without a lot of back and forth.
+# You may want to change this to something simple for testing!
+# NB: if you change this on the local endpoint, it must be
+# changed on the remote endpoint as well.
+API_TOKEN = environ.get('API_TOKEN', default='af95596f327c9ecc007b60414fc84b61')
+
NAME = "CR Connect Workflow"
FLASK_PORT = environ.get('PORT0') or environ.get('FLASK_PORT', default="5000")
CORS_ALLOW_ORIGINS = re.split(r',\s*', environ.get('CORS_ALLOW_ORIGINS', default="localhost:4200, localhost:5002"))
diff --git a/crc/api.yml b/crc/api.yml
index ad22677e..a4b799ea 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -100,6 +100,200 @@ paths:
type: array
items:
$ref: "#/components/schemas/Study"
+ /workflow_sync/pullall:
+ get:
+ operationId: crc.api.workflow_sync.sync_all_changed_workflows
+ summary: Sync all workflows that have changed on the remote side and provide a list of the results
+ security:
+        - ApiKeyAuth: []
+ parameters:
+ - name: remote
+ in: query
+ required: true
+ description: The remote endpoint
+ schema:
+ type: string
+ tags:
+ - Workflow Sync API
+ responses:
+ '200':
+ description: An array of workflow specs that were synced from remote.
+ content:
+ application/json:
+ schema:
+ type: array
+ items:
+ type: string
+                example: ["top_level_workflow", "3b495037-f7d4-4509-bf58-cee41c0c6b0e"]
+
+ /workflow_sync/diff:
+ get:
+ operationId: crc.api.workflow_sync.get_changed_workflows
+      summary: Provides a list of workflows that differ from the remote endpoint and whether each is new
+      security:
+        - ApiKeyAuth: []
+ parameters:
+ - name: remote
+ in: query
+ required: true
+ description: The remote endpoint
+ schema:
+ type: string
+ tags:
+ - Workflow Sync API
+ responses:
+ '200':
+          description: An array of differing workflow specs, with last-touched date and the location of the most recent copy.
+ content:
+ application/json:
+ schema:
+ type: array
+ items:
+ $ref: "#/components/schemas/WorkflowSpecDiffList"
+
+ /workflow_sync/{workflow_spec_id}/spec:
+ parameters:
+ - name: workflow_spec_id
+ in: path
+        required: true
+        description: The unique id of an existing workflow specification to retrieve.
+ schema:
+ type: string
+ get:
+ operationId: crc.api.workflow_sync.get_sync_workflow_specification
+ summary: Returns a single workflow specification
+ security:
+ - ApiKeyAuth: []
+ tags:
+ - Workflow Sync API
+ responses:
+ '200':
+ description: Workflow specification.
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/WorkflowSpec"
+
+
+ /workflow_sync/{workflow_spec_id}/files:
+ get:
+ operationId: crc.api.workflow_sync.get_workflow_spec_files
+ summary: Provides a list of files for a workflow spec on this machine.
+      security:
+        - ApiKeyAuth: []
+ parameters:
+ - name: workflow_spec_id
+ in: path
+ required: true
+ description: The workflow_spec id
+ schema:
+ type: string
+
+ tags:
+ - Workflow Sync API
+ responses:
+ '200':
+ description: An array of files for a workflow spec on the local system, with details.
+ content:
+ application/json:
+ schema:
+ type: array
+ items:
+ $ref: "#/components/schemas/WorkflowSpecFilesList"
+
+ /workflow_sync/{workflow_spec_id}/files/sync:
+ get:
+ operationId: crc.api.workflow_sync.sync_changed_files
+ summary: Syncs files from a workflow on a remote system and provides a list of files that were updated
+      security:
+        - ApiKeyAuth: []
+ parameters:
+ - name: workflow_spec_id
+ in: path
+ required: true
+ description: The workflow_spec id
+ schema:
+ type: string
+ - name: remote
+ in: query
+ required: true
+ description: The remote endpoint
+ schema:
+ type: string
+
+ tags:
+ - Workflow Sync API
+ responses:
+ '200':
+ description: A list of files that were synced for the workflow.
+ content:
+ application/json:
+ schema:
+ type: array
+ items:
+                  type: string
+              example: ["data_security_plan.dmn", "some_other_file.xml"]
+
+
+ /workflow_sync/{workflow_spec_id}/files/diff:
+ get:
+ operationId: crc.api.workflow_sync.get_changed_files
+      summary: Provides a list of files for a workflow spec that differ from the remote endpoint, with their signatures.
+      security:
+        - ApiKeyAuth: []
+
+ parameters:
+ - name: workflow_spec_id
+ in: path
+ required: true
+ description: The workflow_spec id
+ schema:
+ type: string
+ - name: remote
+ in: query
+ required: true
+ description: The remote endpoint
+ schema:
+ type: string
+
+ tags:
+ - Workflow Sync API
+ responses:
+ '200':
+ description: An array of files that are different from remote, with last touched date and file signature.
+ content:
+ application/json:
+ schema:
+ type: array
+ items:
+ $ref: "#/components/schemas/WorkflowSpecFilesDiff"
+
+
+ /workflow_sync/all:
+ get:
+ operationId: crc.api.workflow_sync.get_all_spec_state
+      summary: Provides a list of all workflow specs with their last-updated date and thumbprint
+ security:
+        - ApiKeyAuth: []
+
+ tags:
+ - Workflow Sync API
+ responses:
+ '200':
+ description: An array of workflow specs, with last touched date and file signature.
+ content:
+ application/json:
+ schema:
+ type: array
+ items:
+ $ref: "#/components/schemas/WorkflowSpecAll"
+
+
/study/all:
get:
operationId: crc.api.study.all_studies
@@ -474,6 +668,30 @@ paths:
responses:
'204':
description: The file has been removed.
+ /file/{md5_hash}/hash_data:
+ parameters:
+ - name: md5_hash
+ in: path
+ required: true
+ description: The md5 hash of the file requested
+ schema:
+ type: string
+ get:
+ operationId: crc.api.file.get_file_data_by_hash
+ summary: Returns only the file contents
+ security:
+ - ApiKeyAuth: []
+ tags:
+ - Files
+ responses:
+ '200':
+ description: Returns the actual file
+ content:
+ application/octet-stream:
+ schema:
+ type: string
+ format: binary
+ example: ''
/file/{file_id}/data:
parameters:
- name: file_id
@@ -1154,6 +1372,12 @@ components:
scheme: bearer
bearerFormat: JWT
x-bearerInfoFunc: crc.api.user.verify_token_admin
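+    # shared-secret header auth used by the workflow sync endpoints
+    # (the key is the API_TOKEN config value)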
+    ApiKeyAuth:
+      type: apiKey
+      in: header
+      name: X-CR-API-KEY
+      x-apikeyInfoFunc: crc.api.workflow_sync.verify_token
+
schemas:
User:
properties:
@@ -1177,6 +1401,92 @@ components:
properties:
id:
type: string
+    WorkflowSpecDiffList:
+      properties:
+        workflow_spec_id:
+          type: string
+          example: top_level_workflow
+        date_created:
+          type: string
+          example: 2020-12-09 16:55:12.951500+00:00
+        location:
+          type: string
+          example: remote
+        new:
+          type: boolean
+          example: false
+
+    WorkflowSpecFilesList:
+      properties:
+        file_model_id:
+          type: integer
+          example: 171
+        workflow_spec_id:
+          type: string
+          example: top_level_workflow
+        filename:
+          type: string
+          example: data_security_plan.dmn
+        date_created:
+          type: string
+          example: 2020-12-01 13:58:12.420333+00:00
+        type:
+          type: string
+          example: dmn
+        primary:
+          type: boolean
+          example: false
+        content_type:
+          type: string
+          example: text/xml
+        primary_process_id:
+          type: string
+          nullable: true
+        md5_hash:
+          type: string
+          example: f12e2bbd-a20c-673b-ccb8-a8a1ea9c5b7b
+
+    WorkflowSpecFilesDiff:
+      properties:
+        filename:
+          type: string
+          example: data_security_plan.dmn
+        date_created:
+          type: string
+          example: 2020-12-01 13:58:12.420333+00:00
+        type:
+          type: string
+          example: dmn
+        primary:
+          type: boolean
+          example: false
+        content_type:
+          type: string
+          example: text/xml
+        primary_process_id:
+          type: string
+          nullable: true
+        md5_hash:
+          type: string
+          example: f12e2bbd-a20c-673b-ccb8-a8a1ea9c5b7b
+        location:
+          type: string
+          example: remote
+        new:
+          type: boolean
+          example: false
+
+    WorkflowSpecAll:
+      properties:
+        workflow_spec_id:
+          type: string
+          example: acaf1258-43b4-437e-8846-f612afa66811
+        date_created:
+          type: string
+          example: 2020-12-01 13:58:12.420333+00:00
+        md5_hash:
+          type: string
+          example: c30fd597f21715018eab12f97f9d4956
Study:
properties:
id:
diff --git a/crc/api/file.py b/crc/api/file.py
index 5cf54221..861f9f04 100644
--- a/crc/api/file.py
+++ b/crc/api/file.py
@@ -6,7 +6,7 @@ from flask import send_file
from crc import session
from crc.api.common import ApiError
-from crc.models.file import FileSchema, FileModel, File, FileModelSchema
+from crc.models.file import FileSchema, FileModel, File, FileModelSchema, FileDataModel
from crc.models.workflow import WorkflowSpecModel
from crc.services.file_service import FileService
@@ -99,6 +99,9 @@ def update_file_data(file_id):
file_model = FileService.update_file(file_model, file.stream.read(), file.content_type)
return FileSchema().dump(to_file_api(file_model))
+def get_file_data_by_hash(md5_hash):
+    filedatamodel = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first()
+    if filedatamodel is None:
+        raise ApiError('no_such_file', 'The md5 hash %s was not found.' % md5_hash)
+    return get_file_data(filedatamodel.file_model_id, version=filedatamodel.version)
def get_file_data(file_id, version=None):
file_data = FileService.get_file_data(file_id, version)
diff --git a/crc/api/workflow.py b/crc/api/workflow.py
index a148328a..d14daf11 100644
--- a/crc/api/workflow.py
+++ b/crc/api/workflow.py
@@ -1,11 +1,9 @@
import uuid
-
-from SpiffWorkflow.util.deep_merge import DeepMerge
from flask import g
-from crc import session, app
+from crc import session, db, app
from crc.api.common import ApiError, ApiErrorSchema
from crc.models.api_models import WorkflowApi, WorkflowApiSchema, NavigationItem, NavigationItemSchema
-from crc.models.file import FileModel, LookupDataSchema
+from crc.models.file import FileModel, LookupDataSchema, FileDataModel
from crc.models.study import StudyModel, WorkflowMetadata
from crc.models.task_event import TaskEventModel, TaskEventModelSchema, TaskEvent, TaskEventSchema
from crc.models.workflow import WorkflowModel, WorkflowSpecModelSchema, WorkflowSpecModel, WorkflowSpecCategoryModel, \
@@ -17,7 +15,6 @@ from crc.services.user_service import UserService
from crc.services.workflow_processor import WorkflowProcessor
from crc.services.workflow_service import WorkflowService
-
def all_specifications():
schema = WorkflowSpecModelSchema(many=True)
return schema.dump(session.query(WorkflowSpecModel).all())
diff --git a/crc/api/workflow_sync.py b/crc/api/workflow_sync.py
new file mode 100644
index 00000000..68cd9fd2
--- /dev/null
+++ b/crc/api/workflow_sync.py
@@ -0,0 +1,313 @@
+import hashlib
+import numpy as np
+import pandas as pd
+from crc import session, app
+from crc.api.common import ApiError
+from crc.models.file import FileModel, FileDataModel
+from crc.models.workflow import WorkflowSpecModel, WorkflowSpecCategoryModel
+from crc.services.file_service import FileService
+from crc.services.workflow_sync import WorkflowSyncService
+from crc.api.workflow import get_workflow_specification
+
+
+def get_sync_workflow_specification(workflow_spec_id):
+ return get_workflow_specification(workflow_spec_id)
+
+def join_uuids(uuids):
+ """Joins a pandas Series of uuids and combines them in one hash"""
+    combined_uuids = ''.join([str(uuid) for uuid in uuids.sort_values()])  # sort so the hash is stable regardless of row order
+    return hashlib.md5(combined_uuids.encode('utf8')).hexdigest()  # make a hash of the hashes
+
+def verify_token(token, required_scopes):
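+    """connexion x-apikeyInfoFunc hook (see api.yml) - checks the X-CR-API-KEY header against API_TOKEN"""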
+ if token == app.config['API_TOKEN']:
+ return {'scope':['any']}
+ else:
+ raise ApiError("permission_denied", "API Token information is not correct")
+
+
+def get_changed_workflows(remote,as_df=False):
+ """
+    Gets the workflow specs from a remote endpoint and determines
+    which specs differ from the local copies
+ """
+
+ remote_workflows_list = WorkflowSyncService.get_all_remote_workflows(remote)
+ remote_workflows = pd.DataFrame(remote_workflows_list)
+
+ # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index
+ local = get_all_spec_state_dataframe().reset_index()
+
+ # merge these on workflow spec id and hash - this will
+ # make two different date columns date_x and date_y
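+    # an outer merge with indicator=True tags each row left_only/right_only/both;
+    # rows tagged 'both' match on id and hash, so only the differences are kept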
+ different = remote_workflows.merge(local,
+ right_on=['workflow_spec_id','md5_hash'],
+ left_on=['workflow_spec_id','md5_hash'],
+                                       how='outer',
+ indicator=True).loc[lambda x : x['_merge']!='both']
+ if len(different)==0:
+ return []
+    # each row has a _merge tag - label it so we know whether the spec
+    # was on the remote or the local machine
+ different.loc[different['_merge']=='left_only','location'] = 'remote'
+ different.loc[different['_merge']=='right_only','location'] = 'local'
+
+    # this takes the different date_created_x and date_created_y columns and
+    # combines them back into one date_created column
+ index = different['date_created_x'].isnull()
+ different.loc[index,'date_created_x'] = different[index]['date_created_y']
+ different = different[['workflow_spec_id','date_created_x','location']].copy()
+ different.columns=['workflow_spec_id','date_created','location']
+
+    # the diff list may have multiple entries for a workflow if a version exists on either side -
+    # sort by date and keep only the most recent entry for each workflow
+ changedfiles = different.sort_values('date_created',ascending=False).groupby('workflow_spec_id').first()
+
+    # get an exclusive-or list of workflow ids - that is, we want the ids that
+    # are on one machine or the other, but not both
+ remote_spec_ids = remote_workflows[['workflow_spec_id']]
+ local_spec_ids = local[['workflow_spec_id']]
+ left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])]
+ right = local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])]
+
+ # flag files as new that are only on the remote box and remove the files that are only on the local box
+ changedfiles['new'] = False
+ changedfiles.loc[changedfiles.index.isin(left['workflow_spec_id']), 'new'] = True
+ output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])]
+
+ # return the list as a dict, let swagger convert it to json
+ if as_df:
+ return output
+ else:
+ return output.reset_index().to_dict(orient='records')
+
+
+def sync_all_changed_workflows(remote):
+
+ workflowsdf = get_changed_workflows(remote,as_df=True)
+    if len(workflowsdf) == 0:
+ return []
+ workflows = workflowsdf.reset_index().to_dict(orient='records')
+ for workflow in workflows:
+ sync_changed_files(remote,workflow['workflow_spec_id'])
+ return [x['workflow_spec_id'] for x in workflows]
+
+
+def sync_changed_files(remote,workflow_spec_id):
+ # make sure that spec is local before syncing files
+
+ specdict = WorkflowSyncService.get_remote_workflow_spec(remote,workflow_spec_id)
+
+ localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first()
+ if localspec is None:
+ localspec = WorkflowSpecModel()
+ localspec.id = workflow_spec_id
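+    # categories are matched by name - if the remote spec's category does not
+    # exist locally, it is created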
+    if specdict['category'] is None:
+ localspec.category = None
+ else:
+ localcategory = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.name
+ == specdict['category']['name']).first()
+        if localcategory is None:
+            # the category doesn't exist locally - create it
+ localcategory = WorkflowSpecCategoryModel()
+ localcategory.name = specdict['category']['name']
+ localcategory.display_name = specdict['category']['display_name']
+ localcategory.display_order = specdict['category']['display_order']
+ session.add(localcategory)
+ localspec.category = localcategory
+
+ localspec.display_order = specdict['display_order']
+ localspec.display_name = specdict['display_name']
+ localspec.name = specdict['name']
+ localspec.description = specdict['description']
+ session.add(localspec)
+
+ changedfiles = get_changed_files(remote,workflow_spec_id,as_df=True)
+    if len(changedfiles) == 0:
+ return []
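+    # files flagged 'new' on the local side only do not exist on the remote,
+    # so they are archived locally; everything else is pulled from the remote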
+ updatefiles = changedfiles[~((changedfiles['new']==True) & (changedfiles['location']=='local'))]
+ updatefiles = updatefiles.reset_index().to_dict(orient='records')
+
+ deletefiles = changedfiles[((changedfiles['new']==True) & (changedfiles['location']=='local'))]
+ deletefiles = deletefiles.reset_index().to_dict(orient='records')
+
+ for delfile in deletefiles:
+ currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id,
+ FileModel.name == delfile['filename']).first()
+
+        # archiving the file is more appropriate than deleting it,
+        # because existing workflows may still reference the file data
+ currentfile.archived = True
+ session.add(currentfile)
+
+ for updatefile in updatefiles:
+ currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id,
+ FileModel.name == updatefile['filename']).first()
+ if not currentfile:
+ currentfile = FileModel()
+ currentfile.name = updatefile['filename']
+ currentfile.workflow_spec_id = workflow_spec_id
+
+ currentfile.date_created = updatefile['date_created']
+ currentfile.type = updatefile['type']
+ currentfile.primary = updatefile['primary']
+ currentfile.content_type = updatefile['content_type']
+ currentfile.primary_process_id = updatefile['primary_process_id']
+ session.add(currentfile)
+ content = WorkflowSyncService.get_remote_file_by_hash(remote,updatefile['md5_hash'])
+ FileService.update_file(currentfile,content,updatefile['type'])
+ session.commit()
+ return [x['filename'] for x in updatefiles]
+
+
+def get_changed_files(remote,workflow_spec_id,as_df=False):
+ """
+    Gets the files for a workflow_spec from both the local and the remote
+    endpoint, determines which files have changed, and returns a list of
+    those files
+ """
+ remote_file_list = WorkflowSyncService.get_remote_workflow_spec_files(remote,workflow_spec_id)
+ remote_files = pd.DataFrame(remote_file_list)
+ # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index
+ local = get_workflow_spec_files_dataframe(workflow_spec_id).reset_index()
+ local['md5_hash'] = local['md5_hash'].astype('str')
+ remote_files['md5_hash'] = remote_files['md5_hash'].astype('str')
+
+ different = remote_files.merge(local,
+ right_on=['filename','md5_hash'],
+ left_on=['filename','md5_hash'],
+                                   how='outer',
+ indicator=True).loc[lambda x : x['_merge']!='both']
+ if len(different) == 0:
+ if as_df:
+ return different
+ else:
+ return []
+    # each row has a _merge tag - label it so we know whether the file
+    # was on the remote or the local machine
+ different.loc[different['_merge']=='left_only','location'] = 'remote'
+ different.loc[different['_merge']=='right_only','location'] = 'local'
+
+    # this takes the different date_created_x and date_created_y columns and
+    # combines them back into one date_created column
+ dualfields = ['date_created','type','primary','content_type','primary_process_id']
+ for merge in dualfields:
+ index = different[merge+'_x'].isnull()
+ different.loc[index,merge+'_x'] = different[index][merge+'_y']
+
+ fieldlist = [fld+'_x' for fld in dualfields]
+ different = different[ fieldlist + ['md5_hash','filename','location']].copy()
+
+ different.columns=dualfields+['md5_hash','filename','location']
+    # the diff list may have multiple entries for a file if a version exists on either side -
+    # sort by date and keep only the most recent entry for each file
+ changedfiles = different.sort_values('date_created',ascending=False).groupby('filename').first()
+
+    # get an exclusive-or list of filenames - that is, we want the files that
+    # are on one machine or the other, but not both
+    remote_filenames = remote_files[['filename']]
+    local_filenames = local[['filename']]
+    left = remote_filenames[~remote_filenames['filename'].isin(local_filenames['filename'])]
+    right = local_filenames[~local_filenames['filename'].isin(remote_filenames['filename'])]
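+    # unlike the workflow-level diff, a file that exists on only one side
+    # (either side) is flagged as 'new' so the caller can pull or archive it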
+ changedfiles['new'] = False
+ changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True
+ changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True
+    changedfiles = changedfiles.replace({np.nan: None})
+ # return the list as a dict, let swagger convert it to json
+ if as_df:
+ return changedfiles
+ else:
+ return changedfiles.reset_index().to_dict(orient='records')
+
+
+
+def get_all_spec_state():
+ """
+ Return a list of all workflow specs along with last updated date and a
+ thumbprint of all of the files that are used for that workflow_spec
+ Convert into a dict list from a dataframe
+ """
+ df = get_all_spec_state_dataframe()
+ return df.reset_index().to_dict(orient='records')
+
+
+def get_workflow_spec_files(workflow_spec_id):
+ """
+    Return a list of all files for a workflow_spec along with last updated
+    date and md5 hash so we can determine file differences
+ Convert into a dict list from a dataframe
+ """
+ df = get_workflow_spec_files_dataframe(workflow_spec_id)
+ return df.reset_index().to_dict(orient='records')
+
+
+def get_workflow_spec_files_dataframe(workflowid):
+ """
+ Return a list of all files for a workflow_spec along with last updated date and a
+ hash so we can determine file differences for a changed workflow on a box.
+ Return a dataframe
+ """
+ x = session.query(FileDataModel).join(FileModel).filter(FileModel.workflow_spec_id==workflowid)
+    # there might be a cleaner way of getting a data frame from some of the
+    # fields in the ORM - but this works OK
+ filelist = []
+ for file in x:
+ filelist.append({'file_model_id':file.file_model_id,
+ 'workflow_spec_id': file.file_model.workflow_spec_id,
+ 'md5_hash':file.md5_hash,
+ 'filename':file.file_model.name,
+ 'type':file.file_model.type.name,
+ 'primary':file.file_model.primary,
+ 'content_type':file.file_model.content_type,
+ 'primary_process_id':file.file_model.primary_process_id,
+ 'date_created':file.date_created})
+ if len(filelist) == 0:
+ return pd.DataFrame(columns=['file_model_id',
+ 'workflow_spec_id',
+ 'md5_hash',
+ 'filename',
+ 'type',
+ 'primary',
+ 'content_type',
+ 'primary_process_id',
+ 'date_created'])
+ df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last()
+ df['date_created'] = df['date_created'].astype('str')
+ return df
+
+
+
+def get_all_spec_state_dataframe():
+ """
+ Return a list of all workflow specs along with last updated date and a
+ thumbprint of all of the files that are used for that workflow_spec
+ Return a dataframe
+ """
+ x = session.query(FileDataModel).join(FileModel)
+    # there might be a cleaner way of getting a data frame from some of the
+    # fields in the ORM - but this works OK
+ filelist = []
+ for file in x:
+ filelist.append({'file_model_id':file.file_model_id,
+ 'workflow_spec_id': file.file_model.workflow_spec_id,
+ 'md5_hash':file.md5_hash,
+ 'filename':file.file_model.name,
+ 'date_created':file.date_created})
+ df = pd.DataFrame(filelist)
+
+ # get a distinct list of file_model_id's with the most recent file_data retained
+ df = df.sort_values('date_created').drop_duplicates(['file_model_id'],keep='last').copy()
+
+ # take that list and then group by workflow_spec and retain the most recently touched file
+ # and make a consolidated hash of the md5_checksums - this acts as a 'thumbprint' for each
+ # workflow spec
+ df = df.groupby('workflow_spec_id').agg({'date_created':'max',
+ 'md5_hash':join_uuids}).copy()
+ # get only the columns we are really interested in returning
+ df = df[['date_created','md5_hash']].copy()
+ # convert dates to string
+ df['date_created'] = df['date_created'].astype('str')
+ return df
+
diff --git a/crc/models/workflow.py b/crc/models/workflow.py
index 718dfccf..0da32aec 100644
--- a/crc/models/workflow.py
+++ b/crc/models/workflow.py
@@ -69,7 +69,7 @@ class WorkflowStatus(enum.Enum):
class WorkflowSpecDependencyFile(db.Model):
- """Connects a workflow to the version of the specification files it depends on to execute"""
+ """Connects to a workflow to test the version of the specification files it depends on to execute"""
file_data_id = db.Column(db.Integer, db.ForeignKey(FileDataModel.id), primary_key=True)
workflow_id = db.Column(db.Integer, db.ForeignKey("workflow.id"), primary_key=True)
diff --git a/crc/services/workflow_sync.py b/crc/services/workflow_sync.py
new file mode 100644
index 00000000..26796413
--- /dev/null
+++ b/crc/services/workflow_sync.py
@@ -0,0 +1,52 @@
+import json
+
+import requests
+
+from crc import app
+from crc.api.common import ApiError
+
+
+class WorkflowSyncService(object):
+
+ @staticmethod
+ def get_remote_file_by_hash(remote,md5_hash):
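+        """Gets the raw contents of a file from the remote /file/{md5_hash}/hash_data endpoint."""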
+ url = remote+'/v1.0/file/'+md5_hash+'/hash_data'
+ return WorkflowSyncService.__make_request(url,return_contents=True)
+
+ @staticmethod
+ def get_remote_workflow_spec_files(remote,workflow_spec_id):
+ url = remote+'/v1.0/workflow_sync/'+workflow_spec_id+'/files'
+ return WorkflowSyncService.__make_request(url)
+
+ @staticmethod
+ def get_remote_workflow_spec(remote, workflow_spec_id):
+ """
+ this just gets the details of a workflow spec from the
+ remote side.
+ """
+        url = remote+'/v1.0/workflow_sync/'+workflow_spec_id+'/spec'
+ return WorkflowSyncService.__make_request(url)
+
+ @staticmethod
+ def get_all_remote_workflows(remote):
+ url = remote + '/v1.0/workflow_sync/all'
+ return WorkflowSyncService.__make_request(url)
+
+ @staticmethod
+ def __make_request(url,return_contents=False):
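+        # return_contents=True returns the raw bytes (used for file payloads);
+        # otherwise the response body is parsed as JSON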
+ try:
+ response = requests.get(url,headers={'X-CR-API-KEY':app.config['API_TOKEN']})
+        except requests.exceptions.RequestException as err:
+            raise ApiError("workflow_sync_error", str(err))
+ if response.ok and response.text:
+ if return_contents:
+ return response.content
+ else:
+ return json.loads(response.text)
+ else:
+ raise ApiError("workflow_sync_error",
+                           "Received an invalid response from the remote workflow sync endpoint "
+                           "(status %s): %s when calling url '%s'." %
+ (response.status_code, response.text, url))
diff --git a/deploy/requirements.txt b/deploy/requirements.txt
index 420a888f..b9ab7b4c 100644
--- a/deploy/requirements.txt
+++ b/deploy/requirements.txt
@@ -1,75 +1,76 @@
alabaster==0.7.12
-alembic==1.4.2
-amqp==2.5.2
+alembic==1.4.3
aniso8601==8.0.0
-attrs==19.3.0
-babel==2.8.0
-bcrypt==3.1.7
-beautifulsoup4==4.9.1
-billiard==3.6.3.0
+attrs==20.3.0
+babel==2.9.0
+bcrypt==3.2.0
+beautifulsoup4==4.9.3
blinker==1.4
-celery==4.4.2
-certifi==2020.4.5.1
-cffi==1.14.0
+certifi==2020.11.8
+cffi==1.14.4
chardet==3.0.4
click==7.1.2
-clickclick==1.2.2
+clickclick==20.10.2
commonmark==0.9.1
-configparser==5.0.0
connexion==2.7.0
-coverage==5.1
+coverage==5.3
+deprecated==1.2.10
docutils==0.16
-docxtpl==0.9.2
+docxtpl==0.11.2
et-xmlfile==1.0.1
flask==1.1.2
+flask-admin==1.5.7
flask-bcrypt==0.7.1
-flask-cors==3.0.8
-flask-marshmallow==0.12.0
+flask-cors==3.0.9
+flask-mail==0.9.1
+flask-marshmallow==0.14.0
flask-migrate==2.5.3
flask-restful==0.3.8
-flask-sqlalchemy==2.4.1
-flask-sso==0.4.0
-future==0.18.2
-httpretty==1.0.2
-idna==2.9
+flask-sqlalchemy==2.4.4
+gunicorn==20.0.4
+httpretty==1.0.3
+idna==2.10
imagesize==1.2.0
-importlib-metadata==1.6.0
-inflection==0.4.0
+inflection==0.5.1
itsdangerous==1.1.0
jdcal==1.4.1
jinja2==2.11.2
jsonschema==3.2.0
-kombu==4.6.8
-ldap3==2.7
-lxml==4.5.1
-mako==1.1.2
+ldap3==2.8.1
+lxml==4.6.2
+mako==1.1.3
+markdown==3.3.3
markupsafe==1.1.1
-marshmallow==3.6.0
+marshmallow==3.9.1
marshmallow-enum==1.5.1
-marshmallow-sqlalchemy==0.23.0
-numpy==1.18.4
-openapi-spec-validator==0.2.8
-openpyxl==3.0.3
+marshmallow-sqlalchemy==0.24.1
+numpy==1.19.4
+openapi-spec-validator==0.2.9
+openpyxl==3.0.5
packaging==20.4
-pandas==1.0.3
-psycopg2-binary==2.8.5
+pandas==1.1.4
+psycopg2-binary==2.8.6
pyasn1==0.4.8
pycparser==2.20
-pygments==2.6.1
+pygithub==1.53
+pygments==2.7.2
pyjwt==1.7.1
pyparsing==2.4.7
-pyrsistent==0.16.0
+pyrsistent==0.17.3
+python-box==5.2.0
python-dateutil==2.8.1
python-docx==0.8.10
python-editor==1.0.4
-pytz==2020.1
+python-levenshtein==0.12.0
+pytz==2020.4
pyyaml==5.3.1
recommonmark==0.6.0
-requests==2.23.0
-six==1.14.0
+requests==2.25.0
+sentry-sdk==0.14.4
+six==1.15.0
snowballstemmer==2.0.0
soupsieve==2.0.1
-sphinx==3.0.3
+sphinx==3.3.1
sphinxcontrib-applehelp==1.0.2
sphinxcontrib-devhelp==1.0.2
sphinxcontrib-htmlhelp==1.0.3
@@ -77,14 +78,14 @@ sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.3
sphinxcontrib-serializinghtml==1.1.4
spiffworkflow
-sqlalchemy==1.3.17
-swagger-ui-bundle==0.0.6
-urllib3==1.25.9
-vine==1.3.0
-waitress==1.4.3
+sqlalchemy==1.3.20
+swagger-ui-bundle==0.0.8
+urllib3==1.26.2
+waitress==1.4.4
webob==1.8.6
webtest==2.0.35
werkzeug==1.0.1
+wrapt==1.12.1
+wtforms==2.3.3
xlrd==1.2.0
-xlsxwriter==1.2.8
-zipp==3.1.0
+xlsxwriter==1.3.7
diff --git a/example_data.py b/example_data.py
index 58d19c65..f3918846 100644
--- a/example_data.py
+++ b/example_data.py
@@ -66,6 +66,7 @@ class ExampleDataLoader:
display_order=6
),
]
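+        # the categories above use fixed ids, so advance the id sequence past
+        # them to avoid primary-key collisions on later inserts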
+ db.session.execute("select setval('workflow_spec_category_id_seq',7);")
db.session.add_all(categories)
db.session.commit()
diff --git a/tests/base_test.py b/tests/base_test.py
index af899917..4f2306a2 100644
--- a/tests/base_test.py
+++ b/tests/base_test.py
@@ -204,6 +204,14 @@ class BaseTest(unittest.TestCase):
data = myfile.read()
return data
+ @staticmethod
+ def workflow_sync_response(file_name):
+ filepath = os.path.join(app.root_path, '..', 'tests', 'data', 'workflow_sync_responses', file_name)
+ with open(filepath, 'rb') as myfile:
+ data = myfile.read()
+ return data
+
def assert_success(self, rv, msg=""):
try:
data = json.loads(rv.get_data(as_text=True))
diff --git a/tests/data/random_fact/random_fact2.bpmn b/tests/data/random_fact/random_fact2.bpmn
new file mode 100644
index 00000000..bd0a9ef5
--- /dev/null
+++ b/tests/data/random_fact/random_fact2.bpmn
@@ -0,0 +1,200 @@
+
+
+
+
+ SequenceFlow_0c7wlth
+
+
+ # h1 Heading 8-)
+## h2 Heading
+### h3 Heading
+#### h4 Heading
+##### h5 Heading
+###### h6 Heading
+
+
+## Horizontal Rules
+
+___
+
+---
+
+***
+
+
+## Typographic replacements
+
+"double quotes" and 'single quotes'
+
+
+## Emphasis
+
+**This is bold text**
+
+__This is bold text__
+
+*This is italic text*
+
+_This is italic text_
+
+~~Strikethrough~~
+
+
+## Blockquotes
+
+
+> Blockquotes can also be nested...
+>> ...by using additional greater-than signs right next to each other...
+> > > ...or with spaces between arrows.
+
+
+## Lists
+
+Unordered
+
++ Create a list by starting a line with `+`, `-`, or `*`
++ Sub-lists are made by indenting 2 spaces:
+ - Marker character change forces new list start:
+ * Ac tristique libero volutpat at
+ + Facilisis in pretium nisl aliquet
+ - Nulla volutpat aliquam velit
++ Very easy!
+
+Ordered
+
+1. Lorem ipsum dolor sit amet
+2. Consectetur adipiscing elit
+3. Integer molestie lorem at massa
+
+
+1. You can use sequential numbers...
+1. ...or keep all the numbers as `1.`
+
+Start numbering with offset:
+
+57. foo
+1. bar
+
+## Tables
+
+| Option | Description |
+| ------ | ----------- |
+| data | path to data files to supply the data that will be passed into templates. |
+| engine | engine to be used for processing templates. Handlebars is the default. |
+| ext | extension to be used for dest files. |
+
+Right aligned columns
+
+| Option | Description |
+| ------:| -----------:|
+| data | path to data files to supply the data that will be passed into templates. |
+| engine | engine to be used for processing templates. Handlebars is the default. |
+| ext | extension to be used for dest files. |
+
+
+## Links
+
+[link text](http://dev.nodeca.com)
+
+[link with title](http://nodeca.github.io/pica/demo/ "title text!")
+
+Autoconverted link https://github.com/nodeca/pica (enable linkify to see)
+
+
+## Images
+
+![Minion](https://octodex.github.com/images/minion.png)
+![Stormtroopocat](https://octodex.github.com/images/stormtroopocat.jpg "The Stormtroopocat")
+
+
+ SequenceFlow_0c7wlth
+ SequenceFlow_0641sh6
+
+
+
+
+
+
+
+
+ SequenceFlow_0641sh6
+ SequenceFlow_0t29gjo
+ FactService = fact_service()
+
+
+ # Great Job!
+You have completed the random fact generator.
+You chose to receive a random fact of the type: "{{type}}"
+
+Your random fact is:
+{{details}}
+ SequenceFlow_0t29gjo
+
+
+
+
+
+ User sets the Fact.type to cat, norris, or buzzword
+
+
+
+ Makes an API call to get a fact of the required type.
+
+
diff --git a/tests/data/workflow_sync_responses/random_fact2.bpmn b/tests/data/workflow_sync_responses/random_fact2.bpmn
new file mode 100644
index 00000000..f502a238
--- /dev/null
+++ b/tests/data/workflow_sync_responses/random_fact2.bpmn
@@ -0,0 +1,104 @@
+
+
+
+
+ SequenceFlow_0c7wlth
+
+
+ # h1 Heading 8-)
+ NEW_FILE_ADDED
+
+![Stormtroopocat](https://octodex.github.com/images/stormtroopocat.jpg "The Stormtroopocat")
+
+ SequenceFlow_0c7wlth
+ SequenceFlow_0641sh6
+
+
+
+
+
+
+
+
+ SequenceFlow_0641sh6
+ SequenceFlow_0t29gjo
+ FactService = fact_service()
+
+
+ # Great Job!
+You have completed the random fact generator.
+You chose to receive a random fact of the type: "{{type}}"
+
+Your random fact is:
+{{details}}
+ SequenceFlow_0t29gjo
+
+
+
+
+
+ User sets the Fact.type to cat, norris, or buzzword
+
+
+
+ Makes an API call to get a fact of the required type.
+
diff --git a/tests/files/test_files_api.py b/tests/files/test_files_api.py
index 02feb8d0..958989d1 100644
--- a/tests/files/test_files_api.py
+++ b/tests/files/test_files_api.py
@@ -46,7 +46,7 @@ class TestFilesApi(BaseTest):
content_type="application/json", headers=self.logged_in_headers())
self.assert_success(rv)
json_data = json.loads(rv.get_data(as_text=True))
- self.assertEqual(2, len(json_data))
+ self.assertEqual(3, len(json_data))
def test_create_file(self):
diff --git a/tests/study/test_study_api.py b/tests/study/test_study_api.py
index e7d33a9d..df734086 100644
--- a/tests/study/test_study_api.py
+++ b/tests/study/test_study_api.py
@@ -165,7 +165,7 @@ class TestStudyApi(BaseTest):
self.assertEqual(study_event.comment, update_comment)
self.assertEqual(study_event.user_uid, self.test_uid)
- @patch('crc.services.protocol_builder.ProtocolBuilderService.get_investigators') # mock_studies
+ @patch('crc.services.protocol_builder.ProtocolBuilderService.get_investigators') # mock_investigators
@patch('crc.services.protocol_builder.ProtocolBuilderService.get_required_docs') # mock_docs
@patch('crc.services.protocol_builder.ProtocolBuilderService.get_study_details') # mock_details
@patch('crc.services.protocol_builder.ProtocolBuilderService.get_studies') # mock_studies
diff --git a/tests/test_workflow_sync.py b/tests/test_workflow_sync.py
new file mode 100644
index 00000000..eb0cd1b5
--- /dev/null
+++ b/tests/test_workflow_sync.py
@@ -0,0 +1,156 @@
+from unittest.mock import patch
+
+from crc import db
+from tests.base_test import BaseTest
+from crc.api.workflow_sync import get_all_spec_state, \
+ get_changed_workflows, \
+ get_workflow_spec_files, \
+ get_changed_files, \
+ get_workflow_specification, \
+ sync_changed_files
+from crc.models.workflow import WorkflowSpecModel
+from datetime import datetime
+from crc.services.file_service import FileService
+
+
+
+class TestWorkflowSync(BaseTest):
+
+ @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows')
+ def test_get_no_changes(self, mock_get):
+ self.load_example_data()
+ othersys = get_all_spec_state()
+ mock_get.return_value = othersys
+ response = get_changed_workflows('localhost:0000') # not actually used due to mock
+ self.assertIsNotNone(response)
+ self.assertEqual(response,[])
+
+
+ @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows')
+ def test_remote_workflow_change(self, mock_get):
+ self.load_example_data()
+ othersys = get_all_spec_state()
+ othersys[1]['date_created'] = str(datetime.now())
+ othersys[1]['md5_hash'] = '12345'
+ mock_get.return_value = othersys
+ response = get_changed_workflows('localhost:0000') #endpoint is not used due to mock
+ self.assertIsNotNone(response)
+ self.assertEqual(len(response),1)
+ self.assertEqual(response[0]['workflow_spec_id'], 'random_fact')
+ self.assertEqual(response[0]['location'], 'remote')
+ self.assertEqual(response[0]['new'], False)
+
+
+
+ @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows')
+ def test_remote_workflow_has_new(self, mock_get):
+ self.load_example_data()
+ othersys = get_all_spec_state()
+ othersys.append({'workflow_spec_id':'my_new_workflow',
+ 'date_created':str(datetime.now()),
+ 'md5_hash': '12345'})
+ mock_get.return_value = othersys
+ response = get_changed_workflows('localhost:0000') #endpoint is not used due to mock
+ self.assertIsNotNone(response)
+ self.assertEqual(len(response),1)
+ self.assertEqual(response[0]['workflow_spec_id'],'my_new_workflow')
+ self.assertEqual(response[0]['location'], 'remote')
+ self.assertEqual(response[0]['new'], True)
+
+
+ @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows')
+ def test_local_workflow_has_new(self, mock_get):
+ self.load_example_data()
+
+ othersys = get_all_spec_state()
+ mock_get.return_value = othersys
+ wf_spec = WorkflowSpecModel()
+ wf_spec.id = 'abcdefg'
+ wf_spec.display_name = 'New Workflow - Yum!!'
+ wf_spec.name = 'my_new_workflow'
+ wf_spec.description = 'yep - its a new workflow'
+ wf_spec.category_id = 0
+ wf_spec.display_order = 0
+ db.session.add(wf_spec)
+ db.session.commit()
+ FileService.add_workflow_spec_file(wf_spec,'dummyfile.txt','text',b'this is a test')
+        # note: get_changed_workflows filters out workflows that exist only
+        # locally, so a locally-new workflow yields an empty list
+ response = get_changed_workflows('localhost:0000') #endpoint is not used due to mock
+ self.assertIsNotNone(response)
+ self.assertEqual(response,[])
+
+
+ @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
+ def test_file_differences(self, mock_get):
+ self.load_example_data()
+ othersys = get_workflow_spec_files('random_fact')
+ othersys[1]['date_created'] = str(datetime.now())
+ othersys[1]['md5_hash'] = '12345'
+ mock_get.return_value = othersys
+ response = get_changed_files('localhost:0000','random_fact',as_df=False) #endpoint is not used due to mock
+ self.assertIsNotNone(response)
+ self.assertEqual(len(response),1)
+ self.assertEqual(response[0]['filename'], 'random_fact2.bpmn')
+ self.assertEqual(response[0]['location'], 'remote')
+ self.assertEqual(response[0]['new'], False)
+
+
+ @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_file_by_hash')
+ @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
+ @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec')
+    def test_workflow_sync_with_changed_file(self, workflow_mock, spec_files_mock, file_data_mock):
+ self.load_example_data()
+ remote_workflow = get_workflow_specification('random_fact')
+ self.assertEqual(remote_workflow['display_name'],'Random Fact')
+ remote_workflow['description'] = 'This Workflow came from Remote'
+ remote_workflow['display_name'] = 'Remote Workflow'
+ workflow_mock.return_value = remote_workflow
+ othersys = get_workflow_spec_files('random_fact')
+ othersys[1]['date_created'] = str(datetime.now())
+ othersys[1]['md5_hash'] = '12345'
+ spec_files_mock.return_value = othersys
+ file_data_mock.return_value = self.workflow_sync_response('random_fact2.bpmn')
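+        # the mocked file pull returns the contents of a local fixture file,
+        # standing in for the remote /file/{md5_hash}/hash_data endpoint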
+ response = sync_changed_files('localhost:0000','random_fact') # endpoint not used due to mock
+ self.assertIsNotNone(response)
+ self.assertEqual(len(response),1)
+ self.assertEqual(response[0], 'random_fact2.bpmn')
+ files = FileService.get_spec_data_files('random_fact')
+ md5sums = [str(f.md5_hash) for f in files]
+        self.assertIn('21bb6f9e-0af7-0ab2-0fc7-ec0f94787e58', md5sums)
+ new_local_workflow = get_workflow_specification('random_fact')
+ self.assertEqual(new_local_workflow['display_name'],'Remote Workflow')
+
+
+
+ @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
+ @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec')
+ def test_file_deleted(self, workflow_mock, spec_files_mock):
+ self.load_example_data()
+ remote_workflow = get_workflow_specification('random_fact')
+ workflow_mock.return_value = remote_workflow
+ othersys = get_workflow_spec_files('random_fact')
+        del othersys[1]
+ spec_files_mock.return_value = othersys
+ response = sync_changed_files('localhost:0000','random_fact') # endpoint not used due to mock
+ self.assertIsNotNone(response)
+ # when we delete a local file, we do not return that it was deleted - just
+ # a list of updated files. We may want to change this in the future.
+ self.assertEqual(len(response),0)
+ files = FileService.get_spec_data_files('random_fact')
+ self.assertEqual(len(files),1)
+