diff --git a/config/default.py b/config/default.py
index 13fc79ab..1570811c 100644
--- a/config/default.py
+++ b/config/default.py
@@ -6,6 +6,14 @@ basedir = os.path.abspath(os.path.dirname(__file__))
 
 JSON_SORT_KEYS = False  # CRITICAL. Do not sort the data when returning values to the front end.
 
+# The API_TOKEN is used to ensure that the workflow sync
+# can work without a lot of back and forth.
+# You may want to change this to something simple for testing!
+# NB: if you change this in the local endpoint, it needs to be
+# changed in the remote endpoint as well.
+API_TOKEN = environ.get('API_TOKEN', default='af95596f327c9ecc007b60414fc84b61')
+
 NAME = "CR Connect Workflow"
 FLASK_PORT = environ.get('PORT0') or environ.get('FLASK_PORT', default="5000")
 CORS_ALLOW_ORIGINS = re.split(r',\s*', environ.get('CORS_ALLOW_ORIGINS', default="localhost:4200, localhost:5002"))
diff --git a/crc/api.yml b/crc/api.yml
index ad22677e..a4b799ea 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -100,6 +100,200 @@ paths:
             type: array
             items:
               $ref: "#/components/schemas/Study"
+  /workflow_sync/pullall:
+    get:
+      operationId: crc.api.workflow_sync.sync_all_changed_workflows
+      summary: Sync all workflows that have changed on the remote side and provide a list of the results
+      security:
+        - ApiKeyAuth: []
+      parameters:
+        - name: remote
+          in: query
+          required: true
+          description: The remote endpoint
+          schema:
+            type: string
+      tags:
+        - Workflow Sync API
+      responses:
+        '200':
+          description: An array of workflow specs that were synced from remote.
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  type: string
+              example: ['top_level_workflow', '3b495037-f7d4-4509-bf58-cee41c0c6b0e']
+
+  /workflow_sync/diff:
+    get:
+      operationId: crc.api.workflow_sync.get_changed_workflows
+      summary: Provides a list of workflows that differ from the remote endpoint and whether each is new
+      security:
+        - ApiKeyAuth: []
+      parameters:
+        - name: remote
+          in: query
+          required: true
+          description: The remote endpoint
+          schema:
+            type: string
+      tags:
+        - Workflow Sync API
+      responses:
+        '200':
+          description: An array of workflow specs, with last touched date and which one is most recent.
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: "#/components/schemas/WorkflowSpecDiffList"
+
+  /workflow_sync/{workflow_spec_id}/spec:
+    parameters:
+      - name: workflow_spec_id
+        in: path
+        required: true
+        description: The unique id of an existing workflow specification to retrieve.
+        schema:
+          type: string
+    get:
+      operationId: crc.api.workflow_sync.get_sync_workflow_specification
+      summary: Returns a single workflow specification
+      security:
+        - ApiKeyAuth: []
+      tags:
+        - Workflow Sync API
+      responses:
+        '200':
+          description: Workflow specification.
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/WorkflowSpec"
+
+  /workflow_sync/{workflow_spec_id}/files:
+    get:
+      operationId: crc.api.workflow_sync.get_workflow_spec_files
+      summary: Provides a list of files for a workflow spec on this machine.
+      security:
+        - ApiKeyAuth: []
+      parameters:
+        - name: workflow_spec_id
+          in: path
+          required: true
+          description: The workflow_spec id
+          schema:
+            type: string
+      tags:
+        - Workflow Sync API
+      responses:
+        '200':
+          description: An array of files for a workflow spec on the local system, with details.
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: "#/components/schemas/WorkflowSpecFilesList"
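The sync API above is driven entirely over HTTP with the shared token. As a reference for how a client is expected to call it, here is a minimal sketch; the host names are hypothetical placeholders, while the routes, the `remote` query parameter, and the `X-CR-API-KEY` header come from the spec above:

```python
import requests

LOCAL = 'http://localhost:5000/v1.0'            # hypothetical local instance
TOKEN = 'af95596f327c9ecc007b60414fc84b61'      # must match API_TOKEN on both endpoints

# Ask the local instance which specs differ from the remote one.
diff = requests.get(LOCAL + '/workflow_sync/diff',
                    params={'remote': 'http://remote.example:5000'},
                    headers={'X-CR-API-KEY': TOKEN})
for spec in diff.json():
    print(spec['workflow_spec_id'], spec['location'], spec['new'])

# Or pull every changed spec in one call.
synced = requests.get(LOCAL + '/workflow_sync/pullall',
                      params={'remote': 'http://remote.example:5000'},
                      headers={'X-CR-API-KEY': TOKEN})
print(synced.json())   # e.g. ['top_level_workflow', ...]
```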
+  /workflow_sync/{workflow_spec_id}/files/sync:
+    get:
+      operationId: crc.api.workflow_sync.sync_changed_files
+      summary: Syncs files from a workflow on a remote system and provides a list of files that were updated
+      security:
+        - ApiKeyAuth: []
+      parameters:
+        - name: workflow_spec_id
+          in: path
+          required: true
+          description: The workflow_spec id
+          schema:
+            type: string
+        - name: remote
+          in: query
+          required: true
+          description: The remote endpoint
+          schema:
+            type: string
+      tags:
+        - Workflow Sync API
+      responses:
+        '200':
+          description: A list of files that were synced for the workflow.
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  type: string
+              example: ["data_security_plan.dmn", "some_other_file.xml"]
+
+  /workflow_sync/{workflow_spec_id}/files/diff:
+    get:
+      operationId: crc.api.workflow_sync.get_changed_files
+      summary: Provides a list of files for a workflow spec that differ from the remote, with their signatures.
+      security:
+        - ApiKeyAuth: []
+      parameters:
+        - name: workflow_spec_id
+          in: path
+          required: true
+          description: The workflow_spec id
+          schema:
+            type: string
+        - name: remote
+          in: query
+          required: true
+          description: The remote endpoint
+          schema:
+            type: string
+      tags:
+        - Workflow Sync API
+      responses:
+        '200':
+          description: An array of files that are different from remote, with last touched date and file signature.
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: "#/components/schemas/WorkflowSpecFilesDiff"
+
+  /workflow_sync/all:
+    get:
+      operationId: crc.api.workflow_sync.get_all_spec_state
+      summary: Provides a list of workflow specs with their last update date and thumbprint
+      security:
+        - ApiKeyAuth: []
+      tags:
+        - Workflow Sync API
+      responses:
+        '200':
+          description: An array of workflow specs, with last touched date and file signature.
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: "#/components/schemas/WorkflowSpecAll"
+
   /study/all:
     get:
       operationId: crc.api.study.all_studies
@@ -474,6 +668,30 @@
       responses:
         '204':
           description: The file has been removed.
+  /file/{md5_hash}/hash_data:
+    parameters:
+      - name: md5_hash
+        in: path
+        required: true
+        description: The md5 hash of the file requested
+        schema:
+          type: string
+    get:
+      operationId: crc.api.file.get_file_data_by_hash
+      summary: Returns only the file contents
+      security:
+        - ApiKeyAuth: []
+      tags:
+        - Files
+      responses:
+        '200':
+          description: Returns the actual file
+          content:
+            application/octet-stream:
+              schema:
+                type: string
+                format: binary
+                example: ''
   /file/{file_id}/data:
     parameters:
       - name: file_id
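The `hash_data` endpoint returns raw bytes rather than JSON, which is what lets the sync pull file content it does not yet have locally. A hedged sketch of a client call (the host is a hypothetical placeholder; the hash value is the one from the schema examples below):

```python
import requests

REMOTE = 'http://remote.example:5000/v1.0'      # hypothetical remote instance
TOKEN = 'af95596f327c9ecc007b60414fc84b61'

# md5_hash values come from the /workflow_sync/{spec_id}/files listing.
md5_hash = 'f12e2bbd-a20c-673b-ccb8-a8a1ea9c5b7b'
resp = requests.get(f'{REMOTE}/file/{md5_hash}/hash_data',
                    headers={'X-CR-API-KEY': TOKEN})
resp.raise_for_status()
with open('data_security_plan.dmn', 'wb') as f:
    f.write(resp.content)   # raw bytes, not JSON
```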
@@ -1154,6 +1372,12 @@ components:
       scheme: bearer
       bearerFormat: JWT
       x-bearerInfoFunc: crc.api.user.verify_token_admin
+    ApiKeyAuth:
+      type: apiKey
+      in: header
+      name: X-CR-API-KEY
+      x-apikeyInfoFunc: crc.api.workflow_sync.verify_token
+
   schemas:
     User:
       properties:
@@ -1177,6 +1401,92 @@ components:
       properties:
         id:
           type: string
+    WorkflowSpecDiffList:
+      properties:
+        workflow_spec_id:
+          type: string
+          example: top_level_workflow
+        date_created:
+          type: string
+          example: 2020-12-09 16:55:12.951500+00:00
+        location:
+          type: string
+          example: remote
+        new:
+          type: boolean
+          example: false
+    WorkflowSpecFilesList:
+      properties:
+        file_model_id:
+          type: integer
+          example: 171
+        workflow_spec_id:
+          type: string
+          example: top_level_workflow
+        filename:
+          type: string
+          example: data_security_plan.dmn
+        date_created:
+          type: string
+          example: 2020-12-01 13:58:12.420333+00:00
+        type:
+          type: string
+          example: dmn
+        primary:
+          type: boolean
+          example: false
+        content_type:
+          type: string
+          example: text/xml
+        primary_process_id:
+          type: string
+          example: null
+        md5_hash:
+          type: string
+          example: f12e2bbd-a20c-673b-ccb8-a8a1ea9c5b7b
+    WorkflowSpecFilesDiff:
+      properties:
+        filename:
+          type: string
+          example: data_security_plan.dmn
+        date_created:
+          type: string
+          example: 2020-12-01 13:58:12.420333+00:00
+        type:
+          type: string
+          example: dmn
+        primary:
+          type: boolean
+          example: false
+        content_type:
+          type: string
+          example: text/xml
+        primary_process_id:
+          type: string
+          example: null
+        md5_hash:
+          type: string
+          example: f12e2bbd-a20c-673b-ccb8-a8a1ea9c5b7b
+        location:
+          type: string
+          example: remote
+        new:
+          type: boolean
+          example: false
+    WorkflowSpecAll:
+      properties:
+        workflow_spec_id:
+          type: string
+          example: acaf1258-43b4-437e-8846-f612afa66811
+        date_created:
+          type: string
+          example: 2020-12-01 13:58:12.420333+00:00
+        md5_hash:
+          type: string
+          example: c30fd597f21715018eab12f97f9d4956
     Study:
       properties:
         id:
diff --git a/crc/api/file.py b/crc/api/file.py
index 5cf54221..861f9f04 100644
--- a/crc/api/file.py
+++ b/crc/api/file.py
@@ -6,7 +6,7 @@ from flask import send_file
 
 from crc import session
 from crc.api.common import ApiError
-from crc.models.file import FileSchema, FileModel, File, FileModelSchema
+from crc.models.file import FileSchema, FileModel, File, FileModelSchema, FileDataModel
 from crc.models.workflow import WorkflowSpecModel
 from crc.services.file_service import FileService
 
@@ -99,6 +99,9 @@ def update_file_data(file_id):
     file_model = FileService.update_file(file_model, file.stream.read(), file.content_type)
     return FileSchema().dump(to_file_api(file_model))
 
+def get_file_data_by_hash(md5_hash):
+    filedatamodel = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first()
+    return get_file_data(filedatamodel.file_model_id, version=filedatamodel.version)
 
 def get_file_data(file_id, version=None):
     file_data = FileService.get_file_data(file_id, version)
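One caveat with `get_file_data_by_hash` as written: an unknown hash leaves `filedatamodel` as `None` and the attribute access raises, surfacing as a 500. A defensive variant might look like the sketch below (this guard is not part of the patch, and it assumes `ApiError` accepts a `status_code` argument, as it does elsewhere in `crc.api.common`):

```python
# Sketch of a hardened lookup: return a clean API error rather than an
# AttributeError when no file data matches the requested hash.
def get_file_data_by_hash(md5_hash):
    filedatamodel = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first()
    if filedatamodel is None:
        raise ApiError('unknown_file', 'No file data found for md5 hash %s' % md5_hash,
                       status_code=404)  # status_code kwarg assumed from crc.api.common
    return get_file_data(filedatamodel.file_model_id, version=filedatamodel.version)
```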
diff --git a/crc/api/workflow_sync.py b/crc/api/workflow_sync.py
new file mode 100644
index 00000000..68cd9fd2
--- /dev/null
+++ b/crc/api/workflow_sync.py
@@ -0,0 +1,313 @@
+import hashlib
+
+import pandas as pd
+
+from crc import session, app
+from crc.api.common import ApiError
+from crc.models.file import FileModel, FileDataModel
+from crc.models.workflow import WorkflowSpecModel, WorkflowSpecCategoryModel
+from crc.services.file_service import FileService
+from crc.services.workflow_sync import WorkflowSyncService
+from crc.api.workflow import get_workflow_specification
+
+
+def get_sync_workflow_specification(workflow_spec_id):
+    return get_workflow_specification(workflow_spec_id)
+
+
+def join_uuids(uuids):
+    """Joins a pandas Series of uuids and combines them in one hash"""
+    combined_uuids = ''.join([str(uuid) for uuid in uuids.sort_values()])  # ensure the values are always in the same order
+    return hashlib.md5(combined_uuids.encode('utf8')).hexdigest()  # make a hash of the hashes
+
+
+def verify_token(token, required_scopes):
+    if token == app.config['API_TOKEN']:
+        return {'scope': ['any']}
+    else:
+        raise ApiError("permission_denied", "API Token information is not correct")
+
+
+def get_changed_workflows(remote, as_df=False):
+    """
+    Gets the workflows from a remote endpoint and determines which
+    workflows differ from the local ones.
+    """
+    remote_workflows_list = WorkflowSyncService.get_all_remote_workflows(remote)
+    remote_workflows = pd.DataFrame(remote_workflows_list)
+
+    # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index
+    local = get_all_spec_state_dataframe().reset_index()
+
+    # merge these on workflow spec id and hash - this will
+    # make two different date columns, date_created_x and date_created_y
+    different = remote_workflows.merge(local,
+                                       right_on=['workflow_spec_id', 'md5_hash'],
+                                       left_on=['workflow_spec_id', 'md5_hash'],
+                                       how='outer',
+                                       indicator=True).loc[lambda x: x['_merge'] != 'both']
+    if len(different) == 0:
+        return []
+
+    # each line has a tag on it - if it was in the left or the right frame,
+    # label it so we know whether it was on the remote or the local machine
+    different.loc[different['_merge'] == 'left_only', 'location'] = 'remote'
+    different.loc[different['_merge'] == 'right_only', 'location'] = 'local'
+
+    # this takes the different date_created_x and date_created_y columns and
+    # combines them back into one date_created column
+    index = different['date_created_x'].isnull()
+    different.loc[index, 'date_created_x'] = different[index]['date_created_y']
+    different = different[['workflow_spec_id', 'date_created_x', 'location']].copy()
+    different.columns = ['workflow_spec_id', 'date_created', 'location']
+
+    # the different list will have multiple entries for a workflow if there is a version
+    # on either side - we want the most recent one, so we sort and keep the most
+    # recent entry for each workflow
+    changedfiles = different.sort_values('date_created', ascending=False).groupby('workflow_spec_id').first()
+
+    # get an exclusive-or list of workflow ids - that is, we want lists of specs that are
+    # on one machine or the other, but not both
+    remote_spec_ids = remote_workflows[['workflow_spec_id']]
+    local_spec_ids = local[['workflow_spec_id']]
+    left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])]
+    right = local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])]
+
+    # flag specs as new that are only on the remote box and remove the specs that
+    # are only on the local box
+    changedfiles['new'] = False
+    changedfiles.loc[changedfiles.index.isin(left['workflow_spec_id']), 'new'] = True
+    output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])]
+
+    # return the list as a dict and let swagger convert it to json
+    if as_df:
+        return output
+    else:
+        return output.reset_index().to_dict(orient='records')
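The core of the diff logic is pandas' outer merge with `indicator=True`: rows present on both sides merge into one row tagged `both` and are dropped, while one-sided rows survive the filter and reveal which machine they live on. A self-contained toy example of the technique:

```python
import pandas as pd

# Two spec listings that agree on 'b' but each have one spec the other lacks.
remote = pd.DataFrame({'workflow_spec_id': ['a', 'b'], 'md5_hash': ['h1', 'h2']})
local = pd.DataFrame({'workflow_spec_id': ['b', 'c'], 'md5_hash': ['h2', 'h3']})

different = remote.merge(local, on=['workflow_spec_id', 'md5_hash'],
                         how='outer', indicator=True)
different = different[different['_merge'] != 'both']          # keep only mismatches
different.loc[different['_merge'] == 'left_only', 'location'] = 'remote'
different.loc[different['_merge'] == 'right_only', 'location'] = 'local'
print(different[['workflow_spec_id', 'location']])
#   workflow_spec_id location
# 0                a   remote
# 2                c    local
```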
+
+
+def sync_all_changed_workflows(remote):
+    workflowsdf = get_changed_workflows(remote, as_df=True)
+    if len(workflowsdf) == 0:
+        return []
+    workflows = workflowsdf.reset_index().to_dict(orient='records')
+    for workflow in workflows:
+        sync_changed_files(remote, workflow['workflow_spec_id'])
+    return [x['workflow_spec_id'] for x in workflows]
+
+
+def sync_changed_files(remote, workflow_spec_id):
+    # make sure that the spec exists locally before syncing files
+    specdict = WorkflowSyncService.get_remote_workflow_spec(remote, workflow_spec_id)
+
+    localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first()
+    if localspec is None:
+        localspec = WorkflowSpecModel()
+        localspec.id = workflow_spec_id
+    if specdict['category'] is None:
+        localspec.category = None
+    else:
+        localcategory = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.name
+                                                                        == specdict['category']['name']).first()
+        if localcategory is None:
+            # the category doesn't exist locally - let's make it
+            localcategory = WorkflowSpecCategoryModel()
+            localcategory.name = specdict['category']['name']
+            localcategory.display_name = specdict['category']['display_name']
+            localcategory.display_order = specdict['category']['display_order']
+            session.add(localcategory)
+        localspec.category = localcategory
+
+    localspec.display_order = specdict['display_order']
+    localspec.display_name = specdict['display_name']
+    localspec.name = specdict['name']
+    localspec.description = specdict['description']
+    session.add(localspec)
+
+    changedfiles = get_changed_files(remote, workflow_spec_id, as_df=True)
+    if len(changedfiles) == 0:
+        return []
+    updatefiles = changedfiles[~((changedfiles['new'] == True) & (changedfiles['location'] == 'local'))]
+    updatefiles = updatefiles.reset_index().to_dict(orient='records')
+
+    deletefiles = changedfiles[((changedfiles['new'] == True) & (changedfiles['location'] == 'local'))]
+    deletefiles = deletefiles.reset_index().to_dict(orient='records')
+
+    for delfile in deletefiles:
+        currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id == workflow_spec_id,
+                                                      FileModel.name == delfile['filename']).first()
+
+        # it is more appropriate to archive the file than to delete it,
+        # because we might have workflows that are still using the file data
+        currentfile.archived = True
+        session.add(currentfile)
+
+    for updatefile in updatefiles:
+        currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id == workflow_spec_id,
+                                                      FileModel.name == updatefile['filename']).first()
+        if not currentfile:
+            currentfile = FileModel()
+            currentfile.name = updatefile['filename']
+            currentfile.workflow_spec_id = workflow_spec_id
+
+        currentfile.date_created = updatefile['date_created']
+        currentfile.type = updatefile['type']
+        currentfile.primary = updatefile['primary']
+        currentfile.content_type = updatefile['content_type']
+        currentfile.primary_process_id = updatefile['primary_process_id']
+        session.add(currentfile)
+
+        content = WorkflowSyncService.get_remote_file_by_hash(remote, updatefile['md5_hash'])
+        FileService.update_file(currentfile, content, updatefile['type'])
+    session.commit()
+    return [x['filename'] for x in updatefiles]
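Taken together, a pull is: diff the spec thumbprints, then for each changed spec copy the spec metadata, archive files that exist only locally, and re-fetch changed file content by hash. A usage sketch (the remote URL is a hypothetical placeholder; it assumes the code runs inside the Flask application context so the database session and config are available):

```python
# Hypothetical maintenance script that pulls all changed specs from a remote.
from crc import app
from crc.api.workflow_sync import sync_all_changed_workflows

with app.app_context():
    synced = sync_all_changed_workflows('http://remote.example:5000')
    print('pulled specs:', synced)
```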
+
+
+def get_changed_files(remote, workflow_spec_id, as_df=False):
+    """
+    Gets the files for a workflow_spec from both the local and the remote
+    endpoint, determines which files have been changed, and returns a list
+    of those files.
+    """
+    remote_file_list = WorkflowSyncService.get_remote_workflow_spec_files(remote, workflow_spec_id)
+    remote_files = pd.DataFrame(remote_file_list)
+
+    # get the local thumbprints & make sure that 'file_model_id' is a column, not an index
+    local = get_workflow_spec_files_dataframe(workflow_spec_id).reset_index()
+    local['md5_hash'] = local['md5_hash'].astype('str')
+    remote_files['md5_hash'] = remote_files['md5_hash'].astype('str')
+
+    different = remote_files.merge(local,
+                                   right_on=['filename', 'md5_hash'],
+                                   left_on=['filename', 'md5_hash'],
+                                   how='outer',
+                                   indicator=True).loc[lambda x: x['_merge'] != 'both']
+    if len(different) == 0:
+        if as_df:
+            return different
+        else:
+            return []
+
+    # each line has a tag on it - if it was in the left or the right frame,
+    # label it so we know whether it was on the remote or the local machine
+    different.loc[different['_merge'] == 'left_only', 'location'] = 'remote'
+    different.loc[different['_merge'] == 'right_only', 'location'] = 'local'
+
+    # this takes the different _x and _y columns left by the merge and
+    # combines them back into single columns
+    dualfields = ['date_created', 'type', 'primary', 'content_type', 'primary_process_id']
+    for merge in dualfields:
+        index = different[merge + '_x'].isnull()
+        different.loc[index, merge + '_x'] = different[index][merge + '_y']
+
+    fieldlist = [fld + '_x' for fld in dualfields]
+    different = different[fieldlist + ['md5_hash', 'filename', 'location']].copy()
+    different.columns = dualfields + ['md5_hash', 'filename', 'location']
+
+    # the different list will have multiple entries for a file if there is a version
+    # on either side - we want the most recent one, so we sort and keep the most
+    # recent entry for each filename
+    changedfiles = different.sort_values('date_created', ascending=False).groupby('filename').first()
+
+    # get an exclusive-or list of filenames - that is, we want lists of files that are
+    # on one machine or the other, but not both
+    remote_spec_ids = remote_files[['filename']]
+    local_spec_ids = local[['filename']]
+    left = remote_spec_ids[~remote_spec_ids['filename'].isin(local_spec_ids['filename'])]
+    right = local_spec_ids[~local_spec_ids['filename'].isin(remote_spec_ids['filename'])]
+    changedfiles['new'] = False
+    changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True
+    changedfiles.loc[changedfiles.index.isin(right['filename']), 'new'] = True
+    changedfiles = changedfiles.replace({pd.np.nan: None})
+
+    # return the list as a dict and let swagger convert it to json
+    if as_df:
+        return changedfiles
+    else:
+        return changedfiles.reset_index().to_dict(orient='records')
+
+
+def get_all_spec_state():
+    """
+    Return a list of all workflow specs along with last updated date and a
+    thumbprint of all of the files that are used for that workflow_spec.
+    Convert into a dict list from a dataframe.
+    """
+    df = get_all_spec_state_dataframe()
+    return df.reset_index().to_dict(orient='records')
+
+
+def get_workflow_spec_files(workflow_spec_id):
+    """
+    Return a list of all files for the given workflow_spec along with
+    last updated dates and md5 hashes.
+    Convert into a dict list from a dataframe.
+    """
+    df = get_workflow_spec_files_dataframe(workflow_spec_id)
+    return df.reset_index().to_dict(orient='records')
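The `_x`/`_y` coalescing loop above hand-rolls what pandas calls `combine_first`; a toy equivalence check, shown only to clarify what the loop computes:

```python
import pandas as pd

# After an outer merge, one side's columns are null wherever the row existed
# only on the other side; coalescing the pair recovers a single usable column.
x = pd.Series([None, 'dmn', None], name='type_x')   # values from the remote frame
y = pd.Series(['bpmn', 'ignored', 'xlsx'], name='type_y')  # values from the local frame
print(x.combine_first(y).tolist())   # ['bpmn', 'dmn', 'xlsx']
```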
+
+
+def get_workflow_spec_files_dataframe(workflowid):
+    """
+    Return a list of all files for a workflow_spec along with last updated date and a
+    hash so we can determine file differences for a changed workflow on a box.
+    Return a dataframe.
+    """
+    x = session.query(FileDataModel).join(FileModel).filter(FileModel.workflow_spec_id == workflowid)
+    # there might be a cleaner way of getting a data frame from some of the
+    # fields in the ORM - but this works OK
+    filelist = []
+    for file in x:
+        filelist.append({'file_model_id': file.file_model_id,
+                         'workflow_spec_id': file.file_model.workflow_spec_id,
+                         'md5_hash': file.md5_hash,
+                         'filename': file.file_model.name,
+                         'type': file.file_model.type.name,
+                         'primary': file.file_model.primary,
+                         'content_type': file.file_model.content_type,
+                         'primary_process_id': file.file_model.primary_process_id,
+                         'date_created': file.date_created})
+    if len(filelist) == 0:
+        return pd.DataFrame(columns=['file_model_id',
+                                     'workflow_spec_id',
+                                     'md5_hash',
+                                     'filename',
+                                     'type',
+                                     'primary',
+                                     'content_type',
+                                     'primary_process_id',
+                                     'date_created'])
+    df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last()
+    df['date_created'] = df['date_created'].astype('str')
+    return df
+
+
+def get_all_spec_state_dataframe():
+    """
+    Return a list of all workflow specs along with last updated date and a
+    thumbprint of all of the files that are used for that workflow_spec.
+    Return a dataframe.
+    """
+    x = session.query(FileDataModel).join(FileModel)
+    # there might be a cleaner way of getting a data frame from some of the
+    # fields in the ORM - but this works OK
+    filelist = []
+    for file in x:
+        filelist.append({'file_model_id': file.file_model_id,
+                         'workflow_spec_id': file.file_model.workflow_spec_id,
+                         'md5_hash': file.md5_hash,
+                         'filename': file.file_model.name,
+                         'date_created': file.date_created})
+    df = pd.DataFrame(filelist)
+
+    # get a distinct list of file_model_id's with the most recent file_data retained
+    df = df.sort_values('date_created').drop_duplicates(['file_model_id'], keep='last').copy()
+
+    # take that list and then group by workflow_spec and retain the most recently touched file
+    # and make a consolidated hash of the md5 checksums - this acts as a 'thumbprint' for each
+    # workflow spec
+    df = df.groupby('workflow_spec_id').agg({'date_created': 'max',
+                                             'md5_hash': join_uuids}).copy()
+
+    # get only the columns we are really interested in returning
+    df = df[['date_created', 'md5_hash']].copy()
+
+    # convert dates to string
+    df['date_created'] = df['date_created'].astype('str')
+    return df
diff --git a/crc/models/workflow.py b/crc/models/workflow.py
index 718dfccf..0da32aec 100644
--- a/crc/models/workflow.py
+++ b/crc/models/workflow.py
@@ -69,7 +69,7 @@ class WorkflowStatus(enum.Enum):
 
 
 class WorkflowSpecDependencyFile(db.Model):
-    """Connects a workflow to the version of the specification files it depends on to execute"""
+    """Connects a workflow to the specific version of the specification files it depends on to execute"""
 
     file_data_id = db.Column(db.Integer, db.ForeignKey(FileDataModel.id), primary_key=True)
     workflow_id = db.Column(db.Integer, db.ForeignKey("workflow.id"), primary_key=True)
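The "thumbprint" produced here is what `/workflow_sync/diff` compares: one row per spec, carrying the newest file date and an order-independent hash of all file hashes. A standalone toy reproduction (no database needed):

```python
import hashlib
import pandas as pd

def join_uuids(uuids):
    # sort first so the thumbprint does not depend on row order, then hash the hashes
    combined = ''.join(str(u) for u in uuids.sort_values())
    return hashlib.md5(combined.encode('utf8')).hexdigest()

df = pd.DataFrame({
    'workflow_spec_id': ['random_fact', 'random_fact', 'top_level_workflow'],
    'md5_hash': ['aaa', 'bbb', 'ccc'],
    'date_created': ['2020-12-01', '2020-12-02', '2020-12-01'],
})
state = df.groupby('workflow_spec_id').agg({'date_created': 'max',
                                            'md5_hash': join_uuids})
print(state)
# one row per spec: the newest touch date plus a thumbprint of all file hashes;
# editing any file changes the thumbprint, which flags the spec in /workflow_sync/diff
```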
diff --git a/crc/services/workflow_sync.py b/crc/services/workflow_sync.py
new file mode 100644
index 00000000..26796413
--- /dev/null
+++ b/crc/services/workflow_sync.py
@@ -0,0 +1,52 @@
+import json
+
+import requests
+
+from crc import app
+from crc.api.common import ApiError
+
+
+class WorkflowSyncService(object):
+
+    @staticmethod
+    def get_remote_file_by_hash(remote, md5_hash):
+        url = remote + '/v1.0/file/' + md5_hash + '/hash_data'
+        return WorkflowSyncService.__make_request(url, return_contents=True)
+
+    @staticmethod
+    def get_remote_workflow_spec_files(remote, workflow_spec_id):
+        url = remote + '/v1.0/workflow_sync/' + workflow_spec_id + '/files'
+        return WorkflowSyncService.__make_request(url)
+
+    @staticmethod
+    def get_remote_workflow_spec(remote, workflow_spec_id):
+        """
+        This just gets the details of a workflow spec from the
+        remote side.
+        """
+        url = remote + '/v1.0/workflow_sync/' + workflow_spec_id + '/spec'
+        return WorkflowSyncService.__make_request(url)
+
+    @staticmethod
+    def get_all_remote_workflows(remote):
+        url = remote + '/v1.0/workflow_sync/all'
+        return WorkflowSyncService.__make_request(url)
+
+    @staticmethod
+    def __make_request(url, return_contents=False):
+        try:
+            response = requests.get(url, headers={'X-CR-API-KEY': app.config['API_TOKEN']})
+        except Exception as e:
+            # the request itself failed, so there is no response to inspect
+            raise ApiError("workflow_sync_error", str(e))
+        if response.ok and response.text:
+            if return_contents:
+                return response.content
+            else:
+                return json.loads(response.text)
+        else:
+            raise ApiError("workflow_sync_error",
+                           "Received an invalid response from the remote CR-Connect API "
+                           "(status %s): %s when calling url '%s'." %
+                           (response.status_code, response.text, url))
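Two small fixes worth noting above: the spec route must use `workflow_sync` (underscore) to match `api.yml`, and the connection-failure branch can no longer reference `response`, which is undefined when `requests.get` itself raises. For completeness, a usage sketch of the service (the remote URL is a hypothetical placeholder; the service adds the `/v1.0/...` paths and the `X-CR-API-KEY` header from `app.config` itself). `requests.get` is called without a `timeout`, so a hung remote will hang the sync; passing one would be a cheap hardening step:

```python
from crc.services.workflow_sync import WorkflowSyncService

# List every spec known to a (hypothetical) remote instance, with thumbprints.
specs = WorkflowSyncService.get_all_remote_workflows('http://remote.example:5000')
for spec in specs:
    print(spec['workflow_spec_id'], spec['date_created'], spec['md5_hash'])
```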
diff --git a/deploy/requirements.txt b/deploy/requirements.txt
index 420a888f..b9ab7b4c 100644
--- a/deploy/requirements.txt
+++ b/deploy/requirements.txt
@@ -1,75 +1,76 @@
 alabaster==0.7.12
-alembic==1.4.2
-amqp==2.5.2
+alembic==1.4.3
 aniso8601==8.0.0
-attrs==19.3.0
-babel==2.8.0
-bcrypt==3.1.7
-beautifulsoup4==4.9.1
-billiard==3.6.3.0
+attrs==20.3.0
+babel==2.9.0
+bcrypt==3.2.0
+beautifulsoup4==4.9.3
 blinker==1.4
-celery==4.4.2
-certifi==2020.4.5.1
-cffi==1.14.0
+certifi==2020.11.8
+cffi==1.14.4
 chardet==3.0.4
 click==7.1.2
-clickclick==1.2.2
+clickclick==20.10.2
 commonmark==0.9.1
-configparser==5.0.0
 connexion==2.7.0
-coverage==5.1
+coverage==5.3
+deprecated==1.2.10
 docutils==0.16
-docxtpl==0.9.2
+docxtpl==0.11.2
 et-xmlfile==1.0.1
 flask==1.1.2
+flask-admin==1.5.7
 flask-bcrypt==0.7.1
-flask-cors==3.0.8
-flask-marshmallow==0.12.0
+flask-cors==3.0.9
+flask-mail==0.9.1
+flask-marshmallow==0.14.0
 flask-migrate==2.5.3
 flask-restful==0.3.8
-flask-sqlalchemy==2.4.1
-flask-sso==0.4.0
-future==0.18.2
-httpretty==1.0.2
-idna==2.9
+flask-sqlalchemy==2.4.4
+gunicorn==20.0.4
+httpretty==1.0.3
+idna==2.10
 imagesize==1.2.0
-importlib-metadata==1.6.0
-inflection==0.4.0
+inflection==0.5.1
 itsdangerous==1.1.0
 jdcal==1.4.1
 jinja2==2.11.2
 jsonschema==3.2.0
-kombu==4.6.8
-ldap3==2.7
-lxml==4.5.1
-mako==1.1.2
+ldap3==2.8.1
+lxml==4.6.2
+mako==1.1.3
+markdown==3.3.3
 markupsafe==1.1.1
-marshmallow==3.6.0
+marshmallow==3.9.1
 marshmallow-enum==1.5.1
-marshmallow-sqlalchemy==0.23.0
-numpy==1.18.4
-openapi-spec-validator==0.2.8
-openpyxl==3.0.3
+marshmallow-sqlalchemy==0.24.1
+numpy==1.19.4
+openapi-spec-validator==0.2.9
+openpyxl==3.0.5
 packaging==20.4
-pandas==1.0.3
-psycopg2-binary==2.8.5
+pandas==1.1.4
+psycopg2-binary==2.8.6
 pyasn1==0.4.8
 pycparser==2.20
-pygments==2.6.1
+pygithub==1.53
+pygments==2.7.2
 pyjwt==1.7.1
 pyparsing==2.4.7
-pyrsistent==0.16.0
+pyrsistent==0.17.3
+python-box==5.2.0
 python-dateutil==2.8.1
 python-docx==0.8.10
 python-editor==1.0.4
-pytz==2020.1
+python-levenshtein==0.12.0
+pytz==2020.4
 pyyaml==5.3.1
 recommonmark==0.6.0
-requests==2.23.0
-six==1.14.0
+requests==2.25.0
+sentry-sdk==0.14.4
+six==1.15.0
 snowballstemmer==2.0.0
 soupsieve==2.0.1
-sphinx==3.0.3
+sphinx==3.3.1
 sphinxcontrib-applehelp==1.0.2
 sphinxcontrib-devhelp==1.0.2
 sphinxcontrib-htmlhelp==1.0.3
@@ -77,14 +78,14 @@ sphinxcontrib-jsmath==1.0.1
 sphinxcontrib-qthelp==1.0.3
 sphinxcontrib-serializinghtml==1.1.4
 spiffworkflow
-sqlalchemy==1.3.17
-swagger-ui-bundle==0.0.6
-urllib3==1.25.9
-vine==1.3.0
-waitress==1.4.3
+sqlalchemy==1.3.20
+swagger-ui-bundle==0.0.8
+urllib3==1.26.2
+waitress==1.4.4
 webob==1.8.6
 webtest==2.0.35
 werkzeug==1.0.1
+wrapt==1.12.1
+wtforms==2.3.3
 xlrd==1.2.0
-xlsxwriter==1.2.8
-zipp==3.1.0
+xlsxwriter==1.3.7
diff --git a/example_data.py b/example_data.py
index 58d19c65..f3918846 100644
--- a/example_data.py
+++ b/example_data.py
@@ -66,6 +66,7 @@ class ExampleDataLoader:
                 display_order=6
             ),
         ]
+        db.session.execute("select setval('workflow_spec_category_id_seq',7);")
        db.session.add_all(categories)
         db.session.commit()
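The `setval` call is needed because the categories above are inserted with explicit primary keys, which leaves PostgreSQL's id sequence pointing below the highest id already in use; the next auto-generated insert would then collide. A sketch of the failure mode this prevents (the follow-on insert is hypothetical):

```python
from crc import db
from crc.models.workflow import WorkflowSpecCategoryModel

# Without the setval, the sequence would eventually hand out an id that is
# already taken and the insert below would fail with a duplicate-key error;
# after setval(..., 7) the next generated id is 8.
db.session.execute("select setval('workflow_spec_category_id_seq', 7);")
category = WorkflowSpecCategoryModel(name='new_category',
                                     display_name='New Category',
                                     display_order=7)
db.session.add(category)
db.session.commit()  # the sequence assigns id 8
```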
diff --git a/tests/base_test.py b/tests/base_test.py
index af899917..4f2306a2 100644
--- a/tests/base_test.py
+++ b/tests/base_test.py
@@ -204,6 +204,14 @@ class BaseTest(unittest.TestCase):
             data = myfile.read()
         return data
 
+    @staticmethod
+    def workflow_sync_response(file_name):
+        filepath = os.path.join(app.root_path, '..', 'tests', 'data', 'workflow_sync_responses', file_name)
+        with open(filepath, 'rb') as myfile:
+            data = myfile.read()
+        return data
+
     def assert_success(self, rv, msg=""):
         try:
             data = json.loads(rv.get_data(as_text=True))
diff --git a/tests/data/random_fact/random_fact2.bpmn b/tests/data/random_fact/random_fact2.bpmn
new file mode 100644
index 00000000..bd0a9ef5
--- /dev/null
+++ b/tests/data/random_fact/random_fact2.bpmn
@@ -0,0 +1,200 @@
+[BPMN XML markup stripped in extraction; only text content is recoverable]
+SequenceFlow_0c7wlth
+[user-task markup stripped; its documentation follows]
+# h1 Heading 8-)
+## h2 Heading
+### h3 Heading
+#### h4 Heading
+##### h5 Heading
+###### h6 Heading
+
+## Horizontal Rules
+
+___
+
+---
+
+***
+
+## Typographic replacements
+
+"double quotes" and 'single quotes'
+
+## Emphasis
+
+**This is bold text**
+
+__This is bold text__
+
+*This is italic text*
+
+_This is italic text_
+
+~~Strikethrough~~
+
+## Blockquotes
+
+> Blockquotes can also be nested...
+>> ...by using additional greater-than signs right next to each other...
+> > > ...or with spaces between arrows.
+
+## Lists
+
+Unordered
+
++ Create a list by starting a line with `+`, `-`, or `*`
++ Sub-lists are made by indenting 2 spaces:
+  - Marker character change forces new list start:
+    * Ac tristique libero volutpat at
+    + Facilisis in pretium nisl aliquet
+    - Nulla volutpat aliquam velit
++ Very easy!
+
+Ordered
+
+1. Lorem ipsum dolor sit amet
+2. Consectetur adipiscing elit
+3. Integer molestie lorem at massa
+
+1. You can use sequential numbers...
+1. ...or keep all the numbers as `1.`
+
+Start numbering with offset:
+
+57. foo
+1. bar
+
+## Tables
+
+| Option | Description |
+| ------ | ----------- |
+| data   | path to data files to supply the data that will be passed into templates. |
+| engine | engine to be used for processing templates. Handlebars is the default. |
+| ext    | extension to be used for dest files. |
+
+Right aligned columns
+
+| Option | Description |
+| ------:| -----------:|
+| data   | path to data files to supply the data that will be passed into templates. |
+| engine | engine to be used for processing templates. Handlebars is the default. |
+| ext    | extension to be used for dest files. |
+
+## Links
+
+[link text](http://dev.nodeca.com)
+
+[link with title](http://nodeca.github.io/pica/demo/ "title text!")
+
+Autoconverted link https://github.com/nodeca/pica (enable linkify to see)
+
+## Images
+
+![Minion](https://octodex.github.com/images/minion.png)
+![Stormtroopocat](https://octodex.github.com/images/stormtroopocat.jpg "The Stormtroopocat")
+[script-task markup stripped]
+SequenceFlow_0c7wlth
+SequenceFlow_0641sh6
+SequenceFlow_0641sh6
+SequenceFlow_0t29gjo
+FactService = fact_service()
+[end-event markup stripped; its documentation follows]
+# Great Job!
+You have completed the random fact generator.
+You chose to receive a random fact of the type: "{{type}}"
+
+Your random fact is:
+{{details}}
+SequenceFlow_0t29gjo
+[text-annotation markup stripped]
+User sets the Fact.type to cat, norris, or buzzword
+Makes an API call to get a fact of the required type.
+[remaining BPMN XML (diagram layout) stripped in extraction]
diff --git a/tests/data/workflow_sync_responses/random_fact2.bpmn b/tests/data/workflow_sync_responses/random_fact2.bpmn
new file mode 100644
index 00000000..f502a238
--- /dev/null
+++ b/tests/data/workflow_sync_responses/random_fact2.bpmn
@@ -0,0 +1,104 @@
+[BPMN XML markup stripped in extraction; only text content is recoverable]
+SequenceFlow_0c7wlth
+[user-task markup stripped; its documentation follows]
+# h1 Heading 8-)
+NEW_FILE_ADDED
+
+![Stormtroopocat](https://octodex.github.com/images/stormtroopocat.jpg "The Stormtroopocat")
+[script-task markup stripped]
+SequenceFlow_0c7wlth
+SequenceFlow_0641sh6
+SequenceFlow_0641sh6
+SequenceFlow_0t29gjo
+FactService = fact_service()
+[end-event markup stripped; its documentation follows]
+# Great Job!
+You have completed the random fact generator.
+You chose to receive a random fact of the type: "{{type}}"
+
+Your random fact is:
+{{details}}
+SequenceFlow_0t29gjo
+[text-annotation markup stripped]
+User sets the Fact.type to cat, norris, or buzzword
+Makes an API call to get a fact of the required type.
+[remaining BPMN XML (diagram layout) stripped in extraction]
diff --git a/tests/files/test_files_api.py b/tests/files/test_files_api.py
index 02feb8d0..958989d1 100644
--- a/tests/files/test_files_api.py
+++ b/tests/files/test_files_api.py
@@ -46,7 +46,7 @@ class TestFilesApi(BaseTest):
                          content_type="application/json", headers=self.logged_in_headers())
         self.assert_success(rv)
         json_data = json.loads(rv.get_data(as_text=True))
-        self.assertEqual(2, len(json_data))
+        self.assertEqual(3, len(json_data))
 
     def test_create_file(self):
diff --git a/tests/study/test_study_api.py b/tests/study/test_study_api.py
index e7d33a9d..df734086 100644
--- a/tests/study/test_study_api.py
+++ b/tests/study/test_study_api.py
@@ -165,7 +165,7 @@ class TestStudyApi(BaseTest):
         self.assertEqual(study_event.comment, update_comment)
         self.assertEqual(study_event.user_uid, self.test_uid)
 
-    @patch('crc.services.protocol_builder.ProtocolBuilderService.get_investigators')  # mock_studies
+    @patch('crc.services.protocol_builder.ProtocolBuilderService.get_investigators')  # mock_investigators
     @patch('crc.services.protocol_builder.ProtocolBuilderService.get_required_docs')  # mock_docs
     @patch('crc.services.protocol_builder.ProtocolBuilderService.get_study_details')  # mock_details
     @patch('crc.services.protocol_builder.ProtocolBuilderService.get_studies')  # mock_studies
diff --git a/tests/test_workflow_sync.py b/tests/test_workflow_sync.py
new file mode 100644
index 00000000..eb0cd1b5
--- /dev/null
+++ b/tests/test_workflow_sync.py
@@ -0,0 +1,156 @@
+from datetime import datetime
+from unittest.mock import patch
+
+from crc import db
+from crc.api.workflow_sync import (get_all_spec_state,
+                                   get_changed_workflows,
+                                   get_workflow_spec_files,
+                                   get_changed_files,
+                                   get_workflow_specification,
+                                   sync_changed_files)
+from crc.models.workflow import WorkflowSpecModel
+from crc.services.file_service import FileService
+from tests.base_test import BaseTest
+
+
+class TestWorkflowSync(BaseTest):
+
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows')
+    def test_get_no_changes(self, mock_get):
+        self.load_example_data()
+        othersys = get_all_spec_state()
+        mock_get.return_value = othersys
+        response = get_changed_workflows('localhost:0000')  # endpoint is not actually used due to the mock
+        self.assertIsNotNone(response)
+        self.assertEqual(response, [])
+
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows')
+    def test_remote_workflow_change(self, mock_get):
+        self.load_example_data()
+        othersys = get_all_spec_state()
+        othersys[1]['date_created'] = str(datetime.now())
+        othersys[1]['md5_hash'] = '12345'
+        mock_get.return_value = othersys
+        response = get_changed_workflows('localhost:0000')  # endpoint is not used due to the mock
+        self.assertIsNotNone(response)
+        self.assertEqual(len(response), 1)
+        self.assertEqual(response[0]['workflow_spec_id'], 'random_fact')
+        self.assertEqual(response[0]['location'], 'remote')
+        self.assertEqual(response[0]['new'], False)
+
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows')
+    def test_remote_workflow_has_new(self, mock_get):
+        self.load_example_data()
+        othersys = get_all_spec_state()
+        othersys.append({'workflow_spec_id': 'my_new_workflow',
+                         'date_created': str(datetime.now()),
+                         'md5_hash': '12345'})
+        mock_get.return_value = othersys
+        response = get_changed_workflows('localhost:0000')  # endpoint is not used due to the mock
+        self.assertIsNotNone(response)
+        self.assertEqual(len(response), 1)
+        self.assertEqual(response[0]['workflow_spec_id'], 'my_new_workflow')
+        self.assertEqual(response[0]['location'], 'remote')
+        self.assertEqual(response[0]['new'], True)
+
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows')
+    def test_local_workflow_has_new(self, mock_get):
+        self.load_example_data()
+
+        othersys = get_all_spec_state()
+        mock_get.return_value = othersys
+        wf_spec = WorkflowSpecModel()
+        wf_spec.id = 'abcdefg'
+        wf_spec.display_name = 'New Workflow - Yum!!'
+        wf_spec.name = 'my_new_workflow'
+        wf_spec.description = 'yep - its a new workflow'
+        wf_spec.category_id = 0
+        wf_spec.display_order = 0
+        db.session.add(wf_spec)
+        db.session.commit()
+        FileService.add_workflow_spec_file(wf_spec, 'dummyfile.txt', 'text', b'this is a test')
+        # note: get_changed_workflows only reports specs that are new or changed on the
+        # remote side - a spec that exists only locally is filtered out, so this is empty
+        response = get_changed_workflows('localhost:0000')  # endpoint is not used due to the mock
+        self.assertIsNotNone(response)
+        self.assertEqual(response, [])
+
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
+    def test_file_differences(self, mock_get):
+        self.load_example_data()
+        othersys = get_workflow_spec_files('random_fact')
+        othersys[1]['date_created'] = str(datetime.now())
+        othersys[1]['md5_hash'] = '12345'
+        mock_get.return_value = othersys
+        response = get_changed_files('localhost:0000', 'random_fact', as_df=False)  # endpoint is not used due to the mock
+        self.assertIsNotNone(response)
+        self.assertEqual(len(response), 1)
+        self.assertEqual(response[0]['filename'], 'random_fact2.bpmn')
+        self.assertEqual(response[0]['location'], 'remote')
+        self.assertEqual(response[0]['new'], False)
+
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_file_by_hash')
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec')
+    def test_workflow_file_sync(self, workflow_mock, spec_files_mock, file_data_mock):
+        self.load_example_data()
+        remote_workflow = get_workflow_specification('random_fact')
+        self.assertEqual(remote_workflow['display_name'], 'Random Fact')
+        remote_workflow['description'] = 'This Workflow came from Remote'
+        remote_workflow['display_name'] = 'Remote Workflow'
+        workflow_mock.return_value = remote_workflow
+        othersys = get_workflow_spec_files('random_fact')
+        othersys[1]['date_created'] = str(datetime.now())
+        othersys[1]['md5_hash'] = '12345'
+        spec_files_mock.return_value = othersys
+        file_data_mock.return_value = self.workflow_sync_response('random_fact2.bpmn')
+        response = sync_changed_files('localhost:0000', 'random_fact')  # endpoint not used due to the mock
+        self.assertIsNotNone(response)
+        self.assertEqual(len(response), 1)
+        self.assertEqual(response[0], 'random_fact2.bpmn')
+        files = FileService.get_spec_data_files('random_fact')
+        md5sums = [str(f.md5_hash) for f in files]
+        self.assertEqual('21bb6f9e-0af7-0ab2-0fc7-ec0f94787e58' in md5sums, True)
+        new_local_workflow = get_workflow_specification('random_fact')
+        self.assertEqual(new_local_workflow['display_name'], 'Remote Workflow')
+
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
+    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec')
+    def test_file_deleted(self, workflow_mock, spec_files_mock):
+        self.load_example_data()
+        remote_workflow = get_workflow_specification('random_fact')
+        workflow_mock.return_value = remote_workflow
+        othersys = get_workflow_spec_files('random_fact')
+        del othersys[1]
+        spec_files_mock.return_value = othersys
+        response = sync_changed_files('localhost:0000', 'random_fact')  # endpoint not used due to the mock
+        self.assertIsNotNone(response)
+        # when we delete a local file, we do not report that it was deleted - just
+        # a list of updated files. We may want to change this in the future.
+        self.assertEqual(len(response), 0)
+        files = FileService.get_spec_data_files('random_fact')
+        self.assertEqual(len(files), 1)