From 92aa1b971dfa0c7a05dd19149a01d33c9923ed0b Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Wed, 2 Dec 2020 16:04:00 -0500 Subject: [PATCH 01/35] commit before removing big long SQL --- crc/api.yml | 17 ++++++++++++ crc/api/workflow.py | 67 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/crc/api.yml b/crc/api.yml index ad22677e..44138510 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -100,6 +100,23 @@ paths: type: array items: $ref: "#/components/schemas/Study" + /workflow_spec/all: + get: + operationId: crc.api.workflow.get_all_spec_state + summary: Provides a list of workflow specs and their signature + tags: + - Workflow Spec States + responses: + '200': + description: An array of workflow specs, with last touched date and file signature. + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/Study" + + /study/all: get: operationId: crc.api.study.all_studies diff --git a/crc/api/workflow.py b/crc/api/workflow.py index a148328a..37a5059a 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -1,11 +1,15 @@ +import hashlib +import json import uuid +from hashlib import md5 +import pandas as pd from SpiffWorkflow.util.deep_merge import DeepMerge from flask import g -from crc import session, app +from crc import session, db from crc.api.common import ApiError, ApiErrorSchema from crc.models.api_models import WorkflowApi, WorkflowApiSchema, NavigationItem, NavigationItemSchema -from crc.models.file import FileModel, LookupDataSchema +from crc.models.file import FileModel, LookupDataSchema, FileDataModel from crc.models.study import StudyModel, WorkflowMetadata from crc.models.task_event import TaskEventModel, TaskEventModelSchema, TaskEvent, TaskEventSchema from crc.models.workflow import WorkflowModel, WorkflowSpecModelSchema, WorkflowSpecModel, WorkflowSpecCategoryModel, \ @@ -257,3 +261,62 @@ def _verify_user_and_role(processor, spiff_task): raise ApiError.from_task("permission_denied", f"This task must be completed by '{allowed_users}', " f"but you are {user.uid}", spiff_task) +def join_uuids(uuids): + """Joins a pandas Series of uuids and combines them in one hash""" + combined_uuids = ''.join([str(uuid) for uuid in uuids.sort_values()]) # ensure that values are always + # in the same order + return hashlib.md5(combined_uuids.encode('utf8')).hexdigest() # make a hash of the hashes + +def get_all_spec_state(): +# big_nasty_sql = """ +# +# with first as ( +# select file_model_id,version,max(version) over (partition by file_model_id) maxversion,f +# .name, +# workflow_spec_id,date_created,md5_hash +# from file f join file_data fd on fd. 
+# file_model_id = f.id +# where workflow_spec_id is not null +# order by workflow_spec_id,file_model_id,version desc), +# second as (select name,workflow_spec_id,date_created,md5_hash from first +# where maxversion = version) +# select row_to_json(t) workflow_spec_state +# from ( +# select workflow_spec_id,max(date_created) last_update,md5(max( +# ( +# select array_to_json(array_agg(row_to_json(d))) +# from (select name,md5_hash from second where second.workflow_spec_id = second2.workflow_spec_id) d +# +# )::text)) as files_hash +# from second as second2 +# group by 1 +# ) t +# """ +# json_results = [] +# result = db.engine.execute(big_nasty_sql) + x = session.query(FileDataModel).join(FileModel) + + # there might be a cleaner way of getting a data frome from some of the + # fields in the ORM - but this works OK + filelist = [] + for file in x: + filelist.append({'file_model_id':file.file_model_id, + 'workflow_spec_id': file.file_model.workflow_spec_id, + 'md5_hash':file.md5_hash, + 'filename':file.file_model.name, + 'date_created':file.date_created}) + df = pd.DataFrame(filelist) + + # get a distinct list of file_model_id's with the most recent file_data retained + df = df.sort_values('date_created').drop_duplicates(['file_model_id'],keep='last').copy() + + # take that list and then group by workflow_spec and retain the most recently touched file + # and make a consolidated hash of the md5_checksums - this acts as a 'thumbprint' for each + # workflow spec + df = df.groupby('workflow_spec_id').agg({'date_created':'max', + 'md5_hash':join_uuids}).copy() + + df = df.reset_index()[['workflow_spec_id','date_created','md5_hash']].copy() + + return df.to_json(orient='records') + From 0e8913434abe61eddc21e92f4485bd92b4e904b0 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Thu, 3 Dec 2020 08:44:15 -0500 Subject: [PATCH 02/35] refactor a bit --- crc/api/workflow.py | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/crc/api/workflow.py b/crc/api/workflow.py index 37a5059a..7f517e22 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -267,33 +267,16 @@ def join_uuids(uuids): # in the same order return hashlib.md5(combined_uuids.encode('utf8')).hexdigest() # make a hash of the hashes +@cross_origin() # allow all cross origin requests so we can hit it with a dev box def get_all_spec_state(): -# big_nasty_sql = """ -# -# with first as ( -# select file_model_id,version,max(version) over (partition by file_model_id) maxversion,f -# .name, -# workflow_spec_id,date_created,md5_hash -# from file f join file_data fd on fd. 
-# file_model_id = f.id -# where workflow_spec_id is not null -# order by workflow_spec_id,file_model_id,version desc), -# second as (select name,workflow_spec_id,date_created,md5_hash from first -# where maxversion = version) -# select row_to_json(t) workflow_spec_state -# from ( -# select workflow_spec_id,max(date_created) last_update,md5(max( -# ( -# select array_to_json(array_agg(row_to_json(d))) -# from (select name,md5_hash from second where second.workflow_spec_id = second2.workflow_spec_id) d -# -# )::text)) as files_hash -# from second as second2 -# group by 1 -# ) t -# """ -# json_results = [] -# result = db.engine.execute(big_nasty_sql) + df = get_all_spec_state_dataframe() + return df.reset_index().to_json(orient='records') + +def get_all_spec_state_dataframe(): + """ + Return a list of all workflow specs along with last updated date and a + thumbprint of all of the files that are used for that workflow_spec + """ x = session.query(FileDataModel).join(FileModel) # there might be a cleaner way of getting a data frome from some of the @@ -315,8 +298,9 @@ def get_all_spec_state(): # workflow spec df = df.groupby('workflow_spec_id').agg({'date_created':'max', 'md5_hash':join_uuids}).copy() + df = df[['date_created','md5_hash']].copy() + df['date_created'] = df['date_created'].astype('str') - df = df.reset_index()[['workflow_spec_id','date_created','md5_hash']].copy() - return df.to_json(orient='records') + return df From bcb45a59c8e42a65dcbd93d3c861273626d854d1 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Thu, 3 Dec 2020 08:46:34 -0500 Subject: [PATCH 03/35] allow cors --- crc/api/workflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crc/api/workflow.py b/crc/api/workflow.py index 7f517e22..a5f2d3b1 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -20,7 +20,7 @@ from crc.services.study_service import StudyService from crc.services.user_service import UserService from crc.services.workflow_processor import WorkflowProcessor from crc.services.workflow_service import WorkflowService - +from flask_cors import cross_origin def all_specifications(): schema = WorkflowSpecModelSchema(many=True) @@ -267,7 +267,7 @@ def join_uuids(uuids): # in the same order return hashlib.md5(combined_uuids.encode('utf8')).hexdigest() # make a hash of the hashes -@cross_origin() # allow all cross origin requests so we can hit it with a dev box +@cross_origin() # allow even dev boxes to hit this without restrictions def get_all_spec_state(): df = get_all_spec_state_dataframe() return df.reset_index().to_json(orient='records') From 0f59d3de096f2f938e601d6ef75ffc62611f5e3c Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Thu, 3 Dec 2020 14:45:57 -0500 Subject: [PATCH 04/35] add endpoint that gets a record for all changed /new workflow_specs at a remote endpoint --- crc/api.yml | 28 ++++++++++++++++++++++++++++ crc/api/workflow.py | 39 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/crc/api.yml b/crc/api.yml index 44138510..959d6061 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -100,10 +100,38 @@ paths: type: array items: $ref: "#/components/schemas/Study" + /workflow_spec/diff: + get: + operationId: crc.api.workflow.get_changed_workflows + summary: Provides a list of workflow specs and their signature + security: [] # Disable security for this endpoint only - we'll sanity check + # in the endpoint + parameters: + - name: remote + in: query + required: true + description: The remote endpoint + schema: + 
type: string + tags: + - Workflow Spec States + responses: + '200': + description: An array of workflow specs, with last touched date and file signature. + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/Study" + + /workflow_spec/all: get: operationId: crc.api.workflow.get_all_spec_state summary: Provides a list of workflow specs and their signature + security: [] # Disable security for this endpoint only - we'll sanity check + # in the endpoint tags: - Workflow Spec States responses: diff --git a/crc/api/workflow.py b/crc/api/workflow.py index a5f2d3b1..d899d4fb 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -21,6 +21,7 @@ from crc.services.user_service import UserService from crc.services.workflow_processor import WorkflowProcessor from crc.services.workflow_service import WorkflowService from flask_cors import cross_origin +import requests def all_specifications(): schema = WorkflowSpecModelSchema(many=True) @@ -267,10 +268,44 @@ def join_uuids(uuids): # in the same order return hashlib.md5(combined_uuids.encode('utf8')).hexdigest() # make a hash of the hashes -@cross_origin() # allow even dev boxes to hit this without restrictions +def get_changed_workflows(remote): + """ + gets a remote endpoint - gets the workflows and then + determines what workflows are different from the remote endpoint + """ + x = requests.get('http://'+remote+'/v1.0/workflow_spec/all') + + # This is probably very and may allow cross site attacks - fix later + remote = pd.DataFrame(eval(x.text)) + local = get_all_spec_state_dataframe().reset_index() + different = remote.merge(local, right_on=['workflow_spec_id','md5_hash'], left_on=['workflow_spec_id', + 'md5_hash'], how = 'outer' , indicator=True).loc[ + lambda x : x['_merge']!='both'] + + different.loc[different['_merge']=='left_only','location'] = 'remote' + different.loc[different['_merge']=='right_only','location'] = 'local' + #changedfiles = different.copy() + index = different['date_created_x'].isnull() + different.loc[index,'date_created_x'] = different[index]['date_created_y'] + different = different[['workflow_spec_id','date_created_x','location']].copy() + different.columns=['workflow_spec_id','date_created','location'] + changedfiles = different.sort_values('date_created',ascending=False).groupby('workflow_spec_id').first() + remote_spec_ids = remote[['workflow_spec_id']] + local_spec_ids = local[['workflow_spec_id']] + + left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])] + right = local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])] + changedfiles['new'] = False + changedfiles.loc[changedfiles.index.isin(left['workflow_spec_id']), 'new'] = True + output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])] + return output.reset_index().to_dict(orient='records') + + + + def get_all_spec_state(): df = get_all_spec_state_dataframe() - return df.reset_index().to_json(orient='records') + return df.reset_index().to_dict(orient='records') def get_all_spec_state_dataframe(): """ From 10dce542ec612027ba2d2a8d995f4c48410708d0 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Thu, 3 Dec 2020 15:27:45 -0500 Subject: [PATCH 05/35] documentation change --- crc/api/workflow.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crc/api/workflow.py b/crc/api/workflow.py index d899d4fb..a65f6dda 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -304,6 +304,11 @@ def 
get_changed_workflows(remote): def get_all_spec_state(): + """ + Return a list of all workflow specs along with last updated date and a + thumbprint of all of the files that are used for that workflow_spec + Convert into a dict list from a dataframe + """ df = get_all_spec_state_dataframe() return df.reset_index().to_dict(orient='records') @@ -311,6 +316,7 @@ def get_all_spec_state_dataframe(): """ Return a list of all workflow specs along with last updated date and a thumbprint of all of the files that are used for that workflow_spec + Return a dataframe """ x = session.query(FileDataModel).join(FileModel) From 3e8d4ca7c942af25f5a3f8f0608417068462de5b Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Fri, 4 Dec 2020 10:23:03 -0500 Subject: [PATCH 06/35] Add some inline documentation to make the process more clear --- crc/api/workflow.py | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/crc/api/workflow.py b/crc/api/workflow.py index a65f6dda..726aa78e 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -273,31 +273,50 @@ def get_changed_workflows(remote): gets a remote endpoint - gets the workflows and then determines what workflows are different from the remote endpoint """ - x = requests.get('http://'+remote+'/v1.0/workflow_spec/all') + response = requests.get('http://'+remote+'/v1.0/workflow_spec/all') # This is probably very and may allow cross site attacks - fix later - remote = pd.DataFrame(eval(x.text)) + remote = pd.DataFrame(json.loads(response.text)) + # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index local = get_all_spec_state_dataframe().reset_index() - different = remote.merge(local, right_on=['workflow_spec_id','md5_hash'], left_on=['workflow_spec_id', - 'md5_hash'], how = 'outer' , indicator=True).loc[ - lambda x : x['_merge']!='both'] + # merge these on workflow spec id and hash - this will + # make two different date columns date_x and date_y + different = remote.merge(local, + right_on=['workflow_spec_id','md5_hash'], + left_on=['workflow_spec_id','md5_hash'], + how = 'outer' , + indicator=True).loc[lambda x : x['_merge']!='both'] + + # each line has a tag on it - if was in the left or the right, + # label it so we know if that was on the remote or local machine different.loc[different['_merge']=='left_only','location'] = 'remote' different.loc[different['_merge']=='right_only','location'] = 'local' - #changedfiles = different.copy() + + # this takes the different date_created_x and date-created_y columns and + # combines them back into one date_created column index = different['date_created_x'].isnull() different.loc[index,'date_created_x'] = different[index]['date_created_y'] different = different[['workflow_spec_id','date_created_x','location']].copy() different.columns=['workflow_spec_id','date_created','location'] + + # our different list will have multiple entries for a workflow if there is a version on either side + # we want to grab the most recent one, so we sort and grab the most recent one for each workflow changedfiles = different.sort_values('date_created',ascending=False).groupby('workflow_spec_id').first() + + # get an exclusive or list of workflow ids - that is we want lists of files that are + # on one machine or the other, but not both remote_spec_ids = remote[['workflow_spec_id']] local_spec_ids = local[['workflow_spec_id']] - left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])] right = 
local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])] + + # flag files as new that are only on the remote box and remove the files that are only on the local box changedfiles['new'] = False changedfiles.loc[changedfiles.index.isin(left['workflow_spec_id']), 'new'] = True output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])] + + # return the list as a dict, let swagger convert it to json return output.reset_index().to_dict(orient='records') @@ -339,9 +358,9 @@ def get_all_spec_state_dataframe(): # workflow spec df = df.groupby('workflow_spec_id').agg({'date_created':'max', 'md5_hash':join_uuids}).copy() + # get only the columns we are really interested in returning df = df[['date_created','md5_hash']].copy() + # convert dates to string df['date_created'] = df['date_created'].astype('str') - - return df From d41d018fe34fa91569c5c64225c69622514e0201 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Fri, 4 Dec 2020 11:49:07 -0500 Subject: [PATCH 07/35] For a given workflow - find the files that are different from a remote endpoint for the same workflow --- crc/api.yml | 61 +++++++++++++++++++++++++++++++++- crc/api/workflow.py | 81 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 140 insertions(+), 2 deletions(-) diff --git a/crc/api.yml b/crc/api.yml index 959d6061..afeb2bc2 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -126,10 +126,69 @@ paths: $ref: "#/components/schemas/Study" + /workflow_spec/{workflow_spec_id}/files: + get: + operationId: crc.api.workflow.get_workflow_spec_files + summary: Provides a list of workflow specs and their signature + security: [] # Disable security for this endpoint only - we'll sanity check + # in the endpoint + parameters: + - name: workflow_spec_id + in: path + required: true + description: The workflow_spec id + schema: + type: string + + tags: + - Workflow Spec States + responses: + '200': + description: An array of workflow specs, with last touched date and file signature. + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/Study" + + /workflow_spec/{workflow_spec_id}/files/diff: + get: + operationId: crc.api.workflow.get_changed_files + summary: Provides a list of workflow specs and their signature + security: [] # Disable security for this endpoint only - we'll sanity check + # in the endpoint + parameters: + - name: workflow_spec_id + in: path + required: true + description: The workflow_spec id + schema: + type: string + - name: remote + in: query + required: true + description: The remote endpoint + schema: + type: string + + tags: + - Workflow Spec States + responses: + '200': + description: An array of workflow specs, with last touched date and file signature. 
+ content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/Study" + + /workflow_spec/all: get: operationId: crc.api.workflow.get_all_spec_state - summary: Provides a list of workflow specs and their signature + summary: Provides a list of files for a workflow spec security: [] # Disable security for this endpoint only - we'll sanity check # in the endpoint tags: diff --git a/crc/api/workflow.py b/crc/api/workflow.py index 726aa78e..9484d183 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -320,6 +320,53 @@ def get_changed_workflows(remote): return output.reset_index().to_dict(orient='records') +def get_changed_files(remote,workflow_spec_id): + """ + gets a remote endpoint - gets the files for a workflow_spec on both + local and remote and determines what files have been change and returns a list of those + files + """ + response = requests.get('http://'+remote+'/v1.0/workflow_spec/'+workflow_spec_id+'/files') + # This is probably very and may allow cross site attacks - fix later + remote = pd.DataFrame(json.loads(response.text)) + # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index + local = get_workflow_spec_files_dataframe(workflow_spec_id).reset_index() + local['md5_hash'] = local['md5_hash'].astype('str') + different = remote.merge(local, + right_on=['filename','md5_hash'], + left_on=['filename','md5_hash'], + how = 'outer' , + indicator=True).loc[lambda x : x['_merge']!='both'] + + # each line has a tag on it - if was in the left or the right, + # label it so we know if that was on the remote or local machine + different.loc[different['_merge']=='left_only','location'] = 'remote' + different.loc[different['_merge']=='right_only','location'] = 'local' + + # this takes the different date_created_x and date-created_y columns and + # combines them back into one date_created column + index = different['date_created_x'].isnull() + different.loc[index,'date_created_x'] = different[index]['date_created_y'] + different = different[['date_created_x','filename','location']].copy() + + different.columns=['date_created','filename','location'] + # our different list will have multiple entries for a workflow if there is a version on either side + # we want to grab the most recent one, so we sort and grab the most recent one for each workflow + changedfiles = different.sort_values('date_created',ascending=False).groupby('filename').first() + + # get an exclusive or list of workflow ids - that is we want lists of files that are + # on one machine or the other, but not both + remote_spec_ids = remote[['filename']] + local_spec_ids = local[['filename']] + left = remote_spec_ids[~remote_spec_ids['filename'].isin(local_spec_ids['filename'])] + right = local_spec_ids[~local_spec_ids['filename'].isin(remote_spec_ids['filename'])] + changedfiles['new'] = False + changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True + changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True + + # return the list as a dict, let swagger convert it to json + return changedfiles.reset_index().to_dict(orient='records') + def get_all_spec_state(): @@ -331,6 +378,39 @@ def get_all_spec_state(): df = get_all_spec_state_dataframe() return df.reset_index().to_dict(orient='records') + +def get_workflow_spec_files(workflow_spec_id): + """ + Return a list of all workflow specs along with last updated date and a + thumbprint of all of the files that are used for that workflow_spec + Convert into a dict list from a 
dataframe + """ + df = get_workflow_spec_files_dataframe(workflow_spec_id) + return df.reset_index().to_dict(orient='records') + + +def get_workflow_spec_files_dataframe(workflowid): + """ + Return a list of all files for a workflow_spec along with last updated date and a + hash so we can determine file differences for a changed workflow on a box. + Return a dataframe + """ + x = session.query(FileDataModel).join(FileModel).filter(FileModel.workflow_spec_id==workflowid) + # there might be a cleaner way of getting a data frome from some of the + # fields in the ORM - but this works OK + filelist = [] + for file in x: + filelist.append({'file_model_id':file.file_model_id, + 'workflow_spec_id': file.file_model.workflow_spec_id, + 'md5_hash':file.md5_hash, + 'filename':file.file_model.name, + 'date_created':file.date_created}) + df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last() + df['date_created'] = df['date_created'].astype('str') + return df + + + def get_all_spec_state_dataframe(): """ Return a list of all workflow specs along with last updated date and a @@ -338,7 +418,6 @@ def get_all_spec_state_dataframe(): Return a dataframe """ x = session.query(FileDataModel).join(FileModel) - # there might be a cleaner way of getting a data frome from some of the # fields in the ORM - but this works OK filelist = [] From cad613cf6395c5fd3fbfb03e5f6f14b205fe0294 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Fri, 4 Dec 2020 12:00:02 -0500 Subject: [PATCH 08/35] Fix problem when method is run for a workflow that is non-existant locally --- crc/api/workflow.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crc/api/workflow.py b/crc/api/workflow.py index 9484d183..983097a6 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -405,6 +405,8 @@ def get_workflow_spec_files_dataframe(workflowid): 'md5_hash':file.md5_hash, 'filename':file.file_model.name, 'date_created':file.date_created}) + if len(filelist) == 0: + return pd.DataFrame(columns=['file_model_id','workflow_spec_id','md5_hash','filename','date_created']) df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last() df['date_created'] = df['date_created'].astype('str') return df From 93b12a8e82659677953dbdfec38d553bd49149e9 Mon Sep 17 00:00:00 2001 From: Dan Date: Fri, 4 Dec 2020 17:56:12 -0500 Subject: [PATCH 09/35] updates using new navigation from spiff workflow's navigation branch, all tests passing. 
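A sketch (illustration only, not part of this commit) of how the reworked
NavigationItemSchema in crc/models/api_models.py is meant to round-trip the
SpiffWorkflow NavItem objects. The field names come from the schema's
Meta.fields in the diff below; the concrete values are invented for the
example.

    from crc.models.api_models import NavigationItemSchema

    raw = {"spec_id": "two_forms", "name": "StepOne", "spec_type": "UserTask",
           "task_id": None, "description": "Step One", "backtracks": None,
           "indent": 0, "lane": None, "state": "READY"}
    # make_nav() pops state/task_id, builds a NavItem from the remaining
    # fields, then restores the two popped attributes on the instance.
    nav_item = NavigationItemSchema().load(raw)
    assert nav_item.state == "READY"
    # Dumping reads the same attributes straight back off the NavItem.
    assert NavigationItemSchema().dump(nav_item)["description"] == "Step One"
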
--- Pipfile | 2 +- Pipfile.lock | 49 +- crc/api/workflow.py | 8 +- crc/models/api_models.py | 33 +- crc/services/workflow_service.py | 38 +- .../irb_api_personnel/irb_api_personnel.bpmn | 610 +++++++++++------- tests/test_tasks_api.py | 49 +- tests/test_user_roles.py | 56 +- .../test_workflow_processor_multi_instance.py | 2 +- 9 files changed, 491 insertions(+), 356 deletions(-) diff --git a/Pipfile b/Pipfile index 0a498025..e49b6613 100644 --- a/Pipfile +++ b/Pipfile @@ -39,7 +39,7 @@ requests = "*" sentry-sdk = {extras = ["flask"],version = "==0.14.4"} sphinx = "*" swagger-ui-bundle = "*" -spiffworkflow = {git = "https://github.com/sartography/SpiffWorkflow.git",ref = "master"} +spiffworkflow = {git = "https://github.com/sartography/SpiffWorkflow.git",ref = "bug/navigation"} webtest = "*" werkzeug = "*" xlrd = "*" diff --git a/Pipfile.lock b/Pipfile.lock index ed6931eb..edda7a09 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "621d57ec513f24c665dd34e08ae5a19fc27784e87c55bb4d2a91a1d48a473081" + "sha256": "1e41c4486a74db3ee30b7fd4572b0cc54cfbe1bc1e6246b855748ec78db6cc1e" }, "pipfile-spec": 6, "requires": { @@ -33,10 +33,10 @@ }, "aniso8601": { "hashes": [ - "sha256:529dcb1f5f26ee0df6c0a1ee84b7b27197c3c50fc3a6321d66c544689237d072", - "sha256:c033f63d028b9a58e3ab0c2c7d0532ab4bfa7452bfc788fbfe3ddabd327b181a" + "sha256:246bf8d3611527030889e6df970878969d3a2f760ba3eb694fa1fb10e6ce53f9", + "sha256:51047d4fb51d7b8afd522b70f2d21a1b2487cbb7f7bd84ea852e9aa7808e7704" ], - "version": "==8.0.0" + "version": "==8.1.0" }, "attrs": { "hashes": [ @@ -108,12 +108,14 @@ "sha256:9cc46bc107224ff5b6d04369e7c595acb700c3613ad7bcf2e2012f62ece80c35", "sha256:9f7a31251289b2ab6d4012f6e83e58bc3b96bd151f5b5262467f4bb6b34a7c26", "sha256:9ffb888f19d54a4d4dfd4b3f29bc2c16aa4972f1c2ab9c4ab09b8ab8685b9c2b", + "sha256:a5ed8c05548b54b998b9498753fb9cadbfd92ee88e884641377d8a8b291bcc01", "sha256:a7711edca4dcef1a75257b50a2fbfe92a65187c47dab5a0f1b9b332c5919a3fb", "sha256:af5c59122a011049aad5dd87424b8e65a80e4a6477419c0c1015f73fb5ea0293", "sha256:b18e0a9ef57d2b41f5c68beefa32317d286c3d6ac0484efd10d6e07491bb95dd", "sha256:b4e248d1087abf9f4c10f3c398896c87ce82a9856494a7155823eb45a892395d", "sha256:ba4e9e0ae13fc41c6b23299545e5ef73055213e466bd107953e4a013a5ddd7e3", "sha256:c6332685306b6417a91b1ff9fae889b3ba65c2292d64bd9245c093b1b284809d", + "sha256:d5ff0621c88ce83a28a10d2ce719b2ee85635e85c515f12bac99a95306da4b2e", "sha256:d9efd8b7a3ef378dd61a1e77367f1924375befc2eba06168b6ebfa903a5e59ca", "sha256:df5169c4396adc04f9b0a05f13c074df878b6052430e03f50e68adf3a57aa28d", "sha256:ebb253464a5d0482b191274f1c8bf00e33f7e0b9c66405fbffc61ed2c839c775", @@ -548,10 +550,10 @@ }, "packaging": { "hashes": [ - "sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8", - "sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181" + "sha256:05af3bb85d320377db281cf254ab050e1a7ebcbf5410685a9a407e18a1f81236", + "sha256:eb41423378682dadb7166144a4926e443093863024de508ca5c9737d6bc08376" ], - "version": "==20.4" + "version": "==20.7" }, "pandas": { "hashes": [ @@ -640,11 +642,11 @@ }, "pygithub": { "hashes": [ - "sha256:776befaddab9d8fddd525d52a6ca1ac228cf62b5b1e271836d766f4925e1452e", - "sha256:8ad656bf79958e775ec59f7f5a3dbcbadac12147ae3dc42708b951064096af15" + "sha256:053f1b8d553a344ebd3ca3972765d923ee7e8ecc3ea55bd203683f164348fa1a", + "sha256:14c96d55e3c0e295598e52fbbbf2a7862a293723482ae9000cb9c816faab4fb4" ], "index": "pypi", - "version": "==1.53" + "version": "==1.54" }, "pygments": { 
"hashes": [ @@ -746,11 +748,11 @@ }, "requests": { "hashes": [ - "sha256:7f1a0b932f4a60a1a65caa4263921bb7d9ee911957e0ae4a23a6dd08185ad5f8", - "sha256:e786fa28d8c9154e6a4de5d46a1d921b8749f8b74e28bde23768e5e16eece998" + "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b", + "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898" ], "index": "pypi", - "version": "==2.25.0" + "version": "==2.24.0" }, "sentry-sdk": { "extras": [ @@ -837,7 +839,7 @@ }, "spiffworkflow": { "git": "https://github.com/sartography/SpiffWorkflow.git", - "ref": "6b2ed24bb340ebd31049312bd321f66ebf7b6b26" + "ref": "cdab930848493d74250224f1956177fee231a5b7" }, "sqlalchemy": { "hashes": [ @@ -892,10 +894,10 @@ }, "urllib3": { "hashes": [ - "sha256:19188f96923873c92ccb987120ec4acaa12f0461fa9ce5d3d0772bc965a39e08", - "sha256:d8ff90d979214d7b4f8ce956e80f4028fc6860e4431f731ea4a8c08f23f99473" + "sha256:8d7eaa5a82a1cac232164990f04874c594c9453ec55eef02eab885aa02fc17a2", + "sha256:f5321fbe4bf3fefa0efd0bfe7fb14e90909eb62a48ccda331726b4319897dd5e" ], - "version": "==1.26.2" + "version": "==1.25.11" }, "waitress": { "hashes": [ @@ -1014,10 +1016,10 @@ }, "packaging": { "hashes": [ - "sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8", - "sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181" + "sha256:05af3bb85d320377db281cf254ab050e1a7ebcbf5410685a9a407e18a1f81236", + "sha256:eb41423378682dadb7166144a4926e443093863024de508ca5c9737d6bc08376" ], - "version": "==20.4" + "version": "==20.7" }, "pbr": { "hashes": [ @@ -1056,13 +1058,6 @@ "index": "pypi", "version": "==6.1.2" }, - "six": { - "hashes": [ - "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", - "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" - ], - "version": "==1.15.0" - }, "toml": { "hashes": [ "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", diff --git a/crc/api/workflow.py b/crc/api/workflow.py index a148328a..8462d6ec 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -1,13 +1,13 @@ import uuid -from SpiffWorkflow.util.deep_merge import DeepMerge from flask import g -from crc import session, app + +from crc import session from crc.api.common import ApiError, ApiErrorSchema -from crc.models.api_models import WorkflowApi, WorkflowApiSchema, NavigationItem, NavigationItemSchema +from crc.models.api_models import WorkflowApiSchema from crc.models.file import FileModel, LookupDataSchema from crc.models.study import StudyModel, WorkflowMetadata -from crc.models.task_event import TaskEventModel, TaskEventModelSchema, TaskEvent, TaskEventSchema +from crc.models.task_event import TaskEventModel, TaskEvent, TaskEventSchema from crc.models.workflow import WorkflowModel, WorkflowSpecModelSchema, WorkflowSpecModel, WorkflowSpecCategoryModel, \ WorkflowSpecCategoryModelSchema from crc.services.file_service import FileService diff --git a/crc/models/api_models.py b/crc/models/api_models.py index f526e4b2..6f2e08da 100644 --- a/crc/models/api_models.py +++ b/crc/models/api_models.py @@ -1,6 +1,7 @@ import enum import marshmallow +from SpiffWorkflow.navigation import NavItem from marshmallow import INCLUDE from marshmallow_enum import EnumField @@ -15,22 +16,6 @@ class MultiInstanceType(enum.Enum): sequential = "sequential" -class NavigationItem(object): - def __init__(self, id, task_id, name, title, backtracks, level, indent, child_count, state, is_decision, - task=None, lane=None): - self.id = id - self.task_id = task_id 
- self.name = name, - self.title = title - self.backtracks = backtracks - self.level = level - self.indent = indent - self.child_count = child_count - self.state = state - self.is_decision = is_decision - self.task = task - self.lane = lane - class Task(object): ########################################################################## @@ -158,15 +143,23 @@ class TaskSchema(ma.Schema): class NavigationItemSchema(ma.Schema): class Meta: - fields = ["id", "task_id", "name", "title", "backtracks", "level", "indent", "child_count", "state", - "is_decision", "task", "lane"] + fields = ["spec_id", "name", "spec_type", "task_id", "description", "backtracks", "indent", + "lane", "state"] unknown = INCLUDE - task = marshmallow.fields.Nested(TaskSchema, dump_only=True, required=False, allow_none=True) + state = marshmallow.fields.String(required=False, allow_none=True) + description = marshmallow.fields.String(required=False, allow_none=True) backtracks = marshmallow.fields.String(required=False, allow_none=True) lane = marshmallow.fields.String(required=False, allow_none=True) - title = marshmallow.fields.String(required=False, allow_none=True) task_id = marshmallow.fields.String(required=False, allow_none=True) + @marshmallow.post_load + def make_nav(self, data, **kwargs): + state = data.pop('state', None) + task_id = data.pop('task_id', None) + item = NavItem(**data) + item.state = state + item.task_id = task_id + return item class WorkflowApi(object): def __init__(self, id, status, next_task, navigation, diff --git a/crc/services/workflow_service.py b/crc/services/workflow_service.py index 9808766c..44d0add9 100644 --- a/crc/services/workflow_service.py +++ b/crc/services/workflow_service.py @@ -1,9 +1,8 @@ -import copy -import json import string -import uuid from datetime import datetime import random +import string +from datetime import datetime from typing import List import jinja2 @@ -17,15 +16,14 @@ from SpiffWorkflow.bpmn.specs.UserTask import UserTask from SpiffWorkflow.dmn.specs.BusinessRuleTask import BusinessRuleTask from SpiffWorkflow.specs import CancelTask, StartTask from SpiffWorkflow.util.deep_merge import DeepMerge -from flask import g from jinja2 import Template from crc import db, app from crc.api.common import ApiError -from crc.models.api_models import Task, MultiInstanceType, NavigationItem, NavigationItemSchema, WorkflowApi +from crc.models.api_models import Task, MultiInstanceType, WorkflowApi from crc.models.file import LookupDataModel -from crc.models.task_event import TaskEventModel from crc.models.study import StudyModel +from crc.models.task_event import TaskEventModel from crc.models.user import UserModel, UserModelSchema from crc.models.workflow import WorkflowModel, WorkflowStatus, WorkflowSpecModel from crc.services.file_service import FileService @@ -326,28 +324,20 @@ class WorkflowService(object): # Some basic cleanup of the title for the for the navigation. navigation = [] for nav_item in nav_dict: - spiff_task = processor.bpmn_workflow.get_task(nav_item['task_id']) - if 'description' in nav_item: - nav_item['title'] = nav_item.pop('description') - # fixme: duplicate code from the workflow_service. Should only do this in one place. 
- if nav_item['title'] is not None and ' ' in nav_item['title']: - nav_item['title'] = nav_item['title'].partition(' ')[2] - else: - nav_item['title'] = "" + spiff_task = processor.bpmn_workflow.get_task(nav_item.task_id) if spiff_task: - nav_item['task'] = WorkflowService.spiff_task_to_api_task(spiff_task, add_docs_and_forms=False) - nav_item['title'] = nav_item['task'].title # Prefer the task title. - + # Use existing logic to set the description, and alter the state based on permissions. + api_task = WorkflowService.spiff_task_to_api_task(spiff_task, add_docs_and_forms=False) + nav_item.description = api_task.title user_uids = WorkflowService.get_users_assigned_to_task(processor, spiff_task) if not UserService.in_list(user_uids, allow_admin_impersonate=True): - nav_item['state'] = WorkflowService.TASK_STATE_LOCKED - + nav_item.state = WorkflowService.TASK_STATE_LOCKED else: - nav_item['task'] = None - - - navigation.append(NavigationItem(**nav_item)) - NavigationItemSchema().dump(nav_item) + # Strip off the first word in the description, to meet guidlines for BPMN. + if nav_item.description: + if nav_item.description is not None and ' ' in nav_item.description: + nav_item.description = nav_item.description.partition(' ')[2] + navigation.append(nav_item) spec = db.session.query(WorkflowSpecModel).filter_by(id=processor.workflow_spec_id).first() workflow_api = WorkflowApi( diff --git a/crc/static/bpmn/irb_api_personnel/irb_api_personnel.bpmn b/crc/static/bpmn/irb_api_personnel/irb_api_personnel.bpmn index 5beeba52..29262cef 100644 --- a/crc/static/bpmn/irb_api_personnel/irb_api_personnel.bpmn +++ b/crc/static/bpmn/irb_api_personnel/irb_api_personnel.bpmn @@ -1,11 +1,12 @@ - + Flow_0kcrx5l Flow_0kcrx5l + Flow_00zanzw Flow_1dcsioh current_user = ldap() investigators = study_info('investigators') @@ -15,11 +16,11 @@ is_cu_pi = False if pi != None: hasPI = True if pi.get('uid', None) != None: - pi_has_uid = True + pi_invalid_uid = False if pi['uid'] == current_user['uid']: is_cu_pi = True else: - pi_has_uid = False + pi_invalid_uid = True else: hasPI = False @@ -27,9 +28,11 @@ else: dc = investigators.get('DEPT_CH', None) if dc != None: if dc.get('uid', None) != None: - dc_has_uid = True + dc_invalid_uid = False else: - dc_has_uid = False + dc_invalid_uid = True +else: + dc_invalid_uid = False # Primary Coordinators pcs = {} @@ -39,13 +42,19 @@ for k in investigators.keys(): if k in ['SC_I','SC_II','IRBC']: investigator = investigators.get(k) if investigator.get('uid', None) != None: - cnt_pcs_uid = cnt_pcs_uid + 1 if investigator['uid'] != current_user['uid']: pcs[k] = investigator + cnt_pcs_uid = cnt_pcs_uid + 1 else: is_cu_pc = True is_cu_pc_role = investigator['label'] + else: + pcs[k] = investigator cnt_pcs = len(pcs.keys()) +if cnt_pcs != cnt_pcs_uid: + pcs_invalid_uid = True +else: + pcs_invalid_uid = False if cnt_pcs > 0: del(k) del(investigator) @@ -58,13 +67,19 @@ for k in investigators.keys(): if k == 'AS_C': investigator = investigators.get(k) if investigator.get('uid', None) != None: - cnt_acs_uid = cnt_acs_uid + 1 if investigator['uid'] != current_user['uid']: acs[k] = investigator + cnt_acs_uid = cnt_acs_uid + 1 else: is_cu_ac = True is_cu_ac_role = investigator['label'] + else: + acs[k] = investigator cnt_acs = len(acs.keys()) +if cnt_pcs != cnt_pcs_uid: + acs_invalid_uid = True +else: + acs_invalid_uid = False if cnt_acs > 0: del(k) del(investigator) @@ -77,12 +92,18 @@ for k in investigators.keys(): if k[:2] == 'SI': investigator = investigators.get(k) if 
investigator.get('uid', None) != None: - cnt_subs_uid = cnt_subs_uid + 1 if investigator['uid'] != current_user['uid']: subs[k] = investigator + cnt_subs_uid = cnt_subs_uid + 1 else: is_cu_subs = True + else: + subs[k] = investigator cnt_subs = len(subs.keys()) +if cnt_subs != cnt_subs_uid: + subs_invalid_uid = True +else: + subs_invalid_uid = False if cnt_subs > 0: del(k) del(investigator) @@ -95,13 +116,19 @@ for k in investigators.keys(): if k in ['SCI','DC']: investigator = investigators.get(k) if investigator.get('uid', None) != None: - cnt_aps_uid = cnt_aps_uid + 1 if investigator['uid'] != current_user['uid']: aps[k] = investigator + cnt_aps_uid = cnt_aps_uid + 1 else: is_cu_ap = True is_cu_ap_role = investigator['label'] + else: + aps[k] = investigator cnt_aps = len(aps.keys()) +if cnt_aps != cnt_aps_uid: + aps_invalid_uid = True +else: + aps_invalid_uid = False if cnt_aps > 0: del(k) del(investigator) @@ -132,12 +159,12 @@ Since you are the person entering this information, you already have access and - + - + @@ -160,23 +187,23 @@ Since you are the person entering this information, you already have access and - + Flow_1dcsioh Flow_147b9li Flow_00prawo - - - not(hasPI) or (hasPI and not(pi_has_uid)) + + + not(hasPI) or (hasPI and pi_invalid_uid) - + No PI entered in PB Flow_00prawo Flow_16qr5jf Flow_0kpe12r - SequenceFlow_0cdtt11 + Flow_1ayisx2 Flow_0xifvai Flow_1oqem42 @@ -210,7 +237,8 @@ Otherwise, edit each Coordinator as necessary and select the Save button for eac cnt_pcs == 0 - Flow_147b9li + Flow_0tfprc8 + Flow_0tsdclr Flow_1grahhv LDAP_dept = pi.department length_LDAP_dept = len(LDAP_dept) @@ -274,36 +302,12 @@ else: Flow_0w4d2bz Flow_1oo0ijr - - ***Name & Degree:*** {{ RO_Chair_Name_Degree }} -***School:*** {{ RO_School }} -***Department:*** {{ RO_Department }} -***Title:*** {{ RO_Chair_Title }} -***Email:*** {{ RO_Chair_CID }} - - -{% if RO_Chair_CID != dc.uid %} - *Does not match the Department Chair specified in Protocol Builder, {{ dc.display_name }}* -{% endif %} - - - - - - - - - - Flow_0vi6thu - SequenceFlow_0cdtt11 - - Flow_070j5fg Flow_0vi6thu Flow_00yhlrq - + RO_Chair_CID == pi.uid @@ -360,6 +364,7 @@ Otherwise, edit each Sub-Investigator as necessary and select the Save button fo Flow_1kg5jot pi.E0.schoolName = PI_E0_schoolName pi.E0.deptName = PI_E0_deptName +pi.experience = user_data_get("pi_experience","") ro = {} ro['chair'] = {} @@ -605,17 +610,17 @@ ro.schoolAbbrv = RO_StudySchool.value Flow_0vff9k5 - + Flow_0ofpgml Flow_0jxzqw1 Flow_0q56tn8 Flow_0kp47dz - - cnt_aps > 0 - + - + + cnt_aps == 0 + The following Additional Personnel were entered in Protocol Builder: {%+ for key, value in aps.items() %}{{value.display_name}} ({{key}}){% if loop.index is lt cnt_aps %}, {% endif %}{% endfor %} @@ -646,400 +651,545 @@ Otherwise, edit each Additional Personnel as necessary and select the Save butto Flow_10zn0h1 + + Flow_147b9li + Flow_0tfprc8 + Flow_0nz62mu + + + + dc_invalid_uid or pcs_invalid_uid or acs_invalid_uid or subs_invalid_uid or aps_invalid_uid + + + Select No if all displayed invalid Computing IDs do not need system access and/or receive emails. If they do, correct in Protocol Builder first and then select Yes. 
+ + +{% if dc_invalid_uid %} +Department Chair + {{ dc.error }} +{% endif %} +{% if pcs_invalid_uid %} +Primary Coordinators +{% for k, pc in pcs.items() %} + {% if pc.get('uid', None) == None: %} + {{ pc.error }} + {% endif %} +{% endfor %} +{% endif %} +{% if acs_invalid_uid %} +Additional Coordinators +{% for k, ac in acs.items() %} + {% if ac.get('uid', None) == None: %} + {{ ac.error }} + {% endif %} +{% endfor %} +{% endif %} +{% if subs_invalid_uid %} +Sub-Investigators +{% for k, sub in subs.items() %} + {% if sub.get('uid', None) == None: %} + {{ sub.error }} + {% endif %} +{% endfor %} +{% endif %} +{% if aps_invalid_uid %} +Additional Personnnel +{% for k, ap in aps.items() %} + {% if ap.get('uid', None) == None: %} + {{ ap.error }} + {% endif %} +{% endfor %} +{% endif %} + + + + + + + + + + Flow_0nz62mu + Flow_16bkbuc + + + Flow_16bkbuc + Flow_00zanzw + Flow_0tsdclr + + + + + not(FixInvalidUIDs) + + + ***Name & Degree:*** {{ RO_Chair_Name_Degree }} +***School:*** {{ RO_School }} +***Department:*** {{ RO_Department }} +***Title:*** {{ RO_Chair_Title }} +***Email:*** {{ RO_Chair_CID }} + + +{% if RO_Chair_CID != dc.uid %} + *Does not match the Department Chair specified in Protocol Builder, {{ dc.display_name }}* +{% endif %} + + + + + + + + + + Flow_0vi6thu + Flow_1ayisx2 + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - + + + + - - + + - - + + - - + + - - - + + + - + - - - + + + - + - - + + - - - + + + - - + + - - + + - - + + - + - - + + - - + + - - + + - - + + - - - - + + + + - + - - - + + + - - + + - - + + - - + + - + - - + + - + - - + + - + - - - - + + + + - + - - + + - - + + - + - - + + - - + + - - + + - - - - + + + + - + - - + + - - + + - + - - - + + + - - + + - + - - + + - + - - - - - - + + - - + + - - + + - - + + - - - - + + + + - + - - + + - - + + - + - - - + + + - + - - + + - + - - + + - - + + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - + - + - - - - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + - + - + - + - + - + - + - - + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/test_tasks_api.py b/tests/test_tasks_api.py index a6ffa710..e80c79a6 100644 --- a/tests/test_tasks_api.py +++ b/tests/test_tasks_api.py @@ -68,7 +68,7 @@ class TestTasksApi(BaseTest): # get the first form in the two form workflow. 
workflow_api = self.get_workflow_api(workflow) self.assertEqual('two_forms', workflow_api.workflow_spec_id) - self.assertEqual(2, len(workflow_api.navigation)) + self.assertEqual(4, len(workflow_api.navigation)) self.assertIsNotNone(workflow_api.next_task.form) self.assertEqual("UserTask", workflow_api.next_task.type) self.assertEqual("StepOne", workflow_api.next_task.name) @@ -113,14 +113,19 @@ class TestTasksApi(BaseTest): self.assertIsNotNone(workflow_api.navigation) nav = workflow_api.navigation - self.assertEqual(5, len(nav)) - self.assertEqual("Do You Have Bananas", nav[0]['title']) - self.assertEqual("Bananas?", nav[1]['title']) - self.assertEqual("FUTURE", nav[1]['state']) - self.assertEqual("yes", nav[2]['title']) - self.assertEqual("NOOP", nav[2]['state']) - self.assertEqual("no", nav[3]['title']) - self.assertEqual("NOOP", nav[3]['state']) + self.assertEqual(9, len(nav)) + self.assertEqual("Do You Have Bananas", nav[1].description) + self.assertEqual("Bananas?", nav[2].description) + self.assertEqual("FUTURE", nav[2].state) + self.assertEqual("yes", nav[3].description) + self.assertEqual(None, nav[3].state) + self.assertEqual("Task_Num_Bananas", nav[4].name) + self.assertEqual("LIKELY", nav[4].state) + self.assertEqual("EndEvent", nav[5].spec_type) + self.assertEqual("no", nav[6].description) + self.assertEqual(None, nav[6].state) + self.assertEqual("Task_Why_No_Bananas", nav[7].name) + self.assertEqual("MAYBE", nav[7].state) def test_navigation_with_exclusive_gateway(self): workflow = self.create_workflow('exclusive_gateway_2') @@ -129,14 +134,14 @@ class TestTasksApi(BaseTest): workflow_api = self.get_workflow_api(workflow) self.assertIsNotNone(workflow_api.navigation) nav = workflow_api.navigation - self.assertEqual(7, len(nav)) - self.assertEqual("Task 1", nav[0]['title']) - self.assertEqual("Which Branch?", nav[1]['title']) - self.assertEqual("a", nav[2]['title']) - self.assertEqual("Task 2a", nav[3]['title']) - self.assertEqual("b", nav[4]['title']) - self.assertEqual("Task 2b", nav[5]['title']) - self.assertEqual("Task 3", nav[6]['title']) + self.assertEqual(10, len(nav)) + self.assertEqual("Task 1", nav[1].description) + self.assertEqual("Which Branch?", nav[2].description) + self.assertEqual("a", nav[3].description) + self.assertEqual("Task 2a", nav[4].description) + self.assertEqual("b", nav[5].description) + self.assertEqual("Task 2b", nav[6].description) + self.assertEqual("Task 3", nav[8].description) def test_document_added_to_workflow_shows_up_in_file_list(self): self.create_reference_document() @@ -385,7 +390,7 @@ class TestTasksApi(BaseTest): navigation = workflow_api.navigation task = workflow_api.next_task - self.assertEqual(2, len(navigation)) + self.assertEqual(5, len(navigation)) self.assertEqual("UserTask", task.type) self.assertEqual("Activity_A", task.name) self.assertEqual("My Sub Process", task.process_name) @@ -453,7 +458,7 @@ class TestTasksApi(BaseTest): workflow_api = self.get_workflow_api(workflow) self.assertEqual(8, len(workflow_api.navigation)) - ready_items = [nav for nav in workflow_api.navigation if nav['state'] == "READY"] + ready_items = [nav for nav in workflow_api.navigation if nav.state == "READY"] self.assertEqual(5, len(ready_items)) self.assertEqual("UserTask", workflow_api.next_task.type) @@ -461,8 +466,8 @@ class TestTasksApi(BaseTest): self.assertEqual("Primary Investigator", workflow_api.next_task.title) for i in random.sample(range(5), 5): - task = TaskSchema().load(ready_items[i]['task']) - rv = 
self.app.put('/v1.0/workflow/%i/task/%s/set_token' % (workflow.id, task.id), + task_id = ready_items[i].task_id + rv = self.app.put('/v1.0/workflow/%i/task/%s/set_token' % (workflow.id, task_id), headers=self.logged_in_headers(), content_type="application/json") self.assert_success(rv) @@ -470,7 +475,7 @@ class TestTasksApi(BaseTest): workflow = WorkflowApiSchema().load(json_data) data = workflow.next_task.data data['investigator']['email'] = "dhf8r@virginia.edu" - self.complete_form(workflow, task, data) + self.complete_form(workflow, workflow.next_task, data) #tasks = self.get_workflow_api(workflow).user_tasks workflow = self.get_workflow_api(workflow) diff --git a/tests/test_user_roles.py b/tests/test_user_roles.py index 3dea94e7..90e452b0 100644 --- a/tests/test_user_roles.py +++ b/tests/test_user_roles.py @@ -1,6 +1,8 @@ import json from tests.base_test import BaseTest + +from crc.models.api_models import NavigationItemSchema from crc.models.workflow import WorkflowStatus from crc import db from crc.api.common import ApiError @@ -62,8 +64,8 @@ class TestTasksApi(BaseTest): workflow_api = self.get_workflow_api(workflow, user_uid=submitter.uid) nav = workflow_api.navigation - self.assertEqual(5, len(nav)) - self.assertEqual("supervisor", nav[1]['lane']) + self.assertEqual(9, len(nav)) + self.assertEqual("supervisor", nav[2].lane) def test_get_outstanding_tasks_awaiting_current_user(self): submitter = self.create_user(uid='lje5u') @@ -121,12 +123,12 @@ class TestTasksApi(BaseTest): # Navigation as Submitter with ready task. workflow_api = self.get_workflow_api(workflow, user_uid=submitter.uid) nav = workflow_api.navigation - self.assertEqual(5, len(nav)) - self.assertEqual('READY', nav[0]['state']) # First item is ready, no progress yet. - self.assertEqual('LOCKED', nav[1]['state']) # Second item is locked, it is the review and doesn't belong to this user. - self.assertEqual('LOCKED', nav[2]['state']) # third item is a gateway, and belongs to no one, and is locked. - self.assertEqual('NOOP', nav[3]['state']) # Approved Path, has no operation - self.assertEqual('NOOP', nav[4]['state']) # Rejected Path, has no operation. + self.assertEqual(9, len(nav)) + self.assertEqual('READY', nav[1].state) # First item is ready, no progress yet. + self.assertEqual('LOCKED', nav[2].state) # Second item is locked, it is the review and doesn't belong to this user. + # third item is a gateway, and belongs to no one + self.assertEqual(None, nav[4].state) # Approved Path, has no operation + self.assertEqual(None, nav[6].state) # Rejected Path, has no operation. self.assertEqual('READY', workflow_api.next_task.state) # Navigation as Submitter after handoff to supervisor @@ -134,9 +136,9 @@ class TestTasksApi(BaseTest): data['supervisor'] = supervisor.uid workflow_api = self.complete_form(workflow, workflow_api.next_task, data, user_uid=submitter.uid) nav = workflow_api.navigation - self.assertEqual('COMPLETED', nav[0]['state']) # First item is ready, no progress yet. - self.assertEqual('LOCKED', nav[1]['state']) # Second item is locked, it is the review and doesn't belong to this user. - self.assertEqual('LOCKED', nav[2]['state']) # third item is a gateway, and belongs to no one, and is locked. + self.assertEqual('COMPLETED', nav[1].state) # First item is ready, no progress yet. + self.assertEqual('LOCKED', nav[2].state) # Second item is locked, it is the review and doesn't belong to this user. + self.assertEqual('MAYBE', nav[7].state) # third item is a gateway, and belongs to no one, and is locked. 
self.assertEqual('LOCKED', workflow_api.next_task.state) # In the event the next task is locked, we should say something sensible here. # It is possible to look at the role of the task, and say The next task "TASK TITLE" will @@ -149,10 +151,10 @@ class TestTasksApi(BaseTest): # Navigation as Supervisor workflow_api = self.get_workflow_api(workflow, user_uid=supervisor.uid) nav = workflow_api.navigation - self.assertEqual(5, len(nav)) - self.assertEqual('LOCKED', nav[0]['state']) # First item belongs to the submitter, and is locked. - self.assertEqual('READY', nav[1]['state']) # Second item is locked, it is the review and doesn't belong to this user. - self.assertEqual('LOCKED', nav[2]['state']) # third item is a gateway, and belongs to no one, and is locked. + self.assertEqual(9, len(nav)) + self.assertEqual('LOCKED', nav[1].state) # First item belongs to the submitter, and is locked. + self.assertEqual('READY', nav[2].state) # Second item is ready, as we are now the supervisor. + self.assertEqual('LOCKED', nav[7].state) # Feedback is locked. self.assertEqual('READY', workflow_api.next_task.state) data = workflow_api.next_task.data @@ -161,28 +163,28 @@ class TestTasksApi(BaseTest): # Navigation as Supervisor, after completing task. nav = workflow_api.navigation - self.assertEqual(5, len(nav)) - self.assertEqual('LOCKED', nav[0]['state']) # First item belongs to the submitter, and is locked. - self.assertEqual('COMPLETED', nav[1]['state']) # Second item is locked, it is the review and doesn't belong to this user. - self.assertEqual('COMPLETED', nav[2]['state']) # third item is a gateway, and is now complete. + self.assertEqual(9, len(nav)) + self.assertEqual('LOCKED', nav[1].state) # First item belongs to the submitter, and is locked. + self.assertEqual('COMPLETED', nav[2].state) # Second item is locked, it is the review and doesn't belong to this user. + self.assertEqual('LOCKED', nav[7].state) # Feedback is LOCKED self.assertEqual('LOCKED', workflow_api.next_task.state) # Navigation as Submitter, coming back in to a rejected workflow to view the rejection message. workflow_api = self.get_workflow_api(workflow, user_uid=submitter.uid) nav = workflow_api.navigation - self.assertEqual(5, len(nav)) - self.assertEqual('COMPLETED', nav[0]['state']) # First item belongs to the submitter, and is locked. - self.assertEqual('LOCKED', nav[1]['state']) # Second item is locked, it is the review and doesn't belong to this user. - self.assertEqual('LOCKED', nav[2]['state']) # third item is a gateway belonging to the supervisor, and is locked. + self.assertEqual(9, len(nav)) + self.assertEqual('COMPLETED', nav[1].state) # First item belongs to the submitter, and is locked. + self.assertEqual('LOCKED', nav[2].state) # Second item is locked, it is the review and doesn't belong to this user. + self.assertEqual('READY', nav[7].state) # Feedbck is now READY self.assertEqual('READY', workflow_api.next_task.state) # Navigation as Submitter, re-completing the original request a second time, and sending it for review. workflow_api = self.complete_form(workflow, workflow_api.next_task, data, user_uid=submitter.uid) nav = workflow_api.navigation - self.assertEqual(5, len(nav)) - self.assertEqual('READY', nav[0]['state']) # When you loop back the task is again in the ready state. - self.assertEqual('LOCKED', nav[1]['state']) # Second item is locked, it is the review and doesn't belong to this user. - self.assertEqual('LOCKED', nav[2]['state']) # third item is a gateway belonging to the supervisor, and is locked. 
+ self.assertEqual(9, len(nav)) + self.assertEqual('READY', nav[1].state) # When you loop back the task is again in the ready state. + self.assertEqual('LOCKED', nav[2].state) # Second item is locked, it is the review and doesn't belong to this user. + self.assertEqual('COMPLETED', nav[7].state) # Feedback is completed self.assertEqual('READY', workflow_api.next_task.state) data["favorite_color"] = "blue" diff --git a/tests/workflow/test_workflow_processor_multi_instance.py b/tests/workflow/test_workflow_processor_multi_instance.py index 6fccb091..99339101 100644 --- a/tests/workflow/test_workflow_processor_multi_instance.py +++ b/tests/workflow/test_workflow_processor_multi_instance.py @@ -171,7 +171,7 @@ class TestWorkflowProcessorMultiInstance(BaseTest): # Assure navigation picks up the label of the current element variable. nav = WorkflowService.processor_to_workflow_api(processor, task).navigation - self.assertEqual("Primary Investigator", nav[2].title) + self.assertEqual("Primary Investigator", nav[2].description) task.update_data({"investigator": {"email": "dhf8r@virginia.edu"}}) processor.complete_task(task) From 750a202e99d55326088bf162c4518b43dd1d2769 Mon Sep 17 00:00:00 2001 From: Sartography Date: Sun, 6 Dec 2020 10:22:32 -0500 Subject: [PATCH 10/35] Test and BPMN files for testing email script --- tests/data/email_script/email_script.bpmn | 71 +++++++++++++++++++++++ tests/test_email_script.py | 20 +++++++ 2 files changed, 91 insertions(+) create mode 100644 tests/data/email_script/email_script.bpmn create mode 100644 tests/test_email_script.py diff --git a/tests/data/email_script/email_script.bpmn b/tests/data/email_script/email_script.bpmn new file mode 100644 index 00000000..7719e696 --- /dev/null +++ b/tests/data/email_script/email_script.bpmn @@ -0,0 +1,71 @@ + + + + + Flow_0scd96e + + + + + + + + + + + + + Flow_0scd96e + Flow_0c60gne + + + + Flow_19fqvhc + + + + Dear Person, + + +Thank you for using this email example. +I hope this makes sense. + + +Yours faithfully, + + +Dan + Flow_0c60gne + Flow_19fqvhc + email("My Email Subject", email_address) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/test_email_script.py b/tests/test_email_script.py new file mode 100644 index 00000000..5ec05ca8 --- /dev/null +++ b/tests/test_email_script.py @@ -0,0 +1,20 @@ +from tests.base_test import BaseTest + + +class TestEmailScript(BaseTest): + + def test_email_script(self): + + workflow = self.create_workflow('email_script') + + # Start the workflow. 
+ first_task = self.get_workflow_api(workflow).next_task + # self.assertEqual('Activity_GetData', first_task.name) + workflow = self.get_workflow_api(workflow) + # self.complete_form(workflow, first_task, {'email_address': 'mike@sartography.com'}) + # self.complete_form(workflow, first_task, {'email_address': 'kcm4zc'}, user_uid='kcm4zc') + result = self.complete_form(workflow, first_task, {'email_address': "'kcm4zc'"}) + print(result) + task = self.get_workflow_api(workflow).next_task + self.assertEqual(task.name, 'string') + # self.assertEqual('Activity_HowMany', workflow.next_task.name) From 8ba1e90f9ab7876a0d5ef9f323ccba46ce5c37dd Mon Sep 17 00:00:00 2001 From: Sartography Date: Mon, 7 Dec 2020 08:39:00 -0500 Subject: [PATCH 11/35] Test for ticket 153 to include user in error messages --- tests/test_user_in_logs.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/test_user_in_logs.py diff --git a/tests/test_user_in_logs.py b/tests/test_user_in_logs.py new file mode 100644 index 00000000..1ebb9b79 --- /dev/null +++ b/tests/test_user_in_logs.py @@ -0,0 +1,21 @@ +from crc.api.common import ApiError +from tests.base_test import BaseTest + + +class TestUserID(BaseTest): + + def test_user_id(self): + # try: + # False + # except: + # raise ApiError + + # ApiError() + # self.assertEqual(True,False) + + # with self.assertRaises(ApiError) as api_error: + # self.assertEqual(2, 3) + workflow = self.create_workflow('email') + first_task = self.get_workflow_api(workflow).next_task + + raise ApiError('unknown_approval', 'Please provide a valid Approval ID.') From f26a8615a431dbd4a31e448cd29c3b4048a8a4af Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Mon, 7 Dec 2020 08:49:38 -0500 Subject: [PATCH 12/35] Get more file details so we can fill out everything locally and also add a method to download the file by md5_hash --- crc/api.yml | 23 +++++++++++++++++++++++ crc/api/file.py | 5 ++++- crc/api/workflow.py | 28 ++++++++++++++++++++++------ 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/crc/api.yml b/crc/api.yml index afeb2bc2..b35ad793 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -578,6 +578,29 @@ paths: responses: '204': description: The file has been removed. + /file/{md5_hash}/data: + parameters: + - name: md5_hash + in: path + required: true + description: The md5 hash of the file requested + schema: + type: string + get: + operationId: crc.api.file.get_file_data_by_hash + summary: Returns only the file contents + security: [] # Disable security for this endpoint only. 
+ tags: + - Files + responses: + '200': + description: Returns the actual file + content: + application/octet-stream: + schema: + type: string + format: binary + example: '' /file/{file_id}/data: parameters: - name: file_id diff --git a/crc/api/file.py b/crc/api/file.py index 5cf54221..4f0b655f 100644 --- a/crc/api/file.py +++ b/crc/api/file.py @@ -6,7 +6,7 @@ from flask import send_file from crc import session from crc.api.common import ApiError -from crc.models.file import FileSchema, FileModel, File, FileModelSchema +from crc.models.file import FileSchema, FileModel, File, FileModelSchema, FileDataModel from crc.models.workflow import WorkflowSpecModel from crc.services.file_service import FileService @@ -99,6 +99,9 @@ def update_file_data(file_id): file_model = FileService.update_file(file_model, file.stream.read(), file.content_type) return FileSchema().dump(to_file_api(file_model)) +def get_file_data_by_hash(md5_hash): + filedatamodel = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first() + return get_file_data(filedatamodel.file_model_id) def get_file_data(file_id, version=None): file_data = FileService.get_file_data(file_id, version) diff --git a/crc/api/workflow.py b/crc/api/workflow.py index 983097a6..f9c6ac58 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -345,11 +345,15 @@ def get_changed_files(remote,workflow_spec_id): # this takes the different date_created_x and date-created_y columns and # combines them back into one date_created column - index = different['date_created_x'].isnull() - different.loc[index,'date_created_x'] = different[index]['date_created_y'] - different = different[['date_created_x','filename','location']].copy() + dualfields = ['date_created','type','primary','content_type','primary_process_id'] + for merge in dualfields: + index = different[merge+'_x'].isnull() + different.loc[index,merge+'_x'] = different[index][merge+'_y'] - different.columns=['date_created','filename','location'] + fieldlist = [fld+'_x' for fld in dualfields] + different = different[ fieldlist + ['md5_hash','filename','location']].copy() + + different.columns=dualfields+['md5_hash','filename','location'] # our different list will have multiple entries for a workflow if there is a version on either side # we want to grab the most recent one, so we sort and grab the most recent one for each workflow changedfiles = different.sort_values('date_created',ascending=False).groupby('filename').first() @@ -363,7 +367,7 @@ def get_changed_files(remote,workflow_spec_id): changedfiles['new'] = False changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True - + changedfiles = changedfiles.replace({pd.np.nan: None}) # return the list as a dict, let swagger convert it to json return changedfiles.reset_index().to_dict(orient='records') @@ -404,9 +408,21 @@ def get_workflow_spec_files_dataframe(workflowid): 'workflow_spec_id': file.file_model.workflow_spec_id, 'md5_hash':file.md5_hash, 'filename':file.file_model.name, + 'type':file.file_model.type.name, + 'primary':file.file_model.primary, + 'content_type':file.file_model.content_type, + 'primary_process_id':file.file_model.primary_process_id, 'date_created':file.date_created}) if len(filelist) == 0: - return pd.DataFrame(columns=['file_model_id','workflow_spec_id','md5_hash','filename','date_created']) + return pd.DataFrame(columns=['file_model_id', + 'workflow_spec_id', + 'md5_hash', + 'filename', + 'type', + 'primary', + 
'content_type', + 'primary_process_id', + 'date_created']) df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last() df['date_created'] = df['date_created'].astype('str') return df From 44c72115ae1cb2ff1134202485f112c55a349267 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Mon, 7 Dec 2020 08:50:20 -0500 Subject: [PATCH 13/35] Make sure we get the file we intended --- crc/api/file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crc/api/file.py b/crc/api/file.py index 4f0b655f..861f9f04 100644 --- a/crc/api/file.py +++ b/crc/api/file.py @@ -101,7 +101,7 @@ def update_file_data(file_id): def get_file_data_by_hash(md5_hash): filedatamodel = session.query(FileDataModel).filter(FileDataModel.md5_hash == md5_hash).first() - return get_file_data(filedatamodel.file_model_id) + return get_file_data(filedatamodel.file_model_id,version=filedatamodel.version) def get_file_data(file_id, version=None): file_data = FileService.get_file_data(file_id, version) From 730d0ca18fef399331a4b131f6704041a009f475 Mon Sep 17 00:00:00 2001 From: mike cullerton Date: Mon, 7 Dec 2020 16:23:41 -0500 Subject: [PATCH 14/35] Email script now uses an email address instead of a UVA LDAP user_id. --- crc/scripts/email.py | 42 +++++++++++++++++++++++++++++++++++++- tests/test_email_script.py | 36 +++++++++++++++++++++----------- 2 files changed, 65 insertions(+), 13 deletions(-) diff --git a/crc/scripts/email.py b/crc/scripts/email.py index 855ec8a4..8854a342 100644 --- a/crc/scripts/email.py +++ b/crc/scripts/email.py @@ -7,6 +7,8 @@ from crc.scripts.script import Script from crc.services.ldap_service import LdapService from crc.services.mails import send_mail +from email_validator import validate_email, EmailNotValidError + class Email(Script): """This Script allows to be introduced as part of a workflow and called from there, specifying @@ -30,7 +32,8 @@ Email Subject ApprvlApprvr1 PIComputingID def do_task(self, task, *args, **kwargs): args = [arg for arg in args if type(arg) == str] subject = self.get_subject(task, args) - recipients = self.get_users_info(task, args) + # recipients = self.get_users_info(task, args) + recipients = self.get_email_recipients(task, args) content, content_html = self.get_content(task) if recipients: send_mail( @@ -41,6 +44,43 @@ Email Subject ApprvlApprvr1 PIComputingID content_html=content_html ) + def get_email_recipients(self, task, args): + emails = [] + + if len(args[1]) < 1: + raise ApiError(code="missing_argument", + message="Email script requires at least one argument, " + "an email address to process. " + "Multiple email addresses are accepted.") + if isinstance(args[1], str): + try: + valid = validate_email(args[1]) + except EmailNotValidError as e: + # email is not valid, exception message is human-readable + raise ApiError(code="invalid_argument", + message="Email script requires a valid email address. " + "%s " % e) + print(str(e)) + else: + emails.append(valid.email) + + elif isinstance(args[1], list): + for arg in args[1]: + if isinstance(arg, str): + # TODO: need to validate + try: + valid = validate_email(args[1]) + except EmailNotValidError as e: + # email is not valid, exception message is human-readable + raise ApiError(code="invalid_argument", + message="Email script requires a valid email address." 
+ "Multiple address are allowed.") + print(str(e)) + else: + emails.append(valid.email) + + return emails + def get_users_info(self, task, args): if len(args) < 1: raise ApiError(code="missing_argument", diff --git a/tests/test_email_script.py b/tests/test_email_script.py index 5ec05ca8..3bca598e 100644 --- a/tests/test_email_script.py +++ b/tests/test_email_script.py @@ -1,20 +1,32 @@ from tests.base_test import BaseTest +from crc import mail + + +# class TestEmailDirectly(BaseTest): +# +# def test_email_directly(self): +# recipients = ['michaelc@cullerton.com'] +# sender = 'michaelc@cullerton.com' +# with mail.record_messages() as outbox: +# mail.send_message(subject='testing', +# body='test', +# recipients=recipients, +# sender=sender) +# assert len(outbox) == 1 +# assert outbox[0].subject == "testing" class TestEmailScript(BaseTest): def test_email_script(self): + with mail.record_messages() as outbox: - workflow = self.create_workflow('email_script') + workflow = self.create_workflow('email_script') - # Start the workflow. - first_task = self.get_workflow_api(workflow).next_task - # self.assertEqual('Activity_GetData', first_task.name) - workflow = self.get_workflow_api(workflow) - # self.complete_form(workflow, first_task, {'email_address': 'mike@sartography.com'}) - # self.complete_form(workflow, first_task, {'email_address': 'kcm4zc'}, user_uid='kcm4zc') - result = self.complete_form(workflow, first_task, {'email_address': "'kcm4zc'"}) - print(result) - task = self.get_workflow_api(workflow).next_task - self.assertEqual(task.name, 'string') - # self.assertEqual('Activity_HowMany', workflow.next_task.name) + first_task = self.get_workflow_api(workflow).next_task + # self.assertEqual('Activity_GetData', first_task.name) + workflow = self.get_workflow_api(workflow) + self.complete_form(workflow, first_task, {'email_address': 'michaelc@cullerton.com'}) + + self.assertEqual(1, len(outbox)) + self.assertEqual("My Email Subject", outbox[0].subject) From 0e1aa59fa18373e87aeb15a09c031499e5124caa Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Tue, 8 Dec 2020 13:42:01 -0500 Subject: [PATCH 15/35] Make a change to make sure that if there is a new file locally that is not present remotely when we pull from the remote, the new local file gets deleted. Also: add several things to the requirements.txt that should have been there in the first place. --- crc/api.yml | 35 +++++++++++++++++++++++++- crc/api/workflow.py | 56 +++++++++++++++++++++++++++++++++++++++-- crc/models/workflow.py | 2 +- deploy/requirements.txt | 6 +++++ 4 files changed, 95 insertions(+), 4 deletions(-) diff --git a/crc/api.yml b/crc/api.yml index b35ad793..3aa1f5d8 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -152,6 +152,38 @@ paths: items: $ref: "#/components/schemas/Study" + /workflow_spec/{workflow_spec_id}/files/sync: + get: + operationId: crc.api.workflow.sync_changed_files + summary: Provides a list of files that were updated + security: [] # Disable security for this endpoint only - we'll sanity check + # in the endpoint + parameters: + - name: workflow_spec_id + in: path + required: true + description: The workflow_spec id + schema: + type: string + - name: remote + in: query + required: true + description: The remote endpoint + schema: + type: string + + tags: + - Workflow Spec States + responses: + '200': + description: An array of workflow specs, with last touched date and file signature. 
+ content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/Study" + /workflow_spec/{workflow_spec_id}/files/diff: get: operationId: crc.api.workflow.get_changed_files @@ -334,6 +366,7 @@ paths: get: operationId: crc.api.workflow.get_workflow_specification summary: Returns a single workflow specification + security: [] tags: - Workflow Specifications responses: @@ -578,7 +611,7 @@ paths: responses: '204': description: The file has been removed. - /file/{md5_hash}/data: + /file/{md5_hash}/hash_data: parameters: - name: md5_hash in: path diff --git a/crc/api/workflow.py b/crc/api/workflow.py index f9c6ac58..fb0c1e4b 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -1,6 +1,7 @@ import hashlib import json import uuid +from io import StringIO from hashlib import md5 import pandas as pd @@ -319,8 +320,56 @@ def get_changed_workflows(remote): # return the list as a dict, let swagger convert it to json return output.reset_index().to_dict(orient='records') +def sync_all_changed_files(remote): + pass -def get_changed_files(remote,workflow_spec_id): +def sync_changed_files(remote,workflow_spec_id): + # make sure that spec is local before syncing files + remotespectext = requests.get('http://'+remote+'/v1.0/workflow-specification/'+workflow_spec_id) + specdict = json.loads(remotespectext.text) + localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first() + if localspec is None: + localspec = WorkflowSpecModel() + localspec.id = workflow_spec_id + if specdict['category'] == None: + localspec.category = None + else: + localspec.category = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.id + == specdict['category']['id']).first() + localspec.display_order = specdict['display_order'] + localspec.display_name = specdict['display_name'] + localspec.name = specdict['name'] + localspec.description = specdict['description'] + session.add(localspec) + + changedfiles = get_changed_files(remote,workflow_spec_id,as_df=True) + updatefiles = changedfiles[~((changedfiles['new']==True) & (changedfiles['location']=='local'))] + deletefiles = changedfiles[((changedfiles['new']==True) & (changedfiles['location']=='local'))] + for delfile in deletefiles.reset_index().to_dict(orient='records'): + currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, + FileModel.name == delfile['filename']).first() + FileService.delete_file(currentfile.id) + for updatefile in updatefiles.reset_index().to_dict(orient='records'): + currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, + FileModel.name == updatefile['filename']).first() + if not currentfile: + currentfile = FileModel() + currentfile.name = updatefile['filename'] + currentfile.workflow_spec_id = workflow_spec_id + + currentfile.date_created = updatefile['date_created'] + currentfile.type = updatefile['type'] + currentfile.primary = updatefile['primary'] + currentfile.content_type = updatefile['content_type'] + currentfile.primary_process_id = updatefile['primary_process_id'] + session.add(currentfile) + + response = requests.get('http://'+remote+'/v1.0/file/'+updatefile['md5_hash']+'/hash_data') + FileService.update_file(currentfile,response.content,updatefile['type']) + session.commit() + + +def get_changed_files(remote,workflow_spec_id,as_df=False): """ gets a remote endpoint - gets the files for a workflow_spec on both local and remote and determines what files have been change and 
returns a list of those @@ -369,7 +418,10 @@ def get_changed_files(remote,workflow_spec_id): changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True changedfiles = changedfiles.replace({pd.np.nan: None}) # return the list as a dict, let swagger convert it to json - return changedfiles.reset_index().to_dict(orient='records') + if as_df: + return changedfiles + else: + return changedfiles.reset_index().to_dict(orient='records') diff --git a/crc/models/workflow.py b/crc/models/workflow.py index 718dfccf..0da32aec 100644 --- a/crc/models/workflow.py +++ b/crc/models/workflow.py @@ -69,7 +69,7 @@ class WorkflowStatus(enum.Enum): class WorkflowSpecDependencyFile(db.Model): - """Connects a workflow to the version of the specification files it depends on to execute""" + """Connects to a workflow to test the version of the specification files it depends on to execute""" file_data_id = db.Column(db.Integer, db.ForeignKey(FileDataModel.id), primary_key=True) workflow_id = db.Column(db.Integer, db.ForeignKey("workflow.id"), primary_key=True) diff --git a/deploy/requirements.txt b/deploy/requirements.txt index 420a888f..af827f27 100644 --- a/deploy/requirements.txt +++ b/deploy/requirements.txt @@ -22,8 +22,10 @@ docutils==0.16 docxtpl==0.9.2 et-xmlfile==1.0.1 flask==1.1.2 +flask-admin==1.5.7 flask-bcrypt==0.7.1 flask-cors==3.0.8 +flask-mail==0.9.1 flask-marshmallow==0.12.0 flask-migrate==2.5.3 flask-restful==0.3.8 @@ -55,17 +57,21 @@ pandas==1.0.3 psycopg2-binary==2.8.5 pyasn1==0.4.8 pycparser==2.20 +PyGithub==1.53 pygments==2.6.1 pyjwt==1.7.1 pyparsing==2.4.7 pyrsistent==0.16.0 +python-box==5.2.0 python-dateutil==2.8.1 python-docx==0.8.10 python-editor==1.0.4 +python-Levenshtein==0.12.0 pytz==2020.1 pyyaml==5.3.1 recommonmark==0.6.0 requests==2.23.0 +sentry-sdk==0.14.4 six==1.14.0 snowballstemmer==2.0.0 soupsieve==2.0.1 From 32c5060a310dc099047d5675c23e973e0d633061 Mon Sep 17 00:00:00 2001 From: mike cullerton Date: Wed, 9 Dec 2020 12:11:46 -0500 Subject: [PATCH 16/35] No longer use eval on the email address. It is already parsed. Tests for single and multiple emails, and email error checking. Still need to figure out how to implement multiple emails. 
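
Roughly, the new validation boils down to a single regular-expression check (the regex below is the
one this patch takes from https://emailregex.com/); a minimal sketch of the intended behaviour,
using the same sample addresses as the new tests:

    import re

    EMAIL_REGEX = r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)"

    def check_valid_email(email):
        # True only for addresses shaped like local-part@domain.tld
        return re.search(EMAIL_REGEX, email) is not None

    check_valid_email('test@example.com')   # True
    check_valid_email('test@example')       # False - no top-level domain
    check_valid_email('test')               # False - not an address at all
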
--- crc/scripts/email.py | 138 +++++++++++----------- tests/data/email_script/email_script.bpmn | 47 ++++---- tests/test_email_script.py | 40 ++++++- 3 files changed, 133 insertions(+), 92 deletions(-) diff --git a/crc/scripts/email.py b/crc/scripts/email.py index 8854a342..62b6aebc 100644 --- a/crc/scripts/email.py +++ b/crc/scripts/email.py @@ -1,3 +1,5 @@ +import re + import markdown from jinja2 import Template @@ -7,8 +9,6 @@ from crc.scripts.script import Script from crc.services.ldap_service import LdapService from crc.services.mails import send_mail -from email_validator import validate_email, EmailNotValidError - class Email(Script): """This Script allows to be introduced as part of a workflow and called from there, specifying @@ -26,14 +26,18 @@ Email Subject ApprvlApprvr1 PIComputingID def do_task_validate_only(self, task, *args, **kwargs): self.get_subject(task, args) - self.get_users_info(task, args) + self.get_email_recipients(task, args) self.get_content(task) def do_task(self, task, *args, **kwargs): - args = [arg for arg in args if type(arg) == str] - subject = self.get_subject(task, args) - # recipients = self.get_users_info(task, args) - recipients = self.get_email_recipients(task, args) + args = [arg for arg in args if type(arg) == str or type(arg) == list] + + subject = args[0] + recipients = None + try: + recipients = self.get_email_recipients(task, args) + except ApiError: + raise content, content_html = self.get_content(task) if recipients: send_mail( @@ -44,81 +48,81 @@ Email Subject ApprvlApprvr1 PIComputingID content_html=content_html ) + def check_valid_email(self, email): + # regex from https://emailregex.com/ + regex = r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)" + if (re.search(regex, email)): + return True + else: + return False + def get_email_recipients(self, task, args): emails = [] if len(args[1]) < 1: raise ApiError(code="missing_argument", - message="Email script requires at least one argument, " - "an email address to process. " + message="Email script requires at least one email address as an argument. " "Multiple email addresses are accepted.") + if isinstance(args[1], str): - try: - valid = validate_email(args[1]) - except EmailNotValidError as e: - # email is not valid, exception message is human-readable - raise ApiError(code="invalid_argument", - message="Email script requires a valid email address. " - "%s " % e) - print(str(e)) + if self.check_valid_email(args[1]): + emails.append(args[1]) else: - emails.append(valid.email) - - elif isinstance(args[1], list): - for arg in args[1]: - if isinstance(arg, str): - # TODO: need to validate - try: - valid = validate_email(args[1]) - except EmailNotValidError as e: - # email is not valid, exception message is human-readable - raise ApiError(code="invalid_argument", - message="Email script requires a valid email address." - "Multiple address are allowed.") - print(str(e)) - else: - emails.append(valid.email) - - return emails - - def get_users_info(self, task, args): - if len(args) < 1: - raise ApiError(code="missing_argument", - message="Email script requires at least one argument. The " - "name of the variable in the task data that contains user" - "id to process. 
Multiple arguments are accepted.") - emails = [] - for arg in args: - try: - uid = task.workflow.script_engine.evaluate_expression(task, arg) - except Exception as e: - app.logger.error(f'Workflow engines could not parse {arg}', exc_info=True) - continue - user_info = LdapService.user_info(uid) - email = user_info.email_address - emails.append(user_info.email_address) - if not isinstance(email, str): raise ApiError(code="invalid_argument", - message="The Email script requires at least 1 UID argument. The " - "name of the variable in the task data that contains subject and" - " user ids to process. This must point to an array or a string, but " - "it currently points to a %s " % emails.__class__.__name__) + message="The email address you provided could not be parsed. " + "The value you provided is '%s" % args[1]) - return emails + if isinstance(args[1], list): + for address in args[1]: + if self.check_valid_email(address): + emails.append(address) + else: + raise ApiError(code="invalid_argument", + message="The email address you provided could not be parsed. " + "The value you provided is '%s" % address) + + if len(emails) > 0: + return emails + else: + raise ApiError(code="invalid_argument", + message="Email script requires a valid email address.") + + # def get_users_info(self, task, args): + # if len(args) < 1: + # raise ApiError(code="missing_argument", + # message="Email script requires at least one argument. The " + # "name of the variable in the task data that contains user" + # "id to process. Multiple arguments are accepted.") + # emails = [] + # for arg in args: + # try: + # uid = task.workflow.script_engine.evaluate_expression(task, arg) + # except Exception as e: + # app.logger.error(f'Workflow engines could not parse {arg}', exc_info=True) + # continue + # user_info = LdapService.user_info(uid) + # email = user_info.email_address + # emails.append(user_info.email_address) + # if not isinstance(email, str): + # raise ApiError(code="invalid_argument", + # message="The Email script requires at least 1 UID argument. The " + # "name of the variable in the task data that contains subject and" + # " user ids to process. This must point to an array or a string, but " + # "it currently points to a %s " % emails.__class__.__name__) + # + # return emails def get_subject(self, task, args): - if len(args) < 1: + # subject = '' + if len(args[0]) < 1: raise ApiError(code="missing_argument", - message="Email script requires at least one subject argument. The " - "name of the variable in the task data that contains subject" - " to process. Multiple arguments are accepted.") + message="No subject was provided for the email message.") + subject = args[0] - if not isinstance(subject, str): + if not subject or not isinstance(subject, str): raise ApiError(code="invalid_argument", - message="The Email script requires 1 argument. The " - "the name of the variable in the task data that contains user" - "ids to process. This must point to an array or a string, but " - "it currently points to a %s " % subject.__class__.__name__) + message="The subject you provided could not be parsed. 
" + "The value is \"%s\" " % subject) return subject diff --git a/tests/data/email_script/email_script.bpmn b/tests/data/email_script/email_script.bpmn index 7719e696..201b70a1 100644 --- a/tests/data/email_script/email_script.bpmn +++ b/tests/data/email_script/email_script.bpmn @@ -1,11 +1,11 @@ - + Flow_0scd96e - - + + @@ -18,12 +18,12 @@ Flow_0scd96e Flow_0c60gne - - + + Flow_19fqvhc - - + + Dear Person, @@ -37,34 +37,35 @@ Yours faithfully, Dan Flow_0c60gne Flow_19fqvhc - email("My Email Subject", email_address) + subject = 'My Email Subject' +email(subject, email_address) - - - + + + - - + + - - - + + + - + - - + + - - + + - - + + diff --git a/tests/test_email_script.py b/tests/test_email_script.py index 3bca598e..42257d4b 100644 --- a/tests/test_email_script.py +++ b/tests/test_email_script.py @@ -24,9 +24,45 @@ class TestEmailScript(BaseTest): workflow = self.create_workflow('email_script') first_task = self.get_workflow_api(workflow).next_task - # self.assertEqual('Activity_GetData', first_task.name) + workflow = self.get_workflow_api(workflow) - self.complete_form(workflow, first_task, {'email_address': 'michaelc@cullerton.com'}) + self.complete_form(workflow, first_task, {'email_address': 'test@example.com'}) + + self.assertEqual(1, len(outbox)) + self.assertEqual('My Email Subject', outbox[0].subject) + self.assertEqual(['test@example.com'], outbox[0].recipients) + + def test_email_script_multiple(self): + with mail.record_messages() as outbox: + + workflow = self.create_workflow('email_script') + + first_task = self.get_workflow_api(workflow).next_task + + workflow = self.get_workflow_api(workflow) + self.complete_form(workflow, first_task, {'email_address': ['test@example.com', 'test2@example.com']}) self.assertEqual(1, len(outbox)) self.assertEqual("My Email Subject", outbox[0].subject) + self.assertEqual(2, len(outbox[0].recipients)) + self.assertEqual('test@example.com', outbox[0].recipients[0]) + self.assertEqual('test2@example.com', outbox[0].recipients[1]) + + def test_bad_email_address_1(self): + workflow = self.create_workflow('email_script') + + first_task = self.get_workflow_api(workflow).next_task + + workflow = self.get_workflow_api(workflow) + with self.assertRaises(AssertionError): + self.complete_form(workflow, first_task, {'email_address': 'test@example'}) + + + def test_bad_email_address_2(self): + workflow = self.create_workflow('email_script') + + first_task = self.get_workflow_api(workflow).next_task + + workflow = self.get_workflow_api(workflow) + with self.assertRaises(AssertionError): + self.complete_form(workflow, first_task, {'email_address': 'test'}) From c57b17df1ed1beabdde7084eaba72f25013b4870 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Wed, 9 Dec 2020 12:13:17 -0500 Subject: [PATCH 17/35] Add a robust way of adding an API key, update examples and documentation for swagger API and add the ability to completely sync the local system from the remote system. --- config/default.py | 8 ++ crc/api.yml | 177 +++++++++++++++++++++++++++++++++++++------- crc/api/workflow.py | 56 +++++++++++--- 3 files changed, 204 insertions(+), 37 deletions(-) diff --git a/config/default.py b/config/default.py index 13fc79ab..44651fd5 100644 --- a/config/default.py +++ b/config/default.py @@ -6,6 +6,14 @@ basedir = os.path.abspath(os.path.dirname(__file__)) JSON_SORT_KEYS = False # CRITICAL. Do not sort the data when returning values to the front end. +# The API_TOKEN is used to ensure that the +# workflow synch can work without a lot of +# back and forth. 
+# you may want to change this to something simple for testing!! +# NB, if you change this in the local endpoint, +# it needs to be changed in the remote endpoint as well +API_TOKEN = 'af95596f327c9ecc007b60414fc84b61' + NAME = "CR Connect Workflow" FLASK_PORT = environ.get('PORT0') or environ.get('FLASK_PORT', default="5000") CORS_ALLOW_ORIGINS = re.split(r',\s*', environ.get('CORS_ALLOW_ORIGINS', default="localhost:4200, localhost:5002")) diff --git a/crc/api.yml b/crc/api.yml index 3aa1f5d8..2789e249 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -100,11 +100,12 @@ paths: type: array items: $ref: "#/components/schemas/Study" - /workflow_spec/diff: + /workflow_spec/pullall: get: - operationId: crc.api.workflow.get_changed_workflows - summary: Provides a list of workflow specs and their signature - security: [] # Disable security for this endpoint only - we'll sanity check + operationId: crc.api.workflow.sync_all_changed_workflows + summary: Sync all workflows that have changed on the remote side and provide a list of the results + security: + - ApiKeyAuth : [] # in the endpoint parameters: - name: remote @@ -117,21 +118,51 @@ paths: - Workflow Spec States responses: '200': - description: An array of workflow specs, with last touched date and file signature. + description: An array of workflow specs that were synced from remote. content: application/json: schema: type: array items: - $ref: "#/components/schemas/Study" + type: string + example : ['top_level_workflow','3b495037-f7d4-4509-bf58-cee41c0c6b0e'] + + + + + /workflow_spec/diff: + get: + operationId: crc.api.workflow.get_changed_workflows + summary: Provides a list of workflow that differ from remote and if it is new or not + security : + - ApiKeyAuth : [] + # in the endpoint + parameters: + - name: remote + in: query + required: true + description: The remote endpoint + schema: + type: string + tags: + - Workflow Spec States + responses: + '200': + description: An array of workflow specs, with last touched date and which one is most recent. + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/WorkflowSpecDiffList" /workflow_spec/{workflow_spec_id}/files: get: operationId: crc.api.workflow.get_workflow_spec_files - summary: Provides a list of workflow specs and their signature - security: [] # Disable security for this endpoint only - we'll sanity check - # in the endpoint + summary: Provides a list of files for a workflow spec on this machine. + security : + - ApiKeyAuth : [] parameters: - name: workflow_spec_id in: path @@ -144,20 +175,20 @@ paths: - Workflow Spec States responses: '200': - description: An array of workflow specs, with last touched date and file signature. + description: An array of files for a workflow spec on the local system, with details. content: application/json: schema: type: array items: - $ref: "#/components/schemas/Study" + $ref: "#/components/schemas/WorkflowSpecFilesList" /workflow_spec/{workflow_spec_id}/files/sync: get: operationId: crc.api.workflow.sync_changed_files - summary: Provides a list of files that were updated - security: [] # Disable security for this endpoint only - we'll sanity check - # in the endpoint + summary: Syncs files from a workflow on a remote system and provides a list of files that were updated + security : + - ApiKeyAuth : [] parameters: - name: workflow_spec_id in: path @@ -176,20 +207,23 @@ paths: - Workflow Spec States responses: '200': - description: An array of workflow specs, with last touched date and file signature. 
+ description: A list of files that were synced for the workflow. content: application/json: schema: type: array items: - $ref: "#/components/schemas/Study" + type : string + example : ["data_security_plan.dmn",'some_other_file.xml'] + /workflow_spec/{workflow_spec_id}/files/diff: get: operationId: crc.api.workflow.get_changed_files - summary: Provides a list of workflow specs and their signature - security: [] # Disable security for this endpoint only - we'll sanity check - # in the endpoint + summary: Provides a list of files for a workflow specs that differ from remote and their signature + security : + - ApiKeyAuth : [] + parameters: - name: workflow_spec_id in: path @@ -208,21 +242,22 @@ paths: - Workflow Spec States responses: '200': - description: An array of workflow specs, with last touched date and file signature. + description: An array of files that are different from remote, with last touched date and file signature. content: application/json: schema: type: array items: - $ref: "#/components/schemas/Study" + $ref: "#/components/schemas/WorkflowSpecFilesDiff" /workflow_spec/all: get: operationId: crc.api.workflow.get_all_spec_state - summary: Provides a list of files for a workflow spec - security: [] # Disable security for this endpoint only - we'll sanity check - # in the endpoint + summary: Provides a list of workflow specs, last update date and thumbprint + security: + - ApiKeyAuth : [] + tags: - Workflow Spec States responses: @@ -233,7 +268,7 @@ paths: schema: type: array items: - $ref: "#/components/schemas/Study" + $ref: "#/components/schemas/WorkflowSpecAll" /study/all: @@ -1314,6 +1349,12 @@ components: scheme: bearer bearerFormat: JWT x-bearerInfoFunc: crc.api.user.verify_token_admin + ApiKeyAuth : + type : apiKey + in : header + name : X-CR-API-KEY + x-apikeyInfoFunc: crc.api.workflow.verify_token + schemas: User: properties: @@ -1337,6 +1378,92 @@ components: properties: id: type: string + WorkflowSpecDiffList: + properties: + workflow_spec_id: + type: string + example : top_level_workflow + date_created : + type: string + example : 2020-12-09 16:55:12.951500+00:00 + location : + type : string + example : remote + new : + type : boolean + example : false + WorkflowSpecFilesList: + properties: + file_model_id: + type : integer + example : 171 + workflow_spec_id : + type: string + example : top_level_workflow + filename : + type: string + example : data_security_plan.dmn + date_created : + type: string + example : 2020-12-01 13:58:12.420333+00:00 + type: + type : string + example : dmn + primary : + type : boolean + example : false + content_type: + type: string + example : text/xml + primary_process_id: + type : string + example : null + md5_hash: + type: string + example: f12e2bbd-a20c-673b-ccb8-a8a1ea9c5b7b + + + WorkflowSpecFilesDiff: + properties: + filename : + type: string + example : data_security_plan.dmn + date_created : + type: string + example : 2020-12-01 13:58:12.420333+00:00 + type: + type : string + example : dmn + primary : + type : boolean + example : false + content_type: + type: string + example : text/xml + primary_process_id: + type : string + example : null + md5_hash: + type: string + example: f12e2bbd-a20c-673b-ccb8-a8a1ea9c5b7b + location: + type : string + example : remote + new: + type: boolean + example : false + + WorkflowSpecAll: + properties: + workflow_spec_id : + type: string + example : acaf1258-43b4-437e-8846-f612afa66811 + date_created : + type: string + example : 2020-12-01 13:58:12.420333+00:00 + md5_hash: + type: string + 
example: c30fd597f21715018eab12f97f9d4956 Study: properties: id: diff --git a/crc/api/workflow.py b/crc/api/workflow.py index fb0c1e4b..cc290435 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -7,7 +7,7 @@ from hashlib import md5 import pandas as pd from SpiffWorkflow.util.deep_merge import DeepMerge from flask import g -from crc import session, db +from crc import session, db, app from crc.api.common import ApiError, ApiErrorSchema from crc.models.api_models import WorkflowApi, WorkflowApiSchema, NavigationItem, NavigationItemSchema from crc.models.file import FileModel, LookupDataSchema, FileDataModel @@ -269,12 +269,19 @@ def join_uuids(uuids): # in the same order return hashlib.md5(combined_uuids.encode('utf8')).hexdigest() # make a hash of the hashes -def get_changed_workflows(remote): +def verify_token(token, required_scopes): + if token == app.config['API_TOKEN']: + return {'scope':['any']} + else: + raise ApiError("permission_denied","API Token information is not correct") + + +def get_changed_workflows(remote,as_df=False): """ gets a remote endpoint - gets the workflows and then determines what workflows are different from the remote endpoint """ - response = requests.get('http://'+remote+'/v1.0/workflow_spec/all') + response = requests.get('http://'+remote+'/v1.0/workflow_spec/all',headers={'X-CR-API-KEY':app.config['API_TOKEN']}) # This is probably very and may allow cross site attacks - fix later remote = pd.DataFrame(json.loads(response.text)) @@ -318,14 +325,25 @@ def get_changed_workflows(remote): output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])] # return the list as a dict, let swagger convert it to json - return output.reset_index().to_dict(orient='records') + if as_df: + return output + else: + return output.reset_index().to_dict(orient='records') + + +def sync_all_changed_workflows(remote): + + workflowsdf = get_changed_workflows(remote,as_df=True) + workflows = workflowsdf.reset_index().to_dict(orient='records') + for workflow in workflows: + sync_changed_files(remote,workflow['workflow_spec_id']) + return [x['workflow_spec_id'] for x in workflows] -def sync_all_changed_files(remote): - pass def sync_changed_files(remote,workflow_spec_id): # make sure that spec is local before syncing files - remotespectext = requests.get('http://'+remote+'/v1.0/workflow-specification/'+workflow_spec_id) + remotespectext = requests.get('http://'+remote+'/v1.0/workflow-specification/'+workflow_spec_id, + headers={'X-CR-API-KEY': app.config['API_TOKEN']}) specdict = json.loads(remotespectext.text) localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first() if localspec is None: @@ -343,13 +361,20 @@ def sync_changed_files(remote,workflow_spec_id): session.add(localspec) changedfiles = get_changed_files(remote,workflow_spec_id,as_df=True) + if len(changedfiles)==0: + return [] updatefiles = changedfiles[~((changedfiles['new']==True) & (changedfiles['location']=='local'))] + updatefiles = updatefiles.reset_index().to_dict(orient='records') + deletefiles = changedfiles[((changedfiles['new']==True) & (changedfiles['location']=='local'))] - for delfile in deletefiles.reset_index().to_dict(orient='records'): + deletefiles = deletefiles.reset_index().to_dict(orient='records') + + for delfile in deletefiles: currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, FileModel.name == delfile['filename']).first() FileService.delete_file(currentfile.id) - for updatefile in 
updatefiles.reset_index().to_dict(orient='records'): + + for updatefile in updatefiles: currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, FileModel.name == updatefile['filename']).first() if not currentfile: @@ -364,9 +389,11 @@ def sync_changed_files(remote,workflow_spec_id): currentfile.primary_process_id = updatefile['primary_process_id'] session.add(currentfile) - response = requests.get('http://'+remote+'/v1.0/file/'+updatefile['md5_hash']+'/hash_data') + response = requests.get('http://'+remote+'/v1.0/file/'+updatefile['md5_hash']+'/hash_data', + headers={'X-CR-API-KEY': app.config['API_TOKEN']}) FileService.update_file(currentfile,response.content,updatefile['type']) session.commit() + return [x['filename'] for x in updatefiles] def get_changed_files(remote,workflow_spec_id,as_df=False): @@ -375,7 +402,8 @@ def get_changed_files(remote,workflow_spec_id,as_df=False): local and remote and determines what files have been change and returns a list of those files """ - response = requests.get('http://'+remote+'/v1.0/workflow_spec/'+workflow_spec_id+'/files') + response = requests.get('http://'+remote+'/v1.0/workflow_spec/'+workflow_spec_id+'/files', + headers={'X-CR-API-KEY':app.config['API_TOKEN']}) # This is probably very and may allow cross site attacks - fix later remote = pd.DataFrame(json.loads(response.text)) # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index @@ -386,7 +414,11 @@ def get_changed_files(remote,workflow_spec_id,as_df=False): left_on=['filename','md5_hash'], how = 'outer' , indicator=True).loc[lambda x : x['_merge']!='both'] - + if len(different) == 0: + if as_df: + return different + else: + return [] # each line has a tag on it - if was in the left or the right, # label it so we know if that was on the remote or local machine different.loc[different['_merge']=='left_only','location'] = 'remote' From e377a05deaec8a372a6c52a397a48baaff073ee5 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Wed, 9 Dec 2020 13:50:52 -0500 Subject: [PATCH 18/35] Add some punctuation --- crc/api.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crc/api.yml b/crc/api.yml index 2789e249..78436178 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -220,7 +220,7 @@ paths: /workflow_spec/{workflow_spec_id}/files/diff: get: operationId: crc.api.workflow.get_changed_files - summary: Provides a list of files for a workflow specs that differ from remote and their signature + summary: Provides a list of files for a workflow specs that differ from remote and their signature. security : - ApiKeyAuth : [] From a8203ed01ddd86a48904c22caabda5d3acc3718a Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Thu, 10 Dec 2020 10:06:21 -0500 Subject: [PATCH 19/35] save changes before refactor --- config/default.py | 2 +- crc/api/workflow.py | 20 +++++++++++++++++--- example_data.py | 1 + 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/config/default.py b/config/default.py index 44651fd5..1570811c 100644 --- a/config/default.py +++ b/config/default.py @@ -12,7 +12,7 @@ JSON_SORT_KEYS = False # CRITICAL. Do not sort the data when returning values # you may want to change this to something simple for testing!! 
# NB, if you change this in the local endpoint, # it needs to be changed in the remote endpoint as well -API_TOKEN = 'af95596f327c9ecc007b60414fc84b61' +API_TOKEN = environ.get('API_TOKEN', default = 'af95596f327c9ecc007b60414fc84b61') NAME = "CR Connect Workflow" FLASK_PORT = environ.get('PORT0') or environ.get('FLASK_PORT', default="5000") diff --git a/crc/api/workflow.py b/crc/api/workflow.py index cc290435..8da5c88a 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -352,8 +352,17 @@ def sync_changed_files(remote,workflow_spec_id): if specdict['category'] == None: localspec.category = None else: - localspec.category = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.id - == specdict['category']['id']).first() + localcategory = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.name + == specdict['category']['name']).first() + if localcategory == None: + #category doesn't exist - lets make it + localcategory = WorkflowSpecCategoryModel() + localcategory.name = specdict['category']['name'] + localcategory.display_name = specdict['category']['display_name'] + localcategory.display_order = specdict['category']['display_order'] + session.add(localcategory) + localspec.category = localcategory + localspec.display_order = specdict['display_order'] localspec.display_name = specdict['display_name'] localspec.name = specdict['name'] @@ -372,7 +381,12 @@ def sync_changed_files(remote,workflow_spec_id): for delfile in deletefiles: currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, FileModel.name == delfile['filename']).first() - FileService.delete_file(currentfile.id) + + # it is more appropriate to archive the file than delete + # due to the fact that we might have workflows that are using the + # file data + currentfile.archived = True + session.add(currentfile) for updatefile in updatefiles: currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, diff --git a/example_data.py b/example_data.py index d9b4c73b..a46112a2 100644 --- a/example_data.py +++ b/example_data.py @@ -67,6 +67,7 @@ class ExampleDataLoader: display_order=6 ), ] + db.session.execute("select setval('workflow_spec_category_id_seq',7);") db.session.add_all(categories) db.session.commit() From 3f56dfe48477677206b01e303084ab355ee4a077 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Thu, 10 Dec 2020 10:46:23 -0500 Subject: [PATCH 20/35] Move all workflow sync stuff into new file Make changes to api naming scheme add some error checking around endpoints for missing/invalid endpoints --- crc/api.yml | 52 +++--- crc/api/workflow.py | 305 ----------------------------------- crc/api/workflow_sync.py | 336 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 362 insertions(+), 331 deletions(-) create mode 100644 crc/api/workflow_sync.py diff --git a/crc/api.yml b/crc/api.yml index 78436178..41e76f90 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -100,9 +100,9 @@ paths: type: array items: $ref: "#/components/schemas/Study" - /workflow_spec/pullall: + /workflow_sync/pullall: get: - operationId: crc.api.workflow.sync_all_changed_workflows + operationId: crc.api.workflow_sync.sync_all_changed_workflows summary: Sync all workflows that have changed on the remote side and provide a list of the results security: - ApiKeyAuth : [] @@ -115,7 +115,7 @@ paths: schema: type: string tags: - - Workflow Spec States + - Workflow Sync API responses: '200': description: An array of workflow specs that were synced 
from remote. @@ -130,9 +130,9 @@ paths: - /workflow_spec/diff: + /workflow_sync/diff: get: - operationId: crc.api.workflow.get_changed_workflows + operationId: crc.api.workflow_sync.get_changed_workflows summary: Provides a list of workflow that differ from remote and if it is new or not security : - ApiKeyAuth : [] @@ -145,7 +145,7 @@ paths: schema: type: string tags: - - Workflow Spec States + - Workflow Sync API responses: '200': description: An array of workflow specs, with last touched date and which one is most recent. @@ -157,9 +157,9 @@ paths: $ref: "#/components/schemas/WorkflowSpecDiffList" - /workflow_spec/{workflow_spec_id}/files: + /workflow_sync/{workflow_spec_id}/files: get: - operationId: crc.api.workflow.get_workflow_spec_files + operationId: crc.api.workflow_sync.get_workflow_spec_files summary: Provides a list of files for a workflow spec on this machine. security : - ApiKeyAuth : [] @@ -172,7 +172,7 @@ paths: type: string tags: - - Workflow Spec States + - Workflow Sync API responses: '200': description: An array of files for a workflow spec on the local system, with details. @@ -183,9 +183,9 @@ paths: items: $ref: "#/components/schemas/WorkflowSpecFilesList" - /workflow_spec/{workflow_spec_id}/files/sync: + /workflow_sync/{workflow_spec_id}/files/sync: get: - operationId: crc.api.workflow.sync_changed_files + operationId: crc.api.workflow_sync.sync_changed_files summary: Syncs files from a workflow on a remote system and provides a list of files that were updated security : - ApiKeyAuth : [] @@ -204,7 +204,7 @@ paths: type: string tags: - - Workflow Spec States + - Workflow Sync API responses: '200': description: A list of files that were synced for the workflow. @@ -217,9 +217,9 @@ paths: example : ["data_security_plan.dmn",'some_other_file.xml'] - /workflow_spec/{workflow_spec_id}/files/diff: + /workflow_sync/{workflow_spec_id}/files/diff: get: - operationId: crc.api.workflow.get_changed_files + operationId: crc.api.workflow_sync.get_changed_files summary: Provides a list of files for a workflow specs that differ from remote and their signature. security : - ApiKeyAuth : [] @@ -239,7 +239,7 @@ paths: type: string tags: - - Workflow Spec States + - Workflow Sync API responses: '200': description: An array of files that are different from remote, with last touched date and file signature. @@ -251,15 +251,15 @@ paths: $ref: "#/components/schemas/WorkflowSpecFilesDiff" - /workflow_spec/all: + /workflow_sync/all: get: - operationId: crc.api.workflow.get_all_spec_state + operationId: crc.api.workflow_sync.get_all_spec_state summary: Provides a list of workflow specs, last update date and thumbprint security: - ApiKeyAuth : [] tags: - - Workflow Spec States + - Workflow Sync API responses: '200': description: An array of workflow specs, with last touched date and file signature. @@ -547,7 +547,7 @@ paths: description: The workflow spec category has been removed. 
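
Taken together, these workflow_sync endpoints let one CR Connect instance pull workflow specs from
another over HTTP, with both sides authenticating via the shared X-CR-API-KEY header. A rough
sketch of the intended call sequence (host names and token value below are placeholders, not real
configuration):

    import requests

    LOCAL = 'localhost:5000'                # instance that should receive the specs
    REMOTE = 'remote.example.com:5000'      # instance to copy from (placeholder host)
    HEADERS = {'X-CR-API-KEY': 'shared-api-token-value'}  # must match API_TOKEN on both boxes

    # See which workflow specs differ from the remote...
    diff = requests.get('http://' + LOCAL + '/v1.0/workflow_sync/diff',
                        params={'remote': REMOTE}, headers=HEADERS).json()

    # ...then pull every changed spec across in one shot.
    synced = requests.get('http://' + LOCAL + '/v1.0/workflow_sync/pullall',
                          params={'remote': REMOTE}, headers=HEADERS).json()
    # synced is a list of workflow spec ids, e.g. ['top_level_workflow', ...]
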
/file: parameters: - - name: workflow_spec_id + - name: workflow_sync_id in: query required: false description: The unique id of a workflow specification @@ -1353,7 +1353,7 @@ components: type : apiKey in : header name : X-CR-API-KEY - x-apikeyInfoFunc: crc.api.workflow.verify_token + x-apikeyInfoFunc: crc.api.workflow_sync.verify_token schemas: User: @@ -1380,7 +1380,7 @@ components: type: string WorkflowSpecDiffList: properties: - workflow_spec_id: + workflow_sync_id: type: string example : top_level_workflow date_created : @@ -1397,7 +1397,7 @@ components: file_model_id: type : integer example : 171 - workflow_spec_id : + workflow_sync_id : type: string example : top_level_workflow filename : @@ -1455,7 +1455,7 @@ components: WorkflowSpecAll: properties: - workflow_spec_id : + workflow_sync_id : type: string example : acaf1258-43b4-437e-8846-f612afa66811 date_created : @@ -1587,7 +1587,7 @@ components: content_type: type: string example: "application/xml" - workflow_spec_id: + workflow_sync_id: type: string example: "random_fact" x-nullable: true @@ -1609,7 +1609,7 @@ components: $ref: "#/components/schemas/NavigationItem" next_task: $ref: "#/components/schemas/Task" - workflow_spec_id: + workflow_sync_id: type: string spec_version: type: string @@ -1625,7 +1625,7 @@ components: example: id: 291234 status: 'user_input_required' - workflow_spec_id: 'random_fact' + workflow_sync_id: 'random_fact' spec_version: 'v1.1 [22,23]' is_latest_spec: True next_task: diff --git a/crc/api/workflow.py b/crc/api/workflow.py index 8da5c88a..d14daf11 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -1,11 +1,4 @@ -import hashlib -import json import uuid -from io import StringIO -from hashlib import md5 - -import pandas as pd -from SpiffWorkflow.util.deep_merge import DeepMerge from flask import g from crc import session, db, app from crc.api.common import ApiError, ApiErrorSchema @@ -21,8 +14,6 @@ from crc.services.study_service import StudyService from crc.services.user_service import UserService from crc.services.workflow_processor import WorkflowProcessor from crc.services.workflow_service import WorkflowService -from flask_cors import cross_origin -import requests def all_specifications(): schema = WorkflowSpecModelSchema(many=True) @@ -263,299 +254,3 @@ def _verify_user_and_role(processor, spiff_task): raise ApiError.from_task("permission_denied", f"This task must be completed by '{allowed_users}', " f"but you are {user.uid}", spiff_task) -def join_uuids(uuids): - """Joins a pandas Series of uuids and combines them in one hash""" - combined_uuids = ''.join([str(uuid) for uuid in uuids.sort_values()]) # ensure that values are always - # in the same order - return hashlib.md5(combined_uuids.encode('utf8')).hexdigest() # make a hash of the hashes - -def verify_token(token, required_scopes): - if token == app.config['API_TOKEN']: - return {'scope':['any']} - else: - raise ApiError("permission_denied","API Token information is not correct") - - -def get_changed_workflows(remote,as_df=False): - """ - gets a remote endpoint - gets the workflows and then - determines what workflows are different from the remote endpoint - """ - response = requests.get('http://'+remote+'/v1.0/workflow_spec/all',headers={'X-CR-API-KEY':app.config['API_TOKEN']}) - - # This is probably very and may allow cross site attacks - fix later - remote = pd.DataFrame(json.loads(response.text)) - # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index - local = 
get_all_spec_state_dataframe().reset_index() - - # merge these on workflow spec id and hash - this will - # make two different date columns date_x and date_y - different = remote.merge(local, - right_on=['workflow_spec_id','md5_hash'], - left_on=['workflow_spec_id','md5_hash'], - how = 'outer' , - indicator=True).loc[lambda x : x['_merge']!='both'] - - # each line has a tag on it - if was in the left or the right, - # label it so we know if that was on the remote or local machine - different.loc[different['_merge']=='left_only','location'] = 'remote' - different.loc[different['_merge']=='right_only','location'] = 'local' - - # this takes the different date_created_x and date-created_y columns and - # combines them back into one date_created column - index = different['date_created_x'].isnull() - different.loc[index,'date_created_x'] = different[index]['date_created_y'] - different = different[['workflow_spec_id','date_created_x','location']].copy() - different.columns=['workflow_spec_id','date_created','location'] - - # our different list will have multiple entries for a workflow if there is a version on either side - # we want to grab the most recent one, so we sort and grab the most recent one for each workflow - changedfiles = different.sort_values('date_created',ascending=False).groupby('workflow_spec_id').first() - - # get an exclusive or list of workflow ids - that is we want lists of files that are - # on one machine or the other, but not both - remote_spec_ids = remote[['workflow_spec_id']] - local_spec_ids = local[['workflow_spec_id']] - left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])] - right = local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])] - - # flag files as new that are only on the remote box and remove the files that are only on the local box - changedfiles['new'] = False - changedfiles.loc[changedfiles.index.isin(left['workflow_spec_id']), 'new'] = True - output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])] - - # return the list as a dict, let swagger convert it to json - if as_df: - return output - else: - return output.reset_index().to_dict(orient='records') - - -def sync_all_changed_workflows(remote): - - workflowsdf = get_changed_workflows(remote,as_df=True) - workflows = workflowsdf.reset_index().to_dict(orient='records') - for workflow in workflows: - sync_changed_files(remote,workflow['workflow_spec_id']) - return [x['workflow_spec_id'] for x in workflows] - - -def sync_changed_files(remote,workflow_spec_id): - # make sure that spec is local before syncing files - remotespectext = requests.get('http://'+remote+'/v1.0/workflow-specification/'+workflow_spec_id, - headers={'X-CR-API-KEY': app.config['API_TOKEN']}) - specdict = json.loads(remotespectext.text) - localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first() - if localspec is None: - localspec = WorkflowSpecModel() - localspec.id = workflow_spec_id - if specdict['category'] == None: - localspec.category = None - else: - localcategory = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.name - == specdict['category']['name']).first() - if localcategory == None: - #category doesn't exist - lets make it - localcategory = WorkflowSpecCategoryModel() - localcategory.name = specdict['category']['name'] - localcategory.display_name = specdict['category']['display_name'] - localcategory.display_order = 
specdict['category']['display_order'] - session.add(localcategory) - localspec.category = localcategory - - localspec.display_order = specdict['display_order'] - localspec.display_name = specdict['display_name'] - localspec.name = specdict['name'] - localspec.description = specdict['description'] - session.add(localspec) - - changedfiles = get_changed_files(remote,workflow_spec_id,as_df=True) - if len(changedfiles)==0: - return [] - updatefiles = changedfiles[~((changedfiles['new']==True) & (changedfiles['location']=='local'))] - updatefiles = updatefiles.reset_index().to_dict(orient='records') - - deletefiles = changedfiles[((changedfiles['new']==True) & (changedfiles['location']=='local'))] - deletefiles = deletefiles.reset_index().to_dict(orient='records') - - for delfile in deletefiles: - currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, - FileModel.name == delfile['filename']).first() - - # it is more appropriate to archive the file than delete - # due to the fact that we might have workflows that are using the - # file data - currentfile.archived = True - session.add(currentfile) - - for updatefile in updatefiles: - currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, - FileModel.name == updatefile['filename']).first() - if not currentfile: - currentfile = FileModel() - currentfile.name = updatefile['filename'] - currentfile.workflow_spec_id = workflow_spec_id - - currentfile.date_created = updatefile['date_created'] - currentfile.type = updatefile['type'] - currentfile.primary = updatefile['primary'] - currentfile.content_type = updatefile['content_type'] - currentfile.primary_process_id = updatefile['primary_process_id'] - session.add(currentfile) - - response = requests.get('http://'+remote+'/v1.0/file/'+updatefile['md5_hash']+'/hash_data', - headers={'X-CR-API-KEY': app.config['API_TOKEN']}) - FileService.update_file(currentfile,response.content,updatefile['type']) - session.commit() - return [x['filename'] for x in updatefiles] - - -def get_changed_files(remote,workflow_spec_id,as_df=False): - """ - gets a remote endpoint - gets the files for a workflow_spec on both - local and remote and determines what files have been change and returns a list of those - files - """ - response = requests.get('http://'+remote+'/v1.0/workflow_spec/'+workflow_spec_id+'/files', - headers={'X-CR-API-KEY':app.config['API_TOKEN']}) - # This is probably very and may allow cross site attacks - fix later - remote = pd.DataFrame(json.loads(response.text)) - # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index - local = get_workflow_spec_files_dataframe(workflow_spec_id).reset_index() - local['md5_hash'] = local['md5_hash'].astype('str') - different = remote.merge(local, - right_on=['filename','md5_hash'], - left_on=['filename','md5_hash'], - how = 'outer' , - indicator=True).loc[lambda x : x['_merge']!='both'] - if len(different) == 0: - if as_df: - return different - else: - return [] - # each line has a tag on it - if was in the left or the right, - # label it so we know if that was on the remote or local machine - different.loc[different['_merge']=='left_only','location'] = 'remote' - different.loc[different['_merge']=='right_only','location'] = 'local' - - # this takes the different date_created_x and date-created_y columns and - # combines them back into one date_created column - dualfields = ['date_created','type','primary','content_type','primary_process_id'] - for merge in 
dualfields: - index = different[merge+'_x'].isnull() - different.loc[index,merge+'_x'] = different[index][merge+'_y'] - - fieldlist = [fld+'_x' for fld in dualfields] - different = different[ fieldlist + ['md5_hash','filename','location']].copy() - - different.columns=dualfields+['md5_hash','filename','location'] - # our different list will have multiple entries for a workflow if there is a version on either side - # we want to grab the most recent one, so we sort and grab the most recent one for each workflow - changedfiles = different.sort_values('date_created',ascending=False).groupby('filename').first() - - # get an exclusive or list of workflow ids - that is we want lists of files that are - # on one machine or the other, but not both - remote_spec_ids = remote[['filename']] - local_spec_ids = local[['filename']] - left = remote_spec_ids[~remote_spec_ids['filename'].isin(local_spec_ids['filename'])] - right = local_spec_ids[~local_spec_ids['filename'].isin(remote_spec_ids['filename'])] - changedfiles['new'] = False - changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True - changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True - changedfiles = changedfiles.replace({pd.np.nan: None}) - # return the list as a dict, let swagger convert it to json - if as_df: - return changedfiles - else: - return changedfiles.reset_index().to_dict(orient='records') - - - -def get_all_spec_state(): - """ - Return a list of all workflow specs along with last updated date and a - thumbprint of all of the files that are used for that workflow_spec - Convert into a dict list from a dataframe - """ - df = get_all_spec_state_dataframe() - return df.reset_index().to_dict(orient='records') - - -def get_workflow_spec_files(workflow_spec_id): - """ - Return a list of all workflow specs along with last updated date and a - thumbprint of all of the files that are used for that workflow_spec - Convert into a dict list from a dataframe - """ - df = get_workflow_spec_files_dataframe(workflow_spec_id) - return df.reset_index().to_dict(orient='records') - - -def get_workflow_spec_files_dataframe(workflowid): - """ - Return a list of all files for a workflow_spec along with last updated date and a - hash so we can determine file differences for a changed workflow on a box. 
- Return a dataframe - """ - x = session.query(FileDataModel).join(FileModel).filter(FileModel.workflow_spec_id==workflowid) - # there might be a cleaner way of getting a data frome from some of the - # fields in the ORM - but this works OK - filelist = [] - for file in x: - filelist.append({'file_model_id':file.file_model_id, - 'workflow_spec_id': file.file_model.workflow_spec_id, - 'md5_hash':file.md5_hash, - 'filename':file.file_model.name, - 'type':file.file_model.type.name, - 'primary':file.file_model.primary, - 'content_type':file.file_model.content_type, - 'primary_process_id':file.file_model.primary_process_id, - 'date_created':file.date_created}) - if len(filelist) == 0: - return pd.DataFrame(columns=['file_model_id', - 'workflow_spec_id', - 'md5_hash', - 'filename', - 'type', - 'primary', - 'content_type', - 'primary_process_id', - 'date_created']) - df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last() - df['date_created'] = df['date_created'].astype('str') - return df - - - -def get_all_spec_state_dataframe(): - """ - Return a list of all workflow specs along with last updated date and a - thumbprint of all of the files that are used for that workflow_spec - Return a dataframe - """ - x = session.query(FileDataModel).join(FileModel) - # there might be a cleaner way of getting a data frome from some of the - # fields in the ORM - but this works OK - filelist = [] - for file in x: - filelist.append({'file_model_id':file.file_model_id, - 'workflow_spec_id': file.file_model.workflow_spec_id, - 'md5_hash':file.md5_hash, - 'filename':file.file_model.name, - 'date_created':file.date_created}) - df = pd.DataFrame(filelist) - - # get a distinct list of file_model_id's with the most recent file_data retained - df = df.sort_values('date_created').drop_duplicates(['file_model_id'],keep='last').copy() - - # take that list and then group by workflow_spec and retain the most recently touched file - # and make a consolidated hash of the md5_checksums - this acts as a 'thumbprint' for each - # workflow spec - df = df.groupby('workflow_spec_id').agg({'date_created':'max', - 'md5_hash':join_uuids}).copy() - # get only the columns we are really interested in returning - df = df[['date_created','md5_hash']].copy() - # convert dates to string - df['date_created'] = df['date_created'].astype('str') - return df - diff --git a/crc/api/workflow_sync.py b/crc/api/workflow_sync.py new file mode 100644 index 00000000..98f04236 --- /dev/null +++ b/crc/api/workflow_sync.py @@ -0,0 +1,336 @@ +import hashlib +import json +import pandas as pd +import requests +from crc import session, app +from crc.api.common import ApiError +from crc.models.file import FileModel, FileDataModel +from crc.models.workflow import WorkflowSpecModel, WorkflowSpecCategoryModel +from crc.services.file_service import FileService + + +def join_uuids(uuids): + """Joins a pandas Series of uuids and combines them in one hash""" + combined_uuids = ''.join([str(uuid) for uuid in uuids.sort_values()]) # ensure that values are always + # in the same order + return hashlib.md5(combined_uuids.encode('utf8')).hexdigest() # make a hash of the hashes + +def verify_token(token, required_scopes): + if token == app.config['API_TOKEN']: + return {'scope':['any']} + else: + raise ApiError("permission_denied", "API Token information is not correct") + + +def get_changed_workflows(remote,as_df=False): + """ + gets a remote endpoint - gets the workflows and then + determines what workflows are different from the remote 
endpoint + """ + try: + response = requests.get('http://'+remote+'/v1.0/workflow_sync/all',headers={'X-CR-API-KEY':app.config['API_TOKEN']}) + except: + raise ApiError("endpoint error", 'had a problem connecting to '+remote) + if response.status_code != 200: + raise ApiError("endpoint error", response.text) + + remote = pd.DataFrame(json.loads(response.text)) + + # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index + local = get_all_spec_state_dataframe().reset_index() + + # merge these on workflow spec id and hash - this will + # make two different date columns date_x and date_y + different = remote.merge(local, + right_on=['workflow_spec_id','md5_hash'], + left_on=['workflow_spec_id','md5_hash'], + how = 'outer' , + indicator=True).loc[lambda x : x['_merge']!='both'] + if len(different)==0: + return [] + # each line has a tag on it - if was in the left or the right, + # label it so we know if that was on the remote or local machine + different.loc[different['_merge']=='left_only','location'] = 'remote' + different.loc[different['_merge']=='right_only','location'] = 'local' + + # this takes the different date_created_x and date-created_y columns and + # combines them back into one date_created column + index = different['date_created_x'].isnull() + different.loc[index,'date_created_x'] = different[index]['date_created_y'] + different = different[['workflow_spec_id','date_created_x','location']].copy() + different.columns=['workflow_spec_id','date_created','location'] + + # our different list will have multiple entries for a workflow if there is a version on either side + # we want to grab the most recent one, so we sort and grab the most recent one for each workflow + changedfiles = different.sort_values('date_created',ascending=False).groupby('workflow_spec_id').first() + + # get an exclusive or list of workflow ids - that is we want lists of files that are + # on one machine or the other, but not both + remote_spec_ids = remote[['workflow_spec_id']] + local_spec_ids = local[['workflow_spec_id']] + left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])] + right = local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])] + + # flag files as new that are only on the remote box and remove the files that are only on the local box + changedfiles['new'] = False + changedfiles.loc[changedfiles.index.isin(left['workflow_spec_id']), 'new'] = True + output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])] + + # return the list as a dict, let swagger convert it to json + if as_df: + return output + else: + return output.reset_index().to_dict(orient='records') + + +def sync_all_changed_workflows(remote): + + workflowsdf = get_changed_workflows(remote,as_df=True) + if len(workflowsdf) ==0: + return [] + workflows = workflowsdf.reset_index().to_dict(orient='records') + for workflow in workflows: + sync_changed_files(remote,workflow['workflow_spec_id']) + return [x['workflow_spec_id'] for x in workflows] + + +def sync_changed_files(remote,workflow_spec_id): + # make sure that spec is local before syncing files + try: + remotespectext = requests.get('http://'+remote+'/v1.0/workflow-specification/'+workflow_spec_id, + headers={'X-CR-API-KEY': app.config['API_TOKEN']}) + except: + raise ApiError("endpoint error", 'had a problem connecting to '+remote) + + if remotespectext.status_code != 200: + raise ApiError("endpoint error", response.text) + + specdict = 
json.loads(remotespectext.text) + + localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first() + if localspec is None: + localspec = WorkflowSpecModel() + localspec.id = workflow_spec_id + if specdict['category'] == None: + localspec.category = None + else: + localcategory = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.name + == specdict['category']['name']).first() + if localcategory == None: + #category doesn't exist - lets make it + localcategory = WorkflowSpecCategoryModel() + localcategory.name = specdict['category']['name'] + localcategory.display_name = specdict['category']['display_name'] + localcategory.display_order = specdict['category']['display_order'] + session.add(localcategory) + localspec.category = localcategory + + localspec.display_order = specdict['display_order'] + localspec.display_name = specdict['display_name'] + localspec.name = specdict['name'] + localspec.description = specdict['description'] + session.add(localspec) + + changedfiles = get_changed_files(remote,workflow_spec_id,as_df=True) + if len(changedfiles)==0: + return [] + updatefiles = changedfiles[~((changedfiles['new']==True) & (changedfiles['location']=='local'))] + updatefiles = updatefiles.reset_index().to_dict(orient='records') + + deletefiles = changedfiles[((changedfiles['new']==True) & (changedfiles['location']=='local'))] + deletefiles = deletefiles.reset_index().to_dict(orient='records') + + for delfile in deletefiles: + currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, + FileModel.name == delfile['filename']).first() + + # it is more appropriate to archive the file than delete + # due to the fact that we might have workflows that are using the + # file data + currentfile.archived = True + session.add(currentfile) + + for updatefile in updatefiles: + currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, + FileModel.name == updatefile['filename']).first() + if not currentfile: + currentfile = FileModel() + currentfile.name = updatefile['filename'] + currentfile.workflow_spec_id = workflow_spec_id + + currentfile.date_created = updatefile['date_created'] + currentfile.type = updatefile['type'] + currentfile.primary = updatefile['primary'] + currentfile.content_type = updatefile['content_type'] + currentfile.primary_process_id = updatefile['primary_process_id'] + session.add(currentfile) + try: + response = requests.get('http://'+remote+'/v1.0/file/'+updatefile['md5_hash']+'/hash_data', + headers={'X-CR-API-KEY': app.config['API_TOKEN']}) + except: + raise ApiError("endpoint error", 'had a problem connecting to ' + remote) + + if response.status_code != 200: + raise ApiError("endpoint error", response.text) + + FileService.update_file(currentfile,response.content,updatefile['type']) + session.commit() + return [x['filename'] for x in updatefiles] + + +def get_changed_files(remote,workflow_spec_id,as_df=False): + """ + gets a remote endpoint - gets the files for a workflow_spec on both + local and remote and determines what files have been change and returns a list of those + files + """ + try: + response = requests.get('http://'+remote+'/v1.0/workflow_sync/'+workflow_spec_id+'/files', + headers={'X-CR-API-KEY':app.config['API_TOKEN']}) + except: + raise ApiError("endpoint error", 'had a problem connecting to '+remote) + + if response.status_code != 200: + raise ApiError("endpoint error", response.text) + + # This is probably very and may allow 
cross site attacks - fix later + remote = pd.DataFrame(json.loads(response.text)) + # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index + local = get_workflow_spec_files_dataframe(workflow_spec_id).reset_index() + local['md5_hash'] = local['md5_hash'].astype('str') + different = remote.merge(local, + right_on=['filename','md5_hash'], + left_on=['filename','md5_hash'], + how = 'outer' , + indicator=True).loc[lambda x : x['_merge']!='both'] + if len(different) == 0: + if as_df: + return different + else: + return [] + # each line has a tag on it - if was in the left or the right, + # label it so we know if that was on the remote or local machine + different.loc[different['_merge']=='left_only','location'] = 'remote' + different.loc[different['_merge']=='right_only','location'] = 'local' + + # this takes the different date_created_x and date-created_y columns and + # combines them back into one date_created column + dualfields = ['date_created','type','primary','content_type','primary_process_id'] + for merge in dualfields: + index = different[merge+'_x'].isnull() + different.loc[index,merge+'_x'] = different[index][merge+'_y'] + + fieldlist = [fld+'_x' for fld in dualfields] + different = different[ fieldlist + ['md5_hash','filename','location']].copy() + + different.columns=dualfields+['md5_hash','filename','location'] + # our different list will have multiple entries for a workflow if there is a version on either side + # we want to grab the most recent one, so we sort and grab the most recent one for each workflow + changedfiles = different.sort_values('date_created',ascending=False).groupby('filename').first() + + # get an exclusive or list of workflow ids - that is we want lists of files that are + # on one machine or the other, but not both + remote_spec_ids = remote[['filename']] + local_spec_ids = local[['filename']] + left = remote_spec_ids[~remote_spec_ids['filename'].isin(local_spec_ids['filename'])] + right = local_spec_ids[~local_spec_ids['filename'].isin(remote_spec_ids['filename'])] + changedfiles['new'] = False + changedfiles.loc[changedfiles.index.isin(left['filename']), 'new'] = True + changedfiles.loc[changedfiles.index.isin(right['filename']),'new'] = True + changedfiles = changedfiles.replace({pd.np.nan: None}) + # return the list as a dict, let swagger convert it to json + if as_df: + return changedfiles + else: + return changedfiles.reset_index().to_dict(orient='records') + + + +def get_all_spec_state(): + """ + Return a list of all workflow specs along with last updated date and a + thumbprint of all of the files that are used for that workflow_spec + Convert into a dict list from a dataframe + """ + df = get_all_spec_state_dataframe() + return df.reset_index().to_dict(orient='records') + + +def get_workflow_spec_files(workflow_spec_id): + """ + Return a list of all workflow specs along with last updated date and a + thumbprint of all of the files that are used for that workflow_spec + Convert into a dict list from a dataframe + """ + df = get_workflow_spec_files_dataframe(workflow_spec_id) + return df.reset_index().to_dict(orient='records') + + +def get_workflow_spec_files_dataframe(workflowid): + """ + Return a list of all files for a workflow_spec along with last updated date and a + hash so we can determine file differences for a changed workflow on a box. 
+ Return a dataframe + """ + x = session.query(FileDataModel).join(FileModel).filter(FileModel.workflow_spec_id==workflowid) + # there might be a cleaner way of getting a data frome from some of the + # fields in the ORM - but this works OK + filelist = [] + for file in x: + filelist.append({'file_model_id':file.file_model_id, + 'workflow_spec_id': file.file_model.workflow_spec_id, + 'md5_hash':file.md5_hash, + 'filename':file.file_model.name, + 'type':file.file_model.type.name, + 'primary':file.file_model.primary, + 'content_type':file.file_model.content_type, + 'primary_process_id':file.file_model.primary_process_id, + 'date_created':file.date_created}) + if len(filelist) == 0: + return pd.DataFrame(columns=['file_model_id', + 'workflow_spec_id', + 'md5_hash', + 'filename', + 'type', + 'primary', + 'content_type', + 'primary_process_id', + 'date_created']) + df = pd.DataFrame(filelist).sort_values('date_created').groupby('file_model_id').last() + df['date_created'] = df['date_created'].astype('str') + return df + + + +def get_all_spec_state_dataframe(): + """ + Return a list of all workflow specs along with last updated date and a + thumbprint of all of the files that are used for that workflow_spec + Return a dataframe + """ + x = session.query(FileDataModel).join(FileModel) + # there might be a cleaner way of getting a data frome from some of the + # fields in the ORM - but this works OK + filelist = [] + for file in x: + filelist.append({'file_model_id':file.file_model_id, + 'workflow_spec_id': file.file_model.workflow_spec_id, + 'md5_hash':file.md5_hash, + 'filename':file.file_model.name, + 'date_created':file.date_created}) + df = pd.DataFrame(filelist) + + # get a distinct list of file_model_id's with the most recent file_data retained + df = df.sort_values('date_created').drop_duplicates(['file_model_id'],keep='last').copy() + + # take that list and then group by workflow_spec and retain the most recently touched file + # and make a consolidated hash of the md5_checksums - this acts as a 'thumbprint' for each + # workflow spec + df = df.groupby('workflow_spec_id').agg({'date_created':'max', + 'md5_hash':join_uuids}).copy() + # get only the columns we are really interested in returning + df = df[['date_created','md5_hash']].copy() + # convert dates to string + df['date_created'] = df['date_created'].astype('str') + return df + From 993c7bc76e6480d4d9e0ef6fa3ea41f2db4fafe1 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Fri, 11 Dec 2020 08:29:37 -0500 Subject: [PATCH 21/35] fixed error on api.yml from search / replace --- crc/api.yml | 14 +++---- deploy/requirements.txt | 93 +++++++++++++++++++---------------------- 2 files changed, 51 insertions(+), 56 deletions(-) diff --git a/crc/api.yml b/crc/api.yml index 41e76f90..ce02307f 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -547,7 +547,7 @@ paths: description: The workflow spec category has been removed. 
/file: parameters: - - name: workflow_sync_id + - name: workflow_spec_id in: query required: false description: The unique id of a workflow specification @@ -1380,7 +1380,7 @@ components: type: string WorkflowSpecDiffList: properties: - workflow_sync_id: + workflow_spec_id: type: string example : top_level_workflow date_created : @@ -1397,7 +1397,7 @@ components: file_model_id: type : integer example : 171 - workflow_sync_id : + workflow_spec_id : type: string example : top_level_workflow filename : @@ -1455,7 +1455,7 @@ components: WorkflowSpecAll: properties: - workflow_sync_id : + workflow_spec_id : type: string example : acaf1258-43b4-437e-8846-f612afa66811 date_created : @@ -1587,7 +1587,7 @@ components: content_type: type: string example: "application/xml" - workflow_sync_id: + workflow_spec_id: type: string example: "random_fact" x-nullable: true @@ -1609,7 +1609,7 @@ components: $ref: "#/components/schemas/NavigationItem" next_task: $ref: "#/components/schemas/Task" - workflow_sync_id: + workflow_spec_id: type: string spec_version: type: string @@ -1625,7 +1625,7 @@ components: example: id: 291234 status: 'user_input_required' - workflow_sync_id: 'random_fact' + workflow_spec_id: 'random_fact' spec_version: 'v1.1 [22,23]' is_latest_spec: True next_task: diff --git a/deploy/requirements.txt b/deploy/requirements.txt index af827f27..b9ab7b4c 100644 --- a/deploy/requirements.txt +++ b/deploy/requirements.txt @@ -1,81 +1,76 @@ alabaster==0.7.12 -alembic==1.4.2 -amqp==2.5.2 +alembic==1.4.3 aniso8601==8.0.0 -attrs==19.3.0 -babel==2.8.0 -bcrypt==3.1.7 -beautifulsoup4==4.9.1 -billiard==3.6.3.0 +attrs==20.3.0 +babel==2.9.0 +bcrypt==3.2.0 +beautifulsoup4==4.9.3 blinker==1.4 -celery==4.4.2 -certifi==2020.4.5.1 -cffi==1.14.0 +certifi==2020.11.8 +cffi==1.14.4 chardet==3.0.4 click==7.1.2 -clickclick==1.2.2 +clickclick==20.10.2 commonmark==0.9.1 -configparser==5.0.0 connexion==2.7.0 -coverage==5.1 +coverage==5.3 +deprecated==1.2.10 docutils==0.16 -docxtpl==0.9.2 +docxtpl==0.11.2 et-xmlfile==1.0.1 flask==1.1.2 flask-admin==1.5.7 flask-bcrypt==0.7.1 -flask-cors==3.0.8 +flask-cors==3.0.9 flask-mail==0.9.1 -flask-marshmallow==0.12.0 +flask-marshmallow==0.14.0 flask-migrate==2.5.3 flask-restful==0.3.8 -flask-sqlalchemy==2.4.1 -flask-sso==0.4.0 -future==0.18.2 -httpretty==1.0.2 -idna==2.9 +flask-sqlalchemy==2.4.4 +gunicorn==20.0.4 +httpretty==1.0.3 +idna==2.10 imagesize==1.2.0 -importlib-metadata==1.6.0 -inflection==0.4.0 +inflection==0.5.1 itsdangerous==1.1.0 jdcal==1.4.1 jinja2==2.11.2 jsonschema==3.2.0 -kombu==4.6.8 -ldap3==2.7 -lxml==4.5.1 -mako==1.1.2 +ldap3==2.8.1 +lxml==4.6.2 +mako==1.1.3 +markdown==3.3.3 markupsafe==1.1.1 -marshmallow==3.6.0 +marshmallow==3.9.1 marshmallow-enum==1.5.1 -marshmallow-sqlalchemy==0.23.0 -numpy==1.18.4 -openapi-spec-validator==0.2.8 -openpyxl==3.0.3 +marshmallow-sqlalchemy==0.24.1 +numpy==1.19.4 +openapi-spec-validator==0.2.9 +openpyxl==3.0.5 packaging==20.4 -pandas==1.0.3 -psycopg2-binary==2.8.5 +pandas==1.1.4 +psycopg2-binary==2.8.6 pyasn1==0.4.8 pycparser==2.20 -PyGithub==1.53 -pygments==2.6.1 +pygithub==1.53 +pygments==2.7.2 pyjwt==1.7.1 pyparsing==2.4.7 -pyrsistent==0.16.0 +pyrsistent==0.17.3 python-box==5.2.0 python-dateutil==2.8.1 python-docx==0.8.10 python-editor==1.0.4 -python-Levenshtein==0.12.0 -pytz==2020.1 +python-levenshtein==0.12.0 +pytz==2020.4 pyyaml==5.3.1 recommonmark==0.6.0 -requests==2.23.0 +requests==2.25.0 sentry-sdk==0.14.4 -six==1.14.0 +six==1.15.0 snowballstemmer==2.0.0 soupsieve==2.0.1 -sphinx==3.0.3 +sphinx==3.3.1 
sphinxcontrib-applehelp==1.0.2 sphinxcontrib-devhelp==1.0.2 sphinxcontrib-htmlhelp==1.0.3 @@ -83,14 +78,14 @@ sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.4 spiffworkflow -sqlalchemy==1.3.17 -swagger-ui-bundle==0.0.6 -urllib3==1.25.9 -vine==1.3.0 -waitress==1.4.3 +sqlalchemy==1.3.20 +swagger-ui-bundle==0.0.8 +urllib3==1.26.2 +waitress==1.4.4 webob==1.8.6 webtest==2.0.35 werkzeug==1.0.1 +wrapt==1.12.1 +wtforms==2.3.3 xlrd==1.2.0 -xlsxwriter==1.2.8 -zipp==3.1.0 +xlsxwriter==1.3.7 From 9eea26e019de87a96a09baa66f27e3a9dd3d4472 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Fri, 11 Dec 2020 08:34:59 -0500 Subject: [PATCH 22/35] add workflow_sync test --- crc/api/workflow_sync.py | 8 ++++---- tests/base_test.py | 8 ++++++++ tests/test_workflow_sync.py | 19 +++++++++++++++++++ 3 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 tests/test_workflow_sync.py diff --git a/crc/api/workflow_sync.py b/crc/api/workflow_sync.py index 98f04236..c4256a68 100644 --- a/crc/api/workflow_sync.py +++ b/crc/api/workflow_sync.py @@ -31,7 +31,7 @@ def get_changed_workflows(remote,as_df=False): response = requests.get('http://'+remote+'/v1.0/workflow_sync/all',headers={'X-CR-API-KEY':app.config['API_TOKEN']}) except: raise ApiError("endpoint error", 'had a problem connecting to '+remote) - if response.status_code != 200: + if not response.ok: raise ApiError("endpoint error", response.text) remote = pd.DataFrame(json.loads(response.text)) @@ -102,7 +102,7 @@ def sync_changed_files(remote,workflow_spec_id): except: raise ApiError("endpoint error", 'had a problem connecting to '+remote) - if remotespectext.status_code != 200: + if not remotespectext.ok: raise ApiError("endpoint error", response.text) specdict = json.loads(remotespectext.text) @@ -170,7 +170,7 @@ def sync_changed_files(remote,workflow_spec_id): except: raise ApiError("endpoint error", 'had a problem connecting to ' + remote) - if response.status_code != 200: + if not response.ok: raise ApiError("endpoint error", response.text) FileService.update_file(currentfile,response.content,updatefile['type']) @@ -190,7 +190,7 @@ def get_changed_files(remote,workflow_spec_id,as_df=False): except: raise ApiError("endpoint error", 'had a problem connecting to '+remote) - if response.status_code != 200: + if not response.ok: raise ApiError("endpoint error", response.text) # This is probably very and may allow cross site attacks - fix later diff --git a/tests/base_test.py b/tests/base_test.py index af899917..e32bbd9b 100644 --- a/tests/base_test.py +++ b/tests/base_test.py @@ -204,6 +204,14 @@ class BaseTest(unittest.TestCase): data = myfile.read() return data + @staticmethod + def workflow_sync_response(file_name): + filepath = os.path.join(app.root_path, '..', 'tests', 'data', 'workflow_sync_responses', file_name) + with open(filepath, 'r') as myfile: + data = myfile.read() + return data + + def assert_success(self, rv, msg=""): try: data = json.loads(rv.get_data(as_text=True)) diff --git a/tests/test_workflow_sync.py b/tests/test_workflow_sync.py new file mode 100644 index 00000000..9a739c48 --- /dev/null +++ b/tests/test_workflow_sync.py @@ -0,0 +1,19 @@ +from unittest.mock import patch + +from crc import app +from tests.base_test import BaseTest +from crc.api.workflow_sync import get_all_spec_state, get_changed_workflows +import json +pip + +class TestWorkflowSync(BaseTest): + + @patch('crc.api.workflow_sync.requests.get') + def test_get_no_changes(self, mock_get): + othersys = 
get_all_spec_state() + mock_get.return_value.ok = True + mock_get.return_value.text = json.dumps(othersys) + response = get_changed_workflows('somewhere over the rainbow') + self.assertIsNotNone(response) + self.assertEqual(response,[]) + From 3a1160efac2348748b083d1b9dd4f83c11aee2b0 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Fri, 11 Dec 2020 11:41:32 -0500 Subject: [PATCH 23/35] refactored calls into a service --- crc/api.yml | 3 +- crc/api/workflow_sync.py | 51 ++++++-------------------- tests/study/test_study_api.py | 2 +- tests/test_workflow_sync.py | 68 +++++++++++++++++++++++++++++++---- 4 files changed, 76 insertions(+), 48 deletions(-) diff --git a/crc/api.yml b/crc/api.yml index ce02307f..0939daa5 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -657,7 +657,8 @@ paths: get: operationId: crc.api.file.get_file_data_by_hash summary: Returns only the file contents - security: [] # Disable security for this endpoint only. + security: + - ApiKeyAuth: [] tags: - Files responses: diff --git a/crc/api/workflow_sync.py b/crc/api/workflow_sync.py index c4256a68..6a64fe7d 100644 --- a/crc/api/workflow_sync.py +++ b/crc/api/workflow_sync.py @@ -7,6 +7,7 @@ from crc.api.common import ApiError from crc.models.file import FileModel, FileDataModel from crc.models.workflow import WorkflowSpecModel, WorkflowSpecCategoryModel from crc.services.file_service import FileService +from crc.services.workflow_sync import WorkflowSyncService def join_uuids(uuids): @@ -27,21 +28,16 @@ def get_changed_workflows(remote,as_df=False): gets a remote endpoint - gets the workflows and then determines what workflows are different from the remote endpoint """ - try: - response = requests.get('http://'+remote+'/v1.0/workflow_sync/all',headers={'X-CR-API-KEY':app.config['API_TOKEN']}) - except: - raise ApiError("endpoint error", 'had a problem connecting to '+remote) - if not response.ok: - raise ApiError("endpoint error", response.text) - remote = pd.DataFrame(json.loads(response.text)) + remote_workflows_list = WorkflowSyncService.get_all_remote_workflows(remote) + remote_workflows = pd.DataFrame(remote_workflows_list) # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index local = get_all_spec_state_dataframe().reset_index() # merge these on workflow spec id and hash - this will # make two different date columns date_x and date_y - different = remote.merge(local, + different = remote_workflows.merge(local, right_on=['workflow_spec_id','md5_hash'], left_on=['workflow_spec_id','md5_hash'], how = 'outer' , @@ -66,7 +62,7 @@ def get_changed_workflows(remote,as_df=False): # get an exclusive or list of workflow ids - that is we want lists of files that are # on one machine or the other, but not both - remote_spec_ids = remote[['workflow_spec_id']] + remote_spec_ids = remote_workflows[['workflow_spec_id']] local_spec_ids = local[['workflow_spec_id']] left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])] right = local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])] @@ -96,16 +92,8 @@ def sync_all_changed_workflows(remote): def sync_changed_files(remote,workflow_spec_id): # make sure that spec is local before syncing files - try: - remotespectext = requests.get('http://'+remote+'/v1.0/workflow-specification/'+workflow_spec_id, - headers={'X-CR-API-KEY': app.config['API_TOKEN']}) - except: - raise ApiError("endpoint error", 'had a problem connecting to '+remote) - if not remotespectext.ok: - raise 
ApiError("endpoint error", response.text) - - specdict = json.loads(remotespectext.text) + specdict = WorkflowSyncService.get_remote_workfow_spec(remote,workflow_spec_id) localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first() if localspec is None: @@ -164,16 +152,8 @@ def sync_changed_files(remote,workflow_spec_id): currentfile.content_type = updatefile['content_type'] currentfile.primary_process_id = updatefile['primary_process_id'] session.add(currentfile) - try: - response = requests.get('http://'+remote+'/v1.0/file/'+updatefile['md5_hash']+'/hash_data', - headers={'X-CR-API-KEY': app.config['API_TOKEN']}) - except: - raise ApiError("endpoint error", 'had a problem connecting to ' + remote) - - if not response.ok: - raise ApiError("endpoint error", response.text) - - FileService.update_file(currentfile,response.content,updatefile['type']) + content = WorkflowSyncService.get_remote_file_by_hash(remote,updatefile['md5_hash']) + FileService.update_file(currentfile,content,updatefile['type']) session.commit() return [x['filename'] for x in updatefiles] @@ -184,21 +164,12 @@ def get_changed_files(remote,workflow_spec_id,as_df=False): local and remote and determines what files have been change and returns a list of those files """ - try: - response = requests.get('http://'+remote+'/v1.0/workflow_sync/'+workflow_spec_id+'/files', - headers={'X-CR-API-KEY':app.config['API_TOKEN']}) - except: - raise ApiError("endpoint error", 'had a problem connecting to '+remote) - - if not response.ok: - raise ApiError("endpoint error", response.text) - - # This is probably very and may allow cross site attacks - fix later - remote = pd.DataFrame(json.loads(response.text)) + remote_file_list = WorkflowSyncService.get_remote_workflow_spec_files(remote,workflow_spec_id) + remote_files = pd.DataFrame(remote_file_list) # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index local = get_workflow_spec_files_dataframe(workflow_spec_id).reset_index() local['md5_hash'] = local['md5_hash'].astype('str') - different = remote.merge(local, + different = remote_files.merge(local, right_on=['filename','md5_hash'], left_on=['filename','md5_hash'], how = 'outer' , diff --git a/tests/study/test_study_api.py b/tests/study/test_study_api.py index e7d33a9d..df734086 100644 --- a/tests/study/test_study_api.py +++ b/tests/study/test_study_api.py @@ -165,7 +165,7 @@ class TestStudyApi(BaseTest): self.assertEqual(study_event.comment, update_comment) self.assertEqual(study_event.user_uid, self.test_uid) - @patch('crc.services.protocol_builder.ProtocolBuilderService.get_investigators') # mock_studies + @patch('crc.services.protocol_builder.ProtocolBuilderService.get_investigators') # mock_investigators @patch('crc.services.protocol_builder.ProtocolBuilderService.get_required_docs') # mock_docs @patch('crc.services.protocol_builder.ProtocolBuilderService.get_study_details') # mock_details @patch('crc.services.protocol_builder.ProtocolBuilderService.get_studies') # mock_studies diff --git a/tests/test_workflow_sync.py b/tests/test_workflow_sync.py index 9a739c48..f2c68719 100644 --- a/tests/test_workflow_sync.py +++ b/tests/test_workflow_sync.py @@ -1,19 +1,75 @@ from unittest.mock import patch -from crc import app +from crc import db from tests.base_test import BaseTest from crc.api.workflow_sync import get_all_spec_state, get_changed_workflows +from crc.models.workflow import WorkflowSpecModel import json -pip +from datetime import datetime +from 
crc.services.file_service import FileService class TestWorkflowSync(BaseTest): - @patch('crc.api.workflow_sync.requests.get') + @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows') def test_get_no_changes(self, mock_get): + self.load_example_data() othersys = get_all_spec_state() - mock_get.return_value.ok = True - mock_get.return_value.text = json.dumps(othersys) - response = get_changed_workflows('somewhere over the rainbow') + mock_get.return_value = othersys + response = get_changed_workflows('localhost:0000') # not actually used due to mock self.assertIsNotNone(response) self.assertEqual(response,[]) + + @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows') + def test_remote_workflow_change(self, mock_get): + self.load_example_data() + othersys = get_all_spec_state() + othersys[1]['date_created'] = str(datetime.now()) + othersys[1]['md5_hash'] = '12345' + mock_get.return_value = othersys + response = get_changed_workflows('localhost:0000') #endpoint is not used due to mock + self.assertIsNotNone(response) + self.assertEqual(len(response),1) + self.assertEqual(response[0]['workflow_spec_id'], 'random_fact') + self.assertEqual(response[0]['location'], 'remote') + self.assertEqual(response[0]['new'], False) + + + + @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows') + def test_remote_workflow_has_new(self, mock_get): + self.load_example_data() + othersys = get_all_spec_state() + othersys.append({'workflow_spec_id':'my_new_workflow', + 'date_created':str(datetime.now()), + 'md5_hash': '12345'}) + mock_get.return_value = othersys + response = get_changed_workflows('localhost:0000') #endpoint is not used due to mock + self.assertIsNotNone(response) + self.assertEqual(len(response),1) + self.assertEqual(response[0]['workflow_spec_id'],'my_new_workflow') + self.assertEqual(response[0]['location'], 'remote') + self.assertEqual(response[0]['new'], True) + + + @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows') + def test_local_workflow_has_new(self, mock_get): + self.load_example_data() + + othersys = get_all_spec_state() + mock_get.return_value = othersys + wf_spec = WorkflowSpecModel() + wf_spec.id = 'abcdefg' + wf_spec.display_name = 'New Workflow - Yum!!' 
+        wf_spec.name = 'my_new_workflow'
+        wf_spec.description = 'yep - its a new workflow'
+        wf_spec.category_id = 0
+        wf_spec.display_order = 0
+        db.session.add(wf_spec)
+        db.session.commit()
+        FileService.add_workflow_spec_file(wf_spec,'dummyfile.txt','text',b'this is a test')
+        # note: get_changed_workflows only reports specs that are new or changed on the remote,
+        # so a spec that exists only locally is not returned - the response is simply empty
+        response = get_changed_workflows('localhost:0000') #endpoint is not used due to mock
+        self.assertIsNotNone(response)
+        self.assertEqual(response,[])

From 55e6f5b753f7ad12f4d7e719161824c206af4d79 Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Fri, 11 Dec 2020 11:42:00 -0500
Subject: [PATCH 24/35] refactored calls into a service - forgot to add actual
 service

---
 crc/services/workflow_sync.py | 57 +++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 crc/services/workflow_sync.py

diff --git a/crc/services/workflow_sync.py b/crc/services/workflow_sync.py
new file mode 100644
index 00000000..e2026746
--- /dev/null
+++ b/crc/services/workflow_sync.py
@@ -0,0 +1,57 @@
+import json
+from json import JSONDecodeError
+from typing import List, Optional
+
+import requests
+
+from crc import app
+from crc.api.common import ApiError
+
+
+class WorkflowSyncService(object):
+
+    @staticmethod
+    def get_remote_file_by_hash(remote,md5_hash):
+        url = remote+'/v1.0/file/'+md5_hash+'/hash_data'
+        return WorkflowSyncService.__make_request(url,return_contents=True)
+
+    @staticmethod
+    def get_remote_workflow_spec_files(remote,workflow_spec_id):
+        url = remote+'/v1.0/workflow_sync/'+workflow_spec_id+'/files'
+        return WorkflowSyncService.__make_request(url)
+
+    @staticmethod
+    def get_remote_workflow_spec(remote, workflow_spec_id):
+        """
+        this just gets the details of a workflow spec from the
+        remote side.
+
+        FIXME: for testing I had changed the security on the API endpoint
+        below so that I could run it - I need to restore the security on this
+        and make a new workflow_sync endpoint that just calls this same function
+        so that I can use the API_TOKEN rather than the other token setup
+        """
+        url = remote+'/v1.0/workflow-specification/'+workflow_spec_id
+        return WorkflowSyncService.__make_request(url)
+
+    @staticmethod
+    def get_all_remote_workflows(remote):
+        url = remote + '/v1.0/workflow_sync/all'
+        return WorkflowSyncService.__make_request(url)
+
+    @staticmethod
+    def __make_request(url,return_contents=False):
+        try:
+            response = requests.get(url,headers={'X-CR-API-KEY':app.config['API_TOKEN']})
+        except:
+            raise ApiError("workflow_sync_error", 'had a problem connecting to ' + url)
+        if response.ok and response.text:
+            if return_contents:
+                return response.content
+            else:
+                return json.loads(response.text)
+        else:
+            raise ApiError("workflow_sync_error",
+                           "Received an invalid response from the remote workflow sync server (status %s): %s when calling "
+                           "url '%s'." %
+                           (response.status_code, response.text, url))

From adc4dc4453392cbae7665a6e0e0b36fe90eb3b5a Mon Sep 17 00:00:00 2001
From: Kelly McDonald
Date: Fri, 11 Dec 2020 12:03:41 -0500
Subject: [PATCH 25/35] redid the api a bit so that nothing was using open
 security - added a new endpoint for getting a workflow spec that uses the
 alternate API_TOKEN security and left the original endpoint as it was.
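
As a rough illustration of the intended usage (a sketch, not code from this change): assuming
both instances share the same API_TOKEN, a sync client could fetch a spec from the new
token-protected endpoint like this; fetch_remote_spec is a hypothetical helper name.

    import requests

    def fetch_remote_spec(remote_host, workflow_spec_id, api_token):
        # the sync endpoints accept the X-CR-API-KEY header instead of a user token
        url = 'http://' + remote_host + '/v1.0/workflow_sync/' + workflow_spec_id + '/spec'
        response = requests.get(url, headers={'X-CR-API-KEY': api_token})
        response.raise_for_status()  # surface connection or auth problems to the caller
        return response.json()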
--- crc/api.yml | 24 +++++++++++++++++++++++- crc/api/workflow_sync.py | 4 ++++ crc/services/workflow_sync.py | 7 +------ 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/crc/api.yml b/crc/api.yml index 0939daa5..a4b799ea 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -156,6 +156,29 @@ paths: items: $ref: "#/components/schemas/WorkflowSpecDiffList" + /workflow_sync/{workflow_spec_id}/spec: + parameters: + - name: workflow_spec_id + in: path + required: false + description: The unique id of an existing workflow specification to modify. + schema: + type: string + get: + operationId: crc.api.workflow_sync.get_sync_workflow_specification + summary: Returns a single workflow specification + security: + - ApiKeyAuth: [] + tags: + - Workflow Sync API + responses: + '200': + description: Workflow specification. + content: + application/json: + schema: + $ref: "#/components/schemas/WorkflowSpec" + /workflow_sync/{workflow_spec_id}/files: get: @@ -401,7 +424,6 @@ paths: get: operationId: crc.api.workflow.get_workflow_specification summary: Returns a single workflow specification - security: [] tags: - Workflow Specifications responses: diff --git a/crc/api/workflow_sync.py b/crc/api/workflow_sync.py index 6a64fe7d..4915af0f 100644 --- a/crc/api/workflow_sync.py +++ b/crc/api/workflow_sync.py @@ -8,8 +8,12 @@ from crc.models.file import FileModel, FileDataModel from crc.models.workflow import WorkflowSpecModel, WorkflowSpecCategoryModel from crc.services.file_service import FileService from crc.services.workflow_sync import WorkflowSyncService +from crc.api.workflow import get_workflow_specification +def get_sync_workflow_specification(workflow_spec_id): + return get_workflow_specification(workflow_spec_id) + def join_uuids(uuids): """Joins a pandas Series of uuids and combines them in one hash""" combined_uuids = ''.join([str(uuid) for uuid in uuids.sort_values()]) # ensure that values are always diff --git a/crc/services/workflow_sync.py b/crc/services/workflow_sync.py index e2026746..26796413 100644 --- a/crc/services/workflow_sync.py +++ b/crc/services/workflow_sync.py @@ -25,13 +25,8 @@ class WorkflowSyncService(object): """ this just gets the details of a workflow spec from the remote side. - - FIXME: for testing I had changed the security on the API endpoint - below so that I could run it - I need to restore the security on this - and make a new workflow_sync endpoint that just calls this same function - so that I can use the API_TOKEN rather than the other token setup """ - url = remote+'/v1.0/workflow-specification/'+workflow_spec_id + url = remote+'/v1.0/workflow-sync/'+workflow_spec_id+'/spec' return WorkflowSyncService.__make_request(url) @staticmethod From 856fe445b0b85c70ddbd41339174da8b3473c9ee Mon Sep 17 00:00:00 2001 From: mike cullerton Date: Fri, 11 Dec 2020 16:26:03 -0500 Subject: [PATCH 26/35] Added user.uid to ApiError and Sentry logging --- crc/api/common.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/crc/api/common.py b/crc/api/common.py index e2b13b9a..78cbc018 100644 --- a/crc/api/common.py +++ b/crc/api/common.py @@ -4,6 +4,8 @@ from flask import g from crc import ma, app +import sentry_sdk + class ApiError(Exception): def __init__(self, code, message, status_code=400, @@ -16,6 +18,13 @@ class ApiError(Exception): self.file_name = file_name or "" # OPTIONAL: The file that caused the error. self.tag = tag or "" # OPTIONAL: The XML Tag that caused the issue. 
self.task_data = task_data or "" # OPTIONAL: A snapshot of data connected to the task when error ocurred. + if hasattr(g,'user'): + user = g.user.uid + else: + user = 'Unknown' + self.task_user = user + # This is for sentry logging into Slack + sentry_sdk.set_context("User", {'user': user}) Exception.__init__(self, self.message) @classmethod @@ -59,7 +68,7 @@ class ApiError(Exception): class ApiErrorSchema(ma.Schema): class Meta: fields = ("code", "message", "workflow_name", "file_name", "task_name", "task_id", - "task_data") + "task_data", "task_user") @app.errorhandler(ApiError) From 7defc2b02f1eff37696dbc3dc5d863254971ccc6 Mon Sep 17 00:00:00 2001 From: mike cullerton Date: Fri, 11 Dec 2020 17:47:53 -0500 Subject: [PATCH 27/35] Tests for uid in logs. Currently we test for uid in a response. This covers ApiError. Currently, we don't have a test for Sentry. Unsure how to do this. Also added a script, service and test workflow to help. (Also to learn about adding a script and service.) --- crc/scripts/failing_script.py | 15 ++++++ crc/services/failing_service.py | 11 ++++ .../failing_workflow/failing_workflow.bpmn | 52 +++++++++++++++++++ tests/test_user_in_logs.py | 35 ++++++++----- 4 files changed, 99 insertions(+), 14 deletions(-) create mode 100644 crc/scripts/failing_script.py create mode 100644 crc/services/failing_service.py create mode 100644 tests/data/failing_workflow/failing_workflow.bpmn diff --git a/crc/scripts/failing_script.py b/crc/scripts/failing_script.py new file mode 100644 index 00000000..2e381cd2 --- /dev/null +++ b/crc/scripts/failing_script.py @@ -0,0 +1,15 @@ +from crc.scripts.script import Script +from crc.services.failing_service import FailingService + + +class FailingScript(Script): + + def get_description(self): + return """It fails""" + + def do_task_validate_only(self, task, *args, **kwargs): + pass + + def do_task(self, task, *args, **kwargs): + + FailingService.fail_as_service() \ No newline at end of file diff --git a/crc/services/failing_service.py b/crc/services/failing_service.py new file mode 100644 index 00000000..6afae9fc --- /dev/null +++ b/crc/services/failing_service.py @@ -0,0 +1,11 @@ +from crc.api.common import ApiError + + +class FailingService(object): + + @staticmethod + def fail_as_service(): + """It fails""" + + raise ApiError(code='bad_service', + message='This is my failing service') diff --git a/tests/data/failing_workflow/failing_workflow.bpmn b/tests/data/failing_workflow/failing_workflow.bpmn new file mode 100644 index 00000000..36869390 --- /dev/null +++ b/tests/data/failing_workflow/failing_workflow.bpmn @@ -0,0 +1,52 @@ + + + + + Flow_0cszvz2 + + + + Flow_0cszvz2 + Flow_1l02umo + failing_script() + + + + Flow_1l02umo + Flow_08zq7mf + print('I am a placeholder.') + + + Flow_08zq7mf + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/test_user_in_logs.py b/tests/test_user_in_logs.py index 1ebb9b79..e6c65447 100644 --- a/tests/test_user_in_logs.py +++ b/tests/test_user_in_logs.py @@ -1,21 +1,28 @@ -from crc.api.common import ApiError from tests.base_test import BaseTest +from crc import db +from crc.models.user import UserModel +import json class TestUserID(BaseTest): - def test_user_id(self): - # try: - # False - # except: - # raise ApiError + def test_user_id_in_request(self): + """This assures the uid is in response via ApiError""" - # ApiError() - # self.assertEqual(True,False) + workflow = self.create_workflow('failing_workflow') + user_uid = workflow.study.user_uid + user = 
db.session.query(UserModel).filter_by(uid=user_uid).first() + rv = self.app.get(f'/v1.0/workflow/{workflow.id}' + f'?soft_reset={str(False)}' + f'&hard_reset={str(False)}' + f'&do_engine_steps={str(True)}', + headers=self.logged_in_headers(user), + content_type="application/json") + data = json.loads(rv.data) + self.assertEqual(data['task_user'], user_uid) - # with self.assertRaises(ApiError) as api_error: - # self.assertEqual(2, 3) - workflow = self.create_workflow('email') - first_task = self.get_workflow_api(workflow).next_task - - raise ApiError('unknown_approval', 'Please provide a valid Approval ID.') + def test_user_id_in_sentry(self): + """This assures the uid is in Sentry. + We use this to send errors to Slack.""" + # Currently have no clue how to do this :( + pass From 02ea414b94d36f1299ca647d025a7de2ffa923d8 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 14 Dec 2020 10:07:19 -0500 Subject: [PATCH 28/35] Additional fixes to Navigation to allow a nested navigation structure. --- Pipfile.lock | 64 +++++++++---------- crc/models/api_models.py | 5 +- crc/services/workflow_service.py | 48 ++++++++------ tests/test_tasks_api.py | 40 ++++++------ tests/test_user_roles.py | 37 ++++++----- .../test_workflow_processor_multi_instance.py | 14 ++-- 6 files changed, 114 insertions(+), 94 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index edda7a09..3298cbfd 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -80,10 +80,10 @@ }, "certifi": { "hashes": [ - "sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd", - "sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4" + "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", + "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" ], - "version": "==2020.11.8" + "version": "==2020.12.5" }, "cffi": { "hashes": [ @@ -557,33 +557,33 @@ }, "pandas": { "hashes": [ - "sha256:09e0503758ad61afe81c9069505f8cb8c1e36ea8cc1e6826a95823ef5b327daf", - "sha256:0a11a6290ef3667575cbd4785a1b62d658c25a2fd70a5adedba32e156a8f1773", - "sha256:0d9a38a59242a2f6298fff45d09768b78b6eb0c52af5919ea9e45965d7ba56d9", - "sha256:112c5ba0f9ea0f60b2cc38c25f87ca1d5ca10f71efbee8e0f1bee9cf584ed5d5", - "sha256:185cf8c8f38b169dbf7001e1a88c511f653fbb9dfa3e048f5e19c38049e991dc", - "sha256:3aa8e10768c730cc1b610aca688f588831fa70b65a26cb549fbb9f35049a05e0", - "sha256:41746d520f2b50409dffdba29a15c42caa7babae15616bcf80800d8cfcae3d3e", - "sha256:43cea38cbcadb900829858884f49745eb1f42f92609d368cabcc674b03e90efc", - "sha256:5378f58172bd63d8c16dd5d008d7dcdd55bf803fcdbe7da2dcb65dbbf322f05b", - "sha256:54404abb1cd3f89d01f1fb5350607815326790efb4789be60508f458cdd5ccbf", - "sha256:5dac3aeaac5feb1016e94bde851eb2012d1733a222b8afa788202b836c97dad5", - "sha256:5fdb2a61e477ce58d3f1fdf2470ee142d9f0dde4969032edaf0b8f1a9dafeaa2", - "sha256:6613c7815ee0b20222178ad32ec144061cb07e6a746970c9160af1ebe3ad43b4", - "sha256:6d2b5b58e7df46b2c010ec78d7fb9ab20abf1d306d0614d3432e7478993fbdb0", - "sha256:8a5d7e57b9df2c0a9a202840b2881bb1f7a648eba12dd2d919ac07a33a36a97f", - "sha256:8b4c2055ebd6e497e5ecc06efa5b8aa76f59d15233356eb10dad22a03b757805", - "sha256:a15653480e5b92ee376f8458197a58cca89a6e95d12cccb4c2d933df5cecc63f", - "sha256:a7d2547b601ecc9a53fd41561de49a43d2231728ad65c7713d6b616cd02ddbed", - "sha256:a979d0404b135c63954dea79e6246c45dd45371a88631cdbb4877d844e6de3b6", - "sha256:b1f8111635700de7ac350b639e7e452b06fc541a328cf6193cf8fc638804bab8", - "sha256:c5a3597880a7a29a31ebd39b73b2c824316ae63a05c3c8a5ce2aea3fc68afe35", - 
"sha256:c681e8fcc47a767bf868341d8f0d76923733cbdcabd6ec3a3560695c69f14a1e", - "sha256:cf135a08f306ebbcfea6da8bf775217613917be23e5074c69215b91e180caab4", - "sha256:e2b8557fe6d0a18db4d61c028c6af61bfed44ef90e419ed6fadbdc079eba141e" + "sha256:0a643bae4283a37732ddfcecab3f62dd082996021b980f580903f4e8e01b3c5b", + "sha256:0de3ddb414d30798cbf56e642d82cac30a80223ad6fe484d66c0ce01a84d6f2f", + "sha256:19a2148a1d02791352e9fa637899a78e371a3516ac6da5c4edc718f60cbae648", + "sha256:21b5a2b033380adbdd36b3116faaf9a4663e375325831dac1b519a44f9e439bb", + "sha256:24c7f8d4aee71bfa6401faeba367dd654f696a77151a8a28bc2013f7ced4af98", + "sha256:26fa92d3ac743a149a31b21d6f4337b0594b6302ea5575b37af9ca9611e8981a", + "sha256:2860a97cbb25444ffc0088b457da0a79dc79f9c601238a3e0644312fcc14bf11", + "sha256:2b1c6cd28a0dfda75c7b5957363333f01d370936e4c6276b7b8e696dd500582a", + "sha256:2c2f7c670ea4e60318e4b7e474d56447cf0c7d83b3c2a5405a0dbb2600b9c48e", + "sha256:3be7a7a0ca71a2640e81d9276f526bca63505850add10206d0da2e8a0a325dae", + "sha256:4c62e94d5d49db116bef1bd5c2486723a292d79409fc9abd51adf9e05329101d", + "sha256:5008374ebb990dad9ed48b0f5d0038124c73748f5384cc8c46904dace27082d9", + "sha256:5447ea7af4005b0daf695a316a423b96374c9c73ffbd4533209c5ddc369e644b", + "sha256:573fba5b05bf2c69271a32e52399c8de599e4a15ab7cec47d3b9c904125ab788", + "sha256:5a780260afc88268a9d3ac3511d8f494fdcf637eece62fb9eb656a63d53eb7ca", + "sha256:70865f96bb38fec46f7ebd66d4b5cfd0aa6b842073f298d621385ae3898d28b5", + "sha256:731568be71fba1e13cae212c362f3d2ca8932e83cb1b85e3f1b4dd77d019254a", + "sha256:b61080750d19a0122469ab59b087380721d6b72a4e7d962e4d7e63e0c4504814", + "sha256:bf23a3b54d128b50f4f9d4675b3c1857a688cc6731a32f931837d72effb2698d", + "sha256:c16d59c15d946111d2716856dd5479221c9e4f2f5c7bc2d617f39d870031e086", + "sha256:c61c043aafb69329d0f961b19faa30b1dab709dd34c9388143fc55680059e55a", + "sha256:c94ff2780a1fd89f190390130d6d36173ca59fcfb3fe0ff596f9a56518191ccb", + "sha256:edda9bacc3843dfbeebaf7a701763e68e741b08fccb889c003b0a52f0ee95782", + "sha256:f10fc41ee3c75a474d3bdf68d396f10782d013d7f67db99c0efbfd0acb99701b" ], "index": "pypi", - "version": "==1.1.4" + "version": "==1.1.5" }, "psycopg2-binary": { "hashes": [ @@ -650,10 +650,10 @@ }, "pygments": { "hashes": [ - "sha256:381985fcc551eb9d37c52088a32914e00517e57f4a21609f48141ba08e193fa0", - "sha256:88a0bbcd659fcb9573703957c6b9cff9fab7295e6e76db54c9d00ae42df32773" + "sha256:ccf3acacf3782cbed4a989426012f1c535c9a90d3a7fc3f16d231b9372d2b716", + "sha256:f275b6c0909e5dafd2d6269a656aa90fa58ebf4a74f8fcf9053195d226b24a08" ], - "version": "==2.7.2" + "version": "==2.7.3" }, "pyjwt": { "hashes": [ @@ -839,7 +839,7 @@ }, "spiffworkflow": { "git": "https://github.com/sartography/SpiffWorkflow.git", - "ref": "cdab930848493d74250224f1956177fee231a5b7" + "ref": "71f75612779822166f3fc97ca39bb4630202af9d" }, "sqlalchemy": { "hashes": [ diff --git a/crc/models/api_models.py b/crc/models/api_models.py index 6f2e08da..00c040b6 100644 --- a/crc/models/api_models.py +++ b/crc/models/api_models.py @@ -144,21 +144,24 @@ class TaskSchema(ma.Schema): class NavigationItemSchema(ma.Schema): class Meta: fields = ["spec_id", "name", "spec_type", "task_id", "description", "backtracks", "indent", - "lane", "state"] + "lane", "state", "children"] unknown = INCLUDE state = marshmallow.fields.String(required=False, allow_none=True) description = marshmallow.fields.String(required=False, allow_none=True) backtracks = marshmallow.fields.String(required=False, allow_none=True) lane = marshmallow.fields.String(required=False, allow_none=True) task_id = 
marshmallow.fields.String(required=False, allow_none=True) + children = marshmallow.fields.List(marshmallow.fields.Nested(lambda: NavigationItemSchema())) @marshmallow.post_load def make_nav(self, data, **kwargs): state = data.pop('state', None) task_id = data.pop('task_id', None) + children = data.pop('children', []) item = NavItem(**data) item.state = state item.task_id = task_id + item.children = children return item class WorkflowApi(object): diff --git a/crc/services/workflow_service.py b/crc/services/workflow_service.py index 44d0add9..06c48ff9 100644 --- a/crc/services/workflow_service.py +++ b/crc/services/workflow_service.py @@ -6,7 +6,7 @@ from datetime import datetime from typing import List import jinja2 -from SpiffWorkflow import Task as SpiffTask, WorkflowException +from SpiffWorkflow import Task as SpiffTask, WorkflowException, NavItem from SpiffWorkflow.bpmn.specs.EndEvent import EndEvent from SpiffWorkflow.bpmn.specs.ManualTask import ManualTask from SpiffWorkflow.bpmn.specs.MultiInstanceTask import MultiInstanceTask @@ -14,7 +14,7 @@ from SpiffWorkflow.bpmn.specs.ScriptTask import ScriptTask from SpiffWorkflow.bpmn.specs.StartEvent import StartEvent from SpiffWorkflow.bpmn.specs.UserTask import UserTask from SpiffWorkflow.dmn.specs.BusinessRuleTask import BusinessRuleTask -from SpiffWorkflow.specs import CancelTask, StartTask +from SpiffWorkflow.specs import CancelTask, StartTask, MultiChoice from SpiffWorkflow.util.deep_merge import DeepMerge from jinja2 import Template @@ -319,25 +319,9 @@ class WorkflowService(object): """Returns an API model representing the state of the current workflow, if requested, and possible, next_task is set to the current_task.""" - nav_dict = processor.bpmn_workflow.get_nav_list() + navigation = processor.bpmn_workflow.get_deep_nav_list() + WorkflowService.update_navigation(navigation, processor) - # Some basic cleanup of the title for the for the navigation. - navigation = [] - for nav_item in nav_dict: - spiff_task = processor.bpmn_workflow.get_task(nav_item.task_id) - if spiff_task: - # Use existing logic to set the description, and alter the state based on permissions. - api_task = WorkflowService.spiff_task_to_api_task(spiff_task, add_docs_and_forms=False) - nav_item.description = api_task.title - user_uids = WorkflowService.get_users_assigned_to_task(processor, spiff_task) - if not UserService.in_list(user_uids, allow_admin_impersonate=True): - nav_item.state = WorkflowService.TASK_STATE_LOCKED - else: - # Strip off the first word in the description, to meet guidlines for BPMN. - if nav_item.description: - if nav_item.description is not None and ' ' in nav_item.description: - nav_item.description = nav_item.description.partition(' ')[2] - navigation.append(nav_item) spec = db.session.query(WorkflowSpecModel).filter_by(id=processor.workflow_spec_id).first() workflow_api = WorkflowApi( @@ -366,6 +350,29 @@ class WorkflowService(object): workflow_api.next_task.state = WorkflowService.TASK_STATE_LOCKED return workflow_api + @staticmethod + def update_navigation(navigation: List[NavItem], processor: WorkflowProcessor): + # Recursive function to walk down through children, and clean up descriptions, and statuses + for nav_item in navigation: + spiff_task = processor.bpmn_workflow.get_task(nav_item.task_id) + if spiff_task: + # Use existing logic to set the description, and alter the state based on permissions. 
+ api_task = WorkflowService.spiff_task_to_api_task(spiff_task, add_docs_and_forms=False) + nav_item.description = api_task.title + user_uids = WorkflowService.get_users_assigned_to_task(processor, spiff_task) + if (isinstance(spiff_task.task_spec, UserTask) or isinstance(spiff_task.task_spec, ManualTask)) \ + and not UserService.in_list(user_uids, allow_admin_impersonate=True): + nav_item.state = WorkflowService.TASK_STATE_LOCKED + else: + # Strip off the first word in the description, to meet guidlines for BPMN. + if nav_item.description: + if nav_item.description is not None and ' ' in nav_item.description: + nav_item.description = nav_item.description.partition(' ')[2] + + # Recurse here + WorkflowService.update_navigation(nav_item.children, processor) + + @staticmethod def get_previously_submitted_data(workflow_id, spiff_task): """ If the user has completed this task previously, find the form data for the last submission.""" @@ -393,6 +400,7 @@ class WorkflowService(object): return {} + @staticmethod def spiff_task_to_api_task(spiff_task, add_docs_and_forms=False): task_type = spiff_task.task_spec.__class__.__name__ diff --git a/tests/test_tasks_api.py b/tests/test_tasks_api.py index e80c79a6..bd2b300f 100644 --- a/tests/test_tasks_api.py +++ b/tests/test_tasks_api.py @@ -113,19 +113,20 @@ class TestTasksApi(BaseTest): self.assertIsNotNone(workflow_api.navigation) nav = workflow_api.navigation - self.assertEqual(9, len(nav)) + self.assertEqual(3, len(nav)) self.assertEqual("Do You Have Bananas", nav[1].description) self.assertEqual("Bananas?", nav[2].description) - self.assertEqual("FUTURE", nav[2].state) - self.assertEqual("yes", nav[3].description) - self.assertEqual(None, nav[3].state) - self.assertEqual("Task_Num_Bananas", nav[4].name) - self.assertEqual("LIKELY", nav[4].state) - self.assertEqual("EndEvent", nav[5].spec_type) - self.assertEqual("no", nav[6].description) - self.assertEqual(None, nav[6].state) - self.assertEqual("Task_Why_No_Bananas", nav[7].name) - self.assertEqual("MAYBE", nav[7].state) + self.assertEqual("LIKELY", nav[2].state) + + self.assertEqual("yes", nav[2].children[0].description) + self.assertEqual("LIKELY", nav[2].children[0].state) + self.assertEqual("of Bananas", nav[2].children[0].children[0].description) + self.assertEqual("EndEvent", nav[2].children[0].children[1].spec_type) + + self.assertEqual("no", nav[2].children[1].description) + self.assertEqual("MAYBE", nav[2].children[1].state) + self.assertEqual("no bananas", nav[2].children[1].children[0].description) + self.assertEqual("EndEvent", nav[2].children[1].children[1].spec_type) def test_navigation_with_exclusive_gateway(self): workflow = self.create_workflow('exclusive_gateway_2') @@ -134,14 +135,17 @@ class TestTasksApi(BaseTest): workflow_api = self.get_workflow_api(workflow) self.assertIsNotNone(workflow_api.navigation) nav = workflow_api.navigation - self.assertEqual(10, len(nav)) + self.assertEqual(6, len(nav)) self.assertEqual("Task 1", nav[1].description) self.assertEqual("Which Branch?", nav[2].description) - self.assertEqual("a", nav[3].description) - self.assertEqual("Task 2a", nav[4].description) - self.assertEqual("b", nav[5].description) - self.assertEqual("Task 2b", nav[6].description) - self.assertEqual("Task 3", nav[8].description) + self.assertEqual("a", nav[2].children[0].description) + self.assertEqual("Task 2a", nav[2].children[0].children[0].description) + self.assertEqual("b", nav[2].children[1].description) + self.assertEqual("Task 2b", 
nav[2].children[1].children[0].description) + self.assertEqual(None, nav[3].description) + self.assertEqual("Task 3", nav[4].description) + self.assertEqual("EndEvent", nav[5].spec_type) + def test_document_added_to_workflow_shows_up_in_file_list(self): self.create_reference_document() @@ -390,7 +394,7 @@ class TestTasksApi(BaseTest): navigation = workflow_api.navigation task = workflow_api.next_task - self.assertEqual(5, len(navigation)) + self.assertEqual(4, len(navigation)) self.assertEqual("UserTask", task.type) self.assertEqual("Activity_A", task.name) self.assertEqual("My Sub Process", task.process_name) diff --git a/tests/test_user_roles.py b/tests/test_user_roles.py index 90e452b0..95f9479b 100644 --- a/tests/test_user_roles.py +++ b/tests/test_user_roles.py @@ -64,7 +64,7 @@ class TestTasksApi(BaseTest): workflow_api = self.get_workflow_api(workflow, user_uid=submitter.uid) nav = workflow_api.navigation - self.assertEqual(9, len(nav)) + self.assertEqual(4, len(nav)) self.assertEqual("supervisor", nav[2].lane) def test_get_outstanding_tasks_awaiting_current_user(self): @@ -123,12 +123,10 @@ class TestTasksApi(BaseTest): # Navigation as Submitter with ready task. workflow_api = self.get_workflow_api(workflow, user_uid=submitter.uid) nav = workflow_api.navigation - self.assertEqual(9, len(nav)) + self.assertEqual(4, len(nav)) self.assertEqual('READY', nav[1].state) # First item is ready, no progress yet. self.assertEqual('LOCKED', nav[2].state) # Second item is locked, it is the review and doesn't belong to this user. - # third item is a gateway, and belongs to no one - self.assertEqual(None, nav[4].state) # Approved Path, has no operation - self.assertEqual(None, nav[6].state) # Rejected Path, has no operation. + self.assertEqual('LIKELY', nav[3].state) # Third item is a gateway, which contains things that are also locked. self.assertEqual('READY', workflow_api.next_task.state) # Navigation as Submitter after handoff to supervisor @@ -138,8 +136,7 @@ class TestTasksApi(BaseTest): nav = workflow_api.navigation self.assertEqual('COMPLETED', nav[1].state) # First item is ready, no progress yet. self.assertEqual('LOCKED', nav[2].state) # Second item is locked, it is the review and doesn't belong to this user. - self.assertEqual('MAYBE', nav[7].state) # third item is a gateway, and belongs to no one, and is locked. - self.assertEqual('LOCKED', workflow_api.next_task.state) + self.assertEqual('LIKELY', nav[3].state) # third item is a gateway, and belongs to no one # In the event the next task is locked, we should say something sensible here. # It is possible to look at the role of the task, and say The next task "TASK TITLE" will # be handled by 'dhf8r', who is full-filling the role of supervisor. the Task Data @@ -151,10 +148,9 @@ class TestTasksApi(BaseTest): # Navigation as Supervisor workflow_api = self.get_workflow_api(workflow, user_uid=supervisor.uid) nav = workflow_api.navigation - self.assertEqual(9, len(nav)) self.assertEqual('LOCKED', nav[1].state) # First item belongs to the submitter, and is locked. self.assertEqual('READY', nav[2].state) # Second item is ready, as we are now the supervisor. - self.assertEqual('LOCKED', nav[7].state) # Feedback is locked. + self.assertEqual('LIKELY', nav[3].state) # Feedback is locked. self.assertEqual('READY', workflow_api.next_task.state) data = workflow_api.next_task.data @@ -163,28 +159,37 @@ class TestTasksApi(BaseTest): # Navigation as Supervisor, after completing task. 
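        # get_deep_nav_list() returns a tree rather than a flat list: every NavItem
        # carries a .children list, so nested branch items are reached as
        # nav[i].children[j] instead of through a single flat index.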
nav = workflow_api.navigation - self.assertEqual(9, len(nav)) self.assertEqual('LOCKED', nav[1].state) # First item belongs to the submitter, and is locked. self.assertEqual('COMPLETED', nav[2].state) # Second item is locked, it is the review and doesn't belong to this user. - self.assertEqual('LOCKED', nav[7].state) # Feedback is LOCKED + self.assertEqual('READY', nav[3].state) # Gateway is ready, and should be unfolded + self.assertEqual(None, nav[3].children[0].state) # sequence flow for approved is none - we aren't going this way. + self.assertEqual('READY', nav[3].children[1].state) # sequence flow for denied is ready + self.assertEqual('LOCKED', nav[3].children[1].children[0].state) # Feedback is locked, it belongs to submitter + self.assertEqual('LOCKED', nav[3].children[1].children[0].state) # Approval is locked, it belongs to the submitter + + + self.assertEqual('LOCKED', workflow_api.next_task.state) # Navigation as Submitter, coming back in to a rejected workflow to view the rejection message. workflow_api = self.get_workflow_api(workflow, user_uid=submitter.uid) nav = workflow_api.navigation - self.assertEqual(9, len(nav)) + self.assertEqual(4, len(nav)) self.assertEqual('COMPLETED', nav[1].state) # First item belongs to the submitter, and is locked. self.assertEqual('LOCKED', nav[2].state) # Second item is locked, it is the review and doesn't belong to this user. - self.assertEqual('READY', nav[7].state) # Feedbck is now READY - self.assertEqual('READY', workflow_api.next_task.state) + self.assertEqual('READY', nav[3].state) + self.assertEqual(None, nav[3].children[0].state) # sequence flow for approved is none - we aren't going this way. + self.assertEqual('READY', nav[3].children[1].state) # sequence flow for denied is ready + self.assertEqual('READY', nav[3].children[1].children[0].state) # Feedback is locked, it belongs to submitter + self.assertEqual('READY', nav[3].children[1].children[0].state) # Approval is locked, it belongs to the submitter + # Navigation as Submitter, re-completing the original request a second time, and sending it for review. workflow_api = self.complete_form(workflow, workflow_api.next_task, data, user_uid=submitter.uid) nav = workflow_api.navigation - self.assertEqual(9, len(nav)) self.assertEqual('READY', nav[1].state) # When you loop back the task is again in the ready state. self.assertEqual('LOCKED', nav[2].state) # Second item is locked, it is the review and doesn't belong to this user. 
- self.assertEqual('COMPLETED', nav[7].state) # Feedback is completed + self.assertEqual('COMPLETED', nav[3].state) # Feedback is completed self.assertEqual('READY', workflow_api.next_task.state) data["favorite_color"] = "blue" diff --git a/tests/workflow/test_workflow_processor_multi_instance.py b/tests/workflow/test_workflow_processor_multi_instance.py index 99339101..72af78b3 100644 --- a/tests/workflow/test_workflow_processor_multi_instance.py +++ b/tests/workflow/test_workflow_processor_multi_instance.py @@ -52,11 +52,11 @@ class TestWorkflowProcessorMultiInstance(BaseTest): task_list = processor.get_ready_user_tasks() processor.complete_task(task_list[0]) processor.do_engine_steps() - nav_list = processor.bpmn_workflow.get_nav_list() + nav_list = processor.bpmn_workflow.get_flat_nav_list() processor.save() # reload after save processor = WorkflowProcessor(workflow_spec_model) - nav_list2 = processor.bpmn_workflow.get_nav_list() + nav_list2 = processor.bpmn_workflow.get_flat_nav_list() self.assertEqual(nav_list,nav_list2) @patch('crc.services.study_service.StudyService.get_investigators') @@ -158,7 +158,7 @@ class TestWorkflowProcessorMultiInstance(BaseTest): self.assertEqual(3, len(next_user_tasks)) # There should be six tasks in the navigation: start event, the script task, end event, and three tasks # for the three executions of hte multi-instance. - self.assertEqual(6, len(processor.bpmn_workflow.get_nav_list())) + self.assertEqual(6, len(processor.bpmn_workflow.get_flat_nav_list())) # We can complete the tasks out of order. task = next_user_tasks[2] @@ -176,7 +176,7 @@ class TestWorkflowProcessorMultiInstance(BaseTest): task.update_data({"investigator": {"email": "dhf8r@virginia.edu"}}) processor.complete_task(task) processor.do_engine_steps() - self.assertEqual(6, len(processor.bpmn_workflow.get_nav_list())) + self.assertEqual(6, len(processor.bpmn_workflow.get_flat_nav_list())) task = next_user_tasks[0] api_task = WorkflowService.spiff_task_to_api_task(task) @@ -184,7 +184,7 @@ class TestWorkflowProcessorMultiInstance(BaseTest): task.update_data({"investigator":{"email":"asd3v@virginia.edu"}}) processor.complete_task(task) processor.do_engine_steps() - self.assertEqual(6, len(processor.bpmn_workflow.get_nav_list())) + self.assertEqual(6, len(processor.bpmn_workflow.get_flat_nav_list())) task = next_user_tasks[1] api_task = WorkflowService.spiff_task_to_api_task(task) @@ -192,7 +192,7 @@ class TestWorkflowProcessorMultiInstance(BaseTest): task.update_data({"investigator":{"email":"asdf32@virginia.edu"}}) processor.complete_task(task) processor.do_engine_steps() - self.assertEqual(6, len(processor.bpmn_workflow.get_nav_list())) + self.assertEqual(6, len(processor.bpmn_workflow.get_flat_nav_list())) # Completing the tasks out of order, still provides the correct information. 
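        # The flat navigation list keeps the same six entries regardless of the order
        # in which the three multi-instance tasks were completed above.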
expected = self.mock_investigator_response @@ -203,4 +203,4 @@ class TestWorkflowProcessorMultiInstance(BaseTest): task.data['StudyInfo']['investigators']) self.assertEqual(WorkflowStatus.complete, processor.get_status()) - self.assertEqual(6, len(processor.bpmn_workflow.get_nav_list())) + self.assertEqual(6, len(processor.bpmn_workflow.get_flat_nav_list())) From ee3ee9fd4a7439b69285b6089d2d151aac4f8f52 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Mon, 14 Dec 2020 10:27:40 -0500 Subject: [PATCH 29/35] Added tests to cover most of the use cases and code, and a bunch of stuff to make the mocks happy --- crc/api/workflow_sync.py | 6 +- tests/base_test.py | 2 +- tests/data/random_fact/random_fact2.bpmn | 200 ++++++++++++++++++ .../workflow_sync_responses/random_fact2.bpmn | 104 +++++++++ tests/test_workflow_sync.py | 85 +++++++- 5 files changed, 392 insertions(+), 5 deletions(-) create mode 100644 tests/data/random_fact/random_fact2.bpmn create mode 100644 tests/data/workflow_sync_responses/random_fact2.bpmn diff --git a/crc/api/workflow_sync.py b/crc/api/workflow_sync.py index 4915af0f..68cd9fd2 100644 --- a/crc/api/workflow_sync.py +++ b/crc/api/workflow_sync.py @@ -97,7 +97,7 @@ def sync_all_changed_workflows(remote): def sync_changed_files(remote,workflow_spec_id): # make sure that spec is local before syncing files - specdict = WorkflowSyncService.get_remote_workfow_spec(remote,workflow_spec_id) + specdict = WorkflowSyncService.get_remote_workflow_spec(remote,workflow_spec_id) localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first() if localspec is None: @@ -173,6 +173,8 @@ def get_changed_files(remote,workflow_spec_id,as_df=False): # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index local = get_workflow_spec_files_dataframe(workflow_spec_id).reset_index() local['md5_hash'] = local['md5_hash'].astype('str') + remote_files['md5_hash'] = remote_files['md5_hash'].astype('str') + different = remote_files.merge(local, right_on=['filename','md5_hash'], left_on=['filename','md5_hash'], @@ -205,7 +207,7 @@ def get_changed_files(remote,workflow_spec_id,as_df=False): # get an exclusive or list of workflow ids - that is we want lists of files that are # on one machine or the other, but not both - remote_spec_ids = remote[['filename']] + remote_spec_ids = remote_files[['filename']] local_spec_ids = local[['filename']] left = remote_spec_ids[~remote_spec_ids['filename'].isin(local_spec_ids['filename'])] right = local_spec_ids[~local_spec_ids['filename'].isin(remote_spec_ids['filename'])] diff --git a/tests/base_test.py b/tests/base_test.py index e32bbd9b..4f2306a2 100644 --- a/tests/base_test.py +++ b/tests/base_test.py @@ -207,7 +207,7 @@ class BaseTest(unittest.TestCase): @staticmethod def workflow_sync_response(file_name): filepath = os.path.join(app.root_path, '..', 'tests', 'data', 'workflow_sync_responses', file_name) - with open(filepath, 'r') as myfile: + with open(filepath, 'rb') as myfile: data = myfile.read() return data diff --git a/tests/data/random_fact/random_fact2.bpmn b/tests/data/random_fact/random_fact2.bpmn new file mode 100644 index 00000000..bd0a9ef5 --- /dev/null +++ b/tests/data/random_fact/random_fact2.bpmn @@ -0,0 +1,200 @@ + + + + + SequenceFlow_0c7wlth + + + # h1 Heading 8-) +## h2 Heading +### h3 Heading +#### h4 Heading +##### h5 Heading +###### h6 Heading + + +## Horizontal Rules + +___ + +--- + +*** + + +## Typographic replacements + +"double quotes" and 'single quotes' + + +## 
Emphasis + +**This is bold text** + +__This is bold text__ + +*This is italic text* + +_This is italic text_ + +~~Strikethrough~~ + + +## Blockquotes + + +> Blockquotes can also be nested... +>> ...by using additional greater-than signs right next to each other... +> > > ...or with spaces between arrows. + + +## Lists + +Unordered + ++ Create a list by starting a line with `+`, `-`, or `*` ++ Sub-lists are made by indenting 2 spaces: + - Marker character change forces new list start: + * Ac tristique libero volutpat at + + Facilisis in pretium nisl aliquet + - Nulla volutpat aliquam velit ++ Very easy! + +Ordered + +1. Lorem ipsum dolor sit amet +2. Consectetur adipiscing elit +3. Integer molestie lorem at massa + + +1. You can use sequential numbers... +1. ...or keep all the numbers as `1.` + +Start numbering with offset: + +57. foo +1. bar + +## Tables + +| Option | Description | +| ------ | ----------- | +| data | path to data files to supply the data that will be passed into templates. | +| engine | engine to be used for processing templates. Handlebars is the default. | +| ext | extension to be used for dest files. | + +Right aligned columns + +| Option | Description | +| ------:| -----------:| +| data | path to data files to supply the data that will be passed into templates. | +| engine | engine to be used for processing templates. Handlebars is the default. | +| ext | extension to be used for dest files. | + + +## Links + +[link text](http://dev.nodeca.com) + +[link with title](http://nodeca.github.io/pica/demo/ "title text!") + +Autoconverted link https://github.com/nodeca/pica (enable linkify to see) + + +## Images + +![Minion](https://octodex.github.com/images/minion.png) +![Stormtroopocat](https://octodex.github.com/images/stormtroopocat.jpg "The Stormtroopocat") + + + + + + + + + + + + + + + + SequenceFlow_0c7wlth + SequenceFlow_0641sh6 + + + + + + + + + SequenceFlow_0641sh6 + SequenceFlow_0t29gjo + FactService = fact_service() + + + # Great Job! +You have completed the random fact generator. +You chose to receive a random fact of the type: "{{type}}" + +Your random fact is: +{{details}} + SequenceFlow_0t29gjo + + + + + + User sets the Fact.type to cat, norris, or buzzword + + + + Makes an API  call to get a fact of the required type. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/workflow_sync_responses/random_fact2.bpmn b/tests/data/workflow_sync_responses/random_fact2.bpmn new file mode 100644 index 00000000..f502a238 --- /dev/null +++ b/tests/data/workflow_sync_responses/random_fact2.bpmn @@ -0,0 +1,104 @@ + + + + + SequenceFlow_0c7wlth + + + # h1 Heading 8-) + NEW_FILE_ADDED + +![Stormtroopocat](https://octodex.github.com/images/stormtroopocat.jpg "The Stormtroopocat") + + + + + + + + + + + + + + + + SequenceFlow_0c7wlth + SequenceFlow_0641sh6 + + + + + + + + + SequenceFlow_0641sh6 + SequenceFlow_0t29gjo + FactService = fact_service() + + + # Great Job! +You have completed the random fact generator. +You chose to receive a random fact of the type: "{{type}}" + +Your random fact is: +{{details}} + SequenceFlow_0t29gjo + + + + + + User sets the Fact.type to cat, norris, or buzzword + + + + Makes an API  call to get a fact of the required type. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/test_workflow_sync.py b/tests/test_workflow_sync.py index f2c68719..eb0cd1b5 100644 --- a/tests/test_workflow_sync.py +++ b/tests/test_workflow_sync.py @@ -2,12 +2,18 @@ from unittest.mock import patch from crc import db from tests.base_test import BaseTest -from crc.api.workflow_sync import get_all_spec_state, get_changed_workflows +from crc.api.workflow_sync import get_all_spec_state, \ + get_changed_workflows, \ + get_workflow_spec_files, \ + get_changed_files, \ + get_workflow_specification, \ + sync_changed_files from crc.models.workflow import WorkflowSpecModel -import json from datetime import datetime from crc.services.file_service import FileService + + class TestWorkflowSync(BaseTest): @patch('crc.services.workflow_sync.WorkflowSyncService.get_all_remote_workflows') @@ -73,3 +79,78 @@ class TestWorkflowSync(BaseTest): response = get_changed_workflows('localhost:0000') #endpoint is not used due to mock self.assertIsNotNone(response) self.assertEqual(response,[]) + + + @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files') + def test_file_differences(self, mock_get): + self.load_example_data() + othersys = get_workflow_spec_files('random_fact') + othersys[1]['date_created'] = str(datetime.now()) + othersys[1]['md5_hash'] = '12345' + mock_get.return_value = othersys + response = get_changed_files('localhost:0000','random_fact',as_df=False) #endpoint is not used due to mock + self.assertIsNotNone(response) + self.assertEqual(len(response),1) + self.assertEqual(response[0]['filename'], 'random_fact2.bpmn') + self.assertEqual(response[0]['location'], 'remote') + self.assertEqual(response[0]['new'], False) + + + @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files') + def test_file_differences(self, mock_get): + self.load_example_data() + othersys = get_workflow_spec_files('random_fact') + othersys[1]['date_created'] = str(datetime.now()) + othersys[1]['md5_hash'] = '12345' + mock_get.return_value = othersys + response = get_changed_files('localhost:0000','random_fact',as_df=False) #endpoint is not used due to mock + self.assertIsNotNone(response) + self.assertEqual(len(response),1) + self.assertEqual(response[0]['filename'], 'random_fact2.bpmn') + self.assertEqual(response[0]['location'], 'remote') + self.assertEqual(response[0]['new'], False) + + @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_file_by_hash') + @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files') + @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec') + def test_file_differences(self, workflow_mock, spec_files_mock, file_data_mock): + self.load_example_data() + remote_workflow = get_workflow_specification('random_fact') + self.assertEqual(remote_workflow['display_name'],'Random Fact') + remote_workflow['description'] = 'This Workflow came from Remote' + remote_workflow['display_name'] = 'Remote Workflow' + workflow_mock.return_value = remote_workflow + othersys = get_workflow_spec_files('random_fact') + othersys[1]['date_created'] = str(datetime.now()) + othersys[1]['md5_hash'] = '12345' + spec_files_mock.return_value = othersys + file_data_mock.return_value = self.workflow_sync_response('random_fact2.bpmn') + response = sync_changed_files('localhost:0000','random_fact') # endpoint not used due to mock + self.assertIsNotNone(response) + 
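+        # Only random_fact2.bpmn was given a different md5_hash on the mocked remote,
+        # so exactly that one file should come back as updated.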
self.assertEqual(len(response),1) + self.assertEqual(response[0], 'random_fact2.bpmn') + files = FileService.get_spec_data_files('random_fact') + md5sums = [str(f.md5_hash) for f in files] + self.assertEqual('21bb6f9e-0af7-0ab2-0fc7-ec0f94787e58' in md5sums, True) + new_local_workflow = get_workflow_specification('random_fact') + self.assertEqual(new_local_workflow['display_name'],'Remote Workflow') + + + + @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files') + @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec') + def test_file_deleted(self, workflow_mock, spec_files_mock): + self.load_example_data() + remote_workflow = get_workflow_specification('random_fact') + workflow_mock.return_value = remote_workflow + othersys = get_workflow_spec_files('random_fact') + del(othersys[1]) + spec_files_mock.return_value = othersys + response = sync_changed_files('localhost:0000','random_fact') # endpoint not used due to mock + self.assertIsNotNone(response) + # when we delete a local file, we do not return that it was deleted - just + # a list of updated files. We may want to change this in the future. + self.assertEqual(len(response),0) + files = FileService.get_spec_data_files('random_fact') + self.assertEqual(len(files),1) + From 38a6fa782d935bdf5e9ec8f29fdd49b60e71fdce Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 14 Dec 2020 10:29:53 -0500 Subject: [PATCH 30/35] Fixing the email script test. --- tests/data/email/email.bpmn | 4 ++-- tests/emails/test_email_script.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/data/email/email.bpmn b/tests/data/email/email.bpmn index 3395e788..7ba69843 100644 --- a/tests/data/email/email.bpmn +++ b/tests/data/email/email.bpmn @@ -1,5 +1,5 @@ - + Flow_1synsig @@ -20,7 +20,7 @@ Email content to be delivered to {{ ApprvlApprvr1 }} --- Flow_08n2npe Flow_1xlrgne - email("Camunda Email Subject",'ApprvlApprvr1','PIComputingID') + email("Camunda Email Subject",ApprvlApprvr1,PIComputingID) diff --git a/tests/emails/test_email_script.py b/tests/emails/test_email_script.py index 1385609b..1a6c6860 100644 --- a/tests/emails/test_email_script.py +++ b/tests/emails/test_email_script.py @@ -1,6 +1,6 @@ +from tests.base_test import BaseTest from crc import mail from crc.models.email import EmailModel -from tests.base_test import BaseTest class TestEmailScript(BaseTest): @@ -9,8 +9,8 @@ class TestEmailScript(BaseTest): workflow = self.create_workflow('email') task_data = { - 'PIComputingID': 'dhf8r', - 'ApprvlApprvr1': 'lb3dp' + 'PIComputingID': 'dhf8r@virginia.edu', + 'ApprvlApprvr1': 'lb3dp@virginia.edu' } task = self.get_workflow_api(workflow).next_task From fd4b881416b671c5930fc5f57c110e9fb230c408 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 14 Dec 2020 10:30:10 -0500 Subject: [PATCH 31/35] upgrading the pip libraries. 
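The email fix above (passing ApprvlApprvr1 and PIComputingID without quotes) presumably lets the script task receive the address values held in task data rather than the literal variable names. Below is a minimal sketch of how bare names in such a script expression resolve against a task-data dictionary, assuming plain Python eval semantics; the email() stub and the evaluation itself are illustrative stand-ins, not the project's actual scripting engine:

    def email(subject, *recipients):
        # Hypothetical stand-in that simply records its arguments.
        return subject, list(recipients)

    task_data = {
        'PIComputingID': 'dhf8r@virginia.edu',
        'ApprvlApprvr1': 'lb3dp@virginia.edu',
    }

    # With the quotes gone, the names are looked up in task data at evaluation time.
    result = eval("email('Camunda Email Subject', ApprvlApprvr1, PIComputingID)",
                  {'email': email}, task_data)
    assert result == ('Camunda Email Subject',
                      ['lb3dp@virginia.edu', 'dhf8r@virginia.edu'])
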
--- Pipfile.lock | 133 +++++++++++++++++------------------ crc/services/file_service.py | 3 +- 2 files changed, 65 insertions(+), 71 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index ed6931eb..b64e68b3 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -33,10 +33,10 @@ }, "aniso8601": { "hashes": [ - "sha256:529dcb1f5f26ee0df6c0a1ee84b7b27197c3c50fc3a6321d66c544689237d072", - "sha256:c033f63d028b9a58e3ab0c2c7d0532ab4bfa7452bfc788fbfe3ddabd327b181a" + "sha256:246bf8d3611527030889e6df970878969d3a2f760ba3eb694fa1fb10e6ce53f9", + "sha256:51047d4fb51d7b8afd522b70f2d21a1b2487cbb7f7bd84ea852e9aa7808e7704" ], - "version": "==8.0.0" + "version": "==8.1.0" }, "attrs": { "hashes": [ @@ -80,10 +80,10 @@ }, "certifi": { "hashes": [ - "sha256:1f422849db327d534e3d0c5f02a263458c3955ec0aae4ff09b95f195c59f4edd", - "sha256:f05def092c44fbf25834a51509ef6e631dc19765ab8a57b4e7ab85531f0a9cf4" + "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", + "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" ], - "version": "==2020.11.8" + "version": "==2020.12.5" }, "cffi": { "hashes": [ @@ -108,12 +108,14 @@ "sha256:9cc46bc107224ff5b6d04369e7c595acb700c3613ad7bcf2e2012f62ece80c35", "sha256:9f7a31251289b2ab6d4012f6e83e58bc3b96bd151f5b5262467f4bb6b34a7c26", "sha256:9ffb888f19d54a4d4dfd4b3f29bc2c16aa4972f1c2ab9c4ab09b8ab8685b9c2b", + "sha256:a5ed8c05548b54b998b9498753fb9cadbfd92ee88e884641377d8a8b291bcc01", "sha256:a7711edca4dcef1a75257b50a2fbfe92a65187c47dab5a0f1b9b332c5919a3fb", "sha256:af5c59122a011049aad5dd87424b8e65a80e4a6477419c0c1015f73fb5ea0293", "sha256:b18e0a9ef57d2b41f5c68beefa32317d286c3d6ac0484efd10d6e07491bb95dd", "sha256:b4e248d1087abf9f4c10f3c398896c87ce82a9856494a7155823eb45a892395d", "sha256:ba4e9e0ae13fc41c6b23299545e5ef73055213e466bd107953e4a013a5ddd7e3", "sha256:c6332685306b6417a91b1ff9fae889b3ba65c2292d64bd9245c093b1b284809d", + "sha256:d5ff0621c88ce83a28a10d2ce719b2ee85635e85c515f12bac99a95306da4b2e", "sha256:d9efd8b7a3ef378dd61a1e77367f1924375befc2eba06168b6ebfa903a5e59ca", "sha256:df5169c4396adc04f9b0a05f13c074df878b6052430e03f50e68adf3a57aa28d", "sha256:ebb253464a5d0482b191274f1c8bf00e33f7e0b9c66405fbffc61ed2c839c775", @@ -548,40 +550,40 @@ }, "packaging": { "hashes": [ - "sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8", - "sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181" + "sha256:24e0da08660a87484d1602c30bb4902d74816b6985b93de36926f5bc95741858", + "sha256:78598185a7008a470d64526a8059de9aaa449238f280fc9eb6b13ba6c4109093" ], - "version": "==20.4" + "version": "==20.8" }, "pandas": { "hashes": [ - "sha256:09e0503758ad61afe81c9069505f8cb8c1e36ea8cc1e6826a95823ef5b327daf", - "sha256:0a11a6290ef3667575cbd4785a1b62d658c25a2fd70a5adedba32e156a8f1773", - "sha256:0d9a38a59242a2f6298fff45d09768b78b6eb0c52af5919ea9e45965d7ba56d9", - "sha256:112c5ba0f9ea0f60b2cc38c25f87ca1d5ca10f71efbee8e0f1bee9cf584ed5d5", - "sha256:185cf8c8f38b169dbf7001e1a88c511f653fbb9dfa3e048f5e19c38049e991dc", - "sha256:3aa8e10768c730cc1b610aca688f588831fa70b65a26cb549fbb9f35049a05e0", - "sha256:41746d520f2b50409dffdba29a15c42caa7babae15616bcf80800d8cfcae3d3e", - "sha256:43cea38cbcadb900829858884f49745eb1f42f92609d368cabcc674b03e90efc", - "sha256:5378f58172bd63d8c16dd5d008d7dcdd55bf803fcdbe7da2dcb65dbbf322f05b", - "sha256:54404abb1cd3f89d01f1fb5350607815326790efb4789be60508f458cdd5ccbf", - "sha256:5dac3aeaac5feb1016e94bde851eb2012d1733a222b8afa788202b836c97dad5", - 
"sha256:5fdb2a61e477ce58d3f1fdf2470ee142d9f0dde4969032edaf0b8f1a9dafeaa2", - "sha256:6613c7815ee0b20222178ad32ec144061cb07e6a746970c9160af1ebe3ad43b4", - "sha256:6d2b5b58e7df46b2c010ec78d7fb9ab20abf1d306d0614d3432e7478993fbdb0", - "sha256:8a5d7e57b9df2c0a9a202840b2881bb1f7a648eba12dd2d919ac07a33a36a97f", - "sha256:8b4c2055ebd6e497e5ecc06efa5b8aa76f59d15233356eb10dad22a03b757805", - "sha256:a15653480e5b92ee376f8458197a58cca89a6e95d12cccb4c2d933df5cecc63f", - "sha256:a7d2547b601ecc9a53fd41561de49a43d2231728ad65c7713d6b616cd02ddbed", - "sha256:a979d0404b135c63954dea79e6246c45dd45371a88631cdbb4877d844e6de3b6", - "sha256:b1f8111635700de7ac350b639e7e452b06fc541a328cf6193cf8fc638804bab8", - "sha256:c5a3597880a7a29a31ebd39b73b2c824316ae63a05c3c8a5ce2aea3fc68afe35", - "sha256:c681e8fcc47a767bf868341d8f0d76923733cbdcabd6ec3a3560695c69f14a1e", - "sha256:cf135a08f306ebbcfea6da8bf775217613917be23e5074c69215b91e180caab4", - "sha256:e2b8557fe6d0a18db4d61c028c6af61bfed44ef90e419ed6fadbdc079eba141e" + "sha256:0a643bae4283a37732ddfcecab3f62dd082996021b980f580903f4e8e01b3c5b", + "sha256:0de3ddb414d30798cbf56e642d82cac30a80223ad6fe484d66c0ce01a84d6f2f", + "sha256:19a2148a1d02791352e9fa637899a78e371a3516ac6da5c4edc718f60cbae648", + "sha256:21b5a2b033380adbdd36b3116faaf9a4663e375325831dac1b519a44f9e439bb", + "sha256:24c7f8d4aee71bfa6401faeba367dd654f696a77151a8a28bc2013f7ced4af98", + "sha256:26fa92d3ac743a149a31b21d6f4337b0594b6302ea5575b37af9ca9611e8981a", + "sha256:2860a97cbb25444ffc0088b457da0a79dc79f9c601238a3e0644312fcc14bf11", + "sha256:2b1c6cd28a0dfda75c7b5957363333f01d370936e4c6276b7b8e696dd500582a", + "sha256:2c2f7c670ea4e60318e4b7e474d56447cf0c7d83b3c2a5405a0dbb2600b9c48e", + "sha256:3be7a7a0ca71a2640e81d9276f526bca63505850add10206d0da2e8a0a325dae", + "sha256:4c62e94d5d49db116bef1bd5c2486723a292d79409fc9abd51adf9e05329101d", + "sha256:5008374ebb990dad9ed48b0f5d0038124c73748f5384cc8c46904dace27082d9", + "sha256:5447ea7af4005b0daf695a316a423b96374c9c73ffbd4533209c5ddc369e644b", + "sha256:573fba5b05bf2c69271a32e52399c8de599e4a15ab7cec47d3b9c904125ab788", + "sha256:5a780260afc88268a9d3ac3511d8f494fdcf637eece62fb9eb656a63d53eb7ca", + "sha256:70865f96bb38fec46f7ebd66d4b5cfd0aa6b842073f298d621385ae3898d28b5", + "sha256:731568be71fba1e13cae212c362f3d2ca8932e83cb1b85e3f1b4dd77d019254a", + "sha256:b61080750d19a0122469ab59b087380721d6b72a4e7d962e4d7e63e0c4504814", + "sha256:bf23a3b54d128b50f4f9d4675b3c1857a688cc6731a32f931837d72effb2698d", + "sha256:c16d59c15d946111d2716856dd5479221c9e4f2f5c7bc2d617f39d870031e086", + "sha256:c61c043aafb69329d0f961b19faa30b1dab709dd34c9388143fc55680059e55a", + "sha256:c94ff2780a1fd89f190390130d6d36173ca59fcfb3fe0ff596f9a56518191ccb", + "sha256:edda9bacc3843dfbeebaf7a701763e68e741b08fccb889c003b0a52f0ee95782", + "sha256:f10fc41ee3c75a474d3bdf68d396f10782d013d7f67db99c0efbfd0acb99701b" ], "index": "pypi", - "version": "==1.1.4" + "version": "==1.1.5" }, "psycopg2-binary": { "hashes": [ @@ -640,18 +642,18 @@ }, "pygithub": { "hashes": [ - "sha256:776befaddab9d8fddd525d52a6ca1ac228cf62b5b1e271836d766f4925e1452e", - "sha256:8ad656bf79958e775ec59f7f5a3dbcbadac12147ae3dc42708b951064096af15" + "sha256:053f1b8d553a344ebd3ca3972765d923ee7e8ecc3ea55bd203683f164348fa1a", + "sha256:14c96d55e3c0e295598e52fbbbf2a7862a293723482ae9000cb9c816faab4fb4" ], "index": "pypi", - "version": "==1.53" + "version": "==1.54" }, "pygments": { "hashes": [ - "sha256:381985fcc551eb9d37c52088a32914e00517e57f4a21609f48141ba08e193fa0", - "sha256:88a0bbcd659fcb9573703957c6b9cff9fab7295e6e76db54c9d00ae42df32773" + 
"sha256:ccf3acacf3782cbed4a989426012f1c535c9a90d3a7fc3f16d231b9372d2b716", + "sha256:f275b6c0909e5dafd2d6269a656aa90fa58ebf4a74f8fcf9053195d226b24a08" ], - "version": "==2.7.2" + "version": "==2.7.3" }, "pyjwt": { "hashes": [ @@ -746,11 +748,11 @@ }, "requests": { "hashes": [ - "sha256:7f1a0b932f4a60a1a65caa4263921bb7d9ee911957e0ae4a23a6dd08185ad5f8", - "sha256:e786fa28d8c9154e6a4de5d46a1d921b8749f8b74e28bde23768e5e16eece998" + "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b", + "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898" ], "index": "pypi", - "version": "==2.25.0" + "version": "==2.24.0" }, "sentry-sdk": { "extras": [ @@ -779,11 +781,11 @@ }, "soupsieve": { "hashes": [ - "sha256:1634eea42ab371d3d346309b93df7870a88610f0725d47528be902a0d95ecc55", - "sha256:a59dc181727e95d25f781f0eb4fd1825ff45590ec8ff49eadfd7f1a537cc0232" + "sha256:4bb21a6ee4707bf43b61230e80740e71bfe56e55d1f1f50924b087bb2975c851", + "sha256:6dc52924dc0bc710a5d16794e6b3480b2c7c08b07729505feab2b2c16661ff6e" ], "markers": "python_version >= '3.0'", - "version": "==2.0.1" + "version": "==2.1" }, "sphinx": { "hashes": [ @@ -837,7 +839,7 @@ }, "spiffworkflow": { "git": "https://github.com/sartography/SpiffWorkflow.git", - "ref": "6b2ed24bb340ebd31049312bd321f66ebf7b6b26" + "ref": "aec8e8e21ec8efe09b3c9ff24465824886890ad8" }, "sqlalchemy": { "hashes": [ @@ -892,10 +894,10 @@ }, "urllib3": { "hashes": [ - "sha256:19188f96923873c92ccb987120ec4acaa12f0461fa9ce5d3d0772bc965a39e08", - "sha256:d8ff90d979214d7b4f8ce956e80f4028fc6860e4431f731ea4a8c08f23f99473" + "sha256:8d7eaa5a82a1cac232164990f04874c594c9453ec55eef02eab885aa02fc17a2", + "sha256:f5321fbe4bf3fefa0efd0bfe7fb14e90909eb62a48ccda331726b4319897dd5e" ], - "version": "==1.26.2" + "version": "==1.25.11" }, "waitress": { "hashes": [ @@ -942,11 +944,11 @@ }, "xlrd": { "hashes": [ - "sha256:546eb36cee8db40c3eaa46c351e67ffee6eeb5fa2650b71bc4c758a29a1b29b2", - "sha256:e551fb498759fa3a5384a94ccd4c3c02eb7c00ea424426e212ac0c57be9dfbde" + "sha256:6a33ee89877bd9abc1158129f6e94be74e2679636b8a205b43b85206c3f0bbdd", + "sha256:f72f148f54442c6b056bf931dbc34f986fd0c3b0b6b5a58d013c9aef274d0c88" ], "index": "pypi", - "version": "==1.2.0" + "version": "==2.0.1" }, "xlsxwriter": { "hashes": [ @@ -1014,10 +1016,10 @@ }, "packaging": { "hashes": [ - "sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8", - "sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181" + "sha256:24e0da08660a87484d1602c30bb4902d74816b6985b93de36926f5bc95741858", + "sha256:78598185a7008a470d64526a8059de9aaa449238f280fc9eb6b13ba6c4109093" ], - "version": "==20.4" + "version": "==20.8" }, "pbr": { "hashes": [ @@ -1036,10 +1038,10 @@ }, "py": { "hashes": [ - "sha256:366389d1db726cd2fcfc79732e75410e5fe4d31db13692115529d34069a043c2", - "sha256:9ca6883ce56b4e8da7e79ac18787889fa5206c79dcc67fb065376cd2fe03f342" + "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3", + "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a" ], - "version": "==1.9.0" + "version": "==1.10.0" }, "pyparsing": { "hashes": [ @@ -1050,18 +1052,11 @@ }, "pytest": { "hashes": [ - "sha256:4288fed0d9153d9646bfcdf0c0428197dba1ecb27a33bb6e031d002fa88653fe", - "sha256:c0a7e94a8cdbc5422a51ccdad8e6f1024795939cc89159a0ae7f0b316ad3823e" + "sha256:b12e09409c5bdedc28d308469e156127004a436b41e9b44f9bff6446cbab9152", + "sha256:d69e1a80b34fe4d596c9142f35d9e523d98a2838976f1a68419a8f051b24cec6" ], "index": "pypi", - "version": "==6.1.2" - }, - "six": { 
- "hashes": [ - "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", - "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" - ], - "version": "==1.15.0" + "version": "==6.2.0" }, "toml": { "hashes": [ diff --git a/crc/services/file_service.py b/crc/services/file_service.py index 2c55058c..a3882f26 100644 --- a/crc/services/file_service.py +++ b/crc/services/file_service.py @@ -6,7 +6,6 @@ from github import Github, GithubObject, UnknownObjectException from uuid import UUID from lxml import etree -import flask from SpiffWorkflow.bpmn.parser.ValidationException import ValidationException from pandas import ExcelFile from sqlalchemy import desc @@ -82,7 +81,7 @@ class FileService(object): you get '1.0' rather than '1' fixme: This is stupid stupid slow. Place it in the database and just check if it is up to date.""" data_model = FileService.get_reference_file_data(reference_file_name) - xls = ExcelFile(data_model.data) + xls = ExcelFile(data_model.data, engine='openpyxl') df = xls.parse(xls.sheet_names[0]) for c in int_columns: df[c] = df[c].fillna(0) From d8ac20b1c33da1bec8e01a8f2ba455b3d3243835 Mon Sep 17 00:00:00 2001 From: Kelly McDonald Date: Mon, 14 Dec 2020 10:37:16 -0500 Subject: [PATCH 32/35] I added a second file to 'random_fact' test workflow, so another test was expecting 2 files in it after adding a new file, but there were 3 - Nothing to see here - move along --- tests/files/test_files_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/files/test_files_api.py b/tests/files/test_files_api.py index 02feb8d0..958989d1 100644 --- a/tests/files/test_files_api.py +++ b/tests/files/test_files_api.py @@ -46,7 +46,7 @@ class TestFilesApi(BaseTest): content_type="application/json", headers=self.logged_in_headers()) self.assert_success(rv) json_data = json.loads(rv.get_data(as_text=True)) - self.assertEqual(2, len(json_data)) + self.assertEqual(3, len(json_data)) def test_create_file(self): From 082dee46e04b851dde907b3b1dd51bb092551cd4 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 14 Dec 2020 10:37:35 -0500 Subject: [PATCH 33/35] fixing some failing tests. 
--- tests/test_tasks_api.py | 6 +++--- tests/test_user_roles.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_tasks_api.py b/tests/test_tasks_api.py index a6ffa710..92106201 100644 --- a/tests/test_tasks_api.py +++ b/tests/test_tasks_api.py @@ -113,14 +113,14 @@ class TestTasksApi(BaseTest): self.assertIsNotNone(workflow_api.navigation) nav = workflow_api.navigation - self.assertEqual(5, len(nav)) + self.assertEqual(6, len(nav)) self.assertEqual("Do You Have Bananas", nav[0]['title']) self.assertEqual("Bananas?", nav[1]['title']) self.assertEqual("FUTURE", nav[1]['state']) self.assertEqual("yes", nav[2]['title']) self.assertEqual("NOOP", nav[2]['state']) - self.assertEqual("no", nav[3]['title']) - self.assertEqual("NOOP", nav[3]['state']) + self.assertEqual("no", nav[4]['title']) + self.assertEqual("NOOP", nav[4]['state']) def test_navigation_with_exclusive_gateway(self): workflow = self.create_workflow('exclusive_gateway_2') diff --git a/tests/test_user_roles.py b/tests/test_user_roles.py index 3dea94e7..8f00332b 100644 --- a/tests/test_user_roles.py +++ b/tests/test_user_roles.py @@ -62,7 +62,7 @@ class TestTasksApi(BaseTest): workflow_api = self.get_workflow_api(workflow, user_uid=submitter.uid) nav = workflow_api.navigation - self.assertEqual(5, len(nav)) + self.assertEqual(7, len(nav)) self.assertEqual("supervisor", nav[1]['lane']) def test_get_outstanding_tasks_awaiting_current_user(self): @@ -121,7 +121,7 @@ class TestTasksApi(BaseTest): # Navigation as Submitter with ready task. workflow_api = self.get_workflow_api(workflow, user_uid=submitter.uid) nav = workflow_api.navigation - self.assertEqual(5, len(nav)) + self.assertEqual(7, len(nav)) self.assertEqual('READY', nav[0]['state']) # First item is ready, no progress yet. self.assertEqual('LOCKED', nav[1]['state']) # Second item is locked, it is the review and doesn't belong to this user. self.assertEqual('LOCKED', nav[2]['state']) # third item is a gateway, and belongs to no one, and is locked. @@ -149,7 +149,7 @@ class TestTasksApi(BaseTest): # Navigation as Supervisor workflow_api = self.get_workflow_api(workflow, user_uid=supervisor.uid) nav = workflow_api.navigation - self.assertEqual(5, len(nav)) + self.assertEqual(7, len(nav)) self.assertEqual('LOCKED', nav[0]['state']) # First item belongs to the submitter, and is locked. self.assertEqual('READY', nav[1]['state']) # Second item is locked, it is the review and doesn't belong to this user. self.assertEqual('LOCKED', nav[2]['state']) # third item is a gateway, and belongs to no one, and is locked. @@ -161,7 +161,7 @@ class TestTasksApi(BaseTest): # Navigation as Supervisor, after completing task. nav = workflow_api.navigation - self.assertEqual(5, len(nav)) + self.assertEqual(7, len(nav)) self.assertEqual('LOCKED', nav[0]['state']) # First item belongs to the submitter, and is locked. self.assertEqual('COMPLETED', nav[1]['state']) # Second item is locked, it is the review and doesn't belong to this user. self.assertEqual('COMPLETED', nav[2]['state']) # third item is a gateway, and is now complete. @@ -170,7 +170,7 @@ class TestTasksApi(BaseTest): # Navigation as Submitter, coming back in to a rejected workflow to view the rejection message. workflow_api = self.get_workflow_api(workflow, user_uid=submitter.uid) nav = workflow_api.navigation - self.assertEqual(5, len(nav)) + self.assertEqual(7, len(nav)) self.assertEqual('COMPLETED', nav[0]['state']) # First item belongs to the submitter, and is locked. 
self.assertEqual('LOCKED', nav[1]['state']) # Second item is locked, it is the review and doesn't belong to this user. self.assertEqual('LOCKED', nav[2]['state']) # third item is a gateway belonging to the supervisor, and is locked. @@ -179,7 +179,7 @@ class TestTasksApi(BaseTest): # Navigation as Submitter, re-completing the original request a second time, and sending it for review. workflow_api = self.complete_form(workflow, workflow_api.next_task, data, user_uid=submitter.uid) nav = workflow_api.navigation - self.assertEqual(5, len(nav)) + self.assertEqual(7, len(nav)) self.assertEqual('READY', nav[0]['state']) # When you loop back the task is again in the ready state. self.assertEqual('LOCKED', nav[1]['state']) # Second item is locked, it is the review and doesn't belong to this user. self.assertEqual('LOCKED', nav[2]['state']) # third item is a gateway belonging to the supervisor, and is locked. From 6bf24cc438fda680f81a31e4ceee541774f5f809 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 14 Dec 2020 11:27:38 -0500 Subject: [PATCH 34/35] fixing some failing tests related to changes in the underlying spiffworkflow library. --- Pipfile.lock | 38 +++++++++---------- crc/models/api_models.py | 2 + tests/test_tasks_api.py | 12 +++--- .../test_workflow_processor_multi_instance.py | 10 ++--- 4 files changed, 32 insertions(+), 30 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index 3298cbfd..8a4ff381 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -550,10 +550,10 @@ }, "packaging": { "hashes": [ - "sha256:05af3bb85d320377db281cf254ab050e1a7ebcbf5410685a9a407e18a1f81236", - "sha256:eb41423378682dadb7166144a4926e443093863024de508ca5c9737d6bc08376" + "sha256:24e0da08660a87484d1602c30bb4902d74816b6985b93de36926f5bc95741858", + "sha256:78598185a7008a470d64526a8059de9aaa449238f280fc9eb6b13ba6c4109093" ], - "version": "==20.7" + "version": "==20.8" }, "pandas": { "hashes": [ @@ -781,11 +781,11 @@ }, "soupsieve": { "hashes": [ - "sha256:1634eea42ab371d3d346309b93df7870a88610f0725d47528be902a0d95ecc55", - "sha256:a59dc181727e95d25f781f0eb4fd1825ff45590ec8ff49eadfd7f1a537cc0232" + "sha256:4bb21a6ee4707bf43b61230e80740e71bfe56e55d1f1f50924b087bb2975c851", + "sha256:6dc52924dc0bc710a5d16794e6b3480b2c7c08b07729505feab2b2c16661ff6e" ], "markers": "python_version >= '3.0'", - "version": "==2.0.1" + "version": "==2.1" }, "sphinx": { "hashes": [ @@ -839,7 +839,7 @@ }, "spiffworkflow": { "git": "https://github.com/sartography/SpiffWorkflow.git", - "ref": "71f75612779822166f3fc97ca39bb4630202af9d" + "ref": "b81a9f26a3a4533690d80235811633b809c98b88" }, "sqlalchemy": { "hashes": [ @@ -944,11 +944,11 @@ }, "xlrd": { "hashes": [ - "sha256:546eb36cee8db40c3eaa46c351e67ffee6eeb5fa2650b71bc4c758a29a1b29b2", - "sha256:e551fb498759fa3a5384a94ccd4c3c02eb7c00ea424426e212ac0c57be9dfbde" + "sha256:6a33ee89877bd9abc1158129f6e94be74e2679636b8a205b43b85206c3f0bbdd", + "sha256:f72f148f54442c6b056bf931dbc34f986fd0c3b0b6b5a58d013c9aef274d0c88" ], "index": "pypi", - "version": "==1.2.0" + "version": "==2.0.1" }, "xlsxwriter": { "hashes": [ @@ -1016,10 +1016,10 @@ }, "packaging": { "hashes": [ - "sha256:05af3bb85d320377db281cf254ab050e1a7ebcbf5410685a9a407e18a1f81236", - "sha256:eb41423378682dadb7166144a4926e443093863024de508ca5c9737d6bc08376" + "sha256:24e0da08660a87484d1602c30bb4902d74816b6985b93de36926f5bc95741858", + "sha256:78598185a7008a470d64526a8059de9aaa449238f280fc9eb6b13ba6c4109093" ], - "version": "==20.7" + "version": "==20.8" }, "pbr": { "hashes": [ @@ -1038,10 +1038,10 @@ }, "py": { "hashes": [ - 
"sha256:366389d1db726cd2fcfc79732e75410e5fe4d31db13692115529d34069a043c2", - "sha256:9ca6883ce56b4e8da7e79ac18787889fa5206c79dcc67fb065376cd2fe03f342" + "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3", + "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a" ], - "version": "==1.9.0" + "version": "==1.10.0" }, "pyparsing": { "hashes": [ @@ -1052,11 +1052,11 @@ }, "pytest": { "hashes": [ - "sha256:4288fed0d9153d9646bfcdf0c0428197dba1ecb27a33bb6e031d002fa88653fe", - "sha256:c0a7e94a8cdbc5422a51ccdad8e6f1024795939cc89159a0ae7f0b316ad3823e" + "sha256:b12e09409c5bdedc28d308469e156127004a436b41e9b44f9bff6446cbab9152", + "sha256:d69e1a80b34fe4d596c9142f35d9e523d98a2838976f1a68419a8f051b24cec6" ], "index": "pypi", - "version": "==6.1.2" + "version": "==6.2.0" }, "toml": { "hashes": [ diff --git a/crc/models/api_models.py b/crc/models/api_models.py index 00c040b6..90991dd5 100644 --- a/crc/models/api_models.py +++ b/crc/models/api_models.py @@ -158,10 +158,12 @@ class NavigationItemSchema(ma.Schema): state = data.pop('state', None) task_id = data.pop('task_id', None) children = data.pop('children', []) + spec_type = data.pop('spec_type', None) item = NavItem(**data) item.state = state item.task_id = task_id item.children = children + item.spec_type = spec_type return item class WorkflowApi(object): diff --git a/tests/test_tasks_api.py b/tests/test_tasks_api.py index bd2b300f..f9794051 100644 --- a/tests/test_tasks_api.py +++ b/tests/test_tasks_api.py @@ -68,7 +68,7 @@ class TestTasksApi(BaseTest): # get the first form in the two form workflow. workflow_api = self.get_workflow_api(workflow) self.assertEqual('two_forms', workflow_api.workflow_spec_id) - self.assertEqual(4, len(workflow_api.navigation)) + self.assertEqual(5, len(workflow_api.navigation)) self.assertIsNotNone(workflow_api.next_task.form) self.assertEqual("UserTask", workflow_api.next_task.type) self.assertEqual("StepOne", workflow_api.next_task.name) @@ -113,7 +113,7 @@ class TestTasksApi(BaseTest): self.assertIsNotNone(workflow_api.navigation) nav = workflow_api.navigation - self.assertEqual(3, len(nav)) + self.assertEqual(4, len(nav)) self.assertEqual("Do You Have Bananas", nav[1].description) self.assertEqual("Bananas?", nav[2].description) self.assertEqual("LIKELY", nav[2].state) @@ -135,7 +135,7 @@ class TestTasksApi(BaseTest): workflow_api = self.get_workflow_api(workflow) self.assertIsNotNone(workflow_api.navigation) nav = workflow_api.navigation - self.assertEqual(6, len(nav)) + self.assertEqual(7, len(nav)) self.assertEqual("Task 1", nav[1].description) self.assertEqual("Which Branch?", nav[2].description) self.assertEqual("a", nav[2].children[0].description) @@ -276,7 +276,7 @@ class TestTasksApi(BaseTest): # get the first form in the two form workflow. 
workflow = self.get_workflow_api(workflow) navigation = self.get_workflow_api(workflow).navigation - self.assertEqual(4, len(navigation)) # Start task, form_task, multi_task, end task + self.assertEqual(5, len(navigation)) # Start task, form_task, multi_task, end task self.assertEqual("UserTask", workflow.next_task.type) self.assertEqual(MultiInstanceType.sequential.value, workflow.next_task.multi_instance_type) self.assertEqual(5, workflow.next_task.multi_instance_count) @@ -394,7 +394,7 @@ class TestTasksApi(BaseTest): navigation = workflow_api.navigation task = workflow_api.next_task - self.assertEqual(4, len(navigation)) + self.assertEqual(5, len(navigation)) self.assertEqual("UserTask", task.type) self.assertEqual("Activity_A", task.name) self.assertEqual("My Sub Process", task.process_name) @@ -461,7 +461,7 @@ class TestTasksApi(BaseTest): workflow = self.create_workflow('multi_instance_parallel') workflow_api = self.get_workflow_api(workflow) - self.assertEqual(8, len(workflow_api.navigation)) + self.assertEqual(9, len(workflow_api.navigation)) ready_items = [nav for nav in workflow_api.navigation if nav.state == "READY"] self.assertEqual(5, len(ready_items)) diff --git a/tests/workflow/test_workflow_processor_multi_instance.py b/tests/workflow/test_workflow_processor_multi_instance.py index 72af78b3..927588b6 100644 --- a/tests/workflow/test_workflow_processor_multi_instance.py +++ b/tests/workflow/test_workflow_processor_multi_instance.py @@ -158,7 +158,7 @@ class TestWorkflowProcessorMultiInstance(BaseTest): self.assertEqual(3, len(next_user_tasks)) # There should be six tasks in the navigation: start event, the script task, end event, and three tasks # for the three executions of hte multi-instance. - self.assertEqual(6, len(processor.bpmn_workflow.get_flat_nav_list())) + self.assertEqual(7, len(processor.bpmn_workflow.get_flat_nav_list())) # We can complete the tasks out of order. task = next_user_tasks[2] @@ -176,7 +176,7 @@ class TestWorkflowProcessorMultiInstance(BaseTest): task.update_data({"investigator": {"email": "dhf8r@virginia.edu"}}) processor.complete_task(task) processor.do_engine_steps() - self.assertEqual(6, len(processor.bpmn_workflow.get_flat_nav_list())) + self.assertEqual(7, len(processor.bpmn_workflow.get_flat_nav_list())) task = next_user_tasks[0] api_task = WorkflowService.spiff_task_to_api_task(task) @@ -184,7 +184,7 @@ class TestWorkflowProcessorMultiInstance(BaseTest): task.update_data({"investigator":{"email":"asd3v@virginia.edu"}}) processor.complete_task(task) processor.do_engine_steps() - self.assertEqual(6, len(processor.bpmn_workflow.get_flat_nav_list())) + self.assertEqual(7, len(processor.bpmn_workflow.get_flat_nav_list())) task = next_user_tasks[1] api_task = WorkflowService.spiff_task_to_api_task(task) @@ -192,7 +192,7 @@ class TestWorkflowProcessorMultiInstance(BaseTest): task.update_data({"investigator":{"email":"asdf32@virginia.edu"}}) processor.complete_task(task) processor.do_engine_steps() - self.assertEqual(6, len(processor.bpmn_workflow.get_flat_nav_list())) + self.assertEqual(7, len(processor.bpmn_workflow.get_flat_nav_list())) # Completing the tasks out of order, still provides the correct information. 
expected = self.mock_investigator_response @@ -203,4 +203,4 @@ class TestWorkflowProcessorMultiInstance(BaseTest): task.data['StudyInfo']['investigators']) self.assertEqual(WorkflowStatus.complete, processor.get_status()) - self.assertEqual(6, len(processor.bpmn_workflow.get_flat_nav_list())) + self.assertEqual(7, len(processor.bpmn_workflow.get_flat_nav_list())) From 86ba20b2b4a926d7986598a229085641dc6ce7ec Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 14 Dec 2020 21:44:50 -0500 Subject: [PATCH 35/35] Moving back to using Spiffworkflow master. --- Pipfile | 2 +- Pipfile.lock | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Pipfile b/Pipfile index e49b6613..0a498025 100644 --- a/Pipfile +++ b/Pipfile @@ -39,7 +39,7 @@ requests = "*" sentry-sdk = {extras = ["flask"],version = "==0.14.4"} sphinx = "*" swagger-ui-bundle = "*" -spiffworkflow = {git = "https://github.com/sartography/SpiffWorkflow.git",ref = "bug/navigation"} +spiffworkflow = {git = "https://github.com/sartography/SpiffWorkflow.git",ref = "master"} webtest = "*" werkzeug = "*" xlrd = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 8a4ff381..43ba05e6 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "1e41c4486a74db3ee30b7fd4572b0cc54cfbe1bc1e6246b855748ec78db6cc1e" + "sha256": "621d57ec513f24c665dd34e08ae5a19fc27784e87c55bb4d2a91a1d48a473081" }, "pipfile-spec": 6, "requires": { @@ -839,7 +839,7 @@ }, "spiffworkflow": { "git": "https://github.com/sartography/SpiffWorkflow.git", - "ref": "b81a9f26a3a4533690d80235811633b809c98b88" + "ref": "450c07b886ce6bd8425974f0349248daade90fa0" }, "sqlalchemy": { "hashes": [