diff --git a/crc/api/workflow_sync.py b/crc/api/workflow_sync.py index 08c877ba..3204e936 100644 --- a/crc/api/workflow_sync.py +++ b/crc/api/workflow_sync.py @@ -19,6 +19,14 @@ def join_uuids(uuids): return hashlib.md5(combined_uuids.encode('utf8')).hexdigest() # make a hash of the hashes def verify_token(token, required_scopes): + """ + Part of the Swagger API permissions for the syncing API + The env variable for this is defined in config/default.py + + If you are 'playing' with the swagger interface, you will want to copy the + token that is defined there and use it to authenticate the API if you are + emulating copying files between systems. + """ if token == app.config['API_TOKEN']: return {'scope':['any']} else: @@ -95,20 +103,44 @@ def get_changed_workflows(remote,as_df=False): def sync_all_changed_workflows(remote): - + """ + Does what it says, gets a list of all workflows that are different between + two systems and pulls all of the workflows and files that are different on the + remote system. The idea is that we can make the local system 'look' like the remote + system for deployment or testing. 
+ """ workflowsdf = get_changed_workflows(remote,as_df=True) if len(workflowsdf) ==0: return [] workflows = workflowsdf.reset_index().to_dict(orient='records') for workflow in workflows: sync_changed_files(remote,workflow['workflow_spec_id']) + sync_changed_files(remote,'REFERENCE_FILES') return [x['workflow_spec_id'] for x in workflows] -def sync_changed_files(remote,workflow_spec_id): - # make sure that spec is local before syncing files +def file_get(workflow_spec_id,filename): + """ + Helper function to take care of the special case where we + are looking for files that are marked is_reference + """ + if workflow_spec_id == 'REFERENCE_FILES': + currentfile = session.query(FileModel).filter(FileModel.is_reference == True, + FileModel.name == filename).first() + else: + currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, + FileModel.name == filename).first() + return currentfile - specdict = WorkflowSyncService.get_remote_workflow_spec(remote,workflow_spec_id) +def create_or_update_local_spec(remote,workflow_spec_id): + specdict = WorkflowSyncService.get_remote_workflow_spec(remote, workflow_spec_id) + # if we are updating from a master spec, then we want to make sure it is the only + # master spec in our local system. 
+ if specdict['is_master_spec']: + masterspecs = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.is_master_spec == True).all() + for masterspec in masterspecs: + masterspec.is_master_spec = False + session.add(masterspec) localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first() if localspec is None: @@ -120,7 +152,7 @@ def sync_changed_files(remote,workflow_spec_id): localcategory = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.name == specdict['category']['name']).first() if localcategory == None: - #category doesn't exist - lets make it + # category doesn't exist - lets make it localcategory = WorkflowSpecCategoryModel() localcategory.name = specdict['category']['name'] localcategory.display_name = specdict['category']['display_name'] @@ -131,9 +163,45 @@ def sync_changed_files(remote,workflow_spec_id): localspec.display_order = specdict['display_order'] localspec.display_name = specdict['display_name'] localspec.name = specdict['name'] + localspec.is_master_spec = specdict['is_master_spec'] localspec.description = specdict['description'] session.add(localspec) +def update_or_create_current_file(remote,workflow_spec_id,updatefile): + currentfile = file_get(workflow_spec_id, updatefile['filename']) + if not currentfile: + currentfile = FileModel() + currentfile.name = updatefile['filename'] + if workflow_spec_id == 'REFERENCE_FILES': + currentfile.workflow_spec_id = None + currentfile.is_reference = True + else: + currentfile.workflow_spec_id = workflow_spec_id + + currentfile.date_created = updatefile['date_created'] + currentfile.type = updatefile['type'] + currentfile.primary = updatefile['primary'] + currentfile.content_type = updatefile['content_type'] + currentfile.primary_process_id = updatefile['primary_process_id'] + session.add(currentfile) + content = WorkflowSyncService.get_remote_file_by_hash(remote, updatefile['md5_hash']) + FileService.update_file(currentfile, 
content, updatefile['type']) + + +def sync_changed_files(remote,workflow_spec_id): + """ + This grabs a list of all files for a workflow_spec that are different between systems, + and gets the remote copy of any file that has changed + + We also have a special case for "REFERENCE_FILES" where there is no workflow_spec_id, + but all of the files are marked in the database as is_reference - and they need to be + handled slightly differently. + """ + # make sure that spec is local before syncing files + if workflow_spec_id != 'REFERENCE_FILES': + create_or_update_local_spec(remote,workflow_spec_id) + + changedfiles = get_changed_files(remote,workflow_spec_id,as_df=True) if len(changedfiles)==0: return [] @@ -144,8 +212,7 @@ def sync_changed_files(remote,workflow_spec_id): deletefiles = deletefiles.reset_index().to_dict(orient='records') for delfile in deletefiles: - currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, - FileModel.name == delfile['filename']).first() + currentfile = file_get(workflow_spec_id,delfile['filename']) # it is more appropriate to archive the file than delete # due to the fact that we might have workflows that are using the @@ -154,21 +221,7 @@ def sync_changed_files(remote,workflow_spec_id): session.add(currentfile) for updatefile in updatefiles: - currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id, - FileModel.name == updatefile['filename']).first() - if not currentfile: - currentfile = FileModel() - currentfile.name = updatefile['filename'] - currentfile.workflow_spec_id = workflow_spec_id - - currentfile.date_created = updatefile['date_created'] - currentfile.type = updatefile['type'] - currentfile.primary = updatefile['primary'] - currentfile.content_type = updatefile['content_type'] - currentfile.primary_process_id = updatefile['primary_process_id'] - session.add(currentfile) - content = 
WorkflowSyncService.get_remote_file_by_hash(remote,updatefile['md5_hash']) - FileService.update_file(currentfile,content,updatefile['type']) + update_or_create_current_file(remote,workflow_spec_id,updatefile) session.commit() return [x['filename'] for x in updatefiles] @@ -185,6 +238,13 @@ def get_changed_files(remote,workflow_spec_id,as_df=False): local = get_workflow_spec_files_dataframe(workflow_spec_id).reset_index() local['md5_hash'] = local['md5_hash'].astype('str') remote_files['md5_hash'] = remote_files['md5_hash'].astype('str') + if len(local) == 0: + remote_files['new'] = True + remote_files['location'] = 'remote' + if as_df: + return remote_files + else: + return remote_files.reset_index().to_dict(orient='records') different = remote_files.merge(local, right_on=['filename','md5_hash'], @@ -259,8 +319,14 @@ def get_workflow_spec_files_dataframe(workflowid): Return a list of all files for a workflow_spec along with last updated date and a hash so we can determine file differences for a changed workflow on a box. 
Return a dataframe + + In the special case of "REFERENCE_FILES" we get all of the files that are + marked as is_reference """ - x = session.query(FileDataModel).join(FileModel).filter(FileModel.workflow_spec_id==workflowid) + if workflowid == 'REFERENCE_FILES': + x = session.query(FileDataModel).join(FileModel).filter(FileModel.is_reference == True) + else: + x = session.query(FileDataModel).join(FileModel).filter(FileModel.workflow_spec_id == workflowid) # there might be a cleaner way of getting a data frome from some of the # fields in the ORM - but this works OK filelist = [] @@ -306,7 +372,10 @@ def get_all_spec_state_dataframe(): 'md5_hash':file.md5_hash, 'filename':file.file_model.name, 'date_created':file.date_created}) - df = pd.DataFrame(filelist) + if len(filelist) == 0: + df = pd.DataFrame(columns=['file_model_id','workflow_spec_id','md5_hash','filename','date_created']) + else: + df = pd.DataFrame(filelist) # If the file list is empty, return an empty data frame if df.empty: diff --git a/crc/services/workflow_sync.py b/crc/services/workflow_sync.py index b9f2dd21..849c815e 100644 --- a/crc/services/workflow_sync.py +++ b/crc/services/workflow_sync.py @@ -39,7 +39,7 @@ class WorkflowSyncService(object): try: response = requests.get(url,headers={'X-CR-API-KEY':app.config['API_TOKEN']}) except: - raise ApiError("workflow_sync_error",response.text) + raise ApiError("workflow_sync_error",url) if response.ok and response.text: if return_contents: return response.content diff --git a/tests/base_test.py b/tests/base_test.py index 67510eb6..a7575d9a 100644 --- a/tests/base_test.py +++ b/tests/base_test.py @@ -134,6 +134,15 @@ class BaseTest(unittest.TestCase): return dict(Authorization='Bearer ' + user_model.encode_auth_token().decode()) + def delete_example_data(self, use_crc_data=False, use_rrt_data=False): + """ + delete everything that matters in the local database - this is used to + test ground zero copy of workflow specs. 
+ """ + session.execute("delete from workflow; delete from file_data; delete from file; delete from workflow_spec;") + session.commit() + + def load_example_data(self, use_crc_data=False, use_rrt_data=False): """use_crc_data will cause this to load the mammoth collection of documents we built up developing crc, use_rrt_data will do the same for hte rrt project, diff --git a/tests/data/workflow_sync_responses/test.txt b/tests/data/workflow_sync_responses/test.txt new file mode 100644 index 00000000..0ab68628 --- /dev/null +++ b/tests/data/workflow_sync_responses/test.txt @@ -0,0 +1 @@ +TEST - This is only a test - TEST \ No newline at end of file diff --git a/tests/test_workflow_sync.py b/tests/test_workflow_sync.py index edf79362..1a64c1b4 100644 --- a/tests/test_workflow_sync.py +++ b/tests/test_workflow_sync.py @@ -12,6 +12,27 @@ from crc.models.workflow import WorkflowSpecModel from datetime import datetime from crc.services.file_service import FileService +def get_random_fact_pos(othersys): + """ + Make sure we get the 'random_fact' workflow spec + no matter what order it is in + """ + rf2pos = 0 + for pos in range(len(othersys)): + if othersys[pos]['workflow_spec_id'] == 'random_fact': + rf2pos = pos + return rf2pos + + +def get_random_fact_2_pos(othersys): + """ + Makes sure we get the random_fact2.bpmn file no matter what order it is in + """ + rf2pos = 0 + for pos in range(len(othersys)): + if othersys[pos]['filename'] == 'random_fact2.bpmn': + rf2pos = pos + return rf2pos class TestWorkflowSync(BaseTest): @@ -30,8 +51,9 @@ class TestWorkflowSync(BaseTest): def test_remote_workflow_change(self, mock_get): self.load_example_data() othersys = get_all_spec_state() - othersys[1]['date_created'] = str(datetime.now()) - othersys[1]['md5_hash'] = '12345' + rf2pos = get_random_fact_pos(othersys) + othersys[rf2pos]['date_created'] = str(datetime.now()) + othersys[rf2pos]['md5_hash'] = '12345' mock_get.return_value = othersys response = 
get_changed_workflows('localhost:0000') #endpoint is not used due to mock self.assertIsNotNone(response) @@ -80,28 +102,27 @@ class TestWorkflowSync(BaseTest): self.assertIsNotNone(response) self.assertEqual(response,[]) - @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files') - def test_file_differences(self, mock_get): + def test_file_differences_clean_slate(self, mock_get): + """ This test is basically for coverage""" self.load_example_data() othersys = get_workflow_spec_files('random_fact') - othersys[1]['date_created'] = str(datetime.now()) - othersys[1]['md5_hash'] = '12345' mock_get.return_value = othersys + self.delete_example_data() response = get_changed_files('localhost:0000','random_fact',as_df=False) #endpoint is not used due to mock self.assertIsNotNone(response) - self.assertEqual(len(response),1) - self.assertEqual(response[0]['filename'], 'random_fact2.bpmn') + self.assertEqual(len(response),2) self.assertEqual(response[0]['location'], 'remote') - self.assertEqual(response[0]['new'], False) + self.assertEqual(response[0]['new'], True) @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files') def test_file_differences(self, mock_get): self.load_example_data() othersys = get_workflow_spec_files('random_fact') - othersys[1]['date_created'] = str(datetime.now()) - othersys[1]['md5_hash'] = '12345' + rf2pos = get_random_fact_2_pos(othersys) + othersys[rf2pos]['date_created'] = str(datetime.now()) + othersys[rf2pos]['md5_hash'] = '12345' mock_get.return_value = othersys response = get_changed_files('localhost:0000','random_fact',as_df=False) #endpoint is not used due to mock self.assertIsNotNone(response) @@ -113,19 +134,24 @@ class TestWorkflowSync(BaseTest): @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_file_by_hash') @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files') 
@patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec') - def test_file_differences(self, workflow_mock, spec_files_mock, file_data_mock): + def test_workflow_differences(self, workflow_mock, spec_files_mock, file_data_mock): self.load_example_data() + # make a remote workflow that is slightly different from local remote_workflow = get_workflow_specification('random_fact') self.assertEqual(remote_workflow['display_name'],'Random Fact') remote_workflow['description'] = 'This Workflow came from Remote' remote_workflow['display_name'] = 'Remote Workflow' workflow_mock.return_value = remote_workflow + # change the remote file date and hash othersys = get_workflow_spec_files('random_fact') - othersys[1]['date_created'] = str(datetime.now()) - othersys[1]['md5_hash'] = '12345' + rf2pos = get_random_fact_2_pos(othersys) + othersys[rf2pos]['date_created'] = str(datetime.now()) + othersys[rf2pos]['md5_hash'] = '12345' spec_files_mock.return_value = othersys + # actually go get a different file file_data_mock.return_value = self.workflow_sync_response('random_fact2.bpmn') response = sync_changed_files('localhost:0000','random_fact') # endpoint not used due to mock + # now make sure that everything gets pulled over self.assertIsNotNone(response) self.assertEqual(len(response),1) self.assertEqual(response[0], 'random_fact2.bpmn') @@ -137,6 +163,39 @@ class TestWorkflowSync(BaseTest): + @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_file_by_hash') + @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files') + def test_ref_file_differences(self, spec_files_mock, file_data_mock): + """ + Make sure we copy over a new reference file if it exists + """ + self.load_example_data() + # make a remote workflow that is slightly different from local + othersys = get_workflow_spec_files('REFERENCE_FILES') + newfile = {'file_model_id':9999, + 'workflow_spec_id': None, + 'filename':'test.txt', + 'type':'txt', + 
'primary':False, + 'content_type':'text/text', + 'primary_process_id':None, + 'date_created':str(datetime.now()), + 'md5_hash':'12345' + } + othersys.append(newfile) + spec_files_mock.return_value = othersys + # actually go get a different file + file_data_mock.return_value = self.workflow_sync_response('test.txt') + response = sync_changed_files('localhost:0000','REFERENCE_FILES') # endpoint not used due to mock + # now make sure that everything gets pulled over + self.assertIsNotNone(response) + self.assertEqual(len(response),1) + self.assertEqual(response[0], 'test.txt') + ref_file = FileService.get_reference_file_data('test.txt') + self.assertEqual('24a2ab0d-1138-a80a-0b98-ed38894f5a04',str(ref_file.md5_hash)) + + + @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files') @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec') def test_file_deleted(self, workflow_mock, spec_files_mock): @@ -144,7 +203,8 @@ class TestWorkflowSync(BaseTest): remote_workflow = get_workflow_specification('random_fact') workflow_mock.return_value = remote_workflow othersys = get_workflow_spec_files('random_fact') - del(othersys[1]) + rf2pos = get_random_fact_2_pos(othersys) + del(othersys[rf2pos]) spec_files_mock.return_value = othersys response = sync_changed_files('localhost:0000','random_fact') # endpoint not used due to mock self.assertIsNotNone(response)