Merge pull request #223 from sartography/152-workflow-spec-pull
152 workflow spec pull
commit 691b779a0c
@@ -19,6 +19,14 @@ def join_uuids(uuids):
    return hashlib.md5(combined_uuids.encode('utf8')).hexdigest() # make a hash of the hashes


def verify_token(token, required_scopes):
    """
    Part of the Swagger API permissions for the syncing API
    The env variable for this is defined in config/default.py

    If you are 'playing' with the swagger interface, you will want to copy the
    token that is defined there and use it to authenticate the API if you are
    emulating copying files between systems.
    """
    if token == app.config['API_TOKEN']:
        return {'scope':['any']}
    else:
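For readers trying the sync API from outside the Swagger UI, a minimal sketch of how the token above would be presented; the host and path are placeholders invented for illustration, and the header name is the one used by WorkflowSyncService later in this diff:

    import requests

    API_TOKEN = 'copy-the-value-from-config/default.py'   # hypothetical placeholder
    remote = 'remote.example.com'                          # placeholder host

    # Ask the remote system which workflow specs differ (hypothetical path).
    resp = requests.get(f'http://{remote}/v1.0/workflow_sync/diff',
                        headers={'X-CR-API-KEY': API_TOKEN})
    print(resp.status_code)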
@@ -95,20 +103,44 @@ def get_changed_workflows(remote,as_df=False):


def sync_all_changed_workflows(remote):
    """
    Does what it says: gets a list of all workflows that are different between
    two systems and pulls all of the workflows and files that are different on the
    remote system. The idea is that we can make the local system 'look' like the remote
    system for deployment or testing.
    """
    workflowsdf = get_changed_workflows(remote,as_df=True)
    if len(workflowsdf) == 0:
        return []
    workflows = workflowsdf.reset_index().to_dict(orient='records')
    for workflow in workflows:
        sync_changed_files(remote,workflow['workflow_spec_id'])
    sync_changed_files(remote,'REFERENCE_FILES')
    return [x['workflow_spec_id'] for x in workflows]


def sync_changed_files(remote,workflow_spec_id):
    # make sure that spec is local before syncing files


def file_get(workflow_spec_id,filename):
    """
    Helper function to take care of the special case where we
    are looking for files that are marked is_reference
    """
    if workflow_spec_id == 'REFERENCE_FILES':
        currentfile = session.query(FileModel).filter(FileModel.is_reference == True,
                                                      FileModel.name == filename).first()
    else:
        currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id,
                                                      FileModel.name == filename).first()
    return currentfile


    specdict = WorkflowSyncService.get_remote_workflow_spec(remote,workflow_spec_id)
def create_or_update_local_spec(remote,workflow_spec_id):
    specdict = WorkflowSyncService.get_remote_workflow_spec(remote, workflow_spec_id)
    # if we are updating from a master spec, then we want to make sure it is the only
    # master spec in our local system.
    if specdict['is_master_spec']:
        masterspecs = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.is_master_spec == True).all()
        for masterspec in masterspecs:
            masterspec.is_master_spec = False
            session.add(masterspec)

    localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first()
    if localspec is None:
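A sketch of the intended pull flow built from the functions added above, assuming the project context (imports, database session, configuration) is already in place; the host is a placeholder:

    # Pull every workflow spec that differs from the remote system,
    # then pull the shared reference files.
    changed_spec_ids = sync_all_changed_workflows('remote.example.com')
    print('specs pulled from the remote system:', changed_spec_ids)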
@@ -120,7 +152,7 @@ def sync_changed_files(remote,workflow_spec_id):
        localcategory = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.name
                                                                        == specdict['category']['name']).first()
        if localcategory == None:
            #category doesn't exist - lets make it
            # category doesn't exist - let's make it
            localcategory = WorkflowSpecCategoryModel()
            localcategory.name = specdict['category']['name']
            localcategory.display_name = specdict['category']['display_name']
@@ -131,9 +163,45 @@ def sync_changed_files(remote,workflow_spec_id):
    localspec.display_order = specdict['display_order']
    localspec.display_name = specdict['display_name']
    localspec.name = specdict['name']
    localspec.is_master_spec = specdict['is_master_spec']
    localspec.description = specdict['description']
    session.add(localspec)


def update_or_create_current_file(remote,workflow_spec_id,updatefile):
    currentfile = file_get(workflow_spec_id, updatefile['filename'])
    if not currentfile:
        currentfile = FileModel()
        currentfile.name = updatefile['filename']
        if workflow_spec_id == 'REFERENCE_FILES':
            currentfile.workflow_spec_id = None
            currentfile.is_reference = True
        else:
            currentfile.workflow_spec_id = workflow_spec_id

    currentfile.date_created = updatefile['date_created']
    currentfile.type = updatefile['type']
    currentfile.primary = updatefile['primary']
    currentfile.content_type = updatefile['content_type']
    currentfile.primary_process_id = updatefile['primary_process_id']
    session.add(currentfile)
    content = WorkflowSyncService.get_remote_file_by_hash(remote, updatefile['md5_hash'])
    FileService.update_file(currentfile, content, updatefile['type'])


def sync_changed_files(remote,workflow_spec_id):
    """
    This grabs a list of all files for a workflow_spec that are different between systems,
    and gets the remote copy of any file that has changed.

    We also have a special case for "REFERENCE_FILES" where there is no workflow_spec_id,
    but all of the files are marked in the database as is_reference - and they need to be
    handled slightly differently.
    """
    # make sure that spec is local before syncing files
    if workflow_spec_id != 'REFERENCE_FILES':
        create_or_update_local_spec(remote,workflow_spec_id)

    changedfiles = get_changed_files(remote,workflow_spec_id,as_df=True)
    if len(changedfiles)==0:
        return []
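For orientation, a sketch of the record shape that update_or_create_current_file consumes; the keys are the ones read in the hunk above, the values here are made up:

    updatefile = {
        'filename': 'random_fact2.bpmn',
        'type': 'bpmn',
        'primary': False,
        'content_type': 'text/xml',
        'primary_process_id': None,
        'date_created': '2021-01-01 00:00:00',
        'md5_hash': '12345',   # used to fetch the file content from the remote system
    }
    # update_or_create_current_file('remote.example.com', 'random_fact', updatefile)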
@@ -144,8 +212,7 @@ def sync_changed_files(remote,workflow_spec_id):
    deletefiles = deletefiles.reset_index().to_dict(orient='records')

    for delfile in deletefiles:
        currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id,
                                                      FileModel.name == delfile['filename']).first()
        currentfile = file_get(workflow_spec_id,delfile['filename'])

        # it is more appropriate to archive the file than delete
        # due to the fact that we might have workflows that are using the
@@ -154,21 +221,7 @@ def sync_changed_files(remote,workflow_spec_id):
        session.add(currentfile)

    for updatefile in updatefiles:
        currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id,
                                                      FileModel.name == updatefile['filename']).first()
        if not currentfile:
            currentfile = FileModel()
            currentfile.name = updatefile['filename']
            currentfile.workflow_spec_id = workflow_spec_id

        currentfile.date_created = updatefile['date_created']
        currentfile.type = updatefile['type']
        currentfile.primary = updatefile['primary']
        currentfile.content_type = updatefile['content_type']
        currentfile.primary_process_id = updatefile['primary_process_id']
        session.add(currentfile)
        content = WorkflowSyncService.get_remote_file_by_hash(remote,updatefile['md5_hash'])
        FileService.update_file(currentfile,content,updatefile['type'])
        update_or_create_current_file(remote,workflow_spec_id,updatefile)
    session.commit()
    return [x['filename'] for x in updatefiles]
@@ -185,6 +238,13 @@ def get_changed_files(remote,workflow_spec_id,as_df=False):
    local = get_workflow_spec_files_dataframe(workflow_spec_id).reset_index()
    local['md5_hash'] = local['md5_hash'].astype('str')
    remote_files['md5_hash'] = remote_files['md5_hash'].astype('str')
    if len(local) == 0:
        remote_files['new'] = True
        remote_files['location'] = 'remote'
        if as_df:
            return remote_files
        else:
            return remote_files.reset_index().to_dict(orient='records')

    different = remote_files.merge(local,
                                   right_on=['filename','md5_hash'],
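The comparison idea, as a minimal self-contained sketch with made-up file records (the real code merges on filename and md5_hash with separate left_on/right_on arguments and carries extra columns):

    import pandas as pd

    remote_files = pd.DataFrame([{'filename': 'a.bpmn', 'md5_hash': '111'},
                                 {'filename': 'b.bpmn', 'md5_hash': '222'}])   # changed remotely
    local = pd.DataFrame([{'filename': 'a.bpmn', 'md5_hash': '111'},
                          {'filename': 'b.bpmn', 'md5_hash': '999'}])

    # rows that do not match on both columns are the changed files
    different = remote_files.merge(local, on=['filename', 'md5_hash'],
                                   how='outer', indicator=True)
    print(different[different['_merge'] != 'both'])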
@@ -259,8 +319,14 @@ def get_workflow_spec_files_dataframe(workflowid):
    Return a list of all files for a workflow_spec along with last updated date and a
    hash so we can determine file differences for a changed workflow on a box.
    Returns a dataframe.

    In the special case of "REFERENCE_FILES" we get all of the files that are
    marked as is_reference
    """
    x = session.query(FileDataModel).join(FileModel).filter(FileModel.workflow_spec_id==workflowid)
    if workflowid == 'REFERENCE_FILES':
        x = session.query(FileDataModel).join(FileModel).filter(FileModel.is_reference == True)
    else:
        x = session.query(FileDataModel).join(FileModel).filter(FileModel.workflow_spec_id == workflowid)
    # there might be a cleaner way of getting a data frame from some of the
    # fields in the ORM - but this works OK
    filelist = []
@@ -306,7 +372,10 @@ def get_all_spec_state_dataframe():
                         'md5_hash':file.md5_hash,
                         'filename':file.file_model.name,
                         'date_created':file.date_created})
    df = pd.DataFrame(filelist)
    if len(filelist) == 0:
        df = pd.DataFrame(columns=['file_model_id','workflow_spec_id','md5_hash','filename','date_created'])
    else:
        df = pd.DataFrame(filelist)

    # If the file list is empty, return an empty data frame
    if df.empty:
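Why the explicit column list matters: a DataFrame built from an empty list has no columns at all, so any later column access would raise a KeyError. A small standalone sketch using the column names from the hunk above:

    import pandas as pd

    print(pd.DataFrame([]).columns.tolist())   # -> []  (no columns; df['md5_hash'] would fail)

    empty = pd.DataFrame(columns=['file_model_id', 'workflow_spec_id',
                                  'md5_hash', 'filename', 'date_created'])
    print(empty.empty, list(empty.columns))    # -> True, plus the expected column names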
@@ -39,7 +39,7 @@ class WorkflowSyncService(object):
        try:
            response = requests.get(url,headers={'X-CR-API-KEY':app.config['API_TOKEN']})
        except:
            raise ApiError("workflow_sync_error",response.text)
            raise ApiError("workflow_sync_error",url)
        if response.ok and response.text:
            if return_contents:
                return response.content
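The likely reason for the one-line change above (my reading, not stated in the PR): if requests.get itself raises, response is never assigned, so response.text inside the except block would fail with a second error, while the url is always available. A standalone sketch with a placeholder host:

    import requests

    url = 'http://remote.example.invalid/'   # placeholder host that will not resolve
    try:
        response = requests.get(url, timeout=1)
    except requests.RequestException:
        # `response` was never bound here, so `response.text` would raise its own
        # error; report the url instead, which always exists.
        print(f'workflow_sync_error while calling {url}')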
@@ -134,6 +134,15 @@ class BaseTest(unittest.TestCase):

        return dict(Authorization='Bearer ' + user_model.encode_auth_token().decode())

    def delete_example_data(self, use_crc_data=False, use_rrt_data=False):
        """
        Delete everything that matters in the local database - this is used to
        test a ground-zero copy of the workflow specs.
        """
        session.execute("delete from workflow; delete from file_data; delete from file; delete from workflow_spec;")
        session.commit()


    def load_example_data(self, use_crc_data=False, use_rrt_data=False):
        """use_crc_data will cause this to load the mammoth collection of documents
        we built up developing crc, use_rrt_data will do the same for the rrt project,
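A note on the delete order in the raw SQL above (an assumption about the schema, not something the PR states): child tables appear to be cleared before their parents so foreign-key constraints are not violated. The statement-by-statement equivalent would be:

    for table in ('workflow', 'file_data', 'file', 'workflow_spec'):
        session.execute(f'delete from {table};')   # children first, then parents
    session.commit()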
@@ -0,0 +1 @@
TEST - This is only a test - TEST
@@ -12,6 +12,27 @@ from crc.models.workflow import WorkflowSpecModel
from datetime import datetime
from crc.services.file_service import FileService


def get_random_fact_pos(othersys):
    """
    Make sure we get the 'random_fact' workflow spec
    no matter what order it is in
    """
    rf2pos = 0
    for pos in range(len(othersys)):
        if othersys[pos]['workflow_spec_id'] == 'random_fact':
            rf2pos = pos
    return rf2pos


def get_random_fact_2_pos(othersys):
    """
    Makes sure we get the random_fact2.bpmn file no matter what order it is in
    """
    rf2pos = 0
    for pos in range(len(othersys)):
        if othersys[pos]['filename'] == 'random_fact2.bpmn':
            rf2pos = pos
    return rf2pos


class TestWorkflowSync(BaseTest):
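As an aside, not part of the PR: the same position lookup could be written with next() over a generator. A self-contained sketch with made-up records:

    records = [{'filename': 'random_fact.bpmn'}, {'filename': 'random_fact2.bpmn'}]

    # index of the first matching record, defaulting to 0 like the helpers above
    pos = next((i for i, rec in enumerate(records)
                if rec['filename'] == 'random_fact2.bpmn'), 0)
    print(pos)   # -> 1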
@@ -30,8 +51,9 @@ class TestWorkflowSync(BaseTest):
    def test_remote_workflow_change(self, mock_get):
        self.load_example_data()
        othersys = get_all_spec_state()
        othersys[1]['date_created'] = str(datetime.now())
        othersys[1]['md5_hash'] = '12345'
        rf2pos = get_random_fact_pos(othersys)
        othersys[rf2pos]['date_created'] = str(datetime.now())
        othersys[rf2pos]['md5_hash'] = '12345'
        mock_get.return_value = othersys
        response = get_changed_workflows('localhost:0000') #endpoint is not used due to mock
        self.assertIsNotNone(response)
@@ -80,28 +102,27 @@ class TestWorkflowSync(BaseTest):
        self.assertIsNotNone(response)
        self.assertEqual(response,[])


    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
    def test_file_differences(self, mock_get):
    def test_file_differences_clean_slate(self, mock_get):
        """ This test is basically for coverage"""
        self.load_example_data()
        othersys = get_workflow_spec_files('random_fact')
        othersys[1]['date_created'] = str(datetime.now())
        othersys[1]['md5_hash'] = '12345'
        mock_get.return_value = othersys
        self.delete_example_data()
        response = get_changed_files('localhost:0000','random_fact',as_df=False) #endpoint is not used due to mock
        self.assertIsNotNone(response)
        self.assertEqual(len(response),1)
        self.assertEqual(response[0]['filename'], 'random_fact2.bpmn')
        self.assertEqual(len(response),2)
        self.assertEqual(response[0]['location'], 'remote')
        self.assertEqual(response[0]['new'], False)
        self.assertEqual(response[0]['new'], True)


    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
    def test_file_differences(self, mock_get):
        self.load_example_data()
        othersys = get_workflow_spec_files('random_fact')
        othersys[1]['date_created'] = str(datetime.now())
        othersys[1]['md5_hash'] = '12345'
        rf2pos = get_random_fact_2_pos(othersys)
        othersys[rf2pos]['date_created'] = str(datetime.now())
        othersys[rf2pos]['md5_hash'] = '12345'
        mock_get.return_value = othersys
        response = get_changed_files('localhost:0000','random_fact',as_df=False) #endpoint is not used due to mock
        self.assertIsNotNone(response)
@@ -113,19 +134,24 @@ class TestWorkflowSync(BaseTest):
    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_file_by_hash')
    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec')
    def test_file_differences(self, workflow_mock, spec_files_mock, file_data_mock):
    def test_workflow_differences(self, workflow_mock, spec_files_mock, file_data_mock):
        self.load_example_data()
        # make a remote workflow that is slightly different from local
        remote_workflow = get_workflow_specification('random_fact')
        self.assertEqual(remote_workflow['display_name'],'Random Fact')
        remote_workflow['description'] = 'This Workflow came from Remote'
        remote_workflow['display_name'] = 'Remote Workflow'
        workflow_mock.return_value = remote_workflow
        # change the remote file date and hash
        othersys = get_workflow_spec_files('random_fact')
        othersys[1]['date_created'] = str(datetime.now())
        othersys[1]['md5_hash'] = '12345'
        rf2pos = get_random_fact_2_pos(othersys)
        othersys[rf2pos]['date_created'] = str(datetime.now())
        othersys[rf2pos]['md5_hash'] = '12345'
        spec_files_mock.return_value = othersys
        # actually go get a different file
        file_data_mock.return_value = self.workflow_sync_response('random_fact2.bpmn')
        response = sync_changed_files('localhost:0000','random_fact') # endpoint not used due to mock
        # now make sure that everything gets pulled over
        self.assertIsNotNone(response)
        self.assertEqual(len(response),1)
        self.assertEqual(response[0], 'random_fact2.bpmn')
@@ -137,6 +163,39 @@ class TestWorkflowSync(BaseTest):


    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_file_by_hash')
    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
    def test_ref_file_differences(self, spec_files_mock, file_data_mock):
        """
        Make sure we copy over a new reference file if it exists
        """
        self.load_example_data()
        # make a remote workflow that is slightly different from local
        othersys = get_workflow_spec_files('REFERENCE_FILES')
        newfile = {'file_model_id':9999,
                   'workflow_spec_id': None,
                   'filename':'test.txt',
                   'type':'txt',
                   'primary':False,
                   'content_type':'text/text',
                   'primary_process_id':None,
                   'date_created':str(datetime.now()),
                   'md5_hash':'12345'
                   }
        othersys.append(newfile)
        spec_files_mock.return_value = othersys
        # actually go get a different file
        file_data_mock.return_value = self.workflow_sync_response('test.txt')
        response = sync_changed_files('localhost:0000','REFERENCE_FILES') # endpoint not used due to mock
        # now make sure that everything gets pulled over
        self.assertIsNotNone(response)
        self.assertEqual(len(response),1)
        self.assertEqual(response[0], 'test.txt')
        ref_file = FileService.get_reference_file_data('test.txt')
        self.assertEqual('24a2ab0d-1138-a80a-0b98-ed38894f5a04',str(ref_file.md5_hash))


    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
    @patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec')
    def test_file_deleted(self, workflow_mock, spec_files_mock):
@@ -144,7 +203,8 @@ class TestWorkflowSync(BaseTest):
        remote_workflow = get_workflow_specification('random_fact')
        workflow_mock.return_value = remote_workflow
        othersys = get_workflow_spec_files('random_fact')
        del(othersys[1])
        rf2pos = get_random_fact_2_pos(othersys)
        del(othersys[rf2pos])
        spec_files_mock.return_value = othersys
        response = sync_changed_files('localhost:0000','random_fact') # endpoint not used due to mock
        self.assertIsNotNone(response)