Merge pull request #223 from sartography/152-workflow-spec-pull

152 workflow spec pull
Dan Funk 2021-01-27 09:39:09 -05:00 committed by GitHub
commit 691b779a0c
5 changed files with 179 additions and 40 deletions

File 1 of 5

@@ -19,6 +19,14 @@ def join_uuids(uuids):
return hashlib.md5(combined_uuids.encode('utf8')).hexdigest() # make a hash of the hashes
def verify_token(token, required_scopes):
"""
Part of the Swagger API permissions for the syncing API.
The env variable for this is defined in config/default.py.
If you are experimenting with the Swagger interface, copy the token
defined there and use it to authenticate the API calls when emulating
copying files between systems.
"""
if token == app.config['API_TOKEN']:
return {'scope':['any']}
else:
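Aside, for orientation: callers satisfy this check by sending the shared token in an X-CR-API-KEY header, which is how the WorkflowSyncService client further down this diff does it. A minimal sketch, assuming a hypothetical remote URL:

import requests

API_TOKEN = 'change-me'  # placeholder; the real value comes from config/default.py
url = 'https://remote.example.com/v1.0/workflow_spec/all'  # hypothetical endpoint
resp = requests.get(url, headers={'X-CR-API-KEY': API_TOKEN})
# On the receiving side, verify_token() compares this value against
# app.config['API_TOKEN'] and returns {'scope': ['any']} on a match.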
@@ -95,20 +103,44 @@ def get_changed_workflows(remote,as_df=False):
def sync_all_changed_workflows(remote):
"""
Does what it says: gets a list of all workflows that are different between
the two systems, and pulls all of the workflows and files that are different on the
remote system. The idea is that we can make the local system 'look' like the remote
system for deployment or testing.
"""
workflowsdf = get_changed_workflows(remote,as_df=True)
if len(workflowsdf) ==0:
return []
workflows = workflowsdf.reset_index().to_dict(orient='records')
for workflow in workflows:
sync_changed_files(remote,workflow['workflow_spec_id'])
sync_changed_files(remote,'REFERENCE_FILES')
return [x['workflow_spec_id'] for x in workflows]
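A hedged usage sketch for the function above; the hostname is hypothetical and the module path is assumed rather than confirmed by this diff:

from crc.api.workflow_sync import sync_all_changed_workflows  # module path assumed

pulled = sync_all_changed_workflows('remote.example.com')  # hypothetical remote
print(pulled)  # e.g. ['random_fact'] when only that spec differs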
-def sync_changed_files(remote,workflow_spec_id):
-# make sure that spec is local before syncing files
def file_get(workflow_spec_id,filename):
"""
Helper function to take care of the special case where we
are looking for files that are marked as is_reference
"""
if workflow_spec_id == 'REFERENCE_FILES':
currentfile = session.query(FileModel).filter(FileModel.is_reference == True,
FileModel.name == filename).first()
else:
currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id,
FileModel.name == filename).first()
return currentfile
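To illustrate the two branches of this helper (spec id and filenames taken from the tests later in this diff):

bpmn = file_get('random_fact', 'random_fact2.bpmn')  # matched on workflow_spec_id + name
ref = file_get('REFERENCE_FILES', 'test.txt')        # matched on is_reference + name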
-specdict = WorkflowSyncService.get_remote_workflow_spec(remote,workflow_spec_id)
def create_or_update_local_spec(remote,workflow_spec_id):
specdict = WorkflowSyncService.get_remote_workflow_spec(remote, workflow_spec_id)
# if we are updating from a master spec, then we want to make sure it is the only
# master spec in our local system.
if specdict['is_master_spec']:
masterspecs = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.is_master_spec == True).all()
for masterspec in masterspecs:
masterspec.is_master_spec = False
session.add(masterspec)
localspec = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id == workflow_spec_id).first()
if localspec is None:
@@ -120,7 +152,7 @@ def sync_changed_files(remote,workflow_spec_id):
localcategory = session.query(WorkflowSpecCategoryModel).filter(WorkflowSpecCategoryModel.name
== specdict['category']['name']).first()
if localcategory == None:
-#category doesn't exist - lets make it
+# category doesn't exist - lets make it
localcategory = WorkflowSpecCategoryModel()
localcategory.name = specdict['category']['name']
localcategory.display_name = specdict['category']['display_name']
@@ -131,9 +163,45 @@ def sync_changed_files(remote,workflow_spec_id):
localspec.display_order = specdict['display_order']
localspec.display_name = specdict['display_name']
localspec.name = specdict['name']
localspec.is_master_spec = specdict['is_master_spec']
localspec.description = specdict['description']
session.add(localspec)
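The master-spec block above maintains an invariant worth stating: at most one local WorkflowSpecModel should carry is_master_spec, so pulling a remote master spec demotes any existing local ones first. A sketch of the guarantee this is meant to preserve, reusing the session from this module:

masters = session.query(WorkflowSpecModel).filter(
    WorkflowSpecModel.is_master_spec == True).all()
assert len(masters) <= 1  # expected to hold after create_or_update_local_spec runs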
def update_or_create_current_file(remote,workflow_spec_id,updatefile):
currentfile = file_get(workflow_spec_id, updatefile['filename'])
if not currentfile:
currentfile = FileModel()
currentfile.name = updatefile['filename']
if workflow_spec_id == 'REFERENCE_FILES':
currentfile.workflow_spec_id = None
currentfile.is_reference = True
else:
currentfile.workflow_spec_id = workflow_spec_id
currentfile.date_created = updatefile['date_created']
currentfile.type = updatefile['type']
currentfile.primary = updatefile['primary']
currentfile.content_type = updatefile['content_type']
currentfile.primary_process_id = updatefile['primary_process_id']
session.add(currentfile)
content = WorkflowSyncService.get_remote_file_by_hash(remote, updatefile['md5_hash'])
FileService.update_file(currentfile, content, updatefile['type'])
def sync_changed_files(remote,workflow_spec_id):
"""
This grabs a list of all files for a workflow_spec that are different between systems,
and gets the remote copy of any file that has changed.
We also have a special case for "REFERENCE_FILES" where there is no workflow_spec_id,
but all of the files are marked in the database as is_reference - and they need to be
handled slightly differently.
"""
# make sure that spec is local before syncing files
if workflow_spec_id != 'REFERENCE_FILES':
create_or_update_local_spec(remote,workflow_spec_id)
changedfiles = get_changed_files(remote,workflow_spec_id,as_df=True)
if len(changedfiles)==0:
return []
@@ -144,8 +212,7 @@ def sync_changed_files(remote,workflow_spec_id):
deletefiles = deletefiles.reset_index().to_dict(orient='records')
for delfile in deletefiles:
-currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id,
-FileModel.name == delfile['filename']).first()
+currentfile = file_get(workflow_spec_id,delfile['filename'])
# it is more appropriate to archive the file than delete
# due to the fact that we might have workflows that are using the
@@ -154,21 +221,7 @@ def sync_changed_files(remote,workflow_spec_id):
session.add(currentfile)
for updatefile in updatefiles:
-currentfile = session.query(FileModel).filter(FileModel.workflow_spec_id==workflow_spec_id,
-FileModel.name == updatefile['filename']).first()
-if not currentfile:
-currentfile = FileModel()
-currentfile.name = updatefile['filename']
-currentfile.workflow_spec_id = workflow_spec_id
-currentfile.date_created = updatefile['date_created']
-currentfile.type = updatefile['type']
-currentfile.primary = updatefile['primary']
-currentfile.content_type = updatefile['content_type']
-currentfile.primary_process_id = updatefile['primary_process_id']
-session.add(currentfile)
-content = WorkflowSyncService.get_remote_file_by_hash(remote,updatefile['md5_hash'])
-FileService.update_file(currentfile,content,updatefile['type'])
+update_or_create_current_file(remote,workflow_spec_id,updatefile)
session.commit()
return [x['filename'] for x in updatefiles]
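Putting the pieces together, a sketch of the per-spec flow that sync_all_changed_workflows drives, with a hypothetical hostname:

updated = sync_changed_files('remote.example.com', 'random_fact')
# e.g. ['random_fact2.bpmn'] if only that file's md5_hash differed
sync_changed_files('remote.example.com', 'REFERENCE_FILES')  # then the shared reference files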
@@ -185,6 +238,13 @@ def get_changed_files(remote,workflow_spec_id,as_df=False):
local = get_workflow_spec_files_dataframe(workflow_spec_id).reset_index()
local['md5_hash'] = local['md5_hash'].astype('str')
remote_files['md5_hash'] = remote_files['md5_hash'].astype('str')
if len(local) == 0:
remote_files['new'] = True
remote_files['location'] = 'remote'
if as_df:
return remote_files
else:
return remote_files.reset_index().to_dict(orient='records')
different = remote_files.merge(local,
right_on=['filename','md5_hash'],
@@ -259,8 +319,14 @@ def get_workflow_spec_files_dataframe(workflowid):
Return a dataframe of all files for a workflow_spec, along with the last
updated date and a hash, so we can determine file differences for a
changed workflow on a box.
In the special case of "REFERENCE_FILES" we get all of the files that are
marked as is_reference
"""
-x = session.query(FileDataModel).join(FileModel).filter(FileModel.workflow_spec_id==workflowid)
+if workflowid == 'REFERENCE_FILES':
+x = session.query(FileDataModel).join(FileModel).filter(FileModel.is_reference == True)
+else:
+x = session.query(FileDataModel).join(FileModel).filter(FileModel.workflow_spec_id == workflowid)
# there might be a cleaner way of getting a data frame from some of the
# fields in the ORM - but this works OK
filelist = []
@@ -306,7 +372,10 @@ def get_all_spec_state_dataframe():
'md5_hash':file.md5_hash,
'filename':file.file_model.name,
'date_created':file.date_created})
-df = pd.DataFrame(filelist)
+if len(filelist) == 0:
+df = pd.DataFrame(columns=['file_model_id','workflow_spec_id','md5_hash','filename','date_created'])
+else:
+df = pd.DataFrame(filelist)
# If the file list is empty, return an empty data frame
if df.empty:
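The explicit column list in the change above matters more than it looks: a DataFrame built from an empty list has no columns at all, so downstream code that selects or merges on 'md5_hash' would raise a KeyError rather than see an empty column. A quick pandas illustration:

import pandas as pd

pd.DataFrame([]).columns.tolist()                    # [] - nothing to select
pd.DataFrame(columns=['md5_hash']).columns.tolist()  # ['md5_hash'] - safe to select or merge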

File 2 of 5

@@ -39,7 +39,7 @@ class WorkflowSyncService(object):
try:
response = requests.get(url,headers={'X-CR-API-KEY':app.config['API_TOKEN']})
except:
-raise ApiError("workflow_sync_error",response.text)
+raise ApiError("workflow_sync_error",url)
if response.ok and response.text:
if return_contents:
return response.content
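The one-line change above is a bug fix as much as a message tweak: if requests.get() itself raises (connection refused, bad DNS), response was never bound, so the old except body died with UnboundLocalError and masked the real failure, while url is always in scope. A minimal sketch of the failure mode, with the import path assumed:

import requests
from crc.api.common import ApiError  # import path assumed

def fetch(url):
    try:
        response = requests.get(url)  # may raise before 'response' is ever bound
    except:  # bare except mirrors the original code
        # old: ApiError("workflow_sync_error", response.text) -> UnboundLocalError here
        raise ApiError("workflow_sync_error", url)  # url is always available
    return response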

File 3 of 5

@@ -134,6 +134,15 @@ class BaseTest(unittest.TestCase):
return dict(Authorization='Bearer ' + user_model.encode_auth_token().decode())
def delete_example_data(self, use_crc_data=False, use_rrt_data=False):
"""
Delete everything that matters in the local database. This is used to
test a ground-zero copy of workflow specs.
"""
session.execute("delete from workflow; delete from file_data; delete from file; delete from workflow_spec;")
session.commit()
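A sketch of how this new helper is used by the clean-slate test later in this diff: load the fixtures, capture the remote view, then wipe the local database so every remote file reads as new:

self.load_example_data()
othersys = get_workflow_spec_files('random_fact')
mock_get.return_value = othersys
self.delete_example_data()  # local side is now empty
response = get_changed_files('localhost:0000', 'random_fact', as_df=False)
# each entry now has new=True and location='remote'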
def load_example_data(self, use_crc_data=False, use_rrt_data=False):
"""use_crc_data will cause this to load the mammoth collection of documents
we built up developing crc, use_rrt_data will do the same for hte rrt project,

File 4 of 5

@@ -0,0 +1 @@
TEST - This is only a test - TEST

File 5 of 5

@@ -12,6 +12,27 @@ from crc.models.workflow import WorkflowSpecModel
from datetime import datetime
from crc.services.file_service import FileService
def get_random_fact_pos(othersys):
"""
Return the position of the 'random_fact' workflow spec,
no matter what order the list is in.
"""
rf2pos = 0
for pos in range(len(othersys)):
if othersys[pos]['workflow_spec_id'] == 'random_fact':
rf2pos = pos
return rf2pos
def get_random_fact_2_pos(othersys):
"""
Return the position of the random_fact2.bpmn file, no matter what order the list is in.
"""
rf2pos = 0
for pos in range(len(othersys)):
if othersys[pos]['filename'] == 'random_fact2.bpmn':
rf2pos = pos
return rf2pos
class TestWorkflowSync(BaseTest):
@@ -30,8 +51,9 @@ class TestWorkflowSync(BaseTest):
def test_remote_workflow_change(self, mock_get):
self.load_example_data()
othersys = get_all_spec_state()
-othersys[1]['date_created'] = str(datetime.now())
-othersys[1]['md5_hash'] = '12345'
+rf2pos = get_random_fact_pos(othersys)
+othersys[rf2pos]['date_created'] = str(datetime.now())
+othersys[rf2pos]['md5_hash'] = '12345'
mock_get.return_value = othersys
response = get_changed_workflows('localhost:0000') #endpoint is not used due to mock
self.assertIsNotNone(response)
@@ -80,28 +102,27 @@ class TestWorkflowSync(BaseTest):
self.assertIsNotNone(response)
self.assertEqual(response,[])
@patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
-def test_file_differences(self, mock_get):
+def test_file_differences_clean_slate(self, mock_get):
""" This test is basically for coverage"""
self.load_example_data()
othersys = get_workflow_spec_files('random_fact')
othersys[1]['date_created'] = str(datetime.now())
othersys[1]['md5_hash'] = '12345'
mock_get.return_value = othersys
+self.delete_example_data()
response = get_changed_files('localhost:0000','random_fact',as_df=False) #endpoint is not used due to mock
self.assertIsNotNone(response)
-self.assertEqual(len(response),1)
-self.assertEqual(response[0]['filename'], 'random_fact2.bpmn')
+self.assertEqual(len(response),2)
self.assertEqual(response[0]['location'], 'remote')
-self.assertEqual(response[0]['new'], False)
+self.assertEqual(response[0]['new'], True)
@patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
def test_file_differences(self, mock_get):
self.load_example_data()
othersys = get_workflow_spec_files('random_fact')
-othersys[1]['date_created'] = str(datetime.now())
-othersys[1]['md5_hash'] = '12345'
+rf2pos = get_random_fact_2_pos(othersys)
+othersys[rf2pos]['date_created'] = str(datetime.now())
+othersys[rf2pos]['md5_hash'] = '12345'
mock_get.return_value = othersys
response = get_changed_files('localhost:0000','random_fact',as_df=False) #endpoint is not used due to mock
self.assertIsNotNone(response)
@@ -113,19 +134,24 @@ class TestWorkflowSync(BaseTest):
@patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_file_by_hash')
@patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
@patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec')
-def test_file_differences(self, workflow_mock, spec_files_mock, file_data_mock):
+def test_workflow_differences(self, workflow_mock, spec_files_mock, file_data_mock):
self.load_example_data()
# make a remote workflow that is slightly different from local
remote_workflow = get_workflow_specification('random_fact')
self.assertEqual(remote_workflow['display_name'],'Random Fact')
remote_workflow['description'] = 'This Workflow came from Remote'
remote_workflow['display_name'] = 'Remote Workflow'
workflow_mock.return_value = remote_workflow
# change the remote file date and hash
othersys = get_workflow_spec_files('random_fact')
-othersys[1]['date_created'] = str(datetime.now())
-othersys[1]['md5_hash'] = '12345'
+rf2pos = get_random_fact_2_pos(othersys)
+othersys[rf2pos]['date_created'] = str(datetime.now())
+othersys[rf2pos]['md5_hash'] = '12345'
spec_files_mock.return_value = othersys
# actually go get a different file
file_data_mock.return_value = self.workflow_sync_response('random_fact2.bpmn')
response = sync_changed_files('localhost:0000','random_fact') # endpoint not used due to mock
# now make sure that everything gets pulled over
self.assertIsNotNone(response)
self.assertEqual(len(response),1)
self.assertEqual(response[0], 'random_fact2.bpmn')
@@ -137,6 +163,39 @@ class TestWorkflowSync(BaseTest):
@patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_file_by_hash')
@patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
def test_ref_file_differences(self, spec_files_mock, file_data_mock):
"""
Make sure we copy over a new reference file if it exists
"""
self.load_example_data()
# make a remote workflow that is slightly different from local
othersys = get_workflow_spec_files('REFERENCE_FILES')
newfile = {'file_model_id':9999,
'workflow_spec_id': None,
'filename':'test.txt',
'type':'txt',
'primary':False,
'content_type':'text/text',
'primary_process_id':None,
'date_created':str(datetime.now()),
'md5_hash':'12345'
}
othersys.append(newfile)
spec_files_mock.return_value = othersys
# actually go get a different file
file_data_mock.return_value = self.workflow_sync_response('test.txt')
response = sync_changed_files('localhost:0000','REFERENCE_FILES') # endpoint not used due to mock
# now make sure that everything gets pulled over
self.assertIsNotNone(response)
self.assertEqual(len(response),1)
self.assertEqual(response[0], 'test.txt')
ref_file = FileService.get_reference_file_data('test.txt')
self.assertEqual('24a2ab0d-1138-a80a-0b98-ed38894f5a04',str(ref_file.md5_hash))
@patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec_files')
@patch('crc.services.workflow_sync.WorkflowSyncService.get_remote_workflow_spec')
def test_file_deleted(self, workflow_mock, spec_files_mock):
@@ -144,7 +203,8 @@ class TestWorkflowSync(BaseTest):
remote_workflow = get_workflow_specification('random_fact')
workflow_mock.return_value = remote_workflow
othersys = get_workflow_spec_files('random_fact')
-del(othersys[1])
+rf2pos = get_random_fact_2_pos(othersys)
+del(othersys[rf2pos])
spec_files_mock.return_value = othersys
response = sync_changed_files('localhost:0000','random_fact') # endpoint not used due to mock
self.assertIsNotNone(response)