commit 26655b26d3

    merging in dev, and fixing a failing test from dev.
@@ -99,6 +99,13 @@ def load_example_rrt_data():
     ExampleDataLoader.clean_db()
     ExampleDataLoader().load_rrt()
 
+
+@app.cli.command()
+def load_reference_files():
+    """Load example data into the database."""
+    from example_data import ExampleDataLoader
+    ExampleDataLoader().load_reference_documents()
+
 @app.cli.command()
 def clear_db():
     """Load example data into the database."""
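Note: a quick way to exercise the new command without a shell is Flask's built-in Click test runner. A minimal sketch, not part of this commit; `app` is assumed to be the Flask application these commands are registered on, and newer Click versions may expose the command name as 'load-reference-files':

    runner = app.test_cli_runner()                          # Flask >= 1.0 ships a Click test runner
    result = runner.invoke(args=['load_reference_files'])   # run the new CLI command in-process
    assert result.exit_code == 0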
@@ -37,42 +37,55 @@ def get_changed_workflows(remote,as_df=False):
     # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index
     local = get_all_spec_state_dataframe().reset_index()
 
+    if local.empty:
+        # return the list as a dict, let swagger convert it to json
+        remote_workflows['new'] = True
+        if as_df:
+            return remote_workflows
+        else:
+            return remote_workflows.reset_index().to_dict(orient='records')
+
     # merge these on workflow spec id and hash - this will
     # make two different date columns date_x and date_y
     different = remote_workflows.merge(local,
                                        right_on=['workflow_spec_id','md5_hash'],
                                        left_on=['workflow_spec_id','md5_hash'],
                                        how = 'outer' ,
                                        indicator=True).loc[lambda x : x['_merge']!='both']
-    if len(different)==0:
-        return []
-    # each line has a tag on it - if was in the left or the right,
-    # label it so we know if that was on the remote or local machine
-    different.loc[different['_merge']=='left_only','location'] = 'remote'
-    different.loc[different['_merge']=='right_only','location'] = 'local'
 
-    # this takes the different date_created_x and date-created_y columns and
-    # combines them back into one date_created column
-    index = different['date_created_x'].isnull()
-    different.loc[index,'date_created_x'] = different[index]['date_created_y']
-    different = different[['workflow_spec_id','date_created_x','location']].copy()
-    different.columns=['workflow_spec_id','date_created','location']
+    # If there are no differences, then we can just return.
+    if not different.empty:
 
-    # our different list will have multiple entries for a workflow if there is a version on either side
-    # we want to grab the most recent one, so we sort and grab the most recent one for each workflow
-    changedfiles = different.sort_values('date_created',ascending=False).groupby('workflow_spec_id').first()
+        # each line has a tag on it - if was in the left or the right,
+        # label it so we know if that was on the remote or local machine
+        different.loc[different['_merge']=='left_only','location'] = 'remote'
+        different.loc[different['_merge']=='right_only','location'] = 'local'
 
-    # get an exclusive or list of workflow ids - that is we want lists of files that are
-    # on one machine or the other, but not both
-    remote_spec_ids = remote_workflows[['workflow_spec_id']]
-    local_spec_ids = local[['workflow_spec_id']]
-    left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])]
-    right = local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])]
+        # this takes the different date_created_x and date-created_y columns and
+        # combines them back into one date_created column
+        index = different['date_created_x'].isnull()
+        different.loc[index,'date_created_x'] = different[index]['date_created_y']
+        different = different[['workflow_spec_id','date_created_x','location']].copy()
+        different.columns=['workflow_spec_id','date_created','location']
 
-    # flag files as new that are only on the remote box and remove the files that are only on the local box
-    changedfiles['new'] = False
-    changedfiles.loc[changedfiles.index.isin(left['workflow_spec_id']), 'new'] = True
-    output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])]
+        # our different list will have multiple entries for a workflow if there is a version on either side
+        # we want to grab the most recent one, so we sort and grab the most recent one for each workflow
+        changedfiles = different.sort_values('date_created',ascending=False).groupby('workflow_spec_id').first()
+
+        # get an exclusive or list of workflow ids - that is we want lists of files that are
+        # on one machine or the other, but not both
+        remote_spec_ids = remote_workflows[['workflow_spec_id']]
+        local_spec_ids = local[['workflow_spec_id']]
+        left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])]
+        right = local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])]
+
+        # flag files as new that are only on the remote box and remove the files that are only on the local box
+        changedfiles['new'] = False
+        changedfiles.loc[changedfiles.index.isin(left['workflow_spec_id']), 'new'] = True
+        output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])]
+
+    else:
+        output = different
 
     # return the list as a dict, let swagger convert it to json
     if as_df:
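The outer merge with indicator=True above is the standard pandas idiom for a symmetric difference. A self-contained sketch with made-up thumbprints, showing how the _merge column drives the remote/local labeling:

    import pandas as pd

    # Illustrative only: two thumbprint frames that agree on 'b' but not 'a' or 'c'.
    remote = pd.DataFrame({'workflow_spec_id': ['a', 'b'], 'md5_hash': ['h1', 'h2']})
    local = pd.DataFrame({'workflow_spec_id': ['b', 'c'], 'md5_hash': ['h2', 'h3']})

    diff = remote.merge(local, on=['workflow_spec_id', 'md5_hash'],
                        how='outer', indicator=True)
    diff = diff.loc[diff['_merge'] != 'both']     # keep rows present on only one side
    diff.loc[diff['_merge'] == 'left_only', 'location'] = 'remote'
    diff.loc[diff['_merge'] == 'right_only', 'location'] = 'local'
    # -> 'a' is remote-only, 'c' is local-only; 'b' matched on both keys and was dropped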
@@ -295,6 +308,10 @@ def get_all_spec_state_dataframe():
                          'date_created':file.date_created})
     df = pd.DataFrame(filelist)
 
+    # If the file list is empty, return an empty data frame
+    if df.empty:
+        return df
+
     # get a distinct list of file_model_id's with the most recent file_data retained
     df = df.sort_values('date_created').drop_duplicates(['file_model_id'],keep='last').copy()
 
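The new df.empty guard matters because an empty filelist produces a DataFrame with no columns at all, so the sort_values('date_created') call below it would raise a KeyError. The dedup line itself keeps one row per file_model_id, retaining the newest. A tiny illustration with made-up data:

    import pandas as pd

    df = pd.DataFrame({'file_model_id': [1, 1, 2],
                       'date_created': pd.to_datetime(['2021-01-01', '2021-06-01', '2021-03-01'])})
    latest = df.sort_values('date_created').drop_duplicates(['file_model_id'], keep='last')
    # -> one row per file_model_id, each carrying its most recent date_created

    empty = pd.DataFrame([])
    # empty.sort_values('date_created') raises KeyError -> hence the df.empty guard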
@@ -0,0 +1,23 @@
+from crc.scripts.script import Script
+from crc.services.file_service import FileService
+
+
+class IsFileUploaded(Script):
+
+    def get_description(self):
+        return """Test whether a file is uploaded for a study.
+        Pass in the IRB Doc Code for the file."""
+
+    def do_task_validate_only(self, task, study_id, workflow_id, *args, **kwargs):
+        doc_code = args[0]
+        files = FileService.get_files_for_study(study_id)
+
+    def do_task(self, task, study_id, workflow_id, *args, **kwargs):
+
+        files = FileService.get_files_for_study(study_id)
+        if len(files) > 0:
+            doc_code = args[0]
+            for file in files:
+                if doc_code == file.irb_doc_code:
+                    return True
+        return False
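In practice the script would be referenced from a BPMN script task; called directly, as the new test later in this commit does, the shape is roughly:

    # Hypothetical direct call; task, study_id and workflow_id come from the running workflow.
    uploaded = IsFileUploaded().do_task(task, study_id, workflow_id, 'UVACompl_PRCAppr')
    # -> True once a file with that IRB doc code exists for the study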
@@ -35,14 +35,22 @@ class FileService(object):
     def add_workflow_spec_file(workflow_spec: WorkflowSpecModel,
                                name, content_type, binary_data, primary=False, is_status=False):
         """Create a new file and associate it with a workflow spec."""
-        file_model = FileModel(
-            workflow_spec_id=workflow_spec.id,
-            name=name,
-            primary=primary,
-            is_status=is_status,
-        )
+        # Raise ApiError if the file already exists
+        if session.query(FileModel)\
+                .filter(FileModel.workflow_spec_id == workflow_spec.id)\
+                .filter(FileModel.name == name).first():
 
-        return FileService.update_file(file_model, binary_data, content_type)
+            raise ApiError(code="Duplicate File",
+                           message='If you want to replace the file, use the update mechanism.')
+        else:
+            file_model = FileModel(
+                workflow_spec_id=workflow_spec.id,
+                name=name,
+                primary=primary,
+                is_status=is_status,
+            )
+
+            return FileService.update_file(file_model, binary_data, content_type)
 
     @staticmethod
     def is_allowed_document(code):
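The guard uses the usual SQLAlchemy pattern of filtering down to the candidate row and testing .first() for truthiness. An equivalent form that avoids fetching the row uses the exists() construct — a sketch, not what the commit ships:

    from sqlalchemy import and_, exists

    # Existence test without materializing the row; same FileModel/session as above.
    dup = session.query(
        exists().where(and_(FileModel.workflow_spec_id == workflow_spec.id,
                            FileModel.name == name))).scalar()
    if dup:
        raise ApiError(code="Duplicate File",
                       message='If you want to replace the file, use the update mechanism.')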
@@ -130,11 +130,16 @@ class WorkflowService(object):
         # Assure we have a field type
         if field.type is None:
             raise ApiError(code='invalid_form_data',
-                           message='Field type is None. A field type must be provided.')
+                           message = f'Type is missing for field "{field.id}". A field type must be provided.',
+                           task_id = task.id,
+                           task_name = task.get_name())
         # Assure we have valid ids
         if not WorkflowService.check_field_id(field.id):
             raise ApiError(code='invalid_form_id',
-                           message="A field ID must begin with a letter, and can only contain letters, numbers, and '_'")
+                           message=f'Invalid Field name: "{field.id}". A field ID must begin with a letter, '
+                                   f'and can only contain letters, numbers, and "_"',
+                           task_id = task.id,
+                           task_name = task.get_name())
         # Assure field has valid properties
         WorkflowService.check_field_properties(field, task)
 
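check_field_id itself is not shown in this diff; a plausible implementation of the rule the message states (start with a letter, then only letters, digits, and '_') is a one-line regex — an illustrative sketch, not the project's actual helper:

    import re

    def check_field_id(field_id):
        # Must start with a letter; the rest may be letters, digits, or '_'.
        return re.match(r'^[a-zA-Z][a-zA-Z0-9_]*$', field_id) is not None

    check_field_id('user_title')   # True
    check_field_id('user-title')   # False -> the invalid_form_id ApiError above fires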
@@ -0,0 +1,41 @@
+from tests.base_test import BaseTest
+from crc.services.file_service import FileService
+from crc.scripts.is_file_uploaded import IsFileUploaded
+
+
+class TestIsFileUploaded(BaseTest):
+
+    def test_file_uploaded_pass(self):
+        self.load_example_data()
+        irb_code_1 = 'UVACompl_PRCAppr'
+        irb_code_2 = 'Study_App_Doc'
+
+        workflow = self.create_workflow('empty_workflow')
+        first_task = self.get_workflow_api(workflow).next_task
+        study_id = workflow.study_id
+
+        # We shouldn't have any files yet.
+        files = FileService.get_files_for_study(study_id)
+        self.assertEqual(0, len(files))
+        self.assertEqual(False, IsFileUploaded.do_task(IsFileUploaded, first_task, study_id, workflow.id, irb_code_1))
+
+        # Add a file
+        FileService.add_workflow_file(workflow_id=workflow.id,
+                                      name="something.png", content_type="text",
+                                      binary_data=b'1234', irb_doc_code=irb_code_1)
+
+        # Make sure we find the file
+        files = FileService.get_files_for_study(study_id)
+        self.assertEqual(1, len(files))
+        self.assertEqual(True, IsFileUploaded.do_task(IsFileUploaded, first_task, study_id, workflow.id, irb_code_1))
+
+        # Add second file
+        FileService.add_workflow_file(workflow_id=workflow.id,
+                                      name="anything.png", content_type="text",
+                                      binary_data=b'5678', irb_doc_code=irb_code_2)
+
+        # Make sure we find both files.
+        files = FileService.get_files_for_study(study_id)
+        self.assertEqual(2, len(files))
+        self.assertEqual(True, IsFileUploaded.do_task(IsFileUploaded, first_task, study_id, workflow.id, irb_code_1))
+        self.assertEqual(True, IsFileUploaded.do_task(IsFileUploaded, first_task, study_id, workflow.id, irb_code_2))
@@ -1,7 +1,7 @@
 from unittest.mock import patch
+from tests.base_test import BaseTest
 
 from crc import db
-from tests.base_test import BaseTest
 from crc.api.workflow_sync import get_all_spec_state, \
     get_changed_workflows, \
     get_workflow_spec_files, \
@@ -0,0 +1,32 @@
+from tests.base_test import BaseTest
+from crc import session
+from crc.api.common import ApiError
+from crc.models.workflow import WorkflowSpecModel
+from crc.services.file_service import FileService
+
+
+class TestDuplicateWorkflowSpecFile(BaseTest):
+
+    def test_duplicate_workflow_spec_file(self):
+        # We want this to fail.
+        # Users should not be able to upload a file that already exists.
+
+        self.load_example_data()
+        spec = session.query(WorkflowSpecModel).first()
+
+        # Add a file
+        file_model = FileService.add_workflow_spec_file(spec,
+                                                        name="something.png",
+                                                        content_type="text",
+                                                        binary_data=b'1234')
+        self.assertEqual(file_model.name, 'something.png')
+        self.assertEqual(file_model.content_type, 'text')
+
+        # Try to add it again
+        try:
+            FileService.add_workflow_spec_file(spec,
+                                               name="something.png",
+                                               content_type="text",
+                                               binary_data=b'5678')
+        except ApiError as ae:
+            self.assertEqual(ae.message, 'If you want to replace the file, use the update mechanism.')
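One caveat with the try/except form above: if add_workflow_spec_file ever stops raising, the test still passes because the assertion inside the except block is never reached. The tighter unittest idiom is assertRaises — a sketch of the equivalent check:

    # Tighter variant (sketch): fails loudly if no ApiError is raised.
    with self.assertRaises(ApiError) as ctx:
        FileService.add_workflow_spec_file(spec,
                                           name="something.png",
                                           content_type="text",
                                           binary_data=b'5678')
    self.assertEqual(ctx.exception.message,
                     'If you want to replace the file, use the update mechanism.')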
@@ -10,4 +10,5 @@ class TestFormFieldName(BaseTest):
 
         json_data = json.loads(rv.get_data(as_text=True))
         self.assertEqual(json_data[0]['message'],
-                         "When populating all fields ... A field ID must begin with a letter, and can only contain letters, numbers, and '_'")
+                         'When populating all fields ... Invalid Field name: "user-title". A field ID must begin '
+                         'with a letter, and can only contain letters, numbers, and "_"')
@@ -10,5 +10,5 @@ class TestFormFieldType(BaseTest):
 
         json_data = json.loads(rv.get_data(as_text=True))
         self.assertEqual(json_data[0]['message'],
-                         'When populating all fields ... Field type is None. A field type must be provided.')
+                         'When populating all fields ... Type is missing for field "name". A field type must be provided.')
         # print('TestFormFieldType: Good Form')