merging in dev, and fixing a failing test from dev.

Dan 2021-01-08 14:42:13 -05:00
commit 26655b26d3
10 changed files with 173 additions and 39 deletions

View File

@@ -99,6 +99,13 @@ def load_example_rrt_data():
     ExampleDataLoader.clean_db()
     ExampleDataLoader().load_rrt()
 
+
+@app.cli.command()
+def load_reference_files():
+    """Load the reference documents into the database."""
+    from example_data import ExampleDataLoader
+    ExampleDataLoader().load_reference_documents()
+
 @app.cli.command()
 def clear_db():
     """Load example data into the database."""

View File

@@ -37,42 +37,55 @@ def get_changed_workflows(remote,as_df=False):
     # get the local thumbprints & make sure that 'workflow_spec_id' is a column, not an index
     local = get_all_spec_state_dataframe().reset_index()
 
+    if local.empty:
+        # return the list as a dict, let swagger convert it to json
+        remote_workflows['new'] = True
+        if as_df:
+            return remote_workflows
+        else:
+            return remote_workflows.reset_index().to_dict(orient='records')
+
     # merge these on workflow spec id and hash - this will
     # make two different date columns date_x and date_y
     different = remote_workflows.merge(local,
                                        right_on=['workflow_spec_id','md5_hash'],
                                        left_on=['workflow_spec_id','md5_hash'],
                                        how = 'outer' ,
                                        indicator=True).loc[lambda x : x['_merge']!='both']
-    if len(different)==0:
-        return []
-    # each line has a tag on it - if it was in the left or the right,
-    # label it so we know if that was on the remote or local machine
-    different.loc[different['_merge']=='left_only','location'] = 'remote'
-    different.loc[different['_merge']=='right_only','location'] = 'local'
-
-    # this takes the different date_created_x and date_created_y columns and
-    # combines them back into one date_created column
-    index = different['date_created_x'].isnull()
-    different.loc[index,'date_created_x'] = different[index]['date_created_y']
-    different = different[['workflow_spec_id','date_created_x','location']].copy()
-    different.columns=['workflow_spec_id','date_created','location']
-
-    # our different list will have multiple entries for a workflow if there is a version on either side
-    # we want to grab the most recent one, so we sort and grab the most recent one for each workflow
-    changedfiles = different.sort_values('date_created',ascending=False).groupby('workflow_spec_id').first()
-
-    # get an exclusive-or list of workflow ids - that is, we want lists of files that are
-    # on one machine or the other, but not both
-    remote_spec_ids = remote_workflows[['workflow_spec_id']]
-    local_spec_ids = local[['workflow_spec_id']]
-    left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])]
-    right = local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])]
-
-    # flag files as new that are only on the remote box and remove the files that are only on the local box
-    changedfiles['new'] = False
-    changedfiles.loc[changedfiles.index.isin(left['workflow_spec_id']), 'new'] = True
-    output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])]
+
+    # If there are no differences, then we can just return.
+    if not different.empty:
+
+        # each line has a tag on it - if it was in the left or the right,
+        # label it so we know if that was on the remote or local machine
+        different.loc[different['_merge']=='left_only','location'] = 'remote'
+        different.loc[different['_merge']=='right_only','location'] = 'local'
+
+        # this takes the different date_created_x and date_created_y columns and
+        # combines them back into one date_created column
+        index = different['date_created_x'].isnull()
+        different.loc[index,'date_created_x'] = different[index]['date_created_y']
+        different = different[['workflow_spec_id','date_created_x','location']].copy()
+        different.columns=['workflow_spec_id','date_created','location']
+
+        # our different list will have multiple entries for a workflow if there is a version on either side
+        # we want to grab the most recent one, so we sort and grab the most recent one for each workflow
+        changedfiles = different.sort_values('date_created',ascending=False).groupby('workflow_spec_id').first()
+
+        # get an exclusive-or list of workflow ids - that is, we want lists of files that are
+        # on one machine or the other, but not both
+        remote_spec_ids = remote_workflows[['workflow_spec_id']]
+        local_spec_ids = local[['workflow_spec_id']]
+        left = remote_spec_ids[~remote_spec_ids['workflow_spec_id'].isin(local_spec_ids['workflow_spec_id'])]
+        right = local_spec_ids[~local_spec_ids['workflow_spec_id'].isin(remote_spec_ids['workflow_spec_id'])]
+
+        # flag files as new that are only on the remote box and remove the files that are only on the local box
+        changedfiles['new'] = False
+        changedfiles.loc[changedfiles.index.isin(left['workflow_spec_id']), 'new'] = True
+        output = changedfiles[~changedfiles.index.isin(right['workflow_spec_id'])]
+
+    else:
+        output = different
 
     # return the list as a dict, let swagger convert it to json
     if as_df:
@@ -295,6 +308,10 @@ def get_all_spec_state_dataframe():
                              'date_created':file.date_created})
     df = pd.DataFrame(filelist)
 
+    # If the file list is empty, return an empty data frame
+    if df.empty:
+        return df
+
     # get a distinct list of file_model_id's with the most recent file_data retained
     df = df.sort_values('date_created').drop_duplicates(['file_model_id'],keep='last').copy()
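
The heart of get_changed_workflows is the outer merge with indicator=True: any row that fails to match on both workflow_spec_id and md5_hash keeps a _merge tag of left_only or right_only, which the code then relabels as 'remote' or 'local'. A self-contained sketch of that pattern on toy data (the frames and values here are illustrative only):

    import pandas as pd

    # Thumbprints of specs on the remote box vs. the local box.
    remote = pd.DataFrame({'workflow_spec_id': ['a', 'b', 'c'],
                           'md5_hash': ['h1', 'h2', 'h3']})
    local = pd.DataFrame({'workflow_spec_id': ['b', 'c', 'd'],
                          'md5_hash': ['h2', 'hX', 'h4']})

    # Outer merge keeps unmatched rows; indicator=True adds a '_merge'
    # column saying which side each row came from.
    different = remote.merge(local,
                             on=['workflow_spec_id', 'md5_hash'],
                             how='outer',
                             indicator=True).loc[lambda x: x['_merge'] != 'both']

    # Tag each unmatched row with its location, as the endpoint does.
    different.loc[different['_merge'] == 'left_only', 'location'] = 'remote'
    different.loc[different['_merge'] == 'right_only', 'location'] = 'local'
    print(different[['workflow_spec_id', 'location']])
    # 'a' exists only remotely, 'd' only locally, and 'c' appears twice
    # because its hash differs between the two machines.

This is also why the new `if local.empty:` guard (and the matching `df.empty` guard in get_all_spec_state_dataframe) matters: an empty local frame carries none of the merge columns, so the merge itself would fail; instead the remote list is returned directly with every spec flagged as new.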

View File

@@ -0,0 +1,23 @@
+from crc.scripts.script import Script
+from crc.services.file_service import FileService
+
+
+class IsFileUploaded(Script):
+
+    def get_description(self):
+        return """Test whether a file is uploaded for a study.
+        Pass in the IRB Doc Code for the file."""
+
+    def do_task_validate_only(self, task, study_id, workflow_id, *args, **kwargs):
+        doc_code = args[0]
+        files = FileService.get_files_for_study(study_id)
+
+    def do_task(self, task, study_id, workflow_id, *args, **kwargs):
+        files = FileService.get_files_for_study(study_id)
+        if len(files) > 0:
+            doc_code = args[0]
+            for file in files:
+                if doc_code == file.irb_doc_code:
+                    return True
+        return False
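
The do_task method above is a straight linear scan over the study's files. An equivalent formulation (a sketch only, not what the commit ships) collapses the loop to a single generator expression, assuming the doc-code argument is always supplied:

    def do_task(self, task, study_id, workflow_id, *args, **kwargs):
        # True iff any file for the study carries the requested IRB doc code.
        doc_code = args[0]
        files = FileService.get_files_for_study(study_id)
        return any(file.irb_doc_code == doc_code for file in files)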

View File

@@ -35,14 +35,22 @@ class FileService(object):
     def add_workflow_spec_file(workflow_spec: WorkflowSpecModel,
                                name, content_type, binary_data, primary=False, is_status=False):
         """Create a new file and associate it with a workflow spec."""
-        file_model = FileModel(
-            workflow_spec_id=workflow_spec.id,
-            name=name,
-            primary=primary,
-            is_status=is_status,
-        )
-        return FileService.update_file(file_model, binary_data, content_type)
+
+        # Raise ApiError if the file already exists
+        if session.query(FileModel)\
+                .filter(FileModel.workflow_spec_id == workflow_spec.id)\
+                .filter(FileModel.name == name).first():
+            raise ApiError(code="Duplicate File",
+                           message='If you want to replace the file, use the update mechanism.')
+        else:
+            file_model = FileModel(
+                workflow_spec_id=workflow_spec.id,
+                name=name,
+                primary=primary,
+                is_status=is_status,
+            )
+            return FileService.update_file(file_model, binary_data, content_type)
 
     @staticmethod
     def is_allowed_document(code):

View File

@@ -130,11 +130,16 @@ class WorkflowService(object):
             # Assure we have a field type
             if field.type is None:
                 raise ApiError(code='invalid_form_data',
-                               message='Field type is None. A field type must be provided.')
+                               message=f'Type is missing for field "{field.id}". A field type must be provided.',
+                               task_id=task.id,
+                               task_name=task.get_name())
             # Assure we have valid ids
             if not WorkflowService.check_field_id(field.id):
                 raise ApiError(code='invalid_form_id',
-                               message="A field ID must begin with a letter, and can only contain letters, numbers, and '_'")
+                               message=f'Invalid Field name: "{field.id}". A field ID must begin with a letter, '
+                                       f'and can only contain letters, numbers, and "_"',
+                               task_id=task.id,
+                               task_name=task.get_name())
 
             # Assure field has valid properties
             WorkflowService.check_field_properties(field, task)
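
check_field_id itself is not shown in this hunk, but the new error message pins down the rule it enforces. A hypothetical validator consistent with that message, reconstructed as a single anchored regex (an illustration, not the project's actual implementation):

    import re

    # Rule per the message: must begin with a letter, and may contain
    # only letters, numbers, and '_'.
    FIELD_ID_PATTERN = re.compile(r'^[A-Za-z][A-Za-z0-9_]*$')

    def check_field_id(field_id: str) -> bool:
        return FIELD_ID_PATTERN.match(field_id) is not None

    assert check_field_id('user_title')
    assert not check_field_id('user-title')  # the case the updated test below exercises
    assert not check_field_id('1st_field')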

View File

@@ -0,0 +1,41 @@
+from tests.base_test import BaseTest
+
+from crc.services.file_service import FileService
+from crc.scripts.is_file_uploaded import IsFileUploaded
+
+
+class TestIsFileUploaded(BaseTest):
+
+    def test_file_uploaded_pass(self):
+        self.load_example_data()
+        irb_code_1 = 'UVACompl_PRCAppr'
+        irb_code_2 = 'Study_App_Doc'
+
+        workflow = self.create_workflow('empty_workflow')
+        first_task = self.get_workflow_api(workflow).next_task
+        study_id = workflow.study_id
+
+        # We shouldn't have any files yet.
+        files = FileService.get_files_for_study(study_id)
+        self.assertEqual(0, len(files))
+        self.assertEqual(False, IsFileUploaded.do_task(IsFileUploaded, first_task, study_id, workflow.id, irb_code_1))
+
+        # Add a file
+        FileService.add_workflow_file(workflow_id=workflow.id,
+                                      name="something.png", content_type="text",
+                                      binary_data=b'1234', irb_doc_code=irb_code_1)
+
+        # Make sure we find the file
+        files = FileService.get_files_for_study(study_id)
+        self.assertEqual(1, len(files))
+        self.assertEqual(True, IsFileUploaded.do_task(IsFileUploaded, first_task, study_id, workflow.id, irb_code_1))
+
+        # Add second file
+        FileService.add_workflow_file(workflow_id=workflow.id,
+                                      name="anything.png", content_type="text",
+                                      binary_data=b'5678', irb_doc_code=irb_code_2)
+
+        # Make sure we find both files.
+        files = FileService.get_files_for_study(study_id)
+        self.assertEqual(2, len(files))
+        self.assertEqual(True, IsFileUploaded.do_task(IsFileUploaded, first_task, study_id, workflow.id, irb_code_1))
+        self.assertEqual(True, IsFileUploaded.do_task(IsFileUploaded, first_task, study_id, workflow.id, irb_code_2))

View File

@@ -1,7 +1,7 @@
 from unittest.mock import patch
+from tests.base_test import BaseTest
 from crc import db
-from tests.base_test import BaseTest
 from crc.api.workflow_sync import get_all_spec_state, \
     get_changed_workflows, \
     get_workflow_spec_files, \

View File

@@ -0,0 +1,32 @@
+from tests.base_test import BaseTest
+
+from crc import session
+from crc.api.common import ApiError
+from crc.models.workflow import WorkflowSpecModel
+from crc.services.file_service import FileService
+
+
+class TestDuplicateWorkflowSpecFile(BaseTest):
+
+    def test_duplicate_workflow_spec_file(self):
+
+        # We want this to fail.
+        # Users should not be able to upload a file that already exists.
+
+        self.load_example_data()
+        spec = session.query(WorkflowSpecModel).first()
+
+        # Add a file
+        file_model = FileService.add_workflow_spec_file(spec,
+                                                        name="something.png",
+                                                        content_type="text",
+                                                        binary_data=b'1234')
+        self.assertEqual(file_model.name, 'something.png')
+        self.assertEqual(file_model.content_type, 'text')
+
+        # Try to add it again
+        try:
+            FileService.add_workflow_spec_file(spec,
+                                               name="something.png",
+                                               content_type="text",
+                                               binary_data=b'5678')
+        except ApiError as ae:
+            self.assertEqual(ae.message, 'If you want to replace the file, use the update mechanism.')
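
One caveat about this test as written: if add_workflow_spec_file someday stops raising, the except body never runs and the test still passes. A tighter formulation (a sketch, not part of the commit) makes the expected failure explicit with assertRaises:

    # Fail loudly if the duplicate upload does NOT raise.
    with self.assertRaises(ApiError) as context:
        FileService.add_workflow_spec_file(spec,
                                           name="something.png",
                                           content_type="text",
                                           binary_data=b'5678')
    self.assertEqual(context.exception.message,
                     'If you want to replace the file, use the update mechanism.')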

View File

@@ -10,4 +10,5 @@ class TestFormFieldName(BaseTest):
         json_data = json.loads(rv.get_data(as_text=True))
         self.assertEqual(json_data[0]['message'],
-                         "When populating all fields ... A field ID must begin with a letter, and can only contain letters, numbers, and '_'")
+                         'When populating all fields ... Invalid Field name: "user-title". A field ID must begin '
+                         'with a letter, and can only contain letters, numbers, and "_"')

View File

@@ -10,5 +10,5 @@ class TestFormFieldType(BaseTest):
         json_data = json.loads(rv.get_data(as_text=True))
         self.assertEqual(json_data[0]['message'],
-                         'When populating all fields ... Field type is None. A field type must be provided.')
+                         'When populating all fields ... Type is missing for field "name". A field type must be provided.')
         # print('TestFormFieldType: Good Form')