From b72ecb83759debc500121cdb369b34d7ef8bf2c3 Mon Sep 17 00:00:00 2001 From: Dan Date: Thu, 17 Feb 2022 11:59:48 -0500 Subject: [PATCH] Another re-work to fix 619 - and to assure that we aren't rebuilding the lookup tables too frequently. --- crc/api/workflow.py | 8 ++++-- crc/models/file.py | 2 +- crc/services/file_system_service.py | 4 +++ crc/services/lookup_service.py | 20 +++++++------- crc/services/reference_file_service.py | 6 ++++- crc/services/spec_file_service.py | 5 ++++ migrations/versions/3c56c894ff5c_.py | 30 +++++++++++++++++++++ tests/study/test_study_details_documents.py | 1 + tests/test_lookup_service.py | 8 +++--- 9 files changed, 66 insertions(+), 18 deletions(-) create mode 100644 migrations/versions/3c56c894ff5c_.py diff --git a/crc/api/workflow.py b/crc/api/workflow.py index 8eafc8b8..81a4eba6 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -214,8 +214,12 @@ def restart_workflow(workflow_id, clear_data=False, delete_files=False): """Restart a workflow with the latest spec. Clear data allows user to restart the workflow without previous data.""" workflow_model: WorkflowModel = session.query(WorkflowModel).filter_by(id=workflow_id).first() - WorkflowProcessor.reset(workflow_model, clear_data=clear_data, delete_files=delete_files) - return get_workflow(workflow_model.id) + processor = WorkflowProcessor.reset(workflow_model, clear_data=clear_data, delete_files=delete_files) + processor.do_engine_steps() + processor.save() + WorkflowService.update_task_assignments(processor) + workflow_api_model = WorkflowService.processor_to_workflow_api(processor) + return WorkflowApiSchema().dump(workflow_api_model) def get_task_events(action = None, workflow = None, study = None): diff --git a/crc/models/file.py b/crc/models/file.py index 4a734368..9c3e5d88 100644 --- a/crc/models/file.py +++ b/crc/models/file.py @@ -182,8 +182,8 @@ class LookupFileModel(db.Model): task_spec_id = db.Column(db.String) field_id = db.Column(db.String) file_name = db.Column(db.String) + file_timestamp = db.Column(db.FLOAT) #The file systems time stamp, to check for changes to the file. is_ldap = db.Column(db.Boolean) # Allows us to run an ldap query instead of a db lookup. - last_updated = db.Column(db.DateTime(timezone=True)) dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan") diff --git a/crc/services/file_system_service.py b/crc/services/file_system_service.py index d965d267..2af58dfb 100644 --- a/crc/services/file_system_service.py +++ b/crc/services/file_system_service.py @@ -84,6 +84,10 @@ class FileSystemService(object): 'The file you provided does not have an accepted extension:' + file_extension, status_code=404) + @staticmethod + def _timestamp(file_path: str): + return os.path.getmtime(file_path) + @staticmethod def _last_modified(file_path: str): # Returns the last modified date of the given file. diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py index d34fd8cf..c28ed611 100644 --- a/crc/services/lookup_service.py +++ b/crc/services/lookup_service.py @@ -52,15 +52,16 @@ class LookupService(object): @staticmethod def get_lookup_model_for_reference(file_name, value_column, label_column): + timestamp = ReferenceFileService().timestamp(file_name) lookup_model = db.session.query(LookupFileModel).\ filter(LookupFileModel.file_name == file_name). \ filter(LookupFileModel.workflow_spec_id == None).\ + filter(LookupFileModel.file_timestamp == timestamp).\ first() # use "==" not "is none" which does NOT work, and makes this constantly expensive. if not lookup_model: logging.warning("!!!! Making a very expensive call to update the lookup model.") file_data = ReferenceFileService().get_data(file_name) - file_date = ReferenceFileService().last_modified(file_name) - lookup_model = LookupService.build_lookup_table(file_name, file_data, file_date, value_column, label_column) + lookup_model = LookupService.build_lookup_table(file_name, file_data, timestamp, value_column, label_column) return lookup_model @staticmethod @@ -77,12 +78,12 @@ class LookupService(object): if lookup_model: if lookup_model.is_ldap: # LDAP is always current is_current = True - elif lookup_model.file_name is not None and lookup_model.last_updated is not None: + elif lookup_model.file_name is not None and lookup_model.file_timestamp is not None: # In some legacy cases, the lookup model might exist, but not have a file name, in which case we need # to rebuild. workflow_spec = WorkflowSpecService().get_spec(workflow.workflow_spec_id) - current_date = SpecFileService.last_modified(workflow_spec, lookup_model.file_name) - is_current = current_date == lookup_model.last_updated + timestamp = SpecFileService.timestamp(workflow_spec, lookup_model.file_name) + is_current = timestamp == lookup_model.file_timestamp if not is_current: # Very very very expensive, but we don't know need this till we do. @@ -147,9 +148,9 @@ class LookupService(object): file = latest_files[0] file_data = SpecFileService().get_data(workflow_spec, file_name) - file_date = SpecFileService.last_modified(workflow_spec, file_name) + timestamp = SpecFileService.timestamp(workflow_spec, file_name) - lookup_model = LookupService.build_lookup_table(file_name, file_data, file_date, value_column, label_column, + lookup_model = LookupService.build_lookup_table(file_name, file_data, timestamp, value_column, label_column, workflow_model.workflow_spec_id, task_spec_id, field_id) # Use the results of an LDAP request to populate enum field options @@ -168,7 +169,7 @@ class LookupService(object): return lookup_model @staticmethod - def build_lookup_table(file_name, file_data, file_date, value_column, label_column, + def build_lookup_table(file_name, file_data, timestamp, value_column, label_column, workflow_spec_id=None, task_spec_id=None, field_id=None): """ In some cases the lookup table can be very large. This method will add all values to the database in a way that can be searched and returned via an api call - rather than sending the full set of @@ -204,10 +205,9 @@ class LookupService(object): field_id=field_id, task_spec_id=task_spec_id, file_name=file_name, - last_updated=file_date, + file_timestamp=timestamp, is_ldap=False) - db.session.add(lookup_model) for index, row in df.iterrows(): lookup_data = LookupDataModel(lookup_file_model=lookup_model, diff --git a/crc/services/reference_file_service.py b/crc/services/reference_file_service.py index 0f364410..c46453df 100644 --- a/crc/services/reference_file_service.py +++ b/crc/services/reference_file_service.py @@ -76,4 +76,8 @@ class ReferenceFileService(FileSystemService): @staticmethod def last_modified(file_name): - return FileSystemService._last_modified(ReferenceFileService.file_path(file_name)) \ No newline at end of file + return FileSystemService._last_modified(ReferenceFileService.file_path(file_name)) + + @staticmethod + def timestamp(file_name): + return FileSystemService._timestamp(ReferenceFileService.file_path(file_name)) \ No newline at end of file diff --git a/crc/services/spec_file_service.py b/crc/services/spec_file_service.py index bcbf3e28..74b4615d 100644 --- a/crc/services/spec_file_service.py +++ b/crc/services/spec_file_service.py @@ -76,6 +76,11 @@ class SpecFileService(FileSystemService): path = SpecFileService.file_path(spec, file_name) return FileSystemService._last_modified(path) + @staticmethod + def timestamp(spec: WorkflowSpecInfo, file_name: str): + path = SpecFileService.file_path(spec, file_name) + return FileSystemService._timestamp(path) + @staticmethod def delete_file(spec, file_name): # Fixme: Remember to remove the lookup files when the spec file is removed. diff --git a/migrations/versions/3c56c894ff5c_.py b/migrations/versions/3c56c894ff5c_.py new file mode 100644 index 00000000..36968c3e --- /dev/null +++ b/migrations/versions/3c56c894ff5c_.py @@ -0,0 +1,30 @@ +"""empty message + +Revision ID: 3c56c894ff5c +Revises: 29bad12c9945 +Create Date: 2022-02-17 11:52:52.335700 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '3c56c894ff5c' +down_revision = '29bad12c9945' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('lookup_file', sa.Column('file_timestamp', sa.FLOAT(), nullable=True)) + op.drop_column('lookup_file', 'last_updated') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('lookup_file', sa.Column('last_updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True)) + op.drop_column('lookup_file', 'file_timestamp') + # ### end Alembic commands ### diff --git a/tests/study/test_study_details_documents.py b/tests/study/test_study_details_documents.py index 133b0980..6e9c8514 100644 --- a/tests/study/test_study_details_documents.py +++ b/tests/study/test_study_details_documents.py @@ -135,6 +135,7 @@ class TestStudyDetailsDocumentsScript(BaseTest): @patch('crc.services.protocol_builder.requests.get') def test_file_data_set_invalid_irb_code_fails(self, mock_get): + self.create_reference_document() mock_get.return_value.ok = True mock_get.return_value.text = self.protocol_builder_response('required_docs.json') self.add_studies() diff --git a/tests/test_lookup_service.py b/tests/test_lookup_service.py index 2a872058..30ece256 100644 --- a/tests/test_lookup_service.py +++ b/tests/test_lookup_service.py @@ -187,15 +187,15 @@ class TestLookupService(BaseTest): # Using an old xls file should raise an error file_data_xls = SpecFileService().get_data(spec, 'sponsors.xls') - file_date = SpecFileService().last_modified(spec, 'sponsors.xls') + timestamp = SpecFileService().timestamp(spec, 'sponsors.xls') with self.assertRaises(ApiError) as ae: - LookupService.build_lookup_table('sponsors.xls', file_data_xls, file_date, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME') + LookupService.build_lookup_table('sponsors.xls', file_data_xls, timestamp, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME') self.assertIn('Error opening excel file', ae.exception.args[0]) # Using an xlsx file should work file_data_xlsx = SpecFileService().get_data(spec, 'sponsors.xlsx') - file_date = SpecFileService().last_modified(spec, 'sponsors.xlsx') - lookup_model = LookupService.build_lookup_table('sponsors.xlsx', file_data_xlsx, file_date, + timestamp = SpecFileService().timestamp(spec, 'sponsors.xlsx') + lookup_model = LookupService.build_lookup_table('sponsors.xlsx', file_data_xlsx, timestamp, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME') self.assertEqual(28, len(lookup_model.dependencies)) self.assertIn('CUSTOMER_NAME', lookup_model.dependencies[0].data.keys())