Another re-work to fix 619 - and to assure that we aren't rebuilding the lookup tables too frequently.

This commit is contained in:
Dan 2022-02-17 11:59:48 -05:00
parent f2b6008e5f
commit b72ecb8375
9 changed files with 66 additions and 18 deletions

View File

@ -214,8 +214,12 @@ def restart_workflow(workflow_id, clear_data=False, delete_files=False):
"""Restart a workflow with the latest spec. """Restart a workflow with the latest spec.
Clear data allows user to restart the workflow without previous data.""" Clear data allows user to restart the workflow without previous data."""
workflow_model: WorkflowModel = session.query(WorkflowModel).filter_by(id=workflow_id).first() workflow_model: WorkflowModel = session.query(WorkflowModel).filter_by(id=workflow_id).first()
WorkflowProcessor.reset(workflow_model, clear_data=clear_data, delete_files=delete_files) processor = WorkflowProcessor.reset(workflow_model, clear_data=clear_data, delete_files=delete_files)
return get_workflow(workflow_model.id) processor.do_engine_steps()
processor.save()
WorkflowService.update_task_assignments(processor)
workflow_api_model = WorkflowService.processor_to_workflow_api(processor)
return WorkflowApiSchema().dump(workflow_api_model)
def get_task_events(action = None, workflow = None, study = None): def get_task_events(action = None, workflow = None, study = None):

View File

@ -182,8 +182,8 @@ class LookupFileModel(db.Model):
task_spec_id = db.Column(db.String) task_spec_id = db.Column(db.String)
field_id = db.Column(db.String) field_id = db.Column(db.String)
file_name = db.Column(db.String) file_name = db.Column(db.String)
file_timestamp = db.Column(db.FLOAT) #The file systems time stamp, to check for changes to the file.
is_ldap = db.Column(db.Boolean) # Allows us to run an ldap query instead of a db lookup. is_ldap = db.Column(db.Boolean) # Allows us to run an ldap query instead of a db lookup.
last_updated = db.Column(db.DateTime(timezone=True))
dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model",
cascade="all, delete, delete-orphan") cascade="all, delete, delete-orphan")

View File

@ -84,6 +84,10 @@ class FileSystemService(object):
'The file you provided does not have an accepted extension:' + 'The file you provided does not have an accepted extension:' +
file_extension, status_code=404) file_extension, status_code=404)
@staticmethod
def _timestamp(file_path: str):
return os.path.getmtime(file_path)
@staticmethod @staticmethod
def _last_modified(file_path: str): def _last_modified(file_path: str):
# Returns the last modified date of the given file. # Returns the last modified date of the given file.

View File

@ -52,15 +52,16 @@ class LookupService(object):
@staticmethod @staticmethod
def get_lookup_model_for_reference(file_name, value_column, label_column): def get_lookup_model_for_reference(file_name, value_column, label_column):
timestamp = ReferenceFileService().timestamp(file_name)
lookup_model = db.session.query(LookupFileModel).\ lookup_model = db.session.query(LookupFileModel).\
filter(LookupFileModel.file_name == file_name). \ filter(LookupFileModel.file_name == file_name). \
filter(LookupFileModel.workflow_spec_id == None).\ filter(LookupFileModel.workflow_spec_id == None).\
filter(LookupFileModel.file_timestamp == timestamp).\
first() # use "==" not "is none" which does NOT work, and makes this constantly expensive. first() # use "==" not "is none" which does NOT work, and makes this constantly expensive.
if not lookup_model: if not lookup_model:
logging.warning("!!!! Making a very expensive call to update the lookup model.") logging.warning("!!!! Making a very expensive call to update the lookup model.")
file_data = ReferenceFileService().get_data(file_name) file_data = ReferenceFileService().get_data(file_name)
file_date = ReferenceFileService().last_modified(file_name) lookup_model = LookupService.build_lookup_table(file_name, file_data, timestamp, value_column, label_column)
lookup_model = LookupService.build_lookup_table(file_name, file_data, file_date, value_column, label_column)
return lookup_model return lookup_model
@staticmethod @staticmethod
@ -77,12 +78,12 @@ class LookupService(object):
if lookup_model: if lookup_model:
if lookup_model.is_ldap: # LDAP is always current if lookup_model.is_ldap: # LDAP is always current
is_current = True is_current = True
elif lookup_model.file_name is not None and lookup_model.last_updated is not None: elif lookup_model.file_name is not None and lookup_model.file_timestamp is not None:
# In some legacy cases, the lookup model might exist, but not have a file name, in which case we need # In some legacy cases, the lookup model might exist, but not have a file name, in which case we need
# to rebuild. # to rebuild.
workflow_spec = WorkflowSpecService().get_spec(workflow.workflow_spec_id) workflow_spec = WorkflowSpecService().get_spec(workflow.workflow_spec_id)
current_date = SpecFileService.last_modified(workflow_spec, lookup_model.file_name) timestamp = SpecFileService.timestamp(workflow_spec, lookup_model.file_name)
is_current = current_date == lookup_model.last_updated is_current = timestamp == lookup_model.file_timestamp
if not is_current: if not is_current:
# Very very very expensive, but we don't know need this till we do. # Very very very expensive, but we don't know need this till we do.
@ -147,9 +148,9 @@ class LookupService(object):
file = latest_files[0] file = latest_files[0]
file_data = SpecFileService().get_data(workflow_spec, file_name) file_data = SpecFileService().get_data(workflow_spec, file_name)
file_date = SpecFileService.last_modified(workflow_spec, file_name) timestamp = SpecFileService.timestamp(workflow_spec, file_name)
lookup_model = LookupService.build_lookup_table(file_name, file_data, file_date, value_column, label_column, lookup_model = LookupService.build_lookup_table(file_name, file_data, timestamp, value_column, label_column,
workflow_model.workflow_spec_id, task_spec_id, field_id) workflow_model.workflow_spec_id, task_spec_id, field_id)
# Use the results of an LDAP request to populate enum field options # Use the results of an LDAP request to populate enum field options
@ -168,7 +169,7 @@ class LookupService(object):
return lookup_model return lookup_model
@staticmethod @staticmethod
def build_lookup_table(file_name, file_data, file_date, value_column, label_column, def build_lookup_table(file_name, file_data, timestamp, value_column, label_column,
workflow_spec_id=None, task_spec_id=None, field_id=None): workflow_spec_id=None, task_spec_id=None, field_id=None):
""" In some cases the lookup table can be very large. This method will add all values to the database """ In some cases the lookup table can be very large. This method will add all values to the database
in a way that can be searched and returned via an api call - rather than sending the full set of in a way that can be searched and returned via an api call - rather than sending the full set of
@ -204,10 +205,9 @@ class LookupService(object):
field_id=field_id, field_id=field_id,
task_spec_id=task_spec_id, task_spec_id=task_spec_id,
file_name=file_name, file_name=file_name,
last_updated=file_date, file_timestamp=timestamp,
is_ldap=False) is_ldap=False)
db.session.add(lookup_model) db.session.add(lookup_model)
for index, row in df.iterrows(): for index, row in df.iterrows():
lookup_data = LookupDataModel(lookup_file_model=lookup_model, lookup_data = LookupDataModel(lookup_file_model=lookup_model,

View File

@ -76,4 +76,8 @@ class ReferenceFileService(FileSystemService):
@staticmethod @staticmethod
def last_modified(file_name): def last_modified(file_name):
return FileSystemService._last_modified(ReferenceFileService.file_path(file_name)) return FileSystemService._last_modified(ReferenceFileService.file_path(file_name))
@staticmethod
def timestamp(file_name):
return FileSystemService._timestamp(ReferenceFileService.file_path(file_name))

View File

@ -76,6 +76,11 @@ class SpecFileService(FileSystemService):
path = SpecFileService.file_path(spec, file_name) path = SpecFileService.file_path(spec, file_name)
return FileSystemService._last_modified(path) return FileSystemService._last_modified(path)
@staticmethod
def timestamp(spec: WorkflowSpecInfo, file_name: str):
path = SpecFileService.file_path(spec, file_name)
return FileSystemService._timestamp(path)
@staticmethod @staticmethod
def delete_file(spec, file_name): def delete_file(spec, file_name):
# Fixme: Remember to remove the lookup files when the spec file is removed. # Fixme: Remember to remove the lookup files when the spec file is removed.

View File

@ -0,0 +1,30 @@
"""empty message
Revision ID: 3c56c894ff5c
Revises: 29bad12c9945
Create Date: 2022-02-17 11:52:52.335700
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '3c56c894ff5c'
down_revision = '29bad12c9945'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('lookup_file', sa.Column('file_timestamp', sa.FLOAT(), nullable=True))
op.drop_column('lookup_file', 'last_updated')
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('lookup_file', sa.Column('last_updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True))
op.drop_column('lookup_file', 'file_timestamp')
# ### end Alembic commands ###

View File

@ -135,6 +135,7 @@ class TestStudyDetailsDocumentsScript(BaseTest):
@patch('crc.services.protocol_builder.requests.get') @patch('crc.services.protocol_builder.requests.get')
def test_file_data_set_invalid_irb_code_fails(self, mock_get): def test_file_data_set_invalid_irb_code_fails(self, mock_get):
self.create_reference_document()
mock_get.return_value.ok = True mock_get.return_value.ok = True
mock_get.return_value.text = self.protocol_builder_response('required_docs.json') mock_get.return_value.text = self.protocol_builder_response('required_docs.json')
self.add_studies() self.add_studies()

View File

@ -187,15 +187,15 @@ class TestLookupService(BaseTest):
# Using an old xls file should raise an error # Using an old xls file should raise an error
file_data_xls = SpecFileService().get_data(spec, 'sponsors.xls') file_data_xls = SpecFileService().get_data(spec, 'sponsors.xls')
file_date = SpecFileService().last_modified(spec, 'sponsors.xls') timestamp = SpecFileService().timestamp(spec, 'sponsors.xls')
with self.assertRaises(ApiError) as ae: with self.assertRaises(ApiError) as ae:
LookupService.build_lookup_table('sponsors.xls', file_data_xls, file_date, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME') LookupService.build_lookup_table('sponsors.xls', file_data_xls, timestamp, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME')
self.assertIn('Error opening excel file', ae.exception.args[0]) self.assertIn('Error opening excel file', ae.exception.args[0])
# Using an xlsx file should work # Using an xlsx file should work
file_data_xlsx = SpecFileService().get_data(spec, 'sponsors.xlsx') file_data_xlsx = SpecFileService().get_data(spec, 'sponsors.xlsx')
file_date = SpecFileService().last_modified(spec, 'sponsors.xlsx') timestamp = SpecFileService().timestamp(spec, 'sponsors.xlsx')
lookup_model = LookupService.build_lookup_table('sponsors.xlsx', file_data_xlsx, file_date, lookup_model = LookupService.build_lookup_table('sponsors.xlsx', file_data_xlsx, timestamp,
'CUSTOMER_NUMBER', 'CUSTOMER_NAME') 'CUSTOMER_NUMBER', 'CUSTOMER_NAME')
self.assertEqual(28, len(lookup_model.dependencies)) self.assertEqual(28, len(lookup_model.dependencies))
self.assertIn('CUSTOMER_NAME', lookup_model.dependencies[0].data.keys()) self.assertIn('CUSTOMER_NAME', lookup_model.dependencies[0].data.keys())