Another re-work to fix 619 - and to assure that we aren't rebuilding the lookup tables too frequently.

This commit is contained in:
Dan 2022-02-17 11:59:48 -05:00
parent f2b6008e5f
commit b72ecb8375
9 changed files with 66 additions and 18 deletions

View File

@ -214,8 +214,12 @@ def restart_workflow(workflow_id, clear_data=False, delete_files=False):
"""Restart a workflow with the latest spec.
Clear data allows user to restart the workflow without previous data."""
workflow_model: WorkflowModel = session.query(WorkflowModel).filter_by(id=workflow_id).first()
WorkflowProcessor.reset(workflow_model, clear_data=clear_data, delete_files=delete_files)
return get_workflow(workflow_model.id)
processor = WorkflowProcessor.reset(workflow_model, clear_data=clear_data, delete_files=delete_files)
processor.do_engine_steps()
processor.save()
WorkflowService.update_task_assignments(processor)
workflow_api_model = WorkflowService.processor_to_workflow_api(processor)
return WorkflowApiSchema().dump(workflow_api_model)
def get_task_events(action = None, workflow = None, study = None):

View File

@ -182,8 +182,8 @@ class LookupFileModel(db.Model):
task_spec_id = db.Column(db.String)
field_id = db.Column(db.String)
file_name = db.Column(db.String)
file_timestamp = db.Column(db.FLOAT) #The file systems time stamp, to check for changes to the file.
is_ldap = db.Column(db.Boolean) # Allows us to run an ldap query instead of a db lookup.
last_updated = db.Column(db.DateTime(timezone=True))
dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model",
cascade="all, delete, delete-orphan")

View File

@ -84,6 +84,10 @@ class FileSystemService(object):
'The file you provided does not have an accepted extension:' +
file_extension, status_code=404)
@staticmethod
def _timestamp(file_path: str):
return os.path.getmtime(file_path)
@staticmethod
def _last_modified(file_path: str):
# Returns the last modified date of the given file.

View File

@ -52,15 +52,16 @@ class LookupService(object):
@staticmethod
def get_lookup_model_for_reference(file_name, value_column, label_column):
timestamp = ReferenceFileService().timestamp(file_name)
lookup_model = db.session.query(LookupFileModel).\
filter(LookupFileModel.file_name == file_name). \
filter(LookupFileModel.workflow_spec_id == None).\
filter(LookupFileModel.file_timestamp == timestamp).\
first() # use "==" not "is none" which does NOT work, and makes this constantly expensive.
if not lookup_model:
logging.warning("!!!! Making a very expensive call to update the lookup model.")
file_data = ReferenceFileService().get_data(file_name)
file_date = ReferenceFileService().last_modified(file_name)
lookup_model = LookupService.build_lookup_table(file_name, file_data, file_date, value_column, label_column)
lookup_model = LookupService.build_lookup_table(file_name, file_data, timestamp, value_column, label_column)
return lookup_model
@staticmethod
@ -77,12 +78,12 @@ class LookupService(object):
if lookup_model:
if lookup_model.is_ldap: # LDAP is always current
is_current = True
elif lookup_model.file_name is not None and lookup_model.last_updated is not None:
elif lookup_model.file_name is not None and lookup_model.file_timestamp is not None:
# In some legacy cases, the lookup model might exist, but not have a file name, in which case we need
# to rebuild.
workflow_spec = WorkflowSpecService().get_spec(workflow.workflow_spec_id)
current_date = SpecFileService.last_modified(workflow_spec, lookup_model.file_name)
is_current = current_date == lookup_model.last_updated
timestamp = SpecFileService.timestamp(workflow_spec, lookup_model.file_name)
is_current = timestamp == lookup_model.file_timestamp
if not is_current:
# Very very very expensive, but we don't know need this till we do.
@ -147,9 +148,9 @@ class LookupService(object):
file = latest_files[0]
file_data = SpecFileService().get_data(workflow_spec, file_name)
file_date = SpecFileService.last_modified(workflow_spec, file_name)
timestamp = SpecFileService.timestamp(workflow_spec, file_name)
lookup_model = LookupService.build_lookup_table(file_name, file_data, file_date, value_column, label_column,
lookup_model = LookupService.build_lookup_table(file_name, file_data, timestamp, value_column, label_column,
workflow_model.workflow_spec_id, task_spec_id, field_id)
# Use the results of an LDAP request to populate enum field options
@ -168,7 +169,7 @@ class LookupService(object):
return lookup_model
@staticmethod
def build_lookup_table(file_name, file_data, file_date, value_column, label_column,
def build_lookup_table(file_name, file_data, timestamp, value_column, label_column,
workflow_spec_id=None, task_spec_id=None, field_id=None):
""" In some cases the lookup table can be very large. This method will add all values to the database
in a way that can be searched and returned via an api call - rather than sending the full set of
@ -204,10 +205,9 @@ class LookupService(object):
field_id=field_id,
task_spec_id=task_spec_id,
file_name=file_name,
last_updated=file_date,
file_timestamp=timestamp,
is_ldap=False)
db.session.add(lookup_model)
for index, row in df.iterrows():
lookup_data = LookupDataModel(lookup_file_model=lookup_model,

View File

@ -76,4 +76,8 @@ class ReferenceFileService(FileSystemService):
@staticmethod
def last_modified(file_name):
return FileSystemService._last_modified(ReferenceFileService.file_path(file_name))
return FileSystemService._last_modified(ReferenceFileService.file_path(file_name))
@staticmethod
def timestamp(file_name):
return FileSystemService._timestamp(ReferenceFileService.file_path(file_name))

View File

@ -76,6 +76,11 @@ class SpecFileService(FileSystemService):
path = SpecFileService.file_path(spec, file_name)
return FileSystemService._last_modified(path)
@staticmethod
def timestamp(spec: WorkflowSpecInfo, file_name: str):
path = SpecFileService.file_path(spec, file_name)
return FileSystemService._timestamp(path)
@staticmethod
def delete_file(spec, file_name):
# Fixme: Remember to remove the lookup files when the spec file is removed.

View File

@ -0,0 +1,30 @@
"""empty message
Revision ID: 3c56c894ff5c
Revises: 29bad12c9945
Create Date: 2022-02-17 11:52:52.335700
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '3c56c894ff5c'
down_revision = '29bad12c9945'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('lookup_file', sa.Column('file_timestamp', sa.FLOAT(), nullable=True))
op.drop_column('lookup_file', 'last_updated')
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('lookup_file', sa.Column('last_updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True))
op.drop_column('lookup_file', 'file_timestamp')
# ### end Alembic commands ###

View File

@ -135,6 +135,7 @@ class TestStudyDetailsDocumentsScript(BaseTest):
@patch('crc.services.protocol_builder.requests.get')
def test_file_data_set_invalid_irb_code_fails(self, mock_get):
self.create_reference_document()
mock_get.return_value.ok = True
mock_get.return_value.text = self.protocol_builder_response('required_docs.json')
self.add_studies()

View File

@ -187,15 +187,15 @@ class TestLookupService(BaseTest):
# Using an old xls file should raise an error
file_data_xls = SpecFileService().get_data(spec, 'sponsors.xls')
file_date = SpecFileService().last_modified(spec, 'sponsors.xls')
timestamp = SpecFileService().timestamp(spec, 'sponsors.xls')
with self.assertRaises(ApiError) as ae:
LookupService.build_lookup_table('sponsors.xls', file_data_xls, file_date, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME')
LookupService.build_lookup_table('sponsors.xls', file_data_xls, timestamp, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME')
self.assertIn('Error opening excel file', ae.exception.args[0])
# Using an xlsx file should work
file_data_xlsx = SpecFileService().get_data(spec, 'sponsors.xlsx')
file_date = SpecFileService().last_modified(spec, 'sponsors.xlsx')
lookup_model = LookupService.build_lookup_table('sponsors.xlsx', file_data_xlsx, file_date,
timestamp = SpecFileService().timestamp(spec, 'sponsors.xlsx')
lookup_model = LookupService.build_lookup_table('sponsors.xlsx', file_data_xlsx, timestamp,
'CUSTOMER_NUMBER', 'CUSTOMER_NAME')
self.assertEqual(28, len(lookup_model.dependencies))
self.assertIn('CUSTOMER_NAME', lookup_model.dependencies[0].data.keys())