1. Added a default directory for the location of SYNC files.
2. Added a last_updated column to the lookup table.
3. The lookup service now uses that column, comparing it to the actual file date so the lookup table can be rebuilt when it is stale (see the first sketch below).
4. That 755 migration loads up the models, so when the models change, the migration starts to fail. Not really sure what to do here, other than modifying the migration while we are in process (see the second sketch below).
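
A minimal sketch of the staleness check from item 3, using the names that appear in the LookupService and SpecFileService diffs below (the helper name lookup_is_current is hypothetical, added only for illustration):

from crc.services.spec_file_service import SpecFileService

def lookup_is_current(lookup_model):
    # LDAP-backed lookups never go stale; file-backed lookups are stale
    # once the file on disk is newer than the date stamped on the model.
    if lookup_model.is_ldap:
        return True
    current_date = SpecFileService().last_modified(lookup_model.file_model.id)
    return current_date == lookup_model.last_updated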
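
For item 4, one hedged option (not what this commit does) is to stop importing the live models inside the migration and declare a lightweight table clause with only the columns the migration touches, so later model changes cannot break it. The column list here is illustrative, not the real schema:

import sqlalchemy as sa
from alembic import op

# Hypothetical stand-in for importing crc.models.file: a minimal table
# clause pinned to the schema as it existed at this revision.
lookup_file = sa.table(
    'lookup_file',
    sa.column('id', sa.Integer),
    sa.column('file_model_id', sa.Integer),
    sa.column('last_updated', sa.DateTime),
)

def upgrade():
    # Work through the table clause instead of the ORM models.
    op.get_bind().execute(lookup_file.update().values(last_updated=None))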
Dan 2022-01-20 13:05:58 -05:00
parent 9ea1399578
commit f815add699
6 changed files with 61 additions and 38 deletions


@@ -87,3 +87,6 @@ MAIL_USE_SSL = environ.get('MAIL_USE_SSL', default=False)
MAIL_USE_TLS = environ.get('MAIL_USE_TLS', default=False)
MAIL_USERNAME = environ.get('MAIL_USERNAME', default='')
MAIL_PASSWORD = environ.get('MAIL_PASSWORD', default='')
# Local file path
SYNC_FILE_ROOT = './SPECS'
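
If the new root should be overridable per deployment, it could follow the same environ.get pattern as the MAIL_* settings above; a suggestion, not part of this commit:

SYNC_FILE_ROOT = environ.get('SYNC_FILE_ROOT', default='./SPECS')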


@@ -175,9 +175,10 @@ class LookupFileModel(db.Model):
    field_id = db.Column(db.String)
    is_ldap = db.Column(db.Boolean)  # Allows us to run an ldap query instead of a db lookup.
    file_model_id = db.Column(db.Integer, db.ForeignKey('file.id'))
    file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id'))
    last_updated = db.Column(db.DateTime(timezone=True))
    dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model",
                                   cascade="all, delete, delete-orphan")
    file_model = db.relationship("FileModel")

class LookupDataModel(db.Model):


@@ -218,12 +218,10 @@ class FileService(object):
    @staticmethod
    def delete_file(file_id):
        try:
            data_models = session.query(FileDataModel).filter_by(file_model_id=file_id).all()
            for dm in data_models:
                lookup_files = session.query(LookupFileModel).filter_by(file_data_model_id=dm.id).all()
                for lf in lookup_files:
                    session.query(LookupDataModel).filter_by(lookup_file_model_id=lf.id).delete()
                    session.query(LookupFileModel).filter_by(id=lf.id).delete()
            lookup_files = session.query(LookupFileModel).filter_by(file_model_id=file_id).all()
            for lf in lookup_files:
                session.query(LookupDataModel).filter_by(lookup_file_model_id=lf.id).delete()
                session.query(LookupFileModel).filter_by(id=lf.id).delete()
            session.query(FileDataModel).filter_by(file_model_id=file_id).delete()
            session.query(DataStoreModel).filter_by(file_id=file_id).delete()
            session.query(FileModel).filter_by(id=file_id).delete()


@@ -66,16 +66,15 @@ class LookupService(object):
            .filter(LookupFileModel.task_spec_id == task_spec_id) \
            .order_by(desc(LookupFileModel.id)).first()
        # one more quick query, to see if the lookup file is still related to this workflow.
        # if not, we need to rebuild the lookup table.
        # The above may return a model; if it does, it might still be out of date.
        # We need to check the file date to assure we have the most recent file.
        is_current = False
        if lookup_model:
            if lookup_model.is_ldap:  # LDAP is always current
                is_current = True
            else:
                is_current = db.session.query(WorkflowSpecDependencyFile). \
                    filter(WorkflowSpecDependencyFile.file_data_id == lookup_model.file_data_model_id).\
                    filter(WorkflowSpecDependencyFile.workflow_id == workflow.id).count()
                current_date = SpecFileService().last_modified(lookup_model.file_model.id)
                is_current = current_date == lookup_model.last_updated
        if not is_current:
            # Very very very expensive, but we don't need this till we do.
@@ -132,8 +131,8 @@ class LookupService(object):
        value_column = field.get_property(Task.FIELD_PROP_VALUE_COLUMN)
        label_column = field.get_property(Task.FIELD_PROP_LABEL_COLUMN)
        latest_files = SpecFileService().get_spec_data_files(workflow_spec_id=workflow_model.workflow_spec_id,
                                                             workflow_id=workflow_model.id,
                                                             name=file_name)
        if len(latest_files) < 1:
            raise ApiError("invalid_enum", "Unable to locate the lookup data file '%s'" % file_name)
        else:
@@ -142,6 +141,7 @@ class LookupService(object):
            file_id = data_dict['meta']['id']
            file_name = data_dict['meta']['name']
            file_data = data_dict['data']
            lookup_model = LookupService.build_lookup_table(file_id, file_name, file_data, value_column, label_column,
                                                            workflow_model.workflow_spec_id, task_spec_id, field_id)
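
For the comparison above to succeed on the next request, the rebuilt model presumably gets stamped with the file's modification date. The diff does not show the body of build_lookup_table, so this stamping step is an assumption:

# Assumed stamping step inside (or just after) build_lookup_table:
lookup_model.last_updated = SpecFileService().last_modified(lookup_model.file_model.id)
db.session.add(lookup_model)
db.session.commit()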


@@ -283,33 +283,54 @@ class SpecFileService(object):
        return category_name

    def get_spec_file_data(self, file_id: int):
        file_model = session.query(FileModel).filter(FileModel.id==file_id).first()
        if file_model is not None:
            spec_model = session.query(WorkflowSpecModel).filter(WorkflowSpecModel.id==file_model.workflow_spec_id).first()
            if spec_model is not None:
                category_name = self.get_spec_file_category_name(spec_model)
                sync_file_root = self.get_sync_file_root()
                file_path = os.path.join(sync_file_root, category_name, spec_model.display_name, file_model.name)
                mtime = os.path.getmtime(file_path)
                with open(file_path, 'rb') as f_handle:
                    spec_file_data = f_handle.read()
                size = len(spec_file_data)
                md5_checksum = UUID(hashlib.md5(spec_file_data).hexdigest())
    def get_path(self, file_id: int):
        # Returns the path on the file system for the given File id
                file_data_model = FileDataModel(data=spec_file_data,
                                                md5_hash=md5_checksum,
                                                size=size,
                                                date_created=datetime.datetime.fromtimestamp(mtime),
                                                file_model_id=file_id)
                return file_data_model
            else:
                raise ApiError(code='spec_not_found',
                               message=f'No spec found for file with file_id: {file_id}, and spec_id: {file_model.workflow_spec_id}')
        else:
        # Assure we have a file.
        file_model = session.query(FileModel).filter(FileModel.id==file_id).first()
        if not file_model:
            raise ApiError(code='model_not_found',
                           message=f'No model found for file with file_id: {file_id}')
        # Assure we have a spec.
        spec_model = session.query(WorkflowSpecModel).filter(
            WorkflowSpecModel.id == file_model.workflow_spec_id).first()
        if not spec_model:
            raise ApiError(code='spec_not_found',
                           message=f'No spec found for file with file_id: '
                                   f'{file_model.id}, and spec_id: {file_model.workflow_spec_id}')
        # Calculate the path.
        sync_file_root = self.get_sync_file_root()
        category_name = self.get_spec_file_category_name(spec_model)
        return os.path.join(sync_file_root, category_name, spec_model.display_name, file_model.name)

    def last_modified(self, file_id: int):
        path = self.get_path(file_id)
        return self.__last_modified(path)

    def __last_modified(self, file_path: str):
        # Returns the last modified date of the given file.
        timestamp = os.path.getmtime(file_path)
        return datetime.datetime.fromtimestamp(timestamp)

    def get_spec_file_data(self, file_id: int):
        file_path = self.get_path(file_id)
        date = self.last_modified(file_id)
        with open(file_path, 'rb') as f_handle:
            spec_file_data = f_handle.read()
        size = len(spec_file_data)
        md5_checksum = UUID(hashlib.md5(spec_file_data).hexdigest())
        last_modified = self.__last_modified(file_path)
        file_data_model = FileDataModel(data=spec_file_data,
                                        md5_hash=md5_checksum,
                                        size=size,
                                        date_created=last_modified,
                                        file_model_id=file_id)
        return file_data_model

    @staticmethod
    def get_process_id(et_root: etree.Element):
        process_elements = []
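
A short usage sketch of the refactored helpers above (the file id is a placeholder):

svc = SpecFileService()
path = svc.get_path(42)           # filesystem path under SYNC_FILE_ROOT
modified = svc.last_modified(42)  # datetime built from the file's mtime
data_model = svc.get_spec_file_data(42)
# data_model.date_created carries the same mtime-derived timestamp,
# which is what LookupService compares against lookup_model.last_updated.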


@@ -12,7 +12,6 @@ import sqlalchemy as sa
# import crc
from crc import app
from crc.models.file import FileModel, FileDataModel, LookupFileModel
from crc.models.workflow import WorkflowSpecDependencyFile
from crc.services.file_service import FileService
from crc.services.spec_file_service import SpecFileService
from crc.services.reference_file_service import ReferenceFileService
@@ -39,6 +38,7 @@ def upgrade():
    op.drop_table('workflow_spec_dependency_file')
    # op.drop_constraint('lookup_file_file_data_model_id_fkey', 'lookup_file', type_='foreignkey')
    op.add_column('lookup_file', sa.Column('file_model_id', sa.Integer(), nullable=True))
    op.add_column('lookup_file', sa.Column('last_updated', sa.DateTime(), nullable=True))
    op.create_foreign_key(None, 'lookup_file', 'file', ['file_model_id'], ['id'])
    processed_files = []