Faster lookup fields. We were parsing the spec each time just to figure out how to search. Now we key the lookup table on the workflow spec id and field id and query the full text search index directly, instead of re-parsing the spec on every keystroke. Should be peppy.
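For reference, a minimal sketch of how a client might call the reworked endpoint (the task id is gone from the URL; only the workflow id and field id remain). The host, auth header, field id "sponsor", and the exact JSON keys are illustrative assumptions, not part of this commit:

import requests  # hypothetical client-side example

resp = requests.get(
    "http://localhost:5000/v1.0/workflow/42/lookup/sponsor",  # assumed local server and ids
    params={"query": "sam", "limit": 5},
    headers={"Authorization": "Bearer <token>"},              # assumed auth scheme
)
for item in resp.json():
    # value/label mirror the LookupDataModel columns; actual key names may differ
    print(item.get("value"), item.get("label"))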

Another speed improvement: the data column on FileDataModel is now deferred, so file contents are not pulled from the database until they are actually used. The new data structures touch this model frequently, so skipping the blob load matters.
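A minimal sketch of what deferred() buys us, assuming SQLAlchemy 1.4+ and a throwaway SQLite engine rather than the app's Postgres setup (the model name here is illustrative, not the real FileDataModel):

from sqlalchemy import Column, Integer, LargeBinary, create_engine
from sqlalchemy.orm import declarative_base, deferred, sessionmaker

Base = declarative_base()

class BlobRecord(Base):
    __tablename__ = 'blob_record'
    id = Column(Integer, primary_key=True)
    data = deferred(Column(LargeBinary))  # excluded from the initial SELECT

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
session.add(BlobRecord(data=b'x' * 1024))
session.commit()

record = session.query(BlobRecord).first()  # loads id only; the blob stays in the database
blob = record.data                          # first attribute access emits a second SELECT for the blob

Queries that only need metadata (version, hash, dates) no longer drag the whole file out of the database; the blob is fetched lazily on first access.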
Author: Dan Funk, 2020-05-29 01:39:39 -04:00
Parent: 22bdb6c760
Commit: 11413838a7
13 changed files with 257 additions and 182 deletions

View File

@@ -672,7 +672,7 @@ paths:
             application/json:
               schema:
                 $ref: "#/components/schemas/Workflow"
-  /workflow/{workflow_id}/task/{task_id}/lookup/{field_id}:
+  /workflow/{workflow_id}/lookup/{field_id}:
     parameters:
       - name: workflow_id
         in: path
@@ -681,13 +681,6 @@ paths:
         schema:
          type: integer
          format: int32
-      - name: task_id
-        in: path
-        required: true
-        description: The id of the task
-        schema:
-          type: string
-          format: uuid
      - name: field_id
        in: path
        required: true

View File

@@ -219,26 +219,13 @@ def delete_workflow_spec_category(cat_id):
     session.commit()

-def lookup(workflow_id, task_id, field_id, query, limit):
+def lookup(workflow_id, field_id, query, limit):
     """
     given a field in a task, attempts to find the lookup table or function associated
     with that field and runs a full-text query against it to locate the values and
     labels that would be returned to a type-ahead box.
-    Tries to be fast, but first runs will be very slow.
     """
-    workflow_model = session.query(WorkflowModel).filter_by(id=workflow_id).first()
-    if not workflow_model:
-        raise ApiError("unknown_workflow", "No workflow found with id: %i" % workflow_id)
-    processor = WorkflowProcessor(workflow_model)
-    task_id = uuid.UUID(task_id)
-    spiff_task = processor.bpmn_workflow.get_task(task_id)
-    if not spiff_task:
-        raise ApiError("unknown_task", "No task with %s found in workflow: %i" % (task_id, workflow_id))
-    field = None
-    for f in spiff_task.task_spec.form.fields:
-        if f.id == field_id:
-            field = f
-    if not field:
-        raise ApiError("unknown_field", "No field named %s in task %s" % (task_id, spiff_task.task_spec.name))
-    lookup_data = LookupService.lookup(spiff_task, field, query, limit)
+    workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first()
+    lookup_data = LookupService.lookup(workflow, field_id, query, limit)
     return LookupDataSchema(many=True).dump(lookup_data)

View File

@@ -6,6 +6,7 @@ from marshmallow_enum import EnumField
 from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
 from sqlalchemy import func, Index
 from sqlalchemy.dialects.postgresql import UUID
+from sqlalchemy.orm import deferred

 from crc import db, ma
@@ -61,7 +62,7 @@ class FileDataModel(db.Model):
     __tablename__ = 'file_data'
     id = db.Column(db.Integer, primary_key=True)
     md5_hash = db.Column(UUID(as_uuid=True), unique=False, nullable=False)
-    data = db.Column(db.LargeBinary)
+    data = deferred(db.Column(db.LargeBinary))  # Don't load it unless you have to.
     version = db.Column(db.Integer, default=0)
     date_created = db.Column(db.DateTime(timezone=True), default=func.now())
     file_model_id = db.Column(db.Integer, db.ForeignKey('file.id'))
@@ -127,25 +128,22 @@ class FileSchema(ma.Schema):
 class LookupFileModel(db.Model):
-    """Takes the content of a file (like a xlsx, or csv file) and creates a key/value
-    store that can be used for lookups and searches.  This table contains the metadata,
-    so we know the version of the file that was used, and what key column, and value column
-    were used to generate this lookup table.  ie, the same xls file might have multiple
-    lookup file models, if different keys and labels are used - or someone decides to
-    make a change.  We need to handle full text search over the label and value columns,
-    and not every column, because we don't know how much information will be in there. """
+    """Gives us a quick way to tell what kind of lookup is set on a form field.
+    Connected to the file data model, so that if a new version of the same file is
+    created, we can update the listing."""
+    #fixme: What happens if they change the file associated with a lookup field?
     __tablename__ = 'lookup_file'
     id = db.Column(db.Integer, primary_key=True)
-    label_column = db.Column(db.String)
-    value_column = db.Column(db.String)
+    workflow_spec_id = db.Column(db.String)
+    field_id = db.Column(db.String)
+    is_ldap = db.Column(db.Boolean)  # Allows us to run an ldap query instead of a db lookup.
     file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id'))
+    dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan")


 class LookupDataModel(db.Model):
     __tablename__ = 'lookup_data'
     id = db.Column(db.Integer, primary_key=True)
     lookup_file_model_id = db.Column(db.Integer, db.ForeignKey('lookup_file.id'))
-    lookup_file_model = db.relationship(LookupFileModel)
     value = db.Column(db.String)
     label = db.Column(db.String)
     # In the future, we might allow adding an additional "search" column if we want to search things not in label.

View File

@@ -204,22 +204,27 @@ class FileService(object):
         return results

     @staticmethod
-    def get_spec_data_files(workflow_spec_id, workflow_id=None):
+    def get_spec_data_files(workflow_spec_id, workflow_id=None, name=None):
         """Returns all the FileDataModels related to a workflow specification.
         If a workflow is specified, returns the version of the spec relatted
         to that workflow, otherwise, returns the lastes files."""
         if workflow_id:
-            files = session.query(FileDataModel) \
+            query = session.query(FileDataModel) \
                 .join(WorkflowSpecDependencyFile) \
                 .filter(WorkflowSpecDependencyFile.workflow_id == workflow_id) \
-                .order_by(FileDataModel.id).all()
-            return files
+                .order_by(FileDataModel.id)
+            if name:
+                query = query.join(FileModel).filter(FileModel.name == name)
+            return query.all()
         else:
             """Returns all the latest files related to a workflow specification"""
             file_models = FileService.get_files(workflow_spec_id=workflow_spec_id)
             latest_data_files = []
             for file_model in file_models:
-                latest_data_files.append(FileService.get_file_data(file_model.id))
+                if name and file_model.name == name:
+                    latest_data_files.append(FileService.get_file_data(file_model.id))
+                elif not name:
+                    latest_data_files.append(FileService.get_file_data(file_model.id))
             return latest_data_files

     @staticmethod

View File

@@ -1,4 +1,5 @@
 import logging
+import re

 from pandas import ExcelFile
 from sqlalchemy import func, desc
@@ -8,8 +9,11 @@ from crc import db
 from crc.api.common import ApiError
 from crc.models.api_models import Task
 from crc.models.file import FileDataModel, LookupFileModel, LookupDataModel
+from crc.models.workflow import WorkflowModel, WorkflowSpecDependencyFile
 from crc.services.file_service import FileService
 from crc.services.ldap_service import LdapService
+from crc.services.workflow_processor import WorkflowProcessor


 class TSRank(GenericFunction):
     package = 'full_text'
@@ -31,33 +35,56 @@ class LookupService(object):
     """

     @staticmethod
-    def lookup(spiff_task, field, query, limit):
-        """Executes the lookup for the given field."""
-        if field.type != Task.FIELD_TYPE_AUTO_COMPLETE:
-            raise ApiError.from_task("invalid_field_type",
-                                     "Field '%s' must be an autocomplete field to use lookups." % field.label,
-                                     task=spiff_task)
-        # If this field has an associated options file, then do the lookup against that field.
-        if field.has_property(Task.PROP_OPTIONS_FILE):
-            lookup_table = LookupService.get_lookup_table(spiff_task, field)
-            return LookupService._run_lookup_query(lookup_table, query, limit)
-        # If this is a ldap lookup, use the ldap service to provide the fields to return.
-        elif field.has_property(Task.PROP_LDAP_LOOKUP):
-            return LookupService._run_ldap_query(query, limit)
-        else:
-            raise ApiError.from_task("unknown_lookup_option",
-                                     "Lookup supports using spreadsheet options or ldap options, and neither was"
-                                     "provided.")
+    def get_lookup_model(spiff_task, field):
+        workflow_id = spiff_task.workflow.data[WorkflowProcessor.WORKFLOW_ID_KEY]
+        workflow = db.session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first()
+        return LookupService.__get_lookup_model(workflow, field.id)

     @staticmethod
-    def get_lookup_table(spiff_task, field):
-        """ Checks to see if the options are provided in a separate lookup table associated with the
+    def __get_lookup_model(workflow, field_id):
+        lookup_model = db.session.query(LookupFileModel) \
+            .filter(LookupFileModel.workflow_spec_id == workflow.workflow_spec_id) \
+            .filter(LookupFileModel.field_id == field_id).first()
+        # one more quick query, to see if the lookup file is still related to this workflow.
+        # if not, we need to rebuild the lookup table.
+        is_current = False
+        if lookup_model:
+            is_current = db.session.query(WorkflowSpecDependencyFile).\
+                filter(WorkflowSpecDependencyFile.file_data_id == lookup_model.file_data_model_id).count()
+        if not is_current:
+            if lookup_model:
+                db.session.delete(lookup_model)
+            # Very very very expensive, but we don't know need this till we do.
+            lookup_model = LookupService.create_lookup_model(workflow, field_id)
+        return lookup_model
+
+    @staticmethod
+    def lookup(workflow, field_id, query, limit):
+        lookup_model = LookupService.__get_lookup_model(workflow, field_id)
+        if lookup_model.is_ldap:
+            return LookupService._run_ldap_query(query, limit)
+        else:
+            return LookupService._run_lookup_query(lookup_model, query, limit)
+
+    @staticmethod
+    def create_lookup_model(workflow_model, field_id):
+        """
+        This is all really expensive, but should happen just once (per file change).
+        Checks to see if the options are provided in a separate lookup table associated with the
         workflow, and if so, assures that data exists in the database, and return a model than can be used
         to locate that data.
         Returns:  an array of LookupData, suitable for returning to the api.
         """
+        processor = WorkflowProcessor(workflow_model)  # VERY expensive, Ludicrous for lookup / type ahead
+        spiff_task, field = processor.find_task_and_field_by_field_id(field_id)
         if field.has_property(Task.PROP_OPTIONS_FILE):
             if not field.has_property(Task.PROP_OPTIONS_VALUE_COLUMN) or \
                     not field.has_property(Task.PROP_OPTIONS_LABEL_COL):
@@ -72,52 +99,67 @@ class LookupService(object):
             file_name = field.get_property(Task.PROP_OPTIONS_FILE)
             value_column = field.get_property(Task.PROP_OPTIONS_VALUE_COLUMN)
             label_column = field.get_property(Task.PROP_OPTIONS_LABEL_COL)
-            data_model = FileService.get_workflow_file_data(spiff_task.workflow, file_name)
-            lookup_model = LookupService.get_lookup_table_from_data_model(data_model, value_column, label_column)
-            return lookup_model
+            latest_files = FileService.get_spec_data_files(workflow_spec_id=workflow_model.workflow_spec_id,
+                                                           workflow_id=workflow_model.id,
+                                                           name=file_name)
+            if len(latest_files) < 1:
+                raise ApiError("missing_file", "Unable to locate the lookup data file '%s'" % file_name)
+            else:
+                data_model = latest_files[0]
+            lookup_model = LookupService.build_lookup_table(data_model, value_column, label_column,
+                                                            workflow_model.workflow_spec_id, field_id)
+        elif field.has_property(Task.PROP_LDAP_LOOKUP):
+            lookup_model = LookupFileModel(workflow_spec_id=workflow_model.workflow_spec_id,
+                                           field_id=field_id,
+                                           is_ldap=True)
+        else:
+            raise ApiError("unknown_lookup_option",
+                           "Lookup supports using spreadsheet options or ldap options, and neither "
+                           "was provided.")
+        db.session.add(lookup_model)
+        db.session.commit()
+        return lookup_model

     @staticmethod
-    def get_lookup_table_from_data_model(data_model: FileDataModel, value_column, label_column):
+    def build_lookup_table(data_model: FileDataModel, value_column, label_column, workflow_spec_id, field_id):
         """ In some cases the lookup table can be very large.  This method will add all values to the database
         in a way that can be searched and returned via an api call - rather than sending the full set of
         options along with the form.  It will only open the file and process the options if something has
         changed. """
-        lookup_model = db.session.query(LookupFileModel) \
-            .filter(LookupFileModel.file_data_model_id == data_model.id) \
-            .filter(LookupFileModel.value_column == value_column) \
-            .filter(LookupFileModel.label_column == label_column).first()
-        if not lookup_model:
-            xls = ExcelFile(data_model.data)
-            df = xls.parse(xls.sheet_names[0])  # Currently we only look at the fist sheet.
-            if value_column not in df:
-                raise ApiError("invalid_emum",
-                               "The file %s does not contain a column named % s" % (data_model.file_model.name,
-                                                                                    value_column))
-            if label_column not in df:
-                raise ApiError("invalid_emum",
-                               "The file %s does not contain a column named % s" % (data_model.file_model.name,
-                                                                                    label_column))
-            lookup_model = LookupFileModel(label_column=label_column, value_column=value_column,
-                                           file_data_model_id=data_model.id)
-            db.session.add(lookup_model)
-            for index, row in df.iterrows():
-                lookup_data = LookupDataModel(lookup_file_model=lookup_model,
-                                              value=row[value_column],
-                                              label=row[label_column],
-                                              data=row.to_json())
-                db.session.add(lookup_data)
-            db.session.commit()
+        xls = ExcelFile(data_model.data)
+        df = xls.parse(xls.sheet_names[0])  # Currently we only look at the fist sheet.
+        if value_column not in df:
+            raise ApiError("invalid_emum",
+                           "The file %s does not contain a column named % s" % (data_model.file_model.name,
+                                                                                value_column))
+        if label_column not in df:
+            raise ApiError("invalid_emum",
+                           "The file %s does not contain a column named % s" % (data_model.file_model.name,
+                                                                                label_column))
+        lookup_model = LookupFileModel(workflow_spec_id=workflow_spec_id,
+                                       field_id=field_id,
+                                       file_data_model_id=data_model.id,
+                                       is_ldap=False)
+        db.session.add(lookup_model)
+        for index, row in df.iterrows():
+            lookup_data = LookupDataModel(lookup_file_model=lookup_model,
+                                          value=row[value_column],
+                                          label=row[label_column],
+                                          data=row.to_json())
+            db.session.add(lookup_data)
+        db.session.commit()
         return lookup_model

     @staticmethod
     def _run_lookup_query(lookup_file_model, query, limit):
         db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model)
+        query = re.sub('[^A-Za-z0-9 ]+', '', query)
+        print("Query: " + query)
         query = query.strip()
         if len(query) > 0:
             if ' ' in query:

View File

@@ -100,7 +100,7 @@ class WorkflowProcessor(object):
     STUDY_ID_KEY = "study_id"
     VALIDATION_PROCESS_KEY = "validate_only"

-    def __init__(self, workflow_model: WorkflowModel, soft_reset=False, hard_reset=False):
+    def __init__(self, workflow_model: WorkflowModel, soft_reset=False, hard_reset=False, validate_only=False):
         """Create a Workflow Processor based on the serialized information available in the workflow model.
         If soft_reset is set to true, it will try to use the latest version of the workflow specification.
         If hard_reset is set to true, it will create a new Workflow, but embed the data from the last
@@ -121,6 +121,7 @@ class WorkflowProcessor(object):
         self.workflow_spec_id = workflow_model.workflow_spec_id
         try:
             self.bpmn_workflow = self.__get_bpmn_workflow(workflow_model, spec)
+            self.bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = validate_only
             self.bpmn_workflow.script_engine = self._script_engine

             if self.WORKFLOW_ID_KEY not in self.bpmn_workflow.data:
@@ -402,3 +403,13 @@ class WorkflowProcessor(object):
         for nav_item in self.bpmn_workflow.get_nav_list():
             if nav_item['task_id'] == task.id:
                 return nav_item
+
+    def find_task_and_field_by_field_id(self, field_id):
+        """Tracks down a form field by name in the workflow spec,
+        only looks at ready tasks.  Returns a tuple of the task, and form"""
+        for spiff_task in self.bpmn_workflow.get_tasks(SpiffTask.READY):
+            if hasattr(spiff_task.task_spec, "form"):
+                for field in spiff_task.task_spec.form.fields:
+                    if field.id == field_id:
+                        return spiff_task, field
+        raise ApiError("invalid_field", "Unable to find a ready task with field: %s" % field_id)

View File

@@ -18,6 +18,7 @@ from crc.api.common import ApiError
 from crc.models.api_models import Task, MultiInstanceType
 from crc.models.file import LookupDataModel
 from crc.models.stats import TaskEventModel
+from crc.models.workflow import WorkflowModel, WorkflowStatus
 from crc.services.file_service import FileService
 from crc.services.lookup_service import LookupService
 from crc.services.workflow_processor import WorkflowProcessor, CustomBpmnScriptEngine
@@ -41,18 +42,20 @@ class WorkflowService(object):
         """Runs a spec through it's paces to see if it results in any errors. Not fool-proof, but a good
         sanity check."""
-        spec = WorkflowProcessor.get_spec(
-            file_data_models=FileService.get_spec_data_files(workflow_spec_id=spec_id),
-            workflow_spec_id=spec_id)
-        bpmn_workflow = BpmnWorkflow(spec, script_engine=CustomBpmnScriptEngine())
-        bpmn_workflow.data[WorkflowProcessor.STUDY_ID_KEY] = 1
-        bpmn_workflow.data[WorkflowProcessor.WORKFLOW_ID_KEY] = spec_id
-        bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = True
+        workflow_model = WorkflowModel(status=WorkflowStatus.not_started,
+                                       workflow_spec_id=spec_id,
+                                       last_updated=datetime.now(),
+                                       study_id=1)
+        try:
+            processor = WorkflowProcessor(workflow_model, validate_only=True)
+        except WorkflowException as we:
+            raise ApiError.from_task_spec("workflow_execution_exception", str(we),
+                                          we.sender)

-        while not bpmn_workflow.is_completed():
+        while not processor.bpmn_workflow.is_completed():
             try:
-                bpmn_workflow.do_engine_steps()
-                tasks = bpmn_workflow.get_tasks(SpiffTask.READY)
+                processor.bpmn_workflow.do_engine_steps()
+                tasks = processor.bpmn_workflow.get_tasks(SpiffTask.READY)
                 for task in tasks:
                     task_api = WorkflowService.spiff_task_to_api_task(
                         task,
@@ -60,8 +63,10 @@ class WorkflowService(object):
                     WorkflowService.populate_form_with_random_data(task, task_api)
                     task.complete()
             except WorkflowException as we:
+                db.session.delete(workflow_model)
                 raise ApiError.from_task_spec("workflow_execution_exception", str(we),
                                               we.sender)
+        db.session.delete(workflow_model)

     @staticmethod
     def populate_form_with_random_data(task, task_api):
@@ -84,7 +89,7 @@ class WorkflowService(object):
                                       " with no options" % field.id,
                                       task)
             elif field.type == "autocomplete":
-                lookup_model = LookupService.get_lookup_table(task, field)
+                lookup_model = LookupService.get_lookup_model(task, field)
                 if field.has_property(Task.PROP_LDAP_LOOKUP):
                     form_data[field.id] = {
                         "label": "dhf8r",
@@ -250,12 +255,12 @@ class WorkflowService(object):
     @staticmethod
     def process_options(spiff_task, field):
-        lookup_model = LookupService.get_lookup_table(spiff_task, field)
         # If this is an auto-complete field, do not populate options, a lookup will happen later.
         if field.type == Task.FIELD_TYPE_AUTO_COMPLETE:
             pass
-        else:
+        elif field.has_property(Task.PROP_OPTIONS_FILE):
+            lookup_model = LookupService.get_lookup_model(spiff_task, field)
             data = db.session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_model).all()
             if not hasattr(field, 'options'):
                 field.options = []
@@ -286,3 +291,4 @@ class WorkflowService(object):
             )
             db.session.add(task_event)
             db.session.commit()

View File

@@ -0,0 +1,36 @@
+"""empty message
+
+Revision ID: 5064b72284b7
+Revises: bec71f7dc652
+Create Date: 2020-05-28 23:54:45.623361
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '5064b72284b7'
+down_revision = 'bec71f7dc652'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('lookup_file', sa.Column('field_id', sa.String(), nullable=True))
+    op.add_column('lookup_file', sa.Column('is_ldap', sa.Boolean(), nullable=True))
+    op.add_column('lookup_file', sa.Column('workflow_spec_id', sa.String(), nullable=True))
+    op.drop_column('lookup_file', 'value_column')
+    op.drop_column('lookup_file', 'label_column')
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column('lookup_file', sa.Column('label_column', sa.VARCHAR(), autoincrement=False, nullable=True))
+    op.add_column('lookup_file', sa.Column('value_column', sa.VARCHAR(), autoincrement=False, nullable=True))
+    op.drop_column('lookup_file', 'workflow_spec_id')
+    op.drop_column('lookup_file', 'is_ldap')
+    op.drop_column('lookup_file', 'field_id')
+    # ### end Alembic commands ###

View File

@@ -1,90 +1,119 @@
+import os
 from tests.base_test import BaseTest
-from crc import session
-from crc.models.file import FileDataModel, FileModel, LookupFileModel, LookupDataModel
 from crc.services.file_service import FileService
+from crc.api.common import ApiError
+from crc import session, app
+from crc.models.file import FileDataModel, FileModel, LookupFileModel, LookupDataModel, CONTENT_TYPES
 from crc.services.lookup_service import LookupService
+from crc.services.workflow_processor import WorkflowProcessor


 class TestLookupService(BaseTest):

-    def test_create_lookup_file_multiple_times_does_not_update_database(self):
-        spec = BaseTest.load_test_spec('enum_options_from_file')
+    def test_lookup_returns_good_error_on_bad_field(self):
+        spec = BaseTest.load_test_spec('enum_options_with_search')
+        workflow = self.create_workflow('enum_options_with_search')
         file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
         file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()
-        LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
-        LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
-        LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
+        with self.assertRaises(ApiError):
+            LookupService.lookup(workflow, "not_the_right_field", "sam", limit=10)
+
+    def test_lookup_table_is_not_created_more_than_once(self):
+        spec = BaseTest.load_test_spec('enum_options_with_search')
+        workflow = self.create_workflow('enum_options_with_search')
+        LookupService.lookup(workflow, "sponsor", "sam", limit=10)
+        LookupService.lookup(workflow, "sponsor", "something", limit=10)
+        LookupService.lookup(workflow, "sponsor", "blah", limit=10)
         lookup_records = session.query(LookupFileModel).all()
         self.assertIsNotNone(lookup_records)
         self.assertEqual(1, len(lookup_records))
         lookup_record = lookup_records[0]
         lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
         self.assertEquals(28, len(lookup_data))

-        # Using the same table with different lookup lable or value, does create additional records.
-        LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NAME", "CUSTOMER_NUMBER")
+    def test_updates_to_file_cause_lookup_rebuild(self):
+        spec = BaseTest.load_test_spec('enum_options_with_search')
+        workflow = self.create_workflow('enum_options_with_search')
+        file_model = session.query(FileModel).filter(FileModel.name == "sponsors.xls").first()
+        LookupService.lookup(workflow, "sponsor", "sam", limit=10)
         lookup_records = session.query(LookupFileModel).all()
         self.assertIsNotNone(lookup_records)
-        self.assertEqual(2, len(lookup_records))
+        self.assertEqual(1, len(lookup_records))
+        lookup_record = lookup_records[0]
+        lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
+        self.assertEquals(28, len(lookup_data))
+
+        # Update the workflow specification file.
+        file_path = os.path.join(app.root_path, '..', 'tests', 'data',
+                                 'enum_options_with_search', 'sponsors_modified.xls')
+        file = open(file_path, 'rb')
+        FileService.update_file(file_model, file.read(), CONTENT_TYPES['xls'])
+        file.close()
+
+        # restart the workflow, so it can pick up the changes.
+        WorkflowProcessor(workflow, soft_reset=True)
+
+        LookupService.lookup(workflow, "sponsor", "sam", limit=10)
+        lookup_records = session.query(LookupFileModel).all()
+        lookup_record = lookup_records[0]
+        lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
+        self.assertEquals(4, len(lookup_data))

     def test_some_full_text_queries(self):
         spec = BaseTest.load_test_spec('enum_options_from_file')
-        file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
-        file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()
-        lookup_table = LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
-        results = LookupService._run_lookup_query(lookup_table, "medicines", limit=10)
+        workflow = self.create_workflow('enum_options_from_file')
+        processor = WorkflowProcessor(workflow)
+        processor.do_engine_steps()
+
+        results = LookupService.lookup(workflow, "AllTheNames", "", limit=10)
+        self.assertEquals(10, len(results), "Blank queries return everything, to the limit")
+
+        results = LookupService.lookup(workflow, "AllTheNames", "medicines", limit=10)
         self.assertEquals(1, len(results), "words in the middle of label are detected.")
         self.assertEquals("The Medicines Company", results[0].label)

-        results = LookupService._run_lookup_query(lookup_table, "", limit=10)
-        self.assertEquals(10, len(results), "Blank queries return everything, to the limit")
-
-        results = LookupService._run_lookup_query(lookup_table, "UVA", limit=10)
+        results = LookupService.lookup(workflow, "AllTheNames", "UVA", limit=10)
         self.assertEquals(1, len(results), "Beginning of label is found.")
         self.assertEquals("UVA - INTERNAL - GM USE ONLY", results[0].label)

-        results = LookupService._run_lookup_query(lookup_table, "uva", limit=10)
+        results = LookupService.lookup(workflow, "AllTheNames", "uva", limit=10)
         self.assertEquals(1, len(results), "case does not matter.")
         self.assertEquals("UVA - INTERNAL - GM USE ONLY", results[0].label)

-        results = LookupService._run_lookup_query(lookup_table, "medici", limit=10)
+        results = LookupService.lookup(workflow, "AllTheNames", "medici", limit=10)
         self.assertEquals(1, len(results), "partial words are picked up.")
         self.assertEquals("The Medicines Company", results[0].label)

-        results = LookupService._run_lookup_query(lookup_table, "Genetics Savings", limit=10)
+        results = LookupService.lookup(workflow, "AllTheNames", "Genetics Savings", limit=10)
         self.assertEquals(1, len(results), "multiple terms are picked up..")
         self.assertEquals("Genetics Savings & Clone, Inc.", results[0].label)

-        results = LookupService._run_lookup_query(lookup_table, "Genetics Sav", limit=10)
+        results = LookupService.lookup(workflow, "AllTheNames", "Genetics Sav", limit=10)
         self.assertEquals(1, len(results), "prefix queries still work with partial terms")
         self.assertEquals("Genetics Savings & Clone, Inc.", results[0].label)

-        results = LookupService._run_lookup_query(lookup_table, "Gen Sav", limit=10)
+        results = LookupService.lookup(workflow, "AllTheNames", "Gen Sav", limit=10)
         self.assertEquals(1, len(results), "prefix queries still work with ALL the partial terms")
         self.assertEquals("Genetics Savings & Clone, Inc.", results[0].label)

-        results = LookupService._run_lookup_query(lookup_table, "Inc", limit=10)
+        results = LookupService.lookup(workflow, "AllTheNames", "Inc", limit=10)
         self.assertEquals(7, len(results), "short terms get multiple correct results.")
         self.assertEquals("Genetics Savings & Clone, Inc.", results[0].label)

-        results = LookupService._run_lookup_query(lookup_table, "reaction design", limit=10)
+        results = LookupService.lookup(workflow, "AllTheNames", "reaction design", limit=10)
         self.assertEquals(5, len(results), "all results come back for two terms.")
         self.assertEquals("Reaction Design", results[0].label, "Exact matches come first.")

-    def test_prefer_exact_match(self):
-        spec = BaseTest.load_test_spec('enum_options_from_file')
-        file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
-        file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()
-        lookup_table = LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER",
-                                                                      "CUSTOMER_NAME")
-        results = LookupService._run_lookup_query(lookup_table, "1 Something", limit=10)
+        results = LookupService.lookup(workflow, "AllTheNames", "1 Something", limit=10)
         self.assertEquals("1 Something", results[0].label, "Exact matches are prefered")

+        results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10)
+        self.assertEquals("1 Something", results[0].label, "special characters don't flake out")
+
         # 1018 10000 Something Industry
         # 1019 1000 Something Industry

View File

@@ -334,8 +334,8 @@ class TestTasksApi(BaseTest):
         workflow = self.get_workflow_api(workflow)
         task = workflow.next_task
         field_id = task.form['fields'][0]['id']
-        rv = self.app.get('/v1.0/workflow/%i/task/%s/lookup/%s?query=%s&limit=5' %
-                          (workflow.id, task.id, field_id, 'c'),  # All records with a word that starts with 'c'
+        rv = self.app.get('/v1.0/workflow/%i/lookup/%s?query=%s&limit=5' %
+                          (workflow.id, field_id, 'c'),  # All records with a word that starts with 'c'
                           headers=self.logged_in_headers(),
                           content_type="application/json")
         self.assert_success(rv)
@@ -350,8 +350,8 @@ class TestTasksApi(BaseTest):
         task = workflow.next_task
         field_id = task.form['fields'][0]['id']
         # lb3dp is a user record in the mock ldap responses for tests.
-        rv = self.app.get('/v1.0/workflow/%i/task/%s/lookup/%s?query=%s&limit=5' %
-                          (workflow.id, task.id, field_id, 'lb3dp'),
+        rv = self.app.get('/v1.0/workflow/%s/lookup/%s?query=%s&limit=5' %
+                          (workflow.id, field_id, 'lb3dp'),
                           headers=self.logged_in_headers(),
                           content_type="application/json")
         self.assert_success(rv)

View File

@@ -1,7 +1,5 @@
 from tests.base_test import BaseTest
-from crc import session
-from crc.models.file import FileDataModel, FileModel, LookupFileModel, LookupDataModel
-from crc.services.lookup_service import LookupService
 from crc.services.workflow_processor import WorkflowProcessor
 from crc.services.workflow_service import WorkflowService
@@ -72,36 +70,6 @@ class TestWorkflowService(BaseTest):
         self.assertEquals('1000', options[0]['id'])
         self.assertEquals("UVA - INTERNAL - GM USE ONLY", options[0]['name'])

-    def test_create_lookup_file(self):
-        spec = self.load_test_spec('enum_options_from_file')
-        file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
-        file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()
-        LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
-        lookup_records = session.query(LookupFileModel).all()
-        self.assertIsNotNone(lookup_records)
-        self.assertEqual(1, len(lookup_records))
-        lookup_record = lookup_records[0]
-        self.assertIsNotNone(lookup_record)
-        self.assertEquals("CUSTOMER_NUMBER", lookup_record.value_column)
-        self.assertEquals("CUSTOMER_NAME", lookup_record.label_column)
-        self.assertEquals("CUSTOMER_NAME", lookup_record.label_column)
-        lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
-        self.assertEquals(28, len(lookup_data))
-        self.assertEquals("1000", lookup_data[0].value)
-        self.assertEquals("UVA - INTERNAL - GM USE ONLY", lookup_data[0].label)
-        # search_results = session.query(LookupDataModel).\
-        #     filter(LookupDataModel.lookup_file_model_id == lookup_record.id).\
-        #     filter(LookupDataModel.__ts_vector__.op('@@')(func.plainto_tsquery('INTERNAL'))).all()
-        search_results = LookupDataModel.query.filter(LookupDataModel.label.match("INTERNAL")).all()
-        self.assertEquals(1, len(search_results))
-        search_results = LookupDataModel.query.filter(LookupDataModel.label.match("internal")).all()
-        self.assertEquals(1, len(search_results))
-        # This query finds results where a word starts with "bio"
-        search_results = LookupDataModel.query.filter(LookupDataModel.label.match("bio:*")).all()
-        self.assertEquals(2, len(search_results))
-
     def test_random_data_populate_form_on_auto_complete(self):
         self.load_example_data()
         workflow = self.create_workflow('enum_options_with_search')