Faster lookup fields. We were parsing the spec on every request to get details about how to search. Now we just take the workflow id and field id and build them straight into the full-text search index for faster lookups. Should be peppy.

Another speed improvement - data in the FileDataModel is deferred, and not queried until it is specifically used, as the new data structures need to use this model frequently.
This commit is contained in:
Dan Funk 2020-05-29 01:39:39 -04:00
parent 22bdb6c760
commit 11413838a7
13 changed files with 257 additions and 182 deletions

View File

@ -672,7 +672,7 @@ paths:
application/json:
schema:
$ref: "#/components/schemas/Workflow"
/workflow/{workflow_id}/task/{task_id}/lookup/{field_id}:
/workflow/{workflow_id}/lookup/{field_id}:
parameters:
- name: workflow_id
in: path
@ -681,13 +681,6 @@ paths:
schema:
type: integer
format: int32
- name: task_id
in: path
required: true
description: The id of the task
schema:
type: string
format: uuid
- name: field_id
in: path
required: true

View File

@ -219,26 +219,13 @@ def delete_workflow_spec_category(cat_id):
session.commit()
def lookup(workflow_id, task_id, field_id, query, limit):
def lookup(workflow_id, field_id, query, limit):
"""
given a field in a task, attempts to find the lookup table or function associated
with that field and runs a full-text query against it to locate the values and
labels that would be returned to a type-ahead box.
Tries to be fast, but first runs will be very slow.
"""
workflow_model = session.query(WorkflowModel).filter_by(id=workflow_id).first()
if not workflow_model:
raise ApiError("unknown_workflow", "No workflow found with id: %i" % workflow_id)
processor = WorkflowProcessor(workflow_model)
task_id = uuid.UUID(task_id)
spiff_task = processor.bpmn_workflow.get_task(task_id)
if not spiff_task:
raise ApiError("unknown_task", "No task with %s found in workflow: %i" % (task_id, workflow_id))
field = None
for f in spiff_task.task_spec.form.fields:
if f.id == field_id:
field = f
if not field:
raise ApiError("unknown_field", "No field named %s in task %s" % (task_id, spiff_task.task_spec.name))
lookup_data = LookupService.lookup(spiff_task, field, query, limit)
workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first()
lookup_data = LookupService.lookup(workflow, field_id, query, limit)
return LookupDataSchema(many=True).dump(lookup_data)

View File

@ -6,6 +6,7 @@ from marshmallow_enum import EnumField
from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
from sqlalchemy import func, Index
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import deferred
from crc import db, ma
@ -61,7 +62,7 @@ class FileDataModel(db.Model):
__tablename__ = 'file_data'
id = db.Column(db.Integer, primary_key=True)
md5_hash = db.Column(UUID(as_uuid=True), unique=False, nullable=False)
data = db.Column(db.LargeBinary)
data = deferred(db.Column(db.LargeBinary)) # Don't load it unless you have to.
version = db.Column(db.Integer, default=0)
date_created = db.Column(db.DateTime(timezone=True), default=func.now())
file_model_id = db.Column(db.Integer, db.ForeignKey('file.id'))
@ -127,25 +128,22 @@ class FileSchema(ma.Schema):
class LookupFileModel(db.Model):
"""Takes the content of a file (like a xlsx, or csv file) and creates a key/value
store that can be used for lookups and searches. This table contains the metadata,
so we know the version of the file that was used, and what key column, and value column
were used to generate this lookup table. ie, the same xls file might have multiple
lookup file models, if different keys and labels are used - or someone decides to
make a change. We need to handle full text search over the label and value columns,
and not every column, because we don't know how much information will be in there. """
"""Gives us a quick way to tell what kind of lookup is set on a form field.
Connected to the file data model, so that if a new version of the same file is
created, we can update the listing."""
#fixme: What happens if they change the file associated with a lookup field?
__tablename__ = 'lookup_file'
id = db.Column(db.Integer, primary_key=True)
label_column = db.Column(db.String)
value_column = db.Column(db.String)
workflow_spec_id = db.Column(db.String)
field_id = db.Column(db.String)
is_ldap = db.Column(db.Boolean) # Allows us to run an ldap query instead of a db lookup.
file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id'))
dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan")
class LookupDataModel(db.Model):
__tablename__ = 'lookup_data'
id = db.Column(db.Integer, primary_key=True)
lookup_file_model_id = db.Column(db.Integer, db.ForeignKey('lookup_file.id'))
lookup_file_model = db.relationship(LookupFileModel)
value = db.Column(db.String)
label = db.Column(db.String)
# In the future, we might allow adding an additional "search" column if we want to search things not in label.

View File

@ -204,22 +204,27 @@ class FileService(object):
return results
@staticmethod
def get_spec_data_files(workflow_spec_id, workflow_id=None):
def get_spec_data_files(workflow_spec_id, workflow_id=None, name=None):
"""Returns all the FileDataModels related to a workflow specification.
If a workflow is specified, returns the version of the spec relatted
to that workflow, otherwise, returns the lastes files."""
if workflow_id:
files = session.query(FileDataModel) \
.join(WorkflowSpecDependencyFile) \
.filter(WorkflowSpecDependencyFile.workflow_id == workflow_id) \
.order_by(FileDataModel.id).all()
return files
query = session.query(FileDataModel) \
.join(WorkflowSpecDependencyFile) \
.filter(WorkflowSpecDependencyFile.workflow_id == workflow_id) \
.order_by(FileDataModel.id)
if name:
query = query.join(FileModel).filter(FileModel.name == name)
return query.all()
else:
"""Returns all the latest files related to a workflow specification"""
file_models = FileService.get_files(workflow_spec_id=workflow_spec_id)
latest_data_files = []
for file_model in file_models:
latest_data_files.append(FileService.get_file_data(file_model.id))
if name and file_model.name == name:
latest_data_files.append(FileService.get_file_data(file_model.id))
elif not name:
latest_data_files.append(FileService.get_file_data(file_model.id))
return latest_data_files
@staticmethod

View File

@ -1,4 +1,5 @@
import logging
import re
from pandas import ExcelFile
from sqlalchemy import func, desc
@ -8,8 +9,11 @@ from crc import db
from crc.api.common import ApiError
from crc.models.api_models import Task
from crc.models.file import FileDataModel, LookupFileModel, LookupDataModel
from crc.models.workflow import WorkflowModel, WorkflowSpecDependencyFile
from crc.services.file_service import FileService
from crc.services.ldap_service import LdapService
from crc.services.workflow_processor import WorkflowProcessor
class TSRank(GenericFunction):
package = 'full_text'
@ -31,33 +35,56 @@ class LookupService(object):
"""
@staticmethod
def lookup(spiff_task, field, query, limit):
"""Executes the lookup for the given field."""
if field.type != Task.FIELD_TYPE_AUTO_COMPLETE:
raise ApiError.from_task("invalid_field_type",
"Field '%s' must be an autocomplete field to use lookups." % field.label,
task=spiff_task)
# If this field has an associated options file, then do the lookup against that field.
if field.has_property(Task.PROP_OPTIONS_FILE):
lookup_table = LookupService.get_lookup_table(spiff_task, field)
return LookupService._run_lookup_query(lookup_table, query, limit)
# If this is a ldap lookup, use the ldap service to provide the fields to return.
elif field.has_property(Task.PROP_LDAP_LOOKUP):
return LookupService._run_ldap_query(query, limit)
else:
raise ApiError.from_task("unknown_lookup_option",
"Lookup supports using spreadsheet options or ldap options, and neither was"
"provided.")
def get_lookup_model(spiff_task, field):
    """Return the lookup model for a form field, starting from its task.

    Reads the workflow id stored in the task's workflow data, loads the
    matching WorkflowModel, and delegates to the workflow/field-id based
    lookup-model resolution.
    """
    owning_workflow_id = spiff_task.workflow.data[WorkflowProcessor.WORKFLOW_ID_KEY]
    workflow_query = db.session.query(WorkflowModel).filter(WorkflowModel.id == owning_workflow_id)
    return LookupService.__get_lookup_model(workflow_query.first(), field.id)
@staticmethod
def get_lookup_table(spiff_task, field):
""" Checks to see if the options are provided in a separate lookup table associated with the
def __get_lookup_model(workflow, field_id):
    """Return an up-to-date LookupFileModel for a field of the given workflow.

    Looks for an existing model keyed on the workflow's spec id and the
    field id. A file-backed model is considered current while its file data
    is still referenced by a WorkflowSpecDependencyFile row; an LDAP model is
    always considered current. A missing or stale model is (re)built through
    LookupService.create_lookup_model, which is very expensive.
    """
    lookup_model = db.session.query(LookupFileModel) \
        .filter(LookupFileModel.workflow_spec_id == workflow.workflow_spec_id) \
        .filter(LookupFileModel.field_id == field_id).first()

    is_current = False
    if lookup_model:
        if lookup_model.is_ldap:
            # LDAP models are created without a file_data_model_id, so there
            # is no dependency file to compare against. Without this branch
            # the check below would presumably never match, forcing a full
            # rebuild on every LDAP lookup request.
            is_current = True
        else:
            # One more quick query, to see if the lookup file is still
            # related to a workflow. If not, the lookup table is rebuilt.
            is_current = db.session.query(WorkflowSpecDependencyFile). \
                filter(WorkflowSpecDependencyFile.file_data_id == lookup_model.file_data_model_id).count() > 0

    if not is_current:
        if lookup_model:
            db.session.delete(lookup_model)
        # Very, very expensive - but we don't know we need this until we do.
        lookup_model = LookupService.create_lookup_model(workflow, field_id)

    return lookup_model
@staticmethod
def lookup(workflow, field_id, query, limit):
    """Run a type-ahead lookup for one form field of a workflow.

    Resolves the lookup model for the field, then runs either an LDAP query
    or a query against the stored lookup data, passing `query` and `limit`
    through unchanged.

    Raises:
        ApiError: "unknown_workflow" when no workflow model is supplied,
            e.g. because the caller's query for the workflow found no row.
    """
    if workflow is None:
        # Fail with a clear API error rather than an AttributeError when
        # the workflow attributes are dereferenced further down.
        raise ApiError("unknown_workflow",
                       "No workflow was provided for the lookup of field: %s" % field_id)

    lookup_model = LookupService.__get_lookup_model(workflow, field_id)
    if lookup_model.is_ldap:
        return LookupService._run_ldap_query(query, limit)
    return LookupService._run_lookup_query(lookup_model, query, limit)
@staticmethod
def create_lookup_model(workflow_model, field_id):
"""
This is all really expensive, but should happen just once (per file change).
Checks to see if the options are provided in a separate lookup table associated with the
workflow, and if so, assures that data exists in the database, and return a model than can be used
to locate that data.
Returns: an array of LookupData, suitable for returning to the api.
"""
processor = WorkflowProcessor(workflow_model) # VERY expensive, Ludicrous for lookup / type ahead
spiff_task, field = processor.find_task_and_field_by_field_id(field_id)
if field.has_property(Task.PROP_OPTIONS_FILE):
if not field.has_property(Task.PROP_OPTIONS_VALUE_COLUMN) or \
not field.has_property(Task.PROP_OPTIONS_LABEL_COL):
@ -72,52 +99,67 @@ class LookupService(object):
file_name = field.get_property(Task.PROP_OPTIONS_FILE)
value_column = field.get_property(Task.PROP_OPTIONS_VALUE_COLUMN)
label_column = field.get_property(Task.PROP_OPTIONS_LABEL_COL)
data_model = FileService.get_workflow_file_data(spiff_task.workflow, file_name)
lookup_model = LookupService.get_lookup_table_from_data_model(data_model, value_column, label_column)
return lookup_model
latest_files = FileService.get_spec_data_files(workflow_spec_id=workflow_model.workflow_spec_id,
workflow_id=workflow_model.id,
name=file_name)
if len(latest_files) < 1:
raise ApiError("missing_file", "Unable to locate the lookup data file '%s'" % file_name)
else:
data_model = latest_files[0]
lookup_model = LookupService.build_lookup_table(data_model, value_column, label_column,
workflow_model.workflow_spec_id, field_id)
elif field.has_property(Task.PROP_LDAP_LOOKUP):
lookup_model = LookupFileModel(workflow_spec_id=workflow_model.workflow_spec_id,
field_id=field_id,
is_ldap=True)
else:
raise ApiError("unknown_lookup_option",
"Lookup supports using spreadsheet options or ldap options, and neither "
"was provided.")
db.session.add(lookup_model)
db.session.commit()
return lookup_model
@staticmethod
def get_lookup_table_from_data_model(data_model: FileDataModel, value_column, label_column):
def build_lookup_table(data_model: FileDataModel, value_column, label_column, workflow_spec_id, field_id):
""" In some cases the lookup table can be very large. This method will add all values to the database
in a way that can be searched and returned via an api call - rather than sending the full set of
options along with the form. It will only open the file and process the options if something has
changed. """
xls = ExcelFile(data_model.data)
df = xls.parse(xls.sheet_names[0]) # Currently we only look at the fist sheet.
if value_column not in df:
raise ApiError("invalid_emum",
"The file %s does not contain a column named % s" % (data_model.file_model.name,
value_column))
if label_column not in df:
raise ApiError("invalid_emum",
"The file %s does not contain a column named % s" % (data_model.file_model.name,
label_column))
lookup_model = db.session.query(LookupFileModel) \
.filter(LookupFileModel.file_data_model_id == data_model.id) \
.filter(LookupFileModel.value_column == value_column) \
.filter(LookupFileModel.label_column == label_column).first()
if not lookup_model:
xls = ExcelFile(data_model.data)
df = xls.parse(xls.sheet_names[0]) # Currently we only look at the fist sheet.
if value_column not in df:
raise ApiError("invalid_emum",
"The file %s does not contain a column named % s" % (data_model.file_model.name,
value_column))
if label_column not in df:
raise ApiError("invalid_emum",
"The file %s does not contain a column named % s" % (data_model.file_model.name,
label_column))
lookup_model = LookupFileModel(label_column=label_column, value_column=value_column,
file_data_model_id=data_model.id)
db.session.add(lookup_model)
for index, row in df.iterrows():
lookup_data = LookupDataModel(lookup_file_model=lookup_model,
value=row[value_column],
label=row[label_column],
data=row.to_json())
db.session.add(lookup_data)
db.session.commit()
lookup_model = LookupFileModel(workflow_spec_id=workflow_spec_id,
field_id=field_id,
file_data_model_id=data_model.id,
is_ldap=False)
db.session.add(lookup_model)
for index, row in df.iterrows():
lookup_data = LookupDataModel(lookup_file_model=lookup_model,
value=row[value_column],
label=row[label_column],
data=row.to_json())
db.session.add(lookup_data)
db.session.commit()
return lookup_model
@staticmethod
def _run_lookup_query(lookup_file_model, query, limit):
db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model)
query = re.sub('[^A-Za-z0-9 ]+', '', query)
print("Query: " + query)
query = query.strip()
if len(query) > 0:
if ' ' in query:

View File

@ -100,7 +100,7 @@ class WorkflowProcessor(object):
STUDY_ID_KEY = "study_id"
VALIDATION_PROCESS_KEY = "validate_only"
def __init__(self, workflow_model: WorkflowModel, soft_reset=False, hard_reset=False):
def __init__(self, workflow_model: WorkflowModel, soft_reset=False, hard_reset=False, validate_only=False):
"""Create a Workflow Processor based on the serialized information available in the workflow model.
If soft_reset is set to true, it will try to use the latest version of the workflow specification.
If hard_reset is set to true, it will create a new Workflow, but embed the data from the last
@ -121,6 +121,7 @@ class WorkflowProcessor(object):
self.workflow_spec_id = workflow_model.workflow_spec_id
try:
self.bpmn_workflow = self.__get_bpmn_workflow(workflow_model, spec)
self.bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = validate_only
self.bpmn_workflow.script_engine = self._script_engine
if self.WORKFLOW_ID_KEY not in self.bpmn_workflow.data:
@ -402,3 +403,13 @@ class WorkflowProcessor(object):
for nav_item in self.bpmn_workflow.get_nav_list():
if nav_item['task_id'] == task.id:
return nav_item
def find_task_and_field_by_field_id(self, field_id):
    """Locate a form field by its id among the workflow's READY tasks.

    Only tasks whose task spec has a form are inspected. Returns a
    (spiff_task, field) tuple for the first matching field, and raises an
    ApiError ("invalid_field") when no ready task has such a field.
    """
    ready_tasks = self.bpmn_workflow.get_tasks(SpiffTask.READY)
    for candidate_task in ready_tasks:
        if not hasattr(candidate_task.task_spec, "form"):
            continue  # This task has no form, so it has no fields to match.
        for form_field in candidate_task.task_spec.form.fields:
            if form_field.id == field_id:
                return candidate_task, form_field
    raise ApiError("invalid_field", "Unable to find a ready task with field: %s" % field_id)

View File

@ -18,6 +18,7 @@ from crc.api.common import ApiError
from crc.models.api_models import Task, MultiInstanceType
from crc.models.file import LookupDataModel
from crc.models.stats import TaskEventModel
from crc.models.workflow import WorkflowModel, WorkflowStatus
from crc.services.file_service import FileService
from crc.services.lookup_service import LookupService
from crc.services.workflow_processor import WorkflowProcessor, CustomBpmnScriptEngine
@ -41,18 +42,20 @@ class WorkflowService(object):
"""Runs a spec through it's paces to see if it results in any errors. Not fool-proof, but a good
sanity check."""
spec = WorkflowProcessor.get_spec(
file_data_models=FileService.get_spec_data_files(workflow_spec_id=spec_id),
workflow_spec_id=spec_id)
bpmn_workflow = BpmnWorkflow(spec, script_engine=CustomBpmnScriptEngine())
bpmn_workflow.data[WorkflowProcessor.STUDY_ID_KEY] = 1
bpmn_workflow.data[WorkflowProcessor.WORKFLOW_ID_KEY] = spec_id
bpmn_workflow.data[WorkflowProcessor.VALIDATION_PROCESS_KEY] = True
workflow_model = WorkflowModel(status=WorkflowStatus.not_started,
workflow_spec_id=spec_id,
last_updated=datetime.now(),
study_id=1)
try:
processor = WorkflowProcessor(workflow_model, validate_only=True)
except WorkflowException as we:
raise ApiError.from_task_spec("workflow_execution_exception", str(we),
we.sender)
while not bpmn_workflow.is_completed():
while not processor.bpmn_workflow.is_completed():
try:
bpmn_workflow.do_engine_steps()
tasks = bpmn_workflow.get_tasks(SpiffTask.READY)
processor.bpmn_workflow.do_engine_steps()
tasks = processor.bpmn_workflow.get_tasks(SpiffTask.READY)
for task in tasks:
task_api = WorkflowService.spiff_task_to_api_task(
task,
@ -60,8 +63,10 @@ class WorkflowService(object):
WorkflowService.populate_form_with_random_data(task, task_api)
task.complete()
except WorkflowException as we:
db.session.delete(workflow_model)
raise ApiError.from_task_spec("workflow_execution_exception", str(we),
we.sender)
db.session.delete(workflow_model)
@staticmethod
def populate_form_with_random_data(task, task_api):
@ -84,7 +89,7 @@ class WorkflowService(object):
" with no options" % field.id,
task)
elif field.type == "autocomplete":
lookup_model = LookupService.get_lookup_table(task, field)
lookup_model = LookupService.get_lookup_model(task, field)
if field.has_property(Task.PROP_LDAP_LOOKUP):
form_data[field.id] = {
"label": "dhf8r",
@ -250,12 +255,12 @@ class WorkflowService(object):
@staticmethod
def process_options(spiff_task, field):
lookup_model = LookupService.get_lookup_table(spiff_task, field)
# If this is an auto-complete field, do not populate options, a lookup will happen later.
if field.type == Task.FIELD_TYPE_AUTO_COMPLETE:
pass
else:
elif field.has_property(Task.PROP_OPTIONS_FILE):
lookup_model = LookupService.get_lookup_model(spiff_task, field)
data = db.session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_model).all()
if not hasattr(field, 'options'):
field.options = []
@ -286,3 +291,4 @@ class WorkflowService(object):
)
db.session.add(task_event)
db.session.commit()

View File

@ -0,0 +1,36 @@
"""empty message
Revision ID: 5064b72284b7
Revises: bec71f7dc652
Create Date: 2020-05-28 23:54:45.623361
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '5064b72284b7'
down_revision = 'bec71f7dc652'
branch_labels = None
depends_on = None
def upgrade():
    """Add field_id, is_ldap and workflow_spec_id to lookup_file, drop the column-name fields.

    The three new columns are nullable. The value_column and label_column
    columns are dropped, so their contents are not preserved.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    added_columns = (
        ('field_id', sa.String()),
        ('is_ldap', sa.Boolean()),
        ('workflow_spec_id', sa.String()),
    )
    for column_name, column_type in added_columns:
        op.add_column('lookup_file', sa.Column(column_name, column_type, nullable=True))
    for column_name in ('value_column', 'label_column'):
        op.drop_column('lookup_file', column_name)
    # ### end Alembic commands ###
def downgrade():
    """Restore label_column and value_column on lookup_file and drop the newer columns.

    The restored columns are added back as nullable VARCHAR columns; the
    workflow_spec_id, is_ldap and field_id columns are removed.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    for column_name in ('label_column', 'value_column'):
        op.add_column('lookup_file', sa.Column(column_name, sa.VARCHAR(), autoincrement=False, nullable=True))
    for column_name in ('workflow_spec_id', 'is_ldap', 'field_id'):
        op.drop_column('lookup_file', column_name)
    # ### end Alembic commands ###

View File

@ -1,90 +1,119 @@
import os
from tests.base_test import BaseTest
from crc import session
from crc.models.file import FileDataModel, FileModel, LookupFileModel, LookupDataModel
from crc.services.file_service import FileService
from crc.api.common import ApiError
from crc import session, app
from crc.models.file import FileDataModel, FileModel, LookupFileModel, LookupDataModel, CONTENT_TYPES
from crc.services.lookup_service import LookupService
from crc.services.workflow_processor import WorkflowProcessor
class TestLookupService(BaseTest):
def test_create_lookup_file_multiple_times_does_not_update_database(self):
spec = BaseTest.load_test_spec('enum_options_from_file')
def test_lookup_returns_good_error_on_bad_field(self):
spec = BaseTest.load_test_spec('enum_options_with_search')
workflow = self.create_workflow('enum_options_with_search')
file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()
with self.assertRaises(ApiError):
LookupService.lookup(workflow, "not_the_right_field", "sam", limit=10)
LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
def test_lookup_table_is_not_created_more_than_once(self):
spec = BaseTest.load_test_spec('enum_options_with_search')
workflow = self.create_workflow('enum_options_with_search')
LookupService.lookup(workflow, "sponsor", "sam", limit=10)
LookupService.lookup(workflow, "sponsor", "something", limit=10)
LookupService.lookup(workflow, "sponsor", "blah", limit=10)
lookup_records = session.query(LookupFileModel).all()
self.assertIsNotNone(lookup_records)
self.assertEqual(1, len(lookup_records))
lookup_record = lookup_records[0]
lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
self.assertEquals(28, len(lookup_data))
# Using the same table with different lookup lable or value, does create additional records.
LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NAME", "CUSTOMER_NUMBER")
def test_updates_to_file_cause_lookup_rebuild(self):
spec = BaseTest.load_test_spec('enum_options_with_search')
workflow = self.create_workflow('enum_options_with_search')
file_model = session.query(FileModel).filter(FileModel.name == "sponsors.xls").first()
LookupService.lookup(workflow, "sponsor", "sam", limit=10)
lookup_records = session.query(LookupFileModel).all()
self.assertIsNotNone(lookup_records)
self.assertEqual(2, len(lookup_records))
self.assertEqual(1, len(lookup_records))
lookup_record = lookup_records[0]
lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
self.assertEquals(28, len(lookup_data))
# Update the workflow specification file.
file_path = os.path.join(app.root_path, '..', 'tests', 'data',
'enum_options_with_search', 'sponsors_modified.xls')
file = open(file_path, 'rb')
FileService.update_file(file_model, file.read(), CONTENT_TYPES['xls'])
file.close()
# restart the workflow, so it can pick up the changes.
WorkflowProcessor(workflow, soft_reset=True)
LookupService.lookup(workflow, "sponsor", "sam", limit=10)
lookup_records = session.query(LookupFileModel).all()
lookup_record = lookup_records[0]
lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
self.assertEquals(4, len(lookup_data))
def test_some_full_text_queries(self):
spec = BaseTest.load_test_spec('enum_options_from_file')
file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()
lookup_table = LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
workflow = self.create_workflow('enum_options_from_file')
processor = WorkflowProcessor(workflow)
processor.do_engine_steps()
results = LookupService._run_lookup_query(lookup_table, "medicines", limit=10)
results = LookupService.lookup(workflow, "AllTheNames", "", limit=10)
self.assertEquals(10, len(results), "Blank queries return everything, to the limit")
results = LookupService.lookup(workflow, "AllTheNames", "medicines", limit=10)
self.assertEquals(1, len(results), "words in the middle of label are detected.")
self.assertEquals("The Medicines Company", results[0].label)
results = LookupService._run_lookup_query(lookup_table, "", limit=10)
self.assertEquals(10, len(results), "Blank queries return everything, to the limit")
results = LookupService._run_lookup_query(lookup_table, "UVA", limit=10)
results = LookupService.lookup(workflow, "AllTheNames", "UVA", limit=10)
self.assertEquals(1, len(results), "Beginning of label is found.")
self.assertEquals("UVA - INTERNAL - GM USE ONLY", results[0].label)
results = LookupService._run_lookup_query(lookup_table, "uva", limit=10)
results = LookupService.lookup(workflow, "AllTheNames", "uva", limit=10)
self.assertEquals(1, len(results), "case does not matter.")
self.assertEquals("UVA - INTERNAL - GM USE ONLY", results[0].label)
results = LookupService._run_lookup_query(lookup_table, "medici", limit=10)
results = LookupService.lookup(workflow, "AllTheNames", "medici", limit=10)
self.assertEquals(1, len(results), "partial words are picked up.")
self.assertEquals("The Medicines Company", results[0].label)
results = LookupService._run_lookup_query(lookup_table, "Genetics Savings", limit=10)
results = LookupService.lookup(workflow, "AllTheNames", "Genetics Savings", limit=10)
self.assertEquals(1, len(results), "multiple terms are picked up..")
self.assertEquals("Genetics Savings & Clone, Inc.", results[0].label)
results = LookupService._run_lookup_query(lookup_table, "Genetics Sav", limit=10)
results = LookupService.lookup(workflow, "AllTheNames", "Genetics Sav", limit=10)
self.assertEquals(1, len(results), "prefix queries still work with partial terms")
self.assertEquals("Genetics Savings & Clone, Inc.", results[0].label)
results = LookupService._run_lookup_query(lookup_table, "Gen Sav", limit=10)
results = LookupService.lookup(workflow, "AllTheNames", "Gen Sav", limit=10)
self.assertEquals(1, len(results), "prefix queries still work with ALL the partial terms")
self.assertEquals("Genetics Savings & Clone, Inc.", results[0].label)
results = LookupService._run_lookup_query(lookup_table, "Inc", limit=10)
results = LookupService.lookup(workflow, "AllTheNames", "Inc", limit=10)
self.assertEquals(7, len(results), "short terms get multiple correct results.")
self.assertEquals("Genetics Savings & Clone, Inc.", results[0].label)
results = LookupService._run_lookup_query(lookup_table, "reaction design", limit=10)
results = LookupService.lookup(workflow, "AllTheNames", "reaction design", limit=10)
self.assertEquals(5, len(results), "all results come back for two terms.")
self.assertEquals("Reaction Design", results[0].label, "Exact matches come first.")
def test_prefer_exact_match(self):
spec = BaseTest.load_test_spec('enum_options_from_file')
file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()
lookup_table = LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER",
"CUSTOMER_NAME")
results = LookupService._run_lookup_query(lookup_table, "1 Something", limit=10)
results = LookupService.lookup(workflow, "AllTheNames", "1 Something", limit=10)
self.assertEquals("1 Something", results[0].label, "Exact matches are prefered")
results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10)
self.assertEquals("1 Something", results[0].label, "special characters don't flake out")
# 1018 10000 Something Industry
# 1019 1000 Something Industry

View File

@ -334,8 +334,8 @@ class TestTasksApi(BaseTest):
workflow = self.get_workflow_api(workflow)
task = workflow.next_task
field_id = task.form['fields'][0]['id']
rv = self.app.get('/v1.0/workflow/%i/task/%s/lookup/%s?query=%s&limit=5' %
(workflow.id, task.id, field_id, 'c'), # All records with a word that starts with 'c'
rv = self.app.get('/v1.0/workflow/%i/lookup/%s?query=%s&limit=5' %
(workflow.id, field_id, 'c'), # All records with a word that starts with 'c'
headers=self.logged_in_headers(),
content_type="application/json")
self.assert_success(rv)
@ -350,8 +350,8 @@ class TestTasksApi(BaseTest):
task = workflow.next_task
field_id = task.form['fields'][0]['id']
# lb3dp is a user record in the mock ldap responses for tests.
rv = self.app.get('/v1.0/workflow/%i/task/%s/lookup/%s?query=%s&limit=5' %
(workflow.id, task.id, field_id, 'lb3dp'),
rv = self.app.get('/v1.0/workflow/%s/lookup/%s?query=%s&limit=5' %
(workflow.id, field_id, 'lb3dp'),
headers=self.logged_in_headers(),
content_type="application/json")
self.assert_success(rv)

View File

@ -1,7 +1,5 @@
from tests.base_test import BaseTest
from crc import session
from crc.models.file import FileDataModel, FileModel, LookupFileModel, LookupDataModel
from crc.services.lookup_service import LookupService
from crc.services.workflow_processor import WorkflowProcessor
from crc.services.workflow_service import WorkflowService
@ -72,36 +70,6 @@ class TestWorkflowService(BaseTest):
self.assertEquals('1000', options[0]['id'])
self.assertEquals("UVA - INTERNAL - GM USE ONLY", options[0]['name'])
def test_create_lookup_file(self):
spec = self.load_test_spec('enum_options_from_file')
file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()
LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
lookup_records = session.query(LookupFileModel).all()
self.assertIsNotNone(lookup_records)
self.assertEqual(1, len(lookup_records))
lookup_record = lookup_records[0]
self.assertIsNotNone(lookup_record)
self.assertEquals("CUSTOMER_NUMBER", lookup_record.value_column)
self.assertEquals("CUSTOMER_NAME", lookup_record.label_column)
self.assertEquals("CUSTOMER_NAME", lookup_record.label_column)
lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
self.assertEquals(28, len(lookup_data))
self.assertEquals("1000", lookup_data[0].value)
self.assertEquals("UVA - INTERNAL - GM USE ONLY", lookup_data[0].label)
# search_results = session.query(LookupDataModel).\
# filter(LookupDataModel.lookup_file_model_id == lookup_record.id).\
# filter(LookupDataModel.__ts_vector__.op('@@')(func.plainto_tsquery('INTERNAL'))).all()
search_results = LookupDataModel.query.filter(LookupDataModel.label.match("INTERNAL")).all()
self.assertEquals(1, len(search_results))
search_results = LookupDataModel.query.filter(LookupDataModel.label.match("internal")).all()
self.assertEquals(1, len(search_results))
# This query finds results where a word starts with "bio"
search_results = LookupDataModel.query.filter(LookupDataModel.label.match("bio:*")).all()
self.assertEquals(2, len(search_results))
def test_random_data_populate_form_on_auto_complete(self):
self.load_example_data()
workflow = self.create_workflow('enum_options_with_search')