Better overall search results for type-ahead lookups. Queries made up of stop words are still failing.
This commit is contained in:
parent
65b29e1a9d
commit
b5b46b7c2c
|
@ -201,15 +201,6 @@ def lookup(workflow_id, task_id, field_id, query, limit):
|
|||
if not field:
|
||||
raise ApiError("unknown_field", "No field named %s in task %s" % (task_id, spiff_task.task_spec.name))
|
||||
|
||||
lookup_model = WorkflowService.get_lookup_table(spiff_task, field)
|
||||
db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_model)
|
||||
|
||||
query = query.strip()
|
||||
if(len(query) > 1):
|
||||
if(' ' in query):
|
||||
query = ':* ||'.join(query.split(' '))
|
||||
db_query = db_query.filter(LookupDataModel.label.match("%s:*" % query)).limit(limit)
|
||||
else:
|
||||
db_query = db_query.filter(LookupDataModel.label.match("%s:*" % query)).limit(limit)
|
||||
|
||||
return LookupDataSchema(many=True).dump(db_query.all())
|
||||
lookup_table = WorkflowService.get_lookup_table(spiff_task, field)
|
||||
lookup_data = WorkflowService.run_lookup_query(lookup_table.id, query, limit)
|
||||
return LookupDataSchema(many=True).dump(lookup_data)
|
|
@ -127,7 +127,7 @@ class LookupDataModel(db.Model):
|
|||
__table_args__ = (
|
||||
Index(
|
||||
'ix_lookupdata_tsv',
|
||||
func.to_tsvector('english', label),
|
||||
func.to_tsvector('simple', label), # Use simple, not english to keep stop words in place.
|
||||
postgresql_using='gin'
|
||||
),
|
||||
)
|
||||
|
|
|
@ -5,6 +5,7 @@ from SpiffWorkflow.bpmn.workflow import BpmnWorkflow
|
|||
from SpiffWorkflow.dmn.specs.BuisnessRuleTask import BusinessRuleTask
|
||||
from SpiffWorkflow.specs import CancelTask, StartTask
|
||||
from pandas import ExcelFile
|
||||
from sqlalchemy import func
|
||||
|
||||
from crc import db
|
||||
from crc.api.common import ApiError
|
||||
|
@ -205,3 +206,24 @@ class WorkflowService(object):
|
|||
db.session.commit()
|
||||
|
||||
return lookup_model
|
||||
|
||||
@staticmethod
def run_lookup_query(lookup_file_id, query, limit):
    """Return lookup rows of one lookup file whose label matches ``query``.

    The query is stripped and split on whitespace. Every term becomes a
    prefix term (``term:*``) and the terms are OR-ed together (``|``), so a
    row matches when any word of its label starts with any of the terms.
    A query of fewer than two characters (after stripping) applies no text
    filter at all and simply returns the first ``limit`` rows of the file.

    :param lookup_file_id: id compared against
        ``LookupDataModel.lookup_file_model_id``.
    :param query: raw text typed by the user.
    :param limit: maximum number of rows to return.
    :return: list of ``LookupDataModel`` rows.
    """
    db_query = LookupDataModel.query.filter(
        LookupDataModel.lookup_file_model_id == lookup_file_id)

    query = query.strip()
    if len(query) > 1:
        # split() without an argument collapses runs of whitespace, so a
        # double space can no longer yield an empty term (a bare ":*" is not
        # a valid tsquery term).
        ts_query = '|'.join(term + ":*" for term in query.split())

        # Match with the 'simple' configuration on BOTH sides: the label is
        # converted with to_tsvector('simple', ...), the same expression the
        # ix_lookupdata_tsv index is built on, and the query is parsed with
        # the same configuration. Without an explicit configuration the
        # database default is used, which can drop stop words such as "in"
        # from the query.
        # NOTE(review): this relies on match() rendering to_tsquery(), which
        # is the SQLAlchemy 1.x behaviour for PostgreSQL - confirm before
        # upgrading SQLAlchemy.
        # NOTE(review): tsquery operator characters typed by the user
        # (& | ! : ' parentheses) are not escaped here - confirm whether the
        # callers need that.
        label_vector = func.to_tsvector('simple', LookupDataModel.label)
        db_query = db_query.filter(
            label_vector.match(ts_query, postgresql_regconfig='simple'))

    return db_query.limit(limit).all()
|
|
@ -129,5 +129,51 @@ class TestWorkflowService(BaseTest):
|
|||
self.assertIsNotNone(lookup_records)
|
||||
self.assertEqual(2, len(lookup_records))
|
||||
|
||||
def test_some_queries(self):
|
||||
pass
|
||||
def test_some_full_text_queries(self):
    """Exercise WorkflowService.run_lookup_query on the customer_list.xls data.

    Loads the 'enum_options_from_file' test spec, builds the lookup table
    from its spreadsheet (CUSTOMER_NUMBER as value, CUSTOMER_NAME as label)
    and checks word, prefix, case-insensitive, multi-term and blank queries.
    """
    self.load_test_spec('enum_options_from_file')
    file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
    file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()
    lookup_table = WorkflowService._get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")

    results = WorkflowService.run_lookup_query(lookup_table.id, "medicines", limit=10)
    self.assertEqual(1, len(results), "words in the middle of label are detected.")
    self.assertEqual("The Medicines Company", results[0].label)

    results = WorkflowService.run_lookup_query(lookup_table.id, "", limit=10)
    self.assertEqual(10, len(results), "Blank queries return everything, to the limit")

    results = WorkflowService.run_lookup_query(lookup_table.id, "UVA", limit=10)
    self.assertEqual(1, len(results), "Beginning of label is found.")
    self.assertEqual("UVA - INTERNAL - GM USE ONLY", results[0].label)

    results = WorkflowService.run_lookup_query(lookup_table.id, "uva", limit=10)
    self.assertEqual(1, len(results), "case does not matter.")
    self.assertEqual("UVA - INTERNAL - GM USE ONLY", results[0].label)

    results = WorkflowService.run_lookup_query(lookup_table.id, "medici", limit=10)
    self.assertEqual(1, len(results), "partial words are picked up.")
    self.assertEqual("The Medicines Company", results[0].label)

    results = WorkflowService.run_lookup_query(lookup_table.id, "Genetics Savings", limit=10)
    self.assertEqual(1, len(results), "multiple terms are picked up..")
    self.assertEqual("Genetics Savings & Clone, Inc.", results[0].label)

    results = WorkflowService.run_lookup_query(lookup_table.id, "Genetics Sav", limit=10)
    self.assertEqual(1, len(results), "prefix queries still work with partial terms")
    self.assertEqual("Genetics Savings & Clone, Inc.", results[0].label)

    results = WorkflowService.run_lookup_query(lookup_table.id, "Gen Sav", limit=10)
    self.assertEqual(1, len(results), "prefix queries still work with ALL the partial terms")
    self.assertEqual("Genetics Savings & Clone, Inc.", results[0].label)

    results = WorkflowService.run_lookup_query(lookup_table.id, "Inc", limit=10)
    self.assertEqual(7, len(results), "short terms get multiple correct results.")
    self.assertEqual("Genetics Savings & Clone, Inc.", results[0].label)

    # Fixme: Stop words are taken into account on the query side, and haven't found a fix yet.
    # Kept disabled until a query for a stop word such as "in" returns results.
    # results = WorkflowService.run_lookup_query(lookup_table.id, "in", limit=10)
    # self.assertEqual(7, len(results), "stop words are not removed.")
    # self.assertEqual("Genetics Savings & Clone, Inc.", results[0].label)
|
||||
|
|
Loading…
Reference in New Issue