Lookup Service now raises exact matches to the top. Very hackish, but it works.

This commit is contained in:
Dan Funk 2020-05-27 14:36:10 -04:00
parent 1d52a0214a
commit 77f72e408f
4 changed files with 49 additions and 24 deletions

View File

@ -1,3 +1,5 @@
import logging
from pandas import ExcelFile
from sqlalchemy import func, desc
from sqlalchemy.sql.functions import GenericFunction
@ -117,20 +119,31 @@ class LookupService(object):
db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model)
query = query.strip()
if len(query) > 1:
if len(query) > 0:
if ' ' in query:
terms = query.split(' ')
new_terms = []
new_terms = ["'%s'" % query]
for t in terms:
new_terms.append(t + ":*")
query = '|'.join(new_terms)
new_terms.append("%s:*" % t)
new_query = ' | '.join(new_terms)
else:
query = "%s:*" % query
db_query = db_query.filter(LookupDataModel.label.match(query))
db_query = db_query.order_by(desc(func.full_text.ts_rank(
func.to_tsvector('simple', LookupDataModel.label),
func.to_tsquery('simple', query))))
return db_query.limit(limit).all()
new_query = "%s:*" % query
# Run the full text query
db_query = db_query.filter(LookupDataModel.label.match(new_query))
# But hackishly order by like, which does a good job of
# pulling more relevant matches to the top.
db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
#ORDER BY name LIKE concat('%', ticker, '%') desc, rank DESC
# db_query = db_query.order_by(desc(func.full_text.ts_rank(
# func.to_tsvector(LookupDataModel.label),
# func.to_tsquery(query))))
from sqlalchemy.dialects import postgresql
logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
result = db_query.limit(limit).all()
logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR)
return result
@staticmethod
def _run_ldap_query(query, limit):

View File

@ -22,10 +22,9 @@ from crc.models.user import UserModel
from crc import app, db, session
from example_data import ExampleDataLoader
# UNCOMMENT THIS FOR DEBUGGING SQL ALCHEMY QUERIES
# import logging
# logging.basicConfig()
# logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
#UNCOMMENT THIS FOR DEBUGGING SQL ALCHEMY QUERIES
import logging
logging.basicConfig()
class BaseTest(unittest.TestCase):

View File

@ -9,10 +9,12 @@ from crc.services.lookup_service import LookupService
class TestLookupService(BaseTest):
def test_create_lookup_file_multiple_times_does_not_update_database(self):
spec = self.load_test_spec('enum_options_from_file')
spec = BaseTest.load_test_spec('enum_options_from_file')
file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()
LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
@ -21,18 +23,16 @@ class TestLookupService(BaseTest):
self.assertEqual(1, len(lookup_records))
lookup_record = lookup_records[0]
lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
self.assertEquals(23, len(lookup_data))
self.assertEquals(28, len(lookup_data))
# Using the same table with a different lookup label or value does create additional records.
LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NAME", "CUSTOMER_NUMBER")
lookup_records = session.query(LookupFileModel).all()
self.assertIsNotNone(lookup_records)
self.assertEqual(2, len(lookup_records))
FileService.delete_file(file_model.id) ## Assure we can delete the file.
def test_some_full_text_queries(self):
self.load_test_spec('enum_options_from_file')
spec = BaseTest.load_test_spec('enum_options_from_file')
file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
self.assertIsNotNone(file_model)
file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()
lookup_table = LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
@ -73,11 +73,24 @@ class TestLookupService(BaseTest):
results = LookupService._run_lookup_query(lookup_table, "reaction design", limit=10)
self.assertEquals(5, len(results), "all results come back for two terms.")
self.assertEquals("Reaction Design", results[0].label, "The first result is the most relevant")
self.assertEquals("Reaction Then Design ", results[1].label, "The first result is the most relevant")
self.assertEquals("Design Then Reaction", results[2].label, "The first result is the most relevant")
self.assertEquals("Just Reaction", results[3].label, "The first result is the most relevant")
self.assertEquals("Just Design", results[4].label, "The first result is the most relevant")
self.assertEquals("Reaction Design", results[0].label, "Exact matches come first.")
def test_prefer_exact_match(self):
    """Check that a label exactly matching the query text is returned first.

    Builds the lookup table from the customer_list.xls file of the
    'enum_options_from_file' test spec (using the CUSTOMER_NUMBER and
    CUSTOMER_NAME columns), searches it for "1 Something", and expects the
    record whose label is exactly "1 Something" to be the first result.
    """
    # The return value is not needed here; presumably the call loads the
    # spec's files so that the FileModel query below can find them.
    BaseTest.load_test_spec('enum_options_from_file')
    file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
    self.assertIsNotNone(file_model)
    file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()
    lookup_table = LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER",
                                                                  "CUSTOMER_NAME")
    results = LookupService._run_lookup_query(lookup_table, "1 Something", limit=10)
    # Fail with a readable message instead of an IndexError when nothing matches.
    self.assertTrue(results, "The query should return at least one result.")
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual("1 Something", results[0].label, "Exact matches are prefered")
# 1018 10000 Something Industry
# 1019 1000 Something Industry
# 1020 1 Something Industry
# 1021 10 Something Industry
# 1022 10000 Something Industry
# Fixme: Stop words are taken into account on the query side, and we haven't found a fix yet.
#results = WorkflowService.run_lookup_query(lookup_table.id, "in", limit=10)