I may have finally wrapped my head around full-text search in Python. We are now properly using an index based on the 'simple' dictionary rather than the 'english' one; 'simple' has far fewer stop words and stemming processes, and it works much better with the type-ahead search we are trying to provide.

Stop words are no longer excluded, so "other" is a valid search and gets a result.
This commit is contained in:
Dan Funk 2020-08-13 18:13:41 -04:00
parent c7e208b641
commit 9a5c1d7cfb
4 changed files with 16 additions and 8 deletions

View File

@ -166,10 +166,12 @@ class LookupDataModel(db.Model):
# query with:
# search_results = LookupDataModel.query.filter(LookupDataModel.label.match("INTERNAL")).all()
__ts_vector__ = func.to_tsvector('simple', label)
__table_args__ = (
Index(
'ix_lookupdata_tsv',
func.to_tsvector('simple', label), # Use simple, not english to keep stop words in place.
__ts_vector__, # Use simple, not english to keep stop words in place.
postgresql_using='gin'
),
)

View File

@ -181,20 +181,22 @@ class LookupService(object):
if len(query) > 0:
if ' ' in query:
terms = query.split(' ')
new_terms = ["'%s'" % query]
new_terms = []
for t in terms:
new_terms.append("%s:*" % t)
new_query = ' | '.join(new_terms)
new_query = ' & '.join(new_terms)
new_query = "'%s' | %s" % (query, new_query)
else:
new_query = "%s:*" % query
# Run the full text query
db_query = db_query.filter(LookupDataModel.label.match(new_query))
# But hackishly order by like, which does a good job of
# pulling more relevant matches to the top.
db_query = db_query.filter(
LookupDataModel.__ts_vector__.match(new_query, postgresql_regconfig='simple'))
# Hackishly order by like, which does a good job of pulling more relevant matches to the top.
db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
logging.info(db_query)
result = db_query.limit(limit).all()
logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR)
return result

View File

@ -81,6 +81,10 @@ class TestLookupService(BaseTest):
results = LookupService.lookup(workflow, "AllTheNames", "", limit=10)
self.assertEqual(10, len(results), "Blank queries return everything, to the limit")
results = LookupService.lookup(workflow, "AllTheNames", "other", limit=10)
self.assertEqual("Other", results[0].label, "Can't find the word 'other', even through it is there.")
results = LookupService.lookup(workflow, "AllTheNames", "medicines", limit=10)
self.assertEqual(1, len(results), "words in the middle of label are detected.")
self.assertEqual("The Medicines Company", results[0].label)
@ -118,7 +122,7 @@ class TestLookupService(BaseTest):
self.assertEqual("Reaction Design", results[0].label, "Exact matches come first.")
results = LookupService.lookup(workflow, "AllTheNames", "1 Something", limit=10)
self.assertEqual("1 Something", results[0].label, "Exact matches are prefered")
self.assertEqual("1 Something", results[0].label, "Exact matches are preferred")
results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10)
self.assertEqual("1 Something", results[0].label, "special characters don't flake out")