I may have finally wrapped my head around full-text search in Python. The index is now properly built on the 'simple' dictionary rather than the 'english' one; 'simple' has far fewer stop words and stemming processes, and works much better with the type-ahead search we are trying to provide.
Stop words are no longer excluded, so "other" is a valid search and gets a result.
This commit is contained in:
parent
c7e208b641
commit
9a5c1d7cfb
|
@ -166,10 +166,12 @@ class LookupDataModel(db.Model):
|
||||||
# query with:
|
# query with:
|
||||||
# search_results = LookupDataModel.query.filter(LookupDataModel.label.match("INTERNAL")).all()
|
# search_results = LookupDataModel.query.filter(LookupDataModel.label.match("INTERNAL")).all()
|
||||||
|
|
||||||
|
__ts_vector__ = func.to_tsvector('simple', label)
|
||||||
|
|
||||||
__table_args__ = (
|
__table_args__ = (
|
||||||
Index(
|
Index(
|
||||||
'ix_lookupdata_tsv',
|
'ix_lookupdata_tsv',
|
||||||
func.to_tsvector('simple', label), # Use simple, not english to keep stop words in place.
|
__ts_vector__, # Use simple, not english to keep stop words in place.
|
||||||
postgresql_using='gin'
|
postgresql_using='gin'
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
|
@ -181,20 +181,22 @@ class LookupService(object):
|
||||||
if len(query) > 0:
|
if len(query) > 0:
|
||||||
if ' ' in query:
|
if ' ' in query:
|
||||||
terms = query.split(' ')
|
terms = query.split(' ')
|
||||||
new_terms = ["'%s'" % query]
|
new_terms = []
|
||||||
for t in terms:
|
for t in terms:
|
||||||
new_terms.append("%s:*" % t)
|
new_terms.append("%s:*" % t)
|
||||||
new_query = ' | '.join(new_terms)
|
new_query = ' & '.join(new_terms)
|
||||||
|
new_query = "'%s' | %s" % (query, new_query)
|
||||||
else:
|
else:
|
||||||
new_query = "%s:*" % query
|
new_query = "%s:*" % query
|
||||||
|
|
||||||
# Run the full text query
|
db_query = db_query.filter(
|
||||||
db_query = db_query.filter(LookupDataModel.label.match(new_query))
|
LookupDataModel.__ts_vector__.match(new_query, postgresql_regconfig='simple'))
|
||||||
# But hackishly order by like, which does a good job of
|
|
||||||
# pulling more relevant matches to the top.
|
# Hackishly order by like, which does a good job of pulling more relevant matches to the top.
|
||||||
db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
|
db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
|
||||||
|
|
||||||
logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
|
logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
|
||||||
|
logging.info(db_query)
|
||||||
result = db_query.limit(limit).all()
|
result = db_query.limit(limit).all()
|
||||||
logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR)
|
logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR)
|
||||||
return result
|
return result
|
||||||
|
|
Binary file not shown.
|
@ -81,6 +81,10 @@ class TestLookupService(BaseTest):
|
||||||
results = LookupService.lookup(workflow, "AllTheNames", "", limit=10)
|
results = LookupService.lookup(workflow, "AllTheNames", "", limit=10)
|
||||||
self.assertEqual(10, len(results), "Blank queries return everything, to the limit")
|
self.assertEqual(10, len(results), "Blank queries return everything, to the limit")
|
||||||
|
|
||||||
|
results = LookupService.lookup(workflow, "AllTheNames", "other", limit=10)
|
||||||
|
self.assertEqual("Other", results[0].label, "Can't find the word 'other', even through it is there.")
|
||||||
|
|
||||||
|
|
||||||
results = LookupService.lookup(workflow, "AllTheNames", "medicines", limit=10)
|
results = LookupService.lookup(workflow, "AllTheNames", "medicines", limit=10)
|
||||||
self.assertEqual(1, len(results), "words in the middle of label are detected.")
|
self.assertEqual(1, len(results), "words in the middle of label are detected.")
|
||||||
self.assertEqual("The Medicines Company", results[0].label)
|
self.assertEqual("The Medicines Company", results[0].label)
|
||||||
|
@ -118,7 +122,7 @@ class TestLookupService(BaseTest):
|
||||||
self.assertEqual("Reaction Design", results[0].label, "Exact matches come first.")
|
self.assertEqual("Reaction Design", results[0].label, "Exact matches come first.")
|
||||||
|
|
||||||
results = LookupService.lookup(workflow, "AllTheNames", "1 Something", limit=10)
|
results = LookupService.lookup(workflow, "AllTheNames", "1 Something", limit=10)
|
||||||
self.assertEqual("1 Something", results[0].label, "Exact matches are prefered")
|
self.assertEqual("1 Something", results[0].label, "Exact matches are preferred")
|
||||||
|
|
||||||
results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10)
|
results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10)
|
||||||
self.assertEqual("1 Something", results[0].label, "special characters don't flake out")
|
self.assertEqual("1 Something", results[0].label, "special characters don't flake out")
|
||||||
|
|
Loading…
Reference in New Issue