diff --git a/crc/models/file.py b/crc/models/file.py index 8afed6cd..8693b7e5 100644 --- a/crc/models/file.py +++ b/crc/models/file.py @@ -166,10 +166,12 @@ class LookupDataModel(db.Model): # query with: # search_results = LookupDataModel.query.filter(LookupDataModel.label.match("INTERNAL")).all() + __ts_vector__ = func.to_tsvector('simple', label) + __table_args__ = ( Index( 'ix_lookupdata_tsv', - func.to_tsvector('simple', label), # Use simple, not english to keep stop words in place. + __ts_vector__, # Use simple, not english to keep stop words in place. postgresql_using='gin' ), ) diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py index c9eb1dd8..cfe00615 100644 --- a/crc/services/lookup_service.py +++ b/crc/services/lookup_service.py @@ -181,20 +181,22 @@ class LookupService(object): if len(query) > 0: if ' ' in query: terms = query.split(' ') - new_terms = ["'%s'" % query] + new_terms = [] for t in terms: new_terms.append("%s:*" % t) - new_query = ' | '.join(new_terms) + new_query = ' & '.join(new_terms) + new_query = "'%s' | %s" % (query, new_query) else: new_query = "%s:*" % query - # Run the full text query - db_query = db_query.filter(LookupDataModel.label.match(new_query)) - # But hackishly order by like, which does a good job of - # pulling more relevant matches to the top. + db_query = db_query.filter( + LookupDataModel.__ts_vector__.match(new_query, postgresql_regconfig='simple')) + + # Hackishly order by like, which does a good job of pulling more relevant matches to the top. db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%"))) logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) + logging.info(db_query) result = db_query.limit(limit).all() logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR) return result diff --git a/tests/data/enum_options_from_file/customer_list.xls b/tests/data/enum_options_from_file/customer_list.xls index d697bb67..1ed72dd7 100644 Binary files a/tests/data/enum_options_from_file/customer_list.xls and b/tests/data/enum_options_from_file/customer_list.xls differ diff --git a/tests/test_lookup_service.py b/tests/test_lookup_service.py index a27427f4..5b433b96 100644 --- a/tests/test_lookup_service.py +++ b/tests/test_lookup_service.py @@ -81,6 +81,10 @@ class TestLookupService(BaseTest): results = LookupService.lookup(workflow, "AllTheNames", "", limit=10) self.assertEqual(10, len(results), "Blank queries return everything, to the limit") + results = LookupService.lookup(workflow, "AllTheNames", "other", limit=10) + self.assertEqual("Other", results[0].label, "Can't find the word 'other', even through it is there.") + + results = LookupService.lookup(workflow, "AllTheNames", "medicines", limit=10) self.assertEqual(1, len(results), "words in the middle of label are detected.") self.assertEqual("The Medicines Company", results[0].label) @@ -118,7 +122,7 @@ class TestLookupService(BaseTest): self.assertEqual("Reaction Design", results[0].label, "Exact matches come first.") results = LookupService.lookup(workflow, "AllTheNames", "1 Something", limit=10) - self.assertEqual("1 Something", results[0].label, "Exact matches are prefered") + self.assertEqual("1 Something", results[0].label, "Exact matches are preferred") results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10) self.assertEqual("1 Something", results[0].label, "special characters don't flake out")