diff --git a/crc/models/file.py b/crc/models/file.py index 8afed6cd..8693b7e5 100644 --- a/crc/models/file.py +++ b/crc/models/file.py @@ -166,10 +166,12 @@ class LookupDataModel(db.Model): # query with: # search_results = LookupDataModel.query.filter(LookupDataModel.label.match("INTERNAL")).all() + __ts_vector__ = func.to_tsvector('simple', label) + __table_args__ = ( Index( 'ix_lookupdata_tsv', - func.to_tsvector('simple', label), # Use simple, not english to keep stop words in place. + __ts_vector__, # Use simple, not english to keep stop words in place. postgresql_using='gin' ), ) diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py index c9eb1dd8..cfe00615 100644 --- a/crc/services/lookup_service.py +++ b/crc/services/lookup_service.py @@ -181,20 +181,22 @@ class LookupService(object): if len(query) > 0: if ' ' in query: terms = query.split(' ') - new_terms = ["'%s'" % query] + new_terms = [] for t in terms: new_terms.append("%s:*" % t) - new_query = ' | '.join(new_terms) + new_query = ' & '.join(new_terms) + new_query = "'%s' | %s" % (query, new_query) else: new_query = "%s:*" % query - # Run the full text query - db_query = db_query.filter(LookupDataModel.label.match(new_query)) - # But hackishly order by like, which does a good job of - # pulling more relevant matches to the top. + db_query = db_query.filter( + LookupDataModel.__ts_vector__.match(new_query, postgresql_regconfig='simple')) + + # Hackishly order by like, which does a good job of pulling more relevant matches to the top. db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%"))) logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) + logging.info(db_query) result = db_query.limit(limit).all() logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR) return result diff --git a/tests/data/enum_options_from_file/customer_list.xls b/tests/data/enum_options_from_file/customer_list.xls index d697bb67..1ed72dd7 100644 Binary files a/tests/data/enum_options_from_file/customer_list.xls and b/tests/data/enum_options_from_file/customer_list.xls differ diff --git a/tests/test_lookup_service.py b/tests/test_lookup_service.py index a27427f4..0b7a8ddb 100644 --- a/tests/test_lookup_service.py +++ b/tests/test_lookup_service.py @@ -114,11 +114,11 @@ class TestLookupService(BaseTest): self.assertEqual("Genetics Savings & Clone, Inc.", results[0].label) results = LookupService.lookup(workflow, "AllTheNames", "reaction design", limit=10) - self.assertEqual(5, len(results), "all results come back for two terms.") + self.assertEqual(3, len(results), "all results come back for two terms.") self.assertEqual("Reaction Design", results[0].label, "Exact matches come first.") results = LookupService.lookup(workflow, "AllTheNames", "1 Something", limit=10) - self.assertEqual("1 Something", results[0].label, "Exact matches are prefered") + self.assertEqual("1 Something", results[0].label, "Exact matches are preferred") results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10) self.assertEqual("1 Something", results[0].label, "special characters don't flake out") @@ -126,16 +126,12 @@ class TestLookupService(BaseTest): results = LookupService.lookup(workflow, "AllTheNames", "1 Something", limit=10) self.assertEqual("1 Something", results[0].label, "double spaces should not be an issue.") + results = LookupService.lookup(workflow, "AllTheNames", "in", limit=10) + self.assertEqual(10, len(results), "stop words are not removed.") + self.assertEqual("Genetics Savings & Clone, Inc.", results[0].label) + + results = LookupService.lookup(workflow, "AllTheNames", "other", limit=10) + self.assertEqual("Other", results[0].label, "Can't find the word 'other', which is an english stop word") -# 1018 10000 Something Industry -# 1019 1000 Something Industry -# 1020 1 Something Industry -# 1021 10 Something Industry -# 1022 10000 Something Industry - - # Fixme: Stop words are taken into account on the query side, and haven't found a fix yet. - #results = WorkflowService.run_lookup_query(lookup_table.id, "in", limit=10) - #self.assertEqual(7, len(results), "stop words are not removed.") - #self.assertEqual("Genetics Savings & Clone, Inc.", results[0].label) diff --git a/tests/workflow/test_workflow_service.py b/tests/workflow/test_workflow_service.py index 9ae49b5a..89606959 100644 --- a/tests/workflow/test_workflow_service.py +++ b/tests/workflow/test_workflow_service.py @@ -75,9 +75,9 @@ class TestWorkflowService(BaseTest): task = processor.next_task() WorkflowService.process_options(task, task.task_spec.form.fields[0]) options = task.task_spec.form.fields[0].options - self.assertEqual(28, len(options)) - self.assertEqual('1000', options[0]['id']) - self.assertEqual("UVA - INTERNAL - GM USE ONLY", options[0]['name']) + self.assertEqual(29, len(options)) + self.assertEqual('0', options[0]['id']) + self.assertEqual("Other", options[0]['name']) def test_random_data_populate_form_on_auto_complete(self): self.load_example_data()