Merge pull request #183 from sartography/bug/109_lookup_stopwords
Bug/109 lookup stopwords
This commit is contained in:
commit
ec2d1dcefb
|
@ -166,10 +166,12 @@ class LookupDataModel(db.Model):
|
|||
# query with:
|
||||
# search_results = LookupDataModel.query.filter(LookupDataModel.label.match("INTERNAL")).all()
|
||||
|
||||
__ts_vector__ = func.to_tsvector('simple', label)
|
||||
|
||||
__table_args__ = (
|
||||
Index(
|
||||
'ix_lookupdata_tsv',
|
||||
func.to_tsvector('simple', label), # Use simple, not english to keep stop words in place.
|
||||
__ts_vector__, # Use the 'simple' text search config, not 'english', to keep stop words in place.
|
||||
postgresql_using='gin'
|
||||
),
|
||||
)
|
||||
|
|
|
@ -181,20 +181,22 @@ class LookupService(object):
|
|||
if len(query) > 0:
|
||||
if ' ' in query:
|
||||
terms = query.split(' ')
|
||||
new_terms = ["'%s'" % query]
|
||||
new_terms = []
|
||||
for t in terms:
|
||||
new_terms.append("%s:*" % t)
|
||||
new_query = ' | '.join(new_terms)
|
||||
new_query = ' & '.join(new_terms)
|
||||
new_query = "'%s' | %s" % (query, new_query)
|
||||
else:
|
||||
new_query = "%s:*" % query
|
||||
|
||||
# Run the full text query
|
||||
db_query = db_query.filter(LookupDataModel.label.match(new_query))
|
||||
# But hackishly order by like, which does a good job of
|
||||
# pulling more relevant matches to the top.
|
||||
db_query = db_query.filter(
|
||||
LookupDataModel.__ts_vector__.match(new_query, postgresql_regconfig='simple'))
|
||||
|
||||
# Hackishly order by a LIKE match of the label against the raw query text, which does a good job of pulling more relevant matches to the top.
|
||||
db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
|
||||
|
||||
logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
|
||||
logging.info(db_query)
|
||||
result = db_query.limit(limit).all()
|
||||
logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR)
|
||||
return result
|
||||
|
|
Binary file not shown.
|
@ -114,11 +114,11 @@ class TestLookupService(BaseTest):
|
|||
self.assertEqual("Genetics Savings & Clone, Inc.", results[0].label)
|
||||
|
||||
results = LookupService.lookup(workflow, "AllTheNames", "reaction design", limit=10)
|
||||
self.assertEqual(5, len(results), "all results come back for two terms.")
|
||||
self.assertEqual(3, len(results), "all results come back for two terms.")
|
||||
self.assertEqual("Reaction Design", results[0].label, "Exact matches come first.")
|
||||
|
||||
results = LookupService.lookup(workflow, "AllTheNames", "1 Something", limit=10)
|
||||
self.assertEqual("1 Something", results[0].label, "Exact matches are prefered")
|
||||
self.assertEqual("1 Something", results[0].label, "Exact matches are preferred")
|
||||
|
||||
results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10)
|
||||
self.assertEqual("1 Something", results[0].label, "special characters don't flake out")
|
||||
|
@ -126,16 +126,12 @@ class TestLookupService(BaseTest):
|
|||
results = LookupService.lookup(workflow, "AllTheNames", "1 Something", limit=10)
|
||||
self.assertEqual("1 Something", results[0].label, "double spaces should not be an issue.")
|
||||
|
||||
results = LookupService.lookup(workflow, "AllTheNames", "in", limit=10)
|
||||
self.assertEqual(10, len(results), "stop words are not removed.")
|
||||
self.assertEqual("Genetics Savings & Clone, Inc.", results[0].label)
|
||||
|
||||
results = LookupService.lookup(workflow, "AllTheNames", "other", limit=10)
|
||||
self.assertEqual("Other", results[0].label, "Can't find the word 'other', which is an english stop word")
|
||||
|
||||
|
||||
# 1018 10000 Something Industry
|
||||
# 1019 1000 Something Industry
|
||||
# 1020 1 Something Industry
|
||||
# 1021 10 Something Industry
|
||||
# 1022 10000 Something Industry
|
||||
|
||||
# Fixme: Stop words are taken into account on the query side, and haven't found a fix yet.
|
||||
#results = WorkflowService.run_lookup_query(lookup_table.id, "in", limit=10)
|
||||
#self.assertEqual(7, len(results), "stop words are not removed.")
|
||||
#self.assertEqual("Genetics Savings & Clone, Inc.", results[0].label)
|
||||
|
||||
|
|
|
@ -75,9 +75,9 @@ class TestWorkflowService(BaseTest):
|
|||
task = processor.next_task()
|
||||
WorkflowService.process_options(task, task.task_spec.form.fields[0])
|
||||
options = task.task_spec.form.fields[0].options
|
||||
self.assertEqual(28, len(options))
|
||||
self.assertEqual('1000', options[0]['id'])
|
||||
self.assertEqual("UVA - INTERNAL - GM USE ONLY", options[0]['name'])
|
||||
self.assertEqual(29, len(options))
|
||||
self.assertEqual('0', options[0]['id'])
|
||||
self.assertEqual("Other", options[0]['name'])
|
||||
|
||||
def test_random_data_populate_form_on_auto_complete(self):
|
||||
self.load_example_data()
|
||||
|
|
Loading…
Reference in New Issue