Order search results by relevancy in the lookup service.

This commit is contained in:
Dan Funk 2020-05-27 09:47:44 -04:00
parent 397cb23b52
commit d5e075db82
4 changed files with 22 additions and 11 deletions

View File

@ -1,4 +1,6 @@
from pandas import ExcelFile from pandas import ExcelFile
from sqlalchemy import func, desc
from sqlalchemy.sql.functions import GenericFunction
from crc import db from crc import db
from crc.api.common import ApiError from crc.api.common import ApiError
@ -7,6 +9,9 @@ from crc.models.file import FileDataModel, LookupFileModel, LookupDataModel
from crc.services.file_service import FileService from crc.services.file_service import FileService
from crc.services.ldap_service import LdapService from crc.services.ldap_service import LdapService
class TSRank(GenericFunction):
    """SQL function declaration for ``ts_rank`` in the ``full_text`` package.

    Declaring the function with this package and name is what lets the lookup
    query refer to it as ``func.full_text.ts_rank(...)`` when ordering results.
    """

    # Function name as looked up through ``func.<package>.<name>``.
    name = 'ts_rank'
    # Package namespace the function is filed under on ``func``.
    package = 'full_text'
class LookupService(object): class LookupService(object):
@ -122,9 +127,9 @@ class LookupService(object):
else: else:
query = "%s:*" % query query = "%s:*" % query
db_query = db_query.filter(LookupDataModel.label.match(query)) db_query = db_query.filter(LookupDataModel.label.match(query))
db_query = db_query.order_by(desc(func.full_text.ts_rank(
# db_query = db_query.filter(text("lookup_data.label @@ to_tsquery('simple', '%s')" % query)) func.to_tsvector('simple', LookupDataModel.label),
func.to_tsquery('simple', query))))
return db_query.limit(limit).all() return db_query.limit(limit).all()
@staticmethod @staticmethod

View File

@ -19,7 +19,7 @@ class ProtocolBuilderService(object):
if isinstance(app.config['PB_ENABLED'], str): if isinstance(app.config['PB_ENABLED'], str):
return app.config['PB_ENABLED'].lower() == "true" return app.config['PB_ENABLED'].lower() == "true"
else: else:
return app.config['PB_ENABLED'] == True return app.config['PB_ENABLED'] is True
@staticmethod @staticmethod
def get_studies(user_id) -> {}: def get_studies(user_id) -> {}:
@ -65,4 +65,3 @@ class ProtocolBuilderService(object):
"Received an invalid response from the protocol builder (status %s): %s when calling " "Received an invalid response from the protocol builder (status %s): %s when calling "
"url '%s'." % "url '%s'." %
(response.status_code, response.text, url)) (response.status_code, response.text, url))

View File

@ -1,10 +1,10 @@
from tests.base_test import BaseTest
from crc import session from crc import session
from crc.models.file import FileDataModel, FileModel, LookupFileModel, LookupDataModel from crc.models.file import FileDataModel, FileModel, LookupFileModel, LookupDataModel
from crc.services.file_service import FileService from crc.services.file_service import FileService
from crc.services.lookup_service import LookupService from crc.services.lookup_service import LookupService
from crc.services.workflow_processor import WorkflowProcessor
from crc.services.workflow_service import WorkflowService
from tests.base_test import BaseTest
class TestLookupService(BaseTest): class TestLookupService(BaseTest):
@ -21,7 +21,7 @@ class TestLookupService(BaseTest):
self.assertEqual(1, len(lookup_records)) self.assertEqual(1, len(lookup_records))
lookup_record = lookup_records[0] lookup_record = lookup_records[0]
lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all() lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
self.assertEquals(19, len(lookup_data)) self.assertEquals(23, len(lookup_data))
# Using the same table with a different lookup label or value does create additional records. # Using the same table with a different lookup label or value does create additional records.
LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NAME", "CUSTOMER_NUMBER") LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NAME", "CUSTOMER_NUMBER")
lookup_records = session.query(LookupFileModel).all() lookup_records = session.query(LookupFileModel).all()
@ -51,8 +51,6 @@ class TestLookupService(BaseTest):
self.assertEquals(1, len(results), "case does not matter.") self.assertEquals(1, len(results), "case does not matter.")
self.assertEquals("UVA - INTERNAL - GM USE ONLY", results[0].label) self.assertEquals("UVA - INTERNAL - GM USE ONLY", results[0].label)
results = LookupService._run_lookup_query(lookup_table, "medici", limit=10) results = LookupService._run_lookup_query(lookup_table, "medici", limit=10)
self.assertEquals(1, len(results), "partial words are picked up.") self.assertEquals(1, len(results), "partial words are picked up.")
self.assertEquals("The Medicines Company", results[0].label) self.assertEquals("The Medicines Company", results[0].label)
@ -73,7 +71,16 @@ class TestLookupService(BaseTest):
self.assertEquals(7, len(results), "short terms get multiple correct results.") self.assertEquals(7, len(results), "short terms get multiple correct results.")
self.assertEquals("Genetics Savings & Clone, Inc.", results[0].label) self.assertEquals("Genetics Savings & Clone, Inc.", results[0].label)
results = LookupService._run_lookup_query(lookup_table, "reaction design", limit=10)
self.assertEquals(5, len(results), "all results come back for two terms.")
self.assertEquals("Reaction Design", results[0].label, "The first result is the most relevant")
self.assertEquals("Reaction Then Design ", results[1].label, "The first result is the most relevant")
self.assertEquals("Design Then Reaction", results[2].label, "The first result is the most relevant")
self.assertEquals("Just Reaction", results[3].label, "The first result is the most relevant")
self.assertEquals("Just Design", results[4].label, "The first result is the most relevant")
# FIXME: Stop words are taken into account on the query side; no fix has been found yet. # FIXME: Stop words are taken into account on the query side; no fix has been found yet.
#results = WorkflowService.run_lookup_query(lookup_table.id, "in", limit=10) #results = WorkflowService.run_lookup_query(lookup_table.id, "in", limit=10)
#self.assertEquals(7, len(results), "stop words are not removed.") #self.assertEquals(7, len(results), "stop words are not removed.")
#self.assertEquals("Genetics Savings & Clone, Inc.", results[0].label) #self.assertEquals("Genetics Savings & Clone, Inc.", results[0].label)