From d5e075db825ca75aa3cb12553b48fce8f6eece06 Mon Sep 17 00:00:00 2001 From: Dan Funk Date: Wed, 27 May 2020 09:47:44 -0400 Subject: [PATCH] Order search results by relevancy in the lookup service. --- crc/services/lookup_service.py | 11 +++++++--- crc/services/protocol_builder.py | 3 +-- .../enum_options_from_file/customer_list.xls | Bin 108544 -> 108544 bytes tests/test_lookup_service.py | 19 ++++++++++++------ 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py index f9d023bc..12a50330 100644 --- a/crc/services/lookup_service.py +++ b/crc/services/lookup_service.py @@ -1,4 +1,6 @@ from pandas import ExcelFile +from sqlalchemy import func, desc +from sqlalchemy.sql.functions import GenericFunction from crc import db from crc.api.common import ApiError @@ -7,6 +9,9 @@ from crc.models.file import FileDataModel, LookupFileModel, LookupDataModel from crc.services.file_service import FileService from crc.services.ldap_service import LdapService +class TSRank(GenericFunction): + package = 'full_text' + name = 'ts_rank' class LookupService(object): @@ -122,9 +127,9 @@ class LookupService(object): else: query = "%s:*" % query db_query = db_query.filter(LookupDataModel.label.match(query)) - - # db_query = db_query.filter(text("lookup_data.label @@ to_tsquery('simple', '%s')" % query)) - + db_query = db_query.order_by(desc(func.full_text.ts_rank( + func.to_tsvector('simple', LookupDataModel.label), + func.to_tsquery('simple', query)))) return db_query.limit(limit).all() @staticmethod diff --git a/crc/services/protocol_builder.py b/crc/services/protocol_builder.py index 23dcb79f..5fc5535f 100644 --- a/crc/services/protocol_builder.py +++ b/crc/services/protocol_builder.py @@ -19,7 +19,7 @@ class ProtocolBuilderService(object): if isinstance(app.config['PB_ENABLED'], str): return app.config['PB_ENABLED'].lower() == "true" else: - return app.config['PB_ENABLED'] == True + return app.config['PB_ENABLED'] is True @staticmethod def get_studies(user_id) -> {}: @@ -65,4 +65,3 @@ class ProtocolBuilderService(object): "Received an invalid response from the protocol builder (status %s): %s when calling " "url '%s'." % (response.status_code, response.text, url)) - diff --git a/tests/data/enum_options_from_file/customer_list.xls b/tests/data/enum_options_from_file/customer_list.xls index 8c977446b446586a4d05a0658d44ea4fe252fe05..d352073e4ce4df2ba9eb3fd6de186b1787fc8866 100644 GIT binary patch delta 622 zcmZvaF-yZx6ot=ysjZsYsI`gJiUAQ3925lItXLPFEaEOwnhw^Xb#TyDN5QE`5L6KS z0b;QQ7xf1S4x&qcgNTEJD0-7TiJ}Q4@0|C&_s+dZ`7T(#3!bG^I9)E{~%+BfD`(Gqq!~ln(wh7~|h1$#1BSotO5;3H3J*?@nBd Xj6YCiAz;?8PiRsRjs5!DDFu};#35a_ delta 296 zcmZp;z}9eqZG#IpW9npA?nd?^CJhD#28qpQxpSFV|1)p_nLT_=jE>pD{0vM$nOX)O z21X#9ppM`(0cHM4BKXV<42*o75IzGFA5a$qgXre-{OOF7zwz@i3QhjVFU%+mCPgM+ z6cnD^AjmTLil6|a7>LIx4k9=25p?-5)q{~|Iu8q@<2EKnMyBn4ER1YZr}Z(iFizg? zI+HP&dAh?~#wg`NVFm^Vuq6;emVuvvSp>{xWDsGPEX=?(eeYaG=jj297$0t%$C$w^ z2=a+B5IcZWC9W4+J-u=vqdZ5@R_#D@;cJYO16dTdA6Up}!OZk~%JyGN7}=Ns)ZRe2 diff --git a/tests/test_lookup_service.py b/tests/test_lookup_service.py index 89be6168..233709a8 100644 --- a/tests/test_lookup_service.py +++ b/tests/test_lookup_service.py @@ -1,10 +1,10 @@ + +from tests.base_test import BaseTest + from crc import session from crc.models.file import FileDataModel, FileModel, LookupFileModel, LookupDataModel from crc.services.file_service import FileService from crc.services.lookup_service import LookupService -from crc.services.workflow_processor import WorkflowProcessor -from crc.services.workflow_service import WorkflowService -from tests.base_test import BaseTest class TestLookupService(BaseTest): @@ -21,7 +21,7 @@ class TestLookupService(BaseTest): self.assertEqual(1, len(lookup_records)) lookup_record = lookup_records[0] lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all() - self.assertEquals(19, len(lookup_data)) + self.assertEquals(23, len(lookup_data)) # Using the same table with different lookup lable or value, does create additional records. LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NAME", "CUSTOMER_NUMBER") lookup_records = session.query(LookupFileModel).all() @@ -51,8 +51,6 @@ class TestLookupService(BaseTest): self.assertEquals(1, len(results), "case does not matter.") self.assertEquals("UVA - INTERNAL - GM USE ONLY", results[0].label) - - results = LookupService._run_lookup_query(lookup_table, "medici", limit=10) self.assertEquals(1, len(results), "partial words are picked up.") self.assertEquals("The Medicines Company", results[0].label) @@ -73,7 +71,16 @@ class TestLookupService(BaseTest): self.assertEquals(7, len(results), "short terms get multiple correct results.") self.assertEquals("Genetics Savings & Clone, Inc.", results[0].label) + results = LookupService._run_lookup_query(lookup_table, "reaction design", limit=10) + self.assertEquals(5, len(results), "all results come back for two terms.") + self.assertEquals("Reaction Design", results[0].label, "The first result is the most relevant") + self.assertEquals("Reaction Then Design ", results[1].label, "The first result is the most relevant") + self.assertEquals("Design Then Reaction", results[2].label, "The first result is the most relevant") + self.assertEquals("Just Reaction", results[3].label, "The first result is the most relevant") + self.assertEquals("Just Design", results[4].label, "The first result is the most relevant") + # Fixme: Stop words are taken into account on the query side, and haven't found a fix yet. #results = WorkflowService.run_lookup_query(lookup_table.id, "in", limit=10) #self.assertEquals(7, len(results), "stop words are not removed.") #self.assertEquals("Genetics Savings & Clone, Inc.", results[0].label) +