Merge branch 'dev' into rrt/production

2025-02-21 20:28:10 +00:00 · 2020-05-27 23:49:47 -04:00 · 2020-05-27 23:49:47 -04:00 · 94b6147768
commit 94b6147768
parent 98efa0475a 97cdbfce94
6 changed files with 51 additions and 27 deletions
--- a/crc/services/file_service.py
+++ b/crc/services/file_service.py
@ -133,7 +133,6 @@ class FileService(object):
        return file_extension.lower().strip()[1:]

    @staticmethod
-
    def update_file(file_model, binary_data, content_type):
        session.flush()  # Assure the database is up-to-date before running this.

--- a/crc/services/lookup_service.py
+++ b/crc/services/lookup_service.py
@ -1,3 +1,5 @@
+import logging
+
 from pandas import ExcelFile
 from sqlalchemy import func, desc
 from sqlalchemy.sql.functions import GenericFunction
@ -117,20 +119,31 @@ class LookupService(object):
        db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model)

        query = query.strip()
-        if len(query) > 1:
+        if len(query) > 0:
            if ' ' in query:
                terms = query.split(' ')
-                new_terms = []
+                new_terms = ["'%s'" % query]
                for t in terms:
-                    new_terms.append(t + ":*")
-                query = '|'.join(new_terms)
+                    new_terms.append("%s:*" % t)
+                new_query = ' | '.join(new_terms)
            else:
-                query = "%s:*" % query
-            db_query = db_query.filter(LookupDataModel.label.match(query))
-            db_query = db_query.order_by(desc(func.full_text.ts_rank(
-                func.to_tsvector('simple', LookupDataModel.label),
-                func.to_tsquery('simple', query))))
-        return db_query.limit(limit).all()
+                new_query = "%s:*" % query
+
+            # Run the full text query
+            db_query = db_query.filter(LookupDataModel.label.match(new_query))
+            # But hackishly order by a LIKE match on the raw query, which does
+            # a good job of pulling more relevant matches to the top.
+            db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
+            # Reference SQL pattern: ORDER BY name LIKE concat('%', ticker, '%') DESC, rank DESC
+
+#            db_query = db_query.order_by(desc(func.full_text.ts_rank(
+#                func.to_tsvector(LookupDataModel.label),
+#                func.to_tsquery(query))))
+        from sqlalchemy.dialects import postgresql
+        logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
+        result = db_query.limit(limit).all()
+        logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR)
+        return result

    @staticmethod
    def _run_ldap_query(query, limit):
--- a/tests/base_test.py
+++ b/tests/base_test.py
@ -22,10 +22,9 @@ from crc.models.user import UserModel
 from crc import app, db, session
 from example_data import ExampleDataLoader

-# UNCOMMENT THIS FOR DEBUGGING SQL ALCHEMY QUERIES
-# import logging
-# logging.basicConfig()
-# logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
+# Basic logging setup, left enabled to help debug SQLAlchemy queries.
+import logging
+logging.basicConfig()


 class BaseTest(unittest.TestCase):
--- a/tests/data/enum_options_from_file/customer_list.xls
+++ b/tests/data/enum_options_from_file/customer_list.xls
--- a/tests/test_lookup_service.py
+++ b/tests/test_lookup_service.py
@ -9,10 +9,12 @@ from crc.services.lookup_service import LookupService

 class TestLookupService(BaseTest):

+
    def test_create_lookup_file_multiple_times_does_not_update_database(self):
-        spec = self.load_test_spec('enum_options_from_file')
+        spec = BaseTest.load_test_spec('enum_options_from_file')
        file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
        file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()
+
        LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
        LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
        LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")
@ -21,18 +23,16 @@ class TestLookupService(BaseTest):
        self.assertEqual(1, len(lookup_records))
        lookup_record = lookup_records[0]
        lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
-        self.assertEquals(23, len(lookup_data))
+        self.assertEquals(28, len(lookup_data))
        # Using the same table with different lookup lable or value, does create additional records.
        LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NAME", "CUSTOMER_NUMBER")
        lookup_records = session.query(LookupFileModel).all()
        self.assertIsNotNone(lookup_records)
        self.assertEqual(2, len(lookup_records))
-        FileService.delete_file(file_model.id) ## Assure we can delete the file.

    def test_some_full_text_queries(self):
-        self.load_test_spec('enum_options_from_file')
+        spec = BaseTest.load_test_spec('enum_options_from_file')
        file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
-        self.assertIsNotNone(file_model)
        file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()
        lookup_table = LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")

@ -73,11 +73,24 @@ class TestLookupService(BaseTest):

        results = LookupService._run_lookup_query(lookup_table, "reaction design", limit=10)
        self.assertEquals(5, len(results), "all results come back for two terms.")
-        self.assertEquals("Reaction Design", results[0].label, "The first result is the most relevant")
-        self.assertEquals("Reaction Then Design ", results[1].label, "The first result is the most relevant")
-        self.assertEquals("Design Then Reaction", results[2].label, "The first result is the most relevant")
-        self.assertEquals("Just Reaction", results[3].label, "The first result is the most relevant")
-        self.assertEquals("Just Design", results[4].label, "The first result is the most relevant")
+        self.assertEquals("Reaction Design", results[0].label, "Exact matches come first.")
+
+    def test_prefer_exact_match(self):
+        spec = BaseTest.load_test_spec('enum_options_from_file')
+        file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
+        file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()
+
+        lookup_table = LookupService.get_lookup_table_from_data_model(file_data_model, "CUSTOMER_NUMBER",
+                                                                      "CUSTOMER_NAME")
+        results = LookupService._run_lookup_query(lookup_table, "1 Something", limit=10)
+        self.assertEquals("1 Something", results[0].label, "Exact matches are prefered")
+
+
+# 1018	10000 Something	Industry
+# 1019	1000 Something	Industry
+# 1020	1 Something	Industry
+# 1021	10 Something	Industry
+# 1022	10000 Something	Industry

        # Fixme:  Stop words are taken into account on the query side, and haven't found a fix yet.
        #results = WorkflowService.run_lookup_query(lookup_table.id, "in", limit=10)
--- a/tests/test_workflow_service.py
+++ b/tests/test_workflow_service.py
@ -68,7 +68,7 @@ class TestWorkflowService(BaseTest):
        task = processor.next_task()
        WorkflowService.process_options(task, task.task_spec.form.fields[0])
        options = task.task_spec.form.fields[0].options
-        self.assertEquals(23, len(options))
+        self.assertEquals(28, len(options))
        self.assertEquals('1000', options[0]['id'])
        self.assertEquals("UVA - INTERNAL - GM USE ONLY", options[0]['name'])

@ -86,7 +86,7 @@ class TestWorkflowService(BaseTest):
        self.assertEquals("CUSTOMER_NAME", lookup_record.label_column)
        self.assertEquals("CUSTOMER_NAME", lookup_record.label_column)
        lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
-        self.assertEquals(23, len(lookup_data))
+        self.assertEquals(28, len(lookup_data))

        self.assertEquals("1000", lookup_data[0].value)
        self.assertEquals("UVA - INTERNAL - GM USE ONLY", lookup_data[0].label)