From d3ce1af1ce2e98191ff6086222a08f3530602e50 Mon Sep 17 00:00:00 2001
From: Dan Funk <daniel.h.funk@gmail.com>
Date: Tue, 30 Jun 2020 10:00:22 -0400
Subject: [PATCH 1/4] Provides some basic tools for getting additional
 information about a lookup field. Adds an optional 'id' parameter to the
 lookup endpoint so you can find a specific entry in the lookup table. Makes
 sure the data attribute returned on a lookup model is a dictionary, and not a
 string. Fixes a previous bug that would crop up in double spaces were used
 when performing a search.

---
 crc/api.yml                    |  7 ++++
 crc/api/workflow.py            |  9 +++---
 crc/models/file.py             |  1 +
 crc/services/lookup_service.py | 59 ++++++++++++++++++----------------
 tests/test_lookup_service.py   | 12 +++++++
 tests/test_tasks_api.py        | 24 ++++++++++++++
 6 files changed, 79 insertions(+), 33 deletions(-)

diff --git a/crc/api.yml b/crc/api.yml
index 3f72bf7f..0bf8bd6f 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -709,6 +709,13 @@ paths:
         description: The total number of records to return, defaults to 10.
         schema:
           type: integer
+      - name: id
+        in: query
+        required: false
+        description: Rather than supplying a query, you can specify a speicific id to get data back on a lookup field.
+        schema:
+          type: string
+
     get:
       operationId: crc.api.workflow.lookup
       summary: Provides type-ahead search against a lookup table associted with a form field.
diff --git a/crc/api/workflow.py b/crc/api/workflow.py
index 68b84f56..cee2f4cf 100644
--- a/crc/api/workflow.py
+++ b/crc/api/workflow.py
@@ -41,7 +41,6 @@ def get_workflow_specification(spec_id):
 
 
 def validate_workflow_specification(spec_id):
-
     errors = []
     try:
         WorkflowService.test_spec(spec_id)
@@ -57,7 +56,6 @@ def validate_workflow_specification(spec_id):
     return ApiErrorSchema(many=True).dump(errors)
 
 
-
 def update_workflow_specification(spec_id, body):
     if spec_id is None:
         raise ApiError('unknown_spec', 'Please provide a valid Workflow Spec ID.')
@@ -200,7 +198,7 @@ def delete_workflow_spec_category(cat_id):
     session.commit()
 
 
-def lookup(workflow_id, field_id, query, limit):
+def lookup(workflow_id, field_id, query=None, limit=10, id=None):
     """
     given a field in a task, attempts to find the lookup table or function associated
     with that field and runs a full-text query against it to locate the values and
@@ -208,14 +206,15 @@ def lookup(workflow_id, field_id, query, limit):
     Tries to be fast, but first runs will be very slow.
     """
     workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first()
-    lookup_data = LookupService.lookup(workflow, field_id, query, limit)
+    lookup_data = LookupService.lookup(workflow, field_id, query, limit, id)
     return LookupDataSchema(many=True).dump(lookup_data)
 
 
 def __get_user_uid(user_uid):
     if 'user' in g:
         if g.user.uid not in app.config['ADMIN_UIDS'] and user_uid != g.user.uid:
-            raise ApiError("permission_denied", "You are not authorized to edit the task data for this workflow.", status_code=403)
+            raise ApiError("permission_denied", "You are not authorized to edit the task data for this workflow.",
+                           status_code=403)
         else:
             return g.user.uid
 
diff --git a/crc/models/file.py b/crc/models/file.py
index 15a48709..ccfbdc56 100644
--- a/crc/models/file.py
+++ b/crc/models/file.py
@@ -153,6 +153,7 @@ class LookupFileModel(db.Model):
     file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id'))
     dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan")
 
+
 class LookupDataModel(db.Model):
     __tablename__ = 'lookup_data'
     id = db.Column(db.Integer, primary_key=True)
diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py
index b3e0bddc..876fda80 100644
--- a/crc/services/lookup_service.py
+++ b/crc/services/lookup_service.py
@@ -1,7 +1,8 @@
 import logging
 import re
 
-from pandas import ExcelFile
+import pandas as pd
+from pandas import ExcelFile, np
 from sqlalchemy import func, desc
 from sqlalchemy.sql.functions import GenericFunction
 
@@ -62,14 +63,14 @@ class LookupService(object):
         return lookup_model
 
     @staticmethod
-    def lookup(workflow, field_id, query, limit):
+    def lookup(workflow, field_id, query, limit, id = None):
 
         lookup_model = LookupService.__get_lookup_model(workflow, field_id)
 
         if lookup_model.is_ldap:
             return LookupService._run_ldap_query(query, limit)
         else:
-            return LookupService._run_lookup_query(lookup_model, query, limit)
+            return LookupService._run_lookup_query(lookup_model, query, limit, id)
 
 
 
@@ -130,6 +131,7 @@ class LookupService(object):
           changed.  """
         xls = ExcelFile(data_model.data)
         df = xls.parse(xls.sheet_names[0])  # Currently we only look at the fist sheet.
+        df = pd.DataFrame(df).replace({np.nan: None})
         if value_column not in df:
             raise ApiError("invalid_emum",
                            "The file %s does not contain a column named % s" % (data_model.file_model.name,
@@ -149,39 +151,40 @@ class LookupService(object):
             lookup_data = LookupDataModel(lookup_file_model=lookup_model,
                                           value=row[value_column],
                                           label=row[label_column],
-                                          data=row.to_json())
+                                          data=row.to_dict())
             db.session.add(lookup_data)
         db.session.commit()
         return lookup_model
 
     @staticmethod
-    def _run_lookup_query(lookup_file_model, query, limit):
+    def _run_lookup_query(lookup_file_model, query, limit, lookup_id):
         db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model)
+        if lookup_id is not None:  # Then just find the model with that id.
+            db_query = db_query.filter(LookupDataModel.id == lookup_id)
+        else:
+            # Build a full text query that takes all the terms provided and executes each term as a prefix query, and
+            # OR's those queries together.  The order of the results is handled as a standard "Like" on the original
+            # string which seems to work intuitively for most entries.
+            query = re.sub('[^A-Za-z0-9 ]+', '', query)  # Strip out non ascii characters.
+            query = re.sub(' {2,}', ' ', query)  # Convert multiple spaces to just one space, as we split on spaces.
+            print("Query: " + query)
+            query = query.strip()
+            if len(query) > 0:
+                if ' ' in query:
+                    terms = query.split(' ')
+                    new_terms = ["'%s'" % query]
+                    for t in terms:
+                        new_terms.append("%s:*" % t)
+                    new_query = ' | '.join(new_terms)
+                else:
+                    new_query = "%s:*" % query
 
-        query = re.sub('[^A-Za-z0-9 ]+', '', query)
-        print("Query: " + query)
-        query = query.strip()
-        if len(query) > 0:
-            if ' ' in query:
-                terms = query.split(' ')
-                new_terms = ["'%s'" % query]
-                for t in terms:
-                    new_terms.append("%s:*" % t)
-                new_query = ' | '.join(new_terms)
-            else:
-                new_query = "%s:*" % query
+                # Run the full text query
+                db_query = db_query.filter(LookupDataModel.label.match(new_query))
+                # But hackishly order by like, which does a good job of
+                # pulling more relevant matches to the top.
+                db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
 
-            # Run the full text query
-            db_query = db_query.filter(LookupDataModel.label.match(new_query))
-            # But hackishly order by like, which does a good job of
-            # pulling more relevant matches to the top.
-            db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
-            #ORDER BY name LIKE concat('%', ticker, '%') desc, rank DESC
-
-#            db_query = db_query.order_by(desc(func.full_text.ts_rank(
-#                func.to_tsvector(LookupDataModel.label),
-#                func.to_tsquery(query))))
-        from sqlalchemy.dialects import postgresql
         logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
         result = db_query.limit(limit).all()
         logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR)
diff --git a/tests/test_lookup_service.py b/tests/test_lookup_service.py
index b61e20e2..4b7c180d 100644
--- a/tests/test_lookup_service.py
+++ b/tests/test_lookup_service.py
@@ -61,6 +61,15 @@ class TestLookupService(BaseTest):
         lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
         self.assertEqual(4, len(lookup_data))
 
+    def test_lookup_based_on_id(self):
+        spec = BaseTest.load_test_spec('enum_options_from_file')
+        workflow = self.create_workflow('enum_options_from_file')
+        processor = WorkflowProcessor(workflow)
+        processor.do_engine_steps()
+        results = LookupService.lookup(workflow, "AllTheNames", "", id=1, limit=10)
+        self.assertEqual(1, len(results), "It is possible to find an item based on the id, rather than as a search")
+        self.assertIsNotNone(results[0].data)
+        self.assertIsInstance(results[0].data, dict)
 
 
     def test_some_full_text_queries(self):
@@ -114,6 +123,9 @@ class TestLookupService(BaseTest):
         results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10)
         self.assertEqual("1 Something", results[0].label, "special characters don't flake out")
 
+        results = LookupService.lookup(workflow, "AllTheNames", "1  Something", limit=10)
+        self.assertEqual("1 Something", results[0].label, "double spaces should not be an issue.")
+
 
 
 # 1018	10000 Something	Industry
diff --git a/tests/test_tasks_api.py b/tests/test_tasks_api.py
index c6b09dae..11be07fd 100644
--- a/tests/test_tasks_api.py
+++ b/tests/test_tasks_api.py
@@ -343,6 +343,30 @@ class TestTasksApi(BaseTest):
         results = json.loads(rv.get_data(as_text=True))
         self.assertEqual(5, len(results))
 
+    def test_lookup_endpoint_for_task_field_using_lookup_entry_id(self):
+        self.load_example_data()
+        workflow = self.create_workflow('enum_options_with_search')
+        # get the first form in the two form workflow.
+        workflow = self.get_workflow_api(workflow)
+        task = workflow.next_task
+        field_id = task.form['fields'][0]['id']
+        rv = self.app.get('/v1.0/workflow/%i/lookup/%s?query=%s&limit=5' %
+                          (workflow.id, field_id, 'c'), # All records with a word that starts with 'c'
+                          headers=self.logged_in_headers(),
+                          content_type="application/json")
+        self.assert_success(rv)
+        results = json.loads(rv.get_data(as_text=True))
+        self.assertEqual(5, len(results))
+        rv = self.app.get('/v1.0/workflow/%i/lookup/%s?id=%i' %
+                          (workflow.id, field_id, results[0]['id']), # All records with a word that starts with 'c'
+                          headers=self.logged_in_headers(),
+                          content_type="application/json")
+        results = json.loads(rv.get_data(as_text=True))
+        self.assertEqual(1, len(results))
+        self.assertIsInstance(results[0]['data'], dict)
+
+
+
     def test_lookup_endpoint_for_task_ldap_field_lookup(self):
         self.load_example_data()
         workflow = self.create_workflow('ldap_lookup')

From f183e12fe56a7a81aef4c4d5fe39ac969b5bce05 Mon Sep 17 00:00:00 2001
From: Dan Funk <daniel.h.funk@gmail.com>
Date: Tue, 30 Jun 2020 10:34:16 -0400
Subject: [PATCH 2/4] Provides some basic tools for getting additional
 information about a lookup field. Adds an optional 'value' parameter to the
 lookup endpoint so you can find a specific entry in the lookup table. Makes
 sure the data attribute returned on a lookup model is a dictionary, and not a
 string. Fixes a previous bug that would crop up if double spaces were used
 when performing a search.

---
 crc/api.yml                    | 12 ++++++------
 crc/api/workflow.py            |  4 ++--
 crc/models/file.py             |  1 +
 crc/services/lookup_service.py | 10 +++++-----
 tests/test_tasks_api.py        | 27 ++++++++++++++++++++++++---
 5 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/crc/api.yml b/crc/api.yml
index 0bf8bd6f..213e8d15 100644
--- a/crc/api.yml
+++ b/crc/api.yml
@@ -703,18 +703,18 @@ paths:
         description: The string to search for in the Value column of the lookup table.
         schema:
           type: string
+      - name: value
+        in: query
+        required: false
+        description: An alternative to query, this accepts the specific value or id selected in a dropdown list or auto-complete, and will return the one matching record. Useful for getting additional details about an item selected in a dropdown.
+        schema:
+          type: string
       - name: limit
         in: query
         required: false
         description: The total number of records to return, defaults to 10.
         schema:
           type: integer
-      - name: id
-        in: query
-        required: false
-        description: Rather than supplying a query, you can specify a speicific id to get data back on a lookup field.
-        schema:
-          type: string
 
     get:
       operationId: crc.api.workflow.lookup
diff --git a/crc/api/workflow.py b/crc/api/workflow.py
index cee2f4cf..dc86ac9e 100644
--- a/crc/api/workflow.py
+++ b/crc/api/workflow.py
@@ -198,7 +198,7 @@ def delete_workflow_spec_category(cat_id):
     session.commit()
 
 
-def lookup(workflow_id, field_id, query=None, limit=10, id=None):
+def lookup(workflow_id, field_id, query=None, value=None, limit=10):
     """
     given a field in a task, attempts to find the lookup table or function associated
     with that field and runs a full-text query against it to locate the values and
@@ -206,7 +206,7 @@ def lookup(workflow_id, field_id, query=None, limit=10, id=None):
     Tries to be fast, but first runs will be very slow.
     """
     workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first()
-    lookup_data = LookupService.lookup(workflow, field_id, query, limit, id)
+    lookup_data = LookupService.lookup(workflow, field_id, query, value, limit)
     return LookupDataSchema(many=True).dump(lookup_data)
 
 
diff --git a/crc/models/file.py b/crc/models/file.py
index ccfbdc56..5eb50d4e 100644
--- a/crc/models/file.py
+++ b/crc/models/file.py
@@ -182,6 +182,7 @@ class LookupDataSchema(SQLAlchemyAutoSchema):
         load_instance = True
         include_relationships = False
         include_fk = False  # Includes foreign keys
+        exclude = ['id'] # Do not include the id field, it should never be used via the API.
 
 
 class SimpleFileSchema(ma.Schema):
diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py
index 876fda80..97c9824b 100644
--- a/crc/services/lookup_service.py
+++ b/crc/services/lookup_service.py
@@ -63,14 +63,14 @@ class LookupService(object):
         return lookup_model
 
     @staticmethod
-    def lookup(workflow, field_id, query, limit, id = None):
+    def lookup(workflow, field_id, query, value, limit):
 
         lookup_model = LookupService.__get_lookup_model(workflow, field_id)
 
         if lookup_model.is_ldap:
             return LookupService._run_ldap_query(query, limit)
         else:
-            return LookupService._run_lookup_query(lookup_model, query, limit, id)
+            return LookupService._run_lookup_query(lookup_model, query, value, limit)
 
 
 
@@ -157,10 +157,10 @@ class LookupService(object):
         return lookup_model
 
     @staticmethod
-    def _run_lookup_query(lookup_file_model, query, limit, lookup_id):
+    def _run_lookup_query(lookup_file_model, query, value, limit):
         db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model)
-        if lookup_id is not None:  # Then just find the model with that id.
-            db_query = db_query.filter(LookupDataModel.id == lookup_id)
+        if value is not None:  # Then just find the model with that value
+            db_query = db_query.filter(LookupDataModel.value == value)
         else:
             # Build a full text query that takes all the terms provided and executes each term as a prefix query, and
             # OR's those queries together.  The order of the results is handled as a standard "Like" on the original
diff --git a/tests/test_tasks_api.py b/tests/test_tasks_api.py
index 11be07fd..236defdc 100644
--- a/tests/test_tasks_api.py
+++ b/tests/test_tasks_api.py
@@ -357,15 +357,36 @@ class TestTasksApi(BaseTest):
         self.assert_success(rv)
         results = json.loads(rv.get_data(as_text=True))
         self.assertEqual(5, len(results))
-        rv = self.app.get('/v1.0/workflow/%i/lookup/%s?id=%i' %
-                          (workflow.id, field_id, results[0]['id']), # All records with a word that starts with 'c'
+        rv = self.app.get('/v1.0/workflow/%i/lookup/%s?value=%s' %
+                          (workflow.id, field_id, results[0]['value']), # All records with a word that starts with 'c'
                           headers=self.logged_in_headers(),
                           content_type="application/json")
         results = json.loads(rv.get_data(as_text=True))
         self.assertEqual(1, len(results))
         self.assertIsInstance(results[0]['data'], dict)
+        self.assertNotIn('id', results[0], "Don't include the internal id, that can be very confusing, and should not be used.")
 
-
+    def test_lookup_endpoint_also_works_for_enum(self):
+        # Naming here get's a little confusing.  fields can be marked as enum or autocomplete.
+        # In the event of an auto-complete it's a type-ahead search field, for an enum the
+        # the key/values from the spreadsheet are added directly to the form and it shows up as
+        # a dropdown.  This tests the case of wanting to get additional data when a user selects
+        # something from a drodown.
+        self.load_example_data()
+        workflow = self.create_workflow('enum_options_from_file')
+        # get the first form in the two form workflow.
+        workflow = self.get_workflow_api(workflow)
+        task = workflow.next_task
+        field_id = task.form['fields'][0]['id']
+        option_id = task.form['fields'][0]['options'][0]['id']
+        rv = self.app.get('/v1.0/workflow/%i/lookup/%s?value=%s' %
+                          (workflow.id, field_id, option_id), # All records with a word that starts with 'c'
+                          headers=self.logged_in_headers(),
+                          content_type="application/json")
+        self.assert_success(rv)
+        results = json.loads(rv.get_data(as_text=True))
+        self.assertEqual(1, len(results))
+        self.assertIsInstance(results[0]['data'], dict)
 
     def test_lookup_endpoint_for_task_ldap_field_lookup(self):
         self.load_example_data()

From 93bf46354bf43f7412078ad7fa3dbbf43f1ae66a Mon Sep 17 00:00:00 2001
From: Dan Funk <daniel.h.funk@gmail.com>
Date: Tue, 30 Jun 2020 11:12:28 -0400
Subject: [PATCH 3/4] A last minute change to make the API a little clearer and
 cleaner broke some tests.

---
 crc/services/lookup_service.py | 22 ++++++++++------------
 tests/test_lookup_service.py   |  2 +-
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py
index 97c9824b..e9852315 100644
--- a/crc/services/lookup_service.py
+++ b/crc/services/lookup_service.py
@@ -20,8 +20,8 @@ class TSRank(GenericFunction):
     package = 'full_text'
     name = 'ts_rank'
 
-class LookupService(object):
 
+class LookupService(object):
     """Provides tools for doing lookups for auto-complete fields.
     This can currently take two forms:
     1) Lookup from spreadsheet data associated with a workflow specification.
@@ -51,7 +51,7 @@ class LookupService(object):
         # if not, we need to rebuild the lookup table.
         is_current = False
         if lookup_model:
-            is_current = db.session.query(WorkflowSpecDependencyFile).\
+            is_current = db.session.query(WorkflowSpecDependencyFile). \
                 filter(WorkflowSpecDependencyFile.file_data_id == lookup_model.file_data_model_id).count()
 
         if not is_current:
@@ -63,7 +63,7 @@ class LookupService(object):
         return lookup_model
 
     @staticmethod
-    def lookup(workflow, field_id, query, value, limit):
+    def lookup(workflow, field_id, query, value=None, limit=10):
 
         lookup_model = LookupService.__get_lookup_model(workflow, field_id)
 
@@ -72,8 +72,6 @@ class LookupService(object):
         else:
             return LookupService._run_lookup_query(lookup_model, query, value, limit)
 
-
-
     @staticmethod
     def create_lookup_model(workflow_model, field_id):
         """
@@ -117,8 +115,8 @@ class LookupService(object):
                                            is_ldap=True)
         else:
             raise ApiError("unknown_lookup_option",
-                            "Lookup supports using spreadsheet options or ldap options, and neither "
-                            "was provided.")
+                           "Lookup supports using spreadsheet options or ldap options, and neither "
+                           "was provided.")
         db.session.add(lookup_model)
         db.session.commit()
         return lookup_model
@@ -199,8 +197,8 @@ class LookupService(object):
          we return a lookup data model."""
         user_list = []
         for user in users:
-            user_list.append( {"value": user['uid'],
-                                "label": user['display_name'] + " (" + user['uid'] + ")",
-                                "data": user
-                               })
-        return user_list
\ No newline at end of file
+            user_list.append({"value": user['uid'],
+                              "label": user['display_name'] + " (" + user['uid'] + ")",
+                              "data": user
+                              })
+        return user_list
diff --git a/tests/test_lookup_service.py b/tests/test_lookup_service.py
index 4b7c180d..a27427f4 100644
--- a/tests/test_lookup_service.py
+++ b/tests/test_lookup_service.py
@@ -66,7 +66,7 @@ class TestLookupService(BaseTest):
         workflow = self.create_workflow('enum_options_from_file')
         processor = WorkflowProcessor(workflow)
         processor.do_engine_steps()
-        results = LookupService.lookup(workflow, "AllTheNames", "", id=1, limit=10)
+        results = LookupService.lookup(workflow, "AllTheNames", "", value="1000", limit=10)
         self.assertEqual(1, len(results), "It is possible to find an item based on the id, rather than as a search")
         self.assertIsNotNone(results[0].data)
         self.assertIsInstance(results[0].data, dict)

From 84973d23512028931b83915f0e16c3e986d23095 Mon Sep 17 00:00:00 2001
From: Dan Funk <daniel.h.funk@gmail.com>
Date: Tue, 30 Jun 2020 12:24:48 -0400
Subject: [PATCH 4/4] resolving comments from pull request.

---
 crc/services/lookup_service.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py
index e9852315..47424ae8 100644
--- a/crc/services/lookup_service.py
+++ b/crc/services/lookup_service.py
@@ -1,5 +1,6 @@
 import logging
 import re
+from collections import OrderedDict
 
 import pandas as pd
 from pandas import ExcelFile, np
@@ -149,7 +150,7 @@ class LookupService(object):
             lookup_data = LookupDataModel(lookup_file_model=lookup_model,
                                           value=row[value_column],
                                           label=row[label_column],
-                                          data=row.to_dict())
+                                          data=row.to_dict(OrderedDict))
             db.session.add(lookup_data)
         db.session.commit()
         return lookup_model
@@ -164,7 +165,7 @@ class LookupService(object):
             # OR's those queries together.  The order of the results is handled as a standard "Like" on the original
             # string which seems to work intuitively for most entries.
             query = re.sub('[^A-Za-z0-9 ]+', '', query)  # Strip out non ascii characters.
-            query = re.sub(' {2,}', ' ', query)  # Convert multiple spaces to just one space, as we split on spaces.
+            query = re.sub(r'\s+', ' ', query)  # Convert multiple space like characters to just one space, as we split on spaces.
             print("Query: " + query)
             query = query.strip()
             if len(query) > 0: