Provides some basic tools for getting additional information about a lookup field.

Adds an optional 'id' parameter to the lookup endpoint so you can find a specific entry in the lookup table. Makes sure the data attribute returned on a lookup model is a dictionary, and not a string. Fixes a previous bug that would crop up in double spaces were used when performing a search.
2020-06-30 10:00:22 -04:00 · 2020-06-30 10:00:22 -04:00 · d3ce1af1ce
parent 19b6f5d1ab
commit d3ce1af1ce
6 changed files with 79 additions and 33 deletions
--- a/crc/api.yml
+++ b/crc/api.yml
@ -709,6 +709,13 @@ paths:
        description: The total number of records to return, defaults to 10.
        schema:
          type: integer
      - name: id
        in: query
        required: false
        description: Rather than supplying a query, you can specify a speicific id to get data back on a lookup field.
        schema:
          type: string
    get:
      operationId: crc.api.workflow.lookup
      summary: Provides type-ahead search against a lookup table associted with a form field.
--- a/crc/api/workflow.py
+++ b/crc/api/workflow.py
@ -41,7 +41,6 @@ def get_workflow_specification(spec_id):
 def validate_workflow_specification(spec_id):
    errors = []
    try:
        WorkflowService.test_spec(spec_id)
@ -57,7 +56,6 @@ def validate_workflow_specification(spec_id):
    return ApiErrorSchema(many=True).dump(errors)
 def update_workflow_specification(spec_id, body):
    if spec_id is None:
        raise ApiError('unknown_spec', 'Please provide a valid Workflow Spec ID.')
@ -200,7 +198,7 @@ def delete_workflow_spec_category(cat_id):
    session.commit()
-def lookup(workflow_id, field_id, query, limit):
+def lookup(workflow_id, field_id, query=None, limit=10, id=None):
    """
    given a field in a task, attempts to find the lookup table or function associated
    with that field and runs a full-text query against it to locate the values and
@ -208,14 +206,15 @@ def lookup(workflow_id, field_id, query, limit):
    Tries to be fast, but first runs will be very slow.
    """
    workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first()
-    lookup_data = LookupService.lookup(workflow, field_id, query, limit)
+    lookup_data = LookupService.lookup(workflow, field_id, query, limit, id)
    return LookupDataSchema(many=True).dump(lookup_data)
 def __get_user_uid(user_uid):
    if 'user' in g:
        if g.user.uid not in app.config['ADMIN_UIDS'] and user_uid != g.user.uid:
-            raise ApiError("permission_denied", "You are not authorized to edit the task data for this workflow.", status_code=403)
+            raise ApiError("permission_denied", "You are not authorized to edit the task data for this workflow.",
                           status_code=403)
        else:
            return g.user.uid
--- a/crc/models/file.py
+++ b/crc/models/file.py
@ -153,6 +153,7 @@ class LookupFileModel(db.Model):
    file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id'))
    dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan")
 class LookupDataModel(db.Model):
    __tablename__ = 'lookup_data'
    id = db.Column(db.Integer, primary_key=True)
--- a/crc/services/lookup_service.py
+++ b/crc/services/lookup_service.py
@ -1,7 +1,8 @@
 import logging
 import re
-from pandas import ExcelFile
+import pandas as pd
 from pandas import ExcelFile, np
 from sqlalchemy import func, desc
 from sqlalchemy.sql.functions import GenericFunction
@ -62,14 +63,14 @@ class LookupService(object):
        return lookup_model
    @staticmethod
-    def lookup(workflow, field_id, query, limit):
+    def lookup(workflow, field_id, query, limit, id = None):
        lookup_model = LookupService.__get_lookup_model(workflow, field_id)
        if lookup_model.is_ldap:
            return LookupService._run_ldap_query(query, limit)
        else:
-            return LookupService._run_lookup_query(lookup_model, query, limit)
+            return LookupService._run_lookup_query(lookup_model, query, limit, id)
@ -130,6 +131,7 @@ class LookupService(object):
          changed.  """
        xls = ExcelFile(data_model.data)
        df = xls.parse(xls.sheet_names[0])  # Currently we only look at the fist sheet.
        df = pd.DataFrame(df).replace({np.nan: None})
        if value_column not in df:
            raise ApiError("invalid_emum",
                           "The file %s does not contain a column named % s" % (data_model.file_model.name,
@ -149,39 +151,40 @@ class LookupService(object):
            lookup_data = LookupDataModel(lookup_file_model=lookup_model,
                                          value=row[value_column],
                                          label=row[label_column],
-                                          data=row.to_json())
+                                          data=row.to_dict())
            db.session.add(lookup_data)
        db.session.commit()
        return lookup_model
    @staticmethod
-    def _run_lookup_query(lookup_file_model, query, limit):
+    def _run_lookup_query(lookup_file_model, query, limit, lookup_id):
        db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model)
        if lookup_id is not None:  # Then just find the model with that id.
            db_query = db_query.filter(LookupDataModel.id == lookup_id)
        else:
            # Build a full text query that takes all the terms provided and executes each term as a prefix query, and
            # OR's those queries together.  The order of the results is handled as a standard "Like" on the original
            # string which seems to work intuitively for most entries.
            query = re.sub('[^A-Za-z0-9 ]+', '', query)  # Strip out non ascii characters.
            query = re.sub(' {2,}', ' ', query)  # Convert multiple spaces to just one space, as we split on spaces.
            print("Query: " + query)
            query = query.strip()
            if len(query) > 0:
                if ' ' in query:
                    terms = query.split(' ')
                    new_terms = ["'%s'" % query]
                    for t in terms:
                        new_terms.append("%s:*" % t)
                    new_query = ' | '.join(new_terms)
                else:
                    new_query = "%s:*" % query
-        query = re.sub('[^A-Za-z0-9 ]+', '', query)
+                # Run the full text query
-        print("Query: " + query)
+                db_query = db_query.filter(LookupDataModel.label.match(new_query))
-        query = query.strip()
+                # But hackishly order by like, which does a good job of
-        if len(query) > 0:
+                # pulling more relevant matches to the top.
-            if ' ' in query:
+                db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
                terms = query.split(' ')
                new_terms = ["'%s'" % query]
                for t in terms:
                    new_terms.append("%s:*" % t)
                new_query = ' | '.join(new_terms)
            else:
                new_query = "%s:*" % query
            # Run the full text query
            db_query = db_query.filter(LookupDataModel.label.match(new_query))
            # But hackishly order by like, which does a good job of
            # pulling more relevant matches to the top.
            db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
            #ORDER BY name LIKE concat('%', ticker, '%') desc, rank DESC
 #            db_query = db_query.order_by(desc(func.full_text.ts_rank(
 #                func.to_tsvector(LookupDataModel.label),
 #                func.to_tsquery(query))))
        from sqlalchemy.dialects import postgresql
        logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
        result = db_query.limit(limit).all()
        logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR)
--- a/tests/test_lookup_service.py
+++ b/tests/test_lookup_service.py
@ -61,6 +61,15 @@ class TestLookupService(BaseTest):
        lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
        self.assertEqual(4, len(lookup_data))
    def test_lookup_based_on_id(self):
        spec = BaseTest.load_test_spec('enum_options_from_file')
        workflow = self.create_workflow('enum_options_from_file')
        processor = WorkflowProcessor(workflow)
        processor.do_engine_steps()
        results = LookupService.lookup(workflow, "AllTheNames", "", id=1, limit=10)
        self.assertEqual(1, len(results), "It is possible to find an item based on the id, rather than as a search")
        self.assertIsNotNone(results[0].data)
        self.assertIsInstance(results[0].data, dict)
    def test_some_full_text_queries(self):
@ -114,6 +123,9 @@ class TestLookupService(BaseTest):
        results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10)
        self.assertEqual("1 Something", results[0].label, "special characters don't flake out")
        results = LookupService.lookup(workflow, "AllTheNames", "1  Something", limit=10)
        self.assertEqual("1 Something", results[0].label, "double spaces should not be an issue.")
 # 1018	10000 Something	Industry
--- a/tests/test_tasks_api.py
+++ b/tests/test_tasks_api.py
@ -343,6 +343,30 @@ class TestTasksApi(BaseTest):
        results = json.loads(rv.get_data(as_text=True))
        self.assertEqual(5, len(results))
    def test_lookup_endpoint_for_task_field_using_lookup_entry_id(self):
        self.load_example_data()
        workflow = self.create_workflow('enum_options_with_search')
        # get the first form in the two form workflow.
        workflow = self.get_workflow_api(workflow)
        task = workflow.next_task
        field_id = task.form['fields'][0]['id']
        rv = self.app.get('/v1.0/workflow/%i/lookup/%s?query=%s&limit=5' %
                          (workflow.id, field_id, 'c'), # All records with a word that starts with 'c'
                          headers=self.logged_in_headers(),
                          content_type="application/json")
        self.assert_success(rv)
        results = json.loads(rv.get_data(as_text=True))
        self.assertEqual(5, len(results))
        rv = self.app.get('/v1.0/workflow/%i/lookup/%s?id=%i' %
                          (workflow.id, field_id, results[0]['id']), # All records with a word that starts with 'c'
                          headers=self.logged_in_headers(),
                          content_type="application/json")
        results = json.loads(rv.get_data(as_text=True))
        self.assertEqual(1, len(results))
        self.assertIsInstance(results[0]['data'], dict)
    def test_lookup_endpoint_for_task_ldap_field_lookup(self):
        self.load_example_data()
        workflow = self.create_workflow('ldap_lookup')