From d3ce1af1ce2e98191ff6086222a08f3530602e50 Mon Sep 17 00:00:00 2001 From: Dan Funk Date: Tue, 30 Jun 2020 10:00:22 -0400 Subject: [PATCH] Provides some basic tools for getting additional information about a lookup field. Adds an optional 'id' parameter to the lookup endpoint so you can find a specific entry in the lookup table. Makes sure the data attribute returned on a lookup model is a dictionary, and not a string. Fixes a previous bug that would crop up in double spaces were used when performing a search. --- crc/api.yml | 7 ++++ crc/api/workflow.py | 9 +++--- crc/models/file.py | 1 + crc/services/lookup_service.py | 59 ++++++++++++++++++---------------- tests/test_lookup_service.py | 12 +++++++ tests/test_tasks_api.py | 24 ++++++++++++++ 6 files changed, 79 insertions(+), 33 deletions(-) diff --git a/crc/api.yml b/crc/api.yml index 3f72bf7f..0bf8bd6f 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -709,6 +709,13 @@ paths: description: The total number of records to return, defaults to 10. schema: type: integer + - name: id + in: query + required: false + description: Rather than supplying a query, you can specify a speicific id to get data back on a lookup field. + schema: + type: string + get: operationId: crc.api.workflow.lookup summary: Provides type-ahead search against a lookup table associted with a form field. diff --git a/crc/api/workflow.py b/crc/api/workflow.py index 68b84f56..cee2f4cf 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -41,7 +41,6 @@ def get_workflow_specification(spec_id): def validate_workflow_specification(spec_id): - errors = [] try: WorkflowService.test_spec(spec_id) @@ -57,7 +56,6 @@ def validate_workflow_specification(spec_id): return ApiErrorSchema(many=True).dump(errors) - def update_workflow_specification(spec_id, body): if spec_id is None: raise ApiError('unknown_spec', 'Please provide a valid Workflow Spec ID.') @@ -200,7 +198,7 @@ def delete_workflow_spec_category(cat_id): session.commit() -def lookup(workflow_id, field_id, query, limit): +def lookup(workflow_id, field_id, query=None, limit=10, id=None): """ given a field in a task, attempts to find the lookup table or function associated with that field and runs a full-text query against it to locate the values and @@ -208,14 +206,15 @@ def lookup(workflow_id, field_id, query, limit): Tries to be fast, but first runs will be very slow. """ workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first() - lookup_data = LookupService.lookup(workflow, field_id, query, limit) + lookup_data = LookupService.lookup(workflow, field_id, query, limit, id) return LookupDataSchema(many=True).dump(lookup_data) def __get_user_uid(user_uid): if 'user' in g: if g.user.uid not in app.config['ADMIN_UIDS'] and user_uid != g.user.uid: - raise ApiError("permission_denied", "You are not authorized to edit the task data for this workflow.", status_code=403) + raise ApiError("permission_denied", "You are not authorized to edit the task data for this workflow.", + status_code=403) else: return g.user.uid diff --git a/crc/models/file.py b/crc/models/file.py index 15a48709..ccfbdc56 100644 --- a/crc/models/file.py +++ b/crc/models/file.py @@ -153,6 +153,7 @@ class LookupFileModel(db.Model): file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id')) dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan") + class LookupDataModel(db.Model): __tablename__ = 'lookup_data' id = db.Column(db.Integer, primary_key=True) diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py index b3e0bddc..876fda80 100644 --- a/crc/services/lookup_service.py +++ b/crc/services/lookup_service.py @@ -1,7 +1,8 @@ import logging import re -from pandas import ExcelFile +import pandas as pd +from pandas import ExcelFile, np from sqlalchemy import func, desc from sqlalchemy.sql.functions import GenericFunction @@ -62,14 +63,14 @@ class LookupService(object): return lookup_model @staticmethod - def lookup(workflow, field_id, query, limit): + def lookup(workflow, field_id, query, limit, id = None): lookup_model = LookupService.__get_lookup_model(workflow, field_id) if lookup_model.is_ldap: return LookupService._run_ldap_query(query, limit) else: - return LookupService._run_lookup_query(lookup_model, query, limit) + return LookupService._run_lookup_query(lookup_model, query, limit, id) @@ -130,6 +131,7 @@ class LookupService(object): changed. """ xls = ExcelFile(data_model.data) df = xls.parse(xls.sheet_names[0]) # Currently we only look at the fist sheet. + df = pd.DataFrame(df).replace({np.nan: None}) if value_column not in df: raise ApiError("invalid_emum", "The file %s does not contain a column named % s" % (data_model.file_model.name, @@ -149,39 +151,40 @@ class LookupService(object): lookup_data = LookupDataModel(lookup_file_model=lookup_model, value=row[value_column], label=row[label_column], - data=row.to_json()) + data=row.to_dict()) db.session.add(lookup_data) db.session.commit() return lookup_model @staticmethod - def _run_lookup_query(lookup_file_model, query, limit): + def _run_lookup_query(lookup_file_model, query, limit, lookup_id): db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model) + if lookup_id is not None: # Then just find the model with that id. + db_query = db_query.filter(LookupDataModel.id == lookup_id) + else: + # Build a full text query that takes all the terms provided and executes each term as a prefix query, and + # OR's those queries together. The order of the results is handled as a standard "Like" on the original + # string which seems to work intuitively for most entries. + query = re.sub('[^A-Za-z0-9 ]+', '', query) # Strip out non ascii characters. + query = re.sub(' {2,}', ' ', query) # Convert multiple spaces to just one space, as we split on spaces. + print("Query: " + query) + query = query.strip() + if len(query) > 0: + if ' ' in query: + terms = query.split(' ') + new_terms = ["'%s'" % query] + for t in terms: + new_terms.append("%s:*" % t) + new_query = ' | '.join(new_terms) + else: + new_query = "%s:*" % query - query = re.sub('[^A-Za-z0-9 ]+', '', query) - print("Query: " + query) - query = query.strip() - if len(query) > 0: - if ' ' in query: - terms = query.split(' ') - new_terms = ["'%s'" % query] - for t in terms: - new_terms.append("%s:*" % t) - new_query = ' | '.join(new_terms) - else: - new_query = "%s:*" % query + # Run the full text query + db_query = db_query.filter(LookupDataModel.label.match(new_query)) + # But hackishly order by like, which does a good job of + # pulling more relevant matches to the top. + db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%"))) - # Run the full text query - db_query = db_query.filter(LookupDataModel.label.match(new_query)) - # But hackishly order by like, which does a good job of - # pulling more relevant matches to the top. - db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%"))) - #ORDER BY name LIKE concat('%', ticker, '%') desc, rank DESC - -# db_query = db_query.order_by(desc(func.full_text.ts_rank( -# func.to_tsvector(LookupDataModel.label), -# func.to_tsquery(query)))) - from sqlalchemy.dialects import postgresql logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) result = db_query.limit(limit).all() logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR) diff --git a/tests/test_lookup_service.py b/tests/test_lookup_service.py index b61e20e2..4b7c180d 100644 --- a/tests/test_lookup_service.py +++ b/tests/test_lookup_service.py @@ -61,6 +61,15 @@ class TestLookupService(BaseTest): lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all() self.assertEqual(4, len(lookup_data)) + def test_lookup_based_on_id(self): + spec = BaseTest.load_test_spec('enum_options_from_file') + workflow = self.create_workflow('enum_options_from_file') + processor = WorkflowProcessor(workflow) + processor.do_engine_steps() + results = LookupService.lookup(workflow, "AllTheNames", "", id=1, limit=10) + self.assertEqual(1, len(results), "It is possible to find an item based on the id, rather than as a search") + self.assertIsNotNone(results[0].data) + self.assertIsInstance(results[0].data, dict) def test_some_full_text_queries(self): @@ -114,6 +123,9 @@ class TestLookupService(BaseTest): results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10) self.assertEqual("1 Something", results[0].label, "special characters don't flake out") + results = LookupService.lookup(workflow, "AllTheNames", "1 Something", limit=10) + self.assertEqual("1 Something", results[0].label, "double spaces should not be an issue.") + # 1018 10000 Something Industry diff --git a/tests/test_tasks_api.py b/tests/test_tasks_api.py index c6b09dae..11be07fd 100644 --- a/tests/test_tasks_api.py +++ b/tests/test_tasks_api.py @@ -343,6 +343,30 @@ class TestTasksApi(BaseTest): results = json.loads(rv.get_data(as_text=True)) self.assertEqual(5, len(results)) + def test_lookup_endpoint_for_task_field_using_lookup_entry_id(self): + self.load_example_data() + workflow = self.create_workflow('enum_options_with_search') + # get the first form in the two form workflow. + workflow = self.get_workflow_api(workflow) + task = workflow.next_task + field_id = task.form['fields'][0]['id'] + rv = self.app.get('/v1.0/workflow/%i/lookup/%s?query=%s&limit=5' % + (workflow.id, field_id, 'c'), # All records with a word that starts with 'c' + headers=self.logged_in_headers(), + content_type="application/json") + self.assert_success(rv) + results = json.loads(rv.get_data(as_text=True)) + self.assertEqual(5, len(results)) + rv = self.app.get('/v1.0/workflow/%i/lookup/%s?id=%i' % + (workflow.id, field_id, results[0]['id']), # All records with a word that starts with 'c' + headers=self.logged_in_headers(), + content_type="application/json") + results = json.loads(rv.get_data(as_text=True)) + self.assertEqual(1, len(results)) + self.assertIsInstance(results[0]['data'], dict) + + + def test_lookup_endpoint_for_task_ldap_field_lookup(self): self.load_example_data() workflow = self.create_workflow('ldap_lookup')