From d3ce1af1ce2e98191ff6086222a08f3530602e50 Mon Sep 17 00:00:00 2001 From: Dan Funk Date: Tue, 30 Jun 2020 10:00:22 -0400 Subject: [PATCH 1/4] Provides some basic tools for getting additional information about a lookup field. Adds an optional 'id' parameter to the lookup endpoint so you can find a specific entry in the lookup table. Makes sure the data attribute returned on a lookup model is a dictionary, and not a string. Fixes a previous bug that would crop up in double spaces were used when performing a search. --- crc/api.yml | 7 ++++ crc/api/workflow.py | 9 +++--- crc/models/file.py | 1 + crc/services/lookup_service.py | 59 ++++++++++++++++++---------------- tests/test_lookup_service.py | 12 +++++++ tests/test_tasks_api.py | 24 ++++++++++++++ 6 files changed, 79 insertions(+), 33 deletions(-) diff --git a/crc/api.yml b/crc/api.yml index 3f72bf7f..0bf8bd6f 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -709,6 +709,13 @@ paths: description: The total number of records to return, defaults to 10. schema: type: integer + - name: id + in: query + required: false + description: Rather than supplying a query, you can specify a speicific id to get data back on a lookup field. + schema: + type: string + get: operationId: crc.api.workflow.lookup summary: Provides type-ahead search against a lookup table associted with a form field. diff --git a/crc/api/workflow.py b/crc/api/workflow.py index 68b84f56..cee2f4cf 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -41,7 +41,6 @@ def get_workflow_specification(spec_id): def validate_workflow_specification(spec_id): - errors = [] try: WorkflowService.test_spec(spec_id) @@ -57,7 +56,6 @@ def validate_workflow_specification(spec_id): return ApiErrorSchema(many=True).dump(errors) - def update_workflow_specification(spec_id, body): if spec_id is None: raise ApiError('unknown_spec', 'Please provide a valid Workflow Spec ID.') @@ -200,7 +198,7 @@ def delete_workflow_spec_category(cat_id): session.commit() -def lookup(workflow_id, field_id, query, limit): +def lookup(workflow_id, field_id, query=None, limit=10, id=None): """ given a field in a task, attempts to find the lookup table or function associated with that field and runs a full-text query against it to locate the values and @@ -208,14 +206,15 @@ def lookup(workflow_id, field_id, query, limit): Tries to be fast, but first runs will be very slow. """ workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first() - lookup_data = LookupService.lookup(workflow, field_id, query, limit) + lookup_data = LookupService.lookup(workflow, field_id, query, limit, id) return LookupDataSchema(many=True).dump(lookup_data) def __get_user_uid(user_uid): if 'user' in g: if g.user.uid not in app.config['ADMIN_UIDS'] and user_uid != g.user.uid: - raise ApiError("permission_denied", "You are not authorized to edit the task data for this workflow.", status_code=403) + raise ApiError("permission_denied", "You are not authorized to edit the task data for this workflow.", + status_code=403) else: return g.user.uid diff --git a/crc/models/file.py b/crc/models/file.py index 15a48709..ccfbdc56 100644 --- a/crc/models/file.py +++ b/crc/models/file.py @@ -153,6 +153,7 @@ class LookupFileModel(db.Model): file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id')) dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan") + class LookupDataModel(db.Model): __tablename__ = 'lookup_data' id = db.Column(db.Integer, primary_key=True) diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py index b3e0bddc..876fda80 100644 --- a/crc/services/lookup_service.py +++ b/crc/services/lookup_service.py @@ -1,7 +1,8 @@ import logging import re -from pandas import ExcelFile +import pandas as pd +from pandas import ExcelFile, np from sqlalchemy import func, desc from sqlalchemy.sql.functions import GenericFunction @@ -62,14 +63,14 @@ class LookupService(object): return lookup_model @staticmethod - def lookup(workflow, field_id, query, limit): + def lookup(workflow, field_id, query, limit, id = None): lookup_model = LookupService.__get_lookup_model(workflow, field_id) if lookup_model.is_ldap: return LookupService._run_ldap_query(query, limit) else: - return LookupService._run_lookup_query(lookup_model, query, limit) + return LookupService._run_lookup_query(lookup_model, query, limit, id) @@ -130,6 +131,7 @@ class LookupService(object): changed. """ xls = ExcelFile(data_model.data) df = xls.parse(xls.sheet_names[0]) # Currently we only look at the fist sheet. + df = pd.DataFrame(df).replace({np.nan: None}) if value_column not in df: raise ApiError("invalid_emum", "The file %s does not contain a column named % s" % (data_model.file_model.name, @@ -149,39 +151,40 @@ class LookupService(object): lookup_data = LookupDataModel(lookup_file_model=lookup_model, value=row[value_column], label=row[label_column], - data=row.to_json()) + data=row.to_dict()) db.session.add(lookup_data) db.session.commit() return lookup_model @staticmethod - def _run_lookup_query(lookup_file_model, query, limit): + def _run_lookup_query(lookup_file_model, query, limit, lookup_id): db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model) + if lookup_id is not None: # Then just find the model with that id. + db_query = db_query.filter(LookupDataModel.id == lookup_id) + else: + # Build a full text query that takes all the terms provided and executes each term as a prefix query, and + # OR's those queries together. The order of the results is handled as a standard "Like" on the original + # string which seems to work intuitively for most entries. + query = re.sub('[^A-Za-z0-9 ]+', '', query) # Strip out non ascii characters. + query = re.sub(' {2,}', ' ', query) # Convert multiple spaces to just one space, as we split on spaces. + print("Query: " + query) + query = query.strip() + if len(query) > 0: + if ' ' in query: + terms = query.split(' ') + new_terms = ["'%s'" % query] + for t in terms: + new_terms.append("%s:*" % t) + new_query = ' | '.join(new_terms) + else: + new_query = "%s:*" % query - query = re.sub('[^A-Za-z0-9 ]+', '', query) - print("Query: " + query) - query = query.strip() - if len(query) > 0: - if ' ' in query: - terms = query.split(' ') - new_terms = ["'%s'" % query] - for t in terms: - new_terms.append("%s:*" % t) - new_query = ' | '.join(new_terms) - else: - new_query = "%s:*" % query + # Run the full text query + db_query = db_query.filter(LookupDataModel.label.match(new_query)) + # But hackishly order by like, which does a good job of + # pulling more relevant matches to the top. + db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%"))) - # Run the full text query - db_query = db_query.filter(LookupDataModel.label.match(new_query)) - # But hackishly order by like, which does a good job of - # pulling more relevant matches to the top. - db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%"))) - #ORDER BY name LIKE concat('%', ticker, '%') desc, rank DESC - -# db_query = db_query.order_by(desc(func.full_text.ts_rank( -# func.to_tsvector(LookupDataModel.label), -# func.to_tsquery(query)))) - from sqlalchemy.dialects import postgresql logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) result = db_query.limit(limit).all() logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR) diff --git a/tests/test_lookup_service.py b/tests/test_lookup_service.py index b61e20e2..4b7c180d 100644 --- a/tests/test_lookup_service.py +++ b/tests/test_lookup_service.py @@ -61,6 +61,15 @@ class TestLookupService(BaseTest): lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all() self.assertEqual(4, len(lookup_data)) + def test_lookup_based_on_id(self): + spec = BaseTest.load_test_spec('enum_options_from_file') + workflow = self.create_workflow('enum_options_from_file') + processor = WorkflowProcessor(workflow) + processor.do_engine_steps() + results = LookupService.lookup(workflow, "AllTheNames", "", id=1, limit=10) + self.assertEqual(1, len(results), "It is possible to find an item based on the id, rather than as a search") + self.assertIsNotNone(results[0].data) + self.assertIsInstance(results[0].data, dict) def test_some_full_text_queries(self): @@ -114,6 +123,9 @@ class TestLookupService(BaseTest): results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10) self.assertEqual("1 Something", results[0].label, "special characters don't flake out") + results = LookupService.lookup(workflow, "AllTheNames", "1 Something", limit=10) + self.assertEqual("1 Something", results[0].label, "double spaces should not be an issue.") + # 1018 10000 Something Industry diff --git a/tests/test_tasks_api.py b/tests/test_tasks_api.py index c6b09dae..11be07fd 100644 --- a/tests/test_tasks_api.py +++ b/tests/test_tasks_api.py @@ -343,6 +343,30 @@ class TestTasksApi(BaseTest): results = json.loads(rv.get_data(as_text=True)) self.assertEqual(5, len(results)) + def test_lookup_endpoint_for_task_field_using_lookup_entry_id(self): + self.load_example_data() + workflow = self.create_workflow('enum_options_with_search') + # get the first form in the two form workflow. + workflow = self.get_workflow_api(workflow) + task = workflow.next_task + field_id = task.form['fields'][0]['id'] + rv = self.app.get('/v1.0/workflow/%i/lookup/%s?query=%s&limit=5' % + (workflow.id, field_id, 'c'), # All records with a word that starts with 'c' + headers=self.logged_in_headers(), + content_type="application/json") + self.assert_success(rv) + results = json.loads(rv.get_data(as_text=True)) + self.assertEqual(5, len(results)) + rv = self.app.get('/v1.0/workflow/%i/lookup/%s?id=%i' % + (workflow.id, field_id, results[0]['id']), # All records with a word that starts with 'c' + headers=self.logged_in_headers(), + content_type="application/json") + results = json.loads(rv.get_data(as_text=True)) + self.assertEqual(1, len(results)) + self.assertIsInstance(results[0]['data'], dict) + + + def test_lookup_endpoint_for_task_ldap_field_lookup(self): self.load_example_data() workflow = self.create_workflow('ldap_lookup') From f183e12fe56a7a81aef4c4d5fe39ac969b5bce05 Mon Sep 17 00:00:00 2001 From: Dan Funk Date: Tue, 30 Jun 2020 10:34:16 -0400 Subject: [PATCH 2/4] Provides some basic tools for getting additional information about a lookup field. Adds an optional 'value' parameter to the lookup endpoint so you can find a specific entry in the lookup table. Makes sure the data attribute returned on a lookup model is a dictionary, and not a string. Fixes a previous bug that would crop up if double spaces were used when performing a search. --- crc/api.yml | 12 ++++++------ crc/api/workflow.py | 4 ++-- crc/models/file.py | 1 + crc/services/lookup_service.py | 10 +++++----- tests/test_tasks_api.py | 27 ++++++++++++++++++++++++--- 5 files changed, 38 insertions(+), 16 deletions(-) diff --git a/crc/api.yml b/crc/api.yml index 0bf8bd6f..213e8d15 100644 --- a/crc/api.yml +++ b/crc/api.yml @@ -703,18 +703,18 @@ paths: description: The string to search for in the Value column of the lookup table. schema: type: string + - name: value + in: query + required: false + description: An alternative to query, this accepts the specific value or id selected in a dropdown list or auto-complete, and will return the one matching record. Useful for getting additional details about an item selected in a dropdown. + schema: + type: string - name: limit in: query required: false description: The total number of records to return, defaults to 10. schema: type: integer - - name: id - in: query - required: false - description: Rather than supplying a query, you can specify a speicific id to get data back on a lookup field. - schema: - type: string get: operationId: crc.api.workflow.lookup diff --git a/crc/api/workflow.py b/crc/api/workflow.py index cee2f4cf..dc86ac9e 100644 --- a/crc/api/workflow.py +++ b/crc/api/workflow.py @@ -198,7 +198,7 @@ def delete_workflow_spec_category(cat_id): session.commit() -def lookup(workflow_id, field_id, query=None, limit=10, id=None): +def lookup(workflow_id, field_id, query=None, value=None, limit=10): """ given a field in a task, attempts to find the lookup table or function associated with that field and runs a full-text query against it to locate the values and @@ -206,7 +206,7 @@ def lookup(workflow_id, field_id, query=None, limit=10, id=None): Tries to be fast, but first runs will be very slow. """ workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first() - lookup_data = LookupService.lookup(workflow, field_id, query, limit, id) + lookup_data = LookupService.lookup(workflow, field_id, query, value, limit) return LookupDataSchema(many=True).dump(lookup_data) diff --git a/crc/models/file.py b/crc/models/file.py index ccfbdc56..5eb50d4e 100644 --- a/crc/models/file.py +++ b/crc/models/file.py @@ -182,6 +182,7 @@ class LookupDataSchema(SQLAlchemyAutoSchema): load_instance = True include_relationships = False include_fk = False # Includes foreign keys + exclude = ['id'] # Do not include the id field, it should never be used via the API. class SimpleFileSchema(ma.Schema): diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py index 876fda80..97c9824b 100644 --- a/crc/services/lookup_service.py +++ b/crc/services/lookup_service.py @@ -63,14 +63,14 @@ class LookupService(object): return lookup_model @staticmethod - def lookup(workflow, field_id, query, limit, id = None): + def lookup(workflow, field_id, query, value, limit): lookup_model = LookupService.__get_lookup_model(workflow, field_id) if lookup_model.is_ldap: return LookupService._run_ldap_query(query, limit) else: - return LookupService._run_lookup_query(lookup_model, query, limit, id) + return LookupService._run_lookup_query(lookup_model, query, value, limit) @@ -157,10 +157,10 @@ class LookupService(object): return lookup_model @staticmethod - def _run_lookup_query(lookup_file_model, query, limit, lookup_id): + def _run_lookup_query(lookup_file_model, query, value, limit): db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model) - if lookup_id is not None: # Then just find the model with that id. - db_query = db_query.filter(LookupDataModel.id == lookup_id) + if value is not None: # Then just find the model with that value + db_query = db_query.filter(LookupDataModel.value == value) else: # Build a full text query that takes all the terms provided and executes each term as a prefix query, and # OR's those queries together. The order of the results is handled as a standard "Like" on the original diff --git a/tests/test_tasks_api.py b/tests/test_tasks_api.py index 11be07fd..236defdc 100644 --- a/tests/test_tasks_api.py +++ b/tests/test_tasks_api.py @@ -357,15 +357,36 @@ class TestTasksApi(BaseTest): self.assert_success(rv) results = json.loads(rv.get_data(as_text=True)) self.assertEqual(5, len(results)) - rv = self.app.get('/v1.0/workflow/%i/lookup/%s?id=%i' % - (workflow.id, field_id, results[0]['id']), # All records with a word that starts with 'c' + rv = self.app.get('/v1.0/workflow/%i/lookup/%s?value=%s' % + (workflow.id, field_id, results[0]['value']), # All records with a word that starts with 'c' headers=self.logged_in_headers(), content_type="application/json") results = json.loads(rv.get_data(as_text=True)) self.assertEqual(1, len(results)) self.assertIsInstance(results[0]['data'], dict) + self.assertNotIn('id', results[0], "Don't include the internal id, that can be very confusing, and should not be used.") - + def test_lookup_endpoint_also_works_for_enum(self): + # Naming here get's a little confusing. fields can be marked as enum or autocomplete. + # In the event of an auto-complete it's a type-ahead search field, for an enum the + # the key/values from the spreadsheet are added directly to the form and it shows up as + # a dropdown. This tests the case of wanting to get additional data when a user selects + # something from a drodown. + self.load_example_data() + workflow = self.create_workflow('enum_options_from_file') + # get the first form in the two form workflow. + workflow = self.get_workflow_api(workflow) + task = workflow.next_task + field_id = task.form['fields'][0]['id'] + option_id = task.form['fields'][0]['options'][0]['id'] + rv = self.app.get('/v1.0/workflow/%i/lookup/%s?value=%s' % + (workflow.id, field_id, option_id), # All records with a word that starts with 'c' + headers=self.logged_in_headers(), + content_type="application/json") + self.assert_success(rv) + results = json.loads(rv.get_data(as_text=True)) + self.assertEqual(1, len(results)) + self.assertIsInstance(results[0]['data'], dict) def test_lookup_endpoint_for_task_ldap_field_lookup(self): self.load_example_data() From 93bf46354bf43f7412078ad7fa3dbbf43f1ae66a Mon Sep 17 00:00:00 2001 From: Dan Funk Date: Tue, 30 Jun 2020 11:12:28 -0400 Subject: [PATCH 3/4] A last minute change to make the API a little clearer and cleaner broke some tests. --- crc/services/lookup_service.py | 22 ++++++++++------------ tests/test_lookup_service.py | 2 +- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py index 97c9824b..e9852315 100644 --- a/crc/services/lookup_service.py +++ b/crc/services/lookup_service.py @@ -20,8 +20,8 @@ class TSRank(GenericFunction): package = 'full_text' name = 'ts_rank' -class LookupService(object): +class LookupService(object): """Provides tools for doing lookups for auto-complete fields. This can currently take two forms: 1) Lookup from spreadsheet data associated with a workflow specification. @@ -51,7 +51,7 @@ class LookupService(object): # if not, we need to rebuild the lookup table. is_current = False if lookup_model: - is_current = db.session.query(WorkflowSpecDependencyFile).\ + is_current = db.session.query(WorkflowSpecDependencyFile). \ filter(WorkflowSpecDependencyFile.file_data_id == lookup_model.file_data_model_id).count() if not is_current: @@ -63,7 +63,7 @@ class LookupService(object): return lookup_model @staticmethod - def lookup(workflow, field_id, query, value, limit): + def lookup(workflow, field_id, query, value=None, limit=10): lookup_model = LookupService.__get_lookup_model(workflow, field_id) @@ -72,8 +72,6 @@ class LookupService(object): else: return LookupService._run_lookup_query(lookup_model, query, value, limit) - - @staticmethod def create_lookup_model(workflow_model, field_id): """ @@ -117,8 +115,8 @@ class LookupService(object): is_ldap=True) else: raise ApiError("unknown_lookup_option", - "Lookup supports using spreadsheet options or ldap options, and neither " - "was provided.") + "Lookup supports using spreadsheet options or ldap options, and neither " + "was provided.") db.session.add(lookup_model) db.session.commit() return lookup_model @@ -199,8 +197,8 @@ class LookupService(object): we return a lookup data model.""" user_list = [] for user in users: - user_list.append( {"value": user['uid'], - "label": user['display_name'] + " (" + user['uid'] + ")", - "data": user - }) - return user_list \ No newline at end of file + user_list.append({"value": user['uid'], + "label": user['display_name'] + " (" + user['uid'] + ")", + "data": user + }) + return user_list diff --git a/tests/test_lookup_service.py b/tests/test_lookup_service.py index 4b7c180d..a27427f4 100644 --- a/tests/test_lookup_service.py +++ b/tests/test_lookup_service.py @@ -66,7 +66,7 @@ class TestLookupService(BaseTest): workflow = self.create_workflow('enum_options_from_file') processor = WorkflowProcessor(workflow) processor.do_engine_steps() - results = LookupService.lookup(workflow, "AllTheNames", "", id=1, limit=10) + results = LookupService.lookup(workflow, "AllTheNames", "", value="1000", limit=10) self.assertEqual(1, len(results), "It is possible to find an item based on the id, rather than as a search") self.assertIsNotNone(results[0].data) self.assertIsInstance(results[0].data, dict) From 84973d23512028931b83915f0e16c3e986d23095 Mon Sep 17 00:00:00 2001 From: Dan Funk Date: Tue, 30 Jun 2020 12:24:48 -0400 Subject: [PATCH 4/4] resolving comments from pull request. --- crc/services/lookup_service.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py index e9852315..47424ae8 100644 --- a/crc/services/lookup_service.py +++ b/crc/services/lookup_service.py @@ -1,5 +1,6 @@ import logging import re +from collections import OrderedDict import pandas as pd from pandas import ExcelFile, np @@ -149,7 +150,7 @@ class LookupService(object): lookup_data = LookupDataModel(lookup_file_model=lookup_model, value=row[value_column], label=row[label_column], - data=row.to_dict()) + data=row.to_dict(OrderedDict)) db.session.add(lookup_data) db.session.commit() return lookup_model @@ -164,7 +165,7 @@ class LookupService(object): # OR's those queries together. The order of the results is handled as a standard "Like" on the original # string which seems to work intuitively for most entries. query = re.sub('[^A-Za-z0-9 ]+', '', query) # Strip out non ascii characters. - query = re.sub(' {2,}', ' ', query) # Convert multiple spaces to just one space, as we split on spaces. + query = re.sub(r'\s+', ' ', query) # Convert multiple space like characters to just one space, as we split on spaces. print("Query: " + query) query = query.strip() if len(query) > 0: