Provides some basic tools for getting additional information about a lookup field.

Adds an optional 'id' parameter to the lookup endpoint so you can find a specific entry in the lookup table.
Makes sure the data attribute returned on a lookup model is a dictionary, and not a string.
Fixes a previous bug that would crop up in double spaces were used when performing a search.
This commit is contained in:
Dan Funk 2020-06-30 10:00:22 -04:00
parent 19b6f5d1ab
commit d3ce1af1ce
6 changed files with 79 additions and 33 deletions

View File

@ -709,6 +709,13 @@ paths:
description: The total number of records to return, defaults to 10. description: The total number of records to return, defaults to 10.
schema: schema:
type: integer type: integer
- name: id
in: query
required: false
description: Rather than supplying a query, you can specify a speicific id to get data back on a lookup field.
schema:
type: string
get: get:
operationId: crc.api.workflow.lookup operationId: crc.api.workflow.lookup
summary: Provides type-ahead search against a lookup table associted with a form field. summary: Provides type-ahead search against a lookup table associted with a form field.

View File

@ -41,7 +41,6 @@ def get_workflow_specification(spec_id):
def validate_workflow_specification(spec_id): def validate_workflow_specification(spec_id):
errors = [] errors = []
try: try:
WorkflowService.test_spec(spec_id) WorkflowService.test_spec(spec_id)
@ -57,7 +56,6 @@ def validate_workflow_specification(spec_id):
return ApiErrorSchema(many=True).dump(errors) return ApiErrorSchema(many=True).dump(errors)
def update_workflow_specification(spec_id, body): def update_workflow_specification(spec_id, body):
if spec_id is None: if spec_id is None:
raise ApiError('unknown_spec', 'Please provide a valid Workflow Spec ID.') raise ApiError('unknown_spec', 'Please provide a valid Workflow Spec ID.')
@ -200,7 +198,7 @@ def delete_workflow_spec_category(cat_id):
session.commit() session.commit()
def lookup(workflow_id, field_id, query, limit): def lookup(workflow_id, field_id, query=None, limit=10, id=None):
""" """
given a field in a task, attempts to find the lookup table or function associated given a field in a task, attempts to find the lookup table or function associated
with that field and runs a full-text query against it to locate the values and with that field and runs a full-text query against it to locate the values and
@ -208,14 +206,15 @@ def lookup(workflow_id, field_id, query, limit):
Tries to be fast, but first runs will be very slow. Tries to be fast, but first runs will be very slow.
""" """
workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first() workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first()
lookup_data = LookupService.lookup(workflow, field_id, query, limit) lookup_data = LookupService.lookup(workflow, field_id, query, limit, id)
return LookupDataSchema(many=True).dump(lookup_data) return LookupDataSchema(many=True).dump(lookup_data)
def __get_user_uid(user_uid): def __get_user_uid(user_uid):
if 'user' in g: if 'user' in g:
if g.user.uid not in app.config['ADMIN_UIDS'] and user_uid != g.user.uid: if g.user.uid not in app.config['ADMIN_UIDS'] and user_uid != g.user.uid:
raise ApiError("permission_denied", "You are not authorized to edit the task data for this workflow.", status_code=403) raise ApiError("permission_denied", "You are not authorized to edit the task data for this workflow.",
status_code=403)
else: else:
return g.user.uid return g.user.uid

View File

@ -153,6 +153,7 @@ class LookupFileModel(db.Model):
file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id')) file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id'))
dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan") dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan")
class LookupDataModel(db.Model): class LookupDataModel(db.Model):
__tablename__ = 'lookup_data' __tablename__ = 'lookup_data'
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)

View File

@ -1,7 +1,8 @@
import logging import logging
import re import re
from pandas import ExcelFile import pandas as pd
from pandas import ExcelFile, np
from sqlalchemy import func, desc from sqlalchemy import func, desc
from sqlalchemy.sql.functions import GenericFunction from sqlalchemy.sql.functions import GenericFunction
@ -62,14 +63,14 @@ class LookupService(object):
return lookup_model return lookup_model
@staticmethod @staticmethod
def lookup(workflow, field_id, query, limit): def lookup(workflow, field_id, query, limit, id = None):
lookup_model = LookupService.__get_lookup_model(workflow, field_id) lookup_model = LookupService.__get_lookup_model(workflow, field_id)
if lookup_model.is_ldap: if lookup_model.is_ldap:
return LookupService._run_ldap_query(query, limit) return LookupService._run_ldap_query(query, limit)
else: else:
return LookupService._run_lookup_query(lookup_model, query, limit) return LookupService._run_lookup_query(lookup_model, query, limit, id)
@ -130,6 +131,7 @@ class LookupService(object):
changed. """ changed. """
xls = ExcelFile(data_model.data) xls = ExcelFile(data_model.data)
df = xls.parse(xls.sheet_names[0]) # Currently we only look at the fist sheet. df = xls.parse(xls.sheet_names[0]) # Currently we only look at the fist sheet.
df = pd.DataFrame(df).replace({np.nan: None})
if value_column not in df: if value_column not in df:
raise ApiError("invalid_emum", raise ApiError("invalid_emum",
"The file %s does not contain a column named % s" % (data_model.file_model.name, "The file %s does not contain a column named % s" % (data_model.file_model.name,
@ -149,39 +151,40 @@ class LookupService(object):
lookup_data = LookupDataModel(lookup_file_model=lookup_model, lookup_data = LookupDataModel(lookup_file_model=lookup_model,
value=row[value_column], value=row[value_column],
label=row[label_column], label=row[label_column],
data=row.to_json()) data=row.to_dict())
db.session.add(lookup_data) db.session.add(lookup_data)
db.session.commit() db.session.commit()
return lookup_model return lookup_model
@staticmethod @staticmethod
def _run_lookup_query(lookup_file_model, query, limit): def _run_lookup_query(lookup_file_model, query, limit, lookup_id):
db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model) db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model)
if lookup_id is not None: # Then just find the model with that id.
db_query = db_query.filter(LookupDataModel.id == lookup_id)
else:
# Build a full text query that takes all the terms provided and executes each term as a prefix query, and
# OR's those queries together. The order of the results is handled as a standard "Like" on the original
# string which seems to work intuitively for most entries.
query = re.sub('[^A-Za-z0-9 ]+', '', query) # Strip out non ascii characters.
query = re.sub(' {2,}', ' ', query) # Convert multiple spaces to just one space, as we split on spaces.
print("Query: " + query)
query = query.strip()
if len(query) > 0:
if ' ' in query:
terms = query.split(' ')
new_terms = ["'%s'" % query]
for t in terms:
new_terms.append("%s:*" % t)
new_query = ' | '.join(new_terms)
else:
new_query = "%s:*" % query
query = re.sub('[^A-Za-z0-9 ]+', '', query) # Run the full text query
print("Query: " + query) db_query = db_query.filter(LookupDataModel.label.match(new_query))
query = query.strip() # But hackishly order by like, which does a good job of
if len(query) > 0: # pulling more relevant matches to the top.
if ' ' in query: db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
terms = query.split(' ')
new_terms = ["'%s'" % query]
for t in terms:
new_terms.append("%s:*" % t)
new_query = ' | '.join(new_terms)
else:
new_query = "%s:*" % query
# Run the full text query
db_query = db_query.filter(LookupDataModel.label.match(new_query))
# But hackishly order by like, which does a good job of
# pulling more relevant matches to the top.
db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
#ORDER BY name LIKE concat('%', ticker, '%') desc, rank DESC
# db_query = db_query.order_by(desc(func.full_text.ts_rank(
# func.to_tsvector(LookupDataModel.label),
# func.to_tsquery(query))))
from sqlalchemy.dialects import postgresql
logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
result = db_query.limit(limit).all() result = db_query.limit(limit).all()
logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR) logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR)

View File

@ -61,6 +61,15 @@ class TestLookupService(BaseTest):
lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all() lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
self.assertEqual(4, len(lookup_data)) self.assertEqual(4, len(lookup_data))
def test_lookup_based_on_id(self):
spec = BaseTest.load_test_spec('enum_options_from_file')
workflow = self.create_workflow('enum_options_from_file')
processor = WorkflowProcessor(workflow)
processor.do_engine_steps()
results = LookupService.lookup(workflow, "AllTheNames", "", id=1, limit=10)
self.assertEqual(1, len(results), "It is possible to find an item based on the id, rather than as a search")
self.assertIsNotNone(results[0].data)
self.assertIsInstance(results[0].data, dict)
def test_some_full_text_queries(self): def test_some_full_text_queries(self):
@ -114,6 +123,9 @@ class TestLookupService(BaseTest):
results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10) results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10)
self.assertEqual("1 Something", results[0].label, "special characters don't flake out") self.assertEqual("1 Something", results[0].label, "special characters don't flake out")
results = LookupService.lookup(workflow, "AllTheNames", "1 Something", limit=10)
self.assertEqual("1 Something", results[0].label, "double spaces should not be an issue.")
# 1018 10000 Something Industry # 1018 10000 Something Industry

View File

@ -343,6 +343,30 @@ class TestTasksApi(BaseTest):
results = json.loads(rv.get_data(as_text=True)) results = json.loads(rv.get_data(as_text=True))
self.assertEqual(5, len(results)) self.assertEqual(5, len(results))
def test_lookup_endpoint_for_task_field_using_lookup_entry_id(self):
self.load_example_data()
workflow = self.create_workflow('enum_options_with_search')
# get the first form in the two form workflow.
workflow = self.get_workflow_api(workflow)
task = workflow.next_task
field_id = task.form['fields'][0]['id']
rv = self.app.get('/v1.0/workflow/%i/lookup/%s?query=%s&limit=5' %
(workflow.id, field_id, 'c'), # All records with a word that starts with 'c'
headers=self.logged_in_headers(),
content_type="application/json")
self.assert_success(rv)
results = json.loads(rv.get_data(as_text=True))
self.assertEqual(5, len(results))
rv = self.app.get('/v1.0/workflow/%i/lookup/%s?id=%i' %
(workflow.id, field_id, results[0]['id']), # All records with a word that starts with 'c'
headers=self.logged_in_headers(),
content_type="application/json")
results = json.loads(rv.get_data(as_text=True))
self.assertEqual(1, len(results))
self.assertIsInstance(results[0]['data'], dict)
def test_lookup_endpoint_for_task_ldap_field_lookup(self): def test_lookup_endpoint_for_task_ldap_field_lookup(self):
self.load_example_data() self.load_example_data()
workflow = self.create_workflow('ldap_lookup') workflow = self.create_workflow('ldap_lookup')