Merge pull request #133 from sartography/feature/lookup_by_id

Feature/lookup by
This commit is contained in:
Dan Funk 2020-06-30 14:12:00 -04:00 committed by GitHub
commit 49a4670b64
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 111 additions and 44 deletions

View File

@ -703,12 +703,19 @@ paths:
description: The string to search for in the Value column of the lookup table. description: The string to search for in the Value column of the lookup table.
schema: schema:
type: string type: string
- name: value
in: query
required: false
description: An alternative to query, this accepts the specific value or id selected in a dropdown list or auto-complete, and will return the one matching record. Useful for getting additional details about an item selected in a dropdown.
schema:
type: string
- name: limit - name: limit
in: query in: query
required: false required: false
description: The total number of records to return, defaults to 10. description: The total number of records to return, defaults to 10.
schema: schema:
type: integer type: integer
get: get:
operationId: crc.api.workflow.lookup operationId: crc.api.workflow.lookup
summary: Provides type-ahead search against a lookup table associated with a form field. summary: Provides type-ahead search against a lookup table associated with a form field.

View File

@ -41,7 +41,6 @@ def get_workflow_specification(spec_id):
def validate_workflow_specification(spec_id): def validate_workflow_specification(spec_id):
errors = [] errors = []
try: try:
WorkflowService.test_spec(spec_id) WorkflowService.test_spec(spec_id)
@ -57,7 +56,6 @@ def validate_workflow_specification(spec_id):
return ApiErrorSchema(many=True).dump(errors) return ApiErrorSchema(many=True).dump(errors)
def update_workflow_specification(spec_id, body): def update_workflow_specification(spec_id, body):
if spec_id is None: if spec_id is None:
raise ApiError('unknown_spec', 'Please provide a valid Workflow Spec ID.') raise ApiError('unknown_spec', 'Please provide a valid Workflow Spec ID.')
@ -200,7 +198,7 @@ def delete_workflow_spec_category(cat_id):
session.commit() session.commit()
def lookup(workflow_id, field_id, query, limit): def lookup(workflow_id, field_id, query=None, value=None, limit=10):
""" """
given a field in a task, attempts to find the lookup table or function associated given a field in a task, attempts to find the lookup table or function associated
with that field and runs a full-text query against it to locate the values and with that field and runs a full-text query against it to locate the values and
@ -208,14 +206,15 @@ def lookup(workflow_id, field_id, query, limit):
Tries to be fast, but first runs will be very slow. Tries to be fast, but first runs will be very slow.
""" """
workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first() workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first()
lookup_data = LookupService.lookup(workflow, field_id, query, limit) lookup_data = LookupService.lookup(workflow, field_id, query, value, limit)
return LookupDataSchema(many=True).dump(lookup_data) return LookupDataSchema(many=True).dump(lookup_data)
def __get_user_uid(user_uid): def __get_user_uid(user_uid):
if 'user' in g: if 'user' in g:
if g.user.uid not in app.config['ADMIN_UIDS'] and user_uid != g.user.uid: if g.user.uid not in app.config['ADMIN_UIDS'] and user_uid != g.user.uid:
raise ApiError("permission_denied", "You are not authorized to edit the task data for this workflow.", status_code=403) raise ApiError("permission_denied", "You are not authorized to edit the task data for this workflow.",
status_code=403)
else: else:
return g.user.uid return g.user.uid

View File

@ -153,6 +153,7 @@ class LookupFileModel(db.Model):
file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id')) file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id'))
dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan") dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan")
class LookupDataModel(db.Model): class LookupDataModel(db.Model):
__tablename__ = 'lookup_data' __tablename__ = 'lookup_data'
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
@ -181,6 +182,7 @@ class LookupDataSchema(SQLAlchemyAutoSchema):
load_instance = True load_instance = True
include_relationships = False include_relationships = False
include_fk = False # Includes foreign keys include_fk = False # Includes foreign keys
exclude = ['id'] # Do not include the id field, it should never be used via the API.
class SimpleFileSchema(ma.Schema): class SimpleFileSchema(ma.Schema):

View File

@ -1,7 +1,9 @@
import logging import logging
import re import re
from collections import OrderedDict
from pandas import ExcelFile import pandas as pd
from pandas import ExcelFile, np
from sqlalchemy import func, desc from sqlalchemy import func, desc
from sqlalchemy.sql.functions import GenericFunction from sqlalchemy.sql.functions import GenericFunction
@ -19,8 +21,8 @@ class TSRank(GenericFunction):
package = 'full_text' package = 'full_text'
name = 'ts_rank' name = 'ts_rank'
class LookupService(object):
class LookupService(object):
"""Provides tools for doing lookups for auto-complete fields. """Provides tools for doing lookups for auto-complete fields.
This can currently take two forms: This can currently take two forms:
1) Lookup from spreadsheet data associated with a workflow specification. 1) Lookup from spreadsheet data associated with a workflow specification.
@ -50,7 +52,7 @@ class LookupService(object):
# if not, we need to rebuild the lookup table. # if not, we need to rebuild the lookup table.
is_current = False is_current = False
if lookup_model: if lookup_model:
is_current = db.session.query(WorkflowSpecDependencyFile).\ is_current = db.session.query(WorkflowSpecDependencyFile). \
filter(WorkflowSpecDependencyFile.file_data_id == lookup_model.file_data_model_id).count() filter(WorkflowSpecDependencyFile.file_data_id == lookup_model.file_data_model_id).count()
if not is_current: if not is_current:
@ -62,16 +64,14 @@ class LookupService(object):
return lookup_model return lookup_model
@staticmethod @staticmethod
def lookup(workflow, field_id, query, limit): def lookup(workflow, field_id, query, value=None, limit=10):
lookup_model = LookupService.__get_lookup_model(workflow, field_id) lookup_model = LookupService.__get_lookup_model(workflow, field_id)
if lookup_model.is_ldap: if lookup_model.is_ldap:
return LookupService._run_ldap_query(query, limit) return LookupService._run_ldap_query(query, limit)
else: else:
return LookupService._run_lookup_query(lookup_model, query, limit) return LookupService._run_lookup_query(lookup_model, query, value, limit)
@staticmethod @staticmethod
def create_lookup_model(workflow_model, field_id): def create_lookup_model(workflow_model, field_id):
@ -116,8 +116,8 @@ class LookupService(object):
is_ldap=True) is_ldap=True)
else: else:
raise ApiError("unknown_lookup_option", raise ApiError("unknown_lookup_option",
"Lookup supports using spreadsheet options or ldap options, and neither " "Lookup supports using spreadsheet options or ldap options, and neither "
"was provided.") "was provided.")
db.session.add(lookup_model) db.session.add(lookup_model)
db.session.commit() db.session.commit()
return lookup_model return lookup_model
@ -130,6 +130,7 @@ class LookupService(object):
changed. """ changed. """
xls = ExcelFile(data_model.data) xls = ExcelFile(data_model.data)
df = xls.parse(xls.sheet_names[0]) # Currently we only look at the first sheet. df = xls.parse(xls.sheet_names[0]) # Currently we only look at the first sheet.
df = pd.DataFrame(df).replace({np.nan: None})
if value_column not in df: if value_column not in df:
raise ApiError("invalid_emum", raise ApiError("invalid_emum",
"The file %s does not contain a column named % s" % (data_model.file_model.name, "The file %s does not contain a column named % s" % (data_model.file_model.name,
@ -149,39 +150,40 @@ class LookupService(object):
lookup_data = LookupDataModel(lookup_file_model=lookup_model, lookup_data = LookupDataModel(lookup_file_model=lookup_model,
value=row[value_column], value=row[value_column],
label=row[label_column], label=row[label_column],
data=row.to_json()) data=row.to_dict(OrderedDict))
db.session.add(lookup_data) db.session.add(lookup_data)
db.session.commit() db.session.commit()
return lookup_model return lookup_model
@staticmethod @staticmethod
def _run_lookup_query(lookup_file_model, query, limit): def _run_lookup_query(lookup_file_model, query, value, limit):
db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model) db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model)
if value is not None: # Then just find the model with that value
db_query = db_query.filter(LookupDataModel.value == value)
else:
# Build a full text query that takes all the terms provided and executes each term as a prefix query, and
# OR's those queries together. The order of the results is handled as a standard "Like" on the original
# string which seems to work intuitively for most entries.
query = re.sub('[^A-Za-z0-9 ]+', '', query) # Strip out non ascii characters.
query = re.sub(r'\s+', ' ', query) # Convert multiple space like characters to just one space, as we split on spaces.
print("Query: " + query)
query = query.strip()
if len(query) > 0:
if ' ' in query:
terms = query.split(' ')
new_terms = ["'%s'" % query]
for t in terms:
new_terms.append("%s:*" % t)
new_query = ' | '.join(new_terms)
else:
new_query = "%s:*" % query
query = re.sub('[^A-Za-z0-9 ]+', '', query) # Run the full text query
print("Query: " + query) db_query = db_query.filter(LookupDataModel.label.match(new_query))
query = query.strip() # But hackishly order by like, which does a good job of
if len(query) > 0: # pulling more relevant matches to the top.
if ' ' in query: db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
terms = query.split(' ')
new_terms = ["'%s'" % query]
for t in terms:
new_terms.append("%s:*" % t)
new_query = ' | '.join(new_terms)
else:
new_query = "%s:*" % query
# Run the full text query
db_query = db_query.filter(LookupDataModel.label.match(new_query))
# But hackishly order by like, which does a good job of
# pulling more relevant matches to the top.
db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
#ORDER BY name LIKE concat('%', ticker, '%') desc, rank DESC
# db_query = db_query.order_by(desc(func.full_text.ts_rank(
# func.to_tsvector(LookupDataModel.label),
# func.to_tsquery(query))))
from sqlalchemy.dialects import postgresql
logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
result = db_query.limit(limit).all() result = db_query.limit(limit).all()
logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR) logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR)
@ -196,8 +198,8 @@ class LookupService(object):
we return a lookup data model.""" we return a lookup data model."""
user_list = [] user_list = []
for user in users: for user in users:
user_list.append( {"value": user['uid'], user_list.append({"value": user['uid'],
"label": user['display_name'] + " (" + user['uid'] + ")", "label": user['display_name'] + " (" + user['uid'] + ")",
"data": user "data": user
}) })
return user_list return user_list

View File

@ -61,6 +61,15 @@ class TestLookupService(BaseTest):
lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all() lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
self.assertEqual(4, len(lookup_data)) self.assertEqual(4, len(lookup_data))
def test_lookup_based_on_id(self):
    """Look up a single record by passing `value` instead of a search query.

    Passes an empty query string together with value="1000" and expects
    exactly one result whose `data` attribute is a dict.
    """
    spec_name = 'enum_options_from_file'
    BaseTest.load_test_spec(spec_name)
    workflow = self.create_workflow(spec_name)
    # Run the engine steps before querying the lookup service.
    WorkflowProcessor(workflow).do_engine_steps()

    matches = LookupService.lookup(workflow, "AllTheNames", "", value="1000", limit=10)

    self.assertEqual(1, len(matches), "It is possible to find an item based on the id, rather than as a search")
    record_data = matches[0].data
    self.assertIsNotNone(record_data)
    self.assertIsInstance(record_data, dict)
def test_some_full_text_queries(self): def test_some_full_text_queries(self):
@ -114,6 +123,9 @@ class TestLookupService(BaseTest):
results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10) results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10)
self.assertEqual("1 Something", results[0].label, "special characters don't flake out") self.assertEqual("1 Something", results[0].label, "special characters don't flake out")
results = LookupService.lookup(workflow, "AllTheNames", "1 Something", limit=10)
self.assertEqual("1 Something", results[0].label, "double spaces should not be an issue.")
# 1018 10000 Something Industry # 1018 10000 Something Industry

View File

@ -343,6 +343,51 @@ class TestTasksApi(BaseTest):
results = json.loads(rv.get_data(as_text=True)) results = json.loads(rv.get_data(as_text=True))
self.assertEqual(5, len(results)) self.assertEqual(5, len(results))
def test_lookup_endpoint_for_task_field_using_lookup_entry_id(self):
    """Search a lookup field, then fetch one record back by its value.

    First runs a type-ahead query against the lookup endpoint, then calls
    the same endpoint with `value=` set to the value of the first hit and
    expects exactly one record whose `data` is a dict and which does not
    expose an `id` key.
    """
    self.load_example_data()
    workflow = self.create_workflow('enum_options_with_search')
    # get the first form in the two form workflow.
    workflow = self.get_workflow_api(workflow)
    task = workflow.next_task
    field_id = task.form['fields'][0]['id']

    # Type-ahead search: all records with a word that starts with 'c'.
    rv = self.app.get('/v1.0/workflow/%i/lookup/%s?query=%s&limit=5' %
                      (workflow.id, field_id, 'c'),
                      headers=self.logged_in_headers(),
                      content_type="application/json")
    self.assert_success(rv)
    results = json.loads(rv.get_data(as_text=True))
    self.assertEqual(5, len(results))

    # Exact lookup: request only the record whose value matches the first search hit.
    rv = self.app.get('/v1.0/workflow/%i/lookup/%s?value=%s' %
                      (workflow.id, field_id, results[0]['value']),
                      headers=self.logged_in_headers(),
                      content_type="application/json")
    # Check the response status before parsing it, so a failing request is
    # reported as such rather than as a JSON or index error further down.
    self.assert_success(rv)
    results = json.loads(rv.get_data(as_text=True))
    self.assertEqual(1, len(results))
    self.assertIsInstance(results[0]['data'], dict)
    self.assertNotIn('id', results[0], "Don't include the internal id, that can be very confusing, and should not be used.")
def test_lookup_endpoint_also_works_for_enum(self):
    """Fetch the full record behind an option chosen from an enum dropdown.

    Naming here gets a little confusing: fields can be marked as enum or
    autocomplete. An autocomplete is a type-ahead search field; for an
    enum, the key/values from the spreadsheet are added directly to the
    form and it shows up as a dropdown. This covers the case of wanting
    additional data when a user selects something from a dropdown.
    """
    self.load_example_data()
    workflow = self.create_workflow('enum_options_from_file')
    # get the first form in the two form workflow.
    workflow = self.get_workflow_api(workflow)
    first_field = workflow.next_task.form['fields'][0]
    field_id = first_field['id']
    option_id = first_field['options'][0]['id']

    # Ask for the single record whose value is the id of the first option.
    url = '/v1.0/workflow/%i/lookup/%s?value=%s' % (workflow.id, field_id, option_id)
    rv = self.app.get(url,
                      headers=self.logged_in_headers(),
                      content_type="application/json")
    self.assert_success(rv)

    payload = json.loads(rv.get_data(as_text=True))
    self.assertEqual(1, len(payload))
    self.assertIsInstance(payload[0]['data'], dict)
def test_lookup_endpoint_for_task_ldap_field_lookup(self): def test_lookup_endpoint_for_task_ldap_field_lookup(self):
self.load_example_data() self.load_example_data()
workflow = self.create_workflow('ldap_lookup') workflow = self.create_workflow('ldap_lookup')