Merge pull request #133 from sartography/feature/lookup_by_id

Feature/lookup by
This commit is contained in:
Dan Funk 2020-06-30 14:12:00 -04:00 committed by GitHub
commit 49a4670b64
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 111 additions and 44 deletions

View File

@ -703,12 +703,19 @@ paths:
description: The string to search for in the Value column of the lookup table.
schema:
type: string
- name: value
in: query
required: false
description: An alternative to query, this accepts the specific value or id selected in a dropdown list or auto-complete, and will return the one matching record. Useful for getting additional details about an item selected in a dropdown.
schema:
type: string
- name: limit
in: query
required: false
description: The total number of records to return, defaults to 10.
schema:
type: integer
get:
operationId: crc.api.workflow.lookup
summary: Provides type-ahead search against a lookup table associated with a form field.

View File

@ -41,7 +41,6 @@ def get_workflow_specification(spec_id):
def validate_workflow_specification(spec_id):
errors = []
try:
WorkflowService.test_spec(spec_id)
@ -57,7 +56,6 @@ def validate_workflow_specification(spec_id):
return ApiErrorSchema(many=True).dump(errors)
def update_workflow_specification(spec_id, body):
if spec_id is None:
raise ApiError('unknown_spec', 'Please provide a valid Workflow Spec ID.')
@ -200,7 +198,7 @@ def delete_workflow_spec_category(cat_id):
session.commit()
def lookup(workflow_id, field_id, query, limit):
def lookup(workflow_id, field_id, query=None, value=None, limit=10):
"""
given a field in a task, attempts to find the lookup table or function associated
with that field and runs a full-text query against it to locate the values and
@ -208,14 +206,15 @@ def lookup(workflow_id, field_id, query, limit):
Tries to be fast, but first runs will be very slow.
"""
workflow = session.query(WorkflowModel).filter(WorkflowModel.id == workflow_id).first()
lookup_data = LookupService.lookup(workflow, field_id, query, limit)
lookup_data = LookupService.lookup(workflow, field_id, query, value, limit)
return LookupDataSchema(many=True).dump(lookup_data)
def __get_user_uid(user_uid):
if 'user' in g:
if g.user.uid not in app.config['ADMIN_UIDS'] and user_uid != g.user.uid:
raise ApiError("permission_denied", "You are not authorized to edit the task data for this workflow.", status_code=403)
raise ApiError("permission_denied", "You are not authorized to edit the task data for this workflow.",
status_code=403)
else:
return g.user.uid

View File

@ -153,6 +153,7 @@ class LookupFileModel(db.Model):
file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id'))
dependencies = db.relationship("LookupDataModel", lazy="select", backref="lookup_file_model", cascade="all, delete, delete-orphan")
class LookupDataModel(db.Model):
__tablename__ = 'lookup_data'
id = db.Column(db.Integer, primary_key=True)
@ -181,6 +182,7 @@ class LookupDataSchema(SQLAlchemyAutoSchema):
load_instance = True
include_relationships = False
include_fk = False # Includes foreign keys
exclude = ['id'] # Do not include the id field, it should never be used via the API.
class SimpleFileSchema(ma.Schema):

View File

@ -1,7 +1,9 @@
import logging
import re
from collections import OrderedDict
from pandas import ExcelFile
import pandas as pd
from pandas import ExcelFile, np
from sqlalchemy import func, desc
from sqlalchemy.sql.functions import GenericFunction
@ -19,8 +21,8 @@ class TSRank(GenericFunction):
package = 'full_text'
name = 'ts_rank'
class LookupService(object):
class LookupService(object):
"""Provides tools for doing lookups for auto-complete fields.
This can currently take two forms:
1) Lookup from spreadsheet data associated with a workflow specification.
@ -50,7 +52,7 @@ class LookupService(object):
# if not, we need to rebuild the lookup table.
is_current = False
if lookup_model:
is_current = db.session.query(WorkflowSpecDependencyFile).\
is_current = db.session.query(WorkflowSpecDependencyFile). \
filter(WorkflowSpecDependencyFile.file_data_id == lookup_model.file_data_model_id).count()
if not is_current:
@ -62,16 +64,14 @@ class LookupService(object):
return lookup_model
@staticmethod
def lookup(workflow, field_id, query, limit):
def lookup(workflow, field_id, query, value=None, limit=10):
lookup_model = LookupService.__get_lookup_model(workflow, field_id)
if lookup_model.is_ldap:
return LookupService._run_ldap_query(query, limit)
else:
return LookupService._run_lookup_query(lookup_model, query, limit)
return LookupService._run_lookup_query(lookup_model, query, value, limit)
@staticmethod
def create_lookup_model(workflow_model, field_id):
@ -116,8 +116,8 @@ class LookupService(object):
is_ldap=True)
else:
raise ApiError("unknown_lookup_option",
"Lookup supports using spreadsheet options or ldap options, and neither "
"was provided.")
"Lookup supports using spreadsheet options or ldap options, and neither "
"was provided.")
db.session.add(lookup_model)
db.session.commit()
return lookup_model
@ -130,6 +130,7 @@ class LookupService(object):
changed. """
xls = ExcelFile(data_model.data)
df = xls.parse(xls.sheet_names[0]) # Currently we only look at the first sheet.
df = pd.DataFrame(df).replace({np.nan: None})
if value_column not in df:
raise ApiError("invalid_emum",
"The file %s does not contain a column named % s" % (data_model.file_model.name,
@ -149,39 +150,40 @@ class LookupService(object):
lookup_data = LookupDataModel(lookup_file_model=lookup_model,
value=row[value_column],
label=row[label_column],
data=row.to_json())
data=row.to_dict(OrderedDict))
db.session.add(lookup_data)
db.session.commit()
return lookup_model
@staticmethod
def _run_lookup_query(lookup_file_model, query, limit):
def _run_lookup_query(lookup_file_model, query, value, limit):
db_query = LookupDataModel.query.filter(LookupDataModel.lookup_file_model == lookup_file_model)
if value is not None: # Then just find the model with that value
db_query = db_query.filter(LookupDataModel.value == value)
else:
# Build a full text query that takes all the terms provided and executes each term as a prefix query, and
# OR's those queries together. The order of the results is handled as a standard "Like" on the original
# string which seems to work intuitively for most entries.
query = re.sub('[^A-Za-z0-9 ]+', '', query) # Strip out everything except ASCII letters, digits, and spaces.
query = re.sub(r'\s+', ' ', query) # Convert multiple space like characters to just one space, as we split on spaces.
print("Query: " + query)
query = query.strip()
if len(query) > 0:
if ' ' in query:
terms = query.split(' ')
new_terms = ["'%s'" % query]
for t in terms:
new_terms.append("%s:*" % t)
new_query = ' | '.join(new_terms)
else:
new_query = "%s:*" % query
query = re.sub('[^A-Za-z0-9 ]+', '', query)
print("Query: " + query)
query = query.strip()
if len(query) > 0:
if ' ' in query:
terms = query.split(' ')
new_terms = ["'%s'" % query]
for t in terms:
new_terms.append("%s:*" % t)
new_query = ' | '.join(new_terms)
else:
new_query = "%s:*" % query
# Run the full text query
db_query = db_query.filter(LookupDataModel.label.match(new_query))
# But hackishly order by like, which does a good job of
# pulling more relevant matches to the top.
db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
# Run the full text query
db_query = db_query.filter(LookupDataModel.label.match(new_query))
# But hackishly order by like, which does a good job of
# pulling more relevant matches to the top.
db_query = db_query.order_by(desc(LookupDataModel.label.like("%" + query + "%")))
#ORDER BY name LIKE concat('%', ticker, '%') desc, rank DESC
# db_query = db_query.order_by(desc(func.full_text.ts_rank(
# func.to_tsvector(LookupDataModel.label),
# func.to_tsquery(query))))
from sqlalchemy.dialects import postgresql
logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
result = db_query.limit(limit).all()
logging.getLogger('sqlalchemy.engine').setLevel(logging.ERROR)
@ -196,8 +198,8 @@ class LookupService(object):
we return a lookup data model."""
user_list = []
for user in users:
user_list.append( {"value": user['uid'],
"label": user['display_name'] + " (" + user['uid'] + ")",
"data": user
})
return user_list
user_list.append({"value": user['uid'],
"label": user['display_name'] + " (" + user['uid'] + ")",
"data": user
})
return user_list

View File

@ -61,6 +61,15 @@ class TestLookupService(BaseTest):
lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
self.assertEqual(4, len(lookup_data))
def test_lookup_based_on_id(self):
# Checks that LookupService.lookup returns exactly one record, with dict data, when it is
# given a specific value instead of a search query.
# NOTE(review): `spec` is not used below; presumably the call is kept for its loading side effect - confirm.
spec = BaseTest.load_test_spec('enum_options_from_file')
workflow = self.create_workflow('enum_options_from_file')
processor = WorkflowProcessor(workflow)
processor.do_engine_steps()
# Empty query string plus an explicit value: ask for the record whose value is "1000".
results = LookupService.lookup(workflow, "AllTheNames", "", value="1000", limit=10)
self.assertEqual(1, len(results), "It is possible to find an item based on the id, rather than as a search")
self.assertIsNotNone(results[0].data)
self.assertIsInstance(results[0].data, dict)
def test_some_full_text_queries(self):
@ -114,6 +123,9 @@ class TestLookupService(BaseTest):
results = LookupService.lookup(workflow, "AllTheNames", "1 (!-Something", limit=10)
self.assertEqual("1 Something", results[0].label, "special characters don't flake out")
results = LookupService.lookup(workflow, "AllTheNames", "1 Something", limit=10)
self.assertEqual("1 Something", results[0].label, "double spaces should not be an issue.")
# 1018 10000 Something Industry

View File

@ -343,6 +343,51 @@ class TestTasksApi(BaseTest):
results = json.loads(rv.get_data(as_text=True))
self.assertEqual(5, len(results))
def test_lookup_endpoint_for_task_field_using_lookup_entry_id(self):
    """Fetch a single lookup record through the endpoint by its value.

    Runs a type-ahead search first to obtain a real value, then requests
    that value through the ``value`` query parameter. Checks that exactly
    one record comes back, that its ``data`` is a dict, and that no ``id``
    key appears in the returned record.
    """
    self.load_example_data()
    workflow = self.create_workflow('enum_options_with_search')
    # get the first form in the two form workflow.
    workflow = self.get_workflow_api(workflow)
    task = workflow.next_task
    field_id = task.form['fields'][0]['id']

    # Step 1: ordinary search, used only to discover a value that exists.
    rv = self.app.get('/v1.0/workflow/%i/lookup/%s?query=%s&limit=5' %
                      (workflow.id, field_id, 'c'),  # All records with a word that starts with 'c'
                      headers=self.logged_in_headers(),
                      content_type="application/json")
    self.assert_success(rv)
    results = json.loads(rv.get_data(as_text=True))
    self.assertEqual(5, len(results))

    # Step 2: ask for that one record by its value rather than by a query.
    rv = self.app.get('/v1.0/workflow/%i/lookup/%s?value=%s' %
                      (workflow.id, field_id, results[0]['value']),  # Only the record with this exact value
                      headers=self.logged_in_headers(),
                      content_type="application/json")
    # Check the status before parsing, as the other requests in this test
    # class do; otherwise a failed request surfaces as a confusing JSON or
    # length error instead of a clear failure.
    self.assert_success(rv)
    results = json.loads(rv.get_data(as_text=True))
    self.assertEqual(1, len(results))
    self.assertIsInstance(results[0]['data'], dict)
    self.assertNotIn('id', results[0], "Don't include the internal id, that can be very confusing, and should not be used.")
def test_lookup_endpoint_also_works_for_enum(self):
# Naming here gets a little confusing. Fields can be marked as enum or autocomplete.
# In the event of an auto-complete it's a type-ahead search field; for an enum
# the key/values from the spreadsheet are added directly to the form and it shows up as
# a dropdown. This tests the case of wanting to get additional data when a user selects
# something from a dropdown.
self.load_example_data()
workflow = self.create_workflow('enum_options_from_file')
# get the first form in the two form workflow.
workflow = self.get_workflow_api(workflow)
task = workflow.next_task
field_id = task.form['fields'][0]['id']
option_id = task.form['fields'][0]['options'][0]['id']
rv = self.app.get('/v1.0/workflow/%i/lookup/%s?value=%s' %
(workflow.id, field_id, option_id), # The id of the first option on the form field, passed as the value
headers=self.logged_in_headers(),
content_type="application/json")
self.assert_success(rv)
results = json.loads(rv.get_data(as_text=True))
self.assertEqual(1, len(results))
self.assertIsInstance(results[0]['data'], dict)
def test_lookup_endpoint_for_task_ldap_field_lookup(self):
self.load_example_data()
workflow = self.create_workflow('ldap_lookup')