mirror of
https://github.com/sartography/cr-connect-workflow.git
synced 2025-02-20 11:48:16 +00:00
Create lookup tables for XLS files referenced in workflows so we can do full-text searches and populate lists on the fly quickly.
This commit is contained in:
parent
4ab51fe8f2
commit
6de8c8b977
@ -20,6 +20,8 @@ class Task(object):
|
||||
ENUM_OPTIONS_FILE_PROP = "enum.options.file"
|
||||
EMUM_OPTIONS_VALUE_COL_PROP = "enum.options.value.column"
|
||||
EMUM_OPTIONS_LABEL_COL_PROP = "enum.options.label.column"
|
||||
EMUM_OPTIONS_AS_LOOKUP = "enum.options.lookup"
|
||||
|
||||
|
||||
def __init__(self, id, name, title, type, state, form, documentation, data,
|
||||
mi_type, mi_count, mi_index, properties):
|
||||
|
@ -1,8 +1,10 @@
|
||||
import enum
|
||||
from typing import cast
|
||||
|
||||
from marshmallow_enum import EnumField
|
||||
from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy import func, Index, text
|
||||
from sqlalchemy.dialects import postgresql
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
|
||||
from crc import db
|
||||
@ -91,3 +93,42 @@ class FileModelSchema(SQLAlchemyAutoSchema):
|
||||
include_relationships = True
|
||||
include_fk = True # Includes foreign keys
|
||||
type = EnumField(FileType)
|
||||
|
||||
|
||||
class LookupFileModel(db.Model):
    """Metadata for one key/value lookup table generated from a file.

    The content of a file (like an xlsx or csv file) is turned into a
    key/value store that can be used for lookups and searches.  This table
    holds only the metadata, so we know which version of the file was used
    and which value column and label column were used to generate the lookup
    table.  The same xls file may therefore have multiple lookup file models,
    if different value/label columns are used or someone makes a change.

    Full text search is handled over the label and value columns rather than
    every column, because we don't know how much information will be in the
    rest of the file.
    """
    __tablename__ = 'lookup_file'

    id = db.Column(db.Integer, primary_key=True)
    # Name of the spreadsheet column that supplies each row's label.
    label_column = db.Column(db.String)
    # Name of the spreadsheet column that supplies each row's value.
    value_column = db.Column(db.String)
    # The file data record this lookup table was generated from.
    file_data_model_id = db.Column(db.Integer, db.ForeignKey('file_data.id'))
|
||||
|
||||
|
||||
class LookupDataModel(db.Model):
    """One row of a lookup table: a value/label pair plus the full source row.

    Every record belongs to a LookupFileModel, which records the file and the
    columns the pair was taken from.
    """
    __tablename__ = 'lookup_data'

    id = db.Column(db.Integer, primary_key=True)
    lookup_file_model_id = db.Column(db.Integer, db.ForeignKey('lookup_file.id'))
    lookup_file_model = db.relationship(LookupFileModel)
    value = db.Column(db.String)
    label = db.Column(db.String)
    # In the future, we might allow adding an additional "search" column if
    # we want to search things that are not in the label.
    # All data for the row is stored in a json structure here, but it is not
    # searched presently.
    data = db.Column(db.JSON)

    # Ensure there is a searchable (GIN, full text) index on the label column,
    # so we can get fast results back.  Query with:
    #   search_results = LookupDataModel.query.filter(
    #       LookupDataModel.label.match("INTERNAL")).all()
    # NOTE(review): the index is built with the 'english' configuration;
    # confirm the generated match() query uses the same configuration,
    # otherwise the planner may not be able to use this index.
    __table_args__ = (
        Index(
            'ix_lookupdata_tsv',
            func.to_tsvector('english', label),
            postgresql_using='gin'
        ),
    )
|
||||
|
||||
|
@ -117,6 +117,7 @@ class Study(object):
|
||||
self.categories = categories
|
||||
self.warnings = []
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_model(cls, study_model: StudyModel):
|
||||
args = {k: v for k, v in study_model.__dict__.items() if not k.startswith('_')}
|
||||
|
@ -198,7 +198,7 @@ class FileService(object):
|
||||
|
||||
@staticmethod
|
||||
def get_workflow_file_data(workflow, file_name):
|
||||
"""Given a SPIFF Workflow Model, tracks down a file with the given name in the datbase and returns it's data"""
|
||||
"""Given a SPIFF Workflow Model, tracks down a file with the given name in the database and returns it's data"""
|
||||
workflow_spec_model = FileService.find_spec_model_in_db(workflow)
|
||||
study_id = workflow.data[WorkflowProcessor.STUDY_ID_KEY]
|
||||
|
||||
|
@ -1,6 +1,4 @@
|
||||
from SpiffWorkflow.bpmn.specs.ManualTask import ManualTask
|
||||
from SpiffWorkflow.bpmn.specs.MultiInstanceTask import MultiInstanceTask
|
||||
from SpiffWorkflow.bpmn.specs.NoneTask import NoneTask
|
||||
from SpiffWorkflow.bpmn.specs.ScriptTask import ScriptTask
|
||||
from SpiffWorkflow.bpmn.specs.UserTask import UserTask
|
||||
from SpiffWorkflow.bpmn.workflow import BpmnWorkflow
|
||||
@ -8,11 +6,13 @@ from SpiffWorkflow.dmn.specs.BuisnessRuleTask import BusinessRuleTask
|
||||
from SpiffWorkflow.specs import CancelTask, StartTask
|
||||
from pandas import ExcelFile
|
||||
|
||||
from crc import db
|
||||
from crc.api.common import ApiError
|
||||
from crc.models.api_models import Task, MultiInstanceType
|
||||
import jinja2
|
||||
from jinja2 import Template
|
||||
|
||||
from crc.models.file import FileDataModel, LookupFileModel, LookupDataModel
|
||||
from crc.services.file_service import FileService
|
||||
from crc.services.workflow_processor import WorkflowProcessor, CustomBpmnScriptEngine
|
||||
from SpiffWorkflow import Task as SpiffTask, WorkflowException
|
||||
@ -141,10 +141,10 @@ class WorkflowService(object):
|
||||
if not field.has_property(Task.EMUM_OPTIONS_VALUE_COL_PROP) or \
|
||||
not field.has_property(Task.EMUM_OPTIONS_LABEL_COL_PROP):
|
||||
raise ApiError.from_task("invalid_emum",
|
||||
"For emumerations based on an xls file, you must include 3 properties: %s, "
|
||||
"%s, and %s, you supplied %s" % (Task.ENUM_OPTIONS_FILE_PROP,
|
||||
Task.EMUM_OPTIONS_VALUE_COL_PROP,
|
||||
Task.EMUM_OPTIONS_LABEL_COL_PROP),
|
||||
"For enumerations based on an xls file, you must include 3 properties: %s, "
|
||||
"%s, and %s" % (Task.ENUM_OPTIONS_FILE_PROP,
|
||||
Task.EMUM_OPTIONS_VALUE_COL_PROP,
|
||||
Task.EMUM_OPTIONS_LABEL_COL_PROP),
|
||||
task=spiff_task)
|
||||
|
||||
# Get the file data from the File Service
|
||||
@ -152,15 +152,51 @@ class WorkflowService(object):
|
||||
value_column = field.get_property(Task.EMUM_OPTIONS_VALUE_COL_PROP)
|
||||
label_column = field.get_property(Task.EMUM_OPTIONS_LABEL_COL_PROP)
|
||||
data_model = FileService.get_workflow_file_data(spiff_task.workflow, file_name)
|
||||
lookup_model = WorkflowService.get_lookup_table(data_model, value_column, label_column)
|
||||
|
||||
# If lookup is set to true, do not populate options, a lookup will happen later.
|
||||
if field.has_property(Task.EMUM_OPTIONS_AS_LOOKUP) and field.get_property(Task.EMUM_OPTIONS_AS_LOOKUP):
|
||||
pass
|
||||
else:
|
||||
data = db.session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_model).all()
|
||||
for d in data:
|
||||
field.options.append({"id": d.value, "name": d.label})
|
||||
|
||||
|
||||
@staticmethod
def get_lookup_table(data_model: FileDataModel, value_column, label_column):
    """Return the lookup table for a file and column pair, building it if needed.

    In some cases the lookup table can be very large.  This method adds all
    values to the database in a way that can be searched and returned via an
    api call, rather than sending the full set of options along with the
    form.  The file is only opened and processed when no lookup table exists
    yet for this exact combination of file data, value column and label
    column.

    :param data_model: the FileDataModel holding the spreadsheet content.
    :param value_column: name of the spreadsheet column used as the value.
    :param label_column: name of the spreadsheet column used as the label.
    :returns: the existing or newly created LookupFileModel.
    :raises ApiError: ("invalid_emum") if either column is missing from the
        first sheet of the spreadsheet.
    """
    existing = db.session.query(LookupFileModel) \
        .filter(LookupFileModel.file_data_model_id == data_model.id) \
        .filter(LookupFileModel.value_column == value_column) \
        .filter(LookupFileModel.label_column == label_column).first()
    if existing:
        # Already generated for this file version and column pair; reuse it.
        return existing

    xls = ExcelFile(data_model.data)
    df = xls.parse(xls.sheet_names[0])  # Currently we only look at the first sheet.

    # Validate the value column first, then the label column.
    for column in (value_column, label_column):
        if column not in df:
            raise ApiError("invalid_emum",
                           "The file %s does not contain a column named % s" % (data_model.file_model.name,
                                                                                column))

    lookup_model = LookupFileModel(label_column=label_column, value_column=value_column,
                                   file_data_model_id=data_model.id)
    db.session.add(lookup_model)

    for _, row in df.iterrows():
        # NOTE(review): row.to_json() yields a JSON string, so the JSON column
        # presumably stores a string rather than a mapping - confirm intended.
        lookup_data = LookupDataModel(lookup_file_model=lookup_model,
                                      value=row[value_column],
                                      label=row[label_column],
                                      data=row.to_json())
        db.session.add(lookup_data)

    # A single commit covers the metadata record and every data row.
    db.session.commit()
    return lookup_model
|
||||
|
@ -1,12 +1,15 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from sqlalchemy import func
|
||||
|
||||
from crc import session, app
|
||||
from crc.models.api_models import WorkflowApiSchema, Task
|
||||
from crc.models.file import FileModelSchema
|
||||
from crc.models.file import FileModelSchema, FileDataModel, FileModel, LookupFileModel, LookupDataModel
|
||||
from crc.models.stats import WorkflowStatsModel, TaskEventModel
|
||||
from crc.models.study import StudyModel
|
||||
from crc.models.workflow import WorkflowSpecModelSchema, WorkflowModel, WorkflowStatus
|
||||
from crc.services.file_service import FileService
|
||||
from crc.services.workflow_processor import WorkflowProcessor
|
||||
from crc.services.workflow_service import WorkflowService
|
||||
from tests.base_test import BaseTest
|
||||
@ -75,5 +78,54 @@ class TestWorkflowService(BaseTest):
|
||||
WorkflowService._process_options(task, task.task_spec.form.fields[0])
|
||||
options = task.task_spec.form.fields[0].options
|
||||
self.assertEquals(19, len(options))
|
||||
self.assertEquals(1000, options[0]['id'])
|
||||
self.assertEquals("UVA - INTERNAL - GM USE ONLY", options[0]['name'])
|
||||
self.assertEquals('1000', options[0]['id'])
|
||||
self.assertEquals("UVA - INTERNAL - GM USE ONLY", options[0]['name'])
|
||||
|
||||
def test_create_lookup_file(self):
    """Building a lookup table stores its metadata, its rows, and is searchable.

    Loads the 'enum_options_from_file' spec, generates the lookup table for
    customer_list.xls, then checks the metadata record, the stored rows and
    full text search over the label column.
    """
    # Called for its side effect of loading the spec and its files.
    self.load_test_spec('enum_options_from_file')
    file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
    file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()

    WorkflowService.get_lookup_table(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")

    # Exactly one metadata record, remembering the columns that were used.
    lookup_records = session.query(LookupFileModel).all()
    self.assertIsNotNone(lookup_records)
    self.assertEqual(1, len(lookup_records))
    lookup_record = lookup_records[0]
    self.assertIsNotNone(lookup_record)
    self.assertEqual("CUSTOMER_NUMBER", lookup_record.value_column)
    self.assertEqual("CUSTOMER_NAME", lookup_record.label_column)

    # Every spreadsheet row became a lookup data record.
    lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
    self.assertEqual(19, len(lookup_data))
    self.assertEqual("1000", lookup_data[0].value)
    self.assertEqual("UVA - INTERNAL - GM USE ONLY", lookup_data[0].label)

    # Full text search on the label is case-insensitive.
    search_results = LookupDataModel.query.filter(LookupDataModel.label.match("INTERNAL")).all()
    self.assertEqual(1, len(search_results))
    search_results = LookupDataModel.query.filter(LookupDataModel.label.match("internal")).all()
    self.assertEqual(1, len(search_results))

    # This query finds results where a word starts with "bio"
    search_results = LookupDataModel.query.filter(LookupDataModel.label.match("bio:*")).all()
    self.assertEqual(2, len(search_results))
|
||||
|
||||
def test_create_lookup_file_multiple_times_does_not_update_database(self):
    """Repeated requests for the same lookup table reuse the stored records.

    A second metadata record is only created when the value/label columns
    differ from those already used for the file.
    """
    # Called for its side effect of loading the spec and its files.
    self.load_test_spec('enum_options_from_file')
    file_model = session.query(FileModel).filter(FileModel.name == "customer_list.xls").first()
    file_data_model = session.query(FileDataModel).filter(FileDataModel.file_model == file_model).first()

    # Asking three times for the same table must create it only once.
    for _ in range(3):
        WorkflowService.get_lookup_table(file_data_model, "CUSTOMER_NUMBER", "CUSTOMER_NAME")

    lookup_records = session.query(LookupFileModel).all()
    self.assertIsNotNone(lookup_records)
    self.assertEqual(1, len(lookup_records))
    lookup_record = lookup_records[0]
    lookup_data = session.query(LookupDataModel).filter(LookupDataModel.lookup_file_model == lookup_record).all()
    self.assertEqual(19, len(lookup_data))

    # Using the same table with a different lookup label or value does create additional records.
    WorkflowService.get_lookup_table(file_data_model, "CUSTOMER_NAME", "CUSTOMER_NUMBER")
    lookup_records = session.query(LookupFileModel).all()
    self.assertIsNotNone(lookup_records)
    self.assertEqual(2, len(lookup_records))
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user