Merge pull request #424 from sartography/xls-xlsx-issue-540

Xls xlsx issue #540
This commit is contained in:
Dan Funk 2021-11-30 10:50:33 -05:00 committed by GitHub
commit c8c8c8b244
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 31 additions and 3 deletions

View File

@ -19,7 +19,10 @@ known_errors = {'Non-default exclusive outgoing sequence flow without condition
'Could not set task title on task .*':
{'hint': 'You are overriding the title using an extension and it is causing this error. '
'Look under the extensions tab for the task, and check the value you are setting '
'for the property.'}}
'for the property.'},
'Error opening excel file .*, with file_model_id:':
{'hint': 'It looks like you are trying to use an older xls file. '
'Try uploading a newer xlsx file.'}}
class ValidationErrorService(object):

View File

@ -1,6 +1,7 @@
import logging
import re
from collections import OrderedDict
from zipfile import BadZipFile
import pandas as pd
import numpy
@ -163,8 +164,14 @@ class LookupService(object):
in a way that can be searched and returned via an api call - rather than sending the full set of
options along with the form. It will only open the file and process the options if something has
changed. """
xls = ExcelFile(data_model.data, engine='openpyxl')
df = xls.parse(xls.sheet_names[0]) # Currently we only look at the first sheet.
try:
xlsx = ExcelFile(data_model.data, engine='openpyxl')
# Pandas (or at least openpyxl) cannot read old xls files.
# The error surfaces as zipfile.BadZipFile because xlsx files are zipped xml files and old xls files are not.
except BadZipFile:
raise ApiError(code='excel_error',
message=f'Error opening excel file {data_model.file_model.name}. You may have an older .xls spreadsheet. (file_model_id: {data_model.file_model_id} workflow_spec_id: {workflow_spec_id}, task_spec_id: {task_spec_id}, and field_id: {field_id})')
df = xlsx.parse(xlsx.sheet_names[0]) # Currently we only look at the first sheet.
df = df.convert_dtypes()
df = df.loc[:, ~df.columns.str.contains('^Unnamed')] # Drop unnamed columns.
df = pd.DataFrame(df).dropna(how='all') # Drop null rows

Binary file not shown.

View File

@ -176,3 +176,21 @@ class TestLookupService(BaseTest):
first_result = result[0]
self.assertEquals(1000, first_result['CUSTOMER_NUMBER'])
self.assertEquals('UVA - INTERNAL - GM USE ONLY', first_result['CUSTOMER_NAME'])
def test_lookup_fails_for_xls(self):
# Checks that build_lookup_table raises ApiError for the 'sponsors.xls' file
# and succeeds for the 'sponsors.xlsx' file.
# Presumably the 'enum_options_with_search' spec ships both spreadsheets -- verify against the test data.
BaseTest.load_test_spec('enum_options_with_search')
# Using an old xls file should raise an error
file_model_xls = session.query(FileModel).filter(FileModel.name == 'sponsors.xls').first()
file_data_model_xls = session.query(FileDataModel).filter(FileDataModel.file_model_id == file_model_xls.id).first()
with self.assertRaises(ApiError) as ae:
LookupService.build_lookup_table(file_data_model_xls, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME')
# NOTE(review): this check only runs if it sits outside the `with` block above,
# because the call inside the block is expected to raise -- confirm the indentation.
self.assertIn('Error opening excel file', ae.exception.args[0])
# Using an xlsx file should work
file_model_xlsx = session.query(FileModel).filter(FileModel.name == 'sponsors.xlsx').first()
file_data_model_xlsx = session.query(FileDataModel).filter(FileDataModel.file_model_id == file_model_xlsx.id).first()
lookup_model = LookupService.build_lookup_table(file_data_model_xlsx, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME')
# The resulting lookup model is expected to hold 28 dependency entries,
# each exposing both columns as keys of its data mapping.
self.assertEqual(28, len(lookup_model.dependencies))
self.assertIn('CUSTOMER_NAME', lookup_model.dependencies[0].data.keys())
self.assertIn('CUSTOMER_NUMBER', lookup_model.dependencies[0].data.keys())