Merge pull request #424 from sartography/xls-xlsx-issue-540
Xls xlsx issue #540
This commit is contained in:
commit
c8c8c8b244
|
@ -19,7 +19,10 @@ known_errors = {'Non-default exclusive outgoing sequence flow without condition
|
|||
'Could not set task title on task .*':
|
||||
{'hint': 'You are overriding the title using an extension and it is causing this error. '
|
||||
'Look under the extensions tab for the task, and check the value you are setting '
|
||||
'for the property.'}}
|
||||
'for the property.'},
|
||||
'Error opening excel file .*, with file_model_id:':
|
||||
{'hint': 'It looks like you are trying to use an older xls file. '
|
||||
'Try uploading a newer xlsx file.'}}
|
||||
|
||||
|
||||
class ValidationErrorService(object):
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import logging
|
||||
import re
|
||||
from collections import OrderedDict
|
||||
from zipfile import BadZipFile
|
||||
|
||||
import pandas as pd
|
||||
import numpy
|
||||
|
@ -163,8 +164,14 @@ class LookupService(object):
|
|||
in a way that can be searched and returned via an api call - rather than sending the full set of
|
||||
options along with the form. It will only open the file and process the options if something has
|
||||
changed. """
|
||||
xls = ExcelFile(data_model.data, engine='openpyxl')
|
||||
df = xls.parse(xls.sheet_names[0]) # Currently we only look at the first sheet.
|
||||
try:
|
||||
xlsx = ExcelFile(data_model.data, engine='openpyxl')
|
||||
# Pandas (or at least openpyxl) cannot read old xls files.
|
||||
# The error comes back as zipfile.BadZipFile because xlsx files are zipped XML files.
|
||||
except BadZipFile:
|
||||
raise ApiError(code='excel_error',
|
||||
message=f'Error opening excel file {data_model.file_model.name}. You may have an older .xls spreadsheet. (file_model_id: {data_model.file_model_id} workflow_spec_id: {workflow_spec_id}, task_spec_id: {task_spec_id}, and field_id: {field_id})')
|
||||
df = xlsx.parse(xlsx.sheet_names[0]) # Currently we only look at the first sheet.
|
||||
df = df.convert_dtypes()
|
||||
df = df.loc[:, ~df.columns.str.contains('^Unnamed')] # Drop unnamed columns.
|
||||
df = pd.DataFrame(df).dropna(how='all') # Drop null rows
|
||||
|
|
Binary file not shown.
|
@ -176,3 +176,21 @@ class TestLookupService(BaseTest):
|
|||
first_result = result[0]
|
||||
self.assertEquals(1000, first_result['CUSTOMER_NUMBER'])
|
||||
self.assertEquals('UVA - INTERNAL - GM USE ONLY', first_result['CUSTOMER_NAME'])
|
||||
|
||||
def test_lookup_fails_for_xls(self):
|
||||
BaseTest.load_test_spec('enum_options_with_search')
|
||||
|
||||
# Using an old xls file should raise an error
|
||||
file_model_xls = session.query(FileModel).filter(FileModel.name == 'sponsors.xls').first()
|
||||
file_data_model_xls = session.query(FileDataModel).filter(FileDataModel.file_model_id == file_model_xls.id).first()
|
||||
with self.assertRaises(ApiError) as ae:
|
||||
LookupService.build_lookup_table(file_data_model_xls, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME')
|
||||
self.assertIn('Error opening excel file', ae.exception.args[0])
|
||||
|
||||
# Using an xlsx file should work
|
||||
file_model_xlsx = session.query(FileModel).filter(FileModel.name == 'sponsors.xlsx').first()
|
||||
file_data_model_xlsx = session.query(FileDataModel).filter(FileDataModel.file_model_id == file_model_xlsx.id).first()
|
||||
lookup_model = LookupService.build_lookup_table(file_data_model_xlsx, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME')
|
||||
self.assertEqual(28, len(lookup_model.dependencies))
|
||||
self.assertIn('CUSTOMER_NAME', lookup_model.dependencies[0].data.keys())
|
||||
self.assertIn('CUSTOMER_NUMBER', lookup_model.dependencies[0].data.keys())
|
||||
|
|
Loading…
Reference in New Issue