Catch the exception raised when reading an older xls spreadsheet into pandas

Renamed the `xls` variable to `xlsx`, since the file is opened with the openpyxl engine
Added a hint to error_service so validation suggests uploading a newer xlsx file
This commit is contained in:
mike cullerton 2021-11-16 11:54:31 -05:00
parent ca5e984915
commit 9f18484ebb
2 changed files with 13 additions and 3 deletions

View File

@ -19,7 +19,10 @@ known_errors = {'Non-default exclusive outgoing sequence flow without condition
'Could not set task title on task .*': 'Could not set task title on task .*':
{'hint': 'You are overriding the title using an extension and it is causing this error. ' {'hint': 'You are overriding the title using an extension and it is causing this error. '
'Look under the extensions tab for the task, and check the value you are setting ' 'Look under the extensions tab for the task, and check the value you are setting '
'for the property.'}} 'for the property.'},
'Error opening excel file .*, with file_model_id:':
{'hint': 'It looks like you are trying to use an older xls file. '
'Try uploading a newer xlsx file.'}}
class ValidationErrorService(object): class ValidationErrorService(object):

View File

@ -1,6 +1,7 @@
import logging import logging
import re import re
from collections import OrderedDict from collections import OrderedDict
from zipfile import BadZipFile
import pandas as pd import pandas as pd
import numpy import numpy
@ -163,8 +164,14 @@ class LookupService(object):
in a way that can be searched and returned via an api call - rather than sending the full set of in a way that can be searched and returned via an api call - rather than sending the full set of
options along with the form. It will only open the file and process the options if something has options along with the form. It will only open the file and process the options if something has
changed. """ changed. """
xls = ExcelFile(data_model.data, engine='openpyxl') try:
df = xls.parse(xls.sheet_names[0]) # Currently we only look at the fist sheet. xlsx = ExcelFile(data_model.data, engine='openpyxl')
# Pandas -- or at least openpyxl -- cannot read old xls files.
# The failure comes back as zipfile.BadZipFile because xlsx files are zipped XML files.
except BadZipFile as bzf:
raise ApiError(code='excel_error',
message=f'Error opening excel file {data_model.file_model.name}. You may have an older .xls spreadsheet. (file_model_id: {data_model.file_model_id} workflow_spec_id: {workflow_spec_id}, task_spec_id: {task_spec_id}, and field_id: {field_id})')
df = xlsx.parse(xlsx.sheet_names[0]) # Currently we only look at the fist sheet.
df = df.convert_dtypes() df = df.convert_dtypes()
df = df.loc[:, ~df.columns.str.contains('^Unnamed')] # Drop unnamed columns. df = df.loc[:, ~df.columns.str.contains('^Unnamed')] # Drop unnamed columns.
df = pd.DataFrame(df).dropna(how='all') # Drop null rows df = pd.DataFrame(df).dropna(how='all') # Drop null rows