Catch the exception raised when reading an older .xls spreadsheet into pandas

Renamed `xls` variable to `xlsx`, so it makes more sense
Added a hint to error_service for validation
This commit is contained in:
mike cullerton 2021-11-16 11:54:31 -05:00
parent ca5e984915
commit 9f18484ebb
2 changed files with 13 additions and 3 deletions

View File

@ -19,7 +19,10 @@ known_errors = {'Non-default exclusive outgoing sequence flow without condition
'Could not set task title on task .*':
{'hint': 'You are overriding the title using an extension and it is causing this error. '
'Look under the extensions tab for the task, and check the value you are setting '
'for the property.'}}
'for the property.'},
'Error opening excel file .*, with file_model_id:':
{'hint': 'It looks like you are trying to use an older xls file. '
'Try uploading a newer xlsx file.'}}
class ValidationErrorService(object):

View File

@ -1,6 +1,7 @@
import logging
import re
from collections import OrderedDict
from zipfile import BadZipFile
import pandas as pd
import numpy
@ -163,8 +164,14 @@ class LookupService(object):
in a way that can be searched and returned via an api call - rather than sending the full set of
options along with the form. It will only open the file and process the options if something has
changed. """
xls = ExcelFile(data_model.data, engine='openpyxl')
df = xls.parse(xls.sheet_names[0]) # Currently we only look at the fist sheet.
try:
xlsx = ExcelFile(data_model.data, engine='openpyxl')
# pandas (or at least its openpyxl engine) cannot read old .xls files.
# The failure comes back as zipfile.BadZipFile because .xlsx files are zipped XML files.
except BadZipFile as bzf:
raise ApiError(code='excel_error',
message=f'Error opening excel file {data_model.file_model.name}. You may have an older .xls spreadsheet. (file_model_id: {data_model.file_model_id} workflow_spec_id: {workflow_spec_id}, task_spec_id: {task_spec_id}, and field_id: {field_id})')
df = xlsx.parse(xlsx.sheet_names[0]) # Currently we only look at the first sheet.
df = df.convert_dtypes()
df = df.loc[:, ~df.columns.str.contains('^Unnamed')] # Drop unnamed columns.
df = pd.DataFrame(df).dropna(how='all') # Drop null rows