Catch the exception raised when reading an older xls spreadsheet into pandas
Renamed the `xls` variable to `xlsx` so it makes more sense. Added a hint to error_service for validation.
This commit is contained in:
parent
ca5e984915
commit
9f18484ebb
|
@ -19,7 +19,10 @@ known_errors = {'Non-default exclusive outgoing sequence flow without condition
|
||||||
'Could not set task title on task .*':
|
'Could not set task title on task .*':
|
||||||
{'hint': 'You are overriding the title using an extension and it is causing this error. '
|
{'hint': 'You are overriding the title using an extension and it is causing this error. '
|
||||||
'Look under the extensions tab for the task, and check the value you are setting '
|
'Look under the extensions tab for the task, and check the value you are setting '
|
||||||
'for the property.'}}
|
'for the property.'},
|
||||||
|
'Error opening excel file .*, with file_model_id:':
|
||||||
|
{'hint': 'It looks like you are trying to use an older xls file. '
|
||||||
|
'Try uploading a newer xlsx file.'}}
|
||||||
|
|
||||||
|
|
||||||
class ValidationErrorService(object):
|
class ValidationErrorService(object):
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
from zipfile import BadZipFile
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy
|
import numpy
|
||||||
|
@ -163,8 +164,14 @@ class LookupService(object):
|
||||||
in a way that can be searched and returned via an api call - rather than sending the full set of
|
in a way that can be searched and returned via an api call - rather than sending the full set of
|
||||||
options along with the form. It will only open the file and process the options if something has
|
options along with the form. It will only open the file and process the options if something has
|
||||||
changed. """
|
changed. """
|
||||||
xls = ExcelFile(data_model.data, engine='openpyxl')
|
try:
|
||||||
df = xls.parse(xls.sheet_names[0]) # Currently we only look at the fist sheet.
|
xlsx = ExcelFile(data_model.data, engine='openpyxl')
|
||||||
|
# Pandas--or at least openpyxl, cannot read old xls files.
|
||||||
|
# This comes back as zipfile.BadZipFile because xlsx files are zipped xml files
|
||||||
|
except BadZipFile as bzf:
|
||||||
|
raise ApiError(code='excel_error',
|
||||||
|
message=f'Error opening excel file {data_model.file_model.name}. You may have an older .xls spreadsheet. (file_model_id: {data_model.file_model_id} workflow_spec_id: {workflow_spec_id}, task_spec_id: {task_spec_id}, and field_id: {field_id})')
|
||||||
|
df = xlsx.parse(xlsx.sheet_names[0]) # Currently we only look at the fist sheet.
|
||||||
df = df.convert_dtypes()
|
df = df.convert_dtypes()
|
||||||
df = df.loc[:, ~df.columns.str.contains('^Unnamed')] # Drop unnamed columns.
|
df = df.loc[:, ~df.columns.str.contains('^Unnamed')] # Drop unnamed columns.
|
||||||
df = pd.DataFrame(df).dropna(how='all') # Drop null rows
|
df = pd.DataFrame(df).dropna(how='all') # Drop null rows
|
||||||
|
|
Loading…
Reference in New Issue