From 9f18484ebb7fbd724f82815f0f7482d53dee361a Mon Sep 17 00:00:00 2001 From: mike cullerton Date: Tue, 16 Nov 2021 11:54:31 -0500 Subject: [PATCH] Grab exception when reading older xls spreadsheet into pandas Renamed `xls` variable to `xlsx`, so it makes more sense Added a hint to error_service for validation --- crc/services/error_service.py | 5 ++++- crc/services/lookup_service.py | 11 +++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/crc/services/error_service.py b/crc/services/error_service.py index ffbd2143..3810ea5a 100644 --- a/crc/services/error_service.py +++ b/crc/services/error_service.py @@ -19,7 +19,10 @@ known_errors = {'Non-default exclusive outgoing sequence flow without condition 'Could not set task title on task .*': {'hint': 'You are overriding the title using an extension and it is causing this error. ' 'Look under the extensions tab for the task, and check the value you are setting ' - 'for the property.'}} + 'for the property.'}, + 'Error opening excel file .*': + {'hint': 'It looks like you are trying to use an older xls file. ' + 'Try uploading a newer xlsx file.'}} class ValidationErrorService(object): diff --git a/crc/services/lookup_service.py b/crc/services/lookup_service.py index 385bd660..63755edf 100644 --- a/crc/services/lookup_service.py +++ b/crc/services/lookup_service.py @@ -1,6 +1,7 @@ import logging import re from collections import OrderedDict +from zipfile import BadZipFile import pandas as pd import numpy @@ -163,8 +164,14 @@ class LookupService(object): in a way that can be searched and returned via an api call - rather than sending the full set of options along with the form. It will only open the file and process the options if something has changed. """ - xls = ExcelFile(data_model.data, engine='openpyxl') - df = xls.parse(xls.sheet_names[0]) # Currently we only look at the fist sheet.
+ try: + xlsx = ExcelFile(data_model.data, engine='openpyxl') + # pandas (via the openpyxl engine) cannot read legacy .xls files. + # They surface as zipfile.BadZipFile because .xlsx files are zipped XML. + except BadZipFile as bzf: + raise ApiError(code='excel_error', + message=f'Error opening excel file {data_model.file_model.name}. You may have an older .xls spreadsheet. (file_model_id: {data_model.file_model_id} workflow_spec_id: {workflow_spec_id}, task_spec_id: {task_spec_id}, and field_id: {field_id})') from bzf + df = xlsx.parse(xlsx.sheet_names[0]) # Currently we only look at the first sheet. df = df.convert_dtypes() df = df.loc[:, ~df.columns.str.contains('^Unnamed')] # Drop unnamed columns. df = pd.DataFrame(df).dropna(how='all') # Drop null rows