Catch exception when reading an older .xls spreadsheet into pandas

Renamed the `xls` variable to `xlsx` so it makes more sense. Added a hint to error_service for validation.
2025-02-23 05:08:32 +00:00 · 2021-11-16 11:54:31 -05:00 · 2021-11-16 11:54:31 -05:00 · 9f18484ebb
commit 9f18484ebb
parent ca5e984915
2 changed files with 13 additions and 3 deletions
--- a/crc/services/error_service.py
+++ b/crc/services/error_service.py
@ -19,7 +19,10 @@ known_errors = {'Non-default exclusive outgoing sequence flow  without condition
                'Could not set task title on task .*':
                {'hint': 'You are overriding the title using an extension and it is causing this error. '
                         'Look under the extensions tab for the task, and check the value you are setting '
-                         'for the property.'}}
+                         'for the property.'},
+                'Error opening excel file .*, with file_model_id:':
+                {'hint': 'It looks like you are trying to use an older xls file. '
+                         'Try uploading a newer xlsx file.'}}


 class ValidationErrorService(object):
--- a/crc/services/lookup_service.py
+++ b/crc/services/lookup_service.py
@ -1,6 +1,7 @@
 import logging
 import re
 from collections import OrderedDict
+from zipfile import BadZipFile

 import pandas as pd
 import numpy
@ -163,8 +164,14 @@ class LookupService(object):
         in a way that can be searched and returned via an api call - rather than sending the full set of
          options along with the form.  It will only open the file and process the options if something has
          changed.  """
-        xls = ExcelFile(data_model.data, engine='openpyxl')
-        df = xls.parse(xls.sheet_names[0])  # Currently we only look at the fist sheet.
+        try:
+            xlsx = ExcelFile(data_model.data, engine='openpyxl')
+        # Pandas (or at least openpyxl) cannot read old xls files.
+        # This comes back as zipfile.BadZipFile because xlsx files are zipped xml files.
+        except BadZipFile as bzf:
+            raise ApiError(code='excel_error',
+                           message=f'Error opening excel file {data_model.file_model.name}. You may have an older .xls spreadsheet. (file_model_id: {data_model.file_model_id} workflow_spec_id: {workflow_spec_id}, task_spec_id: {task_spec_id}, and field_id: {field_id})')
+        df = xlsx.parse(xlsx.sheet_names[0])  # Currently we only look at the first sheet.
        df = df.convert_dtypes()
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')] # Drop unnamed columns.
        df = pd.DataFrame(df).dropna(how='all')  # Drop null rows