Merge pull request #424 from sartography/xls-xlsx-issue-540

Xls xlsx issue #540
2021-11-30 10:50:33 -05:00 · 2021-11-30 10:50:33 -05:00 · c8c8c8b244
parent 8ec58fd6d7 5e97953c19
commit c8c8c8b244
4 changed files with 31 additions and 3 deletions
--- a/crc/services/error_service.py
+++ b/crc/services/error_service.py
@ -19,7 +19,10 @@ known_errors = {'Non-default exclusive outgoing sequence flow  without condition
                'Could not set task title on task .*':
                {'hint': 'You are overriding the title using an extension and it is causing this error. '
                         'Look under the extensions tab for the task, and check the value you are setting '
-                         'for the property.'}}
+                         'for the property.'},
+                'Error opening excel file .*, with file_model_id:':
+                {'hint': 'It looks like you are trying to use an older xls file. '
+                         'Try uploading a newer xlsx file.'}}


 class ValidationErrorService(object):
--- a/crc/services/lookup_service.py
+++ b/crc/services/lookup_service.py
@ -1,6 +1,7 @@
 import logging
 import re
 from collections import OrderedDict
+from zipfile import BadZipFile

 import pandas as pd
 import numpy
@ -163,8 +164,14 @@ class LookupService(object):
         in a way that can be searched and returned via an api call - rather than sending the full set of
          options along with the form.  It will only open the file and process the options if something has
          changed.  """
-        xls = ExcelFile(data_model.data, engine='openpyxl')
-        df = xls.parse(xls.sheet_names[0])  # Currently we only look at the fist sheet.
+        try:
+            xlsx = ExcelFile(data_model.data, engine='openpyxl')
+        # Pandas -- or at least openpyxl -- cannot read old xls files.
+        # The error comes back as zipfile.BadZipFile because xlsx files are zipped XML files.
+        except BadZipFile:
+            raise ApiError(code='excel_error',
+                           message=f'Error opening excel file {data_model.file_model.name}. You may have an older .xls spreadsheet. (file_model_id: {data_model.file_model_id} workflow_spec_id: {workflow_spec_id}, task_spec_id: {task_spec_id}, and field_id: {field_id})')
+        df = xlsx.parse(xlsx.sheet_names[0])  # Currently we only look at the first sheet.
        df = df.convert_dtypes()
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')] # Drop unnamed columns.
        df = pd.DataFrame(df).dropna(how='all')  # Drop null rows
--- a/tests/data/enum_options_with_search/sponsors.xls
+++ b/tests/data/enum_options_with_search/sponsors.xls
--- a/tests/test_lookup_service.py
+++ b/tests/test_lookup_service.py
@ -176,3 +176,21 @@ class TestLookupService(BaseTest):
        first_result = result[0]
        self.assertEquals(1000, first_result['CUSTOMER_NUMBER'])
        self.assertEquals('UVA - INTERNAL - GM USE ONLY', first_result['CUSTOMER_NAME'])
+
+    def test_lookup_fails_for_xls(self):
+        BaseTest.load_test_spec('enum_options_with_search')
+
+        # Using an old xls file should raise an error
+        file_model_xls = session.query(FileModel).filter(FileModel.name == 'sponsors.xls').first()
+        file_data_model_xls = session.query(FileDataModel).filter(FileDataModel.file_model_id == file_model_xls.id).first()
+        with self.assertRaises(ApiError) as ae:
+            LookupService.build_lookup_table(file_data_model_xls, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME')
+        self.assertIn('Error opening excel file', ae.exception.args[0])
+
+        # Using an xlsx file should work
+        file_model_xlsx = session.query(FileModel).filter(FileModel.name == 'sponsors.xlsx').first()
+        file_data_model_xlsx = session.query(FileDataModel).filter(FileDataModel.file_model_id == file_model_xlsx.id).first()
+        lookup_model = LookupService.build_lookup_table(file_data_model_xlsx, 'CUSTOMER_NUMBER', 'CUSTOMER_NAME')
+        self.assertEqual(28, len(lookup_model.dependencies))
+        self.assertIn('CUSTOMER_NAME', lookup_model.dependencies[0].data.keys())
+        self.assertIn('CUSTOMER_NUMBER', lookup_model.dependencies[0].data.keys())