From 2fc4b44ef368e92bc0c32be0e1d4144e62165e09 Mon Sep 17 00:00:00 2001 From: Dan Date: Sat, 12 Mar 2022 16:19:07 -0500 Subject: [PATCH] Create a path to directly download the spreadsheet file (and avoid the weird dance on the front end of making an API call to get file data.) Fixing pagination. Seems the front end uses a page_index that is 0 based, and sqlAlchemy prefers to start at 1. --- crc/api.yml | 9 +++++++- crc/api/study.py | 9 ++++++-- crc/models/task_log.py | 29 ++++++++++++++++++++----- crc/services/task_logging_service.py | 2 +- tests/scripts/test_task_logging.py | 6 +++-- tests/study/test_study_download_logs.py | 20 +++++++++++++---- 6 files changed, 60 insertions(+), 15 deletions(-) diff --git a/crc/api.yml b/crc/api.yml index 2af8e249..9b73736d 100755 --- a/crc/api.yml +++ b/crc/api.yml @@ -293,14 +293,21 @@ paths: schema: type: integer format: int32 + - name : auth_token + in : query + required : true + description : User Auth Toeken + schema: + type: string get: operationId: crc.api.study.download_logs_for_study summary: Returns a csv file of logged events that occured within a study + security: [] # Will verify manually with provided Auth Token. tags: - Studies responses: '200': - description: Returns the csv file of logged events + description: Returns the spreadsheet file of logged events content: application/octet-stream: schema: diff --git a/crc/api/study.py b/crc/api/study.py index 83dd4b5b..41421ead 100644 --- a/crc/api/study.py +++ b/crc/api/study.py @@ -2,7 +2,6 @@ from datetime import datetime from flask import g, send_file from sqlalchemy.exc import IntegrityError - from crc import session from crc.api.common import ApiError, ApiErrorSchema from crc.models.study import Study, StudyEventType, StudyModel, StudySchema, StudyForUpdateSchema, \ @@ -15,6 +14,7 @@ from crc.services.user_service import UserService from crc.services.workflow_processor import WorkflowProcessor from crc.services.workflow_service import WorkflowService from crc.services.workflow_spec_service import WorkflowSpecService +from crc.api.user import verify_token import io @@ -117,11 +117,16 @@ def get_study_associates(study_id): def get_logs_for_study(study_id, body): task_log_query = TaskLogQuery(**body) + task_log_query.study_id = study_id # Force the study id return TaskLogQuerySchema().dump( TaskLoggingService.get_logs_for_study_paginated(study_id, task_log_query)) -def download_logs_for_study(study_id): +def download_logs_for_study(study_id, auth_token): + # Download links incorporate an auth token in the request for direct download + if not verify_token(auth_token): + raise ApiError('not_authenticated', 'You need to include an authorization token in the URL with this') + title = f'Study {study_id}' logs, headers = TaskLoggingService.get_log_data_for_download(study_id) spreadsheet = SpreadsheetService.create_spreadsheet(logs, headers, title) diff --git a/crc/models/task_log.py b/crc/models/task_log.py index 060f5578..a1171f00 100644 --- a/crc/models/task_log.py +++ b/crc/models/task_log.py @@ -1,6 +1,10 @@ import enum +import urllib +import flask import marshmallow +from flask import url_for +from marshmallow.fields import Method from crc import db, ma from crc.models.study import StudyModel @@ -62,9 +66,10 @@ class TaskLogModelSchema(ma.Schema): class TaskLogQuery: """Encapsulates the paginated queries and results when retrieving and filtering task logs over the API""" - def __init__(self, code="", level="", user="", page=1, per_page=10, + def __init__(self, study_id=None, code="", level="", user="", page=0, per_page=10, sort_column=None, sort_reverse=False, items=None, - pages=0, total=0, has_next=False, has_prev=False): + pages=0, total=0, has_next=False, has_prev=False, download_url=None): + self.study_id = study_id # Filter on Study. self.code = code # Filter on code. self.level = level # Filter on level. self.user = user # Filter on user. @@ -77,11 +82,12 @@ class TaskLogQuery: self.pages = pages self.has_next = False self.has_prev = False + self.download_url = None def update_from_sqlalchemy_paginator(self, paginator): """Updates this with results that are returned from the paginator""" self.items = paginator.items - self.page = paginator.page + self.page = paginator.page - 1 self.per_page = paginator.per_page self.pages = paginator.pages self.has_next = paginator.has_next @@ -94,5 +100,18 @@ class TaskLogQuerySchema(ma.Schema): model = TaskLogModel fields = ["code", "level", "user", "page", "per_page", "sort_column", "sort_reverse", "items", "pages", "total", - "has_next", "has_prev"] - items = marshmallow.fields.List(marshmallow.fields.Nested(TaskLogModelSchema)) \ No newline at end of file + "has_next", "has_prev", "download_url"] + items = marshmallow.fields.List(marshmallow.fields.Nested(TaskLogModelSchema)) + download_url = Method("get_url") + + def get_url(self, obj): + token = 'not_available' + if hasattr(obj, 'study_id') and obj.study_id is not None: + file_url = url_for("/v1_0.crc_api_study_download_logs_for_study", study_id=obj.study_id, _external=True) + if hasattr(flask.g, 'user'): + token = flask.g.user.encode_auth_token() + url = file_url + '?auth_token=' + urllib.parse.quote_plus(token) + return url + else: + return "" + diff --git a/crc/services/task_logging_service.py b/crc/services/task_logging_service.py index f28ae101..1d0f1e5e 100644 --- a/crc/services/task_logging_service.py +++ b/crc/services/task_logging_service.py @@ -89,7 +89,7 @@ class TaskLoggingService(object): sort_column = desc(task_log_query.sort_column) else: sort_column = task_log_query.sort_column - paginator = sql_query.order_by(sort_column).paginate(task_log_query.page, task_log_query.per_page, + paginator = sql_query.order_by(sort_column).paginate(task_log_query.page + 1, task_log_query.per_page, error_out=False) task_log_query.update_from_sqlalchemy_paginator(paginator) return task_log_query diff --git a/tests/scripts/test_task_logging.py b/tests/scripts/test_task_logging.py index af37ba46..e66cdcf0 100644 --- a/tests/scripts/test_task_logging.py +++ b/tests/scripts/test_task_logging.py @@ -153,7 +153,7 @@ class TestTaskLogging(BaseTest): logs = TaskLoggingService().get_logs_for_study_paginated(study.id, TaskLogQuery(per_page=5)) self.assertEqual(40, logs.total) self.assertEqual(5, len(logs.items), "I can limit results to 5") - self.assertEqual(1, logs.page) + self.assertEqual(0, logs.page) self.assertEqual(8, logs.pages) self.assertEqual(5, logs.per_page) self.assertEqual(True, logs.has_next) @@ -165,4 +165,6 @@ class TestTaskLogging(BaseTest): logs = TaskLoggingService.get_logs_for_study_paginated(study.id, TaskLogQuery(per_page=5, sort_column="level", sort_reverse=True)) for i in range(0, 5): - self.assertEqual('info', logs.items[i].level, "It is possible to sort on a column") \ No newline at end of file + self.assertEqual('info', logs.items[i].level, "It is possible to sort on a column") + + diff --git a/tests/study/test_study_download_logs.py b/tests/study/test_study_download_logs.py index 87bd3681..bd2e6b1d 100644 --- a/tests/study/test_study_download_logs.py +++ b/tests/study/test_study_download_logs.py @@ -1,7 +1,10 @@ +import json + from tests.base_test import BaseTest from crc import session -from crc.models.task_log import TaskLogModel +from crc.models.task_log import TaskLogModel, TaskLogQuery, TaskLogQuerySchema +from crc.models.user import UserModel from openpyxl import load_workbook from io import BytesIO @@ -40,10 +43,19 @@ class TestDownloadLogsForStudy(BaseTest): 'message': 'This is a third message.'} self.add_log(study_id, workflow.id, task.name, 'empty_workflow', log_data) - rv = self.app.get(f'/v1.0/study/{study_id}/log/download', - content_type="application/json", - headers=self.logged_in_headers()) + # Run the query, which should include a 'download_url' link that we can click on. + url = f'/v1.0/study/{workflow.study_id}/log' + task_log_query = TaskLogQuery() + user = session.query(UserModel).filter_by(uid=self.test_uid).first() + rv = self.app.put(url, headers=self.logged_in_headers(user), content_type="application/json", + data=TaskLogQuerySchema().dump(task_log_query)) + self.assert_success(rv) + log_query = json.loads(rv.get_data(as_text=True)) + self.assertIsNotNone(log_query['download_url']) + # Use the provided link to get the file. + rv = self.app.get(log_query['download_url']) + self.assert_success(rv) wb = load_workbook(BytesIO(rv.data)) ws = wb.active