From 5ed4639f301fbb785ca0ad84cd6fb33af1a3c34e Mon Sep 17 00:00:00 2001
From: David Buxton
Date: Sat, 12 Apr 2014 01:04:35 +0100
Subject: [PATCH] Add very basic db cleanup job

---
 app/cabotapp/tasks.py | 42 ++++++++++++++++++++++++++++++++++++++++--
 app/celeryconfig.py   |  4 ++++
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/app/cabotapp/tasks.py b/app/cabotapp/tasks.py
index c2a082f..b721a76 100644
--- a/app/cabotapp/tasks.py
+++ b/app/cabotapp/tasks.py
@@ -3,7 +3,6 @@ import os.path
 import sys
 import random
 import logging
-from itertools import chain
 
 from celery import Celery
 from celery._state import set_default_app
@@ -74,6 +73,45 @@ def update_service(service_or_id):
 
 
 @task(ignore_result=True)
-def update_shifts(ignore_result=True):
+def update_shifts():
     from .models import update_shifts as _update_shifts
     _update_shifts()
+
+
+@task(ignore_result=True)
+def clean_db(days_to_retain=60):
+    """
+    Delete one batch of expired rows, then re-queue until none are left.
+
+    StatusCheckResult and ServiceStatusSnapshot rows whose ``time`` is more
+    than ``days_to_retain`` days old are removed, at most 100 of each per
+    run.  The follow-up run is a fresh task (rather than a loop) to make
+    sure the db connection is closed etc between batches.
+    """
+    from .models import StatusCheckResult, ServiceStatusSnapshot
+    from datetime import timedelta
+
+    # One cutoff for both tables so they are trimmed to the same instant.
+    cutoff = timezone.now() - timedelta(days=days_to_retain)
+
+    # list() runs each id query exactly once.  A sliced queryset passed to
+    # id__in becomes a LIMITed subquery, which some backends (MySQL) reject.
+    result_ids = list(StatusCheckResult.objects.filter(
+        time__lte=cutoff).values_list('id', flat=True)[:100])
+    snapshot_ids = list(ServiceStatusSnapshot.objects.filter(
+        time__lte=cutoff).values_list('id', flat=True)[:100])
+
+    if not result_ids:
+        logger.info('Completed deleting StatusCheckResult objects')
+    if not snapshot_ids:
+        logger.info('Completed deleting ServiceStatusSnapshot objects')
+    if not result_ids and not snapshot_ids:
+        return
+
+    logger.info('Processing %s StatusCheckResult objects' % len(result_ids))
+    logger.info('Processing %s ServiceStatusSnapshot objects' % len(snapshot_ids))
+
+    StatusCheckResult.objects.filter(id__in=result_ids).delete()
+    ServiceStatusSnapshot.objects.filter(id__in=snapshot_ids).delete()
+
+    clean_db.apply_async(kwargs={'days_to_retain': days_to_retain}, countdown=3)
diff --git a/app/celeryconfig.py b/app/celeryconfig.py
index e337c33..1516062 100644
--- a/app/celeryconfig.py
+++ b/app/celeryconfig.py
@@ -16,6 +16,10 @@ CELERYBEAT_SCHEDULE = {
         'task': 'app.cabotapp.tasks.update_shifts',
         'schedule': timedelta(seconds=1800),
     },
+    'clean-db': {
+        'task': 'app.cabotapp.tasks.clean_db',
+        'schedule': timedelta(seconds=60*60*24),
+    },
 }
 
 CELERY_TIMEZONE = 'UTC'