Merge pull request #424 from arachnys/fix-database-cleanup-task

Fix database cleanup task
This commit is contained in:
Frank Hamand 2017-02-13 14:07:04 +00:00 committed by GitHub
commit 5c43b2a744
3 changed files with 71 additions and 18 deletions

View File

@ -1,6 +1,7 @@
master
------
* Fix clean_db task failing on large results tables
* Wait for docker containers to start in docker-entrypoint.sh
* Update CABOT_PLUGINS_ENABLED to compatible plugin versions
* Automatically initialise database, assets and superuser on docker container start

View File

@ -80,7 +80,7 @@ def update_shifts():
@task(ignore_result=True)
def clean_db(days_to_retain=60):
def clean_db(days_to_retain=60, batch_size=10000):
"""
Clean up database otherwise it gets overwhelmed with StatusCheckResults.
@ -89,24 +89,30 @@ def clean_db(days_to_retain=60):
from .models import StatusCheckResult, ServiceStatusSnapshot
from datetime import timedelta
to_discard_results = StatusCheckResult.objects.filter(time__lte=timezone.now()-timedelta(days=days_to_retain))
to_discard_snapshots = ServiceStatusSnapshot.objects.filter(time__lte=timezone.now()-timedelta(days=days_to_retain))
to_discard_results = StatusCheckResult.objects.filter(time_complete__lte=timezone.now() - timedelta(days=days_to_retain))
to_discard_snapshots = ServiceStatusSnapshot.objects.order_by('time').filter(time__lte=timezone.now() - timedelta(days=days_to_retain))
result_ids = to_discard_results.values_list('id', flat=True)[:100]
snapshot_ids = to_discard_snapshots.values_list('id', flat=True)[:100]
result_ids = to_discard_results[:batch_size].values_list('id', flat=True)
snapshot_ids = to_discard_snapshots[:batch_size].values_list('id', flat=True)
if not result_ids:
result_count = result_ids.count()
snapshot_count = snapshot_ids.count()
# id__in throws exception if passed an empty list, so guard against it
if result_count > 0:
StatusCheckResult.objects.filter(id__in=result_ids).delete()
logger.info('Processing %s StatusCheckResult objects' % result_count)
else:
logger.info('Completed deleting StatusCheckResult objects')
if not snapshot_ids:
if snapshot_count > 0:
ServiceStatusSnapshot.objects.filter(id__in=snapshot_ids).delete()
logger.info('Processing %s ServiceStatusSnapshot objects' % snapshot_count)
else:
logger.info('Completed deleting ServiceStatusSnapshot objects')
if (not snapshot_ids) and (not result_ids):
return
logger.info('Processing %s StatusCheckResult objects' % len(result_ids))
logger.info('Processing %s ServiceStatusSnapshot objects' % len(snapshot_ids))
StatusCheckResult.objects.filter(id__in=result_ids).delete()
ServiceStatusSnapshot.objects.filter(id__in=snapshot_ids).delete()
clean_db.apply_async(kwargs={'days_to_retain': days_to_retain}, countdown=3)
if result_count < batch_size and snapshot_count < batch_size:
logger.info('Completed deleted all old records')
else:
# Re-queue to cleanup remaining records
clean_db.apply_async(kwargs={'days_to_retain': days_to_retain, 'batch_size': batch_size}, countdown=3)

View File

@ -12,9 +12,10 @@ from cabot.cabotapp.alert import update_alert_plugins
from cabot.cabotapp.models import (
GraphiteStatusCheck, JenkinsStatusCheck,
HttpStatusCheck, ICMPStatusCheck, Service, Instance,
StatusCheckResult, minimize_targets)
StatusCheckResult, minimize_targets, ServiceStatusSnapshot)
from cabot.cabotapp.calendar import get_events
from cabot.cabotapp.views import StatusCheckReportForm
from cabot.cabotapp import tasks
from django.conf import settings
from django.contrib.auth.models import Permission
from django.contrib.auth.models import User
@ -945,6 +946,51 @@ class TestAlerts(LocalTestCase):
self.assertEqual(len(plugins), plugin_count - 1)
class TestCleanUpTask(LocalTestCase):
    """Tests for the ``tasks.clean_db`` clean-up task.

    Every fixture record is dated 61 days in the past, i.e. one day older
    than the ``days_to_retain=60`` default of ``clean_db``, so a default
    invocation is expected to discard it.
    """

    def test_cleanup_simple(self):
        """A default ``clean_db()`` call removes one stale result and one
        stale snapshot, restoring both tables to their initial row counts.
        """
        initial_results = StatusCheckResult.objects.all().count()
        initial_snapshots = ServiceStatusSnapshot.objects.all().count()
        stale = timezone.now() - timedelta(days=61)

        ServiceStatusSnapshot(
            service=self.service,
            num_checks_active=1,
            num_checks_passing=1,
            num_checks_failing=1,
            overall_status=self.service.overall_status,
            time=stale,
        ).save()
        StatusCheckResult(
            status_check=self.graphite_check,
            time=stale,
            time_complete=stale,
            succeeded=False
        ).save()

        # Confirm both fixtures exist before cleaning; otherwise the final
        # count assertions could pass without anything having been deleted.
        self.assertEqual(StatusCheckResult.objects.all().count(), initial_results + 1)
        self.assertEqual(ServiceStatusSnapshot.objects.all().count(), initial_snapshots + 1)

        tasks.clean_db()

        self.assertEqual(StatusCheckResult.objects.all().count(), initial_results)
        self.assertEqual(ServiceStatusSnapshot.objects.all().count(), initial_snapshots)

    def test_cleanup_batch(self):
        """With ``batch_size=1`` a single ``clean_db`` call deletes only one
        of the two stale results.
        """
        initial_results = StatusCheckResult.objects.all().count()
        stale = timezone.now() - timedelta(days=61)

        for _ in range(2):
            StatusCheckResult(
                status_check=self.graphite_check,
                time=stale,
                time_complete=stale,
                succeeded=False
            ).save()

        self.assertEqual(StatusCheckResult.objects.all().count(), initial_results + 2)

        tasks.clean_db(batch_size=1)

        # NOTE(review): this expects exactly one batch to have run, so it
        # presumes any follow-up run queued by clean_db is not executed
        # synchronously in the test environment -- confirm against the
        # test Celery settings.
        self.assertEqual(StatusCheckResult.objects.all().count(), initial_results + 1)
class TestMinimizeTargets(LocalTestCase):
def test_null(self):
result = minimize_targets([])