mirror of
https://github.com/status-im/cabot.git
synced 2025-02-24 18:38:07 +00:00
Bugfixes
This commit is contained in:
parent
4e3968fd96
commit
fb761f5603
@ -5,6 +5,7 @@ from polymorphic import PolymorphicModel
|
|||||||
from django.db.models import F
|
from django.db.models import F
|
||||||
from django.core.urlresolvers import reverse
|
from django.core.urlresolvers import reverse
|
||||||
from django.contrib.admin.models import User
|
from django.contrib.admin.models import User
|
||||||
|
from celery.exceptions import SoftTimeLimitExceeded
|
||||||
|
|
||||||
from jenkins import get_job_status
|
from jenkins import get_job_status
|
||||||
from .alert import send_alert
|
from .alert import send_alert
|
||||||
@ -13,6 +14,7 @@ from .graphite import parse_metric
|
|||||||
from .tasks import update_service, update_instance
|
from .tasks import update_service, update_instance
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
from django.db import transaction
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
@ -172,7 +174,7 @@ class CheckGroupMixin(models.Model):
|
|||||||
self.save()
|
self.save()
|
||||||
self.snapshot.did_send_alert = True
|
self.snapshot.did_send_alert = True
|
||||||
self.snapshot.save()
|
self.snapshot.save()
|
||||||
service_send_alert(self, duty_officers=get_duty_officers())
|
send_alert(self, duty_officers=get_duty_officers())
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def recent_snapshots(self):
|
def recent_snapshots(self):
|
||||||
@ -261,7 +263,7 @@ class Instance(CheckGroupMixin):
|
|||||||
new_instance.save()
|
new_instance.save()
|
||||||
|
|
||||||
for check in checks:
|
for check in checks:
|
||||||
check.duplicate(inst_set=[new_instance,], serv_set=check.service_set.all())
|
check.duplicate(inst_set=[new_instance], serv_set=())
|
||||||
|
|
||||||
return new_instance.pk
|
return new_instance.pk
|
||||||
|
|
||||||
@ -281,12 +283,6 @@ class Instance(CheckGroupMixin):
|
|||||||
)
|
)
|
||||||
self.snapshot.save()
|
self.snapshot.save()
|
||||||
self.save()
|
self.save()
|
||||||
if not (self.overall_status == Service.PASSING_STATUS and self.old_overall_status == Service.PASSING_STATUS):
|
|
||||||
self.alert()
|
|
||||||
|
|
||||||
def alert():
|
|
||||||
return
|
|
||||||
#We don't want alerts for instances
|
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
ordering = ['name']
|
ordering = ['name']
|
||||||
@ -302,9 +298,11 @@ class Instance(CheckGroupMixin):
|
|||||||
def active_icmp_status_checks(self):
|
def active_icmp_status_checks(self):
|
||||||
return self.icmp_status_checks().filter(active=True)
|
return self.icmp_status_checks().filter(active=True)
|
||||||
|
|
||||||
|
class Snapshot(models.Model):
|
||||||
|
|
||||||
|
class Meta:
|
||||||
|
abstract = True
|
||||||
|
|
||||||
class ServiceStatusSnapshot(models.Model):
|
|
||||||
service = models.ForeignKey(Service, related_name='snapshots')
|
|
||||||
time = models.DateTimeField(db_index=True)
|
time = models.DateTimeField(db_index=True)
|
||||||
num_checks_active = models.IntegerField(default=0)
|
num_checks_active = models.IntegerField(default=0)
|
||||||
num_checks_passing = models.IntegerField(default=0)
|
num_checks_passing = models.IntegerField(default=0)
|
||||||
@ -312,17 +310,14 @@ class ServiceStatusSnapshot(models.Model):
|
|||||||
overall_status = models.TextField(default=Service.PASSING_STATUS)
|
overall_status = models.TextField(default=Service.PASSING_STATUS)
|
||||||
did_send_alert = models.IntegerField(default=False)
|
did_send_alert = models.IntegerField(default=False)
|
||||||
|
|
||||||
|
class ServiceStatusSnapshot(Snapshot):
|
||||||
|
service = models.ForeignKey(Service, related_name='snapshots')
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return u"%s: %s" % (self.service.name, self.overall_status)
|
return u"%s: %s" % (self.service.name, self.overall_status)
|
||||||
|
|
||||||
class InstanceStatusSnapshot(models.Model):
|
class InstanceStatusSnapshot(Snapshot):
|
||||||
instance = models.ForeignKey(Instance, related_name='snapshots')
|
instance = models.ForeignKey(Instance, related_name='snapshots')
|
||||||
time = models.DateTimeField(db_index=True)
|
|
||||||
num_checks_active = models.IntegerField(default=0)
|
|
||||||
num_checks_passing = models.IntegerField(default=0)
|
|
||||||
num_checks_failing = models.IntegerField(default=0)
|
|
||||||
overall_status = models.TextField(default=Service.PASSING_STATUS)
|
|
||||||
did_send_alert = models.IntegerField(default=False)
|
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return u"%s: %s" % (self.instance.name, self.overall_status)
|
return u"%s: %s" % (self.instance.name, self.overall_status)
|
||||||
@ -435,11 +430,11 @@ class StatusCheck(PolymorphicModel):
|
|||||||
return self.name
|
return self.name
|
||||||
|
|
||||||
def recent_results(self):
|
def recent_results(self):
|
||||||
return self.statuscheckresult_set.all().order_by('-time_complete').defer('raw_data')[:10]
|
return StatusCheckResult.objects.filter(check=self).order_by('-time_complete').defer('raw_data')[:10]
|
||||||
|
|
||||||
def last_result(self):
|
def last_result(self):
|
||||||
try:
|
try:
|
||||||
return self.recent_results()[0]
|
return StatusCheckResult.objects.filter(check=self).order_by('-time_complete').defer('raw_data')[0]
|
||||||
except:
|
except:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -447,14 +442,13 @@ class StatusCheck(PolymorphicModel):
|
|||||||
start = timezone.now()
|
start = timezone.now()
|
||||||
try:
|
try:
|
||||||
result = self._run()
|
result = self._run()
|
||||||
|
except SoftTimeLimitExceeded as e:
|
||||||
|
result = StatusCheckResult(check=self)
|
||||||
|
result.error = u'Error in performing check: Celery soft time limit exceeded'
|
||||||
|
result.succeeded = False
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
result = StatusCheckResult(check=self)
|
result = StatusCheckResult(check=self)
|
||||||
result.error = u'Error in performing check: %s' % (e,)
|
result.error = u'Error in performing check: %s' % (e,)
|
||||||
if result.error.startswith("Error in performing check: get() returned more than one Instance"):
|
|
||||||
first_instance = self.instance_set.all().order_by('id')[0]
|
|
||||||
self.instance_set = [first_instance]
|
|
||||||
first_instance_link = '<a href="%s">' % reverse('instance', kwargs={'pk': first_instance.pk}) + first_instance.name + "</a>"
|
|
||||||
result.error = "Error: This type of check can only be attached to one instance. All instances, apart from the oldest one (%s), have been detached from this check. The check will run normally next time." % first_instance_link
|
|
||||||
result.succeeded = False
|
result.succeeded = False
|
||||||
finish = timezone.now()
|
finish = timezone.now()
|
||||||
result.time = start
|
result.time = start
|
||||||
@ -470,25 +464,50 @@ class StatusCheck(PolymorphicModel):
|
|||||||
raise NotImplementedError('Subclasses should implement')
|
raise NotImplementedError('Subclasses should implement')
|
||||||
|
|
||||||
def save(self, *args, **kwargs):
|
def save(self, *args, **kwargs):
|
||||||
recent_results = self.recent_results()
|
if self.pk:
|
||||||
if calculate_debounced_passing(recent_results, self.debounce):
|
# This should not be necessary
|
||||||
self.calculated_status = Service.CALCULATED_PASSING_STATUS
|
with transaction.commit_manually():
|
||||||
|
try:
|
||||||
|
recent_results = list(self.recent_results())
|
||||||
|
if calculate_debounced_passing(recent_results, self.debounce):
|
||||||
|
self.calculated_status = Service.CALCULATED_PASSING_STATUS
|
||||||
|
else:
|
||||||
|
self.calculated_status = Service.CALCULATED_FAILING_STATUS
|
||||||
|
self.cached_health = serialize_recent_results(recent_results)
|
||||||
|
transaction.commit()
|
||||||
|
except SoftTimeLimitExceeded as e:
|
||||||
|
# Something weird with postgres
|
||||||
|
transaction.rollback()
|
||||||
|
logger.error('Celery time limit exceeded for getting results for %s' % self.pk)
|
||||||
|
self.calculated_status = Service.CALCULATED_FAILING_STATUS
|
||||||
|
self.cached_health = '-1'
|
||||||
|
except Exception as e:
|
||||||
|
transaction.rollback()
|
||||||
|
logger.error('Got exception when saving check: %s' % e)
|
||||||
|
self.calculated_status = Service.CALCULATED_FAILING_STATUS
|
||||||
|
self.cached_health = '-1'
|
||||||
|
try:
|
||||||
|
updated = StatusCheck.objects.get(pk=self.pk)
|
||||||
|
except StatusCheck.DoesNotExist as e:
|
||||||
|
logger.error('Cannot find myself (check %s) in the database, presumably have been deleted' % self.pk)
|
||||||
|
return
|
||||||
else:
|
else:
|
||||||
self.calculated_status = Service.CALCULATED_FAILING_STATUS
|
self.cached_health = ''
|
||||||
self.cached_health = serialize_recent_results(recent_results)
|
self.calculated_status = Service.CALCULATED_PASSING_STATUS
|
||||||
ret = super(StatusCheck, self).save(*args, **kwargs)
|
ret = super(StatusCheck, self).save(*args, **kwargs)
|
||||||
# Update linked services
|
|
||||||
self.update_related_services()
|
self.update_related_services()
|
||||||
self.update_related_instances()
|
self.update_related_instances()
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def duplicate(self, inst_set=[None,], serv_set=[None,]):
|
def duplicate(self, inst_set=None, serv_set=None):
|
||||||
new_check = self
|
new_check = self
|
||||||
new_check.pk = None
|
new_check.pk = None
|
||||||
new_check.id = None
|
new_check.id = None
|
||||||
new_check.save()
|
new_check.save()
|
||||||
new_check.instance_set = inst_set
|
if inst_set is not None:
|
||||||
new_check.service_set = serv_set
|
new_check.instance_set = inst_set
|
||||||
|
if serv_set is not None:
|
||||||
|
new_check.service_set = serv_set
|
||||||
new_check.save()
|
new_check.save()
|
||||||
return new_check.pk
|
return new_check.pk
|
||||||
|
|
||||||
@ -500,7 +519,7 @@ class StatusCheck(PolymorphicModel):
|
|||||||
def update_related_instances(self):
|
def update_related_instances(self):
|
||||||
instances = self.instance_set.all()
|
instances = self.instance_set.all()
|
||||||
for instance in instances:
|
for instance in instances:
|
||||||
update_service.delay(instance.id)
|
update_instance.delay(instance.id)
|
||||||
|
|
||||||
class ICMPStatusCheck(StatusCheck):
|
class ICMPStatusCheck(StatusCheck):
|
||||||
|
|
||||||
@ -516,7 +535,7 @@ class ICMPStatusCheck(StatusCheck):
|
|||||||
instances = self.instance_set.all()
|
instances = self.instance_set.all()
|
||||||
target = self.instance_set.get().address
|
target = self.instance_set.get().address
|
||||||
|
|
||||||
#We need to read both STDOUT and STDERR because ping can write to both, depending on the kind of error. Thanks a lot, ping.
|
# We need to read both STDOUT and STDERR because ping can write to both, depending on the kind of error. Thanks a lot, ping.
|
||||||
ping_process = subprocess.Popen("ping -c 1 " + target, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
|
ping_process = subprocess.Popen("ping -c 1 " + target, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
|
||||||
response = ping_process.wait()
|
response = ping_process.wait()
|
||||||
|
|
||||||
|
@ -71,15 +71,17 @@ def update_service(service_or_id):
|
|||||||
service = service_or_id
|
service = service_or_id
|
||||||
service.update_status()
|
service.update_status()
|
||||||
|
|
||||||
|
|
||||||
@task(ignore_result=True)
|
@task(ignore_result=True)
|
||||||
def update_instance(instance_or_id):
|
def update_instance(instance_or_id):
|
||||||
from .models import Instance
|
from .models import Instance
|
||||||
if not isinstance(instance_or_id, Service):
|
if not isinstance(instance_or_id, Instance):
|
||||||
instance = Instance.objects.get(id=instance_or_id)
|
instance = Instance.objects.get(id=instance_or_id)
|
||||||
else:
|
else:
|
||||||
instance = instance_or_id
|
instance = instance_or_id
|
||||||
instance.update_status()
|
instance.update_status()
|
||||||
|
|
||||||
|
|
||||||
@task(ignore_result=True)
|
@task(ignore_result=True)
|
||||||
def update_shifts():
|
def update_shifts():
|
||||||
from .models import update_shifts as _update_shifts
|
from .models import update_shifts as _update_shifts
|
||||||
|
@ -85,6 +85,7 @@ class StatusCheckResultDetailView(LoginRequiredMixin, DetailView):
|
|||||||
model = StatusCheckResult
|
model = StatusCheckResult
|
||||||
context_object_name = 'result'
|
context_object_name = 'result'
|
||||||
|
|
||||||
|
|
||||||
class SymmetricalForm(forms.ModelForm):
|
class SymmetricalForm(forms.ModelForm):
|
||||||
symmetrical_fields = () # Iterable of 2-tuples (field, model)
|
symmetrical_fields = () # Iterable of 2-tuples (field, model)
|
||||||
|
|
||||||
@ -115,7 +116,9 @@ base_widgets = {
|
|||||||
|
|
||||||
|
|
||||||
class StatusCheckForm(SymmetricalForm):
|
class StatusCheckForm(SymmetricalForm):
|
||||||
|
|
||||||
symmetrical_fields = ('service_set', 'instance_set')
|
symmetrical_fields = ('service_set', 'instance_set')
|
||||||
|
|
||||||
service_set = forms.ModelMultipleChoiceField(
|
service_set = forms.ModelMultipleChoiceField(
|
||||||
queryset=Service.objects.all(),
|
queryset=Service.objects.all(),
|
||||||
required=False,
|
required=False,
|
||||||
@ -140,6 +143,7 @@ class StatusCheckForm(SymmetricalForm):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class GraphiteStatusCheckForm(StatusCheckForm):
|
class GraphiteStatusCheckForm(StatusCheckForm):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
@ -184,6 +188,7 @@ class ICMPStatusCheckForm(StatusCheckForm):
|
|||||||
)
|
)
|
||||||
widgets = dict(**base_widgets)
|
widgets = dict(**base_widgets)
|
||||||
|
|
||||||
|
|
||||||
class HttpStatusCheckForm(StatusCheckForm):
|
class HttpStatusCheckForm(StatusCheckForm):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
@ -244,6 +249,7 @@ class UserProfileForm(forms.ModelForm):
|
|||||||
model = UserProfile
|
model = UserProfile
|
||||||
exclude = ('user',)
|
exclude = ('user',)
|
||||||
|
|
||||||
|
|
||||||
class InstanceForm(SymmetricalForm):
|
class InstanceForm(SymmetricalForm):
|
||||||
|
|
||||||
symmetrical_fields = ('service_set',)
|
symmetrical_fields = ('service_set',)
|
||||||
@ -291,8 +297,6 @@ class InstanceForm(SymmetricalForm):
|
|||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ServiceForm(forms.ModelForm):
|
class ServiceForm(forms.ModelForm):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
@ -523,6 +527,7 @@ class InstanceListView(LoginRequiredMixin, ListView):
|
|||||||
def get_queryset(self):
|
def get_queryset(self):
|
||||||
return Instance.objects.all().order_by('name').prefetch_related('status_checks')
|
return Instance.objects.all().order_by('name').prefetch_related('status_checks')
|
||||||
|
|
||||||
|
|
||||||
class ServiceListView(LoginRequiredMixin, ListView):
|
class ServiceListView(LoginRequiredMixin, ListView):
|
||||||
model = Service
|
model = Service
|
||||||
context_object_name = 'services'
|
context_object_name = 'services'
|
||||||
@ -629,12 +634,14 @@ class ServiceDeleteView(LoginRequiredMixin, DeleteView):
|
|||||||
context_object_name = 'service'
|
context_object_name = 'service'
|
||||||
template_name = 'cabotapp/service_confirm_delete.html'
|
template_name = 'cabotapp/service_confirm_delete.html'
|
||||||
|
|
||||||
|
|
||||||
class InstanceDeleteView(LoginRequiredMixin, DeleteView):
|
class InstanceDeleteView(LoginRequiredMixin, DeleteView):
|
||||||
model = Instance
|
model = Instance
|
||||||
success_url = reverse_lazy('instances')
|
success_url = reverse_lazy('instances')
|
||||||
context_object_name = 'instance'
|
context_object_name = 'instance'
|
||||||
template_name = 'cabotapp/instance_confirm_delete.html'
|
template_name = 'cabotapp/instance_confirm_delete.html'
|
||||||
|
|
||||||
|
|
||||||
class ShiftListView(LoginRequiredMixin, ListView):
|
class ShiftListView(LoginRequiredMixin, ListView):
|
||||||
model = Shift
|
model = Shift
|
||||||
context_object_name = 'shifts'
|
context_object_name = 'shifts'
|
||||||
|
@ -6,6 +6,8 @@ CELERY_IMPORTS = ('app.cabotapp.tasks', )
|
|||||||
CELERYBEAT_SCHEDULER = "djcelery.schedulers.DatabaseScheduler"
|
CELERYBEAT_SCHEDULER = "djcelery.schedulers.DatabaseScheduler"
|
||||||
CELERY_TASK_SERIALIZER = "json"
|
CELERY_TASK_SERIALIZER = "json"
|
||||||
CELERY_ACCEPT_CONTENT = ['json', 'msgpack', 'yaml']
|
CELERY_ACCEPT_CONTENT = ['json', 'msgpack', 'yaml']
|
||||||
|
CELERYD_TASK_SOFT_TIME_LIMIT = 120
|
||||||
|
CELERYD_TASK_TIME_LIMIT = 150
|
||||||
|
|
||||||
CELERYBEAT_SCHEDULE = {
|
CELERYBEAT_SCHEDULE = {
|
||||||
'run-all-checks': {
|
'run-all-checks': {
|
||||||
|
@ -91,6 +91,7 @@ MIDDLEWARE_CLASSES = (
|
|||||||
'django.middleware.csrf.CsrfViewMiddleware',
|
'django.middleware.csrf.CsrfViewMiddleware',
|
||||||
'django.contrib.auth.middleware.AuthenticationMiddleware',
|
'django.contrib.auth.middleware.AuthenticationMiddleware',
|
||||||
'django.contrib.messages.middleware.MessageMiddleware',
|
'django.contrib.messages.middleware.MessageMiddleware',
|
||||||
|
'django.middleware.transaction.TransactionMiddleware',
|
||||||
)
|
)
|
||||||
|
|
||||||
ROOT_URLCONF = 'app.urls'
|
ROOT_URLCONF = 'app.urls'
|
||||||
|
@ -18,7 +18,7 @@
|
|||||||
</div>
|
</div>
|
||||||
{% if form.instance.id %}
|
{% if form.instance.id %}
|
||||||
<div class="col-xs-4">
|
<div class="col-xs-4">
|
||||||
<a class="btn btn-danger" href="{% url delete-service form.instance.id %}">Delete instance</a>
|
<a class="btn btn-danger" href="{% url delete-instance form.instance.id %}">Delete instance</a>
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
Django==1.4.10
|
Django==1.4.13
|
||||||
PyJWT==0.1.2
|
PyJWT==0.1.2
|
||||||
South==0.7.6
|
South==0.7.6
|
||||||
amqp==1.3.3
|
amqp==1.3.3
|
||||||
|
Loading…
x
Reference in New Issue
Block a user