mirror of
https://github.com/status-im/cabot.git
synced 2025-02-24 10:28:06 +00:00
Removed Instance alerts (caused errors)
This commit is contained in:
parent
09effe2aab
commit
b1d730103e
@ -1,167 +0,0 @@
|
||||
from os import environ as env
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.mail import send_mail
|
||||
from django.core.urlresolvers import reverse
|
||||
from django.template import Context, Template
|
||||
|
||||
from twilio.rest import TwilioRestClient
|
||||
from twilio import twiml
|
||||
import requests
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Django template sources for the per-channel instance alert messages.
# Every template reads ``instance`` from its rendering context; the e-mail,
# HipChat and SMS templates additionally read ``scheme`` and ``host`` to build
# a link to the instance page via the ``url`` tag.

# Plain-text e-mail body. When the instance is not in PASSING_STATUS it lists
# each failing check and, if any exist, each passing check with its category
# and importance; otherwise it only reports that the instance is back to normal.
email_template = """Instance {{ instance.name }} {{ scheme }}://{{ host }}{% url instance pk=instance.id %} {% if instance.overall_status != instance.PASSING_STATUS %}alerting with status: {{ instance.overall_status }}{% else %}is back to normal{% endif %}.
{% if instance.overall_status != instance.PASSING_STATUS %}
CHECKS FAILING:{% for check in instance.all_failing_checks %}
FAILING - {{ check.name }} - Type: {{ check.check_category }} - Importance: {{ check.get_importance_display }}{% endfor %}
{% if instance.all_passing_checks %}
Passing checks:{% for check in instance.all_passing_checks %}
PASSING - {{ check.name }} - Type: {{ check.check_category }} - Importance: {{ check.get_importance_display }}{% endfor %}
{% endif %}
{% endif %}
"""

# Single-line HipChat message. Also reads ``alert`` and ``users`` from the
# context: when ``alert`` is truthy every alias in ``users`` is appended as an
# @-mention. Failing check names (and their last error, if any) are listed
# when the instance is not passing.
hipchat_template = "Instance {{ instance.name }} {% if instance.overall_status == instance.PASSING_STATUS %}is back to normal{% else %}reporting {{ instance.overall_status }} status{% endif %}: {{ scheme }}://{{ host }}{% url instance pk=instance.id %}. {% if instance.overall_status != instance.PASSING_STATUS %}Checks failing:{% for check in instance.all_failing_checks %} {{ check.name }}{% if check.last_result.error %} ({{ check.last_result.error|safe }}){% endif %}{% endfor %}{% endif %}{% if alert %}{% for alias in users %} @{{ alias }}{% endfor %}{% endif %}"

# SMS text: instance name, current status (or recovery) and a link.
sms_template = "Instance {{ instance.name }} {% if instance.overall_status == instance.PASSING_STATUS %}is back to normal{% else %}reporting {{ instance.overall_status }} status{% endif %}: {{ scheme }}://{{ host }}{% url instance pk=instance.id %}"

# Text rendered for the telephone alert; only reads ``instance.name``.
telephone_template = "This is an urgent message from Arachnys monitoring. Instance \"{{ instance.name }}\" is erroring. Please check Cabot urgently."
|
||||
|
||||
|
||||
def send_alert(instance, duty_officers=None):
    """Dispatch alerts for ``instance`` over every channel it has enabled.

    Args:
        instance: object exposing ``users_to_notify`` (with an ``all()``
            method) and the boolean flags ``email_alert``, ``hipchat_alert``,
            ``sms_alert`` and ``telephone_alert``.
        duty_officers: optional iterable of on-duty users. ``None`` (the
            default) is treated as "no duty officers".
    """
    # The channel senders iterate over duty_officers, so the default of None
    # must be normalised here or a critical alert would raise TypeError.
    if duty_officers is None:
        duty_officers = []
    users = instance.users_to_notify.all()
    if instance.email_alert:
        send_email_alert(instance, users, duty_officers)
    if instance.hipchat_alert:
        send_hipchat_alert(instance, users, duty_officers)
    if instance.sms_alert:
        send_sms_alert(instance, users, duty_officers)
    if instance.telephone_alert:
        send_telephone_alert(instance, users, duty_officers)
|
||||
|
||||
|
||||
def send_email_alert(instance, users, duty_officers):
    """E-mail the current status of ``instance`` to its subscribers.

    Args:
        instance: object exposing ``name``, ``overall_status`` and the
            ``PASSING_STATUS`` / ``CRITICAL_STATUS`` constants.
        users: iterable of users; those with a non-empty ``email`` are mailed.
        duty_officers: iterable of users (or ``None``) who are additionally
            mailed when the instance is in ``CRITICAL_STATUS``.

    Returns:
        None. Nothing is sent when no user in ``users`` has an e-mail address.
    """
    emails = [u.email for u in users if u.email]
    if not emails:
        # NOTE(review): this returns before duty officers are considered, so
        # a critical instance with no subscriber addresses mails nobody.
        # Kept as in the original; confirm this is intended.
        return

    if instance.overall_status != instance.PASSING_STATUS:
        if instance.overall_status == instance.CRITICAL_STATUS:
            # Skip duty officers without an address, as is done for users.
            emails += [u.email for u in (duty_officers or []) if u.email]
        subject = '%s status for instance: %s' % (
            instance.overall_status, instance.name)
    else:
        subject = 'Instance back to normal: %s' % (instance.name,)

    # A duty officer may also be a subscriber; list each address only once
    # while keeping the original ordering.
    seen = set()
    recipients = []
    for address in emails:
        if address not in seen:
            seen.add(address)
            recipients.append(address)

    c = Context({
        'instance': instance,
        'host': settings.WWW_HTTP_HOST,
        'scheme': settings.WWW_SCHEME
    })
    send_mail(
        subject=subject,
        message=Template(email_template).render(c),
        from_email='Cabot <%s>' % settings.CABOT_FROM_EMAIL,
        recipient_list=recipients,
    )
|
||||
|
||||
|
||||
def send_hipchat_alert(instance, users, duty_officers):
    """Post the status of ``instance`` to HipChat.

    The message is always posted; the ``alert`` flag only controls whether
    the template @-mentions the collected aliases. Mentions are suppressed
    for WARNING status, for ERROR following ERROR, and for a recovery from
    WARNING.

    Args:
        instance: object exposing ``name``, ``overall_status``,
            ``old_overall_status`` and the ``*_STATUS`` constants.
        users: iterable of users; those with ``profile.hipchat_alias`` set
            are mentioned.
        duty_officers: iterable of users (or ``None``) whose aliases are
            added when the instance is in ``CRITICAL_STATUS``.
    """
    def _aliases(people):
        # HipChat aliases of everyone who has a profile with one configured.
        return [p.profile.hipchat_alias for p in people
                if hasattr(p, 'profile') and p.profile.hipchat_alias]

    status = instance.overall_status
    previous = instance.old_overall_status
    hipchat_aliases = _aliases(users)

    alert = True
    if status == instance.WARNING_STATUS:
        alert = False  # Don't alert at all for WARNING
    # NOTE(review): the original tested membership in
    # (ERROR_STATUS, ERROR_STATUS); the duplicate entry is kept as a plain
    # equality. If CRITICAL_STATUS was meant as the second entry, that is a
    # behaviour change to decide separately.
    if status == instance.ERROR_STATUS and previous == instance.ERROR_STATUS:
        alert = False  # Don't alert repeatedly for ERROR

    if status == instance.PASSING_STATUS:
        color = 'green'
        if previous == instance.WARNING_STATUS:
            alert = False  # Don't alert for recovery from WARNING status
    else:
        color = 'red'

    if status == instance.CRITICAL_STATUS:
        # Tolerate a missing duty-officer list instead of raising TypeError.
        hipchat_aliases += _aliases(duty_officers or [])

    c = Context({
        'instance': instance,
        'users': hipchat_aliases,
        'host': settings.WWW_HTTP_HOST,
        'scheme': settings.WWW_SCHEME,
        'alert': alert,
    })
    message = Template(hipchat_template).render(c)
    _send_hipchat_alert(message, color=color, sender='Cabot/%s' % instance.name)
|
||||
|
||||
|
||||
def _send_hipchat_alert(message, color='green', sender='Cabot', timeout=10):
    """POST ``message`` to the configured HipChat room.

    Reads ``HIPCHAT_URL``, ``HIPCHAT_API_KEY`` and ``HIPCHAT_ALERT_ROOM``
    from Django settings.

    Args:
        message: text of the message.
        color: HipChat message colour.
        sender: display name; only the first 15 characters are sent.
        timeout: seconds to wait for HipChat before giving up, so a stalled
            connection cannot block the caller indefinitely.

    Failures (network errors, timeouts, non-2xx responses) are logged rather
    than raised, matching the Twilio senders, so that a HipChat outage does
    not stop the remaining alert channels from being tried.
    """
    payload = {
        'room_id': settings.HIPCHAT_ALERT_ROOM,
        'from': sender[:15],
        'message': message,
        'notify': 1,
        'color': color,
        'message_format': 'text',
    }
    try:
        resp = requests.post(
            settings.HIPCHAT_URL,
            # Let requests build the query string so the token is URL-encoded.
            params={'auth_token': settings.HIPCHAT_API_KEY},
            data=payload,
            timeout=timeout,
        )
        resp.raise_for_status()
    except requests.RequestException as e:
        logger.exception('Error sending hipchat alert: %s', e)
|
||||
|
||||
|
||||
def send_sms_alert(instance, users, duty_officers):
    """Send the status of ``instance`` by SMS through Twilio.

    Args:
        instance: object exposing ``is_critical`` plus the attributes read by
            ``sms_template``.
        users: iterable of users; those with ``profile.mobile_number`` set
            are texted on ``profile.prefixed_mobile_number``.
        duty_officers: iterable of users (or ``None``) who are additionally
            texted when ``instance.is_critical`` is truthy.

    Each distinct number is texted once. A failure for one number is logged
    and does not prevent the remaining numbers from being tried.
    """
    def _numbers(people):
        # Prefixed mobile numbers of everyone with a mobile number configured.
        return [p.profile.prefixed_mobile_number for p in people
                if hasattr(p, 'profile') and p.profile.mobile_number]

    mobiles = _numbers(users)
    if instance.is_critical:
        # Tolerate a missing duty-officer list instead of raising TypeError.
        mobiles += _numbers(duty_officers or [])
    if not mobiles:
        # Nobody to text: skip template rendering and Twilio client set-up.
        return

    c = Context({
        'instance': instance,
        'host': settings.WWW_HTTP_HOST,
        'scheme': settings.WWW_SCHEME,
    })
    message = Template(sms_template).render(c)
    client = TwilioRestClient(
        settings.TWILIO_ACCOUNT_SID, settings.TWILIO_AUTH_TOKEN)
    for mobile in set(mobiles):
        try:
            client.sms.messages.create(
                to=mobile,
                from_=settings.TWILIO_OUTGOING_NUMBER,
                body=message,
            )
        except Exception as e:  # "as" form is valid on Python 2.6+ and 3
            logger.exception('Error sending twilio sms: %s', e)
|
||||
|
||||
|
||||
def send_telephone_alert(instance, users, duty_officers):
    """Phone the duty officers through Twilio when ``instance`` is critical.

    Args:
        instance: object exposing ``id``, ``overall_status`` and
            ``CRITICAL_STATUS``.
        users: unused; accepted so all channel senders share one signature.
        duty_officers: iterable of users (or ``None``); those with
            ``profile.mobile_number`` set are called on
            ``profile.prefixed_mobile_number``.

    A failure for one number is logged and does not prevent the remaining
    numbers from being tried.
    """
    # No need to call to say things are resolved
    if instance.overall_status != instance.CRITICAL_STATUS:
        return

    # Tolerate a missing duty-officer list instead of raising TypeError.
    mobiles = [u.profile.prefixed_mobile_number for u in (duty_officers or [])
               if hasattr(u, 'profile') and u.profile.mobile_number]
    if not mobiles:
        # Nobody to call: skip URL reversal and Twilio client set-up.
        return

    # Use the configured scheme, as the other alert channels do, instead of
    # hard-coding http:// for the callback URL.
    url = '%s://%s%s' % (
        settings.WWW_SCHEME,
        settings.WWW_HTTP_HOST,
        reverse('twiml-callback', kwargs={'instance_id': instance.id}),
    )
    client = TwilioRestClient(
        settings.TWILIO_ACCOUNT_SID, settings.TWILIO_AUTH_TOKEN)
    for mobile in mobiles:
        try:
            client.calls.create(
                to=mobile,
                from_=settings.TWILIO_OUTGOING_NUMBER,
                url=url,
                method='GET',
            )
        except Exception as e:  # "as" form is valid on Python 2.6+ and 3
            logger.exception('Error making twilio phone call: %s', e)
|
||||
|
||||
|
||||
def telephone_alert_twiml_callback(instance):
    """Build the TwiML response for an alert phone call about ``instance``.

    Renders ``telephone_template`` with ``instance`` in the context, has the
    response say the rendered text with the 'woman' voice, then hang up.

    Returns:
        The populated ``twiml.Response`` object.
    """
    spoken_text = Template(telephone_template).render(
        Context({'instance': instance}))
    response = twiml.Response()
    response.say(spoken_text, voice='woman')
    response.hangup()
    return response
|
@ -7,8 +7,7 @@ from django.core.urlresolvers import reverse
|
||||
from django.contrib.admin.models import User
|
||||
|
||||
from jenkins import get_job_status
|
||||
from .alert_service import send_alert as service_send_alert
|
||||
from .alert_instance import send_alert as instance_send_alert
|
||||
from .alert import send_alert
|
||||
from .calendar import get_events
|
||||
from .graphite import parse_metric
|
||||
from .tasks import update_service, update_instance
|
||||
@ -117,10 +116,6 @@ class CheckGroupMixin(models.Model):
|
||||
default=False,
|
||||
help_text='Must be enabled, and check importance set to Critical, to receive telephone alerts.',
|
||||
)
|
||||
alerts_enabled = models.BooleanField(
|
||||
default=True,
|
||||
help_text='Alert when this service is not healthy.',
|
||||
)
|
||||
overall_status = models.TextField(default=PASSING_STATUS)
|
||||
old_overall_status = models.TextField(default=PASSING_STATUS)
|
||||
hackpad_id = models.TextField(
|
||||
@ -173,7 +168,7 @@ class CheckGroupMixin(models.Model):
|
||||
self.save()
|
||||
self.snapshot.did_send_alert = True
|
||||
self.snapshot.save()
|
||||
# send_alert needs to be called separately from the instance and the service
|
||||
service_send_alert(self, duty_officers=get_duty_officers())
|
||||
|
||||
@property
|
||||
def recent_snapshots(self):
|
||||
@ -240,10 +235,10 @@ class Service(CheckGroupMixin):
|
||||
help_text='Instances this service is running on.',
|
||||
)
|
||||
|
||||
def alert(self):
|
||||
ret = super(Service, self).alert()
|
||||
service_send_alert(self, duty_officers=get_duty_officers())
|
||||
return ret
|
||||
alerts_enabled = models.BooleanField(
|
||||
default=True,
|
||||
help_text='Alert when this service is not healthy.',
|
||||
)
|
||||
|
||||
url = models.TextField(
|
||||
blank=True,
|
||||
@ -285,11 +280,6 @@ class Instance(CheckGroupMixin):
|
||||
help_text="Address (IP/Hostname) of service."
|
||||
)
|
||||
|
||||
def alert(self):
|
||||
ret = super(Instance, self).alert()
|
||||
instance_send_alert(self, duty_officers=get_duty_officers())
|
||||
return ret
|
||||
|
||||
def icmp_status_checks(self):
|
||||
return self.status_checks.filter(polymorphic_ctype__model='icmpstatuscheck')
|
||||
|
||||
|
@ -268,7 +268,6 @@ class InstanceForm(SymmetricalForm):
|
||||
'hipchat_alert',
|
||||
'sms_alert',
|
||||
'telephone_alert',
|
||||
'alerts_enabled',
|
||||
)
|
||||
widgets = {
|
||||
'name': forms.TextInput(attrs={'style': 'width: 30%;'}),
|
||||
|
Loading…
x
Reference in New Issue
Block a user