Browse Source

Checks now have a new status: "paused". The sendalerts management command marks a check as paused if sending its notification throws an exception. This should prevent potential infinite loops of sendalerts crashing and being respawned.

pull/9/head
Pēteris Caune 9 years ago
parent
commit
c1840a92bd
  1. 3
      .gitignore
  2. 86
      hc/api/management/commands/sendalerts.py
  3. 19
      hc/api/migrations/0016_auto_20151030_1107.py
  4. 12
      hc/api/models.py
  5. 4
      static/css/base.css
  6. 3
      templates/emails/alert-body-html.html
  7. 2
      templates/front/my_checks_desktop.html
  8. 2
      templates/front/my_checks_mobile.html

3
.gitignore

@ -1,5 +1,6 @@
__pycache__/
*.pyc
.coverage
local_settings.py
hc.sqlite
hc/local_settings.py
static-collected

86
hc/api/management/commands/sendalerts.py

@ -1,17 +1,63 @@
import logging
import sys
import time
from django.core.management.base import BaseCommand
from django.db.models import Q
from django.utils import timezone
from hc.api.models import Check
logger = logging.getLogger(__name__)
def _log(message):
def _stdout(message):
    """Write *message* to standard output and flush it right away."""
    out = sys.stdout
    out.write(message)
    out.flush()
def handle_one():
    """ Send an alert for a single check.

    Selects one check that belongs to a user and whose stored status no
    longer matches its alert deadline (stored as "up" with ``alert_after``
    in the past, or stored as "down" with ``alert_after`` in the future),
    recomputes its status with ``check.get_status()``, sends the alert and
    saves the check.

    If sending the alert raises an error, the error is logged and the check
    is saved with status "paused"; the selection query only matches "up"
    and "down" checks, so a paused check is not picked again.

    Return True if an appropriate check was selected and processed.
    Return False if no checks need to be processed.

    """

    query = Check.objects.filter(user__isnull=False)

    now = timezone.now()
    going_down = Q(alert_after__lt=now, status="up")
    going_up = Q(alert_after__gt=now, status="down")
    query = query.filter(going_down | going_up)

    try:
        check = query[0]
    except IndexError:
        return False

    check.status = check.get_status()

    tmpl = "\nSending alert, status=%s, code=%s\n"
    _stdout(tmpl % (check.status, check.code))

    try:
        check.send_alert()
    except Exception:
        # Catch every error. If we crash here, what can happen is:
        # - the sendalerts command will crash
        # - supervisor will respawn sendalerts command
        # - sendalerts will try same thing again, resulting in infinite loop
        # So instead we catch and log all errors, and mark
        # the checks as paused so they are not retried.
        #
        # KeyboardInterrupt and SystemExit are deliberately NOT caught:
        # a requested shutdown is not a delivery failure and must not
        # pause the check. In that case the check is left unsaved, so it
        # is selected again on the next run.
        logger.error("Could not alert %s", check.code, exc_info=True)
        check.status = "paused"

    # Persist the new status ("up"/"down" on success, "paused" on failure)
    # only after the notification attempt has finished.
    check.save()
    return True
class Command(BaseCommand):
help = 'Sends UP/DOWN email alerts'
@ -19,36 +65,12 @@ class Command(BaseCommand):
ticks = 0
while True:
# Gone down?
query = Check.objects
query = query.filter(alert_after__lt=timezone.now())
query = query.filter(user__isnull=False)
query = query.filter(status="up")
for check in query:
check.status = "down"
_log("\nSending notification(s) about going down for %s\n" % check.code)
check.send_alert()
ticks = 0
# Save status after the notification is sent
check.save()
# Gone up?
query = Check.objects
query = query.filter(alert_after__gt=timezone.now())
query = query.filter(user__isnull=False)
query = query.filter(status="down")
for check in query:
check.status = "up"
_log("\nSending notification(s) about going up for %s\n" % check.code)
check.send_alert()
ticks = 0
# Save status after the notification is sent
check.save()
success = True
while success:
success = handle_one()
ticks = 0 if success else ticks + 1
time.sleep(1)
ticks = (ticks + 1) % 80
_log("." + ("\n" if ticks == 0 else ""))
_stdout(".")
if ticks % 60 == 0:
_stdout("\n")

19
hc/api/migrations/0016_auto_20151030_1107.py

@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import models, migrations
class Migration(migrations.Migration):
    """Allow "paused" as a value of the ``status`` field on ``check``."""

    dependencies = [("api", "0015_auto_20151022_1008")]

    operations = [
        migrations.AlterField(
            model_name="check",
            name="status",
            field=models.CharField(
                choices=[
                    ("up", "Up"),
                    ("down", "Down"),
                    ("new", "New"),
                    ("paused", "Paused"),
                ],
                default="new",
                max_length=6,
            ),
        ),
    ]

12
hc/api/models.py

@ -15,8 +15,12 @@ import requests
from hc.lib import emails
STATUSES = (("up", "Up"), ("down", "Down"), ("new", "New"))
# Status values a check can have, as (stored value, display label) pairs.
# "paused" is the status given to checks whose alert could not be sent.
# NOTE(review): presumably used as the `choices` of Check.status -- the
# 0016 migration lists the same pairs; confirm against the field definition.
STATUSES = (
("up", "Up"),
("down", "Down"),
("new", "New"),
("paused", "Paused")
)
DEFAULT_TIMEOUT = td(days=1)
DEFAULT_GRACE = td(hours=1)
CHANNEL_KINDS = (("email", "Email"), ("webhook", "Webhook"),
@ -60,8 +64,8 @@ class Check(models.Model):
channel.notify(self)
def get_status(self):
if self.status == "new":
return "new"
if self.status in ("new", "paused"):
return self.status
now = timezone.now()

4
static/css/base.css

@ -42,7 +42,7 @@ body {
font-size: small;
}
.glyphicon.up, .glyphicon.new, .glyphicon.grace, .glyphicon.down {
.glyphicon.up, .glyphicon.new, .glyphicon.paused, .glyphicon.grace, .glyphicon.down {
font-size: 22px;
}
@ -50,7 +50,7 @@ body {
color: #5cb85c;
}
.glyphicon.new {
.glyphicon.new, .glyphicon.paused {
color: #AAA;
}

3
templates/emails/alert-body-html.html

@ -19,6 +19,7 @@
}
.new { background: #AAA; }
.paused { background: #AAA; }
.up { background: #5cb85c; }
.grace { background: #f0ad4e; }
.down { background: #d9534f; }
@ -55,6 +56,8 @@
<span class="badge grace">LATE</span>
{% elif check.get_status == "down" %}
<span class="badge down">DOWN</span>
{% elif check.get_status == "paused" %}
<span class="badge paused">PAUSED</span>
{% endif %}
</td>
<td>

2
templates/front/my_checks_desktop.html

@ -26,6 +26,8 @@
<span class="glyphicon glyphicon-exclamation-sign grace"></span>
{% elif check.get_status == "down" %}
<span class="glyphicon glyphicon-exclamation-sign down"></span>
{% elif check.get_status == "paused" %}
<span class="glyphicon glyphicon-minus-sign paused"></span>
{% endif %}
</td>
<td class="name-cell">

2
templates/front/my_checks_mobile.html

@ -31,6 +31,8 @@
<span class="label label-warning">LATE</span>
{% elif check.get_status == "down" %}
<span class="label label-danger">DOWN</span>
{% elif check.get_status == "paused" %}
<span class="label label-default">PAUSED</span>
{% endif %}
</td>
</tr>

Loading…
Cancel
Save