Commit 3c3ca1cf authored by Jonathan Michalon's avatar Jonathan Michalon

Implement config reload (via HUP signal)

Configuration reloading is now implemented. Checks that were removed or changed
while in a failed state send an email saying they will never recover. New checks
are added to the list. Unmodified checks are kept as they are, preserving their context.
The systemd unit now supports reload and, incidentally, uses systemd itself as a
watchdog instead of the one shipped alongside the program.
parent c2efddd5
......@@ -59,6 +59,17 @@ def __alarm_handler(signum, frame):
mails.send_email_report(report)
def __hangup_handler(signum, frame):
    """Reload the configuration module when SIGHUP is received.

    The current checks are copied, the live list is emptied, and the
    config module is reloaded. The previous checks are then merged back
    in through ``config.checks.merge``.

    If reloading raises, the previous checks are restored and the error
    is logged, so a broken config file neither empties the check list
    nor propagates an exception out of the signal handler.

    :param signum: signal number passed by the ``signal`` machinery (unused)
    :param frame: interrupted stack frame (unused)
    """
    print ("Signal SIGHUP caught, reloading config. (%s)" %
           datetime.now())
    from . import config
    from importlib import reload
    oldchecks = list(config.checks)
    config.checks.clear()
    try:
        # presumably re-executing the config module repopulates
        # config.checks -- verify against the config file format
        reload(config.configmodule)
    except Exception as e:
        # Drop whatever a half-executed config managed to register and
        # go back to the checks we had before the reload attempt.
        # NOTE(review): other config attributes touched by the partial
        # reload are not rolled back here.
        config.checks[:] = oldchecks
        logging.error("Cannot reload config, keeping previous checks: %s" %
                      str(e))
        return
    config.checks.merge(oldchecks)
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("-1", "--one",
......@@ -85,11 +96,12 @@ def import_config(configfile):
filename = os.path.basename(configfile)
base, ext = os.path.splitext(filename)
try:
import_module(base)
configmodule = import_module(base)
except ImportError as e:
logging.critical("Cannot load config from '%s': %s" % (
configfile, str(e)))
sys.exit(1)
config.install_attr('configmodule', configmodule)
def run():
......@@ -108,6 +120,7 @@ def run():
# register signal handling
signal.signal(signal.SIGUSR1, __usr1_handler)
signal.signal(signal.SIGALRM, __alarm_handler)
signal.signal(signal.SIGHUP, __hangup_handler)
# register report signal interval
if config.emails.report.every > 0:
......
......@@ -22,6 +22,19 @@ class Checks(list):
for check in checks:
self += [check(d, **options) for d in dests]
# This should (assuming "other" is the older list):
# - pick up checks defined in both lists (keeping the old one with its variables)
# - send an email for failing checks in the other list but not in ours (they were removed)
def merge(self, other):
    """Carry over checks from ``other`` (the older list) into this list.

    Every entry of this list that compares equal to an old check is
    replaced by that old check object. An old check with no equal entry
    here whose ``ok`` attribute is falsy is reported through
    ``mails.send_email_for_check`` with the "removed" flag set.
    """
    for previous in other:
        # positions in the current list holding an equal check
        matching = [pos for pos, current in enumerate(self)
                    if previous == current]
        for pos in matching:
            self[pos] = previous
        if matching or previous.ok:
            continue
        # not found and not ok: report the removed check
        mails.send_email_for_check(previous, True)
class Check(object):
def __init__(self, dest, **options):
......@@ -51,6 +64,11 @@ class Check(object):
self.retry_count,
self.retry)
def __eq__(self, other):
return (self.__class__.__name__ == other.__class__.__name__ and
self.dest == other.dest and
self._options == other._options)
def setup(self):
pass
......
......@@ -105,12 +105,15 @@ def send_email(subject, body, extra_headers={}):
msg.as_string())
def send_email_for_check(check):
def send_email_for_check(check, removed=False):
from . import config
# ensure we do not traceback with unknown substitutions
state = 'OK' if check.ok else 'Problem'
if removed:
state = 'Removed'
subject = config.emails.subject_tpl.format_map(
defaultdict(lambda: "<no substitution>",
state='OK' if check.ok else 'Problem',
state=state,
check=check.__class__.__name__,
dest=check.target_name))
......@@ -123,13 +126,16 @@ def send_email_for_check(check):
msg_text += ("recovered after %s (%d %s)." %
(delta, n, "retry" if n == 1 else "retries"))
else:
msg_text += ("failure:\n%s\n" % check.errmsg.strip())
msg_text += ("\nFYI, last exec was:\n%s\n" % check.last_exec.strip())
if removed:
msg_text += "The check was removed from configuration.\n"
else:
msg_text += ("failure:\n%s\n" % check.errmsg.strip())
msg_text += ("\nLast run was:\n%s\n" % check.last_exec.strip())
extra_headers = {}
extra_headers['Message-ID'] = make_msgid(type(check).__name__)
# if check is OK it's a follow up, so set In-Reply-To
if check.ok and hasattr(check, 'mails_msgid'):
# if it's a follow up, set In-Reply-To
if hasattr(check, 'mails_msgid'):
extra_headers['In-Reply-To'] = check.mails_msgid
extra_headers['References'] = check.mails_msgid
check.mails_msgid = extra_headers['Message-ID']
......
......@@ -3,8 +3,9 @@ Description=Picomon Monitoring Daemon for %i
After=network.target
[Service]
ExecStart=/usr/local/bin/picomon-watchdog -c /etc/picomon/%i.py
#ExecReload=/bin/kill -HUP $MAINPID
ExecStart=/usr/local/bin/picomon -c /etc/picomon/%i.py
ExecReload=/bin/kill -HUP $MAINPID
Restart=on-failure
User=nobody
Group=nogroup
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment