monitower

nimble and trivial monitoring suite
Log | Files | Refs

commit 0e5cbc0c7c1ca16274356504f146a39e5ef58745
parent f78c5a4bb2bf422c2a4a413dee1b3f85143bb03e
Author: Josuah Demangeon <me@josuah.net>
Date:   Thu, 17 Sep 2020 01:07:41 +0200

initial commit

Diffstat:
A Makefile | 5 +++++
A bin/check-dns | 4 ++++
A bin/check-mail | 27 +++++++++++++++++++++++++++
D bin/monit-vmstat | 73 -------------------------------------------------------------------------
A bin/monitower-event | 6 ++++++
M bin/monitower-run | 20 +++++++++++++-------
M bin/monitower-show | 60 ++++++++++++++++++++++++++++++++----------------------------
M bin/monitower-status | 18 +++++++++---------
8 files changed, 96 insertions(+), 117 deletions(-)

diff --git a/Makefile b/Makefile @@ -0,0 +1,5 @@ +PREFIX = /usr/local + +install: + mkdir -p ${PREFIX}${DESTDIR}/bin + cp -r bin/* ${PREFIX}${DESTDIR}/bin diff --git a/bin/check-dns b/bin/check-dns @@ -0,0 +1,4 @@ +#!/bin/sh -eu +for x; do export "var_$x"; done + +exec dig -t "$var_type" "@$var_ip" "$var_dom" diff --git a/bin/check-mail b/bin/check-mail @@ -0,0 +1,27 @@ +#!/bin/sh -eu +for x; do export "var_$x"; done + +: ${var_port:=25} ${var_timeout:=1} + +out() { + echo "C: $*" >&2 + printf '%s\r\n' "$*" + sleep 0.4 +} + +{ +out "EHLO monitoring" +out "MAIL from:<monitoring>" +out "RCPT to:<$var_mail>" +out "DATA" +out "" +out "From: monitorin" +out "To: $var_mail" +out "Subject: check=mail time=$(date +%s)" +out "" +out "Message body." +out "." +} | nc -w "$var_timeout" "$var_ip" "$var_port" | while read code line; do + echo "S: $code $line" >&2 + case $code in (5*) exit 1 ;; esac +done diff --git a/bin/monit-vmstat b/bin/monit-vmstat @@ -1,73 +0,0 @@ -#!/usr/bin/awk -f - -function out(name, value) -{ - if (value) - print HOST "." name, $value, NOW -} - -BEGIN { - cmd = "hostname" - cmd | getline HOST - close (cmd) - - sub("\\..*", "", HOST) -} - -$1 ~ /-*procs-*/ { - n = 1 - for (i = 1; match(substr($0, i), "[^ ]+ *"); i += RSTART-1 + RLENGTH) { - header_start[n] = i - header_name[n] = substr($0, i) - sub("^[- ]*", "", header_name[n]) - sub("[- ].*", "", header_name[n]) - n++ - } - next -} - -$1 == "r" { - f = 1 - for (i = 1; i in header_start; i++) { - end = ((i + 1) in header_start) ? header_start[i + 1] : 10000 - section = substr($0, header_start[i], end - header_start[i] - 1) - while (sub(" *[^ ]+ *", "", section)) { - key = header_name[i] "." 
$f - F[key] = f++ - } - } - next -} - -NR > 3 { - cmd = "date +%s" - cmd | getline NOW - close(cmd) - - gsub("k", "000") - gsub("M", "000000") - gsub("G", "000000000") - gsub("T", "000000000000") - gsub("E", "000000000000000") - - print "" - for (i in F) - print i, "=", $F[i] - - next - out("sys.proc.run", "procs.r") - out("sys.proc.back", "procs.b") - out("sys.mem.avail", "memory.avm") - out("sys.mem.free", "memory.fre") - out("sys.page.fault", "page.flt") - out("sys.interrupt", "int") - out("sys.syscall", "sy") - out("sys.ctxswitch", "ctx") - out("sys.cpu.user", "us") - out("sys.cpu.system", "su") - - for (i in F) { - if (i ~ "^disks.[a-z][0-9]") - out("sys.disk." substr(i, length("disks.")), F[i]) - } -} diff --git a/bin/monitower-event b/bin/monitower-event @@ -0,0 +1,6 @@ +#!/bin/sh -eu + +# Trigger an alert on monitoring event failure. + +printf ' [["%s"]]' "$@" +printf '\n' diff --git a/bin/monitower-run b/bin/monitower-run @@ -8,20 +8,26 @@ # parsing and exploitation is made at every run, and this script is inexpensive enough # to be run from a /etc/crontab. -check() { set -eu +check() { set -u +e + local state now="$(date +%s)" for x; do local "var_$x"; done mkdir -p "$spool/$var_host/$var_name" - "check-$var_check" "$@" - echo "time=$now exit=$? $*" >>$spool/$var_host/$var_name/current + + out=$("check-$var_check" "$@" 2>&1) + case $? 
in (0) state=ok ;; (*) state=err ;; esac + + echo "time=$now state=$state" >>$spool/$var_host/$var_name/current + + case $state in + (err) echo "$out" | monitower-event time="$now" state="$state" "$@" ;; + esac } -now=$(date +%s) spool=/var/spool/monitower IFS=' ' - sed -r 's,\\\\,\\s,g; s,\\",\\q,g' "$@" \ -| while IFS=' ' read name check line; do - check $(echo $line | xargs -n 1) +| while IFS=' ' read line; do + check $(echo $line | xargs -n 1) & done diff --git a/bin/monitower-show b/bin/monitower-show @@ -1,19 +1,30 @@ #!/usr/bin/awk -f +# Turn a stream of "t=1600297219 " into a bar graph ||||||| with green/red +# to show availability of service. + +function state_color(st) { + return (st == "err") ? 31 : (st == "ok") ? 32 : 34 +} + function flush_values(ctx) { - printf "\033[%d;1m|\033[m", (32 - (ctx["sum"] < ctx["num"])) - delete ctx["sum"] - delete ctx["num"] + printf "\033[%d;1m|\033[m", state_color(ctx["state"]) + ctx["state"] = "ok" ctx["progress"] += STEP } -function add_value(time, value, ctx) +function value_init(ctx) +{ + ctx["progress"] = START + ctx["status"] = "ok" +} + +function value_add(time, value, ctx) { - if (!(id in progress)) - ctx["progress"] = START + #print "time="time, "step="STEP, "start="START, "progress="ctx["progress"], "value="value, "state="ctx["state"] - if (time > ctx["progress"] && ctx["num"] > 0) + if (time > ctx["progress"]) flush_values(ctx) # catch up with time by filling the gaps with spaces @@ -22,18 +33,17 @@ function add_value(time, value, ctx) ctx["progress"] += STEP } - # accumulate the sum and num of entries now that we are at the tip - ctx["sum"] += (value == "ok") - ctx["num"] += 1 + # order of priority is "err" > "ok" + ctx["state"] = (value == "err" ? 
"err" : ctx["state"]) } -function var_split(input, vars, +function var_split(input, var, key, fields, i) { split(input, fields, " ") for (i in fields) { key = substr(fields[i], 1, index(fields[i], "=") - 1) - vars[key] = substr(fields[i], index(fields[i], "=") + 1) + var[key] = substr(fields[i], index(fields[i], "=") + 1) } } @@ -41,33 +51,27 @@ BEGIN { cmd = "date +%s" cmd | getline NOW close(cmd) - NOW = NOW - NOW % STEP + STEP STEP = ENVIRON["STEP"] ? ENVIRON["STEP"] : 600 COUNT = ENVIRON["COUNT"] ? ENVIRON["COUNT"] : 80 + NOW = NOW - NOW % STEP + STEP START = NOW - STEP * COUNT START = START - START % STEP + STEP - NAME = ARGV[1] - sub("/*$", "", NAME) - sub(".*/", "", NAME) + value_init(ctx) - printf " %-15s %s |", NAME, START + printf " %s >", START } -START <= $1 { - time = $1 - value = $2 - sub("[^ ]+ [^ ]+ ", "", $0) +{ var_split($0, var) } - var_split($0, vars) - add_value(time, value, ctx) - - split("", var) +var["time"] >= START { + value_add(var["time"], var["state"], ctx) } +{ split("", var) } + END { - for (id in out) - add_value(NOW, "err", ctx) - printf "| %s\n", NOW + value_add(NOW, -1, ctx) + printf "< %s\n", NOW } diff --git a/bin/monitower-status b/bin/monitower-status @@ -1,16 +1,16 @@ #!/bin/sh -eu -base=$(cd "${0%/*}/.."; pwd) +spool=/var/spool/monitower -for path in /var/spool/monitor/*/current; do - [ -f "$path" ] || continue - - host=${path%/current} - host=${host##*/} +if [ "$#" = 0 ]; then + for host in "$spool/"*; do + set -- "$@" "${host##*/}" + done +fi +for path in "$spool/"*"/"*"/current"; do echo - echo "$host" - - monitor-show "$path" + echo "${path#$spool/}" + monitower-show "$path" done echo