monitower

nimble and trivial monitoring suite
Log | Files | Refs

commit 32e078e44e887abee76c5e70c891a653b2d588ba
parent 1e6fb03bc4a58304e178a829871754489fc38461
Author: Josuah Demangeon <me@josuah.net>
Date:   Sun, 25 Oct 2020 11:00:23 +0100

remove bin/ and merge some commands, simplify check-*

Diffstat:
M Makefile | 2 +-
D bin/check-dns | 6 ------
D bin/check-http | 6 ------
D bin/check-load | 7 -------
D bin/check-mail | 27 ---------------------------
D bin/check-mysql-slow-queries | 12 ------------
D bin/check-ping | 4 ----
D bin/check-ssh | 4 ----
D bin/check-tcp | 6 ------
D bin/check-tls | 8 --------
D bin/monitower-event | 5 -----
D bin/monitower-run | 35 -----------------------------------
D bin/monitower-show | 75 ---------------------------------------------------------------------------
D bin/monitower-status | 16 ----------------
A check-dns | 6 ++++++
A check-http | 6 ++++++
A check-load | 8 ++++++++
A check-mail | 29 +++++++++++++++++++++++++++++
A check-mysql-slow-queries | 12 ++++++++++++
A check-ping | 4 ++++
A check-ssh | 4 ++++
A check-tcp | 6 ++++++
A check-tls | 5 +++++
A monitower-graph | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A monitower-run | 37 +++++++++++++++++++++++++++++++++++++
A monitower-status | 18 ++++++++++++++++++
26 files changed, 218 insertions(+), 212 deletions(-)

diff --git a/Makefile b/Makefile @@ -2,4 +2,4 @@ PREFIX = /usr/local install: mkdir -p ${PREFIX}${DESTDIR}/bin - cp -r bin/* ${PREFIX}${DESTDIR}/bin + cp -r monitower-* check-* ${PREFIX}${DESTDIR}/bin diff --git a/bin/check-dns b/bin/check-dns @@ -1,6 +0,0 @@ -#!/bin/sh -eu -for x; do export "var_$x"; done - -: ${var_timeout:=3} - -exec dig +timeout="$var_timeout" -t "$var_type" "@$var_ip" "$var_dom" diff --git a/bin/check-http b/bin/check-http @@ -1,6 +0,0 @@ -#!/bin/sh -eu -for x; do export "var_$x"; done - -: ${var_method:=GET} - -exec curl -Lvs -X "${var_method}" "${var_url}" >/dev/null diff --git a/bin/check-load b/bin/check-load @@ -1,7 +0,0 @@ -#!/bin/sh -eu -for x; do export "var_$x"; done - -: ${var_load:=2} - -load=$(uptime | sed 'y/LOAD,/load./; s/.*load[^0-9]*//; s/[^0-9].*//') -exec [ "$load" -gt "$var_load" ] diff --git a/bin/check-mail b/bin/check-mail @@ -1,27 +0,0 @@ -#!/bin/sh -eu -for x; do export "var_$x"; done - -: ${var_port:=25} ${var_timeout:=1} - -out() { - echo "C: $*" >&2 - printf '%s\r\n' "$*" - sleep 0.4 -} - -{ -out "EHLO monitoring" -out "MAIL from:<monitoring>" -out "RCPT to:<$var_mail>" -out "DATA" -out "" -out "From: monitorin" -out "To: $var_mail" -out "Subject: check=mail time=$(date +%s)" -out "" -out "Message body." -out "." 
-} | nc -w "$var_timeout" "$var_ip" "$var_port" | while read code line; do - echo "S: $code $line" >&2 - case $code in (5*) exit 1 ;; esac -done diff --git a/bin/check-mysql-slow-queries b/bin/check-mysql-slow-queries @@ -1,12 +0,0 @@ -#!/bin/sh -eu -for x; do export "var_$x"; done - -: ${var_time:=30} - -q=" - SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST - WHERE Time > $var_time - AND Command != 'Sleep' -" - -exec [ -z "$(exec mysql -Be "$q")" ] diff --git a/bin/check-ping b/bin/check-ping @@ -1,4 +0,0 @@ -#!/bin/sh -eu -for x; do export "var_$x"; done - -exec ping -c 1 -t 1 "$var_ip" diff --git a/bin/check-ssh b/bin/check-ssh @@ -1,4 +0,0 @@ -#!/bin/sh -eu -for x; do export "var_$x"; done - -exec ssh-keyscan -T 1 "$var_ip" diff --git a/bin/check-tcp b/bin/check-tcp @@ -1,6 +0,0 @@ -#!/bin/sh -eu -for x; do export "var_$x"; done - -: ${var_timeout:=3} - -exec nc -w "$var_timeout" -vz "$var_ip" "$var_port" diff --git a/bin/check-tls b/bin/check-tls @@ -1,8 +0,0 @@ -#!/bin/sh -eu -for x; do export "var_$x"; done - -exec openssl s_client - -servername "$var_dom" \ - -connect "$var_dom:$var_port" \ - -verify_return_error -verify_depth 3 \ - </dev/null diff --git a/bin/monitower-event b/bin/monitower-event @@ -1,5 +0,0 @@ -#!/bin/sh -eu - -id=$(dd if=/dev/urandom bs=1 count=8 2>/dev/null | od -An -tx1 | tr -cd 0-9a-f) - -exec logger -nc -d "$*" -t monitower -p alert -m "$id" diff --git a/bin/monitower-run b/bin/monitower-run @@ -1,35 +0,0 @@ -#!/bin/sh -eu - -# This part of monitower reads a single log file as input, and for every line, -# split it as key=values format, and trigger the "$check" for "$host" and store it -# as "$host/$name" in the spool directory. -# -# This way, as few as possible is performed every time, and no complex configuration -# parsing and exploitation is made at every run, and this script is inexpensive enough -# to be run from a /etc/crontab. 
- -check() { set -u +e - local state now="$(date +%s)" - for x; do local "var_$x"; done - - mkdir -p "$spool/$var_host/$var_name" - - out=$("check-$var_check" "$@" 2>&1) - case $? in (0) state=ok ;; (*) state=err ;; esac - - echo "time=$now state=$state" >>$spool/$var_host/$var_name/current - - case $state in - (err) echo "$out" | monitower-event time="$now" state="$state" "$@" ;; - esac -} - -[ $# -gt 0 ] || set -- /etc/monitower/check.d/* - -spool=/var/spool/monitower -IFS=' -' -sed -r 's,\\\\,\\s,g; s,\\",\\q,g' "$@" \ -| while IFS=' ' read line; do - check $(echo $line | xargs -n 1) & -done diff --git a/bin/monitower-show b/bin/monitower-show @@ -1,75 +0,0 @@ -#!/usr/bin/awk -f - -# Turn a stream of "t=1600297219 " into a bar graph ||||||| with green/red -# to show availability of service. - -function state_color(st) { - return (st == "err") ? 31 : (st == "ok") ? 32 : 34 -} - -function flush_values(ctx) -{ - printf "\033[%d;1m|\033[m", state_color(ctx["state"]) - ctx["state"] = "ok" - ctx["progress"] += STEP -} - -function value_init(ctx) -{ - ctx["progress"] = START - ctx["status"] = "ok" -} - -function value_add(time, value, ctx) -{ - if (time > ctx["progress"]) - flush_values(ctx) - - # catch up with time by filling the gaps with spaces - while (ctx["progress"] < time) { - printf " " - ctx["progress"] += STEP - } - - # order of priority is "err" > "ok" - ctx["state"] = (value == "err" ? "err" : ctx["state"]) -} - -function var_split(input, var, - key, fields, i) -{ - split(input, fields, " ") - for (i in fields) { - key = substr(fields[i], 1, index(fields[i], "=") - 1) - var[key] = substr(fields[i], index(fields[i], "=") + 1) - } -} - -BEGIN { - cmd = "date +%s" - cmd | getline NOW - close(cmd) - - STEP = ENVIRON["STEP"] ? ENVIRON["STEP"] : 60 - COUNT = ENVIRON["COUNT"] ? 
ENVIRON["COUNT"] : 80 - NOW = NOW - NOW % STEP + STEP - START = NOW - STEP * COUNT - START = START - START % STEP + STEP - - value_init(ctx) - - printf " %s >", START -} - -{ var_split($0, var) } - -var["time"] >= START { - value_add(var["time"], var["state"], ctx) -} - -{ split("", var) } - -END { - value_add(NOW, -1, ctx) - printf "< %s\n", NOW -} diff --git a/bin/monitower-status b/bin/monitower-status @@ -1,16 +0,0 @@ -#!/bin/sh -eu - -spool=/var/spool/monitower - -if [ "$#" = 0 ]; then - for host in "$spool/"*; do - set -- "$@" "${host##*/}" - done -fi - -for path in "$spool/"*"/"*"/current"; do - echo - echo "${path#$spool/}" - monitower-show "$path" -done -echo diff --git a/check-dns b/check-dns @@ -0,0 +1,6 @@ +#!/bin/sh -eu +export x "$@" + +: ${timeout:=3} + +exec dig +timeout="$timeout" -t "$type" "@$ip" "$dom" diff --git a/check-http b/check-http @@ -0,0 +1,6 @@ +#!/bin/sh -eu +export x "$@" + +: ${method:=GET} + +exec curl -Lvs -X "$method" "$url" >/dev/null diff --git a/check-load b/check-load @@ -0,0 +1,8 @@ +#!/bin/sh -eu +export x "$@" + +: ${load:=2} + +stat=$(uptime | sed 'y/LOAD,/load./; s/.*load[^0-9]*//; s/[^0-9].*//') +echo "$stat" >/dev/stderr +exec [ "$stat" -gt "$load" ] diff --git a/check-mail b/check-mail @@ -0,0 +1,29 @@ +#!/bin/sh -eu +export x "$@" + +: ${port:=25} ${timeout:=1} ${host:=$(hostname)} + +out() { + echo "C: $*" >&2 + printf '%s\r\n' "$*" + sleep 0.4 +} + +{ + out "EHLO monitoring" + out "MAIL from:<monitoring>" + out "RCPT to:<$mail>" + out "DATA" + out "" + out "From: $(whoami)@$host" + out "To: $mail" + out "Subject: check=mail time=$(date +%s)" + out "Date: $(date +'%a, %m %b %Y %H:%M:%S %+z')" + out "Message-Id: $RANDOM.$RANDOM.$RANDOM.$RANDOM@$host" + out "X-Monitower: $*" + out "" + out "." 
+} | nc -w "$timeout" "$ip" "$port" | while read code line; do + echo "S: $code $line" | tr -d '\r'>&2 + case $code in (5*) exit 1 ;; esac +done diff --git a/check-mysql-slow-queries b/check-mysql-slow-queries @@ -0,0 +1,12 @@ +#!/bin/sh -eu +export x "$@" + +: ${time:=30} + +q=" + SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST + WHERE Time > $time + AND Command != 'Sleep' +" + +exec [ -z "$(exec mysql -Be "$q")" ] diff --git a/check-ping b/check-ping @@ -0,0 +1,4 @@ +#!/bin/sh -eu +export x "$@" + +exec ping -c 1 -t 1 "$ip" diff --git a/check-ssh b/check-ssh @@ -0,0 +1,4 @@ +#!/bin/sh -eu +export x "$@" + +exec ssh-keyscan -T 1 "$ip" diff --git a/check-tcp b/check-tcp @@ -0,0 +1,6 @@ +#!/bin/sh -eu +export x "$@" + +: ${timeout:=3} + +exec nc -w "$timeout" -vz "$ip" "$port" diff --git a/check-tls b/check-tls @@ -0,0 +1,5 @@ +#!/bin/sh -eu +export x "$@" + +exec openssl s_client </dev/null \ + -servername "$dom" -connect "$dom:$port" -verify_return_error -verify_depth 3 diff --git a/monitower-graph b/monitower-graph @@ -0,0 +1,82 @@ +#!/usr/bin/awk -f + +# Turn a stream of "time=1600297219 state=(ok|err)" into a graph with green/red +# to show availability of service. + +function state_glyph(st) { + if (TTY) { + color = (st == "err") ? 31 : (st == "ok") ? 32 : 34 + return "\033[1;" color "m|\033[m" + } else { + return (st == "err") ? "X" : (st == "ok") ? "|" : ":" + } +} + +function value_flush(ctx) +{ + printf "%s", state_glyph(ctx["state"]) + ctx["state"] = "ok" + ctx["progress"] += STEP +} + +function value_init(ctx) +{ + ctx["progress"] = START + ctx["status"] = "ok" +} + +function value_add(time, value, ctx) +{ + if (time > ctx["progress"]) + value_flush(ctx) + + # catch up with time by filling the gaps with spaces + while (ctx["progress"] < time) { + printf " " + ctx["progress"] += STEP + } + + # order of priority is "err" > "ok" + ctx["state"] = (value == "err" ? 
"err" : ctx["state"]) +} + +function split_var(input, var, + key, fields, i) +{ + split(input, fields, " ") + for (i in fields) { + key = substr(fields[i], 1, index(fields[i], "=") - 1) + var[key] = substr(fields[i], index(fields[i], "=") + 1) + } +} + +BEGIN { + cmd = "date +%s" + cmd | getline NOW + close(cmd) + + STEP = ENVIRON["STEP"] ? ENVIRON["STEP"] : 60 + COUNT = ENVIRON["COUNT"] ? ENVIRON["COUNT"] : 80 + NOW = NOW - NOW % STEP + STEP + START = NOW - STEP * COUNT + START = START - START % STEP + STEP + + TTY = system("exec tty >/dev/null 2>&1") == 0 && !("NOCOLOR" in ENVIRON) + + value_init(ctx) + + printf " %s >", START +} + +{ split_var($0, var) } + +var["time"] >= START { + value_add(var["time"], var["state"], ctx) +} + +{ split_var("", var) } + +END { + value_add(NOW, -1, ctx) + printf "< %s\n", NOW +} diff --git a/monitower-run b/monitower-run @@ -0,0 +1,37 @@ +#!/bin/sh -eu +# +# Read a single key=value file as input, run the $cmd and store the result as +# "$MONITOWER_SPOOL/$host/$name.log" +# +# This way, no complex configuration or parsing needed every run, and this +# script is inexpensive enough to be run from a /etc/crontab. +# + +check() { set -u +e + local state level "$@" + + mkdir -p "$MONITOWER_SPOOL/$host" + + "$cmd" "$@" >&2 + + case $? 
in + (0) state=ok level=debug ;; + (*) state=err ;; + esac + + logger -cs -t monitower -p "$MONITOWER_FACILITY.${level:-alert}" \ + time="$now" state="$state" "$@" + + echo "time=$now state=$state" >>$MONITOWER_SPOOL/$host/$name.log +} + +: ${MONITOWER_SPOOL:=/var/spool/monitower} +: ${MONITOWER_FACILITY:=local7} + +now=$(date +%s) +IFS=' +' +[ $# = 0 ] && set /etc/monitower.conf +sed -r 's,\\\\,\\s,g; s,\\",\\q,g' "$@" | while read line; do + check $(echo $line | xargs -n 1) & +done diff --git a/monitower-status b/monitower-status @@ -0,0 +1,18 @@ +#!/bin/sh -eu + +: ${MONITOWER_SPOOL:=/var/spool/monitower} + +if [ "$#" = 0 ]; then + for host in "$MONITOWER_SPOOL/"*; do + set -- "$@" "${host##*/}" + done +fi + +for path in "$MONITOWER_SPOOL"/*/*; do + name=${path#$MONITOWER_SPOOL/} + name=${name%.log} + echo + echo "$name" + monitower-graph "$path" +done +echo