diff options
Diffstat (limited to 'src/supervision')
-rw-r--r-- | src/supervision/deps-exe/s6-supervise | 3 | ||||
-rw-r--r-- | src/supervision/deps-exe/s6-svc | 2 | ||||
-rw-r--r-- | src/supervision/deps-exe/s6-svok | 1 | ||||
-rw-r--r-- | src/supervision/deps-exe/s6-svscan | 3 | ||||
-rw-r--r-- | src/supervision/deps-exe/s6-svscanctl | 2 | ||||
-rw-r--r-- | src/supervision/deps-exe/s6-svstat | 3 | ||||
-rw-r--r-- | src/supervision/deps-exe/s6-svwait | 3 | ||||
-rw-r--r-- | src/supervision/s6-supervise.c | 508 | ||||
-rw-r--r-- | src/supervision/s6-svc.c | 12 | ||||
-rw-r--r-- | src/supervision/s6-svok.c | 32 | ||||
-rw-r--r-- | src/supervision/s6-svscan.c | 498 | ||||
-rw-r--r-- | src/supervision/s6-svscanctl.c | 12 | ||||
-rw-r--r-- | src/supervision/s6-svstat.c | 70 | ||||
-rw-r--r-- | src/supervision/s6-svwait.c | 104 |
14 files changed, 1253 insertions, 0 deletions
diff --git a/src/supervision/deps-exe/s6-supervise b/src/supervision/deps-exe/s6-supervise new file mode 100644 index 0000000..58a34e0 --- /dev/null +++ b/src/supervision/deps-exe/s6-supervise @@ -0,0 +1,3 @@ +-ls6 +-lskarnet +${TAINNOW_LIB} diff --git a/src/supervision/deps-exe/s6-svc b/src/supervision/deps-exe/s6-svc new file mode 100644 index 0000000..83cec1e --- /dev/null +++ b/src/supervision/deps-exe/s6-svc @@ -0,0 +1,2 @@ +-ls6 +-lskarnet diff --git a/src/supervision/deps-exe/s6-svok b/src/supervision/deps-exe/s6-svok new file mode 100644 index 0000000..e7187fe --- /dev/null +++ b/src/supervision/deps-exe/s6-svok @@ -0,0 +1 @@ +-lskarnet diff --git a/src/supervision/deps-exe/s6-svscan b/src/supervision/deps-exe/s6-svscan new file mode 100644 index 0000000..58a34e0 --- /dev/null +++ b/src/supervision/deps-exe/s6-svscan @@ -0,0 +1,3 @@ +-ls6 +-lskarnet +${TAINNOW_LIB} diff --git a/src/supervision/deps-exe/s6-svscanctl b/src/supervision/deps-exe/s6-svscanctl new file mode 100644 index 0000000..83cec1e --- /dev/null +++ b/src/supervision/deps-exe/s6-svscanctl @@ -0,0 +1,2 @@ +-ls6 +-lskarnet diff --git a/src/supervision/deps-exe/s6-svstat b/src/supervision/deps-exe/s6-svstat new file mode 100644 index 0000000..7065b26 --- /dev/null +++ b/src/supervision/deps-exe/s6-svstat @@ -0,0 +1,3 @@ +-ls6 +-lskarnet +${SYSCLOCK_LIB} diff --git a/src/supervision/deps-exe/s6-svwait b/src/supervision/deps-exe/s6-svwait new file mode 100644 index 0000000..58a34e0 --- /dev/null +++ b/src/supervision/deps-exe/s6-svwait @@ -0,0 +1,3 @@ +-ls6 +-lskarnet +${TAINNOW_LIB} diff --git a/src/supervision/s6-supervise.c b/src/supervision/s6-supervise.c new file mode 100644 index 0000000..f9a9872 --- /dev/null +++ b/src/supervision/s6-supervise.c @@ -0,0 +1,508 @@ +/* ISC license. */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <skalibs/allreadwrite.h> +#include <skalibs/bytestr.h> +#include <skalibs/uint.h> +#include <skalibs/strerr2.h> +#include <skalibs/tai.h> +#include <skalibs/iopause.h> +#include <skalibs/djbunix.h> +#include <skalibs/sig.h> +#include <skalibs/selfpipe.h> +#include <skalibs/environ.h> +#include <skalibs/skamisc.h> +#include <s6/ftrigw.h> +#include <s6/s6-supervise.h> + +#define USAGE "s6-supervise dir" + +typedef enum trans_e trans_t, *trans_t_ref ; +enum trans_e +{ + V_TIMEOUT, V_CHLD, V_TERM, V_HUP, V_QUIT, + V_a, V_b, V_q, V_h, V_k, V_t, V_i, V_1, V_2, V_f, V_F, V_p, V_c, + V_o, V_d, V_u, V_x, V_O +} ; + +typedef enum state_e state_t, *state_t_ref ; +enum state_e +{ + DOWN, + UP, + FINISH, + LASTUP, + LASTFINISH +} ; + +typedef void action_t (void) ; +typedef action_t *action_t_ref ; + +static tain_t deadline ; +static s6_svstatus_t status = { .stamp = TAIN_ZERO, .pid = 0, .flagwant = 1, .flagwantup = 1, .flagpaused = 0, .flagfinishing = 0 } ; +static state_t state = DOWN ; +static int flagsetsid = 1 ; +static int cont = 1 ; + +static inline void settimeout (int secs) +{ + tain_addsec_g(&deadline, secs) ; +} + +static inline void settimeout_infinite (void) +{ + tain_add_g(&deadline, &tain_infinite_relative) ; +} + +static inline void announce (void) +{ + if (!s6_svstatus_write(".", &status)) + strerr_warnwu1sys("write status file") ; +} + + +/* The action array. */ + +static void nop (void) +{ +} + +static void bail (void) +{ + cont = 0 ; +} + +static void killa (void) +{ + kill(status.pid, SIGALRM) ; +} + +static void killb (void) +{ + kill(status.pid, SIGABRT) ; +} + +static void killh (void) +{ + kill(status.pid, SIGHUP) ; +} + +static void killq (void) +{ + kill(status.pid, SIGQUIT) ; +} + +static void killk (void) +{ + kill(status.pid, SIGKILL) ; +} + +static void killt (void) +{ + kill(status.pid, SIGTERM) ; +} + +static void killi (void) +{ + kill(status.pid, SIGINT) ; +} + +static void kill1 (void) +{ + kill(status.pid, SIGUSR1) ; +} + +static void kill2 (void) +{ + kill(status.pid, SIGUSR2) ; +} + +static void killp (void) +{ + kill(status.pid, SIGSTOP) ; + status.flagpaused = 1 ; + announce() ; +} + +static void killc (void) +{ + kill(status.pid, SIGCONT) ; + status.flagpaused = 0 ; + announce() ; +} + +static void trystart (void) +{ + int p[2] ; + pid_t pid ; + if (pipecoe(p) < 0) + { + settimeout(60) ; + strerr_warnwu1sys("pipecoe (waiting 60 seconds)") ; + return ; + } + pid = fork() ; + if (pid < 0) + { + settimeout(60) ; + strerr_warnwu1sys("fork (waiting 60 seconds)") ; + fd_close(p[1]) ; fd_close(p[0]) ; + return ; + } + else if (!pid) + { + char const *cargv[2] = { "run", 0 } ; + PROG = "s6-supervise (child)" ; + selfpipe_finish() ; + fd_close(p[0]) ; + if (flagsetsid) setsid() ; + execve("./run", (char *const *)cargv, (char *const *)environ) ; + fd_write(p[1], "", 1) ; + strerr_dieexec(111, "run") ; + } + fd_close(p[1]) ; + { + char c ; + switch (fd_read(p[0], &c, 1)) + { + case -1 : + fd_close(p[0]) ; + settimeout(60) ; + strerr_warnwu1sys("read pipe (waiting 60 seconds)") ; + kill(pid, SIGKILL) ; + return ; + case 1 : + { + fd_close(p[0]) ; + settimeout(10) ; + strerr_warnwu1x("spawn ./run - waiting 10 seconds") ; + return ; + } + } + } + fd_close(p[0]) ; + settimeout_infinite() ; + state = UP ; + status.pid = pid ; + tain_copynow(&status.stamp) ; + announce() ; + ftrigw_notify(S6_SUPERVISE_EVENTDIR, 'u') ; +} + +static void downtimeout (void) +{ + if (status.flagwant && status.flagwantup) trystart() ; + else settimeout_infinite() ; +} + +static void down_O (void) +{ + status.flagwant = 0 ; + announce() ; +} + +static void down_o (void) +{ + down_O() ; + trystart() ; +} + +static void down_u (void) +{ + status.flagwant = 1 ; + status.flagwantup = 1 ; + announce() ; + trystart() ; +} + +static void down_d (void) +{ + status.flagwant = 1 ; + status.flagwantup = 0 ; + announce() ; +} + +static void tryfinish (int wstat, int islast) +{ + register pid_t pid = fork() ; + if (pid < 0) + { + strerr_warnwu2sys("fork for ", "./finish") ; + if (islast) bail() ; + state = DOWN ; + status.pid = 0 ; + settimeout(1) ; + return ; + } + else if (!pid) + { + char fmt0[UINT_FMT] ; + char fmt1[UINT_FMT] ; + char *cargv[4] = { "finish", fmt0, fmt1, 0 } ; + selfpipe_finish() ; + fmt0[uint_fmt(fmt0, WIFSIGNALED(wstat) ? 255 : WEXITSTATUS(wstat))] = 0 ; + fmt1[uint_fmt(fmt1, WIFSIGNALED(wstat))] = 0 ; + if (flagsetsid) setsid() ; + execve("./finish", cargv, (char *const *)environ) ; + _exit(111) ; + } + status.pid = pid ; + status.flagfinishing = 1 ; + state = islast ? LASTFINISH : FINISH ; + settimeout(5) ; +} + +static void uptimeout (void) +{ + settimeout_infinite() ; + strerr_warnw1x("can't happen: timeout while the service is up!") ; +} + +static void up_z (void) +{ + int wstat = status.pid ; + status.pid = 0 ; + tain_copynow(&status.stamp) ; + announce() ; + ftrigw_notify(S6_SUPERVISE_EVENTDIR, 'd') ; + tryfinish(wstat, 0) ; +} + +static void up_o (void) +{ + status.flagwant = 0 ; + announce() ; +} + +static void up_d (void) +{ + status.flagwant = 1 ; + status.flagwantup = 0 ; + killt() ; + killc() ; +} + +static void up_u (void) +{ + status.flagwant = 1 ; + status.flagwantup = 1 ; + announce() ; +} + +static void closethem (void) +{ + fd_close(0) ; + fd_close(1) ; + open_read("/dev/null") ; + open_write("/dev/null") ; +} + +static void up_x (void) +{ + state = LASTUP ; + closethem() ; +} + +static void up_term (void) +{ + up_x() ; + up_d() ; +} + +static void finishtimeout (void) +{ + strerr_warnw1x("finish script takes too long - killing it") ; + killc() ; killk() ; + settimeout(3) ; +} + +static void finish_z (void) +{ + status.pid = 0 ; + status.flagfinishing = 0 ; + state = DOWN ; + announce() ; + settimeout(1) ; +} + +static void finish_u (void) +{ + status.flagwant = 1 ; + status.flagwantup = 1 ; + announce() ; +} + +static void finish_x (void) +{ + state = LASTFINISH ; + closethem() ; +} + +static void lastup_z (void) +{ + int wstat = status.pid ; + status.pid = 0 ; + tain_copynow(&status.stamp) ; + announce() ; + ftrigw_notify(S6_SUPERVISE_EVENTDIR, 'd') ; + tryfinish(wstat, 1) ; +} + +static action_t_ref const actions[5][23] = +{ + { &downtimeout, &nop, &bail, &bail, &bail, + &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, + &down_o, &down_d, &down_u, &bail, &down_O }, + { &uptimeout, &up_z, &up_term, &up_x, &up_term, + &killa, &killb, &killq, &killh, &killk, &killt, &killi, &kill1, &kill2, &nop, &nop, &killp, &killc, + &up_o, &up_d, &up_u, &up_x, &up_o }, + { &finishtimeout, &finish_z, &finish_x, &finish_x, &finish_x, + &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, + &up_o, &down_d, &finish_u, &finish_x, &up_o }, + { &uptimeout, &lastup_z, &up_d, &nop, &up_d, + &killa, &killb, &killq, &killh, &killk, &killt, &killi, &kill1, &kill2, &nop, &nop, &killp, &killc, + &up_o, &up_d, &nop, &nop, &up_o }, + { &finishtimeout, &bail, &nop, &nop, &nop, + &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, &nop, + &nop, &nop, &nop, &nop, &nop } +} ; + + +/* The main loop. + It just loops around the iopause(), calling snippets of code in "actions" when needed. */ + + +static void handle_signals (void) +{ + for (;;) + { + char c = selfpipe_read() ; + switch (c) + { + case -1 : strerr_diefu1sys(111, "selfpipe_read") ; + case 0 : return ; + case SIGCHLD : + if (!status.pid) wait_reap() ; + else + { + int wstat ; + int r = wait_pid_nohang(status.pid, &wstat) ; + if (r < 0) + if (errno != ECHILD) strerr_diefu1sys(111, "wait_pid_nohang") ; + else break ; + else if (!r) break ; + status.pid = wstat ; + (*actions[state][V_CHLD])() ; + } + break ; + case SIGTERM : + (*actions[state][V_TERM])() ; + break ; + case SIGHUP : + (*actions[state][V_HUP])() ; + break ; + case SIGQUIT : + (*actions[state][V_QUIT])() ; + break ; + default : + strerr_dief1x(101, "internal error: inconsistent signal state. Please submit a bug-report.") ; + } + } +} + +static void handle_control (int fd) +{ + for (;;) + { + char c ; + register int r = sanitize_read(fd_read(fd, &c, 1)) ; + if (r < 0) strerr_diefu1sys(111, "read " S6_SUPERVISE_CTLDIR "/control") ; + else if (!r) break ; + else + { + register unsigned int pos = byte_chr("abqhkti12fFpcoduxO", 18, c) ; + if (pos < 18) (*actions[state][V_a + pos])() ; + } + } +} + +int main (int argc, char const *const *argv) +{ + iopause_fd x[2] = { { -1, IOPAUSE_READ, 0 }, { -1, IOPAUSE_READ, 0 } } ; + PROG = "s6-supervise" ; + if (argc < 2) strerr_dieusage(100, USAGE) ; + if (chdir(argv[1]) < 0) strerr_diefu2sys(111, "chdir to ", argv[1]) ; + { + register unsigned int proglen = str_len(PROG) ; + register unsigned int namelen = str_len(argv[1]) ; + char progname[proglen + namelen + 2] ; + byte_copy(progname, proglen, PROG) ; + progname[proglen] = ' ' ; + byte_copy(progname + proglen + 1, namelen + 1, argv[1]) ; + PROG = progname ; + if (!fd_sanitize()) strerr_diefu1sys(111, "sanitize stdin and stdout") ; + x[1].fd = s6_supervise_lock(S6_SUPERVISE_CTLDIR) ; + if (!ftrigw_fifodir_make(S6_SUPERVISE_EVENTDIR, -1, 0)) + strerr_diefu2sys(111, "mkfifodir ", S6_SUPERVISE_EVENTDIR) ; + x[0].fd = selfpipe_init() ; + if (x[0].fd == -1) strerr_diefu1sys(111, "init selfpipe") ; + if (sig_ignore(SIGPIPE) < 0) strerr_diefu1sys(111, "ignore SIGPIPE") ; + { + sigset_t set ; + sigemptyset(&set) ; + sigaddset(&set, SIGTERM) ; + sigaddset(&set, SIGHUP) ; + sigaddset(&set, SIGQUIT) ; + sigaddset(&set, SIGCHLD) ; + if (selfpipe_trapset(&set) < 0) strerr_diefu1sys(111, "trap signals") ; + } + + if (!ftrigw_clean(S6_SUPERVISE_EVENTDIR)) + strerr_warnwu2sys("ftrigw_clean ", S6_SUPERVISE_EVENTDIR) ; + + { + struct stat st ; + if (stat("down", &st) == -1) + { + if (errno != ENOENT) + strerr_diefu1sys(111, "stat down") ; + } + else status.flagwantup = 0 ; + if (stat("nosetsid", &st) == -1) + { + if (errno != ENOENT) + strerr_diefu1sys(111, "stat nosetsid") ; + } + else flagsetsid = 0 ; + } + + tain_now_g() ; + settimeout(0) ; + tain_copynow(&status.stamp) ; + announce() ; + ftrigw_notify(S6_SUPERVISE_EVENTDIR, 's') ; + + while (cont) + { + register int r = iopause_g(x, 2, &deadline) ; + if (r < 0) strerr_diefu1sys(111, "iopause") ; + else if (!r) (*actions[state][V_TIMEOUT])() ; + else + { + if ((x[0].revents | x[1].revents) & IOPAUSE_EXCEPT) + strerr_diefu1x(111, "iopause: trouble with pipes") ; + if (x[0].revents & IOPAUSE_READ) handle_signals() ; + else if (x[1].revents & IOPAUSE_READ) handle_control(x[1].fd) ; + } + } + + ftrigw_notify(S6_SUPERVISE_EVENTDIR, 'x') ; + } + return 0 ; +} diff --git a/src/supervision/s6-svc.c b/src/supervision/s6-svc.c new file mode 100644 index 0000000..699eefd --- /dev/null +++ b/src/supervision/s6-svc.c @@ -0,0 +1,12 @@ +/* ISC license. */ + +#include <skalibs/strerr2.h> +#include <s6/s6-supervise.h> + +#define USAGE "s6-svc [ -abqhkti12fFpcoduxO ] servicedir" + +int main (int argc, char const *const *argv) +{ + PROG = "s6-svc" ; + return s6_svc_main(argc, argv, "abqhkti12fFpcoduxO", USAGE, "supervise") ; +} diff --git a/src/supervision/s6-svok.c b/src/supervision/s6-svok.c new file mode 100644 index 0000000..4a615e9 --- /dev/null +++ b/src/supervision/s6-svok.c @@ -0,0 +1,32 @@ +/* ISC license. */ + +#include <errno.h> +#include <skalibs/bytestr.h> +#include <skalibs/strerr2.h> +#include <skalibs/djbunix.h> +#include <s6/s6-supervise.h> + +#define USAGE "s6-svok servicedir" + +int main (int argc, char const *const *argv) +{ + PROG = "s6-svok" ; + if (argc < 2) strerr_dieusage(100, USAGE) ; + argv++ ; argc-- ; + { + int fd ; + unsigned int dirlen = str_len(*argv) ; + char fn[dirlen + 9 + sizeof(S6_SUPERVISE_CTLDIR)] ; + byte_copy(fn, dirlen, *argv) ; + fn[dirlen] = '/' ; + byte_copy(fn + dirlen + 1, sizeof(S6_SUPERVISE_CTLDIR) - 1, S6_SUPERVISE_CTLDIR) ; + byte_copy(fn + dirlen + sizeof(S6_SUPERVISE_CTLDIR), 9, "/control") ; + fd = open_write(fn) ; + if (fd < 0) + { + if ((errno == ENXIO) || (errno == ENOENT)) return 1 ; + else strerr_diefu2sys(111, "open_write ", fn) ; + } + } + return 0 ; +} diff --git a/src/supervision/s6-svscan.c b/src/supervision/s6-svscan.c new file mode 100644 index 0000000..8b0f82e --- /dev/null +++ b/src/supervision/s6-svscan.c @@ -0,0 +1,498 @@ +/* ISC license. */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <unistd.h> +#include <errno.h> +#include <signal.h> +#include <skalibs/allreadwrite.h> +#include <skalibs/bytestr.h> +#include <skalibs/sgetopt.h> +#include <skalibs/uint.h> +#include <skalibs/strerr2.h> +#include <skalibs/tai.h> +#include <skalibs/iopause.h> +#include <skalibs/djbunix.h> +#include <skalibs/direntry.h> +#include <skalibs/sig.h> +#include <skalibs/selfpipe.h> +#include <skalibs/environ.h> +#include <s6/config.h> +#include <s6/s6-supervise.h> + +#define USAGE "s6-svscan [ -c maxservices ] [ -t timeout ] [ dir ]" + +#define FINISH_PROG S6_SVSCAN_CTLDIR "/finish" +#define CRASH_PROG S6_SVSCAN_CTLDIR "/crash" + +#define DIR_RETRY_TIMEOUT 3 +#define CHECK_RETRY_TIMEOUT 4 + +struct svinfo +{ + dev_t dev ; + ino_t ino ; + tain_t restartafter[2] ; + int pid[2] ; + int p[2] ; + unsigned int flagactive : 1 ; + unsigned int flaglog : 1 ; +} ; +#define SVINFO_ZERO { -1, -1, { TAIN_ZERO, TAIN_ZERO }, { 0, 0 }, { -1, -1 }, 0, 0, 0 } ; + +static struct svinfo *services ; +static unsigned int max = 500 ; +static unsigned int n = 0 ; +static tain_t deadline, defaulttimeout ; +static char const *finish_arg = "reboot" ; +static int wantreap = 1 ; +static int wantscan = 1 ; +static unsigned int wantkill = 0 ; +static int cont = 1 ; + +static void panicnosp (char const *) gccattr_noreturn ; +static void panicnosp (char const *errmsg) +{ + char const *eargv[2] = { CRASH_PROG, 0 } ; + strerr_warnwu1sys(errmsg) ; + strerr_warnw2x("executing into ", eargv[0]) ; + execve(eargv[0], (char *const *)eargv, (char *const *)environ) ; + /* and if that execve fails, screw it and just die */ + strerr_dieexec(111, eargv[0]) ; +} + +static void panic (char const *) gccattr_noreturn ; +static void panic (char const *errmsg) +{ + int e = errno ; + selfpipe_finish() ; + errno = e ; + panicnosp(errmsg) ; +} + +static void killthem (void) +{ + register unsigned int i = 0 ; + if (!wantkill) return ; + for (; i < n ; i++) + { + if (!(wantkill & 1) && services[i].flagactive) continue ; + if (services[i].pid[0]) + kill(services[i].pid[0], (wantkill & 2) ? SIGTERM : SIGHUP) ; + if (services[i].flaglog && services[i].pid[1]) + kill(services[i].pid[1], (wantkill & 4) ? SIGTERM : SIGHUP) ; + } + wantkill = 0 ; +} + +static void term (void) +{ + cont = 0 ; + wantkill = 3 ; +} + +static void hup (void) +{ + cont = 0 ; + wantkill = 1 ; +} + +static void quit (void) +{ + cont = 0 ; + wantkill = 7 ; +} + +static void intr (void) +{ + finish_arg = "reboot" ; + term() ; +} + +static void handle_signals (void) +{ + for (;;) + { + switch (selfpipe_read()) + { + case -1 : panic("selfpipe_read") ; + case 0 : return ; + case SIGCHLD : wantreap = 1 ; break ; + case SIGALRM : wantscan = 1 ; break ; + case SIGTERM : term() ; break ; + case SIGHUP : hup() ; break ; + case SIGQUIT : quit() ; break ; + case SIGABRT : cont = 0 ; break ; + case SIGINT : intr() ; break ; + } + } +} + +static void handle_control (int fd) +{ + for (;;) + { + char c ; + int r = sanitize_read(fd_read(fd, &c, 1)) ; + if (r == -1) panic("read control pipe") ; + else if (!r) break ; + else switch (c) + { + case 'p' : finish_arg = "poweroff" ; break ; + case 'h' : hup() ; return ; + case 'r' : finish_arg = "reboot" ; break ; + case 'a' : wantscan = 1 ; break ; + case 't' : term() ; return ; + case 's' : finish_arg = "halt" ; break ; + case 'z' : wantreap = 1 ; break ; + case 'b' : cont = 0 ; return ; + case 'n' : wantkill = 2 ; break ; + case 'N' : wantkill = 6 ; break ; + case '6' : + case 'i' : intr() ; return ; + case 'q' : quit() ; return ; + case '0' : finish_arg = "halt" ; term() ; return ; + case '7' : finish_arg = "poweroff" ; term() ; return ; + case '8' : finish_arg = "other" ; term() ; return ; + default : + { + char s[2] = { c, 0 } ; + strerr_warnw2x("received unknown control command: ", s) ; + } + } + } +} + + +/* First essential function: the reaper. + s6-svscan must wait() for all children, + including ones it doesn't know it has. + Dead active services are flagged to be restarted in 1 second. */ + +static void reap (void) +{ + tain_t nextscan ; + if (!wantreap) return ; + wantreap = 0 ; + tain_addsec_g(&nextscan, 1) ; + for (;;) + { + int wstat ; + int r = wait_nohang(&wstat) ; + if (r < 0) + if (errno != ECHILD) panic("wait_nohang") ; + else break ; + else if (!r) break ; + else + { + register unsigned int i = 0 ; + for (; i < n ; i++) + { + if (services[i].pid[0] == r) + { + services[i].pid[0] = 0 ; + services[i].restartafter[0] = nextscan ; + break ; + } + else if (services[i].pid[1] == r) + { + services[i].pid[1] = 0 ; + services[i].restartafter[1] = nextscan ; + break ; + } + } + if (i == n) continue ; + if (services[i].flagactive) + { + if (tain_less(&nextscan, &deadline)) deadline = nextscan ; + } + else + { + if (services[i].flaglog) + { + /* + BLACK MAGIC: + - we need to close the pipe early: + * as soon as the writer exits so the logger can exit on EOF + * or as soon as the logger exits so the writer can crash on EPIPE + - but if the same service gets reactivated before the second + supervise process exits, ouch: we've lost the pipe + - so we can't reuse the same service even if it gets reactivated + - so we're marking a dying service with a closed pipe + - if the scanner sees a service with p[0] = -1 it won't flag + it as active (and won't restart the dead supervise) + - but if the service gets reactivated we want it to restart + as soon as the 2nd supervise process dies + - so the scanner marks such a process with p[0] = -2 + - and the reaper triggers a scan when it finds a -2. + */ + if (services[i].p[0] >= 0) + { + fd_close(services[i].p[1]) ; services[i].p[1] = -1 ; + fd_close(services[i].p[0]) ; services[i].p[0] = -1 ; + } + else if (services[i].p[0] == -2) wantscan = 1 ; + } + if (!services[i].pid[0] && !services[i].pid[1]) + services[i] = services[--n] ; + } + } + } +} + + +/* Second essential function: the scanner. + It monitors the service directories and spawns a supervisor + if needed. */ + +static void trystart (unsigned int i, char const *name, int islog) +{ + int pid = fork() ; + switch (pid) + { + case -1 : + tain_addsec_g(&services[i].restartafter[islog], CHECK_RETRY_TIMEOUT) ; + strerr_warnwu2sys("fork for ", name) ; + return ; + case 0 : + { + char const *cargv[3] = { "s6-supervise", name, 0 } ; + PROG = "s6-svscan (child)" ; + selfpipe_finish() ; + if (services[i].flaglog) + if (fd_move(!islog, services[i].p[!islog]) == -1) + strerr_diefu2sys(111, "set fds for ", name) ; + pathexec_run(S6_BINPREFIX "s6-supervise", cargv, (char const **)environ) ; + strerr_dieexec(111, S6_BINPREFIX "s6-supervise") ; + } + } + services[i].pid[islog] = pid ; +} + +static void retrydirlater (void) +{ + tain_t a ; + tain_addsec_g(&a, DIR_RETRY_TIMEOUT) ; + if (tain_less(&a, &deadline)) deadline = a ; +} + +static void check (char const *name) +{ + struct stat st ; + unsigned int namelen ; + unsigned int i = 0 ; + if (name[0] == '.') return ; + if (stat(name, &st) == -1) + { + strerr_warnwu2sys("stat ", name) ; + retrydirlater() ; + return ; + } + if (!S_ISDIR(st.st_mode)) return ; + namelen = str_len(name) ; + for (; i < n ; i++) if ((services[i].ino == st.st_ino) && (services[i].dev == st.st_dev)) break ; + if (i < n) + { + if (services[i].flaglog && (services[i].p[0] < 0)) + { + /* See BLACK MAGIC above. */ + services[i].p[0] = -2 ; + return ; + } + } + else + { + if (n >= max) + { + strerr_warnwu3x("start supervisor for ", name, ": too many services") ; + return ; + } + else + { + struct stat su ; + char tmp[namelen + 5] ; + byte_copy(tmp, namelen, name) ; + byte_copy(tmp + namelen, 5, "/log") ; + if (stat(tmp, &su) < 0) + if (errno == ENOENT) services[i].flaglog = 0 ; + else + { + strerr_warnwu2sys("stat ", tmp) ; + retrydirlater() ; + return ; + } + else if (!S_ISDIR(su.st_mode)) + services[i].flaglog = 0 ; + else + { + if (pipecoe(services[i].p) < 0) + { + strerr_warnwu1sys("pipecoe") ; + retrydirlater() ; + return ; + } + services[i].flaglog = 1 ; + } + services[i].ino = st.st_ino ; + services[i].dev = st.st_dev ; + tain_copynow(&services[i].restartafter[0]) ; + tain_copynow(&services[i].restartafter[1]) ; + services[i].pid[0] = 0 ; + services[i].pid[1] = 0 ; + n++ ; + } + } + + services[i].flagactive = 1 ; + + if (services[i].flaglog && !services[i].pid[1]) + { + if (!tain_future(&services[i].restartafter[1])) + { + char tmp[namelen + 5] ; + byte_copy(tmp, namelen, name) ; + byte_copy(tmp + namelen, 5, "/log") ; + trystart(i, tmp, 1) ; + } + else if (tain_less(&services[i].restartafter[1], &deadline)) + deadline = services[i].restartafter[1] ; + } + + if (!services[i].pid[0]) + { + if (!tain_future(&services[i].restartafter[0])) + trystart(i, name, 0) ; + else if (tain_less(&services[i].restartafter[0], &deadline)) + deadline = services[i].restartafter[0] ; + } +} + +static void scan (void) +{ + DIR *dir ; + if (!wantscan) return ; + wantscan = 0 ; + dir = opendir(".") ; + if (!dir) + { + strerr_warnwu1sys("opendir .") ; + retrydirlater() ; + return ; + } + { + register unsigned int i = 0 ; + for (; i < n ; i++) services[i].flagactive = 0 ; + } + for (;;) + { + direntry *d ; + errno = 0 ; + d = readdir(dir) ; + if (!d) break ; + check(d->d_name) ; + } + if (errno) + { + strerr_warnwu1sys("readdir .") ; + retrydirlater() ; + } + dir_close(dir) ; +} + + +int main (int argc, char const *const *argv) +{ + iopause_fd x[2] = { { -1, IOPAUSE_READ, 0 }, { -1, IOPAUSE_READ, 0 } } ; + PROG = "s6-svscan" ; + { + subgetopt_t l = SUBGETOPT_ZERO ; + unsigned int t = 5000 ; + for (;;) + { + register int opt = subgetopt_r(argc, argv, "t:c:", &l) ; + if (opt == -1) break ; + switch (opt) + { + case 't' : if (uint0_scan(l.arg, &t)) break ; + case 'c' : if (uint0_scan(l.arg, &max)) break ; + default : strerr_dieusage(100, USAGE) ; + } + } + argc -= l.ind ; argv += l.ind ; + if (t) tain_from_millisecs(&defaulttimeout, t) ; + else defaulttimeout = tain_infinite_relative ; + if (max < 2) max = 2 ; + } + + /* Init phase. + If something fails here, we can die, because it means that + something is seriously wrong with the system, and we can't + run correctly anyway. + */ + + if (argc && (chdir(argv[0]) < 0)) strerr_diefu1sys(111, "chdir") ; + x[1].fd = s6_supervise_lock(S6_SVSCAN_CTLDIR) ; + x[0].fd = selfpipe_init() ; + if (x[0].fd < 0) strerr_diefu1sys(111, "selfpipe_init") ; + + if (sig_ignore(SIGPIPE) < 0) strerr_diefu1sys(111, "ignore SIGPIPE") ; + { + sigset_t set ; + sigemptyset(&set) ; + sigaddset(&set, SIGCHLD) ; + sigaddset(&set, SIGALRM) ; + sigaddset(&set, SIGTERM) ; + sigaddset(&set, SIGHUP) ; + sigaddset(&set, SIGQUIT) ; + sigaddset(&set, SIGABRT) ; + sigaddset(&set, SIGINT) ; + if (selfpipe_trapset(&set) < 0) strerr_diefu1sys(111, "trap signals") ; + } + + + { + struct svinfo blob[max] ; /* careful with that stack, Eugene */ + services = blob ; + tain_now_g() ; + + + /* Loop phase. + From now on, we must not die. + Temporize on recoverable errors, and panic on serious ones. */ + + while (cont) + { + int r ; + tain_add_g(&deadline, &defaulttimeout) ; + reap() ; + scan() ; + killthem() ; + r = iopause_g(x, 2, &deadline) ; + if (r < 0) panic("iopause") ; + else if (!r) wantscan = 1 ; + else + { + if ((x[0].revents | x[1].revents) & IOPAUSE_EXCEPT) + { + errno = EIO ; + panic("check internal pipes") ; + } + if (x[0].revents & IOPAUSE_READ) handle_signals() ; + if (x[1].revents & IOPAUSE_READ) handle_control(x[1].fd) ; + } + } + + + /* Finish phase. */ + + selfpipe_finish() ; + killthem() ; + reap() ; + } + { + char const *eargv[3] = { FINISH_PROG, finish_arg, 0 } ; + execve(eargv[0], (char **)eargv, (char *const *)environ) ; + } + panicnosp("exec finish script " FINISH_PROG) ; +} diff --git a/src/supervision/s6-svscanctl.c b/src/supervision/s6-svscanctl.c new file mode 100644 index 0000000..48e6420 --- /dev/null +++ b/src/supervision/s6-svscanctl.c @@ -0,0 +1,12 @@ +/* ISC license. */ + +#include <skalibs/strerr2.h> +#include <s6/s6-supervise.h> + +#define USAGE "s6-svscanctl [ -phratszbnNiq0678 ] svscandir" + +int main (int argc, char const *const *argv) +{ + PROG = "s6-svscanctl" ; + return s6_svc_main(argc, argv, "phratszbnNiq0678", USAGE, ".s6-svscan") ; +} diff --git a/src/supervision/s6-svstat.c b/src/supervision/s6-svstat.c new file mode 100644 index 0000000..c986b8d --- /dev/null +++ b/src/supervision/s6-svstat.c @@ -0,0 +1,70 @@ +/* ISC license. */ + +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <skalibs/uint64.h> +#include <skalibs/uint.h> +#include <skalibs/bytestr.h> +#include <skalibs/buffer.h> +#include <skalibs/strerr2.h> +#include <skalibs/tai.h> +#include <skalibs/djbunix.h> +#include <s6/s6-supervise.h> + +#define USAGE "s6-svstat servicedir" + +int main (int argc, char const *const *argv) +{ + s6_svstatus_t status ; + char fmt[UINT_FMT] ; + int isup, normallyup ; + PROG = "s6-svstat" ; + if (argc < 2) strerr_dieusage(100, USAGE) ; + argv++ ; argc-- ; + if (!s6_svstatus_read(*argv, &status)) + strerr_diefu2sys(111, "read status for ", *argv) ; + + { + struct stat st ; + unsigned int dirlen = str_len(*argv) ; + char fn[dirlen + 6] ; + byte_copy(fn, dirlen, *argv) ; + byte_copy(fn + dirlen, 6, "/down") ; + if (stat(fn, &st) == -1) + if (errno != ENOENT) strerr_diefu2sys(111, "stat ", fn) ; + else normallyup = 1 ; + else normallyup = 0 ; + } + + tain_now_g() ; + if (tain_future(&status.stamp)) tain_copynow(&status.stamp) ; + tain_sub(&status.stamp, &STAMP, &status.stamp) ; + + isup = status.pid && !status.flagfinishing ; + if (isup) + { + buffer_putnoflush(buffer_1small,"up (pid ", 8) ; + buffer_putnoflush(buffer_1small, fmt, uint_fmt(fmt, status.pid)) ; + buffer_putnoflush(buffer_1small, ") ", 2) ; + } + else buffer_putnoflush(buffer_1small, "down ", 5) ; + + buffer_putnoflush(buffer_1small, fmt, uint64_fmt(fmt, status.stamp.sec.x)) ; + buffer_putnoflush(buffer_1small," seconds", 8) ; + + if (isup && !normallyup) + buffer_putnoflush(buffer_1small, ", normally down", 15) ; + if (!isup && normallyup) + buffer_putnoflush(buffer_1small, ", normally up", 13) ; + if (isup && status.flagpaused) + buffer_putnoflush(buffer_1small, ", paused", 8) ; + if (!isup && (status.flagwant == 'u')) + buffer_putnoflush(buffer_1small, ", want up", 10) ; + if (isup && (status.flagwant == 'd')) + buffer_putnoflush(buffer_1small, ", want down", 12) ; + + if (buffer_putflush(buffer_1small, "\n", 1) < 0) + strerr_diefu1sys(111, "write to stdout") ; + return 0 ; +} diff --git a/src/supervision/s6-svwait.c b/src/supervision/s6-svwait.c new file mode 100644 index 0000000..0d7c96c --- /dev/null +++ b/src/supervision/s6-svwait.c @@ -0,0 +1,104 @@ +/* ISC license. */ + +#include <skalibs/sgetopt.h> +#include <skalibs/bytestr.h> +#include <skalibs/uint16.h> +#include <skalibs/uint.h> +#include <skalibs/bitarray.h> +#include <skalibs/tai.h> +#include <skalibs/strerr2.h> +#include <skalibs/iopause.h> +#include <s6/ftrigr.h> +#include <s6/s6-supervise.h> + +#define USAGE "s6-svwait [ -U | -u | -d ] [ -A | -a | -o ] [ -t timeout ] servicedir..." +#define dieusage() strerr_dieusage(100, USAGE) + +static inline int check (unsigned char const *ba, unsigned int n, int wantup, int or) +{ + return (bitarray_first(ba, n, or == wantup) < n) == or ; +} + +int main (int argc, char const *const *argv) +{ + tain_t deadline, tto ; + ftrigr_t a = FTRIGR_ZERO ; + uint32 options = FTRIGR_REPEAT ; + int or = 0 ; + int wantup = 1 ; + char re[4] = "u|d" ; + PROG = "s6-svwait" ; + { + subgetopt_t l = SUBGETOPT_ZERO ; + unsigned int t = 0 ; + for (;;) + { + register int opt = subgetopt_r(argc, argv, "uUdAaot:", &l) ; + if (opt == -1) break ; + switch (opt) + { + case 'U' : wantup = 1 ; re[0] = 'U' ; break ; + case 'u' : wantup = 1 ; re[0] = 'u' ; break ; + case 'd' : wantup = 0 ; break ; + case 'A' : or = 0 ; options |= FTRIGR_REPEAT ; break ; + case 'a' : or = 0 ; options &= ~FTRIGR_REPEAT ; break ; + case 'o' : or = 1 ; options &= ~FTRIGR_REPEAT ; break ; + case 't' : if (!uint0_scan(l.arg, &t)) dieusage() ; break ; + default : dieusage() ; + } + } + argc -= l.ind ; argv += l.ind ; + if (t) tain_from_millisecs(&tto, t) ; else tto = tain_infinite_relative ; + } + if (!argc) dieusage() ; + + tain_now_g() ; + tain_add_g(&deadline, &tto) ; + + if (!ftrigr_startf_g(&a, &deadline)) strerr_diefu1sys(111, "ftrigr_startf") ; + + { + iopause_fd x = { -1, IOPAUSE_READ, 0 } ; + unsigned int i = 0 ; + uint16 list[argc] ; + unsigned char states[bitarray_div8(argc)] ; + x.fd = ftrigr_fd(&a) ; + for (; i < (unsigned int)argc ; i++) + { + unsigned int len = str_len(argv[i]) ; + char s[len + 1 + sizeof(S6_SUPERVISE_EVENTDIR)] ; + byte_copy(s, len, argv[i]) ; + s[len] = '/' ; + byte_copy(s + len + 1, sizeof(S6_SUPERVISE_EVENTDIR), S6_SUPERVISE_EVENTDIR) ; + list[i] = ftrigr_subscribe_g(&a, s, re, options, &deadline) ; + if (!list[i]) strerr_diefu2sys(111, "ftrigr_subscribe to ", argv[i]) ; + } + + for (i = 0 ; i < (unsigned int)argc ; i++) + { + s6_svstatus_t st = S6_SVSTATUS_ZERO ; + if (!s6_svstatus_read(argv[i], &st)) strerr_diefu1sys(111, "s6_svstatus_read") ; + bitarray_poke(states, i, !!st.pid) ; + } + + for (;;) + { + if (check(states, argc, wantup, or)) break ; + { + register int r = iopause_g(&x, 1, &deadline) ; + if (r < 0) strerr_diefu1sys(111, "iopause") ; + else if (!r) strerr_dief1x(1, "timed out") ; + } + + if (ftrigr_update(&a) < 0) strerr_diefu1sys(111, "ftrigr_update") ; + for (i = 0 ; i < (unsigned int)argc ; i++) + { + char what ; + register int r = ftrigr_check(&a, list[i], &what) ; + if (r < 0) strerr_diefu1sys(111, "ftrigr_check") ; + if (r) bitarray_poke(states, i, what == re[0]) ; + } + } + } + return 0 ; +} |