diff options
author | Laurent Bercot <ska-skaware@skarnet.org> | 2017-08-19 15:37:17 +0000 |
---|---|---|
committer | Laurent Bercot <ska-skaware@skarnet.org> | 2017-08-19 15:37:17 +0000 |
commit | ea67a084fac347a543d86e2c761340dee36d380f (patch) | |
tree | eceedca8dbf5d89516620c47ce0414f7bbc79e73 | |
parent | 1682ba157c4c8e15903174243152c8675e47326d (diff) | |
download | s6-ea67a084fac347a543d86e2c761340dee36d380f.tar.xz |
Add s6-notifyoncheck
-rw-r--r-- | doc/index.html | 1 | ||||
-rw-r--r-- | doc/s6-notifyoncheck.html | 160 | ||||
-rw-r--r-- | package/deps.mak | 3 | ||||
-rw-r--r-- | package/modes | 1 | ||||
-rw-r--r-- | package/targets.mak | 1 | ||||
-rw-r--r-- | src/supervision/deps-exe/s6-notifyoncheck | 5 | ||||
-rw-r--r-- | src/supervision/s6-notifyoncheck.c | 244 |
7 files changed, 415 insertions, 0 deletions
diff --git a/doc/index.html b/doc/index.html index 8f15a5e..9649c3b 100644 --- a/doc/index.html +++ b/doc/index.html @@ -154,6 +154,7 @@ a user interface to control those processes and monitor service states. <li><a href="s6-svwait.html">The <tt>s6-svwait</tt> program</a></li> <li><a href="s6-svlisten1.html">The <tt>s6-svlisten1</tt> program</a></li> <li><a href="s6-svlisten.html">The <tt>s6-svlisten</tt> program</a></li> +<li><a href="s6-notifyoncheck.html">The <tt>s6-notifyoncheck</tt> program</a></li> </ul> <h4> Daemontools-like utilities </h4> diff --git a/doc/s6-notifyoncheck.html b/doc/s6-notifyoncheck.html new file mode 100644 index 0000000..74cb6cd --- /dev/null +++ b/doc/s6-notifyoncheck.html @@ -0,0 +1,160 @@ +<html> + <head> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> + <meta http-equiv="Content-Language" content="en" /> + <title>s6: the s6-notifyoncheck program</title> + <meta name="Description" content="s6: the s6-notifyoncheck program" /> + <meta name="Keywords" content="s6 command s6-notifyoncheck notification service check polling" /> + <!-- <link rel="stylesheet" type="text/css" href="//skarnet.org/default.css" /> --> + </head> +<body> + +<p> +<a href="index.html">s6</a><br /> +<a href="//skarnet.org/software/">Software</a><br /> +<a href="//skarnet.org/">skarnet.org</a> +</p> + +<h1> The s6-notifyoncheck program </h1> + +<p> +<tt>s6-notifyoncheck</tt> is a chain-loading program meant to be used +in run scripts, in a service that has been declared to honor +readiness notification. It implements a policy of running a user-provided +executable in the background that polls the service currently being +launched, in order to check when it becomes ready. It feeds the +result of this check into the s6 notification mechanism. 
+</p> + +<p> +s6-notifyoncheck should <strong>only</strong> be used with daemons +that can be polled from the outside to check readiness, and that +<strong>do not implement readiness notification themselves</strong>. +</p> + +<h2> Interface </h2> + +<pre> + s6-notifyoncheck [ -d ] [ -3 <em>notiffd</em> ] [ -s <em>initialsleep</em> ] [ -T <em>globaltimeout</em> ] [ -t <em>localtimeout</em> ] [ -w <em>waitingtime</em> ] [ -n <em>n</em> ] [ -c <em>checkprog</em> ] <em>prog...</em> +</pre> + +<p> + s6-notifyoncheck forks and runs as the child; the parent immediately execs into +<em>prog...</em>, the daemon that must be checked for readiness. +</p> + +<p> + s6-notifyoncheck first waits for a little time, then it spawns the +<tt>./data/check</tt> executable and waits for it to exit. If <tt>./data/check</tt> +exits 0, then s6-notifyoncheck reports that the service is ready, then +exits. If <tt>./data/check</tt> exits anything else, s6-notifyoncheck sleeps +for a little time, then spawns <tt>./data/check</tt> again. It loops until +<tt>./data/check</tt> succeeds, or 7 attempts fail, or a certain amount of +time elapses. +</p> + +<h2> Exit codes </h2> + +<p> + s6-notifyoncheck can exit before executing into <em>prog</em>: +</p> + +<ul> + <li> 100: wrong usage </li> + <li> 111: system call failed </li> +</ul> + +<p> + After forking, s6-notifyoncheck (running as the child) can +exit with the following exit codes, but those are meaningless +because no process will, or should, check them. They are only +differentiated for clarity in the code: +</p> + +<ul> + <li> 0: service readiness achieved and notification sent </li> + <li> 1: maximum number of attempts reached, all unsuccessful </li> + <li> 2: <em>prog</em> died, so s6-notifyoncheck exited early </li> + <li> 3: timed out before readiness was achieved </li> + <li> 111: system call failed </li> +</ul> + +<h2> Options </h2> + +<ul> + <li> <tt>-d</tt> : doublefork. 
s6-notifyoncheck will run as the +grandchild of <em>prog...</em> instead of its direct child. This is useful +if <em>prog...</em> never reaps zombies it does not know it has. </li> + <li> <tt>-3 <em>notiffd</em></tt> : use <em>notiffd</em> as the +file descriptor to send a readiness notification to. By default, this +number is automatically read from the <tt>./notification-fd</tt> file. </li> + <li> <tt>-s <em>initialsleep</em></tt> : sleep for +<em>initialsleep</em> milliseconds before starting to poll the service +for readiness. Default is 10 milliseconds. </li> + <li> <tt>-T <em>globaltimeout</em></tt> : give up (and leave +the service <em>up</em> but not <em>ready</em>) if service readiness still +has not been detected after <em>globaltimeout</em> milliseconds. Default +is 0, meaning infinite: s6-notifyoncheck will keep polling until it succeeds. </li> + <li> <tt>-t <em>localtimeout</em></tt> : on every attempt, if +<tt>./data/check</tt> still has not exited after <em>localtimeout</em> milliseconds, +kill it and declare that attempt failed. Default is 0, meaning infinite: +s6-notifyoncheck will wait forever for <tt>./data/check</tt> to exit. </li> + <li> <tt>-w <em>waitingtime</em></tt> : sleep for +<em>waitingtime</em> milliseconds between two invocations of <tt>./data/check</tt>. +This is basically the polling period. Default is 1000: the service will +be polled every second. </li> + <li> <tt>-n <em>n</em></tt> : give up after <em>n</em> +unsuccessful invocations of <tt>./data/check</tt>. 0 means infinite, i.e. keep +polling until it succeeds, or times out, or the service dies first. Default is 7. </li> + <li> <tt>-c <em>checkprog...</em></tt> : invoke <em>checkprog...</em> +instead of <tt>./data/check</tt>. The <em>checkprog</em> string will be parsed by +<a href="//skarnet.org/software/execline/execlineb.html">execlineb</a>, so it +can contain a full command line. 
This option is mainly useful if the program +used to poll the service is very simple and can be inlined as a simple +command line, to avoid needing to manage a whole script and a <tt>./data/check</tt> +file. </li> +</ul> + +<h2> Usage </h2> + +<p> + s6-notifyoncheck is designed to make it possible for services to use the +<a href="//skarnet.org/software/s6/notifywhenup.html">s6 notification +mechanism</a> even with daemons that do not natively implement the +mechanism of writing a newline to a file descriptor of their choice when +they're ready. +</p> + +<p> +<a href="//skarnet.org/cgi-bin/archive.cgi?2:mss:1607:dfblejammjllfkggpcph">Polling</a> +is evil. Please make sure you really have no other choice before writing a +<tt>./data/check</tt> program and using s6-notifyoncheck in your run script. +If you have access to the source code of the daemon you want to check for +readiness, consider patching it to add readiness notification support, which +is extremely simple and does not require linking against any s6 library. +</p> + +<p> + If using a <tt>./data/check</tt> program is your only option: +</p> + +<ul> + <li> Make sure the <tt>./data</tt> subdirectory is readable and that +<tt>./data/check</tt> is executable, exits 0 if the daemon +it checks is ready, and exits nonzero if the daemon is not ready. </li> + <li> Add a <tt>./notification-fd</tt> file to your service directory, +which can contain any number that is not 0, 1 or 2, or anything else +explicitly used in your run script. The daemon does not need to care +about that file descriptor; from the daemon's point of view, nothing +changes. </li> + <li> In your run script, insert <tt>s6-notifyoncheck</tt> in the +command line that will execute into your daemon. </li> + <li> <tt>./data/check</tt> will run as the same user as s6-notifyoncheck. +If s6-notifyoncheck runs after the run script's process has lost its +root privileges, make sure that <tt>./data/check</tt> is accessible +and runnable as that user. 
</li> +</ul> + +</body> +</html> diff --git a/package/deps.mak b/package/deps.mak index cd96af9..c188e96 100644 --- a/package/deps.mak +++ b/package/deps.mak @@ -117,6 +117,7 @@ src/pipe-tools/s6-ftrig-listen1.o src/pipe-tools/s6-ftrig-listen1.lo: src/pipe-t src/pipe-tools/s6-ftrig-notify.o src/pipe-tools/s6-ftrig-notify.lo: src/pipe-tools/s6-ftrig-notify.c src/include/s6/ftrigw.h src/pipe-tools/s6-ftrig-wait.o src/pipe-tools/s6-ftrig-wait.lo: src/pipe-tools/s6-ftrig-wait.c src/include/s6/ftrigr.h src/pipe-tools/s6-mkfifodir.o src/pipe-tools/s6-mkfifodir.lo: src/pipe-tools/s6-mkfifodir.c src/include/s6/ftrigw.h +src/supervision/s6-notifyoncheck.o src/supervision/s6-notifyoncheck.lo: src/supervision/s6-notifyoncheck.c src/include/s6/ftrigr.h src/include/s6/s6-supervise.h src/supervision/s6-supervise.o src/supervision/s6-supervise.lo: src/supervision/s6-supervise.c src/include/s6/ftrigw.h src/include/s6/s6-supervise.h src/supervision/s6-svc.o src/supervision/s6-svc.lo: src/supervision/s6-svc.c src/include/s6/config.h src/include/s6/s6-supervise.h src/supervision/s6-svlisten.o src/supervision/s6-svlisten.lo: src/supervision/s6-svlisten.c src/supervision/s6-svlisten.h @@ -230,6 +231,8 @@ s6-ftrig-wait: EXTRA_LIBS := ${SOCKET_LIB} ${TAINNOW_LIB} ${SPAWN_LIB} s6-ftrig-wait: src/pipe-tools/s6-ftrig-wait.o ${LIBS6} -lskarnet s6-mkfifodir: EXTRA_LIBS := s6-mkfifodir: src/pipe-tools/s6-mkfifodir.o ${LIBS6} -lskarnet +s6-notifyoncheck: EXTRA_LIBS := ${SOCKET_LIB} ${TAINNOW_LIB} ${SPAWN_LIB} +s6-notifyoncheck: src/supervision/s6-notifyoncheck.o ${LIBS6} -lskarnet s6-supervise: EXTRA_LIBS := ${TAINNOW_LIB} s6-supervise: src/supervision/s6-supervise.o ${LIBS6} -lskarnet s6-svc: EXTRA_LIBS := diff --git a/package/modes b/package/modes index 544b44c..830708c 100644 --- a/package/modes +++ b/package/modes @@ -17,6 +17,7 @@ s6-svstat 0755 s6-svwait 0755 s6-svlisten1 0755 s6-svlisten 0755 +s6-notifyoncheck 0755 s6-applyuidgid 0700 s6-envdir 0755 s6-envuidgid 0755 diff --git 
a/package/targets.mak b/package/targets.mak index 67b7fe5..74f34ca 100644 --- a/package/targets.mak +++ b/package/targets.mak @@ -17,6 +17,7 @@ s6-svstat \ s6-svwait \ s6-svlisten1 \ s6-svlisten \ +s6-notifyoncheck \ s6-envdir \ s6-envuidgid \ s6-fghack \ diff --git a/src/supervision/deps-exe/s6-notifyoncheck b/src/supervision/deps-exe/s6-notifyoncheck new file mode 100644 index 0000000..7a6675b --- /dev/null +++ b/src/supervision/deps-exe/s6-notifyoncheck @@ -0,0 +1,5 @@ +${LIBS6} +-lskarnet +${SOCKET_LIB} +${TAINNOW_LIB} +${SPAWN_LIB} diff --git a/src/supervision/s6-notifyoncheck.c b/src/supervision/s6-notifyoncheck.c new file mode 100644 index 0000000..3d988de --- /dev/null +++ b/src/supervision/s6-notifyoncheck.c @@ -0,0 +1,244 @@ +/* ISC license. */ + +#include <stdint.h> +#include <unistd.h> +#include <signal.h> +#include <fcntl.h> +#include <limits.h> +#include <sys/wait.h> +#include <skalibs/types.h> +#include <skalibs/bytestr.h> +#include <skalibs/sgetopt.h> +#include <skalibs/strerr2.h> +#include <skalibs/tai.h> +#include <skalibs/djbunix.h> +#include <skalibs/selfpipe.h> +#include <skalibs/iopause.h> +#include <execline/config.h> +#include <s6/s6-supervise.h> +#include <s6/ftrigr.h> + +#define USAGE "s6-notifyoncheck [ -d ] [ -3 fd ] [ -s initialsleep ] [ -T globaltimeout ] [ -t localtimeout ] [ -w waitingtime ] [ -n tries ] [ -c \"checkprog...\" ] prog..." 
+#define dieusage() strerr_dieusage(100, USAGE) + + +static inline int read_uint (char const *file, unsigned int *fd) +{ + char buf[UINT_FMT + 1] ; + ssize_t r = openreadnclose_nb(file, buf, UINT_FMT) ; + if (r < 0) return -1 ; + buf[byte_chr(buf, r, '\n')] = 0 ; + return !!uint0_scan(buf, fd) ; +} + +static inline int handle_signals (pid_t pid, int *w) +{ + int gotit = 0 ; + for (;;) + { + switch (selfpipe_read()) + { + case -1 : strerr_diefu1sys(111, "selfpipe_read") ; + case 0 : return gotit ; + case SIGCHLD : + { + int wstat ; + if (wait_pid_nohang(pid, &wstat) == pid) + { + *w = wstat ; + gotit = 1 ; + } + break ; + } + } + } +} + +static int handle_event (ftrigr_t *a, uint16_t id, pid_t pid) +{ + int r ; + char what ; + if (ftrigr_update(a) < 0) strerr_diefu1sys(111, "ftrigr_update") ; + r = ftrigr_check(a, id, &what) ; + if (r < 0) strerr_diefu1sys(111, "ftrigr_check") ; + if (r && what == 'd') + { + if (pid) kill(pid, SIGTERM) ; + return 1 ; + } + return 0 ; +} + + +int main (int argc, char const *const *argv, char const *const *envp) +{ + ftrigr_t a = FTRIGR_ZERO ; + iopause_fd x[2] = { { .events = IOPAUSE_READ }, { .events = IOPAUSE_READ } } ; + char const *childargv[4] = { EXECLINE_EXTBINPREFIX "execlineb", "-c", 0, 0 } ; + char const *checkprog = 0 ; + unsigned int fd ; + int df = 0 ; + int autodetect = 1 ; + tain_t globaldeadline, sleeptto, localtto, waittto ; + unsigned int tries = 7 ; + uint16_t id ; + PROG = "s6-notifyoncheck" ; + + { + subgetopt_t l = SUBGETOPT_ZERO ; + unsigned int initialsleep = 10, globaltimeout = 0, localtimeout = 0, waitingtime = 1000 ; + for (;;) + { + int opt = subgetopt_r(argc, argv, "d3:s:T:t:w:n:c:", &l) ; + if (opt == -1) break ; + switch (opt) + { + case 'd' : df = 1 ; break ; + case '3' : if (!uint0_scan(l.arg, &fd)) dieusage() ; autodetect = 0 ; break ; + case 's' : if (!uint0_scan(l.arg, &initialsleep)) dieusage() ; break ; + case 'T' : if (!uint0_scan(l.arg, &globaltimeout)) dieusage() ; break ; + case 't' : if 
(!uint0_scan(l.arg, &localtimeout)) dieusage() ; break ; + case 'w' : if (!uint0_scan(l.arg, &waitingtime)) dieusage() ; break ; + case 'n' : if (!uint0_scan(l.arg, &tries)) dieusage() ; break ; + case 'c' : checkprog = l.arg ; break ; + default : dieusage() ; + } + } + argc -= l.ind ; argv += l.ind ; + if (!argc) dieusage() ; + + if (!tain_from_millisecs(&sleeptto, initialsleep)) dieusage() ; + if (globaltimeout) tain_from_millisecs(&globaldeadline, globaltimeout) ; + else globaldeadline = tain_infinite_relative ; + if (localtimeout) tain_from_millisecs(&localtto, localtimeout) ; + else localtto = tain_infinite_relative ; + if (waitingtime) tain_from_millisecs(&waittto, waitingtime) ; + else waittto = tain_infinite_relative ; + if (!tries) tries = UINT_MAX ; + } + + { + int r = s6_svc_ok(".") ; + if (r < 0) strerr_diefu1sys(111, "sanity-check current service directory") ; + if (!r) strerr_dief1x(100, "s6-supervise not running.") ; + } + if (checkprog) childargv[2] = checkprog ; + else + { + childargv[0] = "./data/check" ; + childargv[1] = 0 ; + } + + if (autodetect) + { + int r = read_uint("notification-fd", &fd) ; + if (r < 0) strerr_diefu2sys(111, "read ", "./notification-fd") ; + if (!r) strerr_dief2x(100, "invalid ", "./notification-fd") ; + } + if (fcntl(fd, F_GETFD) < 0) + strerr_dief2sys(111, "notification-fd", " sanity check failed") ; + + tain_now_g() ; + tain_add_g(&globaldeadline, &globaldeadline) ; + + + /* + Fork, let the parent exec into the daemon, keep working in the child. + + We want the child to die if the parent dies, because no need to keep + polling a dead service. And another child will be spawned next time the + service is relaunched by s6-supervise. + We could keep a pipe from the parent to the child, for death + notification, but that's an additional fd forever open in the parent, + which is not good (we need to be 100% transparent). + So we're using ftrigr to listen to a 'd' event in the servicedir's + fifodir. 
It's much heavier, but temporary - it doesn't use permanent + resources in the daemon - and we're polling anyway, so the user + doesn't care about being 100% lightweight. + */ + + if (!ftrigr_startf_g(&a, &globaldeadline)) + strerr_diefu1sys(111, "ftrigr_startf") ; + id = ftrigr_subscribe_g(&a, "event", "d", 0, &globaldeadline) ; + if (!id) strerr_diefu1sys(111, "ftrigr_subscribe to event fifodir") ; + switch (df ? doublefork() : fork()) + { + case -1: strerr_diefu1sys(111, df ? "doublefork" : "fork") ; + case 0 : break ; + default: + { + close((int)fd) ; + xpathexec_run(argv[0], argv, envp) ; + } + } + + x[0].fd = selfpipe_init() ; + if (x[0].fd < 0) strerr_diefu1sys(111, "selfpipe_init") ; + if (selfpipe_trap(SIGCHLD) < 0) strerr_diefu1sys(111, "trap SIGCHLD") ; + x[1].fd = ftrigr_fd(&a) ; + + + /* Loop around a sleep and a ./data/check invocation */ + + while (tries == UINT_MAX || tries--) + { + tain_t deadline = globaldeadline ; + tain_t localdeadline ; + pid_t pid ; + + tain_add_g(&localdeadline, &sleeptto) ; + sleeptto = waittto ; + if (tain_less(&localdeadline, &deadline)) deadline = localdeadline ; + for (;;) + { + int r = iopause_g(x+1, 1, &deadline) ; + if (r < 0) strerr_diefu1sys(111, "iopause") ; + if (!r) + { + if (!tain_future(&globaldeadline)) return 3 ; + else break ; + } + if (handle_event(&a, id, 0)) return 2 ; + } + + pid = child_spawn0(childargv[0], childargv, envp) ; + if (!pid) + { + strerr_warnwu2sys("spawn ", childargv[0]) ; + continue ; + } + deadline = globaldeadline ; + tain_add_g(&localdeadline, &localtto) ; + if (tain_less(&localdeadline, &deadline)) deadline = localdeadline ; + for (;;) + { + int r = iopause_g(x, 2, &deadline) ; + if (r < 0) strerr_diefu1sys(111, "iopause") ; + if (!r) + { + if (!tain_future(&globaldeadline)) + { + kill(pid, SIGTERM) ; + return 3 ; + } + else break ; + } + if (x[0].revents & IOPAUSE_READ) + { + int wstat ; + if (handle_signals(pid, &wstat)) + { + if (WIFEXITED(wstat) && !WEXITSTATUS(wstat)) + { + 
write((int)fd, "\n", 1) ; + return 0 ; + } + else break ; + } + } + if (x[1].revents & IOPAUSE_READ && handle_event(&a, id, pid)) return 2 ; + } + } + + return 1 ; +} |