diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | AUTHORS | 1 | ||||
-rw-r--r-- | COPYING | 2 | ||||
-rw-r--r-- | INSTALL | 4 | ||||
-rw-r--r-- | NEWS | 11 | ||||
-rw-r--r-- | doc/index.html | 6 | ||||
-rw-r--r-- | doc/s6-rc.html | 40 | ||||
-rw-r--r-- | doc/upgrade.html | 15 | ||||
-rw-r--r-- | package/info | 2 | ||||
-rw-r--r-- | src/s6-rc/s6-rc.c | 47 |
10 files changed, 118 insertions, 12 deletions
@@ -2,3 +2,5 @@ *.a.xyzzy *.lo *.so.xyzzy +config.mak +src/include/s6-rc/config.h @@ -10,3 +10,4 @@ Thanks to: Guillermo <gdiazhartusch@gmail.com> Colin Booth <cathexis@gmail.com> Casper Ti. Vector <caspervector@gmail.com> + Lionel Van Bemten <lionel.van_bemten@nokia.com> @@ -1,4 +1,4 @@ -Copyright (c) 2015-2016 Laurent Bercot <ska-skaware@skarnet.org> +Copyright (c) 2015-2017 Laurent Bercot <ska-skaware@skarnet.org> Permission to use, copy, modify, and distribute this software for any purpose with or without fee is hereby granted, provided that the above @@ -6,9 +6,9 @@ Build Instructions - A POSIX-compliant C development environment - GNU make version 3.81 or later - - skalibs version 2.5.1.0 or later: http://skarnet.org/software/skalibs/ + - skalibs version 2.5.1.1 or later: http://skarnet.org/software/skalibs/ - execline version 2.3.0.1 or later: http://skarnet.org/software/execline/ - - s6 version 2.5.1.0 or later: http://skarnet.org/software/s6/ + - s6 version 2.6.0.0 or later: http://skarnet.org/software/s6/ This software will run on any operating system that implements POSIX.1-2008, available at: @@ -1,5 +1,16 @@ Changelog for s6-rc. +In 0.2.1.0 +---------- + - Timeouts for oneshots have been increased to 30 seconds. + - s6-rc now sends an explicit "s6-svc -d" to a longrun when an +up transition fails. + - s6-rc now kills subprocesses waiting on longrun transitions +when it receives a SIGINT or a SIGTERM. + + Those two changes combined ensure that a user can interrupt +a transition that is blocked in a "service is not-ready" limbo, and +bring back the service to a known "down" state. + In 0.2.0.1 ---------- diff --git a/doc/index.html b/doc/index.html index 39d0208..aec93ff 100644 --- a/doc/index.html +++ b/doc/index.html @@ -48,11 +48,11 @@ scripts are also run in a controlled environment. <li> A POSIX-compliant system with a standard C development environment </li> <li> GNU make, version 3.81 or later </li> <li> <a href="//skarnet.org/software/skalibs/">skalibs</a> version -2.5.1.0 or later </li> +2.5.1.1 or later </li> <li> <a href="//skarnet.org/software/execline/">execline</a> version 2.3.0.1 or later </li> <li> <a href="//skarnet.org/software/s6/">s6</a> version -2.5.1.0 or later </li> +2.6.0.0 or later </li> </ul> <h3> Licensing </h3> @@ -66,7 +66,7 @@ scripts are also run in a controlled environment. <ul> <li> The current released version of s6-rc is -<a href="s6-rc-0.2.0.1.tar.gz">0.2.0.1</a>. </li> +<a href="s6-rc-0.2.1.0.tar.gz">0.2.1.0</a>. </li> <li> Alternatively, you can checkout a copy of the <a href="//git.skarnet.org/cgi-bin/cgit.cgi/s6-rc/">s6-rc git repository</a>: diff --git a/doc/s6-rc.html b/doc/s6-rc.html index 957c108..7052b45 100644 --- a/doc/s6-rc.html +++ b/doc/s6-rc.html @@ -253,6 +253,31 @@ to arrive. The transition will fail if a timeout occurs. </p> <p> + If a <em>down</em> transition fails, s6-rc does nothing with it. The service +has already received a SIGTERM, and may be stuck in the process of exiting; +or it may already have died but is stuck in a bad <tt>finish</tt> script +that is not timing out. In any case, it is not a situation that s6-rc +can recover from; the service is most likely down, but the administrator +should manually check their process list. And fix their scripts, or +timeout values, because a <em>down</em> transition failure is always a +programmer or sysadmin error. +</p> + +<p> + If an <em>up</em> transition fails, s6-rc sends an explicit +<a href="//skarnet.org/software/s6/s6-svc.html">s6-svc -d</a> command to +the longrun. This ensures the service is in a known <em>down</em> state +when failing to go up, instead of (for instance) being stuck in a not-ready +limbo state. +</p> + +<p> + Note that proper usage of the <tt>timeout-kill</tt> and <tt>timeout-finish</tt> +values in the longrun's definition directory can considerably reduce the +number of cases where the service is left in an unknown state. +</p> + +<p> Transitions are supposed to be idempotent, but it is a general rule of supervision that <tt>run</tt> and <tt>finish</tt> scripts must be idempotent, so a properly designed service directory @@ -310,6 +335,21 @@ each simulated transition will take <em>dryrunthrottle</em> milliseconds to complete successfully. </p> +<h2> Signals </h2> + +<p> + <tt>s6-rc change</tt> reacts to the following signals: +</p> + +<ul> + <li> SIGTERM: s6-rc immediately aborts all its longrun transitions with +a failure, and the impacted longruns will most likely be in a <em>down</em> +state. Oneshot transitions are untouched, because killing the +oneshot subprocess would make it impossible to determine what state the +oneshot service is in. </li> + <li> SIGINT: same as SIGTERM. </li> +</ul> + <h2> Usage examples </h2> <pre> s6-rc change <em>myservicebundle</em> </pre> diff --git a/doc/upgrade.html b/doc/upgrade.html index 3db2fd0..2e4bd72 100644 --- a/doc/upgrade.html +++ b/doc/upgrade.html @@ -18,6 +18,21 @@ <h1> What has changed in s6-rc </h1> +<h2> in 0.2.1.0 </h2> + +<ul> + <li> <a href="//skarnet.org/software/skalibs/">skalibs</a> +dependency bumped to 2.5.1.1. </li> + <li> <a href="//skarnet.org/software/execline/">execline</a> +dependency bumped to 2.3.0.1. </li> + <li> <a href="//skarnet.org/software/s6/">s6</a> +dependency bumped to 2.6.0.0. </li> + <li> <a href="s6-rc.html">s6-rc change</a> now ensures that a +longrun is down when its up transition fails. </li> + <li> <a href="s6-rc.html">s6-rc change</a> now kills subprocesses +waiting on a longrun transition when it receives a SIGINT or a SIGTERM. </li> +</ul> + <h2> in 0.2.0.1 </h2> <ul> diff --git a/package/info b/package/info index 624261c..c9b5527 100644 --- a/package/info +++ b/package/info @@ -1,4 +1,4 @@ package=s6-rc -version=0.2.0.1 +version=0.2.1.0 category=admin package_macro_name=S6RC diff --git a/src/s6-rc/s6-rc.c b/src/s6-rc/s6-rc.c index a80ae91..fe7af67 100644 --- a/src/s6-rc/s6-rc.c +++ b/src/s6-rc/s6-rc.c @@ -13,6 +13,7 @@ #include <skalibs/strerr2.h> #include <skalibs/tai.h> #include <skalibs/environ.h> +#include <skalibs/sig.h> #include <skalibs/djbunix.h> #include <skalibs/selfpipe.h> #include <skalibs/iopause.h> @@ -194,6 +195,22 @@ static void success_longrun (unsigned int i, int h) } } +static void failure_longrun (unsigned int i, int h) +{ + if (h && !dryrun[0]) + { + size_t svdlen = strlen(db->string + db->services[i].name) ; + char fn[livelen + svdlen + 10] ; + char const *newargv[5] = { S6_EXTBINPREFIX "s6-svc", "-d", "--", fn, 0 } ; + memcpy(fn, live, livelen) ; + memcpy(fn + livelen, "/scandir/", 9) ; + memcpy(fn + livelen + 9, db->string + db->services[i].name, svdlen) ; + fn[livelen + 9 + svdlen] = 0 ; + if (!child_spawn0(newargv[0], newargv, (char const *const *)environ)) + strerr_warnwu2sys("spawn ", newargv[0]) ; + } +} + static void broadcast_success (unsigned int, int) ; static void examine (unsigned int i, int h) @@ -253,6 +270,7 @@ static void on_success (unsigned int i, int h) static void on_failure (unsigned int i, int h, int crashed, unsigned int code) { + if (i < db->nlong) failure_longrun(i, h) ; if (verbosity) { char fmt[UINT_FMT] ; @@ -261,17 +279,24 @@ static void on_failure (unsigned int i, int h, int crashed, unsigned int code) } } +static inline void kill_longruns (void) +{ + unsigned int j = npids ; + while (j--) if (pidindex[j].i < db->nlong) + kill(pidindex[j].pid, SIGTERM) ; +} + static int handle_signals (int h) { int ok = 1 ; for (;;) { - switch (selfpipe_read()) + int sig = selfpipe_read() ; + switch (sig) { case -1 : strerr_diefu1sys(111, "selfpipe_read()") ; case 0 : return ok ; case SIGCHLD : - { for (;;) { unsigned int j = 0 ; @@ -296,7 +321,12 @@ static int handle_signals (int h) } } break ; - } + case SIGTERM : + case SIGINT : + if (verbosity >= 2) + strerr_warnw3x("received ", sig_name(sig), ", aborting longrun transitions") ; + kill_longruns() ; + break ; default : strerr_dief1x(101, "inconsistent signal state") ; } } @@ -567,8 +597,15 @@ int main (int argc, char const *const *argv) spfd = selfpipe_init() ; if (spfd < 0) strerr_diefu1sys(111, "init selfpipe") ; - if (selfpipe_trap(SIGCHLD) < 0) - strerr_diefu1sys(111, "trap SIGCHLD") ; + { + sigset_t set ; + sigemptyset(&set) ; + sigaddset(&set, SIGCHLD) ; + sigaddset(&set, SIGTERM) ; + sigaddset(&set, SIGINT) ; + if (selfpipe_trapset(&set) < 0) + strerr_diefu1sys(111, "trap signals") ; + } if (prune) { |