From ce49b0504c523820be6ba33ac370da19bf75d9eb Mon Sep 17 00:00:00 2001 From: Laurent Bercot Date: Sun, 18 Jun 2017 15:35:40 +0000 Subject: Usability changes for transition failures in s6-rc change - Add explicit s6-svc -d call for longrun transition failure - Add SIGTERM and SIGINT handling: kill all longrun transitions - Doc update - Credit Lionel - Prepare for 0.2.1.0 --- .gitignore | 2 ++ AUTHORS | 1 + COPYING | 2 +- INSTALL | 4 ++-- NEWS | 11 +++++++++++ doc/index.html | 6 +++--- doc/s6-rc.html | 40 ++++++++++++++++++++++++++++++++++++++++ doc/upgrade.html | 15 +++++++++++++++ package/info | 2 +- src/s6-rc/s6-rc.c | 47 ++++++++++++++++++++++++++++++++++++++++++----- 10 files changed, 118 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index bff391a..4f5d226 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ *.a.xyzzy *.lo *.so.xyzzy +config.mak +src/include/s6-rc/config.h diff --git a/AUTHORS b/AUTHORS index 1a57b4a..d3d6b13 100644 --- a/AUTHORS +++ b/AUTHORS @@ -10,3 +10,4 @@ Thanks to: Guillermo Colin Booth Casper Ti. 
Vector + Lionel Van Bemten diff --git a/COPYING b/COPYING index fabed3c..a34920e 100644 --- a/COPYING +++ b/COPYING @@ -1,4 +1,4 @@ -Copyright (c) 2015-2016 Laurent Bercot +Copyright (c) 2015-2017 Laurent Bercot Permission to use, copy, modify, and distribute this software for any purpose with or without fee is hereby granted, provided that the above diff --git a/INSTALL b/INSTALL index 6ff1754..ee9ea24 100644 --- a/INSTALL +++ b/INSTALL @@ -6,9 +6,9 @@ Build Instructions - A POSIX-compliant C development environment - GNU make version 3.81 or later - - skalibs version 2.5.1.0 or later: http://skarnet.org/software/skalibs/ + - skalibs version 2.5.1.1 or later: http://skarnet.org/software/skalibs/ - execline version 2.3.0.1 or later: http://skarnet.org/software/execline/ - - s6 version 2.5.1.0 or later: http://skarnet.org/software/s6/ + - s6 version 2.6.0.0 or later: http://skarnet.org/software/s6/ This software will run on any operating system that implements POSIX.1-2008, available at: diff --git a/NEWS b/NEWS index 3c9d5b5..0288f64 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,16 @@ Changelog for s6-rc. +In 0.2.1.0 +---------- + - Timeouts for oneshots have been increased to 30 seconds. + - s6-rc now sends an explicit "s6-svc -d" to a longrun when an +up transition fails. + - s6-rc now kills subprocesses waiting on longrun transitions +when it receives a SIGINT or a SIGTERM. + + Those two changes combined ensure that a user can interrupt +a transition that is blocked in a "service is not-ready" limbo, and +bring back the service to a known "down" state. + In 0.2.0.1 ---------- diff --git a/doc/index.html b/doc/index.html index 39d0208..aec93ff 100644 --- a/doc/index.html +++ b/doc/index.html @@ -48,11 +48,11 @@ scripts are also run in a controlled environment.
  • A POSIX-compliant system with a standard C development environment
  • GNU make, version 3.81 or later
  • skalibs version -2.5.1.0 or later
  • +2.5.1.1 or later
  • execline version 2.3.0.1 or later
  • s6 version -2.5.1.0 or later
  • +2.6.0.0 or later

    Licensing

    @@ -66,7 +66,7 @@ scripts are also run in a controlled environment.
    • The current released version of s6-rc is -0.2.0.1.
    • +0.2.1.0.
    • Alternatively, you can checkout a copy of the s6-rc git repository: diff --git a/doc/s6-rc.html b/doc/s6-rc.html index 957c108..7052b45 100644 --- a/doc/s6-rc.html +++ b/doc/s6-rc.html @@ -252,6 +252,31 @@ s6-rc will wait forever on an "up" transition for the notification to arrive. The transition will fail if a timeout occurs.

      +

      + If a down transition fails, s6-rc does nothing with it. The service +has already received a SIGTERM, and may be stuck in the process of exiting; +or it may already have died but is stuck in a bad finish script +that is not timing out. In any case, it is not a situation that s6-rc +can recover from; the service is most likely down, but the administrator +should manually check their process list, and fix their scripts or +timeout values, because a down transition failure is always a +programmer or sysadmin error. +

      + +

      + If an up transition fails, s6-rc sends an explicit +s6-svc -d command to +the longrun. This ensures the service is in a known down state +when failing to go up, instead of (for instance) being stuck in a not-ready +limbo state. +

      + +

      + Note that proper usage of the timeout-kill and timeout-finish +values in the longrun's definition directory can considerably reduce the +number of cases where the service is left in an unknown state. +

      +

      Transitions are supposed to be idempotent, but it is a general rule of supervision that run and finish scripts @@ -310,6 +335,21 @@ each simulated transition will take dryrunthrottle milliseconds to complete successfully.

      +

      Signals

      + +

      + s6-rc change reacts to the following signals: +

      + +
        +
      • SIGTERM: s6-rc immediately aborts all its longrun transitions with +a failure, and the impacted longruns will most likely be in a down +state. Oneshot transitions are untouched, because killing the +oneshot subprocess would make it impossible to determine what state the +oneshot service is in.
      • +
      • SIGINT: same as SIGTERM.
      • +
      +

      Usage examples

       s6-rc change myservicebundle 
      diff --git a/doc/upgrade.html b/doc/upgrade.html index 3db2fd0..2e4bd72 100644 --- a/doc/upgrade.html +++ b/doc/upgrade.html @@ -18,6 +18,21 @@

      What has changed in s6-rc

      +

      in 0.2.1.0

      + +
        +
      • skalibs +dependency bumped to 2.5.1.1.
      • +
      • execline +dependency bumped to 2.3.0.1.
      • +
      • s6 +dependency bumped to 2.6.0.0.
      • +
      • s6-rc change now ensures that a +longrun is down when its up transition fails.
      • +
      • s6-rc change now kills subprocesses +waiting on a longrun transition when it receives a SIGINT or a SIGTERM.
      • +
      +

      in 0.2.0.1

        diff --git a/package/info b/package/info index 624261c..c9b5527 100644 --- a/package/info +++ b/package/info @@ -1,4 +1,4 @@ package=s6-rc -version=0.2.0.1 +version=0.2.1.0 category=admin package_macro_name=S6RC diff --git a/src/s6-rc/s6-rc.c b/src/s6-rc/s6-rc.c index a80ae91..fe7af67 100644 --- a/src/s6-rc/s6-rc.c +++ b/src/s6-rc/s6-rc.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -194,6 +195,22 @@ static void success_longrun (unsigned int i, int h) } } +static void failure_longrun (unsigned int i, int h) +{ + if (h && !dryrun[0]) + { + size_t svdlen = strlen(db->string + db->services[i].name) ; + char fn[livelen + svdlen + 10] ; + char const *newargv[5] = { S6_EXTBINPREFIX "s6-svc", "-d", "--", fn, 0 } ; + memcpy(fn, live, livelen) ; + memcpy(fn + livelen, "/scandir/", 9) ; + memcpy(fn + livelen + 9, db->string + db->services[i].name, svdlen) ; + fn[livelen + 9 + svdlen] = 0 ; + if (!child_spawn0(newargv[0], newargv, (char const *const *)environ)) + strerr_warnwu2sys("spawn ", newargv[0]) ; + } +} + static void broadcast_success (unsigned int, int) ; static void examine (unsigned int i, int h) @@ -253,6 +270,7 @@ static void on_success (unsigned int i, int h) static void on_failure (unsigned int i, int h, int crashed, unsigned int code) { + if (i < db->nlong) failure_longrun(i, h) ; if (verbosity) { char fmt[UINT_FMT] ; @@ -261,17 +279,24 @@ static void on_failure (unsigned int i, int h, int crashed, unsigned int code) } } +static inline void kill_longruns (void) +{ + unsigned int j = npids ; + while (j--) if (pidindex[j].i < db->nlong) + kill(pidindex[j].pid, SIGTERM) ; +} + static int handle_signals (int h) { int ok = 1 ; for (;;) { - switch (selfpipe_read()) + int sig = selfpipe_read() ; + switch (sig) { case -1 : strerr_diefu1sys(111, "selfpipe_read()") ; case 0 : return ok ; case SIGCHLD : - { for (;;) { unsigned int j = 0 ; @@ -296,7 +321,12 @@ static int handle_signals (int h) } } break ; - } + case SIGTERM 
: + case SIGINT : + if (verbosity >= 2) + strerr_warnw3x("received ", sig_name(sig), ", aborting longrun transitions") ; + kill_longruns() ; + break ; default : strerr_dief1x(101, "inconsistent signal state") ; } } @@ -567,8 +597,15 @@ int main (int argc, char const *const *argv) spfd = selfpipe_init() ; if (spfd < 0) strerr_diefu1sys(111, "init selfpipe") ; - if (selfpipe_trap(SIGCHLD) < 0) - strerr_diefu1sys(111, "trap SIGCHLD") ; + { + sigset_t set ; + sigemptyset(&set) ; + sigaddset(&set, SIGCHLD) ; + sigaddset(&set, SIGTERM) ; + sigaddset(&set, SIGINT) ; + if (selfpipe_trapset(&set) < 0) + strerr_diefu1sys(111, "trap signals") ; + } if (prune) { -- cgit v1.2.3