summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Laurent Bercot <ska-skaware@skarnet.org> 2017-06-18 15:35:40 +0000
committer: Laurent Bercot <ska-skaware@skarnet.org> 2017-06-18 15:35:40 +0000
commit: ce49b0504c523820be6ba33ac370da19bf75d9eb (patch)
tree: 0f6723002d704b61d5dca30bad0d3d520632ae59
parent: 8b50d9656a88747ec02767cd3370ff29b82ccf6f (diff)
download: s6-rc-ce49b0504c523820be6ba33ac370da19bf75d9eb.tar.xz
Usability changes for transition failures in s6-rc change
- Add explicit s6-svc -d call for longrun transition failure
- Add SIGTERM and SIGINT handling: kill all longrun transitions
- Doc update
- Credit Lionel
- Prepare for 0.2.1.0
-rw-r--r-- .gitignore 2
-rw-r--r-- AUTHORS 1
-rw-r--r-- COPYING 2
-rw-r--r-- INSTALL 4
-rw-r--r-- NEWS 11
-rw-r--r-- doc/index.html 6
-rw-r--r-- doc/s6-rc.html 40
-rw-r--r-- doc/upgrade.html 15
-rw-r--r-- package/info 2
-rw-r--r-- src/s6-rc/s6-rc.c 47
10 files changed, 118 insertions, 12 deletions
diff --git a/.gitignore b/.gitignore
index bff391a..4f5d226 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@
*.a.xyzzy
*.lo
*.so.xyzzy
+config.mak
+src/include/s6-rc/config.h
diff --git a/AUTHORS b/AUTHORS
index 1a57b4a..d3d6b13 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -10,3 +10,4 @@ Thanks to:
Guillermo <gdiazhartusch@gmail.com>
Colin Booth <cathexis@gmail.com>
Casper Ti. Vector <caspervector@gmail.com>
+ Lionel Van Bemten <lionel.van_bemten@nokia.com>
diff --git a/COPYING b/COPYING
index fabed3c..a34920e 100644
--- a/COPYING
+++ b/COPYING
@@ -1,4 +1,4 @@
-Copyright (c) 2015-2016 Laurent Bercot <ska-skaware@skarnet.org>
+Copyright (c) 2015-2017 Laurent Bercot <ska-skaware@skarnet.org>
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
diff --git a/INSTALL b/INSTALL
index 6ff1754..ee9ea24 100644
--- a/INSTALL
+++ b/INSTALL
@@ -6,9 +6,9 @@ Build Instructions
- A POSIX-compliant C development environment
- GNU make version 3.81 or later
- - skalibs version 2.5.1.0 or later: http://skarnet.org/software/skalibs/
+ - skalibs version 2.5.1.1 or later: http://skarnet.org/software/skalibs/
- execline version 2.3.0.1 or later: http://skarnet.org/software/execline/
- - s6 version 2.5.1.0 or later: http://skarnet.org/software/s6/
+ - s6 version 2.6.0.0 or later: http://skarnet.org/software/s6/
This software will run on any operating system that implements
POSIX.1-2008, available at:
diff --git a/NEWS b/NEWS
index 3c9d5b5..0288f64 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,16 @@
Changelog for s6-rc.
+In 0.2.1.0
+----------
+ - Timeouts for oneshots have been increased to 30 seconds.
+ - s6-rc now sends an explicit "s6-svc -d" to a longrun when an
+up transition fails.
+ - s6-rc now kills subprocesses waiting on longrun transitions
+when it receives a SIGINT or a SIGTERM.
+ + Those two changes combined ensure that a user can interrupt
+a transition that is blocked in a "service is not-ready" limbo, and
+bring back the service to a known "down" state.
+
In 0.2.0.1
----------
diff --git a/doc/index.html b/doc/index.html
index 39d0208..aec93ff 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -48,11 +48,11 @@ scripts are also run in a controlled environment.
<li> A POSIX-compliant system with a standard C development environment </li>
<li> GNU make, version 3.81 or later </li>
<li> <a href="//skarnet.org/software/skalibs/">skalibs</a> version
-2.5.1.0 or later </li>
+2.5.1.1 or later </li>
<li> <a href="//skarnet.org/software/execline/">execline</a> version
2.3.0.1 or later </li>
<li> <a href="//skarnet.org/software/s6/">s6</a> version
-2.5.1.0 or later </li>
+2.6.0.0 or later </li>
</ul>
<h3> Licensing </h3>
@@ -66,7 +66,7 @@ scripts are also run in a controlled environment.
<ul>
<li> The current released version of s6-rc is
-<a href="s6-rc-0.2.0.1.tar.gz">0.2.0.1</a>. </li>
+<a href="s6-rc-0.2.1.0.tar.gz">0.2.1.0</a>. </li>
<li> Alternatively, you can checkout a copy of the
<a href="//git.skarnet.org/cgi-bin/cgit.cgi/s6-rc/">s6-rc
git repository</a>:
diff --git a/doc/s6-rc.html b/doc/s6-rc.html
index 957c108..7052b45 100644
--- a/doc/s6-rc.html
+++ b/doc/s6-rc.html
@@ -253,6 +253,31 @@ to arrive. The transition will fail if a timeout occurs.
</p>
<p>
+ If a <em>down</em> transition fails, s6-rc does nothing with it. The service
+has already received a SIGTERM, and may be stuck in the process of exiting;
+or it may already have died but is stuck in a bad <tt>finish</tt> script
+that is not timing out. In any case, it is not a situation that s6-rc
+can recover from; the service is most likely down, but the administrator
+should manually check their process list. And fix their scripts, or
+timeout values, because a <em>down</em> transition failure is always a
+programmer or sysadmin error.
+</p>
+
+<p>
+ If an <em>up</em> transition fails, s6-rc sends an explicit
+<a href="//skarnet.org/software/s6/s6-svc.html">s6-svc -d</a> command to
+the longrun. This ensures the service is in a known <em>down</em> state
+when failing to go up, instead of (for instance) being stuck in a not-ready
+limbo state.
+</p>
+
+<p>
+ Note that proper usage of the <tt>timeout-kill</tt> and <tt>timeout-finish</tt>
+values in the longrun's definition directory can considerably reduce the
+number of cases where the service is left in an unknown state.
+</p>
+
+<p>
Transitions are supposed to be idempotent, but it is a general
rule of supervision that <tt>run</tt> and <tt>finish</tt> scripts
must be idempotent, so a properly designed service directory
@@ -310,6 +335,21 @@ each simulated transition will take <em>dryrunthrottle</em>
milliseconds to complete successfully.
</p>
+<h2> Signals </h2>
+
+<p>
+ <tt>s6-rc change</tt> reacts to the following signals:
+</p>
+
+<ul>
+ <li> SIGTERM: s6-rc immediately aborts all its longrun transitions with
+a failure, and the impacted longruns will most likely be in a <em>down</em>
+state. Oneshot transitions are untouched, because killing the
+oneshot subprocess would make it impossible to determine what state the
+oneshot service is in. </li>
+ <li> SIGINT: same as SIGTERM. </li>
+</ul>
+
<h2> Usage examples </h2>
<pre> s6-rc change <em>myservicebundle</em> </pre>
diff --git a/doc/upgrade.html b/doc/upgrade.html
index 3db2fd0..2e4bd72 100644
--- a/doc/upgrade.html
+++ b/doc/upgrade.html
@@ -18,6 +18,21 @@
<h1> What has changed in s6-rc </h1>
+<h2> in 0.2.1.0 </h2>
+
+<ul>
+ <li> <a href="//skarnet.org/software/skalibs/">skalibs</a>
+dependency bumped to 2.5.1.1. </li>
+ <li> <a href="//skarnet.org/software/execline/">execline</a>
+dependency bumped to 2.3.0.1. </li>
+ <li> <a href="//skarnet.org/software/s6/">s6</a>
+dependency bumped to 2.6.0.0. </li>
+ <li> <a href="s6-rc.html">s6-rc change</a> now ensures that a
+longrun is down when its up transition fails. </li>
+ <li> <a href="s6-rc.html">s6-rc change</a> now kills subprocesses
+waiting on a longrun transition when it receives a SIGINT or a SIGTERM. </li>
+</ul>
+
<h2> in 0.2.0.1 </h2>
<ul>
diff --git a/package/info b/package/info
index 624261c..c9b5527 100644
--- a/package/info
+++ b/package/info
@@ -1,4 +1,4 @@
package=s6-rc
-version=0.2.0.1
+version=0.2.1.0
category=admin
package_macro_name=S6RC
diff --git a/src/s6-rc/s6-rc.c b/src/s6-rc/s6-rc.c
index a80ae91..fe7af67 100644
--- a/src/s6-rc/s6-rc.c
+++ b/src/s6-rc/s6-rc.c
@@ -13,6 +13,7 @@
#include <skalibs/strerr2.h>
#include <skalibs/tai.h>
#include <skalibs/environ.h>
+#include <skalibs/sig.h>
#include <skalibs/djbunix.h>
#include <skalibs/selfpipe.h>
#include <skalibs/iopause.h>
@@ -194,6 +195,22 @@ static void success_longrun (unsigned int i, int h)
}
}
+static void failure_longrun (unsigned int i, int h)
+{
+ if (h && !dryrun[0])
+ {
+ size_t svdlen = strlen(db->string + db->services[i].name) ;
+ char fn[livelen + svdlen + 10] ;
+ char const *newargv[5] = { S6_EXTBINPREFIX "s6-svc", "-d", "--", fn, 0 } ;
+ memcpy(fn, live, livelen) ;
+ memcpy(fn + livelen, "/scandir/", 9) ;
+ memcpy(fn + livelen + 9, db->string + db->services[i].name, svdlen) ;
+ fn[livelen + 9 + svdlen] = 0 ;
+ if (!child_spawn0(newargv[0], newargv, (char const *const *)environ))
+ strerr_warnwu2sys("spawn ", newargv[0]) ;
+ }
+}
+
static void broadcast_success (unsigned int, int) ;
static void examine (unsigned int i, int h)
@@ -253,6 +270,7 @@ static void on_success (unsigned int i, int h)
static void on_failure (unsigned int i, int h, int crashed, unsigned int code)
{
+ if (i < db->nlong) failure_longrun(i, h) ;
if (verbosity)
{
char fmt[UINT_FMT] ;
@@ -261,17 +279,24 @@ static void on_failure (unsigned int i, int h, int crashed, unsigned int code)
}
}
+static inline void kill_longruns (void)
+{
+ unsigned int j = npids ;
+ while (j--) if (pidindex[j].i < db->nlong)
+ kill(pidindex[j].pid, SIGTERM) ;
+}
+
static int handle_signals (int h)
{
int ok = 1 ;
for (;;)
{
- switch (selfpipe_read())
+ int sig = selfpipe_read() ;
+ switch (sig)
{
case -1 : strerr_diefu1sys(111, "selfpipe_read()") ;
case 0 : return ok ;
case SIGCHLD :
- {
for (;;)
{
unsigned int j = 0 ;
@@ -296,7 +321,12 @@ static int handle_signals (int h)
}
}
break ;
- }
+ case SIGTERM :
+ case SIGINT :
+ if (verbosity >= 2)
+ strerr_warnw3x("received ", sig_name(sig), ", aborting longrun transitions") ;
+ kill_longruns() ;
+ break ;
default : strerr_dief1x(101, "inconsistent signal state") ;
}
}
@@ -567,8 +597,15 @@ int main (int argc, char const *const *argv)
spfd = selfpipe_init() ;
if (spfd < 0) strerr_diefu1sys(111, "init selfpipe") ;
- if (selfpipe_trap(SIGCHLD) < 0)
- strerr_diefu1sys(111, "trap SIGCHLD") ;
+ {
+ sigset_t set ;
+ sigemptyset(&set) ;
+ sigaddset(&set, SIGCHLD) ;
+ sigaddset(&set, SIGTERM) ;
+ sigaddset(&set, SIGINT) ;
+ if (selfpipe_trapset(&set) < 0)
+ strerr_diefu1sys(111, "trap signals") ;
+ }
if (prune)
{