From ce49b0504c523820be6ba33ac370da19bf75d9eb Mon Sep 17 00:00:00 2001
From: Laurent Bercot
Date: Sun, 18 Jun 2017 15:35:40 +0000
Subject: Usability changes for transition failures in s6-rc change
- Add explicit s6-svc -d call for longrun transition failure
- Add SIGTERM and SIGINT handling: kill all longrun transitions
- Doc update
- Credit Lionel
- Prepare for 0.2.1.0
---
.gitignore | 2 ++
AUTHORS | 1 +
COPYING | 2 +-
INSTALL | 4 ++--
NEWS | 11 +++++++++++
doc/index.html | 6 +++---
doc/s6-rc.html | 40 ++++++++++++++++++++++++++++++++++++++++
doc/upgrade.html | 15 +++++++++++++++
package/info | 2 +-
src/s6-rc/s6-rc.c | 47 ++++++++++++++++++++++++++++++++++++++++++-----
10 files changed, 118 insertions(+), 12 deletions(-)
diff --git a/.gitignore b/.gitignore
index bff391a..4f5d226 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@
*.a.xyzzy
*.lo
*.so.xyzzy
+config.mak
+src/include/s6-rc/config.h
diff --git a/AUTHORS b/AUTHORS
index 1a57b4a..d3d6b13 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -10,3 +10,4 @@ Thanks to:
Guillermo
Colin Booth
Casper Ti. Vector
+ Lionel Van Bemten
diff --git a/COPYING b/COPYING
index fabed3c..a34920e 100644
--- a/COPYING
+++ b/COPYING
@@ -1,4 +1,4 @@
-Copyright (c) 2015-2016 Laurent Bercot
+Copyright (c) 2015-2017 Laurent Bercot
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
diff --git a/INSTALL b/INSTALL
index 6ff1754..ee9ea24 100644
--- a/INSTALL
+++ b/INSTALL
@@ -6,9 +6,9 @@ Build Instructions
- A POSIX-compliant C development environment
- GNU make version 3.81 or later
- - skalibs version 2.5.1.0 or later: http://skarnet.org/software/skalibs/
+ - skalibs version 2.5.1.1 or later: http://skarnet.org/software/skalibs/
- execline version 2.3.0.1 or later: http://skarnet.org/software/execline/
- - s6 version 2.5.1.0 or later: http://skarnet.org/software/s6/
+ - s6 version 2.6.0.0 or later: http://skarnet.org/software/s6/
This software will run on any operating system that implements
POSIX.1-2008, available at:
diff --git a/NEWS b/NEWS
index 3c9d5b5..0288f64 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,16 @@
Changelog for s6-rc.
+In 0.2.1.0
+----------
+ - Timeouts for oneshots have been increased to 30 seconds.
+ - s6-rc now sends an explicit "s6-svc -d" to a longrun when an
+up transition fails.
+ - s6-rc now kills subprocesses waiting on longrun transitions
+when it receives a SIGINT or a SIGTERM.
+ + Those two changes combined ensure that a user can interrupt
+a transition that is blocked in a "service is not-ready" limbo, and
+bring back the service to a known "down" state.
+
In 0.2.0.1
----------
diff --git a/doc/index.html b/doc/index.html
index 39d0208..aec93ff 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -48,11 +48,11 @@ scripts are also run in a controlled environment.
A POSIX-compliant system with a standard C development environment
GNU make, version 3.81 or later
skalibs version
-2.5.1.0 or later
+2.5.1.1 or later
execline version
2.3.0.1 or later
s6 version
-2.5.1.0 or later
+2.6.0.0 or later
Licensing
@@ -66,7 +66,7 @@ scripts are also run in a controlled environment.
- The current released version of s6-rc is
-0.2.0.1.
+0.2.1.0.
- Alternatively, you can checkout a copy of the
s6-rc
git repository:
diff --git a/doc/s6-rc.html b/doc/s6-rc.html
index 957c108..7052b45 100644
--- a/doc/s6-rc.html
+++ b/doc/s6-rc.html
@@ -252,6 +252,31 @@ s6-rc will wait forever on an "up" transition for the notification
to arrive. The transition will fail if a timeout occurs.
+
+ If a down transition fails, s6-rc does nothing with it. The service
+has already received a SIGTERM, and may be stuck in the process of exiting;
+or it may already have died but is stuck in a bad finish script
+that is not timing out. In any case, it is not a situation that s6-rc
+can recover from; the service is most likely down, but the administrator
+should manually check their process list. They should also fix their
+scripts or timeout values, because a down transition failure is always a
+programmer or sysadmin error.
+
+
+
+ If an up transition fails, s6-rc sends an explicit
+s6-svc -d command to
+the longrun. This ensures the service is in a known down state
+when failing to go up, instead of (for instance) being stuck in a not-ready
+limbo state.
+
+
+
+ Note that proper usage of the timeout-kill and timeout-finish
+values in the longrun's definition directory can considerably reduce the
+number of cases where the service is left in an unknown state.
+
+
Transitions are supposed to be idempotent, but it is a general
rule of supervision that run and finish scripts
@@ -310,6 +335,21 @@ each simulated transition will take dryrunthrottle
milliseconds to complete successfully.
+ Signals
+
+
+ s6-rc change reacts to the following signals:
+
+
+
+ - SIGTERM: s6-rc immediately aborts all its longrun transitions with
+a failure, and the impacted longruns will most likely be in a down
+state. Oneshot transitions are untouched, because killing the
+oneshot subprocess would make it impossible to determine what state the
+oneshot service is in.
+ - SIGINT: same as SIGTERM.
+
+
Usage examples
s6-rc change myservicebundle
diff --git a/doc/upgrade.html b/doc/upgrade.html
index 3db2fd0..2e4bd72 100644
--- a/doc/upgrade.html
+++ b/doc/upgrade.html
@@ -18,6 +18,21 @@
What has changed in s6-rc
+ in 0.2.1.0
+
+
+ - skalibs
+dependency bumped to 2.5.1.1.
+ - execline
+dependency bumped to 2.3.0.1.
+ - s6
+dependency bumped to 2.6.0.0.
+ - s6-rc change now ensures that a
+longrun is down when its up transition fails.
+ - s6-rc change now kills subprocesses
+waiting on a longrun transition when it receives a SIGINT or a SIGTERM.
+
+
in 0.2.0.1
diff --git a/package/info b/package/info
index 624261c..c9b5527 100644
--- a/package/info
+++ b/package/info
@@ -1,4 +1,4 @@
package=s6-rc
-version=0.2.0.1
+version=0.2.1.0
category=admin
package_macro_name=S6RC
diff --git a/src/s6-rc/s6-rc.c b/src/s6-rc/s6-rc.c
index a80ae91..fe7af67 100644
--- a/src/s6-rc/s6-rc.c
+++ b/src/s6-rc/s6-rc.c
@@ -13,6 +13,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -194,6 +195,22 @@ static void success_longrun (unsigned int i, int h)
}
}
+static void failure_longrun (unsigned int i, int h) /* On failure of service i: if h is nonzero and this is not a dry run, spawn "s6-svc -d" on it. */
+{
+ if (h && !dryrun[0]) /* NOTE(review): h nonzero presumably denotes an "up" transition -- confirm against callers */
+ {
+ size_t svdlen = strlen(db->string + db->services[i].name) ; /* length of the service's name string */
+ char fn[livelen + svdlen + 10] ; /* live + "/scandir/" (9 bytes) + name + terminating NUL */
+ char const *newargv[5] = { S6_EXTBINPREFIX "s6-svc", "-d", "--", fn, 0 } ;
+ memcpy(fn, live, livelen) ;
+ memcpy(fn + livelen, "/scandir/", 9) ;
+ memcpy(fn + livelen + 9, db->string + db->services[i].name, svdlen) ;
+ fn[livelen + 9 + svdlen] = 0 ;
+ if (!child_spawn0(newargv[0], newargv, (char const *const *)environ)) /* a zero return is treated as a spawn failure */
+ strerr_warnwu2sys("spawn ", newargv[0]) ; /* warn only; no other recovery is attempted here */
+ }
+}
+
static void broadcast_success (unsigned int, int) ;
static void examine (unsigned int i, int h)
@@ -253,6 +270,7 @@ static void on_success (unsigned int i, int h)
static void on_failure (unsigned int i, int h, int crashed, unsigned int code)
{
+ if (i < db->nlong) failure_longrun(i, h) ;
if (verbosity)
{
char fmt[UINT_FMT] ;
@@ -261,17 +279,24 @@ static void on_failure (unsigned int i, int h, int crashed, unsigned int code)
}
}
+static inline void kill_longruns (void) /* Send SIGTERM to every tracked child whose service index is below db->nlong. */
+{
+ unsigned int j = npids ; /* walk pidindex from its last entry down to entry 0 */
+ while (j--) if (pidindex[j].i < db->nlong) /* same index test on_failure applies before calling failure_longrun */
+ kill(pidindex[j].pid, SIGTERM) ; /* return value of kill() is ignored */
+}
+
static int handle_signals (int h)
{
int ok = 1 ;
for (;;)
{
- switch (selfpipe_read())
+ int sig = selfpipe_read() ;
+ switch (sig)
{
case -1 : strerr_diefu1sys(111, "selfpipe_read()") ;
case 0 : return ok ;
case SIGCHLD :
- {
for (;;)
{
unsigned int j = 0 ;
@@ -296,7 +321,12 @@ static int handle_signals (int h)
}
}
break ;
- }
+ case SIGTERM :
+ case SIGINT :
+ if (verbosity >= 2)
+ strerr_warnw3x("received ", sig_name(sig), ", aborting longrun transitions") ;
+ kill_longruns() ;
+ break ;
default : strerr_dief1x(101, "inconsistent signal state") ;
}
}
@@ -567,8 +597,15 @@ int main (int argc, char const *const *argv)
spfd = selfpipe_init() ;
if (spfd < 0) strerr_diefu1sys(111, "init selfpipe") ;
- if (selfpipe_trap(SIGCHLD) < 0)
- strerr_diefu1sys(111, "trap SIGCHLD") ;
+ {
+ sigset_t set ;
+ sigemptyset(&set) ;
+ sigaddset(&set, SIGCHLD) ;
+ sigaddset(&set, SIGTERM) ;
+ sigaddset(&set, SIGINT) ;
+ if (selfpipe_trapset(&set) < 0)
+ strerr_diefu1sys(111, "trap signals") ;
+ }
if (prune)
{
--
cgit v1.2.3