From 75c4223a7f5a8a073ba0d898ef9d841fdaef2f63 Mon Sep 17 00:00:00 2001
From: Laurent Bercot
Date: Wed, 21 Mar 2018 18:00:10 +0000
Subject: Add s6-svdt-clear, s6-permafailon
---
.gitignore | 3 +
NEWS | 3 +-
doc/index.html | 2 +
doc/s6-permafailon.html | 98 ++++++++++++++++++++++++++
doc/s6-svdt-clear.html | 58 ++++++++++++++++
doc/s6-svdt.html | 6 +-
doc/s6-svstat.html | 5 +-
doc/upgrade.html | 6 +-
package/deps.mak | 6 ++
package/modes | 2 +
package/targets.mak | 2 +
src/supervision/deps-exe/s6-permafailon | 2 +
src/supervision/deps-exe/s6-svdt-clear | 2 +
src/supervision/s6-permafailon.c | 118 ++++++++++++++++++++++++++++++++
src/supervision/s6-svdt-clear.c | 15 ++++
15 files changed, 322 insertions(+), 6 deletions(-)
create mode 100644 doc/s6-permafailon.html
create mode 100644 doc/s6-svdt-clear.html
create mode 100644 src/supervision/deps-exe/s6-permafailon
create mode 100644 src/supervision/deps-exe/s6-svdt-clear
create mode 100644 src/supervision/s6-permafailon.c
create mode 100644 src/supervision/s6-svdt-clear.c
diff --git a/.gitignore b/.gitignore
index 18a5950..98f7368 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,6 +24,9 @@
/s6-svlisten1
/s6-svlisten
/s6-notifyoncheck
+/s6-svdt
+/s6-svdt-clear
+/s6-permafailon
/s6-envdir
/s6-envuidgid
/s6-fghack
diff --git a/NEWS b/NEWS
index 0e1aa75..8c3c5af 100644
--- a/NEWS
+++ b/NEWS
@@ -5,7 +5,8 @@ In 2.7.1.0
- Bugfixes.
- s6-svwait et al. now exit 102 instead of hanging if s6-supervise dies.
- - New command: s6-svdt
+ - New commands: s6-svdt, s6-svdt-clear, s6-permafailon
+ - s6-tai64nlocal can now print GMT times with the -g option.
In 2.7.0.0
diff --git a/doc/index.html b/doc/index.html
index 733e3ee..bae4da6 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -157,6 +157,8 @@ a user interface to control those processes and monitor service states.
The s6-svlisten program
The s6-notifyoncheck program
The s6-svdt program
+The s6-svdt-clear program
+The s6-permafailon program
Daemontools-like utilities
diff --git a/doc/s6-permafailon.html b/doc/s6-permafailon.html
new file mode 100644
index 0000000..f749d5f
--- /dev/null
+++ b/doc/s6-permafailon.html
@@ -0,0 +1,98 @@
+
+
+
+
+
+ s6: the s6-permafailon program
+
+
+
+
+
+
+
+s6
+Software
+skarnet.org
+
+
+ The s6-permafailon program
+
+
+s6-permafailon is a program that is meant to be used
+in the ./finish script of a
+service directory supervised by
+s6-supervise. When used, it
+reads and analyses the death tally of a service (i.e. the recent
+process death events that happened), and if the death tally
+matches a given pattern, it causes permanent failure
+of the service, i.e. it tells the supervisor not to try and
+restart it.
+
+
+ Interface
+
+
+ s6-permafailon secs deathcount events prog...
+
+
+
+ - s6-permafailon must have the service directory of the
+tested service as its current directory. This is the default if it is
+called from the finish script of the service.
+ - It reads the death tally of the service, which is
+maintained by s6-supervise.
+ - If the supervised process has died at least deathcount
+times in the last secs seconds with a cause listed in
+events, then s6-permafailon exits 125.
+ - Else s6-permafailon execs into prog....
+
+
+
+ events is a comma-separated list of events. An event can be
+one of the following:
+
+
+
+ - An exit code, which is an integer between 0 and 255. Example: 1
+ - An exit code interval, which is two exit codes separated by a dash. Example: 1-50
+ - A signal name, or a signal number preceded by "SIG". Examples: SIGTERM, sigabrt, sig11
+
+
+ Usage
+
+
+ - s6-supervise detects when the ./finish
+script of its service exits 125, and stops respawning the service. So, if the
+./finish script is a chain-loading command line starting with a
+s6-permafailon invocation (or containing such an invocation), when
+s6-permafailon exits 125, then the ./finish script also
+exits 125 (because it is the same process), and the service is then marked as
+failing permanently.
+ - The ./finish script is naturally a chain-loading
+command line if it is written in the
+execline language. It
+can also be made into a chain-loading command line from a shell script by using
+exec s6-permafailon secs deathcount events rest-of-chainloading-cmdline...
+ - Multiple invocations of s6-permafailon can be chained, in order
+to test several death patterns.
+ - If a permanent failure is triggered and secs is high, it is
+possible that when the administrator manually launches the service again,
+the next death triggers a permanent failure again. If this is not wanted,
+the administrator should clear the death tally with the
+s6-svdt-clear command.
+ - The current death tally can be viewed via the s6-svdt
+command.
+
+
+ Example
+
+
+ s6-permafailon 60 5 1,101-103,SIGSEGV,SIGBUS prog...
+will exit 125 if the service has died at least 5 times in the last 60 seconds with
+an exit code of 1, 101, 102 or 103, a SIGSEGV or a SIGBUS. Else it will
+chainload into the prog... command line.
+
+
+
+
diff --git a/doc/s6-svdt-clear.html b/doc/s6-svdt-clear.html
new file mode 100644
index 0000000..e69a1ae
--- /dev/null
+++ b/doc/s6-svdt-clear.html
@@ -0,0 +1,58 @@
+
+
+
+
+
+ s6: the s6-svdt-clear program
+
+
+
+
+
+
+
+s6
+Software
+skarnet.org
+
+
+ The s6-svdt-clear program
+
+
+s6-svdt-clear clears the recorded death tally of a service.
+
+
+ Interface
+
+
+ s6-svdt-clear servicedir
+
+
+
+ s6-svdt-clear clears the recorded death tally of the service being
+currently supervised at the servicedir
+service directory.
+
+
+
+ - 0: success
+ - 100: wrong usage
+ - 111: system call failed
+
+
+ Notes
+
+
+ - Use of s6-svdt-clear impacts the listings obtained
+by the s6-svdt command.
+ - It also impacts the behaviour of the
+s6-permafailon command. This is
+the main reason to use s6-svdt-clear: once a service has
+failed permanently due to an excessive number of deaths in a given
+time, it can be useful to erase that record of deaths before
+starting the service again, in order to avoid permanently failing
+again too fast.
+
+
+
+
diff --git a/doc/s6-svdt.html b/doc/s6-svdt.html
index a908d8c..f069536 100644
--- a/doc/s6-svdt.html
+++ b/doc/s6-svdt.html
@@ -16,10 +16,10 @@
skarnet.org
- The s6-svdt program
+ The s6-svdt program
-s6-svstat prints the recorded death tally of a service, i.e. a list of the times
+s6-svdt prints the recorded death tally of a service, i.e. a list of the times
the process died, with the cause of death.
@@ -30,7 +30,7 @@ the process died, with the cause of death.
- s6-svdt prints the contents of the recorded death tally of the service being
+ s6-svdt prints the contents of the recorded death tally of the service being
currently supervised at the servicedir
service directory, then exits 0.
diff --git a/doc/s6-svstat.html b/doc/s6-svstat.html
index a77f903..d40ec3b 100644
--- a/doc/s6-svstat.html
+++ b/doc/s6-svstat.html
@@ -76,8 +76,11 @@ are as follows.
names are the following:
- up: print true if the service is up and false if it is down.
+
+
- wantedup: print true if s6-supervise
is currently instructed to (re)start the service when it is down, and false if
s6-supervise is currently instructed to leave the service alone.
diff --git a/doc/upgrade.html b/doc/upgrade.html
index 10f139b..c57156d 100644
--- a/doc/upgrade.html
+++ b/doc/upgrade.html
@@ -22,7 +22,11 @@
in 2.7.0.0
diff --git a/package/deps.mak b/package/deps.mak
index f74fc71..7c506bd 100644
--- a/package/deps.mak
+++ b/package/deps.mak
@@ -125,8 +125,10 @@ src/pipe-tools/s6-ftrig-notify.o src/pipe-tools/s6-ftrig-notify.lo: src/pipe-too
src/pipe-tools/s6-ftrig-wait.o src/pipe-tools/s6-ftrig-wait.lo: src/pipe-tools/s6-ftrig-wait.c src/include/s6/ftrigr.h
src/pipe-tools/s6-mkfifodir.o src/pipe-tools/s6-mkfifodir.lo: src/pipe-tools/s6-mkfifodir.c src/include/s6/ftrigw.h
src/supervision/s6-notifyoncheck.o src/supervision/s6-notifyoncheck.lo: src/supervision/s6-notifyoncheck.c src/include/s6/ftrigr.h src/include/s6/s6-supervise.h
+src/supervision/s6-permafailon.o src/supervision/s6-permafailon.lo: src/supervision/s6-permafailon.c src/include/s6/s6-supervise.h
src/supervision/s6-supervise.o src/supervision/s6-supervise.lo: src/supervision/s6-supervise.c src/include/s6/ftrigw.h src/include/s6/s6-supervise.h
src/supervision/s6-svc.o src/supervision/s6-svc.lo: src/supervision/s6-svc.c src/include/s6/config.h src/include/s6/s6-supervise.h
+src/supervision/s6-svdt-clear.o src/supervision/s6-svdt-clear.lo: src/supervision/s6-svdt-clear.c src/include/s6/s6-supervise.h
src/supervision/s6-svdt.o src/supervision/s6-svdt.lo: src/supervision/s6-svdt.c src/include/s6/s6-supervise.h
src/supervision/s6-svlisten.o src/supervision/s6-svlisten.lo: src/supervision/s6-svlisten.c src/supervision/s6-svlisten.h
src/supervision/s6-svlisten1.o src/supervision/s6-svlisten1.lo: src/supervision/s6-svlisten1.c src/supervision/s6-svlisten.h
@@ -245,12 +247,16 @@ s6-mkfifodir: EXTRA_LIBS :=
s6-mkfifodir: src/pipe-tools/s6-mkfifodir.o ${LIBS6} -lskarnet
s6-notifyoncheck: EXTRA_LIBS := ${SOCKET_LIB} ${TAINNOW_LIB} ${SPAWN_LIB}
s6-notifyoncheck: src/supervision/s6-notifyoncheck.o ${LIBS6} -lskarnet
+s6-permafailon: EXTRA_LIBS :=
+s6-permafailon: src/supervision/s6-permafailon.o ${LIBS6} -lskarnet
s6-supervise: EXTRA_LIBS := ${TAINNOW_LIB}
s6-supervise: src/supervision/s6-supervise.o ${LIBS6} -lskarnet
s6-svc: EXTRA_LIBS :=
s6-svc: src/supervision/s6-svc.o ${LIBS6} -lskarnet
s6-svdt: EXTRA_LIBS :=
s6-svdt: src/supervision/s6-svdt.o ${LIBS6} -lskarnet
+s6-svdt-clear: EXTRA_LIBS :=
+s6-svdt-clear: src/supervision/s6-svdt-clear.o ${LIBS6} -lskarnet
s6-svlisten: EXTRA_LIBS := ${SOCKET_LIB} ${TAINNOW_LIB} ${SPAWN_LIB}
s6-svlisten: src/supervision/s6-svlisten.o src/supervision/s6_svlisten_signal_handler.o src/supervision/s6_svlisten_loop.o ${LIBS6} -lexecline -lskarnet
s6-svlisten1: EXTRA_LIBS := ${SOCKET_LIB} ${TAINNOW_LIB} ${SPAWN_LIB}
diff --git a/package/modes b/package/modes
index 9c3939c..851c316 100644
--- a/package/modes
+++ b/package/modes
@@ -15,10 +15,12 @@ s6-svscanctl 0755
s6-svok 0755
s6-svstat 0755
s6-svdt 0755
+s6-svdt-clear 0755
s6-svwait 0755
s6-svlisten1 0755
s6-svlisten 0755
s6-notifyoncheck 0755
+s6-permafailon 0755
s6-applyuidgid 0700
s6-envdir 0755
s6-envuidgid 0755
diff --git a/package/targets.mak b/package/targets.mak
index 50445bd..ad1a889 100644
--- a/package/targets.mak
+++ b/package/targets.mak
@@ -15,10 +15,12 @@ s6-svscanctl \
s6-svok \
s6-svstat \
s6-svdt \
+s6-svdt-clear \
s6-svwait \
s6-svlisten1 \
s6-svlisten \
s6-notifyoncheck \
+s6-permafailon \
s6-envdir \
s6-envuidgid \
s6-fghack \
diff --git a/src/supervision/deps-exe/s6-permafailon b/src/supervision/deps-exe/s6-permafailon
new file mode 100644
index 0000000..08815d9
--- /dev/null
+++ b/src/supervision/deps-exe/s6-permafailon
@@ -0,0 +1,2 @@
+${LIBS6}
+-lskarnet
diff --git a/src/supervision/deps-exe/s6-svdt-clear b/src/supervision/deps-exe/s6-svdt-clear
new file mode 100644
index 0000000..08815d9
--- /dev/null
+++ b/src/supervision/deps-exe/s6-svdt-clear
@@ -0,0 +1,2 @@
+${LIBS6}
+-lskarnet
diff --git a/src/supervision/s6-permafailon.c b/src/supervision/s6-permafailon.c
new file mode 100644
index 0000000..db9ccb6
--- /dev/null
+++ b/src/supervision/s6-permafailon.c
@@ -0,0 +1,118 @@
+/* ISC license. */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define USAGE "s6-permafailon seconds deathcount statuslist prog..."
+#define dieusage() strerr_dieusage(100, USAGE)
+
+/*
+ * Walk the status list s entry by entry and record what it names.
+ *  - An entry starting with digits is an exit code, optionally followed by
+ *    '-' and a second exit code to form an inclusive range. The matching
+ *    bits are set in codes through bitarray_set / bitarray_setn.
+ *  - Any other entry is handed to sig0_scan and the resulting signal is
+ *    added to sigs with sigaddset.
+ * After each entry, any run of ',', ';', space, newline, carriage return
+ * or tab is skipped.
+ * codes and sigs are only ever added to; this function never clears them.
+ * Malformed entries are reported through strerr_dief1x with code 100.
+ */
+static void list_scan (char const *s, unsigned char *codes, sigset_t *sigs)
+{
+  static char const seps[] = ",; \n\r\t" ;
+  size_t i = 0 ;
+  while (s[i])
+  {
+    unsigned int first ;
+    size_t w = uint_scan(s + i, &first) ;
+    if (!w)
+    {
+      /* No leading digits: the entry has to be a signal specification. */
+      int signum ;
+      size_t end = i ;
+      /* strchr() also matches the terminating NUL, so the scan stops at end of string. */
+      while (!strchr(seps, s[end])) end++ ;
+      /* sig0_scan is given a NUL-terminated copy of just this entry. */
+      char token[end - i + 1] ;
+      memcpy(token, s + i, end - i) ;
+      token[end - i] = 0 ;
+      w = sig0_scan(token, &signum) ;
+      if (!w) strerr_dief1x(100, "invalid status list specification") ;
+      i += w ;
+      if (sigaddset(sigs, signum) < 0) strerr_dief1x(100, "invalid signal") ;
+    }
+    else
+    {
+      if (first > 255) strerr_dief1x(100, "invalid exit code") ;
+      i += w ;
+      if (s[i] != '-') bitarray_set(codes, first) ;
+      else
+      {
+        /* "first-last": inclusive range of exit codes. */
+        unsigned int last ;
+        i++ ;
+        w = uint_scan(s + i, &last) ;
+        if (!w) strerr_dief1x(100, "invalid interval specification") ;
+        if (last > 255) strerr_dief1x(100, "invalid exit code") ;
+        if (last < first) strerr_dief1x(100, "invalid interval") ;
+        i += w ;
+        bitarray_setn(codes, first, last - first + 1) ;
+      }
+    }
+    /* Only the 6 separator bytes are searched, so a NUL never matches and ends the skip. */
+    while (memchr(seps, s[i], 6)) i++ ;
+  }
+}
+
+/*
+ * s6-permafailon seconds deathcount statuslist prog...
+ *
+ * Reads the death tally stored in the current directory. If at least
+ * deathcount recorded deaths match statuslist and are no older than
+ * `seconds` seconds before the most recent recorded death, a message is
+ * printed and 125 is returned. In every other case (including a missing,
+ * invalid or unreadable tally, which only produces a warning) the rest of
+ * the command line is run through xpathexec0_run.
+ */
+int main (int argc, char const *const *argv, char const *const *envp)
+{
+  /* One bit per exit code 0..255. list_scan only sets bits, so the map must start zeroed. */
+  unsigned char codes[32] = { 0 } ;
+  sigset_t sigs ;
+  unsigned int total, seconds, n ;
+  struct stat st ;
+  PROG = "s6-permafailon" ;
+  if (argc < 4) dieusage() ;
+
+  if (!uint0_scan(argv[1], &seconds)) dieusage() ;
+  if (!uint0_scan(argv[2], &n)) dieusage() ;
+  if (!n) dieusage() ;
+  /* Asking for more deaths than the tally is allowed to hold is clamped to that maximum. */
+  if (n > S6_MAX_DEATH_TALLY) n = S6_MAX_DEATH_TALLY ;
+  /* list_scan only adds signals; the set has to be emptied before it is filled or queried. */
+  sigemptyset(&sigs) ;
+  list_scan(argv[3], codes, &sigs) ;
+
+  if (stat(S6_DTALLY_FILENAME, &st) < 0)
+  {
+    strerr_warnwu2sys("stat ", S6_DTALLY_FILENAME) ;
+    goto cont ;
+  }
+  if (st.st_size % S6_DTALLY_PACK || st.st_size > S6_DTALLY_PACK * S6_MAX_DEATH_TALLY)
+  {
+    strerr_warnw2x("invalid ", S6_DTALLY_FILENAME) ;
+    goto cont ;
+  }
+  total = st.st_size / S6_DTALLY_PACK ;
+  /* Empty tally: nothing can match, and a zero-length VLA below would be undefined. */
+  if (!total) goto cont ;
+  {
+    tain_t mintime ;
+    unsigned int matches = 0 ;
+    s6_dtally_t tab[total] ;
+    ssize_t r = s6_dtally_read(".", tab, total) ;
+    if (r <= 0)
+    {
+      if (r < 0) strerr_warnwu2sys("read ", S6_DTALLY_FILENAME) ;
+      goto cont ;
+    }
+    /* Fewer recorded deaths than required: the pattern cannot match. */
+    if (r < n) goto cont ;
+    /* The window is measured back from the last entry read, not from the current time. */
+    tain_uint(&mintime, seconds) ;
+    tain_sub(&mintime, &tab[r-1].stamp, &mintime) ;
+
+    for (unsigned int i = 0 ; i < r ; i++)
+    {
+      /*
+       * sigismember() returns -1 on an invalid signal number; only a
+       * positive result is a real match.
+       * NOTE(review): the exit code test is also applied to entries with a
+       * non-zero sig; confirm what exitcode holds for signal deaths.
+       */
+      if (!tain_less(&tab[i].stamp, &mintime)
+       && ((tab[i].sig && sigismember(&sigs, tab[i].sig) > 0) || bitarray_peek(codes, tab[i].exitcode))
+       && ++matches >= n)
+      {
+        /* UINT_FMT-sized so that any unsigned value fits, whatever the field widths are. */
+        char fmtevent[UINT_FMT] ;
+        char fmtseconds[UINT_FMT] ;
+        char fmtn[UINT_FMT] ;
+        fmtevent[uint_fmt(fmtevent, tab[i].sig ? tab[i].sig : tab[i].exitcode)] = 0 ;
+        fmtseconds[uint_fmt(fmtseconds, seconds)] = 0 ;
+        /* n goes into its own buffer; writing it into fmtseconds garbled the message. */
+        fmtn[uint_fmt(fmtn, n)] = 0 ;
+        strerr_warni8x("PERMANENT FAILURE triggered after ", fmtn, " events involving ", tab[i].sig ? "signal " : "exit code ", fmtevent, " in the last ", fmtseconds, " seconds") ;
+        return 125 ;
+      }
+    }
+  }
+
+ cont:
+  xpathexec0_run(argv + 4, envp) ;
+}
diff --git a/src/supervision/s6-svdt-clear.c b/src/supervision/s6-svdt-clear.c
new file mode 100644
index 0000000..4db4858
--- /dev/null
+++ b/src/supervision/s6-svdt-clear.c
@@ -0,0 +1,15 @@
+/* ISC license. */
+
+#include
+#include
+
+#define USAGE "s6-svdt-clear servicedir"
+#define dieusage() strerr_dieusage(100, USAGE)
+
+/*
+ * s6-svdt-clear servicedir
+ *
+ * Calls s6_dtally_write on servicedir with no entries (null table, count 0).
+ * Returns 0 on success. A missing argument is reported as a usage error
+ * with code 100, a failed write with code 111.
+ */
+int main (int argc, char const *const *argv)
+{
+  /* Name used as the prefix of the diagnostics below. */
+  PROG = "s6-svdt-clear" ;
+  /* argv[1] is used below, so the service directory argument is mandatory. */
+  if (argc < 2) dieusage() ;
+  if (!s6_dtally_write(argv[1], 0, 0))
+    strerr_diefu2sys(111, "clear death tally for service ", argv[1]) ;
+  return 0 ;
+}
--
cgit v1.2.3