From 1fbf03bc1e979e909e4f781d06fe285682def535 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Thu, 12 Feb 2026 07:50:28 +0100 Subject: [PATCH 1/6] Clear pid condition on service collection to fix stale deps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a setup like this, when 'netd' is marked dirty and subsequently is reloaded, e.g., using 'initctl reload', zebra is properly restarted, but staticd isn't: mgmtd ← netd ← zebra ← staticd Finit must invalidate the condition of zebra to trigger a restart also of staticd. This to guard against daemons like zebra that may fail to clean up their pidfiles. Fixes #475 Signed-off-by: Joachim Wiberg --- src/service.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/service.c b/src/service.c index 05121d06..eefae25b 100644 --- a/src/service.c +++ b/src/service.c @@ -1093,6 +1093,7 @@ static void service_notify_stop(svc_t *svc) */ static void service_cleanup(svc_t *svc) { + char cond[MAX_COND_LEN]; char *fn; /* PID collected, cancel any pending SIGKILL */ @@ -1103,6 +1104,18 @@ static void service_cleanup(svc_t *svc) logit(LOG_CRIT, "Failed removing service %s pidfile %s", svc_ident(svc, NULL, 0), fn); + /* + * Invalidate the pid/ condition for this service to ensure + * dependent services are properly stopped and restarted. + * Without this, the condition is only cleared asynchronously + * via inotify on pidfile removal, which may not trigger when + * the daemon fails to clean up its own pidfile, or when the + * service dies during a reload cycle and goes directly from + * RUNNING to HALTED (skipping STOPPING where cond_clear() + * is normally called). + */ + cond_clear(mkcond(svc, cond, sizeof(cond))); + service_notify_stop(svc); /* No longer running, update books. */ From ab81272083377f5ca1a70734f73fe8274f0ec69a Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Thu, 12 Feb 2026 10:55:10 +0100 Subject: [PATCH 2/6] test: new regression test to verify multi-chain deps Signed-off-by: Joachim Wiberg --- test/Makefile.am | 2 + test/dep-chain-reload.sh | 122 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100755 test/dep-chain-reload.sh diff --git a/test/Makefile.am b/test/Makefile.am index ca965ed2..6e5ddfce 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -32,6 +32,7 @@ EXTRA_DIST += add-remove-dynamic-service-sub-config.sh EXTRA_DIST += bootstrap-crash.sh EXTRA_DIST += cond-start-task.sh EXTRA_DIST += crashing.sh +EXTRA_DIST += dep-chain-reload.sh EXTRA_DIST += depserv.sh EXTRA_DIST += devmon.sh EXTRA_DIST += failing-sysv.sh @@ -74,6 +75,7 @@ TESTS += add-remove-dynamic-service-sub-config.sh TESTS += bootstrap-crash.sh TESTS += cond-start-task.sh TESTS += crashing.sh +TESTS += dep-chain-reload.sh TESTS += depserv.sh TESTS += devmon.sh TESTS += failing-sysv.sh diff --git a/test/dep-chain-reload.sh b/test/dep-chain-reload.sh new file mode 100755 index 00000000..43111701 --- /dev/null +++ b/test/dep-chain-reload.sh @@ -0,0 +1,122 @@ +#!/bin/sh +# Verify transitive dependency chain during reload and crash +# +# Three services in a chain: A → B → C, where B depends on pid/A +# and C depends on pid/B. B is placed in a sub-config file so +# we can use 'initctl touch' on it. B uses (with +# leading '!') so it does not support SIGHUP, causing a full +# stop/start cycle on 'initctl touch svc_b.conf' + reload. +# +# Test 1 - touch + reload: +# After 'initctl touch svc_b.conf' + 'initctl reload': +# - A should be unaffected (same PID) +# - B is restarted (config was touched, noreload) +# - C must be restarted (transitive, depends on pid/B) +# +# Test 2 - crash (kill -9): +# When B is killed with SIGKILL the crash path (RUNNING → +# HALTED) bypasses STOPPING, where cond_clear() used to be +# the only call site. The pidfile plugin only watches for +# IN_CLOSE_WRITE, so neither the pidfile removal (IN_DELETE) +# nor a pidfile touch (IN_ATTRIB) triggers an inotify event. +# Without the fix in service_cleanup(), pid/B is never +# invalidated and C is never restarted. + +set -eu + +TEST_DIR=$(dirname "$0") + +test_teardown() +{ + say "Running test teardown." + run "rm -f $FINIT_RCSD/svc_b.conf" +} + +pidof() +{ + texec initctl -j status "$1" | jq .pid +} + +test_setup() +{ + run "cat >> $FINIT_CONF" < name:svc_c serv -np -i svc_c -- Needs B +EOF + run "echo 'service log:stdout notify:pid name:svc_b serv -np -i svc_b -- Needs A' > $FINIT_RCSD/svc_b.conf" +} + +# shellcheck source=/dev/null +. "$TEST_DIR/lib/setup.sh" + +sep "Configuration" +run "cat $FINIT_CONF" +run "cat $FINIT_RCSD/svc_b.conf" + +say "Reload Finit to start all services" +run "initctl reload" + +say "Wait for full chain to start" +retry 'assert_status "svc_c" "running"' 10 1 + +run "initctl status" +run "initctl cond dump" + +# ―――――――――――――――――――――――――――――――――――――――――――――――――――――― +# Test 1: touch + reload +# ―――――――――――――――――――――――――――――――――――――――――――――――――――――― +sep "Test 1: Touch B and global reload" + +pid_a=$(pidof svc_a) +pid_b=$(pidof svc_b) +pid_c=$(pidof svc_c) +say "PIDs before: A=$pid_a B=$pid_b C=$pid_c" + +run "initctl touch svc_b.conf" +run "initctl reload" + +say "Wait for chain to settle" +retry 'assert_status "svc_c" "running"' 15 1 + +run "initctl status" +run "initctl cond dump" + +new_pid_a=$(pidof svc_a) +new_pid_b=$(pidof svc_b) +new_pid_c=$(pidof svc_c) +say "PIDs after: A=$new_pid_a B=$new_pid_b C=$new_pid_c" + +# shellcheck disable=SC2086 +assert "A was not restarted" $new_pid_a -eq $pid_a +# shellcheck disable=SC2086 +assert "B was restarted (touched)" $new_pid_b -ne $pid_b +# shellcheck disable=SC2086 +assert "C was restarted (transitive dep)" $new_pid_c -ne $pid_c + +# ―――――――――――――――――――――――――――――――――――――――――――――――――――――― +# Test 2: crash (kill -9), bypasses STOPPING +# ―――――――――――――――――――――――――――――――――――――――――――――――――――――― +sep "Test 2: Kill B with SIGKILL (bypasses STOPPING)" + +pid_b=$(pidof svc_b) +pid_c=$(pidof svc_c) +say "PIDs before: B=$pid_b C=$pid_c" + +run "kill -9 $pid_b" + +say "Wait for B to respawn and chain to settle" +retry 'assert_status "svc_c" "running"' 15 1 + +run "initctl status" +run "initctl cond dump" + +new_pid_b=$(pidof svc_b) +new_pid_c=$(pidof svc_c) +say "PIDs after: B=$new_pid_b C=$new_pid_c" + +# shellcheck disable=SC2086 +assert "B was restarted (crashed+respawn)" $new_pid_b -ne $pid_b +# shellcheck disable=SC2086 +assert "C was restarted (transitive dep)" $new_pid_c -ne $pid_c + +return 0 From 4bb2c338592dcb43c6324ab2b1a3b4688336417e Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Fri, 13 Feb 2026 07:24:55 +0100 Subject: [PATCH 3/6] Dependents not restarted after SIGHUP reload of service When 'initctl reload' is called after marking a service in a dependency chain dirty, Finit fails to restart (unfreeze) affected services. This patch updates the pidfile plugin to watch for IN_ATTRIB changes, e.g. when a process uses utimensat() to update its pidfile, and adds service_step_all() at end of reload cycle to guarantee convergence after conditions are reasserted. Issue #476 Signed-off-by: Joachim Wiberg --- plugins/pidfile.c | 3 +- src/sm.c | 12 ++++ test/dep-chain-reload.sh | 122 +++++++++++++++++++++++++++++---------- 3 files changed, 104 insertions(+), 33 deletions(-) diff --git a/plugins/pidfile.c b/plugins/pidfile.c index c64e18ff..ae0fdeea 100644 --- a/plugins/pidfile.c +++ b/plugins/pidfile.c @@ -59,7 +59,7 @@ static int pidfile_add_path(struct iwatch *iw, char *path) } } - return iwatch_add(iw, path, IN_ONLYDIR | IN_CLOSE_WRITE); + return iwatch_add(iw, path, IN_ONLYDIR | IN_CLOSE_WRITE | IN_ATTRIB); } static void pidfile_update_conds(char *dir, char *name, uint32_t mask) @@ -276,6 +276,7 @@ static void pidfile_reconf(void *arg) if (cond_get(cond) == COND_ON) continue; + dbg("Reassert condition %s", cond); cond_set_path(cond_path(cond), COND_ON); } diff --git a/src/sm.c b/src/sm.c index 69f61c86..85510ff8 100644 --- a/src/sm.c +++ b/src/sm.c @@ -510,15 +510,27 @@ void sm_step(void) /* Cleanup stale services */ svc_clean_dynamic(service_unregister); + dbg("Step all services waiting for reasserted conditions ..."); + service_step_all(SVC_TYPE_ANY); + dbg("Calling reconf hooks ..."); plugin_run_hooks(HOOK_SVC_RECONF); + dbg("Step all services waiting for reasserted conditions ..."); + service_step_all(SVC_TYPE_ANY); + dbg("Update configuration generation of device conditions ..."); devmon_reconf(); + dbg("Step all services waiting for reasserted conditions ..."); + service_step_all(SVC_TYPE_ANY); + dbg("Update configuration generation of unmodified non-native services ..."); service_notify_reconf(); + dbg("Step all services waiting for reasserted conditions ..."); + service_step_all(SVC_TYPE_ANY); + dbg("Reconfiguration done"); sm.state = SM_RUNNING_STATE; break; diff --git a/test/dep-chain-reload.sh b/test/dep-chain-reload.sh index 43111701..b5cfea49 100755 --- a/test/dep-chain-reload.sh +++ b/test/dep-chain-reload.sh @@ -1,26 +1,32 @@ #!/bin/sh # Verify transitive dependency chain during reload and crash # -# Three services in a chain: A → B → C, where B depends on pid/A -# and C depends on pid/B. B is placed in a sub-config file so -# we can use 'initctl touch' on it. B uses (with -# leading '!') so it does not support SIGHUP, causing a full -# stop/start cycle on 'initctl touch svc_b.conf' + reload. +# Four services in a chain: A → B → C → D, all using notify:pid. +# B and C are placed in separate sub-config files so we can use +# 'initctl touch' on them independently. B supports SIGHUP (reload), +# while C and D use the '!' condition prefix (noreload), matching +# a common pattern in real-world setups (e.g., FRR daemons on Infix). # -# Test 1 - touch + reload: -# After 'initctl touch svc_b.conf' + 'initctl reload': -# - A should be unaffected (same PID) -# - B is restarted (config was touched, noreload) -# - C must be restarted (transitive, depends on pid/B) +# Chain: A ← B ← C ← D +# +# Test 1 - touch C + reload (the "Infix" scenario): +# C is in the middle of the chain. After 'initctl touch svc_c.conf' +# + 'initctl reload': +# - A and B should be unaffected (same PID) +# - C is stopped/started (config was touched, noreload) +# - D must be restarted (reverse dep of C) # # Test 2 - crash (kill -9): -# When B is killed with SIGKILL the crash path (RUNNING → -# HALTED) bypasses STOPPING, where cond_clear() used to be -# the only call site. The pidfile plugin only watches for -# IN_CLOSE_WRITE, so neither the pidfile removal (IN_DELETE) -# nor a pidfile touch (IN_ATTRIB) triggers an inotify event. -# Without the fix in service_cleanup(), pid/B is never -# invalidated and C is never restarted. +# When B is killed with SIGKILL the crash path (RUNNING → HALTED) +# bypasses STOPPING. Without cond_clear() in service_cleanup(), +# pid/B is never invalidated and C, D are never restarted. +# +# Test 3 - touch B + reload: +# B supports SIGHUP (no '!' prefix). After 'initctl touch +# svc_b.conf' + 'initctl reload': +# - A should be unaffected (same PID) +# - B is SIGHUP'd (same PID, config was touched) +# - C and D must be restarted (transitive reverse deps) set -eu @@ -29,7 +35,7 @@ TEST_DIR=$(dirname "$0") test_teardown() { say "Running test teardown." - run "rm -f $FINIT_RCSD/svc_b.conf" + run "rm -f $FINIT_RCSD/svc_b.conf $FINIT_RCSD/svc_c.conf" } pidof() @@ -41,9 +47,14 @@ test_setup() { run "cat >> $FINIT_CONF" < name:svc_c serv -np -i svc_c -- Needs B +service log:stdout notify:pid name:svc_d serv -np -i svc_d -- Needs C +EOF + run "cat >> $FINIT_RCSD/svc_b.conf" < name:svc_b serv -np -i svc_b -- Needs A +EOF + run "cat >> $FINIT_RCSD/svc_c.conf" < name:svc_c serv -np -i svc_c -- Needs B EOF - run "echo 'service log:stdout notify:pid name:svc_b serv -np -i svc_b -- Needs A' > $FINIT_RCSD/svc_b.conf" } # shellcheck source=/dev/null @@ -52,31 +63,33 @@ EOF sep "Configuration" run "cat $FINIT_CONF" run "cat $FINIT_RCSD/svc_b.conf" +run "cat $FINIT_RCSD/svc_c.conf" say "Reload Finit to start all services" run "initctl reload" say "Wait for full chain to start" -retry 'assert_status "svc_c" "running"' 10 1 +retry 'assert_status "svc_d" "running"' 10 1 run "initctl status" run "initctl cond dump" # ―――――――――――――――――――――――――――――――――――――――――――――――――――――― -# Test 1: touch + reload +# Test 1: touch C (middle of chain, noreload) + reload # ―――――――――――――――――――――――――――――――――――――――――――――――――――――― -sep "Test 1: Touch B and global reload" +sep "Test 1: Touch C (middle) and global reload" pid_a=$(pidof svc_a) pid_b=$(pidof svc_b) pid_c=$(pidof svc_c) -say "PIDs before: A=$pid_a B=$pid_b C=$pid_c" +pid_d=$(pidof svc_d) +say "PIDs before: A=$pid_a B=$pid_b C=$pid_c D=$pid_d" -run "initctl touch svc_b.conf" +run "initctl touch svc_c.conf" run "initctl reload" say "Wait for chain to settle" -retry 'assert_status "svc_c" "running"' 15 1 +retry 'assert_status "svc_d" "running"' 15 1 run "initctl status" run "initctl cond dump" @@ -84,14 +97,17 @@ run "initctl cond dump" new_pid_a=$(pidof svc_a) new_pid_b=$(pidof svc_b) new_pid_c=$(pidof svc_c) -say "PIDs after: A=$new_pid_a B=$new_pid_b C=$new_pid_c" +new_pid_d=$(pidof svc_d) +say "PIDs after: A=$new_pid_a B=$new_pid_b C=$new_pid_c D=$new_pid_d" # shellcheck disable=SC2086 assert "A was not restarted" $new_pid_a -eq $pid_a # shellcheck disable=SC2086 -assert "B was restarted (touched)" $new_pid_b -ne $pid_b +assert "B was not restarted" $new_pid_b -eq $pid_b # shellcheck disable=SC2086 -assert "C was restarted (transitive dep)" $new_pid_c -ne $pid_c +assert "C was restarted (touched)" $new_pid_c -ne $pid_c +# shellcheck disable=SC2086 +assert "D was restarted (transitive dep)" $new_pid_d -ne $pid_d # ―――――――――――――――――――――――――――――――――――――――――――――――――――――― # Test 2: crash (kill -9), bypasses STOPPING @@ -100,23 +116,65 @@ sep "Test 2: Kill B with SIGKILL (bypasses STOPPING)" pid_b=$(pidof svc_b) pid_c=$(pidof svc_c) -say "PIDs before: B=$pid_b C=$pid_c" +pid_d=$(pidof svc_d) +say "PIDs before: B=$pid_b C=$pid_c D=$pid_d" run "kill -9 $pid_b" say "Wait for B to respawn and chain to settle" -retry 'assert_status "svc_c" "running"' 15 1 +retry 'assert_status "svc_d" "running"' 15 1 run "initctl status" run "initctl cond dump" new_pid_b=$(pidof svc_b) new_pid_c=$(pidof svc_c) -say "PIDs after: B=$new_pid_b C=$new_pid_c" +new_pid_d=$(pidof svc_d) +say "PIDs after: B=$new_pid_b C=$new_pid_c D=$new_pid_d" # shellcheck disable=SC2086 assert "B was restarted (crashed+respawn)" $new_pid_b -ne $pid_b # shellcheck disable=SC2086 assert "C was restarted (transitive dep)" $new_pid_c -ne $pid_c +# shellcheck disable=SC2086 +assert "D was restarted (transitive dep)" $new_pid_d -ne $pid_d + +# ―――――――――――――――――――――――――――――――――――――――――――――――――――――― +# Test 3: touch B (supports SIGHUP) + reload +# ―――――――――――――――――――――――――――――――――――――――――――――――――――――― +sep "Test 3: Touch B (SIGHUP) and global reload" + +pid_a=$(pidof svc_a) +pid_b=$(pidof svc_b) +pid_c=$(pidof svc_c) +pid_d=$(pidof svc_d) +say "PIDs before: A=$pid_a B=$pid_b C=$pid_c D=$pid_d" + +run "initctl debug" +run "initctl touch svc_b.conf" +run "initctl reload" +sleep 2 +run "initctl status" + +say "Wait for chain to settle" +retry 'assert_status "svc_d" "running"' 15 1 + +run "initctl status" +run "initctl cond dump" + +new_pid_a=$(pidof svc_a) +new_pid_b=$(pidof svc_b) +new_pid_c=$(pidof svc_c) +new_pid_d=$(pidof svc_d) +say "PIDs after: A=$new_pid_a B=$new_pid_b C=$new_pid_c D=$new_pid_d" + +# shellcheck disable=SC2086 +assert "A was not restarted" $new_pid_a -eq $pid_a +# shellcheck disable=SC2086 +assert "B was not restarted (SIGHUP)" $new_pid_b -eq $pid_b +# shellcheck disable=SC2086 +assert "C was restarted (transitive dep)" $new_pid_c -ne $pid_c +# shellcheck disable=SC2086 +assert "D was restarted (transitive dep)" $new_pid_d -ne $pid_d return 0 From 435670199b9f219f4a4e650b5fcc6c2dd2c06998 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Mon, 16 Feb 2026 13:32:11 +0100 Subject: [PATCH 4/6] Add <~cond> condition modifier for tightly coupled services MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Conditions in Finit are dependencies: if A is asserted, service B is allowed to run. When A goes through FLUX (e.g., upstream reloads), dependents are PAUSED and then simply resumed when the condition is reasserted -- this is the correct behavior for barrier-style deps like . However, some setups have tightly coupled services where dependents must be reloaded/restarted when an upstream service reloads, not just resumed. E.g., the FRR routing stack on Infix OS: netd ← zebra ← {staticd,ripd} When netd reloads (SIGHUP), zebra and its dependents must be restarted to pick up the new configuration. The new '~' condition prefix marks a dependency as flux-sensitive: service name:zebra ... When the upstream condition goes FLUX and returns to ON, the dependent is reloaded (SIGHUP) or restarted (noreload '!') instead of merely resumed. Transitivity follows naturally through the condition chain. Closes #416 Closes #476 Signed-off-by: Joachim Wiberg --- doc/conditions.md | 37 +++++++++++++++++++++++++++++++++---- doc/config/services.md | 8 ++++++++ src/conf.c | 13 +++++++++++++ src/service.c | 14 ++++++++++++++ src/svc.h | 1 + test/dep-chain-reload.sh | 17 +++++++++-------- 6 files changed, 78 insertions(+), 12 deletions(-) diff --git a/doc/conditions.md b/doc/conditions.md index 93df4169..aac0eadf 100644 --- a/doc/conditions.md +++ b/doc/conditions.md @@ -14,11 +14,11 @@ specified separated by comma. Multiple conditions are logically AND'ed during evaluation, i.e. all conditions must be satisfied in order for a service to run. -A special syntax, using a leading `!` in run/task/service conditions, -denote if a: +Two special prefixes can be used inside the angle brackets: - - service does not support `SIGHUP` - - run/task should not block runlevel changes (i.e., bootstrap) + - `!` -- service does not support `SIGHUP` (noreload), or run/task + should not block runlevel changes (i.e., bootstrap) + - `~` -- propagate reload from this dependency, see below Finit guarantees by default that all run/tasks run (at least) once per runlevel. For most tasks this is a good default, for example @@ -53,6 +53,30 @@ moving to the next runlevel after bootstrap, so we set ``: task [S0123456789] name:pwrfail initctl poweroff -- Power failure, shutting down +Propagating Reload in Dependencies +----------------------------------- + +By default, when a service reloads during `initctl reload`, dependent +services are paused (`SIGSTOP`) and simply resumed (`SIGCONT`) when the +condition is reasserted. This is correct for barrier-style dependencies +like ``, where dependents just need syslogd running and do +not care if it reloads its config. + +For services that need to react when their upstream reloads, the `~` +prefix propagates the reload from the dependency: + + service name:svc_b /sbin/svc_b -- Needs A (barrier) + service name:svc_c /sbin/svc_c -- Propagate reload from B + +Here, `<~pid/svc_b>` means: propagate a reload of `svc_b` to `svc_c`. +When `svc_b` reloads, `svc_c` will be restarted (because of `!`, +noreload) instead of merely resumed. If `svc_c` supported `SIGHUP` +(no `!` prefix), it would be sent `SIGHUP` instead. + +This is similar to systemd's `PropagatesReloadTo=` directive, but +declared on the consumer side rather than the provider side. + + Triggering ---------- @@ -281,6 +305,11 @@ This STOP/CONT handling minimizes the number of unnecessary service restarts that would otherwise occur because a depending service was sent `SIGHUP` for example. +Services with the `~` prefix are an exception to this rule: when their +conditions return to `on` after being in `flux`, the reload is propagated +-- the service is reloaded (SIGHUP) or restarted (noreload `!`) instead +of simply being resumed. + Therefore, any plugin that supplies Finit with conditions must ensure that their state is updated after each reconfiguration. This can be done by binding to the `HOOK_SVC_RECONF` hook. For an example of how diff --git a/doc/config/services.md b/doc/config/services.md index 38ba0be4..a315b372 100644 --- a/doc/config/services.md +++ b/doc/config/services.md @@ -47,6 +47,14 @@ waits for another service, `zebra`, to have created its PID file in *all* files in `/var/run`, for each file named `*.pid`, or `*/pid`, Finit opens it and find the matching `NAME:ID` using the PID. +The condition can be prefixed with `!` and/or `~`: + + - `` -- `ospfd` does not support `SIGHUP` (noreload) + - `<~pid/zebra>` -- propagate reload from `zebra` to `ospfd` + - `` -- both: noreload and propagate reload + +For details, see the [Finit Conditions](../conditions.md) document. + Some services do not maintain a PID file and rather than patching each application Finit provides a workaround. A `pid` keyword can be set to have Finit automatically create (when starting) and later remove diff --git a/src/conf.c b/src/conf.c index b4393e65..d0abb61d 100644 --- a/src/conf.c +++ b/src/conf.c @@ -745,8 +745,21 @@ void conf_parse_cond(svc_t *svc, char *cond) return; } + /* + * The '~' prefix means a reload of the upstream service is + * propagated to this service -- it will be reloaded (SIGHUP) + * or restarted (noreload), not just paused and resumed. + * Syntax: or <~pid/foo> + * + * Strip '~' from each condition and set the service-level + * flag. This allows '~' on any condition in the list. + */ svc->cond[0] = 0; for (i = 0, c = strtok(ptr, ","); c; c = strtok(NULL, ","), i++) { + if (c[0] == '~') { + c++; + svc->flux_reload = 1; + } devmon_add_cond(c); if (i) strlcat(svc->cond, ",", sizeof(svc->cond)); diff --git a/src/service.c b/src/service.c index eefae25b..4b6938b0 100644 --- a/src/service.c +++ b/src/service.c @@ -3146,6 +3146,20 @@ int service_step(svc_t *svc) switch (cond) { case COND_ON: kill(svc->pid, SIGCONT); + + /* + * Propagate reload (~): upstream reloaded, so + * we must also reload/restart, not just resume. + */ + if (svc->flux_reload && !svc_is_changed(svc)) { + svc_set_state(svc, SVC_RUNNING_STATE); + if (svc_is_noreload(svc)) + service_stop(svc); + else + service_reload(svc); + break; + } + svc_set_state(svc, SVC_RUNNING_STATE); /* Reassert condition if we go from waiting and no change */ if (!svc_is_changed(svc)) { diff --git a/src/svc.h b/src/svc.h index d433a515..f21a8ee6 100644 --- a/src/svc.h +++ b/src/svc.h @@ -146,6 +146,7 @@ typedef struct svc { int starting; /* ... waiting for pidfile to be re-asserted */ int runlevels; int sighup; /* This service supports SIGHUP :) */ + int flux_reload; /* Propagate reload from dependency, '~' prefix */ int forking; /* This is a service/sysv daemon that forks, wait for it ... */ svc_block_t block; /* Reason that this service is currently stopped */ char cond[MAX_COND_LEN]; diff --git a/test/dep-chain-reload.sh b/test/dep-chain-reload.sh index b5cfea49..dd67ac19 100755 --- a/test/dep-chain-reload.sh +++ b/test/dep-chain-reload.sh @@ -4,17 +4,18 @@ # Four services in a chain: A → B → C → D, all using notify:pid. # B and C are placed in separate sub-config files so we can use # 'initctl touch' on them independently. B supports SIGHUP (reload), -# while C and D use the '!' condition prefix (noreload), matching -# a common pattern in real-world setups (e.g., FRR daemons on Infix). +# while C and D use '!' (noreload) and '~' (propagate reload), +# matching a common pattern in real-world setups (e.g., FRR routing +# daemons on Infix OS). # -# Chain: A ← B ← C ← D +# Chain: A ← B ← C ← D # # Test 1 - touch C + reload (the "Infix" scenario): # C is in the middle of the chain. After 'initctl touch svc_c.conf' # + 'initctl reload': # - A and B should be unaffected (same PID) # - C is stopped/started (config was touched, noreload) -# - D must be restarted (reverse dep of C) +# - D must be restarted (reload propagated from C) # # Test 2 - crash (kill -9): # When B is killed with SIGKILL the crash path (RUNNING → HALTED) @@ -26,7 +27,7 @@ # svc_b.conf' + 'initctl reload': # - A should be unaffected (same PID) # - B is SIGHUP'd (same PID, config was touched) -# - C and D must be restarted (transitive reverse deps) +# - C and D must be restarted (reload propagated, '~' prefix) set -eu @@ -47,13 +48,13 @@ test_setup() { run "cat >> $FINIT_CONF" < name:svc_d serv -np -i svc_d -- Needs C +service log:stdout notify:pid name:svc_d serv -np -i svc_d -- Needs C EOF run "cat >> $FINIT_RCSD/svc_b.conf" < name:svc_b serv -np -i svc_b -- Needs A +service log:stdout notify:pid name:svc_b serv -np -i svc_b -- Needs A EOF run "cat >> $FINIT_RCSD/svc_c.conf" < name:svc_c serv -np -i svc_c -- Needs B +service log:stdout notify:pid name:svc_c serv -np -i svc_c -- Needs B EOF } From 7fba137288eba0c7e3521ac3a4683527364c1240 Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Mon, 16 Feb 2026 17:12:16 +0100 Subject: [PATCH 5/6] Reformat signaling log messages for readability Signed-off-by: Joachim Wiberg --- src/service.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/service.c b/src/service.c index 4b6938b0..4ab732b8 100644 --- a/src/service.c +++ b/src/service.c @@ -1153,7 +1153,7 @@ int service_stop(svc_t *svc) service_timeout_cancel(svc); if (svc->stop_script[0]) { - logit(LOG_CONSOLE | LOG_NOTICE, "%s[%d], calling stop:%s ...", id, svc->pid, svc->stop_script); + logit(LOG_CONSOLE | LOG_NOTICE, "Stopping %s[%d], calling stop:%s ...", id, svc->pid, svc->stop_script); } else if (!svc_is_sysv(svc)) { char *nm = pid_get_name(svc->pid, NULL, 0); const char *sig = sig_name(svc->sighalt); @@ -1168,10 +1168,10 @@ int service_stop(svc_t *svc) } dbg("Sending %s to pid:%d name:%s(%s)", sig, svc->pid, id, nm); - logit(LOG_CONSOLE | LOG_NOTICE, "%s[%d], stopping, sending %s ...", id, svc->pid, sig); + logit(LOG_CONSOLE | LOG_NOTICE, "Stopping %s[%d], sending %s ...", id, svc->pid, sig); } else { compose_cmdline(svc, cmdline, sizeof(cmdline)); - logit(LOG_CONSOLE | LOG_NOTICE, "%s[%d], calling '%s stop' ...", id, svc->pid, cmdline); + logit(LOG_CONSOLE | LOG_NOTICE, "Stopping %s[%d], calling '%s stop' ...", id, svc->pid, cmdline); } /* @@ -1291,7 +1291,7 @@ static int service_reload(svc_t *svc) print_desc("Restarting ", svc->desc); if (svc->reload_script[0]) { - logit(LOG_CONSOLE | LOG_NOTICE, "%s[%d], calling reload:%s ...", id, svc->pid, svc->reload_script); + logit(LOG_CONSOLE | LOG_NOTICE, "Reloading %s[%d], calling reload:%s ...", id, svc->pid, svc->reload_script); rc = service_run_script(svc, svc->reload_script); } else if (svc->sighup) { if (svc->pid <= 1) { @@ -1299,8 +1299,8 @@ static int service_reload(svc_t *svc) svc->start_time = svc->pid = 0; goto done; } - dbg("%s[%d], sending SIGHUP", id, svc->pid); - logit(LOG_CONSOLE | LOG_NOTICE, "%s[%d], sending SIGHUP ...", id, svc->pid); + dbg("Reloading %s[%d], sending SIGHUP", id, svc->pid); + logit(LOG_CONSOLE | LOG_NOTICE, "Reloading %s[%d], sending SIGHUP ...", id, svc->pid); rc = kill(svc->pid, SIGHUP); if (rc == -1 && (errno == ESRCH || errno == ENOENT)) { /* nobody home, reset internal state machine */ From c06fd734696ea37c9e69ed7e0fa4bb52ab01673c Mon Sep 17 00:00:00 2001 From: Joachim Wiberg Date: Fri, 13 Feb 2026 07:41:36 +0100 Subject: [PATCH 6/6] Update ChangeLog Signed-off-by: Joachim Wiberg --- doc/ChangeLog.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/ChangeLog.md b/doc/ChangeLog.md index 4057ff1f..0bd0d143 100644 --- a/doc/ChangeLog.md +++ b/doc/ChangeLog.md @@ -18,6 +18,11 @@ All relevant changes are documented in this file. systemd `RemainAfterExit=yes`, by Aaron Andersen - Clear service conditions on `initctl reload NAME` to ensure dependent services are properly updated +- Add `~` condition prefix to propagate reload from a dependency to the + dependent service. E.g., `` means not only a regular + condition, but when `netd` reloads, restart this service too. Similar + to systemd's directive `PropagatesReloadTo=`, but declared on the + consumer side. Issue #416 ### Fixes - Fix #464: invalid user:group examples in cgroups.md @@ -25,6 +30,14 @@ All relevant changes are documented in this file. - Fix #467: TTY services stuck in restart state after non-zero exit. Throttling logic introduced in v4.15 had duplicate checks causing infinite timer loop, and TTYs lacked default restart timeout +- Fix #475: clear pid condition on service collection to fix stale + deps. When a service crashes (SIGKILL), the RUNNING → HALTED path + bypasses STOPPING where `cond_clear()` is normally called, leaving + dependents stuck +- Fix #476: dependents not restarted after SIGHUP reload of service in + dependency chain. Add `service_step_all()` at end of reload cycle to + guarantee convergence after conditions are reasserted. See also the + new `~` condition prefix (above) to propagate reload to dependents - Fix handling of already-mounted cgroups in `cgroup_init()`, can occur after switch_root or in container environments - Improve cgroups documentation clarity, grammar, and examples