diff --git a/src/daemon.zig b/src/daemon.zig
index 6693e61..6f0d22e 100644
--- a/src/daemon.zig
+++ b/src/daemon.zig
@@ -119,14 +119,23 @@ pub const Daemon = struct {
             self.alloc.destroy(c);
         }
         self.conns.deinit(self.alloc);
-        posix.close(self.opts.listen_fd);
-        std.fs.cwd().deleteFile(self.opts.socket_path) catch {};
+        self.retireListener();
         if (self.owned_name) |n| self.alloc.free(n);
         if (self.owned_socket_path) |p| self.alloc.free(p);
         if (self.sig_read >= 0) posix.close(self.sig_read);
         if (sigchld_pipe >= 0) posix.close(sigchld_pipe);
     }
 
+    /// Close the listening socket and remove its file so new clients
+    /// resolve "no session" instead of connecting to a dying daemon
+    /// and reading EOF.
+    fn retireListener(self: *Daemon) void {
+        if (self.opts.listen_fd < 0) return;
+        posix.close(self.opts.listen_fd);
+        self.opts.listen_fd = -1;
+        std.fs.cwd().deleteFile(self.opts.socket_path) catch {};
+    }
+
     fn loop(self: *Daemon) !void {
         var fds: std.ArrayList(posix.pollfd) = .empty;
         defer fds.deinit(self.alloc);
@@ -397,6 +406,11 @@ pub const Daemon = struct {
             }
             self.rename(conn, argv[1]);
         } else if (std.mem.eql(u8, cmd, "quit")) {
+            // Retire the listener before acking: by the time the kill
+            // client sees the reply, the socket file is gone, so a
+            // follow-up command resolves "no session" instead of
+            // connecting to the dying daemon and reading EOF.
+            self.retireListener();
             conn.send(.ok, "");
             if (self.win) |w| {
                 posix.kill(w.child_pid, posix.SIG.HUP) catch {};
diff --git a/src/main.zig b/src/main.zig
index 3b68a4c..9ce5055 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -190,8 +190,10 @@ pub fn sessionInfo(alloc: std.mem.Allocator, dir: []const u8, name: []const u8)
     };
 }
 
-/// Run a control command against a session, mapping a missing daemon
-/// to the documented exit code.
+/// Run a control command against a session, mapping a missing or
+/// mid-teardown daemon to the documented exit code. An EOF on the
+/// control connection means the daemon died before replying, so it is
+/// reported the same as a daemon that is already gone.
 fn mustControl(
     alloc: std.mem.Allocator,
     dir: []const u8,
@@ -201,7 +203,7 @@ fn mustControl(
     const sock = try paths.socketPath(alloc, dir, name);
     defer alloc.free(sock);
     return client.control(alloc, sock, argv) catch |err| switch (err) {
-        error.FileNotFound, error.ConnectionRefused => fail(
+        error.FileNotFound, error.ConnectionRefused, error.ConnectionLost => fail(
             exit_no_session,
             "no session named {s}",
             .{name},
diff --git a/test/integration.zig b/test/integration.zig
index ddbd0cb..c7336ef 100644
--- a/test/integration.zig
+++ b/test/integration.zig
@@ -1193,6 +1193,25 @@ test "agent loop: new, send, wait, peek, kill" {
     try h.runExit(&.{ "peek", "agent" }, 3);
 }
 
+test "kill: peek immediately after kill reports no session" {
+    const alloc = std.testing.allocator;
+    var h = try Harness.init(alloc);
+    defer h.deinit();
+
+    // Once kill is acked the socket file is already unlinked, so a
+    // back-to-back peek must deterministically resolve "no session"
+    // (exit 3) and never observe EOF from the dying daemon. Repeat to
+    // amplify the former race between the kill ack and teardown.
+    var i: usize = 0;
+    var name_buf: [16]u8 = undefined;
+    while (i < 10) : (i += 1) {
+        const name = try std.fmt.bufPrint(&name_buf, "reap{d}", .{i});
+        try h.startDetached(name, &.{"sh"});
+        try h.runOk(&.{ "kill", name });
+        try h.runExit(&.{ "peek", name }, 3);
+    }
+}
+
 test "rename: moves a session to a new name" {
     const alloc = std.testing.allocator;
     var h = try Harness.init(alloc);