From 986a664109ead1f48b1ea542f175ae2ee68b3b51 Mon Sep 17 00:00:00 2001 From: JamesWrigley Date: Tue, 7 Apr 2026 23:51:52 +0200 Subject: [PATCH] Cleanly shut down the GC messages task `start_gc_msgs_task()` previously launched a lingering task that could cause hangs when Julia is started with multiple threads. --- src/cluster.jl | 17 ++++++++++++++++- src/remotecall.jl | 6 ++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/cluster.jl b/src/cluster.jl index 848b908..1bfab27 100644 --- a/src/cluster.jl +++ b/src/cluster.jl @@ -1337,12 +1337,26 @@ end using Random: randstring +# Exit handler state +const shutting_down = Threads.Atomic{Bool}(false) + +function atexit_handler() + if inited[] + terminate_all_workers() + end + + shutting_down[] = true + @lock any_gc_flag notify(any_gc_flag) + if !isnothing(gc_msgs_task) + wait(gc_msgs_task::Task) + end +end + # do initialization that's only needed when there is more than 1 processor const inited = Threads.Atomic{Bool}(false) function init_multi() if !Threads.atomic_cas!(inited, false, true) push!(Base.package_callbacks, _require_callback) - atexit(terminate_all_workers) init_bind_addr() cluster_cookie(randstring(HDR_COOKIE_LEN)) end @@ -1350,6 +1364,7 @@ function init_multi() end function init_parallel() + atexit(atexit_handler) start_gc_msgs_task() # start in "head node" mode, if worker, will override later. diff --git a/src/remotecall.jl b/src/remotecall.jl index 19ec2f7..e4dea65 100644 --- a/src/remotecall.jl +++ b/src/remotecall.jl @@ -272,10 +272,12 @@ end # XXX: Is this worth the additional complexity? # `flush_gc_msgs` has to iterate over all connected workers. const any_gc_flag = Threads.Condition() +gc_msgs_task::Union{Task, Nothing} = nothing + function start_gc_msgs_task() - errormonitor( + global gc_msgs_task = errormonitor( @async begin - while true + while !shutting_down[] lock(any_gc_flag) do # this might miss events wait(any_gc_flag)