@@ -9,14 +9,34 @@ local timeout = tonumber(ARGV[2])
99local use_dynamic_deadline = ARGV [3 ] == " true"
1010local default_timeout = tonumber (ARGV [4 ]) or 0
1111
12- local lost_tests
13- if use_dynamic_deadline then
14- lost_tests = redis .call (' zrangebyscore' , zset_key , 0 , current_time )
15- else
16- lost_tests = redis .call (' zrangebyscore' , zset_key , 0 , current_time - timeout )
-- Helper: returns the heartbeat age of `test` in seconds, or nil when no
-- valid heartbeat is recorded.
--
-- The owner entry is read from the `owners_key` hash and is expected to have
-- the form "<worker_queue_key>|<heartbeat_timestamp>".
--
-- @param test  member name used as the field in the `owners_key` hash
-- @return current_time - heartbeat_timestamp, or nil when the field is
--         absent, contains no "|", or the text after the last "|" is not
--         numeric
local function get_heartbeat_age(test)
  local owner_value = redis.call('hget', owners_key, test)
  -- A missing hash field reaches Lua as `false`, not nil.
  if not owner_value then return nil end

  -- The timestamp is appended after the queue key, so it always follows the
  -- LAST "|". Splitting on the first "|" would misparse (and report "no
  -- heartbeat" for) any owner whose queue key itself contains a "|".
  local heartbeat_text = string.match(owner_value, '|([^|]*)$')
  if not heartbeat_text then return nil end

  local last_heartbeat = tonumber(heartbeat_text)
  if not last_heartbeat then return nil end

  return current_time - last_heartbeat
end
1822
19- for _ , test in ipairs (lost_tests ) do
-- Collect the tests that may be stolen from their current owner.
-- The deadline score stored in the zset is NOT consulted here: a running test
-- qualifies whether or not its deadline has expired, provided that
--   * it is not a member of the processed set, and
--   * its heartbeat is missing/unparseable or at least 120 seconds old.
local stale_heartbeat_seconds = 120
local stealable_tests = {}

local all_running_tests = redis.call('zrange', zset_key, 0, -1)
for index = 1, #all_running_tests do
  local candidate = all_running_tests[index]
  if redis.call('sismember', processed_key, candidate) == 0 then
    local age = get_heartbeat_age(candidate)
    -- nil means no usable heartbeat was recorded for this test
    if age == nil or age >= stale_heartbeat_seconds then
      stealable_tests[#stealable_tests + 1] = candidate
    end
  end
end

39+ for _ , test in ipairs (stealable_tests ) do
2040 if redis .call (' sismember' , processed_key , test ) == 0 then
2141 if use_dynamic_deadline then
2242 local dynamic_timeout = redis .call (' hget' , test_group_timeout_key , test )
@@ -30,7 +50,9 @@ for _, test in ipairs(lost_tests) do
3050 redis .call (' zadd' , zset_key , current_time + timeout , test )
3151 end
3252 redis .call (' lpush' , worker_queue_key , test )
33- redis .call (' hset' , owners_key , test , worker_queue_key ) -- Take ownership
53+ -- Store owner with initial heartbeat timestamp (current_time)
54+ local new_owner_value = worker_queue_key .. " |" .. current_time
55+ redis .call (' hset' , owners_key , test , new_owner_value ) -- Take ownership
3456 return test
3557 end
3658end
0 commit comments