Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions app/jobs/concerns/maintenance_tasks/task_job_concern.rb
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,21 @@ def before_perform
end

def on_start
count = @task.count
@run.start(safe_count)
end

def safe_count
count = CountTimeout.with_timeout(MaintenanceTasks.count_timeout_ms) { @task.count }
count = @collection_enum.size if count == NO_COUNT_DEFINED
@run.start(count)
count
rescue ActiveRecord::QueryCanceled => error
Rails.error.report(
error,
handled: true,
severity: :warning,
context: { task: @task.class.name, timeout_ms: MaintenanceTasks.count_timeout_ms },
)
nil
end

def on_shutdown
Expand Down
18 changes: 18 additions & 0 deletions lib/maintenance_tasks.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

require "job-iteration"
require "maintenance_tasks/engine"
require "maintenance_tasks/count_timeout"

# The engine's namespace module. It provides isolation between the host
# application's code and the engine-specific code. Top-level engine constants
Expand Down Expand Up @@ -150,6 +151,23 @@ module MaintenanceTasks
NO_COUNT_DEFINED.freeze
private_constant :NO_COUNT_DEFINED

# @!attribute count_timeout_ms
# @scope class
# Server-side statement timeout, in milliseconds, applied while computing
# a Task's count for progress tracking. The count query populates the
# progress bar and is not required for task execution, so a slow count
# must never block or fail a run.
#
# Implemented via the database's native cancellation mechanism
# (`statement_timeout` on PostgreSQL, `max_execution_time` on MySQL /
# Trilogy). Adapters without a native equivalent (e.g. SQLite) ignore the
# setting. See {CountTimeout}.
#
# Set to `nil` or `0` to disable.
#
# @return [Integer, nil] the timeout in milliseconds.
mattr_accessor :count_timeout_ms, default: 5_000

class << self
DEPRECATION_MESSAGE = "MaintenanceTasks.error_handler is deprecated and will be removed in the 3.0 release. " \
"Instead, reports will be sent to the Rails error reporter. Do not set a handler and subscribe " \
Expand Down
80 changes: 80 additions & 0 deletions lib/maintenance_tasks/count_timeout.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# frozen_string_literal: true

module MaintenanceTasks
# Wraps a block in a database-level statement timeout so a slow query is
# cancelled by the server rather than left running indefinitely.
#
# Used by {TaskJobConcern} to bound the {Task#count} query that populates
# the progress bar — a count failure must not stall or fail a task run.
#
# The mechanism is adapter-specific:
#
# * **PostgreSQL** — `SET LOCAL statement_timeout` inside
# `transaction(requires_new: true)`; automatically reset on commit or
# rollback. When invoked inside an existing transaction (e.g. test
# fixtures), AR's `requires_new` opens a savepoint rather than a real
# transaction; the timeout is still enforced for the duration of the
# block, but the `SET LOCAL` setting is only cleared when the outer
# transaction ends. {TaskJobConcern#on_start} is not wrapped in a
# transaction in production, so this is invisible there.
# * **MySQL / Trilogy** — `SET SESSION max_execution_time`, with the prior
# value restored in an `ensure` block.
# * **SQLite / unknown adapters** — no-op; the block runs without a timeout.
#
# On timeout the database raises {ActiveRecord::QueryCanceled} (or its
# subclass {ActiveRecord::StatementTimeout}). Callers are expected to
# rescue and treat the result as unknown.
#
# @example
# MaintenanceTasks::CountTimeout.with_timeout(5_000) do
# User.where(active: true).count
# end
module CountTimeout
extend self

# Run +block+ with a server-side statement timeout of +timeout_ms+
# milliseconds applied to +connection+.
#
# When +timeout_ms+ is +nil+, +0+, or negative the block runs unmodified —
# callers can pass the configured value directly without a guard.
#
# @param timeout_ms [Integer, nil] the timeout in milliseconds.
# @param connection [ActiveRecord::ConnectionAdapters::AbstractAdapter]
# the connection to apply the timeout to. Defaults to
# `ActiveRecord::Base.connection`.
# @yield the block to execute under the timeout.
# @return the value returned by the block.
# @raise [ActiveRecord::QueryCanceled] if the database cancels a query.
def with_timeout(timeout_ms, connection: ActiveRecord::Base.connection, &block)
return yield if timeout_ms.nil? || timeout_ms <= 0

case connection.adapter_name
when "PostgreSQL", "PostGIS"
with_postgresql_timeout(connection, timeout_ms.to_i, &block)
when "Mysql2", "Trilogy"
with_mysql_timeout(connection, timeout_ms.to_i, &block)
else
yield
end
end

private

def with_postgresql_timeout(connection, timeout_ms, &block)
connection.transaction(requires_new: true) do
connection.execute("SET LOCAL statement_timeout = #{timeout_ms}")
block.call
end
end

def with_mysql_timeout(connection, timeout_ms)
previous = connection.select_value("SELECT @@SESSION.max_execution_time").to_i
connection.execute("SET SESSION max_execution_time = #{timeout_ms}")
begin
yield
ensure
connection.execute("SET SESSION max_execution_time = #{previous}")
end
end
end
end
26 changes: 26 additions & 0 deletions test/jobs/maintenance_tasks/task_job_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,32 @@ class << self
assert_equal 2, @run.reload.tick_total
end

test "a count query that exceeds the timeout is rescued and the run starts with no tick_total" do
Maintenance::TestTask.any_instance.stubs(:count).raises(
ActiveRecord::QueryCanceled.new("canceling statement due to statement timeout"),
)
Rails.error.expects(:report).with do |error, **kwargs|
error.is_a?(ActiveRecord::QueryCanceled) &&
kwargs[:handled] == true &&
kwargs[:severity] == :warning &&
kwargs[:context][:task] == "Maintenance::TestTask"
end

TaskJob.perform_now(@run)

assert_nil @run.reload.tick_total
end

test "@task.count is wrapped in CountTimeout with the configured timeout" do
original = MaintenanceTasks.count_timeout_ms
MaintenanceTasks.count_timeout_ms = 1234
CountTimeout.expects(:with_timeout).with(1234).yields.returns(MaintenanceTasks.const_get(:NO_COUNT_DEFINED))

TaskJob.perform_now(@run)
ensure
MaintenanceTasks.count_timeout_ms = original
end

test ".perform_now accepts custom enumerated tasks" do
run = Run.create!(task_name: "Maintenance::CustomEnumeratingTask")

Expand Down
178 changes: 178 additions & 0 deletions test/lib/maintenance_tasks/count_timeout_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
# frozen_string_literal: true

require "test_helper"

module MaintenanceTasks
class CountTimeoutTest < ActiveSupport::TestCase
test ".with_timeout yields without modifying the connection when timeout_ms is nil" do
connection = mock("connection")
connection.expects(:adapter_name).never

result = CountTimeout.with_timeout(nil, connection: connection) { :ok }

assert_equal :ok, result
end

test ".with_timeout yields without modifying the connection when timeout_ms is zero" do
connection = mock("connection")
connection.expects(:adapter_name).never

assert_equal :ok, CountTimeout.with_timeout(0, connection: connection) { :ok }
end

test ".with_timeout yields without modifying the connection when timeout_ms is negative" do
connection = mock("connection")
connection.expects(:adapter_name).never

assert_equal :ok, CountTimeout.with_timeout(-1, connection: connection) { :ok }
end

test ".with_timeout is a no-op on SQLite" do
connection = mock("connection")
connection.stubs(adapter_name: "SQLite")
connection.expects(:execute).never
connection.expects(:transaction).never

assert_equal :ok, CountTimeout.with_timeout(5_000, connection: connection) { :ok }
end

test ".with_timeout is a no-op for unknown adapters" do
connection = mock("connection")
connection.stubs(adapter_name: "SomeFutureAdapter")
connection.expects(:execute).never
connection.expects(:transaction).never

assert_equal :ok, CountTimeout.with_timeout(5_000, connection: connection) { :ok }
end

test ".with_timeout sets statement_timeout inside a transaction on PostgreSQL" do
connection = mock("connection")
connection.stubs(adapter_name: "PostgreSQL")

transaction_seen = false
connection.expects(:transaction).with(requires_new: true).yields.returns(:txn_result)
connection.expects(:execute).with("SET LOCAL statement_timeout = 5000")

result = CountTimeout.with_timeout(5_000, connection: connection) do
transaction_seen = true
:inner
end

assert transaction_seen, "block should have been invoked"
assert_equal :txn_result, result
end

test ".with_timeout works on PostGIS" do
connection = mock("connection")
connection.stubs(adapter_name: "PostGIS")
connection.expects(:transaction).with(requires_new: true).yields.returns(:done)
connection.expects(:execute).with("SET LOCAL statement_timeout = 1234")

assert_equal :done, CountTimeout.with_timeout(1_234, connection: connection) { :ignored }
end

test ".with_timeout sets and restores max_execution_time on Mysql2" do
connection = mock("connection")
connection.stubs(adapter_name: "Mysql2")

seq = sequence("mysql")
connection.expects(:select_value).with("SELECT @@SESSION.max_execution_time").returns("0").in_sequence(seq)
connection.expects(:execute).with("SET SESSION max_execution_time = 5000").in_sequence(seq)
connection.expects(:execute).with("SET SESSION max_execution_time = 0").in_sequence(seq)

assert_equal :ok, CountTimeout.with_timeout(5_000, connection: connection) { :ok }
end

test ".with_timeout works on Trilogy" do
connection = mock("connection")
connection.stubs(adapter_name: "Trilogy")

seq = sequence("trilogy")
connection.expects(:select_value).returns("250").in_sequence(seq)
connection.expects(:execute).with("SET SESSION max_execution_time = 5000").in_sequence(seq)
connection.expects(:execute).with("SET SESSION max_execution_time = 250").in_sequence(seq)

CountTimeout.with_timeout(5_000, connection: connection) { :ok }
end

test ".with_timeout restores max_execution_time on MySQL even if the block raises" do
connection = mock("connection")
connection.stubs(adapter_name: "Mysql2")

seq = sequence("mysql")
connection.expects(:select_value).returns("100").in_sequence(seq)
connection.expects(:execute).with("SET SESSION max_execution_time = 5000").in_sequence(seq)
connection.expects(:execute).with("SET SESSION max_execution_time = 100").in_sequence(seq)

assert_raises(RuntimeError) do
CountTimeout.with_timeout(5_000, connection: connection) { raise "boom" }
end
end

test ".with_timeout lets ActiveRecord::QueryCanceled propagate" do
connection = mock("connection")
connection.stubs(adapter_name: "Mysql2")
connection.stubs(:select_value).returns("0")
connection.stubs(:execute)

assert_raises(ActiveRecord::QueryCanceled) do
CountTimeout.with_timeout(5_000, connection: connection) do
raise ActiveRecord::QueryCanceled, "timeout"
end
end
end

class PostgreSQLIntegrationTest < ActiveSupport::TestCase
# `SET LOCAL` is transaction-scoped. With transactional fixtures the test
# already runs inside an outer transaction, so the helper's
# `transaction(requires_new: true)` opens a savepoint and the setting
# leaks past the helper's block. Disable fixture wrapping so we exercise
# the same code path the production job runs in.
self.use_transactional_tests = false

setup do
skip "requires PostgreSQL" unless ["PostgreSQL", "PostGIS"].include?(
ActiveRecord::Base.connection.adapter_name,
)
end

test "a query exceeding the timeout raises ActiveRecord::QueryCanceled" do
assert_raises(ActiveRecord::QueryCanceled) do
CountTimeout.with_timeout(50) do
ActiveRecord::Base.connection.select_value("SELECT pg_sleep(2)")
end
end
end

test "a fast query completes normally under the timeout" do
result = CountTimeout.with_timeout(5_000) do
ActiveRecord::Base.connection.select_value("SELECT 1")
end

assert_equal 1, result
end

test "statement_timeout is reset after the block returns" do
CountTimeout.with_timeout(50) do
ActiveRecord::Base.connection.select_value("SELECT 1")
end

timeout = ActiveRecord::Base.connection.select_value("SHOW statement_timeout")

assert_equal "0", timeout
end

test "statement_timeout is reset after the block raises" do
assert_raises(ActiveRecord::QueryCanceled) do
CountTimeout.with_timeout(50) do
ActiveRecord::Base.connection.select_value("SELECT pg_sleep(2)")
end
end

timeout = ActiveRecord::Base.connection.select_value("SHOW statement_timeout")

assert_equal "0", timeout
end
end
end
end
Loading