Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 58 additions & 9 deletions app/models/concerns/heartbeatable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -110,22 +110,32 @@ def daily_streaks_for_users(user_ids, start_date: 31.days.ago)
end

timeout = heartbeat_timeout_duration.to_i

# Optimized query: partition by user_id only, then group by day in application code
# This allows PostgreSQL to use the (user_id, time) index efficiently
raw_durations = joins(:user)
.where(user_id: uncached_users)
.coding_only
.with_valid_timestamps
.where(time: start_date..Time.current)
.select(
:user_id,
:time,
"users.timezone as user_timezone",
Arel.sql("DATE_TRUNC('day', to_timestamp(time) AT TIME ZONE users.timezone) as day_group"),
Arel.sql("LEAST(time - LAG(time) OVER (PARTITION BY user_id, DATE_TRUNC('day', to_timestamp(time) AT TIME ZONE users.timezone) ORDER BY time), #{timeout}) as diff")
Arel.sql("LAG(time) OVER (PARTITION BY user_id ORDER BY time) as prev_time")
)

# Then aggregate the results
# Calculate diffs and group by day in a single aggregation step
daily_durations = connection.select_all(
"SELECT user_id, user_timezone, day_group, COALESCE(SUM(diff), 0)::integer as duration
FROM (#{raw_durations.to_sql}) AS diffs
"SELECT user_id, user_timezone,
DATE_TRUNC('day', to_timestamp(time) AT TIME ZONE user_timezone) as day_group,
COALESCE(SUM(
CASE
WHEN prev_time IS NULL THEN 0
ELSE LEAST(time - prev_time, #{timeout})
END
), 0)::integer as duration
FROM (#{raw_durations.to_sql}) AS time_diffs
GROUP BY user_id, user_timezone, day_group"
).group_by { |row| row["user_id"] }
.transform_values do |rows|
Expand Down Expand Up @@ -211,6 +221,9 @@ def duration_seconds(scope = all)
scope = scope.with_valid_timestamps
timeout = heartbeat_timeout_duration.to_i

# Check if user_id is in the WHERE clause
user_id_value = extract_user_id_from_scope(scope)

if scope.group_values.any?
if scope.group_values.length > 1
raise NotImplementedError, "Multiple group values are not supported"
Expand All @@ -221,10 +234,18 @@ def duration_seconds(scope = all)
# Don't quote if it's a SQL function (contains parentheses)
group_expr = group_column.to_s.include?("(") ? group_column : connection.quote_column_name(group_column)

# Optimize window function by including user_id in PARTITION BY when available
# This allows PostgreSQL to use composite indexes like (user_id, project, time)
partition_clause = if user_id_value
"PARTITION BY user_id, #{group_expr}"
else
"PARTITION BY #{group_expr}"
end

capped_diffs = scope
.select("#{group_expr} as grouped_time, CASE
WHEN LAG(time) OVER (PARTITION BY #{group_expr} ORDER BY time) IS NULL THEN 0
ELSE LEAST(time - LAG(time) OVER (PARTITION BY #{group_expr} ORDER BY time), #{timeout})
WHEN LAG(time) OVER (#{partition_clause} ORDER BY time) IS NULL THEN 0
ELSE LEAST(time - LAG(time) OVER (#{partition_clause} ORDER BY time), #{timeout})
END as diff")
.where.not(time: nil)
.unscope(:group)
Expand All @@ -238,17 +259,45 @@ def duration_seconds(scope = all)
end
else
# when not grouped, return a single value
# For ungrouped queries, add user_id to PARTITION BY if available
partition_clause = if user_id_value
"PARTITION BY user_id"
else
""
end

order_clause = partition_clause.present? ? "#{partition_clause} ORDER BY time" : "ORDER BY time"

capped_diffs = scope
.select("CASE
WHEN LAG(time) OVER (ORDER BY time) IS NULL THEN 0
ELSE LEAST(time - LAG(time) OVER (ORDER BY time), #{timeout})
WHEN LAG(time) OVER (#{order_clause}) IS NULL THEN 0
ELSE LEAST(time - LAG(time) OVER (#{order_clause}), #{timeout})
END as diff")
.where.not(time: nil)

connection.select_value("SELECT COALESCE(SUM(diff), 0)::integer FROM (#{capped_diffs.to_sql}) AS diffs").to_i
end
end

private

def extract_user_id_from_scope(scope)
# Extract user_id from the WHERE clause
# This checks both hash-style and Arel-style where clauses
return scope.where_values_hash["user_id"] if scope.where_values_hash["user_id"].present?

# Check for user_id in Arel where clauses
scope.where_values.each do |where_value|
if where_value.is_a?(Arel::Nodes::Equality) && where_value.left.name.to_s == "user_id"
return where_value.right
end
end

nil
rescue
nil
end

def bulk_duration_stats(week_ranges: [])
timeout = heartbeat_timeout_duration.to_i
scope = with_valid_timestamps.where.not(time: nil)
Expand Down
14 changes: 14 additions & 0 deletions db/migrate/20260210000000_optimize_window_function_indexes.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
class OptimizeWindowFunctionIndexes < ActiveRecord::Migration[8.1]
def change
# Add index optimized for PARTITION BY language ORDER BY time window functions
# This supports queries grouping by language that need efficient time ordering within each language
add_index :heartbeats, [ :user_id, :language, :time ],
name: "idx_heartbeats_user_language_time_window",
where: "deleted_at IS NULL AND language IS NOT NULL",
if_not_exists: true

# Add index optimized for PARTITION BY project ORDER BY time window functions
# Note: We already have idx_heartbeats_user_project_time_stats which is (user_id, project, time)
# This is already optimal for PARTITION BY project ORDER BY time when filtered by user_id
end
end

Check failure on line 14 in db/migrate/20260210000000_optimize_window_function_indexes.rb

View workflow job for this annotation

GitHub Actions / lint

Layout/TrailingEmptyLines: Final newline missing.
Loading