From d973e92b527a80b379095f2c46a3fdc29c5a1d2b Mon Sep 17 00:00:00 2001 From: Mahad Kalam Date: Mon, 2 Feb 2026 20:18:05 +0000 Subject: [PATCH 01/10] Field LUTs + sync job + remove raw_data --- app/jobs/backfill_heartbeat_dimensions_job.rb | 130 +++++++++++ app/jobs/migrate_user_from_hackatime_job.rb | 6 +- .../concerns/heartbeat_dimension_resolver.rb | 113 ++++++++++ app/models/heartbeat.rb | 3 +- app/models/heartbeats/branch.rb | 16 ++ app/models/heartbeats/category.rb | 14 ++ app/models/heartbeats/editor.rb | 14 ++ app/models/heartbeats/language.rb | 14 ++ app/models/heartbeats/machine.rb | 16 ++ app/models/heartbeats/operating_system.rb | 14 ++ app/models/heartbeats/project.rb | 16 ++ app/models/heartbeats/user_agent.rb | 14 ++ app/services/heartbeat_import_service.rb | 210 +++++++++--------- ...02194401_create_heartbeat_lookup_tables.rb | 56 +++++ ...6_add_lookup_foreign_keys_to_heartbeats.rb | 33 +++ ...d_foreign_key_constraints_to_heartbeats.rb | 12 + db/schema.rb | 95 +++++++- docs/heartbeat_normalization_migration.md | 116 ++++++++++ lib/tasks/backfill_heartbeat_dimensions.rake | 46 ++++ 19 files changed, 826 insertions(+), 112 deletions(-) create mode 100644 app/jobs/backfill_heartbeat_dimensions_job.rb create mode 100644 app/models/concerns/heartbeat_dimension_resolver.rb create mode 100644 app/models/heartbeats/branch.rb create mode 100644 app/models/heartbeats/category.rb create mode 100644 app/models/heartbeats/editor.rb create mode 100644 app/models/heartbeats/language.rb create mode 100644 app/models/heartbeats/machine.rb create mode 100644 app/models/heartbeats/operating_system.rb create mode 100644 app/models/heartbeats/project.rb create mode 100644 app/models/heartbeats/user_agent.rb create mode 100644 db/migrate/20260202194401_create_heartbeat_lookup_tables.rb create mode 100644 db/migrate/20260202194406_add_lookup_foreign_keys_to_heartbeats.rb create mode 100644 db/migrate/20260202194410_add_foreign_key_constraints_to_heartbeats.rb create mode 100644 docs/heartbeat_normalization_migration.md create mode 100644 lib/tasks/backfill_heartbeat_dimensions.rake diff --git a/app/jobs/backfill_heartbeat_dimensions_job.rb b/app/jobs/backfill_heartbeat_dimensions_job.rb new file mode 100644 index 000000000..0228e5ac3 --- /dev/null +++ b/app/jobs/backfill_heartbeat_dimensions_job.rb @@ -0,0 +1,130 @@ +class BackfillHeartbeatDimensionsJob < ApplicationJob + queue_as :latency_5m + + include GoodJob::ActiveJobExtensions::Concurrency + + good_job_control_concurrency_with( + key: -> { "backfill_heartbeat_dimensions_#{arguments.first}" }, + total_limit: 1 + ) + + BATCH_SIZE = 5_000 + DIMENSIONS = %w[language category editor operating_system user_agent project branch machine].freeze + + def perform(dimension, start_id: 0, end_id: nil) + unless DIMENSIONS.include?(dimension) + Rails.logger.error("Invalid dimension: #{dimension}") + return + end + + end_id ||= Heartbeat.with_deleted.maximum(:id) || 0 + + current_id = start_id + processed = 0 + + while current_id <= end_id + batch_end = current_id + BATCH_SIZE + + case dimension + when "language" + processed += backfill_global_dimension( + Heartbeats::Language, :language, :language_id, :name, current_id, batch_end + ) + when "category" + processed += backfill_global_dimension( + Heartbeats::Category, :category, :category_id, :name, current_id, batch_end + ) + when "editor" + processed += backfill_global_dimension( + Heartbeats::Editor, :editor, :editor_id, :name, current_id, batch_end + ) + when "operating_system" + processed += backfill_global_dimension( + Heartbeats::OperatingSystem, :operating_system, :operating_system_id, :name, current_id, batch_end + ) + when "user_agent" + processed += backfill_global_dimension( + Heartbeats::UserAgent, :user_agent, :user_agent_id, :value, current_id, batch_end + ) + when "project" + processed += backfill_user_scoped_dimension( + Heartbeats::Project, :project, :project_id, current_id, batch_end + ) + when "branch" + processed += backfill_user_scoped_dimension( + Heartbeats::Branch, :branch, :branch_id, current_id, batch_end + ) + when "machine" + processed += backfill_user_scoped_dimension( + Heartbeats::Machine, :machine, :machine_id, current_id, batch_end + ) + end + + current_id = batch_end + sleep(0.1) + end + + Rails.logger.info("BackfillHeartbeatDimensionsJob: #{dimension} complete, processed #{processed} rows") + end + + private + + def backfill_global_dimension(model, string_column, fk_column, lookup_column, start_id, end_id) + heartbeats = Heartbeat.with_deleted + .where(id: start_id...end_id) + .where(fk_column => nil) + .where.not(string_column => nil) + + values = heartbeats.distinct.pluck(string_column).compact + return 0 if values.empty? + + now = Time.current + rows = values.map { |v| { lookup_column => v, created_at: now, updated_at: now } } + model.upsert_all(rows, unique_by: lookup_column) + + lookup_map = model.where(lookup_column => values).pluck(lookup_column, :id).to_h + + updated = 0 + lookup_map.each do |value, id| + updated += Heartbeat.with_deleted + .where(id: start_id...end_id) + .where(fk_column => nil) + .where(string_column => value) + .update_all(fk_column => id) + end + updated + end + + def backfill_user_scoped_dimension(model, string_column, fk_column, start_id, end_id) + heartbeats = Heartbeat.with_deleted + .where(id: start_id...end_id) + .where(fk_column => nil) + .where.not(string_column => nil) + + user_value_pairs = heartbeats.distinct.pluck(:user_id, string_column).select { |u, v| u && v } + return 0 if user_value_pairs.empty? + + now = Time.current + rows = user_value_pairs.map { |user_id, name| { user_id: user_id, name: name, created_at: now, updated_at: now } } + model.upsert_all(rows, unique_by: [ :user_id, :name ]) + + lookup_map = {} + user_value_pairs.each_slice(500) do |batch| + conditions = batch.map { |uid, name| "(user_id = #{uid} AND name = #{model.connection.quote(name)})" }.join(" OR ") + model.where(conditions).pluck(:user_id, :name, :id).each do |uid, name, id| + lookup_map[[ uid, name ]] = id + end + end + + updated = 0 + lookup_map.each do |(user_id, name), id| + updated += Heartbeat.with_deleted + .where(id: start_id...end_id) + .where(fk_column => nil) + .where(user_id: user_id) + .where(string_column => name) + .update_all(fk_column => id) + end + updated + end +end diff --git a/app/jobs/migrate_user_from_hackatime_job.rb b/app/jobs/migrate_user_from_hackatime_job.rb index e28789906..3dd32de12 100644 --- a/app/jobs/migrate_user_from_hackatime_job.rb +++ b/app/jobs/migrate_user_from_hackatime_job.rb @@ -70,9 +70,9 @@ def import_heartbeats cursorpos: heartbeat.cursor_position, project_root_count: heartbeat.project_root_count, is_write: heartbeat.is_write, - source_type: :wakapi_import, - raw_data: heartbeat.attributes.slice(*Heartbeat.indexed_attributes) + source_type: :wakapi_import } + attrs[:raw_data] = heartbeat.attributes.slice(*Heartbeat.indexed_attributes) unless Flipper.enabled?(:skip_heartbeat_raw_data) { **attrs, @@ -80,10 +80,10 @@ def import_heartbeats } end - # dedupe records by fields_hash records_to_upsert = records_to_upsert.group_by { |r| r[:fields_hash] }.map do |_, records| records.max_by { |r| r[:time] } end + records_to_upsert = Heartbeat.batch_resolve_dimensions(records_to_upsert) Heartbeat.upsert_all(records_to_upsert, unique_by: [ :fields_hash ]) end diff --git a/app/models/concerns/heartbeat_dimension_resolver.rb b/app/models/concerns/heartbeat_dimension_resolver.rb new file mode 100644 index 000000000..0aa9932e3 --- /dev/null +++ b/app/models/concerns/heartbeat_dimension_resolver.rb @@ -0,0 +1,113 @@ +module HeartbeatDimensionResolver + extend ActiveSupport::Concern + + included do + belongs_to :heartbeat_language, class_name: "Heartbeats::Language", foreign_key: :language_id, optional: true + belongs_to :heartbeat_category, class_name: "Heartbeats::Category", foreign_key: :category_id, optional: true + belongs_to :heartbeat_editor, class_name: "Heartbeats::Editor", foreign_key: :editor_id, optional: true + belongs_to :heartbeat_operating_system, class_name: "Heartbeats::OperatingSystem", foreign_key: :operating_system_id, optional: true + belongs_to :heartbeat_user_agent, class_name: "Heartbeats::UserAgent", foreign_key: :user_agent_id, optional: true + belongs_to :heartbeat_project, class_name: "Heartbeats::Project", foreign_key: :project_id, optional: true + belongs_to :heartbeat_branch, class_name: "Heartbeats::Branch", foreign_key: :branch_id, optional: true + belongs_to :heartbeat_machine, class_name: "Heartbeats::Machine", foreign_key: :machine_id, optional: true + + before_save :resolve_dimension_ids, if: :should_resolve_dimensions? + end + + private + + def should_resolve_dimensions? + Flipper.enabled?(:heartbeat_dimension_dual_write) + end + + def resolve_dimension_ids + self.language_id ||= Heartbeats::Language.resolve(language)&.id if language.present? + self.category_id ||= Heartbeats::Category.resolve(category)&.id if category.present? + self.editor_id ||= Heartbeats::Editor.resolve(editor)&.id if editor.present? + self.operating_system_id ||= Heartbeats::OperatingSystem.resolve(operating_system)&.id if operating_system.present? + self.user_agent_id ||= Heartbeats::UserAgent.resolve(user_agent)&.id if user_agent.present? + self.project_id ||= Heartbeats::Project.resolve(user_id, project)&.id if project.present? && user_id.present? + self.branch_id ||= Heartbeats::Branch.resolve(user_id, branch)&.id if branch.present? && user_id.present? + self.machine_id ||= Heartbeats::Machine.resolve(user_id, machine)&.id if machine.present? && user_id.present? + end + + class_methods do + def resolve_dimensions_for_attributes(attrs) + user_id = attrs[:user_id] + + attrs[:language_id] ||= Heartbeats::Language.resolve(attrs[:language])&.id if attrs[:language].present? + attrs[:category_id] ||= Heartbeats::Category.resolve(attrs[:category])&.id if attrs[:category].present? + attrs[:editor_id] ||= Heartbeats::Editor.resolve(attrs[:editor])&.id if attrs[:editor].present? + attrs[:operating_system_id] ||= Heartbeats::OperatingSystem.resolve(attrs[:operating_system])&.id if attrs[:operating_system].present? + attrs[:user_agent_id] ||= Heartbeats::UserAgent.resolve(attrs[:user_agent])&.id if attrs[:user_agent].present? + attrs[:project_id] ||= Heartbeats::Project.resolve(user_id, attrs[:project])&.id if attrs[:project].present? && user_id.present? + attrs[:branch_id] ||= Heartbeats::Branch.resolve(user_id, attrs[:branch])&.id if attrs[:branch].present? && user_id.present? + attrs[:machine_id] ||= Heartbeats::Machine.resolve(user_id, attrs[:machine])&.id if attrs[:machine].present? && user_id.present? + + attrs + end + + def batch_resolve_dimensions(records_attrs) + return records_attrs unless Flipper.enabled?(:heartbeat_dimension_dual_write) + + global_languages = records_attrs.map { |r| r[:language] }.compact.uniq + global_categories = records_attrs.map { |r| r[:category] }.compact.uniq + global_editors = records_attrs.map { |r| r[:editor] }.compact.uniq + global_operating_systems = records_attrs.map { |r| r[:operating_system] }.compact.uniq + global_user_agents = records_attrs.map { |r| r[:user_agent] }.compact.uniq + + language_map = batch_resolve_global(Heartbeats::Language, :name, global_languages) + category_map = batch_resolve_global(Heartbeats::Category, :name, global_categories) + editor_map = batch_resolve_global(Heartbeats::Editor, :name, global_editors) + os_map = batch_resolve_global(Heartbeats::OperatingSystem, :name, global_operating_systems) + ua_map = batch_resolve_global(Heartbeats::UserAgent, :value, global_user_agents) + + user_projects = records_attrs.map { |r| [ r[:user_id], r[:project] ] }.select { |u, p| u && p }.uniq + user_branches = records_attrs.map { |r| [ r[:user_id], r[:branch] ] }.select { |u, b| u && b }.uniq + user_machines = records_attrs.map { |r| [ r[:user_id], r[:machine] ] }.select { |u, m| u && m }.uniq + + project_map = batch_resolve_user_scoped(Heartbeats::Project, user_projects) + branch_map = batch_resolve_user_scoped(Heartbeats::Branch, user_branches) + machine_map = batch_resolve_user_scoped(Heartbeats::Machine, user_machines) + + records_attrs.map do |attrs| + attrs = attrs.dup + attrs[:language_id] ||= language_map[attrs[:language]] if attrs[:language] + attrs[:category_id] ||= category_map[attrs[:category]] if attrs[:category] + attrs[:editor_id] ||= editor_map[attrs[:editor]] if attrs[:editor] + attrs[:operating_system_id] ||= os_map[attrs[:operating_system]] if attrs[:operating_system] + attrs[:user_agent_id] ||= ua_map[attrs[:user_agent]] if attrs[:user_agent] + attrs[:project_id] ||= project_map[[ attrs[:user_id], attrs[:project] ]] if attrs[:project] && attrs[:user_id] + attrs[:branch_id] ||= branch_map[[ attrs[:user_id], attrs[:branch] ]] if attrs[:branch] && attrs[:user_id] + attrs[:machine_id] ||= machine_map[[ attrs[:user_id], attrs[:machine] ]] if attrs[:machine] && attrs[:user_id] + attrs + end + end + + private + + def batch_resolve_global(model, column, values) + return {} if values.empty? + + now = Time.current + rows = values.map { |v| { column => v, created_at: now, updated_at: now } } + model.upsert_all(rows, unique_by: column, returning: [ :id, column ]) + + model.where(column => values).pluck(column, :id).to_h + end + + def batch_resolve_user_scoped(model, user_value_pairs) + return {} if user_value_pairs.empty? + + now = Time.current + rows = user_value_pairs.map { |user_id, name| { user_id: user_id, name: name, created_at: now, updated_at: now } } + model.upsert_all(rows, unique_by: [ :user_id, :name ], returning: [ :id, :user_id, :name ]) + + model.where( + user_value_pairs.map { |uid, name| "(user_id = #{uid} AND name = #{model.connection.quote(name)})" }.join(" OR ") + ).pluck(:user_id, :name, :id).each_with_object({}) do |(uid, name, id), h| + h[[ uid, name ]] = id + end + end + end +end diff --git a/app/models/heartbeat.rb b/app/models/heartbeat.rb index fa7ba62af..50ec23185 100644 --- a/app/models/heartbeat.rb +++ b/app/models/heartbeat.rb @@ -1,10 +1,11 @@ class Heartbeat < ApplicationRecord before_save :set_fields_hash! - before_save :set_raw_data! + before_save :set_raw_data!, unless: -> { Flipper.enabled?(:skip_heartbeat_raw_data) } include Heartbeatable include TimeRangeFilterable include PublicActivity::Common + include HeartbeatDimensionResolver time_range_filterable_field :time diff --git a/app/models/heartbeats/branch.rb b/app/models/heartbeats/branch.rb new file mode 100644 index 000000000..783007f1c --- /dev/null +++ b/app/models/heartbeats/branch.rb @@ -0,0 +1,16 @@ +class Heartbeats::Branch < ApplicationRecord + self.table_name = "heartbeat_branches" + + belongs_to :user + has_many :heartbeats, foreign_key: :branch_id, inverse_of: :heartbeat_branch + + validates :name, presence: true + validates :user_id, uniqueness: { scope: :name } + + def self.resolve(user_id, name) + return nil if name.blank? || user_id.blank? + find_or_create_by(user_id: user_id, name: name) + rescue ActiveRecord::RecordNotUnique + find_by(user_id: user_id, name: name) + end +end diff --git a/app/models/heartbeats/category.rb b/app/models/heartbeats/category.rb new file mode 100644 index 000000000..184d976f9 --- /dev/null +++ b/app/models/heartbeats/category.rb @@ -0,0 +1,14 @@ +class Heartbeats::Category < ApplicationRecord + self.table_name = "heartbeat_categories" + + has_many :heartbeats, foreign_key: :category_id, inverse_of: :heartbeat_category + + validates :name, presence: true, uniqueness: true + + def self.resolve(name) + return nil if name.blank? + find_or_create_by(name: name) + rescue ActiveRecord::RecordNotUnique + find_by(name: name) + end +end diff --git a/app/models/heartbeats/editor.rb b/app/models/heartbeats/editor.rb new file mode 100644 index 000000000..acd16f308 --- /dev/null +++ b/app/models/heartbeats/editor.rb @@ -0,0 +1,14 @@ +class Heartbeats::Editor < ApplicationRecord + self.table_name = "heartbeat_editors" + + has_many :heartbeats, foreign_key: :editor_id, inverse_of: :heartbeat_editor + + validates :name, presence: true, uniqueness: true + + def self.resolve(name) + return nil if name.blank? + find_or_create_by(name: name) + rescue ActiveRecord::RecordNotUnique + find_by(name: name) + end +end diff --git a/app/models/heartbeats/language.rb b/app/models/heartbeats/language.rb new file mode 100644 index 000000000..e7705d26e --- /dev/null +++ b/app/models/heartbeats/language.rb @@ -0,0 +1,14 @@ +class Heartbeats::Language < ApplicationRecord + self.table_name = "heartbeat_languages" + + has_many :heartbeats, foreign_key: :language_id, inverse_of: :heartbeat_language + + validates :name, presence: true, uniqueness: true + + def self.resolve(name) + return nil if name.blank? + find_or_create_by(name: name) + rescue ActiveRecord::RecordNotUnique + find_by(name: name) + end +end diff --git a/app/models/heartbeats/machine.rb b/app/models/heartbeats/machine.rb new file mode 100644 index 000000000..105bce87a --- /dev/null +++ b/app/models/heartbeats/machine.rb @@ -0,0 +1,16 @@ +class Heartbeats::Machine < ApplicationRecord + self.table_name = "heartbeat_machines" + + belongs_to :user + has_many :heartbeats, foreign_key: :machine_id, inverse_of: :heartbeat_machine + + validates :name, presence: true + validates :user_id, uniqueness: { scope: :name } + + def self.resolve(user_id, name) + return nil if name.blank? || user_id.blank? + find_or_create_by(user_id: user_id, name: name) + rescue ActiveRecord::RecordNotUnique + find_by(user_id: user_id, name: name) + end +end diff --git a/app/models/heartbeats/operating_system.rb b/app/models/heartbeats/operating_system.rb new file mode 100644 index 000000000..f87241e37 --- /dev/null +++ b/app/models/heartbeats/operating_system.rb @@ -0,0 +1,14 @@ +class Heartbeats::OperatingSystem < ApplicationRecord + self.table_name = "heartbeat_operating_systems" + + has_many :heartbeats, foreign_key: :operating_system_id, inverse_of: :heartbeat_operating_system + + validates :name, presence: true, uniqueness: true + + def self.resolve(name) + return nil if name.blank? + find_or_create_by(name: name) + rescue ActiveRecord::RecordNotUnique + find_by(name: name) + end +end diff --git a/app/models/heartbeats/project.rb b/app/models/heartbeats/project.rb new file mode 100644 index 000000000..07313210f --- /dev/null +++ b/app/models/heartbeats/project.rb @@ -0,0 +1,16 @@ +class Heartbeats::Project < ApplicationRecord + self.table_name = "heartbeat_projects" + + belongs_to :user + has_many :heartbeats, foreign_key: :project_id, inverse_of: :heartbeat_project + + validates :name, presence: true + validates :user_id, uniqueness: { scope: :name } + + def self.resolve(user_id, name) + return nil if name.blank? || user_id.blank? + find_or_create_by(user_id: user_id, name: name) + rescue ActiveRecord::RecordNotUnique + find_by(user_id: user_id, name: name) + end +end diff --git a/app/models/heartbeats/user_agent.rb b/app/models/heartbeats/user_agent.rb new file mode 100644 index 000000000..ef78de1a2 --- /dev/null +++ b/app/models/heartbeats/user_agent.rb @@ -0,0 +1,14 @@ +class Heartbeats::UserAgent < ApplicationRecord + self.table_name = "heartbeat_user_agents" + + has_many :heartbeats, foreign_key: :user_agent_id, inverse_of: :heartbeat_user_agent + + validates :value, presence: true, uniqueness: true + + def self.resolve(value) + return nil if value.blank? + find_or_create_by(value: value) + rescue ActiveRecord::RecordNotUnique + find_by(value: value) + end +end diff --git a/app/services/heartbeat_import_service.rb b/app/services/heartbeat_import_service.rb index f3d8e9211..642d83687 100644 --- a/app/services/heartbeat_import_service.rb +++ b/app/services/heartbeat_import_service.rb @@ -1,105 +1,105 @@ -class HeartbeatImportService - def self.import_from_file(file_content, user) - unless Rails.env.development? - raise StandardError, "Not dev env, not running" - end - - begin - parsed_data = JSON.parse(file_content) - rescue JSON::ParserError => e - raise StandardError, "Not json: #{e.message}" - end - - unless parsed_data.is_a?(Hash) && parsed_data["heartbeats"].is_a?(Array) - raise StandardError, "Not correct format, download from /my/settings on the offical hackatime then import here" - end - - heartbeats_data = parsed_data["heartbeats"] - imported_count = 0 - skipped_count = 0 - errors = [] - cc = 817263 - heartbeats_data.each_slice(100) do |batch| - records_to_upsert = [] - - batch.each_with_index do |heartbeat_data, index| - begin - time_value = if heartbeat_data["time"].is_a?(String) - Time.parse(heartbeat_data["time"]).to_f - else - heartbeat_data["time"].to_f - end - - attrs = { - user_id: user.id, - time: time_value, - entity: heartbeat_data["entity"], - type: heartbeat_data["type"], - category: heartbeat_data["category"] || "coding", - project: heartbeat_data["project"], - language: heartbeat_data["language"], - editor: heartbeat_data["editor"], - operating_system: heartbeat_data["operating_system"], - machine: heartbeat_data["machine"], - branch: heartbeat_data["branch"], - user_agent: heartbeat_data["user_agent"], - is_write: heartbeat_data["is_write"] || false, - line_additions: heartbeat_data["line_additions"], - line_deletions: heartbeat_data["line_deletions"], - lineno: heartbeat_data["lineno"], - lines: heartbeat_data["lines"], - cursorpos: heartbeat_data["cursorpos"], - dependencies: heartbeat_data["dependencies"] || [], - project_root_count: heartbeat_data["project_root_count"], - source_type: :wakapi_import, - raw_data: heartbeat_data.slice(*Heartbeat.indexed_attributes) - } - - attrs[:fields_hash] = Heartbeat.generate_fields_hash(attrs) - print(attrs[:fields_hash]) - print("\n") - records_to_upsert << attrs - - rescue => e - errors << "Row #{index + 1}: #{e.message}" - next - end - end - - if records_to_upsert.any? - print("importing!!!!!!!!!!!!!!!!!!!!!!") - print("\n") - begin - # Copied from migrate user from hackatime (app\jobs\migrate_user_from_hackatime_job.rb) - records_to_upsert = records_to_upsert.group_by { |r| r[:fields_hash] }.map do |_, records| - records.max_by { |r| r[:time] } - end - result = Heartbeat.upsert_all(records_to_upsert, unique_by: [ :fields_hash ]) - imported_count += result.length - rescue => e - errors << "Import error: #{e.message}" - print(e.message) - print("\n") - end - end - end - - { - success: true, - imported_count: imported_count, - total_count: heartbeats_data.length, - skipped_count: heartbeats_data.length - imported_count, - errors: errors - } - - rescue => e - { - success: false, - error: e.message, - imported_count: 0, - total_count: 0, - skipped_count: 0, - errors: [ e.message ] - } - end -end +class HeartbeatImportService + def self.import_from_file(file_content, user) + unless Rails.env.development? + raise StandardError, "Not dev env, not running" + end + + begin + parsed_data = JSON.parse(file_content) + rescue JSON::ParserError => e + raise StandardError, "Not json: #{e.message}" + end + + unless parsed_data.is_a?(Hash) && parsed_data["heartbeats"].is_a?(Array) + raise StandardError, "Not correct format, download from /my/settings on the offical hackatime then import here" + end + + heartbeats_data = parsed_data["heartbeats"] + imported_count = 0 + skipped_count = 0 + errors = [] + cc = 817263 + heartbeats_data.each_slice(100) do |batch| + records_to_upsert = [] + + batch.each_with_index do |heartbeat_data, index| + begin + time_value = if heartbeat_data["time"].is_a?(String) + Time.parse(heartbeat_data["time"]).to_f + else + heartbeat_data["time"].to_f + end + + attrs = { + user_id: user.id, + time: time_value, + entity: heartbeat_data["entity"], + type: heartbeat_data["type"], + category: heartbeat_data["category"] || "coding", + project: heartbeat_data["project"], + language: heartbeat_data["language"], + editor: heartbeat_data["editor"], + operating_system: heartbeat_data["operating_system"], + machine: heartbeat_data["machine"], + branch: heartbeat_data["branch"], + user_agent: heartbeat_data["user_agent"], + is_write: heartbeat_data["is_write"] || false, + line_additions: heartbeat_data["line_additions"], + line_deletions: heartbeat_data["line_deletions"], + lineno: heartbeat_data["lineno"], + lines: heartbeat_data["lines"], + cursorpos: heartbeat_data["cursorpos"], + dependencies: heartbeat_data["dependencies"] || [], + project_root_count: heartbeat_data["project_root_count"], + source_type: :wakapi_import + } + attrs[:raw_data] = heartbeat_data.slice(*Heartbeat.indexed_attributes) unless Flipper.enabled?(:skip_heartbeat_raw_data) + + attrs[:fields_hash] = Heartbeat.generate_fields_hash(attrs) + print(attrs[:fields_hash]) + print("\n") + records_to_upsert << attrs + + rescue => e + errors << "Row #{index + 1}: #{e.message}" + next + end + end + + if records_to_upsert.any? + print("importing!!!!!!!!!!!!!!!!!!!!!!") + print("\n") + begin + records_to_upsert = records_to_upsert.group_by { |r| r[:fields_hash] }.map do |_, records| + records.max_by { |r| r[:time] } + end + records_to_upsert = Heartbeat.batch_resolve_dimensions(records_to_upsert) + result = Heartbeat.upsert_all(records_to_upsert, unique_by: [ :fields_hash ]) + imported_count += result.length + rescue => e + errors << "Import error: #{e.message}" + print(e.message) + print("\n") + end + end + end + + { + success: true, + imported_count: imported_count, + total_count: heartbeats_data.length, + skipped_count: heartbeats_data.length - imported_count, + errors: errors + } + + rescue => e + { + success: false, + error: e.message, + imported_count: 0, + total_count: 0, + skipped_count: 0, + errors: [ e.message ] + } + end +end diff --git a/db/migrate/20260202194401_create_heartbeat_lookup_tables.rb b/db/migrate/20260202194401_create_heartbeat_lookup_tables.rb new file mode 100644 index 000000000..fe1832e55 --- /dev/null +++ b/db/migrate/20260202194401_create_heartbeat_lookup_tables.rb @@ -0,0 +1,56 @@ +class CreateHeartbeatLookupTables < ActiveRecord::Migration[8.1] + disable_ddl_transaction! + + def change + create_table :heartbeat_languages do |t| + t.string :name, null: false + t.timestamps + end + add_index :heartbeat_languages, :name, unique: true, algorithm: :concurrently + + create_table :heartbeat_categories do |t| + t.string :name, null: false + t.timestamps + end + add_index :heartbeat_categories, :name, unique: true, algorithm: :concurrently + + create_table :heartbeat_editors do |t| + t.string :name, null: false + t.timestamps + end + add_index :heartbeat_editors, :name, unique: true, algorithm: :concurrently + + create_table :heartbeat_operating_systems do |t| + t.string :name, null: false + t.timestamps + end + add_index :heartbeat_operating_systems, :name, unique: true, algorithm: :concurrently + + create_table :heartbeat_user_agents do |t| + t.string :value, null: false + t.timestamps + end + add_index :heartbeat_user_agents, :value, unique: true, algorithm: :concurrently + + create_table :heartbeat_projects do |t| + t.references :user, null: false, foreign_key: true + t.string :name, null: false + t.timestamps + end + add_index :heartbeat_projects, [ :user_id, :name ], unique: true, algorithm: :concurrently + + create_table :heartbeat_branches do |t| + t.references :user, null: false, foreign_key: true + t.string :name, null: false + t.timestamps + end + add_index :heartbeat_branches, [ :user_id, :name ], unique: true, algorithm: :concurrently + + create_table :heartbeat_machines do |t| + t.references :user, null: false, foreign_key: true + t.string :name, null: false + t.timestamps + end + add_index :heartbeat_machines, [ :user_id, :name ], unique: true, algorithm: :concurrently + end +end diff --git a/db/migrate/20260202194406_add_lookup_foreign_keys_to_heartbeats.rb b/db/migrate/20260202194406_add_lookup_foreign_keys_to_heartbeats.rb new file mode 100644 index 000000000..d1bf0906e --- /dev/null +++ b/db/migrate/20260202194406_add_lookup_foreign_keys_to_heartbeats.rb @@ -0,0 +1,33 @@ +class AddLookupForeignKeysToHeartbeats < ActiveRecord::Migration[8.1] + disable_ddl_transaction! + + def change + add_column :heartbeats, :language_id, :bigint + add_column :heartbeats, :category_id, :bigint + add_column :heartbeats, :editor_id, :bigint + add_column :heartbeats, :operating_system_id, :bigint + add_column :heartbeats, :user_agent_id, :bigint + add_column :heartbeats, :project_id, :bigint + add_column :heartbeats, :branch_id, :bigint + add_column :heartbeats, :machine_id, :bigint + + add_index :heartbeats, :language_id, algorithm: :concurrently + add_index :heartbeats, :category_id, algorithm: :concurrently + add_index :heartbeats, :editor_id, algorithm: :concurrently + add_index :heartbeats, :operating_system_id, algorithm: :concurrently + add_index :heartbeats, :user_agent_id, algorithm: :concurrently + add_index :heartbeats, :project_id, algorithm: :concurrently + add_index :heartbeats, :branch_id, algorithm: :concurrently + add_index :heartbeats, :machine_id, algorithm: :concurrently + + add_index :heartbeats, [ :user_id, :time, :project_id ], + name: "idx_heartbeats_user_time_project_id", + algorithm: :concurrently, + where: "deleted_at IS NULL" + + add_index :heartbeats, [ :user_id, :time, :language_id ], + name: "idx_heartbeats_user_time_language_id", + algorithm: :concurrently, + where: "deleted_at IS NULL" + end +end diff --git a/db/migrate/20260202194410_add_foreign_key_constraints_to_heartbeats.rb b/db/migrate/20260202194410_add_foreign_key_constraints_to_heartbeats.rb new file mode 100644 index 000000000..7c3ee6c41 --- /dev/null +++ b/db/migrate/20260202194410_add_foreign_key_constraints_to_heartbeats.rb @@ -0,0 +1,12 @@ +class AddForeignKeyConstraintsToHeartbeats < ActiveRecord::Migration[8.1] + def change + add_foreign_key :heartbeats, :heartbeat_languages, column: :language_id, validate: false + add_foreign_key :heartbeats, :heartbeat_categories, column: :category_id, validate: false + add_foreign_key :heartbeats, :heartbeat_editors, column: :editor_id, validate: false + add_foreign_key :heartbeats, :heartbeat_operating_systems, column: :operating_system_id, validate: false + add_foreign_key :heartbeats, :heartbeat_user_agents, column: :user_agent_id, validate: false + add_foreign_key :heartbeats, :heartbeat_projects, column: :project_id, validate: false + add_foreign_key :heartbeats, :heartbeat_branches, column: :branch_id, validate: false + add_foreign_key :heartbeats, :heartbeat_machines, column: :machine_id, validate: false + end +end diff --git a/db/schema.rb b/db/schema.rb index 1f02b02a0..f70beadda 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,8 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[8.1].define(version: 2026_01_20_014910) do - create_schema "pganalyze" +ActiveRecord::Schema[8.1].define(version: 2026_02_02_194419) do # These are extensions that must be enabled in order to support this database enable_extension "pg_catalog.plpgsql" enable_extension "pg_stat_statements" @@ -260,49 +259,128 @@ t.index ["scheduled_at"], name: "index_good_jobs_on_scheduled_at", where: "(finished_at IS NULL)" end + create_table "heartbeat_branches", force: :cascade do |t| + t.datetime "created_at", null: false + t.string "name", null: false + t.datetime "updated_at", null: false + t.bigint "user_id", null: false + t.index ["user_id", "name"], name: "index_heartbeat_branches_on_user_id_and_name", unique: true + t.index ["user_id"], name: "index_heartbeat_branches_on_user_id" + end + + create_table "heartbeat_categories", force: :cascade do |t| + t.datetime "created_at", null: false + t.string "name", null: false + t.datetime "updated_at", null: false + t.index ["name"], name: "index_heartbeat_categories_on_name", unique: true + end + + create_table "heartbeat_editors", force: :cascade do |t| + t.datetime "created_at", null: false + t.string "name", null: false + t.datetime "updated_at", null: false + t.index ["name"], name: "index_heartbeat_editors_on_name", unique: true + end + + create_table "heartbeat_languages", force: :cascade do |t| + t.datetime "created_at", null: false + t.string "name", null: false + t.datetime "updated_at", null: false + t.index ["name"], name: "index_heartbeat_languages_on_name", unique: true + end + + create_table "heartbeat_machines", force: :cascade do |t| + t.datetime "created_at", null: false + t.string "name", null: false + t.datetime "updated_at", null: false + t.bigint "user_id", null: false + t.index ["user_id", "name"], name: "index_heartbeat_machines_on_user_id_and_name", unique: true + t.index ["user_id"], name: "index_heartbeat_machines_on_user_id" + end + + create_table "heartbeat_operating_systems", force: :cascade do |t| + t.datetime "created_at", null: false + t.string "name", null: false + t.datetime "updated_at", null: false + t.index ["name"], name: "index_heartbeat_operating_systems_on_name", unique: true + end + + create_table "heartbeat_projects", force: :cascade do |t| + t.datetime "created_at", null: false + t.string "name", null: false + t.datetime "updated_at", null: false + t.bigint "user_id", null: false + t.index ["user_id", "name"], name: "index_heartbeat_projects_on_user_id_and_name", unique: true + t.index ["user_id"], name: "index_heartbeat_projects_on_user_id" + end + + create_table "heartbeat_user_agents", force: :cascade do |t| + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.string "value", null: false + t.index ["value"], name: "index_heartbeat_user_agents_on_value", unique: true + end + create_table "heartbeats", force: :cascade do |t| t.string "branch" + t.bigint "branch_id" t.string "category" + t.bigint "category_id" t.datetime "created_at", null: false t.integer "cursorpos" t.datetime "deleted_at" t.string "dependencies", default: [], array: true t.string "editor" + t.bigint "editor_id" t.string "entity" t.text "fields_hash" t.inet "ip_address" t.boolean "is_write" t.string "language" + t.bigint "language_id" t.integer "line_additions" t.integer "line_deletions" t.integer "lineno" t.integer "lines" t.string "machine" + t.bigint "machine_id" t.string "operating_system" + t.bigint "operating_system_id" t.string "project" + t.bigint "project_id" t.integer "project_root_count" - t.jsonb "raw_data" t.bigint "raw_heartbeat_upload_id" t.integer "source_type", null: false t.float "time", null: false t.string "type" t.datetime "updated_at", null: false t.string "user_agent" + t.bigint "user_agent_id" t.bigint "user_id", null: false t.integer "ysws_program", default: 0, null: false + t.index ["branch_id"], name: "index_heartbeats_on_branch_id" t.index ["category", "time"], name: "index_heartbeats_on_category_and_time" + t.index ["category_id"], name: "index_heartbeats_on_category_id" + t.index ["editor_id"], name: "index_heartbeats_on_editor_id" t.index ["fields_hash"], name: "index_heartbeats_on_fields_hash_when_not_deleted", unique: true, where: "(deleted_at IS NULL)" t.index ["ip_address"], name: "index_heartbeats_on_ip_address" + t.index ["language_id"], name: "index_heartbeats_on_language_id" t.index ["machine"], name: "index_heartbeats_on_machine" + t.index ["machine_id"], name: "index_heartbeats_on_machine_id" + t.index ["operating_system_id"], name: "index_heartbeats_on_operating_system_id" t.index ["project", "time"], name: "index_heartbeats_on_project_and_time" t.index ["project"], name: "index_heartbeats_on_project" + t.index ["project_id"], name: "index_heartbeats_on_project_id" t.index ["raw_heartbeat_upload_id"], name: "index_heartbeats_on_raw_heartbeat_upload_id" t.index ["source_type", "time", "user_id", "project"], name: "index_heartbeats_on_source_type_time_user_project" + t.index ["user_agent_id"], name: "index_heartbeats_on_user_agent_id" t.index ["user_id", "id"], name: "index_heartbeats_on_user_id_with_ip", order: { id: :desc }, where: "((ip_address IS NOT NULL) AND (deleted_at IS NULL))" t.index ["user_id", "project", "time"], name: "idx_heartbeats_user_project_time_stats", where: "((deleted_at IS NULL) AND (project IS NOT NULL))" t.index ["user_id", "time", "category"], name: "index_heartbeats_on_user_time_category" t.index ["user_id", "time", "language"], name: "idx_heartbeats_user_time_language_stats", where: "(deleted_at IS NULL)" + t.index ["user_id", "time", "language_id"], name: "idx_heartbeats_user_time_language_id", where: "(deleted_at IS NULL)" t.index ["user_id", "time", "project"], name: "idx_heartbeats_user_time_project_stats", where: "(deleted_at IS NULL)" + t.index ["user_id", "time", "project_id"], name: "idx_heartbeats_user_time_project_id", where: "(deleted_at IS NULL)" t.index ["user_id", "time"], name: "idx_heartbeats_user_time_active", where: "(deleted_at IS NULL)" t.index ["user_id"], name: "index_heartbeats_on_user_id" end @@ -578,6 +656,17 @@ add_foreign_key "deletion_requests", "users", column: "admin_approved_by_id" add_foreign_key "email_addresses", "users" add_foreign_key "email_verification_requests", "users" + add_foreign_key "heartbeat_branches", "users" + add_foreign_key "heartbeat_machines", "users" + add_foreign_key "heartbeat_projects", "users" + add_foreign_key "heartbeats", "heartbeat_branches", column: "branch_id" + add_foreign_key "heartbeats", "heartbeat_categories", column: "category_id" + add_foreign_key "heartbeats", "heartbeat_editors", column: "editor_id" + add_foreign_key "heartbeats", "heartbeat_languages", column: "language_id" + add_foreign_key "heartbeats", "heartbeat_machines", column: "machine_id" + add_foreign_key "heartbeats", "heartbeat_operating_systems", column: "operating_system_id" + add_foreign_key "heartbeats", "heartbeat_projects", column: "project_id" + add_foreign_key "heartbeats", "heartbeat_user_agents", column: "user_agent_id" add_foreign_key "heartbeats", "raw_heartbeat_uploads" add_foreign_key "heartbeats", "users" add_foreign_key "leaderboard_entries", "leaderboards" diff --git a/docs/heartbeat_normalization_migration.md b/docs/heartbeat_normalization_migration.md new file mode 100644 index 000000000..1586edcd2 --- /dev/null +++ b/docs/heartbeat_normalization_migration.md @@ -0,0 +1,116 @@ +# The Big Migration(tm) + +## Run DB migrations + +```bash +rails db:migrate +``` + +## Start dual-writing + +```ruby +Flipper.enable(:heartbeat_dimension_dual_write) +``` + +## Backfill the data + +Run the backfill rake task to populate FK columns for existing heartbeats: + +```bash +rails heartbeats:backfill_dimensions +``` + +For progress: + +```bash +rails heartbeats:backfill_progress +``` + +## Validate FKs + +AFTER the backfill is complete (100% progress on all dimensions), generate and run: + +```bash +rails g migration ValidateHeartbeatForeignKeys +rails db:migrate +``` + +Migration content: + +```ruby +class ValidateHeartbeatForeignKeys < ActiveRecord::Migration[8.1] + def up + validate_foreign_key :heartbeats, :heartbeat_languages + validate_foreign_key :heartbeats, :heartbeat_categories + validate_foreign_key :heartbeats, :heartbeat_editors + validate_foreign_key :heartbeats, :heartbeat_operating_systems + validate_foreign_key :heartbeats, :heartbeat_user_agents + validate_foreign_key :heartbeats, :heartbeat_projects + validate_foreign_key :heartbeats, :heartbeat_branches + validate_foreign_key :heartbeats, :heartbeat_machines + end + + def down + end +end +``` + +(we don't have these in `migrations` bc it won't work till the backfill is done) + +### Stop writing raw_data + +```ruby +Flipper.enable(:skip_heartbeat_raw_data) +``` + +### Remove raw_data Column + +**WARNING:** This will lock the DB!! We'll need to co-ordinate an announcement with program owners + the wider Slack + +```bash +rails g migration RemoveRawDataFromHeartbeats +rails db:migrate +``` + +Migration content: + +```ruby +class RemoveRawDataFromHeartbeats < ActiveRecord::Migration[8.1] + def up + remove_column :heartbeats, :raw_data + end + + def down + add_column :heartbeats, :raw_data, :jsonb + end +end +``` + +## Feature flags + +| Flag | Purpose | +|------|---------| +| `heartbeat_dimension_dual_write` | Enable dual-write of FK columns on new heartbeats | +| `skip_heartbeat_raw_data` | Stop populating raw_data column | + +## Rollback plan + +```ruby +Flipper.disable(:heartbeat_dimension_dual_write) +Flipper.disable(:skip_heartbeat_raw_data) +``` + +## Future stuff + +(Once the migration is done) + +### Read cutover + +Update read queries to: + +1. Filter/GROUP BY FK columns instead of string columns +2. JOIN to lookup tables only for display names + +## Time partitioning + +For leaderboards etc! diff --git a/lib/tasks/backfill_heartbeat_dimensions.rake b/lib/tasks/backfill_heartbeat_dimensions.rake new file mode 100644 index 000000000..571973660 --- /dev/null +++ b/lib/tasks/backfill_heartbeat_dimensions.rake @@ -0,0 +1,46 @@ +namespace :heartbeats do + desc "Backfill heartbeat dimension foreign keys (run after enabling heartbeat_dimension_dual_write)" + task backfill_dimensions: :environment do + dimensions = %w[language category editor operating_system user_agent project branch machine] + + puts "Enqueueing backfill jobs for #{dimensions.count} dimensions..." + + dimensions.each do |dimension| + BackfillHeartbeatDimensionsJob.perform_later(dimension) + puts " - Enqueued #{dimension}" + end + + puts "Done. Monitor GoodJob dashboard for progress." + end + + desc "Check backfill progress for heartbeat dimensions" + task backfill_progress: :environment do + puts "Heartbeat dimension backfill progress:" + puts "=" * 50 + + total = Heartbeat.with_deleted.count + + dimensions = { + language: :language_id, + category: :category_id, + editor: :editor_id, + operating_system: :operating_system_id, + user_agent: :user_agent_id, + project: :project_id, + branch: :branch_id, + machine: :machine_id + } + + dimensions.each do |string_col, fk_col| + with_string = Heartbeat.with_deleted.where.not(string_col => nil).count + with_fk = Heartbeat.with_deleted.where.not(fk_col => nil).count + missing = Heartbeat.with_deleted.where(fk_col => nil).where.not(string_col => nil).count + + pct = with_string > 0 ? ((with_fk.to_f / with_string) * 100).round(1) : 100.0 + puts "#{string_col.to_s.ljust(20)} #{pct}% (#{with_fk}/#{with_string}, #{missing} missing)" + end + + puts "=" * 50 + puts "Total heartbeats: #{total}" + end +end From 32095811db95094e8beb8745114f76fb16ec5b31 Mon Sep 17 00:00:00 2001 From: Mahad Kalam Date: Mon, 2 Feb 2026 20:29:37 +0000 Subject: [PATCH 02/10] Simplify stuff --- app/jobs/backfill_heartbeat_dimensions_job.rb | 53 ++------ .../concerns/heartbeat_dimension_resolver.rb | 126 ++++++++++-------- app/models/heartbeats/branch.rb | 13 +- app/models/heartbeats/category.rb | 11 +- app/models/heartbeats/editor.rb | 11 +- app/models/heartbeats/language.rb | 11 +- app/models/heartbeats/lookup_base.rb | 12 ++ app/models/heartbeats/machine.rb | 13 +- app/models/heartbeats/operating_system.rb | 11 +- app/models/heartbeats/project.rb | 13 +- app/models/heartbeats/user_agent.rb | 13 +- .../heartbeats/user_scoped_lookup_base.rb | 13 ++ lib/tasks/backfill_heartbeat_dimensions.rake | 22 +-- 13 files changed, 124 insertions(+), 198 deletions(-) create mode 100644 app/models/heartbeats/lookup_base.rb create mode 100644 app/models/heartbeats/user_scoped_lookup_base.rb diff --git a/app/jobs/backfill_heartbeat_dimensions_job.rb b/app/jobs/backfill_heartbeat_dimensions_job.rb index 0228e5ac3..653ab05c0 100644 --- a/app/jobs/backfill_heartbeat_dimensions_job.rb +++ b/app/jobs/backfill_heartbeat_dimensions_job.rb @@ -9,55 +9,26 @@ class BackfillHeartbeatDimensionsJob < ApplicationJob ) BATCH_SIZE = 5_000 - DIMENSIONS = %w[language category editor operating_system user_agent project branch machine].freeze def perform(dimension, start_id: 0, end_id: nil) - unless DIMENSIONS.include?(dimension) + spec = HeartbeatDimensionResolver::DIMENSIONS[dimension.to_sym] + unless spec Rails.logger.error("Invalid dimension: #{dimension}") return end end_id ||= Heartbeat.with_deleted.maximum(:id) || 0 - current_id = start_id processed = 0 while current_id <= end_id batch_end = current_id + BATCH_SIZE + model = spec[:model].constantize - case dimension - when "language" - processed += backfill_global_dimension( - Heartbeats::Language, :language, :language_id, :name, current_id, batch_end - ) - when "category" - processed += backfill_global_dimension( - Heartbeats::Category, :category, :category_id, :name, current_id, batch_end - ) - when "editor" - processed += backfill_global_dimension( - Heartbeats::Editor, :editor, :editor_id, :name, current_id, batch_end - ) - when "operating_system" - processed += backfill_global_dimension( - Heartbeats::OperatingSystem, :operating_system, :operating_system_id, :name, current_id, batch_end - ) - when "user_agent" - processed += backfill_global_dimension( - Heartbeats::UserAgent, :user_agent, :user_agent_id, :value, current_id, batch_end - ) - when "project" - processed += backfill_user_scoped_dimension( - Heartbeats::Project, :project, :project_id, current_id, batch_end - ) - when "branch" - processed += backfill_user_scoped_dimension( - Heartbeats::Branch, :branch, :branch_id, current_id, batch_end - ) - when "machine" - processed += backfill_user_scoped_dimension( - Heartbeats::Machine, :machine, :machine_id, current_id, batch_end - ) + processed += if spec[:scope] == :global + backfill_global_dimension(model, spec[:value_attr], spec[:fk], spec[:lookup], current_id, batch_end) + else + backfill_user_scoped_dimension(model, spec[:value_attr], spec[:fk], current_id, batch_end) end current_id = batch_end @@ -108,13 +79,9 @@ def backfill_user_scoped_dimension(model, string_column, fk_column, start_id, en rows = user_value_pairs.map { |user_id, name| { user_id: user_id, name: name, created_at: now, updated_at: now } } model.upsert_all(rows, unique_by: [ :user_id, :name ]) - lookup_map = {} - user_value_pairs.each_slice(500) do |batch| - conditions = batch.map { |uid, name| "(user_id = #{uid} AND name = #{model.connection.quote(name)})" }.join(" OR ") - model.where(conditions).pluck(:user_id, :name, :id).each do |uid, name, id| - lookup_map[[ uid, name ]] = id - end - end + lookup_map = model.where([ "(user_id, name) IN (?)", user_value_pairs ]) + .pluck(:user_id, :name, :id) + .each_with_object({}) { |(uid, name, id), h| h[[ uid, name ]] = id } updated = 0 lookup_map.each do |(user_id, name), id| diff --git a/app/models/concerns/heartbeat_dimension_resolver.rb b/app/models/concerns/heartbeat_dimension_resolver.rb index 0aa9932e3..538bc6628 100644 --- a/app/models/concerns/heartbeat_dimension_resolver.rb +++ b/app/models/concerns/heartbeat_dimension_resolver.rb @@ -1,15 +1,24 @@ module HeartbeatDimensionResolver extend ActiveSupport::Concern + DIMENSIONS = { + language: { model: "Heartbeats::Language", value_attr: :language, fk: :language_id, lookup: :name, scope: :global }, + category: { model: "Heartbeats::Category", value_attr: :category, fk: :category_id, lookup: :name, scope: :global }, + editor: { model: "Heartbeats::Editor", value_attr: :editor, fk: :editor_id, lookup: :name, scope: :global }, + operating_system: { model: "Heartbeats::OperatingSystem", value_attr: :operating_system, fk: :operating_system_id, lookup: :name, scope: :global }, + user_agent: { model: "Heartbeats::UserAgent", value_attr: :user_agent, fk: :user_agent_id, lookup: :value, scope: :global }, + project: { model: "Heartbeats::Project", value_attr: :project, fk: :project_id, lookup: :name, scope: :user }, + branch: { model: "Heartbeats::Branch", value_attr: :branch, fk: :branch_id, lookup: :name, scope: :user }, + machine: { model: "Heartbeats::Machine", value_attr: :machine, fk: :machine_id, lookup: :name, scope: :user } + }.freeze + included do - belongs_to :heartbeat_language, class_name: "Heartbeats::Language", foreign_key: :language_id, optional: true - belongs_to :heartbeat_category, class_name: "Heartbeats::Category", foreign_key: :category_id, optional: true - belongs_to :heartbeat_editor, class_name: "Heartbeats::Editor", foreign_key: :editor_id, optional: true - belongs_to :heartbeat_operating_system, class_name: "Heartbeats::OperatingSystem", foreign_key: :operating_system_id, optional: true - belongs_to :heartbeat_user_agent, class_name: "Heartbeats::UserAgent", foreign_key: :user_agent_id, optional: true - belongs_to :heartbeat_project, class_name: "Heartbeats::Project", foreign_key: :project_id, optional: true - belongs_to :heartbeat_branch, class_name: "Heartbeats::Branch", foreign_key: :branch_id, optional: true - belongs_to :heartbeat_machine, class_name: "Heartbeats::Machine", foreign_key: :machine_id, optional: true + DIMENSIONS.each do |key, spec| + belongs_to :"heartbeat_#{key}", + class_name: spec[:model], + foreign_key: spec[:fk], + optional: true + end before_save :resolve_dimension_ids, if: :should_resolve_dimensions? end @@ -21,28 +30,40 @@ def should_resolve_dimensions? end def resolve_dimension_ids - self.language_id ||= Heartbeats::Language.resolve(language)&.id if language.present? - self.category_id ||= Heartbeats::Category.resolve(category)&.id if category.present? - self.editor_id ||= Heartbeats::Editor.resolve(editor)&.id if editor.present? - self.operating_system_id ||= Heartbeats::OperatingSystem.resolve(operating_system)&.id if operating_system.present? - self.user_agent_id ||= Heartbeats::UserAgent.resolve(user_agent)&.id if user_agent.present? - self.project_id ||= Heartbeats::Project.resolve(user_id, project)&.id if project.present? && user_id.present? - self.branch_id ||= Heartbeats::Branch.resolve(user_id, branch)&.id if branch.present? && user_id.present? - self.machine_id ||= Heartbeats::Machine.resolve(user_id, machine)&.id if machine.present? && user_id.present? + DIMENSIONS.each_value do |spec| + next if self[spec[:fk]].present? + + value = self[spec[:value_attr]] + next if value.blank? + + model = spec[:model].constantize + resolved_id = if spec[:scope] == :global + model.resolve(value)&.id + else + model.resolve(user_id, value)&.id if user_id.present? + end + + self[spec[:fk]] = resolved_id if resolved_id + end end class_methods do def resolve_dimensions_for_attributes(attrs) user_id = attrs[:user_id] - attrs[:language_id] ||= Heartbeats::Language.resolve(attrs[:language])&.id if attrs[:language].present? - attrs[:category_id] ||= Heartbeats::Category.resolve(attrs[:category])&.id if attrs[:category].present? - attrs[:editor_id] ||= Heartbeats::Editor.resolve(attrs[:editor])&.id if attrs[:editor].present? - attrs[:operating_system_id] ||= Heartbeats::OperatingSystem.resolve(attrs[:operating_system])&.id if attrs[:operating_system].present? - attrs[:user_agent_id] ||= Heartbeats::UserAgent.resolve(attrs[:user_agent])&.id if attrs[:user_agent].present? - attrs[:project_id] ||= Heartbeats::Project.resolve(user_id, attrs[:project])&.id if attrs[:project].present? && user_id.present? - attrs[:branch_id] ||= Heartbeats::Branch.resolve(user_id, attrs[:branch])&.id if attrs[:branch].present? && user_id.present? - attrs[:machine_id] ||= Heartbeats::Machine.resolve(user_id, attrs[:machine])&.id if attrs[:machine].present? && user_id.present? + DIMENSIONS.each_value do |spec| + next if attrs[spec[:fk]].present? + + value = attrs[spec[:value_attr]] + next if value.blank? + + model = spec[:model].constantize + attrs[spec[:fk]] = if spec[:scope] == :global + model.resolve(value)&.id + else + model.resolve(user_id, value)&.id if user_id.present? + end + end attrs end @@ -50,36 +71,35 @@ def resolve_dimensions_for_attributes(attrs) def batch_resolve_dimensions(records_attrs) return records_attrs unless Flipper.enabled?(:heartbeat_dimension_dual_write) - global_languages = records_attrs.map { |r| r[:language] }.compact.uniq - global_categories = records_attrs.map { |r| r[:category] }.compact.uniq - global_editors = records_attrs.map { |r| r[:editor] }.compact.uniq - global_operating_systems = records_attrs.map { |r| r[:operating_system] }.compact.uniq - global_user_agents = records_attrs.map { |r| r[:user_agent] }.compact.uniq - - language_map = batch_resolve_global(Heartbeats::Language, :name, global_languages) - category_map = batch_resolve_global(Heartbeats::Category, :name, global_categories) - editor_map = batch_resolve_global(Heartbeats::Editor, :name, global_editors) - os_map = batch_resolve_global(Heartbeats::OperatingSystem, :name, global_operating_systems) - ua_map = batch_resolve_global(Heartbeats::UserAgent, :value, global_user_agents) + global_specs = DIMENSIONS.select { |_, s| s[:scope] == :global } + user_specs = DIMENSIONS.select { |_, s| s[:scope] == :user } - user_projects = records_attrs.map { |r| [ r[:user_id], r[:project] ] }.select { |u, p| u && p }.uniq - user_branches = records_attrs.map { |r| [ r[:user_id], r[:branch] ] }.select { |u, b| u && b }.uniq - user_machines = records_attrs.map { |r| [ r[:user_id], r[:machine] ] }.select { |u, m| u && m }.uniq + global_maps = global_specs.transform_values do |spec| + values = records_attrs.filter_map { |r| r[spec[:value_attr]] }.uniq + batch_resolve_global(spec[:model].constantize, spec[:lookup], values) + end - project_map = batch_resolve_user_scoped(Heartbeats::Project, user_projects) - branch_map = batch_resolve_user_scoped(Heartbeats::Branch, user_branches) - machine_map = batch_resolve_user_scoped(Heartbeats::Machine, user_machines) + user_maps = user_specs.transform_values do |spec| + pairs = records_attrs.filter_map { |r| + uid, val = r[:user_id], r[spec[:value_attr]] + [ uid, val ] if uid && val + }.uniq + batch_resolve_user_scoped(spec[:model].constantize, pairs) + end records_attrs.map do |attrs| attrs = attrs.dup - attrs[:language_id] ||= language_map[attrs[:language]] if attrs[:language] - attrs[:category_id] ||= category_map[attrs[:category]] if attrs[:category] - attrs[:editor_id] ||= editor_map[attrs[:editor]] if attrs[:editor] - attrs[:operating_system_id] ||= os_map[attrs[:operating_system]] if attrs[:operating_system] - attrs[:user_agent_id] ||= ua_map[attrs[:user_agent]] if attrs[:user_agent] - attrs[:project_id] ||= project_map[[ attrs[:user_id], attrs[:project] ]] if attrs[:project] && attrs[:user_id] - attrs[:branch_id] ||= branch_map[[ attrs[:user_id], attrs[:branch] ]] if attrs[:branch] && attrs[:user_id] - attrs[:machine_id] ||= machine_map[[ attrs[:user_id], attrs[:machine] ]] if attrs[:machine] && attrs[:user_id] + + global_specs.each do |key, spec| + next if attrs[spec[:fk]].present? + attrs[spec[:fk]] = global_maps[key][attrs[spec[:value_attr]]] if attrs[spec[:value_attr]] + end + + user_specs.each do |key, spec| + next if attrs[spec[:fk]].present? + attrs[spec[:fk]] = user_maps[key][[ attrs[:user_id], attrs[spec[:value_attr]] ]] if attrs[spec[:value_attr]] && attrs[:user_id] + end + attrs end end @@ -103,11 +123,9 @@ def batch_resolve_user_scoped(model, user_value_pairs) rows = user_value_pairs.map { |user_id, name| { user_id: user_id, name: name, created_at: now, updated_at: now } } model.upsert_all(rows, unique_by: [ :user_id, :name ], returning: [ :id, :user_id, :name ]) - model.where( - user_value_pairs.map { |uid, name| "(user_id = #{uid} AND name = #{model.connection.quote(name)})" }.join(" OR ") - ).pluck(:user_id, :name, :id).each_with_object({}) do |(uid, name, id), h| - h[[ uid, name ]] = id - end + model.where([ "(user_id, name) IN (?)", user_value_pairs ]) + .pluck(:user_id, :name, :id) + .each_with_object({}) { |(uid, name, id), h| h[[ uid, name ]] = id } end end end diff --git a/app/models/heartbeats/branch.rb b/app/models/heartbeats/branch.rb index 783007f1c..30aae6e19 100644 --- a/app/models/heartbeats/branch.rb +++ b/app/models/heartbeats/branch.rb @@ -1,16 +1,5 @@ -class Heartbeats::Branch < ApplicationRecord +class Heartbeats::Branch < Heartbeats::UserScopedLookupBase self.table_name = "heartbeat_branches" - belongs_to :user has_many :heartbeats, foreign_key: :branch_id, inverse_of: :heartbeat_branch - - validates :name, presence: true - validates :user_id, uniqueness: { scope: :name } - - def self.resolve(user_id, name) - return nil if name.blank? || user_id.blank? - find_or_create_by(user_id: user_id, name: name) - rescue ActiveRecord::RecordNotUnique - find_by(user_id: user_id, name: name) - end end diff --git a/app/models/heartbeats/category.rb b/app/models/heartbeats/category.rb index 184d976f9..c5f3ce97e 100644 --- a/app/models/heartbeats/category.rb +++ b/app/models/heartbeats/category.rb @@ -1,14 +1,5 @@ -class Heartbeats::Category < ApplicationRecord +class Heartbeats::Category < Heartbeats::LookupBase self.table_name = "heartbeat_categories" has_many :heartbeats, foreign_key: :category_id, inverse_of: :heartbeat_category - - validates :name, presence: true, uniqueness: true - - def self.resolve(name) - return nil if name.blank? - find_or_create_by(name: name) - rescue ActiveRecord::RecordNotUnique - find_by(name: name) - end end diff --git a/app/models/heartbeats/editor.rb b/app/models/heartbeats/editor.rb index acd16f308..52bf5ee08 100644 --- a/app/models/heartbeats/editor.rb +++ b/app/models/heartbeats/editor.rb @@ -1,14 +1,5 @@ -class Heartbeats::Editor < ApplicationRecord +class Heartbeats::Editor < Heartbeats::LookupBase self.table_name = "heartbeat_editors" has_many :heartbeats, foreign_key: :editor_id, inverse_of: :heartbeat_editor - - validates :name, presence: true, uniqueness: true - - def self.resolve(name) - return nil if name.blank? - find_or_create_by(name: name) - rescue ActiveRecord::RecordNotUnique - find_by(name: name) - end end diff --git a/app/models/heartbeats/language.rb b/app/models/heartbeats/language.rb index e7705d26e..cc21b7603 100644 --- a/app/models/heartbeats/language.rb +++ b/app/models/heartbeats/language.rb @@ -1,14 +1,5 @@ -class Heartbeats::Language < ApplicationRecord +class Heartbeats::Language < Heartbeats::LookupBase self.table_name = "heartbeat_languages" has_many :heartbeats, foreign_key: :language_id, inverse_of: :heartbeat_language - - validates :name, presence: true, uniqueness: true - - def self.resolve(name) - return nil if name.blank? - find_or_create_by(name: name) - rescue ActiveRecord::RecordNotUnique - find_by(name: name) - end end diff --git a/app/models/heartbeats/lookup_base.rb b/app/models/heartbeats/lookup_base.rb new file mode 100644 index 000000000..bf068b00b --- /dev/null +++ b/app/models/heartbeats/lookup_base.rb @@ -0,0 +1,12 @@ +class Heartbeats::LookupBase < ApplicationRecord + self.abstract_class = true + + def self.lookup_column = :name + + validates lookup_column, presence: true, uniqueness: true + + def self.resolve(value) + return nil if value.blank? + create_or_find_by(lookup_column => value) + end +end diff --git a/app/models/heartbeats/machine.rb b/app/models/heartbeats/machine.rb index 105bce87a..35d589619 100644 --- a/app/models/heartbeats/machine.rb +++ b/app/models/heartbeats/machine.rb @@ -1,16 +1,5 @@ -class Heartbeats::Machine < ApplicationRecord +class Heartbeats::Machine < Heartbeats::UserScopedLookupBase self.table_name = "heartbeat_machines" - belongs_to :user has_many :heartbeats, foreign_key: :machine_id, inverse_of: :heartbeat_machine - - validates :name, presence: true - validates :user_id, uniqueness: { scope: :name } - - def self.resolve(user_id, name) - return nil if name.blank? || user_id.blank? - find_or_create_by(user_id: user_id, name: name) - rescue ActiveRecord::RecordNotUnique - find_by(user_id: user_id, name: name) - end end diff --git a/app/models/heartbeats/operating_system.rb b/app/models/heartbeats/operating_system.rb index f87241e37..bd87d5027 100644 --- a/app/models/heartbeats/operating_system.rb +++ b/app/models/heartbeats/operating_system.rb @@ -1,14 +1,5 @@ -class Heartbeats::OperatingSystem < ApplicationRecord +class Heartbeats::OperatingSystem < Heartbeats::LookupBase self.table_name = "heartbeat_operating_systems" has_many :heartbeats, foreign_key: :operating_system_id, inverse_of: :heartbeat_operating_system - - validates :name, presence: true, uniqueness: true - - def self.resolve(name) - return nil if name.blank? - find_or_create_by(name: name) - rescue ActiveRecord::RecordNotUnique - find_by(name: name) - end end diff --git a/app/models/heartbeats/project.rb b/app/models/heartbeats/project.rb index 07313210f..fac11f385 100644 --- a/app/models/heartbeats/project.rb +++ b/app/models/heartbeats/project.rb @@ -1,16 +1,5 @@ -class Heartbeats::Project < ApplicationRecord +class Heartbeats::Project < Heartbeats::UserScopedLookupBase self.table_name = "heartbeat_projects" - belongs_to :user has_many :heartbeats, foreign_key: :project_id, inverse_of: :heartbeat_project - - validates :name, presence: true - validates :user_id, uniqueness: { scope: :name } - - def self.resolve(user_id, name) - return nil if name.blank? || user_id.blank? - find_or_create_by(user_id: user_id, name: name) - rescue ActiveRecord::RecordNotUnique - find_by(user_id: user_id, name: name) - end end diff --git a/app/models/heartbeats/user_agent.rb b/app/models/heartbeats/user_agent.rb index ef78de1a2..4a060de40 100644 --- a/app/models/heartbeats/user_agent.rb +++ b/app/models/heartbeats/user_agent.rb @@ -1,14 +1,7 @@ -class Heartbeats::UserAgent < ApplicationRecord +class Heartbeats::UserAgent < Heartbeats::LookupBase self.table_name = "heartbeat_user_agents" - has_many :heartbeats, foreign_key: :user_agent_id, inverse_of: :heartbeat_user_agent - - validates :value, presence: true, uniqueness: true + def self.lookup_column = :value - def self.resolve(value) - return nil if value.blank? - find_or_create_by(value: value) - rescue ActiveRecord::RecordNotUnique - find_by(value: value) - end + has_many :heartbeats, foreign_key: :user_agent_id, inverse_of: :heartbeat_user_agent end diff --git a/app/models/heartbeats/user_scoped_lookup_base.rb b/app/models/heartbeats/user_scoped_lookup_base.rb new file mode 100644 index 000000000..82952500f --- /dev/null +++ b/app/models/heartbeats/user_scoped_lookup_base.rb @@ -0,0 +1,13 @@ +class Heartbeats::UserScopedLookupBase < ApplicationRecord + self.abstract_class = true + + belongs_to :user + + validates :name, presence: true + validates :user_id, uniqueness: { scope: :name } + + def self.resolve(user_id, name) + return nil if user_id.blank? || name.blank? + create_or_find_by(user_id: user_id, name: name) + end +end diff --git a/lib/tasks/backfill_heartbeat_dimensions.rake b/lib/tasks/backfill_heartbeat_dimensions.rake index 571973660..a4815040a 100644 --- a/lib/tasks/backfill_heartbeat_dimensions.rake +++ b/lib/tasks/backfill_heartbeat_dimensions.rake @@ -1,12 +1,12 @@ namespace :heartbeats do desc "Backfill heartbeat dimension foreign keys (run after enabling heartbeat_dimension_dual_write)" task backfill_dimensions: :environment do - dimensions = %w[language category editor operating_system user_agent project branch machine] + dimensions = HeartbeatDimensionResolver::DIMENSIONS.keys puts "Enqueueing backfill jobs for #{dimensions.count} dimensions..." dimensions.each do |dimension| - BackfillHeartbeatDimensionsJob.perform_later(dimension) + BackfillHeartbeatDimensionsJob.perform_later(dimension.to_s) puts " - Enqueued #{dimension}" end @@ -20,24 +20,16 @@ namespace :heartbeats do total = Heartbeat.with_deleted.count - dimensions = { - language: :language_id, - category: :category_id, - editor: :editor_id, - operating_system: :operating_system_id, - user_agent: :user_agent_id, - project: :project_id, - branch: :branch_id, - machine: :machine_id - } - - dimensions.each do |string_col, fk_col| + HeartbeatDimensionResolver::DIMENSIONS.each do |key, spec| + string_col = spec[:value_attr] + fk_col = spec[:fk] + with_string = Heartbeat.with_deleted.where.not(string_col => nil).count with_fk = Heartbeat.with_deleted.where.not(fk_col => nil).count missing = Heartbeat.with_deleted.where(fk_col => nil).where.not(string_col => nil).count pct = with_string > 0 ? ((with_fk.to_f / with_string) * 100).round(1) : 100.0 - puts "#{string_col.to_s.ljust(20)} #{pct}% (#{with_fk}/#{with_string}, #{missing} missing)" + puts "#{key.to_s.ljust(20)} #{pct}% (#{with_fk}/#{with_string}, #{missing} missing)" end puts "=" * 50 From 216238f6bb8653e1785a00d8f7fa0e34e904fcc1 Mon Sep 17 00:00:00 2001 From: Mahad Kalam <55807755+skyfallwastaken@users.noreply.github.com> Date: Mon, 2 Feb 2026 20:33:18 +0000 Subject: [PATCH 03/10] Update app/models/heartbeats/user_scoped_lookup_base.rb Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- app/models/heartbeats/user_scoped_lookup_base.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/app/models/heartbeats/user_scoped_lookup_base.rb b/app/models/heartbeats/user_scoped_lookup_base.rb index 82952500f..51d5444a1 100644 --- a/app/models/heartbeats/user_scoped_lookup_base.rb +++ b/app/models/heartbeats/user_scoped_lookup_base.rb @@ -4,7 +4,6 @@ class Heartbeats::UserScopedLookupBase < ApplicationRecord belongs_to :user validates :name, presence: true - validates :user_id, uniqueness: { scope: :name } def self.resolve(user_id, name) return nil if user_id.blank? || name.blank? From 47cff4f27173ae95090a97cce9ea7f978b614d08 Mon Sep 17 00:00:00 2001 From: Mahad Kalam <55807755+skyfallwastaken@users.noreply.github.com> Date: Mon, 2 Feb 2026 20:33:24 +0000 Subject: [PATCH 04/10] Update app/models/heartbeats/lookup_base.rb Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- app/models/heartbeats/lookup_base.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/heartbeats/lookup_base.rb b/app/models/heartbeats/lookup_base.rb index bf068b00b..a4f3f2fb6 100644 --- a/app/models/heartbeats/lookup_base.rb +++ b/app/models/heartbeats/lookup_base.rb @@ -3,7 +3,7 @@ class Heartbeats::LookupBase < ApplicationRecord def self.lookup_column = :name - validates lookup_column, presence: true, uniqueness: true + validates lookup_column, presence: true def self.resolve(value) return nil if value.blank? From eca42b62bc8c9f3cc67b22127ef99e9494a7fae2 Mon Sep 17 00:00:00 2001 From: Mahad Kalam Date: Mon, 2 Feb 2026 20:43:14 +0000 Subject: [PATCH 05/10] whoops, fix deleting users --- Gemfile.lock | 1 + app/models/user.rb | 4 ++++ app/services/anonymize_user_service.rb | 4 ++++ 3 files changed, 9 insertions(+) diff --git a/Gemfile.lock b/Gemfile.lock index db9c98fa7..2c7258967 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -615,6 +615,7 @@ PLATFORMS aarch64-linux-musl arm-linux-gnu arm-linux-musl + arm64-darwin-23 arm64-darwin-24 arm64-darwin-25 x86_64-linux diff --git a/app/models/user.rb b/app/models/user.rb index 73d655142..14e568d8d 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -117,6 +117,10 @@ def set_trust(level, changed_by_user: nil, reason: nil, notes: nil) has_many :wakatime_mirrors, dependent: :destroy + has_many :heartbeat_projects, class_name: "Heartbeats::Project", dependent: :delete_all + has_many :heartbeat_branches, class_name: "Heartbeats::Branch", dependent: :delete_all + has_many :heartbeat_machines, class_name: "Heartbeats::Machine", dependent: :delete_all + scope :search_identity, ->(term) { term = term.to_s.strip.downcase return none if term.blank? diff --git a/app/services/anonymize_user_service.rb b/app/services/anonymize_user_service.rb index 367d92ca9..08e0f339c 100644 --- a/app/services/anonymize_user_service.rb +++ b/app/services/anonymize_user_service.rb @@ -69,5 +69,9 @@ def destroy_associated_records user.access_grants.destroy_all user.access_tokens.destroy_all + + user.heartbeat_projects.delete_all + user.heartbeat_branches.delete_all + user.heartbeat_machines.delete_all end end From c92e4ec623260bee0ee779c1bb03884a7da35521 Mon Sep 17 00:00:00 2001 From: Mahad Kalam Date: Tue, 3 Feb 2026 10:17:35 +0000 Subject: [PATCH 06/10] sayonara, raw_data --- app/jobs/migrate_user_from_hackatime_job.rb | 2 -- app/models/heartbeat.rb | 5 ----- app/services/heartbeat_import_service.rb | 2 -- docs/heartbeat_normalization_migration.md | 8 -------- 4 files changed, 17 deletions(-) diff --git a/app/jobs/migrate_user_from_hackatime_job.rb b/app/jobs/migrate_user_from_hackatime_job.rb index 3dd32de12..097599250 100644 --- a/app/jobs/migrate_user_from_hackatime_job.rb +++ b/app/jobs/migrate_user_from_hackatime_job.rb @@ -72,8 +72,6 @@ def import_heartbeats is_write: heartbeat.is_write, source_type: :wakapi_import } - attrs[:raw_data] = heartbeat.attributes.slice(*Heartbeat.indexed_attributes) unless Flipper.enabled?(:skip_heartbeat_raw_data) - { **attrs, fields_hash: Heartbeat.generate_fields_hash(attrs) diff --git a/app/models/heartbeat.rb b/app/models/heartbeat.rb index 50ec23185..13113aee0 100644 --- a/app/models/heartbeat.rb +++ b/app/models/heartbeat.rb @@ -1,6 +1,5 @@ class Heartbeat < ApplicationRecord before_save :set_fields_hash! - before_save :set_raw_data!, unless: -> { Flipper.enabled?(:skip_heartbeat_raw_data) } include Heartbeatable include TimeRangeFilterable @@ -113,10 +112,6 @@ def self.indexed_attributes %w[user_id branch category dependencies editor entity language machine operating_system project type user_agent line_additions line_deletions lineno lines cursorpos project_root_count time is_write] end - def set_raw_data! - self.raw_data ||= self.attributes.slice(*self.class.indexed_attributes) - end - def soft_delete update_column(:deleted_at, Time.current) end diff --git a/app/services/heartbeat_import_service.rb b/app/services/heartbeat_import_service.rb index 642d83687..c3dfd7e7b 100644 --- a/app/services/heartbeat_import_service.rb +++ b/app/services/heartbeat_import_service.rb @@ -53,8 +53,6 @@ def self.import_from_file(file_content, user) project_root_count: heartbeat_data["project_root_count"], source_type: :wakapi_import } - attrs[:raw_data] = heartbeat_data.slice(*Heartbeat.indexed_attributes) unless Flipper.enabled?(:skip_heartbeat_raw_data) - attrs[:fields_hash] = Heartbeat.generate_fields_hash(attrs) print(attrs[:fields_hash]) print("\n") diff --git a/docs/heartbeat_normalization_migration.md b/docs/heartbeat_normalization_migration.md index 1586edcd2..51b9b70c6 100644 --- a/docs/heartbeat_normalization_migration.md +++ b/docs/heartbeat_normalization_migration.md @@ -86,18 +86,10 @@ class RemoveRawDataFromHeartbeats < ActiveRecord::Migration[8.1] end ``` -## Feature flags - -| Flag | Purpose | -|------|---------| -| `heartbeat_dimension_dual_write` | Enable dual-write of FK columns on new heartbeats | -| `skip_heartbeat_raw_data` | Stop populating raw_data column | - ## Rollback plan ```ruby Flipper.disable(:heartbeat_dimension_dual_write) -Flipper.disable(:skip_heartbeat_raw_data) ``` ## Future stuff From 49250e7cbb24d6da4ae3f516684c95240ddb3c05 Mon Sep 17 00:00:00 2001 From: Mahad Kalam Date: Tue, 3 Feb 2026 10:19:57 +0000 Subject: [PATCH 07/10] sayonara x2 --- docs/heartbeat_normalization_migration.md | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/docs/heartbeat_normalization_migration.md b/docs/heartbeat_normalization_migration.md index 51b9b70c6..b386bf8f7 100644 --- a/docs/heartbeat_normalization_migration.md +++ b/docs/heartbeat_normalization_migration.md @@ -57,15 +57,9 @@ end (we don't have these in `migrations` bc it won't work till the backfill is done) -### Stop writing raw_data - -```ruby -Flipper.enable(:skip_heartbeat_raw_data) -``` - ### Remove raw_data Column -**WARNING:** This will lock the DB!! We'll need to co-ordinate an announcement with program owners + the wider Slack +**WARNING:** This will lock the DB! We'll need to co-ordinate an announcement with program owners + the wider Slack ```bash rails g migration RemoveRawDataFromHeartbeats From 2f7474dfb2157281f51565b92536adcdddb17909 Mon Sep 17 00:00:00 2001 From: Echo Date: Tue, 3 Feb 2026 11:51:58 -0500 Subject: [PATCH 08/10] lets not break the dashboard --- app/controllers/static_pages_controller.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/controllers/static_pages_controller.rb b/app/controllers/static_pages_controller.rb index 5c2f4edb8..3577ab69f 100644 --- a/app/controllers/static_pages_controller.rb +++ b/app/controllers/static_pages_controller.rb @@ -201,7 +201,7 @@ def filterable_dashboard_data hb = if %i[operating_system editor].include?(f) hb.where(f => arr.flat_map { |v| [ v.downcase, v.capitalize ] }.uniq) elsif f == :language - raw = current_user.heartbeats.distinct.pluck(f).compact_blank.select { |l| arr.include?(l.categorize_language) } + raw = current_user.heartbeats.distinct.pluck(f).compact_blank.select { |l| arr.any? { |a| a.casecmp?(l.categorize_language) } } raw.any? ? hb.where(f => raw) : hb else hb.where(f => arr) From f5d10d81362336690538e46b54a083be250e0f04 Mon Sep 17 00:00:00 2001 From: Echo Date: Tue, 3 Feb 2026 12:17:39 -0500 Subject: [PATCH 09/10] properly do migrations --- db/schema.rb | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/db/schema.rb b/db/schema.rb index a3aad284b..a892b5539 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -680,14 +680,14 @@ add_foreign_key "heartbeat_branches", "users" add_foreign_key "heartbeat_machines", "users" add_foreign_key "heartbeat_projects", "users" - add_foreign_key "heartbeats", "heartbeat_branches", column: "branch_id" - add_foreign_key "heartbeats", "heartbeat_categories", column: "category_id" - add_foreign_key "heartbeats", "heartbeat_editors", column: "editor_id" - add_foreign_key "heartbeats", "heartbeat_languages", column: "language_id" - add_foreign_key "heartbeats", "heartbeat_machines", column: "machine_id" - add_foreign_key "heartbeats", "heartbeat_operating_systems", column: "operating_system_id" - add_foreign_key "heartbeats", "heartbeat_projects", column: "project_id" - add_foreign_key "heartbeats", "heartbeat_user_agents", column: "user_agent_id" + add_foreign_key "heartbeats", "heartbeat_branches", column: "branch_id", validate: false + add_foreign_key "heartbeats", "heartbeat_categories", column: "category_id", validate: false + add_foreign_key "heartbeats", "heartbeat_editors", column: "editor_id", validate: false + add_foreign_key "heartbeats", "heartbeat_languages", column: "language_id", validate: false + add_foreign_key "heartbeats", "heartbeat_machines", column: "machine_id", validate: false + add_foreign_key "heartbeats", "heartbeat_operating_systems", column: "operating_system_id", validate: false + add_foreign_key "heartbeats", "heartbeat_projects", column: "project_id", validate: false + add_foreign_key "heartbeats", "heartbeat_user_agents", column: "user_agent_id", validate: false add_foreign_key "heartbeats", "raw_heartbeat_uploads" add_foreign_key "heartbeats", "users" add_foreign_key "leaderboard_entries", "leaderboards" From 590ccb422497d6259935bbfe923e6cf1976bbe2e Mon Sep 17 00:00:00 2001 From: Echo Date: Tue, 3 Feb 2026 12:17:53 -0500 Subject: [PATCH 10/10] refactor hb filters --- .../api/admin/v1/admin_controller.rb | 14 ++++--- app/controllers/api/summary_controller.rb | 18 +++++---- .../concerns/heartbeat_filter_concern.rb | 37 +++++++++++++++++++ 3 files changed, 57 insertions(+), 12 deletions(-) create mode 100644 app/controllers/concerns/heartbeat_filter_concern.rb diff --git a/app/controllers/api/admin/v1/admin_controller.rb b/app/controllers/api/admin/v1/admin_controller.rb index 517fda3d0..4bca3252b 100644 --- a/app/controllers/api/admin/v1/admin_controller.rb +++ b/app/controllers/api/admin/v1/admin_controller.rb @@ -2,6 +2,7 @@ module Api module Admin module V1 class AdminController < Api::Admin::V1::ApplicationController + include HeartbeatFilterConcern before_action :can_write!, only: [ :user_convict ] def check @@ -436,11 +437,14 @@ def user_heartbeats query = query.where("time <= ?", end_timestamp) if end_timestamp end - query = query.where(project: project) if project.present? - query = query.where(language: language) if language.present? - query = query.where(entity: entity) if entity.present? - query = query.where(editor: editor) if editor.present? - query = query.where(machine: machine) if machine.present? + filters = { + project: project, + language: language, + entity: entity, + editor: editor, + machine: machine + }.compact_blank + query = apply_heartbeat_filters(query, filters, user: user) total_count = query.count diff --git a/app/controllers/api/summary_controller.rb b/app/controllers/api/summary_controller.rb index d4281f969..0941842dd 100644 --- a/app/controllers/api/summary_controller.rb +++ b/app/controllers/api/summary_controller.rb @@ -1,5 +1,6 @@ module Api class SummaryController < ApplicationController + include HeartbeatFilterConcern skip_before_action :verify_authenticity_token def index @@ -101,18 +102,21 @@ def determine_date_range(interval, range, from_date, to_date) end def filter_heartbeats(heartbeats, params) - heartbeats = heartbeats.where(project: params[:project]) if params[:project].present? - heartbeats = heartbeats.where(language: params[:language]) if params[:language].present? - heartbeats = heartbeats.where(editor: params[:editor]) if params[:editor].present? - heartbeats = heartbeats.where(operating_system: params[:operating_system]) if params[:operating_system].present? - heartbeats = heartbeats.where(machine: params[:machine]) if params[:machine].present? - + user = nil if params[:user].present? user = User.find_by(slack_uid: params[:user]) heartbeats = heartbeats.where(user_id: user.id) if user end - heartbeats + filters = { + project: params[:project], + language: params[:language], + editor: params[:editor], + operating_system: params[:operating_system], + machine: params[:machine] + }.compact_blank + + apply_heartbeat_filters(heartbeats, filters, user: user) end def calculate_summary(heartbeats, date_range) diff --git a/app/controllers/concerns/heartbeat_filter_concern.rb b/app/controllers/concerns/heartbeat_filter_concern.rb new file mode 100644 index 000000000..043dfdef1 --- /dev/null +++ b/app/controllers/concerns/heartbeat_filter_concern.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +# Case-insensitive filtering for heartbeat fields where display names differ from raw DB values. +module HeartbeatFilterConcern + extend ActiveSupport::Concern + + private + + CASE_INSENSITIVE_FIELDS = %i[language editor operating_system].freeze + + def apply_heartbeat_filter(scope, field, val, user: nil) + return scope if val.blank? + vals = Array(val) + CASE_INSENSITIVE_FIELDS.include?(field) ? case_insensitive_filter(scope, field, vals, user) : scope.where(field => vals) + end + + def case_insensitive_filter(scope, field, vals, user) + raw = (user ? user.heartbeats : scope).distinct.pluck(field).compact_blank + matches = raw.select { |r| vals.any? { |v| v.casecmp?(r) || v.casecmp?(display_name(field, r)) } } + matches.any? ? scope.where(field => matches) : scope.none + end + + def display_name(field, val) + h = ApplicationController.helpers + case field + when :language then WakatimeService.categorize_language(val) || val + when :editor then h.display_editor_name(val) + when :operating_system then h.display_os_name(val) + else val + end + end + + def apply_heartbeat_filters(scope, filters, user: nil) + filters.each { |f, v| scope = apply_heartbeat_filter(scope, f, v, user: user) } + scope + end +end