From ce1681e86831a7b3cc83688ed054d3de078ce974 Mon Sep 17 00:00:00 2001 From: Kabiru Mwenja Date: Fri, 15 May 2026 09:35:02 +0300 Subject: [PATCH] Reinstate doc-level preload cache for WP text macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In semantic mode, every `#N` reference in legacy content needs a WP record to render the consistent `formatted_id` label users opted into. The per-match `find_by_display_id` fallback that ships in #23203 scales linearly with reference count: a wiki page with 50 refs runs 50 indexed PK lookups per render, and API collection endpoints / mailer fan-out / journal feeds compound the multiplier across records. `PatternMatcherFilter` now primes a `RequestStore`-backed lookup once per document via `ResourceLinksMatcher.with_preloaded_resources`. The link handler reads from `work_package_for(identifier)` so the same path serves numeric and semantic input. Cost is one batched `WorkPackage` SELECT per render, plus an alias-table SELECT only when historical identifiers are referenced. Classic mode short-circuits before any preload — `formatted_id` collapses to the numeric form, so the matched id alone is enough. The save/restore around nested `format_text` is retained: custom-field formatters re-enter the pipeline mid-render and must not clobber the outer document's lookup. The earlier `MAX_PRELOAD_IDENTIFIERS` cap is intentionally omitted. Silent truncation past position N would render the (N+1)th reference as literal text — a regression of the feature itself. Postgres handles several-thousand-bind `IN` clauses comfortably; the right safety net, if one is needed later, is log-and-continue, not truncate. `PreformattedBlocks` is restored so the preload visitor and the matcher's text-node walk share one `
` / `` skip.

Follow-up to https://github.com/opf/openproject/pull/23203
---
 .../filters/pattern_matcher_filter.rb         | 29 +++++--
 .../matchers/link_handlers/work_packages.rb   |  6 +-
 .../matchers/resource_links_matcher.rb        | 84 ++++++++++++++++++
 .../text_formatting/preformatted_blocks.rb    | 52 +++++++++++
 .../link_handlers/work_packages_spec.rb       | 87 ++++++++++++++++---
 5 files changed, 237 insertions(+), 21 deletions(-)
 create mode 100644 lib/open_project/text_formatting/preformatted_blocks.rb

diff --git a/lib/open_project/text_formatting/filters/pattern_matcher_filter.rb b/lib/open_project/text_formatting/filters/pattern_matcher_filter.rb
index 98096983d48c..798601fcafad 100644
--- a/lib/open_project/text_formatting/filters/pattern_matcher_filter.rb
+++ b/lib/open_project/text_formatting/filters/pattern_matcher_filter.rb
@@ -31,9 +31,6 @@
 module OpenProject::TextFormatting
   module Filters
     class PatternMatcherFilter < HTML::Pipeline::Filter
-      # Skip text nodes that are within preformatted blocks
-      PREFORMATTED_BLOCKS = %w(pre code).to_set
-
       class << self
         def append_matcher(matcher)
           matchers << matcher
@@ -45,15 +42,35 @@ def matchers
       end
 
       def call
+        with_matcher_preloads(self.class.matchers.dup) { process_text_nodes }
+        doc
+      end
+
+      private
+
+      # Wraps the per-node loop in each matcher's `with_preloaded_resources`
+      # hook so matchers can warm per-render caches and save/restore them
+      # around nested `format_text` calls. Opt-in: matchers without the hook
+      # are skipped.
+      def with_matcher_preloads(matchers, &)
+        matcher = matchers.shift
+        return yield if matcher.nil?
+
+        if matcher.respond_to?(:with_preloaded_resources)
+          matcher.with_preloaded_resources(doc, context) { with_matcher_preloads(matchers, &) }
+        else
+          with_matcher_preloads(matchers, &)
+        end
+      end
+
+      def process_text_nodes
         doc.search(".//text()").each do |node|
-          next if has_ancestor?(node, PREFORMATTED_BLOCKS)
+          next if has_ancestor?(node, OpenProject::TextFormatting::PreformattedBlocks::BLOCKS)
 
           self.class.matchers.each do |matcher|
             matcher.call(node, doc:, context:)
           end
         end
-
-        doc
       end
     end
   end
diff --git a/lib/open_project/text_formatting/matchers/link_handlers/work_packages.rb b/lib/open_project/text_formatting/matchers/link_handlers/work_packages.rb
index f975af718a79..c495ee278f52 100644
--- a/lib/open_project/text_formatting/matchers/link_handlers/work_packages.rb
+++ b/lib/open_project/text_formatting/matchers/link_handlers/work_packages.rb
@@ -79,8 +79,8 @@ def render_for_semantic(display_id)
           render_work_package_macro(display_id, detailed: detailed?)
         else
           # Plain link needs the WP record for the `formatted_id` label and
-          # hover-card URL. Unresolved → literal text rather than a broken URL.
-          wp = WorkPackage.find_by_display_id(display_id)
+          # hover-card URL. Cache miss → literal text rather than a broken URL.
+          wp = OpenProject::TextFormatting::Matchers::ResourceLinksMatcher.work_package_for(display_id)
           return nil unless wp
 
           render_work_package_link(wp, fallback_id: display_id)
@@ -88,7 +88,7 @@ def render_for_semantic(display_id)
       end
 
       def render_for_numeric(wp_id)
-        wp = WorkPackage.find_by_display_id(wp_id)
+        wp = OpenProject::TextFormatting::Matchers::ResourceLinksMatcher.work_package_for(wp_id)
 
         if quickinfo?
           # Prefer the resolved WP's display_id so `##1234` typed in semantic
diff --git a/lib/open_project/text_formatting/matchers/resource_links_matcher.rb b/lib/open_project/text_formatting/matchers/resource_links_matcher.rb
index 1e3b02a49a29..740c5af22d32 100644
--- a/lib/open_project/text_formatting/matchers/resource_links_matcher.rb
+++ b/lib/open_project/text_formatting/matchers/resource_links_matcher.rb
@@ -69,6 +69,9 @@ module Matchers
     #     identifier:version:1.0.0
     #     identifier:source:some/file
     class ResourceLinksMatcher < RegexMatcher
+      WORK_PACKAGES_LOOKUP_KEY = :text_formatting_work_packages_lookup
+      private_constant :WORK_PACKAGES_LOOKUP_KEY
+
       include ::OpenProject::TextFormatting::Truncation
       # used for the work package quick links
       include WorkPackagesHelper
@@ -127,6 +130,87 @@ def self.link_handlers
         ]
       end
 
+      # Returns the preloaded WorkPackage for the given identifier (numeric
+      # or semantic), or nil if no preload is active (classic mode, no `#N`
+      # references) or the WP couldn't be resolved.
+      def self.work_package_for(identifier)
+        RequestStore.store[WORK_PACKAGES_LOOKUP_KEY]&.[](identifier.to_s)
+      end
+
+      # Doc-level preload called by `PatternMatcherFilter`. Save/restores
+      # the lookup so a nested `format_text` (e.g. custom-field formatter
+      # re-entering the pipeline) doesn't clobber the outer render. Classic
+      # mode skips the load — `display_id` collapses to numeric, so the
+      # link handler can render from the matched id alone.
+      def self.with_preloaded_resources(doc, _context)
+        previous = RequestStore.store[WORK_PACKAGES_LOOKUP_KEY]
+
+        return yield unless Setting::WorkPackageIdentifier.semantic_mode_active?
+
+        identifiers = collect_work_package_identifiers(doc)
+        return yield if identifiers.empty?
+
+        RequestStore.store[WORK_PACKAGES_LOOKUP_KEY] = build_lookup(identifiers)
+        yield
+      ensure
+        RequestStore.store[WORK_PACKAGES_LOOKUP_KEY] = previous
+      end
+
+      def self.collect_work_package_identifiers(doc)
+        identifiers = Set.new
+        doc.search(".//text()").each do |node|
+          next if OpenProject::TextFormatting::PreformattedBlocks.ancestor?(node)
+
+          node.to_s.scan(regexp) do
+            extract_work_package_identifier(Regexp.last_match)&.then { identifiers << it }
+          end
+        end
+        identifiers
+      end
+
+      # Returns the WP identifier for any `#N` / `##N` / `###N` (or
+      # semantic-shape) reference. Returns nil for prefixed resource links
+      # (`version#3`, `message#12`) and `:`-separator resources. Leading-zero
+      # numerics ("0123") pass through here — the link handler rejects them
+      # at render time, so a non-resolving cache entry is harmless.
+      def self.extract_work_package_identifier(match)
+        parts = parse_match(match)
+        identifier = parts[:identifier]
+        return nil unless parts[:prefix].nil? && parts[:sep]&.start_with?("#") && identifier.present?
+
+        identifier
+      end
+
+      # 1 SELECT in the common case. A second targeted SELECT only fires
+      # when references hit historical aliases — the loaded WP row carries
+      # only its current identifier, so unmapped inputs must be filled in
+      # from `WorkPackageSemanticAlias`.
+      def self.build_lookup(identifiers)
+        work_packages = WorkPackage.where_display_id_in(*identifiers).select(:id, :identifier).to_a
+        lookup = index_by_id_and_identifier(work_packages)
+        fold_in_alias_keys(lookup, identifiers)
+        lookup
+      end
+
+      def self.index_by_id_and_identifier(work_packages)
+        work_packages.each_with_object({}) do |wp, lookup|
+          lookup[wp.id.to_s] = wp
+          lookup[wp.identifier] = wp if wp.identifier.present?
+        end
+      end
+      private_class_method :index_by_id_and_identifier
+
+      def self.fold_in_alias_keys(lookup, identifiers)
+        unmapped = identifiers.map(&:to_s) - lookup.keys
+        return if unmapped.empty?
+
+        WorkPackageSemanticAlias
+          .where(identifier: unmapped)
+          .pluck(:identifier, :work_package_id)
+          .each { |ident, wp_id| lookup[ident] = lookup[wp_id.to_s] }
+      end
+      private_class_method :fold_in_alias_keys
+
       # Flattens the three alternation branches into a single `:sep` /
       # `:identifier` shape so callers don't branch on which one matched.
       def self.parse_match(match)
diff --git a/lib/open_project/text_formatting/preformatted_blocks.rb b/lib/open_project/text_formatting/preformatted_blocks.rb
new file mode 100644
index 000000000000..a9a674dc9869
--- /dev/null
+++ b/lib/open_project/text_formatting/preformatted_blocks.rb
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+#-- copyright
+# OpenProject is an open source project management software.
+# Copyright (C) the OpenProject GmbH
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License version 3.
+#
+# OpenProject is a fork of ChiliProject, which is a fork of Redmine. The copyright follows:
+# Copyright (C) 2006-2013 Jean-Philippe Lang
+# Copyright (C) 2010-2013 the ChiliProject Team
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+# See COPYRIGHT and LICENSE files for more details.
+#++
+
+module OpenProject
+  module TextFormatting
+    # `
`/`` ancestry skip. Filters call
+    # `has_ancestor?(node, BLOCKS)` via HTML::Pipeline's instance helper;
+    # matchers without that helper call `ancestor?(node)` here.
+    module PreformattedBlocks
+      BLOCKS = %w[pre code].to_set.freeze
+
+      module_function
+
+      def ancestor?(node)
+        ancestor = node.parent
+        until ancestor.nil? || ancestor.fragment? || ancestor.document?
+          return true if BLOCKS.include?(ancestor.name)
+
+          ancestor = ancestor.parent
+        end
+        false
+      end
+    end
+  end
+end
diff --git a/spec/lib/open_project/text_formatting/matchers/link_handlers/work_packages_spec.rb b/spec/lib/open_project/text_formatting/matchers/link_handlers/work_packages_spec.rb
index 0ecf3dc94768..93644aacedae 100644
--- a/spec/lib/open_project/text_formatting/matchers/link_handlers/work_packages_spec.rb
+++ b/spec/lib/open_project/text_formatting/matchers/link_handlers/work_packages_spec.rb
@@ -72,21 +72,83 @@
     end
   end
 
-  describe "per-reference query cost",
+  describe ".with_preloaded_resources save/restore semantics",
+           with_flag: { semantic_work_package_ids: true },
+           with_settings: { work_packages_identifier: "semantic" } do
+    # A custom-field formatter or recursive markdown render may invoke the
+    # text-formatting pipeline while an outer render is mid-iteration. The
+    # lookup must save on entry and restore on exit so the outer render's
+    # remaining `#N` matchers still see its WPs after the inner call returns.
+    include_context "with author signed in"
+
+    let(:project) { create(:project, identifier: "NESTED") }
+    let(:outer_wp) { create(:work_package, project:, author:) }
+    let(:inner_wp) { create(:work_package, project:, author:) }
+    let(:matcher) { OpenProject::TextFormatting::Matchers::ResourceLinksMatcher }
+
+    before do
+      outer_wp.allocate_and_register_semantic_id
+      inner_wp.allocate_and_register_semantic_id
+    end
+
+    it "preserves the outer lookup across a nested call" do
+      outer = outer_wp.reload
+      inner = inner_wp.reload
+      outer_doc = Nokogiri::HTML.fragment("##{outer.id}")
+      inner_doc = Nokogiri::HTML.fragment("##{inner.id}")
+
+      matcher.with_preloaded_resources(outer_doc, {}) do
+        expect(matcher.work_package_for(outer.id)).to eq(outer)
+
+        matcher.with_preloaded_resources(inner_doc, {}) do
+          expect(matcher.work_package_for(inner.id)).to eq(inner)
+        end
+
+        expect(matcher.work_package_for(outer.id))
+          .to eq(outer), "outer lookup should be restored after nested call"
+      end
+
+      expect(matcher.work_package_for(outer.id)).to be_nil
+    end
+  end
+
+  describe "classic mode is query-free",
+           with_flag: { semantic_work_package_ids: false },
+           with_settings: { work_packages_identifier: "classic" } do
+    # Rendering a `#N` reference in classic mode must not run any
+    # WorkPackage SELECTs: the preload is a no-op when `display_id` and
+    # `formatted_id` would collapse to the numeric form, so the link
+    # handler can build the link from the matched id alone.
+    include_context "with author signed in"
+    let(:project) { create(:project, identifier: "classicproj") }
+
+    it "does not query work_packages when rendering #N" do
+      wps = create_list(:work_package, 3, project:, author:)
+      ids_text = wps.map { |wp| "##{wp.id}" }.join(" ")
+
+      recorder = ActiveRecord::QueryRecorder.new { format_text(ids_text) }
+      wp_selects = recorder.log.grep(/FROM "work_packages"/i)
+
+      expect(wp_selects).to be_empty,
+                            "classic mode added unexpected WP SELECTs:\n#{wp_selects.join("\n")}"
+    end
+  end
+
+  describe "N+1 query bound",
            with_flag: { semantic_work_package_ids: true },
            with_settings: { work_packages_identifier: "semantic" } do
     include_context "with author signed in"
     let(:project) { create(:project, identifier: "NPLUSONE") }
 
-    it "issues one work_packages SELECT per `#N` reference" do
+    it "loads referenced work packages with a single SELECT regardless of count" do
       wps = create_list(:work_package, 5, project:, author:)
       ids_text = wps.map { |wp| "##{wp.id}" }.join(" ")
 
       recorder = ActiveRecord::QueryRecorder.new { format_text(ids_text) }
       wp_selects = recorder.log.grep(/FROM "work_packages"/i)
 
-      expect(wp_selects.size).to eq(5),
-                                 "expected one SELECT per reference, got #{wp_selects.size}:\n#{wp_selects.join("\n")}"
+      expect(wp_selects.size).to eq(1),
+                                 "expected exactly one work_packages SELECT, got #{wp_selects.size}:\n#{wp_selects.join("\n")}"
     end
   end
 
@@ -140,7 +202,7 @@
     end
 
     context "with mixed numeric and semantic references in one render" do
-      it "resolves both with one work_packages SELECT per reference" do
+      it "resolves both with a single work_packages SELECT" do
         wps = create_list(:work_package, 2, project:, author:)
         wps.each(&:allocate_and_register_semantic_id)
         loaded = wps.map(&:reload)
@@ -150,8 +212,8 @@
         recorder = ActiveRecord::QueryRecorder.new { rendered = format_text(text) }
         wp_selects = recorder.log.grep(/FROM "work_packages"/i)
 
-        expect(wp_selects.size).to eq(2),
-                                   "expected one SELECT per reference, got #{wp_selects.size}:\n#{wp_selects.join("\n")}"
+        expect(wp_selects.size).to eq(1),
+                                   "expected exactly one work_packages SELECT, got #{wp_selects.size}:\n#{wp_selects.join("\n")}"
 
         # Both render with the user-facing display_id, regardless of which
         # form the user typed.
@@ -161,7 +223,7 @@
     end
 
     context "with a historical alias reference" do
-      it "resolves via the alias table without a separate alias round-trip" do
+      it "resolves via the alias table with two round-trips total" do
         wp = work_package.reload
         # Simulate a project rename: the WP keeps its current MACROPROJ-N
         # identifier on the row, but a historical OLD-prefix alias row
@@ -172,14 +234,15 @@
         rendered = nil
         recorder = ActiveRecord::QueryRecorder.new { rendered = format_text("see #OLDPROJ-1") }
 
-        # `find_by_display_id` runs one WP SELECT whose WHERE clause is
-        # `identifier = ? OR EXISTS (SELECT 1 FROM aliases …)`, so the
-        # alias table is consulted in-line — no separate alias-only SELECT.
+        # Two database round-trips: (1) `where_display_id_in` runs a single
+        # WP SELECT whose WHERE clause includes an EXISTS subquery against
+        # the alias table; (2) the sidecar alias pluck maps the historical
+        # input string back to its WP for the cache.
         wp_selects = recorder.log.grep(/FROM "work_packages"/i)
         alias_only_selects = recorder.log.grep(/FROM "work_package_semantic_aliases"/i)
                                      .grep_v(/FROM "work_packages"/i)
         expect(wp_selects.size).to eq(1)
-        expect(alias_only_selects).to be_empty
+        expect(alias_only_selects.size).to eq(1)
 
         # Renders against the WP's CURRENT display_id, not the historical
         # alias the user typed — old content stays alive but points at the