Shopify · juniper-shopify · Mar 27, 2026 · Mar 11, 2026
@@ -25,7 +25,9 @@ def invoke(input)
               invocations << invocation
               break unless invocation.result.success
             end
-            Output.new(invocations.last.not_nil!.result)
+            final_result = invocations.last.not_nil!.result
+            final_result.stats = invocations.filter_map { |i| i.result.stats }.reduce(:+) if invocations.size > 1
+            Output.new(final_result)
           end
         end
       end

@@ -31,6 +31,7 @@ def invoke(input)
               break unless invocation.result.success
             end
             final_result = invocations.last.not_nil!.result
+            final_result.stats = invocations.filter_map { |i| i.result.stats }.reduce(:+) if invocations.size > 1
             Output.new(final_result)
           end
         end

@@ -61,6 +61,21 @@ def initialize
           @model_usage = {}
         end
 
+        # Add two Stats objects together, summing their durations, turns, usage, and model usage
+        #
+        # Nil values are treated as zero when the other operand is non-nil.
+        # Model usage hashes are merged, summing usage for models that appear in both.
+        #
+        #: (Stats) -> Stats
+        def +(other)
+          result = Stats.new # build a fresh object; self and other are only read
+          result.duration_ms = sum_nils(duration_ms, other.duration_ms)&.to_int
+          result.num_turns = sum_nils(num_turns, other.num_turns)&.to_int # &. leaves nil when both sides are nil
+          result.usage = usage + other.usage # combined via the usage objects' own +
+          result.model_usage = merge_model_usage(model_usage, other.model_usage)
+          result
+        end
+
         # Get a human-readable string representation of the statistics
         #
         # Formats the statistics into a multi-line string with the following information:
@@ -84,6 +99,20 @@ def to_s
           end
           lines.join("\n")
         end
+
+        private
+
+        #: (Numeric?, Numeric?) -> Numeric?
+        def sum_nils(a, b) # adds two optional numbers
+          return if a.nil? && b.nil? # nothing on either side: the sum stays nil
+
+          (a || 0) + (b || 0) # a single missing side counts as zero
+        end
+
+        #: (Hash[String, Usage], Hash[String, Usage]) -> Hash[String, Usage]
+        def merge_model_usage(a, b) # NOTE(review): values for models in only one hash are reused as-is, not copied
+          a.merge(b) { |_model, usage_a, usage_b| usage_a + usage_b } # block runs only for keys in both hashes
+        end
       end
     end
   end

@@ -54,6 +54,28 @@ class Usage
         #
         #: Float?
         attr_accessor :cost_usd
+
+        # Add two Usage objects together, summing their token counts and costs
+        #
+        # Nil values are treated as zero when the other operand is non-nil.
+        #
+        #: (Usage) -> Usage
+        def +(other)
+          result = Usage.new # build a fresh object; self and other are only read
+          result.input_tokens = sum_nils(input_tokens, other.input_tokens)&.to_int
+          result.output_tokens = sum_nils(output_tokens, other.output_tokens)&.to_int
+          result.cost_usd = sum_nils(cost_usd, other.cost_usd)&.to_f # &. leaves nil when both costs are nil
+          result
+        end
+
+        private
+
+        #: (Numeric?, Numeric?) -> Numeric?
+        def sum_nils(a, b) # adds two optional numbers
+          return if a.nil? && b.nil? # nothing on either side: the sum stays nil
+
+          (a || 0) + (b || 0) # a single missing side counts as zero
+        end
       end
     end
   end

@@ -141,6 +141,63 @@ def mock_status(success:)
             assert_equal "final result", output.response
           end
 
+          test "invoke sums stats across multiple invocations" do
+            input = Agent::Input.new
+            input.prompts = ["First", "Second"] # two prompts, so two stubbed executions are expected
+
+            call_count = 0 # lets the stub return different numbers per execution
+            CommandRunner.stubs(:execute).with do |_args, **kwargs|
+              call_count += 1
+              result_hash = {
+                type: "result",
+                subtype: "success",
+                result: call_count == 1 ? "intermediate" : "final",
+                duration_ms: call_count == 1 ? 1000 : 2000,
+                num_turns: call_count == 1 ? 3 : 5,
+                total_cost_usd: call_count == 1 ? 0.01 : 0.02,
+                modelUsage: {
+                  "claude-sonnet" => {
+                    inputTokens: call_count == 1 ? 100 : 200,
+                    outputTokens: call_count == 1 ? 50 : 75,
+                  },
+                },
+              }
+              kwargs[:stdout_handler]&.call(result_hash.to_json) # hand the fake result line to the stdout handler
+              true # presumably: truthy block result means the stub accepts these arguments
+            end.returns(["", "", mock_status(success: true)])
+
+            output = @provider.invoke(input)
+
+            assert_equal 3000, output.stats.duration_ms # 1000 + 2000
+            assert_equal 8, output.stats.num_turns # 3 + 5
+            assert_in_delta 0.03, output.stats.usage.cost_usd # 0.01 + 0.02
+            assert_equal 300, output.stats.model_usage[:"claude-sonnet"].input_tokens # 100 + 200; symbol key
+            assert_equal 125, output.stats.model_usage[:"claude-sonnet"].output_tokens # 50 + 75
+          end
+
+          test "invoke does not sum stats for single invocation" do
+            input = Agent::Input.new
+            input.prompt = "Only prompt" # a single prompt, unlike the multi-prompt test
+
+            result_hash = {
+              type: "result",
+              subtype: "success",
+              result: "done",
+              duration_ms: 1000,
+              num_turns: 3,
+              total_cost_usd: 0.01,
+            }
+            CommandRunner.stubs(:execute).with do |_args, **kwargs|
+              kwargs[:stdout_handler]&.call(result_hash.to_json) # hand the fake result line to the stdout handler
+              true # presumably: truthy block result means the stub accepts these arguments
+            end.returns(["", "", mock_status(success: true)])
+
+            output = @provider.invoke(input)
+
+            assert_equal 1000, output.stats.duration_ms # same value the single result reported
+            assert_equal 3, output.stats.num_turns # same value the single result reported
+          end
+
           test "invoke uses input session when no previous invocation session exists" do
             input = Agent::Input.new
             input.prompts = ["Main task", "Finalize"]

@@ -185,6 +185,77 @@ def mock_status(success:)
             assert_equal "final result", output.response
           end
 
+          test "invoke sums stats across multiple invocations" do
+            input = Agent::Input.new
+            input.prompts = ["First", "Second"] # two prompts, so two stubbed executions are expected
+
+            call_count = 0 # lets the stub emit different events per execution
+            CommandRunner.stubs(:execute).with do |_args, **kwargs|
+              call_count += 1
+              # Simulate turn_start events: 3 turns for first invocation, 5 for second
+              num_turns = call_count == 1 ? 3 : 5
+              num_turns.times { kwargs[:stdout_handler]&.call({ type: "turn_start" }.to_json) }
+              usage_data = {
+                type: "message_end",
+                message: {
+                  role: "assistant",
+                  model: "claude-sonnet",
+                  content: [{ type: "text", text: call_count == 1 ? "intermediate" : "final" }],
+                  usage: {
+                    input: call_count == 1 ? 100 : 200,
+                    output: call_count == 1 ? 50 : 75,
+                    cacheRead: 0,
+                    cacheWrite: 0,
+                    cost: { total: call_count == 1 ? 0.01 : 0.02 },
+                  },
+                },
+              }.to_json
+              kwargs[:stdout_handler]&.call(usage_data)
+              session_json = { type: "session", id: "session_#{call_count}" }.to_json
+              kwargs[:stdout_handler]&.call(session_json)
+              true # presumably: truthy block result means the stub accepts these arguments
+            end.returns(["", "", mock_status(success: true)])
+
+            output = @provider.invoke(input) # NOTE(review): duration_ms is not asserted in this test
+
+            assert_equal 8, output.stats.num_turns # 3 + 5 turn_start events
+            assert_in_delta 0.03, output.stats.usage.cost_usd # 0.01 + 0.02
+            assert_equal 300, output.stats.model_usage["claude-sonnet"].input_tokens # 100 + 200; string key
+            assert_equal 125, output.stats.model_usage["claude-sonnet"].output_tokens # 50 + 75
+          end
+
+          test "invoke does not sum stats for single invocation" do
+            input = Agent::Input.new
+            input.prompt = "Only prompt" # a single prompt, unlike the multi-prompt test
+
+            CommandRunner.stubs(:execute).with do |_args, **kwargs|
+              kwargs[:stdout_handler]&.call({ type: "turn_start" }.to_json) # three turn_start events in total
+              kwargs[:stdout_handler]&.call({ type: "turn_start" }.to_json)
+              kwargs[:stdout_handler]&.call({ type: "turn_start" }.to_json)
+              usage_data = {
+                type: "message_end",
+                message: {
+                  role: "assistant",
+                  model: "claude-sonnet",
+                  content: [{ type: "text", text: "done" }],
+                  usage: {
+                    input: 100,
+                    output: 50,
+                    cacheRead: 0,
+                    cacheWrite: 0,
+                    cost: { total: 0.01 },
+                  },
+                },
+              }.to_json
+              kwargs[:stdout_handler]&.call(usage_data)
+              true # presumably: truthy block result means the stub accepts these arguments
+            end.returns(["", "", mock_status(success: true)])
+
+            output = @provider.invoke(input)
+
+            assert_equal 3, output.stats.num_turns # matches the three turn_start events emitted above
+          end
+
           test "invoke uses input session when no previous invocation session exists" do
             input = Agent::Input.new
             input.prompts = ["Main task", "Finalize"]

@@ -99,6 +99,102 @@ def setup
           assert_match(/Tokens \(model2\):/, output)
         end
 
+        test "+ sums duration_ms" do
+          a = Stats.new
+          a.duration_ms = 3000
+          b = Stats.new
+          b.duration_ms = 2000
+
+          result = a + b
+
+          assert_equal 5000, result.duration_ms # 3000 + 2000
+        end
+
+        test "+ sums num_turns" do
+          a = Stats.new
+          a.num_turns = 3
+          b = Stats.new
+          b.num_turns = 5
+
+          result = a + b
+
+          assert_equal 8, result.num_turns # 3 + 5
+        end
+
+        test "+ sums usage" do
+          a = Stats.new
+          a.usage.input_tokens = 100 # relies on Stats.new already providing a usage object
+          a.usage.cost_usd = 0.01
+          b = Stats.new
+          b.usage.input_tokens = 200
+          b.usage.cost_usd = 0.02
+
+          result = a + b
+
+          assert_equal 300, result.usage.input_tokens # 100 + 200
+          assert_in_delta 0.03, result.usage.cost_usd # float sum, hence the delta comparison
+        end
+
+        test "+ merges model_usage for different models" do
+          a = Stats.new
+          usage_a = Usage.new
+          usage_a.input_tokens = 100
+          a.model_usage["model-a"] = usage_a # only a knows about model-a
+
+          b = Stats.new
+          usage_b = Usage.new
+          usage_b.input_tokens = 200
+          b.model_usage["model-b"] = usage_b # only b knows about model-b
+
+          result = a + b
+
+          assert_equal 100, result.model_usage["model-a"].input_tokens # carried over unchanged
+          assert_equal 200, result.model_usage["model-b"].input_tokens # carried over unchanged
+        end
+
+        test "+ sums model_usage for the same model" do
+          a = Stats.new
+          usage_a = Usage.new
+          usage_a.input_tokens = 100
+          usage_a.output_tokens = 50
+          a.model_usage["claude"] = usage_a
+
+          b = Stats.new
+          usage_b = Usage.new
+          usage_b.input_tokens = 200
+          usage_b.output_tokens = 75
+          b.model_usage["claude"] = usage_b # same key as in a, so the entries must be added together
+
+          result = a + b
+
+          assert_equal 300, result.model_usage["claude"].input_tokens # 100 + 200
+          assert_equal 125, result.model_usage["claude"].output_tokens # 50 + 75
+        end
+
+        test "+ returns nil for fields that are nil on both sides" do
+          a = Stats.new # no fields assigned on either operand
+          b = Stats.new
+
+          result = a + b
+
+          assert_nil result.duration_ms # stays nil instead of turning into 0
+          assert_nil result.num_turns # stays nil instead of turning into 0
+        end
+
+        test "+ does not mutate operands" do
+          a = Stats.new
+          a.duration_ms = 1000
+          a.num_turns = 2
+          b = Stats.new
+          b.duration_ms = 2000
+          b.num_turns = 3
+
+          _ = a + b # the sum itself is discarded; only the effect on the operands matters
+
+          assert_equal 1000, a.duration_ms # NOTE(review): only a is checked; b is not asserted
+          assert_equal 2, a.num_turns
+        end
+
         test "to_s formats complete stats" do
           @stats.duration_ms = 5000
           @stats.num_turns = 3