Sum agent stats across every invocation of a multi-prompt run.

Adds Stats#+ and Usage#+, and makes the Claude and Pi providers report the
summed stats when more than one invocation ran.

NOTE(review): this patch was re-flowed from a copy whose line breaks had been
collapsed; leading indentation is inferred from Ruby nesting, and the index
hashes for stats.rb and stats_test.rb predate the merge_model_usage change --
confirm both against the working tree before applying.

diff --git a/lib/roast/cogs/agent/providers/claude.rb b/lib/roast/cogs/agent/providers/claude.rb
index 3ad52ceb..2db0c619 100644
--- a/lib/roast/cogs/agent/providers/claude.rb
+++ b/lib/roast/cogs/agent/providers/claude.rb
@@ -25,7 +25,9 @@ def invoke(input)
               invocations << invocation
               break unless invocation.result.success
             end
-            Output.new(invocations.last.not_nil!.result)
+            final_result = invocations.last.not_nil!.result
+            final_result.stats = invocations.filter_map { |i| i.result.stats }.reduce(:+) if invocations.size > 1
+            Output.new(final_result)
           end
         end
       end
diff --git a/lib/roast/cogs/agent/providers/pi.rb b/lib/roast/cogs/agent/providers/pi.rb
index c297b4ac..7bae45ff 100644
--- a/lib/roast/cogs/agent/providers/pi.rb
+++ b/lib/roast/cogs/agent/providers/pi.rb
@@ -31,6 +31,7 @@ def invoke(input)
               break unless invocation.result.success
             end
             final_result = invocations.last.not_nil!.result
+            final_result.stats = invocations.filter_map { |i| i.result.stats }.reduce(:+) if invocations.size > 1
             Output.new(final_result)
           end
         end
diff --git a/lib/roast/cogs/agent/stats.rb b/lib/roast/cogs/agent/stats.rb
index 8d324733..262aeb10 100644
--- a/lib/roast/cogs/agent/stats.rb
+++ b/lib/roast/cogs/agent/stats.rb
@@ -61,6 +61,21 @@ def initialize
           @model_usage = {}
         end
 
+        # Add two Stats objects together, summing their durations, turns, usage, and model usage
+        #
+        # Nil values are treated as zero when the other operand is non-nil.
+        # Model usage hashes are merged, summing usage for models that appear in both.
+        #
+        #: (Stats) -> Stats
+        def +(other)
+          result = Stats.new
+          result.duration_ms = sum_nils(duration_ms, other.duration_ms)&.to_int
+          result.num_turns = sum_nils(num_turns, other.num_turns)&.to_int
+          result.usage = usage + other.usage
+          result.model_usage = merge_model_usage(model_usage, other.model_usage)
+          result
+        end
+
         # Get a human-readable string representation of the statistics
         #
         # Formats the statistics into a multi-line string with the following information:
@@ -84,6 +99,27 @@ def to_s
           end
           lines.join("\n")
         end
+
+        private
+
+        # Sum two optional numbers; the result is nil only when both are nil.
+        #
+        #: (Numeric?, Numeric?) -> Numeric?
+        def sum_nils(a, b)
+          return if a.nil? && b.nil?
+
+          (a || 0) + (b || 0)
+        end
+
+        # Merge two per-model usage hashes, summing entries for models present in both.
+        #
+        # Hash#merge copies references, so each value is dup'ed afterwards to keep the
+        # returned hash from sharing Usage objects with either operand.
+        #
+        #: (Hash[String, Usage], Hash[String, Usage]) -> Hash[String, Usage]
+        def merge_model_usage(a, b)
+          a.merge(b) { |_model, usage_a, usage_b| usage_a + usage_b }.transform_values(&:dup)
+        end
       end
     end
   end
diff --git a/lib/roast/cogs/agent/usage.rb b/lib/roast/cogs/agent/usage.rb
index e848e6ea..1d41675e 100644
--- a/lib/roast/cogs/agent/usage.rb
+++ b/lib/roast/cogs/agent/usage.rb
@@ -54,6 +54,28 @@ class Usage
         #
         #: Float?
         attr_accessor :cost_usd
+
+        # Add two Usage objects together, summing their token counts and costs
+        #
+        # Nil values are treated as zero when the other operand is non-nil.
+        #
+        #: (Usage) -> Usage
+        def +(other)
+          result = Usage.new
+          result.input_tokens = sum_nils(input_tokens, other.input_tokens)&.to_int
+          result.output_tokens = sum_nils(output_tokens, other.output_tokens)&.to_int
+          result.cost_usd = sum_nils(cost_usd, other.cost_usd)&.to_f
+          result
+        end
+
+        private
+
+        #: (Numeric?, Numeric?) -> Numeric?
+        def sum_nils(a, b)
+          return if a.nil? && b.nil?
+
+          (a || 0) + (b || 0)
+        end
       end
     end
   end
diff --git a/test/roast/cogs/agent/providers/claude_test.rb b/test/roast/cogs/agent/providers/claude_test.rb
index 6e84702c..d1edbb13 100644
--- a/test/roast/cogs/agent/providers/claude_test.rb
+++ b/test/roast/cogs/agent/providers/claude_test.rb
@@ -141,6 +141,63 @@ def mock_status(success:)
             assert_equal "final result", output.response
           end
 
+          test "invoke sums stats across multiple invocations" do
+            input = Agent::Input.new
+            input.prompts = ["First", "Second"]
+
+            call_count = 0
+            CommandRunner.stubs(:execute).with do |_args, **kwargs|
+              call_count += 1
+              result_hash = {
+                type: "result",
+                subtype: "success",
+                result: call_count == 1 ? "intermediate" : "final",
+                duration_ms: call_count == 1 ? 1000 : 2000,
+                num_turns: call_count == 1 ? 3 : 5,
+                total_cost_usd: call_count == 1 ? 0.01 : 0.02,
+                modelUsage: {
+                  "claude-sonnet" => {
+                    inputTokens: call_count == 1 ? 100 : 200,
+                    outputTokens: call_count == 1 ? 50 : 75,
+                  },
+                },
+              }
+              kwargs[:stdout_handler]&.call(result_hash.to_json)
+              true
+            end.returns(["", "", mock_status(success: true)])
+
+            output = @provider.invoke(input)
+
+            assert_equal 3000, output.stats.duration_ms
+            assert_equal 8, output.stats.num_turns
+            assert_in_delta 0.03, output.stats.usage.cost_usd
+            assert_equal 300, output.stats.model_usage[:"claude-sonnet"].input_tokens
+            assert_equal 125, output.stats.model_usage[:"claude-sonnet"].output_tokens
+          end
+
+          test "invoke does not sum stats for single invocation" do
+            input = Agent::Input.new
+            input.prompt = "Only prompt"
+
+            result_hash = {
+              type: "result",
+              subtype: "success",
+              result: "done",
+              duration_ms: 1000,
+              num_turns: 3,
+              total_cost_usd: 0.01,
+            }
+            CommandRunner.stubs(:execute).with do |_args, **kwargs|
+              kwargs[:stdout_handler]&.call(result_hash.to_json)
+              true
+            end.returns(["", "", mock_status(success: true)])
+
+            output = @provider.invoke(input)
+
+            assert_equal 1000, output.stats.duration_ms
+            assert_equal 3, output.stats.num_turns
+          end
+
           test "invoke uses input session when no previous invocation session exists" do
             input = Agent::Input.new
             input.prompts = ["Main task", "Finalize"]
diff --git a/test/roast/cogs/agent/providers/pi_test.rb b/test/roast/cogs/agent/providers/pi_test.rb
index 89e43f7d..0c98f41a 100644
--- a/test/roast/cogs/agent/providers/pi_test.rb
+++ b/test/roast/cogs/agent/providers/pi_test.rb
@@ -185,6 +185,77 @@ def mock_status(success:)
             assert_equal "final result", output.response
           end
 
+          test "invoke sums stats across multiple invocations" do
+            input = Agent::Input.new
+            input.prompts = ["First", "Second"]
+
+            call_count = 0
+            CommandRunner.stubs(:execute).with do |_args, **kwargs|
+              call_count += 1
+              # Simulate turn_start events: 3 turns for first invocation, 5 for second
+              num_turns = call_count == 1 ? 3 : 5
+              num_turns.times { kwargs[:stdout_handler]&.call({ type: "turn_start" }.to_json) }
+              usage_data = {
+                type: "message_end",
+                message: {
+                  role: "assistant",
+                  model: "claude-sonnet",
+                  content: [{ type: "text", text: call_count == 1 ? "intermediate" : "final" }],
+                  usage: {
+                    input: call_count == 1 ? 100 : 200,
+                    output: call_count == 1 ? 50 : 75,
+                    cacheRead: 0,
+                    cacheWrite: 0,
+                    cost: { total: call_count == 1 ? 0.01 : 0.02 },
+                  },
+                },
+              }.to_json
+              kwargs[:stdout_handler]&.call(usage_data)
+              session_json = { type: "session", id: "session_#{call_count}" }.to_json
+              kwargs[:stdout_handler]&.call(session_json)
+              true
+            end.returns(["", "", mock_status(success: true)])
+
+            output = @provider.invoke(input)
+
+            assert_equal 8, output.stats.num_turns
+            assert_in_delta 0.03, output.stats.usage.cost_usd
+            assert_equal 300, output.stats.model_usage["claude-sonnet"].input_tokens
+            assert_equal 125, output.stats.model_usage["claude-sonnet"].output_tokens
+          end
+
+          test "invoke does not sum stats for single invocation" do
+            input = Agent::Input.new
+            input.prompt = "Only prompt"
+
+            CommandRunner.stubs(:execute).with do |_args, **kwargs|
+              kwargs[:stdout_handler]&.call({ type: "turn_start" }.to_json)
+              kwargs[:stdout_handler]&.call({ type: "turn_start" }.to_json)
+              kwargs[:stdout_handler]&.call({ type: "turn_start" }.to_json)
+              usage_data = {
+                type: "message_end",
+                message: {
+                  role: "assistant",
+                  model: "claude-sonnet",
+                  content: [{ type: "text", text: "done" }],
+                  usage: {
+                    input: 100,
+                    output: 50,
+                    cacheRead: 0,
+                    cacheWrite: 0,
+                    cost: { total: 0.01 },
+                  },
+                },
+              }.to_json
+              kwargs[:stdout_handler]&.call(usage_data)
+              true
+            end.returns(["", "", mock_status(success: true)])
+
+            output = @provider.invoke(input)
+
+            assert_equal 3, output.stats.num_turns
+          end
+
           test "invoke uses input session when no previous invocation session exists" do
             input = Agent::Input.new
             input.prompts = ["Main task", "Finalize"]
diff --git a/test/roast/cogs/agent/stats_test.rb b/test/roast/cogs/agent/stats_test.rb
index 377c3b15..6b6a397e 100644
--- a/test/roast/cogs/agent/stats_test.rb
+++ b/test/roast/cogs/agent/stats_test.rb
@@ -99,6 +99,116 @@ def setup
           assert_match(/Tokens \(model2\):/, output)
         end
 
+        test "+ sums duration_ms" do
+          a = Stats.new
+          a.duration_ms = 3000
+          b = Stats.new
+          b.duration_ms = 2000
+
+          result = a + b
+
+          assert_equal 5000, result.duration_ms
+        end
+
+        test "+ sums num_turns" do
+          a = Stats.new
+          a.num_turns = 3
+          b = Stats.new
+          b.num_turns = 5
+
+          result = a + b
+
+          assert_equal 8, result.num_turns
+        end
+
+        test "+ sums usage" do
+          a = Stats.new
+          a.usage.input_tokens = 100
+          a.usage.cost_usd = 0.01
+          b = Stats.new
+          b.usage.input_tokens = 200
+          b.usage.cost_usd = 0.02
+
+          result = a + b
+
+          assert_equal 300, result.usage.input_tokens
+          assert_in_delta 0.03, result.usage.cost_usd
+        end
+
+        test "+ merges model_usage for different models" do
+          a = Stats.new
+          usage_a = Usage.new
+          usage_a.input_tokens = 100
+          a.model_usage["model-a"] = usage_a
+
+          b = Stats.new
+          usage_b = Usage.new
+          usage_b.input_tokens = 200
+          b.model_usage["model-b"] = usage_b
+
+          result = a + b
+
+          assert_equal 100, result.model_usage["model-a"].input_tokens
+          assert_equal 200, result.model_usage["model-b"].input_tokens
+        end
+
+        test "+ sums model_usage for the same model" do
+          a = Stats.new
+          usage_a = Usage.new
+          usage_a.input_tokens = 100
+          usage_a.output_tokens = 50
+          a.model_usage["claude"] = usage_a
+
+          b = Stats.new
+          usage_b = Usage.new
+          usage_b.input_tokens = 200
+          usage_b.output_tokens = 75
+          b.model_usage["claude"] = usage_b
+
+          result = a + b
+
+          assert_equal 300, result.model_usage["claude"].input_tokens
+          assert_equal 125, result.model_usage["claude"].output_tokens
+        end
+
+        test "+ returns nil for fields that are nil on both sides" do
+          a = Stats.new
+          b = Stats.new
+
+          result = a + b
+
+          assert_nil result.duration_ms
+          assert_nil result.num_turns
+        end
+
+        test "+ does not mutate operands" do
+          a = Stats.new
+          a.duration_ms = 1000
+          a.num_turns = 2
+          b = Stats.new
+          b.duration_ms = 2000
+          b.num_turns = 3
+
+          _ = a + b
+
+          assert_equal 1000, a.duration_ms
+          assert_equal 2, a.num_turns
+          assert_equal 2000, b.duration_ms
+          assert_equal 3, b.num_turns
+        end
+
+        test "+ does not share model_usage entries with operands" do
+          a = Stats.new
+          usage_a = Usage.new
+          usage_a.input_tokens = 100
+          a.model_usage["model-a"] = usage_a
+
+          result = a + Stats.new
+          result.model_usage["model-a"].input_tokens = 999
+
+          assert_equal 100, usage_a.input_tokens
+        end
+
         test "to_s formats complete stats" do
           @stats.duration_ms = 5000
           @stats.num_turns = 3
diff --git a/test/roast/cogs/agent/usage_test.rb b/test/roast/cogs/agent/usage_test.rb
new file mode 100644
index 00000000..e479085c
--- /dev/null
+++ b/test/roast/cogs/agent/usage_test.rb
@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+module Roast
+  module Cogs
+    class Agent < Cog
+      class UsageTest < ActiveSupport::TestCase
+        test "+ sums input_tokens" do
+          a = Usage.new
+          a.input_tokens = 100
+          b = Usage.new
+          b.input_tokens = 200
+
+          result = a + b
+
+          assert_equal 300, result.input_tokens
+        end
+
+        test "+ sums output_tokens" do
+          a = Usage.new
+          a.output_tokens = 50
+          b = Usage.new
+          b.output_tokens = 75
+
+          result = a + b
+
+          assert_equal 125, result.output_tokens
+        end
+
+        test "+ sums cost_usd" do
+          a = Usage.new
+          a.cost_usd = 0.01
+          b = Usage.new
+          b.cost_usd = 0.02
+
+          result = a + b
+
+          assert_in_delta 0.03, result.cost_usd
+        end
+
+        test "+ treats nil as zero when other is non-nil" do
+          a = Usage.new
+          a.input_tokens = 100
+          b = Usage.new
+
+          result = a + b
+
+          assert_equal 100, result.input_tokens
+          assert_nil result.output_tokens
+        end
+
+        test "+ returns nil when both values are nil" do
+          a = Usage.new
+          b = Usage.new
+
+          result = a + b
+
+          assert_nil result.input_tokens
+          assert_nil result.output_tokens
+          assert_nil result.cost_usd
+        end
+
+        test "+ does not mutate operands" do
+          a = Usage.new
+          a.input_tokens = 100
+          b = Usage.new
+          b.input_tokens = 200
+
+          _ = a + b
+
+          assert_equal 100, a.input_tokens
+          assert_equal 200, b.input_tokens
+        end
+      end
+    end
+  end
+end