Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/roast/cogs/agent/providers/claude.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ def invoke(input)
invocations << invocation
break unless invocation.result.success
end
Output.new(invocations.last.not_nil!.result)
final_result = invocations.last.not_nil!.result
final_result.stats = invocations.filter_map { |i| i.result.stats }.reduce(:+) if invocations.size > 1
Output.new(final_result)
end
end
end
Expand Down
1 change: 1 addition & 0 deletions lib/roast/cogs/agent/providers/pi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def invoke(input)
break unless invocation.result.success
end
final_result = invocations.last.not_nil!.result
final_result.stats = invocations.filter_map { |i| i.result.stats }.reduce(:+) if invocations.size > 1
Output.new(final_result)
end
end
Expand Down
29 changes: 29 additions & 0 deletions lib/roast/cogs/agent/stats.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,21 @@ def initialize
@model_usage = {}
end

# Combine this Stats with another and return the totals as a new Stats
#
# Duration and turn count are added together. A field is nil in the result only when it is
# nil on both operands; a single nil side counts as zero.
# The overall usage values are combined with `+`, and the per-model usage hashes are merged
# so that a model present on both sides ends up with its combined usage.
#
#: (Stats) -> Stats
def +(other)
  Stats.new.tap do |total|
    total.duration_ms = sum_nils(duration_ms, other.duration_ms)&.to_int
    total.num_turns = sum_nils(num_turns, other.num_turns)&.to_int
    total.usage = usage + other.usage
    total.model_usage = merge_model_usage(model_usage, other.model_usage)
  end
end

# Get a human-readable string representation of the statistics
#
# Formats the statistics into a multi-line string with the following information:
Expand All @@ -84,6 +99,20 @@ def to_s
end
lines.join("\n")
end

private

# Add two optional numbers
#
# Returns nil only when both arguments are nil; a single nil argument counts as zero.
#
#: (Numeric?, Numeric?) -> Numeric?
def sum_nils(a, b)
  return nil if [a, b].all?(&:nil?)

  left = a || 0
  right = b || 0
  left + right
end

# Merge two per-model usage hashes into a new hash
#
# A model present in both hashes gets the `+` of its two usage values; a model present in
# only one hash keeps that side's usage. Every value in the returned hash is a copy, so
# mutating the result later cannot alter the usage objects still held by `a` or `b`.
#
#: (Hash[String, Usage], Hash[String, Usage]) -> Hash[String, Usage]
def merge_model_usage(a, b)
  merged = a.merge(b) { |_model, usage_a, usage_b| usage_a + usage_b }
  # Hash#merge hands values for keys found on only one side through by reference,
  # which would leave the result sharing mutable objects with the operands.
  merged.transform_values(&:dup)
end
end
end
end
Expand Down
22 changes: 22 additions & 0 deletions lib/roast/cogs/agent/usage.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,28 @@ class Usage
#
#: Float?
attr_accessor :cost_usd

# Combine this Usage with another and return the totals as a new Usage
#
# Input tokens, output tokens, and cost are each added. A field is nil in the result only
# when it is nil on both operands; a single nil side counts as zero.
#
#: (Usage) -> Usage
def +(other)
  Usage.new.tap do |total|
    total.input_tokens = sum_nils(input_tokens, other.input_tokens)&.to_int
    total.output_tokens = sum_nils(output_tokens, other.output_tokens)&.to_int
    total.cost_usd = sum_nils(cost_usd, other.cost_usd)&.to_f
  end
end

private

# Add two optional numbers
#
# Returns nil only when both arguments are nil; a single nil argument counts as zero.
#
#: (Numeric?, Numeric?) -> Numeric?
def sum_nils(a, b)
  return nil if [a, b].all?(&:nil?)

  left = a || 0
  right = b || 0
  left + right
end
end
end
end
Expand Down
57 changes: 57 additions & 0 deletions test/roast/cogs/agent/providers/claude_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,63 @@ def mock_status(success:)
assert_equal "final result", output.response
end

# Feeds the provider two prompts while the stubbed CommandRunner reports one set of stats
# the first time its block runs and another set afterwards, then checks that the output
# stats hold the totals.
test "invoke sums stats across multiple invocations" do
  input = Agent::Input.new
  input.prompts = ["First", "Second"]

  calls = 0
  CommandRunner.stubs(:execute).with do |_args, **kwargs|
    calls += 1
    first = calls == 1
    payload = {
      type: "result",
      subtype: "success",
      result: first ? "intermediate" : "final",
      duration_ms: first ? 1000 : 2000,
      num_turns: first ? 3 : 5,
      total_cost_usd: first ? 0.01 : 0.02,
      modelUsage: {
        "claude-sonnet" => {
          inputTokens: first ? 100 : 200,
          outputTokens: first ? 50 : 75,
        },
      },
    }
    kwargs[:stdout_handler]&.call(payload.to_json)
    true
  end.returns(["", "", mock_status(success: true)])

  combined = @provider.invoke(input)

  assert_equal 3000, combined.stats.duration_ms
  assert_equal 8, combined.stats.num_turns
  assert_in_delta 0.03, combined.stats.usage.cost_usd
  assert_equal 300, combined.stats.model_usage[:"claude-sonnet"].input_tokens
  assert_equal 125, combined.stats.model_usage[:"claude-sonnet"].output_tokens
end

# With a single prompt, the stats reported by the stubbed result payload are expected to
# appear on the output unchanged.
test "invoke does not sum stats for single invocation" do
  input = Agent::Input.new
  input.prompt = "Only prompt"

  payload = {
    type: "result",
    subtype: "success",
    result: "done",
    duration_ms: 1000,
    num_turns: 3,
    total_cost_usd: 0.01,
  }
  CommandRunner.stubs(:execute).with do |_args, **kwargs|
    handler = kwargs[:stdout_handler]
    handler&.call(payload.to_json)
    true
  end.returns(["", "", mock_status(success: true)])

  single = @provider.invoke(input)

  assert_equal 1000, single.stats.duration_ms
  assert_equal 3, single.stats.num_turns
end

test "invoke uses input session when no previous invocation session exists" do
input = Agent::Input.new
input.prompts = ["Main task", "Finalize"]
Expand Down
71 changes: 71 additions & 0 deletions test/roast/cogs/agent/providers/pi_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,77 @@ def mock_status(success:)
assert_equal "final result", output.response
end

# Feeds the provider two prompts while the stubbed CommandRunner emits a different number
# of turn_start events and different usage figures the first time its block runs than
# afterwards, then checks that the output stats hold the totals.
test "invoke sums stats across multiple invocations" do
  input = Agent::Input.new
  input.prompts = ["First", "Second"]

  calls = 0
  CommandRunner.stubs(:execute).with do |_args, **kwargs|
    calls += 1
    first = calls == 1
    handler = kwargs[:stdout_handler]

    # Simulate turn_start events: 3 turns on the first pass, 5 afterwards
    (first ? 3 : 5).times { handler&.call({ type: "turn_start" }.to_json) }

    message_end = {
      type: "message_end",
      message: {
        role: "assistant",
        model: "claude-sonnet",
        content: [{ type: "text", text: first ? "intermediate" : "final" }],
        usage: {
          input: first ? 100 : 200,
          output: first ? 50 : 75,
          cacheRead: 0,
          cacheWrite: 0,
          cost: { total: first ? 0.01 : 0.02 },
        },
      },
    }
    handler&.call(message_end.to_json)
    handler&.call({ type: "session", id: "session_#{calls}" }.to_json)
    true
  end.returns(["", "", mock_status(success: true)])

  combined = @provider.invoke(input)

  assert_equal 8, combined.stats.num_turns
  assert_in_delta 0.03, combined.stats.usage.cost_usd
  assert_equal 300, combined.stats.model_usage["claude-sonnet"].input_tokens
  assert_equal 125, combined.stats.model_usage["claude-sonnet"].output_tokens
end

# With a single prompt and three turn_start events emitted by the stub, the output is
# expected to report exactly three turns.
test "invoke does not sum stats for single invocation" do
  input = Agent::Input.new
  input.prompt = "Only prompt"

  CommandRunner.stubs(:execute).with do |_args, **kwargs|
    handler = kwargs[:stdout_handler]
    3.times { handler&.call({ type: "turn_start" }.to_json) }

    message_end = {
      type: "message_end",
      message: {
        role: "assistant",
        model: "claude-sonnet",
        content: [{ type: "text", text: "done" }],
        usage: {
          input: 100,
          output: 50,
          cacheRead: 0,
          cacheWrite: 0,
          cost: { total: 0.01 },
        },
      },
    }
    handler&.call(message_end.to_json)
    true
  end.returns(["", "", mock_status(success: true)])

  single = @provider.invoke(input)

  assert_equal 3, single.stats.num_turns
end

test "invoke uses input session when no previous invocation session exists" do
input = Agent::Input.new
input.prompts = ["Main task", "Finalize"]
Expand Down
96 changes: 96 additions & 0 deletions test/roast/cogs/agent/stats_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,102 @@ def setup
assert_match(/Tokens \(model2\):/, output)
end

# Adding two Stats adds their durations.
test "+ sums duration_ms" do
  left = Stats.new
  left.duration_ms = 3000
  right = Stats.new
  right.duration_ms = 2000

  total = left + right

  assert_equal 5000, total.duration_ms
end

# Adding two Stats adds their turn counts.
test "+ sums num_turns" do
  left = Stats.new
  left.num_turns = 3
  right = Stats.new
  right.num_turns = 5

  total = left + right

  assert_equal 8, total.num_turns
end

# Adding two Stats adds the token counts and costs held in their overall usage.
test "+ sums usage" do
  left = Stats.new
  left.usage.input_tokens = 100
  left.usage.cost_usd = 0.01
  right = Stats.new
  right.usage.input_tokens = 200
  right.usage.cost_usd = 0.02

  total = left + right

  assert_equal 300, total.usage.input_tokens
  assert_in_delta 0.03, total.usage.cost_usd
end

# When each operand tracks a different model, the sum carries both entries with their
# original token counts.
test "+ merges model_usage for different models" do
  left = Stats.new
  left_usage = Usage.new
  left_usage.input_tokens = 100
  left.model_usage["model-a"] = left_usage

  right = Stats.new
  right_usage = Usage.new
  right_usage.input_tokens = 200
  right.model_usage["model-b"] = right_usage

  total = left + right

  assert_equal 100, total.model_usage["model-a"].input_tokens
  assert_equal 200, total.model_usage["model-b"].input_tokens
end

# When both operands track the same model, the sum holds one entry whose token counts are
# the totals of the two.
test "+ sums model_usage for the same model" do
  left = Stats.new
  left_usage = Usage.new
  left_usage.input_tokens = 100
  left_usage.output_tokens = 50
  left.model_usage["claude"] = left_usage

  right = Stats.new
  right_usage = Usage.new
  right_usage.input_tokens = 200
  right_usage.output_tokens = 75
  right.model_usage["claude"] = right_usage

  total = left + right

  assert_equal 300, total.model_usage["claude"].input_tokens
  assert_equal 125, total.model_usage["claude"].output_tokens
end

# Two freshly built Stats have no duration or turn count; their sum must leave both nil
# rather than turning them into zero.
test "+ returns nil for fields that are nil on both sides" do
  left = Stats.new
  right = Stats.new

  total = left + right

  assert_nil total.duration_ms
  assert_nil total.num_turns
end

# Adding two Stats must leave both operands untouched.
#
# Both the left and the right operand are checked, since the test name promises that
# neither is mutated.
test "+ does not mutate operands" do
  a = Stats.new
  a.duration_ms = 1000
  a.num_turns = 2
  b = Stats.new
  b.duration_ms = 2000
  b.num_turns = 3

  _ = a + b

  assert_equal 1000, a.duration_ms
  assert_equal 2, a.num_turns
  assert_equal 2000, b.duration_ms
  assert_equal 3, b.num_turns
end

test "to_s formats complete stats" do
@stats.duration_ms = 5000
@stats.num_turns = 3
Expand Down
Loading
Loading