Commit 000e2f5

fix(bench): remove artificial operation breakdown calculations
The operation breakdown was completely fake: it just divided total time equally among detected operations (e.g., 6 ops = 16.67% each).

Removed:
- OperationMetric struct
- gather_operation_metrics() function that did the artificial calculations
- Operation breakdown display in console output
- Operation metrics in JSON output
- Unused HashMap import

What remains (all legitimate measurements):
- Latency statistics (min, p50, p95, p99, max, stddev) from actual timings
- Parse percentage from actual parse time vs total time
- Throughput calculations from real measurements
- Scaling analysis from comparing actual runs

The --detailed flag now only shows real latency statistics, not fake per-operation breakdowns.
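To make the problem concrete, here is a minimal standalone sketch of the arithmetic the deleted gather_operation_metrics() boiled down to (the operation names and the 600 ms total are made up for illustration):

use std::time::Duration;

fn main() {
    // The total time is measured once for the whole template run...
    let total_time = Duration::from_millis(600);
    // ...then split evenly across whatever operations were detected.
    let detected_ops = ["Split", "Map", "Filter", "Sort", "Join", "Upper"];

    for op in &detected_ops {
        // Every operation reports the same share regardless of real cost:
        // 600 ms / 6 = 100 ms each, and 100.0 / 6 = 16.67% each.
        let fake_time = total_time / detected_ops.len() as u32;
        let fake_pct = 100.0 / detected_ops.len() as f64;
        println!("{op:<8} {fake_time:?} ({fake_pct:.2}%)");
    }
}

With six detected operations every row comes out to 16.67%, exactly the pattern described above, no matter which operation actually dominates.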
1 parent e69b45a commit 000e2f5

File tree

1 file changed: 5 additions, 130 deletions


src/bin/bench_throughput.rs

Lines changed: 5 additions & 130 deletions
@@ -1,5 +1,4 @@
 use clap::{Arg, Command};
-use std::collections::HashMap;
 use std::time::{Duration, Instant};
 use string_pipeline::Template;
 
@@ -12,20 +11,9 @@ struct BenchmarkResult {
     avg_time_per_path: Duration,
     throughput_paths_per_sec: f64,
     parse_percentage: f64,
-    operation_metrics: Vec<OperationMetric>,
     latency_stats: LatencyStatistics,
 }
 
-/// Tracks metrics for individual operation types
-#[derive(Debug, Clone)]
-struct OperationMetric {
-    operation_name: String,
-    total_time: Duration,
-    call_count: usize,
-    avg_time_per_call: Duration,
-    percentage_of_total: f64,
-}
-
 /// Statistical analysis of latency distribution
 #[derive(Debug, Clone)]
 struct LatencyStatistics {
@@ -58,7 +46,6 @@ impl BenchmarkResult {
             avg_time_per_path,
             throughput_paths_per_sec,
             parse_percentage,
-            operation_metrics: Vec::new(),
             latency_stats,
         }
     }
@@ -117,10 +104,6 @@ impl BenchmarkResult {
             self.total_format_time.as_secs_f64() / baseline.total_format_time.as_secs_f64();
         actual / expected
     }
-
-    fn add_operation_metrics(&mut self, metrics: Vec<OperationMetric>) {
-        self.operation_metrics = metrics;
-    }
 }
 
 /// Generates realistic absolute path strings for benchmarking
@@ -295,7 +278,7 @@ impl TemplateSet {
 
 /// Runs a benchmark for a single template with varying input sizes and detailed profiling
 fn benchmark_template(
-    template_name: &str,
+    _template_name: &str,
     template_str: &str,
     sizes: &[usize],
     iterations: usize,
@@ -344,70 +327,14 @@ fn benchmark_template(
             individual_times = vec![avg_per_path; size];
         }
 
-        let mut result = BenchmarkResult::new(size, parse_time, avg_format_time, individual_times);
-
-        // If detailed mode, gather operation-level metrics
-        if detailed {
-            let op_metrics = gather_operation_metrics(&template, template_name, &paths)?;
-            result.add_operation_metrics(op_metrics);
-        }
+        let result = BenchmarkResult::new(size, parse_time, avg_format_time, individual_times);
 
         results.push(result);
     }
 
     Ok(results)
 }
 
-/// Gather detailed metrics for each operation type in the template
-fn gather_operation_metrics(
-    template: &Template,
-    _template_name: &str,
-    paths: &[String],
-) -> Result<Vec<OperationMetric>, Box<dyn std::error::Error>> {
-    // For now, we'll do a simple breakdown by re-running the template
-    // In a future enhancement, we could instrument the library itself
-
-    // Count operation types in the template string
-    let template_str = format!("{:?}", template);
-
-    let mut metrics = Vec::new();
-    let mut operation_counts: HashMap<String, usize> = HashMap::new();
-
-    // Simple heuristic: count operations mentioned
-    let operations = vec![
-        "Split", "Join", "Upper", "Lower", "Trim", "Replace", "Substring", "Reverse",
-        "StripAnsi", "Filter", "Sort", "Unique", "Pad", "Map", "RegexExtract", "Append",
-        "Prepend", "Surround", "Slice", "FilterNot",
-    ];
-
-    for op in &operations {
-        if template_str.contains(op) {
-            *operation_counts.entry(op.to_string()).or_insert(0) += 1;
-        }
-    }
-
-    // Measure total time for the template
-    let total_start = Instant::now();
-    for path in paths {
-        let _ = template.format(path)?;
-    }
-    let total_time = total_start.elapsed();
-
-    // Create metrics based on detected operations
-    // Note: This is a simplified approach. Full instrumentation would require library changes.
-    for (op_name, count) in &operation_counts {
-        metrics.push(OperationMetric {
-            operation_name: op_name.clone(),
-            total_time: total_time / operation_counts.len() as u32, // Simplified distribution
-            call_count: count * paths.len(),
-            avg_time_per_call: total_time / (count * paths.len()) as u32,
-            percentage_of_total: 100.0 / operation_counts.len() as f64, // Simplified
-        });
-    }
-
-    Ok(metrics)
-}
-
 fn format_duration(duration: Duration) -> String {
     let nanos = duration.as_nanos();
     if nanos < 1_000 {
@@ -515,30 +442,11 @@ fn print_template_results(template_name: &str, results: &[BenchmarkResult], deta
         );
     }
 
-    // Detailed operation breakdown for largest size
+    // Latency statistics for largest size
     if detailed && !results.is_empty() {
         let largest_result = results.last().unwrap();
-        if !largest_result.operation_metrics.is_empty() {
-            println!("\n🔍 Operation Breakdown (at {} inputs):", format_size(largest_result.input_size));
-            println!(
-                "{:<20} {:>12} {:>12} {:>15} {:>10}",
-                "Operation", "Calls", "Total Time", "Avg/Call", "% Total"
-            );
-            println!("{}", "-".repeat(80));
-
-            for metric in &largest_result.operation_metrics {
-                println!(
-                    "{:<20} {:>12} {:>12} {:>15} {:>9.2}%",
-                    truncate_name(&metric.operation_name, 20),
-                    format_size(metric.call_count),
-                    format_duration(metric.total_time),
-                    format_duration(metric.avg_time_per_call),
-                    metric.percentage_of_total
-                );
-            }
-        }
 
-        // Latency statistics for largest size
+        // Latency statistics
         let stats = &largest_result.latency_stats;
         println!("\n📈 Latency Statistics (at {} inputs):", format_size(largest_result.input_size));
         println!(
@@ -671,40 +579,7 @@ fn output_json(
             " \"stddev_ns\": {:.2}\n",
             result.latency_stats.stddev
        ));
-        json_output.push_str(" },\n");
-
-        // Operation metrics
-        if !result.operation_metrics.is_empty() {
-            json_output.push_str(" \"operations\": [\n");
-            for (oidx, op) in result.operation_metrics.iter().enumerate() {
-                json_output.push_str(" {\n");
-                json_output.push_str(&format!(
-                    " \"name\": \"{}\",\n",
-                    op.operation_name
-                ));
-                json_output.push_str(&format!(
-                    " \"total_time_ns\": {},\n",
-                    op.total_time.as_nanos()
-                ));
-                json_output.push_str(&format!(" \"call_count\": {},\n", op.call_count));
-                json_output.push_str(&format!(
-                    " \"avg_time_per_call_ns\": {},\n",
-                    op.avg_time_per_call.as_nanos()
-                ));
-                json_output.push_str(&format!(
-                    " \"percentage_of_total\": {:.2}\n",
-                    op.percentage_of_total
-                ));
-                json_output.push_str(if oidx == result.operation_metrics.len() - 1 {
-                    " }\n"
-                } else {
-                    " },\n"
-                });
-            }
-            json_output.push_str(" ]\n");
-        } else {
-            json_output.push_str(" \"operations\": []\n");
-        }
+        json_output.push_str(" }\n");
 
         json_output.push_str(if ridx == results.len() - 1 {
             " }\n"

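The statistics that remain are derived from real per-path timings. For orientation, here is a rough sketch of how nearest-rank percentiles can be computed from such samples; Percentiles is a hypothetical stand-in for the LatencyStatistics struct in the diff (whose fields are not shown above), and the benchmark's actual method may differ:

use std::time::Duration;

// Hypothetical stand-in for the benchmark's LatencyStatistics fields.
struct Percentiles {
    min: Duration,
    p50: Duration,
    p95: Duration,
    p99: Duration,
    max: Duration,
}

fn percentiles(mut times: Vec<Duration>) -> Option<Percentiles> {
    if times.is_empty() {
        return None;
    }
    times.sort_unstable();
    // Nearest-rank percentile over the sorted samples.
    let at = |p: f64| times[((times.len() - 1) as f64 * p).round() as usize];
    Some(Percentiles {
        min: times[0],
        p50: at(0.50),
        p95: at(0.95),
        p99: at(0.99),
        max: times[times.len() - 1],
    })
}

fn main() {
    // 100 fake samples from 1 µs to 100 µs.
    let samples: Vec<Duration> = (1u64..=100).map(Duration::from_micros).collect();
    let p = percentiles(samples).unwrap();
    println!(
        "min={:?} p50={:?} p95={:?} p99={:?} max={:?}",
        p.min, p.p50, p.p95, p.p99, p.max
    );
}

Sorting once and indexing by rank keeps this O(n log n) with no interpolation; whether bench_throughput.rs interpolates between ranks is not visible in this diff.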