
Commit 7917a30

script to generate table of additional datasets
1 parent b064189 commit 7917a30

2 files changed: +324 -3 lines changed

Lines changed: 321 additions & 0 deletions
@@ -0,0 +1,321 @@
#!/usr/bin/env python3
"""
Generate a compact LaTeX table summarizing performance on real-world datasets
across multiple CPUs and compilers.

Outputs a table of the form:

    Dataset | Algorithm | Ryzen(...) | Apple M4(...)

Rules:
- Dataset name extracted automatically from filenames (no hardcoded list)
- Floatbits determined from filename (_s = float32, else float64)
- Only the *native* floatbits for each dataset are kept (e.g., mobilenet=32, gaia=64)
- All CPUs detected automatically
- Filter algorithms via --algos

Directory structure must match your outputs/, e.g.:
    outputs/
        AMD_Ryzen9_9900X/...
        apple_m4/...
"""

import os
import re
import argparse
import pandas as pd

# Regex for parsing rows: algorithm & ns/f & ins/f & ins/c
ROW_RE = re.compile(
    r"^\s*(?P<algo>[A-Za-z0-9_:\\]+)\s*&\s*(?P<nsf>[\d\.]+)\s*&\s*(?P<insf>[\d\.]+)\s*&\s*(?P<insc>[\d\.]+)"
)
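# Illustrative input row (algorithm name and numbers are made up):
#   fast\_float & 10.5 & 52.0 & 3.1 \\
# which yields algo="fast\_float", nsf="10.5", insf="52.0", insc="3.1".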
NATIVE_FLOATBITS = {
    "canada": 64,
    "marine_ik": 32,
    "mesh": 64,
    "bitcoin": 64,
    "numbers": 64,
    "mobilenetv3_large": 32,
    "gaia": 64,
    "noaa_global_hourly_2023": 32,
    "noaa_gfs_1p00": 32,
    "hellfloat64": 64,
}

def detect_dataset(filename):
    """
    Automatically extract dataset name from filename.

    Filename convention:
        CPU_COMPILER_DATASET_VARIANT(.tex/.raw)
    e.g.:
        Ryzen9900x_g++_mobilenetv3_s.tex
        Apple_M4_Max_clang++_noaa_global_hourly_2023_float64.tex

    Dataset = all parts after the compiler (g++/clang++) and before the last
    part (the variant).
    """
    base = os.path.splitext(filename)[0]
    parts = base.split("_")

    # Find compiler index
    comp_idx = None
    for i, p in enumerate(parts):
        if p in ("g++", "clang++"):
            comp_idx = i
            break
    if comp_idx is None:
        return None  # cannot detect dataset

    # dataset parts = everything after compiler, except last element (variant)
    dataset_parts = parts[comp_idx + 1:-1]
    if not dataset_parts:
        return None

    return "_".join(dataset_parts)

def detect_float_bits(filename):
    """
    Detect float bits:
    - filenames ending in *_s.* → float32
    - otherwise → float64
    """
    base = os.path.splitext(filename)[0]
    if base.endswith("_s"):
        return 32
    return 64

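# For the docstring examples above, the two helpers give:
#   Ryzen9900x_g++_mobilenetv3_s.tex
#       -> dataset "mobilenetv3", floatbits 32
#   Apple_M4_Max_clang++_noaa_global_hourly_2023_float64.tex
#       -> dataset "noaa_global_hourly_2023", floatbits 64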
def clean_cpu(cpu_dir):
    """
    Clean CPU directory name for nicer LaTeX display.
    """
    cpu = cpu_dir.replace("_", " ").strip()

    # Ryzen name cleanup
    cpu = cpu.replace("Ryzen9", "Ryzen 9")

    # Apple cleanup
    cpu = cpu.replace("apple m4", "Apple M4 Max")
    cpu = cpu.replace("apple", "Apple")

    return cpu

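# With the directory names from the module docstring:
#   "AMD_Ryzen9_9900X" -> "AMD Ryzen 9 9900X"
#   "apple_m4"         -> "Apple M4 Max"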
def parse_tex_result(path, cpu, compiler):
    """
    Parse 4-column LaTeX table rows of the form:
        algo & ns/f & ins/f & ins/c
    Return a list of row dicts: algorithm, metrics, cpu, compiler, dataset, floatbits.
    """
    fname = os.path.basename(path)
    dataset = detect_dataset(fname)
    if dataset is None:
        return []

    floatbits = detect_float_bits(fname)
    rows = []

    with open(path, "r") as f:
        for line in f:
            m = ROW_RE.match(line)
            if not m:
                continue
            rows.append({
                "algorithm": m.group("algo"),
                "dataset": dataset,
                "cpu": cpu,
                "compiler": compiler,
                "floatbits": floatbits,
                "ns/f": float(m.group("nsf")),
                "ins/f": float(m.group("insf")),
                "ins/c": float(m.group("insc")),
            })

    return rows

def scan_outputs(root):
    """
    Scan each CPU subdirectory of `root` for .tex result files.
    """
    all_rows = []

    for cpu_dir in sorted(os.listdir(root)):
        cpu_path = os.path.join(root, cpu_dir)
        if not os.path.isdir(cpu_path):
            continue

        cpu_clean = clean_cpu(cpu_dir)
        for fname in sorted(os.listdir(cpu_path)):
            if not fname.endswith(".tex"):
                continue

            full = os.path.join(cpu_path, fname)

            # determine compiler
            if "_g++" in fname:
                compiler = "g++"
            elif "_clang++" in fname:
                compiler = "clang++"
            else:
                compiler = "unknown"

            # parse
            rows = parse_tex_result(full, cpu_clean, compiler)
            all_rows.extend(rows)

    return all_rows

def select_native_floatbits(df):
    """
    Keep only the native floatbits for each dataset.
    Native floatbits are defined explicitly in NATIVE_FLOATBITS.
    """
    cleaned = []
    for _, row in df.iterrows():
        ds = row["dataset"]
        if ds not in NATIVE_FLOATBITS:
            # unknown dataset: drop the row, but keep the mask aligned with df
            cleaned.append(False)
            continue

        if row["floatbits"] == NATIVE_FLOATBITS[ds]:
            cleaned.append(True)
        else:
            cleaned.append(False)

    return df[cleaned]

def latex_table(df, algo_filter=None):
    """
    Build the final compact LaTeX table.
    Columns:
        Dataset | Algorithm | CPU1(ns/f,ins/f,ins/c) | CPU2(...) | ...
    """
    if algo_filter:
        df = df[df["algorithm"].isin(algo_filter)]

    cpus = sorted(df["cpu"].unique())
    datasets = sorted(df["dataset"].unique())

    tex = []
    tex.append("\\begin{table*}[htbp]")
    tex.append(" \\centering")
    tex.append(
        " \\caption{Performance on additional real-world datasets across CPUs.}"
    )
    tex.append(" \\label{tab:additional_summary}")
    tex.append("")
    tex.append(" \\begin{tabular}{ll" + "r" * (3 * len(cpus)) + "}")
    tex.append(" \\toprule")

    # CPU header row
    header = ["Dataset", "Algorithm"]
    for cpu in cpus:
        header.append(f"\\multicolumn{{3}}{{c}}{{{cpu}}}")
    tex.append(" " + " & ".join(header) + " \\\\")

    for i in range(len(cpus)):
        left = 3 + 3 * i
        right = left + 2
        tex.append(f" \\cmidrule(lr){{{left}-{right}}}")

    # Metric header
    sub = ["", ""]
    sub.extend(["ns/f", "ins/f", "ins/c"] * len(cpus))
    tex.append(" " + " & ".join(sub) + " \\\\")
    tex.append(" \\midrule")

    # Rows grouped by dataset
    prev_ds = None
    for ds in datasets:

        # midrule between dataset blocks
        if prev_ds is not None:
            tex.append(" \\midrule")
        prev_ds = ds

        # dataset label
        display_ds = f"{ds} (f{NATIVE_FLOATBITS[ds]})".replace("_", "\\_")

        first_line = True
        for algo in sorted(df["algorithm"].unique()):

            # dataset only on first line of group
            if first_line:
                row = [display_ds, algo]
                first_line = False
            else:
                row = ["", algo]

            # Collect values per CPU as triples
            cpu_values = []
            for cpu in cpus:
                subdf = df[(df["algorithm"] == algo) & (df["dataset"] == ds) &
                           (df["cpu"] == cpu)]
                if subdf.empty:
                    cpu_values.append(["--", "--", "--"])
                else:
                    r = subdf.iloc[0]
                    cpu_values.append([
                        f"{r['ns/f']:.1f}",
                        f"{r['ins/f']:.0f}",
                        f"{r['ins/c']:.1f}",
                    ])

            flat = [v for triple in cpu_values for v in triple]

            if all(v == "--" for v in flat):
                continue

            for triple in cpu_values:
                row.extend(triple)

            tex.append(" " + " & ".join(row) + " \\\\")

    tex.append(" \\bottomrule")
    tex.append(" \\end{tabular}")
    tex.append("\\end{table*}\n")
    return "\n".join(tex)

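# For two CPUs, the emitted header block looks like this (CPU names are placeholders):
#   Dataset & Algorithm & \multicolumn{3}{c}{<cpu 1>} & \multicolumn{3}{c}{<cpu 2>} \\
#   \cmidrule(lr){3-5}
#   \cmidrule(lr){6-8}
#    &  & ns/f & ins/f & ins/c & ns/f & ins/f & ins/c \\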
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("-i",
                    "--input-dir",
                    default="./outputs",
                    help="Directory containing CPU output folders.")
    ap.add_argument("-o",
                    "--output",
                    default="./outputs/extra_datasets_table.tex",
                    help="Output LaTeX filename.")
    ap.add_argument("--algos",
                    nargs="+",
                    default=None,
                    help="Filter: keep only these algorithms.")
    args = ap.parse_args()

    rows = scan_outputs(args.input_dir)
    if not rows:
        print("No results found.")
        return

    df = pd.DataFrame(rows)
    if df.empty:
        print("No valid rows parsed.")
        return

    df = select_native_floatbits(df)
    tex = latex_table(df, args.algos)

    with open(args.output, "w") as f:
        f.write(tex)

    print(f"[OK] wrote {args.output}")


if __name__ == "__main__":
    main()
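Example usage, as a sketch: the new script's path is not shown in this diff, so the filename below is hypothetical; the options are the ones defined in main() above, and --algos can optionally restrict the table to a subset of algorithms.

    python3 scripts/generate_extra_datasets_table.py --input-dir ./outputs --output ./outputs/extra_datasets_table.tex

The result is a single table* environment that can be pulled into a LaTeX document with \input.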

scripts/generate_multiple_tables.py

Lines changed: 3 additions & 3 deletions
@@ -14,8 +14,8 @@

 # Configuration
 input_files = [
-    'data/canada.txt',
-    'data/mesh.txt',
+    # 'data/canada.txt',
+    # 'data/mesh.txt',
     'data/bitcoin.txt',
     'data/gaia.txt',
     'data/marine_ik.txt',
@@ -24,7 +24,7 @@
     'data/noaa_global_hourly_2023.txt'
 ]
 models = [
-    'uniform_01',
+    # 'uniform_01',
     # 'logspace_all',
     # 'integer_uniform',
     # 'centered',
