Skip to content

Commit adb5e28

Browse files
committed
perf: optimize large file memory handling
- Avoid repeated split() in ValueExtractor by accepting a pre-split lines array
- Add LTTB (Largest-Triangle-Three-Buckets) downsampling algorithm for chart rendering
- Optimize localStorage persistence: large files (>500KB) store only metricsData, not raw content
- Add parsing status indicator and large-file warning in FileList
- Add i18n translations for new UI states

This should significantly reduce memory usage when handling files with 100k+ lines.
1 parent 04e722b commit adb5e28

9 files changed

Lines changed: 267 additions & 48 deletions

File tree

public/locales/en/translation.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
"fileList.disabled": "Disabled",
1313
"fileList.config": "Configure file {{name}}",
1414
"fileList.delete": "Remove file {{name}}",
15+
"fileList.parsing": "Parsing",
16+
"fileList.needsReupload": "Large file - re-upload required to re-parse",
17+
"fileList.needsReuploadTip": "File data is cached, but re-upload is required to modify parsing config",
1518
"comparison.title": "Compare Mode",
1619
"comparison.select": "Select comparison mode",
1720
"comparison.multiFileMode": "Multi-file comparison mode",

public/locales/zh/translation.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
"fileList.disabled": "已禁用",
1313
"fileList.config": "配置文件 {{name}}",
1414
"fileList.delete": "删除文件 {{name}}",
15+
"fileList.parsing": "解析中",
16+
"fileList.needsReupload": "大文件 - 需要重新上传才能重新解析",
17+
"fileList.needsReuploadTip": "此文件数据已缓存,但需要重新上传才能修改解析配置",
1518
"comparison.title": "对比模式",
1619
"comparison.select": "选择数据对比模式",
1720
"comparison.multiFileMode": "多文件对比模式",

src/App.jsx

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ import { Header } from './components/Header';
1111
import { PanelLeftClose, PanelLeftOpen } from 'lucide-react';
1212
import { mergeFilesWithReplacement } from './utils/mergeFiles.js';
1313

14+
// Threshold for "large file" - files above this won't have content persisted
15+
const LARGE_FILE_THRESHOLD = 500 * 1024; // 500KB of content
16+
1417
// Default global parsing configuration
1518
export const DEFAULT_GLOBAL_PARSING_CONFIG = {
1619
metrics: [
@@ -35,7 +38,22 @@ function App() {
3538
const { t } = useTranslation();
3639
const [uploadedFiles, setUploadedFiles] = useState(() => {
3740
const stored = localStorage.getItem('uploadedFiles');
38-
return stored ? JSON.parse(stored) : [];
41+
if (!stored) return [];
42+
try {
43+
const parsed = JSON.parse(stored);
44+
// Restore files with proper defaults for large files that have metricsData
45+
return parsed.map(file => ({
46+
...file,
47+
enabled: file.enabled ?? true,
48+
isParsing: false,
49+
// For large files, metricsData is already stored; for small files it will be re-parsed
50+
metricsData: file.metricsData || {},
51+
// Mark large files that need re-upload for re-parsing
52+
needsReupload: file.isLargeFile && !file.content
53+
}));
54+
} catch {
55+
return [];
56+
}
3957
});
4058

4159
// Global parsing configuration state
@@ -118,16 +136,26 @@ function App() {
118136
useEffect(() => {
119137
if (savingDisabledRef.current) return;
120138
try {
121-
const serialized = uploadedFiles.map(({ id, name, enabled, content, config }) => ({
122-
id,
123-
name,
124-
enabled,
125-
content,
126-
config
127-
}));
139+
// Smart serialization: for large files, only store metricsData (not raw content)
140+
// This allows the app to still display charts after refresh, but re-parsing will need re-upload
141+
const serialized = uploadedFiles.map(({ id, name, enabled, content, config, metricsData }) => {
142+
const isLargeFile = content && content.length > LARGE_FILE_THRESHOLD;
143+
return {
144+
id,
145+
name,
146+
enabled,
147+
// For large files, don't store content to save memory/storage
148+
content: isLargeFile ? null : content,
149+
config,
150+
// Store metricsData for large files so charts still work after refresh
151+
metricsData: isLargeFile ? metricsData : undefined,
152+
// Flag to indicate this file needs re-upload for re-parsing
153+
isLargeFile
154+
};
155+
});
128156
if (serialized.length > 0) {
129157
const json = JSON.stringify(serialized);
130-
// Avoid filling localStorage with very large files
158+
// Avoid filling localStorage with very large data
131159
if (json.length > 5 * 1024 * 1024) {
132160
savingDisabledRef.current = true;
133161
console.warn('Uploaded files exceed storage limit; persistence disabled.');

src/components/ChartContainer.jsx

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import zoomPlugin from 'chartjs-plugin-zoom';
1616
import { ImageDown, Copy, FileDown } from 'lucide-react';
1717
import { getMinSteps } from "../utils/getMinSteps.js";
1818
import { useTranslation } from 'react-i18next';
19+
import { adaptiveDownsample } from "../utils/downsample.js";
1920

2021
ChartJS.register(
2122
CategoryScale,
@@ -272,8 +273,11 @@ export default function ChartContainer({
272273
}
273274
}, [parsedData, onXRangeChange]);
274275

275-
const colors = ['#ef4444', '#3b82f6', '#10b981', '#f59e0b', '#8b5cf6', '#f97316'];
276-
const createChartData = dataArray => {
276+
// Maximum points to render per dataset - prevents browser crashes on large files
277+
const MAX_DISPLAY_POINTS = 3000;
278+
279+
const colors = useMemo(() => ['#ef4444', '#3b82f6', '#10b981', '#f59e0b', '#8b5cf6', '#f97316'], []);
280+
const createChartData = useCallback((dataArray) => {
277281
// Ensure no duplicate datasets
278282
const uniqueItems = dataArray.reduce((acc, item) => {
279283
const exists = acc.find(existing => existing.name === item.name);
@@ -286,9 +290,13 @@ export default function ChartContainer({
286290
return {
287291
datasets: uniqueItems.map((item, index) => {
288292
const color = colors[index % colors.length];
293+
// Apply LTTB downsampling for display - preserves trends while reducing memory
294+
const displayData = adaptiveDownsample(item.data, MAX_DISPLAY_POINTS);
289295
return {
290296
label: item.name?.replace(/\.(log|txt)$/i, '') || `File ${index + 1}`,
291-
data: item.data,
297+
data: displayData,
298+
// Store original data length for reference
299+
_originalLength: item.data.length,
292300
borderColor: color,
293301
backgroundColor: `${color}33`,
294302
borderWidth: 2,
@@ -307,7 +315,7 @@ export default function ChartContainer({
307315
};
308316
})
309317
};
310-
};
318+
}, [colors]);
311319

312320
const getComparisonData = (data1, data2, mode) => {
313321
const map2 = new Map(data2.map(p => [p.x, p.y]));

src/components/FileList.jsx

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import React from 'react';
2-
import { FileText, X, Settings } from 'lucide-react';
2+
import { FileText, X, Settings, Loader2, AlertCircle } from 'lucide-react';
33
import { useTranslation } from 'react-i18next';
44

55
export function FileList({ files, onFileRemove, onFileToggle, onFileConfig }) {
@@ -45,17 +45,34 @@ import { useTranslation } from 'react-i18next';
4545
className="checkbox"
4646
aria-describedby={`file-status-${file.id}`}
4747
/>
48-
<FileText
49-
size={14}
50-
className={`${file.enabled !== false ? 'text-blue-600' : 'text-gray-400 dark:text-gray-500'}`}
51-
aria-hidden="true"
52-
/>
48+
{file.isParsing ? (
49+
<Loader2
50+
size={14}
51+
className="text-blue-600 animate-spin"
52+
aria-hidden="true"
53+
/>
54+
) : file.needsReupload ? (
55+
<AlertCircle
56+
size={14}
57+
className="text-amber-500"
58+
aria-hidden="true"
59+
title={t('fileList.needsReupload')}
60+
/>
61+
) : (
62+
<FileText
63+
size={14}
64+
className={`${file.enabled !== false ? 'text-blue-600' : 'text-gray-400 dark:text-gray-500'}`}
65+
aria-hidden="true"
66+
/>
67+
)}
5368
<span
5469
className={`text-xs font-medium truncate ${
5570
file.enabled !== false ? 'text-gray-700 dark:text-gray-200' : 'text-gray-400 dark:text-gray-500'
5671
}`}
72+
title={file.needsReupload ? t('fileList.needsReuploadTip') : file.name}
5773
>
5874
{file.name}
75+
{file.isParsing && <span className="text-blue-500 ml-1">({t('fileList.parsing')})</span>}
5976
</span>
6077
<span
6178
id={`file-status-${file.id}`}

src/utils/ValueExtractor.js

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,26 @@ export const MATCH_MODES = {
77

88
// Value extractor class
99
export class ValueExtractor {
10-
// Keyword match
11-
static extractByKeyword(content, keyword) {
10+
// Helper to get lines array - accepts either content string or pre-split lines array
11+
static getLines(contentOrLines) {
12+
if (!contentOrLines) return [];
13+
if (Array.isArray(contentOrLines)) return contentOrLines;
14+
return contentOrLines.split('\n');
15+
}
16+
17+
// Keyword match - now accepts either content string or pre-split lines array
18+
static extractByKeyword(contentOrLines, keyword) {
1219
const results = [];
13-
// Handle empty content
14-
if (!content) return results;
15-
16-
const lines = content.split('\n');
17-
20+
const lines = this.getLines(contentOrLines);
21+
if (lines.length === 0) return results;
22+
1823
// Number regex supporting scientific notation
1924
const numberRegex = /[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?/;
20-
25+
const keywordLower = keyword.toLowerCase();
26+
2127
lines.forEach((line, lineIndex) => {
2228
// Find keyword (case-insensitive)
23-
const keywordIndex = line.toLowerCase().indexOf(keyword.toLowerCase());
29+
const keywordIndex = line.toLowerCase().indexOf(keywordLower);
2430
if (keywordIndex !== -1) {
2531
// Find first number after the keyword
2632
const afterKeyword = line.substring(keywordIndex + keyword.length);
@@ -43,13 +49,12 @@ export class ValueExtractor {
4349
return results;
4450
}
4551

46-
// Column position match
47-
static extractByColumn(content, columnIndex, separator = ' ') {
52+
// Column position match - now accepts either content string or pre-split lines array
53+
static extractByColumn(contentOrLines, columnIndex, separator = ' ') {
4854
const results = [];
49-
if (!content) return results;
55+
const lines = this.getLines(contentOrLines);
56+
if (lines.length === 0) return results;
5057

51-
const lines = content.split('\n');
52-
5358
lines.forEach((line, lineIndex) => {
5459
if (line.trim()) {
5560
const columns = separator === ' '
@@ -72,13 +77,12 @@ export class ValueExtractor {
7277
return results;
7378
}
7479

75-
// Smart parsing
76-
static extractBySmart(content, type = 'loss') {
80+
// Smart parsing - now accepts either content string or pre-split lines array
81+
static extractBySmart(contentOrLines, type = 'loss') {
7782
const results = [];
78-
if (!content) return results;
83+
const lines = this.getLines(contentOrLines);
84+
if (lines.length === 0) return results;
7985

80-
const lines = content.split('\n');
81-
8286
// Smart keyword list
8387
const keywords = type === 'loss'
8488
? ['loss', 'training_loss', 'train_loss', 'val_loss', 'validation_loss']
@@ -143,13 +147,12 @@ export class ValueExtractor {
143147
return results;
144148
}
145149

146-
// Regex match (original functionality)
147-
static extractByRegex(content, regex) {
150+
// Regex match (original functionality) - now accepts either content string or pre-split lines array
151+
static extractByRegex(contentOrLines, regex) {
148152
const results = [];
149-
if (!content) return results;
153+
const lines = this.getLines(contentOrLines);
154+
if (lines.length === 0) return results;
150155

151-
const lines = content.split('\n');
152-
153156
try {
154157
const regexObj = new RegExp(regex, 'gi');
155158
lines.forEach((line, lineIndex) => {

0 commit comments

Comments
 (0)