From 17e8183e91de26279e98932cbe3b6440e78e1a70 Mon Sep 17 00:00:00 2001 From: konard Date: Tue, 13 Jan 2026 13:01:42 +0100 Subject: [PATCH 1/4] Initial commit with task details Adding CLAUDE.md with task information for AI processing. This file will be removed when the task is complete. Issue: https://github.com/link-foundation/links-notation/issues/197 --- CLAUDE.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..5818f01 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,5 @@ +Issue to solve: https://github.com/link-foundation/links-notation/issues/197 +Your prepared branch: issue-197-93f74116bd61 +Your prepared working directory: /tmp/gh-issue-solver-1768305700276 + +Proceed. From 08d931c73a425fc61e56f02c873a0928f456036f Mon Sep 17 00:00:00 2001 From: konard Date: Tue, 13 Jan 2026 13:11:22 +0100 Subject: [PATCH 2/4] Add streaming parser API for large message handling This commit adds a StreamParser class to both JavaScript and Rust implementations for efficient processing of large Links Notation messages. 
Features: - Event-based API to receive links as they are parsed - Memory efficient: no need to load entire message into memory - Detailed error reporting with line/column information - Incremental input via write() method - Reset functionality for parser reuse JavaScript API: - StreamParser class with on('link'), on('error'), on('end') events - ParseError class with line, column, offset properties - write(chunk), end(), reset(), getLinks() methods Rust API: - StreamParser struct with on_link() and on_error() callbacks - StreamParseError with location information - write(), finish(), reset(), get_links() methods Closes #197 Co-Authored-By: Claude Opus 4.5 --- js/README.md | 60 +++ js/package.json | 2 +- js/src/StreamParser.js | 590 ++++++++++++++++++++++++ js/src/index.js | 1 + js/tests/StreamParser.test.js | 490 ++++++++++++++++++++ rust/Cargo.lock | 2 +- rust/Cargo.toml | 2 +- rust/README.md | 67 +++ rust/src/lib.rs | 1 + rust/src/stream_parser.rs | 841 ++++++++++++++++++++++++++++++++++ 10 files changed, 2053 insertions(+), 3 deletions(-) create mode 100644 js/src/StreamParser.js create mode 100644 js/tests/StreamParser.test.js create mode 100644 rust/src/stream_parser.rs diff --git a/js/README.md b/js/README.md index 8b4d1c3..cad1bfc 100644 --- a/js/README.md +++ b/js/README.md @@ -113,6 +113,40 @@ console.log('ID:', link.id); console.log('Values:', link.values); ``` +### Streaming Parser + +For processing large messages efficiently without loading everything into memory: + +```javascript +import { StreamParser, ParseError } from 'links-notation'; + +const parser = new StreamParser(); + +// Register event handlers +parser.on('link', (link) => { + // Process each link as it's parsed + console.log('Parsed:', link.toString()); +}); + +parser.on('error', (error) => { + // Handle parse errors with location info + console.error(`Error at line ${error.line}, col ${error.column}: ${error.message}`); +}); + +// Feed data incrementally +parser.write('papa (lovesMama: 
loves mama)\n'); +parser.write('son lovesMama\n'); + +// Finish parsing and get all links +const links = parser.end(); +``` + +The streaming parser supports: +- **Memory efficiency**: Process large messages without loading everything into memory +- **Low latency**: Start processing before the full message is received +- **Detailed error reporting**: Errors include line and column information +- **Event-based API**: Receive links as they are parsed + ### Advanced Usage ```javascript @@ -195,12 +229,38 @@ Container for grouping related links. - `constructor(links)` - Create a new group - `format()` - Format the group as a string +#### `StreamParser` + +Streaming parser for incremental processing. + +- `constructor(options = {})` - Create a new streaming parser + - `options.maxInputSize` - Maximum input size in bytes (default: 10MB) + - `options.maxDepth` - Maximum nesting depth (default: 1000) +- `on(event, handler)` - Register an event handler ('link', 'error', 'end') +- `off(event, handler)` - Remove an event handler +- `write(chunk)` - Write a chunk of data to the parser +- `end()` - Signal end of input and get all parsed links +- `reset()` - Reset the parser for reuse +- `getLinks()` - Get all links parsed so far +- `getPosition()` - Get current parser position (line, column, offset) +- `isEnded()` - Check if parser has ended + +#### `ParseError` + +Error class with location information. 
+ +- `message` - Error message +- `line` - Line number (1-based) +- `column` - Column number (1-based) +- `offset` - Byte offset in the input + ## Project Structure - `src/grammar.pegjs` - Peggy.js grammar definition - `src/Link.js` - Link data structure - `src/LinksGroup.js` - Links group container - `src/Parser.js` - Parser wrapper +- `src/StreamParser.js` - Streaming parser for large messages - `src/index.js` - Main entry point - `tests/` - Test files diff --git a/js/package.json b/js/package.json index 3d95b43..4941519 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "links-notation", - "version": "0.13.0", + "version": "0.14.0", "description": "Links Notation parser for JavaScript", "main": "dist/index.js", "type": "module", diff --git a/js/src/StreamParser.js b/js/src/StreamParser.js new file mode 100644 index 0000000..ac8cad9 --- /dev/null +++ b/js/src/StreamParser.js @@ -0,0 +1,590 @@ +import { Link } from './Link.js'; +import * as parserModule from './parser-generated.js'; + +/** + * ParseError with detailed location information + */ +export class ParseError extends Error { + /** + * Create a new ParseError with location info + * @param {string} message - Error message + * @param {number} line - Line number (1-based) + * @param {number} column - Column number (1-based) + * @param {number} offset - Byte offset in the input + */ + constructor(message, line = null, column = null, offset = null) { + super(message); + this.name = 'ParseError'; + this.line = line; + this.column = column; + this.offset = offset; + } + + /** + * Create a formatted error message with location + * @returns {string} Formatted error string + */ + toString() { + if (this.line !== null && this.column !== null) { + return `ParseError at line ${this.line}, column ${this.column}: ${this.message}`; + } + return `ParseError: ${this.message}`; + } +} + +/** + * Streaming parser for Links Notation (Lino) + * + * Allows processing data incrementally and emitting 
parsed links + * as they become available, without loading the entire input into memory. + * + * @example + * const parser = new StreamParser(); + * + * parser.on('link', (link) => { + * console.log('Parsed link:', link); + * }); + * + * parser.on('error', (error) => { + * console.error(`Error at line ${error.line}: ${error.message}`); + * }); + * + * parser.write(chunk1); + * parser.write(chunk2); + * const links = parser.end(); + */ +export class StreamParser { + /** + * Create a new StreamParser instance + * @param {Object} options - Parser options + * @param {number} options.maxInputSize - Maximum total input size in bytes (default: 10MB) + * @param {number} options.maxDepth - Maximum nesting depth (default: 1000) + */ + constructor(options = {}) { + this.maxInputSize = options.maxInputSize || 10 * 1024 * 1024; // 10MB default + this.maxDepth = options.maxDepth || 1000; + + // Buffer for incomplete input + this._buffer = ''; + this._totalBytesReceived = 0; + + // Position tracking for error reporting + this._currentLine = 1; + this._currentColumn = 1; + this._lineOffsets = [0]; // Byte offset where each line starts + + // Event handlers + this._handlers = { + link: [], + error: [], + end: [] + }; + + // Parsed links + this._links = []; + + // State + this._ended = false; + } + + /** + * Register an event handler + * @param {string} event - Event name ('link', 'error', or 'end') + * @param {Function} handler - Handler function + * @returns {StreamParser} This instance for chaining + */ + on(event, handler) { + if (this._handlers[event]) { + this._handlers[event].push(handler); + } + return this; + } + + /** + * Remove an event handler + * @param {string} event - Event name + * @param {Function} handler - Handler function to remove + * @returns {StreamParser} This instance for chaining + */ + off(event, handler) { + if (this._handlers[event]) { + const index = this._handlers[event].indexOf(handler); + if (index !== -1) { + this._handlers[event].splice(index, 1); 
+ } + } + return this; + } + + /** + * Emit an event to all registered handlers + * @param {string} event - Event name + * @param {*} data - Event data + * @private + */ + _emit(event, data) { + if (this._handlers[event]) { + for (const handler of this._handlers[event]) { + try { + handler(data); + } catch (e) { + // Handler errors shouldn't break the parser + if (event !== 'error') { + this._emit('error', new ParseError(`Handler error: ${e.message}`)); + } + } + } + } + } + + /** + * Update position tracking based on processed text + * @param {string} text - Text that was processed + * @private + */ + _updatePosition(text) { + for (const char of text) { + if (char === '\n') { + this._currentLine++; + this._currentColumn = 1; + this._lineOffsets.push(this._totalBytesReceived); + } else { + this._currentColumn++; + } + this._totalBytesReceived++; + } + } + + /** + * Calculate line and column from byte offset + * @param {number} offset - Byte offset + * @returns {{line: number, column: number}} Position + * @private + */ + _getPositionFromOffset(offset) { + let line = 1; + for (let i = 0; i < this._lineOffsets.length; i++) { + if (this._lineOffsets[i] > offset) { + break; + } + line = i + 1; + } + const lineStart = this._lineOffsets[line - 1] || 0; + const column = offset - lineStart + 1; + return { line, column }; + } + + /** + * Write a chunk of data to the parser + * @param {string} chunk - Data chunk to process + * @returns {StreamParser} This instance for chaining + * @throws {Error} If parser has ended or input exceeds max size + */ + write(chunk) { + if (this._ended) { + throw new Error('Cannot write to a parser that has ended'); + } + + if (typeof chunk !== 'string') { + throw new TypeError('Chunk must be a string'); + } + + // Check total size + if (this._buffer.length + chunk.length > this.maxInputSize) { + const error = new ParseError( + `Input size exceeds maximum allowed size of ${this.maxInputSize} bytes`, + this._currentLine, + this._currentColumn + ); 
+ this._emit('error', error); + throw error; + } + + this._buffer += chunk; + + // Try to parse complete lines/elements + this._processBuffer(); + + return this; + } + + /** + * Process buffered data and emit links for complete elements + * @private + */ + _processBuffer() { + // For streaming, we need to identify complete top-level elements + // A complete element is one where we have seen the end of the line + // and any subsequent lines are at a lower or equal indentation level + + // Find the last position where we can safely parse + // This is tricky because we need to handle: + // 1. Multiline parenthesized elements: (...) + // 2. Indented elements: id:\n value1\n value2 + // 3. Single-line elements: id: value1 value2 + + const safePoint = this._findSafeParsePoint(); + + if (safePoint > 0) { + const toParse = this._buffer.slice(0, safePoint); + this._buffer = this._buffer.slice(safePoint); + + this._parseAndEmit(toParse); + } + } + + /** + * Find the last safe point to parse (end of a complete top-level element) + * @returns {number} Byte offset where we can safely parse up to + * @private + */ + _findSafeParsePoint() { + const buffer = this._buffer; + + // We can't parse incomplete data + if (buffer.length === 0) { + return 0; + } + + // Look for complete lines that form complete top-level elements + // A top-level element ends when: + // 1. We see a line at base indentation (or start of new top-level element) + // 2. 
We have a complete parenthesized expression + + let lastSafePoint = 0; + let i = 0; + let inParentheses = 0; + let baseIndentation = null; + let currentIndentation = 0; + let lineStart = 0; + let inQuote = false; + let quoteChar = null; + let quoteCount = 0; + + while (i < buffer.length) { + const char = buffer[i]; + + // Track quote state for proper parsing + if (!inQuote && (char === '"' || char === "'" || char === '`')) { + // Count consecutive quotes + quoteChar = char; + quoteCount = 0; + let j = i; + while (j < buffer.length && buffer[j] === quoteChar) { + quoteCount++; + j++; + } + if (quoteCount > 0) { + inQuote = true; + i = j; + continue; + } + } else if (inQuote && char === quoteChar) { + // Check for closing quotes + let count = 0; + let j = i; + while (j < buffer.length && buffer[j] === quoteChar) { + count++; + j++; + } + // Check if this is an escape (2*N) or close (N) + if (count === quoteCount * 2) { + // Escape sequence - skip + i = j; + continue; + } else if (count >= quoteCount) { + // Closing quote + inQuote = false; + quoteChar = null; + i += quoteCount; + continue; + } + } + + if (inQuote) { + i++; + continue; + } + + // Track parentheses + if (char === '(') { + inParentheses++; + } else if (char === ')') { + inParentheses--; + } + + // Track line boundaries and indentation + if (char === '\n') { + // Check if this ends a complete top-level element + if (inParentheses === 0) { + // Check indentation of next line + let nextIndent = 0; + let j = i + 1; + while (j < buffer.length && buffer[j] === ' ') { + nextIndent++; + j++; + } + + // Check if we have content on next line + if (j < buffer.length && buffer[j] !== '\n' && buffer[j] !== '\r') { + // First non-empty line sets base indentation + if (baseIndentation === null && lineStart === 0) { + // Find first content line's indentation + let firstContentIndent = 0; + let k = 0; + while (k < buffer.length && buffer[k] === ' ') { + firstContentIndent++; + k++; + } + baseIndentation = 
firstContentIndent; + } + + // If next line is at base indentation and we're not waiting for indented children + // this could be a new top-level element + const normalizedNext = baseIndentation !== null ? Math.max(0, nextIndent - baseIndentation) : nextIndent; + + if (normalizedNext === 0) { + // This line boundary is a safe parse point + lastSafePoint = i + 1; + } + } + } + + lineStart = i + 1; + currentIndentation = 0; + } else if (i === lineStart && char === ' ') { + currentIndentation++; + } + + i++; + } + + // If buffer ends with newline and no unclosed parens, it's safe + if (buffer.endsWith('\n') && inParentheses === 0 && !inQuote) { + lastSafePoint = buffer.length; + } + + return lastSafePoint; + } + + /** + * Parse text and emit resulting links + * @param {string} text - Text to parse + * @private + */ + _parseAndEmit(text) { + if (!text.trim()) { + this._updatePosition(text); + return; + } + + try { + const rawResult = parserModule.parse(text); + const links = this._transformResult(rawResult); + + for (const link of links) { + this._links.push(link); + this._emit('link', link); + } + } catch (error) { + // Extract location from Peggy error + let line = this._currentLine; + let column = this._currentColumn; + let offset = null; + + if (error.location) { + line = this._currentLine + error.location.start.line - 1; + column = error.location.start.line === 1 + ? this._currentColumn + error.location.start.column - 1 + : error.location.start.column; + offset = error.location.start.offset; + } + + const parseError = new ParseError(error.message, line, column, offset); + parseError.cause = error; + this._emit('error', parseError); + } + + this._updatePosition(text); + } + + /** + * Transform raw parse result into Link objects + * @param {*} rawResult - Raw parser output + * @returns {Link[]} Array of Link objects + * @private + */ + _transformResult(rawResult) { + const links = []; + const items = Array.isArray(rawResult) ? 
rawResult : [rawResult]; + + for (const item of items) { + if (item !== null && item !== undefined) { + this._collectLinks(item, [], links); + } + } + return links; + } + + /** + * Collect links from parse tree + * @private + */ + _collectLinks(item, parentPath, result) { + if (item === null || item === undefined) return; + + if (item.children && item.children.length > 0) { + if (item.id && (!item.values || item.values.length === 0)) { + const childValues = item.children.map((child) => { + if (child.values && child.values.length === 1) { + return this._transformLink(child.values[0]); + } + return this._transformLink(child); + }); + const linkWithChildren = { + id: item.id, + values: childValues, + }; + const currentLink = this._transformLink(linkWithChildren); + + if (parentPath.length === 0) { + result.push(currentLink); + } else { + result.push(this._combinePathElements(parentPath, currentLink)); + } + } else { + const currentLink = this._transformLink(item); + + if (parentPath.length === 0) { + result.push(currentLink); + } else { + result.push(this._combinePathElements(parentPath, currentLink)); + } + + const newPath = [...parentPath, currentLink]; + + for (const child of item.children) { + this._collectLinks(child, newPath, result); + } + } + } else { + const currentLink = this._transformLink(item); + + if (parentPath.length === 0) { + result.push(currentLink); + } else { + result.push(this._combinePathElements(parentPath, currentLink)); + } + } + } + + /** + * Combine path elements with current link + * @private + */ + _combinePathElements(pathElements, current) { + if (pathElements.length === 0) return current; + if (pathElements.length === 1) { + const combined = new Link(null, [pathElements[0], current]); + combined._isFromPathCombination = true; + return combined; + } + + const parentPath = pathElements.slice(0, -1); + const lastElement = pathElements[pathElements.length - 1]; + let parent = this._combinePathElements(parentPath, lastElement); + const 
combined = new Link(null, [parent, current]); + combined._isFromPathCombination = true; + return combined; + } + + /** + * Transform a parsed item into a Link object + * @private + */ + _transformLink(item) { + if (item === null || item === undefined) return null; + + if (item instanceof Link) { + return item; + } + + if (item.id !== undefined && !item.values && !item.children) { + return new Link(item.id); + } + + if (item.values && Array.isArray(item.values)) { + const link = new Link(item.id || null, []); + link.values = item.values.map((v) => this._transformLink(v)); + return link; + } + + return new Link(item.id || null, []); + } + + /** + * Signal end of input and finish parsing + * @returns {Link[]} All parsed links + * @throws {ParseError} If there's remaining unparseable content + */ + end() { + if (this._ended) { + return this._links; + } + + this._ended = true; + + // Parse any remaining buffered content + if (this._buffer.trim()) { + this._parseAndEmit(this._buffer); + this._buffer = ''; + } + + this._emit('end', this._links); + + return this._links; + } + + /** + * Reset the parser for reuse + * @returns {StreamParser} This instance for chaining + */ + reset() { + this._buffer = ''; + this._totalBytesReceived = 0; + this._currentLine = 1; + this._currentColumn = 1; + this._lineOffsets = [0]; + this._links = []; + this._ended = false; + return this; + } + + /** + * Get all links parsed so far + * @returns {Link[]} Array of parsed links + */ + getLinks() { + return this._links.slice(); + } + + /** + * Get current parser position + * @returns {{line: number, column: number, offset: number}} Current position + */ + getPosition() { + return { + line: this._currentLine, + column: this._currentColumn, + offset: this._totalBytesReceived + }; + } + + /** + * Check if the parser has ended + * @returns {boolean} True if ended + */ + isEnded() { + return this._ended; + } +} diff --git a/js/src/index.js b/js/src/index.js index 54e133b..eee4990 100644 --- 
a/js/src/index.js +++ b/js/src/index.js @@ -1,5 +1,6 @@ export { Link, formatLinks } from './Link.js'; export { LinksGroup } from './LinksGroup.js'; export { Parser } from './Parser.js'; +export { StreamParser, ParseError } from './StreamParser.js'; export { FormatConfig } from './FormatConfig.js'; export { FormatOptions } from './FormatOptions.js'; diff --git a/js/tests/StreamParser.test.js b/js/tests/StreamParser.test.js new file mode 100644 index 0000000..d6e34f6 --- /dev/null +++ b/js/tests/StreamParser.test.js @@ -0,0 +1,490 @@ +import { test, expect, describe } from 'bun:test'; +import { StreamParser, ParseError } from '../src/StreamParser.js'; +import { Parser } from '../src/Parser.js'; +import { Link, formatLinks } from '../src/Link.js'; + +describe('StreamParser', () => { + describe('basic functionality', () => { + test('parses simple single-line input', () => { + const parser = new StreamParser(); + const links = []; + + parser.on('link', (link) => links.push(link)); + + parser.write('a b c\n'); + parser.end(); + + expect(links.length).toBe(1); + expect(links[0].values.length).toBe(3); + }); + + test('parses multiline input incrementally', () => { + const parser = new StreamParser(); + const links = []; + + parser.on('link', (link) => links.push(link)); + + parser.write('line1 value1\n'); + parser.write('line2 value2\n'); + parser.end(); + + expect(links.length).toBe(2); + }); + + test('parses parenthesized link', () => { + const parser = new StreamParser(); + const links = []; + + parser.on('link', (link) => links.push(link)); + + parser.write('(id: value1 value2)\n'); + parser.end(); + + expect(links.length).toBe(1); + expect(links[0].id).toBe('id'); + expect(links[0].values.length).toBe(2); + }); + + test('handles empty input', () => { + const parser = new StreamParser(); + const links = parser.end(); + + expect(links.length).toBe(0); + }); + + test('handles whitespace-only input', () => { + const parser = new StreamParser(); + parser.write(' \n\n 
\n'); + const links = parser.end(); + + expect(links.length).toBe(0); + }); + + test('returns same result as regular Parser', () => { + const input = `papa (lovesMama: loves mama) +son lovesMama +daughter lovesMama`; + + const regularParser = new Parser(); + const regularLinks = regularParser.parse(input); + + const streamParser = new StreamParser(); + streamParser.write(input); + const streamLinks = streamParser.end(); + + expect(streamLinks.length).toBe(regularLinks.length); + + for (let i = 0; i < regularLinks.length; i++) { + expect(formatLinks([streamLinks[i]])).toBe(formatLinks([regularLinks[i]])); + } + }); + }); + + describe('streaming behavior', () => { + test('can write multiple chunks', () => { + const parser = new StreamParser(); + const links = []; + + parser.on('link', (link) => links.push(link)); + + parser.write('first'); + parser.write(' second'); + parser.write('\n'); + parser.end(); + + expect(links.length).toBe(1); + }); + + test('buffers incomplete elements', () => { + const parser = new StreamParser(); + const links = []; + + parser.on('link', (link) => links.push(link)); + + // Write incomplete parenthesized expression + parser.write('(id: val'); + + // Links should be empty (waiting for closing paren) + expect(links.length).toBe(0); + + // Complete the expression + parser.write('ue)\n'); + parser.end(); + + expect(links.length).toBe(1); + expect(links[0].id).toBe('id'); + }); + + test('emits links as soon as they are complete', () => { + const parser = new StreamParser(); + const links = []; + const emitTimes = []; + + parser.on('link', (link) => { + links.push(link); + emitTimes.push(Date.now()); + }); + + parser.write('first line\n'); + const afterFirst = links.length; + + parser.write('second line\n'); + const afterSecond = links.length; + + parser.end(); + + // First link should have been emitted after first write + expect(afterFirst).toBe(1); + // Second link should have been emitted after second write + expect(afterSecond).toBe(2); + 
}); + }); + + describe('event handling', () => { + test('on() returns parser for chaining', () => { + const parser = new StreamParser(); + const result = parser.on('link', () => {}); + + expect(result).toBe(parser); + }); + + test('fires link event for each parsed link', () => { + const parser = new StreamParser(); + const links = []; + + parser.on('link', (link) => links.push(link)); + + parser.write('a\nb\nc\n'); + parser.end(); + + expect(links.length).toBe(3); + }); + + test('fires end event when parsing completes', () => { + const parser = new StreamParser(); + let endCalled = false; + let endLinks = null; + + parser.on('end', (links) => { + endCalled = true; + endLinks = links; + }); + + parser.write('test\n'); + parser.end(); + + expect(endCalled).toBe(true); + expect(endLinks.length).toBe(1); + }); + + test('off() removes event handler', () => { + const parser = new StreamParser(); + let callCount = 0; + + const handler = () => callCount++; + + parser.on('link', handler); + parser.write('first\n'); + + parser.off('link', handler); + parser.write('second\n'); + parser.end(); + + expect(callCount).toBe(1); + }); + + test('multiple handlers for same event', () => { + const parser = new StreamParser(); + const results = []; + + parser.on('link', () => results.push('handler1')); + parser.on('link', () => results.push('handler2')); + + parser.write('test\n'); + parser.end(); + + expect(results).toEqual(['handler1', 'handler2']); + }); + }); + + describe('error handling', () => { + test('fires error event on parse error', () => { + const parser = new StreamParser(); + let errorReceived = null; + + parser.on('error', (error) => { + errorReceived = error; + }); + + // Unclosed parenthesis at end of stream + parser.write('(unclosed\n'); + parser.end(); + + expect(errorReceived).not.toBeNull(); + expect(errorReceived instanceof ParseError).toBe(true); + }); + + test('throws when writing after end', () => { + const parser = new StreamParser(); + parser.end(); + + 
expect(() => parser.write('more data')).toThrow('Cannot write to a parser that has ended'); + }); + + test('throws on non-string input', () => { + const parser = new StreamParser(); + + expect(() => parser.write(123)).toThrow(TypeError); + expect(() => parser.write(null)).toThrow(TypeError); + }); + + test('throws when input exceeds max size', () => { + const parser = new StreamParser({ maxInputSize: 100 }); + + const largeInput = 'x'.repeat(200); + + expect(() => parser.write(largeInput)).toThrow(/exceeds maximum allowed size/); + }); + }); + + describe('ParseError', () => { + test('ParseError has line and column properties', () => { + const error = new ParseError('test message', 5, 10); + + expect(error.line).toBe(5); + expect(error.column).toBe(10); + expect(error.message).toBe('test message'); + }); + + test('ParseError toString includes location', () => { + const error = new ParseError('test error', 3, 7); + + expect(error.toString()).toBe('ParseError at line 3, column 7: test error'); + }); + + test('ParseError toString without location', () => { + const error = new ParseError('test error'); + + expect(error.toString()).toBe('ParseError: test error'); + }); + }); + + describe('position tracking', () => { + test('getPosition() returns current position', () => { + const parser = new StreamParser(); + + parser.write('first line\n'); + const pos1 = parser.getPosition(); + + parser.write('second line\n'); + const pos2 = parser.getPosition(); + + expect(pos1.line).toBe(2); + expect(pos2.line).toBe(3); + }); + }); + + describe('reset functionality', () => { + test('reset() allows parser reuse', () => { + const parser = new StreamParser(); + const allLinks = []; + + parser.on('link', (link) => allLinks.push(link)); + + parser.write('first\n'); + parser.end(); + + expect(allLinks.length).toBe(1); + + parser.reset(); + parser.write('second\n'); + parser.end(); + + expect(allLinks.length).toBe(2); + }); + + test('reset() clears internal state', () => { + const parser = 
new StreamParser(); + + parser.write('data\n'); + parser.end(); + + parser.reset(); + + expect(parser.getLinks().length).toBe(0); + expect(parser.isEnded()).toBe(false); + }); + }); + + describe('getLinks() method', () => { + test('getLinks() returns all parsed links', () => { + const parser = new StreamParser(); + + parser.write('a\nb\nc\n'); + parser.end(); + + const links = parser.getLinks(); + + expect(links.length).toBe(3); + }); + + test('getLinks() returns copy of internal array', () => { + const parser = new StreamParser(); + + parser.write('test\n'); + parser.end(); + + const links1 = parser.getLinks(); + const links2 = parser.getLinks(); + + expect(links1).not.toBe(links2); + expect(links1).toEqual(links2); + }); + }); + + describe('indented syntax', () => { + test('parses indented ID syntax', () => { + const parser = new StreamParser(); + + parser.write('id:\n value1\n value2\n'); + const links = parser.end(); + + expect(links.length).toBe(1); + expect(links[0].id).toBe('id'); + expect(links[0].values.length).toBe(2); + }); + + test('produces same result as regular parser for indented syntax', () => { + const input = `id: + value1 + value2`; + + const regularParser = new Parser(); + const regularLinks = regularParser.parse(input); + + const streamParser = new StreamParser(); + streamParser.write(input); + const streamLinks = streamParser.end(); + + expect(formatLinks(streamLinks)).toBe(formatLinks(regularLinks)); + }); + }); + + describe('quoted strings', () => { + test('parses single-quoted references', () => { + const parser = new StreamParser(); + + parser.write("('hello world')\n"); + const links = parser.end(); + + expect(links.length).toBe(1); + // Single value in parentheses becomes a Link with null id and single value + expect(links[0].values.length).toBe(1); + expect(links[0].values[0].id).toBe('hello world'); + }); + + test('parses double-quoted references', () => { + const parser = new StreamParser(); + + parser.write('("hello world")\n'); + 
const links = parser.end(); + + expect(links.length).toBe(1); + // Single value in parentheses becomes a Link with null id and single value + expect(links[0].values.length).toBe(1); + expect(links[0].values[0].id).toBe('hello world'); + }); + + test('handles quotes in streaming chunks', () => { + const parser = new StreamParser(); + + parser.write('("hello '); + parser.write('world")\n'); + const links = parser.end(); + + expect(links.length).toBe(1); + // Single value in parentheses becomes a Link with null id and single value + expect(links[0].values.length).toBe(1); + expect(links[0].values[0].id).toBe('hello world'); + }); + + test('parses quoted id with colon syntax', () => { + const parser = new StreamParser(); + + parser.write('("quoted id": value1 value2)\n'); + const links = parser.end(); + + expect(links.length).toBe(1); + expect(links[0].id).toBe('quoted id'); + expect(links[0].values.length).toBe(2); + }); + }); + + describe('complex inputs', () => { + test('nested parentheses', () => { + const input = '(outer: (inner: a b))\n'; + + const regularParser = new Parser(); + const regularLinks = regularParser.parse(input); + + const streamParser = new StreamParser(); + streamParser.write(input); + const streamLinks = streamParser.end(); + + expect(formatLinks(streamLinks)).toBe(formatLinks(regularLinks)); + }); + + test('mixed syntax', () => { + const input = `(id: value1 value2) +simple line +other: + child1 + child2 +`; + + const regularParser = new Parser(); + const regularLinks = regularParser.parse(input); + + const streamParser = new StreamParser(); + streamParser.write(input); + const streamLinks = streamParser.end(); + + expect(streamLinks.length).toBe(regularLinks.length); + }); + }); + + describe('use case from issue', () => { + test('API matches issue request for JavaScript', () => { + // Test the API matches what was requested in the issue + const parser = new StreamParser(); + const parsedLinks = []; + const errors = []; + + parser.on('link', 
(link) => { + // Process each link as it's parsed + parsedLinks.push(link); + }); + + parser.on('error', (error) => { + // Handle parse errors with location info + errors.push({ + message: error.message, + line: error.line, + column: error.column + }); + }); + + // Feed data incrementally + const chunk1 = 'papa (lovesMama: loves mama)\n'; + const chunk2 = 'son lovesMama\n'; + + parser.write(chunk1); + parser.write(chunk2); + const finalLinks = parser.end(); + + expect(parsedLinks.length).toBe(2); + expect(finalLinks.length).toBe(2); + expect(errors.length).toBe(0); + }); + }); +}); diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 9dfde87..446b0db 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -4,7 +4,7 @@ version = 4 [[package]] name = "links-notation" -version = "0.13.0" +version = "0.14.0" dependencies = [ "nom", ] diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 85b2f92..a264e65 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "links-notation" -version = "0.13.0" +version = "0.14.0" edition = "2021" description = "Rust implementation of the Links Notation parser" license = "Unlicense" diff --git a/rust/README.md b/rust/README.md index 2ec7727..aafb4b9 100644 --- a/rust/README.md +++ b/rust/README.md @@ -145,6 +145,39 @@ let quoted = r#"("quoted id": "value with spaces")"#; let parsed = parse_lino(quoted)?; ``` +### Streaming Parser + +For processing large messages efficiently without loading everything into memory: + +```rust +use links_notation::stream_parser::StreamParser; + +let mut parser = StreamParser::new(); + +// Register callback for each parsed link +parser.on_link(|link| { + println!("{:?}", link); +}); + +// Register error callback +parser.on_error(|error| { + eprintln!("Error at line {}: {}", error.line.unwrap_or(0), error.message); +}); + +// Feed data incrementally +parser.write("papa (lovesMama: loves mama)\n")?; +parser.write("son lovesMama\n")?; + +// Finish parsing and get all links +let links 
= parser.finish()?; +``` + +The streaming parser supports: +- **Memory efficiency**: Process large messages without loading everything into memory +- **Low latency**: Start processing before the full message is received +- **Detailed error reporting**: Errors include line and column information +- **Callback-based API**: Receive links as they are parsed + ## Syntax Examples ### Doublets (2-tuple) @@ -207,6 +240,40 @@ Represents either a Link or a Reference: Parses a Links Notation document string and returns the parsed structure or an error. +### Streaming Parser + +#### `StreamParser` + +Streaming parser for incremental processing of large messages. + +- `StreamParser::new()` - Create a new streaming parser +- `StreamParser::with_max_size(size)` - Create with custom max input size +- `on_link(callback)` - Register callback for parsed links +- `on_error(callback)` - Register callback for parse errors +- `write(chunk) -> Result<(), StreamParseError>` - Write a chunk of data +- `finish() -> Result<Vec<LiNo<String>>, StreamParseError>` - Finish parsing +- `reset()` - Reset the parser for reuse +- `get_links()` - Get all links parsed so far +- `get_position()` - Get current parser position +- `is_ended()` - Check if parser has ended + +#### `StreamParseError` + +Error type with location information. + +- `message` - Error message +- `line` - Line number (1-based, optional) +- `column` - Column number (1-based, optional) +- `offset` - Byte offset in the input (optional) + +#### `Position` + +Parser position in the input stream.
+ +- `line` - Line number (1-based) +- `column` - Column number (1-based) +- `offset` - Byte offset + ### Formatting The `Display` trait is implemented for `LiNo<T>` where `T: ToString`: diff --git a/rust/src/lib.rs b/rust/src/lib.rs index c3be7c4..b54bcab 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -1,5 +1,6 @@ pub mod format_config; pub mod parser; +pub mod stream_parser; use format_config::FormatConfig; use std::error::Error as StdError; diff --git a/rust/src/stream_parser.rs b/rust/src/stream_parser.rs new file mode 100644 index 0000000..9ab37f2 --- /dev/null +++ b/rust/src/stream_parser.rs @@ -0,0 +1,841 @@ +//! Streaming parser for Links Notation (Lino) +//! +//! This module provides a streaming parser that allows processing data incrementally +//! and emitting parsed links as they become available, without loading the entire +//! input into memory. +//! +//! # Example +//! +//! ``` +//! use links_notation::stream_parser::StreamParser; +//! +//! let mut parser = StreamParser::new(); +//! +//! parser.on_link(|link| { +//! println!("{:?}", link); +//! }); +//! +//! parser.write("papa lovesMama\n")?; +//! parser.write("son follows\n")?; +//! let links = parser.finish()?; +//! # Ok::<(), links_notation::stream_parser::StreamParseError>(()) +//!
``` + +use crate::parser; +use crate::LiNo; +use std::error::Error as StdError; +use std::fmt; + +/// Error type for streaming parser +#[derive(Debug, Clone)] +pub struct StreamParseError { + /// Error message + pub message: String, + /// Line number (1-based) + pub line: Option, + /// Column number (1-based) + pub column: Option, + /// Byte offset in the input + pub offset: Option, +} + +impl StreamParseError { + /// Create a new error without location info + pub fn new(message: impl Into) -> Self { + StreamParseError { + message: message.into(), + line: None, + column: None, + offset: None, + } + } + + /// Create a new error with location info + pub fn with_location( + message: impl Into, + line: usize, + column: usize, + offset: Option, + ) -> Self { + StreamParseError { + message: message.into(), + line: Some(line), + column: Some(column), + offset, + } + } +} + +impl fmt::Display for StreamParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match (self.line, self.column) { + (Some(line), Some(column)) => { + write!( + f, + "ParseError at line {}, column {}: {}", + line, column, self.message + ) + } + _ => write!(f, "ParseError: {}", self.message), + } + } +} + +impl StdError for StreamParseError {} + +/// Position in the input stream +#[derive(Debug, Clone, Copy, Default)] +pub struct Position { + /// Line number (1-based) + pub line: usize, + /// Column number (1-based) + pub column: usize, + /// Byte offset + pub offset: usize, +} + +impl Position { + /// Create a new position at the start of the input + pub fn new() -> Self { + Position { + line: 1, + column: 1, + offset: 0, + } + } +} + +// Type aliases for callback functions to avoid clippy type_complexity warnings +type LinkCallback = Box)>; +type ErrorCallback = Box; + +/// Streaming parser for Links Notation +/// +/// Allows processing data incrementally and emitting parsed links +/// as they become available. 
+pub struct StreamParser { + /// Buffer for incomplete input + buffer: String, + /// Total bytes received + total_bytes: usize, + /// Current line number + current_line: usize, + /// Current column number + current_column: usize, + /// Line offsets for position calculation + line_offsets: Vec, + /// Maximum input size in bytes + max_input_size: usize, + /// Parsed links + links: Vec>, + /// Whether the parser has ended + ended: bool, + /// Link callback + on_link_callback: Option, + /// Error callback + on_error_callback: Option, +} + +impl Default for StreamParser { + fn default() -> Self { + Self::new() + } +} + +impl StreamParser { + /// Create a new StreamParser with default options + pub fn new() -> Self { + StreamParser { + buffer: String::new(), + total_bytes: 0, + current_line: 1, + current_column: 1, + line_offsets: vec![0], + max_input_size: 10 * 1024 * 1024, // 10MB default + links: Vec::new(), + ended: false, + on_link_callback: None, + on_error_callback: None, + } + } + + /// Create a new StreamParser with custom max input size + pub fn with_max_size(max_input_size: usize) -> Self { + let mut parser = Self::new(); + parser.max_input_size = max_input_size; + parser + } + + /// Set the callback for when a link is parsed + /// + /// # Example + /// + /// ``` + /// use links_notation::stream_parser::StreamParser; + /// + /// let mut parser = StreamParser::new(); + /// parser.on_link(|link| { + /// println!("Parsed: {:?}", link); + /// }); + /// ``` + pub fn on_link(&mut self, callback: F) + where + F: FnMut(&LiNo) + 'static, + { + self.on_link_callback = Some(Box::new(callback)); + } + + /// Set the callback for when an error occurs + /// + /// # Example + /// + /// ``` + /// use links_notation::stream_parser::StreamParser; + /// + /// let mut parser = StreamParser::new(); + /// parser.on_error(|error| { + /// eprintln!("Error at line {}: {}", error.line.unwrap_or(0), error.message); + /// }); + /// ``` + pub fn on_error(&mut self, callback: F) + where + 
F: FnMut(&StreamParseError) + 'static, + { + self.on_error_callback = Some(Box::new(callback)); + } + + /// Write a chunk of data to the parser + /// + /// # Arguments + /// + /// * `chunk` - The string chunk to process + /// + /// # Returns + /// + /// Ok(()) if successful, or an error if the parser has ended or input exceeds max size + /// + /// # Example + /// + /// ``` + /// use links_notation::stream_parser::StreamParser; + /// + /// let mut parser = StreamParser::new(); + /// parser.write("hello world\n")?; + /// parser.write("another line\n")?; + /// # Ok::<(), links_notation::stream_parser::StreamParseError>(()) + /// ``` + pub fn write(&mut self, chunk: &str) -> Result<(), StreamParseError> { + if self.ended { + return Err(StreamParseError::new( + "Cannot write to a parser that has ended", + )); + } + + // Check total size + if self.buffer.len() + chunk.len() > self.max_input_size { + let error = StreamParseError::with_location( + format!( + "Input size exceeds maximum allowed size of {} bytes", + self.max_input_size + ), + self.current_line, + self.current_column, + Some(self.total_bytes), + ); + self.emit_error(&error); + return Err(error); + } + + self.buffer.push_str(chunk); + + // Try to parse complete elements + self.process_buffer(); + + Ok(()) + } + + /// Process buffered data and emit links for complete elements + fn process_buffer(&mut self) { + let safe_point = self.find_safe_parse_point(); + + if safe_point > 0 { + let to_parse = self.buffer[..safe_point].to_string(); + self.buffer = self.buffer[safe_point..].to_string(); + + self.parse_and_emit(&to_parse); + } + } + + /// Find the last safe point to parse (end of a complete top-level element) + fn find_safe_parse_point(&self) -> usize { + let buffer = &self.buffer; + + if buffer.is_empty() { + return 0; + } + + let mut last_safe_point = 0; + let mut i = 0; + let mut in_parentheses = 0; + let mut base_indentation: Option = None; + let mut line_start = 0; + let mut in_quote = false; + let mut 
quote_char: Option = None; + let mut quote_count = 0; + + let chars: Vec = buffer.chars().collect(); + + while i < chars.len() { + let char = chars[i]; + + // Track quote state for proper parsing + if !in_quote && (char == '"' || char == '\'' || char == '`') { + // Count consecutive quotes + quote_char = Some(char); + quote_count = 0; + let mut j = i; + while j < chars.len() && chars[j] == char { + quote_count += 1; + j += 1; + } + if quote_count > 0 { + in_quote = true; + i = j; + continue; + } + } else if in_quote && quote_char == Some(char) { + // Check for closing quotes + let mut count = 0; + let mut j = i; + while j < chars.len() && chars[j] == char { + count += 1; + j += 1; + } + // Check if this is an escape (2*N) or close (N) + if count == quote_count * 2 { + // Escape sequence - skip + i = j; + continue; + } else if count >= quote_count { + // Closing quote + in_quote = false; + quote_char = None; + i += quote_count; + continue; + } + } + + if in_quote { + i += 1; + continue; + } + + // Track parentheses + if char == '(' { + in_parentheses += 1; + } else if char == ')' { + in_parentheses -= 1; + } + + // Track line boundaries and indentation + if char == '\n' { + // Check if this ends a complete top-level element + if in_parentheses == 0 { + // Check indentation of next line + let mut next_indent: usize = 0; + let mut j = i + 1; + while j < chars.len() && chars[j] == ' ' { + next_indent += 1; + j += 1; + } + + // Check if we have content on next line + if j < chars.len() && chars[j] != '\n' && chars[j] != '\r' { + // First non-empty line sets base indentation + if base_indentation.is_none() && line_start == 0 { + let mut first_content_indent = 0; + let mut k = 0; + while k < chars.len() && chars[k] == ' ' { + first_content_indent += 1; + k += 1; + } + base_indentation = Some(first_content_indent); + } + + // If next line is at base indentation, this could be a new top-level element + let normalized_next = base_indentation + .map(|base| 
next_indent.saturating_sub(base)) + .unwrap_or(next_indent); + + if normalized_next == 0 { + // This line boundary is a safe parse point + last_safe_point = i + 1; + } + } + } + + line_start = i + 1; + } + + i += 1; + } + + // If buffer ends with newline and no unclosed parens, it's safe + if buffer.ends_with('\n') && in_parentheses == 0 && !in_quote { + last_safe_point = buffer.len(); + } + + last_safe_point + } + + /// Parse text and emit resulting links + fn parse_and_emit(&mut self, text: &str) { + if text.trim().is_empty() { + self.update_position(text); + return; + } + + match parser::parse_document(text) { + Ok((_, raw_links)) => { + let links = self.flatten_links(raw_links); + + for link in links { + self.links.push(link.clone()); + self.emit_link(&link); + } + } + Err(e) => { + let error = StreamParseError::with_location( + format!("{:?}", e), + self.current_line, + self.current_column, + Some(self.total_bytes), + ); + self.emit_error(&error); + } + } + + self.update_position(text); + } + + /// Update position tracking based on processed text + fn update_position(&mut self, text: &str) { + for char in text.chars() { + if char == '\n' { + self.current_line += 1; + self.current_column = 1; + self.line_offsets.push(self.total_bytes); + } else { + self.current_column += 1; + } + self.total_bytes += char.len_utf8(); + } + } + + /// Emit a link to the callback + fn emit_link(&mut self, link: &LiNo) { + if let Some(ref mut callback) = self.on_link_callback { + callback(link); + } + } + + /// Emit an error to the callback + fn emit_error(&mut self, error: &StreamParseError) { + if let Some(ref mut callback) = self.on_error_callback { + callback(error); + } + } + + /// Flatten parser::Link into LiNo + fn flatten_links(&self, links: Vec) -> Vec> { + let mut result = vec![]; + + for link in links { + self.flatten_link_recursive(&link, None, &mut result); + } + + result + } + + /// Recursive helper for flattening links + fn flatten_link_recursive( + &self, + link: 
&parser::Link, + parent: Option<&LiNo>, + result: &mut Vec>, + ) { + // Special case: If this is an indented ID with children + if link.is_indented_id + && link.id.is_some() + && link.values.is_empty() + && !link.children.is_empty() + { + let child_values: Vec> = link + .children + .iter() + .map(|child| { + if child.values.len() == 1 + && child.values[0].values.is_empty() + && child.values[0].children.is_empty() + { + if let Some(ref id) = child.values[0].id { + LiNo::Ref(id.clone()) + } else { + parser::Link { + id: child.id.clone(), + values: child.values.clone(), + children: vec![], + is_indented_id: false, + } + .into() + } + } else { + parser::Link { + id: child.id.clone(), + values: child.values.clone(), + children: vec![], + is_indented_id: false, + } + .into() + } + }) + .collect(); + + let current = LiNo::Link { + id: link.id.clone(), + values: child_values, + }; + + let combined = if let Some(parent) = parent { + let wrapped_parent = match parent { + LiNo::Ref(ref_id) => LiNo::Link { + id: None, + values: vec![LiNo::Ref(ref_id.clone())], + }, + link => link.clone(), + }; + + LiNo::Link { + id: None, + values: vec![wrapped_parent, current], + } + } else { + current + }; + + result.push(combined); + return; + } + + // Create the current link without children + let current: LiNo = if link.values.is_empty() { + if let Some(id) = &link.id { + LiNo::Ref(id.clone()) + } else { + LiNo::Link { + id: None, + values: vec![], + } + } + } else { + let values: Vec> = link + .values + .iter() + .map(|v| { + parser::Link { + id: v.id.clone(), + values: v.values.clone(), + children: vec![], + is_indented_id: false, + } + .into() + }) + .collect(); + LiNo::Link { + id: link.id.clone(), + values, + } + }; + + // Create the combined link with parent + let combined = if let Some(parent) = parent { + let wrapped_parent = match parent { + LiNo::Ref(ref_id) => LiNo::Link { + id: None, + values: vec![LiNo::Ref(ref_id.clone())], + }, + link => link.clone(), + }; + + let 
wrapped_current = match &current { + LiNo::Ref(ref_id) => LiNo::Link { + id: None, + values: vec![LiNo::Ref(ref_id.clone())], + }, + link => link.clone(), + }; + + LiNo::Link { + id: None, + values: vec![wrapped_parent, wrapped_current], + } + } else { + current.clone() + }; + + result.push(combined.clone()); + + // Process children + for child in &link.children { + self.flatten_link_recursive(child, Some(&combined), result); + } + } + + /// Signal end of input and finish parsing + /// + /// # Returns + /// + /// All parsed links + /// + /// # Example + /// + /// ``` + /// use links_notation::stream_parser::StreamParser; + /// + /// let mut parser = StreamParser::new(); + /// parser.write("hello world\n")?; + /// let links = parser.finish()?; + /// # Ok::<(), links_notation::stream_parser::StreamParseError>(()) + /// ``` + pub fn finish(&mut self) -> Result<Vec<LiNo<String>>, StreamParseError> { + if self.ended { + return Ok(self.links.clone()); + } + + self.ended = true; + + // Parse any remaining buffered content + if !self.buffer.trim().is_empty() { + let remaining = std::mem::take(&mut self.buffer); + self.parse_and_emit(&remaining); + } + + Ok(self.links.clone()) + } + + /// Reset the parser for reuse + pub fn reset(&mut self) { + self.buffer = String::new(); + self.total_bytes = 0; + self.current_line = 1; + self.current_column = 1; + self.line_offsets = vec![0]; + self.links = Vec::new(); + self.ended = false; + } + + /// Get all links parsed so far + pub fn get_links(&self) -> &[LiNo<String>] { + &self.links + } + + /// Get current parser position + pub fn get_position(&self) -> Position { + Position { + line: self.current_line, + column: self.current_column, + offset: self.total_bytes, + } + } + + /// Check if the parser has ended + pub fn is_ended(&self) -> bool { + self.ended + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::cell::RefCell; + use std::rc::Rc; + + #[test] + fn test_basic_parsing() { + let mut parser = StreamParser::new(); + 
parser.write("a b c\n").unwrap();
+ let links = parser.finish().unwrap(); + + assert_eq!(links.len(), 1); + } + + #[test] + fn test_multiline_incremental() { + let mut parser = StreamParser::new(); + let count = Rc::new(RefCell::new(0)); + let count_clone = count.clone(); + + parser.on_link(move |_| { + *count_clone.borrow_mut() += 1; + }); + + parser.write("line1 value1\n").unwrap(); + parser.write("line2 value2\n").unwrap(); + parser.finish().unwrap(); + + assert_eq!(*count.borrow(), 2); + } + + #[test] + fn test_parenthesized_link() { + let mut parser = StreamParser::new(); + parser.write("(id: value1 value2)\n").unwrap(); + let links = parser.finish().unwrap(); + + assert_eq!(links.len(), 1); + if let LiNo::Link { id, values } = &links[0] { + assert_eq!(id.as_ref().unwrap(), "id"); + assert_eq!(values.len(), 2); + } else { + panic!("Expected Link"); + } + } + + #[test] + fn test_empty_input() { + let mut parser = StreamParser::new(); + let links = parser.finish().unwrap(); + + assert_eq!(links.len(), 0); + } + + #[test] + fn test_buffering_incomplete() { + let mut parser = StreamParser::new(); + + // Write incomplete parenthesized expression + parser.write("(id: val").unwrap(); + // Check internal links (not emitted yet) + assert_eq!(parser.get_links().len(), 0); + + // Complete the expression + parser.write("ue)\n").unwrap(); + let links = parser.finish().unwrap(); + + assert_eq!(links.len(), 1); + } + + #[test] + fn test_write_after_end() { + let mut parser = StreamParser::new(); + parser.finish().unwrap(); + + let result = parser.write("more data"); + assert!(result.is_err()); + } + + #[test] + fn test_max_size_exceeded() { + let mut parser = StreamParser::with_max_size(100); + + let large_input = "x".repeat(200); + let result = parser.write(&large_input); + + assert!(result.is_err()); + } + + #[test] + fn test_reset() { + let mut parser = StreamParser::new(); + parser.write("test\n").unwrap(); + parser.finish().unwrap(); + + parser.reset(); + + assert!(!parser.is_ended()); + 
assert_eq!(parser.get_links().len(), 0); + + parser.write("new\n").unwrap(); + let links = parser.finish().unwrap(); + assert_eq!(links.len(), 1); + } + + #[test] + fn test_position_tracking() { + let mut parser = StreamParser::new(); + parser.write("first line\n").unwrap(); + + let pos = parser.get_position(); + assert_eq!(pos.line, 2); + + parser.write("second line\n").unwrap(); + let pos = parser.get_position(); + assert_eq!(pos.line, 3); + } + + #[test] + fn test_error_callback() { + let mut parser = StreamParser::new(); + let error_received = Rc::new(RefCell::new(false)); + let error_received_clone = error_received.clone(); + + parser.on_error(move |_| { + *error_received_clone.borrow_mut() = true; + }); + + // Unclosed parenthesis + parser.write("(unclosed\n").unwrap(); + parser.finish().unwrap(); + + assert!(*error_received.borrow()); + } + + #[test] + fn test_indented_syntax() { + let mut parser = StreamParser::new(); + parser.write("id:\n value1\n value2\n").unwrap(); + let links = parser.finish().unwrap(); + + assert_eq!(links.len(), 1); + if let LiNo::Link { id, values } = &links[0] { + assert_eq!(id.as_ref().unwrap(), "id"); + assert_eq!(values.len(), 2); + } else { + panic!("Expected Link"); + } + } + + #[test] + fn test_quoted_strings() { + let mut parser = StreamParser::new(); + parser.write("(\"quoted id\": value1 value2)\n").unwrap(); + let links = parser.finish().unwrap(); + + assert_eq!(links.len(), 1); + if let LiNo::Link { id, values } = &links[0] { + assert_eq!(id.as_ref().unwrap(), "quoted id"); + assert_eq!(values.len(), 2); + } else { + panic!("Expected Link"); + } + } + + #[test] + fn test_use_case_from_issue() { + let mut parser = StreamParser::new(); + let parsed_links = Rc::new(RefCell::new(Vec::new())); + let parsed_links_clone = parsed_links.clone(); + + parser.on_link(move |link| { + parsed_links_clone.borrow_mut().push(link.clone()); + }); + + // Feed data incrementally + parser.write("papa (lovesMama: loves mama)\n").unwrap(); + 
parser.write("son lovesMama\n").unwrap(); + let final_links = parser.finish().unwrap(); + + assert_eq!(parsed_links.borrow().len(), 2); + assert_eq!(final_links.len(), 2); + } +} From db2fe8b1bf9c7ef46385e4a035605a019e23484b Mon Sep 17 00:00:00 2001 From: konard Date: Tue, 13 Jan 2026 13:14:04 +0100 Subject: [PATCH 3/4] style: format JavaScript files with Prettier Co-Authored-By: Claude Opus 4.5 --- js/README.md | 5 ++++- js/src/StreamParser.js | 16 ++++++++++------ js/tests/StreamParser.test.js | 18 +++++++++++++----- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/js/README.md b/js/README.md index cad1bfc..4ac408b 100644 --- a/js/README.md +++ b/js/README.md @@ -130,7 +130,9 @@ parser.on('link', (link) => { parser.on('error', (error) => { // Handle parse errors with location info - console.error(`Error at line ${error.line}, col ${error.column}: ${error.message}`); + console.error( + `Error at line ${error.line}, col ${error.column}: ${error.message}` + ); }); // Feed data incrementally @@ -142,6 +144,7 @@ const links = parser.end(); ``` The streaming parser supports: + - **Memory efficiency**: Process large messages without loading everything into memory - **Low latency**: Start processing before the full message is received - **Detailed error reporting**: Errors include line and column information diff --git a/js/src/StreamParser.js b/js/src/StreamParser.js index ac8cad9..e176d55 100644 --- a/js/src/StreamParser.js +++ b/js/src/StreamParser.js @@ -77,7 +77,7 @@ export class StreamParser { this._handlers = { link: [], error: [], - end: [] + end: [], }; // Parsed links @@ -341,7 +341,10 @@ export class StreamParser { // If next line is at base indentation and we're not waiting for indented children // this could be a new top-level element - const normalizedNext = baseIndentation !== null ? Math.max(0, nextIndent - baseIndentation) : nextIndent; + const normalizedNext = + baseIndentation !== null + ? 
Math.max(0, nextIndent - baseIndentation) + : nextIndent; if (normalizedNext === 0) { // This line boundary is a safe parse point @@ -394,9 +397,10 @@ export class StreamParser { if (error.location) { line = this._currentLine + error.location.start.line - 1; - column = error.location.start.line === 1 - ? this._currentColumn + error.location.start.column - 1 - : error.location.start.column; + column = + error.location.start.line === 1 + ? this._currentColumn + error.location.start.column - 1 + : error.location.start.column; offset = error.location.start.offset; } @@ -576,7 +580,7 @@ export class StreamParser { return { line: this._currentLine, column: this._currentColumn, - offset: this._totalBytesReceived + offset: this._totalBytesReceived, }; } diff --git a/js/tests/StreamParser.test.js b/js/tests/StreamParser.test.js index d6e34f6..a60f539 100644 --- a/js/tests/StreamParser.test.js +++ b/js/tests/StreamParser.test.js @@ -75,7 +75,9 @@ daughter lovesMama`; expect(streamLinks.length).toBe(regularLinks.length); for (let i = 0; i < regularLinks.length; i++) { - expect(formatLinks([streamLinks[i]])).toBe(formatLinks([regularLinks[i]])); + expect(formatLinks([streamLinks[i]])).toBe( + formatLinks([regularLinks[i]]) + ); } }); }); @@ -228,7 +230,9 @@ daughter lovesMama`; const parser = new StreamParser(); parser.end(); - expect(() => parser.write('more data')).toThrow('Cannot write to a parser that has ended'); + expect(() => parser.write('more data')).toThrow( + 'Cannot write to a parser that has ended' + ); }); test('throws on non-string input', () => { @@ -243,7 +247,9 @@ daughter lovesMama`; const largeInput = 'x'.repeat(200); - expect(() => parser.write(largeInput)).toThrow(/exceeds maximum allowed size/); + expect(() => parser.write(largeInput)).toThrow( + /exceeds maximum allowed size/ + ); }); }); @@ -259,7 +265,9 @@ daughter lovesMama`; test('ParseError toString includes location', () => { const error = new ParseError('test error', 3, 7); - 
expect(error.toString()).toBe('ParseError at line 3, column 7: test error'); + expect(error.toString()).toBe( + 'ParseError at line 3, column 7: test error' + ); }); test('ParseError toString without location', () => { @@ -470,7 +478,7 @@ other: errors.push({ message: error.message, line: error.line, - column: error.column + column: error.column, }); }); From ce296b5d64a70beca855961b1030abae71f7ac31 Mon Sep 17 00:00:00 2001 From: konard Date: Tue, 13 Jan 2026 13:16:08 +0100 Subject: [PATCH 4/4] chore: remove CLAUDE.md configuration file Co-Authored-By: Claude Opus 4.5 --- CLAUDE.md | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index 5818f01..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1,5 +0,0 @@ -Issue to solve: https://github.com/link-foundation/links-notation/issues/197 -Your prepared branch: issue-197-93f74116bd61 -Your prepared working directory: /tmp/gh-issue-solver-1768305700276 - -Proceed.