From 933d54883b5b6dcf75587c0b40b958342dd7bc8b Mon Sep 17 00:00:00 2001 From: Matthew Donoughe Date: Thu, 2 Oct 2025 15:26:24 -0400 Subject: [PATCH] limit the context for convert_error --- nom-language/src/error.rs | 187 +++++++++++++++++++++++++++----------- 1 file changed, 132 insertions(+), 55 deletions(-) diff --git a/nom-language/src/error.rs b/nom-language/src/error.rs index 4d38f4293..2c1dd0308 100644 --- a/nom-language/src/error.rs +++ b/nom-language/src/error.rs @@ -137,6 +137,8 @@ pub fn convert_error>(input: I, e: VerboseErro VerboseErrorKind::Context(s) => write!(&mut result, "{}: in {}, got empty input\n\n", i, s), VerboseErrorKind::Nom(e) => write!(&mut result, "{}: in {:?}, got empty input\n\n", i, e), } + // Because `write!` to a `String` is infallible, this `unwrap` is fine. + .unwrap() } else { let prefix = &input.as_bytes()[..offset]; @@ -163,71 +165,81 @@ pub fn convert_error>(input: I, e: VerboseErro let column_number = line.offset(substring) + 1; match kind { - VerboseErrorKind::Char(c) => { + VerboseErrorKind::Char(expected) => { + writeln!( + &mut result, + "{i}: at line {line_number} column {column_number}:", + ) + .unwrap(); + show_position(&mut result, line, column_number); if let Some(actual) = substring.chars().next() { - write!( - &mut result, - "{i}: at line {line_number}:\n\ - {line}\n\ - {caret:>column$}\n\ - expected '{expected}', found {actual}\n\n", - i = i, - line_number = line_number, - line = line, - caret = '^', - column = column_number, - expected = c, - actual = actual, - ) + write!(&mut result, "expect '{expected}', found {actual}\n\n").unwrap(); } else { - write!( - &mut result, - "{i}: at line {line_number}:\n\ - {line}\n\ - {caret:>column$}\n\ - expected '{expected}', got end of input\n\n", - i = i, - line_number = line_number, - line = line, - caret = '^', - column = column_number, - expected = c, - ) + write!(&mut result, "expected '{expected}', got end of input\n\n").unwrap(); } } - VerboseErrorKind::Context(s) => write!( - &mut result, - "{i}: at line {line_number}, in {context}:\n\ - {line}\n\ - {caret:>column$}\n\n", - i = i, - line_number = line_number, - context = s, - line = line, - caret = '^', - column = column_number, - ), - VerboseErrorKind::Nom(e) => write!( - &mut result, - "{i}: at line {line_number}, in {nom_err:?}:\n\ - {line}\n\ - {caret:>column$}\n\n", - i = i, - line_number = line_number, - nom_err = e, - line = line, - caret = '^', - column = column_number, - ), + VerboseErrorKind::Context(context) => { + writeln!( + &mut result, + "{i}: at line {line_number} column {column_number}, in {context}:", + ) + .unwrap(); + show_position(&mut result, line, column_number); + writeln!(&mut result).unwrap(); + } + VerboseErrorKind::Nom(nom_err) => { + writeln!( + &mut result, + "{i}: at line {line_number} column {column_number}, in {nom_err:?}:", + ) + .unwrap(); + show_position(&mut result, line, column_number); + writeln!(&mut result).unwrap(); + } } } - // Because `write!` to a `String` is infallible, this `unwrap` is fine. - .unwrap(); } result } +fn show_position(result: &mut String, line: &str, column_number: usize) { + use std::fmt::Write; + + const MAXIMUM_CONTEXT: usize = 40; + + // column_number is assumed to be the 1-indexed byte offset of a valid character offset within line + let column_offset = column_number - 1; + + let from_offset = column_offset.saturating_sub(MAXIMUM_CONTEXT); + // round to the next character boundary + let from_offset = line + .char_indices() + .find_map(|(i, _)| (i >= from_offset).then_some(i)) + .unwrap_or(column_offset); + + let to_offset = column_number + .saturating_add(MAXIMUM_CONTEXT + 1) + .min(line.len()); + // round to the previous character boundary + let to_offset = line[column_number..] + .char_indices() + .map(|(i, _)| i + column_number) + .filter(|i| *i < to_offset) + .last() + .unwrap_or(column_number); + + write!( + result, + "{line}\n\ + {caret:>column$}\n", + caret = '^', + line = &line[from_offset..to_offset], + column = column_number.saturating_sub(from_offset), + ) + .unwrap(); +} + #[test] fn convert_error_panic() { use nom::character::complete::char; @@ -257,6 +269,71 @@ fn issue_1027_convert_error_panic_nonempty() { let msg = convert_error(input, err); assert_eq!( msg, - "0: at line 1:\na\n ^\nexpected \'b\', got end of input\n\n" + "0: at line 1 column 2:\na\n ^\nexpected \'b\', got end of input\n\n" + ); +} + +#[test] +fn convert_error_long_line() { + use std::iter::repeat_n; + + use nom::bytes::complete::{tag, take}; + use nom::sequence::terminated; + use nom::Finish; + use nom::Parser; + + const COUNT: usize = 0x10000; + let mut input = String::with_capacity(COUNT * 2 + 1); + input.extend(repeat_n('>', COUNT)); + input.push('|'); + input.extend(repeat_n('<', COUNT)); + + let err: VerboseError<&str> = terminated(take(COUNT), tag("_")) + .parse(&*input) + .finish() + .unwrap_err(); + + let msg = convert_error(&*input, err); + assert_eq!( + msg, + r#"0: at line 1 column 65537, in Tag: +>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>|<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + ^ + +"# + ); +} + +#[test] +fn convert_error_long_line_unicode() { + use std::iter::repeat_n; + + use nom::bytes::complete::{tag, take}; + use nom::sequence::terminated; + use nom::Finish; + use nom::Parser; + + // This is the same test as the non-Unicode version, but using 3-byte Unicode characters to check + // for character boundary problems. + + const COUNT: usize = 0x10000; + let mut input = String::with_capacity(COUNT * 3 * 2 + 1); + input.extend(repeat_n('≥', COUNT)); + input.push('|'); + input.extend(repeat_n('≤', COUNT)); + + let err: VerboseError<&str> = terminated(take(COUNT), tag("_")) + .parse(&*input) + .finish() + .unwrap_err(); + + let msg = convert_error(&*input, err); + assert_eq!( + msg, + r#"0: at line 1 column 196609, in Tag: +≥≥≥≥≥≥≥≥≥≥≥≥≥|≤≤≤≤≤≤≤≤≤≤≤≤≤ + ^ + +"# ); }