From 01026f1a7a2f2f7a8a65763181ddfe13bc70281a Mon Sep 17 00:00:00 2001 From: mysticmnd Date: Tue, 31 Mar 2026 06:04:22 +0530 Subject: [PATCH 1/2] fix: avoid over-escaping plain text delimiters Keep literal parentheses and braces in normal text output while limiting CommonMark escaping to square brackets to avoid accidental link parsing. --- ...apeMarkdownCharsInTextProperly.verified.md | 2 +- ...ldren_ThenEscapeTextInMarkdown.verified.md | 2 +- ...nvertToMarkdownEscapedAsterisk.verified.md | 2 +- src/ReverseMarkdown.Test/ConverterTests.cs | 12 ++++++++++++ src/ReverseMarkdown/Converters/Text.cs | 19 +++++++------------ 5 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/ReverseMarkdown.Test/ConverterTests.EscapeMarkdownCharsInTextProperly.verified.md b/src/ReverseMarkdown.Test/ConverterTests.EscapeMarkdownCharsInTextProperly.verified.md index 7a227a8..72cdbf6 100644 --- a/src/ReverseMarkdown.Test/ConverterTests.EscapeMarkdownCharsInTextProperly.verified.md +++ b/src/ReverseMarkdown.Test/ConverterTests.EscapeMarkdownCharsInTextProperly.verified.md @@ -1 +1 @@ -\[a-z\]\(\[0-9\]\)\{0,4\} \ No newline at end of file +[a-z]([0-9]){0,4} diff --git a/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsHtmlLinkWithDisallowedCharsInChildren_ThenEscapeTextInMarkdown.verified.md b/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsHtmlLinkWithDisallowedCharsInChildren_ThenEscapeTextInMarkdown.verified.md index 4d2805e..d3e7482 100644 --- a/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsHtmlLinkWithDisallowedCharsInChildren_ThenEscapeTextInMarkdown.verified.md +++ b/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsHtmlLinkWithDisallowedCharsInChildren_ThenEscapeTextInMarkdown.verified.md @@ -1 +1 @@ -[this \\]\( might break things](http://example.com) \ No newline at end of file +[this \]( might break things](http://example.com) diff --git a/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsSingleAsteriskInText_ThenConvertToMarkdownEscapedAsterisk.verified.md b/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsSingleAsteriskInText_ThenConvertToMarkdownEscapedAsterisk.verified.md index adccbaf..e792ec7 100644 --- a/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsSingleAsteriskInText_ThenConvertToMarkdownEscapedAsterisk.verified.md +++ b/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsSingleAsteriskInText_ThenConvertToMarkdownEscapedAsterisk.verified.md @@ -1 +1 @@ -This is a sample\(\*\) paragraph \ No newline at end of file +This is a sample(\*) paragraph diff --git a/src/ReverseMarkdown.Test/ConverterTests.cs b/src/ReverseMarkdown.Test/ConverterTests.cs index 50a2ea3..100cfbf 100644 --- a/src/ReverseMarkdown.Test/ConverterTests.cs +++ b/src/ReverseMarkdown.Test/ConverterTests.cs @@ -117,6 +117,18 @@ public void WhenEscapeMarkdownLineStartsEnabled_ThenEscapeHeadingAndListMarkers( Assert.Equal(@"1\. Point 1", converter.Convert("

1. Point 1

")); } + [Fact] + public void WhenTextContainsBracketsBracesAndParentheses_ThenDoNotEscapeThem() + { + const string html = "This is [a] test of the (reverse) {markdown} system."; + + var converter = new Converter(); + Assert.Equal(html, converter.Convert(html)); + + var commonMarkConverter = new Converter(new Config { CommonMark = true }); + Assert.Equal("This is \\[a\\] test of the (reverse) {markdown} system.", commonMarkConverter.Convert(html)); + } + [Fact] public void WhenOutputLineEndingConfigured_ThenNormalizeOutputLineEndings() { diff --git a/src/ReverseMarkdown/Converters/Text.cs b/src/ReverseMarkdown/Converters/Text.cs index a31a9f4..9679f42 100644 --- a/src/ReverseMarkdown/Converters/Text.cs +++ b/src/ReverseMarkdown/Converters/Text.cs @@ -26,13 +26,9 @@ public Text(Converter converter) : base(converter) [@"\_"] = "_", }; - private static readonly StringReplaceValues _specialMarkdownCharacters = new() { + private static readonly StringReplaceValues _commonMarkSpecialMarkdownCharacters = new() { ["["] = @"\[", ["]"] = @"\]", - ["("] = @"\(", - [")"] = @"\)", - ["{"] = @"\{", - ["}"] = @"\}", }; private static readonly StringReplaceValues _preserveAngleBrackets = new() { @@ -173,9 +169,8 @@ parent.Name is "p" or "#document" && content = BackTicks().Replace(content, p => p.Value.Replace(_escapedKeyCharsReverse)); } - content = EscapeSpecialMarkdownCharacters(content); - if (isCommonMark) { + content = EscapeSpecialCommonMarkCharacters(content); content = content.Replace("`", "\\`"); } @@ -187,16 +182,16 @@ parent.Name is "p" or "#document" && } - private static string EscapeSpecialMarkdownCharacters(string content) + private const string AmpersandPlaceholder = "__REVERSEMARKDOWN_AMP__"; + private const string NbspPlaceholder = "__REVERSEMARKDOWN_NBSP__"; + + private static string EscapeSpecialCommonMarkCharacters(string content) { return content.StartsWith('`') && content.EndsWith('`') ? content - : content.Replace(_specialMarkdownCharacters); + : content.Replace(_commonMarkSpecialMarkdownCharacters); } - private const string AmpersandPlaceholder = "__REVERSEMARKDOWN_AMP__"; - private const string NbspPlaceholder = "__REVERSEMARKDOWN_NBSP__"; - private static string PreserveCommonMarkAmpersands(string rawContent) { if (string.IsNullOrEmpty(rawContent)) { From 5c3e363135cf145a184dba510320e692df72a997 Mon Sep 17 00:00:00 2001 From: mysticmnd Date: Tue, 31 Mar 2026 06:39:37 +0530 Subject: [PATCH 2/2] fix(commonmark): escape markdown delimiters only in markdown-like patterns --- src/ReverseMarkdown.Test/ConverterTests.cs | 15 +- src/ReverseMarkdown/Converters/Text.cs | 170 +++++++++++++++++++-- 2 files changed, 175 insertions(+), 10 deletions(-) diff --git a/src/ReverseMarkdown.Test/ConverterTests.cs b/src/ReverseMarkdown.Test/ConverterTests.cs index 100cfbf..9676e1e 100644 --- a/src/ReverseMarkdown.Test/ConverterTests.cs +++ b/src/ReverseMarkdown.Test/ConverterTests.cs @@ -126,7 +126,20 @@ public void WhenTextContainsBracketsBracesAndParentheses_ThenDoNotEscapeThem() Assert.Equal(html, converter.Convert(html)); var commonMarkConverter = new Converter(new Config { CommonMark = true }); - Assert.Equal("This is \\[a\\] test of the (reverse) {markdown} system.", commonMarkConverter.Convert(html)); + Assert.Equal(html, commonMarkConverter.Convert(html)); + } + + [Fact] + public void WhenCommonMarkTextContainsMarkdownLinkPattern_ThenEscapeOnlyPatternDelimiters() + { + const string html = "This is [a] and [label](https://example.com/path) with {plain} braces."; + + var converter = new Converter(new Config { CommonMark = true }); + + Assert.Equal( + "This is [a] and \\[label\\]\\(https://example.com/path\\) with {plain} braces.", + converter.Convert(html) + ); } [Fact] diff --git a/src/ReverseMarkdown/Converters/Text.cs b/src/ReverseMarkdown/Converters/Text.cs index 9679f42..5c2129c 100644 --- a/src/ReverseMarkdown/Converters/Text.cs +++ b/src/ReverseMarkdown/Converters/Text.cs @@ -1,6 +1,7 @@ -using System; +using System; using System.IO; using System.Linq; +using System.Text; using System.Text.RegularExpressions; using HtmlAgilityPack; using ReverseMarkdown.Helpers; @@ -26,11 +27,6 @@ public Text(Converter converter) : base(converter) [@"\_"] = "_", }; - private static readonly StringReplaceValues _commonMarkSpecialMarkdownCharacters = new() { - ["["] = @"\[", - ["]"] = @"\]", - }; - private static readonly StringReplaceValues _preserveAngleBrackets = new() { ["<"] = "%3C", [">"] = "%3E", @@ -44,6 +40,15 @@ public Text(Converter converter) : base(converter) [GeneratedRegex(@"`.*?`")] private static partial Regex BackTicks(); + [GeneratedRegex(@"!?\[[^\]\r\n]*\]\([^\)\r\n]*\)")] + private static partial Regex CommonMarkInlineLinkOrImagePattern(); + + [GeneratedRegex(@"\[[^\]\r\n]+\]\[[^\]\r\n]*\]")] + private static partial Regex CommonMarkReferenceLinkPattern(); + + [GeneratedRegex(@"(?m)^ {0,3}\[[^\]\r\n]+\]:")] + private static partial Regex CommonMarkLinkDefinitionPattern(); + #endregion @@ -91,7 +96,7 @@ private void TreatText(TextWriter writer, HtmlNode node) rawText.Contains("(); + + var parent = node.ParentNode; + if (parent == null || parent.ChildNodes.Count < 2) { + return false; + } + + if (!parent.ChildNodes.Any(child => child.NodeType != HtmlNodeType.Text)) { + return false; + } + + var parentText = new StringBuilder(); + var nodeInnerText = node.InnerText; + var nodeStart = -1; + + foreach (var child in parent.ChildNodes) { + if (child == node) { + nodeStart = parentText.Length; + } + + parentText.Append(child.InnerText); + } + + if (nodeStart < 0 || string.IsNullOrEmpty(nodeInnerText)) { + return false; + } + + var combined = parentText.ToString(); + var shouldEscape = new bool[combined.Length]; + var hasDelimitersToEscape = + MarkCommonMarkPatternDelimiters(shouldEscape, combined, CommonMarkInlineLinkOrImagePattern()) | + MarkCommonMarkPatternDelimiters(shouldEscape, combined, CommonMarkReferenceLinkPattern()) | + MarkCommonMarkPatternDelimiters(shouldEscape, combined, CommonMarkLinkDefinitionPattern()); + + if (!hasDelimitersToEscape) { + return false; + } + + var marks = new bool[nodeInnerText.Count(IsCommonMarkDelimiter)]; + var delimiterIndex = 0; + var hasMarkedDelimiterInNode = false; + for (var i = 0; i < nodeInnerText.Length; i++) { + var currentChar = nodeInnerText[i]; + if (!IsCommonMarkDelimiter(currentChar)) { + continue; + } + + var marked = shouldEscape[nodeStart + i]; + marks[delimiterIndex++] = marked; + hasMarkedDelimiterInNode |= marked; + } + + if (!hasMarkedDelimiterInNode) { + return false; + } + + delimiterMarks = marks; + return true; + } + + private static string EscapeMarkedDelimiters(string content, bool[] delimiterMarks) + { + if (string.IsNullOrEmpty(content) || delimiterMarks.Length == 0) { + return content; + } + + var escaped = new StringBuilder(content.Length); + var delimiterIndex = 0; + for (var i = 0; i < content.Length; i++) { + var currentChar = content[i]; + if (!IsCommonMarkDelimiter(currentChar)) { + escaped.Append(currentChar); + continue; + } + + var shouldEscape = delimiterIndex < delimiterMarks.Length && delimiterMarks[delimiterIndex]; + delimiterIndex++; + if (shouldEscape && (i == 0 || content[i - 1] != '\\')) { + escaped.Append('\\'); + } + + escaped.Append(currentChar); + } + + return escaped.ToString(); + } + + private static string EscapeCommonMarkPatternDelimiters(string content) + { + if (string.IsNullOrEmpty(content)) { + return content; + } + + var shouldEscape = new bool[content.Length]; + var hasDelimitersToEscape = + MarkCommonMarkPatternDelimiters(shouldEscape, content, CommonMarkInlineLinkOrImagePattern()) | + MarkCommonMarkPatternDelimiters(shouldEscape, content, CommonMarkReferenceLinkPattern()) | + MarkCommonMarkPatternDelimiters(shouldEscape, content, CommonMarkLinkDefinitionPattern()); + + if (!hasDelimitersToEscape) { + return content; + } + + var escaped = new StringBuilder(content.Length); + for (var i = 0; i < content.Length; i++) { + if (shouldEscape[i] && (i == 0 || content[i - 1] != '\\')) { + escaped.Append('\\'); + } + + escaped.Append(content[i]); + } + + return escaped.ToString(); + } + + private static bool MarkCommonMarkPatternDelimiters(bool[] shouldEscape, string content, Regex pattern) + { + var foundDelimiters = false; + + foreach (Match match in pattern.Matches(content)) { + var end = match.Index + match.Length; + for (var i = match.Index; i < end; i++) { + if (IsCommonMarkDelimiter(content[i])) { + shouldEscape[i] = true; + foundDelimiters = true; + } + } + } + + return foundDelimiters; + } + + private static bool IsCommonMarkDelimiter(char character) + { + return character is '[' or ']' or '(' or ')' or '{' or '}'; } private static string PreserveCommonMarkAmpersands(string rawContent)