diff --git a/src/ReverseMarkdown.Test/ConverterTests.EscapeMarkdownCharsInTextProperly.verified.md b/src/ReverseMarkdown.Test/ConverterTests.EscapeMarkdownCharsInTextProperly.verified.md index 7a227a8..72cdbf6 100644 --- a/src/ReverseMarkdown.Test/ConverterTests.EscapeMarkdownCharsInTextProperly.verified.md +++ b/src/ReverseMarkdown.Test/ConverterTests.EscapeMarkdownCharsInTextProperly.verified.md @@ -1 +1 @@ -\[a-z\]\(\[0-9\]\)\{0,4\} \ No newline at end of file +[a-z]([0-9]){0,4} diff --git a/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsHtmlLinkWithDisallowedCharsInChildren_ThenEscapeTextInMarkdown.verified.md b/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsHtmlLinkWithDisallowedCharsInChildren_ThenEscapeTextInMarkdown.verified.md index 4d2805e..d3e7482 100644 --- a/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsHtmlLinkWithDisallowedCharsInChildren_ThenEscapeTextInMarkdown.verified.md +++ b/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsHtmlLinkWithDisallowedCharsInChildren_ThenEscapeTextInMarkdown.verified.md @@ -1 +1 @@ -[this \\]\( might break things](http://example.com) \ No newline at end of file +[this \]( might break things](http://example.com) diff --git a/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsSingleAsteriskInText_ThenConvertToMarkdownEscapedAsterisk.verified.md b/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsSingleAsteriskInText_ThenConvertToMarkdownEscapedAsterisk.verified.md index adccbaf..e792ec7 100644 --- a/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsSingleAsteriskInText_ThenConvertToMarkdownEscapedAsterisk.verified.md +++ b/src/ReverseMarkdown.Test/ConverterTests.WhenThereIsSingleAsteriskInText_ThenConvertToMarkdownEscapedAsterisk.verified.md @@ -1 +1 @@ -This is a sample\(\*\) paragraph \ No newline at end of file +This is a sample(\*) paragraph diff --git a/src/ReverseMarkdown.Test/ConverterTests.cs b/src/ReverseMarkdown.Test/ConverterTests.cs index 50a2ea3..9676e1e 100644 --- a/src/ReverseMarkdown.Test/ConverterTests.cs +++ b/src/ReverseMarkdown.Test/ConverterTests.cs @@ -117,6 +117,31 @@ public void WhenEscapeMarkdownLineStartsEnabled_ThenEscapeHeadingAndListMarkers( Assert.Equal(@"1\. Point 1", converter.Convert("
1. Point 1
")); } + [Fact] + public void WhenTextContainsBracketsBracesAndParentheses_ThenDoNotEscapeThem() + { + const string html = "This is [a] test of the (reverse) {markdown} system."; + + var converter = new Converter(); + Assert.Equal(html, converter.Convert(html)); + + var commonMarkConverter = new Converter(new Config { CommonMark = true }); + Assert.Equal(html, commonMarkConverter.Convert(html)); + } + + [Fact] + public void WhenCommonMarkTextContainsMarkdownLinkPattern_ThenEscapeOnlyPatternDelimiters() + { + const string html = "This is [a] and [label](https://example.com/path) with {plain} braces."; + + var converter = new Converter(new Config { CommonMark = true }); + + Assert.Equal( + "This is [a] and \\[label\\]\\(https://example.com/path\\) with {plain} braces.", + converter.Convert(html) + ); + } + [Fact] public void WhenOutputLineEndingConfigured_ThenNormalizeOutputLineEndings() { diff --git a/src/ReverseMarkdown/Converters/Text.cs b/src/ReverseMarkdown/Converters/Text.cs index a31a9f4..5c2129c 100644 --- a/src/ReverseMarkdown/Converters/Text.cs +++ b/src/ReverseMarkdown/Converters/Text.cs @@ -1,6 +1,7 @@ -using System; +using System; using System.IO; using System.Linq; +using System.Text; using System.Text.RegularExpressions; using HtmlAgilityPack; using ReverseMarkdown.Helpers; @@ -26,15 +27,6 @@ public Text(Converter converter) : base(converter) [@"\_"] = "_", }; - private static readonly StringReplaceValues _specialMarkdownCharacters = new() { - ["["] = @"\[", - ["]"] = @"\]", - ["("] = @"\(", - [")"] = @"\)", - ["{"] = @"\{", - ["}"] = @"\}", - }; - private static readonly StringReplaceValues _preserveAngleBrackets = new() { ["<"] = "%3C", [">"] = "%3E", @@ -48,6 +40,15 @@ public Text(Converter converter) : base(converter) [GeneratedRegex(@"`.*?`")] private static partial Regex BackTicks(); + [GeneratedRegex(@"!?\[[^\]\r\n]*\]\([^\)\r\n]*\)")] + private static partial Regex CommonMarkInlineLinkOrImagePattern(); + + [GeneratedRegex(@"\[[^\]\r\n]+\]\[[^\]\r\n]*\]")] + private static partial Regex CommonMarkReferenceLinkPattern(); + + [GeneratedRegex(@"(?m)^ {0,3}\[[^\]\r\n]+\]:")] + private static partial Regex CommonMarkLinkDefinitionPattern(); + #endregion @@ -95,7 +96,7 @@ private void TreatText(TextWriter writer, HtmlNode node) rawText.Contains(" p.Value.Replace(_escapedKeyCharsReverse)); } - content = EscapeSpecialMarkdownCharacters(content); - if (isCommonMark) { + content = EscapeSpecialCommonMarkCharacters(content, node); content = content.Replace("`", "\\`"); } @@ -187,15 +187,162 @@ parent.Name is "p" or "#document" && } - private static string EscapeSpecialMarkdownCharacters(string content) + private const string AmpersandPlaceholder = "__REVERSEMARKDOWN_AMP__"; + private const string NbspPlaceholder = "__REVERSEMARKDOWN_NBSP__"; + + private static string EscapeSpecialCommonMarkCharacters(string content, HtmlNode node) + { + var escaped = EscapeSpecialCommonMarkCharacters(content); + return TryGetMarkedDelimiterSequence(node, out var delimiterMarks) + ? EscapeMarkedDelimiters(escaped, delimiterMarks) + : escaped; + } + + private static string EscapeSpecialCommonMarkCharacters(string content) { return content.StartsWith('`') && content.EndsWith('`') ? content - : content.Replace(_specialMarkdownCharacters); + : EscapeCommonMarkPatternDelimiters(content); } - private const string AmpersandPlaceholder = "__REVERSEMARKDOWN_AMP__"; - private const string NbspPlaceholder = "__REVERSEMARKDOWN_NBSP__"; + private static bool TryGetMarkedDelimiterSequence(HtmlNode node, out bool[] delimiterMarks) + { + delimiterMarks = Array.Empty