Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1 +1 @@
\[a-z\]\(\[0-9\]\)\{0,4\}
[a-z]([0-9]){0,4}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[this \\]\( might break things](http://example.com)
[this \]( might break things](http://example.com)
Original file line number Diff line number Diff line change
@@ -1 +1 @@
This is a sample\(\*\) paragraph
This is a sample(\*) paragraph
25 changes: 25 additions & 0 deletions src/ReverseMarkdown.Test/ConverterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,31 @@ public void WhenEscapeMarkdownLineStartsEnabled_ThenEscapeHeadingAndListMarkers(
Assert.Equal(@"1\. Point 1", converter.Convert("<p>1. Point 1</p>"));
}

// Plain text that merely contains brackets, parentheses and braces (no link-like
// pattern) must come back unchanged, both with the default configuration and with
// CommonMark enabled.
[Fact]
public void WhenTextContainsBracketsBracesAndParentheses_ThenDoNotEscapeThem()
{
    const string input = "This is [a] test of the (reverse) {markdown} system.";

    // Default configuration.
    var defaultOutput = new Converter().Convert(input);
    Assert.Equal(input, defaultOutput);

    // CommonMark configuration.
    var commonMarkConfig = new Config { CommonMark = true };
    var commonMarkOutput = new Converter(commonMarkConfig).Convert(input);
    Assert.Equal(input, commonMarkOutput);
}

// In CommonMark mode only the delimiters that form a "[label](target)" pattern are
// expected to be backslash-escaped; the stand-alone "[a]" and "{plain}" stay as-is.
[Fact]
public void WhenCommonMarkTextContainsMarkdownLinkPattern_ThenEscapeOnlyPatternDelimiters()
{
    const string input = "This is [a] and [label](https://example.com/path) with {plain} braces.";
    const string expected =
        "This is [a] and \\[label\\]\\(https://example.com/path\\) with {plain} braces.";

    var commonMarkConverter = new Converter(new Config { CommonMark = true });
    var actual = commonMarkConverter.Convert(input);

    Assert.Equal(expected, actual);
}

[Fact]
public void WhenOutputLineEndingConfigured_ThenNormalizeOutputLineEndings()
{
Expand Down
181 changes: 164 additions & 17 deletions src/ReverseMarkdown/Converters/Text.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
using ReverseMarkdown.Helpers;
Expand All @@ -26,15 +27,6 @@ public Text(Converter converter) : base(converter)
[@"\_"] = "_",
};

// Maps each square bracket, parenthesis and curly brace to its backslash-escaped form.
private static readonly StringReplaceValues _specialMarkdownCharacters = new() {
["["] = @"\[",
["]"] = @"\]",
["("] = @"\(",
[")"] = @"\)",
["{"] = @"\{",
["}"] = @"\}",
};

private static readonly StringReplaceValues _preserveAngleBrackets = new() {
["&lt;"] = "%3C",
["&gt;"] = "%3E",
Expand All @@ -48,6 +40,15 @@ public Text(Converter converter) : base(converter)
[GeneratedRegex(@"`.*?`")]
private static partial Regex BackTicks();

// Single-line inline link or image: an optional '!', then "[...]" (text may be empty)
// immediately followed by "(...)" (target may be empty).
[GeneratedRegex(@"!?\[[^\]\r\n]*\]\([^\)\r\n]*\)")]
private static partial Regex CommonMarkInlineLinkOrImagePattern();

// Single-line reference link: "[label]" with a non-empty label immediately followed
// by "[...]" whose content may be empty.
[GeneratedRegex(@"\[[^\]\r\n]+\]\[[^\]\r\n]*\]")]
private static partial Regex CommonMarkReferenceLinkPattern();

// Start of a link reference definition: at the beginning of a line (multiline mode),
// up to three spaces, then "[label]:" with a non-empty label.
[GeneratedRegex(@"(?m)^ {0,3}\[[^\]\r\n]+\]:")]
private static partial Regex CommonMarkLinkDefinitionPattern();

#endregion


Expand Down Expand Up @@ -95,7 +96,7 @@ private void TreatText(TextWriter writer, HtmlNode node)
rawText.Contains("<![CDATA[", StringComparison.Ordinal) ||
rawText.Contains("</", StringComparison.Ordinal) ||
rawText.Contains("<!", StringComparison.Ordinal))) {
writer.Write(rawText);
writer.Write(EscapeSpecialCommonMarkCharacters(rawText));
return;
}
var text = isCommonMark
Expand Down Expand Up @@ -173,9 +174,8 @@ parent.Name is "p" or "#document" &&
content = BackTicks().Replace(content, p => p.Value.Replace(_escapedKeyCharsReverse));
}

content = EscapeSpecialMarkdownCharacters(content);

if (isCommonMark) {
content = EscapeSpecialCommonMarkCharacters(content, node);
content = content.Replace("`", "\\`");
}

Expand All @@ -187,15 +187,162 @@ parent.Name is "p" or "#document" &&
}


private static string EscapeSpecialMarkdownCharacters(string content)
// Placeholder tokens.
// NOTE(review): presumably substituted for ampersands / non-breaking spaces while text
// is being processed and swapped back afterwards; the usage is not visible here - confirm.
private const string AmpersandPlaceholder = "__REVERSEMARKDOWN_AMP__";
private const string NbspPlaceholder = "__REVERSEMARKDOWN_NBSP__";

/// <summary>
/// Escapes link-like delimiters in <paramref name="content"/>, first by looking at the
/// content alone and then, when the surrounding sibling nodes complete a link-like
/// pattern, by additionally escaping the delimiters of this node that take part in it.
/// </summary>
/// <param name="content">Text produced for <paramref name="node"/>.</param>
/// <param name="node">The node the text belongs to; its parent's children are inspected.</param>
/// <returns>The text with the relevant delimiters backslash-escaped.</returns>
private static string EscapeSpecialCommonMarkCharacters(string content, HtmlNode node)
{
    // Pass 1: patterns fully contained in this node's own text.
    var locallyEscaped = EscapeSpecialCommonMarkCharacters(content);

    // Pass 2: patterns that only appear once sibling text is taken into account.
    if (!TryGetMarkedDelimiterSequence(node, out var siblingMarks)) {
        return locallyEscaped;
    }

    return EscapeMarkedDelimiters(locallyEscaped, siblingMarks);
}

/// <summary>
/// Escapes the delimiters of link-like patterns found in <paramref name="content"/>.
/// </summary>
/// <param name="content">Text to inspect.</param>
/// <returns>
/// <paramref name="content"/> unchanged when it both starts and ends with a backtick;
/// otherwise the result of <c>EscapeCommonMarkPatternDelimiters</c>.
/// </returns>
private static string EscapeSpecialCommonMarkCharacters(string content)
{
    // Text wrapped in backticks is returned untouched.
    if (content.StartsWith('`') && content.EndsWith('`')) {
        return content;
    }

    // Only delimiters that belong to a link-like pattern are escaped; brackets,
    // parentheses and braces in ordinary text are left alone.
    return EscapeCommonMarkPatternDelimiters(content);
}

private const string AmpersandPlaceholder = "__REVERSEMARKDOWN_AMP__";
private const string NbspPlaceholder = "__REVERSEMARKDOWN_NBSP__";
/// <summary>
/// Works out which delimiter characters of <paramref name="node"/> take part in a
/// link-like pattern once the text of all of the parent's children is joined together.
/// </summary>
/// <param name="node">Node whose delimiters are examined.</param>
/// <param name="delimiterMarks">
/// On success, one flag per delimiter character of the node's inner text, in order of
/// appearance; <c>true</c> means that delimiter lies inside a matched pattern.
/// An empty array on failure.
/// </param>
/// <returns>
/// <c>true</c> only when at least one delimiter of the node is flagged. <c>false</c> when
/// the node has no parent, the parent has fewer than two children, every child is a text
/// node, the node is not among the children, its inner text is empty, or nothing matched.
/// </returns>
private static bool TryGetMarkedDelimiterSequence(HtmlNode node, out bool[] delimiterMarks)
{
    delimiterMarks = Array.Empty<bool>();

    var container = node.ParentNode;
    if (container == null || container.ChildNodes.Count < 2) {
        return false;
    }

    // Nothing to gain when all siblings are plain text nodes.
    if (container.ChildNodes.All(sibling => sibling.NodeType == HtmlNodeType.Text)) {
        return false;
    }

    var ownText = node.InnerText;
    var joined = new StringBuilder();
    var ownOffset = -1;

    // Join the siblings' text, remembering where this node's text begins.
    foreach (var sibling in container.ChildNodes) {
        if (sibling == node) {
            ownOffset = joined.Length;
        }

        joined.Append(sibling.InnerText);
    }

    if (ownOffset < 0 || string.IsNullOrEmpty(ownText)) {
        return false;
    }

    var joinedText = joined.ToString();
    var flags = new bool[joinedText.Length];

    // Every pattern has to run (no short-circuiting) so that all matches get flagged.
    var inlineHit = MarkCommonMarkPatternDelimiters(flags, joinedText, CommonMarkInlineLinkOrImagePattern());
    var referenceHit = MarkCommonMarkPatternDelimiters(flags, joinedText, CommonMarkReferenceLinkPattern());
    var definitionHit = MarkCommonMarkPatternDelimiters(flags, joinedText, CommonMarkLinkDefinitionPattern());

    if (!(inlineHit || referenceHit || definitionHit)) {
        return false;
    }

    // Project the flags of the joined text back onto this node's delimiters.
    var ownMarks = new bool[ownText.Count(IsCommonMarkDelimiter)];
    var slot = 0;
    var anyFlagged = false;
    for (var offset = 0; offset < ownText.Length; offset++) {
        if (IsCommonMarkDelimiter(ownText[offset])) {
            var flagged = flags[ownOffset + offset];
            ownMarks[slot] = flagged;
            slot++;
            anyFlagged = anyFlagged || flagged;
        }
    }

    if (!anyFlagged) {
        return false;
    }

    delimiterMarks = ownMarks;
    return true;
}

/// <summary>
/// Inserts a backslash before each delimiter of <paramref name="content"/> whose
/// ordinal position among the delimiters is flagged in <paramref name="delimiterMarks"/>.
/// </summary>
/// <param name="content">Text to process; returned as-is when null or empty.</param>
/// <param name="delimiterMarks">
/// Flags indexed by delimiter occurrence (first delimiter = index 0). Delimiters beyond
/// the end of the array are treated as not flagged.
/// </param>
/// <returns>The text with the flagged delimiters escaped.</returns>
private static string EscapeMarkedDelimiters(string content, bool[] delimiterMarks)
{
    if (string.IsNullOrEmpty(content) || delimiterMarks.Length == 0) {
        return content;
    }

    var builder = new StringBuilder(content.Length);
    var seenDelimiters = 0;
    var precededByBackslash = false;

    foreach (var symbol in content) {
        if (IsCommonMarkDelimiter(symbol)) {
            var flagged = seenDelimiters < delimiterMarks.Length && delimiterMarks[seenDelimiters];
            seenDelimiters++;

            // A delimiter that directly follows a backslash is not escaped again.
            if (flagged && !precededByBackslash) {
                builder.Append('\\');
            }
        }

        builder.Append(symbol);
        precededByBackslash = symbol == '\\';
    }

    return builder.ToString();
}

/// <summary>
/// Backslash-escapes the bracket, parenthesis and brace characters that fall inside an
/// inline link/image, reference link or link definition pattern found in
/// <paramref name="content"/>.
/// </summary>
/// <param name="content">Text to process; returned as-is when null or empty.</param>
/// <returns>
/// The original string when no pattern contributes a delimiter, otherwise a copy with
/// the matched delimiters escaped.
/// </returns>
private static string EscapeCommonMarkPatternDelimiters(string content)
{
    if (string.IsNullOrEmpty(content)) {
        return content;
    }

    var flags = new bool[content.Length];

    // All three patterns must be evaluated so that every match is flagged.
    var inlineHit = MarkCommonMarkPatternDelimiters(flags, content, CommonMarkInlineLinkOrImagePattern());
    var referenceHit = MarkCommonMarkPatternDelimiters(flags, content, CommonMarkReferenceLinkPattern());
    var definitionHit = MarkCommonMarkPatternDelimiters(flags, content, CommonMarkLinkDefinitionPattern());

    if (!inlineHit && !referenceHit && !definitionHit) {
        return content;
    }

    var builder = new StringBuilder(content.Length);
    var precededByBackslash = false;
    for (var position = 0; position < content.Length; position++) {
        var symbol = content[position];

        // A flagged character that directly follows a backslash is not escaped again.
        if (flags[position] && !precededByBackslash) {
            builder.Append('\\');
        }

        builder.Append(symbol);
        precededByBackslash = symbol == '\\';
    }

    return builder.ToString();
}

/// <summary>
/// Flags, in <paramref name="shouldEscape"/>, every delimiter character of
/// <paramref name="content"/> that lies inside a match of <paramref name="pattern"/>.
/// </summary>
/// <param name="shouldEscape">Flag array indexed like <paramref name="content"/>; only set to <c>true</c>, never cleared.</param>
/// <param name="content">Text the pattern is run against.</param>
/// <param name="pattern">Pattern whose matches are scanned for delimiters.</param>
/// <returns><c>true</c> when at least one delimiter was flagged by this call.</returns>
private static bool MarkCommonMarkPatternDelimiters(bool[] shouldEscape, string content, Regex pattern)
{
    var flaggedAny = false;

    for (var hit = pattern.Match(content); hit.Success; hit = hit.NextMatch()) {
        var stop = hit.Index + hit.Length;
        for (var position = hit.Index; position < stop; position++) {
            if (!IsCommonMarkDelimiter(content[position])) {
                continue;
            }

            shouldEscape[position] = true;
            flaggedAny = true;
        }
    }

    return flaggedAny;
}

/// <summary>
/// Tells whether <paramref name="character"/> is a square bracket, a parenthesis or a
/// curly brace (opening or closing).
/// </summary>
/// <param name="character">Character to classify.</param>
/// <returns><c>true</c> for one of <c>[ ] ( ) { }</c>; otherwise <c>false</c>.</returns>
private static bool IsCommonMarkDelimiter(char character)
{
    switch (character) {
        case '[':
        case ']':
        case '(':
        case ')':
        case '{':
        case '}':
            return true;
        default:
            return false;
    }
}

private static string PreserveCommonMarkAmpersands(string rawContent)
{
Expand Down
Loading