Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 24 additions & 8 deletions BetterMD/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,26 @@
import logging
from .elements import A, H1, H2, H3, H4, H5, H6, Head, OL, UL, LI, Text, Div, P, Span, Img, B, I, Br, Blockquote, Hr, Table, Tr, Td, Th, THead, TBody, Input, Code
from .html import CustomHTML
from .markdown import CustomMarkdown
from .rst import CustomRst
from .elements import *
from .parse import Collection, HTMLParser, MDParser, RSTParser

def from_html(html:'str'):
    """
    Build a Symbol from HTML markup.

    Thin convenience wrapper that delegates to ``Symbol.from_html``.

    Args:
        html: HTML source text to convert.

    Returns:
        Whatever ``Symbol.from_html`` produces for the given markup.
    """
    parsed = Symbol.from_html(html)
    return parsed

def enable_debug_mode():
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("BetterMD")
def from_md(md:'str'):
    """
    Build a Symbol from Markdown text.

    Thin convenience wrapper that delegates to ``Symbol.from_md``.

    Args:
        md: Markdown source text to convert.

    Returns:
        Whatever ``Symbol.from_md`` produces for the given text.
    """
    parsed = Symbol.from_md(md)
    return parsed
118 changes: 106 additions & 12 deletions BetterMD/elements/a.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,117 @@
from BetterMD.rst.custom_rst import CustomRst
from .symbol import Symbol
from ..rst import CustomRst
from ..markdown import CustomMarkdown
from ..html import CustomHTML
import re
import typing as t

class MD(CustomMarkdown['A']):
def to_md(self, inner, symbol, parent, **kwargs):
return f"[{" ".join([e.to_md(**kwargs) for e in inner])}]({symbol.get_prop("href")})"
if t.TYPE_CHECKING:
from ..parse import Collection

class MD(CustomMarkdown):
def to_md(self, inner, symbol, parent):
    """
    Render the link as inline Markdown: ``[text](href)``.

    The link text is the Markdown of every element in ``inner`` joined by
    single spaces; the target is the symbol's ``href`` property.

    Args:
        inner: Iterable of elements exposing a no-argument ``to_md()``.
        symbol: Object whose ``get_prop("href")`` yields the link target.
        parent: Unused; accepted for interface compatibility.

    Returns:
        The Markdown hyperlink string.
    """
    # Build the pieces outside the f-string: reusing the enclosing quote
    # character inside a replacement field only parses on Python 3.12+.
    label = " ".join(e.to_md() for e in inner)
    href = symbol.get_prop("href")
    return f"[{label}]({href})"

def verify(self, text:'str'):
    """
    Report whether ``text`` contains a Markdown hyperlink.

    Three link forms are recognised, all restricted to http/https targets:
    inline links (``[text](url)``), automatic links (``<url>``) and
    reference links (``[text][ref]`` immediately followed by a
    ``[ref]: url`` definition).

    Args:
        text: Markdown source to inspect.

    Returns:
        True if at least one of the link forms occurs in ``text``,
        otherwise False.
    """
    # Raw strings keep the regex escapes intact; in an ordinary string
    # literal they are invalid escape sequences and trigger warnings.
    link_patterns = (
        r"\[([^\]]+)\]\((https?:\/\/[^\s)]+)\)",  # inline link
        r"<(https?:\/\/[^\s>]+)>",  # automatic link
        r"\[([^\]]+)\]\[([^\]]+)\]\s*\n?\[([^\]]+)\]:\s*(https?:\/\/[^\s]+)",  # reference link
    )
    # Only existence matters, so stop at the first match instead of
    # collecting every match with findall.
    return any(re.search(pattern, text) for pattern in link_patterns)

class HTML(CustomHTML['A']):
def to_html(self, inner, symbol, parent, **kwargs):
return f"<a href={symbol.get_prop('href')}>{" ".join([e.to_html(**kwargs) for e in inner])}</a>"

class RST(CustomRst['A']):
def to_rst(self, inner, symbol, parent, **kwargs):
return f"`{' '.join([e.to_rst(**kwargs) for e in inner])} <{symbol.get_prop('href')}>`_"
def to_rst(self, inner, symbol, parent):
    """
    Render the link as an RST hyperlink: ```text <href>`_``.

    The link text is the RST of every element in ``inner`` joined by single
    spaces; the target is the symbol's ``href`` property.

    Parameters:
        inner: Iterable of elements exposing a no-argument ``to_rst()``.
        symbol: Object whose ``get_prop('href')`` yields the link target.
        parent: Unused; accepted for interface compatibility.

    Returns:
        The RST hyperlink string.
    """
    rendered_parts = [e.to_rst() for e in inner]
    link_text = ' '.join(rendered_parts)
    target = symbol.get_prop('href')
    return f"`{link_text} <{target}>`_"

class A(Symbol):
prop_list = ["href"]

refs = {}
md = MD()
html = HTML()
rst = RST()
html = "a"
rst = RST()

@classmethod
def md_refs(cls, references: 'list[str]' = None):
    """
    Placeholder hook for Markdown reference handling.

    Accepts an optional list of Markdown reference strings and currently
    ignores it: nothing is read, stored, or returned.

    Args:
        references (list[str], optional): Markdown reference strings.
    """
    return None

@classmethod
def rst_refs(cls, references: 'list[str]' = None):
    """
    Placeholder hook for reStructuredText reference handling.

    Accepts an optional list of RST reference strings and currently ignores
    it: nothing is read, stored, or returned.

    Parameters:
        references (list[str], optional): RST reference strings. Defaults to None.
    """
    return None

@classmethod
def html_refs(cls, references: 'list[str]' = None):
    """
    Placeholder hook for HTML reference handling.

    Accepts an optional list of HTML reference strings and currently ignores
    it: nothing is read, stored, or returned.
    """
    return None
117 changes: 104 additions & 13 deletions BetterMD/elements/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,125 @@
from .text import Text
from ..markdown import CustomMarkdown
from ..html import CustomHTML
from ..rst import CustomRst

class MD(CustomMarkdown['Code']):
def to_md(self, inner, symbol, parent, **kwargs):
language = symbol.get_prop("language", "")
class MD(CustomMarkdown):
def to_md(self, inner, symbol, parent):
"""
Converts content into Markdown code formatting.

If the input is a Text instance, it is first converted using its own Markdown method. When a language is specified in the symbol or the content is multiline, the method formats the content as a code block with triple backticks and a language identifier. Otherwise, it returns the content wrapped in single backticks for inline code.

Parameters:
inner: The content to convert, which may be a raw string or a Text object.
symbol: An object from which the programming language is retrieved.
parent: A placeholder parameter for the parent element (currently unused).

content = " ".join([e.to_md(**kwargs) for e in inner])
Returns:
A string containing the Markdown formatted code.
"""
language = symbol.get_prop("language", "")
if isinstance(inner, Text):
inner = inner.to_md()

# If it's a code block (has language or multiline)
if language or "\n" in inner:
return f"```{language}\n{content}\n```\n"
return f"```{language}\n{inner}\n```\n"

# Inline code
return f"`{content}`"
return f"`{inner}`"

class HTML(CustomHTML):
def to_html(self, inner, symbol, parent, **kwargs):
language = symbol.get_prop("language", "")
def to_html(self, inner, symbol, parent):
"""
Generate HTML markup for a code block.

content = " ".join([e.to_html(**kwargs) for e in inner])
Converts a list of elements to their HTML representations by joining the results
of each item’s `to_html()` method with newline characters, and wraps the result in an
HTML <code> tag. If the symbol specifies a programming language via its 'language'
property, a corresponding language-specific class is added to the tag.

Parameters:
inner: A list of objects with a `to_html()` method.
symbol: An object with properties (including an optional 'language') used for formatting.
parent: A placeholder for potential hierarchical context (unused).

Returns:
A string containing the HTML markup for the code.
"""
language = symbol.get_prop("language", "")
inner = "\n".join([i.to_html() for i in inner])

if language:
return f'<pre><code class="language-{language}">{content}</code></pre>'
return f'<code class="language-{language}">{inner}</code>'

return f"<code>{inner}</code>"

def verify(self, text: str) -> bool:
"""
Checks if the provided text equals "code", ignoring case.

Args:
text: The string to verify.

return f"<code>{content}</code>"
Returns:
bool: True if text equals "code" (case-insensitive), otherwise False.
"""
return text.lower() == "code"

class RST(CustomRst):
def to_rst(self, inner, symbol, parent):
    """
    Render code content as reStructuredText.

    Content that has a language or spans several lines becomes a block:
    a ``.. code-block:: <language>`` directive when the symbol has a
    ``language`` property, otherwise a plain ``::`` literal block. Anything
    else becomes an inline literal.

    Parameters:
        inner: A list of items or a single item. ``Symbol`` instances are
            rendered with ``to_rst()``; everything else with ``str()``.
        symbol: Object whose ``get_prop("language", "")`` names the language.
        parent: Unused; accepted for interface compatibility.

    Returns:
        str: The reStructuredText representation of the code.
    """
    language = symbol.get_prop("language", "")

    # Treat a single item like a one-element list so both shapes share
    # the same rendering path.
    items = inner if isinstance(inner, list) else [inner]
    content = "".join(
        item.to_rst() if isinstance(item, Symbol) else str(item)
        for item in items
    )

    # Code block: has a language or is multiline.
    if language or "\n" in content:
        # Block bodies are indented by three spaces so they sit under the
        # directive / literal-block marker.
        indented_content = "\n".join(
            f"   {line}" for line in content.strip().split("\n")
        )
        # Language-specific blocks use the code-block directive; others
        # fall back to a plain literal block.
        header = f".. code-block:: {language}" if language else "::"
        return f"{header}\n\n{indented_content}\n\n"

    # Inline literals are delimited by double backquotes; single backquotes
    # mark interpreted text in RST, not code, so always use the double form.
    return f"``{content}``"

class Code(Symbol):
prop_list = ["language"]
html = HTML()
md = MD()
rst = "``"
rst = RST()
nl = True
38 changes: 17 additions & 21 deletions BetterMD/elements/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,28 @@
from ..markdown import CustomMarkdown
from ..rst import CustomRst

class HTML(CustomHTML):
def to_html(self, inner, symbol, parent, **kwargs):
# Collect all input attributes
attrs = []
for prop in Input.props:
value = symbol.get_prop(prop)
if value:
# Handle boolean attributes like 'required', 'disabled', etc.
if isinstance(value, bool) and value:
attrs.append(prop)
else:
attrs.append(f'{prop}="{value}"')

attrs_str = " ".join(attrs)
return f"<input {attrs_str} />"

class MD(CustomMarkdown):
def to_md(self, inner, symbol, parent, **kwargs):
def to_md(self, inner, symbol, parent):
"""
Converts an input symbol to its Markdown representation.

If the symbol is of type "checkbox", returns a Markdown list item that displays the
checkbox's status ("x" if checked, a space if not) followed by the inner content’s Markdown.
For other types, returns the symbol’s HTML representation.
"""
if symbol.get_prop("type") == "checkbox":
return f"- [{'x' if symbol.get_prop('checked', '') else ''}] {inner.to_md()}"
return f"- [{'x' if symbol.get_prop('checked', '') else ' '}] {inner.to_md()}"
return symbol.to_html()

class RST(CustomRst):
def to_rst(self, inner, symbol, parent, **kwargs):
def to_rst(self, inner, symbol, parent):
"""
Generate a reStructuredText representation of an input symbol.

If the symbol's type is "checkbox", returns a formatted checkbox with an "x" when checked or a space when not, optionally followed by inner content rendered in RST. For other input types, returns an empty string.
"""
if symbol.get_prop("type") == "checkbox":
return f"[ ] {inner.to_rst() if inner else ''}"
return f"[{'x' if symbol.get_prop('checked', '') else ' '}] {inner.to_rst() if inner else ''}"
return "" # Most input types don't have RST equivalents

class Input(Symbol):
Expand All @@ -50,6 +46,6 @@ class Input(Symbol):
"multiple",
"step"
]
html = HTML()
html = "input"
md = MD()
rst = RST()
Loading