mutablelogic
diff --git a/README.md b/README.md
Lines changed: 200 additions & 1 deletion
diff --git a/doc.go b/doc.go
Lines changed: 31 additions & 1 deletion
diff --git a/pkg/ast/kind.go b/pkg/ast/kind.go
Lines changed: 102 additions & 0 deletions
diff --git a/pkg/ast/node.go b/pkg/ast/node.go
Lines changed: 54 additions & 0 deletions
@@ -1,3 +1,202 @@
 # go-tokenizer
 
-General Tokenizer and Abstract Syntax Tree Generator
+A general-purpose tokenizer and Markdown parser with HTML rendering for Go.
+
+[![Go Reference](https://pkg.go.dev/badge/github.com/mutablelogic/go-tokenizer.svg)](https://pkg.go.dev/github.com/mutablelogic/go-tokenizer)
+
+## Features
+
+- **Lexical Scanner**: Tokenizes text into identifiers, numbers, strings, operators, and punctuation
+- **Markdown Parser**: Converts Markdown text into an Abstract Syntax Tree (AST)
+- **HTML Renderer**: Renders Markdown AST to HTML with proper escaping
+- **Configurable**: Optional features like comment parsing, newline handling, and float parsing
+
+## Installation
+
+```bash
+go get github.com/mutablelogic/go-tokenizer
+```
+
+Requires Go 1.23 or later.
+
+## Quick Start
+
+### Tokenizing Text
+
+```go
+package main
+
+import (
+    "fmt"
+    "strings"
+    
+    "github.com/mutablelogic/go-tokenizer"
+)
+
+func main() {
+    scanner := tokenizer.NewScanner(strings.NewReader("hello world 123"), tokenizer.Pos{})
+    for {
+        tok := scanner.Next()
+        if tok.Kind == tokenizer.EOF {
+            break
+        }
+        fmt.Printf("%s: %q\n", tok.Kind, tok.Value)
+    }
+}
+```
+
+Output:
+
+```text
+Ident: "hello"
+Space: " "
+Ident: "world"
+Space: " "
+NumberInteger: "123"
+```
+
+### Parsing Markdown
+
+```go
+package main
+
+import (
+    "fmt"
+    "strings"
+    
+    "github.com/mutablelogic/go-tokenizer"
+    "github.com/mutablelogic/go-tokenizer/pkg/markdown"
+    "github.com/mutablelogic/go-tokenizer/pkg/markdown/html"
+)
+
+func main() {
+    input := `# Hello World
+
+This is **bold** and _italic_ text.
+
+- Item 1
+- Item 2
+- Item 3
+`
+    doc := markdown.Parse(strings.NewReader(input), tokenizer.Pos{})
+    output := html.RenderString(doc)
+    fmt.Println(output)
+}
+```
+
+Output:
+
+```html
+<h1>Hello World</h1><p>This is <strong>bold</strong> and <em>italic</em> text.</p><ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>
+```
+
+## Packages
+
+### `tokenizer` (root package)
+
+The lexical scanner that breaks input text into tokens.
+
+**Token Types:**
+
+- `Ident` - Identifiers (hello, world)
+- `NumberInteger`, `NumberFloat`, `NumberHex`, `NumberOctal`, `NumberBinary` - Numbers
+- `String`, `QuotedString` - String literals
+- `Hash`, `Asterisk`, `Underscore`, `Backtick`, `Tilde` - Special characters
+- `Space`, `Newline` - Whitespace
+- `Comment` - Comments (when enabled)
+- And more...
+
+**Scanner Features:**
+
+```go
+// Enable features with bitwise OR
+scanner := tokenizer.NewScanner(r, pos, 
+    tokenizer.HashComment |      // # style comments
+    tokenizer.LineComment |      // // style comments  
+    tokenizer.BlockComment |     // /* */ style comments
+    tokenizer.NewlineToken |     // Emit newlines as separate tokens
+    tokenizer.UnderscoreToken |  // Emit underscores as separate tokens
+    tokenizer.NumberFloatToken,  // Parse floating point numbers
+)
+```
+
+### `pkg/ast`
+
+Defines the AST node types and tree traversal.
+
+```go
+// Node interface
+type Node interface {
+    Kind() Kind
+    Children() []Node
+}
+
+// Walk the AST
+ast.Walk(doc, func(node ast.Node, depth int) error {
+    fmt.Printf("%s%s\n", strings.Repeat("  ", depth), node.Kind())
+    return nil
+})
+```
+
+### `pkg/markdown`
+
+Parses Markdown text into an AST.
+
+**Supported Syntax:**
+
+- Headings: `# H1` through `###### H6`
+- Paragraphs: Text separated by blank lines
+- Emphasis: `_italic_` or `*italic*`
+- Strong: `__bold__` or `**bold**`
+- Strikethrough: `~~deleted~~`
+- Inline code: `` `code` ``
+- Code blocks: ` ```language ... ``` `
+- Links: `[text](url)` or `<url>`
+- Images: `![alt](url)`
+- Blockquotes: `> quoted text`
+- Unordered lists: `- item`, `* item`, or `+ item`
+- Ordered lists: `1. item` or `1) item`
+- Horizontal rules: `---`, `***`, or `___`
+
+### `pkg/markdown/html`
+
+Renders Markdown AST to HTML.
+
+```go
+// Render to string
+output := html.RenderString(doc)
+
+// Render to io.Writer with indentation
+renderer := html.NewRenderer(w).WithIndent(true)
+err := renderer.Render(doc)
+```
+
+**Features:**
+
+- Proper HTML escaping for XSS prevention
+- Optional indented output for readability
+- Language classes on code blocks: `<code class="language-go">`
+
+## AST Node Types
+
+| Kind | Description | HTML Output |
+|------|-------------|-------------|
+| `Document` | Root node | (container) |
+| `Paragraph` | Text block | `<p>...</p>` |
+| `Heading` | H1-H6 | `<h1>...</h1>` |
+| `Text` | Plain text | (escaped text) |
+| `Emphasis` | Italic | `<em>...</em>` |
+| `Strong` | Bold | `<strong>...</strong>` |
+| `Strikethrough` | Deleted | `<del>...</del>` |
+| `Code` | Inline code | `<code>...</code>` |
+| `CodeBlock` | Fenced code | `<pre><code>...</code></pre>` |
+| `Link` | Hyperlink | `<a href="...">...</a>` |
+| `Image` | Image | `<img src="..." alt="..."/>` |
+| `Blockquote` | Quote | `<blockquote>...</blockquote>` |
+| `List` | Ordered/Unordered | `<ol>...</ol>` or `<ul>...</ul>` |
+| `ListItem` | List item | `<li>...</li>` |
+| `HorizontalRule` | Divider | `<hr/>` |
+
+## License
+
+Apache 2.0 - see [LICENSE](LICENSE) for details.
@@ -1,4 +1,34 @@
 /*
-The `tokenizer` package implements a generic expression scanner for tokens
+Package tokenizer implements a generic lexical scanner for tokenizing text input.
+
+The tokenizer breaks input text into tokens such as identifiers, numbers, strings,
+operators, and punctuation. It supports various number formats (integer, float,
+hex, octal, binary) and can be configured with optional features like comment
+parsing and newline handling.
+
+# Basic Usage
+
+	scanner := tokenizer.NewScanner(strings.NewReader("hello world"), tokenizer.Pos{})
+	for {
+		tok := scanner.Next()
+		if tok.Kind == tokenizer.EOF {
+			break
+		}
+		fmt.Println(tok)
+	}
+
+# Features
+
+The scanner supports optional features that can be enabled:
+
+  - HashComment: Enable # style single-line comments
+  - LineComment: Enable // style single-line comments
+  - BlockComment: Enable C-style block comments
+  - UnderscoreToken: Emit underscores as separate tokens (for markdown parsing)
+  - NewlineToken: Emit newlines as separate tokens instead of whitespace
+  - NumberFloatToken: Parse floating point numbers
+
+Features are combined using bitwise OR:
+
+	scanner := tokenizer.NewScanner(r, pos, tokenizer.HashComment|tokenizer.LineComment)
 */
 package tokenizer
@@ -0,0 +1,102 @@
+package ast
+
+///////////////////////////////////////////////////////////////////////////////
+// TYPES
+
+// Kind identifies what an AST node represents, for example a document,
+// a paragraph, a heading, or inline formatting such as emphasis.
+// Its String method returns the constant's name for every defined value.
+type Kind uint
+
+///////////////////////////////////////////////////////////////////////////////
+// GLOBALS
+
+const (
+	Any Kind = iota
+	Attr
+	Block
+	BlockList
+	Ident
+	Label
+	List
+	Map
+	Ref
+	String
+
+	// Markdown kinds (values continue the iota sequence above; do not reorder)
+	Document       // root node of a parsed document
+	Paragraph      // block of text
+	Heading        // heading, levels H1 through H6
+	CodeBlock      // fenced code block
+	Blockquote     // > quoted text
+	ListItem       // single item within a list
+	Text           // plain text
+	Emphasis       // *italic* or _italic_
+	Strong         // **bold** or __bold__
+	Strikethrough  // ~~deleted~~
+	Code           // `code`
+	Link           // [text](url)
+	Image          // ![alt](url)
+	HorizontalRule // --- or *** or ___
+	LineBreak      // presumably a hard line break inside a block; TODO confirm against the parser
+)
+
+///////////////////////////////////////////////////////////////////////////////
+// STRINGIFY
+
+func (k Kind) String() string {
+	switch k {
+	case Any:
+		return "Any"
+	case Attr:
+		return "Attr"
+	case Block:
+		return "Block"
+	case BlockList:
+		return "BlockList"
+	case Ident:
+		return "Ident"
+	case Label:
+		return "Label"
+	case List:
+		return "List"
+	case Map:
+		return "Map"
+	case Ref:
+		return "Ref"
+	case String:
+		return "String"
+	case Document:
+		return "Document"
+	case Paragraph:
+		return "Paragraph"
+	case Heading:
+		return "Heading"
+	case CodeBlock:
+		return "CodeBlock"
+	case Blockquote:
+		return "Blockquote"
+	case ListItem:
+		return "ListItem"
+	case Text:
+		return "Text"
+	case Emphasis:
+		return "Emphasis"
+	case Strong:
+		return "Strong"
+	case Strikethrough:
+		return "Strikethrough"
+	case Code:
+		return "Code"
+	case Link:
+		return "Link"
+	case Image:
+		return "Image"
+	case HorizontalRule:
+		return "HorizontalRule"
+	case LineBreak:
+		return "LineBreak"
+	default:
+		return "[?? Invalid Kind value]"
+	}
+}
@@ -0,0 +1,54 @@
+// Package ast defines the abstract syntax tree node types used by parsers.
+// It provides a common Node interface that all AST nodes implement,
+// allowing uniform traversal of syntax trees.
+package ast
+
+///////////////////////////////////////////////////////////////////////////////
+// TYPES
+
+// Node is the interface implemented by all AST nodes.
+// It exposes the node's kind and its immediate children, which is all that Walk needs to traverse a tree.
+type Node interface {
+	// Kind returns the type of this node (e.g., Document, Paragraph, Text).
+	Kind() Kind
+
+	// Children returns the immediate child nodes of this node, in the order Walk visits them.
+	// Leaf nodes (like Text) return nil; Walk treats a nil or empty result as having no children.
+	Children() []Node
+}
+
+// WalkFunc is the callback type invoked by Walk for every node it visits.
+// It is passed the node being visited and that node's depth in the tree (0 for the root).
+// Returning a non-nil error stops the walk and becomes Walk's result; returning nil continues.
+type WalkFunc func(node Node, depth int) error
+
+///////////////////////////////////////////////////////////////////////////////
+// PUBLIC FUNCTIONS
+
+// Walk traverses the AST starting from the given node, calling fn for each node
+// in depth-first pre-order (parent before children). The depth parameter indicates
+// how deep in the tree the current node is (0 for the root).
+// If fn returns an error, walking stops and that error is returned.
+func Walk(node Node, fn WalkFunc) error {
+	return walk(node, fn, 0)
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// PRIVATE FUNCTIONS
+
+// walk is the recursive worker behind Walk. It reports node to fn at the
+// given depth and then descends into each child at depth+1, stopping at the
+// first error. A nil node is skipped and yields nil.
+func walk(node Node, fn WalkFunc, depth int) error {
+	if node == nil {
+		return nil
+	}
+
+	// Pre-order: the node itself is reported before any of its children.
+	err := fn(node, depth)
+	if err != nil {
+		return err
+	}
+
+	// Descend into the children in order; the loop ends as soon as one
+	// subtree reports an error, which is then returned unchanged.
+	kids := node.Children()
+	for i := 0; i < len(kids) && err == nil; i++ {
+		err = walk(kids[i], fn, depth+1)
+	}
+	return err
+}