diff --git a/next.config.ts b/next.config.ts index 3294967c1a..a1277d13ce 100644 --- a/next.config.ts +++ b/next.config.ts @@ -1,7 +1,7 @@ import type { NextConfig } from "next"; const nextConfig = { - serverExternalPackages: ["@tailwindcss/node"], + serverExternalPackages: ["@tailwindcss/node", "markdown-to-jsx"], pageExtensions: ["js", "jsx", "ts", "tsx", "mdx"], outputFileTracingIncludes: { "/**/*": ["./src/docs/*.mdx"], diff --git a/package.json b/package.json index 4e0fed5fba..bd190ff655 100644 --- a/package.json +++ b/package.json @@ -21,6 +21,7 @@ "fathom-client": "^3.7.2", "feed": "^5.1.0", "framer-motion": "^12.20.0", + "markdown-to-jsx": "^9.4.1", "motion": "^12.20.0", "next": "15.3.8", "open-graph-scraper-lite": "^2.1.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 61ce9010d6..b41015cffd 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -41,6 +41,9 @@ importers: framer-motion: specifier: ^12.20.0 version: 12.20.0(react-dom@19.2.3(react@19.2.3))(react@19.2.3) + markdown-to-jsx: + specifier: ^9.4.1 + version: 9.4.1(react@19.2.3) motion: specifier: ^12.20.0 version: 12.20.0(react-dom@19.2.3(react@19.2.3))(react@19.2.3) @@ -1725,6 +1728,24 @@ packages: magic-string@0.30.21: resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==} + markdown-to-jsx@9.4.1: + resolution: {integrity: sha512-K7UCtk+t+4emmngIWviRgdc7zb8chVJbMMkYrNxXb+pX3pCuwIbIx+L2Kx4HJDhkA3ZQHRy7VBaaNoz3ekeCjw==} + engines: {node: '>= 18'} + peerDependencies: + react: '>= 16.0.0' + react-native: '*' + solid-js: '>=1.0.0' + vue: '>=3.0.0' + peerDependenciesMeta: + react: + optional: true + react-native: + optional: true + solid-js: + optional: true + vue: + optional: true + mdast-util-from-markdown@2.0.2: resolution: {integrity: sha512-uZhTV/8NBuw0WHkPTrCqDOl0zVe1BIng5ZtHoDk49ME1qqcjYmmLmOf0gELgcRMxN4w2iuIeVso5/6QymSrgmA==} @@ -4030,6 +4051,10 @@ snapshots: dependencies: '@jridgewell/sourcemap-codec': 1.5.5 + 
markdown-to-jsx@9.4.1(react@19.2.3): + optionalDependencies: + react: 19.2.3 + mdast-util-from-markdown@2.0.2: dependencies: '@types/mdast': 4.0.4
diff --git a/src/app/api/llms-txt/ast-extract.ts b/src/app/api/llms-txt/ast-extract.ts
new file mode 100644
index 0000000000..47577bfd15
--- /dev/null
+++ b/src/app/api/llms-txt/ast-extract.ts
@@ -0,0 +1,502 @@
+import { parser, RuleType } from "markdown-to-jsx";
+import { astToMarkdown } from "markdown-to-jsx/markdown";
+
+/** Lower-case tag names treated as media; nodes with these tags are dropped from the output. */
+const MEDIA_ELEMENTS = ["img", "svg", "video", "iframe", "picture", "source", "audio", "canvas", "embed", "object"];
+
+/** Label emitted in front of the text of each tip component. */
+const TIP_PREFIXES = new Map([
+  ["TipInfo", "INFO: "],
+  ["TipGood", "DO: "],
+  ["TipBad", "DON'T: "],
+]);
+
+/**
+ * Regex source for a quoted string literal: group 1 is the opening quote, group 2 the contents.
+ * The closing quote must equal the opening one, so `"an element's size"` is read as one value
+ * instead of being cut off at the apostrophe. Backslash escapes are skipped over.
+ */
+const QUOTED_VALUE = "([\"'`])((?:\\\\[\\s\\S]|(?!\\1)[^\\\\])+)\\1";
+
+/** Regex source for `export const title = "..."` / `export const description = "..."`. */
+const METADATA_EXPORT = "export\\s+const\\s+(?:title|description)\\s*=\\s*" + QUOTED_VALUE + ";?";
+
+/** A fenced code block, from its opening fence line through the matching closing fence line. */
+const FENCED_CODE_BLOCK = /^[ \t]*(`{3,}|~{3,})[^\n]*\n[\s\S]*?^[ \t]*\1[`~]*[ \t]*$/gm;
+
+/**
+ * Converts the source of an MDX document into cleaned-up markdown text.
+ *
+ * The `title` and `description` exports, when present, become a `# title` heading and an intro
+ * line followed by a `---` separator. Imports and those exports are stripped, the rest is parsed
+ * with `parser` and re-serialised with `astToMarkdown`, with `renderRule` deciding which JSX,
+ * HTML and media nodes are dropped or rewritten.
+ *
+ * @param mdxContent Raw MDX source.
+ * @returns The extracted text, or `""` for empty input or when parsing/rendering throws (the
+ *   error is logged to the console instead of being propagated).
+ */
+export function extractTextFromMDX(mdxContent: string): string {
+  try {
+    if (!mdxContent) return "";
+
+    let title = extractExport("title", mdxContent);
+    let description = extractExport("description", mdxContent);
+
+    let content = removeImportsAndExports(mdxContent);
+
+    let ast;
+    try {
+      ast = parser(content);
+    } catch (error) {
+      console.error("Error parsing markdown:", error);
+      return "";
+    }
+
+    let cleanMarkdown = cleanWhitespace(astToMarkdown(ast, { renderRule }));
+
+    let result = "";
+    if (title) {
+      result += `# ${title}\n\n`;
+    }
+    if (description) {
+      result += `${description}\n\n`;
+    }
+    if (title || description) {
+      result += "---\n\n";
+    }
+
+    result += cleanMarkdown;
+
+    return result.trim();
+  } catch (error) {
+    console.error("Fatal error in extractTextFromMDX:", error);
+    console.error("Error stack:", error instanceof Error ? error.stack : String(error));
+    return "";
+  }
+}
+
+/** True for the two node types that carry an HTML/JSX tag. */
+function isHtmlNode(node: any): boolean {
+  return node.type === RuleType.htmlBlock || node.type === RuleType.htmlSelfClosing;
+}
+
+/** True for an HTML/JSX node whose tag is one of `MEDIA_ELEMENTS` (compared case-insensitively). */
+function isMediaNode(node: any): boolean {
+  return isHtmlNode(node) && Boolean(node.tag) && MEDIA_ELEMENTS.includes(node.tag.toLowerCase());
+}
+
+/** True for an HTML/JSX node whose tag is one of the tip components in `TIP_PREFIXES`. */
+function isTipNode(node: any): boolean {
+  return isHtmlNode(node) && Boolean(node.tag) && TIP_PREFIXES.has(node.tag);
+}
+
+/** True for a text node that is just `{` or `}`, such as the delimiters of a JSX expression. */
+function isLoneBrace(node: any): boolean {
+  if (node.type !== RuleType.text) return false;
+  let text = node.text?.trim();
+  return text === "{" || text === "}";
+}
+
+/** Renders a tip component as `\nPREFIX text\n`, or `""` when it has no extractable text. */
+function renderTip(node: any, renderChildren: (children: any[]) => string, state: any): string {
+  let text = extractComponentTextContent(node, renderChildren, state);
+  return text ? `\n${TIP_PREFIXES.get(node.tag) ?? ""}${text.trim()}\n` : "";
+}
+
+/**
+ * `renderRule` override passed to `astToMarkdown`: decides, per AST node, whether to drop it,
+ * replace it with extracted text, or defer to the default rendering via `next()`.
+ */
+function renderRule(
+  next: () => string,
+  node: any,
+  renderChildren: (children: any[]) => string,
+  state: any,
+): string {
+  if (isMediaNode(node) || isLoneBrace(node) || node.type === RuleType.htmlComment) {
+    return "";
+  }
+
+  // Capitalised tags are handled as components, lower-case tags as plain HTML elements.
+  if (isHtmlNode(node) && node.tag && /^[A-Z]/.test(node.tag)) {
+    return renderComponent(node, renderChildren, state);
+  }
+
+  if (node.type === RuleType.codeBlock) {
+    return cleanCodeBlock(node, renderChildren, state);
+  }
+
+  if (isHtmlNode(node) && node.tag && /^[a-z]/.test(node.tag)) {
+    return renderHtmlElement(next, node, renderChildren);
+  }
+
+  if (node.type === RuleType.paragraph) {
+    let rendered = renderParagraph(node, renderChildren, state);
+    if (rendered !== null) return rendered;
+  }
+
+  return next();
+}
+
+/**
+ * Renders a capitalised (component) tag. Known documentation components are replaced with a text
+ * equivalent; every other component is dropped.
+ */
+function renderComponent(node: any, renderChildren: (children: any[]) => string, state: any): string {
+  let tag = node.tag;
+
+  if (tag === "ApiTable") {
+    return extractApiTableText(node);
+  }
+
+  if (tag === "ResponsiveDesign") {
+    return extractResponsiveDesignText(node);
+  }
+
+  if (tag === "CustomizingYourTheme") {
+    return extractCustomizingYourThemeText(node);
+  }
+
+  if (tag === "TargetingSpecificStates") {
+    return extractTargetingSpecificStatesText(node);
+  }
+
+  if (tag === "Figure" || tag === "Example") {
+    // Only code blocks and code-example components are kept; all other children are skipped.
+    let result = "";
+    for (let child of node.children ?? []) {
+      if (child.type === RuleType.codeBlock) {
+        result += cleanCodeBlock(child, renderChildren, state);
+        result += "\n\n";
+      } else if (child.tag && ["CodeExampleStack", "CodeExampleWrapper"].includes(child.tag)) {
+        result += extractComponentTextContent(child, renderChildren, state);
+        result += "\n\n";
+      }
+    }
+    return result.trim();
+  }
+
+  if (TIP_PREFIXES.has(tag)) {
+    return renderTip(node, renderChildren, state);
+  }
+
+  if (tag === "CodeExampleWrapper" || tag === "CodeExampleStack") {
+    return extractComponentTextContent(node, renderChildren, state);
+  }
+
+  return "";
+}
+
+/**
+ * Renders a lower-case (plain HTML) tag. Table-related and `details`/`summary` elements are
+ * reduced to their text; any other element is kept only when it directly contains a code block.
+ */
+function renderHtmlElement(next: () => string, node: any, renderChildren: (children: any[]) => string): string {
+  let children: any[] = node.children ?? [];
+
+  switch (node.tag) {
+    case "details":
+    case "summary":
+    case "table":
+    case "thead":
+    case "tbody":
+      // Transparent containers: render whatever is inside them.
+      return children.length > 0 ? renderChildren(children) : "";
+
+    case "tr": {
+      let text = (node.rawText || node.text || "")
+        .replace(/<[^>]+>/g, "")
+        .replace(/\s+/g, " ")
+        .trim();
+      return text ? text + "\n" : "";
+    }
+
+    case "th":
+    case "td":
+      return (node.rawText || node.text || "").replace(/<[^>]+>/g, "").trim();
+  }
+
+  let hasCodeBlock = children.some((child: any) => child.type === RuleType.codeBlock);
+  return hasCodeBlock ? next() : "";
+}
+
+/**
+ * Paragraph handling. Returns the replacement text, or `null` when the paragraph needs no special
+ * treatment and should be rendered by the default rule.
+ */
+function renderParagraph(node: any, renderChildren: (children: any[]) => string, state: any): string | null {
+  let children: any[] = node.children ?? [];
+  if (children.length === 0) return null;
+
+  // A paragraph whose text is nothing but an MDX comment (`{/* ... */}`) is dropped.
+  let allText = children
+    .filter((c: any) => c.type === RuleType.text)
+    .map((c: any) => c.text || "")
+    .join("")
+    .trim();
+  if (allText.match(/^\{\/\*.*\*\/\}$/)) {
+    return "";
+  }
+
+  if (children.length === 1) {
+    let only = children[0];
+    if (isLoneBrace(only) || isMediaNode(only)) return "";
+    if (isTipNode(only)) return renderTip(only, renderChildren, state);
+    return null;
+  }
+
+  // Several children: drop media, rewrite tips, render everything else normally.
+  let parts: string[] = [];
+  for (let child of children) {
+    if (isMediaNode(child)) continue;
+
+    if (isTipNode(child)) {
+      let tip = renderTip(child, renderChildren, state);
+      if (tip) parts.push(tip);
+      continue;
+    }
+
+    parts.push(renderChildren([child]));
+  }
+
+  return parts.join("");
+}
+
+/**
+ * Reads the string value of `export const <name> = "..."` from MDX source.
+ *
+ * Single-, double- and backtick-quoted values are supported. The value ends at the quote that
+ * opened it, so other quote characters inside it (e.g. the apostrophe in `"an element's size"`)
+ * are kept, and backslash-escaped quotes are unescaped.
+ *
+ * @param name Name of the exported constant.
+ * @param content MDX source to search.
+ * @returns The value of the first matching export, or `null` when there is none.
+ */
+function extractExport(name: string, content: string): string | null {
+  // `name` is interpolated into a pattern, so neutralise any regex metacharacters in it.
+  let escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+  let match = content.match(new RegExp("export\\s+const\\s+" + escapedName + "\\s*=\\s*" + QUOTED_VALUE));
+  if (!match) return null;
+  return match[2].replace(/\\(["'`\\])/g, "$1");
+}
+
+/**
+ * Strips `import ... from "..."` statements and the `title` / `description` exports.
+ *
+ * Fenced code blocks are copied through untouched so that import/export lines that are part of
+ * a code sample stay in the output.
+ */
+function removeImportsAndExports(content: string): string {
+  let result = "";
+  let cursor = 0;
+  for (let fence of content.matchAll(FENCED_CODE_BLOCK)) {
+    let start = fence.index ?? 0;
+    result += stripModuleStatements(content.slice(cursor, start)) + fence[0];
+    cursor = start + fence[0].length;
+  }
+  return result + stripModuleStatements(content.slice(cursor));
+}
+
+/** Removes import statements and `title` / `description` exports from text outside code fences. */
+function stripModuleStatements(text: string): string {
+  // Imports may wrap over several lines; a newline is only consumed when the next line is not blank.
+  text = text.replace(/^import\s+(?:.|\n(?![ \t]*\n))*?from\s+["'][^"']+["'];?\s*$/gm, "");
+  // Whole-line exports first (this also swallows the trailing whitespace) ...
+  text = text.replace(new RegExp("^" + METADATA_EXPORT + "\\s*$", "gm"), "");
+  // ... then any export that shares its line with other content.
+  return text.replace(new RegExp(METADATA_EXPORT, "g"), "");
+}
+
+/**
+ * Turns the `rows` attribute of an `ApiTable` component into one `a | b` line per row.
+ *
+ * `rows` may already be an array, or a string holding an array literal (optionally wrapped in
+ * braces), which is coerced into JSON before parsing.
+ *
+ * @returns The rows wrapped in newlines, or `""` when there are no usable rows.
+ */
+function extractApiTableText(node: any): string {
+  let attrs = node.attrs || {};
+  let rows = attrs.rows;
+
+  // If still a string (single quotes), convert to valid JSON and parse
+  if (typeof rows === "string") {
+    try {
+      let jsonStr = rows.trim();
+      if (jsonStr.startsWith("{") && jsonStr.endsWith("}")) {
+        jsonStr = jsonStr.slice(1, -1).trim();
+      }
+      // Single quotes become double quotes and trailing commas are dropped.
+      jsonStr = jsonStr.replace(/'/g, '"').replace(/,(\s*[\]}])/g, "$1");
+      rows = JSON.parse(jsonStr);
+    } catch {
+      // Not parseable as JSON even after clean-up: emit nothing for this table.
+      return "";
+    }
+  }
+
+  if (Array.isArray(rows)) {
+    let extracted = rows
+      .map((row: any) => {
+        if (Array.isArray(row)) {
+          // `??` rather than `||` so that falsy cells such as 0 are still printed.
+          return row.map((cell: any) => String(cell ?? "")).join(" | ");
+        }
+        return "";
+      })
+      .filter((row: string) => row.trim())
+      .join("\n");
+
+    return extracted ? `\n${extracted}\n` : "";
+  }
+
+  return "";
+}
+
+/** Prose replacement for a `ResponsiveDesign` component, built from its attributes. */
+function extractResponsiveDesignText(node: any): string {
+  let attrs = node.attrs || {};
+  let property = attrs.property || "utility";
+  let featuredClass = attrs.featuredClass || "";
+  let breakpoint = attrs.breakpoint || "md";
+
+  if (property && featuredClass) {
+    return `\n\nPrefix a ${property} utility with a breakpoint variant like ${breakpoint}: to only apply the utility at ${breakpoint} screen sizes and above. Use ${breakpoint}:${featuredClass} to apply ${featuredClass} at the ${breakpoint} breakpoint and above.\n\n`;
+  }
+  return "\n\nUse responsive variants to apply utilities at specific breakpoints.\n\n";
+}
+
+/** Prose replacement for a `CustomizingYourTheme` component, built from its `utility` attribute. */
+function extractCustomizingYourThemeText(node: any): string {
+  let attrs = node.attrs || {};
+  let utility = attrs.utility || "utility";
+  return `\n\nUse the --${utility}-* theme variables to customize the ${utility} utilities in your project.\n\n`;
+}
+
+/** Prose replacement for a `TargetingSpecificStates` component, built from its attributes. */
+function extractTargetingSpecificStatesText(node: any): string {
+  let attrs = node.attrs || {};
+  let property = attrs.property || "utility";
+  let variant = attrs.variant || "hover";
+  return `\n\nPrefix a ${property} utility with a variant like ${variant}: to only apply the utility in that state.\n\n`;
+}
+
+/**
+ * Extracts plain text from a component node, stripping JSX fragments, tags and braces and
+ * collapsing whitespace.
+ *
+ * Raw text is used for nodes flagged `verbatim` / `noInnerParse`; otherwise the node's direct
+ * text children are used, falling back to the rendered children when there are none.
+ *
+ * @param state Unused; kept so the signature matches the other render helpers.
+ */
+function extractComponentTextContent(node: any, renderChildren: (children: any[]) => string, state: any): string {
+  if ((node.verbatim || node.noInnerParse) && (node.rawText || node.text)) {
+    let text = node.rawText || node.text;
+    text = text
+      .replace(/\{<>/g, "")
+      .replace(/<\/>\}/g, "")
+      .replace(/<>/g, "")
+      .replace(/<\/>/g, "")
+      .replace(/<[^>]+>/g, "")
+      .replace(/\{|\}/g, "")
+      .replace(/\s+/g, " ")
+      .trim();
+    return text ? `${text}` : "";
+  }
+
+  if (node.children && node.children.length > 0) {
+    let allText = node.children
+      .filter((c: any) => c.type === RuleType.text)
+      .map((c: any) => c.text || "")
+      .join("");
+
+    if (allText) {
+      let text = allText
+        .replace(/\{<>/g, "")
+        .replace(/<\/>\}/g, "")
+        .replace(/<>/g, "")
+        .replace(/<\/>/g, "")
+        .replace(/\{|\}/g, "")
+        .replace(/<[^>]+>/g, "")
+        .replace(/\s+/g, " ")
+        .trim();
+      return text ? `${text}` : "";
+    }
+
+    let rendered = renderChildren(node.children);
+    let text = rendered
+      .replace(/<>/g, "")
+      .replace(/<\/>/g, "")
+      .replace(/<[^>]+>/g, "")
+      .replace(/\s+/g, " ")
+      .trim();
+    return text ? `${text}` : "";
+  }
+  return "";
+}
+
+/**
+ * Re-serialises a code block as a fenced block with editor-only annotations removed.
+ *
+ * Lines carrying a `[!code --]` / `[!code --:N]` directive are skipped (N lines starting at the
+ * directive line). Other `[!code ...]` directives and `prettier-ignore` markers are removed from
+ * the line, and a line that held nothing else is dropped entirely.
+ *
+ * @param renderChildren Unused; kept so the signature matches the other render helpers.
+ * @param state Unused; kept so the signature matches the other render helpers.
+ */
+function cleanCodeBlock(node: any, renderChildren: (children: any[]) => string, state: any): string {
+  let lang = node.lang || "";
+  let code = node.text || "";
+
+  let lines = code.split("\n");
+  let filteredLines: string[] = [];
+  for (let i = 0; i < lines.length; i++) {
+    let line = lines[i];
+    let trimmed = line.trim();
+
+    let removeMatch = trimmed.match(/\[\!code\s+--(?::(\d+))?\]/);
+    if (removeMatch) {
+      // Clamp to 1: `--:0` would otherwise rewind `i` and loop forever on this line.
+      let count = Math.max(1, parseInt(removeMatch[1] || "1", 10));
+      i += count - 1;
+      continue;
+    }
+
+    // Strip directives in every comment syntax: HTML, block, hash, line, then bare.
+    line = line.replace(/<!--\s*\[!code[^\]]+\]\s*-->/g, "");
+    line = line.replace(/\/\*\s*\[!code[^\]]+\]\s*\*\//g, "");
+    line = line.replace(/#\s*\[!code[^\]]+\]/g, "");
+    line = line.replace(/\/\/\s*\[!code[^\]]+\]/g, "");
+    line = line.replace(/\[\!code[^\]]+\]/g, "");
+
+    line = line.replace(/<!--\s*prettier-ignore\s*-->/g, "");
+    line = line.replace(/\/\*\s*prettier-ignore\s*\*\//g, "");
+    line = line.replace(/#\s*prettier-ignore/g, "");
+    line = line.replace(/\/\/\s*prettier-ignore/g, "");
+
+    if (line.trim().length === 0 && trimmed.match(/\[\!code|prettier-ignore/)) {
+      continue;
+    }
+
+    filteredLines.push(line);
+  }
+
+  code = filteredLines.join("\n").trim();
+
+  return `\`\`\`${lang}\n${code}\n\`\`\``;
+}
+
+/** Trims trailing whitespace on every line, then collapses runs of blank lines to a single one. */
+function cleanWhitespace(content: string): string {
+  // Trim first: lines holding only spaces would otherwise keep `\n{3,}` from matching.
+  content = content
+    .split("\n")
+    .map((line) => line.trimEnd())
+    .join("\n");
+  content = content.replace(/\n{3,}/g, "\n\n");
+  return content.trim();
+}
diff --git a/src/app/api/llms-txt/extract-text.test.ts b/src/app/api/llms-txt/extract-text.test.ts new file mode 100644 index 0000000000..4a990f1c6f --- /dev/null +++ b/src/app/api/llms-txt/extract-text.test.ts @@ -0,0 +1,947 @@ +// Note: can run these tests with `bun test` or `node --test` + +import { describe, test } from "node:test"; +import assert from "node:assert"; +import { extractTextFromMDX } from "./ast-extract"; +import dedent from "dedent"; + +describe("extractTextFromMDX", () => { + describe("extracting title and description", () => { + test("extracts title and description from exports", (t) => { + let input = dedent` + export const title = "Test Title"; + export const description = "Test description"; + + ## Content + Some content here. + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("Test Title")); + assert.ok(result.includes("Test description")); + assert.ok(result.includes("## Content")); + assert.ok(result.includes("Some content here")); + }); + + test("handles missing title and description", (t) => { + let input = dedent` + ## Content + Some content here. 
+ `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("## Content")); + assert.ok(result.includes("Some content here")); + }); + }); + + describe("extracting markdown headings", () => { + test("preserves heading hierarchy", (t) => { + let input = dedent` + ## Heading 2 + Content under h2 + + ### Heading 3 + Content under h3 + + #### Heading 4 + Content under h4 + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("## Heading 2")); + assert.ok(result.includes("### Heading 3")); + assert.ok(result.includes("#### Heading 4")); + }); + + test("preserves heading text with special characters", (t) => { + let input = dedent` + ## Heading with \`code\` and **bold** + Content here + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("## Heading")); + }); + }); + + describe("extracting paragraph text", () => { + test("extracts paragraph text", (t) => { + let input = dedent` + ## Title + + This is a paragraph with some text. + + This is another paragraph. + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("This is a paragraph with some text")); + assert.ok(result.includes("This is another paragraph")); + }); + + test("preserves line breaks in paragraphs", (t) => { + let input = dedent` + This is a paragraph + that spans multiple lines. + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("This is a paragraph")); + assert.ok(result.includes("that spans multiple lines")); + }); + }); + + describe("extracting code blocks", () => { + test("preserves code block content with language hint", (t) => { + let input = dedent` + ## Code Example + + \`\`\`html +
Hello
+ \`\`\` + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("```html")); + assert.ok(result.includes('
Hello
')); + }); + + test("removes code example directives from code blocks", (t) => { + let input = dedent` + \`\`\`html + +
Hello
+ \`\`\` + `; + + let result = extractTextFromMDX(input); + assert.ok(!result.includes("[!code")); + assert.ok(result.includes('
Hello
')); + }); + + test("preserves multiple code blocks", (t) => { + let input = dedent` + \`\`\`html +
HTML
+ \`\`\` + + \`\`\`css + .test { color: red; } + \`\`\` + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("```html")); + assert.ok(result.includes("
HTML
")); + assert.ok(result.includes("```css")); + assert.ok(result.includes(".test { color: red; }")); + }); + }); + + describe("stripping import statements", () => { + test("removes import statements", (t) => { + let input = dedent` + import { Example } from "@/components/example"; + import dedent from "dedent"; + + ## Content + Some content here. + `; + + let result = extractTextFromMDX(input); + assert.ok(!result.includes("import")); + assert.ok(result.includes("## Content")); + assert.ok(result.includes("Some content here")); + }); + + test("removes various import formats", (t) => { + let input = dedent` + import React from "react"; + import type { Component } from "./types"; + import { Example, Figure } from "@/components"; + + ## Content + `; + + let result = extractTextFromMDX(input); + assert.ok(!result.includes("import")); + assert.ok(result.includes("## Content")); + }); + }); + + describe("stripping JSX/React component syntax", () => { + test("removes JSX component tags", (t) => { + let input = dedent` + ## Title + + +
Content
+
+ + More content here. + `; + + let result = extractTextFromMDX(input); + assert.ok(!result.includes("")); + assert.ok(!result.includes("")); + assert.ok(result.includes("More content here")); + }); + + test("removes component props", (t) => { + let input = dedent` +
+ + Content + +
+ `; + + let result = extractTextFromMDX(input); + assert.ok(!result.includes("hint=")); + assert.ok(!result.includes("padding=")); + }); + + test("skips Example components (visual demonstrations)", (t) => { + let input = dedent` + Some text before. + + +
This is demo content
+

More demo here

+
+ + Some text after. + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("Some text before")); + assert.ok(result.includes("Some text after")); + assert.ok(!result.includes("This is demo content")); + assert.ok(!result.includes("More demo here")); + }); + }); + + describe("handling ApiTable components", () => { + test("extracts table data as text", (t) => { + let input = dedent` + + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("inline")); + assert.ok(result.includes("display: inline")); + assert.ok(result.includes("block")); + assert.ok(result.includes("display: block")); + }); + + test("handles ApiTable with multiple rows", (t) => { + let input = dedent` + ", "z-index: ;"], + ["z-auto", "z-index: auto;"], + ["z-[]", "z-index: ;"], + ]} + /> + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("z-")); + assert.ok(result.includes("z-index: ")); + assert.ok(result.includes("z-auto")); + assert.ok(result.includes("z-index: auto")); + }); + }); + + describe("handling links", () => { + test("converts markdown links to plain text", (t) => { + let input = dedent` + Check out the [display docs](/docs/display) for more info. + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("display docs")); + assert.ok(result.includes("/docs/display")); + }); + + test("handles links in headings", (t) => { + let input = dedent` + ## See [this page](/docs/page) + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("this page")); + assert.ok(result.includes("/docs/page")); + }); + }); + + describe("handling code example directives", () => { + test("removes [!code ...] directives from code", (t) => { + let input = dedent` + \`\`\`html + +
Hello
+ + \`\`\` + `; + + let result = extractTextFromMDX(input); + assert.ok(!result.includes("[!code")); + assert.ok(result.includes('
Hello
')); + }); + + test("removes [!code filename:...] directives", (t) => { + let input = dedent` + \`\`\`html + +
Content
+ \`\`\` + `; + + let result = extractTextFromMDX(input); + assert.ok(!result.includes("[!code filename")); + assert.ok(result.includes("
Content
")); + }); + }); + + describe("edge cases", () => { + test("handles empty file", (t) => { + let result = extractTextFromMDX(""); + assert.strictEqual(typeof result, "string"); + }); + + test("handles file with only exports", (t) => { + let input = dedent` + export const title = "Title"; + export const description = "Description"; + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("Title")); + assert.ok(result.includes("Description")); + }); + + test("handles special characters", (t) => { + let input = dedent` + ## Title with "quotes" and 'apostrophes' + Content with and & entities. + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("Title")); + }); + + test("handles nested components", (t) => { + let input = dedent` +
+ +
Nested content
+
+
+ `; + + let result = extractTextFromMDX(input); + assert.ok(!result.includes("
")); + assert.ok(!result.includes("")); + }); + }); + + describe("removing export statements", () => { + test("removes export const title and description", (t) => { + let input = dedent` + export const title = "Test Title"; + export const description = "Test description"; + + ## Content + Some content here. + `; + + let result = extractTextFromMDX(input); + assert.ok(!result.includes("export const title")); + assert.ok(!result.includes("export const description")); + assert.ok(result.includes("## Content")); + assert.ok(result.includes("Some content here")); + }); + + test("removes export statements with template literals", (t) => { + let input = dedent` + export const title = \`Test Title\`; + export const description = \`Test description\`; + + ## Content + `; + + let result = extractTextFromMDX(input); + assert.ok(!result.includes("export const title")); + assert.ok(!result.includes("export const description")); + }); + }); + + describe("handling content components", () => { + test("extracts text from ResponsiveDesign component", (t) => { + let input = dedent` + ### Responsive design + + + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("### Responsive design")); + assert.ok(result.includes("Prefix a color utility")); + assert.ok(result.includes("breakpoint variant like md:")); + assert.ok(result.includes("md:text-green-600")); + assert.ok(!result.includes(" { + let input = dedent` + ## Customizing your theme + + + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("## Customizing your theme")); + assert.ok(result.includes("Use the --spacing-* theme variables")); + assert.ok(!result.includes(" { + let input = dedent` + ### Targeting specific states + + + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("### Targeting specific states")); + assert.ok(result.includes("Prefix a background-color utility")); + assert.ok(result.includes("variant like hover:")); + assert.ok(!result.includes(" { 
+ let input = dedent` + + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("Use responsive variants")); + assert.ok(!result.includes(" { + test("removes HTML blocks that are not in code blocks", (t) => { + let input = dedent` + ## Example + +
+

Basic Tee

+

$35

+
+ + More content here. + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("## Example")); + assert.ok(result.includes("More content here")); + assert.ok(!result.includes("
")); + assert.ok(!result.includes("

Basic Tee

")); + }); + + test("preserves HTML in code blocks", (t) => { + let input = dedent` + ## Example + + \`\`\`html +
+

Basic Tee

+
+ \`\`\` + + More content. + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("```html")); + assert.ok(result.includes("
")); + assert.ok(result.includes("

Basic Tee

")); + assert.ok(result.includes("More content")); + }); + + test("removes nested HTML structures", (t) => { + let input = dedent` +
+
+

Text

+
+
+ + Content after HTML. + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("Content after HTML")); + assert.ok(!result.includes("
")); + assert.ok(!result.includes("

Text

")); + }); + + test("removes HTML with empty elements", (t) => { + let input = dedent` +
+ +

Content

+
+ `; + + let result = extractTextFromMDX(input); + assert.ok(!result.includes("
")); + assert.ok(!result.includes("")); + assert.ok(!result.includes("

Content

")); + }); + }); + + describe("handling JSX expressions with nested braces", () => { + test("extracts table headers from JSX expression", (t) => { + let input = dedent` + ## Breakpoints + + { + + + + + + + +
BreakpointWidth
+ } + + More content here. + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("Breakpoint")); + assert.ok(result.includes("Width")); + assert.ok(result.includes("More content here")); + assert.ok(!result.includes("{")); + assert.ok(!result.includes("}")); + }); + + test("handles JSX expressions with code blocks inside", (t) => { + let input = dedent` + { +
+ \`\`\`html +
Code example
+ \`\`\` +
+ } + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("```html") || result.includes("Code example")); + }); + }); + + describe("real-world examples", () => { + test("handles responsive-design.mdx with JSX table expression", (t) => { + let input = dedent` + export const title = "Responsive design"; + export const description = "Using responsive utility variants."; + + ## Overview + + There are five breakpoints: + + { + + + + + + + + + + + + + +
Breakpoint prefixMinimum width
sm40rem (640px)
+ } + + ### Customizing your theme + + More content here. + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("Responsive design")); + assert.ok(result.includes("Using responsive utility variants")); + assert.ok(result.includes("## Overview")); + assert.ok(result.includes("Breakpoint prefix")); + assert.ok(result.includes("Minimum width")); + assert.ok(result.includes("### Customizing your theme")); + assert.ok(result.includes("More content here")); + assert.ok(!result.includes("export const")); + assert.ok(!result.includes("{")); + assert.ok(!result.includes("}")); + }); + + test("handles display.mdx structure", (t) => { + let input = dedent` + import dedent from "dedent"; + import { ApiTable } from "@/components/api-table.tsx"; + import { Example } from "@/components/example.tsx"; + import { Figure } from "@/components/figure.tsx"; + + export const title = "display"; + export const description = "Utilities for controlling the display box type of an element."; + + + + ## Examples + + ### Block and Inline + + Use the \`inline\`, \`inline-block\`, and \`block\` utilities: + +
+ + {
Example content
} +
+ + \`\`\`html + +
Content
+ \`\`\` +
+ `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("display")); + assert.ok(result.includes("Utilities for controlling the display box type")); + assert.ok(result.includes("## Examples")); + assert.ok(result.includes("### Block and Inline")); + assert.ok(result.includes("Use the")); + assert.ok(result.includes("```html")); + assert.ok(result.includes('
Content
')); + assert.ok(!result.includes("import")); + assert.ok(!result.includes("")); + assert.ok(!result.includes("[!code")); + }); + }); + + describe("handling media elements", () => { + test("removes img tags completely", (t) => { + let input = dedent` + ## Editor setup + + Some content before. + + Photo description + + More content here. + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("## Editor setup")); + assert.ok(result.includes("Some content before")); + assert.ok(result.includes("More content here")); + assert.ok(!result.includes(" { + let input = dedent` + Built-in support for Tailwind CSS in Zed + `; + + let result = extractTextFromMDX(input); + assert.ok(!result.includes(" { + let input = dedent` + ## Icons + + Here's an icon: + + + + + + More content here. + `; + + let result = extractTextFromMDX(input); + assert.ok(result.includes("## Icons")); + assert.ok(result.includes("More content here")); + assert.ok(!result.includes(" { + let input = dedent` + ## Demo + +