From 4e92bf77bb3e17f2f7606282affac0e5f32cd1f4 Mon Sep 17 00:00:00 2001 From: sacOO7 Date: Thu, 5 Feb 2026 19:21:15 +0530 Subject: [PATCH 1/3] Added script to add sub-headings to generated markdown snippets --- .../transpileMdxToMarkdown.test.ts.snap | 3 + .../transpileMdxToMarkdown.test.ts | 149 ++++++++++++++++++ data/onPostBuild/transpileMdxToMarkdown.ts | 54 ++++++- 3 files changed, 205 insertions(+), 1 deletion(-) diff --git a/data/onPostBuild/__snapshots__/transpileMdxToMarkdown.test.ts.snap b/data/onPostBuild/__snapshots__/transpileMdxToMarkdown.test.ts.snap index e1fa016b40..ab773410fe 100644 --- a/data/onPostBuild/__snapshots__/transpileMdxToMarkdown.test.ts.snap +++ b/data/onPostBuild/__snapshots__/transpileMdxToMarkdown.test.ts.snap @@ -34,6 +34,9 @@ Use your-api-key and your-channel-name in your code. ## Code blocks + +#### Javascript + \`\`\`javascript const channel = realtime.channels.get('your-channel-name'); \`\`\` diff --git a/data/onPostBuild/transpileMdxToMarkdown.test.ts b/data/onPostBuild/transpileMdxToMarkdown.test.ts index 86fcdd759b..98e70660c5 100644 --- a/data/onPostBuild/transpileMdxToMarkdown.test.ts +++ b/data/onPostBuild/transpileMdxToMarkdown.test.ts @@ -11,6 +11,8 @@ import { convertRelativeUrls, replaceTemplateVariables, calculateOutputPath, + getLanguageDisplayName, + addLanguageSubheadingsToCodeBlocks, } from './transpileMdxToMarkdown'; import * as fs from 'fs'; import * as path from 'path'; @@ -622,4 +624,151 @@ Real prop: link: '/docs/presence'`; expect(output).toMatch(/public\/docs\/chat\/moderation\/direct\/bodyguard\.md$/); }); }); + + describe('getLanguageDisplayName', () => { + it('should capitalize simple language names', () => { + expect(getLanguageDisplayName('javascript')).toBe('Javascript'); + expect(getLanguageDisplayName('kotlin')).toBe('Kotlin'); + expect(getLanguageDisplayName('swift')).toBe('Swift'); + }); + + it('should handle underscore-separated variants', () => { + expect(getLanguageDisplayName('realtime_javascript')).toBe('Realtime Javascript'); + expect(getLanguageDisplayName('rest_python')).toBe('Rest Python'); + }); + + it('should handle empty string', () => { + expect(getLanguageDisplayName('')).toBe(''); + }); + + it('should handle single character', () => { + expect(getLanguageDisplayName('a')).toBe('A'); + }); + }); + + describe('addLanguageSubheadingsToCodeBlocks', () => { + it('should add subheadings to multiple code blocks within tags', () => { + const input = ` +\`\`\`javascript +const x = 1; +\`\`\` + +\`\`\`kotlin +val x = 1 +\`\`\` +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain('#### Javascript'); + expect(output).toContain('#### Kotlin'); + expect(output).toContain('```javascript'); + expect(output).toContain('```kotlin'); + }); + + it('should add subheading to single code block within tags', () => { + const input = ` +\`\`\`javascript +const x = 1; +\`\`\` +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain('#### Javascript'); + expect(output).toContain('```javascript'); + }); + + it('should handle realtime/rest SDK variants', () => { + const input = ` +\`\`\`realtime_javascript +const channel = realtime.channels.get('test'); +\`\`\` + +\`\`\`rest_javascript +const channel = rest.channels.get('test'); +\`\`\` +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain('#### Realtime Javascript'); + expect(output).toContain('#### Rest Javascript'); + }); + + it('should handle jetpack and other special languages', () => { + const input = ` +\`\`\`kotlin +val x = 1 +\`\`\` + +\`\`\`jetpack +@Composable +fun MyComponent() {} +\`\`\` +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain('#### Kotlin'); + expect(output).toContain('#### Jetpack'); + }); + + it('should not modify code blocks outside tags', () => { + const input = `\`\`\`javascript +const x = 1; +\`\`\` + +\`\`\`kotlin +val x = 1 +\`\`\``; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).not.toContain('####'); + expect(output).toBe(input); + }); + + it('should handle multiple blocks in content', () => { + const input = `First section: + +\`\`\`javascript +const a = 1; +\`\`\` + +\`\`\`python +a = 1 +\`\`\` + + +Second section: + +\`\`\`swift +let b = 2 +\`\`\` + +\`\`\`kotlin +val b = 2 +\`\`\` +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain('#### Javascript'); + expect(output).toContain('#### Python'); + expect(output).toContain('#### Swift'); + expect(output).toContain('#### Kotlin'); + }); + + it('should preserve code block content', () => { + const input = ` +\`\`\`javascript +const channel = realtime.channels.get('{{RANDOM_CHANNEL_NAME}}'); +channel.subscribe((message) => { + console.log(message); +}); +\`\`\` + +\`\`\`python +channel = realtime.channels.get('channel-name') +def on_message(message): + print(message) +channel.subscribe(on_message) +\`\`\` +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain("const channel = realtime.channels.get('{{RANDOM_CHANNEL_NAME}}');"); + expect(output).toContain("channel = realtime.channels.get('channel-name')"); + expect(output).toContain('console.log(message);'); + expect(output).toContain('print(message)'); + }); + }); }); diff --git a/data/onPostBuild/transpileMdxToMarkdown.ts b/data/onPostBuild/transpileMdxToMarkdown.ts index ffcacdc569..e7fb211157 100644 --- a/data/onPostBuild/transpileMdxToMarkdown.ts +++ b/data/onPostBuild/transpileMdxToMarkdown.ts @@ -5,6 +5,53 @@ import frontMatter from 'front-matter'; const REPORTER_PREFIX = 'onPostBuild:transpileMdxToMarkdown'; +/** + * Get the display name for a language identifier + * Capitalizes the first letter of each word (e.g. javascript -> Javascript) + * Handles underscore-separated variants (e.g., realtime_javascript -> Realtime Javascript, rest_javascript -> Rest Javascript) + */ +function getLanguageDisplayName(lang: string): string { + if (!lang) return ''; + // Split by underscore, capitalize each part, join with space + return lang + .split('_') + .map((part) => part.charAt(0).toUpperCase() + part.slice(1)) + .join(' '); +} + +/** + * Add language subheadings before each code block within tags + * This makes it easier for LLMs to identify which language each code snippet belongs to + */ +function addLanguageSubheadingsToCodeBlocks(content: string): string { + // Match blocks (case-insensitive for the tag) + const codeTagRegex = /([\s\S]*?)<\/Code>/gi; + + return content.replace(codeTagRegex, (match, innerContent: string) => { + // Find all code blocks within this tag + // Match ```language followed by code and closing ``` + const codeBlockRegex = /(```(\w+)\n[\s\S]*?```)/g; + + // Check if there are any code blocks + const codeBlocks = innerContent.match(codeBlockRegex); + if (!codeBlocks || codeBlocks.length === 0) { + // No code blocks - return as-is + return match; + } + + // Replace each code block with a subheading followed by the code block + const transformedContent = innerContent.replace(codeBlockRegex, (codeBlock, fullMatch, lang) => { + const displayName = getLanguageDisplayName(lang); + return `#### ${displayName}\n\n${codeBlock}`; + }); + + // Ensure proper newline after tag for markdown formatting + // Trim leading whitespace and add two newlines (blank line before heading) + const trimmedContent = transformedContent.trimStart(); + return `\n\n${trimmedContent}`; + }); +} + interface MdxNode { parent: { relativeDirectory: string; @@ -474,7 +521,10 @@ function transformMdxToMarkdown( // Stage 11: Replace template variables content = replaceTemplateVariables(content); - // Stage 12: Prepend title as markdown heading + // Stage 12: Add language subheadings to code blocks within tags + content = addLanguageSubheadingsToCodeBlocks(content); + + // Stage 13: Prepend title as markdown heading const finalContent = `# ${title}\n\n${intro ? `${intro}\n\n` : ''}${content}`; return { content: finalContent, title, intro }; @@ -601,4 +651,6 @@ export { replaceTemplateVariables, calculateOutputPath, transformMdxToMarkdown, + getLanguageDisplayName, + addLanguageSubheadingsToCodeBlocks, }; From 383a3a5fe02787d9b1d96ab1e2614e3b35c6b08b Mon Sep 17 00:00:00 2001 From: sacOO7 Date: Fri, 6 Feb 2026 14:10:30 +0530 Subject: [PATCH 2/3] Addressed markdown snippet headings review comments --- .../transpileMdxToMarkdown.test.ts | 108 ++++++++++-------- data/onPostBuild/transpileMdxToMarkdown.ts | 24 ++-- 2 files changed, 71 insertions(+), 61 deletions(-) diff --git a/data/onPostBuild/transpileMdxToMarkdown.test.ts b/data/onPostBuild/transpileMdxToMarkdown.test.ts index 98e70660c5..9ecbbbd55b 100644 --- a/data/onPostBuild/transpileMdxToMarkdown.test.ts +++ b/data/onPostBuild/transpileMdxToMarkdown.test.ts @@ -664,17 +664,6 @@ val x = 1 expect(output).toContain('```kotlin'); }); - it('should add subheading to single code block within tags', () => { - const input = ` -\`\`\`javascript -const x = 1; -\`\`\` -`; - const output = addLanguageSubheadingsToCodeBlocks(input); - expect(output).toContain('#### Javascript'); - expect(output).toContain('```javascript'); - }); - it('should handle realtime/rest SDK variants', () => { const input = ` \`\`\`realtime_javascript @@ -690,20 +679,27 @@ const channel = rest.channels.get('test'); expect(output).toContain('#### Rest Javascript'); }); - it('should handle jetpack and other special languages', () => { - const input = ` -\`\`\`kotlin -val x = 1 + it('should handle tags with attributes like fixed="true"', () => { + const input = ` +\`\`\`javascript +const x = 1; \`\`\` +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain('#### Javascript'); + expect(output).toContain('```javascript'); + }); -\`\`\`jetpack -@Composable -fun MyComponent() {} + it('should handle code blocks without a language identifier', () => { + const input = ` +\`\`\` +const x = 1; \`\`\` `; const output = addLanguageSubheadingsToCodeBlocks(input); - expect(output).toContain('#### Kotlin'); - expect(output).toContain('#### Jetpack'); + // Code blocks without language should be returned as-is (no subheading added) + expect(output).not.toContain('####'); + expect(output).toContain('```\nconst x = 1;'); }); it('should not modify code blocks outside tags', () => { @@ -719,35 +715,6 @@ val x = 1 expect(output).toBe(input); }); - it('should handle multiple blocks in content', () => { - const input = `First section: - -\`\`\`javascript -const a = 1; -\`\`\` - -\`\`\`python -a = 1 -\`\`\` - - -Second section: - -\`\`\`swift -let b = 2 -\`\`\` - -\`\`\`kotlin -val b = 2 -\`\`\` -`; - const output = addLanguageSubheadingsToCodeBlocks(input); - expect(output).toContain('#### Javascript'); - expect(output).toContain('#### Python'); - expect(output).toContain('#### Swift'); - expect(output).toContain('#### Kotlin'); - }); - it('should preserve code block content', () => { const input = ` \`\`\`javascript @@ -770,5 +737,48 @@ channel.subscribe(on_message) expect(output).toContain('console.log(message);'); expect(output).toContain('print(message)'); }); + + it('should handle empty tags', () => { + const input = ``; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toBe(''); + }); + + it('should handle tags with only whitespace', () => { + const input = ` + +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toBe(input); + }); + + it('should handle code blocks with Windows-style line endings', () => { + const input = `\r\n\`\`\`javascript\r\nconst x = 1;\r\n\`\`\`\r\n`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain('#### Javascript'); + expect(output).toContain('```javascript'); + }); + + it('should handle language identifiers with hyphens', () => { + const input = ` +\`\`\`objective-c +NSLog(@"Hello"); +\`\`\` +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain('#### Objective-c'); + expect(output).toContain('```objective-c'); + }); + + it('should handle language identifiers with special characters', () => { + const input = ` +\`\`\`shell-session +$ npm install +\`\`\` +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain('#### Shell-session'); + expect(output).toContain('```shell-session'); + }); }); }); diff --git a/data/onPostBuild/transpileMdxToMarkdown.ts b/data/onPostBuild/transpileMdxToMarkdown.ts index e7fb211157..cd35c136fc 100644 --- a/data/onPostBuild/transpileMdxToMarkdown.ts +++ b/data/onPostBuild/transpileMdxToMarkdown.ts @@ -11,7 +11,9 @@ const REPORTER_PREFIX = 'onPostBuild:transpileMdxToMarkdown'; * Handles underscore-separated variants (e.g., realtime_javascript -> Realtime Javascript, rest_javascript -> Rest Javascript) */ function getLanguageDisplayName(lang: string): string { - if (!lang) return ''; + if (!lang) { + return ''; + } // Split by underscore, capitalize each part, join with space return lang .split('_') @@ -24,13 +26,16 @@ function getLanguageDisplayName(lang: string): string { * This makes it easier for LLMs to identify which language each code snippet belongs to */ function addLanguageSubheadingsToCodeBlocks(content: string): string { - // Match blocks (case-insensitive for the tag) - const codeTagRegex = /([\s\S]*?)<\/Code>/gi; + // Match blocks with optional attributes (case-insensitive for the tag) + // Handles both and etc. + const codeTagRegex = /]*>([\s\S]*?)<\/Code>/gi; return content.replace(codeTagRegex, (match, innerContent: string) => { // Find all code blocks within this tag // Match ```language followed by code and closing ``` - const codeBlockRegex = /(```(\w+)\n[\s\S]*?```)/g; + // Uses [^\n`]+ to capture language identifiers with hyphens, plus signs, dots (e.g., objective-c, c++, shell-session) + // Supports both Unix (\n) and Windows (\r\n) line endings + const codeBlockRegex = /```([^\n`]+)\r?\n[\s\S]*?```/g; // Check if there are any code blocks const codeBlocks = innerContent.match(codeBlockRegex); @@ -40,7 +45,7 @@ function addLanguageSubheadingsToCodeBlocks(content: string): string { } // Replace each code block with a subheading followed by the code block - const transformedContent = innerContent.replace(codeBlockRegex, (codeBlock, fullMatch, lang) => { + const transformedContent = innerContent.replace(codeBlockRegex, (codeBlock, lang) => { const displayName = getLanguageDisplayName(lang); return `#### ${displayName}\n\n${codeBlock}`; }); @@ -240,9 +245,7 @@ function removeImportExportStatements(content: string): string { * Remove script tags that are not inside code blocks */ function removeScriptTags(content: string): string { - return transformNonCodeBlocks(content, (text) => - text.replace(/]*>[\s\S]*?<\/script>/gi, ''), - ); + return transformNonCodeBlocks(content, (text) => text.replace(/]*>[\s\S]*?<\/script>/gi, '')); } /** @@ -266,9 +269,7 @@ function removeAnchorTags(content: string): string { * This makes hidden type definition tables visible in markdown output */ function stripHiddenFromTables(content: string): string { - return transformNonCodeBlocks(content, (text) => - text.replace(/(]*)\bhidden\b\s*/gi, '$1'), - ); + return transformNonCodeBlocks(content, (text) => text.replace(/(]*)\bhidden\b\s*/gi, '$1')); } /** @@ -322,7 +323,6 @@ function convertImagePathsToGitHub(content: string): string { * Preserves: Non-Ably /docs/ links, sdk.ably.com links (API docs), already .md links */ function convertDocsLinksToMarkdown(content: string): string { - // Allowed hostnames for docs link conversion (exact matches only) const ALLOWED_DOCS_HOSTNAMES = ['ably.com', 'www.ably.com', 'ably-dev.com', 'www.ably-dev.com']; From a26a35bb66c0e495471be09892ca99b33464f910 Mon Sep 17 00:00:00 2001 From: sacOO7 Date: Fri, 6 Feb 2026 19:27:13 +0530 Subject: [PATCH 3/3] Implemented proper sub-headings for code snippets, removed fencing --- .../transpileMdxToMarkdown.test.ts.snap | 4 +- .../transpileMdxToMarkdown.test.ts | 181 +++++++++++++++++- data/onPostBuild/transpileMdxToMarkdown.ts | 79 +++++--- 3 files changed, 232 insertions(+), 32 deletions(-) diff --git a/data/onPostBuild/__snapshots__/transpileMdxToMarkdown.test.ts.snap b/data/onPostBuild/__snapshots__/transpileMdxToMarkdown.test.ts.snap index ab773410fe..392b7cf05d 100644 --- a/data/onPostBuild/__snapshots__/transpileMdxToMarkdown.test.ts.snap +++ b/data/onPostBuild/__snapshots__/transpileMdxToMarkdown.test.ts.snap @@ -35,9 +35,9 @@ Use your-api-key and your-channel-name in your code. -#### Javascript +### Javascript -\`\`\`javascript +\`\`\` const channel = realtime.channels.get('your-channel-name'); \`\`\` diff --git a/data/onPostBuild/transpileMdxToMarkdown.test.ts b/data/onPostBuild/transpileMdxToMarkdown.test.ts index 9ecbbbd55b..4904852fad 100644 --- a/data/onPostBuild/transpileMdxToMarkdown.test.ts +++ b/data/onPostBuild/transpileMdxToMarkdown.test.ts @@ -12,6 +12,8 @@ import { replaceTemplateVariables, calculateOutputPath, getLanguageDisplayName, + findPrecedingHeadingLevel, + transformCodeBlocksWithSubheadings, addLanguageSubheadingsToCodeBlocks, } from './transpileMdxToMarkdown'; import * as fs from 'fs'; @@ -646,8 +648,82 @@ Real prop: link: '/docs/presence'`; }); }); + describe('findPrecedingHeadingLevel', () => { + it('should return 3 when no heading is found (so +1 gives h4 default)', () => { + const content = 'Some text without headings'; + expect(findPrecedingHeadingLevel(content, content.length)).toBe(3); + }); + + it('should find h1 heading level', () => { + const content = '# Main Title\n\nSome content'; + expect(findPrecedingHeadingLevel(content, content.length)).toBe(1); + }); + + it('should find h2 heading level', () => { + const content = '## Section\n\nSome content'; + expect(findPrecedingHeadingLevel(content, content.length)).toBe(2); + }); + + it('should find the nearest preceding heading', () => { + const content = '# Title\n\n## Section\n\n### Subsection\n\nContent here'; + expect(findPrecedingHeadingLevel(content, content.length)).toBe(3); + }); + + it('should only consider headings before the given position', () => { + const content = '## First\n\nContent\n\n### Second'; + const positionBeforeSecond = content.indexOf('### Second'); + expect(findPrecedingHeadingLevel(content, positionBeforeSecond)).toBe(2); + }); + + it('should handle h6 heading level', () => { + const content = '###### Deep heading\n\nContent'; + expect(findPrecedingHeadingLevel(content, content.length)).toBe(6); + }); + }); + + describe('transformCodeBlocksWithSubheadings', () => { + it('should transform code blocks with subheadings and remove language from fence', () => { + const input = ` +\`\`\`javascript +const x = 1; +\`\`\` +`; + const output = transformCodeBlocksWithSubheadings(input, '###'); + expect(output).toContain('### Javascript'); + expect(output).toContain('```\nconst x = 1;'); + expect(output).not.toContain('```javascript'); + }); + + it('should return null when no code blocks with language identifiers', () => { + const input = ` +\`\`\` +const x = 1; +\`\`\` +`; + const output = transformCodeBlocksWithSubheadings(input, '###'); + expect(output).toBeNull(); + }); + + it('should handle multiple code blocks', () => { + const input = ` +\`\`\`javascript +const x = 1; +\`\`\` + +\`\`\`python +x = 1 +\`\`\` +`; + const output = transformCodeBlocksWithSubheadings(input, '####'); + expect(output).toContain('#### Javascript'); + expect(output).toContain('#### Python'); + expect(output).not.toContain('```javascript'); + expect(output).not.toContain('```python'); + }); + }); + describe('addLanguageSubheadingsToCodeBlocks', () => { - it('should add subheadings to multiple code blocks within tags', () => { + it('should add subheadings to multiple code blocks within tags and remove language from fence', () => { const input = ` \`\`\`javascript const x = 1; @@ -660,8 +736,11 @@ val x = 1 const output = addLanguageSubheadingsToCodeBlocks(input); expect(output).toContain('#### Javascript'); expect(output).toContain('#### Kotlin'); - expect(output).toContain('```javascript'); - expect(output).toContain('```kotlin'); + // Language should be removed from fenced code blocks + expect(output).not.toContain('```javascript'); + expect(output).not.toContain('```kotlin'); + expect(output).toContain('```\nconst x = 1;'); + expect(output).toContain('```\nval x = 1'); }); it('should handle realtime/rest SDK variants', () => { @@ -677,6 +756,9 @@ const channel = rest.channels.get('test'); const output = addLanguageSubheadingsToCodeBlocks(input); expect(output).toContain('#### Realtime Javascript'); expect(output).toContain('#### Rest Javascript'); + // Language should be removed from fenced code blocks + expect(output).not.toContain('```realtime_javascript'); + expect(output).not.toContain('```rest_javascript'); }); it('should handle tags with attributes like fixed="true"', () => { @@ -687,7 +769,9 @@ const x = 1; `; const output = addLanguageSubheadingsToCodeBlocks(input); expect(output).toContain('#### Javascript'); - expect(output).toContain('```javascript'); + // Language should be removed from fenced code blocks + expect(output).not.toContain('```javascript'); + expect(output).toContain('```\nconst x = 1;'); }); it('should handle code blocks without a language identifier', () => { @@ -756,7 +840,8 @@ channel.subscribe(on_message) const input = `\r\n\`\`\`javascript\r\nconst x = 1;\r\n\`\`\`\r\n`; const output = addLanguageSubheadingsToCodeBlocks(input); expect(output).toContain('#### Javascript'); - expect(output).toContain('```javascript'); + // Language should be removed from fenced code blocks + expect(output).not.toContain('```javascript'); }); it('should handle language identifiers with hyphens', () => { @@ -767,7 +852,8 @@ NSLog(@"Hello"); `; const output = addLanguageSubheadingsToCodeBlocks(input); expect(output).toContain('#### Objective-c'); - expect(output).toContain('```objective-c'); + // Language should be removed from fenced code blocks + expect(output).not.toContain('```objective-c'); }); it('should handle language identifiers with special characters', () => { @@ -778,7 +864,88 @@ $ npm install `; const output = addLanguageSubheadingsToCodeBlocks(input); expect(output).toContain('#### Shell-session'); - expect(output).toContain('```shell-session'); + // Language should be removed from fenced code blocks + expect(output).not.toContain('```shell-session'); + }); + + it('should use h3 subheading when preceded by h2 heading', () => { + const input = `## Section Title + + +\`\`\`javascript +const x = 1; +\`\`\` +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain('### Javascript'); + expect(output).not.toContain('#### Javascript'); + }); + + it('should use h4 subheading when preceded by h3 heading', () => { + const input = `### Subsection Title + + +\`\`\`javascript +const x = 1; +\`\`\` +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain('#### Javascript'); + }); + + it('should use h5 subheading when preceded by h4 heading', () => { + const input = `#### Deep Section + + +\`\`\`javascript +const x = 1; +\`\`\` +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain('##### Javascript'); + }); + + it('should use h7 when preceded by h6 heading (no cap for LLM consumption)', () => { + const input = `###### Deepest Section + + +\`\`\`javascript +const x = 1; +\`\`\` +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain('####### Javascript'); + }); + + it('should use h4 as default when no preceding heading', () => { + const input = ` +\`\`\`javascript +const x = 1; +\`\`\` +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain('#### Javascript'); + }); + + it('should handle multiple blocks with different preceding headings', () => { + const input = `## First Section + + +\`\`\`javascript +const a = 1; +\`\`\` + + +### Nested Section + + +\`\`\`python +b = 2 +\`\`\` +`; + const output = addLanguageSubheadingsToCodeBlocks(input); + expect(output).toContain('### Javascript'); + expect(output).toContain('#### Python'); }); }); }); diff --git a/data/onPostBuild/transpileMdxToMarkdown.ts b/data/onPostBuild/transpileMdxToMarkdown.ts index cd35c136fc..0a87243acd 100644 --- a/data/onPostBuild/transpileMdxToMarkdown.ts +++ b/data/onPostBuild/transpileMdxToMarkdown.ts @@ -22,38 +22,69 @@ function getLanguageDisplayName(lang: string): string { } /** - * Add language subheadings before each code block within tags - * This makes it easier for LLMs to identify which language each code snippet belongs to + * Find the heading level of the nearest preceding heading before a given position. + * Used to determine the appropriate subheading level for code block language labels. + */ +function findPrecedingHeadingLevel(content: string, position: number): number { + const contentBefore = content.substring(0, position); + const headingRegex = /^(#+)\s+/gm; + let lastHeadingLevel = 3; // Defaults to 3 when no heading is found + let match; + + while ((match = headingRegex.exec(contentBefore)) !== null) { + lastHeadingLevel = match[1].length; + } + + return lastHeadingLevel; +} + +/** + * Transform code blocks within a tag by adding language subheadings + * and removing language identifiers from fenced code blocks. + * Returns null if no code blocks with language identifiers are found. + */ +function transformCodeBlocksWithSubheadings(innerContent: string, headingPrefix: string): string | null { + // Match ```language followed by code and closing ``` + // Uses [^\n`]+ to capture language identifiers with hyphens, plus signs, dots (e.g., objective-c, c++, shell-session) + // Supports both Unix (\n) and Windows (\r\n) line endings + const codeBlockRegex = /```([^\n`]+)\r?\n([\s\S]*?)```/g; + + // Check if there are any code blocks with language identifiers + if (!innerContent.match(codeBlockRegex)) { + return null; + } + + // Replace each code block with a subheading followed by the code block (without language in fence) + return innerContent.replace(codeBlockRegex, (_codeBlock, lang, codeContent) => { + const displayName = getLanguageDisplayName(lang); + return `${headingPrefix} ${displayName}\n\n\`\`\`\n${codeContent}\`\`\``; + }); +} + +/** + * Add language subheadings before each code block within tags. + * This makes it easier for LLMs to identify which language each code snippet belongs to. + * - Removes language identifier from fenced code blocks (since subheading provides this info) + * - Dynamically determines heading level based on preceding heading context */ function addLanguageSubheadingsToCodeBlocks(content: string): string { // Match blocks with optional attributes (case-insensitive for the tag) // Handles both and etc. const codeTagRegex = /]*>([\s\S]*?)<\/Code>/gi; - return content.replace(codeTagRegex, (match, innerContent: string) => { - // Find all code blocks within this tag - // Match ```language followed by code and closing ``` - // Uses [^\n`]+ to capture language identifiers with hyphens, plus signs, dots (e.g., objective-c, c++, shell-session) - // Supports both Unix (\n) and Windows (\r\n) line endings - const codeBlockRegex = /```([^\n`]+)\r?\n[\s\S]*?```/g; - - // Check if there are any code blocks - const codeBlocks = innerContent.match(codeBlockRegex); - if (!codeBlocks || codeBlocks.length === 0) { - // No code blocks - return as-is - return match; - } + return content.replace(codeTagRegex, (fullMatch, innerContent: string, offset: number) => { + // Calculate the appropriate heading level based on preceding headings + const precedingLevel = findPrecedingHeadingLevel(content, offset); + const headingPrefix = '#'.repeat(precedingLevel + 1); - // Replace each code block with a subheading followed by the code block - const transformedContent = innerContent.replace(codeBlockRegex, (codeBlock, lang) => { - const displayName = getLanguageDisplayName(lang); - return `#### ${displayName}\n\n${codeBlock}`; - }); + // Transform code blocks with subheadings + const transformedContent = transformCodeBlocksWithSubheadings(innerContent, headingPrefix); + if (transformedContent === null) { + return fullMatch; // No code blocks with language - return unchanged + } // Ensure proper newline after tag for markdown formatting - // Trim leading whitespace and add two newlines (blank line before heading) - const trimmedContent = transformedContent.trimStart(); - return `\n\n${trimmedContent}`; + return `\n\n${transformedContent.trimStart()}`; }); } @@ -652,5 +683,7 @@ export { calculateOutputPath, transformMdxToMarkdown, getLanguageDisplayName, + findPrecedingHeadingLevel, + transformCodeBlocksWithSubheadings, addLanguageSubheadingsToCodeBlocks, };