diff --git a/data/onPostBuild/__snapshots__/transpileMdxToMarkdown.test.ts.snap b/data/onPostBuild/__snapshots__/transpileMdxToMarkdown.test.ts.snap
index e1fa016b40..392b7cf05d 100644
--- a/data/onPostBuild/__snapshots__/transpileMdxToMarkdown.test.ts.snap
+++ b/data/onPostBuild/__snapshots__/transpileMdxToMarkdown.test.ts.snap
@@ -34,7 +34,10 @@ Use your-api-key and your-channel-name in your code.
## Code blocks
-\`\`\`javascript
+
+### Javascript
+
+\`\`\`
const channel = realtime.channels.get('your-channel-name');
\`\`\`
diff --git a/data/onPostBuild/transpileMdxToMarkdown.test.ts b/data/onPostBuild/transpileMdxToMarkdown.test.ts
index 86fcdd759b..4904852fad 100644
--- a/data/onPostBuild/transpileMdxToMarkdown.test.ts
+++ b/data/onPostBuild/transpileMdxToMarkdown.test.ts
@@ -11,6 +11,10 @@ import {
convertRelativeUrls,
replaceTemplateVariables,
calculateOutputPath,
+ getLanguageDisplayName,
+ findPrecedingHeadingLevel,
+ transformCodeBlocksWithSubheadings,
+ addLanguageSubheadingsToCodeBlocks,
} from './transpileMdxToMarkdown';
import * as fs from 'fs';
import * as path from 'path';
@@ -622,4 +626,326 @@ Real prop: link: '/docs/presence'`;
expect(output).toMatch(/public\/docs\/chat\/moderation\/direct\/bodyguard\.md$/);
});
});
+
+  describe('getLanguageDisplayName', () => {
+    // Asserts that `identifier` is rendered as exactly `label`.
+    const expectLabel = (identifier: string, label: string): void => {
+      expect(getLanguageDisplayName(identifier)).toBe(label);
+    };
+
+    it('should capitalize simple language names', () => {
+      expectLabel('javascript', 'Javascript');
+      expectLabel('kotlin', 'Kotlin');
+      expectLabel('swift', 'Swift');
+    });
+
+    it('should handle underscore-separated variants', () => {
+      expectLabel('realtime_javascript', 'Realtime Javascript');
+      expectLabel('rest_python', 'Rest Python');
+    });
+
+    it('should handle empty string', () => expectLabel('', ''));
+    it('should handle single character', () => expectLabel('a', 'A'));
+  });
+
+  describe('findPrecedingHeadingLevel', () => {
+    // Heading level reported when the whole of `markdown` precedes the lookup position.
+    const levelAtEnd = (markdown: string): number => findPrecedingHeadingLevel(markdown, markdown.length);
+
+    it('should return 3 when no heading is found (so +1 gives h4 default)', () => {
+      expect(levelAtEnd('Some text without headings')).toBe(3);
+    });
+
+    it('should find h1 heading level', () => {
+      expect(levelAtEnd('# Main Title\n\nSome content')).toBe(1);
+    });
+
+    it('should find h2 heading level', () => {
+      expect(levelAtEnd('## Section\n\nSome content')).toBe(2);
+    });
+
+    it('should find the nearest preceding heading', () => {
+      expect(levelAtEnd('# Title\n\n## Section\n\n### Subsection\n\nContent here')).toBe(3);
+    });
+
+    it('should only consider headings before the given position', () => {
+      const laterHeading = '### Second';
+      const markdown = `## First\n\nContent\n\n${laterHeading}`;
+      // Cut the text right where the h3 starts, so only the h2 is in range.
+      const cutoff = markdown.indexOf(laterHeading);
+      expect(findPrecedingHeadingLevel(markdown, cutoff)).toBe(2);
+    });
+
+    it('should handle h6 heading level', () => {
+      expect(levelAtEnd('###### Deep heading\n\nContent')).toBe(6);
+    });
+  });
+
+  describe('transformCodeBlocksWithSubheadings', () => {
+    const FENCE = '```';
+
+    // One fenced block surrounded by newlines: "\n```<lang>\n<code>\n```\n".
+    const fenced = (lang: string, code: string): string => `\n${FENCE}${lang}\n${code}\n${FENCE}\n`;
+
+    // Substring that shows a language identifier is still attached to an opening fence.
+    const labelledFence = (lang: string): string => `${FENCE}${lang}`;
+
+    it('should transform code blocks with subheadings and remove language from fence', () => {
+      const source = fenced('javascript', 'const x = 1;');
+
+      const result = transformCodeBlocksWithSubheadings(source, '###');
+
+      expect(result).toContain('### Javascript');
+      expect(result).toContain(`${FENCE}\nconst x = 1;`);
+      expect(result).not.toContain(labelledFence('javascript'));
+    });
+
+    it('should return null when no code blocks with language identifiers', () => {
+      // An empty language yields a bare fence, which must be left alone.
+      const source = fenced('', 'const x = 1;');
+
+      const result = transformCodeBlocksWithSubheadings(source, '###');
+
+      expect(result).toBeNull();
+    });
+
+    it('should handle multiple code blocks', () => {
+      const source = fenced('javascript', 'const x = 1;') + fenced('python', 'x = 1');
+
+      const result = transformCodeBlocksWithSubheadings(source, '####');
+
+      for (const lang of ['javascript', 'python']) {
+        expect(result).not.toContain(labelledFence(lang));
+      }
+      expect(result).toContain('#### Javascript');
+      expect(result).toContain('#### Python');
+    });
+  });
+
+  describe('addLanguageSubheadingsToCodeBlocks', () => {
+    // Inputs wrap their fences in <Code>...</Code> unless a test says otherwise; only wrapped blocks are rewritten.
+    it('should add subheadings to multiple code blocks within <Code> tags and remove language from fence', () => {
+      const input = `<Code>
+\`\`\`javascript
+const x = 1;
+\`\`\`
+
+\`\`\`kotlin
+val x = 1
+\`\`\`
+</Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).toContain('#### Javascript');
+      expect(output).toContain('#### Kotlin');
+      expect(output).not.toContain('```javascript'); // the subheading now carries the language
+      expect(output).not.toContain('```kotlin');
+      expect(output).toContain('```\nconst x = 1;');
+      expect(output).toContain('```\nval x = 1');
+    });
+
+    it('should handle realtime/rest SDK variants', () => {
+      const input = `<Code>
+\`\`\`realtime_javascript
+const channel = realtime.channels.get('test');
+\`\`\`
+
+\`\`\`rest_javascript
+const channel = rest.channels.get('test');
+\`\`\`
+</Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).toContain('#### Realtime Javascript');
+      expect(output).toContain('#### Rest Javascript');
+      // Language should be removed from fenced code blocks
+      expect(output).not.toContain('```realtime_javascript');
+      expect(output).not.toContain('```rest_javascript');
+    });
+
+    it('should handle <Code> tags with attributes like fixed="true"', () => {
+      const input = `<Code fixed="true">
+\`\`\`javascript
+const x = 1;
+\`\`\`
+</Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).toContain('#### Javascript');
+      // Language should be removed from fenced code blocks
+      expect(output).not.toContain('```javascript');
+      expect(output).toContain('```\nconst x = 1;');
+    });
+
+    it('should handle code blocks without a language identifier', () => {
+      const input = `<Code>
+\`\`\`
+const x = 1;
+\`\`\`
+</Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      // Code blocks without language should be returned as-is (no subheading added)
+      expect(output).not.toContain('####');
+      expect(output).toContain('```\nconst x = 1;');
+    });
+
+    it('should not modify code blocks outside <Code> tags', () => {
+      const input = `\`\`\`javascript
+const x = 1;
+\`\`\`
+
+\`\`\`kotlin
+val x = 1
+\`\`\``;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).not.toContain('####');
+      expect(output).toBe(input);
+    });
+
+    it('should preserve code block content', () => {
+      const input = `<Code>
+\`\`\`javascript
+const channel = realtime.channels.get('{{RANDOM_CHANNEL_NAME}}');
+channel.subscribe((message) => {
+  console.log(message);
+});
+\`\`\`
+
+\`\`\`python
+channel = realtime.channels.get('channel-name')
+def on_message(message):
+    print(message)
+channel.subscribe(on_message)
+\`\`\`
+</Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).toContain("const channel = realtime.channels.get('{{RANDOM_CHANNEL_NAME}}');");
+      expect(output).toContain("channel = realtime.channels.get('channel-name')");
+      expect(output).toContain('console.log(message);');
+      expect(output).toContain('print(message)');
+    });
+
+    it('should handle empty <Code> tags', () => {
+      const input = `<Code></Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).toBe('<Code></Code>');
+    });
+
+    it('should handle <Code> tags with only whitespace', () => {
+      const input = `<Code>
+
+</Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).toBe(input);
+    });
+
+    it('should handle code blocks with Windows-style line endings', () => {
+      const input = `<Code>\r\n\`\`\`javascript\r\nconst x = 1;\r\n\`\`\`\r\n</Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).toContain('#### Javascript');
+      // Language should be removed from fenced code blocks
+      expect(output).not.toContain('```javascript');
+    });
+
+    it('should handle language identifiers with hyphens', () => {
+      const input = `<Code>
+\`\`\`objective-c
+NSLog(@"Hello");
+\`\`\`
+</Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).toContain('#### Objective-c');
+      // Language should be removed from fenced code blocks
+      expect(output).not.toContain('```objective-c');
+    });
+
+    it('should handle language identifiers with special characters', () => {
+      const input = `<Code>
+\`\`\`shell-session
+$ npm install
+\`\`\`
+</Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).toContain('#### Shell-session');
+      // Language should be removed from fenced code blocks
+      expect(output).not.toContain('```shell-session');
+    });
+
+    it('should use h3 subheading when preceded by h2 heading', () => {
+      const input = `## Section Title
+
+<Code>
+\`\`\`javascript
+const x = 1;
+\`\`\`
+</Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).toContain('### Javascript');
+      expect(output).not.toContain('#### Javascript');
+    });
+
+    it('should use h4 subheading when preceded by h3 heading', () => {
+      const input = `### Subsection Title
+
+<Code>
+\`\`\`javascript
+const x = 1;
+\`\`\`
+</Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).toContain('#### Javascript');
+    });
+
+    it('should use h5 subheading when preceded by h4 heading', () => {
+      const input = `#### Deep Section
+
+<Code>
+\`\`\`javascript
+const x = 1;
+\`\`\`
+</Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).toContain('##### Javascript');
+    });
+
+    it('should use h7 when preceded by h6 heading (no cap for LLM consumption)', () => {
+      const input = `###### Deepest Section
+
+<Code>
+\`\`\`javascript
+const x = 1;
+\`\`\`
+</Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).toContain('####### Javascript');
+    });
+
+    it('should use h4 as default when no preceding heading', () => {
+      const input = `<Code>
+\`\`\`javascript
+const x = 1;
+\`\`\`
+</Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).toContain('#### Javascript');
+    });
+
+    it('should handle multiple blocks with different preceding headings', () => {
+      const input = `## First Section
+
+<Code>
+\`\`\`javascript
+const a = 1;
+\`\`\`
+</Code>
+
+### Nested Section
+
+<Code>
+\`\`\`python
+b = 2
+\`\`\`
+</Code>`;
+      const output = addLanguageSubheadingsToCodeBlocks(input);
+      expect(output).toContain('### Javascript');
+      expect(output).toContain('#### Python');
+    });
+  });
});
diff --git a/data/onPostBuild/transpileMdxToMarkdown.ts b/data/onPostBuild/transpileMdxToMarkdown.ts
index ffcacdc569..0a87243acd 100644
--- a/data/onPostBuild/transpileMdxToMarkdown.ts
+++ b/data/onPostBuild/transpileMdxToMarkdown.ts
@@ -5,6 +5,89 @@ import frontMatter from 'front-matter';
const REPORTER_PREFIX = 'onPostBuild:transpileMdxToMarkdown';
+/**
+ * Build the human-readable label for a code-fence language identifier.
+ * Underscores separate words and each word gets an upper-case first letter, while the rest
+ * of the word is kept as written (kotlin -> Kotlin, realtime_javascript -> Realtime Javascript).
+ * Returns an empty string for an empty identifier.
+ */
+function getLanguageDisplayName(lang: string): string {
+  if (!lang) return '';
+  const labelWords: string[] = [];
+  for (const word of lang.split('_')) {
+    // An empty word (doubled underscore) stays empty: charAt(0) and slice(1) both give ''.
+    labelWords.push(`${word.charAt(0).toUpperCase()}${word.slice(1)}`);
+  }
+  return labelWords.join(' ');
+}
+
+/**
+ * Find the level of the nearest Markdown heading before `position`; returns 3 when there is none.
+ * Used to pick the subheading level for code block language labels. Lines inside ``` fences are
+ * skipped, so a `# comment` in an earlier shell or Python snippet is not read as an h1 heading.
+ */
+function findPrecedingHeadingLevel(content: string, position: number): number {
+  const contentBefore = content.substring(0, position);
+  const headingOrFence = /^(?:(#+)\s|[ \t]*```)/gm; // group 1 (the run of #) is set for headings only
+  let lastHeadingLevel = 3; // Defaults to 3 when no heading is found
+  let insideFence = false;
+  for (let m = headingOrFence.exec(contentBefore); m !== null; m = headingOrFence.exec(contentBefore)) {
+    if (m[1] === undefined) insideFence = !insideFence; // opening and closing fences both toggle
+    else if (!insideFence) lastHeadingLevel = m[1].length;
+  }
+  return lastHeadingLevel;
+}
+
+/**
+ * Turn every fenced code block that declares a language into a subheading followed by a
+ * language-less fence, e.g. "```kotlin" becomes "<headingPrefix> Kotlin", a blank line and "```".
+ * @param headingPrefix Markdown heading marker to use for the labels, e.g. "###"
+ * @returns the transformed text, or null when no fence declares a language
+ */
+function transformCodeBlocksWithSubheadings(innerContent: string, headingPrefix: string): string | null {
+  // Group 1: info string after the opening fence; group 2: code up to the closing fence.
+  // [^\r\n`]+ accepts hyphens, plus signs and dots (objective-c, c++, shell-session) while
+  // keeping the "\r" of a Windows line ending out of the captured language name.
+  const codeBlockRegex = /```([^\r\n`]+)\r?\n([\s\S]*?)```/g;
+
+  let replacedAny = false;
+  const transformed = innerContent.replace(codeBlockRegex, (_codeBlock, lang: string, codeContent: string) => {
+    replacedAny = true;
+    // Spaces around the identifier (e.g. "``` javascript") are not part of the language name.
+    const displayName = getLanguageDisplayName(lang.trim());
+    return `${headingPrefix} ${displayName}\n\n\`\`\`\n${codeContent}\`\`\``;
+  });
+
+  return replacedAny ? transformed : null;
+}
+
+/**
+ * Add a language subheading before each fenced code block inside <Code>...</Code> wrappers,
+ * so that LLMs reading the generated markdown can tell which language a snippet is in.
+ * - The language is dropped from the fence because the subheading now carries it
+ * - The subheading is one level deeper than the nearest heading before the wrapper
+ */
+function addLanguageSubheadingsToCodeBlocks(content: string): string {
+  // Opening tag (attributes allowed, e.g. fixed="true"), lazily captured body, closing tag.
+  // \b stops longer names such as <CodeBlock> from matching; "i" makes the tag case-insensitive.
+  const codeTagRegex = /<Code\b[^>]*>([\s\S]*?)<\/Code>/gi;
+
+  return content.replace(codeTagRegex, (fullMatch, innerContent: string, offset: number) => {
+    // `offset` is the index of this match in the original `content`, so the heading
+    // lookup only sees text that comes before this wrapper.
+    const precedingLevel = findPrecedingHeadingLevel(content, offset);
+    const headingPrefix = '#'.repeat(precedingLevel + 1);
+
+    const transformedContent = transformCodeBlocksWithSubheadings(innerContent, headingPrefix);
+    if (transformedContent === null) {
+      return fullMatch; // No code blocks with language - return unchanged
+    }
+    // The wrapper tags are not re-emitted here (NOTE(review): confirm this is intended);
+    // the leading blank line lets the first subheading start its own markdown block.
+    return `\n\n${transformedContent.trimStart()}`;
+  });
+}
+
interface MdxNode {
parent: {
relativeDirectory: string;
@@ -193,9 +276,7 @@ function removeImportExportStatements(content: string): string {
* Remove script tags that are not inside code blocks
*/
function removeScriptTags(content: string): string {
- return transformNonCodeBlocks(content, (text) =>
- text.replace(/