diff --git a/package.json b/package.json index ff592dc..ea31cc2 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@coseeing/see-mark", - "version": "1.2.0", + "version": "1.3.0", "description": "A markdown parser for a11y", "main": "./lib/see-mark.cjs", "files": [ diff --git a/src/markdown-processor/markdown-processor.js b/src/markdown-processor/markdown-processor.js index 09a81b9..4e280eb 100644 --- a/src/markdown-processor/markdown-processor.js +++ b/src/markdown-processor/markdown-processor.js @@ -2,6 +2,7 @@ import markedProcessorFactory from './marked-wrapper/marked-wrapper'; import math from './marked-extentions/math'; import alert from './marked-extentions/alert'; +import heading from './marked-extentions/heading'; import internalLink from './marked-extentions/internal-link'; import image from './marked-extentions/image'; import internalLinkTitle from './marked-extentions/internal-link-title'; @@ -25,6 +26,7 @@ const markdownProcessor = (markdownContent = '', options = {}) => { extensions: [ math, alert, + heading, internalLink, internalLinkTitle, image, diff --git a/src/markdown-processor/markdown-processor.test.js b/src/markdown-processor/markdown-processor.test.js index e611218..2cef15c 100644 --- a/src/markdown-processor/markdown-processor.test.js +++ b/src/markdown-processor/markdown-processor.test.js @@ -1,5 +1,4 @@ import '@testing-library/jest-dom'; -import { getByRole } from '@testing-library/dom'; import createDOMFromHTML from '../testing-helpers/create-dom-from-html'; import { getElementByType } from '../testing-helpers/custom-query'; @@ -21,12 +20,13 @@ describe('markdownProcessor', () => { const container = createDOMFromHTML(result); - const heading = getByRole(container, 'heading', { - level: 1, - name: 'Hello World', - }); + const heading = getElementByType( + container, + SUPPORTED_COMPONENT_TYPES.HEADING + ); expect(heading).toBeTruthy(); + expect(heading.textContent).toBe('Hello World'); }); it('should handle math 
/**
 * Converts text to a URL-friendly slug.
 *
 * Steps, in order:
 * 1. lowercase the text;
 * 2. drop every character that is not a Unicode letter, a Unicode number,
 *    whitespace or a hyphen (so CJK and other non-Latin letters survive);
 * 3. turn each run of whitespace into a single hyphen;
 * 4. collapse runs of hyphens into one;
 * 5. strip a leading and/or trailing hyphen.
 *
 * The result may be an empty string when the input has no letters or numbers.
 *
 * @param {string} text - The text to slugify
 * @returns {string} The slugified text
 *
 * @example
 * slugify("Hello World") // "hello-world"
 * slugify("What is JavaScript?") // "what-is-javascript"
 * slugify("什麼是 React") // "什麼是-react"
 */
export function slugify(text) {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{N}\s-]/gu, '') // Keep only letters/numbers/whitespace/hyphens (Unicode-aware)
    .replace(/\s+/g, '-') // Each whitespace run becomes one hyphen
    .replace(/-+/g, '-') // Collapse consecutive hyphens
    .replace(/^-|-$/g, ''); // Strip a leading/trailing hyphen
}

/**
 * Returns an ID that has not been handed out before for the given `usedIds`
 * registry, and records it there.
 *
 * The first request for a slug returns it unchanged. Later requests append
 * `-1`, `-2`, ... Every ID that is returned, including the suffixed ones, is
 * registered, and the suffix keeps advancing until the candidate is free.
 * This prevents a generated ID such as `intro-1` from clashing with a heading
 * whose own slug is literally `intro-1`.
 *
 * @param {string} baseId - The base slug ID
 * @param {Map<string, number>} usedIds - Registry mapping each ID already
 *   handed out to the last numeric suffix used for it (mutated by this call)
 * @returns {string} A unique ID
 */
function getUniqueId(baseId, usedIds) {
  if (!usedIds.has(baseId)) {
    usedIds.set(baseId, 0);
    return baseId;
  }

  let suffix = usedIds.get(baseId);
  let candidate;
  do {
    suffix += 1;
    candidate = `${baseId}-${suffix}`;
  } while (usedIds.has(candidate));

  // Remember where to resume for this base, and reserve the generated ID so a
  // later heading that slugifies to the same string gets a different one.
  usedIds.set(baseId, suffix);
  usedIds.set(candidate, 0);
  return candidate;
}

/**
 * Marked extension for heading custom component.
 * All headings become custom components with auto-generated slug IDs.
 *
 * Each call of this factory creates its own registry of used IDs, so IDs are
 * unique per returned extension instance.
 * NOTE(review): this assumes the factory is invoked once per processed
 * document — confirm against the marked wrapper that consumes it.
 *
 * @returns {{ renderer: { heading: Function } }} Marked extension object
 */
const markedHeading = () => {
  // Track used IDs within a single markdown document to ensure uniqueness
  const usedIds = new Map();

  const renderer = {
    heading(token) {
      const { depth, text, tokens = [] } = token;

      // Generate unique slug ID from heading text.
      // NOTE(review): `text` looks like the heading's inline source rather
      // than its rendered text — confirm how links/images should affect IDs.
      const baseId = slugify(text);
      const id = getUniqueId(baseId, usedIds);

      // Parse inline tokens for children (handles bold, italic, links, etc.)
      const children = this.parser.parseInline(tokens);

      // Payload exposed to the rendering layer: slug id, heading level and
      // the heading text, merged with whatever extractTokenMeta adds.
      const meta = extractTokenMeta(token, {
        id,
        level: depth,
        text,
      });

      return buildHTMLMarkup(SUPPORTED_COMPONENT_TYPES.HEADING, meta, children);
    },
  };

  return { renderer };
};

export default markedHeading;
@World#')).toBe('hello-world'); + }); + + it('should preserve Chinese characters', () => { + expect(slugify('什麼是 React')).toBe('什麼是-react'); + expect(slugify('前端開發入門')).toBe('前端開發入門'); + }); + + it('should handle multiple spaces', () => { + expect(slugify('Hello World')).toBe('hello-world'); + }); + + it('should handle leading/trailing spaces', () => { + expect(slugify(' Hello World ')).toBe('hello-world'); + }); + + it('should handle empty string', () => { + expect(slugify('')).toBe(''); + }); + + it('should handle numbers', () => { + expect(slugify('Chapter 1 Introduction')).toBe('chapter-1-introduction'); + }); +}); + +describe('markdownProcessor - heading', () => { + const options = { + latexDelimiter: 'bracket', + documentFormat: 'inline', + imageFiles: {}, + }; + + it('should process heading with auto-generated slug id in payload', () => { + const markdownContent = '# Hello World'; + const result = markdownProcessor(markdownContent, options); + const container = createDOMFromHTML(result); + + const heading = getElementByType( + container, + SUPPORTED_COMPONENT_TYPES.HEADING + ); + expect(heading).toBeTruthy(); + expect(heading.textContent).toBe('Hello World'); + + const payload = JSON.parse( + heading.getAttribute(SEE_MARK_PAYLOAD_DATA_ATTRIBUTES) + ); + expect(payload).toMatchObject({ + id: 'hello-world', + level: 1, + text: 'Hello World', + }); + expect(payload.position).toBeDefined(); + }); + + it('should handle all heading levels', () => { + const levels = [1, 2, 3, 4, 5, 6]; + + levels.forEach((level) => { + const hashes = '#'.repeat(level); + const markdownContent = `${hashes} Heading ${level}`; + const result = markdownProcessor(markdownContent, options); + const container = createDOMFromHTML(result); + + const heading = getElementByType( + container, + SUPPORTED_COMPONENT_TYPES.HEADING + ); + + const payload = JSON.parse( + heading.getAttribute(SEE_MARK_PAYLOAD_DATA_ATTRIBUTES) + ); + expect(payload.level).toBe(level); + 
expect(payload.id).toBe(`heading-${level}`); + }); + }); + + it('should include position info in heading payload', () => { + const markdownContent = '# Test Heading'; + const result = markdownProcessor(markdownContent, options); + const container = createDOMFromHTML(result); + + const heading = getElementByType( + container, + SUPPORTED_COMPONENT_TYPES.HEADING + ); + + const payload = JSON.parse( + heading.getAttribute(SEE_MARK_PAYLOAD_DATA_ATTRIBUTES) + ); + + expect(payload.position).toBeDefined(); + expect(payload.position.start).toBe(0); + expect(payload.position.end).toBe(14); + }); + + it('should preserve inline formatting in headings', () => { + const markdownContent = '# Hello **World**'; + const result = markdownProcessor(markdownContent, options); + const container = createDOMFromHTML(result); + + const heading = getElementByType( + container, + SUPPORTED_COMPONENT_TYPES.HEADING + ); + const strong = heading.querySelector('strong'); + expect(strong).toBeTruthy(); + expect(strong.textContent).toBe('World'); + }); + + it('should generate unique ids for duplicate headings', () => { + const markdownContent = + '# Introduction\n\n## Introduction\n\n### Introduction'; + const result = markdownProcessor(markdownContent, options); + const container = createDOMFromHTML(result); + + const headings = getAllElementsByType( + container, + SUPPORTED_COMPONENT_TYPES.HEADING + ); + expect(headings).toHaveLength(3); + + const payloads = headings.map((h) => + JSON.parse(h.getAttribute(SEE_MARK_PAYLOAD_DATA_ATTRIBUTES)) + ); + expect(payloads[0].id).toBe('introduction'); + expect(payloads[1].id).toBe('introduction-1'); + expect(payloads[2].id).toBe('introduction-2'); + }); + + it('should handle special characters in heading text', () => { + const markdownContent = '## What is JavaScript?'; + const result = markdownProcessor(markdownContent, options); + const container = createDOMFromHTML(result); + + const heading = getElementByType( + container, + 
SUPPORTED_COMPONENT_TYPES.HEADING + ); + + const payload = JSON.parse( + heading.getAttribute(SEE_MARK_PAYLOAD_DATA_ATTRIBUTES) + ); + expect(payload.id).toBe('what-is-javascript'); + expect(heading.textContent).toBe('What is JavaScript?'); + }); + + it('should preserve Chinese characters in slug', () => { + const markdownContent = '# 什麼是 React'; + const result = markdownProcessor(markdownContent, options); + const container = createDOMFromHTML(result); + + const heading = getElementByType( + container, + SUPPORTED_COMPONENT_TYPES.HEADING + ); + + const payload = JSON.parse( + heading.getAttribute(SEE_MARK_PAYLOAD_DATA_ATTRIBUTES) + ); + expect(payload.id).toBe('什麼是-react'); + }); + + it('should handle multiple headings with different text', () => { + const markdownContent = '# First Section\n\n## Second Section'; + const result = markdownProcessor(markdownContent, options); + const container = createDOMFromHTML(result); + + const headings = getAllElementsByType( + container, + SUPPORTED_COMPONENT_TYPES.HEADING + ); + + const payloads = headings.map((h) => + JSON.parse(h.getAttribute(SEE_MARK_PAYLOAD_DATA_ATTRIBUTES)) + ); + expect(payloads[0].id).toBe('first-section'); + expect(payloads[1].id).toBe('second-section'); + }); +}); diff --git a/src/markdown-processor/marked-extentions/helpers.js b/src/markdown-processor/marked-extentions/helpers.js index ebf8af8..ae85c4d 100644 --- a/src/markdown-processor/marked-extentions/helpers.js +++ b/src/markdown-processor/marked-extentions/helpers.js @@ -18,6 +18,14 @@ export const extractTokenMeta = (token, customMeta = {}) => { }; }; +/** + * Builds HTML markup with SeeMark data attributes for custom components. 
/**
 * Default React component for headings.
 *
 * Renders the `<h1>`–`<h6>` element matching `level`, with `id` as its
 * id attribute and `children` as its content.
 *
 * text prop is available for custom implementations (e.g., search indexing,
 * accessibility); this default implementation does not use it.
 *
 * @param {Object} props
 * @param {*} props.children - Heading content
 * @param {string|null} [props.id=null] - Value for the element's id attribute
 * @param {number} [props.level=1] - Heading level, used to build the tag name
 * @returns {Object} The React element for the heading
 */
const Heading = ({ children, id = null, level = 1 }) => {
  const Tag = `h${level}`;

  // Equivalent to the JSX `<Tag id={id}>{children}</Tag>`; returning the bare
  // `{ children }` object would not be a renderable element.
  return React.createElement(Tag, { id }, children);
};
+Heading.propTypes = { + children: PropTypes.node.isRequired, + id: PropTypes.string, + level: PropTypes.oneOf([1, 2, 3, 4, 5, 6]), + text: PropTypes.string, + position: PropTypes.shape({ start: PropTypes.number, end: PropTypes.number }), +}; + +export default Heading; diff --git a/src/shared/supported-components.js b/src/shared/supported-components.js index 1f60e58..7979f53 100644 --- a/src/shared/supported-components.js +++ b/src/shared/supported-components.js @@ -1,5 +1,6 @@ export const SUPPORTED_COMPONENT_TYPES = { ALERT: 'alert', + HEADING: 'heading', INTERNAL_LINK: 'internalLink', INTERNAL_LINK_TITLE: 'internalLinkTitle', IMAGE: 'image', diff --git a/src/testing-helpers/custom-query.js b/src/testing-helpers/custom-query.js index 9fe8a62..fe0ffb0 100644 --- a/src/testing-helpers/custom-query.js +++ b/src/testing-helpers/custom-query.js @@ -4,3 +4,10 @@ export const getElementByType = (container, elementType) => container.querySelector( `[${SEEMARK_ELEMENT_TYPE_DATA_ATTRIBUTE}="${elementType}"]` ); + +export const getAllElementsByType = (container, elementType) => + Array.from( + container.querySelectorAll( + `[${SEEMARK_ELEMENT_TYPE_DATA_ATTRIBUTE}="${elementType}"]` + ) + );