diff --git a/.gitignore b/.gitignore index af49edf4..40f6ef61 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,9 @@ bower_components # node-waf configuration .lock-wscript +# Compiled binary +rst-to-mdx + # Compiled binary addons (https://nodejs.org/api/addons.html) build/Release diff --git a/daml.yaml b/daml.yaml new file mode 100644 index 00000000..56d7d2e7 --- /dev/null +++ b/daml.yaml @@ -0,0 +1,4 @@ +# docs/daml.yaml +override-components: + rst-to-mdx: + local-path: ./tools/rst-to-mdx \ No newline at end of file diff --git a/scripts/generate_canton_protobuf_history.py b/scripts/generate_canton_protobuf_history.py index c181d25e..2130db03 100644 --- a/scripts/generate_canton_protobuf_history.py +++ b/scripts/generate_canton_protobuf_history.py @@ -505,6 +505,8 @@ def main() -> int: target_dir=Path(args.legacy_output_dir).resolve(), ) + canonical_dir = Path(args.legacy_output_dir).resolve() + package_paths = sorted((canonical_dir / "packages").glob("*.mdx")) update_docs_navigation( docs_json_path=Path(args.docs_json).resolve(), dropdown_label=args.nav_dropdown, diff --git a/tools/rst-to-mdx/.vscode/settings.json b/tools/rst-to-mdx/.vscode/settings.json new file mode 100644 index 00000000..82d90596 --- /dev/null +++ b/tools/rst-to-mdx/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "files.readonlyInclude": { + "**/.daml/unpacked-dars/**": true + } +} \ No newline at end of file diff --git a/tools/rst-to-mdx/LICENSE b/tools/rst-to-mdx/LICENSE new file mode 100644 index 00000000..a3602cbc --- /dev/null +++ b/tools/rst-to-mdx/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright (c) 2022 Digital Asset (Switzerland) GmbH and/or its affiliates + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/tools/rst-to-mdx/Makefile b/tools/rst-to-mdx/Makefile new file mode 100644 index 00000000..ef49aa08 --- /dev/null +++ b/tools/rst-to-mdx/Makefile @@ -0,0 +1,53 @@ +BINARY := rst-to-mdx +DIST := dist +GO ?= go +OS_ARCHES := darwin/arm64 darwin/amd64 linux/arm64 linux/amd64 windows/amd64 + +.PHONY: all build test fmt vet tidy smoke release clean + +all: build + +build: + $(GO) build -o $(BINARY) ./cmd/rst-to-mdx + +test: + $(GO) test ./... + +fmt: + $(GO) fmt ./... + +vet: + $(GO) vet ./... + +tidy: + $(GO) mod tidy + +smoke: build + ./smoke-test.sh + +# Cross-compile to dist/-/ for publishing via +# `dpm artifacts publish component`. +# +# Each platform directory gets a component.yaml whose `path:` matches the +# platform's binary name. 
Windows uses component.windows.yaml (`.exe` +# suffix); every other platform uses the default component.yaml. +release: + @rm -rf $(DIST) + @for pair in $(OS_ARCHES); do \ + os=$${pair%/*}; arch=$${pair#*/}; \ + out=$(DIST)/$$os-$$arch; \ + mkdir -p $$out; \ + ext=""; if [ "$$os" = "windows" ]; then ext=".exe"; fi; \ + echo ">> $$os/$$arch"; \ + GOOS=$$os GOARCH=$$arch $(GO) build -o $$out/$(BINARY)$$ext ./cmd/rst-to-mdx; \ + if [ "$$os" = "windows" ]; then \ + cp component.windows.yaml $$out/component.yaml; \ + else \ + cp component.yaml $$out/; \ + fi; \ + cp LICENSE $$out/; \ + done + +clean: + rm -f $(BINARY) + rm -rf $(DIST) diff --git a/tools/rst-to-mdx/README.md b/tools/rst-to-mdx/README.md new file mode 100644 index 00000000..4f3ed860 --- /dev/null +++ b/tools/rst-to-mdx/README.md @@ -0,0 +1,168 @@ +# rst-to-mdx + +A DPM component that converts reStructuredText documents into +Mintlify-compatible MDX files. Built for the migration from +`docs-website/docs/replicated/` (RST, Sphinx) to `docs/docs-main/` (MDX, +Mintlify), but the converter runs against any RST file on disk. + +## Quick start + +```bash +cd tools/rst-to-mdx +make build + +# Convert one file +./rst-to-mdx path/to/input.rst path/to/output.mdx + +# Or invoke through dpm — the local daml.yaml registers the component +# automatically when dpm runs from this directory. +dpm rst-to-mdx path/to/input.rst path/to/output.mdx + +# Convert an entire RST tree to a mirror MDX tree in one command +./rst-to-mdx --batch \ + --input-dir /docs/replicated \ + --output-dir \ + --target-root \ + --copy-images + +# Audit which RST files don't have a corresponding page in docs.json +./rst-to-mdx --audit-coverage \ + --docs-root \ + --target-root +``` + +## Input flexibility + +The converter has no hard dependency on `docs-website/`. Any RST file +works as input, regardless of where it lives on disk. 
The only feature +that depends on `docs-website/` is **cross-reference resolution**: + +| Situation | Cross-reference (`:ref:`, `:doc:`, etc.) behavior | +|---|---| +| Input lives somewhere under `docs-website/` | Auto-detected; the label index is built once and refs resolve to `/docs-main/#` URLs. | +| Input lives elsewhere, `--docs-root ` passed | Same as above using the explicit root. | +| Input lives elsewhere, no `--docs-root` | Refs become `[label](#TODO-resolve-…)` markers a human can resolve later. | + +Every other transform — headings, inline formatting and roles, links +(named, anonymous, `:doc:`, `:download:`, autolinks like ``), +code blocks, admonitions, `.. todo::` / `.. wip::` notes, `.. toggle::` +collapsibles → ``, images and figures, lists, tables +(list-table, csv-table, grid), Sphinx tabs, `.. youtube::` and +`.. raw:: html` video embeds, comments, frontmatter, and provenance +markers — runs identically regardless of input location. + +## Heading mapping + +Underline characters map to a fixed level (Canton's published CSS uses +the same rule): + +| Underline | Level | +|---|---| +| `#` | H1 | +| `*` / `=` | H2 | +| `-` | H3 | +| `~` | H4 | +| `^` | H5 | +| `"` | H6 | + +Overlined+underlined headings render one level shallower than the same +character used as underline-only, capped at H1. So `### Title ###` and +`############` both produce H1; `*** Title ***` is H1; `--- Title ---` +is H2. 
+ +## CLI flags + +``` +rst-to-mdx [output.mdx] [flags] +rst-to-mdx --batch --input-dir --output-dir [flags] + + --title string override auto-detected page title (default: first heading) + --description string set frontmatter description + --source-label string provenance source label (auto from path) + --docs-root string root of an RST docs tree for cross-ref resolution + (auto-detects `docs-website/` if input lives in one) + --target-root string target docs-main/ root for image copy and path + derivation (default "./docs-main") + --copy-images copy referenced images into target-root/images/docs_website/ + --strict fail on unresolved :ref: or missing literalinclude + --dry-run print what would be written without touching disk + -v, --verbose show detailed conversion progress + --version print version and exit +``` + +### Common invocations + +```bash +# Single file with image asset copy +./rst-to-mdx in.rst out.mdx --copy-images --target-root /tmp --verbose + +# Override the auto-detected title +./rst-to-mdx in.rst out.mdx --title "Setting Up the Sandbox" + +# Bail loudly on missing refs/files +./rst-to-mdx in.rst out.mdx --strict +``` + +## Directory layout + +``` +tools/rst-to-mdx/ +├── component.yaml # Unix DPM component manifest +├── component.windows.yaml # Windows manifest (.exe path) +├── daml.yaml # local override-components for `dpm --help` +├── LICENSE # required at every published artifact root +├── cmd/rst-to-mdx/main.go # Cobra CLI entrypoint +├── internal/ +│ ├── convert/ # transform pipeline (one file per transform) +│ ├── include/ # .. include:: + .. 
literalinclude:: resolver +│ ├── labelindex/ # corpus walker that builds label → heading map +│ ├── navindex/ # docs.json walker so cross-refs land on real pages +│ └── pathmap/ # RST source path → MDX target path rules +├── smoke-test.sh # end-to-end smoke against a real RST file +├── go.mod +├── Makefile +└── README.md +``` + +## Local DPM registration + +Running `dpm --help` from inside `tools/rst-to-mdx/` picks up the +local `daml.yaml` and surfaces `rst-to-mdx` under "Dpm-SDK Commands" +without any install step. Useful for iterative development. + +## Packaging and publishing + +Cross-compile to all five supported platforms: + +``` +make release +``` + +That writes `dist/-/` directories, each containing the +platform binary, a matching `component.yaml`, and a `LICENSE` file +(both required by the DPM publish validator). + +Publish (DPM 1.0.12+ / SDK 3.5+): + +``` +dpm artifacts publish component \ + --name rst-to-mdx \ + --version 0.1.0-alpha \ + --platform darwin/arm64=./dist/darwin-arm64 \ + --platform darwin/amd64=./dist/darwin-amd64 \ + --platform linux/arm64=./dist/linux-arm64 \ + --platform linux/amd64=./dist/linux-amd64 \ + --platform windows/amd64=./dist/windows-amd64 \ + --registry oci:// +``` + +DPM 1.0.10 — 1.0.11 use the older form with the same flags: + +``` +dpm repo publish-component rst-to-mdx 0.1.0-alpha \ + -p darwin/arm64=./dist/darwin-arm64 \ + ... \ + --registry oci:// +``` + +Add `--dry-run` to validate without pushing. diff --git a/tools/rst-to-mdx/cmd/rst-to-mdx/main.go b/tools/rst-to-mdx/cmd/rst-to-mdx/main.go new file mode 100644 index 00000000..8f51c215 --- /dev/null +++ b/tools/rst-to-mdx/cmd/rst-to-mdx/main.go @@ -0,0 +1,758 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. +// SPDX-License-Identifier: Apache-2.0 + +// Command rst-to-mdx converts reStructuredText files from docs-website/ +// into Mintlify-compatible MDX files for docs/docs-main/. 
+// +// Conversion logic lives in the internal convert/, labelindex/, and +// pathmap/ packages. This file is the Cobra CLI wrapper: it parses +// flags, opens files, builds the cross-reference index once, and calls +// Convert for each input. +package main + +import ( + "fmt" + "io" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/spf13/cobra" + + "daml.com/x/dpm-components/rst-to-mdx/internal/convert" + "daml.com/x/dpm-components/rst-to-mdx/internal/labelindex" + "daml.com/x/dpm-components/rst-to-mdx/internal/navindex" + "daml.com/x/dpm-components/rst-to-mdx/internal/pathmap" +) + +func mustRegex(p string) *regexp.Regexp { return regexp.MustCompile(p) } + +const imagesSubdir = "images/docs_website" + +var version = "0.0.1-dev" + +type runOptions struct { + title string + description string + sourceLabel string + batch bool + inputDir string + outputDir string + docsRoot string + targetRoot string + docsJSON string + copyImages bool + strict bool + dryRun bool + verbose bool + auditCoverage bool +} + +func main() { + if err := newRootCmd().Execute(); err != nil { + os.Exit(1) + } +} + +func newRootCmd() *cobra.Command { + opts := &runOptions{} + + cmd := &cobra.Command{ + Use: "rst-to-mdx [output.mdx]", + Short: "Convert reStructuredText to Mintlify MDX", + Long: `Convert reStructuredText files into Mintlify-compatible MDX. Handles +headings, admonitions, code blocks, cross-references, images, tables, +literalinclude, and frontmatter + provenance markers. + +The input RST file may live anywhere on disk. Cross-reference resolution +(:ref:, :doc:, :externalref:) is the only feature that needs an RST +docs tree to read against — pass --docs-root or place the input under a +docs-website/ subtree (auto-detected). Without a docs-root, cross-refs +emit #TODO-resolve-* markers and the rest of the conversion proceeds. 
+ +Use --batch to walk a directory tree.`, + Args: cobra.MaximumNArgs(2), + SilenceUsage: true, + SilenceErrors: false, + RunE: func(cmd *cobra.Command, args []string) error { + if opts.auditCoverage { + return runAudit(cmd.OutOrStdout(), opts) + } + if opts.batch { + if opts.inputDir == "" { + return fmt.Errorf("--batch requires --input-dir") + } + if opts.outputDir == "" { + return fmt.Errorf("--batch requires --output-dir") + } + return runBatch(cmd.OutOrStdout(), opts) + } + if len(args) == 0 { + return fmt.Errorf("input file required (or use --batch or --audit-coverage)") + } + in := args[0] + var out string + if len(args) == 2 { + out = args[1] + } else { + out = deriveOutputPath(in) + } + return runSingle(cmd.OutOrStdout(), in, out, opts) + }, + } + + cmd.Flags().StringVar(&opts.title, "title", "", "override auto-detected page title") + cmd.Flags().StringVar(&opts.description, "description", "", "set frontmatter description") + cmd.Flags().StringVar(&opts.sourceLabel, "source-label", "", "provenance source label (auto from path)") + cmd.Flags().BoolVar(&opts.batch, "batch", false, "convert all .rst files in --input-dir") + cmd.Flags().StringVar(&opts.inputDir, "input-dir", "", "input directory for --batch") + cmd.Flags().StringVar(&opts.outputDir, "output-dir", "./converted", "output directory for --batch") + cmd.Flags().StringVar(&opts.docsRoot, "docs-root", "", "root of an RST docs tree for cross-ref resolution (auto-detects docs-website/ when input lives in one)") + cmd.Flags().StringVar(&opts.targetRoot, "target-root", "./docs-main", "target docs-main/ root for path derivation") + cmd.Flags().StringVar(&opts.docsJSON, "docs-json", "", "path to Mintlify docs.json for nav-aware link resolution (auto-detects /docs.json)") + cmd.Flags().BoolVar(&opts.copyImages, "copy-images", false, "copy referenced images into target-root/images/docs_website/") + cmd.Flags().BoolVar(&opts.strict, "strict", false, "fail on unresolved :ref: or missing literalinclude") + 
cmd.Flags().BoolVar(&opts.dryRun, "dry-run", false, "show what would be written without touching the filesystem") + cmd.Flags().BoolVarP(&opts.verbose, "verbose", "v", false, "show detailed conversion progress") + cmd.Flags().BoolVar(&opts.auditCoverage, "audit-coverage", false, "report which RST files under --docs-root have no matching page in --target-root/docs.json") + cmd.Flags().Bool("version", false, "print version and exit") + + cmd.PreRun = func(c *cobra.Command, _ []string) { + if v, _ := c.Flags().GetBool("version"); v { + fmt.Fprintln(c.OutOrStdout(), "rst-to-mdx", version) + os.Exit(0) + } + } + + return cmd +} + +// runContext bundles the indexes and config that are constant across +// every file converted in a single invocation, so batch mode can build +// them once instead of per-file. +type runContext struct { + labels *labelindex.Index + nav *navindex.Index + docsRoot string +} + +func newRunContext(w io.Writer, anchorPath string, opts *runOptions) (*runContext, error) { + labels, err := loadLabelIndex(w, anchorPath, opts) + if err != nil { + return nil, err + } + docsRoot := opts.docsRoot + if docsRoot == "" && anchorPath != "" { + docsRoot = autoDetectDocsRoot(anchorPath) + } + nav, err := loadNavIndex(w, opts) + if err != nil { + return nil, err + } + return &runContext{labels: labels, nav: nav, docsRoot: docsRoot}, nil +} + +// fileResult is the per-file outcome of a convert + write step. Batch +// mode aggregates these into a summary. +type fileResult struct { + inputPath string + outputPath string + bytes int + images int + unknown int +} + +// runConvertOne converts one RST file using a pre-built run context. +// Caller is responsible for picking the output path. Returns the +// per-file stats so batch mode can aggregate them. 
+func runConvertOne(w io.Writer, ctx *runContext, inputPath, outputPath string, opts *runOptions) (fileResult, error) { + r := fileResult{inputPath: inputPath, outputPath: outputPath} + data, err := os.ReadFile(inputPath) + if err != nil { + return r, fmt.Errorf("read input: %w", err) + } + + co := convert.Options{ + Title: opts.title, + Description: opts.description, + SourceLabel: firstNonEmpty(opts.sourceLabel, normalizeSourceLabel(inputPath)), + SourcePath: inputPath, + LabelIndex: ctx.labels, + NavIndex: ctx.nav, + DocsRoot: ctx.docsRoot, + Strict: opts.strict, + } + res, err := convert.Convert(data, co) + if err != nil { + return r, fmt.Errorf("convert: %w", err) + } + r.bytes = len(res.Body) + r.images = len(res.Images) + + if opts.dryRun { + fmt.Fprintf(w, "[dry-run] would write %d bytes to %s\n", r.bytes, outputPath) + if opts.copyImages && r.images > 0 { + fmt.Fprintf(w, "[dry-run] would copy %d image asset(s)\n", r.images) + } + return r, nil + } + if err := os.MkdirAll(filepath.Dir(outputPath), 0o755); err != nil { + return r, err + } + if err := os.WriteFile(outputPath, res.Body, 0o644); err != nil { + return r, fmt.Errorf("write output: %w", err) + } + if opts.verbose { + fmt.Fprintf(w, "wrote %s (%d bytes)\n", outputPath, r.bytes) + } + + if opts.copyImages { + if err := copyImageAssets(w, res.Images, opts); err != nil { + return r, err + } + } + + if ctx.nav != nil { + unknown, err := reportUnknownLinks(w, res.Body, ctx.nav, opts) + if err != nil { + return r, err + } + r.unknown = unknown + } + return r, nil +} + +func runSingle(w io.Writer, inputPath, outputPath string, opts *runOptions) error { + ctx, err := newRunContext(w, inputPath, opts) + if err != nil { + return err + } + _, err = runConvertOne(w, ctx, inputPath, outputPath, opts) + return err +} + +// reportUnknownLinks walks the converted MDX for absolute internal +// link targets (`(/path/to/page)`) and warns when the target page +// isn't registered in docs.json. 
Returns the number of unique unknown +// targets so the caller can aggregate counts across a batch run. In +// --strict mode any unknown link is an error. +// +// Asset paths (under /images/) and file-extension paths are filtered +// out — those aren't navigation pages, so docs.json is the wrong +// authority for them. +func reportUnknownLinks(w io.Writer, body []byte, nav *navindex.Index, opts *runOptions) (int, error) { + matches := reInternalLinkTarget.FindAllSubmatch(body, -1) + if len(matches) == 0 { + return 0, nil + } + seen := make(map[string]struct{}) + var unknown []string + for _, m := range matches { + page := string(m[1]) + if _, ok := seen[page]; ok { + continue + } + seen[page] = struct{}{} + if isAssetPath(page) { + continue + } + if !nav.HasPage(page) { + unknown = append(unknown, page) + } + } + if len(unknown) == 0 { + if opts.verbose { + fmt.Fprintf(w, "links: %d internal targets, all registered in docs.json\n", len(seen)) + } + return 0, nil + } + if opts.strict { + return len(unknown), fmt.Errorf("strict: %d link target(s) not in docs.json: %v", len(unknown), unknown) + } + if opts.verbose { + fmt.Fprintf(w, "warn: %d internal link target(s) not registered in docs.json:\n", len(unknown)) + for _, p := range unknown { + fmt.Fprintf(w, " - /%s\n", p) + } + } + return len(unknown), nil +} + +// reInternalLinkTarget pulls the page slug out of `(/path#anchor)` +// link targets in the emitted MDX. Captures the slug part before any +// `#fragment`. Mintlify serves docs-main/ as site root so internal +// links are root-relative without a docs-main/ prefix. +var reInternalLinkTarget = mustRegex(`\(/([A-Za-z0-9_\-/.]+?)(?:#[^)]*)?\)`) + +// isAssetPath returns true for paths that aren't navigation pages — +// images, public assets, and anything with a file extension. docs.json +// only registers MDX pages, so we skip these to avoid false-positive +// "unknown link" warnings. 
+func isAssetPath(p string) bool { + if strings.HasPrefix(p, "images/") { + return true + } + for _, ext := range []string{".png", ".jpg", ".jpeg", ".svg", ".gif", + ".webp", ".ico", ".pdf", ".css", ".js", ".json", ".yaml", ".yml"} { + if strings.HasSuffix(p, ext) { + return true + } + } + return false +} + +// copyImageAssets copies every image referenced by the source RST into +// the target docs tree under /images/docs_website/. A +// missing source file is reported (and counted) but doesn't abort the +// run unless --strict is set. +func copyImageAssets(w io.Writer, refs []convert.ImageRef, opts *runOptions) error { + if len(refs) == 0 { + return nil + } + imagesRoot := filepath.Join(opts.targetRoot, imagesSubdir) + if err := os.MkdirAll(imagesRoot, 0o755); err != nil { + return fmt.Errorf("mkdir images dir: %w", err) + } + + copied, missing, collisions := 0, 0, 0 + for _, ref := range refs { + if ref.SourceAbs == "" { + missing++ + if opts.verbose { + fmt.Fprintf(w, "skip image %q: no source path resolved\n", ref.SourceRel) + } + continue + } + dst := filepath.Join(opts.targetRoot, ref.TargetRel) + // Detect basename collisions with already-present, differently + // sourced files. A collision isn't fatal — the second copy + // just overwrites — but we count it for the operator. 
+ if existing, err := os.Stat(dst); err == nil { + if same, err := sameFileContent(existing, ref.SourceAbs); err == nil && !same { + collisions++ + if opts.verbose { + fmt.Fprintf(w, "warn: %s already exists with different content (will overwrite)\n", dst) + } + } + } + if err := copyFile(ref.SourceAbs, dst); err != nil { + missing++ + if opts.strict { + return fmt.Errorf("copy %s: %w", ref.SourceAbs, err) + } + if opts.verbose { + fmt.Fprintf(w, "warn: copy %s -> %s: %v\n", ref.SourceAbs, dst, err) + } + continue + } + copied++ + } + if opts.verbose || copied > 0 || missing > 0 { + fmt.Fprintf(w, "images: %d copied, %d missing, %d collisions\n", + copied, missing, collisions) + } + return nil +} + +// copyFile reads src and writes its bytes to dst, creating any missing +// parent directories. Cheap byte-copy; image assets are small. +func copyFile(src, dst string) error { + data, err := os.ReadFile(src) + if err != nil { + return err + } + if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil { + return err + } + return os.WriteFile(dst, data, 0o644) +} + +// sameFileContent returns true when the existing target and a candidate +// source have the same size — a cheap proxy for "this is the same +// asset, no collision". A full byte-compare would be more accurate but +// we don't need that for the warning. +func sameFileContent(existing os.FileInfo, srcPath string) (bool, error) { + srcInfo, err := os.Stat(srcPath) + if err != nil { + return false, err + } + return existing.Size() == srcInfo.Size(), nil +} + +// runBatch walks --input-dir for content RST files, converts each +// using a shared run context, and writes the results under +// --output-dir at paths derived by `pathmap.Derive`. The same content +// filters as `--audit-coverage` apply: scaffolding (`index.rst`, +// `conf.py`, `*.inc`), dotfile dirs, build trees, and files outside +// `docs/replicated/` are skipped. 
+//
+// The label index, nav index, and docs-root are loaded ONCE and shared
+// across every conversion in the run.
+func runBatch(w io.Writer, opts *runOptions) error {
+	inputDir := opts.inputDir
+	outputDir := opts.outputDir
+	if outputDir == "" {
+		return fmt.Errorf("--batch requires --output-dir")
+	}
+
+	// Anchor the run context on the input directory so docs-root
+	// auto-detection still works when --input-dir lives under a
+	// docs-website/ tree.
+	ctx, err := newRunContext(w, inputDir, opts)
+	if err != nil {
+		return err
+	}
+
+	type batchEntry struct {
+		input  string
+		output string
+	}
+	var queue []batchEntry
+	var skipped, unsupported int
+
+	walkErr := filepath.Walk(inputDir, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if info.IsDir() {
+			base := filepath.Base(path)
+			if strings.HasPrefix(base, ".") || base == "_build" ||
+				base == "target" || base == "node_modules" {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+		if !strings.HasSuffix(path, ".rst") {
+			return nil
+		}
+		base := filepath.Base(path)
+		if base == "index.rst" || base == "conf.py" ||
+			strings.HasSuffix(base, ".inc") || strings.HasSuffix(base, ".inc.rst") {
+			skipped++
+			return nil
+		}
+		// Stay inside the replicated/ corpus — the rest of
+		// docs-website/ is build scaffolding or vendored content.
+		// Normalize to forward slashes first: filepath.Walk yields
+		// backslash-separated paths on Windows, which would never
+		// contain the slash-separated marker.
+		if !strings.Contains(filepath.ToSlash(path), "docs/replicated/") {
+			skipped++
+			return nil
+		}
+		derived, ok := pathmap.Derive(path)
+		if !ok {
+			unsupported++
+			if opts.verbose {
+				fmt.Fprintf(w, "no pathmap rule for %s — skipping\n", path)
+			}
+			return nil
+		}
+		out := filepath.Join(outputDir, string(derived)+".mdx")
+		queue = append(queue, batchEntry{input: path, output: out})
+		return nil
+	})
+	if walkErr != nil {
+		return walkErr
+	}
+
+	if opts.verbose {
+		fmt.Fprintf(w, "batch: %d files queued, %d scaffolding skipped, %d without pathmap rule\n",
+			len(queue), skipped, unsupported)
+	}
+
+	var (
+		converted    int
+		failed       int
+		totalImages  int
+		totalUnknown int
+		failures     []string
+	)
+	// Track output paths so we can report when multiple inputs map to
+	// the same target. With multiple RST versions side-by-side
+	// (canton/3.4, canton/3.5, canton/3.6), pathmap.Derive collapses
+	// them onto one MDX slug; the lexically-later version wins.
+	writes := make(map[string][]string)
+	for _, e := range queue {
+		res, err := runConvertOne(w, ctx, e.input, e.output, opts)
+		if err != nil {
+			failed++
+			failures = append(failures, fmt.Sprintf("%s: %v", e.input, err))
+			if opts.strict {
+				return fmt.Errorf("strict: %w", err)
+			}
+			continue
+		}
+		converted++
+		totalImages += res.images
+		totalUnknown += res.unknown
+		writes[e.output] = append(writes[e.output], e.input)
+	}
+
+	collisions := 0
+	for _, sources := range writes {
+		if len(sources) > 1 {
+			collisions++
+		}
+	}
+
+	fmt.Fprintln(w)
+	fmt.Fprintln(w, "batch summary:")
+	fmt.Fprintf(w, "  input dir: %s\n", inputDir)
+	fmt.Fprintf(w, "  output dir: %s\n", outputDir)
+	fmt.Fprintf(w, "  converted: %d (%d unique outputs)\n", converted, len(writes))
+	fmt.Fprintf(w, "  failed: %d\n", failed)
+	fmt.Fprintf(w, "  scaffolding: %d skipped\n", skipped)
+	fmt.Fprintf(w, "  no pathmap rule: %d skipped\n", unsupported)
+	fmt.Fprintf(w, "  output collisions: %d (multiple inputs → same MDX path; latest wins)\n", collisions)
+	fmt.Fprintf(w, "  image refs seen: %d\n", totalImages)
+	fmt.Fprintf(w, "  unresolved nav links: %d\n", totalUnknown)
+
+	if collisions > 0 && opts.verbose {
+		fmt.Fprintln(w)
+		fmt.Fprintln(w, "collisions (output path → contributing sources):")
+		for out, sources := range writes {
+			if len(sources) <= 1 {
+				continue
+			}
+			fmt.Fprintf(w, "  %s\n", out)
+			for _, src := range sources {
+				fmt.Fprintf(w, "    ← %s\n", src)
+			}
+		}
+	}
+
+	if len(failures) > 0 {
+		fmt.Fprintln(w)
+		fmt.Fprintln(w, "failures:")
+		for _, f := range failures {
+			fmt.Fprintf(w, "  - %s\n", f)
+		}
+	}
+	if failed > 0 {
+		return fmt.Errorf("batch finished with %d failures", failed)
+	}
+	return nil
+}
+
+// runAudit walks every .rst file under --docs-root and reports which
+// have no matching page in the live docs.json navigation, producing
+// an inventory of unmigrated files. It uses the same pathmap +
+// NavIndex resolution that the converter applies during cross-ref
+// rewriting, so the answer reflects the tool's view of "where would
+// this file land if migrated today?"
+//
+// Buckets:
+//   - direct  — pathmap-derived path is registered in docs.json
+//   - matched — NavIndex.BestMatch finds a different page that fits
+//   - missing — no docs.json hit at all (candidate for migration)
+//   - skipped — index/conf/.inc files we never migrate
+func runAudit(w io.Writer, opts *runOptions) error {
+	root := opts.docsRoot
+	if root == "" {
+		return fmt.Errorf("--audit-coverage requires --docs-root")
+	}
+	nav, err := loadNavIndex(w, opts)
+	if err != nil {
+		return err
+	}
+	if nav == nil {
+		return fmt.Errorf("--audit-coverage requires a docs.json (use --target-root or --docs-json)")
+	}
+
+	var direct, matched, missing, skipped []string
+	walkErr := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if info.IsDir() {
+			// Skip dotfile dirs (e.g. .venv) and Sphinx build trees.
+			base := filepath.Base(path)
+			if strings.HasPrefix(base, ".") || base == "_build" ||
+				base == "target" || base == "node_modules" {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+		if !strings.HasSuffix(path, ".rst") {
+			return nil
+		}
+		base := filepath.Base(path)
+		// Skip RST files that are scaffolding rather than content.
+		if base == "index.rst" || base == "conf.py" ||
+			strings.HasSuffix(base, ".inc") || strings.HasSuffix(base, ".inc.rst") {
+			skipped = append(skipped, path)
+			return nil
+		}
+		// Only audit files under docs/replicated/ — the rest of the
+		// docs-website/ tree is build scaffolding, vendored Sphinx
+		// extensions, or alternative manual sources we don't migrate.
+		// ToSlash keeps the filter working on Windows-style paths.
+		if !strings.Contains(filepath.ToSlash(path), "docs/replicated/") {
+			skipped = append(skipped, path)
+			return nil
+		}
+		rel := strings.TrimPrefix(path, root)
+		rel = strings.TrimPrefix(rel, string(filepath.Separator))
+
+		derived, ok := pathmap.Derive(path)
+		if ok && nav.HasPage(string(derived)) {
+			direct = append(direct, rel)
+			return nil
+		}
+		if best := nav.BestMatch(stripDocsWebsitePrefixForAudit(path)); best != "" {
+			matched = append(matched, fmt.Sprintf("%s → %s", rel, best))
+			return nil
+		}
+		missing = append(missing, rel)
+		return nil
+	})
+	if walkErr != nil {
+		return walkErr
+	}
+
+	total := len(direct) + len(matched) + len(missing)
+	fmt.Fprintf(w, "audit summary (under %s):\n", root)
+	fmt.Fprintf(w, "  total content RST files: %d\n", total)
+	fmt.Fprintf(w, "  direct path match: %d\n", len(direct))
+	fmt.Fprintf(w, "  matched via NavIndex: %d\n", len(matched))
+	fmt.Fprintf(w, "  missing from docs.json: %d\n", len(missing))
+	fmt.Fprintf(w, "  scaffolding skipped: %d\n", len(skipped))
+	fmt.Fprintln(w)
+
+	if len(matched) > 0 && opts.verbose {
+		fmt.Fprintln(w, "matched (RST → docs.json page):")
+		for _, m := range matched {
+			fmt.Fprintf(w, "  %s\n", m)
+		}
+		fmt.Fprintln(w)
+	}
+
+	if len(missing) > 0 {
+		fmt.Fprintln(w, "missing — RST files with no docs.json target:")
+		for _, m := range missing {
+			fmt.Fprintf(w, "  %s\n", m)
+		}
+	}
+	return nil
+}
+
+// stripDocsWebsitePrefixForAudit mirrors the CLI version of the prefix
+// stripper used by convert/links.go so the audit output applies the
+// same NavIndex matching logic as cross-ref resolution.
+func stripDocsWebsitePrefixForAudit(p string) string {
+	// Normalize separators so the marker also matches Windows paths;
+	// the returned sub-path is slash-separated either way (on Unix
+	// ToSlash is a no-op, so behavior there is unchanged).
+	q := filepath.ToSlash(p)
+	marker := "docs-website/docs/replicated/"
+	i := strings.LastIndex(q, marker)
+	if i < 0 {
+		return p
+	}
+	return q[i+len(marker):]
+}
+
+// loadLabelIndex resolves docs-root (explicit flag, auto-detected, or
+// nothing) and builds a label index against it. Returns nil, nil when no
+// docs-root is available — conversion still works, cross-refs just fall
+// back to TODO markers.
+func loadLabelIndex(w io.Writer, inputPath string, opts *runOptions) (*labelindex.Index, error) {
+	root := opts.docsRoot
+	if root == "" {
+		root = autoDetectDocsRoot(inputPath)
+	}
+	if root == "" {
+		if opts.verbose {
+			fmt.Fprintln(w, "no docs-website/ root detected; cross-refs will emit TODO markers")
+		}
+		return nil, nil
+	}
+	if opts.verbose {
+		fmt.Fprintf(w, "building label index under %s…\n", root)
+	}
+	idx, err := labelindex.Build(root)
+	if err != nil {
+		return nil, fmt.Errorf("build label index: %w", err)
+	}
+	if opts.verbose {
+		fmt.Fprintf(w, "indexed %d labels (%d definitions across files)\n",
+			idx.Size(), idx.TotalDefinitions())
+	}
+	return idx, nil
+}
+
+// loadNavIndex resolves the docs.json path (explicit flag, or
+// `<target-root>/docs.json` when the file exists) and parses it.
+// Returns nil, nil when no docs.json is locatable — link resolution
+// then falls back to the algorithmic pathmap.
+func loadNavIndex(w io.Writer, opts *runOptions) (*navindex.Index, error) {
+	// Prefer the explicit --docs-json flag; otherwise probe for a
+	// docs.json sitting directly under the target root.
+	jsonPath := opts.docsJSON
+	if jsonPath == "" && opts.targetRoot != "" {
+		probe := filepath.Join(opts.targetRoot, "docs.json")
+		if _, statErr := os.Stat(probe); statErr == nil {
+			jsonPath = probe
+		}
+	}
+	if jsonPath == "" {
+		if opts.verbose {
+			fmt.Fprintln(w, "no docs.json found; cross-refs will use algorithmic pathmap only")
+		}
+		return nil, nil
+	}
+	if opts.verbose {
+		fmt.Fprintf(w, "reading nav index from %s…\n", jsonPath)
+	}
+	idx, err := navindex.Build(jsonPath)
+	if err != nil {
+		return nil, fmt.Errorf("build nav index: %w", err)
+	}
+	if opts.verbose {
+		fmt.Fprintf(w, "indexed %d pages from docs.json\n", idx.Size())
+	}
+	return idx, nil
+}
+
+// autoDetectDocsRoot climbs from the input file toward the filesystem
+// root and returns the first ancestor directory literally named
+// `docs-website`, or "" when the file isn't inside one.
+func autoDetectDocsRoot(inputPath string) string {
+	abs, err := filepath.Abs(inputPath)
+	if err != nil {
+		return ""
+	}
+	for dir := filepath.Dir(abs); dir != "/" && dir != "."; {
+		if filepath.Base(dir) == "docs-website" {
+			return dir
+		}
+		parent := filepath.Dir(dir)
+		if parent == dir {
+			// Hit a root whose Dir is itself (e.g. a drive root).
+			break
+		}
+		dir = parent
+	}
+	return ""
+}
+
+// deriveOutputPath maps an input RST path onto the default output
+// location: ./converted/<kebab-case-stem>.mdx
+func deriveOutputPath(inputPath string) string {
+	stem := filepath.Base(inputPath)
+	stem = strings.TrimSuffix(stem, filepath.Ext(stem))
+	slug := strings.ToLower(strings.ReplaceAll(stem, "_", "-"))
+	return filepath.Join("./converted", slug+".mdx")
+}
+
+// firstNonEmpty returns a unless it is empty, in which case b.
+func firstNonEmpty(a, b string) string {
+	if a == "" {
+		return b
+	}
+	return a
+}
+
+// normalizeSourceLabel produces the `docs-website:<rel>` form
+// that matches the provenance convention in the migration guide.
+// If the path doesn't live under a `docs-website/` directory, the
+// original path is returned unchanged.
+func normalizeSourceLabel(p string) string {
+	// Normalize to forward slashes first so Windows-style paths still
+	// match the marker; the emitted label is slash-separated either
+	// way (ToSlash is a no-op on Unix, so behavior there is unchanged).
+	q := filepath.ToSlash(p)
+	marker := "docs-website/"
+	idx := strings.LastIndex(q, marker)
+	if idx < 0 {
+		return p
+	}
+	rel := q[idx+len(marker):]
+	return "docs-website:" + rel
+}
diff --git a/tools/rst-to-mdx/component.windows.yaml b/tools/rst-to-mdx/component.windows.yaml
new file mode 100644
index 00000000..d48b5782
--- /dev/null
+++ b/tools/rst-to-mdx/component.windows.yaml
@@ -0,0 +1,13 @@
+# Windows-specific manifest. The DPM component model expects per-platform
+# manifests when the binary suffix differs; on Windows the binary is named
+# rst-to-mdx.exe.
+#
+# $schema: https://raw.githubusercontent.com/DACH-NY/dpm/refs/heads/json-schema/schema/component.schema.json
+apiVersion: digitalasset.com/v1
+kind: Component
+spec:
+  commands:
+    - path: ./rst-to-mdx.exe
+      name: rst-to-mdx
+      desc: Convert reStructuredText files to Mintlify MDX.
+      aliases: []
diff --git a/tools/rst-to-mdx/component.yaml b/tools/rst-to-mdx/component.yaml
new file mode 100644
index 00000000..59e5d5b7
--- /dev/null
+++ b/tools/rst-to-mdx/component.yaml
@@ -0,0 +1,12 @@
+# $schema: https://raw.githubusercontent.com/DACH-NY/dpm/refs/heads/json-schema/schema/component.schema.json
+# DPM component manifest for the rst-to-mdx converter.
+# See scripts/rst-to-mdx/README.md for development and publishing notes.
+
+apiVersion: digitalasset.com/v1
+kind: Component
+spec:
+  commands:
+    - path: ./rst-to-mdx
+      name: rst-to-mdx
+      desc: Convert reStructuredText files to Mintlify MDX.
+      aliases: []
diff --git a/tools/rst-to-mdx/daml.yaml b/tools/rst-to-mdx/daml.yaml
new file mode 100644
index 00000000..d1467015
--- /dev/null
+++ b/tools/rst-to-mdx/daml.yaml
@@ -0,0 +1,6 @@
+# Local-dev override so `dpm --help` discovers the rst-to-mdx command
+# when invoked from this directory. See the "Publishing Components" docs
+# in dpm/docs-internal/src/components/ for the full mechanism.
+override-components:
+  rst-to-mdx:
+    local-path: .
diff --git a/tools/rst-to-mdx/go.mod b/tools/rst-to-mdx/go.mod new file mode 100644 index 00000000..ae9a1df3 --- /dev/null +++ b/tools/rst-to-mdx/go.mod @@ -0,0 +1,10 @@ +module daml.com/x/dpm-components/rst-to-mdx + +go 1.22 + +require github.com/spf13/cobra v1.8.1 + +require ( + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect +) diff --git a/tools/rst-to-mdx/go.sum b/tools/rst-to-mdx/go.sum new file mode 100644 index 00000000..912390a7 --- /dev/null +++ b/tools/rst-to-mdx/go.sum @@ -0,0 +1,10 @@ +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= +github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/tools/rst-to-mdx/internal/convert/admonitions.go b/tools/rst-to-mdx/internal/convert/admonitions.go new file mode 100644 index 00000000..5d5dea40 --- /dev/null +++ b/tools/rst-to-mdx/internal/convert/admonitions.go @@ -0,0 +1,143 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. +// SPDX-License-Identifier: Apache-2.0 + +package convert + +import ( + "regexp" + "strings" +) + +// RST admonitions wrap short callouts. 
Mintlify has Note/Tip/Warning/Info +// JSX components that render as colored boxes. The mapping (per the +// migration guide): +// +// .. note:: → +// .. attention:: → +// .. tip:: → +// .. hint:: → +// .. warning:: → +// .. caution:: → +// .. danger:: → (prefixed "**Danger:** ") +// .. important:: → (prefixed "**Important:** ") +// .. seealso:: → +// .. deprecated:: → (version note prefix) +// .. versionadded:: → (version note prefix) +// .. versionchanged:: → (version note prefix) +// +// Some admonitions have content on the same line (inline) and some have +// an indented block that follows. We handle both. + +type admonitionSpec struct { + kind string // RST directive name, without :: + tag string // JSX component name (Note, Tip, Warning, Info) + prefix string // optional body prefix like "**Important:** " + argBold bool // if true, emit the directive argument as bold prefix +} + +var admonitions = []admonitionSpec{ + {kind: "note", tag: "Note"}, + {kind: "attention", tag: "Note"}, + {kind: "tip", tag: "Tip"}, + {kind: "hint", tag: "Tip"}, + {kind: "warning", tag: "Warning"}, + {kind: "caution", tag: "Warning"}, + {kind: "danger", tag: "Warning", prefix: "**Danger:** "}, + {kind: "important", tag: "Warning", prefix: "**Important:** "}, + {kind: "seealso", tag: "Info"}, + {kind: "deprecated", tag: "Warning", argBold: true}, + {kind: "versionadded", tag: "Note", argBold: true}, + {kind: "versionchanged", tag: "Note", argBold: true}, +} + +// convertAdmonitions walks the input line-by-line and rewrites RST +// admonition directives into JSX component blocks. +func convertAdmonitions(s string) string { + lines := strings.Split(s, "\n") + var out []string + + // Precompile the directive matchers for speed and to capture the + // indent + optional argument. 
+ matchers := make(map[string]*regexp.Regexp, len(admonitions)) + for _, a := range admonitions { + matchers[a.kind] = regexp.MustCompile( + `^(\s*)\.\.\s+` + regexp.QuoteMeta(a.kind) + `::\s*(.*)$`) + } + + i := 0 + for i < len(lines) { + line := lines[i] + matched := false + for _, a := range admonitions { + m := matchers[a.kind].FindStringSubmatch(line) + if m == nil { + continue + } + indent := m[1] + arg := strings.TrimSpace(m[2]) + i++ + + body, consumed := collectAdmonitionBody(lines[i:], indent, arg, a) + i += consumed + + out = append(out, indent+"<"+a.tag+">") + out = append(out, body...) + out = append(out, indent+"") + matched = true + break + } + if matched { + continue + } + out = append(out, line) + i++ + } + return strings.Join(out, "\n") +} + +// collectAdmonitionBody gathers the indented content (or inline arg) of +// an admonition. It returns the body lines, dedented and with the +// configured prefix applied, plus how many input lines were consumed. +func collectAdmonitionBody(lines []string, parentIndent, arg string, a admonitionSpec) ([]string, int) { + // Inline form: `.. note:: some text` puts the content on the same + // line; no indented block follows. + if arg != "" { + body := arg + switch { + case a.argBold: + body = "**" + a.prefix + capitalize(a.kind) + " " + arg + ":** " + // The argument is usually a version; any text that + // follows it lives in the indented block below. + case a.prefix != "": + body = a.prefix + arg + } + // Check whether there's ALSO an indented block; if so, merge. + indented, consumed := consumeIndentedBlock(lines, parentIndent) + if len(indented) == 0 { + return []string{body}, consumed + } + var combined []string + combined = append(combined, body) + combined = append(combined, indented...) + return combined, consumed + } + + // Block form: indented lines after a blank separator. + // Skip blank separator(s). 
+ skip := 0 + for skip < len(lines) && strings.TrimSpace(lines[skip]) == "" { + skip++ + } + body, consumed := consumeIndentedBlock(lines[skip:], parentIndent) + if a.prefix != "" && len(body) > 0 { + body[0] = a.prefix + body[0] + } + return body, skip + consumed +} + +func capitalize(s string) string { + if s == "" { + return s + } + return strings.ToUpper(s[:1]) + s[1:] +} diff --git a/tools/rst-to-mdx/internal/convert/admonitions_test.go b/tools/rst-to-mdx/internal/convert/admonitions_test.go new file mode 100644 index 00000000..f7d0c178 --- /dev/null +++ b/tools/rst-to-mdx/internal/convert/admonitions_test.go @@ -0,0 +1,84 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. +// SPDX-License-Identifier: Apache-2.0 + +package convert + +import "testing" + +func TestConvertAdmonitions(t *testing.T) { + cases := []struct { + name string + in string + want string + }{ + { + name: "note block", + in: `.. note:: + + Canton Admin APIs are not the same as the admin package of gRPC.`, + want: ` +Canton Admin APIs are not the same as the admin package of gRPC. +`, + }, + { + name: "inline note", + in: `.. note:: Quick heads-up here.`, + want: ` +Quick heads-up here. +`, + }, + { + name: "warning", + in: `.. warning:: + + Running in production requires extra care.`, + want: ` +Running in production requires extra care. +`, + }, + { + name: "important maps to Warning with prefix", + in: `.. important:: + + In-memory config should not be used in production.`, + want: ` +**Important:** In-memory config should not be used in production. +`, + }, + { + name: "tip maps to Tip", + in: `.. tip:: + + Use dpm version --active to see the active SDK.`, + want: ` +Use dpm version --active to see the active SDK. +`, + }, + { + name: "hint maps to Tip", + in: `.. hint:: + + You can alias commands.`, + want: ` +You can alias commands. +`, + }, + { + name: "seealso maps to Info", + in: `.. 
seealso::
+
+   See the Canton Admin docs.`,
+			want: `<Info>
+See the Canton Admin docs.
+</Info>`,
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := convertAdmonitions(tc.in)
+			if got != tc.want {
+				t.Errorf("mismatch\nwant:\n%q\n got:\n%q", tc.want, got)
+			}
+		})
+	}
+}
diff --git a/tools/rst-to-mdx/internal/convert/codeblocks.go b/tools/rst-to-mdx/internal/convert/codeblocks.go
new file mode 100644
index 00000000..a32739ae
--- /dev/null
+++ b/tools/rst-to-mdx/internal/convert/codeblocks.go
@@ -0,0 +1,226 @@
+// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates.
+// SPDX-License-Identifier: Apache-2.0
+
+package convert
+
+import (
+	"regexp"
+	"strings"
+)
+
+// RST has three ways to introduce a code block:
+//
+//  1. `.. code-block:: <lang>` directive, optionally with options,
+//     followed by a blank line, followed by indented content.
+//  2. `.. code:: <lang>` — older alias used heavily in the Canton and
+//     DPM docs.
+//  3. `::` at the end of a line, introducing an indented literal block
+//     with no language tag.
+//
+// We walk the document line-by-line so we can track the indent of the
+// directive and consume all lines that are more indented than it. That's
+// cheaper and more correct than trying to write one regex that respects
+// Python-style indentation.
+
+var (
+	reCodeBlockDirective = regexp.MustCompile(
+		`^(\s*)\.\.\s+(?:code-block|code|sourcecode)::\s*([\w+\-]*)\s*$`)
+	reDirectiveOption = regexp.MustCompile(
+		`^(\s+):[A-Za-z][A-Za-z0-9_\-]*:[^\n]*$`)
+	// A line ending in `::` (but not a role like `:code:`) introduces
+	// a literal block. The capture group keeps the prefix so we can
+	// emit it before the fenced block opens.
+	reLiteralIntro = regexp.MustCompile(`^(.*[^:\s])::\s*$`)
+	// A line that is JUST `::` on its own (or with leading whitespace)
+	// also introduces a literal block — there's no prefix text to keep.
+	reLiteralIntroSolo = regexp.MustCompile(`^(\s*)::\s*$`)
+)
+
+// convertCodeBlocks walks s line-by-line and rewrites `.. code-block:: lang`
+// and `::` literal blocks as fenced ```lang``` blocks.
+func convertCodeBlocks(s string) string {
+	lines := strings.Split(s, "\n")
+	var out []string
+
+	i := 0
+	for i < len(lines) {
+		line := lines[i]
+
+		// Case 1+2: `.. code-block:: lang` or `.. code:: lang`.
+		if m := reCodeBlockDirective.FindStringSubmatch(line); m != nil {
+			indent := m[1]
+			lang := strings.TrimSpace(m[2])
+			i++
+
+			// Skip directive option lines (`:linenos:` and so on).
+			for i < len(lines) && reDirectiveOption.MatchString(lines[i]) {
+				i++
+			}
+			// Skip blank line that separates options from content.
+			for i < len(lines) && strings.TrimSpace(lines[i]) == "" {
+				i++
+			}
+
+			// Consume indented content.
+			body, consumed := consumeIndentedBlock(lines[i:], indent)
+			i += consumed
+
+			out = append(out, "")
+			out = append(out, indent+"```"+lang)
+			out = append(out, body...)
+			out = append(out, indent+"```")
+			out = append(out, "")
+			continue
+		}
+
+		// Case 3: line ending in `::`. We skip lines that start with
+		// `.. ` — those are unhandled directives that happen to end in
+		// `::`, not literal-block introducers (e.g. `.. tabs::`,
+		// `.. tab::`, custom Sphinx extensions we don't rewrite).
+		trimmedLeft := strings.TrimLeft(line, " \t")
+		if strings.HasPrefix(trimmedLeft, ".. ") {
+			out = append(out, line)
+			i++
+			continue
+		}
+		// 3a: standalone `::` on its own line.
+		if m := reLiteralIntroSolo.FindStringSubmatch(line); m != nil {
+			directiveIndent := m[1]
+			i++
+			for i < len(lines) && strings.TrimSpace(lines[i]) == "" {
+				i++
+			}
+			if i >= len(lines) {
+				continue
+			}
+			body, consumed := consumeIndentedBlock(lines[i:], directiveIndent)
+			i += consumed
+			out = append(out, emitUnlabeledLiteral(directiveIndent, body)...)
+			continue
+		}
+		// 3b: text + `::` on the same line.
+		if m := reLiteralIntro.FindStringSubmatch(line); m != nil {
+			prefix := m[1]
+			out = append(out, prefix+":")
+			i++
+			// Skip blank lines.
+			for i < len(lines) && strings.TrimSpace(lines[i]) == "" {
+				i++
+			}
+			if i >= len(lines) {
+				continue
+			}
+			directiveIndent := leadingWS(line)
+			body, consumed := consumeIndentedBlock(lines[i:], directiveIndent)
+			i += consumed
+			out = append(out, emitUnlabeledLiteral(directiveIndent, body)...)
+			continue
+		}
+
+		out = append(out, line)
+		i++
+	}
+	return strings.Join(out, "\n")
+}
+
+// consumeIndentedBlock returns all lines that are more indented than
+// `parentIndent`, stopping at the first non-blank line whose indent is ≤
+// parentIndent. It also returns how many input lines were consumed.
+// The returned body is dedented by the minimum common indent of the
+// block so the fenced output reads naturally.
+func consumeIndentedBlock(lines []string, parentIndent string) ([]string, int) {
+	var body []string
+	i := 0
+	for i < len(lines) {
+		line := lines[i]
+		if strings.TrimSpace(line) == "" {
+			body = append(body, "")
+			i++
+			continue
+		}
+		if !startsWithIndentDeeper(line, parentIndent) {
+			break
+		}
+		body = append(body, line)
+		i++
+	}
+
+	// Trim trailing blank lines — they belong after the fence.
+	for len(body) > 0 && strings.TrimSpace(body[len(body)-1]) == "" {
+		body = body[:len(body)-1]
+	}
+
+	// Dedent by the minimum non-blank indent.
+	minIndent := -1
+	for _, line := range body {
+		if strings.TrimSpace(line) == "" {
+			continue
+		}
+		n := len(leadingWS(line))
+		if minIndent == -1 || n < minIndent {
+			minIndent = n
+		}
+	}
+	if minIndent > 0 {
+		for idx, line := range body {
+			if len(line) >= minIndent {
+				body[idx] = line[minIndent:]
+			}
+		}
+	}
+
+	return body, i
+}
+
+// startsWithIndentDeeper reports whether `line` is indented strictly
+// further than `parentIndent`.
+func startsWithIndentDeeper(line, parentIndent string) bool {
+	lws := leadingWS(line)
+	if len(lws) <= len(parentIndent) {
+		return false
+	}
+	// Check that parentIndent is a prefix (mixed tab/space would fail
+	// this test but RST doesn't mix in the Canton/Daml corpus).
+	return strings.HasPrefix(line, parentIndent)
+}
+
+// leadingWS returns the leading whitespace run of a line.
+func leadingWS(s string) string {
+	for i, r := range s {
+		if r != ' ' && r != '\t' {
+			return s[:i]
+		}
+	}
+	return s
+}
+
+// emitUnlabeledLiteral chooses an MDX rendering for a `::` literal
+// block (RST didn't tell us what language it is):
+//
+//   - one non-blank content line → 4-space indented prose so the
+//     output reads like a small inline command, not a syntax-
+//     highlighted snippet (matches the human migrator's convention)
+//   - two or more lines → fenced ```text``` block
+//
+// Either way we surround the result with blank lines so it doesn't
+// glue onto adjacent prose.
+func emitUnlabeledLiteral(indent string, body []string) []string {
+	nonBlank := 0
+	for _, l := range body {
+		if strings.TrimSpace(l) != "" {
+			nonBlank++
+		}
+	}
+	if nonBlank == 1 {
+		// Find the single content line and emit it with 4-space indent.
+		for _, l := range body {
+			if strings.TrimSpace(l) != "" {
+				return []string{"", indent + "    " + strings.TrimSpace(l), ""}
+			}
+		}
+	}
+	out := []string{"", indent + "```text"}
+	out = append(out, body...)
+	out = append(out, indent+"```", "")
	return out
+}
diff --git a/tools/rst-to-mdx/internal/convert/codeblocks_test.go b/tools/rst-to-mdx/internal/convert/codeblocks_test.go
new file mode 100644
index 00000000..8b946dcc
--- /dev/null
+++ b/tools/rst-to-mdx/internal/convert/codeblocks_test.go
@@ -0,0 +1,127 @@
+// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates.
+// SPDX-License-Identifier: Apache-2.0
+
+package convert
+
+import (
+	"strings"
+	"testing"
+)
+
+// Table-driven coverage of the three RST code-block forms: the
+// `.. code-block::`/`.. code::` directives and the `::` literal-block
+// introducer (both the standalone and text-suffixed variants).
+func TestConvertCodeBlocks(t *testing.T) {
+	cases := []struct {
+		name string
+		in   string
+		want string
+	}{
+		{
+			name: "code-block directive with language",
+			in: `Before.
+
+.. code-block:: bash
+
+   dpm install 3.4.11
+   dpm version
+
+After.`,
+			want: `Before.
+
+
+` + "```bash" + `
+dpm install 3.4.11
+dpm version
+` + "```" + `
+
+After.`,
+		},
+		{
+			name: "code directive alias",
+			in: `.. code:: yaml
+
+   sdk-version: 3.4.11
+   dependencies: []`,
+			want: `
+` + "```yaml" + `
+sdk-version: 3.4.11
+dependencies: []
+` + "```" + `
+`,
+		},
+		{
+			name: "multi-line :: literal block fences as text",
+			in: `Clone the repo::
+
+   git clone https://example.com/x.git
+   cd x
+
+Then build it.`,
+			want: `Clone the repo:
+
+` + "```text" + `
+git clone https://example.com/x.git
+cd x
+` + "```" + `
+
+Then build it.`,
+		},
+		{
+			name: "single-line :: literal block uses 4-space indent",
+			in: `Open the URL:
+
+::
+
+   app-provider.localhost:3000
+
+Then continue.`,
+			want: `Open the URL:
+
+
+    app-provider.localhost:3000
+
+Then continue.`,
+		},
+		{
+			name: "single-line :: with text prefix uses 4-space indent",
+			in: `Run from quickstart/::
+
+   make open-app-ui
+
+Done.`,
+			want: `Run from quickstart/:
+
+    make open-app-ui
+
+Done.`,
+		},
+		{
+			// Directive options like :linenos: must not leak into
+			// the fenced output.
+			name: "options are stripped",
+			in: `.. code-block:: python
+   :linenos:
+   :emphasize-lines: 2
+
+   def foo():
+       pass`,
+			want: `
+` + "```python" + `
+def foo():
+    pass
+` + "```" + `
+`,
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := convertCodeBlocks(tc.in)
+			if !equalTrim(got, tc.want) {
+				t.Errorf("mismatch\nwant:\n%s\n got:\n%s", tc.want, got)
+			}
+		})
+	}
+}
+
+// equalTrim ignores trailing whitespace differences because the various
+// transform phases each fiddle with surrounding blank lines. The cleanup
+// pass collapses them; these unit tests care about structural equality.
+func equalTrim(a, b string) bool {
+	return strings.TrimRight(a, "\n ") == strings.TrimRight(b, "\n ")
+}
diff --git a/tools/rst-to-mdx/internal/convert/comments.go b/tools/rst-to-mdx/internal/convert/comments.go
new file mode 100644
index 00000000..1f9abec8
--- /dev/null
+++ b/tools/rst-to-mdx/internal/convert/comments.go
@@ -0,0 +1,135 @@
+// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates.
+// SPDX-License-Identifier: Apache-2.0
+
+package convert
+
+import (
+	"regexp"
+	"strings"
+)
+
+// RST comments come in two forms:
+//
+//	.. Single line comment
+//
+//	..
+//	   Multi-line comment
+//	   indented under the dots.
+//
+// This transform runs near the end of the pipeline, AFTER directive
+// stripping (so we don't match a directive's `::` prefix) and admonition
+// conversion (so `.. note::` etc. are already gone). That leaves only
+// true comments by the time we get here.
+//
+// Any directive-looking `.. <name>::` that survived is an unknown
+// directive. We leave those untouched so a reader can spot them and fix
+// the converter.
+
+var (
+	// `.. something` where "something" doesn't look like a directive
+	// head (directive heads end in `::`). Go's regexp doesn't support
+	// negative lookahead, so we detect directives with a separate
+	// pattern and skip.
+	reCommentLead    = regexp.MustCompile(`^\.\.\s+(.+)$`)
+	reDirectiveLead  = regexp.MustCompile(`^\.\.\s+[A-Za-z][A-Za-z0-9_\-]*::`)
+	reMultiCommentHd = regexp.MustCompile(`^\.\.\s*$`)
+	// `.. [N] body` is RST footnote syntax — the body should render
+	// as visible text, not get hidden as a comment. The `N` can be a
+	// number, `*`, `#`, or a name.
+	reFootnote = regexp.MustCompile(`^\.\.\s+\[([^\]]+)\]\s+(.+)$`)
+)
+
+func convertComments(s string) string {
+	lines := strings.Split(s, "\n")
+	var out []string
+	i := 0
+	for i < len(lines) {
+		line := lines[i]
+
+		// Multi-line: `..` on its own, then indented body.
+		// RST's rule: the comment body continues until a line returns
+		// to or below the column where `..` sits. Blank lines do NOT
+		// terminate the body — they're part of it as long as content
+		// at body-indent or deeper resumes after them.
+		if reMultiCommentHd.MatchString(line) {
+			parentIndent := leadingWS(line)
+			i++
+			indent := ""
+			var body []string
+			for i < len(lines) {
+				cur := lines[i]
+				if strings.TrimSpace(cur) == "" {
+					// Look ahead past consecutive blank lines.
+					j := i + 1
+					for j < len(lines) && strings.TrimSpace(lines[j]) == "" {
+						j++
+					}
+					if j >= len(lines) {
+						// File ends inside the comment.
+						break
+					}
+					nextIndent := leadingWS(lines[j])
+					// If the next non-blank line returns to or below
+					// the directive's parent indent, the comment ends.
+					if len(nextIndent) <= len(parentIndent) {
+						break
+					}
+					// Still inside the comment — keep the blank line
+					// in the body.
+					body = append(body, "")
+					i++
+					continue
+				}
+				lws := leadingWS(cur)
+				if indent == "" {
+					indent = lws
+				}
+				// A line with strictly less indent than the body
+				// indent ends the comment.
+				if len(lws) < len(indent) {
+					break
+				}
+				body = append(body, strings.TrimPrefix(cur, indent))
+				i++
+			}
+			joined := strings.Join(body, "\n")
+			joined = sanitizeCommentBody(joined)
+			if strings.Contains(joined, "\n") {
+				out = append(out, "{/*\n"+joined+"\n*/}")
+			} else {
+				out = append(out, "{/* "+joined+" */}")
+			}
+			continue
+		}
+
+		// `.. [N] body`: RST footnote, render the body as visible text
+		// with a bracketed marker so the reader still sees the
+		// reference. Must come before the generic comment branch.
+		if m := reFootnote.FindStringSubmatch(line); m != nil {
+			out = append(out, "["+m[1]+"] "+m[2])
+			i++
+			continue
+		}
+
+		// Single-line `.. text`: only if NOT a directive head.
+		if m := reCommentLead.FindStringSubmatch(line); m != nil {
+			if !reDirectiveLead.MatchString(line) {
+				// Sanitize here too: a `*/` inside the comment text
+				// would otherwise close the MDX comment early, the
+				// same hazard the multi-line branch already guards.
+				out = append(out, "{/* "+sanitizeCommentBody(m[1])+" */}")
+				i++
+				continue
+			}
+		}
+
+		out = append(out, line)
+		i++
+	}
+	return strings.Join(out, "\n")
+}
+
+// sanitizeCommentBody escapes `*/` sequences so they don't terminate
+// the surrounding `{/* ... */}` MDX comment early. We replace each
+// `*/` with `*\/` which is invisible to JSX but breaks the close
+// pattern. Comments are rendering-only so the visual result is the
+// same.
+func sanitizeCommentBody(s string) string {
+	return strings.ReplaceAll(s, "*/", "*\\/")
+}
diff --git a/tools/rst-to-mdx/internal/convert/comments_test.go b/tools/rst-to-mdx/internal/convert/comments_test.go
new file mode 100644
index 00000000..5c0be643
--- /dev/null
+++ b/tools/rst-to-mdx/internal/convert/comments_test.go
@@ -0,0 +1,31 @@
+// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates.
+// SPDX-License-Identifier: Apache-2.0
+
+package convert
+
+import "testing"
+
+func TestConvertComments(t *testing.T) {
+	cases := []struct {
+		name, in, want string
+	}{
+		{
+			name: "single line",
+			in:   `.. This is a stray note`,
+			want: `{/* This is a stray note */}`,
+		},
+		{
+			name: "does not rewrite unknown directive",
+			in:   `.. unknowndirective::`,
+			want: `.. unknowndirective::`,
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := convertComments(tc.in)
+			if got != tc.want {
+				t.Errorf("want %q got %q", tc.want, got)
+			}
+		})
+	}
+}
diff --git a/tools/rst-to-mdx/internal/convert/convert.go b/tools/rst-to-mdx/internal/convert/convert.go
new file mode 100644
index 00000000..995cd8b9
--- /dev/null
+++ b/tools/rst-to-mdx/internal/convert/convert.go
@@ -0,0 +1,141 @@
+// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates.
+// SPDX-License-Identifier: Apache-2.0
+
+// Package convert turns a reStructuredText document into a Mintlify MDX
The pipeline is a sequence of text transforms, each of which +// lives in its own file alongside a focused test suite. +// +// Pipeline order matters. Transforms that wrap content into fenced blocks +// or JSX components (code blocks, admonitions) run before inline role +// transforms so we don't rewrite content that will end up inside a fence. +// Comment conversion runs last so it doesn't swallow real directives. +package convert + +import ( + "fmt" + "strings" + + "daml.com/x/dpm-components/rst-to-mdx/internal/include" + "daml.com/x/dpm-components/rst-to-mdx/internal/labelindex" + "daml.com/x/dpm-components/rst-to-mdx/internal/navindex" +) + +// Options controls a single conversion. +type Options struct { + // Title overrides the auto-detected page title. Empty means + // auto-detect from the first RST heading. + Title string + // Description sets the frontmatter `description:` field. + Description string + // SourceLabel is the provenance label (typically the source RST + // path relative to docs-website/). + SourceLabel string + // SourcePath is the on-disk path of the RST file being converted. + // Used for resolving relative paths in literalinclude and images + // (Phase 3+) and for the cross-reference resolver to prefer + // same-version-tree label definitions. + SourcePath string + // LabelIndex, if non-nil, resolves `:ref:`, `:externalref:`, + // `:subsiteref:`, and `:brokenref:` targets to concrete MDX URLs + // via the Phase-2 label index + pathmap. + LabelIndex *labelindex.Index + // NavIndex, if non-nil, holds the page paths registered in the + // target docs site's docs.json. When supplied, cross-reference + // resolution prefers a NavIndex hit over a pathmap-derived path + // so links land on real pages. + NavIndex *navindex.Index + // DocsRoot is the filesystem root of docs-website/, used to + // resolve absolute `.. include::` and `.. literalinclude::` paths + // (i.e. paths beginning with `/`). Optional. 
+ DocsRoot string + // Strict fails the conversion on unresolved :ref:, missing + // literalinclude targets, or unrecognized directives. + Strict bool +} + +// Result is what Convert returns: the rewritten MDX bytes plus any +// side data the caller needs to act on after conversion (currently +// just the list of image references found in the source so the CLI +// can copy assets). +type Result struct { + // Body is the converted MDX file content. + Body []byte + // Images lists every `.. image::` and `.. figure::` directive + // the converter saw. Populated regardless of `--copy-images`; + // it's the CLI's job to act on them. + Images []ImageRef +} + +// Convert transforms an RST document into an MDX document. The byte +// output is deterministic — same input + same options produces the same +// bytes — so callers can use it for golden-file testing. +func Convert(rst []byte, opts Options) (*Result, error) { + if len(rst) == 0 { + return nil, fmt.Errorf("empty input") + } + + // Normalize line endings so every downstream transform only has to + // worry about '\n'. + body := strings.ReplaceAll(string(rst), "\r\n", "\n") + + // Capture image references off the RAW RST before transforms run, + // so we still see the original directive shapes (including option + // blocks for :alt: text). The rewriter in images.go then mutates + // the directive lines independently — they only need to agree on + // the basename. + images := extractImageRefs(body, opts.SourcePath) + + // Resolve file-system includes BEFORE any transform runs so the + // spliced content flows through the whole pipeline. literalinclude + // is rewritten as a `.. code-block::` directive that the downstream + // codeblocks transform handles. + body, err := include.Resolve(body, include.Options{ + SourcePath: opts.SourcePath, + DocsRoot: opts.DocsRoot, + Strict: opts.Strict, + }) + if err != nil { + return nil, fmt.Errorf("resolve includes: %w", err) + } + + // The pipeline. 
Order matters — see package doc. + body = stripCopyrightHeader(body) + body = stripSimpleDirectives(body) + body = stripLabels(body) + // Collapse RST `..` comment blocks before any JSX-emitting transform + // runs. If we let admonitions/figures/headings rewrite content that + // lives inside an RST comment, we end up with `` / `` + // tags whose closes straddle the comment boundary — Mintlify then + // errors with "unexpected closing slash" or "expected an open tag". + body = convertComments(body) + // `.. wip::` runs before heading detection because its body is + // indented; dedenting first lets nested `=== underlines` register + // as real headings downstream. + body = convertWip(body) + body = convertTodo(body) + body = convertToggle(body) + body = convertRawHTMLVideo(body) + body = convertYoutube(body) + body = convertTabs(body) + body = convertTableTitle(body) + body = convertTables(body) + body = convertHeadings(body) + body = convertCodeBlocks(body) + body = convertAdmonitions(body) + body = convertImages(body) + // Everything from here on runs on a document that already contains + // fenced code blocks. Wrap each transform so it only touches prose. + body = transformOutsideFences(body, func(s string) string { return convertLinks(s, opts) }) + body = transformOutsideFences(body, convertInlineRoles) + body = transformOutsideFences(body, convertLists) + body = transformOutsideFences(body, convertRubric) + body = normalizeLanguages(body) + body = stripDoubleBackticksInFences(body) + body = escapeMDXPlaceholders(body) + body = cleanupWhitespace(body) + + return &Result{ + Body: composeOutput(body, rst, opts), + Images: images, + }, nil +} diff --git a/tools/rst-to-mdx/internal/convert/directives.go b/tools/rst-to-mdx/internal/convert/directives.go new file mode 100644 index 00000000..7911be07 --- /dev/null +++ b/tools/rst-to-mdx/internal/convert/directives.go @@ -0,0 +1,619 @@ +// Copyright (c) 2026 Digital Asset (Switzerland) GmbH and/or its affiliates. 
+// SPDX-License-Identifier: Apache-2.0 + +package convert + +import ( + "regexp" + "strings" +) + +var ( + // Copyright/SPDX header at the very top of the file. + // .. + // Copyright (c) … + // .. + // SPDX-License-Identifier: … + // is converted to an MDX comment so provenance isn't lost. + reCopyrightHeader = regexp.MustCompile( + `(?s)\A\.\.\s*\n\s+Copyright\s+[^\n]+\n(?:\.\.\s*\n\s+SPDX[^\n]+\n)?`) + + // :orphan: directive (whole line). + reOrphan = regexp.MustCompile(`(?m)^:orphan:\s*$\n?`) + + // .. contents:: [title] + // :option: value + // … + // Consumes the directive line plus any immediately-following option + // lines (starting with three+ spaces and a colon). + reContents = regexp.MustCompile( + `(?m)^\.\.\s+contents::[^\n]*\n(?:[ \t]+:[^\n]+\n)*`) + + // .. toctree:: + // :option: + // page1 + // page2 + // Consumes the directive + indented block until a non-indented line. + reToctree = regexp.MustCompile( + `(?m)^\.\.\s+toctree::[^\n]*\n(?:[ \t]+[^\n]*\n|\s*\n)*`) + + // .. _label-name: + reLabel = regexp.MustCompile( + `(?m)^\.\.\s+_[A-Za-z0-9][A-Za-z0-9_\- ]*:\s*$\n?`) + +) + +// reTodoStart matches `.. todo::` with an optional inline summary on the +// same line. Body lines (if any) are indented under it. The inline +// portion is often a `` issue link. +var reTodoStart = regexp.MustCompile(`^(\s*)\.\.\s+todo::(?:\s+(.*?))?\s*$`) + +// stripCopyrightHeader converts the standard Canton/Daml copyright +// comment at the top of an RST file into an MDX comment so we keep the +// attribution but don't render it. +func stripCopyrightHeader(s string) string { + m := reCopyrightHeader.FindStringSubmatch(s) + if m == nil { + return s + } + header := m[0] + // Extract the copyright + SPDX lines and emit them as one comment. 
+ var kept []string + for _, line := range strings.Split(header, "\n") { + t := strings.TrimSpace(line) + if strings.HasPrefix(t, "Copyright") || strings.HasPrefix(t, "SPDX") { + kept = append(kept, t) + } + } + rest := strings.TrimLeft(s[len(header):], "\n") + replacement := "" + if len(kept) > 0 { + replacement = "{/* " + strings.Join(kept, " — ") + " */}\n\n" + } + return replacement + rest +} + +// stripSimpleDirectives removes directives that have no MDX counterpart: +// contents (Mintlify auto-TOCs), toctree (docs.json handles nav), orphan +// (not expressible in MDX). `.. todo::` is intentionally NOT stripped — +// see convertTodo, which renders todos as a visible `` so readers +// can see pending work and follow any linked issue. +func stripSimpleDirectives(s string) string { + s = reOrphan.ReplaceAllString(s, "") + s = reContents.ReplaceAllString(s, "") + s = reToctree.ReplaceAllString(s, "") + return s +} + +// stripLabels removes `.. _label-name:` anchors. Their targets will be +// resolved by the Phase-2 label index; the label line itself produces no +// MDX output because Mintlify auto-generates anchors from headings. +func stripLabels(s string) string { + return reLabel.ReplaceAllString(s, "") +} + +// reRubric matches `.. rubric:: Heading` — a non-TOC inline heading. We +// emit it as bold text so it still stands out without appearing in the +// right-sidebar TOC. +var reRubric = regexp.MustCompile(`(?m)^\.\.\s+rubric::\s+(.+)$`) + +// reTableTitle matches `.. table:: Title` — an RST wrapper that +// decorates a following grid/list/csv table with a title. We emit the +// title as bold above the table that follows. +var reTableTitle = regexp.MustCompile(`(?m)^(\s*)\.\.\s+table::\s+(.+?)\s*$`) + +// reYoutubeStart matches `.. youtube:: ` opening the +// directive. The line walker below consumes the directive plus any +// indented `:option:` lines that follow. 
+var reYoutubeStart = regexp.MustCompile( + `^(\s*)\.\.\s+youtube::\s+([A-Za-z0-9_\-]+)\s*$`) + +// reAnyHeading matches an RST underline heading shape so we can use the +// most recent one as the iframe title. +var reAnyHeadingTitle = regexp.MustCompile(`^([=\-~^"]{3,})\s*$`) + +// reToggleStart matches `.. toggle::` — the sphinx-togglebutton directive +// that wraps indented content in a click-to-expand button. An optional +// argument on the same line becomes the accordion title; otherwise we +// use a generic default. Mintlify's `` is the natural target +// because it has the same expand/collapse UX. +var reToggleStart = regexp.MustCompile(`^(\s*)\.\.\s+toggle::\s*(.*)$`) + +// reWipStart matches `.. wip::` — Canton's custom Sphinx directive +// for "work in progress" content. Body lines are indented under the +// directive; convertWip wraps them in an Info admonition with a +// `**WIP:**` prefix and dedents the body so any nested headings, +// code blocks, and admonitions flow through the rest of the pipeline +// like ordinary content. We treat the WIP block as a hint to the +// reader, not as content to drop. +var reWipStart = regexp.MustCompile(`^(\s*)\.\.\s+wip::\s*$`) + +// reRawHTMLStart matches `.. raw:: html`. Sphinx's `raw` directive +// passes its indented body straight through to the configured output +// format. Most uses in the corpus are `