From 6098bd9a1a48211aac3b1205b6adc71aee7c19ac Mon Sep 17 00:00:00 2001 From: "Matthias J. Kannwischer" Date: Wed, 22 Apr 2026 23:13:18 +0800 Subject: [PATCH] lint: Add ast-grep check for visibility-macro annotations Add an ast-grep lint for the MLD_INTERNAL_API / MLD_EXTERNAL_API / MLD_API_QUALIFIER convention on externally-linked declarations under mldsa/src/ (functions for now; data definitions not yet covered). Rule lives in scripts/.ast-grep/mldsa-declarations.yml as a multi-document YAML with two rules (function definition + function prototype). Each rule matches on tree-sitter-c node kinds (function_definition / declaration), excludes the three accepted annotation macros, static functions, and assembly entry points (*_asm, empty_cu_*). scripts/lint gains a check-ast-grep step (placed right after the C clang-format step); nix/util.nix's linters package picks up the ast-grep binary. Invocation: ast-grep scan --rule scripts/.ast-grep/mldsa-declarations.yml . Rationale for ast-grep over alternatives considered: - CodeQL: powerful but requires a full build + database; overkill for a lint rule and pulls in a non-OSS CLI. - Semgrep: C parser chokes on unknown macros and lacks sibling- relationship matchers, forcing return-type enumeration and implicit reliance on parser quirks. - ast-grep: tree-sitter-c parses reliably; rule matches on AST node kinds and names required annotations literally via regex. Known limitations: - Unlike CodeQL (which only sees compiled code), ast-grep scans all source text, so declarations inside #if blocks (e.g. MLDSA_DEBUG, native backend headers) are also flagged. Either annotate them or scope the `files:` globs narrower in follow-up. - A `static MLD_INLINE ...` combination parses with `static` outside the function_definition AST node, so the rule falls back to a text-regex and a `follows:` sibling check. Co-Authored-By: Claude Opus 4.7 (1M context) --- nix/util.nix | 3 +- scripts/.ast-grep/mldsa-declarations.yml | 103 +++++++++++++++++++++++ scripts/lint | 24 ++++++ 3 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 scripts/.ast-grep/mldsa-declarations.yml diff --git a/nix/util.nix b/nix/util.nix index 1283b0d4a..2a9b862a9 100644 --- a/nix/util.nix +++ b/nix/util.nix @@ -88,7 +88,8 @@ rec { shfmt shellcheck actionlint - ruff; + ruff + ast-grep; inherit (pkgs.python3Packages) mpmath sympy pyparsing pyyaml rich; diff --git a/scripts/.ast-grep/mldsa-declarations.yml b/scripts/.ast-grep/mldsa-declarations.yml new file mode 100644 index 000000000..fd5235a2e --- /dev/null +++ b/scripts/.ast-grep/mldsa-declarations.yml @@ -0,0 +1,103 @@ +# Copyright (c) The mldsa-native project authors +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT +# +# ast-grep rules enforcing visibility-macro annotations on every +# externally-linked declaration under mldsa/src/. See mldsa/src/common.h +# for the macro definitions. +# +# Invoked by scripts/lint via `ast-grep scan --rule`. + +id: mldsa-function-def-missing-visibility +language: c +files: + - 'mldsa/src/**/*.c' + - 'mldsa/src/**/*.h' +rule: + all: + - kind: function_definition + - not: + has: + kind: type_identifier + regex: '^(MLD_INTERNAL_API|MLD_EXTERNAL_API|MLD_API_QUALIFIER)$' + stopBy: end + - not: + follows: + stopBy: neighbor + has: + kind: identifier + regex: '^(MLD_INTERNAL_API|MLD_EXTERNAL_API|MLD_API_QUALIFIER)$' + stopBy: end + - not: + has: + kind: storage_class_specifier + regex: '^static$' + stopBy: end + - not: + follows: + stopBy: neighbor + has: + kind: storage_class_specifier + regex: '^static$' + stopBy: end + - not: + regex: '^\s*(static|MLD_STATIC_TESTABLE)\b' + - not: + has: + kind: identifier + regex: '^(empty_cu_|.*_asm$)' + stopBy: end +severity: error +message: |- + Function definition has external linkage but is not preceded by + MLD_INTERNAL_API, MLD_EXTERNAL_API, or MLD_API_QUALIFIER + (see mldsa/src/common.h). + +--- + +id: mldsa-function-proto-missing-visibility +language: c +files: + - 'mldsa/src/**/*.c' + - 'mldsa/src/**/*.h' +rule: + all: + - kind: declaration + - has: + kind: function_declarator + stopBy: end + - not: + has: + kind: type_identifier + regex: '^(MLD_INTERNAL_API|MLD_EXTERNAL_API|MLD_API_QUALIFIER)$' + stopBy: end + - not: + follows: + stopBy: neighbor + has: + kind: identifier + regex: '^(MLD_INTERNAL_API|MLD_EXTERNAL_API|MLD_API_QUALIFIER)$' + stopBy: end + - not: + has: + kind: storage_class_specifier + regex: '^static$' + stopBy: end + - not: + follows: + stopBy: neighbor + has: + kind: storage_class_specifier + regex: '^static$' + stopBy: end + - not: + regex: '^\s*(static|MLD_STATIC_TESTABLE)\b' + - not: + has: + kind: identifier + regex: '^(empty_cu_|.*_asm$)' + stopBy: end +severity: error +message: |- + Function prototype has external linkage but is not preceded by + MLD_INTERNAL_API, MLD_EXTERNAL_API, or MLD_API_QUALIFIER + (see mldsa/src/common.h). diff --git a/scripts/lint b/scripts/lint index cff911ee3..e1eb268fc 100755 --- a/scripts/lint +++ b/scripts/lint @@ -195,6 +195,30 @@ lint-c-files() checkerr "Lint C" "$(lint-c-files)" gh_group_end +check-ast-grep() +{ + if ! command -v ast-grep >/dev/null; then + gh_error_simple "ast-grep missing" "ast-grep is not installed" + error "Check ast-grep" + SUCCESS=false + gh_summary_failure "Check ast-grep" + return 0 + fi + + if (cd "$ROOT" && ast-grep scan --rule scripts/.ast-grep/mldsa-declarations.yml .); then + info "Check ast-grep" + gh_summary_success "Check ast-grep" + else + error "Check ast-grep" + gh_summary_failure "Check ast-grep" + SUCCESS=false + fi +} + +gh_group_start "Check ast-grep (visibility-macro annotations)" +check-ast-grep +gh_group_end + check-eol-dry-run() { for file in $(git ls-files -- ":/" ":/!:*.png"); do