diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a45168..3f2025e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,39 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.1.4] - 2026-03-20 + +### Added + +- `Style::Plain` — new style variant returned by `detect_style` for docstrings + that contain no NumPy section underlines or Google section headers (e.g. + summary-only docstrings, Sphinx-style docstrings). +- `SyntaxKind::PLAIN_DOCSTRING` — root node kind for plain-style parse trees. +- `parse_plain(input)` — lightweight parser that extracts only a `SUMMARY` and + an optional `EXTENDED_SUMMARY` token from the input, without attempting + section detection. +- `parse(input)` — unified entry point that calls `detect_style` and dispatches + to `parse_google`, `parse_numpy`, or `parse_plain` automatically. +- `PlainDocstring` typed wrapper with `summary()` and `extended_summary()` + accessors (mirrors the existing `GoogleDocstring` / `NumPyDocstring` API). +- Python bindings: `Style.PLAIN`, `SyntaxKind.PLAIN_DOCSTRING`, `PlainDocstring` + class, and `parse_plain(input)` function. +- Google parser: zero-length `DESCRIPTION` token emitted when a colon is + present but no description text follows (e.g. `a (int):`, `a:`), and + zero-length `TYPE` token emitted for empty brackets `()`. +- NumPy parser: zero-length `TYPE` token emitted when a colon is present but + type text is absent (e.g. `a :`); zero-length `DEFAULT_VALUE` token emitted + when a default separator is present but no value follows (e.g. `default =`). + Callers can use `find_missing(KIND)` to detect these absent-but-declared + slots without inspecting surrounding tokens. +- `examples/parse_auto.rs` — demonstrates the unified `parse()` entry point + with Google, NumPy, and plain-style inputs. 
+ +### Changed + +- `detect_style` rewritten as a single O(n) pass; returns `Style::Plain` as the + fallback instead of `Style::Google`. + ## [0.1.3] - 2026-03-19 ### Added @@ -94,6 +127,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Zero external crate dependencies - Python bindings via PyO3 (`pydocstring-rs`) +[0.1.4]: https://github.com/qraqras/pydocstring/compare/v0.1.3...v0.1.4 +[0.1.3]: https://github.com/qraqras/pydocstring/compare/v0.1.2...v0.1.3 [0.1.2]: https://github.com/qraqras/pydocstring/compare/v0.1.1...v0.1.2 [0.1.1]: https://github.com/qraqras/pydocstring/compare/v0.1.0...v0.1.1 [0.1.0]: https://github.com/qraqras/pydocstring/releases/tag/v0.1.0 diff --git a/Cargo.lock b/Cargo.lock index fba3b52..87d0ea9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,4 +4,4 @@ version = 4 [[package]] name = "pydocstring" -version = "0.1.3" +version = "0.1.4" diff --git a/Cargo.toml b/Cargo.toml index c00ebde..8933ef2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pydocstring" -version = "0.1.3" +version = "0.1.4" edition = "2024" authors = ["Ryuma Asai"] description = "A zero-dependency Rust parser for Python docstrings (Google and NumPy styles) with a unified syntax tree and byte-precise source locations" diff --git a/README.md b/README.md index da94e11..00cb04d 100644 --- a/README.md +++ b/README.md @@ -18,13 +18,13 @@ Python bindings are also available as [`pydocstring-rs`](https://pypi.org/projec - **Byte-precise source locations** — every token carries its exact byte range for pinpoint diagnostics - **Zero dependencies** — pure Rust, no external crates, no regex - **Error-resilient** — never panics; malformed input still yields a best-effort tree -- **Style auto-detection** — hand it a docstring, it tells you the convention +- **Style auto-detection** — hand it a docstring, get back `Style::Google`, `Style::NumPy`, or `Style::Plain` ## Installation ```toml [dependencies] -pydocstring = 
"0.1.3" +pydocstring = "0.1.4" ``` ## Usage @@ -60,6 +60,25 @@ use pydocstring::parse::{detect_style, Style}; assert_eq!(detect_style("Summary.\n\nArgs:\n x: Desc."), Style::Google); assert_eq!(detect_style("Summary.\n\nParameters\n----------\nx : int"), Style::NumPy); +assert_eq!(detect_style("Just a summary."), Style::Plain); +``` + +`Style::Plain` covers docstrings with no recognised section markers: summary-only, +summary + extended summary, and unrecognised styles such as Sphinx. + +### Unified Auto-Detecting Parser + +Use `parse()` to let the library detect the style and parse in one step: + +```rust +use pydocstring::parse::parse; +use pydocstring::syntax::SyntaxKind; + +let result = parse("Summary.\n\nArgs:\n x: Desc."); +assert_eq!(result.root().kind(), SyntaxKind::GOOGLE_DOCSTRING); + +let result = parse("Just a summary."); +assert_eq!(result.root().kind(), SyntaxKind::PLAIN_DOCSTRING); ``` ### Source Locations @@ -215,13 +234,14 @@ Both styles support the following section categories. Typed accessor methods are | Methods | `methods()` → `GoogleMethod` | `methods()` → `NumPyMethod` | | Free text (Notes, Examples, etc.) | `body_text()` | `body_text()` | -Root-level accessors: `summary()`, `extended_summary()` (NumPy also has `deprecation()`). +Root-level accessors: `summary()`, `extended_summary()` (NumPy also has `deprecation()`). `PlainDocstring` exposes only `summary()` and `extended_summary()`. 
## Development ```bash cargo build cargo test +cargo run --example parse_auto cargo run --example parse_google cargo run --example parse_numpy ``` diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 25605ea..f6e6773 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -67,11 +67,11 @@ dependencies = [ [[package]] name = "pydocstring" -version = "0.1.2" +version = "0.1.4" [[package]] name = "pydocstring-python" -version = "0.1.2" +version = "0.1.4" dependencies = [ "pydocstring", "pyo3", diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 6bc550f..be50569 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pydocstring-python" -version = "0.1.3" +version = "0.1.4" edition = "2024" authors = ["Ryuma Asai"] description = "Python bindings for pydocstring — a fast docstring parser for Google and NumPy styles" @@ -12,5 +12,5 @@ name = "pydocstring" crate-type = ["cdylib"] [dependencies] -pydocstring_core = { package = "pydocstring", version = "0.1.3", path = "../.." } +pydocstring_core = { package = "pydocstring", version = "0.1.4", path = "../.." 
} pyo3 = { version = "0.24", features = ["extension-module"] } diff --git a/bindings/python/README.md b/bindings/python/README.md index fa94f9d..f02ffb9 100644 --- a/bindings/python/README.md +++ b/bindings/python/README.md @@ -16,7 +16,7 @@ Produces a **unified syntax tree** with **byte-precise source locations** on eve - **Byte-precise source locations** — every token carries its exact byte range for pinpoint diagnostics - **Powered by Rust** — native extension with no Python runtime overhead - **Error-resilient** — never raises exceptions; malformed input still yields a best-effort tree -- **Style auto-detection** — hand it a docstring, get back `Style.GOOGLE` or `Style.NUMPY` +- **Style auto-detection** — hand it a docstring, get back `Style.GOOGLE`, `Style.NUMPY`, or `Style.PLAIN` ## Installation @@ -26,6 +26,37 @@ pip install pydocstring-rs ## Usage +### Unified Parse (auto-detect) + +Use `parse()` when you don't know the style in advance. +The returned object has a `.style` property so you can dispatch without `isinstance` checks: + +```python +from pydocstring import parse, Style + +doc = parse(source) + +match doc.style: + case Style.GOOGLE: + for arg in doc.sections[0].args: + print(arg.name.text, arg.description.text) + case Style.NUMPY: + for param in doc.sections[0].parameters: + print([n.text for n in param.names], param.description.text) + case Style.PLAIN: + print(doc.summary.text) +``` + +When you only need the style-independent model, no dispatch is necessary: + +```python +model = parse(source).to_model() # works for all three styles +``` + +If you already know the style, prefer the explicit functions `parse_google()`, +`parse_numpy()`, or `parse_plain()` — they return a concrete type and are +slightly more efficient. 
+ ### Style Detection ```python @@ -33,8 +64,32 @@ from pydocstring import detect_style, Style detect_style("Summary.\n\nArgs:\n x: Desc.") # Style.GOOGLE detect_style("Summary.\n\nParameters\n----------\n") # Style.NUMPY +detect_style("Just a summary.") # Style.PLAIN ``` +`Style.PLAIN` covers docstrings with no recognised section markers: +summary-only, summary + extended, and unrecognised styles such as Sphinx. + +### Plain Style + +Docstrings with no NumPy or Google section markers are parsed as plain: + +```python +from pydocstring import parse_plain + +doc = parse_plain("""Brief summary. + +More detail here. +Spanning multiple lines. +""") + +print(doc.summary.text) # "Brief summary." +print(doc.extended_summary.text) # "More detail here.\nSpanning multiple lines." +``` + +Unrecognised styles such as Sphinx are also treated as plain: the `:param:` +lines are preserved verbatim in `extended_summary`. + ### Google Style ```python @@ -230,25 +285,28 @@ print(numpy_text) # Contains "Parameters\n----------" ### Functions -| Function | Returns | Description | -|----------------------|-------------------|------------------------------------------------| -| `parse_google(text)` | `GoogleDocstring` | Parse a Google-style docstring | -| `parse_numpy(text)` | `NumPyDocstring` | Parse a NumPy-style docstring | -| `detect_style(text)` | `Style` | Detect style: `Style.GOOGLE` or `Style.NUMPY` | -| `emit_google(doc)` | `str` | Emit a `Docstring` model as Google-style text | -| `emit_numpy(doc)` | `str` | Emit a `Docstring` model as NumPy-style text | +| Function | Returns | Description | +|----------------------|-------------------------------------------------|---------------------------------------------------------------| +| `parse(text)` | `GoogleDocstring \| NumPyDocstring \| PlainDocstring` | Auto-detect style and parse | +| `parse_google(text)` | `GoogleDocstring` | Parse a Google-style docstring | +| `parse_numpy(text)` | `NumPyDocstring` | Parse a NumPy-style 
docstring | +| `parse_plain(text)` | `PlainDocstring` | Parse a plain docstring (no section markers) | +| `detect_style(text)` | `Style` | Detect style: `Style.GOOGLE`, `Style.NUMPY`, or `Style.PLAIN` | +| `emit_google(doc)` | `str` | Emit a `Docstring` model as Google-style text | +| `emit_numpy(doc)` | `str` | Emit a `Docstring` model as NumPy-style text | ### Objects | Class | Key Properties | |-------------------|------------------------------------------------------------------------------------------------------------------| -| `Style` | `GOOGLE`, `NUMPY` (enum) | -| `GoogleDocstring` | `summary`, `extended_summary`, `sections`, `node`, `source`, `pretty_print()`, `to_model()` | +| `Style` | `GOOGLE`, `NUMPY`, `PLAIN` (enum) | +| `GoogleDocstring` | `style`, `summary`, `extended_summary`, `sections`, `node`, `source`, `pretty_print()`, `to_model()` | | `GoogleSection` | `kind`, `args`, `returns`, `exceptions`, `body_text`, `node` | | `GoogleArg` | `name`, `type`, `description`, `optional` | | `GoogleReturns` | `return_type`, `description` | | `GoogleException` | `type`, `description` | -| `NumPyDocstring` | `summary`, `extended_summary`, `sections`, `node`, `source`, `pretty_print()`, `to_model()` | +| `PlainDocstring` | `style`, `summary`, `extended_summary`, `node`, `source`, `pretty_print()`, `to_model()` | +| `NumPyDocstring` | `style`, `summary`, `extended_summary`, `sections`, `node`, `source`, `pretty_print()`, `to_model()` | | `NumPySection` | `kind`, `parameters`, `returns`, `exceptions`, `body_text`, `node` | | `NumPyParameter` | `names`, `type`, `description`, `optional`, `default_value` | | `NumPyReturns` | `name`, `return_type`, `description` | diff --git a/bindings/python/pydocstring.pyi b/bindings/python/pydocstring.pyi index 84e7396..9c843e7 100644 --- a/bindings/python/pydocstring.pyi +++ b/bindings/python/pydocstring.pyi @@ -4,6 +4,7 @@ from typing import Generator, Union class Style(enum.IntEnum): GOOGLE = ... NUMPY = ... + PLAIN = ... 
class SyntaxKind(enum.IntEnum): # Common tokens @@ -56,6 +57,8 @@ class SyntaxKind(enum.IntEnum): NUMPY_REFERENCE = ... NUMPY_ATTRIBUTE = ... NUMPY_METHOD = ... + # Plain node + PLAIN_DOCSTRING = ... class TextRange: @property @@ -129,6 +132,8 @@ class GoogleSection: def node(self) -> Node: ... class GoogleDocstring: + @property + def style(self) -> Style: ... @property def summary(self) -> Token | None: ... @property @@ -192,6 +197,8 @@ class NumPySection: def node(self) -> Node: ... class NumPyDocstring: + @property + def style(self) -> Style: ... @property def summary(self) -> Token | None: ... @property @@ -212,6 +219,29 @@ class NumPyDocstring: """ ... +# ─── Plain ─────────────────────────────────────────────────────────────────── + +class PlainDocstring: + @property + def style(self) -> Style: ... + @property + def summary(self) -> Token | None: ... + @property + def extended_summary(self) -> Token | None: ... + @property + def node(self) -> Node: ... + @property + def source(self) -> str: ... + def pretty_print(self) -> str: ... + def to_model(self) -> Docstring: ... + def line_col(self, offset: int) -> LineColumn: + """Convert a byte offset to a LineColumn. + + The offset is typically ``token.range.start`` or ``token.range.end``. + ``lineno`` is 1-based; ``col`` is 0-based Unicode codepoints. + """ + ... + # ─── Model IR ──────────────────────────────────────────────────────────────── class Deprecation: @@ -361,8 +391,10 @@ class Docstring: # ─── Functions ─────────────────────────────────────────────────────────────── +def parse(input: str) -> GoogleDocstring | NumPyDocstring | PlainDocstring: ... def parse_google(input: str) -> GoogleDocstring: ... def parse_numpy(input: str) -> NumPyDocstring: ... +def parse_plain(input: str) -> PlainDocstring: ... def detect_style(input: str) -> Style: ... def emit_google(doc: Docstring, base_indent: int = 0) -> str: ... def emit_numpy(doc: Docstring, base_indent: int = 0) -> str: ... 
diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml index 38662ba..6c043cd 100644 --- a/bindings/python/pyproject.toml +++ b/bindings/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "pydocstring-rs" -version = "0.1.3" +version = "0.1.4" description = "Python bindings for pydocstring — a zero-dependency Rust parser for Python docstrings (Google and NumPy styles) with a unified syntax tree and byte-precise source locations" license = {text = "MIT"} authors = [{name = "Ryuma Asai"}] diff --git a/bindings/python/rustfmt.toml b/bindings/python/rustfmt.toml new file mode 100644 index 0000000..7530651 --- /dev/null +++ b/bindings/python/rustfmt.toml @@ -0,0 +1 @@ +max_width = 120 diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index eeefeed..17c99f2 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -150,6 +150,8 @@ enum PySyntaxKind { NUMPY_REFERENCE, NUMPY_ATTRIBUTE, NUMPY_METHOD, + // Plain node + PLAIN_DOCSTRING, } #[pymethods] @@ -211,6 +213,7 @@ impl PySyntaxKind { SyntaxKind::NUMPY_REFERENCE => Self::NUMPY_REFERENCE, SyntaxKind::NUMPY_ATTRIBUTE => Self::NUMPY_ATTRIBUTE, SyntaxKind::NUMPY_METHOD => Self::NUMPY_METHOD, + SyntaxKind::PLAIN_DOCSTRING => Self::PLAIN_DOCSTRING, } } @@ -261,6 +264,7 @@ impl PySyntaxKind { Self::NUMPY_REFERENCE => SyntaxKind::NUMPY_REFERENCE, Self::NUMPY_ATTRIBUTE => SyntaxKind::NUMPY_ATTRIBUTE, Self::NUMPY_METHOD => SyntaxKind::NUMPY_METHOD, + Self::PLAIN_DOCSTRING => SyntaxKind::PLAIN_DOCSTRING, } } } @@ -289,11 +293,7 @@ impl PyToken { self.range.clone_ref(py) } fn __repr__(&self) -> String { - format!( - "Token(SyntaxKind.{}, {:?})", - self.kind.to_core().name(), - self.text - ) + format!("Token(SyntaxKind.{}, {:?})", self.kind.to_core().name(), self.text) } } @@ -308,11 +308,7 @@ fn to_py_token(py: Python<'_>, token: &SyntaxToken, source: &str) -> PyResult, - token: Option<&SyntaxToken>, - source: &str, -) -> PyResult>> { +fn 
to_py_token_opt(py: Python<'_>, token: Option<&SyntaxToken>, source: &str) -> PyResult>> { token.map(|t| to_py_token(py, t, source)).transpose() } @@ -353,12 +349,8 @@ fn to_py_node(py: Python<'_>, node: &SyntaxNode, source: &str) -> PyResult { - Ok(to_py_node(py, n, source)?.into_any()) - } - pydocstring_core::syntax::SyntaxElement::Token(t) => { - Ok(to_py_token(py, t, source)?.into_any()) - } + pydocstring_core::syntax::SyntaxElement::Node(n) => Ok(to_py_node(py, n, source)?.into_any()), + pydocstring_core::syntax::SyntaxElement::Token(t) => Ok(to_py_token(py, t, source)?.into_any()), }) .collect::>>()?; @@ -532,6 +524,10 @@ impl PyGoogleDocstring { let lc = byte_offset_to_line_col(&self.source, offset as usize)?; Py::new(py, lc) } + #[getter] + fn style(&self) -> PyStyle { + PyStyle::Google + } fn __repr__(&self) -> String { "GoogleDocstring(...)".to_string() } @@ -571,11 +567,7 @@ impl PyNumPyParameter { self.default_value.as_ref().map(|t| t.clone_ref(py)) } fn __repr__(&self, py: Python<'_>) -> String { - let name_texts: Vec = self - .names - .iter() - .map(|n| n.borrow(py).text.clone()) - .collect(); + let name_texts: Vec = self.names.iter().map(|n| n.borrow(py).text.clone()).collect(); format!("NumPyParameter({})", name_texts.join(", ")) } } @@ -712,18 +704,72 @@ impl PyNumPyDocstring { let lc = byte_offset_to_line_col(&self.source, offset as usize)?; Py::new(py, lc) } + #[getter] + fn style(&self) -> PyStyle { + PyStyle::NumPy + } fn __repr__(&self) -> String { "NumPyDocstring(...)".to_string() } } +// ─── Plain typed wrapper ──────────────────────────────────────────────────── + +#[pyclass(name = "PlainDocstring", frozen)] +struct PyPlainDocstring { + summary: Option>, + extended_summary: Option>, + node: Py, + source: String, +} + +#[pymethods] +impl PyPlainDocstring { + #[getter] + fn summary(&self, py: Python<'_>) -> Option> { + self.summary.as_ref().map(|t| t.clone_ref(py)) + } + #[getter] + fn extended_summary(&self, py: Python<'_>) -> Option> { + 
self.extended_summary.as_ref().map(|t| t.clone_ref(py)) + } + #[getter] + fn node(&self, py: Python<'_>) -> Py { + self.node.clone_ref(py) + } + #[getter] + fn source(&self) -> &str { + &self.source + } + fn pretty_print(&self) -> String { + pydocstring_core::parse::plain::parse_plain(&self.source).pretty_print() + } + fn to_model(&self) -> PyResult { + let parsed = pydocstring_core::parse::plain::parse_plain(&self.source); + let doc = pydocstring_core::parse::plain::to_model::to_model(&parsed) + .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("failed to convert to model"))?; + Ok(PyModelDocstring { inner: doc }) + } + /// Convert a byte offset to a `LineColumn` with codepoint-based `col`. + fn line_col(&self, py: Python<'_>, offset: u32) -> PyResult> { + let lc = byte_offset_to_line_col(&self.source, offset as usize)?; + Py::new(py, lc) + } + #[getter] + fn style(&self) -> PyStyle { + PyStyle::Plain + } + fn __repr__(&self) -> String { + "PlainDocstring(...)".to_string() + } +} + // ─── Conversion helpers ───────────────────────────────────────────────────── fn build_google_docstring(py: Python<'_>, parsed: &Parsed) -> PyResult> { let source = parsed.source(); - let doc = gn::GoogleDocstring::cast(parsed.root()).ok_or_else(|| { - pyo3::exceptions::PyValueError::new_err("root node is not a GOOGLE_DOCSTRING") - })?; + let doc = gn::GoogleDocstring::cast(parsed.root()) + .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("root node is not a GOOGLE_DOCSTRING"))?; let summary = to_py_token_opt(py, doc.summary(), source)?; let extended_summary = to_py_token_opt(py, doc.extended_summary(), source)?; @@ -802,9 +848,8 @@ fn build_google_docstring(py: Python<'_>, parsed: &Parsed) -> PyResult, parsed: &Parsed) -> PyResult> { let source = parsed.source(); - let doc = nn::NumPyDocstring::cast(parsed.root()).ok_or_else(|| { - pyo3::exceptions::PyValueError::new_err("root node is not a NUMPY_DOCSTRING") - })?; + let doc = nn::NumPyDocstring::cast(parsed.root()) + 
.ok_or_else(|| pyo3::exceptions::PyValueError::new_err("root node is not a NUMPY_DOCSTRING"))?; let summary = to_py_token_opt(py, doc.summary(), source)?; let extended_summary = to_py_token_opt(py, doc.extended_summary(), source)?; @@ -887,6 +932,26 @@ fn build_numpy_docstring(py: Python<'_>, parsed: &Parsed) -> PyResult, parsed: &Parsed) -> PyResult> { + let source = parsed.source(); + let doc = pydocstring_core::parse::plain::nodes::PlainDocstring::cast(parsed.root()) + .ok_or_else(|| pyo3::exceptions::PyValueError::new_err("root node is not a PLAIN_DOCSTRING"))?; + + let summary = to_py_token_opt(py, doc.summary(), source)?; + let extended_summary = to_py_token_opt(py, doc.extended_summary(), source)?; + let node = to_py_node(py, parsed.root(), source)?; + + Py::new( + py, + PyPlainDocstring { + summary, + extended_summary, + node, + source: source.to_string(), + }, + ) +} + // ─── Model IR types ───────────────────────────────────────────────────────── #[pyclass(name = "Deprecation")] @@ -903,10 +968,7 @@ impl PyModelDeprecation { #[new] #[pyo3(signature = (version, *, description=None))] fn new(version: String, description: Option) -> Self { - Self { - version, - description, - } + Self { version, description } } fn __repr__(&self) -> String { format!("Deprecation(version={:?})", self.version) @@ -967,11 +1029,7 @@ struct PyModelReturn { impl PyModelReturn { #[new] #[pyo3(signature = (*, name=None, type_annotation=None, description=None))] - fn new( - name: Option, - type_annotation: Option, - description: Option, - ) -> Self { + fn new(name: Option, type_annotation: Option, description: Option) -> Self { Self { name, type_annotation, @@ -1001,10 +1059,7 @@ impl PyModelExceptionEntry { #[new] #[pyo3(signature = (type_name, *, description=None))] fn new(type_name: String, description: Option) -> Self { - Self { - type_name, - description, - } + Self { type_name, description } } fn __repr__(&self) -> String { format!("ExceptionEntry({})", self.type_name) @@ 
-1177,10 +1232,7 @@ fn extract_returns(py: Python<'_>, entries: &[Py]) -> Vec, - entries: &[Py], -) -> Vec { +fn extract_exceptions(py: Python<'_>, entries: &[Py]) -> Vec { entries .iter() .map(|e| { @@ -1221,10 +1273,7 @@ fn extract_methods(py: Python<'_>, entries: &[Py]) -> Vec, - entries: &[Py], -) -> Vec { +fn extract_see_also(py: Python<'_>, entries: &[Py]) -> Vec { entries .iter() .map(|s| { @@ -1274,38 +1323,22 @@ impl PyModelSection { body: Option, ) -> PyResult { let inner = match kind { - "parameters" => { - model::Section::Parameters(extract_parameters(py, ¶meters.unwrap_or_default())) + "parameters" => model::Section::Parameters(extract_parameters(py, ¶meters.unwrap_or_default())), + "keyword_parameters" => { + model::Section::KeywordParameters(extract_parameters(py, ¶meters.unwrap_or_default())) } - "keyword_parameters" => model::Section::KeywordParameters(extract_parameters( - py, - ¶meters.unwrap_or_default(), - )), - "other_parameters" => model::Section::OtherParameters(extract_parameters( - py, - ¶meters.unwrap_or_default(), - )), - "receives" => { - model::Section::Receives(extract_parameters(py, ¶meters.unwrap_or_default())) + "other_parameters" => { + model::Section::OtherParameters(extract_parameters(py, ¶meters.unwrap_or_default())) } + "receives" => model::Section::Receives(extract_parameters(py, ¶meters.unwrap_or_default())), "returns" => model::Section::Returns(extract_returns(py, &returns.unwrap_or_default())), "yields" => model::Section::Yields(extract_returns(py, &returns.unwrap_or_default())), - "raises" => { - model::Section::Raises(extract_exceptions(py, &exceptions.unwrap_or_default())) - } - "warns" => { - model::Section::Warns(extract_exceptions(py, &exceptions.unwrap_or_default())) - } - "attributes" => { - model::Section::Attributes(extract_attributes(py, &attributes.unwrap_or_default())) - } + "raises" => model::Section::Raises(extract_exceptions(py, &exceptions.unwrap_or_default())), + "warns" => 
model::Section::Warns(extract_exceptions(py, &exceptions.unwrap_or_default())), + "attributes" => model::Section::Attributes(extract_attributes(py, &attributes.unwrap_or_default())), "methods" => model::Section::Methods(extract_methods(py, &methods.unwrap_or_default())), - "see_also" => { - model::Section::SeeAlso(extract_see_also(py, &see_also_entries.unwrap_or_default())) - } - "references" => { - model::Section::References(extract_references(py, &references.unwrap_or_default())) - } + "see_also" => model::Section::SeeAlso(extract_see_also(py, &see_also_entries.unwrap_or_default())), + "references" => model::Section::References(extract_references(py, &references.unwrap_or_default())), other => model::Section::FreeText { kind: str_to_free_section_kind(other), body: body.unwrap_or_default(), @@ -1587,10 +1620,7 @@ impl PyModelDocstring { #[setter] fn set_sections(&mut self, sections: Vec>) { Python::with_gil(|py| { - self.inner.sections = sections - .iter() - .map(|s| s.borrow(py).inner.clone()) - .collect(); + self.inner.sections = sections.iter().map(|s| s.borrow(py).inner.clone()).collect(); }); } @@ -1615,6 +1645,27 @@ fn parse_numpy(py: Python<'_>, input: &str) -> PyResult> { build_numpy_docstring(py, &parsed) } +/// Parse a plain docstring (no NumPy or Google section markers) and return a PlainDocstring object. +#[pyfunction] +fn parse_plain(py: Python<'_>, input: &str) -> PyResult> { + let parsed = pydocstring_core::parse::plain::parse_plain(input); + build_plain_docstring(py, &parsed) +} + +/// Auto-detect the docstring style and parse it, returning a GoogleDocstring, +/// NumPyDocstring, or PlainDocstring. Use `.style` on the result to distinguish +/// between them without `isinstance` checks. 
+#[pyfunction] +fn parse(py: Python<'_>, input: &str) -> PyResult { + use pydocstring_core::syntax::SyntaxKind; + let parsed = pydocstring_core::parse::parse(input); + match parsed.root().kind() { + SyntaxKind::GOOGLE_DOCSTRING => Ok(build_google_docstring(py, &parsed)?.into_any()), + SyntaxKind::NUMPY_DOCSTRING => Ok(build_numpy_docstring(py, &parsed)?.into_any()), + _ => Ok(build_plain_docstring(py, &parsed)?.into_any()), + } +} + /// Docstring style enum. #[pyclass(eq, eq_int, frozen, name = "Style")] #[derive(Clone, PartialEq)] @@ -1623,6 +1674,8 @@ enum PyStyle { Google, #[pyo3(name = "NUMPY")] NumPy, + #[pyo3(name = "PLAIN")] + Plain, } #[pymethods] @@ -1631,6 +1684,7 @@ impl PyStyle { match self { PyStyle::Google => "Style.GOOGLE", PyStyle::NumPy => "Style.NUMPY", + PyStyle::Plain => "Style.PLAIN", } } @@ -1638,6 +1692,7 @@ impl PyStyle { match self { PyStyle::Google => "google", PyStyle::NumPy => "numpy", + PyStyle::Plain => "plain", } } } @@ -1648,6 +1703,7 @@ fn detect_style(input: &str) -> PyStyle { match pydocstring_core::parse::detect_style(input) { pydocstring_core::parse::Style::Google => PyStyle::Google, pydocstring_core::parse::Style::NumPy => PyStyle::NumPy, + pydocstring_core::parse::Style::Plain => PyStyle::Plain, } } @@ -1671,8 +1727,10 @@ fn py_emit_numpy(py: Python<'_>, doc: Py, base_indent: usize) #[pymodule] fn pydocstring(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_function(wrap_pyfunction!(parse, m)?)?; m.add_function(wrap_pyfunction!(parse_google, m)?)?; m.add_function(wrap_pyfunction!(parse_numpy, m)?)?; + m.add_function(wrap_pyfunction!(parse_plain, m)?)?; m.add_function(wrap_pyfunction!(detect_style, m)?)?; m.add_function(wrap_pyfunction!(py_emit_google, m)?)?; m.add_function(wrap_pyfunction!(py_emit_numpy, m)?)?; @@ -1692,6 +1750,7 @@ fn pydocstring(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff 
--git a/bindings/python/tests/test_pydocstring.py b/bindings/python/tests/test_pydocstring.py index 6471397..6469013 100644 --- a/bindings/python/tests/test_pydocstring.py +++ b/bindings/python/tests/test_pydocstring.py @@ -13,16 +13,18 @@ def test_numpy(self): == pydocstring.Style.NUMPY ) - def test_fallback_to_google(self): - assert pydocstring.detect_style("Just a summary.") == pydocstring.Style.GOOGLE + def test_fallback_to_plain(self): + assert pydocstring.detect_style("Just a summary.") == pydocstring.Style.PLAIN def test_str(self): assert str(pydocstring.Style.GOOGLE) == "google" assert str(pydocstring.Style.NUMPY) == "numpy" + assert str(pydocstring.Style.PLAIN) == "plain" def test_repr(self): assert repr(pydocstring.Style.GOOGLE) == "Style.GOOGLE" assert repr(pydocstring.Style.NUMPY) == "Style.NUMPY" + assert repr(pydocstring.Style.PLAIN) == "Style.PLAIN" class TestParseGoogle: @@ -349,6 +351,183 @@ def test_google_to_model_returns(self): assert len(rets) == 1 assert rets[0].type_annotation == "int" + def test_plain_to_model(self): + doc = pydocstring.parse_plain("Brief summary.\n\nMore details.") + model = doc.to_model() + assert model.summary == "Brief summary." + assert model.extended_summary == "More details." + assert model.sections == [] + + def test_plain_to_model_summary_only(self): + doc = pydocstring.parse_plain("Just a summary.") + model = doc.to_model() + assert model.summary == "Just a summary." + assert model.extended_summary is None + + +class TestParsePlain: + def test_summary(self): + doc = pydocstring.parse_plain("Summary line.") + assert doc.summary is not None + assert doc.summary.text == "Summary line." 
+ assert doc.extended_summary is None + + def test_empty(self): + doc = pydocstring.parse_plain("") + assert doc.summary is None + assert doc.extended_summary is None + + def test_extended_summary(self): + doc = pydocstring.parse_plain("Summary.\n\nMore details here.\nContinued.") + assert doc.summary is not None + assert doc.summary.text == "Summary." + assert doc.extended_summary is not None + assert "More details here." in doc.extended_summary.text + + def test_no_sections(self): + # Plain docstrings never produce sections — Sphinx-like text stays plain + doc = pydocstring.parse_plain( + "Summary.\n\n:param x: A value.\n:returns: Something." + ) + model = doc.to_model() + assert model.sections == [] + + def test_node_kind(self): + doc = pydocstring.parse_plain("Summary.") + assert doc.node.kind == pydocstring.SyntaxKind.PLAIN_DOCSTRING + + def test_source(self): + text = "Summary.\n\nExtended." + doc = pydocstring.parse_plain(text) + assert doc.source == text + + def test_pretty_print(self): + doc = pydocstring.parse_plain("Summary.\n\nExtended.") + output = doc.pretty_print() + assert "PLAIN_DOCSTRING" in output + assert "SUMMARY" in output + assert "EXTENDED_SUMMARY" in output + + def test_summary_token_kind(self): + doc = pydocstring.parse_plain("Summary.") + assert doc.summary.kind == pydocstring.SyntaxKind.SUMMARY + + def test_extended_summary_token_kind(self): + doc = pydocstring.parse_plain("Summary.\n\nExtended.") + assert doc.extended_summary.kind == pydocstring.SyntaxKind.EXTENDED_SUMMARY + + def test_repr(self): + doc = pydocstring.parse_plain("Summary.") + assert repr(doc) == "PlainDocstring(...)" + + def test_line_col_summary(self): + doc = pydocstring.parse_plain("Summary.") + lc = doc.line_col(doc.summary.range.start) + assert lc.lineno == 1 + assert lc.col == 0 + + def test_line_col_extended_summary(self): + doc = pydocstring.parse_plain("Summary.\n\nExtended.") + lc = doc.line_col(doc.extended_summary.range.start) + assert lc.lineno == 3 + 
assert lc.col == 0 + + def test_detect_style_dispatches_to_plain(self): + assert pydocstring.detect_style("Just a summary.") == pydocstring.Style.PLAIN + assert ( + pydocstring.detect_style("Summary.\n\n:param x: value.") + == pydocstring.Style.PLAIN + ) + + def test_style_property(self): + doc = pydocstring.parse_plain("Summary.") + assert doc.style == pydocstring.Style.PLAIN + + +class TestParse: + """Tests for the unified parse() entry point.""" + + def test_google_returns_google_docstring(self): + doc = pydocstring.parse("Summary.\n\nArgs:\n x (int): Value.") + assert isinstance(doc, pydocstring.GoogleDocstring) + assert doc.style == pydocstring.Style.GOOGLE + + def test_numpy_returns_numpy_docstring(self): + doc = pydocstring.parse( + "Summary.\n\nParameters\n----------\nx : int\n Value." + ) + assert isinstance(doc, pydocstring.NumPyDocstring) + assert doc.style == pydocstring.Style.NUMPY + + def test_plain_returns_plain_docstring(self): + doc = pydocstring.parse("Just a summary.") + assert isinstance(doc, pydocstring.PlainDocstring) + assert doc.style == pydocstring.Style.PLAIN + + def test_empty_returns_plain_docstring(self): + doc = pydocstring.parse("") + assert isinstance(doc, pydocstring.PlainDocstring) + assert doc.style == pydocstring.Style.PLAIN + + def test_sphinx_returns_plain_docstring(self): + doc = pydocstring.parse("Summary.\n\n:param x: A value.\n:returns: Something.") + assert isinstance(doc, pydocstring.PlainDocstring) + assert doc.style == pydocstring.Style.PLAIN + + def test_google_style_property(self): + doc = pydocstring.parse_google("Summary.\n\nArgs:\n x: Desc.") + assert doc.style == pydocstring.Style.GOOGLE + + def test_numpy_style_property(self): + doc = pydocstring.parse_numpy( + "Summary.\n\nParameters\n----------\nx : int\n Desc." + ) + assert doc.style == pydocstring.Style.NUMPY + + def test_parse_google_summary(self): + doc = pydocstring.parse("Summary.\n\nArgs:\n x (int): Value.") + assert doc.summary.text == "Summary." 
+ + def test_parse_numpy_summary(self): + doc = pydocstring.parse( + "Summary.\n\nParameters\n----------\nx : int\n Value." + ) + assert doc.summary.text == "Summary." + + def test_parse_plain_summary(self): + doc = pydocstring.parse("Plain summary.") + assert doc.summary.text == "Plain summary." + + def test_parse_to_model_google(self): + doc = pydocstring.parse("Summary.\n\nArgs:\n x (int): Value.") + model = doc.to_model() + assert model.summary == "Summary." + assert model.sections[0].kind == "parameters" + + def test_parse_to_model_numpy(self): + doc = pydocstring.parse( + "Summary.\n\nParameters\n----------\nx : int\n Value." + ) + model = doc.to_model() + assert model.summary == "Summary." + assert model.sections[0].kind == "parameters" + + def test_parse_to_model_plain(self): + doc = pydocstring.parse("Summary.\n\nExtended.") + model = doc.to_model() + assert model.summary == "Summary." + assert model.sections == [] + + def test_match_style(self): + # Verify match-statement style dispatch works + for src, expected_style in [ + ("Summary.\n\nArgs:\n x: Desc.", pydocstring.Style.GOOGLE), + ("Summary.\n\nParameters\n----------\nx : int\n Desc.", pydocstring.Style.NUMPY), + ("Just a summary.", pydocstring.Style.PLAIN), + ]: + doc = pydocstring.parse(src) + assert doc.style == expected_style + class TestEmit: def test_emit_google(self): diff --git a/examples/parse_auto.rs b/examples/parse_auto.rs new file mode 100644 index 0000000..5691b0c --- /dev/null +++ b/examples/parse_auto.rs @@ -0,0 +1,83 @@ +//! Example: Auto-detecting docstring style with `parse()` +//! +//! Demonstrates the unified `parse()` entry point, which detects the style +//! automatically and returns a `Parsed` result whose root `SyntaxKind` +//! reflects the detected style: +//! +//! - `GOOGLE_DOCSTRING` — Google style (section headers ending with `:`) +//! - `NUMPY_DOCSTRING` — NumPy style (section headers with `---` underlines) +//! 
- `PLAIN_DOCSTRING` — no recognised section markers (summary/extended +//! summary only, or unrecognised styles such as Sphinx) + +use pydocstring::parse::parse; +use pydocstring::syntax::SyntaxKind; + +fn show(label: &str, input: &str) { + let parsed = parse(input); + let kind = parsed.root().kind(); + let style_label = match kind { + SyntaxKind::GOOGLE_DOCSTRING => "Google", + SyntaxKind::NUMPY_DOCSTRING => "NumPy", + SyntaxKind::PLAIN_DOCSTRING => "Plain", + _ => "Unknown", + }; + + println!( + "── {} → {} ──────────────────────────────────────────", + label, style_label + ); + print!("{}", parsed.pretty_print()); + println!(); +} + +fn main() { + println!("╔══════════════════════════════════════════════════╗"); + println!("║ Auto-detecting Docstring Style ║"); + println!("╚══════════════════════════════════════════════════╝"); + println!(); + + show( + "Google", + r#" +Calculate the area of a rectangle. + +Args: + width (float): The width of the rectangle. + height (float): The height of the rectangle. + +Returns: + float: The area of the rectangle. +"#, + ); + + show( + "NumPy", + r#" +Calculate the area of a rectangle. + +Parameters +---------- +width : float + The width of the rectangle. +height : float + The height of the rectangle. + +Returns +------- +float + The area of the rectangle. +"#, + ); + + show("Plain (summary only)", "Calculate the area of a rectangle."); + + show( + "Plain (summary + extended)", + r#" +Calculate the area of a rectangle. + +Takes width and height as arguments and returns their product. +Negative values will raise a ValueError. +"#, + ); +} diff --git a/src/parse.rs b/src/parse.rs index 3b37c12..81034b6 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -9,6 +9,7 @@ use google::GoogleSectionKind; pub mod google; pub mod numpy; +pub mod plain; pub(crate) mod utils; // ============================================================================= @@ -22,6 +23,9 @@ pub enum Style { NumPy, /// Google style (section headers with colons). 
Google, + /// Plain docstring: no recognised style markers (summary/extended summary + /// only). Also covers unrecognised styles such as Sphinx. + Plain, } impl fmt::Display for Style { @@ -29,6 +33,7 @@ impl fmt::Display for Style { match self { Style::NumPy => write!(f, "numpy"), Style::Google => write!(f, "google"), + Style::Plain => write!(f, "plain"), } } } @@ -42,7 +47,9 @@ impl fmt::Display for Style { /// Uses heuristics to identify the style: /// 1. **NumPy**: Section headers followed by `---` underlines /// 2. **Google**: Section headers ending with `:` (e.g., `Args:`, `Returns:`) -/// 3. Falls back to `Google` if no style-specific patterns are found +/// 3. Falls back to [`Style::Plain`] if no style-specific patterns are found. +/// This includes summary-only docstrings and unrecognised styles such as +/// Sphinx. /// /// # Example /// @@ -55,41 +62,66 @@ impl fmt::Display for Style { /// /// let google = "Summary.\n\nArgs:\n x: Description."; /// assert_eq!(detect_style(google), Style::Google); +/// +/// let plain = "Just a summary."; +/// assert_eq!(detect_style(plain), Style::Plain); /// ``` pub fn detect_style(input: &str) -> Style { - if has_numpy_sections(input) { - return Style::NumPy; - } - if has_google_sections(input) { - return Style::Google; - } - Style::Google -} + let lines: Vec<&str> = input.lines().collect(); -// ============================================================================= -// Style detection helpers -// ============================================================================= + for (i, line) in lines.iter().enumerate() { + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } -fn has_numpy_sections(input: &str) -> bool { - let lines: Vec<&str> = input.lines().collect(); - for i in 0..lines.len().saturating_sub(1) { - let current = lines[i].trim(); - let next = lines[i + 1].trim(); - if !current.is_empty() && !next.is_empty() && next.len() >= 3 && next.chars().all(|c| c == '-') { - return true; + // 
NumPy: non-empty line followed by a line of 3+ dashes. + if let Some(next) = lines.get(i + 1) { + let next_trimmed = next.trim(); + if !next_trimmed.is_empty() && next_trimmed.len() >= 3 && next_trimmed.bytes().all(|b| b == b'-') { + return Style::NumPy; + } } - } - false -} -fn has_google_sections(input: &str) -> bool { - for line in input.lines() { - let trimmed = line.trim(); + // Google: known section name ending with `:`. if let Some(name) = trimmed.strip_suffix(':') { if GoogleSectionKind::is_known(&name.to_ascii_lowercase()) { - return true; + return Style::Google; } } } - false + + Style::Plain +} + +// ============================================================================= +// Unified parse entry point +// ============================================================================= + +/// Parse a docstring, auto-detecting its style. +/// +/// Internally calls [`detect_style`] and dispatches to the appropriate parser. +/// The root node kind of the returned [`Parsed`] reflects the detected style: +/// - [`SyntaxKind::NUMPY_DOCSTRING`](crate::syntax::SyntaxKind::NUMPY_DOCSTRING) for NumPy +/// - [`SyntaxKind::GOOGLE_DOCSTRING`](crate::syntax::SyntaxKind::GOOGLE_DOCSTRING) for Google +/// - [`SyntaxKind::PLAIN_DOCSTRING`](crate::syntax::SyntaxKind::PLAIN_DOCSTRING) for Plain (and unrecognised styles) +/// +/// # Example +/// +/// ```rust +/// use pydocstring::parse::parse; +/// use pydocstring::syntax::SyntaxKind; +/// +/// let result = parse("Summary.\n\nArgs:\n x: Description."); +/// assert_eq!(result.root().kind(), SyntaxKind::GOOGLE_DOCSTRING); +/// +/// let plain = parse("Just a summary."); +/// assert_eq!(plain.root().kind(), SyntaxKind::PLAIN_DOCSTRING); +/// ``` +pub fn parse(input: &str) -> crate::syntax::Parsed { + match detect_style(input) { + Style::NumPy => numpy::parse_numpy(input), + Style::Google => google::parse_google(input), + Style::Plain => plain::parse_plain(input), + } } diff --git a/src/parse/google/parser.rs 
b/src/parse/google/parser.rs index 4525a33..122ec83 100644 --- a/src/parse/google/parser.rs +++ b/src/parse/google/parser.rs @@ -279,6 +279,14 @@ fn build_arg_node(kind: SyntaxKind, header: &EntryHeader, range: TextRange) -> S ))); if let Some(t) = ti.r#type { children.push(SyntaxElement::Token(SyntaxToken::new(SyntaxKind::TYPE, t))); + } else { + // Empty brackets `()`: emit a zero-length missing TYPE token right + // after the open bracket so callers can distinguish `a ()` from `a:`. + let missing_pos = ti.open_bracket.end(); + children.push(SyntaxElement::Token(SyntaxToken::new( + SyntaxKind::TYPE, + TextRange::new(missing_pos, missing_pos), + ))); } if let Some(cb) = ti.close_bracket { children.push(SyntaxElement::Token(SyntaxToken::new(SyntaxKind::CLOSE_BRACKET, cb))); @@ -311,6 +319,13 @@ fn build_arg_node(kind: SyntaxKind, header: &EntryHeader, range: TextRange) -> S } if let Some(desc) = header.first_description { children.push(SyntaxElement::Token(SyntaxToken::new(SyntaxKind::DESCRIPTION, desc))); + } else if let Some(colon) = header.colon { + // Colon present but no description: zero-length placeholder so callers + // can distinguish `a (int):` from `a (int)` via find_missing(DESCRIPTION). + children.push(SyntaxElement::Token(SyntaxToken::new( + SyntaxKind::DESCRIPTION, + TextRange::new(colon.end(), colon.end()), + ))); } // Ensure children are in source order (needed when colon/description // appear before the close bracket, e.g., `arg (int:desc.)`). 
@@ -327,6 +342,11 @@ fn build_exception_node(header: &EntryHeader, range: TextRange) -> SyntaxNode { } if let Some(desc) = header.first_description { children.push(SyntaxElement::Token(SyntaxToken::new(SyntaxKind::DESCRIPTION, desc))); + } else if let Some(colon) = header.colon { + children.push(SyntaxElement::Token(SyntaxToken::new( + SyntaxKind::DESCRIPTION, + TextRange::new(colon.end(), colon.end()), + ))); } SyntaxNode::new(SyntaxKind::GOOGLE_EXCEPTION, range, children) } @@ -343,6 +363,11 @@ fn build_warning_node(header: &EntryHeader, range: TextRange) -> SyntaxNode { } if let Some(desc) = header.first_description { children.push(SyntaxElement::Token(SyntaxToken::new(SyntaxKind::DESCRIPTION, desc))); + } else if let Some(colon) = header.colon { + children.push(SyntaxElement::Token(SyntaxToken::new( + SyntaxKind::DESCRIPTION, + TextRange::new(colon.end(), colon.end()), + ))); } SyntaxNode::new(SyntaxKind::GOOGLE_WARNING, range, children) } @@ -370,6 +395,11 @@ fn build_see_also_node(header: &EntryHeader, range: TextRange, source: &str) -> } if let Some(desc) = header.first_description { children.push(SyntaxElement::Token(SyntaxToken::new(SyntaxKind::DESCRIPTION, desc))); + } else if let Some(colon) = header.colon { + children.push(SyntaxElement::Token(SyntaxToken::new( + SyntaxKind::DESCRIPTION, + TextRange::new(colon.end(), colon.end()), + ))); } SyntaxNode::new(SyntaxKind::GOOGLE_SEE_ALSO_ITEM, range, children) } @@ -412,7 +442,13 @@ fn extend_last_node_description(nodes: &mut [SyntaxElement], cont: TextRange) { for child in node.children_mut() { if let SyntaxElement::Token(t) = child { if t.kind() == SyntaxKind::DESCRIPTION { - t.extend_range(cont); + if t.is_missing() { + // Zero-length placeholder: replace range entirely rather + // than extending from the old (wrong) start position. 
+ *t = SyntaxToken::new(SyntaxKind::DESCRIPTION, cont); + } else { + t.extend_range(cont); + } found_desc = true; break; } diff --git a/src/parse/numpy/parser.rs b/src/parse/numpy/parser.rs index f562d03..89a3db0 100644 --- a/src/parse/numpy/parser.rs +++ b/src/parse/numpy/parser.rs @@ -136,10 +136,13 @@ fn parse_name_and_type(text: &str, line_idx: usize, col_base: usize, cursor: &Li let after_trimmed = after_colon.trim(); if after_trimmed.is_empty() { + // Colon present but no type text: emit a zero-length TYPE so callers + // can use `type_().is_missing()` to distinguish `a :` from `a`. + let missing_type = cursor.make_line_range(line_idx, colon_col + 1, 0); return ParamHeaderParts { names, colon: colon_span, - param_type: None, + param_type: Some(missing_type), optional: None, default_keyword: None, default_separator: None, @@ -180,6 +183,10 @@ fn parse_name_and_type(text: &str, line_idx: usize, col_base: usize, cursor: &Li if !val.is_empty() { let val_abs = cursor.substr_offset(val); default_value = Some(TextRange::from_offset_len(val_abs, val.len())); + } else { + // Separator present but value absent: zero-length placeholder. + let missing_pos = sep_abs + 1; + default_value = Some(TextRange::from_offset_len(missing_pos, 0)); } } else if let Some(rest) = after_kw.strip_prefix(':') { let sep_pos = seg.rfind(':').unwrap(); @@ -189,6 +196,10 @@ fn parse_name_and_type(text: &str, line_idx: usize, col_base: usize, cursor: &Li if !val.is_empty() { let val_abs = cursor.substr_offset(val); default_value = Some(TextRange::from_offset_len(val_abs, val.len())); + } else { + // Separator present but value absent: zero-length placeholder. + let missing_pos = sep_abs + 1; + default_value = Some(TextRange::from_offset_len(missing_pos, 0)); } } else { let val = after_kw.trim_start(); diff --git a/src/parse/plain.rs b/src/parse/plain.rs new file mode 100644 index 0000000..ec13b52 --- /dev/null +++ b/src/parse/plain.rs @@ -0,0 +1,11 @@ +//! Plain docstring style. +//! +//! 
"Plain" covers docstrings that contain no NumPy or Google style section +//! markers — i.e. a summary, an optional extended summary, and nothing else. +//! Unrecognised styles such as Sphinx are also treated as plain. + +pub mod nodes; +pub mod parser; +pub mod to_model; + +pub use parser::parse_plain; diff --git a/src/parse/plain/nodes.rs b/src/parse/plain/nodes.rs new file mode 100644 index 0000000..3cf28b6 --- /dev/null +++ b/src/parse/plain/nodes.rs @@ -0,0 +1,33 @@ +//! Typed wrapper for the plain-style docstring root node. + +use crate::syntax::{SyntaxKind, SyntaxNode, SyntaxToken}; + +// ============================================================================= +// PlainDocstring +// ============================================================================= + +/// Typed wrapper for [`SyntaxKind::PLAIN_DOCSTRING`] nodes. +#[derive(Debug)] +pub struct PlainDocstring<'a>(pub(crate) &'a SyntaxNode); + +impl<'a> PlainDocstring<'a> { + /// Try to cast a `SyntaxNode` reference into this typed wrapper. + pub fn cast(node: &'a SyntaxNode) -> Option { + (node.kind() == SyntaxKind::PLAIN_DOCSTRING).then_some(Self(node)) + } + + /// Access the underlying `SyntaxNode`. + pub fn syntax(&self) -> &'a SyntaxNode { + self.0 + } + + /// Brief summary token, if present. + pub fn summary(&self) -> Option<&'a SyntaxToken> { + self.0.find_token(SyntaxKind::SUMMARY) + } + + /// Extended summary token, if present. + pub fn extended_summary(&self) -> Option<&'a SyntaxToken> { + self.0.find_token(SyntaxKind::EXTENDED_SUMMARY) + } +} diff --git a/src/parse/plain/parser.rs b/src/parse/plain/parser.rs new file mode 100644 index 0000000..f9c9404 --- /dev/null +++ b/src/parse/plain/parser.rs @@ -0,0 +1,107 @@ +//! Plain docstring parser (SyntaxNode-based). +//! +//! Parses docstrings that contain no NumPy or Google style section markers. +//! Produces a [`Parsed`] with a [`SyntaxKind::PLAIN_DOCSTRING`] root that may +//! contain a [`SyntaxKind::SUMMARY`] token and an +//! 
[`SyntaxKind::EXTENDED_SUMMARY`] token. + +use crate::cursor::{LineCursor, indent_len}; +use crate::syntax::{Parsed, SyntaxElement, SyntaxKind, SyntaxNode, SyntaxToken}; +use crate::text::TextRange; + +// ============================================================================= +// Parser +// ============================================================================= + +/// Build the TextRange covering `first..=last` content lines (trimmed). +fn build_content_range(cursor: &LineCursor, first: Option, last: usize) -> Option { + first.map(|f| { + let first_line = cursor.line_text(f); + let first_col = indent_len(first_line); + let last_line = cursor.line_text(last); + let last_col = indent_len(last_line) + last_line.trim().len(); + cursor.make_range(f, first_col, last, last_col) + }) +} + +/// Parse a plain docstring (no NumPy or Google section markers). +/// +/// The returned [`Parsed`] has a [`SyntaxKind::PLAIN_DOCSTRING`] root that +/// contains at most one `SUMMARY` token and one `EXTENDED_SUMMARY` token. +/// Unrecognised styles (e.g. Sphinx) are also parsed this way. 
+///
+/// # Example
+///
+/// ```rust
+/// use pydocstring::parse::plain::{parse_plain, nodes::PlainDocstring};
+/// use pydocstring::syntax::SyntaxKind;
+///
+/// let result = parse_plain("Summary.\n\nMore details here.");
+/// assert_eq!(result.root().kind(), SyntaxKind::PLAIN_DOCSTRING);
+///
+/// let doc = PlainDocstring::cast(result.root()).unwrap();
+/// assert_eq!(doc.summary().unwrap().text(result.source()), "Summary.");
+/// assert_eq!(doc.extended_summary().unwrap().text(result.source()), "More details here.");
+/// ```
+pub fn parse_plain(input: &str) -> Parsed {
+    let mut line_cursor = LineCursor::new(input);
+    let mut root_children: Vec<SyntaxElement> = Vec::new();
+
+    line_cursor.skip_blanks();
+    if line_cursor.is_eof() {
+        let root = SyntaxNode::new(SyntaxKind::PLAIN_DOCSTRING, line_cursor.full_range(), root_children);
+        return Parsed::new(input.to_string(), root);
+    }
+
+    let mut summary_done = false;
+    let mut summary_first: Option<usize> = None;
+    let mut summary_last: usize = 0;
+    let mut ext_first: Option<usize> = None;
+    let mut ext_last: usize = 0;
+
+    while !line_cursor.is_eof() {
+        if line_cursor.current_trimmed().is_empty() {
+            // Blank line: flush summary if not done yet.
+            if !summary_done && summary_first.is_some() {
+                root_children.push(SyntaxElement::Token(SyntaxToken::new(
+                    SyntaxKind::SUMMARY,
+                    build_content_range(&line_cursor, summary_first, summary_last).unwrap(),
+                )));
+                summary_done = true;
+            }
+            line_cursor.advance();
+            continue;
+        }
+
+        if !summary_done {
+            if summary_first.is_none() {
+                summary_first = Some(line_cursor.line);
+            }
+            summary_last = line_cursor.line;
+        } else {
+            if ext_first.is_none() {
+                ext_first = Some(line_cursor.line);
+            }
+            ext_last = line_cursor.line;
+        }
+
+        line_cursor.advance();
+    }
+
+    // Finalise at EOF.
+    if !summary_done && summary_first.is_some() {
+        root_children.push(SyntaxElement::Token(SyntaxToken::new(
+            SyntaxKind::SUMMARY,
+            build_content_range(&line_cursor, summary_first, summary_last).unwrap(),
+        )));
+    }
+    if ext_first.is_some() {
+        root_children.push(SyntaxElement::Token(SyntaxToken::new(
+            SyntaxKind::EXTENDED_SUMMARY,
+            build_content_range(&line_cursor, ext_first, ext_last).unwrap(),
+        )));
+    }
+
+    let root = SyntaxNode::new(SyntaxKind::PLAIN_DOCSTRING, line_cursor.full_range(), root_children);
+    Parsed::new(input.to_string(), root)
+}
diff --git a/src/parse/plain/to_model.rs b/src/parse/plain/to_model.rs
new file mode 100644
index 0000000..76e60d1
--- /dev/null
+++ b/src/parse/plain/to_model.rs
@@ -0,0 +1,20 @@
+//! Convert a plain-style AST into the style-independent [`Docstring`] model.
+
+use crate::model::Docstring;
+use crate::parse::plain::nodes::PlainDocstring;
+use crate::syntax::Parsed;
+
+/// Build a [`Docstring`] from a plain-style [`Parsed`] result.
+///
+/// Returns `None` if the root node is not a `PLAIN_DOCSTRING`.
+pub fn to_model(parsed: &Parsed) -> Option<Docstring> {
+    let source = parsed.source();
+    let root = PlainDocstring::cast(parsed.root())?;
+
+    Some(Docstring {
+        summary: root.summary().map(|t| t.text(source).to_owned()),
+        extended_summary: root.extended_summary().map(|t| t.text(source).to_owned()),
+        deprecation: None,
+        sections: Vec::new(),
+    })
+}
diff --git a/src/syntax.rs b/src/syntax.rs
index 921a411..da7c8c1 100644
--- a/src/syntax.rs
+++ b/src/syntax.rs
@@ -97,6 +97,12 @@ pub enum SyntaxKind {
     /// A single method entry.
     GOOGLE_METHOD,
 
+    // ── Plain node ─────────────────────────────────────────────────────
+    /// Root node for a plain docstring (summary/extended summary only,
+    /// no NumPy or Google style section markers).
+    /// Also used for unrecognised styles such as Sphinx.
+    PLAIN_DOCSTRING,
+
     // ── NumPy nodes ────────────────────────────────────────────────────
     /// Root node for a NumPy-style docstring.
NUMPY_DOCSTRING, @@ -129,7 +135,8 @@ impl SyntaxKind { pub const fn is_node(self) -> bool { matches!( self, - Self::GOOGLE_DOCSTRING + Self::PLAIN_DOCSTRING + | Self::GOOGLE_DOCSTRING | Self::GOOGLE_SECTION | Self::GOOGLE_SECTION_HEADER | Self::GOOGLE_ARG @@ -199,6 +206,8 @@ impl SyntaxKind { Self::GOOGLE_SEE_ALSO_ITEM => "GOOGLE_SEE_ALSO_ITEM", Self::GOOGLE_ATTRIBUTE => "GOOGLE_ATTRIBUTE", Self::GOOGLE_METHOD => "GOOGLE_METHOD", + // Plain node + Self::PLAIN_DOCSTRING => "PLAIN_DOCSTRING", // NumPy nodes Self::NUMPY_DOCSTRING => "NUMPY_DOCSTRING", Self::NUMPY_SECTION => "NUMPY_SECTION", diff --git a/tests/detect_style.rs b/tests/detect_style.rs index df44b4e..bba81f6 100644 --- a/tests/detect_style.rs +++ b/tests/detect_style.rs @@ -1,4 +1,5 @@ -use pydocstring::parse::{Style, detect_style}; +use pydocstring::parse::{Style, detect_style, parse}; +use pydocstring::syntax::SyntaxKind; #[test] fn test_detect_numpy() { @@ -13,6 +14,40 @@ fn test_detect_google() { } #[test] -fn test_detect_plain_defaults_to_google() { - assert_eq!(detect_style("Just a summary."), Style::Google); +fn test_detect_plain_summary_only() { + assert_eq!(detect_style("Just a summary."), Style::Plain); +} + +#[test] +fn test_detect_plain_summary_and_extended() { + assert_eq!(detect_style("Summary.\n\nMore detail here."), Style::Plain); +} + +#[test] +fn test_detect_plain_empty() { + assert_eq!(detect_style(""), Style::Plain); +} + +#[test] +fn test_detect_plain_sphinx() { + let input = "Summary.\n\n:param x: Description.\n:type x: int\n:rtype: int"; + assert_eq!(detect_style(input), Style::Plain); +} + +#[test] +fn test_parse_dispatches_to_plain() { + let result = parse("Just a summary."); + assert_eq!(result.root().kind(), SyntaxKind::PLAIN_DOCSTRING); +} + +#[test] +fn test_parse_dispatches_to_google() { + let result = parse("Summary.\n\nArgs:\n x: Desc."); + assert_eq!(result.root().kind(), SyntaxKind::GOOGLE_DOCSTRING); +} + +#[test] +fn test_parse_dispatches_to_numpy() { + let result = 
parse("Summary.\n\nParameters\n----------\nx : int\n Desc."); + assert_eq!(result.root().kind(), SyntaxKind::NUMPY_DOCSTRING); } diff --git a/tests/plain.rs b/tests/plain.rs new file mode 100644 index 0000000..73573de --- /dev/null +++ b/tests/plain.rs @@ -0,0 +1,64 @@ +use pydocstring::parse::plain::{nodes::PlainDocstring, parse_plain, to_model::to_model}; +use pydocstring::syntax::SyntaxKind; + +#[test] +fn test_empty() { + let result = parse_plain(""); + assert_eq!(result.root().kind(), SyntaxKind::PLAIN_DOCSTRING); + let doc = PlainDocstring::cast(result.root()).unwrap(); + assert!(doc.summary().is_none()); + assert!(doc.extended_summary().is_none()); +} + +#[test] +fn test_summary_only() { + let result = parse_plain("Just a summary."); + let doc = PlainDocstring::cast(result.root()).unwrap(); + assert_eq!(doc.summary().unwrap().text(result.source()), "Just a summary."); + assert!(doc.extended_summary().is_none()); +} + +#[test] +fn test_summary_and_extended() { + let result = parse_plain("Summary.\n\nExtended description.\nMore lines."); + let doc = PlainDocstring::cast(result.root()).unwrap(); + assert_eq!(doc.summary().unwrap().text(result.source()), "Summary."); + assert_eq!( + doc.extended_summary().unwrap().text(result.source()), + "Extended description.\nMore lines." 
+ ); +} + +#[test] +fn test_sphinx_treated_as_plain() { + let input = "Summary.\n\n:param x: Description.\n:rtype: int"; + let result = parse_plain(input); + assert_eq!(result.root().kind(), SyntaxKind::PLAIN_DOCSTRING); + let doc = PlainDocstring::cast(result.root()).unwrap(); + assert_eq!(doc.summary().unwrap().text(result.source()), "Summary."); +} + +#[test] +fn test_to_model_empty() { + let result = parse_plain(""); + let model = to_model(&result).unwrap(); + assert!(model.summary.is_none()); + assert!(model.extended_summary.is_none()); + assert!(model.sections.is_empty()); +} + +#[test] +fn test_to_model_summary_only() { + let result = parse_plain("Summary."); + let model = to_model(&result).unwrap(); + assert_eq!(model.summary.as_deref(), Some("Summary.")); + assert!(model.extended_summary.is_none()); +} + +#[test] +fn test_to_model_returns_none_for_wrong_kind() { + use pydocstring::parse::google::parse_google; + let result = parse_google("Summary.\n\nArgs:\n x: Desc."); + // google root → plain to_model should return None + assert!(to_model(&result).is_none()); +}