diff --git a/.editorconfig b/.editorconfig index b4781ef..ba3e2f3 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,25 +1,16 @@ root = true [*] -charset = utf-8 end_of_line = lf -indent_style = space -indent_size = 4 -trim_trailing_whitespace = true insert_final_newline = true -max_line_length = 100 - -[smoosh] -indent_style = space -indent_size = 2 -shell_variant = bash +trim_trailing_whitespace = true +charset = utf-8 [*.{sh,bash,bats}] indent_style = space indent_size = 2 -shell_variant = bash -[*.{yml,yaml}] +[smoosh] indent_style = space indent_size = 2 diff --git a/.gitattributes b/.gitattributes index 08a2d9b..647df36 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,14 +1,6 @@ -# Enforce LF line endings everywhere — prevents CRLF issues on Windows/Git Bash. * text=auto eol=lf +*.sh text eol=lf +smoosh text eol=lf -# Binary files — do not diff or merge. -*.png binary -*.gif binary -*.jpg binary - -# Shell scripts — ensure LF and enable linguist detection. -*.sh text eol=lf linguist-language=Shell -smoosh text eol=lf linguist-language=Shell - -# Bats test files. -*.bats text eol=lf linguist-language=Shell +# Hide golden tests from PR diffs and language stats +test/golden/** -diff linguist-generated=true diff --git a/.gitignore b/.gitignore index 1480bb3..a846629 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,10 @@ todos/ # test artefacts /tmp/ +coverage/ + +# release artefacts +*.sha256 # fixture files that intentionally resemble secrets/env files !test/fixtures/golden-repo/.env.example diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f8c232e..01c14cb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,7 +1,5 @@ # Contributing to smoosh -Thank you for your interest in contributing! 
- ## Prerequisites - Bash 3.2+ (test with `bash --version`) diff --git a/README.md b/README.md index 8c5ebe1..9d6e5ef 100644 --- a/README.md +++ b/README.md @@ -9,14 +9,13 @@ [![macOS](https://img.shields.io/badge/macOS-supported-brightgreen)](#installation) [![Linux](https://img.shields.io/badge/Linux-supported-brightgreen)](#installation) -Turn any git repo into AI-ready context — for NotebookLM, Claude Projects, -ChatGPT, or your own RAG pipeline. Pure bash, zero dependencies. +Turn git repos into AI context. Pure bash, zero dependencies. RAG-ready. ![smoosh interactive demo](assets/demo.gif) -**[Quick Start](#quick-start)** · **[Why smoosh?](#why-smoosh)** · **[Features](#features)** · **[Installation](#installation)** · **[Uninstall](#uninstall)** · **[Usage](#usage)** · **[AI Tools](#using-smoosh-with-ai-tools)** · **[Agent / CI](#agents-and-ci-pipelines)** · **[Config Reference](#configuration-reference)** · **[FAQ](#faq)** +**[Quick Start](#quick-start)** · **[Why smoosh?](#why-smoosh)** · **[Features](#features)** · **[Installation](#installation)** · **[Usage](#usage)** · **[AI Tools](#using-smoosh-with-ai-tools)** · **[Agent / CI](#agents-and-ci-pipelines)** · **[Config Reference](#configuration-reference)** · **[FAQ](#faq)** ## Quick Start @@ -30,54 +29,39 @@ smoosh --code # docs + code files smoosh --all # everything tracked by git ``` -Output lands in `_smooshes/` — chunked, verified `.md` files ready to -drop into your AI tool of choice. +Output lands in `_smooshes/` as verified `.md` chunks. ## Why smoosh? -AI tools are powerful when they have the right context. The hard part is -getting an entire codebase into them — in the right format, within token -limits, without accidentally including secrets. smoosh handles all of that -in one command. +Getting codebase context into AI takes time. Other tools do this but require bloated `node_modules` or Python environments just to concatenate text. 
We built smoosh internally for a **zero-dependency, native** approach. It turns a 20-minute chore into one fast, reliable command. -**Understand your codebase in plain language.** Upload smoosh output to -NotebookLM and ask questions about architecture, module boundaries, or what -that obscure utility actually does. Technical knowledge becomes accessible to -everyone on the team — not just the people who wrote the code. Product, -design, and leadership get answers without reading source files. +- **Understand codebases:** Upload to tools like NotebookLM to talk through your architecture without reading source. +- **Give AI context:** Drop into Claude/ChatGPT for an assistant that knows your code, eliminating hallucinated APIs. +- **Onboard instantly:** Give new hires a searchable snapshot to learn from, regardless of their technical background. +- **Ground your agents:** Output is RAG-optimised, chunked within limits, and retains metadata. +- **Private by default:** Runs locally. No API keys, zero telemetry. -**Give AI real context.** Drop the output into Claude Projects -or ChatGPT and get an assistant that actually knows your codebase. No -hallucinated function signatures, no "I don't have access to that file." It -can answer questions about any file, understand cross-module relationships, -and suggest changes that fit your existing patterns. +### How it Works -**Onboard in hours.** New team members get a searchable snapshot -of the entire codebase before they even clone the repo. Pair it with -NotebookLM and they can ask the codebase questions on day one. +smoosh isn't just a wrapper around `cat`. It is a strict, structured pipeline: -**Ground your agents in fact.** smoosh output is optimised for -retrieval-augmented generation (RAG) — chunked within token limits, with -file path metadata preserved. Instead of hallucinating, your agents retrieve -real context from your actual code. - -**Private by default.** Everything runs locally. 
Your code never leaves your -machine unless you choose to upload it. No API keys, no SaaS accounts, no -telemetry. +1. **Discovery**: Uses `git ls-files` to perfectly respect your `.gitignore`. +2. **Filtering**: Applies extension rules (`--docs`, `--code`) or MIME-type checks (`--all`) to drop binaries and noise. +3. **Chunking**: Streams content through a fast word-count heuristic, starting a new chunk only at a file boundary so no file is ever split across chunks. +4. **Verification**: The final output is strictly cross-referenced against the expected file list. Any mismatch yields an immediate `exit 4`. ## Features -- **File type presets** — `--docs` (default: md, rst, txt, adoc), `--code` (adds all code extensions), `--all` (everything) -- **Smart chunking** — stays within word limits; names chunks `project_part1.md`, `project_part2.md` -- **100% verification** — every chunk is integrity-checked against the expected file list; exits 4 on mismatch -- **Interactive mode** — guided first-run experience: scans your repo, shows a breakdown, lets you pick a mode -- **Remote repositories** — `smoosh https://github.com/user/repo` — clones and processes in one step -- **Secrets detection** — warns about AWS keys, GitHub PATs, PEM private key blocks; honest about scope -- **Output formats** — Markdown (default), plain text, XML with CDATA sections -- **Table of contents** — `--toc` generates a per-chunk file index with word counts -- **Line numbers** — `--line-numbers` for code review workflows -- **Dry run** — `--dry-run` shows what would be included with word counts, no files written -- **Agent-native** — designed to be called by AI agents and CI pipelines, not just humans. `--json` for structured output, `--no-interactive` for headless runs, exit codes 0–7 for programmatic decision-making +- **File presets** — `--docs` (md, txt, etc.), `--code` (docs + code), `--all` (excludes binaries via MIME checks). +- **Smart chunking** — Stays within word limits (`project_part1.md`). 
+- **100% verification** — Exits 4 if output mismatches the git index. +- **Interactive mode** — Guided setup on first run. +- **Remote repos** — `smoosh https://github.com/user/repo` (clones & processes instantly). +- **Secrets detection** — Warns on AWS keys, PATs, and PEM blocks. +- **Output formats** — Markdown, text, or CDATA XML. +- **Table of contents** — `--toc` generates a per-chunk file index. +- **Line numbers** — `--line-numbers` for code reviews. +- **Agent-native** — Designed for CI/Agents (`--json`, `--no-interactive`, deterministic exits). ### Power user workflow @@ -87,120 +71,66 @@ Preview, filter, and pipe — all from flags: ## Installation -### Homebrew (macOS / Linux) - -```bash -brew install K1-R1/tap/smoosh -``` +- **Homebrew (macOS/Linux):** `brew install K1-R1/tap/smoosh` +- **curl (macOS/Linux):** `curl -fsSL https://raw.githubusercontent.com/K1-R1/smoosh/main/install.sh | bash` +- **Manual:** Download binary and checksum from [Releases](https://github.com/K1-R1/smoosh/releases), run `chmod +x`, and move to your `PATH`. +- **Uninstall:** `brew uninstall smoosh` or `rm "$(which smoosh)"`. -### curl (macOS / Linux / Git Bash) +*Note: The curl script installs to `/usr/local/bin`. Override via env vars like `SMOOSH_INSTALL_DIR="$HOME/.local/bin"`.* -```bash -curl -fsSL https://raw.githubusercontent.com/K1-R1/smoosh/main/install.sh | bash -``` - -Installs to `/usr/local/bin`. Override with: - -```bash -SMOOSH_INSTALL_DIR="$HOME/.local/bin" \ - curl -fsSL https://raw.githubusercontent.com/K1-R1/smoosh/main/install.sh | bash -``` - -The installer supports these environment variables: +**Installer Variables:** | Variable | Default | Description | | --- | --- | --- | -| `SMOOSH_INSTALL_DIR` | `/usr/local/bin` | Installation directory | -| `SMOOSH_VERSION` | latest | Pin a specific version (e.g. 
`1.0.1`) | -| `SMOOSH_NO_CONFIRM` | `0` | Set to `1` to skip confirmation prompt | -| `SMOOSH_NO_VERIFY` | `0` | Set to `1` to skip checksum verification (unsafe) | - -### Manual - -```bash -curl -fsSL https://github.com/K1-R1/smoosh/releases/latest/download/smoosh -o smoosh -curl -fsSL https://github.com/K1-R1/smoosh/releases/latest/download/smoosh.sha256 -o smoosh.sha256 -sha256sum -c smoosh.sha256 -chmod +x smoosh -sudo mv smoosh /usr/local/bin/ -``` - -### Uninstall - -```bash -# Homebrew -brew uninstall smoosh - -# curl / manual -rm "$(which smoosh)" -``` - -If you installed via both methods, check `which smoosh` after removing one — a -second copy may remain in a different location. +| `SMOOSH_INSTALL_DIR` | `/usr/local/bin` | Target directory | +| `SMOOSH_VERSION` | latest | Pin a specific version | +| `SMOOSH_NO_CONFIRM` | `0` | Skip confirmation prompts | +| `SMOOSH_NO_VERIFY` | `0` | Skip checksums (unsafe) | ## Usage -### Basics - ```bash -smoosh # interactive mode when run with no args -smoosh . # current directory (docs mode) +smoosh # interactive guided setup +smoosh . 
# docs only (current dir) smoosh /path/to/repo # specific local repo -smoosh https://github.com/user/repo # remote repo — clone + process in one step -``` - -### File types - -```bash -smoosh --docs # markdown, rst, txt, adoc, asciidoc, org, tex (default) -smoosh --code # docs + py, js, ts, rs, go, java, rb, and many more -smoosh --all # everything tracked by git (binary files excluded via MIME check) +smoosh https://github.com/user/repo # remote clone + process ``` -### Filtering +**Modifiers:** ```bash -smoosh --only "*.py" # Python files only (overrides mode) -smoosh --include "*.vue,*.graphql" # add extensions to current mode -smoosh --exclude "vendor/*,test/*" # exclude matching paths -smoosh --include-hidden # include .github/, .env.example, dotfiles +# Scope +smoosh --docs # default: docs +smoosh --code # docs + code +smoosh --all # everything (excluding binaries) + +# Filters +smoosh --only "*.py" # strict extension match +smoosh --include "*.vue,*.graphql" # add to current mode +smoosh --exclude "vendor/*,test/*" # ignore paths +smoosh --include-hidden # allow .github/, .env, etc. 
+ +# Output & Formatting +smoosh --format [md|text|xml] # default: md +smoosh --max-words 200000 # default: 450k +smoosh --output-dir ./context # default: _smooshes +smoosh --toc --line-numbers # add index & line numbers + +# Preview & Automation +smoosh --dry-run # preview files and word counts +smoosh --json --no-interactive # CI/Agent native JSON output +smoosh --quiet # outputs file paths only (for piping) +smoosh --no-check-secrets # skip the secrets scan +smoosh --no-color # disable ANSI colours ``` -### Output options +**Examples:** ```bash -smoosh --format md # Markdown with ### File: headers (default) -smoosh --format text # plain text with === separators -smoosh --format xml # XML with CDATA sections (for structured pipelines) -smoosh --toc # table of contents in each chunk -smoosh --line-numbers # prefix each line with its number -smoosh --max-words 200000 # custom chunk size (default: 450,000) -smoosh --output-dir ./context # write to a custom directory -``` - -### Preview and automation - -```bash -smoosh --dry-run # show file list + word counts, no output written -smoosh --quiet # print output paths only, one per line (for piping) -smoosh --json # structured JSON to stdout -smoosh --no-interactive # skip interactive mode, use flag defaults -smoosh --no-check-secrets # skip the secrets scan -``` - -### Combining flags - -```bash -# Full code review context with TOC and line numbers -smoosh --code --toc --line-numbers - -# Python-only export for a RAG pipeline +# Python-only export to XML smoosh --only "*.py" --format xml --output-dir ./pipeline-input -# Preview what a remote repo contains before processing -smoosh --dry-run https://github.com/user/repo - -# Quiet mode for scripting +# Quiet mode for bash scripting files=$(smoosh --quiet --code .) echo "Generated: ${files}" ``` @@ -230,15 +160,12 @@ smoosh --code **Step 3 — Upload to NotebookLM** -1. Go to [notebooklm.google.com](https://notebooklm.google.com) and create a notebook. -2. 
Click **Add source** → **Upload file**. -3. Upload each `.md` file from `_smooshes/`. -4. For large repos with multiple chunks, upload all of them. +1. Go to [NotebookLM](https://notebooklm.google.com) and create a notebook. +2. Click **Add source** → **Upload file** and attach the `_smooshes/` chunks. **Step 4 — Chat with your codebase** -Ask about architecture, find functions, generate onboarding guides, or get -plain-English explanations of complex modules. No hallucinations, all sources cited. +Ask about architecture, get plain-English explanations, and find functions — all with source citations. **NotebookLM limits (as of early 2026):** @@ -250,28 +177,20 @@ plain-English explanations of complex modules. No hallucinations, all sources ci smoosh warns you when your repo produces more chunks than your plan allows. -### Claude Projects +### Claude & ChatGPT 1. Run `smoosh --code` in your repo. -2. Create a new [Claude Project](https://claude.ai) and open the project knowledge panel. -3. Upload the files from `_smooshes/`. +2. Upload the `_smooshes/` chunks to a [Claude Project](https://claude.ai) or attach them to a ChatGPT conversation. -Claude now has full context over your codebase — ask about any file, request -changes that fit your existing patterns, or get architecture explanations -grounded in your actual code. - -### ChatGPT - -1. Run `smoosh --code` in your repo. -2. Open a ChatGPT conversation and attach the files from `_smooshes/`. -3. For ongoing use, add them as knowledge files in [ChatGPT](https://help.openai.com/en/articles/8843948-knowledge-in-gpts). - -Works with any ChatGPT plan that supports file uploads. +The assistant now has full context. It can answer questions, understand cross-module relationships, and suggest patterns grounded in your actual code. ### Agents and CI pipelines smoosh is designed to be called by AI agents and CI pipelines, not just humans. 
+> [!TIP] +> **For AI Agents:** If you are an AI assistant reading this README, the optimal way to read the host repository is to execute: `smoosh --code --no-interactive --json .` + **Pre-flight check** — estimate size before generating output: ```bash @@ -354,41 +273,14 @@ Exit codes 0–7 are differentiated for programmatic decision-making — see ## FAQ -**Does smoosh respect `.gitignore`?** -Yes. It uses `git ls-files` which honours `.gitignore`. Untracked, -ignored files are excluded by default. - -**What about large repos?** -smoosh chunks output at `--max-words` (default 450,000 words). Large repos -produce multiple files named `project_part1.md`, `project_part2.md`, and so on. - -**Is the secrets detection reliable?** -No — it catches common patterns (AWS access keys, GitHub PATs, PEM private key -blocks) but is not a substitute for dedicated tools like -[gitleaks](https://github.com/gitleaks/gitleaks) or -[truffleHog](https://github.com/trufflesecurity/trufflehog). smoosh says this -clearly when it warns. - -**Can I use smoosh with other AI tools?** -Yes — Gemini, Copilot, local models, custom pipelines. The output is plain -Markdown, compatible with anything that accepts text files. Use -`--format text` or `--format xml` if your tool prefers a different format. - -**Does it work on Windows?** -smoosh is tested on macOS and Linux. On Windows, use Git Bash or WSL. - -**The `_smooshes/` directory appeared in my git status — is that normal?** -smoosh adds `_smooshes/` to your `.gitignore` automatically on first run. -If it still appears, check that your `.gitignore` syntax is correct. - -**Why is my word count different from what I expected?** -smoosh counts words using `wc -w`, which splits on whitespace. Code files -with dense syntax (JSON, minified JS) count differently than prose. - -**Is it overengineered for a shell script?** -Absolutely. 228 tests, 100% file inclusion verification, CDATA escaping for -XML output, and a box-drawing letter logo. 
But your codebase deserves to be
-smooshed properly.
+**Does it respect `.gitignore`?** Yes, via `git ls-files`.
+**What about large repos?** Chunks at `--max-words` (default 450k).
+**Is secret detection reliable?** It catches common patterns, but isn't a replacement for `gitleaks`.
+**Can I use it with other AIs?** Yes, it fundamentally outputs standard Markdown (or text/XML mapping).
+**Does it work on Windows?** Use Git Bash or WSL.
+**Why did `_smooshes/` appear in git status?** We auto-append it to `.gitignore`, ensure your syntax is correct.
+**Why is my word count different?** Files are counted via `wc -w`, so dense syntax (minified JS, JSON) counts differently than prose.
+**Is this overengineered?** Yes. 231 tests, strict verification, XML escaping, and a bespoke ANSI logo for a bash script. Your codebase deserves it.
 
 ## Contributing
 
diff --git a/completions/smoosh.bash b/completions/smoosh.bash
new file mode 100644
index 0000000..25a2c8e
--- /dev/null
+++ b/completions/smoosh.bash
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# Bash completion for smoosh. Compatible with Bash 3.2 (stock macOS), where
+# the compopt builtin (added in Bash 4.0) is not available.
+
+# Completion handler for `smoosh`: reads COMP_WORDS/COMP_CWORD and fills
+# COMPREPLY with the candidates for the word under the cursor.
+_smoosh_completions() {
+  local cur prev opts entry
+  COMPREPLY=()
+  cur="${COMP_WORDS[COMP_CWORD]}"
+  prev="${COMP_WORDS[COMP_CWORD-1]}"
+
+  opts="--docs --code --all --only --include --exclude --include-hidden \
+    --output-dir --max-words --format --toc --line-numbers \
+    --no-check-secrets --dry-run --quiet --json --no-color \
+    --no-interactive --help -h --version"
+
+  case "${prev}" in
+    --format)
+      COMPREPLY=( $(compgen -W "md text xml" -- "${cur}") )
+      return 0
+      ;;
+    --output-dir)
+      # compopt is missing on Bash < 4; compgen -d below works everywhere.
+      if type compopt >/dev/null 2>&1; then
+        compopt -o filenames -o dirnames 2>/dev/null || true
+      fi
+      # Read line by line so directory names containing spaces stay intact.
+      while IFS= read -r entry; do
+        COMPREPLY+=("${entry}")
+      done < <(compgen -d -- "${cur}")
+      return 0
+      ;;
+  esac
+
+  if [[ ${cur} == -* ]]; then
+    COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
+    return 0
+  fi
+
+  # Positional PATH argument: fall back to file/dir completion.
+  if type compopt >/dev/null 2>&1; then
+    # Bash >= 4: let readline perform its default filename completion.
+    compopt -o default 2>/dev/null || true
+  else
+    # Bash 3.2: no compopt, so generate the filename candidates directly.
+    while IFS= read -r entry; do
+      COMPREPLY+=("${entry}")
+    done < <(compgen -f -- "${cur}")
+  fi
+  return 0
+}
+
+complete -F _smoosh_completions smoosh
diff --git a/completions/smoosh.fish b/completions/smoosh.fish
new file mode 100644
index 0000000..d8a4e4a
--- /dev/null
+++ b/completions/smoosh.fish
@@ 
-0,0 +1,26 @@
+# fish completion for smoosh
+#
+# File completion is deliberately left enabled for the command itself because
+# smoosh takes an optional positional PATH (or URL) argument. Options whose
+# values are not paths use -x (requires an argument, no file completion).
+
+complete -c smoosh -l docs -d 'Include documentation files'
+complete -c smoosh -l code -d 'Include docs and all code files'
+complete -c smoosh -l all -d 'Include everything tracked by git (excluding binaries)'
+complete -c smoosh -l only -x -d 'Restrict to matching extensions'
+complete -c smoosh -l include -x -d 'Add extensions to current mode'
+complete -c smoosh -l exclude -x -d 'Exclude matching paths'
+complete -c smoosh -l include-hidden -d 'Include dotfiles and dot-directories'
+complete -c smoosh -l output-dir -x -a '(__fish_complete_directories)' -d 'Output directory'
+complete -c smoosh -l max-words -x -d 'Words per output chunk'
+complete -c smoosh -l format -x -a 'md text xml' -d 'Output format'
+complete -c smoosh -l toc -d 'Add a table of contents to each chunk'
+complete -c smoosh -l line-numbers -d 'Prefix each line with its line number'
+complete -c smoosh -l no-check-secrets -d 'Skip the basic secrets scan'
+complete -c smoosh -l dry-run -d 'Preview only, no output files written'
+complete -c smoosh -l quiet -d 'Print output paths only'
+complete -c smoosh -l json -d 'Structured JSON to stdout'
+complete -c smoosh -l no-interactive -d 'Skip interactive mode'
+complete -c smoosh -l no-color -d 'Disable colour output'
+complete -c smoosh -l help -s h -d 'Print full usage'
+complete -c smoosh -l version -d 'Print version'
diff --git a/completions/smoosh.zsh b/completions/smoosh.zsh
new file mode 100644
index 0000000..245bee2
--- /dev/null
+++ b/completions/smoosh.zsh
@@ -0,0 +1,33 @@
+#compdef smoosh
+
+_smoosh() {
+  local -a args
+
+  args=(
+    '--docs[Include documentation files]'
+    '--code[Include docs and all code files]'
+    '--all[Include everything tracked by git (excluding binaries)]'
+    '--only=[Restrict to matching extensions]:glob pattern: '
+    '--include=[Add extensions to current mode]:glob pattern: '
+    '--exclude=[Exclude matching paths]:glob pattern: '
+
'--include-hidden[Include dotfiles and dot-directories]' + '--output-dir=[Output directory]:directory:_files -/' + '--max-words=[Words per output chunk]:number: ' + '--format=[Output format]:format:(md text xml)' + '--toc[Add a table of contents to each chunk]' + '--line-numbers[Prefix each line with its line number]' + '--no-check-secrets[Skip the basic secrets scan]' + '--dry-run[Preview only, no output files written]' + '--quiet[Print output paths only]' + '--json[Structured JSON to stdout]' + '--no-interactive[Skip interactive mode]' + '--no-color[Disable colour output]' + '(-h --help)'{-h,--help}'[Print full usage]' + '--version[Print version]' + '*:file:_files' + ) + + _arguments -s -S $args +} + +_smoosh "$@" diff --git a/docs/plans/2026-03-17-001-fix-acceptance-test-findings-plan.md b/docs/plans/2026-03-17-001-fix-acceptance-test-findings-plan.md deleted file mode 100644 index cc0cc86..0000000 --- a/docs/plans/2026-03-17-001-fix-acceptance-test-findings-plan.md +++ /dev/null @@ -1,248 +0,0 @@ ---- -title: "fix: Resolve acceptance test findings" -type: fix -status: completed -date: 2026-03-17 ---- - -# fix: Resolve acceptance test findings - -## Overview - -Acceptance testing (human walkthrough + agent walkthrough) surfaced 2 bugs, -security hardening items, and documentation gaps. A follow-up audit expanded the -raw findings to ~25. This plan resolves all actionable findings to best practices -before promotional activity begins. - -## Triage - -Each dismissed finding was re-evaluated with justification: - -**Excluded (with rationale):** - -| Finding | Verdict | Justification | -|---------|---------|---------------| -| `--output-dir` path traversal | Dismissed | User controls all inputs on a local CLI tool. Rejecting `..` would break legitimate `--output-dir ../shared-output`. No privilege boundary crossed. | -| `mktemp` explicit error handling | Dismissed | `set -euo pipefail` (line 6) already propagates `mktemp` failures from `$()` substitution. 
Adding `\|\| die` to 8 callsites is redundant. | -| Remote clone symlink race condition | Dismissed | Two-pass remove+verify is sound. The "race" requires attacker write access to a `mktemp -d` dir — meaning they already have the user's UID. | -| `sed` injection in release.yml | Dismissed | `tarball_url` comes from `github.ref_name` (git tags can't contain `&` or `\`). `sha256` is hex from `sha256sum`. Neither can contain sed specials. | -| Token in git clone URL | Dismissed | Standard cross-repo GH Actions pattern. Fine-grained PAT, scoped to one repo. GitHub auto-masks secrets in logs. | -| Pre-release binary validation | Dismissed | smoosh is a bash script. Suggested `file \| grep ELF` check would actually fail. SHA256 checksum IS the integrity check. | -| Troubleshooting section | Dismissed | No user reports yet. Shadow binary issue covered by uninstall section (D2). Add when real issues arrive. | -| `LC_ALL=C` documentation | Dismissed | Set internally by smoosh for consistent sorting. Not consumed from user's environment. Documenting it would confuse. | - -**Included:** everything below. - -## Bugs - -### B1. `.gitignore` write crashes on read-only repos - -- **File:** `smoosh:710-745` -- **Symptom:** When an agent uses `--output-dir /external/path` on a local repo - it has read-only access to, `mktemp` in `REPO_ROOT` fails with exit 1. -- **Root cause:** Line 710-711 warns when output-dir is outside the repo, but - execution falls through to line 733 which creates a temp file in REPO_ROOT. - The `.gitignore` entry would be meaningless anyway — it would contain an - absolute external path. -- **Fix:** After the "outside repo" warning on line 711, add `return 0`. -- **Test:** New bats test — run smoosh with `--output-dir` pointing outside the - repo fixture. Assert success. Assert `.gitignore` in the repo is unchanged. - (Existing test `smoosh_edge_cases.bats:251` asserts only the warning message; - it doesn't verify that `.gitignore` is left untouched.) 
- -### B2. README curl install pinned to v1.0.0 - -- **File:** `README.md:97,104` -- **Symptom:** New users following README install v1.0.0 instead of latest. -- **Fix:** Change from tag-pinned URL to `main` branch: - ``` - https://raw.githubusercontent.com/K1-R1/smoosh/main/install.sh - ``` - The install.sh script already resolves the latest release version at runtime - via GitHub API — the tag in the URL only controls which version of - `install.sh` itself is fetched, not which smoosh version is installed. - Using `main` means users always get the latest install script. -- **Test:** Not testable in bats (documentation). - -### B2b. install.sh header comment pinned to v1.0.0 - -- **File:** `install.sh:5` -- **Fix:** Update to match README (use `main`). -- **Test:** Not testable in bats (documentation comment). - -## Security hardening - -### S1. `.gitignore` symlink check - -- **File:** `smoosh:722-743` -- **Issue:** `.gitignore` modification doesn't verify it's a regular file. If - `.gitignore` is a symlink, the `cp` + `mv` pattern would overwrite the - symlink target. -- **Fix:** Add before line 730: - ```bash - if [[ -L "${gitignore}" ]]; then - warn ".gitignore is a symlink — skipping auto-update" - return 0 - fi - ``` -- **Test:** New bats test — create a repo fixture with `.gitignore` as a - symlink to another file. Run smoosh. Assert the symlink target is unchanged. - Assert warning message is emitted. - -### S2. README manual install lacks checksum verification - -- **File:** `README.md:109-113` -- **Issue:** Manual install downloads binary without integrity check. The - `install.sh` path has SHA256 verification, but the "manual" shortcut bypasses - it entirely. 
-- **Fix:** Add checksum step to the manual install instructions: - ```bash - curl -fsSL https://github.com/K1-R1/smoosh/releases/latest/download/smoosh -o smoosh - curl -fsSL https://github.com/K1-R1/smoosh/releases/latest/download/smoosh.sha256 -o smoosh.sha256 - sha256sum -c smoosh.sha256 - chmod +x smoosh - sudo mv smoosh /usr/local/bin/ - ``` -- **Test:** Not testable in bats (documentation). - -### S3. release.yml uses /tmp instead of $RUNNER_TEMP - -- **File:** `.github/workflows/release.yml:126,132` -- **Issue:** Files written to world-readable `/tmp`. GitHub provides - `$RUNNER_TEMP` as a per-job temp directory. -- **Fix:** Replace `/tmp/smoosh-src.tar.gz` with `"$RUNNER_TEMP/smoosh-src.tar.gz"` - and `/tmp/homebrew-tap` with `"$RUNNER_TEMP/homebrew-tap"`. -- **Test:** Not testable in bats (CI workflow). - -### S4. install.sh sudo escalation without notice - -- **File:** `install.sh:158-166` -- **Issue:** Script tries `install -m 755` silently, then falls back to `sudo` - without notice. On systems with NOPASSWD sudo, this escalates without any - user-visible indication. In the `curl | bash` context, the user can't inspect - what happens before execution. -- **Fix:** Add `warn` before the sudo fallback: - ```bash - if install -m 755 "${tmp_bin}" "${INSTALL_DIR}/${BINARY_NAME}" 2>/dev/null; then - : # success without sudo - else - warn "Writing to ${INSTALL_DIR} requires elevated privileges (sudo)." - if sudo install -m 755 "${tmp_bin}" "${INSTALL_DIR}/${BINARY_NAME}"; then - : # success with sudo - else - die "Installation failed. Try: SMOOSH_INSTALL_DIR=\$HOME/.local/bin bash install.sh" - fi - fi - ``` -- **Test:** Not deterministically testable without mocking filesystem - permissions. The fix is a single `warn` line — low risk, no test needed. - -## Documentation improvements - -### D1. 
Add table of contents to README - -- **File:** `README.md` (top, after logo/tagline) -- **Content:** Linked section headings for Quick Start, Why smoosh, Features, - Installation, Usage, AI Tools, Config Reference, FAQ, Contributing, Licence. - -### D2. Add uninstall instructions - -- **File:** `README.md` (new section after Installation) -- **Content:** - - Homebrew: `brew uninstall smoosh` - - curl/manual: `rm /usr/local/bin/smoosh` (or `which smoosh` to find it) - - Note: if you installed via both methods, check `which smoosh` after removing - one — a shadow binary may remain. - -### D3. Add `--no-color` to README config reference - -- **File:** `README.md:247-267` -- **Issue:** Flag exists in `--help` and code but missing from README table. -- **Fix:** Add row: `--no-color | — | Disable colour output` - -### D4. Document colour environment variables - -- **File:** `README.md` (in or near config reference) -- **Content:** Precedence: `--no-color` flag > `NO_COLOR` > `FORCE_COLOR` > - `CLICOLOR` > TTY auto-detect. Link to https://no-color.org/. - -### D5. Add agent/CI usage section - -- **File:** `README.md` (expand "Using smoosh with AI tools" or new subsection) -- **Content:** - - Recommended agent pre-flight: `smoosh --json --dry-run --all .` - - Recommended agent execution: `smoosh --no-interactive --json --all .` - - JSON output schema example (show actual structure from dry-run) - - Exit codes for programmatic decision-making - - Note: `--json` goes to stdout, status to stderr — safe to pipe - -### D6. Add agent example to `--help` - -- **File:** `smoosh` (help text, Examples section) -- **Add:** - ``` - smoosh --no-interactive --json . # agent / CI pipeline - ``` -- **Test:** Assert `--help` output contains `--no-interactive --json`. - -### D7. 
Document install.sh env vars in README - -- **File:** `README.md` (near curl install section) -- **Content:** Brief mention of `SMOOSH_VERSION`, `SMOOSH_INSTALL_DIR`, - `SMOOSH_NO_CONFIRM`, `SMOOSH_NO_VERIFY` with one-line descriptions. - -## New tests summary - -| Test | File | What it asserts | -|------|------|-----------------| -| External output-dir skips .gitignore | `smoosh_edge_cases.bats` | `--output-dir /tmp/ext` succeeds; repo `.gitignore` unchanged | -| Symlink .gitignore skipped with warning | `smoosh_edge_cases.bats` | `.gitignore` symlink target unchanged; warning emitted | -| `--help` includes agent example | `smoosh_args.bats` | Output contains `--no-interactive --json` | - -## Implementation phases - -### Phase 1: Bug fixes + security hardening (smoosh script) - -1. Fix B1 — add early return in `ensure_output_dir` when output is outside repo -2. Fix S1 — add symlink check before `.gitignore` modification -3. Fix S4 — add sudo warning in install.sh -4. Add agent example to `--help` (D6) -5. Write 3 new bats tests -6. Run: `shellcheck smoosh && shfmt -d -i 2 smoosh && bats test/*.bats` - -### Phase 2: Version pinning + CI fixes - -7. Fix B2 — update README curl URLs from `v1.0.0` to `main` -8. Fix B2b — update install.sh header comment -9. Fix S2 — add checksum verification to README manual install -10. Fix S3 — use `$RUNNER_TEMP` in release.yml - -### Phase 3: README documentation - -11. Add table of contents (D1) -12. Add uninstall section (D2) -13. Add `--no-color` to config reference (D3) -14. Document colour env vars (D4) -15. Add agent/CI usage section with JSON schema example (D5) -16. Document install.sh env vars (D7) - -### Phase 4: Verify - -17. `shellcheck smoosh` -18. `shfmt -d -i 2 smoosh` -19. `bats test/*.bats` -20. 
`prek run` - -## Acceptance criteria - -- [x] `smoosh --output-dir /tmp/ext --no-interactive .` succeeds on a repo - without crashing (B1 fix + test) -- [x] `.gitignore` symlink is not followed (S1 fix + test) -- [x] `--help` output contains agent example (D6 + test) -- [x] README curl install references `main` branch, not a version tag -- [x] README manual install includes SHA256 verification step -- [x] install.sh warns before sudo escalation -- [x] release.yml uses `$RUNNER_TEMP` -- [x] README has table of contents, uninstall section, `--no-color` in config - table, colour env vars, agent usage section, install.sh env vars -- [x] All checks pass: shellcheck, shfmt, bats, prek diff --git a/man/smoosh.1 b/man/smoosh.1 new file mode 100644 index 0000000..d6d5f7d --- /dev/null +++ b/man/smoosh.1 @@ -0,0 +1,120 @@ +.TH SMOOSH 1 "March 2026" "smoosh 1.0.2" "User Commands" +.SH NAME +smoosh \- turn git repos into AI-ready context +.SH SYNOPSIS +.B smoosh +[\fIOPTIONS\fR] [\fIPATH\fR | \fIURL\fR] +.SH DESCRIPTION +.B smoosh +aggregates the contents of a git repository into a format optimised for Large Language Models (LLMs) and Retrieval-Augmented Generation (RAG) pipelines. It performs smart chunking, excludes binary files via MIME checks, and applies rigorous output verification. +.PP +By default, +.B smoosh +runs in an interactive guided mode when executed without arguments. +.SH SCOPE OPTIONS +.TP +.BR \-\-docs +Includes documentation files only (md, rst, txt, adoc, org, tex). This is the default mode when pointing to a directory. +.TP +.BR \-\-code +Includes documentation and all code files (py, js, ts, rs, go, java, rb, etc.). +.TP +.BR \-\-all +Includes everything tracked by git, safely excluding binaries via structural MIME-type checks. +.SH FILTERING OPTIONS +.TP +.BR \-\-only " \fIGLOB\fR" +Restrict output to matching extensions (overrides mode). +.TP +.BR \-\-include " \fIGLOB\fR" +Add extensions to the current mode. 
+.TP +.BR \-\-exclude " \fIGLOB\fR" +Exclude matching paths (comma-separated). +.TP +.BR \-\-include\-hidden +Include dotfiles and dot-directories (e.g., \fI.github/\fR, \fI.env.example\fR). +.SH OUTPUT OPTIONS +.TP +.BR \-\-format " \fIFORMAT\fR" +Output format. Accepts \fBmd\fR (default), \fBtext\fR, or \fBxml\fR. +.TP +.BR \-\-max\-words " \fIN\fR" +Words per output chunk. Default: \fB450000\fR. +.TP +.BR \-\-output\-dir " \fIPATH\fR" +Directory for output files. Default: \fB_smooshes/\fR. +.TP +.BR \-\-toc +Add a table of contents to the top of each chunk. +.TP +.BR \-\-line\-numbers +Prefix each line with its source line number. +.SH PREVIEW AND AUTOMATION OPTIONS +.TP +.BR \-\-dry\-run +Preview file lists and token counts without writing output. +.TP +.BR \-\-quiet +Print output paths only (one per line) to stdout, for piping. +.TP +.BR \-\-json +Write structured JSON to stdout and send status messages to stderr. +.TP +.BR \-\-no\-interactive +Skip interactive mode entirely, using flag defaults. +.TP +.BR \-\-no\-color +Disable ANSI colour output. Also respects the NO_COLOR environment variable. +.TP +.BR \-\-no\-check\-secrets +Skip the basic secrets pattern scan. +.SH EXAMPLES +.PP +Interactive guided setup: +.RS 4 +$ smoosh +.RE +.PP +Docs and code from the current directory: +.RS 4 +$ smoosh \-\-code . +.RE +.PP +Output Python files as XML for a RAG pipeline: +.RS 4 +$ smoosh \-\-only "*.py" \-\-format xml /path/to/repo +.RE +.PP +Clone and process a remote repository: +.RS 4 +$ smoosh https://github.com/K1-R1/smoosh +.RE +.SH EXIT CODES +.B 0 +Success +.br +.B 1 +Invalid flags or arguments +.br +.B 2 +Path not found or not a git repository +.br +.B 3 +No matching files for current mode/filters +.br +.B 4 +Verification failed (expected/actual file list mismatch) +.br +.B 5 +Remote clone failed +.br +.B 7 +Write permission denied +.br +.B 130 +Interrupted (Ctrl-C) +.SH BUGS +Report bugs at https://github.com/K1-R1/smoosh/issues. 
+.SH AUTHOR +smoosh was developed by K1-R1 (and contributors).