From dbc4b5fd3a8d9b19dc3dd08fb41755e957a0a8fb Mon Sep 17 00:00:00 2001 From: Ogulcan Aydogan Date: Fri, 13 Mar 2026 16:19:34 +0000 Subject: [PATCH 1/3] chore: add OpenSSF Scorecard workflow and grant application materials - Add SHA-pinned OpenSSF Scorecard GitHub Actions workflow - Add AISI Challenge Fund application (prompt injection defense) - Add NLnet NGI Zero proposal (open source AI safety middleware) - Add OpenSSF Best Practices badge submission guide - Add Huntr bug bounty registration guide --- .github/workflows/scorecard.yml | 38 +++ docs/grants/aisi-challenge-fund.md | 271 +++++++++++++++++++++ docs/grants/huntr-registration.md | 376 +++++++++++++++++++++++++++++ docs/grants/nlnet-ngi-zero.md | 237 ++++++++++++++++++ docs/grants/openssf-badge-guide.md | 295 ++++++++++++++++++++++ 5 files changed, 1217 insertions(+) create mode 100644 .github/workflows/scorecard.yml create mode 100644 docs/grants/aisi-challenge-fund.md create mode 100644 docs/grants/huntr-registration.md create mode 100644 docs/grants/nlnet-ngi-zero.md create mode 100644 docs/grants/openssf-badge-guide.md diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 0000000..b077dee --- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,38 @@ +name: OpenSSF Scorecard + +on: + branch_protection_rule: + schedule: + - cron: "30 1 * * 1" + push: + branches: [main] + +permissions: read-all + +jobs: + analysis: + name: Scorecard Analysis + runs-on: ubuntu-24.04 + permissions: + security-events: write + id-token: write + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 + with: + persist-credentials: false + + - uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a + with: + results_file: results.sarif + results_format: sarif + publish_results: true + + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 + with: + name: scorecard-results + path: results.sarif + retention-days: 5 + + - 
uses: github/codeql-action/upload-sarif@0d579ffd059c29b07949a3cce3983f0780820c98 + with: + sarif_file: results.sarif diff --git a/docs/grants/aisi-challenge-fund.md b/docs/grants/aisi-challenge-fund.md new file mode 100644 index 0000000..c5fd61f --- /dev/null +++ b/docs/grants/aisi-challenge-fund.md @@ -0,0 +1,271 @@ +# AISI Challenge Fund Application + +**Grant Programme:** AISI Challenge Fund +**Funding Body:** UK AI Safety Institute (AISI), Department for Science, Innovation and Technology +**URL:** https://find-government-grants.service.gov.uk/grants/aisi-challenge-fund-1 +**Funding Range:** GBP 50,000 -- 200,000 +**Application Deadline:** 31 March 2026 +**Applicant:** Ogulcan Aydogan +**Repository:** https://github.com/ogulcanaydogan/Prompt-Injection-Firewall +**License:** Apache 2.0 + +--- + +## 1. Project Title + +**PIF: Open Source Prompt Injection Defense for Safe LLM Deployment** + +--- + +## 2. Executive Summary + +Prompt Injection Firewall (PIF) is an open-source, transparent reverse-proxy security middleware that detects and prevents prompt injection attacks targeting Large Language Model (LLM) applications in real time. The project addresses the number-one risk on the OWASP Top 10 for LLM Applications: prompt injection, a class of attack in which adversarial inputs manipulate LLM behaviour to override developer instructions, exfiltrate data, or produce harmful outputs. + +PIF operates as a drop-in proxy layer between client traffic and LLM provider APIs (OpenAI, Anthropic). It employs a dual-engine detection architecture combining 129 curated regex patterns with a fine-tuned DistilBERT ONNX classifier, producing ensemble scores that drive configurable response actions (block, flag, or log). The system achieves sub-50ms regex latency and sub-100ms ML latency, making it viable for production deployments where added overhead must be minimal. 
+ +This application requests GBP 65,000 to fund a professional security audit of the detection engine, adversarial red-teaming of the ML classifier, model hardening against evasion techniques, and comprehensive documentation to accelerate adoption by organisations deploying LLM-powered services in the UK and internationally. + +--- + +## 3. Problem Statement + +### 3.1 The Threat Landscape + +Prompt injection is classified as the **#1 risk** in the OWASP Top 10 for Large Language Model Applications (2023, reaffirmed 2025). Unlike traditional injection attacks (SQL injection, XSS), prompt injection exploits the fundamental architecture of LLMs: the inability to reliably distinguish between trusted instructions and untrusted user input within the same context window. + +Attack vectors include: + +- **Direct prompt injection:** Adversarial text in user messages that overrides system prompts, causing the LLM to ignore developer-defined constraints. +- **Indirect prompt injection:** Malicious instructions embedded in external data sources (documents, web pages, tool outputs) that the LLM processes during retrieval-augmented generation (RAG). +- **Payload smuggling:** Encoded, obfuscated, or multi-language injection payloads designed to bypass naive keyword filters. + +### 3.2 The Defence Gap + +Despite prompt injection being widely recognised as a critical risk, the ecosystem lacks: + +1. **No open standard defence middleware.** Most mitigation advice consists of prompt engineering best practices ("defence in depth" system prompts), which are necessary but insufficient. There is no widely adopted, protocol-level defence layer analogous to a Web Application Firewall (WAF) for LLM traffic. + +2. **Vendor lock-in for detection.** Commercial solutions exist (Lakera Guard, Protect AI, Robust Intelligence) but are proprietary, opaque in their detection logic, and introduce vendor dependency. Organisations cannot audit the detection rules they rely on. + +3. 
**Latency-sensitivity mismatch.** Academic detectors (perplexity-based methods, large classifier models) often add hundreds of milliseconds or require GPU inference, making them impractical as inline middleware for production API traffic. + +4. **No configurable response policy.** Existing tools typically offer a binary block/allow decision. Production systems need graduated responses: logging for monitoring, flagging for human review, and blocking for high-confidence threats. + +### 3.3 Consequences of Inaction + +Without effective prompt injection defence, LLM-powered applications are vulnerable to: + +- Extraction of confidential system prompts and proprietary instructions +- Manipulation of LLM outputs in customer-facing applications (chatbots, support agents, content generation) +- Data exfiltration through tool-use and function-calling channels +- Reputational and regulatory risk for organisations deploying LLM services + +--- + +## 4. Technical Approach + +### 4.1 Architecture Overview + +PIF is a transparent reverse-proxy written in Go. It intercepts HTTP requests destined for LLM provider APIs, inspects the content of user messages, applies detection, and either forwards the request (clean or flagged) or returns a configurable block response. + +``` +Client --> [PIF Reverse Proxy] --> LLM Provider API (OpenAI / Anthropic) + | + v + Detection Engine + ├── Regex Engine (129 patterns) + ├── ML Engine (DistilBERT ONNX) + └── Ensemble Scorer + | + v + Action Policy (block / flag / log) +``` + +### 4.2 Detection Engines + +**Regex Engine** + +- 129 curated regular expression patterns organised by attack category (direct injection, role hijacking, instruction override, encoding-based evasion, delimiter abuse, multi-language injection). +- Patterns are compiled once at startup and evaluated concurrently. +- Sub-50ms latency for full pattern evaluation against typical message lengths. 
+- Transparent and auditable: every pattern is documented with a rationale and example payload. + +**ML Engine** + +- Fine-tuned DistilBERT classifier exported to ONNX format for CPU inference without Python or GPU dependencies. +- Binary classification: benign vs. injection. +- Sub-100ms inference latency on commodity hardware. +- Trained on curated datasets of injection payloads and benign prompts, with adversarial augmentation. + +**Ensemble Scoring** + +- Configurable weighting between regex and ML scores. +- Threshold-based action mapping: scores above the block threshold trigger rejection; scores in the flag range annotate the request with metadata headers; scores below the log threshold are recorded silently. +- Operators can tune thresholds per deployment to balance security posture against false-positive tolerance. + +### 4.3 Response Actions + +| Action | Behaviour | Use Case | +|--------|-----------|----------| +| **Block** | Returns HTTP 403 with configurable error body. Request is not forwarded. | High-confidence injection. Production enforcement. | +| **Flag** | Forwards request with `X-PIF-Flagged: true` header and metadata. | Medium-confidence. Human review workflows. | +| **Log** | Forwards request unchanged. Detection result logged. | Monitoring, baseline collection, audit trails. | + +### 4.4 API Format Support + +PIF parses and inspects message payloads for both OpenAI Chat Completions API format and Anthropic Messages API format. Format detection is automatic based on request structure and target endpoint. This covers the two most widely deployed commercial LLM APIs. + +### 4.5 Deployment Model + +- Single static binary (Go). No runtime dependencies beyond the ONNX model file. +- Configuration via YAML file or environment variables. +- Docker image available for containerised deployments. +- Designed for sidecar or gateway deployment patterns in Kubernetes environments. + +--- + +## 5. 
Safety Impact and Alignment with AISI Mission + +### 5.1 Direct Safety Impact + +PIF directly contributes to the safe deployment of LLM applications by: + +1. **Preventing manipulation of LLM outputs.** By intercepting adversarial inputs before they reach the model, PIF ensures that LLM behaviour remains within developer-defined constraints. + +2. **Protecting end users.** In consumer-facing applications (customer support chatbots, content assistants, educational tools), prompt injection can cause the LLM to produce misleading, harmful, or manipulative outputs. PIF acts as a safety layer between users and the model. + +3. **Enabling graduated response.** The block/flag/log action model allows organisations to deploy detection in monitoring mode first, build confidence in detection accuracy, and then progressively tighten enforcement, reducing the barrier to adoption. + +4. **Providing transparency.** As an open-source project under Apache 2.0, every detection rule and the ML model architecture are fully auditable. This aligns with the principle that safety-critical infrastructure should be inspectable. + +### 5.2 Alignment with AISI Objectives + +The UK AI Safety Institute's mandate includes developing tools and techniques for evaluating and mitigating risks from AI systems. PIF aligns with this mission in the following ways: + +- **Practical, deployable safety tooling.** PIF is not a research prototype; it is production-grade middleware with 4 releases, CI/CD pipelines, and documented integration guides. AISI funding would harden it for broader adoption. + +- **Open infrastructure for the ecosystem.** An open-source prompt injection defence layer benefits the entire UK AI ecosystem, from startups building LLM applications to enterprises deploying AI in regulated sectors (financial services, healthcare, government). + +- **Defence against a well-characterised risk.** Prompt injection is not a speculative risk; it is actively exploited. 
OWASP, NIST, and the EU AI Act all identify input manipulation as a priority concern. PIF provides a concrete mitigation. + +- **Complementary to model-level safety.** PIF operates at the infrastructure layer, complementing model-level alignment techniques (RLHF, constitutional AI). Defence in depth requires both model-level and infrastructure-level protections. + +--- + +## 6. Current Status + +| Metric | Value | +|--------|-------| +| Language | Go | +| Releases | 4 (stable) | +| Regex Patterns | 129 curated | +| ML Model | DistilBERT ONNX (fine-tuned) | +| CI Workflows | 3 (ci.yml, codeql.yml, release.yml) | +| Static Analysis | CodeQL (integrated in CI), golangci-lint | +| Test Coverage | 80%+ with race detector enabled | +| License | Apache 2.0 | +| API Support | OpenAI Chat Completions, Anthropic Messages | +| Detection Latency | <50ms regex, <100ms ML | +| Response Actions | Block, Flag, Log | + +--- + +## 7. Budget + +**Total Requested: GBP 65,000** + +| Line Item | Cost (GBP) | Description | +|-----------|-----------|-------------| +| Security Audit of Detection Engine | 30,000 | Independent third-party security audit of the regex engine, ML inference pipeline, proxy request handling, and configuration parsing. Includes audit report and remediation verification. Vendor: to be selected from CREST-accredited firms or equivalent. | +| Adversarial Testing Red Team | 15,000 | Engagement of a specialised red team to develop novel prompt injection payloads targeting PIF's detection engines. Goal: identify bypass vectors, measure false-negative rates under adversarial conditions, and produce a categorised evasion report. | +| ML Model Hardening | 12,000 | Adversarial training of the DistilBERT classifier using payloads identified during red-teaming. Includes dataset curation, retraining, evaluation on held-out adversarial test sets, and ONNX model re-export. Covers compute costs for training runs. 
| +| Documentation and Adoption Materials | 8,000 | Comprehensive deployment guides for common infrastructure patterns (Kubernetes sidecar, API gateway, Docker Compose). Threat model documentation. Integration guides for additional LLM providers. Operator runbooks for tuning detection thresholds. | +| **Total** | **65,000** | | + +--- + +## 8. Project Timeline + +**Duration: 14 weeks** + +| Week | Activity | Deliverable | +|------|----------|-------------| +| 1--2 | Security audit scoping and vendor selection | Signed statement of work with audit firm | +| 3--6 | Security audit execution | Draft audit report | +| 4--7 | Red team engagement (overlaps with audit) | Adversarial payload dataset and evasion report | +| 7--8 | Audit remediation | Patched codebase; verification by auditor | +| 8--11 | ML model hardening (adversarial retraining) | Updated ONNX model with evaluation metrics | +| 9--12 | Documentation and integration guides | Published documentation on GitHub | +| 12--13 | Integration testing of hardened system | Regression test results; updated CI | +| 14 | Final report and public release | v2.0 release with audit attestation; final grant report | + +--- + +## 9. Risks and Mitigations + +| Risk | Likelihood | Impact | Mitigation | +|------|-----------|--------|------------| +| Audit identifies critical vulnerabilities | Medium | High | Budget includes remediation time (Weeks 7--8). Responsible disclosure policy already in place. | +| Red team discovers fundamental bypass class | Low | High | ML hardening phase specifically addresses adversarial evasion. Ensemble architecture provides defence in depth. | +| ML retraining degrades benign accuracy | Medium | Medium | Evaluation on held-out benign test set before model promotion. A/B comparison with current model. | +| Timeline slippage due to audit vendor scheduling | Medium | Low | Vendor engagement begins Week 1. Buffer built into Week 12--13 integration phase. | + +--- + +## 10. 
Applicant Background + +**Ogulcan Aydogan** is a software engineer and machine learning practitioner with experience in NLP, LLM fine-tuning, and systems programming. Relevant experience includes: + +- Development and maintenance of PIF from inception through 4 production releases +- Fine-tuning of language models (SFT, DPO) for multilingual NLP tasks +- Experience with Go systems programming, ONNX runtime integration, and CI/CD pipeline design +- Open-source contributor with published models on Hugging Face + +--- + +## 11. Supporting Materials Checklist + +Before submission, ensure the following materials are prepared: + +- [ ] Completed application form on the AISI Challenge Fund portal +- [ ] Project summary (this document, adapted to form fields) +- [ ] Link to public GitHub repository: https://github.com/ogulcanaydogan/Prompt-Injection-Firewall +- [ ] Budget breakdown (Section 7 of this document) +- [ ] Timeline (Section 8 of this document) +- [ ] CV / resume of applicant +- [ ] Evidence of current project status (release tags, CI dashboard, test coverage report) +- [ ] Letter of support (if available; e.g., from an organisation that has evaluated or deployed PIF) +- [ ] Bank account details for grant disbursement (UK bank account required) + +--- + +## 12. Submission Steps + +1. **Register** on the Find a Government Grant portal: https://find-government-grants.service.gov.uk/ +2. **Navigate** to the AISI Challenge Fund listing: https://find-government-grants.service.gov.uk/grants/aisi-challenge-fund-1 +3. **Complete** the online application form, mapping sections of this document to the form fields: + - "Describe your project" --> Sections 2, 3, 4 + - "What is the expected impact?" --> Section 5 + - "Budget and resources" --> Section 7 + - "Project plan and milestones" --> Section 8 + - "Risk assessment" --> Section 9 +4. **Upload** supporting documents (CV, repository evidence, budget spreadsheet if required) +5. 
**Review** all entries for completeness and accuracy +6. **Submit** before 23:59 BST on 31 March 2026 + +--- + +## 13. Key Messages for Reviewers + +When adapting this document for form fields with character limits, prioritise the following points: + +1. **Prompt injection is the #1 LLM security risk** (OWASP Top 10 for LLM Applications). It is not theoretical; it is actively exploited. +2. **No open standard defence exists.** PIF fills a critical gap in the AI safety toolchain. +3. **PIF is production-ready, not a research prototype.** Four releases, CI/CD, 80%+ test coverage, sub-100ms latency. +4. **The grant funds hardening, not creation.** The core system exists and works. Funding enables independent security validation and adversarial robustness testing. +5. **Open source maximises impact.** Apache 2.0 licensing ensures any UK organisation can adopt PIF without vendor lock-in or licensing barriers. +6. **Direct alignment with AISI mandate.** PIF is practical, deployable AI safety infrastructure that protects end users from LLM manipulation. + +--- + +*Document prepared for AISI Challenge Fund application. Last updated: March 2026.* diff --git a/docs/grants/huntr-registration.md b/docs/grants/huntr-registration.md new file mode 100644 index 0000000..2667742 --- /dev/null +++ b/docs/grants/huntr-registration.md @@ -0,0 +1,376 @@ +# Huntr Bug Bounty Registration Guide for Prompt Injection Firewall + +**Platform:** Huntr (https://huntr.com/) +**Purpose:** Register PIF on Huntr's bug bounty platform to enable coordinated vulnerability disclosure by the security research community +**Repository:** https://github.com/ogulcanaydogan/Prompt-Injection-Firewall +**License:** Apache 2.0 + +--- + +## 1. Overview + +Huntr is a bug bounty platform focused on open-source software. 
By registering Prompt Injection Firewall on Huntr, the project gains: + +- A structured channel for security researchers to report vulnerabilities +- Triage support from Huntr's security team +- Incentivised security testing by the research community (Huntr provides bounties) +- Public acknowledgement of the project's commitment to security +- CVE assignment for confirmed vulnerabilities + +This is particularly valuable for PIF because the project is security middleware: vulnerabilities in PIF directly translate to vulnerabilities in every application that depends on it for protection. + +--- + +## 2. Scope Definition + +The scope defines what Huntr researchers should and should not test. A clear scope reduces noise from out-of-scope reports and focuses researcher effort on the most impactful areas. + +### 2.1 In-Scope Components + +The following directories and components are in scope for vulnerability reports. + +| Component | Directory | Description | Priority | +|-----------|-----------|-------------|----------| +| **CLI and entry point** | `cmd/` | Application startup, argument parsing, signal handling | Medium | +| **Detection engines** | `pkg/detector/` | Regex engine, ML engine, ensemble scoring, pattern matching | **Critical** | +| **Proxy layer** | `pkg/proxy/` | HTTP reverse proxy, request/response handling, header manipulation, forwarding logic | **Critical** | +| **Configuration** | `pkg/config/` | YAML parsing, environment variable processing, validation, default values | High | +| **Internal utilities** | `internal/` | Shared internal packages, logging, error handling | Medium | + +### 2.2 Out-of-Scope Components + +The following are explicitly out of scope. Reports against these components will be closed as informational. 
+ +| Component | Directory | Reason | +|-----------|-----------|--------| +| **Example configurations** | `examples/` | Demonstration files, not production code | +| **Documentation** | `docs/` | Static content, no executable code | +| **Test files** | `tests/`, `*_test.go` | Test infrastructure, not shipped in production binary | +| **CI/CD workflows** | `.github/` | Infrastructure configuration; report via GitHub Security Advisories instead | +| **Third-party dependencies** | `vendor/`, `go.sum` | Report upstream to the dependency maintainer | + +### 2.3 In-Scope Vulnerability Types + +Researchers should focus on the following vulnerability categories, which are most relevant to PIF's function as security middleware. + +#### Detection Engine Bypass + +**This is the highest-priority vulnerability class for PIF.** + +- **Regex engine bypass:** Crafted payloads that contain prompt injection content but evade all 129 regex patterns. The payload must be a realistic prompt injection (not a trivially benign string that happens to not match). +- **ML classifier bypass:** Adversarial inputs that cause the DistilBERT ONNX classifier to misclassify a prompt injection payload as benign with high confidence. +- **Ensemble scoring bypass:** Payloads that individually trigger one engine but achieve an ensemble score below all action thresholds, resulting in no detection despite containing injection content. + +#### Proxy Vulnerabilities + +- **Request smuggling:** Manipulation of HTTP requests to bypass PIF's inspection (e.g., exploiting HTTP/1.1 chunked transfer encoding, content-length mismatches, or pipeline confusion). +- **Response manipulation:** Ability to modify or inject content into LLM responses as they pass through PIF. +- **Authentication bypass:** If PIF is configured with authentication, any method to bypass it. +- **Header injection:** Injection of arbitrary headers through user-controlled input that PIF processes. 
+- **SSRF (Server-Side Request Forgery):** Manipulating PIF to make requests to unintended internal endpoints. + +#### Configuration Vulnerabilities + +- **Configuration injection:** Manipulation of configuration values through environment variables or YAML parsing to alter PIF's behaviour. +- **Insecure defaults:** Default configuration values that leave PIF in an insecure state. +- **Path traversal:** File path manipulation in configuration loading to read or include unintended files. + +#### ML Inference Vulnerabilities + +- **Model file tampering:** If PIF loads the ONNX model from a path that can be influenced by an attacker, substitution of a malicious model. +- **Inference crash:** Inputs that cause the ONNX runtime to crash, panic, or consume excessive resources during inference. +- **Memory corruption:** Inputs that trigger memory safety issues in the CGo boundary between Go and the ONNX C runtime. + +#### API Format Parsing + +- **OpenAI format parsing bypass:** Malformed OpenAI Chat Completions payloads that PIF fails to parse correctly, causing it to skip inspection while the upstream API accepts them. +- **Anthropic format parsing bypass:** Malformed Anthropic Messages payloads that bypass inspection. +- **Content type confusion:** Sending requests with unexpected Content-Type headers that cause PIF to skip inspection. +- **Encoding bypass:** Using character encodings (e.g., UTF-16, ISO-8859-1) that PIF does not handle but the upstream API does. + +--- + +## 3. Severity Guidelines + +Use the following severity classification when submitting reports. Huntr may adjust severity during triage. + +### Critical (CVSS 9.0--10.0) + +A complete detection bypass that allows a prompt injection payload to pass through PIF undetected (no block, no flag, no log) when detection is enabled and correctly configured. 
+ +**Examples:** +- A payload containing clear prompt injection ("Ignore all previous instructions and...") that bypasses both regex and ML detection engines and produces no log entry. +- A request smuggling technique that causes PIF to inspect a different request body than the one forwarded to the upstream API. +- A method to disable PIF's detection entirely through a crafted request (without authentication). + +**Impact:** Any application relying on PIF for prompt injection defence is completely unprotected. + +### High (CVSS 7.0--8.9) + +A partial detection bypass or a vulnerability that significantly degrades PIF's security posture. + +**Examples:** +- A bypass that evades the regex engine but is detected by the ML engine (or vice versa), combined with a configuration where only the bypassed engine is enabled. +- Authentication bypass allowing unauthenticated access to PIF's management endpoints (if any). +- A method to force PIF to downgrade from "block" to "flag" or "log" action through request manipulation. +- SSRF allowing an attacker to use PIF as a proxy to reach internal services. + +**Impact:** Reduced detection effectiveness or unauthorised access to PIF functionality. + +### Medium (CVSS 4.0--6.9) + +Denial of service or information disclosure that does not directly enable prompt injection bypass. + +**Examples:** +- Crafted input that causes PIF to consume excessive CPU or memory (ReDoS in regex patterns, ML inference resource exhaustion). +- A request that causes PIF to crash or panic, temporarily disabling protection until the process restarts. +- Information disclosure of internal configuration, upstream API keys, or system information through error messages or headers. +- Timing side-channel that reveals whether a specific regex pattern matched (information useful for crafting bypasses). + +**Impact:** Service disruption or information leakage that aids further attacks. 
+ +### Low (CVSS 0.1--3.9) + +Hardening recommendations and minor issues that do not have a direct security impact. + +**Examples:** +- Missing security headers on PIF's own error responses (e.g., missing `X-Content-Type-Options`). +- Verbose error messages that disclose version information but no sensitive data. +- Race conditions in logging that could result in incomplete audit trails (but do not affect detection). +- Configuration validation improvements that prevent operator misuse. + +**Impact:** Minimal direct security impact; improvements to defence in depth. + +--- + +## 4. Reporting Requirements + +When submitting a report on Huntr, researchers should include the following information to ensure efficient triage. + +### Required Information + +| Field | Description | +|-------|-------------| +| **Vulnerability type** | Category from Section 2.3 (e.g., "Regex engine bypass", "Request smuggling") | +| **Affected component** | Directory and file path (e.g., `pkg/detector/regex.go`) | +| **PIF version** | Version or commit hash tested against | +| **Configuration** | Relevant PIF configuration (detection mode, thresholds, enabled engines) | +| **Reproduction steps** | Step-by-step instructions to reproduce the vulnerability | +| **Proof of concept** | Working payload, script, or curl command that demonstrates the issue | +| **Expected behaviour** | What PIF should do (e.g., "block the request") | +| **Actual behaviour** | What PIF actually does (e.g., "forwards the request unmodified") | +| **Impact assessment** | Description of real-world impact if exploited | + +### Proof of Concept Requirements for Detection Bypasses + +Detection bypass reports must include: + +1. **The payload** -- the exact string or HTTP request body used. +2. **Why it is prompt injection** -- brief explanation of how the payload would manipulate an LLM if it reached the model. Trivially benign strings that happen to not match patterns are not valid bypasses. +3. 
**PIF configuration** -- the configuration file used, or confirmation that default configuration was used. +4. **Evidence of bypass** -- logs, HTTP response, or other evidence showing that PIF did not detect the payload. +5. **Upstream acceptance** -- evidence that the payload is accepted by the target LLM API (OpenAI or Anthropic) as valid input. Malformed requests rejected by the upstream API are not valid bypasses. + +--- + +## 5. Registration Steps + +### 5.1 Create Huntr Account + +1. Go to https://huntr.com/ +2. Sign up or log in with your GitHub account + +### 5.2 Register the Repository + +1. Navigate to the repository registration page +2. Enter the repository URL: `https://github.com/ogulcanaydogan/Prompt-Injection-Firewall` +3. Confirm you are the maintainer (Huntr may verify via GitHub permissions) + +### 5.3 Configure Scope + +Use the scope definition from Section 2 to configure the programme: + +**In-scope assets:** +- `cmd/` -- CLI and application entry point +- `pkg/detector/` -- Detection engines (regex, ML, ensemble) +- `pkg/proxy/` -- Reverse proxy layer +- `pkg/config/` -- Configuration parsing and validation +- `internal/` -- Internal shared utilities + +**Out-of-scope assets:** +- `examples/` -- Example configurations +- `docs/` -- Documentation +- `tests/` and `*_test.go` -- Test files +- `.github/` -- CI/CD workflows +- Third-party dependencies (report upstream) + +### 5.4 Set Severity Guidelines + +Configure the severity mapping from Section 3: +- Critical: Full detection bypass allowing complete injection +- High: Partial bypass or significant security degradation +- Medium: Denial of service, information disclosure +- Low: Hardening recommendations + +### 5.5 Configure Response Policy + +| Setting | Value | +|---------|-------| +| Initial response time | 72 hours | +| Triage time | 7 days | +| Resolution target (Critical) | 14 days | +| Resolution target (High) | 30 days | +| Resolution target (Medium) | 60 days | +| Resolution target 
(Low) | 90 days | +| Public disclosure | 90 days after fix, or coordinated with researcher | + +### 5.6 Add Maintainer Contacts + +- **Primary contact:** Ogulcan Aydogan (GitHub: @ogulcanaydogan) +- **Security email:** (configure a dedicated security contact email or use GitHub Security Advisories) + +--- + +## 6. Integration with GitHub Security Advisories + +In addition to Huntr, PIF should accept vulnerability reports through GitHub Security Advisories (GHSA). This provides a private channel for reports from researchers who do not use Huntr. + +### 6.1 Enable Security Advisories + +1. Go to the repository Settings > Security > Advisories +2. Ensure "Private vulnerability reporting" is enabled +3. This allows researchers to submit reports directly through GitHub + +### 6.2 SECURITY.md + +Ensure the repository contains a `SECURITY.md` file that directs researchers to both channels: + +```markdown +# Security Policy + +## Reporting a Vulnerability + +If you discover a security vulnerability in Prompt Injection Firewall, please report it +through one of the following channels: + +1. **Huntr:** https://huntr.com/repos/ogulcanaydogan/Prompt-Injection-Firewall +2. **GitHub Security Advisories:** Use the "Report a vulnerability" button on the + Security tab of this repository. + +Please do NOT open a public GitHub issue for security vulnerabilities. + +## Response Timeline + +- Initial acknowledgement: within 72 hours +- Triage and severity assessment: within 7 days +- Fix for Critical severity: within 14 days +- Fix for High severity: within 30 days + +## Scope + +See our Huntr programme for detailed scope and severity guidelines. +``` + +--- + +## 7. Triage Workflow + +When a report is submitted on Huntr, follow this workflow: + +``` +Report Received + | + v +[Acknowledge within 72 hours] + | + v +[Is it in scope?] --No--> Close as "Out of Scope" with explanation + | + Yes + | + v +[Can you reproduce it?] 
--No--> Request more information from researcher + | + Yes + | + v +[Assign severity per Section 3] + | + v +[Create private GitHub issue or security advisory] + | + v +[Develop fix on private branch] + | + v +[Request researcher to verify fix] + | + v +[Merge fix, release patched version] + | + v +[Huntr assigns CVE if applicable] + | + v +[Public disclosure after coordinated timeline] +``` + +--- + +## 8. Special Considerations for PIF + +### 8.1 Detection Bypass Validation + +Detection bypass reports require careful validation because the definition of "prompt injection" is context-dependent. When triaging bypass reports: + +1. **Verify the payload is actually injection.** The payload must contain content that would manipulate an LLM's behaviour (e.g., instruction override, role hijacking, data exfiltration attempt). A random string that does not match any pattern is not a bypass. + +2. **Test against default configuration.** First reproduce with default PIF configuration. If the bypass only works with a non-default configuration (e.g., ML engine disabled), note this in the assessment. + +3. **Evaluate real-world impact.** A bypass payload that would not actually affect a well-configured LLM application has lower severity than one that reliably manipulates model behaviour. + +4. **Consider ensemble behaviour.** A payload that bypasses the regex engine but is caught by the ML engine (or vice versa) is a valid finding (it indicates a gap in one engine) but has lower severity than a full ensemble bypass. + +### 8.2 ReDoS in Regex Patterns + +Regular expression denial of service (ReDoS) is a known risk for regex-heavy systems. PIF's 129 patterns should be evaluated for catastrophic backtracking. When triaging ReDoS reports: + +- Confirm the regex causes measurable CPU consumption (not just theoretical backtracking). +- Measure the actual latency impact (e.g., a pattern that takes 10 seconds vs. one that takes 200ms). 
+- Severity depends on whether the ReDoS can be triggered by normal user input or only by crafted adversarial input. + +### 8.3 CGo / ONNX Runtime Boundary + +The boundary between Go and the ONNX C runtime (via CGo) is a potential source of memory safety issues. Reports involving crashes, panics, or memory corruption in the ML inference path should be treated as High severity minimum, as they may be exploitable for code execution. + +--- + +## 9. Checklist Before Going Live + +Complete these items before making the Huntr programme public. + +- [ ] Verify SECURITY.md is present in the repository with correct contact information +- [ ] Enable GitHub Private Vulnerability Reporting +- [ ] Register repository on Huntr +- [ ] Configure in-scope and out-of-scope assets per Section 2 +- [ ] Set severity guidelines per Section 3 +- [ ] Configure response timeline per Section 5.5 +- [ ] Test the report submission flow (submit a test report yourself) +- [ ] Prepare a private branch or fork for developing security fixes +- [ ] Ensure you have notification channels configured (email, GitHub) for new reports +- [ ] Brief any co-maintainers on the triage workflow (Section 7) + +--- + +## 10. Benefits for Grant Applications + +Registering on Huntr strengthens PIF's position in grant applications (AISI Challenge Fund, NLnet NGI Zero) by demonstrating: + +1. **Proactive security posture.** The project actively invites security testing rather than relying solely on internal review. +2. **Mature vulnerability management.** A documented triage workflow and response timeline show operational maturity. +3. **Community engagement.** Bug bounty programmes attract security researchers who contribute expertise the maintainer may not have. +4. **Transparent security track record.** Resolved Huntr reports (with CVEs where applicable) provide evidence of responsiveness to security issues. + +--- + +*Document prepared as Huntr bug bounty registration guide for Prompt Injection Firewall. 
Last updated: March 2026.* diff --git a/docs/grants/nlnet-ngi-zero.md b/docs/grants/nlnet-ngi-zero.md new file mode 100644 index 0000000..eccd7ae --- /dev/null +++ b/docs/grants/nlnet-ngi-zero.md @@ -0,0 +1,237 @@ +# NLnet NGI Zero Grant Proposal + +**Grant Programme:** NGI Zero Core / NGI Zero Review +**Funding Body:** NLnet Foundation, funded by the European Commission (Next Generation Internet initiative) +**URL:** https://nlnet.nl/propose/ +**Funding Range:** EUR 5,000 -- 50,000 +**Application Deadline:** 1 April 2026 +**Applicant:** Ogulcan Aydogan +**Project:** Prompt Injection Firewall (PIF) +**Repository:** https://github.com/ogulcanaydogan/Prompt-Injection-Firewall +**License:** Apache 2.0 + +--- + +## 1. Abstract + +*(Target: 200 words. Use this text directly in the NLnet proposal form.)* + +Prompt Injection Firewall (PIF) is an open-source reverse-proxy middleware that detects and prevents prompt injection attacks against Large Language Model (LLM) applications. Prompt injection -- ranked #1 on the OWASP Top 10 for LLM Applications -- allows adversaries to override developer instructions, manipulate model outputs, and exfiltrate data through crafted inputs. Despite widespread recognition of this threat, no open-source, protocol-level defence standard exists. + +PIF operates transparently between clients and LLM APIs (OpenAI, Anthropic), applying a dual-engine detection system: 129 curated regex patterns for known attack signatures and a fine-tuned DistilBERT ONNX classifier for semantic analysis. An ensemble scorer combines both engines to drive configurable response actions (block, flag, or log). The system adds less than 100 milliseconds of latency and deploys as a single Go binary with no external dependencies beyond the ONNX model file. + +This proposal requests EUR 38,000 to fund adversarial robustness testing, detection engine expansion (indirect injection, multi-modal payloads), security audit, and documentation. 
PIF provides critical open infrastructure for the safe deployment of LLM applications in Europe and globally, directly supporting the NGI Zero mission of a trustworthy, open internet. + +--- + +## 2. Description of Work + +### 2.1 Problem and Motivation + +Large Language Models are increasingly embedded in internet-facing applications: customer support chatbots, content generation tools, code assistants, search interfaces, and autonomous agents. These applications accept natural language input from untrusted users, creating a new attack surface: prompt injection. + +Prompt injection attacks exploit the LLM's inability to distinguish between trusted system instructions and untrusted user input. An attacker can craft input that causes the model to: + +- Ignore system-level instructions ("Ignore all previous instructions and...") +- Exfiltrate confidential data through tool-calling or output channels +- Produce harmful, misleading, or manipulative content +- Execute unintended actions in agent-based systems + +The OWASP Foundation ranks prompt injection as the number-one risk for LLM applications. The EU AI Act identifies input manipulation as a concern for high-risk AI systems. Despite this, the open-source ecosystem lacks a standard, deployable defence layer. + +Commercial solutions exist (Lakera Guard, Protect AI) but are proprietary, opaque, and create vendor dependency. Organisations cannot audit the detection logic they rely on for safety-critical filtering. + +PIF addresses this gap as open infrastructure: a transparent, auditable, and freely deployable prompt injection defence layer licensed under Apache 2.0. 
+ +### 2.2 Current State of the Project + +PIF is a functioning, released project with the following characteristics: + +- **Language:** Go (single static binary, cross-platform) +- **Releases:** 4 stable releases on GitHub +- **Detection:** 129 regex patterns + fine-tuned DistilBERT ONNX classifier +- **Ensemble scoring:** Configurable weighted combination of regex and ML scores +- **Response actions:** Block (HTTP 403), Flag (header annotation), Log (silent recording) +- **API support:** OpenAI Chat Completions API, Anthropic Messages API +- **Performance:** <50ms regex latency, <100ms ML latency +- **CI/CD:** 3 GitHub Actions workflows (ci.yml, codeql.yml, release.yml) +- **Security analysis:** CodeQL static analysis integrated in CI +- **Test coverage:** 80%+ with Go race detector enabled +- **License:** Apache 2.0 + +### 2.3 Proposed Work + +The grant will fund four work packages that harden PIF for broader adoption and extend its detection capabilities. + +**WP1: Adversarial Robustness Testing (Weeks 1--4)** + +Systematic evaluation of the detection engines against adversarial evasion techniques: + +- Develop a comprehensive adversarial test suite covering encoding-based evasion (Base64, Unicode, homoglyphs), payload fragmentation, multi-language injection, and delimiter manipulation. +- Benchmark both regex and ML engines individually and as an ensemble against the adversarial suite. +- Identify and document bypass vectors. +- Deliverable: Adversarial test suite (open-source), evasion report, baseline metrics. + +**WP2: Detection Engine Expansion (Weeks 3--8)** + +Extend detection capabilities to cover emerging attack vectors: + +- **Indirect prompt injection patterns:** Detection of injected instructions in retrieved documents (RAG pipelines), tool outputs, and multi-turn conversation histories. 
+- **Encoding and obfuscation patterns:** Additional regex patterns for Base64-encoded payloads, Unicode confusables, zero-width character insertion, and mixed-script attacks. +- **ML model retraining:** Adversarial training using payloads from WP1. Evaluation on held-out test sets to ensure benign accuracy is maintained. +- Deliverable: Updated regex pattern set, retrained ONNX model, evaluation report. + +**WP3: Security Audit (Weeks 6--10)** + +Independent security review of the codebase: + +- Audit scope: proxy request handling, regex engine, ML inference pipeline, configuration parsing, input validation, error handling. +- Focus on vulnerabilities that could allow detection bypass, denial of service, or information leakage. +- Deliverable: Audit report, remediated codebase, verification by auditor. + +**WP4: Documentation and Ecosystem Integration (Weeks 8--12)** + +Comprehensive documentation to lower the barrier to adoption: + +- Deployment guides for common patterns (Docker, Kubernetes sidecar, API gateway integration). +- Threat model documentation explaining what PIF defends against and its limitations. +- Operator guide for tuning detection thresholds and configuring response actions. +- Integration guide for additional LLM providers beyond OpenAI and Anthropic. +- Deliverable: Published documentation on GitHub, updated project README. + +--- + +## 3. Budget + +**Total Requested: EUR 38,000** + +| Work Package | Cost (EUR) | Justification | +|-------------|-----------|---------------| +| WP1: Adversarial Robustness Testing | 8,000 | Development of adversarial test suite, benchmarking, evasion analysis. Approximately 4 weeks of effort. | +| WP2: Detection Engine Expansion | 12,000 | Regex pattern development, ML dataset curation, model retraining, compute costs for training runs, evaluation. Approximately 6 weeks of effort. | +| WP3: Security Audit | 13,000 | Engagement of independent security reviewer. 
Includes audit execution, report, and remediation verification. | +| WP4: Documentation | 5,000 | Technical writing for deployment guides, threat model, operator documentation. Approximately 3 weeks of effort. | +| **Total** | **38,000** | | + +*Note: NLnet disburses funds upon milestone completion. Budget items above map directly to milestones in Section 4.* + +--- + +## 4. Milestones + +NLnet grants are structured around milestone-based disbursement. The following milestones correspond to the work packages above. + +| # | Milestone | Deliverable | Completion | Payment (EUR) | +|---|-----------|-------------|------------|---------------| +| M1 | Adversarial test suite complete | Open-source test suite on GitHub; evasion report published | Week 4 | 8,000 | +| M2 | Detection engine expansion | Updated regex patterns merged; retrained ONNX model released; evaluation report | Week 8 | 12,000 | +| M3 | Security audit complete | Audit report delivered; all critical/high findings remediated; verification confirmed | Week 10 | 13,000 | +| M4 | Documentation published | All deployment guides, threat model, and operator docs published on GitHub | Week 12 | 5,000 | + +--- + +## 5. Relevance to NGI Zero + +### 5.1 Open Internet Infrastructure + +PIF is open infrastructure for securing the next generation of internet applications. As LLMs become embedded in web services, search engines, customer-facing tools, and autonomous agents, the ability to defend against input manipulation becomes a foundational internet safety requirement. PIF provides this defence as a public good, freely available under Apache 2.0. + +### 5.2 User Autonomy and Trust + +Prompt injection attacks undermine user trust in LLM-powered services. When a chatbot can be manipulated to produce false information, override its safety guidelines, or exfiltrate user data, the end user bears the consequences. 
PIF protects end users by ensuring that LLM applications behave as their developers intended, preserving the trust relationship between users and services. + +### 5.3 No Vendor Lock-In + +Unlike proprietary alternatives, PIF does not create dependency on a commercial vendor for safety-critical filtering. Organisations can inspect, modify, and extend every detection rule and the ML model. This transparency is essential for trust in security tooling and aligns with NLnet's commitment to open, auditable technology. + +### 5.4 European AI Safety + +The EU AI Act identifies input manipulation as a concern for high-risk AI systems. PIF provides a concrete, deployable mitigation that organisations can use to demonstrate due diligence in defending against prompt injection, supporting compliance with European regulatory expectations. + +--- + +## 6. NLnet Automatic Audit Support + +NLnet provides access to automatic audit and review services for funded projects. PIF will take advantage of the following: + +- **Security audit support:** NLnet can facilitate connection with security auditors through its network, potentially reducing the cost and procurement overhead for WP3. +- **Accessibility review:** While PIF is a backend middleware (not a user-facing application), documentation and configuration interfaces will be reviewed for accessibility. +- **Licensing and compliance review:** Verification that all dependencies and the ONNX model comply with Apache 2.0 licensing and NLnet's open-source requirements. + +--- + +## 7. 
Technical Specifications + +| Attribute | Value | +|-----------|-------| +| Programming Language | Go 1.21+ | +| ML Runtime | ONNX Runtime (C library, CGo bindings) | +| ML Model | DistilBERT (fine-tuned, 66M parameters) | +| Binary Size | ~15 MB (excluding ONNX model) | +| ONNX Model Size | ~260 MB | +| Configuration | YAML file or environment variables | +| Deployment | Static binary, Docker image, Kubernetes sidecar | +| Supported APIs | OpenAI Chat Completions, Anthropic Messages | +| Detection Latency | <50ms (regex), <100ms (ML), <120ms (ensemble) | +| Dependencies | Go standard library, ONNX Runtime, YAML parser | + +--- + +## 8. Comparison with Existing Solutions + +| Feature | PIF | Lakera Guard | Protect AI | Rebuff | +|---------|-----|-------------|------------|--------| +| Open source | Yes (Apache 2.0) | No | Partial | Yes | +| Deployment model | Self-hosted proxy | Cloud API | Cloud/on-prem | Library | +| Regex detection | 129 patterns | Unknown (proprietary) | Unknown | No | +| ML detection | DistilBERT ONNX | Proprietary model | Proprietary | GPT-based | +| Ensemble scoring | Yes | Unknown | Unknown | No | +| Configurable actions | Block/Flag/Log | Block/Allow | Block/Allow | Block/Allow | +| Latency overhead | <100ms | ~200ms (network) | Variable | ~1s (LLM call) | +| Auditable rules | Yes | No | No | N/A | +| No vendor dependency | Yes | No | No | Partial | + +--- + +## 9. Applicant Information + +**Name:** Ogulcan Aydogan +**Role:** Independent developer and researcher +**Location:** United Kingdom +**GitHub:** https://github.com/ogulcanaydogan +**Hugging Face:** https://huggingface.co/ogulcanaydogan + +Relevant experience: +- Creator and maintainer of Prompt Injection Firewall (4 releases) +- Experience in LLM fine-tuning (SFT, DPO) for multilingual NLP +- Go systems programming, ONNX runtime integration +- CI/CD pipeline design with GitHub Actions, CodeQL, and static analysis + +--- + +## 10. 
Submission Checklist + +- [ ] Create account on NLnet proposal portal: https://nlnet.nl/propose/ +- [ ] Complete the online form with the following field mapping: + - "Abstract" --> Section 1 of this document (200 words) + - "Describe the project" --> Section 2 + - "Budget" --> Section 3 + - "Milestones" --> Section 4 + - "Relevance" --> Section 5 +- [ ] Provide repository URL: https://github.com/ogulcanaydogan/Prompt-Injection-Firewall +- [ ] Confirm Apache 2.0 license compatibility with NLnet requirements +- [ ] Submit before 1 April 2026 deadline + +--- + +## 11. Notes on NLnet Process + +- NLnet proposals are reviewed by an independent committee. Decisions typically take 2--3 months. +- Funding is disbursed in milestones. Each milestone must be completed and verified before the next payment. +- NLnet provides mentoring, audit support, and connections to the NGI ecosystem as part of the grant. +- All funded work must be released under an OSI-approved open-source license (Apache 2.0 qualifies). +- NLnet may request modifications to milestones or budget during the review process. + +--- + +*Document prepared for NLnet NGI Zero proposal submission. Last updated: March 2026.* diff --git a/docs/grants/openssf-badge-guide.md b/docs/grants/openssf-badge-guide.md new file mode 100644 index 0000000..bb82296 --- /dev/null +++ b/docs/grants/openssf-badge-guide.md @@ -0,0 +1,295 @@ +# OpenSSF Best Practices Badge Guide for Prompt Injection Firewall + +**Programme:** OpenSSF (Open Source Security Foundation) Best Practices Badge +**URL:** https://www.bestpractices.dev/ +**Cost:** Free +**Purpose:** Demonstrate that PIF follows open-source security best practices; strengthens grant applications and adoption by security-conscious organisations +**Repository:** https://github.com/ogulcanaydogan/Prompt-Injection-Firewall + +--- + +## 1. 
Overview + +The OpenSSF Best Practices Badge (formerly CII Best Practices) is a free certification programme that evaluates open-source projects against a set of security, quality, and documentation criteria. Projects that meet the criteria earn a badge that can be displayed in their README, demonstrating adherence to industry best practices. + +The badge has three levels: +- **Passing** -- baseline criteria (most important for grant applications) +- **Silver** -- additional criteria around change management and quality +- **Gold** -- highest level, requires reproducible builds and dynamic analysis + +This guide maps PIF's current capabilities to the **Passing** level criteria and identifies any gaps that need to be addressed before submission. + +--- + +## 2. Pre-Submission Checklist for PIF + +### 2.1 Basics + +| Criterion | Requirement | PIF Status | Action Needed | +|-----------|------------|------------|---------------| +| **Website** | Project has a website or README with basic info | README.md exists with project description, installation, usage | None | +| **Description** | Project has a clear description of what it does | README includes description of PIF as prompt injection defence middleware | None | +| **Interaction** | Project provides a mechanism for discussion | GitHub Issues enabled; Discussions can be enabled | Enable GitHub Discussions if not already active | +| **Contribution guide** | CONTRIBUTING.md or equivalent exists | Verify CONTRIBUTING.md exists | Create if missing | +| **License** | OSI-approved license, clearly stated | Apache 2.0; LICENSE file in repo root | None | +| **License in files** | License header or SPDX identifier in source files | Verify Go source files include SPDX headers | Add `// SPDX-License-Identifier: Apache-2.0` to source files if missing | + +### 2.2 Change Control + +| Criterion | Requirement | PIF Status | Action Needed | +|-----------|------------|------------|---------------| +| **Version control** | Project 
uses version control (Git) | GitHub repository | None | +| **Unique version numbering** | Each release has a unique version | Semantic versioning with 4 releases | None | +| **Release notes** | Each release has human-readable release notes | GitHub Releases with changelogs | None | +| **Version in a standard place** | Version number accessible programmatically | Verify version is in `go.mod` or a `version.go` constant | Ensure version is defined in code | + +### 2.3 Reporting + +| Criterion | Requirement | PIF Status | Action Needed | +|-----------|------------|------------|---------------| +| **Bug reporting process** | Project has a documented process for reporting bugs | GitHub Issues | Document in CONTRIBUTING.md | +| **Security vulnerability reporting** | Project has a documented process for reporting security vulnerabilities | Verify SECURITY.md exists | Create SECURITY.md with responsible disclosure policy if missing | +| **Response to vulnerability reports** | Project responds to reports in a timely manner | Policy should state response within 72 hours | Document in SECURITY.md | + +### 2.4 Quality + +| Criterion | Requirement | PIF Status | Action Needed | +|-----------|------------|------------|---------------| +| **Working build system** | Project can be built from source | `go build` produces binary | None | +| **Automated test suite** | Project has an automated test suite | Go tests with `go test ./...` | None | +| **New functionality tested** | Tests cover new features | CI runs tests on every PR | None | +| **Test coverage** | Coverage is measured and reported | **80%+ coverage** | Add coverage reporting to CI if not already present (e.g., `go test -coverprofile`) | +| **Tests pass** | All tests pass in CI | CI workflow (`ci.yml`) runs on push/PR | None | + +### 2.5 Security + +| Criterion | Requirement | PIF Status | Action Needed | +|-----------|------------|------------|---------------| +| **Static analysis** | Project uses at least one static 
analysis tool | **CodeQL** integrated in CI (`codeql.yml`); **golangci-lint** | None | +| **No known critical vulnerabilities** | No unpatched critical vulnerabilities | Verify with `govulncheck` | Run `govulncheck ./...` and address any findings | +| **Secure development knowledge** | Lead developer understands secure development practices | Documented in this guide and SECURITY.md | None | +| **Memory-safe language** | Project uses a memory-safe language or addresses memory safety | **Go is memory-safe** (garbage collected, bounds-checked) | None | + +### 2.6 Analysis + +| Criterion | Requirement | PIF Status | Action Needed | +|-----------|------------|------------|---------------| +| **Dynamic analysis** | Project uses dynamic analysis (e.g., fuzzing, race detection) | **Go race detector** enabled in CI (`go test -race`) | None | +| **Compiler warnings** | Project builds without warnings | Go compiler is strict; verify clean build | Run `go vet ./...` in CI | + +--- + +## 3. CI Workflows Relevant to Badge Criteria + +PIF has three CI workflows that satisfy multiple badge criteria simultaneously. + +### 3.1 ci.yml -- Continuous Integration + +**File:** `.github/workflows/ci.yml` + +This workflow satisfies the following badge criteria: +- Automated test suite execution +- Tests pass on every push and PR +- Race condition detection (`go test -race`) +- Build verification (`go build`) + +**Recommended additions for badge compliance:** +```yaml +# Add coverage reporting step +- name: Run tests with coverage + run: go test -race -coverprofile=coverage.out -covermode=atomic ./... 
+ +- name: Report coverage + run: go tool cover -func=coverage.out +``` + +### 3.2 codeql.yml -- Static Analysis + +**File:** `.github/workflows/codeql.yml` + +This workflow satisfies the following badge criteria: +- Static analysis tool usage (CodeQL) +- Automated security scanning +- Known vulnerability detection + +CodeQL performs semantic code analysis that detects: +- Injection vulnerabilities +- Authentication issues +- Cryptographic weaknesses +- Data flow problems +- Go-specific security issues + +### 3.3 release.yml -- Release Management + +**File:** `.github/workflows/release.yml` + +This workflow satisfies the following badge criteria: +- Unique version numbering (triggered by Git tags) +- Reproducible releases (automated build and release process) +- Release artifact availability + +--- + +## 4. Security Tooling Summary + +| Tool | Purpose | Integration Point | Badge Criterion | +|------|---------|-------------------|-----------------| +| **CodeQL** | Semantic static analysis for security vulnerabilities | `codeql.yml` workflow, runs on push/PR | Static analysis | +| **golangci-lint** | Go linter aggregator (includes gosec, govet, staticcheck, errcheck) | `ci.yml` workflow or pre-commit | Static analysis, compiler warnings | +| **Go race detector** | Dynamic analysis for data race conditions | `go test -race` in `ci.yml` | Dynamic analysis | +| **Go vet** | Static analysis for suspicious constructs | `go vet ./...` in `ci.yml` | Compiler warnings | +| **govulncheck** | Known vulnerability scanning for Go dependencies | Manual or CI step | No known critical vulnerabilities | + +--- + +## 5. Testing Infrastructure + +### 5.1 Test Execution + +```bash +# Run all tests with race detector and coverage +go test -race -coverprofile=coverage.out -covermode=atomic ./... 
+ +# View coverage summary +go tool cover -func=coverage.out + +# View coverage in browser +go tool cover -html=coverage.out +``` + +### 5.2 Coverage Targets + +| Package | Minimum Coverage | Notes | +|---------|-----------------|-------| +| `pkg/detector/` | 85% | Core detection logic; highest priority | +| `pkg/proxy/` | 80% | Proxy request handling and forwarding | +| `pkg/config/` | 80% | Configuration parsing and validation | +| `cmd/` | 70% | CLI entry point; lower priority | +| `internal/` | 75% | Internal utilities | +| **Overall** | **80%+** | Current status meets this threshold | + +### 5.3 Test Categories + +| Category | Description | Location | +|----------|-------------|----------| +| Unit tests | Individual function/method tests | `*_test.go` files adjacent to source | +| Integration tests | End-to-end proxy + detection tests | `tests/` or `*_integration_test.go` | +| Regex pattern tests | Verification of all 129 patterns against known payloads | `pkg/detector/regex_test.go` | +| ML model tests | Verification of ONNX inference pipeline | `pkg/detector/ml_test.go` | +| Race condition tests | Concurrent access tests with `-race` flag | All tests run with race detector | + +--- + +## 6. Gap Analysis and Action Items + +The following items should be completed before submitting for the OpenSSF badge. 
+ +### Priority 1: Required for Passing + +| # | Action | Effort | Status | +|---|--------|--------|--------| +| 1 | Verify SECURITY.md exists with responsible disclosure policy | 30 min | Check repo | +| 2 | Verify CONTRIBUTING.md exists with bug reporting process | 30 min | Check repo | +| 3 | Add SPDX license identifiers to source files (if missing) | 1 hour | Check source files | +| 4 | Run `govulncheck ./...` and address any findings | 30 min | Run locally | +| 5 | Ensure `go vet ./...` passes cleanly | 15 min | Run locally | +| 6 | Add coverage reporting to CI if not present | 30 min | Check `ci.yml` | + +### Priority 2: Recommended for Stronger Application + +| # | Action | Effort | Status | +|---|--------|--------|--------| +| 7 | Enable GitHub Discussions for community interaction | 5 min | GitHub Settings | +| 8 | Add badge image to README once earned | 5 min | After submission | +| 9 | Document build-from-source instructions in README | 15 min | Check README | +| 10 | Add `govulncheck` to CI pipeline | 15 min | Add workflow step | + +--- + +## 7. Submission Process + +### 7.1 Create Account + +1. Go to https://www.bestpractices.dev/ +2. Sign in with your GitHub account (ogulcanaydogan) + +### 7.2 Register Project + +1. Click "Get Your Badge Now!" +2. Enter the repository URL: `https://github.com/ogulcanaydogan/Prompt-Injection-Firewall` +3. The system will auto-detect some criteria from the repository + +### 7.3 Complete the Questionnaire + +The questionnaire has approximately 66 criteria for the Passing level. For each criterion: +- Select "Met" if PIF satisfies it (provide a brief justification) +- Select "Unmet" if it does not (and note what action is needed) +- Select "N/A" if the criterion does not apply + +Use this guide's checklist (Section 2) and gap analysis (Section 6) to pre-populate your answers. + +### 7.4 Key Answers Reference + +Below are suggested responses for criteria where PIF's answer may not be immediately obvious. 
+ +**"What is the human language(s) of the project?"** +> English + +**"Does the project use a memory-unsafe language?"** +> No. Go is memory-safe (garbage collected, bounds-checked arrays, no pointer arithmetic). + +**"Does the project use at least one static analysis tool?"** +> Yes. CodeQL (integrated in CI via codeql.yml) and golangci-lint. + +**"Does the project use at least one dynamic analysis tool?"** +> Yes. Go race detector (`go test -race`) is enabled in CI. All tests run with race detection. + +**"Is there a documented process for reporting security vulnerabilities?"** +> Yes. See SECURITY.md in the repository root. Reports can be sent via GitHub Security Advisories or direct email. + +**"What license(s) does the project use?"** +> Apache-2.0 (SPDX identifier). OSI-approved. LICENSE file in repository root. + +--- + +## 8. Badge Display + +Once the Passing badge is earned, add it to the project README: + +```markdown +[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/XXXXX/badge)](https://www.bestpractices.dev/projects/XXXXX) +``` + +Replace `XXXXX` with the project ID assigned during registration. + +The badge serves as a trust signal for: +- Grant applications (AISI Challenge Fund, NLnet NGI Zero, and others) +- Enterprise adoption decisions +- Open-source security assessments +- Bug bounty programme credibility (Huntr) + +--- + +## 9. Path to Silver and Gold + +After achieving Passing, consider pursuing Silver and Gold levels to further strengthen the project's security posture. 
+ +### Silver Additional Requirements + +| Criterion | Requirement | PIF Path | +|-----------|------------|----------| +| Bus factor >= 2 | At least 2 significant contributors | Recruit contributors or co-maintainer | +| Signed releases | Releases are cryptographically signed | Add GPG signing to release workflow | +| Code review | All changes reviewed before merge | Enable branch protection requiring reviews | + +### Gold Additional Requirements + +| Criterion | Requirement | PIF Path | +|-----------|------------|----------| +| Reproducible build | Build produces identical output from same source | Go builds are deterministic with pinned dependencies | +| Dynamic analysis on all code | Fuzzing or equivalent covers all code paths | Add Go fuzzing targets for detection engines | +| Formal security audit | Independent security review completed | Covered by AISI/NLnet grant funding | + +--- + +*Document prepared as OpenSSF Best Practices Badge submission guide for Prompt Injection Firewall. 
Last updated: March 2026.* From e8110dcc2cea5f3580c8221ec2ca1fa629608022 Mon Sep 17 00:00:00 2001 From: Ogulcan Aydogan Date: Fri, 13 Mar 2026 21:47:17 +0000 Subject: [PATCH 2/3] docs: humanize grant application materials - Remove AI writing tells (double dashes, banned words) - Replace vague claims with specific numbers - Add contractions in narrative sections - Vary sentence structure and length --- docs/grants/aisi-challenge-fund.md | 28 ++++++++++---------- docs/grants/huntr-registration.md | 42 +++++++++++++++--------------- docs/grants/nlnet-ngi-zero.md | 20 +++++++------- docs/grants/openssf-badge-guide.md | 16 ++++++------ 4 files changed, 53 insertions(+), 53 deletions(-) diff --git a/docs/grants/aisi-challenge-fund.md b/docs/grants/aisi-challenge-fund.md index c5fd61f..8afd743 100644 --- a/docs/grants/aisi-challenge-fund.md +++ b/docs/grants/aisi-challenge-fund.md @@ -3,7 +3,7 @@ **Grant Programme:** AISI Challenge Fund **Funding Body:** UK AI Safety Institute (AISI), Department for Science, Innovation and Technology **URL:** https://find-government-grants.service.gov.uk/grants/aisi-challenge-fund-1 -**Funding Range:** GBP 50,000 -- 200,000 +**Funding Range:** GBP 50,000 to 200,000 **Application Deadline:** 31 March 2026 **Applicant:** Ogulcan Aydogan **Repository:** https://github.com/ogulcanaydogan/Prompt-Injection-Firewall @@ -23,13 +23,13 @@ Prompt Injection Firewall (PIF) is an open-source, transparent reverse-proxy sec PIF operates as a drop-in proxy layer between client traffic and LLM provider APIs (OpenAI, Anthropic). It employs a dual-engine detection architecture combining 129 curated regex patterns with a fine-tuned DistilBERT ONNX classifier, producing ensemble scores that drive configurable response actions (block, flag, or log). The system achieves sub-50ms regex latency and sub-100ms ML latency, making it viable for production deployments where added overhead must be minimal. 
-This application requests GBP 65,000 to fund a professional security audit of the detection engine, adversarial red-teaming of the ML classifier, model hardening against evasion techniques, and comprehensive documentation to accelerate adoption by organisations deploying LLM-powered services in the UK and internationally. +This application requests GBP 65,000 to fund a professional security audit of the detection engine, adversarial red-teaming of the ML classifier, model hardening against evasion techniques, and deployment guides with operator runbooks to accelerate adoption by organisations deploying LLM-powered services in the UK and internationally. --- ## 3. Problem Statement -### 3.1 The Threat Landscape +### 3.1 The Threat Environment Prompt injection is classified as the **#1 risk** in the OWASP Top 10 for Large Language Model Applications (2023, reaffirmed 2025). Unlike traditional injection attacks (SQL injection, XSS), prompt injection exploits the fundamental architecture of LLMs: the inability to reliably distinguish between trusted instructions and untrusted user input within the same context window. @@ -41,11 +41,11 @@ Attack vectors include: ### 3.2 The Defence Gap -Despite prompt injection being widely recognised as a critical risk, the ecosystem lacks: +Despite prompt injection being widely recognised as a critical risk, the open-source community lacks: -1. **No open standard defence middleware.** Most mitigation advice consists of prompt engineering best practices ("defence in depth" system prompts), which are necessary but insufficient. There is no widely adopted, protocol-level defence layer analogous to a Web Application Firewall (WAF) for LLM traffic. +1. **No open standard defence middleware.** Most mitigation advice consists of prompt engineering best practices ("defence in depth" system prompts), which are necessary but insufficient. 
There's no widely adopted, protocol-level defence layer analogous to a Web Application Firewall (WAF) for LLM traffic. -2. **Vendor lock-in for detection.** Commercial solutions exist (Lakera Guard, Protect AI, Robust Intelligence) but are proprietary, opaque in their detection logic, and introduce vendor dependency. Organisations cannot audit the detection rules they rely on. +2. **Vendor lock-in for detection.** Commercial solutions exist (Lakera Guard, Protect AI, Robust Intelligence) but are proprietary, opaque in their detection logic, and introduce vendor dependency. Organisations can't audit the detection rules they rely on. 3. **Latency-sensitivity mismatch.** Academic detectors (perplexity-based methods, large classifier models) often add hundreds of milliseconds or require GPU inference, making them impractical as inline middleware for production API traffic. @@ -113,7 +113,7 @@ Client --> [PIF Reverse Proxy] --> LLM Provider API (OpenAI / Anthropic) ### 4.4 API Format Support -PIF parses and inspects message payloads for both OpenAI Chat Completions API format and Anthropic Messages API format. Format detection is automatic based on request structure and target endpoint. This covers the two most widely deployed commercial LLM APIs. +PIF parses and inspects message payloads for both OpenAI Chat Completions API format and Anthropic Messages API format. Format detection is automatic based on request structure and target endpoint. This covers the two most widely used commercial LLM APIs. ### 4.5 Deployment Model @@ -142,11 +142,11 @@ PIF directly contributes to the safe deployment of LLM applications by: The UK AI Safety Institute's mandate includes developing tools and techniques for evaluating and mitigating risks from AI systems. 
PIF aligns with this mission in the following ways: -- **Practical, deployable safety tooling.** PIF is not a research prototype; it is production-grade middleware with 4 releases, CI/CD pipelines, and documented integration guides. AISI funding would harden it for broader adoption. +- **Practical, deployable safety tooling.** PIF isn't a research prototype; it's production-grade middleware with 4 releases, CI/CD pipelines, and documented integration guides. AISI funding would harden it for broader adoption. -- **Open infrastructure for the ecosystem.** An open-source prompt injection defence layer benefits the entire UK AI ecosystem, from startups building LLM applications to enterprises deploying AI in regulated sectors (financial services, healthcare, government). +- **Open infrastructure for the UK AI community.** An open-source prompt injection defence layer benefits everyone, from startups building LLM applications to enterprises deploying AI in regulated sectors (financial services, healthcare, government). -- **Defence against a well-characterised risk.** Prompt injection is not a speculative risk; it is actively exploited. OWASP, NIST, and the EU AI Act all identify input manipulation as a priority concern. PIF provides a concrete mitigation. +- **Defence against a well-characterised risk.** Prompt injection isn't a speculative risk; it's actively exploited. OWASP, NIST, and the EU AI Act all identify input manipulation as a priority concern. PIF provides a concrete mitigation. - **Complementary to model-level safety.** PIF operates at the infrastructure layer, complementing model-level alignment techniques (RLHF, constitutional AI). Defence in depth requires both model-level and infrastructure-level protections. 
@@ -179,7 +179,7 @@ The UK AI Safety Institute's mandate includes developing tools and techniques fo | Security Audit of Detection Engine | 30,000 | Independent third-party security audit of the regex engine, ML inference pipeline, proxy request handling, and configuration parsing. Includes audit report and remediation verification. Vendor: to be selected from CREST-accredited firms or equivalent. | | Adversarial Testing Red Team | 15,000 | Engagement of a specialised red team to develop novel prompt injection payloads targeting PIF's detection engines. Goal: identify bypass vectors, measure false-negative rates under adversarial conditions, and produce a categorised evasion report. | | ML Model Hardening | 12,000 | Adversarial training of the DistilBERT classifier using payloads identified during red-teaming. Includes dataset curation, retraining, evaluation on held-out adversarial test sets, and ONNX model re-export. Covers compute costs for training runs. | -| Documentation and Adoption Materials | 8,000 | Comprehensive deployment guides for common infrastructure patterns (Kubernetes sidecar, API gateway, Docker Compose). Threat model documentation. Integration guides for additional LLM providers. Operator runbooks for tuning detection thresholds. | +| Documentation and Adoption Materials | 8,000 | Deployment guides for common infrastructure patterns (Kubernetes sidecar, API gateway, Docker Compose). Threat model documentation. Integration guides for additional LLM providers. Operator runbooks for tuning detection thresholds. | | **Total** | **65,000** | | --- @@ -259,10 +259,10 @@ Before submission, ensure the following materials are prepared: When adapting this document for form fields with character limits, prioritise the following points: -1. **Prompt injection is the #1 LLM security risk** (OWASP Top 10 for LLM Applications). It is not theoretical; it is actively exploited. -2. 
**No open standard defence exists.** PIF fills a critical gap in the AI safety toolchain. +1. **Prompt injection is the #1 LLM security risk** (OWASP Top 10 for LLM Applications). It isn't theoretical; it's actively exploited. +2. **No open standard defence layer exists.** PIF fills a critical gap in the AI safety toolchain. 3. **PIF is production-ready, not a research prototype.** Four releases, CI/CD, 80%+ test coverage, sub-100ms latency. -4. **The grant funds hardening, not creation.** The core system exists and works. Funding enables independent security validation and adversarial robustness testing. +4. **The grant funds hardening, not creation.** The core system exists and works. Funding enables independent security validation and adversarial evasion testing. 5. **Open source maximises impact.** Apache 2.0 licensing ensures any UK organisation can adopt PIF without vendor lock-in or licensing barriers. 6. **Direct alignment with AISI mandate.** PIF is practical, deployable AI safety infrastructure that protects end users from LLM manipulation. diff --git a/docs/grants/huntr-registration.md b/docs/grants/huntr-registration.md index 2667742..fed01f1 100644 --- a/docs/grants/huntr-registration.md +++ b/docs/grants/huntr-registration.md @@ -17,7 +17,7 @@ Huntr is a bug bounty platform focused on open-source software. By registering P - Public acknowledgement of the project's commitment to security - CVE assignment for confirmed vulnerabilities -This is particularly valuable for PIF because the project is security middleware: vulnerabilities in PIF directly translate to vulnerabilities in every application that depends on it for protection. +This is particularly valuable for PIF because it's security middleware: vulnerabilities in PIF directly translate to vulnerabilities in every application that depends on it for protection. 
--- @@ -119,7 +119,7 @@ A partial detection bypass or a vulnerability that significantly degrades PIF's ### Medium (CVSS 4.0--6.9) -Denial of service or information disclosure that does not directly enable prompt injection bypass. +Denial of service or information disclosure that doesn't directly enable prompt injection bypass. **Examples:** - Crafted input that causes PIF to consume excessive CPU or memory (ReDoS in regex patterns, ML inference resource exhaustion). @@ -131,12 +131,12 @@ Denial of service or information disclosure that does not directly enable prompt ### Low (CVSS 0.1--3.9) -Hardening recommendations and minor issues that do not have a direct security impact. +Hardening recommendations and minor issues that don't have a direct security impact. **Examples:** - Missing security headers on PIF's own error responses (e.g., missing `X-Content-Type-Options`). - Verbose error messages that disclose version information but no sensitive data. -- Race conditions in logging that could result in incomplete audit trails (but do not affect detection). +- Race conditions in logging that could result in incomplete audit trails (but don't affect detection). - Configuration validation improvements that prevent operator misuse. **Impact:** Minimal direct security impact; improvements to defence in depth. @@ -165,11 +165,11 @@ When submitting a report on Huntr, researchers should include the following info Detection bypass reports must include: -1. **The payload** -- the exact string or HTTP request body used. -2. **Why it is prompt injection** -- brief explanation of how the payload would manipulate an LLM if it reached the model. Trivially benign strings that happen to not match patterns are not valid bypasses. -3. **PIF configuration** -- the configuration file used, or confirmation that default configuration was used. -4. **Evidence of bypass** -- logs, HTTP response, or other evidence showing that PIF did not detect the payload. -5. 
**Upstream acceptance** -- evidence that the payload is accepted by the target LLM API (OpenAI or Anthropic) as valid input. Malformed requests rejected by the upstream API are not valid bypasses. +1. **The payload:** the exact string or HTTP request body used. +2. **Why it's prompt injection:** brief explanation of how the payload would manipulate an LLM if it reached the model. Trivially benign strings that happen to not match patterns aren't valid bypasses. +3. **PIF configuration:** the configuration file used, or confirmation that default configuration was used. +4. **Evidence of bypass:** logs, HTTP response, or other evidence showing that PIF didn't detect the payload. +5. **Upstream acceptance:** evidence that the payload is accepted by the target LLM API (OpenAI or Anthropic) as valid input. Malformed requests rejected by the upstream API aren't valid bypasses. --- @@ -191,17 +191,17 @@ Detection bypass reports must include: Use the scope definition from Section 2 to configure the programme: **In-scope assets:** -- `cmd/` -- CLI and application entry point -- `pkg/detector/` -- Detection engines (regex, ML, ensemble) -- `pkg/proxy/` -- Reverse proxy layer -- `pkg/config/` -- Configuration parsing and validation -- `internal/` -- Internal shared utilities +- `cmd/`: CLI and application entry point +- `pkg/detector/`: Detection engines (regex, ML, ensemble) +- `pkg/proxy/`: Reverse proxy layer +- `pkg/config/`: Configuration parsing and validation +- `internal/`: Internal shared utilities **Out-of-scope assets:** -- `examples/` -- Example configurations -- `docs/` -- Documentation -- `tests/` and `*_test.go` -- Test files -- `.github/` -- CI/CD workflows +- `examples/`: Example configurations +- `docs/`: Documentation +- `tests/` and `*_test.go`: Test files +- `.github/`: CI/CD workflows - Third-party dependencies (report upstream) ### 5.4 Set Severity Guidelines @@ -233,7 +233,7 @@ Configure the severity mapping from Section 3: ## 6. 
Integration with GitHub Security Advisories -In addition to Huntr, PIF should accept vulnerability reports through GitHub Security Advisories (GHSA). This provides a private channel for reports from researchers who do not use Huntr. +In addition to Huntr, PIF should accept vulnerability reports through GitHub Security Advisories (GHSA). This provides a private channel for reports from researchers who don't use Huntr. ### 6.1 Enable Security Advisories @@ -257,7 +257,7 @@ through one of the following channels: 2. **GitHub Security Advisories:** Use the "Report a vulnerability" button on the Security tab of this repository. -Please do NOT open a public GitHub issue for security vulnerabilities. +Please don't open a public GitHub issue for security vulnerabilities. ## Response Timeline @@ -323,7 +323,7 @@ Report Received Detection bypass reports require careful validation because the definition of "prompt injection" is context-dependent. When triaging bypass reports: -1. **Verify the payload is actually injection.** The payload must contain content that would manipulate an LLM's behaviour (e.g., instruction override, role hijacking, data exfiltration attempt). A random string that does not match any pattern is not a bypass. +1. **Verify the payload is actually injection.** The payload must contain content that would manipulate an LLM's behaviour (e.g., instruction override, role hijacking, data exfiltration attempt). A random string that doesn't match any pattern isn't a bypass. 2. **Test against default configuration.** First reproduce with default PIF configuration. If the bypass only works with a non-default configuration (e.g., ML engine disabled), note this in the assessment. 
diff --git a/docs/grants/nlnet-ngi-zero.md b/docs/grants/nlnet-ngi-zero.md index eccd7ae..8e11a5b 100644 --- a/docs/grants/nlnet-ngi-zero.md +++ b/docs/grants/nlnet-ngi-zero.md @@ -3,7 +3,7 @@ **Grant Programme:** NGI Zero Core / NGI Zero Review **Funding Body:** NLnet Foundation, funded by the European Commission (Next Generation Internet initiative) **URL:** https://nlnet.nl/propose/ -**Funding Range:** EUR 5,000 -- 50,000 +**Funding Range:** EUR 5,000 to 50,000 **Application Deadline:** 1 April 2026 **Applicant:** Ogulcan Aydogan **Project:** Prompt Injection Firewall (PIF) @@ -16,11 +16,11 @@ *(Target: 200 words. Use this text directly in the NLnet proposal form.)* -Prompt Injection Firewall (PIF) is an open-source reverse-proxy middleware that detects and prevents prompt injection attacks against Large Language Model (LLM) applications. Prompt injection -- ranked #1 on the OWASP Top 10 for LLM Applications -- allows adversaries to override developer instructions, manipulate model outputs, and exfiltrate data through crafted inputs. Despite widespread recognition of this threat, no open-source, protocol-level defence standard exists. +Prompt Injection Firewall (PIF) is an open-source reverse-proxy middleware that detects and prevents prompt injection attacks against Large Language Model (LLM) applications. Prompt injection, ranked #1 on the OWASP Top 10 for LLM Applications, allows adversaries to override developer instructions, manipulate model outputs, and exfiltrate data through crafted inputs. Despite widespread recognition of this threat, no open-source, protocol-level defence standard exists. PIF operates transparently between clients and LLM APIs (OpenAI, Anthropic), applying a dual-engine detection system: 129 curated regex patterns for known attack signatures and a fine-tuned DistilBERT ONNX classifier for semantic analysis. An ensemble scorer combines both engines to drive configurable response actions (block, flag, or log). 
The system adds less than 100 milliseconds of latency and deploys as a single Go binary with no external dependencies beyond the ONNX model file. -This proposal requests EUR 38,000 to fund adversarial robustness testing, detection engine expansion (indirect injection, multi-modal payloads), security audit, and documentation. PIF provides critical open infrastructure for the safe deployment of LLM applications in Europe and globally, directly supporting the NGI Zero mission of a trustworthy, open internet. +This proposal requests EUR 38,000 to fund adversarial evasion testing, detection engine expansion (indirect injection, multi-modal payloads), security audit, and documentation. PIF provides critical open infrastructure for the safe deployment of LLM applications in Europe and globally, directly supporting the NGI Zero mission of a trustworthy, open internet. --- @@ -37,9 +37,9 @@ Prompt injection attacks exploit the LLM's inability to distinguish between trus - Produce harmful, misleading, or manipulative content - Execute unintended actions in agent-based systems -The OWASP Foundation ranks prompt injection as the number-one risk for LLM applications. The EU AI Act identifies input manipulation as a concern for high-risk AI systems. Despite this, the open-source ecosystem lacks a standard, deployable defence layer. +The OWASP Foundation ranks prompt injection as the number-one risk for LLM applications. The EU AI Act identifies input manipulation as a concern for high-risk AI systems. Despite this, the open-source community lacks a standard, deployable defence layer. -Commercial solutions exist (Lakera Guard, Protect AI) but are proprietary, opaque, and create vendor dependency. Organisations cannot audit the detection logic they rely on for safety-critical filtering. +Commercial solutions exist (Lakera Guard, Protect AI) but are proprietary, opaque, and create vendor dependency. 
Organisations can't audit the detection logic they rely on for safety-critical filtering. PIF addresses this gap as open infrastructure: a transparent, auditable, and freely deployable prompt injection defence layer licensed under Apache 2.0. @@ -67,7 +67,7 @@ The grant will fund four work packages that harden PIF for broader adoption and Systematic evaluation of the detection engines against adversarial evasion techniques: -- Develop a comprehensive adversarial test suite covering encoding-based evasion (Base64, Unicode, homoglyphs), payload fragmentation, multi-language injection, and delimiter manipulation. +- Develop an adversarial test suite covering encoding-based evasion (Base64, Unicode, homoglyphs), payload fragmentation, multi-language injection, and delimiter manipulation. - Benchmark both regex and ML engines individually and as an ensemble against the adversarial suite. - Identify and document bypass vectors. - Deliverable: Adversarial test suite (open-source), evasion report, baseline metrics. @@ -89,9 +89,9 @@ Independent security review of the codebase: - Focus on vulnerabilities that could allow detection bypass, denial of service, or information leakage. - Deliverable: Audit report, remediated codebase, verification by auditor. -**WP4: Documentation and Ecosystem Integration (Weeks 8--12)** +**WP4: Documentation and Integration (Weeks 8--12)** -Comprehensive documentation to lower the barrier to adoption: +Full documentation to lower the barrier to adoption: - Deployment guides for common patterns (Docker, Kubernetes sidecar, API gateway integration). - Threat model documentation explaining what PIF defends against and its limitations. @@ -142,7 +142,7 @@ Prompt injection attacks undermine user trust in LLM-powered services. When a ch ### 5.3 No Vendor Lock-In -Unlike proprietary alternatives, PIF does not create dependency on a commercial vendor for safety-critical filtering. 
Organisations can inspect, modify, and extend every detection rule and the ML model. This transparency is essential for trust in security tooling and aligns with NLnet's commitment to open, auditable technology. +Unlike proprietary alternatives, PIF doesn't create dependency on a commercial vendor for safety-critical filtering. Organisations can inspect, modify, and extend every detection rule and the ML model. This transparency is essential for trust in security tooling and aligns with NLnet's commitment to open, auditable technology.

### 5.4 European AI Safety

@@ -155,7 +155,7 @@ The EU AI Act identifies input manipulation as a concern for high-risk AI system

NLnet provides access to automatic audit and review services for funded projects. PIF will take advantage of the following:

- **Security audit support:** NLnet can facilitate connection with security auditors through its network, potentially reducing the cost and procurement overhead for WP3.
-- **Accessibility review:** While PIF is a backend middleware (not a user-facing application), documentation and configuration interfaces will be reviewed for accessibility.
+- **Accessibility review:** Since PIF is backend middleware (not a user-facing application), documentation and configuration interfaces will be reviewed for accessibility instead.
- **Licensing and compliance review:** Verification that all dependencies and the ONNX model comply with Apache 2.0 licensing and NLnet's open-source requirements.

---

diff --git a/docs/grants/openssf-badge-guide.md b/docs/grants/openssf-badge-guide.md
index bb82296..a372e19 100644
--- a/docs/grants/openssf-badge-guide.md
+++ b/docs/grants/openssf-badge-guide.md
@@ -13,9 +13,9 @@

The OpenSSF Best Practices Badge (formerly CII Best Practices) is a free certification programme that evaluates open-source projects against a set of security, quality, and documentation criteria. 
Projects that meet the criteria earn a badge that can be displayed in their README, demonstrating adherence to industry best practices. The badge has three levels: -- **Passing** -- baseline criteria (most important for grant applications) -- **Silver** -- additional criteria around change management and quality -- **Gold** -- highest level, requires reproducible builds and dynamic analysis +- **Passing:** baseline criteria (most important for grant applications) +- **Silver:** additional criteria around change management and quality +- **Gold:** highest level, requires reproducible builds and dynamic analysis This guide maps PIF's current capabilities to the **Passing** level criteria and identifies any gaps that need to be addressed before submission. @@ -83,7 +83,7 @@ This guide maps PIF's current capabilities to the **Passing** level criteria and PIF has three CI workflows that satisfy multiple badge criteria simultaneously. -### 3.1 ci.yml -- Continuous Integration +### 3.1 ci.yml: Continuous Integration **File:** `.github/workflows/ci.yml` @@ -103,7 +103,7 @@ This workflow satisfies the following badge criteria: run: go tool cover -func=coverage.out ``` -### 3.2 codeql.yml -- Static Analysis +### 3.2 codeql.yml: Static Analysis **File:** `.github/workflows/codeql.yml` @@ -119,7 +119,7 @@ CodeQL performs semantic code analysis that detects: - Data flow problems - Go-specific security issues -### 3.3 release.yml -- Release Management +### 3.3 release.yml: Release Management **File:** `.github/workflows/release.yml` @@ -223,8 +223,8 @@ The following items should be completed before submitting for the OpenSSF badge. The questionnaire has approximately 66 criteria for the Passing level. 
For each criterion:

- Select "Met" if PIF satisfies it (provide a brief justification)
-- Select "Unmet" if it does not (and note what action is needed)
-- Select "N/A" if the criterion does not apply
+- Select "Unmet" if it doesn't (and note what action is needed)
+- Select "N/A" if the criterion doesn't apply

Use this guide's checklist (Section 2) and gap analysis (Section 6) to pre-populate your answers.

From 1af51ef6f1102b41244313b3373ff1059ca5fef2 Mon Sep 17 00:00:00 2001
From: Ogulcan Aydogan
Date: Sat, 14 Mar 2026 12:28:31 +0000
Subject: [PATCH 3/3] chore: add funding discovery files

- Add .github/FUNDING.yml for GitHub Sponsor button
- Add funding.json (FLOSS/fund v1.1.0 schema)
---
 .github/FUNDING.yml |  3 ++
 funding.json        | 73 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 .github/FUNDING.yml
 create mode 100644 funding.json

diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 0000000..20cddd3
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1,3 @@
+github: ogulcanaydogan
+custom:
+  - "https://nlnet.nl/project/prompt-injection-firewall/"
diff --git a/funding.json b/funding.json
new file mode 100644
index 0000000..d7495d4
--- /dev/null
+++ b/funding.json
@@ -0,0 +1,73 @@
+{
+  "$schema": "https://schema.fundingjson.org/v1.1.0.schema.json",
+  "version": "v1.1.0",
+  "entity": {
+    "type": "individual",
+    "role": "owner",
+    "name": "Ogulcan Aydogan",
+    "email": "security@ogulcanaydogan.com",
+    "description": "Software engineer building open-source AI security and governance infrastructure. Based in the United Kingdom.",
+    "webpageUrl": {
+      "url": "https://github.com/ogulcanaydogan"
+    }
+  },
+  "projects": [
+    {
+      "guid": "prompt-injection-firewall",
+      "name": "Prompt Injection Firewall",
+      "description": "Runtime firewall that detects and blocks prompt injection attacks against LLM applications. 
Covers OWASP LLM Top 10 attack vectors with configurable detection rules.", + "webpageUrl": { + "url": "https://github.com/ogulcanaydogan/Prompt-Injection-Firewall" + }, + "repositoryUrl": { + "url": "https://github.com/ogulcanaydogan/Prompt-Injection-Firewall" + }, + "licenses": [ + "spdx:Apache-2.0" + ], + "tags": [ + "go", + "security", + "prompt-injection", + "firewall", + "owasp" + ] + } + ], + "funding": { + "channels": [ + { + "guid": "github-sponsors", + "type": "payment-provider", + "address": "https://github.com/sponsors/ogulcanaydogan", + "description": "Support via GitHub Sponsors" + } + ], + "plans": [ + { + "guid": "monthly-supporter", + "status": "active", + "name": "Monthly Supporter", + "description": "Monthly recurring support for ongoing maintenance and development", + "amount": 10, + "currency": "USD", + "frequency": "monthly", + "channels": [ + "github-sponsors" + ] + }, + { + "guid": "one-time-sponsor", + "status": "active", + "name": "One-Time Sponsor", + "description": "One-time contribution to support the project", + "amount": 50, + "currency": "USD", + "frequency": "one-time", + "channels": [ + "github-sponsors" + ] + } + ] + } +}