diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a6027b..f95f14a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,18 @@ what got documented across releases. ## [Unreleased] +### Hardened + +- Added detection for CSS `@font-face` rules that load fonts from remote HTTP(S) URLs in retrieved + or external web content (`ii_css_font_injection`, score 55). Attackers manipulate the font's + character mapping table so that text appearing harmless to human readers encodes adversarial + instructions at the LLM token level — when an AI agent processes the HTML, it "reads" the + injection instead of the innocent-looking surface text. Both attack scenarios demonstrated by + researchers (arxiv:2505.16957, May 2026) against MCP-enabled agents — silent message relay to + an attacker server and sensitive data exfiltration via tool calls — successfully bypassed + production model safety filters. Retrieved documents processed by AI agents rarely have a + legitimate need to load custom remote fonts, making this a reliable low-false-positive signal. + ## [1.1.6] - 2026-05-18 ### Hardened diff --git a/aigis/filters/patterns.py b/aigis/filters/patterns.py index 0473207..aa26af1 100644 --- a/aigis/filters/patterns.py +++ b/aigis/filters/patterns.py @@ -1704,6 +1704,36 @@ def _p(pattern: str, flags: int = re.IGNORECASE | re.DOTALL) -> re.Pattern: "operations and do not allow tool-call arguments to be sourced from untrusted documents." ), ), + # --- CSS @font-face remote font injection (arxiv:2505.16957, May 2026) --- + # Malicious TrueType fonts loaded via @font-face remap standard ASCII characters + # to attacker-chosen glyph sequences. What human users see ("product description") + # differs from what the LLM tokenises (an injection instruction). Two attack + # scenarios were demonstrated against MCP-enabled agents: silent message relay to + # an attacker-controlled endpoint, and context exfiltration via tool calls. 
+ # Retrieved documents processed by AI agents rarely have a legitimate need to load + # remote fonts; the @font-face + external HTTP URL combination is a reliable signal. + DetectionPattern( + id="ii_css_font_injection", + name="CSS Remote Font Injection (@font-face)", + category="indirect_injection", + pattern=_p(r"@font-face\s*\{[^}]*src\s*:\s*url\(\s*['\"]?\s*https?://"), + base_score=55, + description=( + "Detects @font-face CSS rules that load a font from a remote HTTP(S) URL in retrieved " + "or external web content. Attackers manipulate the font's character-code-to-glyph " + "mapping table so that text visible to human readers encodes adversarial instructions " + "at the LLM token level. Both attack scenarios demonstrated in the original research " + "(arxiv:2505.16957, May 2026) — malicious content relay and sensitive data leakage via " + "MCP tool calls — successfully bypassed production model safety filters." + ), + owasp_ref="OWASP LLM01: Prompt Injection (Indirect)", + remediation_hint=( + "Sanitize or strip CSS content (including @font-face blocks) from all external " + "documents before inserting them into an AI agent's context. Web content retrieved " + "for RAG or browser-agent use should never load remote fonts; presence of this " + "pattern in ingested content is a strong indicator of a font-injection attack attempt." 
+ ), + ), ] diff --git a/auto-improvement/INDEX.md b/auto-improvement/INDEX.md index 213f49c..81d0543 100644 --- a/auto-improvement/INDEX.md +++ b/auto-improvement/INDEX.md @@ -4,6 +4,7 @@ | Run UTC | # | Domain | Research | Changes | Release | Pending | |---------|---|--------|----------|---------|---------|---------| +| 2026-05-18T09-01 | 0 | prompt-injection | [research](research/2026-05-18T09-01_0-prompt-injection.md) | [changes](changes/2026-05-18T09-01_changes.md) | — | 1 | | 2026-05-18T03-06 | 9 | incident-postmortems | [research](research/2026-05-18T03-06_9-incident-postmortems.md) | [changes](changes/2026-05-18T03-06_changes.md) | v1.1.5 | 1 | | 2026-05-17T09-15 | 8 | compliance-regulation | [research](research/2026-05-17T09-15_8-compliance-regulation.md) | [changes](changes/2026-05-17T09-15_changes.md) | v1.1.4 | 2 | | 2026-05-16T09-03 | 7 | evasion-obfuscation | [research](research/2026-05-16T09-03_7-evasion-obfuscation.md) | [changes](changes/2026-05-16T09-03_changes.md) | — | 2 | diff --git a/auto-improvement/ROTATION.md b/auto-improvement/ROTATION.md index 7b904ca..19899dc 100644 --- a/auto-improvement/ROTATION.md +++ b/auto-improvement/ROTATION.md @@ -6,8 +6,8 @@ aigis 自動強化ループのリサーチ領域。6 時間ごとに 1 領域ず ## 現在のカウンタ ``` -NEXT_INDEX: 0 -LAST_RUN_UTC: 2026-05-18T03-06 +NEXT_INDEX: 1 +LAST_RUN_UTC: 2026-05-18T09-01 ``` > 保守エージェントは実行開始時に `NEXT_INDEX` を読み、終了時に `(NEXT_INDEX + 1) % 10` に更新し、`LAST_RUN_UTC` を当回の開始 UTC に書き換える。 diff --git a/auto-improvement/changes/2026-05-18T09-01_changes.md b/auto-improvement/changes/2026-05-18T09-01_changes.md new file mode 100644 index 0000000..b04683a --- /dev/null +++ b/auto-improvement/changes/2026-05-18T09-01_changes.md @@ -0,0 +1,87 @@ +# Cycle Changes — 2026-05-18T09-01 + +**Domain:** 0 — `prompt-injection` +**Cycle index:** 0 +**Research file:** `research/2026-05-18T09-01_0-prompt-injection.md` + +--- + +## What was researched + +Fourth pass over the `prompt-injection` domain. 
Key papers reviewed: + +- **arxiv:2505.16957** (May 2025) — Malicious CSS `@font-face` injection: attackers remap + TrueType character code-to-glyph tables so that text visible to humans encodes LLM-level + injection instructions. Demonstrated against MCP-enabled agents with 100% scenario completion + (content relay + MCP data exfiltration) while bypassing safety filters. +- **arxiv:2601.17548** (Jan 2026) — SoK on prompt injection in agentic coding assistants + (78-study meta-analysis); ASR > 85% under adaptive attacks. +- **arxiv:2603.03637** (Mar 2026) — Image-based pixel injection; 64% ASR. Text-pattern + detection not applicable — deferred. +- **arxiv:2511.05797** (Nov 2025, IEEE S&P 2026) — Chatbot plugin RAG poisoning; 13% of + e-commerce sites already exposed; conversation-history forgery boosts injection 3–8×. + Network-layer attack, not addressable by text-pattern detection — deferred. +- **arxiv:2505.11459** (May 2025) — ProxyPrompt defense; confirms aigis' existing prompt + leakage detection direction. + +## What was implemented + +**One new detection rule** added to `INDIRECT_INJECTION_PATTERNS` in +`aigis/filters/patterns.py`: + +| Rule ID | Score | Category | What it detects | +|---------|-------|----------|-----------------| +| `ii_css_font_injection` | 55 | indirect_injection | `@font-face` CSS rules with remote HTTP(S) font sources in retrieved/external content | + +**New test file:** `tests/test_prompt_injection_cycle0_pass4.py` +- 3 registration/metadata tests +- 7 positive-case tests (attack payloads) +- 6 negative-case tests (local font, data URI, relative path, benign CSS, prose, normal docs) + +## What changed for users + +AI agents that process raw HTML (via MCP browsing, web scraping, or RAG ingestion) can now +detect when retrieved content attempts to load a custom font from a remote server — the +delivery mechanism for CSS font injection attacks. 
A match at score 55 should prompt the +operator to strip CSS from retrieved content before passing it to the agent. + +## Files touched + +- `aigis/filters/patterns.py` — added `ii_css_font_injection` DetectionPattern (~24 lines) +- `tests/test_prompt_injection_cycle0_pass4.py` — new (131 lines) +- `auto-improvement/research/2026-05-18T09-01_0-prompt-injection.md` — new research file +- `auto-improvement/changes/2026-05-18T09-01_changes.md` — this file +- `CHANGELOG.md` — Unreleased section updated +- `auto-improvement/INDEX.md` — new row added +- `auto-improvement/ROTATION.md` — NEXT_INDEX advanced to 1 + +## Quality gate results + +- **ruff format:** 1 file reformatted (patterns.py), all others already formatted +- **ruff format --check:** All 146 files already formatted (clean) +- **ruff check:** All checks passed +- **pytest:** 19 failed, 1551 passed, 5 skipped (19 failures are pre-existing, in + `test_guard.py`, `test_oss_comparison_bench.py`, `test_spec_lang.py`, + `test_release_preflight.py` — none caused by this cycle's changes) + +## Implementation caveats + +- The pattern fires on `@font-face` when `src:` begins with a `url(` pointing at `https?://`. This includes + third-party CDNs (e.g., Google Fonts). In the context of retrieved external documents, + this is acceptable (legitimate RAG content rarely needs to load custom fonts). Operators + can add allowlist rules for known-safe font CDNs if needed. +- The regex has no length bound and does not look for the closing `}`; it only rejects a `}` + between `@font-face {` and `src:`. A remote `url(...)` listed after `local(...)` is not matched. + +## Pending ideas (deferred this cycle) + +- See `pending/2026-05-18_image-based-pixel-injection-docs.md` for the image-based injection + documentation guide (text-level detection not feasible). +- Network-layer chatbot plugin hardening for conversation-history forgery (arxiv:2511.05797) + → better as a compliance template, deferred to domain 8 cycle. 
+ +## Release decision + +Current accumulated Unreleased items since v1.1.6: 1 new detection rule. +Threshold for release: 3+ new rules, a new compliance template, or a meaningful hardening step. +Decision: **no release this cycle** — accumulate further. diff --git a/auto-improvement/pending/2026-05-18_image-based-pixel-injection-docs.md b/auto-improvement/pending/2026-05-18_image-based-pixel-injection-docs.md new file mode 100644 index 0000000..5ab4b38 --- /dev/null +++ b/auto-improvement/pending/2026-05-18_image-based-pixel-injection-docs.md @@ -0,0 +1,58 @@ +# Pending: Image-Based Pixel Injection Hardening Guide + +**Title:** Documentation hardening guide for multimodal / image-based prompt injection +**Date proposed:** 2026-05-18 +**Research source:** `research/2026-05-18T09-01_0-prompt-injection.md` (arxiv:2603.03637) + +--- + +## Motivation + +Image-based Prompt Injection (IPI) embeds adversarial instructions as visible or rendered text +within natural images (photos, screenshots, diagrams). A multimodal LLM processing the image +as part of a vision task (e.g., "describe this image", "extract text from this document") may +read and execute the embedded instruction. Attack success rates reach 64% in black-box settings +against GPT-4-turbo (arxiv:2603.03637, Mar 2026). + +Unlike text-level injection, the payload is encoded in pixels — rule-based text-pattern +detection cannot catch it. Defense requires either: +1. Image pre-processing (OCR + injection scanning on extracted text), or +2. A separate multimodal injection classifier. + +## Research finding + +arxiv:2603.03637 — "Image-based Prompt Injection: Hijacking Multimodal LLMs through Visually +Embedded Adversarial Instructions" (Nagaraja et al., Mar 2026). Key findings: +- End-to-end black-box pipeline: segmentation-based region selection, adaptive font scaling, + background-aware rendering to minimize human-visible footprint. +- Up to 64% ASR against GPT-4-turbo under stealth constraints. 
+- Tested on COCO dataset with 12 adversarial prompt strategies. + +## Proposed change + +Add `docs/hardening-multimodal-injection.md` — a guide for operators deploying vision-enabled +AI agents. Should cover: +- What image-based prompt injection is and how it differs from text injection. +- Recommended defense layers: OCR-then-scan pipelines, image provenance tracking. +- aigis integration: how to run the injection scanner on OCR-extracted text before passing + it to a vision model. +- Example threat model for an AI agent that processes user-uploaded images or screenshots. + +## Why it was held back + +No implementation is needed in `aigis/` Python code. The change is purely documentation. +The documentation work is non-trivial (requires clear explanation for operators who may not +be familiar with multimodal AI) and would benefit from more research on defensive OCR pipelines +before being written. + +## Constraint that blocked it + +Step 4 guidance: "Prefer additive changes" and docs work doesn't need to be rushed. The +research hasn't converged on best-practice OCR defenses yet as of May 2026. + +## Suggested next step + +- In the next domain 0 (`prompt-injection`) cycle or a future `docs` cycle, write the guide + based on the arxiv:2603.03637 paper plus any follow-up defensive research. +- Coordinate with domain 2 (`data-exfiltration`) cycle if image-based exfiltration patterns + are documented separately. 
diff --git a/auto-improvement/research/2026-05-18T09-01_0-prompt-injection.md b/auto-improvement/research/2026-05-18T09-01_0-prompt-injection.md new file mode 100644 index 0000000..ff3c97d --- /dev/null +++ b/auto-improvement/research/2026-05-18T09-01_0-prompt-injection.md @@ -0,0 +1,118 @@ +# Research: Prompt Injection — 2026-05-18T09-01 + +**Domain index:** 0 — `prompt-injection` +**Cycle:** Fourth pass at this domain +**Cycle timestamp:** 2026-05-18T09-01 + +--- + +## Key Findings + +- **Malicious font injection via CSS @font-face remapping (arxiv:2505.16957, May 2025).** + Researchers from Xi'an Jiaotong-Liverpool University systematically studied how LLM agents + processing HTML web content can be attacked via manipulated TrueType font files injected using + standard CSS `@font-face` rules. The attack works by modifying the font's `cmap` table (the + character code-to-glyph-index mapping), so that glyphs that render as harmless text to human + users actually encode a different sequence of characters at the LLM token level. Two attack + scenarios were demonstrated against MCP-enabled agents: (1) "malicious content relay" — the + agent silently forwards user messages to an attacker-controlled endpoint; (2) "sensitive data + leakage" — the agent exfiltrates context data via MCP tool calls, bypassing model safety + mechanisms. The malicious font is loaded via a standard web CDN pattern using + `@font-face { src: url('https://attacker.com/...'); }` embedded in a scraped or RAG-ingested + web page. The attack successfully bypassed safety filters in production models. + Source: https://arxiv.org/abs/2505.16957 + **Aigis takeaway:** The delivery mechanism — `@font-face` CSS with a remote HTTP(S) URL in + retrieved/external web content — is a concrete, detectable text pattern. 
Retrieved documents + processed by an AI agent rarely have a legitimate need to load custom fonts; the presence of + `@font-face` with an external URL is a reliable signal of a font injection attempt. + +- **Prompt injection attacks on agentic coding assistants: 78-study meta-analysis (arxiv:2601.17548, Jan 2026).** + A systematic analysis of 78 studies (2021–2026) on prompt injection attacks targeting Claude + Code, GitHub Copilot, Cursor, and skill-based MCP ecosystems. The paper proposes a + three-dimensional taxonomy across delivery vectors, attack modalities, and propagation behaviors. + Key finding: attack success rates against state-of-the-art defenses exceed 85% when adaptive + attacks are used. Specific attack classes identified for coding assistants: + - Tool-result injection: malicious content embedded in tool return values (file reads, API + responses) redirects agent actions. + - Skill file poisoning: installing malicious skill files (equivalent to MCP tools) that + execute attacker-controlled code when invoked. + - Protocol exploitation: attacks specific to the MCP protocol structure. + Source: https://arxiv.org/abs/2601.17548 + **Aigis takeaway:** Tool-result injection patterns (instructions embedded in tool output) are + covered by existing INDIRECT_INJECTION_PATTERNS. Skill file poisoning is better handled in + the supply-chain domain (index 5). No new aigis pattern needed from this paper this cycle. + +- **Image-based prompt injection: 64% ASR in black-box settings (arxiv:2603.03637, Mar 2026).** + End-to-end black-box pipeline for embedding adversarial instructions in natural images using + segmentation-based region selection, adaptive font scaling, and background-aware rendering. + Tested against GPT-4-turbo on the COCO dataset; achieved up to 64% attack success under stealth + constraints. The technique is strictly a multimodal (vision) attack: the payload is rendered into + pixels, not into text or CSS. 
Rule-based text-pattern filters (like those in aigis) cannot detect + the visual payload directly; defense requires either image pre-processing or separate multimodal + classifiers. + Source: https://arxiv.org/abs/2603.03637 + **Aigis takeaway:** No new text-level aigis pattern is applicable for pixel-embedded visual + injection. Deferred for research; could be added as a documentation hardening guide in a future + docs/ cycle. + +- **Chatbot plugin RAG poisoning: 13% of e-commerce sites already exposed (arxiv:2511.05797, Nov 2025, IEEE S&P 2026).** + Study of 17 third-party chatbot plugins deployed on over 10,000 public websites. Key findings: + - 15/17 plugins scrape third-party content (comments, reviews, product listings) for RAG + without content isolation, opening a mass indirect prompt injection surface. + - 8/17 plugins fail to integrity-protect the conversation history transmitted in HTTP requests, + allowing an adversary to forge system-role messages and boost injection success 3–8x. + - Manual audit found 13% of randomly sampled e-commerce sites already had chatbot contexts + containing attacker-inserted third-party content. + Source: https://arxiv.org/abs/2511.05797 + **Aigis takeaway:** The "conversation history forgery" attack forges + `{"role": "system", "content": "..."}` JSON into chatbot API calls, but the injection happens + at the HTTP-request layer rather than in the text content aigis scans. A text-level detection + rule (`"role": "system"` in retrieved content) would have a high false-positive rate against + legitimate API documentation and code examples. Deferred; better addressed as a network-layer + hardening guide. + +- **ProxyPrompt: defense against system prompt extraction achieving 94.7% protection (arxiv:2505.11459, May 2025).** + Defense mechanism for protecting AI system prompts from extraction attacks. Replaces the original + system prompt with a proxy that preserves task utility while obfuscating the extractable prompt. 
+ Evaluated against Pleak, Raccoon, and 14 other extraction techniques. Complementary to aigis' + existing `pi_system_prompt_leak` and `PROMPT_LEAK_PATTERNS` rules. + Source: https://arxiv.org/abs/2505.11459 + **Aigis takeaway:** Confirms aigis' existing prompt-leakage detection is in the right direction. + No new pattern needed, but this paper could be referenced in the docs/ hardening guide for + operators who want server-side extraction defenses. + +- **Invisible Unicode injection in retrieved web content: documented real-world exploitation (May 2026).** + Multiple analyses (Idan Habler / Medium, Hiding in Plain Sight blog, and supporting research) + document real exploitation of non-rendering Unicode characters — zero-width joiners (U+200D), + zero-width non-joiners (U+200C), soft hyphens (U+00AD), and the Unicode Tags block (U+E0000– + U+E007F) — to embed hidden instructions in web content that gets RAG-ingested or browser- + summarized. The characters are invisible to human viewers but fully tokenized by LLMs. Aigis + already covers this via `te_zwsp_splitter`, `te_unicode_tag_smuggling`, `enc_tag_block_ascii`, + and related patterns from domain 7 cycles. + Source: https://idanhabler.medium.com/hiding-in-plain-sight-weaponizing-invisible-unicode-to-attack-llms-f9033865ec10 + **Aigis takeaway:** Already covered. No new pattern needed this cycle. + +--- + +## Candidate Hardenings + +1. **`ii_css_font_injection`** ← **IMPLEMENT THIS CYCLE** — Detect `@font-face` CSS rules with + remote HTTP(S) font-source URLs in retrieved/external web content. When an AI agent processes + raw HTML content (via MCP browsing tools, RAG ingestion, or web-page summarization), a malicious + `@font-face` rule can remap standard ASCII characters to adversarial glyph sequences that the + LLM processes as injection instructions. The delivery mechanism — `@font-face { src: url(https:// + attacker.com/...) 
}` — is a detectable, low-false-positive text pattern since retrieved documents + legitimately processed by AI agents rarely require loading custom remote fonts. + Source: arxiv:2505.16957, May 2025. Demonstrated successful attacks against production models; bypassed built-in + safety filters in both tested scenarios (malicious content relay + sensitive data leakage via MCP). + +2. **Image-based pixel injection documentation** — Deferred; text-level detection not feasible. + Suggest future `docs/hardening-multimodal-injection.md` guide for operators deploying vision- + enabled AI agents. + +3. **Network-layer HTTP request integrity for chatbot plugins** — Deferred; this is a deployment + hardening concern, not a text-pattern detection problem. Suggest a compliance template addition + in the compliance-regulation domain cycle. + +4. **ProxyPrompt-style system prompt obfuscation guide** — Deferred to docs/ hardening guide; + no new detection pattern needed since aigis already covers extraction techniques. diff --git a/tests/test_prompt_injection_cycle0_pass4.py b/tests/test_prompt_injection_cycle0_pass4.py new file mode 100644 index 0000000..6e5a9f3 --- /dev/null +++ b/tests/test_prompt_injection_cycle0_pass4.py @@ -0,0 +1,131 @@ +"""Tests for the ii_css_font_injection pattern added in cycle 0 (pass 4). + +Covers: +- ii_css_font_injection (arxiv:2505.16957, May 2025 — CSS @font-face remote font injection) + +Attackers embed @font-face CSS rules in web content to load malicious TrueType fonts +that remap ASCII characters. Human readers see innocuous text; the LLM tokenises the +injected instruction. Demonstrated against MCP-enabled agents; bypassed safety filters +in both tested attack scenarios (content relay + data exfiltration). 
+""" + +from aigis.filters.patterns import INDIRECT_INJECTION_PATTERNS + + +def _get(rule_id: str): + for p in INDIRECT_INJECTION_PATTERNS: + if p.id == rule_id: + return p + raise KeyError(f"Pattern {rule_id!r} not found in INDIRECT_INJECTION_PATTERNS") + + +# --------------------------------------------------------------------------- +# Registration +# --------------------------------------------------------------------------- + + +def test_ii_css_font_injection_registered() -> None: + ids = {p.id for p in INDIRECT_INJECTION_PATTERNS} + assert "ii_css_font_injection" in ids + + +def test_ii_css_font_injection_has_owasp_ref() -> None: + pat = _get("ii_css_font_injection") + assert "OWASP LLM01" in pat.owasp_ref + + +def test_ii_css_font_injection_has_remediation() -> None: + pat = _get("ii_css_font_injection") + assert len(pat.remediation_hint) > 50 + + +# --------------------------------------------------------------------------- +# ii_css_font_injection — positive cases (attack payloads) +# --------------------------------------------------------------------------- + + +class TestCssFontInjectionPositive: + def setup_method(self): + self.pat = _get("ii_css_font_injection") + + def test_basic_http_font_detected(self): + text = "@font-face { src: url('http://attacker.com/evil.ttf'); }" + assert self.pat.pattern.search(text) + + def test_basic_https_font_detected(self): + text = "@font-face { src: url('https://evil.example.com/m.woff2'); }" + assert self.pat.pattern.search(text) + + def test_no_quotes_url_detected(self): + text = "@font-face { src: url(https://cdn.attacker.net/font.ttf); }" + assert self.pat.pattern.search(text) + + def test_double_quotes_url_detected(self): + text = '@font-face { src: url("https://fonts.evil.io/remap.woff"); }' + assert self.pat.pattern.search(text) + + def test_multiline_font_face_detected(self): + text = ( + "@font-face {\n" + " font-family: 'MyFont';\n" + " src: url('https://attacker.com/inject.ttf');\n" + "}" + ) + assert 
self.pat.pattern.search(text) + + def test_font_family_before_src_detected(self): + text = ( + "@font-face { font-family: 'Normal'; font-weight: 400; " + "src: url('https://evil.org/payload.ttf') format('truetype'); }" + ) + assert self.pat.pattern.search(text) + + def test_embedded_in_html_detected(self): + html = ( + "<html><head><style>" + "@font-face { font-family: 'Body'; src: url('https://attacker.com/remap.ttf'); }" + "</style></head>" + "<body><p>This looks like normal product text.</p></body></html>" + ) + assert self.pat.pattern.search(html) + + +# --------------------------------------------------------------------------- +# ii_css_font_injection — negative cases (legitimate / non-attack content) +# --------------------------------------------------------------------------- + + +class TestCssFontInjectionNegative: + def setup_method(self): + self.pat = _get("ii_css_font_injection") + + def test_local_font_not_detected(self): + # Local font reference — no remote URL + text = "@font-face { src: local('Helvetica Neue'); }" + assert not self.pat.pattern.search(text) + + def test_data_uri_font_not_detected(self): + # Inline data URI — no HTTP(S) host involved + text = "@font-face { src: url('data:font/woff2;base64,AAAA...'); }" + assert not self.pat.pattern.search(text) + + def test_relative_path_font_not_detected(self): + # Relative path, not a remote URL + text = "@font-face { src: url('/assets/fonts/myfont.woff2'); }" + assert not self.pat.pattern.search(text) + + def test_css_without_font_face_not_detected(self): + text = "body { font-family: Arial, sans-serif; color: #333; }" + assert not self.pat.pattern.search(text) + + def test_font_face_text_in_prose_not_detected(self): + # Natural language mention of font-face concept, no CSS block + text = "The @font-face CSS rule lets you define custom fonts for your website." + assert not self.pat.pattern.search(text) + + def test_normal_document_not_detected(self): + text = ( + "Product description: The laptop features a 15-inch display, " + "8 GB RAM, and 256 GB SSD. Price: $799." + ) + assert not self.pat.pattern.search(text)