From 9eeb7e24a73a9c5e92b0ff5604ca861d507c1197 Mon Sep 17 00:00:00 2001
From: Siddhartha Prasad <siddhartha_prasad@brown.edu>
Date: Sat, 21 Mar 2026 14:29:53 -0400
Subject: [PATCH 1/7] Add three alternative LTL-to-English translators
 (multi-sentence)

The existing ltltoeng.py forces every LTL formula into a single sentence,
which breaks down for complex nested formulas. A single LTL formula doesn't
need to translate to a single sentence.

Three new translators, each with a different presentation style:

1. ltltoeng_structured.py - Hierarchical bullet-point format that reveals
   formula structure through indentation. Best for tutoring contexts where
   students need to understand each component.

2. ltltoeng_prose.py - Requirements-document style flowing prose with
   short sentences connected by discourse connectives ("After that point...",
   "This applies every time...").

3. ltltoeng_contextualized.py - Wason-selection-task inspired translator
   that maps abstract propositions to concrete scenarios (colored lights,
   robot tasks, traffic signals, web servers). Research shows people reason
   far better about logical rules in concrete vs. abstract framing.

All three share a common `translate(node) -> str` API. The existing
ltltoeng.py is unchanged. 36 new tests across two test files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/ltltoeng_contextualized.py      | 556 ++++++++++++++++++++++++++
 src/ltltoeng_prose.py               | 578 ++++++++++++++++++++++++++++
 src/ltltoeng_structured.py          | 552 ++++++++++++++++++++++++++
 test/test_contextualized.py         | 208 ++++++++++
 test/test_translation_comparison.py | 306 +++++++++++++++
 5 files changed, 2200 insertions(+)
 create mode 100644 src/ltltoeng_contextualized.py
 create mode 100644 src/ltltoeng_prose.py
 create mode 100644 src/ltltoeng_structured.py
 create mode 100644 test/test_contextualized.py
 create mode 100644 test/test_translation_comparison.py

diff --git a/src/ltltoeng_contextualized.py b/src/ltltoeng_contextualized.py
new file mode 100644
index 0000000..d3ba564
--- /dev/null
+++ b/src/ltltoeng_contextualized.py
@@ -0,0 +1,556 @@
+"""
+Contextualized English translation for LTL formulas.
+
+Inspired by the Wason selection task: people reason far better about logical
+rules when framed in concrete, familiar terms rather than abstract symbols.
+
+This module translates LTL formulas using a concrete "theme" — a mapping from
+abstract proposition letters to real-world descriptions.  The default theme is
+a 3-light panel (red, green, blue), but custom themes can be supplied.
+
+Public API
+----------
+    translate(node, theme=None) -> str
+    THEMES: dict of built-in theme names -> Theme objects
+
+Example:
+    node = parse_ltl_string("G(p -> F q)")
+    translate(node, theme=THEMES["traffic_lights"])
+    # => "Whenever the red light turns on, the green light must eventually turn on."
+"""
+
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Dict, Optional
+
+import ltlnode
+
+
+# ---------------------------------------------------------------------------
+# Theme definition
+# ---------------------------------------------------------------------------
+
+@dataclass
+class Theme:
+    """Maps abstract literals to concrete descriptions.
+
+    Attributes:
+        name:        Human-readable theme name.
+        description: One-line description of the scenario.
+        literals:    Maps literal names to (positive_phrase, negative_phrase).
+                     e.g. {"p": ("the red light is on", "the red light is off")}
+        event_form:  Maps literal names to an "event" phrasing for triggers.
+                     e.g. {"p": ("the red light turns on", "the red light turns off")}
+                     Falls back to the positive/negative phrase if not provided.
+    """
+    name: str
+    description: str
+    literals: Dict[str, tuple[str, str]]  # lit -> (positive, negative)
+    event_form: Dict[str, tuple[str, str]] = field(default_factory=dict)  # lit -> (becomes_true, becomes_false)
+
+    def positive(self, lit: str) -> str:
+        if lit in self.literals:
+            return self.literals[lit][0]
+        return f"'{lit}'"
+
+    def negative(self, lit: str) -> str:
+        if lit in self.literals:
+            return self.literals[lit][1]
+        return f"'{lit}' does not hold"
+
+    def event_on(self, lit: str) -> str:
+        """Event phrasing for when the literal becomes true."""
+        if lit in self.event_form:
+            return self.event_form[lit][0]
+        return self.positive(lit)
+
+    def event_off(self, lit: str) -> str:
+        """Event phrasing for when the literal becomes false."""
+        if lit in self.event_form:
+            return self.event_form[lit][1]
+        return self.negative(lit)
+
+
+# ---------------------------------------------------------------------------
+# Built-in themes
+# ---------------------------------------------------------------------------
+
+THEMES: Dict[str, Theme] = {}
+
+THEMES["lights"] = Theme(
+    name="Light Panel",
+    description="A panel with three colored lights: red, green, and blue.",
+    literals={
+        "p": ("the red light is on",    "the red light is off"),
+        "q": ("the green light is on",  "the green light is off"),
+        "r": ("the blue light is on",   "the blue light is off"),
+    },
+    event_form={
+        "p": ("the red light turns on",    "the red light turns off"),
+        "q": ("the green light turns on",  "the green light turns off"),
+        "r": ("the blue light turns on",   "the blue light turns off"),
+    },
+)
+
+THEMES["robot"] = Theme(
+    name="Robot Tasks",
+    description="A robot that can pick up items, move to a location, and charge its battery.",
+    literals={
+        "p": ("the robot is holding an item",  "the robot is not holding an item"),
+        "q": ("the robot is at the goal",      "the robot is not at the goal"),
+        "r": ("the battery is charged",        "the battery is not charged"),
+    },
+    event_form={
+        "p": ("the robot picks up an item",     "the robot drops the item"),
+        "q": ("the robot reaches the goal",     "the robot leaves the goal"),
+        "r": ("the battery finishes charging",  "the battery runs out"),
+    },
+)
+
+THEMES["traffic"] = Theme(
+    name="Traffic Intersection",
+    description="A traffic intersection with signals for north-south and east-west traffic.",
+    literals={
+        "p": ("the north-south signal is green",  "the north-south signal is red"),
+        "q": ("the east-west signal is green",     "the east-west signal is red"),
+        "r": ("a pedestrian is crossing",          "no pedestrian is crossing"),
+    },
+    event_form={
+        "p": ("the north-south signal turns green",  "the north-south signal turns red"),
+        "q": ("the east-west signal turns green",    "the east-west signal turns red"),
+        "r": ("a pedestrian starts crossing",        "the pedestrian finishes crossing"),
+    },
+)
+
+THEMES["server"] = Theme(
+    name="Web Server",
+    description="A web server handling requests, with a cache and a database.",
+    literals={
+        "p": ("a request is being processed",  "no request is being processed"),
+        "q": ("the cache has a valid entry",    "the cache is empty"),
+        "r": ("the database is available",      "the database is down"),
+    },
+    event_form={
+        "p": ("a new request arrives",         "the request completes"),
+        "q": ("the cache is populated",        "the cache is invalidated"),
+        "r": ("the database comes online",     "the database goes down"),
+    },
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _capitalize(text: str) -> str:
+    if not text:
+        return text
+    text = " ".join(text.split())
+    return text[0].upper() + text[1:] if len(text) > 1 else text.upper()
+
+
+def _ensure_period(text: str) -> str:
+    text = text.rstrip()
+    if text and text[-1] not in ".!?":
+        text += "."
+    return text
+
+
+def _join(*parts: str) -> str:
+    out = []
+    for p in parts:
+        p = p.strip()
+        if not p:
+            continue
+        p = _capitalize(p)
+        p = _ensure_period(p)
+        out.append(p)
+    return " ".join(out)
+
+
+def _same_literal(a: ltlnode.LTLNode, b: ltlnode.LTLNode) -> bool:
+    return (isinstance(a, ltlnode.LiteralNode) and
+            isinstance(b, ltlnode.LiteralNode) and
+            a.value == b.value)
+
+
+def _count_next(node: ltlnode.LTLNode) -> tuple[int, ltlnode.LTLNode]:
+    n = 0
+    while isinstance(node, ltlnode.NextNode):
+        n += 1
+        node = node.operand
+    return n, node
+
+
+def _steps(n: int) -> str:
+    if n == 1:
+        return "the very next step"
+    return f"{n} steps from now"
+
+
+# ---------------------------------------------------------------------------
+# Core translator
+# ---------------------------------------------------------------------------
+
+def _t(node: ltlnode.LTLNode, theme: Theme) -> str:
+    """Recursively translate *node* using *theme*.
+
+    Returns a sentence fragment (lowercase, no trailing period).
+    """
+
+    # --- Literal ---
+    if isinstance(node, ltlnode.LiteralNode):
+        return theme.positive(node.value)
+
+    # --- Not ---
+    if isinstance(node, ltlnode.NotNode):
+        return _t_not(node, theme)
+
+    # --- And ---
+    if isinstance(node, ltlnode.AndNode):
+        return _t_and(node, theme)
+
+    # --- Or ---
+    if isinstance(node, ltlnode.OrNode):
+        return _t_or(node, theme)
+
+    # --- Implies ---
+    if isinstance(node, ltlnode.ImpliesNode):
+        return _t_implies(node, theme)
+
+    # --- Equivalence ---
+    if isinstance(node, ltlnode.EquivalenceNode):
+        return _t_equiv(node, theme)
+
+    # --- Globally ---
+    if isinstance(node, ltlnode.GloballyNode):
+        return _t_globally(node, theme)
+
+    # --- Finally ---
+    if isinstance(node, ltlnode.FinallyNode):
+        return _t_finally(node, theme)
+
+    # --- Next ---
+    if isinstance(node, ltlnode.NextNode):
+        return _t_next(node, theme)
+
+    # --- Until ---
+    if isinstance(node, ltlnode.UntilNode):
+        return _t_until(node, theme)
+
+    return str(node)
+
+
+# --- NOT ------------------------------------------------------------------
+
+def _t_not(node: ltlnode.NotNode, theme: Theme) -> str:
+    inner = node.operand
+
+    if isinstance(inner, ltlnode.LiteralNode):
+        return theme.negative(inner.value)
+
+    # !(F p) -> never
+    if isinstance(inner, ltlnode.FinallyNode) and isinstance(inner.operand, ltlnode.LiteralNode):
+        return f"it is never the case that {theme.positive(inner.operand.value)}"
+
+    # !!p
+    if isinstance(inner, ltlnode.NotNode):
+        return _t(inner.operand, theme)
+
+    # !(p & q) -> not both
+    if isinstance(inner, ltlnode.AndNode):
+        return f"it is not the case that both {_t(inner.left, theme)} and {_t(inner.right, theme)}"
+
+    # !(p | q) -> neither/nor
+    if isinstance(inner, ltlnode.OrNode):
+        return f"neither {_t(inner.left, theme)} nor {_t(inner.right, theme)}"
+
+    # !(p -> q)
+    if isinstance(inner, ltlnode.ImpliesNode):
+        return f"{_t(inner.left, theme)}, but {theme.negative(inner.right.value) if isinstance(inner.right, ltlnode.LiteralNode) else 'not ' + _t(inner.right, theme)}"
+
+    return f"it is not the case that {_t(inner, theme)}"
+
+
+# --- AND ------------------------------------------------------------------
+
+def _t_and(node: ltlnode.AndNode, theme: Theme) -> str:
+    # !p & !q -> neither/nor
+    if isinstance(node.left, ltlnode.NotNode) and isinstance(node.right, ltlnode.NotNode):
+        ll = node.left.operand
+        rr = node.right.operand
+        if isinstance(ll, ltlnode.LiteralNode) and isinstance(rr, ltlnode.LiteralNode):
+            return f"neither {_t(ll, theme)} nor {_t(rr, theme)}"
+    return f"both {_t(node.left, theme)} and {_t(node.right, theme)}"
+
+
+# --- OR -------------------------------------------------------------------
+
+def _t_or(node: ltlnode.OrNode, theme: Theme) -> str:
+    # !p | !q -> not both
+    if isinstance(node.left, ltlnode.NotNode) and isinstance(node.right, ltlnode.NotNode):
+        ll = node.left.operand
+        rr = node.right.operand
+        if isinstance(ll, ltlnode.LiteralNode) and isinstance(rr, ltlnode.LiteralNode):
+            return f"it cannot be the case that both {_t(ll, theme)} and {_t(rr, theme)}"
+    return f"either {_t(node.left, theme)} or {_t(node.right, theme)}"
+
+
+# --- IMPLIES --------------------------------------------------------------
+
+def _t_implies(node: ltlnode.ImpliesNode, theme: Theme) -> str:
+    l = _t(node.left, theme)
+    r = _t(node.right, theme)
+
+    # (p & q) -> r
+    if isinstance(node.left, ltlnode.AndNode):
+        return f"if both {_t(node.left.left, theme)} and {_t(node.left.right, theme)}, then {r}"
+
+    # (p | q) -> r
+    if isinstance(node.left, ltlnode.OrNode):
+        return f"if either {_t(node.left.left, theme)} or {_t(node.left.right, theme)}, then {r}"
+
+    return f"if {l}, then {r}"
+
+
+# --- EQUIVALENCE ----------------------------------------------------------
+
+def _t_equiv(node: ltlnode.EquivalenceNode, theme: Theme) -> str:
+    return f"{_t(node.left, theme)} exactly when {_t(node.right, theme)}"
+
+
+# --- GLOBALLY -------------------------------------------------------------
+
+def _t_globally(node: ltlnode.GloballyNode, theme: Theme) -> str:
+    inner = node.operand
+
+    # G(!p) -> never
+    if isinstance(inner, ltlnode.NotNode):
+        negated = inner.operand
+        if isinstance(negated, ltlnode.LiteralNode):
+            return f"it is never the case that {theme.positive(negated.value)}"
+        return f"at no point may it be the case that {_t(negated, theme)}"
+
+    # G(p -> ...) patterns
+    if isinstance(inner, ltlnode.ImpliesNode):
+        left = inner.left
+        right = inner.right
+
+        # G(p -> X p) or G(p -> G p) — persistence
+        if isinstance(right, (ltlnode.NextNode, ltlnode.GloballyNode)):
+            if _same_literal(left, right.operand):
+                lit = left.value
+                return f"once {theme.event_on(lit)}, it stays that way forever"
+
+        # G(p -> F q) — response
+        if isinstance(right, ltlnode.FinallyNode):
+            if isinstance(left, ltlnode.UntilNode):
+                p = _t(left.left, theme)
+                q = _t(left.right, theme)
+                r = _t(right.operand, theme)
+                return _join(
+                    f"suppose {p} continues until {q}",
+                    f"then eventually, {r}",
+                    "this rule applies every time",
+                )
+            trigger = theme.event_on(left.value) if isinstance(left, ltlnode.LiteralNode) else _t(left, theme)
+            response = theme.positive(right.operand.value) if isinstance(right.operand, ltlnode.LiteralNode) else _t(right.operand, theme)
+            return f"whenever {trigger}, then eventually {response}"
+
+        # G(p -> X(F q)) — bounded response
+        if isinstance(right, ltlnode.NextNode) and isinstance(right.operand, ltlnode.FinallyNode):
+            trigger = theme.event_on(left.value) if isinstance(left, ltlnode.LiteralNode) else _t(left, theme)
+            response = theme.positive(right.operand.operand.value) if isinstance(right.operand.operand, ltlnode.LiteralNode) else _t(right.operand.operand, theme)
+            return f"whenever {trigger}, starting from the very next step, eventually {response}"
+
+        # G(p -> X q) — immediate response
+        if isinstance(right, ltlnode.NextNode):
+            trigger = theme.event_on(left.value) if isinstance(left, ltlnode.LiteralNode) else _t(left, theme)
+            response = _t(right.operand, theme)
+            return f"whenever {trigger}, then {response} in the very next step"
+
+        # G(p -> (q U r)) — chain precedence
+        if isinstance(right, ltlnode.UntilNode):
+            trigger = theme.event_on(left.value) if isinstance(left, ltlnode.LiteralNode) else _t(left, theme)
+            held = _t(right.left, theme)
+            goal = _t(right.right, theme)
+            return f"whenever {trigger}, it must remain the case that {held} until {goal}"
+
+        # G(p -> (F q & F r)) — chain response
+        if (isinstance(right, ltlnode.AndNode)
+                and isinstance(right.left, ltlnode.FinallyNode)
+                and isinstance(right.right, ltlnode.FinallyNode)):
+            trigger = theme.event_on(left.value) if isinstance(left, ltlnode.LiteralNode) else _t(left, theme)
+            ra = theme.event_on(right.left.operand.value) if isinstance(right.left.operand, ltlnode.LiteralNode) else _t(right.left.operand, theme)
+            rb = theme.event_on(right.right.operand.value) if isinstance(right.right.operand, ltlnode.LiteralNode) else _t(right.right.operand, theme)
+            return f"whenever {trigger}, two things must eventually happen: {ra}, and {rb}"
+
+        # G(p -> q) — generic
+        trigger = theme.event_on(left.value) if isinstance(left, ltlnode.LiteralNode) else _t(left, theme)
+        consequence = _t(right, theme)
+        return f"whenever {trigger}, it must be the case that {consequence}"
+
+    # G(F p) — recurrence
+    if isinstance(inner, ltlnode.FinallyNode):
+        fi = inner.operand
+        if isinstance(fi, ltlnode.AndNode):
+            return f"it must keep being the case, over and over forever, that {_t(fi.left, theme)} and {_t(fi.right, theme)}"
+        if isinstance(fi, ltlnode.LiteralNode):
+            return f"{theme.event_on(fi.value)} must keep happening over and over, forever"
+        target = _t(fi, theme)
+        return f"it must keep being the case, over and over forever, that {target}"
+
+    # G(G(...)) — idempotent
+    if isinstance(inner, ltlnode.GloballyNode):
+        return _t_globally(inner, theme)
+
+    # G(p & q) / G(p | q)
+    if isinstance(inner, ltlnode.AndNode):
+        return f"at every moment, {_t(inner.left, theme)} and {_t(inner.right, theme)}"
+    if isinstance(inner, ltlnode.OrNode):
+        return f"at every moment, either {_t(inner.left, theme)} or {_t(inner.right, theme)}"
+
+    # G(literal)
+    if isinstance(inner, ltlnode.LiteralNode):
+        return f"{theme.positive(inner.value)} at all times"
+    return f"at all times, {_t(inner, theme)}"
+
+
+# --- FINALLY --------------------------------------------------------------
+
+def _t_finally(node: ltlnode.FinallyNode, theme: Theme) -> str:
+    inner = node.operand
+
+    # F(F(...)) — idempotent
+    if isinstance(inner, ltlnode.FinallyNode):
+        return _t_finally(inner, theme)
+
+    # F(G(...)) patterns
+    if isinstance(inner, ltlnode.GloballyNode):
+        gi = inner.operand
+
+        # F(G(!p))
+        if isinstance(gi, ltlnode.NotNode) and isinstance(gi.operand, ltlnode.LiteralNode):
+            return _join(
+                f"eventually, {theme.event_off(gi.operand.value)}",
+                f"and from that point on, {theme.event_on(gi.operand.value)} never happens again",
+            )
+
+        # F(G(p -> F q))
+        if isinstance(gi, ltlnode.ImpliesNode) and isinstance(gi.right, ltlnode.FinallyNode):
+            trigger = theme.event_on(gi.left.value) if isinstance(gi.left, ltlnode.LiteralNode) else _t(gi.left, theme)
+            response = theme.positive(gi.right.operand.value) if isinstance(gi.right.operand, ltlnode.LiteralNode) else _t(gi.right.operand, theme)
+            return _join(
+                "eventually, the system stabilizes",
+                f"from that point on, whenever {trigger}, then eventually {response}",
+            )
+
+        # F(G(p & q))
+        if isinstance(gi, ltlnode.AndNode):
+            return f"eventually, {_t(gi.left, theme)} and {_t(gi.right, theme)}, and they stay that way forever"
+
+        # F(G p) — generic persistence
+        target = _t(gi, theme)
+        return f"eventually, {target}, and it stays that way forever"
+
+    # F(!p)
+    if isinstance(inner, ltlnode.NotNode) and isinstance(inner.operand, ltlnode.LiteralNode):
+        return f"eventually, {theme.event_off(inner.operand.value)}"
+
+    # F(p & G q) / F(G q & p) — persistence after trigger
+    if isinstance(inner, ltlnode.AndNode):
+        l, r = inner.left, inner.right
+        if isinstance(r, ltlnode.GloballyNode):
+            trigger = _t(l, theme)
+            persist = _t(r.operand, theme)
+            return _join(f"eventually, {trigger}", f"from that point on, {persist} forever")
+        if isinstance(l, ltlnode.GloballyNode):
+            trigger = _t(r, theme)
+            persist = _t(l.operand, theme)
+            return _join(f"eventually, {trigger}", f"from that point on, {persist} forever")
+        return f"eventually, {_t(l, theme)} and {_t(r, theme)} at the same time"
+
+    # F(p -> G q) — trigger to permanence
+    if isinstance(inner, ltlnode.ImpliesNode) and isinstance(inner.right, ltlnode.GloballyNode):
+        trigger = theme.event_on(inner.left.value) if isinstance(inner.left, ltlnode.LiteralNode) else _t(inner.left, theme)
+        result = _t(inner.right.operand, theme)
+        return f"eventually, once {trigger}, then {result} forever after"
+
+    # F(literal)
+    if isinstance(inner, ltlnode.LiteralNode):
+        return f"eventually, {theme.event_on(inner.value)}"
+
+    return f"eventually, {_t(inner, theme)}"
+
+
+# --- NEXT -----------------------------------------------------------------
+
+def _t_next(node: ltlnode.NextNode, theme: Theme) -> str:
+    steps, core = _count_next(node)
+
+    if steps == 1 and isinstance(core, ltlnode.UntilNode):
+        l = _t(core.left, theme)
+        r = _t(core.right, theme)
+        return f"starting from the next step, {l} until {r}"
+
+    if steps == 1 and isinstance(core, ltlnode.FinallyNode):
+        target = _t(core.operand, theme)
+        return f"starting from the next step, {target} must eventually happen"
+
+    target = _t(core, theme)
+    return f"in {_steps(steps)}, {target}"
+
+
+# --- UNTIL ----------------------------------------------------------------
+
+def _t_until(node: ltlnode.UntilNode, theme: Theme) -> str:
+    l_node, r_node = node.left, node.right
+
+    # (G p) U (F q)
+    if isinstance(l_node, ltlnode.GloballyNode) and isinstance(r_node, ltlnode.FinallyNode):
+        l = _t(l_node.operand, theme)
+        r_event = theme.event_on(r_node.operand.value) if isinstance(r_node.operand, ltlnode.LiteralNode) else _t(r_node.operand, theme)
+        return _join(
+            f"it must stay the case that {l}",
+            f"this continues until eventually {r_event}",
+        )
+
+    # (p U q) U r
+    if isinstance(l_node, ltlnode.UntilNode):
+        p = _t(l_node.left, theme)
+        q = _t(l_node.right, theme)
+        r = _t(r_node, theme)
+        return _join(
+            f"first, {p} continues until {q}",
+            f"that whole phase lasts until {r}",
+        )
+
+    l = _t(l_node, theme)
+    r = _t(r_node, theme)
+    return f"{l} must continue until {r}"
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+def translate(node: ltlnode.LTLNode, theme: Optional[Theme] = None) -> str:
+    """Translate an LTL AST node into contextualized English.
+
+    Args:
+        node:  LTL formula AST node.
+        theme: A Theme mapping literals to concrete descriptions.
+               Defaults to the "lights" theme (red/green/blue lights).
+
+    Returns:
+        A capitalized, period-terminated English paragraph.
+    """
+    if theme is None:
+        theme = THEMES["lights"]
+
+    raw = _t(node, theme)
+    raw = raw.strip()
+    result = _capitalize(raw)
+    if result and result[-1] not in ".!?":
+        result += "."
+    return result
diff --git a/src/ltltoeng_prose.py b/src/ltltoeng_prose.py
new file mode 100644
index 0000000..cbd5fd6
--- /dev/null
+++ b/src/ltltoeng_prose.py
@@ -0,0 +1,578 @@
+"""
+Clause-chaining prose translator for LTL formulas.
+
+Produces flowing, natural prose that reads like a paragraph from a
+requirements document.  A single LTL formula may become multiple short
+sentences connected by discourse connectives ("After that point, ...",
+"Suppose that ...", "This applies every time ...").
+
+Public API
+----------
+    translate(node) -> str
+
+``node`` is any ``ltlnode.LTLNode`` produced by ``parse_ltl_string``.
+"""
+
+from __future__ import annotations
+
+import ltlnode
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _lit(node: ltlnode.LTLNode) -> str:
+    """Return the quoted literal value, e.g. ``'p'``."""
+    if isinstance(node, ltlnode.LiteralNode):
+        return f"'{node.value}'"
+    # Fallback: translate the subtree inline.
+    return _inner(node, _Ctx())
+
+
+def _is_lit(node: ltlnode.LTLNode) -> bool:
+    return isinstance(node, ltlnode.LiteralNode)
+
+
+def _same_literal(a: ltlnode.LTLNode, b: ltlnode.LTLNode) -> bool:
+    """True when *a* and *b* are LiteralNodes with identical value."""
+    return (_is_lit(a) and _is_lit(b) and a.value == b.value)
+
+
+def _capitalize(text: str) -> str:
+    """Capitalize the first letter, but leave quoted literals alone."""
+    if not text:
+        return text
+    text = " ".join(text.split())  # collapse whitespace
+    if text[0] == "'":
+        return text
+    return text[0].upper() + text[1:]
+
+
+def _ensure_period(text: str) -> str:
+    """Ensure *text* ends with exactly one period."""
+    text = text.rstrip()
+    if text and text[-1] not in ".!?":
+        text += "."
+    return text
+
+
+def _join_sentences(*parts: str) -> str:
+    """Join sentence fragments, capitalizing and adding periods."""
+    out: list[str] = []
+    for p in parts:
+        p = p.strip()
+        if not p:
+            continue
+        p = _capitalize(p)
+        p = _ensure_period(p)
+        out.append(p)
+    return " ".join(out)
+
+
+# ---------------------------------------------------------------------------
+# Translation context
+# ---------------------------------------------------------------------------
+
+class _Ctx:
+    """Tracks the temporal frame the translator is currently inside.
+
+    *frame* is one of:
+        ``"top"``      -- outermost level, no enclosing temporal operator
+        ``"globally"`` -- inside a G(...)
+        ``"finally"``  -- inside an F(...)
+        ``"next"``     -- inside an X(...)
+        ``"until"``    -- inside a U(...)
+    """
+
+    __slots__ = ("frame", "inline")
+
+    def __init__(self, frame: str = "top", inline: bool = False):
+        self.frame = frame
+        # When *inline* is True the caller wants a short fragment that can be
+        # embedded inside another sentence (no leading capital, no trailing
+        # period, no discourse connectives).
+        self.inline = inline
+
+    def child(self, frame: str | None = None, *, inline: bool | None = None) -> "_Ctx":
+        return _Ctx(
+            frame=frame if frame is not None else self.frame,
+            inline=inline if inline is not None else self.inline,
+        )
+
+
+# ---------------------------------------------------------------------------
+# Complexity heuristic
+# ---------------------------------------------------------------------------
+
+def _depth(node: ltlnode.LTLNode) -> int:
+    """Return the AST depth (leaves = 0)."""
+    if _is_lit(node):
+        return 0
+    if isinstance(node, ltlnode.UnaryOperatorNode):
+        return 1 + _depth(node.operand)
+    if isinstance(node, ltlnode.BinaryOperatorNode):
+        return 1 + max(_depth(node.left), _depth(node.right))
+    return 0
+
+
+def _is_simple(node: ltlnode.LTLNode) -> bool:
+    """True for nodes simple enough to translate as one inline clause."""
+    return _depth(node) <= 1
+
+
+# ---------------------------------------------------------------------------
+# Core recursive translator
+# ---------------------------------------------------------------------------
+
+def _inner(node: ltlnode.LTLNode, ctx: _Ctx) -> str:
+    """Recursively translate *node* in the given context.
+
+    Returns a **sentence fragment** (lowercase start, no trailing period)
+    unless the node is complex enough to warrant its own sentence(s), in
+    which case it returns already-joined prose.
+    """
+
+    # --- Literal -----------------------------------------------------------
+    if isinstance(node, ltlnode.LiteralNode):
+        return f"'{node.value}'"
+
+    # --- Not ---------------------------------------------------------------
+    if isinstance(node, ltlnode.NotNode):
+        return _translate_not(node, ctx)
+
+    # --- And / Or ----------------------------------------------------------
+    if isinstance(node, ltlnode.AndNode):
+        return _translate_and(node, ctx)
+    if isinstance(node, ltlnode.OrNode):
+        return _translate_or(node, ctx)
+
+    # --- Implies -----------------------------------------------------------
+    if isinstance(node, ltlnode.ImpliesNode):
+        return _translate_implies(node, ctx)
+
+    # --- Equivalence -------------------------------------------------------
+    if isinstance(node, ltlnode.EquivalenceNode):
+        return _translate_equivalence(node, ctx)
+
+    # --- Globally ----------------------------------------------------------
+    if isinstance(node, ltlnode.GloballyNode):
+        return _translate_globally(node, ctx)
+
+    # --- Finally -----------------------------------------------------------
+    if isinstance(node, ltlnode.FinallyNode):
+        return _translate_finally(node, ctx)
+
+    # --- Next --------------------------------------------------------------
+    if isinstance(node, ltlnode.NextNode):
+        return _translate_next(node, ctx)
+
+    # --- Until -------------------------------------------------------------
+    if isinstance(node, ltlnode.UntilNode):
+        return _translate_until(node, ctx)
+
+    # Fallback: use the node's own __str__
+    return str(node)
+
+
+# ---------------------------------------------------------------------------
+# Operator translators
+# ---------------------------------------------------------------------------
+
+# --- NOT ------------------------------------------------------------------
+
+def _translate_not(node: ltlnode.NotNode, ctx: _Ctx) -> str:
+    inner = node.operand
+
+    # !(F p) => "'p' never occurs."
+    if isinstance(inner, ltlnode.FinallyNode):
+        target = _inner(inner.operand, ctx.child(inline=True))
+        return f"{target} never occurs"
+
+    # !(G p) => "it is not always the case that 'p' holds"
+    if isinstance(inner, ltlnode.GloballyNode):
+        target = _inner(inner.operand, ctx.child(inline=True))
+        return f"it is not always the case that {target} holds"
+
+    # !!p => p  (double negation)
+    if isinstance(inner, ltlnode.NotNode):
+        return _inner(inner.operand, ctx)
+
+    # !(p & q) => De Morgan
+    if isinstance(inner, ltlnode.AndNode):
+        l = _inner(inner.left, ctx.child(inline=True))
+        r = _inner(inner.right, ctx.child(inline=True))
+        return f"not both {l} and {r}"
+
+    # !(p | q) => De Morgan
+    if isinstance(inner, ltlnode.OrNode):
+        l = _inner(inner.left, ctx.child(inline=True))
+        r = _inner(inner.right, ctx.child(inline=True))
+        return f"neither {l} nor {r}"
+
+    # !(p -> q)
+    if isinstance(inner, ltlnode.ImpliesNode):
+        l = _inner(inner.left, ctx.child(inline=True))
+        r = _inner(inner.right, ctx.child(inline=True))
+        return f"{l} holds, but {r} does not"
+
+    # Generic negation
+    if _is_lit(inner):
+        return f"'{inner.value}' does not hold"
+    target = _inner(inner, ctx.child(inline=True))
+    return f"it is not the case that {target}"
+
+
+# --- AND ------------------------------------------------------------------
+
+def _translate_and(node: ltlnode.AndNode, ctx: _Ctx) -> str:
+    # !p & !q  => neither ... nor
+    if isinstance(node.left, ltlnode.NotNode) and isinstance(node.right, ltlnode.NotNode):
+        ll = _inner(node.left.operand, ctx.child(inline=True))
+        rr = _inner(node.right.operand, ctx.child(inline=True))
+        return f"neither {ll} nor {rr}"
+
+    l = _inner(node.left, ctx.child(inline=True))
+    r = _inner(node.right, ctx.child(inline=True))
+    return f"both {l} and {r}"
+
+
+# --- OR -------------------------------------------------------------------
+
+def _translate_or(node: ltlnode.OrNode, ctx: _Ctx) -> str:
+    # !p | !q  => not both ... and
+    if isinstance(node.left, ltlnode.NotNode) and isinstance(node.right, ltlnode.NotNode):
+        ll = _inner(node.left.operand, ctx.child(inline=True))
+        rr = _inner(node.right.operand, ctx.child(inline=True))
+        return f"not both {ll} and {rr}"
+
+    l = _inner(node.left, ctx.child(inline=True))
+    r = _inner(node.right, ctx.child(inline=True))
+    return f"either {l} or {r}"
+
+
+# --- IMPLIES --------------------------------------------------------------
+
+def _translate_implies(node: ltlnode.ImpliesNode, ctx: _Ctx) -> str:
+    l = _inner(node.left, ctx.child(inline=True))
+    r = _inner(node.right, ctx.child(inline=True))
+
+    # p -> !q  (exclusion)
+    if isinstance(node.right, ltlnode.NotNode):
+        rr = _inner(node.right.operand, ctx.child(inline=True))
+        if _is_lit(node.left) and _is_lit(node.right.operand):
+            return f"{l} excludes {rr}"
+        return f"if {l} holds, then {rr} does not"
+
+    # !p -> q  (unless)
+    if isinstance(node.left, ltlnode.NotNode):
+        ll = _inner(node.left.operand, ctx.child(inline=True))
+        return f"{r} unless {ll}"
+
+    # (p & q) -> r
+    if isinstance(node.left, ltlnode.AndNode):
+        pl = _inner(node.left.left, ctx.child(inline=True))
+        pr = _inner(node.left.right, ctx.child(inline=True))
+        return f"if both {pl} and {pr}, then {r}"
+
+    # (p | q) -> r
+    if isinstance(node.left, ltlnode.OrNode):
+        pl = _inner(node.left.left, ctx.child(inline=True))
+        pr = _inner(node.left.right, ctx.child(inline=True))
+        return f"if either {pl} or {pr}, then {r}"
+
+    # p -> (q & r)
+    if isinstance(node.right, ltlnode.AndNode):
+        rl = _inner(node.right.left, ctx.child(inline=True))
+        rr = _inner(node.right.right, ctx.child(inline=True))
+        return f"if {l} holds, then both {rl} and {rr} follow"
+
+    # p -> (q | r)
+    if isinstance(node.right, ltlnode.OrNode):
+        rl = _inner(node.right.left, ctx.child(inline=True))
+        rr = _inner(node.right.right, ctx.child(inline=True))
+        return f"if {l} holds, then either {rl} or {rr}"
+
+    return f"if {l} holds, then {r}"
+
+
+# --- EQUIVALENCE ----------------------------------------------------------
+
+def _translate_equivalence(node: ltlnode.EquivalenceNode, ctx: _Ctx) -> str:
+    l = _inner(node.left, ctx.child(inline=True))
+    r = _inner(node.right, ctx.child(inline=True))
+    return f"{l} holds exactly when {r} holds"
+
+
+# --- GLOBALLY -------------------------------------------------------------
+
+def _translate_globally(node: ltlnode.GloballyNode, ctx: _Ctx) -> str:
+    inner = node.operand
+
+    # G(!p)  => "'p' never holds."
+    if isinstance(inner, ltlnode.NotNode):
+        target = _inner(inner.operand, ctx.child("globally", inline=True))
+        return f"{target} never holds"
+
+    # G(p -> ...) patterns
+    if isinstance(inner, ltlnode.ImpliesNode):
+        left = inner.left
+        right = inner.right
+
+        # -- G(p -> X p) or G(p -> G p):  persistence
+        if isinstance(right, (ltlnode.NextNode, ltlnode.GloballyNode)):
+            rhs_inner = right.operand
+            if _same_literal(left, rhs_inner):
+                lit = _lit(left)
+                return f"once {lit} becomes true, it remains true forever"
+
+        # -- G(p -> F q):  response pattern
+        if isinstance(right, ltlnode.FinallyNode):
+            # G((p U q) -> F r)
+            if isinstance(left, ltlnode.UntilNode):
+                p = _inner(left.left, ctx.child("globally", inline=True))
+                q = _inner(left.right, ctx.child("globally", inline=True))
+                r = _inner(right.operand, ctx.child("globally", inline=True))
+                return _join_sentences(
+                    f"suppose {p} holds continuously until {q} occurs",
+                    f"then {r} must eventually follow",
+                    "this applies every time such a situation arises",
+                )
+
+            lhs = _inner(left, ctx.child("globally", inline=True))
+            rhs = _inner(right.operand, ctx.child("globally", inline=True))
+            return f"whenever {lhs} holds, {rhs} must eventually follow"
+
+        # -- G(p -> X(F q)):  bounded response
+        if isinstance(right, ltlnode.NextNode) and isinstance(right.operand, ltlnode.FinallyNode):
+            lhs = _inner(left, ctx.child("globally", inline=True))
+            rhs = _inner(right.operand.operand, ctx.child("globally", inline=True))
+            return f"whenever {lhs} holds, starting from the very next step, {rhs} is guaranteed to eventually occur"
+
+        # -- G(p -> X q):  immediate response (not same literal)
+        if isinstance(right, ltlnode.NextNode):
+            lhs = _inner(left, ctx.child("globally", inline=True))
+            rhs = _inner(right.operand, ctx.child("globally", inline=True))
+            return f"whenever {lhs} holds, {rhs} must hold in the very next step"
+
+        # -- G(p -> (q U r)):  chain precedence
+        if isinstance(right, ltlnode.UntilNode):
+            lhs = _inner(left, ctx.child("globally", inline=True))
+            ul = _inner(right.left, ctx.child("globally", inline=True))
+            ur = _inner(right.right, ctx.child("globally", inline=True))
+            return f"whenever {lhs} holds, {ul} must hold until {ur} occurs"
+
+        # -- G(p -> (F q & F r)):  chain response
+        if isinstance(right, ltlnode.AndNode):
+            if isinstance(right.left, ltlnode.FinallyNode) and isinstance(right.right, ltlnode.FinallyNode):
+                lhs = _inner(left, ctx.child("globally", inline=True))
+                rl = _inner(right.left.operand, ctx.child("globally", inline=True))
+                rr = _inner(right.right.operand, ctx.child("globally", inline=True))
+                return f"whenever {lhs} holds, both {rl} and {rr} are guaranteed to eventually occur (though not necessarily at the same time)"
+
+        # -- G(p -> q):  generic rule
+        lhs = _inner(left, ctx.child("globally", inline=True))
+        rhs = _inner(right, ctx.child("globally", inline=True))
+        return f"whenever {lhs} holds, {rhs} must also hold"
+
+    # G(F p)  => recurrence / infinitely often
+    if isinstance(inner, ltlnode.FinallyNode):
+        fi = inner.operand
+        # G(F(p & q))
+        if isinstance(fi, ltlnode.AndNode):
+            l = _inner(fi.left, ctx.child("globally", inline=True))
+            r = _inner(fi.right, ctx.child("globally", inline=True))
+            return f"both {l} and {r} must occur together infinitely often"
+        target = _inner(fi, ctx.child("globally", inline=True))
+        return f"{target} must occur infinitely often"
+
+    # G(G(...)) => idempotent
+    if isinstance(inner, ltlnode.GloballyNode):
+        return _translate_globally(inner, ctx)
+
+    # G(p & q) / G(p | q) - simple
+    if isinstance(inner, ltlnode.AndNode):
+        l = _inner(inner.left, ctx.child("globally", inline=True))
+        r = _inner(inner.right, ctx.child("globally", inline=True))
+        return f"at all times, both {l} and {r} must hold"
+
+    if isinstance(inner, ltlnode.OrNode):
+        l = _inner(inner.left, ctx.child("globally", inline=True))
+        r = _inner(inner.right, ctx.child("globally", inline=True))
+        return f"at all times, either {l} or {r} must hold"
+
+    # G(literal) or G(complex)
+    target = _inner(inner, ctx.child("globally", inline=True))
+    if _is_lit(inner):
+        return f"{target} must hold at all times"
+    return f"at all times, {target}"
+
+
+# --- FINALLY --------------------------------------------------------------
+
+def _translate_finally(node: ltlnode.FinallyNode, ctx: _Ctx) -> str:
+    inner = node.operand
+
+    # F(F(...)) => idempotent
+    if isinstance(inner, ltlnode.FinallyNode):
+        return _translate_finally(inner, ctx)
+
+    # F(G(...)) patterns
+    if isinstance(inner, ltlnode.GloballyNode):
+        gi = inner.operand
+
+        # F(G(!p))
+        if isinstance(gi, ltlnode.NotNode):
+            target = _inner(gi.operand, ctx.child("finally", inline=True))
+            return f"eventually, a point is reached after which {target} never holds again"
+
+        # F(G(p -> F q))
+        if isinstance(gi, ltlnode.ImpliesNode) and isinstance(gi.right, ltlnode.FinallyNode):
+            lhs = _inner(gi.left, ctx.child("finally", inline=True))
+            rhs = _inner(gi.right.operand, ctx.child("finally", inline=True))
+            return _join_sentences(
+                "eventually, a stable regime is reached",
+                f"after that point, whenever {lhs} holds, {rhs} must eventually follow",
+            )
+
+        # F(G(p -> q))  generic stable rule
+        if isinstance(gi, ltlnode.ImpliesNode):
+            lhs = _inner(gi.left, ctx.child("finally", inline=True))
+            rhs = _inner(gi.right, ctx.child("finally", inline=True))
+            return _join_sentences(
+                "eventually, a stable regime is reached",
+                f"after that point, whenever {lhs} holds, {rhs} must also hold",
+            )
+
+        # F(G(p & q))
+        if isinstance(gi, ltlnode.AndNode):
+            l = _inner(gi.left, ctx.child("finally", inline=True))
+            r = _inner(gi.right, ctx.child("finally", inline=True))
+            return f"eventually, both {l} and {r} become true and remain true forever"
+
+        # F(G p) generic persistence / stability
+        target = _inner(gi, ctx.child("finally", inline=True))
+        return f"eventually, {target} becomes true and remains true forever"
+
+    # F(!p)
+    if isinstance(inner, ltlnode.NotNode):
+        target = _inner(inner.operand, ctx.child("finally", inline=True))
+        return f"eventually, {target} will cease to hold"
+
+    # F(p & G q)  or  F(G q & p)  -  persistence after trigger
+    if isinstance(inner, ltlnode.AndNode):
+        l, r = inner.left, inner.right
+        if isinstance(r, ltlnode.GloballyNode):
+            trigger = _inner(l, ctx.child("finally", inline=True))
+            persist = _inner(r.operand, ctx.child("finally", inline=True))
+            return _join_sentences(
+                f"eventually, {trigger} occurs",
+                f"from that point on, {persist} holds forever",
+            )
+        if isinstance(l, ltlnode.GloballyNode):
+            trigger = _inner(r, ctx.child("finally", inline=True))
+            persist = _inner(l.operand, ctx.child("finally", inline=True))
+            return _join_sentences(
+                f"eventually, {trigger} occurs",
+                f"from that point on, {persist} holds forever",
+            )
+        # F(p & q) simple simultaneity
+        ll = _inner(l, ctx.child("finally", inline=True))
+        rr = _inner(r, ctx.child("finally", inline=True))
+        return f"eventually, both {ll} and {rr} will be true at the same time"
+
+    # F(p -> G q)  trigger-to-permanence
+    if isinstance(inner, ltlnode.ImpliesNode) and isinstance(inner.right, ltlnode.GloballyNode):
+        trigger = _inner(inner.left, ctx.child("finally", inline=True))
+        result = _inner(inner.right.operand, ctx.child("finally", inline=True))
+        return f"eventually, once {trigger} holds, {result} will hold forever after"
+
+    # F(literal)
+    if _is_lit(inner):
+        return f"{_lit(inner)} must eventually occur"
+
+    # Generic F(...)
+    target = _inner(inner, ctx.child("finally", inline=True))
+    return f"eventually, {target}"
+
+
+# --- NEXT -----------------------------------------------------------------
+
+def _count_next_chain(node: ltlnode.LTLNode) -> tuple[int, ltlnode.LTLNode]:
+    """Count consecutive X nodes; return (count, innermost operand)."""
+    n = 0
+    while isinstance(node, ltlnode.NextNode):
+        n += 1
+        node = node.operand
+    return n, node
+
+
+def _translate_next(node: ltlnode.NextNode, ctx: _Ctx) -> str:
+    steps, core = _count_next_chain(node)
+
+    # X(p U q) => "starting from the next step, ..."
+    if steps == 1 and isinstance(core, ltlnode.UntilNode):
+        l = _inner(core.left, ctx.child("next", inline=True))
+        r = _inner(core.right, ctx.child("next", inline=True))
+        return f"starting from the next step, {l} holds until {r} occurs"
+
+    # X(F q) => "starting from the next step, q must eventually occur"
+    if steps == 1 and isinstance(core, ltlnode.FinallyNode):
+        target = _inner(core.operand, ctx.child("next", inline=True))
+        return f"starting from the next step, {target} must eventually occur"
+
+    target = _inner(core, ctx.child("next", inline=True))
+
+    if steps == 1:
+        return f"in the very next step, {target}"
+    return f"in {steps} steps, {target}"
+
+
+# --- UNTIL ----------------------------------------------------------------
+
+def _translate_until(node: ltlnode.UntilNode, ctx: _Ctx) -> str:
+    l_node, r_node = node.left, node.right
+
+    # (G p) U (F q) => obligation until release
+    if isinstance(l_node, ltlnode.GloballyNode) and isinstance(r_node, ltlnode.FinallyNode):
+        l = _inner(l_node.operand, ctx.child("until", inline=True))
+        r = _inner(r_node.operand, ctx.child("until", inline=True))
+        return _join_sentences(
+            f"{l} must hold continuously at all times",
+            f"this obligation persists until {r} eventually occurs",
+        )
+
+    # (p U q) U r => nested until
+    if isinstance(l_node, ltlnode.UntilNode):
+        p = _inner(l_node.left, ctx.child("until", inline=True))
+        q = _inner(l_node.right, ctx.child("until", inline=True))
+        r = _inner(r_node, ctx.child("until", inline=True))
+        return _join_sentences(
+            f"first, {p} holds until {q} occurs",
+            f"that whole phase lasts until {r} occurs",
+        )
+
+    l = _inner(l_node, ctx.child("until", inline=True))
+    r = _inner(r_node, ctx.child("until", inline=True))
+    return f"{l} must hold until {r} occurs"
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+def translate(node: ltlnode.LTLNode) -> str:
+    """Translate an LTL AST node into requirements-style English prose.
+
+    The returned string is a complete, capitalized, period-terminated
+    paragraph (possibly multiple sentences for complex formulas).
+    """
+    raw = _inner(node, _Ctx())
+    # If _inner already produced joined sentences (with periods inside),
+    # just ensure it's clean.
+    raw = raw.strip()
+    # Capitalize and terminate.
+    result = _capitalize(raw)
+    if result and result[-1] not in ".!?":
+        result += "."
+    return result
diff --git a/src/ltltoeng_structured.py b/src/ltltoeng_structured.py
new file mode 100644
index 0000000..35284e6
--- /dev/null
+++ b/src/ltltoeng_structured.py
@@ -0,0 +1,552 @@
+"""Structured, hierarchical English translation for LTL formulas.
+
+Instead of forcing every LTL formula into a single flat sentence (as ltltoeng
+does), this module produces indented, bullet-point explanations that reveal the
+structure of the formula.  Simple formulas still get a single line; complex ones
+are broken into labeled components (Condition, Response, Requirement, Rule).
+
+Public API
+----------
+translate(node) -> str
+    Given an LTLNode AST, return a human-readable English string.
+    For simple formulas the result is a single line.
+    For complex formulas the result is a multi-line, indented explanation.
+"""
+
+from __future__ import annotations
+
+import ltlnode
+
+
+# ---------------------------------------------------------------------------
+# Complexity metric
+# ---------------------------------------------------------------------------
+
+def _temporal_depth(node: ltlnode.LTLNode) -> int:
+    """Return the nesting depth of temporal operators in *node*.
+
+    Temporal operators are G, F, X, and U.  Propositional connectives (&, |,
+    ->, <->, !) do *not* increase temporal depth -- they only propagate the
+    maximum depth of their children.
+
+    A bare literal has depth 0.
+    """
+    if isinstance(node, ltlnode.LiteralNode):
+        return 0
+
+    if isinstance(node, (ltlnode.GloballyNode, ltlnode.FinallyNode,
+                         ltlnode.NextNode)):
+        return 1 + _temporal_depth(node.operand)
+
+    if isinstance(node, ltlnode.UntilNode):
+        return 1 + max(_temporal_depth(node.left), _temporal_depth(node.right))
+
+    # Propositional connectives: Not, And, Or, Implies, Equivalence
+    if isinstance(node, ltlnode.NotNode):
+        return _temporal_depth(node.operand)
+
+    if isinstance(node, (ltlnode.AndNode, ltlnode.OrNode,
+                         ltlnode.ImpliesNode, ltlnode.EquivalenceNode)):
+        return max(_temporal_depth(node.left), _temporal_depth(node.right))
+
+    # Fallback for unknown node types
+    return 0
+
+
+# The threshold at which we switch from single-line to structured output.
+_COMPLEXITY_THRESHOLD = 2
+
+
+def _is_simple(node: ltlnode.LTLNode) -> bool:
+    """True when *node* is simple enough for a single-line translation."""
+    return _temporal_depth(node) < _COMPLEXITY_THRESHOLD
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+_INDENT = "  "
+
+
+def _indent(text: str, level: int) -> str:
+    """Indent every line of *text* by *level* levels."""
+    prefix = _INDENT * level
+    return "\n".join(prefix + line for line in text.splitlines())
+
+
+def _bullet(text: str, level: int) -> str:
+    """Return *text* as a bulleted line at the given indentation level."""
+    prefix = _INDENT * level
+    return f"{prefix}- {text}"
+
+
+def _quote(node: ltlnode.LTLNode) -> str:
+    """Quote a literal node's value.  Non-literals get their LTL string."""
+    if isinstance(node, ltlnode.LiteralNode):
+        return f"'{node.value}'"
+    return str(node)
+
+
+def _count_next_chain(node: ltlnode.LTLNode):
+    """Count consecutive X (Next) wrappers.  Returns (count, inner_node)."""
+    n = 0
+    while isinstance(node, ltlnode.NextNode):
+        n += 1
+        node = node.operand
+    return n, node
+
+
+def _steps_phrase(count: int) -> str:
+    """Human-readable phrase like '3 steps from now'."""
+    if count == 1:
+        return "the next step"
+    return f"{count} steps from now"
+
+
+# ---------------------------------------------------------------------------
+# Single-line (flat) translator  -- used for simple sub-formulas
+# ---------------------------------------------------------------------------
+
+def _flat(node: ltlnode.LTLNode) -> str:
+    """Produce a concise, single-line English translation of *node*.
+
+    This is deliberately kept simple and deterministic (no randomised
+    alternatives) so that structured output is reproducible in tests.
+    """
+    if isinstance(node, ltlnode.LiteralNode):
+        return f"'{node.value}'"
+
+    # -- unary temporal --
+    if isinstance(node, ltlnode.GloballyNode):
+        inner = node.operand
+        # G(!p) -> never pattern
+        if isinstance(inner, ltlnode.NotNode):
+            if isinstance(inner.operand, ltlnode.LiteralNode):
+                return f"'{inner.operand.value}' must never hold"
+            return f"it is never the case that {_flat(inner.operand)}"
+        return f"at all times, {_flat(inner)}"
+
+    if isinstance(node, ltlnode.FinallyNode):
+        return f"eventually, {_flat(node.operand)}"
+
+    if isinstance(node, ltlnode.NextNode):
+        steps, inner = _count_next_chain(node)
+        if steps == 1:
+            return f"in the next step, {_flat(inner)}"
+        return f"{steps} steps from now, {_flat(inner)}"
+
+    # -- binary temporal --
+    if isinstance(node, ltlnode.UntilNode):
+        return f"{_flat(node.left)} until {_flat(node.right)}"
+
+    # -- propositional --
+    if isinstance(node, ltlnode.NotNode):
+        inner = node.operand
+        if isinstance(inner, ltlnode.LiteralNode):
+            return f"not '{inner.value}'"
+        # !(F p) -> never
+        if isinstance(inner, ltlnode.FinallyNode):
+            return f"{_flat(inner.operand)} never occurs"
+        # !!p -> double negation elimination
+        if isinstance(inner, ltlnode.NotNode):
+            return _flat(inner.operand)
+        # !(p & q) -> not both
+        if isinstance(inner, ltlnode.AndNode):
+            return f"not both {_flat(inner.left)} and {_flat(inner.right)}"
+        # !(p | q) -> neither/nor
+        if isinstance(inner, ltlnode.OrNode):
+            return f"neither {_flat(inner.left)} nor {_flat(inner.right)}"
+        # !(p -> q) -> p but not q
+        if isinstance(inner, ltlnode.ImpliesNode):
+            return f"{_flat(inner.left)}, but not {_flat(inner.right)}"
+        return f"it is not the case that {_flat(inner)}"
+
+    if isinstance(node, ltlnode.AndNode):
+        # !p & !q -> neither/nor
+        if isinstance(node.left, ltlnode.NotNode) and isinstance(node.right, ltlnode.NotNode):
+            return f"neither {_flat(node.left.operand)} nor {_flat(node.right.operand)}"
+        return f"{_flat(node.left)} and {_flat(node.right)}"
+
+    if isinstance(node, ltlnode.OrNode):
+        # !p | !q -> not both
+        if isinstance(node.left, ltlnode.NotNode) and isinstance(node.right, ltlnode.NotNode):
+            return f"not both {_flat(node.left.operand)} and {_flat(node.right.operand)}"
+        return f"{_flat(node.left)} or {_flat(node.right)}"
+
+    if isinstance(node, ltlnode.ImpliesNode):
+        return f"if {_flat(node.left)}, then {_flat(node.right)}"
+
+    if isinstance(node, ltlnode.EquivalenceNode):
+        return f"{_flat(node.left)} if and only if {_flat(node.right)}"
+
+    # Fallback
+    return str(node)
+
+
+# ---------------------------------------------------------------------------
+# Structured (hierarchical) translator
+# ---------------------------------------------------------------------------
+
+def _structured(node: ltlnode.LTLNode, depth: int = 0) -> str:
+    """Recursively produce a structured, multi-line translation.
+
+    *depth* tracks the current bullet indentation level.
+    When a sub-formula is simple enough it collapses to a single line via
+    ``_flat``.
+    """
+
+    # ---- Leaf / simple sub-trees -> flat ----
+    if _is_simple(node):
+        return _flat(node)
+
+    # ---- G(...) ----
+    if isinstance(node, ltlnode.GloballyNode):
+        inner = node.operand
+        return _translate_globally(inner, depth)
+
+    # ---- F(...) ----
+    if isinstance(node, ltlnode.FinallyNode):
+        inner = node.operand
+        return _translate_finally(inner, depth)
+
+    # ---- X(...) ----
+    if isinstance(node, ltlnode.NextNode):
+        steps, inner = _count_next_chain(node)
+        return _translate_next(steps, inner, depth)
+
+    # ---- p U q ----
+    if isinstance(node, ltlnode.UntilNode):
+        return _translate_until(node, depth)
+
+    # ---- p -> q ----
+    if isinstance(node, ltlnode.ImpliesNode):
+        return _translate_implies(node, depth)
+
+    # ---- p <-> q ----
+    if isinstance(node, ltlnode.EquivalenceNode):
+        return _translate_equivalence(node, depth)
+
+    # ---- p & q ----
+    if isinstance(node, ltlnode.AndNode):
+        return _translate_and(node, depth)
+
+    # ---- p | q ----
+    if isinstance(node, ltlnode.OrNode):
+        return _translate_or(node, depth)
+
+    # ---- !p ----
+    if isinstance(node, ltlnode.NotNode):
+        return _translate_not(node, depth)
+
+    return _flat(node)
+
+
+# ---- Globally ----
+
+def _translate_globally(inner: ltlnode.LTLNode, depth: int) -> str:
+    # G(p -> F q)  -- response pattern
+    if isinstance(inner, ltlnode.ImpliesNode) and isinstance(inner.right, ltlnode.FinallyNode):
+        trigger = inner.left
+        response = inner.right.operand
+        if _is_simple(trigger) and _is_simple(response):
+            return f"Whenever {_flat(trigger)} holds, {_flat(response)} must eventually follow."
+        lines = ["At all times, the following rule holds:"]
+        lines.append(_bullet(f"When {_flat(trigger)} occurs:", depth + 1))
+        lines.append(_bullet(f"{_flat(response)} must eventually occur", depth + 2))
+        return "\n".join(lines)
+
+    # G(p -> X(F q))  -- bounded response
+    if (isinstance(inner, ltlnode.ImpliesNode)
+            and isinstance(inner.right, ltlnode.NextNode)
+            and isinstance(inner.right.operand, ltlnode.FinallyNode)):
+        trigger = inner.left
+        response = inner.right.operand.operand
+        lines = ["At all times, the following rule holds:"]
+        lines.append(_bullet(f"When {_flat(trigger)} occurs:", depth + 1))
+        lines.append(_bullet(f"Starting from the next step, {_flat(response)} must eventually occur", depth + 2))
+        return "\n".join(lines)
+
+    # G(p -> X q)  -- immediate next-step response
+    if isinstance(inner, ltlnode.ImpliesNode) and isinstance(inner.right, ltlnode.NextNode):
+        trigger = inner.left
+        response = inner.right.operand
+        if _is_simple(trigger) and _is_simple(response):
+            return f"At all times, when {_flat(trigger)} holds, {_flat(response)} must hold in the next step."
+        lines = ["At all times, the following rule holds:"]
+        lines.append(_bullet(f"When {_flat(trigger)} occurs:", depth + 1))
+        lines.append(_bullet(f"{_flat(response)} must hold in the next step", depth + 2))
+        return "\n".join(lines)
+
+    # G(p -> (q U r))  -- chain precedence
+    if isinstance(inner, ltlnode.ImpliesNode) and isinstance(inner.right, ltlnode.UntilNode):
+        trigger = inner.left
+        held = inner.right.left
+        goal = inner.right.right
+        lines = ["At all times, the following rule holds:"]
+        lines.append(_bullet(f"When {_flat(trigger)} occurs:", depth + 1))
+        lines.append(_bullet(f"{_flat(held)} must hold until {_flat(goal)} occurs", depth + 2))
+        return "\n".join(lines)
+
+    # G(p -> (F q & F r))  -- chain response (conjunction of eventually)
+    if (isinstance(inner, ltlnode.ImpliesNode)
+            and isinstance(inner.right, ltlnode.AndNode)
+            and isinstance(inner.right.left, ltlnode.FinallyNode)
+            and isinstance(inner.right.right, ltlnode.FinallyNode)):
+        trigger = inner.left
+        resp_a = inner.right.left.operand
+        resp_b = inner.right.right.operand
+        lines = ["At all times, the following rule holds:"]
+        lines.append(_bullet(f"When {_flat(trigger)} occurs, two responses are required:", depth + 1))
+        lines.append(_bullet(f"{_flat(resp_a)} must eventually occur", depth + 2))
+        lines.append(_bullet(f"{_flat(resp_b)} must eventually occur", depth + 2))
+        return "\n".join(lines)
+
+    # G(p -> q)  -- generic implication under G
+    if isinstance(inner, ltlnode.ImpliesNode):
+        trigger = inner.left
+        consequence = inner.right
+        if _is_simple(trigger) and _is_simple(consequence):
+            return f"At all times, if {_flat(trigger)}, then {_flat(consequence)}."
+        lines = ["At all times, the following rule holds:"]
+        lines.append(_bullet(f"Condition: {_structured(trigger, depth + 1)}", depth + 1))
+        lines.append(_bullet(f"Response: {_structured(consequence, depth + 1)}", depth + 1))
+        return "\n".join(lines)
+
+    # G(F p)  -- recurrence / infinitely often
+    if isinstance(inner, ltlnode.FinallyNode):
+        return f"{_flat(inner.operand)} must occur infinitely often."
+
+    # G(!p)  -- never
+    if isinstance(inner, ltlnode.NotNode):
+        negated = inner.operand
+        if isinstance(negated, ltlnode.LiteralNode):
+            return f"'{negated.value}' must never hold."
+        return f"It is never the case that {_flat(negated)}."
+
+    # G(p & q) -- invariant conjunction
+    if isinstance(inner, ltlnode.AndNode):
+        lines = ["At all times, the following must hold simultaneously:"]
+        lines.append(_bullet(_structured(inner.left, depth + 1), depth + 1))
+        lines.append(_bullet(_structured(inner.right, depth + 1), depth + 1))
+        return "\n".join(lines)
+
+    # G(p | q) -- invariant disjunction
+    if isinstance(inner, ltlnode.OrNode):
+        lines = ["At all times, at least one of the following must hold:"]
+        lines.append(_bullet(_structured(inner.left, depth + 1), depth + 1))
+        lines.append(_bullet(_structured(inner.right, depth + 1), depth + 1))
+        return "\n".join(lines)
+
+    # Generic G(...)
+    inner_text = _structured(inner, depth + 1)
+    if "\n" in inner_text:
+        lines = ["At all times, the following holds:"]
+        for line in inner_text.splitlines():
+            lines.append(_bullet(line.lstrip(), depth + 1))
+        return "\n".join(lines)
+    return f"At all times, {inner_text}."
+
+
+# ---- Finally ----
+
+def _translate_finally(inner: ltlnode.LTLNode, depth: int) -> str:
+    # F(G(p -> F q))  -- eventual permanent rule
+    if (isinstance(inner, ltlnode.GloballyNode)
+            and isinstance(inner.operand, ltlnode.ImpliesNode)
+            and isinstance(inner.operand.right, ltlnode.FinallyNode)):
+        trigger = inner.operand.left
+        response = inner.operand.right.operand
+        lines = ["Eventually, a permanent rule takes effect:"]
+        lines.append(_bullet(
+            f"From that point on, whenever {_flat(trigger)} holds, "
+            f"{_flat(response)} must eventually follow.",
+            depth + 1))
+        return "\n".join(lines)
+
+    # F(G p)  -- persistence / stability
+    if isinstance(inner, ltlnode.GloballyNode):
+        g_inner = inner.operand
+        if _is_simple(g_inner):
+            return f"Eventually, {_flat(g_inner)} becomes permanently true."
+        lines = ["Eventually, a permanent state is reached:"]
+        for sub in _structured(g_inner, depth + 1).splitlines():
+            lines.append(_bullet(sub.lstrip(), depth + 1))
+        return "\n".join(lines)
+
+    # F(!p)
+    if isinstance(inner, ltlnode.NotNode) and isinstance(inner.operand, ltlnode.LiteralNode):
+        return f"Eventually, '{inner.operand.value}' will cease to hold."
+
+    # F(p & q)
+    if isinstance(inner, ltlnode.AndNode):
+        lines = ["Eventually, the following will all be true simultaneously:"]
+        lines.append(_bullet(_structured(inner.left, depth + 1), depth + 1))
+        lines.append(_bullet(_structured(inner.right, depth + 1), depth + 1))
+        return "\n".join(lines)
+
+    # Generic F(...)
+    inner_text = _structured(inner, depth + 1)
+    if "\n" in inner_text:
+        lines = ["Eventually, the following will hold:"]
+        for line in inner_text.splitlines():
+            lines.append(_bullet(line.lstrip(), depth + 1))
+        return "\n".join(lines)
+    return f"Eventually, {inner_text}."
+
+
+# ---- Next ----
+
+def _translate_next(steps: int, inner: ltlnode.LTLNode, depth: int) -> str:
+    timing = _steps_phrase(steps)
+
+    # X(F q) or XX...(F q)
+    if isinstance(inner, ltlnode.FinallyNode):
+        resp = inner.operand
+        return f"Starting from {timing}, {_flat(resp)} must eventually occur."
+
+    inner_text = _structured(inner, depth + 1)
+    if "\n" in inner_text:
+        lines = [f"In {timing}:"]
+        for line in inner_text.splitlines():
+            lines.append(_bullet(line.lstrip(), depth + 1))
+        return "\n".join(lines)
+    return f"In {timing}, {inner_text}."
+
+
+# ---- Until ----
+
+def _translate_until(node: ltlnode.UntilNode, depth: int) -> str:
+    left = node.left
+    right = node.right
+
+    # (G p) U (F q)
+    if isinstance(left, ltlnode.GloballyNode) and isinstance(right, ltlnode.FinallyNode):
+        l_eng = _flat(left.operand)
+        r_eng = _flat(right.operand)
+        lines = [f"{l_eng} must hold at all times."]
+        lines.append(f"This requirement continues until {r_eng} eventually occurs.")
+        return "\n".join(lines)
+
+    # (p U q) where both sides are simple
+    if _is_simple(left) and _is_simple(right):
+        return f"{_flat(left)} holds until {_flat(right)} occurs"
+
+    lines = ["The following 'until' relationship holds:"]
+    lines.append(_bullet(f"Maintained: {_structured(left, depth + 1)}", depth + 1))
+    lines.append(_bullet(f"Until: {_structured(right, depth + 1)}", depth + 1))
+    return "\n".join(lines)
+
+
+# ---- Implies ----
+
+def _translate_implies(node: ltlnode.ImpliesNode, depth: int) -> str:
+    left = node.left
+    right = node.right
+
+    # X^n p -> X^m q  -- aligned next chains
+    left_steps, left_core = _count_next_chain(left)
+    right_steps, right_core = _count_next_chain(right)
+    if left_steps >= 1 and right_steps >= 1:
+        lines = [f"If {_flat(left_core)} holds in {_steps_phrase(left_steps)}, then:"]
+        lines.append(_bullet(
+            f"{_flat(right_core)} must hold {_steps_phrase(right_steps)}",
+            depth + 1))
+        return "\n".join(lines)
+
+    if _is_simple(left) and _is_simple(right):
+        return f"If {_flat(left)}, then {_flat(right)}."
+
+    cond_text = _structured(left, depth + 1)
+    resp_text = _structured(right, depth + 1)
+
+    lines = []
+    if "\n" in cond_text:
+        lines.append("If the following condition holds:")
+        for l in cond_text.splitlines():
+            lines.append(_bullet(l.lstrip(), depth + 1))
+    else:
+        lines.append(f"If {cond_text}, then:")
+
+    if "\n" in resp_text:
+        for l in resp_text.splitlines():
+            lines.append(_bullet(l.lstrip(), depth + 1))
+    else:
+        lines.append(_bullet(resp_text, depth + 1))
+
+    return "\n".join(lines)
+
+
+# ---- Equivalence ----
+
+def _translate_equivalence(node: ltlnode.EquivalenceNode, depth: int) -> str:
+    left = node.left
+    right = node.right
+
+    if _is_simple(left) and _is_simple(right):
+        return f"{_flat(left)} if and only if {_flat(right)}."
+
+    lines = ["The following two statements are equivalent:"]
+    lines.append(_bullet(f"Statement A: {_structured(left, depth + 1)}", depth + 1))
+    lines.append(_bullet(f"Statement B: {_structured(right, depth + 1)}", depth + 1))
+    return "\n".join(lines)
+
+
+# ---- And ----
+
+def _translate_and(node: ltlnode.AndNode, depth: int) -> str:
+    left = node.left
+    right = node.right
+
+    if _is_simple(left) and _is_simple(right):
+        return f"{_flat(left)} and {_flat(right)}"
+
+    lines = ["All of the following must hold:"]
+    lines.append(_bullet(_structured(left, depth + 1), depth + 1))
+    lines.append(_bullet(_structured(right, depth + 1), depth + 1))
+    return "\n".join(lines)
+
+
+# ---- Or ----
+
+def _translate_or(node: ltlnode.OrNode, depth: int) -> str:
+    left = node.left
+    right = node.right
+
+    if _is_simple(left) and _is_simple(right):
+        return f"{_flat(left)} or {_flat(right)}"
+
+    lines = ["At least one of the following must hold:"]
+    lines.append(_bullet(_structured(left, depth + 1), depth + 1))
+    lines.append(_bullet(_structured(right, depth + 1), depth + 1))
+    return "\n".join(lines)
+
+
+# ---- Not ----
+
+def _translate_not(node: ltlnode.NotNode, depth: int) -> str:
+    inner = node.operand
+
+    if isinstance(inner, ltlnode.LiteralNode):
+        return f"not '{inner.value}'"
+
+    inner_text = _structured(inner, depth + 1)
+    if "\n" in inner_text:
+        lines = ["It is NOT the case that:"]
+        for line in inner_text.splitlines():
+            lines.append(_bullet(line.lstrip(), depth + 1))
+        return "\n".join(lines)
+    return f"It is not the case that {inner_text}."
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+def translate(node: ltlnode.LTLNode) -> str:
+    """Translate an LTL AST node into structured English.
+
+    Simple formulas (temporal depth < 2) produce a single line.
+    Complex formulas produce indented, bullet-point explanations.
+    """
+    return _structured(node, depth=0)
diff --git a/test/test_contextualized.py b/test/test_contextualized.py
new file mode 100644
index 0000000..c34ba3b
--- /dev/null
+++ b/test/test_contextualized.py
@@ -0,0 +1,208 @@
+"""Test the contextualized (Wason-style) LTL translator.
+
+Run with:
+    python -m pytest test/test_contextualized.py -v -s
+"""
+
+import unittest
+import sys
+import os
+import random
+
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../src")))
+from unittest.mock import MagicMock
+sys.modules['spot'] = MagicMock()
+
+from ltlnode import parse_ltl_string
+import ltltoeng
+import ltltoeng_prose as prose
+import ltltoeng_contextualized as ctx
+
+
+FORMULAS = [
+    # (formula, description)
+    ("G p",                   "Invariant"),
+    ("F p",                   "Liveness"),
+    ("G !p",                  "Safety / never"),
+    ("G (p -> F q)",          "Response"),
+    ("G (p -> X q)",          "Immediate response"),
+    ("G(p -> (F q & F r))",   "Chain response"),
+    ("G(p -> (q U r))",       "Chain precedence"),
+    ("F (G p)",               "Persistence"),
+    ("G (F p)",               "Recurrence"),
+    ("(G p) U (F q)",         "Obligation until release"),
+    ("G(p -> X p)",           "Once true, stays true"),
+    ("!(F p)",                "Impossibility"),
+    ("p -> q",                "Simple implication"),
+    ("G(p -> X(F q))",        "Bounded response"),
+    ("F(G(p -> F q))",        "Eventual stable response"),
+    ("G((p U q) -> F r)",     "Until-triggered response"),
+    ("!(p & q)",              "Mutual exclusion"),
+]
+
+
+class TestContextualizedPrintComparison(unittest.TestCase):
+    """Print abstract vs. contextualized translations for all 4 themes."""
+
+    def setUp(self):
+        random.seed(42)
+
+    def test_print_all_themes(self):
+        print("\n" + "=" * 110)
+        print("ABSTRACT vs. CONTEXTUALIZED TRANSLATIONS  (Wason-style)")
+        print("=" * 110)
+
+        for formula_str, desc in FORMULAS:
+            node = parse_ltl_string(formula_str)
+
+            random.seed(42)
+            abstract = prose.translate(parse_ltl_string(formula_str))
+            lights   = ctx.translate(parse_ltl_string(formula_str), ctx.THEMES["lights"])
+            robot    = ctx.translate(parse_ltl_string(formula_str), ctx.THEMES["robot"])
+            traffic  = ctx.translate(parse_ltl_string(formula_str), ctx.THEMES["traffic"])
+
+            print(f"\n{'─' * 110}")
+            print(f"  {formula_str:30s}  ({desc})")
+            print(f"{'─' * 110}")
+            print(f"  Abstract:  {abstract}")
+            print(f"  Lights:    {lights}")
+            print(f"  Robot:     {robot}")
+            print(f"  Traffic:   {traffic}")
+
+        print("\n" + "=" * 110)
+
+
+class TestLightsTheme(unittest.TestCase):
+    """Verify the lights theme produces concrete, accurate translations."""
+
+    def setUp(self):
+        random.seed(42)
+        self.theme = ctx.THEMES["lights"]
+
+    def _tr(self, formula):
+        return ctx.translate(parse_ltl_string(formula), self.theme)
+
+    def test_globally_literal(self):
+        result = self._tr("G p")
+        self.assertIn("red light", result.lower())
+        self.assertIn("all times", result.lower())
+
+    def test_finally_literal(self):
+        result = self._tr("F p")
+        self.assertIn("red light", result.lower())
+        self.assertIn("eventually", result.lower())
+
+    def test_never(self):
+        result = self._tr("G !p")
+        self.assertIn("red light", result.lower())
+        self.assertIn("never", result.lower())
+
+    def test_response_uses_event_form(self):
+        """G(p -> F q) should use 'turns on' not just 'is on'."""
+        result = self._tr("G(p -> F q)")
+        self.assertIn("red light turns on", result.lower())
+        self.assertIn("green light", result.lower())
+        self.assertIn("eventually", result.lower())
+
+    def test_immediate_response(self):
+        result = self._tr("G(p -> X q)")
+        self.assertIn("red light turns on", result.lower())
+        self.assertIn("green light", result.lower())
+        self.assertIn("next step", result.lower())
+
+    def test_chain_response_mentions_both(self):
+        result = self._tr("G(p -> (F q & F r))")
+        self.assertIn("red light", result.lower())
+        self.assertIn("green light", result.lower())
+        self.assertIn("blue light", result.lower())
+
+    def test_persistence(self):
+        result = self._tr("G(p -> X p)")
+        self.assertIn("red light", result.lower())
+        self.assertIn("forever", result.lower())
+
+    def test_recurrence(self):
+        result = self._tr("G(F p)")
+        self.assertIn("red light", result.lower())
+        self.assertIn("over and over", result.lower())
+
+    def test_mutual_exclusion(self):
+        result = self._tr("!(p & q)")
+        self.assertIn("red light", result.lower())
+        self.assertIn("green light", result.lower())
+
+    def test_all_end_with_period(self):
+        for formula_str, _ in FORMULAS:
+            result = self._tr(formula_str)
+            self.assertTrue(result.endswith("."),
+                            f"{formula_str} => '{result}' missing period")
+
+    def test_all_start_capitalized(self):
+        for formula_str, _ in FORMULAS:
+            result = self._tr(formula_str)
+            self.assertTrue(result[0].isupper(),
+                            f"{formula_str} => '{result}' not capitalized")
+
+    def test_no_abstract_quotes(self):
+        """Contextualized output should never contain raw 'p', 'q', 'r' quotes."""
+        for formula_str, _ in FORMULAS:
+            result = self._tr(formula_str)
+            for lit in ["'p'", "'q'", "'r'"]:
+                self.assertNotIn(lit, result,
+                                 f"{formula_str} => still has abstract literal {lit}: {result}")
+
+
+class TestRobotTheme(unittest.TestCase):
+
+    def setUp(self):
+        random.seed(42)
+        self.theme = ctx.THEMES["robot"]
+
+    def _tr(self, formula):
+        return ctx.translate(parse_ltl_string(formula), self.theme)
+
+    def test_response_pattern(self):
+        result = self._tr("G(p -> F q)")
+        self.assertIn("robot", result.lower())
+        self.assertIn("goal", result.lower())
+
+    def test_never(self):
+        result = self._tr("G !p")
+        self.assertIn("robot", result.lower())
+        self.assertIn("never", result.lower())
+
+
+class TestCustomTheme(unittest.TestCase):
+    """Verify that custom themes work correctly."""
+
+    def test_custom_theme(self):
+        elevator = ctx.Theme(
+            name="Elevator",
+            description="An elevator system",
+            literals={
+                "p": ("the door is open",   "the door is closed"),
+                "q": ("the elevator is moving", "the elevator is stopped"),
+            },
+            event_form={
+                "p": ("the door opens",   "the door closes"),
+                "q": ("the elevator starts moving", "the elevator stops"),
+            },
+        )
+
+        # Safety: door must never be open while moving
+        # G(q -> !p)  "whenever elevator moves, door must not be open"
+        node = parse_ltl_string("G(q -> !p)")
+        result = ctx.translate(node, elevator)
+        self.assertIn("elevator", result.lower())
+        self.assertIn("door", result.lower())
+
+        # Response: if door opens, elevator must eventually move
+        node = parse_ltl_string("G(p -> F q)")
+        result = ctx.translate(node, elevator)
+        self.assertIn("door opens", result.lower())
+        self.assertIn("elevator", result.lower())
+        self.assertIn("eventually", result.lower())
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/test_translation_comparison.py b/test/test_translation_comparison.py
new file mode 100644
index 0000000..95c504b
--- /dev/null
+++ b/test/test_translation_comparison.py
@@ -0,0 +1,306 @@
+"""Side-by-side comparison of three LTL-to-English translation approaches.
+
+Run with:
+    python -m pytest test/test_translation_comparison.py -v -s
+
+The -s flag is important to see the printed comparison tables.
+"""
+
+import unittest
+import sys
+import os
+import random
+
+# Add the src directory to sys.path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../src")))
+
+# Mock spot module to avoid import error
+from unittest.mock import MagicMock
+sys.modules['spot'] = MagicMock()
+
+from ltlnode import parse_ltl_string
+import ltltoeng
+import ltltoeng_structured as structured
+import ltltoeng_prose as prose
+
+
+# ---------------------------------------------------------------------------
+# Test formulas grouped by category
+# ---------------------------------------------------------------------------
+
+SIMPLE_FORMULAS = [
+    ("G p",         "Globally literal"),
+    ("F p",         "Finally literal"),
+    ("X p",         "Next literal"),
+    ("p U q",       "Until"),
+    ("p & q",       "And"),
+    ("p | q",       "Or"),
+    ("p -> q",      "Implies"),
+    ("!p",          "Not"),
+]
+
+RESPONSE_PATTERNS = [
+    ("G (p -> F q)",      "Response pattern"),
+    ("G (p -> X q)",      "Immediate response"),
+    ("G (p -> X(F q))",   "Bounded response"),
+    ("G(p -> (F q & F r))", "Chain response"),
+    ("G(p -> (q U r))",   "Chain precedence"),
+]
+
+RECURRENCE_PERSISTENCE = [
+    ("G (F p)",           "Recurrence (infinitely often)"),
+    ("F (G p)",           "Persistence (stability)"),
+    ("G !p",              "Never"),
+    ("!(F p)",            "Never (negated finally)"),
+]
+
+COMPLEX_NESTED = [
+    ("F(G(p -> F q))",    "Eventual stable response"),
+    ("G((p U q) -> F r)", "Until-triggered response"),
+    ("(G p) U (F q)",     "Globally until finally"),
+    ("(p U q) U r",       "Nested until"),
+    ("G(p -> X p)",       "Final state (persistence)"),
+]
+
+NEXT_CHAINS = [
+    ("X X X p",           "Triple next"),
+    ("(X p) -> (X X X q)", "Aligned next implication"),
+]
+
+PROPOSITIONAL = [
+    ("!!p",               "Double negation"),
+    ("!(p & q)",          "Negated conjunction (De Morgan)"),
+    ("!(p | q)",          "Negated disjunction (De Morgan)"),
+    ("!(p -> q)",         "Negated implication"),
+    ("!p & !q",           "And of negations"),
+    ("!p | !q",           "Or of negations"),
+    ("(p & q) -> r",      "Conjunction implies"),
+    ("p -> (q & r)",      "Implies conjunction"),
+    ("!p -> q",           "Negation implies (unless)"),
+]
+
+ALL_FORMULAS = (
+    SIMPLE_FORMULAS
+    + RESPONSE_PATTERNS
+    + RECURRENCE_PERSISTENCE
+    + COMPLEX_NESTED
+    + NEXT_CHAINS
+    + PROPOSITIONAL
+)
+
+
+# ---------------------------------------------------------------------------
+# Helper: translate with all three systems
+# ---------------------------------------------------------------------------
+
+def _translate_all(formula_str: str):
+    """Return (original, structured, prose) translations."""
+    random.seed(42)  # deterministic for original translator
+    node = parse_ltl_string(formula_str)
+
+    original = ltltoeng.finalize_sentence(node.__to_english__())
+    struct = structured.translate(parse_ltl_string(formula_str))
+    pro = prose.translate(parse_ltl_string(formula_str))
+
+    return original, struct, pro
+
+
+# ---------------------------------------------------------------------------
+# Comparison printer (runs as a test so it shows up with -s)
+# ---------------------------------------------------------------------------
+
+class TestTranslationComparison(unittest.TestCase):
+    """Print side-by-side translations for manual review."""
+
+    def setUp(self):
+        random.seed(42)
+
+    def test_print_comparison_table(self):
+        """Print all translations for visual comparison."""
+        print("\n" + "=" * 100)
+        print("LTL-TO-ENGLISH TRANSLATION COMPARISON")
+        print("=" * 100)
+
+        categories = [
+            ("SIMPLE", SIMPLE_FORMULAS),
+            ("RESPONSE PATTERNS", RESPONSE_PATTERNS),
+            ("RECURRENCE / PERSISTENCE", RECURRENCE_PERSISTENCE),
+            ("COMPLEX NESTED", COMPLEX_NESTED),
+            ("NEXT CHAINS", NEXT_CHAINS),
+            ("PROPOSITIONAL", PROPOSITIONAL),
+        ]
+
+        for cat_name, formulas in categories:
+            print(f"\n{'─' * 100}")
+            print(f"  {cat_name}")
+            print(f"{'─' * 100}")
+
+            for formula_str, description in formulas:
+                original, struct, pro = _translate_all(formula_str)
+
+                print(f"\n  Formula:    {formula_str}  ({description})")
+                print(f"  Original:   {original}")
+
+                # For structured output, indent multi-line
+                if "\n" in struct:
+                    lines = struct.split("\n")
+                    print(f"  Structured: {lines[0]}")
+                    for line in lines[1:]:
+                        print(f"              {line}")
+                else:
+                    print(f"  Structured: {struct}")
+
+                # For prose output, show multi-sentence flow
+                print(f"  Prose:      {pro}")
+
+        print("\n" + "=" * 100)
+
+
+# ---------------------------------------------------------------------------
+# Actual assertions
+# ---------------------------------------------------------------------------
+
+class TestStructuredBasicProperties(unittest.TestCase):
+    """Basic assertions for the structured translator."""
+
+    def setUp(self):
+        random.seed(42)
+
+    def test_simple_formulas_are_single_line(self):
+        """Simple formulas should not produce bullet points."""
+        for formula_str, _ in SIMPLE_FORMULAS:
+            node = parse_ltl_string(formula_str)
+            result = structured.translate(node)
+            self.assertNotIn("\n", result,
+                             f"Simple formula {formula_str} should be single-line, got:\n{result}")
+
+    def test_complex_formulas_use_bullets(self):
+        """Complex formulas should produce multi-line output with bullets."""
+        complex_formulas = [
+            "F(G(p -> F q))",
+            "G(p -> (F q & F r))",
+            "G(p -> X(F q))",
+        ]
+        for formula_str in complex_formulas:
+            node = parse_ltl_string(formula_str)
+            result = structured.translate(node)
+            self.assertIn("-", result,
+                          f"Complex formula {formula_str} should have bullets, got:\n{result}")
+
+    def test_response_pattern(self):
+        node = parse_ltl_string("G(p -> F q)")
+        result = structured.translate(node)
+        self.assertIn("'p'", result)
+        self.assertIn("'q'", result)
+        self.assertIn("eventually", result.lower())
+
+    def test_recurrence(self):
+        node = parse_ltl_string("G(F p)")
+        result = structured.translate(node)
+        self.assertIn("infinitely often", result.lower())
+
+    def test_persistence(self):
+        node = parse_ltl_string("F(G p)")
+        result = structured.translate(node)
+        self.assertIn("permanently", result.lower())
+
+    def test_never(self):
+        node = parse_ltl_string("G !p")
+        result = structured.translate(node)
+        self.assertIn("never", result.lower())
+
+    def test_eventual_stable_response(self):
+        node = parse_ltl_string("F(G(p -> F q))")
+        result = structured.translate(node)
+        self.assertIn("permanent", result.lower())
+        self.assertIn("'p'", result)
+        self.assertIn("'q'", result)
+
+
+class TestProseBasicProperties(unittest.TestCase):
+    """Basic assertions for the prose translator."""
+
+    def setUp(self):
+        random.seed(42)
+
+    def test_all_translations_end_with_period(self):
+        """Every translation should end with a period."""
+        for formula_str, _ in ALL_FORMULAS:
+            node = parse_ltl_string(formula_str)
+            result = prose.translate(node)
+            self.assertTrue(result.endswith("."),
+                            f"Translation of {formula_str} should end with period: {result}")
+
+    def test_all_translations_start_capitalized_or_quoted(self):
+        """Every translation should start with a capital letter or a quote."""
+        for formula_str, _ in ALL_FORMULAS:
+            node = parse_ltl_string(formula_str)
+            result = prose.translate(node)
+            self.assertTrue(
+                result[0].isupper() or result[0] == "'",
+                f"Translation of {formula_str} should start capitalized: {result}")
+
+    def test_response_pattern(self):
+        node = parse_ltl_string("G(p -> F q)")
+        result = prose.translate(node)
+        self.assertIn("whenever", result.lower())
+        self.assertIn("eventually", result.lower())
+
+    def test_recurrence(self):
+        node = parse_ltl_string("G(F p)")
+        result = prose.translate(node)
+        self.assertIn("infinitely often", result.lower())
+
+    def test_persistence(self):
+        node = parse_ltl_string("F(G p)")
+        result = prose.translate(node)
+        self.assertIn("forever", result.lower())
+
+    def test_never(self):
+        node = parse_ltl_string("G !p")
+        result = prose.translate(node)
+        self.assertIn("never", result.lower())
+
+    def test_multi_sentence_for_complex(self):
+        """Complex formulas should produce multiple sentences."""
+        node = parse_ltl_string("F(G(p -> F q))")
+        result = prose.translate(node)
+        # Count sentences (periods not at end)
+        sentences = [s.strip() for s in result.split(".") if s.strip()]
+        self.assertGreaterEqual(len(sentences), 2,
+                                f"Expected multi-sentence for F(G(p->Fq)), got: {result}")
+
+    def test_de_morgan_neither_nor(self):
+        node = parse_ltl_string("!(p | q)")
+        result = prose.translate(node)
+        self.assertIn("neither", result.lower())
+        self.assertIn("nor", result.lower())
+
+    def test_de_morgan_not_both(self):
+        node = parse_ltl_string("!(p & q)")
+        result = prose.translate(node)
+        self.assertIn("not both", result.lower())
+
+    def test_persistence_same_literal(self):
+        node = parse_ltl_string("G(p -> X p)")
+        result = prose.translate(node)
+        self.assertIn("once", result.lower())
+        self.assertIn("forever", result.lower())
+
+    def test_until_triggered_response(self):
+        """G((p U q) -> F r) should mention until and eventually."""
+        node = parse_ltl_string("G((p U q) -> F r)")
+        result = prose.translate(node)
+        self.assertIn("until", result.lower())
+        self.assertIn("eventually", result.lower())
+
+    def test_globally_until_finally(self):
+        """(G p) U (F q) should produce multi-sentence."""
+        node = parse_ltl_string("(G p) U (F q)")
+        result = prose.translate(node)
+        sentences = [s.strip() for s in result.split(".") if s.strip()]
+        self.assertGreaterEqual(len(sentences), 2)
+
+
+if __name__ == "__main__":
+    unittest.main()

From 3e69ddcaad2974447954469c915b3f86ca68fe8d Mon Sep 17 00:00:00 2001
From: Siddhartha Prasad <siddhartha_prasad@brown.edu>
Date: Sat, 21 Mar 2026 18:09:32 -0400
Subject: [PATCH 2/7] Add within-subjects A/B test for abstract vs.
 contextualized translation

Each English-to-LTL question is randomly assigned to either:
- "abstract": existing translation (e.g., "Whenever 'p' holds, 'q' must eventually follow")
- "contextualized": lights theme (e.g., "Whenever the red light turns on, then eventually the green light is on")

The condition is logged in the student_responses table via a new
translation_mode column, enabling analysis of accuracy per condition.

Changes:
- logger.py: Add translation_mode column to StudentResponse
- exercisebuilder.py: Random 50/50 condition assignment with fallback
  to abstract when formula uses literals not in the lights theme
- app.py: Pass translation_mode through to logging
- checkanswers.js: Include translation_mode in POST data
- exercise.html: Carry translation_mode as data attribute

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/app.py                    |  4 +++-
 src/exercisebuilder.py        | 45 +++++++++++++++++++++++++++++++----
 src/logger.py                 | 43 +++++++++++++++++----------------
 src/static/js/checkanswers.js |  7 +++++-
 src/templates/exercise.html   |  3 ++-
 5 files changed, 74 insertions(+), 28 deletions(-)

diff --git a/src/app.py b/src/app.py
index d5ac57f..5114e84 100644
--- a/src/app.py
+++ b/src/app.py
@@ -854,9 +854,11 @@ def loganswer(questiontype):
                 return {"error": "submission_limit", "message": "This exercise only allows one submission."}, 403
 
 
+    translation_mode = data.get('translation_mode', '')
     answer_logger.logStudentResponse(userId = userId, misconceptions = misconceptions, question_text = question_text,
                                       question_options = question_options, correct_answer = isCorrect,
-                                      questiontype=questiontype, mp_class = mp_class, exercise = exercise, course = courseId)
+                                      questiontype=questiontype, mp_class = mp_class, exercise = exercise, course = courseId,
+                                      translation_mode = translation_mode)
     
 
     if questiontype == "english_to_ltl":
diff --git a/src/exercisebuilder.py b/src/exercisebuilder.py
index a14e387..42edaa3 100644
--- a/src/exercisebuilder.py
+++ b/src/exercisebuilder.py
@@ -8,6 +8,7 @@
 import re
 import math
 import ltltoeng
+import ltltoeng_contextualized
 from syntacticmutator import applyRandomMutationNotEquivalentTo
 
 
@@ -495,13 +496,38 @@ def gen_nl_question(self, formula):
         formula_eng = as_node.__to_english__()
         if formula_eng is None or formula_eng == "":
             return None
-        
+
         formula_eng_corrected = ltltoeng.correct_grammar(formula_eng)
         ### If there are multiple '.' in a row, replace with a single '.'
         formula_eng_corrected = re.sub(r'\.{2,}', '.', formula_eng_corrected)
         return ltltoeng.finalize_sentence(formula_eng_corrected)
 
 
+    def gen_nl_question_contextualized(self, formula):
+        """Generate a contextualized English question using the lights theme.
+
+        Returns None if the formula uses literals not covered by the lights
+        theme (only p, q, r are mapped), to avoid mixing abstract and concrete.
+        """
+        LIGHTS_THEME = ltltoeng_contextualized.THEMES["lights"]
+
+        as_node = ltlnode.parse_ltl_string(formula)
+
+        # Check that all literals in the formula are covered by the theme
+        literals_in_formula = set(re.findall(r'\b([a-z][a-z0-9]*)\b', formula))
+        # Remove LTL operators from the set
+        ltl_operators = {'G', 'F', 'X', 'U', 'W', 'R'}
+        literals_in_formula -= ltl_operators
+        covered_literals = set(LIGHTS_THEME.literals.keys())
+        if not literals_in_formula.issubset(covered_literals):
+            return None
+
+        result = ltltoeng_contextualized.translate(as_node, LIGHTS_THEME)
+        if not result or result.strip() == "":
+            return None
+        return result
+
+
     def get_options_with_misconceptions_as_formula(self, answer):
         ltl = ltlnode.parse_ltl_string(answer)
         d = codebook.getAllApplicableMisconceptions(ltl)
@@ -548,12 +574,22 @@ def get_options_with_misconceptions_as_formula(self, answer):
         return merged_options
 
     def build_english_to_ltl_question(self, answer):
-        
+
         options = self.get_options_with_misconceptions_as_formula(answer)
         if options is None:
             return None
 
-        question = self.gen_nl_question(answer)
+        # A/B test: randomly assign abstract vs. contextualized (lights)
+        translation_mode = random.choice(["abstract", "contextualized"])
+
+        if translation_mode == "contextualized":
+            question = self.gen_nl_question_contextualized(answer)
+            # Fall back to abstract if contextualized fails (e.g., unsupported literals)
+            if question is None or question == "":
+                question = self.gen_nl_question(answer)
+                translation_mode = "abstract"
+        else:
+            question = self.gen_nl_question(answer)
 
         if question is None or question == "":
             print("Question generation failed unexpectedly.")
@@ -562,7 +598,8 @@ def build_english_to_ltl_question(self, answer):
         return {
             "question": question,
             "type": self.ENGLISHTOLTL,
-            "options": options
+            "options": options,
+            "translation_mode": translation_mode
         }
 
     def build_tracesat_mc_question(self, answer):
diff --git a/src/logger.py b/src/logger.py
index 389e3a0..76047de 100644
--- a/src/logger.py
+++ b/src/logger.py
@@ -52,6 +52,7 @@ class StudentResponse(Base):
     mp_class = Column(String)
     exercise = Column(String)
     course = Column(String, default="")
+    translation_mode = Column(String, default="")
 
 
 class GeneratedExercise(Base):
@@ -114,25 +115,25 @@ def __init__(self):
         if SENTENCE_PAIR_RATING_TABLE not in self.inspector.get_table_names():
             Base.metadata.tables[SENTENCE_PAIR_RATING_TABLE].create(self.engine)
 
-    def record(self, log):
-        with self.Session() as session:
-            print("Recording log")
-            session.add(log)
-            session.commit()
-
-    def getRatedEnglishFormulas(self, user_id):
-        """Return a set of LTL formulas this user has already rated."""
-        if not isinstance(user_id, str):
-            raise ValueError("user_id should be a string")
-
-        with self.Session() as session:
-            rows = session.query(EnglishLTLRating.ltl).filter(
-                EnglishLTLRating.user_id == user_id
-            ).all()
-            return {row.ltl for row in rows if row.ltl}
+    def record(self, log):
+        with self.Session() as session:
+            print("Recording log")
+            session.add(log)
+            session.commit()
+
+    def getRatedEnglishFormulas(self, user_id):
+        """Return a set of LTL formulas this user has already rated."""
+        if not isinstance(user_id, str):
+            raise ValueError("user_id should be a string")
+
+        with self.Session() as session:
+            rows = session.query(EnglishLTLRating.ltl).filter(
+                EnglishLTLRating.user_id == user_id
+            ).all()
+            return {row.ltl for row in rows if row.ltl}
 
     
-    def logStudentResponse(self, userId, misconceptions, question_text, question_options, correct_answer, questiontype, mp_class, exercise, course):
+    def logStudentResponse(self, userId, misconceptions, question_text, question_options, correct_answer, questiontype, mp_class, exercise, course, translation_mode=""):
 
         if not isinstance(userId, str):
             raise ValueError("userId should be a string")
@@ -154,9 +155,9 @@ def logStudentResponse(self, userId, misconceptions, question_text, question_opt
 
         ## We still want to log the response if there are no misconceptions
         if misconceptions == None or len(misconceptions) == 0:
-            log = StudentResponse(user_id=userId, timestamp=datetime.datetime.now(), 
+            log = StudentResponse(user_id=userId, timestamp=datetime.datetime.now(),
                                   misconception="", question_text=question_text, question_options=question_options, correct_answer=correct_answer,
-                                  question_type=questiontype, mp_class=mp_class, exercise=exercise, course=course)
+                                  question_type=questiontype, mp_class=mp_class, exercise=exercise, course=course, translation_mode=translation_mode)
             self.record(log)
 
 
@@ -165,9 +166,9 @@ def logStudentResponse(self, userId, misconceptions, question_text, question_opt
             if not isinstance(misconception, str):
                 raise ValueError("misconception should be a string")
 
-            log = StudentResponse(user_id=userId, timestamp=datetime.datetime.now(), 
+            log = StudentResponse(user_id=userId, timestamp=datetime.datetime.now(),
                                   misconception=misconception, question_text=question_text, question_options=question_options, correct_answer=correct_answer,
-                                  question_type=questiontype, mp_class=mp_class, exercise=exercise, course=course)
+                                  question_type=questiontype, mp_class=mp_class, exercise=exercise, course=course, translation_mode=translation_mode)
             self.record(log)
 
     
diff --git a/src/static/js/checkanswers.js b/src/static/js/checkanswers.js
index 4d21ba4..28c3439 100644
--- a/src/static/js/checkanswers.js
+++ b/src/static/js/checkanswers.js
@@ -292,6 +292,10 @@ async function englishtoltl_getfeedback(button) {
     let correct = show_feedback(parent_node, QUESTION_TYPE);
     lockQuestionInteractions(parent_node);
 
+    // Read A/B test condition (abstract vs. contextualized)
+    let question_text_el = parent_node.querySelector('.actualQuestion');
+    let translation_mode = question_text_el ? (question_text_el.dataset.translationMode || '') : '';
+
     let data = {
         selected_option: selected_radio.value,
         correct_option: correct_option,
@@ -300,7 +304,8 @@ async function englishtoltl_getfeedback(button) {
         question_text: question_text,
         question_options: question_options,
         formula_for_mp_class: get_formula_for_MP_Classification(parent_node, QUESTION_TYPE),
-        exercise: getExerciseName()
+        exercise: getExerciseName(),
+        translation_mode: translation_mode
     }
 
     let response = await postFeedback(data, QUESTION_TYPE);
diff --git a/src/templates/exercise.html b/src/templates/exercise.html
index 6200357..b80ca27 100644
--- a/src/templates/exercise.html
+++ b/src/templates/exercise.html
@@ -93,7 +93,8 @@ <h5 id="{{ qindex }}question_text" class="card-title mb-2">
                     </h5>
                     {% else %}
                     <h5 id="{{ qindex }}question_text" class="card-title mb-2 actualQuestion"
-                        style="white-space: pre-wrap;">{{ q.question}}
+                        style="white-space: pre-wrap;"
+                        data-translation-mode="{{ q.translation_mode | default('') }}">{{ q.question}}
                     </h5>
                     <div class="row mb-2 ml-2">
                         {% if q.type == "englishtoltl" %}

From ff61251e904877a87dd37908187c51a863f312d4 Mon Sep 17 00:00:00 2001
From: Siddhartha Prasad <siddhartha_prasad@brown.edu>
Date: Sat, 21 Mar 2026 18:12:43 -0400
Subject: [PATCH 3/7] Use r/g/b literals for lights theme instead of p/q/r
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The lights theme now maps r=red, g=green, b=blue — making the
literal-to-color mapping self-evident. The exercise builder remaps
formula literals (p->r, q->g, r->b) when generating contextualized
questions, so both the English text and the LTL answer options use
the same r/g/b literals.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/exercisebuilder.py         | 74 ++++++++++++++++++++++++----------
 src/ltltoeng_contextualized.py | 12 +++---
 test/test_contextualized.py    | 62 ++++++++++++++--------------
 3 files changed, 89 insertions(+), 59 deletions(-)

diff --git a/src/exercisebuilder.py b/src/exercisebuilder.py
index 42edaa3..defcfc3 100644
--- a/src/exercisebuilder.py
+++ b/src/exercisebuilder.py
@@ -503,29 +503,53 @@ def gen_nl_question(self, formula):
         return ltltoeng.finalize_sentence(formula_eng_corrected)
 
 
+    # Mapping from standard formula literals to lights-theme literals
+    LIGHTS_LITERAL_MAP = {'p': 'r', 'q': 'g', 'r': 'b'}
+    LIGHTS_LITERAL_UNMAP = {v: k for k, v in LIGHTS_LITERAL_MAP.items()}
+
+    @staticmethod
+    def _remap_formula_literals(formula, mapping):
+        """Rewrite literal names in a formula string using the given mapping.
+
+        Uses word-boundary-aware replacement to avoid mangling operator names.
+        Applies replacements via intermediate placeholders to prevent collisions
+        (e.g., p->r and r->b would collide without this).
+        """
+        result = formula
+        # Phase 1: replace originals with unique placeholders
+        for orig, new in mapping.items():
+            placeholder = f"__REMAP_{orig}__"
+            result = re.sub(rf'\b{re.escape(orig)}\b', placeholder, result)
+        # Phase 2: replace placeholders with final values
+        for orig, new in mapping.items():
+            placeholder = f"__REMAP_{orig}__"
+            result = result.replace(placeholder, new)
+        return result
+
     def gen_nl_question_contextualized(self, formula):
         """Generate a contextualized English question using the lights theme.
 
-        Returns None if the formula uses literals not covered by the lights
-        theme (only p, q, r are mapped), to avoid mixing abstract and concrete.
+        Remaps formula literals (p->r, q->g, r->b) so they match the theme.
+        Returns (question_text, remapped_formula) or (None, None) if the formula
+        uses literals not covered by the mapping.
         """
         LIGHTS_THEME = ltltoeng_contextualized.THEMES["lights"]
 
-        as_node = ltlnode.parse_ltl_string(formula)
-
-        # Check that all literals in the formula are covered by the theme
+        # Check that all literals in the formula are in our mapping
         literals_in_formula = set(re.findall(r'\b([a-z][a-z0-9]*)\b', formula))
-        # Remove LTL operators from the set
-        ltl_operators = {'G', 'F', 'X', 'U', 'W', 'R'}
-        literals_in_formula -= ltl_operators
-        covered_literals = set(LIGHTS_THEME.literals.keys())
-        if not literals_in_formula.issubset(covered_literals):
-            return None
+        # LTL operators are uppercase (G, F, X, U) so won't match [a-z], but
+        # just in case, filter out any known operators
+        covered = set(self.LIGHTS_LITERAL_MAP.keys())
+        if not literals_in_formula.issubset(covered):
+            return None, None
+
+        remapped_formula = self._remap_formula_literals(formula, self.LIGHTS_LITERAL_MAP)
+        as_node = ltlnode.parse_ltl_string(remapped_formula)
 
         result = ltltoeng_contextualized.translate(as_node, LIGHTS_THEME)
         if not result or result.strip() == "":
-            return None
-        return result
+            return None, None
+        return result, remapped_formula
 
 
     def get_options_with_misconceptions_as_formula(self, answer):
@@ -575,21 +599,27 @@ def get_options_with_misconceptions_as_formula(self, answer):
 
     def build_english_to_ltl_question(self, answer):
 
-        options = self.get_options_with_misconceptions_as_formula(answer)
-        if options is None:
-            return None
-
         # A/B test: randomly assign abstract vs. contextualized (lights)
         translation_mode = random.choice(["abstract", "contextualized"])
 
         if translation_mode == "contextualized":
-            question = self.gen_nl_question_contextualized(answer)
-            # Fall back to abstract if contextualized fails (e.g., unsupported literals)
-            if question is None or question == "":
-                question = self.gen_nl_question(answer)
+            question, remapped_formula = self.gen_nl_question_contextualized(answer)
+            if question is not None and remapped_formula is not None:
+                # Build options using the remapped (r,g,b) formula
+                options = self.get_options_with_misconceptions_as_formula(remapped_formula)
+                if options is None:
+                    # Fall back to abstract
+                    translation_mode = "abstract"
+            else:
+                # Fall back to abstract if contextualized fails
                 translation_mode = "abstract"
-        else:
+
+        if translation_mode == "abstract":
             question = self.gen_nl_question(answer)
+            options = self.get_options_with_misconceptions_as_formula(answer)
+
+        if options is None:
+            return None
 
         if question is None or question == "":
             print("Question generation failed unexpectedly.")
diff --git a/src/ltltoeng_contextualized.py b/src/ltltoeng_contextualized.py
index d3ba564..e09d84e 100644
--- a/src/ltltoeng_contextualized.py
+++ b/src/ltltoeng_contextualized.py
@@ -81,14 +81,14 @@ def event_off(self, lit: str) -> str:
     name="Light Panel",
     description="A panel with three colored lights: red, green, and blue.",
     literals={
-        "p": ("the red light is on",    "the red light is off"),
-        "q": ("the green light is on",  "the green light is off"),
-        "r": ("the blue light is on",   "the blue light is off"),
+        "r": ("the red light is on",    "the red light is off"),
+        "g": ("the green light is on",  "the green light is off"),
+        "b": ("the blue light is on",   "the blue light is off"),
     },
     event_form={
-        "p": ("the red light turns on",    "the red light turns off"),
-        "q": ("the green light turns on",  "the green light turns off"),
-        "r": ("the blue light turns on",   "the blue light turns off"),
+        "r": ("the red light turns on",    "the red light turns off"),
+        "g": ("the green light turns on",  "the green light turns off"),
+        "b": ("the blue light turns on",   "the blue light turns off"),
     },
 )
 
diff --git a/test/test_contextualized.py b/test/test_contextualized.py
index c34ba3b..bad1fad 100644
--- a/test/test_contextualized.py
+++ b/test/test_contextualized.py
@@ -21,23 +21,23 @@
 
 FORMULAS = [
     # (formula, description)
-    ("G p",                   "Invariant"),
-    ("F p",                   "Liveness"),
-    ("G !p",                  "Safety / never"),
-    ("G (p -> F q)",          "Response"),
-    ("G (p -> X q)",          "Immediate response"),
-    ("G(p -> (F q & F r))",   "Chain response"),
-    ("G(p -> (q U r))",       "Chain precedence"),
-    ("F (G p)",               "Persistence"),
-    ("G (F p)",               "Recurrence"),
-    ("(G p) U (F q)",         "Obligation until release"),
-    ("G(p -> X p)",           "Once true, stays true"),
-    ("!(F p)",                "Impossibility"),
-    ("p -> q",                "Simple implication"),
-    ("G(p -> X(F q))",        "Bounded response"),
-    ("F(G(p -> F q))",        "Eventual stable response"),
-    ("G((p U q) -> F r)",     "Until-triggered response"),
-    ("!(p & q)",              "Mutual exclusion"),
+    ("G r",                   "Invariant"),
+    ("F r",                   "Liveness"),
+    ("G !r",                  "Safety / never"),
+    ("G (r -> F g)",          "Response"),
+    ("G (r -> X g)",          "Immediate response"),
+    ("G(r -> (F g & F b))",   "Chain response"),
+    ("G(r -> (g U b))",       "Chain precedence"),
+    ("F (G r)",               "Persistence"),
+    ("G (F r)",               "Recurrence"),
+    ("(G r) U (F g)",         "Obligation until release"),
+    ("G(r -> X r)",           "Once true, stays true"),
+    ("!(F r)",                "Impossibility"),
+    ("r -> g",                "Simple implication"),
+    ("G(r -> X(F g))",        "Bounded response"),
+    ("F(G(r -> F g))",        "Eventual stable response"),
+    ("G((r U g) -> F b)",     "Until-triggered response"),
+    ("!(r & g)",              "Mutual exclusion"),
 ]
 
 
@@ -83,51 +83,51 @@ def _tr(self, formula):
         return ctx.translate(parse_ltl_string(formula), self.theme)
 
     def test_globally_literal(self):
-        result = self._tr("G p")
+        result = self._tr("G r")
         self.assertIn("red light", result.lower())
         self.assertIn("all times", result.lower())
 
     def test_finally_literal(self):
-        result = self._tr("F p")
+        result = self._tr("F r")
         self.assertIn("red light", result.lower())
         self.assertIn("eventually", result.lower())
 
     def test_never(self):
-        result = self._tr("G !p")
+        result = self._tr("G !r")
         self.assertIn("red light", result.lower())
         self.assertIn("never", result.lower())
 
     def test_response_uses_event_form(self):
-        """G(p -> F q) should use 'turns on' not just 'is on'."""
-        result = self._tr("G(p -> F q)")
+        """G(r -> F g) should use 'turns on' not just 'is on'."""
+        result = self._tr("G(r -> F g)")
         self.assertIn("red light turns on", result.lower())
         self.assertIn("green light", result.lower())
         self.assertIn("eventually", result.lower())
 
     def test_immediate_response(self):
-        result = self._tr("G(p -> X q)")
+        result = self._tr("G(r -> X g)")
         self.assertIn("red light turns on", result.lower())
         self.assertIn("green light", result.lower())
         self.assertIn("next step", result.lower())
 
     def test_chain_response_mentions_both(self):
-        result = self._tr("G(p -> (F q & F r))")
+        result = self._tr("G(r -> (F g & F b))")
         self.assertIn("red light", result.lower())
         self.assertIn("green light", result.lower())
         self.assertIn("blue light", result.lower())
 
     def test_persistence(self):
-        result = self._tr("G(p -> X p)")
+        result = self._tr("G(r -> X r)")
         self.assertIn("red light", result.lower())
         self.assertIn("forever", result.lower())
 
     def test_recurrence(self):
-        result = self._tr("G(F p)")
+        result = self._tr("G(F r)")
         self.assertIn("red light", result.lower())
         self.assertIn("over and over", result.lower())
 
     def test_mutual_exclusion(self):
-        result = self._tr("!(p & q)")
+        result = self._tr("!(r & g)")
         self.assertIn("red light", result.lower())
         self.assertIn("green light", result.lower())
 
@@ -144,10 +144,10 @@ def test_all_start_capitalized(self):
                             f"{formula_str} => '{result}' not capitalized")
 
     def test_no_abstract_quotes(self):
-        """Contextualized output should never contain raw 'p', 'q', 'r' quotes."""
+        """Contextualized output should never contain raw quoted literals."""
         for formula_str, _ in FORMULAS:
             result = self._tr(formula_str)
-            for lit in ["'p'", "'q'", "'r'"]:
+            for lit in ["'r'", "'g'", "'b'"]:
                 self.assertNotIn(lit, result,
                                  f"{formula_str} => still has abstract literal {lit}: {result}")
 
@@ -162,12 +162,12 @@ def _tr(self, formula):
         return ctx.translate(parse_ltl_string(formula), self.theme)
 
     def test_response_pattern(self):
-        result = self._tr("G(p -> F q)")
+        result = self._tr("G(p -> F q)")  # robot theme still uses p,q,r
         self.assertIn("robot", result.lower())
         self.assertIn("goal", result.lower())
 
     def test_never(self):
-        result = self._tr("G !p")
+        result = self._tr("G !p")  # robot theme still uses p,q,r
         self.assertIn("robot", result.lower())
         self.assertIn("never", result.lower())
 

From 23d9944b0610f340aa52b3d0b820958511dddb1e Mon Sep 17 00:00:00 2001
From: Siddhartha Prasad <siddhartha_prasad@brown.edu>
Date: Sat, 21 Mar 2026 18:15:14 -0400
Subject: [PATCH 4/7] Simplify: generate formulas with r,g,b literals directly

Instead of generating with p,q,r and remapping to r,g,b, generate a
second formula pool using r,g,b literals from the start. The
contextualized condition draws from this pool, so the formula literals
naturally match the lights theme. Removes all remapping logic.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/exercisebuilder.py | 108 ++++++++++++++++-------------------------
 1 file changed, 43 insertions(+), 65 deletions(-)

diff --git a/src/exercisebuilder.py b/src/exercisebuilder.py
index defcfc3..0316c56 100644
--- a/src/exercisebuilder.py
+++ b/src/exercisebuilder.py
@@ -412,16 +412,28 @@ def contains_undersirable_lit(s):
 
         ## First generate a large pool from spot randltl
         pool_size = 2*num_questions
-        question_answers = spotutils.gen_rand_ltl(atoms = literals, 
-                                                  tree_size = tree_size, 
-                                                  ltl_priorities = self.ltl_priorities, 
+        question_answers = spotutils.gen_rand_ltl(atoms = literals,
+                                                  tree_size = tree_size,
+                                                  ltl_priorities = self.ltl_priorities,
                                                   num_formulae = pool_size)
-        
+
         ## Augment with template-generated formulas for pattern-specific misconceptions
         ## This helps ensure we get formulas that can actually be mutated with these misconceptions
         template_formulas = self.generate_template_formulas(literals, num_templates=max(1, num_questions // 4))
         question_answers.extend(template_formulas)
-        
+
+        ## A/B test: generate a second pool with r,g,b literals for contextualized questions
+        CONTEXTUALIZED_LITERALS = list(ltltoeng_contextualized.THEMES["lights"].literals.keys())
+        ctx_answers = spotutils.gen_rand_ltl(atoms = CONTEXTUALIZED_LITERALS,
+                                             tree_size = tree_size,
+                                             ltl_priorities = self.ltl_priorities,
+                                             num_formulae = pool_size)
+        ctx_templates = self.generate_template_formulas(CONTEXTUALIZED_LITERALS, num_templates=max(1, num_questions // 4))
+        ctx_answers.extend(ctx_templates)
+        ctx_answers = [a for a in ctx_answers if not contains_undersirable_lit(a)]
+        random.shuffle(ctx_answers)
+        ctx_iter = iter(ctx_answers)
+
 
         def formula_choice_metric(formula):
 
@@ -448,7 +460,15 @@ def formula_choice_metric(formula):
             if kind == self.TRACESATMC:
                 question = self.build_tracesat_mc_question(answer)
             elif kind == self.ENGLISHTOLTL:
-                question = self.build_english_to_ltl_question(answer)
+                # A/B test: 50/50 abstract vs. contextualized
+                if random.random() < 0.5:
+                    ctx_answer = next(ctx_iter, None)
+                    if ctx_answer is not None:
+                        question = self.build_english_to_ltl_question(ctx_answer, contextualized=True)
+                    else:
+                        question = self.build_english_to_ltl_question(answer)
+                else:
+                    question = self.build_english_to_ltl_question(answer)
             elif kind == self.TRACESATYN:
                 question = self.build_tracesat_yn_question(answer)
 
@@ -503,53 +523,18 @@ def gen_nl_question(self, formula):
         return ltltoeng.finalize_sentence(formula_eng_corrected)
 
 
-    # Mapping from standard formula literals to lights-theme literals
-    LIGHTS_LITERAL_MAP = {'p': 'r', 'q': 'g', 'r': 'b'}
-    LIGHTS_LITERAL_UNMAP = {v: k for k, v in LIGHTS_LITERAL_MAP.items()}
-
-    @staticmethod
-    def _remap_formula_literals(formula, mapping):
-        """Rewrite literal names in a formula string using the given mapping.
-
-        Uses word-boundary-aware replacement to avoid mangling operator names.
-        Applies replacements via intermediate placeholders to prevent collisions
-        (e.g., p->r and r->b would collide without this).
-        """
-        result = formula
-        # Phase 1: replace originals with unique placeholders
-        for orig, new in mapping.items():
-            placeholder = f"__REMAP_{orig}__"
-            result = re.sub(rf'\b{re.escape(orig)}\b', placeholder, result)
-        # Phase 2: replace placeholders with final values
-        for orig, new in mapping.items():
-            placeholder = f"__REMAP_{orig}__"
-            result = result.replace(placeholder, new)
-        return result
-
     def gen_nl_question_contextualized(self, formula):
         """Generate a contextualized English question using the lights theme.
 
-        Remaps formula literals (p->r, q->g, r->b) so they match the theme.
-        Returns (question_text, remapped_formula) or (None, None) if the formula
-        uses literals not covered by the mapping.
+        Expects the formula to already use r,g,b literals (matching the theme).
+        Returns None if translation fails.
         """
         LIGHTS_THEME = ltltoeng_contextualized.THEMES["lights"]
-
-        # Check that all literals in the formula are in our mapping
-        literals_in_formula = set(re.findall(r'\b([a-z][a-z0-9]*)\b', formula))
-        # LTL operators are uppercase (G, F, X, U) so won't match [a-z], but
-        # just in case, filter out any known operators
-        covered = set(self.LIGHTS_LITERAL_MAP.keys())
-        if not literals_in_formula.issubset(covered):
-            return None, None
-
-        remapped_formula = self._remap_formula_literals(formula, self.LIGHTS_LITERAL_MAP)
-        as_node = ltlnode.parse_ltl_string(remapped_formula)
-
+        as_node = ltlnode.parse_ltl_string(formula)
         result = ltltoeng_contextualized.translate(as_node, LIGHTS_THEME)
         if not result or result.strip() == "":
-            return None, None
-        return result, remapped_formula
+            return None
+        return result
 
 
     def get_options_with_misconceptions_as_formula(self, answer):
@@ -597,29 +582,22 @@ def get_options_with_misconceptions_as_formula(self, answer):
 
         return merged_options
 
-    def build_english_to_ltl_question(self, answer):
+    def build_english_to_ltl_question(self, answer, contextualized=False):
 
-        # A/B test: randomly assign abstract vs. contextualized (lights)
-        translation_mode = random.choice(["abstract", "contextualized"])
+        options = self.get_options_with_misconceptions_as_formula(answer)
+        if options is None:
+            return None
 
-        if translation_mode == "contextualized":
-            question, remapped_formula = self.gen_nl_question_contextualized(answer)
-            if question is not None and remapped_formula is not None:
-                # Build options using the remapped (r,g,b) formula
-                options = self.get_options_with_misconceptions_as_formula(remapped_formula)
-                if options is None:
-                    # Fall back to abstract
-                    translation_mode = "abstract"
-            else:
-                # Fall back to abstract if contextualized fails
+        if contextualized:
+            question = self.gen_nl_question_contextualized(answer)
+            translation_mode = "contextualized"
+            # Fall back to abstract if contextualized translation fails
+            if question is None or question == "":
+                question = self.gen_nl_question(answer)
                 translation_mode = "abstract"
-
-        if translation_mode == "abstract":
+        else:
             question = self.gen_nl_question(answer)
-            options = self.get_options_with_misconceptions_as_formula(answer)
-
-        if options is None:
-            return None
+            translation_mode = "abstract"
 
         if question is None or question == "":
             print("Question generation failed unexpectedly.")

From 00f76965e20dc759c6a5a2801583943d8b46ff5e Mon Sep 17 00:00:00 2001
From: Siddhartha Prasad <siddhartha_prasad@brown.edu>
Date: Sat, 21 Mar 2026 18:19:00 -0400
Subject: [PATCH 5/7] Clean up anti-patterns found in review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove unused themes (robot, traffic, server) — only lights is used
  in the A/B test, and the others used p/q/r literals which don't match
  the r/g/b convention
- Fix _t_implies: don't eagerly translate both sides then ignore them
- Fix _t_not !(p->q): unpack inline ternary into readable if/else
- Fix docstring example referencing nonexistent THEMES["traffic_lights"]
- Make ctx formula pool generation lazy — only call spot when a
  contextualized englishtoltl question is actually needed
- Custom theme test uses d/m literals (not p/q) to match the convention
  that literal names should be meaningful

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/exercisebuilder.py         | 27 ++++++++------
 src/ltltoeng_contextualized.py | 64 +++++++---------------------------
 test/test_contextualized.py    | 41 ++++------------------
 3 files changed, 35 insertions(+), 97 deletions(-)

diff --git a/src/exercisebuilder.py b/src/exercisebuilder.py
index 0316c56..93e1f88 100644
--- a/src/exercisebuilder.py
+++ b/src/exercisebuilder.py
@@ -422,17 +422,22 @@ def contains_undersirable_lit(s):
         template_formulas = self.generate_template_formulas(literals, num_templates=max(1, num_questions // 4))
         question_answers.extend(template_formulas)
 
-        ## A/B test: generate a second pool with r,g,b literals for contextualized questions
+        ## A/B test: lazily generate a pool with r,g,b literals for contextualized questions
         CONTEXTUALIZED_LITERALS = list(ltltoeng_contextualized.THEMES["lights"].literals.keys())
-        ctx_answers = spotutils.gen_rand_ltl(atoms = CONTEXTUALIZED_LITERALS,
-                                             tree_size = tree_size,
-                                             ltl_priorities = self.ltl_priorities,
-                                             num_formulae = pool_size)
-        ctx_templates = self.generate_template_formulas(CONTEXTUALIZED_LITERALS, num_templates=max(1, num_questions // 4))
-        ctx_answers.extend(ctx_templates)
-        ctx_answers = [a for a in ctx_answers if not contains_undersirable_lit(a)]
-        random.shuffle(ctx_answers)
-        ctx_iter = iter(ctx_answers)
+        ctx_iter = None
+        def _get_ctx_iter():
+            nonlocal ctx_iter
+            if ctx_iter is None:
+                ctx_pool = spotutils.gen_rand_ltl(atoms = CONTEXTUALIZED_LITERALS,
+                                                  tree_size = tree_size,
+                                                  ltl_priorities = self.ltl_priorities,
+                                                  num_formulae = pool_size)
+                ctx_templates = self.generate_template_formulas(CONTEXTUALIZED_LITERALS, num_templates=max(1, num_questions // 4))
+                ctx_pool.extend(ctx_templates)
+                ctx_pool = [a for a in ctx_pool if not contains_undersirable_lit(a)]
+                random.shuffle(ctx_pool)
+                ctx_iter = iter(ctx_pool)
+            return ctx_iter
 
 
         def formula_choice_metric(formula):
@@ -462,7 +467,7 @@ def formula_choice_metric(formula):
             elif kind == self.ENGLISHTOLTL:
                 # A/B test: 50/50 abstract vs. contextualized
                 if random.random() < 0.5:
-                    ctx_answer = next(ctx_iter, None)
+                    ctx_answer = next(_get_ctx_iter(), None)
                     if ctx_answer is not None:
                         question = self.build_english_to_ltl_question(ctx_answer, contextualized=True)
                     else:
diff --git a/src/ltltoeng_contextualized.py b/src/ltltoeng_contextualized.py
index e09d84e..6aee86c 100644
--- a/src/ltltoeng_contextualized.py
+++ b/src/ltltoeng_contextualized.py
@@ -14,9 +14,9 @@
     THEMES: dict of built-in theme names -> Theme objects
 
 Example:
-    node = parse_ltl_string("G(p -> F q)")
-    translate(node, theme=THEMES["traffic_lights"])
-    # => "Whenever the red light turns on, the green light must eventually turn on."
+    node = parse_ltl_string("G(r -> F g)")
+    translate(node, theme=THEMES["lights"])
+    # => "Whenever the red light turns on, then eventually the green light is on."
 """
 
 from __future__ import annotations
@@ -92,50 +92,6 @@ def event_off(self, lit: str) -> str:
     },
 )
 
-THEMES["robot"] = Theme(
-    name="Robot Tasks",
-    description="A robot that can pick up items, move to a location, and charge its battery.",
-    literals={
-        "p": ("the robot is holding an item",  "the robot is not holding an item"),
-        "q": ("the robot is at the goal",      "the robot is not at the goal"),
-        "r": ("the battery is charged",        "the battery is not charged"),
-    },
-    event_form={
-        "p": ("the robot picks up an item",     "the robot drops the item"),
-        "q": ("the robot reaches the goal",     "the robot leaves the goal"),
-        "r": ("the battery finishes charging",  "the battery runs out"),
-    },
-)
-
-THEMES["traffic"] = Theme(
-    name="Traffic Intersection",
-    description="A traffic intersection with signals for north-south and east-west traffic.",
-    literals={
-        "p": ("the north-south signal is green",  "the north-south signal is red"),
-        "q": ("the east-west signal is green",     "the east-west signal is red"),
-        "r": ("a pedestrian is crossing",          "no pedestrian is crossing"),
-    },
-    event_form={
-        "p": ("the north-south signal turns green",  "the north-south signal turns red"),
-        "q": ("the east-west signal turns green",    "the east-west signal turns red"),
-        "r": ("a pedestrian starts crossing",        "the pedestrian finishes crossing"),
-    },
-)
-
-THEMES["server"] = Theme(
-    name="Web Server",
-    description="A web server handling requests, with a cache and a database.",
-    literals={
-        "p": ("a request is being processed",  "no request is being processed"),
-        "q": ("the cache has a valid entry",    "the cache is empty"),
-        "r": ("the database is available",      "the database is down"),
-    },
-    event_form={
-        "p": ("a new request arrives",         "the request completes"),
-        "q": ("the cache is populated",        "the cache is invalidated"),
-        "r": ("the database comes online",     "the database goes down"),
-    },
-)
 
 
 # ---------------------------------------------------------------------------
@@ -267,7 +223,12 @@ def _t_not(node: ltlnode.NotNode, theme: Theme) -> str:
 
     # !(p -> q)
     if isinstance(inner, ltlnode.ImpliesNode):
-        return f"{_t(inner.left, theme)}, but {theme.negative(inner.right.value) if isinstance(inner.right, ltlnode.LiteralNode) else 'not ' + _t(inner.right, theme)}"
+        left_text = _t(inner.left, theme)
+        if isinstance(inner.right, ltlnode.LiteralNode):
+            right_text = theme.negative(inner.right.value)
+        else:
+            right_text = f"not {_t(inner.right, theme)}"
+        return f"{left_text}, but {right_text}"
 
     return f"it is not the case that {_t(inner, theme)}"
 
@@ -299,18 +260,17 @@ def _t_or(node: ltlnode.OrNode, theme: Theme) -> str:
 # --- IMPLIES --------------------------------------------------------------
 
 def _t_implies(node: ltlnode.ImpliesNode, theme: Theme) -> str:
-    l = _t(node.left, theme)
-    r = _t(node.right, theme)
-
     # (p & q) -> r
     if isinstance(node.left, ltlnode.AndNode):
+        r = _t(node.right, theme)
         return f"if both {_t(node.left.left, theme)} and {_t(node.left.right, theme)}, then {r}"
 
     # (p | q) -> r
     if isinstance(node.left, ltlnode.OrNode):
+        r = _t(node.right, theme)
         return f"if either {_t(node.left.left, theme)} or {_t(node.left.right, theme)}, then {r}"
 
-    return f"if {l}, then {r}"
+    return f"if {_t(node.left, theme)}, then {_t(node.right, theme)}"
 
 
 # --- EQUIVALENCE ----------------------------------------------------------
diff --git a/test/test_contextualized.py b/test/test_contextualized.py
index bad1fad..757a3a3 100644
--- a/test/test_contextualized.py
+++ b/test/test_contextualized.py
@@ -42,7 +42,7 @@
 
 
 class TestContextualizedPrintComparison(unittest.TestCase):
-    """Print abstract vs. contextualized translations for all 4 themes."""
+    """Print abstract vs. contextualized (lights) translations."""
 
     def setUp(self):
         random.seed(42)
@@ -53,21 +53,15 @@ def test_print_all_themes(self):
         print("=" * 110)
 
         for formula_str, desc in FORMULAS:
-            node = parse_ltl_string(formula_str)
-
             random.seed(42)
             abstract = prose.translate(parse_ltl_string(formula_str))
             lights   = ctx.translate(parse_ltl_string(formula_str), ctx.THEMES["lights"])
-            robot    = ctx.translate(parse_ltl_string(formula_str), ctx.THEMES["robot"])
-            traffic  = ctx.translate(parse_ltl_string(formula_str), ctx.THEMES["traffic"])
 
             print(f"\n{'─' * 110}")
             print(f"  {formula_str:30s}  ({desc})")
             print(f"{'─' * 110}")
             print(f"  Abstract:  {abstract}")
             print(f"  Lights:    {lights}")
-            print(f"  Robot:     {robot}")
-            print(f"  Traffic:   {traffic}")
 
         print("\n" + "=" * 110)
 
@@ -152,26 +146,6 @@ def test_no_abstract_quotes(self):
                                  f"{formula_str} => still has abstract literal {lit}: {result}")
 
 
-class TestRobotTheme(unittest.TestCase):
-
-    def setUp(self):
-        random.seed(42)
-        self.theme = ctx.THEMES["robot"]
-
-    def _tr(self, formula):
-        return ctx.translate(parse_ltl_string(formula), self.theme)
-
-    def test_response_pattern(self):
-        result = self._tr("G(p -> F q)")  # robot theme still uses p,q,r
-        self.assertIn("robot", result.lower())
-        self.assertIn("goal", result.lower())
-
-    def test_never(self):
-        result = self._tr("G !p")  # robot theme still uses p,q,r
-        self.assertIn("robot", result.lower())
-        self.assertIn("never", result.lower())
-
-
 class TestCustomTheme(unittest.TestCase):
     """Verify that custom themes work correctly."""
 
@@ -180,24 +154,23 @@ def test_custom_theme(self):
             name="Elevator",
             description="An elevator system",
             literals={
-                "p": ("the door is open",   "the door is closed"),
-                "q": ("the elevator is moving", "the elevator is stopped"),
+                "d": ("the door is open",       "the door is closed"),
+                "m": ("the elevator is moving",  "the elevator is stopped"),
             },
             event_form={
-                "p": ("the door opens",   "the door closes"),
-                "q": ("the elevator starts moving", "the elevator stops"),
+                "d": ("the door opens",           "the door closes"),
+                "m": ("the elevator starts moving", "the elevator stops"),
             },
         )
 
         # Safety: door must never be open while moving
-        # G(q -> !p)  "whenever elevator moves, door must not be open"
-        node = parse_ltl_string("G(q -> !p)")
+        node = parse_ltl_string("G(m -> !d)")
         result = ctx.translate(node, elevator)
         self.assertIn("elevator", result.lower())
         self.assertIn("door", result.lower())
 
         # Response: if door opens, elevator must eventually move
-        node = parse_ltl_string("G(p -> F q)")
+        node = parse_ltl_string("G(d -> F m)")
         result = ctx.translate(node, elevator)
         self.assertIn("door opens", result.lower())
         self.assertIn("elevator", result.lower())

From 5d150d06261bf8be107fc4bd553a0f0ebb9e78e4 Mon Sep 17 00:00:00 2001
From: Siddhartha Prasad <siddhartha_prasad@brown.edu>
Date: Sat, 21 Mar 2026 18:22:03 -0400
Subject: [PATCH 6/7] Use prose translator for abstract condition; remove
 structured translator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The abstract condition in the A/B test now uses ltltoeng_prose (multi-
sentence, natural English) instead of the original ltltoeng (single-
sentence with known awkward phrasings). This ensures the A/B test
compares abstract-vs-contextualized framing, not bad-English-vs-good.

Removes ltltoeng_structured.py and test_translation_comparison.py —
the structured (bullet-point) translator was unused in any code path.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/exercisebuilder.py              |  12 +-
 src/ltltoeng_structured.py          | 552 ----------------------------
 test/test_translation_comparison.py | 306 ---------------
 3 files changed, 4 insertions(+), 866 deletions(-)
 delete mode 100644 src/ltltoeng_structured.py
 delete mode 100644 test/test_translation_comparison.py

diff --git a/src/exercisebuilder.py b/src/exercisebuilder.py
index 93e1f88..b7f85ea 100644
--- a/src/exercisebuilder.py
+++ b/src/exercisebuilder.py
@@ -8,6 +8,7 @@
 import re
 import math
 import ltltoeng
+import ltltoeng_prose
 import ltltoeng_contextualized
 from syntacticmutator import applyRandomMutationNotEquivalentTo
 
@@ -516,16 +517,11 @@ def formula_choice_metric(formula):
 
     
     def gen_nl_question(self, formula):
-
         as_node = ltlnode.parse_ltl_string(formula)
-        formula_eng = as_node.__to_english__()
-        if formula_eng is None or formula_eng == "":
+        result = ltltoeng_prose.translate(as_node)
+        if not result or result.strip() == "":
             return None
-
-        formula_eng_corrected = ltltoeng.correct_grammar(formula_eng)
-        ### If there are multiple '.' in a row, replace with a single '.'
-        formula_eng_corrected = re.sub(r'\.{2,}', '.', formula_eng_corrected)
-        return ltltoeng.finalize_sentence(formula_eng_corrected)
+        return result
 
 
     def gen_nl_question_contextualized(self, formula):
diff --git a/src/ltltoeng_structured.py b/src/ltltoeng_structured.py
deleted file mode 100644
index 35284e6..0000000
--- a/src/ltltoeng_structured.py
+++ /dev/null
@@ -1,552 +0,0 @@
-"""Structured, hierarchical English translation for LTL formulas.
-
-Instead of forcing every LTL formula into a single flat sentence (as ltltoeng
-does), this module produces indented, bullet-point explanations that reveal the
-structure of the formula.  Simple formulas still get a single line; complex ones
-are broken into labeled components (Condition, Response, Requirement, Rule).
-
-Public API
-----------
-translate(node) -> str
-    Given an LTLNode AST, return a human-readable English string.
-    For simple formulas the result is a single line.
-    For complex formulas the result is a multi-line, indented explanation.
-"""
-
-from __future__ import annotations
-
-import ltlnode
-
-
-# ---------------------------------------------------------------------------
-# Complexity metric
-# ---------------------------------------------------------------------------
-
-def _temporal_depth(node: ltlnode.LTLNode) -> int:
-    """Return the nesting depth of temporal operators in *node*.
-
-    Temporal operators are G, F, X, and U.  Propositional connectives (&, |,
-    ->, <->, !) do *not* increase temporal depth -- they only propagate the
-    maximum depth of their children.
-
-    A bare literal has depth 0.
-    """
-    if isinstance(node, ltlnode.LiteralNode):
-        return 0
-
-    if isinstance(node, (ltlnode.GloballyNode, ltlnode.FinallyNode,
-                         ltlnode.NextNode)):
-        return 1 + _temporal_depth(node.operand)
-
-    if isinstance(node, ltlnode.UntilNode):
-        return 1 + max(_temporal_depth(node.left), _temporal_depth(node.right))
-
-    # Propositional connectives: Not, And, Or, Implies, Equivalence
-    if isinstance(node, ltlnode.NotNode):
-        return _temporal_depth(node.operand)
-
-    if isinstance(node, (ltlnode.AndNode, ltlnode.OrNode,
-                         ltlnode.ImpliesNode, ltlnode.EquivalenceNode)):
-        return max(_temporal_depth(node.left), _temporal_depth(node.right))
-
-    # Fallback for unknown node types
-    return 0
-
-
-# The threshold at which we switch from single-line to structured output.
-_COMPLEXITY_THRESHOLD = 2
-
-
-def _is_simple(node: ltlnode.LTLNode) -> bool:
-    """True when *node* is simple enough for a single-line translation."""
-    return _temporal_depth(node) < _COMPLEXITY_THRESHOLD
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-_INDENT = "  "
-
-
-def _indent(text: str, level: int) -> str:
-    """Indent every line of *text* by *level* levels."""
-    prefix = _INDENT * level
-    return "\n".join(prefix + line for line in text.splitlines())
-
-
-def _bullet(text: str, level: int) -> str:
-    """Return *text* as a bulleted line at the given indentation level."""
-    prefix = _INDENT * level
-    return f"{prefix}- {text}"
-
-
-def _quote(node: ltlnode.LTLNode) -> str:
-    """Quote a literal node's value.  Non-literals get their LTL string."""
-    if isinstance(node, ltlnode.LiteralNode):
-        return f"'{node.value}'"
-    return str(node)
-
-
-def _count_next_chain(node: ltlnode.LTLNode):
-    """Count consecutive X (Next) wrappers.  Returns (count, inner_node)."""
-    n = 0
-    while isinstance(node, ltlnode.NextNode):
-        n += 1
-        node = node.operand
-    return n, node
-
-
-def _steps_phrase(count: int) -> str:
-    """Human-readable phrase like '3 steps from now'."""
-    if count == 1:
-        return "the next step"
-    return f"{count} steps from now"
-
-
-# ---------------------------------------------------------------------------
-# Single-line (flat) translator  -- used for simple sub-formulas
-# ---------------------------------------------------------------------------
-
-def _flat(node: ltlnode.LTLNode) -> str:
-    """Produce a concise, single-line English translation of *node*.
-
-    This is deliberately kept simple and deterministic (no randomised
-    alternatives) so that structured output is reproducible in tests.
-    """
-    if isinstance(node, ltlnode.LiteralNode):
-        return f"'{node.value}'"
-
-    # -- unary temporal --
-    if isinstance(node, ltlnode.GloballyNode):
-        inner = node.operand
-        # G(!p) -> never pattern
-        if isinstance(inner, ltlnode.NotNode):
-            if isinstance(inner.operand, ltlnode.LiteralNode):
-                return f"'{inner.operand.value}' must never hold"
-            return f"it is never the case that {_flat(inner.operand)}"
-        return f"at all times, {_flat(inner)}"
-
-    if isinstance(node, ltlnode.FinallyNode):
-        return f"eventually, {_flat(node.operand)}"
-
-    if isinstance(node, ltlnode.NextNode):
-        steps, inner = _count_next_chain(node)
-        if steps == 1:
-            return f"in the next step, {_flat(inner)}"
-        return f"{steps} steps from now, {_flat(inner)}"
-
-    # -- binary temporal --
-    if isinstance(node, ltlnode.UntilNode):
-        return f"{_flat(node.left)} until {_flat(node.right)}"
-
-    # -- propositional --
-    if isinstance(node, ltlnode.NotNode):
-        inner = node.operand
-        if isinstance(inner, ltlnode.LiteralNode):
-            return f"not '{inner.value}'"
-        # !(F p) -> never
-        if isinstance(inner, ltlnode.FinallyNode):
-            return f"{_flat(inner.operand)} never occurs"
-        # !!p -> double negation elimination
-        if isinstance(inner, ltlnode.NotNode):
-            return _flat(inner.operand)
-        # !(p & q) -> not both
-        if isinstance(inner, ltlnode.AndNode):
-            return f"not both {_flat(inner.left)} and {_flat(inner.right)}"
-        # !(p | q) -> neither/nor
-        if isinstance(inner, ltlnode.OrNode):
-            return f"neither {_flat(inner.left)} nor {_flat(inner.right)}"
-        # !(p -> q) -> p but not q
-        if isinstance(inner, ltlnode.ImpliesNode):
-            return f"{_flat(inner.left)}, but not {_flat(inner.right)}"
-        return f"it is not the case that {_flat(inner)}"
-
-    if isinstance(node, ltlnode.AndNode):
-        # !p & !q -> neither/nor
-        if isinstance(node.left, ltlnode.NotNode) and isinstance(node.right, ltlnode.NotNode):
-            return f"neither {_flat(node.left.operand)} nor {_flat(node.right.operand)}"
-        return f"{_flat(node.left)} and {_flat(node.right)}"
-
-    if isinstance(node, ltlnode.OrNode):
-        # !p | !q -> not both
-        if isinstance(node.left, ltlnode.NotNode) and isinstance(node.right, ltlnode.NotNode):
-            return f"not both {_flat(node.left.operand)} and {_flat(node.right.operand)}"
-        return f"{_flat(node.left)} or {_flat(node.right)}"
-
-    if isinstance(node, ltlnode.ImpliesNode):
-        return f"if {_flat(node.left)}, then {_flat(node.right)}"
-
-    if isinstance(node, ltlnode.EquivalenceNode):
-        return f"{_flat(node.left)} if and only if {_flat(node.right)}"
-
-    # Fallback
-    return str(node)
-
-
-# ---------------------------------------------------------------------------
-# Structured (hierarchical) translator
-# ---------------------------------------------------------------------------
-
-def _structured(node: ltlnode.LTLNode, depth: int = 0) -> str:
-    """Recursively produce a structured, multi-line translation.
-
-    *depth* tracks the current bullet indentation level.
-    When a sub-formula is simple enough it collapses to a single line via
-    ``_flat``.
-    """
-
-    # ---- Leaf / simple sub-trees -> flat ----
-    if _is_simple(node):
-        return _flat(node)
-
-    # ---- G(...) ----
-    if isinstance(node, ltlnode.GloballyNode):
-        inner = node.operand
-        return _translate_globally(inner, depth)
-
-    # ---- F(...) ----
-    if isinstance(node, ltlnode.FinallyNode):
-        inner = node.operand
-        return _translate_finally(inner, depth)
-
-    # ---- X(...) ----
-    if isinstance(node, ltlnode.NextNode):
-        steps, inner = _count_next_chain(node)
-        return _translate_next(steps, inner, depth)
-
-    # ---- p U q ----
-    if isinstance(node, ltlnode.UntilNode):
-        return _translate_until(node, depth)
-
-    # ---- p -> q ----
-    if isinstance(node, ltlnode.ImpliesNode):
-        return _translate_implies(node, depth)
-
-    # ---- p <-> q ----
-    if isinstance(node, ltlnode.EquivalenceNode):
-        return _translate_equivalence(node, depth)
-
-    # ---- p & q ----
-    if isinstance(node, ltlnode.AndNode):
-        return _translate_and(node, depth)
-
-    # ---- p | q ----
-    if isinstance(node, ltlnode.OrNode):
-        return _translate_or(node, depth)
-
-    # ---- !p ----
-    if isinstance(node, ltlnode.NotNode):
-        return _translate_not(node, depth)
-
-    return _flat(node)
-
-
-# ---- Globally ----
-
-def _translate_globally(inner: ltlnode.LTLNode, depth: int) -> str:
-    # G(p -> F q)  -- response pattern
-    if isinstance(inner, ltlnode.ImpliesNode) and isinstance(inner.right, ltlnode.FinallyNode):
-        trigger = inner.left
-        response = inner.right.operand
-        if _is_simple(trigger) and _is_simple(response):
-            return f"Whenever {_flat(trigger)} holds, {_flat(response)} must eventually follow."
-        lines = ["At all times, the following rule holds:"]
-        lines.append(_bullet(f"When {_flat(trigger)} occurs:", depth + 1))
-        lines.append(_bullet(f"{_flat(response)} must eventually occur", depth + 2))
-        return "\n".join(lines)
-
-    # G(p -> X(F q))  -- bounded response
-    if (isinstance(inner, ltlnode.ImpliesNode)
-            and isinstance(inner.right, ltlnode.NextNode)
-            and isinstance(inner.right.operand, ltlnode.FinallyNode)):
-        trigger = inner.left
-        response = inner.right.operand.operand
-        lines = ["At all times, the following rule holds:"]
-        lines.append(_bullet(f"When {_flat(trigger)} occurs:", depth + 1))
-        lines.append(_bullet(f"Starting from the next step, {_flat(response)} must eventually occur", depth + 2))
-        return "\n".join(lines)
-
-    # G(p -> X q)  -- immediate next-step response
-    if isinstance(inner, ltlnode.ImpliesNode) and isinstance(inner.right, ltlnode.NextNode):
-        trigger = inner.left
-        response = inner.right.operand
-        if _is_simple(trigger) and _is_simple(response):
-            return f"At all times, when {_flat(trigger)} holds, {_flat(response)} must hold in the next step."
-        lines = ["At all times, the following rule holds:"]
-        lines.append(_bullet(f"When {_flat(trigger)} occurs:", depth + 1))
-        lines.append(_bullet(f"{_flat(response)} must hold in the next step", depth + 2))
-        return "\n".join(lines)
-
-    # G(p -> (q U r))  -- chain precedence
-    if isinstance(inner, ltlnode.ImpliesNode) and isinstance(inner.right, ltlnode.UntilNode):
-        trigger = inner.left
-        held = inner.right.left
-        goal = inner.right.right
-        lines = ["At all times, the following rule holds:"]
-        lines.append(_bullet(f"When {_flat(trigger)} occurs:", depth + 1))
-        lines.append(_bullet(f"{_flat(held)} must hold until {_flat(goal)} occurs", depth + 2))
-        return "\n".join(lines)
-
-    # G(p -> (F q & F r))  -- chain response (conjunction of eventually)
-    if (isinstance(inner, ltlnode.ImpliesNode)
-            and isinstance(inner.right, ltlnode.AndNode)
-            and isinstance(inner.right.left, ltlnode.FinallyNode)
-            and isinstance(inner.right.right, ltlnode.FinallyNode)):
-        trigger = inner.left
-        resp_a = inner.right.left.operand
-        resp_b = inner.right.right.operand
-        lines = ["At all times, the following rule holds:"]
-        lines.append(_bullet(f"When {_flat(trigger)} occurs, two responses are required:", depth + 1))
-        lines.append(_bullet(f"{_flat(resp_a)} must eventually occur", depth + 2))
-        lines.append(_bullet(f"{_flat(resp_b)} must eventually occur", depth + 2))
-        return "\n".join(lines)
-
-    # G(p -> q)  -- generic implication under G
-    if isinstance(inner, ltlnode.ImpliesNode):
-        trigger = inner.left
-        consequence = inner.right
-        if _is_simple(trigger) and _is_simple(consequence):
-            return f"At all times, if {_flat(trigger)}, then {_flat(consequence)}."
-        lines = ["At all times, the following rule holds:"]
-        lines.append(_bullet(f"Condition: {_structured(trigger, depth + 1)}", depth + 1))
-        lines.append(_bullet(f"Response: {_structured(consequence, depth + 1)}", depth + 1))
-        return "\n".join(lines)
-
-    # G(F p)  -- recurrence / infinitely often
-    if isinstance(inner, ltlnode.FinallyNode):
-        return f"{_flat(inner.operand)} must occur infinitely often."
-
-    # G(!p)  -- never
-    if isinstance(inner, ltlnode.NotNode):
-        negated = inner.operand
-        if isinstance(negated, ltlnode.LiteralNode):
-            return f"'{negated.value}' must never hold."
-        return f"It is never the case that {_flat(negated)}."
-
-    # G(p & q) -- invariant conjunction
-    if isinstance(inner, ltlnode.AndNode):
-        lines = ["At all times, the following must hold simultaneously:"]
-        lines.append(_bullet(_structured(inner.left, depth + 1), depth + 1))
-        lines.append(_bullet(_structured(inner.right, depth + 1), depth + 1))
-        return "\n".join(lines)
-
-    # G(p | q) -- invariant disjunction
-    if isinstance(inner, ltlnode.OrNode):
-        lines = ["At all times, at least one of the following must hold:"]
-        lines.append(_bullet(_structured(inner.left, depth + 1), depth + 1))
-        lines.append(_bullet(_structured(inner.right, depth + 1), depth + 1))
-        return "\n".join(lines)
-
-    # Generic G(...)
-    inner_text = _structured(inner, depth + 1)
-    if "\n" in inner_text:
-        lines = ["At all times, the following holds:"]
-        for line in inner_text.splitlines():
-            lines.append(_bullet(line.lstrip(), depth + 1))
-        return "\n".join(lines)
-    return f"At all times, {inner_text}."
-
-
-# ---- Finally ----
-
-def _translate_finally(inner: ltlnode.LTLNode, depth: int) -> str:
-    # F(G(p -> F q))  -- eventual permanent rule
-    if (isinstance(inner, ltlnode.GloballyNode)
-            and isinstance(inner.operand, ltlnode.ImpliesNode)
-            and isinstance(inner.operand.right, ltlnode.FinallyNode)):
-        trigger = inner.operand.left
-        response = inner.operand.right.operand
-        lines = ["Eventually, a permanent rule takes effect:"]
-        lines.append(_bullet(
-            f"From that point on, whenever {_flat(trigger)} holds, "
-            f"{_flat(response)} must eventually follow.",
-            depth + 1))
-        return "\n".join(lines)
-
-    # F(G p)  -- persistence / stability
-    if isinstance(inner, ltlnode.GloballyNode):
-        g_inner = inner.operand
-        if _is_simple(g_inner):
-            return f"Eventually, {_flat(g_inner)} becomes permanently true."
-        lines = ["Eventually, a permanent state is reached:"]
-        for sub in _structured(g_inner, depth + 1).splitlines():
-            lines.append(_bullet(sub.lstrip(), depth + 1))
-        return "\n".join(lines)
-
-    # F(!p)
-    if isinstance(inner, ltlnode.NotNode) and isinstance(inner.operand, ltlnode.LiteralNode):
-        return f"Eventually, '{inner.operand.value}' will cease to hold."
-
-    # F(p & q)
-    if isinstance(inner, ltlnode.AndNode):
-        lines = ["Eventually, the following will all be true simultaneously:"]
-        lines.append(_bullet(_structured(inner.left, depth + 1), depth + 1))
-        lines.append(_bullet(_structured(inner.right, depth + 1), depth + 1))
-        return "\n".join(lines)
-
-    # Generic F(...)
-    inner_text = _structured(inner, depth + 1)
-    if "\n" in inner_text:
-        lines = ["Eventually, the following will hold:"]
-        for line in inner_text.splitlines():
-            lines.append(_bullet(line.lstrip(), depth + 1))
-        return "\n".join(lines)
-    return f"Eventually, {inner_text}."
-
-
-# ---- Next ----
-
-def _translate_next(steps: int, inner: ltlnode.LTLNode, depth: int) -> str:
-    timing = _steps_phrase(steps)
-
-    # X(F q) or XX...(F q)
-    if isinstance(inner, ltlnode.FinallyNode):
-        resp = inner.operand
-        return f"Starting from {timing}, {_flat(resp)} must eventually occur."
-
-    inner_text = _structured(inner, depth + 1)
-    if "\n" in inner_text:
-        lines = [f"In {timing}:"]
-        for line in inner_text.splitlines():
-            lines.append(_bullet(line.lstrip(), depth + 1))
-        return "\n".join(lines)
-    return f"In {timing}, {inner_text}."
-
-
-# ---- Until ----
-
-def _translate_until(node: ltlnode.UntilNode, depth: int) -> str:
-    left = node.left
-    right = node.right
-
-    # (G p) U (F q)
-    if isinstance(left, ltlnode.GloballyNode) and isinstance(right, ltlnode.FinallyNode):
-        l_eng = _flat(left.operand)
-        r_eng = _flat(right.operand)
-        lines = [f"{l_eng} must hold at all times."]
-        lines.append(f"This requirement continues until {r_eng} eventually occurs.")
-        return "\n".join(lines)
-
-    # (p U q) where both sides are simple
-    if _is_simple(left) and _is_simple(right):
-        return f"{_flat(left)} holds until {_flat(right)} occurs"
-
-    lines = ["The following 'until' relationship holds:"]
-    lines.append(_bullet(f"Maintained: {_structured(left, depth + 1)}", depth + 1))
-    lines.append(_bullet(f"Until: {_structured(right, depth + 1)}", depth + 1))
-    return "\n".join(lines)
-
-
-# ---- Implies ----
-
-def _translate_implies(node: ltlnode.ImpliesNode, depth: int) -> str:
-    left = node.left
-    right = node.right
-
-    # X^n p -> X^m q  -- aligned next chains
-    left_steps, left_core = _count_next_chain(left)
-    right_steps, right_core = _count_next_chain(right)
-    if left_steps >= 1 and right_steps >= 1:
-        lines = [f"If {_flat(left_core)} holds in {_steps_phrase(left_steps)}, then:"]
-        lines.append(_bullet(
-            f"{_flat(right_core)} must hold {_steps_phrase(right_steps)}",
-            depth + 1))
-        return "\n".join(lines)
-
-    if _is_simple(left) and _is_simple(right):
-        return f"If {_flat(left)}, then {_flat(right)}."
-
-    cond_text = _structured(left, depth + 1)
-    resp_text = _structured(right, depth + 1)
-
-    lines = []
-    if "\n" in cond_text:
-        lines.append("If the following condition holds:")
-        for l in cond_text.splitlines():
-            lines.append(_bullet(l.lstrip(), depth + 1))
-    else:
-        lines.append(f"If {cond_text}, then:")
-
-    if "\n" in resp_text:
-        for l in resp_text.splitlines():
-            lines.append(_bullet(l.lstrip(), depth + 1))
-    else:
-        lines.append(_bullet(resp_text, depth + 1))
-
-    return "\n".join(lines)
-
-
-# ---- Equivalence ----
-
-def _translate_equivalence(node: ltlnode.EquivalenceNode, depth: int) -> str:
-    left = node.left
-    right = node.right
-
-    if _is_simple(left) and _is_simple(right):
-        return f"{_flat(left)} if and only if {_flat(right)}."
-
-    lines = ["The following two statements are equivalent:"]
-    lines.append(_bullet(f"Statement A: {_structured(left, depth + 1)}", depth + 1))
-    lines.append(_bullet(f"Statement B: {_structured(right, depth + 1)}", depth + 1))
-    return "\n".join(lines)
-
-
-# ---- And ----
-
-def _translate_and(node: ltlnode.AndNode, depth: int) -> str:
-    left = node.left
-    right = node.right
-
-    if _is_simple(left) and _is_simple(right):
-        return f"{_flat(left)} and {_flat(right)}"
-
-    lines = ["All of the following must hold:"]
-    lines.append(_bullet(_structured(left, depth + 1), depth + 1))
-    lines.append(_bullet(_structured(right, depth + 1), depth + 1))
-    return "\n".join(lines)
-
-
-# ---- Or ----
-
-def _translate_or(node: ltlnode.OrNode, depth: int) -> str:
-    left = node.left
-    right = node.right
-
-    if _is_simple(left) and _is_simple(right):
-        return f"{_flat(left)} or {_flat(right)}"
-
-    lines = ["At least one of the following must hold:"]
-    lines.append(_bullet(_structured(left, depth + 1), depth + 1))
-    lines.append(_bullet(_structured(right, depth + 1), depth + 1))
-    return "\n".join(lines)
-
-
-# ---- Not ----
-
-def _translate_not(node: ltlnode.NotNode, depth: int) -> str:
-    inner = node.operand
-
-    if isinstance(inner, ltlnode.LiteralNode):
-        return f"not '{inner.value}'"
-
-    inner_text = _structured(inner, depth + 1)
-    if "\n" in inner_text:
-        lines = ["It is NOT the case that:"]
-        for line in inner_text.splitlines():
-            lines.append(_bullet(line.lstrip(), depth + 1))
-        return "\n".join(lines)
-    return f"It is not the case that {inner_text}."
-
-
-# ---------------------------------------------------------------------------
-# Public API
-# ---------------------------------------------------------------------------
-
-def translate(node: ltlnode.LTLNode) -> str:
-    """Translate an LTL AST node into structured English.
-
-    Simple formulas (temporal depth < 2) produce a single line.
-    Complex formulas produce indented, bullet-point explanations.
-    """
-    return _structured(node, depth=0)
diff --git a/test/test_translation_comparison.py b/test/test_translation_comparison.py
deleted file mode 100644
index 95c504b..0000000
--- a/test/test_translation_comparison.py
+++ /dev/null
@@ -1,306 +0,0 @@
-"""Side-by-side comparison of three LTL-to-English translation approaches.
-
-Run with:
-    python -m pytest test/test_translation_comparison.py -v -s
-
-The -s flag is important to see the printed comparison tables.
-"""
-
-import unittest
-import sys
-import os
-import random
-
-# Add the src directory to sys.path
-sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../src")))
-
-# Mock spot module to avoid import error
-from unittest.mock import MagicMock
-sys.modules['spot'] = MagicMock()
-
-from ltlnode import parse_ltl_string
-import ltltoeng
-import ltltoeng_structured as structured
-import ltltoeng_prose as prose
-
-
-# ---------------------------------------------------------------------------
-# Test formulas grouped by category
-# ---------------------------------------------------------------------------
-
-SIMPLE_FORMULAS = [
-    ("G p",         "Globally literal"),
-    ("F p",         "Finally literal"),
-    ("X p",         "Next literal"),
-    ("p U q",       "Until"),
-    ("p & q",       "And"),
-    ("p | q",       "Or"),
-    ("p -> q",      "Implies"),
-    ("!p",          "Not"),
-]
-
-RESPONSE_PATTERNS = [
-    ("G (p -> F q)",      "Response pattern"),
-    ("G (p -> X q)",      "Immediate response"),
-    ("G (p -> X(F q))",   "Bounded response"),
-    ("G(p -> (F q & F r))", "Chain response"),
-    ("G(p -> (q U r))",   "Chain precedence"),
-]
-
-RECURRENCE_PERSISTENCE = [
-    ("G (F p)",           "Recurrence (infinitely often)"),
-    ("F (G p)",           "Persistence (stability)"),
-    ("G !p",              "Never"),
-    ("!(F p)",            "Never (negated finally)"),
-]
-
-COMPLEX_NESTED = [
-    ("F(G(p -> F q))",    "Eventual stable response"),
-    ("G((p U q) -> F r)", "Until-triggered response"),
-    ("(G p) U (F q)",     "Globally until finally"),
-    ("(p U q) U r",       "Nested until"),
-    ("G(p -> X p)",       "Final state (persistence)"),
-]
-
-NEXT_CHAINS = [
-    ("X X X p",           "Triple next"),
-    ("(X p) -> (X X X q)", "Aligned next implication"),
-]
-
-PROPOSITIONAL = [
-    ("!!p",               "Double negation"),
-    ("!(p & q)",          "Negated conjunction (De Morgan)"),
-    ("!(p | q)",          "Negated disjunction (De Morgan)"),
-    ("!(p -> q)",         "Negated implication"),
-    ("!p & !q",           "And of negations"),
-    ("!p | !q",           "Or of negations"),
-    ("(p & q) -> r",      "Conjunction implies"),
-    ("p -> (q & r)",      "Implies conjunction"),
-    ("!p -> q",           "Negation implies (unless)"),
-]
-
-ALL_FORMULAS = (
-    SIMPLE_FORMULAS
-    + RESPONSE_PATTERNS
-    + RECURRENCE_PERSISTENCE
-    + COMPLEX_NESTED
-    + NEXT_CHAINS
-    + PROPOSITIONAL
-)
-
-
-# ---------------------------------------------------------------------------
-# Helper: translate with all three systems
-# ---------------------------------------------------------------------------
-
-def _translate_all(formula_str: str):
-    """Return (original, structured, prose) translations."""
-    random.seed(42)  # deterministic for original translator
-    node = parse_ltl_string(formula_str)
-
-    original = ltltoeng.finalize_sentence(node.__to_english__())
-    struct = structured.translate(parse_ltl_string(formula_str))
-    pro = prose.translate(parse_ltl_string(formula_str))
-
-    return original, struct, pro
-
-
-# ---------------------------------------------------------------------------
-# Comparison printer (runs as a test so it shows up with -s)
-# ---------------------------------------------------------------------------
-
-class TestTranslationComparison(unittest.TestCase):
-    """Print side-by-side translations for manual review."""
-
-    def setUp(self):
-        random.seed(42)
-
-    def test_print_comparison_table(self):
-        """Print all translations for visual comparison."""
-        print("\n" + "=" * 100)
-        print("LTL-TO-ENGLISH TRANSLATION COMPARISON")
-        print("=" * 100)
-
-        categories = [
-            ("SIMPLE", SIMPLE_FORMULAS),
-            ("RESPONSE PATTERNS", RESPONSE_PATTERNS),
-            ("RECURRENCE / PERSISTENCE", RECURRENCE_PERSISTENCE),
-            ("COMPLEX NESTED", COMPLEX_NESTED),
-            ("NEXT CHAINS", NEXT_CHAINS),
-            ("PROPOSITIONAL", PROPOSITIONAL),
-        ]
-
-        for cat_name, formulas in categories:
-            print(f"\n{'─' * 100}")
-            print(f"  {cat_name}")
-            print(f"{'─' * 100}")
-
-            for formula_str, description in formulas:
-                original, struct, pro = _translate_all(formula_str)
-
-                print(f"\n  Formula:    {formula_str}  ({description})")
-                print(f"  Original:   {original}")
-
-                # For structured output, indent multi-line
-                if "\n" in struct:
-                    lines = struct.split("\n")
-                    print(f"  Structured: {lines[0]}")
-                    for line in lines[1:]:
-                        print(f"              {line}")
-                else:
-                    print(f"  Structured: {struct}")
-
-                # For prose output, show multi-sentence flow
-                print(f"  Prose:      {pro}")
-
-        print("\n" + "=" * 100)
-
-
-# ---------------------------------------------------------------------------
-# Actual assertions
-# ---------------------------------------------------------------------------
-
-class TestStructuredBasicProperties(unittest.TestCase):
-    """Basic assertions for the structured translator."""
-
-    def setUp(self):
-        random.seed(42)
-
-    def test_simple_formulas_are_single_line(self):
-        """Simple formulas should not produce bullet points."""
-        for formula_str, _ in SIMPLE_FORMULAS:
-            node = parse_ltl_string(formula_str)
-            result = structured.translate(node)
-            self.assertNotIn("\n", result,
-                             f"Simple formula {formula_str} should be single-line, got:\n{result}")
-
-    def test_complex_formulas_use_bullets(self):
-        """Complex formulas should produce multi-line output with bullets."""
-        complex_formulas = [
-            "F(G(p -> F q))",
-            "G(p -> (F q & F r))",
-            "G(p -> X(F q))",
-        ]
-        for formula_str in complex_formulas:
-            node = parse_ltl_string(formula_str)
-            result = structured.translate(node)
-            self.assertIn("-", result,
-                          f"Complex formula {formula_str} should have bullets, got:\n{result}")
-
-    def test_response_pattern(self):
-        node = parse_ltl_string("G(p -> F q)")
-        result = structured.translate(node)
-        self.assertIn("'p'", result)
-        self.assertIn("'q'", result)
-        self.assertIn("eventually", result.lower())
-
-    def test_recurrence(self):
-        node = parse_ltl_string("G(F p)")
-        result = structured.translate(node)
-        self.assertIn("infinitely often", result.lower())
-
-    def test_persistence(self):
-        node = parse_ltl_string("F(G p)")
-        result = structured.translate(node)
-        self.assertIn("permanently", result.lower())
-
-    def test_never(self):
-        node = parse_ltl_string("G !p")
-        result = structured.translate(node)
-        self.assertIn("never", result.lower())
-
-    def test_eventual_stable_response(self):
-        node = parse_ltl_string("F(G(p -> F q))")
-        result = structured.translate(node)
-        self.assertIn("permanent", result.lower())
-        self.assertIn("'p'", result)
-        self.assertIn("'q'", result)
-
-
-class TestProseBasicProperties(unittest.TestCase):
-    """Basic assertions for the prose translator."""
-
-    def setUp(self):
-        random.seed(42)
-
-    def test_all_translations_end_with_period(self):
-        """Every translation should end with a period."""
-        for formula_str, _ in ALL_FORMULAS:
-            node = parse_ltl_string(formula_str)
-            result = prose.translate(node)
-            self.assertTrue(result.endswith("."),
-                            f"Translation of {formula_str} should end with period: {result}")
-
-    def test_all_translations_start_capitalized_or_quoted(self):
-        """Every translation should start with a capital letter or a quote."""
-        for formula_str, _ in ALL_FORMULAS:
-            node = parse_ltl_string(formula_str)
-            result = prose.translate(node)
-            self.assertTrue(
-                result[0].isupper() or result[0] == "'",
-                f"Translation of {formula_str} should start capitalized: {result}")
-
-    def test_response_pattern(self):
-        node = parse_ltl_string("G(p -> F q)")
-        result = prose.translate(node)
-        self.assertIn("whenever", result.lower())
-        self.assertIn("eventually", result.lower())
-
-    def test_recurrence(self):
-        node = parse_ltl_string("G(F p)")
-        result = prose.translate(node)
-        self.assertIn("infinitely often", result.lower())
-
-    def test_persistence(self):
-        node = parse_ltl_string("F(G p)")
-        result = prose.translate(node)
-        self.assertIn("forever", result.lower())
-
-    def test_never(self):
-        node = parse_ltl_string("G !p")
-        result = prose.translate(node)
-        self.assertIn("never", result.lower())
-
-    def test_multi_sentence_for_complex(self):
-        """Complex formulas should produce multiple sentences."""
-        node = parse_ltl_string("F(G(p -> F q))")
-        result = prose.translate(node)
-        # Count sentences (periods not at end)
-        sentences = [s.strip() for s in result.split(".") if s.strip()]
-        self.assertGreaterEqual(len(sentences), 2,
-                                f"Expected multi-sentence for F(G(p->Fq)), got: {result}")
-
-    def test_de_morgan_neither_nor(self):
-        node = parse_ltl_string("!(p | q)")
-        result = prose.translate(node)
-        self.assertIn("neither", result.lower())
-        self.assertIn("nor", result.lower())
-
-    def test_de_morgan_not_both(self):
-        node = parse_ltl_string("!(p & q)")
-        result = prose.translate(node)
-        self.assertIn("not both", result.lower())
-
-    def test_persistence_same_literal(self):
-        node = parse_ltl_string("G(p -> X p)")
-        result = prose.translate(node)
-        self.assertIn("once", result.lower())
-        self.assertIn("forever", result.lower())
-
-    def test_until_triggered_response(self):
-        """G((p U q) -> F r) should mention until and eventually."""
-        node = parse_ltl_string("G((p U q) -> F r)")
-        result = prose.translate(node)
-        self.assertIn("until", result.lower())
-        self.assertIn("eventually", result.lower())
-
-    def test_globally_until_finally(self):
-        """(G p) U (F q) should produce multi-sentence."""
-        node = parse_ltl_string("(G p) U (F q)")
-        result = prose.translate(node)
-        sentences = [s.strip() for s in result.split(".") if s.strip()]
-        self.assertGreaterEqual(len(sentences), 2)
-
-
-if __name__ == "__main__":
-    unittest.main()

From 383132b774a91a6b74cf5f5e6406b1b9400959d1 Mon Sep 17 00:00:00 2001
From: Siddhartha Prasad <siddhartha_prasad@brown.edu>
Date: Sun, 22 Mar 2026 11:26:58 -0400
Subject: [PATCH 7/7] Switch all call sites from old ltltoeng to ltltoeng_prose
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

app.py, stepper.py, and exercisebuilder.py all called
ltltoeng.finalize_sentence(node.__to_english__()) directly. Now they
use ltltoeng_prose.translate(node) for consistent, multi-sentence
English output.

The old ltltoeng module is now only used internally by ltlnode.py as
the pattern-matching engine for __to_english__() — no external callers
remain.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/app.py             | 6 +++---
 src/exercisebuilder.py | 1 -
 src/stepper.py         | 4 ++--
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/app.py b/src/app.py
index 5114e84..0d7b97d 100644
--- a/src/app.py
+++ b/src/app.py
@@ -22,7 +22,7 @@
 import uuid
 import requests
 from stepper import traceSatisfactionPerStep, getTraceRenderData
-import ltltoeng
+import ltltoeng_prose
 from authroutes import (
     authroutes,
     init_app,
@@ -412,7 +412,7 @@ def ltl_to_english():
         return jsonify({"error": "Missing required query parameter 'formula'."}), 400
     try:
         node = parse_ltl_string(formula)
-        english = ltltoeng.finalize_sentence(node.__to_english__())
+        english = ltltoeng_prose.translate(node)
     except Exception as e:
         return jsonify({"error": "Failed to translate formula.", "details": str(e)}), 400
 
@@ -516,7 +516,7 @@ def ltl_to_english_ui():
         else:
             try:
                 node = parse_ltl_string(input_formula)
-                translation = ltltoeng.finalize_sentence(node.__to_english__())
+                translation = ltltoeng_prose.translate(node)
             except Exception as e:
                 error = f"Failed to translate formula: {e}"
 
diff --git a/src/exercisebuilder.py b/src/exercisebuilder.py
index b7f85ea..82786ca 100644
--- a/src/exercisebuilder.py
+++ b/src/exercisebuilder.py
@@ -7,7 +7,6 @@
 import random
 import re
 import math
-import ltltoeng
 import ltltoeng_prose
 import ltltoeng_contextualized
 from syntacticmutator import applyRandomMutationNotEquivalentTo
diff --git a/src/stepper.py b/src/stepper.py
index bb17e2a..295876b 100644
--- a/src/stepper.py
+++ b/src/stepper.py
@@ -1,5 +1,5 @@
 from ltlnode import UnaryOperatorNode, BinaryOperatorNode, LiteralNode, parse_ltl_string
-import ltltoeng
+import ltltoeng_prose
 from spotutils import is_trace_satisfied
 import re
 import random
@@ -28,7 +28,7 @@ def getLTLFormulaAsString(node, syntax):
     elif syntax == "English":
         ## We should hopefully never get here. However, 
         ## I'm adding it here to suggest a way forward.
-        return ltltoeng.finalize_sentence(node.__to_english__())
+        return ltltoeng_prose.translate(node)
 
     ## Default to classic syntax
     return str(node)