brownplt · sidprasad · Mar 21, 2026 · Mar 21, 2026 · Mar 21, 2026 · Mar 21, 2026
diff --git a/src/app.py b/src/app.py
@@ -22,7 +22,7 @@
 import uuid
 import requests
 from stepper import traceSatisfactionPerStep, getTraceRenderData
-import ltltoeng
+import ltltoeng_prose
 from authroutes import (
     authroutes,
     init_app,
@@ -412,7 +412,7 @@ def ltl_to_english():
         return jsonify({"error": "Missing required query parameter 'formula'."}), 400
     try:
         node = parse_ltl_string(formula)
-        english = ltltoeng.finalize_sentence(node.__to_english__())
+        english = ltltoeng_prose.translate(node)
     except Exception as e:
         return jsonify({"error": "Failed to translate formula.", "details": str(e)}), 400
 
@@ -516,7 +516,7 @@ def ltl_to_english_ui():
         else:
             try:
                 node = parse_ltl_string(input_formula)
-                translation = ltltoeng.finalize_sentence(node.__to_english__())
+                translation = ltltoeng_prose.translate(node)
             except Exception as e:
                 error = f"Failed to translate formula: {e}"
 
@@ -854,9 +854,11 @@ def loganswer(questiontype):
                 return {"error": "submission_limit", "message": "This exercise only allows one submission."}, 403
 
 
+    translation_mode = data.get('translation_mode', '')
     answer_logger.logStudentResponse(userId = userId, misconceptions = misconceptions, question_text = question_text,
                                       question_options = question_options, correct_answer = isCorrect,
-                                      questiontype=questiontype, mp_class = mp_class, exercise = exercise, course = courseId)
+                                      questiontype=questiontype, mp_class = mp_class, exercise = exercise, course = courseId,
+                                      translation_mode = translation_mode)
 
 
     if questiontype == "english_to_ltl":

diff --git a/src/exercisebuilder.py b/src/exercisebuilder.py
@@ -7,7 +7,8 @@
 import random
 import re
 import math
-import ltltoeng
+import ltltoeng_prose
+import ltltoeng_contextualized
 from syntacticmutator import applyRandomMutationNotEquivalentTo
 
 
@@ -411,16 +412,33 @@ def contains_undersirable_lit(s):
 
         ## First generate a large pool from spot randltl
         pool_size = 2*num_questions
-        question_answers = spotutils.gen_rand_ltl(atoms = literals, 
-                                                  tree_size = tree_size, 
-                                                  ltl_priorities = self.ltl_priorities, 
+        question_answers = spotutils.gen_rand_ltl(atoms = literals,
+                                                  tree_size = tree_size,
+                                                  ltl_priorities = self.ltl_priorities,
                                                   num_formulae = pool_size)
-        
+
         ## Augment with template-generated formulas for pattern-specific misconceptions
         ## This helps ensure we get formulas that can actually be mutated with these misconceptions
         template_formulas = self.generate_template_formulas(literals, num_templates=max(1, num_questions // 4))
         question_answers.extend(template_formulas)
-
+
+        ## A/B test: lazily generate a pool with r,g,b literals for contextualized questions
+        CONTEXTUALIZED_LITERALS = list(ltltoeng_contextualized.THEMES["lights"].literals.keys())
+        ctx_iter = None
+        def _get_ctx_iter():
+            nonlocal ctx_iter
+            if ctx_iter is None:
+                ctx_pool = spotutils.gen_rand_ltl(atoms = CONTEXTUALIZED_LITERALS,
+                                                  tree_size = tree_size,
+                                                  ltl_priorities = self.ltl_priorities,
+                                                  num_formulae = pool_size)
+                ctx_templates = self.generate_template_formulas(CONTEXTUALIZED_LITERALS, num_templates=max(1, num_questions // 4))
+                ctx_pool.extend(ctx_templates)
+                ctx_pool = [a for a in ctx_pool if not contains_undersirable_lit(a)]
+                random.shuffle(ctx_pool)
+                ctx_iter = iter(ctx_pool)
+            return ctx_iter
+
 
         def formula_choice_metric(formula):
 
@@ -447,7 +465,15 @@ def formula_choice_metric(formula):
             if kind == self.TRACESATMC:
                 question = self.build_tracesat_mc_question(answer)
             elif kind == self.ENGLISHTOLTL:
-                question = self.build_english_to_ltl_question(answer)
+                # A/B test: ~50/50 abstract vs. contextualized (abstract is used if the contextualized pool is exhausted)
+                if random.random() < 0.5:
+                    ctx_answer = next(_get_ctx_iter(), None)
+                    if ctx_answer is not None:
+                        question = self.build_english_to_ltl_question(ctx_answer, contextualized=True)
+                    else:
+                        question = self.build_english_to_ltl_question(answer)
+                else:
+                    question = self.build_english_to_ltl_question(answer)
             elif kind == self.TRACESATYN:
                 question = self.build_tracesat_yn_question(answer)
 
@@ -490,16 +516,25 @@ def formula_choice_metric(formula):
 
 
     def gen_nl_question(self, formula):
+        as_node = ltlnode.parse_ltl_string(formula)
+        result = ltltoeng_prose.translate(as_node)
+        if not result or result.strip() == "":
+            return None
+        return result
 
+
+    def gen_nl_question_contextualized(self, formula):
+        """Generate a contextualized English question using the lights theme.
+
+        Expects the formula to already use r,g,b literals (matching the theme).
+        Returns None if the translation is empty or whitespace-only; parse/translate errors are not caught here.
+        """
+        LIGHTS_THEME = ltltoeng_contextualized.THEMES["lights"]
         as_node = ltlnode.parse_ltl_string(formula)
-        formula_eng = as_node.__to_english__()
-        if formula_eng is None or formula_eng == "":
+        result = ltltoeng_contextualized.translate(as_node, LIGHTS_THEME)
+        if not result or result.strip() == "":
             return None
-
-        formula_eng_corrected = ltltoeng.correct_grammar(formula_eng)
-        ### If there are multiple '.' in a row, replace with a single '.'
-        formula_eng_corrected = re.sub(r'\.{2,}', '.', formula_eng_corrected)
-        return ltltoeng.finalize_sentence(formula_eng_corrected)
+        return result
 
 
     def get_options_with_misconceptions_as_formula(self, answer):
@@ -547,13 +582,22 @@ def get_options_with_misconceptions_as_formula(self, answer):
 
         return merged_options
 
-    def build_english_to_ltl_question(self, answer):
-        
+    def build_english_to_ltl_question(self, answer, contextualized=False):
+
         options = self.get_options_with_misconceptions_as_formula(answer)
         if options is None:
             return None
 
-        question = self.gen_nl_question(answer)
+        if contextualized:
+            question = self.gen_nl_question_contextualized(answer)
+            translation_mode = "contextualized"
+            # Fall back to abstract if contextualized translation fails
+            if question is None or question == "":
+                question = self.gen_nl_question(answer)
+                translation_mode = "abstract"
+        else:
+            question = self.gen_nl_question(answer)
+            translation_mode = "abstract"
 
         if question is None or question == "":
             print("Question generation failed unexpectedly.")
@@ -562,7 +606,8 @@ def build_english_to_ltl_question(self, answer):
         return {
             "question": question,
             "type": self.ENGLISHTOLTL,
-            "options": options
+            "options": options,
+            "translation_mode": translation_mode
         }
 
     def build_tracesat_mc_question(self, answer):

diff --git a/src/logger.py b/src/logger.py
@@ -52,6 +52,7 @@ class StudentResponse(Base):
     mp_class = Column(String)
     exercise = Column(String)
     course = Column(String, default="")
+    translation_mode = Column(String, default="")
 
 
 class GeneratedExercise(Base):
@@ -114,25 +115,25 @@ def __init__(self):
         if SENTENCE_PAIR_RATING_TABLE not in self.inspector.get_table_names():
             Base.metadata.tables[SENTENCE_PAIR_RATING_TABLE].create(self.engine)
 
-    def record(self, log):
-        with self.Session() as session:
-            print("Recording log")
-            session.add(log)
-            session.commit()
-
-    def getRatedEnglishFormulas(self, user_id):
-        """Return a set of LTL formulas this user has already rated."""
-        if not isinstance(user_id, str):
-            raise ValueError("user_id should be a string")
-
-        with self.Session() as session:
-            rows = session.query(EnglishLTLRating.ltl).filter(
-                EnglishLTLRating.user_id == user_id
-            ).all()
-            return {row.ltl for row in rows if row.ltl}
+    def record(self, log):
+        with self.Session() as session:
+            print("Recording log")
+            session.add(log)
+            session.commit()
+
+    def getRatedEnglishFormulas(self, user_id):
+        """Return a set of LTL formulas this user has already rated."""
+        if not isinstance(user_id, str):
+            raise ValueError("user_id should be a string")
+
+        with self.Session() as session:
+            rows = session.query(EnglishLTLRating.ltl).filter(
+                EnglishLTLRating.user_id == user_id
+            ).all()
+            return {row.ltl for row in rows if row.ltl}
 
 
-    def logStudentResponse(self, userId, misconceptions, question_text, question_options, correct_answer, questiontype, mp_class, exercise, course):
+    def logStudentResponse(self, userId, misconceptions, question_text, question_options, correct_answer, questiontype, mp_class, exercise, course, translation_mode=""):
 
         if not isinstance(userId, str):
             raise ValueError("userId should be a string")
@@ -154,9 +155,9 @@ def logStudentResponse(self, userId, misconceptions, question_text, question_opt
 
         ## We still want to log the response if there are no misconceptions
         if misconceptions == None or len(misconceptions) == 0:
-            log = StudentResponse(user_id=userId, timestamp=datetime.datetime.now(), 
+            log = StudentResponse(user_id=userId, timestamp=datetime.datetime.now(),
                                   misconception="", question_text=question_text, question_options=question_options, correct_answer=correct_answer,
-                                  question_type=questiontype, mp_class=mp_class, exercise=exercise, course=course)
+                                  question_type=questiontype, mp_class=mp_class, exercise=exercise, course=course, translation_mode=translation_mode)
             self.record(log)
 
 
@@ -165,9 +166,9 @@ def logStudentResponse(self, userId, misconceptions, question_text, question_opt
             if not isinstance(misconception, str):
                 raise ValueError("misconception should be a string")
 
-            log = StudentResponse(user_id=userId, timestamp=datetime.datetime.now(), 
+            log = StudentResponse(user_id=userId, timestamp=datetime.datetime.now(),
                                   misconception=misconception, question_text=question_text, question_options=question_options, correct_answer=correct_answer,
-                                  question_type=questiontype, mp_class=mp_class, exercise=exercise, course=course)
+                                  question_type=questiontype, mp_class=mp_class, exercise=exercise, course=course, translation_mode=translation_mode)
             self.record(log)