Merge pull request #73 from lambda-feedback/tr102-rtol-slow

KarlLundengaard · web-flow · commit 7a9c098dce9d · 2023-09-06T00:03:49.000Z
Fixed similarly to compareExpressions
diff --git a/app/evaluation.py b/app/evaluation.py
@@ -145,31 +145,48 @@ def check_equality(response, answer, params, eval_response) -> dict:
         eval_response.is_correct = ((res.args[0]-res.args[1])/(ans.args[0]-ans.args[1])).simplify().is_constant()
         return eval_response
 
-    error_below_atol = False
-    error_below_rtol = False
-
-    if params.get("numerical", False) or params.get("rtol", False) or params.get("atol", False):
-        # REMARK: 'pi' should be a reserve symbols but is sometimes not treated as one, possibly because of input symbols
-        # The two lines below this comments fixes the issue but a more robust solution should be found for cases where there
-        # are other reserved symbols.
-        ans = ans.subs(Symbol('pi'), float(pi))
-        res = res.subs(Symbol('pi'), float(pi))
-        if res.is_constant() and ans.is_constant():
+    is_correct = bool((res - ans).simplify() == 0)
+    eval_response.is_correct = is_correct
+
+    error_below_atol = None
+    error_below_rtol = None
+
+    if eval_response.is_correct is False:
+        if params.get("numerical", False) or params.get("rtol", False) or params.get("atol", False):
+            # REMARK: 'pi' should be a reserved symbol but it is sometimes not treated as one, possibly because of input symbols.
+            # The two lines below this comment fix the issue, but a more robust solution should be found for cases where there
+            # are other reserved symbols.
+            def replace_pi(expr):
+                pi_symbol = pi
+                for s in expr.free_symbols:
+                    if str(s) == 'pi':
+                        pi_symbol = s
+                return expr.subs(pi_symbol, float(pi))
+            ans = replace_pi(ans)
+            res = replace_pi(res)
             if "atol" in params.keys():
-                error_below_atol = bool(abs(float(ans-res)) < float(params["atol"]))
+                try:
+                    absolute_error = abs(float(ans-res))
+                    error_below_atol = bool(absolute_error < float(params["atol"]))
+                except TypeError:
+                    error_below_atol = None
             else:
                 error_below_atol = True
             if "rtol" in params.keys():
-                rtol = float(params["rtol"])
-                error_below_rtol = bool(float(abs(((ans-res)/ans).simplify())) < rtol)
+                try:
+                    relative_error = abs(float((ans-res)/ans))
+                    error_below_rtol = bool(relative_error < float(params["rtol"]))
+                except TypeError:
+                    error_below_rtol = None
             else:
                 error_below_rtol = True
-        if error_below_atol and error_below_rtol:
-            eval_response.is_correct = True
-            tag = "WITHIN_TOLERANCE"
-            eval_response.add_feedback((tag, symbolic_equal_internal_messages[tag]))
-            return eval_response
+            if error_below_atol is None or error_below_rtol is None:
+                eval_response.is_correct = False
+                tag = "NOT_NUMERICAL"
+                eval_response.add_feedback((tag, symbolic_equal_internal_messages[tag]))
+            elif error_below_atol is True and error_below_rtol is True:
+                eval_response.is_correct = True
+                tag = "WITHIN_TOLERANCE"
+                eval_response.add_feedback((tag, symbolic_equal_internal_messages[tag]))
 
-    is_correct = bool((res - ans).simplify() == 0)
-    eval_response.is_correct = is_correct
     return eval_response
diff --git a/app/evaluation_tests.py b/app/evaluation_tests.py
@@ -548,71 +548,81 @@ def test_empty_input_symbols_codes_and_alternatives(self):
         assert result["is_correct"] is True
 
     @pytest.mark.parametrize(
-        "description,response,answer,tolerance,outcome",
+        "description,response,answer,tolerance,tags,outcome",
         [
             (
                 "Correct response, tolerance specified with atol",
                 "6.73",
                 "sqrt(3)+5",
                 {"atol": 0.005},
+                ["WITHIN_TOLERANCE"],
                 True
             ),
             (
                 "Incorrect response, tolerance specified with atol",
                 "6.7",
                 "sqrt(3)+5",
                 {"atol": 0.005},
+                [],
                 False
             ),
             (
                 "Correct response, tolerance specified with rtol",
                 "6.73",
                 "sqrt(3)+5",
                 {"rtol": 0.0005},
+                ["WITHIN_TOLERANCE"],
                 True
             ),
             (
                 "Incorrect response, tolerance specified with rtol",
                 "6.7",
                 "sqrt(3)+5",
                 {"rtol": 0.0005},
+                [],
                 False
             ),
             (
                 "Response is not constant, tolerance specified with atol",
                 "6.7+x",
                 "sqrt(3)+5",
                 {"atol": 0.005},
+                ["NOT_NUMERICAL"],
                 False
             ),
             (
                 "Answer is not constant, tolerance specified with atol",
                 "6.73",
                 "sqrt(3)+x",
                 {"atol": 0.005},
+                ["NOT_NUMERICAL"],
                 False
             ),
             (
                 "Response is not constant, tolerance specified with rtol",
                 "6.7+x",
                 "sqrt(3)+5",
                 {"rtol": 0.0005},
+                ["NOT_NUMERICAL"],
                 False
             ),
             (
                 "Answer is not constant, tolerance specified with rtol",
                 "6.73",
                 "sqrt(3)+x",
                 {"rtol": 0.0005},
+                ["NOT_NUMERICAL"],
                 False
             ),
         ]
     )
-    def test_numerical_comparison(self, description, response, answer, tolerance, outcome):
+    def test_numerical_comparison_problem(self, description, response, answer, tolerance, tags, outcome):
         params = {"numerical": True}
         params.update(tolerance)
-        result = evaluation_function(response, answer, params)
+        result = evaluation_function(response, answer, params, include_test_data=True)
         assert result["is_correct"] is outcome
+        for tag in tags:
+            tag in result["tags"]
 
     def test_warning_inappropriate_symbol(self):
         answer = 'factorial(2**4)'
diff --git a/app/feedback/symbolic_equal.py b/app/feedback/symbolic_equal.py
@@ -11,5 +11,5 @@
     "EXPRESSION_NOT_EQUALITY": "The response was an expression but was expected to be an equality.",
     "EQUALITY_NOT_EXPRESSION": "The response was an equality but was expected to be an expression.",
     "WITHIN_TOLERANCE": "",  # "The difference between the response and the answer is within specified error tolerance.",
-    "SYMBOLICALLY_EQUAL": "The response and answer are symbolically equal.",
+    "NOT_NUMERICAL": "", #"The expression cannot be evaluated numerically.",
 }

Original file line number	Diff line number	Diff line change
`@@ -11,5 +11,5 @@`
`11`	`11`	`"EXPRESSION_NOT_EQUALITY": "The response was an expression but was expected to be an equality.",`
`12`	`12`	`"EQUALITY_NOT_EXPRESSION": "The response was an equality but was expected to be an expression.",`
`13`	`13`	`"WITHIN_TOLERANCE": "", # "The difference between the response and the answer is within specified error tolerance.",`
`14`		`- "SYMBOLICALLY_EQUAL": "The response and answer are symbolically equal.",`
	`14`	`+ "NOT_NUMERICAL": "", #"The expression cannot be evaluated numerically.",`
`15`	`15`	`}`