
Commit bf99b45

General formatting updates; no major changes from the version discussed with Alex
1 parent 619a388

3 files changed (+108, -31 lines)

app/compare_text_lists.py

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+def process_list(input_list):
+    """
+    Detects if the input is a list, and if any element in the list contains semicolons,
+    it splits that element into multiple elements.
+
+    Args:
+        input_list (list): A list of strings.
+
+    Returns:
+        list: A processed list where semicolon-separated elements are split into separate elements.
+    """
+    if not isinstance(input_list, list):
+        raise ValueError("Input must be a list of strings.")
+
+    processed_list = []
+    for item in input_list:
+        if not isinstance(item, str):
+            raise ValueError("All elements in the input list must be strings.")
+
+        # Split by semicolon if present, otherwise keep the original item
+        processed_list.extend(item.split(';') if ';' in item else [item])
+
+    return processed_list
+def test_process_list():
+    """
+    Unit tests for process_list function.
+    """
+    test_cases = [
+        (["apple", "banana;orange", "grape"], ["apple", "banana", "orange", "grape"]),
+        (["one;two;three", "four", "five"], ["one", "two", "three", "four", "five"]),
+        (["alpha;beta", "gamma;delta;epsilon"], ["alpha", "beta", "gamma", "delta", "epsilon"]),
+        (["no_separator"], ["no_separator"]),
+        ([], []),
+        (["single"], ["single"]),
+    ]
+
+    for i, (input_list, expected_output) in enumerate(test_cases):
+        assert process_list(input_list) == expected_output, f"Test case {i+1} failed"
+
+    print("All test cases passed!")
+
+# Run the tests
+test_process_list()
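Two edge cases of process_list worth flagging: str.split(';') keeps surrounding whitespace and yields empty strings for doubled semicolons, and the module calls test_process_list() at import time. A quick sanity check (assuming the file is importable as compare_text_lists):

    from compare_text_lists import process_list  # note: importing also runs the tests

    print(process_list(["banana; orange"]))  # -> ['banana', ' orange']  (leading space kept)
    print(process_list(["a;;b"]))            # -> ['a', '', 'b']  (empty element preserved)

Whether those behaviours are desirable for grading input is worth confirming.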

app/evaluation.py

Lines changed: 52 additions & 18 deletions
@@ -7,7 +7,7 @@
 from langchain.prompts import PromptTemplate
 from dotenv import load_dotenv

-class Config:
+class Param:
     def __init__(self, mode='gpt', llama_version='3_1_8B', temperature=0.01, max_new_token=5):
         load_dotenv()

@@ -21,22 +21,46 @@ def __init__(self, mode='gpt', llama_version='3_1_8B', temperature=0.01, max_new

     self.response_num_required = 0 #initialise it with 0

-def setup_llm(config):
+def compareTextLists(input_list):
+    """
+    Detects if the input is a list, and if any element in the list contains semicolons,
+    it splits that element into multiple elements.
+
+    Args:
+        input_list (list): A list of strings.
+
+    Returns:
+        list: A processed list where semicolon-separated elements are split into separate elements.
+    """
+    if not isinstance(input_list, list):
+        raise ValueError("Input must be a list of strings.")
+
+    processed_list = []
+    for item in input_list:
+        if not isinstance(item, str):
+            raise ValueError("All elements in the input list must be strings.")
+
+        # Split by semicolon if present, otherwise keep the original item
+        processed_list.extend(item.split(';') if ';' in item else [item])
+
+    return processed_list
+
+def setup_llm(param):
     """Initialize the LLM model (GPT-4o or LLaMA 3) based on the given configuration."""
-    if config.mode == 'gpt':
+    if param.mode == 'gpt':
         return ChatOpenAI(
             model="gpt-4o-mini",
-            temperature=config.temperature,
-            max_tokens=config.max_new_token,
-            openai_api_key=config.openai_api_key
+            temperature=param.temperature,
+            max_tokens=param.max_new_token,
+            openai_api_key=param.openai_api_key
         )
-    elif config.mode == 'llama3':
+    elif param.mode == 'llama3':
         from langchain_huggingface import HuggingFaceEndpoint
         return HuggingFaceEndpoint(
-            endpoint_url=config.endpoint_3_1_8B,
-            max_new_tokens=config.max_new_token,
-            temperature=config.temperature,
-            huggingfacehub_api_token=config.huggingfacehub_api_token
+            endpoint_url=param.endpoint_3_1_8B,
+            max_new_tokens=param.max_new_token,
+            temperature=param.temperature,
+            huggingfacehub_api_token=param.huggingfacehub_api_token
         )
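As a reference point, setup_llm dispatches purely on param.mode, so a caller selects the backend roughly like this (a minimal sketch; the 'gpt' path needs an OpenAI key and the 'llama3' path a Hugging Face token, both picked up from the .env file Param loads):

    from evaluation import Param, setup_llm

    llm = setup_llm(Param(mode='gpt'))       # ChatOpenAI over gpt-4o-mini
    # llm = setup_llm(Param(mode='llama3'))  # HuggingFaceEndpoint for LLaMA 3.1 8B

Any other mode falls through both branches and setup_llm returns None, which callers may want to guard against.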
@@ -74,16 +98,26 @@ def recursive_evaluation(responses, answers, chain, parser):

     return all(results), matched_pairs, unmatched_responses

-def evaluation_function(response, answer, config=None):
+def evaluation_function(response, answer, param=None):
     """Evaluates the given response against the answer using LLaMA 3 or GPT-4o."""
+
+
+
+
+    #split the response and answer into lists with semicolons
+    response = compareTextLists(response)
+
+
+
+
     start_time = time.process_time()

-    # Ensure config is provided
-    if config is None:
-        config = Config()
+    # Ensure param is provided
+    if param is None:
+        param = Param()

     # Initialize LLM
-    llm = setup_llm(config)
+    llm = setup_llm(param)

     # Define prompt template
     prompt_template = PromptTemplate(
@@ -143,7 +177,7 @@ def evaluation_function(response, answer, config=None):

     is_correct, correct_answers, incorrect_answers = recursive_evaluation(response, answer, chain, parser)
     #check if student is inputting enough answers
-    if len(response) < config.response_num_required:
+    if len(response) < param.response_num_required:
         is_correct = False

     return {
@@ -159,7 +193,7 @@ def evaluation_function(response, answer, config=None):

 # Example Usage
 if __name__ == "__main__":
-    custom_config = Config()
+    custom_config = Param()
     print(evaluation_function(
         ["speed"], #response
         ["velocity"], #answer

app/evaluation_tests.py

Lines changed: 13 additions & 13 deletions
@@ -1,5 +1,5 @@
 import unittest
-from evaluation import evaluation_function, Config
+from evaluation import evaluation_function, Param

 class TestEvaluationFunction(unittest.TestCase):
     """
@@ -16,22 +16,22 @@ class TestEvaluationFunction(unittest.TestCase):

     @classmethod
     def setUpClass(cls):
-        """Initialize a shared Config instance for LLM setup."""
-        cls.config = Config()
+        """Initialize a shared Param instance for LLM setup."""
+        cls.param = Param()

     def test_basic_correct_response(self):
         """Test if semantically similar responses are marked correct."""
         response = ["Density", "Velocity", "Viscosity", "Length"]
         answer = ["Density", "Velocity", "Viscosity", "Length"]
-        result = evaluation_function(response, answer, self.config)
+        result = evaluation_function(response, answer, self.param)

         self.assertTrue(result.get("is_correct"))

     def test_basic_incorrect_response(self):
         """Test if semantically different responses are marked incorrect."""
         response = ["Mass", "Speed", "Friction", "Force"]
         answer = ["Density", "Velocity", "Viscosity", "Length"]
-        result = evaluation_function(response, answer, self.config)
+        result = evaluation_function(response, answer, self.param)

         self.assertFalse(result.get("is_correct"))

@@ -40,9 +40,9 @@ def test_partial_match(self):
         response = ["Density", "Velocity", "Viscosity"]
         answer = ["Density", "Velocity", "Viscosity", "Length"]

-        self.config.response_num_required = 4
-        result = evaluation_function(response, answer, self.config)
-        self.config.response_num_required = 0
+        self.param.response_num_required = 4
+        result = evaluation_function(response, answer, self.param)
+        self.param.response_num_required = 0

         self.assertFalse(result.get("is_correct"))

@@ -51,7 +51,7 @@ def test_synonyms_match(self):
         """Test if abbreviations are correctly identified."""
         response = ['velocity']
         answer = ['speed']
-        result = evaluation_function(response, answer, self.config)
+        result = evaluation_function(response, answer, self.param)

         self.assertTrue(result.get("is_correct"))

@@ -60,15 +60,15 @@ def test_exact_match_requirement(self):
         response = ["density", "speed", "viscosity", "length"]
         answer = ["Density", "Velocity", "Viscosity", "Length"]

-        result = evaluation_function(response, answer, self.config)
+        result = evaluation_function(response, answer, self.param)
         self.assertTrue(result.get("is_correct"))

     def test_should_not_contain(self):
         """Test if a response with a prohibited keyword fails."""
         response = ["density", "velocity", "viscosity", "length", "direction"]
         answer = ["Density", "Velocity", "Viscosity", "Length"]

-        result = evaluation_function(response, answer, self.config)
+        result = evaluation_function(response, answer, self.param)
         self.assertFalse(result.get("is_correct"))

@@ -77,7 +77,7 @@ def test_negation_handling(self):
         response = ["not light blue", "dark blue"]
         answer = ["light blue"]

-        result = evaluation_function(response, answer, self.config)
+        result = evaluation_function(response, answer, self.param)

         self.assertFalse(result.get("is_correct"))

@@ -86,7 +86,7 @@ def test_performance(self):
         response = ["Density", "Velocity", "Viscosity", "Length"]
         answer = ["Density", "Velocity", "Viscosity", "Length"]

-        result = evaluation_function(response, answer, self.config)
+        result = evaluation_function(response, answer, self.param)
         processing_time = result.get("result", {}).get("processing_time", 0)

         self.assertLess(processing_time, 5, msg="Evaluation function should run efficiently.")
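A usage note: the suite imports evaluation as a sibling module, so it presumably runs from inside app/ (inferred from the import path). A minimal runner sketch:

    # run_tests.py (hypothetical), placed in app/ next to evaluation_tests.py
    import unittest

    # Runs TestEvaluationFunction; every case makes a live LLM call, so the
    # keys loaded by load_dotenv() must be present in the environment.
    unittest.main(module="evaluation_tests", verbosity=2)

The 5-second bound in test_performance depends on endpoint latency, so it may flake on a slow connection.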
