Skip to content

Commit cd8b4d3

Browse files
committed
input parser added
1 parent 4fbec4c commit cd8b4d3

File tree

2 files changed

+49
-54
lines changed

2 files changed

+49
-54
lines changed

app/compare_text_lists.py

Lines changed: 33 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,41 @@
1-
def process_list(input_list):
1+
def parse_input(input_data):
2 2
"""
3-
Detects if the input is a list, and if any element in the list contains semicolons,
4-
it splits that element into multiple elements.
5-
3+
Parses input data, handling both semicolon-separated strings and nested list structures.
4+
6 5
Args:
7-
input_list (list): A list of strings.
8-
6+
input_data (str or list): A semicolon-separated string or a nested list.
7+
9 8
Returns:
10-
list: A processed list where semicolon-separated elements are split into separate elements.
11-
"""
12-
if not isinstance(input_list, list):
13-
raise ValueError("Input must be a list of strings.")
14-
15-
processed_list = []
16-
for item in input_list:
17-
if not isinstance(item, str):
18-
raise ValueError("All elements in the input list must be strings.")
19-
20-
# Split by semicolon if present, otherwise keep the original item
21-
processed_list.extend(item.split(';') if ';' in item else [item])
22-
23-
return processed_list
24-
def test_process_list():
25-
"""
26-
Unit tests for process_list function.
9+
list: A processed list of elements.
27 10
"""
11+
if isinstance(input_data, str):
12+
if input_data == "":
13+
return []
14+
return [item for item in input_data.split(';') if item]
15+
elif isinstance(input_data, list) and all(isinstance(sublist, list) for sublist in input_data):
16+
return [item for sublist in input_data for item in sublist]
17+
else:
18+
raise ValueError("Input must be either a semicolon-separated string or a nested list.")
19+
20+
# Testing Code
21+
def test_parse_input():
28 22
test_cases = [
29-
(["apple", "banana;orange", "grape"], ["apple", "banana", "orange", "grape"]),
30-
(["one;two;three", "four", "five"], ["one", "two", "three", "four", "five"]),
31-
(["alpha;beta", "gamma;delta;epsilon"], ["alpha", "beta", "gamma", "delta", "epsilon"]),
32-
(["no_separator"], ["no_separator"]),
33-
([], []),
34-
(["single"], ["single"]),
23+
("apple;banana;cherry", ["apple", "banana", "cherry"]),
24+
("one;two;three;four", ["one", "two", "three", "four"]),
25+
("hello", ["hello"]),
26+
("a;b;c;d;e", ["a", "b", "c", "d", "e"]),
27+
("", []), # Edge case: empty string to empty list
28+
("word1;;word2", ["word1", "word2"]), # Edge case: consecutive semicolons ignored
29+
([["a"], ["b"], ["c"], ["d"]], ["a", "b", "c", "d"]),
30+
([["apple"], ["banana"], ["cherry"]], ["apple", "banana", "cherry"]),
31+
([[]], []), # Edge case: list of empty lists
35 32
]
36-
37-
for i, (input_list, expected_output) in enumerate(test_cases):
38-
assert process_list(input_list) == expected_output, f"Test case {i+1} failed"
39-
33+
34+
for i, (input_data, expected) in enumerate(test_cases, 1):
35+
result = parse_input(input_data)
36+
assert result == expected, f"Test case {i} failed: {result} != {expected}"
37+
40 38
print("All test cases passed!")
41 39

42-
# Run the tests
43-
test_process_list()
40+
# Run tests
41+
test_parse_input()

app/evaluation.py

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,29 +21,26 @@ def __init__(self, mode='gpt', llama_version='3_1_8B', temperature=0.01, max_new
21 21

22 22
self.response_num_required = 0 #initialise it with 0
23 23

24-
def compareTextLists(input_list):
25-
"""
26-
Detects if the input is a list, and if any element in the list contains semicolons,
27-
it splits that element into multiple elements.
28 24

25+
def parse_input(input_data):
26+
"""
27+
Parses input data, handling both semicolon-separated strings and nested list structures.
28+
29 29
Args:
30-
input_list (list): A list of strings.
31-
30+
input_data (str or list): A semicolon-separated string or a nested list.
31+
32 32
Returns:
33-
list: A processed list where semicolon-separated elements are split into separate elements.
33+
list: A processed list of elements.
34 34
"""
35-
if not isinstance(input_list, list):
36-
raise ValueError("Input must be a list of strings.")
37-
38-
processed_list = []
39-
for item in input_list:
40-
if not isinstance(item, str):
41-
raise ValueError("All elements in the input list must be strings.")
42-
43-
# Split by semicolon if present, otherwise keep the original item
44-
processed_list.extend(item.split(';') if ';' in item else [item])
35+
if isinstance(input_data, str):
36+
if input_data == "":
37+
return []
38+
return [item for item in input_data.split(';') if item]
39+
elif isinstance(input_data, list) and all(isinstance(sublist, list) for sublist in input_data):
40+
return [item for sublist in input_data for item in sublist]
41+
else:
42+
raise ValueError("Input must be either a semicolon-separated string or a nested list.")
45 43

46-
return processed_list
47 44

48 45
def setup_llm(param: Param):
49 46
"""Initialize the LLM model (GPT-4o or LLaMA 3) based on the given configuration."""
@@ -105,7 +102,7 @@ def evaluation_function(response, answer, param=None):
105 102

106 103

107 104
#split the response and answer into lists with semicolons
108-
response = compareTextLists(response)
105+
response = parse_input(response)
109 106

110 107

111 108
start_time = time.process_time()

0 commit comments

Comments
 (0)