Skip to content

Commit 71df346

Browse files
committed
Add keyphrases parameter
1 parent daef3f7 commit 71df346

File tree

2 files changed

+57
-3
lines changed

2 files changed

+57
-3
lines changed

app/evaluation.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,31 @@ def evaluation_function(response, answer, params):
4949
# "feedback": f"Cannot determine if the answer is correct. Please provide more details about '{keyword}"
5050
# }
5151

52-
52+
# params of the form {'keyphrases': ['phrase1', 'phrase2', ...]}
53+
if params is not None and "keyphrases" in params:
54+
keyphrases = params["keyphrases"]
55+
for keyphrase in keyphrases:
56+
response_tokens = preprocess_tokens(response)
57+
keyphrase_tokens = preprocess_tokens(keyphrase)
58+
window_size = len(keyphrase_tokens)
59+
i = 0
60+
found = False
61+
while i + window_size <= len(response_tokens):
62+
response_substring = " ".join(response_tokens[i:i+window_size])
63+
score = sentence_similarity_mean_w2v(response_substring, keyphrase)
64+
i += 1
65+
if score > 0.75:
66+
found = True
67+
continue
68+
if not found:
69+
return {
70+
"is_correct": False,
71+
"result": {
72+
"similarity_value": w2v_similarity,
73+
"Problematic_word": keyphrase
74+
},
75+
"feedback": f"Cannot determine if the answer is correct. Could not identify '{keyphrase}"
76+
}
5377

5478
if w2v_similarity > 0.75:
5579
return {
@@ -151,6 +175,8 @@ def sentence_similarity_mean_w2v(response: str, answer: str):
151175
answer = preprocess_tokens(answer)
152176
response_embeddings = [w2v[word] for word in response if w2v.has_index_for(word)]
153177
answer_embeddings = [w2v[word] for word in answer if w2v.has_index_for(word)]
178+
if len(response_embeddings) == 0 or len(answer_embeddings) == 0:
179+
return 0
154180
response_vector = np.mean(response_embeddings, axis=0)
155181
answer_vector = np.mean(answer_embeddings, axis=0)
156182
return float(np.dot(response_vector, answer_vector) / (np.linalg.norm(response_vector) * np.linalg.norm(answer_vector)))
@@ -159,6 +185,6 @@ def sentence_similarity_mean_w2v(response: str, answer: str):
159185
if __name__ == "__main__":
160186
pass
161187
# print(time.process_time())
162-
# print(evaluation_function("density, velocity,", "Density, Velocity, Viscosity, Length", None))
188+
# print(evaluation_function("density, velocity,Visc", "Density, Velocity, Viscosity, Length", {'keyphrases': ['Density', 'Velocity', 'Viscosity', 'Length']}))
163189
# print(evaluation_function("test", "test", None))
164190
# print(time.process_time())

app/evaluation_tests.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,41 @@ def test_reynolds_number_is_incorrect(self):
5454
incorrect_responses = [
5555
'density,,,',
5656
'rho,u,mu,L',
57-
#'density,velocity,visc,',
5857
]
5958

6059
for response in incorrect_responses:
6160
result = evaluation_function(response, answer, params)
6261

6362
self.assertEqual(result.get("is_correct"), False, msg=f'Response: {response}')
6463

64+
def test_reynolds_number_is_incorrect_with_keyphrase(self):
    # The 'keyphrases' param lists phrases the response is checked for.
    # The response below abbreviates 'viscosity' to 'visc' and has nothing
    # for 'length'; the test expects it to be reported as not correct.
    model_answer = 'Density, Velocity, Viscosity, Length'
    keyphrase_params = {'keyphrases': ['density', 'velocity', 'viscosity', 'length']}
    rejected_responses = ('density,velocity,visc,',)

    for response in rejected_responses:
        outcome = evaluation_function(response, model_answer, keyphrase_params)
        verdict = outcome.get("is_correct")
        self.assertEqual(verdict, False, msg=f'Response: {response}')
74+
75+
# Reference answer used by the Navier-Stokes test; assembled from adjacent
# string literals so each clause stays on its own source line.
navier_stokes_answer = (
    "The density of the film is uniform and constant, therefore the flow is incompressible. "
    "Since we have incompressible flow, uniform viscosity, Newtonian fluid, "
    "the most appropriate set of equations for the solution of the problem is the "
    "Navier-Stokes equations. The Navier-Stokes equations in Cartesian coordinates are used."
)
79+
# TODO: Navier-stokes equations
80+
81+
def test_navier_stokes_equation(self):
    # Checks responses against navier_stokes_answer with no extra params.
    # The response list is currently empty, so the loop body never runs and
    # the test passes vacuously until a candidate response is enabled.
    answer, params = self.navier_stokes_answer, {}
    correct_responses = [
        # Candidate response, currently disabled:
        #'Navier-stokes. Continuum, const and uniform density and viscosity so incompressible, newtonian. Fits all '
        #'requirements for navier stokes'
    ]

    for response in correct_responses:
        result = evaluation_function(response, answer, params)
        # The msg identifies the failing response, so no debug print is needed.
        self.assertEqual(result.get("is_correct"), True, msg=f'Response: {response}')
92+
6593
if __name__ == "__main__":
6694
unittest.main()

0 commit comments

Comments
 (0)