@@ -8,7 +8,11 @@
 import numpy.linalg
 from nltk.corpus import stopwords
 from nltk import word_tokenize
+from nltk.data import find
 
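+# Load NLTK's pruned word2vec sample once at module import, rather than unpickling a model inside each scoring call.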
+word2vec_sample = str(find('models/word2vec_sample/pruned.word2vec.txt'))
+w2v = gensim.models.KeyedVectors.load_word2vec_format(word2vec_sample, binary=False)
 
 def evaluation_function(response, answer, params):
     """
@@ -155,8 +158,6 @@ def sentence_similarity(response: str, answer: str):
         blen = pickle.load(fp)
     with open('word_freqs', 'rb') as fp:
         freqs = pickle.load(fp)
-    with open('w2v', 'rb') as fp:
-        w2v = pickle.load(fp)
 
     def sencence_scores(common_words, sentence):
         scores = []
@@ -194,8 +195,6 @@ def preprocess_tokens(text: str):
 
 
 def sentence_similarity_mean_w2v(response: str, answer: str):
-    with open('w2v', 'rb') as fp:
-        w2v = pickle.load(fp)
     response = preprocess_tokens(response)
     answer = preprocess_tokens(answer)
     response_embeddings = [w2v[word] for word in response if w2v.has_index_for(word)]
@@ -206,7 +205,6 @@ def sentence_similarity_mean_w2v(response: str, answer: str):
     answer_vector = np.mean(answer_embeddings, axis=0)
     return float(
         np.dot(response_vector, answer_vector) / (np.linalg.norm(response_vector) * np.linalg.norm(answer_vector)))
-    # TODO
 
 
 if __name__ == "__main__":
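For context, a minimal standalone sketch of the technique this diff wires up: loading the NLTK word2vec sample with gensim and scoring two token lists by cosine similarity of their mean word vectors. This is an illustration, not the repo's exact code; it assumes gensim 4.x, that `import gensim` already appears earlier in the module (the hunk starts at line 8, so earlier imports are not shown), that the model has been fetched with nltk.download('word2vec_sample'), and that mean_vector_similarity is a hypothetical helper standing in for the preprocess_tokens pipeline.

import gensim
import numpy as np
from nltk.data import find

# Same loading path as in the diff: NLTK ships a pruned plain-text word2vec model.
word2vec_sample = str(find('models/word2vec_sample/pruned.word2vec.txt'))
w2v = gensim.models.KeyedVectors.load_word2vec_format(word2vec_sample, binary=False)

def mean_vector_similarity(response_tokens, answer_tokens):
    # Hypothetical helper: drop out-of-vocabulary tokens (has_index_for is the
    # gensim 4.x membership test), average the remaining word vectors, and
    # compare the two sentence vectors by cosine similarity.
    r = [w2v[t] for t in response_tokens if w2v.has_index_for(t)]
    a = [w2v[t] for t in answer_tokens if w2v.has_index_for(t)]
    if not r or not a:
        return 0.0  # assumption: treat fully out-of-vocabulary input as dissimilar
    rv, av = np.mean(r, axis=0), np.mean(a, axis=0)
    return float(np.dot(rv, av) / (np.linalg.norm(rv) * np.linalg.norm(av)))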