diff --git a/Data Transformation/codechef_problem/transform_description.py b/Data Transformation/codechef_problem/transform_description.py
index 88055fc..938a702 100644
--- a/Data Transformation/codechef_problem/transform_description.py
+++ b/Data Transformation/codechef_problem/transform_description.py
@@ -1,11 +1,17 @@
 __author__ = 'Pranay'
 import lemmatization, minimize_desc, remove_nondecodable_chars, removePunctuation
-
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+
 def transform(desc):
     print 'Description Transformation Started: '
     desc = remove_nondecodable_chars.removeNondecodableChars(desc)
     desc = minimize_desc.minimizeDescription(desc)
     desc = removePunctuation.removePunctuation(desc)
     desc = lemmatization.lemmatizeDescription(desc)
+    stop_words = set(stopwords.words('english'))  # set => O(1) membership tests below
+    word_tokens = word_tokenize(desc)
+    filtered_sentence = [w for w in word_tokens if w.lower() not in stop_words]  # case-insensitive stop-word removal
+    desc = ' '.join(filtered_sentence)  # re-join the surviving tokens with single spaces
     # print 'Description Transformation Ended '
     return desc
diff --git a/Model Training/get_accuracy.py b/Model Training/get_accuracy.py
index 684b125..b7f2eb0 100644
--- a/Model Training/get_accuracy.py
+++ b/Model Training/get_accuracy.py
@@ -96,7 +96,7 @@ def get_recommendations(username, prev_sub, no_recomm, no_test):
 
 with open('accuracy_val.csv', 'w') as f:
     writer = csv.writer(f)
-    writer.writerow(['prev_sub', 'no_recomm', 'no_test', 'tp', 'tn', 'fp', 'fn', 'precision', 'recall', 'f1_score'])
+    writer.writerow(['prev_sub', 'no_recomm', 'no_test', 'tp', 'tn', 'fp', 'fn', 'precision', 'recall', 'f1_score', 'Specificity', 'False_Positive_Rate', 'True_Negative_Rate', 'False_Negative_Rate'])
 
     prev_sub = 5
     no_recomm = 10
@@ -117,10 +117,13 @@ def get_recommendations(username, prev_sub, no_recomm, no_test):
         precision = tp/(tp + fp)
         recall = tp/(tp + fn)
         f1_score = 2 * precision * recall / (precision + recall)
-
+        Specificity = tn / (tn + fp)  # parenthesized: `tn / tn + fp` evaluates as (tn / tn) + fp
+        False_Positive_Rate = fp / (fp + tn)  # FP / (FP + TN)
+        True_Negative_Rate = tn / (tn + fp)  # TN / (TN + FP); same ratio as Specificity
+        False_Negative_Rate = fn / (fn + tp)  # FN / (FN + TP)
         print "Precision - " + str(precision)
         print "Recall - " + str(recall)
         print "F1score - " + str(f1_score)
-        dat = [prev_sub, no_recomm, no_test, tp, tn, fp, fn, precision, recall, f1_score]
+        dat = [prev_sub, no_recomm, no_test, tp, tn, fp, fn, precision, recall, f1_score, Specificity, False_Positive_Rate, True_Negative_Rate, False_Negative_Rate]
         writer.writerow(dat)
 