diff --git a/ch3/3_1_wordsteam.py b/ch3/3_1_wordsteam.py index 9b10cbb..4850384 100644 --- a/ch3/3_1_wordsteam.py +++ b/ch3/3_1_wordsteam.py @@ -1,8 +1,8 @@ -# This script give you idea how stemming has been placed by using NLTK and Polyglot libraries. +# This script gives you an idea of how stemming is performed using the NLTK library. # It is part of morphological analysis +# Alternative NLTK-based implementation provided for morphological analysis -from nltk.stem import PorterStemmer -from polyglot.text import Text, Word +from nltk.stem import PorterStemmer, LancasterStemmer, SnowballStemmer word = "unexpected" text = "disagreement" @@ -21,37 +21,48 @@ def stemmer_porter(): port = PorterStemmer() - print "\nDerivational Morphemes" - print " ".join([port.stem(i) for i in text6.split()]) - print " ".join([port.stem(i) for i in text7.split()]) - print "\nInflectional Morphemes" - print " ".join([port.stem(i) for i in text8.split()]) - print " ".join([port.stem(i) for i in text9.split()]) - print "\nSome examples" - print " ".join([port.stem(i) for i in word.split()]) - print " ".join([port.stem(i) for i in text.split()]) - print " ".join([port.stem(i) for i in text1.split()]) - print " ".join([port.stem(i) for i in text2.split()]) - print " ".join([port.stem(i) for i in text3.split()]) - print " ".join([port.stem(i) for i in text4.split()]) - print " ".join([port.stem(i) for i in text5.split()]) + print("\nDerivational Morphemes") + print(" ".join([port.stem(i) for i in text6.split()])) + print(" ".join([port.stem(i) for i in text7.split()])) + print("\nInflectional Morphemes") + print(" ".join([port.stem(i) for i in text8.split()])) + print(" ".join([port.stem(i) for i in text9.split()])) + print("\nSome examples") + print(" ".join([port.stem(i) for i in word.split()])) + print(" ".join([port.stem(i) for i in text.split()])) + print(" ".join([port.stem(i) for i in text1.split()])) + print(" ".join([port.stem(i) for i in text2.split()])) + print(" ".join([port.stem(i) 
for i in text3.split()])) + print(" ".join([port.stem(i) for i in text4.split()])) + print(" ".join([port.stem(i) for i in text5.split()])) -def polyglot_stem(): - print "\nDerivational Morphemes using polyglot library" +def nltk_alternative_stem(): + porter = PorterStemmer() + lancaster = LancasterStemmer() + snowball = SnowballStemmer('english') + + print("\nDerivational Morphemes using NLTK (alternative to polyglot)") for w in words_derv: - w = Word(w, language="en") - print("{:<20}{}".format(w, w.morphemes)) - print "\nInflectional Morphemes using polyglot library" + print("{:<20}{}".format(w, porter.stem(w))) + + print("\nInflectional Morphemes using NLTK (alternative to polyglot)") for w in word_infle: - w = Word(w, language="en") - print("{:<20}{}".format(w, w.morphemes)) - print "\nSome Morphemes examples using polyglot library" - for w in word_infle: - w = Word(w, language="en") - print("{:<20}{}".format(w, w.morphemes)) + print("{:<20}{}".format(w, porter.stem(w))) + + print("\nComparison of NLTK stemmers (Porter, Lancaster, Snowball)") + test_words = words_derv + word_infle + print("{:<15} {:<15} {:<15} {:<15}".format("Word", "Porter", "Lancaster", "Snowball")) + print("-" * 60) + for word in test_words: + print("{:<15} {:<15} {:<15} {:<15}".format( + word, + porter.stem(word), + lancaster.stem(word), + snowball.stem(word) + )) if __name__ == "__main__": stemmer_porter() - polyglot_stem() + nltk_alternative_stem()