Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 40 additions & 29 deletions ch3/3_1_wordsteam.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# This script gives you an idea of how stemming is performed using the NLTK and Polyglot libraries.
# This script gives you an idea of how stemming is performed using the NLTK library.
# It is part of morphological analysis
# Alternative NLTK-based implementation provided for morphological analysis

from nltk.stem import PorterStemmer
from polyglot.text import Text, Word
from nltk.stem import PorterStemmer, LancasterStemmer, SnowballStemmer

word = "unexpected"
text = "disagreement"
Expand All @@ -21,37 +21,48 @@

def stemmer_porter():
    """Print Porter-stemmed versions of the sample texts.

    Every sample is split on whitespace, each token is passed through
    ``PorterStemmer.stem`` and the resulting stems are printed, joined by
    single spaces, one sample per line under three headings.

    The samples (``text6``..``text9``, ``word``, ``text`` and
    ``text1``..``text5``) are read from names defined outside this
    function. Returns ``None``; the only effect is output on stdout.
    """
    port = PorterStemmer()

    def show(sample):
        # Stem token by token so a multi-word sample stays on one line.
        print(" ".join(port.stem(token) for token in sample.split()))

    print("\nDerivational Morphemes")
    for sample in (text6, text7):
        show(sample)

    print("\nInflectional Morphemes")
    for sample in (text8, text9):
        show(sample)

    print("\nSome examples")
    for sample in (word, text, text1, text2, text3, text4, text5):
        show(sample)


def _print_polyglot_morphemes(words):
    """Print each word in a 20-column field followed by its polyglot morphemes."""
    for raw in words:
        entry = Word(raw, language="en")
        print("{:<20}{}".format(entry, entry.morphemes))


def polyglot_stem():
    """Print polyglot morpheme splits for the sample word lists.

    Each entry of ``words_derv`` and ``word_infle`` (defined outside this
    function) is wrapped in a polyglot ``Word`` with ``language="en"`` and
    printed next to its ``morphemes`` attribute. Returns ``None``.
    """
    print("\nDerivational Morphemes using polyglot library")
    _print_polyglot_morphemes(words_derv)

    print("\nInflectional Morphemes using polyglot library")
    _print_polyglot_morphemes(word_infle)

    print("\nSome Morphemes examples using polyglot library")
    # NOTE(review): this section walks word_infle a second time, exactly as
    # the original did; a different word list may have been intended - confirm.
    _print_polyglot_morphemes(word_infle)


def nltk_alternative_stem():
    """Print NLTK stems for the sample word lists, without using polyglot.

    The first two sections print every entry of ``words_derv`` and
    ``word_infle`` (defined outside this function) next to its Porter stem.
    The last section prints a table comparing the Porter, Lancaster and
    Snowball (English) stems of all entries from both lists. Returns ``None``.
    """
    porter = PorterStemmer()
    lancaster = LancasterStemmer()
    snowball = SnowballStemmer('english')

    print("\nDerivational Morphemes using NLTK (alternative to polyglot)")
    for w in words_derv:
        print("{:<20}{}".format(w, porter.stem(w)))

    print("\nInflectional Morphemes using NLTK (alternative to polyglot)")
    for w in word_infle:
        print("{:<20}{}".format(w, porter.stem(w)))

    print("\nComparison of NLTK stemmers (Porter, Lancaster, Snowball)")
    # One shared row template keeps the header and data columns aligned.
    row = "{:<15} {:<15} {:<15} {:<15}"
    print(row.format("Word", "Porter", "Lancaster", "Snowball"))
    print("-" * 60)
    # Loop variable is not called `word`, so it cannot be confused with the
    # module-level `word` sample.
    for token in words_derv + word_infle:
        print(row.format(
            token,
            porter.stem(token),
            lancaster.stem(token),
            snowball.stem(token),
        ))


if __name__ == "__main__":
    # Run every demo in turn when the file is executed as a script.
    stemmer_porter()
    polyglot_stem()
    nltk_alternative_stem()