-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTextAnalysis.py
More file actions
60 lines (50 loc) · 1.61 KB
/
TextAnalysis.py
File metadata and controls
60 lines (50 loc) · 1.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from textblob import TextBlob
def wordSplitter(sentence):
    """Lowercase a sentence and wrap it in a TextBlob for word-level analysis.

    Prints the input exactly as received, then prints the singularized
    word list of the lowercased text.

    Args:
        sentence: The text to analyse, as a string.

    Returns:
        A TextBlob built from the lowercased sentence. The singularized
        words are only printed; the returned blob is constructed from the
        lowercased text and is not itself singularized.
    """
    # Single-argument print(...) is valid on both Python 2 and Python 3
    # and produces the same output on each.
    print(sentence)
    blob = TextBlob(sentence.lower())
    print(blob.words.singularize())
    return blob
def wordDeleter(deletion, list):
    """Remove every occurrence of ``deletion`` from ``list`` in place.

    Args:
        deletion: The value to strip out; any element equal to it is dropped.
        list: The list to clean. The name shadows the builtin but is kept
            so that existing callers passing it by keyword keep working.

    Returns:
        The same list object that was passed in, now without ``deletion``.
    """
    # One filtering pass with slice assignment keeps the caller's list
    # object while avoiding the repeated rescans of an `in` + `remove` loop.
    list[:] = [item for item in list if item != deletion]
    return list
def contentExtractor(TextBlobMe):
    """Return the blob's words as a plain list with common filler words removed.

    Args:
        TextBlobMe: A TextBlob (or any object exposing an iterable
            ``words`` attribute).

    Returns:
        A new list holding the words in their original order, minus any
        word equal to "the", "an", "a", "in", "of", "on" or "by". The
        comparison is case-sensitive, so capitalised forms such as "The"
        are kept.
    """
    # Articles and prepositions that are filtered out of the result.
    filler_words = ("the", "an", "a", "in", "of", "on", "by")
    # Walk the word list directly rather than indexing up to the summed
    # word counts: this avoids the Python 2-only itervalues() call and
    # needs one pass instead of one removal sweep per filler word.
    return [word for word in TextBlobMe.words if word not in filler_words]
# Demo run: exercise the helpers on a short sentence and on a longer sample text.
wordSplitter("I am a testers. Please work with me")
wiki = TextBlob("Authorizes TransCanada Keystone Pipeline, L.P. to construct, connect, operate, \
and maintain the pipeline and cross-border facilities specified in an application filled \
by TransCanada Corporation to the Department of State on May 4, 2012.")
# Alternative input and extra inspection calls, kept for debugging:
# wiki = TextBlob("I am the, I am the, you are the")
# print(wiki.tags)
# print(wiki.words)
# print(wiki.word_counts)
# Print the sum of the per-word frequencies, then the filtered word list.
# .values() and single-argument print(...) work on both Python 2 and Python 3.
print(sum(wiki.word_counts.values()))
print(contentExtractor(wiki))