forked from stevenjson/CuisineClassifying
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMutInfo.py
More file actions
160 lines (116 loc) · 4.5 KB
/
MutInfo.py
File metadata and controls
160 lines (116 loc) · 4.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import math
import argparse
def GetFile(fileName, path):
    """Read a text file and return its lines as a list of strings.

    The file opened is ``path + fileName`` (plain string concatenation, so
    ``path`` must already end with a separator). The content is split on
    ``'\\n'``; the empty string produced by a terminating newline is dropped.

    Args:
        fileName: Name of the file to read.
        path: Prefix prepended verbatim to ``fileName``.

    Returns:
        A list with one string per line of the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even when read() raises.
    with open(path + fileName, 'r') as handle:
        recipeList = handle.read().split('\n')

    # Only discard the last entry when it is the empty artefact of a trailing
    # newline; a file without a final newline keeps its last line.
    if recipeList and recipeList[-1] == "":
        recipeList.pop()
    return recipeList
def Count(recipeMap, cuisineList, recipeList):
    """Count, per cuisine, how many recipes contain each vocabulary word.

    A recipe "contains" a word when the word equals one of the tokens obtained
    from ``recipe.split(" ")``; a recipe contributes at most 1 per word no
    matter how often the word repeats inside it.

    Args:
        recipeMap: Mapping from cuisine name to a list of recipe strings.
        cuisineList: Cuisine names; position ``i`` here is position ``i`` in
            every count list of the result.
        recipeList: Iterable of words to count. A word listed more than once
            is counted once.

    Returns:
        A dict mapping each word that occurs in at least one recipe to a list
        of ``len(cuisineList)`` integers. Words that occur nowhere are omitted.

    Raises:
        KeyError: If a name in ``cuisineList`` is missing from ``recipeMap``.
    """
    # Tokenise every recipe exactly once and tally recipe-level presence per
    # cuisine, instead of re-splitting all recipes for every single word.
    perCuisineTally = []
    for cuisine in cuisineList:
        tally = {}
        for recipe in recipeMap[cuisine]:
            # set(): presence per recipe, not number of occurrences.
            for token in set(recipe.split(" ")):
                tally[token] = tally.get(token, 0) + 1
        perCuisineTally.append(tally)

    mutMap = {}
    for word in recipeList:
        if word in mutMap:
            continue
        counts = [tally.get(word, 0) for tally in perCuisineTally]
        # Words never seen in any recipe get no entry at all.
        if any(counts):
            mutMap[word] = counts
    return mutMap
def MutInfo(word, countMap, foldSize, totalSize, recipeList, cuisineList):
    """Return the mutual information (in bits) between a word and the class.

    The result is the sum, over every class and over the two events "word
    present" / "word absent", of ``p(e, c) * log2(p(e, c) / (p(e) * p(c)))``,
    where terms with ``p(e, c) == 0`` contribute nothing. Every class is given
    the same prior ``p(c) = foldSize / totalSize``.

    Args:
        word: Key into ``countMap``.
        countMap: Mapping from word to a list of per-class counts of items
            containing the word.
        foldSize: Number of items in each class.
        totalSize: Total number of items across all classes.
        recipeList: Unused; kept so existing callers keep working.
        cuisineList: Unused; kept so existing callers keep working.

    Returns:
        The mutual information as a float.

    Raises:
        KeyError: If ``word`` is not in ``countMap``.
        ZeroDivisionError: If ``totalSize`` is zero.
        ValueError: If the counts are inconsistent with the sizes so that a
            logarithm of a non-positive number would be required (for example
            a count larger than ``foldSize``).
    """
    counts = countMap[word]
    probClass = foldSize / float(totalSize)
    wordTotal = sum(counts)

    # Event "word present": joint counts are the per-class counts themselves.
    present = _MutInfoTerms(counts, wordTotal, probClass, totalSize)
    # Event "word absent": the complement within each class and overall.
    absent = _MutInfoTerms(
        [foldSize - count for count in counts],
        totalSize - wordTotal,
        probClass,
        totalSize,
    )
    return present + absent


def _MutInfoTerms(jointCounts, marginalCount, probClass, totalSize):
    """Sum p(e, c) * log2(p(e, c) / (p(e) * p(c))) over classes for one event."""
    marginal = marginalCount / float(totalSize)
    subtotal = 0.0
    for jointCount in jointCounts:
        joint = jointCount / float(totalSize)
        # A zero joint probability contributes nothing (and log2(0) is
        # undefined), so skip it.
        if joint != 0:
            # log2 is more accurate than log(x, 2); the difference of logs is
            # kept so invalid inputs still surface as ValueError.
            subtotal += joint * (math.log2(joint) - math.log2(marginal * probClass))
    return subtotal
def PrintInfo(probMap, countMap, cuisineList, cuisine, n):
    """Print the highest-scoring words together with their dominant cuisine.

    Words are visited in descending order of their ``probMap`` score. For each
    word the dominant cuisine is the one with the largest count in
    ``countMap[word]`` (the first one on ties). Each printed row is followed
    by a blank line, and one blank line is printed before anything else.

    Args:
        probMap: Mapping from word to its score.
        countMap: Mapping from word to a list of per-cuisine counts, aligned
            with ``cuisineList``.
        cuisineList: Cuisine names.
        cuisine: ``"all"`` to print words regardless of dominant cuisine, or a
            name from ``cuisineList`` to print only words dominated by that
            cuisine. Any other value prints no rows.
        n: Zero-based index of the last row to print, i.e. at most ``n + 1``
            rows are printed in both modes. Negative values print no rows.
    """
    print()
    if cuisine != "all" and cuisine not in cuisineList:
        return

    # Both modes share one limit, so "all" no longer prints one row fewer than
    # a specific cuisine, and a negative n can no longer slice from the end.
    limit = max(n + 1, 0)
    shown = 0
    for word in sorted(probMap, key=probMap.get, reverse=True):
        if shown >= limit:
            break
        dominant = cuisineList[_DominantIndex(countMap[word])]
        if cuisine != "all" and cuisine != dominant:
            continue
        print("{:15s}: {:.5f} {:10s}".format(word, probMap[word], dominant))
        print()
        shown += 1


def _DominantIndex(counts):
    """Return the index of the first largest positive count, or 0 if none."""
    bestIndex = 0
    bestCount = 0
    for index, count in enumerate(counts):
        if count > bestCount:
            bestIndex = index
            bestCount = count
    return bestIndex
def main():
    """Rank the words of one feature set by mutual information and print them.

    Command-line arguments: the feature name (selects the directory
    ``Data/features/<feature>/``), the number of top words to display, and
    either a cuisine name or ``all``. One ``<cuisine>.txt`` file per cuisine
    is read from the feature directory, a score is computed for every word
    found in the recipes, and the top words are printed.

    Raises:
        SystemExit: With status -1 when the feature or the cuisine argument is
            not recognised (argparse itself exits on malformed arguments).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("feature", type=str, help="Type of feature to use. ")
    parser.add_argument("topN", type=int, help="Top N words to display")
    parser.add_argument("cuisine", type=str, help="Cuisine of interest [all for all cuisines]")
    args = parser.parse_args()

    fileList = ["chinese.txt", "caribbean.txt", "french.txt", "italian.txt", "mexican.txt"]
    featureList = ["unigram", "bigram", "verbs", "nouns", "verbnouns", "ingredients", "cookverbs"]
    feature = args.feature
    cuisineInfo = args.cuisine

    if feature not in featureList:
        print("Invalid Feature. Available feature include:", featureList)
        # SystemExit is always available; the bare exit() helper is only
        # injected by the site module.
        raise SystemExit(-1)
    filePath = "Data/features/" + feature + "/"

    # PrintInfo receives the zero-based index of the last row to show.
    n = args.topN - 1
    # NOTE(review): these sizes are hard-coded; they presumably describe the
    # data set (180 recipes per cuisine, 900 in total) - confirm against the
    # files in Data/features.
    foldSize = 180
    totalSize = 900

    cuisineList = []
    recipeMap = {}
    for _file in fileList:
        # Drop the extension explicitly: str.strip(".txt") would strip any of
        # the characters '.', 't', 'x' from BOTH ends of the name.
        cuisine = _file.rsplit(".", 1)[0]
        cuisineList.append(cuisine)
        recipeMap[cuisine] = GetFile(_file, filePath)

    # "all" is a documented value of the cuisine argument and must be accepted.
    if cuisineInfo != "all" and cuisineInfo not in cuisineList:
        print("Invalid cuisine. Available cuisines include:", ["all"] + cuisineList)
        raise SystemExit(-1)

    # Vocabulary: every space-separated token of every recipe. Empty tokens
    # (from consecutive spaces) are not words and are never scored.
    vocabulary = set()
    for cuisine in cuisineList:
        for recipe in recipeMap[cuisine]:
            vocabulary.update(recipe.split(" "))
    vocabulary.discard("")

    countMap = Count(recipeMap, cuisineList, vocabulary)
    probMap = {
        word: MutInfo(word, countMap, foldSize, totalSize, vocabulary, cuisineList)
        for word in vocabulary
    }

    PrintInfo(probMap, countMap, cuisineList, cuisineInfo, n)


# Run only when executed as a script, so importing this module has no side
# effects.
if __name__ == "__main__":
    main()