From b75ea56294579e54b43c8cab3aadee15e94ea1e7 Mon Sep 17 00:00:00 2001
From: Gavin Lee <sz110010@gmail.com>
Date: Fri, 3 Nov 2017 04:00:39 +0800
Subject: [PATCH 1/5] Edit fp_growth.py to python-3 version

The test file is unchanged, so it is still the Python 2 version.
---
 fp_growth.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/fp_growth.py b/fp_growth.py
index 4ada47d..3d750ff 100644
--- a/fp_growth.py
+++ b/fp_growth.py
@@ -10,7 +10,6 @@
 """
 
 from collections import defaultdict, namedtuple
-from itertools import imap
 
 __author__ = 'Eric Naeseth <eric@naeseth.com>'
 __copyright__ = 'Copyright © 2009 Eric Naeseth'
@@ -40,19 +39,20 @@ def find_frequent_itemsets(transactions, minimum_support, include_support=False)
             items[item] += 1
 
     # Remove infrequent items from the item support dictionary.
-    items = dict((item, support) for item, support in items.iteritems()
-        if support >= minimum_support)
+    tmpList = [(item, support) for item, support in items.items()
+            if support >= minimum_support]
+    items = dict(tmpList)
 
     # Build our FP-tree. Before any transactions can be added to the tree, they
     # must be stripped of infrequent items and their surviving items must be
     # sorted in decreasing order of frequency.
     def clean_transaction(transaction):
         transaction = filter(lambda v: v in items, transaction)
-        transaction.sort(key=lambda v: items[v], reverse=True)
+        transaction = sorted(transaction, key=lambda v: items[v], reverse=True)
         return transaction
 
     master = FPTree()
-    for transaction in imap(clean_transaction, transactions):
+    for transaction in list(map(clean_transaction, transactions)):
         master.add(transaction)
 
     def find_with_suffix(tree, suffix):
@@ -136,7 +136,7 @@ def items(self):
         element of the tuple is the item itself, and the second element is a
         generator that will yield the nodes in the tree that belong to the item.
         """
-        for item in self._routes.iterkeys():
+        for item in self._routes.keys():
             yield (item, self.nodes(item))
 
     def nodes(self, item):
@@ -167,15 +167,15 @@ def collect_path(node):
         return (collect_path(node) for node in self.nodes(item))
 
     def inspect(self):
-        print 'Tree:'
+        print('Tree:')
         self.root.inspect(1)
 
-        print
-        print 'Routes:'
+        print()
+        print('Routes:')
         for item, nodes in self.items():
-            print '  %r' % item
+            print('  %r' % item)
             for node in nodes:
-                print '    %r' % node
+                print('    %r' % node)
 
 def conditional_tree_from_paths(paths):
     """Build a conditional FP-tree from the given prefix paths."""
@@ -312,7 +312,7 @@ def children(self):
         return tuple(self._children.itervalues())
 
     def inspect(self, depth=0):
-        print ('  ' * depth) + repr(self)
+        print (('  ' * depth) + repr(self))
         for child in self.children:
             child.inspect(depth + 1)
 
@@ -355,4 +355,4 @@ def __repr__(self):
 
     result = sorted(result, key=lambda i: i[0])
     for itemset, support in result:
-        print str(itemset) + ' ' + str(support)
+        print(str(itemset) + ' ' + str(support))

From 992d2e8cabf8e22384395bed8d594224eef3eeb8 Mon Sep 17 00:00:00 2001
From: Gavin Lee <sz110010@gmail.com>
Date: Fri, 3 Nov 2017 04:07:25 +0800
Subject: [PATCH 2/5] Change syntax

Also Edit Readme to python-3
---
 Readme.md    | 4 ++--
 fp_growth.py | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/Readme.md b/Readme.md
index b15cdaa..17fb5a2 100644
--- a/Readme.md
+++ b/Readme.md
@@ -37,11 +37,11 @@ Once installed, the module can also be used as a stand-alone script. It will
 read a list of transactions formatted as a CSV file. (An example of such a file
 in included in the `examples` directory.)
 
-    python -m fp_growth -s {minimum support} {path to CSV file}
+    python3 -m fp_growth -s {minimum support} {path to CSV file}
     
 For example, to find the itemsets with support ≥ 4 in the included example file:
 
-    python -m fp_growth -s 4 examples/tsk.csv
+    python3 -m fp_growth -s 4 examples/tsk.csv
 
 References
 ----------
diff --git a/fp_growth.py b/fp_growth.py
index 3d750ff..0da58fe 100644
--- a/fp_growth.py
+++ b/fp_growth.py
@@ -39,9 +39,8 @@ def find_frequent_itemsets(transactions, minimum_support, include_support=False)
             items[item] += 1
 
     # Remove infrequent items from the item support dictionary.
-    tmpList = [(item, support) for item, support in items.items()
-            if support >= minimum_support]
-    items = dict(tmpList)
+    items = dict((item, support) for item, support in items.items()
+            if support >= minimum_support)
 
     # Build our FP-tree. Before any transactions can be added to the tree, they
     # must be stripped of infrequent items and their surviving items must be

From 06ccf3607e8b59d643c7ee1e6c7b2bf8fd7df43d Mon Sep 17 00:00:00 2001
From: Gavin Lee <sz110010@gmail.com>
Date: Fri, 3 Nov 2017 23:24:54 +0800
Subject: [PATCH 3/5] Add association rule to the script

---
 Readme.md    | 19 ++++++++++++++--
 fp_growth.py | 61 +++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 73 insertions(+), 7 deletions(-)

diff --git a/Readme.md b/Readme.md
index 17fb5a2..daa7315 100644
--- a/Readme.md
+++ b/Readme.md
@@ -25,7 +25,7 @@ in your transactions with the following code:
     from fp_growth import find_frequent_itemsets
     for itemset in find_frequent_itemsets(transactions, minsup):
         print itemset
-        
+
 Note that `find_frequent_itemsets` returns a generator of itemsets, not a
 greedily-populated list. Each item must be hashable (i.e., it must be valid as
 a member of a dictionary or a set).
@@ -38,11 +38,26 @@ read a list of transactions formatted as a CSV file. (An example of such a file
 in included in the `examples` directory.)
 
     python3 -m fp_growth -s {minimum support} {path to CSV file}
-    
+
 For example, to find the itemsets with support ≥ 4 in the included example file:
 
     python3 -m fp_growth -s 4 examples/tsk.csv
 
+Also, the minimum support can be given as a support rate, for example:
+
+    python3 -m fp_growth -s 0.3 examples/tsk.csv
+
+You can find association rules as well, for example:
+
+    python3 -m fp_growth -f rule -c 0.4 -s 4 examples/tsk.csv
+
+We used -f to decide what you want to find, association rule or frequency itemset
+
+All parameter:
+    - `-s`: minimum support (count or rate is fine)
+    - `-c`: minimum confidence
+    - `-f`: problem solving (freq or rule)
+
 References
 ----------
 
diff --git a/fp_growth.py b/fp_growth.py
index 0da58fe..2ca4d77 100644
--- a/fp_growth.py
+++ b/fp_growth.py
@@ -32,6 +32,10 @@ def find_frequent_itemsets(transactions, minimum_support, include_support=False)
     """
     items = defaultdict(lambda: 0) # mapping from items to their supports
 
+    # If a support rate is given instead of a support count, convert it to a count.
+    if 0 < minimum_support <= 1:
+        minimum_support = minimum_support * len(transactions)
+
     # Load the passed-in transactions and count the support that individual
     # items have.
     for transaction in transactions:
@@ -321,6 +325,37 @@ def __repr__(self):
         return "<%s %r (%r)>" % (type(self).__name__, self.item, self.count)
 
 
+def subs(l):
+    """
+    Used for assRule
+    """
+    assert type(l) is list
+    if len(l) == 1:
+        return [l]
+    x = subs(l[1:])
+    return x + [[l[0]] + y for y in x]
+
+
+# Association rules
+def assRule(freq, min_conf = 0.6):
+    """
+    Takes a dict mapping each itemset (as a tuple) to its support and
+    returns the rules whose confidence is at least min_conf.
+    """
+    assert type(freq) is dict
+    result = []
+    for item, sup in freq.items():
+        for subitem in subs(list(item)):
+            sb = [x for x in item if x not in subitem]
+            if sb == [] or subitem == []: continue
+            if len(subitem) == 1 and (subitem[0][0] == 'in' or subitem[0][0] == 'out'):
+                continue
+            conf = sup/freq[tuple(subitem)]
+            if conf >= min_conf:
+                result.append({'from':subitem, 'to':sb, 'sup':sup, 'conf':conf})
+    return result
+
+
 if __name__ == '__main__':
     from optparse import OptionParser
     import csv
@@ -330,10 +365,18 @@ def __repr__(self):
         help='Minimum itemset support (default: 2)')
     p.add_option('-n', '--numeric', dest='numeric', action='store_true',
         help='Convert the values in datasets to numerals (default: false)')
+    p.add_option('-c', '--minimum-confidence', dest='minconf', type='float',
+        help='Minimum rule confidence (default 0.6)')
+    p.add_option('-f', '--find', dest='find', type='str',
+        help='Finding freq(frequency itemsets) or rule(association rules) (default: freq)')
     p.set_defaults(minsup=2)
     p.set_defaults(numeric=False)
-
+    p.set_defaults(minconf=0.6)
+    p.set_defaults(find='freq')
     options, args = p.parse_args()
+
+    assert options.find == 'freq' or options.find == 'rule'
+
     if len(args) < 1:
         p.error('must provide the path to a CSV file to read')
 
@@ -349,9 +392,17 @@ def __repr__(self):
                 transactions.append(row)
 
     result = []
+    res_for_rul = {}
     for itemset, support in find_frequent_itemsets(transactions, options.minsup, True):
         result.append((itemset,support))
-
-    result = sorted(result, key=lambda i: i[0])
-    for itemset, support in result:
-        print(str(itemset) + ' ' + str(support))
+        res_for_rul[tuple(itemset)] = support
+
+    if options.find == 'freq':
+        result = sorted(result, key=lambda i: i[0])
+        for itemset, support in result:
+            print(str(itemset) + ' ' + str(support))
+    if options.find == 'rule':
+        rules = assRule(res_for_rul, options.minconf)
+        for ru in rules:
+            print(str(ru['from']) + ' -> ' + str(ru['to']))
+            print('support = ' + str(ru['sup']) + 'confindence = ' + str(ru['conf']))

From 75521274d7ca8301cc2077501dd9f4ddd643f403 Mon Sep 17 00:00:00 2001
From: Gavin Lee <sz110010@gmail.com>
Date: Fri, 3 Nov 2017 23:26:48 +0800
Subject: [PATCH 4/5] Fix Readme bug

---
 Readme.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Readme.md b/Readme.md
index daa7315..505bc72 100644
--- a/Readme.md
+++ b/Readme.md
@@ -54,9 +54,9 @@ You can find association rules as well, as sample
 We used -f to decide what you want to find, association rule or frequency itemset
 
 All parameter:
-    - `-s`: minimum support (count or rate is fine)
-    - `-c`: minimum confidence
-    - `-f`: problem solving (freq or rule)
+    * `-s`: minimum support (count or rate is fine)
+    * `-c`: minimum confidence
+    * `-f`: problem solving (freq or rule)
 
 References
 ----------

From 550dc22b0325d6ba3394c87d298e3f5623121206 Mon Sep 17 00:00:00 2001
From: Gavin Lee <sz110010@gmail.com>
Date: Fri, 3 Nov 2017 23:27:42 +0800
Subject: [PATCH 5/5] Fix Readme bug

---
 Readme.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Readme.md b/Readme.md
index 505bc72..8422f26 100644
--- a/Readme.md
+++ b/Readme.md
@@ -54,9 +54,9 @@ You can find association rules as well, as sample
 We used -f to decide what you want to find, association rule or frequency itemset
 
 All parameter:
-    * `-s`: minimum support (count or rate is fine)
-    * `-c`: minimum confidence
-    * `-f`: problem solving (freq or rule)
+* `-s`: minimum support (count or rate is fine)
+* `-c`: minimum confidence
+* `-f`: problem solving (freq or rule)
 
 References
 ----------