# Spambase homework notebook, part 1: load the data, hold out a test split,
# then score Gaussian Naive Bayes and decision trees on it.
#
# The names assigned here (spambase, base_features_df, feature_training,
# feature_test, spam_training, spam_test, classifier, prediction, scores,
# best_depth, best_score, tree_df, max_x) are notebook globals that later
# cells read, so they are kept as they were.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb

from sklearn import metrics
from sklearn import tree
from sklearn.naive_bayes import GaussianNB

try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the original module name.
    from sklearn.cross_validation import train_test_split

#plt.rc('figure', figsize=(25, 15))
# Plain-Python spelling of the `%matplotlib inline` cell magic (IPython only).
get_ipython().run_line_magic("matplotlib", "inline")

# NOTE(review): read_csv treats the first line of the file as a header unless
# header=None is passed. The recorded test support (1135 + 705 = 1840 rows at
# test_size=0.4) implies 4600 data rows; if spambase.data has no header line,
# one record is being consumed as column names. Confirm against the file and
# add header=None if so.
spambase = pd.read_csv("spambase/spambase.data")

# The last column in spambase tells whether it is spam or not.
#     1 = Spam
#     0 = Not Spam
# .iloc is strictly positional; .ix would switch to label lookup if the
# column labels were integers.
base_features_df = spambase.iloc[:, :-1]
base_features = base_features_df.values
base_spam = spambase.iloc[:, -1:].values

feature_training, feature_test, spam_training, spam_test = train_test_split(
    base_features, base_spam, test_size=0.4)

# Both target arrays come back as single-column 2-D arrays; flatten both so
# fitting and scoring see 1-D label vectors. (The original left spam_test
# 2-D, and its commented-out fix raveled feature_test by mistake.)
spam_training = np.ravel(spam_training)
spam_test = np.ravel(spam_test)

# --- Gaussian Naive Bayes ---------------------------------------------------
classifier = GaussianNB()
prediction = classifier.fit(feature_training, spam_training).predict(feature_test)

print(metrics.classification_report(spam_test, prediction))
print(metrics.confusion_matrix(spam_test, prediction))
print(metrics.f1_score(spam_test, prediction))

# --- Decision Tree Analysis -------------------------------------------------
classifier = tree.DecisionTreeClassifier()
prediction = classifier.fit(feature_training, spam_training).predict(feature_test)

print(metrics.classification_report(spam_test, prediction))
print(metrics.confusion_matrix(spam_test, prediction))
print(metrics.f1_score(spam_test, prediction))

# --- Decision Tree with limited Depth ---------------------------------------
# NOTE(review): the depth is selected by its F1 score on the test split, so
# the "best" score reported below is measured on the same data that chose it.
depths = list(range(1, 100))
best_depth = 0
best_score = 0
scores = []
for n in depths:
    classifier = tree.DecisionTreeClassifier(max_depth=n)
    prediction = classifier.fit(feature_training, spam_training).predict(feature_test)
    score = metrics.f1_score(spam_test, prediction)
    scores.append(score)
    if score > best_score:
        best_depth = n
        best_score = score
print("Best Score: {}\nBest Depth: {}".format(best_score, best_depth))

# Index the scores by the depth that produced them. With the default 0-based
# index the curve was drawn one unit to the left of the marker below.
tree_df = pd.DataFrame(scores, index=depths)
tree_df.plot(title="Progress of Decision Tree over Depth")
max_x = scores.index(max(scores)) + 1  # list position -> depth (depths start at 1)
plt.vlines(max_x, 0, 1, color='red', label='Max = {}'.format(max_x))
plt.legend(loc='best')
"metadata": {}, + "output_type": "pyout", + "prompt_number": 106, + "text": [ + "" + ] + }, + { + "metadata": {}, + "output_type": "display_data", + "png": "iVBORw0KGgoAAAANSUhEUgAAAXEAAAEKCAYAAADkYmWmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcHHWd//FXH3Pfk+mck4sc35CQBEiAEK4EOfcnAooH\noiIIy49V1IXVHx64eC7rGlCRQ1CuZdUVFV08IIiwQJAbkkDIN8lAjskxmcx99vRRvz+qZtIzmSvJ\nTM9U5v18PPJId1V31bc+3f3ub32rqifgOA4iIuJPwZFugIiIHDqFuIiIjynERUR8TCEuIuJjCnER\nER9TiIuI+Fh4pBswlhljZgAVwLqUyQHgR9ba+0ekUcPAGBMEHgXm4W7bnSnzbgb+CdjpTcoA3gVu\nsNZuPsT1LQFutNZ+uJ/HfBPYYq39z0NZR8pyioGnvbv5wBTAevdXW2v/3+EsfyT18v4MAjHc1/Bw\n67Ya+Ji1ttYYsxX4oLX29cNZ5lilEB95rdba4zrvGGMmA28ZY1611q4fwXYNpXLgHCDXWtvzwgQH\n+JW19vOdE4wxnwCeMsYssNY2HezKrLWvAX0GuPeYfz3Y5faxnHrgOABjzBnAT1JfzyNAz/fnNNzX\npsVa+7vDWO5ZuB0WcN8DgX4eK/1QiI8y1tpdxpjNwFyvR/kZIBeot9a+zxhzE/AxIA5sAj5nra0y\nxswG7gNKgN24H4qHgWeA54ENwAzgDOAo4BYgD0gCN1tr/2SMmQg8BIzzmvMna+03+pres+3GmNOA\n73vt7QC+DqwBHsftYb9ujPmQtfbdHk/t9gG21j5sjPkk8HHgp8aYo4EfeusPAT/u3FMxxlwJXA8k\ngH3A5cBs4HZr7UJjzKnAKu95DvBv1trfGWMeANZba1f11m5r7RPGmE8DF3vLnuPN+5S19u2e297b\ndnjt2wq8CCwCvgK8CtwOTPNq8itr7b95j11OL6/LYOrstfcFYJW19rfe427x6nmjMeYzwLW4veka\n3PeN9epQivueeMxa+5U+tg1vWduNMd8AvgT8zhiTCfw7cLpX4zeAz1trm7xtfxQ4FSj22na3MaZz\nL/Nvxpj/492+xhhzHDAe+E9r7df7a4fspzHxUcYYczJuCL3oTZoPnOEF+BXAecBSa+1i4C3gAe9x\n/wn8l7V2IfB54GT293CmAN+y1hogihv2n7DWLgEuBO4yxkwFrgYqvOmnAbONMYW9TJ9jjCno0e5x\nwCO4H+DFuGH6MG7wng+0WWuP6yXA+7IWOMYYEwJ+gzs8shRYAXzJGHOSMWYxbuid663zf4Cvedvd\n6ZvArd5zrwRWetMdwOmr3d5QArjh9Dmvrmtww+tgOLhfFvOttX/AfZ3u89pzEnC2MebDxpgS4H56\nf126DNDee4BPe48LAZcB93p7CJ8CTrPWHg/8B5Dai8621h4zUICnWAcs9G7fCMSstUustcfidiBu\nSdn2fGvtCbidh295e1dXePNXWmsrvdtt3uNOBG4wxkwZZFvGPPXER16OMeYN73YYtzf5cWvtTmMM\nwDprbbM3/zzcAGjz7v8Y+JoxZjxwAm6PB2vtRmPMUynriAN/926fDEwC/uAtH9xe30LgL8CfvV3m\nvwJfsdY2GmN6Tr+xl2GOk3DHmF/x2rDBGLMGNzSfOcTatAJzcXuJ96W0Nws4Frcn+ri1dqe3zh8B\nGGNWpCzjv4E7jDEXeG3/Wsq8QD/tXoEbQq9Za3d5j38d+OAhbMdzXrvycMOsxBjzbW9eHrAYaAEm\n0vvrsiNlWf2199fAD4wxE4AlwGZrbYUx5hrcjsELKcsu8b44H
Nw9tYPh4L42AO8HiowxZ3v3M4Gq\nlMfe4bVzlzHmcdxhtd72ZH7hPa7KGFOF2yPf2cvjpAeF+MhrG2AMtTnldpDuu+xB3NewPeV+wrud\nTHlc1FqbTHnMO9baZZ0zvV5PlbU2boyZiTteeSbwsjHmImvt3/uanrKO3sY0Qxz6e+wE4OfeMup7\njMtOBOpx9xBImZ4FTE+dZq29xxjzGG54nAfcbIxZNMh2x4C2HvMOZey28zUMef+fbK1t99pc5q1j\nJX28LoNYfwgIW2tbjTGP4A5DnQzc680P4g5R3OgtNwBMtdbWeaHecpDbcwLdD3Z+3lr7hLfsfCA7\n5bGJlNuhHvdTxVJua4z8IGg4xV+eAK4wxuR69z8P/K+1thF3V/8KAC9wz6T7sEKnl3CHQ073HrsI\n2AhM9sZQb/J2+7+I22Oaa4z5t16mz+llucYYc4K33AW4Qy/PDLBNvY0jfwZ3/P7XuOP+7caYy7x5\nU3GHWo7DPSvkLC/UwR3z/Y/U7fbGiY+z1j4IXIM7NluSsroX+2n3kAaJ9zq9CNzgrasIt5f+AW96\nb6/LpB6LGajO9+K+D04GfutNWw1cmlKnq71pcJDbaIyZi3usY5U36QngOmNMpnHPQrob+G7KUz7l\nPW8acDbu3h64YZ55MOuW3qknPvL6+xlJp8f8nwNTcXvCQWAz7rgnuB+WnxtjOk/Xe4/9u7xdy7DW\nVhtjPgR83xiTjftF/knvgNVtwIPGmPW4Y+dv4u7mlvYy/ZepDbXW7jPGfBi43fuSSQKfttZu8cZr\n+9pOB/iodwDS8dqzEVhhre0AMMZcCPzIGPNl3IOBN3XuBRhjvgQ87vUod+GOe89LWd+XvOd+h/0H\nC7d5j3estTX9tPuUHu3u+Xr0tT39+TjwE2PMOtwQ+4W19pfetvT2uqQOpfRbZ2/+68aYGPDbzvpZ\na1cbY/4deNIYkwQacA/YDmabUof7krh7fTdaazvD+NvAD3APaAa9/29Ief40Y8xrQA7wBbv/tNHf\nAc8ZYy4aoF4ygIB+ivbIYIz5Ku4H13o9vLXAedbajSPcNBmjjDHvAR+11r480m05kg2qJ26MOQm4\nxVq7ssf0C4CbcA+c3Wet/dnQN1EGaRPw315PK4x7Kp0CXOQIN2BP3NuF/QTQbK1dnjI9A/fc46W4\nu+1rgPdba/cOX3NFRCTVYA5sbsE9rarnAZCjcU91arDWxnBPUzp9iNsnIiL9GDDEvUtr473MKsQ9\nQNKpCSgaonaJiMggHM7ZKQ1A6lV7BUBdf09wHMcJBEbx6Z8zZrj/b906kq0QEempz+A8nBDfiHte\nawnuxQKn456j23crAgGqqw/694zSpjTpHh+oTWMbI5GCUV2TdFM9ulM9DjQWaxKJFPQ572BC3AEw\nxlyK+3sI9xpjrsc92T8I/Nxau/twGioiIgdnUCFurd0KLPdu/zJl+h+BPw5Ly0REZEC67F5ExMcU\n4iIiPqYQFxHxMYW4iIiPKcRFRHxMIS4i4mMKcRERH1OIi4j4mEJcRMTHFOIiIj6mEBcR8TGFuIiI\njynERUR8TCEuIuJjCnERER9TiIuI+JhCXETExxTiIiI+djh/KNnXahvb2bi9jk07GijIzWD5MRMp\nHelGie/E4gkywqGRboaMYb4K8crqZp5bu5t504o55qhSMsIhkkmHtRX7+OurleyqaWFOeTHHzCzF\nTCumLRpnZ3ULO/e1UN8UpTUap7U9Tn1zlH0N7d2W/ae/b8Os/GdOqXyDwPrdZISDZIZDzJ1aRG52\nxght8eFzHIe33qvl98+9S11TlImluUwszaW4IIuG5g5qGtupa4oya0oRF54yg6L8rGFtz76GNuqb\nOzhqUiHBYGBY1zWcmttiPPSE5XVbzfnLpnHhqTMJhwbesXUch4pdjcRiCcz0EoKBQ6tBU2sH9c0d\nTCnLG7V1jMYSRDsSFOZlH
tTzmttiAOTn+Pdz15eGlg6KDrIeAwk4jjOkCxyAU13ddEhPfHtrLXc+\nup62aAKArMwQC2eWsq2qiep6N5ALcjNoao31u5xgIEB+TpijJhcxb1oxc6cVU1Xbxpr1u3n73X04\nge4fxILcDD525hyWLZhAYBAfOMdx6IgnycoYXO+srCyf51/bwepXdlDXHOWoyYXMnlLEzEmFZGWE\nCAQ625xxUB9Wx3F4b3cTv3lmCxu31xMASgqzqGuM0vMVD4eCxBNJsjJD/MOy6ZxzwtRe25/03isD\nBU91fRuVe5uZPrGA0sJswN3zeeyFrTy/bjeJpENJQRbL5k/g5AUTmRLJ66ptJFJA6nvEcRxes9X8\n5pkKHBxmTS5i5uRCyr3wCgQCBAMBsjND5GaHyc0Oe3Xr3kbHcXh7ay3bq5qpb4pS3xyluS1GPOEQ\nTyRJOg5TyvI5ZmYp82eU9Ptl9s62On72xw3UNUW7ajd1fD5Xv38+5ePzicWT7K1rpaU9TklBFiUF\n7rJetXtZ/fIOtu5xt29iaS5nnzCV5cdMJCsjRCyeoLU9Tm52BhnhYK/1aIvGefyl7TzxynY6Ykny\nssPMm1aCmVZMfm4G4WCQUChAS1uc6vo2qhva6IglOXtpOWZaSbftqG1sZ8feZgrzMinOz6IwL4OW\n9jh1jVFqm9pJJBwK8zIpyM2gOD+LnKyB+3xJx8Fur+eFt3bzqq0m2pFgfHEOc6cWM2tKIaFgkI54\ngo5YkpKCLBYeVdrVSdpX38ZjL2xlzfo9JB2HvOww40tymDq+gBXHTWbGxMJeawJQVdvKq3YvsXjS\ne72hqS1GTUM7NY3txOIJFswo5dg5ZRw9vYSMcIhEMklre5yMcJDszAO3LdqRoK45SkcsQUc8SSyW\nIJ503y+dtZlQmkthbsaA2ZB0HP5r9SaefmMnc6cWc8EpM5g/vYRAIEDScdhR1UxtYzuzphR1+9Kr\na4ry5uZqPnLu0X2uwBch/vy63Tz4+EYCAfjwytnUN0V5ZeNe9jW0kxEOsmz+BM5aOpXySB57alvZ\nsLWOzZX15OdkMKUsj8lleUSKc/r8gHdKnnoqG8qOou6btxBLJKltjPLXV3fQEU9y9PQSPnj6UeRm\nh90wdejq0dc0tFNV18qumlb21LYS7UgwY2IBi2eXsXj2OMYX5xAIBAgEIJF0aG6L0dwaY29dG8+s\n3cXmHfUAhIIBEsneX4/C3AxOOHoCyxZM4KhJhQdsQyyeYOP2ejbtqGfbnia27mnq6tEsPGocl6yY\nxdTx+URjCapqW6lv7qA4P5OyomyyMkM8u3Y3v3/uXZpaY+RmhSkfn8+kcblEinOoaWxnR1UzO6qb\nCQUCLJo9juPnRFgws5SMcJBYPEk0lmB9RQ1r3trDJm97AMYVZjElks+GrXXEE0kmlOYye0ohr2/a\nR1s0DkBuVphpE/KZOr6A+bPKKM4JM7ksl8aWGA+vtqytqCEcCpKVEaSlPT7g+6WkIItTF07itEWT\nGFeUzdqKGv7w/Hts23Pgey8UDBAKBnCgKwAApkTymFtezOzyImZMLKC+KUrlvhbe293IS29XEQgE\nuPC0mbzv+Cn8+uktPLt2N+FQgJKCLPY1tJP6sQoAmRkhorEEAeD4uRGyM0O8uKGKRNIhMxzstv6M\ncJA55UUcPb2EY+dNoHpfC23ROPsa2njy1Uqa22IU5WUyf0Ypm3bUU9PYfa+yL0tMhI+snE1re5wn\nXtnOK+/s7fP91lMAWDhrHGctKWf+zFJ3D+/dWp5ft5vNlfU43mNiCafrdS0rymbSuDy27GzomtZb\n/edOLaY4P4uX33HrMWlcLhNKcqmqa6W6vo14wm3j7PIizlpSzgkLJ5OIxgiHgmyvauLPL27jlY17\n6SvK8rLdgO5872SGg4RCwa42hUNBlpgIpy+ahJleQsXOBp5du4tXNu6lI5bsfaEpsjNDlEf
yWTx7\nHMfNiTBpXG63z2c8keS+P73DixuqunU0Z00ppCgvC7u9rtv7ujySz5ypReyoambLzgYAHlt1oT9D\nPJ5I8vvn3uPPL24jLzvMdR9axNypxe6CHIc9ta0U5GYO2W5X6ZJjAKh97a2uafvq23j4yU2sq6gZ\n8PnhUJCJpTlkZ4V5b1fjoD4ggQAcPyfCeSdNY/rEArZXNbOlsp4de5uJJx0cxyGecNi0o74rlEsL\ns5hUmktZcQ7F+Vls3d3IO9vq6EgJobKibGZMLGDl8eUcPb2kr9V30xaNux+Id/ZS3dDW7UMRDASY\nVJZLWzRObWO03+XMm1aMmVbC9qomNlc20NwWo6womw+cMpOTj5lAKBgkFk+wdksNr9q9bKtqZm9t\na7c9hGAgQDAI8YTD0dNL+OS5hgklOeyta6NiVwN769z2OTgkk9DWEaetPU5ze4wtlQ20d7iBOa4o\nm30N7QSApfPGs2zBBEoLsikuyKIgZe/GcRwqq1t4+71a3nqvhi2VDd3qmWp8cQ5Xf2A+syYXdU17\nc/M+/utJS0c8yaTSXCaOyyUvJ4P6pii1jVEaWzuYP6OUs5eWM74kF3A7AX97fSdvbK4mIxQkLztM\nTlaYPbWtVFa39Lru7MwQ5y+bzjlLp5KVGcJxHKrr26jY2Ug0liCeSBJPOORkhRhfnEOkOIeG1g5+\n9dRmKnY2dusoTCnL44R542mNukOMjS0d5GZnUFqQRUlhFuFgkMbWDppaO9ixt5n3druf3QklObTH\nEjQ0dwDuF3Vmyp7brClFnHLMROZMLSYYCJBMOlRWN7N1TxOBAGRlhMgIBamsbubNLfv2L7c0lwtP\nncGJ8yZ0vS7JpMOGbbU8+Uol69/t/hnMyw53hd+08fmce+K0rr2eQAByszMYV5hNbnaYZNJhy84G\n3tyyj7ferQUgPydMbnYGu2ta2F3T2lXf9g53bz9SnM28aSVkZYa62hwKBQgFg4SCAepbouytbWNP\nXSu797V27alOKM1l/vQS5pS7e9S/emozaytqmF1exBcvWcTe+jYeW7OVNzbvA9zP6rzpJZQVZrOp\nsp7NlQ3E4kkCAZhbXswSE+HS8+f7L8Tf3dXI/X95h53VLZQVZfPPH1nMpHF5w9q43kIc3A/4G5v3\n8fbWWpJJh2TSwQGK8zMZV5jNuKJsIsU5RIpyut58re1x3t5ay/qKGprbYjiO+5xgIEB+bgYFORkU\n5Gay8sRpZA1ilCSeSPL2e7W8uKGKDVtrDxg2mjQul0WzxrFgRikzJhUe9hdbLJ6gqraNvfVtlBZm\nMaUsj4ywGxrbq5p5Y3M1m3bUEwgEyAgHyQgHmTo+n+ULJlJWnNOtdnVNUQrzMvsdM27viFNZ3UJ9\nW5x33t1H5d5mWtrjnH/SNJYfM3FQQ1mdoh0JXtm4l2fX7aKisoElJsIHTp1JeSR/0MuIJ5Jsr2pm\nc2U926ua3RpE8igvy2dSWS6h4PCe2NXQ0sHGbXU0tMVJxhPkZIXIzc5g3rRiCnIPfkzVcRxe2lDF\nn1/cRmFeJueeOI1jZpYeVF3f293IU69V8vI7VWSGQ5y0YAKnLZrE9AkFB7WcnuqaolTXt3UNt/Rl\nd00LL7y1h+b2BFU1zTS0dFCcn8W5J05j4VEHty2pHMcN+OfW7ubtrbXMKS/ijMWTD+qYRXNbjPUV\nNby+uZq33q0lGkt0m79gZimfu3ghWZn7v+yq69sIQLfPC7ifvW1VzYwvzukaWolECvwT4vFEkt/+\nbwWrX9mB48CKYydzyYrZ5GYP/zHYvkJ8OPU2vjcY0Y4E+xrbqW1sZ2KpO+xxJDjUevQl6TiHfPBw\nNBjqegyFaEeCYDDQNW6fbqOxJqlSOwBbKhsoK87mg6fPOqx69Rfio+7slD++sJUnXt7B+JIcrjh/\n3gEHY8SVlRliSlkeU8qGd+/E7/wc4KNVam9SDhQOBTl
qciFHTS7k3BPTsL7hX8Xg7dzXwp/+vo2S\ngiy+cfkJael9i4j42ai5YjPpODz4l40kkg6fOGeuAlxEZBBGTYg/88ZOtuxsYKmJcNycyEg3R0TE\nF0ZFiNc2tvObZyrIyQrz8bPnjnRzRER8Y1SE+K+f3kJ7R4KPnjmb4mG+7FtE5Egy4iG+t76NVzbu\nZdr4fE5dNGmkmyMi4isjHuJPvuyeD37eSdN0OpiIyEEa0RBvbovx3PpdjCvMYum88SPZFBERXxrR\nEH/6jZ3eL6xNHdTPeIqISHcjlpyxeIKnXqskJyvMaYsnj1QzRER8rd8raowxQeBOYBEQBa6y1lak\nzL8Y+CrgAPdZa+8e7Ir//nYVjS0dnL9s2qB+p1hERA40UE/8IiDTWrscuBFY1WP+rcDZwCnADcaY\nIgYh6Tg88fJ2QsEAZy2ZerBtFhERz0AhfgrwOIC19iVgaY/5MaAYyMH9TfhB/STinppWdte0cvzc\nSNfv/4qIyMEbKMQLgcaU+wlviKXTKuA14C3gMWtt6mP7VOv9JZLyiH6BT0TkcAw0GN0IFKTcD1pr\nkwDGmGnA54DpQCvwsDHmEmvtb/pbYCRSQIf3V3JmlBcTiRT09/D0Cu7/O4/pNKpqMAqoHt2pHgdS\nTfYbKMTXABcAjxhjlgHrUuZlAwkgaq1NGmP24g6t9Ku6uomt3t+Ny/Dujxal3p+tqk1jm0b7D9yn\nm+rRnepxoLFYk/6+tAYK8UeBs40xa7z7VxhjLgXyrbX3GmMeBF4wxrQDW4AHBtOgzuGU0qLswTxc\nRET60G+IW2sd4NoekzelzL8NuO1gV1rb6P7h2hL92JWIyGEZkYt9ahrbKczLHLG/0ScicqRIe4om\nHYfaxiilhRpKERE5XGkP8aaWDhJJh3GFGkoRETlcaQ/xmsYogHriIiJDIO0h3nlmyjiFuIjIYRuB\nnrh3eqFCXETksI1giGtMXETkcI3AcIo7Jq7hFBGRwzciY+LhUJCC3Ix0r1pE5IgzIiE+rjCLgP4o\nsojIYUtriEdjCRpbYzqoKSIyRNIa4jX1bYDGw0VEhkpaQ7y6zg1xnZkiIjI00hvi9a2AzhEXERkq\naQ5xXa0pIjKU0jyc0tkT13CKiMhQSHNPvHNMXD1xEZGhkPYDm/k5GWRlhNK5WhGRI1bae+IaShER\nGTppDfGOWEIHNUVEhlDaL7vXeLiIyNBJe4irJy4iMnRGoCeuMXERkaGS9hCPFOeke5UiIkestIb4\nFz56LDMmFqRzlSIiR7S0hvhZJ07X74iLiAyhtA+niIjI0FGIi4j4mEJcRMTHFOIiIj6mEBcR8TGF\nuIiIjynERUR8TCEuIuJjCnERER9TiIuI+JhCXETEx8L9zTTGBIE7gUVAFLjKWluRMv8EYBUQAHYC\nn7LWdgxfc0VEJNVAPfGLgExr7XLgRtzABsAYEwDuAT5trT0NeAqYOVwNFRGRAw0U4qcAjwNYa18C\nlqbMmwvUANcbY54Biq21djgaKSIivRsoxAuBxpT7CW+IBaAMWA7cDpwFvM8Ys3LomygiIn3pd0wc\nN8BT/4pD0Fqb9G7XAFs6e9/GmMdxe+pP97fASGQU/1GIoPtb5+lu46iuyQhQPbpTPQ6kmuw3UIiv\nAS4AHjHGLAPWpcx7F8g3xszyDnaeBvxsoBVWVzcdaluHXWnSAaA2jW2MRApGdU3STfXoTvU40Fis\nSX9fWgOF+KPA2caYNd79K4wxlwL51tp7jTGfAX7hHeRcY639y5C0WEREBqXfELfWOsC1PSZvSpn/\nNHDSMLRLREQGQRf7iIj4mEJcRMTHFOIiIj6mEBcR8TGFuIiIjynERUR8TCEuIuJjCnERER9TiIuI\n+JhCXETExxTiIiI+phAXEfExhbiIiI8pxEVEfEwhLiLiYwpxEREfU4iLiPiYQlxExMcU4iIiPqYQ\nFxHxMYW4iIiPKcR
FRHxMIS4i4mMKcRERH1OIi4j4mEJcRMTHFOIiIj6mEBcR8TGFuIiIjynERUR8\nTCEuIuJjCnERER9TiIuI+JhCXETExxTiIiI+phAXEfExhbiIiI+F+5tpjAkCdwKLgChwlbW2opfH\n3QPUWGu/MiytFBGRXg3UE78IyLTWLgduBFb1fIAx5hrgGMAZ+uaJiEh/BgrxU4DHAay1LwFLU2ca\nY5YDJwI/BQLD0UAREenbQCFeCDSm3E94QywYYyYB3wA+hwJcRGRE9DsmjhvgBSn3g9bapHf7EqAM\n+DMwEcg1xrxjrX2ovwVGIgX9zR5ZQfe7KN1tHNU1GQGqR3eqx4FUk/0GCvE1wAXAI8aYZcC6zhnW\n2tuB2wGMMZcD8wYKcIDq6qZDb+0wK026w/q1aWxjJFIwqmuSbqpHd6rHgcZiTfr70hooxB8FzjbG\nrPHuX2GMuRTIt9be2+OxOrApIpJm/Ya4tdYBru0xeVMvj3twKBslIiKDo4t9RER8TCEuIuJjCnER\nER9TiIuI+JhCXETExxTiIiI+phAXEfExhbiIiI8pxEVEfEwhLiLiYwpxEREfU4iLiPiYQlxExMcU\n4iIiPqYQFxHxMYW4iIiPKcRFRHxMIS4i4mMKcRERH1OIi4j4mEJcRMTHFOIiIj6mEBcR8TGFuIiI\njynERUR8TCEuIuJjCnERER9TiIuI+JhCXETExxTiIiI+phAXEfExhbiIiI8pxEVEfEwhLiLiYwpx\nEREfU4iLiPhYuL+ZxpggcCewCIgCV1lrK1LmXwp8AYgD64F/stY6w9dcERFJNVBP/CIg01q7HLgR\nWNU5wxiTA3wbWGGtPRUoAt4/XA0VEZEDDRTipwCPA1hrXwKWpsxrB0621rZ798NA25C3UERE+jRQ\niBcCjSn3E94QC9Zax1pbDWCMuQ7Is9b+dXiaKSIivel3TBw3wAtS7gettcnOO16gfx+YDXxoMCuM\nRAoGftBICQaA9LdxVNdkBKge3akeB1JN9hsoxNcAFwCPGGOWAet6zP8p7rDKxYM9oFld3XTQjUyX\n0qS7CbVpbGMkUjCqa5Juqkd3qseBxmJN+vvSGijEHwXONsas8e5f4Z2Rkg+8ClwJPAv8zRgD8CNr\n7e8Pu8UiIjIo/Ya417u+tsfkTSm3Q0PeIhERGTRd7CMi4mMKcRERH1OIi4j4mEJcRMTHFOIiIj6m\nEBcR8TGFuIiIjynERUR8TCEuIuJjA112f8RLJpOsWnULFRVbyM3M4FuxOPkj3SgRkUEa8z3x5557\nhlgsxt1338c/x+J8P2PMf6+JiI+MqsT69d+28MrGvUO6zBPmjecjZ87uc/66dWs56aTlACx2HN4O\njvnvNRHxkTGfWK2tLeTl5XXdDzruEIuIiB+Mqp74R86c3W+veTjk5ubR2tradd8BguqNi4hPjPm0\nWrRoMS9Tb4XfAAAGtklEQVS+6P5c+tpAgLmOeuEi4h9jPsRPP30lmZmZXHvtlXw/I4MbY/GRbpKI\nyKCNquGUkRAIBPiXf/kKAKVLjgGgdiQbJCJyEMZ8T1xExM8U4iIiPqYQFxHxMYW4iIiPKcRFRHxM\nIS4i4mNjPsRff/1VTjvtBJ56anW36Zdf/jG+971vDvv629rauPbaK9m+fSsA8Xicb3/7Jj772au5\n+urLef75Z4e9DSLiX2M+xAGmT5/BX/+6P8QrKrbQ3t4+7OvduHEDl112Gbt27QICAKxe/ReKi0u4\n4457WbXqdm677fvD3g4R8a9RdbFP3s1fJ+ux3w/pMqMXXETLzd/pc34gEGDWrDns2LGdZiAfeOKJ\nP3POOedTVbUHgN/+9r959tlnaGtro7i4mO997wf84Q+/Y/36tdx883f5znf+lQULFnLxxZd0Lffe\ne+9i3bo3u63n1lt/Qji8v+SxWIw777yTL37x+q5pK1eexYoV7wPAcZKEQqEhqoSIH
IlGVYiPpBUr\nzuTJt9ZzcSLh9ZAvp6pqD47j0NjYyA9/eCeBQIDrr7+OjRs38KEPfYRXX32Z7373ZhKJRLcAB7j6\n6msHXOfChYuJRAq6TcvJyQHcX1e86aYb+cd//Keh20gROeKMqhBvufk7/faah4PjOACcdda5/Piu\n25nqJFm8+Liu+YFAgHA4zM03f5WcnFyqq6uIx93fV7nsssu59torue++hw9Y7j333Mn69Wu7Tbvt\ntju69cT7UlW1h6997ct88IMf5qyzzj2czRORI9yoCvGRNHnyFFoJ8HA4zJXn/gOVlTsAd3z8uef+\nl3vueYD29nauuuqTOI5DLBbj9ttv5ctf/ho/+MEt3HHHvd0C+lB70LW1NVx//ee44YYbOf74pUOy\nbSJy5BrzBzYDgQCBgHtQ8fxEgj2BAOXlU7umlZeXk5OTw2c/ezXf+tZNzJ07j+rqau6++3ZOOeU0\nLrjgIk466WTuuuv2IWnPQw/dT3NzM/fffy/XXXcN1113DdFodEiWLSJHnkDncEKaONXVTelc30Hp\n+hXD195K2zojkQJGc03STfXoTvU40FisSSRSEOhr3pjviYuI+JlCXETExxTiIiI+phAXEfExhbiI\niI8pxEVEfKzfi32MMUHgTmAREAWustZWpMy/ALgJiAP3WWt/NoxtFRGRHgbqiV8EZFprlwM3Aqs6\nZxhjMoBbgbOBM4B/NMaMH66GiojIgQYK8VOAxwGstS8BqdeBHw1ssdY2WGtjwPPA6cPSShER6dVA\nIV4INKbcT3hDLJ3zGlLmNQFFQ9g2EREZwEA/gNUIpP5WatBam/RuN/SYVwDUDbC8QM+fXh1Vtm8D\nIJLm1Y7qmowA1aM71eNAqsl+A/XE1wD/AGCMWQasS5m3EZhjjCkxxmTiDqX8fVhaKSIiver3B7CM\nMQH2n50CcAWwBMi31t5rjHk/8A3cL4OfW2vvGub2iohIinT/iqGIiAwhXewjIuJjCnERER9TiIuI\n+Fha/sbmQJfvjwXeFa73AdOBLOA7wDvAA0ASeAv4rLV2zB2k8K70fQ14H24tHmCM1sQY8xXgAiAD\n+AnuGWIPMAbr4eXGz4C5uNt/NZBgjNajL+nqifd5+f4YchlQba09HTgPuAO3Dl/1pgWAC0ewfSPC\n+3L7KdCCW4NbGaM1McasAE72PicrgKMY2++Rc4A8a+2pwLeA7zG269GrdIV4f5fvjxWP4J6OCW7d\nY8Dx1tpnvWl/Ac4aiYaNsP8A7gJ2e/fHck3OAdYbY34PPAb8D7BkDNejDSjyTnUuAjoY2/XoVbpC\nvL/L98cEa22LtbbZGFOAG+hfp3v9mxljP1tgjPk07t7Jam9SwPvXaazVJIJ7HcYlwP8FfsHYrsca\nIBv3wsKfAj9mbNejV+kK0v4u3x8zjDFTgb8BD1lrf4k7rtepAKgfkYaNnCuAs40xTwPHAg/S/VcP\nxlpN9gGrrbVxa+0moJ3uITXW6vFlYI211uC+Px7CPVbQaazVo1fpCvH+Lt8fE4wxE4DVwJettQ94\nk98wxpzh3T4feLa35x6prLVnWGtXWGtXAm8CnwIeH8M1eR73eAnGmMlALvDUGK5HHvv34OtwT8QY\n05+Z3qTlis3eLt/3ehpjhjHmR8CHAZsy+Qu4u4iZwAbg6rF6pN3rjV8DOMC9jNGaGGP+HViJ28H6\nCrCVMVoPY0wxcD9QhtsD/yHuWUxjsh590WX3IiI+NqYOLoqIHGkU4iIiPqYQFxHxMYW4iIiPKcRF\nRHxMIS4i4mMKcRERH1OIi4j42P8HgRjjCo60xPMAAAAASUVORK5CYII=\n", + "text": [ + "" + ] + } + ], + "prompt_number": 106 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + 
import re


def parse_dot(features_df, filename):
    """Rewrite a .dot file in place, replacing ``X[n]`` with column labels.

    Every ``X[n]`` placeholder in the file whose ``n`` is a valid column
    position of ``features_df`` is replaced by the label of that column.
    Placeholders with no matching column are left untouched.

    Args:
        features_df: DataFrame of the features used in the decision tree;
            only its ``columns`` are read.
        filename: Path of the .dot file to rewrite.

    Returns:
        None. The file at ``filename`` is overwritten.
    """
    # Placeholder text -> column label. str() lets integer or float labels
    # be substituted; str.replace() raised TypeError for them.
    labels = dict(
        ("X[{}]".format(position), str(label))
        for position, label in enumerate(features_df.columns)
    )

    with open(filename, 'r') as dotfile:
        dot = dotfile.read()

    # One pass over the text: a label that itself looks like "X[3]" is not
    # substituted a second time, which chained str.replace() calls could do.
    dot = re.sub(
        r"X\[\d+\]",
        lambda match: labels.get(match.group(0), match.group(0)),
        dot,
    )

    # The with-block closes the file; no explicit close() is needed.
    with open(filename, 'w') as dotfile:
        dotfile.write(dot)


# The statements below are the notebook cells that surround parse_dot. They
# run when executed as a notebook or script (__name__ is "__main__" there)
# and are skipped on import, so parse_dot can be imported on its own.
if __name__ == "__main__":
    # Refit at the depth selected by the search in the earlier cell.
    classifier = tree.DecisionTreeClassifier(max_depth=best_depth)
    prediction = classifier.fit(feature_training, spam_training).predict(feature_test)
    print(metrics.classification_report(spam_test, prediction))
    print(metrics.confusion_matrix(spam_test, prediction))
    print(metrics.f1_score(spam_test, prediction))

    # --- Parse a DOT file and replace the NumPy index with the DataFrame column
    # Not used by the cells shown here; kept in case a later cell needs it.
    from sklearn.externals.six import StringIO

    # Requires dot which can be installed from http://www.graphviz.org
    with open("tree.dot", 'w') as f:
        # The return value is not needed; assigning it to `f` only hid the
        # open file handle behind a different object.
        tree.export_graphviz(classifier, out_file=f)

    parse_dot(base_features_df, "tree.dot")
    # Plain-Python spelling of the `!dot -Tpdf tree.dot -o tree.pdf` shell escape.
    get_ipython().system("dot -Tpdf tree.dot -o tree.pdf")
# --- Random Forest Ensemble -------------------------------------------------
# Fit one forest per n_estimators value and keep the size with the best F1
# score on the held-out split. Reads feature_training, feature_test,
# spam_training, spam_test, metrics, pd and plt from the earlier cells.
from sklearn.ensemble import RandomForestClassifier

# NOTE(review): the forest size is selected by its score on the test split,
# so the "best" score reported below is measured on the data that chose it.
n_estimators_range = list(range(1, 50))
best_n_estimators = 0
best_score = 0
scores = []
for n in n_estimators_range:
    classifier = RandomForestClassifier(n_estimators=n)
    prediction = classifier.fit(feature_training, spam_training).predict(feature_test)
    score = metrics.f1_score(spam_test, prediction)
    scores.append(score)
    if score > best_score:
        best_n_estimators = n
        best_score = score

# This cell used to store the winning forest size in best_depth; keep that
# name assigned in case a later cell reads it.
best_depth = best_n_estimators

# The label previously said "Best Depth", but the value is a number of trees.
print("Best Score: {}\nBest n_estimators: {}".format(best_score, best_n_estimators))

# Index the scores by the n_estimators value that produced them. With the
# default 0-based index the curve sat one unit to the left of the marker.
forest_df = pd.DataFrame(scores, index=n_estimators_range)
forest_df.plot(title="Progress of Random Forest over n_estimators")
max_x = scores.index(max(scores)) + 1  # list position -> n_estimators (starts at 1)
plt.vlines(max_x, 0, 1, color='red', label="Max = {}".format(max_x))
plt.legend(loc='best')
"iVBORw0KGgoAAAANSUhEUgAAAXEAAAEKCAYAAADkYmWmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcHHWd//FXH3NPTyZHJyEHARLyDQS5EiSG+xSQKIq6\ni6xCXBBZhN0H+NDgirIrsuzKKXIIGo71WH+gLKuuwApyRUHCFSDkExMIIEdmkpnM3T191O+Pqhk6\nk5meSZirZt7Px2Me3dXVXfWtb1e961vf6qqJeJ6HiIiEU3SkCyAiIrtOIS4iEmIKcRGREFOIi4iE\nmEJcRCTEFOIiIiEWH+kCjHbOuT2AjcCagpcjwA1mdseIFGoIOOeiwH3AAvxlu7lg3NnADcBrwUsR\noAZ4AviSmaUHqQy/Ae4xs7sGY3o9pv0osDvQVPCyZ2YHD/a8esx3AnCfmR07lPMZLZxz5wAlZnaL\nc+48oNbM/n2Qpv0Q8Ldm1jAY0xsrFOID025mB3UNOOdmAC8751ab2UsjWK7BNAs4Eag0s94uHnjM\nzD7eNeCcKwOeBM4CbhukMnjB31DwgK+a2a+GaPp9mQgcMszzHEmHAy8BmNkPB3nax+M3IKSAQnwX\nmNk7zrm/APOdc4uAvwcqgW1mdpxz7jLgb4EssB74ipltds7NA1bib9jv4q+QPwEexQ/EtcAewFHA\nXsBVQBWQBy43s98656YDdwOTg+L81sy+1dfrPcvunDsC+I+gvJ3AN4FVwANACfCcc+50M3utx0d7\nbjxTgAnA1mC6pwKXAqXAVOCuoFxHA9/FP5rZDygDLjCzR4Od4V3AbsBbwTT7LKeZPRgcFZwOlAd1\n9SZwE/AVYD5wrZld23O5+1iGrnnNAm4B5gTvucvMrg6Owp7gA3wvwB1AhXPuOWCxmeUL5jshKPsB\n+DuZ3wHfwF+flpnZsuB9C4DfA7Pxj5SuD+YTA75vZncE9XwD0BqU7RAzyxTMa1NQluPwj0h+YWZf\n76Oeuj5TCvw7cGQwr+eBi8ysxTl3PnAe/neTCp4vAJYBxzvnOvDXg8lmdmEw/58CHwvK/m3gMGAR\nkAE+bmbvFlmPuo56H3HOfQx/3fsBMCmou2vM7D971ENl8J39GJgXfF/PAuf10VAJJfWJ7wLn3Efw\nV4qngpf2BY4KAnw5cBL+BnsA8DJwZ/C+/wR+amYfAi4CPoK/AkaAmcC/mpkD0vhh/3dmtgj4BHCL\nc242cC6wMXj9CGCec66ml9f3ds4lepR7MnAP/oZ4AH4r+if4G9XJQIeZHdRLgAMc4Zx73jm31jlX\nB/wC+J6Z/dI5FwEuBr5gZocEy3Wpc25S8NkPA1cHXRc/Bi4PXr8J+KOZ7Qf8A+CKlTMIVfBbe2fj\nh/Y04G+C7opTgCt6KTtBHX8vWIauv5OCcT8FHjaz/fGD5e+cc38TjNvV76Wr/s8O6vXgwgAPfB+o\nD9aHxfhh/lXgZ8DhzrmpwfuWB/ONAvcCK8xsMXA08FXn3KHB+xbidzccWBjgAQ+oMrMjgaXAhc65\nOX3UVZcVQMbMFpnZgfgNj6uCrrfrgI+a2Yfxj8QOM7P7gP/B35HezPZHVh5QFkznkuAz1wfDbwX1\nBH2sR2a2PBh/DPBeMJ8bgvXjZOBK59ySHvVwEP53VB087zoi2rOf5Q4VhfjAVBRs+C8BVwKfM7O3\ng/FrzKw1eH4SsNLMOoLh7wPHBRvkIcCPAMxsHfBwwTyywJ+C5x/Bb53e75x7HvgtfiviQ/ittdOd\nc7/Fb/1cambNvby+wsxaeizHocAGM3smKMNa/Fb4MfR/mPpEsCEsBG7EbzX/TzAdD78Fdohz7lvA\nNcH0qoLPvmFmXecUnsdvPYHfKrwzmMbrwP8Fn+urnEfjh8EzZvZ2MN/XgYeC6b0GlDvnKnspf1d3\nykEFfw8456rwQ+2mYF7NQZlODj6zq99LV/0Xq9eT8FuTmFknc
CtwcrAu3Qt83jkXA87E3/k5/COB\nlcH8H8U/IjkwmN5bZvZWkfndH8zrHaCO97+HvpwKfKJr3ccPxH2CndE9wJ+cczfin2dYWfC5SI/H\nLr8MHl8D3ivoitxYUJZi61GX+fg7hP8OlufdYNon4X9nhfXwBLDQOfcH/J3S9X00UkJL3SkD01HY\nJ96L1oLnUbZfeaP49ZwqGM4FzwtbZumClloUeNXMuloWOOdmApvNLOuc2xO/f/BY4M/OudPM7E99\nvV4wj94CJcZOrAdBcH7HObcUP1hODYLwBfwN6Qn8Dfq0gvl1FEzCK3jdY/uGRHYA5czgt4gLZXd8\n+4B1fV+F8yyskw/0veC3Xvubd+F8S4LnP8Jvrb4KrDWzN5xzH8Lvsis8PzMd2AYsYfv1sDd9fQ/F\nyneRmT0YzKsaf6eBmX3eObcvcALwdfwuoNMKpt2bwu9thyOFYOdbbD0qLFdPhd9Zdz2Y2aagG/No\n/O/l9865C83slztOIpzUEh98DwLLC1qDF+GfFGzGb00uBwg2+GPpfYV/Gv9w/MjgvfsD64AZzrmr\ngMvM7H7gn4BX8Pvm/62X1/fuZbrOOXdIMN2F+If+j+7Ccl6Af4TxiWA+iWD+v8XfYMrwN6xiHgC+\nFJRlFn7L3MPvpuqrnIN6YitoLT8VLE9XP/Xnef+ooNDOfi974+9g+qqHBwvmW4ZfFw8F5Xo6mP+3\neP/EsQEp59yZwWdmAy8CxRoYH8SD+N0upUEXyq3Ad51zk51zbwINZnYDcBmwf/CZLH5/dpeBfF9d\nO9H+1qNcMG0DOp1zn4TuHxp8il6+s6Dv/g4ze8jMVgTLtHDgVTD6KcQHpthJkJ6/qPgx/kmoPzvn\n1uIf6p4ZjPsC8Fnn3Av4h9GvA+0952Fm9fgn7/4jeO9PgM+b2Zv4fZEHBt06z+Afmv4M/2RXz9d/\nXlhQM9sCfAa40Tm3Br8v+Gwz29DPcu7wq5HgkPTf8Q95DfgN8Kpz7gn8E5ir8c8b9PaLk67hC4B9\ng3paiR9ImNnWIuUsNr1iy1DMmfg7pDX4QX2vvf8zxw/yvfwceAf/ZPFa59zEHvO9CJgafGYNfqv7\nuwXjb8fvv+3qNujE79I4xzn3In4gXVZwtDXYJ+u+A2zC7wJ7BT8vLgm+nyuAh51zq4F/A84JPvM7\n4CLn3AqK/9qo53fm4ddBX+sRwK/wfwCwN34L/R+Devg/4F/M7LFepn0XEAvq/xn8ncQNO1kPo1pE\nt6IdPs65bwC/NDMLWnwvAicF/eMiIjttQH2hwdnvq8zsmB6vL8M/lMrin8z70eAXcUxZD/zCOZfH\nr/t/U4DLSHHOOeC/+hi9zszOGM7yyK7ptyXunPsa8HdAq5ktLXi9BP/3s4vxuwRWAaeaWd3QFVdE\nRAoNpE98A/5Jg54nKPbB/xlYU/Cb1CfxLwoQEZFh0m+Im3+Zcm8/4aph+/tQtOBfRSUiIsPkg/xO\nvAn/TG+XBNBY7AOe53mRiG59IDLs9tjDf9y0aSRLIbuuz+D8ICG+Dv83sxOBNvyulO8VLUUkQn19\nz4sIJZlMqF56oXrp3a7Uy6S8f+6rYYzW51hfV5LJRJ/jdibEPQDn3Bn49yK43Tl3Mf5vVaPAj4PL\nX0VEZJgMKMTNbBP+/SUws58XvP4b/B/ni4jICNAVmyIiIaYQFxEJMYW4iEiIKcRFREJMIS4iEmIK\ncRGREFOIi4iEmEJcRCTE9D82RULC8zw2N3bQlMqxZWsr2VyeXN4jm/PI5fN4HkybVMlukyqJRnWP\novFCIT6GZLI5XtiwladeeY/ObB43u5YFcyayx/QE8djYPujyPI83N7fy4oYtRCIwZUIFkyeUM2VC\nObXVZYMaarl8nua2DA0tKZpbO4nHo1SWxaksj1NZXkJlWZyS+ODUt+d5bHqvhWfW1bF6XR1bmlL9\nfqY0HmXW1Gp2n5Zg92nVz
JmWoCpaQi4ao6E5RUc6S0c6R3s6S0c6SzqTo7w0RmV5nKqg/JXlcSrK\n4sRjUfKeR7ozR6ozRzqTI9WZJd2ZI5PNU1EWp6qihKpy/zOx6PbL7Xkeqc4crR0ZWjsytLR3kurs\nmt/786osi1Na0t+/ZN05ec+jrSNDZyZPTVXpoHwnW7Z1sPaNRra1pJmZrGL3aQmmTCinrxv75fMe\n72xt47V3mnntnSZa2jPdO9rpkyvZbXIV1RUlvX52oIb737N5Y/kmNTtrc2M79z/xOq3pLPvsXsui\n+UmmTqzs/4MFPM9j49vN/PHld/nzq3W0p3e8a3BZSYy9Z01gwZyJLNh9IrtNriQeixKLRYj2WPny\neY+G5hR12zqoa+zofqzf1kE+71FV3rXRllBdUUJVhb/hZ3J5mts63/9rz9Dc1klLeyelJTGmTChn\nSm0FyR6PsWiEVKcfDKkgKFLpLKlMjlnTa0iUxZg+qbLXnZDnebxV18oz6+p4Zl0ddY0dO7wHIBaN\nMLmmnMkTyqmqKKG8JEZZSYyy0hhlJVHKSuOUlUTJe5DN5snk8t2Pmaz/19zWSUNLmm2t/l9/m01J\nPEpleZzJNeUkaytI1paTnFARPK9gYqLvHYvnebz2bjOr19Wxel09W5v94C4vjbH/3MnslkzQ2Zkh\nFo0Sj0WIRSPdYfvOljbe3NzKO1vayOU/+LZdEo+SyeYH/P6KsjhV5XHKSmO0p7K0tGfI5gb2+Xgs\nSnmpH+Se55H3uh49PM+/jV9FWbz7r2tHU1kWp6a6jLqGNlraMzS3d9LS7u8wCr+nRGUJE6vLqE2U\nMTH4q60uo7a6lAlV/mOisnS776W5rZNX32gM/hqo37bjTrSyLM7u097faVaUxXn93WY2vt3M6+82\nk+rMFV3u6ooSdptcycREGbFolFg0QjQK0WiUWCRCNBrhojMO7rMVohAfJK0dGVLpLFNqKwb03v9Z\n9Tp/eO7tHTa0WclqFrkki+YnmZms2mEP35nJ0dzWSVN7J6+83sAfX36vO7xqq0v5yMLpLN1vOonK\nUuytbax7o5F1bzby7tZ2ehOLRojFIsSDQGhLZXvd+MtKYsRjEdpT2QH9N954LEJNlb9RpDtzbGlK\nDXhj7q2Mu02uZGaymlnJKqZPquKNzc08s66ezQ3+cpWWRDlg7hQWL5hKeWmMLU0ptjR1sLUpxdam\nFFuaUjS1de7S/AuXqba6ZwCUkcvnaU9laU9naUtl6UhlaE9nae3I0NCc7rU+Y9EI8XiUaMQP4WgE\nolF/g81k87S0ZwCoKItx4LwpLHZT2W+vSZTEYwO6Y18mm+edLW28sbmFNze3sOV3j1CayxA//jgq\nyuKUl8WoDMKwrCRGqjNHe1BufxmytKUypDM5SuP+Dq+8+8//TEk8Skfaf19bh//YGjymO3NUlsdJ\nVJaQqCylusLf6ScqSygvjZPO5GhLZegoqLf2VJZUZ5ZoJEIk4t/1tOsxGomQ9zxSwdFDezpLNtf7\nmlhZFidRVUqisoSaylJK41G2taZpbO1kW0uadKbvUI1GIiSqSqitKiObz/N2fVv3uIqyOAt2r2Wf\nORNJ1lbw1/pW3tzcypubW9jcRwNit8mV7DWjhrkzJrDXjBpqq8vY3NjOu1vbeW9rO+9ubeO9hnbq\ntnUUbRj8+ppPKMS7pDtzvLKpgbrGDmqrS7s3yImJMkriO3c4l+7M8fxf6nlq7WZeeb2BXN5jxpQq\nFs1PssglmT21ersQzmTzPPzsX/nNHzfRns4ytbaCTx89l6UHzeLhpzbx3Pp6XtnU0L1yTq2tYGay\nym9dBMGd7rFXL41HOdglWbrfdPadM6nP1l1Ta5p1b27D3mykoSVNLpcP+lI9ssHzbM4/PJ420W8t\nTp3Y9VdJTWUJkUiEfN7zN7qODK1dG29Hhng8Sk1lCTVVpUyoKqWiLL7dsuc9j6bWTuq3dbC
lqYMt\n2/xg9TyP8lI/VLoCorzUbylnPFj3+lb+Wt/GO1vadtj4SuNR9p83hQ8vmMqH5k6mrJ/D8Uw2R0c6\nRyqTIx10DRQ+RqJQEotREo9QEosSj0cpiUeJx6LUVPlB1PPIpT/5vEdjS5r6bf7RTH1TB/Xb/B1M\nJpsnn/dbm7m83+LMB4G/96xaDlkwlYV7TtqhG2CXbkW7aD8AGp59eac+N5plsjna0/7OJ1FTQSaV\nIVFZUrTr0PM8OtI5GlvTbGtJ09iSpqktzbbWTppa02xr8x+bWjvxgHkzJ7DvHhPZZ84k5kyv3qG7\nqEtHOstbdX6gd6Sz7LlbDXvOqKGqfGBdJZlsntaODPm8R87z8PLBOhE8Lv7QjPEd4s1tnbywYQsv\n/GULr2xq6PPwsLqihEmJMibVlBcEmB9ik2v8Q51cPs8rrzfy1Nr3eH79lu5gmTM9wcTqsu2mn6wt\nZ9H8qRzskjQ0p7j30Y1saUpRVR5n2WF7cuzBM4nHotttlB3pLC+9tpVnrZ41r20l3Znrbh1MqCwl\nUVVKTaUflDOmVLHIJakoG5unNgrrJe95bGlK8XZdK+9sbSNZW8EBc6dQVjq4/ahhoBDf0WDfT9zz\nPDzY6Z32UEkmE2MzxLtOmqQz/l9nJh88+sPvbW3n+b9sYePbTd1dADOmVHHQ3lPYY3qiu5+zseCv\noSVFZ2bHkI9FI0yZUE57Ott9qJusLWfJvtNZsnAau02uAiDVmeXl1xpYbXW8uHHrdi3nWDTCcYtm\ncerSPbY7mdHXCpjJ+stTWR4fNSvTcBrrN/rfVQrxHY31daVYiIeuCbetNc2rmxpZ+0YDr77RSENz\nuuj7IxHYe9YEDtw7yUHzpzCtnxOHnufR2pF5/8ReYwd1je3UNXawubGDaASOPXgmSxZOZ+6Mmh36\nrMtL4yxeMJXFC6aSyeZYu6mR59bXE4nAKUvm7NSJy5LgcF5EpC+jPsSzuTxrNm7tDu7CE3RV5XH2\n22tS98+TyuIxSkujlJXEKI3HmFBVysK9JlFTWTrg+UUiERKV/gm5uTM+2P99LonHOGDeFA6YN+UD\nTUdEpC+jOsQ9z+O2X69l9bo6wP8Fwn57TWLfOZPYZ85EZk+rHpfdDCIiXUZ1iHdd4DB3Rg2fOWYe\ne82oGfMXrYiI7IxRG+LN7Z385KH1lMSjnHPqvkybtHMXwYiIjAejtln7s/9bT2tHhk8duZcCXESk\nD6MyxJ9bX8+fX61j7swaTlg8e6SLIyIyao26EG/tyHD3g0Y8FuWLp+yju7GJiBQx6kL857//C81t\nnZx2xJ7dF9CIiEjvRlWIv7hhC3965T32mJ7gox9WN4qISH9GTYi3p7Lc/aARi0b44sf26fNGMyIi\n8r5Rk5S/eOQvNLakWXbYHsxKVo90cUREQmFEfyeeyeZ47Z1mXtnUwBNr3mX3qdWcsmTOSBZJRCRU\nhjXE21MZXnptK+vf2sb6t7bx+rvN3ffOLiuNsfyUfXRFpojIThjWED/jst913/Q+EoHdpybYe/YE\n3Oxa5s+uJbETN6oSEZFhDvGD3VSm1ZYzf3Yt82ZOGLP/zEBEZLgMa4p++5wlY/rG7SIiw00d0CIi\nIaYQFxEJMYW4iEiIKcRFREKs6IlN51wUuBnYH0gD55jZxoLxnwS+AXjASjO7dQjLKiIiPfTXEj8N\nKDWzpcAK4Joe468FTgAOAy5xzn2w/ywsIiI7pb8QPwx4AMDMngYW9xifAWqBCiCC3yIXEZFh0l+I\n1wDNBcO5oIulyzXAs8DLwK/NrPC9IiIyxPq72KcZSBQMR80sD+Cc2x34CjAHaAd+4pz7tJndW2yC\nyWSi2OhxS/XSO9VL73a6XoL/kDWW63MsL1sx/YX4KmAZcI9zbgmwpmBcOZAD0maWd87V4XetFKUr\nNneUTCZUL71QvfRuV+plUnDPooYxWp9jfV0ptoPqL8T
vA05wzq0Khpc7584Aqs3sdufcXcAfnXMp\nYANw5yCUV0REBqhoiJuZB5zf4+X1BeOvA64bgnKJiMgA6GIfEZEQU4iLiISYQlxEJMQU4iIiIaYQ\nFxEJMYW4iEiIKcRFREJMIS4iEmIKcRGREFOIi4iEmEJcRCTEFOIiIiGmEBcRCTGFuIhIiCnERURC\nTCEuIhJiCnERkRBTiIuIhJhCXEQkxBTiIiIhphAXEQkxhbiISIgpxEVEQkwhLiISYgpxEZEQU4iL\niISYQlxEJMQU4iIiIaYQFxEJMYW4iEiIKcRFREJMIS4iEmIKcRGREFOIi4iEmEJcRCTEFOIiIiGm\nEBcRCbF4sZHOuShwM7A/kAbOMbONBeMPAa4BIsDbwBfMrHPoiisiIoX6a4mfBpSa2VJgBX5gA+Cc\niwC3AWeb2RHAw8CeQ1VQERHZUX8hfhjwAICZPQ0sLhg3H9gKXOycexSoNTMbikKKiEjv+gvxGqC5\nYDgXdLEATAGWAjcCxwPHOeeOGfwiiohIX4r2ieMHeKJgOGpm+eD5VmBDV+vbOfcAfkv9D8UmmEwm\nio0et1QvvVO99G6n6yUa2bXPhchYXrZi+gvxVcAy4B7n3BJgTcG414Bq59zc4GTnEcCP+pthfX3L\nrpZ1zEomE6qXXqheercr9TIp7wHQMEbrc6yvK8V2UP2F+H3ACc65VcHwcufcGUC1md3unPt74GfB\nSc5VZva7QSmxiIgMSNEQNzMPOL/Hy+sLxv8BOHQIyiUiIgOgi31EREJMIS4iEmIKcRGREFOIi4iE\nmEJcRCTEFOIiIiGmEBcRCTGFuIhIiCnERURCTCEuIhJiCnERkRBTiIuIhJhCXEQkxBTiIiIhphAX\nEQkxhbiISIgpxEVEQkwhLiISYgpxEZEQU4iLiISYQlxEJMQU4iIiIaYQFxEJMYW4iEiIKcRFREJM\nIS4iEmIKcRGREFOIi4iEmEJcRCTEFOIiIiGmEBcRCTGFuIhIiCnERURCTCEuIhJiCnERkRBTiIuI\nhJhCXEQkxOLFRjrnosDNwP5AGjjHzDb28r7bgK1mdumQlFJERHrVX0v8NKDUzJYCK4Brer7BOXce\nsB/gDX7xRESkmP5C/DDgAQAzexpYXDjSObcU+DDwQyAyFAUUEZG+9RfiNUBzwXAu6GLBObcb8C3g\nKyjARURGRNE+cfwATxQMR80sHzz/NDAF+F9gOlDpnHvVzO4uNsFkMlFs9Lileumd6qV3O10v0ciu\nfS5ExvKyFdNfiK8ClgH3OOeWAGu6RpjZjcCNAM65s4AF/QU4QH19y66XdoxKJhOql16oXnq3K/Uy\nKe+fsmoYo/U51teVYjuo/kL8PuAE59yqYHi5c+4MoNrMbu/xXp3YFBEZZkVD3Mw84PweL6/v5X13\nDWahRERkYHSxj4hIiCnERURCTCEuIhJiCnERkRBTiIuIhJhCXEQkxBTiIiIhphAXEQkxhbiISIgp\nxEVEQkwhLiISYgpxEZEQU4iLiISYQlxEJMQU4iIiIaYQFxEJMYW4iEiIKcRFREJMIS4iEmIKcRGR\nEFOIi4iEmEJcRCTEFOIiIiGmEBcRCTGFuIhIiCnERURCTCEuIhJiCnERkRBTiIuIhJhCXEQkxBTi\nIiIhphAXEQkxhbiISIgpxEVEQkwhLiISYgpxEZEQixcb6ZyLAjcD+wNp4Bwz21gw/gzgH4Es8BLw\nD2bmDV1xRUSkUH8t8dOAUjNbCqwAruka4ZyrAL4DHG1mhwMTgFOHqqAiIrKj/kL8MOABADN7Glhc\nMC4FfMTMUsFwHOgY9BKKiEif+gvxGqC5YDgXdLFgZp6Z1QM45y4Eqszs90NTTBER6U3RPnH8AE8U\nDEfNLN81EAT6fwDzgNMHMsNkMtH/m8Yh1UvvVC+92+l6iUZ27XMhMpaXrZj+QnwVsAy4xzm3BFjT\nY/wP8btVPjnQE5r
19S07XcixLplMqF56oXrp3a7Uy6S8v3k2jNH6HOvrSrEdVH8hfh9wgnNuVTC8\nPPhFSjWwGvgi8DjwiHMO4AYz++8PXGIRERmQoiEetK7P7/Hy+oLnsUEvkYiIDJgu9hERCTGFuIhI\niCnERURCTCEuIhJiCnERkRBTiIuIhJhCXEQkxBTiIiIhphAXEQmx/i67H/Py+TzXXHMVGzduoKSk\nhBUrLmPmzFkjXSwRkQEZ9y3xJ554lEwmw623ruTLX76QH/zgupEukojIgI2qlvj/e2QDz6yrG9Rp\nHrJgKp89dl6f49eseZFDD10KwMKF+7Fu3auDOn8RkaE07lvi7e1tVFVVdQ9Ho1Hy+XyRT4iIjB6j\nqiX+2WPnFW01D4XKyira29u7hz3PIxod9/s2EQmJcZ9W++9/AE895d8u/eWXX2Lu3OHdiYiIfBCj\nqiU+Eo488hieeeZpzj//iwBceum3R7hEIiIDN+5DPBKJ8NWvXjrSxRAR2SXjvjtFRCTMFOIiIiGm\nEBcRCTGFuIhIiCnERURCTCEuIhJi4z7En3tuNUcccQgPP/zQdq+fddbfcuWV/zKk83700Yc599wv\n8JnPfIZ77vkvALLZLN/5zmVccMG5nHvuWTz55ONDWgYRCbdx/ztxgDlz9uD3v3+I4447EYCNGzeQ\nSqWGdJ65XI5bb72JlSv/k9mzp/LRj57EiSeexJNPPk5t7UQuu+w7NDc3s3z55zj88COHtCwiEl6j\nKsSrLv8mZb/+70GdZnrZabRdfkWf4yORCHPn7s1bb71JW1srVVXVPPjg/3LiiSezefN7APzyl7/g\n8ccfpaOjg9raWq688mruv/9XvPTSi1x++Xe54opvs3Dhh/jkJz/dPd3bb7+FNWte2G4+1177A+Jx\nv8pjsRg/+9m9RKNRGhoayOfzlJSUcuyxJ3DMMccD4Hl5YrHYoNaHiIwtoyrER9LRRx/LY4/9gVNO\nWca6dWs588yz2Lz5PTzPo7m5meuvv5lIJMLFF1/IunVrOf30z7J69Z/57ncvJ5fLbRfgAOeee36/\n84xGozz22CNcf/33WLLkcMrLy7tvvtXe3sZll63gS1/6hyFZXhEZG0ZViLddfkXRVvNQ8DwPgOOP\n/yhXX30VM2bM5IADDuoeH4lEiMfjXH75N6ioqKS+fjPZbBaAM888i/PP/yIrV/5kh+nedtvNvPTS\ni9u9dt11N3W3xLscddSxnH76x/mnf7qEBx74LaecsozNm9/jn//5a3zqU5/h+OM/OtiLLCJjyKgK\n8ZE0Y8bCpz80AAAEZ0lEQVRMUqkO7r33v/jyly/kr399C/D7x5944jFuu+1OUqkU55zzeTzPI5PJ\ncOON1/K1r/0zV199FTfddPt2Ad1fC7qtrZWvf/1irrvuJiKRCOXlFUHXylYuvvgrXHLJCg4+ePGQ\nLrOIhN+4/3VKJBIhEokAcNxxJ1BXV8esWbO7X5s1axYVFRVccMG5/Ou/Xsb8+Quor6/n1ltv5LDD\njmDZstM49NCPcMstN+7UfKuqqjnxxJO54IJz+dznPkc0GuHEE0/m7rvvoLW1lTvuuJ0LLzyPCy88\nj3Q6PejLLSJjQ6SrO2GYePX1LcM5v1BIJhOoXnakeundrtTLpEX7AdDw7MtDUaQRN9bXlWQyEelr\n3LhviYuIhJlCXEQkxBTiIiIhphAXEQkxhbiISIgpxEVEQqzoxT7OuShwM7A/kAbOMbONBeOXAZcB\nWWClmf1oCMsqIiI99NcSPw0oNbOlwArgmq4RzrkS4FrgBOAo4EvOualDVVAREdlRfyF+GPAAgJk9\nDRReB74PsMHMmswsAzwJ6J6pIiLDqL8QrwGaC4ZzQRdL17imgnEtwIRBLJuIiPSjvxtgNQOJguGo\nmeWD5009xiWAxn6mF0kmE/28ZXxSvfRO9dK7na6XN9/wPzcEZRktxuu60l9LfBVwC
oBzbgmwpmDc\nOmBv59xE51wpflfKn4aklCIi0quiN8ByzkV4/9cpAMuBRUC1md3unDsV+Bb+zuDHZnbLEJdXREQK\nDPddDEVEZBDpYh8RkRBTiIuIhJhCXEQkxIblf2z2d/n+eOOcOxS4ysyOcc7NA+4E8sDLwAVmNu5O\nVARXAK8E5gBlwBXAq4zjunHOxYDbgfmAB3wZf/u5k3FaJ4WCK8SfBY7Dr487GYf1Mlwt8T4v3x9v\nnHNfw98wy4KXrgW+YWZHAhHgEyNVthF2JlAf1MNJwE3468l4rptTgbyZHQ58E7gS1QnQvdP/IdCG\nXw/jdjsarhAvdvn+eLMB+BT+igZwsJk9Hjz/HXD8iJRq5N2D/3NV8NfLDOO8bszsfuC8YHAP/Ivp\nFo3nOinwPeAW4N1geNyuK8MV4sUu3x9XzOxX+Hd97FL4D1BbGae3LjCzNjNrdc4l8AP9m2y/fo7L\nujGznHPuTuAG4KdofcE5dzb+UdtDwUsRxnG9DFeQFrt8f7wrrIcEsG2kCjLSnHOzgUeAu83s56hu\nADCzswEH/AgoLxg1XutkOXCCc+4PwIHAXWx/R4FxVS/DFeLFLt8f7553zh0VPD8ZeLzYm8cq59w0\n4CHga2Z2Z/DyuK4b59znnXOXBoMdQA5YPZ7rBMDMjjKzo83sGOAF4AvAA+O1Xobl1ynAffh7zlXB\n8PJhmu9o1nXm/BLg9uD+M2uBe0euSCPqG/iHwN9yznX1jf8j8P1xXDf3Anc65x4DSvDrYx1aX3ry\nGMfbkS67FxEJsXF5clFEZKxQiIuIhJhCXEQkxBTiIiIhphAXEQkxhbiISIgpxEVEQkwhLiISYv8f\n9SmTTmM2dXkAAAAASUVORK5CYII=\n", + "text": [ + "" + ] + } + ], + "prompt_number": 108 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print(metrics.classification_report(spam_test, prediction))\n", + "print(metrics.confusion_matrix(spam_test, prediction))\n", + "print(metrics.f1_score(spam_test, prediction))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.94 0.97 0.95 1135\n", + " 1 0.94 0.90 0.92 705\n", + "\n", + "avg / total 0.94 0.94 0.94 1840\n", + "\n", + "[[1096 39]\n", + " [ 71 634]]\n", + "0.920174165457\n" + ] + } + ], + "prompt_number": 69 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file