diff --git a/README.md b/README.md
index dcd6e23..bdc7411 100644
--- a/README.md
+++ b/README.md
@@ -1,46 +1 @@
-# Classifing spam
-
-## Description
-
-Use the Spambase dataset to classify spam. This data is already parsed down from email to features.
-
-## Objectives
-
-### Learning Objectives
-
-After completing this assignment, you should understand:
-
-* Simple Bayesian analysis
-* The importance of separating training and test data
-
-### Performance Objectives
-
-After completing this assignment, you should be able to:
-
-* Create a Bayesian classifier
-* Train your classifier
-* Test your classifier
-
-## Details
-
-### Deliverables
-
-* A Git repo called spambase containing at least:
- * `README.md` file explaining how to run your project
- * a `requirements.txt` file
-
-### Requirements
-
-* No PEP8 or Pyflakes warnings or errors
-
-## Normal Mode
-
-Go to the UCI Machine Learning repository and [download the Spambase dataset](https://archive.ics.uci.edu/ml/datasets/Spambase). Make sure you [read the documentation for the data](https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.DOCUMENTATION). This explains what the attributes are in the data file.
-
-Subsample the data set so 60% is training data and 40% is test data. You can subsample however you like, including splitting the original file. Just make sure that you have a representative data set. (The original is about 60% not-spam and 40% spam.)
-
-Then write code to classify the data into spam and not-spam, training with your training data and testing on your test data. Try multiple classifiers to see which gives you the highest success.
-
-## Hard Mode
-
-In addition to the normal mode requirements, try reducing or changing your features in order to get better results.
+The entirety of my code and analysis is contained within an IPython Notebook folder titled spambase.ipynb.
diff --git a/requirements.txt b/requirements.txt
index ed8975d..759d71f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+ipython[all]
scikit-learn
pandas
numpy
diff --git a/spambase.ipynb b/spambase.ipynb
new file mode 100644
index 0000000..61b396a
--- /dev/null
+++ b/spambase.ipynb
@@ -0,0 +1,849 @@
+{
+ "metadata": {
+ "name": "",
+ "signature": "sha256:6b28f1e85f1b01944691cf3cd5e7fc252c80c8193bfff35d7ee22478ec2cd2aa"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "from sklearn.naive_bayes import MultinomialNB\n",
+ "from sklearn import metrics"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 92
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "data = pd.read_csv(\"spambase.data\")"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 3
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "data.head()"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "html": [
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 0.64 | \n",
+ " 0.64.1 | \n",
+ " 0.1 | \n",
+ " 0.32 | \n",
+ " 0.2 | \n",
+ " 0.3 | \n",
+ " 0.4 | \n",
+ " 0.5 | \n",
+ " 0.6 | \n",
+ " ... | \n",
+ " 0.40 | \n",
+ " 0.41 | \n",
+ " 0.42 | \n",
+ " 0.778 | \n",
+ " 0.43 | \n",
+ " 0.44 | \n",
+ " 3.756 | \n",
+ " 61 | \n",
+ " 278 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.21 | \n",
+ " 0.28 | \n",
+ " 0.50 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.28 | \n",
+ " 0.21 | \n",
+ " 0.07 | \n",
+ " 0.00 | \n",
+ " 0.94 | \n",
+ " ... | \n",
+ " 0.00 | \n",
+ " 0.132 | \n",
+ " 0 | \n",
+ " 0.372 | \n",
+ " 0.180 | \n",
+ " 0.048 | \n",
+ " 5.114 | \n",
+ " 101 | \n",
+ " 1028 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0.06 | \n",
+ " 0.00 | \n",
+ " 0.71 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.19 | \n",
+ " 0.19 | \n",
+ " 0.12 | \n",
+ " 0.64 | \n",
+ " 0.25 | \n",
+ " ... | \n",
+ " 0.01 | \n",
+ " 0.143 | \n",
+ " 0 | \n",
+ " 0.276 | \n",
+ " 0.184 | \n",
+ " 0.010 | \n",
+ " 9.821 | \n",
+ " 485 | \n",
+ " 2259 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.00 | \n",
+ " 0.31 | \n",
+ " 0.63 | \n",
+ " 0.31 | \n",
+ " 0.63 | \n",
+ " ... | \n",
+ " 0.00 | \n",
+ " 0.137 | \n",
+ " 0 | \n",
+ " 0.137 | \n",
+ " 0.000 | \n",
+ " 0.000 | \n",
+ " 3.537 | \n",
+ " 40 | \n",
+ " 191 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.00 | \n",
+ " 0.31 | \n",
+ " 0.63 | \n",
+ " 0.31 | \n",
+ " 0.63 | \n",
+ " ... | \n",
+ " 0.00 | \n",
+ " 0.135 | \n",
+ " 0 | \n",
+ " 0.135 | \n",
+ " 0.000 | \n",
+ " 0.000 | \n",
+ " 3.537 | \n",
+ " 40 | \n",
+ " 191 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 1.85 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " ... | \n",
+ " 0.00 | \n",
+ " 0.223 | \n",
+ " 0 | \n",
+ " 0.000 | \n",
+ " 0.000 | \n",
+ " 0.000 | \n",
+ " 3.000 | \n",
+ " 15 | \n",
+ " 54 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows \u00d7 58 columns
\n",
+ "
"
+ ],
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 31,
+ "text": [
+ " 0 0.64 0.64.1 0.1 0.32 0.2 0.3 0.4 0.5 0.6 ... 0.40 \\\n",
+ "0 0.21 0.28 0.50 0 0 0.28 0.21 0.07 0.00 0.94 ... 0.00 \n",
+ "1 0.06 0.00 0.71 0 0 0.19 0.19 0.12 0.64 0.25 ... 0.01 \n",
+ "2 0.00 0.00 0.00 0 0 0.00 0.31 0.63 0.31 0.63 ... 0.00 \n",
+ "3 0.00 0.00 0.00 0 0 0.00 0.31 0.63 0.31 0.63 ... 0.00 \n",
+ "4 0.00 0.00 0.00 0 0 0.00 0.00 1.85 0.00 0.00 ... 0.00 \n",
+ "\n",
+ " 0.41 0.42 0.778 0.43 0.44 3.756 61 278 1 \n",
+ "0 0.132 0 0.372 0.180 0.048 5.114 101 1028 1 \n",
+ "1 0.143 0 0.276 0.184 0.010 9.821 485 2259 1 \n",
+ "2 0.137 0 0.137 0.000 0.000 3.537 40 191 1 \n",
+ "3 0.135 0 0.135 0.000 0.000 3.537 40 191 1 \n",
+ "4 0.223 0 0.000 0.000 0.000 3.000 15 54 1 \n",
+ "\n",
+ "[5 rows x 58 columns]"
+ ]
+ }
+ ],
+ "prompt_number": 31
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "xdata = data.loc[::,\"0\":\"278\"]"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 84
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "ydata = data[\"1\"]"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 85
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "from sklearn.cross_validation import train_test_split\n",
+ "x_train, x_test, y_train, y_test = train_test_split(xdata, ydata, test_size=0.4, random_state=0)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 86
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "Create a function to print out the results of each classifier:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def run_classifier(clf, x_train, x_test, y_train, y_test, x_data, y_data):\n",
+ " clf.fit(x_train, y_train)\n",
+ " predicted = clf.predict(x_test)\n",
+ " print(metrics.classification_report(y_test, predicted))\n",
+ " print(metrics.confusion_matrix(y_test, predicted))\n",
+ " print(metrics.f1_score(y_test, predicted))\n",
+ " scores = cross_val_score(clf, xdata, ydata, cv=5)\n",
+ " "
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 94
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "Multinomial Naive Bayes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "bayclass = MultinomialNB()"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 87
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "run_classifier(bayclass, x_train, x_test, y_train, y_test, xdata, ydata)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.82 0.83 0.83 1097\n",
+ " 1 0.75 0.74 0.74 743\n",
+ "\n",
+ "avg / total 0.79 0.79 0.79 1840\n",
+ "\n",
+ "[[912 185]\n",
+ " [196 547]]\n",
+ "0.741694915254\n"
+ ]
+ }
+ ],
+ "prompt_number": 104
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "Gaussian Naive Bayes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "from sklearn.naive_bayes import GaussianNB"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 32
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "gauss = GaussianNB()"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 33
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "run_classifier(gauss, x_train, x_test, y_train, y_test, xdata, ydata)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.97 0.69 0.80 1097\n",
+ " 1 0.68 0.97 0.80 743\n",
+ "\n",
+ "avg / total 0.85 0.80 0.80 1840\n",
+ "\n",
+ "[[753 344]\n",
+ " [ 23 720]]\n",
+ "0.796900940786\n"
+ ]
+ }
+ ],
+ "prompt_number": 96
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "Decision Tree Classifier"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "from sklearn.tree import DecisionTreeClassifier\n",
+ "tree = DecisionTreeClassifier()"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 99
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "run_classifier(tree, x_train, x_test, y_train, y_test, xdata, ydata)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.94 0.91 0.93 1097\n",
+ " 1 0.88 0.92 0.90 743\n",
+ "\n",
+ "avg / total 0.92 0.91 0.91 1840\n",
+ "\n",
+ "[[1003 94]\n",
+ " [ 63 680]]\n",
+ "0.89650626236\n"
+ ]
+ }
+ ],
+ "prompt_number": 98
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "Random Forest Classifier"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "from sklearn.ensemble import RandomForestClassifier"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 53
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "forest = RandomForestClassifier()"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 101
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "run_classifier(forest, x_train, x_test, y_train, y_test, xdata, ydata)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.94 0.97 0.96 1097\n",
+ " 1 0.96 0.92 0.94 743\n",
+ "\n",
+ "avg / total 0.95 0.95 0.95 1840\n",
+ "\n",
+ "[[1069 28]\n",
+ " [ 63 680]]\n",
+ "0.937284631289\n"
+ ]
+ }
+ ],
+ "prompt_number": 100
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "K Neighbors Classifier"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "from sklearn.neighbors import KNeighborsClassifier"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 56
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "neighbors = KNeighborsClassifier()"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 103
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "run_classifier(neighbors, x_train, x_test, y_train, y_test, xdata, ydata)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.81 0.85 0.83 1097\n",
+ " 1 0.76 0.70 0.73 743\n",
+ "\n",
+ "avg / total 0.79 0.79 0.79 1840\n",
+ "\n",
+ "[[936 161]\n",
+ " [220 523]]\n",
+ "0.733006306938\n"
+ ]
+ }
+ ],
+ "prompt_number": 102
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "Can we figure out which features are helpful and which aren't?"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "I tried something called ExtraTreesClassifier"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "from sklearn.ensemble import ExtraTreesClassifier\n",
+ "\n",
+ "X, y = xdata, ydata\n",
+ "X.shape"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 42,
+ "text": [
+ "(4600, 57)"
+ ]
+ }
+ ],
+ "prompt_number": 42
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "clf = ExtraTreesClassifier()\n",
+ "X_new = clf.fit(X, y).transform(X)\n",
+ "clf.feature_importances_ "
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 43,
+ "text": [
+ "array([ 0.01329835, 0.0079715 , 0.01509007, 0.0021716 , 0.00276668,\n",
+ " 0.01658062, 0.09933421, 0.01570107, 0.02455518, 0.00837051,\n",
+ " 0.0183419 , 0.02024683, 0.00774485, 0.00722078, 0.01486713,\n",
+ " 0.04068142, 0.03525079, 0.01349685, 0.04942057, 0.00811502,\n",
+ " 0.07483391, 0.0100333 , 0.0315938 , 0.01704934, 0.03070221,\n",
+ " 0.01114189, 0.0291203 , 0.01327662, 0.00971931, 0.01131513,\n",
+ " 0.00479219, 0.00280843, 0.00443832, 0.00210647, 0.00783549,\n",
+ " 0.00497984, 0.02002667, 0.00151691, 0.00518695, 0.00257288,\n",
+ " 0.0024335 , 0.01093254, 0.00655802, 0.00576946, 0.01401507,\n",
+ " 0.02525882, 0.00134001, 0.00270929, 0.01051194, 0.01370621,\n",
+ " 0.00558226, 0.05104588, 0.02472411, 0.00536161, 0.03504849,\n",
+ " 0.037736 , 0.03099089])"
+ ]
+ }
+ ],
+ "prompt_number": 43
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "X_new.shape"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 44,
+ "text": [
+ "(4600, 18)"
+ ]
+ }
+ ],
+ "prompt_number": 44
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "X_new"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 48,
+ "text": [
+ "array([[ 2.10000000e-01, 0.00000000e+00, 2.10000000e-01, ...,\n",
+ " 5.11400000e+00, 1.01000000e+02, 1.02800000e+03],\n",
+ " [ 1.90000000e-01, 6.40000000e-01, 3.80000000e-01, ...,\n",
+ " 9.82100000e+00, 4.85000000e+02, 2.25900000e+03],\n",
+ " [ 3.10000000e-01, 3.10000000e-01, 3.10000000e-01, ...,\n",
+ " 3.53700000e+00, 4.00000000e+01, 1.91000000e+02],\n",
+ " ..., \n",
+ " [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,\n",
+ " 1.40400000e+00, 6.00000000e+00, 1.18000000e+02],\n",
+ " [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,\n",
+ " 1.14700000e+00, 5.00000000e+00, 7.80000000e+01],\n",
+ " [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,\n",
+ " 1.25000000e+00, 5.00000000e+00, 4.00000000e+01]])"
+ ]
+ }
+ ],
+ "prompt_number": 48
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "x_newtrain, x_newtest, y_newtrain, y_newtest = train_test_split(X_new, y, test_size=0.4, random_state=0)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 105
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "newtree = DecisionTreeClassifier()"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 107
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "run_classifier(newtree, x_newtrain, x_newtest, y_newtrain, y_newtest, X_new, y)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.92 0.92 0.92 1097\n",
+ " 1 0.88 0.89 0.88 743\n",
+ "\n",
+ "avg / total 0.91 0.91 0.91 1840\n",
+ "\n",
+ "[[1006 91]\n",
+ " [ 82 661]]\n",
+ "0.884280936455\n"
+ ]
+ }
+ ],
+ "prompt_number": 108
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "newmultibay = MultinomialNB()\n",
+ "run_classifier(newmultibay, x_newtrain, x_newtest, y_newtrain, y_newtest, X_new, y)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.76 0.75 0.76 1097\n",
+ " 1 0.64 0.65 0.65 743\n",
+ "\n",
+ "avg / total 0.71 0.71 0.71 1840\n",
+ "\n",
+ "[[824 273]\n",
+ " [259 484]]\n",
+ "0.645333333333\n"
+ ]
+ }
+ ],
+ "prompt_number": 111
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "newgauss = GaussianNB()\n",
+ "run_classifier(newgauss, x_newtrain, x_newtest, y_newtrain, y_newtest, X_new, y)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.89 0.94 0.92 1097\n",
+ " 1 0.91 0.84 0.87 743\n",
+ "\n",
+ "avg / total 0.90 0.90 0.90 1840\n",
+ "\n",
+ "[[1035 62]\n",
+ " [ 122 621]]\n",
+ "0.870967741935\n"
+ ]
+ }
+ ],
+ "prompt_number": 112
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "newforest = RandomForestClassifier()\n",
+ "run_classifier(newforest, x_newtrain, x_newtest, y_newtrain, y_newtest, X_new, y)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.94 0.96 0.95 1097\n",
+ " 1 0.94 0.91 0.93 743\n",
+ "\n",
+ "avg / total 0.94 0.94 0.94 1840\n",
+ "\n",
+ "[[1051 46]\n",
+ " [ 64 679]]\n",
+ "0.925068119891\n"
+ ]
+ }
+ ],
+ "prompt_number": 113
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "newneighbors = KNeighborsClassifier()\n",
+ "run_classifier(newneighbors, x_newtrain, x_newtest, y_newtrain, y_newtest, X_new, y)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.79 0.86 0.82 1097\n",
+ " 1 0.76 0.67 0.71 743\n",
+ "\n",
+ "avg / total 0.78 0.78 0.78 1840\n",
+ "\n",
+ "[[939 158]\n",
+ " [246 497]]\n",
+ "0.711015736767\n"
+ ]
+ }
+ ],
+ "prompt_number": 114
+ }
+ ],
+ "metadata": {}
+ }
+ ]
+}
\ No newline at end of file