-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHTML
More file actions
99 lines (84 loc) · 3.25 KB
/
HTML
File metadata and controls
99 lines (84 loc) · 3.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
<html>
<head>
<title>Title</title>
</head>
<body>
<p> Coursera Machine Learning Class_Prediction Assignment </p>
install.packages("Hmisc") <br />
install.packages("caret") <br />
install.packages("randomForest") <br />
install.packages("AppliedPredictiveModeling") <br />
install.packages("rattle") <br />
install.packages("rpart.plot") <br />
install.packages("RColorBrewer") <br />
<br />
<br />
### Load packages <br />
library(Hmisc) <br />
library(caret) <br />
library(randomForest) <br />
library(AppliedPredictiveModeling) <br />
library(rattle) <br />
library(rpart.plot) <br />
library(RColorBrewer) <br />
set.seed(888)<br />
<br />
<br />
### Download data <br />
# Local file names and the remote locations of the two course data sets <br />
training.file <- 'pml-training.csv' <br />
testing.file <- 'pml-test.csv' <br />
training.url <- 'http://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv' <br />
testing.url <- 'http://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv' <br />
# Download each file only when it is not already present, so re-running the script does not fetch the data again <br />
if (!file.exists(training.file)) download.file(training.url, training.file) <br />
if (!file.exists(testing.file)) download.file(testing.url, testing.file) <br />
<br />
<br />
### Clean data <br />
# Read both CSV files, treating "NA", empty strings and "#DIV/0!" as missing values <br />
df.training <- read.csv(training.file, na.strings=c("NA","","#DIV/0!"), header=TRUE) <br />
colnames.train <- colnames(df.training) <br />
df.testing <- read.csv(testing.file, na.strings=c("NA","","#DIV/0!"), header=TRUE) <br />
colnames.test <- colnames(df.testing) <br />
# Keep only the columns that contain no missing values at all <br />
df.training <- df.training[, colSums(is.na(df.training)) == 0] <br />
df.testing <- df.testing[, colSums(is.na(df.testing)) == 0] <br />
# Drop the first 7 columns (assumed to be bookkeeping columns rather than predictors; confirm against the data) <br />
df.training <- df.training[, -c(1:7)] <br />
df.testing <- df.testing[, -c(1:7)] <br />
# The outcome must be a factor for the classification models and confusionMatrix below; <br />
# read.csv returns character columns by default since R 4.0 <br />
df.training$classe <- as.factor(df.training$classe) <br />
# Verify that the original column names, except the last one, are identical in the training and test set <br />
# (head(x, -1) drops the last element; the former 1:length(x)-1 parsed as (1:length(x)) - 1) <br />
all.equal(head(colnames.train, -1), head(colnames.test, -1)) <br />
<br />
<br />
### Splitting data into training and testing sets <br />
# Select the rows of the labelled data that go into the model-building partition (p = 0.75) <br />
inTraining.matrix <- createDataPartition(y = df.training$classe, p = 0.75, list = FALSE) <br />
# Selected rows form the training partition; every remaining row forms the hold-out partition <br />
training.data.df <- df.training[inTraining.matrix, ] <br />
testing.data.df <- df.training[-inTraining.matrix, ] <br />
# Show the dimensions and the first rows of each partition <br />
dim(x = training.data.df) <br />
dim(x = testing.data.df) <br />
head(x = training.data.df) <br />
head(x = testing.data.df) <br />
<br />
<br />
#Decision tree <br />
# Fit a classification tree for classe using every other column of the training partition <br />
mod.decisiontree <- rpart(formula = classe ~ ., data = training.data.df, method = "class") <br />
# Predicted class labels for the hold-out partition <br />
pred.decisiontree <- predict(object = mod.decisiontree, newdata = testing.data.df, type = "class") <br />
# Draw the fitted tree <br />
rpart.plot(x = mod.decisiontree, main = "Classification Tree", extra = 102, under = TRUE, faclen = 0) <br />
# Compare the predictions with the true labels of the hold-out partition <br />
confusionMatrix(data = pred.decisiontree, reference = testing.data.df$classe) <br />
<br />
<br />
#Random forest <br />
# Fit a random forest for classe using every other column of the training partition <br />
# (method="class" is an rpart argument, not a randomForest one, so it is not passed here) <br />
mod.randomforest <- randomForest(classe ~ ., data=training.data.df) <br />
# Evaluate on the hold-out partition, not on the rows the model was fitted on, <br />
# so the result is comparable with the decision tree above <br />
pred.randomforest <- predict(mod.randomforest, testing.data.df, type = "class") <br />
confusionMatrix(pred.randomforest, testing.data.df$classe) <br />
<br />
<br />
# Using random forest for final prediction <br />
# Predict on the cleaned test set (df.testing), not on the training data the model was built from <br />
pred.final <- predict(mod.randomforest, df.testing, type="class") <br />
<br />
pred.final <br />
<p>
<img src="C:/Users/Brooke/Dropbox/Rplot.png" alt="Rplot" style="float:left;width:420px;height:420px">
</p>
</body>
</html>