From 6eae1aa623e71d786384d92a48abe9d348fbb719 Mon Sep 17 00:00:00 2001
From: wethenwethen
Date: Sat, 21 Mar 2015 10:03:49 +0800
Subject: [PATCH 1/2] Update msvmRFE.R

make it to support multi-class data.
date: Mar-21-2015
---
 msvmRFE.R | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 51 insertions(+), 4 deletions(-)

diff --git a/msvmRFE.R b/msvmRFE.R
index 94c1f56..337fce1 100644
--- a/msvmRFE.R
+++ b/msvmRFE.R
@@ -12,7 +12,7 @@ svmRFE.wrap <- function(test.fold, X, ...) {
     return(list(feature.ids=features.ranked, train.data.ids=row.names(train.data), test.data.ids=row.names(test.data)))
 }
 
-svmRFE <- function(X, k=1, halve.above=5000) {
+svmRFE <- function(X, k=1, halve.above=50,...) {
 # Feature selection with Multiple SVM Recursive Feature Elimination (RFE) algorithm
     n = ncol(X) - 1
 
@@ -49,12 +49,14 @@ svmRFE <- function(X, k=1, halve.above=5000) {
             c = vbar / vsd
         } else {
             # Only do 1 pass (i.e. regular SVM-RFE)
-            w = getWeights(NULL, X[, c(1, 1+i.surviving)])
+            w = getWeights2(NULL, X[, c(1, 1+i.surviving)])
             c = w * w
         }
 
         # Rank the features
-        ranking = sort(c, index.return=T)$ix
+        rankingCriteria = 0
+        for(i in 1:ncol(c))rankingCriteria[i] = mean(c[,i])
+        ranking = sort(rankingCriteria, index.return=T)$ix
         if(length(i.surviving) == 1) {
             ranking = 1
         }
@@ -97,6 +99,41 @@ getWeights <- function(test.fold, X) {
     t(svmModel$coefs) %*% svmModel$SV
 }
 
+
+svm.weights<-function(model){
+    w=0
+    if(model$nclasses==2){
+        w=t(model$coefs)%*%model$SV
+    }else{ #when we deal with OVO svm classification
+        ## compute start-index
+        start <- c(1, cumsum(model$nSV)+1)
+        start <- start[-length(start)]
+
+        calcw <- function (i,j) {
+            ## ranges for class i and j:
+            ri <- start[i] : (start[i] + model$nSV[i] - 1)
+            rj <- start[j] : (start[j] + model$nSV[j] - 1)
+
+            ## coefs for (i,j):
+            coef1 <- model$coefs[ri, j-1]
+            coef2 <- model$coefs[rj, i]
+            ## return w values:
+            w=t(coef1)%*%model$SV[ri,]+t(coef2)%*%model$SV[rj,]
+            return(w)
+        }
+
+        W=NULL
+        for (i in 1 : (model$nclasses - 1)){
+            for (j in (i + 1) : model$nclasses){
+                wi=calcw(i,j)
+                W=rbind(W,wi)
+            }
+        }
+        w=W
+    }
+    return(w)
+}
+
 WriteFeatures <- function(results, input, save=T, file='features_ranked.txt') {
 # Compile feature rankings across multiple folds
     featureID = sort(apply(sapply(results, function(x) sort(x$feature, index.return=T)$ix), 1, mean), index=T)$ix
@@ -140,4 +177,14 @@ PlotErrors <- function(errors, errors2=NULL, no.info=0.5, ylim=range(c(errors, e
     AddLine(errors)
     if(!is.null(errors2)) AddLine(errors2, 'gray30')
     abline(h=no.info, lty=3)
-}
\ No newline at end of file
+}
+
+getWeights2 <- function(test.fold, X) {
+# Fit a linear SVM model and obtain feature weights
+    train.data = X
+    if(!is.null(test.fold)) train.data = X[-test.fold, ]
+
+    svmModel = svm(train.data[, -1], train.data[, 1], cost=10, cachesize=500,
+        scale=F, type="C-classification", kernel="linear")
+    return(svm.weights(svmModel))
+}

From 228421673a3c02236e5c9aeef2c0d59d561d1a45 Mon Sep 17 00:00:00 2001
From: wethenwethen
Date: Fri, 27 Mar 2015 20:33:14 +0800
Subject: [PATCH 2/2] Update msvmRFE.R

to make it support multi-class data set.
---
 msvmRFE.R | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/msvmRFE.R b/msvmRFE.R
index 337fce1..4456790 100644
--- a/msvmRFE.R
+++ b/msvmRFE.R
@@ -12,7 +12,7 @@ svmRFE.wrap <- function(test.fold, X, ...) {
     return(list(feature.ids=features.ranked, train.data.ids=row.names(train.data), test.data.ids=row.names(test.data)))
 }
 
-svmRFE <- function(X, k=1, halve.above=50,...) {
+svmRFE <- function(X, k=1, halve.above=5000,...) {
 # Feature selection with Multiple SVM Recursive Feature Elimination (RFE) algorithm
     n = ncol(X) - 1
 
@@ -49,7 +49,7 @@ svmRFE <- function(X, k=1, halve.above=50,...) {
             c = vbar / vsd
         } else {
             # Only do 1 pass (i.e. regular SVM-RFE)
-            w = getWeights2(NULL, X[, c(1, 1+i.surviving)])
+            w = getWeights(NULL, X[, c(1, 1+i.surviving)])
             c = w * w
         }
 
@@ -88,17 +88,6 @@ svmRFE <- function(X, k=1, halve.above=50,...) {
     return (ranked.list)
 }
 
-getWeights <- function(test.fold, X) {
-# Fit a linear SVM model and obtain feature weights
-    train.data = X
-    if(!is.null(test.fold)) train.data = X[-test.fold, ]
-
-    svmModel = svm(train.data[, -1], train.data[, 1], cost=10, cachesize=500,
-        scale=F, type="C-classification", kernel="linear")
-
-    t(svmModel$coefs) %*% svmModel$SV
-}
-
 
 svm.weights<-function(model){
     w=0
@@ -179,7 +168,7 @@ PlotErrors <- function(errors, errors2=NULL, no.info=0.5, ylim=range(c(errors, e
     abline(h=no.info, lty=3)
 }
 
-getWeights2 <- function(test.fold, X) {
+getWeights <- function(test.fold, X) {
 # Fit a linear SVM model and obtain feature weights
     train.data = X
     if(!is.null(test.fold)) train.data = X[-test.fold, ]
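
Usage note (an illustrative sketch, not part of the patches above): the example below shows how the patched svmRFE() might be called on a multi-class data set. It assumes the patched msvmRFE.R has been sourced and that the e1071 package (which provides svm()) is installed; the built-in iris data is used only as a stand-in three-class example. As in the two-class workflow, the class label is expected in column 1 and the features in the remaining columns, matching how getWeights() splits train.data.

# Minimal usage sketch, assuming the patched msvmRFE.R and the e1071 package.
library(e1071)
source('msvmRFE.R')

# The class label goes in column 1; the remaining columns are features.
# iris serves here only as a stand-in three-class data set.
input = data.frame(species=iris$Species, iris[, 1:4])

# k=1 runs the single-pass SVM-RFE loop; with more than two classes the
# feature weights now come from the one-vs-one SVMs via svm.weights(), and
# features are ranked by the mean squared weight across those class pairs.
featureRankedList = svmRFE(input, k=1, halve.above=100)

# Map the ranked feature indices (feature i corresponds to column i+1)
# back to column names.
print(colnames(input)[1 + featureRankedList])

For cross-validated rankings, svmRFE.wrap() and WriteFeatures() from the same file should layer on top of this in the same way as in the two-class case, since they only pass the data through to svmRFE().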