在R中查找精确度、召回率和F1分数。

huangapple go评论59阅读模式
英文:

Finding the precision, recall and the f1 in R

问题

I want to run models in a loop and then store the performance metrics in a table. I do not want to use the confusionMatrix function in caret; instead, I want to compute the precision, recall and F1 score and then store those in a table. Please assist; edits to the code are welcome.
My attempt is below.

# Question script: fit several caret models on the biopsy data and try to
# collect precision, recall and F1 for each model in a results table.
library(MASS) # provides the biopsy data set
library(caret)
data("biopsy")
# Drop the ID column so it is not used as a predictor.
biopsy$ID <- NULL
# Give the remaining columns descriptive names; the last column is the outcome.
names(biopsy) <- c('clump thickness','uniformity cell size','uniformity cell shape',
                 'marginal adhesion','single epithelial cell size','bare nuclei',
                 'bland chromatin','normal nuclei','mitosis','class')

# Count missing values, drop incomplete rows, then count again to confirm.
sum(is.na(biopsy))
biopsy <- na.omit(biopsy)
sum(is.na(biopsy))
head(biopsy, 5)
# Fix the seed so the train/test partition is reproducible.
set.seed(123)
# Partition on the outcome with p = .75; the selected rows form the training
# set and the remaining rows form the test set.
inTraining <- createDataPartition(biopsy$class, p = .75, list = FALSE)
training <- biopsy[inTraining,]
testing  <- biopsy[-inTraining,]

# Resampling scheme: 10-fold cross-validation repeated 5 times, with class
# probabilities requested.
control <- trainControl(method="repeatedcv", number=10, repeats = 5, verboseIter = FALSE, classProbs = TRUE)
# Rebuild the training set as a data frame, converting character columns to
# factors.
# NOTE(review): as.data.frame() on the unclassed list presumably also rewrites
# the column names containing spaces into syntactic names - confirm.
training <- as.data.frame(unclass(training),                     
                         stringsAsFactors = TRUE)

# Apply the same conversion to the test set.
testing <- as.data.frame(unclass(testing),                     
                         stringsAsFactors = TRUE)

# caret method names to fit, and a table with one row per model.
models <- c("svmRadial", "rf")
results_table <- data.frame(models = models, stringsAsFactors = FALSE)

for (i in models){
  # Fit the current method with the shared resampling control.
  model_train <- train(class ~ ., data = training, method = i,
                     trControl = control, metric = "Accuracy")
  # Predicted classes for the test rows.
  predictions <- predict(model_train, newdata = testing)
  # NOTE(review): the asker reports that this call fails with
  # "inputs must be factors"; `testing` is the whole data frame here, not the
  # factor column `testing$class`.
  precision_ <- posPredValue(predictions, testing)
  recall_ <- sensitivity(predictions, testing)
  # F1 is the harmonic mean of precision and recall.
  f1 <- (2 * precision_ * recall_) / (precision_ + recall_)

  # Store the metrics in the results table.
  # NOTE(review): `i` is a character string, so it is used as a row name here
  # rather than matching the existing row whose `models` value equals `i`.
  results_table[i, "Precision"] <- precision_
  results_table[i, "Recall"] <- recall_
  results_table[i, "F1score"] <- f1
}

However, I get an error which says Error in posPredValue.default(predictions, testing) : inputs must be factors. I do not know where I went wrong, and any edits to my code are welcome.

I know that I could get precision, recall, f1 by just using the code below (B), however, this is a tutorial question where I am required not to use the code example below (B):

(B)

# Variant (B): fit each method in `models` and print caret's confusion-matrix
# report in precision/recall mode for the test-set predictions.
for (method_name in models) {
  fitted_model <- train(
    class ~ .,
    data = training,
    method = method_name,
    trControl = control,
    metric = "Accuracy"
  )
  predicted_class <- predict(fitted_model, newdata = testing)
  report <- confusionMatrix(predicted_class, testing$class, mode = "prec_recall")
  print(report)
}
英文:

I want to run models in a loop and then store the performance metrics in a table. I do not want to use the confusionMatrix function in caret; instead, I want to compute the precision, recall and F1 score and then store those in a table. Please assist; edits to the code are welcome.
My attempt is below.

# Question script (as originally posted): fit several caret models on the
# biopsy data and try to collect precision, recall and F1 for each model.
library(MASS) # provides the biopsy data set
library(caret)
data("biopsy")
# Drop the ID column so it is not used as a predictor.
biopsy$ID <- NULL
# Give the remaining columns descriptive names; the last column is the outcome.
names(biopsy) <- c('clump thickness', 'uniformity cell size', 'uniformity cell shape',
                   'marginal adhesion', 'single epithelial cell size', 'bare nuclei',
                   'bland chromatin', 'normal nuclei', 'mitosis', 'class')

# Count missing values, drop incomplete rows, then count again to confirm.
sum(is.na(biopsy))
biopsy <- na.omit(biopsy)
sum(is.na(biopsy))
head(biopsy, 5)
# Fix the seed so the train/test partition is reproducible.
set.seed(123)
inTraining <- createDataPartition(biopsy$class, p = .75, list = FALSE)
training <- biopsy[inTraining, ]
testing  <- biopsy[-inTraining, ]

# Resampling scheme: 10-fold cross-validation repeated 5 times, with class
# probabilities requested.
control <- trainControl(method = "repeatedcv", number = 10, repeats = 5,
                        verboseIter = FALSE, classProbs = TRUE)
# Changing the character columns into factor variables (training set).
training <- as.data.frame(unclass(training),
                          stringsAsFactors = TRUE)

# Changing the character columns into factor variables (test set).
testing <- as.data.frame(unclass(testing),
                         stringsAsFactors = TRUE)


# caret method names to fit, and a table with one row per model.
models <- c("svmRadial", "rf")
results_table <- data.frame(models = models, stringsAsFactors = FALSE)

for (i in models) {
  model_train <- train(class ~ ., data = training, method = i,
                       trControl = control, metric = "Accuracy")
  predictions <- predict(model_train, newdata = testing)
  # NOTE(review): kept exactly as asked in the question. The asker reports
  # that this call fails with "inputs must be factors"; `testing` is the whole
  # data frame here, not the factor column `testing$class`.
  precision_ <- posPredValue(predictions, testing)
  recall_ <- sensitivity(predictions, testing)
  # F1 is the harmonic mean of precision and recall.
  f1 <- (2 * precision_ * recall_) / (precision_ + recall_)

  # put that in the results table
  # NOTE(review): `i` is a character string, so it is used as a row name here
  # rather than matching the existing row whose `models` value equals `i`.
  results_table[i, "Precision"] <- precision_
  results_table[i, "Recall"] <- recall_
  results_table[i, "F1score"] <- f1
}

However, I get an error which says Error in posPredValue.default(predictions, testing) : inputs must be factors. I do not know where I went wrong, and any edits to my code are welcome.

I know that I could get precision, recall and F1 by just using the code below (B); however, this is a tutorial question where I am required not to use the code example below (B):

(B)

# Variant (B): fit each method in `models` and print caret's confusion-matrix
# report in precision/recall mode for the test-set predictions.
for (i in models) {
  model_train <- train(class ~ ., data = training, method = i,
                       trControl = control, metric = "Accuracy")
  predictions <- predict(model_train, newdata = testing)
  print(confusionMatrix(predictions, testing$class, mode = "prec_recall"))
}

答案1

得分: 1

需要做几处修改。

  1. 您需要更改posPredValue和sensitivity这两个函数的调用方式。对于这两个函数,都要将testing改为testing$class。

  2. 对于results_table,i是一个单词(字符串),而不是一个数值,所以您实际执行的是results_table["rf", "Precision"] <- precision_(这会创建一个新行,行名为"rf")。

以下是您的for语句,其中包括对1)中提到的函数的更改以及解决2)中问题的修改。

# Fit every method in `models`, score its predictions against the true test
# labels, and write precision, recall and F1 into that model's existing row
# of `results_table`.
for (method_name in models) {
  fitted_model <- train(
    class ~ .,
    data = training,
    method = method_name,
    trControl = control,
    metric = "Accuracy"
  )
  # Keep the most recently fitted model available under the name `fit`.
  fit <- fitted_model
  predicted_class <- predict(fitted_model, newdata = testing)

  # Compare predictions with the factor column of true classes.
  ppv <- posPredValue(predicted_class, testing$class)
  sens <- sensitivity(predicted_class, testing$class)
  # F1 is the harmonic mean of precision and recall.
  f1_score <- (2 * ppv * sens) / (ppv + sens)

  # Put the values into the results table, selecting the row by model name
  # instead of indexing with the name itself.
  model_row <- results_table$models %in% method_name
  results_table[model_row, "Precision"] <- ppv
  results_table[model_row, "Recall"] <- sens
  results_table[model_row, "F1score"] <- f1_score
}

这是我运行后得到的结果。

# Print the collected metrics; the commented lines show the output reported
# by the answer's author.
results_table
#      models Precision    Recall   F1score
# 1 svmRadial 0.9722222 0.9459459 0.9589041
# 2        rf 0.9732143 0.9819820 0.9775785
英文:

A few things need to happen.

  1. You have to change the function calls for posPredValue and sensitivity. For both, change testing to testing$class.

  2. For the results_table, i is a word (a character string), not a value, so you're assigning results_table["rf", "Precision"] <- precision_ (This makes a new row, where the row name is "rf".)

Here is your for statement, with changes to those functions mentioned in 1) and a modification to address the issue in 2).

# Fit every method in `models`, score its predictions against the true test
# labels, and write precision, recall and F1 into that model's existing row
# of `results_table`.
for (i in models) {
  model_train <- train(class ~ ., data = training, method = i,
                       trControl = control, metric = "Accuracy")
  # Keep the most recently fitted model available under the name `fit`.
  assign("fit", model_train)
  predictions <- predict(model_train, newdata = testing)
  # Compare predictions with the factor column of true classes, not the whole
  # data frame.
  precision_ <- posPredValue(predictions, testing$class)
  recall_ <- sensitivity(predictions, testing$class)
  # F1 is the harmonic mean of precision and recall.
  f1 <- (2 * precision_ * recall_) / (precision_ + recall_)

  # put that in the results table, selecting the row by model name instead of
  # indexing with the name itself
  results_table[results_table$models %in% i, "Precision"] <- precision_
  results_table[results_table$models %in% i, "Recall"] <- recall_
  results_table[results_table$models %in% i, "F1score"] <- f1
}

This is what it looks like for me.

# Print the collected metrics; the commented lines show the output reported
# by the answer's author.
results_table
#      models Precision    Recall   F1score
# 1 svmRadial 0.9722222 0.9459459 0.9589041
# 2        rf 0.9732143 0.9819820 0.9775785

huangapple
  • 本文由 发表于 2023年2月16日 19:36:33
  • 转载请务必保留本文链接:https://go.coder-hub.com/75471701.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定