在R中查找精确度、召回率和F1分数。

huangapple go评论93阅读模式
英文:

Finding the precision, recall and the f1 in R

问题

I want to run models in a loop via caret's train() and then store the performance metrics in a table. I do not want to use the confusionMatrix function in caret, but I want to compute the precision, recall and f1 and then store those in a table. Please assist; edits to the code are welcome.
My attempt is below.

  1. library(MASS) #will load our biopsy data
  2. library(caret)
  3. data("biopsy")
  4. biopsy$ID <- NULL
  5. names(biopsy) <- c('clump thickness','uniformity cell size','uniformity cell shape',
  6. 'marginal adhesion','single epithelial cell size','bare nuclei',
  7. 'bland chromatin','normal nuclei','mitosis','class')
  8. sum(is.na(biopsy))
  9. biopsy <- na.omit(biopsy)
  10. sum(is.na(biopsy))
  11. head(biopsy, 5)
  12. set.seed(123)
  13. inTraining <- createDataPartition(biopsy$class, p = .75, list = FALSE)
  14. training <- biopsy[inTraining,]
  15. testing <- biopsy[-inTraining,]
  16. # Run algorithms using 10-fold cross validation
  17. control <- trainControl(method="repeatedcv", number=10, repeats = 5, verboseIter = FALSE, classProbs = TRUE)
  18. # CHANGING THE CHARACTERS INTO FACTORS VARIABLES
  19. training <- as.data.frame(unclass(training),
  20. stringsAsFactors = TRUE)
  21. # CHANGING THE CHARACTERS INTO FACTORS VARIABLES
  22. testing <- as.data.frame(unclass(testing),
  23. stringsAsFactors = TRUE)
  24. models <- c("svmRadial", "rf")
  25. results_table <- data.frame(models = models, stringsAsFactors = FALSE)
  26. for (i in models){
  27. model_train <- train(class ~ ., data = training, method = i,
  28. trControl = control, metric = "Accuracy")
  29. predictions <- predict(model_train, newdata = testing)
  30. precision_ <- posPredValue(predictions, testing)
  31. recall_ <- sensitivity(predictions, testing)
  32. f1 <- (2 * precision_ * recall_) / (precision_ + recall_)
  33. # put that in the results table
  34. results_table[i, "Precision"] <- precision_
  35. results_table[i, "Recall"] <- recall_
  36. results_table[i, "F1score"] <- f1
  37. }

However, I get an error which says Error in posPredValue.default(predictions, testing) : inputs must be factors. I do not know where I went wrong, and any edits to my code are welcome.

I know that I could get precision, recall, f1 by just using the code below (B), however, this is a tutorial question where I am required not to use the code example below (B):

  1. (B)
  2. for (i in models){
  3. model_train <- train(class ~ ., data = training, method = i,
  4. trControl = control, metric = "Accuracy")
  5. predictions <- predict(model_train, newdata = testing)
  6. print(confusionMatrix(predictions, testing$class, mode = "prec_recall"))
  7. }
英文:

I want to run models in a loop via caret's train() and then store the performance metrics in a table. I do not want to use the confusionMatrix function in caret, but I want to compute the precision, recall and f1 and then store those in a table. Please assist; edits to the code are welcome.
My attempt is below.

  1. library(MASS) #will load our biopsy data
  2. library(caret)
  3. data("biopsy")
  4. biopsy$ID<-NULL
  5. names(biopsy)<-c('clump thickness','uniformity cell size','uniformity cell shape',
  6. 'marginal adhesion','single epithelial cell size','bare nuclei',
  7. 'bland chromatin','normal nuclei','mitosis','class')
  8. sum(is.na(biopsy))
  9. biopsy<-na.omit(biopsy)
  10. sum(is.na(biopsy))
  11. head(biopsy,5)
  12. set.seed(123)
  13. inTraining <- createDataPartition(biopsy$class, p = .75, list = FALSE)
  14. training <- biopsy[ inTraining,]
  15. testing <- biopsy[-inTraining,]
  16. # Run algorithms using 10-fold cross validation
  17. control <- trainControl(method="repeatedcv", number=10,repeats = 5, verboseIter = F, classProbs = T)
  18. #CHANGING THE CHARACTERS INTO FACTORS VARIABLES
  19. training<- as.data.frame(unclass(training),
  20. stringsAsFactors = TRUE)
  21. #CHANGING THE CHARACTERS INTO FACTORS VARIABLES
  22. testing <- as.data.frame(unclass(testing),
  23. stringsAsFactors = TRUE)
  24. models<-c("svmRadial","rf")
  25. results_table <- data.frame(models = models, stringsAsFactors = F)
  26. for (i in models){
  27. model_train<-train(class~., data=training, method=i,
  28. trControl=control,metric="Accuracy")
  29. predictions<-predict(model_train, newdata=testing)
  30. precision_<-posPredValue(predictions,testing)
  31. recall_<-sensitivity(predictions,testing)
  32. f1<-(2*precision_*recall_)/(precision_+recall_)
  33. # put that in the results table
  34. results_table[i, "Precision"] <- precision_
  35. results_table[i, "Recall"] <- recall_
  36. results_table[i, "F1score"] <- f1
  37. }

However, I get an error which says:
Error in posPredValue.default(predictions, testing) : inputs must be factors
I do not know where I went wrong, and any edits to my code are welcome.

I know that I could get precision, recall and f1 by just using the code below (B); however, this is a tutorial question where I am required not to use the code example below (B):

  1. (B)
  2. for (i in models){
  3. model_train<-train(class~., data=training, method=i,
  4. trControl=control,metric="Accuracy")
  5. predictions<-predict(model_train, newdata=testing)
  6. print(confusionMatrix(predictions, testing$class,mode="prec_recall"))
  7. }

答案1

得分: 1

需要做几处修改。

  1. 您需要更改 posPredValue 和 sensitivity 的函数调用。对于两者,将 testing 更改为 testing$class。

  2. 对于 results_table,i 是一个单词(字符串),而不是一个数值,所以您实际上是在执行 results_table["rf", "Precision"] <- precision_(这会创建一个新行,行名为 "rf")。

以下是您的for语句,其中包括对1)中提到的函数的更改以及解决2)中问题的修改。

  1. for (i in models){
  2. model_train <- train(class~., data = training, method = i,
  3. trControl= control, metric = "Accuracy")
  4. assign("fit", model_train)
  5. predictions <- predict(model_train, newdata = testing)
  6. precision_ <- posPredValue(predictions, testing$class)
  7. recall_ <- sensitivity(predictions, testing$class)
  8. f1 <- (2*precision_ * recall_) / (precision_ + recall_)
  9. # 将这些值放入结果表
  10. results_table[results_table$models %in% i, "Precision"] <- precision_
  11. results_table[results_table$models %in% i, "Recall"] <- recall_
  12. results_table[results_table$models %in% i, "F1score"] <- f1
  13. }

这是对我而言的样子。

  1. results_table
  2. # models Precision Recall F1score
  3. # 1 svmRadial 0.9722222 0.9459459 0.9589041
  4. # 2 rf 0.9732143 0.9819820 0.9775785
英文:

A few things need to happen.

  1. You have to change the function calls for posPredValue and sensitivity. For both, change testing to testing$class.

  2. For the results_table, i is a word (a character string), not a value, so you're assigning results_table["rf", "Precision"] <- precision_ (this makes a new row, where the row name is "rf").

Here is your for statement, with changes to those functions mentioned in 1) and a modification to address the issue in 2).

  1. for (i in models){
  2. model_train <- train(class~., data = training, method = i,
  3. trControl= control, metric = "Accuracy")
  4. assign("fit", model_train)
  5. predictions <- predict(model_train, newdata = testing)
  6. precision_ <-posPredValue(predictions, testing$class)
  7. recall_ <- sensitivity(predictions, testing$class)
  8. f1 <- (2*precision_ * recall_) / (precision_ + recall_)
  9. # put that in the results table
  10. results_table[results_table$models %in% i, "Precision"] <- precision_
  11. results_table[results_table$models %in% i, "Recall"] <- recall_
  12. results_table[results_table$models %in% i, "F1score"] <- f1
  13. }

This is what it looks like for me.

  1. results_table
  2. # models Precision Recall F1score
  3. # 1 svmRadial 0.9722222 0.9459459 0.9589041
  4. # 2 rf 0.9732143 0.9819820 0.9775785

huangapple
  • 本文由 发表于 2023年2月16日 19:36:33
  • 转载请务必保留本文链接:https://go.coder-hub.com/75471701.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定