英文:
NAs produced using indexing to calculate RMSE
问题
I'm working with the Boston Housing data set in the MASS package. The code produces NAs when calculating RMSE using indexing:
# Repeats a random three-way split of MASS::Boston five times, fits a glmnet
# model on the training rows, and stores the test-set RMSE at position i.
library(MASS)
library(glmnet)
for (i in 1:5){
# Label every row 1, 2 or 3 with probabilities .6/.2/.2; rows are then split
# by label (1 = train, 2 = test, 3 = validation).
idx <- sample(seq(1, 3), size = nrow(MASS::Boston), replace = TRUE, prob = c(.6, .2, .2))
train <- MASS::Boston[idx == 1,]
test <- MASS::Boston[idx == 2,]
# `validation` is created but not used anywhere in this snippet.
validation <- MASS::Boston[idx == 3,]
# NOTE(review): this reset runs on every iteration, so the vector is back to
# length 1 (value 0) each time. The indexed assignment at the end of the loop
# then extends it to length i and pads the skipped positions with NA, which
# matches the reported result `0 NA NA NA <value>`.
elastic.test.RMSE <- 0
elastic.test.pred <- 0
y <- train$medv
# NOTE(review): `%>%` is used here, but neither magrittr nor dplyr is attached
# in this snippet — presumably loaded elsewhere in the session; confirm.
x <- data.matrix(train %>% dplyr::select(-medv))
# `elastic.model` is fitted but not referenced again in this snippet.
elastic.model <- glmnet(x, y, alpha = 0.5)
# Cross-validate with alpha = 0.5 and keep the lambda stored as `lambda.min`.
elastic.cv <- cv.glmnet(x, y, alpha = 0.5)
best.elastic.lambda <- elastic.cv$lambda.min
# NOTE(review): alpha = 0 here differs from the alpha = 0.5 used for the
# cross-validation above — confirm this is intended.
best.elastic.model <- glmnet(x, y, alpha = 0, lambda = best.elastic.lambda)
elastic.test.pred <- predict(best.elastic.model, s = best.elastic.lambda, newx = data.matrix(test %>% dplyr::select(-medv)))
# Store this iteration's RMSE at index i of the (just reset) vector.
elastic.test.RMSE[i] <- Metrics::rmse(actual = test$medv, predicted = elastic.test.pred)
}
As an example, elastic.test.RMSE returns:
[1] 0.000000 NA NA NA 4.019411
However, if I create a data frame and add new RMSE values to the data frame, using the same formula, everything is fine.
# Variant that collects the RMSE values in a data frame instead of indexing
# into a vector.
# The data frame is seeded with whatever `elastic.test.RMSE` holds before the
# loop, so the result has one more row than there are iterations (six rows
# for five iterations in the output shown below).
elastic.test.RMSE.df <- data.frame(elastic.test.RMSE)
library(MASS)
library(glmnet)
for (i in 1:5){
# Label every row 1, 2 or 3 with probabilities .6/.2/.2; rows are then split
# by label (1 = train, 2 = test, 3 = validation).
idx <- sample(seq(1, 3), size = nrow(MASS::Boston), replace = TRUE, prob = c(.6, .2, .2))
train <- MASS::Boston[idx == 1,]
test <- MASS::Boston[idx == 2,]
# `validation` is created but not used anywhere in this snippet.
validation <- MASS::Boston[idx == 3,]
# The per-iteration reset does not produce NAs here: the variable is
# overwritten as a whole further down rather than assigned by index.
elastic.test.RMSE <- 0
elastic.test.pred <- 0
y <- train$medv
# NOTE(review): `%>%` is used here, but neither magrittr nor dplyr is attached
# in this snippet — presumably loaded elsewhere in the session; confirm.
x <- data.matrix(train %>% dplyr::select(-medv))
# `elastic.model` is fitted but not referenced again in this snippet.
elastic.model <- glmnet(x, y, alpha = 0.5)
# Cross-validate with alpha = 0.5 and keep the lambda stored as `lambda.min`.
elastic.cv <- cv.glmnet(x, y, alpha = 0.5)
best.elastic.lambda <- elastic.cv$lambda.min
# NOTE(review): alpha = 0 here differs from the alpha = 0.5 used for the
# cross-validation above — confirm this is intended.
best.elastic.model <- glmnet(x, y, alpha = 0, lambda = best.elastic.lambda)
elastic.test.pred <- predict(best.elastic.model, s = best.elastic.lambda, newx = data.matrix(test %>% dplyr::select(-medv)))
# Whole-variable assignment (no index), then append it as a new row.
elastic.test.RMSE <- Metrics::rmse(actual = test$medv, predicted = elastic.test.pred)
elastic.test.RMSE.df <- rbind(elastic.test.RMSE.df, elastic.test.RMSE)
}
for example,
> elastic.test.RMSE.df
elastic.test.RMSE
1 5.213519
2 4.806393
3 5.412275
4 5.749699
5 5.192845
6 4.229541
I'd much rather do this with indexing, but I can't see what's causing the NA values. I've checked Stack Overflow and the help files, but I didn't find anything that solves the issue.
英文:
I'm working with the Boston Housing data set in the MASS package. The code produces NAs when calculating RMSE using indexing:
# Repeats a random three-way split of MASS::Boston five times, fits a glmnet
# model on the training rows, and stores the test-set RMSE at position i.
library(MASS)
library(glmnet)
for (i in 1:5){
# Label every row 1, 2 or 3 with probabilities .6/.2/.2; rows are then split
# by label (1 = train, 2 = test, 3 = validation).
idx <- sample(seq(1, 3), size = nrow(MASS::Boston), replace = TRUE, prob = c(.6, .2, .2))
train <- MASS::Boston[idx == 1,]
test <- MASS::Boston[idx == 2,]
# `validation` is created but not used anywhere in this snippet.
validation <- MASS::Boston[idx == 3,]
# NOTE(review): this reset runs on every iteration, so the vector is back to
# length 1 (value 0) each time. The indexed assignment at the end of the loop
# then extends it to length i and pads the skipped positions with NA, which
# matches the reported result `0 NA NA NA <value>`.
elastic.test.RMSE <- 0
elastic.test.pred <- 0
y <- train$medv
# NOTE(review): `%>%` is used here, but neither magrittr nor dplyr is attached
# in this snippet — presumably loaded elsewhere in the session; confirm.
x <- data.matrix(train %>% dplyr::select(-medv))
# `elastic.model` is fitted but not referenced again in this snippet.
elastic.model <- glmnet(x, y, alpha = 0.5)
# Cross-validate with alpha = 0.5 and keep the lambda stored as `lambda.min`.
elastic.cv <- cv.glmnet(x, y, alpha = 0.5)
best.elastic.lambda <- elastic.cv$lambda.min
# NOTE(review): alpha = 0 here differs from the alpha = 0.5 used for the
# cross-validation above — confirm this is intended.
best.elastic.model <- glmnet(x, y, alpha = 0, lambda = best.elastic.lambda)
elastic.test.pred <- predict(best.elastic.model, s = best.elastic.lambda, newx = data.matrix(test %>% dplyr::select(-medv)))
# Store this iteration's RMSE at index i of the (just reset) vector.
elastic.test.RMSE[i] <- Metrics::rmse(actual = test$medv, predicted = elastic.test.pred)
}
As an example, elastic.test.RMSE returns:
[1] 0.000000 NA NA NA 4.019411
However, if I create a data frame and add new RMSE values to the data frame, using the same formula, everything is fine.
# Variant that collects the RMSE values in a data frame instead of indexing
# into a vector.
# The data frame is seeded with whatever `elastic.test.RMSE` holds before the
# loop, so the result has one more row than there are iterations (six rows
# for five iterations in the output shown below).
elastic.test.RMSE.df <- data.frame(elastic.test.RMSE)
library(MASS)
library(glmnet)
for (i in 1:5){
# Label every row 1, 2 or 3 with probabilities .6/.2/.2; rows are then split
# by label (1 = train, 2 = test, 3 = validation).
idx <- sample(seq(1, 3), size = nrow(MASS::Boston), replace = TRUE, prob = c(.6, .2, .2))
train <- MASS::Boston[idx == 1,]
test <- MASS::Boston[idx == 2,]
# `validation` is created but not used anywhere in this snippet.
validation <- MASS::Boston[idx == 3,]
# The per-iteration reset does not produce NAs here: the variable is
# overwritten as a whole further down rather than assigned by index.
elastic.test.RMSE <- 0
elastic.test.pred <- 0
y <- train$medv
# NOTE(review): `%>%` is used here, but neither magrittr nor dplyr is attached
# in this snippet — presumably loaded elsewhere in the session; confirm.
x <- data.matrix(train %>% dplyr::select(-medv))
# `elastic.model` is fitted but not referenced again in this snippet.
elastic.model <- glmnet(x, y, alpha = 0.5)
# Cross-validate with alpha = 0.5 and keep the lambda stored as `lambda.min`.
elastic.cv <- cv.glmnet(x, y, alpha = 0.5)
best.elastic.lambda <- elastic.cv$lambda.min
# NOTE(review): alpha = 0 here differs from the alpha = 0.5 used for the
# cross-validation above — confirm this is intended.
best.elastic.model <- glmnet(x, y, alpha = 0, lambda = best.elastic.lambda)
elastic.test.pred <- predict(best.elastic.model, s = best.elastic.lambda, newx = data.matrix(test %>% dplyr::select(-medv)))
# Whole-variable assignment (no index), then append it as a new row.
elastic.test.RMSE <- Metrics::rmse(actual = test$medv, predicted = elastic.test.pred)
elastic.test.RMSE.df <- rbind(elastic.test.RMSE.df, elastic.test.RMSE)
}
for example,
> elastic.test.RMSE.df
elastic.test.RMSE
1 5.213519
2 4.806393
3 5.412275
4 5.749699
5 5.192845
6 4.229541
I'd much rather do this with indexing, but I can't see what's causing the NA values. I've checked Stack Overflow and the help files, but I didn't find anything that solves the issue.
答案1
得分: 1
问题出在这行代码上:
elastic.test.RMSE <- 0
变量 elastic.test.RMSE
在每次迭代时都被重新初始化为长度为 1 的向量。随后对 elastic.test.RMSE[i] 赋值时,R 会把该向量扩展到长度 i,并用 NA 填充中间被跳过的位置,因此最终只有第一个和最后一个元素是数值。你应该将这行代码放在 for 循环之外,如下所示:
# Initialize the result vector once, before the loop, so that values stored
# by earlier iterations are not discarded.
elastic.test.RMSE <- 0
for (i in 1:5){
...
}
另外,我认为以下这行代码可以安全删除:
elastic.test.pred <- 0
英文:
The problem is with this line of code:
elastic.test.RMSE <- 0
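The variable elastic.test.RMSE
gets reinitialized to a length-1 vector at each iteration. Assigning to elastic.test.RMSE[i] then extends that vector to length i and fills the skipped positions with NA, which is why only the first and last elements hold numbers. You should place the line outside the for loop, like so: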
# Initialize the result vector once, before the loop, so that values stored
# by earlier iterations are not discarded.
elastic.test.RMSE <- 0
for (i in 1:5){
...
}
Also, I believe the following line can be safely deleted:
elastic.test.pred <- 0
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论