英文:
How to save multiple steps in map function
问题
我正在尝试创建多个样本,在每个样本上拟合模型并打印结果。
dat <- data.frame(
x=rnorm(20, mean=5, sd=1),
y=rnorm(20, mean=6, sd=1),
weight = rnorm(20, mean=1, sd=0.2)
)
f <- function(data, var.x, var.y, n) {
# 选择样本
data_subset <- data %>% sample_n(n, weight = weight, replace = TRUE) %>% select(all_of(var.x), all_of(var.y)) %>% drop_na()
# 拟合模型
model <- lm(unlist(data_subset %>% select(y)) ~ unlist(data_subset %>% select(x)))
print(model)
}
f(data=dat, var.x="x", var.y="y", n=20)
在map函数内保存数据集并拟合模型的方法是否存在?以下是一种尝试:
```R
f <- function(data, var.x, var.y, n) {
map(seq_len(20),
# 选择样本
data_subset <- data %>% sample_n(n, weight = weight, replace = TRUE) %>% select(all_of(var.x), all_of(var.y)) %>% drop_na()
# 拟合模型
model <- lm(unlist(data_subset %>% select(y)) ~ unlist(data_subset %>% select(x)))
print(model)
)
}
英文:
I am trying to create multiple samples, fit a model on each sample, and print the results.
dat <- data.frame(
x=rnorm(20, mean=5, sd=1),
y=rnorm(20, mean=6, sd=1),
weight = rnorm(20, mean=1, sd=0.2)
)
f <- function(data, var.x, var.y, n) {
# select sample
data_subset <- data %>% sample_n(n, weight = weight, replace = T) %>% select(all_of(var.x), all_of(var.y)) %>% drop_na()
# fit model
model <- lm(unlist(data_subset %>% select(y)) ~ unlist(data_subset %>% select(x)))
print(model)
}
f(data=dat, var.x="x", var.y="y", n=20)
Is there a way to save the dataset within a map function and fit the model? Here is an attempt:
f <- function(data, var.x, var.y, n) {
map(seq_len(20),
# select sample
data_subset <- data %>% sample_n(n, weight = weight, replace = T) %>% select(all_of(var.x), all_of(var.y)) %>% drop_na()
# fit model
model <- lm(unlist(data_subset %>% select(y)) ~ unlist(data_subset %>% select(x)))
print(model)
)
}
答案1
得分: 2
当然,以下是代码部分的翻译:
library(dplyr)
library(purrr) # map
set.seed(42)
dat <- data.frame( x=rnorm(20, mean=5, sd=1), y=rnorm(20, mean=6, sd=1), weight = rnorm(20, mean=1, sd=0.2) )
head(dat,3)
# x y weight
# 1 6.370958 5.693361 1.0411997
# 2 4.435302 4.218692 0.9277885
# 3 5.363128 5.828083 1.1516326
out <- tibble(run = 1:20) %>%
mutate(
data = map(run, ~ sample_n(dat, size = 5, weight = weight, replace = TRUE)),
mdl = map(data, ~ lm(y ~ x, data = .x)),
smry = map(mdl, ~ summary(.x))
)
此外,我已经跳过了代码中的注释和一些格式化的字符,只提供了核心的翻译。如果您需要更多的翻译,请告诉我。
英文:
Sure,
library(dplyr)
library(purrr) # map
set.seed(42)
dat <- data.frame( x=rnorm(20, mean=5, sd=1), y=rnorm(20, mean=6, sd=1), weight = rnorm(20, mean=1, sd=0.2) )
head(dat,3)
# x y weight
# 1 6.370958 5.693361 1.0411997
# 2 4.435302 4.218692 0.9277885
# 3 5.363128 5.828083 1.1516326
out <- tibble(run = 1:20) %>%
mutate(
data = map(run, ~ sample_n(dat, size = 5, weight = weight, replace = TRUE)),
mdl = map(data, ~ lm(y ~ x, data = .x)),
smry = map(mdl, ~ summary(.x))
)
At this point, three columns of out
are list-columns,
out
# # A tibble: 20 × 4
# run data mdl smry
# <int> <list> <list> <list>
# 1 1 <df [5 × 3]> <lm> <smmry.lm>
# 2 2 <df [5 × 3]> <lm> <smmry.lm>
# 3 3 <df [5 × 3]> <lm> <smmry.lm>
# 4 4 <df [5 × 3]> <lm> <smmry.lm>
# 5 5 <df [5 × 3]> <lm> <smmry.lm>
# 6 6 <df [5 × 3]> <lm> <smmry.lm>
# 7 7 <df [5 × 3]> <lm> <smmry.lm>
# 8 8 <df [5 × 3]> <lm> <smmry.lm>
# 9 9 <df [5 × 3]> <lm> <smmry.lm>
# 10 10 <df [5 × 3]> <lm> <smmry.lm>
# 11 11 <df [5 × 3]> <lm> <smmry.lm>
# 12 12 <df [5 × 3]> <lm> <smmry.lm>
# 13 13 <df [5 × 3]> <lm> <smmry.lm>
# 14 14 <df [5 × 3]> <lm> <smmry.lm>
# 15 15 <df [5 × 3]> <lm> <smmry.lm>
# 16 16 <df [5 × 3]> <lm> <smmry.lm>
# 17 17 <df [5 × 3]> <lm> <smmry.lm>
# 18 18 <df [5 × 3]> <lm> <smmry.lm>
# 19 19 <df [5 × 3]> <lm> <smmry.lm>
# 20 20 <df [5 × 3]> <lm> <smmry.lm>
and we can access any of the individual elements using [[
:
out$data[[1]]
# x y weight
# 1 4.721211 5.391074 1.1285799
# 2 4.893875 5.569531 1.0865636
# 3 6.370958 5.693361 1.0411997
# 4 6.370958 5.693361 1.0411997
# 5 4.866679 6.504955 1.0179521
# 6 4.893875 5.569531 1.0865636
# 7 6.511522 5.742731 0.8377214
# 8 5.404268 7.895193 0.7263438
# 9 4.715747 5.215541 1.1358578
# 10 4.866679 6.504955 1.0179521
# 11 6.511522 5.742731 0.8377214
# 12 6.370958 5.693361 1.0411997
# 13 2.343545 5.149092 1.0179666
# 14 6.511522 5.742731 0.8377214
# 15 5.632863 7.214675 0.8546590
# 16 6.370958 5.693361 1.0411997
# 17 5.632863 7.214675 0.8546590
# 18 4.905341 4.236837 1.2888203
# 19 4.435302 4.218692 0.9277885
# 20 2.343545 5.149092 1.0179666
out$data[1:2]
# [[1]]
# x y weight
# 1 4.721211 5.391074 1.128580
# 2 4.893875 5.569531 1.086564
# 3 6.370958 5.693361 1.041200
# 4 6.370958 5.693361 1.041200
# 5 4.866679 6.504955 1.017952
# [[2]]
# x y weight
# 1 4.893875 5.569531 1.0865636
# 2 6.511522 5.742731 0.8377214
# 3 5.404268 7.895193 0.7263438
# 4 4.715747 5.215541 1.1358578
# 5 4.866679 6.504955 1.0179521
out$mdl[1:2]
# [[1]]
# Call:
# lm(formula = y ~ x, data = .x)
# Coefficients:
# (Intercept) x
# 6.11533 -0.06334
# [[2]]
# Call:
# lm(formula = y ~ x, data = .x)
# Coefficients:
# (Intercept) x
# 5.4066 0.1476
out$smry[1:2]
# [[1]]
# Call:
# lm(formula = y ~ x, data = .x)
# Residuals:
# 1 2 3 4 5
# -0.42521 -0.23582 -0.01843 -0.01843 0.69788
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 6.11533 1.59236 3.840 0.0311 *
# x -0.06334 0.28966 -0.219 0.8409
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residual standard error: 0.4913 on 3 degrees of freedom
# Multiple R-squared: 0.01569, Adjusted R-squared: -0.3124
# F-statistic: 0.04782 on 1 and 3 DF, p-value: 0.8409
# [[2]]
# Call:
# lm(formula = y ~ x, data = .x)
# Residuals:
# 1 2 3 4 5
# -0.5593 -0.6248 1.6910 -0.8870 0.3801
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 5.4066 4.4193 1.223 0.309
# x 0.1476 0.8308 0.178 0.870
# Residual standard error: 1.224 on 3 degrees of freedom
# Multiple R-squared: 0.01041, Adjusted R-squared: -0.3195
# F-statistic: 0.03155 on 1 and 3 DF, p-value: 0.8703
If you need to get more "complicated" than this (or if you want to anyway ...), look into the broom
package and friends.
Note:
- I didn't functionize this, but it should be relatively simple to do that, my point here is to demonstrate doing things in steps, where the first such step is to store a list-column of "just data";
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论