英文:
Reframing output of confidence intervals to combine mean, upper and lower values into one cell
问题
I use the code below to calculate the mean, upper and lower confidence intervals of multiple variables at once.
library(gmodels)
library(purrr)
# For each `group`, summarise every other column with two functions:
# a "mean" and a confidence-interval summary from ci().
dfci <- df %>%
group_by(group) %>%
dplyr::summarize(across(everything(),
# NOTE(review): base R's mean() documents `trim` as a fraction in 0-0.5 and
# treats larger values as 0.5, so trim = 4 presumably yields the median - confirm intent.
.fns = list(mean = ~ mean(.x, na.rm = TRUE, trim = 4),
# NOTE(review): ci() presumably returns several values per column (estimate,
# lower, upper, std. error), which would explain the "additional rows"
# mentioned below - confirm against the gmodels documentation.
ci = ~ ci(.x, confidence = 0.95, alpha = 0.05, na.rm = T))))
#dfci <- dfci[-(13:16),] # remove additional rows
# Export the summary table as CSV.
write.csv(dfci, file="dfci.csv")
Sample data:
Group| A_pre | A_post | B_pre | B_post
0 20 21 20 23
1 30 10 19 11
2 10 53 30 34
1 22 32 25 20
2 34 40 32 30
0 30 50 NA 40
0 39 40 19 20
1 40 NA 20 20
2 50 10 20 10
0 34 23 30 10
I tried pivoting to long format after the ci calculations, but it doesn't work:
library(reshape2)
# Per-group summary of every other column: a "mean" and a ci() summary.
dfci <- df %>%
group_by(group) %>%
summarize(across(everything(),
# NOTE(review): trim = 4 is outside mean()'s documented 0-0.5 range and is
# presumably treated as the median - confirm intent.
.fns = list(mean = ~ mean(.x, na.rm = TRUE, trim = 4),
ci = ~ ci(.x, confidence = 0.95, alpha = 0.05, na.rm = TRUE))))
# Stack all non-`group` columns into variable/value pairs.
dfci <- melt(dfci, id.vars = "group")
# NOTE(review): `variable` appears on both sides of the cast formula, which is
# presumably why this reshape does not give the intended layout - verify.
dfci <- dcast(dfci, group + variable ~ variable)
# Export the reshaped table as CSV without row names.
write.csv(dfci, file = "dfi.csv", row.names = FALSE)
英文:
I use the code below to calculate the mean, upper and lower confidence intervals of multiple variables at once.
library(gmodels)
library(purrr)
# For each `group`, summarise every other column with two functions:
# a "mean" and a confidence-interval summary from ci().
dfci <- df %>%
group_by(group) %>%
dplyr::summarize(across(everything(),
# NOTE(review): base R's mean() documents `trim` as a fraction in 0-0.5 and
# treats larger values as 0.5, so trim = 4 presumably yields the median - confirm intent.
.fns = list(mean = ~ mean(.x, na.rm = TRUE, trim = 4),
# NOTE(review): ci() presumably returns several values per column (estimate,
# lower, upper, std. error), which would explain the "additional rows"
# mentioned below - confirm against the gmodels documentation.
ci = ~ ci(.x, confidence = 0.95, alpha = 0.05, na.rm = T))))
#dfci <- dfci[-(13:16),] # remove additional rows
# Export the summary table as CSV.
write.csv(dfci, file="dfci.csv")
Sample data:
Group| A_pre | A_post | B_pre | B_post
0 20 21 20 23
1 30 10 19 11
2 10 53 30 34
1 22 32 25 20
2 34 40 32 30
0 30 50 NA 40
0 39 40 19 20
1 40 NA 20 20
2 50 10 20 10
0 34 23 30 10
As I have over 50 "pre" and "post" variables i.e., >100 variables, is it possible to combine the outputs from the three desired cells (mean, lower and upper ci) into one so I am not manually combining all of them?
I tried pivoting to long format after the ci calculations, but it doesn't work:
library(reshape2)
# Per-group summary of every other column: a "mean" and a ci() summary.
dfci <- df %>%
group_by(group) %>%
summarize(across(everything(),
# NOTE(review): trim = 4 is outside mean()'s documented 0-0.5 range and is
# presumably treated as the median - confirm intent.
.fns = list(mean = ~ mean(.x, na.rm = TRUE, trim = 4),
ci = ~ ci(.x, confidence = 0.95, alpha = 0.05, na.rm = TRUE))))
# Stack all non-`group` columns into variable/value pairs.
dfci <- melt(dfci, id.vars = "group")
# NOTE(review): `variable` appears on both sides of the cast formula, which is
# presumably why this reshape does not give the intended layout - verify.
dfci <- dcast(dfci, group + variable ~ variable)
# Export the reshaped table as CSV without row names.
write.csv(dfci, file = "dfi.csv", row.names = FALSE)
答案1
得分: 2
以下是您提供的内容的翻译:
更新后的澄清:
我们可以使用自定义的ci函数来使用reframe:
library(dplyr) # >= dplyr 1.1.0 (reframe() and .by)

# Per-Group summary of every other column: a point estimate plus a 95%
# t-based confidence interval collapsed into a single "[lower, upper]" string,
# so each variable needs only two output columns.
df %>%
  reframe(across(everything(), .fns = list(
    # NOTE(review): mean() treats trim >= 0.5 as the median, so with trim = 4
    # this column holds the median; drop `trim` if the arithmetic mean (the
    # centre of the interval below) is what should be reported.
    mean = ~ mean(.x, na.rm = TRUE, trim = 4),
    ci = ~ {
      n <- sum(!is.na(.x)) # number of non-missing observations
      se <- sqrt(var(.x, na.rm = TRUE) / n)
      mean_val <- mean(.x, na.rm = TRUE)
      # A t-interval for a mean uses n - 1 degrees of freedom (not n); this
      # also matches what gmodels::ci() computes for a numeric vector.
      margin <- qt(0.975, df = n - 1) * se
      lower <- mean_val - margin
      upper <- mean_val + margin
      # c(lower, upper)
      paste0("[", round(lower, 2), ", ", round(upper, 2), "]")
    }
  )), .by = Group)
Group A_pre_mean A_pre_ci A_post_mean A_post_ci B_pre_mean B_pre_ci B_post_mean B_post_ci
1 0 32 [17.93, 43.57] 31.5 [11.36, 55.64] 20 [7.89, 38.11] 21.5 [3.4, 43.1]
2 1 30 [8.26, 53.07] 21.0 [-118.77, 160.77] 20 [13.35, 29.32] 20.0 [4.09, 29.91]
3 2 34 [-18.68, 81.35] 40.0 [-20.45, 89.12] 30 [11.36, 43.3] 30.0 [-7.27, 56.61]
英文:
Update after clarification:
We can use reframe with a custom ci function:
library(dplyr) # >= dplyr 1.1.0 (reframe() and .by)

# Per-Group summary of every other column: a point estimate plus a 95%
# t-based confidence interval collapsed into a single "[lower, upper]" string,
# so each variable needs only two output columns.
df %>%
  reframe(across(everything(), .fns = list(
    # NOTE(review): mean() treats trim >= 0.5 as the median, so with trim = 4
    # this column holds the median; drop `trim` if the arithmetic mean (the
    # centre of the interval below) is what should be reported.
    mean = ~ mean(.x, na.rm = TRUE, trim = 4),
    ci = ~ {
      n <- sum(!is.na(.x)) # number of non-missing observations
      se <- sqrt(var(.x, na.rm = TRUE) / n)
      mean_val <- mean(.x, na.rm = TRUE)
      # A t-interval for a mean uses n - 1 degrees of freedom (not n); this
      # also matches what gmodels::ci() computes for a numeric vector.
      margin <- qt(0.975, df = n - 1) * se
      lower <- mean_val - margin
      upper <- mean_val + margin
      # c(lower, upper)
      paste0("[", round(lower, 2), ", ", round(upper, 2), "]")
    }
  )), .by = Group)
Group A_pre_mean A_pre_ci A_post_mean A_post_ci B_pre_mean B_pre_ci B_post_mean B_post_ci
1 0 32 [17.93, 43.57] 31.5 [11.36, 55.64] 20 [7.89, 38.11] 21.5 [3.4, 43.1]
2 1 30 [8.26, 53.07] 21.0 [-118.77, 160.77] 20 [13.35, 29.32] 20.0 [4.09, 29.91]
3 2 34 [-18.68, 81.35] 40.0 [-20.45, 89.12] 30 [11.36, 43.3] 30.0 [-7.27, 56.61]
答案2
得分: 0
This code does the job:
library(dplyr)

# Per-group summary of every other column: a point estimate and a 95% t-based
# confidence interval formatted as one "[lower, upper]" string. Output columns
# are named "<column>_mean" and "<column>_ci".
# NOTE(review): the grouping column is spelled `group` here but `Group` in the
# sample data header; use whichever matches the real data.
dfci <- df %>%
  group_by(group) %>%
  summarise(across(everything(), list(
    # NOTE(review): mean() treats trim >= 0.5 as the median, so trim = 4 makes
    # this column the median; drop `trim` to report the arithmetic mean.
    mean = ~ mean(.x, na.rm = TRUE, trim = 4),
    ci = ~ { # custom CI function
      n <- sum(!is.na(.x)) # number of non-missing observations
      se <- sqrt(var(.x, na.rm = TRUE) / n)
      mean_val <- mean(.x, na.rm = TRUE)
      # A t-interval for a mean uses n - 1 degrees of freedom (not n).
      margin <- qt(0.975, df = n - 1) * se
      lower <- mean_val - margin
      upper <- mean_val + margin
      paste0("[", round(lower, 2), ", ", round(upper, 2), "]")
    }
  ), .names = "{.col}_{.fn}")) %>%
  ungroup()
英文:
Unfortunately the earlier answers did not work as they repeated the same ci throughout.
This code does the job:
library(dplyr)

# Per-group summary of every other column: a point estimate and a 95% t-based
# confidence interval formatted as one "[lower, upper]" string. Output columns
# are named "<column>_mean" and "<column>_ci".
# NOTE(review): the grouping column is spelled `group` here but `Group` in the
# sample data header; use whichever matches the real data.
dfci <- df %>%
  group_by(group) %>%
  summarise(across(everything(), list(
    # NOTE(review): mean() treats trim >= 0.5 as the median, so trim = 4 makes
    # this column the median; drop `trim` to report the arithmetic mean.
    mean = ~ mean(.x, na.rm = TRUE, trim = 4),
    ci = ~ { # custom CI function
      n <- sum(!is.na(.x)) # number of non-missing observations
      se <- sqrt(var(.x, na.rm = TRUE) / n)
      mean_val <- mean(.x, na.rm = TRUE)
      # A t-interval for a mean uses n - 1 degrees of freedom (not n).
      margin <- qt(0.975, df = n - 1) * se
      lower <- mean_val - margin
      upper <- mean_val + margin
      paste0("[", round(lower, 2), ", ", round(upper, 2), "]")
    }
  ), .names = "{.col}_{.fn}")) %>%
  ungroup()
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论