英文:
How to set the stat_function in for loop to plot two graphs with normal distribution, central and variance parameters
问题
以下是翻译好的内容:
我想并行创建以下图形
[![在此输入图像描述][1]][1]
[1]: https://i.stack.imgur.com/plQLC.png
我使用了宽格式数据集的以下代码:
```R
# Histogram of `data$score` on the density scale with a fitted normal curve
# and vertical reference lines for the mean, mean +/- 1 SD and the median.
# Expects a data frame `data` with a numeric column `score` and ggplot2
# attached; neither is created in this snippet.

# Compute every statistic once, always ignoring NA, so the reference lines
# and the fitted curve are based on exactly the same numbers.
score_mean <- mean(data$score, na.rm = TRUE)
score_sd <- sd(data$score, na.rm = TRUE)
score_median <- median(data$score, na.rm = TRUE)

sumstatz_1 <- data.frame(
  whichstat = c("mean", "sd upr", "sd lwr", "median"),
  value = c(
    score_mean,
    score_mean + score_sd,
    score_mean - score_sd,
    score_median
  )
)

# Round the upper limit up to a whole number of bins. seq(0, max, by = 5)
# stops below the maximum whenever it is not a multiple of 5, which leaves
# the largest scores outside every bin.
bin_width <- 5
score_max <- max(data$score, na.rm = TRUE)
score_breaks <- seq(0, ceiling(score_max / bin_width) * bin_width,
                    by = bin_width)

plot2 <- ggplot(data, aes(x = score)) +
  geom_histogram(
    aes(y = after_stat(density)),
    breaks = score_breaks,
    colour = "black",
    fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = list(mean = score_mean, sd = score_sd),
    colour = "black",
    size = 1
  ) +
  labs(title = "score", x = "score", y = "Distribution") +
  geom_vline(
    data = sumstatz_1,
    aes(xintercept = value, linetype = whichstat, colour = whichstat),
    size = 1
  )
```
我只更改了感兴趣的变量来创建第二个图。无论如何,我想通过使用交互图形来创建相同的结果。这里我已经设置了以下代码,为了方便将其转换为长格式,然后我编写了以下for循环:
# Loop attempt from the question; it does not parse as written (see notes).
# NOTE(review): the loop body never uses `i`, so every pass rebuilds the same
# `p`; if `long` has fewer than 101 columns, 101:ncol(long) counts downwards.
for (i in 101:ncol(long)) {
p <- ggplot(long, aes(x = points)) +
geom_histogram(aes(y =..density..),
breaks = seq(0, 50, by = 3),
colour = "black",
fill = "white") + facet_grid(.~ score)
# NOTE(review): a second `for` directly after `}` on the same line is a syntax
# error in R; it needs a newline or `;` before it.
} for (j in seq_along(long$score)){
# NOTE(review): the result of `p + stat_function(...)` is never assigned, so
# `p` is unchanged when it is printed below.
p +
# `dnorm[???]` is the asker's placeholder for the unknown argument, not valid R.
stat_function(fun = dnorm[???],
# NOTE(review): 'j' is a quoted string, not the loop variable `j`.
args = list(mean = mean(long$points[long$score == 'j'], na.rm = TRUE),
# NOTE(review): `sd` is filled with mean(), not sd().
sd = mean(long$points[long$score == 'j'], na.rm = TRUE)),
colour = 'black', size = 1)
}
print(p)
但我不知道如何在**stat_function()**中设置参数,也不知道是否可以在for循环或其他迭代方法中使用。你可能有任何建议吗?
这里是数据集:
structure(list(ID = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7,
7, 8, 8, 9, 9, 10, 10), score = structure(list(MM_score = c("score_2",
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2",
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2",
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2",
"score_1")), row.names = c(NA, -20L), class = c("tbl_df", "tbl",
"data.frame")), points = c(53, 13.25, 17.5, 1.59090909090909,
48.5, 6.92857142857143, 40, 3.63636363636364, 46, 7.07692307692308,
38, 4.47058823529412, 14.5, 1.61111111111111, 19.5, 3.54545454545455,
37.5, 3.40909090909091, 5.5, 0.916666666666667)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), groups = structure(list(
ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), .rows = structure(list(
1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 13:14, 15:16, 17:18,
19:20), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), .drop = TRUE))
英文:
I would like to create the following plots in parallel
I have used the following code using the wide format dataset:
# Histogram of `data$score` on the density scale with a fitted normal curve
# and vertical reference lines for the mean, mean +/- 1 SD and the median.
# Expects a data frame `data` with a numeric column `score` and ggplot2
# attached; neither is created in this snippet.

# Compute every statistic once, always ignoring NA, so the reference lines
# and the fitted curve are based on exactly the same numbers.
score_mean <- mean(data$score, na.rm = TRUE)
score_sd <- sd(data$score, na.rm = TRUE)
score_median <- median(data$score, na.rm = TRUE)

sumstatz_1 <- data.frame(
  whichstat = c("mean", "sd upr", "sd lwr", "median"),
  value = c(
    score_mean,
    score_mean + score_sd,
    score_mean - score_sd,
    score_median
  )
)

# Round the upper limit up to a whole number of bins. seq(0, max, by = 5)
# stops below the maximum whenever it is not a multiple of 5, which leaves
# the largest scores outside every bin.
bin_width <- 5
score_max <- max(data$score, na.rm = TRUE)
score_breaks <- seq(0, ceiling(score_max / bin_width) * bin_width,
                    by = bin_width)

plot2 <- ggplot(data, aes(x = score)) +
  geom_histogram(
    aes(y = after_stat(density)),
    breaks = score_breaks,
    colour = "black",
    fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = list(mean = score_mean, sd = score_sd),
    colour = "black",
    size = 1
  ) +
  labs(title = "score", x = "score", y = "Distribution") +
  geom_vline(
    data = sumstatz_1,
    aes(xintercept = value, linetype = whichstat, colour = whichstat),
    size = 1
  )
I created the second graph by changing only the variable of interest. However, I would like to obtain the same result with an iterative approach. To do so, I converted the data into long format for convenience and wrote the following for loop:
# Loop attempt from the question; it does not parse as written (see notes).
# NOTE(review): the loop body never uses `i`, so every pass rebuilds the same
# `p`; if `long` has fewer than 101 columns, 101:ncol(long) counts downwards.
for (i in 101:ncol(long)) {
p <- ggplot(long, aes(x = points)) +
geom_histogram(aes(y =..density..),
breaks = seq(0, 50, by = 3),
colour = "black",
fill = "white") + facet_grid(.~ score)
# NOTE(review): a second `for` directly after `}` on the same line is a syntax
# error in R; it needs a newline or `;` before it.
} for (j in seq_along(long$score)){
# NOTE(review): the result of `p + stat_function(...)` is never assigned, so
# `p` is unchanged when it is printed below.
p +
# `dnorm[???]` is the asker's placeholder for the unknown argument, not valid R.
stat_function(fun = dnorm[???],
# NOTE(review): 'j' is a quoted string, not the loop variable `j`.
args = list(mean = mean(long$points[long$score == 'j'], na.rm = TRUE),
# NOTE(review): `sd` is filled with mean(), not sd().
sd = mean(long$points[long$score == 'j'], na.rm = TRUE)),
colour = 'black', size = 1)
}
print(p)
However, I have no idea how to set the parameters in stat_function(), nor whether it can be used in a for loop or another iterative method. Do you have any suggestions?
Here is the dataset:
structure(list(ID = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7,
7, 8, 8, 9, 9, 10, 10), score = structure(list(MM_score = c("score_2",
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2",
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2",
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2",
"score_1")), row.names = c(NA, -20L), class = c("tbl_df", "tbl",
"data.frame")), points = c(53, 13.25, 17.5, 1.59090909090909,
48.5, 6.92857142857143, 40, 3.63636363636364, 46, 7.07692307692308,
38, 4.47058823529412, 14.5, 1.61111111111111, 19.5, 3.54545454545455,
37.5, 3.40909090909091, 5.5, 0.916666666666667)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), groups = structure(list(
ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), .rows = structure(list(
1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 13:14, 15:16, 17:18,
19:20), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), .drop = TRUE))
答案1
得分: 2
尝试使用以下代码:
# Sample data from the question: 10 IDs x 2 score types, stored as a grouped
# tibble whose `score` column is itself a one-column tibble (MM_score).
# The attribute name must be written `row.names`; `row names` is a syntax error.
dados <- structure(list(ID = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7,
7, 8, 8, 9, 9, 10, 10), score = structure(list(MM_score = c("score_2",
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2",
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2",
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2",
"score_1")), row.names = c(NA, -20L), class = c("tbl_df", "tbl",
"data.frame")), points = c(53, 13.25, 17.5, 1.59090909090909,
48.5, 6.92857142857143, 40, 3.63636363636364, 46, 7.07692307692308,
38, 4.47058823529412, 14.5, 1.61111111111111, 19.5, 3.54545454545455,
37.5, 3.40909090909091, 5.5, 0.916666666666667)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), groups = structure(list(
ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), .rows = structure(list(
1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 13:14, 15:16, 17:18,
19:20), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), .drop = TRUE))
# Prepares the per-score data for the faceted plot:
#   dados      - ungrouped data with `score` as a plain factor
#   grid       - evenly spaced x values spanning the observed points
#   normaldens - fitted normal density on `grid`, one set of rows per score
#   sumstatz_1 - mean, mean +/- 1 SD and median per score (reference lines)
# Expects `dados` to be the grouped tibble from the question.
library(dplyr)

# Drop the grouping and flatten the nested `score` tibble into a factor.
dados <- dados %>%
  ungroup() %>%
  mutate(score = factor(score$MM_score))

# Number of x positions at which each normal curve is evaluated.
n_grid <- 100
grid <- seq(min(dados$points), max(dados$points), length.out = n_grid)

# Summary statistics for every score level, computed once and reused below.
group_stats <- lapply(levels(dados$score), function(lvl) {
  pts <- dados$points[which(dados$score == lvl)]
  list(score = lvl, mean = mean(pts), sd = sd(pts), median = median(pts))
})

# Build one data frame per level and bind them in a single step instead of
# growing the result with rbind() inside a loop.
normaldens <- do.call(rbind, lapply(group_stats, function(s) {
  data.frame(
    score = s$score,
    points = grid,
    density = dnorm(grid, s$mean, s$sd)
  )
}))

sumstatz_1 <- do.call(rbind, lapply(group_stats, function(s) {
  data.frame(
    score = s$score,
    whichstat = c("mean", "sd upr", "sd lwr", "median"),
    value = c(s$mean, s$mean + s$sd, s$mean - s$sd, s$median)
  )
}))
# Faceted density histogram of `points` (one panel per score) with the fitted
# normal curve from `normaldens` and the reference lines from `sumstatz_1`.
# Expects `dados`, `normaldens` and `sumstatz_1` to exist already.
library(ggplot2)

# Derive the breaks from the data. The fixed seq(0, 50, by = 3) ends at 48,
# so the sample values 48.5 and 53 were outside every bin.
bin_width <- 3
hist_breaks <- seq(0, ceiling(max(dados$points) / bin_width) * bin_width,
                   by = bin_width)

ggplot(dados, aes(x = points)) +
  geom_histogram(
    aes(y = after_stat(density)),
    breaks = hist_breaks,
    colour = "black",
    fill = "white"
  ) +
  geom_line(aes(y = density), data = normaldens, colour = "red") +
  geom_vline(
    data = sumstatz_1,
    aes(xintercept = value, linetype = whichstat, colour = whichstat),
    size = 1
  ) +
  facet_wrap(~score)
如果您有任何问题,请问我!
英文:
Try using this code:
dados <- structure(list(ID = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7,
7, 8, 8, 9, 9, 10, 10), score = structure(list(MM_score = c("score_2",
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2",
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2",
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2",
"score_1")), row.names = c(NA, -20L), class = c("tbl_df", "tbl",
"data.frame")), points = c(53, 13.25, 17.5, 1.59090909090909,
48.5, 6.92857142857143, 40, 3.63636363636364, 46, 7.07692307692308,
38, 4.47058823529412, 14.5, 1.61111111111111, 19.5, 3.54545454545455,
37.5, 3.40909090909091, 5.5, 0.916666666666667)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), groups = structure(list(
ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), .rows = structure(list(
1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 13:14, 15:16, 17:18,
19:20), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), .drop = TRUE))
# Prepares the per-score data for the faceted plot:
#   dados      - ungrouped data with `score` as a plain factor
#   grid       - evenly spaced x values spanning the observed points
#   normaldens - fitted normal density on `grid`, one set of rows per score
#   sumstatz_1 - mean, mean +/- 1 SD and median per score (reference lines)
# Expects `dados` to be the grouped tibble from the question.
library(dplyr)

# Drop the grouping and flatten the nested `score` tibble into a factor.
dados <- dados %>%
  ungroup() %>%
  mutate(score = factor(score$MM_score))

# Number of x positions at which each normal curve is evaluated.
n_grid <- 100
grid <- seq(min(dados$points), max(dados$points), length.out = n_grid)

# Summary statistics for every score level, computed once and reused below.
group_stats <- lapply(levels(dados$score), function(lvl) {
  pts <- dados$points[which(dados$score == lvl)]
  list(score = lvl, mean = mean(pts), sd = sd(pts), median = median(pts))
})

# Build one data frame per level and bind them in a single step instead of
# growing the result with rbind() inside a loop.
normaldens <- do.call(rbind, lapply(group_stats, function(s) {
  data.frame(
    score = s$score,
    points = grid,
    density = dnorm(grid, s$mean, s$sd)
  )
}))

sumstatz_1 <- do.call(rbind, lapply(group_stats, function(s) {
  data.frame(
    score = s$score,
    whichstat = c("mean", "sd upr", "sd lwr", "median"),
    value = c(s$mean, s$mean + s$sd, s$mean - s$sd, s$median)
  )
}))
# Faceted density histogram of `points` (one panel per score) with the fitted
# normal curve from `normaldens` and the reference lines from `sumstatz_1`.
# Expects `dados`, `normaldens` and `sumstatz_1` to exist already.
library(ggplot2)

# Derive the breaks from the data. The fixed seq(0, 50, by = 3) ends at 48,
# so the sample values 48.5 and 53 were outside every bin.
bin_width <- 3
hist_breaks <- seq(0, ceiling(max(dados$points) / bin_width) * bin_width,
                   by = bin_width)

ggplot(dados, aes(x = points)) +
  geom_histogram(
    aes(y = after_stat(density)),
    breaks = hist_breaks,
    colour = "black",
    fill = "white"
  ) +
  geom_line(aes(y = density), data = normaldens, colour = "red") +
  geom_vline(
    data = sumstatz_1,
    aes(xintercept = value, linetype = whichstat, colour = whichstat),
    size = 1
  ) +
  facet_wrap(~score)
If you have any questions, please ask me!!
答案2
得分: 0
这是我自己想出来的解决方案,如果对其他人有用的话:
# Builds one density histogram per score level of `long`, overlays the fitted
# normal curve plus mean and mean +/- 1 SD reference lines, then shows the
# first two plots side by side.
# Expects a data frame `long` with columns `MM_score` and `points`, and
# ggplot2 attached.
# NOTE(review): ggarrange() is presumably ggpubr::ggarrange -- confirm.

# Histogram bin width for each score level, in the order the levels first
# appear in long$MM_score. Its length sets how many levels are plotted.
dim <- c(5, 1)

# Kept from the original script. par() only configures base graphics; the
# side-by-side layout of the ggplot objects comes from ggarrange() below.
par(mfrow = c(1, 2))

score_names <- as.character(unique(long$MM_score))
stopifnot(length(dim) <= length(score_names))

name <- score_names[seq_along(dim)]
df <- list()
sumstatz <- list()
plot <- list()

for (j in seq_along(dim)) {
  key <- name[j]
  bin_width <- dim[j]

  # Keep this level's rows that have a non-missing value. The original mask
  # used `& long$points`, which coerces the numbers to logical and therefore
  # also discards genuine zeros.
  df[[key]] <- long[which(long$MM_score == key & !is.na(long$points)), ]
  pts <- df[[key]]$points
  pts_mean <- mean(pts)
  pts_sd <- sd(pts)

  # The labels describe the values actually stored (mean and mean +/- 1 SD);
  # the original called them "median", "qupr" and "qlwr".
  sumstatz[[key]] <- data.frame(
    whichstat = c("mean", "sd upr", "sd lwr"),
    value = c(pts_mean, pts_mean + pts_sd, pts_mean - pts_sd)
  )

  # Round the upper limit up to a whole number of bins so the largest
  # observation always falls inside the last bin.
  n_bins <- ceiling((max(pts) - min(pts)) / bin_width)
  hist_breaks <- seq(min(pts), min(pts) + n_bins * bin_width, by = bin_width)

  plot[[key]] <- ggplot(df[[key]], aes(x = points)) +
    geom_histogram(
      aes(y = after_stat(density)),
      breaks = hist_breaks,
      colour = "black",
      fill = "white"
    ) +
    stat_function(
      fun = dnorm,
      args = list(mean = pts_mean, sd = pts_sd),
      colour = "black",
      size = 1
    ) +
    labs(title = key, x = key, y = "Distribution") +
    geom_vline(
      data = sumstatz[[key]],
      aes(xintercept = value, linetype = whichstat, colour = whichstat),
      size = 1
    )
}

ggarrange(plot[[1]], plot[[2]])
希望对你有所帮助。
英文:
This is the solution I came up with on my own, in case it is useful to somebody else:
# Builds one density histogram per score level of `long`, overlays the fitted
# normal curve plus mean and mean +/- 1 SD reference lines, then shows the
# first two plots side by side.
# Expects a data frame `long` with columns `MM_score` and `points`, and
# ggplot2 attached.
# NOTE(review): ggarrange() is presumably ggpubr::ggarrange -- confirm.

# Histogram bin width for each score level, in the order the levels first
# appear in long$MM_score. Its length sets how many levels are plotted.
dim <- c(5, 1)

# Kept from the original script. par() only configures base graphics; the
# side-by-side layout of the ggplot objects comes from ggarrange() below.
par(mfrow = c(1, 2))

score_names <- as.character(unique(long$MM_score))
stopifnot(length(dim) <= length(score_names))

name <- score_names[seq_along(dim)]
df <- list()
sumstatz <- list()
plot <- list()

for (j in seq_along(dim)) {
  key <- name[j]
  bin_width <- dim[j]

  # Keep this level's rows that have a non-missing value. The original mask
  # used `& long$points`, which coerces the numbers to logical and therefore
  # also discards genuine zeros.
  df[[key]] <- long[which(long$MM_score == key & !is.na(long$points)), ]
  pts <- df[[key]]$points
  pts_mean <- mean(pts)
  pts_sd <- sd(pts)

  # The labels describe the values actually stored (mean and mean +/- 1 SD);
  # the original called them "median", "qupr" and "qlwr".
  sumstatz[[key]] <- data.frame(
    whichstat = c("mean", "sd upr", "sd lwr"),
    value = c(pts_mean, pts_mean + pts_sd, pts_mean - pts_sd)
  )

  # Round the upper limit up to a whole number of bins so the largest
  # observation always falls inside the last bin.
  n_bins <- ceiling((max(pts) - min(pts)) / bin_width)
  hist_breaks <- seq(min(pts), min(pts) + n_bins * bin_width, by = bin_width)

  plot[[key]] <- ggplot(df[[key]], aes(x = points)) +
    geom_histogram(
      aes(y = after_stat(density)),
      breaks = hist_breaks,
      colour = "black",
      fill = "white"
    ) +
    stat_function(
      fun = dnorm,
      args = list(mean = pts_mean, sd = pts_sd),
      colour = "black",
      size = 1
    ) +
    labs(title = key, x = key, y = "Distribution") +
    geom_vline(
      data = sumstatz[[key]],
      aes(xintercept = value, linetype = whichstat, colour = whichstat),
      size = 1
    )
}

ggarrange(plot[[1]], plot[[2]])
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论