2023年2月7日 04:14:33go评论74阅读模式

英文:

How to set the stat_function in for loop to plot two graphs with normal distribution, central and variance parameters

问题

以下是翻译好的内容：

我想并行创建以下图形

[![在此输入图像描述][1]][1]


  [1]: https://i.stack.imgur.com/plQLC.png

我使用了宽格式数据集的以下代码：

```R
# Histogram of `data$score` with a fitted normal density curve and vertical
# reference lines at the mean, mean +/- 1 SD and the median.
# Expects a data frame `data` with a numeric `score` column and ggplot2 attached.

# Compute every statistic once, always with na.rm = TRUE, so the reference
# lines, the histogram breaks and the density curve treat missing scores alike.
score_mean   <- mean(data$score, na.rm = TRUE)
score_sd     <- sd(data$score, na.rm = TRUE)
score_median <- median(data$score, na.rm = TRUE)
score_max    <- max(data$score, na.rm = TRUE)
bin_width    <- 5

sumstatz_1 <- data.frame(
  whichstat = c("mean", "sd upr", "sd lwr", "median"),
  value     = c(score_mean,
                score_mean + score_sd,
                score_mean - score_sd,
                score_median)
)

plot2 <- ggplot(data, aes(x = score)) +
  geom_histogram(
    aes(y = ..density..),
    # Round the last break up to a whole bin: with seq(0, max, by = 5) the
    # final break can fall below the maximum, dropping the top observations.
    breaks = seq(0, ceiling(score_max / bin_width) * bin_width, by = bin_width),
    colour = "black",
    fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = list(mean = score_mean, sd = score_sd),
    colour = "black",
    size = 1
  ) +
  labs(title = "score", x = "score", y = "Distribution") +
  geom_vline(
    data = sumstatz_1,
    aes(xintercept = value, linetype = whichstat, col = whichstat),
    size = 1
  )

我只更改了感兴趣的变量来创建第二个图。无论如何，我想通过使用交互图形来创建相同的结果。这里我已经设置了以下代码，为了方便将其转换为长格式，然后我编写了以下for循环：

for (i in 101:ncol(long)) {
    p <- ggplot(long, aes(x = points)) +                           
      geom_histogram(aes(y =..density..), 
                     breaks = seq(0, 50, by = 3), 
                     colour = "black", 
                     fill = "white") + facet_grid(.~ score)
} for (j in seq_along(long$score)){
   p +
      stat_function(fun = dnorm[???], 
                    args = list(mean = mean(long$points[long$score == 'j'], na.rm = TRUE), 
                                sd = mean(long$points[long$score == 'j'], na.rm = TRUE)), 
                    colour = 'black', size = 1)
  }

print(p)

但我不知道如何在**stat_function()**中设置参数，也不知道是否可以在for循环或其他迭代方法中使用。你可能有任何建议吗？

这里是数据集：

structure(list(ID = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 
7, 8, 8, 9, 9, 10, 10), score = structure(list(MM_score = c("score_2", 
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2", 
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2", 
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2", 
"score_1")), row.names = c(NA, -20L), class = c("tbl_df", "tbl", 
"data.frame")), points = c(53, 13.25, 17.5, 1.59090909090909, 
48.5, 6.92857142857143, 40, 3.63636363636364, 46, 7.07692307692308, 
38, 4.47058823529412, 14.5, 1.61111111111111, 19.5, 3.54545454545455, 
37.5, 3.40909090909091, 5.5, 0.916666666666667)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), groups = structure(list(
    ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), .rows = structure(list(
        1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 13:14, 15:16, 17:18, 
        19:20), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), .drop = TRUE))

英文:

I would like to create the following plots in parallel

I have used the following code using the wide format dataset:

# Histogram of `data$score` with a fitted normal density curve and vertical
# reference lines at the mean, mean +/- 1 SD and the median.
# Expects a data frame `data` with a numeric `score` column and ggplot2 attached.

# Compute every statistic once, always with na.rm = TRUE, so the reference
# lines, the histogram breaks and the density curve treat missing scores alike.
score_mean   <- mean(data$score, na.rm = TRUE)
score_sd     <- sd(data$score, na.rm = TRUE)
score_median <- median(data$score, na.rm = TRUE)
score_max    <- max(data$score, na.rm = TRUE)
bin_width    <- 5

sumstatz_1 <- data.frame(
  whichstat = c("mean", "sd upr", "sd lwr", "median"),
  value     = c(score_mean,
                score_mean + score_sd,
                score_mean - score_sd,
                score_median)
)

plot2 <- ggplot(data, aes(x = score)) +
  geom_histogram(
    aes(y = ..density..),
    # Round the last break up to a whole bin: with seq(0, max, by = 5) the
    # final break can fall below the maximum, dropping the top observations.
    breaks = seq(0, ceiling(score_max / bin_width) * bin_width, by = bin_width),
    colour = "black",
    fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = list(mean = score_mean, sd = score_sd),
    colour = "black",
    size = 1
  ) +
  labs(title = "score", x = "score", y = "Distribution") +
  geom_vline(
    data = sumstatz_1,
    aes(xintercept = value, linetype = whichstat, col = whichstat),
    size = 1
  )

I created the second graph by changing only the variable of interest. However, I would like to produce the same result using an interactive graph. To do so, I converted the data into long format for convenience and then wrote the following for loop:

for (i in 101:ncol(long)) {
p &lt;- ggplot(long, aes(x = points)) +                           
geom_histogram(aes(y =..density..), 
breaks = seq(0, 50, by = 3), 
colour = &quot;black&quot;, 
fill = &quot;white&quot;) + facet_grid(.~ score)
} for (j in seq_along(long$score)){
p +
stat_function(fun = dnorm[???], 
args = list(mean = mean(long$points[long$score == &#39;j&#39;], na.rm = TRUE), 
sd = mean(long$points[long$score == &#39;j&#39;], na.rm = TRUE)), 
colour = &#39;black&#39;, size = 1)
}
print(p)

But I have no clue how to set the parameters in stat_function(), nor whether it can be used in a for loop or another iterative method. Would you possibly have any suggestions?

Here the dataset

structure(list(ID = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 
7, 8, 8, 9, 9, 10, 10), score = structure(list(MM_score = c(&quot;score_2&quot;, 
&quot;score_1&quot;, &quot;score_2&quot;, &quot;score_1&quot;, &quot;score_2&quot;, &quot;score_1&quot;, &quot;score_2&quot;, 
&quot;score_1&quot;, &quot;score_2&quot;, &quot;score_1&quot;, &quot;score_2&quot;, &quot;score_1&quot;, &quot;score_2&quot;, 
&quot;score_1&quot;, &quot;score_2&quot;, &quot;score_1&quot;, &quot;score_2&quot;, &quot;score_1&quot;, &quot;score_2&quot;, 
&quot;score_1&quot;)), row.names = c(NA, -20L), class = c(&quot;tbl_df&quot;, &quot;tbl&quot;, 
&quot;data.frame&quot;)), points = c(53, 13.25, 17.5, 1.59090909090909, 
48.5, 6.92857142857143, 40, 3.63636363636364, 46, 7.07692307692308, 
38, 4.47058823529412, 14.5, 1.61111111111111, 19.5, 3.54545454545455, 
37.5, 3.40909090909091, 5.5, 0.916666666666667)), class = c(&quot;grouped_df&quot;, 
&quot;tbl_df&quot;, &quot;tbl&quot;, &quot;data.frame&quot;), row.names = c(NA, -20L), groups = structure(list(
ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), .rows = structure(list(
1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 13:14, 15:16, 17:18, 
19:20), ptype = integer(0), class = c(&quot;vctrs_list_of&quot;, 
&quot;vctrs_vctr&quot;, &quot;list&quot;))), class = c(&quot;tbl_df&quot;, &quot;tbl&quot;, &quot;data.frame&quot;
), row.names = c(NA, -10L), .drop = TRUE))

答案1

得分: 2

尝试使用以下代码：

# Facetted histograms of `points` per score type, each overlaid with the normal
# density fitted to that score type and with reference lines at the mean,
# mean +/- 1 SD and the median.  Needs dplyr and ggplot2 attached.

# Example data as posted in the question (dput output).
dados <- structure(list(ID = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 
7, 8, 8, 9, 9, 10, 10), score = structure(list(MM_score = c("score_2", 
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2", 
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2", 
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2", 
"score_1")), row.names = c(NA, -20L), class = c("tbl_df", "tbl", 
"data.frame")), points = c(53, 13.25, 17.5, 1.59090909090909, 
48.5, 6.92857142857143, 40, 3.63636363636364, 46, 7.07692307692308, 
38, 4.47058823529412, 14.5, 1.61111111111111, 19.5, 3.54545454545455, 
37.5, 3.40909090909091, 5.5, 0.916666666666667)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), groups = structure(list(
    ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), .rows = structure(list(
        1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 13:14, 15:16, 17:18, 
        19:20), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), .drop = TRUE))

# Drop the grouping and replace the nested one-column tibble `score` by a
# plain factor so it can be used for filtering and facetting.
dados <- dados %>%
  ungroup() %>%
  mutate(score = factor(score$MM_score))

# x positions at which the fitted normal densities are evaluated.
grid <- with(dados, seq(min(points), max(points), length.out = 100))

# For one score level, return the fitted density curve and the reference-line
# table as two data frames.
summarise_score <- function(lvl) {
  aux <- dados %>%
    filter(score == lvl) %>%
    summarise(mean = mean(points), sd = sd(points), median = median(points))
  list(
    density = data.frame(score = lvl,
                         points = grid,
                         density = dnorm(grid, aux$mean, aux$sd)),
    stats = data.frame(score = lvl,
                       whichstat = c("mean", "sd upr", "sd lwr", "median"),
                       value = c(aux$mean,
                                 aux$mean + aux$sd,
                                 aux$mean - aux$sd,
                                 aux$median))
  )
}

# Build all pieces first and bind once, instead of growing the data frames
# with rbind() on every loop iteration.
per_score  <- lapply(levels(dados$score), summarise_score)
normaldens <- do.call(rbind, lapply(per_score, `[[`, "density"))
sumstatz_1 <- do.call(rbind, lapply(per_score, `[[`, "stats"))

# Round the last break up to a whole bin so that the largest observations
# (up to 53 here) fall inside the histogram instead of being dropped.
bin_width   <- 3
hist_breaks <- seq(0, ceiling(max(dados$points) / bin_width) * bin_width,
                   by = bin_width)

ggplot(dados, aes(x = points)) +
  geom_histogram(aes(y = ..density..),
                 breaks = hist_breaks,
                 colour = "black",
                 fill = "white") +
  geom_line(aes(y = density), data = normaldens, colour = "red") +
  geom_vline(data = sumstatz_1,
             aes(xintercept = value, linetype = whichstat, col = whichstat),
             size = 1) +
  facet_wrap(~score)

如果您有任何问题，请问我！

英文:

Try using this code:


# Facetted histograms of `points` per score type, each overlaid with the normal
# density fitted to that score type and with reference lines at the mean,
# mean +/- 1 SD and the median.  Needs dplyr and ggplot2 attached.

# Example data as posted in the question (dput output).
dados <- structure(list(ID = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 
7, 8, 8, 9, 9, 10, 10), score = structure(list(MM_score = c("score_2", 
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2", 
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2", 
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2", 
"score_1")), row.names = c(NA, -20L), class = c("tbl_df", "tbl", 
"data.frame")), points = c(53, 13.25, 17.5, 1.59090909090909, 
48.5, 6.92857142857143, 40, 3.63636363636364, 46, 7.07692307692308, 
38, 4.47058823529412, 14.5, 1.61111111111111, 19.5, 3.54545454545455, 
37.5, 3.40909090909091, 5.5, 0.916666666666667)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), groups = structure(list(
    ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), .rows = structure(list(
        1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 13:14, 15:16, 17:18, 
        19:20), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), .drop = TRUE))

# Drop the grouping and replace the nested one-column tibble `score` by a
# plain factor so it can be used for filtering and facetting.
dados <- dados %>%
  ungroup() %>%
  mutate(score = factor(score$MM_score))

# x positions at which the fitted normal densities are evaluated.
grid <- with(dados, seq(min(points), max(points), length.out = 100))

# For one score level, return the fitted density curve and the reference-line
# table as two data frames.
summarise_score <- function(lvl) {
  aux <- dados %>%
    filter(score == lvl) %>%
    summarise(mean = mean(points), sd = sd(points), median = median(points))
  list(
    density = data.frame(score = lvl,
                         points = grid,
                         density = dnorm(grid, aux$mean, aux$sd)),
    stats = data.frame(score = lvl,
                       whichstat = c("mean", "sd upr", "sd lwr", "median"),
                       value = c(aux$mean,
                                 aux$mean + aux$sd,
                                 aux$mean - aux$sd,
                                 aux$median))
  )
}

# Build all pieces first and bind once, instead of growing the data frames
# with rbind() on every loop iteration.
per_score  <- lapply(levels(dados$score), summarise_score)
normaldens <- do.call(rbind, lapply(per_score, `[[`, "density"))
sumstatz_1 <- do.call(rbind, lapply(per_score, `[[`, "stats"))

# Round the last break up to a whole bin so that the largest observations
# (up to 53 here) fall inside the histogram instead of being dropped.
bin_width   <- 3
hist_breaks <- seq(0, ceiling(max(dados$points) / bin_width) * bin_width,
                   by = bin_width)

ggplot(dados, aes(x = points)) +
  geom_histogram(aes(y = ..density..),
                 breaks = hist_breaks,
                 colour = "black",
                 fill = "white") +
  geom_line(aes(y = density), data = normaldens, colour = "red") +
  geom_vline(data = sumstatz_1,
             aes(xintercept = value, linetype = whichstat, col = whichstat),
             size = 1) +
  facet_wrap(~score)

If you have any questions, please ask me!!

答案2

得分: 0

这是我自己想出来的解决方案，如果对其他人有用的话：

# Build one histogram with a fitted normal curve per score type found in
# `long`, then show the first two side by side.
# Expects `long` to have the columns `MM_score` and `points`; needs ggplot2
# and ggpubr (for ggarrange) attached.
# No par(mfrow = ...) call: it only affects base graphics, not ggplot output,
# and would leave the global graphics state modified.

sumstatz <- list()       # per-score reference-line tables, keyed by score name
df <- list()             # per-score subsets of `long`, keyed by score name
name <- character(0)     # score names in processing order
dim <- c(5, 1)           # histogram bin width for each score type, in order
plot <- list()           # per-score ggplot objects, keyed by score name

# as.character() keeps the names usable as list keys even if MM_score is a
# factor.
score_names <- as.character(unique(long$MM_score))

for (j in seq_along(dim)) {
  name[j] <- score_names[j]

  # Keep the rows of this score type that actually have a points value.
  # (The former condition `& long$points` coerced the numbers to logical,
  # which silently dropped genuine zero scores and produced NA rows.)
  df[[name[j]]] <- long[long$MM_score == score_names[j] &
                          !is.na(long$points), ]

  pts      <- df[[name[j]]]$points
  pts_mean <- mean(pts)
  pts_sd   <- sd(pts)

  # Labels now describe the values that are actually stored (mean and
  # mean +/- 1 SD), not median/quantiles.
  sumstatz[[name[j]]] <- data.frame(
    whichstat = c("mean", "sd upr", "sd lwr"),
    value     = c(pts_mean, pts_mean + pts_sd, pts_mean - pts_sd)
  )

  # Round the last break up to a whole bin so the maximum is not dropped.
  lower <- min(pts)
  upper <- lower + ceiling((max(pts) - lower) / dim[j]) * dim[j]

  plot[[name[j]]] <- ggplot(df[[name[j]]], aes(x = points)) +
    geom_histogram(aes(y = ..density..),
                   breaks = seq(lower, upper, by = dim[j]),
                   colour = "black",
                   fill = "white") +
    stat_function(fun = dnorm,
                  args = list(mean = pts_mean, sd = pts_sd),
                  colour = "black",
                  size = 1) +
    labs(title = score_names[j], x = score_names[j], y = "Distribution") +
    geom_vline(data = sumstatz[[name[j]]],
               aes(xintercept = value, linetype = whichstat, col = whichstat),
               size = 1)
}

ggarrange(plot[[1]], plot[[2]])

希望对你有所帮助。

英文:

This is the solution I came up with on my own, in case it is useful for somebody else:

# Build one histogram with a fitted normal curve per score type found in
# `long`, then show the first two side by side.
# Expects `long` to have the columns `MM_score` and `points`; needs ggplot2
# and ggpubr (for ggarrange) attached.
# No par(mfrow = ...) call: it only affects base graphics, not ggplot output,
# and would leave the global graphics state modified.

sumstatz <- list()       # per-score reference-line tables, keyed by score name
df <- list()             # per-score subsets of `long`, keyed by score name
name <- character(0)     # score names in processing order
dim <- c(5, 1)           # histogram bin width for each score type, in order
plot <- list()           # per-score ggplot objects, keyed by score name

# as.character() keeps the names usable as list keys even if MM_score is a
# factor.
score_names <- as.character(unique(long$MM_score))

for (j in seq_along(dim)) {
  name[j] <- score_names[j]

  # Keep the rows of this score type that actually have a points value.
  # (The former condition `& long$points` coerced the numbers to logical,
  # which silently dropped genuine zero scores and produced NA rows.)
  df[[name[j]]] <- long[long$MM_score == score_names[j] &
                          !is.na(long$points), ]

  pts      <- df[[name[j]]]$points
  pts_mean <- mean(pts)
  pts_sd   <- sd(pts)

  # Labels now describe the values that are actually stored (mean and
  # mean +/- 1 SD), not median/quantiles.
  sumstatz[[name[j]]] <- data.frame(
    whichstat = c("mean", "sd upr", "sd lwr"),
    value     = c(pts_mean, pts_mean + pts_sd, pts_mean - pts_sd)
  )

  # Round the last break up to a whole bin so the maximum is not dropped.
  lower <- min(pts)
  upper <- lower + ceiling((max(pts) - lower) / dim[j]) * dim[j]

  plot[[name[j]]] <- ggplot(df[[name[j]]], aes(x = points)) +
    geom_histogram(aes(y = ..density..),
                   breaks = seq(lower, upper, by = dim[j]),
                   colour = "black",
                   fill = "white") +
    stat_function(fun = dnorm,
                  args = list(mean = pts_mean, sd = pts_sd),
                  colour = "black",
                  size = 1) +
    labs(title = score_names[j], x = score_names[j], y = "Distribution") +
    geom_vline(data = sumstatz[[name[j]]],
               aes(xintercept = value, linetype = whichstat, col = whichstat),
               size = 1)
}

ggarrange(plot[[1]], plot[[2]])

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

如何在for循环中设置`stat_function`以绘制两个正态分布图，中心和方差参数。

问题

答案1

答案2

将数据重塑为长格式。

在图表中添加线连接不同的集群。

尝试计算观察的期望值

将marginaleffects::predictions()的结果与主数据框连接起来？

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

发表评论