2023年6月29日 08:07:11go评论172阅读模式

英文:

Boxplot with multiple means per box showing individual variation

问题

I am trying to make a box plot that shows not only the overall mean of the data per box (red dot) but also the means of the 9 individuals included in the data set for each behaviour I am looking at. So, rather than plotting all the raw data as shown below, I want to plot the average for each individual for each behaviour.

This is the graph currently and I want approx 9 means per box + the overall mean shown in red.

library(ggplot2)

# Current state of the plot: one box per behaviour, every raw observation
# overlaid as a jittered point, and the overall mean of each box as a red dot.
# `Seen2` and `my_scale` are defined further down in this post.
ggplot(Seen2, aes(x = Behaviour, y = Roll_Avg, fill = Behaviour)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(
    aes(fill = Behaviour),
    size = 2,
    position = position_jitter(width = 0.2, height = 0.1)
  ) +
  stat_summary(
    fun = mean, geom = "point", shape = 20, size = 5,
    color = "red", fill = "red"
  ) +
  theme_classic() +
  my_scale +
  theme(axis.text.y = element_text(size = 16, angle = 0)) +
  ylim(-30, 30)

英文:

This is the graph currently and I want approx 9 means per box + the overall mean shown in red.

library(ggplot2)

# Current state of the plot: one box per behaviour, every raw observation
# overlaid as a jittered point, and the overall mean of each box as a red dot.
# `Seen2` and `my_scale` are defined further down in this post.
ggplot(Seen2, aes(x = Behaviour, y = Roll_Avg, fill = Behaviour)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(
    aes(fill = Behaviour),
    size = 2,
    position = position_jitter(width = 0.2, height = 0.1)
  ) +
  stat_summary(
    fun = mean, geom = "point", shape = 20, size = 5,
    color = "red", fill = "red"
  ) +
  theme_classic() +
  my_scale +
  theme(axis.text.y = element_text(size = 16, angle = 0)) +
  ylim(-30, 30)

EDIT

I now need to put these in a specific order of behaviours with specific colours. The code worked fine before adding the jitter; however, now it won't order them. See the full code with @Mark's fix.

# One fill colour per behaviour. The colours are named by the alphabetically
# sorted behaviour values; as.character() keeps this working whether
# `Behaviour` is still a character column or has already become a factor.
my_colors <- c(
  "#CCFFFF", "#000000", "#7F7F7F", "#336699",
  "#008080", "#00CCFF", "#264AE2"
)
names(my_colors) <- sort(unique(as.character(Seen2$Behaviour)))
my_scale <- scale_fill_manual(name = "Behaviour", values = my_colors)

# Fix the display order of the behaviours. The factor must be stored on
# `Seen2` (the data frame that is actually plotted), and the level names must
# match the values in the data, which use dots rather than spaces; levels that
# do not match any value would turn those rows into NA.
Seen2$Behaviour <- factor(
  Seen2$Behaviour,
  levels = c(
    "Burst", "High.energy.swimming", "Medium.energy.swimming",
    "Low.energy.swimming", "Travel", "Ascending", "Descending"
  )
)

# `means` is the per-shark summary data frame built in the answer's code.
ggplot(Seen2, aes(x = Behaviour, y = Roll_Avg, fill = Behaviour)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(
    data = means,
    size = 2,
    position = position_jitter(width = 0.2, height = 0.1)
  ) +
  stat_summary(
    fun = mean, geom = "point", shape = 20, size = 5,
    color = "red", fill = "red"
  ) +
  theme_classic() +
  my_scale +
  theme(axis.text.y = element_text(size = 16, angle = 0)) +
  ylim(-30, 30)

data

Seen2 &lt;- structure(list(SharkID = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L), Behaviour = c(&quot;Low.energy.swimming&quot;, 
&quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, 
&quot;Low.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;Travel&quot;, &quot;Travel&quot;, &quot;Travel&quot;, &quot;Travel&quot;, 
&quot;Travel&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Ascending&quot;, 
&quot;Ascending&quot;, &quot;Ascending&quot;, &quot;Ascending&quot;, &quot;Ascending&quot;, &quot;Ascending&quot;, 
&quot;Ascending&quot;, &quot;Descending&quot;, &quot;Descending&quot;, &quot;Descending&quot;, &quot;Descending&quot;, 
&quot;Descending&quot;, &quot;Descending&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, 
&quot;Burst&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, 
&quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, 
&quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, 
&quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, 
&quot;Burst&quot;, &quot;Burst&quot;, &quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;), Roll_Avg = c(3.97084, 3.90604, 3.90738, 
3.80425, 3.4154, -0.993225, -0.940408, -0.55992, -0.791121, -1.83573, 
-3.41667, -14.0837, -14.9381, -16.4732, -16.6994, -15.5318, -18.2402, 
-19.4427, -22.8129, -27.009, -27.3907, 17.3778, 13.4861, 7.82564, 
4.63057, 6.94956, 14.3372, 22.0873, -11.5397, -11.7741, -11.4795, 
-10.7844, -10.5135, -11.0162, -90, -90, 11.0157, 6.13595, 2.2689, 
-0.710414, -5.56132, -12.0987, -9.70231, -7.13388, -5.41693, 
-4.23157, 2.11092, 2.19057, 1.5597, 0.637742, 1.17135, 3.41601, 
4.71664, 4.61525, -0.813111, -4.45238, -7.43746, -9.11626, -9.94338, 
-11.0361, -11.8852, -10.472, -5.12697, 2.61247, 9.80993, 17.307, 
10.5466, -4.01104, -7.40708, -2.72602, -5.43834, -5.22419, -4.8472, 
-4.43957, -1.67914, 2.39693, 7.84736, -9.7158, -8.70349, -8.22463, 
-8.22878, -9.43265, -0.527293, -0.283262, -0.614311, -0.380123, 
-0.344986, 7.73204, 7.47037, 7.00224, 7.01661, 7.38737, 7.83069, 
-1.83138, -1.7847, -1.68084, -1.61196, -1.49905, -1.61391, -1.46356, 
-0.986477, -0.806394, -0.883015, -0.840026, -0.727501, -1.15641, 
-1.28692, -1.38961, -1.43838, -1.42089, -1.27225)), class = &quot;data.frame&quot;, row.names = c(NA, 
-111L))

答案1

得分: 0

你可以做的是，为你想绘制的样本创建一个单独的数据框，并在geom_point()调用中引用它：

library(tidyverse)

set.seed(123) # make the random sampling below reproducible
number_of_samples <- 9

# Fix the display order of the behaviours on the plotted data frame.
Seen2 <- Seen2 %>%
  mutate(
    Behaviour = factor(
      Behaviour,
      levels = c(
        "Burst", "High.energy.swimming", "Medium.energy.swimming",
        "Low.energy.swimming", "Travel", "Ascending", "Descending"
      )
    )
  )

# Data frame of points to overlay: one mean per shark and behaviour, then at
# most `number_of_samples` of those means per behaviour.
means <- Seen2 %>%
  group_by(Behaviour, SharkID) %>%
  summarise(Roll_Avg = mean(Roll_Avg), .groups = "drop") %>%
  group_by(Behaviour) %>%
  # Some behaviours have fewer than `number_of_samples` rows, so take the
  # smaller of the group size and `number_of_samples`.
  sample_n(min(n(), number_of_samples)) %>%
  ungroup()

ggplot(Seen2, aes(x = Behaviour, y = Roll_Avg, fill = Behaviour)) +
  geom_boxplot(outlier.shape = NA) +
  # x, y and fill are inherited from ggplot(); only the data differs here.
  geom_point(
    data = means,
    size = 2,
    position = position_jitter(width = 0.2, height = 0.1)
  ) +
  stat_summary(
    fun = mean, geom = "point", shape = 20, size = 5,
    color = "red", fill = "red"
  ) +
  theme_classic() +
  theme(axis.text.y = element_text(size = 16, angle = 0)) +
  # NOTE(review): ylim() drops observations outside the range before the box
  # and mean statistics are computed, whereas `means` above is computed from
  # all rows (the data contains values of -90) -- confirm this is intended, or
  # use coord_cartesian(ylim = c(-30, 30)) to zoom without dropping data.
  ylim(-30, 30)

英文:

What you can do is, create a separate dataframe for just the samples you want to plot, and then reference that in the geom_point() call:

library(tidyverse)

set.seed(123) # make the random sampling below reproducible
number_of_samples <- 9

# Fix the display order of the behaviours on the plotted data frame.
Seen2 <- Seen2 %>%
  mutate(
    Behaviour = factor(
      Behaviour,
      levels = c(
        "Burst", "High.energy.swimming", "Medium.energy.swimming",
        "Low.energy.swimming", "Travel", "Ascending", "Descending"
      )
    )
  )

# Data frame of points to overlay: one mean per shark and behaviour, then at
# most `number_of_samples` of those means per behaviour.
means <- Seen2 %>%
  group_by(Behaviour, SharkID) %>%
  summarise(Roll_Avg = mean(Roll_Avg), .groups = "drop") %>%
  group_by(Behaviour) %>%
  # Some behaviours have fewer than `number_of_samples` rows, so take the
  # smaller of the group size and `number_of_samples`.
  sample_n(min(n(), number_of_samples)) %>%
  ungroup()

ggplot(Seen2, aes(x = Behaviour, y = Roll_Avg, fill = Behaviour)) +
  geom_boxplot(outlier.shape = NA) +
  # x, y and fill are inherited from ggplot(); only the data differs here.
  geom_point(
    data = means,
    size = 2,
    position = position_jitter(width = 0.2, height = 0.1)
  ) +
  stat_summary(
    fun = mean, geom = "point", shape = 20, size = 5,
    color = "red", fill = "red"
  ) +
  theme_classic() +
  theme(axis.text.y = element_text(size = 16, angle = 0)) +
  # NOTE(review): ylim() drops observations outside the range before the box
  # and mean statistics are computed, whereas `means` above is computed from
  # all rows (the data contains values of -90) -- confirm this is intended, or
  # use coord_cartesian(ylim = c(-30, 30)) to zoom without dropping data.
  ylim(-30, 30)

答案2

得分: 0

以下是您要翻译的内容：

首先，将标签缩短一点。

# Shorten the long labels: "High.energy.swimming" becomes "High*", etc.
# fixed = TRUE matches the dots literally instead of as regex wildcards.
Seen3$Behaviour <- gsub(".energy.swimming", "*", Seen3$Behaviour, fixed = TRUE)

接下来，进行一些预处理，使用 tapply 计算每种行为和鲨鱼的平均值（请注意，箱线图中的中心线实际上显示的是中位数！）。

# Matrix of medians: one row per behaviour, one column per shark.
m <- with(
  Seen3,
  tapply(Roll_Avg, list(Behaviour, SharkID), median, na.rm = TRUE)
)

接下来，使用 boxplot，

# One unfilled, grey-bordered box per behaviour.
boxplot(
  Roll_Avg ~ Behaviour, Seen3,
  col = 0, border = "grey40", ylab = "Moving average"
)

然后，在为鲨鱼定义六种颜色之后，

# One colour per column of `m`, i.e. one per shark.
clr <- rainbow(ncol(m), v = 0.75)

添加具有“jitter”的均值（或中位数）。

set.seed(666) # fix the jitter so the plot is reproducible
# `m` is flattened column by column, so each point's colour has to be looked up
# by its column (shark) index. Passing `clr` directly would recycle the colours
# down the rows and they would no longer agree with the legend.
points(jitter(as.vector(row(m))), m, col = clr[col(m)], pch = 20)

最后，添加一个漂亮的 legend。

# Shark legend: one entry per column of `m`, using the same colours as the
# points.
legend(
  "bottomleft",
  legend = colnames(m), col = clr, pch = 20, ncol = 2, title = "Shark"
)
# Footnote explaining the "*" used in the shortened behaviour labels; xpd = TRUE
# lets it be drawn outside the plot region.
text(
  ncol(m), min(m, na.rm = TRUE) * 1.75, "*swimming",
  bty = "n", xpd = TRUE, adj = 0, cex = 0.9
)

注意： 如果您（真的？）想要从显示中删除异常值，请在 boxplot() 调用中包括 pch=NA。

英文:

First, shorten the labels a bit.

# Shorten the long labels: "High.energy.swimming" becomes "High*", etc.
# fixed = TRUE matches the dots literally instead of as regex wildcards.
Seen3$Behaviour <- gsub(".energy.swimming", "*", Seen3$Behaviour, fixed = TRUE)

Next, doing some preprocessing, using tapply to calculate the mean per behavior and shark (note, that the central line in a boxplot actually shows the median!).

# Matrix of medians: one row per behaviour, one column per shark.
m <- with(
  Seen3,
  tapply(Roll_Avg, list(Behaviour, SharkID), median, na.rm = TRUE)
)

Next, use boxplot,

# One unfilled, grey-bordered box per behaviour.
boxplot(
  Roll_Avg ~ Behaviour, Seen3,
  col = 0, border = "grey40", ylab = "Moving average"
)

and after defining six colors for the sharks,

# One colour per column of `m`, i.e. one per shark.
clr <- rainbow(ncol(m), v = 0.75)

add the jittered means (or medians respectively).

set.seed(666) # fix the jitter so the plot is reproducible
# `m` is flattened column by column, so each point's colour has to be looked up
# by its column (shark) index. Passing `clr` directly would recycle the colours
# down the rows and they would no longer agree with the legend.
points(jitter(as.vector(row(m))), m, col = clr[col(m)], pch = 20)

Finally, add a nice legend.

# Shark legend: one entry per column of `m`, using the same colours as the
# points.
legend(
  "bottomleft",
  legend = colnames(m), col = clr, pch = 20, ncol = 2, title = "Shark"
)
# Footnote explaining the "*" used in the shortened behaviour labels; xpd = TRUE
# lets it be drawn outside the plot region.
text(
  ncol(m), min(m, na.rm = TRUE) * 1.75, "*swimming",
  bty = "n", xpd = TRUE, adj = 0, cex = 0.9
)

Note: If you (really?) want to remove the outliers from display, include a pch=NA in the boxplot() call.

Data:

## Seen2 with outliers removed according to the Tukey criterion
Seen3 &lt;- structure(list(SharkID = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L), Behaviour = c(&quot;Low.energy.swimming&quot;, 
&quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, 
&quot;Low.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;Travel&quot;, &quot;Travel&quot;, &quot;Travel&quot;, &quot;Travel&quot;, 
&quot;Travel&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Ascending&quot;, 
&quot;Ascending&quot;, &quot;Ascending&quot;, &quot;Ascending&quot;, &quot;Ascending&quot;, &quot;Ascending&quot;, 
&quot;Ascending&quot;, &quot;Descending&quot;, &quot;Descending&quot;, &quot;Descending&quot;, &quot;Descending&quot;, 
&quot;Descending&quot;, &quot;Descending&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, 
&quot;Burst&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, 
&quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, 
&quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, 
&quot;Low.energy.swimming&quot;, &quot;Low.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, &quot;High.energy.swimming&quot;, 
&quot;High.energy.swimming&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, &quot;Burst&quot;, 
&quot;Burst&quot;, &quot;Burst&quot;, &quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, &quot;Medium.energy.swimming&quot;, 
&quot;Medium.energy.swimming&quot;), Roll_Avg = c(3.97084, 3.90604, 3.90738, 
3.80425, 3.4154, -0.993225, -0.940408, -0.55992, -0.791121, -1.83573, 
-3.41667, -14.0837, -14.9381, -16.4732, -16.6994, -15.5318, -18.2402, 
-19.4427, -22.8129, NA, NA, 17.3778, 13.4861, 7.82564, 4.63057, 
6.94956, 14.3372, NA, -11.5397, -11.7741, -11.4795, -10.7844, 
-10.5135, -11.0162, NA, NA, 11.0157, 6.13595, 2.2689, -0.710414, 
-5.56132, -12.0987, -9.70231, -7.13388, -5.41693, -4.23157, 2.11092, 
2.19057, 1.5597, 0.637742, 1.17135, 3.41601, 4.71664, 4.61525, 
-0.813111, -4.45238, -7.43746, -9.11626, -9.94338, -11.0361, 
-11.8852, -10.472, -5.12697, 2.61247, 9.80993, 17.307, 10.5466, 
-4.01104, -7.40708, -2.72602, -5.43834, -5.22419, -4.8472, -4.43957, 
-1.67914, 2.39693, 7.84736, -9.7158, -8.70349, -8.22463, -8.22878, 
-9.43265, -0.527293, -0.283262, -0.614311, -0.380123, -0.344986, 
7.73204, 7.47037, 7.00224, 7.01661, 7.38737, 7.83069, -1.83138, 
-1.7847, -1.68084, -1.61196, -1.49905, -1.61391, -1.46356, -0.986477, 
-0.806394, -0.883015, -0.840026, -0.727501, -1.15641, -1.28692, 
-1.38961, -1.43838, -1.42089, -1.27225)), row.names = c(NA, -111L
), class = &quot;data.frame&quot;)

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

箱线图，每个箱子显示多个均值，展示个体变异。

问题

data

答案1

答案2

在R中，使用滞后值和起始值的条件语句。

制作一个根据组别着色的ECDF图。

基于连续的行创建分组，以在 ggplot 折线图中显示。

Apply a function in R on each row: function takes multiple columns from each row and returns multiple new columns

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。