英文:
ggplot2 bar graph with statistics (apoptosis/necrosis assay)
问题
我正在使用ggplot构建一个堆叠条形图。这是我目前用来生成图表的代码,它堆叠了apop、nec和late的值之和,但使用不同的颜色来表示各个类别对总和的贡献。
这是当我简单地忽略统计数据时得到的图表。
到目前为止,我尝试了以下方法:
数据表格
condition | rep | nec | late | apop |
---|---|---|---|---|
37_colo_control | rep1 | 0.0209 | 0.0334 | 0.0405 |
37_colo_control | rep2 | 0.0013 | 0.0402 | 0.0541 |
37_colo_control | rep3 | 0.0076 | 0.0546 | 0.0707 |
42_colo_control | rep1 | 0.0147 | 0.0564 | 0.0616 |
42_colo_control | rep2 | 0.0233 | 0.0596 | 0.0762 |
42_colo_control | rep3 | 0.0176 | 0.0461 | 0.0507 |
37_colo_mmc | rep1 | 0.01210 | 0.0976 | 0.2370 |
37_colo_mmc | rep2 | 0.00860 | 0.1090 | 0.2410 |
37_colo_mmc | rep3 | 0.00760 | 0.1110 | 0.2890 |
42_colo_mmc | rep1 | 0.00870 | 0.1120 | 0.3020 |
42_colo_mmc | rep2 | 0.01220 | 0.1330 | 0.3270 |
42_colo_mmc | rep3 | 0.00870 | 0.1120 | 0.3020 |
上面的示例数据作为数据框架 "the_data":
# Example data set: 4 conditions x 3 replicates, with the nec, late and apop
# value recorded for every replicate (12 rows, 5 columns).
the_data <- structure(
  list(
    condition = rep(
      c("37_colo_control", "42_colo_control", "37_colo_mmc", "42_colo_mmc"),
      each = 3L
    ),
    rep = rep(c("rep1", "rep2", "rep3"), times = 4L),
    nec = c(
      0.0209, 0.0013, 0.0076, # 37_colo_control
      0.0147, 0.0233, 0.0176, # 42_colo_control
      0.0121, 0.0086, 0.0076, # 37_colo_mmc
      0.0087, 0.0122, 0.0087  # 42_colo_mmc
    ),
    late = c(
      0.0334, 0.0402, 0.0546, # 37_colo_control
      0.0564, 0.0596, 0.0461, # 42_colo_control
      0.0976, 0.109, 0.111,   # 37_colo_mmc
      0.112, 0.133, 0.112     # 42_colo_mmc
    ),
    apop = c(
      0.0405, 0.0541, 0.0707, # 37_colo_control
      0.0616, 0.0762, 0.0507, # 42_colo_control
      0.237, 0.241, 0.289,    # 37_colo_mmc
      0.302, 0.327, 0.302     # 42_colo_mmc
    )
  ),
  class = "data.frame",
  row.names = c(NA, 12L)
)
代码
library(ggpubr)
library(ggprism)
library(ggplot2)
# Question code as posted: draws per-condition mean bars as three overlaid
# geom_bar() layers (apop+nec+late, then nec+late, then nec) and adds a
# stat_compare_means() layer after each of them.

# Placeholder from the post, not runnable R; the data is the table / the
# structure() call shown earlier in the post.
the_data <- read.csv(**[[参见数据表格的部分]]**)
# Order in which the conditions are placed on the x axis (used as factor
# levels in the ggplot() call further down).
factored_condition <- c("37_colo_control","37_colo_mmc","42_colo_control","42_colo_mmc")
# Pairwise comparisons: the first condition against each of the other three.
comparisons <- list(c(factored_condition[1],factored_condition[2]),
c(factored_condition[1],factored_condition[3]),
c(factored_condition[1],factored_condition[4])
)
# The grouped result is not assigned, so the_data itself is left unchanged.
# NOTE(review): group_by() is a dplyr function and dplyr is not among the
# library() calls above -- confirm it is attached some other way.
the_data %>%
group_by(condition)
# Only x is mapped at plot level; every layer below supplies its own y.
fig_bar <- ggplot(the_data, aes(x=factor(condition, levels=factored_condition)))+
# Bar height is the mean over the rows of each condition (stat = "summary").
geom_bar(aes(y=apop+nec+late),position=position_dodge(), stat="summary", fun="mean", fill = "grey65") +
# NOTE(review): the post reports "`stat_signif()` requires the following
# missing aesthetics: y"; presumably the layer-level `mapping` is not
# forwarded to the underlying ggsignif layer when `comparisons` is given --
# confirm against the ggpubr source.
stat_compare_means(mapping = aes(y=apop),
comparisons = comparisons, paired = TRUE, method = "t.test", label="p.signif",
# Custom significance symbols; the last interval (0.05, Inf] maps to " ".
symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
symbols = c("****","***", "**", "*", " "))) +
# Second overlay: mean of nec+late, drawn on top of the previous bars.
geom_bar(aes(y=nec+late),position=position_dodge(), stat="summary", fun="mean", fill = "grey45") +
stat_compare_means(mapping = aes(y=late),
comparisons = comparisons, paired = TRUE, method = "t.test", label="p.signif",
symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
symbols = c("****","***", "**", "*", " "))) +
# Third overlay: mean of nec only.
geom_bar(aes(y=nec),position=position_dodge(), stat="summary", fun="mean", fill = "grey 15") +
stat_compare_means(mapping = aes(y=nec),
comparisons = comparisons, paired = TRUE, method = "t.test", label="p.signif",
symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
symbols = c("****","***", "**", "*", " "))) +
labs(y="细胞百分比", x="", fill = "") +
ggtitle("Colo205") +
# y axis fixed to 0..1 and labelled as percentages.
scale_y_continuous(expand=c(0,0),limits = c(0,1.0), labels = scales::percent) +
# NOTE(review): x.names is not defined anywhere in the code shown -- confirm
# where it comes from.
scale_x_discrete(labels=x.names) +
theme_prism()
# Print the plot.
fig_bar
基本上,我尝试的方法是将 stat_compare_means 部分复制粘贴到每个单独的条形图中。
英文:
I am building a stacked bar graph using ggplot. Here is the code I am currently using to generate the plot; it stacks the sum of the values in apop, nec, and late, with differently colored bars so that it is clear how much each category contributes to the sum.
This is a picture of the graph I get when I simply ignore the stats.
This is what I have tried so far
data table
condition | rep | nec | late | apop |
---|---|---|---|---|
37_colo_control | rep1 | 0.0209 | 0.0334 | 0.0405 |
37_colo_control | rep2 | 0.0013 | 0.0402 | 0.0541 |
37_colo_control | rep3 | 0.0076 | 0.0546 | 0.0707 |
42_colo_control | rep1 | 0.0147 | 0.0564 | 0.0616 |
42_colo_control | rep2 | 0.0233 | 0.0596 | 0.0762 |
42_colo_control | rep3 | 0.0176 | 0.0461 | 0.0507 |
37_colo_mmc | rep1 | 0.01210 | 0.0976 | 0.2370 |
37_colo_mmc | rep2 | 0.00860 | 0.1090 | 0.2410 |
37_colo_mmc | rep3 | 0.00760 | 0.1110 | 0.2890 |
42_colo_mmc | rep1 | 0.00870 | 0.1120 | 0.3020 |
42_colo_mmc | rep2 | 0.01220 | 0.1330 | 0.3270 |
42_colo_mmc | rep3 | 0.00870 | 0.1120 | 0.3020 |
above sample data as dataframe "the_data":
# Example data set: 4 conditions x 3 replicates, with the nec, late and apop
# value recorded for every replicate (12 rows, 5 columns).
the_data <- structure(
  list(
    condition = rep(
      c("37_colo_control", "42_colo_control", "37_colo_mmc", "42_colo_mmc"),
      each = 3L
    ),
    rep = rep(c("rep1", "rep2", "rep3"), times = 4L),
    nec = c(
      0.0209, 0.0013, 0.0076, # 37_colo_control
      0.0147, 0.0233, 0.0176, # 42_colo_control
      0.0121, 0.0086, 0.0076, # 37_colo_mmc
      0.0087, 0.0122, 0.0087  # 42_colo_mmc
    ),
    late = c(
      0.0334, 0.0402, 0.0546, # 37_colo_control
      0.0564, 0.0596, 0.0461, # 42_colo_control
      0.0976, 0.109, 0.111,   # 37_colo_mmc
      0.112, 0.133, 0.112     # 42_colo_mmc
    ),
    apop = c(
      0.0405, 0.0541, 0.0707, # 37_colo_control
      0.0616, 0.0762, 0.0507, # 42_colo_control
      0.237, 0.241, 0.289,    # 37_colo_mmc
      0.302, 0.327, 0.302     # 42_colo_mmc
    )
  ),
  class = "data.frame",
  row.names = c(NA, 12L)
)
code
library(ggpubr)
library(ggprism)
library(ggplot2)
# Question code as posted: draws per-condition mean bars as three overlaid
# geom_bar() layers (apop+nec+late, then nec+late, then nec) and adds a
# stat_compare_means() layer after each of them.

# Placeholder from the post, not runnable R; the data is the table / the
# structure() call shown earlier in the post.
the_data <- read.csv(**[[see table for data]]**)
# Order in which the conditions are placed on the x axis (used as factor
# levels in the ggplot() call further down).
factored_condition <- c("37_colo_control","37_colo_mmc","42_colo_control","42_colo_mmc")
# Pairwise comparisons: the first condition against each of the other three.
comparisons <- list(c(factored_condition[1],factored_condition[2]),
c(factored_condition[1],factored_condition[3]),
c(factored_condition[1],factored_condition[4])
)
# The grouped result is not assigned, so the_data itself is left unchanged.
# NOTE(review): group_by() is a dplyr function and dplyr is not among the
# library() calls above -- confirm it is attached some other way.
the_data %>%
group_by(condition)
# Only x is mapped at plot level; every layer below supplies its own y.
fig_bar <- ggplot(the_data, aes(x=factor(condition, levels=factored_condition)))+
# Bar height is the mean over the rows of each condition (stat = "summary").
geom_bar(aes(y=apop+nec+late),position=position_dodge(), stat="summary", fun="mean", fill = "grey65") +
# NOTE(review): the post reports "`stat_signif()` requires the following
# missing aesthetics: y"; presumably the layer-level `mapping` is not
# forwarded to the underlying ggsignif layer when `comparisons` is given --
# confirm against the ggpubr source.
stat_compare_means(mapping = aes(y=apop),
comparisons = comparisons, paired = TRUE, method = "t.test", label="p.signif",
# Custom significance symbols; the last interval (0.05, Inf] maps to " ".
symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
symbols = c("****","***", "**", "*", " "))) +
# Second overlay: mean of nec+late, drawn on top of the previous bars.
geom_bar(aes(y=nec+late),position=position_dodge(), stat="summary", fun="mean", fill = "grey45") +
stat_compare_means(mapping = aes(y=late),
comparisons = comparisons, paired = TRUE, method = "t.test", label="p.signif",
symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
symbols = c("****","***", "**", "*", " "))) +
# Third overlay: mean of nec only.
geom_bar(aes(y=nec),position=position_dodge(), stat="summary", fun="mean", fill = "grey 15") +
stat_compare_means(mapping = aes(y=nec),
comparisons = comparisons, paired = TRUE, method = "t.test", label="p.signif",
symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
symbols = c("****","***", "**", "*", " "))) +
labs(y="Percent of Cells", x="", fill = "") +
ggtitle("Colo205") +
# y axis fixed to 0..1 and labelled as percentages.
scale_y_continuous(expand=c(0,0),limits = c(0,1.0), labels = scales::percent) +
# NOTE(review): x.names is not defined anywhere in the code shown -- confirm
# where it comes from.
scale_x_discrete(labels=x.names) +
theme_prism()
# Print the plot.
fig_bar
Basically, what I tried was copy-pasting the stat_compare_means section after each individual bar layer. However, I keep getting an error; I am not sure what is wrong, as I am putting y = apop / nec / late in the aes.
Error in `ggsignif::geom_signif()`:
! Problem while computing stat.
i Error occurred in the 3rd layer.
Caused by error in `compute_layer()`:
! `stat_signif()` requires the following missing aesthetics: y
Backtrace:
答案1
得分: 1
借助 tidy data(整洁数据)的概念,事情会变得更简单;在本例中,这意味着将数据重塑为长格式。这样,您就不必对每一列重复发出相同的指令,而只需对每个组(由原始列名派生)执行一次。
示例:
- 前奏
library(dplyr)
library(tidyr) ## to reshape
library(ggplot2)
library(ggpubr)
library(ggprism)
# Condition labels; the first entry is the reference condition for the
# pairwise comparisons below.
factored_condition <- c(
  "37_colo_control",
  "37_colo_mmc",
  "42_colo_control",
  "42_colo_mmc"
)

# One pair per non-reference condition: c(reference, other).
comparisons <- lapply(
  factored_condition[-1],
  function(other) c(factored_condition[1], other)
)

# Example data set: 4 conditions x 3 replicates, with the nec, late and apop
# value recorded for every replicate (12 rows, 5 columns).
the_data <- structure(
  list(
    condition = rep(
      c("37_colo_control", "42_colo_control", "37_colo_mmc", "42_colo_mmc"),
      each = 3L
    ),
    rep = rep(c("rep1", "rep2", "rep3"), times = 4L),
    nec = c(
      0.0209, 0.0013, 0.0076, # 37_colo_control
      0.0147, 0.0233, 0.0176, # 42_colo_control
      0.0121, 0.0086, 0.0076, # 37_colo_mmc
      0.0087, 0.0122, 0.0087  # 42_colo_mmc
    ),
    late = c(
      0.0334, 0.0402, 0.0546, # 37_colo_control
      0.0564, 0.0596, 0.0461, # 42_colo_control
      0.0976, 0.109, 0.111,   # 37_colo_mmc
      0.112, 0.133, 0.112     # 42_colo_mmc
    ),
    apop = c(
      0.0405, 0.0541, 0.0707, # 37_colo_control
      0.0616, 0.0762, 0.0507, # 42_colo_control
      0.237, 0.241, 0.289,    # 37_colo_mmc
      0.302, 0.327, 0.302     # 42_colo_mmc
    )
  ),
  class = "data.frame",
  row.names = c(NA, 12L)
)
- 重塑和计算百分比:
# Reshape to long format: the nec..apop columns become `parameter`/`value`
# pairs, then `value_percent` is each value divided by the sum of the whole
# `value` column.
# NOTE(review): the denominator is the total over all conditions and
# replicates, not a per-condition total -- confirm that this is intended.
the_data <- mutate(
  pivot_longer(the_data, cols = nec:apop, names_to = "parameter"),
  value_percent = value / sum(value)
)
> head(the_data, 4)
# A tibble: 4 x 5
condition rep parameter value value_percent
<chr> <chr> <chr> <dbl> <dbl>
1 37_colo_control rep1 nec 0.0209 0.00661
2 37_colo_control rep1 late 0.0334 0.0106
3 37_colo_control rep1 apop 0.0405 0.0128
4 37_colo_control rep2 nec 0.0013 0.000411
- 绘图:
# Columns per condition, filled by parameter, with significance brackets for
# the pairs listed in `comparisons`.
ggplot(
  data = the_data,
  mapping = aes(x = condition, y = value_percent, group = parameter)
) +
  geom_col(mapping = aes(fill = parameter), alpha = 0.5) +
  stat_compare_means(
    method = "t.test",
    paired = TRUE,
    comparisons = comparisons,
    label = "p.signif",
    symnum.args = list(
      cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
      # "****", "***", "**", "*", "" -- one symbol per cutpoint interval
      symbols = strrep("*", 4:0)
    ),
    # a larger step leaves more vertical space between the brackets
    step.increase = 0.5
  ) +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  scale_fill_grey()
英文:
Things get easier with the concept of tidy data, which in this case includes reshaping your data to long format. Doing so, you don't have to issue the same instruction for each and every column, but instead do it once per group (derived from the initial column names).
Example:
- prelude
library(dplyr)
library(tidyr) ## to reshape
library(ggplot2)
library(ggpubr)
library(ggprism)
# Condition labels; the first entry is the reference condition for the
# pairwise comparisons below.
factored_condition <- c(
  "37_colo_control",
  "37_colo_mmc",
  "42_colo_control",
  "42_colo_mmc"
)

# One pair per non-reference condition: c(reference, other).
comparisons <- lapply(
  factored_condition[-1],
  function(other) c(factored_condition[1], other)
)

# Example data set: 4 conditions x 3 replicates, with the nec, late and apop
# value recorded for every replicate (12 rows, 5 columns).
the_data <- structure(
  list(
    condition = rep(
      c("37_colo_control", "42_colo_control", "37_colo_mmc", "42_colo_mmc"),
      each = 3L
    ),
    rep = rep(c("rep1", "rep2", "rep3"), times = 4L),
    nec = c(
      0.0209, 0.0013, 0.0076, # 37_colo_control
      0.0147, 0.0233, 0.0176, # 42_colo_control
      0.0121, 0.0086, 0.0076, # 37_colo_mmc
      0.0087, 0.0122, 0.0087  # 42_colo_mmc
    ),
    late = c(
      0.0334, 0.0402, 0.0546, # 37_colo_control
      0.0564, 0.0596, 0.0461, # 42_colo_control
      0.0976, 0.109, 0.111,   # 37_colo_mmc
      0.112, 0.133, 0.112     # 42_colo_mmc
    ),
    apop = c(
      0.0405, 0.0541, 0.0707, # 37_colo_control
      0.0616, 0.0762, 0.0507, # 42_colo_control
      0.237, 0.241, 0.289,    # 37_colo_mmc
      0.302, 0.327, 0.302     # 42_colo_mmc
    )
  ),
  class = "data.frame",
  row.names = c(NA, 12L)
)
- reshape and calculate percentages:
# Reshape to long format: the nec..apop columns become `parameter`/`value`
# pairs, then `value_percent` is each value divided by the sum of the whole
# `value` column.
# NOTE(review): the denominator is the total over all conditions and
# replicates, not a per-condition total -- confirm that this is intended.
the_data <- mutate(
  pivot_longer(the_data, cols = nec:apop, names_to = "parameter"),
  value_percent = value / sum(value)
)
> head(the_data, 4)
# A tibble: 4 x 5
condition rep parameter value value_percent
<chr> <chr> <chr> <dbl> <dbl>
1 37_colo_control rep1 nec 0.0209 0.00661
2 37_colo_control rep1 late 0.0334 0.0106
3 37_colo_control rep1 apop 0.0405 0.0128
4 37_colo_control rep2 nec 0.0013 0.000411
- plot:
# Columns per condition, filled by parameter, with significance brackets for
# the pairs listed in `comparisons`.
ggplot(
  data = the_data,
  mapping = aes(x = condition, y = value_percent, group = parameter)
) +
  geom_col(mapping = aes(fill = parameter), alpha = 0.5) +
  stat_compare_means(
    method = "t.test",
    paired = TRUE,
    comparisons = comparisons,
    label = "p.signif",
    symnum.args = list(
      cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
      # "****", "***", "**", "*", "" -- one symbol per cutpoint interval
      symbols = strrep("*", 4:0)
    ),
    # a larger step leaves more vertical space between the brackets
    step.increase = 0.5
  ) +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  scale_fill_grey()
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论