英文:
adding standard errors to correct panels on faceted bar chart
问题
我已翻译代码部分,以下是翻译好的内容:
我有这个数据框:
这是一个数据框,其中每一行都是一个类别,列与时间序列(从T0到T5)相对应。
我想为每个类别(taxon)的时间(T0-T5)制作一个条形图:
melted_df <- reshape2::melt(taxonFC1, id.vars = "taxon", variable.name = "timepoint", value.name = "value")
ggplot(melted_df, aes(x = timepoint, y = value, fill = taxon)) +
geom_bar(stat = "identity") +
facet_wrap(~ taxon, ncol = 3) +
labs(title = "不同时间序列的条形图",
x = "时间点",
y = "数值",
fill = "类别")
问题是是否可以为每个时间序列分配标准误差(se列)到其对数FC值(lfc列)。
更新:
我已经为T0做到了:
ggplot(data = taxonFC1, aes(x = taxon, y = lfc_StatusChronic.ACST0., fill = taxon)) +
geom_bar(stat = "identity") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
xlab("类别") +
ylab("lfc_StatusChronic.ACST0.") +
ggtitle("按类别绘制的lfc_StatusChronic.ACST0.条形图") +
# 使用se_StatusChronic.ACST0.列添加误差线
geom_errorbar(aes(ymin = lfc_StatusChronic.ACST0. - se_StatusChronic.ACST0.,
ymax = lfc_StatusChronic.ACST0. + se_StatusChronic.ACST0.),
width = 0.4)
希望这可以帮助你绘制所需的图表。
英文:
I have this dataframe:
structure(list(taxon = c("Acidaminococcus", "Butyricicoccus",
"Butyrivibrio", "Collinsella", "Coprococcus", "Olsenella", "Parabacteroides",
"Paraprevotella", "Pasteurellaceae_unclassified"), lfc_StatusChronic.ACST0. = c(0.88175957,
0.88803574, 0.790947444, 1.319321361, 0.7176503, 0.758374253,
-0.833877215, -1.106098414, 0.932218695), se_StatusChronic.ACST0. = c(0.439259504,
0.449835605, 0.369268494, 0.391714918, 0.27578621, 0.364036816,
0.377314959, 0.485078744, 0.421283473), lfc_Time.fT1 = c(-0.021243562,
0.66196107, 0.334274258, -0.382520121, -0.005363874, -0.313304181,
-0.439558941, -0.029316428, 0.682658747), se_Time.fT1 = c(0.312681188,
0.330173331, 0.301559494, 0.309355933, 0.293938402, 0.302957725,
0.339292487, 0.361459254, 0.385696553), lfc_Time.fT2 = c(-1.092105655,
-0.083635974, -0.435405323, -1.221704783, -0.557850053, -0.734425087,
-0.19277856, 0.148094198, 0.461233277), se_Time.fT2 = c(0.326565043,
0.344533883, 0.31544836, 0.323423323, 0.307225241, 0.317023725,
0.354270528, 0.377368442, 0.403530764), lfc_Time.fT3 = c(-0.684898535,
0.007779894, -0.661494348, -0.765693993, -0.294827229, -1.082174069,
-0.428338824, 0.072377208, 0.682615791), se_Time.fT3 = c(0.324919486,
0.342422134, 0.314578177, 0.322254678, 0.305999846, 0.316331693,
0.352370636, 0.375283079, 0.402530027), lfc_Time.fT4 = c(-1.038613852,
-0.159777157, -0.172345815, -0.691220321, -0.444048742, -1.062300665,
0.073495083, 0.295212326, 0.337145234), se_Time.fT4 = c(0.319416657,
0.336513636, 0.309526757, 0.316959694, 0.300928605, 0.311343927,
0.346365478, 0.36886735, 0.396117478), lfc_Time.fT5 = c(-0.714954683,
0.081376697, -0.621676699, -0.483698623, -0.339094441, -0.718106519,
-0.055315775, 0.475970869, 0.160939365), se_Time.fT5 = c(0.317230276,
0.334106044, 0.307553106, 0.314893819, 0.298943665, 0.309379791,
0.343965965, 0.366296439, 0.393607858)), row.names = c(NA, -9L
), class = "data.frame")
It is a dataframe where each row is a category, and the columns correspond with a time series (from T0 til T5).
I want to do a bar chart for each category (taxon) for their time (T0-T5):
melted_df <- reshape2::melt(taxonFC1, id.vars = "taxon", variable.name = "timepoint", value.name = "value")
ggplot(melted_df, aes(x = timepoint, y = value, fill = taxon)) +
geom_bar(stat = "identity") +
facet_wrap(~ taxon, ncol = 3) +
labs(title = "Bar Chart for Different Time Series",
x = "Time Point",
y = "Value",
fill = "Category")
The question is if it is possible to assign the standard error (se columns) to their logFC value (lfc columns) for each time series.
Update:
I did this, but only for T0:
ggplot(data = taxonFC1, aes(x = taxon, y = lfc_StatusChronic.ACST0., fill = taxon)) +
geom_bar(stat = "identity") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
xlab("Category") +
ylab("lfc_StatusChronic.ACST0.") +
ggtitle("Bar Plot of lfc_StatusChronic.ACST0. by Category") +
# Add error bars using se_StatusChronic.ACST0. column
geom_errorbar(aes(ymin = lfc_StatusChronic.ACST0. - se_StatusChronic.ACST0.,
ymax = lfc_StatusChronic.ACST0. + se_StatusChronic.ACST0.),
width = 0.4)
Output expected (the image is from other data):
答案1
得分: 1
这是你要找的吗?
library(dplyr)
library(tidyr)
library(ggplot2)
dat <- structure(list(taxon = c("Acidaminococcus", "Butyricicoccus",
"Butyrivibrio", "Collinsella", "Coprococcus", "Olsenella", "Parabacteroides",
"Paraprevotella", "Pasteurellaceae_unclassified"), lfc_StatusChronic.ACST0. = c(0.88175957,
0.88803574, 0.790947444, 1.319321361, 0.7176503, 0.758374253,
-0.833877215, -1.106098414, 0.932218695), se_StatusChronic.ACST0. = c(0.439259504,
0.449835605, 0.369268494, 0.391714918, 0.27578621, 0.364036816,
0.377314959, 0.485078744, 0.421283473), lfc_Time.fT1 = c(-0.021243562,
0.66196107, 0.334274258, -0.382520121, -0.005363874, -0.313304181,
-0.439558941, -0.029316428, 0.682658747), se_Time.fT1 = c(0.312681188,
0.330173331, 0.301559494, 0.309355933, 0.293938402, 0.302957725,
0.339292487, 0.361459254, 0.385696553), lfc_Time.fT2 = c(-1.092105655,
-0.083635974, -0.435405323, -1.221704783, -0.557850053, -0.734425087,
-0.192778560, 0.148094198, 0.461233277), se_Time.fT2 = c(0.326565043,
0.344533883, 0.315448360, 0.323423323, 0.307225241, 0.317023725,
0.354270528, 0.377368442, 0.403530764), lfc_Time.fT3 = c(-0.684898535,
0.007779894, -0.661494348, -0.765693993, -0.294827229, -1.082174069,
-0.428338824, 0.072377208, 0.682615791), se_Time.fT3 = c(0.324919486,
0.342422134, 0.314578177, 0.322254678, 0.305999846, 0.316331693,
0.352370636, 0.375283079, 0.402530027), lfc_Time.fT4 = c(-1.038613852,
-0.159777157, -0.172345815, -0.691220321, -0.444048742, -1.062300665,
0.073495083, 0.295212326, 0.337145234), se_Time.fT4 = c(0.319416657,
0.336513636, 0.309526757, 0.316959694, 0.300928605, 0.311343927,
0.346365478, 0.368867350, 0.396117478), lfc_Time.fT5 = c(-0.714954683,
0.081376697, -0.621676699, -0.483698623, -0.339094441, -0.718106519,
-0.055315775, 0.475970869, 0.160939365), se_Time.fT5 = c(0.317230276,
0.334106044, 0.307553106, 0.314893819, 0.298943665, 0.309379791,
0.343965965, 0.366296439, 0.393607858)), row.names = c(NA, -9L
), class = "data.frame")
dat %>%
rename(lfc_time.fT0 = lfc_StatusChronic.ACST0.,
se_Time.fT0 = se_StatusChronic.ACST0.) %>%
pivot_longer(-taxon, names_pattern="(.*)_[Tt]ime\\.f(.*)",
names_to = c(".value", "time")) %>%
ggplot(aes(x = time, y = lfc, ymin = lfc - se, ymax = lfc + se, fill = taxon)) +
geom_bar(stat = "identity") +
geom_errorbar(width=.4) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
scale_fill_brewer(palette="Set1") +
xlab("Category") +
ylab("lfc_StatusChronic.ACST0.") +
facet_wrap(~taxon, ncol=1) +
ggtitle("Bar Plot of lfc_StatusChronic.ACST0. by Category")
如果是的话,关键是将T0
变量重命名为与其他时间段变量具有相同格式,然后使用pivot_longer()
将所有lfc
测量放入单个列中,所有se
测量放入单个列中。其余的操作是通过在时间变量上进行分面处理来完成的。 pivot_longer()
文档中有一些保留多列的良好示例,特别是页面上的最后一个示例。
英文:
Is this what you're looking for?
library(dplyr)
library(tidyr)
library(ggplot2)
dat <- structure(list(taxon = c("Acidaminococcus", "Butyricicoccus",
"Butyrivibrio", "Collinsella", "Coprococcus", "Olsenella", "Parabacteroides",
"Paraprevotella", "Pasteurellaceae_unclassified"), lfc_StatusChronic.ACST0. = c(0.88175957,
0.88803574, 0.790947444, 1.319321361, 0.7176503, 0.758374253,
-0.833877215, -1.106098414, 0.932218695), se_StatusChronic.ACST0. = c(0.439259504,
0.449835605, 0.369268494, 0.391714918, 0.27578621, 0.364036816,
0.377314959, 0.485078744, 0.421283473), lfc_Time.fT1 = c(-0.021243562,
0.66196107, 0.334274258, -0.382520121, -0.005363874, -0.313304181,
-0.439558941, -0.029316428, 0.682658747), se_Time.fT1 = c(0.312681188,
0.330173331, 0.301559494, 0.309355933, 0.293938402, 0.302957725,
0.339292487, 0.361459254, 0.385696553), lfc_Time.fT2 = c(-1.092105655,
-0.083635974, -0.435405323, -1.221704783, -0.557850053, -0.734425087,
-0.19277856, 0.148094198, 0.461233277), se_Time.fT2 = c(0.326565043,
0.344533883, 0.31544836, 0.323423323, 0.307225241, 0.317023725,
0.354270528, 0.377368442, 0.403530764), lfc_Time.fT3 = c(-0.684898535,
0.007779894, -0.661494348, -0.765693993, -0.294827229, -1.082174069,
-0.428338824, 0.072377208, 0.682615791), se_Time.fT3 = c(0.324919486,
0.342422134, 0.314578177, 0.322254678, 0.305999846, 0.316331693,
0.352370636, 0.375283079, 0.402530027), lfc_Time.fT4 = c(-1.038613852,
-0.159777157, -0.172345815, -0.691220321, -0.444048742, -1.062300665,
0.073495083, 0.295212326, 0.337145234), se_Time.fT4 = c(0.319416657,
0.336513636, 0.309526757, 0.316959694, 0.300928605, 0.311343927,
0.346365478, 0.36886735, 0.396117478), lfc_Time.fT5 = c(-0.714954683,
0.081376697, -0.621676699, -0.483698623, -0.339094441, -0.718106519,
-0.055315775, 0.475970869, 0.160939365), se_Time.fT5 = c(0.317230276,
0.334106044, 0.307553106, 0.314893819, 0.298943665, 0.309379791,
0.343965965, 0.366296439, 0.393607858)), row.names = c(NA, -9L
), class = "data.frame")
dat %>%
rename(lfc_time.fT0 = lfc_StatusChronic.ACST0.,
se_Time.fT0 = se_StatusChronic.ACST0.) %>%
pivot_longer(-taxon, names_pattern="(.*)_[Tt]ime\\.f(.*)",
names_to = c(".value", "time")) %>%
ggplot(aes(x = time, y = lfc, ymin = lfc - se, ymax = lfc + se, fill = taxon)) +
geom_bar(stat = "identity") +
geom_errorbar(width=.4) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
scale_fill_brewer(palette="Set1") +
xlab("Category") +
ylab("lfc_StatusChronic.ACST0.") +
facet_wrap(~taxon, ncol=1) +
ggtitle("Bar Plot of lfc_StatusChronic.ACST0. by Category")```
If so, the key is to rename the T0
variables to have the same format as the other time-period variables and then use pivot_longer()
to put all the lfc
measures in a single column and all the se
measures in a single column. The rest is accomplished with faceting on the time variable. The pivot_longer()
documentation has some good examples of retaining multiple columns, see in particular the last example on the page.
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论