英文:
Summarize number of significantly up and down-regulated genes per group
问题
在一个差异表达值的数据框(data.frame)中,统计每个组内显著上调和下调的基因数量。显著性由 FDR(错误发现率,即经 Benjamini 校正后的 p 值)和差异倍数(fold-change)共同定义。结果应为一张显示每组上调与下调基因数的图。(额外加分:在图中区分不同的 FC 水平,例如 0.5、1、2、4 和 >4。)
我的解决方案似乎过于复杂,必定有更简单的方法。
示例数据
创建dex df
# Build one group's mock differential-expression table: every gene gets a
# random integer logFC drawn from -5..5 and an FDR of either 0.01 or 1.
# logFC is drawn before FDR, matching the argument order of data.frame().
make_dex <- function(group_name, genes) {
  n_genes <- length(genes)
  data.frame(
    gene = genes,
    group = group_name,
    logFC = sample(-5:5, size = n_genes, replace = TRUE),
    FDR = sample(c(0.01, 1), size = n_genes, replace = TRUE)
  )
}

# Draw 100 gene names (without replacement) from a pool of 1000.
gene_creator <- paste0("gene", 1:1000)
genes <- sample(gene_creator, 100)

# The same genes are reused in every group; only logFC / FDR differ.
dex_A <- make_dex("group_A", genes)
dex_B <- make_dex("group_B", genes)
dex_C <- make_dex("group_C", genes)
dex_D <- make_dex("group_D", genes)

# Stack the four groups into one long table (400 rows).
dex_df <- rbind(dex_A, dex_B, dex_C, dex_D)
解决方案
library("tidyverse")
FC上调
# Number of significantly up-regulated genes per group:
# FDR at most 0.05 and logFC above 0.5. Groups without any hit are absent.
dex_up <- dex_df %>%
  filter(FDR <= 0.05, logFC > 0.5) %>%
  group_by(group) %>%
  summarise(n_up = n())
FC下调
# Number of significantly down-regulated genes per group:
# FDR at most 0.05 and logFC below -0.5. The cut-off mirrors the +0.5 used
# for up-regulation; comparing against +0.5 here would also count unchanged
# genes (logFC == 0) as down-regulated.
dex_down <- dex_df %>%
  group_by(group) %>%
  filter(FDR <= 0.05) %>%
  filter(logFC < -0.5) %>%
  summarise(n_down = n())
格式化
# Combine both counts. full_join() keeps a group that appears in only one of
# the two tables (a left join would drop groups with down hits but no up
# hits); the count missing for such a group is set to 0.
dex_comb <- full_join(dex_up, dex_down, by = "group") %>%
  replace_na(list(n_up = 0L, n_down = 0L))
# Negate the down counts so their bars extend below zero in the plot.
dex_comb$n_down <- dex_comb$n_down * -1
# One row per group and direction, as expected by ggplot.
dex_comb_long <- dex_comb %>%
  pivot_longer(!group, names_to = "direction", values_to = "n")
绘制图表
# Bar chart of the signed counts: up counts above zero, down counts below.
ggplot(dex_comb_long, aes(x = group, y = n, fill = direction)) +
  # geom_col() draws the values as given, like geom_bar(stat = "identity")
  geom_col(position = "identity") +
  # vjust = -sign(n) puts labels above positive bars and below negative ones
  geom_text(aes(label = n, vjust = -sign(n))) +
  labs(title = "每组Dex基因数量")
英文:
In a data.frame of differential expression values, count the genes per group that are significantly up and down-regulated. Significance shall be defined by FDR (false discovery rate = adjusted p-value from Benjamini) and fold-change. Results should be a plot with up and down regs per group.
(Sweet bonus: show the different FC levels in the plot, e.g. 0.5, 1, 2, 4, >4.)
My solution seems way too complicated, there must be an easier way.
Example data
# create dex df

# Build one group's mock differential-expression table: every gene gets a
# random integer logFC drawn from -5..5 and an FDR of either 0.01 or 1.
# logFC is drawn before FDR, matching the argument order of data.frame().
make_dex <- function(group_name, genes) {
  n_genes <- length(genes)
  data.frame(
    gene = genes,
    group = group_name,
    logFC = sample(-5:5, size = n_genes, replace = TRUE),
    FDR = sample(c(0.01, 1), size = n_genes, replace = TRUE)
  )
}

# Draw 100 gene names (without replacement) from a pool of 1000.
gene_creator <- paste0("gene", 1:1000)
genes <- sample(gene_creator, 100)

# The same genes are reused in every group; only logFC / FDR differ.
dex_A <- make_dex("group_A", genes)
dex_B <- make_dex("group_B", genes)
dex_C <- make_dex("group_C", genes)
dex_D <- make_dex("group_D", genes)

# Stack the four groups into one long table (400 rows).
dex_df <- rbind(dex_A, dex_B, dex_C, dex_D)
Solution
library("tidyverse")
# FC up: number of significantly up-regulated genes per group
# (FDR at most 0.05 and logFC above 0.5). Groups without any hit are absent.
dex_up <- dex_df %>%
  filter(FDR <= 0.05, logFC > 0.5) %>%
  group_by(group) %>%
  summarise(n_up = n())
# FC down: number of significantly down-regulated genes per group
# (FDR at most 0.05 and logFC below -0.5). The cut-off mirrors the +0.5 used
# for up-regulation; comparing against +0.5 here would also count unchanged
# genes (logFC == 0) as down-regulated.
dex_down <- dex_df %>%
  group_by(group) %>%
  filter(FDR <= 0.05) %>%
  filter(logFC < -0.5) %>%
  summarise(n_down = n())
# format: combine both counts. full_join() keeps a group that appears in
# only one of the two tables (a left join would drop groups with down hits
# but no up hits); the count missing for such a group is set to 0.
dex_comb <- full_join(dex_up, dex_down, by = "group") %>%
  replace_na(list(n_up = 0L, n_down = 0L))
# Negate the down counts so their bars extend below zero in the plot.
dex_comb$n_down <- dex_comb$n_down * -1
# One row per group and direction, as expected by ggplot.
dex_comb_long <- dex_comb %>%
  pivot_longer(!group, names_to = "direction", values_to = "n")
# plot: bar chart of the signed counts, up above zero and down below.
ggplot(dex_comb_long, aes(x = group, y = n, fill = direction)) +
  # geom_col() draws the values as given, like geom_bar(stat = "identity")
  geom_col(position = "identity") +
  # vjust = -sign(n) puts labels above positive bars and below negative ones
  geom_text(aes(label = n, vjust = -sign(n))) +
  labs(title = "Dex numbers per group")
答案1
得分: 1
统计某个条件成立次数的常用方法,是对该条件使用 sum() 求和。
下文用一份简化的示例数据进行说明。
关于如何在图表中体现 logFC 值,我的建议见下文最后一段代码。
英文:
The usual way to count the number of times a condition is met is to sum()
that condition:
# Per group, count the significant genes (FDR <= 0.05) in each direction.
# Summing a logical vector counts its TRUE values, so a group without hits
# gets 0 instead of being dropped. The down count is negated so that its bar
# points below zero in the plot.
dex_summary <- dex_df %>%
  group_by(group) %>%
  summarize(
    n_up = sum(FDR <= 0.05 & logFC > 0.5),
    # down-regulated means logFC below -0.5, mirroring the up cut-off;
    # `logFC < 0.5` would also count unchanged genes (logFC == 0)
    n_down = -sum(FDR <= 0.05 & logFC < -0.5)
  ) %>%
  # one row per group and direction, as expected by ggplot
  pivot_longer(-group, names_to = "direction", values_to = "n")
# plot: one bar per group and direction, labelled with its signed count
ggplot(dex_summary, aes(x = group, y = n, fill = direction)) +
  # geom_col() is used in place of geom_bar(stat = "identity")
  geom_col() +
  # vjust = -sign(n) puts labels above positive bars and below negative ones
  geom_text(aes(label = n, vjust = -sign(n))) +
  # pad the y scale by 0.5 to leave room for the numbers
  scale_y_continuous(expand = expansion(add = 0.5)) +
  labs(title = "Dex numbers per group")
Illustrated with this simplified sample data:
# Fixed seed so the simplified example is reproducible.
set.seed(47)

# Build one group's mock differential-expression table: every gene gets a
# random integer logFC drawn from -5..5 and an FDR of either 0.01 or 1.
# logFC is drawn before FDR, matching the argument order of data.frame().
make_dex <- function(group_name, genes) {
  n_genes <- length(genes)
  data.frame(
    gene = genes,
    group = group_name,
    logFC = sample(-5:5, size = n_genes, replace = TRUE),
    FDR = sample(c(0.01, 1), size = n_genes, replace = TRUE)
  )
}

# Draw 8 gene names (without replacement) from a pool of 100.
gene_creator <- paste0("gene", 1:100)
genes <- sample(gene_creator, 8)

dex_A <- make_dex("group_A", genes)
dex_B <- make_dex("group_B", genes)

# Stack both groups into one long table (16 rows).
dex_df <- rbind(dex_A, dex_B)
Here's my suggestion for including logFC
values in the plot:
## sample data was re-run with 20 samples per group for this plot
# Keep significant genes whose |logFC| exceeds 0.5, count them per group and
# logFC value, and sign each count by its direction so that down-regulated
# counts are drawn below zero.
dex_by_fc <- dex_df %>%
  filter(FDR <= 0.05, abs(logFC) > 0.5) %>%
  count(group, logFC) %>%
  mutate(
    direction = sign(logFC),
    # direction is already -1 or 1, so it can scale the count directly
    n_dir = n * direction
  )

# One panel per group, one bar per logFC level.
ggplot(dex_by_fc, aes(x = factor(logFC), y = n_dir, fill = factor(direction))) +
  geom_col() +
  facet_wrap(vars(group))
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论