Add text to ggplot (在MA图中添加不同ially表达基因的数量)

huangapple go评论73阅读模式
英文:

Add text to ggplot (add number of differentially expressed genes to MA plot)

问题

Sure, here's the translated code part without the code comments:

在MA图函数中,将显著表达的基因数量添加到图中。
MA图显示两组之间差异表达的基因,其中包括它们的折叠变化和平均表达。
我试图将不同ially表达的基因总数添加到图的右上角。而不是“硬编码”x和y坐标,更希望相对定位。

也许应该将数字添加到包含要绘制的数据的df中,而不是新的数据?任何建议都将不胜感激!

# 示例数据

set.seed(47)
gene_creator <- paste("gene", 1:100, sep="")
genes = sample(gene_creator, 8)

dex_A <- data.frame(
  gene = genes,
  group = "group_A",
  logFC = sample(c(-5:5), replace=T, size=8),
  adj.P.Val = sample(c(0.01, 1), replace=T, size=8),
  AveExpr = sample(c(30:36), replace=T, size=8)
)

dex_B <- data.frame(
  gene = genes,
  group = "group_B",
  logFC = sample(c(-5:5), replace=T, size=8),
  adj.P.Val = sample(c(0.01, 1), replace=T, size=8),
  AveExpr = sample(c(30:36), replace=T, size=8)
)

dex_df <- rbind(dex_A, dex_B)

# 解决方案(不起作用)

library('tidyverse')
library('ggrepel')

## 找到最小和最大的基因
minmax <- bind_rows(dex_df %>%
                      filter(adj.P.Val < 0.05) %>%
                      group_by(group) %>%
                      slice_max(logFC, n = 3) %>%
                      mutate(rank = "top5"),
                    dex_df %>%
                      filter(adj.P.Val < 0.05) %>%
                      group_by(group) %>%
                      slice_min(logFC, n = 3) %>%
                      mutate(rank = "min5"))

## 获取数字
numbers <- dex_df %>%
  group_by(group) %>%
  summarize(
    n_up = sum(adj.P.Val <= 0.05 & logFC > 0.5),
    n_down = -sum(adj.P.Val <= 0.05 & logFC < 0.5)
  ) %>%
  pivot_longer(-group, names_to = "direction", values_to = "n")

# 颜色显著性
colors_sig <- c("#999999", "#0072B2", "#D55E00")
names(colors_sig) <- c("NA", "Up", "Down")

# 制作MA图
make_plot_MA <- function(dex_df, minmax, numbers, comp){
  # 获取comp的最小和最大值
  MINmax <- minmax %>% filter(group == comp)
  # 不同ially的数字
  numb4text <- numbers %>% filter(group == comp)
  # 绘制
  dex_df %>%
    mutate(lab = ifelse(dex_df$gene %in% minmax$gene, as.character(dex_df$gene), NA)) %>%
    mutate(col = ifelse(dex_df$adj.P.Val > 0.05, "NS", ifelse(logFC > 0, 'Up', 'Down'))) %>%
    filter(group == comp) %>%
    ggplot(aes(x = AveExpr, y = logFC, label = lab, color = col)) +
    geom_point()+ 
    geom_hline(yintercept = 0) +
    xlab("Log2 mean expression") +
    ylab("Log2 fold change") +
    geom_label_repel(color = 'black', min.segment.length = 0) +
    geom_point(data = . %>% filter(gene %in% minmax$gene), color = "black") +
    ggtitle(comp) +
    scale_color_manual(values = colors_sig) +
    geom_text(data=numb4text, aes(x=34, y=6, label=n))
  
}

make_plot_MA(dex_df, MinMax, numbers, 'group_A')

I hope this helps! If you have any further questions or need additional assistance, please feel free to ask.

英文:

In a MA plot function, add the number of significantly expressed genes to the plot.
MA plots show differentially expressed genes between two groups with their fold-change and the average expression.
I am trying to add the total number of differentially expressed genes to the upper right side of the plot. Instead of "hard coding" the x and y coordinates, a reallative localisation would be prefered.

Maybe the numbers should be added to the df cotaing the data to be plotted instead of a new one? Any advise would me much appreciated!

Example data

set.seed(47)
gene_creator &lt;- paste(&quot;gene&quot;,1:100,sep=&quot;&quot;)
genes = sample(gene_creator,8)
dex_A &lt;- data.frame(
gene = genes,
group = &quot;group_A&quot;,
logFC = sample(c(-5:5), replace=T, size=8),
adj.P.Val = sample(c(0.01,1), replace=T, size=8),
AveExpr = sample(c(30:36), replace=T, size=8)
)
dex_B &lt;- data.frame(
gene = genes,
group = &quot;group_B&quot;,
logFC = sample(c(-5:5), replace=T, size=8),
adj.P.Val = sample(c(0.01,1), replace=T, size=8),
AveExpr = sample(c(30:36), replace=T, size=8)
)
dex_df &lt;- rbind(dex_A, dex_B)

Solution (not working)

library(&#39;tidyverse&#39;)
library(&#39;ggrepel&#39;)
## find min max genes
minmax &lt;- bind_rows(dex_df %&gt;%
filter(adj.P.Val &lt; 0.05) %&gt;%
group_by(group) %&gt;%
slice_max(logFC, n = 3) %&gt;%
mutate(rank = &quot;top5&quot;),
dex_df %&gt;%
filter(adj.P.Val &lt; 0.05) %&gt;%
group_by(group) %&gt;%
slice_min(logFC, n = 3) %&gt;%
mutate(rank = &quot;min5&quot;))
## get numbers
numbers &lt;- dex_df %&gt;%
group_by(group) %&gt;%
summarize(
n_up = sum(adj.P.Val &lt;= 0.05 &amp; logFC &gt; 0.5),
n_down = -sum(adj.P.Val &lt;= 0.05 &amp; logFC &lt; 0.5)
) %&gt;%
pivot_longer(-group, names_to = &quot;direction&quot;, values_to = &quot;n&quot;)
# colors significance
colors_sig &lt;- c(&quot;#999999&quot;, &quot;#0072B2&quot;, &quot;#D55E00&quot;)
names(colors_sig) &lt;- c(&quot;NA&quot;, &quot;Up&quot;, &quot;Down&quot;)
# make MA plot
make_plot_MA &lt;- function(dex_df, minmax, numbers, comp){
# get Min Max for comp
MINmax &lt;- minmax %&gt;% filter(group == comp)
# numbers of difEx
numb4text &lt;- numbers %&gt;% filter(group == comp)
# plot
dex_df %&gt;%
mutate(lab = ifelse(dex_df$gene %in% minmax$gene, as.character(dex_df$gene), NA)) %&gt;%
mutate(col =  ifelse(dex_df$adj.P.Val &gt; 0.05, &quot;NS&quot;, ifelse(logFC &gt; 0, &#39;Up&#39;, &#39;Down&#39;))) %&gt;%
filter(group == comp) %&gt;%
#filter(adj.P.Val &lt; 0.05) %&gt;%
ggplot(aes(x = AveExpr, y = logFC, label = lab, color = col)) +
geom_point()+ 
geom_hline(yintercept = 0) +
xlab (&quot;Log2 mean expression&quot;) +
ylab (&quot;Log2 fold change&quot;) +
geom_label_repel(color = &#39;black&#39;, min.segment.length = 0) +
geom_point(data = . %&gt;% filter(gene %in% minmax$gene), color = &quot;black&quot;) +
ggtitle(comp) +
scale_color_manual(values = colors_sig) +
geom_text(data=numb4text, aes(x=34, y=6, label=n))
}
make_plot_MA(dex_df, MinMax, numbers, &#39;group_A&#39;)

答案1

得分: 1

Here is the translated code:

make_plot_MA <- function(dex_df, minmax, numbers, comp){
  # 获取comp的最小值和最大值
  MINmax <- minmax %>% filter(group == comp)
  # difEx的数字
  numb4text <- numbers %>% filter(group == comp)
  label.up=paste("up:", numb4text$n[1])
  label.down=paste("down:", numb4text$n[2])
  # 画图
  dex_df %>%
    mutate(lab = ifelse(dex_df$gene %in% minmax$gene, as.character(dex_df$gene), NA)) %>%
    mutate(col =  ifelse(dex_df$adj.P.Val > 0.05, "NS", ifelse(logFC > 0, 'Up', 'Down'))) %>%
    filter(group == comp) %>%
    ggplot(aes(x = AveExpr, y = logFC, label = lab, color = col)) +
    geom_point()+ 
    geom_hline(yintercept = 0) +
    xlab ("Log2 mean expression") +
    ylab ("Log2 fold change") +
    geom_label_repel(color = 'black', min.segment.length = 0) +
    geom_point(data = . %>% filter(gene %in% minmax$gene), color = "black") +
    ggtitle(comp) +
    scale_color_manual(values = colors_sig) +
    geom_text(inherit.aes = F,aes(x=max(dex_df$AveExpr), y=max(dex_df$logFC), label=label.up))+
    geom_text(inherit.aes = F,aes(x=max(dex_df$AveExpr), y=max(dex_df$logFC)-1, label=label.down))
}

make_plot_MA(dex_df, minmax, numbers, 'group_A')

Add text to ggplot (在MA图中添加不同ially表达基因的数量)


I've translated the code for you.
<details>
<summary>英文:</summary>

make_plot_MA <- function(dex_df, minmax, numbers, comp){

get Min Max for comp

MINmax <- minmax %>% filter(group == comp)

numbers of difEx

numb4text <- numbers %>% filter(group == comp)
label.up=paste("up:", numb4text$n1)
label.down=paste("down:", numb4text$n[2])

plot

dex_df %>%
mutate(lab = ifelse(dex_df$gene %in% minmax$gene, as.character(dex_df$gene), NA)) %>%
mutate(col = ifelse(dex_df$adj.P.Val > 0.05, "NS", ifelse(logFC > 0, 'Up', 'Down'))) %>%
filter(group == comp) %>%
#filter(adj.P.Val < 0.05) %>%
ggplot(aes(x = AveExpr, y = logFC, label = lab, color = col)) +
geom_point()+
geom_hline(yintercept = 0) +
xlab ("Log2 mean expression") +
ylab ("Log2 fold change") +
geom_label_repel(color = 'black', min.segment.length = 0) +
geom_point(data = . %>% filter(gene %in% minmax$gene), color = "black") +
ggtitle(comp) +
scale_color_manual(values = colors_sig) +
geom_text(inherit.aes = F,aes(x=max(dex_df$AveExpr), y=max(dex_df$logFC), label=label.up))+
geom_text(inherit.aes = F,aes(x=max(dex_df$AveExpr), y=max(dex_df$logFC)-1, label=label.down))

}

make_plot_MA(dex_df, minmax, numbers, 'group_A')


[![plot with labels][1]][1]
[1]: https://i.stack.imgur.com/2AsiY.png
</details>

huangapple
  • 本文由 发表于 2023年4月17日 19:39:03
  • 转载请务必保留本文链接:https://go.coder-hub.com/76034768.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定