2023年5月22日 00:37:39go评论100阅读模式

英文:

error while using rbind function in R: add the same rows multiple times

问题

I have a dataframe in R:

dput(trans_eqtl[1:5,1:6])
structure(list(Gene = c("ENSG00000166086", "ENSG00000166086", 
"ENSG00000265688", "ENSG00000265688", "ENSG00000265688"), `Gene-Chr` = c(11, 
11, 17, 17, 17), `Gene-Pos` = c(133980358, 133980358, 79887167, 
79887167, 79887167), RsId = c("rs3811444", "rs35340377", "rs705705", 
"rs1131017", "rs10876864"), `SNP-Chr` = c(1, 1, 12, 12, 12), 
    `SNP-Pos` = c(248039451, 248038210, 56435504, 56435929, 56401085
    )), row.names = c(NA, 5L), class = "data.frame")

我想检查 gene-chr 和 snp-chr 是否位于同一染色体或不同染色体上。
我编写了一段代码，它会遍历每一行并检查这两个条件。在循环过程中使用了 rbind 函数，将满足这两个条件的行添加到单独的 intra 和 inter 变量中。
但在我的情况下，rbind 函数不断将相同的行添加到最终变量中。
这是参考的代码：

## Check for intra- and inter-chromosomal pairs:
## inter (the original label here said "intra", but this first loop fills `inter`):
# Both accumulators start as NULL; rbind(NULL, df) simply returns df.
inter <- NULL
intra <- NULL
# Walk over rows 1..240 (row count is hard-coded).
for(i in 1:240){
  # Both operands are single values taken from row i, so this branch is taken
  # when the gene chromosome differs from the SNP chromosome in that row.
  if(!(trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i])){
    # NOTE(review): which() receives a length-1 logical here, and inside this
    # branch that logical is TRUE, so x is always 1 -- not i.
    x <- which(!(trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i]==TRUE))
    # Consequently this picks row x (= row 1) of trans_eqtl rather than row i.
    value <- trans_eqtl[x,]
  }
  # NOTE(review): this append sits outside the if, so it runs on every
  # iteration and re-appends whatever `value` currently holds, even when row i
  # did not satisfy the condition. If the branch has not been taken yet,
  # `value` has not been assigned by this loop.
  inter <- rbind(inter,value)
}
## Check for intra:
# Same structure as the loop above with the condition un-negated.
for(i in 1:240){
  # Taken when the gene and SNP chromosome values of row i match.
  if(trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i]){
    # NOTE(review): as above, which() of a single TRUE yields 1, so y is 1.
    y <- which((trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i]==TRUE))
    value1 <- trans_eqtl[y,]
  }
  # NOTE(review): unconditional append; `value1` is reused from an earlier
  # iteration (or is unassigned by this loop) when the condition is FALSE.
  intra <- rbind(intra,value1)
}

我的输出变量看起来是这样的：

dput(intra[1:5,1:6])
structure(list(Gene = c("ENSG00000166086", "ENSG00000166086", 
"ENSG00000166086", "ENSG00000166086", "ENSG00000166086"), `Gene-Chr` = c(11, 
11, 11, 11, 11), `Gene-Pos` = c(133980358, 133980358, 133980358, 
133980358, 133980358), RsId = c("rs3811444", "rs3811444", "rs3811444", 
"rs3811444", "rs3811444"), `SNP-Chr` = c(1, 1, 1, 1, 1), `SNP-Pos` = c(248039451, 
248039451, 248039451, 248039451, 248039451)), row.names = c(NA, 
5L), class = "data.frame")
dput(inter[1:5,1:6])
structure(list(Gene = c("ENSG00000166086", "ENSG00000166086", 
"ENSG00000166086", "ENSG00000166086", "ENSG00000166086"), `Gene-Chr` = c(11, 
11, 11, 11, 11), `Gene-Pos` = c(133980358, 133980358, 133980358, 
133980358, 133980358), RsId = c("rs3811444", "rs3811444", "rs3811444", 
"rs3811444", "rs3811444"), `SNP-Chr` = c(1, 1, 1, 1, 1), `SNP-Pos` = c(248039451, 
248039451, 248039451, 248039451, 248039451)), row.names = c(NA, 
5L), class = "data.frame")

根据我的观察，rbind 函数应该在循环内部，以捕获满足条件的所有 i 的值，并不断添加到最终变量中。
有人知道如何解决这个问题吗？
谢谢。

英文:

I have a dataframe in R:

dput(trans_eqtl[1:5,1:6])
structure(list(Gene = c("ENSG00000166086", "ENSG00000166086", 
"ENSG00000265688", "ENSG00000265688", "ENSG00000265688"), `Gene-Chr` = c(11, 
11, 17, 17, 17), `Gene-Pos` = c(133980358, 133980358, 79887167, 
79887167, 79887167), RsId = c("rs3811444", "rs35340377", "rs705705", 
"rs1131017", "rs10876864"), `SNP-Chr` = c(1, 1, 12, 12, 12), 
    `SNP-Pos` = c(248039451, 248038210, 56435504, 56435929, 56401085
    )), row.names = c(NA, 5L), class = "data.frame")

I want to check whether gene-chr and snp-chr are on the same chromosome or on different chromosomes.
I wrote code that goes through each row and checks for these two conditions,
and used the rbind function inside the loop to add the rows that meet each condition to separate intra and inter variables.
But in my case, the rbind function keeps adding the same row to the final variable.
This is the code for reference:

##check for intra and inter chrom:
##intra:
inter <- NULL
intra <- NULL
for(i in 1:240){
  if(!(trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i])){
    x <- which(!(trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i]==TRUE))
    value <- trans_eqtl[x,]
  }
  inter <- rbind(inter,value)
}
##check for intra:
for(i in 1:240){
  if(trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i]){
    y <- which((trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i]==TRUE))
    value1 <- trans_eqtl[y,]
  }
  intra <- rbind(intra,value1)
}

My output variable looks like this:

dput(intra[1:5,1:6])
structure(list(Gene = c("ENSG00000166086", "ENSG00000166086", 
"ENSG00000166086", "ENSG00000166086", "ENSG00000166086"), `Gene-Chr` = c(11, 
11, 11, 11, 11), `Gene-Pos` = c(133980358, 133980358, 133980358, 
133980358, 133980358), RsId = c("rs3811444", "rs3811444", "rs3811444", 
"rs3811444", "rs3811444"), `SNP-Chr` = c(1, 1, 1, 1, 1), `SNP-Pos` = c(248039451, 
248039451, 248039451, 248039451, 248039451)), row.names = c(NA, 
5L), class = "data.frame")
dput(inter[1:5,1:6])
structure(list(Gene = c("ENSG00000166086", "ENSG00000166086", 
"ENSG00000166086", "ENSG00000166086", "ENSG00000166086"), `Gene-Chr` = c(11, 
11, 11, 11, 11), `Gene-Pos` = c(133980358, 133980358, 133980358, 
133980358, 133980358), RsId = c("rs3811444", "rs3811444", "rs3811444", 
"rs3811444", "rs3811444"), `SNP-Chr` = c(1, 1, 1, 1, 1), `SNP-Pos` = c(248039451, 
248039451, 248039451, 248039451, 248039451)), row.names = c(NA, 
5L), class = "data.frame")

As I understand it, the rbind call should be inside the loop so that every row i matching the condition is captured and appended to the final variable.
Does anyone know how to solve this issue?
Thank you.

答案1

得分: 1

你可以尝试使用dplyr方法：

# Load dplyr, which supplies filter() and the %>% pipe used below.
library(dplyr)
# inter: keep the rows whose `Gene-Chr` and `SNP-Chr` values differ.
inter <- trans_eqtl %>%
  filter(`Gene-Chr` != `SNP-Chr`)
# intra: keep the rows whose `Gene-Chr` and `SNP-Chr` values are equal.
intra <- trans_eqtl %>%
  filter(`Gene-Chr` == `SNP-Chr`)

输出：

#inter: 
             Gene Gene-Chr  Gene-Pos       RsId SNP-Chr   SNP-Pos
1 ENSG00000166086       11 133980358  rs3811444       1 248039451
2 ENSG00000166086       11 133980358 rs35340377       1 248038210
3 ENSG00000265688       17  79887167   rs705705      12  56435504
4 ENSG00000265688       17  79887167  rs1131017      12  56435929
5 ENSG00000265688       17  79887167 rs10876864      12  56401085
# intra:
[1] Gene     Gene-Chr Gene-Pos RsId     SNP-Chr  SNP-Pos 
<0 Zeilen> (oder row.names mit Länge 0)

英文:

You could try a dplyr approach:

library(dplyr)
inter <- trans_eqtl %>%
  filter(`Gene-Chr` != `SNP-Chr`)
intra <- trans_eqtl %>%
  filter(`Gene-Chr` == `SNP-Chr`)

output:

#inter: 
             Gene Gene-Chr  Gene-Pos       RsId SNP-Chr   SNP-Pos
1 ENSG00000166086       11 133980358  rs3811444       1 248039451
2 ENSG00000166086       11 133980358 rs35340377       1 248038210
3 ENSG00000265688       17  79887167   rs705705      12  56435504
4 ENSG00000265688       17  79887167  rs1131017      12  56435929
5 ENSG00000265688       17  79887167 rs10876864      12  56401085
# intra:
[1] Gene     Gene-Chr Gene-Pos RsId     SNP-Chr  SNP-Pos 
<0 Zeilen> (oder row.names mit Länge 0)

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

error while using rbind function in R: add the same rows multiple times

问题

答案1

R Shiny – 仅在选择特定输入筛选器时显示下载按钮

自定义Tmap调色板基于结果

_targets.R函数能够从targets列表中读取对象吗？

制作一个循环来计算R中站点之间的距离。

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。