英文:
Error while using the rbind function in R: the same rows are added multiple times
问题
I have a dataframe in R:
dput(trans_eqtl[1:5,1:6])
structure(list(Gene = c("ENSG00000166086", "ENSG00000166086",
"ENSG00000265688", "ENSG00000265688", "ENSG00000265688"), `Gene-Chr` = c(11,
11, 17, 17, 17), `Gene-Pos` = c(133980358, 133980358, 79887167,
79887167, 79887167), RsId = c("rs3811444", "rs35340377", "rs705705",
"rs1131017", "rs10876864"), `SNP-Chr` = c(1, 1, 12, 12, 12),
`SNP-Pos` = c(248039451, 248038210, 56435504, 56435929, 56401085
)), row.names = c(NA, 5L), class = "data.frame")
我想检查 gene-chr 和 snp-chr 是否位于同一染色体或不同染色体上。
我编写了一段代码,它会遍历每一行并检查这两个条件。在循环过程中使用了 rbind 函数,将满足这两个条件的行添加到单独的 intra 和 inter 变量中。
但在我的情况下,rbind 函数不断将相同的行添加到最终变量中。
这是参考的代码:
## Check for intra- and inter-chromosomal pairs.
## inter: collect the rows whose Gene-Chr value differs from their SNP-Chr value.
## (The original label here read "intra", but this loop fills `inter`.)
## Start both accumulators empty.
inter <- NULL
intra <- NULL
## NOTE(review): 240 is hard-coded; presumably the row count of trans_eqtl -- confirm.
for(i in 1:240){
## Both operands are single elements, so %in% acts as a scalar equality test here.
if(!(trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i])){
## NOTE(review): the argument of which() is one logical value, and it is always
## TRUE inside this branch, so x is always 1 -- the loop index i is never used
## to pick the row.
x <- which(!(trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i]==TRUE))
## With x == 1 this always selects the first row of trans_eqtl.
value <- trans_eqtl[x,]
}
## NOTE(review): this append sits outside the if, so it runs on every iteration;
## when the test fails the previous `value` is appended again (and `value` does
## not exist yet if the test fails before its first assignment).
inter <- rbind(inter,value)
}
## intra: collect the rows whose Gene-Chr value equals their SNP-Chr value.
## `intra` is initialised to NULL earlier in the script.
## NOTE(review): 240 is hard-coded; presumably the row count of trans_eqtl -- confirm.
for(i in 1:240){
## Both operands are single elements, so %in% acts as a scalar equality test here.
if(trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i]){
## NOTE(review): the argument of which() is one logical value, and it is always
## TRUE inside this branch, so y is always 1 -- the loop index i is never used
## to pick the row.
y <- which((trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i]==TRUE))
## With y == 1 this always selects the first row of trans_eqtl.
value1 <- trans_eqtl[y,]
}
## NOTE(review): this append sits outside the if, so it runs on every iteration;
## when the test fails the previous `value1` is appended again (and `value1`
## does not exist yet if the test fails before its first assignment).
intra <- rbind(intra,value1)
}
我的输出变量看起来是这样的:
dput(intra[1:5,1:6])
structure(list(Gene = c("ENSG00000166086", "ENSG00000166086",
"ENSG00000166086", "ENSG00000166086", "ENSG00000166086"), `Gene-Chr` = c(11,
11, 11, 11, 11), `Gene-Pos` = c(133980358, 133980358, 133980358,
133980358, 133980358), RsId = c("rs3811444", "rs3811444", "rs3811444",
"rs3811444", "rs3811444"), `SNP-Chr` = c(1, 1, 1, 1, 1), `SNP-Pos` = c(248039451,
248039451, 248039451, 248039451, 248039451)), row.names = c(NA,
5L), class = "data.frame")
dput(inter[1:5,1:6])
structure(list(Gene = c("ENSG00000166086", "ENSG00000166086",
"ENSG00000166086", "ENSG00000166086", "ENSG00000166086"), `Gene-Chr` = c(11,
11, 11, 11, 11), `Gene-Pos` = c(133980358, 133980358, 133980358,
133980358, 133980358), RsId = c("rs3811444", "rs3811444", "rs3811444",
"rs3811444", "rs3811444"), `SNP-Chr` = c(1, 1, 1, 1, 1), `SNP-Pos` = c(248039451,
248039451, 248039451, 248039451, 248039451)), row.names = c(NA,
5L), class = "data.frame")
根据我的观察,rbind 函数应该在循环内部,以捕获满足条件的所有 i 的值,并不断添加到最终变量中。
有人知道如何解决这个问题吗?
谢谢。
英文:
I have a dataframe in R:
dput(trans_eqtl[1:5,1:6])
structure(list(Gene = c("ENSG00000166086", "ENSG00000166086",
"ENSG00000265688", "ENSG00000265688", "ENSG00000265688"), `Gene-Chr` = c(11,
11, 17, 17, 17), `Gene-Pos` = c(133980358, 133980358, 79887167,
79887167, 79887167), RsId = c("rs3811444", "rs35340377", "rs705705",
"rs1131017", "rs10876864"), `SNP-Chr` = c(1, 1, 12, 12, 12),
`SNP-Pos` = c(248039451, 248038210, 56435504, 56435929, 56401085
)), row.names = c(NA, 5L), class = "data.frame")
I want to check whether `Gene-Chr` and `SNP-Chr` are on the same chromosome or on different chromosomes.
I wrote code that goes through each row and checks for these two conditions,
and used the rbind function inside the loop to add the rows that meet each condition to separate intra and inter variables.
But in my case, the rbind function keeps adding the same row to the final variable.
This is the code for reference:
## Check for intra- and inter-chromosomal pairs.
## inter: collect the rows whose Gene-Chr value differs from their SNP-Chr value.
## (The original label here read "intra", but this loop fills `inter`.)
## Start both accumulators empty.
inter <- NULL
intra <- NULL
## NOTE(review): 240 is hard-coded; presumably the row count of trans_eqtl -- confirm.
for(i in 1:240){
## Both operands are single elements, so %in% acts as a scalar equality test here.
if(!(trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i])){
## NOTE(review): the argument of which() is one logical value, and it is always
## TRUE inside this branch, so x is always 1 -- the loop index i is never used
## to pick the row.
x <- which(!(trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i]==TRUE))
## With x == 1 this always selects the first row of trans_eqtl.
value <- trans_eqtl[x,]
}
## NOTE(review): this append sits outside the if, so it runs on every iteration;
## when the test fails the previous `value` is appended again (and `value` does
## not exist yet if the test fails before its first assignment).
inter <- rbind(inter,value)
}
## intra: collect the rows whose Gene-Chr value equals their SNP-Chr value.
## `intra` is initialised to NULL earlier in the script.
## NOTE(review): 240 is hard-coded; presumably the row count of trans_eqtl -- confirm.
for(i in 1:240){
## Both operands are single elements, so %in% acts as a scalar equality test here.
if(trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i]){
## NOTE(review): the argument of which() is one logical value, and it is always
## TRUE inside this branch, so y is always 1 -- the loop index i is never used
## to pick the row.
y <- which((trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i]==TRUE))
## With y == 1 this always selects the first row of trans_eqtl.
value1 <- trans_eqtl[y,]
}
## NOTE(review): this append sits outside the if, so it runs on every iteration;
## when the test fails the previous `value1` is appended again (and `value1`
## does not exist yet if the test fails before its first assignment).
intra <- rbind(intra,value1)
}
My output variable looks like this:
dput(intra[1:5,1:6])
structure(list(Gene = c("ENSG00000166086", "ENSG00000166086",
"ENSG00000166086", "ENSG00000166086", "ENSG00000166086"), `Gene-Chr` = c(11,
11, 11, 11, 11), `Gene-Pos` = c(133980358, 133980358, 133980358,
133980358, 133980358), RsId = c("rs3811444", "rs3811444", "rs3811444",
"rs3811444", "rs3811444"), `SNP-Chr` = c(1, 1, 1, 1, 1), `SNP-Pos` = c(248039451,
248039451, 248039451, 248039451, 248039451)), row.names = c(NA,
5L), class = "data.frame")
dput(inter[1:5,1:6])
structure(list(Gene = c("ENSG00000166086", "ENSG00000166086",
"ENSG00000166086", "ENSG00000166086", "ENSG00000166086"), `Gene-Chr` = c(11,
11, 11, 11, 11), `Gene-Pos` = c(133980358, 133980358, 133980358,
133980358, 133980358), RsId = c("rs3811444", "rs3811444", "rs3811444",
"rs3811444", "rs3811444"), `SNP-Chr` = c(1, 1, 1, 1, 1), `SNP-Pos` = c(248039451,
248039451, 248039451, 248039451, 248039451)), row.names = c(NA,
5L), class = "data.frame")
As I understand it, the rbind call should be inside the loop so that every value of i matching the condition is captured and its row is appended to the final variable.
Does anyone know how to solve this issue?
Thank you.
答案1
得分: 1
你可以尝试使用 dplyr 方法:
library(dplyr)
# Inter-chromosomal pairs: rows where Gene-Chr and SNP-Chr differ.
inter <- filter(trans_eqtl, `Gene-Chr` != `SNP-Chr`)
# Intra-chromosomal pairs: rows where Gene-Chr and SNP-Chr are equal.
intra <- filter(trans_eqtl, `Gene-Chr` == `SNP-Chr`)
输出:
#inter:
Gene Gene-Chr Gene-Pos RsId SNP-Chr SNP-Pos
1 ENSG00000166086 11 133980358 rs3811444 1 248039451
2 ENSG00000166086 11 133980358 rs35340377 1 248038210
3 ENSG00000265688 17 79887167 rs705705 12 56435504
4 ENSG00000265688 17 79887167 rs1131017 12 56435929
5 ENSG00000265688 17 79887167 rs10876864 12 56401085
# intra:
[1] Gene Gene-Chr Gene-Pos RsId SNP-Chr SNP-Pos
<0 Zeilen> (oder row.names mit Länge 0)
英文:
You could try a dplyr approach (the two columns are compared row by row in one vectorized step, so no loop or rbind is needed):
library(dplyr)
# Inter-chromosomal pairs: rows where Gene-Chr and SNP-Chr differ.
inter <- filter(trans_eqtl, `Gene-Chr` != `SNP-Chr`)
# Intra-chromosomal pairs: rows where Gene-Chr and SNP-Chr are equal.
intra <- filter(trans_eqtl, `Gene-Chr` == `SNP-Chr`)
output:
#inter:
Gene Gene-Chr Gene-Pos RsId SNP-Chr SNP-Pos
1 ENSG00000166086 11 133980358 rs3811444 1 248039451
2 ENSG00000166086 11 133980358 rs35340377 1 248038210
3 ENSG00000265688 17 79887167 rs705705 12 56435504
4 ENSG00000265688 17 79887167 rs1131017 12 56435929
5 ENSG00000265688 17 79887167 rs10876864 12 56401085
# intra:
[1] Gene Gene-Chr Gene-Pos RsId SNP-Chr SNP-Pos
<0 Zeilen> (oder row.names mit Länge 0)
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论