英文:
apply/create formula to manipulate two dataframes
问题
我有两个在R中的数据框
df1
chr start end strand bam1 bam2 bam3 bam4 bam5 bam6 bam7 bam8
1 chr1 3531569 3531966 - 2 2 1 4 8 36 21 1
2 chr1 3670538 3672624 - 251 50 170 165 294 259 665 86
3 chr1 4491645 4493854 - 220 46 179 167 275 332 414 77
4 chr1 4496542 4497750 - 115 41 100 67 114 69 42 63
5 chr1 4571267 4572265 - 64 32 77 44 76 130 179 27
6 chr1 4688213 4688719 - 39 10 20 20 14 23 25 17
7 chr1 4688800 4688919 - 20 30 10 20 14 55 17 20
8 chr1 4688800 4688919 - 2 4 6 8 10 12 14 16
9 chr1 4688800 4688919 - 1 2 3 4 5 6 7 8
和
df2
bam_file r1 r2
1 bam1 2 1
2 bam2 9 3
3 bam3 1 4
4 bam4 1 5
5 bam5 1 1
6 bam6 8 6
7 bam7 3 7
8 bam8 3 2
我想应用以下公式(假设为X),使df2的列乘以df1的行
((df2[1,2]-df1[1,5])ˆ2 + (df2[2,2]-df1[1,6])ˆ2 + (df2[3,2]-df1[1,7])ˆ2 + (df2[4,2]-df1[1,8])ˆ2 + (df2[5,2]-df1[1,9])ˆ2 + (df2[6,2]-df1[1,10])ˆ2 + (df2[7,2]-df1[1,11])ˆ2 +
(df2[8,2]-df1[1,12])ˆ2)/(ncol(df1)-4)
所以期望的输出将是
output
r1 r2
1 152.375 144.75
2 89140.25 88467.875
3 57822.75 57413.125
4 6195.125 6148
5 8007.375 7858.75
6 395.75 372.625
7 508.75 543.125
8 60.75 47.125
9 15.5 6.875
如果您需要任何进一步的帮助,请随时提问。
英文:
I have two dataframes in R
df1
chr start end strand bam1 bam2 bam3 bam4 bam5 bam6 bam7 bam8
1 chr1 3531569 3531966 - 2 2 1 4 8 36 21 1
2 chr1 3670538 3672624 - 251 50 170 165 294 259 665 86
3 chr1 4491645 4493854 - 220 46 179 167 275 332 414 77
4 chr1 4496542 4497750 - 115 41 100 67 114 69 42 63
5 chr1 4571267 4572265 - 64 32 77 44 76 130 179 27
6 chr1 4688213 4688719 - 39 10 20 20 14 23 25 17
7 chr1 4688800 4688919 - 20 30 10 20 14 55 17 20
8 chr1 4688800 4688919 - 2 4 6 8 10 12 14 16
9 chr1 4688800 4688919 - 1 2 3 4 5 6 7 8
and
df2
bam_file r1 r2
1 bam1 2 1
2 bam2 9 3
3 bam3 1 4
4 bam4 1 5
5 bam5 1 1
6 bam6 8 6
7 bam7 3 7
8 bam8 3 2
I want to apply following formula (let say X), So that column of df2 X row of df1
((df2[1,2]-df1[1,5])ˆ2 + (df2[2,2]-df1[1,6])ˆ2 + (df2[3,2]-df1[1,7])ˆ2 + (df2[4,2]-df1[1,8])ˆ2 + (df2[5,2]-df1[1,9])ˆ2 + (df2[6,2]-df1[1,10])ˆ2 + (df2[7,2]-df1[1,11])ˆ2 +
(df2[8,2]-df1[1,12])ˆ2)/(ncol(df1)-4)
So the desired output will be
output
r1 r2
1 152.375 144.75
2 89140.25 88467.875
3 57822.75 57413.125
4 6195.125 6148
5 8007.375 7858.75
6 395.75 372.625
7 508.75 543.125
8 60.75 47.125
9 15.5 6.875
I apologize if this appears to be a repetitive question, but I tried and was unable to resolve it (as I am beginner and learning). It would be great to find a solution. Thank you in advance and looking for a positive response.
答案1
得分: 1
以下是您要翻译的内容:
We could create a sequence column ('rn'), reshape to 'long' format with pivot_longer
on the first data, join with the second data ('df2') and do a group by calculation on the 'r1', 'r2' columns in reframe
library(dplyr) # version >= 1.1.0
library(tidyr)
df1 %>%
mutate(rn = row_number()) %>%
pivot_longer(cols= starts_with("bam"), names_to = "bam_file") %>%
left_join(df2) %>%
reframe(across(r1:r2, ~ sum((value - .x)^2)/n()), .by = "rn")
-output
# A tibble: 9 × 3
rn r1 r2
<int> <dbl> <dbl>
1 1 152. 145.
2 2 89140. 88468.
3 3 57823. 57413.
4 4 6195. 6148
5 5 8007. 7859.
6 6 396. 373.
7 7 509. 543.
8 8 60.8 47.1
9 9 15.5 6.88
data
df1 <- structure(list(chr = c("chr1", "chr1", "chr1", "chr1", "chr1",
"chr1", "chr1", "chr1", "chr1"), start = c(3531569L, 3670538L,
4491645L, 4496542L, 4571267L, 4688213L, 4688800L, 4688800L, 4688800L
), end = c(3531966L, 3672624L, 4493854L, 4497750L, 4572265L,
4688719L, 4688919L, 4688919L, 4688919L), strand = c("-", "-",
"-", "-", "-", "-", "-", "-", "-"), bam1 = c(2L, 251L, 220L,
115L, 64L, 39L, 20L, 2L, 1L), bam2 = c(2L, 50L, 46L, 41L, 32L,
10L, 30L, 4L, 2L), bam3 = c(1L, 170L, 179L, 100L, 77L, 20L, 10L,
6L, 3L), bam4 = c(4L, 165L, 167L, 67L, 44L, 20L, 20L, 8L, 4L),
bam5 = c(8L, 294L, 275L, 114L, 76L, 14L, 14L, 10L, 5L), bam6 = c(36L,
259L, 332L, 69L, 130L, 23L, 55L, 12L, 6L), bam7 = c(21L,
665L, 414L, 42L, 179L, 25L, 17L, 14L, 7L), bam8 = c(1L, 86L,
77L, 63L, 27L, 17L, 20L, 16L, 8L)), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9"))
df2 <- structure(list(bam_file = c("bam1", "bam2", "bam3", "bam4", "bam5",
"bam6", "bam7", "bam8"), r1 = c(2L, 9L, 1L, 1L, 1L, 8L, 3L, 3L
), r2 = c(1L, 3L, 4L, 5L, 1L, 6L, 7L, 2L)), class = "data.frame", row names = c("1",
"2", "3", "4", "5", "6", "7", "8"))
注意:我已将代码部分保留在英文状态,不做翻译。
英文:
We could create a sequence column ('rn'), reshape to 'long' format with pivot_longer
on the first data, join with the second data ('df2') and do a group by calculation on the 'r1', 'r2' columns in reframe
library(dplyr) # version >= 1.1.0
library(tidyr)
df1 %>%
mutate(rn = row_number()) %>%
pivot_longer(cols= starts_with("bam"), names_to = "bam_file") %>%
left_join(df2) %>%
reframe(across(r1:r2, ~ sum((value - .x)^2)/n()), .by = "rn")
-output
# A tibble: 9 × 3
rn r1 r2
<int> <dbl> <dbl>
1 1 152. 145.
2 2 89140. 88468.
3 3 57823. 57413.
4 4 6195. 6148
5 5 8007. 7859.
6 6 396. 373.
7 7 509. 543.
8 8 60.8 47.1
9 9 15.5 6.88
data
df1 <- structure(list(chr = c("chr1", "chr1", "chr1", "chr1", "chr1",
"chr1", "chr1", "chr1", "chr1"), start = c(3531569L, 3670538L,
4491645L, 4496542L, 4571267L, 4688213L, 4688800L, 4688800L, 4688800L
), end = c(3531966L, 3672624L, 4493854L, 4497750L, 4572265L,
4688719L, 4688919L, 4688919L, 4688919L), strand = c("-", "-",
"-", "-", "-", "-", "-", "-", "-"), bam1 = c(2L, 251L, 220L,
115L, 64L, 39L, 20L, 2L, 1L), bam2 = c(2L, 50L, 46L, 41L, 32L,
10L, 30L, 4L, 2L), bam3 = c(1L, 170L, 179L, 100L, 77L, 20L, 10L,
6L, 3L), bam4 = c(4L, 165L, 167L, 67L, 44L, 20L, 20L, 8L, 4L),
bam5 = c(8L, 294L, 275L, 114L, 76L, 14L, 14L, 10L, 5L), bam6 = c(36L,
259L, 332L, 69L, 130L, 23L, 55L, 12L, 6L), bam7 = c(21L,
665L, 414L, 42L, 179L, 25L, 17L, 14L, 7L), bam8 = c(1L, 86L,
77L, 63L, 27L, 17L, 20L, 16L, 8L)), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9"))
df2 <- structure(list(bam_file = c("bam1", "bam2", "bam3", "bam4", "bam5",
"bam6", "bam7", "bam8"), r1 = c(2L, 9L, 1L, 1L, 1L, 8L, 3L, 3L
), r2 = c(1L, 3L, 4L, 5L, 1L, 6L, 7L, 2L)), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8"))
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论