英文:
map over a nested list in mutate and extract specific list elements
问题
我有一些看起来像这样的数据:
price unleveragedData
<dbl> <list>
1 450000 <list [5]>
2 400000 <list [5]>
3 400000 <list [5]>
4 397000 <list [5]>
5 750000 <list [5]>
6 550000 <list [5]>
我试图将列表的第 `5` 个元素放入新列中,即:
price unleveragedData element5
<dbl> <list>
1 450000 <list [5]> -----
2 400000 <list [5]> -----
3 400000 <list [5]>
4 397000 <list [5]>
5 750000 <list [5]>
6 550000 <list [5]> -----
使用以下代码:
df$unleveragedData %>%
map(., ~ pluck(., c(5)))
我可以得到我想要的输出:
[[1]]
[1] 260551.4
[[2]]
[1] 330786.9
[[3]]
[1] 330786.9
[[4]]
[1] 287739.3
[[5]]
[1] 566416
[[6]]
[1] 271879.7
然而,在 `mutate` 函数内部,我无法让它正常工作。
df %>%
mutate(
element5 = map(unleveragedData, ~ map_dbl(., pluck(., c(5))))
)
数据:
df = structure(list(price = c(450000, 400000, 400000, 397000, 750000,
550000), unleveragedData = list(list(-0.0547083151944441, c(-450000,
15533.28, 16475.2128, 17473.760928, 18532.32229728, 280205.849576444
), "450000-0.08", structure(list(` ` = c("Revenue", "Vacancy",
"Gross Revenue", "Operating Expenses", "Net Operating Income"), Year1 = c(16560, 828, 15732, 199, 15533), Year2 = c(17554,
878, 16676, 201, 16475), Year3 = c(18607, 930, 17676, 203, 17474
), Year4 = c(19723, 986, 18737, 205, 18532), Year5 = c(20907,
1045, 19861, 207, 19654), Year6 = c(22161, 1108, 21053, 209,
20844), purchasePriceCapRate = c("450000-0.08", "450000-0.08",
"450000-0.08", "450000-0.08", "450000-0.08")), row.names = c(NA,
-5L), class = "data.frame"), 260551.350870592), list(0.0224165566243759,
c(-400000, 19720.512, 20916.35712, 22184.0790912, 23527.991786112,
355739.600331834), "400000-0.08", structure(list(` ` = c("Revenue",
"Vacancy", "Gross Revenue", "Operating Expenses", "Net Operating Income"
), Year1 = c(21024, 1051, 19973, 252, 19721), Year2 = c(22285,
1114, 21171, 255, 20916), Year3 = c(23623, 1181, 22441, 257,
22184), Year4 = c(25040, 1252, 23788, 260, 23528), Year5 = c(26542,
1327, 25215, 263, 24953), Year6 = c(28135, 1407, 26728, 265,
26463), purchasePriceCapRate = c("400000-0.08", "400000-0.08",
"400000-0.08", "400000-0.08", "400000-0.08")), row.names = c(NA,
-5L), class = "data.frame"), 330786.932409621), list(0.0224165566243759,
c(-400000, 19720.512, 20916.35712, 22184.0790912, 23527.991786112,
355739.600331834), "400000-0.08", structure(list(` ` = c("Revenue",
"Vacancy", "Gross Revenue", "Operating Expenses", "Net Operating Income"
), Year1 = c(21024, 1051, 19973, 252, 19721), Year2 = c(22285,
1114, 21171, 255, 20916), Year3 = c(23623, 1181, 22441, 257,
22184), Year4 = c(25040, 1252, 23788, 260, 23528), Year5 = c(26542,
1327, 25215, 263, 24953), Year6 = c(28135, 1407, 26728, 265,
26463), purchasePriceCapRate = c("400000-0.08", "400000-0.08",
"400000-0.08", "400000-0.08", "400000-0.08")), row.names = c(NA,
-5L), class = "data.frame"), 330786.932409621), list(-0.00700507916565851,
c(-397000, 17154.144, 18194.36544, 19297.1098944, 20466.129841344,
309444.720836595), "397000-0.08", structure(list(` ` = c("Revenue",
"Vacancy", "Gross Revenue", "Operating Expenses", "Net Operating Income"
), Year1 = c(18288, 914, 17374, 219, 17154), Year2 = c(19385,
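969, 18416, 222, 18194), Year3 = c(20548, 1027, 19521, 224,
19297), Year4 = c(21781, 1089, 20692, 226, 20466), Year5 = c(23088,
1154, 21934, 228, 21705), Year6 = c(24473, 1224, 23250, 231,
23019), purchasePriceCapRate = c("397000-0.08", "397000-0.08",
"397000-0.08", "397000-0.08", "397000-0.08")), row.names = c(NA,
-5L), class = "data.frame"), 287739.317917958), list(0.00205549716813258,
c(-750000, 33768, 35815.68, 37986.4368, 40287.657168, 609143.15125314
), "750000-0.08", structure(list(` ` = c("Revenue", "Vacancy",
"Gross Revenue", "Operating Expenses", "Net Operating Income"
), Year1 = c(36000, 1800, 34200, 432, 33768), Year2 = c(38160,
1908, 36252, 436, 35816), Year3 = c(40450, 2022, 38427, 441,
37986), Year4 = c(42877, 2144, 40733, 445, 40288), Year5 = c(45449,
2272, 43177, 450, 42727), Year6 = c(48176, 2409, 45767, 454,
45313), purchasePriceCapRate = c("750000-0.08", "750000-0.08",
"750000-0.08", "750000-0.08", "750000-0.08")), row.names = c(NA,
-5L), class = "data.frame"), 566415.98015346), list(-0.0866171399087425,
c(-550000, 16208.64, 17191.5264, 18233.489664, 19338.07544064,
292388.712601507), "550000-0.08", structure(list(` ` = c("Revenue",
"Vacancy", "Gross Revenue", "Operating Expenses", "Net Operating Income"
), Year1 = c(17280, 864, 16416, 207, 16209), Year2 = c(18317,
916, 17401, 209, 17192), Year3 = c(19416, 971, 18445, 212,
18233), Year4 = c(20581, 1029, 19552, 214, 19338), Year5 = c(21816,
1091, 20725, 216, 20509), Year6 = c(23125, 1156, 21968, 218,
21750), purchasePriceCapRate = c("550000-0.08", "550000-0.08",
"550000-0.08", "550000-0.08", "550000-0.08")), row.names = c(NA,
-5L), class = "data.frame"), 271879.670473661))), class = c("rowwise_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), groups = structure(list(
.rows = structure(list(1L, 2L, 3L, 4L, 5L, 6L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame")))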
<details>
<summary>英文:</summary>
I have some data which looks like:
price unleveragedData
<dbl> <list>
1 450000 <list [5]>
2 400000 <list [5]>
3 400000 <list [5]>
4 397000 <list [5]>
5 750000 <list [5]>
6 550000 <list [5]>
I am trying to put into a new column the element `5` of the lists - i.e.
price unleveragedData element5
<dbl> <list>
1 450000 <list [5]> -----
2 400000 <list [5]> -----
3 400000 <list [5]>
4 397000 <list [5]>
5 750000 <list [5]>
6 550000 <list [5]> -----
Using the following:
df$unleveragedData %>%
map(., ~ pluck(., c(5)))
I can get the output I want:
[[1]]
[1] 260551.4
[[2]]
[1] 330786.9
[[3]]
[1] 330786.9
[[4]]
[1] 287739.3
[[5]]
[1] 566416
[[6]]
[1] 271879.7
However, inside the `mutate` function I can't get it to work.
df %>%
mutate(
element5 = map(unleveragedData, ~ map_dbl(., pluck(., c(5))))
)
Data:
df = structure(list(price = c(450000, 400000, 400000, 397000, 750000,
550000), unleveragedData = list(list(-0.0547083151944441, c(-450000,
15533.28, 16475.2128, 17473.760928, 18532.32229728, 280205.849576444
), "450000-0.08", structure(list(` ` = c("Revenue", "Vacancy",
"Gross Revenue", "Operating Expenses", "Net Operating Income"
), Year1 = c(16560, 828, 15732, 199, 15533), Year2 = c(17554,
878, 16676, 201, 16475), Year3 = c(18607, 930, 17676, 203, 17474
), Year4 = c(19723, 986, 18737, 205, 18532), Year5 = c(20907,
1045, 19861, 207, 19654), Year6 = c(22161, 1108, 21053, 209,
20844), purchasePriceCapRate = c("450000-0.08", "450000-0.08",
"450000-0.08", "450000-0.08", "450000-0.08")), row.names = c(NA,
-5L), class = "data.frame"), 260551.350870592), list(0.0224165566243759,
c(-400000, 19720.512, 20916.35712, 22184.0790912, 23527.991786112,
355739.600331834), "400000-0.08", structure(list(` ` = c("Revenue",
"Vacancy", "Gross Revenue", "Operating Expenses", "Net Operating Income"
), Year1 = c(21024, 1051, 19973, 252, 19721), Year2 = c(22285,
1114, 21171, 255, 20916), Year3 = c(23623, 1181, 22441, 257,
22184), Year4 = c(25040, 1252, 23788, 260, 23528), Year5 = c(26542,
1327, 25215, 263, 24953), Year6 = c(28135, 1407, 26728, 265,
26463), purchasePriceCapRate = c("400000-0.08", "400000-0.08",
"400000-0.08", "400000-0.08", "400000-0.08")), row.names = c(NA,
-5L), class = "data.frame"), 330786.932409621), list(0.0224165566243759,
c(-400000, 19720.512, 20916.35712, 22184.0790912, 23527.991786112,
355739.600331834), "400000-0.08", structure(list(` ` = c("Revenue",
"Vacancy", "Gross Revenue", "Operating Expenses", "Net Operating Income"
), Year1 = c(21024, 1051, 19973, 252, 19721), Year2 = c(22285,
1114, 21171, 255, 20916), Year3 = c(23623, 1181, 22441, 257,
22184), Year4 = c(25040, 1252, 23788, 260, 23528), Year5 = c(26542,
1327, 25215, 263, 24953), Year6 = c(28135, 1407, 26728, 265,
26463), purchasePriceCapRate = c("400000-0.08", "400000-0.08",
"400000-0.08", "400000-0.08", "400000-0.08")), row.names = c(NA,
-5L), class = "data.frame"), 330786.932409621), list(-0.00700507916565851,
c(-397000, 17154.144, 18194.36544, 19297.1098944, 20466.129841344,
309444.720836595), "397000-0.08", structure(list(` ` = c("Revenue",
"Vacancy", "Gross Revenue", "Operating Expenses", "Net Operating Income"
), Year1 = c(18288, 914, 17374, 219, 17154), Year2 = c(19385,
969, 18416, 222, 18194), Year3 = c(20548, 1027, 19521, 224,
19297), Year4 = c(21781, 1089, 20692, 226, 20466), Year5 = c(23088,
1154, 21934, 228, 21705), Year6 = c(24473, 1224, 23250, 231,
23019), purchasePriceCapRate = c("397000-0.08", "397000-0.08",
"397000-0.08", "397000-0.08", "397000-0.08")), row.names = c(NA,
-5L), class = "data.frame"), 287739.317917958), list(0.00205549716813258,
c(-750000, 33768, 35815.68, 37986.4368, 40287.657168, 609143.15125314
), "750000-0.08", structure(list(` ` = c("Revenue", "Vacancy",
"Gross Revenue", "Operating Expenses", "Net Operating Income"
), Year1 = c(36000, 1800, 34200, 432, 33768), Year2 = c(38160,
1908, 36252, 436, 35816), Year3 = c(40450, 2022, 38427, 441,
37986), Year4 = c(42877, 2144, 40733, 445, 40288), Year5 = c(45449,
2272, 43177, 450, 42727), Year6 = c(48176, 2409, 45767, 454,
45313), purchasePriceCapRate = c("750000-0.08", "750000-0.08",
"750000-0.08", "750000-0.08", "750000-0.08")), row.names = c(NA,
-5L), class = "data.frame"), 566415.98015346), list(-0.0866171399087425,
c(-550000, 16208.64, 17191.5264, 18233.489664, 19338.07544064,
292388.712601507), "550000-0.08", structure(list(` ` = c("Revenue",
"Vacancy", "Gross Revenue", "Operating Expenses", "Net Operating Income"
), Year1 = c(17280, 864, 16416, 207, 16209), Year2 = c(18317,
916, 17401, 209, 17192), Year3 = c(19416, 971, 18445, 212,
18233), Year4 = c(20581, 1029, 19552, 214, 19338), Year5 = c(21816,
1091, 20725, 216, 20509), Year6 = c(23125, 1156, 21968, 218,
21750), purchasePriceCapRate = c("550000-0.08", "550000-0.08",
"550000-0.08", "550000-0.08", "550000-0.08")), row.names = c(NA,
-5L), class = "data.frame"), 271879.670473661))), class = c("rowwise_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), groups = structure(list(
.rows = structure(list(1L, 2L, 3L, 4L, 5L, 6L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame")))
</details>
# 答案1
**得分**: 4
数据上存在一个 `rowwise` 分组,如果要使用 `map`,应该先将其解除分组(`ungroup`):
```r
library(dplyr)
library(purrr)
df %>%
ungroup() %>%
mutate(
element5 = map_dbl(unleveragedData, ~ nth(.x, 5))
)
```
-输出
# A tibble: 6 × 3
price unleveragedData element5
<dbl> <list> <dbl>
1 450000 <list [5]> 260551.
2 400000 <list [5]> 330787.
3 400000 <list [5]> 330787.
4 397000 <list [5]> 287739.
5 750000 <list [5]> 566416.
6 550000 <list [5]> 271880.
另外,由于数据是 `rowwise` 的,我们也可以直接提取:
df %>%
mutate(element5 = nth(unleveragedData, 5)) %>%
ungroup()
-输出
# A tibble: 6 × 3
price unleveragedData element5
<dbl> <list> <dbl>
1 450000 <list [5]> 260551.
2 400000 <list [5]> 330787.
3 400000 <list [5]> 330787.
4 397000 <list [5]> 287739.
5 750000 <list [5]> 566416.
6 550000 <list [5]> 271880.
或者使用 `pluck`:
df %>%
mutate(element5 = pluck(unleveragedData, 5)) %>%
ungroup()
-输出
# A tibble: 6 × 3
price unleveragedData element5
<dbl> <list> <dbl>
1 450000 <list [5]> 260551.
2 400000 <list [5]> 330787.
3 400000 <list [5]> 330787.
4 397000 <list [5]> 287739.
5 750000 <list [5]> 566416.
6 550000 <list [5]> 271880.
在 `rowwise` 数据上使用 `map` 也是可能的,只需要将该列包装在 `pick` 中:
df %>%
mutate(element5 = map_dbl(pick(unleveragedData), pluck, 5))
-输出
# A tibble: 6 × 3
# Rowwise:
price unleveragedData element5
<dbl> <list> <dbl>
1 450000 <list [5]> 260551.
2 400000 <list [5]> 330787.
3 400000 <list [5]> 330787.
4 397000 <list [5]> 287739.
5 750000 <list [5]> 566416.
6 550000 <list [5]> 271880.
<details>
<summary>英文:</summary>
There is a `rowwise` grouping, which should be `ungroup`ed if we want to use `map`
library(dplyr)
library(purrr)
df %>%
ungroup %>%
mutate(
element5 = map_dbl(unleveragedData, ~ nth(.x, 5))
)
-output
# A tibble: 6 × 3
price unleveragedData element5
<dbl> <list> <dbl>
1 450000 <list [5]> 260551.
2 400000 <list [5]> 330787.
3 400000 <list [5]> 330787.
4 397000 <list [5]> 287739.
5 750000 <list [5]> 566416.
6 550000 <list [5]> 271880.
---
Also, as it is `rowwise`, we can directly extract as well
df %>%
mutate(element5 = nth(unleveragedData, 5)) %>%
ungroup
-output
# A tibble: 6 × 3
price unleveragedData element5
<dbl> <list> <dbl>
1 450000 <list [5]> 260551.
2 400000 <list [5]> 330787.
3 400000 <list [5]> 330787.
4 397000 <list [5]> 287739.
5 750000 <list [5]> 566416.
6 550000 <list [5]> 271880.
Or with `pluck`
df %>%
mutate(element5 = pluck(unleveragedData, 5)) %>%
ungroup
-output
# A tibble: 6 × 3
price unleveragedData element5
<dbl> <list> <dbl>
1 450000 <list [5]> 260551.
2 400000 <list [5]> 330787.
3 400000 <list [5]> 330787.
4 397000 <list [5]> 287739.
5 750000 <list [5]> 566416.
6 550000 <list [5]> 271880.
---
It is also possible to use `map` on a `rowwise` data frame, if we wrap the column in `pick`
df %>%
mutate(element5 = map_dbl(pick(unleveragedData), pluck, 5))
-output
# A tibble: 6 × 3
# Rowwise:
price unleveragedData element5
<dbl> <list> <dbl>
1 450000 <list [5]> 260551.
2 400000 <list [5]> 330787.
3 400000 <list [5]> 330787.
4 397000 <list [5]> 287739.
5 750000 <list [5]> 566416.
6 550000 <list [5]> 271880.
</details>
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论