英文:
Parse a function input name as an output name
问题
我试图在我的函数"result"中将数据输入变量名作为输出值添加到一个单独的列($V5)中。
输入是不同的数据值,我想将这些名称保存在输出中,以便我可以追踪数据的来源。
我以为这会很简单,我可以使用print(deparse(substitute(input)))来实现,但这不起作用。
有人能推荐一个解决方案吗?
英文:
I am trying to add the data input variable name as an output value in a separate column ($V5) within my function "result".
The inputs are different data values, and I would like to save these names in the outputs so I can track where the data is coming from.
I thought this would be fairly straightforward, and that I could use print(deparse(substitute(input))) but this doesn't work.
Can anyone recommend a solution?
library(dplyr)
library(tidyr)
## Inputs ##
# Four sample tables with identical columns: V1 (a label such as "Team_2022"),
# V2 (a name), V3 ("Sydney" in every row) and V4 (a numeric value).
# NOTE(review): the class attribute includes "data.table", but no
# library(data.table) call is visible in this script -- confirm it is intended.
input_1 = structure(list(V1 = c("Team_2022", "Team_2022", "Team_2022"), V2 = c("Frank", "Mary", "John"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(55, 76, 14)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))
input_2 = structure(list(V1 = c("Team_2023", "Team_2023", "Team_2023"), V2 = c("Bill", "Mary", "John"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(113, 23, 10)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))
input_3 = structure(list(V1 = c("Team_2024", "Team_2024", "Team_2024"), V2 = c("Frank", "Mary", "Bill"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(7, 19, 52)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))
input_4 = structure(list(V1 = c("Team_2025", "Team_2025", "Team_2025"), V2 = c("Frank", "Mary", "John"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(46, 44, 88)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))
## Teams ##
# Team ids (V1) and their members (V2); member names are joined with " + ".
teams = structure(list(V1 = c("team1", "team2", "team3"), V2 = c("Mary + Frank","Mary + John", "Mary + Bill")), class = "data.frame", row.names = c(NA, -3L))
## Group the inputs into one ##
# Collect the names of the input_<n> tables. The pattern requires a digit
# after "input_" so that helper objects such as input_objects / input_test
# are not picked up when the script is run a second time.
all_objects <- ls()
input_objects <- grep("^input_\\d", all_objects, value = TRUE)
# One-column data frame of object names; the column is named `input_objects`.
input_test <- as.data.frame(input_objects)
## Function ##
# Summarise one input table per team.
#
# input: table with columns V1, V2, V3 and V4; it is joined to the team
#        members on V2.
# teams: table with a team id in V1 and " + "-separated member names in V2.
# Returns one row per team (V1.x) holding the input's V1 (V1.y), the re-joined
# member names (V2), the first V3, the sum of V4, and V5.
result = function(input, teams) {
data = teams %>%
# one row per team member
separate_rows(V2) %>%
left_join(input, by = c("V2" = "V2")) %>%
# members that are absent from `input` contribute 0 to the total
replace_na(list(V4 = 0)) %>%
# carry V1.y / V3 down onto the rows of unmatched members within each team
group_by(V1.x) %>% fill(V1.y, V3) %>%
summarize(V1.y = first(V1.y),
V2 = paste(V2, collapse = " + "),
V3 = first(V3),
V4 = sum(V4),
# NOTE(review): meant to record the name of the object passed as `input`,
# but the "Current output" listing shows V5 is always the literal "input".
V5 = print(deparse(substitute(input))))
return(data)
}
# Gather every input_<n> object from the workspace and run result() on each.
all_objects <- ls()
input_objects <- grep("^input_\\d", all_objects, value = TRUE)
input_test <- lapply(input_objects, get)
# Row-bind the per-input summaries into a single table.
output <- input_test %>%
  lapply(result, teams) %>%
  bind_rows()
### Current output ###
structure(list(V1.x = c("team1", "team2", "team3", "team1", "team2",
"team3", "team1", "team2", "team3", "team1", "team2", "team3"
), V1.y = c("Team_2022", "Team_2022", "Team_2022", "Team_2023",
"Team_2023", "Team_2023", "Team_2024", "Team_2024", "Team_2024",
"Team_2025", "Team_2025", "Team_2025"), V2 = c("Mary + Frank",
"Mary + John", "Mary + Bill", "Mary + Frank", "Mary + John",
"Mary + Bill", "Mary + Frank", "Mary + John", "Mary + Bill",
"Mary + Frank", "Mary + John", "Mary + Bill"), V3 = c("Sydney",
"Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney",
"Sydney", "Sydney", "Sydney", "Sydney"), V4 = c(131, 90, 76,
23, 33, 136, 26, 19, 71, 90, 132, 44), V5 = c("input", "input",
"input", "input", "input", "input", "input", "input", "input",
"input", "input", "input")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -12L))
### Desired Output ###
structure(list(V1.x = c("team1", "team2", "team3", "team1", "team2",
"team3", "team1", "team2", "team3", "team1", "team2", "team3"
), V1.y = c("Team_2022", "Team_2022", "Team_2022", "Team_2023",
"Team_2023", "Team_2023", "Team_2024", "Team_2024", "Team_2024",
"Team_2025", "Team_2025", "Team_2025"), V2 = c("Mary + Frank",
"Mary + John", "Mary + Bill", "Mary + Frank", "Mary + John",
"Mary + Bill", "Mary + Frank", "Mary + John", "Mary + Bill",
"Mary + Frank", "Mary + John", "Mary + Bill"), V3 = c("Sydney",
"Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney",
"Sydney", "Sydney", "Sydney", "Sydney"), V4 = c(131, 90, 76,
23, 33, 136, 26, 19, 71, 90, 132, 44), V5 = c("input_1", "input_1",
"input_1", "input_2", "input_2", "input_2", "input_3", "input_3", "input_3",
"input_4", "input_4", "input_4")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -12L))
答案1
得分: 1
如果我理解你的意思正确:purrr::map_df()
函数具有一个很好的特性,可以识别最终 data.frames
中的输入列表(最好是有命名的):
library(dplyr)
library(tidyr)
# dropping V5 as it will be "automatically" computed
# Summarise one input table per team: split each team's " + "-separated
# members (V2) into rows, look them up in `input`, then collapse back to one
# row per team with the summed V4. No V5 column is produced by this function.
result <- function(input, teams) {
  members <- separate_rows(teams, V2)
  joined <- left_join(members, input, by = c("V2" = "V2"))
  # members that are missing from `input` count as 0
  joined <- replace_na(joined, list(V4 = 0))
  per_team <- fill(group_by(joined, V1.x), V1.y, V3)
  summarize(
    per_team,
    V1.y = first(V1.y),
    V2 = paste(V2, collapse = " + "),
    V3 = first(V3),
    V4 = sum(V4)
  )
}
# Pick the workspace objects whose names look like input_<digit>...
l_objects <- ls()
input_objects <- l_objects[grepl("^input_\\d", l_objects)]
# Fetch the objects and label each list element with its object name.
input_test <- lapply(input_objects, get)
names(input_test) <- input_objects
# The list names end up in the V5 column through the .id argument.
purrr::map_df(input_test, function(tbl) result(tbl, teams), .id = "V5")
请注意,这也可以在不命名列表的情况下工作,尽管你只会得到列表项的编号,这可能不够。
此外,plyr::ldply
函数也可以用于绑定命名列表,生成一个新列,其中包含结果数据帧中的列表名称。
英文:
If I understand you correctly: the purrr::map_df()
function has a nice feature for identifying the elements of the input list (ideally a named list) in the final data frame
:
library(dplyr)
library(tidyr)
# dropping V5 as it will be "automatically" computed
# Summarise one input table per team: split each team's " + "-separated
# members (V2) into rows, look them up in `input`, then collapse back to one
# row per team with the summed V4. No V5 column is produced by this function.
result <- function(input, teams) {
  members <- separate_rows(teams, V2)
  joined <- left_join(members, input, by = c("V2" = "V2"))
  # members that are missing from `input` count as 0
  joined <- replace_na(joined, list(V4 = 0))
  per_team <- fill(group_by(joined, V1.x), V1.y, V3)
  summarize(
    per_team,
    V1.y = first(V1.y),
    V2 = paste(V2, collapse = " + "),
    V3 = first(V3),
    V4 = sum(V4)
  )
}
l_objects <- ls()
# Search the listing taken on the line above, so the snippet does not depend
# on an `all_objects` variable left over from other code.
input_objects <- grep("^input_\\d", l_objects, value = TRUE)
input_test <- lapply(input_objects, get)
# name the object list to use the name for identification
names(input_test) <- input_objects
# use purrr map to data.frame with the .id feature
purrr::map_df(input_test, ~result(.x, teams), .id = "V5")
V5 V1.x V1.y V2 V3 V4
<chr> <chr> <chr> <chr> <chr> <dbl>
1 input_1 team1 Team_2022 Mary + Frank Sydney 131
2 input_1 team2 Team_2022 Mary + John Sydney 90
3 input_1 team3 Team_2022 Mary + Bill Sydney 76
4 input_2 team1 Team_2023 Mary + Frank Sydney 23
5 input_2 team2 Team_2023 Mary + John Sydney 33
6 input_2 team3 Team_2023 Mary + Bill Sydney 136
7 input_3 team1 Team_2024 Mary + Frank Sydney 26
8 input_3 team2 Team_2024 Mary + John Sydney 19
9 input_3 team3 Team_2024 Mary + Bill Sydney 71
10 input_4 team1 Team_2025 Mary + Frank Sydney 90
11 input_4 team2 Team_2025 Mary + John Sydney 132
12 input_4 team3 Team_2025 Mary + Bill Sydney 44
Note that this works without naming the list as well, though you will only get the list item number, which might be insufficient.
Also the plyr::ldply
function can be used to bind named lists, generating a new column with the list names in the result data.frame.
答案2
得分: 1
不使用deparse/substitute
,而是在result
函数中为名称创建一个参数,然后使用它。
library(dplyr)
library(purrr)
library(tidyr)
# Summarise one input table per team and tag every row with a caller-supplied
# label.
#
# input:   table with columns V1, V2, V3, V4, joined to the team members on V2.
# teams:   table with a team id in V1 and " + "-separated member names in V2.
# inputnm: label stored in the V5 column (e.g. the name of the input object).
result <- function(input, teams, inputnm) {
  members <- separate_rows(teams, V2)
  joined <- left_join(members, input, by = c("V2" = "V2"))
  # members that are missing from `input` count as 0
  joined <- replace_na(joined, list(V4 = 0))
  per_team <- fill(group_by(joined, V1.x), V1.y, V3)
  summarize(
    per_team,
    V1.y = first(V1.y),
    V2 = paste(V2, collapse = " + "),
    V3 = first(V3),
    V4 = sum(V4),
    V5 = inputnm
  )
}
测试
# Fetch the input tables by name; mget() returns a list named after the
# objects, so imap_dfr() can pass each table (.x) and its name (.y) to result().
# The names come straight from `input_objects`, so this works whether
# `input_test` currently holds a data frame or a list.
input_objects %>%
  mget(inherits = TRUE) %>%
  imap_dfr(~ result(.x, teams, .y))
输出
# A tibble: 12 × 6
V1.x V1.y V2 V3 V4 V5
<chr> <chr> <chr> <chr> <dbl> <chr>
1 team1 Team_2022 Mary + Frank Sydney 131 input_1
2 team2 Team_2022 Mary + John Sydney 90 input_1
3 team3 Team_2022 Mary + Bill Sydney 76 input_1
4 team1 Team_2023 Mary + Frank Sydney 23 input_2
5 team2 Team_2023 Mary + John Sydney 33 input_2
6 team3 Team_2023 Mary + Bill Sydney 136 input_2
7 team1 Team_2024 Mary + Frank Sydney 26 input_3
8 team2 Team_2024 Mary + John Sydney 19 input_3
9 team3 Team_2024 Mary + Bill Sydney 71 input_3
10 team1 Team_2025 Mary + Frank Sydney 90 input_4
11 team2 Team_2025 Mary + John Sydney 132 input_4
12 team3 Team_2025 Mary + Bill Sydney 44 input_4
这是您提供的代码的翻译。
英文:
Instead of doing the deparse/substitute
, create an argument in result
for the names as well, and then use that
library(dplyr)
library(purrr)
library(tidyr)
# Summarise one input table per team and tag every row with a caller-supplied
# label.
#
# input:   table with columns V1, V2, V3, V4, joined to the team members on V2.
# teams:   table with a team id in V1 and " + "-separated member names in V2.
# inputnm: label stored in the V5 column (e.g. the name of the input object).
result <- function(input, teams, inputnm) {
  members <- separate_rows(teams, V2)
  joined <- left_join(members, input, by = c("V2" = "V2"))
  # members that are missing from `input` count as 0
  joined <- replace_na(joined, list(V4 = 0))
  per_team <- fill(group_by(joined, V1.x), V1.y, V3)
  summarize(
    per_team,
    V1.y = first(V1.y),
    V2 = paste(V2, collapse = " + "),
    V3 = first(V3),
    V4 = sum(V4),
    V5 = inputnm
  )
}
-testing
# Fetch the input tables by name; mget() returns a list named after the
# objects, so imap_dfr() can pass each table (.x) and its name (.y) to result().
# The names come straight from `input_objects`, so this works whether
# `input_test` currently holds a data frame or a list.
input_objects %>%
  mget(inherits = TRUE) %>%
  imap_dfr(~ result(.x, teams, .y))
-output
# A tibble: 12 × 6
V1.x V1.y V2 V3 V4 V5
<chr> <chr> <chr> <chr> <dbl> <chr>
1 team1 Team_2022 Mary + Frank Sydney 131 input_1
2 team2 Team_2022 Mary + John Sydney 90 input_1
3 team3 Team_2022 Mary + Bill Sydney 76 input_1
4 team1 Team_2023 Mary + Frank Sydney 23 input_2
5 team2 Team_2023 Mary + John Sydney 33 input_2
6 team3 Team_2023 Mary + Bill Sydney 136 input_2
7 team1 Team_2024 Mary + Frank Sydney 26 input_3
8 team2 Team_2024 Mary + John Sydney 19 input_3
9 team3 Team_2024 Mary + Bill Sydney 71 input_3
10 team1 Team_2025 Mary + Frank Sydney 90 input_4
11 team2 Team_2025 Mary + John Sydney 132 input_4
12 team3 Team_2025 Mary + Bill Sydney 44 input_4
答案3
得分: 0
如果您一次只有一个输入,简单地将分配V5
的行移到您的summarize
和dplyr
管道之外就可以了:
# Summarise one input table per team and tag every row (column V5) with the
# expression the caller wrote for `input`.
#
# input: table with columns V1, V2, V3, V4, joined to the team members on V2.
# teams: table with a team id in V1 and " + "-separated member names in V2.
# Returns one row per team. For a direct call such as result(input_1, teams)
# V5 is "input_1"; when called through lapply() it is the internal expression
# (e.g. X[[i]]) rather than an object name.
result <- function(input, teams) {
  # Capture the caller's expression as ONE string: deparse() may return several
  # strings for a long expression, which cannot be assigned to a single column.
  input_label <- paste(deparse(substitute(input)), collapse = " ")
  data <- teams %>%
    separate_rows(V2) %>%
    left_join(input, by = c("V2" = "V2")) %>%
    # members that are missing from `input` count as 0
    replace_na(list(V4 = 0)) %>%
    group_by(V1.x) %>%
    fill(V1.y, V3) %>%
    summarize(
      V1.y = first(V1.y),
      V2 = paste(V2, collapse = " + "),
      V3 = first(V3),
      V4 = sum(V4)
    )
  # Assigned outside the dplyr pipeline, using the label captured above.
  data$V5 <- input_label
  data
}
result(input_1, teams)
# V1.x V1.y V2 V3 V4 V5
# <chr> <chr> <chr> <chr> <dbl> <chr>
# 1 team1 Team_2022 Mary + Frank Sydney 131 input_1
# 2 team2 Team_2022 Mary + John Sydney 90 input_1
# 3 team3 Team_2022 Mary + Bill Sydney 76 input_1
但您使用列表的方式有些挑战(对于该代码会输出X[[i]]
)。
为了解决这个问题,使用lapply
,我建议简单地添加一个额外的输入来获取名称,并对lapply
函数进行额外的调整以适应它:
# Summarise one input table per team and tag every row with a caller-supplied
# label.
#
# input: table with columns V1, V2, V3, V4, joined to the team members on V2.
# teams: table with a team id in V1 and " + "-separated member names in V2.
# nme:   label written to the V5 column after the summary is built.
result <- function(input, teams, nme) {
  members <- separate_rows(teams, V2)
  joined <- left_join(members, input, by = c("V2" = "V2"))
  # members that are missing from `input` count as 0
  joined <- replace_na(joined, list(V4 = 0))
  per_team <- fill(group_by(joined, V1.x), V1.y, V3)
  team_summary <- summarize(
    per_team,
    V1.y = first(V1.y),
    V2 = paste(V2, collapse = " + "),
    V3 = first(V3),
    V4 = sum(V4)
  )
  team_summary$V5 <- nme
  team_summary
}
all_objects <- ls()
input_objects <- grep("^input_\\d", all_objects, value = TRUE)
input_test <- lapply(input_objects, get)
# label each list element with the name of the object it came from
names(input_test) <- input_objects
# iterate over the names so result() receives both the table and its label
output <- input_objects %>%
  lapply(function(x) result(input_test[[x]], teams, nme = x)) %>%
  bind_rows()
# V1.x V1.y V2 V3 V4 V5
# <chr> <chr> <chr> <chr> <dbl> <chr>
# 1 team1 Team_2022 Mary + Frank Sydney 131 input_1
# 2 team2 Team_2022 Mary + John Sydney 90 input_1
# 3 team3 Team_2022 Mary + Bill Sydney 76 input_1
# 4 team1 Team_2023 Mary + Frank Sydney 23 input_2
# 5 team2 Team_2023 Mary + John Sydney 33 input_2
# 6 team3 Team_2023 Mary + Bill Sydney 136 input_2
# 7 team1 Team_2024 Mary + Frank Sydney 26 input_3
# 8 team2 Team_2024 Mary + John Sydney 19 input_3
# 9 team3 Team_2024 Mary + Bill Sydney 71 input_3
# 10 team1 Team_2025 Mary + Frank Sydney 90 input_4
# 11 team2 Team_2025 Mary + John Sydney 132 input_4
# 12 team3 Team_2025 Mary + Bill Sydney 44 input_4
英文:
If you just had one input at a time, simply moving the line for assigning V5
outside of your summarize
and dplyr
pipes would do it:
# Summarise one input table per team and tag every row (column V5) with the
# expression the caller wrote for `input`.
#
# input: table with columns V1, V2, V3, V4, joined to the team members on V2.
# teams: table with a team id in V1 and " + "-separated member names in V2.
# Returns one row per team. For a direct call such as result(input_1, teams)
# V5 is "input_1"; when called through lapply() it is the internal expression
# (e.g. X[[i]]) rather than an object name.
result <- function(input, teams) {
  # Capture the caller's expression as ONE string: deparse() may return several
  # strings for a long expression, which cannot be assigned to a single column.
  input_label <- paste(deparse(substitute(input)), collapse = " ")
  data <- teams %>%
    separate_rows(V2) %>%
    left_join(input, by = c("V2" = "V2")) %>%
    # members that are missing from `input` count as 0
    replace_na(list(V4 = 0)) %>%
    group_by(V1.x) %>%
    fill(V1.y, V3) %>%
    summarize(
      V1.y = first(V1.y),
      V2 = paste(V2, collapse = " + "),
      V3 = first(V3),
      V4 = sum(V4)
    )
  # Assigned outside the dplyr pipeline, using the label captured above.
  data$V5 <- input_label
  data
}
result(input_1, teams)
# V1.x V1.y V2 V3 V4 V5
# <chr> <chr> <chr> <chr> <dbl> <chr>
# 1 team1 Team_2022 Mary + Frank Sydney 131 input_1
# 2 team2 Team_2022 Mary + John Sydney 90 input_1
# 3 team3 Team_2022 Mary + Bill Sydney 76 input_1
But your use of lists makes that a bit challenging (it will output X[[i]]
for that code).
To make this work with lapply
, I would suggest adding an extra argument that takes the name and assigning that name to V5, with a small tweak to the lapply
call to accommodate it:
# Summarise one input table per team and tag every row with a caller-supplied
# label.
#
# input: table with columns V1, V2, V3, V4, joined to the team members on V2.
# teams: table with a team id in V1 and " + "-separated member names in V2.
# nme:   label written to the V5 column after the summary is built.
result <- function(input, teams, nme) {
  members <- separate_rows(teams, V2)
  joined <- left_join(members, input, by = c("V2" = "V2"))
  # members that are missing from `input` count as 0
  joined <- replace_na(joined, list(V4 = 0))
  per_team <- fill(group_by(joined, V1.x), V1.y, V3)
  team_summary <- summarize(
    per_team,
    V1.y = first(V1.y),
    V2 = paste(V2, collapse = " + "),
    V3 = first(V3),
    V4 = sum(V4)
  )
  team_summary$V5 <- nme
  team_summary
}
all_objects <- ls()
input_objects <- grep("^input_\\d", all_objects, value = TRUE)
input_test <- lapply(input_objects, get)
# add in assigning names to the list
names(input_test) <- input_objects
# iterate over the names so result() receives both the table and its label
output <- input_objects %>%
  lapply(function(x) result(input_test[[x]], teams, nme = x)) %>%
  bind_rows()
# V1.x V1.y V2 V3 V4 V5
# <chr> <chr> <chr> <chr> <dbl> <chr>
# 1 team1 Team_2022 Mary + Frank Sydney 131 input_1
# 2 team2 Team_2022 Mary + John Sydney 90 input_1
# 3 team3 Team_2022 Mary + Bill Sydney 76 input_1
# 4 team1 Team_2023 Mary + Frank Sydney 23 input_2
# 5 team2 Team_2023 Mary + John Sydney 33 input_2
# 6 team3 Team_2023 Mary + Bill Sydney 136 input_2
# 7 team1 Team_2024 Mary + Frank Sydney 26 input_3
# 8 team2 Team_2024 Mary + John Sydney 19 input_3
# 9 team3 Team_2024 Mary + Bill Sydney 71 input_3
# 10 team1 Team_2025 Mary + Frank Sydney 90 input_4
# 11 team2 Team_2025 Mary + John Sydney 132 input_4
# 12 team3 Team_2025 Mary + Bill Sydney 44 input_4
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论