解析函数输入名称作为输出名称。

huangapple go评论63阅读模式
英文:

Parse a function input name as an output name

问题

我试图在我的函数"result"中将数据输入变量名作为输出值添加到一个单独的列($V5)中。

输入是不同的数据值,我想将这些名称保存在输出中,以便我可以追踪数据的来源。

我以为这会很简单,我可以使用print(deparse(substitute(input)))来实现,但这不起作用。

有人能推荐一个解决方案吗?

英文:

I am trying to add the data input variable name as an output value in a separate column ($V5) within my function "result".

The inputs are different data values, and I would like to save these names in the outputs so I can track where the data is coming from.

I thought this would be fairly straightforward, and that I could use print(deparse(substitute(input))) but this doesn't work.

Can anyone recommend a solution?

library(dplyr)
library(tidyr)

## Inputs ##
# Four example tables with the same layout: V1 = dataset label
# (e.g. "Team_2022"), V2 = person name, V3 = location, V4 = numeric value
# that result() sums per team.

input_1 = structure(list(V1 = c("Team_2022", "Team_2022", "Team_2022"), V2 = c("Frank", "Mary", "John"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(55, 76, 14)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))
input_2 = structure(list(V1 = c("Team_2023", "Team_2023", "Team_2023"), V2 = c("Bill", "Mary", "John"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(113, 23, 10)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))
input_3 = structure(list(V1 = c("Team_2024", "Team_2024", "Team_2024"), V2 = c("Frank", "Mary", "Bill"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(7, 19, 52)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))
input_4 = structure(list(V1 = c("Team_2025", "Team_2025", "Team_2025"), V2 = c("Frank", "Mary", "John"), V3 = c("Sydney", "Sydney", "Sydney"), V4 = c(46, 44, 88)), row.names = c(NA, -3L), class = c("data.table", "data.frame"))

## Teams ##
# Lookup table: V1 = team id, V2 = the team's members joined by " + ".

teams = structure(list(V1 = c("team1", "team2", "team3"), V2 = c("Mary + Frank","Mary + John", "Mary + Bill")), class = "data.frame", row.names = c(NA, -3L))

## Group the inputs into one ##

# Collect the names of every `input_<digit>` object in the workspace into a
# one-column data frame (column `input_objects`).
# The pattern is anchored on `input_<digit>` so that the helper objects this
# script creates (`input_objects`, `input_test`) are not matched on a re-run.
all_objects <- ls()
input_objects <- grep("^input_\\d", all_objects, value = TRUE)
# The original referenced an undefined `input_obj`; the name vector is meant.
input_test <- data.frame(input_objects = input_objects)

## Function ##

# Summarise one input table per team.
#
# input: data frame with columns V1..V4; matched to team members on V2.
# teams: data frame with a team id in V1 and a " + "-separated member list
#        in V2.
# Returns one row per team (V1.x) with the input's V1 (as V1.y), the
# re-joined member list (V2), V3, the summed V4, and V5.
#
# NOTE: V5 is meant to hold the name of the object passed as `input`, but it
# does not: as the "Current output" below shows, every row gets the literal
# string "input". Presumably this is because substitute() is evaluated inside
# summarize()'s data mask rather than directly in result()'s own frame, so it
# never sees the caller's expression -- TODO confirm.
result = function(input, teams) {
  data = teams %>%
  # one row per team member
  separate_rows(V2) %>%
  # both tables have a V1 column, so the join yields V1.x (teams) and V1.y (input)
  left_join(input, by = c("V2" = "V2")) %>%
  # members that are absent from `input` contribute 0 to the team total
  replace_na(list(V4 = 0)) %>%
  # fill V1.y and V3 down within each team for rows that had no match
  group_by(V1.x) %>% fill(V1.y, V3) %>%
  summarize(V1.y = first(V1.y),
            V2 = paste(V2, collapse = " + "),
            V3 = first(V3),
            V4 = sum(V4),
            # print() only echoes the string and passes it through as the value
            V5 = print(deparse(substitute(input))))
  return(data)
}



# Gather every `input_<digit>` object from the workspace into a list, run
# result() on each one against `teams`, and stack the per-input summaries.
all_objects <- ls()
input_objects <- grep("^input_\\d", all_objects, value = TRUE)
input_test <- lapply(input_objects, get)

output <- bind_rows(lapply(input_test, result, teams))

### Current output ###

structure(list(V1.x = c("team1", "team2", "team3", "team1", "team2", 
"team3", "team1", "team2", "team3", "team1", "team2", "team3"
), V1.y = c("Team_2022", "Team_2022", "Team_2022", "Team_2023", 
"Team_2023", "Team_2023", "Team_2024", "Team_2024", "Team_2024", 
"Team_2025", "Team_2025", "Team_2025"), V2 = c("Mary + Frank", 
"Mary + John", "Mary + Bill", "Mary + Frank", "Mary + John", 
"Mary + Bill", "Mary + Frank", "Mary + John", "Mary + Bill", 
"Mary + Frank", "Mary + John", "Mary + Bill"), V3 = c("Sydney", 
"Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", 
"Sydney", "Sydney", "Sydney", "Sydney"), V4 = c(131, 90, 76, 
23, 33, 136, 26, 19, 71, 90, 132, 44), V5 = c("input", "input", 
"input", "input", "input", "input", "input", "input", "input", 
"input", "input", "input")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -12L))

### Desired Output ###

structure(list(V1.x = c("team1", "team2", "team3", "team1", "team2", 
"team3", "team1", "team2", "team3", "team1", "team2", "team3"
), V1.y = c("Team_2022", "Team_2022", "Team_2022", "Team_2023", 
"Team_2023", "Team_2023", "Team_2024", "Team_2024", "Team_2024", 
"Team_2025", "Team_2025", "Team_2025"), V2 = c("Mary + Frank", 
"Mary + John", "Mary + Bill", "Mary + Frank", "Mary + John", 
"Mary + Bill", "Mary + Frank", "Mary + John", "Mary + Bill", 
"Mary + Frank", "Mary + John", "Mary + Bill"), V3 = c("Sydney", 
"Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", 
"Sydney", "Sydney", "Sydney", "Sydney"), V4 = c(131, 90, 76, 
23, 33, 136, 26, 19, 71, 90, 132, 44), V5 = c("input_1", "input_1", 
"input_1", "input_2", "input_2", "input_2", "input_3", "input_3", "input_3", 
"input_4", "input_4", "input_4")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -12L))


答案1

得分: 1

如果我理解你的意思正确:purrr::map_df() 函数具有一个很好的特性,可以识别最终 data.frames 中的输入列表(最好是有命名的):

library(dplyr)
library(tidyr)
# V5 is no longer built here; the caller adds it when combining the results.
#
# Summarise one input table per team: split each team's member list into
# rows, look every member up in `input`, and add up their V4 values.
#   input: data frame with columns V1..V4; matched to team members on V2
#   teams: data frame with team id in V1 and " + "-separated members in V2
# Returns one row per team: V1.x, V1.y, V2, V3 and the summed V4.
result <- function(input, teams) {
  members <- separate_rows(teams, V2)
  joined <- left_join(members, input, by = c("V2" = "V2"))
  # members that are absent from `input` count as 0
  joined <- replace_na(joined, list(V4 = 0))
  by_team <- fill(group_by(joined, V1.x), V1.y, V3)
  summarize(
    by_team,
    V1.y = first(V1.y),
    V2 = paste(V2, collapse = " + "),
    V3 = first(V3),
    V4 = sum(V4)
  )
}

# Find every `input_<digit>` object in the workspace
l_objects <- ls()
input_objects <- grep("^input_\\d", l_objects, value = TRUE)

# Fetch the objects into a list keyed by their own names, so that each name
# can serve as the identifier of its data
input_test <- setNames(lapply(input_objects, get), input_objects)

# map_df() row-binds the per-input results; `.id` stores each list name in V5
purrr::map_df(input_test, function(tbl) result(tbl, teams), .id = "V5")

请注意,这也可以在不命名列表的情况下工作,尽管你只会得到列表项的编号,这可能不够。

此外,plyr::ldply 函数也可以用于绑定命名列表,生成一个新列,其中包含结果数据帧中的列表名称。

英文:

If I understand you correctly: the purrr::map_df() function has a nice feature to identify inputs lists (ideally named) in final data.frames:

library(dplyr)
library(tidyr)
# V5 is not built inside result(); it is added afterwards from the list names.
#
# Summarise one input table per team.
#   input: data frame with columns V1..V4; matched to team members on V2
#   teams: data frame with team id in V1 and " + "-separated members in V2
# Returns one row per team with V1.x (team id), V1.y (the input's V1), V2
# (member list), V3 and the team's summed V4.
result <- function(input, teams) {
  teams %>%
    # one row per team member
    separate_rows(V2) %>%
    left_join(input, by = c("V2" = "V2")) %>%
    # members that are absent from `input` contribute 0 to the sum
    replace_na(list(V4 = 0)) %>%
    group_by(V1.x) %>%
    fill(V1.y, V3) %>%
    summarize(
      V1.y = first(V1.y),
      V2 = paste(V2, collapse = " + "),
      V3 = first(V3),
      V4 = sum(V4)
    )
}


# Names of every `input_<digit>` object in the workspace.
# (The original grepped `all_objects`, which this snippet never defines.)
l_objects <- ls()
input_objects <- grep("^input_\\d", l_objects, value = TRUE)
input_test <- lapply(input_objects, get)

# name the object list to use the name for identification
names(input_test) <- input_objects

# use purrr map to data.frame with the .id feature:
# `.id = "V5"` writes each list element's name into a V5 column
purrr::map_df(input_test, ~ result(.x, teams), .id = "V5")

  V5      V1.x  V1.y      V2           V3        V4
   <chr>   <chr> <chr>     <chr>        <chr>  <dbl>
 1 input_1 team1 Team_2022 Mary + Frank Sydney   131
 2 input_1 team2 Team_2022 Mary + John  Sydney    90
 3 input_1 team3 Team_2022 Mary + Bill  Sydney    76
 4 input_2 team1 Team_2023 Mary + Frank Sydney    23
 5 input_2 team2 Team_2023 Mary + John  Sydney    33
 6 input_2 team3 Team_2023 Mary + Bill  Sydney   136
 7 input_3 team1 Team_2024 Mary + Frank Sydney    26
 8 input_3 team2 Team_2024 Mary + John  Sydney    19
 9 input_3 team3 Team_2024 Mary + Bill  Sydney    71
10 input_4 team1 Team_2025 Mary + Frank Sydney    90
11 input_4 team2 Team_2025 Mary + John  Sydney   132
12 input_4 team3 Team_2025 Mary + Bill  Sydney    44

Note that this works without naming the list as well, though you will then only get the list item number, which might be insufficient.

Also the plyr::ldply function can be used to bind named lists, generating a new column with the list names in the result data.frame.

答案2

得分: 1

不使用deparse/substitute,而是在result函数中为名称创建一个参数,然后使用它。

library(dplyr)
library(purrr)
library(tidyr)
# Per-team summary of one input table, tagged with the input's name.
#   input:   data frame with columns V1..V4; matched to team members on V2
#   teams:   data frame with team id in V1 and " + "-separated members in V2
#   inputnm: label stored in column V5 of every output row
# Returns one row per team: V1.x, V1.y, V2, V3, the summed V4, and V5.
result <- function(input, teams, inputnm) {
  team_members <- teams %>%
    separate_rows(V2)

  matched <- team_members %>%
    left_join(input, by = c("V2" = "V2")) %>%
    replace_na(list(V4 = 0))

  filled <- matched %>%
    group_by(V1.x) %>%
    fill(V1.y, V3)

  summarize(
    filled,
    V1.y = first(V1.y),
    V2 = paste(V2, collapse = " + "),
    V3 = first(V3),
    V4 = sum(V4),
    V5 = inputnm
  )
}

测试

# Fetch the input tables by name. mget() returns a list named after the
# objects, so imap_dfr() can hand each name to result() as `inputnm`.
# `input_objects` (the character vector of object names) is used directly:
# pulling it out of `input_test` only works if `input_test` is a data frame
# with an `input_objects` column, whereas the other snippets build it as a
# plain list.
mget(input_objects, inherits = TRUE) %>%
  imap_dfr(~ result(.x, teams, .y))

输出

# A tibble: 12 × 6
   V1.x  V1.y      V2           V3        V4 V5     
   <chr> <chr>     <chr>        <chr>  <dbl> <chr>  
 1 team1 Team_2022 Mary + Frank Sydney   131 input_1
 2 team2 Team_2022 Mary + John  Sydney    90 input_1
 3 team3 Team_2022 Mary + Bill  Sydney    76 input_1
 4 team1 Team_2023 Mary + Frank Sydney    23 input_2
 5 team2 Team_2023 Mary + John  Sydney    33 input_2
 6 team3 Team_2023 Mary + Bill  Sydney   136 input_2
 7 team1 Team_2024 Mary + Frank Sydney    26 input_3
 8 team2 Team_2024 Mary + John  Sydney    19 input_3
 9 team3 Team_2024 Mary + Bill  Sydney    71 input_3
10 team1 Team_2025 Mary + Frank Sydney    90 input_4
11 team2 Team_2025 Mary + John  Sydney   132 input_4
12 team3 Team_2025 Mary + Bill  Sydney    44 input_4

这是您提供的代码的翻译。

英文:

Instead of doing the deparse/substitute, create an argument in result for the names as well, and then use that

library(dplyr)
library(purrr)
library(tidyr)
# Summarise one input table per team and tag every row with the input's name.
#   input:   data frame with columns V1..V4; matched to team members on V2
#   teams:   data frame with team id in V1 and " + "-separated members in V2
#   inputnm: label written to column V5
# Returns one row per team: V1.x, V1.y, V2, V3, the summed V4, and V5.
result <- function(input, teams, inputnm) {
  teams %>%
    # one row per team member
    separate_rows(V2) %>%
    left_join(input, by = c("V2" = "V2")) %>%
    # members that are absent from `input` contribute 0 to the sum
    replace_na(list(V4 = 0)) %>%
    group_by(V1.x) %>%
    fill(V1.y, V3) %>%
    summarize(
      V1.y = first(V1.y),
      V2 = paste(V2, collapse = " + "),
      V3 = first(V3),
      V4 = sum(V4),
      V5 = inputnm
    )
}

-testing

# Fetch the input tables by name. mget() returns a list named after the
# objects, so imap_dfr() can hand each name to result() as `inputnm`.
# `input_objects` (the character vector of object names) is used directly:
# pulling it out of `input_test` only works if `input_test` is a data frame
# with an `input_objects` column, whereas the other snippets build it as a
# plain list.
mget(input_objects, inherits = TRUE) %>%
  imap_dfr(~ result(.x, teams, .y))

-output

# A tibble: 12 × 6
   V1.x  V1.y      V2           V3        V4 V5     
   <chr> <chr>     <chr>        <chr>  <dbl> <chr>  
 1 team1 Team_2022 Mary + Frank Sydney   131 input_1
 2 team2 Team_2022 Mary + John  Sydney    90 input_1
 3 team3 Team_2022 Mary + Bill  Sydney    76 input_1
 4 team1 Team_2023 Mary + Frank Sydney    23 input_2
 5 team2 Team_2023 Mary + John  Sydney    33 input_2
 6 team3 Team_2023 Mary + Bill  Sydney   136 input_2
 7 team1 Team_2024 Mary + Frank Sydney    26 input_3
 8 team2 Team_2024 Mary + John  Sydney    19 input_3
 9 team3 Team_2024 Mary + Bill  Sydney    71 input_3
10 team1 Team_2025 Mary + Frank Sydney    90 input_4
11 team2 Team_2025 Mary + John  Sydney   132 input_4
12 team3 Team_2025 Mary + Bill  Sydney    44 input_4

答案3

得分: 0

如果您一次只有一个输入,只需将给V5赋值的那一行移到summarize和dplyr管道之外即可:

# Per-team summary of one input table; V5 records the expression that the
# caller passed as `input`.
#   input: data frame with columns V1..V4; matched to team members on V2
#   teams: data frame with team id in V1 and " + "-separated members in V2
# Returns one row per team: V1.x, V1.y, V2, V3, the summed V4, and V5.
result <- function(input, teams) {
  # Text of the caller's argument, e.g. "input_1" for result(input_1, teams).
  # Taken in the function body itself, outside the dplyr pipeline.
  input_label <- deparse(substitute(input))

  team_totals <- teams %>%
    separate_rows(V2) %>%
    left_join(input, by = c("V2" = "V2")) %>%
    replace_na(list(V4 = 0)) %>%
    group_by(V1.x) %>%
    fill(V1.y, V3) %>%
    summarize(
      V1.y = first(V1.y),
      V2 = paste(V2, collapse = " + "),
      V3 = first(V3),
      V4 = sum(V4)
    )

  team_totals$V5 <- input_label
  team_totals
}

result(input_1, teams)

#  V1.x   V1.y       V2           V3        V4 V5     
#  <chr> <chr>     <chr>        <chr>  <dbl> <chr>  
#  1 team1 Team_2022 Mary + Frank Sydney   131 input_1
#  2 team2 Team_2022 Mary + John  Sydney    90 input_1
#  3 team3 Team_2022 Mary + Bill  Sydney    76 input_1

但您使用列表的方式有些挑战(对于该代码会输出X[[i]])。

为了解决这个问题,使用lapply,我建议简单地添加一个额外的输入来获取名称,并对lapply函数进行额外的调整以适应它:

# Per-team summary of one input table, labelled with a caller-supplied name.
#   input: data frame with columns V1..V4; matched to team members on V2
#   teams: data frame with team id in V1 and " + "-separated members in V2
#   nme:   label assigned to column V5 of the result
# Returns one row per team: V1.x, V1.y, V2, V3, the summed V4, and V5.
result <- function(input, teams, nme) {
  per_member <- separate_rows(teams, V2) %>%
    left_join(input, by = c("V2" = "V2")) %>%
    replace_na(list(V4 = 0))

  team_totals <- per_member %>%
    group_by(V1.x) %>%
    fill(V1.y, V3) %>%
    summarize(
      V1.y = first(V1.y),
      V2 = paste(V2, collapse = " + "),
      V3 = first(V3),
      V4 = sum(V4)
    )

  # added after the pipeline, as a plain column assignment
  team_totals$V5 <- nme
  team_totals
}

# Collect every `input_<digit>` object from the workspace
all_objects <- ls()
input_objects <- grep("^input_\\d", all_objects, value = TRUE)
input_test <- lapply(input_objects, get)

# Name each list element after the object it was read from
names(input_test) <- input_objects

# Run result() once per name, passing the name along as `nme`, then stack
# the per-input tables
output <- bind_rows(
  lapply(input_objects, function(obj_name) {
    result(input_test[[obj_name]], teams, nme = obj_name)
  })
)

#     V1.x  V1.y      V2           V3        V4 V5     
#     <chr> <chr>     <chr>        <chr>  <dbl> <chr>  
#   1 team1 Team_2022 Mary + Frank Sydney   131 input_1
#   2 team2 Team_2022 Mary + John  Sydney    90 input_1
#   3 team3 Team_2022 Mary + Bill  Sydney    76 input_1
#   4 team1 Team_2023 Mary + Frank Sydney    23 input_2
#   5 team2 Team_2023 Mary + John  Sydney    33 input_2
#   6 team3 Team_2023 Mary + Bill  Sydney   136 input_2
#   7 team1 Team_2024 Mary + Frank Sydney    26 input_3
#   8 team2 Team_2024 Mary + John  Sydney    19 input_3
#   9 team3 Team_2024 Mary + Bill  Sydney    71 input_3
#  10 team1 Team_2025 Mary + Frank Sydney    90 input_4
#  11 team2 Team_2025 Mary + John  Sydney   132 input_4
#  12 team3 Team_2025 Mary + Bill  Sydney    44 input_4
英文:

If you just had one input at a time, simply moving the line for assigning V5 outside of your summarize and dplyr pipes would do it:

# Summarise one input table per team; V5 records the expression that the
# caller passed as `input`.
#   input: data frame with columns V1..V4; matched to team members on V2
#   teams: data frame with team id in V1 and " + "-separated members in V2
# Returns one row per team: V1.x, V1.y, V2, V3, the summed V4, and V5.
result <- function(input, teams) {
  data <- teams %>%
    # one row per team member
    separate_rows(V2) %>%
    left_join(input, by = c("V2" = "V2")) %>%
    # members that are absent from `input` contribute 0 to the sum
    replace_na(list(V4 = 0)) %>%
    group_by(V1.x) %>%
    fill(V1.y, V3) %>%
    summarize(
      V1.y = first(V1.y),
      V2 = paste(V2, collapse = " + "),
      V3 = first(V3),
      V4 = sum(V4)
    )
  # Done outside summarize(): here substitute() sees the caller's expression,
  # e.g. "input_1" for result(input_1, teams). When called through lapply()
  # it yields "X[[i]]" instead.
  data$V5 <- deparse(substitute(input))
  data
}

result(input_1, teams)

#  V1.x   V1.y       V2           V3        V4 V5     
#  <chr> <chr>     <chr>        <chr>  <dbl> <chr>  
#  1 team1 Team_2022 Mary + Frank Sydney   131 input_1
#  2 team2 Team_2022 Mary + John  Sydney    90 input_1
#  3 team3 Team_2022 Mary + Bill  Sydney    76 input_1

But your use of lists makes that a bit challenging (it will output X[[i]] for that code).

To make this work with lapply, I would suggest adding an extra argument that takes the name and assigning it to V5, with a small tweak to the lapply call to pass it in:

# Summarise one input table per team and label it with a caller-supplied name.
#   input: data frame with columns V1..V4; matched to team members on V2
#   teams: data frame with team id in V1 and " + "-separated members in V2
#   nme:   label assigned to column V5 of the result
# Returns one row per team: V1.x, V1.y, V2, V3, the summed V4, and V5.
result <- function(input, teams, nme) {
  data <- teams %>%
    # one row per team member
    separate_rows(V2) %>%
    left_join(input, by = c("V2" = "V2")) %>%
    # members that are absent from `input` contribute 0 to the sum
    replace_na(list(V4 = 0)) %>%
    group_by(V1.x) %>%
    fill(V1.y, V3) %>%
    summarize(
      V1.y = first(V1.y),
      V2 = paste(V2, collapse = " + "),
      V3 = first(V3),
      V4 = sum(V4)
    )
  data$V5 <- nme
  data
}

# Collect every `input_<digit>` object from the workspace
all_objects <- ls()
input_objects <- grep("^input_\\d", all_objects, value = TRUE)
input_test <- lapply(input_objects, get)

# add in assigning names to the list
names(input_test) <- input_objects

# Call result() once per name so the name can be passed along as `nme`,
# then stack the per-input tables
output <- lapply(
  input_objects,
  function(x) result(input_test[[x]], teams, nme = x)
) %>%
  bind_rows()

#     V1.x  V1.y      V2           V3        V4 V5     
#     <chr> <chr>     <chr>        <chr>  <dbl> <chr>  
#   1 team1 Team_2022 Mary + Frank Sydney   131 input_1
#   2 team2 Team_2022 Mary + John  Sydney    90 input_1
#   3 team3 Team_2022 Mary + Bill  Sydney    76 input_1
#   4 team1 Team_2023 Mary + Frank Sydney    23 input_2
#   5 team2 Team_2023 Mary + John  Sydney    33 input_2
#   6 team3 Team_2023 Mary + Bill  Sydney   136 input_2
#   7 team1 Team_2024 Mary + Frank Sydney    26 input_3
#   8 team2 Team_2024 Mary + John  Sydney    19 input_3
#   9 team3 Team_2024 Mary + Bill  Sydney    71 input_3
#  10 team1 Team_2025 Mary + Frank Sydney    90 input_4
#  11 team2 Team_2025 Mary + John  Sydney   132 input_4
#  12 team3 Team_2025 Mary + Bill  Sydney    44 input_4

huangapple
  • 本文由 发表于 2023年2月6日 06:31:13
  • 转载请务必保留本文链接:https://go.coder-hub.com/75355948.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定