解析函数输入名称作为输出名称。

huangapple go评论100阅读模式
英文:

Parse a function input name as an output name

问题

我试图在我的函数"result"中将数据输入变量名作为输出值添加到一个单独的列($V5)中。

输入是不同的数据值,我想将这些名称保存在输出中,以便我可以追踪数据的来源。

我以为这会很简单,我可以使用print(deparse(substitute(input)))来实现,但这不起作用。

有人能推荐一个解决方案吗?

英文:

I am trying to add the data input variable name as an output value in a separate column ($V5) within my function "result".

The inputs are different data values, and I would like to save these names in the outputs so I can track where the data is coming from.

I thought this would be fairly straightforward, and that I could use print(deparse(substitute(input))) but this doesn't work.

Can anyone recommend a solution?

  library(dplyr)
  library(tidyr)

  ## Inputs ##
  # Four input tables that share the columns V1..V4.
  input_1 <- structure(
    list(
      V1 = c("Team_2022", "Team_2022", "Team_2022"),
      V2 = c("Frank", "Mary", "John"),
      V3 = c("Sydney", "Sydney", "Sydney"),
      V4 = c(55, 76, 14)
    ),
    row.names = c(NA, -3L), class = c("data.table", "data.frame")
  )
  input_2 <- structure(
    list(
      V1 = c("Team_2023", "Team_2023", "Team_2023"),
      V2 = c("Bill", "Mary", "John"),
      V3 = c("Sydney", "Sydney", "Sydney"),
      V4 = c(113, 23, 10)
    ),
    row.names = c(NA, -3L), class = c("data.table", "data.frame")
  )
  input_3 <- structure(
    list(
      V1 = c("Team_2024", "Team_2024", "Team_2024"),
      V2 = c("Frank", "Mary", "Bill"),
      V3 = c("Sydney", "Sydney", "Sydney"),
      V4 = c(7, 19, 52)
    ),
    row.names = c(NA, -3L), class = c("data.table", "data.frame")
  )
  input_4 <- structure(
    list(
      V1 = c("Team_2025", "Team_2025", "Team_2025"),
      V2 = c("Frank", "Mary", "John"),
      V3 = c("Sydney", "Sydney", "Sydney"),
      V4 = c(46, 44, 88)
    ),
    row.names = c(NA, -3L), class = c("data.table", "data.frame")
  )

  ## Teams ##
  # V1 holds the team id; V2 holds the member names joined with " + ".
  teams <- structure(
    list(
      V1 = c("team1", "team2", "team3"),
      V2 = c("Mary + Frank", "Mary + John", "Mary + Bill")
    ),
    class = "data.frame", row.names = c(NA, -3L)
  )

  ## Group the inputs into one ##
  # Names of all objects whose name starts with "input".
  all_objects <- ls()
  input_objects <- grep("^input", all_objects, value = TRUE)
  # Fixed: the posted code passed the undefined name `input_obj` here.
  input_test <- as.data.frame(input_objects)

  ## Function ##
  # Summarise one input table per team.
  #
  # input: table that is left-joined onto the team members by V2.
  # teams: table whose V2 column is split into one row per member.
  # Returns one row per V1.x group with V1.y, V2, V3, V4 and V5.
  result <- function(input, teams) {
    data <- teams %>%
      separate_rows(V2) %>%
      left_join(input, by = c("V2" = "V2")) %>%
      # Members missing from `input` contribute 0 to the V4 sum.
      replace_na(list(V4 = 0)) %>%
      group_by(V1.x) %>%
      fill(V1.y, V3) %>%
      summarize(
        V1.y = first(V1.y),
        V2 = paste(V2, collapse = " + "),
        V3 = first(V3),
        V4 = sum(V4),
        # Kept exactly as posted: this is the line the question is about.
        # The posted "Current output" shows V5 as the literal "input"
        # rather than the name of the variable supplied by the caller.
        V5 = print(deparse(substitute(input)))
      )
    return(data)
  }

  # Rebuild the list of inputs, this time as the tables themselves.
  all_objects <- ls()
  input_objects <- grep("^input_\\d", all_objects, value = TRUE)
  input_test <- lapply(input_objects, get)

  output <- input_test %>%
    lapply(result, teams) %>%
    bind_rows()
  34. ### Current output ###
  35. structure(list(V1.x = c("team1", "team2", "team3", "team1", "team2",
  36. "team3", "team1", "team2", "team3", "team1", "team2", "team3"
  37. ), V1.y = c("Team_2022", "Team_2022", "Team_2022", "Team_2023",
  38. "Team_2023", "Team_2023", "Team_2024", "Team_2024", "Team_2024",
  39. "Team_2025", "Team_2025", "Team_2025"), V2 = c("Mary + Frank",
  40. "Mary + John", "Mary + Bill", "Mary + Frank", "Mary + John",
  41. "Mary + Bill", "Mary + Frank", "Mary + John", "Mary + Bill",
  42. "Mary + Frank", "Mary + John", "Mary + Bill"), V3 = c("Sydney",
  43. "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney",
  44. "Sydney", "Sydney", "Sydney", "Sydney"), V4 = c(131, 90, 76,
  45. 23, 33, 136, 26, 19, 71, 90, 132, 44), V5 = c("input", "input",
  46. "input", "input", "input", "input", "input", "input", "input",
  47. "input", "input", "input")), class = c("tbl_df", "tbl", "data.frame"
  48. ), row.names = c(NA, -12L))
  49. ### Desired Output ###
  50. structure(list(V1.x = c("team1", "team2", "team3", "team1", "team2",
  51. "team3", "team1", "team2", "team3", "team1", "team2", "team3"
  52. ), V1.y = c("Team_2022", "Team_2022", "Team_2022", "Team_2023",
  53. "Team_2023", "Team_2023", "Team_2024", "Team_2024", "Team_2024",
  54. "Team_2025", "Team_2025", "Team_2025"), V2 = c("Mary + Frank",
  55. "Mary + John", "Mary + Bill", "Mary + Frank", "Mary + John",
  56. "Mary + Bill", "Mary + Frank", "Mary + John", "Mary + Bill",
  57. "Mary + Frank", "Mary + John", "Mary + Bill"), V3 = c("Sydney",
  58. "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney", "Sydney",
  59. "Sydney", "Sydney", "Sydney", "Sydney"), V4 = c(131, 90, 76,
  60. 23, 33, 136, 26, 19, 71, 90, 132, 44), V5 = c("input_1", "input_1",
  61. "input_1", "input_2", "input_2", "input_2", "input_3", "input_3", "input_3",
  62. "input_4", "input_4", "input_4")), class = c("tbl_df", "tbl", "data.frame"
  63. ), row.names = c(NA, -12L))

答案1

得分: 1

如果我理解你的意思正确:purrr::map_df() 函数具有一个很好的特性,可以识别最终 data.frames 中的输入列表(最好是有命名的):

  library(dplyr)
  library(tidyr)

  # Per-team summary of a single input table. The source label (V5) is not
  # built here; it is attached afterwards through the `.id` argument of
  # purrr::map_df().
  result <- function(input, teams) {
    members <- separate_rows(teams, V2)
    joined <- left_join(members, input, by = c("V2" = "V2"))
    joined <- replace_na(joined, list(V4 = 0))
    by_team <- fill(group_by(joined, V1.x), V1.y, V3)
    summarize(
      by_team,
      V1.y = first(V1.y),
      V2 = paste(V2, collapse = " + "),
      V3 = first(V3),
      V4 = sum(V4)
    )
  }

  # Fetch every object called input_<digit>... and keep the object names on
  # the list, so that `.id` can turn them into the V5 column.
  l_objects <- ls()
  input_objects <- grep("^input_\\d", l_objects, value = TRUE)
  input_test <- setNames(lapply(input_objects, get), input_objects)

  purrr::map_df(input_test, ~result(.x, teams), .id = "V5")

请注意,这也可以在不命名列表的情况下工作,尽管你只会得到列表项的编号,这可能不够。

此外,plyr::ldply 函数也可以用于绑定命名列表,生成一个新列,其中包含结果数据帧中的列表名称。

英文:

If I understand you correctly: the purrr::map_df() function has a nice feature to identify input lists (ideally named) in the final data.frames:

  library(dplyr)
  library(tidyr)

  # Summarise one input table per team. V5 is deliberately not computed
  # here: it is added "automatically" by the `.id` argument of map_df() below.
  #
  # input: table that is left-joined onto the team members by V2.
  # teams: table whose V2 column is split into one row per member.
  # Returns one row per V1.x group with V1.y, V2, V3 and V4.
  result <- function(input, teams) {
    teams %>%
      separate_rows(V2) %>%
      left_join(input, by = c("V2" = "V2")) %>%
      replace_na(list(V4 = 0)) %>%
      group_by(V1.x) %>%
      fill(V1.y, V3) %>%
      summarize(
        V1.y = first(V1.y),
        V2 = paste(V2, collapse = " + "),
        V3 = first(V3),
        V4 = sum(V4)
      )
  }

  l_objects <- ls()
  # Search the listing taken on the previous line; the posted code searched
  # `all_objects`, which this snippet never defines.
  input_objects <- grep("^input_\\d", l_objects, value = TRUE)
  input_test <- lapply(input_objects, get)
  # name the object list to use the name for identification
  names(input_test) <- input_objects
  # use purrr map to data.frame with the .id feature
  purrr::map_df(input_test, ~ result(.x, teams), .id = "V5")
  26. V5 V1.x V1.y V2 V3 V4
  27. <chr> <chr> <chr> <chr> <chr> <dbl>
  28. 1 input_1 team1 Team_2022 Mary + Frank Sydney 131
  29. 2 input_1 team2 Team_2022 Mary + John Sydney 90
  30. 3 input_1 team3 Team_2022 Mary + Bill Sydney 76
  31. 4 input_2 team1 Team_2023 Mary + Frank Sydney 23
  32. 5 input_2 team2 Team_2023 Mary + John Sydney 33
  33. 6 input_2 team3 Team_2023 Mary + Bill Sydney 136
  34. 7 input_3 team1 Team_2024 Mary + Frank Sydney 26
  35. 8 input_3 team2 Team_2024 Mary + John Sydney 19
  36. 9 input_3 team3 Team_2024 Mary + Bill Sydney 71
  37. 10 input_4 team1 Team_2025 Mary + Frank Sydney 90
  38. 11 input_4 team2 Team_2025 Mary + John Sydney 132
  39. 12 input_4 team3 Team_2025 Mary + Bill Sydney 44

Note that this works without naming the list as well, though you will then only get the list item number, which might be insufficient.

Also the plyr::ldply function can be used to bind named lists, generating a new column with the list names in the result data.frame.

答案2

得分: 1

不使用deparse/substitute,而是在result函数中为名称创建一个参数,然后使用它。

  library(dplyr)
  library(purrr)
  library(tidyr)

  # Per-team summary of one input table, labelled with an explicit name.
  #   input   - table joined onto the team members by V2
  #   teams   - table whose V2 column is split into one row per member
  #   inputnm - value stored in column V5 of the result
  result <- function(input, teams, inputnm) {
    members <- separate_rows(teams, V2)
    scored <- replace_na(
      left_join(members, input, by = c("V2" = "V2")),
      list(V4 = 0)
    )
    by_team <- fill(group_by(scored, V1.x), V1.y, V3)
    summarize(
      by_team,
      V1.y = first(V1.y),
      V2 = paste(V2, collapse = " + "),
      V3 = first(V3),
      V4 = sum(V4),
      V5 = inputnm
    )
  }

测试

  # `input_objects` is the character vector of input table names. mget()
  # fetches those objects as a list named by them, and imap_dfr() passes each
  # table as `.x` and its name as `.y`. Starting from the names directly
  # avoids pull(), which needs `input_test` to be a data frame and fails once
  # `input_test` has been rebuilt as a list of tables.
  input_objects %>%
    mget(inherits = TRUE) %>%
    imap_dfr(~ result(.x, teams, .y))

输出

  1. # A tibble: 12 × 6
  2. V1.x V1.y V2 V3 V4 V5
  3. <chr> <chr> <chr> <chr> <dbl> <chr>
  4. 1 team1 Team_2022 Mary + Frank Sydney 131 input_1
  5. 2 team2 Team_2022 Mary + John Sydney 90 input_1
  6. 3 team3 Team_2022 Mary + Bill Sydney 76 input_1
  7. 4 team1 Team_2023 Mary + Frank Sydney 23 input_2
  8. 5 team2 Team_2023 Mary + John Sydney 33 input_2
  9. 6 team3 Team_2023 Mary + Bill Sydney 136 input_2
  10. 7 team1 Team_2024 Mary + Frank Sydney 26 input_3
  11. 8 team2 Team_2024 Mary + John Sydney 19 input_3
  12. 9 team3 Team_2024 Mary + Bill Sydney 71 input_3
  13. 10 team1 Team_2025 Mary + Frank Sydney 90 input_4
  14. 11 team2 Team_2025 Mary + John Sydney 132 input_4
  15. 12 team3 Team_2025 Mary + Bill Sydney 44 input_4

这是您提供的代码的翻译。

英文:

Instead of doing the deparse/substitute, create an argument in result for the names as well, and then use that:

  library(dplyr)
  library(purrr)
  library(tidyr)

  # Summarise one input table per team and label the rows with `inputnm`.
  #
  # input:   table that is left-joined onto the team members by V2.
  # teams:   table whose V2 column is split into one row per member.
  # inputnm: value written to column V5.
  # Returns one row per V1.x group with V1.y, V2, V3, V4 and V5.
  result <- function(input, teams, inputnm) {
    data <- teams %>%
      separate_rows(V2) %>%
      left_join(input, by = c("V2" = "V2")) %>%
      # Members missing from `input` contribute 0 to the V4 sum.
      replace_na(list(V4 = 0)) %>%
      group_by(V1.x) %>%
      fill(V1.y, V3) %>%
      summarize(
        V1.y = first(V1.y),
        V2 = paste(V2, collapse = " + "),
        V3 = first(V3),
        V4 = sum(V4),
        V5 = inputnm
      )
    return(data)
  }

-testing

  # `input_objects` is the character vector of input table names. mget()
  # fetches those objects as a list named by them, and imap_dfr() passes each
  # table as `.x` and its name as `.y`. Starting from the names directly
  # avoids pull(), which needs `input_test` to be a data frame and fails once
  # `input_test` has been rebuilt as a list of tables.
  input_objects %>%
    mget(inherits = TRUE) %>%
    imap_dfr(~ result(.x, teams, .y))

-output

  1. # A tibble: 12 × 6
  2. V1.x V1.y V2 V3 V4 V5
  3. <chr> <chr> <chr> <chr> <dbl> <chr>
  4. 1 team1 Team_2022 Mary + Frank Sydney 131 input_1
  5. 2 team2 Team_2022 Mary + John Sydney 90 input_1
  6. 3 team3 Team_2022 Mary + Bill Sydney 76 input_1
  7. 4 team1 Team_2023 Mary + Frank Sydney 23 input_2
  8. 5 team2 Team_2023 Mary + John Sydney 33 input_2
  9. 6 team3 Team_2023 Mary + Bill Sydney 136 input_2
  10. 7 team1 Team_2024 Mary + Frank Sydney 26 input_3
  11. 8 team2 Team_2024 Mary + John Sydney 19 input_3
  12. 9 team3 Team_2024 Mary + Bill Sydney 71 input_3
  13. 10 team1 Team_2025 Mary + Frank Sydney 90 input_4
  14. 11 team2 Team_2025 Mary + John Sydney 132 input_4
  15. 12 team3 Team_2025 Mary + Bill Sydney 44 input_4

答案3

得分: 0

如果您一次只有一个输入,简单地将分配V5的行移到您的summarize和dplyr管道之外就可以了:

  # Per-team summary of `input`, tagged with the expression the caller passed
  # as `input`. The label is read from the unevaluated argument with
  # deparse(substitute()), outside of the dplyr pipeline.
  result <- function(input, teams) {
    input_label <- deparse(substitute(input))
    members <- separate_rows(teams, V2)
    joined <- replace_na(
      left_join(members, input, by = c("V2" = "V2")),
      list(V4 = 0)
    )
    by_team <- fill(group_by(joined, V1.x), V1.y, V3)
    data <- summarize(
      by_team,
      V1.y = first(V1.y),
      V2 = paste(V2, collapse = " + "),
      V3 = first(V3),
      V4 = sum(V4)
    )
    data$V5 <- input_label
    data
  }

  result(input_1, teams)
  17. # V1.x V1.y V2 V3 V4 V5
  18. # <chr> <chr> <chr> <chr> <dbl> <chr>
  19. # 1 team1 Team_2022 Mary + Frank Sydney 131 input_1
  20. # 2 team2 Team_2022 Mary + John Sydney 90 input_1
  21. # 3 team3 Team_2022 Mary + Bill Sydney 76 input_1

但您使用列表的方式有些挑战(对于该代码会输出X[[i]])。

为了解决这个问题,使用lapply,我建议简单地添加一个额外的输入来获取名称,并对lapply函数进行额外的调整以适应它:

  # Summarise one input table per team and label the rows with `nme`.
  #
  # input: table that is left-joined onto the team members by V2.
  # teams: table whose V2 column is split into one row per member.
  # nme:   value written to column V5.
  # Returns one row per V1.x group with V1.y, V2, V3, V4 and V5.
  result <- function(input, teams, nme) {
    data <- teams %>%
      separate_rows(V2) %>%
      left_join(input, by = c("V2" = "V2")) %>%
      # Members missing from `input` contribute 0 to the V4 sum.
      replace_na(list(V4 = 0)) %>%
      group_by(V1.x) %>%
      fill(V1.y, V3) %>%
      summarize(
        V1.y = first(V1.y),
        V2 = paste(V2, collapse = " + "),
        V3 = first(V3),
        V4 = sum(V4)
      )
    # The label is supplied by the caller instead of being recovered with
    # deparse(substitute(input)).
    data$V5 <- nme
    data
  }

  all_objects <- ls()
  input_objects <- grep("^input_\\d", all_objects, value = TRUE)
  input_test <- lapply(input_objects, get)
  # Assign the object names to the list so elements can be looked up by name.
  names(input_test) <- input_objects

  # Iterate over the names, passing both the table and its name to result().
  output <- lapply(
    input_objects,
    function(x) result(input_test[[x]], teams, nme = x)
  ) %>%
    bind_rows()
  23. # V1.x V1.y V2 V3 V4 V5
  24. # <chr> <chr> <chr> <chr> <dbl> <chr>
  25. # 1 team1 Team_2022 Mary + Frank Sydney 131 input_1
  26. # 2 team2 Team_2022 Mary + John Sydney 90 input_1
  27. # 3 team3 Team_2022 Mary + Bill Sydney 76 input_1
  28. # 4 team1 Team_2023 Mary + Frank Sydney 23 input_2
  29. # 5 team2 Team_2023 Mary + John Sydney 33 input_2
  30. # 6 team3 Team_2023 Mary + Bill Sydney 136 input_2
  31. # 7 team1 Team_2024 Mary + Frank Sydney 26 input_3
  32. # 8 team2 Team_2024 Mary + John Sydney 19 input_3
  33. # 9 team3 Team_2024 Mary + Bill Sydney 71 input_3
  34. # 10 team1 Team_2025 Mary + Frank Sydney 90 input_4
  35. # 11 team2 Team_2025 Mary + John Sydney 132 input_4
  36. # 12 team3 Team_2025 Mary + Bill Sydney 44 input_4
英文:

If you just had one input at a time, simply moving the line for assigning V5 outside of your summarize and dplyr pipes would do it:

  # Summarise one input table per team and label the rows with the
  # expression the caller passed as `input`.
  #
  # input: table that is left-joined onto the team members by V2.
  # teams: table whose V2 column is split into one row per member.
  # Returns one row per V1.x group with V1.y, V2, V3, V4 and V5.
  result <- function(input, teams) {
    data <- teams %>%
      separate_rows(V2) %>%
      left_join(input, by = c("V2" = "V2")) %>%
      # Members missing from `input` contribute 0 to the V4 sum.
      replace_na(list(V4 = 0)) %>%
      group_by(V1.x) %>%
      fill(V1.y, V3) %>%
      summarize(
        V1.y = first(V1.y),
        V2 = paste(V2, collapse = " + "),
        V3 = first(V3),
        V4 = sum(V4)
      )
    # Assigned outside summarize() and the pipe, directly in the function
    # body, where substitute() sees the argument expression.
    data$V5 <- deparse(substitute(input))
    data
  }

  result(input_1, teams)
  17. # V1.x V1.y V2 V3 V4 V5
  18. # <chr> <chr> <chr> <chr> <dbl> <chr>
  19. # 1 team1 Team_2022 Mary + Frank Sydney 131 input_1
  20. # 2 team2 Team_2022 Mary + John Sydney 90 input_1
  21. # 3 team3 Team_2022 Mary + Bill Sydney 76 input_1

But your use of lists makes that a bit challenging (it will output X[[i]] for that code).

To address this when using lapply, I would suggest adding an extra argument that takes the name and assigning it to V5, with a small tweak to the lapply call to accommodate it:

  # Summarise one input table per team and label the rows with `nme`.
  #
  # input: table that is left-joined onto the team members by V2.
  # teams: table whose V2 column is split into one row per member.
  # nme:   value written to column V5.
  # Returns one row per V1.x group with V1.y, V2, V3, V4 and V5.
  result <- function(input, teams, nme) {
    data <- teams %>%
      separate_rows(V2) %>%
      left_join(input, by = c("V2" = "V2")) %>%
      # Members missing from `input` contribute 0 to the V4 sum.
      replace_na(list(V4 = 0)) %>%
      group_by(V1.x) %>%
      fill(V1.y, V3) %>%
      summarize(
        V1.y = first(V1.y),
        V2 = paste(V2, collapse = " + "),
        V3 = first(V3),
        V4 = sum(V4)
      )
    # The label is supplied by the caller instead of being recovered with
    # deparse(substitute(input)).
    data$V5 <- nme
    data
  }

  all_objects <- ls()
  input_objects <- grep("^input_\\d", all_objects, value = TRUE)
  input_test <- lapply(input_objects, get)
  # add in assigning names to the list
  names(input_test) <- input_objects

  # Iterate over the names, passing both the table and its name to result().
  output <- lapply(
    input_objects,
    function(x) result(input_test[[x]], teams, nme = x)
  ) %>%
    bind_rows()
  23. # V1.x V1.y V2 V3 V4 V5
  24. # <chr> <chr> <chr> <chr> <dbl> <chr>
  25. # 1 team1 Team_2022 Mary + Frank Sydney 131 input_1
  26. # 2 team2 Team_2022 Mary + John Sydney 90 input_1
  27. # 3 team3 Team_2022 Mary + Bill Sydney 76 input_1
  28. # 4 team1 Team_2023 Mary + Frank Sydney 23 input_2
  29. # 5 team2 Team_2023 Mary + John Sydney 33 input_2
  30. # 6 team3 Team_2023 Mary + Bill Sydney 136 input_2
  31. # 7 team1 Team_2024 Mary + Frank Sydney 26 input_3
  32. # 8 team2 Team_2024 Mary + John Sydney 19 input_3
  33. # 9 team3 Team_2024 Mary + Bill Sydney 71 input_3
  34. # 10 team1 Team_2025 Mary + Frank Sydney 90 input_4
  35. # 11 team2 Team_2025 Mary + John Sydney 132 input_4
  36. # 12 team3 Team_2025 Mary + Bill Sydney 44 input_4

huangapple
  • 本文由 发表于 2023年2月6日 06:31:13
  • 转载请务必保留本文链接:https://go.coder-hub.com/75355948.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定