如何在R中使用for循环存储名称和ID函数。

huangapple go评论68阅读模式
英文:

How do we store a name and id function in For Loop in R

问题

I am trying to create a for loop that iterates over one variable (customer_id) and stores a second variable (cust_nm) for writing the file name to excel and included the cust_nm in the file name.

library(tidyverse)
library(janitor)

df.1 <- tribble(
~customer_id, ~cust_nm  ,~period, ~cost1, ~cost2 ,
'cust1', 'pizzaco', '202201', 5, 10,
'cust2', 'sodaco','202202', 5, 10,
'cust1', 'pizzaco', '202203', 5, 10,
'cust2', 'sodaco', '202204', 5, 10,
)

cust_list <- df.1 %>% 
  select(customer_id,cust_nm) %>% 
  unique()

for (i in 1:length(cust_list)) {
  test <- df.1 %>% filter(customer_id == cust_list[i][,1]) %>% 
    group_by(customer_id, cust_nm, period) %>% 
    summarise(cost1 = sum(cost1, na.rm = T),
              cost2 = sum(cost2, na.rm = T),
              total = cost1 + cost2) %>% 
    adorn_totals(where = 'row') 

  writexl::write_xlsx(test, paste0("test_",cust_list[i][,2],".xlsx"))
}

The output expected would be 2 files with appropriate data labeled "test_pizzaco.xlsx" and "test_sodaco.xlsx".

I also tried seq_along because I understand that should allow to store the index and name, but was not able to create a working solution with that either.

Your advice is appreciated.

英文:

I am trying to create a for loop that iterates over one variable (customer_id) and stores a second variable (cust_nm) for writing the file name to excel and included the cust_nm in the file name.

library(tidyverse)
library(janitor)

df.1 &lt;- tribble(
~customer_id, ~cust_nm  ,~period, ~cost1, ~cost2 ,
&#39;cust1&#39;, &#39;pizzaco&#39;, &#39;202201&#39;, 5, 10,
&#39;cust2&#39;, &#39;sodaco&#39;,&#39;202202&#39;, 5, 10,
&#39;cust1&#39;, &#39;pizzaco&#39;, &#39;202203&#39;, 5, 10,
&#39;cust2&#39;, &#39;sodaco&#39;, &#39;202204&#39;, 5, 10,
)

cust_list &lt;- df.1 %&gt;% 
  select(customer_id,cust_nm) %&gt;% 
  unique()


for (i in 1:length(cust_list)) {
test &lt;- df.1 %&gt;% filter(customer_id == cust_list[i][,1]) %&gt;% 
  group_by(customer_id, cust_nm, period) %&gt;% 
  summarise(cost1 = sum(cost1, na.rm = T),
            cost2 = sum(cost2, na.rm = T),
            total = cost1 + cost2) %&gt;% 
  adorn_totals(where = &#39;row&#39;) 

writexl::write_xlsx(test, paste0(&quot;test_&quot;,cust_list[i][,2],&quot;.xlsx&quot;))
}

The output expected would be 2 files with appropriate data labeled "test_pizzaco.xlsx" and "test_sodaco.xlsx"

I also tried seq_along because I understand that should allow to store the index and name, but was not able to create a working solution with that either.

Your advice is appreciated.

答案1

得分: 1

你可以使用 purrr::map2() 来代替 for 循环。

  library(tidyverse)
  library(janitor)
  
df.1 <- tribble(
  ~customer_id, ~cust_nm  ,~period, ~cost1, ~cost2 ,
  'cust1', 'pizzaco', '202201', 5, 10,
  'cust2', 'sodaco','202202', 5, 10,
  'cust1', 'pizzaco', '202203', 5, 10,
  'cust2', 'sodaco', '202204', 5, 10,
)

cust_list <- 
  df.1 %>%
  select(customer_id,cust_nm) %>%
  unique()

map2(cust_list$customer_id,
     cust_list$cust_nm,
     \(x, y) {
       out <- df.1 %>%
         filter(customer_id == x) %>%
         group_by(customer_id, cust_nm, period) %>%
         summarise(
           cost1 = sum(cost1, na.rm = T),
           cost2 = sum(cost2, na.rm = T),
           total = cost1 + cost2,
           .groups = "keep"
         ) %>%
         adorn_totals(where = 'row')
       
       writexl::write_xlsx(out, paste0("test_", y , ".xlsx"))
       
       out
     })
#> [[1]]
#>  customer_id cust_nm period cost1 cost2 total
#>        cust1 pizzaco 202201     5    10    15
#>        cust1 pizzaco 202203     5    10    15
#>        Total       -      -    10    20    30
#> 
#> [[2]]
#>  customer_id cust_nm period cost1 cost2 total
#>        cust2  sodaco 202202     5    10    15
#>        cust2  sodaco 202204     5    10    15
#>        Total       -      -    10    20    30
英文:

You could use purrr::map2() for this instead of a for-loop.

  library(tidyverse)
  library(janitor)
  
df.1 &lt;- tribble(
  ~customer_id, ~cust_nm  ,~period, ~cost1, ~cost2 ,
  &#39;cust1&#39;, &#39;pizzaco&#39;, &#39;202201&#39;, 5, 10,
  &#39;cust2&#39;, &#39;sodaco&#39;,&#39;202202&#39;, 5, 10,
  &#39;cust1&#39;, &#39;pizzaco&#39;, &#39;202203&#39;, 5, 10,
  &#39;cust2&#39;, &#39;sodaco&#39;, &#39;202204&#39;, 5, 10,
)

cust_list &lt;- 
  df.1 %&gt;% 
  select(customer_id,cust_nm) %&gt;% 
  unique()

map2(cust_list$customer_id,
     cust_list$cust_nm,
     \(x, y) {
       out &lt;- df.1 |&gt;
         filter(customer_id == x) |&gt;
         group_by(customer_id, cust_nm, period) |&gt;
         summarise(
           cost1 = sum(cost1, na.rm = T),
           cost2 = sum(cost2, na.rm = T),
           total = cost1 + cost2,
           .groups = &quot;keep&quot;
         ) |&gt;
         adorn_totals(where = &#39;row&#39;)
       
       writexl::write_xlsx(out, paste0(&quot;test_&quot;, y , &quot;.xlsx&quot;))
       
       out
     })
#&gt; [[1]]
#&gt;  customer_id cust_nm period cost1 cost2 total
#&gt;        cust1 pizzaco 202201     5    10    15
#&gt;        cust1 pizzaco 202203     5    10    15
#&gt;        Total       -      -    10    20    30
#&gt; 
#&gt; [[2]]
#&gt;  customer_id cust_nm period cost1 cost2 total
#&gt;        cust2  sodaco 202202     5    10    15
#&gt;        cust2  sodaco 202204     5    10    15
#&gt;        Total       -      -    10    20    30

答案2

得分: 1

以下是翻译好的部分:

首先,length(cust_list) 将循环遍历 列数,而不是我认为你想要的行数。在这种情况下,它们的数量是相同的,但在这个示例中,这可能不经常成立。相反,使用 for (i in 1:nrow(cust_list))

其次,我建议简化你的 cust_list[][] 命名方式,只引用你需要的值。

例如,在你的代码中,当 i <- 1 时:

df.1 %>%
  filter(customer_id == cust_list[i][,1])
# 错误:filter(., customer_id == cust_list[i][, 1]) : 
#   ℹ 在参数中:`customer_id == cust_list[i][, 1]`。
# 由错误引起:
# ! `..1` 的大小必须为 4 或 1,而不是 2。

但这个可以正常工作:

df.1 %>%
  filter(customer_id == cust_list$customer_id[i])
# # 一个数据框:2 × 5
#   customer_id cust_nm period cost1 cost2
#   <chr>       <chr>   <chr>  <dbl> <dbl>
# 1 cust1       pizzaco 202201     5    10
# 2 cust1       pizzaco 202203     5    10

(类似的情况也适用于 paste0。)

我认为你的完整代码应该是:

for (i in 1:nrow(cust_list)) {
test <- df.1 %>%
  filter(customer_id == cust_list$customer_id[i]) %>%
  group_by(customer_id, cust_nm, period) %>%
  summarise(cost1 = sum(cost1, na.rm = T),
            cost2 = sum(cost2, na.rm = T),
            total = cost1 + cost2) %>%
  adorn_totals(where = 'row') 

writexl::write_xlsx(test, paste0("test_", cust_list$cust_nm[i], ".xlsx"))
}
英文:

First, length(cust_list) is going to iterate over the number of columns, not the number of rows as I expect you're meaning to do. In this case it is the same number, but it's a convenience in this example that likely doesn't hold true very often. Instead, use for (i in 1:nrow(cust_list)).

Second, I suggest you simplify your cust_list[][] nomenclature to just reference the values you need.

For example, in your code, when i &lt;- 1:

df.1 %&gt;%
  filter(customer_id == cust_list[i][,1])
# Error in filter(., customer_id == cust_list[i][, 1]) : 
#   ℹ In argument: `customer_id == cust_list[i][, 1]`.
# Caused by error:
# ! `..1` must be of size 4 or 1, not size 2.

but this works:

df.1 %&gt;%
  filter(customer_id == cust_list$customer_id[i])
# # A tibble: 2 &#215; 5
#   customer_id cust_nm period cost1 cost2
#   &lt;chr&gt;       &lt;chr&gt;   &lt;chr&gt;  &lt;dbl&gt; &lt;dbl&gt;
# 1 cust1       pizzaco 202201     5    10
# 2 cust1       pizzaco 202203     5    10

(Similarly for the paste0.)

I think your full code should be:

for (i in 1:nrow(cust_list)) {
test &lt;- df.1 %&gt;% filter(customer_id == cust_list$customer_id[i]) %&gt;% 
  group_by(customer_id, cust_nm, period) %&gt;% 
  summarise(cost1 = sum(cost1, na.rm = T),
            cost2 = sum(cost2, na.rm = T),
            total = cost1 + cost2) %&gt;% 
  adorn_totals(where = &#39;row&#39;) 

writexl::write_xlsx(test, paste0(&quot;test_&quot;,cust_list$cust_nm[i],&quot;.xlsx&quot;))
}

huangapple
  • 本文由 发表于 2023年6月6日 02:43:27
  • 转载请务必保留本文链接:https://go.coder-hub.com/76409192.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定