2023年6月6日 02:43:27go评论84阅读模式

英文:

How do we store a name and id function in For Loop in R

问题

I am trying to create a for loop that iterates over one variable (customer_id) and stores a second variable (cust_nm), so that cust_nm can be included in the name of the Excel file written for each customer.

library(tidyverse)
library(janitor)

# Sample data: two customers, two periods each, two cost columns.
df.1 <- tribble(
~customer_id, ~cust_nm  ,~period, ~cost1, ~cost2 ,
'cust1', 'pizzaco', '202201', 5, 10,
'cust2', 'sodaco','202202', 5, 10,
'cust1', 'pizzaco', '202203', 5, 10,
'cust2', 'sodaco', '202204', 5, 10,
)

# Distinct (customer_id, cust_nm) pairs: one row per customer.
cust_list <- df.1 %>% 
  select(customer_id,cust_nm) %>% 
  unique()

# Intended: one pass per customer, writing "test_<cust_nm>.xlsx" each time.
# NOTE(review): length() of a tibble is its number of COLUMNS, so this runs
# once per column of cust_list rather than once per customer row.
for (i in 1:length(cust_list)) {
  # NOTE(review): cust_list[i] keeps column i as a one-column tibble, and
  # [,1] still holds every row of that column (size 2) rather than a single
  # id, so the comparison inside filter() has the wrong size and errors.
  test <- df.1 %>% filter(customer_id == cust_list[i][,1]) %>% 
    group_by(customer_id, cust_nm, period) %>% 
    summarise(cost1 = sum(cost1, na.rm = T),
              cost2 = sum(cost2, na.rm = T),
              total = cost1 + cost2) %>% 
    adorn_totals(where = 'row') 

  # NOTE(review): same indexing pattern as above, used here for the file name.
  writexl::write_xlsx(test, paste0("test_",cust_list[i][,2],".xlsx"))
}

The output expected would be 2 files with appropriate data labeled "test_pizzaco.xlsx" and "test_sodaco.xlsx".

I also tried seq_along because I understand that should allow to store the index and name, but was not able to create a working solution with that either.

Your advice is appreciated.

英文:

library(tidyverse)
library(janitor)

# Sample data: two customers, two periods each, two cost columns.
df.1 <- tribble(
~customer_id, ~cust_nm  ,~period, ~cost1, ~cost2 ,
'cust1', 'pizzaco', '202201', 5, 10,
'cust2', 'sodaco','202202', 5, 10,
'cust1', 'pizzaco', '202203', 5, 10,
'cust2', 'sodaco', '202204', 5, 10,
)

# Distinct (customer_id, cust_nm) pairs: one row per customer.
cust_list <- df.1 %>% 
  select(customer_id,cust_nm) %>% 
  unique()


# Intended: one pass per customer, writing "test_<cust_nm>.xlsx" each time.
# NOTE(review): this is the code as asked in the question; its logic is left
# unchanged on purpose. length() of a tibble counts COLUMNS, and
# cust_list[i][,1] is a whole column (size 2), not a single id, so filter()
# errors on the size mismatch.
for (i in 1:length(cust_list)) {
test <- df.1 %>% filter(customer_id == cust_list[i][,1]) %>% 
  group_by(customer_id, cust_nm, period) %>% 
  summarise(cost1 = sum(cost1, na.rm = T),
            cost2 = sum(cost2, na.rm = T),
            total = cost1 + cost2) %>% 
  adorn_totals(where = 'row') 

writexl::write_xlsx(test, paste0("test_",cust_list[i][,2],".xlsx"))
}

The output expected would be 2 files with appropriate data labeled "test_pizzaco.xlsx" and "test_sodaco.xlsx"

I also tried seq_along because I understand that should allow to store the index and name, but was not able to create a working solution with that either.

Your advice is appreciated.

答案1

得分: 1

你可以使用 purrr::map2() 来代替 for 循环。

  library(tidyverse)
  library(janitor)
  
# Sample data: two customers, two periods each, two cost columns.
df.1 <- tribble(
  ~customer_id, ~cust_nm, ~period, ~cost1, ~cost2,
  "cust1", "pizzaco", "202201", 5, 10,
  "cust2", "sodaco", "202202", 5, 10,
  "cust1", "pizzaco", "202203", 5, 10,
  "cust2", "sodaco", "202204", 5, 10
)

# Distinct (customer_id, cust_nm) pairs: one row per customer.
cust_list <-
  df.1 %>%
  select(customer_id, cust_nm) %>%
  unique()

# Walk the ids and names in parallel: x is the customer id used to filter,
# y is the customer name used in the output file name. Each per-customer
# summary is written to "test_<name>.xlsx" and also returned, so map2()
# yields a list with one table per customer.
map2(cust_list$customer_id,
     cust_list$cust_nm,
     \(x, y) {
       out <- df.1 %>%
         filter(customer_id == x) %>%
         group_by(customer_id, cust_nm, period) %>%
         summarise(
           # TRUE rather than T: T is an ordinary variable that can be
           # reassigned.
           cost1 = sum(cost1, na.rm = TRUE),
           cost2 = sum(cost2, na.rm = TRUE),
           # cost1 / cost2 here are the per-group sums computed just above.
           total = cost1 + cost2,
           .groups = "keep"
         ) %>%
         # Append a "Total" row below the per-period rows.
         adorn_totals(where = "row")

       writexl::write_xlsx(out, paste0("test_", y, ".xlsx"))

       out
     })
#> [[1]]
#>  customer_id cust_nm period cost1 cost2 total
#>        cust1 pizzaco 202201     5    10    15
#>        cust1 pizzaco 202203     5    10    15
#>        Total       -      -    10    20    30
#> 
#> [[2]]
#>  customer_id cust_nm period cost1 cost2 total
#>        cust2  sodaco 202202     5    10    15
#>        cust2  sodaco 202204     5    10    15
#>        Total       -      -    10    20    30

英文:

You could use purrr::map2() for this instead of a for-loop.

  library(tidyverse)
  library(janitor)
  
# Sample data: two customers, two periods each, two cost columns.
df.1 <- tribble(
  ~customer_id, ~cust_nm, ~period, ~cost1, ~cost2,
  "cust1", "pizzaco", "202201", 5, 10,
  "cust2", "sodaco", "202202", 5, 10,
  "cust1", "pizzaco", "202203", 5, 10,
  "cust2", "sodaco", "202204", 5, 10
)

# Distinct (customer_id, cust_nm) pairs: one row per customer.
cust_list <-
  df.1 |>
  select(customer_id, cust_nm) |>
  unique()

# Walk the ids and names in parallel: x is the customer id used to filter,
# y is the customer name used in the output file name. Each per-customer
# summary is written to "test_<name>.xlsx" and also returned, so map2()
# yields a list with one table per customer.
map2(cust_list$customer_id,
     cust_list$cust_nm,
     \(x, y) {
       out <- df.1 |>
         filter(customer_id == x) |>
         group_by(customer_id, cust_nm, period) |>
         summarise(
           # TRUE rather than T: T is an ordinary variable that can be
           # reassigned.
           cost1 = sum(cost1, na.rm = TRUE),
           cost2 = sum(cost2, na.rm = TRUE),
           # cost1 / cost2 here are the per-group sums computed just above.
           total = cost1 + cost2,
           .groups = "keep"
         ) |>
         # Append a "Total" row below the per-period rows.
         adorn_totals(where = "row")

       writexl::write_xlsx(out, paste0("test_", y, ".xlsx"))

       out
     })
#> [[1]]
#>  customer_id cust_nm period cost1 cost2 total
#>        cust1 pizzaco 202201     5    10    15
#>        cust1 pizzaco 202203     5    10    15
#>        Total       -      -    10    20    30
#> 
#> [[2]]
#>  customer_id cust_nm period cost1 cost2 total
#>        cust2  sodaco 202202     5    10    15
#>        cust2  sodaco 202204     5    10    15
#>        Total       -      -    10    20    30

答案2

得分: 1

以下是翻译好的部分：

首先，length(cust_list) 遍历的是列数，而不是我猜你想要的行数。在这个例子中两者恰好相等，但这只是本例中的巧合，通常并不成立。请改用 for (i in 1:nrow(cust_list))。

其次，我建议简化 cust_list[][] 这种写法，直接引用你需要的值。

例如，在你的代码中，当 i <- 1 时：

df.1 %>%
  filter(customer_id == cust_list[i][,1])
# Error in filter(., customer_id == cust_list[i][, 1]) : 
#   ℹ In argument: `customer_id == cust_list[i][, 1]`.
# Caused by error:
# ! `..1` must be of size 4 or 1, not size 2.

但这个可以正常工作：

df.1 %>%
  filter(customer_id == cust_list$customer_id[i])
# # A tibble: 2 × 5
#   customer_id cust_nm period cost1 cost2
#   <chr>       <chr>   <chr>  <dbl> <dbl>
# 1 cust1       pizzaco 202201     5    10
# 2 cust1       pizzaco 202203     5    10

（类似的情况也适用于 paste0。）

我认为你的完整代码应该是：

# Write one Excel summary per customer. cust_list has one row per customer,
# so iterate over its rows; seq_len(nrow(...)) is the zero-row-safe form of
# 1:nrow(...), which would yield c(1, 0) for an empty table.
for (i in seq_len(nrow(cust_list))) {
  test <- df.1 %>%
    filter(customer_id == cust_list$customer_id[i]) %>%
    group_by(customer_id, cust_nm, period) %>%
    summarise(
      cost1 = sum(cost1, na.rm = TRUE),
      cost2 = sum(cost2, na.rm = TRUE),
      # cost1 / cost2 here are the per-group sums computed just above.
      total = cost1 + cost2,
      # Same grouping as summarise()'s default, stated explicitly so the
      # regrouping message is not printed on every iteration.
      .groups = "drop_last"
    ) %>%
    # Append a "Total" row below the per-period rows.
    adorn_totals(where = "row")

  writexl::write_xlsx(test, paste0("test_", cust_list$cust_nm[i], ".xlsx"))
}

英文:

First, length(cust_list) is going to iterate over the number of columns, not the number of rows as I expect you're meaning to do. In this case it is the same number, but it's a convenience in this example that likely doesn't hold true very often. Instead, use for (i in 1:nrow(cust_list)).

Second, I suggest you simplify your cust_list[][] nomenclature to just reference the values you need.

For example, in your code, when i <- 1:

# Deliberately failing example: cust_list[i] is a one-column tibble, so the
# right-hand side is a whole column rather than a single id.
df.1 %>%
  filter(customer_id == cust_list[i][,1])
# Error in filter(., customer_id == cust_list[i][, 1]) : 
#   ℹ In argument: `customer_id == cust_list[i][, 1]`.
# Caused by error:
# ! `..1` must be of size 4 or 1, not size 2.

but this works:

# Picking the i-th id as a plain value gives filter() a size-1 comparison.
df.1 %>%
  filter(customer_id == cust_list$customer_id[i])
# # A tibble: 2 × 5
#   customer_id cust_nm period cost1 cost2
#   <chr>       <chr>   <chr>  <dbl> <dbl>
# 1 cust1       pizzaco 202201     5    10
# 2 cust1       pizzaco 202203     5    10

(Similarly for the paste0.)

I think your full code should be:

# Write one Excel summary per customer. cust_list has one row per customer,
# so iterate over its rows; seq_len(nrow(...)) is the zero-row-safe form of
# 1:nrow(...), which would yield c(1, 0) for an empty table.
for (i in seq_len(nrow(cust_list))) {
  test <- df.1 %>%
    filter(customer_id == cust_list$customer_id[i]) %>%
    group_by(customer_id, cust_nm, period) %>%
    summarise(
      cost1 = sum(cost1, na.rm = TRUE),
      cost2 = sum(cost2, na.rm = TRUE),
      # cost1 / cost2 here are the per-group sums computed just above.
      total = cost1 + cost2,
      # Same grouping as summarise()'s default, stated explicitly so the
      # regrouping message is not printed on every iteration.
      .groups = "drop_last"
    ) %>%
    # Append a "Total" row below the per-period rows.
    adorn_totals(where = "row")

  writexl::write_xlsx(test, paste0("test_", cust_list$cust_nm[i], ".xlsx"))
}

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

如何在R中使用for循环存储名称和ID函数。

问题

答案1

答案2

用`ggeffects::ggpredict`控制网格。

how to add a box containing text (mean=….,sigma=…) in the left side and a horizontally laied histogramm in the right side of a plot in r?

purrr::pluck() 和 purrr::map() 的第二个参数类型为 double。

相对于图例中的文本，符号的垂直调整

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

发表评论