在 `map_if` 函数中编写测试条件:对所有某一列包含特定值的数据框应用函数。

huangapple go评论59阅读模式
英文:

writing test conditions in a map_if function : apply function on all dataframes with a column that includes specific values

问题

问题原文(含代码)如下:

Once again I'm struggling with the map functions of purrr.

I've got a list of dataframes,  all with ID and Name columns.

I want to perform some recoding and then aggregation on rows with some specific values.
For that purpose, I've got another dataframe with a vector of ID and the new IDs (IDagr) that I want to substitute before doing some aggregation (summing all numeric variables).

I know how to perform this on one df (see II/), but I don't know what test to write in a map_if function to apply those operations on all dataframes whose ID column includes some values of newIDdf$ID (here dataframes B and C).

Any ideas ? 

## I/ Two input objects
 # list_df: a named list of three data frames (A, B, C). Each one has ID and
 # Name columns built from string vectors plus random numeric Var* columns
 # (three in A, two in B and C). Only B and C contain IDs listed in newIDdf$ID.
 list_df <- list(A = data.frame(ID = c("a", "b", "c", "Z", "Y"),
                                Name = c("a_name", "b_name", "c_name", "Z_name", "Y_name"), 
                                Var1 = rnorm(5),
                                Var2 = rnorm(5),
                                Var3 = rnorm(5)),
                B = data.frame(ID = c("a", "b", "z1", "z2", "z3"),
                               Name = c("a_name", "b_name", "z1_name", "z2_name", "z3_name"),
                               Var1 = rnorm(5),
                               Var2 = rnorm(5)),
                C = data.frame(ID =  c("y1", "y2", "z1", "z2", "z3"),
                               Name = c("y1_name", "y2_name", "z1_name", "z2_name", "z3_name"),
                               Var1 = rnorm(5),
                               Var2 = rnorm(5)))
 
 # newIDdf: correspondence table for the aggregation; each original ID is
 # paired with the aggregated ID (IDagr) that should replace it
 newIDdf <- data.frame(ID =  c("y1", "y2", "z1", "z2", "z3"),
                       IDagr =  c("Y", "Y", "Z", "Z", "Z"))
 
 ## II/ What I want to do, shown on a single data frame
# Example on list_df[["B"]]:
#   1. reduce2() feeds the ID column through str_replace() once per
#      (newIDdf$ID, newIDdf$IDagr) pair, starting from the current ID values
#   2. Name is relabelled for the aggregated IDs "Z" and "Y"
#   3. within each ID group every numeric column is replaced by
#      list(. = sum(.)), i.e. the group sum wrapped in a one-element list
#   4. distinct() keeps the first row of each ID together with all columns
 
 On1df <- list_df[["B"]] %>% 
   mutate(ID = reduce2(newIDdf$ID, newIDdf$IDagr, 
                           .init= ID, 
                           str_replace)) %>%
   mutate(Name = case_when(ID == "Z" ~ "Z_name",
                         ID == "Y" ~ "Y_name",
                             TRUE ~ Name)) %>%
   group_by(ID) %>% 
   mutate_if(is.numeric, ~list(. = sum(.))) %>% 
   distinct(ID, .keep_all = TRUE)

## III/ What I really want to achieve
 # Apply the same recode + aggregate steps to several data frames at once:
 # every element of list_df whose ID column contains at least one value of
# newIDdf$ID (B and C in this example); the other elements stay untouched.
#
# NOTE: the predicate below fails as written. map_if() hands each list element
# to the `.p` formula as `.x`, so the bare name `ID` is not resolved as a
# column of the current data frame; the column has to be addressed as `.x$ID`.
 
 list_df_output <- list_df %>% map_if( .p = ~ any(ID %in% newIDdf$ID), ### which test belongs here? (this version does not work)
                                       ~ mutate(.x, ID = reduce2(newIDdf$ID, newIDdf$IDagr, 
                                                             .init= ID, 
                                                             str_replace)) %>%
                                         mutate(.,Name = case_when(ID == "Z" ~ "Z_name",
                                                                 ID == "Y" ~ "Y_name",
                                                                 TRUE ~ Name)) %>%
                                         group_by(., ID) %>% 
                                         mutate_if(., is.numeric, ~list(. = sum(.))) %>% 
                                         distinct(., ID, .keep_all = TRUE) )

希望这有所帮助。

英文:

Once again I'm struggling with the map functions of purrr.

I've got a list of dataframes, all with ID and Name columns.

I want to perform some recoding and then aggregation on rows with some specific values.
For that purpose, I've got another dataframe with a vector of ID and the new IDs (IDagr) that I want to substitute before doing some aggregation (summing all numeric variables).

I know how to perform this on one df (see II/), but I don't know what test to write in a map_if function to apply those operations on all dataframes whose ID column includes some values of newIDdf$ID (here dataframes B and C).

Any ideas ?

## I/ Two input objects
 # list_df: a named list of three data frames (A, B, C). Each one has ID and
 # Name columns built from string vectors plus random numeric Var* columns
 # (three in A, two in B and C). Only B and C contain IDs listed in newIDdf$ID.
 list_df <- list(A = data.frame(ID = c("a", "b", "c", "Z", "Y"),
                                Name = c("a_name", "b_name", "c_name", "Z_name", "Y_name"), 
                                Var1 = rnorm(5),
                                Var2 = rnorm(5),
                                Var3 = rnorm(5)),
                B = data.frame(ID = c("a", "b", "z1", "z2", "z3"),
                               Name = c("a_name", "b_name", "z1_name", "z2_name", "z3_name"),
                               Var1 = rnorm(5),
                               Var2 = rnorm(5)),
                C = data.frame(ID =  c("y1", "y2", "z1", "z2", "z3"),
                               Name = c("y1_name", "y2_name", "z1_name", "z2_name", "z3_name"),
                               Var1 = rnorm(5),
                               Var2 = rnorm(5)))
 
 # newIDdf: correspondence table for the aggregation; each original ID is
 # paired with the aggregated ID (IDagr) that should replace it
 newIDdf <- data.frame(ID =  c("y1", "y2", "z1", "z2", "z3"),
                       IDagr =  c("Y", "Y", "Z", "Z", "Z"))
 
 ## II/ What I want to do, shown on a single data frame
# Example on list_df[["B"]]:
#   1. reduce2() feeds the ID column through str_replace() once per
#      (newIDdf$ID, newIDdf$IDagr) pair, starting from the current ID values
#   2. Name is relabelled for the aggregated IDs "Z" and "Y"
#   3. within each ID group every numeric column is replaced by
#      list(. = sum(.)), i.e. the group sum wrapped in a one-element list
#   4. distinct() keeps the first row of each ID together with all columns
 
 On1df <- list_df[["B"]] %>% 
   mutate(ID = reduce2(newIDdf$ID, newIDdf$IDagr, 
                           .init= ID, 
                           str_replace)) %>%
   mutate(Name = case_when(ID == "Z" ~ "Z_name",
                         ID == "Y" ~ "Y_name",
                             TRUE ~ Name)) %>%
   group_by(ID) %>% 
   mutate_if(is.numeric, ~list(. = sum(.))) %>% 
   distinct(ID, .keep_all = TRUE)

## III/ What I really want to achieve
 # Apply the same recode + aggregate steps to several data frames at once:
 # every element of list_df whose ID column contains at least one value of
# newIDdf$ID (B and C in this example); the other elements stay untouched.
#
# NOTE: the predicate below fails as written. map_if() hands each list element
# to the `.p` formula as `.x`, so the bare name `ID` is not resolved as a
# column of the current data frame; the column has to be addressed as `.x$ID`.
 
 list_df_output <- list_df %>% map_if( .p = ~ any(ID %in% newIDdf$ID), ### which test belongs here? (this version does not work)
                                       ~ mutate(.x, ID = reduce2(newIDdf$ID, newIDdf$IDagr, 
                                                             .init= ID, 
                                                             str_replace)) %>%
                                         mutate(.,Name = case_when(ID == "Z" ~ "Z_name",
                                                                 ID == "Y" ~ "Y_name",
                                                                 TRUE ~ Name)) %>%
                                         group_by(., ID) %>% 
                                         mutate_if(., is.numeric, ~list(. = sum(.))) %>% 
                                         distinct(., ID, .keep_all = TRUE) )

答案1

得分: 3

我不确定下面的方法是否能够产生您期望的输出。另外,我们不需要使用 `mutate_if`,可以改用 `across(where())`。此外,我们也不需要使用 `reduce2()`,而是可以将一个查找向量(下面用 `set_names()` 创建)传递给 `str_replace_all()`:

library(dplyr)
library(purrr)
library(stringr)

# For every data frame in list_df whose ID column contains a value listed in
# newIDdf$ID: recode those IDs to their IDagr, relabel Name, sum the numeric
# columns per ID and keep one row per ID. Data frames without any matching ID
# are returned unchanged by map_if().
list_df %>%
  map_if(
    ~ any(.x$ID %in% newIDdf$ID),
    ~ .x %>%
      mutate(
        # Named lookup vector for str_replace_all(): names are the patterns,
        # values the replacements. The patterns are anchored with ^...$ so
        # that only whole IDs are replaced, in line with the exact %in% test
        # above (unanchored, "z1" would also rewrite part of an ID like "z10").
        # Assumes the IDs contain no regex metacharacters.
        ID = str_replace_all(
          ID,
          set_names(newIDdf$IDagr, paste0("^", newIDdf$ID, "$"))
        ),
        Name = case_when(
          ID == "Z" ~ "Z_name",
          ID == "Y" ~ "Y_name",
          TRUE ~ Name
        )
      ) %>%
      group_by(ID) %>%
      # overwrite each numeric column with its per-ID sum
      mutate(across(where(is.numeric), ~ sum(.))) %>%
      # keep the first row of each ID; the result stays grouped by ID because
      # there is no ungroup() call
      distinct(ID, .keep_all = TRUE)
  )

从 OP 的数据中:

# Example data from the question: a named list of three data frames with ID and
# Name columns plus random numeric Var* columns (three in A, two in B and C).
list_df <- list(A = data.frame(ID = c("a", "b", "c", "Z", "Y"),
                               Name = c("a_name", "b_name", "c_name", "Z_name", "Y_name"), 
                               Var1 = rnorm(5),
                               Var2 = rnorm(5),
                               Var3 = rnorm(5)),
                B = data.frame(ID = c("a", "b", "z1", "z2", "z3"),
                               Name = c("a_name", "b_name", "z1_name", "z2_name", "z3_name"),
                               Var1 = rnorm(5),
                               Var2 = rnorm(5)),
                C = data.frame(ID =  c("y1", "y2", "z1", "z2", "z3"),
                               Name = c("y1_name", "y2_name", "z1_name", "z2_name", "z3_name"),
                               Var1 = rnorm(5),
                               Var2 = rnorm(5)))

# correspondence table for the aggregation: original ID -> aggregated ID (IDagr)
newIDdf <- data.frame(ID =  c("y1", "y2", "z1", "z2", "z3"),
                      IDagr =  c("Y", "Y", "Z", "Z", "Z"))

创建于2023-03-03,使用 reprex 包 (v2.0.1)

英文:

I'm not sure if the approach below yields your desired output. Btw. we don't need `mutate_if`; we can use `across(where())` instead. Also, we don't need `reduce2()` but can pass a lookup vector (below created with `set_names()`) to `str_replace_all()`:

library(dplyr)
library(purrr)
library(stringr)


# For every data frame in list_df whose ID column contains a value listed in
# newIDdf$ID: recode those IDs to their IDagr, relabel Name, sum the numeric
# columns per ID and keep one row per ID. Data frames without any matching ID
# (A in the output below) are returned unchanged by map_if().
list_df %>%
  map_if(
    ~ any(.x$ID %in% newIDdf$ID),
    ~ .x %>%
      mutate(
        # Named lookup vector for str_replace_all(): names are the patterns,
        # values the replacements. The patterns are anchored with ^...$ so
        # that only whole IDs are replaced, in line with the exact %in% test
        # above (unanchored, "z1" would also rewrite part of an ID like "z10").
        # Assumes the IDs contain no regex metacharacters.
        ID = str_replace_all(
          ID,
          set_names(newIDdf$IDagr, paste0("^", newIDdf$ID, "$"))
        ),
        Name = case_when(
          ID == "Z" ~ "Z_name",
          ID == "Y" ~ "Y_name",
          TRUE ~ Name
        )
      ) %>%
      group_by(ID) %>%
      # overwrite each numeric column with its per-ID sum
      mutate(across(where(is.numeric), ~ sum(.))) %>%
      # keep the first row of each ID; the result stays grouped by ID
      # (see the "# Groups" lines in the output below)
      distinct(ID, .keep_all = TRUE)
  )

#> $A
#>   ID   Name       Var1       Var2       Var3
#> 1  a a_name -0.9958825 -0.4822998 -0.5283220
#> 2  b b_name  0.5309721  0.7133405 -1.1024029
#> 3  c c_name -1.2049361  0.2681276  0.1179077
#> 4  Z Z_name -0.7167132 -1.0513967 -1.5125656
#> 5  Y Y_name -0.5056531  0.6273818  1.4781721
#> 
#> $B
#> # A tibble: 3 x 4
#> # Groups:   ID [3]
#>   ID    Name     Var1  Var2
#>   <chr> <chr>   <dbl> <dbl>
#> 1 a     a_name -0.967  2.78
#> 2 b     b_name -0.814  1.37
#> 3 Z     Z_name  0.354  2.33
#> 
#> $C
#> # A tibble: 2 x 4
#> # Groups:   ID [2]
#>   ID    Name    Var1   Var2
#>   <chr> <chr>  <dbl>  <dbl>
#> 1 Y     Y_name -2.71 -0.852
#> 2 Z     Z_name -2.06 -1.52

Data from OP

# Example data from the question: a named list of three data frames with ID and
# Name columns plus random numeric Var* columns (three in A, two in B and C).
list_df <- list(
  A = data.frame(
    ID = c("a", "b", "c", "Z", "Y"),
    Name = c("a_name", "b_name", "c_name", "Z_name", "Y_name"),
    Var1 = rnorm(5),
    Var2 = rnorm(5),
    Var3 = rnorm(5)
  ),
  B = data.frame(
    ID = c("a", "b", "z1", "z2", "z3"),
    Name = c("a_name", "b_name", "z1_name", "z2_name", "z3_name"),
    Var1 = rnorm(5),
    Var2 = rnorm(5)
  ),
  C = data.frame(
    ID = c("y1", "y2", "z1", "z2", "z3"),
    Name = c("y1_name", "y2_name", "z1_name", "z2_name", "z3_name"),
    Var1 = rnorm(5),
    Var2 = rnorm(5)
  )
)

# a dataframe of correspondence for aggregation operations:
# original ID -> aggregated ID (IDagr)
newIDdf <- data.frame(
  ID = c("y1", "y2", "z1", "z2", "z3"),
  IDagr = c("Y", "Y", "Z", "Z", "Z")
)

<sup>Created on 2023-03-03 by the reprex package (v2.0.1)</sup>

答案2

得分: 1

这是您想要的吗?我还把您的 `mutate_if` 调用改成了使用 `across` 和 `where` 的较新写法:

# Apply the recode + aggregate steps only to the data frames whose ID column
# contains at least one value of newIDdf$ID; map_if() returns the others as-is.
list_df |>
  map_if(
    \(df) any(df$ID %in% newIDdf$ID),
    \(df) df |>
      mutate(
        # feed ID through str_replace() once per (ID, IDagr) pair of newIDdf
        ID = reduce2(newIDdf$ID, newIDdf$IDagr, str_replace, .init = ID),
        # relabel the aggregated IDs; every other row keeps its Name
        Name = case_when(
          ID == "Z" ~ "Z_name",
          ID == "Y" ~ "Y_name",
          TRUE ~ Name
        )
      ) |>
      group_by(ID) |>
      # overwrite each numeric column with its per-ID sum
      mutate(across(where(is.numeric), \(col) sum(col))) |>
      # keep the first row of each ID (the result stays grouped by ID)
      distinct(ID, .keep_all = TRUE)
  )

输出

$A
ID Name Var1 Var2 Var3
1 a a_name 0.1015844 0.6306434 0.5058593
2 b b_name -0.1420690 0.5152645 0.2497879
3 c c_name 0.5841423 1.2883330 0.5297098
4 Z Z_name 1.6645565 0.2307524 -1.0418045
5 Y Y_name -0.1293767 -2.4152871 -0.1935843

$B

A tibble: 3 × 4

Groups: ID [3]

ID Name Var1 Var2

1 a a_name -0.512 -0.119
2 b b_name -2.14 -0.834
3 Z Z_name 0.468 2.54

$C

A tibble: 2 × 4

Groups: ID [2]

ID Name Var1 Var2

1 Y Y_name 1.15 0.162
2 Z Z_name 0.790 2.03


<details>
<summary>英文:</summary>
Do you want this? I also changed your `mutate_if` call to the more recent version using `across` and `where`:
# Apply the recode + aggregate steps only to the data frames whose ID column
# contains at least one value of newIDdf$ID; map_if() returns the others as-is.
list_df |>
  map_if(
    \(df) any(df$ID %in% newIDdf$ID),
    \(df) df |>
      mutate(
        # feed ID through str_replace() once per (ID, IDagr) pair of newIDdf
        ID = reduce2(newIDdf$ID, newIDdf$IDagr, str_replace, .init = ID),
        # relabel the aggregated IDs; every other row keeps its Name
        Name = case_when(
          ID == "Z" ~ "Z_name",
          ID == "Y" ~ "Y_name",
          TRUE ~ Name
        )
      ) |>
      group_by(ID) |>
      # overwrite each numeric column with its per-ID sum
      mutate(across(where(is.numeric), \(col) sum(col))) |>
      # keep the first row of each ID (the result stays grouped by ID)
      distinct(ID, .keep_all = TRUE)
  )
**Output**:
$A
ID   Name       Var1       Var2       Var3
1  a a_name  0.1015844  0.6306434  0.5058593
2  b b_name -0.1420690  0.5152645  0.2497879
3  c c_name  0.5841423  1.2883330  0.5297098
4  Z Z_name  1.6645565  0.2307524 -1.0418045
5  Y Y_name -0.1293767 -2.4152871 -0.1935843
$B
# A tibble: 3 × 4
# Groups:   ID [3]
ID    Name     Var1   Var2
<chr> <chr>   <dbl>  <dbl>
1 a     a_name -0.512 -0.119
2 b     b_name -2.14  -0.834
3 Z     Z_name  0.468  2.54 
$C
# A tibble: 2 × 4
# Groups:   ID [2]
ID    Name    Var1  Var2
<chr> <chr>  <dbl> <dbl>
1 Y     Y_name 1.15  0.162
2 Z     Z_name 0.790 2.03 
</details>

huangapple
  • 本文由 发表于 2023年3月3日 18:51:54
  • 转载请务必保留本文链接:https://go.coder-hub.com/75626117.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定