英文:
writing test conditions in a map_if function : apply function on all dataframes with a column that includes specific values
问题
以下是您要翻译的代码部分:
Once again I'm struggling with the map functions of purrr.
I've got a list of dataframes, all with ID and Name columns.
I want to perform some recoding and then aggregation on rows with some specific values.
For that purpose, I've got another dataframe with the columns ID and IDagr (the new ID) that I want to substitute before doing some aggregation (summing all numeric variables).
I know how to perform this on one df (see II/), but I don't know what test to write in a map_if function to apply those operations to all dataframes whose ID column includes some values of newIDdf$ID (here dataframes B and C).
Any ideas?
## I/ Two objects

# A named list of data frames. Each one has ID and Name columns plus numeric
# Var* columns filled with random normal draws (no seed is set).
list_df <- list(
  A = data.frame(
    ID = c("a", "b", "c", "Z", "Y"),
    Name = paste0(c("a", "b", "c", "Z", "Y"), "_name"),
    Var1 = rnorm(n = 5),
    Var2 = rnorm(n = 5),
    Var3 = rnorm(n = 5)
  ),
  B = data.frame(
    ID = c("a", "b", "z1", "z2", "z3"),
    Name = paste0(c("a", "b", "z1", "z2", "z3"), "_name"),
    Var1 = rnorm(n = 5),
    Var2 = rnorm(n = 5)
  ),
  C = data.frame(
    ID = c("y1", "y2", "z1", "z2", "z3"),
    Name = paste0(c("y1", "y2", "z1", "z2", "z3"), "_name"),
    Var1 = rnorm(n = 5),
    Var2 = rnorm(n = 5)
  )
)

# Correspondence table for the aggregation: each detailed ID is paired with
# the aggregate ID (IDagr) it should be recoded to.
newIDdf <- data.frame(
  ID = c("y1", "y2", "z1", "z2", "z3"),
  IDagr = rep(c("Y", "Z"), times = c(2, 3))
)
## II/ What I want to do (shown on a single data frame)
# Example on one df: recode the IDs of list_df[["B"]] with the newIDdf lookup,
# relabel the matching names, then collapse to one row per recoded ID.
On1df <- list_df[["B"]] %>%
# reduce2() walks the (ID, IDagr) pairs of newIDdf, starting from the ID column
# (.init) and passing the running result to str_replace() for each pair.
mutate(ID = reduce2(newIDdf$ID, newIDdf$IDagr,
.init= ID,
str_replace)) %>%
# Give the aggregated IDs their aggregate name; other rows keep their Name.
mutate(Name = case_when(ID == "Z" ~ "Z_name",
ID == "Y" ~ "Y_name",
TRUE ~ Name)) %>%
group_by(ID) %>%
# Within each ID group, replace every numeric column by its sum.
# NOTE(review): the lambda wraps the sum in list(); confirm a list result is intended.
mutate_if(is.numeric, ~list(. = sum(.))) %>%
# Keep a single row per ID while retaining all columns; no ungroup() follows,
# so the result is still grouped by ID.
distinct(ID, .keep_all = TRUE)
## III/ What I really want to achieve
# Apply the same recoding and aggregation to B and C at once, i.e. to every
# data frame whose ID column includes some values of newIDdf$ID.
# NOTE(review): the predicate below refers to a bare `ID`; inside a purrr
# formula lambda the current list element is only available as `.x`, so the
# column would have to be reached as `.x$ID` -- presumably why this fails.
list_df_output <- list_df %>% map_if( .p = ~ any(ID %in% newIDdf$ID), ### what test to put here? (this does not work)
~ mutate(.x, ID = reduce2(newIDdf$ID, newIDdf$IDagr,
.init= ID,
str_replace)) %>%
mutate(.,Name = case_when(ID == "Z" ~ "Z_name",
ID == "Y" ~ "Y_name",
TRUE ~ Name)) %>%
group_by(., ID) %>%
mutate_if(., is.numeric, ~list(. = sum(.))) %>%
distinct(., ID, .keep_all = TRUE) )
希望这有所帮助。
英文:
Once again I'm struggling with the map functions of purrr.
I've got a list of dataframes, all with ID and Name columns.
I want to perform some recoding and then aggregation on rows with some specific values.
For that purpose, I've got another dataframe with the columns ID and IDagr (the new ID) that I want to substitute before doing some aggregation (summing all numeric variables).
I know how to perform this on one df (see II/), but I don't know what test to write in a map_if function to apply those operations to all dataframes whose ID column includes some values of newIDdf$ID (here dataframes B and C).
Any ideas?
## I/ Two objects

# A named list of data frames. Each one has ID and Name columns plus numeric
# Var* columns filled with random normal draws (no seed is set).
list_df <- list(
  A = data.frame(
    ID = c("a", "b", "c", "Z", "Y"),
    Name = paste0(c("a", "b", "c", "Z", "Y"), "_name"),
    Var1 = rnorm(n = 5),
    Var2 = rnorm(n = 5),
    Var3 = rnorm(n = 5)
  ),
  B = data.frame(
    ID = c("a", "b", "z1", "z2", "z3"),
    Name = paste0(c("a", "b", "z1", "z2", "z3"), "_name"),
    Var1 = rnorm(n = 5),
    Var2 = rnorm(n = 5)
  ),
  C = data.frame(
    ID = c("y1", "y2", "z1", "z2", "z3"),
    Name = paste0(c("y1", "y2", "z1", "z2", "z3"), "_name"),
    Var1 = rnorm(n = 5),
    Var2 = rnorm(n = 5)
  )
)

# Correspondence table for the aggregation: each detailed ID is paired with
# the aggregate ID (IDagr) it should be recoded to.
newIDdf <- data.frame(
  ID = c("y1", "y2", "z1", "z2", "z3"),
  IDagr = rep(c("Y", "Z"), times = c(2, 3))
)
## II/ What I want to do (shown on a single data frame)
# Example on one df: recode the IDs of list_df[["B"]] with the newIDdf lookup,
# relabel the matching names, then collapse to one row per recoded ID.
On1df <- list_df[["B"]] %>%
# reduce2() walks the (ID, IDagr) pairs of newIDdf, starting from the ID column
# (.init) and passing the running result to str_replace() for each pair.
mutate(ID = reduce2(newIDdf$ID, newIDdf$IDagr,
.init= ID,
str_replace)) %>%
# Give the aggregated IDs their aggregate name; other rows keep their Name.
mutate(Name = case_when(ID == "Z" ~ "Z_name",
ID == "Y" ~ "Y_name",
TRUE ~ Name)) %>%
group_by(ID) %>%
# Within each ID group, replace every numeric column by its sum.
# NOTE(review): the lambda wraps the sum in list(); confirm a list result is intended.
mutate_if(is.numeric, ~list(. = sum(.))) %>%
# Keep a single row per ID while retaining all columns; no ungroup() follows,
# so the result is still grouped by ID.
distinct(ID, .keep_all = TRUE)
## III/ What I really want to achieve
# Apply the same recoding and aggregation to B and C at once, i.e. to every
# data frame whose ID column includes some values of newIDdf$ID.
# NOTE(review): the predicate below refers to a bare `ID`; inside a purrr
# formula lambda the current list element is only available as `.x`, so the
# column would have to be reached as `.x$ID` -- presumably why this fails.
list_df_output <- list_df %>% map_if( .p = ~ any(ID %in% newIDdf$ID), ### what test to put here? (this does not work)
~ mutate(.x, ID = reduce2(newIDdf$ID, newIDdf$IDagr,
.init= ID,
str_replace)) %>%
mutate(.,Name = case_when(ID == "Z" ~ "Z_name",
ID == "Y" ~ "Y_name",
TRUE ~ Name)) %>%
group_by(., ID) %>%
mutate_if(., is.numeric, ~list(. = sum(.))) %>%
distinct(., ID, .keep_all = TRUE) )
答案1
得分: 3
我不确定下面的方法是否能够产生您期望的输出。另外,我们不需要使用 mutate_if,可以改用 across(where())。此外,也不需要使用 reduce2(),而是可以将一个查找向量(下面用 set_names() 创建)传递给 str_replace_all():
library(dplyr)
library(purrr)
library(stringr)
# Recode and aggregate only the data frames whose ID column shares at least
# one value with the lookup table; all other list elements pass through as-is.
map_if(
  list_df,
  # Predicate: does this data frame contain any ID listed in newIDdf?
  .p = ~ any(.x$ID %in% newIDdf$ID),
  .f = ~ .x %>%
    # The named vector (names = patterns, values = replacements) lets
    # str_replace_all() apply every ID -> IDagr substitution in one call.
    mutate(ID = str_replace_all(ID, set_names(newIDdf$IDagr, newIDdf$ID))) %>%
    # Give the aggregated IDs their aggregate name; other rows keep their Name.
    mutate(
      Name = case_when(
        ID == "Z" ~ "Z_name",
        ID == "Y" ~ "Y_name",
        TRUE ~ Name
      )
    ) %>%
    group_by(ID) %>%
    # Replace each numeric column by its per-ID sum, then keep one row per ID.
    # The result remains grouped by ID.
    mutate(across(where(is.numeric), sum)) %>%
    distinct(ID, .keep_all = TRUE)
)
从 OP 的数据中:
# A named list of data frames. Each one has ID and Name columns plus numeric
# Var* columns filled with random normal draws (no seed is set).
list_df <- list(
  A = data.frame(
    ID = c("a", "b", "c", "Z", "Y"),
    Name = paste0(c("a", "b", "c", "Z", "Y"), "_name"),
    Var1 = rnorm(n = 5),
    Var2 = rnorm(n = 5),
    Var3 = rnorm(n = 5)
  ),
  B = data.frame(
    ID = c("a", "b", "z1", "z2", "z3"),
    Name = paste0(c("a", "b", "z1", "z2", "z3"), "_name"),
    Var1 = rnorm(n = 5),
    Var2 = rnorm(n = 5)
  ),
  C = data.frame(
    ID = c("y1", "y2", "z1", "z2", "z3"),
    Name = paste0(c("y1", "y2", "z1", "z2", "z3"), "_name"),
    Var1 = rnorm(n = 5),
    Var2 = rnorm(n = 5)
  )
)

# Correspondence table for the aggregation: each detailed ID is paired with
# the aggregate ID (IDagr) it should be recoded to.
newIDdf <- data.frame(
  ID = c("y1", "y2", "z1", "z2", "z3"),
  IDagr = rep(c("Y", "Z"), times = c(2, 3))
)
创建于2023-03-03,使用 reprex 包 (v2.0.1)
英文:
I'm not sure if the approach below yields your desired output. By the way, we don't need `mutate_if`; we can use `across(where())` instead. Also, we don't need `reduce2()`, but can pass a lookup vector (created below with `set_names()`) to `str_replace_all()`:
library(dplyr)
library(purrr)
library(stringr)
# Recode and aggregate only the data frames whose ID column shares at least
# one value with the lookup table; all other list elements pass through as-is.
map_if(
  list_df,
  # Predicate: does this data frame contain any ID listed in newIDdf?
  .p = ~ any(.x$ID %in% newIDdf$ID),
  .f = ~ .x %>%
    # The named vector (names = patterns, values = replacements) lets
    # str_replace_all() apply every ID -> IDagr substitution in one call.
    mutate(ID = str_replace_all(ID, set_names(newIDdf$IDagr, newIDdf$ID))) %>%
    # Give the aggregated IDs their aggregate name; other rows keep their Name.
    mutate(
      Name = case_when(
        ID == "Z" ~ "Z_name",
        ID == "Y" ~ "Y_name",
        TRUE ~ Name
      )
    ) %>%
    group_by(ID) %>%
    # Replace each numeric column by its per-ID sum, then keep one row per ID.
    # The result remains grouped by ID.
    mutate(across(where(is.numeric), sum)) %>%
    distinct(ID, .keep_all = TRUE)
)
#> $A
#> ID Name Var1 Var2 Var3
#> 1 a a_name -0.9958825 -0.4822998 -0.5283220
#> 2 b b_name 0.5309721 0.7133405 -1.1024029
#> 3 c c_name -1.2049361 0.2681276 0.1179077
#> 4 Z Z_name -0.7167132 -1.0513967 -1.5125656
#> 5 Y Y_name -0.5056531 0.6273818 1.4781721
#>
#> $B
#> # A tibble: 3 x 4
#> # Groups: ID [3]
#> ID Name Var1 Var2
#> <chr> <chr> <dbl> <dbl>
#> 1 a a_name -0.967 2.78
#> 2 b b_name -0.814 1.37
#> 3 Z Z_name 0.354 2.33
#>
#> $C
#> # A tibble: 2 x 4
#> # Groups: ID [2]
#> ID Name Var1 Var2
#> <chr> <chr> <dbl> <dbl>
#> 1 Y Y_name -2.71 -0.852
#> 2 Z Z_name -2.06 -1.52
Data from OP
# A named list of data frames. Each one has ID and Name columns plus numeric
# Var* columns filled with random normal draws (no seed is set).
list_df <- list(
  A = data.frame(
    ID = c("a", "b", "c", "Z", "Y"),
    Name = paste0(c("a", "b", "c", "Z", "Y"), "_name"),
    Var1 = rnorm(n = 5),
    Var2 = rnorm(n = 5),
    Var3 = rnorm(n = 5)
  ),
  B = data.frame(
    ID = c("a", "b", "z1", "z2", "z3"),
    Name = paste0(c("a", "b", "z1", "z2", "z3"), "_name"),
    Var1 = rnorm(n = 5),
    Var2 = rnorm(n = 5)
  ),
  C = data.frame(
    ID = c("y1", "y2", "z1", "z2", "z3"),
    Name = paste0(c("y1", "y2", "z1", "z2", "z3"), "_name"),
    Var1 = rnorm(n = 5),
    Var2 = rnorm(n = 5)
  )
)

# Correspondence table for the aggregation: each detailed ID is paired with
# the aggregate ID (IDagr) it should be recoded to.
newIDdf <- data.frame(
  ID = c("y1", "y2", "z1", "z2", "z3"),
  IDagr = rep(c("Y", "Z"), times = c(2, 3))
)
<sup>Created on 2023-03-03 by the reprex package (v2.0.1)</sup>
答案2
得分: 1
你要的是这个吗?我还将您的 mutate_if 函数改为使用 across 和 where 的较新写法:
# Recode and aggregate only the data frames whose ID column contains at least
# one ID from newIDdf; the other list elements are returned unchanged.
list_df |>
  map_if(
    # Predicate: does this data frame contain any ID listed in newIDdf?
    \(df) any(df$ID %in% newIDdf$ID),
    \(df) df |>
      # Thread the ID column through str_replace() once per (ID, IDagr) pair.
      mutate(
        ID = reduce2(newIDdf$ID, newIDdf$IDagr, str_replace, .init = ID)
      ) |>
      # Give the aggregated IDs their aggregate name; other rows keep theirs.
      mutate(
        Name = case_when(
          ID == "Z" ~ "Z_name",
          ID == "Y" ~ "Y_name",
          TRUE ~ Name
        )
      ) |>
      group_by(ID) |>
      # Replace each numeric column by its per-ID sum, then keep one row per
      # ID. The result remains grouped by ID.
      mutate(across(where(is.numeric), sum)) |>
      distinct(ID, .keep_all = TRUE)
  )
输出:
$A
ID Name Var1 Var2 Var3
1 a a_name 0.1015844 0.6306434 0.5058593
2 b b_name -0.1420690 0.5152645 0.2497879
3 c c_name 0.5841423 1.2883330 0.5297098
4 Z Z_name 1.6645565 0.2307524 -1.0418045
5 Y Y_name -0.1293767 -2.4152871 -0.1935843
$B
# A tibble: 3 × 4
# Groups: ID [3]
ID Name Var1 Var2
<chr> <chr> <dbl> <dbl>
1 a a_name -0.512 -0.119
2 b b_name -2.14 -0.834
3 Z Z_name 0.468 2.54
$C
# A tibble: 2 × 4
# Groups: ID [2]
ID Name Var1 Var2
<chr> <chr> <dbl> <dbl>
1 Y Y_name 1.15 0.162
2 Z Z_name 0.790 2.03
<details>
<summary>英文:</summary>
Is this what you want? I also changed your `mutate_if` call to the more recent form using `across` and `where`:
# Recode and aggregate only the data frames whose ID column contains at least
# one ID from newIDdf; the other list elements are returned unchanged.
list_df |>
  map_if(
    # Predicate: does this data frame contain any ID listed in newIDdf?
    \(df) any(df$ID %in% newIDdf$ID),
    \(df) df |>
      # Thread the ID column through str_replace() once per (ID, IDagr) pair.
      mutate(
        ID = reduce2(newIDdf$ID, newIDdf$IDagr, str_replace, .init = ID)
      ) |>
      # Give the aggregated IDs their aggregate name; other rows keep theirs.
      mutate(
        Name = case_when(
          ID == "Z" ~ "Z_name",
          ID == "Y" ~ "Y_name",
          TRUE ~ Name
        )
      ) |>
      group_by(ID) |>
      # Replace each numeric column by its per-ID sum, then keep one row per
      # ID. The result remains grouped by ID.
      mutate(across(where(is.numeric), sum)) |>
      distinct(ID, .keep_all = TRUE)
  )
**Output**:
$A
ID Name Var1 Var2 Var3
1 a a_name 0.1015844 0.6306434 0.5058593
2 b b_name -0.1420690 0.5152645 0.2497879
3 c c_name 0.5841423 1.2883330 0.5297098
4 Z Z_name 1.6645565 0.2307524 -1.0418045
5 Y Y_name -0.1293767 -2.4152871 -0.1935843
$B
# A tibble: 3 × 4
# Groups: ID [3]
ID Name Var1 Var2
<chr> <chr> <dbl> <dbl>
1 a a_name -0.512 -0.119
2 b b_name -2.14 -0.834
3 Z Z_name 0.468 2.54
$C
# A tibble: 2 × 4
# Groups: ID [2]
ID Name Var1 Var2
<chr> <chr> <dbl> <dbl>
1 Y Y_name 1.15 0.162
2 Z Z_name 0.790 2.03
</details>
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论