英文:
R data frame create new column from first existing column or NA if no existing columns
问题
以下是代码部分的翻译:
library(dplyr, warn.conflicts = FALSE)
#> 警告:package 'dplyr' 是在 R 版本 4.2.3 下构建的
# `dplyr::coalesce` 返回第一个非缺失值,但这些列必须存在
# Return the first of `.var1` / `.var2` that names a column of the data frame,
# as a symbol meant to be injected with `!!` inside a dplyr verb. If neither
# names an existing column, return `.fill` instead.
#
# .var1, .var2  Bare column names (or strings). `.var2` is optional.
# .fill         Value returned when no candidate column exists (default NA).
# .data         Data frame to inspect. When omitted, the `.` placeholder that
#               magrittr's `%>%` binds to the piped value is looked up from
#               the calling frame.
return_exist_var <- function(.var1, .var2, .fill = NA, .data) {
  if (missing(.data)) {
    # Get the `.` from magrittr::%>%
    .data <- get(".", envir = parent.frame())
  }
  stopifnot(is.data.frame(.data))

  # Capture only the candidates that were actually supplied, so a missing
  # `.var2` falls through to `.fill` instead of raising an error.
  candidates <- list()
  if (!missing(.var1)) {
    candidates <- c(candidates, list(rlang::enexpr(.var1)))
  }
  if (!missing(.var2)) {
    candidates <- c(candidates, list(rlang::enexpr(.var2)))
  }

  for (candidate in candidates) {
    col_name <- rlang::as_string(candidate)
    if (col_name %in% names(.data)) {
      # `sym()` builds the symbol directly, so non-syntactic column names
      # (e.g. "my var") work; parsing the name as code would fail on them.
      return(rlang::sym(col_name))
    }
  }
  .fill
}
# Example data: two rows per ID; `v1` is missing in the last row.
df <- data.frame(
  ID = rep(c("ID01", "ID02"), each = 2L),
  v1 = c(letters[1:3], NA),
  v2 = c("A", "B", rep("C", 2L))
)
以下是示例用法的翻译:
返回第一个存在的列
df %>% dplyr::mutate(NEW = !!return_exist_var(v1))
#> ID v1 v2 NEW
#> 1 ID01 a A a
#> 2 ID01 b B b
#> 3 ID02 c C c
#> 4 ID02 <NA> C <NA>
返回两列中的第一个存在的列
df %>% dplyr::mutate(NEW = !!return_exist_var(v2, v1))
#> ID v1 v2 NEW
#> 1 ID01 a A A
#> 2 ID01 b B B
#> 3 ID02 c C C
#> 4 ID02 <NA> C C
返回两列中的第二个存在的列
df %>% dplyr::mutate(NEW = !!return_exist_var(v1_x, v2))
#> ID v1 v2 NEW
#> 1 ID01 a A A
#> 2 ID01 b B B
#> 3 ID02 c C C
#> 4 ID02 <NA> C C
如果两列不存在,则返回 NA
df %>% dplyr::mutate(NEW = !!return_exist_var(v1_X, v2_X))
#> ID v1 v2 NEW
#> 1 ID01 a A NA
#> 2 ID01 b B NA
#> 3 ID02 c C NA
#> 4 ID02 <NA> C NA
在分组的数据框上正常工作
df %>%
dplyr::group_by(ID) %>%
dplyr::mutate(NEW = !!return_exist_var(v1))
#> # 一个 tibble:4 × 4
#> # 组: ID [2]
#> ID v1 v2 NEW
#> <chr> <chr> <chr> <chr>
#> 1 ID01 a A a
#> 2 ID01 b B b
#> 3 ID02 c C c
#> 4 ID02 <NA> C <NA>
英文:
The following function returns the first existing column in a data frame, or NA
if none of the columns exist
library(dplyr, warn.conflicts = FALSE)
#> Warning: package 'dplyr' was built under R version 4.2.3
# `dplyr::coalesce` returns the first non-missing value, but the columns must exist
# Return the first of `.var1` / `.var2` that names a column of the data frame,
# as a symbol meant to be injected with `!!` inside a dplyr verb. If neither
# names an existing column, return `.fill` instead.
#
# .var1, .var2  Bare column names (or strings). `.var2` is optional.
# .fill         Value returned when no candidate column exists (default NA).
# .data         Data frame to inspect. When omitted, the `.` placeholder that
#               magrittr's `%>%` binds to the piped value is looked up from
#               the calling frame.
return_exist_var <- function(.var1, .var2, .fill = NA, .data) {
  if (missing(.data)) {
    # Get the `.` from magrittr::%>%
    .data <- get(".", envir = parent.frame())
  }
  stopifnot(is.data.frame(.data))

  # Capture only the candidates that were actually supplied, so a missing
  # `.var2` falls through to `.fill` instead of raising an error.
  candidates <- list()
  if (!missing(.var1)) {
    candidates <- c(candidates, list(rlang::enexpr(.var1)))
  }
  if (!missing(.var2)) {
    candidates <- c(candidates, list(rlang::enexpr(.var2)))
  }

  for (candidate in candidates) {
    col_name <- rlang::as_string(candidate)
    if (col_name %in% names(.data)) {
      # `sym()` builds the symbol directly, so non-syntactic column names
      # (e.g. "my var") work; parsing the name as code would fail on them.
      return(rlang::sym(col_name))
    }
  }
  .fill
}
# Example data: two rows per ID; `v1` is missing in the last row.
df <- data.frame(
  ID = rep(c("ID01", "ID02"), each = 2L),
  v1 = c(letters[1:3], NA),
  v2 = c("A", "B", rep("C", 2L))
)
return first existing column
df %>% dplyr::mutate(NEW = !!return_exist_var(v1))
#> ID v1 v2 NEW
#> 1 ID01 a A a
#> 2 ID01 b B b
#> 3 ID02 c C c
#> 4 ID02 <NA> C <NA>
return the first existing column of two columns
df %>% dplyr::mutate(NEW = !!return_exist_var(v2, v1))
#> ID v1 v2 NEW
#> 1 ID01 a A A
#> 2 ID01 b B B
#> 3 ID02 c C C
#> 4 ID02 <NA> C C
return the second column when the first does not exist
df %>% dplyr::mutate(NEW = !!return_exist_var(v1_x, v2))
#> ID v1 v2 NEW
#> 1 ID01 a A A
#> 2 ID01 b B B
#> 3 ID02 c C C
#> 4 ID02 <NA> C C
return NA if neither column exists
df %>% dplyr::mutate(NEW = !!return_exist_var(v1_X, v2_X))
#> ID v1 v2 NEW
#> 1 ID01 a A NA
#> 2 ID01 b B NA
#> 3 ID02 c C NA
#> 4 ID02 <NA> C NA
work well on grouped data frame
df %>%
dplyr::group_by(ID) %>%
dplyr::mutate(NEW = !!return_exist_var(v1))
#> # A tibble: 4 × 4
#> # Groups: ID [2]
#> ID v1 v2 NEW
#> <chr> <chr> <chr> <chr>
#> 1 ID01 a A a
#> 2 ID01 b B b
#> 3 ID02 c C c
#> 4 ID02 <NA> C <NA>
Problem
How can I update the function (or define a new, equivalent one) so that it can be called as below, without `!!`?
df %>% dplyr::mutate(NEW = return_exist_var(v1))
答案1
得分: 1
这是您的函数的简化版本,它依赖于使用 any_of()
来处理缺失的变量。由于 any_of()
需要字符向量作为参数,因此在使用之前需要解析参数。
library(dplyr)
# Return the first column named in `...` that exists in the data currently
# being processed by a dplyr verb, or NA when none of them exist.
#
# ...  Bare column names, checked in the order given. Names that are not
#      columns of the current data are skipped instead of raising an error.
#
# Must be called inside a dplyr data-masking verb such as `mutate()`, because
# it uses `dplyr::pick()` to reach the current data.
return_exist_var <- function(...) {
  # Capture the arguments unevaluated so missing columns are never looked up.
  var_list <- match.call(expand.dots = FALSE)$...
  # `any_of()` needs a character vector; `vapply()` guarantees one even when
  # no arguments are given, and `USE.NAMES = FALSE` keeps argument names from
  # being treated as renames.
  col_names <- vapply(var_list, deparse, character(1), USE.NAMES = FALSE)
  existing <- dplyr::pick(dplyr::any_of(col_names))
  if (ncol(existing) > 0L) {
    existing[[1L]]
  } else {
    NA
  }
}
df %>%
dplyr::mutate(NEW = return_exist_var(v1))
ID v1 v2 NEW
1 ID01 a A a
2 ID01 b B b
3 ID02 c C c
4 ID02 <NA> C <NA>
df %>%
dplyr::mutate(NEW = return_exist_var(v2, v1))
ID v1 v2 NEW
1 ID01 a A A
2 ID01 b B B
3 ID02 c C C
4 ID02 <NA> C C
df %>%
dplyr::mutate(NEW = return_exist_var(v1_x, v2))
ID v1 v2 NEW
1 ID01 a A A
2 ID01 b B B
3 ID02 c C C
4 ID02 <NA> C C
df %>%
dplyr::mutate(NEW = return_exist_var(v1_X, v2_X))
ID v1 v2 NEW
1 ID01 a A NA
2 ID01 b B NA
3 ID02 c C NA
4 ID02 <NA> C NA
# 如果数据已分组,也适用
df %>%
mutate(NEW = return_exist_var(v1), .by = ID)
ID v1 v2 NEW
1 ID01 a A a
2 ID01 b B b
3 ID02 c C c
4 ID02 <NA> C <NA>
英文:
Here is a simplified version of your function that relies on using any_of()
to handle missing vars. As any_of()
requires a character vector, the arguments need to be deparsed before use.
library(dplyr)
# Return the first column named in `...` that exists in the data currently
# being processed by a dplyr verb, or NA when none of them exist.
#
# ...  Bare column names, checked in the order given. Names that are not
#      columns of the current data are skipped instead of raising an error.
#
# Must be called inside a dplyr data-masking verb such as `mutate()`, because
# it uses `dplyr::pick()` to reach the current data.
return_exist_var <- function(...) {
  # Capture the arguments unevaluated so missing columns are never looked up.
  var_list <- match.call(expand.dots = FALSE)$...
  # `any_of()` needs a character vector; `vapply()` guarantees one even when
  # no arguments are given, and `USE.NAMES = FALSE` keeps argument names from
  # being treated as renames.
  col_names <- vapply(var_list, deparse, character(1), USE.NAMES = FALSE)
  existing <- dplyr::pick(dplyr::any_of(col_names))
  if (ncol(existing) > 0L) {
    existing[[1L]]
  } else {
    NA
  }
}
df %>%
dplyr::mutate(NEW = return_exist_var(v1))
ID v1 v2 NEW
1 ID01 a A a
2 ID01 b B b
3 ID02 c C c
4 ID02 <NA> C <NA>
df %>%
dplyr::mutate(NEW = return_exist_var(v2, v1))
ID v1 v2 NEW
1 ID01 a A A
2 ID01 b B B
3 ID02 c C C
4 ID02 <NA> C C
df %>%
dplyr::mutate(NEW = return_exist_var(v1_x, v2))
ID v1 v2 NEW
1 ID01 a A A
2 ID01 b B B
3 ID02 c C C
4 ID02 <NA> C C
df %>%
dplyr::mutate(NEW = return_exist_var(v1_X, v2_X))
ID v1 v2 NEW
1 ID01 a A NA
2 ID01 b B NA
3 ID02 c C NA
4 ID02 <NA> C NA
# Works if data is grouped
df %>%
mutate(NEW = return_exist_var(v1), .by = ID)
ID v1 v2 NEW
1 ID01 a A a
2 ID01 b B b
3 ID02 c C c
4 ID02 <NA> C <NA>
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论