2023年6月29日 10:23:25go评论160阅读模式

英文:

R data frame create new column from first existing column or NA if no existing columns

问题

以下是代码部分的翻译：

library(dplyr, warn.conflicts = FALSE)
#&gt; 警告：package &#39;dplyr&#39; 是在 R 版本 4.2.3 下构建的

# `dplyr::coalesce` 返回第一个非缺失值，但这些列必须存在

# Resolve the first of up to two bare column names that exists in a data frame.
#
# Arguments:
#   .var1, .var2  Bare (unquoted) column names, checked in this order. Either
#                 one may be omitted.
#   .fill         Value returned when none of the supplied names is a column
#                 (default NA).
#   .data         Data frame to inspect. When omitted, the object bound to `.`
#                 in the calling frame is used (the magrittr `%>%` placeholder).
#
# Returns a symbol naming the first existing column, meant to be unquoted with
# `!!` inside a dplyr verb, e.g.
#   df %>% dplyr::mutate(NEW = !!return_exist_var(v1, v2))
# or `.fill` when no supplied name matches a column.
return_exist_var <- function(.var1, .var2, .fill = NA, .data) {
  # Only fall back to the pipe placeholder when the caller gave no data frame,
  # so an explicitly supplied `.data` is honoured.
  if (missing(.data)) {
    .data <- get(".", parent.frame())
  }
  stopifnot(is.data.frame(.data))

  # Each candidate is captured only if it was supplied: a captured missing
  # argument would raise an error as soon as it is accessed.
  if (!missing(.var1)) {
    first_name <- rlang::as_string(rlang::enexpr(.var1))
    if (first_name %in% names(.data)) {
      return(rlang::sym(first_name))
    }
  }
  if (!missing(.var2)) {
    second_name <- rlang::as_string(rlang::enexpr(.var2))
    if (second_name %in% names(.data)) {
      return(rlang::sym(second_name))
    }
  }
  .fill
}

df &lt;- data.frame(
  ID = c(&quot;ID01&quot;, &quot;ID01&quot;, &quot;ID02&quot;, &quot;ID02&quot;),
  v1 = c(&quot;a&quot;, &quot;b&quot;, &quot;c&quot;, NA),
  v2 = c(&quot;A&quot;, &quot;B&quot;, &quot;C&quot;, &quot;C&quot;)
)

以下是示例用法的翻译：

返回第一个存在的列

df %&gt;% dplyr::mutate(NEW = !!return_exist_var(v1))
#&gt;     ID   v1 v2  NEW
#&gt; 1 ID01    a  A    a
#&gt; 2 ID01    b  B    b
#&gt; 3 ID02    c  C    c
#&gt; 4 ID02 &lt;NA&gt;  C &lt;NA&gt;

返回两列中的第一个存在的列

df %&gt;% dplyr::mutate(NEW = !!return_exist_var(v2, v1))
#&gt;     ID   v1 v2 NEW
#&gt; 1 ID01    a  A   A
#&gt; 2 ID01    b  B   B
#&gt; 3 ID02    c  C   C
#&gt; 4 ID02 &lt;NA&gt;  C   C

返回两列中的第二个存在的列

df %&gt;% dplyr::mutate(NEW = !!return_exist_var(v1_x, v2))
#&gt;     ID   v1 v2 NEW
#&gt; 1 ID01    a  A   A
#&gt; 2 ID01    b  B   B
#&gt; 3 ID02    c  C   C
#&gt; 4 ID02 &lt;NA&gt;  C   C

如果两列不存在，则返回 NA

df %&gt;% dplyr::mutate(NEW = !!return_exist_var(v1_X, v2_X))
#&gt;     ID   v1 v2 NEW
#&gt; 1 ID01    a  A  NA
#&gt; 2 ID01    b  B  NA
#&gt; 3 ID02    c  C  NA
#&gt; 4 ID02 &lt;NA&gt;  C  NA

在分组的数据框上正常工作

df %&gt;%
  dplyr::group_by(ID) %&gt;%
  dplyr::mutate(NEW = !!return_exist_var(v1))
#&gt; # 一个 tibble：4 × 4
#&gt; # 组：   ID [2]
#&gt;   ID    v1    v2    NEW  
#&gt;   &lt;chr&gt; &lt;chr&gt; &lt;chr&gt; &lt;chr&gt;
#&gt; 1 ID01  a     A     a    
#&gt; 2 ID01  b     B     b    
#&gt; 3 ID02  c     C     c    
#&gt; 4 ID02  &lt;NA&gt;  C     &lt;NA&gt;

英文:

The following function returns the first existing column in a data frame, or NA if none of the given columns exist

library(dplyr, warn.conflicts = FALSE)
#&gt; Warning: package &#39;dplyr&#39; was built under R version 4.2.3

# `dplyr::coalesce` returns the first non-missing value, but these columns must exist

# Resolve the first of up to two bare column names that exists in a data frame.
#
# Arguments:
#   .var1, .var2  Bare (unquoted) column names, checked in this order. Either
#                 one may be omitted.
#   .fill         Value returned when none of the supplied names is a column
#                 (default NA).
#   .data         Data frame to inspect. When omitted, the object bound to `.`
#                 in the calling frame is used (the magrittr `%>%` placeholder).
#
# Returns a symbol naming the first existing column, meant to be unquoted with
# `!!` inside a dplyr verb, e.g.
#   df %>% dplyr::mutate(NEW = !!return_exist_var(v1, v2))
# or `.fill` when no supplied name matches a column.
return_exist_var <- function(.var1, .var2, .fill = NA, .data) {
  # Only fall back to the pipe placeholder when the caller gave no data frame,
  # so an explicitly supplied `.data` is honoured.
  if (missing(.data)) {
    .data <- get(".", parent.frame())
  }
  stopifnot(is.data.frame(.data))

  # Each candidate is captured only if it was supplied: a captured missing
  # argument would raise an error as soon as it is accessed.
  if (!missing(.var1)) {
    first_name <- rlang::as_string(rlang::enexpr(.var1))
    if (first_name %in% names(.data)) {
      return(rlang::sym(first_name))
    }
  }
  if (!missing(.var2)) {
    second_name <- rlang::as_string(rlang::enexpr(.var2))
    if (second_name %in% names(.data)) {
      return(rlang::sym(second_name))
    }
  }
  .fill
}

df &lt;- data.frame(
  ID = c(&quot;ID01&quot;, &quot;ID01&quot;, &quot;ID02&quot;, &quot;ID02&quot;),
  v1 = c(&quot;a&quot;, &quot;b&quot;, &quot;c&quot;, NA),
  v2 = c(&quot;A&quot;, &quot;B&quot;, &quot;C&quot;, &quot;C&quot;)
)

return first existing column

df %&gt;% dplyr::mutate(NEW = !!return_exist_var(v1))
#&gt;     ID   v1 v2  NEW
#&gt; 1 ID01    a  A    a
#&gt; 2 ID01    b  B    b
#&gt; 3 ID02    c  C    c
#&gt; 4 ID02 &lt;NA&gt;  C &lt;NA&gt;

return first existing column in two column

df %&gt;% dplyr::mutate(NEW = !!return_exist_var(v2, v1))
#&gt;     ID   v1 v2 NEW
#&gt; 1 ID01    a  A   A
#&gt; 2 ID01    b  B   B
#&gt; 3 ID02    c  C   C
#&gt; 4 ID02 &lt;NA&gt;  C   C

return second existing column in two column

df %&gt;% dplyr::mutate(NEW = !!return_exist_var(v1_x, v2))
#&gt;     ID   v1 v2 NEW
#&gt; 1 ID01    a  A   A
#&gt; 2 ID01    b  B   B
#&gt; 3 ID02    c  C   C
#&gt; 4 ID02 &lt;NA&gt;  C   C

return NA if neither column exists

df %&gt;% dplyr::mutate(NEW = !!return_exist_var(v1_X, v2_X))
#&gt;     ID   v1 v2 NEW
#&gt; 1 ID01    a  A  NA
#&gt; 2 ID01    b  B  NA
#&gt; 3 ID02    c  C  NA
#&gt; 4 ID02 &lt;NA&gt;  C  NA

work well on grouped data frame

df %&gt;%
  dplyr::group_by(ID) %&gt;%
  dplyr::mutate(NEW = !!return_exist_var(v1))
#&gt; # A tibble: 4 &#215; 4
#&gt; # Groups:   ID [2]
#&gt;   ID    v1    v2    NEW  
#&gt;   &lt;chr&gt; &lt;chr&gt; &lt;chr&gt; &lt;chr&gt;
#&gt; 1 ID01  a     A     a    
#&gt; 2 ID01  b     B     b    
#&gt; 3 ID02  c     C     c    
#&gt; 4 ID02  &lt;NA&gt;  C     &lt;NA&gt;

Problem

How can I update this function (or define a new, equivalent one) so that it can be called without `!!`, as shown below?

df %>% dplyr::mutate(NEW = return_exist_var(v1))

答案1

得分: 1

这是您的函数的简化版本，它依赖于使用 any_of() 来处理缺失的变量。由于 any_of() 需要字符向量作为参数，因此在使用之前需要解析参数。

library(dplyr)

# Return the first column, among the bare names passed in `...`, that exists
# in the data currently being processed by a dplyr verb; NA if none exists.
#
# Must be called inside a dplyr data-masking verb such as mutate(), e.g.
#   df %>% dplyr::mutate(NEW = return_exist_var(v1_x, v2))
# No `!!` is needed at the call site.
return_exist_var <- function(...) {
  # Capture the arguments unevaluated so names of absent columns are never
  # looked up as variables.
  var_list <- match.call(expand.dots = FALSE)$...
  # vapply() always produces a character vector (character(0) when called
  # with no arguments), which is the input type any_of() expects.
  var_names <- vapply(var_list, deparse, character(1))
  # any_of() ignores names that are not columns; the trailing NA becomes the
  # first element, and thus the result, when no column was picked.
  c(dplyr::pick(dplyr::any_of(var_names)), NA)[[1]]
}

df %>%
  dplyr::mutate(NEW = return_exist_var(v1))
  
    ID   v1 v2  NEW
1 ID01    a  A    a
2 ID01    b  B    b
3 ID02    c  C    c
4 ID02 <NA>  C <NA>

df %>%
  dplyr::mutate(NEW = return_exist_var(v2, v1))

    ID   v1 v2 NEW
1 ID01    a  A   A
2 ID01    b  B   B
3 ID02    c  C   C
4 ID02 <NA>  C   C

df %>%
  dplyr::mutate(NEW = return_exist_var(v1_x, v2))

    ID   v1 v2 NEW
1 ID01    a  A   A
2 ID01    b  B   B
3 ID02    c  C   C
4 ID02 <NA>  C   C

df %>% 
  dplyr::mutate(NEW = return_exist_var(v1_X, v2_X))

    ID   v1 v2  NEW
1 ID01    a  A <NA>
2 ID01    b  B <NA>
3 ID02    c  C <NA>
4 ID02 <NA>  C <NA>

# 如果数据已分组，也适用
df %>%
  mutate(NEW = return_exist_var(v1), .by = ID)

    ID   v1 v2  NEW
1 ID01    a  A    a
2 ID01    b  B    b
3 ID02    c  C    c
4 ID02 <NA>  C <NA>

英文:

Here is a simplified version of your function that relies on using any_of() to handle missing vars. As any_of() requires a character vector, the arguments need to be deparsed before use.

library(dplyr)

# Return the first column, among the bare names passed in `...`, that exists
# in the data currently being processed by a dplyr verb; NA if none exists.
#
# Must be called inside a dplyr data-masking verb such as mutate(), e.g.
#   df %>% dplyr::mutate(NEW = return_exist_var(v1_x, v2))
# No `!!` is needed at the call site.
return_exist_var <- function(...) {
  # Capture the arguments unevaluated so names of absent columns are never
  # looked up as variables.
  var_list <- match.call(expand.dots = FALSE)$...
  # vapply() always produces a character vector (character(0) when called
  # with no arguments), which is the input type any_of() expects.
  var_names <- vapply(var_list, deparse, character(1))
  # any_of() ignores names that are not columns; the trailing NA becomes the
  # first element, and thus the result, when no column was picked.
  c(dplyr::pick(dplyr::any_of(var_names)), NA)[[1]]
}

df %&gt;%
  dplyr::mutate(NEW = return_exist_var(v1))

    ID   v1 v2  NEW
1 ID01    a  A    a
2 ID01    b  B    b
3 ID02    c  C    c
4 ID02 &lt;NA&gt;  C &lt;NA&gt;

df %&gt;%
  dplyr::mutate(NEW = return_exist_var(v2, v1))

    ID   v1 v2 NEW
1 ID01    a  A   A
2 ID01    b  B   B
3 ID02    c  C   C
4 ID02 &lt;NA&gt;  C   C

df %&gt;%
  dplyr::mutate(NEW = return_exist_var(v1_x, v2))

    ID   v1 v2 NEW
1 ID01    a  A   A
2 ID01    b  B   B
3 ID02    c  C   C
4 ID02 &lt;NA&gt;  C   C

df %&gt;% 
  dplyr::mutate(NEW = return_exist_var(v1_X, v2_X))

    ID   v1 v2 NEW
1 ID01    a  A  NA
2 ID01    b  B  NA
3 ID02    c  C  NA
4 ID02 &lt;NA&gt;  C  NA

# Works if data is grouped
df %&gt;%
  mutate(NEW = return_exist_var(v1), .by = ID)

    ID   v1 v2  NEW
1 ID01    a  A    a
2 ID01    b  B    b
3 ID02    c  C    c
4 ID02 &lt;NA&gt;  C &lt;NA&gt;

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

创建新列：从第一个现有列创建，如果没有现有列，则为NA。

问题

返回第一个存在的列

返回两列中的第一个存在的列

返回两列中的第二个存在的列

如果两列不存在，则返回 NA

在分组的数据框上正常工作

return first existing column

return first existing column in two column

return second existing column in two column

return NA if neither column exists

work well on grouped data frame

Problem

答案1

你可以根据两列中是否有缺失值从数据框中删除行。

一些元素在使用 %in% seq() 时不会显示为序列的成员。

将一列进行分组，同时保留其他常数。

Marginaleffects – obtaining contrasts and plotting predictions

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

发表评论