2023年3月9日 16:10:37go评论153阅读模式

英文:

Update run-length ID but skip NA

问题

给定这种类型的数据：

df <- data.frame(
  ID = 1:10,
  Sequ = c(NA, 44, 44, NA, NA, 33, 33, 33, 5, 5),
  Q = c(NA, "q1", "q1", NA, NA, "q2", "q2", "q2", "q2", "q2")
)

如何以比以下操作更高效的方式来更新 Sequ 的 run-length ID：

library(dplyr)
library(data.table)
left_join(df, df %>%
  filter(!is.na(Sequ)) %>%
  mutate(Sequ_0 = rleid(Sequ))) %>%
  select(-Sequ)

备注：尽管我使用了 data.table 中的 rleid，但我正在寻找 tidyverse 的解决方案。

英文:

Given this type of data:

df <- data.frame(
  ID = 1:10,
  Sequ = c(NA, 44,44, NA, NA, 33,33,33, 5,5),
  Q = c(NA, "q1","q1", NA, NA, "q2","q2","q2", "q2","q2")
)

how can I update the run-length ID of Sequ more efficiently than by doing this:

library(dplyr)
library(data.table)
left_join(df, df %>%
  filter(!is.na(Sequ)) %>%
  mutate(Sequ_0 = rleid(Sequ))) %>%
  select(-Sequ)
   ID    Q Sequ_0
1   1 <NA>     NA
2   2   q1      1
3   3   q1      1
4   4 <NA>     NA
5   5 <NA>     NA
6   6   q2      2
7   7   q2      2
8   8   q2      2
9   9   q2      3
10 10   q2      3

Note: although I'm using rleid from data.table I'm looking for a tidyverse solution.

答案1

得分: 5

    df %>%
       mutate(Sequ_0 = dense_rank(NA^is.na(Q)*consecutive_id(Sequ)))

       ID Sequ    Q Sequ_0
    1   1   NA <NA>     NA
    2   2   44   q1      1
    3   3   44   q1      1
    4   4   NA <NA>     NA
    5   5   NA <NA>     NA
    6   6   33   q2      2
    7   7   33   q2      2
    8   8   33   q2      2
    9   9    5   q2      3
    10 10    5   q2      3

    df %>%
       mutate(Sequ_0 = dense_rank(`is.na<-`(consecutive_id(Sequ), is.na(Q))))

    df %>%
      mutate(Sequ_0 =  replace(Q, !is.na(Q), consecutive_id(na.omit(Sequ))))

英文:

df %>%
   mutate(Sequ_0 = dense_rank(NA^is.na(Q)*consecutive_id(Sequ)))

   ID Sequ    Q Sequ_0
1   1   NA <NA>     NA
2   2   44   q1      1
3   3   44   q1      1
4   4   NA <NA>     NA
5   5   NA <NA>     NA
6   6   33   q2      2
7   7   33   q2      2
8   8   33   q2      2
9   9    5   q2      3
10 10    5   q2      3

df %>%
   mutate(Sequ_0 = dense_rank(`is.na<-`(consecutive_id(Sequ), is.na(Q))))

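Also (note that this variant returns Sequ_0 as a character column, because the IDs are written into a copy of the character vector Q; wrap the result in as.integer() if a numeric ID is needed):

df %>%
  mutate(Sequ_0 =  replace(Q, !is.na(Q), consecutive_id(na.omit(Sequ))))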

答案2

得分: 4

使用consecutive_id来获取ID，然后排除NA值，将其转换为因子，再转换回数字以获得连续的数字：

library(dplyr) # dplyr_1.1.0 - consecutive_id

df %>%
  mutate(id = as.numeric(as.factor(
    if_else(is.na(Sequ), NA, consecutive_id(Sequ)))))
#    ID Sequ    Q id
# 1   1   NA <NA> NA
# 2   2   44   q1  1
# 3   3   44   q1  1
# 4   4   NA <NA> NA
# 5   5   NA <NA> NA
# 6   6   33   q2  2
# 7   7   33   q2  2
# 8   8   33   q2  2
# 9   9    5   q2  3
# 10 10    5   q2  3

英文:

Using consecutive_id to get ids, then exclude NAs, convert to factor, then back to numeric to have sequential numbers:

library(dplyr) # dplyr_1.1.0 - consecutive_id

df %>%
  mutate(id = as.numeric(as.factor(
    if_else(is.na(Sequ), NA, consecutive_id(Sequ)))))
#    ID Sequ    Q id
# 1   1   NA <NA> NA
# 2   2   44   q1  1
# 3   3   44   q1  1
# 4   4   NA <NA> NA
# 5   5   NA <NA> NA
# 6   6   33   q2  2
# 7   7   33   q2  2
# 8   8   33   q2  2
# 9   9    5   q2  3
# 10 10    5   q2  3

答案3

得分: 3

这里是另一种使用 arrange 和 consecutive_id 的选项：

library(dplyr) #dplyr >= 1.1.0
df %>%
  arrange(Q) %>%
  mutate(Sequ_0 = consecutive_id(Sequ),
         Sequ_0 = ifelse(is.na(Sequ), NA_real_, Sequ_0)) %>%
  arrange(ID)

   ID Sequ    Q Sequ_0
1   1   NA <NA>     NA
2   2   44   q1      1
3   3   44   q1      1
4   4   NA <NA>     NA
5   5   NA <NA>     NA
6   6   33   q2      2
7   7   33   q2      2
8   8   33   q2      2
9   9    5   q2      3
10 10    5   q2      3

英文:

Here is another option using arrange with consecutive_id:

library(dplyr) #dplyr >= 1.1.0
df %>%
  arrange(Q) %>%
  mutate(Sequ_0 =  consecutive_id(Sequ),
         Sequ_0 = ifelse(is.na(Sequ), NA_real_, Sequ_0)) %>%
  arrange(ID)

   ID Sequ    Q Sequ_0
1   1   NA <NA>     NA
2   2   44   q1      1
3   3   44   q1      1
4   4   NA <NA>     NA
5   5   NA <NA>     NA
6   6   33   q2      2
7   7   33   q2      2
8   8   33   q2      2
9   9    5   q2      3
10 10    5   q2      3

答案4

得分: 1

一个选项是使用 match 和 unique 来创建 ID，如下所示：

library(tidyverse)
df %>%
  left_join(., df %>%
              drop_na() %>%
              mutate(Sequ_0 = match(Sequ, unique(Sequ))))
#> Joining with `by = join_by(ID, Sequ, Q)`
#>    ID Sequ    Q Sequ_0
#> 1   1   NA <NA>     NA
#> 2   2   44   q1      1
#> 3   3   44   q1      1
#> 4   4   NA <NA>     NA
#> 5   5   NA <NA>     NA
#> 6   6   33   q2      2
#> 7   7   33   q2      2
#> 8   8   33   q2      2
#> 9   9    5   q2      3
#> 10 10    5   q2      3

<sup>创建于 2023-03-09，使用 reprex v2.0.2。</sup>

英文:

An option could be using match with unique to create the ID like this:

library(tidyverse)
df %>%
  left_join(., df %>%
              drop_na() %>%
              mutate(Sequ_0 = match(Sequ, unique(Sequ))))
#> Joining with `by = join_by(ID, Sequ, Q)`
#>    ID Sequ    Q Sequ_0
#> 1   1   NA <NA>     NA
#> 2   2   44   q1      1
#> 3   3   44   q1      1
#> 4   4   NA <NA>     NA
#> 5   5   NA <NA>     NA
#> 6   6   33   q2      2
#> 7   7   33   q2      2
#> 8   8   33   q2      2
#> 9   9    5   q2      3
#> 10 10    5   q2      3

<sup>Created on 2023-03-09 with reprex v2.0.2</sup>

答案5

得分: 1

在基本R中：

```R
transform(df, seq0 = with(rle(Sequ), 
              rep(`[<-`(values, !is.na(values), seq(na.omit(values))), lengths)))

       ID Sequ    Q seq0
    1   1   NA <NA>   NA
    2   2   44   q1    1
    3   3   44   q1    1
    4   4   NA <NA>   NA
    5   5   NA <NA>   NA
    6   6   33   q2    2
    7   7   33   q2    2
    8   8   33   q2    2
    9   9    5   q2    3
    10 10    5   q2    3
```

---
回到基础：

```R
a <- rle(df$Sequ)
a$values[!is.na(a$values)] <- seq_along(na.omit(a$values))
cbind(df, sequ_0 = inverse.rle(a))

       ID Sequ    Q sequ_0
    1   1   NA <NA>     NA
    2   2   44   q1      1
    3   3   44   q1      1
    4   4   NA <NA>     NA
    5   5   NA <NA>     NA
    6   6   33   q2      2
    7   7   33   q2      2
    8   8   33   q2      2
    9   9    5   q2      3
    10 10    5   q2      3
```

英文:

in base R:

transform(df, seq0 = with(rle(Sequ), 
          rep(`[<-`(values, !is.na(values), seq(na.omit(values))), lengths)))

   ID Sequ    Q seq0
1   1   NA <NA>   NA
2   2   44   q1    1
3   3   44   q1    1
4   4   NA <NA>   NA
5   5   NA <NA>   NA
6   6   33   q2    2
7   7   33   q2    2
8   8   33   q2    2
9   9    5   q2    3
10 10    5   q2    3

Back to basics:

a <- rle(df$Sequ)
a$values[!is.na(a$values)] <- seq_along(na.omit(a$values))
cbind(df, sequ_0 = inverse.rle(a))

   ID Sequ    Q sequ_0
1   1   NA <NA>     NA
2   2   44   q1      1
3   3   44   q1      1
4   4   NA <NA>     NA
5   5   NA <NA>     NA
6   6   33   q2      2
7   7   33   q2      2
8   8   33   q2      2
9   9    5   q2      3
10 10    5   q2      3

通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库，让每个人都能够通过互相帮助和分享经验来进步。

Update run-length ID but skip NA.

问题

答案1

答案2

答案3

答案4

答案5

如何在 macOS 上使用文件选择对话框在 R 中保存文件？

重新定义因子水平和组内顺序。

确定每个群体对总金额的贡献在R中如何。

如何绘制自定义广义加性模型（GAM）概率的逻辑图？

What's the correct way to type hint an empty list as a literal in python?

如何在Highcharts Gantt中更改本地化的星期名称

如何在同一个流中使用多个过滤器和映射函数？

如何使用Map/Set来将代码优化到O(n)？

.NET MAUI Android在GitHub Actions上构建失败，错误代码为1。

如何在Playwright视觉比较中屏蔽多个定位器？

在C++中，可以使用可变模板参数来检索类型的内部类型。

selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: stale element not found

Creating and opening a URL to log in to Website via Basic Auth with Robot Framework/Selenium (Python)

AG Grid 在上下文菜单中以大文本形式打开

发表评论