英文:
Update run-length ID but skip NA
问题
给定这种类型的数据:
# Example data: `Sequ` holds runs of equal values separated by NA gaps, and
# `Q` is NA on exactly the rows where `Sequ` is NA.
df <- data.frame(
  ID = seq_len(10),
  Sequ = rep(c(NA, 44, NA, 33, 5), times = c(1, 2, 2, 3, 2)),
  Q = rep(c(NA, "q1", NA, "q2"), times = c(1, 2, 2, 5))
)
如何以比以下操作更高效的方式来更新 Sequ
的 run-length ID:
library(dplyr)
library(data.table)
# Number the runs on the rows where `Sequ` is present, then join those ids
# back onto the full data so rows with a missing `Sequ` get NA.
df %>%
  left_join(
    df %>%
      filter(!is.na(Sequ)) %>%
      mutate(Sequ_0 = rleid(Sequ))
  ) %>%
  select(-Sequ)
备注:尽管我使用了 data.table
中的 rleid
,但我正在寻找 tidyverse
的解决方案。
英文:
Given this type of data:
# Example data: `Sequ` holds runs of equal values separated by NA gaps, and
# `Q` is NA on exactly the rows where `Sequ` is NA.
df <- data.frame(
  ID = seq_len(10),
  Sequ = rep(c(NA, 44, NA, 33, 5), times = c(1, 2, 2, 3, 2)),
  Q = rep(c(NA, "q1", NA, "q2"), times = c(1, 2, 2, 5))
)
how can I update the run-length ID of Sequ
more efficiently than by doing this:
library(dplyr)
library(data.table)
# Number the runs on the rows where `Sequ` is present, then join those ids
# back onto the full data so rows with a missing `Sequ` get NA.
df %>%
  left_join(
    df %>%
      filter(!is.na(Sequ)) %>%
      mutate(Sequ_0 = rleid(Sequ))
  ) %>%
  select(-Sequ)
ID Q Sequ_0
1 1 <NA> NA
2 2 q1 1
3 3 q1 1
4 4 <NA> NA
5 5 <NA> NA
6 6 q2 2
7 7 q2 2
8 8 q2 2
9 9 q2 3
10 10 q2 3
Note: although I'm using rleid from data.table, I'm looking for a tidyverse solution.
答案1
得分: 5
# NA^TRUE is NA and NA^FALSE is 1, so the multiplier blanks the run id on
# rows where `Q` is missing and leaves it untouched elsewhere; dense_rank()
# then renumbers the remaining ids without gaps.
df %>%
  mutate(Sequ_0 = dense_rank(consecutive_id(Sequ) * NA^is.na(Q)))
ID Sequ Q Sequ_0
1 1 NA <NA> NA
2 2 44 q1 1
3 3 44 q1 1
4 4 NA <NA> NA
5 5 NA <NA> NA
6 6 33 q2 2
7 7 33 q2 2
8 8 33 q2 2
9 9 5 q2 3
10 10 5 q2 3
# Same idea with an explicit replacement: set the run id to NA wherever `Q`
# is missing, then let dense_rank() renumber what is left without gaps.
df %>%
  mutate(Sequ_0 = dense_rank(replace(consecutive_id(Sequ), is.na(Q), NA)))
# Compute run ids on the non-missing `Sequ` values only and write them into
# an all-NA integer vector. Starting from the character column `Q` would
# coerce the ids to strings ("1", "2", ...), and masking on `Sequ` itself
# keeps the mask and the replacement values the same length.
df %>%
  mutate(Sequ_0 = replace(
    rep(NA_integer_, n()),
    !is.na(Sequ),
    consecutive_id(Sequ[!is.na(Sequ)])
  ))
英文:
# NA^TRUE is NA and NA^FALSE is 1, so the multiplier blanks the run id on
# rows where `Q` is missing and leaves it untouched elsewhere; dense_rank()
# then renumbers the remaining ids without gaps.
df %>%
  mutate(Sequ_0 = dense_rank(consecutive_id(Sequ) * NA^is.na(Q)))
ID Sequ Q Sequ_0
1 1 NA <NA> NA
2 2 44 q1 1
3 3 44 q1 1
4 4 NA <NA> NA
5 5 NA <NA> NA
6 6 33 q2 2
7 7 33 q2 2
8 8 33 q2 2
9 9 5 q2 3
10 10 5 q2 3
# Same idea with an explicit replacement: set the run id to NA wherever `Q`
# is missing, then let dense_rank() renumber what is left without gaps.
df %>%
  mutate(Sequ_0 = dense_rank(replace(consecutive_id(Sequ), is.na(Q), NA)))
Also:
# Compute run ids on the non-missing `Sequ` values only and write them into
# an all-NA integer vector. Starting from the character column `Q` would
# coerce the ids to strings ("1", "2", ...), and masking on `Sequ` itself
# keeps the mask and the replacement values the same length.
df %>%
  mutate(Sequ_0 = replace(
    rep(NA_integer_, n()),
    !is.na(Sequ),
    consecutive_id(Sequ[!is.na(Sequ)])
  ))
答案2
得分: 4
使用consecutive_id来获取ID,然后排除NA值,将其转换为因子,再转换回数字以获得连续的数字:
library(dplyr) # dplyr_1.1.0 - consecutive_id
# Blank the run id where `Sequ` is missing, then round-trip through a factor:
# the integer codes of the factor are the remaining ids renumbered 1, 2, 3...
df %>%
  mutate(
    id = if_else(is.na(Sequ), NA, consecutive_id(Sequ)) %>%
      as.factor() %>%
      as.numeric()
  )
# ID Sequ Q id
# 1 1 NA <NA> NA
# 2 2 44 q1 1
# 3 3 44 q1 1
# 4 4 NA <NA> NA
# 5 5 NA <NA> NA
# 6 6 33 q2 2
# 7 7 33 q2 2
# 8 8 33 q2 2
# 9 9 5 q2 3
# 10 10 5 q2 3
英文:
Using consecutive_id to get ids, then exclude NAs, convert to factor, then back to numeric to have sequential numbers:
library(dplyr) # dplyr_1.1.0 - consecutive_id
# Blank the run id where `Sequ` is missing, then round-trip through a factor:
# the integer codes of the factor are the remaining ids renumbered 1, 2, 3...
df %>%
  mutate(
    id = if_else(is.na(Sequ), NA, consecutive_id(Sequ)) %>%
      as.factor() %>%
      as.numeric()
  )
# ID Sequ Q id
# 1 1 NA <NA> NA
# 2 2 44 q1 1
# 3 3 44 q1 1
# 4 4 NA <NA> NA
# 5 5 NA <NA> NA
# 6 6 33 q2 2
# 7 7 33 q2 2
# 8 8 33 q2 2
# 9 9 5 q2 3
# 10 10 5 q2 3
答案3
得分: 3
这里是另一种使用 arrange
和 consecutive_id
的选项:
library(dplyr) #dplyr >= 1.1.0
# Sorting by `Q` moves the rows with a missing `Q` to the end, so the run
# ids of the remaining rows are consecutive; the ids on rows with a missing
# `Sequ` are then blanked and the original `ID` order is restored.
df %>%
  arrange(Q) %>%
  mutate(Sequ_0 = ifelse(is.na(Sequ), NA_real_, consecutive_id(Sequ))) %>%
  arrange(ID)
ID Sequ Q Sequ_0
1 1 NA <NA> NA
2 2 44 q1 1
3 3 44 q1 1
4 4 NA <NA> NA
5 5 NA <NA> NA
6 6 33 q2 2
7 7 33 q2 2
8 8 33 q2 2
9 9 5 q2 3
10 10 5 q2 3
英文:
Here is another option using arrange with consecutive_id:
library(dplyr) #dplyr >= 1.1.0
# Sorting by `Q` moves the rows with a missing `Q` to the end, so the run
# ids of the remaining rows are consecutive; the ids on rows with a missing
# `Sequ` are then blanked and the original `ID` order is restored.
df %>%
  arrange(Q) %>%
  mutate(Sequ_0 = ifelse(is.na(Sequ), NA_real_, consecutive_id(Sequ))) %>%
  arrange(ID)
ID Sequ Q Sequ_0
1 1 NA <NA> NA
2 2 44 q1 1
3 3 44 q1 1
4 4 NA <NA> NA
5 5 NA <NA> NA
6 6 33 q2 2
7 7 33 q2 2
8 8 33 q2 2
9 9 5 q2 3
10 10 5 q2 3
答案4
得分: 1
一个选项是使用 match 和 unique 来创建 ID,如下所示:
library(tidyverse)
# Drop every row that contains an NA, number the distinct `Sequ` values in
# order of first appearance, and join the ids back onto the full data.
# NOTE(review): match()/unique() numbers distinct values, not runs, so a
# value that reappears in a later run would reuse its earlier id.
df %>%
  left_join(
    df %>%
      drop_na() %>%
      mutate(Sequ_0 = match(Sequ, unique(Sequ)))
  )
#> Joining with `by = join_by(ID, Sequ, Q)`
#> ID Sequ Q Sequ_0
#> 1 1 NA <NA> NA
#> 2 2 44 q1 1
#> 3 3 44 q1 1
#> 4 4 NA <NA> NA
#> 5 5 NA <NA> NA
#> 6 6 33 q2 2
#> 7 7 33 q2 2
#> 8 8 33 q2 2
#> 9 9 5 q2 3
#> 10 10 5 q2 3
创建于2023年03月09日,使用reprex v2.0.2。
英文:
An option could be using match
with unique
to create the ID like this:
library(tidyverse)
# Drop every row that contains an NA, number the distinct `Sequ` values in
# order of first appearance, and join the ids back onto the full data.
# NOTE(review): match()/unique() numbers distinct values, not runs, so a
# value that reappears in a later run would reuse its earlier id.
df %>%
  left_join(
    df %>%
      drop_na() %>%
      mutate(Sequ_0 = match(Sequ, unique(Sequ)))
  )
#> Joining with `by = join_by(ID, Sequ, Q)`
#> ID Sequ Q Sequ_0
#> 1 1 NA <NA> NA
#> 2 2 44 q1 1
#> 3 3 44 q1 1
#> 4 4 NA <NA> NA
#> 5 5 NA <NA> NA
#> 6 6 33 q2 2
#> 7 7 33 q2 2
#> 8 8 33 q2 2
#> 9 9 5 q2 3
#> 10 10 5 q2 3
<sup>Created on 2023-03-09 with reprex v2.0.2</sup>
答案5
得分: 1
在基本R中:
```R
# Run-length encode `Sequ`, renumber the non-NA runs 1, 2, 3, ... (NA runs
# stay NA), then expand the run values back to one id per row.
transform(df, seq0 = with(rle(Sequ), {
  keep <- !is.na(values)
  # Count the runs instead of calling seq() on the values: seq() applied to
  # a single number n returns 1:n, which breaks when only one run is non-NA.
  values[keep] <- seq_len(sum(keep))
  rep(values, lengths)
}))
ID Sequ Q seq0
1 1 NA <NA> NA
2 2 44 q1 1
3 3 44 q1 1
4 4 NA <NA> NA
5 5 NA <NA> NA
6 6 33 q2 2
7 7 33 q2 2
8 8 33 q2 2
9 9 5 q2 3
10 10 5 q2 3
```

---
回到基础:
```R
# Run-length encode `Sequ`, overwrite the non-NA run values with 1, 2, 3, ...
# and expand the runs back to one id per row.
a <- rle(df$Sequ)
a$values <- replace(
  a$values,
  !is.na(a$values),
  seq_len(sum(!is.na(a$values)))
)
cbind(df, sequ_0 = rep(a$values, a$lengths))
ID Sequ Q sequ_0
1 1 NA <NA> NA
2 2 44 q1 1
3 3 44 q1 1
4 4 NA <NA> NA
5 5 NA <NA> NA
6 6 33 q2 2
7 7 33 q2 2
8 8 33 q2 2
9 9 5 q2 3
10 10 5 q2 3
```
英文:
In base R:
# Run-length encode `Sequ`, renumber the non-NA runs 1, 2, 3, ... (NA runs
# stay NA), then expand the run values back to one id per row.
transform(df, seq0 = with(rle(Sequ), {
  keep <- !is.na(values)
  # Count the runs instead of calling seq() on the values: seq() applied to
  # a single number n returns 1:n, which breaks when only one run is non-NA.
  values[keep] <- seq_len(sum(keep))
  rep(values, lengths)
}))
ID Sequ Q seq0
1 1 NA <NA> NA
2 2 44 q1 1
3 3 44 q1 1
4 4 NA <NA> NA
5 5 NA <NA> NA
6 6 33 q2 2
7 7 33 q2 2
8 8 33 q2 2
9 9 5 q2 3
10 10 5 q2 3
Back to basics:
# Run-length encode `Sequ`, overwrite the non-NA run values with 1, 2, 3, ...
# and expand the runs back to one id per row.
a <- rle(df$Sequ)
a$values <- replace(
  a$values,
  !is.na(a$values),
  seq_len(sum(!is.na(a$values)))
)
cbind(df, sequ_0 = rep(a$values, a$lengths))
ID Sequ Q sequ_0
1 1 NA <NA> NA
2 2 44 q1 1
3 3 44 q1 1
4 4 NA <NA> NA
5 5 NA <NA> NA
6 6 33 q2 2
7 7 33 q2 2
8 8 33 q2 2
9 9 5 q2 3
10 10 5 q2 3
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论