英文:
Adding NA for Age at the start and end of the data collected when grouped by S.NO in R
问题
S.NO | AGE_1 | AGE_2 |
---|---|---|
123 | NA | 19 |
123 | 19 | 24 |
123 | 24 | NA |
124 | NA | 18 |
124 | 18 | 21 |
124 | 21 | 28 |
124 | 28 | 35 |
124 | 35 | NA |
125 | NA | 13 |
125 | 13 | 19 |
125 | 19 | 23 |
125 | 23 | NA |
126 | NA | 19 |
126 | 19 | 21 |
126 | 21 | NA |
英文:
I have a dataset that looks like the one below. For each S.NO, I am trying to add a row with NA at the beginning and at the end of the age data, as shown in the expected dataset. How can I do this in R?
Would appreciate any help. Thank you.
S.NO | AGE_1 | AGE_2 |
---|---|---|
123 | 19 | 24 |
124 | 18 | 21 |
124 | 21 | 28 |
124 | 28 | 35 |
125 | 13 | 19 |
125 | 19 | 23 |
126 | 19 | 21 |
Expected Dataset
S.NO | AGE_1 | AGE_2 |
---|---|---|
123 | NA | 19 |
123 | 19 | 24 |
123 | 24 | NA |
124 | NA | 18 |
124 | 18 | 21 |
124 | 21 | 28 |
124 | 28 | 35 |
124 | 35 | NA |
125 | NA | 13 |
125 | 13 | 19 |
125 | 19 | 23 |
125 | 23 | NA |
126 | NA | 19 |
126 | 19 | 21 |
126 | 21 | NA |
答案1
得分: 1
使用 `tibble::add_row`、`dplyr::group_split` 和 `purrr::map_dfr`,您可以执行以下操作:
library(dplyr, warn = FALSE)
library(tibble)
library(purrr)
#' Pad one S.NO group with a leading and a trailing NA row
#'
#' Prepends a row whose `AGE_2` is the group's first `AGE_1`, and appends a row
#' whose `AGE_1` is the group's last `AGE_2`. The age column that is not
#' supplied to `add_row()` is filled with `NA` in each new row.
#'
#' @param x A data frame holding the rows of a single `S.NO` value, with
#'   columns `S.NO`, `AGE_1` and `AGE_2`.
#' @return `x` with two extra rows: one before the first row and one after the
#'   last row.
pad_rows <- function(x) {
  # `x$...` always refers to the unpadded input, so the values for the trailing
  # row are not affected by the leading row added in the first step.
  # `dplyr::` is spelled out so the helper works even when dplyr is not
  # attached, matching the already-qualified `tibble::add_row()` calls.
  x |>
    tibble::add_row(
      S.NO = dplyr::first(x$S.NO),
      AGE_2 = dplyr::first(x$AGE_1),
      .before = 1
    ) |>
    tibble::add_row(
      S.NO = dplyr::last(x$S.NO),
      AGE_1 = dplyr::last(x$AGE_2),
      .after = Inf
    )
}
# Split `dat` into one tibble per S.NO, pad each piece with pad_rows(), and
# row-bind the padded pieces back into a single tibble.
purrr::map_dfr(
  group_split(dat, S.NO),
  pad_rows
)
#> # A tibble: 15 × 3
#> S.NO AGE_1 AGE_2
#> <int> <int> <int>
#> 1 123 NA 19
#> 2 123 19 24
#> 3 123 24 NA
#> 4 124 NA 18
#> 5 124 18 21
#> 6 124 21 28
#> 7 124 28 35
#> 8 124 35 NA
#> 9 125 NA 13
#> 10 125 13 19
#> 11 125 19 23
#> 12 125 23 NA
#> 13 126 NA 19
#> 14 126 19 21
#> 15 126 21 NA
数据
# Example input from the question: seven rows spread over four S.NO values.
dat <- data.frame(
  S.NO = rep(c(123L, 124L, 125L, 126L), times = c(1L, 3L, 2L, 1L)),
  AGE_1 = as.integer(c(19, 18, 21, 28, 13, 19, 19)),
  AGE_2 = as.integer(c(24, 21, 28, 35, 19, 23, 21))
)
英文:
Using `tibble::add_row`, `dplyr::group_split` and `purrr::map_dfr` you could do:
library(dplyr, warn = FALSE)
library(tibble)
library(purrr)
#' Pad one S.NO group with a leading and a trailing NA row
#'
#' Prepends a row whose `AGE_2` is the group's first `AGE_1`, and appends a row
#' whose `AGE_1` is the group's last `AGE_2`. The age column that is not
#' supplied to `add_row()` is filled with `NA` in each new row.
#'
#' @param x A data frame holding the rows of a single `S.NO` value, with
#'   columns `S.NO`, `AGE_1` and `AGE_2`.
#' @return `x` with two extra rows: one before the first row and one after the
#'   last row.
pad_rows <- function(x) {
  # `x$...` always refers to the unpadded input, so the values for the trailing
  # row are not affected by the leading row added in the first step.
  # `dplyr::` is spelled out so the helper works even when dplyr is not
  # attached, matching the already-qualified `tibble::add_row()` calls.
  x |>
    tibble::add_row(
      S.NO = dplyr::first(x$S.NO),
      AGE_2 = dplyr::first(x$AGE_1),
      .before = 1
    ) |>
    tibble::add_row(
      S.NO = dplyr::last(x$S.NO),
      AGE_1 = dplyr::last(x$AGE_2),
      .after = Inf
    )
}
# Split `dat` into one tibble per S.NO, pad each piece with pad_rows(), and
# row-bind the padded pieces back into a single tibble.
purrr::map_dfr(
  group_split(dat, S.NO),
  pad_rows
)
#> # A tibble: 15 × 3
#> S.NO AGE_1 AGE_2
#> <int> <int> <int>
#> 1 123 NA 19
#> 2 123 19 24
#> 3 123 24 NA
#> 4 124 NA 18
#> 5 124 18 21
#> 6 124 21 28
#> 7 124 28 35
#> 8 124 35 NA
#> 9 125 NA 13
#> 10 125 13 19
#> 11 125 19 23
#> 12 125 23 NA
#> 13 126 NA 19
#> 14 126 19 21
#> 15 126 21 NA
DATA
# Example input from the question: seven rows spread over four S.NO values.
dat <- data.frame(
  S.NO = rep(c(123L, 124L, 125L, 126L), times = c(1L, 3L, 2L, 1L)),
  AGE_1 = as.integer(c(19, 18, 21, 28, 13, 19, 19)),
  AGE_2 = as.integer(c(24, 21, 28, 35, 19, 23, 21))
)
答案2
得分: 1
使用 group_modify
library(dplyr)
library(tibble)
# Pad every S.NO group with a leading and a trailing NA row.
df %>%
  group_by(S.NO) %>%
  group_modify(
    # `.x` holds one group's rows; only the age columns are set here, and the
    # printed result shows S.NO filled in for the added rows as well.
    ~ .x %>%
      add_row(AGE_2 = first(.x$AGE_1), .before = 1) %>%
      add_row(AGE_1 = last(.x$AGE_2))
  ) %>%
  # Call ungroup() explicitly: a bare function name only works with `%>%`.
  ungroup()
输出:
# A tibble: 15 × 3
S.NO AGE_1 AGE_2
<int> <int> <int>
1 123 NA 19
2 123 19 24
3 123 24 NA
4 124 NA 18
5 124 18 21
6 124 21 28
7 124 28 35
8 124 35 NA
9 125 NA 13
10 125 13 19
11 125 19 23
12 125 23 NA
13 126 NA 19
14 126 19 21
15 126 21 NA
英文:
Using group_modify
library(dplyr)
library(tibble)
# Pad every S.NO group with a leading and a trailing NA row.
df %>%
  group_by(S.NO) %>%
  group_modify(
    # `.x` holds one group's rows; only the age columns are set here, and the
    # printed result shows S.NO filled in for the added rows as well.
    ~ .x %>%
      add_row(AGE_2 = first(.x$AGE_1), .before = 1) %>%
      add_row(AGE_1 = last(.x$AGE_2))
  ) %>%
  # Call ungroup() explicitly: a bare function name only works with `%>%`.
  ungroup()
Output:
# A tibble: 15 × 3
S.NO AGE_1 AGE_2
<int> <int> <int>
1 123 NA 19
2 123 19 24
3 123 24 NA
4 124 NA 18
5 124 18 21
6 124 21 28
7 124 28 35
8 124 35 NA
9 125 NA 13
10 125 13 19
11 125 19 23
12 125 23 NA
13 126 NA 19
14 126 19 21
15 126 21 NA
答案3
得分: 0
使用 `nest_by` 的方法:
library(dplyr)
library(tidyr) # unnest
# Collapse each S.NO group's ages into one vector of unique values, then
# rebuild the two age columns offset by one position, with NA at either end.
df %>%
  nest_by(S.NO) %>%
  # unlist() flattens the nested AGE_1/AGE_2 columns into a single vector and
  # unique() drops the repeated values. The closing parenthesis of mutate()
  # was missing here, which made the whole snippet a syntax error.
  mutate(data = list(unique(unlist(data)))) %>%
  unnest(data) %>%
  group_by(S.NO) %>%
  reframe(AGE_1 = c(NA, data), AGE_2 = c(data, NA))
# A tibble: 15 × 3
S.NO AGE_1 AGE_2
<int> <int> <int>
1 123 NA 19
2 123 19 24
3 123 24 NA
4 124 NA 18
5 124 18 21
6 124 21 28
7 124 28 35
8 124 35 NA
9 125 NA 13
10 125 13 19
11 125 19 23
12 125 23 NA
13 126 NA 19
14 126 19 21
15 126 21 NA
数据
# Example input from the question: seven rows spread over four S.NO values.
df <- data.frame(
  S.NO = c(123L, 124L, 124L, 124L, 125L, 125L, 126L),
  AGE_1 = c(19L, 18L, 21L, 28L, 13L, 19L, 19L),
  AGE_2 = c(24L, 21L, 28L, 35L, 19L, 23L, 21L)
)
英文:
An approach using nest_by
library(dplyr)
library(tidyr) # unnest
# Collapse each S.NO group's ages into one vector of unique values, then
# rebuild the two age columns offset by one position, with NA at either end.
df %>%
nest_by(S.NO) %>%
# unlist() flattens the nested AGE_1/AGE_2 columns into a single vector and
# unique() drops the repeated values.
mutate(data = list(unique(unlist(data)))) %>%
unnest(data) %>%
group_by(S.NO) %>%
# Prepending NA for AGE_1 and appending NA for AGE_2 yields one more row per
# group than there are unique ages.
reframe(AGE_1 = c(NA, data), AGE_2 = c(data, NA))
# A tibble: 15 × 3
S.NO AGE_1 AGE_2
<int> <int> <int>
1 123 NA 19
2 123 19 24
3 123 24 NA
4 124 NA 18
5 124 18 21
6 124 21 28
7 124 28 35
8 124 35 NA
9 125 NA 13
10 125 13 19
11 125 19 23
12 125 23 NA
13 126 NA 19
14 126 19 21
15 126 21 NA
Data
# Example input from the question: seven rows spread over four S.NO values.
df <- data.frame(
  S.NO = c(123L, 124L, 124L, 124L, 125L, 125L, 126L),
  AGE_1 = c(19L, 18L, 21L, 28L, 13L, 19L, 19L),
  AGE_2 = c(24L, 21L, 28L, 35L, 19L, 23L, 21L)
)
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论