英文:
Pivot ID's from one column to several one and pair them with another column character
问题
你可以使用下面的R代码来实现你想要的数据框转换:
library(dplyr)
library(tidyr)
df_wanted <- df %>%
group_by(ID) %>%
mutate(row = row_number()) %>%
pivot_wider(id_cols = ID, names_from = row, values_from = Fahrzeugart) %>%
rename_with(~paste0("Fahrzeug_", .), starts_with("Col_")) %>%
select(-starts_with("Col_"))
这段代码会将原始数据框 df
转换成你期望的格式,并生成新的数据框 df_wanted
,其中每个 Fahrzeugart
对应一个新的列,并且每个 ID
仅有一行。
请注意,为了运行这段代码,你需要先加载 dplyr
和 tidyr
这两个包。
英文:
My problem is the following. I have this data frame:
ID <- c(1,2,NA,3,NA,4,NA,NA,5,NA,NA,NA)
Col_1 <- c(NA,45,NA,23,1,2,8,NA,78,12,NA,19)
Objekt.Nr. <- c(1,1,2,1,2,1,2,3,1,2,3,4)
Fahrzeugart <- c("E-Bike", "Fahrrad", "Fahrrad", "Fahrrad", "Bus", "Bus", "Fahrrad", "Auto", "E-Bike", "Fahrrad", "Fahrrad", "Fahrrad")
Col_2 <- c(1,2,3,4,NA,5,6,7,NA,89,10,12)
df <- data.frame(ID,Col_1, Objekt.Nr., Fahrzeugart, Col_2)
I need to transform it so that there is only one row for every ID, not several like there are now. For that, I need to pivot the data frame so that every object Objekt.Nr will correspond to a new column with the Fahrzeugart.
My goal is that the data frame will look like this:
ID <- c(1,2,3,4,5)
Fahrzeug_1 <- c("E-Bike","Fahrrad","Fahrrad","Bus","E-Bike")
Fahrzeug_2 <- c(NA, "Fahrrad", "Bus", "Fahrrad", "Fahrrad")
Fahrzeug_3 <- c(NA,NA,NA, "Auto", "Fahrrad")
Fahrzeug_4 <- c(NA,NA,NA,NA, "Fahrrad")
col_1 <- c(1,(2,3)...) #merged for every ID
same for Col_2
df_wanted <- data.frame(ID,Fahrzeug_1,Fahrzeug_2,Fahrzeug_3,Fahrzeug_4)
I tried using this code, but it will only return binary values for "Fahrzeugart":
df_melted <- melt(df, id.vars = c("ID"), measure.vars = c("Fahrzeugart"))
df_wanted <- dcast(df_melted, ID ~ Objekt.Nr., value.var = "Fahrzeugart")
Thank you very much!
答案1
得分: 2
你可以使用 tidyr
包中的 fill()
函数来填充缺失的 ID 值,然后再使用 tidyr
包中的 pivot_wider()
函数将数据从长格式转换为宽格式。
library(dplyr)
library(tidyr)
ID <- c(1,2,NA,3,NA,4,NA,NA,5,NA,NA,NA)
Objekt.Nr. <- c(1,1,2,1,2,1,2,3,1,2,3,4)
Fahrzeugart <- c("E-Bike", "Fahrrad", "Fahrrad", "Fahrrad", "Bus", "Bus", "Fahrrad", "Auto", "E-Bike", "Fahrrad", "Fahrrad", "Fahrrad")
df <- data.frame(ID, Objekt.Nr., Fahrzeugart)
df %>%
fill(ID, .direction="down") %>%
pivot_wider(names_from="Objekt.Nr.", values_from = "Fahrzeugart", names_prefix="Fahrzeugart_")
#> # A tibble: 5 × 5
#> ID Fahrzeugart_1 Fahrzeugart_2 Fahrzeugart_3 Fahrzeugart_4
#> <dbl> <chr> <chr> <chr> <chr>
#> 1 1 E-Bike <NA> <NA> <NA>
#> 2 2 Fahrrad Fahrrad <NA> <NA>
#> 3 3 Fahrrad Bus <NA> <NA>
#> 4 4 Bus Fahrrad Auto <NA>
#> 5 5 E-Bike Fahrrad Fahrrad Fahrrad
如果有其他列的话,你可以使用以下方法,允许在数据中包含一些列表列:
library(dplyr)
library(tidyr)
ID <- c(1,2,NA,3,NA,4,NA,NA,5,NA,NA,NA)
Col_1 <- c(NA,45,NA,23,1,2,8,NA,78,12,NA,19)
Objekt.Nr. <- c(1,1,2,1,2,1,2,3,1,2,3,4)
Fahrzeugart <- c("E-Bike", "Fahrrad", "Fahrrad", "Fahrrad", "Bus", "Bus", "Fahrrad", "Auto", "E-Bike", "Fahrrad", "Fahrrad", "Fahrrad")
Col_2 <- c(1,2,3,4,NA,5,6,7,NA,89,10,12)
df <- data.frame(ID,Col_1, Objekt.Nr., Fahrzeugart, Col_2)
df %>%
fill(ID, .direction="down") %>%
pivot_wider(id_cols=ID,
names_from="Objekt.Nr.",
values_from = "Fahrzeugart",
names_prefix="Fahrzeugart_",
unused_fn = list)
#> # A tibble: 5 × 7
#> ID Fahrzeugart_1 Fahrzeugart_2 Fahrzeugart_3 Fahrzeugart_4 Col_1 Col_2
#> <dbl> <chr> <chr> <chr> <chr> <list> <list>
#> 1 1 E-Bike <NA> <NA> <NA> <dbl [1]> <dbl>
#> 2 2 Fahrrad Fahrrad <NA> <NA> <dbl [2]> <dbl>
#> 3 3 Fahrrad Bus <NA> <NA> <dbl [2]> <dbl>
#> 4 4 Bus Fahrrad Auto <NA> <dbl [3]> <dbl>
#> 5 5 E-Bike Fahrrad Fahrrad Fahrrad <dbl [4]> <dbl>
英文:
You can use fill()
from the tidyr
package to fill in the missing ID values and then pivot_wider()
also from the tidyr
package to change from long to wide-form.
library(dplyr)
library(tidyr)
ID <- c(1,2,NA,3,NA,4,NA,NA,5,NA,NA,NA)
Objekt.Nr. <- c(1,1,2,1,2,1,2,3,1,2,3,4)
Fahrzeugart <- c("E-Bike", "Fahrrad", "Fahrrad", "Fahrrad", "Bus", "Bus", "Fahrrad", "Auto", "E-Bike", "Fahrrad", "Fahrrad", "Fahrrad")
df <- data.frame(ID, Objekt.Nr., Fahrzeugart)
df %>%
fill(ID, .direction="down") %>%
pivot_wider(names_from="Objekt.Nr.", values_from = "Fahrzeugart", names_prefix="Fahrzeugart_")
#> # A tibble: 5 × 5
#> ID Fahrzeugart_1 Fahrzeugart_2 Fahrzeugart_3 Fahrzeugart_4
#> <dbl> <chr> <chr> <chr> <chr>
#> 1 1 E-Bike <NA> <NA> <NA>
#> 2 2 Fahrrad Fahrrad <NA> <NA>
#> 3 3 Fahrrad Bus <NA> <NA>
#> 4 4 Bus Fahrrad Auto <NA>
#> 5 5 E-Bike Fahrrad Fahrrad Fahrrad
<sup>Created on 2023-02-14 by the reprex package (v2.0.1)</sup>
Edit: what if there are other columns
If you're alright having some list columns in your data, you could do the following:
library(dplyr)
library(tidyr)
ID <- c(1,2,NA,3,NA,4,NA,NA,5,NA,NA,NA)
Col_1 <- c(NA,45,NA,23,1,2,8,NA,78,12,NA,19)
Objekt.Nr. <- c(1,1,2,1,2,1,2,3,1,2,3,4)
Fahrzeugart <- c("E-Bike", "Fahrrad", "Fahrrad", "Fahrrad", "Bus", "Bus", "Fahrrad", "Auto", "E-Bike", "Fahrrad", "Fahrrad", "Fahrrad")
Col_2 <- c(1,2,3,4,NA,5,6,7,NA,89,10,12)
df <- data.frame(ID,Col_1, Objekt.Nr., Fahrzeugart, Col_2)
df %>%
fill(ID, .direction="down") %>%
pivot_wider(id_cols=ID,
names_from="Objekt.Nr.",
values_from = "Fahrzeugart",
names_prefix="Fahrzeugart_",
unused_fn = list)
#> # A tibble: 5 × 7
#> ID Fahrzeugart_1 Fahrzeugart_2 Fahrzeugart_3 Fahrzeugart_4 Col_1 Col_2
#> <dbl> <chr> <chr> <chr> <chr> <list> <list>
#> 1 1 E-Bike <NA> <NA> <NA> <dbl [1]> <dbl>
#> 2 2 Fahrrad Fahrrad <NA> <NA> <dbl [2]> <dbl>
#> 3 3 Fahrrad Bus <NA> <NA> <dbl [2]> <dbl>
#> 4 4 Bus Fahrrad Auto <NA> <dbl [3]> <dbl>
#> 5 5 E-Bike Fahrrad Fahrrad Fahrrad <dbl [4]> <dbl>
<sup>Created on 2023-02-14 by the reprex package (v2.0.1)</sup>
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论