英文:
Separate numbers in parentheses into different columns
问题
根据以下示例数据,如何将数字分隔成两列,即X
和Y
?
样本数据
# Sample input: bracketed "[x,y]" coordinate strings, NA where a row is missing.
Coordinates <- c(
  "[-1.74589,6.74885]", NA, NA, NA, NA,
  "[-1.582775,6.100376]", "[-1.68144,6.63265]", NA, NA, NA,
  "[-1.98006,6.33484]", "[-0.94156,6.63623]"
)
# One-column data frame; the column is named after the vector.
df <- data.frame(Coordinates = Coordinates)
期望输出:
Coordinates X Y
[-1.74589,6.74885] -1.74589 6.74885
NA NA NA
NA NA NA
NA NA NA
NA NA NA
[-1.582775,6.100376] -1.582775 6.100376
[-1.68144,6.63265] -1.68144 6.63265
NA NA NA
NA NA NA
NA NA NA
[-1.98006,6.33484] -1.98006 6.33484
[-0.94156,6.63623] -0.94156 6.63623
英文:
Based on the sample data below, how can I separate the numbers into two different columns, i.e., X
and Y
?
Sample data
# Sample input: bracketed "[x,y]" coordinate strings, NA where a row is missing.
Coordinates <- c(
  "[-1.74589,6.74885]",
  NA, NA, NA, NA,
  "[-1.582775,6.100376]",
  "[-1.68144,6.63265]",
  NA, NA, NA,
  "[-1.98006,6.33484]",
  "[-0.94156,6.63623]"
)
# One-column data frame; the column is named after the vector.
df <- data.frame(Coordinates = Coordinates)
Desired Output:
Coordinates X Y
[-1.74589,6.74885] -1.74589 6.74885
NA NA NA
NA NA NA
NA NA NA
NA NA NA
[-1.582775,6.100376] -1.582775 6.100376
[-1.68144,6.63265] -1.68144 6.63265
NA NA NA
NA NA NA
NA NA NA
[-1.98006,6.33484] -1.98006 6.33484
[-0.94156,6.63623] -0.94156 6.63623
答案1
得分: 2
一个潜在的选择是使用tidyr包中的separate()
函数:
library(tidyverse)

# Sample input: bracketed "[x,y]" strings with NA for missing rows.
Coordinates <- c(
  "[-1.74589,6.74885]", NA, NA, NA, NA,
  "[-1.582775,6.100376]", "[-1.68144,6.63265]", NA, NA, NA,
  "[-1.98006,6.33484]", "[-0.94156,6.63623]"
)
df <- data.frame(Coordinates = Coordinates)

# Split each string at the comma into X and Y (keeping the original column),
# then strip the leftover "[" and "]" from the two new columns.
df %>%
  separate(
    col = Coordinates,
    into = c("X", "Y"),
    sep = ",",
    remove = FALSE
  ) %>%
  mutate(across(c(X, Y), function(v) gsub("\\[|\\]", "", v)))
#> Coordinates X Y
#> 1 [-1.74589,6.74885] -1.74589 6.74885
#> 2 <NA> <NA> <NA>
#> 3 <NA> <NA> <NA>
#> 4 <NA> <NA> <NA>
#> 5 <NA> <NA> <NA>
#> 6 [-1.582775,6.100376] -1.582775 6.100376
#> 7 [-1.68144,6.63265] -1.68144 6.63265
#> 8 <NA> <NA> <NA>
#> 9 <NA> <NA> <NA>
#> 10 <NA> <NA> <NA>
#> 11 [-1.98006,6.33484] -1.98006 6.33484
#> 12 [-0.94156,6.63623] -0.94156 6.63623
<sup>创建于2023年2月8日,使用reprex v2.0.2</sup>
注意:与@Darren Tsai的extract()
方法不同,这不会更改"X"和"Y"的数据类型为numeric。
# Same split-and-strip pipeline, piped into str() to inspect the column types:
# X and Y come out as character, not numeric.
df %>%
  separate(
    col = Coordinates,
    into = c("X", "Y"),
    sep = ",",
    remove = FALSE
  ) %>%
  mutate(across(c(X, Y), function(v) gsub("\\[|\\]", "", v))) %>%
  str()
# 'data.frame': 12 obs. of 3 variables:
# $ Coordinates: chr "[-1.74589,6.74885]" NA NA NA ...
# $ X : chr "-1.74589" NA NA NA ...
# $ Y : chr "6.74885" NA NA NA ...
虽然如果需要,你可以将"X"和"Y"更改为numeric:
# Strip the brackets and convert in one step so X and Y end up numeric;
# str() confirms the resulting column types.
df %>%
  separate(
    col = Coordinates,
    into = c("X", "Y"),
    sep = ",",
    remove = FALSE
  ) %>%
  mutate(across(c(X, Y), function(v) as.numeric(gsub("\\[|\\]", "", v)))) %>%
  str()
# 'data.frame': 12 obs. of 3 variables:
# $ Coordinates: chr "[-1.74589,6.74885]" NA NA NA ...
# $ X : num -1.75 NA NA NA NA ...
# $ Y : num 6.75 NA NA NA NA ...
<sup>创建于2023年2月8日,使用reprex v2.0.2</sup>
英文:
One potential option is to use separate()
from the tidyr package:
library(tidyverse)

# Sample input: bracketed "[x,y]" strings with NA for missing rows.
Coordinates <- c(
  "[-1.74589,6.74885]",
  NA, NA, NA, NA,
  "[-1.582775,6.100376]",
  "[-1.68144,6.63265]",
  NA, NA, NA,
  "[-1.98006,6.33484]",
  "[-0.94156,6.63623]"
)
df <- data.frame(Coordinates = Coordinates)

# Split at the comma into X and Y while keeping Coordinates, then remove the
# square brackets that remain attached to the two halves.
df %>%
  separate(col = Coordinates, into = c("X", "Y"), sep = ",", remove = FALSE) %>%
  mutate(
    across(c(X, Y), function(half) gsub("\\[|\\]", "", half))
  )
#> Coordinates X Y
#> 1 [-1.74589,6.74885] -1.74589 6.74885
#> 2 <NA> <NA> <NA>
#> 3 <NA> <NA> <NA>
#> 4 <NA> <NA> <NA>
#> 5 <NA> <NA> <NA>
#> 6 [-1.582775,6.100376] -1.582775 6.100376
#> 7 [-1.68144,6.63265] -1.68144 6.63265
#> 8 <NA> <NA> <NA>
#> 9 <NA> <NA> <NA>
#> 10 <NA> <NA> <NA>
#> 11 [-1.98006,6.33484] -1.98006 6.33484
#> 12 [-0.94156,6.63623] -0.94156 6.63623
<sup>Created on 2023-02-08 with reprex v2.0.2</sup>
NB. unlike @Darren Tsai's extract()
approach, this does not change the type of "X" and "Y" to numeric
# Run the same pipeline and inspect the structure: X and Y are character
# columns because gsub() returns strings.
df %>%
  separate(col = Coordinates, into = c("X", "Y"), sep = ",", remove = FALSE) %>%
  mutate(
    across(c(X, Y), function(half) gsub("\\[|\\]", "", half))
  ) %>%
  str()
#> 'data.frame': 12 obs. of 3 variables:
#> $ Coordinates: chr "[-1.74589,6.74885]" NA NA NA ...
#> $ X : chr "-1.74589" NA NA NA ...
#> $ Y : chr "6.74885" NA NA NA ...
Although you can change "X" and "Y" to numeric if required:
# Wrap the bracket-stripping in as.numeric() so X and Y become numeric;
# str() shows the converted types.
df %>%
  separate(col = Coordinates, into = c("X", "Y"), sep = ",", remove = FALSE) %>%
  mutate(
    across(c(X, Y), function(half) as.numeric(gsub("\\[|\\]", "", half)))
  ) %>%
  str()
#> 'data.frame': 12 obs. of 3 variables:
#> $ Coordinates: chr "[-1.74589,6.74885]" NA NA NA ...
#> $ X : num -1.75 NA NA NA NA ...
#> $ Y : num 6.75 NA NA NA NA ...
<sup>Created on 2023-02-08 with reprex v2.0.2</sup>
答案2
得分: 2
你可以使用 tidyr::extract
:
library(tidyr)

# The two capture groups in the regex become X and Y.
# extract() removes the input column by default (remove = TRUE); set
# remove = FALSE so Coordinates is kept, as the desired output requires.
# convert = TRUE turns the captured strings into numeric columns.
df %>%
  extract(
    Coordinates,
    into = c("X", "Y"),
    regex = "\\[(.+),(.+)\\]",
    remove = FALSE,
    convert = TRUE
  )
#>             Coordinates         X        Y
#> 1    [-1.74589,6.74885] -1.745890 6.748850
#> 2                  <NA>        NA       NA
#> 3                  <NA>        NA       NA
#> 4                  <NA>        NA       NA
#> 5                  <NA>        NA       NA
#> 6  [-1.582775,6.100376] -1.582775 6.100376
#> 7    [-1.68144,6.63265] -1.681440 6.632650
#> 8                  <NA>        NA       NA
#> 9                  <NA>        NA       NA
#> 10                 <NA>        NA       NA
#> 11   [-1.98006,6.33484] -1.980060 6.334840
#> 12   [-0.94156,6.63623] -0.941560 6.636230
英文:
You can use tidyr::extract
:
library(tidyr)

# The two capture groups in the regex become X and Y.
# extract() removes the input column by default (remove = TRUE); set
# remove = FALSE so Coordinates is kept, as the desired output requires.
# convert = TRUE turns the captured strings into numeric columns.
df %>%
  extract(
    Coordinates,
    into = c("X", "Y"),
    regex = "\\[(.+),(.+)\\]",
    remove = FALSE,
    convert = TRUE
  )
#>             Coordinates         X        Y
#> 1    [-1.74589,6.74885] -1.745890 6.748850
#> 2                  <NA>        NA       NA
#> 3                  <NA>        NA       NA
#> 4                  <NA>        NA       NA
#> 5                  <NA>        NA       NA
#> 6  [-1.582775,6.100376] -1.582775 6.100376
#> 7    [-1.68144,6.63265] -1.681440 6.632650
#> 8                  <NA>        NA       NA
#> 9                  <NA>        NA       NA
#> 10                 <NA>        NA       NA
#> 11   [-1.98006,6.33484] -1.980060 6.334840
#> 12   [-0.94156,6.63623] -0.941560 6.636230
答案3
得分: 1
在新版本的 tidyr
中,我们也可以使用 separate_wider_regex
library(tidyr)
library(dplyr)

# Describe each "[x,y]" string piece by piece; the named pieces (X, Y) are the
# ones that appear as new columns in the output. cols_remove = FALSE keeps
# Coordinates, and relocate() moves it back to the first position.
df %>%
  separate_wider_regex(
    cols = Coordinates,
    patterns = c(
      "\\[",
      X = "-?[0-9.]+",
      ",",
      Y = "-?[0-9.]+",
      "\\]"
    ),
    cols_remove = FALSE
  ) %>%
  relocate(Coordinates, .before = 1)
-output
# A tibble: 12 × 3
Coordinates X Y
<chr> <chr> <chr>
1 [-1.74589,6.74885] -1.74589 6.74885
2 <NA> <NA> <NA>
3 <NA> <NA> <NA>
4 <NA> <NA> <NA>
5 <NA> <NA> <NA>
6 [-1.582775,6.100376] -1.582775 6.100376
7 [-1.68144,6.63265] -1.68144 6.63265
8 <NA> <NA> <NA>
9 <NA> <NA> <NA>
10 <NA> <NA> <NA>
11 [-1.98006,6.33484] -1.98006 6.33484
12 [-0.94156,6.63623] -0.94156 6.63623
英文:
In the newer version of tidyr
, we could also use separate_wider_regex
library(tidyr)
library(dplyr)

# Pattern pieces, in order: opening bracket, X value, comma, Y value, closing
# bracket. Only the named pieces (X, Y) appear as new columns in the output.
coordinate_patterns <- c(
  "\\[",
  X = "-?[0-9.]+",
  ",",
  Y = "-?[0-9.]+",
  "\\]"
)

# Keep the original column (cols_remove = FALSE) and move it to the front.
df %>%
  separate_wider_regex(
    cols = Coordinates,
    patterns = coordinate_patterns,
    cols_remove = FALSE
  ) %>%
  relocate(Coordinates, .before = 1)
-output
# A tibble: 12 × 3
Coordinates X Y
<chr> <chr> <chr>
1 [-1.74589,6.74885] -1.74589 6.74885
2 <NA> <NA> <NA>
3 <NA> <NA> <NA>
4 <NA> <NA> <NA>
5 <NA> <NA> <NA>
6 [-1.582775,6.100376] -1.582775 6.100376
7 [-1.68144,6.63265] -1.68144 6.63265
8 <NA> <NA> <NA>
9 <NA> <NA> <NA>
10 <NA> <NA> <NA>
11 [-1.98006,6.33484] -1.98006 6.33484
12 [-0.94156,6.63623] -0.94156 6.63623
If the type needs to be converted, add %>% type.convert(as.is = TRUE)
at the end
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论