英文:
Mean of geospatial distance between two datasets of different lengths
问题
我想计算toy1数据集中所有点与toy2数据集中最近点之间的平均距离。
所以,对于toy1数据集中的每个点,我想找到toy2数据集中距离最近的点,然后计算所有距离的平均值。
我希望距离以米为单位。
[![点击此处进入图像描述][1]][1]
toy1数据集的结构:
结构(list(random = c(0.424676549155265, 7.62225863989443,
2.49231160385534, 8.09510331135243, 9.83170835534111, 0.697977161034942
), geometry = structure(list(structure(c(1219790.00733145, 6050277.88033381
), class = c("XY", "POINT", "sfg")), structure(c(1217790.00733145,
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1218290.00733145,
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1219290.00733145,
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1219790.00733145,
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1220290.00733145,
6050777.88033381), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 1217790.00733145,
ymin = 6050277.88033381, xmax = 1220290.00733145, ymax = 6050777.88033381
), class = "bbox"), crs = structure(list(input = NA_character_,
wkt = "PROJCS[\"RGF_1993_Lambert_93\",\n GEOGCS[\"GCS_RGF_1993\",\n DATUM[\"Reseau_Geodesique_Francais_1993\",\n SPHEROID[\"GRS_1980\",6378137.0,298.257222101]],\n PRIMEM[\"Greenwich\",0.0],\n UNIT[\"Degree\",0.0174532925199433]],\n PROJECTION[\"Lambert_Conformal_Conic_2SP\"],\n PARAMETER[\"False_Easting\",700000.0],\n PARAMETER[\"False_Northing\",6600000.0],\n PARAMETER[\"Central_Meridian\",3.0],\n PARAMETER[\"Standard_Parallel_1\",49.0],\n PARAMETER[\"Standard_Parallel_2\",44.0],\n PARAMETER[\"Latitude_Of_Origin\",46.5],\n UNIT[\"Meter\",1.0]]"), class = "crs"), n_empty = 0L),
long = c(1219790.00733145, 1217790.00733145, 1218290.00733145,
1219290.00733145, 1219790.00733145, 1220290.00733145), lat = c(6050277.88033381,
6050777.88033381, 6050777.88033381, 6050777.88033381, 6050777.88033381,
6050777.88033381)), row.names = c(NA, -6L), sf_column = "geometry", agr = structure(c(random = NA_integer_,
long = NA_integer_, lat = NA_integer_), .Label = c("constant",
"aggregate", "identity"), class = "factor"), class = c("sf",
"tbl_df", "tbl", "data.frame"))
toy2数据集的结构:
结构(list(random = c(8.91735465731472, 1.18594053201377,
3.9330403204076, 9.10003933124244, 9.85382732702419, 7.3299086955376,
7.75479080388322, 0.621909373439848, 9.07300168415532, 2.00036991853267
), geometry = structure(list(structure(c(1215290.00733145, 6053277.88033381
), class = c("XY", "POINT", "sfg")), structure(c(1212790.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1216790.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1217290.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1217790.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1218290.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1219290.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1215790.00733145,
6054277.88033381), class = c("XY", "POINT", "sfg")), structure(c(1216790.00733145,
6054277.88033381), class = c("XY", "POINT", "sfg")), structure(c(1221290.00733145,
6054277.88033381), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 1212790.00733145,
ymin = 6053277.88033381, xmax = 1221290.00733145, ymax = 6054277.88033381
), class = "bbox"), crs = structure(list(input = NA_character_,
wkt = "PROJCS[\"RGF_1993_Lambert_93\",\n GEOGCS[\"GCS_RGF_1993\",\n DATUM[\"Reseau_Geodesique_Francais_1993\",\n SPHEROID[\"GRS_1980\",6378137.
<details>
<summary>英文:</summary>
I want to compute the mean distance between all points of the toy1 dataset with the closest point of the toy2 dataset.
So for each point of the toy1 dataset, I want to find the closest point among the points of the toy2 dataset, and then compute a mean with all the distances.
I want the distances to be in meters.
[![enter image description here][1]][1]
the structure of toy1 dataset :
structure(list(random = c(0.424676549155265, 7.62225863989443,
2.49231160385534, 8.09510331135243, 9.83170835534111, 0.697977161034942
), geometry = structure(list(structure(c(1219790.00733145, 6050277.88033381
), class = c("XY", "POINT", "sfg")), structure(c(1217790.00733145,
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1218290.00733145,
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1219290.00733145,
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1219790.00733145,
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1220290.00733145,
6050777.88033381), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 1217790.00733145,
ymin = 6050277.88033381, xmax = 1220290.00733145, ymax = 6050777.88033381
), class = "bbox"), crs = structure(list(input = NA_character_,
wkt = "PROJCS["RGF_1993_Lambert_93",\n GEOGCS["GCS_RGF_1993",\n DATUM["Reseau_Geodesique_Francais_1993",\n SPHEROID["GRS_1980",6378137.0,298.257222101]],\n PRIMEM["Greenwich",0.0],\n UNIT["Degree",0.0174532925199433]],\n PROJECTION["Lambert_Conformal_Conic_2SP"],\n PARAMETER["False_Easting",700000.0],\n PARAMETER["False_Northing",6600000.0],\n PARAMETER["Central_Meridian",3.0],\n PARAMETER["Standard_Parallel_1",49.0],\n PARAMETER["Standard_Parallel_2",44.0],\n PARAMETER["Latitude_Of_Origin",46.5],\n UNIT["Meter",1.0]]"), class = "crs"), n_empty = 0L),
long = c(1219790.00733145, 1217790.00733145, 1218290.00733145,
1219290.00733145, 1219790.00733145, 1220290.00733145), lat = c(6050277.88033381,
6050777.88033381, 6050777.88033381, 6050777.88033381, 6050777.88033381,
6050777.88033381)), row.names = c(NA, -6L), sf_column = "geometry", agr = structure(c(random = NA_integer_,
long = NA_integer_, lat = NA_integer_), .Label = c("constant",
"aggregate", "identity"), class = "factor"), class = c("sf",
"tbl_df", "tbl", "data.frame"))
The structure of toy2 dataset :
structure(list(random = c(8.91735465731472, 1.18594053201377,
3.9330403204076, 9.10003933124244, 9.85382732702419, 7.3299086955376,
7.75479080388322, 0.621909373439848, 9.07300168415532, 2.00036991853267
), geometry = structure(list(structure(c(1215290.00733145, 6053277.88033381
), class = c("XY", "POINT", "sfg")), structure(c(1212790.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1216790.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1217290.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1217790.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1218290.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1219290.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1215790.00733145,
6054277.88033381), class = c("XY", "POINT", "sfg")), structure(c(1216790.00733145,
6054277.88033381), class = c("XY", "POINT", "sfg")), structure(c(1221290.00733145,
6054277.88033381), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 1212790.00733145,
ymin = 6053277.88033381, xmax = 1221290.00733145, ymax = 6054277.88033381
), class = "bbox"), crs = structure(list(input = NA_character_,
wkt = "PROJCS["RGF_1993_Lambert_93",\n GEOGCS["GCS_RGF_1993",\n DATUM["Reseau_Geodesique_Francais_1993",\n SPHEROID["GRS_1980",6378137.0,298.257222101]],\n PRIMEM["Greenwich",0.0],\n UNIT["Degree",0.0174532925199433]],\n PROJECTION["Lambert_Conformal_Conic_2SP"],\n PARAMETER["False_Easting",700000.0],\n PARAMETER["False_Northing",6600000.0],\n PARAMETER["Central_Meridian",3.0],\n PARAMETER["Standard_Parallel_1",49.0],\n PARAMETER["Standard_Parallel_2",44.0],\n PARAMETER["Latitude_Of_Origin",46.5],\n UNIT["Meter",1.0]]"), class = "crs"), n_empty = 0L),
long = c(1215290.00733145, 1212790.00733145, 1216790.00733145,
1217290.00733145, 1217790.00733145, 1218290.00733145, 1219290.00733145,
1215790.00733145, 1216790.00733145, 1221290.00733145), lat = c(6053277.88033381,
6053777.88033381, 6053777.88033381, 6053777.88033381, 6053777.88033381,
6053777.88033381, 6053777.88033381, 6054277.88033381, 6054277.88033381,
6054277.88033381)), row.names = c(NA, -10L), sf_column = "geometry", agr = structure(c(random = NA_integer_,
long = NA_integer_, lat = NA_integer_), .Label = c("constant",
"aggregate", "identity"), class = "factor"), class = c("sf",
"tbl_df", "tbl", "data.frame"))
I found some topics related, but as the two datasets do not have the same number of points I got errors in the functions.
The function distHaversine from geospatial package seems to require the same number of points in the two arguments...
[1]: https://i.stack.imgur.com/qqHGJ.png
</details>
# 答案1
**得分**: 2
one approach:
```R
library(dplyr)
library(sf)
toy_1_with_distances <-
toy_1 %>%
rowwise() %>%
mutate(closest_point_row_toy_2 = st_nearest_feature(geometry, toy_2),
closest_point = toy_2[closest_point_row_toy_2,],
dist_to_closest = st_distance(geometry, closest_point)
)
> toy1_with_distances %>% pull(dist_to_closest) %>% mean()
3123.199 [m]
英文:
one approach:
library(dplyr)
library(sf)
toy_1_with_distances <-
toy_1 |>
rowwise() |>
mutate(closest_point_row_toy_2 = st_nearest_feature(geometry, toy_2),
closest_point = toy_2[closest_point_row_toy_2,],
dist_to_closest = st_distance(geometry, closest_point)
)
> toy1_with_distances |> pull(dist_to_closest) |> mean()
3123.199 [m]
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论