英文:
Make a loop to find the distance between stations in R
问题
我已经制作了这段代码来计算站点之间的距离,但在输出中只有一个值。你能找到错误吗?
# Asker's original script, kept as posted because it reproduces the problem.
# Example data: 4 stations x 20 readings each (80 rows).
# NOTE: latitude and longitude contain only 4 values, so data.frame()
# recycles them across the 80 rows; every station therefore ends up with all
# four coordinate pairs instead of one.
df <- data.frame(
station = rep(c("A", "B", "C", "D"), each = 20),
temperature = rnorm(80),
latitude = c(40.7128, 34.0522, 41.8781, 39.9526),
longitude = c(-74.0060, -118.2437, -87.6298, -75.1652)
)
stations <- unique(df$station)
# Square matrix intended to hold the distance between stations i and j
my_points <- matrix(NA, nrow = length(unique(df$station)), ncol = length(unique(df$station)))
# Loop through each station combination
for (i in 1:length(stations)) {
for (j in 1:length(stations)) {
# Get the coordinates (not temperatures) of the two stations.
# Each lookup matches 20 rows, so these are length-20 vectors, not scalars.
lat1 <- df$latitude[df$station == stations[i]]
lon1 <- df$longitude[df$station == stations[i]]
lat2 <- df$latitude[df$station == stations[j]]
lon2 <- df$longitude[df$station == stations[j]]
# The 80 values are packed into a 2-row matrix, and dist() returns the single
# distance between those two rows. Because every station selects the same
# recycled coordinate pattern, each cell receives the same number.
my_points[i, j] <- as.vector(dist(matrix(c(lon1, lon2, lat1, lat2),
nrow = 2)))
}
}
distance_df <- as.data.frame(my_points)
英文:
I have made this code to find the distance between stations but in the output, there is only one value. Can you find the error?
# Asker's original script, kept as posted because it reproduces the problem.
# Example data: 4 stations x 20 readings each (80 rows).
# NOTE: latitude and longitude contain only 4 values, so data.frame()
# recycles them across the 80 rows; every station therefore ends up with all
# four coordinate pairs instead of one.
df <- data.frame(
station = rep(c("A", "B", "C", "D"), each = 20),
temperature = rnorm(80),
latitude = c(40.7128, 34.0522, 41.8781, 39.9526),
longitude = c(-74.0060, -118.2437, -87.6298, -75.1652)
)
stations <- unique(df$station)
# Square matrix intended to hold the distance between stations i and j
my_points <- matrix(NA, nrow = length(unique(df$station)), ncol = length(unique(df$station)))
# Loop through each station combination
for (i in 1:length(stations)) {
for (j in 1:length(stations)) {
# Get the coordinates (not temperatures) of the two stations.
# Each lookup matches 20 rows, so these are length-20 vectors, not scalars.
lat1 <- df$latitude[df$station == stations[i]]
lon1 <- df$longitude[df$station == stations[i]]
lat2 <- df$latitude[df$station == stations[j]]
lon2 <- df$longitude[df$station == stations[j]]
# The 80 values are packed into a 2-row matrix, and dist() returns the single
# distance between those two rows. Because every station selects the same
# recycled coordinate pattern, each cell receives the same number.
my_points[i, j] <- as.vector(dist(matrix(c(lon1,lon2,lat1,lat2),
nrow = 2)))
}
}
distance_df <- as.data.frame(my_points)
答案1
得分: 2
这里有两个问题:
- 您的输入数据框可能不符合您的预期:纬度和经度列只有 4 个值,会被循环补齐到 80 行,因此同一个站点对应多个不同的坐标。请像 `station` 列一样,对纬度和经度列也使用 `rep(..., each = 20)`。
- 在您的代码中,`lat1 <- df$latitude[df$station == stations[i]]` 返回的是一个向量,因为有多个匹配的行,而您期望的应该是单个值。只取第一个匹配元素即可(按上文添加 `rep()` 之后,该向量中的元素全部相同):
# Example data: 4 stations x 20 readings each. The coordinates are wrapped in
# rep(..., each = 20) so every row of a station carries the same
# latitude/longitude (a bare length-4 vector would be recycled across rows).
df <- data.frame(
  station = rep(c("A", "B", "C", "D"), each = 20),
  temperature = rnorm(80),
  latitude = rep(c(40.7128, 34.0522, 41.8781, 39.9526), each = 20),
  longitude = rep(c(-74.0060, -118.2437, -87.6298, -75.1652), each = 20)
)

stations <- unique(df$station)

# Square matrix of pairwise distances; cell [i, j] is station i vs station j.
my_points <- matrix(
  NA_real_,
  nrow = length(stations),
  ncol = length(stations)
)

# Loop through each station combination.
# seq_along() is used instead of 1:length() so that an empty station vector
# gives zero iterations rather than iterating over c(1, 0).
for (i in seq_along(stations)) {
  for (j in seq_along(stations)) {
    # Get the coordinates of the two stations. [1] keeps a single value,
    # which is safe because all rows of a station hold the same coordinates.
    lat1 <- df$latitude[df$station == stations[i]][1]
    lon1 <- df$longitude[df$station == stations[i]][1]
    lat2 <- df$latitude[df$station == stations[j]][1]
    lon2 <- df$longitude[df$station == stations[j]][1]
    # Column-wise fill gives a 2 x 2 matrix: one row per station, columns
    # (lon, lat). dist() returns the Euclidean distance between the rows, in
    # coordinate units (degrees), not a distance along the Earth's surface.
    my_points[i, j] <- as.vector(
      dist(matrix(c(lon1, lon2, lat1, lat2), nrow = 2))
    )
  }
}

distance_df <- as.data.frame(my_points)
这将得到以下结果:
V1 V2 V3 V4
1 0.000000 44.73631 13.67355 1.386235
2 44.736313 0.00000 31.59835 43.480707
3 13.673546 31.59835 0.00000 12.612446
4 1.386235 43.48071 12.61245 0.000000
一个稍微更好的查找唯一站点的方法是:
# One row per distinct (station, latitude, longitude) combination. The result
# is stored as `unique_df` because the loop that follows looks coordinates up
# in an object of that name.
unique_df <- unique(df[, c("station", "latitude", "longitude")])
然后您可以循环遍历这些唯一站点:
# Loop through each station combination.
# `unique_df` is expected to hold exactly one row per station (columns
# station, latitude, longitude), so each lookup below yields a single value
# and no [1] is needed. `stations` and `my_points` come from the script above.
# seq_along() is used instead of 1:length() so that an empty station vector
# gives zero iterations rather than iterating over c(1, 0).
for (i in seq_along(stations)) {
  for (j in seq_along(stations)) {
    # Get the coordinates of the two stations
    lat1 <- unique_df$latitude[unique_df$station == stations[i]]
    lon1 <- unique_df$longitude[unique_df$station == stations[i]]
    lat2 <- unique_df$latitude[unique_df$station == stations[j]]
    lon2 <- unique_df$longitude[unique_df$station == stations[j]]
    # 2 x 2 matrix: one row per station, columns (lon, lat); dist() returns
    # the Euclidean distance between the two rows, in coordinate units.
    my_points[i, j] <- as.vector(
      dist(matrix(c(lon1, lon2, lat1, lat2), nrow = 2))
    )
  }
}
英文:
There are two issues here:
- Your input data frame might not look the way you expect it to: the latitude and longitude vectors have only 4 values, so they are recycled across the 80 rows and you end up with multiple different coordinates for the same station. Wrap the latitude and longitude values in `rep(..., each = 20)`, just as is done for `station`.
- In your code, `lat1 <- df$latitude[df$station == stations[i]]` returns a vector, because there are multiple matching rows. I think you're expecting a single value. Use only the first matching element (after adding `rep()` as above, all elements of that vector are the same):
# Example data: 4 stations x 20 readings each. The coordinates are wrapped in
# rep(..., each = 20) so every row of a station carries the same
# latitude/longitude (a bare length-4 vector would be recycled across rows).
df <- data.frame(
  station = rep(c("A", "B", "C", "D"), each = 20),
  temperature = rnorm(80),
  latitude = rep(c(40.7128, 34.0522, 41.8781, 39.9526), each = 20),
  longitude = rep(c(-74.0060, -118.2437, -87.6298, -75.1652), each = 20)
)

stations <- unique(df$station)

# Square matrix of pairwise distances; cell [i, j] is station i vs station j.
my_points <- matrix(
  NA_real_,
  nrow = length(stations),
  ncol = length(stations)
)

# Loop through each station combination.
# seq_along() is used instead of 1:length() so that an empty station vector
# gives zero iterations rather than iterating over c(1, 0).
for (i in seq_along(stations)) {
  for (j in seq_along(stations)) {
    # Get the coordinates of the two stations. [1] keeps a single value,
    # which is safe because all rows of a station hold the same coordinates.
    lat1 <- df$latitude[df$station == stations[i]][1]
    lon1 <- df$longitude[df$station == stations[i]][1]
    lat2 <- df$latitude[df$station == stations[j]][1]
    lon2 <- df$longitude[df$station == stations[j]][1]
    # Column-wise fill gives a 2 x 2 matrix: one row per station, columns
    # (lon, lat). dist() returns the Euclidean distance between the rows, in
    # coordinate units (degrees), not a distance along the Earth's surface.
    my_points[i, j] <- as.vector(
      dist(matrix(c(lon1, lon2, lat1, lat2), nrow = 2))
    )
  }
}

distance_df <- as.data.frame(my_points)
This gives:
V1 V2 V3 V4
1 0.000000 44.73631 13.67355 1.386235
2 44.736313 0.00000 31.59835 43.480707
3 13.673546 31.59835 0.00000 12.612446
4 1.386235 43.48071 12.61245 0.000000
A slightly better way of finding unique stations:
# One row per distinct (station, latitude, longitude) combination. The result
# is stored as `unique_df` because the loop that follows looks coordinates up
# in an object of that name.
unique_df <- unique(df[, c("station", "latitude", "longitude")])
You can then loop over those instead:
# Loop through each station combination.
# `unique_df` is expected to hold exactly one row per station (columns
# station, latitude, longitude), so each lookup below yields a single value
# and no [1] is needed. `stations` and `my_points` come from the script above.
# seq_along() is used instead of 1:length() so that an empty station vector
# gives zero iterations rather than iterating over c(1, 0).
for (i in seq_along(stations)) {
  for (j in seq_along(stations)) {
    # Get the coordinates of the two stations
    lat1 <- unique_df$latitude[unique_df$station == stations[i]]
    lon1 <- unique_df$longitude[unique_df$station == stations[i]]
    lat2 <- unique_df$latitude[unique_df$station == stations[j]]
    lon2 <- unique_df$longitude[unique_df$station == stations[j]]
    # 2 x 2 matrix: one row per station, columns (lon, lat); dist() returns
    # the Euclidean distance between the two rows, in coordinate units.
    my_points[i, j] <- as.vector(
      dist(matrix(c(lon1, lon2, lat1, lat2), nrow = 2))
    )
  }
}
答案2
得分: 1
没有必要使用循环——只需取出各站点唯一的纬度和经度,然后对其调用 `dist()`:
# Assumes each station has exactly one location. De-duplicating on station
# AND coordinates (rather than coordinates alone) keeps two stations that
# share a location as separate rows, so the row labels below always line up
# with the coordinate rows.
station_coords <- unique(df[c("station", "latitude", "longitude")])
stations <- station_coords[c("latitude", "longitude")]
# Label rows by station so the resulting distance table is labelled too
rownames(stations) <- station_coords$station
# Pairwise Euclidean distances between stations, returned as a data frame
dist(stations) |>
  as.matrix() |>
  as.data.frame()
A B C D
A 0.000000 44.73631 13.67355 1.386235
B 44.736313 0.00000 31.59835 43.480707
C 13.673546 31.59835 0.00000 12.612446
D 1.386235 43.48071 12.61245 0.000000
示例数据:
# Example data: four stations (A-D) with 20 temperature readings per station.
# Each station's single coordinate pair is repeated on all of its rows.
# local() keeps the helper vectors out of the workspace; only `df` is created.
df <- local({
  readings_per_station <- 20
  station_id <- c("A", "B", "C", "D")
  station_lat <- c(40.7128, 34.0522, 41.8781, 39.9526)
  station_lon <- c(-74.0060, -118.2437, -87.6298, -75.1652)

  data.frame(
    station = rep(station_id, each = readings_per_station),
    temperature = rnorm(readings_per_station * length(station_id)),
    latitude = rep(station_lat, each = readings_per_station),
    longitude = rep(station_lon, each = readings_per_station)
  )
})
英文:
There’s no need for a loop — just subset to the unique station latitudes and longitudes, and call `dist()` on that:
# Assumes each station has exactly one location. De-duplicating on station
# AND coordinates (rather than coordinates alone) keeps two stations that
# share a location as separate rows, so the row labels below always line up
# with the coordinate rows.
station_coords <- unique(df[c("station", "latitude", "longitude")])
stations <- station_coords[c("latitude", "longitude")]
# Label rows by station so the resulting distance table is labelled too
rownames(stations) <- station_coords$station
# Pairwise Euclidean distances between stations, returned as a data frame
dist(stations) |>
  as.matrix() |>
  as.data.frame()
A B C D
A 0.000000 44.73631 13.67355 1.386235
B 44.736313 0.00000 31.59835 43.480707
C 13.673546 31.59835 0.00000 12.612446
D 1.386235 43.48071 12.61245 0.000000
Example data:
# Example data: four stations (A-D) with 20 temperature readings per station.
# Each station's single coordinate pair is repeated on all of its rows.
# local() keeps the helper vectors out of the workspace; only `df` is created.
df <- local({
  readings_per_station <- 20
  station_id <- c("A", "B", "C", "D")
  station_lat <- c(40.7128, 34.0522, 41.8781, 39.9526)
  station_lon <- c(-74.0060, -118.2437, -87.6298, -75.1652)

  data.frame(
    station = rep(station_id, each = readings_per_station),
    temperature = rnorm(readings_per_station * length(station_id)),
    latitude = rep(station_lat, each = readings_per_station),
    longitude = rep(station_lon, each = readings_per_station)
  )
})
通过集体智慧和协作来改善编程学习和解决问题的方式。致力于成为全球开发者共同参与的知识库,让每个人都能够通过互相帮助和分享经验来进步。
评论