如何解决R - ggmap - 通过地理编码计算城市之间的最短距离
我有一个城市和相关信息的列表,我已经放在一个数据框中,如下所示:
@Configuration
@Profile("my_profile")
@Import(LibraryConfig::class)
open class MyProfileConfig
使用 my_profile
和 library(plyr)
library(dplyr)
library(ggmap)
library(Imap)
cities <- c("washington,dc","wilmington,de","amarillo,tx","denver,co","needham,ma","philadelphia,pa","doylestown,"galveston,"tuscaloosa,al","hollywood,fl"
)
id <- c(156952,154222,785695,154423,971453,149888,1356987,178946,169944,136421)
month <- c(201811,201811,201912,202005,202106,202106 )
category<- c("home","work","home","cell","cell")
places <- data.frame(cities,id,category,month)
包,我可以检索每个城市的经度和纬度:
Imap
我想做的是:
- 按月份和类别计算每个城市之间的距离
- 在
ggmap
的不同列中返回第二个最短距离以及相应的城市和 ID
我写了一个 lat <- geocode(location = places$cities,source = "google")$lat
lon <- geocode(location = places$cities,source = "google")$lon
places <- cbind(places,lat,lon)
循环来计算距离:
places
产生以下数据:
for
所需的结果如下所示(第一行):
for (i in 1:nrow(places)) {
dist_list[[i]] <- gdist(lon.1 = places$lon[i],lat.1 = places$lat[i],lon.2 = places$lon,lat.2 = places$lat,units="miles")
}
通过 dput(dist_list)
list(c(0,98.3464717885451,1386.25425677199,1489.87718040776,383.083760289456,123.232894969413,140.284537078237,1209.23510542932,706.670452283757,906.79542720295),c(98.4762434610638,1472.06660056474,1560.93398322985,285.23618862797,24.9195071209828,44.8853561530985,1308.60741637919,805.755084908157,983.102810248198),c(1389.07354011351,356.573530670257,1712.29111612461,1493.39302974566,1497.2125164277,579.329313217289,827.577713357261,1434.82691622332
),c(1492.80130415651,1761.3773163288,1578.71125031146,1576.80713231756,923.725006795209,1067.04809350934,1717.32991551111),c(383.551997010915,260.382178510916,243.947043197789,1588.85470703957,1088.38640303169,1230.47219244291),c(123.395655314093,24.9195071209827,24.7382114555287,1333.29925285915,830.581742827321,1002.94777739349
),c(140.431447025301,44.8853561530986,24.7382114555285,1346.44527983873,844.827513981938,1026.98263808807),c(1211.16392416136,505.292529136012,925.512554201542),c(707.73957320737,666.837848781548),c(906.880841903584,983.102810248198,1434.82691622332,1717.32991551111,1230.47219244291,1002.94777739349,1026.98263808807,925.512554201542,666.837848781548,0))
中的 cities id category month lat lon min.dist closest city closest city id
washington,dc 156952 home 201811 38.90719 -77.03687 98.34647 wilmington,de 154222
函数,我可以获得第二小的距离
nth
我遇到的问题是我不知道如何将列表中的信息连接到 df Rfast
。任何帮助或建议将不胜感激。
解决方法
~/.bashrc
分组:
PATH=$HOME/.local/bin:$PATH
结果:
# get min distance:
min_d <- sapply(dist_list,function(x) sort(x)[2])
places$min_dist <- min_d
# index:
i <- sapply(dist_list,function(x) which(sort(x)[2] == x))
# add name:
places$min_name <- places$cities[i]
,
更新
假设我们只按 month
分组,我们可以试试下面的代码
f <- function(df) {
r <- list()
for (i in 1:nrow(df)) {
x <- c()
for (j in 1:nrow(df)) {
x <- append(
x,with(df,gdist(lat[i],lon[i],lat[j],lon[j],units = "miles"))
)
}
x <- replace(x,x == 0,Inf)
idx <- which.min(x)
r[[i]] <- data.frame(
min.dist = min(x),closest_city = df$cities[idx],closest_city_id = df$id[idx]
)
}
do.call(rbind,r)
}
places %>%
group_by(month) %>%
do(cbind(.,f(.))) %>%
ungroup()
给出
# A tibble: 10 x 9
cities id category month lat lon min.dist closest_city
<chr> <int> <chr> <int> <dbl> <dbl> <dbl> <chr>
1 washington,dc 156952 home 201811 38.9 -77.0 104. wilmington,de
2 wilmington,de 154222 work 201811 39.7 -75.5 104. washington,dc
3 amarillo,tx 785695 home 201912 35.2 -102. 232. denver,co
4 denver,co 154423 home 201912 39.8 -105. 232. amarillo,tx
5 needham,ma 971453 home 202005 42.3 -71.2 273. doylestown,pa
6 philadelphia,~ 149888 work 202005 40.0 -75.2 6.81 doylestown,pa
7 doylestown,pa 1356987 cell 202005 40.3 -75.1 6.81 philadelphia,~
8 galveston,tx 178946 home 202106 29.2 -94.9 11405. hollywood,fl
9 tuscaloosa,al 169944 work 202106 33.2 -87.6 517. hollywood,fl
10 hollywood,fl 136421 cell 202106 26.0 -80.1 517. tuscaloosa,al
# ... with 1 more variable: closest_city_id <int>
根据您获得的dist_list
,我们可以试试下面的代码
closest <- do.call(
rbind,lapply(
dist_list,function(x) {
x <- replace(x,Inf)
idx <- which.min(x)
with(
places,data.frame(
min.dist = min(x),closest_city = cities[idx],closest_city_id = id[idx]
)
)
}
)
)
给出
min.dist closest_city closest_city_id
1 98.34647 wilmington,de 154222
2 24.91951 philadelphia,pa 149888
3 356.57353 denver,co 154423
4 356.57353 amarillo,tx 785695
5 243.94704 doylestown,pa 1356987
6 24.73821 doylestown,pa 1356987
7 24.73821 philadelphia,pa 149888
8 505.29253 tuscaloosa,al 169944
9 505.29253 galveston,tx 178946
10 666.83785 tuscaloosa,al 169944
此外,如果您想将上述数据框附加到您现有的 places
,您可以使用
places <- cbind(places,closest)
,
使用sf::st_distance()
鉴于您正在处理空间数据,我建议采用基于空间库的方法,例如 {sf}
。
library(tidyverse)
library(tidygeocoder)
library(sf)
# clean location,geocode,and convert to sf object
places <- places %>%
separate(cities,into = c("city","state"),sep = ",") %>%
geocode(city = city,state = state) %>%
st_as_sf(coords = c("long","lat"),crs = 4269)
# sanity check
mapview::mapview(places)
# calculate distances between point pairs with st_distance()
compute_close_city <- function(i){
# compute distances btwn a point and its neighbors (excluding itself)
distances = st_distance(places[i,],places[-i,])
# index of the nearest neighbor
j = which.min(distances)
# organize and return the result
result <- tibble(
close_city = places$city[-i][j],# closest city
close_state = places$state[-i][j],# closest state
close_dist_m = distances[j] # distance in meters
)
return(result)
}
# calculate close cities and distances,bind results into dataframe
close_df <- map_df(1:nrow(places),~compute_close_city(.x))
# bind the result to the places data frame
places <- bind_cols(places,close_df)
# view the result and verify it works
select(places,city,close_city,close_dist_m)
返回:
Simple feature collection with 10 features and 3 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: -104.9849 ymin: 26.0112 xmax: -75.13046 ymax: 40.31004
Geodetic CRS: NAD83
# A tibble: 10 x 4
city close_city close_dist_m geometry
<chr> <chr> [m] <POINT [°]>
1 washington wilmington 159476.85 (-77.03656 38.89499)
2 wilmington philadelphia 40022.81 (-75.54659 39.74595)
3 amarillo denver 574956.04 (-101.8338 35.20722)
4 denver amarillo 574956.04 (-104.9849 39.73924)
5 needham tuscaloosa 153463.74 (-88.33309 31.98683)
6 philadelphia doylestown 39775.87 (-75.16353 39.95272)
7 doylestown philadelphia 39775.87 (-75.13046 40.31004)
8 galveston needham 687140.47 (-94.79459 29.29933)
9 tuscaloosa needham 153463.74 (-87.56753 33.20956)
10 hollywood needham 1035934.60 (-80.14949 26.0112)
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。