1 回答

TA貢獻1828條經驗 獲得超6個贊
我最近一直在玩 igraph 和 API,所以這是相當新鮮的。我認為下面的代碼可以滿足您的需求,但它確實省略了一些復雜性(例如不會使 API 超時)。它不是非???- 我懷疑其中很多與使用 as_data_frame 接口來跟蹤頂點有關。
所以我確信它可以被優化并且我確信在某個時候 API 會以一種破壞它的編碼返回一些東西,但這是一個開始。
library(igraph)
api_fetch <- function(query){
result <- jsonlite::fromJSON(paste0('http://suggestqueries.google.com/complete/search?client=firefox&q=', httpuv::encodeURIComponent(query)))
return(result)
}
build_query_graph <- function(entry_word, max_depth=2){
# Create an empty graph
graph <- make_empty_graph()
entry_word <- tolower(trimws(entry_word))
graph <- add_vertices(graph, 1, name=entry_word, searched=FALSE)
# Keep on doing this until the graph hits the maximum depth from the entry word
while(TRUE){
# Look up the current vertices and find their depths from the entry word
vertices <- as_data_frame(graph, what='vertices')
vertex_depth <- distances(graph, v=entry_word)
vertices$depth <- vertex_depth[match(colnames(vertex_depth), vertices$name)]
# Find vertices at least one step from the maximum depth and that haven't
# already been searched and sort to get the shallowest at the top
live_vertices <- subset(vertices, depth <= (max_depth - 1) & ! searched)
live_vertices <- live_vertices[order(live_vertices$depth),]
# If there are any vertices meeting these criteria, then query the API
# otherwise bail from the while loop
if(nrow(live_vertices)){
# Get the vertex name and query it
this_vertex <- live_vertices$name[1]
res <- api_fetch(this_vertex)
# For each of the daughter results, check it isn't already a vertex
# and add an edge from it to this_vertex
for(daughter in res[[2]]){
if(! daughter %in% get.vertex.attribute(graph, 'name')){
graph <- add_vertices(graph, 1, name=daughter, searched=FALSE)
}
graph <- add_edges(graph, c(this_vertex, daughter))
}
# Don't search this vertex again
graph <- set_vertex_attr(graph, 'searched', this_vertex, TRUE)
} else {
break
}
}
return(graph)
}
運行:
> g <- build_query_graph('amazon')
> g
IGRAPH 0ec19b6 DN-- 90 100 --
+ attr: name (v/c), searched (v/l)
+ edges from 0ec19b6 (vertex names):
[1] amazon ->amazon amazon ->amazon prime amazon ->amazon prime video
[4] amazon ->amazon uk amazon ->amazon music amazon ->amazon smile
[7] amazon ->amazon india amazon ->amazon jobs amazon ->amazon video
[10] amazon ->amazon customer service amazon prime ->amazon prime amazon prime ->amazon prime video
[13] amazon prime ->amazon prime movies amazon prime ->amazon prime music amazon prime ->amazon prime now
[16] amazon prime ->amazon prime login amazon prime ->amazon prime uk amazon prime ->amazon prime tv
[19] amazon prime ->amazon prime cost amazon prime ->amazon prime student amazon prime video->amazon prime video
[22] amazon prime video->amazon prime video login amazon prime video->amazon prime video app amazon prime video->amazon prime video uk
+ ... omitted several edges
> plot(g)
考慮一下,重復重新計算所有距離并進行大量排序和匹配。在創建單個頂點時保存它們的深度可能會更快:
build_query_graph <- function(entry_word, max_depth=2){
# Create an empty graph
graph <- make_empty_graph()
entry_word <- tolower(trimws(entry_word))
graph <- add_vertices(graph, 1, name=entry_word, depth=0, searched=FALSE)
# Keep on doing this until the graph hits the maximum depth from the entry word
while(TRUE){
# Look up the current vertices and find their depths from the entry word
vertices <- as_data_frame(graph, what='vertices')
# Find vertices at least one step from the maximum depth and that haven't
# already been searched and sort to get the shallowest at the top
live_vertices <- subset(vertices, depth <= (max_depth - 1) & ! searched)
live_vertices <- live_vertices[order(live_vertices$depth),]
# If there are any vertices meeting these criteria, then query the API
# otherwise bail from the while loop
if(nrow(live_vertices)){
# Get the vertex name and query it
this_vertex <- live_vertices$name[1]
res <- api_fetch(this_vertex)
# For each of the daughter results, check it isn't already a vertex
# add an edge from it to this_vertex and store the depth from the entry word
for(daughter in res[[2]]){
if(! daughter %in% get.vertex.attribute(graph, 'name')){
graph <- add_vertices(graph, 1, name=daughter, depth=NA, searched=FALSE)
}
graph <- add_edges(graph, c(this_vertex, daughter))
graph <- set_vertex_attr(graph, 'depth', daughter,
distances(graph, v=entry_word, to=daughter))
}
# Don't search this vertex again
graph <- set_vertex_attr(graph, 'searched', this_vertex, TRUE)
} else {
break
}
}
return(graph)
}
添加回答
舉報