pytorch 笔记: 复现论文 Stochastic Weight Completion for Road Networks using Graph Convolutional Networks
1 理論部分
論文筆記:Stochastic Weight Completion for Road Networks using Graph Convolutional Networks_UQI-LIUWJ的博客-CSDN博客
2 導入庫
import torch import torch.nn.functional as F import numpy as np import pandas as pd import os from torch_geometric.data import Data, DataLoader from torch_geometric.utils import normalized_cut from torch_geometric.nn import (ChebConv, graclus, GCNConv, max_pool, max_pool_x, global_mean_pool) from toolz.curried import *3 數據集處理
數據集來源是uber movement 以及紐約的osm 地圖數據
Uber Movement: Let's find smarter ways forward, together.
# Fix the random seeds so that the run is reproducible.
np.random.seed(123)
torch.manual_seed(123)

uberdir = "D:/"
nykjuly = os.path.join(uberdir, "movement-speeds-hourly-new-york-2019-7.csv.zip")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

nodes, edges = load_gdfs("data/newyork/")
# Printed by load_gdfs:
# There are 4588 nodes and 9893 edges in the road networks.

# One month of Uber Movement speed records.
df = pd.read_csv(nykjuly)
df = attach_edgeid(nodes, edges, df)
# Shapes reported by the original author:
#   before: (25365815, 13)
#   after:  (2653457, 11)
# attach_edgeid drops ['segment_id', 'start_junction_id', 'end_junction_id']
# and adds an `edgeid` column (the row's position in `edges`). Only records
# whose start node, end node and way id all match an edge in `edges` are
# kept, which is why far fewer trip records remain.

# Graph with road segments as nodes: two segments are connected when they
# share an endpoint in the original road network.
dG = edge_topology_from_edges(edges)

n_test = 3 * 24  # the last three days of the month are the test set
n_epochs = 5
batch_size = 1

# obs:   for every hour, a random 90% of that hour's records (model input)
# unobs: for every hour, the remaining 10% (ground truth to be inferred)
obs, unobs = split_obs_unobs(df, ratio=0.9)
# Turn each DataFrame into a list with one DataFrame per hour
# (744 hourly frames for the month, according to the original author).
obs = [g for (_, g) in obs.groupby(['month', 'day', 'hour'])]
unobs = [g for (_, g) in unobs.groupby(['month', 'day', 'hour'])]

# 3.1 load_gdfs
導入地圖數據,獲得點集和邊集
# Load the map data.
def load_gdfs(datadir: str) -> Tuple[GeoDataFrame, GeoDataFrame]:
    """Load the nodes and edges GeoDataFrames from a saved graphml file.

    The graph is read from ``<datadir>/graph.graphml`` (saved by
    `save_graphml_from_places`).

    Usage:
        nodes, edges = load_gdfs("data/newyork/")

    Args:
        datadir: Directory that contains ``graph.graphml``.

    Returns:
        nodes: Node GeoDataFrame with an extra ``osmid`` column copied from
            its index.
        edges: Edge GeoDataFrame indexed by a consecutive integer ``id``
            (also kept as a column), with extra columns ``coords`` (shifted
            centroid of the segment), ``u`` and ``v``, and with ``osmid``
            always stored as a list.
    """
    G = ox.load_graphml(os.path.join(datadir, "graph.graphml"))
    # Convert the graph into node and edge GeoDataFrames.
    nodes, edges = ox.graph_to_gdfs(G, nodes=True, edges=True)
    nodes['osmid'] = nodes.index.values

    # Centroid of every road segment, computed after projecting to EPSG:3395.
    centroids = edges.geometry.to_crs(epsg=3395).centroid
    # (num_edges, 2) array so that the per-axis minimum is easy to take.
    coords = np.array([[p.x, p.y] for p in centroids])
    # Shift the coordinates so that the minimum along each axis becomes 0.
    coords = coords - coords.min(axis=0)
    edges['coords'] = [tuple(xy) for xy in coords]

    # A segment may carry one way id or a list of them; normalise to a list.
    edges['osmid'] = edges.osmid.map(lambda x: x if isinstance(x, list) else [x])

    # The edge index holds (u, v, key) triples; expose u and v as columns.
    u, v, _ = list(zip(*edges.index))
    edges["u"] = u
    edges["v"] = v

    # Consecutive integer edge id (starting at 0), used as the index and
    # also kept as a regular column.
    edges['id'] = np.arange(edges.shape[0])
    edges.set_index('id', inplace=True, drop=False)

    print(f"There are {nodes.shape[0]} nodes and {edges.shape[0]} edges in the road networks.")
    return nodes, edges

# Author's note: this is similar to a "simplify" operation.
同時將newyork里面的crs坐標系轉換成uber使用的坐標系
nodes幾乎沒動
edges加了一個coords列,表示的是邊的質心(已減去各坐標軸的最小值),同時edges的索引改為id
nodes:
edges:
?
?
3.2 attach_edgeid
def attach_edgeid(nodes: "GeoDataFrame", edges: "GeoDataFrame", df: DataFrame) -> DataFrame:
    """Attach a graph edge-id column to a one-month Uber dataframe.

    The edge id is determined by (u, v, osmid). Only rows that can be mapped
    to an edge of the graph are kept.

    Usage:
        mh = attach_edgeid(nodes, edges, df)

    Args:
        nodes: Node frame with an ``osmid`` column.
        edges: Edge frame with ``u``, ``v``, ``osmid`` (a list of way ids per
            edge) and ``id`` columns.
        df: Uber records with ``osm_start_node_id``, ``osm_end_node_id`` and
            ``osm_way_id`` columns. It is not modified.

    Returns:
        A filtered copy of `df` without the ``segment_id``,
        ``start_junction_id`` and ``end_junction_id`` columns and with an
        integer ``edgeid`` column.
    """
    # Keep only the rows whose start and end nodes both exist in `nodes`.
    in_graph = df.osm_start_node_id.isin(nodes.osmid) & df.osm_end_node_id.isin(nodes.osmid)
    sdf = df[in_graph].copy()
    # Drop the three columns that are not used afterwards.
    sdf.drop(["segment_id", "start_junction_id", "end_junction_id"], axis=1, inplace=True)

    # (u, v) -> [(osmids, edgeid), ...]. A list is required because several
    # parallel edges (e.g. a ground road and an elevated road) can share the
    # same endpoints; a plain (u, v) -> edge mapping would keep only the last
    # of them and lose the records that belong to the others.
    edgeidmap = {}
    for (u, v, osmids, edgeid) in zip(edges.u, edges.v, edges.osmid, edges.id):
        edgeidmap.setdefault((u, v), []).append((osmids, edgeid))

    def getedgeid(u: int, v: int, osmid: int) -> int:
        """Map (u, v, osmid) to the graph edge id, or -1 if there is no such edge."""
        for (osmids, edgeid) in edgeidmap.get((u, v), ()):
            if osmid in osmids:
                return edgeid
        return -1

    edge_idx_cols = ['osm_start_node_id', 'osm_end_node_id', 'osm_way_id']
    # A plain loop over the three columns avoids row-wise DataFrame.apply,
    # which is slow and does not return a Series for an empty frame.
    edge_ids = [getedgeid(u, v, w) for (u, v, w) in zip(*(sdf[c] for c in edge_idx_cols))]
    sdf['edgeid'] = np.asarray(edge_ids, dtype=np.int64)
    # Keep only the records that matched an edge.
    sdf = sdf[sdf.edgeid >= 0]
    return sdf

# 3.3 edge_topology_from_edges
def edge_topology_from_edges(edges: GeoDataFrame) -> Graph:
    """Construct edge topology from the `edges` (the graph with road segments as nodes).

    nx.line_graph() can construct the line graph directly from the original
    graph.

    Two road segments are connected in the result when they share an
    endpoint. A segment that shares no endpoint with any other segment
    produces no pair and therefore does not appear in the returned graph.

    Args:
        edges: GeoDataFrame returned by load_gdfs.

    Returns:
        G: An undirected graph whose node ids are edge ids in `edges`.
    """
    from itertools import combinations

    # List every segment under both of its endpoints (both directions), so
    # that grouping by `u` collects all segments touching a given node.
    incidence = pd.concat(
        [pd.DataFrame({'id': edges.id, 'u': edges.u, 'v': edges.v}),
         pd.DataFrame({'id': edges.id, 'u': edges.v, 'v': edges.u})],
        ignore_index=True)

    pairs = []
    # Because the table is symmetric, grouping by `v` would give exactly the
    # same groups, and an undirected graph needs each pair only once.
    for (_, g) in incidence.groupby('u'):
        # De-duplicate while keeping order: a self-loop segment is listed
        # twice under its node and must not be paired with itself.
        ids = list(dict.fromkeys(g.id))
        pairs.extend(combinations(ids, 2))

    G = Graph()
    G.add_edges_from(pairs)
    return G

# Example of one group (all segments touching node 42421728):
#          id         u         v
# 0         0  42421728  42432736
# 1         1  42421728  42435337
# 2         2  42421728  42421731
# 9898      5  42421728  42421731
# 12211  2318  42421728  42432736
# 12942  3049  42421728  42435337

# 3.4 split_obs_unobs
def split_dataframe(df: DataFrame, ratio: Optional[float]=0.9) -> Tuple[DataFrame, DataFrame]:
    """Split a dataframe into two parts along the row dimension by the given ratio.

    Rows are shuffled with the global NumPy random generator before the
    split, so the result depends on the current random state.

    Args:
        df: The dataframe to split.
        ratio: Fraction of rows that goes into the first part. It must be a
            number; None is not supported despite the Optional annotation.

    Returns:
        The first ``int(len(df) * ratio)`` shuffled rows and the remaining rows.
    """
    k = int(df.shape[0] * ratio)  # number of rows in the first part
    idx = np.random.permutation(df.shape[0])  # shuffled row positions
    return df.iloc[idx[:k]], df.iloc[idx[k:]]


def split_obs_unobs(df: DataFrame, ratio: Optional[float]=0.9) -> Tuple[DataFrame, DataFrame]:
    """Split a one-month dataframe into observed and unobserved dataframes.

    The split is done separately for every (month, day, hour) group.

    Args:
        df: One-month dataframe with ``month``, ``day`` and ``hour`` columns.
        ratio: Fraction of each hour's rows that is treated as observed.

    Returns:
        trn: Observations for a fraction of road segments.
        tst: Ground truth for road segments to be inferred.
    """
    # We should guarantee the results are invariant to calling order.
    # Note that this resets the global NumPy random state.
    np.random.seed(123)
    dfs = [split_dataframe(g, ratio=ratio) for (_, g) in df.groupby(['month', 'day', 'hour'])]
    if not dfs:
        # No groups at all (e.g. empty input): pd.concat would raise on an
        # empty list, so return two empty frames with the same columns.
        return df.iloc[:0], df.iloc[:0]
    trn = pd.concat([observed for (observed, _) in dfs])
    tst = pd.concat([unobserved for (_, unobserved) in dfs])
    return trn, tst

# 3.5 Additional notes on the Uber Movement dataset
| len(osm_ids.osm_way_id) | 33320 |
| len(osm_ids.osm_start_node_id) | 58601 |
| len(osm_ids.osm_end_node_id) | 58605 |
只有osm_way_id、osm_start_node_id、osm_end_node_id 三者合在一起,才能唯一確定一個子路段
原因是,即使我們知道了osm_start_node_id、osm_end_node_id,但因為可能有地面、高架等不同的重疊路段,所以osm_way_id可能會不同(比如上圖,黃顏色的是高架,黑線是地面道路。畫紅色箭頭的是兩個高架匝道,所以相同的osm_start_node_id、osm_end_node_id可能分別對應了地面和高架)
4 dataloader
# One torch_geometric Data object per hour; the last n_test hours form the
# test set and everything before them the training set.
trn_list = [get_data(dG, o, u) for (o, u) in zip(obs[:-n_test], unobs[:-n_test])]
tst_list = [get_data(dG, o, u) for (o, u) in zip(obs[-n_test:], unobs[-n_test:])]
# Example reported by the original author for the first hour:
#   Data(x=[9893, 1], edge_index=[2, 34637], y=[9893, 1])
# New York has 9893 road segments (edges.shape[0]); here each of them is a
# node. Two nodes are connected when the corresponding road segments share
# an endpoint in the road network.

trn_loader = DataLoader(trn_list, batch_size=batch_size)
tst_loader = DataLoader(tst_list, batch_size=batch_size)

# 4.1 get_data
def get_x(df: DataFrame, num_nodes: int) -> torch.FloatTensor:
    """Get the pytorch geometric input feature from an observation dataframe.

    Args:
        df: The observation dataframe with ``edgeid`` being attached and a
            ``speed_mph_mean`` column.
        num_nodes: Number of rows of the result. Node ids are assumed to be
            0 .. num_nodes - 1; rows of `df` whose ``edgeid`` falls outside
            that range are ignored.

    Returns:
        x (num_nodes, num_features): Input feature tensor with one feature.
        Row ``i`` holds the mean speed recorded for edge id ``i`` (the last
        record wins when an id occurs several times) and 0 when `df` has no
        record for it.
    """
    # edge id -> mean speed for the edges present in this dataframe
    node_obs = {u: float(v) for (u, v) in zip(df.edgeid.values, df.speed_mph_mean.values)}
    # (num_nodes, 1): the speed where available, 0 everywhere else
    return torch.FloatTensor([[node_obs.get(u, 0.0)] for u in range(num_nodes)])


def get_data(G: "Graph", obs: DataFrame, unobs: DataFrame) -> "Data":
    """Build the graph sample for one hour.

    Args:
        G: Graph whose nodes are road segments.
        obs: Observed records of that hour (become the input ``x``).
        unobs: Unobserved records of that hour (become the target ``y``).

    Returns:
        Data object with ``x``, ``edge_index`` and ``y``. In both ``x`` and
        ``y`` a node without a record is 0.
    """
    # get_edge_index is not shown here; per the original note it is a helper
    # from utils that converts the edge set of G to a tensor and transposes it.
    edge_index = get_edge_index(G)
    num_nodes = G.number_of_nodes()
    x = get_x(obs, num_nodes)
    y = get_x(unobs, num_nodes)
    return Data(x=x, edge_index=edge_index, y=y)

# 5 Model
?
?
class ChebNet(torch.nn.Module):
    """Graph network that predicts one value per road segment.

    Two Chebyshev graph convolutions, each followed by ReLU and a
    graclus-based max pooling, then a global mean pooling and two fully
    connected layers.

    Args:
        num_features: Number of input features per node.
        num_nodes: Size of the output vector (one entry per graph node).
    """

    def __init__(self, num_features, num_nodes):
        super().__init__()
        # Chebyshev spectral graph convolutions with K=2:
        # num_features -> 32 channels, then 32 -> 64 channels.
        self.conv1 = ChebConv(num_features, 32, 2)
        self.conv2 = ChebConv(32, 64, 2)
        # Two fully connected layers: 64 -> 128 -> num_nodes.
        self.fc1 = torch.nn.Linear(64, 128)
        self.fc2 = torch.nn.Linear(128, num_nodes)

    def forward(self, data):
        """Run the network on a batch and return a (batch, num_nodes) tensor.

        The shapes in the comments are the ones reported by the original
        author for the first graph:
        Batch(x=[9893, 1], edge_index=[2, 34637], y=[9893, 1], batch=[9893], ptr=[2]).
        The reported node counts after pooling are not consistent with each
        other (5870 vs 5847); presumably they come from different runs.
        """
        # Graph convolution + ReLU -> x: [9893, 32]
        x = F.relu(self.conv1(data.x, data.edge_index))
        # Assign every node to a cluster; cluster: [9893]
        cluster = graclus(data.edge_index, num_nodes=x.shape[0])
        # Max-pool the nodes of each cluster and rebuild the coarsened graph,
        # e.g. Batch(x=[5870, 32], edge_index=[2, 22026], batch=[5870])
        data = max_pool(cluster, Data(x=x, batch=data.batch, edge_index=data.edge_index))

        # x: e.g. [5847, 64]
        x = F.relu(self.conv2(data.x, data.edge_index))
        cluster = graclus(data.edge_index, num_nodes=x.shape[0])
        # Only the node features are pooled this time,
        # e.g. x: [3436, 64], batch: [3436]
        x, batch = max_pool_x(cluster, x, data.batch)

        # One embedding per graph: [1, 64]
        x = global_mean_pool(x, batch)
        # [1, 128]
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        # [1, num_nodes]
        x = self.fc2(x)
        return x


# The model is created after the class definition so that the name exists.
model = ChebNet(1, dG.number_of_nodes()).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 6 Training the model
def train(epoch, optimizer, train_loader, model, device):
    """Train `model` for one epoch and print the mean training loss.

    The loss is the mean squared error over the observed entries only
    (entries of ``data.x`` that are greater than 0).

    Args:
        epoch: Epoch number, used only in the printed message.
        optimizer: Optimizer that updates the parameters of `model`.
        train_loader: Iterable of batches that provide ``.to(device)`` and ``.x``.
        model: The network to train.
        device: Device the batches are moved to.
    """
    model.train()
    losses = []
    for data in train_loader:
        data = data.to(device)
        # Predicted speed of every node -> (batch_size, num_nodes)
        xhat = model(data)
        x = data.x.reshape(xhat.shape)
        # Only nodes with an observation (speed > 0) contribute to the loss.
        nz = x > 0
        n_obs = nz.sum().item()
        if n_obs == 0:
            # No observed node in this batch: the loss would be 0/0 (NaN).
            continue
        loss = F.mse_loss(xhat[nz], x[nz], reduction='sum') / n_obs
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
    mean_loss = np.mean(losses) if losses else float('nan')
    print(f"Epoch is {epoch}, Training Loss is {mean_loss:.5f}")


# The loop runs after `train` has been defined.
for epoch in range(n_epochs):
    train(epoch, optimizer, trn_loader, model, device)

# Output reported by the original author. It lists 15 epochs although the
# setup above sets n_epochs = 5, so it presumably comes from a longer run.
# Epoch is 0, Training Loss is 55.03807
# Epoch is 1, Training Loss is 29.84954
# Epoch is 2, Training Loss is 21.36361
# Epoch is 3, Training Loss is 19.08718
# Epoch is 4, Training Loss is 18.11195
# Epoch is 5, Training Loss is 18.60411
# Epoch is 6, Training Loss is 17.49593
# Epoch is 7, Training Loss is 17.83597
# Epoch is 8, Training Loss is 17.09360
# Epoch is 9, Training Loss is 17.26834
# Epoch is 10, Training Loss is 17.15905
# Epoch is 11, Training Loss is 16.93761
# Epoch is 12, Training Loss is 16.54925
# Epoch is 13, Training Loss is 16.65559
# Epoch is 14, Training Loss is 16.71426
# Wall time: 12min 2s

# Summary
以上是生活随笔為你收集整理的pytorch 笔记: 复现论文 Stochastic Weight Completion for Road Networks using Graph Convolutional Networks的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: torch_geometric 笔记:g
- 下一篇: python 包介绍:osmnx