日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 运维知识 > linux >内容正文

linux

tcp/ip 协议栈Linux内核源码分析六 路由子系统分析一路由缓存

發布時間:2025/4/5 linux 33 豆豆
生活随笔 收集整理的這篇文章主要介紹了 tcp/ip 协议栈Linux内核源码分析六 路由子系统分析一路由缓存 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

內核版本:3.4.39

收到報文或者發送報文的時候都需要查找路由表,頻繁的路由表查找操作是需要耗費一部分CPU的,因此Linux提供了路由緩存來減少路由表的查詢。路由緩存由hash表組織而成,路由緩存的初始化放在路由初始化函數ip_rt_init中;當路由緩存沒有命中的時候會去查找路由表,查找成功則會添加到路由緩存里。

有兩個地方需要查找緩存,一個是ip_rcv()接收報文的時候,另一個是發送報文的時候。

緩存的初始化流程:

看下ip_rt_init函數

//路由緩存初始化 int __init ip_rt_init(void) {int rc = 0;#ifdef CONFIG_IP_ROUTE_CLASSID//基于路由的分類器,每個CPU256個變量ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));if (!ip_rt_acct)panic("IP: failed to allocate ip_rt_acct\n"); #endif//路由緩存池ipv4_dst_ops.kmem_cachep =kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;//初始化每CPU變量if (dst_entries_init(&ipv4_dst_ops) < 0)panic("IP: failed to allocate ipv4_dst_ops counter\n");//初始化每CPU變量if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");//建立路由緩存hash表rt_hash_table = (struct rt_hash_bucket *)alloc_large_system_hash("IP route cache",sizeof(struct rt_hash_bucket),rhash_entries,(totalram_pages >= 128 * 1024) ?15 : 17,0,&rt_hash_log,&rt_hash_mask,rhash_entries ? 0 : 512 * 1024);//初始化路由緩存hash表memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));//每個hash表rt_hash_lock_init();//設置gc時間和緩存最大數量ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);ip_rt_max_size = (rt_hash_mask + 1) * 16;//初始化devinet_init();//注冊通知鏈和創建alias緩存ip_fib_init();//注冊gc任務INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);expires_ljiffies = jiffies;schedule_delayed_work(&expires_work,net_random() % ip_rt_gc_interval + ip_rt_gc_interval);if (ip_rt_proc_init())pr_err("Unable to create route proc files\n"); #ifdef CONFIG_XFRMxfrm_init();xfrm4_init(ip_rt_max_size); #endif//注冊netlink消息rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);#ifdef CONFIG_SYSCTLregister_pernet_subsys(&sysctl_route_ops); #endifregister_pernet_subsys(&rt_genid_ops);return rc; }

函數中rt_hash_table就是路由緩存hash表。

看完了初始化看下查詢是如何調用的。

首先是輸入函數的查詢:

主要的查找函數是ip_route_input_common:

/*
 * ip_route_input_common - find the input route for a received packet.
 *
 * @skb:   packet being routed; on a cache hit its dst is set here
 * @daddr: destination address of the packet
 * @saddr: source address of the packet
 * @tos:   type of service (masked with IPTOS_RT_MASK on the cached path)
 * @dev:   device the packet arrived on
 * @noref: if true, attach the cached dst through the *_noref helpers
 *
 * Looks the packet up in the route cache first (unless rt_caching(net) is
 * false). On a miss, multicast destinations are handled by
 * ip_route_input_mc() and everything else falls through to
 * ip_route_input_slow(), i.e. the routing table lookup.
 *
 * Returns 0 on a cache hit, -EINVAL for a multicast packet that is neither
 * ours nor to be forwarded, otherwise the result of ip_route_input_mc() or
 * ip_route_input_slow().
 */
int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			   u8 tos, struct net_device *dev, bool noref)
{
	struct rtable *rth;
	unsigned hash;
	int iif = dev->ifindex;
	struct net *net;
	int res;

	net = dev_net(dev);

	rcu_read_lock();

	if (!rt_caching(net))
		goto skip_cache;

	tos &= IPTOS_RT_MASK;

	// The input hash is computed from daddr, saddr, the input interface
	// index and rt_genid(net).
	hash = rt_hash(daddr, saddr, iif, rt_genid(net));

	// Walk the hash chain selected by the packet's hash value.
	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
	     rth = rcu_dereference(rth->dst.rt_next)) {
		// Match on addresses, input interface, tos, mark and namespace,
		// and skip entries that have expired.
		if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
		     ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
		     (rth->rt_route_iif ^ iif) |
		     (rth->rt_key_tos ^ tos)) == 0 &&
		    rth->rt_mark == skb->mark &&
		    net_eq(dev_net(rth->dst.dev), net) &&
		    !rt_is_expired(rth)) {
			ipv4_validate_peer(rth);
			// Per the article: packets received from external
			// devices mostly arrive with noref == true, locally
			// looped-back packets take the else branch.
			// NOTE(review): caller behaviour is not visible here.
			if (noref) {
				// Update the entry's use counter and timestamp.
				dst_use_noref(&rth->dst, jiffies);
				skb_dst_set_noref(skb, &rth->dst);
			} else {
				dst_use(&rth->dst, jiffies);
				skb_dst_set(skb, &rth->dst);
			}
			// Count the cache hit.
			RT_CACHE_STAT_INC(in_hit);
			rcu_read_unlock();
			return 0;
		}
		// Count each chain entry examined without a match.
		RT_CACHE_STAT_INC(in_hlist_search);
	}

skip_cache:
	/* Multicast recognition logic is moved from route cache to here.
	   The problem was that too many Ethernet cards have broken/missing
	   hardware multicast filters :-( As result the host on multicasting
	   network acquires a lot of useless route cache entries, sort of
	   SDR messages from all the world. Now we try to get rid of them.
	   Really, provided software IP multicast filter is organized
	   reasonably (at least, hashed), it does not result in a slowdown
	   comparing with route cache reject entries.
	   Note, that multicast routers are not affected, because
	   route cache entry is created eventually.
	 */
	if (ipv4_is_multicast(daddr)) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		if (in_dev) {
			int our = ip_check_mc_rcu(in_dev, daddr, saddr,
						  ip_hdr(skb)->protocol);
			if (our
#ifdef CONFIG_IP_MROUTE
				||
			    (!ipv4_is_local_multicast(daddr) &&
			     IN_DEV_MFORWARD(in_dev))
#endif
			   ) {
				// Reuse the function-scope 'res' rather than
				// declaring a shadowing local.
				res = ip_route_input_mc(skb, daddr, saddr,
							tos, dev, our);
				rcu_read_unlock();
				return res;
			}
		}
		rcu_read_unlock();
		return -EINVAL;
	}

	// Cache miss: fall back to the routing table lookup.
	res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
	rcu_read_unlock();
	return res;
}
EXPORT_SYMBOL(ip_route_input_common);

輸出報文查找函數是ip_route_output_flow(),這是個包裹函數,核心是調用__ip_route_output_key,

該函數如下:

//查找路由,先查找路由緩存,查找不到再查找路由表 struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) {struct rtable *rth;unsigned int hash;if (!rt_caching(net))goto slow_output;hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net));rcu_read_lock_bh();//遍歷hash表,匹配則返回for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;rth = rcu_dereference_bh(rth->dst.rt_next)) {if (rth->rt_key_dst == flp4->daddr &&rth->rt_key_src == flp4->saddr &&rt_is_output_route(rth) &&rth->rt_oif == flp4->flowi4_oif &&rth->rt_mark == flp4->flowi4_mark &&rth->rt_uid == flp4->flowi4_uid &&!((rth->rt_key_tos ^ flp4->flowi4_tos) &(IPTOS_RT_MASK | RTO_ONLINK)) &&net_eq(dev_net(rth->dst.dev), net) &&!rt_is_expired(rth)) {ipv4_validate_peer(rth);dst_use(&rth->dst, jiffies);RT_CACHE_STAT_INC(out_hit);rcu_read_unlock_bh();if (!flp4->saddr)flp4->saddr = rth->rt_src;if (!flp4->daddr)flp4->daddr = rth->rt_dst;//緩存匹配則返回 return rth;}RT_CACHE_STAT_INC(out_hlist_search);}rcu_read_unlock_bh();slow_output://查找路由表return ip_route_output_slow(net, flp4); } EXPORT_SYMBOL_GPL(__ip_route_output_key);

?

參考目錄:

1. 《Linux Kernel Networking - Implementation and Theory》

2. 《深入理解Linux網絡技術內幕》

總結

以上是生活随笔為你收集整理的tcp/ip 协议栈Linux内核源码分析六 路由子系统分析一路由缓存的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。