當前位置：首頁 > 运维知识 > linux >内容正文

linux

tcp/ip 协议栈Linux源码分析二 IPv4分片报文重组分析二

發布時間：2025/4/5 linux 30 豆豆

生活随笔收集整理的這篇文章主要介紹了 tcp/ip 协议栈Linux源码分析二 IPv4分片报文重组分析二小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

繼續接著上篇講，之前我們說過，收到分片報文后首先會檢查分片報文所占內存是否過大，如果超過閾值的話就要調用ip_evictor函數去釋放一些舊的分片隊列，關于如何釋放分片隊列資源上一篇已經總結完成，接下來來看下進一步的處理，即如何查找分片隊列的，先看下代碼：

/* Lookup (or create) queue header *//* 這里根據分片五元組(源地址、目的地址、IP ID，protocol, user)去查找分片隊列* ip_find函數查找成功就返回對應的分片隊列，查找失敗就新建一個分片隊列，* 如果分配失敗的話就返回NULL;*/if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {int ret;spin_lock(&qp->q.lock);/* 這里是分片隊列排隊的地方，報文的排隊，重組都在這里執行，下面* 再來分析該函數。*/ret = ip_frag_queue(qp, skb);spin_unlock(&qp->q.lock);/* 這是一個包裹函數，減少分片隊列的引用計數，如果沒人引用該* 隊列就調用inet_frag_destroy釋放隊列所占資源。*/ipq_put(qp);return ret;}

首先是調用ip_find()函數，根據報文的IP ID、源地址、目的地址和協議號計算出一個hash值，去hash表中查找對應的分片隊列（匹配隊列時除了這四項還會比較user，合起來就是前面說的五元組），找到的話返回；找不到并且當前hash桶的深度不超過一定的值的話就新建一個隊列，否則就直接返回NULL。

我們看下ip_find()具體的處理流程：

/*
 * Find the correct entry in the "incomplete datagrams" queue for
 * this IP datagram, and create new one, if nothing is found.
 *
 * Returns the struct ipq that embeds the located (or newly created)
 * inet_frag_queue, or NULL when inet_frag_find() yields NULL or an
 * error pointer.
 */
static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
{
	struct ip4_create_arg arg;
	struct inet_frag_queue *fq;
	unsigned int hash;

	/*
	 * The lookup key: the IP header (addresses, protocol, IP ID) plus
	 * "user", which identifies the caller (the original note mentions
	 * the protocol stack itself or netfilter).
	 */
	arg.iph = iph;
	arg.user = user;

	/*
	 * Take the hash table read lock before computing the bucket.
	 * It is not released here: inet_frag_find() drops it.
	 */
	read_lock(&ip4_frags.lock);

	/*
	 * The bucket index is derived from id, saddr, daddr and protocol;
	 * "user" is not hashed, it is only compared by the match callback.
	 */
	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);

	/*
	 * Several queues can share one bucket, so arg is passed along to
	 * pick the exact queue; a new queue is created on a miss.
	 */
	fq = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
	if (!IS_ERR_OR_NULL(fq)) {
		/* struct ipq contains the inet_frag_queue as its member "q". */
		return container_of(fq, struct ipq, q);
	}

	inet_frag_maybe_warn_overflow(fq, pr_fmt());
	return NULL;
}

接著看inet_frag_find 分片隊列查找函數的實現：

/*
 * Look up a fragment queue in bucket f->hash[hash], creating one on a miss.
 *
 * @nf:   per-namespace fragment state the queue must belong to
 * @f:    protocol fragment descriptor (hash table, lock, callbacks)
 * @key:  opaque lookup key handed to f->match() and to the constructor
 * @hash: bucket index computed by the caller
 *
 * Must be entered with f->lock read-held; the lock is released on every
 * path before returning (hence the __releases annotation).
 *
 * Returns the matching queue with its reference count raised, the result
 * of inet_frag_create() when nothing matched and the bucket chain is not
 * longer than INETFRAGS_MAXDEPTH, or ERR_PTR(-ENOBUFS) otherwise.
 */
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
		struct inet_frags *f, void *key, unsigned int hash)
	__releases(&f->lock)
{
	struct inet_frag_queue *q;
	struct hlist_node *n;
	int depth = 0;

	/*
	 * Walk the bucket and let the match callback compare each queue
	 * with the key.  A hit takes a reference and returns the queue;
	 * every miss adds one to the observed chain depth.  For IPv4 the
	 * registered callback is ip4_frag_match() (ip_fragment.c), which
	 * simply compares all five key fields.
	 */
	hlist_for_each_entry(q, n, &f->hash[hash], list) {
		if (q->net == nf && f->match(q, key)) {
			atomic_inc(&q->refcnt);
			read_unlock(&f->lock);
			return q;
		}
		depth++;
	}
	read_unlock(&f->lock);

	/*
	 * Nothing matched -- typically this is the first fragment of a
	 * datagram.  Create a new queue unless the chain is already too
	 * deep, in which case report an error instead.
	 */
	if (depth <= INETFRAGS_MAXDEPTH)
		return inet_frag_create(nf, f, key);
	else
		return ERR_PTR(-ENOBUFS);
}
EXPORT_SYMBOL(inet_frag_find);

match回調函數，處理很簡單，就是將報文的五元組和分片隊列中保存的五元組逐項進行比較：

static int ip4_frag_match(struct inet_frag_queue *q, void *a) {struct ipq *qp;struct ip4_create_arg *arg = a;qp = container_of(q, struct ipq, q);return qp->id == arg->iph->id &&qp->saddr == arg->iph->saddr &&qp->daddr == arg->iph->daddr &&qp->protocol == arg->iph->protocol &&qp->user == arg->user; }

這里重點關注下 inet_frag_create 函數：

/*
 * Create a fragment queue and publish it.
 *
 * Allocates and initialises a queue with inet_frag_alloc(), then hands
 * it to inet_frag_intern() for insertion into the hash table and the
 * LRU list.  Returns NULL when the allocation fails, otherwise whatever
 * inet_frag_intern() returns.
 */
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
		struct inet_frags *f, void *arg)
{
	struct inet_frag_queue *q = inet_frag_alloc(nf, f, arg);

	if (!q)
		return NULL;

	return inet_frag_intern(nf, q, f, arg);
}

inet_frag_alloc就是創建一個分片隊列緩存然后初始化：

static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,struct inet_frags *f, void *arg) {struct inet_frag_queue *q;/* qsize指的是分片隊列的固定大小，等于sizeof(struct ipq) */q = kzalloc(f->qsize, GFP_ATOMIC);if (q == NULL)return NULL;/* 初始化分片隊列，將五元組賦值給分片隊列，* constructor是初始化回調函數是，ip4_frag_init(), 在ipfrag_init()里設置。*/f->constructor(q, arg);/* 增加分片所占用的內存大小 */atomic_add(f->qsize, &nf->mem);/* 初始化該分片隊列的定時器，并設置該定時器的回調處理函數 * 回調處理函數是在系統初始化的時候設置的，ip4的分片定時器* 回調處理函數是ip_expire(), 該定時器的主要作用是重組超時后* 釋放該分片隊列所占資源，防止大量分片長時間占用內存，定時器* 的時間也是可以通過proc文件系統去配置的等。*/setup_timer(&q->timer, f->frag_expire, (unsigned long)q);spin_lock_init(&q->lock);/* 初始化引用計數為1 */atomic_set(&q->refcnt, 1);q->net = nf;return q; }

上面分片隊列創建完成之后，還要調用inet_frag_intern()函數將分片隊列插入到hash表和lru鏈表中，看下這個函數的處理：

/* 分片隊列插入函數 */ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,struct inet_frag_queue *qp_in, struct inet_frags *f,void *arg) {struct inet_frag_queue *qp; #ifdef CONFIG_SMPstruct hlist_node *n; #endifunsigned int hash;/* 因為是修改分片hash表，這里要求寫鎖 */write_lock(&f->lock);/** While we stayed w/o the lock other CPU could update* the rnd seed, so we need to re-calculate the hash* chain. Fortunatelly the qp_in can be used to get one.*//** hashfn函數指針在ipfrag_init()里初始化為ip4_hashfn(),* 就是一個hash函數*/hash = f->hashfn(qp_in); #ifdef CONFIG_SMP/* With SMP race we have to recheck hash table, because* such entry could be created on other cpu, while we* promoted read lock to write lock.* * 在多核處理情況下有可能其它CPU也收到同一路報文然后創建了* 分片隊列，如果出現這種情況就將我們新創建的分片隊列釋放掉，* 即設置last_in標志位，然后調用inet_frag_put()做釋放處理，* 這時候把先創建的分片隊列qp返回就好了。*/hlist_for_each_entry(qp, n, &f->hash[hash], list) {if (qp->net == nf && f->match(qp, arg)) {atomic_inc(&qp->refcnt);write_unlock(&f->lock);qp_in->last_in |= INET_FRAG_COMPLETE;inet_frag_put(qp_in, f);return qp;}} #endifqp = qp_in;/* 重新初始化分片隊列超時時間 */if (!mod_timer(&qp->timer, jiffies + nf->timeout))atomic_inc(&qp->refcnt);atomic_inc(&qp->refcnt);/* 插入到分片hash表的頭部 */hlist_add_head(&qp->list, &f->hash[hash]);/* 插入到lru鏈表的尾部，當分片所占空用過大的時候，* 內核會從lru的首部順序釋放分片隊列，因為排在前面的* 都是舊的分片，新的都掛在lru尾部*/list_add_tail(&qp->lru_list, &nf->lru_list);/* 增加分片隊列個數 */nf->nqueues++;/* 插入結束，釋放寫鎖 */write_unlock(&f->lock);return qp; }

在多核處理情況下可能會重復創建分片隊列，這時候后創建的分片隊列會調用inet_frag_put()函數進行釋放，這是個靜態內聯函數，定義在inet_frag.h里：

/*
 * Drop one reference on a fragment queue.
 *
 * @q: queue whose reference count is decremented
 * @f: protocol fragment descriptor, passed on to inet_frag_destroy()
 *
 * When the count reaches zero the queue is released through
 * inet_frag_destroy().
 */
static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
{
	/* Somebody else still holds a reference: nothing more to do. */
	if (!atomic_dec_and_test(&q->refcnt))
		return;

	inet_frag_destroy(q, f, NULL);
}

inet_frag_destroy()函數上篇博客已介紹，這里不再重復。

ip_find（）函數返回后就已經得到了該報文所對應的分片隊列，這時候再調用ip_frag_queue()進行進一步處理，當然，ip_find也有可能返回失敗，這個時候就只能釋放該報文skb緩存。

ip_frag_queue函數主要進行分片報文的排隊、重組處理，這里需要處理多種異常情況，函數比較長，今晚就先不講了，放在下篇講。

總結

以上是生活随笔為你收集整理的tcp/ip 协议栈Linux源码分析二 IPv4分片报文重组分析二的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： tcp/ip 协议栈Linux源码分析一
下一篇：高收益保本理财产品可靠吗