日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 运维知识 > linux >内容正文

linux

linux socket原理,socket 的工作原理

發布時間:2025/3/20 linux 30 豆豆
生活随笔 收集整理的這篇文章主要介紹了 linux socket原理,socket 的工作原理 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

首先談一下Socket

機制本身,socket為各種協議提供了統一接口的一種ipc機制。在linux中,它由幾個部分組成。為了討論,先討論幾個數據結構,如下所示:

/*
 * Describes one registered protocol family (linux/net.h, 2.6-era kernel).
 * One of these is registered per AF_* family; the kernel keeps them in
 * the net_families[] array, indexed by the family number.
 */
struct net_proto_family {
	int	family;			/* protocol family id (AF_*) */
	/* called to build a new socket of this family */
	int	(*create)(struct socket *sock, int protocol);
	/* counters for the number of different methods of each we support */
	short	authentication;
	short	encryption;
	short	encrypt_net;
	struct module	*owner;		/* module implementing this family */
};

這個數據結構定義在linux的kernel中,在文件中。其中family是用來標示協議號的。而那個create函數指針則表示用來創建socket時所對應的create函數,owner則是這個協議的module結構。同時,還定義一個協議數:

/* Maximum number of protocol families the kernel can register
   (upper bound for the net_families[] array index). */
#define NPROTO	64

再看一下socket的本身的定義:

struct

socket {

socket_state?state;

unsigned

long?flags;

struct proto_ops?*ops;

struct

fasync_struct?*fasync_list;

struct

file?*file;

struct

sock?*sk;

wait_queue_head_t?wait;

short?type;

};

Ops指針所對應的是在這個socket上的一些操作,它的定義如下:

struct

proto_ops {

int?family;

struct

module?*owner;

int?(*release)?(struct socket

*sock);

int?(*bind)?(struct socket *sock,

struct sockaddr *myaddr,

int sockaddr_len);

int?(*connect)?(struct socket

*sock,

struct sockaddr *vaddr,

int sockaddr_len, int flags);

int?(*socketpair)(struct socket *sock1,

struct socket *sock2);

int?(*accept)?(struct socket *sock,

struct socket *newsock, int flags);

int?(*getname)?(struct socket

*sock,

struct sockaddr *addr,

int *sockaddr_len, int peer);

unsigned int?(*poll)?(struct file *file, struct socket *sock,

struct poll_table_struct *wait);

int?(*ioctl)?(struct socket *sock, unsigned int cmd,

unsigned long arg);

int?(*listen)?(struct socket *sock, int len);

int?(*shutdown)?(struct socket *sock, int

flags);

int?(*setsockopt)(struct socket *sock, int level,

int optname, char __user *optval, int optlen);

int?(*getsockopt)(struct socket *sock, int level,

int optname, char __user *optval, int __user *optlen);

int?(*sendmsg)?(struct kiocb *iocb,

struct socket *sock,

struct msghdr *m, size_t total_len);

int?(*recvmsg)?(struct kiocb *iocb,

struct socket *sock,

struct msghdr *m, size_t total_len,

int flags);

int?(*mmap)?(struct file *file, struct socket *sock,

struct vm_area_struct * vma);

ssize_t?(*sendpage)?(struct socket *sock, struct page

*page,

int offset, size_t size, int flags);

};

從這個定義可以看出它定義了很多函數指針,也就是當生成某個協議的socket時,這個協議所對應的函數可以賦給這些函數指針。這樣協議的實現者和socket本身的實現機制就可以分開。

在kernel中定義了一個靜態的全局數組,如下所示:

static struct net_proto_family *net_families[NPROTO];

這個定義在kernel的socket.c中。當linux系統啟動時,系統的init進程會調用sock_init函數對這個數組初始化,

在init進程中調用過程是:start_kernel -> rest_init -> kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND) -> init -> do_basic_setup -> sock_init:

for(i = 0; i < NPROTO; i++)

也就是每一個協議對應這個數組的一項。同時在這個socket.c文件中還定義了一些socket注冊函數:

/*
 * sock_register - register a protocol family with the socket layer.
 * @ops: net_proto_family describing the family (id, create() hook, owner)
 *
 * Stores @ops into the net_families[] slot for its family number so that
 * later socket() calls for that family can find the create() function.
 * Returns 0 on success, -ENOBUFS if the family id is out of range, or
 * -EEXIST if the slot is already taken.
 */
int sock_register(struct net_proto_family *ops)
{
int err;
/* family number must fit inside the net_families[] array */
if (ops->family >= NPROTO) {
printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
return -ENOBUFS;
}
/* take the writer lock: the slot test-and-set must be atomic
   with respect to concurrent registrations and lookups */
net_family_write_lock();
err = -EEXIST;
if (net_families[ops->family] == NULL) {
net_families[ops->family]=ops;
err = 0;
}
net_family_write_unlock();
printk(KERN_INFO "NET: Registered protocol family %d\n",
ops->family);
return err;
}

從這個代碼可以看出,它最主要的工作就是在數組所對應的項中把協議所對應的socket操作函數的net_proto_family結構指針給賦上值,這樣當給定某個協議的socket時,就能通過協議號在這個數組中找到對應的項,進而可以得到這個socket的實際的創建函數,從而在需要生成一個新的這個協議的socket時調用這個創建函數。那么這個socket注冊函數是在哪調用的呢?一般是在協議初始化時被調用的。如tipc協議在linux中是作為一個module來實現的,那么在module的

module_init(tipc_init);這個tipc_init調用關系如下:

tipc_init->start_core-〉start_core_base-〉socket_init-〉sock_register(&tipc_family_ops);

這個tipc_family_ops的定義如下:

static struct net_proto_family tipc_family_ops = {

.owner ?= THIS_MODULE,

.family?= AF_TIPC,

.create?= tipc_create

};

AF_TIPC就是TIPC對應的協議標示,其值是30。而tipc_create函數就是tipc的socket的創建函數。

/*
 * tipc_create - create() hook for AF_TIPC sockets.
 * @sock: generic socket being initialised by the socket layer
 * @protocol: protocol number from the socket() call (0 = default)
 *
 * Allocates a TIPC port and a struct sock, then picks the proto_ops
 * table (stream_ops / packet_ops / msg_ops) according to sock->type.
 * Returns 0 on success or a negative errno.
 */
static int tipc_create(struct socket *sock, int protocol)
{
struct tipc_sock *tsock;
struct tipc_port *port;
struct sock *sk;
u32 ref;
struct task_struct *tsk;
/* copy the shorter of the two comm[] buffers to avoid overrun */
int size = (sizeof(tsock->comm) < sizeof(tsk->comm)) ?
sizeof(tsock->comm) : sizeof(tsk->comm);
/* reject protocol numbers outside the supported stack range */
if ((protocol < 0) || (protocol >= MAX_TIPC_STACKS)) {
warn("Invalid protocol number : %d, permitted range 0 - %d.\n",
protocol, MAX_TIPC_STACKS);
return -EPROTONOSUPPORT;
}
/* non-zero protocols are handed off to a separate handler */
if (protocol != 0) {
int vres = handle_protocol(sock, protocol);
return vres;
}
/* allocate the underlying TIPC port; ref is its handle */
ref = tipc_createport_raw(0, &dispatch, &wakeupdispatch,
TIPC_LOW_IMPORTANCE, 0);
if (unlikely(!ref))
return -ENOMEM;
sock->state = SS_UNCONNECTED;
/* select the proto_ops table by socket type */
switch (sock->type) {
case SOCK_STREAM:
sock->ops = &stream_ops;
break;
case SOCK_SEQPACKET:
sock->ops = &packet_ops;
break;
case SOCK_DGRAM:
tipc_set_portunreliable(ref, 1);
/* fall through: DGRAM is RDM plus the unreliable flag */
case SOCK_RDM:
tipc_set_portunreturnable(ref, 1);
sock->ops = &msg_ops;
sock->state = SS_READY;
break;
default:
/* unknown type: undo the port allocation before failing */
tipc_deleteport(ref);
return -EPROTOTYPE;
}
/* sk_alloc() signature changed in 2.6.12 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12)
sk = sk_alloc(AF_TIPC, GFP_KERNEL, &tipc_proto, 1);
#else
sk = sk_alloc(AF_TIPC, GFP_KERNEL, 1, tipc_cache);
#endif
if (!sk) {
tipc_deleteport(ref);
return -ENOMEM;
}
/* wire the struct sock into the generic socket */
sock_init_data(sock, sk);
init_waitqueue_head(sk->sk_sleep);
sk->sk_rcvtimeo = 8 * HZ;
/* link TIPC per-socket state and the port to each other */
tsock = tipc_sk(sk);
port = tipc_get_port(ref);
tsock->p = port;
port->usr_handle = tsock;
init_MUTEX(&tsock->sem);
memset(tsock->comm, 0, size);
/* record the creating task's pid and name for diagnostics;
   task_lock protects tsk->comm while we copy it */
tsk = current;
task_lock(tsk);
tsock->pid = tsk->pid;
memcpy(tsock->comm, tsk->comm, size);
task_unlock(tsk);
tsock->comm[size-1]=0;
tsock->overload_hwm = 0;
tsock->ovld_limit = tipc_persocket_overload;
dbg("sock_create: %x\n",tsock);
atomic_inc(&tipc_user_count);
return 0;
}

從這個函數的定義中可以看出,根據這個協議的不同的類型,如SOCK_STREAM還是SOCK_SEQPACKET,這給生成socket的ops指針賦予不同的操作類型,如下所示:

static struct proto_ops packet_ops = {

.owner ?= THIS_MODULE,

.family?= AF_TIPC,

.release?= release,

.bind?= bind,

.connect?= connect,

.socketpair?= no_skpair,

.accept?= accept,

.getname?= get_name,

.poll?= poll,

.ioctl?= ioctl,

.listen?= listen,

.shutdown?= shutdown,

.setsockopt?= setsockopt,

.getsockopt?= getsockopt,

.sendmsg?= send_packet,

.recvmsg?= recv_msg,

.mmap?= no_mmap,

.sendpage = no_sendpage

};

static struct proto_ops stream_ops = {

.owner ?= THIS_MODULE,

.family?= AF_TIPC,

.release?= release,

.bind?= bind,

.connect?= connect,

.socketpair?= no_skpair,

.accept?= accept,

.getname?= get_name,

.poll?= poll,

.ioctl?= ioctl,

.listen?= listen,

.shutdown?= shutdown,

.setsockopt?= setsockopt,

.getsockopt?= getsockopt,

.sendmsg?= send_stream,

.recvmsg?= recv_stream,

.mmap?= no_mmap,

.sendpage = no_sendpage

};

以上所討論的都是linux內核當中的部分,但對于應用程序來說,使用socket編程時,并不是直接與這些內核當中的接口打交道的。由于應用程序運行在用戶空間,這些接口是需要在內核空間才可以調到。那么就有一個問題,應用程序是如何調用到這些接口的呢?其中的奧秘就在于glibc這個庫。linux應用程序是調用glibc中的socket函數來編程的,在glibc中socket的函數只有一套,通過以上的這個機制它就可以對應各種協議的socket函數。那么glibc中是如何調用到內核中的函數的呢?

我們先來看一下內核socket.c這個文件,在這個文件中還定義了一個如下的函數:

#ifdef __ARCH_WANT_SYS_SOCKETCALL
/* AL(x): size in bytes of an x-word argument block copied from user space. */
#define AL(x) ((x) * sizeof(unsigned long))
/* Argument-block sizes indexed by SYS_* sub-call number; entry 0 is
   unused (e.g. nargs[SYS_SOCKET] = AL(3) since socket() takes 3 args). */
static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
#undef AL
/*
 * sys_socketcall - single syscall entry point multiplexing all BSD
 * socket operations (i386-style ABI).
 * @call: sub-call number, SYS_SOCKET .. SYS_RECVMSG
 * @args: user-space pointer to the packed argument words
 *
 * Copies the right number of argument words from user space, then
 * dispatches to the individual sys_* implementation.  Returns that
 * function's result, or -EINVAL/-EFAULT on a bad call/pointer.
 */
asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
unsigned long a[6];
unsigned long a0,a1;
int err;
/* reject sub-call numbers outside the table */
if(call<1||call>SYS_RECVMSG)
return -EINVAL;
/* copy exactly as many argument words as this sub-call uses */
if (copy_from_user(a, args, nargs[call]))
return -EFAULT;
err = audit_socketcall(nargs[call]/sizeof(unsigned long), a);
if (err)
return err;
a0=a[0];
a1=a[1];
trace_socket_call(call, a0);
/* demultiplex to the real socket syscall */
switch(call)
{
case SYS_SOCKET:
err = sys_socket(a0,a1,a[2]);
break;
case SYS_BIND:
err = sys_bind(a0,(struct sockaddr __user *)a1, a[2]);
break;
case SYS_CONNECT:
err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
break;
case SYS_LISTEN:
err = sys_listen(a0,a1);
break;
case SYS_ACCEPT:
err = sys_accept(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);
break;
case SYS_GETSOCKNAME:
err = sys_getsockname(a0,(struct sockaddr __user *)a1, (int __user *)a[2]);
break;
case SYS_GETPEERNAME:
err = sys_getpeername(a0, (struct sockaddr __user *)a1, (int __user *)a[2]);
break;
case SYS_SOCKETPAIR:
err = sys_socketpair(a0,a1, a[2], (int __user *)a[3]);
break;
case SYS_SEND:
err = sys_send(a0, (void __user *)a1, a[2], a[3]);
break;
case SYS_SENDTO:
err = sys_sendto(a0,(void __user *)a1, a[2], a[3],
(struct sockaddr __user *)a[4], a[5]);
break;
case SYS_RECV:
err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
break;
case SYS_RECVFROM:
err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
(struct sockaddr __user *)a[4], (int __user *)a[5]);
break;
case SYS_SHUTDOWN:
err = sys_shutdown(a0,a1);
break;
case SYS_SETSOCKOPT:
err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
break;
case SYS_GETSOCKOPT:
err = sys_getsockopt(a0, a1, a[2], (char __user *)a[3], (int __user *)a[4]);
break;
case SYS_SENDMSG:
err = sys_sendmsg(a0, (struct msghdr __user *) a1, a[2]);
break;
case SYS_RECVMSG:
err = sys_recvmsg(a0, (struct msghdr __user *) a1, a[2]);
break;
default:
/* unreachable given the range check above, but keep it defensive */
err = -EINVAL;
break;
}
return err;
}
#endif

這個sys_socketcall是一個系統調用,所有的glibc中的socket函數都是通過這個系統調用進入到內核空間的。我們來看accept的調用。Glibc中accept的調用在:sysdeps\unix\sysv\linux\accept.S文件中:

/* accept.S: builds accept() by parameterising the generic socket.S
   template — "socket" becomes "accept", the exported symbol becomes
   __libc_accept, and the call takes 3 arguments. */
#define socket		accept
#define __socket	__libc_accept
#define NARGS		3
#define NEED_CANCELLATION
/* NOTE(review): the include target was lost in extraction; glibc's
   accept.S includes the shared socket.S template here. */
#include <socket.S>
libc_hidden_def (accept)

這段代碼與socket.S一起,是accept()從用戶態進入內核態的關鍵代碼。accept.S中將accept定義為socket,__socket定義為__libc_accept,NARGS定義為3,表示調用參數有3個。接下來包含了socket.S文件,如下:

/* socket.S: generic i386 stub for all socket-family calls.  Loads
   __NR_socketcall into %eax, the SOCKOP_* sub-code into %ebx, a pointer
   to the on-stack argument block into %ecx, then traps into the kernel.
   NOTE(review): the register operands and include names below were
   garbled in extraction and have been reconstructed from the glibc
   i386 socket.S source — verify against the exact glibc version.  */
#include <sysdep-cancel.h>
#include <socketcall.h>
#include <tls.h>

#define P(a, b) P2(a, b)
#define P2(a, b) a##b

	.text
#ifndef __socket
# ifndef NO_WEAK_ALIAS
#  define __socket P(__,socket)
# else
#  define __socket socket
# endif
#endif

.globl __socket
	cfi_startproc
ENTRY (__socket)
#if defined NEED_CANCELLATION && defined CENABLE
	SINGLE_THREAD_P
	jne 1f			/* multi-threaded: take cancellation path */
#endif

	/* Save %ebx (the PIC register) in %edx across the syscall. */
	movl %ebx, %edx
	cfi_register (3, 2)

	movl $SYS_ify(socketcall), %eax	/* syscall number (102) */
	movl $P(SOCKOP_,socket), %ebx	/* sub-code, e.g. SOCKOP_accept = 5 */
	lea 4(%esp), %ecx		/* arg block sits above the return addr */

	ENTER_KERNEL

	movl %edx, %ebx			/* restore %ebx */
	cfi_restore (3)

	cmpl $-125, %eax		/* in the errno range? */
	jae SYSCALL_ERROR_LABEL

L(pseudo_end):
	ret

#if defined NEED_CANCELLATION && defined CENABLE
	/* Cancellable variant: enable cancellation around the trap and
	   restore the previous state afterwards. */
1:	pushl %esi
	cfi_adjust_cfa_offset(4)
	CENABLE
	movl %eax, %esi			/* remember old cancellation state */
	cfi_offset(6, -8)
	movl %ebx, %edx
	cfi_register (3, 2)
	movl $SYS_ify(socketcall), %eax
	movl $P(SOCKOP_,socket), %ebx
	lea 8(%esp), %ecx		/* skip saved %esi as well */
	ENTER_KERNEL
	movl %edx, %ebx
	cfi_restore (3)
	xchgl %esi, %eax		/* swap result and saved state */
	CDISABLE
	movl %esi, %eax			/* result back into %eax */
	popl %esi
	cfi_restore (6)
	cfi_adjust_cfa_offset(-4)
	cmpl $-125, %eax
	jae SYSCALL_ERROR_LABEL
	ret
#endif

	cfi_endproc
PSEUDO_END (__socket)

#ifndef NO_WEAK_ALIAS
weak_alias (__socket, socket)
#endif

在sysdeps\unix\sysv\linux\i386\sysdep.h文件中

#undef SYS_ify
/* Map a syscall name to its number macro:
   SYS_ify(socketcall) expands to __NR_socketcall. */
#define SYS_ify(syscall_name)	__NR_##syscall_name

可以看到,通過SYS_ify(socketcall),我們得到了__NR_socketcall

在內核linux/include/asm/unistd.h中,定義了:

/* Excerpt of the i386 syscall number table (asm/unistd.h). */
#define __NR_restart_syscall	0
#define __NR_exit		1
#define __NR_fork		2
#define __NR_read		3
/* ... entries omitted ... */
#define __NR_socketcall		102
/* ... entries omitted ... */

通過movl $SYS_ify(socketcall), %eax我們可以看到,__NR_socketcall被定義為102,上面一行的代碼即是將eax的值賦成102,即此系統調用的調用號。

下面我們看movl $P(SOCKOP_,socket), %ebx這一句。在socketcall.h中有相應的定義:

在glibc的sysdeps\unix\sysv\linux\socketcall.h文件中,定于如下:

/* socketcall sub-operation codes (glibc socketcall.h); these select
   the branch taken inside the kernel's sys_socketcall dispatcher. */
#define SOCKOP_socket		1
#define SOCKOP_bind		2
#define SOCKOP_connect		3
#define SOCKOP_listen		4
#define SOCKOP_accept		5
#define SOCKOP_getsockname	6
#define SOCKOP_getpeername	7
#define SOCKOP_socketpair	8
#define SOCKOP_send		9
#define SOCKOP_recv		10
#define SOCKOP_sendto		11
#define SOCKOP_recvfrom		12
#define SOCKOP_shutdown		13
#define SOCKOP_setsockopt	14
#define SOCKOP_getsockopt	15
#define SOCKOP_sendmsg		16
#define SOCKOP_recvmsg		17

那么這行代碼的意思就是將相應的操作碼賦予ebx,accept的操作碼是5。在sysdeps\unix\sysv\linux\i386\sysdep.h文件中,ENTER_KERNEL定義為:

/* How glibc enters the kernel on i386: the fast sysenter path via the
   vDSO when available, otherwise the classic int $0x80 trap. */
#ifdef I386_USE_SYSENTER
# ifdef SHARED
#  define ENTER_KERNEL call *%gs:SYSINFO_OFFSET
# else
#  define ENTER_KERNEL call *_dl_sysinfo
# endif
#else
# define ENTER_KERNEL int $0x80
#endif

這就通過中斷進入內核,linux/arch/i386/kernel/entry.S文件中:

… … …

# system call handler stub (arch/i386/kernel/entry.S).
# NOTE(review): register operands were garbled in extraction and have
# been reconstructed from the kernel source — verify against the tree.
ENTRY(system_call)
	pushl %eax			# save orig_eax
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
					# system call tracing in operation / emulation
	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax	# %eax holds the syscall number
	jae syscall_badsys
syscall_call:
	# ... (lines omitted in the original article) ...
#ifdef CONFIG_DPA_ACCOUNTING
	CHECK_DPA(%eax,no_dpa_syscall_enter,dpa_syscall_enter)
#endif
	call *sys_call_table(,%eax,4)	# sys_call_table[102] == sys_socketcall
	movl %eax,EAX(%esp)		# store the return value
syscall_exit:
	# ... (lines omitted in the original article) ...

在linux/arch/i386/kernel/syscall_table.S文件中定義了sys_call_table,而socketcall系統調用在這個表中的定義就是102,這樣傳入eax的也是102,這樣就調用到socketcall系統調用。通過上面sys_socketcall代碼的分析,它基本就是一個socket分發函數。

這樣當應用程序調用如下的一行代碼產生一個tipc的socket時,其調用關系就是:

int sd = socket (AF_TIPC, SOCK_SEQPACKET,0);

glibc的socket匯編代碼socket.S,系統調用sys_socketcall,進入內核調用sys_socket-〉sock_create-〉__sock_create-〉tipc_create,由于這個socket是SOCK_SEQPACKET類型,那么它的ops指針被賦為如下的packet_ops:

/* proto_ops table installed for SOCK_SEQPACKET TIPC sockets. */
static struct proto_ops packet_ops = {
	.owner		= THIS_MODULE,
	.family		= AF_TIPC,
	.release	= release,
	.bind		= bind,
	.connect	= connect,
	.socketpair	= no_skpair,
	.accept		= accept,
	.getname	= get_name,
	.poll		= poll,
	.ioctl		= ioctl,
	.listen		= listen,
	.shutdown	= shutdown,
	.setsockopt	= setsockopt,
	.getsockopt	= getsockopt,
	.sendmsg	= send_packet,
	.recvmsg	= recv_msg,
	.mmap		= no_mmap,
	.sendpage	= no_sendpage
};

這樣當應用程序調用glibc的bind,recvmsg等,就會通過系統調用,進而調到這個tipc socket所對應的packet_ops的函數。

總結

以上是生活随笔為你收集整理的linux socket原理,socket 的工作原理的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。