本文分析基於Linux Kernel 3.2.1
更多請查看 Linux內核--網絡內核實現分析
1、套接字的綁定
創建完套接字後,服務器端會在應用層使用bind函數進行套接字的綁定,這時會產生系統調用,由sys_bind內核函數完成套接字的綁定。
系統調用函數的具體實現
- SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) /* bind entry point: resolve fd to a socket, copy in the address, run the security hook, then call the socket's bind op */
- {
- struct socket *sock;
- struct sockaddr_storage address; /* filled in from umyaddr by move_addr_to_kernel() below */
- int err, fput_needed;
-
- sock = sockfd_lookup_light(fd, &err, &fput_needed); /* NULL on failure, with err already set */
- if (sock) {
- err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
- if (err >= 0) { /* continue only when move_addr_to_kernel() did not report an error */
- err = security_socket_bind(sock,
- (struct sockaddr *)&address,
- addrlen);
- if (!err) /* security hook returned 0: hand over to the socket's own bind op */
- err = sock->ops->bind(sock,
- (struct sockaddr *)
- &address, addrlen);
- }
- fput_light(sock->file, fput_needed); /* pairs with the fget_light() done inside sockfd_lookup_light() */
- }
- return err;
- }
首先調用sockfd_lookup_light()函數,通過文件描述符來查找對應的套接字sock。
- static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) /* map fd to its struct socket; returns NULL on failure with *err set */
- {
- struct file *file;
- struct socket *sock;
-
- *err = -EBADF; /* error reported if fget_light() finds no file for fd */
- file = fget_light(fd, fput_needed);
- if (file) {
- sock = sock_from_file(file, err); /* presumably updates *err when file is not a socket -- confirm in sock_from_file() */
- if (sock)
- return sock; /* success: file is not released here; the caller is expected to call fput_light() */
- fput_light(file, *fput_needed); /* no socket behind this file: undo fget_light() */
- }
- return NULL;
- }
上面的函數先調用fget_light函數,通過文件描述符返回對應的文件結構,然後調用sock_from_file函數返回該文件對應的套接字結構體地址,該地址存儲在file->private_data屬性中。
再回到sys_bind函數,在返回了對應的套接字結構之後,調用move_addr_to_kernel將用戶地址空間的socket地址拷貝到內核空間。
然後調用INET協議族操作集中的bind函數,即inet_bind函數,將socket地址(內核空間)和socket綁定。
- int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* bind sock to the address/port in uaddr; returns 0 on success or a negative errno */
- {
- struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
- struct sock *sk = sock->sk;
- struct inet_sock *inet = inet_sk(sk);
- unsigned short snum;
- int chk_addr_ret;
- int err;
-
- // If the protocol supplies its own bind handler (e.g. RAW sockets), use it instead
- if (sk->sk_prot->bind) {
- err = sk->sk_prot->bind(sk, uaddr, addr_len);
- goto out;
- }
- err = -EINVAL;
- .....................
- // Address validity check
- chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
-
- /* Not specified by any standard per-se, however it breaks too
- * many applications when removed. It is unfortunate since
- * allowing applications to make a non-local bind solves
- * several problems with systems using dynamic addressing.
- * (ie. your servers still start up even if your ISDN link
- * is temporarily down)
- */
- err = -EADDRNOTAVAIL;
- if (!sysctl_ip_nonlocal_bind &&
- !(inet->freebind || inet->transparent) &&
- addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
- chk_addr_ret != RTN_LOCAL &&
- chk_addr_ret != RTN_MULTICAST &&
- chk_addr_ret != RTN_BROADCAST)
- goto out;
-
- snum = ntohs(addr->sin_port);
- err = -EACCES;
- if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) /* nonzero ports below PROT_SOCK need CAP_NET_BIND_SERVICE */
- goto out;
-
- /* We keep a pair of addresses. rcv_saddr is the one
- * used by hash lookups, and saddr is used for transmit.
- *
- * In the BSD API these are the same except where it
- * would be illegal to use them (multicast/broadcast) in
- * which case the sending device address is used.
- */
- lock_sock(sk);
-
- /* Check these errors (active socket, double bind). */
- err = -EINVAL;
- if (sk->sk_state != TCP_CLOSE || inet->inet_num)// reject if the socket is NOT in TCP_CLOSE state, or a local port is already bound
- goto out_release_sock;
-
- inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;// set the source addresses
- if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
- inet->inet_saddr = 0; /* Use device */
-
- /* Make sure we are allowed to bind here. */
- if (sk->sk_prot->get_port(sk, snum)) {
- inet->inet_saddr = inet->inet_rcv_saddr = 0;
- err = -EADDRINUSE;
- goto out_release_sock;
- }
-
- if (inet->inet_rcv_saddr)
- sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
- if (snum)
- sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
- inet->inet_sport = htons(inet->inet_num);// set the source port number, marking the port as in use
- inet->inet_daddr = 0;
- inet->inet_dport = 0;
- sk_dst_reset(sk);
- err = 0;
- out_release_sock:
- release_sock(sk);
- out:
- return err;
- }
這樣套接字綁定結束。