網絡子系統80_inet協議族-SOCK_RAW(一)
// SOCK_RAW 1.1 static struct inet_protosw inetsw_array[] = { ... { .type = SOCK_RAW, .protocol = IPPROTO_IP, /* 通配符 */ .prot = &raw_prot, /* Networking protocol blocks attached to sockets */ .ops = &inet_sockraw_ops, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, } ... } // struct socket->ops字段 // 向上的數據結構 2.1 static const struct proto_ops inet_sockraw_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, .bind = inet_bind, .connect = inet_dgram_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = inet_getname, .poll = datagram_poll, .ioctl = inet_ioctl, .listen = sock_no_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .sendpage = inet_sendpage, }; // sock綁定地址 // inet_sock->inet_rcv_saddr - Bound local ipv4 addr // inet_sock->inet_num - Local port // 步驟: // 1.安全性檢查 // 1.1 地址長度是否足夠 // 1.2 地址類型是否正確 // 1.3 端口是否已經被占用 // 2.設置inet_sock結構的源地址和源端口 3.1 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { //inet使用sockaddr_in struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct sock *sk = sock->sk; //inet使用inet_sock struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); unsigned short snum; int chk_addr_ret; int err; //調用sock提供的bind if (sk->sk_prot->bind) { err = sk->sk_prot->bind(sk, uaddr, addr_len); goto out; } err = -EINVAL; //地址長度 if (addr_len < sizeof(struct sockaddr_in)) goto out; //兼容性檢查 if (addr->sin_family != AF_INET) { err = -EAFNOSUPPORT; if (addr->sin_family != AF_UNSPEC || addr->sin_addr.s_addr != htonl(INADDR_ANY)) goto out; } //檢查地址類型 chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && !(inet->freebind || inet->transparent) && addr->sin_addr.s_addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) 
goto out; //端口號檢查 snum = ntohs(addr->sin_port); err = -EACCES; if (snum && snum < PROT_SOCK && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) goto out; lock_sock(sk); err = -EINVAL; //二次綁定 if (sk->sk_state != TCP_CLOSE || inet->inet_num) goto out_release_sock; //綁定地址 inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; //檢查端口是否已經被綁定 if (sk->sk_prot->get_port(sk, snum)) { inet->inet_saddr = inet->inet_rcv_saddr = 0; err = -EADDRINUSE; goto out_release_sock; } //綁定端口 inet->inet_sport = htons(inet->inet_num); inet->inet_daddr = 0; inet->inet_dport = 0; sk_dst_reset(sk); err = 0; out_release_sock: release_sock(sk); out: return err; } // 發送數據 // 統計流量,綁定端口,由prot完成數據發送 3.2 int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; //統計sock的流量 sock_rps_record_flow(sk); //綁定port if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind && inet_autobind(sk)) return -EAGAIN; //通過prot->sendmsg發送數據 return sk->sk_prot->sendmsg(iocb, sk, msg, size); } // 接收數據 // 統計流量,由prot完成數據接收 3.3 int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; int addr_len = 0; int err; //統計sock流量 sock_rps_record_flow(sk); //通過prot->recvmsg接收數據 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; return err; } // 建立連接 // 步驟: // 1.常規檢查 // 1.1 如果未綁定端口,先隨機分配一個端口並綁定 // 2.由struct sock->sk_prot->connect建立連接 3.4 int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; //未綁定端口,先綁定 if (!inet_sk(sk)->inet_num && inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->connect(sk, uaddr, addr_len); } // 關閉sock // 步驟: // 1.如果struct sock正在連接建立的過程中(TCP_SYN_SENT),調用disconnect關閉連接 // 2.否則,由shutdown關閉連接 // 3.通知sock的擁有者,sock狀態發生了改變 3.5 int inet_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; int 
err = 0; lock_sock(sk); //SS_CONNECTING在TCP中使用 if (sock->state == SS_CONNECTING) { if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE)) sock->state = SS_DISCONNECTING; else sock->state = SS_CONNECTED; } switch (sk->sk_state) { case TCP_CLOSE: err = -ENOTCONN; //由具體的sock完成關閉 default: sk->sk_shutdown |= how; if (sk->sk_prot->shutdown) sk->sk_prot->shutdown(sk, how); break; case TCP_SYN_SENT: //已經發送了syn,則通過disconnect關閉連接 err = sk->sk_prot->disconnect(sk, O_NONBLOCK); sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; break; } //通知sock的擁有者,sock狀態改變 sk->sk_state_change(sk); release_sock(sk); return err; } // 通用的數據報poll調用 3.6 unsigned int datagram_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; unsigned int mask; //在sock上等待數據 sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; //有接收到的數據 if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; //有可寫內存 if (sock_writeable(sk)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); //返回標誌結合,指示poll的結果 return mask; } // 發送page // 步驟: // 1. 檢查是否分配了端口 // 1.1 如果沒有分配,則內核為其選擇一個端口 // 2. 如果sock提供了sendpage,則調用 // 3. 
否則,映射page到msghdr結構,通過sk->sk_prot->sendmsg發送 // 注: // 對於不支持sendpage的sock,填充page數據到msghdr 3.7 ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { struct sock *sk = sock->sk; sock_rps_record_flow(sk); //沒有指定端口號,內核隨機選擇一個端口 if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind && inet_autobind(sk)) return -EAGAIN; //如果sock提供了sendpage,則調用 if (sk->sk_prot->sendpage) return sk->sk_prot->sendpage(sk, page, offset, size, flags); //否則將page映射到msghdr中 return sock_no_sendpage(sock, page, offset, size, flags); } // 映射page到核心地址空間,填充msghdr結構 3.8 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { ssize_t res; struct msghdr msg = {.msg_flags = flags}; struct kvec iov; //映射page結構到核心地址空間 char *kaddr = kmap(page); iov.iov_base = kaddr + offset; iov.iov_len = size; //發送msghdr res = kernel_sendmsg(sock, &msg, &iov, 1, size); //解映射 kunmap(page); return res; } // 發送msghdr // 步驟: // 1.初始化kiocb,用於sock的同步處理 // 2.通過sock->sk_prot->sendmsg發送msghdr // 3.如果底層為異步發送,則等待數據發送完成 3.9 int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct kiocb iocb; struct sock_iocb siocb; int ret; //初始化sock同步控製塊 init_sync_kiocb(&iocb, NULL); iocb.private = &siocb; //通過sk->sk_prot->sendmsg發送msghdr ret = __sock_sendmsg(&iocb, sock, msg, size); //如果底層發送為異步,則等待發送的完成 if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); return ret; }
最後更新:2017-04-03 12:55:18