網絡子系統80_inet協議族-SOCK_RAW(一)
// SOCK_RAW
// Excerpt of the inetsw_array table showing only the SOCK_RAW entry.
// NOTE(review): the "..." lines appear to stand for entries omitted from this excerpt.
1.1 static struct inet_protosw inetsw_array[] =
{
...
{
.type = SOCK_RAW,
.protocol = IPPROTO_IP, /* wildcard */
.prot = &raw_prot, /* Networking protocol blocks attached to sockets */
// socket-level operation table (the struct proto_ops named inet_sockraw_ops)
.ops = &inet_sockraw_ops,
.no_check = UDP_CSUM_DEFAULT,
.flags = INET_PROTOSW_REUSE,
}
...
}
// Value used for the struct socket->ops field
// The upward-facing data structure (operations exposed toward the socket layer)
// Each member maps one socket operation to the handler that implements it
// for PF_INET raw sockets.
2.1 static const struct proto_ops inet_sockraw_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
.release = inet_release,
.bind = inet_bind,
.connect = inet_dgram_connect,
// NOTE(review): the sock_no_* handlers presumably reject operations that
// this socket type does not support - confirm against their definitions.
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = inet_getname,
.poll = datagram_poll,
.ioctl = inet_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
.sendmsg = inet_sendmsg,
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
.sendpage = inet_sendpage,
};
// Bind a local address to the sock
// inet_sock->inet_rcv_saddr - Bound local ipv4 addr
// inet_sock->inet_num - Local port
// Steps:
// 1. Sanity checks
// 1.1 is the address length sufficient
// 1.2 is the address type acceptable
// 1.3 is the port already in use
// 2. Set the source address and source port in the inet_sock structure
// Returns 0 on success, or a negative errno: -EINVAL (address too short,
// socket not in TCP_CLOSE, or already bound), -EAFNOSUPPORT, -EADDRNOTAVAIL,
// -EACCES, -EADDRINUSE. If the protocol supplies its own bind hook, that
// hook's return value is returned unchanged.
3.1 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
// inet uses sockaddr_in
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct sock *sk = sock->sk;
// inet uses inet_sock
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
unsigned short snum;
int chk_addr_ret;
int err;
// If the protocol provides its own bind, delegate to it entirely;
// none of the checks below are run in that case
if (sk->sk_prot->bind) {
err = sk->sk_prot->bind(sk, uaddr, addr_len);
goto out;
}
// err is pre-loaded before each check so a plain "goto out" reports it
err = -EINVAL;
// address length
if (addr_len < sizeof(struct sockaddr_in))
goto out;
// compatibility check: a non-AF_INET family is only tolerated when it is
// AF_UNSPEC together with the INADDR_ANY address
if (addr->sin_family != AF_INET) {
err = -EAFNOSUPPORT;
if (addr->sin_family != AF_UNSPEC ||
addr->sin_addr.s_addr != htonl(INADDR_ANY))
goto out;
}
// check the address type
chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
err = -EADDRNOTAVAIL;
// Reject the address only when no override is set (sysctl_ip_nonlocal_bind,
// freebind, transparent), it is not INADDR_ANY, and its type is none of
// RTN_LOCAL / RTN_MULTICAST / RTN_BROADCAST
if (!sysctl_ip_nonlocal_bind &&
!(inet->freebind || inet->transparent) &&
addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
chk_addr_ret != RTN_LOCAL &&
chk_addr_ret != RTN_MULTICAST &&
chk_addr_ret != RTN_BROADCAST)
goto out;
// port number check: a non-zero port below PROT_SOCK requires
// CAP_NET_BIND_SERVICE in the net's user namespace
snum = ntohs(addr->sin_port);
err = -EACCES;
if (snum && snum < PROT_SOCK &&
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
goto out;
// From here on the sock is locked; failures must leave via out_release_sock
lock_sock(sk);
err = -EINVAL;
// double bind: refuse if the sock is not in TCP_CLOSE or already has a port
if (sk->sk_state != TCP_CLOSE || inet->inet_num)
goto out_release_sock;
// bind the address
inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr;
// check whether the port is already bound; on failure undo the address
// assignment made just above
if (sk->sk_prot->get_port(sk, snum)) {
inet->inet_saddr = inet->inet_rcv_saddr = 0;
err = -EADDRINUSE;
goto out_release_sock;
}
// bind the port: the source port is taken from inet_num (not from snum)
inet->inet_sport = htons(inet->inet_num);
// clear the destination address/port and reset the sock's dst
inet->inet_daddr = 0;
inet->inet_dport = 0;
sk_dst_reset(sk);
err = 0;
out_release_sock:
release_sock(sk);
out:
return err;
}
// Send a message on an inet socket.
// Records the flow with sock_rps_record_flow(), auto-binds the sock when it
// has no local port yet (inet_num == 0) and the protocol does not set
// no_autobind, then hands the message to sk->sk_prot->sendmsg.
// Returns -EAGAIN when inet_autobind() fails; otherwise returns whatever
// sk->sk_prot->sendmsg returns.
3.2 int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
size_t size)
{
	struct sock *sk = sock->sk;
	int wants_autobind;

	// account the flow for this sock
	sock_rps_record_flow(sk);

	// a local port is only picked automatically when none is set and the
	// protocol has not opted out
	wants_autobind = !inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind;
	if (wants_autobind) {
		if (inet_autobind(sk))
			return -EAGAIN;
	}

	// the protocol performs the actual transmission
	return sk->sk_prot->sendmsg(iocb, sk, msg, size);
}
// Receive a message on an inet socket.
// Records the flow with sock_rps_record_flow() and delegates the receive to
// sk->sk_prot->recvmsg, passing the MSG_DONTWAIT bit of flags as a separate
// argument and the remaining flag bits as the flags argument.
// On a non-negative result, msg->msg_namelen is set to the address length
// reported by the protocol. Returns the protocol's result unchanged.
3.3 int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int name_len = 0;
	int nonblock = flags & MSG_DONTWAIT;
	int ret;

	// account the flow for this sock
	sock_rps_record_flow(sk);

	// the protocol performs the actual receive
	ret = sk->sk_prot->recvmsg(iocb, sk, msg, size, nonblock,
				   flags & ~MSG_DONTWAIT, &name_len);
	if (ret < 0)
		return ret;

	// only publish the address length when the receive did not fail
	msg->msg_namelen = name_len;
	return ret;
}
// Connect a datagram-style inet socket.
// Steps:
// 1. If the sock has no local port yet (inet_num == 0), auto-bind one
// 2. Let sk->sk_prot->connect establish the connection
// Returns -EAGAIN when inet_autobind() fails; otherwise returns whatever
// sk->sk_prot->connect returns. The flags argument is not used here.
3.4 int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
	struct sock *sk = sock->sk;

	// no local port yet: bind one before connecting
	if (inet_sk(sk)->inet_num == 0) {
		if (inet_autobind(sk) != 0)
			return -EAGAIN;
	}

	return sk->sk_prot->connect(sk, uaddr, addr_len);
}
// Shut down the sock
// Steps:
// 1. If the struct sock is in the middle of establishing a connection
//    (TCP_SYN_SENT), call disconnect to close the connection
// 2. Otherwise, let shutdown close the connection
// 3. Notify the owner of the sock that its state has changed
// Returns 0, -ENOTCONN when sk_state is TCP_CLOSE, or the result of
// sk_prot->disconnect in the TCP_SYN_SENT case.
// NOTE(review): `how` is OR-ed into sk->sk_shutdown as passed in; this excerpt
// shows no validation or remapping of `how` - confirm against the full source.
3.5 int inet_shutdown(struct socket *sock, int how)
{
struct sock *sk = sock->sk;
int err = 0;
lock_sock(sk);
// SS_CONNECTING is used by TCP
// Resolve a pending SS_CONNECTING socket state: SS_DISCONNECTING if the sock
// is in SYN_SENT, SYN_RECV or CLOSE, otherwise SS_CONNECTED
if (sock->state == SS_CONNECTING) {
if ((1 << sk->sk_state) &
(TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
sock->state = SS_DISCONNECTING;
else
sock->state = SS_CONNECTED;
}
switch (sk->sk_state) {
case TCP_CLOSE:
err = -ENOTCONN;
// no break: falls through into default, so the shutdown below still runs
// the protocol-specific sock completes the shutdown
default:
sk->sk_shutdown |= how;
if (sk->sk_prot->shutdown)
sk->sk_prot->shutdown(sk, how);
break;
case TCP_SYN_SENT:
// a SYN has already been sent, so close the connection via disconnect
err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
break;
}
// notify the owner of the sock that the sock state changed
sk->sk_state_change(sk);
release_sock(sk);
return err;
}
// Generic poll implementation for datagram sockets.
// Registers on the sock's wait queue through sock_poll_wait(), then builds
// the event mask:
//   - POLLIN | POLLRDNORM when sk_receive_queue is not empty
//   - POLLOUT | POLLWRNORM | POLLWRBAND when sock_writeable() reports true
// When the sock is not writeable, SOCK_ASYNC_NOSPACE is set in the owning
// socket's flags instead. Returns the resulting mask.
3.6 unsigned int datagram_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int events = 0;

	// wait on the sock for data
	sock_poll_wait(file, sk_sleep(sk), wait);

	// received data is queued
	if (!skb_queue_empty(&sk->sk_receive_queue))
		events |= POLLIN | POLLRDNORM;

	// not writeable: flag the socket and report only the read side
	if (!sock_writeable(sk)) {
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		return events;
	}

	// writeable: add the write-side events to the result
	return events | POLLOUT | POLLWRNORM | POLLWRBAND;
}
// Send a page on an inet socket.
// Steps:
// 1. Record the flow with sock_rps_record_flow()
// 2. If the sock has no local port (inet_num == 0) and the protocol does not
//    set no_autobind, auto-bind one; return -EAGAIN if that fails
// 3. If the protocol provides sendpage, call it
// 4. Otherwise fall back to sock_no_sendpage()
// Returns the result of whichever send routine was called.
3.7 ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int wants_autobind;

	sock_rps_record_flow(sk);

	// no port assigned yet: have one selected automatically
	wants_autobind = !inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind;
	if (wants_autobind) {
		if (inet_autobind(sk))
			return -EAGAIN;
	}

	// protocol has no sendpage of its own: use the generic fallback
	if (!sk->sk_prot->sendpage)
		return sock_no_sendpage(sock, page, offset, size, flags);

	return sk->sk_prot->sendpage(sk, page, offset, size, flags);
}
// Fallback sendpage: maps the page with kmap(), describes the region
// [offset, offset + size) of the mapping with a single kvec, and sends it
// through kernel_sendmsg() using a msghdr whose msg_flags is set to flags.
// The page is unmapped with kunmap() before returning.
// Returns the value returned by kernel_sendmsg().
3.8 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	struct msghdr hdr = { .msg_flags = flags };
	// map the page so its contents can be addressed
	char *mapped = kmap(page);
	struct kvec vec = {
		.iov_base = mapped + offset,
		.iov_len = size,
	};
	ssize_t sent;

	// send the single-element vector
	sent = kernel_sendmsg(sock, &hdr, &vec, 1, size);

	// drop the mapping
	kunmap(page);
	return sent;
}
// Send a msghdr synchronously.
// Steps:
// 1. Initialise a kiocb with init_sync_kiocb() and attach a sock_iocb to
//    its private field
// 2. Pass the message to __sock_sendmsg()
// 3. If that returns -EIOCBQUEUED, wait for completion with
//    wait_on_sync_kiocb() and return its result instead
// Returns the result of __sock_sendmsg(), or of the wait in case 3.
3.9 int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
	struct sock_iocb sock_cb;
	struct kiocb sync_cb;
	int rc;

	// set up the synchronous control block
	init_sync_kiocb(&sync_cb, NULL);
	sync_cb.private = &sock_cb;

	rc = __sock_sendmsg(&sync_cb, sock, msg, size);
	if (rc != -EIOCBQUEUED)
		return rc;

	// the send was queued: wait until it has completed
	return wait_on_sync_kiocb(&sync_cb);
}
最後更新:2017-04-03 12:55:18