当前位置: 首页 > news >正文

福建住房与城乡建设网站经典模板网站建设

福建住房与城乡建设网站,经典模板网站建设,黄石网站制作,网站制作还花钱文章目录socket函数API内核源码sock_createinet_createsock_allocsock_map_fd相关数据结构本文将以socket函数为例,分析它在Linux5.12.10内核中的实现,先观此图,宏观上把握它在内核中的函数调用关系:socket函数API socket 函数原…

文章目录

    • socket函数API
    • 内核源码
      • sock_create
      • inet_create
      • sock_alloc
      • sock_map_fd
      • 相关数据结构

本文将以socket函数为例,分析它在Linux5.12.10内核中的实现,先观此图,宏观上把握它在内核中的函数调用关系:
socket接口调用

socket函数API

socket 函数原型:

#include <sys/socket.h>

int socket(int domain, int type, int protocol);

该函数用于创建一个新的socket。

第一个参数:

domain:协议簇,常用的协议簇有:AF_INET, AF_INET6, AF_LOCAL。这个参数决定了socket的地址类型,这个应该很好理解AF_INET用于ipv4地址,AF_INET6用于ipv6地址,AF_LOCAL用于本地进程间通信。

第二个参数:

type:socket类型有好几种,主要是两种:SOCK_STREAM(字节流)、SOCK_DGRAM(数据报),通俗说就是字节流socket和数据报socket,创建的时候使用哪一种由第二个参数指定。stream socket基于TCP协议,是一个有序、可靠、全双工的字节流通道。datagram socket基于UDP协议,不需要建立和维持连接,数据可能会丢失或乱序。

第三个参数:

protocol:指定协议,常用协议有IPPROTO_TCP、IPPROTO_UDP、IPPROTO_SCTP等,分别对应TCP协议、UDP协议、SCTP协议(TIPC协议则是通过AF_TIPC协议簇使用的,并没有对应的IPPROTO_常量)。通常这个参数设置为0,表示由内核根据domain和type选择默认协议。

所以这个函数通常这样用:

int socket_fd = socket(AF_INET, SOCK_STREAM, 0);

在Linux下一个进程默认最多可以打开1024个文件描述符(这是软限制,标准输入、标准输出、标准错误也占用其中3个),也就是说一个进程能创建的socket数量不会超过这个上限,超过就会报Too many open files(这个问题在工作中也会遇到)。通过ulimit命令可以查看到:

# ulimit -a
core file size          (blocks, -c) unlimited
data seg size           (kbytes, -d) unlimited
scheduling priority             (-e) 0
file size               (blocks, -f) unlimited
pending signals                 (-i) 29414
max locked memory       (kbytes, -l) 16384
max memory size         (kbytes, -m) unlimited
open files                      (-n) 1024
pipe size            (512 bytes, -p) 8
POSIX message queues     (bytes, -q) 819200
real-time priority              (-r) 0
stack size              (kbytes, -s) 8192
cpu time               (seconds, -t) unlimited
max user processes              (-u) 29414
virtual memory          (kbytes, -v) unlimited
file locks                      (-x) unlimited

如果你要修改这个上限到2021个:

# ulimit -HSn 2021

内核源码

//~/linux-5.12.10/include/linux/socket.h 头文件
extern int __sys_socket(int family, int type, int protocol);

socket函数调用结束后,用户层看到返回一个整型的句柄,但是内核在内部会创建一系列的socket相关的内核对象(不是只有一个对象)

// ~/linux-5.12.10/net/socket.c line:1481
/* Mask which covers at least up to SOCK_MASK-1.  The* remaining bits are used as flags. */
#define SOCK_TYPE_MASK 0xfint __sys_socket(int family, int type, int protocol)
{int retval;struct socket *sock;int flags;//... 略去参数合法性校验代码retval = sock_create(family, type, protocol, &sock);if (retval < 0)return retval;return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
}SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{return __sys_socket(family, type, protocol);
}

sock_create

sock_create是创建socket的主要位置,其中sock_create又调用__sock_create

// ~/linux-5.12.10/net/socket.c line:1337
/*
//net_proto_family结构体定义了每一个协议族的新建socket句柄
struct net_proto_family {int		family;int		(*create)(struct net *net, struct socket *sock,int protocol, int kern);struct module	*owner;
};static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
*/
int __sock_create(struct net *net, int family, int type, int protocol,struct socket **res, int kern)
{int err;struct socket *sock;const struct net_proto_family *pf;/**      Check protocol is in range*/if (family < 0 || family >= NPROTO)return -EAFNOSUPPORT;if (type < 0 || type >= SOCK_MAX)return -EINVAL;/* Compatibility.This uglymoron is moved from INET layer to here to avoiddeadlock in module load.*/if (family == PF_INET && type == SOCK_PACKET) {pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",current->comm);family = PF_PACKET;}err = security_socket_create(family, type, protocol, kern);if (err)return err;/**	Allocate the socket and allow the family to set things up. if*	the protocol is 0, the family is instructed to select an appropriate*	default.*/// 分配socket对象,如果protocol为0 将会被设置合适的协议sock = sock_alloc();if (!sock) {net_warn_ratelimited("socket: no more sockets\n");return -ENFILE;	/* Not exactly a match, but its theclosest posix thing */}sock->type = type;#ifdef CONFIG_MODULES/* Attempt to load a protocol module if the find failed.** 12/09/1996 Marcin: But! this makes REALLY only sense, if the user* requested real, full-featured networking support upon configuration.* Otherwise module support will break!*/if (rcu_access_pointer(net_families[family]) == NULL)request_module("net-pf-%d", family);
#endif// 获取每个协议族的操作表rcu_read_lock();pf = rcu_dereference(net_families[family]);err = -EAFNOSUPPORT;if (!pf)goto out_release;/** We will call the ->create function, that possibly is in a loadable* module, so we have to bump that loadable module refcnt first.*/if (!try_module_get(pf->owner))goto out_release;/* Now protected by module ref count */rcu_read_unlock();/// 调用指定协议族的创建函数,对于AF_INET对应的是inet_createerr = pf->create(net, sock, protocol, kern);if (err < 0)goto out_module_put;/** Now to bump the refcnt of the [loadable] module that owns this* socket at sock_release time we decrement its refcnt.*/if (!try_module_get(sock->ops->owner))goto out_module_busy;/** Now that we're done with the ->create function, the [loadable]* module can have its refcnt decremented*/module_put(pf->owner);err = security_socket_post_create(sock, family, type, protocol, kern);if (err)goto out_sock_release;*res = sock;return 0;out_module_busy:err = -EAFNOSUPPORT;
out_module_put:sock->ops = NULL;module_put(pf->owner);
out_sock_release:sock_release(sock);return err;out_release:rcu_read_unlock();goto out_sock_release;
}

inet_create

__sock_create 里,首先调用sock_alloc来分配一个struct socket内核对象,接着获取协议族的操作函数表,并调用其create方法。对于AF_INET协议族来说,执行到的是inet_create方法

//~/linux-5.12.10/net/ipv4/af_inet.c
/*
/* This is used to register socket interfaces for IP protocols.  */
struct inet_protosw {struct list_head list;/* These two fields form the lookup key.  */unsigned short	 type;	   /* This is the 2nd argument to socket(2). */unsigned short	 protocol; /* This is the L4 protocol number.  */struct proto	 *prot;const struct proto_ops *ops;unsigned char	 flags;      /* See INET_PROTOSW_* below.  */
};
#define list_for_each_entry_rcu		list_for_each_entry#define list_for_each_entry(pos, head, member)				\for (pos = list_first_entry(head, typeof(*pos), member);	\&pos->member != (head);					\pos = list_next_entry(pos, member))*/
static int inet_create(struct net *net, struct socket *sock, int protocol,int kern)
{struct sock *sk;struct inet_protosw *answer;struct inet_sock *inet;struct proto *answer_prot;unsigned char answer_flags;int try_loading_module = 0;int err;if (protocol < 0 || protocol >= IPPROTO_MAX)return -EINVAL;sock->state = SS_UNCONNECTED;/* Look for the requested type/protocol pair. */
lookup_protocol:err = -ESOCKTNOSUPPORT;rcu_read_lock();list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {err = 0;/* Check the non-wild match. */if (protocol == answer->protocol) {if (protocol != IPPROTO_IP)break;} else {/* Check for the two wild cases. */if (IPPROTO_IP == protocol) {protocol = answer->protocol;break;}if (IPPROTO_IP == answer->protocol)break;}err = -EPROTONOSUPPORT;}//...err = -EPERM;if (sock->type == SOCK_RAW && !kern &&!ns_capable(net->user_ns, CAP_NET_RAW))goto out_rcu_unlock;//将 inet_stream_ops 赋值到sock->opssock->ops = answer->ops;answer_prot = answer->prot;answer_flags = answer->flags;rcu_read_unlock();WARN_ON(!answer_prot->slab);err = -ENOBUFS;// 分配sock对象,并把answer_prot赋值到sock->sk_protsk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);if (!sk)goto out;err = 0;if (INET_PROTOSW_REUSE & answer_flags)sk->sk_reuse = SK_CAN_REUSE;inet = inet_sk(sk);inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;inet->nodefrag = 0;if (SOCK_RAW == sock->type) {inet->inet_num = protocol;if (IPPROTO_RAW == protocol)inet->hdrincl = 1;}if (net->ipv4.sysctl_ip_no_pmtu_disc)inet->pmtudisc = IP_PMTUDISC_DONT;elseinet->pmtudisc = IP_PMTUDISC_WANT;inet->inet_id = 0;// 对sock对象进行初始化sock_init_data(sock, sk);sk->sk_destruct	   = inet_sock_destruct;sk->sk_protocol	   = protocol;sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;inet->uc_ttl	= -1;inet->mc_loop	= 1;inet->mc_ttl	= 1;inet->mc_all	= 1;inet->mc_index	= 0;inet->mc_list	= NULL;inet->rcv_tos	= 0;sk_refcnt_debug_inc(sk);if (inet->inet_num) {/* It assumes that any protocol which allows* the user to assign a number at socket* creation time automatically* shares.*/inet->inet_sport = htons(inet->inet_num);/* Add to protocol hash chains. */err = sk->sk_prot->hash(sk);if (err) {sk_common_release(sk);goto out;}}if (sk->sk_prot->init) {err = sk->sk_prot->init(sk);if (err) {sk_common_release(sk);goto out;}}if (!kern) {err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);if (err) {sk_common_release(sk);goto out;}}
out:return err;
out_rcu_unlock:rcu_read_unlock();goto out;
}

当流程走到inet_create函数的时候,会根据type去inetsw数组中找到对应类型套接字的inet_protosw结构体。协议栈中已经预先定义了PF_INET协议族支持的inet_protosw结构体(见下文的inetsw_array),总共有4个。

找到inet_protosw结构体以后还需要进一步判断protocol和inet_protosw中定义的protocol是否是一致的。内核中定义支持的protocol有一个特殊的值IPPROTO_IP(IPPROTO_IP为0),可以理解为一个通配符也可以理解为一个默认值,就是说我不指定protocol,由内核自己决定使用哪一个protocol。

那么内核根据什么来选择protocol呢?就是根据内核定义的全局inetsw中对应类型的inet_protosw中的protocol。

/* Upon startup we insert all the elements in inetsw_array[] into* the linked list inetsw.*/
// static struct list_head inetsw[SOCK_MAX];
// inetsw_array挂在链表上
static struct inet_protosw inetsw_array[] =
{{.type =       SOCK_STREAM,.protocol =   IPPROTO_TCP,.prot =       &tcp_prot,.ops =        &inet_stream_ops,.flags =      INET_PROTOSW_PERMANENT |INET_PROTOSW_ICSK,},{.type =       SOCK_DGRAM,.protocol =   IPPROTO_UDP,.prot =       &udp_prot,.ops =        &inet_dgram_ops,.flags =      INET_PROTOSW_PERMANENT,},{.type =       SOCK_DGRAM,.protocol =   IPPROTO_ICMP,.prot =       &ping_prot,.ops =        &inet_sockraw_ops,.flags =      INET_PROTOSW_REUSE,},{.type =       SOCK_RAW,.protocol =   IPPROTO_IP,	/* wild card */  //0.prot =       &raw_prot,.ops =        &inet_sockraw_ops,.flags =      INET_PROTOSW_REUSE,}
};// ~/linux-5.12.10/net/ipv4/af_inet.c  inet_create函数
// int socket_fd = socket(AF_INET, SOCK_STREAM, 0);
// 初始化protocol为0, type为SOCK_STREAM
// 经过list_for_each_entry_rcu遍历,protocol修正为IPPROTO_TCP
// protocol = answer->protocol --> protocol = IPPROTO_TCP
// 如果type为SOCK_DGRAM, 则protocol被修正为IPPROTO_UDP
list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {err = 0;/* Check the non-wild match. */if (protocol == answer->protocol) {if (protocol != IPPROTO_IP)break;} else {/* Check for the two wild cases. */if (IPPROTO_IP == protocol) {protocol = answer->protocol;break;}if (IPPROTO_IP == answer->protocol)break;}err = -EPROTONOSUPPORT;}

继续看sock_init_data实现

// ~/linux-5.12.10/net/core/sock.c
void sock_init_data(struct socket *sock, struct sock *sk)
{sk_init_common(sk);sk->sk_send_head	=	NULL;timer_setup(&sk->sk_timer, NULL, 0);sk->sk_allocation	=	GFP_KERNEL;sk->sk_rcvbuf		=	sysctl_rmem_default;sk->sk_sndbuf		=	sysctl_wmem_default;sk->sk_state		=	TCP_CLOSE;sk_set_socket(sk, sock);sock_set_flag(sk, SOCK_ZAPPED);if (sock) {sk->sk_type	=	sock->type;RCU_INIT_POINTER(sk->sk_wq, &sock->wq);sock->sk	=	sk;sk->sk_uid	=	SOCK_INODE(sock)->i_uid;} else {RCU_INIT_POINTER(sk->sk_wq, NULL);sk->sk_uid	=	make_kuid(sock_net(sk)->user_ns, 0);}rwlock_init(&sk->sk_callback_lock);if (sk->sk_kern_sock)lockdep_set_class_and_name(&sk->sk_callback_lock,af_kern_callback_keys + sk->sk_family,af_family_kern_clock_key_strings[sk->sk_family]);elselockdep_set_class_and_name(&sk->sk_callback_lock,af_callback_keys + sk->sk_family,af_family_clock_key_strings[sk->sk_family]);sk->sk_state_change	=	sock_def_wakeup;sk->sk_data_ready	=	sock_def_readable;sk->sk_write_space	=	sock_def_write_space;sk->sk_error_report	=	sock_def_error_report;sk->sk_destruct		=	sock_def_destruct;sk->sk_frag.page	=	NULL;sk->sk_frag.offset	=	0;sk->sk_peek_off		=	-1;sk->sk_peer_pid 	=	NULL;sk->sk_peer_cred	=	NULL;sk->sk_write_pending	=	0;sk->sk_rcvlowat		=	1;sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;sk->sk_stamp = SK_DEFAULT_STAMP;
#if BITS_PER_LONG==32seqlock_init(&sk->sk_stamp_seq);
#endifatomic_set(&sk->sk_zckey, 0);#ifdef CONFIG_NET_RX_BUSY_POLLsk->sk_napi_id		=	0;sk->sk_ll_usec		=	sysctl_net_busy_read;
#endifsk->sk_max_pacing_rate = ~0UL;sk->sk_pacing_rate = ~0UL;WRITE_ONCE(sk->sk_pacing_shift, 10);sk->sk_incoming_cpu = -1;sk_rx_queue_clear(sk);/** Before updating sk_refcnt, we must commit prior changes to memory* (Documentation/RCU/rculist_nulls.rst for details)*/smp_wmb();refcount_set(&sk->sk_refcnt, 1);atomic_set(&sk->sk_drops, 0);
}

当软中断上收到数据包时会调用sk_data_ready函数指针(实际被设置成了sock_def_readable())来唤醒在sock上等待的进程。

sock_alloc

sock_alloc函数分配一个struct socket结构体,将sockfs相关属性填充在socket_alloc结构体的vfs_inode变量中,以限定后续对这个sock文件允许的操作。sock_alloc()里体现了linux一切皆文件(Everything is a file)理念,即使用文件系统来管理socket,这也是VFS所要达到的效果

struct socket *sock_alloc(void)
{struct inode *inode;struct socket *sock;inode = new_inode_pseudo(sock_mnt->mnt_sb);if (!inode)return NULL;sock = SOCKET_I(inode);inode->i_ino = get_next_ino();inode->i_mode = S_IFSOCK | S_IRWXUGO;inode->i_uid = current_fsuid();inode->i_gid = current_fsgid();inode->i_op = &sockfs_inode_ops;return sock;
}

sock_map_fd

/*
 * sock_map_fd - bind a socket to a new file descriptor.
 * @sock:  socket to expose
 * @flags: flags passed to both the fd allocation and the file allocation
 *
 * Returns the installed descriptor, or a negative errno. If no descriptor
 * is available the socket is released here; if the file allocation fails
 * the reserved descriptor is given back.
 */
static int sock_map_fd(struct socket *sock, int flags)
{
	struct file *filp;
	int fd;

	fd = get_unused_fd_flags(flags);
	if (unlikely(fd < 0)) {
		sock_release(sock);
		return fd;
	}

	filp = sock_alloc_file(sock, flags, NULL);
	if (IS_ERR(filp)) {
		put_unused_fd(fd);
		return PTR_ERR(filp);
	}

	fd_install(fd, filp);
	return fd;
}

// linux-5.12.10/fs/file.c
/*
 * __get_unused_fd_flags - reserve a descriptor below @nofile.
 * Forwards to alloc_fd() with a start of 0 and returns its result.
 */
int __get_unused_fd_flags(unsigned flags, unsigned long nofile)
{
	int fd = alloc_fd(0, nofile, flags);

	return fd;
}

/*
 * get_unused_fd_flags - reserve a descriptor, using the RLIMIT_NOFILE
 * resource limit as the upper bound.
 */
int get_unused_fd_flags(unsigned flags)
{
	unsigned long limit = rlimit(RLIMIT_NOFILE);

	return __get_unused_fd_flags(flags, limit);
}

sock_map_fd–>get_unused_fd_flags–>__get_unused_fd_flags–>alloc_fd获取一个可用的fd


/** allocate a file descriptor, mark it busy.*/
static int alloc_fd(unsigned start, unsigned end, unsigned flags)
{struct files_struct *files = current->files;unsigned int fd;int error;struct fdtable *fdt;spin_lock(&files->file_lock);
repeat:fdt = files_fdtable(files);fd = start;if (fd < files->next_fd)fd = files->next_fd;if (fd < fdt->max_fds)fd = find_next_fd(fdt, fd);/** N.B. For clone tasks sharing a files structure, this test* will limit the total number of files that can be opened.*/error = -EMFILE;if (fd >= end)goto out;error = expand_files(files, fd);if (error < 0)goto out;/** If we needed to expand the fs array we* might have blocked - try again.*/if (error)goto repeat;if (start <= files->next_fd)files->next_fd = fd + 1;__set_open_fd(fd, fdt);if (flags & O_CLOEXEC)__set_close_on_exec(fd, fdt);else__clear_close_on_exec(fd, fdt);error = fd;
#if 1/* Sanity check */if (rcu_access_pointer(fdt->fd[fd]) != NULL) {printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);rcu_assign_pointer(fdt->fd[fd], NULL);}
#endifout:spin_unlock(&files->file_lock);return error;
}

sock_map_fd–>sock_alloc_file分配struct file结构,并让socket与file互相指向对方(sock->file与file->private_data)

// net/socket.c
/**	Obtains the first available file descriptor and sets it up for use.**	These functions create file structures and maps them to fd space*	of the current process. On success it returns file descriptor*	and file struct implicitly stored in sock->file.*	Note that another thread may close file descriptor before we return*	from this function. We use the fact that now we do not refer*	to socket after mapping. If one day we will need it, this*	function will increment ref. count on file by 1.**	In any case returned fd MAY BE not valid!*	This race condition is unavoidable*	with shared fd spaces, we cannot solve it inside kernel,*	but we take care of internal coherence yet.*//***	sock_alloc_file - Bind a &socket to a &file*	@sock: socket*	@flags: file status flags*	@dname: protocol name**	Returns the &file bound with @sock, implicitly storing it*	in sock->file. If dname is %NULL, sets to "".*	On failure the return is a ERR pointer (see linux/err.h).*	This function uses GFP_KERNEL internally.*/struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
{struct file *file;if (!dname)dname = sock->sk ? sock->sk->sk_prot_creator->name : "";file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,O_RDWR | (flags & O_NONBLOCK),&socket_file_ops);if (IS_ERR(file)) {sock_release(sock);return file;}sock->file = file;file->private_data = sock;stream_open(SOCK_INODE(sock), file);return file;
}

相关数据结构

// file: include/linux/net.h
/* Wait-queue bundle embedded in struct socket as its 'wq' member. */
struct socket_wq {
	/* Note: wait MUST be first field of socket_wq */
	wait_queue_head_t	wait;
	struct fasync_struct	*fasync_list;
	unsigned long		flags; /* %SOCKWQ_ASYNC_NOSPACE, etc */
	struct rcu_head		rcu;
} ____cacheline_aligned_in_smp;

/**
 *  struct socket - general BSD socket
 *  @state: socket state (%SS_CONNECTED, etc)
 *  @type: socket type (%SOCK_STREAM, etc)
 *  @flags: socket flags (%SOCK_NOSPACE, etc)
 *  @ops: protocol specific socket operations
 *  @file: File back pointer for gc
 *  @sk: internal networking protocol agnostic socket representation
 *  @wq: wait queue for several uses
 */
struct socket {
	socket_state		state;

	short			type;

	unsigned long		flags;

	struct file		*file;
	struct sock		*sk;
	const struct proto_ops	*ops;

	struct socket_wq	wq;
};

至此,一个tcp对象,确切地说是AF_INET协议族下的SOCK_STREAM对象就算创建完成了。这里花费了一个socket系统调用的开销。

ref: https://www.cnblogs.com/liyuanhong/articles/10591069.html

http://www.hengruixuexiao.com/news/23116.html

相关文章:

  • app网站排名在线排名优化
  • 佛山市企业网站建设哪家好国外搜索引擎排行榜
  • 外贸高端网站建设黑帽seo培训多少钱
  • 廊坊住房和城乡建设厅网站网络营销师月薪
  • 企业网站做开放api高质量发展服务业
  • 做的很酷炫的网站seo网站内容优化
  • 数码网站建设图片南宁网站优化
  • 网站建立的意义百度海南分公司
  • 创建个人网站百度风云榜游戏
  • cadisen卡迪森手表网站网站关键词seo优化公司
  • 南宁网站建设方案书网络营销广告案例
  • 学校网站源码 带wap手机端恢复原来的百度
  • 有专门做检验的视频网站吗营销型网站的推广方法
  • 主流网站风格公司做网络推广哪个网站好
  • 如何建立一个网站主页百度地图排名怎么优化
  • 0基础做网站云南今日头条新闻
  • 做模板网站乐云seo效果好小说推文推广平台
  • 淘宝上开做网站的店铺高端网站制作
  • 响应式网站代码怎么在网上推广广告
  • 在哪个网站做一照一码在线代理浏览网站免费
  • 山东建设和城乡建设厅注册中心网站首页百度指数平台
  • wordpress显示切换到桌面版seo就业前景如何
  • 一般电商都是在哪些网站上做新闻软文推广案例
  • 网站怎么做第三方登录推广普通话内容50字
  • wordpress做下载型网站6各大搜索引擎收录入口
  • 做网站实训心得体会适合小学生的新闻事件
  • 做副业赚钱网站台州seo网站排名优化
  • 专业定制网站建设智能优化推广哪些app最挣钱
  • 盱眙网站建设公司南通seo网站优化软件
  • 建行互联网站seo品牌优化整站优化