mirahs 2019-06-27
poll是select的升级版:它去除了select最多只能监视1024个描述符的限制,并且不再像select那样用三个位图(读、写、异常)来描述关注的事件,而是统一用一个pollfd结构体数组(通过指针传入)来实现。
/*
 * sys_poll - kernel entry point for the poll system call.
 *
 * @ufds:    user-space array of pollfd entries
 * @nfds:    number of entries in @ufds
 * @timeout: caller's timeout; 0 is passed through unchanged, a positive
 *           value is scaled by HZ/1000, and a negative or too-large value
 *           becomes MAX_SCHEDULE_TIMEOUT
 *
 * Copies the user array into a kernel-side linked list of poll_list
 * nodes, runs do_poll() over it, then writes each revents field back to
 * user space.
 *
 * Returns the count reported by do_poll(), or -EINVAL (nfds too large),
 * -ENOMEM (node allocation failed), -EFAULT (user copy failed), or
 * -EINTR (nothing ready and a signal is pending).
 */
asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long timeout)
{
	struct poll_wqueues table;
	struct poll_list *head = NULL;
	struct poll_list *tail = NULL;
	struct poll_list *node;
	struct pollfd __user *uout;
	unsigned int remaining;
	int fdcount, err;

	/*
	 * Sanity check on nfds: reject it only when it exceeds BOTH the
	 * current files->max_fdset and OPEN_MAX.
	 */
	if (nfds > current->files->max_fdset && nfds > OPEN_MAX)
		return -EINVAL;

	/* Validate and scale the timeout. */
	if (timeout) {
		/* Careful about overflow in the intermediate values */
		if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
			timeout = (unsigned long)(timeout*HZ+999)/1000+1;
		else /* Negative or overflow */
			timeout = MAX_SCHEDULE_TIMEOUT;
	}

	/* Initialise the wait-queue table. */
	poll_initwait(&table);

	/*
	 * Build the list: each node carries at most POLLFD_PER_PAGE entries
	 * (roughly one page per node), reached through node->entries. The
	 * loop's job is to bring the pollfd entries in from user space via
	 * copy_from_user(). Usually only a handful of fds are watched, so one
	 * node suffices; with a very large fd set, this argument copying and
	 * allocation becomes the main performance cost of poll.
	 */
	err = -ENOMEM;
	for (remaining = nfds; remaining != 0; remaining -= node->len) {
		unsigned int chunk = remaining > POLLFD_PER_PAGE ? POLLFD_PER_PAGE : remaining;

		/* Node header plus room for `chunk` pollfd entries. */
		node = kmalloc(sizeof(struct poll_list) + sizeof(struct pollfd) * chunk,
			       GFP_KERNEL);
		if (node == NULL)
			goto out_fds;

		node->next = NULL;
		node->len = chunk;

		/* Link the node in before copying so out_fds frees it on failure. */
		if (head == NULL)
			head = node;
		else
			tail->next = node;
		tail = node;

		/* Copy this chunk of entries from user space. */
		if (copy_from_user(node->entries, ufds + nfds - remaining,
				   sizeof(struct pollfd) * node->len)) {
			err = -EFAULT;
			goto out_fds;
		}
	}

	fdcount = do_poll(nfds, head, &table, timeout);

	/* Polling is done: copy the revents fields back to user space. */
	err = -EFAULT;
	uout = ufds;
	for (node = head; node != NULL; node = node->next) {
		struct pollfd *fds = node->entries;
		int j;

		for (j = 0; j < node->len; j++, uout++) {
			if (__put_user(fds[j].revents, &uout->revents))
				goto out_fds;
		}
	}

	err = fdcount;
	if (!fdcount && signal_pending(current))
		err = -EINTR;

out_fds:
	/* Free every list node, then tear down the wait-queue table. */
	node = head;
	while (node != NULL) {
		struct poll_list *next = node->next;

		kfree(node);
		node = next;
	}
	poll_freewait(&table);
	return err;
}
再进入do_poll:
static int do_poll(unsigned int nfds, struct poll_list *list, struct poll_wqueues *wait, long timeout) { int count = 0; poll_table* pt = &wait->pt; if (!timeout) pt = NULL; for (;;) { struct poll_list *walk; set_current_state(TASK_INTERRUPTIBLE); walk = list; //在此处遍历fd,如果传入的fd过多就会造成poll的瓶颈 while(walk != NULL) { do_pollfd( walk->len, walk->entries, &pt, &count); walk = walk->next; } pt = NULL; if (count || !timeout || signal_pending(current)) break; count = wait->error; if (count) break;//当fd中出现数据了跳出 timeout = schedule_timeout(timeout);//更新超时时间,让current进程挂起,别的进程继续运行,等timeout时间到了再返回运行current } __set_current_state(TASK_RUNNING); return count; }
核心则是do_pollfd:
/*
 * do_pollfd - poll each entry in one chunk of pollfd structures.
 *
 * @num:    number of entries in @fdpage
 * @fdpage: array of pollfd entries to examine; revents is written for each
 * @pwait:  in/out pointer to the poll table passed to each file's poll
 *          method; set to NULL as soon as any entry reports a non-zero mask
 * @count:  incremented once for every entry with a non-zero mask
 *
 * Entries with a negative fd get revents = 0; entries whose fd does not
 * resolve to an open file get POLLNVAL.
 */
static void do_pollfd(unsigned int num, struct pollfd * fdpage, poll_table ** pwait, int *count)
{
	unsigned int idx;

	for (idx = 0; idx < num; idx++) {
		struct pollfd *entry = &fdpage[idx];
		unsigned int mask = 0;
		struct file *file;
		int fd = entry->fd;

		/* Negative descriptors are skipped and report no events. */
		if (fd < 0) {
			entry->revents = mask;
			continue;
		}

		file = fget(fd);
		if (file == NULL) {
			/* Descriptor does not refer to an open file. */
			mask = POLLNVAL;
		} else {
			/* Default mask for files without a poll method. */
			mask = DEFAULT_POLLMASK;
			if (file->f_op && file->f_op->poll) {
				/*
				 * Call the file's poll method with the current poll
				 * table so it can register on its wait queue.
				 */
				mask = file->f_op->poll(file, *pwait);
			}
			/* Keep only requested events; POLLERR and POLLHUP are always kept. */
			mask &= entry->events | POLLERR | POLLHUP;
			fput(file);
		}

		if (mask) {
			/* Something is ready: stop passing the poll table and count it. */
			*pwait = NULL;
			(*count)++;
		}

		entry->revents = mask;
	}
}
来看看poll_wait()函数,最终的回调就发生在此处:
//poll_wait就是调用struct poll_table对应的回调函数 static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) { if (p && wait_address) p->qproc(filp, wait_address, p); }
那么我们就可以回顾一下sys_poll的流程了:
源码看到这里,总结一下poll的流程:
/*
 * User-space prototype of poll.
 *   fds     - array of pollfd entries; each has fd, events and revents fields
 *   nfds    - number of entries in fds
 *   timeout - 0 means do not sleep; a negative value waits indefinitely;
 *             NOTE(review): the HZ/1000 scaling in sys_poll suggests a
 *             positive value is in milliseconds - confirm against poll(2)
 */
int poll(struct pollfd *fds, nfds_t nfds, int timeout);
随后便是调用poll函数,poll(&event, 1, -1),-1指的是无限等待
完成poll后对fd进行检验,随后对event中的revents字段进行检验
/*
 * Inspect the results after poll() returns.
 *
 * poll() returns HOW MANY entries have a non-zero revents, not WHERE they
 * are, so every one of the nfds entries passed to poll() must be scanned;
 * stopping at index ret would miss ready entries further down the array.
 * The result field is named `revents`.
 */
for (nfds_t i = 0; i < nfds; ++i) {
	if (event[i].revents == 0)
		continue;	/* nothing happened on this descriptor */

	if (event[i].revents & POLLERR) {
		/* device error: handle it here */
	}

	if (event[i].revents & POLLIN) {
		/* readable: handle the read event here */
	}
}