bapinggaitianli 2019-12-19
一、Socket API编程接口
Libc库中定义的一些应用编程接口(Application Program Interface, API)引用了封装例程(Wrapper Routine),一般一个封装例程对应一个系统调用,大部分封装例程返回一个整数,其值含义依赖于相应的系统调用,-1在多数情况下表示内核不能满足进程的请求,Libc中定义的errno变量包含特定的出错码。C语言中的Socket API就是一种涉及系统调用的API,常用的函数如下:
int socket(int domain, int type, int protocol) //创建一个新的套接字,返回套接字描述符
int connect(int sockfd, struct sockaddr *server_addr, int sockaddr_len) //同远程服务器主动连接,成功时返回0,失败时返回-1
int bind(int sockfd, struct sockaddr* my_addr, int addrlen) //为套接字指明一个本地端点地址,TCP/IP协议使用sockaddr_in结构,包含IP地址和端口号,服务器使用它来指明熟知的端口号,然后等待连接
int listen(int sockfd, int input_queue_size) //面向连接的服务器指明某个套接字,将其置为被动模式,并准备接收传入连接
int accept(int sockfd, void* addr, int* addrlen) //获取传入连接请求,返回新的连接套接字描述符,为每个新连接请求创建一个新的套接字,服务器只对新的连接使用该套接字,原来的监听套接字接受其他的连接请求。新的连接上传输数据使用新的套接字
int sendto(int sockfd, const void* data, int data_len, unsigned int flags, struct sockaddr* remaddr,int remaddr_len) //基于UDP发送数据报,返回实际发送的数据长度,出错时返回-1
int send(int sockfd, const void* data, int data_len, unsigned int flags) //在TCP连接上发送数据,返回成功传送数据的长度,出错时返回-1,将外发数据复制到OS内核中
int recvfrom(int sockfd, void *buf, int buf_len,unsigned int flags,struct sockaddr *from,int *fromlen); //从UDP接收数据,返回实际接收的字节数,失败时返回-1
int recv(int sockfd, void* buf, int buf_len,unsigned int flags) //从TCP接收数据,返回实际接收的数据长度,出错时返回-1。服务器使用其接收客户请求,客户使用它接收服务器的应答。如果没有数据,将阻塞;如果收到的数据大于缓存的大小,TCP会把多余的数据留在接收缓冲区中,等待下一次recv读取(只有数据报套接字才会丢弃多余的数据)
int close(int sockfd) //撤销套接字,如果只有一个进程使用,立即终止连接并撤销该套接字,如果多个进程共享该套接字,将引用数减一,如果引用数降到零,则撤销它

图1 UDP连接涉及的Socket API
图2 TCP连接涉及的Socket API
二、系统调用机制及内核中相关源代码

图3 应用程序、封装例程、系统调用处理程序及系统调用服务例程之间的关系
x86-64 Linux系统启动时依次调用以下过程:start_kernel --> trap_init --> cpu_init --> syscall_init,而syscall_init函数实现了系统调用的初始化,将中断向量与服务例程进行绑定。除此之外,还要进行系统调用表(对应于sys_call_table 数组)的初始化。在linux-5.0.1/arch/x86/kernel/cpu/common.c中定义了syscall_init函数:
/*
 * syscall_init - write the model-specific registers (MSRs) that describe
 * the system-call entry points.
 *
 * In order, this function:
 *   - writes MSR_STAR with 0 and a value that packs __USER32_CS (shifted
 *     left by 16) together with __KERNEL_CS;
 *   - stores the address of entry_SYSCALL_64 in MSR_LSTAR;
 *   - sets up MSR_CSTAR and the three MSR_IA32_SYSENTER_* registers, with
 *     values that depend on CONFIG_IA32_EMULATION;
 *   - writes the flag mask to MSR_SYSCALL_MASK.
 *
 * Takes no arguments and returns nothing.
 *
 * May not be marked __init: used by software suspend.
 */
void syscall_init(void)
{
wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
/* 64-bit entry point: MSR_LSTAR holds the address of entry_SYSCALL_64. */
wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
#ifdef CONFIG_IA32_EMULATION
/* With IA32 emulation: install the compat entry points. */
wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
/*
* This only works on Intel CPUs.
* On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
* This does not cause SYSENTER to jump to the wrong location, because
* AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
*/
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
/* SYSENTER stack: one element past this CPU's cpu_entry_stack(). */
wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
(unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
#else
/*
 * Without IA32 emulation: MSR_CSTAR points at ignore_sysret and the
 * SYSENTER MSRs are given an invalid segment and zero stack/entry values.
 */
wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
#endif
/* Flags to clear on syscall */
wrmsrl(MSR_SYSCALL_MASK,
X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
}在一个终端打开qemu启动MenuOS,在另一个终端用gdb读入linux-5.0.1的vmlinux,通过端口1234与qemu建立连接,在start_kernel,trap_init,cpu_init,syscall_init处设置断点,然后不断continue,跟踪验证内核启动及系统调用初始化过程,如下图所示:

当用户态程序进行系统调用时,CPU会切换到内核态并开始执行一个内核函数,内核实现了很多不同的系统调用,进程必须传递一个叫作系统调用号的参数来指明需要哪个系统调用。对于x86-64系统来说,用户态程序发起系统调用时,进程会跳转到entry_SYSCALL_64,在linux-5.0.1/arch/x86/entry/entry_64.S和linux-5.0.1/arch/x86/entry/common.c中定义了x86-64的系统调用服务例程:
//以下代码来自linux-5.0.1/arch/x86/entry/entry_64.S
GLOBAL(entry_SYSCALL_64_after_hwframe)
...
/* IRQs are off. */
movq %rax, %rdi
movq %rsp, %rsi
call do_syscall_64 /* returns with IRQs disabled */
...//以下代码来自linux-5.0.1/arch/x86/entry/common.c
#ifdef CONFIG_X86_64
__visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
...
if (likely(nr < NR_syscalls)) {
nr = array_index_nospec(nr, NR_syscalls);
regs->ax = sys_call_table[nr](regs);
...
}
#endif
三、socket相关系统调用的内核处理函数
在linux-5.0.1/arch/x86/entry/syscalls/syscall_64.tbl中可查看x86-64系统调用号,及其对应API和入口(此处只摘取Socket相关的部分):
#
# 64-bit system call numbers and entry vectors
#
# The format is:
# <number> <abi> <name> <entry point>
#
# The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls
#
# The abi is "common", "64" or "x32" for this file.
#
...
41 common socket __x64_sys_socket
42 common connect __x64_sys_connect
43 common accept __x64_sys_accept
44 common sendto __x64_sys_sendto
45 64 recvfrom __x64_sys_recvfrom
46 64 sendmsg __x64_sys_sendmsg
47 64 recvmsg __x64_sys_recvmsg
48 common shutdown __x64_sys_shutdown
49 common bind __x64_sys_bind
50 common listen __x64_sys_listen
51 common getsockname __x64_sys_getsockname
52 common getpeername __x64_sys_getpeername
53 common socketpair __x64_sys_socketpair
54 64 setsockopt __x64_sys_setsockopt
55 64 getsockopt __x64_sys_getsockopt
在linux-5.0.1/net/socket.c中可以查看Socket接口对应的Linux内核系统调用处理函数:
/*
* System call vectors.
*
* socketcall(call, args) is a multiplexer: `call` selects one socket
* operation and `args` points to a user-space array holding that
* operation's arguments. The arguments are copied into the local array
* a[], reported to audit_socketcall(), and then forwarded to the matching
* __sys_*() helper.
*
* Returns the selected helper's result, or:
*   -EINVAL if `call` is outside 1..SYS_SENDMMSG, if nargs[call] is larger
*           than a[], or if no case matches;
*   -EFAULT if copy_from_user() reports a failure;
*   the value of audit_socketcall() if it is non-zero.
*
* Argument checking cleaned up. Saved 20% in size.
* This function doesn't need to set the kernel lock because
* it is set by the callees.
*/
SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
{
unsigned long a[AUDITSC_ARGS];
unsigned long a0, a1;
int err;
unsigned int len;
/* Reject call numbers outside the supported range. */
if (call < 1 || call > SYS_SENDMMSG)
return -EINVAL;
/*
 * NOTE(review): array_index_nospec() presumably clamps the index against
 * speculative out-of-bounds use before nargs[] is read; confirm against
 * linux/nospec.h.
 */
call = array_index_nospec(call, SYS_SENDMMSG + 1);
/* nargs[call] is the number of argument bytes to copy for this call. */
len = nargs[call];
if (len > sizeof(a))
return -EINVAL;
/* copy_from_user should be SMP safe. */
if (copy_from_user(a, args, len))
return -EFAULT;
/* Audit hook receives the argument count (bytes / word size) and values. */
err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
if (err)
return err;
/* The first two arguments are used by every case below. */
a0 = a[0];
a1 = a[1];
switch (call) {
case SYS_SOCKET:
err = __sys_socket(a0, a1, a[2]);
break;
case SYS_BIND:
err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
break;
case SYS_CONNECT:
err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
break;
case SYS_LISTEN:
err = __sys_listen(a0, a1);
break;
case SYS_ACCEPT:
/* accept() is accept4() with a flags argument of 0. */
err = __sys_accept4(a0, (struct sockaddr __user *)a1,
(int __user *)a[2], 0);
break;
case SYS_GETSOCKNAME:
err =
__sys_getsockname(a0, (struct sockaddr __user *)a1,
(int __user *)a[2]);
break;
case SYS_GETPEERNAME:
err =
__sys_getpeername(a0, (struct sockaddr __user *)a1,
(int __user *)a[2]);
break;
case SYS_SOCKETPAIR:
err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
break;
case SYS_SEND:
/* send() is sendto() with a NULL address and zero address length. */
err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
NULL, 0);
break;
case SYS_SENDTO:
err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
(struct sockaddr __user *)a[4], a[5]);
break;
case SYS_RECV:
/* recv() is recvfrom() with NULL address and address-length pointers. */
err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
NULL, NULL);
break;
case SYS_RECVFROM:
err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
(struct sockaddr __user *)a[4],
(int __user *)a[5]);
break;
case SYS_SHUTDOWN:
err = __sys_shutdown(a0, a1);
break;
case SYS_SETSOCKOPT:
err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
a[4]);
break;
case SYS_GETSOCKOPT:
err =
__sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
(int __user *)a[4]);
break;
case SYS_SENDMSG:
err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
a[2], true);
break;
case SYS_SENDMMSG:
err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
a[3], true);
break;
case SYS_RECVMSG:
err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
a[2], true);
break;
case SYS_RECVMMSG:
/*
 * a[4] is the timeout pointer. The kernel configuration decides whether
 * it is passed as a struct __kernel_timespec or as a struct
 * old_timespec32; the other pointer argument is NULL.
 */
if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
a[2], a[3],
(struct __kernel_timespec __user *)a[4],
NULL);
else
err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
a[2], a[3], NULL,
(struct old_timespec32 __user *)a[4]);
break;
case SYS_ACCEPT4:
err = __sys_accept4(a0, (struct sockaddr __user *)a1,
(int __user *)a[2], a[3]);
break;
default:
err = -EINVAL;
break;
}
return err;
}
#endif /* __ARCH_WANT_SYS_SOCKETCALL */接下来我们将通过gdb跟踪MenuOS中replyhi和hello指令的执行过程来了解Socket相关系统调用的内核函数。先打开linuxnet/lab3/main.c查看指令的实现代码:
#include"syswrapper.h"
#define MAX_CONNECT_QUEUE 1024
int Replyhi()
{
char szBuf[MAX_BUF_LEN] = "\0";
char szReplyMsg[MAX_BUF_LEN] = "hi\0";
InitializeService();
while (1)
{
ServiceStart();
RecvMsg(szBuf);
SendMsg(szReplyMsg);
ServiceStop();
}
ShutdownService();
return 0;
}
/*
 * StartReplyhi - menu command that starts the Replyhi() server in a child
 * process.
 *
 * argc/argv follow the menu command signature and are not used.
 *
 * The parent prints a prompt and returns immediately. The child runs
 * Replyhi(), which loops forever unless its service setup fails, so the
 * "Service Started" line after it is only printed when Replyhi() returns.
 *
 * Returns 0. If fork() fails the whole process exits with status -1.
 */
int StartReplyhi(int argc, char *argv[])
{
    int pid;

    (void)argc;
    (void)argv;

    /* fork another process */
    pid = fork();
    if (pid < 0)
    {
        /* error occurred */
        fprintf(stderr, "Fork Failed!");
        exit(-1);
    }
    else if (pid == 0)
    {
        /* child process */
        Replyhi();
        printf("Reply hi TCP Service Started!\n");
    }
    else
    {
        /* parent process */
        printf("Please input hello...\n");
    }
    /* The original fell off the end of a non-void function; return a defined value. */
    return 0;
}
/*
 * Hello - menu command acting as the TCP client: connects to the service
 * at IP_ADDR:PORT, sends "hello", receives one reply and closes the
 * connection.
 *
 * argc/argv are not used. OpenRemoteService() contains a `return -1` that
 * leaves this function if connect() fails; otherwise the function
 * returns 0.
 */
int Hello(int argc, char *argv[])
{
/* Receive buffer for the reply, and the message to send. */
char szBuf[MAX_BUF_LEN] = "\0";
char szMsg[MAX_BUF_LEN] = "hello\0";
/* Creates the socket, connects, and makes newfd refer to the connected socket. */
OpenRemoteService();
SendMsg(szMsg);
RecvMsg(szBuf);
CloseRemoteService();
return 0;
}不难发现Replyhi()和Hello()调用了封装函数InitializeService(),ServiceStart(),RecvMsg(szBuf),SendMsg(),ServiceStop(),OpenRemoteService(),CloseRemoteService(),打开linuxnet/lab3/syswrapper.h查看这些封装函数的实现:
/********************************************************************/
/* Copyright (C) SSE-USTC, 2012 */
/* */
/* FILE NAME : syswraper.h */
/* PRINCIPAL AUTHOR : Mengning */
/* SUBSYSTEM NAME : system */
/* MODULE NAME : syswraper */
/* LANGUAGE : C */
/* TARGET ENVIRONMENT : Linux */
/* DATE OF FIRST RELEASE : 2012/11/22 */
/* DESCRIPTION : the interface to Linux system(socket) */
/********************************************************************/
/*
* Revision log:
*
* Created by Mengning,2012/11/22
*
*/
#ifndef _SYS_WRAPER_H_
#define _SYS_WRAPER_H_
#include<stdio.h>
#include<arpa/inet.h> /* internet socket */
#include<string.h>
#include<unistd.h> /* close */
//#define NDEBUG
#include<assert.h>
#define PORT 5001
#define IP_ADDR "127.0.0.1"
#define MAX_BUF_LEN 1024
/* private macro */
/*
 * PrepareSocket(addr, port) - declare and initialise the socket state used
 * by the other macros in this header.
 *
 * Expands to declarations in the caller's scope:
 *   sockfd     - TCP socket from socket(PF_INET, SOCK_STREAM, 0), or -1
 *   serveraddr - IPv4 address built from addr (dotted string) and port
 *   clientaddr - peer address, zero-filled until accept() fills it in
 *   addr_len   - size of clientaddr, for accept()
 *
 * Both address structures are zeroed first. In the original, clientaddr
 * was left uninitialised, so a client that never calls accept() printed
 * indeterminate bytes from RecvMsg()/SendMsg().
 */
#define PrepareSocket(addr,port) \
    int sockfd = -1; \
    struct sockaddr_in serveraddr; \
    struct sockaddr_in clientaddr; \
    socklen_t addr_len = sizeof(clientaddr); \
    memset(&serveraddr, 0, sizeof(serveraddr)); \
    memset(&clientaddr, 0, sizeof(clientaddr)); \
    serveraddr.sin_family = AF_INET; \
    serveraddr.sin_port = htons(port); \
    serveraddr.sin_addr.s_addr = inet_addr(addr); \
    sockfd = socket(PF_INET, SOCK_STREAM, 0);
/*
 * InitServer() - bind sockfd to serveraddr and put it into listening mode.
 *
 * Declares `ret` in the caller's scope. If bind() or listen() fails, an
 * error is printed, sockfd is closed and the ENCLOSING function returns
 * -1, so this macro may only be used inside a function returning int.
 * The original ignored the result of listen().
 */
#define InitServer() \
    int ret = bind(sockfd, (struct sockaddr *)&serveraddr, sizeof(struct sockaddr)); \
    if (ret == -1) \
    { \
        fprintf(stderr, "Bind Error,%s:%d\n", __FILE__, __LINE__); \
        close(sockfd); \
        return -1; \
    } \
    ret = listen(sockfd, MAX_CONNECT_QUEUE); \
    if (ret == -1) \
    { \
        fprintf(stderr, "Listen Error,%s:%d\n", __FILE__, __LINE__); \
        close(sockfd); \
        return -1; \
    }
/*
 * InitClient() - connect sockfd to serveraddr.
 *
 * Declares `ret` in the caller's scope. If connect() fails, an error is
 * printed, sockfd is closed and the ENCLOSING function returns -1, so this
 * macro may only be used inside a function returning int. The original
 * returned without closing sockfd, leaking the descriptor; InitServer()
 * already closed it on its error path.
 */
#define InitClient() \
    int ret = connect(sockfd, (struct sockaddr *)&serveraddr, sizeof(struct sockaddr)); \
    if (ret == -1) \
    { \
        fprintf(stderr, "Connect Error,%s:%d\n", __FILE__, __LINE__); \
        close(sockfd); \
        return -1; \
    }
/* public macro */
/*
 * All macros below expand to plain statements in the caller's scope and
 * share the variables sockfd, serveraddr, clientaddr, addr_len, ret and
 * newfd by name.
 */
/*
 * InitializeService() - server setup: PrepareSocket(IP_ADDR,PORT) followed
 * by InitServer(). InitServer() contains a `return -1`, so this must be
 * used inside a function returning int.
 */
#define InitializeService() \
PrepareSocket(IP_ADDR,PORT); InitServer();
/* ShutdownService() - close the server socket sockfd. */
#define ShutdownService() \
close(sockfd);
/*
 * OpenRemoteService() - client setup: PrepareSocket(IP_ADDR,PORT), then
 * InitClient() (which contains a `return -1` on connect failure), then
 * declares newfd as a copy of sockfd so that RecvMsg()/SendMsg() use the
 * connected socket.
 */
#define OpenRemoteService() \
PrepareSocket(IP_ADDR,PORT); InitClient(); int newfd = sockfd;
/* CloseRemoteService() - close the client socket sockfd. */
#define CloseRemoteService() \
close(sockfd);
/*
 * ServiceStart() - accept one connection on sockfd into a newly declared
 * newfd, storing the peer address in clientaddr/addr_len. An accept()
 * failure is reported on stderr but execution continues with newfd == -1.
 * Because it declares newfd, it can appear only once per scope.
 */
#define ServiceStart() int newfd = accept( sockfd, (struct sockaddr *)&clientaddr, &addr_len); if(newfd == -1) { fprintf(stderr,"Accept Error,%s:%d\n", __FILE__,__LINE__); }
/* ServiceStop() - close the per-connection socket newfd. */
#define ServiceStop() \
close(newfd);
/*
 * RecvMsg(buf) - receive one message from newfd into buf and log it.
 *
 * buf must be a writable char buffer of at least MAX_BUF_LEN bytes. At
 * most MAX_BUF_LEN - 1 bytes are read and the data is NUL-terminated
 * before being printed with "%s". The original read up to MAX_BUF_LEN
 * bytes and never terminated the buffer, so the log could show stale bytes
 * from an earlier message or read past the end of buf.
 *
 * Uses `ret`, `newfd` and `clientaddr` from the caller's scope; the recv()
 * result is left in `ret`.
 */
#define RecvMsg(buf) \
    ret = recv(newfd, buf, MAX_BUF_LEN - 1, 0); \
    if (ret > 0) \
    { \
        (buf)[ret] = '\0'; \
        printf("recv \"%s\" from %s:%d\n", \
               buf, \
               (char *)inet_ntoa(clientaddr.sin_addr), \
               ntohs(clientaddr.sin_port)); \
    }
/*
 * SendMsg(buf) - send the NUL-terminated string buf on newfd and log it.
 *
 * strlen(buf) bytes are sent (the terminator is not transmitted). On
 * success the text actually sent is logged. The original always logged the
 * fixed text `rely "hi"`, which was misspelled and wrong for any other
 * message, such as the client's "hello".
 *
 * Uses `ret`, `newfd` and `clientaddr` from the caller's scope; the send()
 * result is left in `ret`.
 */
#define SendMsg(buf) \
    ret = send(newfd, buf, strlen(buf), 0); \
    if (ret > 0) \
    { \
        printf("send \"%s\" to %s:%d\n", \
               buf, \
               (char *)inet_ntoa(clientaddr.sin_addr), \
               ntohs(clientaddr.sin_port)); \
    }
#endif /* _SYS_WRAPER_H_ */不难发现,过程中涉及到的Socket相关API有socket(),bind(),listen(),accept(),recv(),send(),close(),connect()。于是我们在一个终端打开qemu启动MenuOS(指令中去掉 -S),在另一个终端用gdb读入linux-5.0.1的vmlinux,通过端口1234与qemu建立连接,在相关的系统调用内核处理函数处设置断点,如下图所示:

先在gdb输入一次continue令MenuOS完成启动,然后在qemu中输入replyhi,再在gdb中不断continue,显示的调用过程如下:

然后在qemu中输入hello,再在gdb中不断continue,显示的调用过程如下:


最后查看qemu发现指令已经完整运行:

参考文献:
1.https://github.com/torvalds/linux