EPOLL事件的两种模型
-
Level Triggered (LT) 水平触发
- socket接收缓冲区不为空 有数据可读 读事件一直触发
- socket发送缓冲区不满 可以继续写入数据 写事件一直触发
符合思维习惯,epoll_wait返回的事件就是socket的状态
-
Edge Triggered (ET) 边沿触发
- socket的接收缓冲区状态变化时触发读事件,即空的接收缓冲区刚接收到数据时触发读事件
- socket的发送缓冲区状态变化时触发写事件,即满的缓冲区刚空出空间时触发写事件,仅在状态变化时触发事件
测试 et 与 lt 读事件
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/epoll.h>
#include <pthread.h>
#include <assert.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#define MAX_EVENT_NUMBER 1024
#define BUFFER_SIZE 10
/*
 * Put fd into non-blocking mode.
 *
 * Returns the file status flags that were in effect before the call, or -1
 * if either fcntl() call fails (errno is left as set by fcntl()).
 */
int setnonblocking(int fd) {
    int old_option = fcntl(fd, F_GETFL);
    if (old_option == -1) {
        /* Never OR O_NONBLOCK into -1: that would hand F_SETFL an all-ones mask. */
        return -1;
    }
    if (fcntl(fd, F_SETFL, old_option | O_NONBLOCK) == -1) {
        return -1;
    }
    return old_option;
}
/*
 * Register fd with the epoll instance epollfd for read readiness.
 *
 * epollfd   - epoll instance to add fd to
 * fd        - descriptor to watch; it is switched to non-blocking mode
 * enable_et - when true, EPOLLET is added to request edge-triggered delivery
 *
 * Nothing is returned; failures are reported on stderr.
 */
void addfd(int epollfd, int fd, bool enable_et) {
    if (fd < 0) {
        // e.g. the result of a failed accept() forwarded by the caller.
        fprintf(stderr, "addfd: invalid fd %d\n", fd);
        return;
    }
    epoll_event event;
    memset(&event, 0, sizeof(event));  // data is a union; clear the bytes that fd does not cover
    event.data.fd = fd;
    event.events = EPOLLIN;
    if (enable_et) {
        event.events |= EPOLLET;
    }
    // Switch to non-blocking before registering so no event is ever delivered
    // for a descriptor that could still block.
    if (setnonblocking(fd) == -1) {
        perror("addfd: fcntl");
    }
    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event) == -1) {
        perror("addfd: epoll_ctl");
    }
}
/*
 * Level-triggered handler for one batch of epoll_wait() results.
 *
 * events   - array filled in by epoll_wait()
 * number   - count of valid entries in events
 * epollfd  - epoll instance that new connections are registered with
 * listenfd - listening socket; readiness on it means pending connections
 *
 * Each readable notification performs a single recv() of at most
 * BUFFER_SIZE-1 bytes; connections are registered without EPOLLET.
 */
void lt(epoll_event *events, int number, int epollfd, int listenfd) {
    char buf[BUFFER_SIZE];
    for (int i = 0; i < number; i++) {
        int sockfd = events[i].data.fd;
        if (sockfd == listenfd) {
            // Drain the accept queue. addfd() makes the listener non-blocking,
            // so the loop ends with EAGAIN, and no pending connection is left
            // behind if the listener happens to be registered edge-triggered.
            while (true) {
                sockaddr_in client_address;
                socklen_t client_addrlen = sizeof(client_address);
                int connfd = accept(listenfd, (sockaddr*)&client_address,
                                    &client_addrlen);
                if (connfd < 0) {
                    if (errno == EINTR) {
                        continue;
                    }
                    if (errno != EAGAIN && errno != EWOULDBLOCK) {
                        printf("accept failed, errno=%d\n", errno);
                    }
                    break;
                }
                addfd(epollfd, connfd, false);
            }
        }
        else if (events[i].events & EPOLLIN) {
            printf("lt event trigger once\n");
            memset(buf, '\0', BUFFER_SIZE);
            int ret = recv(sockfd, buf, BUFFER_SIZE-1, 0);
            if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)) {
                // Transient condition on a non-blocking socket, not a dead
                // connection; level-triggered epoll reports remaining data again.
                continue;
            }
            if (ret <= 0) {
                // 0 means the peer closed; a negative value is a real error.
                close(sockfd);
                continue;
            }
            printf("get %d bytes of content: %s\n", ret, buf);
        }
        else {
            printf("something else happened\n");
        }
    }
}
/*
 * Edge-triggered handler for one batch of epoll_wait() results.
 *
 * events   - array filled in by epoll_wait()
 * number   - count of valid entries in events
 * epollfd  - epoll instance that new connections are registered with
 * listenfd - listening socket; readiness on it means pending connections
 *
 * Connections are registered with EPOLLET, so each readable notification
 * is answered by calling recv() until it reports EAGAIN.
 */
void et(epoll_event *events, int number, int epollfd, int listenfd) {
    char buf[BUFFER_SIZE];
    for (int i = 0; i < number; i++) {
        int sockfd = events[i].data.fd;
        if (sockfd == listenfd) {
            // Drain the accept queue. addfd() makes the listener non-blocking,
            // so the loop ends with EAGAIN, and no pending connection is left
            // behind if the listener happens to be registered edge-triggered.
            while (true) {
                sockaddr_in client_address;
                socklen_t client_addrlen = sizeof(client_address);
                int connfd = accept(listenfd, (sockaddr*)&client_address,
                                    &client_addrlen);
                if (connfd < 0) {
                    if (errno == EINTR) {
                        continue;
                    }
                    if (errno != EAGAIN && errno != EWOULDBLOCK) {
                        printf("accept failed, errno=%d\n", errno);
                    }
                    break;
                }
                addfd(epollfd, connfd, true);
            }
        }
        else if (events[i].events & EPOLLIN) {
            // Edge-triggered: everything available must be read now.
            printf("et event trigger once\n");
            // TODO 修改 (marker referenced by the accompanying notes):
            // single-read variant. Uncomment this block and comment out the
            // while loop below to observe that ET does not notify again for
            // data that was left unread.
            // if (true) {
            //     memset(buf, '\0', BUFFER_SIZE);
            //     int ret = recv(sockfd, buf, BUFFER_SIZE-1, 0);
            //     if (ret < 0) {
            //         // EAGAIN/EWOULDBLOCK: nothing left to read
            //         if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
            //             printf("read later\n");
            //             break;
            //         }
            //         printf("some error happens\n");
            //         close(sockfd);
            //         // modified
            //         //break;
            //     }
            //     else if (ret == 0) {
            //         close(sockfd);
            //         // modified
            //         //break;
            //     }
            //     else {
            //         printf("get %d bytes of content: %s\n", ret, buf);
            //     }
            // }
            while (true) {
                memset(buf, '\0', BUFFER_SIZE);
                int ret = recv(sockfd, buf, BUFFER_SIZE-1, 0);
                if (ret < 0) {
                    if (errno == EINTR) {
                        // Interrupted before any data was transferred; retry.
                        continue;
                    }
                    // EAGAIN/EWOULDBLOCK: the receive buffer has been drained.
                    if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
                        printf("read later\n");
                        break;
                    }
                    printf("some error happens\n");
                    close(sockfd);
                    break;
                }
                else if (ret == 0) {
                    // Peer closed the connection.
                    close(sockfd);
                    break;
                }
                else {
                    printf("get %d bytes of content: %s\n", ret, buf);
                }
            }
        }
        else {
            printf("something else happened\n");
        }
    }
}
/*
 * Demo server: listens on argv[1] (port) and optionally argv[2] (IPv4
 * address, default any), then dispatches epoll events to et() or lt().
 *
 * Returns 1 on bad arguments or setup failure, 0 once the event loop ends.
 */
int main(int argc, char *argv[]) {
    if (argc <= 1) {
        printf("usage: %s port_number ip_address\n", basename(argv[0]));
        return 1;
    }
    // strtol instead of atoi so that garbage or out-of-range input is rejected.
    char *end = NULL;
    errno = 0;
    long port = strtol(argv[1], &end, 10);
    if (errno != 0 || end == argv[1] || *end != '\0' || port <= 0 || port > 65535) {
        printf("invalid port: %s\n", argv[1]);
        return 1;
    }
    int ret = 0;
    sockaddr_in address;
    memset(&address, 0, sizeof(address));
    address.sin_family = AF_INET;
    if (argc >= 3) {
        const char *ip = argv[2];
        if (inet_pton(AF_INET, ip, &address.sin_addr) != 1) {
            printf("invalid ip address: %s\n", ip);
            return 1;
        }
    }
    else {
        address.sin_addr.s_addr = htonl(INADDR_ANY);
    }
    address.sin_port = htons((unsigned short)port);

    // Runtime failures are checked explicitly: assert() vanishes under NDEBUG.
    int listenfd = socket(PF_INET, SOCK_STREAM, 0);
    if (listenfd < 0) {
        perror("socket");
        return 1;
    }
    int reuse = 1;
    // Allow the address to be reused.
    ret = setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, (char*)&reuse, sizeof(reuse));
    if (ret == -1) {
        printf("_SetReuseAddr failed, err_code=%d, fd=%d\n", errno, listenfd);
    }
    ret = bind(listenfd, (sockaddr*)&address, sizeof(address));
    if (ret == -1) {
        perror("bind");
        close(listenfd);
        return 1;
    }
    ret = listen(listenfd, 5);
    if (ret == -1) {
        perror("listen");
        close(listenfd);
        return 1;
    }
    epoll_event events[MAX_EVENT_NUMBER];
    int epollfd = epoll_create(5);
    if (epollfd == -1) {
        perror("epoll_create");
        close(listenfd);
        return 1;
    }
    // Register the listener level-triggered so that a connection still
    // waiting in the backlog is reported again by the next epoll_wait().
    addfd(epollfd, listenfd, false);
    while (true) {
        ret = epoll_wait(epollfd, events, MAX_EVENT_NUMBER, -1);
        if (ret < 0) {
            if (errno == EINTR) {
                continue;  // interrupted by a signal, not a failure
            }
            printf("epoll failure\n");
            break;
        }
        //lt(events, ret, epollfd, listenfd); // lt
        et(events, ret, epollfd, listenfd); // et
    }
    close(epollfd);
    close(listenfd);
    return 0;
}
- 首先,注释掉et,使用lt:
- 运行 ./epoll_test 12888 并在另一个窗口用telnet输入超过10个(BUFFERSIZE)字符
$telnet 127.0.0.1 12888
Trying 127.0.0.1...
Connected to 127.0.0.1.
Escape character is '^]'.
12345678901234567890123456
1234567890123456789012345678
- 服务器得到
$./epoll_test 12888
lt event trigger once
get 9 bytes of content: 123456789
lt event trigger once
get 9 bytes of content: 012345678
lt event trigger once
get 9 bytes of content: 90123456
lt event trigger once
get 1 bytes of content:
lt event trigger once
get 9 bytes of content: 123456789
lt event trigger once
get 9 bytes of content: 012345678
lt event trigger once
get 9 bytes of content: 901234567
lt event trigger once
get 3 bytes of content: 8
- 可以看出因为buffersize的限制,服务器端进行了多次读取,event也触发了多次。
- 换成et模式
- 运行服务器后,telnet客户端输入
$telnet 127.0.0.1 12889
Trying 127.0.0.1...
Connected to 127.0.0.1.
Escape character is '^]'.
12345678901234567890123456
1234567890123456789012345678
- 服务器得到
$./epoll_test 12889
et event trigger once
get 9 bytes of content: 123456789
get 9 bytes of content: 012345678
get 9 bytes of content: 90123456
get 1 bytes of content:
read later
et event trigger once
get 9 bytes of content: 123456789
get 9 bytes of content: 012345678
get 9 bytes of content: 901234567
get 3 bytes of content: 8
read later
- 可以看出,每次客户端的字符串,只触发了一次
- 上面的例子还不够严谨,因为服务器一次已经把字符都读完了。那么如果没读完,会继续触发吗?
- 把上面例子中的TODO 修改放开,然后注释掉while循环
- 运行服务器之后,telnet输入长字符串
$telnet 127.0.0.1 12888
Trying 127.0.0.1...
Connected to 127.0.0.1.
Escape character is '^]'.
12345678901234567890123456
- 服务器端只显示了BUFFERSIZE长度的一行,没有读入的数据也没有进行event触发:
$./epoll_test 12888
et event trigger once
get 9 bytes of content: 123456789
- 如果客户端再输入一行
$telnet 127.0.0.1 12888
Trying 127.0.0.1...
Connected to 127.0.0.1.
Escape character is '^]'.
12345678901234567890123456
1234567890123456789012345678
- 服务器端也仅仅把之前没读入的上一次客户端发来的数据中,再读入BUFFERSIZE长度
$./epoll_test 12888
et event trigger once
get 9 bytes of content: 123456789
et event trigger once
get 9 bytes of content: 012345678
ET还是LT?
- LT的处理过程:
- accept一个连接,添加到epoll中监听EPOLLIN事件
- 当EPOLLIN事件到达时,read fd中的数据并处理
- 当需要写出数据时,把数据write到fd中;如果数据较大,无法一次性写出,那么在epoll中监听EPOLLOUT事件
- 当EPOLLOUT事件到达时,继续把数据write到fd中;如果数据写出完毕,那么在epoll中关闭EPOLLOUT事件
- ET的处理过程:
- accept一个连接,添加到epoll中监听EPOLLIN|EPOLLOUT事件
- 当EPOLLIN事件到达时,read fd中的数据并处理,read需要一直读,直到返回EAGAIN为止
- 当需要写出数据时,把数据write到fd中,直到数据全部写完,或者write返回EAGAIN
- 当EPOLLOUT事件到达时,继续把数据write到fd中,直到数据全部写完,或者write返回EAGAIN
从ET的处理过程中可以看到,ET的要求是需要一直读写,直到返回EAGAIN,否则就会遗漏事件。而LT的处理过程中,直到返回EAGAIN不是硬性要求,但通常的处理过程都会读写直到返回EAGAIN,但LT比ET多了一个开关EPOLLOUT事件的步骤
LT的编程与poll/select接近,符合一直以来的习惯,不易出错
ET的编程可以做到更加简洁,某些场景下更加高效,但另一方面容易遗漏事件,容易产生bug
例子与测试
性能测试
这里有两个简单的例子演示了LT与ET的用法(其中epoll-et的代码比epoll要少10行):
lt
/*
* 编译:g++ -g epoll.cpp -o epoll --std=c++11
* 运行: ./epoll
* 测试:curl -v localhost:端口
*/
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <unistd.h>
#include <map>
#include <string>
using namespace std;
bool output_log = true;
/*
 * exit_if(cond, fmt, ...): when cond is true, print the printf-style message,
 * then the source location together with errno and its text, and terminate
 * the process with status 1.
 *
 * Wrapped in do { } while (0) so it expands to exactly one statement and can
 * be used in an unbraced if/else. errno is captured before the first printf
 * so that printing cannot change the value being reported.
 */
#define exit_if(r, ...) \
    do { \
        if (r) { \
            int exit_if_errno_ = errno; \
            printf(__VA_ARGS__); \
            printf("%s:%d error no: %d error msg %s\n", __FILE__, __LINE__, exit_if_errno_, strerror(exit_if_errno_)); \
            exit(1); \
        } \
    } while (0)
// Adds O_NONBLOCK to fd's file status flags. If either fcntl call fails,
// exit_if terminates the process.
void setNonBlock(int fd) {
    const int current = fcntl(fd, F_GETFL, 0);
    exit_if(current < 0, "fcntl failed");
    const int rc = fcntl(fd, F_SETFL, current | O_NONBLOCK);
    exit_if(rc < 0, "fcntl failed");
}
// Applies epoll_ctl operation `op` for `fd` on epoll instance `efd`, with
// `events` as the interest mask. Logs the request first; exit_if terminates
// the process if epoll_ctl returns non-zero.
void updateEvents(int efd, int fd, int events, int op) {
    struct epoll_event request;
    memset(&request, 0, sizeof(request));
    request.data.fd = fd;
    request.events = events;
    // Anything other than EPOLL_CTL_MOD is labelled "add" in the log line.
    const char *opName = (op == EPOLL_CTL_MOD) ? "mod" : "add";
    printf("%s fd %d events read %d write %d\n", opName, fd, request.events & EPOLLIN, request.events & EPOLLOUT);
    const int rc = epoll_ctl(efd, op, fd, &request);
    exit_if(rc, "epoll_ctl failed");
}
// Accepts one pending connection on listening socket `fd`, makes it
// non-blocking and registers it with epoll instance `efd` for EPOLLIN.
//
// Transient accept failures (nothing pending any more, interrupted call,
// connection aborted by the peer) are ignored so that a single bad handshake
// cannot take the whole server down; any other failure still exits.
void handleAccept(int efd, int fd) {
    struct sockaddr_in raddr;
    socklen_t rsz = sizeof(raddr);
    int cfd = accept(fd, (struct sockaddr *) &raddr, &rsz);
    if (cfd < 0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR || errno == ECONNABORTED)) {
        return;
    }
    exit_if(cfd < 0, "accept failed");
    // accept() already filled in the peer address; no getpeername() needed.
    printf("accept a connection from %s\n", inet_ntoa(raddr.sin_addr));
    setNonBlock(cfd);
    updateEvents(efd, cfd, EPOLLIN, EPOLL_CTL_ADD);
}
// Per-connection state; instances live in `cons`, keyed by socket fd.
struct Con {
    string readed;              // request bytes received so far
    size_t written = 0;         // number of bytes of httpRes already written to the socket
    bool writeEnabled = false;  // true while EPOLLOUT is registered for the socket
};
// State of every connection that has a request or response in progress.
map<int, Con> cons;
// The response sent to clients; filled in by main().
string httpRes;
// Writes as much of the remaining part of httpRes to `fd` as the socket
// accepts right now.
//
// - Everything sent: EPOLLOUT interest is dropped (if it was enabled) and the
//   connection state is erased; the socket stays open.
// - Socket buffer full (EAGAIN): EPOLLOUT interest is enabled so the transfer
//   resumes from handleWrite.
// - Any other failure: the socket is closed and its state erased.
void sendRes(int efd, int fd) {
    Con &con = cons[fd];
    size_t left = httpRes.length() - con.written;
    ssize_t wd = 0;
    while (left > 0) {
        wd = ::write(fd, httpRes.data() + con.written, left);
        if (wd < 0 && errno == EINTR) {
            continue;  // interrupted before anything was written; retry
        }
        if (wd <= 0) {
            break;
        }
        con.written += wd;
        left -= wd;
        if (output_log)
            printf("write %zd bytes left: %zu\n", wd, left);
    }
    if (left == 0) {
        // close(fd); // The test uses keep-alive, so the connection is left open; it is closed from the read path.
        if (con.writeEnabled) {
            updateEvents(efd, fd, EPOLLIN, EPOLL_CTL_MOD);  // all data sent: stop watching for writability
            con.writeEnabled = false;
        }
        cons.erase(fd);
        return;
    }
    if (wd < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
        if (!con.writeEnabled) {
            updateEvents(efd, fd, EPOLLIN | EPOLLOUT, EPOLL_CTL_MOD);
            con.writeEnabled = true;
        }
        return;
    }
    if (wd <= 0) {
        printf("write error for %d: %d %s\n", fd, errno, strerror(errno));
        close(fd);
        cons.erase(fd);
    }
}
// Reads everything currently available on `fd` into the connection's request
// buffer. Whenever the buffer ends with a blank line ("\n\n" or "\r\n\r\n"),
// a response is started through sendRes.
//
// Returns with the socket still open when the read would block; on end of
// stream or a read error the socket is closed and its state erased.
void handleRead(int efd, int fd) {
    char buf[4096];
    ssize_t n = 0;
    for (;;) {
        n = ::read(fd, buf, sizeof buf);
        if (n < 0 && errno == EINTR) {
            continue;  // interrupted by a signal before any data arrived; retry
        }
        if (n <= 0) {
            break;
        }
        if (output_log)
            printf("read %zd bytes\n", n);
        string &readed = cons[fd].readed;
        readed.append(buf, n);
        const size_t len = readed.length();
        if (len > 4) {
            if (readed.compare(len - 2, 2, "\n\n") == 0 || readed.compare(len - 4, 4, "\r\n\r\n") == 0) {
                // A complete HTTP request has been read; send the test response.
                // NOTE(review): sendRes closes fd itself on a write error, in which
                // case the next read() fails and fd is closed a second time below.
                sendRes(efd, fd);
            }
        }
    }
    if (n < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
        return;
    if (n < 0) {
        printf("read %d error: %d %s\n", fd, errno, strerror(errno));
    }
    close(fd);
    cons.erase(fd);
}
// EPOLLOUT handler: hands `fd` to sendRes, which continues writing the
// response where the previous call stopped.
void handleWrite(int efd, int fd) {
sendRes(efd, fd);
}
// Runs one iteration of the event loop: waits up to `waitms` milliseconds on
// epoll instance `efd` and dispatches every reported event. `lfd` is the
// listening socket; readiness on it means a connection can be accepted.
void loop_once(int efd, int lfd, int waitms) {
    const int kMaxEvents = 20;
    struct epoll_event activeEvs[kMaxEvents];
    int n = epoll_wait(efd, activeEvs, kMaxEvents, waitms);
    // An interrupted wait is harmless; any other failure would otherwise
    // repeat immediately on every iteration.
    exit_if(n < 0 && errno != EINTR, "epoll_wait failed");
    if (output_log)
        printf("epoll_wait return %d\n", n);
    for (int i = 0; i < n; i++) {
        int fd = activeEvs[i].data.fd;
        int events = activeEvs[i].events;
        // EPOLLERR and EPOLLHUP go down the read path as well: the read()
        // there observes the error or end of stream and closes the socket.
        if (events & (EPOLLIN | EPOLLERR | EPOLLHUP)) {
            if (fd == lfd) {
                handleAccept(efd, fd);
            } else {
                handleRead(efd, fd);
            }
        } else if (events & EPOLLOUT) {
            if (output_log)
                printf("handling epollout\n");
            handleWrite(efd, fd);
        } else {
            exit_if(1, "unknown event");
        }
    }
}
// Level-triggered demo HTTP server on port 8081. Any command-line argument
// turns the per-event logging off.
int main(int argc, const char *argv[]) {
    if (argc > 1)
        output_log = false;
    ::signal(SIGPIPE, SIG_IGN);

    // Response: headers plus "123456" followed by 1048570 NUL bytes, which
    // adds up to the advertised Content-Length of 1048576.
    httpRes = "HTTP/1.1 200 OK\r\nConnection: Keep-Alive\r\nContent-Type: text/html; charset=UTF-8\r\nContent-Length: 1048576\r\n\r\n123456";
    httpRes.append(1048570, '\0');

    const unsigned short port = 8081;
    const int epollfd = epoll_create(1);
    exit_if(epollfd < 0, "epoll_create failed");
    const int listenfd = socket(AF_INET, SOCK_STREAM, 0);
    exit_if(listenfd < 0, "socket failed");

    struct sockaddr_in addr;
    memset(&addr, 0, sizeof addr);
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = INADDR_ANY;
    addr.sin_port = htons(port);

    int rc = ::bind(listenfd, (struct sockaddr *) &addr, sizeof addr);
    exit_if(rc, "bind to 0.0.0.0:%d failed %d %s", port, errno, strerror(errno));
    rc = listen(listenfd, 20);
    exit_if(rc, "listen failed %d %s", errno, strerror(errno));
    printf("fd %d listening at %d\n", listenfd, port);

    setNonBlock(listenfd);
    updateEvents(epollfd, listenfd, EPOLLIN, EPOLL_CTL_ADD);
    // A real application would install signal handlers and release resources on exit.
    while (true) {
        loop_once(epollfd, listenfd, 10000);
    }
    return 0;
}
et
/*
* 编译:c++ -o epoll-et epoll-et.cc
* 运行: ./epoll-et
* 测试:curl -v localhost
* 客户端发送GET请求后,服务器返回1M的数据,会触发EPOLLOUT,从epoll-et输出的日志看,EPOLLOUT事件得到了正确的处理
*/
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <unistd.h>
#include <map>
#include <string>
using namespace std;
bool output_log = true;
/*
 * exit_if(cond, fmt, ...): when cond is true, print the printf-style message,
 * then the source location together with errno and its text, and terminate
 * the process with status 1.
 *
 * Wrapped in do { } while (0) so it expands to exactly one statement and can
 * be used in an unbraced if/else. errno is captured before the first printf
 * so that printing cannot change the value being reported.
 */
#define exit_if(r, ...) \
    do { \
        if (r) { \
            int exit_if_errno_ = errno; \
            printf(__VA_ARGS__); \
            printf("%s:%d error no: %d error msg %s\n", __FILE__, __LINE__, exit_if_errno_, strerror(exit_if_errno_)); \
            exit(1); \
        } \
    } while (0)
// Adds O_NONBLOCK to fd's file status flags. If either fcntl call fails,
// exit_if terminates the process.
void setNonBlock(int fd) {
    const int current = fcntl(fd, F_GETFL, 0);
    exit_if(current < 0, "fcntl failed");
    const int rc = fcntl(fd, F_SETFL, current | O_NONBLOCK);
    exit_if(rc < 0, "fcntl failed");
}
// Applies epoll_ctl operation `op` for `fd` on epoll instance `efd`, with
// `events` as the interest mask. Logs the request first; exit_if terminates
// the process if epoll_ctl returns non-zero.
void updateEvents(int efd, int fd, int events, int op) {
    struct epoll_event request;
    memset(&request, 0, sizeof(request));
    request.data.fd = fd;
    request.events = events;
    // Anything other than EPOLL_CTL_MOD is labelled "add" in the log line.
    const char *opName = (op == EPOLL_CTL_MOD) ? "mod" : "add";
    printf("%s fd %d events read %d write %d\n", opName, fd, request.events & EPOLLIN, request.events & EPOLLOUT);
    const int rc = epoll_ctl(efd, op, fd, &request);
    exit_if(rc, "epoll_ctl failed");
}
// Accepts one pending connection on listening socket `fd`, makes it
// non-blocking and registers it with epoll instance `efd` for edge-triggered
// read and write readiness (EPOLLIN | EPOLLOUT | EPOLLET).
//
// Transient accept failures (nothing pending any more, interrupted call,
// connection aborted by the peer) are ignored so that a single bad handshake
// cannot take the whole server down; any other failure still exits.
void handleAccept(int efd, int fd) {
    struct sockaddr_in raddr;
    socklen_t rsz = sizeof(raddr);
    int cfd = accept(fd, (struct sockaddr *) &raddr, &rsz);
    if (cfd < 0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR || errno == ECONNABORTED)) {
        return;
    }
    exit_if(cfd < 0, "accept failed");
    // accept() already filled in the peer address; no getpeername() needed.
    printf("accept a connection from %s\n", inet_ntoa(raddr.sin_addr));
    setNonBlock(cfd);
    updateEvents(efd, cfd, EPOLLIN | EPOLLOUT | EPOLLET, EPOLL_CTL_ADD);
}
// Per-connection state; instances live in `cons`, keyed by socket fd.
struct Con {
    string readed;   // request bytes received so far
    size_t written;  // number of bytes of httpRes already written to the socket
    Con() {
        written = 0;
    }
};
// State of every connection that has a request or response in progress.
map<int, Con> cons;
// The response sent to clients; filled in by main().
string httpRes;
void sendRes(int fd) {
printf("sendRes: %d\n", fd);
Con &con = cons[fd];
if (!con.readed.length())
return;
printf("httpRes.length() = %ld\n", httpRes.length());
size_t left = httpRes.length() - con.written;
int wd = 0;
while ((wd = ::write(fd, httpRes.data() + con.written, left)) > 0) {
con.written += wd;
left -= wd;
//if (output_log)
printf("write %d bytes left: %lu\n", wd, left);
};
if (left == 0) {
// close(fd); // 测试中使用了keepalive,因此不关闭连接。连接会在read事件中关闭
cons.erase(fd);
return;
}
if (wd < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
return;
if (wd <= 0) {
printf("write error for %d: %d %s\n", fd, errno, strerror(errno));
close(fd);
cons.erase(fd);
}
}
// Number of handleRead invocations so far, across all connections.
static int g_num = 0;
// Reads everything currently available on `fd` into the connection's request
// buffer. Whenever the buffer ends with a blank line ("\n\n" or "\r\n\r\n"),
// a response is started through sendRes.
//
// Returns with the socket still open when the read would block; on end of
// stream or a read error the socket is closed and its state erased.
// `efd` is not used here.
void handleRead(int efd, int fd) {
    printf("handleRead fd %d\n", fd);
    char buf[4096];
    ssize_t n = 0;
    g_num++;
    // NOTE(review): the very first read notification in the process is
    // skipped without reading. This looks like a deliberate experiment to show
    // that edge-triggered epoll does not notify again for unread data.
    // Confirm, and remove for real use.
    if (g_num == 1) {
        return;
    }
    for (;;) {
        n = ::read(fd, buf, sizeof buf);
        if (n < 0 && errno == EINTR) {
            continue;  // interrupted by a signal before any data arrived; retry
        }
        if (n <= 0) {
            break;
        }
        if (output_log)
            printf("read %zd bytes\n", n);
        printf("read %zd bytes\n", n);
        // buf is not NUL-terminated, so the length is passed explicitly.
        printf("read buf:%.*s\n", (int) n, buf);
        string &readed = cons[fd].readed;
        readed.append(buf, n);
        const size_t len = readed.length();
        if (len > 4) {
            if (readed.compare(len - 2, 2, "\n\n") == 0 || readed.compare(len - 4, 4, "\r\n\r\n") == 0) {
                // A complete HTTP request has been read; send the test response.
                printf("read-----sendRes(%d)\n", fd);
                sendRes(fd);
            }
        }
    }
    if (n < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
        return;
    if (n < 0) {
        printf("read %d error: %d %s\n", fd, errno, strerror(errno));
    }
    close(fd);
    cons.erase(fd);
}
// EPOLLOUT handler: logs the event and hands `fd` to sendRes, which continues
// writing the response where the previous call stopped. `efd` is not used here.
void handleWrite(int efd, int fd) {
printf("write-----sendRes(%d)\n", fd);
sendRes(fd);
}
// Runs one iteration of the event loop: waits up to `waitms` milliseconds on
// epoll instance `efd` and dispatches every reported event. `lfd` is the
// listening socket; readiness on it means a connection can be accepted.
void loop_once(int efd, int lfd, int waitms) {
    const int kMaxEvents = 20;
    struct epoll_event activeEvs[kMaxEvents];
    int n = epoll_wait(efd, activeEvs, kMaxEvents, waitms);
    // An interrupted wait is harmless; any other failure would otherwise
    // repeat immediately on every iteration.
    exit_if(n < 0 && errno != EINTR, "epoll_wait failed");
    if (output_log)
        printf("epoll_wait return %d\n", n);
    for (int i = 0; i < n; i++) {
        int fd = activeEvs[i].data.fd;
        int events = activeEvs[i].events;
        // EPOLLERR and EPOLLHUP go down the read path as well: the read()
        // there observes the error or end of stream and closes the socket.
        if (events & (EPOLLIN | EPOLLERR | EPOLLHUP)) {
            if (fd == lfd) {
                handleAccept(efd, fd);
            } else {
                handleRead(efd, fd);
            }
        } else if (events & EPOLLOUT) {
            // Kept simple on purpose: when EPOLLIN and EPOLLOUT arrive in the
            // same event, only the read branch above runs.
            if (output_log)
                printf("handling epollout\n");
            handleWrite(efd, fd);
        } else {
            exit_if(1, "unknown event");
        }
    }
}
int main(int argc, const char *argv[]) {
if (argc > 1) {
output_log = false;
}
::signal(SIGPIPE, SIG_IGN);
//httpRes = "HTTP/1.1 200 OK\r\nConnection: Keep-Alive\r\nContent-Type: text/html; charset=UTF-8\r\nContent-Length: 7\r\n\r\n123456";
httpRes = "HTTP/1.1 200 OK\r\nConnection: close\r\nContent-Type: text/html; charset=UTF-8\r\nContent-Length: 7\r\n\r\n123456";
// for (int i = 0; i < 1048570; i++) {
// httpRes += '\0';
// }
unsigned short port = 8082;
int epollfd = epoll_create(1);
exit_if(epollfd < 0, "epoll_create failed");
int listenfd = socket(AF_INET, SOCK_STREAM, 0);
exit_if(listenfd < 0, "socket failed");
struct sockaddr_in addr;
memset(&addr, 0, sizeof addr);
addr.sin_family = AF_INET;
addr.sin_port = htons(port);
addr.sin_addr.s_addr = INADDR_ANY;
int r = ::bind(listenfd, (struct sockaddr *) &addr, sizeof(struct sockaddr));
exit_if(r, "bind to 0.0.0.0:%d failed %d %s", port, errno, strerror(errno));
r = listen(listenfd, 20);
exit_if(r, "listen failed %d %s", errno, strerror(errno));
printf("fd %d listening at %d\n", listenfd, port);
setNonBlock(listenfd);
updateEvents(epollfd, listenfd, EPOLLIN, EPOLL_CTL_ADD);
for (;;) { //实际应用应当注册信号处理函数,退出时清理资源
loop_once(epollfd, listenfd, 10000);
}
return 0;
}
针对容易触发LT开关EPOLLOUT事件的情景(让服务器返回1M大小的数据),我用ab做了性能测试
测试的结果显示ET的性能稍好,详情如下:
LT 启动命令 ./epoll a
ET 启动命令 ./epoll-et a
ab 命令:ab -n 1000 -k 127.0.0.1/
LT 结果:Requests per second: 42.56 [#/sec] (mean)
ET 结果:Requests per second: 48.55 [#/sec] (mean)
当我把服务器返回的数据大小改为48576时,开关EPOLLOUT更加频繁,性能的差异更大
ab 命令:ab -n 5000 -k 127.0.0.1/
LT 结果:Requests per second: 745.30 [#/sec] (mean)
ET 结果:Requests per second: 927.56 [#/sec] (mean)
对于nginx这种高性能服务器,ET模式是很好的,而其他的通用网络库,很多是使用LT,避免使用的过程中出现bug
LT模式可以使用阻塞IO和非阻塞IO
epoll 中有两种触发模式,LT (水平触发) 和 ET(边缘触发),这里说下这两种模式下使用 阻塞/非阻塞 IO 的情况,以及对于 “为什么 ET 模式必须使用非阻塞 IO ?” 这个问题的看法。
个人认为使用 阻塞IO 潜在的问题在于,使用 阻塞 IO 去读的时候,会导致在没有数据可读的时候,导致当前工作线程阻塞不工作。而 ET 模式与 LT 模式都是在有数据的情况下触发,只不过触发的时机不同。假定读缓冲区 50b,而收到的包为 100b,有如下情况:
阻塞 IO
LT 模式下,由于只要有数据就会触发读,因此不会有问题,但是在 ET 模式下,由于在新的数据到来之前,都不会触发读事件,因此会导致剩下的 50b 没有读取到,所以为了保证能够读取到完整的包,需要使用 while(1) 之类的循环去读,这就会导致在数据读完之后,最后一次 read 阻塞,因为所有的数据都已经读完了。
非阻塞 IO
在 LT 模式下,使用非阻塞 IO 的效果与阻塞 IO 差不多,在 ET 模式下,处理的逻辑与上面类似,但是由于使用的 非阻塞 IO ,因此不会导致最后一次 read 阻塞,而是会返回 EAGAIN 。
最后对于 “为什么 ET 模式必须使用非阻塞 IO ?” 这个问题。我的看法是应该将 “必须” 改成 “建议”,因为如果使用 阻塞IO ,也是有办法规避上面的问题的,比如先获取包体的大小之类的,但是这样也会提高复杂度,效率也会更低下。对于监听的 socket,最好使用 LT 模式,ET 模式会导致高并发情况下,有的客户端会连接不上,除非使用 while 循环 accept,且为非阻塞 socket 。对于读写的 socket,LT 模式下,阻塞和非阻塞效果都一样。ET 模式下,建议使用非阻塞 IO,并一次性地完整读写全部数据。