From 69dc60de5d53488a3c45737ec17b242d703d682b Mon Sep 17 00:00:00 2001 From: Bernie Innocenti Date: Mon, 14 May 2018 20:40:49 +0900 Subject: Avoid spinning endlessly on packet socket errors In the event the packet socket has an error, it is likely not cleared and clatd can spin in a tight loop. This may happen when a network is going down (ENETDOWN, ENETUNREACH, ...). Ordinarily the ConnectivityService/Nat464Xlat/ClatdController control plane would tear down clatd in this case. However, if the control plane is deadlocked, clatd will chew up CPU indefinitely. This fix consists of detecting when poll() exits due to a socket error and trying to clear the error by issuing a no-op recv() on the packet socket and a 0-byte read() for the /dev/tun socket. Test: manually, on a marlin device (kernel 3.18.70) Bug: 78602493 Change-Id: Ic23f999712a674df11e981a4314ad371e8d5fb6d --- clatd.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/clatd.c b/clatd.c index c579df0..168cc69 100644 --- a/clatd.c +++ b/clatd.c @@ -382,19 +382,28 @@ void event_loop(struct tun_data *tunnel) { last_interface_poll = time(NULL); while(running) { - if(poll(wait_fd, 2, NO_TRAFFIC_INTERFACE_POLL_FREQUENCY*1000) == -1) { - if(errno != EINTR) { - logmsg(ANDROID_LOG_WARN,"event_loop/poll returned an error: %s",strerror(errno)); + if (poll(wait_fd, ARRAY_SIZE(wait_fd), + NO_TRAFFIC_INTERFACE_POLL_FREQUENCY * 1000) == -1) { + if (errno != EINTR) { + logmsg(ANDROID_LOG_WARN,"event_loop/poll returned an error: %s", strerror(errno)); } } else { + if (wait_fd[0].revents & POLLIN) { + ring_read(&tunnel->ring, tunnel->fd4, 0 /* to_ipv6 */); + } + // If any other bit is set, assume it's due to an error (i.e. POLLERR). + if (wait_fd[0].revents & ~POLLIN) { + // ring_read doesn't clear the error indication on the socket. 
+ recv(tunnel->read_fd6, NULL, 0, MSG_PEEK); + logmsg(ANDROID_LOG_WARN, "event_loop: clearing error on read_fd6: %s", + strerror(errno)); + } + // Call read_packet if the socket has data to be read, but also if an // error is waiting. If we don't call read() after getting POLLERR, a // subsequent poll() will return immediately with POLLERR again, // causing this code to spin in a loop. Calling read() will clear the // socket error flag instead. - if (wait_fd[0].revents) { - ring_read(&tunnel->ring, tunnel->fd4, 0 /* to_ipv6 */); - } if (wait_fd[1].revents) { read_packet(tunnel->fd4, tunnel->write_fd6, 1 /* to_ipv6 */); } -- cgit v1.2.3