summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLorenzo Colitti <lorenzo@google.com>2014-12-03 15:18:29 +0900
committerLorenzo Colitti <lorenzo@google.com>2015-03-03 16:51:16 +0900
commit78ce1be5f9e9bb26bacda70fdf1003c376eab97d (patch)
tree96d46b76b6bae9c0adffc5e8beac787456360ae5
parent7023303c34040c25ce4170b43adf020ebd53a068 (diff)
downloadandroid_external_android-clat-78ce1be5f9e9bb26bacda70fdf1003c376eab97d.tar.gz
android_external_android-clat-78ce1be5f9e9bb26bacda70fdf1003c376eab97d.tar.bz2
android_external_android-clat-78ce1be5f9e9bb26bacda70fdf1003c376eab97d.zip
Switch the receive path to memory-mapped I/O with PACKET_RX_RING.
(cherry picked from commit 9353be2a5f1b4fd00b04e4c826f7f3c3ec6c5d46) Change-Id: Ifb35b4efae0363a2006f3bba002e44a0560f3014
-rw-r--r--Android.mk2
-rw-r--r--clatd.c81
-rw-r--r--ring.c126
-rw-r--r--ring.h55
-rw-r--r--tun.h2
5 files changed, 219 insertions, 47 deletions
diff --git a/Android.mk b/Android.mk
index b4ae7f0..5f2fe63 100644
--- a/Android.mk
+++ b/Android.mk
@@ -1,7 +1,7 @@
LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)
-LOCAL_SRC_FILES:=clatd.c dump.c checksum.c translate.c icmp.c ipv4.c ipv6.c config.c dns64.c logging.c getaddr.c netlink_callbacks.c netlink_msg.c setif.c mtu.c tun.c
+LOCAL_SRC_FILES:=clatd.c dump.c checksum.c translate.c icmp.c ipv4.c ipv6.c config.c dns64.c logging.c getaddr.c netlink_callbacks.c netlink_msg.c setif.c mtu.c tun.c ring.c
LOCAL_CFLAGS := -Wall -Werror -Wunused-parameter
LOCAL_C_INCLUDES := external/libnl/include bionic/libc/dns/include
diff --git a/clatd.c b/clatd.c
index 1de39c8..94cb3b5 100644
--- a/clatd.c
+++ b/clatd.c
@@ -51,6 +51,7 @@
#include "getaddr.h"
#include "dump.h"
#include "tun.h"
+#include "ring.h"
#define DEVICEPREFIX "v4-"
@@ -213,13 +214,10 @@ void open_sockets(struct tun_data *tunnel, uint32_t mark) {
tunnel->write_fd6 = rawsock;
- int packetsock = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_IPV6));
- if (packetsock < 0) {
- logmsg(ANDROID_LOG_FATAL, "packet socket failed: %s", strerror(errno));
+ tunnel->read_fd6 = ring_create(tunnel);
+ if (tunnel->read_fd6 < 0) {
exit(1);
}
-
- tunnel->read_fd6 = packetsock;
}
/* function: update_clat_ipv6_address
@@ -320,16 +318,16 @@ void configure_interface(const char *uplink_interface, const char *plat_prefix,
}
/* function: read_packet
- * reads a packet from the tunnel fd and passes it down the stack
- * active_fd - tun file descriptor marked ready for reading
- * tunnel - tun device data
+ * reads a packet from the tunnel fd and translates it
+ * read_fd - file descriptor to read original packet from
+ * write_fd - file descriptor to write translated packet to
+ * to_ipv6 - whether the packet is to be translated to ipv6 or ipv4
*/
-void read_packet(int active_fd, const struct tun_data *tunnel) {
+void read_packet(int read_fd, int write_fd, int to_ipv6) {
ssize_t readlen;
uint8_t buf[PACKETLEN], *packet;
- int fd;
- readlen = read(active_fd, buf, PACKETLEN);
+ readlen = read(read_fd, buf, PACKETLEN);
if(readlen < 0) {
logmsg(ANDROID_LOG_WARN,"read_packet/read error: %s", strerror(errno));
@@ -340,41 +338,32 @@ void read_packet(int active_fd, const struct tun_data *tunnel) {
return;
}
- if (active_fd == tunnel->fd4) {
- ssize_t header_size = sizeof(struct tun_pi);
-
- if (readlen < header_size) {
- logmsg(ANDROID_LOG_WARN,"read_packet/short read: got %ld bytes", readlen);
- return;
- }
-
- struct tun_pi *tun_header = (struct tun_pi *) buf;
- uint16_t proto = ntohs(tun_header->proto);
- if (proto != ETH_P_IP) {
- logmsg(ANDROID_LOG_WARN, "%s: unknown packet type = 0x%x", __func__, proto);
- return;
- }
+ struct tun_pi *tun_header = (struct tun_pi *) buf;
+ if (readlen < (ssize_t) sizeof(*tun_header)) {
+ logmsg(ANDROID_LOG_WARN,"read_packet/short read: got %ld bytes", readlen);
+ return;
+ }
- if(tun_header->flags != 0) {
- logmsg(ANDROID_LOG_WARN, "%s: unexpected flags = %d", __func__, tun_header->flags);
- }
+ uint16_t proto = ntohs(tun_header->proto);
+ if (proto != ETH_P_IP) {
+ logmsg(ANDROID_LOG_WARN, "%s: unknown packet type = 0x%x", __func__, proto);
+ return;
+ }
- fd = tunnel->write_fd6;
- packet = buf + header_size;
- readlen -= header_size;
- } else {
- fd = tunnel->fd4;
- packet = buf;
+ if(tun_header->flags != 0) {
+ logmsg(ANDROID_LOG_WARN, "%s: unexpected flags = %d", __func__, tun_header->flags);
}
- translate_packet(fd, (fd == tunnel->write_fd6), packet, readlen);
+ packet = (uint8_t *) (tun_header + 1);
+ readlen -= sizeof(*tun_header);
+ translate_packet(write_fd, to_ipv6, packet, readlen);
}
/* function: event_loop
* reads packets from the tun network interface and passes them down the stack
* tunnel - tun device data
*/
-void event_loop(const struct tun_data *tunnel) {
+void event_loop(struct tun_data *tunnel) {
time_t last_interface_poll;
struct pollfd wait_fd[] = {
{ tunnel->read_fd6, POLLIN, 0 },
@@ -390,16 +379,16 @@ void event_loop(const struct tun_data *tunnel) {
logmsg(ANDROID_LOG_WARN,"event_loop/poll returned an error: %s",strerror(errno));
}
} else {
- size_t i;
- for(i = 0; i < ARRAY_SIZE(wait_fd); i++) {
- // Call read_packet if the socket has data to be read, but also if an
- // error is waiting. If we don't call read() after getting POLLERR, a
- // subsequent poll() will return immediately with POLLERR again,
- // causing this code to spin in a loop. Calling read() will clear the
- // socket error flag instead.
- if(wait_fd[i].revents != 0) {
- read_packet(wait_fd[i].fd,tunnel);
- }
+ // Call read_packet if the socket has data to be read, but also if an
+ // error is waiting. If we don't call read() after getting POLLERR, a
+ // subsequent poll() will return immediately with POLLERR again,
+ // causing this code to spin in a loop. Calling read() will clear the
+ // socket error flag instead.
+ if (wait_fd[0].revents) {
+ ring_read(&tunnel->ring, tunnel->fd4, 0 /* to_ipv6 */);
+ }
+ if (wait_fd[1].revents) {
+ read_packet(tunnel->fd4, tunnel->write_fd6, 1 /* to_ipv6 */);
}
}
diff --git a/ring.c b/ring.c
new file mode 100644
index 0000000..5e99fd5
--- /dev/null
+++ b/ring.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * ring.c - packet ring buffer functions
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <linux/if.h>
+#include <linux/if_packet.h>
+
+#include "logging.h"
+#include "ring.h"
+#include "translate.h"
+#include "tun.h"
+
+/* function: ring_create
+ * opens an AF_PACKET socket for IPv6 packets, attaches a TPACKET_V2 receive ring to it and maps
+ * the ring into memory
+ * tunnel - tun device data; tunnel->ring is set to the start of the ring on success
+ * returns: the packet socket fd, or -1 on failure (the socket is closed before returning)
+ */
+int ring_create(struct tun_data *tunnel) {
+  int packetsock = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_IPV6));
+  if (packetsock < 0) {
+    logmsg(ANDROID_LOG_FATAL, "packet socket failed: %s", strerror(errno));
+    return -1;
+  }
+
+  int ver = TPACKET_V2;
+  if (setsockopt(packetsock, SOL_PACKET, PACKET_VERSION, (void *) &ver, sizeof(ver))) {
+    logmsg(ANDROID_LOG_FATAL, "setsockopt(PACKET_VERSION, %d) failed: %s", ver, strerror(errno));
+    close(packetsock);
+    return -1;
+  }
+
+  // Failing to enable PACKET_LOSS is not fatal: log a warning and carry on.
+  int on = 1;
+  if (setsockopt(packetsock, SOL_PACKET, PACKET_LOSS, (void *) &on, sizeof(on))) {
+    logmsg(ANDROID_LOG_WARN, "PACKET_LOSS failed: %s", strerror(errno));
+  }
+
+  struct packet_ring *ring = &tunnel->ring;
+  ring->numblocks = TP_NUM_BLOCKS;
+
+  int total_frames = TP_FRAMES * ring->numblocks;
+
+  struct tpacket_req req = {
+    .tp_frame_size = TP_FRAME_SIZE,   // Size of each frame, in bytes.
+    .tp_block_size = TP_BLOCK_SIZE,   // Size of each block, in bytes.
+    .tp_block_nr = ring->numblocks,   // Number of blocks.
+    .tp_frame_nr = total_frames,      // Total frames.
+  };
+
+  if (setsockopt(packetsock, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req)) < 0) {
+    logmsg(ANDROID_LOG_FATAL, "PACKET_RX_RING failed: %s", strerror(errno));
+    close(packetsock);
+    return -1;
+  }
+
+  size_t buflen = TP_BLOCK_SIZE * ring->numblocks;
+  ring->base = mmap(NULL, buflen, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_LOCKED|MAP_POPULATE,
+                    packetsock, 0);
+  if (ring->base == MAP_FAILED) {
+    // buflen is a size_t, so it must be printed with %zu.
+    logmsg(ANDROID_LOG_FATAL, "mmap %zu failed: %s", buflen, strerror(errno));
+    close(packetsock);
+    return -1;
+  }
+
+  // Start reading at the first frame of the first block.
+  ring->block = 0;
+  ring->slot = 0;
+  ring->numslots = TP_FRAMES;
+  ring->next = (struct tpacket2_hdr *) ring->base;
+
+  logmsg(ANDROID_LOG_INFO, "Using ring buffer with %d frames (%zu bytes) at %p",
+         total_frames, buflen, (void *) ring->base);
+
+  return packetsock;
+}
+
+/* function: ring_advance
+ * moves the ring's read position forward by one frame, skipping the unused gap at the end of a
+ * block and wrapping back to the base after the last block
+ * ring - packet ring buffer
+ * returns: the frame header at the new position
+ */
+static struct tpacket2_hdr* ring_advance(struct packet_ring *ring) {
+  uint8_t *cursor = ((uint8_t *) ring->next) + TP_FRAME_SIZE;
+
+  if (++ring->slot == ring->numslots) {
+    // That was the last frame of the current block.
+    ring->slot = 0;
+
+    if (++ring->block < ring->numblocks) {
+      // Step over the empty space at the end of the block.
+      cursor += TP_FRAME_GAP;
+    } else {
+      // Past the last block: wrap around to the start of the ring.
+      ring->block = 0;
+      cursor = (uint8_t *) ring->base;
+    }
+  }
+
+  ring->next = (struct tpacket2_hdr *) cursor;
+  return ring->next;
+}
+
+/* function: ring_read
+ * translates the packet at the current ring position if it has been handed to user space, then
+ * returns the frame to the kernel and advances to the next position; does nothing otherwise
+ * ring     - packet ring buffer to read the original packet from
+ * write_fd - file descriptor to write translated packet to
+ * to_ipv6  - whether the packet is to be translated to ipv6 or ipv4
+ */
+void ring_read(struct packet_ring *ring, int write_fd, int to_ipv6) {
+  struct tpacket2_hdr *tp = ring->next;
+  if (!(tp->tp_status & TP_STATUS_USER)) {
+    // The current frame has not been handed to user space; nothing to read.
+    return;
+  }
+
+  // NOTE(review): tp_len is the original packet length. If the kernel ever truncates a packet to
+  // fit the frame, tp_snaplen would be smaller than tp_len - confirm frames are always big enough.
+  uint8_t *packet = ((uint8_t *) tp) + tp->tp_net;
+  translate_packet(write_fd, to_ipv6, packet, tp->tp_len);
+
+  // Give the frame back to the kernel before moving on.
+  tp->tp_status = TP_STATUS_KERNEL;
+  ring_advance(ring);
+}
diff --git a/ring.h b/ring.h
new file mode 100644
index 0000000..b9b8c11
--- /dev/null
+++ b/ring.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * ring.h - packet ring buffer functions
+ */
+#ifndef __RING_H__
+#define __RING_H__
+
+#include <linux/if.h>
+#include <linux/if_packet.h>
+
+#include "clatd.h"
+
+struct tun_data;
+
+// Frame size. Must be a multiple of TPACKET_ALIGNMENT (=16)
+// Why the 16? http://lxr.free-electrons.com/source/net/packet/af_packet.c?v=3.4#L1764
+#define TP_FRAME_SIZE (TPACKET_ALIGN(MAXMTU) + TPACKET_ALIGN(TPACKET2_HDRLEN) + 16)
+
+// Block size. Must be a multiple of the page size, and a power of two for efficient memory use.
+#define TP_BLOCK_SIZE 65536
+
+// In order to save memory, our frames are not an exact divisor of the block size. Therefore, the
+// mmaped region will have gaps corresponding to the empty space at the end of each block.
+#define TP_FRAMES (TP_BLOCK_SIZE / TP_FRAME_SIZE)
+#define TP_FRAME_GAP (TP_BLOCK_SIZE % TP_FRAME_SIZE)
+
+// TODO: Make this configurable. This requires some refactoring because the packet socket is
+// opened before we drop privileges, but the configuration file is read after. A value of 16
+// results in 656 frames (1048576 bytes).
+#define TP_NUM_BLOCKS 16
+
+// Read-side state of the memory-mapped packet ring.
+struct packet_ring {
+  uint8_t *base;              // Start of the mmaped ring.
+  struct tpacket2_hdr *next;  // Header of the frame at the current read position.
+  int slot;                   // Index of the current frame within the current block.
+  int numslots;               // Number of frames in each block.
+  int block;                  // Index of the current block.
+  int numblocks;              // Number of blocks in the ring.
+};
+
+int ring_create(struct tun_data *tunnel);
+void ring_read(struct packet_ring *ring, int write_fd, int to_ipv6);
+
+#endif
diff --git a/tun.h b/tun.h
index 946ab47..bcdd10e 100644
--- a/tun.h
+++ b/tun.h
@@ -21,10 +21,12 @@
#include <linux/if.h>
#include "clatd.h"
+#include "ring.h"
struct tun_data {
char device4[IFNAMSIZ];
int read_fd6, write_fd6, fd4;
+ struct packet_ring ring;
};
int tun_open();