summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSubash Abhinov Kasiviswanathan <subashab@codeaurora.org>2015-10-26 18:45:04 -0600
committerLinux Build Service Account <lnxbuild@localhost>2016-08-24 08:08:29 -0600
commitfbc7e056ed22ce3eb3924293733571a12eee7363 (patch)
tree04f8a5c618b033fa2480a252d02e4dc46ebe0df5
parentd961f6f64d965d51a970b4354ab8ca458a03a36b (diff)
downloadandroid_external_android-clat-cm-14.1_prerebase.tar.gz
android_external_android-clat-cm-14.1_prerebase.tar.bz2
android_external_android-clat-cm-14.1_prerebase.zip
clatd: Add support for handling GRO coalesced packets [staging/cm-14.1-cafrebase] [cm-14.1_prerebase] [cm-14.1] [cm-14.0]
With this change, we can notify the network stack to disable checksum validation for GRO packets as well as for other packets whose checksum validation was completed earlier in a driver. GRO packets have the ip_summed field set to CHECKSUM_PARTIAL, while checksum-offloaded packets have the ip_summed field set to CHECKSUM_UNNECESSARY. The kernel copies this ip_summed field to the status field in the tpacket filter. The information from the status field is then passed as part of the TUN header to the TUN interface. Any other packet will have the complete checksum validation done in the network stack. Note that this only applies to packets which are captured in packet sockets and passed on to the TUN interface. The MTU serves a dual purpose for CLAT: the segment size of the packet ring is based on MAXMTU, apart from the interface MTU itself. GRO coalescing happens on the receive path, which is not affected by the MTU (transmit). In other words, increasing MAXMTU does not affect whether a GRO coalesced packet is delivered to CLAT; it only affects the amount of data being read from the TPacket ring. Introduce a new parameter, MAXMRU, which handles the increased MRU to account for GRO packets. Increase the ring size to compensate for the larger segments and maintain the same throughput as before. CRs-Fixed: 1023199 Change-Id: I5f5ff05918f3d3e03bb18fe799ccb5770a85038a
-rw-r--r--clatd.c2
-rw-r--r--clatd.h3
-rw-r--r--clatd_test.cpp13
-rw-r--r--ring.c14
-rw-r--r--ring.h7
-rw-r--r--translate.c9
-rw-r--r--translate.h4
7 files changed, 35 insertions, 17 deletions
diff --git a/clatd.c b/clatd.c
index d57ea59..c936b62 100644
--- a/clatd.c
+++ b/clatd.c
@@ -364,7 +364,7 @@ void read_packet(int read_fd, int write_fd, int to_ipv6) {
packet = (uint8_t *) (tun_header + 1);
readlen -= sizeof(*tun_header);
- translate_packet(write_fd, to_ipv6, packet, readlen);
+ translate_packet(write_fd, to_ipv6, packet, readlen, TP_CSUM_NONE);
}
/* function: event_loop
diff --git a/clatd.h b/clatd.h
index f421f46..126a31c 100644
--- a/clatd.h
+++ b/clatd.h
@@ -21,7 +21,8 @@
#include <sys/uio.h>
#define MAXMTU 1500
-#define PACKETLEN (MAXMTU+sizeof(struct tun_pi))
+#define MAXMRU 65536
+#define PACKETLEN (MAXMRU+sizeof(struct tun_pi))
#define CLATD_VERSION "1.4"
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
diff --git a/clatd_test.cpp b/clatd_test.cpp
index 7e218f0..d67113f 100644
--- a/clatd_test.cpp
+++ b/clatd_test.cpp
@@ -30,6 +30,7 @@ extern "C" {
#include "translate.h"
#include "config.h"
#include "clatd.h"
+#include "ring.h"
}
// For convenience.
@@ -456,7 +457,7 @@ void do_translate_packet(const uint8_t *original, size_t original_len, uint8_t *
break;
}
- translate_packet(write_fd, (version == 4), original, original_len);
+ translate_packet(write_fd, (version == 4), original, original_len, TP_CSUM_NONE);
snprintf(foo, sizeof(foo), "%s: Invalid translated packet", msg);
if (version == 6) {
@@ -485,7 +486,7 @@ void do_translate_packet(const uint8_t *original, size_t original_len, uint8_t *
void check_translated_packet(const uint8_t *original, size_t original_len,
const uint8_t *expected, size_t expected_len, const char *msg) {
- uint8_t translated[MAXMTU];
+ uint8_t translated[MAXMRU];
size_t translated_len = sizeof(translated);
do_translate_packet(original, original_len, translated, &translated_len, msg);
EXPECT_EQ(expected_len, translated_len) << msg << ": Translated packet length incorrect\n";
@@ -504,12 +505,12 @@ void check_fragment_translation(const uint8_t *original[], const size_t original
}
// Sanity check that reassembling the original and translated fragments produces valid packets.
- uint8_t reassembled[MAXMTU];
+ uint8_t reassembled[MAXMRU];
size_t reassembled_len = sizeof(reassembled);
reassemble_packet(original, original_lengths, numfragments, reassembled, &reassembled_len, msg);
check_packet(reassembled, reassembled_len, msg);
- uint8_t translated[MAXMTU];
+ uint8_t translated[MAXMRU];
size_t translated_len = sizeof(translated);
do_translate_packet(reassembled, reassembled_len, translated, &translated_len, msg);
check_packet(translated, translated_len, msg);
@@ -771,7 +772,7 @@ TEST_F(ClatdTest, DataSanitycheck) {
check_packet(ipv6_ping, sizeof(ipv6_ping), "IPv6 ping sanity check");
// Sanity checks reassemble_packet.
- uint8_t reassembled[MAXMTU];
+ uint8_t reassembled[MAXMRU];
size_t total_length = sizeof(reassembled);
reassemble_packet(kIPv4Fragments, kIPv4FragLengths, ARRAYSIZE(kIPv4Fragments),
reassembled, &total_length, "Reassembly sanity check");
@@ -897,7 +898,7 @@ TEST_F(ClatdTest, Fragmentation) {
void check_translate_checksum_neutral(const uint8_t *original, size_t original_len,
size_t expected_len, const char *msg) {
- uint8_t translated[MAXMTU];
+ uint8_t translated[MAXMRU];
size_t translated_len = sizeof(translated);
do_translate_packet(original, original_len, translated, &translated_len, msg);
EXPECT_EQ(expected_len, translated_len) << msg << ": Translated packet length incorrect\n";
diff --git a/ring.c b/ring.c
index 5e99fd5..6929ab4 100644
--- a/ring.c
+++ b/ring.c
@@ -29,6 +29,8 @@
#include "translate.h"
#include "tun.h"
+#define TP_STATUS_CSUM_UNNECESSARY (1 << 7)
+
int ring_create(struct tun_data *tunnel) {
int packetsock = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_IPV6));
if (packetsock < 0) {
@@ -117,9 +119,19 @@ static struct tpacket2_hdr* ring_advance(struct packet_ring *ring) {
*/
void ring_read(struct packet_ring *ring, int write_fd, int to_ipv6) {
struct tpacket2_hdr *tp = ring->next;
+ uint16_t val = TP_CSUM_NONE;
if (tp->tp_status & TP_STATUS_USER) {
+ //We expect only GRO coalesced packets to have TP_STATUS_CSUMNOTREADY
+ //(ip_summed = CHECKSUM_PARTIAL) in this path. Note that these packets have already gone
+ //through checksum validation in the GRO engine. CHECKSUM_PARTIAL is defined to be 3 while
+ //CHECKSUM_UNNECESSARY is defined to be 1.
+ //The kernel only checks for the CHECKSUM_UNNECESSARY (TP_CSUM_UNNECESSARY) bit while processing a
+ //packet, so it's OK to pass only this bit rather than the full ip_summed field.
+ if ((tp->tp_status & TP_STATUS_CSUMNOTREADY) || (tp->tp_status & TP_STATUS_CSUM_UNNECESSARY)) {
+ val = TP_CSUM_UNNECESSARY;
+ }
uint8_t *packet = ((uint8_t *) tp) + tp->tp_net;
- translate_packet(write_fd, to_ipv6, packet, tp->tp_len);
+ translate_packet(write_fd, to_ipv6, packet, tp->tp_len, val);
tp->tp_status = TP_STATUS_KERNEL;
tp = ring_advance(ring);
}
diff --git a/ring.h b/ring.h
index b9b8c11..20d2f98 100644
--- a/ring.h
+++ b/ring.h
@@ -27,10 +27,10 @@ struct tun_data;
// Frame size. Must be a multiple of TPACKET_ALIGNMENT (=16)
// Why the 16? http://lxr.free-electrons.com/source/net/packet/af_packet.c?v=3.4#L1764
-#define TP_FRAME_SIZE (TPACKET_ALIGN(MAXMTU) + TPACKET_ALIGN(TPACKET2_HDRLEN) + 16)
+#define TP_FRAME_SIZE (TPACKET_ALIGN(MAXMRU) + TPACKET_ALIGN(TPACKET2_HDRLEN) + 16)
// Block size. Must be a multiple of the page size, and a power of two for efficient memory use.
-#define TP_BLOCK_SIZE 65536
+#define TP_BLOCK_SIZE 2686976
// In order to save memory, our frames are not an exact divider of the block size. Therefore, the
// mmaped region will have gaps corresponding to the empty space at the end of each block.
@@ -42,6 +42,9 @@ struct tun_data;
// results in 656 frames (1048576 bytes).
#define TP_NUM_BLOCKS 16
+#define TP_CSUM_NONE (0)
+#define TP_CSUM_UNNECESSARY (1)
+
struct packet_ring {
uint8_t *base;
struct tpacket2_hdr *next;
diff --git a/translate.c b/translate.c
index ddc9bac..bce9270 100644
--- a/translate.c
+++ b/translate.c
@@ -108,8 +108,8 @@ struct in6_addr ipv4_addr_to_ipv6_addr(uint32_t addr4) {
* tun_header - tunnel header, already allocated
* proto - ethernet protocol id: ETH_P_IP(ipv4) or ETH_P_IPV6(ipv6)
*/
-void fill_tun_header(struct tun_pi *tun_header, uint16_t proto) {
- tun_header->flags = 0;
+void fill_tun_header(struct tun_pi *tun_header, uint16_t proto, uint16_t skip_csum) {
+ tun_header->flags = htons(skip_csum);
tun_header->proto = htons(proto);
}
@@ -491,8 +491,9 @@ void send_rawv6(int fd, clat_packet out, int iov_len) {
* to_ipv6 - true if translating to ipv6, false if translating to ipv4
* packet - packet
* packetsize - size of packet
+ * skip_csum - true if kernel has to skip checksum validation, false if it has to validate checksum.
*/
-void translate_packet(int fd, int to_ipv6, const uint8_t *packet, size_t packetsize) {
+void translate_packet(int fd, int to_ipv6, const uint8_t *packet, size_t packetsize, uint16_t skip_csum) {
int iov_len = 0;
// Allocate buffers for all packet headers.
@@ -524,7 +525,7 @@ void translate_packet(int fd, int to_ipv6, const uint8_t *packet, size_t packets
} else {
iov_len = ipv6_packet(out, CLAT_POS_IPHDR, packet, packetsize);
if (iov_len > 0) {
- fill_tun_header(&tun_targ, ETH_P_IP);
+ fill_tun_header(&tun_targ, ETH_P_IP, skip_csum);
out[CLAT_POS_TUNHDR].iov_len = sizeof(tun_targ);
send_tun(fd, out, iov_len);
}
diff --git a/translate.h b/translate.h
index aa8b736..3c249eb 100644
--- a/translate.h
+++ b/translate.h
@@ -42,14 +42,14 @@ uint16_t packet_length(clat_packet packet, clat_packet_index pos);
int is_in_plat_subnet(const struct in6_addr *addr6);
// Functions to create tun, IPv4, and IPv6 headers.
-void fill_tun_header(struct tun_pi *tun_header, uint16_t proto);
+void fill_tun_header(struct tun_pi *tun_header, uint16_t proto, uint16_t skip_csum);
void fill_ip_header(struct iphdr *ip_targ, uint16_t payload_len, uint8_t protocol,
const struct ip6_hdr *old_header);
void fill_ip6_header(struct ip6_hdr *ip6, uint16_t payload_len, uint8_t protocol,
const struct iphdr *old_header);
// Translate and send packets.
-void translate_packet(int fd, int to_ipv6, const uint8_t *packet, size_t packetsize);
+void translate_packet(int fd, int to_ipv6, const uint8_t *packet, size_t packetsize, uint16_t skip_csum);
// Translate IPv4 and IPv6 packets.
int ipv4_packet(clat_packet out, clat_packet_index pos, const uint8_t *packet, size_t len);