author:    David S. Miller <davem@davemloft.net>  2015-10-03 04:32:52 -0700
committer: David S. Miller <davem@davemloft.net>  2015-10-03 04:32:52 -0700
commit:    c3fc7ac9a0b978ee8538058743d21feef25f7b33 (patch)
tree:      0caf05649d27830ba0f9548704abbb1ec4b5bb91 /net/core/request_sock.c
parent:    f6d3125fa3c2f55ddf7cf69365c41089de6cfae6 (diff)
parent:    e994b2f0fb9229aeff5eea9541320bd7b2ca8714 (diff)
Merge branch 'tcp-lockless-listener'
Eric Dumazet says:
====================
tcp/dccp: lockless listener
TCP listener refactoring: this is becoming interesting!
This patch series takes the steps needed to use the normal TCP/DCCP ehash
table to store SYN_RECV requests, instead of the private per-listener
hash table we had until now.
SYNACK skbs are now attached to their syn_recv request socket,
so that we no longer heavily modify the listener's sk_wmem_alloc.
The listener lock is no longer held in the fast path, including
in SYNCOOKIE mode.
During my tests, my server was able to process 3,500,000
SYN packets per second on one listener and still had available
cpu cycles.
That is about 2 to 3 orders of magnitude more than what we had with older kernels.
This effort started two years ago and I am pleased to see it meet expectations.
We'll probably extend SO_REUSEPORT to add proper CPU/NUMA affinities,
so that heavy-duty TCP servers can get proper siloing thanks to multi-queue
NICs.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
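To make the receive-path idea described above concrete, here is a hedged sketch of the lookup that becomes possible once SYN_RECV request sockets live in the regular ehash table: an incoming packet can be demultiplexed straight to its request socket by the same established-hash lookup used for full sockets, without taking the listener lock. The helper names used below (__inet_lookup_established(), inet_reqsk(), TCP_NEW_SYN_RECV) follow existing kernel identifiers, but the snippet is an illustration, not code from this merge.

/* Illustration only: demux a packet to a SYN_RECV request socket via the
 * shared ehash table, with no listener lock taken on this path.
 */
struct sock *sk;

sk = __inet_lookup_established(net, &tcp_hashinfo,
                               iph->saddr, th->source,
                               iph->daddr, ntohs(th->dest),
                               inet_iif(skb));
if (sk && sk->sk_state == TCP_NEW_SYN_RECV) {
        struct request_sock *req = inet_reqsk(sk);

        /* Complete the 3WHS for this request; the listener is only
         * touched when the child socket is created and queued for
         * accept().
         */
}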
Diffstat (limited to 'net/core/request_sock.c')
-rw-r--r--  net/core/request_sock.c  84
1 file changed, 2 insertions, 82 deletions
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index e22cfa4ed25f..15c853806518 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -37,28 +37,9 @@
 int sysctl_max_syn_backlog = 256;
 EXPORT_SYMBOL(sysctl_max_syn_backlog);
 
-int reqsk_queue_alloc(struct request_sock_queue *queue,
-                      unsigned int nr_table_entries)
+void reqsk_queue_alloc(struct request_sock_queue *queue)
 {
-        size_t lopt_size = sizeof(struct listen_sock);
-        struct listen_sock *lopt = NULL;
-
-        nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
-        nr_table_entries = max_t(u32, nr_table_entries, 8);
-        nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
-        lopt_size += nr_table_entries * sizeof(struct request_sock *);
-
-        if (lopt_size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
-                lopt = kzalloc(lopt_size, GFP_KERNEL |
-                                          __GFP_NOWARN |
-                                          __GFP_NORETRY);
-        if (!lopt)
-                lopt = vzalloc(lopt_size);
-        if (!lopt)
-                return -ENOMEM;
-
-        get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
-        spin_lock_init(&queue->syn_wait_lock);
+        spin_lock_init(&queue->rskq_lock);
         spin_lock_init(&queue->fastopenq.lock);
         queue->fastopenq.rskq_rst_head = NULL;
@@ -67,67 +48,6 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
         queue->fastopenq.max_qlen = 0;
 
         queue->rskq_accept_head = NULL;
-        lopt->nr_table_entries = nr_table_entries;
-        lopt->max_qlen_log = ilog2(nr_table_entries);
-
-        spin_lock_bh(&queue->syn_wait_lock);
-        queue->listen_opt = lopt;
-        spin_unlock_bh(&queue->syn_wait_lock);
-
-        return 0;
-}
-
-void __reqsk_queue_destroy(struct request_sock_queue *queue)
-{
-        /* This is an error recovery path only, no locking needed */
-        kvfree(queue->listen_opt);
-}
-
-static inline struct listen_sock *reqsk_queue_yank_listen_sk(
-        struct request_sock_queue *queue)
-{
-        struct listen_sock *lopt;
-
-        spin_lock_bh(&queue->syn_wait_lock);
-        lopt = queue->listen_opt;
-        queue->listen_opt = NULL;
-        spin_unlock_bh(&queue->syn_wait_lock);
-
-        return lopt;
-}
-
-void reqsk_queue_destroy(struct request_sock_queue *queue)
-{
-        /* make all the listen_opt local to us */
-        struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
-
-        if (listen_sock_qlen(lopt) != 0) {
-                unsigned int i;
-
-                for (i = 0; i < lopt->nr_table_entries; i++) {
-                        struct request_sock *req;
-
-                        spin_lock_bh(&queue->syn_wait_lock);
-                        while ((req = lopt->syn_table[i]) != NULL) {
-                                lopt->syn_table[i] = req->dl_next;
-                                /* Because of following del_timer_sync(),
-                                 * we must release the spinlock here
-                                 * or risk a dead lock.
-                                 */
-                                spin_unlock_bh(&queue->syn_wait_lock);
-                                atomic_inc(&lopt->qlen_dec);
-                                if (del_timer_sync(&req->rsk_timer))
-                                        reqsk_put(req);
-                                reqsk_put(req);
-                                spin_lock_bh(&queue->syn_wait_lock);
-                        }
-                        spin_unlock_bh(&queue->syn_wait_lock);
-                }
-        }
-
-        if (WARN_ON(listen_sock_qlen(lopt) != 0))
-                pr_err("qlen %u\n", listen_sock_qlen(lopt));
-        kvfree(lopt);
 }
 
 /*
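For context on how the simplified allocator is consumed: since reqsk_queue_alloc() no longer sizes or allocates a per-listener hash table, it cannot fail and now returns void, so listener setup can call it unconditionally. The fragment below is a hedged sketch of a caller such as inet_csk_listen_start(); the field and helper names are assumptions based on the surrounding kernel code and are not part of the diff shown above.

/* Hedged sketch of a caller (e.g. inet_csk_listen_start()): with the
 * listen_sock/syn_table gone, there is no table sizing and no -ENOMEM
 * path left, only lock and accept-queue initialization.
 */
struct inet_connection_sock *icsk = inet_csk(sk);

reqsk_queue_alloc(&icsk->icsk_accept_queue);    /* void: cannot fail */
sk->sk_ack_backlog = 0;                         /* nothing queued for accept() yet */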