aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/garp.c4
-rw-r--r--net/atm/common.c2
-rw-r--r--net/ax25/af_ax25.c1
-rw-r--r--net/bluetooth/a2mp.c6
-rw-r--r--net/bluetooth/af_bluetooth.c22
-rw-r--r--net/bluetooth/bnep/sock.c4
-rw-r--r--net/bluetooth/cmtp/sock.c4
-rw-r--r--net/bluetooth/hci_conn.c4
-rw-r--r--net/bluetooth/hci_core.c727
-rw-r--r--net/bluetooth/hci_event.c605
-rw-r--r--net/bluetooth/hci_sock.c9
-rw-r--r--net/bluetooth/hci_sysfs.c4
-rw-r--r--net/bluetooth/hidp/core.c4
-rw-r--r--net/bluetooth/hidp/sock.c4
-rw-r--r--net/bluetooth/l2cap_sock.c4
-rw-r--r--net/bluetooth/mgmt.c680
-rw-r--r--net/bluetooth/rfcomm/core.c167
-rw-r--r--net/bluetooth/rfcomm/sock.c4
-rw-r--r--net/bluetooth/sco.c4
-rw-r--r--net/bridge/br_if.c1
-rw-r--r--net/bridge/netfilter/ebt_log.c44
-rw-r--r--net/bridge/netfilter/ebt_nflog.c5
-rw-r--r--net/bridge/netfilter/ebt_ulog.c125
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/can/raw.c5
-rw-r--r--net/core/datagram.c4
-rw-r--r--net/core/dev.c8
-rw-r--r--net/core/dev_addr_lists.c210
-rw-r--r--net/core/dst.c9
-rw-r--r--net/core/flow.c2
-rw-r--r--net/core/neighbour.c49
-rw-r--r--net/core/rtnetlink.c4
-rw-r--r--net/core/scm.c24
-rw-r--r--net/core/sock.c20
-rw-r--r--net/decnet/dn_table.c1
-rw-r--r--net/ieee802154/6lowpan.c4
-rw-r--r--net/ieee802154/nl-mac.c25
-rw-r--r--net/ipv4/devinet.c6
-rw-r--r--net/ipv4/inet_fragment.c57
-rw-r--r--net/ipv4/ip_gre.c9
-rw-r--r--net/ipv4/ip_output.c3
-rw-r--r--net/ipv4/ipcomp.c1
-rw-r--r--net/ipv4/ipconfig.c13
-rw-r--r--net/ipv4/netfilter/ip_tables.c9
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c129
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c8
-rw-r--r--net/ipv4/ping.c5
-rw-r--r--net/ipv4/tcp.c19
-rw-r--r--net/ipv4/tcp_memcontrol.c3
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c8
-rw-r--r--net/ipv6/addrconf.c127
-rw-r--r--net/ipv6/ip6_input.c12
-rw-r--r--net/ipv6/ip6_output.c7
-rw-r--r--net/ipv6/netfilter/ip6_tables.c3
-rw-r--r--net/ipv6/netfilter/ip6t_NPT.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c7
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c22
-rw-r--r--net/ipv6/tcp_ipv6.c1
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c7
-rw-r--r--net/irda/af_irda.c7
-rw-r--r--net/irda/ircomm/ircomm_core.c2
-rw-r--r--net/iucv/af_iucv.c5
-rw-r--r--net/key/af_key.c1
-rw-r--r--net/l2tp/l2tp_ip6.c1
-rw-r--r--net/llc/af_llc.c2
-rw-r--r--net/mac80211/cfg.c35
-rw-r--r--net/mac80211/chan.c17
-rw-r--r--net/mac80211/debugfs_key.c10
-rw-r--r--net/mac80211/debugfs_netdev.c22
-rw-r--r--net/mac80211/driver-ops.h60
-rw-r--r--net/mac80211/ieee80211_i.h15
-rw-r--r--net/mac80211/iface.c54
-rw-r--r--net/mac80211/key.c129
-rw-r--r--net/mac80211/key.h15
-rw-r--r--net/mac80211/main.c22
-rw-r--r--net/mac80211/mesh.c8
-rw-r--r--net/mac80211/mlme.c18
-rw-r--r--net/mac80211/offchannel.c31
-rw-r--r--net/mac80211/pm.c6
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c24
-rw-r--r--net/mac80211/rx.c14
-rw-r--r--net/mac80211/scan.c7
-rw-r--r--net/mac80211/sta_info.c45
-rw-r--r--net/mac80211/sta_info.h9
-rw-r--r--net/mac80211/trace.h35
-rw-r--r--net/mac80211/tx.c8
-rw-r--r--net/mac80211/util.c48
-rw-r--r--net/mac802154/mac802154.h2
-rw-r--r--net/mac802154/mib.c12
-rw-r--r--net/mac802154/tx.c29
-rw-r--r--net/mac802154/wpan.c2
-rw-r--r--net/netfilter/core.c29
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c31
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c306
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c73
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c641
-rw-r--r--net/netfilter/ipvs/ip_vs_dh.c86
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c115
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c190
-rw-r--r--net/netfilter/ipvs/ip_vs_lc.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_pe.c55
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c36
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c40
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c33
-rw-r--r--net/netfilter/ipvs/ip_vs_rr.c64
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c63
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c86
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c35
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_wrr.c176
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c1050
-rw-r--r--net/netfilter/nf_conntrack_helper.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c9
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c18
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c6
-rw-r--r--net/netfilter/nf_conntrack_proto_udplite.c8
-rw-r--r--net/netfilter/nf_conntrack_standalone.c1
-rw-r--r--net/netfilter/nf_log.c206
-rw-r--r--net/netfilter/nfnetlink_acct.c2
-rw-r--r--net/netfilter/nfnetlink_log.c182
-rw-r--r--net/netfilter/nfnetlink_queue_core.c173
-rw-r--r--net/netfilter/xt_LOG.c52
-rw-r--r--net/netfilter/xt_NFQUEUE.c63
-rw-r--r--net/netfilter/xt_osf.c6
-rw-r--r--net/netrom/af_netrom.c1
-rw-r--r--net/nfc/llcp/llcp.c8
-rw-r--r--net/nfc/llcp/sock.c12
-rw-r--r--net/packet/af_packet.c49
-rw-r--r--net/rose/af_rose.c1
-rw-r--r--net/sched/act_csum.c39
-rw-r--r--net/sched/cls_api.c1
-rw-r--r--net/sched/sch_cbq.c5
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/sctp/associola.c12
-rw-r--r--net/sctp/endpointola.c14
-rw-r--r--net/sctp/proc.c12
-rw-r--r--net/sctp/socket.c8
-rw-r--r--net/socket.c3
-rw-r--r--net/tipc/socket.c7
-rw-r--r--net/unix/af_unix.c26
-rw-r--r--net/vmw_vsock/af_vsock.c8
-rw-r--r--net/vmw_vsock/vmci_transport.c34
-rw-r--r--net/vmw_vsock/vsock_addr.c10
-rw-r--r--net/vmw_vsock/vsock_addr.h2
-rw-r--r--net/wireless/core.c68
-rw-r--r--net/wireless/core.h3
-rw-r--r--net/wireless/nl80211.c52
-rw-r--r--net/wireless/scan.c24
-rw-r--r--net/wireless/sme.c10
-rw-r--r--net/wireless/trace.h5
-rw-r--r--net/wireless/wext-sme.c6
-rw-r--r--net/xfrm/xfrm_replay.c66
-rw-r--r--net/xfrm/xfrm_state.c1
-rw-r--r--net/xfrm/xfrm_user.c17
161 files changed, 4805 insertions, 3523 deletions
diff --git a/net/802/garp.c b/net/802/garp.c
index 8456f5d98b85..5d9630a0eb93 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -609,8 +609,12 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
/* Delete timer and generate a final TRANSMIT_PDU event to flush out
* all pending messages before the applicant is gone. */
del_timer_sync(&app->join_timer);
+
+ spin_lock_bh(&app->lock);
garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
garp_pdu_queue(app);
+ spin_unlock_bh(&app->lock);
+
garp_queue_xmit(app);
dev_mc_del(dev, appl->proto.group_address);
diff --git a/net/atm/common.c b/net/atm/common.c
index 7b491006eaf4..737bef59ce89 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -531,6 +531,8 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
struct sk_buff *skb;
int copied, error = -EINVAL;
+ msg->msg_namelen = 0;
+
if (sock->state != SS_CONNECTED)
return -ENOTCONN;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 7b11f8bc5071..e277e38f736b 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1642,6 +1642,7 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
ax25_address src;
const unsigned char *mac = skb_mac_header(skb);
+ memset(sax, 0, sizeof(struct full_sockaddr_ax25));
ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL,
&digi, NULL, NULL);
sax->sax25_family = AF_AX25;
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index eb0f4b16ff09..17f33a62f6db 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -397,13 +397,12 @@ static int a2mp_getampassoc_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
if (ctrl) {
u8 *assoc;
- assoc = kzalloc(assoc_len, GFP_KERNEL);
+ assoc = kmemdup(rsp->amp_assoc, assoc_len, GFP_KERNEL);
if (!assoc) {
amp_ctrl_put(ctrl);
return -ENOMEM;
}
- memcpy(assoc, rsp->amp_assoc, assoc_len);
ctrl->assoc = assoc;
ctrl->assoc_len = assoc_len;
ctrl->assoc_rem_len = assoc_len;
@@ -472,13 +471,12 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
size_t assoc_len = le16_to_cpu(hdr->len) - sizeof(*req);
u8 *assoc;
- assoc = kzalloc(assoc_len, GFP_KERNEL);
+ assoc = kmemdup(req->amp_assoc, assoc_len, GFP_KERNEL);
if (!assoc) {
amp_ctrl_put(ctrl);
return -ENOMEM;
}
- memcpy(assoc, req->amp_assoc, assoc_len);
ctrl->assoc = assoc;
ctrl->assoc_len = assoc_len;
ctrl->assoc_rem_len = assoc_len;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index d3ee69b35a78..e5338f787d68 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -92,23 +92,14 @@ int bt_sock_register(int proto, const struct net_proto_family *ops)
}
EXPORT_SYMBOL(bt_sock_register);
-int bt_sock_unregister(int proto)
+void bt_sock_unregister(int proto)
{
- int err = 0;
-
if (proto < 0 || proto >= BT_MAX_PROTO)
- return -EINVAL;
+ return;
write_lock(&bt_proto_lock);
-
- if (!bt_proto[proto])
- err = -ENOENT;
- else
- bt_proto[proto] = NULL;
-
+ bt_proto[proto] = NULL;
write_unlock(&bt_proto_lock);
-
- return err;
}
EXPORT_SYMBOL(bt_sock_unregister);
@@ -230,6 +221,8 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
if (flags & (MSG_OOB))
return -EOPNOTSUPP;
+ msg->msg_namelen = 0;
+
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb) {
if (sk->sk_shutdown & RCV_SHUTDOWN)
@@ -237,8 +230,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
return err;
}
- msg->msg_namelen = 0;
-
copied = skb->len;
if (len < copied) {
msg->msg_flags |= MSG_TRUNC;
@@ -422,7 +413,8 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock,
return bt_accept_poll(sk);
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= POLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index e7154a58465f..5b1c04e28821 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -253,8 +253,6 @@ error:
void __exit bnep_sock_cleanup(void)
{
bt_procfs_cleanup(&init_net, "bnep");
- if (bt_sock_unregister(BTPROTO_BNEP) < 0)
- BT_ERR("Can't unregister BNEP socket");
-
+ bt_sock_unregister(BTPROTO_BNEP);
proto_unregister(&bnep_proto);
}
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index 1c57482112b6..58d9edebab4b 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -264,8 +264,6 @@ error:
void cmtp_cleanup_sockets(void)
{
bt_procfs_cleanup(&init_net, "cmtp");
- if (bt_sock_unregister(BTPROTO_CMTP) < 0)
- BT_ERR("Can't unregister CMTP socket");
-
+ bt_sock_unregister(BTPROTO_CMTP);
proto_unregister(&cmtp_proto);
}
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 4925a02ae7e4..b9f90169940b 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -117,7 +117,7 @@ static void hci_acl_create_connection_cancel(struct hci_conn *conn)
hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp);
}
-void hci_acl_disconn(struct hci_conn *conn, __u8 reason)
+void hci_disconnect(struct hci_conn *conn, __u8 reason)
{
struct hci_cp_disconnect cp;
@@ -253,7 +253,7 @@ static void hci_conn_disconnect(struct hci_conn *conn)
hci_amp_disconn(conn, reason);
break;
default:
- hci_acl_disconn(conn, reason);
+ hci_disconnect(conn, reason);
break;
}
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 60793e7b768b..cfcad5423f1c 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -57,36 +57,9 @@ static void hci_notify(struct hci_dev *hdev, int event)
/* ---- HCI requests ---- */
-void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result)
+static void hci_req_sync_complete(struct hci_dev *hdev, u8 result)
{
- BT_DBG("%s command 0x%4.4x result 0x%2.2x", hdev->name, cmd, result);
-
- /* If this is the init phase check if the completed command matches
- * the last init command, and if not just return.
- */
- if (test_bit(HCI_INIT, &hdev->flags) && hdev->init_last_cmd != cmd) {
- struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data;
- u16 opcode = __le16_to_cpu(sent->opcode);
- struct sk_buff *skb;
-
- /* Some CSR based controllers generate a spontaneous
- * reset complete event during init and any pending
- * command will never be completed. In such a case we
- * need to resend whatever was the last sent
- * command.
- */
-
- if (cmd != HCI_OP_RESET || opcode == HCI_OP_RESET)
- return;
-
- skb = skb_clone(hdev->sent_cmd, GFP_ATOMIC);
- if (skb) {
- skb_queue_head(&hdev->cmd_q, skb);
- queue_work(hdev->workqueue, &hdev->cmd_work);
- }
-
- return;
- }
+ BT_DBG("%s result 0x%2.2x", hdev->name, result);
if (hdev->req_status == HCI_REQ_PEND) {
hdev->req_result = result;
@@ -107,21 +80,41 @@ static void hci_req_cancel(struct hci_dev *hdev, int err)
}
/* Execute request and wait for completion. */
-static int __hci_request(struct hci_dev *hdev,
- void (*req)(struct hci_dev *hdev, unsigned long opt),
- unsigned long opt, __u32 timeout)
+static int __hci_req_sync(struct hci_dev *hdev,
+ void (*func)(struct hci_request *req,
+ unsigned long opt),
+ unsigned long opt, __u32 timeout)
{
+ struct hci_request req;
DECLARE_WAITQUEUE(wait, current);
int err = 0;
BT_DBG("%s start", hdev->name);
+ hci_req_init(&req, hdev);
+
hdev->req_status = HCI_REQ_PEND;
+ func(&req, opt);
+
+ err = hci_req_run(&req, hci_req_sync_complete);
+ if (err < 0) {
+ hdev->req_status = 0;
+
+ /* ENODATA means the HCI request command queue is empty.
+ * This can happen when a request with conditionals doesn't
+ * trigger any commands to be sent. This is normal behavior
+ * and should not trigger an error return.
+ */
+ if (err == -ENODATA)
+ return 0;
+
+ return err;
+ }
+
add_wait_queue(&hdev->req_wait_q, &wait);
set_current_state(TASK_INTERRUPTIBLE);
- req(hdev, opt);
schedule_timeout(timeout);
remove_wait_queue(&hdev->req_wait_q, &wait);
@@ -150,9 +143,10 @@ static int __hci_request(struct hci_dev *hdev,
return err;
}
-static int hci_request(struct hci_dev *hdev,
- void (*req)(struct hci_dev *hdev, unsigned long opt),
- unsigned long opt, __u32 timeout)
+static int hci_req_sync(struct hci_dev *hdev,
+ void (*req)(struct hci_request *req,
+ unsigned long opt),
+ unsigned long opt, __u32 timeout)
{
int ret;
@@ -161,75 +155,86 @@ static int hci_request(struct hci_dev *hdev,
/* Serialize all requests */
hci_req_lock(hdev);
- ret = __hci_request(hdev, req, opt, timeout);
+ ret = __hci_req_sync(hdev, req, opt, timeout);
hci_req_unlock(hdev);
return ret;
}
-static void hci_reset_req(struct hci_dev *hdev, unsigned long opt)
+static void hci_reset_req(struct hci_request *req, unsigned long opt)
{
- BT_DBG("%s %ld", hdev->name, opt);
+ BT_DBG("%s %ld", req->hdev->name, opt);
/* Reset device */
- set_bit(HCI_RESET, &hdev->flags);
- hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL);
+ set_bit(HCI_RESET, &req->hdev->flags);
+ hci_req_add(req, HCI_OP_RESET, 0, NULL);
}
-static void bredr_init(struct hci_dev *hdev)
+static void bredr_init(struct hci_request *req)
{
- hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED;
+ req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED;
/* Read Local Supported Features */
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL);
+ hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL);
/* Read Local Version */
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL);
+ hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL);
+
+ /* Read BD Address */
+ hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL);
}
-static void amp_init(struct hci_dev *hdev)
+static void amp_init(struct hci_request *req)
{
- hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED;
+ req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED;
/* Read Local Version */
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL);
+ hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL);
/* Read Local AMP Info */
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL);
+ hci_req_add(req, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL);
/* Read Data Blk size */
- hci_send_cmd(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL);
+ hci_req_add(req, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL);
}
-static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
+static void hci_init1_req(struct hci_request *req, unsigned long opt)
{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_request init_req;
struct sk_buff *skb;
BT_DBG("%s %ld", hdev->name, opt);
/* Driver initialization */
+ hci_req_init(&init_req, hdev);
+
/* Special commands */
while ((skb = skb_dequeue(&hdev->driver_init))) {
bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
skb->dev = (void *) hdev;
- skb_queue_tail(&hdev->cmd_q, skb);
- queue_work(hdev->workqueue, &hdev->cmd_work);
+ if (skb_queue_empty(&init_req.cmd_q))
+ bt_cb(skb)->req.start = true;
+
+ skb_queue_tail(&init_req.cmd_q, skb);
}
skb_queue_purge(&hdev->driver_init);
+ hci_req_run(&init_req, NULL);
+
/* Reset */
if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks))
- hci_reset_req(hdev, 0);
+ hci_reset_req(req, 0);
switch (hdev->dev_type) {
case HCI_BREDR:
- bredr_init(hdev);
+ bredr_init(req);
break;
case HCI_AMP:
- amp_init(hdev);
+ amp_init(req);
break;
default:
@@ -238,44 +243,327 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
}
}
-static void hci_scan_req(struct hci_dev *hdev, unsigned long opt)
+static void bredr_setup(struct hci_request *req)
+{
+ struct hci_cp_delete_stored_link_key cp;
+ __le16 param;
+ __u8 flt_type;
+
+ /* Read Buffer Size (ACL mtu, max pkt, etc.) */
+ hci_req_add(req, HCI_OP_READ_BUFFER_SIZE, 0, NULL);
+
+ /* Read Class of Device */
+ hci_req_add(req, HCI_OP_READ_CLASS_OF_DEV, 0, NULL);
+
+ /* Read Local Name */
+ hci_req_add(req, HCI_OP_READ_LOCAL_NAME, 0, NULL);
+
+ /* Read Voice Setting */
+ hci_req_add(req, HCI_OP_READ_VOICE_SETTING, 0, NULL);
+
+ /* Clear Event Filters */
+ flt_type = HCI_FLT_CLEAR_ALL;
+ hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &flt_type);
+
+ /* Connection accept timeout ~20 secs */
+ param = __constant_cpu_to_le16(0x7d00);
+ hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
+
+ bacpy(&cp.bdaddr, BDADDR_ANY);
+ cp.delete_all = 0x01;
+ hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp);
+
+ /* Read page scan parameters */
+ if (req->hdev->hci_ver > BLUETOOTH_VER_1_1) {
+ hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL);
+ hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL);
+ }
+}
+
+static void le_setup(struct hci_request *req)
+{
+ /* Read LE Buffer Size */
+ hci_req_add(req, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL);
+
+ /* Read LE Local Supported Features */
+ hci_req_add(req, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL);
+
+ /* Read LE Advertising Channel TX Power */
+ hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
+
+ /* Read LE White List Size */
+ hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL);
+
+ /* Read LE Supported States */
+ hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL);
+}
+
+static u8 hci_get_inquiry_mode(struct hci_dev *hdev)
+{
+ if (lmp_ext_inq_capable(hdev))
+ return 0x02;
+
+ if (lmp_inq_rssi_capable(hdev))
+ return 0x01;
+
+ if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 &&
+ hdev->lmp_subver == 0x0757)
+ return 0x01;
+
+ if (hdev->manufacturer == 15) {
+ if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963)
+ return 0x01;
+ if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963)
+ return 0x01;
+ if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965)
+ return 0x01;
+ }
+
+ if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 &&
+ hdev->lmp_subver == 0x1805)
+ return 0x01;
+
+ return 0x00;
+}
+
+static void hci_setup_inquiry_mode(struct hci_request *req)
+{
+ u8 mode;
+
+ mode = hci_get_inquiry_mode(req->hdev);
+
+ hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode);
+}
+
+static void hci_setup_event_mask(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+
+ /* The second byte is 0xff instead of 0x9f (two reserved bits
+ * disabled) since a Broadcom 1.2 dongle doesn't respond to the
+ * command otherwise.
+ */
+ u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 };
+
+ /* CSR 1.1 dongles does not accept any bitfield so don't try to set
+ * any event mask for pre 1.2 devices.
+ */
+ if (hdev->hci_ver < BLUETOOTH_VER_1_2)
+ return;
+
+ if (lmp_bredr_capable(hdev)) {
+ events[4] |= 0x01; /* Flow Specification Complete */
+ events[4] |= 0x02; /* Inquiry Result with RSSI */
+ events[4] |= 0x04; /* Read Remote Extended Features Complete */
+ events[5] |= 0x08; /* Synchronous Connection Complete */
+ events[5] |= 0x10; /* Synchronous Connection Changed */
+ }
+
+ if (lmp_inq_rssi_capable(hdev))
+ events[4] |= 0x02; /* Inquiry Result with RSSI */
+
+ if (lmp_sniffsubr_capable(hdev))
+ events[5] |= 0x20; /* Sniff Subrating */
+
+ if (lmp_pause_enc_capable(hdev))
+ events[5] |= 0x80; /* Encryption Key Refresh Complete */
+
+ if (lmp_ext_inq_capable(hdev))
+ events[5] |= 0x40; /* Extended Inquiry Result */
+
+ if (lmp_no_flush_capable(hdev))
+ events[7] |= 0x01; /* Enhanced Flush Complete */
+
+ if (lmp_lsto_capable(hdev))
+ events[6] |= 0x80; /* Link Supervision Timeout Changed */
+
+ if (lmp_ssp_capable(hdev)) {
+ events[6] |= 0x01; /* IO Capability Request */
+ events[6] |= 0x02; /* IO Capability Response */
+ events[6] |= 0x04; /* User Confirmation Request */
+ events[6] |= 0x08; /* User Passkey Request */
+ events[6] |= 0x10; /* Remote OOB Data Request */
+ events[6] |= 0x20; /* Simple Pairing Complete */
+ events[7] |= 0x04; /* User Passkey Notification */
+ events[7] |= 0x08; /* Keypress Notification */
+ events[7] |= 0x10; /* Remote Host Supported
+ * Features Notification
+ */
+ }
+
+ if (lmp_le_capable(hdev))
+ events[7] |= 0x20; /* LE Meta-Event */
+
+ hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
+
+ if (lmp_le_capable(hdev)) {
+ memset(events, 0, sizeof(events));
+ events[0] = 0x1f;
+ hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK,
+ sizeof(events), events);
+ }
+}
+
+static void hci_init2_req(struct hci_request *req, unsigned long opt)
+{
+ struct hci_dev *hdev = req->hdev;
+
+ if (lmp_bredr_capable(hdev))
+ bredr_setup(req);
+
+ if (lmp_le_capable(hdev))
+ le_setup(req);
+
+ hci_setup_event_mask(req);
+
+ if (hdev->hci_ver > BLUETOOTH_VER_1_1)
+ hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL);
+
+ if (lmp_ssp_capable(hdev)) {
+ if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
+ u8 mode = 0x01;
+ hci_req_add(req, HCI_OP_WRITE_SSP_MODE,
+ sizeof(mode), &mode);
+ } else {
+ struct hci_cp_write_eir cp;
+
+ memset(hdev->eir, 0, sizeof(hdev->eir));
+ memset(&cp, 0, sizeof(cp));
+
+ hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
+ }
+ }
+
+ if (lmp_inq_rssi_capable(hdev))
+ hci_setup_inquiry_mode(req);
+
+ if (lmp_inq_tx_pwr_capable(hdev))
+ hci_req_add(req, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL);
+
+ if (lmp_ext_feat_capable(hdev)) {
+ struct hci_cp_read_local_ext_features cp;
+
+ cp.page = 0x01;
+ hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES,
+ sizeof(cp), &cp);
+ }
+
+ if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) {
+ u8 enable = 1;
+ hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable),
+ &enable);
+ }
+}
+
+static void hci_setup_link_policy(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_write_def_link_policy cp;
+ u16 link_policy = 0;
+
+ if (lmp_rswitch_capable(hdev))
+ link_policy |= HCI_LP_RSWITCH;
+ if (lmp_hold_capable(hdev))
+ link_policy |= HCI_LP_HOLD;
+ if (lmp_sniff_capable(hdev))
+ link_policy |= HCI_LP_SNIFF;
+ if (lmp_park_capable(hdev))
+ link_policy |= HCI_LP_PARK;
+
+ cp.policy = cpu_to_le16(link_policy);
+ hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp);
+}
+
+static void hci_set_le_support(struct hci_request *req)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_write_le_host_supported cp;
+
+ memset(&cp, 0, sizeof(cp));
+
+ if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
+ cp.le = 0x01;
+ cp.simul = lmp_le_br_capable(hdev);
+ }
+
+ if (cp.le != lmp_host_le_capable(hdev))
+ hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp),
+ &cp);
+}
+
+static void hci_init3_req(struct hci_request *req, unsigned long opt)
+{
+ struct hci_dev *hdev = req->hdev;
+
+ if (hdev->commands[5] & 0x10)
+ hci_setup_link_policy(req);
+
+ if (lmp_le_capable(hdev)) {
+ hci_set_le_support(req);
+ hci_update_ad(req);
+ }
+}
+
+static int __hci_init(struct hci_dev *hdev)
+{
+ int err;
+
+ err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT);
+ if (err < 0)
+ return err;
+
+ /* HCI_BREDR covers both single-mode LE, BR/EDR and dual-mode
+ * BR/EDR/LE type controllers. AMP controllers only need the
+ * first stage init.
+ */
+ if (hdev->dev_type != HCI_BREDR)
+ return 0;
+
+ err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT);
+ if (err < 0)
+ return err;
+
+ return __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT);
+}
+
+static void hci_scan_req(struct hci_request *req, unsigned long opt)
{
__u8 scan = opt;
- BT_DBG("%s %x", hdev->name, scan);
+ BT_DBG("%s %x", req->hdev->name, scan);
/* Inquiry and Page scans */
- hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+ hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
}
-static void hci_auth_req(struct hci_dev *hdev, unsigned long opt)
+static void hci_auth_req(struct hci_request *req, unsigned long opt)
{
__u8 auth = opt;
- BT_DBG("%s %x", hdev->name, auth);
+ BT_DBG("%s %x", req->hdev->name, auth);
/* Authentication */
- hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);
+ hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);
}
-static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt)
+static void hci_encrypt_req(struct hci_request *req, unsigned long opt)
{
__u8 encrypt = opt;
- BT_DBG("%s %x", hdev->name, encrypt);
+ BT_DBG("%s %x", req->hdev->name, encrypt);
/* Encryption */
- hci_send_cmd(hdev, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt);
+ hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt);
}
-static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt)
+static void hci_linkpol_req(struct hci_request *req, unsigned long opt)
{
__le16 policy = cpu_to_le16(opt);
- BT_DBG("%s %x", hdev->name, policy);
+ BT_DBG("%s %x", req->hdev->name, policy);
/* Default link policy */
- hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy);
+ hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy);
}
/* Get HCI device by index.
@@ -512,9 +800,10 @@ static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf)
return copied;
}
-static void hci_inq_req(struct hci_dev *hdev, unsigned long opt)
+static void hci_inq_req(struct hci_request *req, unsigned long opt)
{
struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt;
+ struct hci_dev *hdev = req->hdev;
struct hci_cp_inquiry cp;
BT_DBG("%s", hdev->name);
@@ -526,7 +815,7 @@ static void hci_inq_req(struct hci_dev *hdev, unsigned long opt)
memcpy(&cp.lap, &ir->lap, 3);
cp.length = ir->length;
cp.num_rsp = ir->num_rsp;
- hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp);
}
int hci_inquiry(void __user *arg)
@@ -556,7 +845,8 @@ int hci_inquiry(void __user *arg)
timeo = ir.length * msecs_to_jiffies(2000);
if (do_inquiry) {
- err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo);
+ err = hci_req_sync(hdev, hci_inq_req, (unsigned long) &ir,
+ timeo);
if (err < 0)
goto done;
}
@@ -654,39 +944,29 @@ static u8 create_ad(struct hci_dev *hdev, u8 *ptr)
return ad_len;
}
-int hci_update_ad(struct hci_dev *hdev)
+void hci_update_ad(struct hci_request *req)
{
+ struct hci_dev *hdev = req->hdev;
struct hci_cp_le_set_adv_data cp;
u8 len;
- int err;
- hci_dev_lock(hdev);
-
- if (!lmp_le_capable(hdev)) {
- err = -EINVAL;
- goto unlock;
- }
+ if (!lmp_le_capable(hdev))
+ return;
memset(&cp, 0, sizeof(cp));
len = create_ad(hdev, cp.data);
if (hdev->adv_data_len == len &&
- memcmp(cp.data, hdev->adv_data, len) == 0) {
- err = 0;
- goto unlock;
- }
+ memcmp(cp.data, hdev->adv_data, len) == 0)
+ return;
memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
hdev->adv_data_len = len;
cp.length = len;
- err = hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp);
-unlock:
- hci_dev_unlock(hdev);
-
- return err;
+ hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp);
}
/* ---- HCI ioctl helpers ---- */
@@ -735,10 +1015,7 @@ int hci_dev_open(__u16 dev)
if (!test_bit(HCI_RAW, &hdev->flags)) {
atomic_set(&hdev->cmd_cnt, 1);
set_bit(HCI_INIT, &hdev->flags);
- hdev->init_last_cmd = 0;
-
- ret = __hci_request(hdev, hci_init_req, 0, HCI_INIT_TIMEOUT);
-
+ ret = __hci_init(hdev);
clear_bit(HCI_INIT, &hdev->flags);
}
@@ -746,7 +1023,6 @@ int hci_dev_open(__u16 dev)
hci_dev_hold(hdev);
set_bit(HCI_UP, &hdev->flags);
hci_notify(hdev, HCI_DEV_UP);
- hci_update_ad(hdev);
if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&
mgmt_valid_hdev(hdev)) {
hci_dev_lock(hdev);
@@ -828,7 +1104,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
if (!test_bit(HCI_RAW, &hdev->flags) &&
test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) {
set_bit(HCI_INIT, &hdev->flags);
- __hci_request(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT);
+ __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT);
clear_bit(HCI_INIT, &hdev->flags);
}
@@ -851,6 +1127,10 @@ static int hci_dev_do_close(struct hci_dev *hdev)
* and no tasks are scheduled. */
hdev->close(hdev);
+ /* Clear flags */
+ hdev->flags = 0;
+ hdev->dev_flags &= ~HCI_PERSISTENT_MASK;
+
if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags) &&
mgmt_valid_hdev(hdev)) {
hci_dev_lock(hdev);
@@ -858,9 +1138,6 @@ static int hci_dev_do_close(struct hci_dev *hdev)
hci_dev_unlock(hdev);
}
- /* Clear flags */
- hdev->flags = 0;
-
/* Controller radio is available but is currently powered down */
hdev->amp_status = 0;
@@ -921,7 +1198,7 @@ int hci_dev_reset(__u16 dev)
hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0;
if (!test_bit(HCI_RAW, &hdev->flags))
- ret = __hci_request(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT);
+ ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT);
done:
hci_req_unlock(hdev);
@@ -960,8 +1237,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
switch (cmd) {
case HCISETAUTH:
- err = hci_request(hdev, hci_auth_req, dr.dev_opt,
- HCI_INIT_TIMEOUT);
+ err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
+ HCI_INIT_TIMEOUT);
break;
case HCISETENCRYPT:
@@ -972,24 +1249,24 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
if (!test_bit(HCI_AUTH, &hdev->flags)) {
/* Auth must be enabled first */
- err = hci_request(hdev, hci_auth_req, dr.dev_opt,
- HCI_INIT_TIMEOUT);
+ err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
+ HCI_INIT_TIMEOUT);
if (err)
break;
}
- err = hci_request(hdev, hci_encrypt_req, dr.dev_opt,
- HCI_INIT_TIMEOUT);
+ err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt,
+ HCI_INIT_TIMEOUT);
break;
case HCISETSCAN:
- err = hci_request(hdev, hci_scan_req, dr.dev_opt,
- HCI_INIT_TIMEOUT);
+ err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt,
+ HCI_INIT_TIMEOUT);
break;
case HCISETLINKPOL:
- err = hci_request(hdev, hci_linkpol_req, dr.dev_opt,
- HCI_INIT_TIMEOUT);
+ err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt,
+ HCI_INIT_TIMEOUT);
break;
case HCISETLINKMODE:
@@ -1566,7 +1843,7 @@ int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
return mgmt_device_unblocked(hdev, bdaddr, type);
}
-static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt)
+static void le_scan_param_req(struct hci_request *req, unsigned long opt)
{
struct le_scan_params *param = (struct le_scan_params *) opt;
struct hci_cp_le_set_scan_param cp;
@@ -1576,10 +1853,10 @@ static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt)
cp.interval = cpu_to_le16(param->interval);
cp.window = cpu_to_le16(param->window);
- hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp);
}
-static void le_scan_enable_req(struct hci_dev *hdev, unsigned long opt)
+static void le_scan_enable_req(struct hci_request *req, unsigned long opt)
{
struct hci_cp_le_set_scan_enable cp;
@@ -1587,7 +1864,7 @@ static void le_scan_enable_req(struct hci_dev *hdev, unsigned long opt)
cp.enable = 1;
cp.filter_dup = 1;
- hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
}
static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval,
@@ -1608,10 +1885,10 @@ static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval,
hci_req_lock(hdev);
- err = __hci_request(hdev, le_scan_param_req, (unsigned long) &param,
- timeo);
+ err = __hci_req_sync(hdev, le_scan_param_req, (unsigned long) &param,
+ timeo);
if (!err)
- err = __hci_request(hdev, le_scan_enable_req, 0, timeo);
+ err = __hci_req_sync(hdev, le_scan_enable_req, 0, timeo);
hci_req_unlock(hdev);
@@ -2160,20 +2437,55 @@ static int hci_send_frame(struct sk_buff *skb)
return hdev->send(skb);
}
-/* Send HCI command */
-int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
+void hci_req_init(struct hci_request *req, struct hci_dev *hdev)
+{
+ skb_queue_head_init(&req->cmd_q);
+ req->hdev = hdev;
+ req->err = 0;
+}
+
+int hci_req_run(struct hci_request *req, hci_req_complete_t complete)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ BT_DBG("length %u", skb_queue_len(&req->cmd_q));
+
+ /* If an error occured during request building, remove all HCI
+ * commands queued on the HCI request queue.
+ */
+ if (req->err) {
+ skb_queue_purge(&req->cmd_q);
+ return req->err;
+ }
+
+ /* Do not allow empty requests */
+ if (skb_queue_empty(&req->cmd_q))
+ return -ENODATA;
+
+ skb = skb_peek_tail(&req->cmd_q);
+ bt_cb(skb)->req.complete = complete;
+
+ spin_lock_irqsave(&hdev->cmd_q.lock, flags);
+ skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q);
+ spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
+
+ queue_work(hdev->workqueue, &hdev->cmd_work);
+
+ return 0;
+}
+
+static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode,
+ u32 plen, void *param)
{
int len = HCI_COMMAND_HDR_SIZE + plen;
struct hci_command_hdr *hdr;
struct sk_buff *skb;
- BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);
-
skb = bt_skb_alloc(len, GFP_ATOMIC);
- if (!skb) {
- BT_ERR("%s no memory for command", hdev->name);
- return -ENOMEM;
- }
+ if (!skb)
+ return NULL;
hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE);
hdr->opcode = cpu_to_le16(opcode);
@@ -2187,8 +2499,26 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
skb->dev = (void *) hdev;
- if (test_bit(HCI_INIT, &hdev->flags))
- hdev->init_last_cmd = opcode;
+ return skb;
+}
+
+/* Send HCI command */
+int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
+{
+ struct sk_buff *skb;
+
+ BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);
+
+ skb = hci_prepare_cmd(hdev, opcode, plen, param);
+ if (!skb) {
+ BT_ERR("%s no memory for command", hdev->name);
+ return -ENOMEM;
+ }
+
+ /* Stand-alone HCI commands must be flaged as
+ * single-command requests.
+ */
+ bt_cb(skb)->req.start = true;
skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -2196,6 +2526,34 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
return 0;
}
+/* Queue a command to an asynchronous HCI request */
+void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, void *param)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct sk_buff *skb;
+
+ BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);
+
+ /* If an error occured during request building, there is no point in
+ * queueing the HCI command. We can simply return.
+ */
+ if (req->err)
+ return;
+
+ skb = hci_prepare_cmd(hdev, opcode, plen, param);
+ if (!skb) {
+ BT_ERR("%s no memory for command (opcode 0x%4.4x)",
+ hdev->name, opcode);
+ req->err = -ENOMEM;
+ return;
+ }
+
+ if (skb_queue_empty(&req->cmd_q))
+ bt_cb(skb)->req.start = true;
+
+ skb_queue_tail(&req->cmd_q, skb);
+}
+
/* Get data from the previously sent command */
void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode)
{
@@ -2398,7 +2756,7 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
if (c->type == type && c->sent) {
BT_ERR("%s killing stalled connection %pMR",
hdev->name, &c->dst);
- hci_acl_disconn(c, HCI_ERROR_REMOTE_USER_TERM);
+ hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
}
}
@@ -2860,6 +3218,123 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
kfree_skb(skb);
}
+static bool hci_req_is_complete(struct hci_dev *hdev)
+{
+ struct sk_buff *skb;
+
+ skb = skb_peek(&hdev->cmd_q);
+ if (!skb)
+ return true;
+
+ return bt_cb(skb)->req.start;
+}
+
+static void hci_resend_last(struct hci_dev *hdev)
+{
+ struct hci_command_hdr *sent;
+ struct sk_buff *skb;
+ u16 opcode;
+
+ if (!hdev->sent_cmd)
+ return;
+
+ sent = (void *) hdev->sent_cmd->data;
+ opcode = __le16_to_cpu(sent->opcode);
+ if (opcode == HCI_OP_RESET)
+ return;
+
+ skb = skb_clone(hdev->sent_cmd, GFP_KERNEL);
+ if (!skb)
+ return;
+
+ skb_queue_head(&hdev->cmd_q, skb);
+ queue_work(hdev->workqueue, &hdev->cmd_work);
+}
+
+void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
+{
+ hci_req_complete_t req_complete = NULL;
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ BT_DBG("opcode 0x%04x status 0x%02x", opcode, status);
+
+ /* If the completed command doesn't match the last one that was
+ * sent we need to do special handling of it.
+ */
+ if (!hci_sent_cmd_data(hdev, opcode)) {
+ /* Some CSR based controllers generate a spontaneous
+ * reset complete event during init and any pending
+ * command will never be completed. In such a case we
+ * need to resend whatever was the last sent
+ * command.
+ */
+ if (test_bit(HCI_INIT, &hdev->flags) && opcode == HCI_OP_RESET)
+ hci_resend_last(hdev);
+
+ return;
+ }
+
+ /* If the command succeeded and there's still more commands in
+ * this request the request is not yet complete.
+ */
+ if (!status && !hci_req_is_complete(hdev))
+ return;
+
+ /* If this was the last command in a request the complete
+ * callback would be found in hdev->sent_cmd instead of the
+ * command queue (hdev->cmd_q).
+ */
+ if (hdev->sent_cmd) {
+ req_complete = bt_cb(hdev->sent_cmd)->req.complete;
+ if (req_complete)
+ goto call_complete;
+ }
+
+ /* Remove all pending commands belonging to this request */
+ spin_lock_irqsave(&hdev->cmd_q.lock, flags);
+ while ((skb = __skb_dequeue(&hdev->cmd_q))) {
+ if (bt_cb(skb)->req.start) {
+ __skb_queue_head(&hdev->cmd_q, skb);
+ break;
+ }
+
+ req_complete = bt_cb(skb)->req.complete;
+ kfree_skb(skb);
+ }
+ spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
+
+call_complete:
+ if (req_complete)
+ req_complete(hdev, status);
+}
+
+void hci_req_cmd_status(struct hci_dev *hdev, u16 opcode, u8 status)
+{
+ hci_req_complete_t req_complete = NULL;
+
+ BT_DBG("opcode 0x%04x status 0x%02x", opcode, status);
+
+ if (status) {
+ hci_req_cmd_complete(hdev, opcode, status);
+ return;
+ }
+
+ /* No need to handle success status if there are more commands */
+ if (!hci_req_is_complete(hdev))
+ return;
+
+ if (hdev->sent_cmd)
+ req_complete = bt_cb(hdev->sent_cmd)->req.complete;
+
+ /* If the request doesn't have a complete callback or there
+ * are other commands/requests in the hdev queue we consider
+ * this request as completed.
+ */
+ if (!req_complete || !skb_queue_empty(&hdev->cmd_q))
+ hci_req_cmd_complete(hdev, opcode, status);
+}
+
static void hci_rx_work(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 477726a63512..138580745c2c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -53,7 +53,7 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
hci_dev_unlock(hdev);
- hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status);
+ hci_req_cmd_complete(hdev, HCI_OP_INQUIRY, status);
hci_conn_check_pending(hdev);
}
@@ -183,8 +183,6 @@ static void hci_cc_write_def_link_policy(struct hci_dev *hdev,
if (!status)
hdev->link_policy = get_unaligned_le16(sent);
-
- hci_req_complete(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, status);
}
static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
@@ -195,11 +193,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
clear_bit(HCI_RESET, &hdev->flags);
- hci_req_complete(hdev, HCI_OP_RESET, status);
-
/* Reset all non-persistent flags */
- hdev->dev_flags &= ~(BIT(HCI_LE_SCAN) | BIT(HCI_PENDING_CLASS) |
- BIT(HCI_PERIODIC_INQ));
+ hdev->dev_flags &= ~HCI_PERSISTENT_MASK;
hdev->discovery.state = DISCOVERY_STOPPED;
hdev->inq_tx_power = HCI_TX_POWER_INVALID;
@@ -228,11 +223,6 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb)
memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH);
hci_dev_unlock(hdev);
-
- if (!status && !test_bit(HCI_INIT, &hdev->flags))
- hci_update_ad(hdev);
-
- hci_req_complete(hdev, HCI_OP_WRITE_LOCAL_NAME, status);
}
static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb)
@@ -270,8 +260,6 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)
if (test_bit(HCI_MGMT, &hdev->dev_flags))
mgmt_auth_enable_complete(hdev, status);
-
- hci_req_complete(hdev, HCI_OP_WRITE_AUTH_ENABLE, status);
}
static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)
@@ -293,8 +281,6 @@ static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)
else
clear_bit(HCI_ENCRYPT, &hdev->flags);
}
-
- hci_req_complete(hdev, HCI_OP_WRITE_ENCRYPT_MODE, status);
}
static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
@@ -343,7 +329,6 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
done:
hci_dev_unlock(hdev);
- hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status);
}
static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb)
@@ -435,15 +420,6 @@ static void hci_cc_write_voice_setting(struct hci_dev *hdev,
hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING);
}
-static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb)
-{
- __u8 status = *((__u8 *) skb->data);
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- hci_req_complete(hdev, HCI_OP_HOST_BUFFER_SIZE, status);
-}
-
static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
{
__u8 status = *((__u8 *) skb->data);
@@ -472,211 +448,6 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
}
}
-static u8 hci_get_inquiry_mode(struct hci_dev *hdev)
-{
- if (lmp_ext_inq_capable(hdev))
- return 2;
-
- if (lmp_inq_rssi_capable(hdev))
- return 1;
-
- if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 &&
- hdev->lmp_subver == 0x0757)
- return 1;
-
- if (hdev->manufacturer == 15) {
- if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963)
- return 1;
- if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963)
- return 1;
- if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965)
- return 1;
- }
-
- if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 &&
- hdev->lmp_subver == 0x1805)
- return 1;
-
- return 0;
-}
-
-static void hci_setup_inquiry_mode(struct hci_dev *hdev)
-{
- u8 mode;
-
- mode = hci_get_inquiry_mode(hdev);
-
- hci_send_cmd(hdev, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode);
-}
-
-static void hci_setup_event_mask(struct hci_dev *hdev)
-{
- /* The second byte is 0xff instead of 0x9f (two reserved bits
- * disabled) since a Broadcom 1.2 dongle doesn't respond to the
- * command otherwise */
- u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 };
-
- /* CSR 1.1 dongles does not accept any bitfield so don't try to set
- * any event mask for pre 1.2 devices */
- if (hdev->hci_ver < BLUETOOTH_VER_1_2)
- return;
-
- if (lmp_bredr_capable(hdev)) {
- events[4] |= 0x01; /* Flow Specification Complete */
- events[4] |= 0x02; /* Inquiry Result with RSSI */
- events[4] |= 0x04; /* Read Remote Extended Features Complete */
- events[5] |= 0x08; /* Synchronous Connection Complete */
- events[5] |= 0x10; /* Synchronous Connection Changed */
- }
-
- if (lmp_inq_rssi_capable(hdev))
- events[4] |= 0x02; /* Inquiry Result with RSSI */
-
- if (lmp_sniffsubr_capable(hdev))
- events[5] |= 0x20; /* Sniff Subrating */
-
- if (lmp_pause_enc_capable(hdev))
- events[5] |= 0x80; /* Encryption Key Refresh Complete */
-
- if (lmp_ext_inq_capable(hdev))
- events[5] |= 0x40; /* Extended Inquiry Result */
-
- if (lmp_no_flush_capable(hdev))
- events[7] |= 0x01; /* Enhanced Flush Complete */
-
- if (lmp_lsto_capable(hdev))
- events[6] |= 0x80; /* Link Supervision Timeout Changed */
-
- if (lmp_ssp_capable(hdev)) {
- events[6] |= 0x01; /* IO Capability Request */
- events[6] |= 0x02; /* IO Capability Response */
- events[6] |= 0x04; /* User Confirmation Request */
- events[6] |= 0x08; /* User Passkey Request */
- events[6] |= 0x10; /* Remote OOB Data Request */
- events[6] |= 0x20; /* Simple Pairing Complete */
- events[7] |= 0x04; /* User Passkey Notification */
- events[7] |= 0x08; /* Keypress Notification */
- events[7] |= 0x10; /* Remote Host Supported
- * Features Notification */
- }
-
- if (lmp_le_capable(hdev))
- events[7] |= 0x20; /* LE Meta-Event */
-
- hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
-
- if (lmp_le_capable(hdev)) {
- memset(events, 0, sizeof(events));
- events[0] = 0x1f;
- hci_send_cmd(hdev, HCI_OP_LE_SET_EVENT_MASK,
- sizeof(events), events);
- }
-}
-
-static void bredr_setup(struct hci_dev *hdev)
-{
- struct hci_cp_delete_stored_link_key cp;
- __le16 param;
- __u8 flt_type;
-
- /* Read Buffer Size (ACL mtu, max pkt, etc.) */
- hci_send_cmd(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL);
-
- /* Read Class of Device */
- hci_send_cmd(hdev, HCI_OP_READ_CLASS_OF_DEV, 0, NULL);
-
- /* Read Local Name */
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_NAME, 0, NULL);
-
- /* Read Voice Setting */
- hci_send_cmd(hdev, HCI_OP_READ_VOICE_SETTING, 0, NULL);
-
- /* Clear Event Filters */
- flt_type = HCI_FLT_CLEAR_ALL;
- hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type);
-
- /* Connection accept timeout ~20 secs */
- param = __constant_cpu_to_le16(0x7d00);
- hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
-
- bacpy(&cp.bdaddr, BDADDR_ANY);
- cp.delete_all = 1;
- hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp);
-}
-
-static void le_setup(struct hci_dev *hdev)
-{
- /* Read LE Buffer Size */
- hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL);
-
- /* Read LE Local Supported Features */
- hci_send_cmd(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL);
-
- /* Read LE Advertising Channel TX Power */
- hci_send_cmd(hdev, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
-
- /* Read LE White List Size */
- hci_send_cmd(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL);
-
- /* Read LE Supported States */
- hci_send_cmd(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL);
-}
-
-static void hci_setup(struct hci_dev *hdev)
-{
- if (hdev->dev_type != HCI_BREDR)
- return;
-
- /* Read BD Address */
- hci_send_cmd(hdev, HCI_OP_READ_BD_ADDR, 0, NULL);
-
- if (lmp_bredr_capable(hdev))
- bredr_setup(hdev);
-
- if (lmp_le_capable(hdev))
- le_setup(hdev);
-
- hci_setup_event_mask(hdev);
-
- if (hdev->hci_ver > BLUETOOTH_VER_1_1)
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL);
-
- if (lmp_ssp_capable(hdev)) {
- if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
- u8 mode = 0x01;
- hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE,
- sizeof(mode), &mode);
- } else {
- struct hci_cp_write_eir cp;
-
- memset(hdev->eir, 0, sizeof(hdev->eir));
- memset(&cp, 0, sizeof(cp));
-
- hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
- }
- }
-
- if (lmp_inq_rssi_capable(hdev))
- hci_setup_inquiry_mode(hdev);
-
- if (lmp_inq_tx_pwr_capable(hdev))
- hci_send_cmd(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL);
-
- if (lmp_ext_feat_capable(hdev)) {
- struct hci_cp_read_local_ext_features cp;
-
- cp.page = 0x01;
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, sizeof(cp),
- &cp);
- }
-
- if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) {
- u8 enable = 1;
- hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable),
- &enable);
- }
-}
-
static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_rp_read_local_version *rp = (void *) skb->data;
@@ -684,7 +455,7 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
if (rp->status)
- goto done;
+ return;
hdev->hci_ver = rp->hci_ver;
hdev->hci_rev = __le16_to_cpu(rp->hci_rev);
@@ -694,30 +465,6 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s manufacturer 0x%4.4x hci ver %d:%d", hdev->name,
hdev->manufacturer, hdev->hci_ver, hdev->hci_rev);
-
- if (test_bit(HCI_INIT, &hdev->flags))
- hci_setup(hdev);
-
-done:
- hci_req_complete(hdev, HCI_OP_READ_LOCAL_VERSION, rp->status);
-}
-
-static void hci_setup_link_policy(struct hci_dev *hdev)
-{
- struct hci_cp_write_def_link_policy cp;
- u16 link_policy = 0;
-
- if (lmp_rswitch_capable(hdev))
- link_policy |= HCI_LP_RSWITCH;
- if (lmp_hold_capable(hdev))
- link_policy |= HCI_LP_HOLD;
- if (lmp_sniff_capable(hdev))
- link_policy |= HCI_LP_SNIFF;
- if (lmp_park_capable(hdev))
- link_policy |= HCI_LP_PARK;
-
- cp.policy = cpu_to_le16(link_policy);
- hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp);
}
static void hci_cc_read_local_commands(struct hci_dev *hdev,
@@ -727,16 +474,8 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
- if (rp->status)
- goto done;
-
- memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));
-
- if (test_bit(HCI_INIT, &hdev->flags) && (hdev->commands[5] & 0x10))
- hci_setup_link_policy(hdev);
-
-done:
- hci_req_complete(hdev, HCI_OP_READ_LOCAL_COMMANDS, rp->status);
+ if (!rp->status)
+ memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));
}
static void hci_cc_read_local_features(struct hci_dev *hdev,
@@ -795,22 +534,6 @@ static void hci_cc_read_local_features(struct hci_dev *hdev,
hdev->features[6], hdev->features[7]);
}
-static void hci_set_le_support(struct hci_dev *hdev)
-{
- struct hci_cp_write_le_host_supported cp;
-
- memset(&cp, 0, sizeof(cp));
-
- if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
- cp.le = 1;
- cp.simul = lmp_le_br_capable(hdev);
- }
-
- if (cp.le != lmp_host_le_capable(hdev))
- hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp),
- &cp);
-}
-
static void hci_cc_read_local_ext_features(struct hci_dev *hdev,
struct sk_buff *skb)
{
@@ -819,7 +542,7 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
if (rp->status)
- goto done;
+ return;
switch (rp->page) {
case 0:
@@ -829,12 +552,6 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev,
memcpy(hdev->host_features, rp->features, 8);
break;
}
-
- if (test_bit(HCI_INIT, &hdev->flags) && lmp_le_capable(hdev))
- hci_set_le_support(hdev);
-
-done:
- hci_req_complete(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, rp->status);
}
static void hci_cc_read_flow_control_mode(struct hci_dev *hdev,
@@ -844,12 +561,8 @@ static void hci_cc_read_flow_control_mode(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
- if (rp->status)
- return;
-
- hdev->flow_ctl_mode = rp->mode;
-
- hci_req_complete(hdev, HCI_OP_READ_FLOW_CONTROL_MODE, rp->status);
+ if (!rp->status)
+ hdev->flow_ctl_mode = rp->mode;
}
static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb)
@@ -886,8 +599,65 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb)
if (!rp->status)
bacpy(&hdev->bdaddr, &rp->bdaddr);
+}
+
+static void hci_cc_read_page_scan_activity(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ struct hci_rp_read_page_scan_activity *rp = (void *) skb->data;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+ if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) {
+ hdev->page_scan_interval = __le16_to_cpu(rp->interval);
+ hdev->page_scan_window = __le16_to_cpu(rp->window);
+ }
+}
+
+static void hci_cc_write_page_scan_activity(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ u8 status = *((u8 *) skb->data);
+ struct hci_cp_write_page_scan_activity *sent;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ if (status)
+ return;
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY);
+ if (!sent)
+ return;
+
+ hdev->page_scan_interval = __le16_to_cpu(sent->interval);
+ hdev->page_scan_window = __le16_to_cpu(sent->window);
+}
+
+static void hci_cc_read_page_scan_type(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ struct hci_rp_read_page_scan_type *rp = (void *) skb->data;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+ if (test_bit(HCI_INIT, &hdev->flags) && !rp->status)
+ hdev->page_scan_type = rp->type;
+}
+
+static void hci_cc_write_page_scan_type(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ u8 status = *((u8 *) skb->data);
+ u8 *type;
- hci_req_complete(hdev, HCI_OP_READ_BD_ADDR, rp->status);
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ if (status)
+ return;
+
+ type = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE);
+ if (type)
+ hdev->page_scan_type = *type;
}
static void hci_cc_read_data_block_size(struct hci_dev *hdev,
@@ -908,17 +678,6 @@ static void hci_cc_read_data_block_size(struct hci_dev *hdev,
BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu,
hdev->block_cnt, hdev->block_len);
-
- hci_req_complete(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, rp->status);
-}
-
-static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb)
-{
- __u8 status = *((__u8 *) skb->data);
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status);
}
static void hci_cc_read_local_amp_info(struct hci_dev *hdev,
@@ -942,8 +701,6 @@ static void hci_cc_read_local_amp_info(struct hci_dev *hdev,
hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to);
hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to);
- hci_req_complete(hdev, HCI_OP_READ_LOCAL_AMP_INFO, rp->status);
-
a2mp_rsp:
a2mp_send_getinfo_rsp(hdev);
}
@@ -985,35 +742,6 @@ a2mp_rsp:
a2mp_send_create_phy_link_req(hdev, rp->status);
}
-static void hci_cc_delete_stored_link_key(struct hci_dev *hdev,
- struct sk_buff *skb)
-{
- __u8 status = *((__u8 *) skb->data);
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- hci_req_complete(hdev, HCI_OP_DELETE_STORED_LINK_KEY, status);
-}
-
-static void hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb)
-{
- __u8 status = *((__u8 *) skb->data);
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- hci_req_complete(hdev, HCI_OP_SET_EVENT_MASK, status);
-}
-
-static void hci_cc_write_inquiry_mode(struct hci_dev *hdev,
- struct sk_buff *skb)
-{
- __u8 status = *((__u8 *) skb->data);
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- hci_req_complete(hdev, HCI_OP_WRITE_INQUIRY_MODE, status);
-}
-
static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev,
struct sk_buff *skb)
{
@@ -1023,17 +751,6 @@ static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev,
if (!rp->status)
hdev->inq_tx_power = rp->tx_power;
-
- hci_req_complete(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, rp->status);
-}
-
-static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb)
-{
- __u8 status = *((__u8 *) skb->data);
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- hci_req_complete(hdev, HCI_OP_SET_EVENT_FLT, status);
}
static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1095,8 +812,6 @@ static void hci_cc_le_read_buffer_size(struct hci_dev *hdev,
hdev->le_cnt = hdev->le_pkts;
BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts);
-
- hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status);
}
static void hci_cc_le_read_local_features(struct hci_dev *hdev,
@@ -1108,8 +823,6 @@ static void hci_cc_le_read_local_features(struct hci_dev *hdev,
if (!rp->status)
memcpy(hdev->le_features, rp->features, 8);
-
- hci_req_complete(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, rp->status);
}
static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev,
@@ -1119,22 +832,8 @@ static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
- if (!rp->status) {
+ if (!rp->status)
hdev->adv_tx_power = rp->tx_power;
- if (!test_bit(HCI_INIT, &hdev->flags))
- hci_update_ad(hdev);
- }
-
- hci_req_complete(hdev, HCI_OP_LE_READ_ADV_TX_POWER, rp->status);
-}
-
-static void hci_cc_le_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb)
-{
- __u8 status = *((__u8 *) skb->data);
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- hci_req_complete(hdev, HCI_OP_LE_SET_EVENT_MASK, status);
}
static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1231,12 +930,15 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)
clear_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags);
}
- hci_dev_unlock(hdev);
+ if (!test_bit(HCI_INIT, &hdev->flags)) {
+ struct hci_request req;
- if (!test_bit(HCI_INIT, &hdev->flags))
- hci_update_ad(hdev);
+ hci_req_init(&req, hdev);
+ hci_update_ad(&req);
+ hci_req_run(&req, NULL);
+ }
- hci_req_complete(hdev, HCI_OP_LE_SET_ADV_ENABLE, status);
+ hci_dev_unlock(hdev);
}
static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1245,8 +947,6 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s status 0x%2.2x", hdev->name, status);
- hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_PARAM, status);
-
if (status) {
hci_dev_lock(hdev);
mgmt_start_discovery_failed(hdev, status);
@@ -1269,8 +969,6 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
switch (cp->enable) {
case LE_SCANNING_ENABLED:
- hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_ENABLE, status);
-
if (status) {
hci_dev_lock(hdev);
mgmt_start_discovery_failed(hdev, status);
@@ -1321,32 +1019,6 @@ static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,
if (!rp->status)
hdev->le_white_list_size = rp->size;
-
- hci_req_complete(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, rp->status);
-}
-
-static void hci_cc_le_ltk_reply(struct hci_dev *hdev, struct sk_buff *skb)
-{
- struct hci_rp_le_ltk_reply *rp = (void *) skb->data;
-
- BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
-
- if (rp->status)
- return;
-
- hci_req_complete(hdev, HCI_OP_LE_LTK_REPLY, rp->status);
-}
-
-static void hci_cc_le_ltk_neg_reply(struct hci_dev *hdev, struct sk_buff *skb)
-{
- struct hci_rp_le_ltk_neg_reply *rp = (void *) skb->data;
-
- BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
-
- if (rp->status)
- return;
-
- hci_req_complete(hdev, HCI_OP_LE_LTK_NEG_REPLY, rp->status);
}
static void hci_cc_le_read_supported_states(struct hci_dev *hdev,
@@ -1358,8 +1030,6 @@ static void hci_cc_le_read_supported_states(struct hci_dev *hdev,
if (!rp->status)
memcpy(hdev->le_states, rp->le_states, 8);
-
- hci_req_complete(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, rp->status);
}
static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
@@ -1389,8 +1059,6 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
if (test_bit(HCI_MGMT, &hdev->dev_flags) &&
!test_bit(HCI_INIT, &hdev->flags))
mgmt_le_enable_complete(hdev, sent->le, status);
-
- hci_req_complete(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, status);
}
static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev,
@@ -1412,7 +1080,6 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
BT_DBG("%s status 0x%2.2x", hdev->name, status);
if (status) {
- hci_req_complete(hdev, HCI_OP_INQUIRY, status);
hci_conn_check_pending(hdev);
hci_dev_lock(hdev);
if (test_bit(HCI_MGMT, &hdev->dev_flags))
@@ -1884,11 +1551,6 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status)
}
}
-static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status)
-{
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-}
-
static void hci_cs_create_phylink(struct hci_dev *hdev, u8 status)
{
struct hci_cp_create_phy_link *cp;
@@ -1930,11 +1592,6 @@ static void hci_cs_accept_phylink(struct hci_dev *hdev, u8 status)
amp_write_remote_assoc(hdev, cp->phy_handle);
}
-static void hci_cs_create_logical_link(struct hci_dev *hdev, u8 status)
-{
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-}
-
static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
__u8 status = *((__u8 *) skb->data);
@@ -1943,7 +1600,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s status 0x%2.2x", hdev->name, status);
- hci_req_complete(hdev, HCI_OP_INQUIRY, status);
+ hci_req_cmd_complete(hdev, HCI_OP_INQUIRY, status);
hci_conn_check_pending(hdev);
@@ -2399,7 +2056,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
if (ev->status && conn->state == BT_CONNECTED) {
- hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE);
+ hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
hci_conn_put(conn);
goto unlock;
}
@@ -2491,20 +2148,10 @@ unlock:
hci_dev_unlock(hdev);
}
-static void hci_remote_version_evt(struct hci_dev *hdev, struct sk_buff *skb)
-{
- BT_DBG("%s", hdev->name);
-}
-
-static void hci_qos_setup_complete_evt(struct hci_dev *hdev,
- struct sk_buff *skb)
-{
- BT_DBG("%s", hdev->name);
-}
-
static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_cmd_complete *ev = (void *) skb->data;
+ u8 status = skb->data[sizeof(*ev)];
__u16 opcode;
skb_pull(skb, sizeof(*ev));
@@ -2588,10 +2235,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cc_write_voice_setting(hdev, skb);
break;
- case HCI_OP_HOST_BUFFER_SIZE:
- hci_cc_host_buffer_size(hdev, skb);
- break;
-
case HCI_OP_WRITE_SSP_MODE:
hci_cc_write_ssp_mode(hdev, skb);
break;
@@ -2620,46 +2263,42 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cc_read_bd_addr(hdev, skb);
break;
- case HCI_OP_READ_DATA_BLOCK_SIZE:
- hci_cc_read_data_block_size(hdev, skb);
+ case HCI_OP_READ_PAGE_SCAN_ACTIVITY:
+ hci_cc_read_page_scan_activity(hdev, skb);
break;
- case HCI_OP_WRITE_CA_TIMEOUT:
- hci_cc_write_ca_timeout(hdev, skb);
+ case HCI_OP_WRITE_PAGE_SCAN_ACTIVITY:
+ hci_cc_write_page_scan_activity(hdev, skb);
break;
- case HCI_OP_READ_FLOW_CONTROL_MODE:
- hci_cc_read_flow_control_mode(hdev, skb);
+ case HCI_OP_READ_PAGE_SCAN_TYPE:
+ hci_cc_read_page_scan_type(hdev, skb);
break;
- case HCI_OP_READ_LOCAL_AMP_INFO:
- hci_cc_read_local_amp_info(hdev, skb);
+ case HCI_OP_WRITE_PAGE_SCAN_TYPE:
+ hci_cc_write_page_scan_type(hdev, skb);
break;
- case HCI_OP_READ_LOCAL_AMP_ASSOC:
- hci_cc_read_local_amp_assoc(hdev, skb);
+ case HCI_OP_READ_DATA_BLOCK_SIZE:
+ hci_cc_read_data_block_size(hdev, skb);
break;
- case HCI_OP_DELETE_STORED_LINK_KEY:
- hci_cc_delete_stored_link_key(hdev, skb);
+ case HCI_OP_READ_FLOW_CONTROL_MODE:
+ hci_cc_read_flow_control_mode(hdev, skb);
break;
- case HCI_OP_SET_EVENT_MASK:
- hci_cc_set_event_mask(hdev, skb);
+ case HCI_OP_READ_LOCAL_AMP_INFO:
+ hci_cc_read_local_amp_info(hdev, skb);
break;
- case HCI_OP_WRITE_INQUIRY_MODE:
- hci_cc_write_inquiry_mode(hdev, skb);
+ case HCI_OP_READ_LOCAL_AMP_ASSOC:
+ hci_cc_read_local_amp_assoc(hdev, skb);
break;
case HCI_OP_READ_INQ_RSP_TX_POWER:
hci_cc_read_inq_rsp_tx_power(hdev, skb);
break;
- case HCI_OP_SET_EVENT_FLT:
- hci_cc_set_event_flt(hdev, skb);
- break;
-
case HCI_OP_PIN_CODE_REPLY:
hci_cc_pin_code_reply(hdev, skb);
break;
@@ -2684,10 +2323,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cc_le_read_adv_tx_power(hdev, skb);
break;
- case HCI_OP_LE_SET_EVENT_MASK:
- hci_cc_le_set_event_mask(hdev, skb);
- break;
-
case HCI_OP_USER_CONFIRM_REPLY:
hci_cc_user_confirm_reply(hdev, skb);
break;
@@ -2720,14 +2355,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cc_le_read_white_list_size(hdev, skb);
break;
- case HCI_OP_LE_LTK_REPLY:
- hci_cc_le_ltk_reply(hdev, skb);
- break;
-
- case HCI_OP_LE_LTK_NEG_REPLY:
- hci_cc_le_ltk_neg_reply(hdev, skb);
- break;
-
case HCI_OP_LE_READ_SUPPORTED_STATES:
hci_cc_le_read_supported_states(hdev, skb);
break;
@@ -2745,9 +2372,11 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
break;
}
- if (ev->opcode != HCI_OP_NOP)
+ if (opcode != HCI_OP_NOP)
del_timer(&hdev->cmd_timer);
+ hci_req_cmd_complete(hdev, opcode, status);
+
if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
atomic_set(&hdev->cmd_cnt, 1);
if (!skb_queue_empty(&hdev->cmd_q))
@@ -2817,10 +2446,6 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cs_le_create_conn(hdev, ev->status);
break;
- case HCI_OP_LE_START_ENC:
- hci_cs_le_start_enc(hdev, ev->status);
- break;
-
case HCI_OP_CREATE_PHY_LINK:
hci_cs_create_phylink(hdev, ev->status);
break;
@@ -2829,18 +2454,16 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_cs_accept_phylink(hdev, ev->status);
break;
- case HCI_OP_CREATE_LOGICAL_LINK:
- hci_cs_create_logical_link(hdev, ev->status);
- break;
-
default:
BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode);
break;
}
- if (ev->opcode != HCI_OP_NOP)
+ if (opcode != HCI_OP_NOP)
del_timer(&hdev->cmd_timer);
+ hci_req_cmd_status(hdev, opcode, ev->status);
+
if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
atomic_set(&hdev->cmd_cnt, 1);
if (!skb_queue_empty(&hdev->cmd_q))
@@ -3391,18 +3014,6 @@ unlock:
hci_dev_unlock(hdev);
}
-static void hci_sync_conn_changed_evt(struct hci_dev *hdev, struct sk_buff *skb)
-{
- BT_DBG("%s", hdev->name);
-}
-
-static void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb)
-{
- struct hci_ev_sniff_subrate *ev = (void *) skb->data;
-
- BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
-}
-
static void hci_extended_inquiry_result_evt(struct hci_dev *hdev,
struct sk_buff *skb)
{
@@ -3472,7 +3083,7 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev,
clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
if (ev->status && conn->state == BT_CONNECTED) {
- hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE);
+ hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
hci_conn_put(conn);
goto unlock;
}
@@ -4130,14 +3741,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
hci_remote_features_evt(hdev, skb);
break;
- case HCI_EV_REMOTE_VERSION:
- hci_remote_version_evt(hdev, skb);
- break;
-
- case HCI_EV_QOS_SETUP_COMPLETE:
- hci_qos_setup_complete_evt(hdev, skb);
- break;
-
case HCI_EV_CMD_COMPLETE:
hci_cmd_complete_evt(hdev, skb);
break;
@@ -4194,14 +3797,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
hci_sync_conn_complete_evt(hdev, skb);
break;
- case HCI_EV_SYNC_CONN_CHANGED:
- hci_sync_conn_changed_evt(hdev, skb);
- break;
-
- case HCI_EV_SNIFF_SUBRATE:
- hci_sniff_subrate_evt(hdev, skb);
- break;
-
case HCI_EV_EXTENDED_INQUIRY_RESULT:
hci_extended_inquiry_result_evt(hdev, skb);
break;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 6a93614f2c49..aa4354fca77c 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -854,6 +854,11 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
skb_queue_tail(&hdev->raw_q, skb);
queue_work(hdev->workqueue, &hdev->tx_work);
} else {
+ /* Stand-alone HCI commands must be flaged as
+ * single-command requests.
+ */
+ bt_cb(skb)->req.start = true;
+
skb_queue_tail(&hdev->cmd_q, skb);
queue_work(hdev->workqueue, &hdev->cmd_work);
}
@@ -1121,8 +1126,6 @@ error:
void hci_sock_cleanup(void)
{
bt_procfs_cleanup(&init_net, "hci");
- if (bt_sock_unregister(BTPROTO_HCI) < 0)
- BT_ERR("HCI socket unregistration failed");
-
+ bt_sock_unregister(BTPROTO_HCI);
proto_unregister(&hci_sk_proto);
}
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 23b4e242a31a..ff38561385de 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -590,10 +590,8 @@ int __init bt_sysfs_init(void)
bt_debugfs = debugfs_create_dir("bluetooth", NULL);
bt_class = class_create(THIS_MODULE, "bluetooth");
- if (IS_ERR(bt_class))
- return PTR_ERR(bt_class);
- return 0;
+ return PTR_RET(bt_class);
}
void bt_sysfs_cleanup(void)
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index a7352ff3fd1e..2342327f3335 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -311,6 +311,9 @@ static int hidp_get_raw_report(struct hid_device *hid,
int numbered_reports = hid->report_enum[report_type].numbered;
int ret;
+ if (atomic_read(&session->terminate))
+ return -EIO;
+
switch (report_type) {
case HID_FEATURE_REPORT:
report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_FEATURE;
@@ -722,6 +725,7 @@ static int hidp_session(void *arg)
set_current_state(TASK_INTERRUPTIBLE);
}
set_current_state(TASK_RUNNING);
+ atomic_inc(&session->terminate);
remove_wait_queue(sk_sleep(intr_sk), &intr_wait);
remove_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait);
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 82a829d90b0f..5d0f1ca0a314 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -304,8 +304,6 @@ error:
void __exit hidp_cleanup_sockets(void)
{
bt_procfs_cleanup(&init_net, "hidp");
- if (bt_sock_unregister(BTPROTO_HIDP) < 0)
- BT_ERR("Can't unregister HIDP socket");
-
+ bt_sock_unregister(BTPROTO_HIDP);
proto_unregister(&hidp_proto);
}
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 1bcfb8422fdc..7f9704993b74 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1312,8 +1312,6 @@ error:
void l2cap_cleanup_sockets(void)
{
bt_procfs_cleanup(&init_net, "l2cap");
- if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
- BT_ERR("L2CAP socket unregistration failed");
-
+ bt_sock_unregister(BTPROTO_L2CAP);
proto_unregister(&l2cap_proto);
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 39395c7144aa..03e7e732215f 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -384,7 +384,8 @@ static u32 get_supported_settings(struct hci_dev *hdev)
if (lmp_bredr_capable(hdev)) {
settings |= MGMT_SETTING_CONNECTABLE;
- settings |= MGMT_SETTING_FAST_CONNECTABLE;
+ if (hdev->hci_ver >= BLUETOOTH_VER_1_2)
+ settings |= MGMT_SETTING_FAST_CONNECTABLE;
settings |= MGMT_SETTING_DISCOVERABLE;
settings |= MGMT_SETTING_BREDR;
settings |= MGMT_SETTING_LINK_SECURITY;
@@ -409,6 +410,9 @@ static u32 get_current_settings(struct hci_dev *hdev)
if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags))
settings |= MGMT_SETTING_CONNECTABLE;
+ if (test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags))
+ settings |= MGMT_SETTING_FAST_CONNECTABLE;
+
if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))
settings |= MGMT_SETTING_DISCOVERABLE;
@@ -591,32 +595,33 @@ static void create_eir(struct hci_dev *hdev, u8 *data)
ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
}
-static int update_eir(struct hci_dev *hdev)
+static void update_eir(struct hci_request *req)
{
+ struct hci_dev *hdev = req->hdev;
struct hci_cp_write_eir cp;
if (!hdev_is_powered(hdev))
- return 0;
+ return;
if (!lmp_ext_inq_capable(hdev))
- return 0;
+ return;
if (!test_bit(HCI_SSP_ENABLED, &hdev->dev_flags))
- return 0;
+ return;
if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
- return 0;
+ return;
memset(&cp, 0, sizeof(cp));
create_eir(hdev, cp.data);
if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0)
- return 0;
+ return;
memcpy(hdev->eir, cp.data, sizeof(cp.data));
- return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
}
static u8 get_service_classes(struct hci_dev *hdev)
@@ -630,47 +635,48 @@ static u8 get_service_classes(struct hci_dev *hdev)
return val;
}
-static int update_class(struct hci_dev *hdev)
+static void update_class(struct hci_request *req)
{
+ struct hci_dev *hdev = req->hdev;
u8 cod[3];
- int err;
BT_DBG("%s", hdev->name);
if (!hdev_is_powered(hdev))
- return 0;
+ return;
if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
- return 0;
+ return;
cod[0] = hdev->minor_class;
cod[1] = hdev->major_class;
cod[2] = get_service_classes(hdev);
if (memcmp(cod, hdev->dev_class, 3) == 0)
- return 0;
-
- err = hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);
- if (err == 0)
- set_bit(HCI_PENDING_CLASS, &hdev->dev_flags);
+ return;
- return err;
+ hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);
}
static void service_cache_off(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev,
service_cache.work);
+ struct hci_request req;
if (!test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
return;
+ hci_req_init(&req, hdev);
+
hci_dev_lock(hdev);
- update_eir(hdev);
- update_class(hdev);
+ update_eir(&req);
+ update_class(&req);
hci_dev_unlock(hdev);
+
+ hci_req_run(&req, NULL);
}
static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev)
@@ -994,11 +1000,64 @@ failed:
return err;
}
+static void write_fast_connectable(struct hci_request *req, bool enable)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_cp_write_page_scan_activity acp;
+ u8 type;
+
+ if (hdev->hci_ver < BLUETOOTH_VER_1_2)
+ return;
+
+ if (enable) {
+ type = PAGE_SCAN_TYPE_INTERLACED;
+
+ /* 160 msec page scan interval */
+ acp.interval = __constant_cpu_to_le16(0x0100);
+ } else {
+ type = PAGE_SCAN_TYPE_STANDARD; /* default */
+
+ /* default 1.28 sec page scan */
+ acp.interval = __constant_cpu_to_le16(0x0800);
+ }
+
+ acp.window = __constant_cpu_to_le16(0x0012);
+
+ if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval ||
+ __cpu_to_le16(hdev->page_scan_window) != acp.window)
+ hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY,
+ sizeof(acp), &acp);
+
+ if (hdev->page_scan_type != type)
+ hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type);
+}
+
+static void set_connectable_complete(struct hci_dev *hdev, u8 status)
+{
+ struct pending_cmd *cmd;
+
+ BT_DBG("status 0x%02x", status);
+
+ hci_dev_lock(hdev);
+
+ cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev);
+ if (!cmd)
+ goto unlock;
+
+ send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev);
+
+ mgmt_pending_remove(cmd);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
struct mgmt_mode *cp = data;
struct pending_cmd *cmd;
+ struct hci_request req;
u8 scan;
int err;
@@ -1065,7 +1124,20 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
cancel_delayed_work(&hdev->discov_off);
}
- err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+ hci_req_init(&req, hdev);
+
+ hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+
+ /* If we're going from non-connectable to connectable or
+ * vice-versa when fast connectable is enabled ensure that fast
+ * connectable gets disabled. write_fast_connectable won't do
+ * anything if the page scan parameters are already what they
+ * should be.
+ */
+ if (cp->val || test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags))
+ write_fast_connectable(&req, false);
+
+ err = hci_req_run(&req, set_connectable_complete);
if (err < 0)
mgmt_pending_remove(cmd);
@@ -1332,6 +1404,29 @@ unlock:
return err;
}
+/* This is a helper function to test for pending mgmt commands that can
+ * cause CoD or EIR HCI commands. We can only allow one such pending
+ * mgmt command at a time since otherwise we cannot easily track what
+ * the current values are, will be, and based on that calculate if a new
+ * HCI command needs to be sent and if yes with what value.
+ */
+static bool pending_eir_or_class(struct hci_dev *hdev)
+{
+ struct pending_cmd *cmd;
+
+ list_for_each_entry(cmd, &hdev->mgmt_pending, list) {
+ switch (cmd->opcode) {
+ case MGMT_OP_ADD_UUID:
+ case MGMT_OP_REMOVE_UUID:
+ case MGMT_OP_SET_DEV_CLASS:
+ case MGMT_OP_SET_POWERED:
+ return true;
+ }
+ }
+
+ return false;
+}
+
static const u8 bluetooth_base_uuid[] = {
0xfb, 0x34, 0x9b, 0x5f, 0x80, 0x00, 0x00, 0x80,
0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -1351,10 +1446,37 @@ static u8 get_uuid_size(const u8 *uuid)
return 16;
}
+static void mgmt_class_complete(struct hci_dev *hdev, u16 mgmt_op, u8 status)
+{
+ struct pending_cmd *cmd;
+
+ hci_dev_lock(hdev);
+
+ cmd = mgmt_pending_find(mgmt_op, hdev);
+ if (!cmd)
+ goto unlock;
+
+ cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status),
+ hdev->dev_class, 3);
+
+ mgmt_pending_remove(cmd);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
+static void add_uuid_complete(struct hci_dev *hdev, u8 status)
+{
+ BT_DBG("status 0x%02x", status);
+
+ mgmt_class_complete(hdev, MGMT_OP_ADD_UUID, status);
+}
+
static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
{
struct mgmt_cp_add_uuid *cp = data;
struct pending_cmd *cmd;
+ struct hci_request req;
struct bt_uuid *uuid;
int err;
@@ -1362,7 +1484,7 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
hci_dev_lock(hdev);
- if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
+ if (pending_eir_or_class(hdev)) {
err = cmd_status(sk, hdev->id, MGMT_OP_ADD_UUID,
MGMT_STATUS_BUSY);
goto failed;
@@ -1380,23 +1502,28 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
list_add_tail(&uuid->list, &hdev->uuids);
- err = update_class(hdev);
- if (err < 0)
- goto failed;
+ hci_req_init(&req, hdev);
- err = update_eir(hdev);
- if (err < 0)
- goto failed;
+ update_class(&req);
+ update_eir(&req);
+
+ err = hci_req_run(&req, add_uuid_complete);
+ if (err < 0) {
+ if (err != -ENODATA)
+ goto failed;
- if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_UUID, 0,
hdev->dev_class, 3);
goto failed;
}
cmd = mgmt_pending_add(sk, MGMT_OP_ADD_UUID, hdev, data, len);
- if (!cmd)
+ if (!cmd) {
err = -ENOMEM;
+ goto failed;
+ }
+
+ err = 0;
failed:
hci_dev_unlock(hdev);
@@ -1417,6 +1544,13 @@ static bool enable_service_cache(struct hci_dev *hdev)
return false;
}
+static void remove_uuid_complete(struct hci_dev *hdev, u8 status)
+{
+ BT_DBG("status 0x%02x", status);
+
+ mgmt_class_complete(hdev, MGMT_OP_REMOVE_UUID, status);
+}
+
static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
@@ -1424,13 +1558,14 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
struct pending_cmd *cmd;
struct bt_uuid *match, *tmp;
u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ struct hci_request req;
int err, found;
BT_DBG("request for %s", hdev->name);
hci_dev_lock(hdev);
- if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
+ if (pending_eir_or_class(hdev)) {
err = cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID,
MGMT_STATUS_BUSY);
goto unlock;
@@ -1466,34 +1601,47 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
}
update_class:
- err = update_class(hdev);
- if (err < 0)
- goto unlock;
+ hci_req_init(&req, hdev);
- err = update_eir(hdev);
- if (err < 0)
- goto unlock;
+ update_class(&req);
+ update_eir(&req);
+
+ err = hci_req_run(&req, remove_uuid_complete);
+ if (err < 0) {
+ if (err != -ENODATA)
+ goto unlock;
- if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, 0,
hdev->dev_class, 3);
goto unlock;
}
cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_UUID, hdev, data, len);
- if (!cmd)
+ if (!cmd) {
err = -ENOMEM;
+ goto unlock;
+ }
+
+ err = 0;
unlock:
hci_dev_unlock(hdev);
return err;
}
+static void set_class_complete(struct hci_dev *hdev, u8 status)
+{
+ BT_DBG("status 0x%02x", status);
+
+ mgmt_class_complete(hdev, MGMT_OP_SET_DEV_CLASS, status);
+}
+
static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
struct mgmt_cp_set_dev_class *cp = data;
struct pending_cmd *cmd;
+ struct hci_request req;
int err;
BT_DBG("request for %s", hdev->name);
@@ -1502,15 +1650,19 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
MGMT_STATUS_NOT_SUPPORTED);
- if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags))
- return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
- MGMT_STATUS_BUSY);
+ hci_dev_lock(hdev);
- if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0)
- return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
- MGMT_STATUS_INVALID_PARAMS);
+ if (pending_eir_or_class(hdev)) {
+ err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
+ MGMT_STATUS_BUSY);
+ goto unlock;
+ }
- hci_dev_lock(hdev);
+ if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) {
+ err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
+ MGMT_STATUS_INVALID_PARAMS);
+ goto unlock;
+ }
hdev->major_class = cp->major;
hdev->minor_class = cp->minor;
@@ -1521,26 +1673,34 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
goto unlock;
}
+ hci_req_init(&req, hdev);
+
if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) {
hci_dev_unlock(hdev);
cancel_delayed_work_sync(&hdev->service_cache);
hci_dev_lock(hdev);
- update_eir(hdev);
+ update_eir(&req);
}
- err = update_class(hdev);
- if (err < 0)
- goto unlock;
+ update_class(&req);
+
+ err = hci_req_run(&req, set_class_complete);
+ if (err < 0) {
+ if (err != -ENODATA)
+ goto unlock;
- if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0,
hdev->dev_class, 3);
goto unlock;
}
cmd = mgmt_pending_add(sk, MGMT_OP_SET_DEV_CLASS, hdev, data, len);
- if (!cmd)
+ if (!cmd) {
err = -ENOMEM;
+ goto unlock;
+ }
+
+ err = 0;
unlock:
hci_dev_unlock(hdev);
@@ -2140,7 +2300,7 @@ unlock:
}
static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
- bdaddr_t *bdaddr, u8 type, u16 mgmt_op,
+ struct mgmt_addr_info *addr, u16 mgmt_op,
u16 hci_op, __le32 passkey)
{
struct pending_cmd *cmd;
@@ -2150,37 +2310,41 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
hci_dev_lock(hdev);
if (!hdev_is_powered(hdev)) {
- err = cmd_status(sk, hdev->id, mgmt_op,
- MGMT_STATUS_NOT_POWERED);
+ err = cmd_complete(sk, hdev->id, mgmt_op,
+ MGMT_STATUS_NOT_POWERED, addr,
+ sizeof(*addr));
goto done;
}
- if (type == BDADDR_BREDR)
- conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, bdaddr);
+ if (addr->type == BDADDR_BREDR)
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &addr->bdaddr);
else
- conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, bdaddr);
+ conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &addr->bdaddr);
if (!conn) {
- err = cmd_status(sk, hdev->id, mgmt_op,
- MGMT_STATUS_NOT_CONNECTED);
+ err = cmd_complete(sk, hdev->id, mgmt_op,
+ MGMT_STATUS_NOT_CONNECTED, addr,
+ sizeof(*addr));
goto done;
}
- if (type == BDADDR_LE_PUBLIC || type == BDADDR_LE_RANDOM) {
+ if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) {
/* Continue with pairing via SMP */
err = smp_user_confirm_reply(conn, mgmt_op, passkey);
if (!err)
- err = cmd_status(sk, hdev->id, mgmt_op,
- MGMT_STATUS_SUCCESS);
+ err = cmd_complete(sk, hdev->id, mgmt_op,
+ MGMT_STATUS_SUCCESS, addr,
+ sizeof(*addr));
else
- err = cmd_status(sk, hdev->id, mgmt_op,
- MGMT_STATUS_FAILED);
+ err = cmd_complete(sk, hdev->id, mgmt_op,
+ MGMT_STATUS_FAILED, addr,
+ sizeof(*addr));
goto done;
}
- cmd = mgmt_pending_add(sk, mgmt_op, hdev, bdaddr, sizeof(*bdaddr));
+ cmd = mgmt_pending_add(sk, mgmt_op, hdev, addr, sizeof(*addr));
if (!cmd) {
err = -ENOMEM;
goto done;
@@ -2190,11 +2354,12 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
if (hci_op == HCI_OP_USER_PASSKEY_REPLY) {
struct hci_cp_user_passkey_reply cp;
- bacpy(&cp.bdaddr, bdaddr);
+ bacpy(&cp.bdaddr, &addr->bdaddr);
cp.passkey = passkey;
err = hci_send_cmd(hdev, hci_op, sizeof(cp), &cp);
} else
- err = hci_send_cmd(hdev, hci_op, sizeof(*bdaddr), bdaddr);
+ err = hci_send_cmd(hdev, hci_op, sizeof(addr->bdaddr),
+ &addr->bdaddr);
if (err < 0)
mgmt_pending_remove(cmd);
@@ -2211,7 +2376,7 @@ static int pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev,
BT_DBG("");
- return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+ return user_pairing_resp(sk, hdev, &cp->addr,
MGMT_OP_PIN_CODE_NEG_REPLY,
HCI_OP_PIN_CODE_NEG_REPLY, 0);
}
@@ -2227,7 +2392,7 @@ static int user_confirm_reply(struct sock *sk, struct hci_dev *hdev, void *data,
return cmd_status(sk, hdev->id, MGMT_OP_USER_CONFIRM_REPLY,
MGMT_STATUS_INVALID_PARAMS);
- return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+ return user_pairing_resp(sk, hdev, &cp->addr,
MGMT_OP_USER_CONFIRM_REPLY,
HCI_OP_USER_CONFIRM_REPLY, 0);
}
@@ -2239,7 +2404,7 @@ static int user_confirm_neg_reply(struct sock *sk, struct hci_dev *hdev,
BT_DBG("");
- return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+ return user_pairing_resp(sk, hdev, &cp->addr,
MGMT_OP_USER_CONFIRM_NEG_REPLY,
HCI_OP_USER_CONFIRM_NEG_REPLY, 0);
}
@@ -2251,7 +2416,7 @@ static int user_passkey_reply(struct sock *sk, struct hci_dev *hdev, void *data,
BT_DBG("");
- return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+ return user_pairing_resp(sk, hdev, &cp->addr,
MGMT_OP_USER_PASSKEY_REPLY,
HCI_OP_USER_PASSKEY_REPLY, cp->passkey);
}
@@ -2263,18 +2428,47 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev,
BT_DBG("");
- return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+ return user_pairing_resp(sk, hdev, &cp->addr,
MGMT_OP_USER_PASSKEY_NEG_REPLY,
HCI_OP_USER_PASSKEY_NEG_REPLY, 0);
}
-static int update_name(struct hci_dev *hdev, const char *name)
+static void update_name(struct hci_request *req)
{
+ struct hci_dev *hdev = req->hdev;
struct hci_cp_write_local_name cp;
- memcpy(cp.name, name, sizeof(cp.name));
+ memcpy(cp.name, hdev->dev_name, sizeof(cp.name));
+
+ hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp);
+}
+
+static void set_name_complete(struct hci_dev *hdev, u8 status)
+{
+ struct mgmt_cp_set_local_name *cp;
+ struct pending_cmd *cmd;
+
+ BT_DBG("status 0x%02x", status);
+
+ hci_dev_lock(hdev);
+
+ cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev);
+ if (!cmd)
+ goto unlock;
+
+ cp = cmd->param;
- return hci_send_cmd(hdev, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp);
+ if (status)
+ cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME,
+ mgmt_status(status));
+ else
+ cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0,
+ cp, sizeof(*cp));
+
+ mgmt_pending_remove(cmd);
+
+unlock:
+ hci_dev_unlock(hdev);
}
static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -2282,12 +2476,24 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
{
struct mgmt_cp_set_local_name *cp = data;
struct pending_cmd *cmd;
+ struct hci_request req;
int err;
BT_DBG("");
hci_dev_lock(hdev);
+ /* If the old values are the same as the new ones just return a
+ * direct command complete event.
+ */
+ if (!memcmp(hdev->dev_name, cp->name, sizeof(hdev->dev_name)) &&
+ !memcmp(hdev->short_name, cp->short_name,
+ sizeof(hdev->short_name))) {
+ err = cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0,
+ data, len);
+ goto failed;
+ }
+
memcpy(hdev->short_name, cp->short_name, sizeof(hdev->short_name));
if (!hdev_is_powered(hdev)) {
@@ -2310,7 +2516,19 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
goto failed;
}
- err = update_name(hdev, cp->name);
+ memcpy(hdev->dev_name, cp->name, sizeof(hdev->dev_name));
+
+ hci_req_init(&req, hdev);
+
+ if (lmp_bredr_capable(hdev)) {
+ update_name(&req);
+ update_eir(&req);
+ }
+
+ if (lmp_le_capable(hdev))
+ hci_update_ad(&req);
+
+ err = hci_req_run(&req, set_name_complete);
if (err < 0)
mgmt_pending_remove(cmd);
@@ -2698,6 +2916,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data,
u16 len)
{
struct mgmt_cp_set_device_id *cp = data;
+ struct hci_request req;
int err;
__u16 source;
@@ -2718,24 +2937,59 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data,
err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, 0, NULL, 0);
- update_eir(hdev);
+ hci_req_init(&req, hdev);
+ update_eir(&req);
+ hci_req_run(&req, NULL);
hci_dev_unlock(hdev);
return err;
}
+static void fast_connectable_complete(struct hci_dev *hdev, u8 status)
+{
+ struct pending_cmd *cmd;
+
+ BT_DBG("status 0x%02x", status);
+
+ hci_dev_lock(hdev);
+
+ cmd = mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev);
+ if (!cmd)
+ goto unlock;
+
+ if (status) {
+ cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
+ mgmt_status(status));
+ } else {
+ struct mgmt_mode *cp = cmd->param;
+
+ if (cp->val)
+ set_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags);
+ else
+ clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags);
+
+ send_settings_rsp(cmd->sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev);
+ new_settings(hdev, cmd->sk);
+ }
+
+ mgmt_pending_remove(cmd);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,
void *data, u16 len)
{
struct mgmt_mode *cp = data;
- struct hci_cp_write_page_scan_activity acp;
- u8 type;
+ struct pending_cmd *cmd;
+ struct hci_request req;
int err;
BT_DBG("%s", hdev->name);
- if (!lmp_bredr_capable(hdev))
+ if (!lmp_bredr_capable(hdev) || hdev->hci_ver < BLUETOOTH_VER_1_2)
return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
MGMT_STATUS_NOT_SUPPORTED);
@@ -2753,40 +3007,39 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,
hci_dev_lock(hdev);
- if (cp->val) {
- type = PAGE_SCAN_TYPE_INTERLACED;
+ if (mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev)) {
+ err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
+ MGMT_STATUS_BUSY);
+ goto unlock;
+ }
- /* 160 msec page scan interval */
- acp.interval = __constant_cpu_to_le16(0x0100);
- } else {
- type = PAGE_SCAN_TYPE_STANDARD; /* default */
+ if (!!cp->val == test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) {
+ err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE,
+ hdev);
+ goto unlock;
+ }
- /* default 1.28 sec page scan */
- acp.interval = __constant_cpu_to_le16(0x0800);
+ cmd = mgmt_pending_add(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev,
+ data, len);
+ if (!cmd) {
+ err = -ENOMEM;
+ goto unlock;
}
- /* default 11.25 msec page scan window */
- acp.window = __constant_cpu_to_le16(0x0012);
+ hci_req_init(&req, hdev);
- err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, sizeof(acp),
- &acp);
- if (err < 0) {
- err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
- MGMT_STATUS_FAILED);
- goto done;
- }
+ write_fast_connectable(&req, cp->val);
- err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type);
+ err = hci_req_run(&req, fast_connectable_complete);
if (err < 0) {
err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
MGMT_STATUS_FAILED);
- goto done;
+ mgmt_pending_remove(cmd);
}
- err = cmd_complete(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, 0,
- NULL, 0);
-done:
+unlock:
hci_dev_unlock(hdev);
+
return err;
}
@@ -3043,79 +3296,115 @@ static void settings_rsp(struct pending_cmd *cmd, void *data)
mgmt_pending_free(cmd);
}
-static int set_bredr_scan(struct hci_dev *hdev)
+static void set_bredr_scan(struct hci_request *req)
{
+ struct hci_dev *hdev = req->hdev;
u8 scan = 0;
+ /* Ensure that fast connectable is disabled. This function will
+ * not do anything if the page scan parameters are already what
+ * they should be.
+ */
+ write_fast_connectable(req, false);
+
if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags))
scan |= SCAN_PAGE;
if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))
scan |= SCAN_INQUIRY;
- if (!scan)
- return 0;
-
- return hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+ if (scan)
+ hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
}
-int mgmt_powered(struct hci_dev *hdev, u8 powered)
+static void powered_complete(struct hci_dev *hdev, u8 status)
{
struct cmd_lookup match = { NULL, hdev };
- int err;
- if (!test_bit(HCI_MGMT, &hdev->dev_flags))
- return 0;
+ BT_DBG("status 0x%02x", status);
+
+ hci_dev_lock(hdev);
mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
- if (powered) {
- u8 link_sec;
+ new_settings(hdev, match.sk);
- if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) &&
- !lmp_host_ssp_capable(hdev)) {
- u8 ssp = 1;
+ hci_dev_unlock(hdev);
- hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp);
- }
+ if (match.sk)
+ sock_put(match.sk);
+}
- if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
- struct hci_cp_write_le_host_supported cp;
+static int powered_update_hci(struct hci_dev *hdev)
+{
+ struct hci_request req;
+ u8 link_sec;
- cp.le = 1;
- cp.simul = lmp_le_br_capable(hdev);
+ hci_req_init(&req, hdev);
- /* Check first if we already have the right
- * host state (host features set)
- */
- if (cp.le != lmp_host_le_capable(hdev) ||
- cp.simul != lmp_host_le_br_capable(hdev))
- hci_send_cmd(hdev,
- HCI_OP_WRITE_LE_HOST_SUPPORTED,
- sizeof(cp), &cp);
- }
+ if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) &&
+ !lmp_host_ssp_capable(hdev)) {
+ u8 ssp = 1;
- link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags);
- if (link_sec != test_bit(HCI_AUTH, &hdev->flags))
- hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE,
- sizeof(link_sec), &link_sec);
+ hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, 1, &ssp);
+ }
- if (lmp_bredr_capable(hdev)) {
- set_bredr_scan(hdev);
- update_class(hdev);
- update_name(hdev, hdev->dev_name);
- update_eir(hdev);
- }
- } else {
- u8 status = MGMT_STATUS_NOT_POWERED;
- u8 zero_cod[] = { 0, 0, 0 };
+ if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
+ struct hci_cp_write_le_host_supported cp;
- mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status);
+ cp.le = 1;
+ cp.simul = lmp_le_br_capable(hdev);
- if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0)
- mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
- zero_cod, sizeof(zero_cod), NULL);
+ /* Check first if we already have the right
+ * host state (host features set)
+ */
+ if (cp.le != lmp_host_le_capable(hdev) ||
+ cp.simul != lmp_host_le_br_capable(hdev))
+ hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED,
+ sizeof(cp), &cp);
}
+ link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags);
+ if (link_sec != test_bit(HCI_AUTH, &hdev->flags))
+ hci_req_add(&req, HCI_OP_WRITE_AUTH_ENABLE,
+ sizeof(link_sec), &link_sec);
+
+ if (lmp_bredr_capable(hdev)) {
+ set_bredr_scan(&req);
+ update_class(&req);
+ update_name(&req);
+ update_eir(&req);
+ }
+
+ return hci_req_run(&req, powered_complete);
+}
+
+int mgmt_powered(struct hci_dev *hdev, u8 powered)
+{
+ struct cmd_lookup match = { NULL, hdev };
+ u8 status_not_powered = MGMT_STATUS_NOT_POWERED;
+ u8 zero_cod[] = { 0, 0, 0 };
+ int err;
+
+ if (!test_bit(HCI_MGMT, &hdev->dev_flags))
+ return 0;
+
+ if (powered) {
+ if (powered_update_hci(hdev) == 0)
+ return 0;
+
+ mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp,
+ &match);
+ goto new_settings;
+ }
+
+ mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status_not_powered);
+
+ if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0)
+ mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
+ zero_cod, sizeof(zero_cod), NULL);
+
+new_settings:
err = new_settings(hdev, match.sk);
if (match.sk)
@@ -3152,7 +3441,7 @@ int mgmt_discoverable(struct hci_dev *hdev, u8 discoverable)
int mgmt_connectable(struct hci_dev *hdev, u8 connectable)
{
- struct cmd_lookup match = { NULL, hdev };
+ struct pending_cmd *cmd;
bool changed = false;
int err = 0;
@@ -3164,14 +3453,10 @@ int mgmt_connectable(struct hci_dev *hdev, u8 connectable)
changed = true;
}
- mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, hdev, settings_rsp,
- &match);
+ cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev);
if (changed)
- err = new_settings(hdev, match.sk);
-
- if (match.sk)
- sock_put(match.sk);
+ err = new_settings(hdev, cmd ? cmd->sk : NULL);
return err;
}
@@ -3555,23 +3840,25 @@ int mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
return err;
}
-static int clear_eir(struct hci_dev *hdev)
+static void clear_eir(struct hci_request *req)
{
+ struct hci_dev *hdev = req->hdev;
struct hci_cp_write_eir cp;
if (!lmp_ext_inq_capable(hdev))
- return 0;
+ return;
memset(hdev->eir, 0, sizeof(hdev->eir));
memset(&cp, 0, sizeof(cp));
- return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
}
int mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
{
struct cmd_lookup match = { NULL, hdev };
+ struct hci_request req;
bool changed = false;
int err = 0;
@@ -3604,29 +3891,26 @@ int mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
if (match.sk)
sock_put(match.sk);
+ hci_req_init(&req, hdev);
+
if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags))
- update_eir(hdev);
+ update_eir(&req);
else
- clear_eir(hdev);
+ clear_eir(&req);
+
+ hci_req_run(&req, NULL);
return err;
}
-static void class_rsp(struct pending_cmd *cmd, void *data)
+static void sk_lookup(struct pending_cmd *cmd, void *data)
{
struct cmd_lookup *match = data;
- cmd_complete(cmd->sk, cmd->index, cmd->opcode, match->mgmt_status,
- match->hdev->dev_class, 3);
-
- list_del(&cmd->list);
-
if (match->sk == NULL) {
match->sk = cmd->sk;
sock_hold(match->sk);
}
-
- mgmt_pending_free(cmd);
}
int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
@@ -3635,11 +3919,9 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
struct cmd_lookup match = { NULL, hdev, mgmt_status(status) };
int err = 0;
- clear_bit(HCI_PENDING_CLASS, &hdev->dev_flags);
-
- mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, class_rsp, &match);
- mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, class_rsp, &match);
- mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, class_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match);
+ mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match);
+ mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match);
if (!status)
err = mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class,
@@ -3653,55 +3935,29 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
int mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status)
{
- struct pending_cmd *cmd;
struct mgmt_cp_set_local_name ev;
- bool changed = false;
- int err = 0;
+ struct pending_cmd *cmd;
- if (memcmp(name, hdev->dev_name, sizeof(hdev->dev_name)) != 0) {
- memcpy(hdev->dev_name, name, sizeof(hdev->dev_name));
- changed = true;
- }
+ if (status)
+ return 0;
memset(&ev, 0, sizeof(ev));
memcpy(ev.name, name, HCI_MAX_NAME_LENGTH);
memcpy(ev.short_name, hdev->short_name, HCI_MAX_SHORT_NAME_LENGTH);
cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev);
- if (!cmd)
- goto send_event;
-
- /* Always assume that either the short or the complete name has
- * changed if there was a pending mgmt command */
- changed = true;
+ if (!cmd) {
+ memcpy(hdev->dev_name, name, sizeof(hdev->dev_name));
- if (status) {
- err = cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME,
- mgmt_status(status));
- goto failed;
+ /* If this is a HCI command related to powering on the
+ * HCI dev don't send any mgmt signals.
+ */
+ if (mgmt_pending_find(MGMT_OP_SET_POWERED, hdev))
+ return 0;
}
- err = cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, &ev,
- sizeof(ev));
- if (err < 0)
- goto failed;
-
-send_event:
- if (changed)
- err = mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev,
- sizeof(ev), cmd ? cmd->sk : NULL);
-
- /* EIR is taken care of separately when powering on the
- * adapter so only update them here if this is a name change
- * unrelated to power on.
- */
- if (!test_bit(HCI_INIT, &hdev->flags))
- update_eir(hdev);
-
-failed:
- if (cmd)
- mgmt_pending_remove(cmd);
- return err;
+ return mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev),
+ cmd ? cmd->sk : NULL);
}
int mgmt_read_local_oob_data_reply_complete(struct hci_dev *hdev, u8 *hash,
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index b23e2713fea8..ca957d34b0c8 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -69,7 +69,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
u8 sec_level,
int *err);
static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst);
-static void rfcomm_session_del(struct rfcomm_session *s);
+static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s);
/* ---- RFCOMM frame parsing macros ---- */
#define __get_dlci(b) ((b & 0xfc) >> 2)
@@ -108,12 +108,6 @@ static void rfcomm_schedule(void)
wake_up_process(rfcomm_thread);
}
-static void rfcomm_session_put(struct rfcomm_session *s)
-{
- if (atomic_dec_and_test(&s->refcnt))
- rfcomm_session_del(s);
-}
-
/* ---- RFCOMM FCS computation ---- */
/* reversed, 8-bit, poly=0x07 */
@@ -249,16 +243,14 @@ static void rfcomm_session_set_timer(struct rfcomm_session *s, long timeout)
{
BT_DBG("session %p state %ld timeout %ld", s, s->state, timeout);
- if (!mod_timer(&s->timer, jiffies + timeout))
- rfcomm_session_hold(s);
+ mod_timer(&s->timer, jiffies + timeout);
}
static void rfcomm_session_clear_timer(struct rfcomm_session *s)
{
BT_DBG("session %p state %ld", s, s->state);
- if (del_timer(&s->timer))
- rfcomm_session_put(s);
+ del_timer_sync(&s->timer);
}
/* ---- RFCOMM DLCs ---- */
@@ -336,8 +328,6 @@ static void rfcomm_dlc_link(struct rfcomm_session *s, struct rfcomm_dlc *d)
{
BT_DBG("dlc %p session %p", d, s);
- rfcomm_session_hold(s);
-
rfcomm_session_clear_timer(s);
rfcomm_dlc_hold(d);
list_add(&d->list, &s->dlcs);
@@ -356,8 +346,6 @@ static void rfcomm_dlc_unlink(struct rfcomm_dlc *d)
if (list_empty(&s->dlcs))
rfcomm_session_set_timer(s, RFCOMM_IDLE_TIMEOUT);
-
- rfcomm_session_put(s);
}
static struct rfcomm_dlc *rfcomm_dlc_get(struct rfcomm_session *s, u8 dlci)
@@ -493,12 +481,34 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
int rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
{
- int r;
+ int r = 0;
+ struct rfcomm_dlc *d_list;
+ struct rfcomm_session *s, *s_list;
+
+ BT_DBG("dlc %p state %ld dlci %d err %d", d, d->state, d->dlci, err);
rfcomm_lock();
- r = __rfcomm_dlc_close(d, err);
+ s = d->session;
+ if (!s)
+ goto no_session;
+
+ /* after waiting on the mutex check the session still exists
+ * then check the dlc still exists
+ */
+ list_for_each_entry(s_list, &session_list, list) {
+ if (s_list == s) {
+ list_for_each_entry(d_list, &s->dlcs, list) {
+ if (d_list == d) {
+ r = __rfcomm_dlc_close(d, err);
+ break;
+ }
+ }
+ break;
+ }
+ }
+no_session:
rfcomm_unlock();
return r;
}
@@ -609,7 +619,7 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state)
return s;
}
-static void rfcomm_session_del(struct rfcomm_session *s)
+static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s)
{
int state = s->state;
@@ -617,15 +627,14 @@ static void rfcomm_session_del(struct rfcomm_session *s)
list_del(&s->list);
- if (state == BT_CONNECTED)
- rfcomm_send_disc(s, 0);
-
rfcomm_session_clear_timer(s);
sock_release(s->sock);
kfree(s);
if (state != BT_LISTEN)
module_put(THIS_MODULE);
+
+ return NULL;
}
static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst)
@@ -644,17 +653,16 @@ static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst)
return NULL;
}
-static void rfcomm_session_close(struct rfcomm_session *s, int err)
+static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s,
+ int err)
{
struct rfcomm_dlc *d;
struct list_head *p, *n;
- BT_DBG("session %p state %ld err %d", s, s->state, err);
-
- rfcomm_session_hold(s);
-
s->state = BT_CLOSED;
+ BT_DBG("session %p state %ld err %d", s, s->state, err);
+
/* Close all dlcs */
list_for_each_safe(p, n, &s->dlcs) {
d = list_entry(p, struct rfcomm_dlc, list);
@@ -663,7 +671,7 @@ static void rfcomm_session_close(struct rfcomm_session *s, int err)
}
rfcomm_session_clear_timer(s);
- rfcomm_session_put(s);
+ return rfcomm_session_del(s);
}
static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
@@ -715,8 +723,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
if (*err == 0 || *err == -EINPROGRESS)
return s;
- rfcomm_session_del(s);
- return NULL;
+ return rfcomm_session_del(s);
failed:
sock_release(sock);
@@ -1105,7 +1112,7 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr)
}
/* ---- RFCOMM frame reception ---- */
-static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
+static struct rfcomm_session *rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
{
BT_DBG("session %p state %ld dlci %d", s, s->state, dlci);
@@ -1114,7 +1121,7 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
struct rfcomm_dlc *d = rfcomm_dlc_get(s, dlci);
if (!d) {
rfcomm_send_dm(s, dlci);
- return 0;
+ return s;
}
switch (d->state) {
@@ -1150,25 +1157,14 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
break;
case BT_DISCONN:
- /* rfcomm_session_put is called later so don't do
- * anything here otherwise we will mess up the session
- * reference counter:
- *
- * (a) when we are the initiator dlc_unlink will drive
- * the reference counter to 0 (there is no initial put
- * after session_add)
- *
- * (b) when we are not the initiator rfcomm_rx_process
- * will explicitly call put to balance the initial hold
- * done after session add.
- */
+ s = rfcomm_session_close(s, ECONNRESET);
break;
}
}
- return 0;
+ return s;
}
-static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci)
+static struct rfcomm_session *rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci)
{
int err = 0;
@@ -1192,13 +1188,13 @@ static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci)
else
err = ECONNRESET;
- s->state = BT_CLOSED;
- rfcomm_session_close(s, err);
+ s = rfcomm_session_close(s, err);
}
- return 0;
+ return s;
}
-static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci)
+static struct rfcomm_session *rfcomm_recv_disc(struct rfcomm_session *s,
+ u8 dlci)
{
int err = 0;
@@ -1227,11 +1223,9 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci)
else
err = ECONNRESET;
- s->state = BT_CLOSED;
- rfcomm_session_close(s, err);
+ s = rfcomm_session_close(s, err);
}
-
- return 0;
+ return s;
}
void rfcomm_dlc_accept(struct rfcomm_dlc *d)
@@ -1652,11 +1646,18 @@ drop:
return 0;
}
-static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
+static struct rfcomm_session *rfcomm_recv_frame(struct rfcomm_session *s,
+ struct sk_buff *skb)
{
struct rfcomm_hdr *hdr = (void *) skb->data;
u8 type, dlci, fcs;
+ if (!s) {
+ /* no session, so free socket data */
+ kfree_skb(skb);
+ return s;
+ }
+
dlci = __get_dlci(hdr->addr);
type = __get_type(hdr->ctrl);
@@ -1667,7 +1668,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
if (__check_fcs(skb->data, type, fcs)) {
BT_ERR("bad checksum in packet");
kfree_skb(skb);
- return -EILSEQ;
+ return s;
}
if (__test_ea(hdr->len))
@@ -1683,22 +1684,23 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
case RFCOMM_DISC:
if (__test_pf(hdr->ctrl))
- rfcomm_recv_disc(s, dlci);
+ s = rfcomm_recv_disc(s, dlci);
break;
case RFCOMM_UA:
if (__test_pf(hdr->ctrl))
- rfcomm_recv_ua(s, dlci);
+ s = rfcomm_recv_ua(s, dlci);
break;
case RFCOMM_DM:
- rfcomm_recv_dm(s, dlci);
+ s = rfcomm_recv_dm(s, dlci);
break;
case RFCOMM_UIH:
- if (dlci)
- return rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb);
-
+ if (dlci) {
+ rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb);
+ return s;
+ }
rfcomm_recv_mcc(s, skb);
break;
@@ -1707,7 +1709,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
break;
}
kfree_skb(skb);
- return 0;
+ return s;
}
/* ---- Connection and data processing ---- */
@@ -1844,7 +1846,7 @@ static void rfcomm_process_dlcs(struct rfcomm_session *s)
}
}
-static void rfcomm_process_rx(struct rfcomm_session *s)
+static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s)
{
struct socket *sock = s->sock;
struct sock *sk = sock->sk;
@@ -1856,17 +1858,15 @@ static void rfcomm_process_rx(struct rfcomm_session *s)
while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
skb_orphan(skb);
if (!skb_linearize(skb))
- rfcomm_recv_frame(s, skb);
+ s = rfcomm_recv_frame(s, skb);
else
kfree_skb(skb);
}
- if (sk->sk_state == BT_CLOSED) {
- if (!s->initiator)
- rfcomm_session_put(s);
+ if (s && (sk->sk_state == BT_CLOSED))
+ s = rfcomm_session_close(s, sk->sk_err);
- rfcomm_session_close(s, sk->sk_err);
- }
+ return s;
}
static void rfcomm_accept_connection(struct rfcomm_session *s)
@@ -1891,8 +1891,6 @@ static void rfcomm_accept_connection(struct rfcomm_session *s)
s = rfcomm_session_add(nsock, BT_OPEN);
if (s) {
- rfcomm_session_hold(s);
-
/* We should adjust MTU on incoming sessions.
* L2CAP MTU minus UIH header and FCS. */
s->mtu = min(l2cap_pi(nsock->sk)->chan->omtu,
@@ -1903,7 +1901,7 @@ static void rfcomm_accept_connection(struct rfcomm_session *s)
sock_release(nsock);
}
-static void rfcomm_check_connection(struct rfcomm_session *s)
+static struct rfcomm_session *rfcomm_check_connection(struct rfcomm_session *s)
{
struct sock *sk = s->sock->sk;
@@ -1921,10 +1919,10 @@ static void rfcomm_check_connection(struct rfcomm_session *s)
break;
case BT_CLOSED:
- s->state = BT_CLOSED;
- rfcomm_session_close(s, sk->sk_err);
+ s = rfcomm_session_close(s, sk->sk_err);
break;
}
+ return s;
}
static void rfcomm_process_sessions(void)
@@ -1940,7 +1938,6 @@ static void rfcomm_process_sessions(void)
if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) {
s->state = BT_DISCONN;
rfcomm_send_disc(s, 0);
- rfcomm_session_put(s);
continue;
}
@@ -1949,21 +1946,18 @@ static void rfcomm_process_sessions(void)
continue;
}
- rfcomm_session_hold(s);
-
switch (s->state) {
case BT_BOUND:
- rfcomm_check_connection(s);
+ s = rfcomm_check_connection(s);
break;
default:
- rfcomm_process_rx(s);
+ s = rfcomm_process_rx(s);
break;
}
- rfcomm_process_dlcs(s);
-
- rfcomm_session_put(s);
+ if (s)
+ rfcomm_process_dlcs(s);
}
rfcomm_unlock();
@@ -2010,10 +2004,11 @@ static int rfcomm_add_listener(bdaddr_t *ba)
/* Add listening session */
s = rfcomm_session_add(sock, BT_LISTEN);
- if (!s)
+ if (!s) {
+ err = -ENOMEM;
goto failed;
+ }
- rfcomm_session_hold(s);
return 0;
failed:
sock_release(sock);
@@ -2071,8 +2066,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
if (!s)
return;
- rfcomm_session_hold(s);
-
list_for_each_safe(p, n, &s->dlcs) {
d = list_entry(p, struct rfcomm_dlc, list);
@@ -2104,8 +2097,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
set_bit(RFCOMM_AUTH_REJECT, &d->flags);
}
- rfcomm_session_put(s);
-
rfcomm_schedule();
}
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index c23bae86263b..a8638b58c4bf 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -608,6 +608,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
rfcomm_dlc_accept(d);
+ msg->msg_namelen = 0;
return 0;
}
@@ -1065,8 +1066,7 @@ void __exit rfcomm_cleanup_sockets(void)
debugfs_remove(rfcomm_sock_debugfs);
- if (bt_sock_unregister(BTPROTO_RFCOMM) < 0)
- BT_ERR("RFCOMM socket layer unregistration failed");
+ bt_sock_unregister(BTPROTO_RFCOMM);
proto_unregister(&rfcomm_proto);
}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index fad0302bdb32..2c8055350510 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -665,6 +665,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
hci_conn_accept(pi->conn->hcon, 0);
sk->sk_state = BT_CONFIG;
+ msg->msg_namelen = 0;
release_sock(sk);
return 0;
@@ -1112,8 +1113,7 @@ void __exit sco_exit(void)
debugfs_remove(sco_debugfs);
- if (bt_sock_unregister(BTPROTO_SCO) < 0)
- BT_ERR("SCO socket unregistration failed");
+ bt_sock_unregister(BTPROTO_SCO);
proto_unregister(&sco_proto);
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index ef1b91431c6b..f17fcb3097c2 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -148,7 +148,6 @@ static void del_nbp(struct net_bridge_port *p)
dev->priv_flags &= ~IFF_BRIDGE_PORT;
netdev_rx_handler_unregister(dev);
- synchronize_net();
netdev_upper_dev_unlink(dev, br->dev);
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 92de5e5f9db2..9878eb8204c5 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -78,6 +78,11 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum,
const char *prefix)
{
unsigned int bitmask;
+ struct net *net = dev_net(in ? in : out);
+
+ /* FIXME: Disabled from containers until syslog ns is supported */
+ if (!net_eq(net, &init_net))
+ return;
spin_lock_bh(&ebt_log_lock);
printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x",
@@ -176,17 +181,18 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ebt_log_info *info = par->targinfo;
struct nf_loginfo li;
+ struct net *net = dev_net(par->in ? par->in : par->out);
li.type = NF_LOG_TYPE_LOG;
li.u.log.level = info->loglevel;
li.u.log.logflags = info->bitmask;
if (info->bitmask & EBT_LOG_NFLOG)
- nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in,
- par->out, &li, "%s", info->prefix);
+ nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb,
+ par->in, par->out, &li, "%s", info->prefix);
else
ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in,
- par->out, &li, info->prefix);
+ par->out, &li, info->prefix);
return EBT_CONTINUE;
}
@@ -206,19 +212,47 @@ static struct nf_logger ebt_log_logger __read_mostly = {
.me = THIS_MODULE,
};
+static int __net_init ebt_log_net_init(struct net *net)
+{
+ nf_log_set(net, NFPROTO_BRIDGE, &ebt_log_logger);
+ return 0;
+}
+
+static void __net_exit ebt_log_net_fini(struct net *net)
+{
+ nf_log_unset(net, &ebt_log_logger);
+}
+
+static struct pernet_operations ebt_log_net_ops = {
+ .init = ebt_log_net_init,
+ .exit = ebt_log_net_fini,
+};
+
static int __init ebt_log_init(void)
{
int ret;
+ ret = register_pernet_subsys(&ebt_log_net_ops);
+ if (ret < 0)
+ goto err_pernet;
+
ret = xt_register_target(&ebt_log_tg_reg);
if (ret < 0)
- return ret;
+ goto err_target;
+
nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger);
- return 0;
+
+ return ret;
+
+err_target:
+ unregister_pernet_subsys(&ebt_log_net_ops);
+err_pernet:
+ return ret;
}
static void __exit ebt_log_fini(void)
{
+ unregister_pernet_subsys(&ebt_log_net_ops);
nf_log_unregister(&ebt_log_logger);
xt_unregister_target(&ebt_log_tg_reg);
}
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 5be68bbcc341..59ac7952010d 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -24,14 +24,15 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ebt_nflog_info *info = par->targinfo;
struct nf_loginfo li;
+ struct net *net = dev_net(par->in ? par->in : par->out);
li.type = NF_LOG_TYPE_ULOG;
li.u.ulog.copy_len = info->len;
li.u.ulog.group = info->group;
li.u.ulog.qthreshold = info->threshold;
- nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out,
- &li, "%s", info->prefix);
+ nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in,
+ par->out, &li, "%s", info->prefix);
return EBT_CONTINUE;
}
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 961d870ab283..fc1905c51417 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -41,6 +41,7 @@
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_bridge/ebt_ulog.h>
#include <net/netfilter/nf_log.h>
+#include <net/netns/generic.h>
#include <net/sock.h>
#include "../br_private.h"
@@ -62,13 +63,22 @@ typedef struct {
spinlock_t lock; /* the per-queue lock */
} ebt_ulog_buff_t;
-static ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS];
-static struct sock *ebtulognl;
+static int ebt_ulog_net_id __read_mostly;
+struct ebt_ulog_net {
+ unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS];
+ ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS];
+ struct sock *ebtulognl;
+};
+
+static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net)
+{
+ return net_generic(net, ebt_ulog_net_id);
+}
/* send one ulog_buff_t to userspace */
-static void ulog_send(unsigned int nlgroup)
+static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup)
{
- ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup];
+ ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup];
del_timer(&ub->timer);
@@ -80,7 +90,7 @@ static void ulog_send(unsigned int nlgroup)
ub->lastnlh->nlmsg_type = NLMSG_DONE;
NETLINK_CB(ub->skb).dst_group = nlgroup + 1;
- netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);
+ netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);
ub->qlen = 0;
ub->skb = NULL;
@@ -89,10 +99,15 @@ static void ulog_send(unsigned int nlgroup)
/* timer function to flush queue in flushtimeout time */
static void ulog_timer(unsigned long data)
{
- spin_lock_bh(&ulog_buffers[data].lock);
- if (ulog_buffers[data].skb)
- ulog_send(data);
- spin_unlock_bh(&ulog_buffers[data].lock);
+ struct ebt_ulog_net *ebt = container_of((void *)data,
+ struct ebt_ulog_net,
+ nlgroup[*(unsigned int *)data]);
+
+ ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data];
+ spin_lock_bh(&ub->lock);
+ if (ub->skb)
+ ulog_send(ebt, *(unsigned int *)data);
+ spin_unlock_bh(&ub->lock);
}
static struct sk_buff *ulog_alloc_skb(unsigned int size)
@@ -123,8 +138,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
ebt_ulog_packet_msg_t *pm;
size_t size, copy_len;
struct nlmsghdr *nlh;
+ struct net *net = dev_net(in ? in : out);
+ struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
unsigned int group = uloginfo->nlgroup;
- ebt_ulog_buff_t *ub = &ulog_buffers[group];
+ ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group];
spinlock_t *lock = &ub->lock;
ktime_t kt;
@@ -146,7 +163,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
if (!(ub->skb = ulog_alloc_skb(size)))
goto unlock;
} else if (size > skb_tailroom(ub->skb)) {
- ulog_send(group);
+ ulog_send(ebt, group);
if (!(ub->skb = ulog_alloc_skb(size)))
goto unlock;
@@ -205,7 +222,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
ub->lastnlh = nlh;
if (ub->qlen >= uloginfo->qthreshold)
- ulog_send(group);
+ ulog_send(ebt, group);
else if (!timer_pending(&ub->timer)) {
ub->timer.expires = jiffies + flushtimeout * HZ / 100;
add_timer(&ub->timer);
@@ -277,47 +294,39 @@ static struct nf_logger ebt_ulog_logger __read_mostly = {
.me = THIS_MODULE,
};
-static int __init ebt_ulog_init(void)
+static int __net_init ebt_ulog_net_init(struct net *net)
{
- int ret;
int i;
+ struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
+
struct netlink_kernel_cfg cfg = {
.groups = EBT_ULOG_MAXNLGROUPS,
};
- if (nlbufsiz >= 128*1024) {
- pr_warning("Netlink buffer has to be <= 128kB,"
- " please try a smaller nlbufsiz parameter.\n");
- return -EINVAL;
- }
-
/* initialize ulog_buffers */
for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
- setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
- spin_lock_init(&ulog_buffers[i].lock);
+ ebt->nlgroup[i] = i;
+ setup_timer(&ebt->ulog_buffers[i].timer, ulog_timer,
+ (unsigned long)&ebt->nlgroup[i]);
+ spin_lock_init(&ebt->ulog_buffers[i].lock);
}
- ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg);
- if (!ebtulognl)
- ret = -ENOMEM;
- else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0)
- netlink_kernel_release(ebtulognl);
+ ebt->ebtulognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
+ if (!ebt->ebtulognl)
+ return -ENOMEM;
- if (ret == 0)
- nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
-
- return ret;
+ nf_log_set(net, NFPROTO_BRIDGE, &ebt_ulog_logger);
+ return 0;
}
-static void __exit ebt_ulog_fini(void)
+static void __net_exit ebt_ulog_net_fini(struct net *net)
{
- ebt_ulog_buff_t *ub;
int i;
+ struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
- nf_log_unregister(&ebt_ulog_logger);
- xt_unregister_target(&ebt_ulog_tg_reg);
+ nf_log_unset(net, &ebt_ulog_logger);
for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
- ub = &ulog_buffers[i];
+ ebt_ulog_buff_t *ub = &ebt->ulog_buffers[i];
del_timer(&ub->timer);
if (ub->skb) {
@@ -325,7 +334,49 @@ static void __exit ebt_ulog_fini(void)
ub->skb = NULL;
}
}
- netlink_kernel_release(ebtulognl);
+ netlink_kernel_release(ebt->ebtulognl);
+}
+
+static struct pernet_operations ebt_ulog_net_ops = {
+ .init = ebt_ulog_net_init,
+ .exit = ebt_ulog_net_fini,
+ .id = &ebt_ulog_net_id,
+ .size = sizeof(struct ebt_ulog_net),
+};
+
+static int __init ebt_ulog_init(void)
+{
+ int ret;
+
+ if (nlbufsiz >= 128*1024) {
+ pr_warn("Netlink buffer has to be <= 128kB,"
+ "please try a smaller nlbufsiz parameter.\n");
+ return -EINVAL;
+ }
+
+ ret = register_pernet_subsys(&ebt_ulog_net_ops);
+ if (ret)
+ goto out_pernet;
+
+ ret = xt_register_target(&ebt_ulog_tg_reg);
+ if (ret)
+ goto out_target;
+
+ nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
+
+ return 0;
+
+out_target:
+ unregister_pernet_subsys(&ebt_ulog_net_ops);
+out_pernet:
+ return ret;
+}
+
+static void __exit ebt_ulog_fini(void)
+{
+ nf_log_unregister(&ebt_ulog_logger);
+ xt_unregister_target(&ebt_ulog_tg_reg);
+ unregister_pernet_subsys(&ebt_ulog_net_ops);
}
module_init(ebt_ulog_init);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 1d337e02bc63..630b8be6e748 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -286,6 +286,8 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
if (m->msg_flags&MSG_OOB)
goto read_error;
+ m->msg_namelen = 0;
+
skb = skb_recv_datagram(sk, flags, 0 , &ret);
if (!skb)
goto read_error;
diff --git a/net/can/raw.c b/net/can/raw.c
index c1764e41ddaf..1085e65f848e 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -711,9 +711,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
if (err < 0)
goto free_skb;
- err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
- if (err < 0)
- goto free_skb;
+
+ sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
skb->dev = dev;
skb->sk = sk;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 368f9c3f9dc6..ebba65d7e0da 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -749,7 +749,9 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= POLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
diff --git a/net/core/dev.c b/net/core/dev.c
index 2db88dfa99d7..3655ff927315 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1624,7 +1624,6 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
}
skb_orphan(skb);
- nf_reset(skb);
if (unlikely(!is_skb_forwardable(dev, skb))) {
atomic_long_inc(&dev->rx_dropped);
@@ -1640,6 +1639,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
skb->mark = 0;
secpath_reset(skb);
nf_reset(skb);
+ nf_reset_trace(skb);
return netif_rx(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);
@@ -3318,6 +3318,7 @@ int netdev_rx_handler_register(struct net_device *dev,
if (dev->rx_handler)
return -EBUSY;
+ /* Note: rx_handler_data must be set before rx_handler */
rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
rcu_assign_pointer(dev->rx_handler, rx_handler);
@@ -3338,6 +3339,11 @@ void netdev_rx_handler_unregister(struct net_device *dev)
ASSERT_RTNL();
RCU_INIT_POINTER(dev->rx_handler, NULL);
+ /* a reader seeing a non NULL rx_handler in a rcu_read_lock()
+ * section has a guarantee to see a non NULL rx_handler_data
+ * as well.
+ */
+ synchronize_net();
RCU_INIT_POINTER(dev->rx_handler_data, NULL);
}
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index bd2eb9d3e369..c013f38482a1 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -22,7 +22,8 @@
static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
const unsigned char *addr, int addr_len,
- unsigned char addr_type, bool global)
+ unsigned char addr_type, bool global,
+ bool sync)
{
struct netdev_hw_addr *ha;
int alloc_size;
@@ -37,7 +38,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
ha->type = addr_type;
ha->refcount = 1;
ha->global_use = global;
- ha->synced = false;
+ ha->synced = sync;
list_add_tail_rcu(&ha->list, &list->list);
list->count++;
@@ -46,7 +47,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
const unsigned char *addr, int addr_len,
- unsigned char addr_type, bool global)
+ unsigned char addr_type, bool global, bool sync)
{
struct netdev_hw_addr *ha;
@@ -63,43 +64,62 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
else
ha->global_use = true;
}
+ if (sync) {
+ if (ha->synced)
+ return 0;
+ else
+ ha->synced = true;
+ }
ha->refcount++;
return 0;
}
}
- return __hw_addr_create_ex(list, addr, addr_len, addr_type, global);
+ return __hw_addr_create_ex(list, addr, addr_len, addr_type, global,
+ sync);
}
static int __hw_addr_add(struct netdev_hw_addr_list *list,
const unsigned char *addr, int addr_len,
unsigned char addr_type)
{
- return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
+ return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false);
+}
+
+static int __hw_addr_del_entry(struct netdev_hw_addr_list *list,
+ struct netdev_hw_addr *ha, bool global,
+ bool sync)
+{
+ if (global && !ha->global_use)
+ return -ENOENT;
+
+ if (sync && !ha->synced)
+ return -ENOENT;
+
+ if (global)
+ ha->global_use = false;
+
+ if (sync)
+ ha->synced = false;
+
+ if (--ha->refcount)
+ return 0;
+ list_del_rcu(&ha->list);
+ kfree_rcu(ha, rcu_head);
+ list->count--;
+ return 0;
}
static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
const unsigned char *addr, int addr_len,
- unsigned char addr_type, bool global)
+ unsigned char addr_type, bool global, bool sync)
{
struct netdev_hw_addr *ha;
list_for_each_entry(ha, &list->list, list) {
if (!memcmp(ha->addr, addr, addr_len) &&
- (ha->type == addr_type || !addr_type)) {
- if (global) {
- if (!ha->global_use)
- break;
- else
- ha->global_use = false;
- }
- if (--ha->refcount)
- return 0;
- list_del_rcu(&ha->list);
- kfree_rcu(ha, rcu_head);
- list->count--;
- return 0;
- }
+ (ha->type == addr_type || !addr_type))
+ return __hw_addr_del_entry(list, ha, global, sync);
}
return -ENOENT;
}
@@ -108,7 +128,57 @@ static int __hw_addr_del(struct netdev_hw_addr_list *list,
const unsigned char *addr, int addr_len,
unsigned char addr_type)
{
- return __hw_addr_del_ex(list, addr, addr_len, addr_type, false);
+ return __hw_addr_del_ex(list, addr, addr_len, addr_type, false, false);
+}
+
+static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list,
+ struct netdev_hw_addr *ha,
+ int addr_len)
+{
+ int err;
+
+ err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type,
+ false, true);
+ if (err)
+ return err;
+ ha->sync_cnt++;
+ ha->refcount++;
+
+ return 0;
+}
+
+static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list,
+ struct netdev_hw_addr_list *from_list,
+ struct netdev_hw_addr *ha,
+ int addr_len)
+{
+ int err;
+
+ err = __hw_addr_del_ex(to_list, ha->addr, addr_len, ha->type,
+ false, true);
+ if (err)
+ return;
+ ha->sync_cnt--;
+ __hw_addr_del_entry(from_list, ha, false, true);
+}
+
+static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list,
+ struct netdev_hw_addr_list *from_list,
+ int addr_len)
+{
+ int err = 0;
+ struct netdev_hw_addr *ha, *tmp;
+
+ list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
+ if (ha->sync_cnt == ha->refcount) {
+ __hw_addr_unsync_one(to_list, from_list, ha, addr_len);
+ } else {
+ err = __hw_addr_sync_one(to_list, ha, addr_len);
+ if (err)
+ break;
+ }
+ }
+ return err;
}
int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
@@ -152,6 +222,11 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
}
EXPORT_SYMBOL(__hw_addr_del_multiple);
+/* This function only works where there is a strict 1-1 relationship
+ * between source and destionation of they synch. If you ever need to
+ * sync addresses to more then 1 destination, you need to use
+ * __hw_addr_sync_multiple().
+ */
int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
struct netdev_hw_addr_list *from_list,
int addr_len)
@@ -160,17 +235,12 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
struct netdev_hw_addr *ha, *tmp;
list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
- if (!ha->synced) {
- err = __hw_addr_add(to_list, ha->addr,
- addr_len, ha->type);
+ if (!ha->sync_cnt) {
+ err = __hw_addr_sync_one(to_list, ha, addr_len);
if (err)
break;
- ha->synced = true;
- ha->refcount++;
- } else if (ha->refcount == 1) {
- __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
- __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
- }
+ } else if (ha->refcount == 1)
+ __hw_addr_unsync_one(to_list, from_list, ha, addr_len);
}
return err;
}
@@ -183,13 +253,8 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
struct netdev_hw_addr *ha, *tmp;
list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
- if (ha->synced) {
- __hw_addr_del(to_list, ha->addr,
- addr_len, ha->type);
- ha->synced = false;
- __hw_addr_del(from_list, ha->addr,
- addr_len, ha->type);
- }
+ if (ha->sync_cnt)
+ __hw_addr_unsync_one(to_list, from_list, ha, addr_len);
}
}
EXPORT_SYMBOL(__hw_addr_unsync);
@@ -406,7 +471,7 @@ int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr)
}
}
err = __hw_addr_create_ex(&dev->uc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_UNICAST, true);
+ NETDEV_HW_ADDR_T_UNICAST, true, false);
if (!err)
__dev_set_rx_mode(dev);
out:
@@ -469,7 +534,8 @@ EXPORT_SYMBOL(dev_uc_del);
* locked by netif_addr_lock_bh.
*
* This function is intended to be called from the dev->set_rx_mode
- * function of layered software devices.
+ * function of layered software devices. This function assumes that
+ * addresses will only ever be synced to the @to devices and no other.
*/
int dev_uc_sync(struct net_device *to, struct net_device *from)
{
@@ -488,6 +554,36 @@ int dev_uc_sync(struct net_device *to, struct net_device *from)
EXPORT_SYMBOL(dev_uc_sync);
/**
+ * dev_uc_sync_multiple - Synchronize device's unicast list to another
+ * device, but allow for multiple calls to sync to multiple devices.
+ * @to: destination device
+ * @from: source device
+ *
+ * Add newly added addresses to the destination device and release
+ * addresses that have been deleted from the source. The source device
+ * must be locked by netif_addr_lock_bh.
+ *
+ * This function is intended to be called from the dev->set_rx_mode
+ * function of layered software devices. It allows for a single source
+ * device to be synced to multiple destination devices.
+ */
+int dev_uc_sync_multiple(struct net_device *to, struct net_device *from)
+{
+ int err = 0;
+
+ if (to->addr_len != from->addr_len)
+ return -EINVAL;
+
+ netif_addr_lock_nested(to);
+ err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len);
+ if (!err)
+ __dev_set_rx_mode(to);
+ netif_addr_unlock(to);
+ return err;
+}
+EXPORT_SYMBOL(dev_uc_sync_multiple);
+
+/**
* dev_uc_unsync - Remove synchronized addresses from the destination device
* @to: destination device
* @from: source device
@@ -559,7 +655,7 @@ int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr)
}
}
err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_MULTICAST, true);
+ NETDEV_HW_ADDR_T_MULTICAST, true, false);
if (!err)
__dev_set_rx_mode(dev);
out:
@@ -575,7 +671,7 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,
netif_addr_lock_bh(dev);
err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_MULTICAST, global);
+ NETDEV_HW_ADDR_T_MULTICAST, global, false);
if (!err)
__dev_set_rx_mode(dev);
netif_addr_unlock_bh(dev);
@@ -615,7 +711,7 @@ static int __dev_mc_del(struct net_device *dev, const unsigned char *addr,
netif_addr_lock_bh(dev);
err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_MULTICAST, global);
+ NETDEV_HW_ADDR_T_MULTICAST, global, false);
if (!err)
__dev_set_rx_mode(dev);
netif_addr_unlock_bh(dev);
@@ -679,6 +775,36 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
EXPORT_SYMBOL(dev_mc_sync);
/**
+ * dev_mc_sync_multiple - Synchronize device's unicast list to another
+ * device, but allow for multiple calls to sync to multiple devices.
+ * @to: destination device
+ * @from: source device
+ *
+ * Add newly added addresses to the destination device and release
+ * addresses that have no users left. The source device must be
+ * locked by netif_addr_lock_bh.
+ *
+ * This function is intended to be called from the ndo_set_rx_mode
+ * function of layered software devices. It allows for a single
+ * source device to be synced to multiple destination devices.
+ */
+int dev_mc_sync_multiple(struct net_device *to, struct net_device *from)
+{
+ int err = 0;
+
+ if (to->addr_len != from->addr_len)
+ return -EINVAL;
+
+ netif_addr_lock_nested(to);
+ err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
+ if (!err)
+ __dev_set_rx_mode(to);
+ netif_addr_unlock(to);
+ return err;
+}
+EXPORT_SYMBOL(dev_mc_sync_multiple);
+
+/**
* dev_mc_unsync - Remove synchronized addresses from the destination device
* @to: destination device
* @from: source device
diff --git a/net/core/dst.c b/net/core/dst.c
index 35fd12f1a69c..df9cc810ec8e 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -320,27 +320,28 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
EXPORT_SYMBOL(__dst_destroy_metrics_generic);
/**
- * skb_dst_set_noref - sets skb dst, without a reference
+ * __skb_dst_set_noref - sets skb dst, without a reference
* @skb: buffer
* @dst: dst entry
+ * @force: if force is set, use noref version even for DST_NOCACHE entries
*
* Sets skb dst, assuming a reference was not taken on dst
* skb_dst_drop() should not dst_release() this dst
*/
-void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, bool force)
{
WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
/* If dst not in cache, we must take a reference, because
* dst_release() will destroy dst as soon as its refcount becomes zero
*/
- if (unlikely(dst->flags & DST_NOCACHE)) {
+ if (unlikely((dst->flags & DST_NOCACHE) && !force)) {
dst_hold(dst);
skb_dst_set(skb, dst);
} else {
skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
}
}
-EXPORT_SYMBOL(skb_dst_set_noref);
+EXPORT_SYMBOL(__skb_dst_set_noref);
/* Dirty hack. We did it in 2.2 (in __dst_free),
* we have _very_ good reasons not to repeat
diff --git a/net/core/flow.c b/net/core/flow.c
index 707fb7b952e8..7102f166482d 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -346,7 +346,7 @@ static void flow_cache_flush_per_cpu(void *data)
struct flow_flush_info *info = data;
struct tasklet_struct *tasklet;
- tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet);
+ tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet;
tasklet->data = (unsigned long)info;
tasklet_schedule(tasklet);
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index c72a646d9f44..89a3a07d85fb 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -39,21 +39,13 @@
#include <linux/string.h>
#include <linux/log2.h>
+#define DEBUG
#define NEIGH_DEBUG 1
-
-#define NEIGH_PRINTK(x...) printk(x)
-#define NEIGH_NOPRINTK(x...) do { ; } while(0)
-#define NEIGH_PRINTK1 NEIGH_NOPRINTK
-#define NEIGH_PRINTK2 NEIGH_NOPRINTK
-
-#if NEIGH_DEBUG >= 1
-#undef NEIGH_PRINTK1
-#define NEIGH_PRINTK1 NEIGH_PRINTK
-#endif
-#if NEIGH_DEBUG >= 2
-#undef NEIGH_PRINTK2
-#define NEIGH_PRINTK2 NEIGH_PRINTK
-#endif
+#define neigh_dbg(level, fmt, ...) \
+do { \
+ if (level <= NEIGH_DEBUG) \
+ pr_debug(fmt, ##__VA_ARGS__); \
+} while (0)
#define PNEIGH_HASHMASK 0xF
@@ -246,7 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
n->nud_state = NUD_NOARP;
else
n->nud_state = NUD_NONE;
- NEIGH_PRINTK2("neigh %p is stray.\n", n);
+ neigh_dbg(2, "neigh %p is stray\n", n);
}
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
@@ -542,7 +534,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
lockdep_is_held(&tbl->lock)));
rcu_assign_pointer(nht->hash_buckets[hash_val], n);
write_unlock_bh(&tbl->lock);
- NEIGH_PRINTK2("neigh %p is created.\n", n);
+ neigh_dbg(2, "neigh %p is created\n", n);
rc = n;
out:
return rc;
@@ -725,7 +717,7 @@ void neigh_destroy(struct neighbour *neigh)
dev_put(dev);
neigh_parms_put(neigh->parms);
- NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
+ neigh_dbg(2, "neigh %p is destroyed\n", neigh);
atomic_dec(&neigh->tbl->entries);
kfree_rcu(neigh, rcu);
@@ -739,7 +731,7 @@ EXPORT_SYMBOL(neigh_destroy);
*/
static void neigh_suspect(struct neighbour *neigh)
{
- NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
+ neigh_dbg(2, "neigh %p is suspected\n", neigh);
neigh->output = neigh->ops->output;
}
@@ -751,7 +743,7 @@ static void neigh_suspect(struct neighbour *neigh)
*/
static void neigh_connect(struct neighbour *neigh)
{
- NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
+ neigh_dbg(2, "neigh %p is connected\n", neigh);
neigh->output = neigh->ops->connected_output;
}
@@ -852,7 +844,7 @@ static void neigh_invalidate(struct neighbour *neigh)
struct sk_buff *skb;
NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
- NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
+ neigh_dbg(2, "neigh %p is failed\n", neigh);
neigh->updated = jiffies;
/* It is very thin place. report_unreachable is very complicated
@@ -904,17 +896,17 @@ static void neigh_timer_handler(unsigned long arg)
if (state & NUD_REACHABLE) {
if (time_before_eq(now,
neigh->confirmed + neigh->parms->reachable_time)) {
- NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
+ neigh_dbg(2, "neigh %p is still alive\n", neigh);
next = neigh->confirmed + neigh->parms->reachable_time;
} else if (time_before_eq(now,
neigh->used + neigh->parms->delay_probe_time)) {
- NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+ neigh_dbg(2, "neigh %p is delayed\n", neigh);
neigh->nud_state = NUD_DELAY;
neigh->updated = jiffies;
neigh_suspect(neigh);
next = now + neigh->parms->delay_probe_time;
} else {
- NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
+ neigh_dbg(2, "neigh %p is suspected\n", neigh);
neigh->nud_state = NUD_STALE;
neigh->updated = jiffies;
neigh_suspect(neigh);
@@ -923,14 +915,14 @@ static void neigh_timer_handler(unsigned long arg)
} else if (state & NUD_DELAY) {
if (time_before_eq(now,
neigh->confirmed + neigh->parms->delay_probe_time)) {
- NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
+ neigh_dbg(2, "neigh %p is now reachable\n", neigh);
neigh->nud_state = NUD_REACHABLE;
neigh->updated = jiffies;
neigh_connect(neigh);
notify = 1;
next = neigh->confirmed + neigh->parms->reachable_time;
} else {
- NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
+ neigh_dbg(2, "neigh %p is probed\n", neigh);
neigh->nud_state = NUD_PROBE;
neigh->updated = jiffies;
atomic_set(&neigh->probes, 0);
@@ -997,7 +989,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
return 1;
}
} else if (neigh->nud_state & NUD_STALE) {
- NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+ neigh_dbg(2, "neigh %p is delayed\n", neigh);
neigh->nud_state = NUD_DELAY;
neigh->updated = jiffies;
neigh_add_timer(neigh,
@@ -1320,8 +1312,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
out:
return rc;
discard:
- NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
- dst, neigh);
+ neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
rc = -EINVAL;
kfree_skb(skb);
@@ -1498,7 +1489,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
}
}
write_unlock_bh(&tbl->lock);
- NEIGH_PRINTK1("neigh_parms_release: not found\n");
+ neigh_dbg(1, "%s: not found\n", __func__);
}
EXPORT_SYMBOL(neigh_parms_release);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d2322d7f0f7b..589d0abb34a0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -496,8 +496,10 @@ static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev)
}
if (ops->fill_info) {
data = nla_nest_start(skb, IFLA_INFO_DATA);
- if (data == NULL)
+ if (data == NULL) {
+ err = -EMSGSIZE;
goto err_cancel_link;
+ }
err = ops->fill_info(skb, dev);
if (err < 0)
goto err_cancel_data;
diff --git a/net/core/scm.c b/net/core/scm.c
index 905dcc6ad1e3..03795d0147f2 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -24,6 +24,7 @@
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/security.h>
+#include <linux/pid_namespace.h>
#include <linux/pid.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
@@ -52,7 +53,8 @@ static __inline__ int scm_check_creds(struct ucred *creds)
if (!uid_valid(uid) || !gid_valid(gid))
return -EINVAL;
- if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) &&
+ if ((creds->pid == task_tgid_vnr(current) ||
+ ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) &&
((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) &&
((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
@@ -185,22 +187,6 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
p->creds.uid = uid;
p->creds.gid = gid;
-
- if (!p->cred ||
- !uid_eq(p->cred->euid, uid) ||
- !gid_eq(p->cred->egid, gid)) {
- struct cred *cred;
- err = -ENOMEM;
- cred = prepare_creds();
- if (!cred)
- goto error;
-
- cred->uid = cred->euid = uid;
- cred->gid = cred->egid = gid;
- if (p->cred)
- put_cred(p->cred);
- p->cred = cred;
- }
break;
}
default:
@@ -304,8 +290,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
/* Bump the usage count and install the file. */
sock = sock_from_file(fp[i], &err);
if (sock) {
- sock_update_netprioidx(sock->sk, current);
- sock_update_classid(sock->sk, current);
+ sock_update_netprioidx(sock->sk);
+ sock_update_classid(sock->sk);
}
fd_install(new_fd, get_file(fp[i]));
}
diff --git a/net/core/sock.c b/net/core/sock.c
index a19e728d5fb6..d4f4cea726e7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -907,6 +907,10 @@ set_rcvbuf:
sock_valbool_flag(sk, SOCK_NOFCS, valbool);
break;
+ case SO_SELECT_ERR_QUEUE:
+ sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
@@ -1160,6 +1164,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
break;
+ case SO_SELECT_ERR_QUEUE:
+ v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
+ break;
+
default:
return -ENOPROTOOPT;
}
@@ -1299,11 +1307,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
}
#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
-void sock_update_classid(struct sock *sk, struct task_struct *task)
+void sock_update_classid(struct sock *sk)
{
u32 classid;
- classid = task_cls_classid(task);
+ classid = task_cls_classid(current);
if (classid != sk->sk_classid)
sk->sk_classid = classid;
}
@@ -1311,12 +1319,12 @@ EXPORT_SYMBOL(sock_update_classid);
#endif
#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
-void sock_update_netprioidx(struct sock *sk, struct task_struct *task)
+void sock_update_netprioidx(struct sock *sk)
{
if (in_interrupt())
return;
- sk->sk_cgrp_prioidx = task_netprioidx(task);
+ sk->sk_cgrp_prioidx = task_netprioidx(current);
}
EXPORT_SYMBOL_GPL(sock_update_netprioidx);
#endif
@@ -1345,8 +1353,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sock_net_set(sk, get_net(net));
atomic_set(&sk->sk_wmem_alloc, 1);
- sock_update_classid(sk, current);
- sock_update_netprioidx(sk, current);
+ sock_update_classid(sk);
+ sock_update_netprioidx(sk);
}
return sk;
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index b15c1d1720fb..86e3807052e9 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -19,7 +19,6 @@
#include <linux/sockios.h>
#include <linux/init.h>
#include <linux/skbuff.h>
-#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/netdevice.h>
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index e1b4580f78dd..55e1fd5b3e56 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -1139,10 +1139,10 @@ static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
error:
dev_kfree_skb(skb);
out:
- if (err < 0)
+ if (err)
pr_debug("ERROR: xmit failed\n");
- return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK);
+ return (err < 0) ? NET_XMIT_DROP : err;
}
static struct wpan_phy *lowpan_get_phy(const struct net_device *dev)
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 96bb08abece2..b0bdd8c51e9c 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -315,7 +315,7 @@ static int ieee802154_associate_req(struct sk_buff *skb,
struct net_device *dev;
struct ieee802154_addr addr;
u8 page;
- int ret = -EINVAL;
+ int ret = -EOPNOTSUPP;
if (!info->attrs[IEEE802154_ATTR_CHANNEL] ||
!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] ||
@@ -327,6 +327,8 @@ static int ieee802154_associate_req(struct sk_buff *skb,
dev = ieee802154_nl_get_dev(info);
if (!dev)
return -ENODEV;
+ if (!ieee802154_mlme_ops(dev)->assoc_req)
+ goto out;
if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) {
addr.addr_type = IEEE802154_ADDR_LONG;
@@ -350,6 +352,7 @@ static int ieee802154_associate_req(struct sk_buff *skb,
page,
nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY]));
+out:
dev_put(dev);
return ret;
}
@@ -359,7 +362,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb,
{
struct net_device *dev;
struct ieee802154_addr addr;
- int ret = -EINVAL;
+ int ret = -EOPNOTSUPP;
if (!info->attrs[IEEE802154_ATTR_STATUS] ||
!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] ||
@@ -369,6 +372,8 @@ static int ieee802154_associate_resp(struct sk_buff *skb,
dev = ieee802154_nl_get_dev(info);
if (!dev)
return -ENODEV;
+ if (!ieee802154_mlme_ops(dev)->assoc_resp)
+ goto out;
addr.addr_type = IEEE802154_ADDR_LONG;
nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR],
@@ -380,6 +385,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb,
nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]),
nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS]));
+out:
dev_put(dev);
return ret;
}
@@ -389,7 +395,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb,
{
struct net_device *dev;
struct ieee802154_addr addr;
- int ret = -EINVAL;
+ int ret = -EOPNOTSUPP;
if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] &&
!info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) ||
@@ -399,6 +405,8 @@ static int ieee802154_disassociate_req(struct sk_buff *skb,
dev = ieee802154_nl_get_dev(info);
if (!dev)
return -ENODEV;
+ if (!ieee802154_mlme_ops(dev)->disassoc_req)
+ goto out;
if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) {
addr.addr_type = IEEE802154_ADDR_LONG;
@@ -415,6 +423,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb,
ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr,
nla_get_u8(info->attrs[IEEE802154_ATTR_REASON]));
+out:
dev_put(dev);
return ret;
}
@@ -432,7 +441,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
u8 channel, bcn_ord, sf_ord;
u8 page;
int pan_coord, blx, coord_realign;
- int ret;
+ int ret = -EOPNOTSUPP;
if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] ||
!info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] ||
@@ -448,6 +457,8 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
dev = ieee802154_nl_get_dev(info);
if (!dev)
return -ENODEV;
+ if (!ieee802154_mlme_ops(dev)->start_req)
+ goto out;
addr.addr_type = IEEE802154_ADDR_SHORT;
addr.short_addr = nla_get_u16(
@@ -476,6 +487,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page,
bcn_ord, sf_ord, pan_coord, blx, coord_realign);
+out:
dev_put(dev);
return ret;
}
@@ -483,7 +495,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
{
struct net_device *dev;
- int ret;
+ int ret = -EOPNOTSUPP;
u8 type;
u32 channels;
u8 duration;
@@ -497,6 +509,8 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
dev = ieee802154_nl_get_dev(info);
if (!dev)
return -ENODEV;
+ if (!ieee802154_mlme_ops(dev)->scan_req)
+ goto out;
type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]);
channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]);
@@ -511,6 +525,7 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page,
duration);
+out:
dev_put(dev);
return ret;
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5d985e367535..2759dfd576ae 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -802,8 +802,10 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
if (nlh->nlmsg_flags & NLM_F_EXCL ||
!(nlh->nlmsg_flags & NLM_F_REPLACE))
return -EEXIST;
-
- set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft);
+ ifa = ifa_existing;
+ set_ifa_lifetime(ifa, valid_lft, prefered_lft);
+ rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
+ blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
}
return 0;
}
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 1206ca64b0ea..e97d66a1fdde 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -52,20 +52,27 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
unsigned long now = jiffies;
int i;
+ /* Per bucket lock NOT needed here, due to write lock protection */
write_lock(&f->lock);
+
get_random_bytes(&f->rnd, sizeof(u32));
for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+ struct inet_frag_bucket *hb;
struct inet_frag_queue *q;
struct hlist_node *n;
- hlist_for_each_entry_safe(q, n, &f->hash[i], list) {
+ hb = &f->hash[i];
+ hlist_for_each_entry_safe(q, n, &hb->chain, list) {
unsigned int hval = f->hashfn(q);
if (hval != i) {
+ struct inet_frag_bucket *hb_dest;
+
hlist_del(&q->list);
/* Relink to new hash chain. */
- hlist_add_head(&q->list, &f->hash[hval]);
+ hb_dest = &f->hash[hval];
+ hlist_add_head(&q->list, &hb_dest->chain);
}
}
}
@@ -78,9 +85,12 @@ void inet_frags_init(struct inet_frags *f)
{
int i;
- for (i = 0; i < INETFRAGS_HASHSZ; i++)
- INIT_HLIST_HEAD(&f->hash[i]);
+ for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+ struct inet_frag_bucket *hb = &f->hash[i];
+ spin_lock_init(&hb->chain_lock);
+ INIT_HLIST_HEAD(&hb->chain);
+ }
rwlock_init(&f->lock);
f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
@@ -122,9 +132,18 @@ EXPORT_SYMBOL(inet_frags_exit_net);
static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
{
- write_lock(&f->lock);
+ struct inet_frag_bucket *hb;
+ unsigned int hash;
+
+ read_lock(&f->lock);
+ hash = f->hashfn(fq);
+ hb = &f->hash[hash];
+
+ spin_lock(&hb->chain_lock);
hlist_del(&fq->list);
- write_unlock(&f->lock);
+ spin_unlock(&hb->chain_lock);
+
+ read_unlock(&f->lock);
inet_frag_lru_del(fq);
}
@@ -226,27 +245,32 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
struct inet_frag_queue *qp_in, struct inet_frags *f,
void *arg)
{
+ struct inet_frag_bucket *hb;
struct inet_frag_queue *qp;
#ifdef CONFIG_SMP
#endif
unsigned int hash;
- write_lock(&f->lock);
+ read_lock(&f->lock); /* Protects against hash rebuild */
/*
* While we stayed w/o the lock other CPU could update
* the rnd seed, so we need to re-calculate the hash
* chain. Fortunatelly the qp_in can be used to get one.
*/
hash = f->hashfn(qp_in);
+ hb = &f->hash[hash];
+ spin_lock(&hb->chain_lock);
+
#ifdef CONFIG_SMP
/* With SMP race we have to recheck hash table, because
* such entry could be created on other cpu, while we
- * promoted read lock to write lock.
+ * released the hash bucket lock.
*/
- hlist_for_each_entry(qp, &f->hash[hash], list) {
+ hlist_for_each_entry(qp, &hb->chain, list) {
if (qp->net == nf && f->match(qp, arg)) {
atomic_inc(&qp->refcnt);
- write_unlock(&f->lock);
+ spin_unlock(&hb->chain_lock);
+ read_unlock(&f->lock);
qp_in->last_in |= INET_FRAG_COMPLETE;
inet_frag_put(qp_in, f);
return qp;
@@ -258,8 +282,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
atomic_inc(&qp->refcnt);
atomic_inc(&qp->refcnt);
- hlist_add_head(&qp->list, &f->hash[hash]);
- write_unlock(&f->lock);
+ hlist_add_head(&qp->list, &hb->chain);
+ spin_unlock(&hb->chain_lock);
+ read_unlock(&f->lock);
inet_frag_lru_add(nf, qp);
return qp;
}
@@ -300,17 +325,23 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
struct inet_frags *f, void *key, unsigned int hash)
__releases(&f->lock)
{
+ struct inet_frag_bucket *hb;
struct inet_frag_queue *q;
int depth = 0;
- hlist_for_each_entry(q, &f->hash[hash], list) {
+ hb = &f->hash[hash];
+
+ spin_lock(&hb->chain_lock);
+ hlist_for_each_entry(q, &hb->chain, list) {
if (q->net == nf && f->match(q, key)) {
atomic_inc(&q->refcnt);
+ spin_unlock(&hb->chain_lock);
read_unlock(&f->lock);
return q;
}
depth++;
}
+ spin_unlock(&hb->chain_lock);
read_unlock(&f->lock);
if (depth <= INETFRAGS_MAXDEPTH)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ad662e906f7e..987a4e5e07e2 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -159,14 +159,14 @@ static int ip_gre_calc_hlen(__be16 o_flags)
static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
bool *csum_err, int *hdr_len)
{
- struct iphdr *iph = ip_hdr(skb);
- struct gre_base_hdr *greh;
+ unsigned int ip_hlen = ip_hdrlen(skb);
+ const struct gre_base_hdr *greh;
__be32 *options;
if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
return -EINVAL;
- greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2));
+ greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
return -EINVAL;
@@ -176,6 +176,8 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (!pskb_may_pull(skb, *hdr_len))
return -EINVAL;
+ greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
+
tpi->proto = greh->protocol;
options = (__be32 *)(greh + 1);
@@ -660,7 +662,6 @@ static void __gre_tunnel_init(struct net_device *dev)
dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
- dev->iflink = 0;
dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5e12dca7b3dd..147abf5275aa 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -430,8 +430,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
to->nf_trace = from->nf_trace;
#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index f01d1b1aff7f..59cb8c769056 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -75,6 +75,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
t->props.mode = x->props.mode;
t->props.saddr.a4 = x->props.saddr.a4;
t->props.flags = x->props.flags;
+ t->props.extra_flags = x->props.extra_flags;
memcpy(&t->mark, &x->mark, sizeof(t->mark));
if (xfrm_init_state(t))
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index bf6c5cf31aed..efa1138fa523 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -206,7 +206,7 @@ static int __init ic_open_devs(void)
struct ic_device *d, **last;
struct net_device *dev;
unsigned short oflags;
- unsigned long start;
+ unsigned long start, next_msg;
last = &ic_first_dev;
rtnl_lock();
@@ -263,12 +263,23 @@ static int __init ic_open_devs(void)
/* wait for a carrier on at least one device */
start = jiffies;
+ next_msg = start + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12);
while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) {
+ int wait, elapsed;
+
for_each_netdev(&init_net, dev)
if (ic_is_init_dev(dev) && netif_carrier_ok(dev))
goto have_carrier;
msleep(1);
+
+ if time_before(jiffies, next_msg)
+ continue;
+
+ elapsed = jiffies_to_msecs(jiffies - start);
+ wait = (CONF_CARRIER_TIMEOUT - elapsed + 500)/1000;
+ pr_info("Waiting up to %d more seconds for network.\n", wait);
+ next_msg = jiffies + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12);
}
have_carrier:
rtnl_unlock();
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 3efcf87400c3..e391db1f056d 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -182,8 +182,7 @@ ipt_get_target_c(const struct ipt_entry *e)
return ipt_get_target((struct ipt_entry *)e);
}
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
static const char *const hooknames[] = {
[NF_INET_PRE_ROUTING] = "PREROUTING",
[NF_INET_LOCAL_IN] = "INPUT",
@@ -259,6 +258,7 @@ static void trace_packet(const struct sk_buff *skb,
const char *hookname, *chainname, *comment;
const struct ipt_entry *iter;
unsigned int rulenum = 0;
+ struct net *net = dev_net(in ? in : out);
table_base = private->entries[smp_processor_id()];
root = get_entry(table_base, private->hook_entry[hook]);
@@ -271,7 +271,7 @@ static void trace_packet(const struct sk_buff *skb,
&chainname, &comment, &rulenum) != 0)
break;
- nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
+ nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo,
"TRACE: %s:%s:%s:%u ",
tablename, chainname, comment, rulenum);
}
@@ -361,8 +361,7 @@ ipt_do_table(struct sk_buff *skb,
t = ipt_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* The packet is traced: log it */
if (unlikely(skb->nf_trace))
trace_packet(skb, hook, in, out,
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index e7f8cad11393..8799c836ccaa 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -45,6 +45,7 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ipt_ULOG.h>
#include <net/netfilter/nf_log.h>
+#include <net/netns/generic.h>
#include <net/sock.h>
#include <linux/bitops.h>
#include <asm/unaligned.h>
@@ -78,15 +79,23 @@ typedef struct {
struct timer_list timer; /* the timer function */
} ulog_buff_t;
-static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */
+static int ulog_net_id __read_mostly;
+struct ulog_net {
+ unsigned int nlgroup[ULOG_MAXNLGROUPS];
+ ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS];
+ struct sock *nflognl;
+ spinlock_t lock;
+};
-static struct sock *nflognl; /* our socket */
-static DEFINE_SPINLOCK(ulog_lock); /* spinlock */
+static struct ulog_net *ulog_pernet(struct net *net)
+{
+ return net_generic(net, ulog_net_id);
+}
/* send one ulog_buff_t to userspace */
-static void ulog_send(unsigned int nlgroupnum)
+static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum)
{
- ulog_buff_t *ub = &ulog_buffers[nlgroupnum];
+ ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum];
pr_debug("ulog_send: timer is deleting\n");
del_timer(&ub->timer);
@@ -103,7 +112,8 @@ static void ulog_send(unsigned int nlgroupnum)
NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
pr_debug("throwing %d packets to netlink group %u\n",
ub->qlen, nlgroupnum + 1);
- netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC);
+ netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1,
+ GFP_ATOMIC);
ub->qlen = 0;
ub->skb = NULL;
@@ -114,13 +124,16 @@ static void ulog_send(unsigned int nlgroupnum)
/* timer function to flush queue in flushtimeout time */
static void ulog_timer(unsigned long data)
{
+ struct ulog_net *ulog = container_of((void *)data,
+ struct ulog_net,
+ nlgroup[*(unsigned int *)data]);
pr_debug("timer function called, calling ulog_send\n");
/* lock to protect against somebody modifying our structure
* from ipt_ulog_target at the same time */
- spin_lock_bh(&ulog_lock);
- ulog_send(data);
- spin_unlock_bh(&ulog_lock);
+ spin_lock_bh(&ulog->lock);
+ ulog_send(ulog, data);
+ spin_unlock_bh(&ulog->lock);
}
static struct sk_buff *ulog_alloc_skb(unsigned int size)
@@ -160,6 +173,8 @@ static void ipt_ulog_packet(unsigned int hooknum,
size_t size, copy_len;
struct nlmsghdr *nlh;
struct timeval tv;
+ struct net *net = dev_net(in ? in : out);
+ struct ulog_net *ulog = ulog_pernet(net);
/* ffs == find first bit set, necessary because userspace
* is already shifting groupnumber, but we need unshifted.
@@ -174,9 +189,9 @@ static void ipt_ulog_packet(unsigned int hooknum,
size = nlmsg_total_size(sizeof(*pm) + copy_len);
- ub = &ulog_buffers[groupnum];
+ ub = &ulog->ulog_buffers[groupnum];
- spin_lock_bh(&ulog_lock);
+ spin_lock_bh(&ulog->lock);
if (!ub->skb) {
if (!(ub->skb = ulog_alloc_skb(size)))
@@ -186,7 +201,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
/* either the queue len is too high or we don't have
* enough room in nlskb left. send it to userspace. */
- ulog_send(groupnum);
+ ulog_send(ulog, groupnum);
if (!(ub->skb = ulog_alloc_skb(size)))
goto alloc_failure;
@@ -260,16 +275,16 @@ static void ipt_ulog_packet(unsigned int hooknum,
if (ub->qlen >= loginfo->qthreshold) {
if (loginfo->qthreshold > 1)
nlh->nlmsg_type = NLMSG_DONE;
- ulog_send(groupnum);
+ ulog_send(ulog, groupnum);
}
out_unlock:
- spin_unlock_bh(&ulog_lock);
+ spin_unlock_bh(&ulog->lock);
return;
alloc_failure:
pr_debug("Error building netlink message\n");
- spin_unlock_bh(&ulog_lock);
+ spin_unlock_bh(&ulog->lock);
}
static unsigned int
@@ -376,54 +391,43 @@ static struct nf_logger ipt_ulog_logger __read_mostly = {
.me = THIS_MODULE,
};
-static int __init ulog_tg_init(void)
+static int __net_init ulog_tg_net_init(struct net *net)
{
- int ret, i;
+ int i;
+ struct ulog_net *ulog = ulog_pernet(net);
struct netlink_kernel_cfg cfg = {
.groups = ULOG_MAXNLGROUPS,
};
- pr_debug("init module\n");
-
- if (nlbufsiz > 128*1024) {
- pr_warning("Netlink buffer has to be <= 128kB\n");
- return -EINVAL;
- }
-
+ spin_lock_init(&ulog->lock);
/* initialize ulog_buffers */
for (i = 0; i < ULOG_MAXNLGROUPS; i++)
- setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
+ setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer, i);
- nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg);
- if (!nflognl)
+ ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
+ if (!ulog->nflognl)
return -ENOMEM;
- ret = xt_register_target(&ulog_tg_reg);
- if (ret < 0) {
- netlink_kernel_release(nflognl);
- return ret;
- }
if (nflog)
- nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
+ nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger);
return 0;
}
-static void __exit ulog_tg_exit(void)
+static void __net_exit ulog_tg_net_exit(struct net *net)
{
ulog_buff_t *ub;
int i;
-
- pr_debug("cleanup_module\n");
+ struct ulog_net *ulog = ulog_pernet(net);
if (nflog)
- nf_log_unregister(&ipt_ulog_logger);
- xt_unregister_target(&ulog_tg_reg);
- netlink_kernel_release(nflognl);
+ nf_log_unset(net, &ipt_ulog_logger);
+
+ netlink_kernel_release(ulog->nflognl);
/* remove pending timers and free allocated skb's */
for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
- ub = &ulog_buffers[i];
+ ub = &ulog->ulog_buffers[i];
pr_debug("timer is deleting\n");
del_timer(&ub->timer);
@@ -434,5 +438,50 @@ static void __exit ulog_tg_exit(void)
}
}
+static struct pernet_operations ulog_tg_net_ops = {
+ .init = ulog_tg_net_init,
+ .exit = ulog_tg_net_exit,
+ .id = &ulog_net_id,
+ .size = sizeof(struct ulog_net),
+};
+
+static int __init ulog_tg_init(void)
+{
+ int ret;
+ pr_debug("init module\n");
+
+ if (nlbufsiz > 128*1024) {
+ pr_warn("Netlink buffer has to be <= 128kB\n");
+ return -EINVAL;
+ }
+
+ ret = register_pernet_subsys(&ulog_tg_net_ops);
+ if (ret)
+ goto out_pernet;
+
+ ret = xt_register_target(&ulog_tg_reg);
+ if (ret < 0)
+ goto out_target;
+
+ if (nflog)
+ nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
+
+ return 0;
+
+out_target:
+ unregister_pernet_subsys(&ulog_tg_net_ops);
+out_pernet:
+ return ret;
+}
+
+static void __exit ulog_tg_exit(void)
+{
+ pr_debug("cleanup_module\n");
+ if (nflog)
+ nf_log_unregister(&ipt_ulog_logger);
+ xt_unregister_target(&ulog_tg_reg);
+ unregister_pernet_subsys(&ulog_tg_net_ops);
+}
+
module_init(ulog_tg_init);
module_exit(ulog_tg_exit);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 5241d997ab75..c2cd63d2d892 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -187,8 +187,8 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
if (icmph == NULL) {
if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmp: short packet ");
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
+ NULL, "nf_ct_icmp: short packet ");
return -NF_ACCEPT;
}
@@ -196,7 +196,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, hooknum, dataoff, 0)) {
if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: bad HW ICMP checksum ");
return -NF_ACCEPT;
}
@@ -209,7 +209,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
*/
if (icmph->type > NR_ICMP_TYPES) {
if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: invalid ICMP type ");
return -NF_ACCEPT;
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 2e91006d6076..7d93d62cd5fd 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -514,9 +514,8 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.opt = NULL;
ipc.oif = sk->sk_bound_dev_if;
ipc.tx_flags = 0;
- err = sock_tx_timestamp(sk, &ipc.tx_flags);
- if (err)
- return err;
+
+ sock_tx_timestamp(sk, &ipc.tx_flags);
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a96f7b586277..dcb116dde216 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2885,6 +2885,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
__be32 delta;
unsigned int oldlen;
unsigned int mss;
+ struct sk_buff *gso_skb = skb;
+ __sum16 newcheck;
if (!pskb_may_pull(skb, sizeof(*th)))
goto out;
@@ -2935,11 +2937,13 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
th = tcp_hdr(skb);
seq = ntohl(th->seq);
+ newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
+ (__force u32)delta));
+
do {
th->fin = th->psh = 0;
+ th->check = newcheck;
- th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
- (__force u32)delta));
if (skb->ip_summed != CHECKSUM_PARTIAL)
th->check =
csum_fold(csum_partial(skb_transport_header(skb),
@@ -2953,6 +2957,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
th->cwr = 0;
} while (skb->next);
+ /* Following permits TCP Small Queues to work well with GSO :
+ * The callback to TCP stack will be called at the time last frag
+ * is freed at TX completion, and not right now when gso_skb
+ * is freed by GSO engine
+ */
+ if (gso_skb->destructor == tcp_wfree) {
+ swap(gso_skb->sk, skb->sk);
+ swap(gso_skb->destructor, skb->destructor);
+ swap(gso_skb->truesize, skb->truesize);
+ }
+
delta = htonl(oldlen + (skb->tail - skb->transport_header) +
skb->data_len);
th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index b6f3583ddfe8..da14436c1735 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -64,7 +64,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
{
struct cg_proto *cg_proto;
struct tcp_memcontrol *tcp;
- u64 val;
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
@@ -72,8 +71,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
tcp = tcp_from_cgproto(cg_proto);
percpu_counter_destroy(&tcp->tcp_sockets_allocated);
-
- val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
}
EXPORT_SYMBOL(tcp_destroy_cgroup);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index af354c98fdb5..d12694353540 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -787,7 +787,7 @@ void __init tcp_tasklet_init(void)
* We cant xmit new skbs from this context, as we might already
* hold qdisc lock.
*/
-static void tcp_wfree(struct sk_buff *skb)
+void tcp_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7117d1467b02..2722db024a0b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -902,9 +902,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.addr = inet->inet_saddr;
ipc.oif = sk->sk_bound_dev_if;
- err = sock_tx_timestamp(sk, &ipc.tx_flags);
- if (err)
- return err;
+
+ sock_tx_timestamp(sk, &ipc.tx_flags);
+
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc);
if (err)
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index fe5189e2e114..eb1dd4d643f2 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -103,8 +103,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);
- /* DS disclosed */
- top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos,
+ /* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
+ if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+ top_iph->tos = 0;
+ else
+ top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
+ top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
XFRM_MODE_SKB_CB(skb)->tos);
flags = x->props.flags;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d6279cb3d547..28b61e89bbb8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -422,6 +422,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ipv6_regen_rndid((unsigned long) ndev);
}
#endif
+ ndev->token = in6addr_any;
if (netif_running(dev) && addrconf_qdisc_ok(dev))
ndev->if_flags |= IF_READY;
@@ -877,6 +878,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
ifa->prefix_len = pfxlen;
ifa->flags = flags | IFA_F_TENTATIVE;
ifa->cstamp = ifa->tstamp = jiffies;
+ ifa->tokenized = false;
ifa->rt = rt;
@@ -2133,11 +2135,19 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
struct inet6_ifaddr *ifp;
struct in6_addr addr;
int create = 0, update_lft = 0;
+ bool tokenized = false;
if (pinfo->prefix_len == 64) {
memcpy(&addr, &pinfo->prefix, 8);
- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
- ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
+
+ if (!ipv6_addr_any(&in6_dev->token)) {
+ read_lock_bh(&in6_dev->lock);
+ memcpy(addr.s6_addr + 8,
+ in6_dev->token.s6_addr + 8, 8);
+ read_unlock_bh(&in6_dev->lock);
+ tokenized = true;
+ } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
+ ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
in6_dev_put(in6_dev);
return;
}
@@ -2178,6 +2188,7 @@ ok:
update_lft = create = 1;
ifp->cstamp = jiffies;
+ ifp->tokenized = tokenized;
addrconf_dad_start(ifp);
}
@@ -2616,6 +2627,9 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
static void init_loopback(struct net_device *dev)
{
struct inet6_dev *idev;
+ struct net_device *sp_dev;
+ struct inet6_ifaddr *sp_ifa;
+ struct rt6_info *sp_rt;
/* ::1 */
@@ -2627,6 +2641,30 @@ static void init_loopback(struct net_device *dev)
}
add_addr(idev, &in6addr_loopback, 128, IFA_HOST);
+
+ /* Add routes to other interface's IPv6 addresses */
+ for_each_netdev(dev_net(dev), sp_dev) {
+ if (!strcmp(sp_dev->name, dev->name))
+ continue;
+
+ idev = __in6_dev_get(sp_dev);
+ if (!idev)
+ continue;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(sp_ifa, &idev->addr_list, if_list) {
+
+ if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))
+ continue;
+
+ sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0);
+
+ /* Failure cases are ignored */
+ if (!IS_ERR(sp_rt))
+ ip6_ins_rt(sp_rt);
+ }
+ read_unlock_bh(&idev->lock);
+ }
}
static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr)
@@ -4138,7 +4176,8 @@ static inline size_t inet6_ifla6_size(void)
+ nla_total_size(sizeof(struct ifla_cacheinfo))
+ nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+ nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
- + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */
+ + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
+ + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */
}
static inline size_t inet6_if_nlmsg_size(void)
@@ -4225,6 +4264,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
goto nla_put_failure;
snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+ nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
+ if (nla == NULL)
+ goto nla_put_failure;
+ read_lock_bh(&idev->lock);
+ memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
+ read_unlock_bh(&idev->lock);
+
return 0;
nla_put_failure:
@@ -4252,6 +4298,80 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
return 0;
}
+static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
+{
+ struct inet6_ifaddr *ifp;
+ struct net_device *dev = idev->dev;
+ bool update_rs = false;
+
+ if (token == NULL)
+ return -EINVAL;
+ if (ipv6_addr_any(token))
+ return -EINVAL;
+ if (dev->flags & (IFF_LOOPBACK | IFF_NOARP))
+ return -EINVAL;
+ if (!ipv6_accept_ra(idev))
+ return -EINVAL;
+ if (idev->cnf.rtr_solicits <= 0)
+ return -EINVAL;
+
+ write_lock_bh(&idev->lock);
+
+ BUILD_BUG_ON(sizeof(token->s6_addr) != 16);
+ memcpy(idev->token.s6_addr + 8, token->s6_addr + 8, 8);
+
+ write_unlock_bh(&idev->lock);
+
+ if (!idev->dead && (idev->if_flags & IF_READY)) {
+ struct in6_addr ll_addr;
+
+ ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE |
+ IFA_F_OPTIMISTIC);
+
+ /* If we're not ready, then normal ifup will take care
+ * of this. Otherwise, we need to request our rs here.
+ */
+ ndisc_send_rs(dev, &ll_addr, &in6addr_linklocal_allrouters);
+ update_rs = true;
+ }
+
+ write_lock_bh(&idev->lock);
+
+ if (update_rs)
+ idev->if_flags |= IF_RS_SENT;
+
+ /* Well, that's kinda nasty ... */
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ spin_lock(&ifp->lock);
+ if (ifp->tokenized) {
+ ifp->valid_lft = 0;
+ ifp->prefered_lft = 0;
+ }
+ spin_unlock(&ifp->lock);
+ }
+
+ write_unlock_bh(&idev->lock);
+ return 0;
+}
+
+static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
+{
+ int err = -EINVAL;
+ struct inet6_dev *idev = __in6_dev_get(dev);
+ struct nlattr *tb[IFLA_INET6_MAX + 1];
+
+ if (!idev)
+ return -EAFNOSUPPORT;
+
+ if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0)
+ BUG();
+
+ if (tb[IFLA_INET6_TOKEN])
+ err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]));
+
+ return err;
+}
+
static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
u32 portid, u32 seq, int event, unsigned int flags)
{
@@ -4954,6 +5074,7 @@ static struct rtnl_af_ops inet6_ops = {
.family = AF_INET6,
.fill_link_af = inet6_fill_link_af,
.get_link_af_size = inet6_get_link_af_size,
+ .set_link_af = inet6_set_link_af,
};
/*
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index e33fe0ab2568..2bab2aa59745 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -118,6 +118,18 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
ipv6_addr_loopback(&hdr->daddr))
goto err;
+ /* RFC4291 Errata ID: 3480
+ * Interface-Local scope spans only a single interface on a
+ * node and is useful only for loopback transmission of
+ * multicast. Packets with interface-local scope received
+ * from another node must be discarded.
+ */
+ if (!(skb->pkt_type == PACKET_LOOPBACK ||
+ dev->flags & IFF_LOOPBACK) &&
+ ipv6_addr_is_multicast(&hdr->daddr) &&
+ IPV6_ADDR_MC_SCOPE(&hdr->daddr) == 1)
+ goto err;
+
/* RFC4291 2.7
* Nodes must not originate a packet to a multicast address whose scope
* field contains the reserved value 0; if such a packet is received, it
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 155eccfa7760..d2eedf192330 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1224,11 +1224,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
}
/* For UDP, check if TX timestamp is enabled */
- if (sk->sk_type == SOCK_DGRAM) {
- err = sock_tx_timestamp(sk, &tx_flags);
- if (err)
- goto error;
- }
+ if (sk->sk_type == SOCK_DGRAM)
+ sock_tx_timestamp(sk, &tx_flags);
/*
* Let's try using as much space as possible.
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 341b54ade72c..8861b1ef420e 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -284,6 +284,7 @@ static void trace_packet(const struct sk_buff *skb,
const char *hookname, *chainname, *comment;
const struct ip6t_entry *iter;
unsigned int rulenum = 0;
+ struct net *net = dev_net(in ? in : out);
table_base = private->entries[smp_processor_id()];
root = get_entry(table_base, private->hook_entry[hook]);
@@ -296,7 +297,7 @@ static void trace_packet(const struct sk_buff *skb,
&chainname, &comment, &rulenum) != 0)
break;
- nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo,
+ nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
"TRACE: %s:%s:%s:%u ",
tablename, chainname, comment, rulenum);
}
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
index 938e0b7ea1bd..590f767db5d4 100644
--- a/net/ipv6/netfilter/ip6t_NPT.c
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -52,7 +52,7 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
if (pfx_len - i >= 32)
mask = 0;
else
- mask = htonl(~((1 << (pfx_len - i)) - 1));
+ mask = htonl((1 << (i - pfx_len + 32)) - 1);
idx = i / 32;
addr->s6_addr32[idx] &= mask;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 24df3dde0076..b3807c5cb888 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -131,7 +131,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
type + 128);
nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
- nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL,
+ NULL, NULL,
"nf_ct_icmpv6: invalid new with type %d ",
type + 128);
return false;
@@ -203,7 +204,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
if (icmp6h == NULL) {
if (LOG_INVALID(net, IPPROTO_ICMPV6))
- nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
"nf_ct_icmpv6: short packet ");
return -NF_ACCEPT;
}
@@ -211,7 +212,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
if (LOG_INVALID(net, IPPROTO_ICMPV6))
- nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
"nf_ct_icmpv6: ICMPv6 checksum failed ");
return -NF_ACCEPT;
}
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6700069949dd..dffdc1a389c5 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -41,6 +41,7 @@
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
+#include <net/inet_ecn.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
#include <linux/sysctl.h>
#include <linux/netfilter.h>
@@ -138,6 +139,11 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
}
#endif
+static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
+{
+ return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
+}
+
static unsigned int nf_hashfn(struct inet_frag_queue *q)
{
const struct frag_queue *nq;
@@ -166,7 +172,7 @@ static void nf_ct_frag6_expire(unsigned long data)
/* Creation primitives. */
static inline struct frag_queue *fq_find(struct net *net, __be32 id,
u32 user, struct in6_addr *src,
- struct in6_addr *dst)
+ struct in6_addr *dst, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -176,6 +182,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
arg.user = user;
arg.src = src;
arg.dst = dst;
+ arg.ecn = ecn;
read_lock_bh(&nf_frags.lock);
hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
@@ -196,6 +203,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
struct sk_buff *prev, *next;
unsigned int payload_len;
int offset, end;
+ u8 ecn;
if (fq->q.last_in & INET_FRAG_COMPLETE) {
pr_debug("Already completed\n");
@@ -213,6 +221,8 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
return -1;
}
+ ecn = ip6_frag_ecn(ipv6_hdr(skb));
+
if (skb->ip_summed == CHECKSUM_COMPLETE) {
const unsigned char *nh = skb_network_header(skb);
skb->csum = csum_sub(skb->csum,
@@ -317,6 +327,7 @@ found:
}
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
+ fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len;
add_frag_mem_limit(&fq->q, skb->truesize);
@@ -352,12 +363,17 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
{
struct sk_buff *fp, *op, *head = fq->q.fragments;
int payload_len;
+ u8 ecn;
inet_frag_kill(&fq->q, &nf_frags);
WARN_ON(head == NULL);
WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
+ ecn = ip_frag_ecn_table[fq->ecn];
+ if (unlikely(ecn == 0xff))
+ goto out_fail;
+
/* Unfragmented part is taken from the first segment. */
payload_len = ((head->data - skb_network_header(head)) -
sizeof(struct ipv6hdr) + fq->q.len -
@@ -428,6 +444,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
head->dev = dev;
head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
+ ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
@@ -572,7 +589,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
local_bh_enable();
- fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr);
+ fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
+ ip6_frag_ecn(hdr));
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
goto ret_orig;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1033d2b1d81e..e51bd1a58264 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -386,6 +386,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (dst)
dst->ops->redirect(dst, sk, skb);
+ goto out;
}
if (type == ICMPV6_PKT_TOOBIG) {
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 9bf6a74a71d2..4770d515c2c8 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -49,8 +49,11 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
sizeof(top_iph->flow_lbl));
top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
- dsfield = XFRM_MODE_SKB_CB(skb)->tos;
- dsfield = INET_ECN_encapsulate(dsfield, dsfield);
+ if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+ dsfield = 0;
+ else
+ dsfield = XFRM_MODE_SKB_CB(skb)->tos;
+ dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
if (x->props.flags & XFRM_STATE_NOECN)
dsfield &= ~INET_ECN_MASK;
ipv6_change_dsfield(top_iph, 0, dsfield);
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index d28e7f014cc6..0578d4fa00a9 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -305,8 +305,7 @@ static void irda_connect_response(struct irda_sock *self)
IRDA_DEBUG(2, "%s()\n", __func__);
- skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
- GFP_ATOMIC);
+ skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, GFP_KERNEL);
if (skb == NULL) {
IRDA_DEBUG(0, "%s() Unable to allocate sk_buff!\n",
__func__);
@@ -1120,7 +1119,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol,
}
/* Allocate networking socket */
- sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto);
+ sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto);
if (sk == NULL)
return -ENOMEM;
@@ -1386,6 +1385,8 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
IRDA_DEBUG(4, "%s()\n", __func__);
+ msg->msg_namelen = 0;
+
skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
flags & MSG_DONTWAIT, &err);
if (!skb)
diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c
index 52079f19bbbe..b797daac063c 100644
--- a/net/irda/ircomm/ircomm_core.c
+++ b/net/irda/ircomm/ircomm_core.c
@@ -117,7 +117,7 @@ struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line)
IRDA_ASSERT(ircomm != NULL, return NULL;);
- self = kzalloc(sizeof(struct ircomm_cb), GFP_ATOMIC);
+ self = kzalloc(sizeof(struct ircomm_cb), GFP_KERNEL);
if (self == NULL)
return NULL;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index a7d11ffe4284..e165e8dc962e 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1328,6 +1328,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
struct sk_buff *skb, *rskb, *cskb;
int err = 0;
+ msg->msg_namelen = 0;
+
if ((sk->sk_state == IUCV_DISCONN) &&
skb_queue_empty(&iucv->backlog_skb_q) &&
skb_queue_empty(&sk->sk_receive_queue) &&
@@ -1461,7 +1463,8 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
return iucv_accept_poll(sk);
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= POLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 8555f331ea60..5b1e5af25713 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2693,6 +2693,7 @@ static int key_notify_policy_flush(const struct km_event *c)
hdr->sadb_msg_pid = c->portid;
hdr->sadb_msg_version = PF_KEY_V2;
hdr->sadb_msg_errno = (uint8_t) 0;
+ hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
return 0;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index c74f5a91ff6a..b8a6039314e8 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -690,6 +690,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk,
lsa->l2tp_addr = ipv6_hdr(skb)->saddr;
lsa->l2tp_flowinfo = 0;
lsa->l2tp_scope_id = 0;
+ lsa->l2tp_conn_id = 0;
if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
lsa->l2tp_scope_id = IP6CB(skb)->iif;
}
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 88709882c464..48aaa89253e0 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -720,6 +720,8 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
int target; /* Read at least this many bytes */
long timeo;
+ msg->msg_namelen = 0;
+
lock_sock(sk);
copied = -ENOTCONN;
if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN))
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 1d1ddabd89ca..c50c19402588 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -175,7 +175,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
* add it to the device after the station.
*/
if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) {
- ieee80211_key_free(sdata->local, key);
+ ieee80211_key_free_unused(key);
err = -ENOENT;
goto out_unlock;
}
@@ -214,8 +214,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
}
err = ieee80211_key_link(key, sdata, sta);
- if (err)
- ieee80211_key_free(sdata->local, key);
out_unlock:
mutex_unlock(&sdata->local->sta_mtx);
@@ -254,7 +252,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
goto out_unlock;
}
- __ieee80211_key_free(key, true);
+ ieee80211_key_free(key, true);
ret = 0;
out_unlock:
@@ -445,12 +443,14 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
struct timespec uptime;
+ u64 packets = 0;
+ int ac;
sinfo->generation = sdata->local->sta_generation;
sinfo->filled = STATION_INFO_INACTIVE_TIME |
- STATION_INFO_RX_BYTES |
- STATION_INFO_TX_BYTES |
+ STATION_INFO_RX_BYTES64 |
+ STATION_INFO_TX_BYTES64 |
STATION_INFO_RX_PACKETS |
STATION_INFO_TX_PACKETS |
STATION_INFO_TX_RETRIES |
@@ -467,10 +467,14 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
sinfo->connected_time = uptime.tv_sec - sta->last_connected;
sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
+ sinfo->tx_bytes = 0;
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ sinfo->tx_bytes += sta->tx_bytes[ac];
+ packets += sta->tx_packets[ac];
+ }
+ sinfo->tx_packets = packets;
sinfo->rx_bytes = sta->rx_bytes;
- sinfo->tx_bytes = sta->tx_bytes;
sinfo->rx_packets = sta->rx_packets;
- sinfo->tx_packets = sta->tx_packets;
sinfo->tx_retries = sta->tx_retry_count;
sinfo->tx_failed = sta->tx_retry_failed;
sinfo->rx_dropped_misc = sta->rx_dropped;
@@ -598,8 +602,8 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy,
data[i++] += sta->rx_fragments; \
data[i++] += sta->rx_dropped; \
\
- data[i++] += sta->tx_packets; \
- data[i++] += sta->tx_bytes; \
+ data[i++] += sinfo.tx_packets; \
+ data[i++] += sinfo.tx_bytes; \
data[i++] += sta->tx_fragments; \
data[i++] += sta->tx_filtered_count; \
data[i++] += sta->tx_retry_failed; \
@@ -621,13 +625,14 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy,
if (!(sta && !WARN_ON(sta->sdata->dev != dev)))
goto do_survey;
+ sinfo.filled = 0;
+ sta_set_sinfo(sta, &sinfo);
+
i = 0;
ADD_STA_STATS(sta);
data[i++] = sta->sta_state;
- sinfo.filled = 0;
- sta_set_sinfo(sta, &sinfo);
if (sinfo.filled & STATION_INFO_TX_BITRATE)
data[i] = 100000 *
@@ -2636,7 +2641,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local,
list_del(&dep->list);
mutex_unlock(&local->mtx);
- ieee80211_roc_notify_destroy(dep);
+ ieee80211_roc_notify_destroy(dep, true);
return 0;
}
@@ -2676,7 +2681,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local,
ieee80211_start_next_roc(local);
mutex_unlock(&local->mtx);
- ieee80211_roc_notify_destroy(found);
+ ieee80211_roc_notify_destroy(found, true);
} else {
/* work may be pending so use it all the time */
found->abort = true;
@@ -2686,6 +2691,8 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local,
/* work will clean up etc */
flush_delayed_work(&found->work);
+ WARN_ON(!found->to_be_freed);
+ kfree(found);
}
return 0;
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 78c0d90dd641..931be419ab5a 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -63,6 +63,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local,
enum ieee80211_chanctx_mode mode)
{
struct ieee80211_chanctx *ctx;
+ u32 changed;
int err;
lockdep_assert_held(&local->chanctx_mtx);
@@ -76,6 +77,13 @@ ieee80211_new_chanctx(struct ieee80211_local *local,
ctx->conf.rx_chains_dynamic = 1;
ctx->mode = mode;
+ /* acquire mutex to prevent idle from changing */
+ mutex_lock(&local->mtx);
+ /* turn idle off *before* setting channel -- some drivers need that */
+ changed = ieee80211_idle_off(local);
+ if (changed)
+ ieee80211_hw_config(local, changed);
+
if (!local->use_chanctx) {
local->_oper_channel_type =
cfg80211_get_chandef_type(chandef);
@@ -85,14 +93,17 @@ ieee80211_new_chanctx(struct ieee80211_local *local,
err = drv_add_chanctx(local, ctx);
if (err) {
kfree(ctx);
- return ERR_PTR(err);
+ ctx = ERR_PTR(err);
+
+ ieee80211_recalc_idle(local);
+ goto out;
}
}
+ /* and keep the mutex held until the new chanctx is on the list */
list_add_rcu(&ctx->list, &local->chanctx_list);
- mutex_lock(&local->mtx);
- ieee80211_recalc_idle(local);
+ out:
mutex_unlock(&local->mtx);
return ctx;
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index c3a3082b72e5..1521cabad3d6 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -295,7 +295,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
char buf[50];
struct ieee80211_key *key;
- if (!sdata->debugfs.dir)
+ if (!sdata->vif.debugfs_dir)
return;
lockdep_assert_held(&sdata->local->key_mtx);
@@ -311,7 +311,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
sprintf(buf, "../keys/%d", key->debugfs.cnt);
sdata->debugfs.default_unicast_key =
debugfs_create_symlink("default_unicast_key",
- sdata->debugfs.dir, buf);
+ sdata->vif.debugfs_dir, buf);
}
if (sdata->debugfs.default_multicast_key) {
@@ -325,7 +325,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
sprintf(buf, "../keys/%d", key->debugfs.cnt);
sdata->debugfs.default_multicast_key =
debugfs_create_symlink("default_multicast_key",
- sdata->debugfs.dir, buf);
+ sdata->vif.debugfs_dir, buf);
}
}
@@ -334,7 +334,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)
char buf[50];
struct ieee80211_key *key;
- if (!sdata->debugfs.dir)
+ if (!sdata->vif.debugfs_dir)
return;
key = key_mtx_dereference(sdata->local,
@@ -343,7 +343,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)
sprintf(buf, "../keys/%d", key->debugfs.cnt);
sdata->debugfs.default_mgmt_key =
debugfs_create_symlink("default_mgmt_key",
- sdata->debugfs.dir, buf);
+ sdata->vif.debugfs_dir, buf);
} else
ieee80211_debugfs_key_remove_mgmt_default(sdata);
}
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 059bbb82e84f..ddb426867904 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -521,7 +521,7 @@ IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration,
#endif
#define DEBUGFS_ADD_MODE(name, mode) \
- debugfs_create_file(#name, mode, sdata->debugfs.dir, \
+ debugfs_create_file(#name, mode, sdata->vif.debugfs_dir, \
sdata, &name##_ops);
#define DEBUGFS_ADD(name) DEBUGFS_ADD_MODE(name, 0400)
@@ -577,7 +577,7 @@ static void add_mesh_files(struct ieee80211_sub_if_data *sdata)
static void add_mesh_stats(struct ieee80211_sub_if_data *sdata)
{
struct dentry *dir = debugfs_create_dir("mesh_stats",
- sdata->debugfs.dir);
+ sdata->vif.debugfs_dir);
#define MESHSTATS_ADD(name)\
debugfs_create_file(#name, 0400, dir, sdata, &name##_ops);
@@ -594,7 +594,7 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata)
static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
{
struct dentry *dir = debugfs_create_dir("mesh_config",
- sdata->debugfs.dir);
+ sdata->vif.debugfs_dir);
#define MESHPARAMS_ADD(name) \
debugfs_create_file(#name, 0600, dir, sdata, &name##_ops);
@@ -631,7 +631,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
static void add_files(struct ieee80211_sub_if_data *sdata)
{
- if (!sdata->debugfs.dir)
+ if (!sdata->vif.debugfs_dir)
return;
DEBUGFS_ADD(flags);
@@ -673,21 +673,21 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)
char buf[10+IFNAMSIZ];
sprintf(buf, "netdev:%s", sdata->name);
- sdata->debugfs.dir = debugfs_create_dir(buf,
+ sdata->vif.debugfs_dir = debugfs_create_dir(buf,
sdata->local->hw.wiphy->debugfsdir);
- if (sdata->debugfs.dir)
+ if (sdata->vif.debugfs_dir)
sdata->debugfs.subdir_stations = debugfs_create_dir("stations",
- sdata->debugfs.dir);
+ sdata->vif.debugfs_dir);
add_files(sdata);
}
void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata)
{
- if (!sdata->debugfs.dir)
+ if (!sdata->vif.debugfs_dir)
return;
- debugfs_remove_recursive(sdata->debugfs.dir);
- sdata->debugfs.dir = NULL;
+ debugfs_remove_recursive(sdata->vif.debugfs_dir);
+ sdata->vif.debugfs_dir = NULL;
}
void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)
@@ -695,7 +695,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)
struct dentry *dir;
char buf[10 + IFNAMSIZ];
- dir = sdata->debugfs.dir;
+ dir = sdata->vif.debugfs_dir;
if (!dir)
return;
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 832acea4a5cb..169664c122e2 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -241,6 +241,22 @@ static inline u64 drv_prepare_multicast(struct ieee80211_local *local,
return ret;
}
+static inline void drv_set_multicast_list(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct netdev_hw_addr_list *mc_list)
+{
+ bool allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI;
+
+ trace_drv_set_multicast_list(local, sdata, mc_list->count);
+
+ check_sdata_in_driver(sdata);
+
+ if (local->ops->set_multicast_list)
+ local->ops->set_multicast_list(&local->hw, &sdata->vif,
+ allmulti, mc_list);
+ trace_drv_return_void(local);
+}
+
static inline void drv_configure_filter(struct ieee80211_local *local,
unsigned int changed_flags,
unsigned int *total_flags,
@@ -531,43 +547,6 @@ static inline void drv_sta_remove_debugfs(struct ieee80211_local *local,
local->ops->sta_remove_debugfs(&local->hw, &sdata->vif,
sta, dir);
}
-
-static inline
-void drv_add_interface_debugfs(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
-{
- might_sleep();
-
- check_sdata_in_driver(sdata);
-
- if (!local->ops->add_interface_debugfs)
- return;
-
- local->ops->add_interface_debugfs(&local->hw, &sdata->vif,
- sdata->debugfs.dir);
-}
-
-static inline
-void drv_remove_interface_debugfs(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
-{
- might_sleep();
-
- check_sdata_in_driver(sdata);
-
- if (!local->ops->remove_interface_debugfs)
- return;
-
- local->ops->remove_interface_debugfs(&local->hw, &sdata->vif,
- sdata->debugfs.dir);
-}
-#else
-static inline
-void drv_add_interface_debugfs(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata) {}
-static inline
-void drv_remove_interface_debugfs(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata) {}
#endif
static inline __must_check
@@ -741,13 +720,14 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local)
local->ops->rfkill_poll(&local->hw);
}
-static inline void drv_flush(struct ieee80211_local *local, bool drop)
+static inline void drv_flush(struct ieee80211_local *local,
+ u32 queues, bool drop)
{
might_sleep();
- trace_drv_flush(local, drop);
+ trace_drv_flush(local, queues, drop);
if (local->ops->flush)
- local->ops->flush(&local->hw, drop);
+ local->ops->flush(&local->hw, queues, drop);
trace_drv_return_void(local);
}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f4433f081e77..0b09716d22ad 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -309,6 +309,7 @@ struct ieee80211_roc_work {
struct ieee80211_channel *chan;
bool started, abort, hw_begun, notified;
+ bool to_be_freed;
unsigned long hw_start_time;
@@ -758,7 +759,6 @@ struct ieee80211_sub_if_data {
#ifdef CONFIG_MAC80211_DEBUGFS
struct {
- struct dentry *dir;
struct dentry *subdir_stations;
struct dentry *default_unicast_key;
struct dentry *default_multicast_key;
@@ -800,11 +800,6 @@ enum sdata_queue_type {
enum {
IEEE80211_RX_MSG = 1,
IEEE80211_TX_STATUS_MSG = 2,
- IEEE80211_EOSP_MSG = 3,
-};
-
-struct skb_eosp_msg_data {
- u8 sta[ETH_ALEN], iface[ETH_ALEN];
};
enum queue_stop_reason {
@@ -815,6 +810,7 @@ enum queue_stop_reason {
IEEE80211_QUEUE_STOP_REASON_SUSPEND,
IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL,
+ IEEE80211_QUEUE_STOP_REASON_FLUSH,
};
#ifdef CONFIG_MAC80211_LEDS
@@ -1335,7 +1331,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local);
void ieee80211_roc_setup(struct ieee80211_local *local);
void ieee80211_start_next_roc(struct ieee80211_local *local);
void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata);
-void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc);
+void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free);
void ieee80211_sw_roc_work(struct work_struct *work);
void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc);
@@ -1349,6 +1345,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
enum nl80211_iftype type);
void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata);
void ieee80211_remove_interfaces(struct ieee80211_local *local);
+u32 ieee80211_idle_off(struct ieee80211_local *local);
void ieee80211_recalc_idle(struct ieee80211_local *local);
void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata,
const int offset);
@@ -1528,8 +1525,10 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
struct ieee80211_hdr *hdr, bool ack);
void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
+ unsigned long queues,
enum queue_stop_reason reason);
void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
+ unsigned long queues,
enum queue_stop_reason reason);
void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
enum queue_stop_reason reason);
@@ -1546,6 +1545,8 @@ static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local,
{
ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL);
}
+void ieee80211_flush_queues(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata);
void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
u16 transaction, u16 auth_alg, u16 status,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index d85282f64405..69aaba79a9f7 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -78,7 +78,7 @@ void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER);
}
-static u32 ieee80211_idle_off(struct ieee80211_local *local)
+u32 ieee80211_idle_off(struct ieee80211_local *local)
{
if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE))
return 0;
@@ -92,7 +92,7 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local)
if (local->hw.conf.flags & IEEE80211_CONF_IDLE)
return 0;
- drv_flush(local, false);
+ ieee80211_flush_queues(local, NULL);
local->hw.conf.flags |= IEEE80211_CONF_IDLE;
return IEEE80211_CONF_CHANGE_IDLE;
@@ -349,21 +349,19 @@ static void ieee80211_set_default_queues(struct ieee80211_sub_if_data *sdata)
static int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
- int ret = 0;
+ int ret;
if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF))
return 0;
- mutex_lock(&local->iflist_mtx);
+ ASSERT_RTNL();
if (local->monitor_sdata)
- goto out_unlock;
+ return 0;
sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, GFP_KERNEL);
- if (!sdata) {
- ret = -ENOMEM;
- goto out_unlock;
- }
+ if (!sdata)
+ return -ENOMEM;
/* set up data */
sdata->local = local;
@@ -377,13 +375,13 @@ static int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
if (WARN_ON(ret)) {
/* ok .. stupid driver, it asked for this! */
kfree(sdata);
- goto out_unlock;
+ return ret;
}
ret = ieee80211_check_queues(sdata);
if (ret) {
kfree(sdata);
- goto out_unlock;
+ return ret;
}
ret = ieee80211_vif_use_channel(sdata, &local->monitor_chandef,
@@ -391,13 +389,14 @@ static int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
if (ret) {
drv_remove_interface(local, sdata);
kfree(sdata);
- goto out_unlock;
+ return ret;
}
+ mutex_lock(&local->iflist_mtx);
rcu_assign_pointer(local->monitor_sdata, sdata);
- out_unlock:
mutex_unlock(&local->iflist_mtx);
- return ret;
+
+ return 0;
}
static void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
@@ -407,14 +406,20 @@ static void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
if (!(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF))
return;
+ ASSERT_RTNL();
+
mutex_lock(&local->iflist_mtx);
sdata = rcu_dereference_protected(local->monitor_sdata,
lockdep_is_held(&local->iflist_mtx));
- if (!sdata)
- goto out_unlock;
+ if (!sdata) {
+ mutex_unlock(&local->iflist_mtx);
+ return;
+ }
rcu_assign_pointer(local->monitor_sdata, NULL);
+ mutex_unlock(&local->iflist_mtx);
+
synchronize_net();
ieee80211_vif_release_channel(sdata);
@@ -422,8 +427,6 @@ static void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
drv_remove_interface(local, sdata);
kfree(sdata);
- out_unlock:
- mutex_unlock(&local->iflist_mtx);
}
/*
@@ -557,8 +560,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
goto err_del_interface;
}
- drv_add_interface_debugfs(local, sdata);
-
if (sdata->vif.type == NL80211_IFTYPE_AP) {
local->fif_pspoll++;
local->fif_probe_req++;
@@ -846,8 +847,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
case NL80211_IFTYPE_AP:
skb_queue_purge(&sdata->skb_queue);
- drv_remove_interface_debugfs(local, sdata);
-
if (going_down)
drv_remove_interface(local, sdata);
}
@@ -919,6 +918,17 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
atomic_dec(&local->iff_promiscs);
sdata->flags ^= IEEE80211_SDATA_PROMISC;
}
+
+ /*
+ * TODO: If somebody needs this on AP interfaces,
+ * it can be enabled easily but multicast
+ * addresses from VLANs need to be synced.
+ */
+ if (sdata->vif.type != NL80211_IFTYPE_MONITOR &&
+ sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
+ sdata->vif.type != NL80211_IFTYPE_AP)
+ drv_set_multicast_list(local, sdata, &dev->mc);
+
spin_lock_bh(&local->filter_lock);
__hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len);
spin_unlock_bh(&local->filter_lock);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 99e9f6ae6a54..67059b88fea5 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -248,11 +248,11 @@ void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
}
-static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta,
- bool pairwise,
- struct ieee80211_key *old,
- struct ieee80211_key *new)
+static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ bool pairwise,
+ struct ieee80211_key *old,
+ struct ieee80211_key *new)
{
int idx;
bool defunikey, defmultikey, defmgmtkey;
@@ -397,25 +397,21 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
return key;
}
+static void ieee80211_key_free_common(struct ieee80211_key *key)
+{
+ if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP)
+ ieee80211_aes_key_free(key->u.ccmp.tfm);
+ if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC)
+ ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
+ kfree(key);
+}
+
static void __ieee80211_key_destroy(struct ieee80211_key *key,
bool delay_tailroom)
{
- if (!key)
- return;
-
- /*
- * Synchronize so the TX path can no longer be using
- * this key before we free/remove it.
- */
- synchronize_net();
-
if (key->local)
ieee80211_key_disable_hw_accel(key);
- if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP)
- ieee80211_aes_key_free(key->u.ccmp.tfm);
- if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC)
- ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
if (key->local) {
struct ieee80211_sub_if_data *sdata = key->sdata;
@@ -431,7 +427,28 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key,
}
}
- kfree(key);
+ ieee80211_key_free_common(key);
+}
+
+static void ieee80211_key_destroy(struct ieee80211_key *key,
+ bool delay_tailroom)
+{
+ if (!key)
+ return;
+
+ /*
+ * Synchronize so the TX path can no longer be using
+ * this key before we free/remove it.
+ */
+ synchronize_net();
+
+ __ieee80211_key_destroy(key, delay_tailroom);
+}
+
+void ieee80211_key_free_unused(struct ieee80211_key *key)
+{
+ WARN_ON(key->sdata || key->local);
+ ieee80211_key_free_common(key);
}
int ieee80211_key_link(struct ieee80211_key *key,
@@ -462,19 +479,22 @@ int ieee80211_key_link(struct ieee80211_key *key,
increment_tailroom_need_count(sdata);
- __ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
- __ieee80211_key_destroy(old_key, true);
+ ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
+ ieee80211_key_destroy(old_key, true);
ieee80211_debugfs_key_add(key);
ret = ieee80211_key_enable_hw_accel(key);
+ if (ret)
+ ieee80211_key_free(key, true);
+
mutex_unlock(&sdata->local->key_mtx);
return ret;
}
-void __ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom)
+void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom)
{
if (!key)
return;
@@ -483,18 +503,10 @@ void __ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom)
* Replace key with nothingness if it was ever used.
*/
if (key->sdata)
- __ieee80211_key_replace(key->sdata, key->sta,
+ ieee80211_key_replace(key->sdata, key->sta,
key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
key, NULL);
- __ieee80211_key_destroy(key, delay_tailroom);
-}
-
-void ieee80211_key_free(struct ieee80211_local *local,
- struct ieee80211_key *key)
-{
- mutex_lock(&local->key_mtx);
- __ieee80211_key_free(key, true);
- mutex_unlock(&local->key_mtx);
+ ieee80211_key_destroy(key, delay_tailroom);
}
void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata)
@@ -554,6 +566,7 @@ EXPORT_SYMBOL(ieee80211_iter_keys);
void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_key *key, *tmp;
+ LIST_HEAD(keys);
cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk);
@@ -565,17 +578,65 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata)
ieee80211_debugfs_key_remove_mgmt_default(sdata);
- list_for_each_entry_safe(key, tmp, &sdata->key_list, list)
- __ieee80211_key_free(key, false);
+ list_for_each_entry_safe(key, tmp, &sdata->key_list, list) {
+ ieee80211_key_replace(key->sdata, key->sta,
+ key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+ key, NULL);
+ list_add_tail(&key->list, &keys);
+ }
ieee80211_debugfs_key_update_default(sdata);
+ if (!list_empty(&keys)) {
+ synchronize_net();
+ list_for_each_entry_safe(key, tmp, &keys, list)
+ __ieee80211_key_destroy(key, false);
+ }
+
WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt ||
sdata->crypto_tx_tailroom_pending_dec);
mutex_unlock(&sdata->local->key_mtx);
}
+void ieee80211_free_sta_keys(struct ieee80211_local *local,
+ struct sta_info *sta)
+{
+ struct ieee80211_key *key, *tmp;
+ LIST_HEAD(keys);
+ int i;
+
+ mutex_lock(&local->key_mtx);
+ for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
+ key = key_mtx_dereference(local, sta->gtk[i]);
+ if (!key)
+ continue;
+ ieee80211_key_replace(key->sdata, key->sta,
+ key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+ key, NULL);
+ list_add(&key->list, &keys);
+ }
+
+ key = key_mtx_dereference(local, sta->ptk);
+ if (key) {
+ ieee80211_key_replace(key->sdata, key->sta,
+ key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
+ key, NULL);
+ list_add(&key->list, &keys);
+ }
+
+ /*
+ * NB: the station code relies on this being
+ * done even if there aren't any keys
+ */
+ synchronize_net();
+
+ list_for_each_entry_safe(key, tmp, &keys, list)
+ __ieee80211_key_destroy(key, true);
+
+ mutex_unlock(&local->key_mtx);
+}
+
void ieee80211_delayed_tailroom_dec(struct work_struct *wk)
{
struct ieee80211_sub_if_data *sdata;
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 2a682d81cee9..e8de3e6d7804 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -129,19 +129,20 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
size_t seq_len, const u8 *seq);
/*
* Insert a key into data structures (sdata, sta if necessary)
- * to make it used, free old key.
+ * to make it used, free old key. On failure, also free the new key.
*/
-int __must_check ieee80211_key_link(struct ieee80211_key *key,
- struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta);
-void __ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom);
-void ieee80211_key_free(struct ieee80211_local *local,
- struct ieee80211_key *key);
+int ieee80211_key_link(struct ieee80211_key *key,
+ struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta);
+void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom);
+void ieee80211_key_free_unused(struct ieee80211_key *key);
void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx,
bool uni, bool multi);
void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
int idx);
void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata);
+void ieee80211_free_sta_keys(struct ieee80211_local *local,
+ struct sta_info *sta);
void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata);
#define key_mtx_dereference(local, ref) \
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 5a53aa5ede80..c6f81ecc36a1 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -100,7 +100,6 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
int power;
enum nl80211_channel_type channel_type;
u32 offchannel_flag;
- bool scanning = false;
offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
if (local->scan_channel) {
@@ -147,9 +146,6 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
changed |= IEEE80211_CONF_CHANGE_SMPS;
}
- scanning = test_bit(SCAN_SW_SCANNING, &local->scanning) ||
- test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning) ||
- test_bit(SCAN_HW_SCANNING, &local->scanning);
power = chan->max_power;
rcu_read_lock();
@@ -226,8 +222,6 @@ u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
static void ieee80211_tasklet_handler(unsigned long data)
{
struct ieee80211_local *local = (struct ieee80211_local *) data;
- struct sta_info *sta, *tmp;
- struct skb_eosp_msg_data *eosp_data;
struct sk_buff *skb;
while ((skb = skb_dequeue(&local->skb_queue)) ||
@@ -243,18 +237,6 @@ static void ieee80211_tasklet_handler(unsigned long data)
skb->pkt_type = 0;
ieee80211_tx_status(&local->hw, skb);
break;
- case IEEE80211_EOSP_MSG:
- eosp_data = (void *)skb->cb;
- for_each_sta_info(local, eosp_data->sta, sta, tmp) {
- /* skip wrong virtual interface */
- if (memcmp(eosp_data->iface,
- sta->sdata->vif.addr, ETH_ALEN))
- continue;
- clear_sta_flag(sta, WLAN_STA_SP);
- break;
- }
- dev_kfree_skb(skb);
- break;
default:
WARN(1, "mac80211: Packet is of unknown type %d\n",
skb->pkt_type);
@@ -295,8 +277,8 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
"Hardware restart was requested\n");
/* use this reason, ieee80211_reconfig will unblock it */
- ieee80211_stop_queues_by_reason(hw,
- IEEE80211_QUEUE_STOP_REASON_SUSPEND);
+ ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_SUSPEND);
/*
* Stop all Rx during the reconfig. We don't want state changes
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 5ac017f3fcd2..123a300cef57 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -699,10 +699,8 @@ out_free:
static int
ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh)
{
- struct ieee80211_sub_if_data *sdata;
struct beacon_data *old_bcn;
int ret;
- sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh);
mutex_lock(&ifmsh->mtx);
@@ -833,9 +831,8 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *hdr;
struct ieee802_11_elems elems;
size_t baselen;
- u8 *pos, *end;
+ u8 *pos;
- end = ((u8 *) mgmt) + len;
pos = mgmt->u.probe_req.variable;
baselen = (u8 *) pos - (u8 *) mgmt;
if (baselen > len)
@@ -1007,7 +1004,8 @@ void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local)
rcu_read_lock();
list_for_each_entry_rcu(sdata, &local->interfaces, list)
- if (ieee80211_vif_is_mesh(&sdata->vif))
+ if (ieee80211_vif_is_mesh(&sdata->vif) &&
+ ieee80211_sdata_running(sdata))
ieee80211_queue_work(&local->hw, &sdata->work);
rcu_read_unlock();
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index fdc06e381c10..e06dbbf8cb4c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1009,6 +1009,7 @@ static void ieee80211_chswitch_work(struct work_struct *work)
/* XXX: wait for a beacon first? */
ieee80211_wake_queues_by_reason(&sdata->local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
out:
ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
@@ -1108,6 +1109,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
if (sw_elem->mode)
ieee80211_stop_queues_by_reason(&sdata->local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
if (sdata->local->ops->channel_switch) {
@@ -1375,6 +1377,7 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work)
}
ieee80211_wake_queues_by_reason(&local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_PS);
}
@@ -1436,7 +1439,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
else {
ieee80211_send_nullfunc(local, sdata, 1);
/* Flush to get the tx status of nullfunc frame */
- drv_flush(local, false);
+ ieee80211_flush_queues(local, sdata);
}
}
@@ -1767,7 +1770,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
/* flush out any pending frame (e.g. DELBA) before deauth/disassoc */
if (tx)
- drv_flush(local, false);
+ ieee80211_flush_queues(local, sdata);
/* deauthenticate/disassociate now */
if (tx || frame_buf)
@@ -1776,7 +1779,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
/* flush out frame */
if (tx)
- drv_flush(local, false);
+ ieee80211_flush_queues(local, sdata);
/* clear bssid only after building the needed mgmt frames */
memset(ifmgd->bssid, 0, ETH_ALEN);
@@ -1948,7 +1951,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms);
run_again(ifmgd, ifmgd->probe_timeout);
if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
- drv_flush(sdata->local, false);
+ ieee80211_flush_queues(sdata->local, sdata);
}
static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
@@ -2071,6 +2074,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
true, frame_buf);
ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
ieee80211_wake_queues_by_reason(&sdata->local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_CSA);
mutex_unlock(&ifmgd->mtx);
@@ -3527,8 +3531,10 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
/* Restart STA timers */
rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list)
- ieee80211_restart_sta_timer(sdata);
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ if (ieee80211_sdata_running(sdata))
+ ieee80211_restart_sta_timer(sdata);
+ }
rcu_read_unlock();
}
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index db547fceaeb9..cce795871ab1 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -118,9 +118,9 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local)
* Stop queues and transmit all frames queued by the driver
* before sending nullfunc to enable powersave at the AP.
*/
- ieee80211_stop_queues_by_reason(&local->hw,
+ ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL);
- drv_flush(local, false);
+ ieee80211_flush_queues(local, NULL);
mutex_lock(&local->iflist_mtx);
list_for_each_entry(sdata, &local->interfaces, list) {
@@ -181,7 +181,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local)
}
mutex_unlock(&local->iflist_mtx);
- ieee80211_wake_queues_by_reason(&local->hw,
+ ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL);
}
@@ -297,10 +297,13 @@ void ieee80211_start_next_roc(struct ieee80211_local *local)
}
}
-void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc)
+void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free)
{
struct ieee80211_roc_work *dep, *tmp;
+ if (WARN_ON(roc->to_be_freed))
+ return;
+
/* was never transmitted */
if (roc->frame) {
cfg80211_mgmt_tx_status(&roc->sdata->wdev,
@@ -316,9 +319,12 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc)
GFP_KERNEL);
list_for_each_entry_safe(dep, tmp, &roc->dependents, list)
- ieee80211_roc_notify_destroy(dep);
+ ieee80211_roc_notify_destroy(dep, true);
- kfree(roc);
+ if (free)
+ kfree(roc);
+ else
+ roc->to_be_freed = true;
}
void ieee80211_sw_roc_work(struct work_struct *work)
@@ -331,6 +337,9 @@ void ieee80211_sw_roc_work(struct work_struct *work)
mutex_lock(&local->mtx);
+ if (roc->to_be_freed)
+ goto out_unlock;
+
if (roc->abort)
goto finish;
@@ -370,10 +379,10 @@ void ieee80211_sw_roc_work(struct work_struct *work)
finish:
list_del(&roc->list);
started = roc->started;
- ieee80211_roc_notify_destroy(roc);
+ ieee80211_roc_notify_destroy(roc, !roc->abort);
if (started) {
- drv_flush(local, false);
+ ieee80211_flush_queues(local, NULL);
local->tmp_channel = NULL;
ieee80211_hw_config(local, 0);
@@ -410,7 +419,7 @@ static void ieee80211_hw_roc_done(struct work_struct *work)
list_del(&roc->list);
- ieee80211_roc_notify_destroy(roc);
+ ieee80211_roc_notify_destroy(roc, true);
/* if there's another roc, start it now */
ieee80211_start_next_roc(local);
@@ -460,12 +469,14 @@ void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata)
list_for_each_entry_safe(roc, tmp, &tmp_list, list) {
if (local->ops->remain_on_channel) {
list_del(&roc->list);
- ieee80211_roc_notify_destroy(roc);
+ ieee80211_roc_notify_destroy(roc, true);
} else {
ieee80211_queue_delayed_work(&local->hw, &roc->work, 0);
/* work will clean up etc */
flush_delayed_work(&roc->work);
+ WARN_ON(!roc->to_be_freed);
+ kfree(roc);
}
}
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index b471a67f224d..3d16f4e61743 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -30,12 +30,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
}
ieee80211_stop_queues_by_reason(hw,
- IEEE80211_QUEUE_STOP_REASON_SUSPEND);
+ IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_SUSPEND);
/* flush out all packets */
synchronize_net();
- drv_flush(local, false);
+ ieee80211_flush_queues(local, NULL);
local->quiescing = true;
/* make quiescing visible to timers everywhere */
@@ -68,6 +69,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
mutex_unlock(&local->sta_mtx);
}
ieee80211_wake_queues_by_reason(hw,
+ IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_SUSPEND);
return err;
} else if (err > 0) {
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 749552bdcfe1..d2b264d1311d 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -202,14 +202,23 @@ minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate)
struct minstrel_rate_stats *mr;
unsigned int nsecs = 0;
unsigned int tp;
+ unsigned int prob;
mr = &mi->groups[group].rates[rate];
+ prob = mr->probability;
- if (mr->probability < MINSTREL_FRAC(1, 10)) {
+ if (prob < MINSTREL_FRAC(1, 10)) {
mr->cur_tp = 0;
return;
}
+ /*
+ * For the throughput calculation, limit the probability value to 90% to
+ * account for collision related packet error rate fluctuation
+ */
+ if (prob > MINSTREL_FRAC(9, 10))
+ prob = MINSTREL_FRAC(9, 10);
+
if (group != MINSTREL_CCK_GROUP)
nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
@@ -639,15 +648,18 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
/*
* Sampling might add some overhead (RTS, no aggregation)
* to the frame. Hence, don't use sampling for the currently
- * used max TP rate.
+ * used rates.
*/
- if (sample_idx == mi->max_tp_rate)
+ if (sample_idx == mi->max_tp_rate ||
+ sample_idx == mi->max_tp_rate2 ||
+ sample_idx == mi->max_prob_rate)
return -1;
+
/*
- * When not using MRR, do not sample if the probability is already
- * higher than 95% to avoid wasting airtime
+ * Do not sample if the probability is already higher than 95%
+ * to avoid wasting airtime.
*/
- if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100)))
+ if (mr->probability > MINSTREL_FRAC(95, 100))
return -1;
/*
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5b4492af4e85..2528b5a4d6d4 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2666,7 +2666,19 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
memset(nskb->cb, 0, sizeof(nskb->cb));
- ieee80211_tx_skb(rx->sdata, nskb);
+ if (rx->sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) {
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(nskb);
+
+ info->flags = IEEE80211_TX_CTL_TX_OFFCHAN |
+ IEEE80211_TX_INTFL_OFFCHAN_TX_OK |
+ IEEE80211_TX_CTL_NO_CCK_RATE;
+ if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)
+ info->hw_queue =
+ local->hw.offchannel_tx_hw_queue;
+ }
+
+ __ieee80211_tx_skb_tid_band(rx->sdata, nskb, 7,
+ status->band);
}
dev_kfree_skb(rx->skb);
return RX_QUEUED;
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 43a45cf00e06..cb34cbbaa20c 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -153,7 +153,6 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
u8 *elements;
struct ieee80211_channel *channel;
size_t baselen;
- bool beacon;
struct ieee802_11_elems elems;
if (skb->len < 24 ||
@@ -175,11 +174,9 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
elements = mgmt->u.probe_resp.variable;
baselen = offsetof(struct ieee80211_mgmt, u.probe_resp.variable);
- beacon = false;
} else {
baselen = offsetof(struct ieee80211_mgmt, u.beacon.variable);
elements = mgmt->u.beacon.variable;
- beacon = true;
}
if (baselen > skb->len)
@@ -335,7 +332,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local)
ieee80211_offchannel_stop_vifs(local);
/* ensure nullfunc is transmitted before leaving operating channel */
- drv_flush(local, false);
+ ieee80211_flush_queues(local, NULL);
ieee80211_configure_filter(local);
@@ -671,7 +668,7 @@ static void ieee80211_scan_state_resume(struct ieee80211_local *local,
ieee80211_offchannel_stop_vifs(local);
if (local->ops->flush) {
- drv_flush(local, false);
+ ieee80211_flush_queues(local, NULL);
*next_delay = 0;
} else
*next_delay = HZ / 10;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 3644ad79688a..11216bc13b27 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -556,6 +556,15 @@ static inline void __bss_tim_clear(u8 *tim, u16 id)
tim[id / 8] &= ~(1 << (id % 8));
}
+static inline bool __bss_tim_get(u8 *tim, u16 id)
+{
+ /*
+ * This format has been mandated by the IEEE specifications,
+ * so this line may not be changed to use the test_bit() format.
+ */
+ return tim[id / 8] & (1 << (id % 8));
+}
+
static unsigned long ieee80211_tids_for_ac(int ac)
{
/* If we ever support TIDs > 7, this obviously needs to be adjusted */
@@ -636,6 +645,9 @@ void sta_info_recalc_tim(struct sta_info *sta)
done:
spin_lock_bh(&local->tim_lock);
+ if (indicate_tim == __bss_tim_get(ps->tim, id))
+ goto out_unlock;
+
if (indicate_tim)
__bss_tim_set(ps->tim, id);
else
@@ -647,6 +659,7 @@ void sta_info_recalc_tim(struct sta_info *sta)
local->tim_in_locked_section = false;
}
+out_unlock:
spin_unlock_bh(&local->tim_lock);
}
@@ -770,7 +783,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
{
struct ieee80211_local *local;
struct ieee80211_sub_if_data *sdata;
- int ret, i;
+ int ret;
might_sleep();
@@ -797,14 +810,8 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
list_del_rcu(&sta->list);
- mutex_lock(&local->key_mtx);
- for (i = 0; i < NUM_DEFAULT_KEYS; i++)
- __ieee80211_key_free(key_mtx_dereference(local, sta->gtk[i]),
- true);
- if (sta->ptk)
- __ieee80211_key_free(key_mtx_dereference(local, sta->ptk),
- true);
- mutex_unlock(&local->key_mtx);
+ /* this always calls synchronize_net() */
+ ieee80211_free_sta_keys(local, sta);
sta->dead = true;
@@ -1390,30 +1397,16 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL(ieee80211_sta_block_awake);
-void ieee80211_sta_eosp_irqsafe(struct ieee80211_sta *pubsta)
+void ieee80211_sta_eosp(struct ieee80211_sta *pubsta)
{
struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
struct ieee80211_local *local = sta->local;
- struct sk_buff *skb;
- struct skb_eosp_msg_data *data;
trace_api_eosp(local, pubsta);
- skb = alloc_skb(0, GFP_ATOMIC);
- if (!skb) {
- /* too bad ... but race is better than loss */
- clear_sta_flag(sta, WLAN_STA_SP);
- return;
- }
-
- data = (void *)skb->cb;
- memcpy(data->sta, pubsta->addr, ETH_ALEN);
- memcpy(data->iface, sta->sdata->vif.addr, ETH_ALEN);
- skb->pkt_type = IEEE80211_EOSP_MSG;
- skb_queue_tail(&local->skb_queue, skb);
- tasklet_schedule(&local->tasklet);
+ clear_sta_flag(sta, WLAN_STA_SP);
}
-EXPORT_SYMBOL(ieee80211_sta_eosp_irqsafe);
+EXPORT_SYMBOL(ieee80211_sta_eosp);
void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
u8 tid, bool buffered)
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index e5868c32d1a3..adc30045f99e 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -333,7 +333,8 @@ struct sta_info {
unsigned long driver_buffered_tids;
/* Updated from RX path only, no locking requirements */
- unsigned long rx_packets, rx_bytes;
+ unsigned long rx_packets;
+ u64 rx_bytes;
unsigned long wep_weak_iv_count;
unsigned long last_rx;
long last_connected;
@@ -353,9 +354,9 @@ struct sta_info {
unsigned int fail_avg;
/* Updated from TX path only, no locking requirements */
- unsigned long tx_packets;
- unsigned long tx_bytes;
- unsigned long tx_fragments;
+ u32 tx_fragments;
+ u64 tx_packets[IEEE80211_NUM_ACS];
+ u64 tx_bytes[IEEE80211_NUM_ACS];
struct ieee80211_tx_rate last_tx_rate;
int last_rx_rate_idx;
u32 last_rx_rate_flag;
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index e7db2b804e0c..c5899797a8d4 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -431,6 +431,30 @@ TRACE_EVENT(drv_prepare_multicast,
)
);
+TRACE_EVENT(drv_set_multicast_list,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata, int mc_count),
+
+ TP_ARGS(local, sdata, mc_count),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(bool, allmulti)
+ __field(int, mc_count)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI;
+ __entry->mc_count = mc_count;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT " configure mc filter, count=%d, allmulti=%d",
+ LOCAL_PR_ARG, __entry->mc_count, __entry->allmulti
+ )
+);
+
TRACE_EVENT(drv_configure_filter,
TP_PROTO(struct ieee80211_local *local,
unsigned int changed_flags,
@@ -940,23 +964,26 @@ TRACE_EVENT(drv_get_survey,
);
TRACE_EVENT(drv_flush,
- TP_PROTO(struct ieee80211_local *local, bool drop),
+ TP_PROTO(struct ieee80211_local *local,
+ u32 queues, bool drop),
- TP_ARGS(local, drop),
+ TP_ARGS(local, queues, drop),
TP_STRUCT__entry(
LOCAL_ENTRY
__field(bool, drop)
+ __field(u32, queues)
),
TP_fast_assign(
LOCAL_ASSIGN;
__entry->drop = drop;
+ __entry->queues = queues;
),
TP_printk(
- LOCAL_PR_FMT " drop:%d",
- LOCAL_PR_ARG, __entry->drop
+ LOCAL_PR_FMT " queues:0x%x drop:%d",
+ LOCAL_PR_ARG, __entry->queues, __entry->drop
)
);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 4e8a86163fc7..9e67cc97b87b 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -233,6 +233,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx)
if (local->hw.conf.flags & IEEE80211_CONF_PS) {
ieee80211_stop_queues_by_reason(&local->hw,
+ IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_PS);
ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
ieee80211_queue_work(&local->hw,
@@ -991,15 +992,18 @@ static ieee80211_tx_result debug_noinline
ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
{
struct sk_buff *skb;
+ int ac = -1;
if (!tx->sta)
return TX_CONTINUE;
- tx->sta->tx_packets++;
skb_queue_walk(&tx->skbs, skb) {
+ ac = skb_get_queue_mapping(skb);
tx->sta->tx_fragments++;
- tx->sta->tx_bytes += skb->len;
+ tx->sta->tx_bytes[ac] += skb->len;
}
+ if (ac >= 0)
+ tx->sta->tx_packets[ac]++;
return TX_CONTINUE;
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index b7a856e3281b..a7368870c8ee 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -453,7 +453,8 @@ void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local,
}
void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
- enum queue_stop_reason reason)
+ unsigned long queues,
+ enum queue_stop_reason reason)
{
struct ieee80211_local *local = hw_to_local(hw);
unsigned long flags;
@@ -461,7 +462,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
- for (i = 0; i < hw->queues; i++)
+ for_each_set_bit(i, &queues, hw->queues)
__ieee80211_stop_queue(hw, i, reason);
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
@@ -469,7 +470,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
void ieee80211_stop_queues(struct ieee80211_hw *hw)
{
- ieee80211_stop_queues_by_reason(hw,
+ ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_DRIVER);
}
EXPORT_SYMBOL(ieee80211_stop_queues);
@@ -491,6 +492,7 @@ int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue)
EXPORT_SYMBOL(ieee80211_queue_stopped);
void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
+ unsigned long queues,
enum queue_stop_reason reason)
{
struct ieee80211_local *local = hw_to_local(hw);
@@ -499,7 +501,7 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
- for (i = 0; i < hw->queues; i++)
+ for_each_set_bit(i, &queues, hw->queues)
__ieee80211_wake_queue(hw, i, reason);
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
@@ -507,10 +509,42 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
void ieee80211_wake_queues(struct ieee80211_hw *hw)
{
- ieee80211_wake_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_DRIVER);
+ ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_DRIVER);
}
EXPORT_SYMBOL(ieee80211_wake_queues);
+void ieee80211_flush_queues(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ u32 queues;
+
+ if (!local->ops->flush)
+ return;
+
+ if (sdata && local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) {
+ int ac;
+
+ queues = 0;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+ queues |= BIT(sdata->vif.hw_queue[ac]);
+ if (sdata->vif.cab_queue != IEEE80211_INVAL_HW_QUEUE)
+ queues |= BIT(sdata->vif.cab_queue);
+ } else {
+ /* all queues */
+ queues = BIT(local->hw.queues) - 1;
+ }
+
+ ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_FLUSH);
+
+ drv_flush(local, queues, false);
+
+ ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_FLUSH);
+}
+
void ieee80211_iterate_active_interfaces(
struct ieee80211_hw *hw, u32 iter_flags,
void (*iterator)(void *data, u8 *mac,
@@ -1651,8 +1685,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
mutex_unlock(&local->sta_mtx);
}
- ieee80211_wake_queues_by_reason(hw,
- IEEE80211_QUEUE_STOP_REASON_SUSPEND);
+ ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
+ IEEE80211_QUEUE_STOP_REASON_SUSPEND);
/*
* If this is for hw restart things are still running.
diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h
index 21fa386f4675..5c9e021994ba 100644
--- a/net/mac802154/mac802154.h
+++ b/net/mac802154/mac802154.h
@@ -88,8 +88,6 @@ struct mac802154_sub_if_data {
#define mac802154_to_priv(_hw) container_of(_hw, struct mac802154_priv, hw)
-#define MAC802154_MAX_XMIT_ATTEMPTS 3
-
#define MAC802154_CHAN_NONE (~(u8)0) /* No channel is assigned */
extern struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced;
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index f03e55f2ebf0..8ded97cf1c33 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -176,9 +176,15 @@ static void phy_chan_notify(struct work_struct *work)
struct mac802154_sub_if_data *priv = netdev_priv(nw->dev);
int res;
+ mutex_lock(&priv->hw->phy->pib_lock);
res = hw->ops->set_channel(&hw->hw, priv->page, priv->chan);
if (res)
pr_debug("set_channel failed\n");
+ else {
+ priv->hw->phy->current_channel = priv->chan;
+ priv->hw->phy->current_page = priv->page;
+ }
+ mutex_unlock(&priv->hw->phy->pib_lock);
kfree(nw);
}
@@ -195,8 +201,11 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)
priv->chan = chan;
spin_unlock_bh(&priv->mib_lock);
+ mutex_lock(&priv->hw->phy->pib_lock);
if (priv->hw->phy->current_channel != priv->chan ||
priv->hw->phy->current_page != priv->page) {
+ mutex_unlock(&priv->hw->phy->pib_lock);
+
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return;
@@ -204,5 +213,6 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)
INIT_WORK(&work->work, phy_chan_notify);
work->dev = dev;
queue_work(priv->hw->dev_workqueue, &work->work);
- }
+ } else
+ mutex_unlock(&priv->hw->phy->pib_lock);
}
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 4e09d070995a..6d1647399d4f 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -25,6 +25,7 @@
#include <linux/if_arp.h>
#include <linux/crc-ccitt.h>
+#include <net/ieee802154_netdev.h>
#include <net/mac802154.h>
#include <net/wpan-phy.h>
@@ -39,12 +40,12 @@ struct xmit_work {
struct mac802154_priv *priv;
u8 chan;
u8 page;
- u8 xmit_attempts;
};
static void mac802154_xmit_worker(struct work_struct *work)
{
struct xmit_work *xw = container_of(work, struct xmit_work, work);
+ struct mac802154_sub_if_data *sdata;
int res;
mutex_lock(&xw->priv->phy->pib_lock);
@@ -57,21 +58,23 @@ static void mac802154_xmit_worker(struct work_struct *work)
pr_debug("set_channel failed\n");
goto out;
}
+
+ xw->priv->phy->current_channel = xw->chan;
+ xw->priv->phy->current_page = xw->page;
}
res = xw->priv->ops->xmit(&xw->priv->hw, xw->skb);
+ if (res)
+ pr_debug("transmission failed\n");
out:
mutex_unlock(&xw->priv->phy->pib_lock);
- if (res) {
- if (xw->xmit_attempts++ < MAC802154_MAX_XMIT_ATTEMPTS) {
- queue_work(xw->priv->dev_workqueue, &xw->work);
- return;
- } else
- pr_debug("transmission failed for %d times",
- MAC802154_MAX_XMIT_ATTEMPTS);
- }
+ /* Restart the netif queue on each sub_if_data object. */
+ rcu_read_lock();
+ list_for_each_entry_rcu(sdata, &xw->priv->slaves, list)
+ netif_wake_queue(sdata->dev);
+ rcu_read_unlock();
dev_kfree_skb(xw->skb);
@@ -82,6 +85,7 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
u8 page, u8 chan)
{
struct xmit_work *work;
+ struct mac802154_sub_if_data *sdata;
if (!(priv->phy->channels_supported[page] & (1 << chan))) {
WARN_ON(1);
@@ -109,12 +113,17 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
return NETDEV_TX_BUSY;
}
+ /* Stop the netif queue on each sub_if_data object. */
+ rcu_read_lock();
+ list_for_each_entry_rcu(sdata, &priv->slaves, list)
+ netif_stop_queue(sdata->dev);
+ rcu_read_unlock();
+
INIT_WORK(&work->work, mac802154_xmit_worker);
work->skb = skb;
work->priv = priv;
work->page = page;
work->chan = chan;
- work->xmit_attempts = 0;
queue_work(priv->dev_workqueue, &work->work);
diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index 7d3f6594ed4f..2ca2f4dceab7 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c
@@ -360,7 +360,7 @@ void mac802154_wpan_setup(struct net_device *dev)
dev->header_ops = &mac802154_header_ops;
dev->needed_tailroom = 2; /* FCS */
dev->mtu = IEEE802154_MTU;
- dev->tx_queue_len = 10;
+ dev->tx_queue_len = 300;
dev->type = ARPHRD_IEEE802154;
dev->flags = IFF_NOARP | IFF_BROADCAST;
dev->watchdog_timeo = 0;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a9c488b6c50d..7d97302f7c07 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -276,10 +276,30 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
EXPORT_SYMBOL(nf_nat_decode_session_hook);
#endif
+static int __net_init netfilter_net_init(struct net *net)
+{
#ifdef CONFIG_PROC_FS
-struct proc_dir_entry *proc_net_netfilter;
-EXPORT_SYMBOL(proc_net_netfilter);
+ net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
+ net->proc_net);
+ if (!net->nf.proc_netfilter) {
+ if (!net_eq(net, &init_net))
+ pr_err("cannot create netfilter proc entry");
+
+ return -ENOMEM;
+ }
#endif
+ return 0;
+}
+
+static void __net_exit netfilter_net_exit(struct net *net)
+{
+ remove_proc_entry("netfilter", net->proc_net);
+}
+
+static struct pernet_operations netfilter_net_ops = {
+ .init = netfilter_net_init,
+ .exit = netfilter_net_exit,
+};
void __init netfilter_init(void)
{
@@ -289,11 +309,8 @@ void __init netfilter_init(void)
INIT_LIST_HEAD(&nf_hooks[i][h]);
}
-#ifdef CONFIG_PROC_FS
- proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net);
- if (!proc_net_netfilter)
+ if (register_pernet_subsys(&netfilter_net_ops) < 0)
panic("cannot create netfilter proc entry");
-#endif
if (netfilter_log_init() < 0)
panic("cannot initialize nf_log");
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 0b779d7df881..dfd7b65b3d2a 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -58,6 +58,18 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
module_put(app->module);
}
+static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
+{
+ kfree(inc->timeout_table);
+ kfree(inc);
+}
+
+static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);
+
+ ip_vs_app_inc_destroy(inc);
+}
/*
* Allocate/initialize app incarnation and register it in proto apps.
@@ -106,8 +118,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
return 0;
out:
- kfree(inc->timeout_table);
- kfree(inc);
+ ip_vs_app_inc_destroy(inc);
return ret;
}
@@ -131,8 +142,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
list_del(&inc->a_list);
- kfree(inc->timeout_table);
- kfree(inc);
+ call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
}
@@ -144,9 +154,9 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc)
{
int result;
- atomic_inc(&inc->usecnt);
- if (unlikely((result = ip_vs_app_get(inc->app)) != 1))
- atomic_dec(&inc->usecnt);
+ result = ip_vs_app_get(inc->app);
+ if (result)
+ atomic_inc(&inc->usecnt);
return result;
}
@@ -156,8 +166,8 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc)
*/
void ip_vs_app_inc_put(struct ip_vs_app *inc)
{
- ip_vs_app_put(inc->app);
atomic_dec(&inc->usecnt);
+ ip_vs_app_put(inc->app);
}
@@ -218,6 +228,7 @@ out_unlock:
/*
* ip_vs_app unregistration routine
* We are sure there are no app incarnations attached to services
+ * Caller should use synchronize_rcu() or rcu_barrier()
*/
void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
@@ -341,14 +352,14 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
unsigned int flag, __u32 seq, int diff)
{
/* spinlock is to keep updating cp->flags atomic */
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
vseq->previous_delta = vseq->delta;
vseq->delta += diff;
vseq->init_seq = seq;
cp->flags |= flag;
}
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
}
static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 704e514e02ab..de6475894a39 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -79,51 +79,21 @@ static unsigned int ip_vs_conn_rnd __read_mostly;
struct ip_vs_aligned_lock
{
- rwlock_t l;
+ spinlock_t l;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
/* lock array for conn table */
static struct ip_vs_aligned_lock
__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
-static inline void ct_read_lock(unsigned int key)
-{
- read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_read_unlock(unsigned int key)
-{
- read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_write_lock(unsigned int key)
-{
- write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_write_unlock(unsigned int key)
-{
- write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_read_lock_bh(unsigned int key)
-{
- read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_read_unlock_bh(unsigned int key)
-{
- read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
static inline void ct_write_lock_bh(unsigned int key)
{
- write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+ spin_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
static inline void ct_write_unlock_bh(unsigned int key)
{
- write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+ spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
}
@@ -197,13 +167,13 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
/* Hash by protocol, client address and port */
hash = ip_vs_conn_hashkey_conn(cp);
- ct_write_lock(hash);
+ ct_write_lock_bh(hash);
spin_lock(&cp->lock);
if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
- hlist_add_head(&cp->c_list, &ip_vs_conn_tab[hash]);
cp->flags |= IP_VS_CONN_F_HASHED;
atomic_inc(&cp->refcnt);
+ hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
ret = 1;
} else {
pr_err("%s(): request for already hashed, called from %pF\n",
@@ -212,7 +182,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
}
spin_unlock(&cp->lock);
- ct_write_unlock(hash);
+ ct_write_unlock_bh(hash);
return ret;
}
@@ -220,7 +190,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
/*
* UNhashes ip_vs_conn from ip_vs_conn_tab.
- * returns bool success.
+ * returns bool success. Caller should hold conn reference.
*/
static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
{
@@ -230,11 +200,11 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
/* unhash it and decrease its reference counter */
hash = ip_vs_conn_hashkey_conn(cp);
- ct_write_lock(hash);
+ ct_write_lock_bh(hash);
spin_lock(&cp->lock);
if (cp->flags & IP_VS_CONN_F_HASHED) {
- hlist_del(&cp->c_list);
+ hlist_del_rcu(&cp->c_list);
cp->flags &= ~IP_VS_CONN_F_HASHED;
atomic_dec(&cp->refcnt);
ret = 1;
@@ -242,7 +212,37 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
ret = 0;
spin_unlock(&cp->lock);
- ct_write_unlock(hash);
+ ct_write_unlock_bh(hash);
+
+ return ret;
+}
+
+/* Try to unlink ip_vs_conn from ip_vs_conn_tab.
+ * returns bool success.
+ */
+static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
+{
+ unsigned int hash;
+ bool ret;
+
+ hash = ip_vs_conn_hashkey_conn(cp);
+
+ ct_write_lock_bh(hash);
+ spin_lock(&cp->lock);
+
+ if (cp->flags & IP_VS_CONN_F_HASHED) {
+ ret = false;
+ /* Decrease refcnt and unlink conn only if we are last user */
+ if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) {
+ hlist_del_rcu(&cp->c_list);
+ cp->flags &= ~IP_VS_CONN_F_HASHED;
+ ret = true;
+ }
+ } else
+ ret = atomic_read(&cp->refcnt) ? false : true;
+
+ spin_unlock(&cp->lock);
+ ct_write_unlock_bh(hash);
return ret;
}
@@ -262,24 +262,25 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
hash = ip_vs_conn_hashkey_param(p, false);
- ct_read_lock(hash);
+ rcu_read_lock();
- hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
- if (cp->af == p->af &&
- p->cport == cp->cport && p->vport == cp->vport &&
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
+ if (p->cport == cp->cport && p->vport == cp->vport &&
+ cp->af == p->af &&
ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
p->protocol == cp->protocol &&
ip_vs_conn_net_eq(cp, p->net)) {
+ if (!__ip_vs_conn_get(cp))
+ continue;
/* HIT */
- atomic_inc(&cp->refcnt);
- ct_read_unlock(hash);
+ rcu_read_unlock();
return cp;
}
}
- ct_read_unlock(hash);
+ rcu_read_unlock();
return NULL;
}
@@ -346,14 +347,16 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
hash = ip_vs_conn_hashkey_param(p, false);
- ct_read_lock(hash);
+ rcu_read_lock();
- hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
- if (!ip_vs_conn_net_eq(cp, p->net))
- continue;
- if (p->pe_data && p->pe->ct_match) {
- if (p->pe == cp->pe && p->pe->ct_match(p, cp))
- goto out;
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
+ if (unlikely(p->pe_data && p->pe->ct_match)) {
+ if (!ip_vs_conn_net_eq(cp, p->net))
+ continue;
+ if (p->pe == cp->pe && p->pe->ct_match(p, cp)) {
+ if (__ip_vs_conn_get(cp))
+ goto out;
+ }
continue;
}
@@ -363,17 +366,18 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
* p->vaddr is a fwmark */
ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC :
p->af, p->vaddr, &cp->vaddr) &&
- p->cport == cp->cport && p->vport == cp->vport &&
+ p->vport == cp->vport && p->cport == cp->cport &&
cp->flags & IP_VS_CONN_F_TEMPLATE &&
- p->protocol == cp->protocol)
- goto out;
+ p->protocol == cp->protocol &&
+ ip_vs_conn_net_eq(cp, p->net)) {
+ if (__ip_vs_conn_get(cp))
+ goto out;
+ }
}
cp = NULL;
out:
- if (cp)
- atomic_inc(&cp->refcnt);
- ct_read_unlock(hash);
+ rcu_read_unlock();
IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
ip_vs_proto_name(p->protocol),
@@ -398,23 +402,24 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
*/
hash = ip_vs_conn_hashkey_param(p, true);
- ct_read_lock(hash);
+ rcu_read_lock();
- hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
- if (cp->af == p->af &&
- p->vport == cp->cport && p->cport == cp->dport &&
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
+ if (p->vport == cp->cport && p->cport == cp->dport &&
+ cp->af == p->af &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
p->protocol == cp->protocol &&
ip_vs_conn_net_eq(cp, p->net)) {
+ if (!__ip_vs_conn_get(cp))
+ continue;
/* HIT */
- atomic_inc(&cp->refcnt);
ret = cp;
break;
}
}
- ct_read_unlock(hash);
+ rcu_read_unlock();
IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
ip_vs_proto_name(p->protocol),
@@ -457,13 +462,13 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)
void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
{
if (ip_vs_conn_unhash(cp)) {
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
atomic_dec(&ip_vs_conn_no_cport_cnt);
cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
cp->cport = cport;
}
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
/* hash on new dport */
ip_vs_conn_hash(cp);
@@ -549,7 +554,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
return;
/* Increase the refcnt counter of the dest */
- atomic_inc(&dest->refcnt);
+ ip_vs_dest_hold(dest);
conn_flags = atomic_read(&dest->conn_flags);
if (cp->protocol != IPPROTO_UDP)
@@ -606,20 +611,22 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
* Check if there is a destination for the connection, if so
* bind the connection to the destination.
*/
-struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
+void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
{
struct ip_vs_dest *dest;
+ rcu_read_lock();
dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
cp->dport, &cp->vaddr, cp->vport,
cp->protocol, cp->fwmark, cp->flags);
if (dest) {
struct ip_vs_proto_data *pd;
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
if (cp->dest) {
- spin_unlock(&cp->lock);
- return dest;
+ spin_unlock_bh(&cp->lock);
+ rcu_read_unlock();
+ return;
}
/* Applications work depending on the forwarding method
@@ -628,7 +635,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
ip_vs_unbind_app(cp);
ip_vs_bind_dest(cp, dest);
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
/* Update its packet transmitter */
cp->packet_xmit = NULL;
@@ -643,7 +650,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
if (pd && atomic_read(&pd->appcnt))
ip_vs_bind_app(cp, pd->pp);
}
- return dest;
+ rcu_read_unlock();
}
@@ -695,12 +702,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
}
- /*
- * Simply decrease the refcnt of the dest, because the
- * dest will be either in service's destination list
- * or in the trash.
- */
- atomic_dec(&dest->refcnt);
+ ip_vs_dest_put(dest);
}
static int expire_quiescent_template(struct netns_ipvs *ipvs,
@@ -757,41 +759,36 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
* Simply decrease the refcnt of the template,
* don't restart its timer.
*/
- atomic_dec(&ct->refcnt);
+ __ip_vs_conn_put(ct);
return 0;
}
return 1;
}
+static void ip_vs_conn_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_conn *cp = container_of(head, struct ip_vs_conn,
+ rcu_head);
+
+ ip_vs_pe_put(cp->pe);
+ kfree(cp->pe_data);
+ kmem_cache_free(ip_vs_conn_cachep, cp);
+}
+
static void ip_vs_conn_expire(unsigned long data)
{
struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
struct net *net = ip_vs_conn_net(cp);
struct netns_ipvs *ipvs = net_ipvs(net);
- cp->timeout = 60*HZ;
-
- /*
- * hey, I'm using it
- */
- atomic_inc(&cp->refcnt);
-
/*
* do I control anybody?
*/
if (atomic_read(&cp->n_control))
goto expire_later;
- /*
- * unhash it if it is hashed in the conn table
- */
- if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET))
- goto expire_later;
-
- /*
- * refcnt==1 implies I'm the only one referrer
- */
- if (likely(atomic_read(&cp->refcnt) == 1)) {
+ /* Unlink conn if not referenced anymore */
+ if (likely(ip_vs_conn_unlink(cp))) {
/* delete the timer if it is activated by other users */
del_timer(&cp->timer);
@@ -810,38 +807,41 @@ static void ip_vs_conn_expire(unsigned long data)
ip_vs_conn_drop_conntrack(cp);
}
- ip_vs_pe_put(cp->pe);
- kfree(cp->pe_data);
if (unlikely(cp->app != NULL))
ip_vs_unbind_app(cp);
ip_vs_unbind_dest(cp);
if (cp->flags & IP_VS_CONN_F_NO_CPORT)
atomic_dec(&ip_vs_conn_no_cport_cnt);
+ call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free);
atomic_dec(&ipvs->conn_count);
-
- kmem_cache_free(ip_vs_conn_cachep, cp);
return;
}
- /* hash it back to the table */
- ip_vs_conn_hash(cp);
-
expire_later:
- IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",
- atomic_read(&cp->refcnt)-1,
+ IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n",
+ atomic_read(&cp->refcnt),
atomic_read(&cp->n_control));
+ atomic_inc(&cp->refcnt);
+ cp->timeout = 60*HZ;
+
if (ipvs->sync_state & IP_VS_STATE_MASTER)
ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));
ip_vs_conn_put(cp);
}
-
+/* Modify timer, so that it expires as soon as possible.
+ * Can be called without reference only if under RCU lock.
+ */
void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
{
- if (del_timer(&cp->timer))
- mod_timer(&cp->timer, jiffies);
+ /* Using mod_timer_pending will ensure the timer is not
+ * modified after the final del_timer in ip_vs_conn_expire.
+ */
+ if (timer_pending(&cp->timer) &&
+ time_after(cp->timer.expires, jiffies))
+ mod_timer_pending(&cp->timer, jiffies);
}
@@ -858,7 +858,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
p->protocol);
- cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
+ cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
if (cp == NULL) {
IP_VS_ERR_RL("%s(): no memory\n", __func__);
return NULL;
@@ -869,13 +869,13 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
ip_vs_conn_net_set(cp, p->net);
cp->af = p->af;
cp->protocol = p->protocol;
- ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
+ ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
cp->cport = p->cport;
- ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr);
+ ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr);
cp->vport = p->vport;
/* proto should only be IPPROTO_IP if d_addr is a fwmark */
- ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
- &cp->daddr, daddr);
+ ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
+ &cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
cp->fwmark = fwmark;
@@ -884,6 +884,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
cp->pe = p->pe;
cp->pe_data = p->pe_data;
cp->pe_data_len = p->pe_data_len;
+ } else {
+ cp->pe = NULL;
+ cp->pe_data = NULL;
+ cp->pe_data_len = 0;
}
spin_lock_init(&cp->lock);
@@ -894,18 +898,28 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
*/
atomic_set(&cp->refcnt, 1);
+ cp->control = NULL;
atomic_set(&cp->n_control, 0);
atomic_set(&cp->in_pkts, 0);
+ cp->packet_xmit = NULL;
+ cp->app = NULL;
+ cp->app_data = NULL;
+ /* reset struct ip_vs_seq */
+ cp->in_seq.delta = 0;
+ cp->out_seq.delta = 0;
+
atomic_inc(&ipvs->conn_count);
if (flags & IP_VS_CONN_F_NO_CPORT)
atomic_inc(&ip_vs_conn_no_cport_cnt);
/* Bind the connection with a destination server */
+ cp->dest = NULL;
ip_vs_bind_dest(cp, dest);
/* Set its state and timeout */
cp->state = 0;
+ cp->old_state = 0;
cp->timeout = 3*HZ;
cp->sync_endtime = jiffies & ~3UL;
@@ -952,14 +966,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
struct ip_vs_iter_state *iter = seq->private;
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
- ct_read_lock_bh(idx);
- hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
+ /* __ip_vs_conn_get() is not needed by
+ * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show
+ */
if (pos-- == 0) {
iter->l = &ip_vs_conn_tab[idx];
return cp;
}
}
- ct_read_unlock_bh(idx);
+ rcu_read_unlock();
}
return NULL;
@@ -977,6 +994,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct ip_vs_conn *cp = v;
struct ip_vs_iter_state *iter = seq->private;
+ struct hlist_node *e;
struct hlist_head *l = iter->l;
int idx;
@@ -985,19 +1003,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
return ip_vs_conn_array(seq, 0);
/* more on same hash chain? */
- if (cp->c_list.next)
- return hlist_entry(cp->c_list.next, struct ip_vs_conn, c_list);
+ e = rcu_dereference(hlist_next_rcu(&cp->c_list));
+ if (e)
+ return hlist_entry(e, struct ip_vs_conn, c_list);
+ rcu_read_unlock();
idx = l - ip_vs_conn_tab;
- ct_read_unlock_bh(idx);
-
while (++idx < ip_vs_conn_tab_size) {
- ct_read_lock_bh(idx);
- hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
iter->l = &ip_vs_conn_tab[idx];
return cp;
}
- ct_read_unlock_bh(idx);
+ rcu_read_unlock();
}
iter->l = NULL;
return NULL;
@@ -1009,7 +1027,7 @@ static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
struct hlist_head *l = iter->l;
if (l)
- ct_read_unlock_bh(l - ip_vs_conn_tab);
+ rcu_read_unlock();
}
static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
@@ -1188,7 +1206,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
void ip_vs_random_dropentry(struct net *net)
{
int idx;
- struct ip_vs_conn *cp;
+ struct ip_vs_conn *cp, *cp_c;
/*
* Randomly scan 1/32 of the whole table every second
@@ -1199,9 +1217,9 @@ void ip_vs_random_dropentry(struct net *net)
/*
* Lock is actually needed in this loop.
*/
- ct_write_lock_bh(hash);
+ rcu_read_lock();
- hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->flags & IP_VS_CONN_F_TEMPLATE)
/* connection template */
continue;
@@ -1228,12 +1246,15 @@ void ip_vs_random_dropentry(struct net *net)
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
- if (cp->control) {
+ cp_c = cp->control;
+ /* cp->control is valid only with reference to cp */
+ if (cp_c && __ip_vs_conn_get(cp)) {
IP_VS_DBG(4, "del conn template\n");
- ip_vs_conn_expire_now(cp->control);
+ ip_vs_conn_expire_now(cp_c);
+ __ip_vs_conn_put(cp);
}
}
- ct_write_unlock_bh(hash);
+ rcu_read_unlock();
}
}
@@ -1244,7 +1265,7 @@ void ip_vs_random_dropentry(struct net *net)
static void ip_vs_conn_flush(struct net *net)
{
int idx;
- struct ip_vs_conn *cp;
+ struct ip_vs_conn *cp, *cp_c;
struct netns_ipvs *ipvs = net_ipvs(net);
flush_again:
@@ -1252,19 +1273,22 @@ flush_again:
/*
* Lock is actually needed in this loop.
*/
- ct_write_lock_bh(idx);
+ rcu_read_lock();
- hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+ hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
if (!ip_vs_conn_net_eq(cp, net))
continue;
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
- if (cp->control) {
+ cp_c = cp->control;
+ /* cp->control is valid only with reference to cp */
+ if (cp_c && __ip_vs_conn_get(cp)) {
IP_VS_DBG(4, "del conn template\n");
- ip_vs_conn_expire_now(cp->control);
+ ip_vs_conn_expire_now(cp_c);
+ __ip_vs_conn_put(cp);
}
}
- ct_write_unlock_bh(idx);
+ rcu_read_unlock();
}
/* the counter may be not NULL, because maybe some conn entries
@@ -1331,7 +1355,7 @@ int __init ip_vs_conn_init(void)
INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]);
for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) {
- rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
+ spin_lock_init(&__ip_vs_conntbl_lock_array[idx].l);
}
/* calculate the random value for connection hash */
@@ -1342,6 +1366,8 @@ int __init ip_vs_conn_init(void)
void ip_vs_conn_cleanup(void)
{
+ /* Wait all ip_vs_conn_rcu_free() callbacks to complete */
+ rcu_barrier();
/* Release the empty cache */
kmem_cache_destroy(ip_vs_conn_cachep);
vfree(ip_vs_conn_tab);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 2aef23ed748b..f26fe3353a30 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -203,7 +203,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
{
ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
vport, p);
- p->pe = svc->pe;
+ p->pe = rcu_dereference(svc->pe);
if (p->pe && p->pe->fill_param)
return p->pe->fill_param(p, skb);
@@ -296,12 +296,15 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/* Check if a template already exists */
ct = ip_vs_ct_in_get(&param);
if (!ct || !ip_vs_check_template(ct)) {
+ struct ip_vs_scheduler *sched;
+
/*
* No template found or the dest of the connection
* template is not available.
* return *ignored=0 i.e. ICMP and NF_DROP
*/
- dest = svc->scheduler->schedule(svc, skb);
+ sched = rcu_dereference(svc->scheduler);
+ dest = sched->schedule(svc, skb);
if (!dest) {
IP_VS_DBG(1, "p-schedule: no dest found.\n");
kfree(param.pe_data);
@@ -391,6 +394,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
{
struct ip_vs_protocol *pp = pd->pp;
struct ip_vs_conn *cp = NULL;
+ struct ip_vs_scheduler *sched;
struct ip_vs_dest *dest;
__be16 _ports[2], *pptr;
unsigned int flags;
@@ -446,7 +450,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
return NULL;
}
- dest = svc->scheduler->schedule(svc, skb);
+ sched = rcu_dereference(svc->scheduler);
+ dest = sched->schedule(svc, skb);
if (dest == NULL) {
IP_VS_DBG(1, "Schedule: no dest found.\n");
return NULL;
@@ -504,7 +509,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
if (pptr == NULL) {
- ip_vs_service_put(svc);
return NF_DROP;
}
@@ -530,8 +534,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_CONN_F_ONE_PACKET : 0;
union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
- ip_vs_service_put(svc);
-
/* create a new connection entry */
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
{
@@ -568,12 +570,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
* listed in the ipvs table), pass the packets, because it is
* not ipvs job to decide to drop the packets.
*/
- if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) {
- ip_vs_service_put(svc);
+ if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT))
return NF_ACCEPT;
- }
-
- ip_vs_service_put(svc);
/*
* Notify the client that the destination is unreachable, and
@@ -640,8 +638,11 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
{
- int err = ip_defrag(skb, user);
+ int err;
+ local_bh_disable();
+ err = ip_defrag(skb, user);
+ local_bh_enable();
if (!err)
ip_send_check(ip_hdr(skb));
@@ -1161,9 +1162,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
sizeof(_ports), _ports, &iph);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
- if (ip_vs_lookup_real_service(net, af, iph.protocol,
- &iph.saddr,
- pptr[0])) {
+ if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr,
+ pptr[0])) {
/*
* Notify the real server: there is no
* existing entry if it is not RST
@@ -1220,13 +1220,7 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- unsigned int verdict;
-
- /* Disable BH in LOCAL_OUT until all places are fixed */
- local_bh_disable();
- verdict = ip_vs_out(hooknum, skb, AF_INET);
- local_bh_enable();
- return verdict;
+ return ip_vs_out(hooknum, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -1253,13 +1247,7 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- unsigned int verdict;
-
- /* Disable BH in LOCAL_OUT until all places are fixed */
- local_bh_disable();
- verdict = ip_vs_out(hooknum, skb, AF_INET6);
- local_bh_enable();
- return verdict;
+ return ip_vs_out(hooknum, skb, AF_INET6);
}
#endif
@@ -1395,10 +1383,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
goto ignore_ipip;
/* Prefer the resulting PMTU */
if (dest) {
- spin_lock(&dest->dst_lock);
- if (dest->dst_cache)
- mtu = dst_mtu(dest->dst_cache);
- spin_unlock(&dest->dst_lock);
+ struct ip_vs_dest_dst *dest_dst;
+
+ rcu_read_lock();
+ dest_dst = rcu_dereference(dest->dest_dst);
+ if (dest_dst)
+ mtu = dst_mtu(dest_dst->dst_cache);
+ rcu_read_unlock();
}
if (mtu > 68 + sizeof(struct iphdr))
mtu -= sizeof(struct iphdr);
@@ -1714,13 +1705,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- unsigned int verdict;
-
- /* Disable BH in LOCAL_OUT until all places are fixed */
- local_bh_disable();
- verdict = ip_vs_in(hooknum, skb, AF_INET);
- local_bh_enable();
- return verdict;
+ return ip_vs_in(hooknum, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -1779,13 +1764,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- unsigned int verdict;
-
- /* Disable BH in LOCAL_OUT until all places are fixed */
- local_bh_disable();
- verdict = ip_vs_in(hooknum, skb, AF_INET6);
- local_bh_enable();
- return verdict;
+ return ip_vs_in(hooknum, skb, AF_INET6);
}
#endif
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 8104120f16a5..9e4074c26dc2 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -55,9 +55,6 @@
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);
-/* lock for service table */
-static DEFINE_RWLOCK(__ip_vs_svc_lock);
-
/* sysctl variables */
#ifdef CONFIG_IP_VS_DEBUG
@@ -71,7 +68,7 @@ int ip_vs_get_debug_level(void)
/* Protos */
-static void __ip_vs_del_service(struct ip_vs_service *svc);
+static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
#ifdef CONFIG_IP_VS_IPV6
@@ -257,9 +254,9 @@ ip_vs_use_count_dec(void)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
/* the service table hashed by <protocol, addr, port> */
-static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
+static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
-static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
+static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
/*
@@ -314,13 +311,13 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
*/
hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
&svc->addr, svc->port);
- list_add(&svc->s_list, &ip_vs_svc_table[hash]);
+ hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
} else {
/*
* Hash it by fwmark in svc_fwm_table
*/
hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
- list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
+ hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
}
svc->flags |= IP_VS_SVC_F_HASHED;
@@ -344,10 +341,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
if (svc->fwmark == 0) {
/* Remove it from the svc_table table */
- list_del(&svc->s_list);
+ hlist_del_rcu(&svc->s_list);
} else {
/* Remove it from the svc_fwm_table table */
- list_del(&svc->f_list);
+ hlist_del_rcu(&svc->f_list);
}
svc->flags &= ~IP_VS_SVC_F_HASHED;
@@ -369,7 +366,7 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol,
/* Check for "full" addressed entries */
hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
- list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
+ hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
if ((svc->af == af)
&& ip_vs_addr_equal(af, &svc->addr, vaddr)
&& (svc->port == vport)
@@ -396,7 +393,7 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
/* Check for fwmark addressed entries */
hash = ip_vs_svc_fwm_hashkey(net, fwmark);
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
+ hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
if (svc->fwmark == fwmark && svc->af == af
&& net_eq(svc->net, net)) {
/* HIT */
@@ -407,15 +404,14 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
return NULL;
}
+/* Find service, called under RCU lock */
struct ip_vs_service *
-ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
- const union nf_inet_addr *vaddr, __be16 vport)
+ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
+ const union nf_inet_addr *vaddr, __be16 vport)
{
struct ip_vs_service *svc;
struct netns_ipvs *ipvs = net_ipvs(net);
- read_lock(&__ip_vs_svc_lock);
-
/*
* Check the table hashed by fwmark first
*/
@@ -451,10 +447,6 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
}
out:
- if (svc)
- atomic_inc(&svc->usecnt);
- read_unlock(&__ip_vs_svc_lock);
-
IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
fwmark, ip_vs_proto_name(protocol),
IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
@@ -471,6 +463,13 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
dest->svc = svc;
}
+static void ip_vs_service_free(struct ip_vs_service *svc)
+{
+ if (svc->stats.cpustats)
+ free_percpu(svc->stats.cpustats);
+ kfree(svc);
+}
+
static void
__ip_vs_unbind_svc(struct ip_vs_dest *dest)
{
@@ -478,12 +477,11 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
dest->svc = NULL;
if (atomic_dec_and_test(&svc->refcnt)) {
- IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
+ IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
svc->fwmark,
IP_VS_DBG_ADDR(svc->af, &svc->addr),
- ntohs(svc->port), atomic_read(&svc->usecnt));
- free_percpu(svc->stats.cpustats);
- kfree(svc);
+ ntohs(svc->port));
+ ip_vs_service_free(svc);
}
}
@@ -508,17 +506,13 @@ static inline unsigned int ip_vs_rs_hashkey(int af,
& IP_VS_RTAB_MASK;
}
-/*
- * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
- * should be called with locked tables.
- */
-static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
+/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
+static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
{
unsigned int hash;
- if (!list_empty(&dest->d_list)) {
- return 0;
- }
+ if (dest->in_rs_table)
+ return;
/*
* Hash by proto,addr,port,
@@ -526,64 +520,51 @@ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
*/
hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
- list_add(&dest->d_list, &ipvs->rs_table[hash]);
-
- return 1;
+ hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
+ dest->in_rs_table = 1;
}
-/*
- * UNhashes ip_vs_dest from rs_table.
- * should be called with locked tables.
- */
-static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
+/* Unhash ip_vs_dest from rs_table. */
+static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
{
/*
* Remove it from the rs_table table.
*/
- if (!list_empty(&dest->d_list)) {
- list_del_init(&dest->d_list);
+ if (dest->in_rs_table) {
+ hlist_del_rcu(&dest->d_list);
+ dest->in_rs_table = 0;
}
-
- return 1;
}
-/*
- * Lookup real service by <proto,addr,port> in the real service table.
- */
-struct ip_vs_dest *
-ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
- const union nf_inet_addr *daddr,
- __be16 dport)
+/* Check if real service by <proto,addr,port> is present */
+bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
+ const union nf_inet_addr *daddr, __be16 dport)
{
struct netns_ipvs *ipvs = net_ipvs(net);
unsigned int hash;
struct ip_vs_dest *dest;
- /*
- * Check for "full" addressed entries
- * Return the first found entry
- */
+ /* Check for "full" addressed entries */
hash = ip_vs_rs_hashkey(af, daddr, dport);
- read_lock(&ipvs->rs_lock);
- list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
- if ((dest->af == af)
- && ip_vs_addr_equal(af, &dest->addr, daddr)
- && (dest->port == dport)
- && ((dest->protocol == protocol) ||
- dest->vfwmark)) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
+ if (dest->port == dport &&
+ dest->af == af &&
+ ip_vs_addr_equal(af, &dest->addr, daddr) &&
+ (dest->protocol == protocol || dest->vfwmark)) {
/* HIT */
- read_unlock(&ipvs->rs_lock);
- return dest;
+ rcu_read_unlock();
+ return true;
}
}
- read_unlock(&ipvs->rs_lock);
+ rcu_read_unlock();
- return NULL;
+ return false;
}
-/*
- * Lookup destination by {addr,port} in the given service
+/* Lookup destination by {addr,port} in the given service
+ * Called under RCU lock.
*/
static struct ip_vs_dest *
ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
@@ -594,7 +575,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
/*
* Find the destination for the given service
*/
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if ((dest->af == svc->af)
&& ip_vs_addr_equal(svc->af, &dest->addr, daddr)
&& (dest->port == dport)) {
@@ -608,13 +589,11 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
/*
* Find destination by {daddr,dport,vaddr,protocol}
- * Cretaed to be used in ip_vs_process_message() in
+ * Created to be used in ip_vs_process_message() in
* the backup synchronization daemon. It finds the
* destination to be bound to the received connection
* on the backup.
- *
- * ip_vs_lookup_real_service() looked promissing, but
- * seems not working as expected.
+ * Called under RCU lock, no refcnt is returned.
*/
struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
const union nf_inet_addr *daddr,
@@ -627,7 +606,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
struct ip_vs_service *svc;
__be16 port = dport;
- svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
+ svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport);
if (!svc)
return NULL;
if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
@@ -635,12 +614,31 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
dest = ip_vs_lookup_dest(svc, daddr, port);
if (!dest)
dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
- if (dest)
- atomic_inc(&dest->refcnt);
- ip_vs_service_put(svc);
return dest;
}
+void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_dest_dst *dest_dst = container_of(head,
+ struct ip_vs_dest_dst,
+ rcu_head);
+
+ dst_release(dest_dst->dst_cache);
+ kfree(dest_dst);
+}
+
+/* Release dest_dst and dst_cache for dest in user context */
+static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
+{
+ struct ip_vs_dest_dst *old;
+
+ old = rcu_dereference_protected(dest->dest_dst, 1);
+ if (old) {
+ RCU_INIT_POINTER(dest->dest_dst, NULL);
+ call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
+ }
+}
+
/*
* Lookup dest by {svc,addr,port} in the destination trash.
* The destination trash is used to hold the destinations that are removed
@@ -655,19 +653,25 @@ static struct ip_vs_dest *
ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
__be16 dport)
{
- struct ip_vs_dest *dest, *nxt;
+ struct ip_vs_dest *dest;
struct netns_ipvs *ipvs = net_ipvs(svc->net);
/*
* Find the destination in trash
*/
- list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
+ spin_lock_bh(&ipvs->dest_trash_lock);
+ list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
"dest->refcnt=%d\n",
dest->vfwmark,
IP_VS_DBG_ADDR(svc->af, &dest->addr),
ntohs(dest->port),
atomic_read(&dest->refcnt));
+ /* We can not reuse dest while in grace period
+ * because conns still can use dest->svc
+ */
+ if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
+ continue;
if (dest->af == svc->af &&
ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
dest->port == dport &&
@@ -677,29 +681,27 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
(ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
dest->vport == svc->port))) {
/* HIT */
- return dest;
- }
-
- /*
- * Try to purge the destination from trash if not referenced
- */
- if (atomic_read(&dest->refcnt) == 1) {
- IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
- "from trash\n",
- dest->vfwmark,
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
- ntohs(dest->port));
- list_del(&dest->n_list);
- ip_vs_dst_reset(dest);
- __ip_vs_unbind_svc(dest);
- free_percpu(dest->stats.cpustats);
- kfree(dest);
+ list_del(&dest->t_list);
+ ip_vs_dest_hold(dest);
+ goto out;
}
}
- return NULL;
+ dest = NULL;
+
+out:
+ spin_unlock_bh(&ipvs->dest_trash_lock);
+
+ return dest;
}
+static void ip_vs_dest_free(struct ip_vs_dest *dest)
+{
+ __ip_vs_dst_cache_reset(dest);
+ __ip_vs_unbind_svc(dest);
+ free_percpu(dest->stats.cpustats);
+ kfree(dest);
+}
/*
* Clean up all the destinations in the trash
@@ -708,19 +710,18 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
* When the ip_vs_control_clearup is activated by ipvs module exit,
* the service tables must have been flushed and all the connections
* are expired, and the refcnt of each destination in the trash must
- * be 1, so we simply release them here.
+ * be 0, so we simply release them here.
*/
static void ip_vs_trash_cleanup(struct net *net)
{
struct ip_vs_dest *dest, *nxt;
struct netns_ipvs *ipvs = net_ipvs(net);
- list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
- list_del(&dest->n_list);
- ip_vs_dst_reset(dest);
- __ip_vs_unbind_svc(dest);
- free_percpu(dest->stats.cpustats);
- kfree(dest);
+ del_timer_sync(&ipvs->dest_trash_timer);
+ /* No need to use dest_trash_lock */
+ list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) {
+ list_del(&dest->t_list);
+ ip_vs_dest_free(dest);
}
}
@@ -770,6 +771,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
struct ip_vs_dest_user_kern *udest, int add)
{
struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct ip_vs_scheduler *sched;
int conn_flags;
/* set the weight and the flags */
@@ -785,9 +787,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
* Put the real service in rs_table if not present.
* For now only for NAT!
*/
- write_lock_bh(&ipvs->rs_lock);
ip_vs_rs_hash(ipvs, dest);
- write_unlock_bh(&ipvs->rs_lock);
}
atomic_set(&dest->conn_flags, conn_flags);
@@ -811,27 +811,20 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
dest->l_threshold = udest->l_threshold;
spin_lock_bh(&dest->dst_lock);
- ip_vs_dst_reset(dest);
+ __ip_vs_dst_cache_reset(dest);
spin_unlock_bh(&dest->dst_lock);
- if (add)
- ip_vs_start_estimator(svc->net, &dest->stats);
-
- write_lock_bh(&__ip_vs_svc_lock);
-
- /* Wait until all other svc users go away */
- IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
-
+ sched = rcu_dereference_protected(svc->scheduler, 1);
if (add) {
- list_add(&dest->n_list, &svc->destinations);
+ ip_vs_start_estimator(svc->net, &dest->stats);
+ list_add_rcu(&dest->n_list, &svc->destinations);
svc->num_dests++;
+ if (sched->add_dest)
+ sched->add_dest(svc, dest);
+ } else {
+ if (sched->upd_dest)
+ sched->upd_dest(svc, dest);
}
-
- /* call the update_service, because server weight may be changed */
- if (svc->scheduler->update_service)
- svc->scheduler->update_service(svc);
-
- write_unlock_bh(&__ip_vs_svc_lock);
}
@@ -883,7 +876,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
atomic_set(&dest->persistconns, 0);
atomic_set(&dest->refcnt, 1);
- INIT_LIST_HEAD(&dest->d_list);
+ INIT_HLIST_NODE(&dest->d_list);
spin_lock_init(&dest->dst_lock);
spin_lock_init(&dest->stats.lock);
__ip_vs_update_dest(svc, dest, udest, 1);
@@ -925,10 +918,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
- /*
- * Check if the dest already exists in the list
- */
+ /* We use function that requires RCU lock */
+ rcu_read_lock();
dest = ip_vs_lookup_dest(svc, &daddr, dport);
+ rcu_read_unlock();
if (dest != NULL) {
IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
@@ -950,11 +943,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
ntohs(dest->vport));
- /*
- * Get the destination from the trash
- */
- list_del(&dest->n_list);
-
__ip_vs_update_dest(svc, dest, udest, 1);
ret = 0;
} else {
@@ -994,10 +982,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
- /*
- * Lookup the destination list
- */
+ /* We use function that requires RCU lock */
+ rcu_read_lock();
dest = ip_vs_lookup_dest(svc, &daddr, dport);
+ rcu_read_unlock();
if (dest == NULL) {
IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
@@ -1010,11 +998,21 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return 0;
}
+static void ip_vs_dest_wait_readers(struct rcu_head *head)
+{
+ struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest,
+ rcu_head);
+
+ /* End of grace period after unlinking */
+ clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
+}
+
/*
* Delete a destination (must be already unlinked from the service)
*/
-static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
+static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
+ bool cleanup)
{
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -1023,38 +1021,24 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
/*
* Remove it from the d-linked list with the real services.
*/
- write_lock_bh(&ipvs->rs_lock);
ip_vs_rs_unhash(dest);
- write_unlock_bh(&ipvs->rs_lock);
- /*
- * Decrease the refcnt of the dest, and free the dest
- * if nobody refers to it (refcnt=0). Otherwise, throw
- * the destination into the trash.
- */
- if (atomic_dec_and_test(&dest->refcnt)) {
- IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
- dest->vfwmark,
- IP_VS_DBG_ADDR(dest->af, &dest->addr),
- ntohs(dest->port));
- ip_vs_dst_reset(dest);
- /* simply decrease svc->refcnt here, let the caller check
- and release the service if nobody refers to it.
- Only user context can release destination and service,
- and only one user context can update virtual service at a
- time, so the operation here is OK */
- atomic_dec(&dest->svc->refcnt);
- free_percpu(dest->stats.cpustats);
- kfree(dest);
- } else {
- IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
- "dest->refcnt=%d\n",
- IP_VS_DBG_ADDR(dest->af, &dest->addr),
- ntohs(dest->port),
- atomic_read(&dest->refcnt));
- list_add(&dest->n_list, &ipvs->dest_trash);
- atomic_inc(&dest->refcnt);
+ if (!cleanup) {
+ set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
+ call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers);
}
+
+ spin_lock_bh(&ipvs->dest_trash_lock);
+ IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
+ atomic_read(&dest->refcnt));
+ if (list_empty(&ipvs->dest_trash) && !cleanup)
+ mod_timer(&ipvs->dest_trash_timer,
+ jiffies + IP_VS_DEST_TRASH_PERIOD);
+ /* dest lives in trash without reference */
+ list_add(&dest->t_list, &ipvs->dest_trash);
+ spin_unlock_bh(&ipvs->dest_trash_lock);
+ ip_vs_dest_put(dest);
}
@@ -1070,14 +1054,16 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
/*
* Remove it from the d-linked destination list.
*/
- list_del(&dest->n_list);
+ list_del_rcu(&dest->n_list);
svc->num_dests--;
- /*
- * Call the update_service function of its scheduler
- */
- if (svcupd && svc->scheduler->update_service)
- svc->scheduler->update_service(svc);
+ if (svcupd) {
+ struct ip_vs_scheduler *sched;
+
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (sched->del_dest)
+ sched->del_dest(svc, dest);
+ }
}
@@ -1092,37 +1078,56 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
EnterFunction(2);
+ /* We use function that requires RCU lock */
+ rcu_read_lock();
dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
+ rcu_read_unlock();
if (dest == NULL) {
IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
return -ENOENT;
}
- write_lock_bh(&__ip_vs_svc_lock);
-
- /*
- * Wait until all other svc users go away.
- */
- IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
-
/*
* Unlink dest from the service
*/
__ip_vs_unlink_dest(svc, dest, 1);
- write_unlock_bh(&__ip_vs_svc_lock);
-
/*
* Delete the destination
*/
- __ip_vs_del_dest(svc->net, dest);
+ __ip_vs_del_dest(svc->net, dest, false);
LeaveFunction(2);
return 0;
}
+static void ip_vs_dest_trash_expire(unsigned long data)
+{
+ struct net *net = (struct net *) data;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_dest *dest, *next;
+
+ spin_lock(&ipvs->dest_trash_lock);
+ list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
+ /* Skip if dest is in grace period */
+ if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
+ continue;
+ if (atomic_read(&dest->refcnt) > 0)
+ continue;
+ IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
+ dest->vfwmark,
+ IP_VS_DBG_ADDR(dest->svc->af, &dest->addr),
+ ntohs(dest->port));
+ list_del(&dest->t_list);
+ ip_vs_dest_free(dest);
+ }
+ if (!list_empty(&ipvs->dest_trash))
+ mod_timer(&ipvs->dest_trash_timer,
+ jiffies + IP_VS_DEST_TRASH_PERIOD);
+ spin_unlock(&ipvs->dest_trash_lock);
+}
/*
* Add a service into the service hash table
@@ -1178,7 +1183,6 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
}
/* I'm the first user of the service */
- atomic_set(&svc->usecnt, 0);
atomic_set(&svc->refcnt, 0);
svc->af = u->af;
@@ -1192,7 +1196,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
svc->net = net;
INIT_LIST_HEAD(&svc->destinations);
- rwlock_init(&svc->sched_lock);
+ spin_lock_init(&svc->sched_lock);
spin_lock_init(&svc->stats.lock);
/* Bind the scheduler */
@@ -1202,7 +1206,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
sched = NULL;
/* Bind the ct retriever */
- ip_vs_bind_pe(svc, pe);
+ RCU_INIT_POINTER(svc->pe, pe);
pe = NULL;
/* Update the virtual service counters */
@@ -1218,9 +1222,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
ipvs->num_services++;
/* Hash the service into the service table */
- write_lock_bh(&__ip_vs_svc_lock);
ip_vs_svc_hash(svc);
- write_unlock_bh(&__ip_vs_svc_lock);
*svc_p = svc;
/* Now there is a service - full throttle */
@@ -1230,15 +1232,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
out_err:
if (svc != NULL) {
- ip_vs_unbind_scheduler(svc);
- if (svc->inc) {
- local_bh_disable();
- ip_vs_app_inc_put(svc->inc);
- local_bh_enable();
- }
- if (svc->stats.cpustats)
- free_percpu(svc->stats.cpustats);
- kfree(svc);
+ ip_vs_unbind_scheduler(svc, sched);
+ ip_vs_service_free(svc);
}
ip_vs_scheduler_put(sched);
ip_vs_pe_put(pe);
@@ -1288,12 +1283,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
}
#endif
- write_lock_bh(&__ip_vs_svc_lock);
-
- /*
- * Wait until all other svc users go away.
- */
- IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+ old_sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (sched != old_sched) {
+ /* Bind the new scheduler */
+ ret = ip_vs_bind_scheduler(svc, sched);
+ if (ret) {
+ old_sched = sched;
+ goto out;
+ }
+ /* Unbind the old scheduler on success */
+ ip_vs_unbind_scheduler(svc, old_sched);
+ }
/*
* Set the flags and timeout value
@@ -1302,57 +1302,30 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
svc->timeout = u->timeout * HZ;
svc->netmask = u->netmask;
- old_sched = svc->scheduler;
- if (sched != old_sched) {
- /*
- * Unbind the old scheduler
- */
- if ((ret = ip_vs_unbind_scheduler(svc))) {
- old_sched = sched;
- goto out_unlock;
- }
-
- /*
- * Bind the new scheduler
- */
- if ((ret = ip_vs_bind_scheduler(svc, sched))) {
- /*
- * If ip_vs_bind_scheduler fails, restore the old
- * scheduler.
- * The main reason of failure is out of memory.
- *
- * The question is if the old scheduler can be
- * restored all the time. TODO: if it cannot be
- * restored some time, we must delete the service,
- * otherwise the system may crash.
- */
- ip_vs_bind_scheduler(svc, old_sched);
- old_sched = sched;
- goto out_unlock;
- }
- }
+ old_pe = rcu_dereference_protected(svc->pe, 1);
+ if (pe != old_pe)
+ rcu_assign_pointer(svc->pe, pe);
- old_pe = svc->pe;
- if (pe != old_pe) {
- ip_vs_unbind_pe(svc);
- ip_vs_bind_pe(svc, pe);
- }
-
-out_unlock:
- write_unlock_bh(&__ip_vs_svc_lock);
out:
ip_vs_scheduler_put(old_sched);
ip_vs_pe_put(old_pe);
return ret;
}
+static void ip_vs_service_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_service *svc;
+
+ svc = container_of(head, struct ip_vs_service, rcu_head);
+ ip_vs_service_free(svc);
+}
/*
* Delete a service from the service list
* - The service must be unlinked, unlocked and not referenced!
* - We are called under _bh lock
*/
-static void __ip_vs_del_service(struct ip_vs_service *svc)
+static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
{
struct ip_vs_dest *dest, *nxt;
struct ip_vs_scheduler *old_sched;
@@ -1368,27 +1341,20 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
ip_vs_stop_estimator(svc->net, &svc->stats);
/* Unbind scheduler */
- old_sched = svc->scheduler;
- ip_vs_unbind_scheduler(svc);
+ old_sched = rcu_dereference_protected(svc->scheduler, 1);
+ ip_vs_unbind_scheduler(svc, old_sched);
ip_vs_scheduler_put(old_sched);
- /* Unbind persistence engine */
- old_pe = svc->pe;
- ip_vs_unbind_pe(svc);
+ /* Unbind persistence engine, keep svc->pe */
+ old_pe = rcu_dereference_protected(svc->pe, 1);
ip_vs_pe_put(old_pe);
- /* Unbind app inc */
- if (svc->inc) {
- ip_vs_app_inc_put(svc->inc);
- svc->inc = NULL;
- }
-
/*
* Unlink the whole destination list
*/
list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
__ip_vs_unlink_dest(svc, dest, 0);
- __ip_vs_del_dest(svc->net, dest);
+ __ip_vs_del_dest(svc->net, dest, cleanup);
}
/*
@@ -1402,13 +1368,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
/*
* Free the service if nobody refers to it
*/
- if (atomic_read(&svc->refcnt) == 0) {
- IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
+ if (atomic_dec_and_test(&svc->refcnt)) {
+ IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
svc->fwmark,
IP_VS_DBG_ADDR(svc->af, &svc->addr),
- ntohs(svc->port), atomic_read(&svc->usecnt));
- free_percpu(svc->stats.cpustats);
- kfree(svc);
+ ntohs(svc->port));
+ call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
}
/* decrease the module use count */
@@ -1418,23 +1383,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
/*
* Unlink a service from list and try to delete it if its refcnt reached 0
*/
-static void ip_vs_unlink_service(struct ip_vs_service *svc)
+static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
{
+ /* Hold svc to avoid double release from dest_trash */
+ atomic_inc(&svc->refcnt);
/*
* Unhash it from the service table
*/
- write_lock_bh(&__ip_vs_svc_lock);
-
ip_vs_svc_unhash(svc);
- /*
- * Wait until all the svc users go away.
- */
- IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
-
- __ip_vs_del_service(svc);
-
- write_unlock_bh(&__ip_vs_svc_lock);
+ __ip_vs_del_service(svc, cleanup);
}
/*
@@ -1444,7 +1402,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
{
if (svc == NULL)
return -EEXIST;
- ip_vs_unlink_service(svc);
+ ip_vs_unlink_service(svc, false);
return 0;
}
@@ -1453,19 +1411,20 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
/*
* Flush all the virtual services
*/
-static int ip_vs_flush(struct net *net)
+static int ip_vs_flush(struct net *net, bool cleanup)
{
int idx;
- struct ip_vs_service *svc, *nxt;
+ struct ip_vs_service *svc;
+ struct hlist_node *n;
/*
* Flush the service table hashed by <netns,protocol,addr,port>
*/
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
- s_list) {
+ hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
+ s_list) {
if (net_eq(svc->net, net))
- ip_vs_unlink_service(svc);
+ ip_vs_unlink_service(svc, cleanup);
}
}
@@ -1473,10 +1432,10 @@ static int ip_vs_flush(struct net *net)
* Flush the service table hashed by fwmark
*/
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry_safe(svc, nxt,
- &ip_vs_svc_fwm_table[idx], f_list) {
+ hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
+ f_list) {
if (net_eq(svc->net, net))
- ip_vs_unlink_service(svc);
+ ip_vs_unlink_service(svc, cleanup);
}
}
@@ -1492,32 +1451,29 @@ void ip_vs_service_net_cleanup(struct net *net)
EnterFunction(2);
/* Check for "full" addressed entries */
mutex_lock(&__ip_vs_mutex);
- ip_vs_flush(net);
+ ip_vs_flush(net, true);
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
}
-/*
- * Release dst hold by dst_cache
- */
+
+/* Put all references for device (dst_cache) */
static inline void
-__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
+ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
{
spin_lock_bh(&dest->dst_lock);
- if (dest->dst_cache && dest->dst_cache->dev == dev) {
+ if (dest->dest_dst && dest->dest_dst->dst_cache->dev == dev) {
IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
dev->name,
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
atomic_read(&dest->refcnt));
- ip_vs_dst_reset(dest);
+ __ip_vs_dst_cache_reset(dest);
}
spin_unlock_bh(&dest->dst_lock);
}
-/*
- * Netdev event receiver
- * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
- * a device that is "unregister" it must be released.
+/* Netdev event receiver
+ * Currently only NETDEV_DOWN is handled to release refs to cached dsts
*/
static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
void *ptr)
@@ -1529,35 +1485,37 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
struct ip_vs_dest *dest;
unsigned int idx;
- if (event != NETDEV_UNREGISTER || !ipvs)
+ if (event != NETDEV_DOWN || !ipvs)
return NOTIFY_DONE;
IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
EnterFunction(2);
mutex_lock(&__ip_vs_mutex);
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
if (net_eq(svc->net, net)) {
list_for_each_entry(dest, &svc->destinations,
n_list) {
- __ip_vs_dev_reset(dest, dev);
+ ip_vs_forget_dev(dest, dev);
}
}
}
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
if (net_eq(svc->net, net)) {
list_for_each_entry(dest, &svc->destinations,
n_list) {
- __ip_vs_dev_reset(dest, dev);
+ ip_vs_forget_dev(dest, dev);
}
}
}
}
- list_for_each_entry(dest, &ipvs->dest_trash, n_list) {
- __ip_vs_dev_reset(dest, dev);
+ spin_lock_bh(&ipvs->dest_trash_lock);
+ list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
+ ip_vs_forget_dev(dest, dev);
}
+ spin_unlock_bh(&ipvs->dest_trash_lock);
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
return NOTIFY_DONE;
@@ -1570,12 +1528,10 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
{
struct ip_vs_dest *dest;
- write_lock_bh(&__ip_vs_svc_lock);
list_for_each_entry(dest, &svc->destinations, n_list) {
ip_vs_zero_stats(&dest->stats);
}
ip_vs_zero_stats(&svc->stats);
- write_unlock_bh(&__ip_vs_svc_lock);
return 0;
}
@@ -1585,14 +1541,14 @@ static int ip_vs_zero_all(struct net *net)
struct ip_vs_service *svc;
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
if (net_eq(svc->net, net))
ip_vs_zero_service(svc);
}
}
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
if (net_eq(svc->net, net))
ip_vs_zero_service(svc);
}
@@ -1920,7 +1876,7 @@ static struct ctl_table vs_vars[] = {
struct ip_vs_iter {
struct seq_net_private p; /* Do not move this, netns depends upon it*/
- struct list_head *table;
+ struct hlist_head *table;
int bucket;
};
@@ -1953,7 +1909,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
/* look in hash by protocol */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+ hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
if (net_eq(svc->net, net) && pos-- == 0) {
iter->table = ip_vs_svc_table;
iter->bucket = idx;
@@ -1964,7 +1920,8 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
/* keep looking in fwmark */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+ hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
+ f_list) {
if (net_eq(svc->net, net) && pos-- == 0) {
iter->table = ip_vs_svc_fwm_table;
iter->bucket = idx;
@@ -1977,17 +1934,16 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
}
static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
-__acquires(__ip_vs_svc_lock)
{
- read_lock_bh(&__ip_vs_svc_lock);
+ rcu_read_lock();
return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct list_head *e;
+ struct hlist_node *e;
struct ip_vs_iter *iter;
struct ip_vs_service *svc;
@@ -2000,13 +1956,14 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (iter->table == ip_vs_svc_table) {
/* next service in table hashed by protocol */
- if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
- return list_entry(e, struct ip_vs_service, s_list);
-
+ e = rcu_dereference(hlist_next_rcu(&svc->s_list));
+ if (e)
+ return hlist_entry(e, struct ip_vs_service, s_list);
while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
- list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
- s_list) {
+ hlist_for_each_entry_rcu(svc,
+ &ip_vs_svc_table[iter->bucket],
+ s_list) {
return svc;
}
}
@@ -2017,13 +1974,15 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
/* next service in hashed by fwmark */
- if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
- return list_entry(e, struct ip_vs_service, f_list);
+ e = rcu_dereference(hlist_next_rcu(&svc->f_list));
+ if (e)
+ return hlist_entry(e, struct ip_vs_service, f_list);
scan_fwmark:
while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
- f_list)
+ hlist_for_each_entry_rcu(svc,
+ &ip_vs_svc_fwm_table[iter->bucket],
+ f_list)
return svc;
}
@@ -2031,9 +1990,8 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
-__releases(__ip_vs_svc_lock)
{
- read_unlock_bh(&__ip_vs_svc_lock);
+ rcu_read_unlock();
}
@@ -2051,6 +2009,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
const struct ip_vs_service *svc = v;
const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest;
+ struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
@@ -2059,18 +2018,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
ip_vs_proto_name(svc->protocol),
&svc->addr.in6,
ntohs(svc->port),
- svc->scheduler->name);
+ sched->name);
else
#endif
seq_printf(seq, "%s %08X:%04X %s %s ",
ip_vs_proto_name(svc->protocol),
ntohl(svc->addr.ip),
ntohs(svc->port),
- svc->scheduler->name,
+ sched->name,
(svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
} else {
seq_printf(seq, "FWM %08X %s %s",
- svc->fwmark, svc->scheduler->name,
+ svc->fwmark, sched->name,
(svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
}
@@ -2081,7 +2040,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
else
seq_putc(seq, '\n');
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
#ifdef CONFIG_IP_VS_IPV6
if (dest->af == AF_INET6)
seq_printf(seq,
@@ -2391,7 +2350,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
if (cmd == IP_VS_SO_SET_FLUSH) {
/* Flush the virtual service */
- ret = ip_vs_flush(net);
+ ret = ip_vs_flush(net, false);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
/* Set timeout values for (tcp tcpfin udp) */
@@ -2426,11 +2385,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
}
/* Lookup the exact service by <protocol, addr, port> or fwmark */
+ rcu_read_lock();
if (usvc.fwmark == 0)
svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
&usvc.addr, usvc.port);
else
svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
+ rcu_read_unlock();
if (cmd != IP_VS_SO_SET_ADD
&& (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2482,11 +2443,14 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
+ struct ip_vs_scheduler *sched;
+
+ sched = rcu_dereference_protected(src->scheduler, 1);
dst->protocol = src->protocol;
dst->addr = src->addr.ip;
dst->port = src->port;
dst->fwmark = src->fwmark;
- strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
+ strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name));
dst->flags = src->flags;
dst->timeout = src->timeout / HZ;
dst->netmask = src->netmask;
@@ -2505,7 +2469,7 @@ __ip_vs_get_service_entries(struct net *net,
int ret = 0;
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
/* Only expose IPv4 entries to old interface */
if (svc->af != AF_INET || !net_eq(svc->net, net))
continue;
@@ -2524,7 +2488,7 @@ __ip_vs_get_service_entries(struct net *net,
}
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
/* Only expose IPv4 entries to old interface */
if (svc->af != AF_INET || !net_eq(svc->net, net))
continue;
@@ -2553,11 +2517,13 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
union nf_inet_addr addr = { .ip = get->addr };
int ret = 0;
+ rcu_read_lock();
if (get->fwmark)
svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
else
svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
get->port);
+ rcu_read_unlock();
if (svc) {
int count = 0;
@@ -2740,12 +2706,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
entry = (struct ip_vs_service_entry *)arg;
addr.ip = entry->addr;
+ rcu_read_lock();
if (entry->fwmark)
svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
else
svc = __ip_vs_service_find(net, AF_INET,
entry->protocol, &addr,
entry->port);
+ rcu_read_unlock();
if (svc) {
ip_vs_copy_service(entry, svc);
if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2902,6 +2870,7 @@ nla_put_failure:
static int ip_vs_genl_fill_service(struct sk_buff *skb,
struct ip_vs_service *svc)
{
+ struct ip_vs_scheduler *sched;
struct nlattr *nl_service;
struct ip_vs_flags flags = { .flags = svc->flags,
.mask = ~0 };
@@ -2922,7 +2891,8 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
goto nla_put_failure;
}
- if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) ||
(svc->pe &&
nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
@@ -2973,7 +2943,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
mutex_lock(&__ip_vs_mutex);
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
- list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
if (++idx <= start || !net_eq(svc->net, net))
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
@@ -2984,7 +2954,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
}
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
- list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
+ hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
if (++idx <= start || !net_eq(svc->net, net))
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
@@ -3044,11 +3014,13 @@ static int ip_vs_genl_parse_service(struct net *net,
usvc->fwmark = 0;
}
+ rcu_read_lock();
if (usvc->fwmark)
svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
else
svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
&usvc->addr, usvc->port);
+ rcu_read_unlock();
*ret_svc = svc;
/* If a full entry was requested, check for the additional fields */
@@ -3400,7 +3372,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&__ip_vs_mutex);
if (cmd == IPVS_CMD_FLUSH) {
- ret = ip_vs_flush(net);
+ ret = ip_vs_flush(net, false);
goto out;
} else if (cmd == IPVS_CMD_SET_CONFIG) {
ret = ip_vs_genl_set_config(net, info->attrs);
@@ -3792,13 +3764,14 @@ int __net_init ip_vs_control_net_init(struct net *net)
int idx;
struct netns_ipvs *ipvs = net_ipvs(net);
- rwlock_init(&ipvs->rs_lock);
-
/* Initialize rs_table */
for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
- INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+ INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
INIT_LIST_HEAD(&ipvs->dest_trash);
+ spin_lock_init(&ipvs->dest_trash_lock);
+ setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
+ (unsigned long) net);
atomic_set(&ipvs->ftpsvc_counter, 0);
atomic_set(&ipvs->nullsvc_counter, 0);
@@ -3828,6 +3801,10 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
+ /* Some dest can be in grace period even before cleanup, we have to
+ * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called.
+ */
+ rcu_barrier();
ip_vs_trash_cleanup(net);
ip_vs_stop_estimator(net, &ipvs->tot_stats);
ip_vs_control_net_cleanup_sysctl(net);
@@ -3873,10 +3850,10 @@ int __init ip_vs_control_init(void)
EnterFunction(2);
- /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
+ /* Initialize svc_table, ip_vs_svc_fwm_table */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
- INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
+ INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
+ INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
}
smp_wmb(); /* Do we really need it now ? */
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 7f3b0cc00b7a..ccab120df45e 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -51,7 +51,7 @@
* IPVS DH bucket
*/
struct ip_vs_dh_bucket {
- struct ip_vs_dest *dest; /* real server (cache) */
+ struct ip_vs_dest __rcu *dest; /* real server (cache) */
};
/*
@@ -64,6 +64,10 @@ struct ip_vs_dh_bucket {
#define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS)
#define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1)
+struct ip_vs_dh_state {
+ struct ip_vs_dh_bucket buckets[IP_VS_DH_TAB_SIZE];
+ struct rcu_head rcu_head;
+};
/*
* Returns hash value for IPVS DH entry
@@ -85,10 +89,9 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *ad
* Get ip_vs_dest associated with supplied parameters.
*/
static inline struct ip_vs_dest *
-ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl,
- const union nf_inet_addr *addr)
+ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr)
{
- return (tbl[ip_vs_dh_hashkey(af, addr)]).dest;
+ return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest);
}
@@ -96,25 +99,30 @@ ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl,
* Assign all the hash buckets of the specified table with the service.
*/
static int
-ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
+ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc)
{
int i;
struct ip_vs_dh_bucket *b;
struct list_head *p;
struct ip_vs_dest *dest;
+ bool empty;
- b = tbl;
+ b = &s->buckets[0];
p = &svc->destinations;
+ empty = list_empty(p);
for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
- if (list_empty(p)) {
- b->dest = NULL;
- } else {
+ dest = rcu_dereference_protected(b->dest, 1);
+ if (dest)
+ ip_vs_dest_put(dest);
+ if (empty)
+ RCU_INIT_POINTER(b->dest, NULL);
+ else {
if (p == &svc->destinations)
p = p->next;
dest = list_entry(p, struct ip_vs_dest, n_list);
- atomic_inc(&dest->refcnt);
- b->dest = dest;
+ ip_vs_dest_hold(dest);
+ RCU_INIT_POINTER(b->dest, dest);
p = p->next;
}
@@ -127,16 +135,18 @@ ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
/*
* Flush all the hash buckets of the specified table.
*/
-static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
+static void ip_vs_dh_flush(struct ip_vs_dh_state *s)
{
int i;
struct ip_vs_dh_bucket *b;
+ struct ip_vs_dest *dest;
- b = tbl;
+ b = &s->buckets[0];
for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
- if (b->dest) {
- atomic_dec(&b->dest->refcnt);
- b->dest = NULL;
+ dest = rcu_dereference_protected(b->dest, 1);
+ if (dest) {
+ ip_vs_dest_put(dest);
+ RCU_INIT_POINTER(b->dest, NULL);
}
b++;
}
@@ -145,51 +155,46 @@ static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
{
- struct ip_vs_dh_bucket *tbl;
+ struct ip_vs_dh_state *s;
/* allocate the DH table for this service */
- tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,
- GFP_KERNEL);
- if (tbl == NULL)
+ s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL);
+ if (s == NULL)
return -ENOMEM;
- svc->sched_data = tbl;
+ svc->sched_data = s;
IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
"current service\n",
sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
- /* assign the hash buckets with the updated service */
- ip_vs_dh_assign(tbl, svc);
+ /* assign the hash buckets with current dests */
+ ip_vs_dh_reassign(s, svc);
return 0;
}
-static int ip_vs_dh_done_svc(struct ip_vs_service *svc)
+static void ip_vs_dh_done_svc(struct ip_vs_service *svc)
{
- struct ip_vs_dh_bucket *tbl = svc->sched_data;
+ struct ip_vs_dh_state *s = svc->sched_data;
/* got to clean up hash buckets here */
- ip_vs_dh_flush(tbl);
+ ip_vs_dh_flush(s);
/* release the table itself */
- kfree(svc->sched_data);
+ kfree_rcu(s, rcu_head);
IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
-
- return 0;
}
-static int ip_vs_dh_update_svc(struct ip_vs_service *svc)
+static int ip_vs_dh_dest_changed(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest)
{
- struct ip_vs_dh_bucket *tbl = svc->sched_data;
-
- /* got to clean up hash buckets here */
- ip_vs_dh_flush(tbl);
+ struct ip_vs_dh_state *s = svc->sched_data;
/* assign the hash buckets with the updated service */
- ip_vs_dh_assign(tbl, svc);
+ ip_vs_dh_reassign(s, svc);
return 0;
}
@@ -212,19 +217,20 @@ static struct ip_vs_dest *
ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_dest *dest;
- struct ip_vs_dh_bucket *tbl;
+ struct ip_vs_dh_state *s;
struct ip_vs_iphdr iph;
ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
- tbl = (struct ip_vs_dh_bucket *)svc->sched_data;
- dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr);
+ s = (struct ip_vs_dh_state *) svc->sched_data;
+ dest = ip_vs_dh_get(svc->af, s, &iph.daddr);
if (!dest
|| !(dest->flags & IP_VS_DEST_F_AVAILABLE)
|| atomic_read(&dest->weight) <= 0
|| is_overloaded(dest)) {
+ ip_vs_scheduler_err(svc, "no destination available");
return NULL;
}
@@ -248,7 +254,8 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler =
.n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
.init_service = ip_vs_dh_init_svc,
.done_service = ip_vs_dh_done_svc,
- .update_service = ip_vs_dh_update_svc,
+ .add_dest = ip_vs_dh_dest_changed,
+ .del_dest = ip_vs_dh_dest_changed,
.schedule = ip_vs_dh_schedule,
};
@@ -262,6 +269,7 @@ static int __init ip_vs_dh_init(void)
static void __exit ip_vs_dh_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
+ synchronize_rcu();
}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 4f53a5f04437..77c173282f38 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -267,10 +267,12 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* hopefully it will succeed on the retransmitted
* packet.
*/
+ rcu_read_lock();
ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
iph->ihl * 4,
start-data, end-start,
buf, buf_len);
+ rcu_read_unlock();
if (ret) {
ip_vs_nfct_expect_related(skb, ct, n_cp,
IPPROTO_TCP, 0, 0);
@@ -480,6 +482,7 @@ static int __init ip_vs_ftp_init(void)
int rv;
rv = register_pernet_subsys(&ip_vs_ftp_ops);
+ /* rcu_barrier() is called by netns on error */
return rv;
}
@@ -489,6 +492,7 @@ static int __init ip_vs_ftp_init(void)
static void __exit ip_vs_ftp_exit(void)
{
unregister_pernet_subsys(&ip_vs_ftp_ops);
+ /* rcu_barrier() is called by netns */
}
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index fdd89b9564ea..b2cc2528a4df 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -90,11 +90,12 @@
* IP address and its destination server
*/
struct ip_vs_lblc_entry {
- struct list_head list;
+ struct hlist_node list;
int af; /* address family */
union nf_inet_addr addr; /* destination IP address */
- struct ip_vs_dest *dest; /* real server (cache) */
+ struct ip_vs_dest __rcu *dest; /* real server (cache) */
unsigned long lastuse; /* last used time */
+ struct rcu_head rcu_head;
};
@@ -102,12 +103,14 @@ struct ip_vs_lblc_entry {
* IPVS lblc hash table
*/
struct ip_vs_lblc_table {
- struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
+ struct rcu_head rcu_head;
+ struct hlist_head __rcu bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
+ struct timer_list periodic_timer; /* collect stale entries */
atomic_t entries; /* number of entries */
int max_size; /* maximum size of entries */
- struct timer_list periodic_timer; /* collect stale entries */
int rover; /* rover for expire check */
int counter; /* counter for no expire */
+ bool dead;
};
@@ -129,13 +132,16 @@ static ctl_table vs_vars_table[] = {
static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
{
- list_del(&en->list);
+ struct ip_vs_dest *dest;
+
+ hlist_del_rcu(&en->list);
/*
* We don't kfree dest because it is referred either by its service
* or the trash dest list.
*/
- atomic_dec(&en->dest->refcnt);
- kfree(en);
+ dest = rcu_dereference_protected(en->dest, 1);
+ ip_vs_dest_put(dest);
+ kfree_rcu(en, rcu_head);
}
@@ -165,15 +171,12 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
{
unsigned int hash = ip_vs_lblc_hashkey(en->af, &en->addr);
- list_add(&en->list, &tbl->bucket[hash]);
+ hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);
atomic_inc(&tbl->entries);
}
-/*
- * Get ip_vs_lblc_entry associated with supplied parameters. Called under read
- * lock
- */
+/* Get ip_vs_lblc_entry associated with supplied parameters. */
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
const union nf_inet_addr *addr)
@@ -181,7 +184,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
unsigned int hash = ip_vs_lblc_hashkey(af, addr);
struct ip_vs_lblc_entry *en;
- list_for_each_entry(en, &tbl->bucket[hash], list)
+ hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)
if (ip_vs_addr_equal(af, &en->addr, addr))
return en;
@@ -191,7 +194,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
/*
* Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP
- * address to a server. Called under write lock.
+ * address to a server. Called under spin lock.
*/
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
@@ -209,14 +212,20 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
ip_vs_addr_copy(dest->af, &en->addr, daddr);
en->lastuse = jiffies;
- atomic_inc(&dest->refcnt);
- en->dest = dest;
+ ip_vs_dest_hold(dest);
+ RCU_INIT_POINTER(en->dest, dest);
ip_vs_lblc_hash(tbl, en);
- } else if (en->dest != dest) {
- atomic_dec(&en->dest->refcnt);
- atomic_inc(&dest->refcnt);
- en->dest = dest;
+ } else {
+ struct ip_vs_dest *old_dest;
+
+ old_dest = rcu_dereference_protected(en->dest, 1);
+ if (old_dest != dest) {
+ ip_vs_dest_put(old_dest);
+ ip_vs_dest_hold(dest);
+ /* No ordering constraints for refcnt */
+ RCU_INIT_POINTER(en->dest, dest);
+ }
}
return en;
@@ -226,17 +235,22 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
/*
* Flush all the entries of the specified table.
*/
-static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
+static void ip_vs_lblc_flush(struct ip_vs_service *svc)
{
- struct ip_vs_lblc_entry *en, *nxt;
+ struct ip_vs_lblc_table *tbl = svc->sched_data;
+ struct ip_vs_lblc_entry *en;
+ struct hlist_node *next;
int i;
+ spin_lock_bh(&svc->sched_lock);
+ tbl->dead = 1;
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
- list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
ip_vs_lblc_free(en);
atomic_dec(&tbl->entries);
}
}
+ spin_unlock_bh(&svc->sched_lock);
}
static int sysctl_lblc_expiration(struct ip_vs_service *svc)
@@ -252,15 +266,16 @@ static int sysctl_lblc_expiration(struct ip_vs_service *svc)
static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
{
struct ip_vs_lblc_table *tbl = svc->sched_data;
- struct ip_vs_lblc_entry *en, *nxt;
+ struct ip_vs_lblc_entry *en;
+ struct hlist_node *next;
unsigned long now = jiffies;
int i, j;
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
- write_lock(&svc->sched_lock);
- list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+ spin_lock(&svc->sched_lock);
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
if (time_before(now,
en->lastuse +
sysctl_lblc_expiration(svc)))
@@ -269,7 +284,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
ip_vs_lblc_free(en);
atomic_dec(&tbl->entries);
}
- write_unlock(&svc->sched_lock);
+ spin_unlock(&svc->sched_lock);
}
tbl->rover = j;
}
@@ -293,7 +308,8 @@ static void ip_vs_lblc_check_expire(unsigned long data)
unsigned long now = jiffies;
int goal;
int i, j;
- struct ip_vs_lblc_entry *en, *nxt;
+ struct ip_vs_lblc_entry *en;
+ struct hlist_node *next;
if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
/* do full expiration check */
@@ -314,8 +330,8 @@ static void ip_vs_lblc_check_expire(unsigned long data)
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
- write_lock(&svc->sched_lock);
- list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+ spin_lock(&svc->sched_lock);
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
continue;
@@ -323,7 +339,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
atomic_dec(&tbl->entries);
goal--;
}
- write_unlock(&svc->sched_lock);
+ spin_unlock(&svc->sched_lock);
if (goal <= 0)
break;
}
@@ -354,11 +370,12 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
* Initialize the hash buckets
*/
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
- INIT_LIST_HEAD(&tbl->bucket[i]);
+ INIT_HLIST_HEAD(&tbl->bucket[i]);
}
tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
tbl->rover = 0;
tbl->counter = 1;
+ tbl->dead = 0;
/*
* Hook periodic timer for garbage collection
@@ -371,7 +388,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
}
-static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
+static void ip_vs_lblc_done_svc(struct ip_vs_service *svc)
{
struct ip_vs_lblc_table *tbl = svc->sched_data;
@@ -379,14 +396,12 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
del_timer_sync(&tbl->periodic_timer);
/* got to clean up table entries here */
- ip_vs_lblc_flush(tbl);
+ ip_vs_lblc_flush(svc);
/* release the table itself */
- kfree(tbl);
+ kfree_rcu(tbl, rcu_head);
IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
sizeof(*tbl));
-
- return 0;
}
@@ -408,7 +423,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
* The server with weight=0 is quiesced and will not receive any
* new connection.
*/
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
if (atomic_read(&dest->weight) > 0) {
@@ -423,7 +438,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
* Find the destination with the least load.
*/
nextstage:
- list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+ list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
@@ -457,7 +472,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
struct ip_vs_dest *d;
- list_for_each_entry(d, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(d, &svc->destinations, n_list) {
if (atomic_read(&d->activeconns)*2
< atomic_read(&d->weight)) {
return 1;
@@ -484,7 +499,6 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
/* First look in our cache */
- read_lock(&svc->sched_lock);
en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr);
if (en) {
/* We only hold a read lock, but this is atomic */
@@ -499,14 +513,11 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* free up entries from the trash at any time.
*/
- if (en->dest->flags & IP_VS_DEST_F_AVAILABLE)
- dest = en->dest;
+ dest = rcu_dereference(en->dest);
+ if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
+ atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
+ goto out;
}
- read_unlock(&svc->sched_lock);
-
- /* If the destination has a weight and is not overloaded, use it */
- if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
- goto out;
/* No cache entry or it is invalid, time to schedule */
dest = __ip_vs_lblc_schedule(svc);
@@ -516,9 +527,10 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
}
/* If we fail to create a cache entry, we'll just use the valid dest */
- write_lock(&svc->sched_lock);
- ip_vs_lblc_new(tbl, &iph.daddr, dest);
- write_unlock(&svc->sched_lock);
+ spin_lock_bh(&svc->sched_lock);
+ if (!tbl->dead)
+ ip_vs_lblc_new(tbl, &iph.daddr, dest);
+ spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
@@ -621,6 +633,7 @@ static void __exit ip_vs_lblc_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
unregister_pernet_subsys(&ip_vs_lblc_ops);
+ synchronize_rcu();
}
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index c03b6a3ade2f..feb9656eac58 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -89,40 +89,44 @@
*/
struct ip_vs_dest_set_elem {
struct list_head list; /* list link */
- struct ip_vs_dest *dest; /* destination server */
+ struct ip_vs_dest __rcu *dest; /* destination server */
+ struct rcu_head rcu_head;
};
struct ip_vs_dest_set {
atomic_t size; /* set size */
unsigned long lastmod; /* last modified time */
struct list_head list; /* destination list */
- rwlock_t lock; /* lock for this list */
};
-static struct ip_vs_dest_set_elem *
-ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
+static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
+ struct ip_vs_dest *dest, bool check)
{
struct ip_vs_dest_set_elem *e;
- list_for_each_entry(e, &set->list, list) {
- if (e->dest == dest)
- /* already existed */
- return NULL;
+ if (check) {
+ list_for_each_entry(e, &set->list, list) {
+ struct ip_vs_dest *d;
+
+ d = rcu_dereference_protected(e->dest, 1);
+ if (d == dest)
+ /* already existed */
+ return;
+ }
}
e = kmalloc(sizeof(*e), GFP_ATOMIC);
if (e == NULL)
- return NULL;
+ return;
- atomic_inc(&dest->refcnt);
- e->dest = dest;
+ ip_vs_dest_hold(dest);
+ RCU_INIT_POINTER(e->dest, dest);
- list_add(&e->list, &set->list);
+ list_add_rcu(&e->list, &set->list);
atomic_inc(&set->size);
set->lastmod = jiffies;
- return e;
}
static void
@@ -131,13 +135,16 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
struct ip_vs_dest_set_elem *e;
list_for_each_entry(e, &set->list, list) {
- if (e->dest == dest) {
+ struct ip_vs_dest *d;
+
+ d = rcu_dereference_protected(e->dest, 1);
+ if (d == dest) {
/* HIT */
atomic_dec(&set->size);
set->lastmod = jiffies;
- atomic_dec(&e->dest->refcnt);
- list_del(&e->list);
- kfree(e);
+ ip_vs_dest_put(dest);
+ list_del_rcu(&e->list);
+ kfree_rcu(e, rcu_head);
break;
}
}
@@ -147,17 +154,18 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
{
struct ip_vs_dest_set_elem *e, *ep;
- write_lock(&set->lock);
list_for_each_entry_safe(e, ep, &set->list, list) {
+ struct ip_vs_dest *d;
+
+ d = rcu_dereference_protected(e->dest, 1);
/*
* We don't kfree dest because it is referred either
* by its service or by the trash dest list.
*/
- atomic_dec(&e->dest->refcnt);
- list_del(&e->list);
- kfree(e);
+ ip_vs_dest_put(d);
+ list_del_rcu(&e->list);
+ kfree_rcu(e, rcu_head);
}
- write_unlock(&set->lock);
}
/* get weighted least-connection node in the destination set */
@@ -171,8 +179,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
return NULL;
/* select the first destination server, whose weight > 0 */
- list_for_each_entry(e, &set->list, list) {
- least = e->dest;
+ list_for_each_entry_rcu(e, &set->list, list) {
+ least = rcu_dereference(e->dest);
if (least->flags & IP_VS_DEST_F_OVERLOAD)
continue;
@@ -186,8 +194,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
/* find the destination with the weighted least load */
nextstage:
- list_for_each_entry(e, &set->list, list) {
- dest = e->dest;
+ list_for_each_entry_continue_rcu(e, &set->list, list) {
+ dest = rcu_dereference(e->dest);
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
@@ -224,7 +232,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
/* select the first destination server, whose weight > 0 */
list_for_each_entry(e, &set->list, list) {
- most = e->dest;
+ most = rcu_dereference_protected(e->dest, 1);
if (atomic_read(&most->weight) > 0) {
moh = ip_vs_dest_conn_overhead(most);
goto nextstage;
@@ -234,8 +242,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
/* find the destination with the weighted most load */
nextstage:
- list_for_each_entry(e, &set->list, list) {
- dest = e->dest;
+ list_for_each_entry_continue(e, &set->list, list) {
+ dest = rcu_dereference_protected(e->dest, 1);
doh = ip_vs_dest_conn_overhead(dest);
/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
if ((moh * atomic_read(&dest->weight) <
@@ -262,11 +270,12 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
* IP address and its destination server set
*/
struct ip_vs_lblcr_entry {
- struct list_head list;
+ struct hlist_node list;
int af; /* address family */
union nf_inet_addr addr; /* destination IP address */
struct ip_vs_dest_set set; /* destination server set */
unsigned long lastuse; /* last used time */
+ struct rcu_head rcu_head;
};
@@ -274,12 +283,14 @@ struct ip_vs_lblcr_entry {
* IPVS lblcr hash table
*/
struct ip_vs_lblcr_table {
- struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */
+ struct rcu_head rcu_head;
+ struct hlist_head __rcu bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */
atomic_t entries; /* number of entries */
int max_size; /* maximum size of entries */
struct timer_list periodic_timer; /* collect stale entries */
int rover; /* rover for expire check */
int counter; /* counter for no expire */
+ bool dead;
};
@@ -302,9 +313,9 @@ static ctl_table vs_vars_table[] = {
static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
- list_del(&en->list);
+ hlist_del_rcu(&en->list);
ip_vs_dest_set_eraseall(&en->set);
- kfree(en);
+ kfree_rcu(en, rcu_head);
}
@@ -334,15 +345,12 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
{
unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr);
- list_add(&en->list, &tbl->bucket[hash]);
+ hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);
atomic_inc(&tbl->entries);
}
-/*
- * Get ip_vs_lblcr_entry associated with supplied parameters. Called under
- * read lock.
- */
+/* Get ip_vs_lblcr_entry associated with supplied parameters. */
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
const union nf_inet_addr *addr)
@@ -350,7 +358,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
unsigned int hash = ip_vs_lblcr_hashkey(af, addr);
struct ip_vs_lblcr_entry *en;
- list_for_each_entry(en, &tbl->bucket[hash], list)
+ hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)
if (ip_vs_addr_equal(af, &en->addr, addr))
return en;
@@ -360,7 +368,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
/*
* Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
- * IP address to a server. Called under write lock.
+ * IP address to a server. Called under spin lock.
*/
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
@@ -381,14 +389,14 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
/* initialize its dest set */
atomic_set(&(en->set.size), 0);
INIT_LIST_HEAD(&en->set.list);
- rwlock_init(&en->set.lock);
+
+ ip_vs_dest_set_insert(&en->set, dest, false);
ip_vs_lblcr_hash(tbl, en);
+ return en;
}
- write_lock(&en->set.lock);
- ip_vs_dest_set_insert(&en->set, dest);
- write_unlock(&en->set.lock);
+ ip_vs_dest_set_insert(&en->set, dest, true);
return en;
}
@@ -397,17 +405,21 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
/*
* Flush all the entries of the specified table.
*/
-static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
+static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
{
+ struct ip_vs_lblcr_table *tbl = svc->sched_data;
int i;
- struct ip_vs_lblcr_entry *en, *nxt;
+ struct ip_vs_lblcr_entry *en;
+ struct hlist_node *next;
- /* No locking required, only called during cleanup. */
+ spin_lock_bh(&svc->sched_lock);
+ tbl->dead = 1;
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
- list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
ip_vs_lblcr_free(en);
}
}
+ spin_unlock_bh(&svc->sched_lock);
}
static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
@@ -425,13 +437,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
struct ip_vs_lblcr_table *tbl = svc->sched_data;
unsigned long now = jiffies;
int i, j;
- struct ip_vs_lblcr_entry *en, *nxt;
+ struct ip_vs_lblcr_entry *en;
+ struct hlist_node *next;
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
- write_lock(&svc->sched_lock);
- list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+ spin_lock(&svc->sched_lock);
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
if (time_after(en->lastuse +
sysctl_lblcr_expiration(svc), now))
continue;
@@ -439,7 +452,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
ip_vs_lblcr_free(en);
atomic_dec(&tbl->entries);
}
- write_unlock(&svc->sched_lock);
+ spin_unlock(&svc->sched_lock);
}
tbl->rover = j;
}
@@ -463,7 +476,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
unsigned long now = jiffies;
int goal;
int i, j;
- struct ip_vs_lblcr_entry *en, *nxt;
+ struct ip_vs_lblcr_entry *en;
+ struct hlist_node *next;
if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
/* do full expiration check */
@@ -484,8 +498,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
- write_lock(&svc->sched_lock);
- list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
+ spin_lock(&svc->sched_lock);
+ hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
continue;
@@ -493,7 +507,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
atomic_dec(&tbl->entries);
goal--;
}
- write_unlock(&svc->sched_lock);
+ spin_unlock(&svc->sched_lock);
if (goal <= 0)
break;
}
@@ -523,11 +537,12 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
* Initialize the hash buckets
*/
for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
- INIT_LIST_HEAD(&tbl->bucket[i]);
+ INIT_HLIST_HEAD(&tbl->bucket[i]);
}
tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
tbl->rover = 0;
tbl->counter = 1;
+ tbl->dead = 0;
/*
* Hook periodic timer for garbage collection
@@ -540,7 +555,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
}
-static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
+static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
{
struct ip_vs_lblcr_table *tbl = svc->sched_data;
@@ -548,14 +563,12 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
del_timer_sync(&tbl->periodic_timer);
/* got to clean up table entries here */
- ip_vs_lblcr_flush(tbl);
+ ip_vs_lblcr_flush(svc);
/* release the table itself */
- kfree(tbl);
+ kfree_rcu(tbl, rcu_head);
IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
sizeof(*tbl));
-
- return 0;
}
@@ -577,7 +590,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
* The server with weight=0 is quiesced and will not receive any
* new connection.
*/
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
@@ -593,7 +606,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
* Find the destination with the least load.
*/
nextstage:
- list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+ list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
@@ -627,7 +640,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
struct ip_vs_dest *d;
- list_for_each_entry(d, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(d, &svc->destinations, n_list) {
if (atomic_read(&d->activeconns)*2
< atomic_read(&d->weight)) {
return 1;
@@ -646,7 +659,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_lblcr_table *tbl = svc->sched_data;
struct ip_vs_iphdr iph;
- struct ip_vs_dest *dest = NULL;
+ struct ip_vs_dest *dest;
struct ip_vs_lblcr_entry *en;
ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
@@ -654,53 +667,46 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
/* First look in our cache */
- read_lock(&svc->sched_lock);
en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
if (en) {
- /* We only hold a read lock, but this is atomic */
en->lastuse = jiffies;
/* Get the least loaded destination */
- read_lock(&en->set.lock);
dest = ip_vs_dest_set_min(&en->set);
- read_unlock(&en->set.lock);
/* More than one destination + enough time passed by, cleanup */
if (atomic_read(&en->set.size) > 1 &&
- time_after(jiffies, en->set.lastmod +
+ time_after(jiffies, en->set.lastmod +
sysctl_lblcr_expiration(svc))) {
- struct ip_vs_dest *m;
+ spin_lock_bh(&svc->sched_lock);
+ if (atomic_read(&en->set.size) > 1) {
+ struct ip_vs_dest *m;
- write_lock(&en->set.lock);
- m = ip_vs_dest_set_max(&en->set);
- if (m)
- ip_vs_dest_set_erase(&en->set, m);
- write_unlock(&en->set.lock);
+ m = ip_vs_dest_set_max(&en->set);
+ if (m)
+ ip_vs_dest_set_erase(&en->set, m);
+ }
+ spin_unlock_bh(&svc->sched_lock);
}
/* If the destination is not overloaded, use it */
- if (dest && !is_overloaded(dest, svc)) {
- read_unlock(&svc->sched_lock);
+ if (dest && !is_overloaded(dest, svc))
goto out;
- }
/* The cache entry is invalid, time to schedule */
dest = __ip_vs_lblcr_schedule(svc);
if (!dest) {
ip_vs_scheduler_err(svc, "no destination available");
- read_unlock(&svc->sched_lock);
return NULL;
}
/* Update our cache entry */
- write_lock(&en->set.lock);
- ip_vs_dest_set_insert(&en->set, dest);
- write_unlock(&en->set.lock);
- }
- read_unlock(&svc->sched_lock);
-
- if (dest)
+ spin_lock_bh(&svc->sched_lock);
+ if (!tbl->dead)
+ ip_vs_dest_set_insert(&en->set, dest, true);
+ spin_unlock_bh(&svc->sched_lock);
goto out;
+ }
/* No cache entry, time to schedule */
dest = __ip_vs_lblcr_schedule(svc);
@@ -710,9 +716,10 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
}
/* If we fail to create a cache entry, we'll just use the valid dest */
- write_lock(&svc->sched_lock);
- ip_vs_lblcr_new(tbl, &iph.daddr, dest);
- write_unlock(&svc->sched_lock);
+ spin_lock_bh(&svc->sched_lock);
+ if (!tbl->dead)
+ ip_vs_lblcr_new(tbl, &iph.daddr, dest);
+ spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
@@ -814,6 +821,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
unregister_pernet_subsys(&ip_vs_lblcr_ops);
+ synchronize_rcu();
}
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index f391819c0cca..5128e338a749 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -42,7 +42,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* served, but no new connection is assigned to the server.
*/
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
atomic_read(&dest->weight) == 0)
continue;
@@ -84,6 +84,7 @@ static int __init ip_vs_lc_init(void)
static void __exit ip_vs_lc_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lc_scheduler);
+ synchronize_rcu();
}
module_init(ip_vs_lc_init);
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index 984d9c137d84..646cfd4baa73 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -75,7 +75,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* new connections.
*/
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD ||
!atomic_read(&dest->weight))
@@ -133,6 +133,7 @@ static int __init ip_vs_nq_init(void)
static void __exit ip_vs_nq_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
+ synchronize_rcu();
}
module_init(ip_vs_nq_init);
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
index 5cf859ccb31b..1a82b29ce8ea 100644
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -13,20 +13,8 @@
/* IPVS pe list */
static LIST_HEAD(ip_vs_pe);
-/* lock for service table */
-static DEFINE_SPINLOCK(ip_vs_pe_lock);
-
-/* Bind a service with a pe */
-void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe)
-{
- svc->pe = pe;
-}
-
-/* Unbind a service from its pe */
-void ip_vs_unbind_pe(struct ip_vs_service *svc)
-{
- svc->pe = NULL;
-}
+/* semaphore for IPVS PEs. */
+static DEFINE_MUTEX(ip_vs_pe_mutex);
/* Get pe in the pe list by name */
struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
@@ -36,9 +24,8 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
pe_name);
- spin_lock_bh(&ip_vs_pe_lock);
-
- list_for_each_entry(pe, &ip_vs_pe, n_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pe, &ip_vs_pe, n_list) {
/* Test and get the modules atomically */
if (pe->module &&
!try_module_get(pe->module)) {
@@ -47,14 +34,14 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
}
if (strcmp(pe_name, pe->name)==0) {
/* HIT */
- spin_unlock_bh(&ip_vs_pe_lock);
+ rcu_read_unlock();
return pe;
}
if (pe->module)
module_put(pe->module);
}
+ rcu_read_unlock();
- spin_unlock_bh(&ip_vs_pe_lock);
return NULL;
}
@@ -83,22 +70,13 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)
/* increase the module use count */
ip_vs_use_count_inc();
- spin_lock_bh(&ip_vs_pe_lock);
-
- if (!list_empty(&pe->n_list)) {
- spin_unlock_bh(&ip_vs_pe_lock);
- ip_vs_use_count_dec();
- pr_err("%s(): [%s] pe already linked\n",
- __func__, pe->name);
- return -EINVAL;
- }
-
+ mutex_lock(&ip_vs_pe_mutex);
/* Make sure that the pe with this name doesn't exist
* in the pe list.
*/
list_for_each_entry(tmp, &ip_vs_pe, n_list) {
if (strcmp(tmp->name, pe->name) == 0) {
- spin_unlock_bh(&ip_vs_pe_lock);
+ mutex_unlock(&ip_vs_pe_mutex);
ip_vs_use_count_dec();
pr_err("%s(): [%s] pe already existed "
"in the system\n", __func__, pe->name);
@@ -106,8 +84,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)
}
}
/* Add it into the d-linked pe list */
- list_add(&pe->n_list, &ip_vs_pe);
- spin_unlock_bh(&ip_vs_pe_lock);
+ list_add_rcu(&pe->n_list, &ip_vs_pe);
+ mutex_unlock(&ip_vs_pe_mutex);
pr_info("[%s] pe registered.\n", pe->name);
@@ -118,17 +96,10 @@ EXPORT_SYMBOL_GPL(register_ip_vs_pe);
/* Unregister a pe from the pe list */
int unregister_ip_vs_pe(struct ip_vs_pe *pe)
{
- spin_lock_bh(&ip_vs_pe_lock);
- if (list_empty(&pe->n_list)) {
- spin_unlock_bh(&ip_vs_pe_lock);
- pr_err("%s(): [%s] pe is not in the list. failed\n",
- __func__, pe->name);
- return -EINVAL;
- }
-
+ mutex_lock(&ip_vs_pe_mutex);
/* Remove it from the d-linked pe list */
- list_del(&pe->n_list);
- spin_unlock_bh(&ip_vs_pe_lock);
+ list_del_rcu(&pe->n_list);
+ mutex_unlock(&ip_vs_pe_mutex);
/* decrease the module use count */
ip_vs_use_count_dec();
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 12475ef88daf..00cc0241ed87 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -172,6 +172,7 @@ static int __init ip_vs_sip_init(void)
static void __exit ip_vs_sip_cleanup(void)
{
unregister_ip_vs_pe(&ip_vs_sip_pe);
+ synchronize_rcu();
}
module_init(ip_vs_sip_init);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index cd1d7298f7ba..6e14a7b5602f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -27,9 +27,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (sch == NULL)
return 0;
net = skb_net(skb);
+ rcu_read_lock();
if ((sch->type == SCTP_CID_INIT) &&
- (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
- &iph->daddr, sh->dest))) {
+ (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
+ &iph->daddr, sh->dest))) {
int ignored;
if (ip_vs_todrop(net_ipvs(net))) {
@@ -37,7 +38,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- ip_vs_service_put(svc);
+ rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -49,14 +50,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (!*cpp && ignored <= 0) {
if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd, iph);
- else {
- ip_vs_service_put(svc);
+ else
*verdict = NF_DROP;
- }
+ rcu_read_unlock();
return 0;
}
- ip_vs_service_put(svc);
}
+ rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -994,9 +994,9 @@ static void
sctp_state_transition(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb, struct ip_vs_proto_data *pd)
{
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
set_sctp_state(pd, cp, direction, skb);
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
}
static inline __u16 sctp_app_hashkey(__be16 port)
@@ -1016,30 +1016,25 @@ static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
hash = sctp_app_hashkey(port);
- spin_lock_bh(&ipvs->sctp_app_lock);
list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
+ list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
atomic_inc(&pd->appcnt);
out:
- spin_unlock_bh(&ipvs->sctp_app_lock);
return ret;
}
static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
- spin_lock_bh(&ipvs->sctp_app_lock);
atomic_dec(&pd->appcnt);
- list_del(&inc->p_list);
- spin_unlock_bh(&ipvs->sctp_app_lock);
+ list_del_rcu(&inc->p_list);
}
static int sctp_app_conn_bind(struct ip_vs_conn *cp)
@@ -1055,12 +1050,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = sctp_app_hashkey(cp->vport);
- spin_lock(&ipvs->sctp_app_lock);
- list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&ipvs->sctp_app_lock);
+ rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -1076,7 +1071,7 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
goto out;
}
}
- spin_unlock(&ipvs->sctp_app_lock);
+ rcu_read_unlock();
out:
return result;
}
@@ -1090,7 +1085,6 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
- spin_lock_init(&ipvs->sctp_app_lock);
pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
sizeof(sctp_timeouts));
if (!pd->timeout_table)
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 9af653a75825..50a15944c6c1 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -47,9 +47,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
}
net = skb_net(skb);
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
+ rcu_read_lock();
if (th->syn &&
- (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
- &iph->daddr, th->dest))) {
+ (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
+ &iph->daddr, th->dest))) {
int ignored;
if (ip_vs_todrop(net_ipvs(net))) {
@@ -57,7 +58,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- ip_vs_service_put(svc);
+ rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -70,14 +71,13 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (!*cpp && ignored <= 0) {
if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd, iph);
- else {
- ip_vs_service_put(svc);
+ else
*verdict = NF_DROP;
- }
+ rcu_read_unlock();
return 0;
}
- ip_vs_service_put(svc);
}
+ rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -557,9 +557,9 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
if (th == NULL)
return;
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
set_tcp_state(pd, cp, direction, th);
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
}
static inline __u16 tcp_app_hashkey(__be16 port)
@@ -580,18 +580,16 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
hash = tcp_app_hashkey(port);
- spin_lock_bh(&ipvs->tcp_app_lock);
list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
+ list_add_rcu(&inc->p_list, &ipvs->tcp_apps[hash]);
atomic_inc(&pd->appcnt);
out:
- spin_unlock_bh(&ipvs->tcp_app_lock);
return ret;
}
@@ -599,13 +597,10 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
static void
tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
- spin_lock_bh(&ipvs->tcp_app_lock);
atomic_dec(&pd->appcnt);
- list_del(&inc->p_list);
- spin_unlock_bh(&ipvs->tcp_app_lock);
+ list_del_rcu(&inc->p_list);
}
@@ -624,12 +619,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = tcp_app_hashkey(cp->vport);
- spin_lock(&ipvs->tcp_app_lock);
- list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&ipvs->tcp_app_lock);
+ rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -646,7 +641,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
goto out;
}
}
- spin_unlock(&ipvs->tcp_app_lock);
+ rcu_read_unlock();
out:
return result;
@@ -660,11 +655,11 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
{
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
cp->state = IP_VS_TCP_S_LISTEN;
cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
: tcp_timeouts[IP_VS_TCP_S_LISTEN]);
- spin_unlock(&cp->lock);
+ spin_unlock_bh(&cp->lock);
}
/* ---------------------------------------------
@@ -676,7 +671,6 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
- spin_lock_init(&ipvs->tcp_app_lock);
pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
sizeof(tcp_timeouts));
if (!pd->timeout_table)
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 503a842c90d2..b62a3c0ff9bf 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -44,8 +44,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
return 0;
}
net = skb_net(skb);
- svc = ip_vs_service_get(net, af, skb->mark, iph->protocol,
- &iph->daddr, uh->dest);
+ rcu_read_lock();
+ svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
+ &iph->daddr, uh->dest);
if (svc) {
int ignored;
@@ -54,7 +55,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* It seems that we are very loaded.
* We have to drop this packet :(
*/
- ip_vs_service_put(svc);
+ rcu_read_unlock();
*verdict = NF_DROP;
return 0;
}
@@ -67,14 +68,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (!*cpp && ignored <= 0) {
if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd, iph);
- else {
- ip_vs_service_put(svc);
+ else
*verdict = NF_DROP;
- }
+ rcu_read_unlock();
return 0;
}
- ip_vs_service_put(svc);
}
+ rcu_read_unlock();
/* NF_ACCEPT */
return 1;
}
@@ -359,19 +359,16 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc)
hash = udp_app_hashkey(port);
-
- spin_lock_bh(&ipvs->udp_app_lock);
list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &ipvs->udp_apps[hash]);
+ list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]);
atomic_inc(&pd->appcnt);
out:
- spin_unlock_bh(&ipvs->udp_app_lock);
return ret;
}
@@ -380,12 +377,9 @@ static void
udp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
- struct netns_ipvs *ipvs = net_ipvs(net);
- spin_lock_bh(&ipvs->udp_app_lock);
atomic_dec(&pd->appcnt);
- list_del(&inc->p_list);
- spin_unlock_bh(&ipvs->udp_app_lock);
+ list_del_rcu(&inc->p_list);
}
@@ -403,12 +397,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = udp_app_hashkey(cp->vport);
- spin_lock(&ipvs->udp_app_lock);
- list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&ipvs->udp_app_lock);
+ rcu_read_unlock();
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -425,7 +419,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
goto out;
}
}
- spin_unlock(&ipvs->udp_app_lock);
+ rcu_read_unlock();
out:
return result;
@@ -467,7 +461,6 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
- spin_lock_init(&ipvs->udp_app_lock);
pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
sizeof(udp_timeouts));
if (!pd->timeout_table)
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index c49b388d1085..c35986c793d9 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -35,9 +35,18 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
}
-static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
+static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest)
{
- svc->sched_data = &svc->destinations;
+ struct list_head *p;
+
+ spin_lock_bh(&svc->sched_lock);
+ p = (struct list_head *) svc->sched_data;
+ /* dest is already unlinked, so p->prev is not valid but
+ * p->next is valid, use it to reach previous entry.
+ */
+ if (p == &dest->n_list)
+ svc->sched_data = p->next->prev;
+ spin_unlock_bh(&svc->sched_lock);
return 0;
}
@@ -48,36 +57,41 @@ static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
static struct ip_vs_dest *
ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
- struct list_head *p, *q;
- struct ip_vs_dest *dest;
+ struct list_head *p;
+ struct ip_vs_dest *dest, *last;
+ int pass = 0;
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
- write_lock(&svc->sched_lock);
- p = (struct list_head *)svc->sched_data;
- p = p->next;
- q = p;
+ spin_lock_bh(&svc->sched_lock);
+ p = (struct list_head *) svc->sched_data;
+ last = dest = list_entry(p, struct ip_vs_dest, n_list);
+
do {
- /* skip list head */
- if (q == &svc->destinations) {
- q = q->next;
- continue;
+ list_for_each_entry_continue_rcu(dest,
+ &svc->destinations,
+ n_list) {
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+ atomic_read(&dest->weight) > 0)
+ /* HIT */
+ goto out;
+ if (dest == last)
+ goto stop;
}
-
- dest = list_entry(q, struct ip_vs_dest, n_list);
- if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
- atomic_read(&dest->weight) > 0)
- /* HIT */
- goto out;
- q = q->next;
- } while (q != p);
- write_unlock(&svc->sched_lock);
+ pass++;
+ /* Previous dest could be unlinked, do not loop forever.
+ * If we stay at head there is no need for 2nd pass.
+ */
+ } while (pass < 2 && p != &svc->destinations);
+
+stop:
+ spin_unlock_bh(&svc->sched_lock);
ip_vs_scheduler_err(svc, "no destination available");
return NULL;
out:
- svc->sched_data = q;
- write_unlock(&svc->sched_lock);
+ svc->sched_data = &dest->n_list;
+ spin_unlock_bh(&svc->sched_lock);
IP_VS_DBG_BUF(6, "RR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
@@ -94,7 +108,8 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
.init_service = ip_vs_rr_init_svc,
- .update_service = ip_vs_rr_update_svc,
+ .add_dest = NULL,
+ .del_dest = ip_vs_rr_del_dest,
.schedule = ip_vs_rr_schedule,
};
@@ -106,6 +121,7 @@ static int __init ip_vs_rr_init(void)
static void __exit ip_vs_rr_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);
+ synchronize_rcu();
}
module_init(ip_vs_rr_init);
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index d6bf20d6cdbe..4dbcda6258bc 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -35,8 +35,8 @@ EXPORT_SYMBOL(ip_vs_scheduler_err);
*/
static LIST_HEAD(ip_vs_schedulers);
-/* lock for service table */
-static DEFINE_SPINLOCK(ip_vs_sched_lock);
+/* semaphore for schedulers */
+static DEFINE_MUTEX(ip_vs_sched_mutex);
/*
@@ -47,8 +47,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
{
int ret;
- svc->scheduler = scheduler;
-
if (scheduler->init_service) {
ret = scheduler->init_service(svc);
if (ret) {
@@ -56,7 +54,7 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
return ret;
}
}
-
+ rcu_assign_pointer(svc->scheduler, scheduler);
return 0;
}
@@ -64,22 +62,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
/*
* Unbind a service with its scheduler
*/
-int ip_vs_unbind_scheduler(struct ip_vs_service *svc)
+void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
+ struct ip_vs_scheduler *sched)
{
- struct ip_vs_scheduler *sched = svc->scheduler;
+ struct ip_vs_scheduler *cur_sched;
- if (!sched)
- return 0;
+ cur_sched = rcu_dereference_protected(svc->scheduler, 1);
+ /* This check proves that old 'sched' was installed */
+ if (!cur_sched)
+ return;
- if (sched->done_service) {
- if (sched->done_service(svc) != 0) {
- pr_err("%s(): done error\n", __func__);
- return -EINVAL;
- }
- }
-
- svc->scheduler = NULL;
- return 0;
+ if (sched->done_service)
+ sched->done_service(svc);
+ /* svc->scheduler can not be set to NULL */
}
@@ -92,7 +87,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
- spin_lock_bh(&ip_vs_sched_lock);
+ mutex_lock(&ip_vs_sched_mutex);
list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
/*
@@ -106,14 +101,14 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
}
if (strcmp(sched_name, sched->name)==0) {
/* HIT */
- spin_unlock_bh(&ip_vs_sched_lock);
+ mutex_unlock(&ip_vs_sched_mutex);
return sched;
}
if (sched->module)
module_put(sched->module);
}
- spin_unlock_bh(&ip_vs_sched_lock);
+ mutex_unlock(&ip_vs_sched_mutex);
return NULL;
}
@@ -153,21 +148,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
{
+ struct ip_vs_scheduler *sched;
+
+ sched = rcu_dereference(svc->scheduler);
if (svc->fwmark) {
IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
- svc->scheduler->name, svc->fwmark,
- svc->fwmark, msg);
+ sched->name, svc->fwmark, svc->fwmark, msg);
#ifdef CONFIG_IP_VS_IPV6
} else if (svc->af == AF_INET6) {
IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
- svc->scheduler->name,
- ip_vs_proto_name(svc->protocol),
+ sched->name, ip_vs_proto_name(svc->protocol),
&svc->addr.in6, ntohs(svc->port), msg);
#endif
} else {
IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
- svc->scheduler->name,
- ip_vs_proto_name(svc->protocol),
+ sched->name, ip_vs_proto_name(svc->protocol),
&svc->addr.ip, ntohs(svc->port), msg);
}
}
@@ -192,10 +187,10 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
/* increase the module use count */
ip_vs_use_count_inc();
- spin_lock_bh(&ip_vs_sched_lock);
+ mutex_lock(&ip_vs_sched_mutex);
if (!list_empty(&scheduler->n_list)) {
- spin_unlock_bh(&ip_vs_sched_lock);
+ mutex_unlock(&ip_vs_sched_mutex);
ip_vs_use_count_dec();
pr_err("%s(): [%s] scheduler already linked\n",
__func__, scheduler->name);
@@ -208,7 +203,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
*/
list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
if (strcmp(scheduler->name, sched->name) == 0) {
- spin_unlock_bh(&ip_vs_sched_lock);
+ mutex_unlock(&ip_vs_sched_mutex);
ip_vs_use_count_dec();
pr_err("%s(): [%s] scheduler already existed "
"in the system\n", __func__, scheduler->name);
@@ -219,7 +214,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
* Add it into the d-linked scheduler list
*/
list_add(&scheduler->n_list, &ip_vs_schedulers);
- spin_unlock_bh(&ip_vs_sched_lock);
+ mutex_unlock(&ip_vs_sched_mutex);
pr_info("[%s] scheduler registered.\n", scheduler->name);
@@ -237,9 +232,9 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
return -EINVAL;
}
- spin_lock_bh(&ip_vs_sched_lock);
+ mutex_lock(&ip_vs_sched_mutex);
if (list_empty(&scheduler->n_list)) {
- spin_unlock_bh(&ip_vs_sched_lock);
+ mutex_unlock(&ip_vs_sched_mutex);
pr_err("%s(): [%s] scheduler is not in the list. failed\n",
__func__, scheduler->name);
return -EINVAL;
@@ -249,7 +244,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
* Remove it from the d-linked scheduler list
*/
list_del(&scheduler->n_list);
- spin_unlock_bh(&ip_vs_sched_lock);
+ mutex_unlock(&ip_vs_sched_mutex);
/* decrease the module use count */
ip_vs_use_count_dec();
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index 89ead246ed3d..f3205925359a 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -79,7 +79,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* new connections.
*/
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
atomic_read(&dest->weight) > 0) {
least = dest;
@@ -94,7 +94,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* Find the destination with the least load.
*/
nextstage:
- list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+ list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
doh = ip_vs_sed_dest_overhead(dest);
@@ -134,6 +134,7 @@ static int __init ip_vs_sed_init(void)
static void __exit ip_vs_sed_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
+ synchronize_rcu();
}
module_init(ip_vs_sed_init);
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index e33126994628..0df269d7c99f 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -53,7 +53,7 @@
* IPVS SH bucket
*/
struct ip_vs_sh_bucket {
- struct ip_vs_dest *dest; /* real server (cache) */
+ struct ip_vs_dest __rcu *dest; /* real server (cache) */
};
/*
@@ -66,6 +66,10 @@ struct ip_vs_sh_bucket {
#define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS)
#define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1)
+struct ip_vs_sh_state {
+ struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE];
+ struct rcu_head rcu_head;
+};
/*
* Returns hash value for IPVS SH entry
@@ -87,10 +91,9 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
* Get ip_vs_dest associated with supplied parameters.
*/
static inline struct ip_vs_dest *
-ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl,
- const union nf_inet_addr *addr)
+ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr)
{
- return (tbl[ip_vs_sh_hashkey(af, addr)]).dest;
+ return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest);
}
@@ -98,27 +101,32 @@ ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl,
* Assign all the hash buckets of the specified table with the service.
*/
static int
-ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
+ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc)
{
int i;
struct ip_vs_sh_bucket *b;
struct list_head *p;
struct ip_vs_dest *dest;
int d_count;
+ bool empty;
- b = tbl;
+ b = &s->buckets[0];
p = &svc->destinations;
+ empty = list_empty(p);
d_count = 0;
for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
- if (list_empty(p)) {
- b->dest = NULL;
- } else {
+ dest = rcu_dereference_protected(b->dest, 1);
+ if (dest)
+ ip_vs_dest_put(dest);
+ if (empty)
+ RCU_INIT_POINTER(b->dest, NULL);
+ else {
if (p == &svc->destinations)
p = p->next;
dest = list_entry(p, struct ip_vs_dest, n_list);
- atomic_inc(&dest->refcnt);
- b->dest = dest;
+ ip_vs_dest_hold(dest);
+ RCU_INIT_POINTER(b->dest, dest);
IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
i, IP_VS_DBG_ADDR(svc->af, &dest->addr),
@@ -140,16 +148,18 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
/*
* Flush all the hash buckets of the specified table.
*/
-static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
+static void ip_vs_sh_flush(struct ip_vs_sh_state *s)
{
int i;
struct ip_vs_sh_bucket *b;
+ struct ip_vs_dest *dest;
- b = tbl;
+ b = &s->buckets[0];
for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
- if (b->dest) {
- atomic_dec(&b->dest->refcnt);
- b->dest = NULL;
+ dest = rcu_dereference_protected(b->dest, 1);
+ if (dest) {
+ ip_vs_dest_put(dest);
+ RCU_INIT_POINTER(b->dest, NULL);
}
b++;
}
@@ -158,51 +168,46 @@ static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
{
- struct ip_vs_sh_bucket *tbl;
+ struct ip_vs_sh_state *s;
/* allocate the SH table for this service */
- tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
- GFP_KERNEL);
- if (tbl == NULL)
+ s = kzalloc(sizeof(struct ip_vs_sh_state), GFP_KERNEL);
+ if (s == NULL)
return -ENOMEM;
- svc->sched_data = tbl;
+ svc->sched_data = s;
IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
"current service\n",
sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
- /* assign the hash buckets with the updated service */
- ip_vs_sh_assign(tbl, svc);
+ /* assign the hash buckets with current dests */
+ ip_vs_sh_reassign(s, svc);
return 0;
}
-static int ip_vs_sh_done_svc(struct ip_vs_service *svc)
+static void ip_vs_sh_done_svc(struct ip_vs_service *svc)
{
- struct ip_vs_sh_bucket *tbl = svc->sched_data;
+ struct ip_vs_sh_state *s = svc->sched_data;
/* got to clean up hash buckets here */
- ip_vs_sh_flush(tbl);
+ ip_vs_sh_flush(s);
/* release the table itself */
- kfree(svc->sched_data);
+ kfree_rcu(s, rcu_head);
IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
-
- return 0;
}
-static int ip_vs_sh_update_svc(struct ip_vs_service *svc)
+static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest)
{
- struct ip_vs_sh_bucket *tbl = svc->sched_data;
-
- /* got to clean up hash buckets here */
- ip_vs_sh_flush(tbl);
+ struct ip_vs_sh_state *s = svc->sched_data;
/* assign the hash buckets with the updated service */
- ip_vs_sh_assign(tbl, svc);
+ ip_vs_sh_reassign(s, svc);
return 0;
}
@@ -225,15 +230,15 @@ static struct ip_vs_dest *
ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_dest *dest;
- struct ip_vs_sh_bucket *tbl;
+ struct ip_vs_sh_state *s;
struct ip_vs_iphdr iph;
ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
- tbl = (struct ip_vs_sh_bucket *)svc->sched_data;
- dest = ip_vs_sh_get(svc->af, tbl, &iph.saddr);
+ s = (struct ip_vs_sh_state *) svc->sched_data;
+ dest = ip_vs_sh_get(svc->af, s, &iph.saddr);
if (!dest
|| !(dest->flags & IP_VS_DEST_F_AVAILABLE)
|| atomic_read(&dest->weight) <= 0
@@ -262,7 +267,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler =
.n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
.init_service = ip_vs_sh_init_svc,
.done_service = ip_vs_sh_done_svc,
- .update_service = ip_vs_sh_update_svc,
+ .add_dest = ip_vs_sh_dest_changed,
+ .del_dest = ip_vs_sh_dest_changed,
+ .upd_dest = ip_vs_sh_dest_changed,
.schedule = ip_vs_sh_schedule,
};
@@ -276,6 +283,7 @@ static int __init ip_vs_sh_init(void)
static void __exit ip_vs_sh_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
+ synchronize_rcu();
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 44fd10c539ac..8e57077e5540 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -531,9 +531,9 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
return;
- spin_lock(&ipvs->sync_buff_lock);
+ spin_lock_bh(&ipvs->sync_buff_lock);
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
return;
}
@@ -552,7 +552,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
if (!buff) {
buff = ip_vs_sync_buff_create_v0(ipvs);
if (!buff) {
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
@@ -590,7 +590,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
sb_queue_tail(ipvs, ms);
ms->sync_buff = NULL;
}
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
/* synchronize its controller if it has */
cp = cp->control;
@@ -641,9 +641,9 @@ sloop:
pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
}
- spin_lock(&ipvs->sync_buff_lock);
+ spin_lock_bh(&ipvs->sync_buff_lock);
if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
return;
}
@@ -683,7 +683,7 @@ sloop:
if (!buff) {
buff = ip_vs_sync_buff_create(ipvs);
if (!buff) {
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
@@ -750,7 +750,7 @@ sloop:
}
}
- spin_unlock(&ipvs->sync_buff_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
control:
/* synchronize its controller if it has */
@@ -843,7 +843,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
kfree(param->pe_data);
dest = cp->dest;
- spin_lock(&cp->lock);
+ spin_lock_bh(&cp->lock);
if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
!(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
if (flags & IP_VS_CONN_F_INACTIVE) {
@@ -857,24 +857,21 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
cp->flags = flags;
- spin_unlock(&cp->lock);
- if (!dest) {
- dest = ip_vs_try_bind_dest(cp);
- if (dest)
- atomic_dec(&dest->refcnt);
- }
+ spin_unlock_bh(&cp->lock);
+ if (!dest)
+ ip_vs_try_bind_dest(cp);
} else {
/*
* Find the appropriate destination for the connection.
* If it is not found the connection will remain unbound
* but still handled.
*/
+ rcu_read_lock();
dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
param->vport, protocol, fwmark, flags);
cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
- if (dest)
- atomic_dec(&dest->refcnt);
+ rcu_read_unlock();
if (!cp) {
if (param->pe_data)
kfree(param->pe_data);
@@ -1692,11 +1689,7 @@ static int sync_thread_backup(void *data)
break;
}
- /* disable bottom half, because it accesses the data
- shared by softirq while getting/creating conns */
- local_bh_disable();
ip_vs_process_message(tinfo->net, tinfo->buf, len);
- local_bh_enable();
}
}
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index bc1bfc48a17f..c60a81c4ce9a 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -51,7 +51,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* new connections.
*/
- list_for_each_entry(dest, &svc->destinations, n_list) {
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
atomic_read(&dest->weight) > 0) {
least = dest;
@@ -66,7 +66,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
* Find the destination with the least load.
*/
nextstage:
- list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+ list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
doh = ip_vs_dest_conn_overhead(dest);
@@ -106,6 +106,7 @@ static int __init ip_vs_wlc_init(void)
static void __exit ip_vs_wlc_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler);
+ synchronize_rcu();
}
module_init(ip_vs_wlc_init);
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 231be7dd547a..0e68555bceb9 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -29,14 +29,45 @@
#include <net/ip_vs.h>
+/* The WRR algorithm depends on some caclulations:
+ * - mw: maximum weight
+ * - di: weight step, greatest common divisor from all weights
+ * - cw: current required weight
+ * As result, all weights are in the [di..mw] range with a step=di.
+ *
+ * First, we start with cw = mw and select dests with weight >= cw.
+ * Then cw is reduced with di and all dests are checked again.
+ * Last pass should be with cw = di. We have mw/di passes in total:
+ *
+ * pass 1: cw = max weight
+ * pass 2: cw = max weight - di
+ * pass 3: cw = max weight - 2 * di
+ * ...
+ * last pass: cw = di
+ *
+ * Weights are supposed to be >= di but we run in parallel with
+ * weight changes, it is possible some dest weight to be reduced
+ * below di, bad if it is the only available dest.
+ *
+ * So, we modify how mw is calculated, now it is reduced with (di - 1),
+ * so that last cw is 1 to catch such dests with weight below di:
+ * pass 1: cw = max weight - (di - 1)
+ * pass 2: cw = max weight - di - (di - 1)
+ * pass 3: cw = max weight - 2 * di - (di - 1)
+ * ...
+ * last pass: cw = 1
+ *
+ */
+
/*
* current destination pointer for weighted round-robin scheduling
*/
struct ip_vs_wrr_mark {
- struct list_head *cl; /* current list head */
+ struct ip_vs_dest *cl; /* current dest or head */
int cw; /* current weight */
int mw; /* maximum weight */
int di; /* decreasing interval */
+ struct rcu_head rcu_head;
};
@@ -88,36 +119,41 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
if (mark == NULL)
return -ENOMEM;
- mark->cl = &svc->destinations;
- mark->cw = 0;
- mark->mw = ip_vs_wrr_max_weight(svc);
+ mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list);
mark->di = ip_vs_wrr_gcd_weight(svc);
+ mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1);
+ mark->cw = mark->mw;
svc->sched_data = mark;
return 0;
}
-static int ip_vs_wrr_done_svc(struct ip_vs_service *svc)
+static void ip_vs_wrr_done_svc(struct ip_vs_service *svc)
{
+ struct ip_vs_wrr_mark *mark = svc->sched_data;
+
/*
* Release the mark variable
*/
- kfree(svc->sched_data);
-
- return 0;
+ kfree_rcu(mark, rcu_head);
}
-static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
+static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc,
+ struct ip_vs_dest *dest)
{
struct ip_vs_wrr_mark *mark = svc->sched_data;
- mark->cl = &svc->destinations;
- mark->mw = ip_vs_wrr_max_weight(svc);
+ spin_lock_bh(&svc->sched_lock);
+ mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list);
mark->di = ip_vs_wrr_gcd_weight(svc);
- if (mark->cw > mark->mw)
- mark->cw = 0;
+ mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1);
+ if (mark->cw > mark->mw || !mark->cw)
+ mark->cw = mark->mw;
+ else if (mark->di > 1)
+ mark->cw = (mark->cw / mark->di) * mark->di + 1;
+ spin_unlock_bh(&svc->sched_lock);
return 0;
}
@@ -128,80 +164,79 @@ static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
static struct ip_vs_dest *
ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
- struct ip_vs_dest *dest;
+ struct ip_vs_dest *dest, *last, *stop = NULL;
struct ip_vs_wrr_mark *mark = svc->sched_data;
- struct list_head *p;
+ bool last_pass = false, restarted = false;
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
- /*
- * This loop will always terminate, because mark->cw in (0, max_weight]
- * and at least one server has its weight equal to max_weight.
- */
- write_lock(&svc->sched_lock);
- p = mark->cl;
+ spin_lock_bh(&svc->sched_lock);
+ dest = mark->cl;
+ /* No available dests? */
+ if (mark->mw == 0)
+ goto err_noavail;
+ last = dest;
+ /* Stop only after all dests were checked for weight >= 1 (last pass) */
while (1) {
- if (mark->cl == &svc->destinations) {
- /* it is at the head of the destination list */
-
- if (mark->cl == mark->cl->next) {
- /* no dest entry */
- ip_vs_scheduler_err(svc,
- "no destination available: "
- "no destinations present");
- dest = NULL;
- goto out;
- }
-
- mark->cl = svc->destinations.next;
- mark->cw -= mark->di;
- if (mark->cw <= 0) {
- mark->cw = mark->mw;
- /*
- * Still zero, which means no available servers.
- */
- if (mark->cw == 0) {
- mark->cl = &svc->destinations;
- ip_vs_scheduler_err(svc,
- "no destination available");
- dest = NULL;
- goto out;
- }
- }
- } else
- mark->cl = mark->cl->next;
-
- if (mark->cl != &svc->destinations) {
- /* not at the head of the list */
- dest = list_entry(mark->cl, struct ip_vs_dest, n_list);
+ list_for_each_entry_continue_rcu(dest,
+ &svc->destinations,
+ n_list) {
if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
- atomic_read(&dest->weight) >= mark->cw) {
- /* got it */
- break;
- }
+ atomic_read(&dest->weight) >= mark->cw)
+ goto found;
+ if (dest == stop)
+ goto err_over;
}
-
- if (mark->cl == p && mark->cw == mark->di) {
- /* back to the start, and no dest is found.
- It is only possible when all dests are OVERLOADED */
- dest = NULL;
- ip_vs_scheduler_err(svc,
- "no destination available: "
- "all destinations are overloaded");
- goto out;
+ mark->cw -= mark->di;
+ if (mark->cw <= 0) {
+ mark->cw = mark->mw;
+ /* Stop if we tried last pass from first dest:
+ * 1. last_pass: we started checks when cw > di but
+ * then all dests were checked for w >= 1
+ * 2. last was head: the first and only traversal
+ * was for weight >= 1, for all dests.
+ */
+ if (last_pass ||
+ &last->n_list == &svc->destinations)
+ goto err_over;
+ restarted = true;
+ }
+ last_pass = mark->cw <= mark->di;
+ if (last_pass && restarted &&
+ &last->n_list != &svc->destinations) {
+ /* First traversal was for w >= 1 but only
+ * for dests after 'last', now do the same
+ * for all dests up to 'last'.
+ */
+ stop = last;
}
}
+found:
IP_VS_DBG_BUF(6, "WRR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
atomic_read(&dest->refcnt),
atomic_read(&dest->weight));
+ mark->cl = dest;
out:
- write_unlock(&svc->sched_lock);
+ spin_unlock_bh(&svc->sched_lock);
return dest;
+
+err_noavail:
+ mark->cl = dest;
+ dest = NULL;
+ ip_vs_scheduler_err(svc, "no destination available");
+ goto out;
+
+err_over:
+ mark->cl = dest;
+ dest = NULL;
+ ip_vs_scheduler_err(svc, "no destination available: "
+ "all destinations are overloaded");
+ goto out;
}
@@ -212,7 +247,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
.n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
.init_service = ip_vs_wrr_init_svc,
.done_service = ip_vs_wrr_done_svc,
- .update_service = ip_vs_wrr_update_svc,
+ .add_dest = ip_vs_wrr_dest_changed,
+ .del_dest = ip_vs_wrr_dest_changed,
+ .upd_dest = ip_vs_wrr_dest_changed,
.schedule = ip_vs_wrr_schedule,
};
@@ -224,6 +261,7 @@ static int __init ip_vs_wrr_init(void)
static void __exit ip_vs_wrr_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler);
+ synchronize_rcu();
}
module_init(ip_vs_wrr_init);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index ee6b7a9f1ec2..b75ff6429a04 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -17,6 +17,8 @@
* - not all connections have destination server, for example,
* connections in backup server when fwmark is used
* - bypass connections use daddr from packet
+ * - we can use dst without ref while sending in RCU section, we use
+ * ref when returning NF_ACCEPT for NAT-ed packet via loopback
* LOCAL_OUT rules:
* - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
* - skb->pkt_type is not set yet
@@ -51,39 +53,54 @@ enum {
*/
IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */
IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */
+ IP_VS_RT_MODE_TUNNEL = 32,/* Tunnel mode */
};
+static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void)
+{
+ return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC);
+}
+
+static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst)
+{
+ kfree(dest_dst);
+}
+
/*
* Destination cache to speed up outgoing route lookup
*/
static inline void
-__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
- u32 dst_cookie)
+__ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst,
+ struct dst_entry *dst, u32 dst_cookie)
{
- struct dst_entry *old_dst;
+ struct ip_vs_dest_dst *old;
+
+ old = rcu_dereference_protected(dest->dest_dst,
+ lockdep_is_held(&dest->dst_lock));
- old_dst = dest->dst_cache;
- dest->dst_cache = dst;
- dest->dst_rtos = rtos;
- dest->dst_cookie = dst_cookie;
- dst_release(old_dst);
+ if (dest_dst) {
+ dest_dst->dst_cache = dst;
+ dest_dst->dst_cookie = dst_cookie;
+ }
+ rcu_assign_pointer(dest->dest_dst, dest_dst);
+
+ if (old)
+ call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
}
-static inline struct dst_entry *
-__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
+static inline struct ip_vs_dest_dst *
+__ip_vs_dst_check(struct ip_vs_dest *dest)
{
- struct dst_entry *dst = dest->dst_cache;
+ struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst);
+ struct dst_entry *dst;
- if (!dst)
+ if (!dest_dst)
return NULL;
- if ((dst->obsolete || rtos != dest->dst_rtos) &&
- dst->ops->check(dst, dest->dst_cookie) == NULL) {
- dest->dst_cache = NULL;
- dst_release(dst);
+ dst = dest_dst->dst_cache;
+ if (dst->obsolete &&
+ dst->ops->check(dst, dest_dst->dst_cookie) == NULL)
return NULL;
- }
- dst_hold(dst);
- return dst;
+ return dest_dst;
}
static inline bool
@@ -104,7 +121,7 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
/* Get route to daddr, update *saddr, optionally bind route to saddr */
static struct rtable *do_output_route4(struct net *net, __be32 daddr,
- u32 rtos, int rt_mode, __be32 *saddr)
+ int rt_mode, __be32 *saddr)
{
struct flowi4 fl4;
struct rtable *rt;
@@ -113,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr,
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = daddr;
fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
- fl4.flowi4_tos = rtos;
fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
FLOWI_FLAG_KNOWN_NH : 0;
@@ -124,7 +140,7 @@ retry:
if (PTR_ERR(rt) == -EINVAL && *saddr &&
rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
*saddr = 0;
- flowi4_update_output(&fl4, 0, rtos, daddr, 0);
+ flowi4_update_output(&fl4, 0, 0, daddr, 0);
goto retry;
}
IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
@@ -132,7 +148,7 @@ retry:
} else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
ip_rt_put(rt);
*saddr = fl4.saddr;
- flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr);
+ flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);
loop++;
goto retry;
}
@@ -141,113 +157,140 @@ retry:
}
/* Get route to destination or remote server */
-static struct rtable *
+static int
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
- __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr)
+ __be32 daddr, int rt_mode, __be32 *ret_saddr)
{
struct net *net = dev_net(skb_dst(skb)->dev);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_dest_dst *dest_dst;
struct rtable *rt; /* Route to the other host */
struct rtable *ort; /* Original route */
- int local;
+ struct iphdr *iph;
+ __be16 df;
+ int mtu;
+ int local, noref = 1;
if (dest) {
- spin_lock(&dest->dst_lock);
- if (!(rt = (struct rtable *)
- __ip_vs_dst_check(dest, rtos))) {
- rt = do_output_route4(net, dest->addr.ip, rtos,
- rt_mode, &dest->dst_saddr.ip);
+ dest_dst = __ip_vs_dst_check(dest);
+ if (likely(dest_dst))
+ rt = (struct rtable *) dest_dst->dst_cache;
+ else {
+ dest_dst = ip_vs_dest_dst_alloc();
+ spin_lock_bh(&dest->dst_lock);
+ if (!dest_dst) {
+ __ip_vs_dst_set(dest, NULL, NULL, 0);
+ spin_unlock_bh(&dest->dst_lock);
+ goto err_unreach;
+ }
+ rt = do_output_route4(net, dest->addr.ip, rt_mode,
+ &dest_dst->dst_saddr.ip);
if (!rt) {
- spin_unlock(&dest->dst_lock);
- return NULL;
+ __ip_vs_dst_set(dest, NULL, NULL, 0);
+ spin_unlock_bh(&dest->dst_lock);
+ ip_vs_dest_dst_free(dest_dst);
+ goto err_unreach;
}
- __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
- IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
- "rtos=%X\n",
- &dest->addr.ip, &dest->dst_saddr.ip,
- atomic_read(&rt->dst.__refcnt), rtos);
+ __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0);
+ spin_unlock_bh(&dest->dst_lock);
+ IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
+ &dest->addr.ip, &dest_dst->dst_saddr.ip,
+ atomic_read(&rt->dst.__refcnt));
}
daddr = dest->addr.ip;
if (ret_saddr)
- *ret_saddr = dest->dst_saddr.ip;
- spin_unlock(&dest->dst_lock);
+ *ret_saddr = dest_dst->dst_saddr.ip;
} else {
__be32 saddr = htonl(INADDR_ANY);
+ noref = 0;
+
/* For such unconfigured boxes avoid many route lookups
* for performance reasons because we do not remember saddr
*/
rt_mode &= ~IP_VS_RT_MODE_CONNECT;
- rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
+ rt = do_output_route4(net, daddr, rt_mode, &saddr);
if (!rt)
- return NULL;
+ goto err_unreach;
if (ret_saddr)
*ret_saddr = saddr;
}
- local = rt->rt_flags & RTCF_LOCAL;
+ local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;
if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
rt_mode)) {
IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
(rt->rt_flags & RTCF_LOCAL) ?
"local":"non-local", &daddr);
- ip_rt_put(rt);
- return NULL;
- }
- if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
- !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
- IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
- "requires NAT method, dest: %pI4\n",
- &ip_hdr(skb)->daddr, &daddr);
- ip_rt_put(rt);
- return NULL;
+ goto err_put;
}
- if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
- IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
- "to non-local address, dest: %pI4\n",
- &ip_hdr(skb)->saddr, &daddr);
- ip_rt_put(rt);
- return NULL;
+ iph = ip_hdr(skb);
+ if (likely(!local)) {
+ if (unlikely(ipv4_is_loopback(iph->saddr))) {
+ IP_VS_DBG_RL("Stopping traffic from loopback address "
+ "%pI4 to non-local address, dest: %pI4\n",
+ &iph->saddr, &daddr);
+ goto err_put;
+ }
+ } else {
+ ort = skb_rtable(skb);
+ if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
+ !(ort->rt_flags & RTCF_LOCAL)) {
+ IP_VS_DBG_RL("Redirect from non-local address %pI4 to "
+ "local requires NAT method, dest: %pI4\n",
+ &iph->daddr, &daddr);
+ goto err_put;
+ }
+ /* skb to local stack, preserve old route */
+ if (!noref)
+ ip_rt_put(rt);
+ return local;
}
- return rt;
-}
-
-/* Reroute packet to local IPv4 stack after DNAT */
-static int
-__ip_vs_reroute_locally(struct sk_buff *skb)
-{
- struct rtable *rt = skb_rtable(skb);
- struct net_device *dev = rt->dst.dev;
- struct net *net = dev_net(dev);
- struct iphdr *iph = ip_hdr(skb);
-
- if (rt_is_input_route(rt)) {
- unsigned long orefdst = skb->_skb_refdst;
-
- if (ip_route_input(skb, iph->daddr, iph->saddr,
- iph->tos, skb->dev))
- return 0;
- refdst_drop(orefdst);
+ if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
+ mtu = dst_mtu(&rt->dst);
+ df = iph->frag_off & htons(IP_DF);
} else {
- struct flowi4 fl4 = {
- .daddr = iph->daddr,
- .saddr = iph->saddr,
- .flowi4_tos = RT_TOS(iph->tos),
- .flowi4_mark = skb->mark,
- };
-
- rt = ip_route_output_key(net, &fl4);
- if (IS_ERR(rt))
- return 0;
- if (!(rt->rt_flags & RTCF_LOCAL)) {
- ip_rt_put(rt);
- return 0;
+ struct sock *sk = skb->sk;
+
+ mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
+ if (mtu < 68) {
+ IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
+ goto err_put;
}
- /* Drop old route. */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
+ ort = skb_rtable(skb);
+ if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
+ ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
+ /* MTU check allowed? */
+ df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0;
}
- return 1;
+
+ /* MTU checking */
+ if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+ IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr);
+ goto err_put;
+ }
+
+ skb_dst_drop(skb);
+ if (noref) {
+ if (!local)
+ skb_dst_set_noref_force(skb, &rt->dst);
+ else
+ skb_dst_set(skb, dst_clone(&rt->dst));
+ } else
+ skb_dst_set(skb, &rt->dst);
+
+ return local;
+
+err_put:
+ if (!noref)
+ ip_rt_put(rt);
+ return -1;
+
+err_unreach:
+ dst_link_failure(skb);
+ return -1;
}
#ifdef CONFIG_IP_VS_IPV6
@@ -294,44 +337,57 @@ out_err:
/*
* Get route to destination or remote server
*/
-static struct rt6_info *
+static int
__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
struct in6_addr *daddr, struct in6_addr *ret_saddr,
- int do_xfrm, int rt_mode)
+ struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
{
struct net *net = dev_net(skb_dst(skb)->dev);
+ struct ip_vs_dest_dst *dest_dst;
struct rt6_info *rt; /* Route to the other host */
struct rt6_info *ort; /* Original route */
struct dst_entry *dst;
- int local;
+ int mtu;
+ int local, noref = 1;
if (dest) {
- spin_lock(&dest->dst_lock);
- rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
- if (!rt) {
+ dest_dst = __ip_vs_dst_check(dest);
+ if (likely(dest_dst))
+ rt = (struct rt6_info *) dest_dst->dst_cache;
+ else {
u32 cookie;
+ dest_dst = ip_vs_dest_dst_alloc();
+ spin_lock_bh(&dest->dst_lock);
+ if (!dest_dst) {
+ __ip_vs_dst_set(dest, NULL, NULL, 0);
+ spin_unlock_bh(&dest->dst_lock);
+ goto err_unreach;
+ }
dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
- &dest->dst_saddr.in6,
+ &dest_dst->dst_saddr.in6,
do_xfrm);
if (!dst) {
- spin_unlock(&dest->dst_lock);
- return NULL;
+ __ip_vs_dst_set(dest, NULL, NULL, 0);
+ spin_unlock_bh(&dest->dst_lock);
+ ip_vs_dest_dst_free(dest_dst);
+ goto err_unreach;
}
rt = (struct rt6_info *) dst;
cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
- __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
+ __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
+ spin_unlock_bh(&dest->dst_lock);
IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
- &dest->addr.in6, &dest->dst_saddr.in6,
+ &dest->addr.in6, &dest_dst->dst_saddr.in6,
atomic_read(&rt->dst.__refcnt));
}
if (ret_saddr)
- *ret_saddr = dest->dst_saddr.in6;
- spin_unlock(&dest->dst_lock);
+ *ret_saddr = dest_dst->dst_saddr.in6;
} else {
+ noref = 0;
dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
if (!dst)
- return NULL;
+ goto err_unreach;
rt = (struct rt6_info *) dst;
}
@@ -340,86 +396,137 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
rt_mode)) {
IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
local ? "local":"non-local", daddr);
- dst_release(&rt->dst);
- return NULL;
+ goto err_put;
}
- if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
- !((ort = (struct rt6_info *) skb_dst(skb)) &&
- __ip_vs_is_local_route6(ort))) {
- IP_VS_DBG_RL("Redirect from non-local address %pI6c to local "
- "requires NAT method, dest: %pI6c\n",
- &ipv6_hdr(skb)->daddr, daddr);
- dst_release(&rt->dst);
- return NULL;
+ if (likely(!local)) {
+ if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
+ ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
+ IPV6_ADDR_LOOPBACK)) {
+ IP_VS_DBG_RL("Stopping traffic from loopback address "
+ "%pI6c to non-local address, "
+ "dest: %pI6c\n",
+ &ipv6_hdr(skb)->saddr, daddr);
+ goto err_put;
+ }
+ } else {
+ ort = (struct rt6_info *) skb_dst(skb);
+ if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
+ !__ip_vs_is_local_route6(ort)) {
+ IP_VS_DBG_RL("Redirect from non-local address %pI6c "
+ "to local requires NAT method, "
+ "dest: %pI6c\n",
+ &ipv6_hdr(skb)->daddr, daddr);
+ goto err_put;
+ }
+ /* skb to local stack, preserve old route */
+ if (!noref)
+ dst_release(&rt->dst);
+ return local;
}
- if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
- ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
- IPV6_ADDR_LOOPBACK)) {
- IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c "
- "to non-local address, dest: %pI6c\n",
- &ipv6_hdr(skb)->saddr, daddr);
- dst_release(&rt->dst);
- return NULL;
+
+ /* MTU checking */
+ if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
+ mtu = dst_mtu(&rt->dst);
+ else {
+ struct sock *sk = skb->sk;
+
+ mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
+ if (mtu < IPV6_MIN_MTU) {
+ IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
+ IPV6_MIN_MTU);
+ goto err_put;
+ }
+ ort = (struct rt6_info *) skb_dst(skb);
+ if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
+ ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
}
- return rt;
+ if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
+ if (!skb->dev)
+ skb->dev = net->loopback_dev;
+ /* only send ICMP too big on first fragment */
+ if (!ipvsh->fragoffs)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr);
+ goto err_put;
+ }
+
+ skb_dst_drop(skb);
+ if (noref) {
+ if (!local)
+ skb_dst_set_noref_force(skb, &rt->dst);
+ else
+ skb_dst_set(skb, dst_clone(&rt->dst));
+ } else
+ skb_dst_set(skb, &rt->dst);
+
+ return local;
+
+err_put:
+ if (!noref)
+ dst_release(&rt->dst);
+ return -1;
+
+err_unreach:
+ dst_link_failure(skb);
+ return -1;
}
#endif
-/*
- * Release dest->dst_cache before a dest is removed
- */
-void
-ip_vs_dst_reset(struct ip_vs_dest *dest)
+/* return NF_ACCEPT to allow forwarding or other NF_xxx on error */
+static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
+ struct ip_vs_conn *cp)
{
- struct dst_entry *old_dst;
+ int ret = NF_ACCEPT;
+
+ skb->ipvs_property = 1;
+ if (unlikely(cp->flags & IP_VS_CONN_F_NFCT))
+ ret = ip_vs_confirm_conntrack(skb);
+ if (ret == NF_ACCEPT) {
+ nf_reset(skb);
+ skb_forward_csum(skb);
+ }
+ return ret;
+}
+
+/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
+static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
+ struct ip_vs_conn *cp, int local)
+{
+ int ret = NF_STOLEN;
- old_dst = dest->dst_cache;
- dest->dst_cache = NULL;
- dst_release(old_dst);
- dest->dst_saddr.ip = 0;
+ skb->ipvs_property = 1;
+ if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
+ ip_vs_notrack(skb);
+ else
+ ip_vs_update_conntrack(skb, cp, 1);
+ if (!local) {
+ skb_forward_csum(skb);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
+ dst_output);
+ } else
+ ret = NF_ACCEPT;
+ return ret;
}
-#define IP_VS_XMIT_TUNNEL(skb, cp) \
-({ \
- int __ret = NF_ACCEPT; \
- \
- (skb)->ipvs_property = 1; \
- if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \
- __ret = ip_vs_confirm_conntrack(skb); \
- if (__ret == NF_ACCEPT) { \
- nf_reset(skb); \
- skb_forward_csum(skb); \
- } \
- __ret; \
-})
-
-#define IP_VS_XMIT_NAT(pf, skb, cp, local) \
-do { \
- (skb)->ipvs_property = 1; \
- if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
- ip_vs_notrack(skb); \
- else \
- ip_vs_update_conntrack(skb, cp, 1); \
- if (local) \
- return NF_ACCEPT; \
- skb_forward_csum(skb); \
- NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
- skb_dst(skb)->dev, dst_output); \
-} while (0)
-
-#define IP_VS_XMIT(pf, skb, cp, local) \
-do { \
- (skb)->ipvs_property = 1; \
- if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
- ip_vs_notrack(skb); \
- if (local) \
- return NF_ACCEPT; \
- skb_forward_csum(skb); \
- NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
- skb_dst(skb)->dev, dst_output); \
-} while (0)
+/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
+static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
+ struct ip_vs_conn *cp, int local)
+{
+ int ret = NF_STOLEN;
+
+ skb->ipvs_property = 1;
+ if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
+ ip_vs_notrack(skb);
+ if (!local) {
+ skb_forward_csum(skb);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
+ dst_output);
+ } else
+ ret = NF_ACCEPT;
+ return ret;
+}
/*
@@ -430,7 +537,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
/* we do not touch skb and do not need pskb ptr */
- IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
+ return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
}
@@ -443,52 +550,29 @@ int
ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
- struct rtable *rt; /* Route to the other host */
struct iphdr *iph = ip_hdr(skb);
- int mtu;
EnterFunction(10);
- if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos),
- IP_VS_RT_MODE_NON_LOCAL, NULL)))
- goto tx_error_icmp;
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
- !skb_is_gso(skb)) {
- ip_rt_put(rt);
- icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
+ rcu_read_lock();
+ if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL,
+ NULL) < 0)
goto tx_error;
- }
- /*
- * Call ip_send_check because we are not sure it is called
- * after ip_defrag. Is copy-on-write needed?
- */
- if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
- ip_rt_put(rt);
- return NF_STOLEN;
- }
- ip_send_check(ip_hdr(skb));
-
- /* drop old route */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
+ ip_send_check(iph);
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
+ ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
- tx_error_icmp:
- dst_link_failure(skb);
tx_error:
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -496,60 +580,27 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
- struct rt6_info *rt; /* Route to the other host */
- int mtu;
-
EnterFunction(10);
- rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0,
- IP_VS_RT_MODE_NON_LOCAL);
- if (!rt)
- goto tx_error_icmp;
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if (__mtu_check_toobig_v6(skb, mtu)) {
- if (!skb->dev) {
- struct net *net = dev_net(skb_dst(skb)->dev);
-
- skb->dev = net->loopback_dev;
- }
- /* only send ICMP too big on first fragment */
- if (!iph->fragoffs)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- dst_release(&rt->dst);
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
+ rcu_read_lock();
+ if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL,
+ ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
goto tx_error;
- }
-
- /*
- * Call ip_send_check because we are not sure it is called
- * after ip_defrag. Is copy-on-write needed?
- */
- skb = skb_share_check(skb, GFP_ATOMIC);
- if (unlikely(skb == NULL)) {
- dst_release(&rt->dst);
- return NF_STOLEN;
- }
-
- /* drop old route */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
+ ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
- tx_error_icmp:
- dst_link_failure(skb);
tx_error:
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -564,29 +615,30 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
struct rtable *rt; /* Route to the other host */
- int mtu;
- struct iphdr *iph = ip_hdr(skb);
- int local;
+ int local, rc, was_input;
EnterFunction(10);
+ rcu_read_lock();
/* check if it is a connection of no-client-port */
if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
__be16 _pt, *p;
- p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
+
+ p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
if (p == NULL)
goto tx_error;
ip_vs_conn_fill_cport(cp, *p);
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
}
- if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
- RT_TOS(iph->tos),
- IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_RDR, NULL)))
- goto tx_error_icmp;
- local = rt->rt_flags & RTCF_LOCAL;
+ was_input = rt_is_input_route(skb_rtable(skb));
+ local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_RDR, NULL);
+ if (local < 0)
+ goto tx_error;
+ rt = skb_rtable(skb);
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
@@ -600,57 +652,31 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
"ip_vs_nat_xmit(): "
"stopping DNAT to local address");
- goto tx_error_put;
+ goto tx_error;
}
}
#endif
/* From world but DNAT to loopback address? */
- if (local && ipv4_is_loopback(cp->daddr.ip) &&
- rt_is_input_route(skb_rtable(skb))) {
+ if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
"stopping DNAT to loopback address");
- goto tx_error_put;
- }
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
- !skb_is_gso(skb)) {
- icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
- "ip_vs_nat_xmit(): frag needed for");
- goto tx_error_put;
+ goto tx_error;
}
/* copy-on-write the packet before mangling it */
if (!skb_make_writable(skb, sizeof(struct iphdr)))
- goto tx_error_put;
+ goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
- goto tx_error_put;
+ goto tx_error;
/* mangle the packet */
if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
- goto tx_error_put;
+ goto tx_error;
ip_hdr(skb)->daddr = cp->daddr.ip;
ip_send_check(ip_hdr(skb));
- if (!local) {
- /* drop old route */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
- } else {
- ip_rt_put(rt);
- /*
- * Some IPv4 replies get local address from routes,
- * not from iph, so while we DNAT after routing
- * we need this second input/output route.
- */
- if (!__ip_vs_reroute_locally(skb))
- goto tx_error;
- }
-
IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
/* FIXME: when application helper enlarges the packet and the length
@@ -660,49 +686,48 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
+ rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
+ rcu_read_unlock();
LeaveFunction(10);
- return NF_STOLEN;
+ return rc;
- tx_error_icmp:
- dst_link_failure(skb);
tx_error:
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
- tx_error_put:
- ip_rt_put(rt);
- goto tx_error;
}
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
struct rt6_info *rt; /* Route to the other host */
- int mtu;
- int local;
+ int local, rc;
EnterFunction(10);
+ rcu_read_lock();
/* check if it is a connection of no-client-port */
- if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) {
+ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
__be16 _pt, *p;
- p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt);
+ p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
if (p == NULL)
goto tx_error;
ip_vs_conn_fill_cport(cp, *p);
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
}
- if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- 0, (IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_RDR))))
- goto tx_error_icmp;
- local = __ip_vs_is_local_route6(rt);
+ local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
+ ipvsh, 0,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_RDR);
+ if (local < 0)
+ goto tx_error;
+ rt = (struct rt6_info *) skb_dst(skb);
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
@@ -716,7 +741,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
"ip_vs_nat_xmit_v6(): "
"stopping DNAT to local address");
- goto tx_error_put;
+ goto tx_error;
}
}
#endif
@@ -727,46 +752,21 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
"ip_vs_nat_xmit_v6(): "
"stopping DNAT to loopback address");
- goto tx_error_put;
- }
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if (__mtu_check_toobig_v6(skb, mtu)) {
- if (!skb->dev) {
- struct net *net = dev_net(skb_dst(skb)->dev);
-
- skb->dev = net->loopback_dev;
- }
- /* only send ICMP too big on first fragment */
- if (!iph->fragoffs)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
- "ip_vs_nat_xmit_v6(): frag needed for");
- goto tx_error_put;
+ goto tx_error;
}
/* copy-on-write the packet before mangling it */
if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
- goto tx_error_put;
+ goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
- goto tx_error_put;
+ goto tx_error;
/* mangle the packet */
- if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph))
+ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
goto tx_error;
ipv6_hdr(skb)->daddr = cp->daddr.in6;
- if (!local || !skb->dev) {
- /* drop the old route when skb is not shared */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
- } else {
- /* destined to loopback, do we need to change route? */
- dst_release(&rt->dst);
- }
-
IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
/* FIXME: when application helper enlarges the packet and the length
@@ -776,20 +776,17 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
+ rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
+ rcu_read_unlock();
LeaveFunction(10);
- return NF_STOLEN;
+ return rc;
-tx_error_icmp:
- dst_link_failure(skb);
tx_error:
LeaveFunction(10);
kfree_skb(skb);
+ rcu_read_unlock();
return NF_STOLEN;
-tx_error_put:
- dst_release(&rt->dst);
- goto tx_error;
}
#endif
@@ -826,56 +823,40 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
__be16 df;
struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
- int mtu;
- int ret;
+ int ret, local;
EnterFunction(10);
- if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
- RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_CONNECT,
- &saddr)))
- goto tx_error_icmp;
- if (rt->rt_flags & RTCF_LOCAL) {
- ip_rt_put(rt);
- IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
+ rcu_read_lock();
+ local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_CONNECT |
+ IP_VS_RT_MODE_TUNNEL, &saddr);
+ if (local < 0)
+ goto tx_error;
+ if (local) {
+ rcu_read_unlock();
+ return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
}
+ rt = skb_rtable(skb);
tdev = rt->dst.dev;
- mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
- if (mtu < 68) {
- IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
- goto tx_error_put;
- }
- if (rt_is_output_route(skb_rtable(skb)))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
-
/* Copy DF, reset fragment offset and MF */
df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
- if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
- icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
- goto tx_error_put;
- }
-
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
- if (skb_headroom(skb) < max_headroom
- || skb_cloned(skb) || skb_shared(skb)) {
+ if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
struct sk_buff *new_skb =
skb_realloc_headroom(skb, max_headroom);
- if (!new_skb) {
- ip_rt_put(rt);
- kfree_skb(skb);
- IP_VS_ERR_RL("%s(): no memory\n", __func__);
- return NF_STOLEN;
- }
+
+ if (!new_skb)
+ goto tx_error;
consume_skb(skb);
skb = new_skb;
old_iph = ip_hdr(skb);
@@ -890,10 +871,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- /* drop old route */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
-
/*
* Push down and install the IPIP header.
*/
@@ -911,25 +888,22 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- ret = IP_VS_XMIT_TUNNEL(skb, cp);
+ ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
ip_local_out(skb);
else if (ret == NF_DROP)
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
- tx_error_icmp:
- dst_link_failure(skb);
tx_error:
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
-tx_error_put:
- ip_rt_put(rt);
- goto tx_error;
}
#ifdef CONFIG_IP_VS_IPV6
@@ -943,60 +917,37 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ipv6hdr *old_iph = ipv6_hdr(skb);
struct ipv6hdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
- int mtu;
- int ret;
+ int ret, local;
EnterFunction(10);
- if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
- &saddr, 1, (IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL))))
- goto tx_error_icmp;
- if (__ip_vs_is_local_route6(rt)) {
- dst_release(&rt->dst);
- IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
+ rcu_read_lock();
+ local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
+ &saddr, ipvsh, 1,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_TUNNEL);
+ if (local < 0)
+ goto tx_error;
+ if (local) {
+ rcu_read_unlock();
+ return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
}
+ rt = (struct rt6_info *) skb_dst(skb);
tdev = rt->dst.dev;
- mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
- if (mtu < IPV6_MIN_MTU) {
- IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
- IPV6_MIN_MTU);
- goto tx_error_put;
- }
- if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
-
- /* MTU checking: Notice that 'mtu' have been adjusted before hand */
- if (__mtu_check_toobig_v6(skb, mtu)) {
- if (!skb->dev) {
- struct net *net = dev_net(skb_dst(skb)->dev);
-
- skb->dev = net->loopback_dev;
- }
- /* only send ICMP too big on first fragment */
- if (!ipvsh->fragoffs)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
- goto tx_error_put;
- }
-
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
- if (skb_headroom(skb) < max_headroom
- || skb_cloned(skb) || skb_shared(skb)) {
+ if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
struct sk_buff *new_skb =
skb_realloc_headroom(skb, max_headroom);
- if (!new_skb) {
- dst_release(&rt->dst);
- kfree_skb(skb);
- IP_VS_ERR_RL("%s(): no memory\n", __func__);
- return NF_STOLEN;
- }
+
+ if (!new_skb)
+ goto tx_error;
consume_skb(skb);
skb = new_skb;
old_iph = ipv6_hdr(skb);
@@ -1008,10 +959,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- /* drop old route */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
-
/*
* Push down and install the IPIP header.
*/
@@ -1029,25 +976,22 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- ret = IP_VS_XMIT_TUNNEL(skb, cp);
+ ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
ip6_local_out(skb);
else if (ret == NF_DROP)
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
-tx_error_icmp:
- dst_link_failure(skb);
tx_error:
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
-tx_error_put:
- dst_release(&rt->dst);
- goto tx_error;
}
#endif
@@ -1060,59 +1004,36 @@ int
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
- struct rtable *rt; /* Route to the other host */
- struct iphdr *iph = ip_hdr(skb);
- int mtu;
+ int local;
EnterFunction(10);
- if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
- RT_TOS(iph->tos),
- IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_KNOWN_NH, NULL)))
- goto tx_error_icmp;
- if (rt->rt_flags & RTCF_LOCAL) {
- ip_rt_put(rt);
- IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
- }
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
- !skb_is_gso(skb)) {
- icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
- ip_rt_put(rt);
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
+ rcu_read_lock();
+ local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_KNOWN_NH, NULL);
+ if (local < 0)
goto tx_error;
+ if (local) {
+ rcu_read_unlock();
+ return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
}
- /*
- * Call ip_send_check because we are not sure it is called
- * after ip_defrag. Is copy-on-write needed?
- */
- if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
- ip_rt_put(rt);
- return NF_STOLEN;
- }
ip_send_check(ip_hdr(skb));
- /* drop old route */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
-
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
+ ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
- tx_error_icmp:
- dst_link_failure(skb);
tx_error:
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1120,64 +1041,36 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
- struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
+ struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
- struct rt6_info *rt; /* Route to the other host */
- int mtu;
+ int local;
EnterFunction(10);
- if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- 0, (IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL))))
- goto tx_error_icmp;
- if (__ip_vs_is_local_route6(rt)) {
- dst_release(&rt->dst);
- IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
- }
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if (__mtu_check_toobig_v6(skb, mtu)) {
- if (!skb->dev) {
- struct net *net = dev_net(skb_dst(skb)->dev);
-
- skb->dev = net->loopback_dev;
- }
- /* only send ICMP too big on first fragment */
- if (!iph->fragoffs)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- dst_release(&rt->dst);
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
+ rcu_read_lock();
+ local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
+ ipvsh, 0,
+ IP_VS_RT_MODE_LOCAL |
+ IP_VS_RT_MODE_NON_LOCAL);
+ if (local < 0)
goto tx_error;
+ if (local) {
+ rcu_read_unlock();
+ return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
}
- /*
- * Call ip_send_check because we are not sure it is called
- * after ip_defrag. Is copy-on-write needed?
- */
- skb = skb_share_check(skb, GFP_ATOMIC);
- if (unlikely(skb == NULL)) {
- dst_release(&rt->dst);
- return NF_STOLEN;
- }
-
- /* drop old route */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
-
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
+ ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
-tx_error_icmp:
- dst_link_failure(skb);
tx_error:
kfree_skb(skb);
+ rcu_read_unlock();
LeaveFunction(10);
return NF_STOLEN;
}
@@ -1194,10 +1087,9 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_iphdr *iph)
{
struct rtable *rt; /* Route to the other host */
- int mtu;
int rc;
int local;
- int rt_mode;
+ int rt_mode, was_input;
EnterFunction(10);
@@ -1217,16 +1109,17 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/*
* mangle and send the packet here (only for VS/NAT)
*/
+ was_input = rt_is_input_route(skb_rtable(skb));
/* LOCALNODE from FORWARD hook is not supported */
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
- RT_TOS(ip_hdr(skb)->tos),
- rt_mode, NULL)))
- goto tx_error_icmp;
- local = rt->rt_flags & RTCF_LOCAL;
+ rcu_read_lock();
+ local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL);
+ if (local < 0)
+ goto tx_error;
+ rt = skb_rtable(skb);
/*
* Avoid duplicate tuple in reply direction for NAT traffic
@@ -1241,82 +1134,51 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG(10, "%s(): "
"stopping DNAT to local address %pI4\n",
__func__, &cp->daddr.ip);
- goto tx_error_put;
+ goto tx_error;
}
}
#endif
/* From world but DNAT to loopback address? */
- if (local && ipv4_is_loopback(cp->daddr.ip) &&
- rt_is_input_route(skb_rtable(skb))) {
+ if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
IP_VS_DBG(1, "%s(): "
"stopping DNAT to loopback %pI4\n",
__func__, &cp->daddr.ip);
- goto tx_error_put;
- }
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
- !skb_is_gso(skb)) {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
- goto tx_error_put;
+ goto tx_error;
}
/* copy-on-write the packet before mangling it */
if (!skb_make_writable(skb, offset))
- goto tx_error_put;
+ goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
- goto tx_error_put;
+ goto tx_error;
ip_vs_nat_icmp(skb, pp, cp, 0);
- if (!local) {
- /* drop the old route when skb is not shared */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
- } else {
- ip_rt_put(rt);
- /*
- * Some IPv4 replies get local address from routes,
- * not from iph, so while we DNAT after routing
- * we need this second input/output route.
- */
- if (!__ip_vs_reroute_locally(skb))
- goto tx_error;
- }
-
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
-
- rc = NF_STOLEN;
+ rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
+ rcu_read_unlock();
goto out;
- tx_error_icmp:
- dst_link_failure(skb);
tx_error:
- dev_kfree_skb(skb);
+ kfree_skb(skb);
+ rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
return rc;
- tx_error_put:
- ip_rt_put(rt);
- goto tx_error;
}
#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
- struct ip_vs_iphdr *iph)
+ struct ip_vs_iphdr *ipvsh)
{
struct rt6_info *rt; /* Route to the other host */
- int mtu;
int rc;
int local;
int rt_mode;
@@ -1328,7 +1190,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
translate address/port back */
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
if (cp->packet_xmit)
- rc = cp->packet_xmit(skb, cp, pp, iph);
+ rc = cp->packet_xmit(skb, cp, pp, ipvsh);
else
rc = NF_ACCEPT;
/* do not touch skb anymore */
@@ -1344,11 +1206,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
rt_mode = (hooknum != NF_INET_FORWARD) ?
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
- if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- 0, rt_mode)))
- goto tx_error_icmp;
-
- local = __ip_vs_is_local_route6(rt);
+ rcu_read_lock();
+ local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
+ ipvsh, 0, rt_mode);
+ if (local < 0)
+ goto tx_error;
+ rt = (struct rt6_info *) skb_dst(skb);
/*
* Avoid duplicate tuple in reply direction for NAT traffic
* to local address when connection is sync-ed
@@ -1362,7 +1225,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG(10, "%s(): "
"stopping DNAT to local address %pI6\n",
__func__, &cp->daddr.in6);
- goto tx_error_put;
+ goto tx_error;
}
}
#endif
@@ -1373,60 +1236,31 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG(1, "%s(): "
"stopping DNAT to loopback %pI6\n",
__func__, &cp->daddr.in6);
- goto tx_error_put;
- }
-
- /* MTU checking */
- mtu = dst_mtu(&rt->dst);
- if (__mtu_check_toobig_v6(skb, mtu)) {
- if (!skb->dev) {
- struct net *net = dev_net(skb_dst(skb)->dev);
-
- skb->dev = net->loopback_dev;
- }
- /* only send ICMP too big on first fragment */
- if (!iph->fragoffs)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP_VS_DBG_RL("%s(): frag needed\n", __func__);
- goto tx_error_put;
+ goto tx_error;
}
/* copy-on-write the packet before mangling it */
if (!skb_make_writable(skb, offset))
- goto tx_error_put;
+ goto tx_error;
if (skb_cow(skb, rt->dst.dev->hard_header_len))
- goto tx_error_put;
+ goto tx_error;
ip_vs_nat_icmp_v6(skb, pp, cp, 0);
- if (!local || !skb->dev) {
- /* drop the old route when skb is not shared */
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
- } else {
- /* destined to loopback, do we need to change route? */
- dst_release(&rt->dst);
- }
-
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
- IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
-
- rc = NF_STOLEN;
+ rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
+ rcu_read_unlock();
goto out;
-tx_error_icmp:
- dst_link_failure(skb);
tx_error:
- dev_kfree_skb(skb);
+ kfree_skb(skb);
+ rcu_read_unlock();
rc = NF_STOLEN;
out:
LeaveFunction(10);
return rc;
-tx_error_put:
- dst_release(&rt->dst);
- goto tx_error;
}
#endif
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 94b4b9853f60..a0b1c5c23d1c 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -353,7 +353,7 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct,
/* rcu_read_lock()ed by nf_hook_slow */
helper = rcu_dereference(help->helper);
- nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
+ nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
"nf_ct_%s: dropping packet: %pV ", helper->name, &vaf);
va_end(args);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index ba65b2041eb4..a99b6c3427b0 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -456,7 +456,8 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
out_invalid:
if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg);
+ nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL,
+ NULL, msg);
return false;
}
@@ -542,13 +543,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_dccp: invalid packet ignored ");
return NF_ACCEPT;
case CT_DCCP_INVALID:
spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_dccp: invalid state transition ");
return -NF_ACCEPT;
}
@@ -613,7 +614,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl,
out_invalid:
if (LOG_INVALID(net, IPPROTO_DCCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg);
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, msg);
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 83876e9877f1..f021a2076c87 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -720,7 +720,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
tn->tcp_be_liberal)
res = true;
if (!res && LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: %s ",
before(seq, sender->td_maxend + 1) ?
after(end, sender->td_end - receiver->td_maxwin - 1) ?
@@ -772,7 +772,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
if (th == NULL) {
if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: short packet ");
return -NF_ACCEPT;
}
@@ -780,7 +780,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
/* Not whole TCP header or malformed packet */
if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: truncated/malformed packet ");
return -NF_ACCEPT;
}
@@ -793,7 +793,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: bad TCP checksum ");
return -NF_ACCEPT;
}
@@ -802,7 +802,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
if (!tcp_valid_flags[tcpflags]) {
if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid TCP flag combination ");
return -NF_ACCEPT;
}
@@ -949,7 +949,7 @@ static int tcp_packet(struct nf_conn *ct,
}
spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid packet ignored in "
"state %s ", tcp_conntrack_names[old_state]);
return NF_ACCEPT;
@@ -959,7 +959,7 @@ static int tcp_packet(struct nf_conn *ct,
dir, get_conntrack_index(th), old_state);
spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid state ");
return -NF_ACCEPT;
case TCP_CONNTRACK_CLOSE:
@@ -969,8 +969,8 @@ static int tcp_packet(struct nf_conn *ct,
/* Invalid RST */
spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_tcp: invalid RST ");
+ nf_log_packet(net, pf, 0, skb, NULL, NULL,
+ NULL, "nf_ct_tcp: invalid RST ");
return -NF_ACCEPT;
}
if (index == TCP_RST_SET
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 59623cc56e8d..fee43228e115 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -119,7 +119,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hdr == NULL) {
if (LOG_INVALID(net, IPPROTO_UDP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udp: short packet ");
return -NF_ACCEPT;
}
@@ -127,7 +127,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
/* Truncated/malformed packets */
if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
if (LOG_INVALID(net, IPPROTO_UDP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udp: truncated/malformed packet ");
return -NF_ACCEPT;
}
@@ -143,7 +143,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
if (LOG_INVALID(net, IPPROTO_UDP))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udp: bad UDP checksum ");
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index ca969f6273f7..2750e6c69f82 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -131,7 +131,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hdr == NULL) {
if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: short packet ");
return -NF_ACCEPT;
}
@@ -141,7 +141,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
cscov = udplen;
else if (cscov < sizeof(*hdr) || cscov > udplen) {
if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: invalid checksum coverage ");
return -NF_ACCEPT;
}
@@ -149,7 +149,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
/* UDPLITE mandates checksums */
if (!hdr->check) {
if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: checksum missing ");
return -NF_ACCEPT;
}
@@ -159,7 +159,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
pf)) {
if (LOG_INVALID(net, IPPROTO_UDPLITE))
- nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_udplite: bad UDPLite checksum ");
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 6c69fbdb8361..ebb67d33bd63 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -572,6 +572,7 @@ static int __init nf_conntrack_standalone_init(void)
register_net_sysctl(&init_net, "net", nf_ct_netfilter_table);
if (!nf_ct_netfilter_header) {
pr_err("nf_conntrack: can't register to sysctl.\n");
+ ret = -ENOMEM;
goto out_sysctl;
}
#endif
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 9e312695c818..388656d5a9ec 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -16,7 +16,6 @@
#define NF_LOG_PREFIXLEN 128
#define NFLOGGER_NAME_LEN 64
-static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly;
static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly;
static DEFINE_MUTEX(nf_log_mutex);
@@ -32,13 +31,46 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
return NULL;
}
+void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
+{
+ const struct nf_logger *log;
+
+ if (pf == NFPROTO_UNSPEC)
+ return;
+
+ mutex_lock(&nf_log_mutex);
+ log = rcu_dereference_protected(net->nf.nf_loggers[pf],
+ lockdep_is_held(&nf_log_mutex));
+ if (log == NULL)
+ rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
+
+ mutex_unlock(&nf_log_mutex);
+}
+EXPORT_SYMBOL(nf_log_set);
+
+void nf_log_unset(struct net *net, const struct nf_logger *logger)
+{
+ int i;
+ const struct nf_logger *log;
+
+ mutex_lock(&nf_log_mutex);
+ for (i = 0; i < NFPROTO_NUMPROTO; i++) {
+ log = rcu_dereference_protected(net->nf.nf_loggers[i],
+ lockdep_is_held(&nf_log_mutex));
+ if (log == logger)
+ RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL);
+ }
+ mutex_unlock(&nf_log_mutex);
+ synchronize_rcu();
+}
+EXPORT_SYMBOL(nf_log_unset);
+
/* return EEXIST if the same logger is registered, 0 on success. */
int nf_log_register(u_int8_t pf, struct nf_logger *logger)
{
- const struct nf_logger *llog;
int i;
- if (pf >= ARRAY_SIZE(nf_loggers))
+ if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers))
return -EINVAL;
for (i = 0; i < ARRAY_SIZE(logger->list); i++)
@@ -52,10 +84,6 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
} else {
/* register at end of list to honor first register win */
list_add_tail(&logger->list[pf], &nf_loggers_l[pf]);
- llog = rcu_dereference_protected(nf_loggers[pf],
- lockdep_is_held(&nf_log_mutex));
- if (llog == NULL)
- rcu_assign_pointer(nf_loggers[pf], logger);
}
mutex_unlock(&nf_log_mutex);
@@ -66,49 +94,43 @@ EXPORT_SYMBOL(nf_log_register);
void nf_log_unregister(struct nf_logger *logger)
{
- const struct nf_logger *c_logger;
int i;
mutex_lock(&nf_log_mutex);
- for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) {
- c_logger = rcu_dereference_protected(nf_loggers[i],
- lockdep_is_held(&nf_log_mutex));
- if (c_logger == logger)
- RCU_INIT_POINTER(nf_loggers[i], NULL);
+ for (i = 0; i < NFPROTO_NUMPROTO; i++)
list_del(&logger->list[i]);
- }
mutex_unlock(&nf_log_mutex);
-
- synchronize_rcu();
}
EXPORT_SYMBOL(nf_log_unregister);
-int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger)
+int nf_log_bind_pf(struct net *net, u_int8_t pf,
+ const struct nf_logger *logger)
{
- if (pf >= ARRAY_SIZE(nf_loggers))
+ if (pf >= ARRAY_SIZE(net->nf.nf_loggers))
return -EINVAL;
mutex_lock(&nf_log_mutex);
if (__find_logger(pf, logger->name) == NULL) {
mutex_unlock(&nf_log_mutex);
return -ENOENT;
}
- rcu_assign_pointer(nf_loggers[pf], logger);
+ rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
mutex_unlock(&nf_log_mutex);
return 0;
}
EXPORT_SYMBOL(nf_log_bind_pf);
-void nf_log_unbind_pf(u_int8_t pf)
+void nf_log_unbind_pf(struct net *net, u_int8_t pf)
{
- if (pf >= ARRAY_SIZE(nf_loggers))
+ if (pf >= ARRAY_SIZE(net->nf.nf_loggers))
return;
mutex_lock(&nf_log_mutex);
- RCU_INIT_POINTER(nf_loggers[pf], NULL);
+ RCU_INIT_POINTER(net->nf.nf_loggers[pf], NULL);
mutex_unlock(&nf_log_mutex);
}
EXPORT_SYMBOL(nf_log_unbind_pf);
-void nf_log_packet(u_int8_t pf,
+void nf_log_packet(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
@@ -121,7 +143,7 @@ void nf_log_packet(u_int8_t pf,
const struct nf_logger *logger;
rcu_read_lock();
- logger = rcu_dereference(nf_loggers[pf]);
+ logger = rcu_dereference(net->nf.nf_loggers[pf]);
if (logger) {
va_start(args, fmt);
vsnprintf(prefix, sizeof(prefix), fmt, args);
@@ -135,9 +157,11 @@ EXPORT_SYMBOL(nf_log_packet);
#ifdef CONFIG_PROC_FS
static void *seq_start(struct seq_file *seq, loff_t *pos)
{
+ struct net *net = seq_file_net(seq);
+
mutex_lock(&nf_log_mutex);
- if (*pos >= ARRAY_SIZE(nf_loggers))
+ if (*pos >= ARRAY_SIZE(net->nf.nf_loggers))
return NULL;
return pos;
@@ -145,9 +169,11 @@ static void *seq_start(struct seq_file *seq, loff_t *pos)
static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
{
+ struct net *net = seq_file_net(s);
+
(*pos)++;
- if (*pos >= ARRAY_SIZE(nf_loggers))
+ if (*pos >= ARRAY_SIZE(net->nf.nf_loggers))
return NULL;
return pos;
@@ -164,8 +190,9 @@ static int seq_show(struct seq_file *s, void *v)
const struct nf_logger *logger;
struct nf_logger *t;
int ret;
+ struct net *net = seq_file_net(s);
- logger = rcu_dereference_protected(nf_loggers[*pos],
+ logger = rcu_dereference_protected(net->nf.nf_loggers[*pos],
lockdep_is_held(&nf_log_mutex));
if (!logger)
@@ -199,7 +226,8 @@ static const struct seq_operations nflog_seq_ops = {
static int nflog_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &nflog_seq_ops);
+ return seq_open_net(inode, file, &nflog_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations nflog_file_ops = {
@@ -207,7 +235,7 @@ static const struct file_operations nflog_file_ops = {
.open = nflog_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
};
@@ -216,7 +244,6 @@ static const struct file_operations nflog_file_ops = {
#ifdef CONFIG_SYSCTL
static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
-static struct ctl_table_header *nf_log_dir_header;
static int nf_log_proc_dostring(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -226,6 +253,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
size_t size = *lenp;
int r = 0;
int tindex = (unsigned long)table->extra1;
+ struct net *net = current->nsproxy->net_ns;
if (write) {
if (size > sizeof(buf))
@@ -234,7 +262,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
return -EFAULT;
if (!strcmp(buf, "NONE")) {
- nf_log_unbind_pf(tindex);
+ nf_log_unbind_pf(net, tindex);
return 0;
}
mutex_lock(&nf_log_mutex);
@@ -243,11 +271,11 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
mutex_unlock(&nf_log_mutex);
return -ENOENT;
}
- rcu_assign_pointer(nf_loggers[tindex], logger);
+ rcu_assign_pointer(net->nf.nf_loggers[tindex], logger);
mutex_unlock(&nf_log_mutex);
} else {
mutex_lock(&nf_log_mutex);
- logger = rcu_dereference_protected(nf_loggers[tindex],
+ logger = rcu_dereference_protected(net->nf.nf_loggers[tindex],
lockdep_is_held(&nf_log_mutex));
if (!logger)
table->data = "NONE";
@@ -260,49 +288,111 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
return r;
}
-static __init int netfilter_log_sysctl_init(void)
+static int netfilter_log_sysctl_init(struct net *net)
{
int i;
-
- for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) {
- snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i);
- nf_log_sysctl_table[i].procname =
- nf_log_sysctl_fnames[i-NFPROTO_UNSPEC];
- nf_log_sysctl_table[i].data = NULL;
- nf_log_sysctl_table[i].maxlen =
- NFLOGGER_NAME_LEN * sizeof(char);
- nf_log_sysctl_table[i].mode = 0644;
- nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring;
- nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i;
+ struct ctl_table *table;
+
+ table = nf_log_sysctl_table;
+ if (!net_eq(net, &init_net)) {
+ table = kmemdup(nf_log_sysctl_table,
+ sizeof(nf_log_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ goto err_alloc;
+ } else {
+ for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) {
+ snprintf(nf_log_sysctl_fnames[i],
+ 3, "%d", i);
+ nf_log_sysctl_table[i].procname =
+ nf_log_sysctl_fnames[i];
+ nf_log_sysctl_table[i].data = NULL;
+ nf_log_sysctl_table[i].maxlen =
+ NFLOGGER_NAME_LEN * sizeof(char);
+ nf_log_sysctl_table[i].mode = 0644;
+ nf_log_sysctl_table[i].proc_handler =
+ nf_log_proc_dostring;
+ nf_log_sysctl_table[i].extra1 =
+ (void *)(unsigned long) i;
+ }
}
- nf_log_dir_header = register_net_sysctl(&init_net, "net/netfilter/nf_log",
- nf_log_sysctl_table);
- if (!nf_log_dir_header)
- return -ENOMEM;
+ net->nf.nf_log_dir_header = register_net_sysctl(net,
+ "net/netfilter/nf_log",
+ table);
+ if (!net->nf.nf_log_dir_header)
+ goto err_reg;
return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
+}
+
+static void netfilter_log_sysctl_exit(struct net *net)
+{
+ struct ctl_table *table;
+
+ table = net->nf.nf_log_dir_header->ctl_table_arg;
+ unregister_net_sysctl_table(net->nf.nf_log_dir_header);
+ if (!net_eq(net, &init_net))
+ kfree(table);
}
#else
-static __init int netfilter_log_sysctl_init(void)
+static int netfilter_log_sysctl_init(struct net *net)
{
return 0;
}
+
+static void netfilter_log_sysctl_exit(struct net *net)
+{
+}
#endif /* CONFIG_SYSCTL */
-int __init netfilter_log_init(void)
+static int __net_init nf_log_net_init(struct net *net)
{
- int i, r;
+ int ret = -ENOMEM;
+
#ifdef CONFIG_PROC_FS
if (!proc_create("nf_log", S_IRUGO,
- proc_net_netfilter, &nflog_file_ops))
- return -1;
+ net->nf.proc_netfilter, &nflog_file_ops))
+ return ret;
#endif
+ ret = netfilter_log_sysctl_init(net);
+ if (ret < 0)
+ goto out_sysctl;
+
+ return 0;
- /* Errors will trigger panic, unroll on error is unnecessary. */
- r = netfilter_log_sysctl_init();
- if (r < 0)
- return r;
+out_sysctl:
+ /* For init_net: errors will trigger panic, don't unroll on error. */
+ if (!net_eq(net, &init_net))
+ remove_proc_entry("nf_log", net->nf.proc_netfilter);
+
+ return ret;
+}
+
+static void __net_exit nf_log_net_exit(struct net *net)
+{
+ netfilter_log_sysctl_exit(net);
+ remove_proc_entry("nf_log", net->nf.proc_netfilter);
+}
+
+static struct pernet_operations nf_log_net_ops = {
+ .init = nf_log_net_init,
+ .exit = nf_log_net_exit,
+};
+
+int __init netfilter_log_init(void)
+{
+ int i, ret;
+
+ ret = register_pernet_subsys(&nf_log_net_ops);
+ if (ret < 0)
+ return ret;
for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
INIT_LIST_HEAD(&(nf_loggers_l[i]));
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 589d686f0b4c..dc3fd5d44464 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -49,6 +49,8 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
return -EINVAL;
acct_name = nla_data(tb[NFACCT_NAME]);
+ if (strlen(acct_name) == 0)
+ return -EINVAL;
list_for_each_entry(nfacct, &nfnl_acct_list, head) {
if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 4a2593f100cb..1a0be2af1dd8 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -32,6 +32,7 @@
#include <linux/slab.h>
#include <net/sock.h>
#include <net/netfilter/nf_log.h>
+#include <net/netns/generic.h>
#include <net/netfilter/nfnetlink_log.h>
#include <linux/atomic.h>
@@ -56,6 +57,7 @@ struct nfulnl_instance {
unsigned int qlen; /* number of nlmsgs in skb */
struct sk_buff *skb; /* pre-allocatd skb */
struct timer_list timer;
+ struct net *net;
struct user_namespace *peer_user_ns; /* User namespace of the peer process */
int peer_portid; /* PORTID of the peer process */
@@ -71,25 +73,34 @@ struct nfulnl_instance {
struct rcu_head rcu;
};
-static DEFINE_SPINLOCK(instances_lock);
-static atomic_t global_seq;
-
#define INSTANCE_BUCKETS 16
-static struct hlist_head instance_table[INSTANCE_BUCKETS];
static unsigned int hash_init;
+static int nfnl_log_net_id __read_mostly;
+
+struct nfnl_log_net {
+ spinlock_t instances_lock;
+ struct hlist_head instance_table[INSTANCE_BUCKETS];
+ atomic_t global_seq;
+};
+
+static struct nfnl_log_net *nfnl_log_pernet(struct net *net)
+{
+ return net_generic(net, nfnl_log_net_id);
+}
+
static inline u_int8_t instance_hashfn(u_int16_t group_num)
{
return ((group_num & 0xff) % INSTANCE_BUCKETS);
}
static struct nfulnl_instance *
-__instance_lookup(u_int16_t group_num)
+__instance_lookup(struct nfnl_log_net *log, u_int16_t group_num)
{
struct hlist_head *head;
struct nfulnl_instance *inst;
- head = &instance_table[instance_hashfn(group_num)];
+ head = &log->instance_table[instance_hashfn(group_num)];
hlist_for_each_entry_rcu(inst, head, hlist) {
if (inst->group_num == group_num)
return inst;
@@ -104,12 +115,12 @@ instance_get(struct nfulnl_instance *inst)
}
static struct nfulnl_instance *
-instance_lookup_get(u_int16_t group_num)
+instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num)
{
struct nfulnl_instance *inst;
rcu_read_lock_bh();
- inst = __instance_lookup(group_num);
+ inst = __instance_lookup(log, group_num);
if (inst && !atomic_inc_not_zero(&inst->use))
inst = NULL;
rcu_read_unlock_bh();
@@ -119,7 +130,11 @@ instance_lookup_get(u_int16_t group_num)
static void nfulnl_instance_free_rcu(struct rcu_head *head)
{
- kfree(container_of(head, struct nfulnl_instance, rcu));
+ struct nfulnl_instance *inst =
+ container_of(head, struct nfulnl_instance, rcu);
+
+ put_net(inst->net);
+ kfree(inst);
module_put(THIS_MODULE);
}
@@ -133,13 +148,15 @@ instance_put(struct nfulnl_instance *inst)
static void nfulnl_timer(unsigned long data);
static struct nfulnl_instance *
-instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)
+instance_create(struct net *net, u_int16_t group_num,
+ int portid, struct user_namespace *user_ns)
{
struct nfulnl_instance *inst;
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
int err;
- spin_lock_bh(&instances_lock);
- if (__instance_lookup(group_num)) {
+ spin_lock_bh(&log->instances_lock);
+ if (__instance_lookup(log, group_num)) {
err = -EEXIST;
goto out_unlock;
}
@@ -163,6 +180,7 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)
setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
+ inst->net = get_net(net);
inst->peer_user_ns = user_ns;
inst->peer_portid = portid;
inst->group_num = group_num;
@@ -174,14 +192,15 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)
inst->copy_range = NFULNL_COPY_RANGE_MAX;
hlist_add_head_rcu(&inst->hlist,
- &instance_table[instance_hashfn(group_num)]);
+ &log->instance_table[instance_hashfn(group_num)]);
+
- spin_unlock_bh(&instances_lock);
+ spin_unlock_bh(&log->instances_lock);
return inst;
out_unlock:
- spin_unlock_bh(&instances_lock);
+ spin_unlock_bh(&log->instances_lock);
return ERR_PTR(err);
}
@@ -210,11 +229,12 @@ __instance_destroy(struct nfulnl_instance *inst)
}
static inline void
-instance_destroy(struct nfulnl_instance *inst)
+instance_destroy(struct nfnl_log_net *log,
+ struct nfulnl_instance *inst)
{
- spin_lock_bh(&instances_lock);
+ spin_lock_bh(&log->instances_lock);
__instance_destroy(inst);
- spin_unlock_bh(&instances_lock);
+ spin_unlock_bh(&log->instances_lock);
}
static int
@@ -336,7 +356,7 @@ __nfulnl_send(struct nfulnl_instance *inst)
if (!nlh)
goto out;
}
- status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid,
+ status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid,
MSG_DONTWAIT);
inst->qlen = 0;
@@ -370,7 +390,8 @@ nfulnl_timer(unsigned long data)
/* This is an inline function, we don't really care about a long
* list of arguments */
static inline int
-__build_packet_message(struct nfulnl_instance *inst,
+__build_packet_message(struct nfnl_log_net *log,
+ struct nfulnl_instance *inst,
const struct sk_buff *skb,
unsigned int data_len,
u_int8_t pf,
@@ -536,7 +557,7 @@ __build_packet_message(struct nfulnl_instance *inst,
/* global sequence number */
if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) &&
nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL,
- htonl(atomic_inc_return(&global_seq))))
+ htonl(atomic_inc_return(&log->global_seq))))
goto nla_put_failure;
if (data_len) {
@@ -592,13 +613,15 @@ nfulnl_log_packet(u_int8_t pf,
const struct nf_loginfo *li;
unsigned int qthreshold;
unsigned int plen;
+ struct net *net = dev_net(in ? in : out);
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
li = li_user;
else
li = &default_loginfo;
- inst = instance_lookup_get(li->u.ulog.group);
+ inst = instance_lookup_get(log, li->u.ulog.group);
if (!inst)
return;
@@ -680,7 +703,7 @@ nfulnl_log_packet(u_int8_t pf,
inst->qlen++;
- __build_packet_message(inst, skb, data_len, pf,
+ __build_packet_message(log, inst, skb, data_len, pf,
hooknum, in, out, prefix, plen);
if (inst->qlen >= qthreshold)
@@ -709,24 +732,24 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct netlink_notify *n = ptr;
+ struct nfnl_log_net *log = nfnl_log_pernet(n->net);
if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
int i;
/* destroy all instances for this portid */
- spin_lock_bh(&instances_lock);
+ spin_lock_bh(&log->instances_lock);
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct hlist_node *t2;
struct nfulnl_instance *inst;
- struct hlist_head *head = &instance_table[i];
+ struct hlist_head *head = &log->instance_table[i];
hlist_for_each_entry_safe(inst, t2, head, hlist) {
- if ((net_eq(n->net, &init_net)) &&
- (n->portid == inst->peer_portid))
+ if (n->portid == inst->peer_portid)
__instance_destroy(inst);
}
}
- spin_unlock_bh(&instances_lock);
+ spin_unlock_bh(&log->instances_lock);
}
return NOTIFY_DONE;
}
@@ -767,6 +790,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
u_int16_t group_num = ntohs(nfmsg->res_id);
struct nfulnl_instance *inst;
struct nfulnl_msg_config_cmd *cmd = NULL;
+ struct net *net = sock_net(ctnl);
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
int ret = 0;
if (nfula[NFULA_CFG_CMD]) {
@@ -776,14 +801,14 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
/* Commands without queue context */
switch (cmd->command) {
case NFULNL_CFG_CMD_PF_BIND:
- return nf_log_bind_pf(pf, &nfulnl_logger);
+ return nf_log_bind_pf(net, pf, &nfulnl_logger);
case NFULNL_CFG_CMD_PF_UNBIND:
- nf_log_unbind_pf(pf);
+ nf_log_unbind_pf(net, pf);
return 0;
}
}
- inst = instance_lookup_get(group_num);
+ inst = instance_lookup_get(log, group_num);
if (inst && inst->peer_portid != NETLINK_CB(skb).portid) {
ret = -EPERM;
goto out_put;
@@ -797,7 +822,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
goto out_put;
}
- inst = instance_create(group_num,
+ inst = instance_create(net, group_num,
NETLINK_CB(skb).portid,
sk_user_ns(NETLINK_CB(skb).ssk));
if (IS_ERR(inst)) {
@@ -811,7 +836,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
goto out;
}
- instance_destroy(inst);
+ instance_destroy(log, inst);
goto out_put;
default:
ret = -ENOTSUPP;
@@ -894,55 +919,68 @@ static const struct nfnetlink_subsystem nfulnl_subsys = {
#ifdef CONFIG_PROC_FS
struct iter_state {
+ struct seq_net_private p;
unsigned int bucket;
};
-static struct hlist_node *get_first(struct iter_state *st)
+static struct hlist_node *get_first(struct net *net, struct iter_state *st)
{
+ struct nfnl_log_net *log;
if (!st)
return NULL;
+ log = nfnl_log_pernet(net);
+
for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
- if (!hlist_empty(&instance_table[st->bucket]))
- return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
+ struct hlist_head *head = &log->instance_table[st->bucket];
+
+ if (!hlist_empty(head))
+ return rcu_dereference_bh(hlist_first_rcu(head));
}
return NULL;
}
-static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
+static struct hlist_node *get_next(struct net *net, struct iter_state *st,
+ struct hlist_node *h)
{
h = rcu_dereference_bh(hlist_next_rcu(h));
while (!h) {
+ struct nfnl_log_net *log;
+ struct hlist_head *head;
+
if (++st->bucket >= INSTANCE_BUCKETS)
return NULL;
- h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
+ log = nfnl_log_pernet(net);
+ head = &log->instance_table[st->bucket];
+ h = rcu_dereference_bh(hlist_first_rcu(head));
}
return h;
}
-static struct hlist_node *get_idx(struct iter_state *st, loff_t pos)
+static struct hlist_node *get_idx(struct net *net, struct iter_state *st,
+ loff_t pos)
{
struct hlist_node *head;
- head = get_first(st);
+ head = get_first(net, st);
if (head)
- while (pos && (head = get_next(st, head)))
+ while (pos && (head = get_next(net, st, head)))
pos--;
return pos ? NULL : head;
}
-static void *seq_start(struct seq_file *seq, loff_t *pos)
+static void *seq_start(struct seq_file *s, loff_t *pos)
__acquires(rcu_bh)
{
rcu_read_lock_bh();
- return get_idx(seq->private, *pos);
+ return get_idx(seq_file_net(s), s->private, *pos);
}
static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
{
(*pos)++;
- return get_next(s->private, v);
+ return get_next(seq_file_net(s), s->private, v);
}
static void seq_stop(struct seq_file *s, void *v)
@@ -971,8 +1009,8 @@ static const struct seq_operations nful_seq_ops = {
static int nful_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &nful_seq_ops,
- sizeof(struct iter_state));
+ return seq_open_net(inode, file, &nful_seq_ops,
+ sizeof(struct iter_state));
}
static const struct file_operations nful_file_ops = {
@@ -980,17 +1018,43 @@ static const struct file_operations nful_file_ops = {
.open = nful_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
#endif /* PROC_FS */
-static int __init nfnetlink_log_init(void)
+static int __net_init nfnl_log_net_init(struct net *net)
{
- int i, status = -ENOMEM;
+ unsigned int i;
+ struct nfnl_log_net *log = nfnl_log_pernet(net);
for (i = 0; i < INSTANCE_BUCKETS; i++)
- INIT_HLIST_HEAD(&instance_table[i]);
+ INIT_HLIST_HEAD(&log->instance_table[i]);
+ spin_lock_init(&log->instances_lock);
+
+#ifdef CONFIG_PROC_FS
+ if (!proc_create("nfnetlink_log", 0440,
+ net->nf.proc_netfilter, &nful_file_ops))
+ return -ENOMEM;
+#endif
+ return 0;
+}
+
+static void __net_exit nfnl_log_net_exit(struct net *net)
+{
+ remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
+}
+
+static struct pernet_operations nfnl_log_net_ops = {
+ .init = nfnl_log_net_init,
+ .exit = nfnl_log_net_exit,
+ .id = &nfnl_log_net_id,
+ .size = sizeof(struct nfnl_log_net),
+};
+
+static int __init nfnetlink_log_init(void)
+{
+ int status = -ENOMEM;
/* it's not really all that important to have a random value, so
* we can do this from the init function, even if there hasn't
@@ -1000,29 +1064,25 @@ static int __init nfnetlink_log_init(void)
netlink_register_notifier(&nfulnl_rtnl_notifier);
status = nfnetlink_subsys_register(&nfulnl_subsys);
if (status < 0) {
- printk(KERN_ERR "log: failed to create netlink socket\n");
+ pr_err("log: failed to create netlink socket\n");
goto cleanup_netlink_notifier;
}
status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger);
if (status < 0) {
- printk(KERN_ERR "log: failed to register logger\n");
+ pr_err("log: failed to register logger\n");
goto cleanup_subsys;
}
-#ifdef CONFIG_PROC_FS
- if (!proc_create("nfnetlink_log", 0440,
- proc_net_netfilter, &nful_file_ops)) {
- status = -ENOMEM;
+ status = register_pernet_subsys(&nfnl_log_net_ops);
+ if (status < 0) {
+ pr_err("log: failed to register pernet ops\n");
goto cleanup_logger;
}
-#endif
return status;
-#ifdef CONFIG_PROC_FS
cleanup_logger:
nf_log_unregister(&nfulnl_logger);
-#endif
cleanup_subsys:
nfnetlink_subsys_unregister(&nfulnl_subsys);
cleanup_netlink_notifier:
@@ -1032,10 +1092,8 @@ cleanup_netlink_notifier:
static void __exit nfnetlink_log_fini(void)
{
+ unregister_pernet_subsys(&nfnl_log_net_ops);
nf_log_unregister(&nfulnl_logger);
-#ifdef CONFIG_PROC_FS
- remove_proc_entry("nfnetlink_log", proc_net_netfilter);
-#endif
nfnetlink_subsys_unregister(&nfulnl_subsys);
netlink_unregister_notifier(&nfulnl_rtnl_notifier);
}
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index e92c9161e396..5e280b3e154f 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -30,6 +30,7 @@
#include <linux/list.h>
#include <net/sock.h>
#include <net/netfilter/nf_queue.h>
+#include <net/netns/generic.h>
#include <net/netfilter/nfnetlink_queue.h>
#include <linux/atomic.h>
@@ -66,10 +67,18 @@ struct nfqnl_instance {
typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
-static DEFINE_SPINLOCK(instances_lock);
+static int nfnl_queue_net_id __read_mostly;
#define INSTANCE_BUCKETS 16
-static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly;
+struct nfnl_queue_net {
+ spinlock_t instances_lock;
+ struct hlist_head instance_table[INSTANCE_BUCKETS];
+};
+
+static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
+{
+ return net_generic(net, nfnl_queue_net_id);
+}
static inline u_int8_t instance_hashfn(u_int16_t queue_num)
{
@@ -77,12 +86,12 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
}
static struct nfqnl_instance *
-instance_lookup(u_int16_t queue_num)
+instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
{
struct hlist_head *head;
struct nfqnl_instance *inst;
- head = &instance_table[instance_hashfn(queue_num)];
+ head = &q->instance_table[instance_hashfn(queue_num)];
hlist_for_each_entry_rcu(inst, head, hlist) {
if (inst->queue_num == queue_num)
return inst;
@@ -91,14 +100,15 @@ instance_lookup(u_int16_t queue_num)
}
static struct nfqnl_instance *
-instance_create(u_int16_t queue_num, int portid)
+instance_create(struct nfnl_queue_net *q, u_int16_t queue_num,
+ int portid)
{
struct nfqnl_instance *inst;
unsigned int h;
int err;
- spin_lock(&instances_lock);
- if (instance_lookup(queue_num)) {
+ spin_lock(&q->instances_lock);
+ if (instance_lookup(q, queue_num)) {
err = -EEXIST;
goto out_unlock;
}
@@ -123,16 +133,16 @@ instance_create(u_int16_t queue_num, int portid)
}
h = instance_hashfn(queue_num);
- hlist_add_head_rcu(&inst->hlist, &instance_table[h]);
+ hlist_add_head_rcu(&inst->hlist, &q->instance_table[h]);
- spin_unlock(&instances_lock);
+ spin_unlock(&q->instances_lock);
return inst;
out_free:
kfree(inst);
out_unlock:
- spin_unlock(&instances_lock);
+ spin_unlock(&q->instances_lock);
return ERR_PTR(err);
}
@@ -158,11 +168,11 @@ __instance_destroy(struct nfqnl_instance *inst)
}
static void
-instance_destroy(struct nfqnl_instance *inst)
+instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)
{
- spin_lock(&instances_lock);
+ spin_lock(&q->instances_lock);
__instance_destroy(inst);
- spin_unlock(&instances_lock);
+ spin_unlock(&q->instances_lock);
}
static inline void
@@ -473,9 +483,12 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
int err = -ENOBUFS;
__be32 *packet_id_ptr;
int failopen = 0;
+ struct net *net = dev_net(entry->indev ?
+ entry->indev : entry->outdev);
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
/* rcu_read_lock()ed by nf_hook_slow() */
- queue = instance_lookup(queuenum);
+ queue = instance_lookup(q, queuenum);
if (!queue) {
err = -ESRCH;
goto err_out;
@@ -512,7 +525,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
*packet_id_ptr = htonl(entry->id);
/* nfnetlink_unicast will either free the nskb or add it to a socket */
- err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT);
+ err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT);
if (err < 0) {
queue->queue_user_dropped++;
goto err_out_unlock;
@@ -625,15 +638,16 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
/* drop all packets with either indev or outdev == ifindex from all queue
* instances */
static void
-nfqnl_dev_drop(int ifindex)
+nfqnl_dev_drop(struct net *net, int ifindex)
{
int i;
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
rcu_read_lock();
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct nfqnl_instance *inst;
- struct hlist_head *head = &instance_table[i];
+ struct hlist_head *head = &q->instance_table[i];
hlist_for_each_entry_rcu(inst, head, hlist)
nfqnl_flush(inst, dev_cmp, ifindex);
@@ -650,12 +664,9 @@ nfqnl_rcv_dev_event(struct notifier_block *this,
{
struct net_device *dev = ptr;
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
/* Drop any packets associated with the downed device */
if (event == NETDEV_DOWN)
- nfqnl_dev_drop(dev->ifindex);
+ nfqnl_dev_drop(dev_net(dev), dev->ifindex);
return NOTIFY_DONE;
}
@@ -668,24 +679,24 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct netlink_notify *n = ptr;
+ struct nfnl_queue_net *q = nfnl_queue_pernet(n->net);
if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
int i;
/* destroy all instances for this portid */
- spin_lock(&instances_lock);
+ spin_lock(&q->instances_lock);
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct hlist_node *t2;
struct nfqnl_instance *inst;
- struct hlist_head *head = &instance_table[i];
+ struct hlist_head *head = &q->instance_table[i];
hlist_for_each_entry_safe(inst, t2, head, hlist) {
- if ((n->net == &init_net) &&
- (n->portid == inst->peer_portid))
+ if (n->portid == inst->peer_portid)
__instance_destroy(inst);
}
}
- spin_unlock(&instances_lock);
+ spin_unlock(&q->instances_lock);
}
return NOTIFY_DONE;
}
@@ -706,11 +717,12 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
[NFQA_MARK] = { .type = NLA_U32 },
};
-static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid)
+static struct nfqnl_instance *
+verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid)
{
struct nfqnl_instance *queue;
- queue = instance_lookup(queue_num);
+ queue = instance_lookup(q, queue_num);
if (!queue)
return ERR_PTR(-ENODEV);
@@ -754,7 +766,11 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
LIST_HEAD(batch_list);
u16 queue_num = ntohs(nfmsg->res_id);
- queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid);
+ struct net *net = sock_net(ctnl);
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+
+ queue = verdict_instance_lookup(q, queue_num,
+ NETLINK_CB(skb).portid);
if (IS_ERR(queue))
return PTR_ERR(queue);
@@ -802,10 +818,13 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
enum ip_conntrack_info uninitialized_var(ctinfo);
struct nf_conn *ct = NULL;
- queue = instance_lookup(queue_num);
- if (!queue)
+ struct net *net = sock_net(ctnl);
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
- queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid);
+ queue = instance_lookup(q, queue_num);
+ if (!queue)
+ queue = verdict_instance_lookup(q, queue_num,
+ NETLINK_CB(skb).portid);
if (IS_ERR(queue))
return PTR_ERR(queue);
@@ -869,6 +888,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
u_int16_t queue_num = ntohs(nfmsg->res_id);
struct nfqnl_instance *queue;
struct nfqnl_msg_config_cmd *cmd = NULL;
+ struct net *net = sock_net(ctnl);
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
int ret = 0;
if (nfqa[NFQA_CFG_CMD]) {
@@ -882,7 +903,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
}
rcu_read_lock();
- queue = instance_lookup(queue_num);
+ queue = instance_lookup(q, queue_num);
if (queue && queue->peer_portid != NETLINK_CB(skb).portid) {
ret = -EPERM;
goto err_out_unlock;
@@ -895,7 +916,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
ret = -EBUSY;
goto err_out_unlock;
}
- queue = instance_create(queue_num, NETLINK_CB(skb).portid);
+ queue = instance_create(q, queue_num,
+ NETLINK_CB(skb).portid);
if (IS_ERR(queue)) {
ret = PTR_ERR(queue);
goto err_out_unlock;
@@ -906,7 +928,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
ret = -ENODEV;
goto err_out_unlock;
}
- instance_destroy(queue);
+ instance_destroy(q, queue);
break;
case NFQNL_CFG_CMD_PF_BIND:
case NFQNL_CFG_CMD_PF_UNBIND:
@@ -1000,19 +1022,24 @@ static const struct nfnetlink_subsystem nfqnl_subsys = {
#ifdef CONFIG_PROC_FS
struct iter_state {
+ struct seq_net_private p;
unsigned int bucket;
};
static struct hlist_node *get_first(struct seq_file *seq)
{
struct iter_state *st = seq->private;
+ struct net *net;
+ struct nfnl_queue_net *q;
if (!st)
return NULL;
+ net = seq_file_net(seq);
+ q = nfnl_queue_pernet(net);
for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
- if (!hlist_empty(&instance_table[st->bucket]))
- return instance_table[st->bucket].first;
+ if (!hlist_empty(&q->instance_table[st->bucket]))
+ return q->instance_table[st->bucket].first;
}
return NULL;
}
@@ -1020,13 +1047,17 @@ static struct hlist_node *get_first(struct seq_file *seq)
static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
{
struct iter_state *st = seq->private;
+ struct net *net = seq_file_net(seq);
h = h->next;
while (!h) {
+ struct nfnl_queue_net *q;
+
if (++st->bucket >= INSTANCE_BUCKETS)
return NULL;
- h = instance_table[st->bucket].first;
+ q = nfnl_queue_pernet(net);
+ h = q->instance_table[st->bucket].first;
}
return h;
}
@@ -1042,11 +1073,11 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
return pos ? NULL : head;
}
-static void *seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(instances_lock)
+static void *seq_start(struct seq_file *s, loff_t *pos)
+ __acquires(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
{
- spin_lock(&instances_lock);
- return get_idx(seq, *pos);
+ spin_lock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
+ return get_idx(s, *pos);
}
static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
@@ -1056,9 +1087,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
}
static void seq_stop(struct seq_file *s, void *v)
- __releases(instances_lock)
+ __releases(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
{
- spin_unlock(&instances_lock);
+ spin_unlock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
}
static int seq_show(struct seq_file *s, void *v)
@@ -1082,7 +1113,7 @@ static const struct seq_operations nfqnl_seq_ops = {
static int nfqnl_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &nfqnl_seq_ops,
+ return seq_open_net(inode, file, &nfqnl_seq_ops,
sizeof(struct iter_state));
}
@@ -1091,39 +1122,63 @@ static const struct file_operations nfqnl_file_ops = {
.open = nfqnl_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
#endif /* PROC_FS */
-static int __init nfnetlink_queue_init(void)
+static int __net_init nfnl_queue_net_init(struct net *net)
{
- int i, status = -ENOMEM;
+ unsigned int i;
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
for (i = 0; i < INSTANCE_BUCKETS; i++)
- INIT_HLIST_HEAD(&instance_table[i]);
+ INIT_HLIST_HEAD(&q->instance_table[i]);
+
+ spin_lock_init(&q->instances_lock);
+
+#ifdef CONFIG_PROC_FS
+ if (!proc_create("nfnetlink_queue", 0440,
+ net->nf.proc_netfilter, &nfqnl_file_ops))
+ return -ENOMEM;
+#endif
+ return 0;
+}
+
+static void __net_exit nfnl_queue_net_exit(struct net *net)
+{
+ remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
+}
+
+static struct pernet_operations nfnl_queue_net_ops = {
+ .init = nfnl_queue_net_init,
+ .exit = nfnl_queue_net_exit,
+ .id = &nfnl_queue_net_id,
+ .size = sizeof(struct nfnl_queue_net),
+};
+
+static int __init nfnetlink_queue_init(void)
+{
+ int status = -ENOMEM;
netlink_register_notifier(&nfqnl_rtnl_notifier);
status = nfnetlink_subsys_register(&nfqnl_subsys);
if (status < 0) {
- printk(KERN_ERR "nf_queue: failed to create netlink socket\n");
+ pr_err("nf_queue: failed to create netlink socket\n");
goto cleanup_netlink_notifier;
}
-#ifdef CONFIG_PROC_FS
- if (!proc_create("nfnetlink_queue", 0440,
- proc_net_netfilter, &nfqnl_file_ops))
+ status = register_pernet_subsys(&nfnl_queue_net_ops);
+ if (status < 0) {
+ pr_err("nf_queue: failed to register pernet ops\n");
goto cleanup_subsys;
-#endif
-
+ }
register_netdevice_notifier(&nfqnl_dev_notifier);
nf_register_queue_handler(&nfqh);
return status;
-#ifdef CONFIG_PROC_FS
cleanup_subsys:
nfnetlink_subsys_unregister(&nfqnl_subsys);
-#endif
cleanup_netlink_notifier:
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
return status;
@@ -1133,9 +1188,7 @@ static void __exit nfnetlink_queue_fini(void)
{
nf_unregister_queue_handler();
unregister_netdevice_notifier(&nfqnl_dev_notifier);
-#ifdef CONFIG_PROC_FS
- remove_proc_entry("nfnetlink_queue", proc_net_netfilter);
-#endif
+ unregister_pernet_subsys(&nfnl_queue_net_ops);
nfnetlink_subsys_unregister(&nfqnl_subsys);
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index fa40096940a1..fe573f6c9e91 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -474,7 +474,14 @@ ipt_log_packet(u_int8_t pf,
const struct nf_loginfo *loginfo,
const char *prefix)
{
- struct sbuff *m = sb_open();
+ struct sbuff *m;
+ struct net *net = dev_net(in ? in : out);
+
+ /* FIXME: Disabled from containers until syslog ns is supported */
+ if (!net_eq(net, &init_net))
+ return;
+
+ m = sb_open();
if (!loginfo)
loginfo = &default_loginfo;
@@ -798,7 +805,14 @@ ip6t_log_packet(u_int8_t pf,
const struct nf_loginfo *loginfo,
const char *prefix)
{
- struct sbuff *m = sb_open();
+ struct sbuff *m;
+ struct net *net = dev_net(in ? in : out);
+
+ /* FIXME: Disabled from containers until syslog ns is supported */
+ if (!net_eq(net, &init_net))
+ return;
+
+ m = sb_open();
if (!loginfo)
loginfo = &default_loginfo;
@@ -893,23 +907,55 @@ static struct nf_logger ip6t_log_logger __read_mostly = {
};
#endif
+static int __net_init log_net_init(struct net *net)
+{
+ nf_log_set(net, NFPROTO_IPV4, &ipt_log_logger);
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ nf_log_set(net, NFPROTO_IPV6, &ip6t_log_logger);
+#endif
+ return 0;
+}
+
+static void __net_exit log_net_exit(struct net *net)
+{
+ nf_log_unset(net, &ipt_log_logger);
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+ nf_log_unset(net, &ip6t_log_logger);
+#endif
+}
+
+static struct pernet_operations log_net_ops = {
+ .init = log_net_init,
+ .exit = log_net_exit,
+};
+
static int __init log_tg_init(void)
{
int ret;
+ ret = register_pernet_subsys(&log_net_ops);
+ if (ret < 0)
+ goto err_pernet;
+
ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs));
if (ret < 0)
- return ret;
+ goto err_target;
nf_log_register(NFPROTO_IPV4, &ipt_log_logger);
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
nf_log_register(NFPROTO_IPV6, &ip6t_log_logger);
#endif
return 0;
+
+err_target:
+ unregister_pernet_subsys(&log_net_ops);
+err_pernet:
+ return ret;
}
static void __exit log_tg_exit(void)
{
+ unregister_pernet_subsys(&log_net_ops);
nf_log_unregister(&ipt_log_logger);
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
nf_log_unregister(&ip6t_log_logger);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 817f9e9f2b16..1e2fae32f81b 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -76,22 +76,31 @@ static u32 hash_v6(const struct sk_buff *skb)
}
#endif
-static unsigned int
-nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+static u32
+nfqueue_hash(const struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_NFQ_info_v1 *info = par->targinfo;
u32 queue = info->queuenum;
- if (info->queues_total > 1) {
- if (par->family == NFPROTO_IPV4)
- queue = (((u64) hash_v4(skb) * info->queues_total) >>
- 32) + queue;
+ if (par->family == NFPROTO_IPV4)
+ queue += ((u64) hash_v4(skb) * info->queues_total) >> 32;
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
- else if (par->family == NFPROTO_IPV6)
- queue = (((u64) hash_v6(skb) * info->queues_total) >>
- 32) + queue;
+ else if (par->family == NFPROTO_IPV6)
+ queue += ((u64) hash_v6(skb) * info->queues_total) >> 32;
#endif
- }
+
+ return queue;
+}
+
+static unsigned int
+nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_NFQ_info_v1 *info = par->targinfo;
+ u32 queue = info->queuenum;
+
+ if (info->queues_total > 1)
+ queue = nfqueue_hash(skb, par);
+
return NF_QUEUE_NR(queue);
}
@@ -108,7 +117,7 @@ nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
static int nfqueue_tg_check(const struct xt_tgchk_param *par)
{
- const struct xt_NFQ_info_v2 *info = par->targinfo;
+ const struct xt_NFQ_info_v3 *info = par->targinfo;
u32 maxid;
if (unlikely(!rnd_inited)) {
@@ -125,11 +134,32 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par)
info->queues_total, maxid);
return -ERANGE;
}
- if (par->target->revision == 2 && info->bypass > 1)
+ if (par->target->revision == 2 && info->flags > 1)
return -EINVAL;
+ if (par->target->revision == 3 && info->flags & ~NFQ_FLAG_MASK)
+ return -EINVAL;
+
return 0;
}
+static unsigned int
+nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_NFQ_info_v3 *info = par->targinfo;
+ u32 queue = info->queuenum;
+
+ if (info->queues_total > 1) {
+ if (info->flags & NFQ_FLAG_CPU_FANOUT) {
+ int cpu = smp_processor_id();
+
+ queue = info->queuenum + cpu % info->queues_total;
+ } else
+ queue = nfqueue_hash(skb, par);
+ }
+
+ return NF_QUEUE_NR(queue);
+}
+
static struct xt_target nfqueue_tg_reg[] __read_mostly = {
{
.name = "NFQUEUE",
@@ -156,6 +186,15 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
.targetsize = sizeof(struct xt_NFQ_info_v2),
.me = THIS_MODULE,
},
+ {
+ .name = "NFQUEUE",
+ .revision = 3,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = nfqueue_tg_check,
+ .target = nfqueue_tg_v3,
+ .targetsize = sizeof(struct xt_NFQ_info_v3),
+ .me = THIS_MODULE,
+ },
};
static int __init nfqueue_tg_init(void)
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index a5e673d32bda..647d989a01e6 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -201,6 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
unsigned char opts[MAX_IPOPTLEN];
const struct xt_osf_finger *kf;
const struct xt_osf_user_finger *f;
+ struct net *net = dev_net(p->in ? p->in : p->out);
if (!info)
return false;
@@ -325,7 +326,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
fcount++;
if (info->flags & XT_OSF_LOG)
- nf_log_packet(p->family, p->hooknum, skb,
+ nf_log_packet(net, p->family, p->hooknum, skb,
p->in, p->out, NULL,
"%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
f->genre, f->version, f->subtype,
@@ -341,7 +342,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
rcu_read_unlock();
if (!fcount && (info->flags & XT_OSF_LOG))
- nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL,
+ nf_log_packet(net, p->family, p->hooknum, skb, p->in,
+ p->out, NULL,
"Remote OS is not known: %pI4:%u -> %pI4:%u\n",
&ip->saddr, ntohs(tcp->source),
&ip->daddr, ntohs(tcp->dest));
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index d1fa1d9ffd2e..7fcb307dea47 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1173,6 +1173,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
}
if (sax != NULL) {
+ memset(sax, 0, sizeof(sax));
sax->sax25_family = AF_NETROM;
skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call,
AX25_ADDR_LEN);
diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c
index bb67b98b9797..7de0368aff0c 100644
--- a/net/nfc/llcp/llcp.c
+++ b/net/nfc/llcp/llcp.c
@@ -107,8 +107,6 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen,
accept_sk->sk_state_change(sk);
bh_unlock_sock(accept_sk);
-
- sock_orphan(accept_sk);
}
if (listen == true) {
@@ -134,8 +132,6 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen,
bh_unlock_sock(sk);
- sock_orphan(sk);
-
sk_del_node_init(sk);
}
@@ -164,8 +160,6 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen,
bh_unlock_sock(sk);
- sock_orphan(sk);
-
sk_del_node_init(sk);
}
@@ -869,7 +863,6 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local,
skb_get(skb);
} else {
pr_err("Receive queue is full\n");
- kfree_skb(skb);
}
nfc_llcp_sock_put(llcp_sock);
@@ -1072,7 +1065,6 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local,
skb_get(skb);
} else {
pr_err("Receive queue is full\n");
- kfree_skb(skb);
}
}
diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c
index f1b377e247fe..c1101e6de170 100644
--- a/net/nfc/llcp/sock.c
+++ b/net/nfc/llcp/sock.c
@@ -388,7 +388,9 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent,
}
if (sk->sk_state == LLCP_CONNECTED || !newsock) {
- nfc_llcp_accept_unlink(sk);
+ list_del_init(&lsk->accept_queue);
+ sock_put(sk);
+
if (newsock)
sock_graft(sk, newsock);
@@ -521,7 +523,8 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
return llcp_accept_poll(sk);
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= POLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
if (!skb_queue_empty(&sk->sk_receive_queue))
mask |= POLLIN | POLLRDNORM;
@@ -582,8 +585,6 @@ static int llcp_sock_release(struct socket *sock)
nfc_llcp_accept_unlink(accept_sk);
release_sock(accept_sk);
-
- sock_orphan(accept_sk);
}
}
@@ -764,6 +765,8 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
pr_debug("%p %zu\n", sk, len);
+ msg->msg_namelen = 0;
+
lock_sock(sk);
if (sk->sk_state == LLCP_CLOSED &&
@@ -809,6 +812,7 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
pr_debug("Datagram socket %d %d\n", ui_cb->dsap, ui_cb->ssap);
+ memset(sockaddr, 0, sizeof(*sockaddr));
sockaddr->sa_family = AF_NFC;
sockaddr->nfc_protocol = NFC_PROTO_NFC_DEP;
sockaddr->dsap = ui_cb->dsap;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8e4644ff8d34..e566b793f07c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -158,10 +158,16 @@ struct packet_mreq_max {
unsigned char mr_address[MAX_ADDR_LEN];
};
+union tpacket_uhdr {
+ struct tpacket_hdr *h1;
+ struct tpacket2_hdr *h2;
+ struct tpacket3_hdr *h3;
+ void *raw;
+};
+
static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
int closing, int tx_ring);
-
#define V3_ALIGNMENT (8)
#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))
@@ -290,11 +296,7 @@ static inline __pure struct page *pgv_to_page(void *addr)
static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
- union {
- struct tpacket_hdr *h1;
- struct tpacket2_hdr *h2;
- void *raw;
- } h;
+ union tpacket_uhdr h;
h.raw = frame;
switch (po->tp_version) {
@@ -317,11 +319,7 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
static int __packet_get_status(struct packet_sock *po, void *frame)
{
- union {
- struct tpacket_hdr *h1;
- struct tpacket2_hdr *h2;
- void *raw;
- } h;
+ union tpacket_uhdr h;
smp_rmb();
@@ -347,11 +345,7 @@ static void *packet_lookup_frame(struct packet_sock *po,
int status)
{
unsigned int pg_vec_pos, frame_offset;
- union {
- struct tpacket_hdr *h1;
- struct tpacket2_hdr *h2;
- void *raw;
- } h;
+ union tpacket_uhdr h;
pg_vec_pos = position / rb->frames_per_block;
frame_offset = position % rb->frames_per_block;
@@ -1505,9 +1499,8 @@ retry:
skb->dev = dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
- if (err < 0)
- goto out_unlock;
+
+ sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
if (unlikely(extra_len == 4))
skb->no_fcs = 1;
@@ -1670,12 +1663,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct sock *sk;
struct packet_sock *po;
struct sockaddr_ll *sll;
- union {
- struct tpacket_hdr *h1;
- struct tpacket2_hdr *h2;
- struct tpacket3_hdr *h3;
- void *raw;
- } h;
+ union tpacket_uhdr h;
u8 *skb_head = skb->data;
int skb_len = skb->len;
unsigned int snaplen, res;
@@ -1910,11 +1898,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
void *frame, struct net_device *dev, int size_max,
__be16 proto, unsigned char *addr, int hlen)
{
- union {
- struct tpacket_hdr *h1;
- struct tpacket2_hdr *h2;
- void *raw;
- } ph;
+ union tpacket_uhdr ph;
int to_write, offset, len, tp_len, nr_frags, len_max;
struct socket *sock = po->sk.sk_socket;
struct page *page;
@@ -2312,9 +2296,8 @@ static int packet_snd(struct socket *sock,
err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
if (err)
goto out_free;
- err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
- if (err < 0)
- goto out_free;
+
+ sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
/* Earlier code assumed this would be a VLAN pkt,
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index cf68e6e4054a..9c8347451597 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1253,6 +1253,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
if (srose != NULL) {
+ memset(srose, 0, msg->msg_namelen);
srose->srose_family = AF_ROSE;
srose->srose_addr = rose->dest_addr;
srose->srose_call = rose->dest_call;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 08fa1e8a4ca4..3a4c0caa1f7d 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -166,15 +166,17 @@ static int tcf_csum_ipv4_igmp(struct sk_buff *skb,
return 1;
}
-static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h,
+static int tcf_csum_ipv6_icmp(struct sk_buff *skb,
unsigned int ihl, unsigned int ipl)
{
struct icmp6hdr *icmp6h;
+ const struct ipv6hdr *ip6h;
icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h));
if (icmp6h == NULL)
return 0;
+ ip6h = ipv6_hdr(skb);
icmp6h->icmp6_cksum = 0;
skb->csum = csum_partial(icmp6h, ipl - ihl, 0);
icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
@@ -186,15 +188,17 @@ static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h,
return 1;
}
-static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph,
+static int tcf_csum_ipv4_tcp(struct sk_buff *skb,
unsigned int ihl, unsigned int ipl)
{
struct tcphdr *tcph;
+ const struct iphdr *iph;
tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
if (tcph == NULL)
return 0;
+ iph = ip_hdr(skb);
tcph->check = 0;
skb->csum = csum_partial(tcph, ipl - ihl, 0);
tcph->check = tcp_v4_check(ipl - ihl,
@@ -205,15 +209,17 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph,
return 1;
}
-static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h,
+static int tcf_csum_ipv6_tcp(struct sk_buff *skb,
unsigned int ihl, unsigned int ipl)
{
struct tcphdr *tcph;
+ const struct ipv6hdr *ip6h;
tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
if (tcph == NULL)
return 0;
+ ip6h = ipv6_hdr(skb);
tcph->check = 0;
skb->csum = csum_partial(tcph, ipl - ihl, 0);
tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
@@ -225,10 +231,11 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h,
return 1;
}
-static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph,
+static int tcf_csum_ipv4_udp(struct sk_buff *skb,
unsigned int ihl, unsigned int ipl, int udplite)
{
struct udphdr *udph;
+ const struct iphdr *iph;
u16 ul;
/*
@@ -242,6 +249,7 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph,
if (udph == NULL)
return 0;
+ iph = ip_hdr(skb);
ul = ntohs(udph->len);
if (udplite || udph->check) {
@@ -276,10 +284,11 @@ ignore_obscure_skb:
return 1;
}
-static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h,
+static int tcf_csum_ipv6_udp(struct sk_buff *skb,
unsigned int ihl, unsigned int ipl, int udplite)
{
struct udphdr *udph;
+ const struct ipv6hdr *ip6h;
u16 ul;
/*
@@ -293,6 +302,7 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h,
if (udph == NULL)
return 0;
+ ip6h = ipv6_hdr(skb);
ul = ntohs(udph->len);
udph->check = 0;
@@ -328,7 +338,7 @@ ignore_obscure_skb:
static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
{
- struct iphdr *iph;
+ const struct iphdr *iph;
int ntkoff;
ntkoff = skb_network_offset(skb);
@@ -353,19 +363,19 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
break;
case IPPROTO_TCP:
if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
- if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4,
+ if (!tcf_csum_ipv4_tcp(skb, iph->ihl * 4,
ntohs(iph->tot_len)))
goto fail;
break;
case IPPROTO_UDP:
if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
- if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
+ if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4,
ntohs(iph->tot_len), 0))
goto fail;
break;
case IPPROTO_UDPLITE:
if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
- if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
+ if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4,
ntohs(iph->tot_len), 1))
goto fail;
break;
@@ -377,7 +387,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
goto fail;
- ip_send_check(iph);
+ ip_send_check(ip_hdr(skb));
}
return 1;
@@ -456,6 +466,7 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
ixhl = ipv6_optlen(ip6xh);
if (!pskb_may_pull(skb, hl + ixhl + ntkoff))
goto fail;
+ ip6xh = (void *)(skb_network_header(skb) + hl);
if ((nexthdr == NEXTHDR_HOP) &&
!(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl)))
goto fail;
@@ -464,25 +475,25 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
break;
case IPPROTO_ICMPV6:
if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
- if (!tcf_csum_ipv6_icmp(skb, ip6h,
+ if (!tcf_csum_ipv6_icmp(skb,
hl, pl + sizeof(*ip6h)))
goto fail;
goto done;
case IPPROTO_TCP:
if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
- if (!tcf_csum_ipv6_tcp(skb, ip6h,
+ if (!tcf_csum_ipv6_tcp(skb,
hl, pl + sizeof(*ip6h)))
goto fail;
goto done;
case IPPROTO_UDP:
if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
- if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
+ if (!tcf_csum_ipv6_udp(skb, hl,
pl + sizeof(*ip6h), 0))
goto fail;
goto done;
case IPPROTO_UDPLITE:
if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
- if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
+ if (!tcf_csum_ipv6_udp(skb, hl,
pl + sizeof(*ip6h), 1))
goto fail;
goto done;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5c81b2603239..8e118af90973 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -22,7 +22,6 @@
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
-#include <net/netlink.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 13aa47aa2ffb..1bc210ffcba2 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -962,8 +962,11 @@ cbq_dequeue(struct Qdisc *sch)
cbq_update(q);
if ((incr -= incr2) < 0)
incr = 0;
+ q->now += incr;
+ } else {
+ if (now > q->now)
+ q->now = now;
}
- q->now += incr;
q->now_rt = now;
for (;;) {
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 4e606fcb2534..55786283a3df 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -195,7 +195,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
flow->deficit = q->quantum;
flow->dropped = 0;
}
- if (++sch->q.qlen < sch->limit)
+ if (++sch->q.qlen <= sch->limit)
return NET_XMIT_SUCCESS;
q->drop_overlimit++;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ffad48109a22..eac7e0ee23c1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -904,7 +904,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, u32 rate)
u64 mult;
int shift;
- r->rate_bps = rate << 3;
+ r->rate_bps = (u64)rate << 3;
r->shift = 0;
r->mult = 1;
/*
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index d2709e2b7be6..423549a714e5 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -104,8 +104,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
/* Initialize the object handling fields. */
atomic_set(&asoc->base.refcnt, 1);
- asoc->base.dead = 0;
- asoc->base.malloced = 0;
+ asoc->base.dead = false;
/* Initialize the bind addr area. */
sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port);
@@ -371,7 +370,6 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
if (!sctp_association_init(asoc, ep, sk, scope, gfp))
goto fail_init;
- asoc->base.malloced = 1;
SCTP_DBG_OBJCNT_INC(assoc);
SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc);
@@ -409,7 +407,7 @@ void sctp_association_free(struct sctp_association *asoc)
/* Mark as dead, so other users can know this structure is
* going away.
*/
- asoc->base.dead = 1;
+ asoc->base.dead = true;
/* Dispose of any data lying around in the outqueue. */
sctp_outq_free(&asoc->outqueue);
@@ -484,10 +482,8 @@ static void sctp_association_destroy(struct sctp_association *asoc)
WARN_ON(atomic_read(&asoc->rmem_alloc));
- if (asoc->base.malloced) {
- kfree(asoc);
- SCTP_DBG_OBJCNT_DEC(assoc);
- }
+ kfree(asoc);
+ SCTP_DBG_OBJCNT_DEC(assoc);
}
/* Change the primary destination address for the peer. */
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 12ed45dbe75d..5fbd7bc6bb11 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -121,8 +121,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
/* Initialize the basic object fields. */
atomic_set(&ep->base.refcnt, 1);
- ep->base.dead = 0;
- ep->base.malloced = 1;
+ ep->base.dead = false;
/* Create an input queue. */
sctp_inq_init(&ep->base.inqueue);
@@ -198,7 +197,7 @@ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp)
goto fail;
if (!sctp_endpoint_init(ep, sk, gfp))
goto fail_init;
- ep->base.malloced = 1;
+
SCTP_DBG_OBJCNT_INC(ep);
return ep;
@@ -234,7 +233,7 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep,
*/
void sctp_endpoint_free(struct sctp_endpoint *ep)
{
- ep->base.dead = 1;
+ ep->base.dead = true;
ep->base.sk->sk_state = SCTP_SS_CLOSED;
@@ -279,11 +278,8 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
if (ep->base.sk)
sock_put(ep->base.sk);
- /* Finally, free up our memory. */
- if (ep->base.malloced) {
- kfree(ep);
- SCTP_DBG_OBJCNT_DEC(ep);
- }
+ kfree(ep);
+ SCTP_DBG_OBJCNT_DEC(ep);
}
/* Hold a reference to an endpoint. */
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index ab3bba8cb0a8..4e45ee35d0db 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -295,7 +295,8 @@ static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos)
seq_printf(seq, " ASSOC SOCK STY SST ST HBKT "
"ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT "
"RPORT LADDRS <-> RADDRS "
- "HBINT INS OUTS MAXRT T1X T2X RTXC\n");
+ "HBINT INS OUTS MAXRT T1X T2X RTXC "
+ "wmema wmemq sndbuf rcvbuf\n");
return (void *)pos;
}
@@ -349,11 +350,16 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
sctp_seq_dump_local_addrs(seq, epb);
seq_printf(seq, "<-> ");
sctp_seq_dump_remote_addrs(seq, assoc);
- seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d ",
+ seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d "
+ "%8d %8d %8d %8d",
assoc->hbinterval, assoc->c.sinit_max_instreams,
assoc->c.sinit_num_ostreams, assoc->max_retrans,
assoc->init_retries, assoc->shutdown_retries,
- assoc->rtx_data_chunks);
+ assoc->rtx_data_chunks,
+ atomic_read(&sk->sk_wmem_alloc),
+ sk->sk_wmem_queued,
+ sk->sk_sndbuf,
+ sk->sk_rcvbuf);
seq_printf(seq, "\n");
}
read_unlock(&head->lock);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b9070736b8d9..f631c5ff4dbf 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1119,9 +1119,10 @@ static int __sctp_connect(struct sock* sk,
/* Make sure the destination port is correctly set
* in all addresses.
*/
- if (asoc && asoc->peer.port && asoc->peer.port != port)
+ if (asoc && asoc->peer.port && asoc->peer.port != port) {
+ err = -EINVAL;
goto out_free;
-
+ }
/* Check if there already is a matching association on the
* endpoint (other than the one created here).
@@ -6185,7 +6186,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
/* Is there any exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= POLLERR |
+ sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
diff --git a/net/socket.c b/net/socket.c
index 88f759adf3af..36883fea44f3 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -600,7 +600,7 @@ void sock_release(struct socket *sock)
}
EXPORT_SYMBOL(sock_release);
-int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
+void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
{
*tx_flags = 0;
if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
@@ -609,7 +609,6 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
*tx_flags |= SKBTX_SW_TSTAMP;
if (sock_flag(sk, SOCK_WIFI_STATUS))
*tx_flags |= SKBTX_WIFI_STATUS;
- return 0;
}
EXPORT_SYMBOL(sock_tx_timestamp);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index a9622b6cd916..515ce38e4f4c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -790,6 +790,7 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
if (addr) {
addr->family = AF_TIPC;
addr->addrtype = TIPC_ADDR_ID;
+ memset(&addr->addr, 0, sizeof(addr->addr));
addr->addr.id.ref = msg_origport(msg);
addr->addr.id.node = msg_orignode(msg);
addr->addr.name.domain = 0; /* could leave uninitialized */
@@ -904,6 +905,9 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock,
goto exit;
}
+ /* will be updated in set_orig_addr() if needed */
+ m->msg_namelen = 0;
+
timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
restart:
@@ -1013,6 +1017,9 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
goto exit;
}
+ /* will be updated in set_orig_addr() if needed */
+ m->msg_namelen = 0;
+
target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 971282b6f6a3..5ca1631de7ef 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1340,7 +1340,6 @@ static void unix_destruct_scm(struct sk_buff *skb)
struct scm_cookie scm;
memset(&scm, 0, sizeof(scm));
scm.pid = UNIXCB(skb).pid;
- scm.cred = UNIXCB(skb).cred;
if (UNIXCB(skb).fp)
unix_detach_fds(&scm, skb);
@@ -1391,8 +1390,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
int err = 0;
UNIXCB(skb).pid = get_pid(scm->pid);
- if (scm->cred)
- UNIXCB(skb).cred = get_cred(scm->cred);
+ UNIXCB(skb).uid = scm->creds.uid;
+ UNIXCB(skb).gid = scm->creds.gid;
UNIXCB(skb).fp = NULL;
if (scm->fp && send_fds)
err = unix_attach_fds(scm, skb);
@@ -1409,13 +1408,13 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
const struct sock *other)
{
- if (UNIXCB(skb).cred)
+ if (UNIXCB(skb).pid)
return;
if (test_bit(SOCK_PASSCRED, &sock->flags) ||
- (other->sk_socket &&
- test_bit(SOCK_PASSCRED, &other->sk_socket->flags))) {
+ !other->sk_socket ||
+ test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
UNIXCB(skb).pid = get_pid(task_tgid(current));
- UNIXCB(skb).cred = get_current_cred();
+ current_euid_egid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
}
}
@@ -1819,7 +1818,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
siocb->scm = &tmp_scm;
memset(&tmp_scm, 0, sizeof(tmp_scm));
}
- scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
+ scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
unix_set_secdata(siocb->scm, skb);
if (!(flags & MSG_PEEK)) {
@@ -1991,11 +1990,12 @@ again:
if (check_creds) {
/* Never glue messages from different writers */
if ((UNIXCB(skb).pid != siocb->scm->pid) ||
- (UNIXCB(skb).cred != siocb->scm->cred))
+ !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) ||
+ !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid))
break;
- } else {
+ } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
/* Copy credentials */
- scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
+ scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
check_creds = 1;
}
@@ -2196,7 +2196,9 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= POLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index ca511c4f388a..7f93e2a42d7a 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -207,7 +207,7 @@ static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr)
struct vsock_sock *vsk;
list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table)
- if (vsock_addr_equals_addr_any(addr, &vsk->local_addr))
+ if (addr->svm_port == vsk->local_addr.svm_port)
return sk_vsock(vsk);
return NULL;
@@ -220,8 +220,8 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
list_for_each_entry(vsk, vsock_connected_sockets(src, dst),
connected_table) {
- if (vsock_addr_equals_addr(src, &vsk->remote_addr)
- && vsock_addr_equals_addr(dst, &vsk->local_addr)) {
+ if (vsock_addr_equals_addr(src, &vsk->remote_addr) &&
+ dst->svm_port == vsk->local_addr.svm_port) {
return sk_vsock(vsk);
}
}
@@ -1670,6 +1670,8 @@ vsock_stream_recvmsg(struct kiocb *kiocb,
vsk = vsock_sk(sk);
err = 0;
+ msg->msg_namelen = 0;
+
lock_sock(sk);
if (sk->sk_state != SS_CONNECTED) {
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index faf81b8c1a3a..daff75200e25 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -472,19 +472,16 @@ static struct sock *vmci_transport_get_pending(
struct vsock_sock *vlistener;
struct vsock_sock *vpending;
struct sock *pending;
+ struct sockaddr_vm src;
+
+ vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
vlistener = vsock_sk(listener);
list_for_each_entry(vpending, &vlistener->pending_links,
pending_links) {
- struct sockaddr_vm src;
- struct sockaddr_vm dst;
-
- vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
- vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);
-
if (vsock_addr_equals_addr(&src, &vpending->remote_addr) &&
- vsock_addr_equals_addr(&dst, &vpending->local_addr)) {
+ pkt->dst_port == vpending->local_addr.svm_port) {
pending = sk_vsock(vpending);
sock_hold(pending);
goto found;
@@ -749,10 +746,15 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
*/
bh_lock_sock(sk);
- if (!sock_owned_by_user(sk) && sk->sk_state == SS_CONNECTED)
- vmci_trans(vsk)->notify_ops->handle_notify_pkt(
- sk, pkt, true, &dst, &src,
- &bh_process_pkt);
+ if (!sock_owned_by_user(sk)) {
+ /* The local context ID may be out of date, update it. */
+ vsk->local_addr.svm_cid = dst.svm_cid;
+
+ if (sk->sk_state == SS_CONNECTED)
+ vmci_trans(vsk)->notify_ops->handle_notify_pkt(
+ sk, pkt, true, &dst, &src,
+ &bh_process_pkt);
+ }
bh_unlock_sock(sk);
@@ -912,6 +914,9 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work)
lock_sock(sk);
+ /* The local context ID may be out of date. */
+ vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context;
+
switch (sk->sk_state) {
case SS_LISTEN:
vmci_transport_recv_listen(sk, pkt);
@@ -968,6 +973,10 @@ static int vmci_transport_recv_listen(struct sock *sk,
pending = vmci_transport_get_pending(sk, pkt);
if (pending) {
lock_sock(pending);
+
+ /* The local context ID may be out of date. */
+ vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context;
+
switch (pending->sk_state) {
case SS_CONNECTING:
err = vmci_transport_recv_connecting_server(sk,
@@ -1737,6 +1746,8 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
return -EOPNOTSUPP;
+ msg->msg_namelen = 0;
+
/* Retrieve the head sk_buff from the socket's receive queue. */
err = 0;
skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
@@ -1769,7 +1780,6 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
if (err)
goto out;
- msg->msg_namelen = 0;
if (msg->msg_name) {
struct sockaddr_vm *vm_addr;
diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c
index b7df1aea7c59..ec2611b4ea0e 100644
--- a/net/vmw_vsock/vsock_addr.c
+++ b/net/vmw_vsock/vsock_addr.c
@@ -64,16 +64,6 @@ bool vsock_addr_equals_addr(const struct sockaddr_vm *addr,
}
EXPORT_SYMBOL_GPL(vsock_addr_equals_addr);
-bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr,
- const struct sockaddr_vm *other)
-{
- return (addr->svm_cid == VMADDR_CID_ANY ||
- other->svm_cid == VMADDR_CID_ANY ||
- addr->svm_cid == other->svm_cid) &&
- addr->svm_port == other->svm_port;
-}
-EXPORT_SYMBOL_GPL(vsock_addr_equals_addr_any);
-
int vsock_addr_cast(const struct sockaddr *addr,
size_t len, struct sockaddr_vm **out_addr)
{
diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h
index cdfbcefdf843..9ccd5316eac0 100644
--- a/net/vmw_vsock/vsock_addr.h
+++ b/net/vmw_vsock/vsock_addr.h
@@ -24,8 +24,6 @@ bool vsock_addr_bound(const struct sockaddr_vm *addr);
void vsock_addr_unbind(struct sockaddr_vm *addr);
bool vsock_addr_equals_addr(const struct sockaddr_vm *addr,
const struct sockaddr_vm *other);
-bool vsock_addr_equals_addr_any(const struct sockaddr_vm *addr,
- const struct sockaddr_vm *other);
int vsock_addr_cast(const struct sockaddr *addr, size_t len,
struct sockaddr_vm **out_addr);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index f382cae983ba..84c9ad7e1dca 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -212,6 +212,39 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data)
rdev_rfkill_poll(rdev);
}
+void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev)
+{
+ lockdep_assert_held(&rdev->devlist_mtx);
+ lockdep_assert_held(&rdev->sched_scan_mtx);
+
+ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE))
+ return;
+
+ if (!wdev->p2p_started)
+ return;
+
+ rdev_stop_p2p_device(rdev, wdev);
+ wdev->p2p_started = false;
+
+ rdev->opencount--;
+
+ if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
+ bool busy = work_busy(&rdev->scan_done_wk);
+
+ /*
+ * If the work isn't pending or running (in which case it would
+ * be waiting for the lock we hold) the driver didn't properly
+ * cancel the scan when the interface was removed. In this case
+ * warn and leak the scan request object to not crash later.
+ */
+ WARN_ON(!busy);
+
+ rdev->scan_req->aborted = true;
+ ___cfg80211_scan_done(rdev, !busy);
+ }
+}
+
static int cfg80211_rfkill_set_block(void *data, bool blocked)
{
struct cfg80211_registered_device *rdev = data;
@@ -221,7 +254,8 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked)
return 0;
rtnl_lock();
- mutex_lock(&rdev->devlist_mtx);
+
+ /* read-only iteration need not hold the devlist_mtx */
list_for_each_entry(wdev, &rdev->wdev_list, list) {
if (wdev->netdev) {
@@ -231,18 +265,18 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked)
/* otherwise, check iftype */
switch (wdev->iftype) {
case NL80211_IFTYPE_P2P_DEVICE:
- if (!wdev->p2p_started)
- break;
- rdev_stop_p2p_device(rdev, wdev);
- wdev->p2p_started = false;
- rdev->opencount--;
+ /* but this requires it */
+ mutex_lock(&rdev->devlist_mtx);
+ mutex_lock(&rdev->sched_scan_mtx);
+ cfg80211_stop_p2p_device(rdev, wdev);
+ mutex_unlock(&rdev->sched_scan_mtx);
+ mutex_unlock(&rdev->devlist_mtx);
break;
default:
break;
}
}
- mutex_unlock(&rdev->devlist_mtx);
rtnl_unlock();
return 0;
@@ -745,17 +779,13 @@ static void wdev_cleanup_work(struct work_struct *work)
wdev = container_of(work, struct wireless_dev, cleanup_work);
rdev = wiphy_to_dev(wdev->wiphy);
- cfg80211_lock_rdev(rdev);
+ mutex_lock(&rdev->sched_scan_mtx);
if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) {
rdev->scan_req->aborted = true;
___cfg80211_scan_done(rdev, true);
}
- cfg80211_unlock_rdev(rdev);
-
- mutex_lock(&rdev->sched_scan_mtx);
-
if (WARN_ON(rdev->sched_scan_req &&
rdev->sched_scan_req->dev == wdev->netdev)) {
__cfg80211_stop_sched_scan(rdev, false);
@@ -781,21 +811,19 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev)
return;
mutex_lock(&rdev->devlist_mtx);
+ mutex_lock(&rdev->sched_scan_mtx);
list_del_rcu(&wdev->list);
rdev->devlist_generation++;
switch (wdev->iftype) {
case NL80211_IFTYPE_P2P_DEVICE:
- if (!wdev->p2p_started)
- break;
- rdev_stop_p2p_device(rdev, wdev);
- wdev->p2p_started = false;
- rdev->opencount--;
+ cfg80211_stop_p2p_device(rdev, wdev);
break;
default:
WARN_ON_ONCE(1);
break;
}
+ mutex_unlock(&rdev->sched_scan_mtx);
mutex_unlock(&rdev->devlist_mtx);
}
EXPORT_SYMBOL(cfg80211_unregister_wdev);
@@ -945,6 +973,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
cfg80211_update_iface_num(rdev, wdev->iftype, 1);
cfg80211_lock_rdev(rdev);
mutex_lock(&rdev->devlist_mtx);
+ mutex_lock(&rdev->sched_scan_mtx);
wdev_lock(wdev);
switch (wdev->iftype) {
#ifdef CONFIG_CFG80211_WEXT
@@ -976,6 +1005,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
break;
}
wdev_unlock(wdev);
+ mutex_unlock(&rdev->sched_scan_mtx);
rdev->opencount++;
mutex_unlock(&rdev->devlist_mtx);
cfg80211_unlock_rdev(rdev);
@@ -1096,8 +1126,10 @@ static int __init cfg80211_init(void)
goto out_fail_reg;
cfg80211_wq = create_singlethread_workqueue("cfg80211");
- if (!cfg80211_wq)
+ if (!cfg80211_wq) {
+ err = -ENOMEM;
goto out_fail_wq;
+ }
return 0;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index d5d06fdea961..124e5e773fbc 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -503,6 +503,9 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
void cfg80211_leave(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
+void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev);
+
#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10
#ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index f924d45af1b8..671b69a3c136 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5048,14 +5048,19 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
if (!rdev->ops->scan)
return -EOPNOTSUPP;
- if (rdev->scan_req)
- return -EBUSY;
+ mutex_lock(&rdev->sched_scan_mtx);
+ if (rdev->scan_req) {
+ err = -EBUSY;
+ goto unlock;
+ }
if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
n_channels = validate_scan_freqs(
info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]);
- if (!n_channels)
- return -EINVAL;
+ if (!n_channels) {
+ err = -EINVAL;
+ goto unlock;
+ }
} else {
enum ieee80211_band band;
n_channels = 0;
@@ -5069,23 +5074,29 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp)
n_ssids++;
- if (n_ssids > wiphy->max_scan_ssids)
- return -EINVAL;
+ if (n_ssids > wiphy->max_scan_ssids) {
+ err = -EINVAL;
+ goto unlock;
+ }
if (info->attrs[NL80211_ATTR_IE])
ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
else
ie_len = 0;
- if (ie_len > wiphy->max_scan_ie_len)
- return -EINVAL;
+ if (ie_len > wiphy->max_scan_ie_len) {
+ err = -EINVAL;
+ goto unlock;
+ }
request = kzalloc(sizeof(*request)
+ sizeof(*request->ssids) * n_ssids
+ sizeof(*request->channels) * n_channels
+ ie_len, GFP_KERNEL);
- if (!request)
- return -ENOMEM;
+ if (!request) {
+ err = -ENOMEM;
+ goto unlock;
+ }
if (n_ssids)
request->ssids = (void *)&request->channels[n_channels];
@@ -5222,6 +5233,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
kfree(request);
}
+ unlock:
+ mutex_unlock(&rdev->sched_scan_mtx);
return err;
}
@@ -8130,20 +8143,9 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info)
if (!rdev->ops->stop_p2p_device)
return -EOPNOTSUPP;
- if (!wdev->p2p_started)
- return 0;
-
- rdev_stop_p2p_device(rdev, wdev);
- wdev->p2p_started = false;
-
- mutex_lock(&rdev->devlist_mtx);
- rdev->opencount--;
- mutex_unlock(&rdev->devlist_mtx);
-
- if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) {
- rdev->scan_req->aborted = true;
- ___cfg80211_scan_done(rdev, true);
- }
+ mutex_lock(&rdev->sched_scan_mtx);
+ cfg80211_stop_p2p_device(rdev, wdev);
+ mutex_unlock(&rdev->sched_scan_mtx);
return 0;
}
@@ -8929,7 +8931,7 @@ static int nl80211_add_scan_req(struct sk_buff *msg,
struct nlattr *nest;
int i;
- ASSERT_RDEV_LOCK(rdev);
+ lockdep_assert_held(&rdev->sched_scan_mtx);
if (WARN_ON(!req))
return 0;
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 674aadca0079..fd99ea495b7e 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -169,7 +169,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak)
union iwreq_data wrqu;
#endif
- ASSERT_RDEV_LOCK(rdev);
+ lockdep_assert_held(&rdev->sched_scan_mtx);
request = rdev->scan_req;
@@ -230,9 +230,9 @@ void __cfg80211_scan_done(struct work_struct *wk)
rdev = container_of(wk, struct cfg80211_registered_device,
scan_done_wk);
- cfg80211_lock_rdev(rdev);
+ mutex_lock(&rdev->sched_scan_mtx);
___cfg80211_scan_done(rdev, false);
- cfg80211_unlock_rdev(rdev);
+ mutex_unlock(&rdev->sched_scan_mtx);
}
void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted)
@@ -698,11 +698,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
found = rb_find_bss(dev, tmp, BSS_CMP_REGULAR);
if (found) {
- found->pub.beacon_interval = tmp->pub.beacon_interval;
- found->pub.signal = tmp->pub.signal;
- found->pub.capability = tmp->pub.capability;
- found->ts = tmp->ts;
-
/* Update IEs */
if (rcu_access_pointer(tmp->pub.proberesp_ies)) {
const struct cfg80211_bss_ies *old;
@@ -723,6 +718,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
if (found->pub.hidden_beacon_bss &&
!list_empty(&found->hidden_list)) {
+ const struct cfg80211_bss_ies *f;
+
/*
* The found BSS struct is one of the probe
* response members of a group, but we're
@@ -732,6 +729,10 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
* SSID to showing it, which is confusing so
* drop this information.
*/
+
+ f = rcu_access_pointer(tmp->pub.beacon_ies);
+ kfree_rcu((struct cfg80211_bss_ies *)f,
+ rcu_head);
goto drop;
}
@@ -761,6 +762,11 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
kfree_rcu((struct cfg80211_bss_ies *)old,
rcu_head);
}
+
+ found->pub.beacon_interval = tmp->pub.beacon_interval;
+ found->pub.signal = tmp->pub.signal;
+ found->pub.capability = tmp->pub.capability;
+ found->ts = tmp->ts;
} else {
struct cfg80211_internal_bss *new;
struct cfg80211_internal_bss *hidden;
@@ -1056,6 +1062,7 @@ int cfg80211_wext_siwscan(struct net_device *dev,
if (IS_ERR(rdev))
return PTR_ERR(rdev);
+ mutex_lock(&rdev->sched_scan_mtx);
if (rdev->scan_req) {
err = -EBUSY;
goto out;
@@ -1162,6 +1169,7 @@ int cfg80211_wext_siwscan(struct net_device *dev,
dev_hold(dev);
}
out:
+ mutex_unlock(&rdev->sched_scan_mtx);
kfree(creq);
cfg80211_unlock_rdev(rdev);
return err;
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index bad4c4b5e4eb..a9dc5c736df0 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -85,6 +85,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
ASSERT_RTNL();
ASSERT_RDEV_LOCK(rdev);
ASSERT_WDEV_LOCK(wdev);
+ lockdep_assert_held(&rdev->sched_scan_mtx);
if (rdev->scan_req)
return -EBUSY;
@@ -227,6 +228,7 @@ void cfg80211_conn_work(struct work_struct *work)
rtnl_lock();
cfg80211_lock_rdev(rdev);
mutex_lock(&rdev->devlist_mtx);
+ mutex_lock(&rdev->sched_scan_mtx);
list_for_each_entry(wdev, &rdev->wdev_list, list) {
wdev_lock(wdev);
@@ -234,7 +236,7 @@ void cfg80211_conn_work(struct work_struct *work)
wdev_unlock(wdev);
continue;
}
- if (wdev->sme_state != CFG80211_SME_CONNECTING) {
+ if (wdev->sme_state != CFG80211_SME_CONNECTING || !wdev->conn) {
wdev_unlock(wdev);
continue;
}
@@ -251,6 +253,7 @@ void cfg80211_conn_work(struct work_struct *work)
wdev_unlock(wdev);
}
+ mutex_unlock(&rdev->sched_scan_mtx);
mutex_unlock(&rdev->devlist_mtx);
cfg80211_unlock_rdev(rdev);
rtnl_unlock();
@@ -324,11 +327,9 @@ void cfg80211_sme_scan_done(struct net_device *dev)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
- mutex_lock(&wiphy_to_dev(wdev->wiphy)->devlist_mtx);
wdev_lock(wdev);
__cfg80211_sme_scan_done(dev);
wdev_unlock(wdev);
- mutex_unlock(&wiphy_to_dev(wdev->wiphy)->devlist_mtx);
}
void cfg80211_sme_rx_auth(struct net_device *dev,
@@ -928,9 +929,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
int err;
mutex_lock(&rdev->devlist_mtx);
+ /* might request scan - scan_mtx -> wdev_mtx dependency */
+ mutex_lock(&rdev->sched_scan_mtx);
wdev_lock(dev->ieee80211_ptr);
err = __cfg80211_connect(rdev, dev, connect, connkeys, NULL);
wdev_unlock(dev->ieee80211_ptr);
+ mutex_unlock(&rdev->sched_scan_mtx);
mutex_unlock(&rdev->devlist_mtx);
return err;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index ccadef2106ac..3c2033b8f596 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -27,7 +27,8 @@
#define WIPHY_PR_ARG __entry->wiphy_name
#define WDEV_ENTRY __field(u32, id)
-#define WDEV_ASSIGN (__entry->id) = (wdev ? wdev->identifier : 0)
+#define WDEV_ASSIGN (__entry->id) = (!IS_ERR_OR_NULL(wdev) \
+ ? wdev->identifier : 0)
#define WDEV_PR_FMT "wdev(%u)"
#define WDEV_PR_ARG (__entry->id)
@@ -1778,7 +1779,7 @@ TRACE_EVENT(rdev_set_mac_acl,
),
TP_fast_assign(
WIPHY_ASSIGN;
- WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
__entry->acl_policy = params->acl_policy;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", acl policy: %d",
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index fb9622f6d99c..e79cb5c0655a 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -89,6 +89,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev,
cfg80211_lock_rdev(rdev);
mutex_lock(&rdev->devlist_mtx);
+ mutex_lock(&rdev->sched_scan_mtx);
wdev_lock(wdev);
if (wdev->sme_state != CFG80211_SME_IDLE) {
@@ -135,6 +136,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev,
err = cfg80211_mgd_wext_connect(rdev, wdev);
out:
wdev_unlock(wdev);
+ mutex_unlock(&rdev->sched_scan_mtx);
mutex_unlock(&rdev->devlist_mtx);
cfg80211_unlock_rdev(rdev);
return err;
@@ -190,6 +192,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev,
cfg80211_lock_rdev(rdev);
mutex_lock(&rdev->devlist_mtx);
+ mutex_lock(&rdev->sched_scan_mtx);
wdev_lock(wdev);
err = 0;
@@ -223,6 +226,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev,
err = cfg80211_mgd_wext_connect(rdev, wdev);
out:
wdev_unlock(wdev);
+ mutex_unlock(&rdev->sched_scan_mtx);
mutex_unlock(&rdev->devlist_mtx);
cfg80211_unlock_rdev(rdev);
return err;
@@ -285,6 +289,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev,
cfg80211_lock_rdev(rdev);
mutex_lock(&rdev->devlist_mtx);
+ mutex_lock(&rdev->sched_scan_mtx);
wdev_lock(wdev);
if (wdev->sme_state != CFG80211_SME_IDLE) {
@@ -313,6 +318,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev,
err = cfg80211_mgd_wext_connect(rdev, wdev);
out:
wdev_unlock(wdev);
+ mutex_unlock(&rdev->sched_scan_mtx);
mutex_unlock(&rdev->devlist_mtx);
cfg80211_unlock_rdev(rdev);
return err;
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 35754cc8a9e5..8dafe6d3c6e4 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -334,6 +334,70 @@ static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event)
x->xflags &= ~XFRM_TIME_DEFER;
}
+static void xfrm_replay_notify_esn(struct xfrm_state *x, int event)
+{
+ u32 seq_diff, oseq_diff;
+ struct km_event c;
+ struct xfrm_replay_state_esn *replay_esn = x->replay_esn;
+ struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn;
+
+ /* we send notify messages in case
+ * 1. we updated on of the sequence numbers, and the seqno difference
+ * is at least x->replay_maxdiff, in this case we also update the
+ * timeout of our timer function
+ * 2. if x->replay_maxage has elapsed since last update,
+ * and there were changes
+ *
+ * The state structure must be locked!
+ */
+
+ switch (event) {
+ case XFRM_REPLAY_UPDATE:
+ if (!x->replay_maxdiff)
+ break;
+
+ if (replay_esn->seq_hi == preplay_esn->seq_hi)
+ seq_diff = replay_esn->seq - preplay_esn->seq;
+ else
+ seq_diff = ~preplay_esn->seq + replay_esn->seq + 1;
+
+ if (replay_esn->oseq_hi == preplay_esn->oseq_hi)
+ oseq_diff = replay_esn->oseq - preplay_esn->oseq;
+ else
+ oseq_diff = ~preplay_esn->oseq + replay_esn->oseq + 1;
+
+ if (seq_diff < x->replay_maxdiff &&
+ oseq_diff < x->replay_maxdiff) {
+
+ if (x->xflags & XFRM_TIME_DEFER)
+ event = XFRM_REPLAY_TIMEOUT;
+ else
+ return;
+ }
+
+ break;
+
+ case XFRM_REPLAY_TIMEOUT:
+ if (memcmp(x->replay_esn, x->preplay_esn,
+ xfrm_replay_state_esn_len(replay_esn)) == 0) {
+ x->xflags |= XFRM_TIME_DEFER;
+ return;
+ }
+
+ break;
+ }
+
+ memcpy(x->preplay_esn, x->replay_esn,
+ xfrm_replay_state_esn_len(replay_esn));
+ c.event = XFRM_MSG_NEWAE;
+ c.data.aevent = event;
+ km_state_notify(x, &c);
+
+ if (x->replay_maxage &&
+ !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
+ x->xflags &= ~XFRM_TIME_DEFER;
+}
+
static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb)
{
int err = 0;
@@ -510,7 +574,7 @@ static struct xfrm_replay xfrm_replay_esn = {
.advance = xfrm_replay_advance_esn,
.check = xfrm_replay_check_esn,
.recheck = xfrm_replay_recheck_esn,
- .notify = xfrm_replay_notify_bmp,
+ .notify = xfrm_replay_notify_esn,
.overflow = xfrm_replay_overflow_esn,
};
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 2c341bdaf47c..78f66fa92449 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1187,6 +1187,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
goto error;
x->props.flags = orig->props.flags;
+ x->props.extra_flags = orig->props.extra_flags;
x->curlft.add_time = orig->curlft.add_time;
x->km.state = orig->km.state;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index fbd9e6cd0fd7..aa778748c565 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -515,6 +515,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
copy_from_user_state(x, p);
+ if (attrs[XFRMA_SA_EXTRA_FLAGS])
+ x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
+
if ((err = attach_aead(&x->aead, &x->props.ealgo,
attrs[XFRMA_ALG_AEAD])))
goto error;
@@ -779,6 +782,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
copy_to_user_state(x, p);
+ if (x->props.extra_flags) {
+ ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS,
+ x->props.extra_flags);
+ if (ret)
+ goto out;
+ }
+
if (x->coaddr) {
ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr);
if (ret)
@@ -2302,9 +2312,10 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) },
[XFRMA_TFCPAD] = { .type = NLA_U32 },
[XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) },
+ [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 },
};
-static struct xfrm_link {
+static const struct xfrm_link {
int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
int (*dump)(struct sk_buff *, struct netlink_callback *);
int (*done)(struct netlink_callback *);
@@ -2338,7 +2349,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct nlattr *attrs[XFRMA_MAX+1];
- struct xfrm_link *link;
+ const struct xfrm_link *link;
int type, err;
type = nlh->nlmsg_type;
@@ -2495,6 +2506,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
x->security->ctx_len);
if (x->coaddr)
l += nla_total_size(sizeof(*x->coaddr));
+ if (x->props.extra_flags)
+ l += nla_total_size(sizeof(x->props.extra_flags));
/* Must count x->lastused as it may become non-zero behind our back. */
l += nla_total_size(sizeof(u64));